Merge to Fedora kernel-2.6.17-1.2187_FC5-vs2.0.2.1 patched with stable patch-2.6...
authorMarc Fiuczynski <mef@cs.princeton.edu>
Thu, 19 Oct 2006 03:31:43 +0000 (03:31 +0000)
committerMarc Fiuczynski <mef@cs.princeton.edu>
Thu, 19 Oct 2006 03:31:43 +0000 (03:31 +0000)
322 files changed:
Documentation/networking/netdevices.txt
MAINTAINERS
Makefile
arch/i386/Kconfig
arch/i386/Kconfig.cpu
arch/i386/Kconfig.debug
arch/i386/Makefile
arch/i386/kernel/Makefile
arch/i386/kernel/acpi/Makefile
arch/i386/kernel/asm-offsets.c
arch/i386/kernel/cpu/Makefile
arch/i386/kernel/cpu/cpufreq/powernow-k7.c
arch/i386/kernel/cpu/cpufreq/powernow-k8.c
arch/i386/kernel/cpu/mtrr/Makefile
arch/i386/kernel/entry.S
arch/i386/kernel/irq.c
arch/i386/kernel/reboot.c
arch/i386/kernel/sysenter.c
arch/i386/kernel/traps.c
arch/i386/kernel/vm86.c
arch/i386/mm/Makefile
arch/i386/mm/pageattr.c
arch/i386/mm/pgtable.c
arch/i386/oprofile/Makefile
arch/i386/pci/Makefile
arch/i386/pci/mmconfig.c
arch/i386/power/Makefile
arch/ia64/Kconfig
arch/ia64/kernel/sys_ia64.c
arch/ia64/sn/kernel/xpc_channel.c
arch/ia64/sn/kernel/xpc_main.c
arch/ia64/sn/kernel/xpc_partition.c
arch/powerpc/Kconfig
arch/powerpc/kernel/cpu_setup_power4.S
arch/powerpc/kernel/crash.c
arch/powerpc/platforms/pseries/xics.c
arch/sparc/kernel/sys_sparc.c
arch/sparc64/kernel/sys_sparc.c
arch/sparc64/mm/generic.c
arch/um/kernel/physmem.c
arch/x86_64/Kconfig
arch/x86_64/Makefile
arch/x86_64/ia32/Makefile
arch/x86_64/ia32/vsyscall-sigreturn.S
arch/x86_64/kernel/Makefile
arch/x86_64/kernel/acpi/Makefile
arch/x86_64/kernel/asm-offsets.c
arch/x86_64/kernel/entry.S
arch/x86_64/kernel/init_task.c
arch/x86_64/kernel/irq.c
arch/x86_64/kernel/pmtimer.c
arch/x86_64/kernel/setup.c
arch/x86_64/kernel/setup64.c
arch/x86_64/kernel/smp.c
arch/x86_64/mm/Makefile
arch/x86_64/oprofile/Makefile
arch/x86_64/pci/Makefile
arch/x86_64/pci/mmconfig.c
block/elevator.c
block/ll_rw_blk.c
configs/kernel-2.6.17-i686-planetlab.config
configs/kernel-2.6.17-i686-smp-planetlab.config
drivers/Makefile
drivers/acpi/Kconfig
drivers/acpi/namespace/nsxfeval.c
drivers/block/aoe/aoenet.c
drivers/cdrom/cdrom.c
drivers/char/mem.c
drivers/char/tpm/tpm_tis.c
drivers/char/tty_io.c
drivers/firmware/Kconfig
drivers/i2c/busses/scx200_acb.c
drivers/i2c/i2c-core.c
drivers/ide/ide-lib.c
drivers/ide/pci/via82cxxx.c
drivers/ieee1394/ohci1394.c
drivers/ieee1394/sbp2.c
drivers/infiniband/hw/mthca/mthca_reset.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/md/dm-exception-store.c
drivers/md/dm-ioctl.c
drivers/md/dm-mpath.c
drivers/md/dm-raid1.c
drivers/md/dm-snap.c
drivers/md/dm.c
drivers/md/raid1.c
drivers/media/dvb/bt8xx/dvb-bt8xx.c
drivers/media/dvb/bt8xx/dvb-bt8xx.h
drivers/media/dvb/dvb-core/dvb_frontend.c
drivers/media/dvb/dvb-core/dvb_net.c
drivers/media/dvb/frontends/dvb-pll.c
drivers/media/dvb/ttpci/budget-av.c
drivers/media/dvb/ttpci/budget.c
drivers/media/video/Kconfig
drivers/media/video/stradis.c
drivers/net/8139cp.c
drivers/net/bnx2.c
drivers/net/bonding/bond_main.c
drivers/net/chelsio/sge.c
drivers/net/e1000/e1000_hw.c
drivers/net/e1000/e1000_hw.h
drivers/net/e1000/e1000_main.c
drivers/net/forcedeth.c
drivers/net/hamradio/6pack.c
drivers/net/hamradio/mkiss.c
drivers/net/ifb.c
drivers/net/irda/vlsi_ir.c
drivers/net/ixgb/ixgb_main.c
drivers/net/loopback.c
drivers/net/mv643xx_eth.c
drivers/net/natsemi.c
drivers/net/r8169.c
drivers/net/s2io.c
drivers/net/sky2.c
drivers/net/sky2.h
drivers/net/tg3.c
drivers/net/tg3.h
drivers/net/tulip/winbond-840.c
drivers/net/typhoon.c
drivers/net/via-velocity.c
drivers/net/wireless/orinoco.c
drivers/net/wireless/spectrum_cs.c
drivers/oprofile/buffer_sync.c
drivers/oprofile/cpu_buffer.c
drivers/oprofile/cpu_buffer.h
drivers/oprofile/event_buffer.h
drivers/oprofile/oprof.c
drivers/oprofile/oprof.h
drivers/oprofile/oprofile_files.c
drivers/pci/Kconfig
drivers/pci/quirks.c
drivers/pnp/resource.c
drivers/s390/net/qeth_eddp.c
drivers/s390/net/qeth_main.c
drivers/s390/net/qeth_tso.h
drivers/scsi/Makefile
drivers/scsi/iscsi_tcp.c
drivers/scsi/iscsi_tcp.h
drivers/scsi/libiscsi.c [deleted file]
drivers/scsi/scsi_ioctl.c
drivers/scsi/scsi_transport_iscsi.c
drivers/serial/8250.c
drivers/serial/Kconfig
drivers/serial/serial_core.c
drivers/usb/host/uhci-q.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/storage/unusual_devs.h
drivers/usb/storage/usb.c
drivers/video/Kconfig
drivers/video/console/softcursor.c
drivers/xen/Makefile [new file with mode: 0644]
drivers/xen/balloon/Makefile [new file with mode: 0644]
drivers/xen/balloon/balloon.c [new file with mode: 0644]
drivers/xen/blkback/Makefile [new file with mode: 0644]
drivers/xen/blkback/blkback.c [new file with mode: 0644]
drivers/xen/blkback/common.h [new file with mode: 0644]
drivers/xen/blkback/interface.c [new file with mode: 0644]
drivers/xen/blkback/vbd.c [new file with mode: 0644]
drivers/xen/blkfront/Kconfig [new file with mode: 0644]
drivers/xen/blkfront/Makefile [new file with mode: 0644]
drivers/xen/blkfront/blkfront.c [new file with mode: 0644]
drivers/xen/blkfront/block.h [new file with mode: 0644]
drivers/xen/blkfront/vbd.c [new file with mode: 0644]
drivers/xen/blktap/Makefile [new file with mode: 0644]
drivers/xen/blktap/blktap.c [new file with mode: 0644]
drivers/xen/console/Makefile [new file with mode: 0644]
drivers/xen/console/console.c [new file with mode: 0644]
drivers/xen/evtchn/Makefile [new file with mode: 0644]
drivers/xen/evtchn/evtchn.c [new file with mode: 0644]
drivers/xen/netback/Makefile [new file with mode: 0644]
drivers/xen/netback/common.h [new file with mode: 0644]
drivers/xen/netback/interface.c [new file with mode: 0644]
drivers/xen/netback/netback.c [new file with mode: 0644]
drivers/xen/netfront/Kconfig [new file with mode: 0644]
drivers/xen/netfront/Makefile [new file with mode: 0644]
drivers/xen/netfront/netfront.c [new file with mode: 0644]
drivers/xen/privcmd/Makefile [new file with mode: 0644]
drivers/xen/privcmd/privcmd.c [new file with mode: 0644]
fs/Kconfig
fs/befs/linuxvfs.c
fs/binfmt_elf.c
fs/buffer.c
fs/ext2/super.c
fs/ext3/inode.c
fs/ext3/namei.c
fs/ext3/super.c
fs/file.c
fs/locks.c
fs/namei.c
fs/proc/base.c
fs/splice.c
fs/udf/super.c
fs/udf/truncate.c
fs/xfs/xfs_dir2_node.c
include/asm-generic/mman.h
include/asm-i386/apic.h
include/asm-i386/desc.h
include/asm-i386/fixmap.h
include/asm-i386/mach-default/mach_traps.h
include/asm-i386/page.h
include/asm-i386/pgtable-2level-defs.h
include/asm-i386/pgtable-3level-defs.h
include/asm-i386/spinlock.h
include/asm-ia64/mman.h
include/asm-ia64/sn/xp.h
include/asm-ia64/sn/xpc.h
include/asm-s390/futex.h
include/asm-sparc/mman.h
include/asm-sparc64/mman.h
include/asm-sparc64/sfp-machine.h
include/asm-um/page.h
include/asm-x86_64/apic.h
include/asm-x86_64/hw_irq.h
include/asm-x86_64/ipi.h
include/asm-x86_64/pgtable.h
include/linux/ethtool.h
include/linux/ext3_fs.h
include/linux/gfp.h
include/linux/highmem.h
include/linux/idr.h
include/linux/interrupt.h
include/linux/mm.h
include/linux/netdevice.h
include/linux/netfilter_bridge.h
include/linux/oprofile.h
include/linux/pci_ids.h
include/linux/pfn.h
include/linux/skbuff.h
include/linux/tty.h
include/net/pkt_sched.h
include/net/protocol.h
include/net/sctp/sctp.h
include/net/sctp/sm.h
include/net/sock.h
include/net/tcp.h
include/scsi/iscsi_if.h
include/scsi/iscsi_proto.h
include/scsi/libiscsi.h [deleted file]
include/scsi/scsi_transport_iscsi.h
include/sound/initval.h
kernel/Kconfig.preempt
kernel/fork.c
kernel/futex.c
kernel/irq/manage.c
kernel/irq/spurious.c
kernel/sched.c
kernel/stop_machine.c
kernel/timer.c
lib/Makefile
lib/idr.c
lib/spinlock_debug.c
lib/ts_bm.c
mm/Kconfig
mm/filemap.c
mm/filemap.h
mm/highmem.c
mm/memory.c
mm/mmap.c
mm/page_alloc.c
mm/pdflush.c
mm/swapfile.c
net/8021q/vlan.c
net/atm/clip.c
net/bridge/br_device.c
net/bridge/br_forward.c
net/bridge/br_if.c
net/bridge/br_netfilter.c
net/bridge/netfilter/ebt_ulog.c
net/core/dev.c
net/core/dev_mcast.c
net/core/dst.c
net/core/ethtool.c
net/core/netpoll.c
net/core/pktgen.c
net/core/rtnetlink.c
net/core/skbuff.c
net/decnet/dn_nsp_in.c
net/decnet/dn_route.c
net/decnet/dn_rules.c
net/ieee80211/Kconfig
net/ipv4/af_inet.c
net/ipv4/fib_rules.c
net/ipv4/fib_semantics.c
net/ipv4/ip_output.c
net/ipv4/ipcomp.c
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_conntrack_helper_h323.c
net/ipv4/netfilter/ip_nat_proto_tcp.c
net/ipv4/netfilter/ip_nat_proto_udp.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/netfilter/ipt_ULOG.c
net/ipv4/route.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_output.c
net/ipv4/xfrm4_output.c
net/ipv6/addrconf.c
net/ipv6/exthdrs.c
net/ipv6/ip6_output.c
net/ipv6/ipcomp6.c
net/ipv6/xfrm6_output.c
net/ipx/af_ipx.c
net/netfilter/nfnetlink_log.c
net/sched/act_api.c
net/sched/sch_generic.c
net/sched/sch_teql.c
net/sctp/sm_make_chunk.c
net/sctp/sm_statefuns.c
net/sctp/socket.c
net/sunrpc/cache.c
sound/core/oss/pcm_oss.c
sound/core/timer.c
sound/isa/cs423x/Makefile
sound/pci/Kconfig
sound/pci/au88x0/au88x0_mpu401.c
sound/pci/fm801.c
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_analog.c
sound/pci/hda/patch_realtek.c
sound/pci/hda/patch_sigmatel.c
sound/pci/rme9652/hdsp.c

index 3c0a5ba..847cedb 100644 (file)
@@ -42,9 +42,9 @@ dev->get_stats:
        Context: nominally process, but don't sleep inside an rwlock
 
 dev->hard_start_xmit:
-       Synchronization: dev->xmit_lock spinlock.
+       Synchronization: netif_tx_lock spinlock.
        When the driver sets NETIF_F_LLTX in dev->features this will be
-       called without holding xmit_lock. In this case the driver 
+       called without holding netif_tx_lock. In this case the driver
        has to lock by itself when needed. It is recommended to use a try lock
        for this and return -1 when the spin lock fails. 
        The locking there should also properly protect against 
@@ -62,12 +62,12 @@ dev->hard_start_xmit:
          Only valid when NETIF_F_LLTX is set.
 
 dev->tx_timeout:
-       Synchronization: dev->xmit_lock spinlock.
+       Synchronization: netif_tx_lock spinlock.
        Context: BHs disabled
        Notes: netif_queue_stopped() is guaranteed true
 
 dev->set_multicast_list:
-       Synchronization: dev->xmit_lock spinlock.
+       Synchronization: netif_tx_lock spinlock.
        Context: BHs disabled
 
 dev->poll:
index c3c5842..6a1bb87 100644 (file)
@@ -2572,6 +2572,14 @@ M:       dbrownell@users.sourceforge.net
 L:     spi-devel-general@lists.sourceforge.net
 S:     Maintained
 
+STABLE BRANCH:
+P:     Greg Kroah-Hartman
+M:     greg@kroah.com
+P:     Chris Wright
+M:     chrisw@sous-sol.org
+L:     stable@kernel.org
+S:     Maintained
+
 TPM DEVICE DRIVER
 P:     Kylene Hall
 M:     kjhall@us.ibm.com
index 4b9e53d..bfefdc7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 17
-EXTRAVERSION = -1.2142_FC4.4
+EXTRAVERSION = -1.2142_FC4.3smp
 NAME=Crazed Snow-Weasel
 
 # *DOCUMENTATION*
index 03f53c2..d75fa40 100644 (file)
@@ -91,6 +91,15 @@ config X86_PC
        help
          Choose this option if your computer is a standard PC or compatible.
 
+config X86_XEN
+       bool "Xen-compatible"
+       select X86_UP_APIC if !SMP && XEN_PRIVILEGED_GUEST
+       select X86_UP_IOAPIC if !SMP && XEN_PRIVILEGED_GUEST
+       select SWIOTLB
+       help
+         Choose this option if you plan to run this kernel on top of the
+         Xen Hypervisor.
+
 config X86_ELAN
        bool "AMD Elan"
        help
@@ -193,6 +202,7 @@ source "arch/i386/Kconfig.cpu"
 
 config HPET_TIMER
        bool "HPET Timer Support"
+       depends on !X86_XEN
        help
          This enables the use of the HPET for the kernel's internal timer.
          HPET is the next generation timer replacing legacy 8254s.
@@ -223,7 +233,7 @@ config NR_CPUS
 
 config SCHED_SMT
        bool "SMT (Hyperthreading) scheduler support"
-       depends on SMP
+       depends on SMP && !X86_XEN
        default off
        help
          SMT scheduler support improves the CPU scheduler's decision making
@@ -233,7 +243,7 @@ config SCHED_SMT
 
 config SCHED_MC
        bool "Multi-core scheduler support"
-       depends on SMP
+       depends on SMP && !X86_XEN
        default y
        help
          Multi-core scheduler support improves the CPU scheduler's decision
@@ -244,7 +254,7 @@ source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
        bool "Local APIC support on uniprocessors"
-       depends on !SMP && !(X86_VISWS || X86_VOYAGER)
+       depends on !SMP && !(X86_VISWS || X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
        help
          A local APIC (Advanced Programmable Interrupt Controller) is an
          integrated interrupt controller in the CPU. If you have a single-CPU
@@ -269,12 +279,12 @@ config X86_UP_IOAPIC
 
 config X86_LOCAL_APIC
        bool
-       depends on X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER)
+       depends on X86_UP_APIC || ((X86_VISWS || SMP) && !(X86_VOYAGER || XEN_UNPRIVILEGED_GUEST))
        default y
 
 config X86_IO_APIC
        bool
-       depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER))
+       depends on X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER || XEN_UNPRIVILEGED_GUEST))
        default y
 
 config X86_VISWS_APIC
@@ -282,9 +292,14 @@ config X86_VISWS_APIC
        depends on X86_VISWS
        default y
 
+config X86_TSC
+       bool
+       depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ && !X86_XEN
+       default y
+
 config X86_MCE
        bool "Machine Check Exception"
-       depends on !X86_VOYAGER
+       depends on !(X86_VOYAGER || X86_XEN)
        ---help---
          Machine Check Exception support allows the processor to notify the
          kernel if it detects a problem (e.g. overheating, component failure).
@@ -374,6 +389,7 @@ config X86_REBOOTFIXUPS
 
 config MICROCODE
        tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
+       depends on !XEN_UNPRIVILEGED_GUEST
        ---help---
          If you say Y here and also to "/dev file system support" in the
          'File systems' section, you will be able to update the microcode on
@@ -391,6 +407,7 @@ config MICROCODE
 
 config X86_MSR
        tristate "/dev/cpu/*/msr - Model-specific register support"
+       depends on !X86_XEN
        help
          This device gives privileged processes access to the x86
          Model-Specific Registers (MSRs).  It is a character device with
@@ -406,6 +423,10 @@ config X86_CPUID
          with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to
          /dev/cpu/31/cpuid.
 
+config SWIOTLB
+       bool
+       default n
+
 source "drivers/firmware/Kconfig"
 
 choice
@@ -578,7 +599,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID
 
 config HIGHPTE
        bool "Allocate 3rd-level pagetables from highmem"
-       depends on HIGHMEM4G || HIGHMEM64G
+       depends on (HIGHMEM4G || HIGHMEM64G) && !X86_XEN
        help
          The VM uses one page table entry for each page of physical memory.
          For systems with a lot of RAM, this can be wasteful of precious
@@ -587,6 +608,7 @@ config HIGHPTE
 
 config MATH_EMULATION
        bool "Math emulation"
+       depends on !X86_XEN
        ---help---
          Linux can emulate a math coprocessor (used for floating point
          operations) if you don't have one. 486DX and Pentium processors have
@@ -612,6 +634,8 @@ config MATH_EMULATION
 
 config MTRR
        bool "MTRR (Memory Type Range Register) support"
+       depends on !XEN_UNPRIVILEGED_GUEST
+       default y if X86_XEN
        ---help---
          On Intel P6 family processors (Pentium Pro, Pentium II and later)
          the Memory Type Range Registers (MTRRs) may be used to control
@@ -646,7 +670,7 @@ config MTRR
 
 config EFI
        bool "Boot from EFI support (EXPERIMENTAL)"
-       depends on ACPI
+       depends on ACPI && !X86_XEN
        default n
        ---help---
        This enables the the kernel to boot on EFI platforms using
@@ -664,7 +688,7 @@ config EFI
 
 config IRQBALANCE
        bool "Enable kernel irq balancing"
-       depends on SMP && X86_IO_APIC
+       depends on SMP && X86_IO_APIC && !X86_XEN
        default y
        help
          The default yes will allow the kernel to do irq load balancing.
@@ -712,7 +736,7 @@ source kernel/Kconfig.hz
 
 config KEXEC
        bool "kexec system call (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
+       depends on EXPERIMENTAL && !X86_XEN
        help
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
@@ -765,20 +789,25 @@ config HOTPLUG_CPU
 
 endmenu
 
+config ARCH_ENABLE_MEMORY_HOTPLUG
+       def_bool y
+       depends on HIGHMEM
 
 menu "Power management options (ACPI, APM)"
-       depends on !X86_VOYAGER
+       depends on !(X86_VOYAGER || XEN_UNPRIVILEGED_GUEST)
 
+if !X86_XEN
 source kernel/power/Kconfig
+endif
 
 source "drivers/acpi/Kconfig"
 
 menu "APM (Advanced Power Management) BIOS Support"
-depends on PM && !X86_VISWS
+depends on PM && !(X86_VISWS || X86_XEN)
 
 config APM
        tristate "APM (Advanced Power Management) BIOS support"
-       depends on PM
+       depends on PM && PM_LEGACY
        ---help---
          APM is a BIOS specification for saving power using several different
          techniques. This is mostly useful for battery powered laptops with
@@ -963,6 +992,7 @@ choice
 
 config PCI_GOBIOS
        bool "BIOS"
+       depends on !X86_XEN
 
 config PCI_GOMMCONFIG
        bool "MMConfig"
@@ -970,6 +1000,13 @@ config PCI_GOMMCONFIG
 config PCI_GODIRECT
        bool "Direct"
 
+config PCI_GOXEN_FE
+       bool "Xen PCI Frontend"
+       depends on X86_XEN
+       help
+         The PCI device frontend driver allows the kernel to import arbitrary
+         PCI devices from a PCI backend to support PCI driver domains.
+
 config PCI_GOANY
        bool "Any"
 
@@ -977,7 +1014,7 @@ endchoice
 
 config PCI_BIOS
        bool
-       depends on !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
+       depends on !(X86_VISWS || X86_XEN) && PCI && (PCI_GOBIOS || PCI_GOANY)
        default y
 
 config PCI_DIRECT
@@ -990,6 +1027,18 @@ config PCI_MMCONFIG
        depends on PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
        default y
 
+config XEN_PCIDEV_FRONTEND
+       bool
+       depends on PCI && X86_XEN && (PCI_GOXEN_FE || PCI_GOANY)
+       default y
+
+config XEN_PCIDEV_FE_DEBUG
+       bool "Xen PCI Frontend Debugging"
+       depends on XEN_PCIDEV_FRONTEND
+       default n
+       help
+         Enables some debug statements within the PCI Frontend.
+
 source "drivers/pci/pcie/Kconfig"
 
 source "drivers/pci/Kconfig"
@@ -1000,7 +1049,7 @@ config ISA_DMA_API
 
 config ISA
        bool "ISA support"
-       depends on !(X86_VOYAGER || X86_VISWS)
+       depends on !(X86_VOYAGER || X86_VISWS || X86_XEN)
        help
          Find out whether you have ISA slots on your motherboard.  ISA is the
          name of a bus system, i.e. the way the CPU talks to the other stuff
@@ -1027,7 +1076,7 @@ config EISA
 source "drivers/eisa/Kconfig"
 
 config MCA
-       bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
+       bool "MCA support" if !(X86_VISWS || X86_VOYAGER || X86_XEN)
        default y if X86_VOYAGER
        help
          MicroChannel Architecture is found in some IBM PS/2 machines and
@@ -1091,6 +1140,8 @@ source "security/Kconfig"
 
 source "crypto/Kconfig"
 
+source "drivers/xen/Kconfig"
+
 source "lib/Kconfig"
 
 #
@@ -1116,7 +1167,7 @@ config X86_SMP
 
 config X86_HT
        bool
-       depends on SMP && !(X86_VISWS || X86_VOYAGER)
+       depends on SMP && !(X86_VISWS || X86_VOYAGER || X86_XEN)
        default y
 
 config X86_BIOS_REBOOT
@@ -1129,6 +1180,16 @@ config X86_TRAMPOLINE
        depends on X86_SMP || (X86_VOYAGER && SMP)
        default y
 
+config X86_NO_TSS
+       bool
+       depends on X86_XEN
+       default y
+
+config X86_NO_IDT
+       bool
+       depends on X86_XEN
+       default y
+
 config KTIME_SCALAR
        bool
        default y
index eb13048..f90d8d0 100644 (file)
@@ -251,7 +251,7 @@ config X86_PPRO_FENCE
 
 config X86_F00F_BUG
        bool
-       depends on M586MMX || M586TSC || M586 || M486 || M386
+       depends on (M586MMX || M586TSC || M586 || M486 || M386) && !X86_NO_IDT
        default y
 
 config X86_WP_WORKS_OK
index c92191b..fb28fe7 100644 (file)
@@ -84,6 +84,7 @@ config X86_MPPARSE
 config DOUBLEFAULT
        default y
        bool "Enable doublefault exception handler" if EMBEDDED
+       depends on !X86_NO_TSS
        help
           This option allows trapping of rare doublefault exceptions that
           would otherwise cause a system to silently reboot. Disabling this
index 94e0faf..53b7343 100644 (file)
@@ -48,6 +48,11 @@ CFLAGS                               += $(shell if [ $(call cc-version) -lt 0400 ] ; then echo $(call cc-op
 
 CFLAGS += $(cflags-y)
 
+cppflags-$(CONFIG_XEN) += \
+       -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION)
+
+CPPFLAGS += $(cppflags-y)
+
 # Default subarch .c files
 mcore-y  := mach-default
 
@@ -71,6 +76,10 @@ mcore-$(CONFIG_X86_BIGSMP)   := mach-default
 mflags-$(CONFIG_X86_SUMMIT) := -Iinclude/asm-i386/mach-summit
 mcore-$(CONFIG_X86_SUMMIT)  := mach-default
 
+# Xen subarch support
+mflags-$(CONFIG_X86_XEN)       := -Iinclude/asm-i386/mach-xen
+mcore-$(CONFIG_X86_XEN)                := mach-xen
+
 # generic subarchitecture
 mflags-$(CONFIG_X86_GENERICARCH) := -Iinclude/asm-i386/mach-generic
 mcore-$(CONFIG_X86_GENERICARCH) := mach-default
@@ -105,6 +114,19 @@ boot := arch/i386/boot
 PHONY += zImage bzImage compressed zlilo bzlilo \
          zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install
 
+ifdef CONFIG_XEN
+CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
+head-y := arch/i386/kernel/head-xen.o arch/i386/kernel/init_task-xen.o
+boot := arch/i386/boot-xen
+.PHONY: vmlinuz
+all: vmlinuz
+
+vmlinuz: vmlinux
+       $(Q)$(MAKE) $(build)=$(boot) $@
+
+install:
+       $(Q)$(MAKE) $(build)=$(boot) XENGUEST=$(XENGUEST) $@
+else
 all: bzImage
 
 # KBUILD_IMAGE specify target image being built
@@ -127,6 +149,7 @@ fdimage fdimage144 fdimage288 isoimage: vmlinux
 
 install:
        $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install
+endif
 
 archclean:
        $(Q)$(MAKE) $(clean)=arch/i386/boot
@@ -145,3 +168,4 @@ endef
 CLEAN_FILES += arch/$(ARCH)/boot/fdimage \
               arch/$(ARCH)/boot/image.iso \
               arch/$(ARCH)/boot/mtools.conf
+CLEAN_FILES += vmlinuz vmlinux-stripped
index 96fb8a0..ddaa19d 100644 (file)
@@ -42,6 +42,12 @@ EXTRA_AFLAGS   := -traditional
 
 obj-$(CONFIG_SCx200)           += scx200.o
 
+ifdef CONFIG_XEN
+vsyscall_note := vsyscall-note-xen.o
+else
+vsyscall_note := vsyscall-note.o
+endif
+
 # vsyscall.o contains the vsyscall DSO images as __initdata.
 # We must build both images before we can assemble it.
 # Note: kbuild does not track this dependency due to usage of .incbin
@@ -62,7 +68,7 @@ SYSCFLAGS_vsyscall-int80.so   = $(vsyscall-flags)
 
 $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
 $(obj)/vsyscall-%.so: $(src)/vsyscall.lds \
-                     $(obj)/vsyscall-%.o $(obj)/vsyscall-note.o FORCE
+                     $(obj)/vsyscall-%.o $(obj)/$(vsyscall_note) FORCE
        $(call if_changed,syscall)
 
 # We also create a special relocatable object that should mirror the symbol
@@ -74,5 +80,17 @@ $(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
 
 SYSCFLAGS_vsyscall-syms.o = -r
 $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
-                       $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE
+                       $(obj)/vsyscall-sysenter.o $(obj)/$(vsyscall_note) FORCE
        $(call if_changed,syscall)
+
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+
+obj-y += fixup.o
+microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o
+n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o
+
+obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
+obj-y := $(call cherrypickxen, $(obj-y))
+extra-y := $(call cherrypickxen, $(extra-y))
+endif
index 7e9ac99..fa783e6 100644 (file)
@@ -6,3 +6,7 @@ ifneq ($(CONFIG_ACPI_PROCESSOR),)
 obj-y                          += cstate.o processor.o
 endif
 
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+obj-y := $(call cherrypickxen, $(obj-y), $(src))
+endif
index ac3f4e5..c6b52aa 100644 (file)
@@ -13,6 +13,7 @@
 #include <asm/fixmap.h>
 #include <asm/processor.h>
 #include <asm/thread_info.h>
+#include <asm/elf.h>
 
 #define DEFINE(sym, val) \
         asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -64,10 +65,14 @@ void foo(void)
        OFFSET(pbe_orig_address, pbe, orig_address);
        OFFSET(pbe_next, pbe, next);
 
+#ifndef CONFIG_X86_NO_TSS
        /* Offset from the sysenter stack to tss.esp0 */
-       DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
+       DEFINE(SYSENTER_stack_esp0, offsetof(struct tss_struct, esp0) -
                 sizeof(struct tss_struct));
+#else
+       /* sysenter stack points directly to esp0 */
+       DEFINE(SYSENTER_stack_esp0, 0);
+#endif
 
        DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
-       DEFINE(VSYSCALL_BASE, __fix_to_virt(FIX_VSYSCALL));
 }
index 010aecf..753f1d7 100644 (file)
@@ -17,3 +17,8 @@ obj-$(CONFIG_X86_MCE) +=      mcheck/
 
 obj-$(CONFIG_MTRR)     +=      mtrr/
 obj-$(CONFIG_CPU_FREQ) +=      cpufreq/
+
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+obj-y := $(call cherrypickxen, $(obj-y), $(src))
+endif
index da2db31..a748409 100644 (file)
@@ -581,7 +581,6 @@ static int __init powernow_cpu_init (struct cpufreq_policy *policy)
 
        rdmsrl (MSR_K7_FID_VID_STATUS, fidvidstatus.val);
 
-       /* recalibrate cpu_khz */
        recalibrate_cpu_khz();
 
        fsb = (10 * cpu_khz) / fid_codes[fidvidstatus.bits.CFID];
index 71fffa1..2ea3c6c 100644 (file)
@@ -1008,7 +1008,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
                 * an UP version, and is deprecated by AMD.
                 */
 
-               if ((num_online_cpus() != 1) || (num_possible_cpus() != 1)) {
+               if (num_online_cpus() != 1) {
                        printk(KERN_ERR PFX "MP systems not supported by PSB BIOS structure\n");
                        kfree(data);
                        return -ENODEV;
index a25b701..06df4fe 100644 (file)
@@ -3,3 +3,10 @@ obj-y          += amd.o
 obj-y          += cyrix.o
 obj-y          += centaur.o
 
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+n-obj-xen := generic.o state.o amd.o cyrix.o centaur.o
+
+obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
+obj-y := $(call cherrypickxen, $(obj-y))
+endif
index 840d990..eed41e6 100644 (file)
@@ -177,7 +177,7 @@ need_resched:
 
        # sysenter call handler stub
 ENTRY(sysenter_entry)
-       movl TSS_sysenter_esp0(%esp),%esp
+       movl SYSENTER_stack_esp0(%esp),%esp
 sysenter_past_esp:
        sti
        pushl $(__USER_DS)
@@ -414,7 +414,7 @@ vector=0
 ENTRY(irq_entries_start)
 .rept NR_IRQS
        ALIGN
-1:     pushl $vector-256
+1:     pushl $~(vector)
        jmp common_interrupt
 .data
        .long 1b
@@ -431,7 +431,7 @@ common_interrupt:
 
 #define BUILD_INTERRUPT(name, nr)      \
 ENTRY(name)                            \
-       pushl $nr-256;                  \
+       pushl $~(nr);                   \
        SAVE_ALL                        \
        movl %esp,%eax;                 \
        call smp_/**/name;              \
@@ -500,7 +500,7 @@ device_not_available_emulate:
  * that sets up the real kernel stack. Check here, since we can't
  * allow the wrong stack to be used.
  *
- * "TSS_sysenter_esp0+12" is because the NMI/debug handler will have
+ * "SYSENTER_stack_esp0+12" is because the NMI/debug handler will have
  * already pushed 3 words if it hits on the sysenter instruction:
  * eflags, cs and eip.
  *
@@ -512,7 +512,7 @@ device_not_available_emulate:
        cmpw $__KERNEL_CS,4(%esp);              \
        jne ok;                                 \
 label:                                         \
-       movl TSS_sysenter_esp0+offset(%esp),%esp;       \
+       movl SYSENTER_stack_esp0+offset(%esp),%esp;     \
        pushfl;                                 \
        pushl $__KERNEL_CS;                     \
        pushl $sysenter_past_esp
index f3a9c78..2904a66 100644 (file)
@@ -53,8 +53,8 @@ static union irq_ctx *softirq_ctx[NR_CPUS];
  */
 fastcall unsigned int do_IRQ(struct pt_regs *regs)
 {      
-       /* high bits used in ret_from_ code */
-       int irq = regs->orig_eax & 0xff;
+       /* high bit used in ret_from_ code */
+       int irq = ~regs->orig_eax;
 #ifdef CONFIG_4KSTACKS
        union irq_ctx *curctx, *irqctx;
        u32 *isp;
index 4163865..d207242 100644 (file)
@@ -120,14 +120,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"),
                },
        },
-       {       /* HP laptops have weird reboot issues */
-               .callback = set_bios_reboot,
-               .ident = "HP Laptop",
-               .matches = {
-                       DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
-                       DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq"),
-               },
-       },
        { }
 };
 
index ec4959f..b97719a 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/gfp.h>
 #include <linux/string.h>
 #include <linux/elf.h>
+#include <linux/mm.h>
 #include <linux/mman.h>
 
 #include <asm/a.out.h>
 #include <asm/pgtable.h>
 #include <asm/unistd.h>
 
+#ifdef CONFIG_XEN
+#include <xen/interface/callback.h>
+#endif
+
 extern asmlinkage void sysenter_entry(void);
 
 void enable_sep_cpu(void)
 {
+#ifndef CONFIG_X86_NO_TSS
        int cpu = get_cpu();
        struct tss_struct *tss = &per_cpu(init_tss, cpu);
 
@@ -39,6 +45,7 @@ void enable_sep_cpu(void)
        wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
        wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
        put_cpu();      
+#endif
 }
 
 /*
@@ -47,7 +54,6 @@ void enable_sep_cpu(void)
  */
 extern const char vsyscall_int80_start, vsyscall_int80_end;
 extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
-
 static struct page *sysenter_pages[2];
 
 int __init sysenter_setup(void)
@@ -56,16 +62,28 @@ int __init sysenter_setup(void)
 
        sysenter_pages[0] = virt_to_page(page);
 
-       if (!boot_cpu_has(X86_FEATURE_SEP)) {
+#ifdef CONFIG_XEN
+       if (boot_cpu_has(X86_FEATURE_SEP)) {
+               struct callback_register sysenter = {
+                       .type = CALLBACKTYPE_sysenter,
+                       .address = { __KERNEL_CS, (unsigned long)sysenter_entry },
+               };
+
+               if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0)
+                       clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability);
+       }
+#endif
+
+       if (boot_cpu_has(X86_FEATURE_SEP)) {
                memcpy(page,
-                      &vsyscall_int80_start,
-                      &vsyscall_int80_end - &vsyscall_int80_start);
+                      &vsyscall_sysenter_start,
+                      &vsyscall_sysenter_end - &vsyscall_sysenter_start);
                return 0;
        }
 
        memcpy(page,
-              &vsyscall_sysenter_start,
-              &vsyscall_sysenter_end - &vsyscall_sysenter_start);
+              &vsyscall_int80_start,
+              &vsyscall_int80_end - &vsyscall_int80_start);
 
        return 0;
 }
@@ -130,6 +148,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
        return err;
 }
 
+#ifndef CONFIG_XEN
 int in_gate_area_no_task(unsigned long addr)
 {
        return 0;
@@ -144,3 +163,4 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
 {
        return NULL;
 }
+#endif
index 0b9d741..5b65cb8 100644 (file)
@@ -705,18 +705,11 @@ static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
 
 static void io_check_error(unsigned char reason, struct pt_regs * regs)
 {
-       unsigned long i;
-
        printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
        show_registers(regs);
 
        /* Re-enable the IOCK line, wait for a few seconds */
-       reason = (reason & 0xf) | 8;
-       outb(reason, 0x61);
-       i = 2000;
-       while (--i) udelay(1000);
-       reason &= ~8;
-       outb(reason, 0x61);
+       clear_io_check_error(reason);
 }
 
 static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
index 00e0118..c0adee5 100644 (file)
@@ -98,7 +98,9 @@
 struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs));
 struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
 {
+#ifndef CONFIG_X86_NO_TSS
        struct tss_struct *tss;
+#endif
        struct pt_regs *ret;
        unsigned long tmp;
 
@@ -123,12 +125,16 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs)
                do_exit(SIGSEGV);
        }
 
+#ifndef CONFIG_X86_NO_TSS
        tss = &per_cpu(init_tss, get_cpu());
+#endif
        current->thread.esp0 = current->thread.saved_esp0;
        current->thread.sysenter_cs = __KERNEL_CS;
        load_esp0(tss, &current->thread);
        current->thread.saved_esp0 = 0;
+#ifndef CONFIG_X86_NO_TSS
        put_cpu();
+#endif
 
        loadsegment(fs, current->thread.saved_fs);
        loadsegment(gs, current->thread.saved_gs);
@@ -252,7 +258,9 @@ out:
 
 static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk)
 {
+#ifndef CONFIG_X86_NO_TSS
        struct tss_struct *tss;
+#endif
        long eax;
 /*
  * make sure the vm86() system call doesn't try to do anything silly
@@ -297,12 +305,16 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
        savesegment(fs, tsk->thread.saved_fs);
        savesegment(gs, tsk->thread.saved_gs);
 
+#ifndef CONFIG_X86_NO_TSS
        tss = &per_cpu(init_tss, get_cpu());
+#endif
        tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0;
        if (cpu_has_sep)
                tsk->thread.sysenter_cs = 0;
        load_esp0(tss, &tsk->thread);
+#ifndef CONFIG_X86_NO_TSS
        put_cpu();
+#endif
 
        tsk->thread.screen_bitmap = info->screen_bitmap;
        if (info->flags & VM86_SCREEN_BITMAP)
index 80908b5..2b33b20 100644 (file)
@@ -8,3 +8,11 @@ obj-$(CONFIG_NUMA) += discontig.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_HIGHMEM) += highmem.o
 obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap.o
+
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+
+obj-y          += hypervisor.o
+
+obj-y := $(call cherrypickxen, $(obj-y))
+endif
index 92c3d9f..a00267a 100644 (file)
@@ -85,7 +85,7 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
        unsigned long flags;
 
        set_pte_atomic(kpte, pte);      /* change init_mm */
-       if (PTRS_PER_PMD > 1)
+       if (HAVE_SHARED_KERNEL_PMD)
                return;
 
        spin_lock_irqsave(&pgd_lock, flags);
index 2889567..de8d187 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/pagemap.h>
 #include <linux/spinlock.h>
+#include <linux/module.h>
 
 #include <asm/system.h>
 #include <asm/pgtable.h>
@@ -138,6 +139,10 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
        __flush_tlb_one(vaddr);
 }
 
+static int nr_fixmaps = 0;
+unsigned long __FIXADDR_TOP = 0xfffff000;
+EXPORT_SYMBOL(__FIXADDR_TOP);
+
 void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
 {
        unsigned long address = __fix_to_virt(idx);
@@ -147,6 +152,13 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
                return;
        }
        set_pte_pfn(address, phys >> PAGE_SHIFT, flags);
+       nr_fixmaps++;
+}
+
+void set_fixaddr_top(unsigned long top)
+{
+       BUG_ON(nr_fixmaps > 0);
+       __FIXADDR_TOP = top - PAGE_SIZE;
 }
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
index 30f3eb3..e596c39 100644 (file)
@@ -6,7 +6,11 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
                oprofilefs.o oprofile_stats.o  \
                timer_int.o )
 
+ifdef CONFIG_XEN
+oprofile-y                             := $(DRIVER_OBJS) xenoprof.o
+else 
 oprofile-y                             := $(DRIVER_OBJS) init.o backtrace.o
 oprofile-$(CONFIG_X86_LOCAL_APIC)      += nmi_int.o op_model_athlon.o \
                                           op_model_ppro.o op_model_p4.o
 oprofile-$(CONFIG_X86_IO_APIC)         += nmi_timer_int.o
+endif
index 62ad75c..4447f39 100644 (file)
@@ -4,6 +4,10 @@ obj-$(CONFIG_PCI_BIOS)         += pcbios.o
 obj-$(CONFIG_PCI_MMCONFIG)     += mmconfig.o direct.o
 obj-$(CONFIG_PCI_DIRECT)       += direct.o
 
+# pcifront should be after pcbios.o, mmconfig.o, and direct.o as it should only
+# take over if direct access to the PCI bus is unavailable
+obj-$(CONFIG_XEN_PCIDEV_FRONTEND)      += pcifront.o
+
 pci-y                          := fixup.o
 pci-$(CONFIG_ACPI)             += acpi.o
 pci-y                          += legacy.o irq.o
@@ -12,3 +16,8 @@ pci-$(CONFIG_X86_VISWS)               := visws.o fixup.o
 pci-$(CONFIG_X86_NUMAQ)                := numa.o irq.o
 
 obj-y                          += $(pci-y) common.o
+
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+obj-y := $(call cherrypickxen, $(obj-y))
+endif
index 6b1ea0c..e545b09 100644 (file)
@@ -15,7 +15,9 @@
 #include <asm/e820.h>
 #include "pci.h"
 
-#define MMCONFIG_APER_SIZE (256*1024*1024)
+/* aperture is up to 256MB but BIOS may reserve less */
+#define MMCONFIG_APER_MIN      (2 * 1024*1024)
+#define MMCONFIG_APER_MAX      (256 * 1024*1024)
 
 /* Assume systems with more busses have correct MCFG */
 #define MAX_CHECK_BUS 16
@@ -197,9 +199,10 @@ void __init pci_mmcfg_init(void)
                return;
 
        if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
-                       pci_mmcfg_config[0].base_address + MMCONFIG_APER_SIZE,
+                       pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
                        E820_RESERVED)) {
-               printk(KERN_ERR "PCI: BIOS Bug: MCFG area is not E820-reserved\n");
+               printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
+                               pci_mmcfg_config[0].base_address);
                printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
                return;
        }
index 8cfa4e8..e74fee6 100644 (file)
@@ -1,2 +1,4 @@
-obj-$(CONFIG_PM)               += cpu.o
+obj-$(CONFIG_PM_LEGACY)                += cpu.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += cpu.o
+obj-$(CONFIG_ACPI_SLEEP)       += cpu.o
 obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o
index 67a9053..1e3ad1e 100644 (file)
@@ -270,6 +270,9 @@ config HOTPLUG_CPU
          can be controlled through /sys/devices/system/cpu/cpu#.
          Say N if you want to disable CPU hotplug.
 
+config ARCH_ENABLE_MEMORY_HOTPLUG
+       def_bool y
+
 config SCHED_SMT
        bool "SMT scheduler support"
        depends on SMP
index c7b943f..3edefc8 100644 (file)
@@ -164,10 +164,25 @@ sys_pipe (void)
        return retval;
 }
 
+int ia64_map_check_rgn(unsigned long addr, unsigned long len,
+               unsigned long flags)
+{
+       unsigned long roff;
+
+       /*
+        * Don't permit mappings into unmapped space, the virtual page table
+        * of a region, or across a region boundary.  Note: RGN_MAP_LIMIT is
+        * equal to 2^n-PAGE_SIZE (for some integer n <= 61) and len > 0.
+        */
+       roff = REGION_OFFSET(addr);
+       if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len)))
+               return -EINVAL;
+       return 0;
+}
+
 static inline unsigned long
 do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff)
 {
-       unsigned long roff;
        struct file *file = NULL;
 
        flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
@@ -189,17 +204,6 @@ do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, un
                goto out;
        }
 
-       /*
-        * Don't permit mappings into unmapped space, the virtual page table of a region,
-        * or across a region boundary.  Note: RGN_MAP_LIMIT is equal to 2^n-PAGE_SIZE
-        * (for some integer n <= 61) and len > 0.
-        */
-       roff = REGION_OFFSET(addr);
-       if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len))) {
-               addr = -EINVAL;
-               goto out;
-       }
-
        down_write(&current->mm->mmap_sem);
        addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
        up_write(&current->mm->mmap_sem);
index 0bf6fbc..7e8a4d1 100644 (file)
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 
 #include <linux/sched.h>
 #include <linux/cache.h>
 #include <linux/interrupt.h>
-#include <linux/slab.h>
+#include <linux/mutex.h>
+#include <linux/completion.h>
 #include <asm/sn/bte.h>
 #include <asm/sn/sn_sal.h>
-#include "xpc.h"
+#include <asm/sn/xpc.h>
+
+
+/*
+ * Guarantee that the kzalloc'd memory is cacheline aligned.
+ */
+static void *
+xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+       /* see if kzalloc will give us cacheline aligned memory by default */
+       *base = kzalloc(size, flags);
+       if (*base == NULL) {
+               return NULL;
+       }
+       if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
+               return *base;
+       }
+       kfree(*base);
+
+       /* nope, we'll have to do it ourselves */
+       *base = kzalloc(size + L1_CACHE_BYTES, flags);
+       if (*base == NULL) {
+               return NULL;
+       }
+       return (void *) L1_CACHE_ALIGN((u64) *base);
+}
 
 
 /*
@@ -56,7 +82,8 @@ xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
                atomic_set(&ch->n_to_notify, 0);
 
                spin_lock_init(&ch->lock);
-               sema_init(&ch->msg_to_pull_sema, 1);    /* mutex */
+               mutex_init(&ch->msg_to_pull_mutex);
+               init_completion(&ch->wdisconnect_wait);
 
                atomic_set(&ch->n_on_msg_allocate_wq, 0);
                init_waitqueue_head(&ch->msg_allocate_wq);
@@ -72,7 +99,7 @@ xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
 enum xpc_retval
 xpc_setup_infrastructure(struct xpc_partition *part)
 {
-       int ret;
+       int ret, cpuid;
        struct timer_list *timer;
        partid_t partid = XPC_PARTID(part);
 
@@ -90,20 +117,19 @@ xpc_setup_infrastructure(struct xpc_partition *part)
         * Allocate all of the channel structures as a contiguous chunk of
         * memory.
         */
-       part->channels = kmalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
+       part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
                                                                GFP_KERNEL);
        if (part->channels == NULL) {
                dev_err(xpc_chan, "can't get memory for channels\n");
                return xpcNoMemory;
        }
-       memset(part->channels, 0, sizeof(struct xpc_channel) * XPC_NCHANNELS);
 
        part->nchannels = XPC_NCHANNELS;
 
 
        /* allocate all the required GET/PUT values */
 
-       part->local_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE,
+       part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
                                        GFP_KERNEL, &part->local_GPs_base);
        if (part->local_GPs == NULL) {
                kfree(part->channels);
@@ -112,60 +138,57 @@ xpc_setup_infrastructure(struct xpc_partition *part)
                        "values\n");
                return xpcNoMemory;
        }
-       memset(part->local_GPs, 0, XPC_GP_SIZE);
 
-       part->remote_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE,
+       part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
                                        GFP_KERNEL, &part->remote_GPs_base);
        if (part->remote_GPs == NULL) {
-               kfree(part->channels);
-               part->channels = NULL;
-               kfree(part->local_GPs_base);
-               part->local_GPs = NULL;
                dev_err(xpc_chan, "can't get memory for remote get/put "
                        "values\n");
+               kfree(part->local_GPs_base);
+               part->local_GPs = NULL;
+               kfree(part->channels);
+               part->channels = NULL;
                return xpcNoMemory;
        }
-       memset(part->remote_GPs, 0, XPC_GP_SIZE);
 
 
        /* allocate all the required open and close args */
 
-       part->local_openclose_args = xpc_kmalloc_cacheline_aligned(
+       part->local_openclose_args = xpc_kzalloc_cacheline_aligned(
                                        XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
                                        &part->local_openclose_args_base);
        if (part->local_openclose_args == NULL) {
-               kfree(part->channels);
-               part->channels = NULL;
-               kfree(part->local_GPs_base);
-               part->local_GPs = NULL;
+               dev_err(xpc_chan, "can't get memory for local connect args\n");
                kfree(part->remote_GPs_base);
                part->remote_GPs = NULL;
-               dev_err(xpc_chan, "can't get memory for local connect args\n");
+               kfree(part->local_GPs_base);
+               part->local_GPs = NULL;
+               kfree(part->channels);
+               part->channels = NULL;
                return xpcNoMemory;
        }
-       memset(part->local_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
 
-       part->remote_openclose_args = xpc_kmalloc_cacheline_aligned(
+       part->remote_openclose_args = xpc_kzalloc_cacheline_aligned(
                                        XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
                                        &part->remote_openclose_args_base);
        if (part->remote_openclose_args == NULL) {
-               kfree(part->channels);
-               part->channels = NULL;
-               kfree(part->local_GPs_base);
-               part->local_GPs = NULL;
-               kfree(part->remote_GPs_base);
-               part->remote_GPs = NULL;
+               dev_err(xpc_chan, "can't get memory for remote connect args\n");
                kfree(part->local_openclose_args_base);
                part->local_openclose_args = NULL;
-               dev_err(xpc_chan, "can't get memory for remote connect args\n");
+               kfree(part->remote_GPs_base);
+               part->remote_GPs = NULL;
+               kfree(part->local_GPs_base);
+               part->local_GPs = NULL;
+               kfree(part->channels);
+               part->channels = NULL;
                return xpcNoMemory;
        }
-       memset(part->remote_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
 
 
        xpc_initialize_channels(part, partid);
 
        atomic_set(&part->nchannels_active, 0);
+       atomic_set(&part->nchannels_engaged, 0);
 
 
        /* local_IPI_amo were set to 0 by an earlier memset() */
@@ -182,18 +205,18 @@ xpc_setup_infrastructure(struct xpc_partition *part)
        ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ,
                                part->IPI_owner, (void *) (u64) partid);
        if (ret != 0) {
-               kfree(part->channels);
-               part->channels = NULL;
-               kfree(part->local_GPs_base);
-               part->local_GPs = NULL;
-               kfree(part->remote_GPs_base);
-               part->remote_GPs = NULL;
-               kfree(part->local_openclose_args_base);
-               part->local_openclose_args = NULL;
-               kfree(part->remote_openclose_args_base);
-               part->remote_openclose_args = NULL;
                dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
                        "errno=%d\n", -ret);
+               kfree(part->remote_openclose_args_base);
+               part->remote_openclose_args = NULL;
+               kfree(part->local_openclose_args_base);
+               part->local_openclose_args = NULL;
+               kfree(part->remote_GPs_base);
+               part->remote_GPs = NULL;
+               kfree(part->local_GPs_base);
+               part->local_GPs = NULL;
+               kfree(part->channels);
+               part->channels = NULL;
                return xpcLackOfResources;
        }
 
@@ -209,7 +232,7 @@ xpc_setup_infrastructure(struct xpc_partition *part)
         * With the setting of the partition setup_state to XPC_P_SETUP, we're
         * declaring that this partition is ready to go.
         */
-       (volatile u8) part->setup_state = XPC_P_SETUP;
+       part->setup_state = XPC_P_SETUP;
 
 
        /*
@@ -223,11 +246,11 @@ xpc_setup_infrastructure(struct xpc_partition *part)
        xpc_vars_part[partid].openclose_args_pa =
                                        __pa(part->local_openclose_args);
        xpc_vars_part[partid].IPI_amo_pa = __pa(part->local_IPI_amo_va);
-       xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(smp_processor_id());
-       xpc_vars_part[partid].IPI_phys_cpuid =
-                                       cpu_physical_id(smp_processor_id());
+       cpuid = raw_smp_processor_id(); /* any CPU in this partition will do */
+       xpc_vars_part[partid].IPI_nasid = cpuid_to_nasid(cpuid);
+       xpc_vars_part[partid].IPI_phys_cpuid = cpu_physical_id(cpuid);
        xpc_vars_part[partid].nchannels = part->nchannels;
-       (volatile u64) xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
+       xpc_vars_part[partid].magic = XPC_VP_MAGIC1;
 
        return xpcSuccess;
 }
@@ -256,8 +279,8 @@ xpc_pull_remote_cachelines(struct xpc_partition *part, void *dst,
                return part->reason;
        }
 
-       bte_ret = xp_bte_copy((u64) src, (u64) ia64_tpa((u64) dst),
-                               (u64) cnt, (BTE_NORMAL | BTE_WACQUIRE), NULL);
+       bte_ret = xp_bte_copy((u64) src, (u64) dst, (u64) cnt,
+                                       (BTE_NORMAL | BTE_WACQUIRE), NULL);
        if (bte_ret == BTE_SUCCESS) {
                return xpcSuccess;
        }
@@ -355,7 +378,7 @@ xpc_pull_remote_vars_part(struct xpc_partition *part)
 
                /* let the other side know that we've pulled their variables */
 
-               (volatile u64) xpc_vars_part[partid].magic = XPC_VP_MAGIC2;
+               xpc_vars_part[partid].magic = XPC_VP_MAGIC2;
        }
 
        if (pulled_entry->magic == XPC_VP_MAGIC1) {
@@ -442,22 +465,20 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch)
        for (nentries = ch->local_nentries; nentries > 0; nentries--) {
 
                nbytes = nentries * ch->msg_size;
-               ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
-                                               (GFP_KERNEL | GFP_DMA),
+               ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
+                                               GFP_KERNEL,
                                                &ch->local_msgqueue_base);
                if (ch->local_msgqueue == NULL) {
                        continue;
                }
-               memset(ch->local_msgqueue, 0, nbytes);
 
                nbytes = nentries * sizeof(struct xpc_notify);
-               ch->notify_queue = kmalloc(nbytes, (GFP_KERNEL | GFP_DMA));
+               ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
                if (ch->notify_queue == NULL) {
                        kfree(ch->local_msgqueue_base);
                        ch->local_msgqueue = NULL;
                        continue;
                }
-               memset(ch->notify_queue, 0, nbytes);
 
                spin_lock_irqsave(&ch->lock, irq_flags);
                if (nentries < ch->local_nentries) {
@@ -497,13 +518,12 @@ xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
        for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
 
                nbytes = nentries * ch->msg_size;
-               ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
-                                               (GFP_KERNEL | GFP_DMA),
+               ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
+                                               GFP_KERNEL,
                                                &ch->remote_msgqueue_base);
                if (ch->remote_msgqueue == NULL) {
                        continue;
                }
-               memset(ch->remote_msgqueue, 0, nbytes);
 
                spin_lock_irqsave(&ch->lock, irq_flags);
                if (nentries < ch->remote_nentries) {
@@ -532,7 +552,6 @@ static enum xpc_retval
 xpc_allocate_msgqueues(struct xpc_channel *ch)
 {
        unsigned long irq_flags;
-       int i;
        enum xpc_retval ret;
 
 
@@ -550,13 +569,6 @@ xpc_allocate_msgqueues(struct xpc_channel *ch)
                return ret;
        }
 
-       for (i = 0; i < ch->local_nentries; i++) {
-               /* use a semaphore as an event wait queue */
-               sema_init(&ch->notify_queue[i].sema, 0);
-       }
-
-       sema_init(&ch->teardown_sema, 0);       /* event wait */
-
        spin_lock_irqsave(&ch->lock, irq_flags);
        ch->flags |= XPC_C_SETUP;
        spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -625,6 +637,55 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
 }
 
 
+/*
+ * Notify those who wanted to be notified upon delivery of their message.
+ */
+static void
+xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
+{
+       struct xpc_notify *notify;
+       u8 notify_type;
+       s64 get = ch->w_remote_GP.get - 1;
+
+
+       while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
+
+               notify = &ch->notify_queue[get % ch->local_nentries];
+
+               /*
+                * See if the notify entry indicates it was associated with
+                * a message whose sender wants to be notified. It is possible
+                * that it is, but someone else is doing or has done the
+                * notification.
+                */
+               notify_type = notify->type;
+               if (notify_type == 0 ||
+                               cmpxchg(&notify->type, notify_type, 0) !=
+                                                               notify_type) {
+                       continue;
+               }
+
+               DBUG_ON(notify_type != XPC_N_CALL);
+
+               atomic_dec(&ch->n_to_notify);
+
+               if (notify->func != NULL) {
+                       dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
+                               "msg_number=%ld, partid=%d, channel=%d\n",
+                               (void *) notify, get, ch->partid, ch->number);
+
+                       notify->func(reason, ch->partid, ch->number,
+                                                               notify->key);
+
+                       dev_dbg(xpc_chan, "notify->func() returned, "
+                               "notify=0x%p, msg_number=%ld, partid=%d, "
+                               "channel=%d\n", (void *) notify, get,
+                               ch->partid, ch->number);
+               }
+       }
+}
+
+
 /*
  * Free up message queues and other stuff that were allocated for the specified
  * channel.
@@ -669,9 +730,6 @@ xpc_free_msgqueues(struct xpc_channel *ch)
                ch->remote_msgqueue = NULL;
                kfree(ch->notify_queue);
                ch->notify_queue = NULL;
-
-               /* in case someone is waiting for the teardown to complete */
-               up(&ch->teardown_sema);
        }
 }
 
@@ -683,7 +741,7 @@ static void
 xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 {
        struct xpc_partition *part = &xpc_partitions[ch->partid];
-       u32 ch_flags = ch->flags;
+       u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED);
 
 
        DBUG_ON(!spin_is_locked(&ch->lock));
@@ -696,17 +754,20 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
 
        /* make sure all activity has settled down first */
 
-       if (atomic_read(&ch->references) > 0) {
+       if (atomic_read(&ch->references) > 0 ||
+                       ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+                       !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE))) {
                return;
        }
        DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
 
-       /* it's now safe to free the channel's message queues */
-
-       xpc_free_msgqueues(ch);
-       DBUG_ON(ch->flags & XPC_C_SETUP);
+       if (part->act_state == XPC_P_DEACTIVATING) {
+               /* can't proceed until the other side disengages from us */
+               if (xpc_partition_engaged(1UL << ch->partid)) {
+                       return;
+               }
 
-       if (part->act_state != XPC_P_DEACTIVATING) {
+       } else {
 
                /* as long as the other side is up do the full protocol */
 
@@ -724,16 +785,46 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
                }
        }
 
+       /* wake those waiting for notify completion */
+       if (atomic_read(&ch->n_to_notify) > 0) {
+               /* >>> we do callout while holding ch->lock */
+               xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put);
+       }
+
        /* both sides are disconnected now */
 
-       ch->flags = XPC_C_DISCONNECTED; /* clear all flags, but this one */
+       if (ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE) {
+               spin_unlock_irqrestore(&ch->lock, *irq_flags);
+               xpc_disconnect_callout(ch, xpcDisconnected);
+               spin_lock_irqsave(&ch->lock, *irq_flags);
+       }
+
+       /* it's now safe to free the channel's message queues */
+       xpc_free_msgqueues(ch);
+
+       /* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */
+       ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
 
        atomic_dec(&part->nchannels_active);
 
-       if (ch_flags & XPC_C_WASCONNECTED) {
+       if (channel_was_connected) {
                dev_info(xpc_chan, "channel %d to partition %d disconnected, "
                        "reason=%d\n", ch->number, ch->partid, ch->reason);
        }
+
+       if (ch->flags & XPC_C_WDISCONNECT) {
+               /* we won't lose the CPU since we're holding ch->lock */
+               complete(&ch->wdisconnect_wait);
+       } else if (ch->delayed_IPI_flags) {
+               if (part->act_state != XPC_P_DEACTIVATING) {
+                       /* time to take action on any delayed IPI flags */
+                       spin_lock(&part->IPI_lock);
+                       XPC_SET_IPI_FLAGS(part->local_IPI_amo, ch->number,
+                                                       ch->delayed_IPI_flags);
+                       spin_unlock(&part->IPI_lock);
+               }
+               ch->delayed_IPI_flags = 0;
+       }
 }
 
 
@@ -754,6 +845,19 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
 
        spin_lock_irqsave(&ch->lock, irq_flags);
 
+again:
+
+       if ((ch->flags & XPC_C_DISCONNECTED) &&
+                                       (ch->flags & XPC_C_WDISCONNECT)) {
+               /*
+                * Delay processing IPI flags until thread waiting disconnect
+                * has had a chance to see that the channel is disconnected.
+                */
+               ch->delayed_IPI_flags |= IPI_flags;
+               spin_unlock_irqrestore(&ch->lock, irq_flags);
+               return;
+       }
+
 
        if (IPI_flags & XPC_IPI_CLOSEREQUEST) {
 
@@ -764,7 +868,7 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
                /*
                 * If RCLOSEREQUEST is set, we're probably waiting for
                 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
-                * with this RCLOSEQREUQEST in the IPI_flags.
+                * with this RCLOSEREQUEST in the IPI_flags.
                 */
 
                if (ch->flags & XPC_C_RCLOSEREQUEST) {
@@ -779,14 +883,22 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
 
                        /* both sides have finished disconnecting */
                        xpc_process_disconnect(ch, &irq_flags);
+                       DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+                       goto again;
                }
 
                if (ch->flags & XPC_C_DISCONNECTED) {
-                       // >>> explain this section
-
                        if (!(IPI_flags & XPC_IPI_OPENREQUEST)) {
-                               DBUG_ON(part->act_state !=
-                                                       XPC_P_DEACTIVATING);
+                               if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo,
+                                        ch_number) & XPC_IPI_OPENREQUEST)) {
+
+                                       DBUG_ON(ch->delayed_IPI_flags != 0);
+                                       spin_lock(&part->IPI_lock);
+                                       XPC_SET_IPI_FLAGS(part->local_IPI_amo,
+                                                       ch_number,
+                                                       XPC_IPI_CLOSEREQUEST);
+                                       spin_unlock(&part->IPI_lock);
+                               }
                                spin_unlock_irqrestore(&ch->lock, irq_flags);
                                return;
                        }
@@ -816,9 +928,13 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
                        }
 
                        XPC_DISCONNECT_CHANNEL(ch, reason, &irq_flags);
-               } else {
-                       xpc_process_disconnect(ch, &irq_flags);
+
+                       DBUG_ON(IPI_flags & XPC_IPI_CLOSEREPLY);
+                       spin_unlock_irqrestore(&ch->lock, irq_flags);
+                       return;
                }
+
+               xpc_process_disconnect(ch, &irq_flags);
        }
 
 
@@ -834,7 +950,20 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
                }
 
                DBUG_ON(!(ch->flags & XPC_C_CLOSEREQUEST));
-               DBUG_ON(!(ch->flags & XPC_C_RCLOSEREQUEST));
+
+               if (!(ch->flags & XPC_C_RCLOSEREQUEST)) {
+                       if ((XPC_GET_IPI_FLAGS(part->local_IPI_amo, ch_number)
+                                               & XPC_IPI_CLOSEREQUEST)) {
+
+                               DBUG_ON(ch->delayed_IPI_flags != 0);
+                               spin_lock(&part->IPI_lock);
+                               XPC_SET_IPI_FLAGS(part->local_IPI_amo,
+                                               ch_number, XPC_IPI_CLOSEREPLY);
+                               spin_unlock(&part->IPI_lock);
+                       }
+                       spin_unlock_irqrestore(&ch->lock, irq_flags);
+                       return;
+               }
 
                ch->flags |= XPC_C_RCLOSEREPLY;
 
@@ -852,8 +981,14 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
                        "channel=%d\n", args->msg_size, args->local_nentries,
                        ch->partid, ch->number);
 
-               if ((ch->flags & XPC_C_DISCONNECTING) ||
-                                       part->act_state == XPC_P_DEACTIVATING) {
+               if (part->act_state == XPC_P_DEACTIVATING ||
+                                       (ch->flags & XPC_C_ROPENREQUEST)) {
+                       spin_unlock_irqrestore(&ch->lock, irq_flags);
+                       return;
+               }
+
+               if (ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) {
+                       ch->delayed_IPI_flags |= XPC_IPI_OPENREQUEST;
                        spin_unlock_irqrestore(&ch->lock, irq_flags);
                        return;
                }
@@ -867,8 +1002,11 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
                 *      msg_size = size of channel's messages in bytes
                 *      local_nentries = remote partition's local_nentries
                 */
-               DBUG_ON(args->msg_size == 0);
-               DBUG_ON(args->local_nentries == 0);
+               if (args->msg_size == 0 || args->local_nentries == 0) {
+                       /* assume OPENREQUEST was delayed by mistake */
+                       spin_unlock_irqrestore(&ch->lock, irq_flags);
+                       return;
+               }
 
                ch->flags |= (XPC_C_ROPENREQUEST | XPC_C_CONNECTING);
                ch->remote_nentries = args->local_nentries;
@@ -906,7 +1044,13 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
                        spin_unlock_irqrestore(&ch->lock, irq_flags);
                        return;
                }
-               DBUG_ON(!(ch->flags & XPC_C_OPENREQUEST));
+               if (!(ch->flags & XPC_C_OPENREQUEST)) {
+                       XPC_DISCONNECT_CHANNEL(ch, xpcOpenCloseError,
+                                                               &irq_flags);
+                       spin_unlock_irqrestore(&ch->lock, irq_flags);
+                       return;
+               }
+
                DBUG_ON(!(ch->flags & XPC_C_ROPENREQUEST));
                DBUG_ON(ch->flags & XPC_C_CONNECTED);
 
@@ -960,12 +1104,12 @@ xpc_connect_channel(struct xpc_channel *ch)
        struct xpc_registration *registration = &xpc_registrations[ch->number];
 
 
-       if (down_interruptible(&registration->sema) != 0) {
-               return xpcInterrupted;
+       if (mutex_trylock(&registration->mutex) == 0) {
+               return xpcRetry;
        }
 
        if (!XPC_CHANNEL_REGISTERED(ch->number)) {
-               up(&registration->sema);
+               mutex_unlock(&registration->mutex);
                return xpcUnregistered;
        }
 
@@ -976,7 +1120,7 @@ xpc_connect_channel(struct xpc_channel *ch)
 
        if (ch->flags & XPC_C_DISCONNECTING) {
                spin_unlock_irqrestore(&ch->lock, irq_flags);
-               up(&registration->sema);
+               mutex_unlock(&registration->mutex);
                return ch->reason;
        }
 
@@ -1008,7 +1152,7 @@ xpc_connect_channel(struct xpc_channel *ch)
                         * channel lock be locked and will unlock and relock
                         * the channel lock as needed.
                         */
-                       up(&registration->sema);
+                       mutex_unlock(&registration->mutex);
                        XPC_DISCONNECT_CHANNEL(ch, xpcUnequalMsgSizes,
                                                                &irq_flags);
                        spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -1023,7 +1167,7 @@ xpc_connect_channel(struct xpc_channel *ch)
                atomic_inc(&xpc_partitions[ch->partid].nchannels_active);
        }
 
-       up(&registration->sema);
+       mutex_unlock(&registration->mutex);
 
 
        /* initiate the connection */
@@ -1039,55 +1183,6 @@ xpc_connect_channel(struct xpc_channel *ch)
 }
 
 
-/*
- * Notify those who wanted to be notified upon delivery of their message.
- */
-static void
-xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
-{
-       struct xpc_notify *notify;
-       u8 notify_type;
-       s64 get = ch->w_remote_GP.get - 1;
-
-
-       while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
-
-               notify = &ch->notify_queue[get % ch->local_nentries];
-
-               /*
-                * See if the notify entry indicates it was associated with
-                * a message who's sender wants to be notified. It is possible
-                * that it is, but someone else is doing or has done the
-                * notification.
-                */
-               notify_type = notify->type;
-               if (notify_type == 0 ||
-                               cmpxchg(&notify->type, notify_type, 0) !=
-                                                               notify_type) {
-                       continue;
-               }
-
-               DBUG_ON(notify_type != XPC_N_CALL);
-
-               atomic_dec(&ch->n_to_notify);
-
-               if (notify->func != NULL) {
-                       dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
-                               "msg_number=%ld, partid=%d, channel=%d\n",
-                               (void *) notify, get, ch->partid, ch->number);
-
-                       notify->func(reason, ch->partid, ch->number,
-                                                               notify->key);
-
-                       dev_dbg(xpc_chan, "notify->func() returned, "
-                               "notify=0x%p, msg_number=%ld, partid=%d, "
-                               "channel=%d\n", (void *) notify, get,
-                               ch->partid, ch->number);
-               }
-       }
-}
-
-
 /*
  * Clear some of the msg flags in the local message queue.
  */
@@ -1183,7 +1278,7 @@ xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
                 */
                xpc_clear_local_msgqueue_flags(ch);
 
-               (volatile s64) ch->w_remote_GP.get = ch->remote_GP.get;
+               ch->w_remote_GP.get = ch->remote_GP.get;
 
                dev_dbg(xpc_chan, "w_remote_GP.get changed to %ld, partid=%d, "
                        "channel=%d\n", ch->w_remote_GP.get, ch->partid,
@@ -1211,7 +1306,7 @@ xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
                 */
                xpc_clear_remote_msgqueue_flags(ch);
 
-               (volatile s64) ch->w_remote_GP.put = ch->remote_GP.put;
+               ch->w_remote_GP.put = ch->remote_GP.put;
 
                dev_dbg(xpc_chan, "w_remote_GP.put changed to %ld, partid=%d, "
                        "channel=%d\n", ch->w_remote_GP.put, ch->partid,
@@ -1223,7 +1318,7 @@ xpc_process_msg_IPI(struct xpc_partition *part, int ch_number)
                                "delivered=%d, partid=%d, channel=%d\n",
                                nmsgs_sent, ch->partid, ch->number);
 
-                       if (ch->flags & XPC_C_CONNECTCALLOUT) {
+                       if (ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) {
                                xpc_activate_kthreads(ch, nmsgs_sent);
                        }
                }
@@ -1240,6 +1335,7 @@ xpc_process_channel_activity(struct xpc_partition *part)
        u64 IPI_amo, IPI_flags;
        struct xpc_channel *ch;
        int ch_number;
+       u32 ch_flags;
 
 
        IPI_amo = xpc_get_IPI_flags(part);
@@ -1266,8 +1362,9 @@ xpc_process_channel_activity(struct xpc_partition *part)
                        xpc_process_openclose_IPI(part, ch_number, IPI_flags);
                }
 
+               ch_flags = ch->flags;   /* need an atomic snapshot of flags */
 
-               if (ch->flags & XPC_C_DISCONNECTING) {
+               if (ch_flags & XPC_C_DISCONNECTING) {
                        spin_lock_irqsave(&ch->lock, irq_flags);
                        xpc_process_disconnect(ch, &irq_flags);
                        spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -1278,9 +1375,9 @@ xpc_process_channel_activity(struct xpc_partition *part)
                        continue;
                }
 
-               if (!(ch->flags & XPC_C_CONNECTED)) {
-                       if (!(ch->flags & XPC_C_OPENREQUEST)) {
-                               DBUG_ON(ch->flags & XPC_C_SETUP);
+               if (!(ch_flags & XPC_C_CONNECTED)) {
+                       if (!(ch_flags & XPC_C_OPENREQUEST)) {
+                               DBUG_ON(ch_flags & XPC_C_SETUP);
                                (void) xpc_connect_channel(ch);
                        } else {
                                spin_lock_irqsave(&ch->lock, irq_flags);
@@ -1305,8 +1402,8 @@ xpc_process_channel_activity(struct xpc_partition *part)
 
 
 /*
- * XPC's heartbeat code calls this function to inform XPC that a partition has
- * gone down.  XPC responds by tearing down the XPartition Communication
+ * XPC's heartbeat code calls this function to inform XPC that a partition is
+ * going down.  XPC responds by tearing down the XPartition Communication
  * infrastructure used for the just downed partition.
  *
  * XPC's heartbeat code will never call this function and xpc_partition_up()
@@ -1314,7 +1411,7 @@ xpc_process_channel_activity(struct xpc_partition *part)
  * at the same time.
  */
 void
-xpc_partition_down(struct xpc_partition *part, enum xpc_retval reason)
+xpc_partition_going_down(struct xpc_partition *part, enum xpc_retval reason)
 {
        unsigned long irq_flags;
        int ch_number;
@@ -1330,12 +1427,11 @@ xpc_partition_down(struct xpc_partition *part, enum xpc_retval reason)
        }
 
 
-       /* disconnect all channels associated with the downed partition */
+       /* disconnect channels associated with the partition going down */
 
        for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
                ch = &part->channels[ch_number];
 
-
                xpc_msgqueue_ref(ch);
                spin_lock_irqsave(&ch->lock, irq_flags);
 
@@ -1370,6 +1466,7 @@ xpc_teardown_infrastructure(struct xpc_partition *part)
         * this partition.
         */
 
+       DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
        DBUG_ON(atomic_read(&part->nchannels_active) != 0);
        DBUG_ON(part->setup_state != XPC_P_SETUP);
        part->setup_state = XPC_P_WTEARDOWN;
@@ -1428,19 +1525,11 @@ xpc_initiate_connect(int ch_number)
                if (xpc_part_ref(part)) {
                        ch = &part->channels[ch_number];
 
-                       if (!(ch->flags & XPC_C_DISCONNECTING)) {
-                               DBUG_ON(ch->flags & XPC_C_OPENREQUEST);
-                               DBUG_ON(ch->flags & XPC_C_CONNECTED);
-                               DBUG_ON(ch->flags & XPC_C_SETUP);
-
-                               /*
-                                * Initiate the establishment of a connection
-                                * on the newly registered channel to the
-                                * remote partition.
-                                */
-                               xpc_wakeup_channel_mgr(part);
-                       }
-
+                       /*
+                        * Initiate the establishment of a connection on the
+                        * newly registered channel to the remote partition.
+                        */
+                       xpc_wakeup_channel_mgr(part);
                        xpc_part_deref(part);
                }
        }
@@ -1450,9 +1539,6 @@ xpc_initiate_connect(int ch_number)
 void
 xpc_connected_callout(struct xpc_channel *ch)
 {
-       unsigned long irq_flags;
-
-
        /* let the registerer know that a connection has been established */
 
        if (ch->func != NULL) {
@@ -1465,10 +1551,6 @@ xpc_connected_callout(struct xpc_channel *ch)
                dev_dbg(xpc_chan, "ch->func() returned, reason=xpcConnected, "
                        "partid=%d, channel=%d\n", ch->partid, ch->number);
        }
-
-       spin_lock_irqsave(&ch->lock, irq_flags);
-       ch->flags |= XPC_C_CONNECTCALLOUT;
-       spin_unlock_irqrestore(&ch->lock, irq_flags);
 }
 
 
@@ -1506,8 +1588,12 @@ xpc_initiate_disconnect(int ch_number)
 
                        spin_lock_irqsave(&ch->lock, irq_flags);
 
-                       XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering,
+                       if (!(ch->flags & XPC_C_DISCONNECTED)) {
+                               ch->flags |= XPC_C_WDISCONNECT;
+
+                               XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering,
                                                                &irq_flags);
+                       }
 
                        spin_unlock_irqrestore(&ch->lock, irq_flags);
 
@@ -1523,8 +1609,9 @@ xpc_initiate_disconnect(int ch_number)
 /*
  * To disconnect a channel, and reflect it back to all who may be waiting.
  *
- * >>> An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
- * >>> xpc_free_msgqueues().
+ * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
+ * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by
+ * xpc_disconnect_wait().
  *
  * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN.
  */
@@ -1532,7 +1619,7 @@ void
 xpc_disconnect_channel(const int line, struct xpc_channel *ch,
                        enum xpc_retval reason, unsigned long *irq_flags)
 {
-       u32 flags;
+       u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED);
 
 
        DBUG_ON(!spin_is_locked(&ch->lock));
@@ -1547,61 +1634,51 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
 
        XPC_SET_REASON(ch, reason, line);
 
-       flags = ch->flags;
+       ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
        /* some of these may not have been set */
        ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY |
                        XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
                        XPC_C_CONNECTING | XPC_C_CONNECTED);
 
-       ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
        xpc_IPI_send_closerequest(ch, irq_flags);
 
-       if (flags & XPC_C_CONNECTED) {
+       if (channel_was_connected) {
                ch->flags |= XPC_C_WASCONNECTED;
        }
 
+       spin_unlock_irqrestore(&ch->lock, *irq_flags);
+
+       /* wake all idle kthreads so they can exit */
        if (atomic_read(&ch->kthreads_idle) > 0) {
-               /* wake all idle kthreads so they can exit */
                wake_up_all(&ch->idle_wq);
        }
 
-       spin_unlock_irqrestore(&ch->lock, *irq_flags);
-
-
        /* wake those waiting to allocate an entry from the local msg queue */
-
        if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) {
                wake_up(&ch->msg_allocate_wq);
        }
 
-       /* wake those waiting for notify completion */
-
-       if (atomic_read(&ch->n_to_notify) > 0) {
-               xpc_notify_senders(ch, reason, ch->w_local_GP.put);
-       }
-
        spin_lock_irqsave(&ch->lock, *irq_flags);
 }
 
 
 void
-xpc_disconnected_callout(struct xpc_channel *ch)
+xpc_disconnect_callout(struct xpc_channel *ch, enum xpc_retval reason)
 {
        /*
-        * Let the channel's registerer know that the channel is now
+        * Let the channel's registerer know that the channel is being
         * disconnected. We don't want to do this if the registerer was never
-        * informed of a connection being made, unless the disconnect was for
-        * abnormal reasons.
+        * informed of a connection being made.
         */
 
        if (ch->func != NULL) {
                dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, "
-                       "channel=%d\n", ch->reason, ch->partid, ch->number);
+                       "channel=%d\n", reason, ch->partid, ch->number);
 
-               ch->func(ch->reason, ch->partid, ch->number, NULL, ch->key);
+               ch->func(reason, ch->partid, ch->number, NULL, ch->key);
 
                dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, "
-                       "channel=%d\n", ch->reason, ch->partid, ch->number);
+                       "channel=%d\n", reason, ch->partid, ch->number);
        }
 }
 
@@ -1754,7 +1831,7 @@ xpc_initiate_allocate(partid_t partid, int ch_number, u32 flags, void **payload)
 {
        struct xpc_partition *part = &xpc_partitions[partid];
        enum xpc_retval ret = xpcUnknownReason;
-       struct xpc_msg *msg;
+       struct xpc_msg *msg = NULL;
 
 
        DBUG_ON(partid <= 0 || partid >= XP_MAX_PARTITIONS);
@@ -1848,7 +1925,7 @@ xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
                        xpc_notify_func func, void *key)
 {
        enum xpc_retval ret = xpcSuccess;
-       struct xpc_notify *notify = NULL;   // >>> to keep the compiler happy!!
+       struct xpc_notify *notify = notify;
        s64 put, msg_number = msg->number;
 
 
@@ -1875,7 +1952,7 @@ xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
                notify = &ch->notify_queue[msg_number % ch->local_nentries];
                notify->func = func;
                notify->key = key;
-               (volatile u8) notify->type = notify_type;
+               notify->type = notify_type;
 
                // >>> is a mb() needed here?
 
@@ -2024,7 +2101,7 @@ xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
        enum xpc_retval ret;
 
 
-       if (down_interruptible(&ch->msg_to_pull_sema) != 0) {
+       if (mutex_lock_interruptible(&ch->msg_to_pull_mutex) != 0) {
                /* we were interrupted by a signal */
                return NULL;
        }
@@ -2060,7 +2137,7 @@ xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
 
                        XPC_DEACTIVATE_PARTITION(part, ret);
 
-                       up(&ch->msg_to_pull_sema);
+                       mutex_unlock(&ch->msg_to_pull_mutex);
                        return NULL;
                }
 
@@ -2069,7 +2146,7 @@ xpc_pull_remote_msg(struct xpc_channel *ch, s64 get)
                ch->next_msg_to_pull += nmsgs;
        }
 
-       up(&ch->msg_to_pull_sema);
+       mutex_unlock(&ch->msg_to_pull_mutex);
 
        /* return the message we were looking for */
        msg_offset = (get % ch->remote_nentries) * ch->msg_size;
index 1def91f..6b5d4c4 100644 (file)
@@ -1055,6 +1055,8 @@ xpc_do_exit(enum xpc_retval reason)
        if (xpc_sysctl) {
                unregister_sysctl_table(xpc_sysctl);
        }
+
+       kfree(xpc_remote_copy_buffer_base);
 }
 
 
@@ -1215,24 +1217,20 @@ xpc_init(void)
        partid_t partid;
        struct xpc_partition *part;
        pid_t pid;
+       size_t buf_size;
 
 
        if (!ia64_platform_is("sn2")) {
                return -ENODEV;
        }
 
-       /*
-        * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng
-        * various portions of a partition's reserved page. Its size is based
-        * on the size of the reserved page header and part_nasids mask. So we
-        * need to ensure that the other items will fit as well.
-        */
-       if (XPC_RP_VARS_SIZE > XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES) {
-               dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n");
-               return -EPERM;
-       }
-       DBUG_ON((u64) xpc_remote_copy_buffer !=
-                               L1_CACHE_ALIGN((u64) xpc_remote_copy_buffer));
+
+       buf_size = max(XPC_RP_VARS_SIZE,
+                               XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES);
+       xpc_remote_copy_buffer = xpc_kmalloc_cacheline_aligned(buf_size,
+                                    GFP_KERNEL, &xpc_remote_copy_buffer_base);
+       if (xpc_remote_copy_buffer == NULL)
+               return -ENOMEM;
 
        snprintf(xpc_part->bus_id, BUS_ID_SIZE, "part");
        snprintf(xpc_chan->bus_id, BUS_ID_SIZE, "chan");
@@ -1296,6 +1294,8 @@ xpc_init(void)
                if (xpc_sysctl) {
                        unregister_sysctl_table(xpc_sysctl);
                }
+
+               kfree(xpc_remote_copy_buffer_base);
                return -EBUSY;
        }
 
@@ -1314,6 +1314,8 @@ xpc_init(void)
                if (xpc_sysctl) {
                        unregister_sysctl_table(xpc_sysctl);
                }
+
+               kfree(xpc_remote_copy_buffer_base);
                return -EBUSY;
        }
 
@@ -1365,6 +1367,8 @@ xpc_init(void)
                if (xpc_sysctl) {
                        unregister_sysctl_table(xpc_sysctl);
                }
+
+               kfree(xpc_remote_copy_buffer_base);
                return -EBUSY;
        }
 
index 2c3c4a8..57c723f 100644 (file)
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 
 #include <linux/cache.h>
 #include <linux/mmzone.h>
 #include <linux/nodemask.h>
+#include <asm/uncached.h>
 #include <asm/sn/bte.h>
 #include <asm/sn/intr.h>
 #include <asm/sn/sn_sal.h>
 #include <asm/sn/nodepda.h>
 #include <asm/sn/addrs.h>
-#include "xpc.h"
+#include <asm/sn/xpc.h>
 
 
 /* XPC is exiting flag */
@@ -43,16 +44,19 @@ static u64 xpc_sh2_IPI_access3;
 
 
 /* original protection values for each node */
-u64 xpc_prot_vec[MAX_COMPACT_NODES];
+u64 xpc_prot_vec[MAX_NUMNODES];
 
 
-/* this partition's reserved page */
+/* this partition's reserved page pointers */
 struct xpc_rsvd_page *xpc_rsvd_page;
-
-/* this partition's XPC variables (within the reserved page) */
+static u64 *xpc_part_nasids;
+static u64 *xpc_mach_nasids;
 struct xpc_vars *xpc_vars;
 struct xpc_vars_part *xpc_vars_part;
 
+static int xp_nasid_mask_bytes;        /* actual size in bytes of nasid mask */
+static int xp_nasid_mask_words;        /* actual size in words of nasid mask */
+
 
 /*
  * For performance reasons, each entry of xpc_partitions[] is cacheline
@@ -64,20 +68,37 @@ struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
 
 
 /*
- * Generic buffer used to store a local copy of the remote partitions
- * reserved page or XPC variables.
- *
- * xpc_discovery runs only once and is a seperate thread that is
- * very likely going to be processing in parallel with receiving
- * interrupts.
+ * Generic buffer used to store a local copy of portions of a remote
+ * partition's reserved page (either its header and part_nasids mask,
+ * or its vars).
  */
-char ____cacheline_aligned
-               xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE];
+char *xpc_remote_copy_buffer;
+void *xpc_remote_copy_buffer_base;
+
 
+/*
+ * Guarantee that the kmalloc'd memory is cacheline aligned.
+ */
+void *
+xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+       /* see if kmalloc will give us cachline aligned memory by default */
+       *base = kmalloc(size, flags);
+       if (*base == NULL) {
+               return NULL;
+       }
+       if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
+               return *base;
+       }
+       kfree(*base);
 
-/* systune related variables */
-int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
-int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
+       /* nope, we'll have to do it ourselves */
+       *base = kmalloc(size + L1_CACHE_BYTES, flags);
+       if (*base == NULL) {
+               return NULL;
+       }
+       return (void *) L1_CACHE_ALIGN((u64) *base);
+}
 
 
 /*
@@ -85,13 +106,16 @@ int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
  * for that nasid. This function returns 0 on any error.
  */
 static u64
-xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size)
+xpc_get_rsvd_page_pa(int nasid)
 {
        bte_result_t bte_res;
        s64 status;
        u64 cookie = 0;
        u64 rp_pa = nasid;      /* seed with nasid */
        u64 len = 0;
+       u64 buf = buf;
+       u64 buf_len = 0;
+       void *buf_base = NULL;
 
 
        while (1) {
@@ -107,13 +131,20 @@ xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size)
                        break;
                }
 
-               if (len > buf_size) {
-                       dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len);
-                       status = SALRET_ERROR;
-                       break;
+               if (L1_CACHE_ALIGN(len) > buf_len) {
+                       kfree(buf_base);
+                       buf_len = L1_CACHE_ALIGN(len);
+                       buf = (u64) xpc_kmalloc_cacheline_aligned(buf_len,
+                                                       GFP_KERNEL, &buf_base);
+                       if (buf_base == NULL) {
+                               dev_err(xpc_part, "unable to kmalloc "
+                                       "len=0x%016lx\n", buf_len);
+                               status = SALRET_ERROR;
+                               break;
+                       }
                }
 
-               bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size,
+               bte_res = xp_bte_copy(rp_pa, buf, buf_len,
                                        (BTE_NOTIFY | BTE_WACQUIRE), NULL);
                if (bte_res != BTE_SUCCESS) {
                        dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
@@ -122,6 +153,8 @@ xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size)
                }
        }
 
+       kfree(buf_base);
+
        if (status != SALRET_OK) {
                rp_pa = 0;
        }
@@ -140,15 +173,15 @@ xpc_rsvd_page_init(void)
 {
        struct xpc_rsvd_page *rp;
        AMO_t *amos_page;
-       u64 rp_pa, next_cl, nasid_array = 0;
+       u64 rp_pa, nasid_array = 0;
        int i, ret;
 
 
        /* get the local reserved page's address */
 
-       rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0),
-                                       (u64) xpc_remote_copy_buffer,
-                                               XPC_RSVD_PAGE_ALIGNED_SIZE);
+       preempt_disable();
+       rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
+       preempt_enable();
        if (rp_pa == 0) {
                dev_err(xpc_part, "SAL failed to locate the reserved page\n");
                return NULL;
@@ -163,12 +196,19 @@ xpc_rsvd_page_init(void)
 
        rp->version = XPC_RP_VERSION;
 
-       /*
-        * Place the XPC variables on the cache line following the
-        * reserved page structure.
-        */
-       next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE;
-       xpc_vars = (struct xpc_vars *) next_cl;
+       /* establish the actual sizes of the nasid masks */
+       if (rp->SAL_version == 1) {
+               /* SAL_version 1 didn't set the nasids_size field */
+               rp->nasids_size = 128;
+       }
+       xp_nasid_mask_bytes = rp->nasids_size;
+       xp_nasid_mask_words = xp_nasid_mask_bytes / 8;
+
+       /* setup the pointers to the various items in the reserved page */
+       xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
+       xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
+       xpc_vars = XPC_RP_VARS(rp);
+       xpc_vars_part = XPC_RP_VARS_PART(rp);
 
        /*
         * Before clearing xpc_vars, see if a page of AMOs had been previously
@@ -183,7 +223,7 @@ xpc_rsvd_page_init(void)
         * memory protections are never restricted.
         */
        if ((amos_page = xpc_vars->amos_page) == NULL) {
-               amos_page = (AMO_t *) mspec_kalloc_page(0);
+               amos_page = (AMO_t *) TO_AMO(uncached_alloc_page(0));
                if (amos_page == NULL) {
                        dev_err(xpc_part, "can't allocate page of AMOs\n");
                        return NULL;
@@ -200,7 +240,8 @@ xpc_rsvd_page_init(void)
                        if (ret != 0) {
                                dev_err(xpc_part, "can't change memory "
                                        "protections\n");
-                               mspec_kfree_page((unsigned long) amos_page);
+                               uncached_free_page(__IA64_UNCACHED_OFFSET |
+                                                  TO_PHYS((u64) amos_page));
                                return NULL;
                        }
                }
@@ -219,39 +260,38 @@ xpc_rsvd_page_init(void)
                amos_page = (AMO_t *) TO_AMO((u64) amos_page);
        }
 
+       /* clear xpc_vars */
        memset(xpc_vars, 0, sizeof(struct xpc_vars));
 
-       /*
-        * Place the XPC per partition specific variables on the cache line
-        * following the XPC variables structure.
-        */
-       next_cl += XPC_VARS_ALIGNED_SIZE;
-       memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) *
-                                                       XP_MAX_PARTITIONS);
-       xpc_vars_part = (struct xpc_vars_part *) next_cl;
-       xpc_vars->vars_part_pa = __pa(next_cl);
-
        xpc_vars->version = XPC_V_VERSION;
        xpc_vars->act_nasid = cpuid_to_nasid(0);
        xpc_vars->act_phys_cpuid = cpu_physical_id(0);
+       xpc_vars->vars_part_pa = __pa(xpc_vars_part);
+       xpc_vars->amos_page_pa = ia64_tpa((u64) amos_page);
        xpc_vars->amos_page = amos_page;  /* save for next load of XPC */
 
 
-       /*
-        * Initialize the activation related AMO variables.
-        */
-       xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS);
-       for (i = 1; i < XP_NASID_MASK_WORDS; i++) {
-               xpc_IPI_init(i + XP_MAX_PARTITIONS);
+       /* clear xpc_vars_part */
+       memset((u64 *) xpc_vars_part, 0, sizeof(struct xpc_vars_part) *
+                                                       XP_MAX_PARTITIONS);
+
+       /* initialize the activate IRQ related AMO variables */
+       for (i = 0; i < xp_nasid_mask_words; i++) {
+               (void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
        }
-       /* export AMO page's physical address to other partitions */
-       xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page);
+
+       /* initialize the engaged remote partitions related AMO variables */
+       (void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
+       (void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
+
+       /* timestamp of when reserved page was setup by XPC */
+       rp->stamp = CURRENT_TIME;
 
        /*
         * This signifies to the remote partition that our reserved
         * page is initialized.
         */
-       (volatile u64) rp->vars_pa = __pa(xpc_vars);
+       rp->vars_pa = __pa(xpc_vars);
 
        return rp;
 }
@@ -385,6 +425,11 @@ xpc_check_remote_hb(void)
        remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
 
        for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+
+               if (xpc_exiting) {
+                       break;
+               }
+
                if (partid == sn_partition_id) {
                        continue;
                }
@@ -398,8 +443,8 @@ xpc_check_remote_hb(void)
 
                /* pull the remote_hb cache line */
                bres = xp_bte_copy(part->remote_vars_pa,
-                                       ia64_tpa((u64) remote_vars),
-                                       XPC_VARS_ALIGNED_SIZE,
+                                       (u64) remote_vars,
+                                       XPC_RP_VARS_SIZE,
                                        (BTE_NOTIFY | BTE_WACQUIRE), NULL);
                if (bres != BTE_SUCCESS) {
                        XPC_DEACTIVATE_PARTITION(part,
@@ -408,14 +453,14 @@ xpc_check_remote_hb(void)
                }
 
                dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
-                       " = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid,
-                       remote_vars->heartbeat, part->last_heartbeat,
-                       remote_vars->kdb_status,
+                       " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
+                       partid, remote_vars->heartbeat, part->last_heartbeat,
+                       remote_vars->heartbeat_offline,
                        remote_vars->heartbeating_to_mask);
 
                if (((remote_vars->heartbeat == part->last_heartbeat) &&
-                       (remote_vars->kdb_status == 0)) ||
-                            !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
+                       (remote_vars->heartbeat_offline == 0)) ||
+                            !xpc_hb_allowed(sn_partition_id, remote_vars)) {
 
                        XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
                        continue;
@@ -427,31 +472,30 @@ xpc_check_remote_hb(void)
 
 
 /*
- * Get a copy of the remote partition's rsvd page.
+ * Get a copy of a portion of the remote partition's rsvd page.
  *
  * remote_rp points to a buffer that is cacheline aligned for BTE copies and
- * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE.
+ * is large enough to contain a copy of their reserved page header and
+ * part_nasids mask.
  */
 static enum xpc_retval
 xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
-               struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa)
+               struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
 {
        int bres, i;
 
 
        /* get the reserved page's physical address */
 
-       *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp,
-                                               XPC_RSVD_PAGE_ALIGNED_SIZE);
-       if (*remote_rsvd_page_pa == 0) {
+       *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
+       if (*remote_rp_pa == 0) {
                return xpcNoRsvdPageAddr;
        }
 
 
-       /* pull over the reserved page structure */
-
-       bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp),
-                               XPC_RSVD_PAGE_ALIGNED_SIZE,
+       /* pull over the reserved page header and part_nasids mask */
+       bres = xp_bte_copy(*remote_rp_pa, (u64) remote_rp,
+                               XPC_RP_HEADER_SIZE + xp_nasid_mask_bytes,
                                (BTE_NOTIFY | BTE_WACQUIRE), NULL);
        if (bres != BTE_SUCCESS) {
                return xpc_map_bte_errors(bres);
@@ -459,8 +503,11 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 
 
        if (discovered_nasids != NULL) {
-               for (i = 0; i < XP_NASID_MASK_WORDS; i++) {
-                       discovered_nasids[i] |= remote_rp->part_nasids[i];
+               u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
+
+
+               for (i = 0; i < xp_nasid_mask_words; i++) {
+                       discovered_nasids[i] |= remote_part_nasids[i];
                }
        }
 
@@ -487,10 +534,10 @@ xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
 
 
 /*
- * Get a copy of the remote partition's XPC variables.
+ * Get a copy of the remote partition's XPC variables from the reserved page.
  *
  * remote_vars points to a buffer that is cacheline aligned for BTE copies and
- * assumed to be of size XPC_VARS_ALIGNED_SIZE.
+ * assumed to be of size XPC_RP_VARS_SIZE.
  */
 static enum xpc_retval
 xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
@@ -502,11 +549,8 @@ xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
                return xpcVarsNotSet;
        }
 
-
        /* pull over the cross partition variables */
-
-       bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
-                               XPC_VARS_ALIGNED_SIZE,
+       bres = xp_bte_copy(remote_vars_pa, (u64) remote_vars, XPC_RP_VARS_SIZE,
                                (BTE_NOTIFY | BTE_WACQUIRE), NULL);
        if (bres != BTE_SUCCESS) {
                return xpc_map_bte_errors(bres);
@@ -522,7 +566,56 @@ xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
 
 
 /*
- * Prior code has determine the nasid which generated an IPI.  Inspect
+ * Update the remote partition's info.
+ */
+static void
+xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
+               struct timespec *remote_rp_stamp, u64 remote_rp_pa,
+               u64 remote_vars_pa, struct xpc_vars *remote_vars)
+{
+       part->remote_rp_version = remote_rp_version;
+       dev_dbg(xpc_part, "  remote_rp_version = 0x%016lx\n",
+               part->remote_rp_version);
+
+       part->remote_rp_stamp = *remote_rp_stamp;
+       dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
+               part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
+
+       part->remote_rp_pa = remote_rp_pa;
+       dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
+
+       part->remote_vars_pa = remote_vars_pa;
+       dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
+               part->remote_vars_pa);
+
+       part->last_heartbeat = remote_vars->heartbeat;
+       dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
+               part->last_heartbeat);
+
+       part->remote_vars_part_pa = remote_vars->vars_part_pa;
+       dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
+               part->remote_vars_part_pa);
+
+       part->remote_act_nasid = remote_vars->act_nasid;
+       dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
+               part->remote_act_nasid);
+
+       part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
+       dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
+               part->remote_act_phys_cpuid);
+
+       part->remote_amos_page_pa = remote_vars->amos_page_pa;
+       dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
+               part->remote_amos_page_pa);
+
+       part->remote_vars_version = remote_vars->version;
+       dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
+               part->remote_vars_version);
+}
+
+
+/*
+ * Prior code has determined the nasid which generated an IPI.  Inspect
  * that nasid to determine if its partition needs to be activated or
  * deactivated.
  *
@@ -540,8 +633,12 @@ xpc_identify_act_IRQ_req(int nasid)
 {
        struct xpc_rsvd_page *remote_rp;
        struct xpc_vars *remote_vars;
-       u64 remote_rsvd_page_pa;
+       u64 remote_rp_pa;
        u64 remote_vars_pa;
+       int remote_rp_version;
+       int reactivate = 0;
+       int stamp_diff;
+       struct timespec remote_rp_stamp = { 0, 0 };
        partid_t partid;
        struct xpc_partition *part;
        enum xpc_retval ret;
@@ -551,7 +648,7 @@ xpc_identify_act_IRQ_req(int nasid)
 
        remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
 
-       ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa);
+       ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
        if (ret != xpcSuccess) {
                dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
                        "which sent interrupt, reason=%d\n", nasid, ret);
@@ -559,6 +656,10 @@ xpc_identify_act_IRQ_req(int nasid)
        }
 
        remote_vars_pa = remote_rp->vars_pa;
+       remote_rp_version = remote_rp->version;
+       if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+               remote_rp_stamp = remote_rp->stamp;
+       }
        partid = remote_rp->partid;
        part = &xpc_partitions[partid];
 
@@ -584,44 +685,118 @@ xpc_identify_act_IRQ_req(int nasid)
                "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
                remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
 
+       if (xpc_partition_disengaged(part) &&
+                                       part->act_state == XPC_P_INACTIVE) {
 
-       if (part->act_state == XPC_P_INACTIVE) {
+               xpc_update_partition_info(part, remote_rp_version,
+                                       &remote_rp_stamp, remote_rp_pa,
+                                       remote_vars_pa, remote_vars);
+
+               if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+                       if (xpc_partition_disengage_requested(1UL << partid)) {
+                               /*
+                                * Other side is waiting on us to disengage,
+                                * even though we already have.
+                                */
+                               return;
+                       }
+               } else {
+                       /* other side doesn't support disengage requests */
+                       xpc_clear_partition_disengage_request(1UL << partid);
+               }
 
-               part->remote_rp_pa = remote_rsvd_page_pa;
-               dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n",
-                       part->remote_rp_pa);
+               xpc_activate_partition(part);
+               return;
+       }
 
-               part->remote_vars_pa = remote_vars_pa;
-               dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
-                       part->remote_vars_pa);
+       DBUG_ON(part->remote_rp_version == 0);
+       DBUG_ON(part->remote_vars_version == 0);
+
+       if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
+               DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
+                                                       remote_vars_version));
+
+               if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+                       DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
+                                                               version));
+                       /* see if the other side rebooted */
+                       if (part->remote_amos_page_pa ==
+                               remote_vars->amos_page_pa &&
+                                       xpc_hb_allowed(sn_partition_id,
+                                                               remote_vars)) {
+                               /* doesn't look that way, so ignore the IPI */
+                               return;
+                       }
+               }
 
-               part->last_heartbeat = remote_vars->heartbeat;
-               dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
-                       part->last_heartbeat);
+               /*
+                * Other side rebooted and previous XPC didn't support the
+                * disengage request, so we don't need to do anything special.
+                */
 
-               part->remote_vars_part_pa = remote_vars->vars_part_pa;
-               dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
-                       part->remote_vars_part_pa);
+               xpc_update_partition_info(part, remote_rp_version,
+                                               &remote_rp_stamp, remote_rp_pa,
+                                               remote_vars_pa, remote_vars);
+               part->reactivate_nasid = nasid;
+               XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
+               return;
+       }
 
-               part->remote_act_nasid = remote_vars->act_nasid;
-               dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
-                       part->remote_act_nasid);
+       DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
 
-               part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
-               dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
-                       part->remote_act_phys_cpuid);
+       if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
+               DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
 
-               part->remote_amos_page_pa = remote_vars->amos_page_pa;
-               dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
-                       part->remote_amos_page_pa);
+               /*
+                * Other side rebooted and previous XPC did support the
+                * disengage request, but the new one doesn't.
+                */
 
-               xpc_activate_partition(part);
+               xpc_clear_partition_engaged(1UL << partid);
+               xpc_clear_partition_disengage_request(1UL << partid);
+
+               xpc_update_partition_info(part, remote_rp_version,
+                                               &remote_rp_stamp, remote_rp_pa,
+                                               remote_vars_pa, remote_vars);
+               reactivate = 1;
 
-       } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa ||
-                       !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
+       } else {
+               DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
+
+               stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
+                                                       &remote_rp_stamp);
+               if (stamp_diff != 0) {
+                       DBUG_ON(stamp_diff >= 0);
+
+                       /*
+                        * Other side rebooted and the previous XPC did support
+                        * the disengage request, as does the new one.
+                        */
+
+                       DBUG_ON(xpc_partition_engaged(1UL << partid));
+                       DBUG_ON(xpc_partition_disengage_requested(1UL <<
+                                                               partid));
+
+                       xpc_update_partition_info(part, remote_rp_version,
+                                               &remote_rp_stamp, remote_rp_pa,
+                                               remote_vars_pa, remote_vars);
+                       reactivate = 1;
+               }
+       }
+
+       if (part->disengage_request_timeout > 0 &&
+                                       !xpc_partition_disengaged(part)) {
+               /* still waiting on other side to disengage from us */
+               return;
+       }
 
+       if (reactivate) {
                part->reactivate_nasid = nasid;
                XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
+
+       } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
+                       xpc_partition_disengage_requested(1UL << partid)) {
+               XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
        }
 }
 
@@ -641,14 +816,17 @@ xpc_identify_act_IRQ_sender(void)
        u64 nasid;                      /* remote nasid */
        int n_IRQs_detected = 0;
        AMO_t *act_amos;
-       struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
 
 
-       act_amos = xpc_vars->act_amos;
+       act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
 
 
        /* scan through act AMO variable looking for non-zero entries */
-       for (word = 0; word < XP_NASID_MASK_WORDS; word++) {
+       for (word = 0; word < xp_nasid_mask_words; word++) {
+
+               if (xpc_exiting) {
+                       break;
+               }
 
                nasid_mask = xpc_IPI_receive(&act_amos[word]);
                if (nasid_mask == 0) {
@@ -666,7 +844,7 @@ xpc_identify_act_IRQ_sender(void)
                 * remote nasid in our reserved pages machine mask.
                 * This is used in the event of module reload.
                 */
-               rp->mach_nasids[word] |= nasid_mask;
+               xpc_mach_nasids[word] |= nasid_mask;
 
 
                /* locate the nasid(s) which sent interrupts */
@@ -685,6 +863,58 @@ xpc_identify_act_IRQ_sender(void)
 }
 
 
+/*
+ * See if the other side has responded to a partition disengage request
+ * from us.
+ */
+int
+xpc_partition_disengaged(struct xpc_partition *part)
+{
+       partid_t partid = XPC_PARTID(part);
+       int disengaged;
+
+
+       disengaged = (xpc_partition_engaged(1UL << partid) == 0);
+       if (part->disengage_request_timeout) {
+               if (!disengaged) {
+                       if (jiffies < part->disengage_request_timeout) {
+                               /* timelimit hasn't been reached yet */
+                               return 0;
+                       }
+
+                       /*
+                        * Other side hasn't responded to our disengage
+                        * request in a timely fashion, so assume it's dead.
+                        */
+
+                       dev_info(xpc_part, "disengage from remote partition %d "
+                               "timed out\n", partid);
+                       xpc_disengage_request_timedout = 1;
+                       xpc_clear_partition_engaged(1UL << partid);
+                       disengaged = 1;
+               }
+               part->disengage_request_timeout = 0;
+
+               /* cancel the timer function, provided it's not us */
+               if (!in_interrupt()) {
+                       del_singleshot_timer_sync(&part->
+                                                     disengage_request_timer);
+               }
+
+               DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
+                                       part->act_state != XPC_P_INACTIVE);
+               if (part->act_state != XPC_P_INACTIVE) {
+                       xpc_wakeup_channel_mgr(part);
+               }
+
+               if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+                       xpc_cancel_partition_disengage_request(part);
+               }
+       }
+       return disengaged;
+}
+
+
 /*
  * Mark specified partition as active.
  */
@@ -719,7 +949,6 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
                                enum xpc_retval reason)
 {
        unsigned long irq_flags;
-       partid_t partid = XPC_PARTID(part);
 
 
        spin_lock_irqsave(&part->act_lock, irq_flags);
@@ -747,17 +976,27 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
 
        spin_unlock_irqrestore(&part->act_lock, irq_flags);
 
-       XPC_DISALLOW_HB(partid, xpc_vars);
+       if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
+               xpc_request_partition_disengage(part);
+               xpc_IPI_send_disengage(part);
 
-       dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid,
-               reason);
+               /* set a timelimit on the disengage request */
+               part->disengage_request_timeout = jiffies +
+                                       (xpc_disengage_request_timelimit * HZ);
+               part->disengage_request_timer.expires =
+                                       part->disengage_request_timeout;
+               add_timer(&part->disengage_request_timer);
+       }
+
+       dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
+               XPC_PARTID(part), reason);
 
-       xpc_partition_down(part, reason);
+       xpc_partition_going_down(part, reason);
 }
 
 
 /*
- * Mark specified partition as active.
+ * Mark specified partition as inactive.
  */
 void
 xpc_mark_partition_inactive(struct xpc_partition *part)
@@ -790,9 +1029,10 @@ xpc_discovery(void)
        void *remote_rp_base;
        struct xpc_rsvd_page *remote_rp;
        struct xpc_vars *remote_vars;
-       u64 remote_rsvd_page_pa;
+       u64 remote_rp_pa;
        u64 remote_vars_pa;
        int region;
+       int region_size;
        int max_regions;
        int nasid;
        struct xpc_rsvd_page *rp;
@@ -802,7 +1042,8 @@ xpc_discovery(void)
        enum xpc_retval ret;
 
 
-       remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE,
+       remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
+                                               xp_nasid_mask_bytes,
                                                GFP_KERNEL, &remote_rp_base);
        if (remote_rp == NULL) {
                return;
@@ -810,13 +1051,12 @@ xpc_discovery(void)
        remote_vars = (struct xpc_vars *) remote_rp;
 
 
-       discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS,
+       discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
                                                        GFP_KERNEL);
        if (discovered_nasids == NULL) {
                kfree(remote_rp_base);
                return;
        }
-       memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS);
 
        rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
 
@@ -825,11 +1065,19 @@ xpc_discovery(void)
         * nodes that can comprise an access protection grouping. The access
         * protection is in regards to memory, IOI and IPI.
         */
-//>>> move the next two #defines into either include/asm-ia64/sn/arch.h or
-//>>> include/asm-ia64/sn/addrs.h
-#define SH1_MAX_REGIONS                64
-#define SH2_MAX_REGIONS                256
-       max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS;
+       max_regions = 64;
+       region_size = sn_region_size;
+
+       switch (region_size) {
+       case 128:
+               max_regions *= 2;
+       case 64:
+               max_regions *= 2;
+       case 32:
+               max_regions *= 2;
+               region_size = 16;
+               DBUG_ON(!is_shub2());
+       }
 
        for (region = 0; region < max_regions; region++) {
 
@@ -839,8 +1087,8 @@ xpc_discovery(void)
 
                dev_dbg(xpc_part, "searching region %d\n", region);
 
-               for (nasid = (region * sn_region_size * 2);
-                    nasid < ((region + 1) * sn_region_size * 2);
+               for (nasid = (region * region_size * 2);
+                    nasid < ((region + 1) * region_size * 2);
                     nasid += 2) {
 
                        if ((volatile int) xpc_exiting) {
@@ -850,14 +1098,14 @@ xpc_discovery(void)
                        dev_dbg(xpc_part, "checking nasid %d\n", nasid);
 
 
-                       if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) {
+                       if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
                                dev_dbg(xpc_part, "PROM indicates Nasid %d is "
                                        "part of the local partition; skipping "
                                        "region\n", nasid);
                                break;
                        }
 
-                       if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) {
+                       if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
                                dev_dbg(xpc_part, "PROM indicates Nasid %d was "
                                        "not on Numa-Link network at reset\n",
                                        nasid);
@@ -875,7 +1123,7 @@ xpc_discovery(void)
                        /* pull over the reserved page structure */
 
                        ret = xpc_get_remote_rp(nasid, discovered_nasids,
-                                             remote_rp, &remote_rsvd_page_pa);
+                                             remote_rp, &remote_rp_pa);
                        if (ret != xpcSuccess) {
                                dev_dbg(xpc_part, "unable to get reserved page "
                                        "from nasid %d, reason=%d\n", nasid,
@@ -946,6 +1194,13 @@ xpc_discovery(void)
                                remote_vars->act_nasid,
                                remote_vars->act_phys_cpuid);
 
+                       if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
+                                                               version)) {
+                               part->remote_amos_page_pa =
+                                               remote_vars->amos_page_pa;
+                               xpc_mark_partition_disengaged(part);
+                               xpc_cancel_partition_disengage_request(part);
+                       }
                        xpc_IPI_send_activate(remote_vars);
                }
        }
@@ -972,12 +1227,12 @@ xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
                return xpcPartitionDown;
        }
 
-       part_nasid_pa = part->remote_rp_pa +
-               (u64) &((struct xpc_rsvd_page *) 0)->part_nasids;
+       memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
 
-       bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
-                               L1_CACHE_ALIGN(XP_NASID_MASK_BYTES),
-                               (BTE_NOTIFY | BTE_WACQUIRE), NULL);
+       part_nasid_pa = (u64) XPC_RP_PART_NASIDS(part->remote_rp_pa);
+
+       bte_res = xp_bte_copy(part_nasid_pa, (u64) nasid_mask,
+                       xp_nasid_mask_bytes, (BTE_NOTIFY | BTE_WACQUIRE), NULL);
 
        return xpc_map_bte_errors(bte_res);
 }
index 7470259..f35dd7b 100644 (file)
@@ -620,6 +620,9 @@ config HOTPLUG_CPU
 
          Say N if you are unsure.
 
+config ARCH_ENABLE_MEMORY_HOTPLUG
+       def_bool y
+
 config KEXEC
        bool "kexec system call (EXPERIMENTAL)"
        depends on PPC_MULTIPLATFORM && EXPERIMENTAL
index b61d86e..55f367e 100644 (file)
@@ -94,6 +94,8 @@ _GLOBAL(__setup_cpu_ppc970)
        mfspr   r0,SPRN_HID0
        li      r11,5                   /* clear DOZE and SLEEP */
        rldimi  r0,r11,52,8             /* set NAP and DPM */
+       li      r11,0
+       rldimi  r0,r11,32,31            /* clear EN_ATTN */
        mtspr   SPRN_HID0,r0
        mfspr   r0,SPRN_HID0
        mfspr   r0,SPRN_HID0
index 778f22f..dbcb859 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/elf.h>
 #include <linux/elfcore.h>
 #include <linux/init.h>
+#include <linux/irq.h>
 #include <linux/types.h>
 
 #include <asm/processor.h>
@@ -174,6 +175,8 @@ static void crash_kexec_prepare_cpus(void)
 
 void default_machine_crash_shutdown(struct pt_regs *regs)
 {
+       unsigned int irq;
+
        /*
         * This function is only called after the system
         * has paniced or is otherwise in a critical state.
@@ -186,6 +189,16 @@ void default_machine_crash_shutdown(struct pt_regs *regs)
         */
        local_irq_disable();
 
+       for_each_irq(irq) {
+               struct irq_desc *desc = irq_descp(irq);
+
+               if (desc->status & IRQ_INPROGRESS)
+                       desc->handler->end(irq);
+
+               if (!(desc->status & IRQ_DISABLED))
+                       desc->handler->disable(irq);
+       }
+
        if (ppc_md.kexec_cpu_down)
                ppc_md.kexec_cpu_down(1, 0);
 
index 2d60ea3..9ff7105 100644 (file)
@@ -641,23 +641,28 @@ void xics_teardown_cpu(int secondary)
        ops->cppr_info(cpu, 0x00);
        iosync();
 
+       /*
+        * Clear IPI
+        */
+       ops->qirr_info(cpu, 0xff);
+       /*
+        * we need to EOI the IPI if we got here from kexec down IPI
+        *
+        * probably need to check all the other interrupts too
+        * should we be flagging idle loop instead?
+        * or creating some task to be scheduled?
+        */
+       ops->xirr_info_set(cpu, XICS_IPI);
+
        /*
         * Some machines need to have at least one cpu in the GIQ,
         * so leave the master cpu in the group.
         */
-       if (secondary) {
-               /*
-                * we need to EOI the IPI if we got here from kexec down IPI
-                *
-                * probably need to check all the other interrupts too
-                * should we be flagging idle loop instead?
-                * or creating some task to be scheduled?
-                */
-               ops->xirr_info_set(cpu, XICS_IPI);
+       if (secondary) 
                rtas_set_indicator(GLOBAL_INTERRUPT_QUEUE,
                        (1UL << interrupt_server_size) - 1 -
                        default_distrib_server, 0);
-       }
+       
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
index 8fa541c..5a220d6 100644 (file)
@@ -220,6 +220,21 @@ out:
        return err;
 }
 
+int sparc_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
+{
+       if (ARCH_SUN4C_SUN4 &&
+           (len > 0x20000000 ||
+            ((flags & MAP_FIXED) &&
+             addr < 0xe0000000 && addr + len > 0x20000000)))
+               return -EINVAL;
+
+       /* See asm-sparc/uaccess.h */
+       if (len > TASK_SIZE - PAGE_SIZE || addr + len > TASK_SIZE - PAGE_SIZE)
+               return -EINVAL;
+
+       return 0;
+}
+
 /* Linux version of mmap */
 static unsigned long do_mmap2(unsigned long addr, unsigned long len,
        unsigned long prot, unsigned long flags, unsigned long fd,
@@ -234,25 +249,13 @@ static unsigned long do_mmap2(unsigned long addr, unsigned long len,
                        goto out;
        }
 
-       retval = -EINVAL;
        len = PAGE_ALIGN(len);
-       if (ARCH_SUN4C_SUN4 &&
-           (len > 0x20000000 ||
-            ((flags & MAP_FIXED) &&
-             addr < 0xe0000000 && addr + len > 0x20000000)))
-               goto out_putf;
-
-       /* See asm-sparc/uaccess.h */
-       if (len > TASK_SIZE - PAGE_SIZE || addr + len > TASK_SIZE - PAGE_SIZE)
-               goto out_putf;
-
        flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
 
        down_write(&current->mm->mmap_sem);
        retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
        up_write(&current->mm->mmap_sem);
 
-out_putf:
        if (file)
                fput(file);
 out:
index 62d7497..ef41925 100644 (file)
@@ -550,6 +550,26 @@ asmlinkage long sparc64_personality(unsigned long personality)
        return ret;
 }
 
+int sparc64_mmap_check(unsigned long addr, unsigned long len,
+               unsigned long flags)
+{
+       if (test_thread_flag(TIF_32BIT)) {
+               if (len >= STACK_TOP32)
+                       return -EINVAL;
+
+               if ((flags & MAP_FIXED) && addr > STACK_TOP32 - len)
+                       return -EINVAL;
+       } else {
+               if (len >= VA_EXCLUDE_START)
+                       return -EINVAL;
+
+               if ((flags & MAP_FIXED) && invalid_64bit_range(addr, len))
+                       return -EINVAL;
+       }
+
+       return 0;
+}
+
 /* Linux version of mmap */
 asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len,
        unsigned long prot, unsigned long flags, unsigned long fd,
@@ -565,27 +585,11 @@ asmlinkage unsigned long sys_mmap(unsigned long addr, unsigned long len,
        }
        flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
        len = PAGE_ALIGN(len);
-       retval = -EINVAL;
-
-       if (test_thread_flag(TIF_32BIT)) {
-               if (len >= STACK_TOP32)
-                       goto out_putf;
-
-               if ((flags & MAP_FIXED) && addr > STACK_TOP32 - len)
-                       goto out_putf;
-       } else {
-               if (len >= VA_EXCLUDE_START)
-                       goto out_putf;
-
-               if ((flags & MAP_FIXED) && invalid_64bit_range(addr, len))
-                       goto out_putf;
-       }
 
        down_write(&current->mm->mmap_sem);
        retval = do_mmap(file, addr, len, prot, flags, off);
        up_write(&current->mm->mmap_sem);
 
-out_putf:
        if (file)
                fput(file);
 out:
index 8cb0620..af9d81d 100644 (file)
@@ -69,6 +69,8 @@ static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte,
                } else
                        offset += PAGE_SIZE;
 
+               if (pte_write(entry))
+                       entry = pte_mkdirty(entry);
                do {
                        BUG_ON(!pte_none(*pte));
                        set_pte_at(mm, address, pte, entry);
index fc0f0b0..bd4627b 100644 (file)
@@ -226,7 +226,7 @@ EXPORT_SYMBOL(physmem_forget_descriptor);
 EXPORT_SYMBOL(physmem_remove_mapping);
 EXPORT_SYMBOL(physmem_subst_mapping);
 
-void arch_free_page(struct page *page, int order)
+int arch_free_page(struct page *page, int order)
 {
        void *virt;
        int i;
@@ -235,6 +235,8 @@ void arch_free_page(struct page *page, int order)
                virt = __va(page_to_phys(page + i));
                physmem_remove_mapping(virt);
        }
+
+       return 0;
 }
 
 int is_remapped(void *virt)
index b2cd1f2..0f4ed71 100644 (file)
@@ -123,6 +123,22 @@ config GENERIC_CPU
 
 endchoice
 
+config X86_64_XEN
+       bool "Enable Xen compatible kernel"
+       select SWIOTLB
+       help
+         This option will compile a kernel compatible with Xen hypervisor
+
+config X86_NO_TSS
+       bool
+       depends on X86_64_XEN
+       default y
+
+config X86_NO_IDT
+       bool
+       depends on X86_64_XEN
+       default y
+
 #
 # Define implied options from the CPU selection here
 #
@@ -143,6 +159,7 @@ config X86_INTERNODE_CACHE_BYTES
 
 config X86_TSC
        bool
+       depends on !X86_64_XEN
        default y
 
 config X86_GOOD_APIC
@@ -185,7 +202,7 @@ config X86_CPUID
 
 config X86_HT
        bool
-       depends on SMP && !MK8
+       depends on SMP && !MK8 && !X86_64_XEN
        default y
 
 config MATH_EMULATION
@@ -199,14 +216,22 @@ config EISA
 
 config X86_IO_APIC
        bool
+       depends !XEN_UNPRIVILEGED_GUEST
        default y
 
+config X86_XEN_GENAPIC
+       bool
+       depends X86_64_XEN
+       default XEN_PRIVILEGED_GUEST || SMP
+
 config X86_LOCAL_APIC
        bool
+       depends !XEN_UNPRIVILEGED_GUEST
        default y
 
 config MTRR
        bool "MTRR (Memory Type Range Register) support"
+       depends on !XEN_UNPRIVILEGED_GUEST
        ---help---
          On Intel P6 family processors (Pentium Pro, Pentium II and later)
          the Memory Type Range Registers (MTRRs) may be used to control
@@ -247,7 +272,7 @@ config SMP
 
 config SCHED_SMT
        bool "SMT (Hyperthreading) scheduler support"
-       depends on SMP
+       depends on SMP && !X86_64_XEN
        default n
        help
          SMT scheduler support improves the CPU scheduler's decision making
@@ -257,7 +282,7 @@ config SCHED_SMT
 
 config SCHED_MC
        bool "Multi-core scheduler support"
-       depends on SMP
+       depends on SMP && !X86_64_XEN
        default y
        help
          Multi-core scheduler support improves the CPU scheduler's decision
@@ -268,7 +293,7 @@ source "kernel/Kconfig.preempt"
 
 config NUMA
        bool "Non Uniform Memory Access (NUMA) Support"
-       depends on SMP
+       depends on SMP && !X86_64_XEN
        help
         Enable NUMA (Non Uniform Memory Access) support. The kernel 
         will try to allocate memory used by a CPU on the local memory 
@@ -328,7 +353,7 @@ config ARCH_DISCONTIGMEM_DEFAULT
 
 config ARCH_SPARSEMEM_ENABLE
        def_bool y
-       depends on (NUMA || EXPERIMENTAL)
+       depends on (NUMA || EXPERIMENTAL) && !X86_64_XEN
 
 config ARCH_MEMORY_PROBE
        def_bool y
@@ -352,6 +377,7 @@ config NR_CPUS
        int "Maximum number of CPUs (2-256)"
        range 2 255
        depends on SMP
+       default "16" if X86_64_XEN
        default "8"
        help
          This allows you to specify the maximum number of CPUs which this
@@ -369,9 +395,12 @@ config HOTPLUG_CPU
                can be controlled through /sys/devices/system/cpu/cpu#.
                Say N if you want to disable CPU hotplug.
 
+config ARCH_ENABLE_MEMORY_HOTPLUG
+       def_bool y
 
 config HPET_TIMER
        bool
+       depends on !X86_64_XEN
        default y
        help
          Use the IA-PC HPET (High Precision Event Timer) to manage
@@ -389,7 +418,7 @@ config GART_IOMMU
        bool "K8 GART IOMMU support"
        default y
        select SWIOTLB
-       depends on PCI
+       depends on PCI && !X86_64_XEN
        help
          Support for hardware IOMMU in AMD's Opteron/Athlon64 Processors
          and for the bounce buffering software IOMMU.
@@ -409,6 +438,7 @@ config SWIOTLB
 
 config X86_MCE
        bool "Machine check support" if EMBEDDED
+       depends on !X86_64_XEN
        default y
        help
           Include a machine check error handler to report hardware errors.
@@ -434,7 +464,7 @@ config X86_MCE_AMD
 
 config KEXEC
        bool "kexec system call (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
+       depends on EXPERIMENTAL && !X86_64_XEN
        help
          kexec is a system call that implements the ability to shutdown your
          current kernel, and to start another kernel.  It is like a reboot
@@ -525,8 +555,11 @@ config GENERIC_PENDING_IRQ
        default y
 
 menu "Power management options"
+       depends on !XEN_UNPRIVILEGED_GUEST
 
+if !X86_64_XEN
 source kernel/power/Kconfig
+endif
 
 source "drivers/acpi/Kconfig"
 
@@ -549,6 +582,21 @@ config PCI_MMCONFIG
        bool "Support mmconfig PCI config space access"
        depends on PCI && ACPI
 
+config XEN_PCIDEV_FRONTEND
+       bool "Xen PCI Frontend"
+       depends on PCI && X86_64_XEN
+       default y
+       help
+         The PCI device frontend driver allows the kernel to import arbitrary
+         PCI devices from a PCI backend to support PCI driver domains.
+
+config XEN_PCIDEV_FE_DEBUG
+       bool "Xen PCI Frontend Debugging"
+       depends on XEN_PCIDEV_FRONTEND
+       default n
+       help
+         Enables some debug statements within the PCI Frontend.
+
 source "drivers/pci/pcie/Kconfig"
 
 source "drivers/pci/Kconfig"
@@ -621,4 +669,6 @@ source "security/Kconfig"
 
 source "crypto/Kconfig"
 
+source "drivers/xen/Kconfig"
+
 source "lib/Kconfig"
index e573e2a..4b81d9f 100644 (file)
@@ -31,6 +31,10 @@ cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
 cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
 cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
 
+cppflags-$(CONFIG_XEN) += \
+       -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION)
+CPPFLAGS += $(cppflags-y)
+
 cflags-y += -m64
 cflags-y += -mno-red-zone
 cflags-y += -mcmodel=kernel
@@ -72,6 +76,21 @@ boot := arch/x86_64/boot
 PHONY += bzImage bzlilo install archmrproper \
         fdimage fdimage144 fdimage288 isoimage archclean
 
+ifdef CONFIG_XEN
+CPPFLAGS := -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
+head-y := arch/x86_64/kernel/head-xen.o arch/x86_64/kernel/head64-xen.o arch/x86_64/kernel/init_task.o
+LDFLAGS_vmlinux := -e _start
+boot := arch/i386/boot-xen
+.PHONY: vmlinuz
+#Default target when executing "make"
+all: vmlinuz
+
+vmlinuz: vmlinux
+       $(Q)$(MAKE) $(build)=$(boot) $@
+
+install:
+       $(Q)$(MAKE) $(build)=$(boot) XENGUEST=$(XENGUEST) $@
+else
 #Default target when executing "make"
 all: bzImage
 
@@ -92,6 +111,7 @@ fdimage fdimage144 fdimage288 isoimage: vmlinux
 
 install:
        $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ 
+endif
 
 archclean:
        $(Q)$(MAKE) $(clean)=$(boot)
index e9263b4..a84151e 100644 (file)
@@ -23,9 +23,25 @@ quiet_cmd_syscall = SYSCALL $@
                           -Wl,-soname=linux-gate.so.1 -o $@ \
                           -Wl,-T,$(filter-out FORCE,$^)
 
+$(obj)/vsyscall-int80.so \
 $(obj)/vsyscall-sysenter.so $(obj)/vsyscall-syscall.so: \
 $(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE
        $(call if_changed,syscall)
 
-AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32
-AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32
+AFLAGS_vsyscall-sysenter.o = -m32 -Wa,-32 -Iarch/i386/kernel
+AFLAGS_vsyscall-syscall.o = -m32 -Wa,-32 -Iarch/i386/kernel
+
+ifdef CONFIG_XEN
+AFLAGS_vsyscall-int80.o = -m32 -Wa,-32 -Iarch/i386/kernel
+CFLAGS_syscall32-xen.o += -DUSE_INT80
+AFLAGS_syscall32_syscall-xen.o += -DUSE_INT80
+
+$(obj)/syscall32_syscall-xen.o: \
+       $(foreach F,int80 sysenter syscall,$(obj)/vsyscall-$F.so)
+
+targets := $(foreach F,int80 sysenter syscall,vsyscall-$F.o vsyscall-$F.so)
+
+include $(srctree)/scripts/Makefile.xen
+
+obj-y := $(call cherrypickxen, $(obj-y))
+endif
index 1384367..6086ac6 100644 (file)
@@ -139,5 +139,5 @@ __kernel_rt_sigreturn:
        .align 4
 .LENDFDE3:
 
-#include "../../i386/kernel/vsyscall-note.S"
+#include <vsyscall-note.S>
 
index 059c883..b0913f3 100644 (file)
@@ -20,11 +20,13 @@ obj-$(CONFIG_MICROCODE)             += microcode.o
 obj-$(CONFIG_X86_CPUID)                += cpuid.o
 obj-$(CONFIG_SMP)              += smp.o smpboot.o trampoline.o
 obj-$(CONFIG_X86_LOCAL_APIC)   += apic.o  nmi.o
+obj-$(CONFIG_X86_XEN_GENAPIC)  += genapic.o genapic_xen.o
 obj-$(CONFIG_X86_IO_APIC)      += io_apic.o mpparse.o \
                genapic.o genapic_cluster.o genapic_flat.o
 obj-$(CONFIG_KEXEC)            += machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_CRASH_DUMP)       += crash_dump.o
-obj-$(CONFIG_PM)               += suspend.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o
+obj-$(CONFIG_ACPI_SLEEP)       += suspend.o
 obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
 obj-$(CONFIG_CPU_FREQ)         += cpufreq/
 obj-$(CONFIG_EARLY_PRINTK)     += early_printk.o
@@ -49,3 +51,18 @@ intel_cacheinfo-y            += ../../i386/kernel/cpu/intel_cacheinfo.o
 quirks-y                       += ../../i386/kernel/quirks.o
 i8237-y                                += ../../i386/kernel/i8237.o
 msr-$(subst m,y,$(CONFIG_X86_MSR))  += ../../i386/kernel/msr.o
+
+ifdef CONFIG_XEN
+time-y                         += ../../i386/kernel/time-xen.o
+pci-dma-y                      += ../../i386/kernel/pci-dma-xen.o
+microcode-$(subst m,y,$(CONFIG_MICROCODE))  := ../../i386/kernel/microcode-xen.o
+quirks-y                       := ../../i386/kernel/quirks-xen.o
+
+n-obj-xen := i8259.o reboot.o i8237.o smpboot.o trampoline.o
+
+include $(srctree)/scripts/Makefile.xen
+
+obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
+obj-y := $(call cherrypickxen, $(obj-y))
+extra-y := $(call cherrypickxen, $(extra-y))
+endif
index 4fe9707..aa84f6e 100644 (file)
@@ -6,3 +6,4 @@ ifneq ($(CONFIG_ACPI_PROCESSOR),)
 obj-y                  += processor.o
 endif
 
+boot-$(CONFIG_XEN)             := ../../../i386/kernel/acpi/boot-xen.o
index 38834bb..06cf058 100644 (file)
@@ -66,7 +66,9 @@ int main(void)
        DEFINE(pbe_address, offsetof(struct pbe, address));
        DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address));
        DEFINE(pbe_next, offsetof(struct pbe, next));
+#ifndef CONFIG_X86_NO_TSS
        BLANK();
        DEFINE(TSS_ist, offsetof(struct tss_struct, ist));
+#endif
        return 0;
 }
index 586b34c..4c599ee 100644 (file)
@@ -596,7 +596,7 @@ retint_kernel:
  */            
        .macro apicinterrupt num,func
        INTR_FRAME
-       pushq $\num-256
+       pushq $~(\num)
        CFI_ADJUST_CFA_OFFSET 8
        interrupt \func
        jmp ret_from_intr
index ce31d90..9c141e1 100644 (file)
@@ -37,6 +37,8 @@ union thread_union init_thread_union
 struct task_struct init_task = INIT_TASK(init_task);
 
 EXPORT_SYMBOL(init_task);
+
+#ifndef CONFIG_X86_NO_TSS
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
  * no more per-task TSS's. The TSS size is kept cacheline-aligned
@@ -45,5 +47,6 @@ EXPORT_SYMBOL(init_task);
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */ 
 DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS;
+#endif
 
 #define ALIGN_TO_4K __attribute__((section(".data.init_task")))
index d8bd0b3..c874885 100644 (file)
@@ -91,8 +91,8 @@ skip:
  */
 asmlinkage unsigned int do_IRQ(struct pt_regs *regs)
 {      
-       /* high bits used in ret_from_ code  */
-       unsigned irq = regs->orig_rax & 0xff;
+       /* high bit used in ret_from_ code  */
+       unsigned irq = ~regs->orig_rax;
 
        exit_idle();
        irq_enter();
index feb5f10..07340ae 100644 (file)
@@ -60,7 +60,9 @@ int pmtimer_mark_offset(void)
        delta = cyc2us((tick - last_pmtmr_tick) & ACPI_PM_MASK);
 
        last_pmtmr_tick = tick;
+#ifndef CONFIG_XEN
        monotonic_base += delta * NSEC_PER_USEC;
+#endif
 
        delta += offset_delay;
 
@@ -68,7 +70,7 @@ int pmtimer_mark_offset(void)
        offset_delay = delta % (USEC_PER_SEC / HZ);
 
        rdtscll(tsc);
-       vxtime.last_tsc = tsc - offset_delay * cpu_khz;
+       vxtime.last_tsc = tsc - offset_delay * (u64)cpu_khz / 1000;
 
        /* don't calculate delay for first run,
           or if we've got less then a tick */
@@ -80,6 +82,32 @@ int pmtimer_mark_offset(void)
        return lost - 1;
 }
 
+static unsigned pmtimer_wait_tick(void)
+{
+       u32 a, b;
+       for (a = b = inl(pmtmr_ioport) & ACPI_PM_MASK;
+            a == b;
+            b = inl(pmtmr_ioport) & ACPI_PM_MASK)
+               cpu_relax();
+       return b;
+}
+
+/* note: wait time is rounded up to one tick */
+void pmtimer_wait(unsigned us)
+{
+       u32 a, b;
+       a = pmtimer_wait_tick();
+       do {
+               b = inl(pmtmr_ioport);
+               cpu_relax();
+       } while (cyc2us(b - a) < us);
+}
+
+void pmtimer_resume(void)
+{
+       last_pmtmr_tick = inl(pmtmr_ioport);
+}
+
 unsigned int do_gettimeoffset_pm(void)
 {
        u32 now, offset, delta = 0;
@@ -95,7 +123,7 @@ unsigned int do_gettimeoffset_pm(void)
 static int __init nopmtimer_setup(char *s)
 {
        pmtmr_ioport = 0;
-       return 0;
+       return 1;
 }
 
 __setup("nopmtimer", nopmtimer_setup);
index 655b919..e8d5f84 100644 (file)
@@ -1440,7 +1440,7 @@ struct seq_operations cpuinfo_op = {
        .show = show_cpuinfo,
 };
 
-#ifdef CONFIG_INPUT_PCSPKR
+#if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE)
 #include <linux/platform_device.h>
 static __init int add_pcspkr(void)
 {
index 8a691fa..189a737 100644 (file)
@@ -46,7 +46,7 @@ Control non executable mappings for 64bit processes.
 on     Enable(default)
 off    Disable
 */ 
-int __init nonx_setup(char *str)
+void __init nonx_setup(const char *str)
 {
        if (!strncmp(str, "on", 2)) {
                 __supported_pte_mask |= _PAGE_NX; 
@@ -55,28 +55,7 @@ int __init nonx_setup(char *str)
                do_not_nx = 1;
                __supported_pte_mask &= ~_PAGE_NX;
         }
-       return 1;
-} 
-__setup("noexec=", nonx_setup);        /* parsed early actually */
-
-int force_personality32 = 0; 
-
-/* noexec32=on|off
-Control non executable heap for 32bit processes.
-To control the stack too use noexec=off
-
-on     PROT_READ does not imply PROT_EXEC for 32bit processes
-off    PROT_READ implies PROT_EXEC (default)
-*/
-static int __init nonx32_setup(char *str)
-{
-       if (!strcmp(str, "on"))
-               force_personality32 &= ~READ_IMPLIES_EXEC;
-       else if (!strcmp(str, "off"))
-               force_personality32 |= READ_IMPLIES_EXEC;
-       return 1;
 }
-__setup("noexec32=", nonx32_setup);
 
 /*
  * Great future plan:
index 4a6628b..e13938d 100644 (file)
@@ -135,10 +135,10 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 
        cpu = smp_processor_id();
        /*
-        * orig_rax contains the interrupt vector - 256.
+        * orig_rax contains the negated interrupt vector.
         * Use that to determine where the sender put the data.
         */
-       sender = regs->orig_rax + 256 - INVALIDATE_TLB_VECTOR_START;
+       sender = ~regs->orig_rax - INVALIDATE_TLB_VECTOR_START;
        f = &per_cpu(flush_state, sender);
 
        if (!cpu_isset(cpu, f->flush_cpumask))
index d25ac86..7d6ceae 100644 (file)
@@ -9,3 +9,13 @@ obj-$(CONFIG_K8_NUMA) += k8topology.o
 obj-$(CONFIG_ACPI_NUMA) += srat.o
 
 hugetlbpage-y = ../../i386/mm/hugetlbpage.o
+
+ifdef CONFIG_XEN
+include $(srctree)/scripts/Makefile.xen
+
+ioremap-y      += ../../i386/mm/ioremap-xen.o
+hypervisor-y   += ../../i386/mm/hypervisor.o
+obj-y          += hypervisor.o
+
+obj-y := $(call cherrypickxen, $(obj-y))
+endif
index 6be3268..589a796 100644 (file)
@@ -11,9 +11,12 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \
        oprofilefs.o oprofile_stats.o \
        timer_int.o )
 
+ifdef CONFIG_XEN
+OPROFILE-y := xenoprof.o
+else
 OPROFILE-y := init.o backtrace.o
 OPROFILE-$(CONFIG_X86_LOCAL_APIC) += nmi_int.o op_model_athlon.o op_model_p4.o \
                                     op_model_ppro.o
 OPROFILE-$(CONFIG_X86_IO_APIC)    += nmi_timer_int.o 
-
+endif
 oprofile-y = $(DRIVER_OBJS) $(addprefix ../../i386/oprofile/, $(OPROFILE-y))
index a3f6ad5..1ea6ea9 100644 (file)
@@ -15,11 +15,23 @@ obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o
 
 obj-$(CONFIG_NUMA)     += k8-bus.o
 
+# pcifront should be after mmconfig.o and direct.o as it should only
+# take over if direct access to the PCI bus is unavailable
+obj-$(CONFIG_XEN_PCIDEV_FRONTEND)      += pcifront.o
+
 direct-y += ../../i386/pci/direct.o
 acpi-y   += ../../i386/pci/acpi.o
+pcifront-y += ../../i386/pci/pcifront.o
 legacy-y += ../../i386/pci/legacy.o
 irq-y    += ../../i386/pci/irq.o
 common-y += ../../i386/pci/common.o
 fixup-y  += ../../i386/pci/fixup.o
 i386-y  += ../../i386/pci/i386.o
 init-y += ../../i386/pci/init.o
+
+ifdef CONFIG_XEN
+irq-y          := ../../i386/pci/irq-xen.o
+include $(srctree)/scripts/Makefile.xen
+
+obj-y := $(call cherrypickxen, $(obj-y))
+endif
index a2060e4..3c55c76 100644 (file)
 
 #include "pci.h"
 
-#define MMCONFIG_APER_SIZE (256*1024*1024)
+/* aperture is up to 256MB but BIOS may reserve less */
+#define MMCONFIG_APER_MIN      (2 * 1024*1024)
+#define MMCONFIG_APER_MAX      (256 * 1024*1024)
+
 /* Verify the first 16 busses. We assume that systems with more busses
    get MCFG right. */
 #define MAX_CHECK_BUS 16
@@ -175,9 +178,10 @@ void __init pci_mmcfg_init(void)
                return;
 
        if (!e820_all_mapped(pci_mmcfg_config[0].base_address,
-                       pci_mmcfg_config[0].base_address + MMCONFIG_APER_SIZE,
+                       pci_mmcfg_config[0].base_address + MMCONFIG_APER_MIN,
                        E820_RESERVED)) {
-               printk(KERN_ERR "PCI: BIOS Bug: MCFG area is not E820-reserved\n");
+               printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %x is not E820-reserved\n",
+                               pci_mmcfg_config[0].base_address);
                printk(KERN_ERR "PCI: Not using MMCONFIG.\n");
                return;
        }
@@ -190,7 +194,8 @@ void __init pci_mmcfg_init(void)
        }
        for (i = 0; i < pci_mmcfg_config_num; ++i) {
                pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i];
-               pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address, MMCONFIG_APER_SIZE);
+               pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].base_address,
+                                                        MMCONFIG_APER_MAX);
                if (!pci_mmcfg_virt[i].virt) {
                        printk("PCI: Cannot map mmconfig aperture for segment %d\n",
                               pci_mmcfg_config[i].pci_segment_group_number);
index a0afdd3..7be96bb 100644 (file)
@@ -766,7 +766,8 @@ void elv_unregister(struct elevator_type *e)
                read_lock(&tasklist_lock);
                do_each_thread(g, p) {
                        task_lock(p);
-                       e->ops.trim(p->io_context);
+                       if (p->io_context)
+                               e->ops.trim(p->io_context);
                        task_unlock(p);
                } while_each_thread(g, p);
                read_unlock(&tasklist_lock);
index 7eb36c5..4293143 100644 (file)
@@ -638,7 +638,7 @@ void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
        /* Assume anything <= 4GB can be handled by IOMMU.
           Actually some IOMMUs can handle everything, but I don't
           know of a way to test this here. */
-       if (bounce_pfn < (0xffffffff>>PAGE_SHIFT))
+       if (bounce_pfn < (min_t(u64,0xffffffff,BLK_BOUNCE_HIGH) >> PAGE_SHIFT))
                dma = 1;
        q->bounce_pfn = max_low_pfn;
 #else
index 3d0b18a..2acdd7a 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.17-1.2142_FC4-1.planetlab
-# Mon Aug 21 16:39:45 2006
+# Linux kernel version: 2.6.17-1.2142_FC4.3
+# Wed Oct 18 22:35:27 2006
 #
 CONFIG_X86_32=y
 CONFIG_SEMAPHORE_SLEEPERS=y
@@ -93,6 +93,7 @@ CONFIG_DEFAULT_IOSCHED="cfq"
 #
 # CONFIG_SMP is not set
 CONFIG_X86_PC=y
+# CONFIG_X86_XEN is not set
 # CONFIG_X86_ELAN is not set
 # CONFIG_X86_VOYAGER is not set
 # CONFIG_X86_NUMAQ is not set
@@ -156,6 +157,7 @@ CONFIG_X86_MCE_P4THERMAL=y
 # CONFIG_MICROCODE is not set
 # CONFIG_X86_MSR is not set
 # CONFIG_X86_CPUID is not set
+# CONFIG_SWIOTLB is not set
 
 #
 # Firmware Drivers
@@ -194,6 +196,7 @@ CONFIG_HZ=1000
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
 CONFIG_PHYSICAL_START=0x100000
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
 
 #
 # Power management options (ACPI, APM)
@@ -243,6 +246,7 @@ CONFIG_PCI=y
 # CONFIG_PCI_GOBIOS is not set
 # CONFIG_PCI_GOMMCONFIG is not set
 # CONFIG_PCI_GODIRECT is not set
+# CONFIG_PCI_GOXEN_FE is not set
 CONFIG_PCI_GOANY=y
 CONFIG_PCI_BIOS=y
 CONFIG_PCI_DIRECT=y
@@ -887,7 +891,7 @@ CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
 CONFIG_BNX2=m
-CONFIG_BCM5700=m
+# CONFIG_BCM5700 is not set
 
 #
 # Ethernet (10000 Mbit)
index e488761..3fe7f15 100644 (file)
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.17-1.2142_FC4-1.planetlab
-# Mon Aug 21 16:40:40 2006
+# Linux kernel version: 2.6.17-1.2142_FC4.3smp
+# Wed Oct 18 22:53:28 2006
 #
 CONFIG_X86_32=y
 CONFIG_SEMAPHORE_SLEEPERS=y
@@ -95,6 +95,7 @@ CONFIG_DEFAULT_IOSCHED="cfq"
 #
 CONFIG_SMP=y
 # CONFIG_X86_PC is not set
+# CONFIG_X86_XEN is not set
 # CONFIG_X86_ELAN is not set
 # CONFIG_X86_VOYAGER is not set
 # CONFIG_X86_NUMAQ is not set
@@ -161,6 +162,7 @@ CONFIG_X86_MCE_P4THERMAL=y
 # CONFIG_MICROCODE is not set
 # CONFIG_X86_MSR is not set
 # CONFIG_X86_CPUID is not set
+# CONFIG_SWIOTLB is not set
 
 #
 # Firmware Drivers
@@ -200,6 +202,7 @@ CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
 CONFIG_PHYSICAL_START=0x100000
 CONFIG_HOTPLUG_CPU=y
+CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
 
 #
 # Power management options (ACPI, APM)
@@ -250,6 +253,7 @@ CONFIG_PCI=y
 # CONFIG_PCI_GOBIOS is not set
 # CONFIG_PCI_GOMMCONFIG is not set
 # CONFIG_PCI_GODIRECT is not set
+# CONFIG_PCI_GOXEN_FE is not set
 CONFIG_PCI_GOANY=y
 CONFIG_PCI_BIOS=y
 CONFIG_PCI_DIRECT=y
@@ -894,7 +898,7 @@ CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
 CONFIG_BNX2=m
-CONFIG_BCM5700=m
+# CONFIG_BCM5700 is not set
 
 #
 # Ethernet (10000 Mbit)
index 447d8e6..26b2956 100644 (file)
@@ -31,6 +31,7 @@ obj-y                         += base/ block/ misc/ mfd/ net/ media/
 obj-$(CONFIG_NUBUS)            += nubus/
 obj-$(CONFIG_ATM)              += atm/
 obj-$(CONFIG_PPC_PMAC)         += macintosh/
+obj-$(CONFIG_XEN)              += xen/
 obj-$(CONFIG_IDE)              += ide/
 obj-$(CONFIG_FC4)              += fc4/
 obj-$(CONFIG_SCSI)             += scsi/
index c24652d..f0c8926 100644 (file)
@@ -46,7 +46,7 @@ if ACPI
 
 config ACPI_SLEEP
        bool "Sleep States"
-       depends on X86 && (!SMP || SUSPEND_SMP)
+       depends on X86 && (!SMP || SUSPEND_SMP) && !XEN
        depends on PM
        default y
        ---help---
@@ -300,6 +300,7 @@ config ACPI_SYSTEM
 config X86_PM_TIMER
        bool "Power Management Timer Support" if EMBEDDED
        depends on X86
+       depends on !XEN
        default y
        help
          The Power Management Timer is available on all ACPI-capable,
index a95f636..d7791fb 100644 (file)
@@ -238,8 +238,9 @@ acpi_evaluate_object(acpi_handle handle,
                        ACPI_ERROR((AE_INFO,
                                    "Both Handle and Pathname are NULL"));
                } else {
-                       ACPI_ERROR((AE_INFO,
-                                   "Handle is NULL and Pathname is relative"));
+                       ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+                                         "Null Handle with relative pathname [%s]",
+                                         pathname));
                }
 
                status = AE_BAD_PARAMETER;
index 9e6f51c..c1434ed 100644 (file)
@@ -92,18 +92,6 @@ mac_addr(char addr[6])
        return __be64_to_cpu(n);
 }
 
-static struct sk_buff *
-skb_check(struct sk_buff *skb)
-{
-       if (skb_is_nonlinear(skb))
-       if ((skb = skb_share_check(skb, GFP_ATOMIC)))
-       if (skb_linearize(skb, GFP_ATOMIC) < 0) {
-               dev_kfree_skb(skb);
-               return NULL;
-       }
-       return skb;
-}
-
 void
 aoenet_xmit(struct sk_buff *sl)
 {
@@ -120,19 +108,18 @@ aoenet_xmit(struct sk_buff *sl)
  * (1) len doesn't include the header by default.  I want this. 
  */
 static int
-aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt)
+aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev)
 {
        struct aoe_hdr *h;
        u32 n;
 
-       skb = skb_check(skb);
-       if (!skb)
+       skb = skb_share_check(skb, GFP_ATOMIC);
+       if (skb == NULL)
                return 0;
-
+       if (skb_linearize(skb))
+               goto exit;
        if (!is_aoe_netif(ifp))
                goto exit;
-
-       //skb->len += ETH_HLEN; /* (1) */
        skb_push(skb, ETH_HLEN);        /* (1) */
 
        h = (struct aoe_hdr *) skb->mac.raw;
index 3170eaa..fdd37e4 100644 (file)
@@ -1838,7 +1838,7 @@ static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s)
        init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
        cgc.cmd[0] = GPCMD_READ_DVD_STRUCTURE;
        cgc.cmd[7] = s->type;
-       cgc.cmd[9] = cgc.buflen = 0xff;
+       cgc.cmd[9] = cgc.buflen & 0xff;
 
        if ((ret = cdo->generic_packet(cdi, &cgc)))
                return ret;
index 6a7aa93..418da02 100644 (file)
 # include <linux/efi.h>
 #endif
 
+static inline int range_is_allowed(unsigned long from, unsigned long to)
+{
+       unsigned long cursor;
+
+       cursor = from >> PAGE_SHIFT;
+       while ((cursor << PAGE_SHIFT) < to) {
+               if (!devmem_is_allowed(cursor)) {
+                       printk ("Program %s tried to read /dev/mem between %lx->%lx.\n",
+                                       current->comm, from, to);
+                       return 0;
+               }
+               cursor++;
+       }
+       return 1;
+}
+
 /*
  * Architectures vary in how they handle caching for addresses
  * outside of main memory.
@@ -103,22 +119,7 @@ static inline int valid_mmap_phys_addr_range(unsigned long addr, size_t size)
 }
 #endif
 
-static inline int range_is_allowed(unsigned long from, unsigned long to)
-{
-       unsigned long cursor;
-
-       cursor = from >> PAGE_SHIFT;
-       while ((cursor << PAGE_SHIFT) < to) {
-               if (!devmem_is_allowed(cursor)) {
-                       printk ("Program %s tried to read /dev/mem between %lx->%lx.\n",
-                                       current->comm, from, to);
-                       return 0;
-               }
-               cursor++;
-       }
-       return 1;
-}
-
+#ifndef ARCH_HAS_DEV_MEM
 /*
  * This funcion reads the *physical* memory. The f_pos points directly to the 
  * memory location. 
@@ -245,6 +246,7 @@ static ssize_t write_mem(struct file * file, const char __user * buf,
        *ppos += written;
        return written;
 }
+#endif
 
 #ifndef __HAVE_PHYS_MEM_ACCESS_PROT
 static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
@@ -681,6 +683,7 @@ static int open_port(struct inode * inode, struct file * filp)
 #define open_kmem      open_mem
 #define open_oldmem    open_mem
 
+#ifndef ARCH_HAS_DEV_MEM
 static struct file_operations mem_fops = {
        .llseek         = memory_lseek,
        .read           = read_mem,
@@ -688,6 +691,9 @@ static struct file_operations mem_fops = {
        .mmap           = mmap_mem,
        .open           = open_mem,
 };
+#else
+extern struct file_operations mem_fops;
+#endif
 
 static struct file_operations kmem_fops = {
        .llseek         = memory_lseek,
index 8ea7062..2678034 100644 (file)
@@ -424,6 +424,7 @@ static irqreturn_t tis_int_handler(int irq, void *dev_id, struct pt_regs *regs)
        iowrite32(interrupt,
                  chip->vendor.iobase +
                  TPM_INT_STATUS(chip->vendor.locality));
+       ioread32(chip->vendor.iobase + TPM_INT_STATUS(chip->vendor.locality));
        return IRQ_HANDLED;
 }
 
index 0b31cf2..12c1582 100644 (file)
@@ -133,6 +133,8 @@ LIST_HEAD(tty_drivers);                     /* linked list of tty drivers */
    vt.c for deeply disgusting hack reasons */
 DEFINE_MUTEX(tty_mutex);
 
+int console_use_vt = 1;
+
 #ifdef CONFIG_UNIX98_PTYS
 extern struct tty_driver *ptm_driver;  /* Unix98 pty masters; for /dev/ptmx */
 extern int pty_limit;          /* Config limit on Unix98 ptys */
@@ -1674,19 +1676,6 @@ release_mem_out:
        goto end_init;
 }
 
-/*
- * Get a copy of the termios structure for the driver/index
- */
-void tty_get_termios(struct tty_driver *driver, int idx, struct termios *tio)
-{
-       lock_kernel();
-       if (driver->termios[idx])
-               *tio = *driver->termios[idx];
-       else
-               *tio = driver->init_termios;
-       unlock_kernel();
-}
-
 /*
  * Releases memory associated with a tty structure, and clears out the
  * driver table slots.
@@ -2074,7 +2063,7 @@ retry_open:
                goto got_driver;
        }
 #ifdef CONFIG_VT
-       if (device == MKDEV(TTY_MAJOR,0)) {
+       if (console_use_vt && (device == MKDEV(TTY_MAJOR,0))) {
                extern struct tty_driver *console_driver;
                driver = console_driver;
                index = fg_console;
@@ -2795,7 +2784,7 @@ static void flush_to_ldisc(void *private_)
        struct tty_struct *tty = (struct tty_struct *) private_;
        unsigned long   flags;
        struct tty_ldisc *disc;
-       struct tty_buffer *tbuf;
+       struct tty_buffer *tbuf, *head;
        int count;
        char *char_buf;
        unsigned char *flag_buf;
@@ -2812,7 +2801,9 @@ static void flush_to_ldisc(void *private_)
                goto out;
        }
        spin_lock_irqsave(&tty->buf.lock, flags);
-       while((tbuf = tty->buf.head) != NULL) {
+       head = tty->buf.head;
+       tty->buf.head = NULL;
+       while((tbuf = head) != NULL) {
                while ((count = tbuf->commit - tbuf->read) != 0) {
                        char_buf = tbuf->char_buf_ptr + tbuf->read;
                        flag_buf = tbuf->flag_buf_ptr + tbuf->read;
@@ -2821,10 +2812,12 @@ static void flush_to_ldisc(void *private_)
                        disc->receive_buf(tty, char_buf, flag_buf, count);
                        spin_lock_irqsave(&tty->buf.lock, flags);
                }
-               if (tbuf->active)
+               if (tbuf->active) {
+                       tty->buf.head = head;
                        break;
-               tty->buf.head = tbuf->next;
-               if (tty->buf.head == NULL)
+               }
+               head = tbuf->next;
+               if (head == NULL)
                        tty->buf.tail = NULL;
                tty_buffer_free(tty, tbuf);
        }
@@ -3277,6 +3270,8 @@ static int __init tty_init(void)
 #endif
 
 #ifdef CONFIG_VT
+       if (!console_use_vt)
+               goto out_vt;
        cdev_init(&vc0_cdev, &console_fops);
        if (cdev_add(&vc0_cdev, MKDEV(TTY_MAJOR, 0), 1) ||
            register_chrdev_region(MKDEV(TTY_MAJOR, 0), 1, "/dev/vc/0") < 0)
@@ -3285,6 +3280,7 @@ static int __init tty_init(void)
        class_device_create(tty_class, NULL, MKDEV(TTY_MAJOR, 0), NULL, "tty0");
 
        vty_init();
+ out_vt:
 #endif
        return 0;
 }
index 1e371a5..f0dff5a 100644 (file)
@@ -8,7 +8,7 @@ menu "Firmware Drivers"
 config EDD
        tristate "BIOS Enhanced Disk Drive calls determine boot disk (EXPERIMENTAL)"
        depends on EXPERIMENTAL
-       depends on !IA64
+       depends on !IA64 && !XEN
        help
          Say Y or M here if you want to enable BIOS Enhanced Disk Drive
          Services real mode BIOS calls to determine which disk
index 766cc96..9b4f4ee 100644 (file)
@@ -181,21 +181,21 @@ static void scx200_acb_machine(struct scx200_acb_iface *iface, u8 status)
                break;
 
        case state_read:
-               /* Set ACK if receiving the last byte */
-               if (iface->len == 1)
+               /* Set ACK if _next_ byte will be the last one */
+               if (iface->len == 2)
                        outb(inb(ACBCTL1) | ACBCTL1_ACK, ACBCTL1);
                else
                        outb(inb(ACBCTL1) & ~ACBCTL1_ACK, ACBCTL1);
 
-               *iface->ptr++ = inb(ACBSDA);
-               --iface->len;
-
-               if (iface->len == 0) {
+               if (iface->len == 1) {
                        iface->result = 0;
                        iface->state = state_idle;
                        outb(inb(ACBCTL1) | ACBCTL1_STOP, ACBCTL1);
                }
 
+               *iface->ptr++ = inb(ACBSDA);
+               --iface->len;
+
                break;
 
        case state_write:
@@ -304,8 +304,12 @@ static s32 scx200_acb_smbus_xfer(struct i2c_adapter *adapter,
                buffer = (u8 *)&cur_word;
                break;
 
-       case I2C_SMBUS_BLOCK_DATA:
+       case I2C_SMBUS_I2C_BLOCK_DATA:
+               if (rw == I2C_SMBUS_READ)
+                       data->block[0] = I2C_SMBUS_BLOCK_MAX; /* For now */
                len = data->block[0];
+               if (len == 0 || len > I2C_SMBUS_BLOCK_MAX)
+                       return -EINVAL;
                buffer = &data->block[1];
                break;
 
@@ -369,7 +373,7 @@ static u32 scx200_acb_func(struct i2c_adapter *adapter)
 {
        return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
               I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
-              I2C_FUNC_SMBUS_BLOCK_DATA;
+              I2C_FUNC_SMBUS_I2C_BLOCK;
 }
 
 /* For now, we only handle combined mode (smbus) */
index 45e2cdf..2e79137 100644 (file)
@@ -756,9 +756,9 @@ int i2c_probe(struct i2c_adapter *adapter,
                                        "parameter for adapter %d, "
                                        "addr 0x%02x\n", adap_id,
                                        address_data->ignore[j + 1]);
+                               ignore = 1;
+                               break;
                        }
-                       ignore = 1;
-                       break;
                }
                if (ignore)
                        continue;
index 41d46db..b09a653 100644 (file)
@@ -410,10 +410,10 @@ void ide_toggle_bounce(ide_drive_t *drive, int on)
 {
        u64 addr = BLK_BOUNCE_HIGH;     /* dma64_addr_t */
 
-       if (!PCI_DMA_BUS_IS_PHYS) {
-               addr = BLK_BOUNCE_ANY;
-       } else if (on && drive->media == ide_disk) {
-               if (HWIF(drive)->pci_dev)
+       if (on && drive->media == ide_disk) {
+               if (!PCI_DMA_BUS_IS_PHYS)
+                       addr = BLK_BOUNCE_ANY;
+               else if (HWIF(drive)->pci_dev)
                        addr = HWIF(drive)->pci_dev->dma_mask;
        }
 
index 3e677c4..9914a78 100644 (file)
@@ -6,7 +6,7 @@
  *
  *   vt82c576, vt82c586, vt82c586a, vt82c586b, vt82c596a, vt82c596b,
  *   vt82c686, vt82c686a, vt82c686b, vt8231, vt8233, vt8233c, vt8233a,
- *   vt8235, vt8237
+ *   vt8235, vt8237, vt8237a
  *
  * Copyright (c) 2000-2002 Vojtech Pavlik
  *
@@ -82,6 +82,7 @@ static struct via_isa_bridge {
        { "vt6410",     PCI_DEVICE_ID_VIA_6410,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
        { "vt8251",     PCI_DEVICE_ID_VIA_8251,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
        { "vt8237",     PCI_DEVICE_ID_VIA_8237,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
+       { "vt8237a",    PCI_DEVICE_ID_VIA_8237A,    0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
        { "vt8235",     PCI_DEVICE_ID_VIA_8235,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
        { "vt8233a",    PCI_DEVICE_ID_VIA_8233A,    0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
        { "vt8233c",    PCI_DEVICE_ID_VIA_8233C_0,  0x00, 0x2f, VIA_UDMA_100 },
index 8f1292c..d1788cd 100644 (file)
@@ -3548,6 +3548,8 @@ static int ohci1394_pci_resume (struct pci_dev *pdev)
 
 static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state)
 {
+       pci_save_state(pdev);
+
 #ifdef CONFIG_PPC_PMAC
        if (machine_is(powermac)) {
                struct device_node *of_node;
@@ -3559,8 +3561,6 @@ static int ohci1394_pci_suspend (struct pci_dev *pdev, pm_message_t state)
        }
 #endif
 
-       pci_save_state(pdev);
-
        return 0;
 }
 
index 5413dc4..e084dbf 100644 (file)
@@ -2541,6 +2541,9 @@ static int sbp2scsi_slave_configure(struct scsi_device *sdev)
                sdev->skip_ms_page_8 = 1;
        if (scsi_id->workarounds & SBP2_WORKAROUND_FIX_CAPACITY)
                sdev->fix_capacity = 1;
+       if (scsi_id->ne->guid_vendor_id == 0x0010b9 && /* Maxtor's OUI */
+           (sdev->type == TYPE_DISK || sdev->type == TYPE_RBC))
+               sdev->allow_restart = 1;
        return 0;
 }
 
index 8ea8012..f4fddd5 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/errno.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
+#include <linux/slab.h>
 
 #include "mthca_dev.h"
 #include "mthca_cmd.h"
@@ -48,6 +49,12 @@ int mthca_reset(struct mthca_dev *mdev)
        u32 *hca_header    = NULL;
        u32 *bridge_header = NULL;
        struct pci_dev *bridge = NULL;
+       int bridge_pcix_cap = 0;
+       int hca_pcie_cap = 0;
+       int hca_pcix_cap = 0;
+
+       u16 devctl;
+       u16 linkctl;
 
 #define MTHCA_RESET_OFFSET 0xf0010
 #define MTHCA_RESET_VALUE  swab32(1)
@@ -71,8 +78,8 @@ int mthca_reset(struct mthca_dev *mdev)
                                                bridge)) != NULL) {
                        if (bridge->hdr_type    == PCI_HEADER_TYPE_BRIDGE &&
                            bridge->subordinate == mdev->pdev->bus) {
-                               mthca_dbg(mdev, "Found bridge: %s (%s)\n",
-                                         pci_pretty_name(bridge), pci_name(bridge));
+                               mthca_dbg(mdev, "Found bridge: %s\n",
+                                         pci_name(bridge));
                                break;
                        }
                }
@@ -83,8 +90,8 @@ int mthca_reset(struct mthca_dev *mdev)
                         * assume we're in no-bridge mode and hope for
                         * the best.
                         */
-                       mthca_warn(mdev, "No bridge found for %s (%s)\n",
-                                 pci_pretty_name(mdev->pdev), pci_name(mdev->pdev));
+                       mthca_warn(mdev, "No bridge found for %s\n",
+                                 pci_name(mdev->pdev));
                }
 
        }
@@ -109,6 +116,9 @@ int mthca_reset(struct mthca_dev *mdev)
                }
        }
 
+       hca_pcix_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_PCIX);
+       hca_pcie_cap = pci_find_capability(mdev->pdev, PCI_CAP_ID_EXP);
+
        if (bridge) {
                bridge_header = kmalloc(256, GFP_KERNEL);
                if (!bridge_header) {
@@ -128,6 +138,13 @@ int mthca_reset(struct mthca_dev *mdev)
                                goto out;
                        }
                }
+               bridge_pcix_cap = pci_find_capability(bridge, PCI_CAP_ID_PCIX);
+               if (!bridge_pcix_cap) {
+                               err = -ENODEV;
+                               mthca_err(mdev, "Couldn't locate HCA bridge "
+                                         "PCI-X capability, aborting.\n");
+                               goto out;
+               }
        }
 
        /* actually hit reset */
@@ -177,6 +194,20 @@ int mthca_reset(struct mthca_dev *mdev)
 good:
        /* Now restore the PCI headers */
        if (bridge) {
+               if (pci_write_config_dword(bridge, bridge_pcix_cap + 0x8,
+                                bridge_header[(bridge_pcix_cap + 0x8) / 4])) {
+                       err = -ENODEV;
+                       mthca_err(mdev, "Couldn't restore HCA bridge Upstream "
+                                 "split transaction control, aborting.\n");
+                       goto out;
+               }
+               if (pci_write_config_dword(bridge, bridge_pcix_cap + 0xc,
+                                bridge_header[(bridge_pcix_cap + 0xc) / 4])) {
+                       err = -ENODEV;
+                       mthca_err(mdev, "Couldn't restore HCA bridge Downstream "
+                                 "split transaction control, aborting.\n");
+                       goto out;
+               }
                /*
                 * Bridge control register is at 0x3e, so we'll
                 * naturally restore it last in this loop.
@@ -202,6 +233,35 @@ good:
                }
        }
 
+       if (hca_pcix_cap) {
+               if (pci_write_config_dword(mdev->pdev, hca_pcix_cap,
+                                hca_header[hca_pcix_cap / 4])) {
+                       err = -ENODEV;
+                       mthca_err(mdev, "Couldn't restore HCA PCI-X "
+                                 "command register, aborting.\n");
+                       goto out;
+               }
+       }
+
+       if (hca_pcie_cap) {
+               devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4];
+               if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_DEVCTL,
+                                          devctl)) {
+                       err = -ENODEV;
+                       mthca_err(mdev, "Couldn't restore HCA PCI Express "
+                                 "Device Control register, aborting.\n");
+                       goto out;
+               }
+               linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4];
+               if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_LNKCTL,
+                                          linkctl)) {
+                       err = -ENODEV;
+                       mthca_err(mdev, "Couldn't restore HCA PCI Express "
+                                 "Link control register, aborting.\n");
+                       goto out;
+               }
+       }
+
        for (i = 0; i < 16; ++i) {
                if (i * 4 == PCI_COMMAND)
                        continue;
index 70208c3..1d917ed 100644 (file)
@@ -1,5 +1,7 @@
 /*
  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -41,6 +43,8 @@
 #include <linux/delay.h>
 #include <linux/completion.h>
 
+#include <net/dst.h>
+
 #include "ipoib.h"
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
@@ -51,7 +55,7 @@ MODULE_PARM_DESC(mcast_debug_level,
                 "Enable multicast debug tracing if > 0");
 #endif
 
-static DECLARE_MUTEX(mcast_mutex);
+static DEFINE_MUTEX(mcast_mutex);
 
 /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
 struct ipoib_mcast {
@@ -93,8 +97,7 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_neigh *neigh, *tmp;
        unsigned long flags;
-       LIST_HEAD(ah_list);
-       struct ipoib_ah *ah, *tah;
+       int tx_dropped = 0;
 
        ipoib_dbg_mcast(netdev_priv(dev),
                        "deleting multicast group " IPOIB_GID_FMT "\n",
@@ -103,28 +106,31 @@ static void ipoib_mcast_free(struct ipoib_mcast *mcast)
        spin_lock_irqsave(&priv->lock, flags);
 
        list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
+               /*
+                * It's safe to call ipoib_put_ah() inside priv->lock
+                * here, because we know that mcast->ah will always
+                * hold one more reference, so ipoib_put_ah() will
+                * never do more than decrement the ref count.
+                */
                if (neigh->ah)
-                       list_add_tail(&neigh->ah->list, &ah_list);
-               *to_ipoib_neigh(neigh->neighbour) = NULL;
-               neigh->neighbour->ops->destructor = NULL;
-               kfree(neigh);
+                       ipoib_put_ah(neigh->ah);
+               ipoib_neigh_free(neigh);
        }
 
        spin_unlock_irqrestore(&priv->lock, flags);
 
-       list_for_each_entry_safe(ah, tah, &ah_list, list)
-               ipoib_put_ah(ah);
-
        if (mcast->ah)
                ipoib_put_ah(mcast->ah);
 
        while (!skb_queue_empty(&mcast->pkt_queue)) {
-               struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
-
-               skb->dev = dev;
-               dev_kfree_skb_any(skb);
+               ++tx_dropped;
+               dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
        }
 
+       spin_lock_irqsave(&priv->tx_lock, flags);
+       priv->stats.tx_dropped += tx_dropped;
+       spin_unlock_irqrestore(&priv->tx_lock, flags);
+
        kfree(mcast);
 }
 
@@ -133,26 +139,18 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
 {
        struct ipoib_mcast *mcast;
 
-       mcast = kmalloc(sizeof (*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC);
+       mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
        if (!mcast)
                return NULL;
 
-       memset(mcast, 0, sizeof (*mcast));
-
-       init_completion(&mcast->done);
-
        mcast->dev = dev;
        mcast->created = jiffies;
-       mcast->backoff = HZ;
-       mcast->logcount = 0;
+       mcast->backoff = 1;
 
        INIT_LIST_HEAD(&mcast->list);
        INIT_LIST_HEAD(&mcast->neigh_list);
        skb_queue_head_init(&mcast->pkt_queue);
 
-       mcast->ah    = NULL;
-       mcast->query = NULL;
-
        return mcast;
 }
 
@@ -213,6 +211,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 {
        struct net_device *dev = mcast->dev;
        struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct ipoib_ah *ah;
        int ret;
 
        mcast->mcmember = *mcmember;
@@ -251,6 +250,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
                        .port_num      = priv->port,
                        .sl            = mcast->mcmember.sl,
                        .ah_flags      = IB_AH_GRH,
+                       .static_rate   = mcast->mcmember.rate,
                        .grh           = {
                                .flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
                                .hop_limit     = mcast->mcmember.hop_limit,
@@ -258,19 +258,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
                                .traffic_class = mcast->mcmember.traffic_class
                        }
                };
-               int path_rate = ib_sa_rate_enum_to_int(mcast->mcmember.rate);
-
                av.grh.dgid = mcast->mcmember.mgid;
 
-               if (path_rate > 0 && priv->local_rate > path_rate)
-                       av.static_rate = (priv->local_rate - 1) / path_rate;
-
-               ipoib_dbg_mcast(priv, "static_rate %d for local port %dX, mcmember %dX\n",
-                               av.static_rate, priv->local_rate,
-                               ib_sa_rate_enum_to_int(mcast->mcmember.rate));
-
-               mcast->ah = ipoib_create_ah(dev, priv->pd, &av);
-               if (!mcast->ah) {
+               ah = ipoib_create_ah(dev, priv->pd, &av);
+               if (!ah) {
                        ipoib_warn(priv, "ib_address_create failed\n");
                } else {
                        ipoib_dbg_mcast(priv, "MGID " IPOIB_GID_FMT
@@ -280,11 +271,17 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
                                        be16_to_cpu(mcast->mcmember.mlid),
                                        mcast->mcmember.sl);
                }
+
+               spin_lock_irq(&priv->lock);
+               mcast->ah = ah;
+               spin_unlock_irq(&priv->lock);
        }
 
        /* actually send any queued packets */
+       spin_lock_irq(&priv->tx_lock);
        while (!skb_queue_empty(&mcast->pkt_queue)) {
                struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
+               spin_unlock_irq(&priv->tx_lock);
 
                skb->dev = dev;
 
@@ -295,7 +292,9 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 
                if (dev_queue_xmit(skb))
                        ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
+               spin_lock_irq(&priv->tx_lock);
        }
+       spin_unlock_irq(&priv->tx_lock);
 
        return 0;
 }
@@ -307,6 +306,7 @@ ipoib_mcast_sendonly_join_complete(int status,
 {
        struct ipoib_mcast *mcast = mcast_ptr;
        struct net_device *dev = mcast->dev;
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
 
        if (!status)
                ipoib_mcast_join_finish(mcast, mcmember);
@@ -317,13 +317,12 @@ ipoib_mcast_sendonly_join_complete(int status,
                                        IPOIB_GID_ARG(mcast->mcmember.mgid), status);
 
                /* Flush out any queued packets */
+               spin_lock_irq(&priv->tx_lock);
                while (!skb_queue_empty(&mcast->pkt_queue)) {
-                       struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
-
-                       skb->dev = dev;
-
-                       dev_kfree_skb_any(skb);
+                       ++priv->stats.tx_dropped;
+                       dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
                }
+               spin_unlock_irq(&priv->tx_lock);
 
                /* Clear the busy flag so we try again */
                clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
@@ -357,7 +356,9 @@ static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
 
        rec.mgid     = mcast->mcmember.mgid;
        rec.port_gid = priv->local_gid;
-       rec.pkey     = be16_to_cpu(priv->pkey);
+       rec.pkey     = cpu_to_be16(priv->pkey);
+
+       init_completion(&mcast->done);
 
        ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec,
                                     IB_SA_MCMEMBER_REC_MGID            |
@@ -394,11 +395,11 @@ static void ipoib_mcast_join_complete(int status,
                        IPOIB_GID_ARG(mcast->mcmember.mgid), status);
 
        if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) {
-               mcast->backoff = HZ;
-               down(&mcast_mutex);
+               mcast->backoff = 1;
+               mutex_lock(&mcast_mutex);
                if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
                        queue_work(ipoib_workqueue, &priv->mcast_task);
-               up(&mcast_mutex);
+               mutex_unlock(&mcast_mutex);
                complete(&mcast->done);
                return;
        }
@@ -426,9 +427,11 @@ static void ipoib_mcast_join_complete(int status,
        if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
                mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
 
+       mutex_lock(&mcast_mutex);
+
+       spin_lock_irq(&priv->lock);
        mcast->query = NULL;
 
-       down(&mcast_mutex);
        if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) {
                if (status == -ETIMEDOUT)
                        queue_work(ipoib_workqueue, &priv->mcast_task);
@@ -437,7 +440,8 @@ static void ipoib_mcast_join_complete(int status,
                                           mcast->backoff * HZ);
        } else
                complete(&mcast->done);
-       up(&mcast_mutex);
+       spin_unlock_irq(&priv->lock);
+       mutex_unlock(&mcast_mutex);
 
        return;
 }
@@ -457,7 +461,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
 
        rec.mgid     = mcast->mcmember.mgid;
        rec.port_gid = priv->local_gid;
-       rec.pkey     = be16_to_cpu(priv->pkey);
+       rec.pkey     = cpu_to_be16(priv->pkey);
 
        comp_mask =
                IB_SA_MCMEMBER_REC_MGID         |
@@ -478,6 +482,8 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                rec.traffic_class = priv->broadcast->mcmember.traffic_class;
        }
 
+       init_completion(&mcast->done);
+
        ret = ib_sa_mcmember_rec_set(priv->ca, priv->port, &rec, comp_mask,
                                     mcast->backoff * 1000, GFP_ATOMIC,
                                     ipoib_mcast_join_complete,
@@ -490,12 +496,12 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast,
                if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
                        mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
 
-               down(&mcast_mutex);
+               mutex_lock(&mcast_mutex);
                if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
                        queue_delayed_work(ipoib_workqueue,
                                           &priv->mcast_task,
-                                          mcast->backoff);
-               up(&mcast_mutex);
+                                          mcast->backoff * HZ);
+               mutex_unlock(&mcast_mutex);
        } else
                mcast->query_id = ret;
 }
@@ -525,21 +531,24 @@ void ipoib_mcast_join_task(void *dev_ptr)
        }
 
        if (!priv->broadcast) {
-               priv->broadcast = ipoib_mcast_alloc(dev, 1);
-               if (!priv->broadcast) {
+               struct ipoib_mcast *broadcast;
+
+               broadcast = ipoib_mcast_alloc(dev, 1);
+               if (!broadcast) {
                        ipoib_warn(priv, "failed to allocate broadcast group\n");
-                       down(&mcast_mutex);
+                       mutex_lock(&mcast_mutex);
                        if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
                                queue_delayed_work(ipoib_workqueue,
                                                   &priv->mcast_task, HZ);
-                       up(&mcast_mutex);
+                       mutex_unlock(&mcast_mutex);
                        return;
                }
 
-               memcpy(priv->broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
+               spin_lock_irq(&priv->lock);
+               memcpy(broadcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
                       sizeof (union ib_gid));
+               priv->broadcast = broadcast;
 
-               spin_lock_irq(&priv->lock);
                __ipoib_mcast_add(dev, priv->broadcast);
                spin_unlock_irq(&priv->lock);
        }
@@ -588,44 +597,57 @@ int ipoib_mcast_start_thread(struct net_device *dev)
 
        ipoib_dbg_mcast(priv, "starting multicast thread\n");
 
-       down(&mcast_mutex);
+       mutex_lock(&mcast_mutex);
        if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
                queue_work(ipoib_workqueue, &priv->mcast_task);
-       up(&mcast_mutex);
+       mutex_unlock(&mcast_mutex);
+
+       spin_lock_irq(&priv->lock);
+       set_bit(IPOIB_MCAST_STARTED, &priv->flags);
+       spin_unlock_irq(&priv->lock);
 
        return 0;
 }
 
-int ipoib_mcast_stop_thread(struct net_device *dev)
+static void wait_for_mcast_join(struct ipoib_dev_priv *priv,
+                               struct ipoib_mcast *mcast)
+{
+       spin_lock_irq(&priv->lock);
+       if (mcast && mcast->query) {
+               ib_sa_cancel_query(mcast->query_id, mcast->query);
+               mcast->query = NULL;
+               spin_unlock_irq(&priv->lock);
+               ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
+                               IPOIB_GID_ARG(mcast->mcmember.mgid));
+               wait_for_completion(&mcast->done);
+       }
+       else
+               spin_unlock_irq(&priv->lock);
+}
+
+int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        struct ipoib_mcast *mcast;
 
        ipoib_dbg_mcast(priv, "stopping multicast thread\n");
 
-       down(&mcast_mutex);
+       spin_lock_irq(&priv->lock);
+       clear_bit(IPOIB_MCAST_STARTED, &priv->flags);
+       spin_unlock_irq(&priv->lock);
+
+       mutex_lock(&mcast_mutex);
        clear_bit(IPOIB_MCAST_RUN, &priv->flags);
        cancel_delayed_work(&priv->mcast_task);
-       up(&mcast_mutex);
+       mutex_unlock(&mcast_mutex);
 
-       flush_workqueue(ipoib_workqueue);
+       if (flush)
+               flush_workqueue(ipoib_workqueue);
 
-       if (priv->broadcast && priv->broadcast->query) {
-               ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query);
-               priv->broadcast->query = NULL;
-               ipoib_dbg_mcast(priv, "waiting for bcast\n");
-               wait_for_completion(&priv->broadcast->done);
-       }
+       wait_for_mcast_join(priv, priv->broadcast);
 
-       list_for_each_entry(mcast, &priv->multicast_list, list) {
-               if (mcast->query) {
-                       ib_sa_cancel_query(mcast->query_id, mcast->query);
-                       mcast->query = NULL;
-                       ipoib_dbg_mcast(priv, "waiting for MGID " IPOIB_GID_FMT "\n",
-                                       IPOIB_GID_ARG(mcast->mcmember.mgid));
-                       wait_for_completion(&mcast->done);
-               }
-       }
+       list_for_each_entry(mcast, &priv->multicast_list, list)
+               wait_for_mcast_join(priv, mcast);
 
        return 0;
 }
@@ -646,7 +668,7 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 
        rec.mgid     = mcast->mcmember.mgid;
        rec.port_gid = priv->local_gid;
-       rec.pkey     = be16_to_cpu(priv->pkey);
+       rec.pkey     = cpu_to_be16(priv->pkey);
 
        /* Remove ourselves from the multicast group */
        ret = ipoib_mcast_detach(dev, be16_to_cpu(mcast->mcmember.mlid),
@@ -684,6 +706,14 @@ void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid,
         */
        spin_lock(&priv->lock);
 
+       if (!test_bit(IPOIB_MCAST_STARTED, &priv->flags)        ||
+           !priv->broadcast                                    ||
+           !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
+               ++priv->stats.tx_dropped;
+               dev_kfree_skb_any(skb);
+               goto unlock;
+       }
+
        mcast = __ipoib_mcast_find(dev, mgid);
        if (!mcast) {
                /* Let's create a new send only group now */
@@ -694,6 +724,7 @@ void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid,
                if (!mcast) {
                        ipoib_warn(priv, "unable to allocate memory for "
                                   "multicast structure\n");
+                       ++priv->stats.tx_dropped;
                        dev_kfree_skb_any(skb);
                        goto out;
                }
@@ -707,8 +738,10 @@ void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid,
        if (!mcast->ah) {
                if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
                        skb_queue_tail(&mcast->pkt_queue, skb);
-               else
+               else {
+                       ++priv->stats.tx_dropped;
                        dev_kfree_skb_any(skb);
+               }
 
                if (mcast->query)
                        ipoib_dbg_mcast(priv, "no address vector, "
@@ -728,13 +761,11 @@ out:
                if (skb->dst            &&
                    skb->dst->neighbour &&
                    !*to_ipoib_neigh(skb->dst->neighbour)) {
-                       struct ipoib_neigh *neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
+                       struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour);
 
                        if (neigh) {
                                kref_get(&mcast->ah->ref);
                                neigh->ah       = mcast->ah;
-                               neigh->neighbour = skb->dst->neighbour;
-                               *to_ipoib_neigh(skb->dst->neighbour) = neigh;
                                list_add_tail(&neigh->list, &mcast->neigh_list);
                        }
                }
@@ -742,6 +773,7 @@ out:
                ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
        }
 
+unlock:
        spin_unlock(&priv->lock);
 }
 
@@ -749,48 +781,23 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        LIST_HEAD(remove_list);
-       struct ipoib_mcast *mcast, *tmcast, *nmcast;
+       struct ipoib_mcast *mcast, *tmcast;
        unsigned long flags;
 
        ipoib_dbg_mcast(priv, "flushing multicast list\n");
 
        spin_lock_irqsave(&priv->lock, flags);
-       list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
-               nmcast = ipoib_mcast_alloc(dev, 0);
-               if (nmcast) {
-                       nmcast->flags =
-                               mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY);
-
-                       nmcast->mcmember.mgid = mcast->mcmember.mgid;
-
-                       /* Add the new group in before the to-be-destroyed group */
-                       list_add_tail(&nmcast->list, &mcast->list);
-                       list_del_init(&mcast->list);
 
-                       rb_replace_node(&mcast->rb_node, &nmcast->rb_node,
-                                       &priv->multicast_tree);
-
-                       list_add_tail(&mcast->list, &remove_list);
-               } else {
-                       ipoib_warn(priv, "could not reallocate multicast group "
-                                  IPOIB_GID_FMT "\n",
-                                  IPOIB_GID_ARG(mcast->mcmember.mgid));
-               }
+       list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
+               list_del(&mcast->list);
+               rb_erase(&mcast->rb_node, &priv->multicast_tree);
+               list_add_tail(&mcast->list, &remove_list);
        }
 
        if (priv->broadcast) {
-               nmcast = ipoib_mcast_alloc(dev, 0);
-               if (nmcast) {
-                       nmcast->mcmember.mgid = priv->broadcast->mcmember.mgid;
-
-                       rb_replace_node(&priv->broadcast->rb_node,
-                                       &nmcast->rb_node,
-                                       &priv->multicast_tree);
-
-                       list_add_tail(&priv->broadcast->list, &remove_list);
-               }
-
-               priv->broadcast = nmcast;
+               rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
+               list_add_tail(&priv->broadcast->list, &remove_list);
+               priv->broadcast = NULL;
        }
 
        spin_unlock_irqrestore(&priv->lock, flags);
@@ -801,24 +808,6 @@ void ipoib_mcast_dev_flush(struct net_device *dev)
        }
 }
 
-void ipoib_mcast_dev_down(struct net_device *dev)
-{
-       struct ipoib_dev_priv *priv = netdev_priv(dev);
-       unsigned long flags;
-
-       /* Delete broadcast since it will be recreated */
-       if (priv->broadcast) {
-               ipoib_dbg_mcast(priv, "deleting broadcast group\n");
-
-               spin_lock_irqsave(&priv->lock, flags);
-               rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
-               spin_unlock_irqrestore(&priv->lock, flags);
-               ipoib_mcast_leave(dev, priv->broadcast);
-               ipoib_mcast_free(priv->broadcast);
-               priv->broadcast = NULL;
-       }
-}
-
 void ipoib_mcast_restart_task(void *dev_ptr)
 {
        struct net_device *dev = dev_ptr;
@@ -830,9 +819,11 @@ void ipoib_mcast_restart_task(void *dev_ptr)
 
        ipoib_dbg_mcast(priv, "restarting multicast task\n");
 
-       ipoib_mcast_stop_thread(dev);
+       ipoib_mcast_stop_thread(dev, 0);
 
-       spin_lock_irqsave(&priv->lock, flags);
+       local_irq_save(flags);
+       netif_tx_lock(dev);
+       spin_lock(&priv->lock);
 
        /*
         * Unfortunately, the networking core only gives us a list of all of
@@ -904,10 +895,14 @@ void ipoib_mcast_restart_task(void *dev_ptr)
                        list_add_tail(&mcast->list, &remove_list);
                }
        }
-       spin_unlock_irqrestore(&priv->lock, flags);
+
+       spin_unlock(&priv->lock);
+       netif_tx_unlock(dev);
+       local_irq_restore(flags);
 
        /* We have to cancel outside of the spinlock */
        list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
+               wait_for_mcast_join(priv, mcast);
                ipoib_mcast_leave(mcast->dev, mcast);
                ipoib_mcast_free(mcast);
        }
@@ -916,6 +911,8 @@ void ipoib_mcast_restart_task(void *dev_ptr)
                ipoib_mcast_start_thread(dev);
 }
 
+#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
+
 struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
 {
        struct ipoib_mcast_iter *iter;
@@ -925,21 +922,16 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
                return NULL;
 
        iter->dev = dev;
-       memset(iter->mgid.raw, 0, sizeof iter->mgid);
+       memset(iter->mgid.raw, 0, 16);
 
        if (ipoib_mcast_iter_next(iter)) {
-               ipoib_mcast_iter_free(iter);
+               kfree(iter);
                return NULL;
        }
 
        return iter;
 }
 
-void ipoib_mcast_iter_free(struct ipoib_mcast_iter *iter)
-{
-       kfree(iter);
-}
-
 int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
 {
        struct ipoib_dev_priv *priv = netdev_priv(iter->dev);
@@ -988,3 +980,5 @@ void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
        *complete  = iter->complete;
        *send_only = iter->send_only;
 }
+
+#endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */
index 17212b4..34a7593 100644 (file)
@@ -91,7 +91,6 @@ struct pstore {
        struct dm_snapshot *snap;       /* up pointer to my snapshot */
        int version;
        int valid;
-       uint32_t chunk_size;
        uint32_t exceptions_per_area;
 
        /*
@@ -133,7 +132,7 @@ static int alloc_area(struct pstore *ps)
        int r = -ENOMEM;
        size_t len;
 
-       len = ps->chunk_size << SECTOR_SHIFT;
+       len = ps->snap->chunk_size << SECTOR_SHIFT;
 
        /*
         * Allocate the chunk_size block of memory that will hold
@@ -160,8 +159,8 @@ static int chunk_io(struct pstore *ps, uint32_t chunk, int rw)
        unsigned long bits;
 
        where.bdev = ps->snap->cow->bdev;
-       where.sector = ps->chunk_size * chunk;
-       where.count = ps->chunk_size;
+       where.sector = ps->snap->chunk_size * chunk;
+       where.count = ps->snap->chunk_size;
 
        return dm_io_sync_vm(1, &where, rw, ps->area, &bits);
 }
@@ -188,7 +187,7 @@ static int area_io(struct pstore *ps, uint32_t area, int rw)
 
 static int zero_area(struct pstore *ps, uint32_t area)
 {
-       memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT);
+       memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
        return area_io(ps, area, WRITE);
 }
 
@@ -196,6 +195,7 @@ static int read_header(struct pstore *ps, int *new_snapshot)
 {
        int r;
        struct disk_header *dh;
+       chunk_t chunk_size;
 
        r = chunk_io(ps, 0, READ);
        if (r)
@@ -210,8 +210,29 @@ static int read_header(struct pstore *ps, int *new_snapshot)
                *new_snapshot = 0;
                ps->valid = le32_to_cpu(dh->valid);
                ps->version = le32_to_cpu(dh->version);
-               ps->chunk_size = le32_to_cpu(dh->chunk_size);
+               chunk_size = le32_to_cpu(dh->chunk_size);
+               if (ps->snap->chunk_size != chunk_size) {
+                       DMWARN("chunk size %llu in device metadata overrides "
+                              "table chunk size of %llu.",
+                              (unsigned long long)chunk_size,
+                              (unsigned long long)ps->snap->chunk_size);
+
+                       /* We had a bogus chunk_size. Fix stuff up. */
+                       dm_io_put(sectors_to_pages(ps->snap->chunk_size));
+                       free_area(ps);
+
+                       ps->snap->chunk_size = chunk_size;
+                       ps->snap->chunk_mask = chunk_size - 1;
+                       ps->snap->chunk_shift = ffs(chunk_size) - 1;
 
+                       r = alloc_area(ps);
+                       if (r)
+                               return r;
+
+                       r = dm_io_get(sectors_to_pages(chunk_size));
+                       if (r)
+                               return r;
+               }
        } else {
                DMWARN("Invalid/corrupt snapshot");
                r = -ENXIO;
@@ -224,13 +245,13 @@ static int write_header(struct pstore *ps)
 {
        struct disk_header *dh;
 
-       memset(ps->area, 0, ps->chunk_size << SECTOR_SHIFT);
+       memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT);
 
        dh = (struct disk_header *) ps->area;
        dh->magic = cpu_to_le32(SNAP_MAGIC);
        dh->valid = cpu_to_le32(ps->valid);
        dh->version = cpu_to_le32(ps->version);
-       dh->chunk_size = cpu_to_le32(ps->chunk_size);
+       dh->chunk_size = cpu_to_le32(ps->snap->chunk_size);
 
        return chunk_io(ps, 0, WRITE);
 }
@@ -365,7 +386,7 @@ static void persistent_destroy(struct exception_store *store)
 {
        struct pstore *ps = get_info(store);
 
-       dm_io_put(sectors_to_pages(ps->chunk_size));
+       dm_io_put(sectors_to_pages(ps->snap->chunk_size));
        vfree(ps->callbacks);
        free_area(ps);
        kfree(ps);
@@ -383,6 +404,16 @@ static int persistent_read_metadata(struct exception_store *store)
        if (r)
                return r;
 
+       /*
+        * Now we know correct chunk_size, complete the initialisation.
+        */
+       ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) /
+                                 sizeof(struct disk_exception);
+       ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
+                       sizeof(*ps->callbacks));
+       if (!ps->callbacks)
+               return -ENOMEM;
+
        /*
         * Do we need to setup a new snapshot ?
         */
@@ -533,9 +564,6 @@ int dm_create_persistent(struct exception_store *store, uint32_t chunk_size)
        ps->snap = store->snap;
        ps->valid = 1;
        ps->version = SNAPSHOT_DISK_VERSION;
-       ps->chunk_size = chunk_size;
-       ps->exceptions_per_area = (chunk_size << SECTOR_SHIFT) /
-           sizeof(struct disk_exception);
        ps->next_free = 2;      /* skipping the header and first area */
        ps->current_committed = 0;
 
@@ -543,18 +571,9 @@ int dm_create_persistent(struct exception_store *store, uint32_t chunk_size)
        if (r)
                goto bad;
 
-       /*
-        * Allocate space for all the callbacks.
-        */
        ps->callback_count = 0;
        atomic_set(&ps->pending_count, 0);
-       ps->callbacks = dm_vcalloc(ps->exceptions_per_area,
-                                  sizeof(*ps->callbacks));
-
-       if (!ps->callbacks) {
-               r = -ENOMEM;
-               goto bad;
-       }
+       ps->callbacks = NULL;
 
        store->destroy = persistent_destroy;
        store->read_metadata = persistent_read_metadata;
@@ -568,12 +587,9 @@ int dm_create_persistent(struct exception_store *store, uint32_t chunk_size)
 
       bad:
        dm_io_put(sectors_to_pages(chunk_size));
-       if (ps) {
-               if (ps->area)
-                       free_area(ps);
-
-               kfree(ps);
-       }
+       if (ps && ps->area)
+               free_area(ps);
+       kfree(ps);
        return r;
 }
 
index 8edd643..f7e7436 100644 (file)
@@ -102,8 +102,10 @@ static struct hash_cell *__get_name_cell(const char *str)
        unsigned int h = hash_str(str);
 
        list_for_each_entry (hc, _name_buckets + h, name_list)
-               if (!strcmp(hc->name, str))
+               if (!strcmp(hc->name, str)) {
+                       dm_get(hc->md);
                        return hc;
+               }
 
        return NULL;
 }
@@ -114,8 +116,10 @@ static struct hash_cell *__get_uuid_cell(const char *str)
        unsigned int h = hash_str(str);
 
        list_for_each_entry (hc, _uuid_buckets + h, uuid_list)
-               if (!strcmp(hc->uuid, str))
+               if (!strcmp(hc->uuid, str)) {
+                       dm_get(hc->md);
                        return hc;
+               }
 
        return NULL;
 }
@@ -191,7 +195,7 @@ static int unregister_with_devfs(struct hash_cell *hc)
  */
 static int dm_hash_insert(const char *name, const char *uuid, struct mapped_device *md)
 {
-       struct hash_cell *cell;
+       struct hash_cell *cell, *hc;
 
        /*
         * Allocate the new cells.
@@ -204,14 +208,19 @@ static int dm_hash_insert(const char *name, const char *uuid, struct mapped_devi
         * Insert the cell into both hash tables.
         */
        down_write(&_hash_lock);
-       if (__get_name_cell(name))
+       hc = __get_name_cell(name);
+       if (hc) {
+               dm_put(hc->md);
                goto bad;
+       }
 
        list_add(&cell->name_list, _name_buckets + hash_str(name));
 
        if (uuid) {
-               if (__get_uuid_cell(uuid)) {
+               hc = __get_uuid_cell(uuid);
+               if (hc) {
                        list_del(&cell->name_list);
+                       dm_put(hc->md);
                        goto bad;
                }
                list_add(&cell->uuid_list, _uuid_buckets + hash_str(uuid));
@@ -289,6 +298,7 @@ static int dm_hash_rename(const char *old, const char *new)
        if (hc) {
                DMWARN("asked to rename to an already existing name %s -> %s",
                       old, new);
+               dm_put(hc->md);
                up_write(&_hash_lock);
                kfree(new_name);
                return -EBUSY;
@@ -328,6 +338,7 @@ static int dm_hash_rename(const char *old, const char *new)
                dm_table_put(table);
        }
 
+       dm_put(hc->md);
        up_write(&_hash_lock);
        kfree(old_name);
        return 0;
@@ -611,10 +622,8 @@ static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
                return __get_name_cell(param->name);
 
        md = dm_get_md(huge_decode_dev(param->dev));
-       if (md) {
+       if (md)
                mdptr = dm_get_mdptr(md);
-               dm_put(md);
-       }
 
        return mdptr;
 }
@@ -628,7 +637,6 @@ static struct mapped_device *find_device(struct dm_ioctl *param)
        hc = __find_device_hash_cell(param);
        if (hc) {
                md = hc->md;
-               dm_get(md);
 
                /*
                 * Sneakily write in both the name and the uuid
@@ -653,6 +661,7 @@ static struct mapped_device *find_device(struct dm_ioctl *param)
 static int dev_remove(struct dm_ioctl *param, size_t param_size)
 {
        struct hash_cell *hc;
+       struct mapped_device *md;
 
        down_write(&_hash_lock);
        hc = __find_device_hash_cell(param);
@@ -663,8 +672,11 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size)
                return -ENXIO;
        }
 
+       md = hc->md;
+
        __hash_remove(hc);
        up_write(&_hash_lock);
+       dm_put(md);
        param->data_size = 0;
        return 0;
 }
@@ -790,7 +802,6 @@ static int do_resume(struct dm_ioctl *param)
        }
 
        md = hc->md;
-       dm_get(md);
 
        new_map = hc->new_map;
        hc->new_map = NULL;
@@ -1078,6 +1089,7 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
 {
        int r;
        struct hash_cell *hc;
+       struct mapped_device *md;
 
        down_write(&_hash_lock);
 
@@ -1096,7 +1108,9 @@ static int table_clear(struct dm_ioctl *param, size_t param_size)
        param->flags &= ~DM_INACTIVE_PRESENT_FLAG;
 
        r = __dev_status(hc->md, param);
+       md = hc->md;
        up_write(&_hash_lock);
+       dm_put(md);
        return r;
 }
 
index 0c1b852..5af5265 100644 (file)
@@ -63,6 +63,7 @@ struct multipath {
        unsigned nr_priority_groups;
        struct list_head priority_groups;
        unsigned pg_init_required;      /* pg_init needs calling? */
+       unsigned pg_init_in_progress;   /* Only one pg_init allowed at once */
 
        unsigned nr_valid_paths;        /* Total number of usable paths */
        struct pgpath *current_pgpath;
@@ -72,7 +73,7 @@ struct multipath {
 
        unsigned queue_io;              /* Must we queue all I/O? */
        unsigned queue_if_no_path;      /* Queue I/O if last path fails? */
-       unsigned suspended;             /* Has dm core suspended our I/O? */
+       unsigned saved_queue_if_no_path;/* Saved state during suspension */
 
        struct work_struct process_queued_ios;
        struct bio_list queued_ios;
@@ -178,8 +179,7 @@ static struct multipath *alloc_multipath(void)
                m->queue_io = 1;
                INIT_WORK(&m->process_queued_ios, process_queued_ios, m);
                INIT_WORK(&m->trigger_event, trigger_event, m);
-               m->mpio_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
-                                             mempool_free_slab, _mpio_cache);
+               m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
                if (!m->mpio_pool) {
                        kfree(m);
                        return NULL;
@@ -304,11 +304,12 @@ static int map_io(struct multipath *m, struct bio *bio, struct mpath_io *mpio,
                m->queue_size--;
 
        if ((pgpath && m->queue_io) ||
-           (!pgpath && m->queue_if_no_path && !m->suspended)) {
+           (!pgpath && m->queue_if_no_path)) {
                /* Queue for the daemon to resubmit */
                bio_list_add(&m->queued_ios, bio);
                m->queue_size++;
-               if (m->pg_init_required || !m->queue_io)
+               if ((m->pg_init_required && !m->pg_init_in_progress) ||
+                   !m->queue_io)
                        queue_work(kmultipathd, &m->process_queued_ios);
                pgpath = NULL;
                r = 0;
@@ -327,14 +328,19 @@ static int map_io(struct multipath *m, struct bio *bio, struct mpath_io *mpio,
 /*
  * If we run out of usable paths, should we queue I/O or error it?
  */
-static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path)
+static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
+                           unsigned save_old_value)
 {
        unsigned long flags;
 
        spin_lock_irqsave(&m->lock, flags);
 
+       if (save_old_value)
+               m->saved_queue_if_no_path = m->queue_if_no_path;
+       else
+               m->saved_queue_if_no_path = queue_if_no_path;
        m->queue_if_no_path = queue_if_no_path;
-       if (!m->queue_if_no_path)
+       if (!m->queue_if_no_path && m->queue_size)
                queue_work(kmultipathd, &m->process_queued_ios);
 
        spin_unlock_irqrestore(&m->lock, flags);
@@ -379,25 +385,31 @@ static void process_queued_ios(void *data)
 {
        struct multipath *m = (struct multipath *) data;
        struct hw_handler *hwh = &m->hw_handler;
-       struct pgpath *pgpath;
-       unsigned init_required, must_queue = 0;
+       struct pgpath *pgpath = NULL;
+       unsigned init_required = 0, must_queue = 1;
        unsigned long flags;
 
        spin_lock_irqsave(&m->lock, flags);
 
+       if (!m->queue_size)
+               goto out;
+
        if (!m->current_pgpath)
                __choose_pgpath(m);
 
        pgpath = m->current_pgpath;
 
-       if ((pgpath && m->queue_io) ||
-           (!pgpath && m->queue_if_no_path && !m->suspended))
-               must_queue = 1;
+       if ((pgpath && !m->queue_io) ||
+           (!pgpath && !m->queue_if_no_path))
+               must_queue = 0;
 
-       init_required = m->pg_init_required;
-       if (init_required)
+       if (m->pg_init_required && !m->pg_init_in_progress) {
                m->pg_init_required = 0;
+               m->pg_init_in_progress = 1;
+               init_required = 1;
+       }
 
+out:
        spin_unlock_irqrestore(&m->lock, flags);
 
        if (init_required)
@@ -668,7 +680,7 @@ static int parse_features(struct arg_set *as, struct multipath *m,
                return 0;
 
        if (!strnicmp(shift(as), MESG_STR("queue_if_no_path")))
-               return queue_if_no_path(m, 1);
+               return queue_if_no_path(m, 1, 0);
        else {
                ti->error = "Unrecognised multipath feature request";
                return -EINVAL;
@@ -699,6 +711,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
                return -EINVAL;
        }
 
+       m->ti = ti;
+
        r = parse_features(&as, m, ti);
        if (r)
                goto bad;
@@ -740,7 +754,6 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
        }
 
        ti->private = m;
-       m->ti = ti;
 
        return 0;
 
@@ -752,6 +765,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc,
 static void multipath_dtr(struct dm_target *ti)
 {
        struct multipath *m = (struct multipath *) ti->private;
+
+       flush_workqueue(kmultipathd);
        free_multipath(m);
 }
 
@@ -765,6 +780,9 @@ static int multipath_map(struct dm_target *ti, struct bio *bio,
        struct mpath_io *mpio;
        struct multipath *m = (struct multipath *) ti->private;
 
+       if (bio_barrier(bio))
+               return -EOPNOTSUPP;
+
        mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
        dm_bio_record(&mpio->details, bio);
 
@@ -837,7 +855,7 @@ static int reinstate_path(struct pgpath *pgpath)
        pgpath->path.is_active = 1;
 
        m->current_pgpath = NULL;
-       if (!m->nr_valid_paths++)
+       if (!m->nr_valid_paths++ && m->queue_size)
                queue_work(kmultipathd, &m->process_queued_ios);
 
        queue_work(kmultipathd, &m->trigger_event);
@@ -963,12 +981,13 @@ void dm_pg_init_complete(struct path *path, unsigned err_flags)
                bypass_pg(m, pg, 1);
 
        spin_lock_irqsave(&m->lock, flags);
-       if (!err_flags)
-               m->queue_io = 0;
-       else {
+       if (err_flags) {
                m->current_pgpath = NULL;
                m->current_pg = NULL;
-       }
+       } else if (!m->pg_init_required)
+               m->queue_io = 0;
+
+       m->pg_init_in_progress = 0;
        queue_work(kmultipathd, &m->process_queued_ios);
        spin_unlock_irqrestore(&m->lock, flags);
 }
@@ -981,6 +1000,7 @@ static int do_end_io(struct multipath *m, struct bio *bio,
 {
        struct hw_handler *hwh = &m->hw_handler;
        unsigned err_flags = MP_FAIL_PATH;      /* Default behavior */
+       unsigned long flags;
 
        if (!error)
                return 0;       /* I/O complete */
@@ -988,17 +1008,20 @@ static int do_end_io(struct multipath *m, struct bio *bio,
        if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
                return error;
 
-       spin_lock(&m->lock);
+       if (error == -EOPNOTSUPP)
+               return error;
+
+       spin_lock_irqsave(&m->lock, flags);
        if (!m->nr_valid_paths) {
-               if (!m->queue_if_no_path || m->suspended) {
-                       spin_unlock(&m->lock);
+               if (!m->queue_if_no_path) {
+                       spin_unlock_irqrestore(&m->lock, flags);
                        return -EIO;
                } else {
-                       spin_unlock(&m->lock);
+                       spin_unlock_irqrestore(&m->lock, flags);
                        goto requeue;
                }
        }
-       spin_unlock(&m->lock);
+       spin_unlock_irqrestore(&m->lock, flags);
 
        if (hwh->type && hwh->type->error)
                err_flags = hwh->type->error(hwh, bio);
@@ -1018,12 +1041,12 @@ static int do_end_io(struct multipath *m, struct bio *bio,
        dm_bio_restore(&mpio->details, bio);
 
        /* queue for the daemon to resubmit or fail */
-       spin_lock(&m->lock);
+       spin_lock_irqsave(&m->lock, flags);
        bio_list_add(&m->queued_ios, bio);
        m->queue_size++;
        if (!m->queue_io)
                queue_work(kmultipathd, &m->process_queued_ios);
-       spin_unlock(&m->lock);
+       spin_unlock_irqrestore(&m->lock, flags);
 
        return 1;       /* io not complete */
 }
@@ -1051,27 +1074,27 @@ static int multipath_end_io(struct dm_target *ti, struct bio *bio,
 
 /*
  * Suspend can't complete until all the I/O is processed so if
- * the last path failed we will now error any queued I/O.
+ * the last path fails we must error any remaining I/O.
+ * Note that if the freeze_bdev fails while suspending, the
+ * queue_if_no_path state is lost - userspace should reset it.
  */
 static void multipath_presuspend(struct dm_target *ti)
 {
        struct multipath *m = (struct multipath *) ti->private;
-       unsigned long flags;
 
-       spin_lock_irqsave(&m->lock, flags);
-       m->suspended = 1;
-       if (m->queue_if_no_path)
-               queue_work(kmultipathd, &m->process_queued_ios);
-       spin_unlock_irqrestore(&m->lock, flags);
+       queue_if_no_path(m, 0, 1);
 }
 
+/*
+ * Restore the queue_if_no_path setting.
+ */
 static void multipath_resume(struct dm_target *ti)
 {
        struct multipath *m = (struct multipath *) ti->private;
        unsigned long flags;
 
        spin_lock_irqsave(&m->lock, flags);
-       m->suspended = 0;
+       m->queue_if_no_path = m->saved_queue_if_no_path;
        spin_unlock_irqrestore(&m->lock, flags);
 }
 
@@ -1204,9 +1227,9 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
 
        if (argc == 1) {
                if (!strnicmp(argv[0], MESG_STR("queue_if_no_path")))
-                       return queue_if_no_path(m, 1);
+                       return queue_if_no_path(m, 1, 0);
                else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path")))
-                       return queue_if_no_path(m, 0);
+                       return queue_if_no_path(m, 0, 0);
        }
 
        if (argc != 2)
index 6e3cf7e..2cab46e 100644 (file)
@@ -106,12 +106,42 @@ struct region {
        struct bio_list delayed_bios;
 };
 
+
+/*-----------------------------------------------------------------
+ * Mirror set structures.
+ *---------------------------------------------------------------*/
+struct mirror {
+       atomic_t error_count;
+       struct dm_dev *dev;
+       sector_t offset;
+};
+
+struct mirror_set {
+       struct dm_target *ti;
+       struct list_head list;
+       struct region_hash rh;
+       struct kcopyd_client *kcopyd_client;
+
+       spinlock_t lock;        /* protects the next two lists */
+       struct bio_list reads;
+       struct bio_list writes;
+
+       /* recovery */
+       region_t nr_regions;
+       int in_sync;
+
+       struct mirror *default_mirror;  /* Default mirror */
+
+       unsigned int nr_mirrors;
+       struct mirror mirror[0];
+};
+
 /*
  * Conversion fns
  */
 static inline region_t bio_to_region(struct region_hash *rh, struct bio *bio)
 {
-       return bio->bi_sector >> rh->region_shift;
+       return (bio->bi_sector - rh->ms->ti->begin) >> rh->region_shift;
 }
 
 static inline sector_t region_to_sector(struct region_hash *rh, region_t region)
@@ -122,16 +152,6 @@ static inline sector_t region_to_sector(struct region_hash *rh, region_t region)
 /* FIXME move this */
 static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw);
 
-static void *region_alloc(unsigned int __nocast gfp_mask, void *pool_data)
-{
-       return kmalloc(sizeof(struct region), gfp_mask);
-}
-
-static void region_free(void *element, void *pool_data)
-{
-       kfree(element);
-}
-
 #define MIN_REGIONS 64
 #define MAX_RECOVERY 1
 static int rh_init(struct region_hash *rh, struct mirror_set *ms,
@@ -173,8 +193,8 @@ static int rh_init(struct region_hash *rh, struct mirror_set *ms,
        INIT_LIST_HEAD(&rh->quiesced_regions);
        INIT_LIST_HEAD(&rh->recovered_regions);
 
-       rh->region_pool = mempool_create(MIN_REGIONS, region_alloc,
-                                        region_free, NULL);
+       rh->region_pool = mempool_create_kmalloc_pool(MIN_REGIONS,
+                                                     sizeof(struct region));
        if (!rh->region_pool) {
                vfree(rh->buckets);
                rh->buckets = NULL;
@@ -233,7 +253,9 @@ static struct region *__rh_alloc(struct region_hash *rh, region_t region)
        struct region *reg, *nreg;
 
        read_unlock(&rh->hash_lock);
-       nreg = mempool_alloc(rh->region_pool, GFP_NOIO);
+       nreg = mempool_alloc(rh->region_pool, GFP_ATOMIC);
+       if (unlikely(!nreg))
+               nreg = kmalloc(sizeof(struct region), GFP_NOIO);
        nreg->state = rh->log->type->in_sync(rh->log, region, 1) ?
                RH_CLEAN : RH_NOSYNC;
        nreg->rh = rh;
@@ -375,16 +397,20 @@ static void rh_inc(struct region_hash *rh, region_t region)
 
        read_lock(&rh->hash_lock);
        reg = __rh_find(rh, region);
-       if (reg->state == RH_CLEAN) {
-               rh->log->type->mark_region(rh->log, reg->key);
 
-               spin_lock_irq(&rh->region_lock);
+       spin_lock_irq(&rh->region_lock);
+       atomic_inc(&reg->pending);
+
+       if (reg->state == RH_CLEAN) {
                reg->state = RH_DIRTY;
                list_del_init(&reg->list);      /* take off the clean list */
                spin_unlock_irq(&rh->region_lock);
-       }
 
-       atomic_inc(&reg->pending);
+               rh->log->type->mark_region(rh->log, reg->key);
+       } else
+               spin_unlock_irq(&rh->region_lock);
+
+
        read_unlock(&rh->hash_lock);
 }
 
@@ -406,17 +432,29 @@ static void rh_dec(struct region_hash *rh, region_t region)
        reg = __rh_lookup(rh, region);
        read_unlock(&rh->hash_lock);
 
+       spin_lock_irqsave(&rh->region_lock, flags);
        if (atomic_dec_and_test(&reg->pending)) {
-               spin_lock_irqsave(&rh->region_lock, flags);
+               /*
+                * There is no pending I/O for this region.
+                * We can move the region to corresponding list for next action.
+                * At this point, the region is not yet connected to any list.
+                *
+                * If the state is RH_NOSYNC, the region should be kept off
+                * from clean list.
+                * The hash entry for RH_NOSYNC will remain in memory
+                * until the region is recovered or the map is reloaded.
+                */
+
+               /* do nothing for RH_NOSYNC */
                if (reg->state == RH_RECOVERING) {
                        list_add_tail(&reg->list, &rh->quiesced_regions);
-               } else {
+               } else if (reg->state == RH_DIRTY) {
                        reg->state = RH_CLEAN;
                        list_add(&reg->list, &rh->clean_regions);
                }
-               spin_unlock_irqrestore(&rh->region_lock, flags);
                should_wake = 1;
        }
+       spin_unlock_irqrestore(&rh->region_lock, flags);
 
        if (should_wake)
                wake();
@@ -535,33 +573,6 @@ static void rh_start_recovery(struct region_hash *rh)
        wake();
 }
 
-/*-----------------------------------------------------------------
- * Mirror set structures.
- *---------------------------------------------------------------*/
-struct mirror {
-       atomic_t error_count;
-       struct dm_dev *dev;
-       sector_t offset;
-};
-
-struct mirror_set {
-       struct dm_target *ti;
-       struct list_head list;
-       struct region_hash rh;
-       struct kcopyd_client *kcopyd_client;
-
-       spinlock_t lock;        /* protects the next two lists */
-       struct bio_list reads;
-       struct bio_list writes;
-
-       /* recovery */
-       region_t nr_regions;
-       int in_sync;
-
-       unsigned int nr_mirrors;
-       struct mirror mirror[0];
-};
-
 /*
  * Every mirror should look like this one.
  */
@@ -607,7 +618,7 @@ static int recover(struct mirror_set *ms, struct region *reg)
        unsigned long flags = 0;
 
        /* fill in the source */
-       m = ms->mirror + DEFAULT_MIRROR;
+       m = ms->default_mirror;
        from.bdev = m->dev->bdev;
        from.sector = m->offset + region_to_sector(reg->rh, reg->key);
        if (reg->key == (ms->nr_regions - 1)) {
@@ -623,7 +634,7 @@ static int recover(struct mirror_set *ms, struct region *reg)
 
        /* fill in the destinations */
        for (i = 0, dest = to; i < ms->nr_mirrors; i++) {
-               if (i == DEFAULT_MIRROR)
+               if (&ms->mirror[i] == ms->default_mirror)
                        continue;
 
                m = ms->mirror + i;
@@ -678,7 +689,7 @@ static void do_recovery(struct mirror_set *ms)
 static struct mirror *choose_mirror(struct mirror_set *ms, sector_t sector)
 {
        /* FIXME: add read balancing */
-       return ms->mirror + DEFAULT_MIRROR;
+       return ms->default_mirror;
 }
 
 /*
@@ -705,7 +716,7 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads)
                if (rh_in_sync(&ms->rh, region, 0))
                        m = choose_mirror(ms, bio->bi_sector);
                else
-                       m = ms->mirror + DEFAULT_MIRROR;
+                       m = ms->default_mirror;
 
                map_bio(ms, m, bio);
                generic_make_request(bio);
@@ -829,7 +840,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
                rh_delay(&ms->rh, bio);
 
        while ((bio = bio_list_pop(&nosync))) {
-               map_bio(ms, ms->mirror + DEFAULT_MIRROR, bio);
+               map_bio(ms, ms->default_mirror, bio);
                generic_make_request(bio);
        }
 }
@@ -896,6 +907,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
        ms->nr_mirrors = nr_mirrors;
        ms->nr_regions = dm_sector_div_up(ti->len, region_size);
        ms->in_sync = 0;
+       ms->default_mirror = &ms->mirror[DEFAULT_MIRROR];
 
        if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) {
                ti->error = "dm-mirror: Error creating dirty region hash";
@@ -925,9 +937,9 @@ static inline int _check_region_size(struct dm_target *ti, uint32_t size)
 static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
                      unsigned int mirror, char **argv)
 {
-       sector_t offset;
+       unsigned long long offset;
 
-       if (sscanf(argv[1], SECTOR_FORMAT, &offset) != 1) {
+       if (sscanf(argv[1], "%llu", &offset) != 1) {
                ti->error = "dm-mirror: Invalid offset";
                return -EINVAL;
        }
@@ -1060,6 +1072,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        }
 
        ti->private = ms;
+       ti->split_io = ms->rh.region_size;
 
        r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client);
        if (r) {
@@ -1105,7 +1118,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio,
        struct mirror *m;
        struct mirror_set *ms = ti->private;
 
-       map_context->ll = bio->bi_sector >> ms->rh.region_shift;
+       map_context->ll = bio_to_region(&ms->rh, bio);
 
        if (rw == WRITE) {
                queue_bio(ms, bio, rw);
@@ -1193,16 +1206,17 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
                for (m = 0; m < ms->nr_mirrors; m++)
                        DMEMIT("%s ", ms->mirror[m].dev->name);
 
-               DMEMIT(SECTOR_FORMAT "/" SECTOR_FORMAT,
-                      ms->rh.log->type->get_sync_count(ms->rh.log),
-                      ms->nr_regions);
+               DMEMIT("%llu/%llu",
+                       (unsigned long long)ms->rh.log->type->
+                               get_sync_count(ms->rh.log),
+                       (unsigned long long)ms->nr_regions);
                break;
 
        case STATUSTYPE_TABLE:
                DMEMIT("%d ", ms->nr_mirrors);
                for (m = 0; m < ms->nr_mirrors; m++)
-                       DMEMIT("%s " SECTOR_FORMAT " ",
-                              ms->mirror[m].dev->name, ms->mirror[m].offset);
+                       DMEMIT("%s %llu ", ms->mirror[m].dev->name,
+                               (unsigned long long)ms->mirror[m].offset);
        }
 
        return 0;
@@ -1229,7 +1243,7 @@ static int __init dm_mirror_init(void)
        if (r)
                return r;
 
-       _kmirrord_wq = create_workqueue("kmirrord");
+       _kmirrord_wq = create_singlethread_workqueue("kmirrord");
        if (!_kmirrord_wq) {
                DMERR("couldn't start kmirrord");
                dm_dirty_log_exit();
index 7e691ab..b84bc1a 100644 (file)
@@ -49,11 +49,26 @@ struct pending_exception {
        struct bio_list snapshot_bios;
 
        /*
-        * Other pending_exceptions that are processing this
-        * chunk.  When this list is empty, we know we can
-        * complete the origins.
+        * Short-term queue of pending exceptions prior to submission.
         */
-       struct list_head siblings;
+       struct list_head list;
+
+       /*
+        * The primary pending_exception is the one that holds
+        * the sibling_count and the list of origin_bios for a
+        * group of pending_exceptions.  It is always last to get freed.
+        * These fields get set up when writing to the origin.
+        */
+       struct pending_exception *primary_pe;
+
+       /*
+        * Number of pending_exceptions processing this chunk.
+        * When this drops to zero we must complete the origin bios.
+        * If incrementing or decrementing this, hold pe->snap->lock for
+        * the sibling concerned and not pe->primary_pe->snap->lock unless
+        * they are the same.
+        */
+       atomic_t sibling_count;
 
        /* Pointer back to snapshot context */
        struct dm_snapshot *snap;
@@ -371,6 +386,17 @@ static inline ulong round_up(ulong n, ulong size)
        return (n + size) & ~size;
 }
 
+static void read_snapshot_metadata(struct dm_snapshot *s)
+{
+       if (s->store.read_metadata(&s->store)) {
+               down_write(&s->lock);
+               s->valid = 0;
+               up_write(&s->lock);
+
+               dm_table_event(s->table);
+       }
+}
+
 /*
  * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
  */
@@ -457,7 +483,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        s->chunk_shift = ffs(chunk_size) - 1;
 
        s->valid = 1;
-       s->have_metadata = 0;
+       s->active = 0;
        s->last_percent = 0;
        init_rwsem(&s->lock);
        s->table = ti->table;
@@ -492,7 +518,11 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                goto bad5;
        }
 
+       /* Metadata must only be loaded into one table at once */
+       read_snapshot_metadata(s);
+
        /* Add snapshot to the list of snapshots for this origin */
+       /* Exceptions aren't triggered till snapshot_resume() is called */
        if (register_snapshot(s)) {
                r = -EINVAL;
                ti->error = "Cannot register snapshot origin";
@@ -500,7 +530,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        }
 
        ti->private = s;
-       ti->split_io = chunk_size;
+       ti->split_io = s->chunk_size;
 
        return 0;
 
@@ -529,8 +559,12 @@ static void snapshot_dtr(struct dm_target *ti)
 {
        struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
 
+       /* Prevent further origin writes from using this snapshot. */
+       /* After this returns there can be no new kcopyd jobs. */
        unregister_snapshot(s);
 
+       kcopyd_client_destroy(s->kcopyd_client);
+
        exit_exception_table(&s->pending, pending_cache);
        exit_exception_table(&s->complete, exception_cache);
 
@@ -539,7 +573,7 @@ static void snapshot_dtr(struct dm_target *ti)
 
        dm_put_device(ti, s->origin);
        dm_put_device(ti, s->cow);
-       kcopyd_client_destroy(s->kcopyd_client);
+
        kfree(s);
 }
 
@@ -573,78 +607,117 @@ static void error_bios(struct bio *bio)
        }
 }
 
+static inline void error_snapshot_bios(struct pending_exception *pe)
+{
+       error_bios(bio_list_get(&pe->snapshot_bios));
+}
+
 static struct bio *__flush_bios(struct pending_exception *pe)
 {
-       struct pending_exception *sibling;
+       /*
+        * If this pe is involved in a write to the origin and
+        * it is the last sibling to complete then release
+        * the bios for the original write to the origin.
+        */
 
-       if (list_empty(&pe->siblings))
-               return bio_list_get(&pe->origin_bios);
+       if (pe->primary_pe &&
+           atomic_dec_and_test(&pe->primary_pe->sibling_count))
+               return bio_list_get(&pe->primary_pe->origin_bios);
 
-       sibling = list_entry(pe->siblings.next,
-                            struct pending_exception, siblings);
+       return NULL;
+}
 
-       list_del(&pe->siblings);
+static void __invalidate_snapshot(struct dm_snapshot *s,
+                               struct pending_exception *pe, int err)
+{
+       if (!s->valid)
+               return;
 
-       /* This is fine as long as kcopyd is single-threaded. If kcopyd
-        * becomes multi-threaded, we'll need some locking here.
-        */
-       bio_list_merge(&sibling->origin_bios, &pe->origin_bios);
+       if (err == -EIO)
+               DMERR("Invalidating snapshot: Error reading/writing.");
+       else if (err == -ENOMEM)
+               DMERR("Invalidating snapshot: Unable to allocate exception.");
 
-       return NULL;
+       if (pe)
+               remove_exception(&pe->e);
+
+       if (s->store.drop_snapshot)
+               s->store.drop_snapshot(&s->store);
+
+       s->valid = 0;
+
+       dm_table_event(s->table);
 }
 
 static void pending_complete(struct pending_exception *pe, int success)
 {
        struct exception *e;
+       struct pending_exception *primary_pe;
        struct dm_snapshot *s = pe->snap;
        struct bio *flush = NULL;
 
-       if (success) {
-               e = alloc_exception();
-               if (!e) {
-                       DMWARN("Unable to allocate exception.");
-                       down_write(&s->lock);
-                       s->store.drop_snapshot(&s->store);
-                       s->valid = 0;
-                       flush = __flush_bios(pe);
-                       up_write(&s->lock);
-
-                       error_bios(bio_list_get(&pe->snapshot_bios));
-                       goto out;
-               }
-               *e = pe->e;
-
-               /*
-                * Add a proper exception, and remove the
-                * in-flight exception from the list.
-                */
+       if (!success) {
+               /* Read/write error - snapshot is unusable */
                down_write(&s->lock);
-               insert_exception(&s->complete, e);
-               remove_exception(&pe->e);
+               __invalidate_snapshot(s, pe, -EIO);
                flush = __flush_bios(pe);
-
-               /* Submit any pending write bios */
                up_write(&s->lock);
 
-               flush_bios(bio_list_get(&pe->snapshot_bios));
-       } else {
-               /* Read/write error - snapshot is unusable */
+               error_snapshot_bios(pe);
+               goto out;
+       }
+
+       e = alloc_exception();
+       if (!e) {
                down_write(&s->lock);
-               if (s->valid)
-                       DMERR("Error reading/writing snapshot");
-               s->store.drop_snapshot(&s->store);
-               s->valid = 0;
-               remove_exception(&pe->e);
+               __invalidate_snapshot(s, pe, -ENOMEM);
                flush = __flush_bios(pe);
                up_write(&s->lock);
 
-               error_bios(bio_list_get(&pe->snapshot_bios));
+               error_snapshot_bios(pe);
+               goto out;
+       }
+       *e = pe->e;
 
-               dm_table_event(s->table);
+       /*
+        * Add a proper exception, and remove the
+        * in-flight exception from the list.
+        */
+       down_write(&s->lock);
+       if (!s->valid) {
+               flush = __flush_bios(pe);
+               up_write(&s->lock);
+
+               free_exception(e);
+
+               error_snapshot_bios(pe);
+               goto out;
        }
 
+       insert_exception(&s->complete, e);
+       remove_exception(&pe->e);
+       flush = __flush_bios(pe);
+
+       up_write(&s->lock);
+
+       /* Submit any pending write bios */
+       flush_bios(bio_list_get(&pe->snapshot_bios));
+
  out:
-       free_pending_exception(pe);
+       primary_pe = pe->primary_pe;
+
+       /*
+        * Free the pe if it's not linked to an origin write or if
+        * it's not itself a primary pe.
+        */
+       if (!primary_pe || primary_pe != pe)
+               free_pending_exception(pe);
+
+       /*
+        * Free the primary pe if nothing references it.
+        */
+       if (primary_pe && !atomic_read(&primary_pe->sibling_count))
+               free_pending_exception(primary_pe);
 
        if (flush)
                flush_bios(flush);
@@ -677,7 +750,7 @@ static void copy_callback(int read_err, unsigned int write_err, void *context)
 /*
  * Dispatches the copy operation to kcopyd.
  */
-static inline void start_copy(struct pending_exception *pe)
+static void start_copy(struct pending_exception *pe)
 {
        struct dm_snapshot *s = pe->snap;
        struct io_region src, dest;
@@ -721,38 +794,45 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
        if (e) {
                /* cast the exception to a pending exception */
                pe = container_of(e, struct pending_exception, e);
+               goto out;
+       }
 
-       } else {
-               /*
-                * Create a new pending exception, we don't want
-                * to hold the lock while we do this.
-                */
-               up_write(&s->lock);
-               pe = alloc_pending_exception();
-               down_write(&s->lock);
+       /*
+        * Create a new pending exception, we don't want
+        * to hold the lock while we do this.
+        */
+       up_write(&s->lock);
+       pe = alloc_pending_exception();
+       down_write(&s->lock);
 
-               e = lookup_exception(&s->pending, chunk);
-               if (e) {
-                       free_pending_exception(pe);
-                       pe = container_of(e, struct pending_exception, e);
-               } else {
-                       pe->e.old_chunk = chunk;
-                       bio_list_init(&pe->origin_bios);
-                       bio_list_init(&pe->snapshot_bios);
-                       INIT_LIST_HEAD(&pe->siblings);
-                       pe->snap = s;
-                       pe->started = 0;
-
-                       if (s->store.prepare_exception(&s->store, &pe->e)) {
-                               free_pending_exception(pe);
-                               s->valid = 0;
-                               return NULL;
-                       }
+       if (!s->valid) {
+               free_pending_exception(pe);
+               return NULL;
+       }
 
-                       insert_exception(&s->pending, &pe->e);
-               }
+       e = lookup_exception(&s->pending, chunk);
+       if (e) {
+               free_pending_exception(pe);
+               pe = container_of(e, struct pending_exception, e);
+               goto out;
+       }
+
+       pe->e.old_chunk = chunk;
+       bio_list_init(&pe->origin_bios);
+       bio_list_init(&pe->snapshot_bios);
+       pe->primary_pe = NULL;
+       atomic_set(&pe->sibling_count, 1);
+       pe->snap = s;
+       pe->started = 0;
+
+       if (s->store.prepare_exception(&s->store, &pe->e)) {
+               free_pending_exception(pe);
+               return NULL;
        }
 
+       insert_exception(&s->pending, &pe->e);
+
+ out:
        return pe;
 }
 
@@ -769,15 +849,20 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
 {
        struct exception *e;
        struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
+       int copy_needed = 0;
        int r = 1;
        chunk_t chunk;
-       struct pending_exception *pe;
+       struct pending_exception *pe = NULL;
 
        chunk = sector_to_chunk(s, bio->bi_sector);
 
        /* Full snapshots are not usable */
+       /* To get here the table must be live so s->active is always set. */
        if (!s->valid)
-               return -1;
+               return -EIO;
+
+       if (unlikely(bio_barrier(bio)))
+               return -EOPNOTSUPP;
 
        /*
         * Write to snapshot - higher level takes care of RW/RO
@@ -790,36 +875,41 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
                 * to copy an exception */
                down_write(&s->lock);
 
+               if (!s->valid) {
+                       r = -EIO;
+                       goto out_unlock;
+               }
+
                /* If the block is already remapped - use that, else remap it */
                e = lookup_exception(&s->complete, chunk);
                if (e) {
                        remap_exception(s, e, bio);
-                       up_write(&s->lock);
-
-               } else {
-                       pe = __find_pending_exception(s, bio);
-
-                       if (!pe) {
-                               if (s->store.drop_snapshot)
-                                       s->store.drop_snapshot(&s->store);
-                               s->valid = 0;
-                               r = -EIO;
-                               up_write(&s->lock);
-                       } else {
-                               remap_exception(s, &pe->e, bio);
-                               bio_list_add(&pe->snapshot_bios, bio);
-
-                               if (!pe->started) {
-                                       /* this is protected by snap->lock */
-                                       pe->started = 1;
-                                       up_write(&s->lock);
-                                       start_copy(pe);
-                               } else
-                                       up_write(&s->lock);
-                               r = 0;
-                       }
+                       goto out_unlock;
+               }
+
+               pe = __find_pending_exception(s, bio);
+               if (!pe) {
+                       __invalidate_snapshot(s, pe, -ENOMEM);
+                       r = -EIO;
+                       goto out_unlock;
+               }
+
+               remap_exception(s, &pe->e, bio);
+               bio_list_add(&pe->snapshot_bios, bio);
+
+               if (!pe->started) {
+                       /* this is protected by snap->lock */
+                       pe->started = 1;
+                       copy_needed = 1;
                }
 
+               r = 0;
+
+ out_unlock:
+               up_write(&s->lock);
+
+               if (copy_needed)
+                       start_copy(pe);
        } else {
                /*
                 * FIXME: this read path scares me because we
@@ -831,6 +921,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
                /* Do reads */
                down_read(&s->lock);
 
+               if (!s->valid) {
+                       up_read(&s->lock);
+                       return -EIO;
+               }
+
                /* See if it it has been remapped */
                e = lookup_exception(&s->complete, chunk);
                if (e)
@@ -848,16 +943,9 @@ static void snapshot_resume(struct dm_target *ti)
 {
        struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
 
-       if (s->have_metadata)
-               return;
-
-       if (s->store.read_metadata(&s->store)) {
-               down_write(&s->lock);
-               s->valid = 0;
-               up_write(&s->lock);
-       }
-
-       s->have_metadata = 1;
+       down_write(&s->lock);
+       s->active = 1;
+       up_write(&s->lock);
 }
 
 static int snapshot_status(struct dm_target *ti, status_type_t type,
@@ -875,9 +963,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
                                snap->store.fraction_full(&snap->store,
                                                          &numerator,
                                                          &denominator);
-                               snprintf(result, maxlen,
-                                        SECTOR_FORMAT "/" SECTOR_FORMAT,
-                                        numerator, denominator);
+                               snprintf(result, maxlen, "%llu/%llu",
+                                       (unsigned long long)numerator,
+                                       (unsigned long long)denominator);
                        }
                        else
                                snprintf(result, maxlen, "Unknown");
@@ -890,9 +978,10 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
                 * to make private copies if the output is to
                 * make sense.
                 */
-               snprintf(result, maxlen, "%s %s %c " SECTOR_FORMAT,
+               snprintf(result, maxlen, "%s %s %c %llu",
                         snap->origin->name, snap->cow->name,
-                        snap->type, snap->chunk_size);
+                        snap->type,
+                        (unsigned long long)snap->chunk_size);
                break;
        }
 
@@ -902,37 +991,28 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
 /*-----------------------------------------------------------------
  * Origin methods
  *---------------------------------------------------------------*/
-static void list_merge(struct list_head *l1, struct list_head *l2)
-{
-       struct list_head *l1_n, *l2_p;
-
-       l1_n = l1->next;
-       l2_p = l2->prev;
-
-       l1->next = l2;
-       l2->prev = l1;
-
-       l2_p->next = l1_n;
-       l1_n->prev = l2_p;
-}
-
 static int __origin_write(struct list_head *snapshots, struct bio *bio)
 {
-       int r = 1, first = 1;
+       int r = 1, first = 0;
        struct dm_snapshot *snap;
        struct exception *e;
-       struct pending_exception *pe, *last = NULL;
+       struct pending_exception *pe, *next_pe, *primary_pe = NULL;
        chunk_t chunk;
+       LIST_HEAD(pe_queue);
 
        /* Do all the snapshots on this origin */
        list_for_each_entry (snap, snapshots, list) {
 
-               /* Only deal with valid snapshots */
-               if (!snap->valid)
-                       continue;
-
                down_write(&snap->lock);
 
+               /* Only deal with valid and active snapshots */
+               if (!snap->valid || !snap->active)
+                       goto next_snapshot;
+
+               /* Nothing to do if writing beyond end of snapshot */
+               if (bio->bi_sector >= dm_table_get_size(snap->table))
+                       goto next_snapshot;
+
                /*
                 * Remember, different snapshots can have
                 * different chunk sizes.
@@ -943,49 +1023,75 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
                 * Check exception table to see if block
                 * is already remapped in this snapshot
                 * and trigger an exception if not.
+                *
+                * sibling_count is initialised to 1 so pending_complete()
+                * won't destroy the primary_pe while we're inside this loop.
                 */
                e = lookup_exception(&snap->complete, chunk);
-               if (!e) {
-                       pe = __find_pending_exception(snap, bio);
-                       if (!pe) {
-                               snap->store.drop_snapshot(&snap->store);
-                               snap->valid = 0;
-
-                       } else {
-                               if (last)
-                                       list_merge(&pe->siblings,
-                                                  &last->siblings);
-
-                               last = pe;
-                               r = 0;
+               if (e)
+                       goto next_snapshot;
+
+               pe = __find_pending_exception(snap, bio);
+               if (!pe) {
+                       __invalidate_snapshot(snap, pe, -ENOMEM);
+                       goto next_snapshot;
+               }
+
+               if (!primary_pe) {
+                       /*
+                        * Either every pe here has same
+                        * primary_pe or none has one yet.
+                        */
+                       if (pe->primary_pe)
+                               primary_pe = pe->primary_pe;
+                       else {
+                               primary_pe = pe;
+                               first = 1;
                        }
+
+                       bio_list_add(&primary_pe->origin_bios, bio);
+
+                       r = 0;
                }
 
+               if (!pe->primary_pe) {
+                       atomic_inc(&primary_pe->sibling_count);
+                       pe->primary_pe = primary_pe;
+               }
+
+               if (!pe->started) {
+                       pe->started = 1;
+                       list_add_tail(&pe->list, &pe_queue);
+               }
+
+ next_snapshot:
                up_write(&snap->lock);
        }
 
+       if (!primary_pe)
+               goto out;
+
        /*
-        * Now that we have a complete pe list we can start the copying.
+        * If this is the first time we're processing this chunk and
+        * sibling_count is now 1 it means all the pending exceptions
+        * got completed while we were in the loop above, so it falls to
+        * us here to remove the primary_pe and submit any origin_bios.
         */
-       if (last) {
-               pe = last;
-               do {
-                       down_write(&pe->snap->lock);
-                       if (first)
-                               bio_list_add(&pe->origin_bios, bio);
-                       if (!pe->started) {
-                               pe->started = 1;
-                               up_write(&pe->snap->lock);
-                               start_copy(pe);
-                       } else
-                               up_write(&pe->snap->lock);
-                       first = 0;
-                       pe = list_entry(pe->siblings.next,
-                                       struct pending_exception, siblings);
-
-               } while (pe != last);
+
+       if (first && atomic_dec_and_test(&primary_pe->sibling_count)) {
+               flush_bios(bio_list_get(&primary_pe->origin_bios));
+               free_pending_exception(primary_pe);
+               /* If we got here, pe_queue is necessarily empty. */
+               goto out;
        }
 
+       /*
+        * Now that we have a complete pe list we can start the copying.
+        */
+       list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
+               start_copy(pe);
+
+ out:
        return r;
 }
 
@@ -1048,6 +1154,9 @@ static int origin_map(struct dm_target *ti, struct bio *bio,
        struct dm_dev *dev = (struct dm_dev *) ti->private;
        bio->bi_bdev = dev->bdev;
 
+       if (unlikely(bio_barrier(bio)))
+               return -EOPNOTSUPP;
+
        /* Only tell snapshots if this is a write */
        return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : 1;
 }
@@ -1095,7 +1204,7 @@ static int origin_status(struct dm_target *ti, status_type_t type, char *result,
 
 static struct target_type origin_target = {
        .name    = "snapshot-origin",
-       .version = {1, 0, 1},
+       .version = {1, 4, 0},
        .module  = THIS_MODULE,
        .ctr     = origin_ctr,
        .dtr     = origin_dtr,
@@ -1106,7 +1215,7 @@ static struct target_type origin_target = {
 
 static struct target_type snapshot_target = {
        .name    = "snapshot",
-       .version = {1, 0, 1},
+       .version = {1, 4, 0},
        .module  = THIS_MODULE,
        .ctr     = snapshot_ctr,
        .dtr     = snapshot_dtr,
@@ -1158,8 +1267,7 @@ static int __init dm_snapshot_init(void)
                goto bad4;
        }
 
-       pending_pool = mempool_create(128, mempool_alloc_slab,
-                                     mempool_free_slab, pending_cache);
+       pending_pool = mempool_create_slab_pool(128, pending_cache);
        if (!pending_pool) {
                DMERR("Couldn't create pending pool.");
                r = -ENOMEM;
index 4d710b7..dfd0378 100644 (file)
@@ -26,6 +26,7 @@ static const char *_name = DM_NAME;
 static unsigned int major = 0;
 static unsigned int _major = 0;
 
+static DEFINE_SPINLOCK(_minor_lock);
 /*
  * One of these is allocated per bio.
  */
@@ -54,12 +55,15 @@ union map_info *dm_get_mapinfo(struct bio *bio)
         return NULL;
 }
 
+#define MINOR_ALLOCED ((void *)-1)
+
 /*
  * Bits for the md->flags field.
  */
 #define DMF_BLOCK_IO 0
 #define DMF_SUSPENDED 1
 #define DMF_FROZEN 2
+#define DMF_FREEING 3
 
 struct mapped_device {
        struct rw_semaphore io_lock;
@@ -218,9 +222,23 @@ static int dm_blk_open(struct inode *inode, struct file *file)
 {
        struct mapped_device *md;
 
+       spin_lock(&_minor_lock);
+
        md = inode->i_bdev->bd_disk->private_data;
+       if (!md)
+               goto out;
+
+       if (test_bit(DMF_FREEING, &md->flags)) {
+               md = NULL;
+               goto out;
+       }
+
        dm_get(md);
-       return 0;
+
+out:
+       spin_unlock(&_minor_lock);
+
+       return md ? 0 : -ENXIO;
 }
 
 static int dm_blk_close(struct inode *inode, struct file *file)
@@ -744,14 +762,13 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 /*-----------------------------------------------------------------
  * An IDR is used to keep track of allocated minor numbers.
  *---------------------------------------------------------------*/
-static DEFINE_MUTEX(_minor_lock);
 static DEFINE_IDR(_minor_idr);
 
 static void free_minor(unsigned int minor)
 {
-       mutex_lock(&_minor_lock);
+       spin_lock(&_minor_lock);
        idr_remove(&_minor_idr, minor);
-       mutex_unlock(&_minor_lock);
+       spin_unlock(&_minor_lock);
 }
 
 /*
@@ -764,23 +781,20 @@ static int specific_minor(struct mapped_device *md, unsigned int minor)
        if (minor >= (1 << MINORBITS))
                return -EINVAL;
 
-       mutex_lock(&_minor_lock);
+       r = idr_pre_get(&_minor_idr, GFP_KERNEL);
+       if (!r)
+               return -ENOMEM;
+
+       spin_lock(&_minor_lock);
 
        if (idr_find(&_minor_idr, minor)) {
                r = -EBUSY;
                goto out;
        }
 
-       r = idr_pre_get(&_minor_idr, GFP_KERNEL);
-       if (!r) {
-               r = -ENOMEM;
-               goto out;
-       }
-
-       r = idr_get_new_above(&_minor_idr, md, minor, &m);
-       if (r) {
+       r = idr_get_new_above(&_minor_idr, MINOR_ALLOCED, minor, &m);
+       if (r)
                goto out;
-       }
 
        if (m != minor) {
                idr_remove(&_minor_idr, m);
@@ -789,7 +803,7 @@ static int specific_minor(struct mapped_device *md, unsigned int minor)
        }
 
 out:
-       mutex_unlock(&_minor_lock);
+       spin_unlock(&_minor_lock);
        return r;
 }
 
@@ -798,15 +812,13 @@ static int next_free_minor(struct mapped_device *md, unsigned int *minor)
        int r;
        unsigned int m;
 
-       mutex_lock(&_minor_lock);
-
        r = idr_pre_get(&_minor_idr, GFP_KERNEL);
-       if (!r) {
-               r = -ENOMEM;
-               goto out;
-       }
+       if (!r)
+               return -ENOMEM;
 
-       r = idr_get_new(&_minor_idr, md, &m);
+       spin_lock(&_minor_lock);
+
+       r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m);
        if (r) {
                goto out;
        }
@@ -820,7 +832,7 @@ static int next_free_minor(struct mapped_device *md, unsigned int *minor)
        *minor = m;
 
 out:
-       mutex_unlock(&_minor_lock);
+       spin_unlock(&_minor_lock);
        return r;
 }
 
@@ -833,12 +845,16 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
 {
        int r;
        struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL);
+       void *old_md;
 
        if (!md) {
                DMWARN("unable to allocate device, out of memory.");
                return NULL;
        }
 
+       if (!try_module_get(THIS_MODULE))
+               goto bad0;
+
        /* get a minor number for the dev */
        r = persistent ? specific_minor(md, minor) : next_free_minor(md, &minor);
        if (r < 0)
@@ -875,6 +891,10 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
        if (!md->disk)
                goto bad4;
 
+       atomic_set(&md->pending, 0);
+       init_waitqueue_head(&md->wait);
+       init_waitqueue_head(&md->eventq);
+
        md->disk->major = _major;
        md->disk->first_minor = minor;
        md->disk->fops = &dm_blk_dops;
@@ -884,9 +904,12 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
        add_disk(md->disk);
        format_dev_t(md->name, MKDEV(_major, minor));
 
-       atomic_set(&md->pending, 0);
-       init_waitqueue_head(&md->wait);
-       init_waitqueue_head(&md->eventq);
+       /* Populate the mapping, nobody knows we exist yet */
+       spin_lock(&_minor_lock);
+       old_md = idr_replace(&_minor_idr, md, minor);
+       spin_unlock(&_minor_lock);
+
+       BUG_ON(old_md != MINOR_ALLOCED);
 
        return md;
 
@@ -898,6 +921,8 @@ static struct mapped_device *alloc_dev(unsigned int minor, int persistent)
        blk_cleanup_queue(md->queue);
        free_minor(minor);
  bad1:
+       module_put(THIS_MODULE);
+ bad0:
        kfree(md);
        return NULL;
 }
@@ -914,8 +939,14 @@ static void free_dev(struct mapped_device *md)
        mempool_destroy(md->io_pool);
        del_gendisk(md->disk);
        free_minor(minor);
+
+       spin_lock(&_minor_lock);
+       md->disk->private_data = NULL;
+       spin_unlock(&_minor_lock);
+
        put_disk(md->disk);
        blk_cleanup_queue(md->queue);
+       module_put(THIS_MODULE);
        kfree(md);
 }
 
@@ -1015,13 +1046,18 @@ static struct mapped_device *dm_find_md(dev_t dev)
        if (MAJOR(dev) != _major || minor >= (1 << MINORBITS))
                return NULL;
 
-       mutex_lock(&_minor_lock);
+       spin_lock(&_minor_lock);
 
        md = idr_find(&_minor_idr, minor);
-       if (!md || (dm_disk(md)->first_minor != minor))
+       if (md && (md == MINOR_ALLOCED ||
+                  (dm_disk(md)->first_minor != minor) ||
+                  test_bit(DMF_FREEING, &md->flags))) {
                md = NULL;
+               goto out;
+       }
 
-       mutex_unlock(&_minor_lock);
+out:
+       spin_unlock(&_minor_lock);
 
        return md;
 }
@@ -1055,8 +1091,13 @@ void dm_put(struct mapped_device *md)
 {
        struct dm_table *map;
 
-       if (atomic_dec_and_test(&md->holders)) {
+       BUG_ON(test_bit(DMF_FREEING, &md->flags));
+
+       if (atomic_dec_and_lock(&md->holders, &_minor_lock)) {
                map = dm_get_table(md);
+               idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor);
+               set_bit(DMF_FREEING, &md->flags);
+               spin_unlock(&_minor_lock);
                if (!dm_suspended(md)) {
                        dm_table_presuspend_targets(map);
                        dm_table_postsuspend_targets(map);
index 4070eff..5a54494 100644 (file)
@@ -1486,7 +1486,6 @@ static void raid1d(mddev_t *mddev)
                                                        d = conf->raid_disks;
                                                d--;
                                                rdev = conf->mirrors[d].rdev;
-                                               atomic_add(s, &rdev->corrected_errors);
                                                if (rdev &&
                                                    test_bit(In_sync, &rdev->flags)) {
                                                        if (sync_page_io(rdev->bdev,
@@ -1509,6 +1508,9 @@ static void raid1d(mddev_t *mddev)
                                                                         s<<9, conf->tmppage, READ) == 0)
                                                                /* Well, this device is dead */
                                                                md_error(mddev, rdev);
+                                                       else
+                                                               atomic_add(s, &rdev->corrected_errors);
+
                                                }
                                        }
                                } else {
index ccc7b2e..0bcaa35 100644 (file)
@@ -184,6 +184,11 @@ static struct mt352_config thomson_dtt7579_config = {
        .pll_set = thomson_dtt7579_pll_set,
 };
 
+static struct zl10353_config thomson_dtt7579_zl10353_config = {
+       .demod_address = 0x0f,
+       .pll_set = thomson_dtt7579_pll_set,
+};
+
 static int cx24108_pll_set(struct dvb_frontend* fe, struct dvb_frontend_parameters* params)
 {
        u32 freq = params->frequency;
@@ -617,6 +622,11 @@ static void frontend_init(struct dvb_bt8xx_card *card, u32 type)
        switch(type) {
        case BTTV_BOARD_DVICO_DVBT_LITE:
                card->fe = mt352_attach(&thomson_dtt7579_config, card->i2c_adapter);
+
+               if (card->fe == NULL)
+                       card->fe = zl10353_attach(&thomson_dtt7579_zl10353_config,
+                                                 card->i2c_adapter);
+
                if (card->fe != NULL) {
                        card->fe->ops->info.frequency_min = 174000000;
                        card->fe->ops->info.frequency_max = 862000000;
index 00dd9fa..e41066a 100644 (file)
@@ -37,6 +37,7 @@
 #include "cx24110.h"
 #include "or51211.h"
 #include "lgdt330x.h"
+#include "zl10353.h"
 
 struct dvb_bt8xx_card {
        struct mutex lock;
index a051790..cb69372 100644 (file)
@@ -519,7 +519,9 @@ static int dvb_frontend_thread(void *data)
        fepriv->delay = 3*HZ;
        fepriv->status = 0;
        fepriv->wakeup = 0;
-       fepriv->reinitialise = 1;
+       fepriv->reinitialise = 0;
+
+       dvb_frontend_init(fe);
 
        while (1) {
                up(&fepriv->sem);           /* is locked when we enter the thread... */
@@ -996,17 +998,17 @@ static int dvb_frontend_open(struct inode *inode, struct file *file)
                return ret;
 
        if ((file->f_flags & O_ACCMODE) != O_RDONLY) {
+               /* normal tune mode when opened R/W */
+               fepriv->tune_mode_flags &= ~FE_TUNE_MODE_ONESHOT;
+               fepriv->tone = -1;
+               fepriv->voltage = -1;
+
                ret = dvb_frontend_start (fe);
                if (ret)
                        dvb_generic_release (inode, file);
 
                /*  empty event queue */
                fepriv->events.eventr = fepriv->events.eventw = 0;
-
-               /* normal tune mode when opened R/W */
-               fepriv->tune_mode_flags &= ~FE_TUNE_MODE_ONESHOT;
-               fepriv->tone = -1;
-               fepriv->voltage = -1;
        }
 
        return ret;
index 2f0f358..9fd8752 100644 (file)
@@ -1052,7 +1052,7 @@ static void wq_set_multicast_list (void *data)
 
        dvb_net_feed_stop(dev);
        priv->rx_mode = RX_MODE_UNI;
-       spin_lock_bh(&dev->xmit_lock);
+       netif_tx_lock_bh(dev);
 
        if (dev->flags & IFF_PROMISC) {
                dprintk("%s: promiscuous mode\n", dev->name);
@@ -1077,7 +1077,7 @@ static void wq_set_multicast_list (void *data)
                }
        }
 
-       spin_unlock_bh(&dev->xmit_lock);
+       netif_tx_unlock_bh(dev);
        dvb_net_feed_start(dev);
 }
 
index 2a3c2ce..40930a3 100644 (file)
@@ -1,6 +1,4 @@
 /*
- * $Id: dvb-pll.c,v 1.7 2005/02/10 11:52:02 kraxel Exp $
- *
  * descriptions + helper functions for simple dvb plls.
  *
  * (c) 2004 Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]
@@ -57,7 +55,7 @@ struct dvb_pll_desc dvb_pll_thomson_dtt7610 = {
 };
 EXPORT_SYMBOL(dvb_pll_thomson_dtt7610);
 
-static void thomson_dtt759x_bw(u8 *buf, int bandwidth)
+static void thomson_dtt759x_bw(u8 *buf, u32 freq, int bandwidth)
 {
        if (BANDWIDTH_7_MHZ == bandwidth)
                buf[3] |= 0x10;
@@ -84,17 +82,45 @@ struct dvb_pll_desc dvb_pll_lg_z201 = {
        .name  = "LG z201",
        .min   = 174000000,
        .max   = 862000000,
-       .count = 5,
+       .count = 6,
        .entries = {
                {          0, 36166667, 166666, 0xbc, 0x03 },
-               {  443250000, 36166667, 166666, 0xbc, 0x01 },
-               {  542000000, 36166667, 166666, 0xbc, 0x02 },
-               {  830000000, 36166667, 166666, 0xf4, 0x02 },
-               {  999999999, 36166667, 166666, 0xfc, 0x02 },
+               {  157500000, 36166667, 166666, 0xbc, 0x01 },
+               {  443250000, 36166667, 166666, 0xbc, 0x02 },
+               {  542000000, 36166667, 166666, 0xbc, 0x04 },
+               {  830000000, 36166667, 166666, 0xf4, 0x04 },
+               {  999999999, 36166667, 166666, 0xfc, 0x04 },
        },
 };
 EXPORT_SYMBOL(dvb_pll_lg_z201);
 
+struct dvb_pll_desc dvb_pll_microtune_4042 = {
+       .name  = "Microtune 4042 FI5",
+       .min   =  57000000,
+       .max   = 858000000,
+       .count = 3,
+       .entries = {
+               { 162000000, 44000000, 62500, 0x8e, 0xa1 },
+               { 457000000, 44000000, 62500, 0x8e, 0x91 },
+               { 999999999, 44000000, 62500, 0x8e, 0x31 },
+       },
+};
+EXPORT_SYMBOL(dvb_pll_microtune_4042);
+
+struct dvb_pll_desc dvb_pll_thomson_dtt761x = {
+       /* DTT 7611 7611A 7612 7613 7613A 7614 7615 7615A */
+       .name  = "Thomson dtt761x",
+       .min   =  57000000,
+       .max   = 863000000,
+       .count = 3,
+       .entries = {
+               { 147000000, 44000000, 62500, 0x8e, 0x39 },
+               { 417000000, 44000000, 62500, 0x8e, 0x3a },
+               { 999999999, 44000000, 62500, 0x8e, 0x3c },
+       },
+};
+EXPORT_SYMBOL(dvb_pll_thomson_dtt761x);
+
 struct dvb_pll_desc dvb_pll_unknown_1 = {
        .name  = "unknown 1", /* used by dntv live dvb-t */
        .min   = 174000000,
@@ -114,6 +140,285 @@ struct dvb_pll_desc dvb_pll_unknown_1 = {
 };
 EXPORT_SYMBOL(dvb_pll_unknown_1);
 
+/* Infineon TUA6010XS
+ * used in Thomson Cable Tuner
+ */
+struct dvb_pll_desc dvb_pll_tua6010xs = {
+       .name  = "Infineon TUA6010XS",
+       .min   =  44250000,
+       .max   = 858000000,
+       .count = 3,
+       .entries = {
+               {  115750000, 36125000, 62500, 0x8e, 0x03 },
+               {  403250000, 36125000, 62500, 0x8e, 0x06 },
+               {  999999999, 36125000, 62500, 0x8e, 0x85 },
+       },
+};
+EXPORT_SYMBOL(dvb_pll_tua6010xs);
+
+/* Panasonic env57h1xd5 (some Philips PLL ?) */
+struct dvb_pll_desc dvb_pll_env57h1xd5 = {
+       .name  = "Panasonic ENV57H1XD5",
+       .min   =  44250000,
+       .max   = 858000000,
+       .count = 4,
+       .entries = {
+               {  153000000, 36291666, 166666, 0xc2, 0x41 },
+               {  470000000, 36291666, 166666, 0xc2, 0x42 },
+               {  526000000, 36291666, 166666, 0xc2, 0x84 },
+               {  999999999, 36291666, 166666, 0xc2, 0xa4 },
+       },
+};
+EXPORT_SYMBOL(dvb_pll_env57h1xd5);
+
+/* Philips TDA6650/TDA6651
+ * used in Panasonic ENV77H11D5
+ */
+static void tda665x_bw(u8 *buf, u32 freq, int bandwidth)
+{
+       if (bandwidth == BANDWIDTH_8_MHZ)
+               buf[3] |= 0x08;
+}
+
+struct dvb_pll_desc dvb_pll_tda665x = {
+       .name  = "Philips TDA6650/TDA6651",
+       .min   =  44250000,
+       .max   = 858000000,
+       .setbw = tda665x_bw,
+       .count = 12,
+       .entries = {
+               {   93834000, 36249333, 166667, 0xca, 0x61 /* 011 0 0 0  01 */ },
+               {  123834000, 36249333, 166667, 0xca, 0xa1 /* 101 0 0 0  01 */ },
+               {  161000000, 36249333, 166667, 0xca, 0xa1 /* 101 0 0 0  01 */ },
+               {  163834000, 36249333, 166667, 0xca, 0xc2 /* 110 0 0 0  10 */ },
+               {  253834000, 36249333, 166667, 0xca, 0x62 /* 011 0 0 0  10 */ },
+               {  383834000, 36249333, 166667, 0xca, 0xa2 /* 101 0 0 0  10 */ },
+               {  443834000, 36249333, 166667, 0xca, 0xc2 /* 110 0 0 0  10 */ },
+               {  444000000, 36249333, 166667, 0xca, 0xc4 /* 110 0 0 1  00 */ },
+               {  583834000, 36249333, 166667, 0xca, 0x64 /* 011 0 0 1  00 */ },
+               {  793834000, 36249333, 166667, 0xca, 0xa4 /* 101 0 0 1  00 */ },
+               {  444834000, 36249333, 166667, 0xca, 0xc4 /* 110 0 0 1  00 */ },
+               {  861000000, 36249333, 166667, 0xca, 0xe4 /* 111 0 0 1  00 */ },
+       }
+};
+EXPORT_SYMBOL(dvb_pll_tda665x);
+
+/* Infineon TUA6034
+ * used in LG TDTP E102P
+ */
+static void tua6034_bw(u8 *buf, u32 freq, int bandwidth)
+{
+       if (BANDWIDTH_7_MHZ != bandwidth)
+               buf[3] |= 0x08;
+}
+
+struct dvb_pll_desc dvb_pll_tua6034 = {
+       .name  = "Infineon TUA6034",
+       .min   =  44250000,
+       .max   = 858000000,
+       .count = 3,
+       .setbw = tua6034_bw,
+       .entries = {
+               {  174500000, 36166667, 62500, 0xce, 0x01 },
+               {  230000000, 36166667, 62500, 0xce, 0x02 },
+               {  999999999, 36166667, 62500, 0xce, 0x04 },
+       },
+};
+EXPORT_SYMBOL(dvb_pll_tua6034);
+
+/* Infineon TUA6034
+ * used in LG TDVS H061F and LG TDVS H062F
+ */
+struct dvb_pll_desc dvb_pll_tdvs_tua6034 = {
+       .name  = "LG/Infineon TUA6034",
+       .min   =  54000000,
+       .max   = 863000000,
+       .count = 3,
+       .entries = {
+               {  165000000, 44000000, 62500, 0xce, 0x01 },
+               {  450000000, 44000000, 62500, 0xce, 0x02 },
+               {  999999999, 44000000, 62500, 0xce, 0x04 },
+       },
+};
+EXPORT_SYMBOL(dvb_pll_tdvs_tua6034);
+
+/* Philips FMD1216ME
+ * used in Medion Hybrid PCMCIA card and USB Box
+ */
+static void fmd1216me_bw(u8 *buf, u32 freq, int bandwidth)
+{
+       if (bandwidth == BANDWIDTH_8_MHZ && freq >= 158870000)
+               buf[3] |= 0x08;
+}
+
+struct dvb_pll_desc dvb_pll_fmd1216me = {
+       .name = "Philips FMD1216ME",
+       .min = 50870000,
+       .max = 858000000,
+       .setbw = fmd1216me_bw,
+       .count = 7,
+       .entries = {
+               { 143870000, 36213333, 166667, 0xbc, 0x41 },
+               { 158870000, 36213333, 166667, 0xf4, 0x41 },
+               { 329870000, 36213333, 166667, 0xbc, 0x42 },
+               { 441870000, 36213333, 166667, 0xf4, 0x42 },
+               { 625870000, 36213333, 166667, 0xbc, 0x44 },
+               { 803870000, 36213333, 166667, 0xf4, 0x44 },
+               { 999999999, 36213333, 166667, 0xfc, 0x44 },
+       }
+};
+EXPORT_SYMBOL(dvb_pll_fmd1216me);
+
+/* ALPS TDED4
+ * used in Nebula-Cards and USB boxes
+ */
+static void tded4_bw(u8 *buf, u32 freq, int bandwidth)
+{
+       if (bandwidth == BANDWIDTH_8_MHZ)
+               buf[3] |= 0x04;
+}
+
+struct dvb_pll_desc dvb_pll_tded4 = {
+       .name = "ALPS TDED4",
+       .min = 47000000,
+       .max = 863000000,
+       .setbw = tded4_bw,
+       .count = 4,
+       .entries = {
+               { 153000000, 36166667, 166667, 0x85, 0x01 },
+               { 470000000, 36166667, 166667, 0x85, 0x02 },
+               { 823000000, 36166667, 166667, 0x85, 0x08 },
+               { 999999999, 36166667, 166667, 0x85, 0x88 },
+       }
+};
+EXPORT_SYMBOL(dvb_pll_tded4);
+
+/* ALPS TDHU2
+ * used in AverTVHD MCE A180
+ */
+struct dvb_pll_desc dvb_pll_tdhu2 = {
+       .name = "ALPS TDHU2",
+       .min = 54000000,
+       .max = 864000000,
+       .count = 4,
+       .entries = {
+               { 162000000, 44000000, 62500, 0x85, 0x01 },
+               { 426000000, 44000000, 62500, 0x85, 0x02 },
+               { 782000000, 44000000, 62500, 0x85, 0x08 },
+               { 999999999, 44000000, 62500, 0x85, 0x88 },
+       }
+};
+EXPORT_SYMBOL(dvb_pll_tdhu2);
+
+/* Philips TUV1236D
+ * used in ATI HDTV Wonder
+ */
+struct dvb_pll_desc dvb_pll_tuv1236d = {
+       .name  = "Philips TUV1236D",
+       .min   =  54000000,
+       .max   = 864000000,
+       .count = 3,
+       .entries = {
+               { 157250000, 44000000, 62500, 0xc6, 0x41 },
+               { 454000000, 44000000, 62500, 0xc6, 0x42 },
+               { 999999999, 44000000, 62500, 0xc6, 0x44 },
+       },
+};
+EXPORT_SYMBOL(dvb_pll_tuv1236d);
+
+/* Samsung TBMV30111IN / TBMV30712IN1
+ * used in Air2PC ATSC - 2nd generation (nxt2002)
+ */
+struct dvb_pll_desc dvb_pll_samsung_tbmv = {
+       .name = "Samsung TBMV30111IN / TBMV30712IN1",
+       .min = 54000000,
+       .max = 860000000,
+       .count = 6,
+       .entries = {
+               { 172000000, 44000000, 166666, 0xb4, 0x01 },
+               { 214000000, 44000000, 166666, 0xb4, 0x02 },
+               { 467000000, 44000000, 166666, 0xbc, 0x02 },
+               { 721000000, 44000000, 166666, 0xbc, 0x08 },
+               { 841000000, 44000000, 166666, 0xf4, 0x08 },
+               { 999999999, 44000000, 166666, 0xfc, 0x02 },
+       }
+};
+EXPORT_SYMBOL(dvb_pll_samsung_tbmv);
+
+/*
+ * Philips SD1878 Tuner.
+ */
+struct dvb_pll_desc dvb_pll_philips_sd1878_tda8261 = {
+       .name  = "Philips SD1878",
+       .min   =  950000,
+       .max   = 2150000,
+       .count = 4,
+       .entries = {
+               { 1250000, 499, 500, 0xc4, 0x00},
+               { 1550000, 499, 500, 0xc4, 0x40},
+               { 2050000, 499, 500, 0xc4, 0x80},
+               { 2150000, 499, 500, 0xc4, 0xc0},
+       },
+};
+EXPORT_SYMBOL(dvb_pll_philips_sd1878_tda8261);
+
+/*
+ * Philips TD1316 Tuner.
+ */
+static void td1316_bw(u8 *buf, u32 freq, int bandwidth)
+{
+       u8 band;
+
+       /* determine band */
+       if (freq < 161000000)
+               band = 1;
+       else if (freq < 444000000)
+               band = 2;
+       else
+               band = 4;
+
+       buf[3] |= band;
+
+       /* setup PLL filter */
+       if (bandwidth == BANDWIDTH_8_MHZ)
+               buf[3] |= 1 << 3;
+}
+
+struct dvb_pll_desc dvb_pll_philips_td1316 = {
+       .name  = "Philips TD1316",
+       .min   =  87000000,
+       .max   = 895000000,
+       .setbw = td1316_bw,
+       .count = 9,
+       .entries = {
+               {  93834000, 36166000, 166666, 0xca, 0x60},
+               { 123834000, 36166000, 166666, 0xca, 0xa0},
+               { 163834000, 36166000, 166666, 0xca, 0xc0},
+               { 253834000, 36166000, 166666, 0xca, 0x60},
+               { 383834000, 36166000, 166666, 0xca, 0xa0},
+               { 443834000, 36166000, 166666, 0xca, 0xc0},
+               { 583834000, 36166000, 166666, 0xca, 0x60},
+               { 793834000, 36166000, 166666, 0xca, 0xa0},
+               { 858834000, 36166000, 166666, 0xca, 0xe0},
+       },
+};
+EXPORT_SYMBOL(dvb_pll_philips_td1316);
+
+/* FE6600 used on DViCO Hybrid */
+struct dvb_pll_desc dvb_pll_thomson_fe6600 = {
+       .name = "Thomson FE6600",
+       .min =  44250000,
+       .max = 858000000,
+       .count = 4,
+       .entries = {
+               { 250000000, 36213333, 166667, 0xb4, 0x12 },
+               { 455000000, 36213333, 166667, 0xfe, 0x11 },
+               { 775500000, 36213333, 166667, 0xbc, 0x18 },
+               { 999999999, 36213333, 166667, 0xf4, 0x18 },
+       }
+};
+EXPORT_SYMBOL(dvb_pll_thomson_fe6600);
+
 /* ----------------------------------------------------------- */
 /* code                                                        */
 
@@ -143,11 +448,11 @@ int dvb_pll_configure(struct dvb_pll_desc *desc, u8 *buf,
        div = (freq + desc->entries[i].offset) / desc->entries[i].stepsize;
        buf[0] = div >> 8;
        buf[1] = div & 0xff;
-       buf[2] = desc->entries[i].cb1;
-       buf[3] = desc->entries[i].cb2;
+       buf[2] = desc->entries[i].config;
+       buf[3] = desc->entries[i].cb;
 
        if (desc->setbw)
-               desc->setbw(buf, bandwidth);
+               desc->setbw(buf, freq, bandwidth);
 
        if (debug)
                printk("pll: %s: div=%d | buf=0x%02x,0x%02x,0x%02x,0x%02x\n",
@@ -160,9 +465,3 @@ EXPORT_SYMBOL(dvb_pll_configure);
 MODULE_DESCRIPTION("dvb pll library");
 MODULE_AUTHOR("Gerd Knorr");
 MODULE_LICENSE("GPL");
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
index 8a7cd7d..3f4aa08 100644 (file)
@@ -58,6 +58,7 @@ struct budget_av {
        struct tasklet_struct ciintf_irq_tasklet;
        int slot_status;
        struct dvb_ca_en50221 ca;
+       u8 reinitialise_demod:1;
 };
 
 /* GPIO Connections:
@@ -214,8 +215,9 @@ static int ciintf_slot_reset(struct dvb_ca_en50221 *ca, int slot)
        while (--timeout > 0 && ciintf_read_attribute_mem(ca, slot, 0) != 0x1d)
                msleep(100);
 
-       /* reinitialise the frontend */
-       dvb_frontend_reinitialise(budget_av->budget.dvb_frontend);
+       /* reinitialise the frontend if necessary */
+       if (budget_av->reinitialise_demod)
+               dvb_frontend_reinitialise(budget_av->budget.dvb_frontend);
 
        if (timeout <= 0)
        {
@@ -1017,12 +1019,13 @@ static void frontend_init(struct budget_av *budget_av)
        struct saa7146_dev * saa = budget_av->budget.dev;
        struct dvb_frontend * fe = NULL;
 
+       /* Enable / PowerON Frontend */
+       saa7146_setgpio(saa, 0, SAA7146_GPIO_OUTLO);
+
        switch (saa->pci->subsystem_device) {
                case SUBID_DVBS_KNC1_PLUS:
                case SUBID_DVBC_KNC1_PLUS:
                case SUBID_DVBT_KNC1_PLUS:
-                       // Enable / PowerON Frontend
-                       saa7146_setgpio(saa, 0, SAA7146_GPIO_OUTLO);
                        saa7146_setgpio(saa, 3, SAA7146_GPIO_OUTHI);
                        break;
        }
@@ -1059,7 +1062,14 @@ static void frontend_init(struct budget_av *budget_av)
                break;
 
        case SUBID_DVBC_KNC1:
+               budget_av->reinitialise_demod = 1;
+               fe = tda10021_attach(&philips_cu1216_config,
+                                    &budget_av->budget.i2c_adap,
+                                    read_pwm(budget_av));
+               break;
+
        case SUBID_DVBC_KNC1_PLUS:
+               budget_av->reinitialise_demod = 1;
                fe = tda10021_attach(&philips_cu1216_config,
                                     &budget_av->budget.i2c_adap,
                                     read_pwm(budget_av));
@@ -1208,11 +1218,7 @@ static int budget_av_attach(struct saa7146_dev *dev, struct saa7146_pci_extensio
 
        budget_av->budget.dvb_adapter.priv = budget_av;
        frontend_init(budget_av);
-
-       if (!budget_av->has_saa7113) {
-               ciintf_init(budget_av);
-       }
-
+       ciintf_init(budget_av);
        return 0;
 }
 
index 689e971..d6d37cf 100644 (file)
@@ -367,12 +367,6 @@ static void frontend_init(struct budget *budget)
 
                // try the ALPS BSRU6 now
                budget->dvb_frontend = stv0299_attach(&alps_bsru6_config, &budget->i2c_adap);
-               if (budget->dvb_frontend) {
-                       budget->dvb_frontend->ops->diseqc_send_master_cmd = budget_diseqc_send_master_cmd;
-                       budget->dvb_frontend->ops->diseqc_send_burst = budget_diseqc_send_burst;
-                       budget->dvb_frontend->ops->set_tone = budget_set_tone;
-                       break;
-               }
                break;
 
        case 0x1004: // Hauppauge/TT DVB-C budget (ves1820/ALPS TDBE2(sp5659))
index 6b41970..67d729a 100644 (file)
@@ -380,10 +380,10 @@ config VIDEO_WM8739
 source "drivers/media/video/cx25840/Kconfig"
 
 config VIDEO_SAA711X
-       tristate "Philips SAA7113/4/5 video decoders (OBSOLETED)"
-       depends on VIDEO_V4L1 && I2C && EXPERIMENTAL
+       tristate "Philips SAA7113/4/5 video decoders"
+       depends on VIDEO_DEV && I2C && EXPERIMENTAL
        ---help---
-         Old support for the Philips SAA7113/4 video decoders.
+         Support for the Philips SAA7113/4/5 video decoders.
 
          To compile this driver as a module, choose M here: the
          module will be called saa7115.
index 07476c7..af372dd 100644 (file)
@@ -2180,7 +2180,6 @@ static struct pci_device_id stradis_pci_tbl[] = {
        { 0 }
 };
 
-MODULE_DEVICE_TABLE(pci, stradis_pci_tbl);
 
 static struct pci_driver stradis_driver = {
        .name = "stradis",
index 066e22b..365a074 100644 (file)
@@ -792,7 +792,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
        entry = cp->tx_head;
        eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0;
        if (dev->features & NETIF_F_TSO)
-               mss = skb_shinfo(skb)->tso_size;
+               mss = skb_shinfo(skb)->gso_size;
 
        if (skb_shinfo(skb)->nr_frags == 0) {
                struct cp_desc *txd = &cp->tx_ring[entry];
index 8acc655..a8f95e5 100644 (file)
@@ -1,6 +1,6 @@
 /* bnx2.c: Broadcom NX2 network driver.
  *
- * Copyright (c) 2004, 2005 Broadcom Corporation
+ * Copyright (c) 2004, 2005, 2006 Broadcom Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -9,24 +9,65 @@
  * Written by: Michael Chan  (mchan@broadcom.com)
  */
 
+#include <linux/config.h>
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <linux/kernel.h>
+#include <linux/timer.h>
+#include <linux/errno.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/dma-mapping.h>
+#include <asm/bitops.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <linux/delay.h>
+#include <asm/byteorder.h>
+#include <linux/time.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#ifdef NETIF_F_HW_VLAN_TX
+#include <linux/if_vlan.h>
+#define BCM_VLAN 1
+#endif
+#ifdef NETIF_F_TSO
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <net/checksum.h>
+#define BCM_TSO 1
+#endif
+#include <linux/workqueue.h>
+#include <linux/crc32.h>
+#include <linux/prefetch.h>
+#include <linux/cache.h>
+
 #include "bnx2.h"
 #include "bnx2_fw.h"
 
 #define DRV_MODULE_NAME                "bnx2"
 #define PFX DRV_MODULE_NAME    ": "
-#define DRV_MODULE_VERSION     "1.2.19"
-#define DRV_MODULE_RELDATE     "May 23, 2005"
+#define DRV_MODULE_VERSION     "1.4.40"
+#define DRV_MODULE_RELDATE     "May 22, 2006"
 
 #define RUN_AT(x) (jiffies + (x))
 
 /* Time in jiffies before concluding the transmitter is hung. */
 #define TX_TIMEOUT  (5*HZ)
 
-static char version[] __devinitdata =
+static const char version[] __devinitdata =
        "Broadcom NetXtreme II Gigabit Ethernet Driver " DRV_MODULE_NAME " v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
 
 MODULE_AUTHOR("Michael Chan <mchan@broadcom.com>");
-MODULE_DESCRIPTION("Broadcom NetXtreme II BCM5706 Driver");
+MODULE_DESCRIPTION("Broadcom NetXtreme II BCM5706/5708 Driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
 
@@ -41,10 +82,12 @@ typedef enum {
        NC370I,
        BCM5706S,
        NC370F,
+       BCM5708,
+       BCM5708S,
 } board_t;
 
 /* indexed by board_t, above */
-static struct {
+static const struct {
        char *name;
 } board_info[] __devinitdata = {
        { "Broadcom NetXtreme II BCM5706 1000Base-T" },
@@ -52,7 +95,8 @@ static struct {
        { "HP NC370i Multifunction Gigabit Server Adapter" },
        { "Broadcom NetXtreme II BCM5706 1000Base-SX" },
        { "HP NC370F Multifunction Gigabit Server Adapter" },
-       { 0 },
+       { "Broadcom NetXtreme II BCM5708 1000Base-T" },
+       { "Broadcom NetXtreme II BCM5708 1000Base-SX" },
        };
 
 static struct pci_device_id bnx2_pci_tbl[] = {
@@ -62,52 +106,115 @@ static struct pci_device_id bnx2_pci_tbl[] = {
          PCI_VENDOR_ID_HP, 0x3106, 0, 0, NC370I },
        { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_NX2_5706,
          PCI_ANY_ID, PCI_ANY_ID, 0, 0, BCM5706 },
+       { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_NX2_5708,
+         PCI_ANY_ID, PCI_ANY_ID, 0, 0, BCM5708 },
        { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_NX2_5706S,
          PCI_VENDOR_ID_HP, 0x3102, 0, 0, NC370F },
        { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_NX2_5706S,
          PCI_ANY_ID, PCI_ANY_ID, 0, 0, BCM5706S },
+       { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_NX2_5708S,
+         PCI_ANY_ID, PCI_ANY_ID, 0, 0, BCM5708S },
        { 0, }
 };
 
 static struct flash_spec flash_table[] =
 {
        /* Slow EEPROM */
-       {0x00000000, 0x40030380, 0x009f0081, 0xa184a053, 0xaf000400,
+       {0x00000000, 0x40830380, 0x009f0081, 0xa184a053, 0xaf000400,
         1, SEEPROM_PAGE_BITS, SEEPROM_PAGE_SIZE,
         SEEPROM_BYTE_ADDR_MASK, SEEPROM_TOTAL_SIZE,
         "EEPROM - slow"},
-       /* Fast EEPROM */
-       {0x02000000, 0x62008380, 0x009f0081, 0xa184a053, 0xaf000400,
-        1, SEEPROM_PAGE_BITS, SEEPROM_PAGE_SIZE,
-        SEEPROM_BYTE_ADDR_MASK, SEEPROM_TOTAL_SIZE,
-        "EEPROM - fast"},
-       /* ATMEL AT45DB011B (buffered flash) */
-       {0x02000003, 0x6e008173, 0x00570081, 0x68848353, 0xaf000400,
-        1, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE,
-        BUFFERED_FLASH_BYTE_ADDR_MASK, BUFFERED_FLASH_TOTAL_SIZE,
-        "Buffered flash"},
-       /* Saifun SA25F005 (non-buffered flash) */
-               /* strap, cfg1, & write1 need updates */
-       {0x01000003, 0x5f008081, 0x00050081, 0x03840253, 0xaf020406,
+       /* Expansion entry 0001 */
+       {0x08000002, 0x4b808201, 0x00050081, 0x03840253, 0xaf020406,
         0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
-        SAIFUN_FLASH_BYTE_ADDR_MASK, SAIFUN_FLASH_BASE_TOTAL_SIZE,
-        "Non-buffered flash (64kB)"},
+        SAIFUN_FLASH_BYTE_ADDR_MASK, 0,
+        "Entry 0001"},
        /* Saifun SA25F010 (non-buffered flash) */
        /* strap, cfg1, & write1 need updates */
-       {0x00000001, 0x47008081, 0x00050081, 0x03840253, 0xaf020406,
+       {0x04000001, 0x47808201, 0x00050081, 0x03840253, 0xaf020406,
         0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
         SAIFUN_FLASH_BYTE_ADDR_MASK, SAIFUN_FLASH_BASE_TOTAL_SIZE*2,
         "Non-buffered flash (128kB)"},
        /* Saifun SA25F020 (non-buffered flash) */
        /* strap, cfg1, & write1 need updates */
-       {0x00000003, 0x4f008081, 0x00050081, 0x03840253, 0xaf020406,
+       {0x0c000003, 0x4f808201, 0x00050081, 0x03840253, 0xaf020406,
         0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
         SAIFUN_FLASH_BYTE_ADDR_MASK, SAIFUN_FLASH_BASE_TOTAL_SIZE*4,
         "Non-buffered flash (256kB)"},
+       /* Expansion entry 0100 */
+       {0x11000000, 0x53808201, 0x00050081, 0x03840253, 0xaf020406,
+        0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+        SAIFUN_FLASH_BYTE_ADDR_MASK, 0,
+        "Entry 0100"},
+       /* Entry 0101: ST M45PE10 (non-buffered flash, TetonII B0) */
+       {0x19000002, 0x5b808201, 0x000500db, 0x03840253, 0xaf020406,        
+        0, ST_MICRO_FLASH_PAGE_BITS, ST_MICRO_FLASH_PAGE_SIZE,
+        ST_MICRO_FLASH_BYTE_ADDR_MASK, ST_MICRO_FLASH_BASE_TOTAL_SIZE*2,
+        "Entry 0101: ST M45PE10 (128kB non-bufferred)"},
+       /* Entry 0110: ST M45PE20 (non-buffered flash)*/
+       {0x15000001, 0x57808201, 0x000500db, 0x03840253, 0xaf020406,
+        0, ST_MICRO_FLASH_PAGE_BITS, ST_MICRO_FLASH_PAGE_SIZE,
+        ST_MICRO_FLASH_BYTE_ADDR_MASK, ST_MICRO_FLASH_BASE_TOTAL_SIZE*4,
+        "Entry 0110: ST M45PE20 (256kB non-bufferred)"},
+       /* Saifun SA25F005 (non-buffered flash) */
+       /* strap, cfg1, & write1 need updates */
+       {0x1d000003, 0x5f808201, 0x00050081, 0x03840253, 0xaf020406,
+        0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+        SAIFUN_FLASH_BYTE_ADDR_MASK, SAIFUN_FLASH_BASE_TOTAL_SIZE,
+        "Non-buffered flash (64kB)"},
+       /* Fast EEPROM */
+       {0x22000000, 0x62808380, 0x009f0081, 0xa184a053, 0xaf000400,
+        1, SEEPROM_PAGE_BITS, SEEPROM_PAGE_SIZE,
+        SEEPROM_BYTE_ADDR_MASK, SEEPROM_TOTAL_SIZE,
+        "EEPROM - fast"},
+       /* Expansion entry 1001 */
+       {0x2a000002, 0x6b808201, 0x00050081, 0x03840253, 0xaf020406,
+        0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+        SAIFUN_FLASH_BYTE_ADDR_MASK, 0,
+        "Entry 1001"},
+       /* Expansion entry 1010 */
+       {0x26000001, 0x67808201, 0x00050081, 0x03840253, 0xaf020406,
+        0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+        SAIFUN_FLASH_BYTE_ADDR_MASK, 0,
+        "Entry 1010"},
+       /* ATMEL AT45DB011B (buffered flash) */
+       {0x2e000003, 0x6e808273, 0x00570081, 0x68848353, 0xaf000400,
+        1, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE,
+        BUFFERED_FLASH_BYTE_ADDR_MASK, BUFFERED_FLASH_TOTAL_SIZE,
+        "Buffered flash (128kB)"},
+       /* Expansion entry 1100 */
+       {0x33000000, 0x73808201, 0x00050081, 0x03840253, 0xaf020406,
+        0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+        SAIFUN_FLASH_BYTE_ADDR_MASK, 0,
+        "Entry 1100"},
+       /* Expansion entry 1101 */
+       {0x3b000002, 0x7b808201, 0x00050081, 0x03840253, 0xaf020406,
+        0, SAIFUN_FLASH_PAGE_BITS, SAIFUN_FLASH_PAGE_SIZE,
+        SAIFUN_FLASH_BYTE_ADDR_MASK, 0,
+        "Entry 1101"},
+       /* Ateml Expansion entry 1110 */
+       {0x37000001, 0x76808273, 0x00570081, 0x68848353, 0xaf000400,
+        1, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE,
+        BUFFERED_FLASH_BYTE_ADDR_MASK, 0,
+        "Entry 1110 (Atmel)"},
+       /* ATMEL AT45DB021B (buffered flash) */
+       {0x3f000003, 0x7e808273, 0x00570081, 0x68848353, 0xaf000400,
+        1, BUFFERED_FLASH_PAGE_BITS, BUFFERED_FLASH_PAGE_SIZE,
+        BUFFERED_FLASH_BYTE_ADDR_MASK, BUFFERED_FLASH_TOTAL_SIZE*2,
+        "Buffered flash (256kB)"},
 };
 
 MODULE_DEVICE_TABLE(pci, bnx2_pci_tbl);
 
+static inline u32 bnx2_tx_avail(struct bnx2 *bp)
+{
+       u32 diff = TX_RING_IDX(bp->tx_prod) - TX_RING_IDX(bp->tx_cons);
+
+       if (diff > MAX_TX_DESC_CNT)
+               diff = (diff & MAX_TX_DESC_CNT) - 1;
+       return (bp->tx_ring_size - diff);
+}
+
 static u32
 bnx2_reg_rd_ind(struct bnx2 *bp, u32 offset)
 {
@@ -247,13 +354,14 @@ bnx2_disable_int(struct bnx2 *bp)
 static void
 bnx2_enable_int(struct bnx2 *bp)
 {
-       u32 val;
+       REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
+              BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
+              BNX2_PCICFG_INT_ACK_CMD_MASK_INT | bp->last_status_idx);
 
        REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
               BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID | bp->last_status_idx);
 
-       val = REG_RD(bp, BNX2_HC_COMMAND);
-       REG_WR(bp, BNX2_HC_COMMAND, val | BNX2_HC_COMMAND_COAL_NOW);
+       REG_WR(bp, BNX2_HC_COMMAND, bp->hc_cmd | BNX2_HC_COMMAND_COAL_NOW);
 }
 
 static void
@@ -290,15 +398,13 @@ bnx2_netif_start(struct bnx2 *bp)
 static void
 bnx2_free_mem(struct bnx2 *bp)
 {
-       if (bp->stats_blk) {
-               pci_free_consistent(bp->pdev, sizeof(struct statistics_block),
-                                   bp->stats_blk, bp->stats_blk_mapping);
-               bp->stats_blk = NULL;
-       }
+       int i;
+
        if (bp->status_blk) {
-               pci_free_consistent(bp->pdev, sizeof(struct status_block),
+               pci_free_consistent(bp->pdev, bp->status_stats_size,
                                    bp->status_blk, bp->status_blk_mapping);
                bp->status_blk = NULL;
+               bp->stats_blk = NULL;
        }
        if (bp->tx_desc_ring) {
                pci_free_consistent(bp->pdev,
@@ -306,31 +412,30 @@ bnx2_free_mem(struct bnx2 *bp)
                                    bp->tx_desc_ring, bp->tx_desc_mapping);
                bp->tx_desc_ring = NULL;
        }
-       if (bp->tx_buf_ring) {
-               kfree(bp->tx_buf_ring);
-               bp->tx_buf_ring = NULL;
-       }
-       if (bp->rx_desc_ring) {
-               pci_free_consistent(bp->pdev,
-                                   sizeof(struct rx_bd) * RX_DESC_CNT,
-                                   bp->rx_desc_ring, bp->rx_desc_mapping);
-               bp->rx_desc_ring = NULL;
-       }
-       if (bp->rx_buf_ring) {
-               kfree(bp->rx_buf_ring);
-               bp->rx_buf_ring = NULL;
-       }
+       kfree(bp->tx_buf_ring);
+       bp->tx_buf_ring = NULL;
+       for (i = 0; i < bp->rx_max_ring; i++) {
+               if (bp->rx_desc_ring[i])
+                       pci_free_consistent(bp->pdev,
+                                           sizeof(struct rx_bd) * RX_DESC_CNT,
+                                           bp->rx_desc_ring[i],
+                                           bp->rx_desc_mapping[i]);
+               bp->rx_desc_ring[i] = NULL;
+       }
+       vfree(bp->rx_buf_ring);
+       bp->rx_buf_ring = NULL;
 }
 
 static int
 bnx2_alloc_mem(struct bnx2 *bp)
 {
-       bp->tx_buf_ring = kmalloc(sizeof(struct sw_bd) * TX_DESC_CNT,
-                                    GFP_KERNEL);
+       int i, status_blk_size;
+
+       bp->tx_buf_ring = kzalloc(sizeof(struct sw_bd) * TX_DESC_CNT,
+                                 GFP_KERNEL);
        if (bp->tx_buf_ring == NULL)
                return -ENOMEM;
 
-       memset(bp->tx_buf_ring, 0, sizeof(struct sw_bd) * TX_DESC_CNT);
        bp->tx_desc_ring = pci_alloc_consistent(bp->pdev,
                                                sizeof(struct tx_bd) *
                                                TX_DESC_CNT,
@@ -338,34 +443,40 @@ bnx2_alloc_mem(struct bnx2 *bp)
        if (bp->tx_desc_ring == NULL)
                goto alloc_mem_err;
 
-       bp->rx_buf_ring = kmalloc(sizeof(struct sw_bd) * RX_DESC_CNT,
-                                    GFP_KERNEL);
+       bp->rx_buf_ring = vmalloc(sizeof(struct sw_bd) * RX_DESC_CNT *
+                                 bp->rx_max_ring);
        if (bp->rx_buf_ring == NULL)
                goto alloc_mem_err;
 
-       memset(bp->rx_buf_ring, 0, sizeof(struct sw_bd) * RX_DESC_CNT);
-       bp->rx_desc_ring = pci_alloc_consistent(bp->pdev,
-                                               sizeof(struct rx_bd) *
-                                               RX_DESC_CNT,
-                                               &bp->rx_desc_mapping);
-       if (bp->rx_desc_ring == NULL)
-               goto alloc_mem_err;
+       memset(bp->rx_buf_ring, 0, sizeof(struct sw_bd) * RX_DESC_CNT *
+                                  bp->rx_max_ring);
+
+       for (i = 0; i < bp->rx_max_ring; i++) {
+               bp->rx_desc_ring[i] =
+                       pci_alloc_consistent(bp->pdev,
+                                            sizeof(struct rx_bd) * RX_DESC_CNT,
+                                            &bp->rx_desc_mapping[i]);
+               if (bp->rx_desc_ring[i] == NULL)
+                       goto alloc_mem_err;
+
+       }
+
+       /* Combine status and statistics blocks into one allocation. */
+       status_blk_size = L1_CACHE_ALIGN(sizeof(struct status_block));
+       bp->status_stats_size = status_blk_size +
+                               sizeof(struct statistics_block);
 
-       bp->status_blk = pci_alloc_consistent(bp->pdev,
-                                             sizeof(struct status_block),
+       bp->status_blk = pci_alloc_consistent(bp->pdev, bp->status_stats_size,
                                              &bp->status_blk_mapping);
        if (bp->status_blk == NULL)
                goto alloc_mem_err;
 
-       memset(bp->status_blk, 0, sizeof(struct status_block));
+       memset(bp->status_blk, 0, bp->status_stats_size);
 
-       bp->stats_blk = pci_alloc_consistent(bp->pdev,
-                                            sizeof(struct statistics_block),
-                                            &bp->stats_blk_mapping);
-       if (bp->stats_blk == NULL)
-               goto alloc_mem_err;
+       bp->stats_blk = (void *) ((unsigned long) bp->status_blk +
+                                 status_blk_size);
 
-       memset(bp->stats_blk, 0, sizeof(struct statistics_block));
+       bp->stats_blk_mapping = bp->status_blk_mapping + status_blk_size;
 
        return 0;
 
@@ -374,6 +485,62 @@ alloc_mem_err:
        return -ENOMEM;
 }
 
+static void
+bnx2_report_fw_link(struct bnx2 *bp)
+{
+       u32 fw_link_status = 0;
+
+       if (bp->link_up) {
+               u32 bmsr;
+
+               switch (bp->line_speed) {
+               case SPEED_10:
+                       if (bp->duplex == DUPLEX_HALF)
+                               fw_link_status = BNX2_LINK_STATUS_10HALF;
+                       else
+                               fw_link_status = BNX2_LINK_STATUS_10FULL;
+                       break;
+               case SPEED_100:
+                       if (bp->duplex == DUPLEX_HALF)
+                               fw_link_status = BNX2_LINK_STATUS_100HALF;
+                       else
+                               fw_link_status = BNX2_LINK_STATUS_100FULL;
+                       break;
+               case SPEED_1000:
+                       if (bp->duplex == DUPLEX_HALF)
+                               fw_link_status = BNX2_LINK_STATUS_1000HALF;
+                       else
+                               fw_link_status = BNX2_LINK_STATUS_1000FULL;
+                       break;
+               case SPEED_2500:
+                       if (bp->duplex == DUPLEX_HALF)
+                               fw_link_status = BNX2_LINK_STATUS_2500HALF;
+                       else
+                               fw_link_status = BNX2_LINK_STATUS_2500FULL;
+                       break;
+               }
+
+               fw_link_status |= BNX2_LINK_STATUS_LINK_UP;
+
+               if (bp->autoneg) {
+                       fw_link_status |= BNX2_LINK_STATUS_AN_ENABLED;
+
+                       bnx2_read_phy(bp, MII_BMSR, &bmsr);
+                       bnx2_read_phy(bp, MII_BMSR, &bmsr);
+
+                       if (!(bmsr & BMSR_ANEGCOMPLETE) ||
+                           bp->phy_flags & PHY_PARALLEL_DETECT_FLAG)
+                               fw_link_status |= BNX2_LINK_STATUS_PARALLEL_DET;
+                       else
+                               fw_link_status |= BNX2_LINK_STATUS_AN_COMPLETE;
+               }
+       }
+       else
+               fw_link_status = BNX2_LINK_STATUS_LINK_DOWN;
+
+       REG_WR_IND(bp, bp->shmem_base + BNX2_LINK_STATUS, fw_link_status);
+}
+
 static void
 bnx2_report_link(struct bnx2 *bp)
 {
@@ -405,6 +572,8 @@ bnx2_report_link(struct bnx2 *bp)
                netif_carrier_off(bp->dev);
                printk(KERN_ERR PFX "%s NIC Link is Down\n", bp->dev->name);
        }
+
+       bnx2_report_fw_link(bp);
 }
 
 static void
@@ -426,6 +595,18 @@ bnx2_resolve_flow_ctrl(struct bnx2 *bp)
                return;
        }
 
+       if ((bp->phy_flags & PHY_SERDES_FLAG) &&
+           (CHIP_NUM(bp) == CHIP_NUM_5708)) {
+               u32 val;
+
+               bnx2_read_phy(bp, BCM5708S_1000X_STAT1, &val);
+               if (val & BCM5708S_1000X_STAT1_TX_PAUSE)
+                       bp->flow_ctrl |= FLOW_CTRL_TX;
+               if (val & BCM5708S_1000X_STAT1_RX_PAUSE)
+                       bp->flow_ctrl |= FLOW_CTRL_RX;
+               return;
+       }
+
        bnx2_read_phy(bp, MII_ADVERTISE, &local_adv);
        bnx2_read_phy(bp, MII_LPA, &remote_adv);
 
@@ -472,7 +653,36 @@ bnx2_resolve_flow_ctrl(struct bnx2 *bp)
 }
 
 static int
-bnx2_serdes_linkup(struct bnx2 *bp)
+bnx2_5708s_linkup(struct bnx2 *bp)
+{
+       u32 val;
+
+       bp->link_up = 1;
+       bnx2_read_phy(bp, BCM5708S_1000X_STAT1, &val);
+       switch (val & BCM5708S_1000X_STAT1_SPEED_MASK) {
+               case BCM5708S_1000X_STAT1_SPEED_10:
+                       bp->line_speed = SPEED_10;
+                       break;
+               case BCM5708S_1000X_STAT1_SPEED_100:
+                       bp->line_speed = SPEED_100;
+                       break;
+               case BCM5708S_1000X_STAT1_SPEED_1G:
+                       bp->line_speed = SPEED_1000;
+                       break;
+               case BCM5708S_1000X_STAT1_SPEED_2G5:
+                       bp->line_speed = SPEED_2500;
+                       break;
+       }
+       if (val & BCM5708S_1000X_STAT1_FD)
+               bp->duplex = DUPLEX_FULL;
+       else
+               bp->duplex = DUPLEX_HALF;
+
+       return 0;
+}
+
+static int
+bnx2_5706s_linkup(struct bnx2 *bp)
 {
        u32 bmcr, local_adv, remote_adv, common;
 
@@ -589,13 +799,27 @@ bnx2_set_mac_link(struct bnx2 *bp)
        val = REG_RD(bp, BNX2_EMAC_MODE);
 
        val &= ~(BNX2_EMAC_MODE_PORT | BNX2_EMAC_MODE_HALF_DUPLEX |
-               BNX2_EMAC_MODE_MAC_LOOP | BNX2_EMAC_MODE_FORCE_LINK);
+               BNX2_EMAC_MODE_MAC_LOOP | BNX2_EMAC_MODE_FORCE_LINK |
+               BNX2_EMAC_MODE_25G);
 
        if (bp->link_up) {
-               if (bp->line_speed != SPEED_1000)
-                       val |= BNX2_EMAC_MODE_PORT_MII;
-               else
-                       val |= BNX2_EMAC_MODE_PORT_GMII;
+               switch (bp->line_speed) {
+                       case SPEED_10:
+                               if (CHIP_NUM(bp) == CHIP_NUM_5708) {
+                                       val |= BNX2_EMAC_MODE_PORT_MII_10;
+                                       break;
+                               }
+                               /* fall through */
+                       case SPEED_100:
+                               val |= BNX2_EMAC_MODE_PORT_MII;
+                               break;
+                       case SPEED_2500:
+                               val |= BNX2_EMAC_MODE_25G;
+                               /* fall through */
+                       case SPEED_1000:
+                               val |= BNX2_EMAC_MODE_PORT_GMII;
+                               break;
+               }
        }
        else {
                val |= BNX2_EMAC_MODE_PORT_GMII;
@@ -658,7 +882,10 @@ bnx2_set_link(struct bnx2 *bp)
                bp->link_up = 1;
 
                if (bp->phy_flags & PHY_SERDES_FLAG) {
-                       bnx2_serdes_linkup(bp);
+                       if (CHIP_NUM(bp) == CHIP_NUM_5706)
+                               bnx2_5706s_linkup(bp);
+                       else if (CHIP_NUM(bp) == CHIP_NUM_5708)
+                               bnx2_5708s_linkup(bp);
                }
                else {
                        bnx2_copper_linkup(bp);
@@ -751,39 +978,61 @@ bnx2_phy_get_pause_adv(struct bnx2 *bp)
 static int
 bnx2_setup_serdes_phy(struct bnx2 *bp)
 {
-       u32 adv, bmcr;
+       u32 adv, bmcr, up1;
        u32 new_adv = 0;
 
        if (!(bp->autoneg & AUTONEG_SPEED)) {
                u32 new_bmcr;
+               int force_link_down = 0;
+
+               if (CHIP_NUM(bp) == CHIP_NUM_5708) {
+                       bnx2_read_phy(bp, BCM5708S_UP1, &up1);
+                       if (up1 & BCM5708S_UP1_2G5) {
+                               up1 &= ~BCM5708S_UP1_2G5;
+                               bnx2_write_phy(bp, BCM5708S_UP1, up1);
+                               force_link_down = 1;
+                       }
+               }
+
+               bnx2_read_phy(bp, MII_ADVERTISE, &adv);
+               adv &= ~(ADVERTISE_1000XFULL | ADVERTISE_1000XHALF);
 
                bnx2_read_phy(bp, MII_BMCR, &bmcr);
                new_bmcr = bmcr & ~BMCR_ANENABLE;
                new_bmcr |= BMCR_SPEED1000;
                if (bp->req_duplex == DUPLEX_FULL) {
+                       adv |= ADVERTISE_1000XFULL;
                        new_bmcr |= BMCR_FULLDPLX;
                }
                else {
+                       adv |= ADVERTISE_1000XHALF;
                        new_bmcr &= ~BMCR_FULLDPLX;
                }
-               if (new_bmcr != bmcr) {
+               if ((new_bmcr != bmcr) || (force_link_down)) {
                        /* Force a link down visible on the other side */
                        if (bp->link_up) {
-                               bnx2_read_phy(bp, MII_ADVERTISE, &adv);
-                               adv &= ~(ADVERTISE_1000XFULL |
-                                       ADVERTISE_1000XHALF);
-                               bnx2_write_phy(bp, MII_ADVERTISE, adv);
+                               bnx2_write_phy(bp, MII_ADVERTISE, adv &
+                                              ~(ADVERTISE_1000XFULL |
+                                                ADVERTISE_1000XHALF));
                                bnx2_write_phy(bp, MII_BMCR, bmcr |
                                        BMCR_ANRESTART | BMCR_ANENABLE);
 
                                bp->link_up = 0;
                                netif_carrier_off(bp->dev);
+                               bnx2_write_phy(bp, MII_BMCR, new_bmcr);
                        }
+                       bnx2_write_phy(bp, MII_ADVERTISE, adv);
                        bnx2_write_phy(bp, MII_BMCR, new_bmcr);
                }
                return 0;
        }
 
+       if (bp->phy_flags & PHY_2_5G_CAPABLE_FLAG) {
+               bnx2_read_phy(bp, BCM5708S_UP1, &up1);
+               up1 |= BCM5708S_UP1_2G5;
+               bnx2_write_phy(bp, BCM5708S_UP1, up1);
+       }
+
        if (bp->advertising & ADVERTISED_1000baseT_Full)
                new_adv |= ADVERTISE_1000XFULL;
 
@@ -807,7 +1056,19 @@ bnx2_setup_serdes_phy(struct bnx2 *bp)
                bnx2_write_phy(bp, MII_ADVERTISE, new_adv);
                bnx2_write_phy(bp, MII_BMCR, bmcr | BMCR_ANRESTART |
                        BMCR_ANENABLE);
-               bp->serdes_an_pending = SERDES_AN_TIMEOUT / bp->timer_interval;
+               if (CHIP_NUM(bp) == CHIP_NUM_5706) {
+                       /* Speed up link-up time when the link partner
+                        * does not autonegotiate which is very common
+                        * in blade servers. Some blade servers use
+                        * IPMI for keyboard input and it's important
+                        * to minimize link disruptions. Autoneg. involves
+                        * exchanging base pages plus 3 next pages and
+                        * normally completes in about 120 msec.
+                        */
+                       bp->current_interval = SERDES_AN_TIMEOUT;
+                       bp->serdes_an_pending = 1;
+                       mod_timer(&bp->timer, jiffies + bp->current_interval);
+               }
        }
 
        return 0;
@@ -936,7 +1197,61 @@ bnx2_setup_phy(struct bnx2 *bp)
 }
 
 static int
-bnx2_init_serdes_phy(struct bnx2 *bp)
+bnx2_init_5708s_phy(struct bnx2 *bp)
+{
+       u32 val;
+
+       bnx2_write_phy(bp, BCM5708S_BLK_ADDR, BCM5708S_BLK_ADDR_DIG3);
+       bnx2_write_phy(bp, BCM5708S_DIG_3_0, BCM5708S_DIG_3_0_USE_IEEE);
+       bnx2_write_phy(bp, BCM5708S_BLK_ADDR, BCM5708S_BLK_ADDR_DIG);
+
+       bnx2_read_phy(bp, BCM5708S_1000X_CTL1, &val);
+       val |= BCM5708S_1000X_CTL1_FIBER_MODE | BCM5708S_1000X_CTL1_AUTODET_EN;
+       bnx2_write_phy(bp, BCM5708S_1000X_CTL1, val);
+
+       bnx2_read_phy(bp, BCM5708S_1000X_CTL2, &val);
+       val |= BCM5708S_1000X_CTL2_PLLEL_DET_EN;
+       bnx2_write_phy(bp, BCM5708S_1000X_CTL2, val);
+
+       if (bp->phy_flags & PHY_2_5G_CAPABLE_FLAG) {
+               bnx2_read_phy(bp, BCM5708S_UP1, &val);
+               val |= BCM5708S_UP1_2G5;
+               bnx2_write_phy(bp, BCM5708S_UP1, val);
+       }
+
+       if ((CHIP_ID(bp) == CHIP_ID_5708_A0) ||
+           (CHIP_ID(bp) == CHIP_ID_5708_B0) ||
+           (CHIP_ID(bp) == CHIP_ID_5708_B1)) {
+               /* increase tx signal amplitude */
+               bnx2_write_phy(bp, BCM5708S_BLK_ADDR,
+                              BCM5708S_BLK_ADDR_TX_MISC);
+               bnx2_read_phy(bp, BCM5708S_TX_ACTL1, &val);
+               val &= ~BCM5708S_TX_ACTL1_DRIVER_VCM;
+               bnx2_write_phy(bp, BCM5708S_TX_ACTL1, val);
+               bnx2_write_phy(bp, BCM5708S_BLK_ADDR, BCM5708S_BLK_ADDR_DIG);
+       }
+
+       val = REG_RD_IND(bp, bp->shmem_base + BNX2_PORT_HW_CFG_CONFIG) &
+             BNX2_PORT_HW_CFG_CFG_TXCTL3_MASK;
+
+       if (val) {
+               u32 is_backplane;
+
+               is_backplane = REG_RD_IND(bp, bp->shmem_base +
+                                         BNX2_SHARED_HW_CFG_CONFIG);
+               if (is_backplane & BNX2_SHARED_HW_CFG_PHY_BACKPLANE) {
+                       bnx2_write_phy(bp, BCM5708S_BLK_ADDR,
+                                      BCM5708S_BLK_ADDR_TX_MISC);
+                       bnx2_write_phy(bp, BCM5708S_TX_ACTL3, val);
+                       bnx2_write_phy(bp, BCM5708S_BLK_ADDR,
+                                      BCM5708S_BLK_ADDR_DIG);
+               }
+       }
+       return 0;
+}
+
+static int
+bnx2_init_5706s_phy(struct bnx2 *bp)
 {
        bp->phy_flags &= ~PHY_PARALLEL_DETECT_FLAG;
 
@@ -974,6 +1289,8 @@ bnx2_init_serdes_phy(struct bnx2 *bp)
 static int
 bnx2_init_copper_phy(struct bnx2 *bp)
 {
+       u32 val;
+
        bp->phy_flags |= PHY_CRC_FIX_FLAG;
 
        if (bp->phy_flags & PHY_CRC_FIX_FLAG) {
@@ -988,8 +1305,6 @@ bnx2_init_copper_phy(struct bnx2 *bp)
        }
 
        if (bp->dev->mtu > 1500) {
-               u32 val;
-
                /* Set extended packet length bit */
                bnx2_write_phy(bp, 0x18, 0x7);
                bnx2_read_phy(bp, 0x18, &val);
@@ -999,8 +1314,6 @@ bnx2_init_copper_phy(struct bnx2 *bp)
                bnx2_write_phy(bp, 0x10, val | 0x1);
        }
        else {
-               u32 val;
-
                bnx2_write_phy(bp, 0x18, 0x7);
                bnx2_read_phy(bp, 0x18, &val);
                bnx2_write_phy(bp, 0x18, val & ~0x4007);
@@ -1009,6 +1322,10 @@ bnx2_init_copper_phy(struct bnx2 *bp)
                bnx2_write_phy(bp, 0x10, val & ~0x1);
        }
 
+       /* ethernet@wirespeed */
+       bnx2_write_phy(bp, 0x18, 0x7007);
+       bnx2_read_phy(bp, 0x18, &val);
+       bnx2_write_phy(bp, 0x18, val | (1 << 15) | (1 << 4));
        return 0;
 }
 
@@ -1032,7 +1349,10 @@ bnx2_init_phy(struct bnx2 *bp)
        bp->phy_id |= val & 0xffff;
 
        if (bp->phy_flags & PHY_SERDES_FLAG) {
-               rc = bnx2_init_serdes_phy(bp);
+               if (CHIP_NUM(bp) == CHIP_NUM_5706)
+                       rc = bnx2_init_5706s_phy(bp);
+               else if (CHIP_NUM(bp) == CHIP_NUM_5708)
+                       rc = bnx2_init_5708s_phy(bp);
        }
        else {
                rc = bnx2_init_copper_phy(bp);
@@ -1056,44 +1376,78 @@ bnx2_set_mac_loopback(struct bnx2 *bp)
        return 0;
 }
 
+static int bnx2_test_link(struct bnx2 *);
+
+static int
+bnx2_set_phy_loopback(struct bnx2 *bp)
+{
+       u32 mac_mode;
+       int rc, i;
+
+       spin_lock_bh(&bp->phy_lock);
+       rc = bnx2_write_phy(bp, MII_BMCR, BMCR_LOOPBACK | BMCR_FULLDPLX |
+                           BMCR_SPEED1000);
+       spin_unlock_bh(&bp->phy_lock);
+       if (rc)
+               return rc;
+
+       for (i = 0; i < 10; i++) {
+               if (bnx2_test_link(bp) == 0)
+                       break;
+               udelay(10);
+       }
+
+       mac_mode = REG_RD(bp, BNX2_EMAC_MODE);
+       mac_mode &= ~(BNX2_EMAC_MODE_PORT | BNX2_EMAC_MODE_HALF_DUPLEX |
+                     BNX2_EMAC_MODE_MAC_LOOP | BNX2_EMAC_MODE_FORCE_LINK |
+                     BNX2_EMAC_MODE_25G);
+
+       mac_mode |= BNX2_EMAC_MODE_PORT_GMII;
+       REG_WR(bp, BNX2_EMAC_MODE, mac_mode);
+       bp->link_up = 1;
+       return 0;
+}
+
 static int
-bnx2_fw_sync(struct bnx2 *bp, u32 msg_data)
+bnx2_fw_sync(struct bnx2 *bp, u32 msg_data, int silent)
 {
        int i;
        u32 val;
 
-       if (bp->fw_timed_out)
-               return -EBUSY;
-
        bp->fw_wr_seq++;
        msg_data |= bp->fw_wr_seq;
 
-       REG_WR_IND(bp, HOST_VIEW_SHMEM_BASE + BNX2_DRV_MB, msg_data);
+       REG_WR_IND(bp, bp->shmem_base + BNX2_DRV_MB, msg_data);
 
        /* wait for an acknowledgement. */
-       for (i = 0; i < (FW_ACK_TIME_OUT_MS * 1000)/5; i++) {
-               udelay(5);
+       for (i = 0; i < (FW_ACK_TIME_OUT_MS / 10); i++) {
+               msleep(10);
 
-               val = REG_RD_IND(bp, HOST_VIEW_SHMEM_BASE + BNX2_FW_MB);
+               val = REG_RD_IND(bp, bp->shmem_base + BNX2_FW_MB);
 
                if ((val & BNX2_FW_MSG_ACK) == (msg_data & BNX2_DRV_MSG_SEQ))
                        break;
        }
+       if ((msg_data & BNX2_DRV_MSG_DATA) == BNX2_DRV_MSG_DATA_WAIT0)
+               return 0;
 
        /* If we timed out, inform the firmware that this is the case. */
-       if (((val & BNX2_FW_MSG_ACK) != (msg_data & BNX2_DRV_MSG_SEQ)) &&
-               ((msg_data & BNX2_DRV_MSG_DATA) != BNX2_DRV_MSG_DATA_WAIT0)) {
+       if ((val & BNX2_FW_MSG_ACK) != (msg_data & BNX2_DRV_MSG_SEQ)) {
+               if (!silent)
+                       printk(KERN_ERR PFX "fw sync timeout, reset code = "
+                                           "%x\n", msg_data);
 
                msg_data &= ~BNX2_DRV_MSG_CODE;
                msg_data |= BNX2_DRV_MSG_CODE_FW_TIMEOUT;
 
-               REG_WR_IND(bp, HOST_VIEW_SHMEM_BASE + BNX2_DRV_MB, msg_data);
-
-               bp->fw_timed_out = 1;
+               REG_WR_IND(bp, bp->shmem_base + BNX2_DRV_MB, msg_data);
 
                return -EBUSY;
        }
 
+       if ((val & BNX2_FW_MSG_STATUS_MASK) != BNX2_FW_MSG_STATUS_OK)
+               return -EIO;
+
        return 0;
 }
 
@@ -1211,7 +1565,7 @@ bnx2_alloc_rx_skb(struct bnx2 *bp, u16 index)
        struct sk_buff *skb;
        struct sw_bd *rx_buf = &bp->rx_buf_ring[index];
        dma_addr_t mapping;
-       struct rx_bd *rxbd = &bp->rx_desc_ring[index];
+       struct rx_bd *rxbd = &bp->rx_desc_ring[RX_RING(index)][RX_IDX(index)];
        unsigned long align;
 
        skb = dev_alloc_skb(bp->rx_buf_size);
@@ -1263,10 +1617,11 @@ bnx2_phy_int(struct bnx2 *bp)
 static void
 bnx2_tx_int(struct bnx2 *bp)
 {
+       struct status_block *sblk = bp->status_blk;
        u16 hw_cons, sw_cons, sw_ring_cons;
        int tx_free_bd = 0;
 
-       hw_cons = bp->status_blk->status_tx_quick_consumer_index0;
+       hw_cons = bp->hw_tx_cons = sblk->status_tx_quick_consumer_index0;
        if ((hw_cons & MAX_TX_DESC_CNT) == MAX_TX_DESC_CNT) {
                hw_cons++;
        }
@@ -1283,7 +1638,7 @@ bnx2_tx_int(struct bnx2 *bp)
                skb = tx_buf->skb;
 #ifdef BCM_TSO 
                /* partial BD completions possible with TSO packets */
-               if (skb_shinfo(skb)->tso_size) {
+               if (skb_is_gso(skb)) {
                        u16 last_idx, last_ring_idx;
 
                        last_idx = sw_cons +
@@ -1321,61 +1676,66 @@ bnx2_tx_int(struct bnx2 *bp)
 
                dev_kfree_skb_irq(skb);
 
-               hw_cons = bp->status_blk->status_tx_quick_consumer_index0;
+               hw_cons = bp->hw_tx_cons =
+                       sblk->status_tx_quick_consumer_index0;
+
                if ((hw_cons & MAX_TX_DESC_CNT) == MAX_TX_DESC_CNT) {
                        hw_cons++;
                }
        }
 
-       atomic_add(tx_free_bd, &bp->tx_avail_bd);
+       bp->tx_cons = sw_cons;
 
        if (unlikely(netif_queue_stopped(bp->dev))) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&bp->tx_lock, flags);
+               spin_lock(&bp->tx_lock);
                if ((netif_queue_stopped(bp->dev)) &&
-                       (atomic_read(&bp->tx_avail_bd) > MAX_SKB_FRAGS)) {
+                   (bnx2_tx_avail(bp) > MAX_SKB_FRAGS)) {
 
                        netif_wake_queue(bp->dev);
                }
-               spin_unlock_irqrestore(&bp->tx_lock, flags);
+               spin_unlock(&bp->tx_lock);
        }
-
-       bp->tx_cons = sw_cons;
-
 }
 
 static inline void
 bnx2_reuse_rx_skb(struct bnx2 *bp, struct sk_buff *skb,
        u16 cons, u16 prod)
 {
-       struct sw_bd *cons_rx_buf = &bp->rx_buf_ring[cons];
-       struct sw_bd *prod_rx_buf = &bp->rx_buf_ring[prod];
-       struct rx_bd *cons_bd = &bp->rx_desc_ring[cons];
-       struct rx_bd *prod_bd = &bp->rx_desc_ring[prod];
+       struct sw_bd *cons_rx_buf, *prod_rx_buf;
+       struct rx_bd *cons_bd, *prod_bd;
+
+       cons_rx_buf = &bp->rx_buf_ring[cons];
+       prod_rx_buf = &bp->rx_buf_ring[prod];
 
        pci_dma_sync_single_for_device(bp->pdev,
                pci_unmap_addr(cons_rx_buf, mapping),
                bp->rx_offset + RX_COPY_THRESH, PCI_DMA_FROMDEVICE);
 
-       prod_rx_buf->skb = cons_rx_buf->skb;
-       pci_unmap_addr_set(prod_rx_buf, mapping,
-                       pci_unmap_addr(cons_rx_buf, mapping));
+       bp->rx_prod_bseq += bp->rx_buf_use_size;
 
-       memcpy(prod_bd, cons_bd, 8);
+       prod_rx_buf->skb = skb;
 
-       bp->rx_prod_bseq += bp->rx_buf_use_size;
+       if (cons == prod)
+               return;
+
+       pci_unmap_addr_set(prod_rx_buf, mapping,
+                       pci_unmap_addr(cons_rx_buf, mapping));
 
+       cons_bd = &bp->rx_desc_ring[RX_RING(cons)][RX_IDX(cons)];
+       prod_bd = &bp->rx_desc_ring[RX_RING(prod)][RX_IDX(prod)];
+       prod_bd->rx_bd_haddr_hi = cons_bd->rx_bd_haddr_hi;
+       prod_bd->rx_bd_haddr_lo = cons_bd->rx_bd_haddr_lo;
 }
 
 static int
 bnx2_rx_int(struct bnx2 *bp, int budget)
 {
+       struct status_block *sblk = bp->status_blk;
        u16 hw_cons, sw_cons, sw_ring_cons, sw_prod, sw_ring_prod;
        struct l2_fhdr *rx_hdr;
        int rx_pkt = 0;
 
-       hw_cons = bp->status_blk->status_rx_quick_consumer_index0;
+       hw_cons = bp->hw_rx_cons = sblk->status_rx_quick_consumer_index0;
        if ((hw_cons & MAX_RX_DESC_CNT) == MAX_RX_DESC_CNT) {
                hw_cons++;
        }
@@ -1388,23 +1748,28 @@ bnx2_rx_int(struct bnx2 *bp, int budget)
        rmb();
        while (sw_cons != hw_cons) {
                unsigned int len;
-               u16 status;
+               u32 status;
                struct sw_bd *rx_buf;
                struct sk_buff *skb;
+               dma_addr_t dma_addr;
 
                sw_ring_cons = RX_RING_IDX(sw_cons);
                sw_ring_prod = RX_RING_IDX(sw_prod);
 
                rx_buf = &bp->rx_buf_ring[sw_ring_cons];
                skb = rx_buf->skb;
-               pci_dma_sync_single_for_cpu(bp->pdev,
-                       pci_unmap_addr(rx_buf, mapping),
+
+               rx_buf->skb = NULL;
+
+               dma_addr = pci_unmap_addr(rx_buf, mapping);
+
+               pci_dma_sync_single_for_cpu(bp->pdev, dma_addr,
                        bp->rx_offset + RX_COPY_THRESH, PCI_DMA_FROMDEVICE);
 
                rx_hdr = (struct l2_fhdr *) skb->data;
                len = rx_hdr->l2_fhdr_pkt_len - 4;
 
-               if (rx_hdr->l2_fhdr_errors &
+               if ((status = rx_hdr->l2_fhdr_status) &
                        (L2_FHDR_ERRORS_BAD_CRC |
                        L2_FHDR_ERRORS_PHY_DECODE |
                        L2_FHDR_ERRORS_ALIGNMENT |
@@ -1439,8 +1804,7 @@ bnx2_rx_int(struct bnx2 *bp, int budget)
                        skb = new_skb;
                }
                else if (bnx2_alloc_rx_skb(bp, sw_ring_prod) == 0) {
-                       pci_unmap_single(bp->pdev,
-                               pci_unmap_addr(rx_buf, mapping),
+                       pci_unmap_single(bp->pdev, dma_addr,
                                bp->rx_buf_use_size, PCI_DMA_FROMDEVICE);
 
                        skb_reserve(skb, bp->rx_offset);
@@ -1463,15 +1827,13 @@ reuse_rx:
 
                }
 
-               status = rx_hdr->l2_fhdr_status;
                skb->ip_summed = CHECKSUM_NONE;
                if (bp->rx_csum &&
                        (status & (L2_FHDR_STATUS_TCP_SEGMENT |
                        L2_FHDR_STATUS_UDP_DATAGRAM))) {
 
-                       u16 cksum = rx_hdr->l2_fhdr_tcp_udp_xsum;
-
-                       if (cksum == 0xffff)
+                       if (likely((status & (L2_FHDR_ERRORS_TCP_XSUM |
+                                             L2_FHDR_ERRORS_UDP_XSUM)) == 0))
                                skb->ip_summed = CHECKSUM_UNNECESSARY;
                }
 
@@ -1488,13 +1850,20 @@ reuse_rx:
                rx_pkt++;
 
 next_rx:
-               rx_buf->skb = NULL;
-
                sw_cons = NEXT_RX_BD(sw_cons);
                sw_prod = NEXT_RX_BD(sw_prod);
 
                if ((rx_pkt == budget))
                        break;
+
+               /* Refresh hw_cons to see if there is new work */
+               if (sw_cons == hw_cons) {
+                       hw_cons = bp->hw_rx_cons =
+                               sblk->status_rx_quick_consumer_index0;
+                       if ((hw_cons & MAX_RX_DESC_CNT) == MAX_RX_DESC_CNT)
+                               hw_cons++;
+                       rmb();
+               }
        }
        bp->rx_cons = sw_cons;
        bp->rx_prod = sw_prod;
@@ -1516,29 +1885,27 @@ static irqreturn_t
 bnx2_msi(int irq, void *dev_instance, struct pt_regs *regs)
 {
        struct net_device *dev = dev_instance;
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
+       prefetch(bp->status_blk);
        REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
                BNX2_PCICFG_INT_ACK_CMD_USE_INT_HC_PARAM |
                BNX2_PCICFG_INT_ACK_CMD_MASK_INT);
 
        /* Return here if interrupt is disabled. */
-       if (unlikely(atomic_read(&bp->intr_sem) != 0)) {
-               return IRQ_RETVAL(1);
-       }
+       if (unlikely(atomic_read(&bp->intr_sem) != 0))
+               return IRQ_HANDLED;
 
-       if (netif_rx_schedule_prep(dev)) {
-               __netif_rx_schedule(dev);
-       }
+       netif_rx_schedule(dev);
 
-       return IRQ_RETVAL(1);
+       return IRQ_HANDLED;
 }
 
 static irqreturn_t
 bnx2_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
 {
        struct net_device *dev = dev_instance;
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        /* When using INTx, it is possible for the interrupt to arrive
         * at the CPU before the status block posted prior to the
@@ -1546,53 +1913,66 @@ bnx2_interrupt(int irq, void *dev_instance, struct pt_regs *regs)
         * When using MSI, the MSI message will always complete after
         * the status block write.
         */
-       if ((bp->status_blk->status_idx == bp->last_status_idx) ||
+       if ((bp->status_blk->status_idx == bp->last_status_idx) &&
            (REG_RD(bp, BNX2_PCICFG_MISC_STATUS) &
             BNX2_PCICFG_MISC_STATUS_INTA_VALUE))
-               return IRQ_RETVAL(0);
+               return IRQ_NONE;
 
        REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
                BNX2_PCICFG_INT_ACK_CMD_USE_INT_HC_PARAM |
                BNX2_PCICFG_INT_ACK_CMD_MASK_INT);
 
        /* Return here if interrupt is shared and is disabled. */
-       if (unlikely(atomic_read(&bp->intr_sem) != 0)) {
-               return IRQ_RETVAL(1);
-       }
+       if (unlikely(atomic_read(&bp->intr_sem) != 0))
+               return IRQ_HANDLED;
 
-       if (netif_rx_schedule_prep(dev)) {
-               __netif_rx_schedule(dev);
-       }
+       netif_rx_schedule(dev);
+
+       return IRQ_HANDLED;
+}
+
+static inline int
+bnx2_has_work(struct bnx2 *bp)
+{
+       struct status_block *sblk = bp->status_blk;
 
-       return IRQ_RETVAL(1);
+       if ((sblk->status_rx_quick_consumer_index0 != bp->hw_rx_cons) ||
+           (sblk->status_tx_quick_consumer_index0 != bp->hw_tx_cons))
+               return 1;
+
+       if (((sblk->status_attn_bits & STATUS_ATTN_BITS_LINK_STATE) != 0) !=
+           bp->link_up)
+               return 1;
+
+       return 0;
 }
 
 static int
 bnx2_poll(struct net_device *dev, int *budget)
 {
-       struct bnx2 *bp = dev->priv;
-       int rx_done = 1;
+       struct bnx2 *bp = netdev_priv(dev);
 
-       bp->last_status_idx = bp->status_blk->status_idx;
-
-       rmb();
        if ((bp->status_blk->status_attn_bits &
                STATUS_ATTN_BITS_LINK_STATE) !=
                (bp->status_blk->status_attn_bits_ack &
                STATUS_ATTN_BITS_LINK_STATE)) {
 
-               unsigned long flags;
-
-               spin_lock_irqsave(&bp->phy_lock, flags);
+               spin_lock(&bp->phy_lock);
                bnx2_phy_int(bp);
-               spin_unlock_irqrestore(&bp->phy_lock, flags);
+               spin_unlock(&bp->phy_lock);
+
+               /* This is needed to take care of transient status
+                * during link changes.
+                */
+               REG_WR(bp, BNX2_HC_COMMAND,
+                      bp->hc_cmd | BNX2_HC_COMMAND_COAL_NOW_WO_INT);
+               REG_RD(bp, BNX2_HC_COMMAND);
        }
 
-       if (bp->status_blk->status_tx_quick_consumer_index0 != bp->tx_cons) {
+       if (bp->status_blk->status_tx_quick_consumer_index0 != bp->hw_tx_cons)
                bnx2_tx_int(bp);
-       }
 
-       if (bp->status_blk->status_rx_quick_consumer_index0 != bp->rx_cons) {
+       if (bp->status_blk->status_rx_quick_consumer_index0 != bp->hw_rx_cons) {
                int orig_budget = *budget;
                int work_done;
 
@@ -1602,45 +1982,54 @@ bnx2_poll(struct net_device *dev, int *budget)
                work_done = bnx2_rx_int(bp, orig_budget);
                *budget -= work_done;
                dev->quota -= work_done;
-               
-               if (work_done >= orig_budget) {
-                       rx_done = 0;
-               }
        }
        
-       if (rx_done) {
+       bp->last_status_idx = bp->status_blk->status_idx;
+       rmb();
+
+       if (!bnx2_has_work(bp)) {
                netif_rx_complete(dev);
+               if (likely(bp->flags & USING_MSI_FLAG)) {
+                       REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
+                              BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
+                              bp->last_status_idx);
+                       return 0;
+               }
+               REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
+                      BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
+                      BNX2_PCICFG_INT_ACK_CMD_MASK_INT |
+                      bp->last_status_idx);
+
                REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
-                       BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
-                       bp->last_status_idx);
+                      BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
+                      bp->last_status_idx);
                return 0;
        }
 
        return 1;
 }
 
-/* Called with rtnl_lock from vlan functions and also dev->xmit_lock
+/* Called with rtnl_lock from vlan functions and also netif_tx_lock
  * from set_multicast.
  */
 static void
 bnx2_set_rx_mode(struct net_device *dev)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
        u32 rx_mode, sort_mode;
        int i;
-       unsigned long flags;
 
-       spin_lock_irqsave(&bp->phy_lock, flags);
+       spin_lock_bh(&bp->phy_lock);
 
        rx_mode = bp->rx_mode & ~(BNX2_EMAC_RX_MODE_PROMISCUOUS |
                                  BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG);
        sort_mode = 1 | BNX2_RPM_SORT_USER0_BC_EN;
 #ifdef BCM_VLAN
-       if (!bp->vlgrp) {
+       if (!bp->vlgrp && !(bp->flags & ASF_ENABLE_FLAG))
                rx_mode |= BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG;
-       }
 #else
-       rx_mode |= BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG;
+       if (!(bp->flags & ASF_ENABLE_FLAG))
+               rx_mode |= BNX2_EMAC_RX_MODE_KEEP_VLAN_TAG;
 #endif
        if (dev->flags & IFF_PROMISC) {
                /* Promiscuous mode. */
@@ -1691,7 +2080,7 @@ bnx2_set_rx_mode(struct net_device *dev)
        REG_WR(bp, BNX2_RPM_SORT_USER0, sort_mode);
        REG_WR(bp, BNX2_RPM_SORT_USER0, sort_mode | BNX2_RPM_SORT_USER0_ENA);
 
-       spin_unlock_irqrestore(&bp->phy_lock, flags);
+       spin_unlock_bh(&bp->phy_lock);
 }
 
 static void
@@ -1998,14 +2387,14 @@ bnx2_init_cpus(struct bnx2 *bp)
 }
 
 static int
-bnx2_set_power_state(struct bnx2 *bp, int state)
+bnx2_set_power_state(struct bnx2 *bp, pci_power_t state)
 {
        u16 pmcsr;
 
        pci_read_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL, &pmcsr);
 
        switch (state) {
-       case 0: {
+       case PCI_D0: {
                u32 val;
 
                pci_write_config_word(bp->pdev, bp->pm_cap + PCI_PM_CTRL,
@@ -2026,7 +2415,7 @@ bnx2_set_power_state(struct bnx2 *bp, int state)
                REG_WR(bp, BNX2_RPM_CONFIG, val);
                break;
        }
-       case 3: {
+       case PCI_D3hot: {
                int i;
                u32 val, wol_msg;
 
@@ -2058,7 +2447,6 @@ bnx2_set_power_state(struct bnx2 *bp, int state)
                        val |= BNX2_EMAC_MODE_PORT_MII |
                               BNX2_EMAC_MODE_MPKT_RCVD |
                               BNX2_EMAC_MODE_ACPI_RCVD |
-                              BNX2_EMAC_MODE_FORCE_LINK |
                               BNX2_EMAC_MODE_MPKT;
 
                        REG_WR(bp, BNX2_EMAC_MODE, val);
@@ -2094,7 +2482,8 @@ bnx2_set_power_state(struct bnx2 *bp, int state)
                        wol_msg = BNX2_DRV_MSG_CODE_SUSPEND_NO_WOL;
                }
 
-               bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT3 | wol_msg);
+               if (!(bp->flags & NO_WOL_FLAG))
+                       bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT3 | wol_msg, 0);
 
                pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
                if ((CHIP_ID(bp) == CHIP_ID_5706_A0) ||
@@ -2380,21 +2769,27 @@ bnx2_init_nvram(struct bnx2 *bp)
 
                /* Flash interface has been reconfigured */
                for (j = 0, flash = &flash_table[0]; j < entry_count;
-                       j++, flash++) {
-
-                       if (val == flash->config1) {
+                    j++, flash++) {
+                       if ((val & FLASH_BACKUP_STRAP_MASK) ==
+                           (flash->config1 & FLASH_BACKUP_STRAP_MASK)) {
                                bp->flash_info = flash;
                                break;
                        }
                }
        }
        else {
+               u32 mask;
                /* Not yet been reconfigured */
 
+               if (val & (1 << 23))
+                       mask = FLASH_BACKUP_STRAP_MASK;
+               else
+                       mask = FLASH_STRAP_MASK;
+
                for (j = 0, flash = &flash_table[0]; j < entry_count;
                        j++, flash++) {
 
-                       if ((val & FLASH_STRAP_MASK) == flash->strapping) {
+                       if ((val & mask) == (flash->strapping & mask)) {
                                bp->flash_info = flash;
 
                                /* Request access to the flash interface. */
@@ -2421,10 +2816,17 @@ bnx2_init_nvram(struct bnx2 *bp)
 
        if (j == entry_count) {
                bp->flash_info = NULL;
-               printk(KERN_ALERT "Unknown flash/EEPROM type.\n");
-               rc = -ENODEV;
+               printk(KERN_ALERT PFX "Unknown flash/EEPROM type.\n");
+               return -ENODEV;
        }
 
+       val = REG_RD_IND(bp, bp->shmem_base + BNX2_SHARED_HW_CFG_CONFIG2);
+       val &= BNX2_SHARED_HW_CFG2_NVM_SIZE_MASK;
+       if (val)
+               bp->flash_size = val;
+       else
+               bp->flash_size = bp->flash_info->total_size;
+
        return rc;
 }
 
@@ -2543,7 +2945,7 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf,
                int buf_size)
 {
        u32 written, offset32, len32;
-       u8 *buf, start[4], end[4];
+       u8 *buf, start[4], end[4], *flash_buffer = NULL;
        int rc = 0;
        int align_start, align_end;
 
@@ -2583,12 +2985,19 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf,
                memcpy(buf + align_start, data_buf, buf_size);
        }
 
+       if (bp->flash_info->buffered == 0) {
+               flash_buffer = kmalloc(264, GFP_KERNEL);
+               if (flash_buffer == NULL) {
+                       rc = -ENOMEM;
+                       goto nvram_write_end;
+               }
+       }
+
        written = 0;
        while ((written < len32) && (rc == 0)) {
                u32 page_start, page_end, data_start, data_end;
                u32 addr, cmd_flags;
                int i;
-               u8 flash_buffer[264];
 
                /* Find the page_start addr */
                page_start = offset32 + written;
@@ -2659,7 +3068,7 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf,
                }
 
                /* Loop to write the new data from data_start to data_end */
-               for (addr = data_start; addr < data_end; addr += 4, i++) {
+               for (addr = data_start; addr < data_end; addr += 4, i += 4) {
                        if ((addr == page_end - 4) ||
                                ((bp->flash_info->buffered) &&
                                 (addr == data_end - 4))) {
@@ -2707,6 +3116,9 @@ bnx2_nvram_write(struct bnx2 *bp, u32 offset, u8 *data_buf,
        }
 
 nvram_write_end:
+       if (bp->flash_info->buffered == 0)
+               kfree(flash_buffer);
+
        if (align_start || align_end)
                kfree(buf);
        return rc;
@@ -2728,16 +3140,14 @@ bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
        val = REG_RD(bp, BNX2_MISC_ENABLE_CLR_BITS);
        udelay(5);
 
+       /* Wait for the firmware to tell us it is ok to issue a reset. */
+       bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code, 1);
+
        /* Deposit a driver reset signature so the firmware knows that
         * this is a soft reset. */
-       REG_WR_IND(bp, HOST_VIEW_SHMEM_BASE + BNX2_DRV_RESET_SIGNATURE,
+       REG_WR_IND(bp, bp->shmem_base + BNX2_DRV_RESET_SIGNATURE,
                   BNX2_DRV_RESET_SIGNATURE_MAGIC);
 
-       bp->fw_timed_out = 0;
-
-       /* Wait for the firmware to tell us it is ok to issue a reset. */
-       bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT0 | reset_code);
-
        /* Do a dummy read to force the chip to complete all current transaction
         * before we issue a reset. */
        val = REG_RD(bp, BNX2_MISC_ID);
@@ -2776,10 +3186,10 @@ bnx2_reset_chip(struct bnx2 *bp, u32 reset_code)
                return -ENODEV;
        }
 
-       bp->fw_timed_out = 0;
-
        /* Wait for the firmware to finish its initialization. */
-       bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT1 | reset_code);
+       rc = bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT1 | reset_code, 0);
+       if (rc)
+               return rc;
 
        if (CHIP_ID(bp) == CHIP_ID_5706_A0) {
                /* Adjust the voltage regular to two steps lower.  The default
@@ -2797,6 +3207,7 @@ static int
 bnx2_init_chip(struct bnx2 *bp)
 {
        u32 val;
+       int rc;
 
        /* Make sure the interrupt is not active. */
        REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD, BNX2_PCICFG_INT_ACK_CMD_MASK_INT);
@@ -2812,7 +3223,7 @@ bnx2_init_chip(struct bnx2 *bp)
 
        val |= (0x2 << 20) | (1 << 11);
 
-       if ((bp->flags & PCIX_FLAG) && (bp->bus_speed_mhz = 133))
+       if ((bp->flags & PCIX_FLAG) && (bp->bus_speed_mhz == 133))
                val |= (1 << 23);
 
        if ((CHIP_NUM(bp) == CHIP_NUM_5706) &&
@@ -2932,17 +3343,24 @@ bnx2_init_chip(struct bnx2 *bp)
 
        REG_WR(bp, BNX2_HC_ATTN_BITS_ENABLE, STATUS_ATTN_BITS_LINK_STATE);
 
+       if (REG_RD_IND(bp, bp->shmem_base + BNX2_PORT_FEATURE) &
+           BNX2_PORT_FEATURE_ASF_ENABLED)
+               bp->flags |= ASF_ENABLE_FLAG;
+
        /* Initialize the receive filter. */
        bnx2_set_rx_mode(bp->dev);
 
-       bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT2 | BNX2_DRV_MSG_CODE_RESET);
+       rc = bnx2_fw_sync(bp, BNX2_DRV_MSG_DATA_WAIT2 | BNX2_DRV_MSG_CODE_RESET,
+                         0);
 
        REG_WR(bp, BNX2_MISC_ENABLE_SET_BITS, 0x5ffffff);
        REG_RD(bp, BNX2_MISC_ENABLE_SET_BITS);
 
        udelay(20);
 
-       return 0;
+       bp->hc_cmd = REG_RD(bp, BNX2_HC_COMMAND);
+
+       return rc;
 }
 
 
@@ -2959,8 +3377,8 @@ bnx2_init_tx_ring(struct bnx2 *bp)
 
        bp->tx_prod = 0;
        bp->tx_cons = 0;
+       bp->hw_tx_cons = 0;
        bp->tx_prod_bseq = 0;
-       atomic_set(&bp->tx_avail_bd, bp->tx_ring_size);
        
        val = BNX2_L2CTX_TYPE_TYPE_L2;
        val |= BNX2_L2CTX_TYPE_SIZE_L2;
@@ -2992,29 +3410,38 @@ bnx2_init_rx_ring(struct bnx2 *bp)
 
        ring_prod = prod = bp->rx_prod = 0;
        bp->rx_cons = 0;
+       bp->hw_rx_cons = 0;
        bp->rx_prod_bseq = 0;
                
-       rxbd = &bp->rx_desc_ring[0];
-       for (i = 0; i < MAX_RX_DESC_CNT; i++, rxbd++) {
-               rxbd->rx_bd_len = bp->rx_buf_use_size;
-               rxbd->rx_bd_flags = RX_BD_FLAGS_START | RX_BD_FLAGS_END;
-       }
+       for (i = 0; i < bp->rx_max_ring; i++) {
+               int j;
 
-       rxbd->rx_bd_haddr_hi = (u64) bp->rx_desc_mapping >> 32;
-       rxbd->rx_bd_haddr_lo = (u64) bp->rx_desc_mapping & 0xffffffff;
+               rxbd = &bp->rx_desc_ring[i][0];
+               for (j = 0; j < MAX_RX_DESC_CNT; j++, rxbd++) {
+                       rxbd->rx_bd_len = bp->rx_buf_use_size;
+                       rxbd->rx_bd_flags = RX_BD_FLAGS_START | RX_BD_FLAGS_END;
+               }
+               if (i == (bp->rx_max_ring - 1))
+                       j = 0;
+               else
+                       j = i + 1;
+               rxbd->rx_bd_haddr_hi = (u64) bp->rx_desc_mapping[j] >> 32;
+               rxbd->rx_bd_haddr_lo = (u64) bp->rx_desc_mapping[j] &
+                                      0xffffffff;
+       }
 
        val = BNX2_L2CTX_CTX_TYPE_CTX_BD_CHN_TYPE_VALUE;
        val |= BNX2_L2CTX_CTX_TYPE_SIZE_L2;
        val |= 0x02 << 8;
        CTX_WR(bp, GET_CID_ADDR(RX_CID), BNX2_L2CTX_CTX_TYPE, val);
 
-       val = (u64) bp->rx_desc_mapping >> 32;
+       val = (u64) bp->rx_desc_mapping[0] >> 32;
        CTX_WR(bp, GET_CID_ADDR(RX_CID), BNX2_L2CTX_NX_BDHADDR_HI, val);
 
-       val = (u64) bp->rx_desc_mapping & 0xffffffff;
+       val = (u64) bp->rx_desc_mapping[0] & 0xffffffff;
        CTX_WR(bp, GET_CID_ADDR(RX_CID), BNX2_L2CTX_NX_BDHADDR_LO, val);
 
-       for ( ;ring_prod < bp->rx_ring_size; ) {
+       for (i = 0; i < bp->rx_ring_size; i++) {
                if (bnx2_alloc_rx_skb(bp, ring_prod) < 0) {
                        break;
                }
@@ -3028,6 +3455,29 @@ bnx2_init_rx_ring(struct bnx2 *bp)
        REG_WR(bp, MB_RX_CID_ADDR + BNX2_L2CTX_HOST_BSEQ, bp->rx_prod_bseq);
 }
 
+static void
+bnx2_set_rx_ring_size(struct bnx2 *bp, u32 size)
+{
+       u32 num_rings, max;
+
+       bp->rx_ring_size = size;
+       num_rings = 1;
+       while (size > MAX_RX_DESC_CNT) {
+               size -= MAX_RX_DESC_CNT;
+               num_rings++;
+       }
+       /* round to next power of 2 */
+       max = MAX_RX_RINGS;
+       while ((max & num_rings) == 0)
+               max >>= 1;
+
+       if (num_rings != max)
+               max <<= 1;
+
+       bp->rx_max_ring = max;
+       bp->rx_max_ring_idx = (bp->rx_max_ring * RX_DESC_CNT) - 1;
+}
+
 static void
 bnx2_free_tx_skbs(struct bnx2 *bp)
 {
@@ -3073,11 +3523,11 @@ bnx2_free_rx_skbs(struct bnx2 *bp)
        if (bp->rx_buf_ring == NULL)
                return;
 
-       for (i = 0; i < RX_DESC_CNT; i++) {
+       for (i = 0; i < bp->rx_max_ring_idx; i++) {
                struct sw_bd *rx_buf = &bp->rx_buf_ring[i];
                struct sk_buff *skb = rx_buf->skb;
 
-               if (skb == 0)
+               if (skb == NULL)
                        continue;
 
                pci_unmap_single(bp->pdev, pci_unmap_addr(rx_buf, mapping),
@@ -3130,7 +3580,7 @@ bnx2_test_registers(struct bnx2 *bp)
 {
        int ret;
        int i;
-       static struct {
+       static const struct {
                u16   offset;
                u16   flags;
                u32   rw_mask;
@@ -3160,190 +3610,23 @@ bnx2_test_registers(struct bnx2 *bp)
                { 0x0c00, 0, 0x00000000, 0x00000001 },
                { 0x0c04, 0, 0x00000000, 0x03ff0001 },
                { 0x0c08, 0, 0x0f0ff073, 0x00000000 },
-               { 0x0c0c, 0, 0x00ffffff, 0x00000000 },
-               { 0x0c30, 0, 0x00000000, 0xffffffff },
-               { 0x0c34, 0, 0x00000000, 0xffffffff },
-               { 0x0c38, 0, 0x00000000, 0xffffffff },
-               { 0x0c3c, 0, 0x00000000, 0xffffffff },
-               { 0x0c40, 0, 0x00000000, 0xffffffff },
-               { 0x0c44, 0, 0x00000000, 0xffffffff },
-               { 0x0c48, 0, 0x00000000, 0x0007ffff },
-               { 0x0c4c, 0, 0x00000000, 0xffffffff },
-               { 0x0c50, 0, 0x00000000, 0xffffffff },
-               { 0x0c54, 0, 0x00000000, 0xffffffff },
-               { 0x0c58, 0, 0x00000000, 0xffffffff },
-               { 0x0c5c, 0, 0x00000000, 0xffffffff },
-               { 0x0c60, 0, 0x00000000, 0xffffffff },
-               { 0x0c64, 0, 0x00000000, 0xffffffff },
-               { 0x0c68, 0, 0x00000000, 0xffffffff },
-               { 0x0c6c, 0, 0x00000000, 0xffffffff },
-               { 0x0c70, 0, 0x00000000, 0xffffffff },
-               { 0x0c74, 0, 0x00000000, 0xffffffff },
-               { 0x0c78, 0, 0x00000000, 0xffffffff },
-               { 0x0c7c, 0, 0x00000000, 0xffffffff },
-               { 0x0c80, 0, 0x00000000, 0xffffffff },
-               { 0x0c84, 0, 0x00000000, 0xffffffff },
-               { 0x0c88, 0, 0x00000000, 0xffffffff },
-               { 0x0c8c, 0, 0x00000000, 0xffffffff },
-               { 0x0c90, 0, 0x00000000, 0xffffffff },
-               { 0x0c94, 0, 0x00000000, 0xffffffff },
-               { 0x0c98, 0, 0x00000000, 0xffffffff },
-               { 0x0c9c, 0, 0x00000000, 0xffffffff },
-               { 0x0ca0, 0, 0x00000000, 0xffffffff },
-               { 0x0ca4, 0, 0x00000000, 0xffffffff },
-               { 0x0ca8, 0, 0x00000000, 0x0007ffff },
-               { 0x0cac, 0, 0x00000000, 0xffffffff },
-               { 0x0cb0, 0, 0x00000000, 0xffffffff },
-               { 0x0cb4, 0, 0x00000000, 0xffffffff },
-               { 0x0cb8, 0, 0x00000000, 0xffffffff },
-               { 0x0cbc, 0, 0x00000000, 0xffffffff },
-               { 0x0cc0, 0, 0x00000000, 0xffffffff },
-               { 0x0cc4, 0, 0x00000000, 0xffffffff },
-               { 0x0cc8, 0, 0x00000000, 0xffffffff },
-               { 0x0ccc, 0, 0x00000000, 0xffffffff },
-               { 0x0cd0, 0, 0x00000000, 0xffffffff },
-               { 0x0cd4, 0, 0x00000000, 0xffffffff },
-               { 0x0cd8, 0, 0x00000000, 0xffffffff },
-               { 0x0cdc, 0, 0x00000000, 0xffffffff },
-               { 0x0ce0, 0, 0x00000000, 0xffffffff },
-               { 0x0ce4, 0, 0x00000000, 0xffffffff },
-               { 0x0ce8, 0, 0x00000000, 0xffffffff },
-               { 0x0cec, 0, 0x00000000, 0xffffffff },
-               { 0x0cf0, 0, 0x00000000, 0xffffffff },
-               { 0x0cf4, 0, 0x00000000, 0xffffffff },
-               { 0x0cf8, 0, 0x00000000, 0xffffffff },
-               { 0x0cfc, 0, 0x00000000, 0xffffffff },
-               { 0x0d00, 0, 0x00000000, 0xffffffff },
-               { 0x0d04, 0, 0x00000000, 0xffffffff },
 
                { 0x1000, 0, 0x00000000, 0x00000001 },
                { 0x1004, 0, 0x00000000, 0x000f0001 },
-               { 0x1044, 0, 0x00000000, 0xffc003ff },
-               { 0x1080, 0, 0x00000000, 0x0001ffff },
-               { 0x1084, 0, 0x00000000, 0xffffffff },
-               { 0x1088, 0, 0x00000000, 0xffffffff },
-               { 0x108c, 0, 0x00000000, 0xffffffff },
-               { 0x1090, 0, 0x00000000, 0xffffffff },
-               { 0x1094, 0, 0x00000000, 0xffffffff },
-               { 0x1098, 0, 0x00000000, 0xffffffff },
-               { 0x109c, 0, 0x00000000, 0xffffffff },
-               { 0x10a0, 0, 0x00000000, 0xffffffff },
 
                { 0x1408, 0, 0x01c00800, 0x00000000 },
                { 0x149c, 0, 0x8000ffff, 0x00000000 },
                { 0x14a8, 0, 0x00000000, 0x000001ff },
-               { 0x14ac, 0, 0x4fffffff, 0x10000000 },
+               { 0x14ac, 0, 0x0fffffff, 0x10000000 },
                { 0x14b0, 0, 0x00000002, 0x00000001 },
                { 0x14b8, 0, 0x00000000, 0x00000000 },
                { 0x14c0, 0, 0x00000000, 0x00000009 },
                { 0x14c4, 0, 0x00003fff, 0x00000000 },
                { 0x14cc, 0, 0x00000000, 0x00000001 },
                { 0x14d0, 0, 0xffffffff, 0x00000000 },
-               { 0x1500, 0, 0x00000000, 0xffffffff },
-               { 0x1504, 0, 0x00000000, 0xffffffff },
-               { 0x1508, 0, 0x00000000, 0xffffffff },
-               { 0x150c, 0, 0x00000000, 0xffffffff },
-               { 0x1510, 0, 0x00000000, 0xffffffff },
-               { 0x1514, 0, 0x00000000, 0xffffffff },
-               { 0x1518, 0, 0x00000000, 0xffffffff },
-               { 0x151c, 0, 0x00000000, 0xffffffff },
-               { 0x1520, 0, 0x00000000, 0xffffffff },
-               { 0x1524, 0, 0x00000000, 0xffffffff },
-               { 0x1528, 0, 0x00000000, 0xffffffff },
-               { 0x152c, 0, 0x00000000, 0xffffffff },
-               { 0x1530, 0, 0x00000000, 0xffffffff },
-               { 0x1534, 0, 0x00000000, 0xffffffff },
-               { 0x1538, 0, 0x00000000, 0xffffffff },
-               { 0x153c, 0, 0x00000000, 0xffffffff },
-               { 0x1540, 0, 0x00000000, 0xffffffff },
-               { 0x1544, 0, 0x00000000, 0xffffffff },
-               { 0x1548, 0, 0x00000000, 0xffffffff },
-               { 0x154c, 0, 0x00000000, 0xffffffff },
-               { 0x1550, 0, 0x00000000, 0xffffffff },
-               { 0x1554, 0, 0x00000000, 0xffffffff },
-               { 0x1558, 0, 0x00000000, 0xffffffff },
-               { 0x1600, 0, 0x00000000, 0xffffffff },
-               { 0x1604, 0, 0x00000000, 0xffffffff },
-               { 0x1608, 0, 0x00000000, 0xffffffff },
-               { 0x160c, 0, 0x00000000, 0xffffffff },
-               { 0x1610, 0, 0x00000000, 0xffffffff },
-               { 0x1614, 0, 0x00000000, 0xffffffff },
-               { 0x1618, 0, 0x00000000, 0xffffffff },
-               { 0x161c, 0, 0x00000000, 0xffffffff },
-               { 0x1620, 0, 0x00000000, 0xffffffff },
-               { 0x1624, 0, 0x00000000, 0xffffffff },
-               { 0x1628, 0, 0x00000000, 0xffffffff },
-               { 0x162c, 0, 0x00000000, 0xffffffff },
-               { 0x1630, 0, 0x00000000, 0xffffffff },
-               { 0x1634, 0, 0x00000000, 0xffffffff },
-               { 0x1638, 0, 0x00000000, 0xffffffff },
-               { 0x163c, 0, 0x00000000, 0xffffffff },
-               { 0x1640, 0, 0x00000000, 0xffffffff },
-               { 0x1644, 0, 0x00000000, 0xffffffff },
-               { 0x1648, 0, 0x00000000, 0xffffffff },
-               { 0x164c, 0, 0x00000000, 0xffffffff },
-               { 0x1650, 0, 0x00000000, 0xffffffff },
-               { 0x1654, 0, 0x00000000, 0xffffffff },
 
                { 0x1800, 0, 0x00000000, 0x00000001 },
                { 0x1804, 0, 0x00000000, 0x00000003 },
-               { 0x1840, 0, 0x00000000, 0xffffffff },
-               { 0x1844, 0, 0x00000000, 0xffffffff },
-               { 0x1848, 0, 0x00000000, 0xffffffff },
-               { 0x184c, 0, 0x00000000, 0xffffffff },
-               { 0x1850, 0, 0x00000000, 0xffffffff },
-               { 0x1900, 0, 0x7ffbffff, 0x00000000 },
-               { 0x1904, 0, 0xffffffff, 0x00000000 },
-               { 0x190c, 0, 0xffffffff, 0x00000000 },
-               { 0x1914, 0, 0xffffffff, 0x00000000 },
-               { 0x191c, 0, 0xffffffff, 0x00000000 },
-               { 0x1924, 0, 0xffffffff, 0x00000000 },
-               { 0x192c, 0, 0xffffffff, 0x00000000 },
-               { 0x1934, 0, 0xffffffff, 0x00000000 },
-               { 0x193c, 0, 0xffffffff, 0x00000000 },
-               { 0x1944, 0, 0xffffffff, 0x00000000 },
-               { 0x194c, 0, 0xffffffff, 0x00000000 },
-               { 0x1954, 0, 0xffffffff, 0x00000000 },
-               { 0x195c, 0, 0xffffffff, 0x00000000 },
-               { 0x1964, 0, 0xffffffff, 0x00000000 },
-               { 0x196c, 0, 0xffffffff, 0x00000000 },
-               { 0x1974, 0, 0xffffffff, 0x00000000 },
-               { 0x197c, 0, 0xffffffff, 0x00000000 },
-               { 0x1980, 0, 0x0700ffff, 0x00000000 },
-
-               { 0x1c00, 0, 0x00000000, 0x00000001 },
-               { 0x1c04, 0, 0x00000000, 0x00000003 },
-               { 0x1c08, 0, 0x0000000f, 0x00000000 },
-               { 0x1c40, 0, 0x00000000, 0xffffffff },
-               { 0x1c44, 0, 0x00000000, 0xffffffff },
-               { 0x1c48, 0, 0x00000000, 0xffffffff },
-               { 0x1c4c, 0, 0x00000000, 0xffffffff },
-               { 0x1c50, 0, 0x00000000, 0xffffffff },
-               { 0x1d00, 0, 0x7ffbffff, 0x00000000 },
-               { 0x1d04, 0, 0xffffffff, 0x00000000 },
-               { 0x1d0c, 0, 0xffffffff, 0x00000000 },
-               { 0x1d14, 0, 0xffffffff, 0x00000000 },
-               { 0x1d1c, 0, 0xffffffff, 0x00000000 },
-               { 0x1d24, 0, 0xffffffff, 0x00000000 },
-               { 0x1d2c, 0, 0xffffffff, 0x00000000 },
-               { 0x1d34, 0, 0xffffffff, 0x00000000 },
-               { 0x1d3c, 0, 0xffffffff, 0x00000000 },
-               { 0x1d44, 0, 0xffffffff, 0x00000000 },
-               { 0x1d4c, 0, 0xffffffff, 0x00000000 },
-               { 0x1d54, 0, 0xffffffff, 0x00000000 },
-               { 0x1d5c, 0, 0xffffffff, 0x00000000 },
-               { 0x1d64, 0, 0xffffffff, 0x00000000 },
-               { 0x1d6c, 0, 0xffffffff, 0x00000000 },
-               { 0x1d74, 0, 0xffffffff, 0x00000000 },
-               { 0x1d7c, 0, 0xffffffff, 0x00000000 },
-               { 0x1d80, 0, 0x0700ffff, 0x00000000 },
-
-               { 0x2004, 0, 0x00000000, 0x0337000f },
-               { 0x2008, 0, 0xffffffff, 0x00000000 },
-               { 0x200c, 0, 0xffffffff, 0x00000000 },
-               { 0x2010, 0, 0xffffffff, 0x00000000 },
-               { 0x2014, 0, 0x801fff80, 0x00000000 },
-               { 0x2018, 0, 0x000003ff, 0x00000000 },
 
                { 0x2800, 0, 0x00000000, 0x00000001 },
                { 0x2804, 0, 0x00000000, 0x00003f01 },
@@ -3361,16 +3644,6 @@ bnx2_test_registers(struct bnx2 *bp)
                { 0x2c00, 0, 0x00000000, 0x00000011 },
                { 0x2c04, 0, 0x00000000, 0x00030007 },
 
-               { 0x3000, 0, 0x00000000, 0x00000001 },
-               { 0x3004, 0, 0x00000000, 0x007007ff },
-               { 0x3008, 0, 0x00000003, 0x00000000 },
-               { 0x300c, 0, 0xffffffff, 0x00000000 },
-               { 0x3010, 0, 0xffffffff, 0x00000000 },
-               { 0x3014, 0, 0xffffffff, 0x00000000 },
-               { 0x3034, 0, 0xffffffff, 0x00000000 },
-               { 0x3038, 0, 0xffffffff, 0x00000000 },
-               { 0x3050, 0, 0x00000001, 0x00000000 },
-
                { 0x3c00, 0, 0x00000000, 0x00000001 },
                { 0x3c04, 0, 0x00000000, 0x00070000 },
                { 0x3c08, 0, 0x00007f71, 0x07f00000 },
@@ -3380,88 +3653,11 @@ bnx2_test_registers(struct bnx2 *bp)
                { 0x3c18, 0, 0x00000000, 0xffffffff },
                { 0x3c1c, 0, 0xfffff000, 0x00000000 },
                { 0x3c20, 0, 0xffffff00, 0x00000000 },
-               { 0x3c24, 0, 0xffffffff, 0x00000000 },
-               { 0x3c28, 0, 0xffffffff, 0x00000000 },
-               { 0x3c2c, 0, 0xffffffff, 0x00000000 },
-               { 0x3c30, 0, 0xffffffff, 0x00000000 },
-               { 0x3c34, 0, 0xffffffff, 0x00000000 },
-               { 0x3c38, 0, 0xffffffff, 0x00000000 },
-               { 0x3c3c, 0, 0xffffffff, 0x00000000 },
-               { 0x3c40, 0, 0xffffffff, 0x00000000 },
-               { 0x3c44, 0, 0xffffffff, 0x00000000 },
-               { 0x3c48, 0, 0xffffffff, 0x00000000 },
-               { 0x3c4c, 0, 0xffffffff, 0x00000000 },
-               { 0x3c50, 0, 0xffffffff, 0x00000000 },
-               { 0x3c54, 0, 0xffffffff, 0x00000000 },
-               { 0x3c58, 0, 0xffffffff, 0x00000000 },
-               { 0x3c5c, 0, 0xffffffff, 0x00000000 },
-               { 0x3c60, 0, 0xffffffff, 0x00000000 },
-               { 0x3c64, 0, 0xffffffff, 0x00000000 },
-               { 0x3c68, 0, 0xffffffff, 0x00000000 },
-               { 0x3c6c, 0, 0xffffffff, 0x00000000 },
-               { 0x3c70, 0, 0xffffffff, 0x00000000 },
-               { 0x3c74, 0, 0x0000003f, 0x00000000 },
-               { 0x3c78, 0, 0x00000000, 0x00000000 },
-               { 0x3c7c, 0, 0x00000000, 0x00000000 },
-               { 0x3c80, 0, 0x3fffffff, 0x00000000 },
-               { 0x3c84, 0, 0x0000003f, 0x00000000 },
-               { 0x3c88, 0, 0x00000000, 0xffffffff },
-               { 0x3c8c, 0, 0x00000000, 0xffffffff },
-
-               { 0x4000, 0, 0x00000000, 0x00000001 },
-               { 0x4004, 0, 0x00000000, 0x00030000 },
-               { 0x4008, 0, 0x00000ff0, 0x00000000 },
-               { 0x400c, 0, 0xffffffff, 0x00000000 },
-               { 0x4088, 0, 0x00000000, 0x00070303 },
-
-               { 0x4400, 0, 0x00000000, 0x00000001 },
-               { 0x4404, 0, 0x00000000, 0x00003f01 },
-               { 0x4408, 0, 0x7fff00ff, 0x00000000 },
-               { 0x440c, 0, 0xffffffff, 0x00000000 },
-               { 0x4410, 0, 0xffff,     0x0000 },
-               { 0x4414, 0, 0xffff,     0x0000 },
-               { 0x4418, 0, 0xffff,     0x0000 },
-               { 0x441c, 0, 0xffff,     0x0000 },
-               { 0x4428, 0, 0xffffffff, 0x00000000 },
-               { 0x442c, 0, 0xffffffff, 0x00000000 },
-               { 0x4430, 0, 0xffffffff, 0x00000000 },
-               { 0x4434, 0, 0xffffffff, 0x00000000 },
-               { 0x4438, 0, 0xffffffff, 0x00000000 },
-               { 0x443c, 0, 0xffffffff, 0x00000000 },
-               { 0x4440, 0, 0xffffffff, 0x00000000 },
-               { 0x4444, 0, 0xffffffff, 0x00000000 },
-
-               { 0x4c00, 0, 0x00000000, 0x00000001 },
-               { 0x4c04, 0, 0x00000000, 0x0000003f },
-               { 0x4c08, 0, 0xffffffff, 0x00000000 },
-               { 0x4c0c, 0, 0x0007fc00, 0x00000000 },
-               { 0x4c10, 0, 0x80003fe0, 0x00000000 },
-               { 0x4c14, 0, 0xffffffff, 0x00000000 },
-               { 0x4c44, 0, 0x00000000, 0x9fff9fff },
-               { 0x4c48, 0, 0x00000000, 0xb3009fff },
-               { 0x4c4c, 0, 0x00000000, 0x77f33b30 },
-               { 0x4c50, 0, 0x00000000, 0xffffffff },
 
                { 0x5004, 0, 0x00000000, 0x0000007f },
                { 0x5008, 0, 0x0f0007ff, 0x00000000 },
                { 0x500c, 0, 0xf800f800, 0x07ff07ff },
 
-               { 0x5400, 0, 0x00000008, 0x00000001 },
-               { 0x5404, 0, 0x00000000, 0x0000003f },
-               { 0x5408, 0, 0x0000001f, 0x00000000 },
-               { 0x540c, 0, 0xffffffff, 0x00000000 },
-               { 0x5410, 0, 0xffffffff, 0x00000000 },
-               { 0x5414, 0, 0x0000ffff, 0x00000000 },
-               { 0x5418, 0, 0x0000ffff, 0x00000000 },
-               { 0x541c, 0, 0x0000ffff, 0x00000000 },
-               { 0x5420, 0, 0x0000ffff, 0x00000000 },
-               { 0x5428, 0, 0x000000ff, 0x00000000 },
-               { 0x542c, 0, 0xff00ffff, 0x00000000 },
-               { 0x5430, 0, 0x001fff80, 0x00000000 },
-               { 0x5438, 0, 0xffffffff, 0x00000000 },
-               { 0x543c, 0, 0xffffffff, 0x00000000 },
-               { 0x5440, 0, 0xf800f800, 0x07ff07ff },
-
                { 0x5c00, 0, 0x00000000, 0x00000001 },
                { 0x5c04, 0, 0x00000000, 0x0003000f },
                { 0x5c08, 0, 0x00000003, 0x00000000 },
@@ -3507,11 +3703,11 @@ bnx2_test_registers(struct bnx2 *bp)
                rw_mask = reg_tbl[i].rw_mask;
                ro_mask = reg_tbl[i].ro_mask;
 
-               save_val = readl((u8 *) bp->regview + offset);
+               save_val = readl(bp->regview + offset);
 
-               writel(0, (u8 *) bp->regview + offset);
+               writel(0, bp->regview + offset);
 
-               val = readl((u8 *) bp->regview + offset);
+               val = readl(bp->regview + offset);
                if ((val & rw_mask) != 0) {
                        goto reg_test_err;
                }
@@ -3520,9 +3716,9 @@ bnx2_test_registers(struct bnx2 *bp)
                        goto reg_test_err;
                }
 
-               writel(0xffffffff, (u8 *) bp->regview + offset);
+               writel(0xffffffff, bp->regview + offset);
 
-               val = readl((u8 *) bp->regview + offset);
+               val = readl(bp->regview + offset);
                if ((val & rw_mask) != rw_mask) {
                        goto reg_test_err;
                }
@@ -3531,11 +3727,11 @@ bnx2_test_registers(struct bnx2 *bp)
                        goto reg_test_err;
                }
 
-               writel(save_val, (u8 *) bp->regview + offset);
+               writel(save_val, bp->regview + offset);
                continue;
 
 reg_test_err:
-               writel(save_val, (u8 *) bp->regview + offset);
+               writel(save_val, bp->regview + offset);
                ret = -ENODEV;
                break;
        }
@@ -3545,7 +3741,7 @@ reg_test_err:
 static int
 bnx2_do_mem_test(struct bnx2 *bp, u32 start, u32 size)
 {
-       static u32 test_pattern[] = { 0x00000000, 0xffffffff, 0x55555555,
+       static const u32 test_pattern[] = { 0x00000000, 0xffffffff, 0x55555555,
                0xaaaaaaaa , 0xaa55aa55, 0x55aa55aa };
        int i;
 
@@ -3570,12 +3766,12 @@ bnx2_test_memory(struct bnx2 *bp)
 {
        int ret = 0;
        int i;
-       static struct {
+       static const struct {
                u32   offset;
                u32   len;
        } mem_tbl[] = {
                { 0x60000,  0x4000 },
-               { 0xa0000,  0x4000 },
+               { 0xa0000,  0x3000 },
                { 0xe0000,  0x4000 },
                { 0x120000, 0x4000 },
                { 0x1a0000, 0x4000 },
@@ -3593,29 +3789,37 @@ bnx2_test_memory(struct bnx2 *bp)
        return ret;
 }
 
+#define BNX2_MAC_LOOPBACK      0
+#define BNX2_PHY_LOOPBACK      1
+
 static int
-bnx2_test_loopback(struct bnx2 *bp)
+bnx2_run_loopback(struct bnx2 *bp, int loopback_mode)
 {
        unsigned int pkt_size, num_pkts, i;
        struct sk_buff *skb, *rx_skb;
        unsigned char *packet;
-       u16 rx_start_idx, rx_idx, send_idx;
-       u32 send_bseq, val;
+       u16 rx_start_idx, rx_idx;
        dma_addr_t map;
        struct tx_bd *txbd;
        struct sw_bd *rx_buf;
        struct l2_fhdr *rx_hdr;
        int ret = -ENODEV;
 
-       if (!netif_running(bp->dev))
-               return -ENODEV;
-
-       bp->loopback = MAC_LOOPBACK;
-       bnx2_reset_nic(bp, BNX2_DRV_MSG_CODE_DIAG);
-       bnx2_set_mac_loopback(bp);
+       if (loopback_mode == BNX2_MAC_LOOPBACK) {
+               bp->loopback = MAC_LOOPBACK;
+               bnx2_set_mac_loopback(bp);
+       }
+       else if (loopback_mode == BNX2_PHY_LOOPBACK) {
+               bp->loopback = 0;
+               bnx2_set_phy_loopback(bp);
+       }
+       else
+               return -EINVAL;
 
        pkt_size = 1514;
        skb = dev_alloc_skb(pkt_size);
+       if (!skb)
+               return -ENOMEM;
        packet = skb_put(skb, pkt_size);
        memcpy(packet, bp->mac_addr, 6);
        memset(packet + 6, 0x0, 8);
@@ -3625,18 +3829,17 @@ bnx2_test_loopback(struct bnx2 *bp)
        map = pci_map_single(bp->pdev, skb->data, pkt_size,
                PCI_DMA_TODEVICE);
 
-       val = REG_RD(bp, BNX2_HC_COMMAND);
-       REG_WR(bp, BNX2_HC_COMMAND, val | BNX2_HC_COMMAND_COAL_NOW_WO_INT);
+       REG_WR(bp, BNX2_HC_COMMAND,
+              bp->hc_cmd | BNX2_HC_COMMAND_COAL_NOW_WO_INT);
+
        REG_RD(bp, BNX2_HC_COMMAND);
 
        udelay(5);
        rx_start_idx = bp->status_blk->status_rx_quick_consumer_index0;
 
-       send_idx = 0;
-       send_bseq = 0;
        num_pkts = 0;
 
-       txbd = &bp->tx_desc_ring[send_idx];
+       txbd = &bp->tx_desc_ring[TX_RING_IDX(bp->tx_prod)];
 
        txbd->tx_bd_haddr_hi = (u64) map >> 32;
        txbd->tx_bd_haddr_lo = (u64) map & 0xffffffff;
@@ -3644,18 +3847,17 @@ bnx2_test_loopback(struct bnx2 *bp)
        txbd->tx_bd_vlan_tag_flags = TX_BD_FLAGS_START | TX_BD_FLAGS_END;
 
        num_pkts++;
-       send_idx = NEXT_TX_BD(send_idx);
-
-       send_bseq += pkt_size;
-
-       REG_WR16(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BIDX, send_idx);
-       REG_WR(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BSEQ, send_bseq);
+       bp->tx_prod = NEXT_TX_BD(bp->tx_prod);
+       bp->tx_prod_bseq += pkt_size;
 
+       REG_WR16(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BIDX, bp->tx_prod);
+       REG_WR(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BSEQ, bp->tx_prod_bseq);
 
        udelay(100);
 
-       val = REG_RD(bp, BNX2_HC_COMMAND);
-       REG_WR(bp, BNX2_HC_COMMAND, val | BNX2_HC_COMMAND_COAL_NOW_WO_INT);
+       REG_WR(bp, BNX2_HC_COMMAND,
+              bp->hc_cmd | BNX2_HC_COMMAND_COAL_NOW_WO_INT);
+
        REG_RD(bp, BNX2_HC_COMMAND);
 
        udelay(5);
@@ -3663,7 +3865,7 @@ bnx2_test_loopback(struct bnx2 *bp)
        pci_unmap_single(bp->pdev, map, pkt_size, PCI_DMA_TODEVICE);
        dev_kfree_skb_irq(skb);
 
-       if (bp->status_blk->status_tx_quick_consumer_index0 != send_idx) {
+       if (bp->status_blk->status_tx_quick_consumer_index0 != bp->tx_prod) {
                goto loopback_test_done;
        }
 
@@ -3682,7 +3884,7 @@ bnx2_test_loopback(struct bnx2 *bp)
                pci_unmap_addr(rx_buf, mapping),
                bp->rx_buf_size, PCI_DMA_FROMDEVICE);
 
-       if (rx_hdr->l2_fhdr_errors &
+       if (rx_hdr->l2_fhdr_status &
                (L2_FHDR_ERRORS_BAD_CRC |
                L2_FHDR_ERRORS_PHY_DECODE |
                L2_FHDR_ERRORS_ALIGNMENT |
@@ -3709,6 +3911,30 @@ loopback_test_done:
        return ret;
 }
 
+#define BNX2_MAC_LOOPBACK_FAILED       1
+#define BNX2_PHY_LOOPBACK_FAILED       2
+#define BNX2_LOOPBACK_FAILED           (BNX2_MAC_LOOPBACK_FAILED |     \
+                                        BNX2_PHY_LOOPBACK_FAILED)
+
+static int
+bnx2_test_loopback(struct bnx2 *bp)
+{
+       int rc = 0;
+
+       if (!netif_running(bp->dev))
+               return BNX2_LOOPBACK_FAILED;
+
+       bnx2_reset_nic(bp, BNX2_DRV_MSG_CODE_RESET);
+       spin_lock_bh(&bp->phy_lock);
+       bnx2_init_phy(bp);
+       spin_unlock_bh(&bp->phy_lock);
+       if (bnx2_run_loopback(bp, BNX2_MAC_LOOPBACK))
+               rc |= BNX2_MAC_LOOPBACK_FAILED;
+       if (bnx2_run_loopback(bp, BNX2_PHY_LOOPBACK))
+               rc |= BNX2_PHY_LOOPBACK_FAILED;
+       return rc;
+}
+
 #define NVRAM_SIZE 0x200
 #define CRC32_RESIDUAL 0xdebb20e3
 
@@ -3752,10 +3978,10 @@ bnx2_test_link(struct bnx2 *bp)
 {
        u32 bmsr;
 
-       spin_lock_irq(&bp->phy_lock);
+       spin_lock_bh(&bp->phy_lock);
        bnx2_read_phy(bp, MII_BMSR, &bmsr);
        bnx2_read_phy(bp, MII_BMSR, &bmsr);
-       spin_unlock_irq(&bp->phy_lock);
+       spin_unlock_bh(&bp->phy_lock);
                
        if (bmsr & BMSR_LSTATUS) {
                return 0;
@@ -3767,7 +3993,6 @@ static int
 bnx2_test_intr(struct bnx2 *bp)
 {
        int i;
-       u32 val;
        u16 status_idx;
 
        if (!netif_running(bp->dev))
@@ -3776,8 +4001,7 @@ bnx2_test_intr(struct bnx2 *bp)
        status_idx = REG_RD(bp, BNX2_PCICFG_INT_ACK_CMD) & 0xffff;
 
        /* This register is not touched during run-time. */
-       val = REG_RD(bp, BNX2_HC_COMMAND);
-       REG_WR(bp, BNX2_HC_COMMAND, val | BNX2_HC_COMMAND_COAL_NOW);
+       REG_WR(bp, BNX2_HC_COMMAND, bp->hc_cmd | BNX2_HC_COMMAND_COAL_NOW);
        REG_RD(bp, BNX2_HC_COMMAND);
 
        for (i = 0; i < 10; i++) {
@@ -3801,23 +4025,27 @@ bnx2_timer(unsigned long data)
        struct bnx2 *bp = (struct bnx2 *) data;
        u32 msg;
 
+       if (!netif_running(bp->dev))
+               return;
+
        if (atomic_read(&bp->intr_sem) != 0)
                goto bnx2_restart_timer;
 
        msg = (u32) ++bp->fw_drv_pulse_wr_seq;
-       REG_WR_IND(bp, HOST_VIEW_SHMEM_BASE + BNX2_DRV_PULSE_MB, msg);
+       REG_WR_IND(bp, bp->shmem_base + BNX2_DRV_PULSE_MB, msg);
 
        if ((bp->phy_flags & PHY_SERDES_FLAG) &&
            (CHIP_NUM(bp) == CHIP_NUM_5706)) {
-               unsigned long flags;
 
-               spin_lock_irqsave(&bp->phy_lock, flags);
+               spin_lock(&bp->phy_lock);
                if (bp->serdes_an_pending) {
                        bp->serdes_an_pending--;
                }
                else if ((bp->link_up == 0) && (bp->autoneg & AUTONEG_SPEED)) {
                        u32 bmcr;
 
+                       bp->current_interval = bp->timer_interval;
+
                        bnx2_read_phy(bp, MII_BMCR, &bmcr);
 
                        if (bmcr & BMCR_ANENABLE) {
@@ -3860,24 +4088,24 @@ bnx2_timer(unsigned long data)
 
                        }
                }
+               else
+                       bp->current_interval = bp->timer_interval;
 
-               spin_unlock_irqrestore(&bp->phy_lock, flags);
+               spin_unlock(&bp->phy_lock);
        }
 
 bnx2_restart_timer:
-       bp->timer.expires = RUN_AT(bp->timer_interval);
-
-       add_timer(&bp->timer);
+       mod_timer(&bp->timer, jiffies + bp->current_interval);
 }
 
 /* Called with rtnl_lock */
 static int
 bnx2_open(struct net_device *dev)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
        int rc;
 
-       bnx2_set_power_state(bp, 0);
+       bnx2_set_power_state(bp, PCI_D0);
        bnx2_disable_int(bp);
 
        rc = bnx2_alloc_mem(bp);
@@ -3920,12 +4148,7 @@ bnx2_open(struct net_device *dev)
                return rc;
        }
        
-       init_timer(&bp->timer);
-
-       bp->timer.expires = RUN_AT(bp->timer_interval);
-       bp->timer.data = (unsigned long) bp;
-       bp->timer.function = bnx2_timer;
-       add_timer(&bp->timer);
+       mod_timer(&bp->timer, jiffies + bp->current_interval);
 
        atomic_set(&bp->intr_sem, 0);
 
@@ -3976,18 +4199,23 @@ bnx2_reset_task(void *data)
 {
        struct bnx2 *bp = data;
 
+       if (!netif_running(bp->dev))
+               return;
+
+       bp->in_reset_task = 1;
        bnx2_netif_stop(bp);
 
        bnx2_init_nic(bp);
 
        atomic_set(&bp->intr_sem, 1);
        bnx2_netif_start(bp);
+       bp->in_reset_task = 0;
 }
 
 static void
 bnx2_tx_timeout(struct net_device *dev)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        /* This allows the netif to be shutdown gracefully before resetting */
        schedule_work(&bp->reset_task);
@@ -3998,7 +4226,7 @@ bnx2_tx_timeout(struct net_device *dev)
 static void
 bnx2_vlan_rx_register(struct net_device *dev, struct vlan_group *vlgrp)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        bnx2_netif_stop(bp);
 
@@ -4012,7 +4240,7 @@ bnx2_vlan_rx_register(struct net_device *dev, struct vlan_group *vlgrp)
 static void
 bnx2_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        bnx2_netif_stop(bp);
 
@@ -4024,7 +4252,7 @@ bnx2_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
 }
 #endif
 
-/* Called with dev->xmit_lock.
+/* Called with netif_tx_lock.
  * hard_start_xmit is pseudo-lockless - a lock is only required when
  * the tx queue is full. This way, we get the benefit of lockless
  * operations most of the time without the complexities to handle
@@ -4033,7 +4261,7 @@ bnx2_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
 static int
 bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
        dma_addr_t mapping;
        struct tx_bd *txbd;
        struct sw_bd *tx_buf;
@@ -4041,9 +4269,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
        u16 prod, ring_prod;
        int i;
 
-       if (unlikely(atomic_read(&bp->tx_avail_bd) <
-               (skb_shinfo(skb)->nr_frags + 1))) {
-
+       if (unlikely(bnx2_tx_avail(bp) < (skb_shinfo(skb)->nr_frags + 1))) {
                netif_stop_queue(dev);
                printk(KERN_ERR PFX "%s: BUG! Tx ring full when queue awake!\n",
                        dev->name);
@@ -4064,7 +4290,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
                        (TX_BD_FLAGS_VLAN_TAG | (vlan_tx_tag_get(skb) << 16));
        }
 #ifdef BCM_TSO 
-       if ((mss = skb_shinfo(skb)->tso_size) &&
+       if ((mss = skb_shinfo(skb)->gso_size) &&
                (skb->len > (bp->dev->mtu + ETH_HLEN))) {
                u32 tcp_opt_len, ip_tcp_len;
 
@@ -4140,8 +4366,6 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
        prod = NEXT_TX_BD(prod);
        bp->tx_prod_bseq += skb->len;
 
-       atomic_sub(last_frag + 1, &bp->tx_avail_bd);
-
        REG_WR16(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BIDX, prod);
        REG_WR(bp, MB_TX_CID_ADDR + BNX2_L2CTX_TX_HOST_BSEQ, bp->tx_prod_bseq);
 
@@ -4150,17 +4374,13 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
        bp->tx_prod = prod;
        dev->trans_start = jiffies;
 
-       if (unlikely(atomic_read(&bp->tx_avail_bd) <= MAX_SKB_FRAGS)) {
-               unsigned long flags;
-
-               spin_lock_irqsave(&bp->tx_lock, flags);
-               if (atomic_read(&bp->tx_avail_bd) <= MAX_SKB_FRAGS) {
-                       netif_stop_queue(dev);
-
-                       if (atomic_read(&bp->tx_avail_bd) > MAX_SKB_FRAGS)
-                               netif_wake_queue(dev);
-               }
-               spin_unlock_irqrestore(&bp->tx_lock, flags);
+       if (unlikely(bnx2_tx_avail(bp) <= MAX_SKB_FRAGS)) {
+               spin_lock(&bp->tx_lock);
+               netif_stop_queue(dev);
+               
+               if (bnx2_tx_avail(bp) > MAX_SKB_FRAGS)
+                       netif_wake_queue(dev);
+               spin_unlock(&bp->tx_lock);
        }
 
        return NETDEV_TX_OK;
@@ -4170,13 +4390,21 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev)
 static int
 bnx2_close(struct net_device *dev)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
        u32 reset_code;
 
-       flush_scheduled_work();
+       /* Calling flush_scheduled_work() may deadlock because
+        * linkwatch_event() may be on the workqueue and it will try to get
+        * the rtnl_lock which we are holding.
+        */
+       while (bp->in_reset_task)
+               msleep(1);
+
        bnx2_netif_stop(bp);
        del_timer_sync(&bp->timer);
-       if (bp->wol)
+       if (bp->flags & NO_WOL_FLAG)
+               reset_code = BNX2_DRV_MSG_CODE_UNLOAD;
+       else if (bp->wol)
                reset_code = BNX2_DRV_MSG_CODE_SUSPEND_WOL;
        else
                reset_code = BNX2_DRV_MSG_CODE_SUSPEND_NO_WOL;
@@ -4190,7 +4418,7 @@ bnx2_close(struct net_device *dev)
        bnx2_free_mem(bp);
        bp->link_up = 0;
        netif_carrier_off(bp->dev);
-       bnx2_set_power_state(bp, 3);
+       bnx2_set_power_state(bp, PCI_D3hot);
        return 0;
 }
 
@@ -4210,7 +4438,7 @@ bnx2_close(struct net_device *dev)
 static struct net_device_stats *
 bnx2_get_stats(struct net_device *dev)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
        struct statistics_block *stats_blk = bp->stats_blk;
        struct net_device_stats *net_stats = &bp->net_stats;
 
@@ -4260,7 +4488,8 @@ bnx2_get_stats(struct net_device *dev)
                (unsigned long) (stats_blk->stat_Dot3StatsExcessiveCollisions +
                stats_blk->stat_Dot3StatsLateCollisions);
 
-       if (CHIP_NUM(bp) == CHIP_NUM_5706)
+       if ((CHIP_NUM(bp) == CHIP_NUM_5706) ||
+           (CHIP_ID(bp) == CHIP_ID_5708_A0))
                net_stats->tx_carrier_errors = 0;
        else {
                net_stats->tx_carrier_errors =
@@ -4283,7 +4512,7 @@ bnx2_get_stats(struct net_device *dev)
 static int
 bnx2_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        cmd->supported = SUPPORTED_Autoneg;
        if (bp->phy_flags & PHY_SERDES_FLAG) {
@@ -4330,7 +4559,7 @@ bnx2_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 static int
 bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
        u8 autoneg = bp->autoneg;
        u8 req_duplex = bp->req_duplex;
        u16 req_line_speed = bp->req_line_speed;
@@ -4390,11 +4619,11 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
        bp->req_line_speed = req_line_speed;
        bp->req_duplex = req_duplex;
 
-       spin_lock_irq(&bp->phy_lock);
+       spin_lock_bh(&bp->phy_lock);
 
        bnx2_setup_phy(bp);
 
-       spin_unlock_irq(&bp->phy_lock);
+       spin_unlock_bh(&bp->phy_lock);
 
        return 0;
 }
@@ -4402,7 +4631,7 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 static void
 bnx2_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        strcpy(info->driver, DRV_MODULE_NAME);
        strcpy(info->version, DRV_MODULE_VERSION);
@@ -4410,15 +4639,72 @@ bnx2_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
        info->fw_version[0] = ((bp->fw_ver & 0xff000000) >> 24) + '0';
        info->fw_version[2] = ((bp->fw_ver & 0xff0000) >> 16) + '0';
        info->fw_version[4] = ((bp->fw_ver & 0xff00) >> 8) + '0';
-       info->fw_version[6] = (bp->fw_ver & 0xff) + '0';
-       info->fw_version[1] = info->fw_version[3] = info->fw_version[5] = '.';
-       info->fw_version[7] = 0;
+       info->fw_version[1] = info->fw_version[3] = '.';
+       info->fw_version[5] = 0;
+}
+
+#define BNX2_REGDUMP_LEN               (32 * 1024)
+
+static int
+bnx2_get_regs_len(struct net_device *dev)
+{
+       return BNX2_REGDUMP_LEN;
+}
+
+static void
+bnx2_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p)
+{
+       u32 *p = _p, i, offset;
+       u8 *orig_p = _p;
+       struct bnx2 *bp = netdev_priv(dev);
+       u32 reg_boundaries[] = { 0x0000, 0x0098, 0x0400, 0x045c,
+                                0x0800, 0x0880, 0x0c00, 0x0c10,
+                                0x0c30, 0x0d08, 0x1000, 0x101c,
+                                0x1040, 0x1048, 0x1080, 0x10a4,
+                                0x1400, 0x1490, 0x1498, 0x14f0,
+                                0x1500, 0x155c, 0x1580, 0x15dc,
+                                0x1600, 0x1658, 0x1680, 0x16d8,
+                                0x1800, 0x1820, 0x1840, 0x1854,
+                                0x1880, 0x1894, 0x1900, 0x1984,
+                                0x1c00, 0x1c0c, 0x1c40, 0x1c54,
+                                0x1c80, 0x1c94, 0x1d00, 0x1d84,
+                                0x2000, 0x2030, 0x23c0, 0x2400,
+                                0x2800, 0x2820, 0x2830, 0x2850,
+                                0x2b40, 0x2c10, 0x2fc0, 0x3058,
+                                0x3c00, 0x3c94, 0x4000, 0x4010,
+                                0x4080, 0x4090, 0x43c0, 0x4458,
+                                0x4c00, 0x4c18, 0x4c40, 0x4c54,
+                                0x4fc0, 0x5010, 0x53c0, 0x5444,
+                                0x5c00, 0x5c18, 0x5c80, 0x5c90,
+                                0x5fc0, 0x6000, 0x6400, 0x6428,
+                                0x6800, 0x6848, 0x684c, 0x6860,
+                                0x6888, 0x6910, 0x8000 };
+
+       regs->version = 0;
+
+       memset(p, 0, BNX2_REGDUMP_LEN);
+
+       if (!netif_running(bp->dev))
+               return;
+
+       i = 0;
+       offset = reg_boundaries[0];
+       p += offset;
+       while (offset < BNX2_REGDUMP_LEN) {
+               *p++ = REG_RD(bp, offset);
+               offset += 4;
+               if (offset == reg_boundaries[i + 1]) {
+                       offset = reg_boundaries[i + 2];
+                       p = (u32 *) (orig_p + offset);
+                       i += 2;
+               }
+       }
 }
 
 static void
 bnx2_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        if (bp->flags & NO_WOL_FLAG) {
                wol->supported = 0;
@@ -4437,7 +4723,7 @@ bnx2_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 static int
 bnx2_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        if (wol->wolopts & ~WAKE_MAGIC)
                return -EINVAL;
@@ -4457,26 +4743,27 @@ bnx2_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 static int
 bnx2_nway_reset(struct net_device *dev)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
        u32 bmcr;
 
        if (!(bp->autoneg & AUTONEG_SPEED)) {
                return -EINVAL;
        }
 
-       spin_lock_irq(&bp->phy_lock);
+       spin_lock_bh(&bp->phy_lock);
 
        /* Force a link down visible on the other side */
        if (bp->phy_flags & PHY_SERDES_FLAG) {
                bnx2_write_phy(bp, MII_BMCR, BMCR_LOOPBACK);
-               spin_unlock_irq(&bp->phy_lock);
+               spin_unlock_bh(&bp->phy_lock);
 
                msleep(20);
 
-               spin_lock_irq(&bp->phy_lock);
+               spin_lock_bh(&bp->phy_lock);
                if (CHIP_NUM(bp) == CHIP_NUM_5706) {
-                       bp->serdes_an_pending = SERDES_AN_TIMEOUT /
-                               bp->timer_interval;
+                       bp->current_interval = SERDES_AN_TIMEOUT;
+                       bp->serdes_an_pending = 1;
+                       mod_timer(&bp->timer, jiffies + bp->current_interval);
                }
        }
 
@@ -4484,7 +4771,7 @@ bnx2_nway_reset(struct net_device *dev)
        bmcr &= ~BMCR_LOOPBACK;
        bnx2_write_phy(bp, MII_BMCR, bmcr | BMCR_ANRESTART | BMCR_ANENABLE);
 
-       spin_unlock_irq(&bp->phy_lock);
+       spin_unlock_bh(&bp->phy_lock);
 
        return 0;
 }
@@ -4492,26 +4779,22 @@ bnx2_nway_reset(struct net_device *dev)
 static int
 bnx2_get_eeprom_len(struct net_device *dev)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
-       if (bp->flash_info == 0)
+       if (bp->flash_info == NULL)
                return 0;
 
-       return (int) bp->flash_info->total_size;
+       return (int) bp->flash_size;
 }
 
 static int
 bnx2_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
                u8 *eebuf)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
        int rc;
 
-       if (eeprom->offset > bp->flash_info->total_size)
-               return -EINVAL;
-
-       if ((eeprom->offset + eeprom->len) > bp->flash_info->total_size)
-               eeprom->len = bp->flash_info->total_size - eeprom->offset;
+       /* parameters already validated in ethtool_get_eeprom */
 
        rc = bnx2_nvram_read(bp, eeprom->offset, eebuf, eeprom->len);
 
@@ -4522,14 +4805,10 @@ static int
 bnx2_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
                u8 *eebuf)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
        int rc;
 
-       if (eeprom->offset > bp->flash_info->total_size)
-               return -EINVAL;
-
-       if ((eeprom->offset + eeprom->len) > bp->flash_info->total_size)
-               eeprom->len = bp->flash_info->total_size - eeprom->offset;
+       /* parameters already validated in ethtool_set_eeprom */
 
        rc = bnx2_nvram_write(bp, eeprom->offset, eebuf, eeprom->len);
 
@@ -4539,7 +4818,7 @@ bnx2_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
 static int
 bnx2_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        memset(coal, 0, sizeof(struct ethtool_coalesce));
 
@@ -4561,7 +4840,7 @@ bnx2_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
 static int
 bnx2_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        bp->rx_ticks = (u16) coal->rx_coalesce_usecs;
        if (bp->rx_ticks > 0x3ff) bp->rx_ticks = 0x3ff;
@@ -4605,9 +4884,9 @@ bnx2_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coal)
 static void
 bnx2_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ering)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
-       ering->rx_max_pending = MAX_RX_DESC_CNT;
+       ering->rx_max_pending = MAX_TOTAL_RX_DESC_CNT;
        ering->rx_mini_max_pending = 0;
        ering->rx_jumbo_max_pending = 0;
 
@@ -4622,19 +4901,30 @@ bnx2_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ering)
 static int
 bnx2_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ering)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
-       if ((ering->rx_pending > MAX_RX_DESC_CNT) ||
+       if ((ering->rx_pending > MAX_TOTAL_RX_DESC_CNT) ||
                (ering->tx_pending > MAX_TX_DESC_CNT) ||
                (ering->tx_pending <= MAX_SKB_FRAGS)) {
 
                return -EINVAL;
        }
-       bp->rx_ring_size = ering->rx_pending;
+       if (netif_running(bp->dev)) {
+               bnx2_netif_stop(bp);
+               bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
+               bnx2_free_skbs(bp);
+               bnx2_free_mem(bp);
+       }
+
+       bnx2_set_rx_ring_size(bp, ering->rx_pending);
        bp->tx_ring_size = ering->tx_pending;
 
        if (netif_running(bp->dev)) {
-               bnx2_netif_stop(bp);
+               int rc;
+
+               rc = bnx2_alloc_mem(bp);
+               if (rc)
+                       return rc;
                bnx2_init_nic(bp);
                bnx2_netif_start(bp);
        }
@@ -4645,7 +4935,7 @@ bnx2_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ering)
 static void
 bnx2_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        epause->autoneg = ((bp->autoneg & AUTONEG_FLOW_CTRL) != 0);
        epause->rx_pause = ((bp->flow_ctrl & FLOW_CTRL_RX) != 0);
@@ -4655,7 +4945,7 @@ bnx2_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause)
 static int
 bnx2_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        bp->req_flow_ctrl = 0;
        if (epause->rx_pause)
@@ -4670,11 +4960,11 @@ bnx2_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause)
                bp->autoneg &= ~AUTONEG_FLOW_CTRL;
        }
 
-       spin_lock_irq(&bp->phy_lock);
+       spin_lock_bh(&bp->phy_lock);
 
        bnx2_setup_phy(bp);
 
-       spin_unlock_irq(&bp->phy_lock);
+       spin_unlock_bh(&bp->phy_lock);
 
        return 0;
 }
@@ -4682,7 +4972,7 @@ bnx2_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause)
 static u32
 bnx2_get_rx_csum(struct net_device *dev)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        return bp->rx_csum;
 }
@@ -4690,7 +4980,7 @@ bnx2_get_rx_csum(struct net_device *dev)
 static int
 bnx2_set_rx_csum(struct net_device *dev, u32 data)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        bp->rx_csum = data;
        return 0;
@@ -4698,7 +4988,7 @@ bnx2_set_rx_csum(struct net_device *dev, u32 data)
 
 #define BNX2_NUM_STATS 45
 
-struct {
+static struct {
        char string[ETH_GSTRING_LEN];
 } bnx2_stats_str_arr[BNX2_NUM_STATS] = {
        { "rx_bytes" },
@@ -4750,7 +5040,7 @@ struct {
 
 #define STATS_OFFSET32(offset_name) (offsetof(struct statistics_block, offset_name) / 4)
 
-unsigned long bnx2_stats_offset_arr[BNX2_NUM_STATS] = {
+static const unsigned long bnx2_stats_offset_arr[BNX2_NUM_STATS] = {
     STATS_OFFSET32(stat_IfHCInOctets_hi),
     STATS_OFFSET32(stat_IfHCInBadOctets_hi),
     STATS_OFFSET32(stat_IfHCOutOctets_hi),
@@ -4801,7 +5091,7 @@ unsigned long bnx2_stats_offset_arr[BNX2_NUM_STATS] = {
 /* stat_IfHCInBadOctets and stat_Dot3StatsCarrierSenseErrors are
  * skipped because of errata.
  */               
-u8 bnx2_5706_stats_len_arr[BNX2_NUM_STATS] = {
+static u8 bnx2_5706_stats_len_arr[BNX2_NUM_STATS] = {
        8,0,8,8,8,8,8,8,8,8,
        4,0,4,4,4,4,4,4,4,4,
        4,4,4,4,4,4,4,4,4,4,
@@ -4809,9 +5099,17 @@ u8 bnx2_5706_stats_len_arr[BNX2_NUM_STATS] = {
        4,4,4,4,4,
 };
 
+static u8 bnx2_5708_stats_len_arr[BNX2_NUM_STATS] = {
+       8,0,8,8,8,8,8,8,8,8,
+       4,4,4,4,4,4,4,4,4,4,
+       4,4,4,4,4,4,4,4,4,4,
+       4,4,4,4,4,4,4,4,4,4,
+       4,4,4,4,4,
+};
+
 #define BNX2_NUM_TESTS 6
 
-struct {
+static struct {
        char string[ETH_GSTRING_LEN];
 } bnx2_tests_str_arr[BNX2_NUM_TESTS] = {
        { "register_test (offline)" },
@@ -4831,7 +5129,7 @@ bnx2_self_test_count(struct net_device *dev)
 static void
 bnx2_self_test(struct net_device *dev, struct ethtool_test *etest, u64 *buf)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        memset(buf, 0, sizeof(u64) * BNX2_NUM_TESTS);
        if (etest->flags & ETH_TEST_FL_OFFLINE) {
@@ -4847,10 +5145,8 @@ bnx2_self_test(struct net_device *dev, struct ethtool_test *etest, u64 *buf)
                        buf[1] = 1;
                        etest->flags |= ETH_TEST_FL_FAILED;
                }
-               if (bnx2_test_loopback(bp) != 0) {
-                       buf[2] = 1;
+               if ((buf[2] = bnx2_test_loopback(bp)) != 0)
                        etest->flags |= ETH_TEST_FL_FAILED;
-               }
 
                if (!netif_running(bp->dev)) {
                        bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
@@ -4907,18 +5203,23 @@ static void
 bnx2_get_ethtool_stats(struct net_device *dev,
                struct ethtool_stats *stats, u64 *buf)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
        int i;
        u32 *hw_stats = (u32 *) bp->stats_blk;
-       u8 *stats_len_arr = 0;
+       u8 *stats_len_arr = NULL;
 
        if (hw_stats == NULL) {
                memset(buf, 0, sizeof(u64) * BNX2_NUM_STATS);
                return;
        }
 
-       if (CHIP_NUM(bp) == CHIP_NUM_5706)
+       if ((CHIP_ID(bp) == CHIP_ID_5706_A0) ||
+           (CHIP_ID(bp) == CHIP_ID_5706_A1) ||
+           (CHIP_ID(bp) == CHIP_ID_5706_A2) ||
+           (CHIP_ID(bp) == CHIP_ID_5708_A0))
                stats_len_arr = bnx2_5706_stats_len_arr;
+       else
+               stats_len_arr = bnx2_5708_stats_len_arr;
 
        for (i = 0; i < BNX2_NUM_STATS; i++) {
                if (stats_len_arr[i] == 0) {
@@ -4942,7 +5243,7 @@ bnx2_get_ethtool_stats(struct net_device *dev,
 static int
 bnx2_phys_id(struct net_device *dev, u32 data)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
        int i;
        u32 save;
 
@@ -4977,6 +5278,8 @@ static struct ethtool_ops bnx2_ethtool_ops = {
        .get_settings           = bnx2_get_settings,
        .set_settings           = bnx2_set_settings,
        .get_drvinfo            = bnx2_get_drvinfo,
+       .get_regs_len           = bnx2_get_regs_len,
+       .get_regs               = bnx2_get_regs,
        .get_wol                = bnx2_get_wol,
        .set_wol                = bnx2_set_wol,
        .nway_reset             = bnx2_nway_reset,
@@ -5006,14 +5309,15 @@ static struct ethtool_ops bnx2_ethtool_ops = {
        .phys_id                = bnx2_phys_id,
        .get_stats_count        = bnx2_get_stats_count,
        .get_ethtool_stats      = bnx2_get_ethtool_stats,
+       .get_perm_addr          = ethtool_op_get_perm_addr,
 };
 
 /* Called with rtnl_lock */
 static int
 bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-       struct mii_ioctl_data *data = (struct mii_ioctl_data *)&ifr->ifr_data;
-       struct bnx2 *bp = dev->priv;
+       struct mii_ioctl_data *data = if_mii(ifr);
+       struct bnx2 *bp = netdev_priv(dev);
        int err;
 
        switch(cmd) {
@@ -5024,9 +5328,9 @@ bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
        case SIOCGMIIREG: {
                u32 mii_regval;
 
-               spin_lock_irq(&bp->phy_lock);
+               spin_lock_bh(&bp->phy_lock);
                err = bnx2_read_phy(bp, data->reg_num & 0x1f, &mii_regval);
-               spin_unlock_irq(&bp->phy_lock);
+               spin_unlock_bh(&bp->phy_lock);
 
                data->val_out = mii_regval;
 
@@ -5037,9 +5341,9 @@ bnx2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                if (!capable(CAP_NET_ADMIN))
                        return -EPERM;
 
-               spin_lock_irq(&bp->phy_lock);
+               spin_lock_bh(&bp->phy_lock);
                err = bnx2_write_phy(bp, data->reg_num & 0x1f, data->val_in);
-               spin_unlock_irq(&bp->phy_lock);
+               spin_unlock_bh(&bp->phy_lock);
 
                return err;
 
@@ -5055,7 +5359,10 @@ static int
 bnx2_change_mac_addr(struct net_device *dev, void *p)
 {
        struct sockaddr *addr = p;
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
+
+       if (!is_valid_ether_addr(addr->sa_data))
+               return -EINVAL;
 
        memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
        if (netif_running(dev))
@@ -5068,7 +5375,7 @@ bnx2_change_mac_addr(struct net_device *dev, void *p)
 static int
 bnx2_change_mtu(struct net_device *dev, int new_mtu)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        if (((new_mtu + ETH_HLEN) > MAX_ETHERNET_JUMBO_PACKET_SIZE) ||
                ((new_mtu + ETH_HLEN) < MIN_ETHERNET_PACKET_SIZE))
@@ -5089,7 +5396,7 @@ bnx2_change_mtu(struct net_device *dev, int new_mtu)
 static void
 poll_bnx2(struct net_device *dev)
 {
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        disable_irq(bp->pdev->irq);
        bnx2_interrupt(bp->pdev->irq, dev, NULL);
@@ -5107,7 +5414,7 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 
        SET_MODULE_OWNER(dev);
        SET_NETDEV_DEV(dev, &pdev->dev);
-       bp = dev->priv;
+       bp = netdev_priv(dev);
 
        bp->flags = 0;
        bp->phy_flags = 0;
@@ -5192,12 +5499,10 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
                               BNX2_PCICFG_MISC_CONFIG_REG_WINDOW_ENA |
                               BNX2_PCICFG_MISC_CONFIG_TARGET_MB_WORD_SWAP);
 
-       bnx2_set_power_state(bp, 0);
+       bnx2_set_power_state(bp, PCI_D0);
 
        bp->chip_id = REG_RD(bp, BNX2_MISC_ID);
 
-       bp->phy_addr = 1;
-
        /* Get bus information. */
        reg = REG_RD(bp, BNX2_PCICFG_MISC_STATUS);
        if (reg & BNX2_PCICFG_MISC_STATUS_PCIX_DET) {
@@ -5260,10 +5565,18 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 
        bnx2_init_nvram(bp);
 
+       reg = REG_RD_IND(bp, BNX2_SHM_HDR_SIGNATURE);
+
+       if ((reg & BNX2_SHM_HDR_SIGNATURE_SIG_MASK) ==
+           BNX2_SHM_HDR_SIGNATURE_SIG)
+               bp->shmem_base = REG_RD_IND(bp, BNX2_SHM_HDR_ADDR_0);
+       else
+               bp->shmem_base = HOST_VIEW_SHMEM_BASE;
+
        /* Get the permanent MAC address.  First we need to make sure the
         * firmware is actually running.
         */
-       reg = REG_RD_IND(bp, HOST_VIEW_SHMEM_BASE + BNX2_DEV_INFO_SIGNATURE);
+       reg = REG_RD_IND(bp, bp->shmem_base + BNX2_DEV_INFO_SIGNATURE);
 
        if ((reg & BNX2_DEV_INFO_SIGNATURE_MAGIC_MASK) !=
            BNX2_DEV_INFO_SIGNATURE_MAGIC) {
@@ -5272,21 +5585,20 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
                goto err_out_unmap;
        }
 
-       bp->fw_ver = REG_RD_IND(bp, HOST_VIEW_SHMEM_BASE +
-                               BNX2_DEV_INFO_BC_REV);
+       bp->fw_ver = REG_RD_IND(bp, bp->shmem_base + BNX2_DEV_INFO_BC_REV);
 
-       reg = REG_RD_IND(bp, HOST_VIEW_SHMEM_BASE + BNX2_PORT_HW_CFG_MAC_UPPER);
+       reg = REG_RD_IND(bp, bp->shmem_base + BNX2_PORT_HW_CFG_MAC_UPPER);
        bp->mac_addr[0] = (u8) (reg >> 8);
        bp->mac_addr[1] = (u8) reg;
 
-       reg = REG_RD_IND(bp, HOST_VIEW_SHMEM_BASE + BNX2_PORT_HW_CFG_MAC_LOWER);
+       reg = REG_RD_IND(bp, bp->shmem_base + BNX2_PORT_HW_CFG_MAC_LOWER);
        bp->mac_addr[2] = (u8) (reg >> 24);
        bp->mac_addr[3] = (u8) (reg >> 16);
        bp->mac_addr[4] = (u8) (reg >> 8);
        bp->mac_addr[5] = (u8) reg;
 
        bp->tx_ring_size = MAX_TX_DESC_CNT;
-       bp->rx_ring_size = 100;
+       bnx2_set_rx_ring_size(bp, 100);
 
        bp->rx_csum = 1;
 
@@ -5305,13 +5617,26 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
        bp->stats_ticks = 1000000 & 0xffff00;
 
        bp->timer_interval =  HZ;
+       bp->current_interval =  HZ;
+
+       bp->phy_addr = 1;
 
        /* Disable WOL support if we are running on a SERDES chip. */
        if (CHIP_BOND_ID(bp) & CHIP_BOND_ID_SERDES_BIT) {
                bp->phy_flags |= PHY_SERDES_FLAG;
                bp->flags |= NO_WOL_FLAG;
+               if (CHIP_NUM(bp) == CHIP_NUM_5708) {
+                       bp->phy_addr = 2;
+                       reg = REG_RD_IND(bp, bp->shmem_base +
+                                        BNX2_SHARED_HW_CFG_CONFIG);
+                       if (reg & BNX2_SHARED_HW_CFG_PHY_2_5G)
+                               bp->phy_flags |= PHY_2_5G_CAPABLE_FLAG;
+               }
        }
 
+       if (CHIP_NUM(bp) == CHIP_NUM_5708)
+               bp->flags |= NO_WOL_FLAG;
+
        if (CHIP_ID(bp) == CHIP_ID_5706_A0) {
                bp->tx_quick_cons_trip_int =
                        bp->tx_quick_cons_trip;
@@ -5328,6 +5653,14 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
        bp->req_line_speed = 0;
        if (bp->phy_flags & PHY_SERDES_FLAG) {
                bp->advertising = ETHTOOL_ALL_FIBRE_SPEED | ADVERTISED_Autoneg;
+
+               reg = REG_RD_IND(bp, bp->shmem_base + BNX2_PORT_HW_CFG_CONFIG);
+               reg &= BNX2_PORT_HW_CFG_CFG_DFLT_LINK_MASK;
+               if (reg == BNX2_PORT_HW_CFG_CFG_DFLT_LINK_1G) {
+                       bp->autoneg = 0;
+                       bp->req_line_speed = bp->line_speed = SPEED_1000;
+                       bp->req_duplex = DUPLEX_FULL;
+               }
        }
        else {
                bp->advertising = ETHTOOL_ALL_COPPER_SPEED | ADVERTISED_Autoneg;
@@ -5335,11 +5668,17 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev)
 
        bp->req_flow_ctrl = FLOW_CTRL_RX | FLOW_CTRL_TX;
 
+       init_timer(&bp->timer);
+       bp->timer.expires = RUN_AT(bp->timer_interval);
+       bp->timer.data = (unsigned long) bp;
+       bp->timer.function = bnx2_timer;
+
        return 0;
 
 err_out_unmap:
        if (bp->regview) {
                iounmap(bp->regview);
+               bp->regview = NULL;
        }
 
 err_out_release:
@@ -5394,7 +5733,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        dev->ethtool_ops = &bnx2_ethtool_ops;
        dev->weight = 64;
 
-       bp = dev->priv;
+       bp = netdev_priv(dev);
 
 #if defined(HAVE_POLL_CONTROLLER) || defined(CONFIG_NET_POLL_CONTROLLER)
        dev->poll_controller = poll_bnx2;
@@ -5414,6 +5753,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        pci_set_drvdata(pdev, dev);
 
        memcpy(dev->dev_addr, bp->mac_addr, 6);
+       memcpy(dev->perm_addr, bp->mac_addr, 6);
        bp->name = board_info[ent->driver_data].name,
        printk(KERN_INFO "%s: %s (%c%d) PCI%s %s %dMHz found at mem %lx, "
                "IRQ %d, ",
@@ -5452,7 +5792,9 @@ static void __devexit
 bnx2_remove_one(struct pci_dev *pdev)
 {
        struct net_device *dev = pci_get_drvdata(pdev);
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
+
+       flush_scheduled_work();
 
        unregister_netdev(dev);
 
@@ -5466,25 +5808,28 @@ bnx2_remove_one(struct pci_dev *pdev)
 }
 
 static int
-bnx2_suspend(struct pci_dev *pdev, u32 state)
+bnx2_suspend(struct pci_dev *pdev, pm_message_t state)
 {
        struct net_device *dev = pci_get_drvdata(pdev);
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
        u32 reset_code;
 
        if (!netif_running(dev))
                return 0;
 
+       flush_scheduled_work();
        bnx2_netif_stop(bp);
        netif_device_detach(dev);
        del_timer_sync(&bp->timer);
-       if (bp->wol)
+       if (bp->flags & NO_WOL_FLAG)
+               reset_code = BNX2_DRV_MSG_CODE_UNLOAD;
+       else if (bp->wol)
                reset_code = BNX2_DRV_MSG_CODE_SUSPEND_WOL;
        else
                reset_code = BNX2_DRV_MSG_CODE_SUSPEND_NO_WOL;
        bnx2_reset_chip(bp, reset_code);
        bnx2_free_skbs(bp);
-       bnx2_set_power_state(bp, state);
+       bnx2_set_power_state(bp, pci_choose_state(pdev, state));
        return 0;
 }
 
@@ -5492,12 +5837,12 @@ static int
 bnx2_resume(struct pci_dev *pdev)
 {
        struct net_device *dev = pci_get_drvdata(pdev);
-       struct bnx2 *bp = dev->priv;
+       struct bnx2 *bp = netdev_priv(dev);
 
        if (!netif_running(dev))
                return 0;
 
-       bnx2_set_power_state(bp, 0);
+       bnx2_set_power_state(bp, PCI_D0);
        netif_device_attach(dev);
        bnx2_init_nic(bp);
        bnx2_netif_start(bp);
@@ -5505,12 +5850,12 @@ bnx2_resume(struct pci_dev *pdev)
 }
 
 static struct pci_driver bnx2_pci_driver = {
-       name:           DRV_MODULE_NAME,
-       id_table:       bnx2_pci_tbl,
-       probe:          bnx2_init_one,
-       remove:         __devexit_p(bnx2_remove_one),
-       suspend:        bnx2_suspend,
-       resume:         bnx2_resume,
+       .name           = DRV_MODULE_NAME,
+       .id_table       = bnx2_pci_tbl,
+       .probe          = bnx2_init_one,
+       .remove         = __devexit_p(bnx2_remove_one),
+       .suspend        = bnx2_suspend,
+       .resume         = bnx2_resume,
 };
 
 static int __init bnx2_init(void)
index 55d2367..8171cae 100644 (file)
@@ -1199,8 +1199,7 @@ int bond_sethwaddr(struct net_device *bond_dev, struct net_device *slave_dev)
 }
 
 #define BOND_INTERSECT_FEATURES \
-       (NETIF_F_SG|NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM|\
-       NETIF_F_TSO|NETIF_F_UFO)
+       (NETIF_F_SG | NETIF_F_ALL_CSUM | NETIF_F_TSO | NETIF_F_UFO)
 
 /* 
  * Compute the common dev->feature set available to all slaves.  Some
@@ -1218,9 +1217,7 @@ static int bond_compute_features(struct bonding *bond)
                features &= (slave->dev->features & BOND_INTERSECT_FEATURES);
 
        if ((features & NETIF_F_SG) && 
-           !(features & (NETIF_F_IP_CSUM |
-                         NETIF_F_NO_CSUM |
-                         NETIF_F_HW_CSUM)))
+           !(features & NETIF_F_ALL_CSUM))
                features &= ~NETIF_F_SG;
 
        /* 
@@ -4191,7 +4188,7 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params)
         */
        bond_dev->features |= NETIF_F_VLAN_CHALLENGED;
 
-       /* don't acquire bond device's xmit_lock when 
+       /* don't acquire bond device's netif_tx_lock when
         * transmitting */
        bond_dev->features |= NETIF_F_LLTX;
 
index 4391bf4..722be62 100644 (file)
@@ -1418,7 +1418,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
        struct cpl_tx_pkt *cpl;
 
 #ifdef NETIF_F_TSO
-       if (skb_shinfo(skb)->tso_size) {
+       if (skb_is_gso(skb)) {
                int eth_type;
                struct cpl_tx_pkt_lso *hdr;
 
@@ -1433,7 +1433,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev)
                hdr->ip_hdr_words = skb->nh.iph->ihl;
                hdr->tcp_hdr_words = skb->h.th->doff;
                hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type,
-                                               skb_shinfo(skb)->tso_size));
+                                               skb_shinfo(skb)->gso_size));
                hdr->len = htonl(skb->len - sizeof(*hdr));
                cpl = (struct cpl_tx_pkt *)hdr;
                sge->stats.tx_lso_pkts++;
index 523c2c9..c5e7023 100644 (file)
@@ -353,6 +353,7 @@ e1000_set_mac_type(struct e1000_hw *hw)
     case E1000_DEV_ID_82572EI_COPPER:
     case E1000_DEV_ID_82572EI_FIBER:
     case E1000_DEV_ID_82572EI_SERDES:
+    case E1000_DEV_ID_82572EI:
         hw->mac_type = e1000_82572;
         break;
     case E1000_DEV_ID_82573E:
index 150e45e..c01e5d2 100644 (file)
@@ -462,6 +462,7 @@ int32_t e1000_check_phy_reset_block(struct e1000_hw *hw);
 #define E1000_DEV_ID_82572EI_COPPER      0x107D
 #define E1000_DEV_ID_82572EI_FIBER       0x107E
 #define E1000_DEV_ID_82572EI_SERDES      0x107F
+#define E1000_DEV_ID_82572EI             0x10B9
 #define E1000_DEV_ID_82573E              0x108B
 #define E1000_DEV_ID_82573E_IAMT         0x108C
 #define E1000_DEV_ID_82573L              0x109A
index 97e71a4..43e6472 100644 (file)
@@ -2413,7 +2413,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
        uint8_t ipcss, ipcso, tucss, tucso, hdr_len;
        int err;
 
-       if (skb_shinfo(skb)->tso_size) {
+       if (skb_is_gso(skb)) {
                if (skb_header_cloned(skb)) {
                        err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
                        if (err)
@@ -2421,7 +2421,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
                }
 
                hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
-               mss = skb_shinfo(skb)->tso_size;
+               mss = skb_shinfo(skb)->gso_size;
                if (skb->protocol == ntohs(ETH_P_IP)) {
                        skb->nh.iph->tot_len = 0;
                        skb->nh.iph->check = 0;
@@ -2538,7 +2538,7 @@ e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring,
                 * tso gets written back prematurely before the data is fully
                 * DMA'd to the controller */
                if (!skb->data_len && tx_ring->last_tx_tso &&
-                   !skb_shinfo(skb)->tso_size) {
+                   !skb_is_gso(skb)) {
                        tx_ring->last_tx_tso = 0;
                        size -= 4;
                }
@@ -2776,7 +2776,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
        }
 
 #ifdef NETIF_F_TSO
-       mss = skb_shinfo(skb)->tso_size;
+       mss = skb_shinfo(skb)->gso_size;
        /* The controller does a simple calculation to 
         * make sure there is enough room in the FIFO before
         * initiating the DMA for each buffer.  The calc is:
@@ -2825,8 +2825,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 
 #ifdef NETIF_F_TSO
        /* Controller Erratum workaround */
-       if (!skb->data_len && tx_ring->last_tx_tso &&
-           !skb_shinfo(skb)->tso_size)
+       if (!skb->data_len && tx_ring->last_tx_tso && !skb_is_gso(skb))
                count++;
 #endif
 
index feb5b22..1a55695 100644 (file)
@@ -533,9 +533,9 @@ typedef union _ring_type {
  * critical parts:
  * - rx is (pseudo-) lockless: it relies on the single-threading provided
  *     by the arch code for interrupts.
- * - tx setup is lockless: it relies on dev->xmit_lock. Actual submission
+ * - tx setup is lockless: it relies on netif_tx_lock. Actual submission
  *     needs dev->priv->lock :-(
- * - set_multicast_list: preparation lockless, relies on dev->xmit_lock.
+ * - set_multicast_list: preparation lockless, relies on netif_tx_lock.
  */
 
 /* in dev: base, irq */
@@ -1213,7 +1213,7 @@ static void drain_ring(struct net_device *dev)
 
 /*
  * nv_start_xmit: dev->hard_start_xmit function
- * Called with dev->xmit_lock held.
+ * Called with netif_tx_lock held.
  */
 static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
@@ -1303,8 +1303,8 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
        np->tx_skbuff[nr] = skb;
 
 #ifdef NETIF_F_TSO
-       if (skb_shinfo(skb)->tso_size)
-               tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT);
+       if (skb_is_gso(skb))
+               tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
        else
 #endif
        tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0);
@@ -1407,7 +1407,7 @@ static void nv_tx_done(struct net_device *dev)
 
 /*
  * nv_tx_timeout: dev->tx_timeout function
- * Called with dev->xmit_lock held.
+ * Called with netif_tx_lock held.
  */
 static void nv_tx_timeout(struct net_device *dev)
 {
@@ -1736,8 +1736,8 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
                 * guessed, there is probably a simpler approach.
                 * Changing the MTU is a rare event, it shouldn't matter.
                 */
-               nv_disable_irq(dev);
-               spin_lock_bh(&dev->xmit_lock);
+               disable_irq(dev->irq);
+               netif_tx_lock_bh(dev);
                spin_lock(&np->lock);
                /* stop engines */
                nv_stop_rx(dev);
@@ -1768,8 +1768,8 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
                nv_start_rx(dev);
                nv_start_tx(dev);
                spin_unlock(&np->lock);
-               spin_unlock_bh(&dev->xmit_lock);
-               nv_enable_irq(dev);
+               netif_tx_unlock_bh(dev);
+               enable_irq(dev->irq);
        }
        return 0;
 }
@@ -1803,7 +1803,7 @@ static int nv_set_mac_address(struct net_device *dev, void *addr)
        memcpy(dev->dev_addr, macaddr->sa_data, ETH_ALEN);
 
        if (netif_running(dev)) {
-               spin_lock_bh(&dev->xmit_lock);
+               netif_tx_lock_bh(dev);
                spin_lock_irq(&np->lock);
 
                /* stop rx engine */
@@ -1815,7 +1815,7 @@ static int nv_set_mac_address(struct net_device *dev, void *addr)
                /* restart rx engine */
                nv_start_rx(dev);
                spin_unlock_irq(&np->lock);
-               spin_unlock_bh(&dev->xmit_lock);
+               netif_tx_unlock_bh(dev);
        } else {
                nv_copy_mac_to_hw(dev);
        }
@@ -1824,7 +1824,7 @@ static int nv_set_mac_address(struct net_device *dev, void *addr)
 
 /*
  * nv_set_multicast: dev->set_multicast function
- * Called with dev->xmit_lock held.
+ * Called with netif_tx_lock held.
  */
 static void nv_set_multicast(struct net_device *dev)
 {
index 102c1f0..d12605f 100644 (file)
@@ -308,9 +308,9 @@ static int sp_set_mac_address(struct net_device *dev, void *addr)
 {
        struct sockaddr_ax25 *sa = addr;
 
-       spin_lock_irq(&dev->xmit_lock);
+       netif_tx_lock_bh(dev);
        memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN);
-       spin_unlock_irq(&dev->xmit_lock);
+       netif_tx_unlock_bh(dev);
 
        return 0;
 }
@@ -767,9 +767,9 @@ static int sixpack_ioctl(struct tty_struct *tty, struct file *file,
                        break;
                }
 
-               spin_lock_irq(&dev->xmit_lock);
+               netif_tx_lock_bh(dev);
                memcpy(dev->dev_addr, &addr, AX25_ADDR_LEN);
-               spin_unlock_irq(&dev->xmit_lock);
+               netif_tx_unlock_bh(dev);
 
                err = 0;
                break;
index d81a8e1..3ebbbe5 100644 (file)
@@ -357,9 +357,9 @@ static int ax_set_mac_address(struct net_device *dev, void *addr)
 {
        struct sockaddr_ax25 *sa = addr;
 
-       spin_lock_irq(&dev->xmit_lock);
+       netif_tx_lock_bh(dev);
        memcpy(dev->dev_addr, &sa->sax25_call, AX25_ADDR_LEN);
-       spin_unlock_irq(&dev->xmit_lock);
+       netif_tx_unlock_bh(dev);
 
        return 0;
 }
@@ -886,9 +886,9 @@ static int mkiss_ioctl(struct tty_struct *tty, struct file *file,
                        break;
                }
 
-               spin_lock_irq(&dev->xmit_lock);
+               netif_tx_lock_bh(dev);
                memcpy(dev->dev_addr, addr, AX25_ADDR_LEN);
-               spin_unlock_irq(&dev->xmit_lock);
+               netif_tx_unlock_bh(dev);
 
                err = 0;
                break;
index 31fb2d7..2e222ef 100644 (file)
@@ -76,13 +76,13 @@ static void ri_tasklet(unsigned long dev)
        dp->st_task_enter++;
        if ((skb = skb_peek(&dp->tq)) == NULL) {
                dp->st_txq_refl_try++;
-               if (spin_trylock(&_dev->xmit_lock)) {
+               if (netif_tx_trylock(_dev)) {
                        dp->st_rxq_enter++;
                        while ((skb = skb_dequeue(&dp->rq)) != NULL) {
                                skb_queue_tail(&dp->tq, skb);
                                dp->st_rx2tx_tran++;
                        }
-                       spin_unlock(&_dev->xmit_lock);
+                       netif_tx_unlock(_dev);
                } else {
                        /* reschedule */
                        dp->st_rxq_notenter++;
@@ -110,7 +110,7 @@ static void ri_tasklet(unsigned long dev)
                }
        }
 
-       if (spin_trylock(&_dev->xmit_lock)) {
+       if (netif_tx_trylock(_dev)) {
                dp->st_rxq_check++;
                if ((skb = skb_peek(&dp->rq)) == NULL) {
                        dp->tasklet_pending = 0;
@@ -118,10 +118,10 @@ static void ri_tasklet(unsigned long dev)
                                netif_wake_queue(_dev);
                } else {
                        dp->st_rxq_rsch++;
-                       spin_unlock(&_dev->xmit_lock);
+                       netif_tx_unlock(_dev);
                        goto resched;
                }
-               spin_unlock(&_dev->xmit_lock);
+               netif_tx_unlock(_dev);
        } else {
 resched:
                dp->tasklet_pending = 1;
index 97a49e0..d70b9e8 100644 (file)
@@ -959,7 +959,7 @@ static int vlsi_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev)
                            ||  (now.tv_sec==ready.tv_sec && now.tv_usec>=ready.tv_usec))
                                break;
                        udelay(100);
-                       /* must not sleep here - we are called under xmit_lock! */
+                       /* must not sleep here - called under netif_tx_lock! */
                }
        }
 
index cfd67d8..8385245 100644 (file)
@@ -1168,7 +1168,7 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
        uint16_t ipcse, tucse, mss;
        int err;
 
-       if(likely(skb_shinfo(skb)->tso_size)) {
+       if (likely(skb_is_gso(skb))) {
                if (skb_header_cloned(skb)) {
                        err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
                        if (err)
@@ -1176,7 +1176,7 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
                }
 
                hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
-               mss = skb_shinfo(skb)->tso_size;
+               mss = skb_shinfo(skb)->gso_size;
                skb->nh.iph->tot_len = 0;
                skb->nh.iph->check = 0;
                skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr,
index b79d6e8..997cbce 100644 (file)
@@ -74,7 +74,7 @@ static void emulate_large_send_offload(struct sk_buff *skb)
        struct iphdr *iph = skb->nh.iph;
        struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4));
        unsigned int doffset = (iph->ihl + th->doff) * 4;
-       unsigned int mtu = skb_shinfo(skb)->tso_size + doffset;
+       unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
        unsigned int offset = 0;
        u32 seq = ntohl(th->seq);
        u16 id  = ntohs(iph->id);
@@ -139,7 +139,7 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 #endif
 
 #ifdef LOOPBACK_TSO
-       if (skb_shinfo(skb)->tso_size) {
+       if (skb_is_gso(skb)) {
                BUG_ON(skb->protocol != htons(ETH_P_IP));
                BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP);
 
index 411f4d8..625ff61 100644 (file)
@@ -1200,7 +1200,7 @@ static int mv643xx_eth_start_xmit(struct sk_buff *skb, struct net_device *dev)
        }
 
        if (has_tiny_unaligned_frags(skb)) {
-               if ((skb_linearize(skb, GFP_ATOMIC) != 0)) {
+               if (__skb_linearize(skb)) {
                        stats->tx_dropped++;
                        printk(KERN_DEBUG "%s: failed to linearize tiny "
                                        "unaligned fragment\n", dev->name);
index 9062775..2e4eced 100644 (file)
@@ -318,12 +318,12 @@ performance critical codepaths:
 The rx process only runs in the interrupt handler. Access from outside
 the interrupt handler is only permitted after disable_irq().
 
-The rx process usually runs under the dev->xmit_lock. If np->intr_tx_reap
+The rx process usually runs under the netif_tx_lock. If np->intr_tx_reap
 is set, then access is permitted under spin_lock_irq(&np->lock).
 
 Thus configuration functions that want to access everything must call
        disable_irq(dev->irq);
-       spin_lock_bh(dev->xmit_lock);
+       netif_tx_lock_bh(dev);
        spin_lock_irq(&np->lock);
 
 IV. Notes
index 0ad3310..43641dd 100644 (file)
@@ -2171,7 +2171,7 @@ static int rtl8169_xmit_frags(struct rtl8169_private *tp, struct sk_buff *skb,
 static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev)
 {
        if (dev->features & NETIF_F_TSO) {
-               u32 mss = skb_shinfo(skb)->tso_size;
+               u32 mss = skb_shinfo(skb)->gso_size;
 
                if (mss)
                        return LargeSend | ((mss & MSSMask) << MSSShift);
index 79208f4..0ad01f0 100644 (file)
@@ -3564,8 +3564,8 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
        txdp->Control_1 = 0;
        txdp->Control_2 = 0;
 #ifdef NETIF_F_TSO
-       mss = skb_shinfo(skb)->tso_size;
-       if (mss) {
+       mss = skb_shinfo(skb)->gso_size;
+       if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4) {
                txdp->Control_1 |= TXD_TCP_LSO_EN;
                txdp->Control_1 |= TXD_TCP_LSO_MSS(mss);
        }
@@ -3585,10 +3585,10 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
        }
 
        frg_len = skb->len - skb->data_len;
-       if (skb_shinfo(skb)->ufo_size) {
+       if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) {
                int ufo_size;
 
-               ufo_size = skb_shinfo(skb)->ufo_size;
+               ufo_size = skb_shinfo(skb)->gso_size;
                ufo_size &= ~7;
                txdp->Control_1 |= TXD_UFO_EN;
                txdp->Control_1 |= TXD_UFO_MSS(ufo_size);
@@ -3614,7 +3614,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
        txdp->Host_Control = (unsigned long) skb;
        txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len);
 
-       if (skb_shinfo(skb)->ufo_size)
+       if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
                txdp->Control_1 |= TXD_UFO_EN;
 
        frg_cnt = skb_shinfo(skb)->nr_frags;
@@ -3629,12 +3629,12 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
                    (sp->pdev, frag->page, frag->page_offset,
                     frag->size, PCI_DMA_TODEVICE);
                txdp->Control_1 = TXD_BUFFER0_SIZE(frag->size);
-               if (skb_shinfo(skb)->ufo_size)
+               if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
                        txdp->Control_1 |= TXD_UFO_EN;
        }
        txdp->Control_1 |= TXD_GATHER_CODE_LAST;
 
-       if (skb_shinfo(skb)->ufo_size)
+       if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
                frg_cnt++; /* as Txd0 was used for inband header */
 
        tx_fifo = mac_control->tx_FIFO_start[queue];
@@ -3648,7 +3648,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev)
        if (mss)
                val64 |= TX_FIFO_SPECIAL_FUNC;
 #endif
-       if (skb_shinfo(skb)->ufo_size)
+       if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4)
                val64 |= TX_FIFO_SPECIAL_FUNC;
        writeq(val64, &tx_fifo->List_Control);
 
index fba1e4d..4103e30 100644 (file)
@@ -51,7 +51,7 @@
 #include "sky2.h"
 
 #define DRV_NAME               "sky2"
-#define DRV_VERSION            "1.4"
+#define DRV_VERSION            "1.6.1"
 #define PFX                    DRV_NAME " "
 
 /*
@@ -233,6 +233,8 @@ static void sky2_set_power_state(struct sky2_hw *hw, pci_power_t state)
                        if (hw->ports > 1)
                                reg1 |= PCI_Y2_PHY2_COMA;
                }
+               sky2_pci_write32(hw, PCI_DEV_REG1, reg1);
+               udelay(100);
 
                if (hw->chip_id == CHIP_ID_YUKON_EC_U) {
                        sky2_write16(hw, B0_CTST, Y2_HW_WOL_ON);
@@ -243,8 +245,6 @@ static void sky2_set_power_state(struct sky2_hw *hw, pci_power_t state)
                        sky2_pci_write32(hw, PCI_DEV_REG5, 0);
                }
 
-               sky2_pci_write32(hw, PCI_DEV_REG1, reg1);
-
                break;
 
        case PCI_D3hot:
@@ -321,7 +321,7 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port)
        }
 
        ctrl = gm_phy_read(hw, port, PHY_MARV_PHY_CTRL);
-       if (hw->copper) {
+       if (sky2_is_copper(hw)) {
                if (hw->chip_id == CHIP_ID_YUKON_FE) {
                        /* enable automatic crossover */
                        ctrl |= PHY_M_PC_MDI_XMODE(PHY_M_PC_ENA_AUTO) >> 1;
@@ -338,25 +338,37 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port)
                                ctrl |= PHY_M_PC_DSC(2) | PHY_M_PC_DOWN_S_ENA;
                        }
                }
-               gm_phy_write(hw, port, PHY_MARV_PHY_CTRL, ctrl);
        } else {
                /* workaround for deviation #4.88 (CRC errors) */
                /* disable Automatic Crossover */
 
                ctrl &= ~PHY_M_PC_MDIX_MSK;
-               gm_phy_write(hw, port, PHY_MARV_PHY_CTRL, ctrl);
+       }
 
-               if (hw->chip_id == CHIP_ID_YUKON_XL) {
-                       /* Fiber: select 1000BASE-X only mode MAC Specific Ctrl Reg. */
-                       gm_phy_write(hw, port, PHY_MARV_EXT_ADR, 2);
-                       ctrl = gm_phy_read(hw, port, PHY_MARV_PHY_CTRL);
-                       ctrl &= ~PHY_M_MAC_MD_MSK;
-                       ctrl |= PHY_M_MAC_MODE_SEL(PHY_M_MAC_MD_1000BX);
-                       gm_phy_write(hw, port, PHY_MARV_PHY_CTRL, ctrl);
+       gm_phy_write(hw, port, PHY_MARV_PHY_CTRL, ctrl);
 
+       /* special setup for PHY 88E1112 Fiber */
+       if (hw->chip_id == CHIP_ID_YUKON_XL && !sky2_is_copper(hw)) {
+               pg = gm_phy_read(hw, port, PHY_MARV_EXT_ADR);
+
+               /* Fiber: select 1000BASE-X only mode MAC Specific Ctrl Reg. */
+               gm_phy_write(hw, port, PHY_MARV_EXT_ADR, 2);
+               ctrl = gm_phy_read(hw, port, PHY_MARV_PHY_CTRL);
+               ctrl &= ~PHY_M_MAC_MD_MSK;
+               ctrl |= PHY_M_MAC_MODE_SEL(PHY_M_MAC_MD_1000BX);
+               gm_phy_write(hw, port, PHY_MARV_PHY_CTRL, ctrl);
+
+               if (hw->pmd_type  == 'P') {
                        /* select page 1 to access Fiber registers */
                        gm_phy_write(hw, port, PHY_MARV_EXT_ADR, 1);
+
+                       /* for SFP-module set SIGDET polarity to low */
+                       ctrl = gm_phy_read(hw, port, PHY_MARV_PHY_CTRL);
+                       ctrl |= PHY_M_FIB_SIGD_POL;
+                       gm_phy_write(hw, port, PHY_MARV_CTRL, ctrl);
                }
+
+               gm_phy_write(hw, port, PHY_MARV_EXT_ADR, pg);
        }
 
        ctrl = gm_phy_read(hw, port, PHY_MARV_CTRL);
@@ -373,7 +385,7 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port)
        adv = PHY_AN_CSMA;
 
        if (sky2->autoneg == AUTONEG_ENABLE) {
-               if (hw->copper) {
+               if (sky2_is_copper(hw)) {
                        if (sky2->advertising & ADVERTISED_1000baseT_Full)
                                ct1000 |= PHY_M_1000C_AFD;
                        if (sky2->advertising & ADVERTISED_1000baseT_Half)
@@ -386,8 +398,12 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port)
                                adv |= PHY_M_AN_10_FD;
                        if (sky2->advertising & ADVERTISED_10baseT_Half)
                                adv |= PHY_M_AN_10_HD;
-               } else          /* special defines for FIBER (88E1011S only) */
-                       adv |= PHY_M_AN_1000X_AHD | PHY_M_AN_1000X_AFD;
+               } else {        /* special defines for FIBER (88E1040S only) */
+                       if (sky2->advertising & ADVERTISED_1000baseT_Full)
+                               adv |= PHY_M_AN_1000X_AFD;
+                       if (sky2->advertising & ADVERTISED_1000baseT_Half)
+                               adv |= PHY_M_AN_1000X_AHD;
+               }
 
                /* Set Flow-control capabilities */
                if (sky2->tx_pause && sky2->rx_pause)
@@ -949,14 +965,14 @@ static void sky2_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 /*
  * It appears the hardware has a bug in the FIFO logic that
  * cause it to hang if the FIFO gets overrun and the receive buffer
- * is not aligned. ALso alloc_skb() won't align properly if slab
+ * is not aligned. Also dev_alloc_skb() won't align properly if slab
  * debugging is enabled.
  */
 static inline struct sk_buff *sky2_alloc_skb(unsigned int size, gfp_t gfp_mask)
 {
        struct sk_buff *skb;
 
-       skb = alloc_skb(size + RX_SKB_ALIGN, gfp_mask);
+       skb = __dev_alloc_skb(size + RX_SKB_ALIGN, gfp_mask);
        if (likely(skb)) {
                unsigned long p = (unsigned long) skb->data;
                skb_reserve(skb, ALIGN(p, RX_SKB_ALIGN) - p);
@@ -1160,7 +1176,7 @@ static unsigned tx_le_req(const struct sk_buff *skb)
        count = sizeof(dma_addr_t) / sizeof(u32);
        count += skb_shinfo(skb)->nr_frags * count;
 
-       if (skb_shinfo(skb)->tso_size)
+       if (skb_is_gso(skb))
                ++count;
 
        if (skb->ip_summed == CHECKSUM_HW)
@@ -1232,7 +1248,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev)
        }
 
        /* Check for TCP Segmentation Offload */
-       mss = skb_shinfo(skb)->tso_size;
+       mss = skb_shinfo(skb)->gso_size;
        if (mss != 0) {
                /* just drop the packet if non-linear expansion fails */
                if (skb_header_cloned(skb) &&
@@ -1497,7 +1513,7 @@ static int sky2_down(struct net_device *dev)
 
 static u16 sky2_phy_speed(const struct sky2_hw *hw, u16 aux)
 {
-       if (!hw->copper)
+       if (!sky2_is_copper(hw))
                return SPEED_1000;
 
        if (hw->chip_id == CHIP_ID_YUKON_FE)
@@ -1855,7 +1871,7 @@ static struct sk_buff *sky2_receive(struct sky2_port *sky2,
                goto oversize;
 
        if (length < copybreak) {
-               skb = alloc_skb(length + 2, GFP_ATOMIC);
+               skb = dev_alloc_skb(length + 2);
                if (!skb)
                        goto resubmit;
 
@@ -2016,6 +2032,9 @@ static int sky2_status_intr(struct sky2_hw *hw, int to_do)
                }
        }
 
+       /* Fully processed status ring so clear irq */
+       sky2_write32(hw, STAT_CTRL, SC_STAT_CLR_IRQ);
+
 exit_loop:
        return work_done;
 }
@@ -2187,9 +2206,6 @@ static int sky2_poll(struct net_device *dev0, int *budget)
        int work_done = 0;
        u32 status = sky2_read32(hw, B0_Y2_SP_EISR);
 
-       if (!~status)
-               goto out;
-
        if (status & Y2_IS_HW_ERR)
                sky2_hw_intr(hw);
 
@@ -2221,12 +2237,9 @@ static int sky2_poll(struct net_device *dev0, int *budget)
        *budget -= work_done;
        dev0->quota -= work_done;
 
-       if (status & Y2_IS_STAT_BMU)
-               sky2_write32(hw, STAT_CTRL, SC_STAT_CLR_IRQ);
-
        if (sky2_more_work(hw))
                return 1;
-out:
+
        netif_rx_complete(dev0);
 
        sky2_read32(hw, B0_Y2_SP_LISR);
@@ -2290,7 +2303,7 @@ static inline u32 sky2_clk2us(const struct sky2_hw *hw, u32 clk)
 static int __devinit sky2_reset(struct sky2_hw *hw)
 {
        u16 status;
-       u8 t8, pmd_type;
+       u8 t8;
        int i;
 
        sky2_write8(hw, B0_CTST, CS_RST_CLR);
@@ -2336,9 +2349,7 @@ static int __devinit sky2_reset(struct sky2_hw *hw)
                sky2_pci_write32(hw, PEX_UNC_ERR_STAT, 0xffffffffUL);
 
 
-       pmd_type = sky2_read8(hw, B2_PMD_TYP);
-       hw->copper = !(pmd_type == 'L' || pmd_type == 'S');
-
+       hw->pmd_type = sky2_read8(hw, B2_PMD_TYP);
        hw->ports = 1;
        t8 = sky2_read8(hw, B2_Y2_HW_RES);
        if ((t8 & CFG_DUAL_MAC_MSK) == CFG_DUAL_MAC_MSK) {
@@ -2435,21 +2446,22 @@ static int __devinit sky2_reset(struct sky2_hw *hw)
 
 static u32 sky2_supported_modes(const struct sky2_hw *hw)
 {
-       u32 modes;
-       if (hw->copper) {
-               modes = SUPPORTED_10baseT_Half
-                   | SUPPORTED_10baseT_Full
-                   | SUPPORTED_100baseT_Half
-                   | SUPPORTED_100baseT_Full
-                   | SUPPORTED_Autoneg | SUPPORTED_TP;
+       if (sky2_is_copper(hw)) {
+               u32 modes = SUPPORTED_10baseT_Half
+                       | SUPPORTED_10baseT_Full
+                       | SUPPORTED_100baseT_Half
+                       | SUPPORTED_100baseT_Full
+                       | SUPPORTED_Autoneg | SUPPORTED_TP;
 
                if (hw->chip_id != CHIP_ID_YUKON_FE)
                        modes |= SUPPORTED_1000baseT_Half
-                           | SUPPORTED_1000baseT_Full;
+                               | SUPPORTED_1000baseT_Full;
+               return modes;
        } else
-               modes = SUPPORTED_1000baseT_Full | SUPPORTED_FIBRE
-                   | SUPPORTED_Autoneg;
-       return modes;
+               return  SUPPORTED_1000baseT_Half
+                       | SUPPORTED_1000baseT_Full
+                       | SUPPORTED_Autoneg
+                       | SUPPORTED_FIBRE;
 }
 
 static int sky2_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
@@ -2460,7 +2472,7 @@ static int sky2_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
        ecmd->transceiver = XCVR_INTERNAL;
        ecmd->supported = sky2_supported_modes(hw);
        ecmd->phy_address = PHY_ADDR_MARV;
-       if (hw->copper) {
+       if (sky2_is_copper(hw)) {
                ecmd->supported = SUPPORTED_10baseT_Half
                    | SUPPORTED_10baseT_Full
                    | SUPPORTED_100baseT_Half
@@ -2469,12 +2481,14 @@ static int sky2_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
                    | SUPPORTED_1000baseT_Full
                    | SUPPORTED_Autoneg | SUPPORTED_TP;
                ecmd->port = PORT_TP;
-       } else
+               ecmd->speed = sky2->speed;
+       } else {
+               ecmd->speed = SPEED_1000;
                ecmd->port = PORT_FIBRE;
+       }
 
        ecmd->advertising = sky2->advertising;
        ecmd->autoneg = sky2->autoneg;
-       ecmd->speed = sky2->speed;
        ecmd->duplex = sky2->duplex;
        return 0;
 }
@@ -3187,6 +3201,8 @@ static int __devinit sky2_test_msi(struct sky2_hw *hw)
        struct pci_dev *pdev = hw->pdev;
        int err;
 
+       init_waitqueue_head (&hw->msi_wait);
+
        sky2_write32(hw, B0_IMSK, Y2_IS_IRQ_SW);
 
        err = request_irq(pdev->irq, sky2_test_intr, SA_SHIRQ, DRV_NAME, hw);
@@ -3196,10 +3212,8 @@ static int __devinit sky2_test_msi(struct sky2_hw *hw)
                return err;
        }
 
-       init_waitqueue_head (&hw->msi_wait);
-
        sky2_write8(hw, B0_CTST, CS_ST_SW_IRQ);
-       wmb();
+       sky2_read8(hw, B0_CTST);
 
        wait_event_timeout(hw->msi_wait, hw->msi_detected, HZ/10);
 
index 8a0bc55..9516c1f 100644 (file)
@@ -1317,6 +1317,14 @@ enum {
        PHY_M_FESC_SEL_CL_A     = 1<<0, /* Select Class A driver (100B-TX) */
 };
 
+/* for Yukon-2 Gigabit Ethernet PHY (88E1112 only) */
+/*****  PHY_MARV_PHY_CTRL (page 1)             16 bit r/w      Fiber Specific Ctrl *****/
+enum {
+       PHY_M_FIB_FORCE_LNK     = 1<<10,/* Force Link Good */
+       PHY_M_FIB_SIGD_POL      = 1<<9, /* SIGDET Polarity */
+       PHY_M_FIB_TX_DIS        = 1<<3, /* Transmitter Disable */
+};
+
 /* for Yukon-2 Gigabit Ethernet PHY (88E1112 only) */
 /*****  PHY_MARV_PHY_CTRL (page 2)             16 bit r/w      MAC Specific Ctrl *****/
 enum {
@@ -1566,7 +1574,7 @@ enum {
 
        GMR_FS_ANY_ERR  = GMR_FS_RX_FF_OV | GMR_FS_CRC_ERR |
                          GMR_FS_FRAGMENT | GMR_FS_LONG_ERR |
-                         GMR_FS_MII_ERR | GMR_FS_BAD_FC | GMR_FS_GOOD_FC |
+                         GMR_FS_MII_ERR | GMR_FS_BAD_FC |
                          GMR_FS_UN_SIZE | GMR_FS_JABBER,
 };
 
@@ -1879,7 +1887,7 @@ struct sky2_hw {
        int                  pm_cap;
        u8                   chip_id;
        u8                   chip_rev;
-       u8                   copper;
+       u8                   pmd_type;
        u8                   ports;
 
        struct sky2_status_le *st_le;
@@ -1891,6 +1899,11 @@ struct sky2_hw {
        wait_queue_head_t    msi_wait;
 };
 
+static inline int sky2_is_copper(const struct sky2_hw *hw)
+{
+       return !(hw->pmd_type == 'L' || hw->pmd_type == 'S' || hw->pmd_type == 'P');
+}
+
 /* Register accessor for memory mapped device */
 static inline u32 sky2_read32(const struct sky2_hw *hw, unsigned reg)
 {
index 862c226..09a6b8e 100644 (file)
@@ -69,8 +69,8 @@
 
 #define DRV_MODULE_NAME                "tg3"
 #define PFX DRV_MODULE_NAME    ": "
-#define DRV_MODULE_VERSION     "3.59"
-#define DRV_MODULE_RELDATE     "June 8, 2006"
+#define DRV_MODULE_VERSION     "3.59.1"
+#define DRV_MODULE_RELDATE     "August 25, 2006"
 
 #define TG3_DEF_MAC_MODE       0
 #define TG3_DEF_RX_MODE                0
@@ -3743,7 +3743,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 #if TG3_TSO_SUPPORT != 0
        mss = 0;
        if (skb->len > (tp->dev->mtu + ETH_HLEN) &&
-           (mss = skb_shinfo(skb)->tso_size) != 0) {
+           (mss = skb_shinfo(skb)->gso_size) != 0) {
                int tcp_opt_len, ip_tcp_len;
 
                if (skb_header_cloned(skb) &&
@@ -3871,7 +3871,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 #if TG3_TSO_SUPPORT != 0
        mss = 0;
        if (skb->len > (tp->dev->mtu + ETH_HLEN) &&
-           (mss = skb_shinfo(skb)->tso_size) != 0) {
+           (mss = skb_shinfo(skb)->gso_size) != 0) {
                int tcp_opt_len, ip_tcp_len;
 
                if (skb_header_cloned(skb) &&
@@ -11381,11 +11381,15 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
                tp->tg3_flags2 |= TG3_FLG2_TSO_CAPABLE;
        }
 
-       /* TSO is on by default on chips that support hardware TSO.
+       /* TSO is on by default on chips that support HW_TSO_2.
+        * Some HW_TSO_1 capable chips have bugs that can lead to
+        * tx timeouts in some cases when TSO is enabled.
         * Firmware TSO on older chips gives lower performance, so it
         * is off by default, but can be enabled using ethtool.
         */
-       if (tp->tg3_flags2 & TG3_FLG2_HW_TSO)
+       if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO_2) ||
+           (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5750 &&
+            tp->pci_chip_rev_id >= CHIPREV_ID_5750_C2))
                dev->features |= NETIF_F_TSO;
 
 #endif
index ff0faab..cd68f46 100644 (file)
 #define  CHIPREV_ID_5750_A0             0x4000
 #define  CHIPREV_ID_5750_A1             0x4001
 #define  CHIPREV_ID_5750_A3             0x4003
+#define  CHIPREV_ID_5750_C2             0x4202
 #define  CHIPREV_ID_5752_A0_HW          0x5000
 #define  CHIPREV_ID_5752_A0             0x6000
 #define  CHIPREV_ID_5752_A1             0x6001
index 136a70c..56d86c7 100644 (file)
@@ -1605,11 +1605,11 @@ static void __devexit w840_remove1 (struct pci_dev *pdev)
  * - get_stats:
  *     spin_lock_irq(np->lock), doesn't touch hw if not present
  * - hard_start_xmit:
- *     netif_stop_queue + spin_unlock_wait(&dev->xmit_lock);
+ *     synchronize_irq + netif_tx_disable;
  * - tx_timeout:
- *     netif_device_detach + spin_unlock_wait(&dev->xmit_lock);
+ *     netif_device_detach + netif_tx_disable;
  * - set_multicast_list
- *     netif_device_detach + spin_unlock_wait(&dev->xmit_lock);
+ *     netif_device_detach + netif_tx_disable;
  * - interrupt handler
  *     doesn't touch hw if not present, synchronize_irq waits for
  *     running instances of the interrupt handler.
@@ -1635,11 +1635,10 @@ static int w840_suspend (struct pci_dev *pdev, pm_message_t state)
                netif_device_detach(dev);
                update_csr6(dev, 0);
                iowrite32(0, ioaddr + IntrEnable);
-               netif_stop_queue(dev);
                spin_unlock_irq(&np->lock);
 
-               spin_unlock_wait(&dev->xmit_lock);
                synchronize_irq(dev->irq);
+               netif_tx_disable(dev);
        
                np->stats.rx_missed_errors += ioread32(ioaddr + RxMissed) & 0xffff;
 
index d9258d4..389d19f 100644 (file)
@@ -340,7 +340,7 @@ enum state_values {
 #endif
 
 #if defined(NETIF_F_TSO)
-#define skb_tso_size(x)                (skb_shinfo(x)->tso_size)
+#define skb_tso_size(x)                (skb_shinfo(x)->gso_size)
 #define TSO_NUM_DESCRIPTORS    2
 #define TSO_OFFLOAD_ON         TYPHOON_OFFLOAD_TCP_SEGMENT
 #else
@@ -805,7 +805,7 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev)
         * If problems develop with TSO, check this first.
         */
        numDesc = skb_shinfo(skb)->nr_frags + 1;
-       if(skb_tso_size(skb))
+       if (skb_is_gso(skb))
                numDesc++;
 
        /* When checking for free space in the ring, we need to also
@@ -845,7 +845,7 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev)
                                TYPHOON_TX_PF_VLAN_TAG_SHIFT);
        }
 
-       if(skb_tso_size(skb)) {
+       if (skb_is_gso(skb)) {
                first_txd->processFlags |= TYPHOON_TX_PF_TCP_SEGMENT;
                first_txd->numDesc++;
 
index 15e7102..09e05fe 100644 (file)
@@ -61,7 +61,6 @@
 #include <linux/timer.h>
 #include <linux/slab.h>
 #include <linux/interrupt.h>
-#include <linux/version.h>
 #include <linux/string.h>
 #include <linux/wait.h>
 #include <asm/io.h>
@@ -249,6 +248,7 @@ static void velocity_free_rd_ring(struct velocity_info *vptr);
 static void velocity_free_tx_buf(struct velocity_info *vptr, struct velocity_td_info *);
 static int velocity_soft_reset(struct velocity_info *vptr);
 static void mii_init(struct velocity_info *vptr, u32 mii_status);
+static u32 velocity_get_link(struct net_device *dev);
 static u32 velocity_get_opt_media_mode(struct velocity_info *vptr);
 static void velocity_print_link_status(struct velocity_info *vptr);
 static void safe_disable_mii_autopoll(struct mac_regs __iomem * regs);
@@ -792,13 +792,16 @@ static int __devinit velocity_found1(struct pci_dev *pdev, const struct pci_devi
 #endif
 
        if (vptr->flags & VELOCITY_FLAGS_TX_CSUM) {
-               dev->features |= NETIF_F_HW_CSUM;
+               dev->features |= NETIF_F_IP_CSUM;
        }
 
        ret = register_netdev(dev);
        if (ret < 0)
                goto err_iounmap;
 
+       if (velocity_get_link(dev))
+               netif_carrier_off(dev);
+
        velocity_print_info(vptr);
        pci_set_drvdata(pdev, dev);
        
@@ -1107,6 +1110,9 @@ static void velocity_free_rd_ring(struct velocity_info *vptr)
 
        for (i = 0; i < vptr->options.numrx; i++) {
                struct velocity_rd_info *rd_info = &(vptr->rd_info[i]);
+               struct rx_desc *rd = vptr->rd_ring + i;
+
+               memset(rd, 0, sizeof(*rd));
 
                if (!rd_info->skb)
                        continue;
@@ -1212,10 +1218,8 @@ static void velocity_free_td_ring(struct velocity_info *vptr)
                        velocity_free_td_ring_entry(vptr, j, i);
 
                }
-               if (vptr->td_infos[j]) {
-                       kfree(vptr->td_infos[j]);
-                       vptr->td_infos[j] = NULL;
-               }
+               kfree(vptr->td_infos[j]);
+               vptr->td_infos[j] = NULL;
        }
 }
 
@@ -1335,7 +1339,7 @@ static inline int velocity_rx_copy(struct sk_buff **rx_skb, int pkt_size,
                        if (vptr->flags & VELOCITY_FLAGS_IP_ALIGN)
                                skb_reserve(new_skb, 2);
 
-                       memcpy(new_skb->data, rx_skb[0]->tail, pkt_size);
+                       memcpy(new_skb->data, rx_skb[0]->data, pkt_size);
                        *rx_skb = new_skb;
                        ret = 0;
                }
@@ -1456,9 +1460,9 @@ static int velocity_alloc_rx_buf(struct velocity_info *vptr, int idx)
         *      Do the gymnastics to get the buffer head for data at
         *      64byte alignment.
         */
-       skb_reserve(rd_info->skb, (unsigned long) rd_info->skb->tail & 63);
+       skb_reserve(rd_info->skb, (unsigned long) rd_info->skb->data & 63);
        rd_info->skb->dev = vptr->dev;
-       rd_info->skb_dma = pci_map_single(vptr->pdev, rd_info->skb->tail, vptr->rx_buf_sz, PCI_DMA_FROMDEVICE);
+       rd_info->skb_dma = pci_map_single(vptr->pdev, rd_info->skb->data, vptr->rx_buf_sz, PCI_DMA_FROMDEVICE);
        
        /*
         *      Fill in the descriptor to match
@@ -1653,8 +1657,10 @@ static void velocity_error(struct velocity_info *vptr, int status)
 
                if (linked) {
                        vptr->mii_status &= ~VELOCITY_LINK_FAIL;
+                       netif_carrier_on(vptr->dev);
                } else {
                        vptr->mii_status |= VELOCITY_LINK_FAIL;
+                       netif_carrier_off(vptr->dev);
                }
 
                velocity_print_link_status(vptr);
@@ -1899,6 +1905,13 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev)
 
        int pktlen = skb->len;
 
+#ifdef VELOCITY_ZERO_COPY_SUPPORT
+       if (skb_shinfo(skb)->nr_frags > 6 && __skb_linearize(skb)) {
+               kfree_skb(skb);
+               return 0;
+       }
+#endif
+
        spin_lock_irqsave(&vptr->lock, flags);
 
        index = vptr->td_curr[qnum];
@@ -1914,8 +1927,6 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev)
         */
        if (pktlen < ETH_ZLEN) {
                /* Cannot occur until ZC support */
-               if(skb_linearize(skb, GFP_ATOMIC))
-                       return 0; 
                pktlen = ETH_ZLEN;
                memcpy(tdinfo->buf, skb->data, skb->len);
                memset(tdinfo->buf + skb->len, 0, ETH_ZLEN - skb->len);
@@ -1933,7 +1944,6 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev)
                int nfrags = skb_shinfo(skb)->nr_frags;
                tdinfo->skb = skb;
                if (nfrags > 6) {
-                       skb_linearize(skb, GFP_ATOMIC);
                        memcpy(tdinfo->buf, skb->data, skb->len);
                        tdinfo->skb_dma[0] = tdinfo->buf_dma;
                        td_ptr->tdesc0.pktsize = 
index c2d0b09..a5fcfcd 100644 (file)
@@ -1833,7 +1833,9 @@ static int __orinoco_program_rids(struct net_device *dev)
        /* Set promiscuity / multicast*/
        priv->promiscuous = 0;
        priv->mc_count = 0;
-       __orinoco_set_multicast_list(dev); /* FIXME: what about the xmit_lock */
+
+       /* FIXME: what about netif_tx_lock */
+       __orinoco_set_multicast_list(dev);
 
        return 0;
 }
index f7b77ce..54fe4e4 100644 (file)
@@ -245,7 +245,7 @@ spectrum_reset(struct pcmcia_device *link, int idle)
        u_int save_cor;
 
        /* Doing it if hardware is gone is guaranteed crash */
-       if (pcmcia_dev_present(link))
+       if (!pcmcia_dev_present(link))
                return -ENODEV;
 
        /* Save original COR value */
index b2e8e49..a647d39 100644 (file)
@@ -6,6 +6,10 @@
  *
  * @author John Levon <levon@movementarian.org>
  *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
  * This is the core of the buffer management. Each
  * CPU buffer is processed and entered into the
  * global event buffer. Such processing is necessary
@@ -275,15 +279,31 @@ static void add_cpu_switch(int i)
        last_cookie = INVALID_COOKIE;
 }
 
-static void add_kernel_ctx_switch(unsigned int in_kernel)
+static void add_cpu_mode_switch(unsigned int cpu_mode)
 {
        add_event_entry(ESCAPE_CODE);
-       if (in_kernel)
-               add_event_entry(KERNEL_ENTER_SWITCH_CODE); 
-       else
-               add_event_entry(KERNEL_EXIT_SWITCH_CODE); 
+       switch (cpu_mode) {
+       case CPU_MODE_USER:
+               add_event_entry(USER_ENTER_SWITCH_CODE);
+               break;
+       case CPU_MODE_KERNEL:
+               add_event_entry(KERNEL_ENTER_SWITCH_CODE);
+               break;
+       case CPU_MODE_XEN:
+               add_event_entry(XEN_ENTER_SWITCH_CODE);
+               break;
+       default:
+               break;
+       }
 }
+
+static void add_domain_switch(unsigned long domain_id)
+{
+       add_event_entry(ESCAPE_CODE);
+       add_event_entry(DOMAIN_SWITCH_CODE);
+       add_event_entry(domain_id);
+}
+
 static void
 add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
 {
@@ -348,9 +368,9 @@ static int add_us_sample(struct mm_struct * mm, struct op_sample * s)
  * for later lookup from userspace.
  */
 static int
-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
+add_sample(struct mm_struct * mm, struct op_sample * s, int cpu_mode)
 {
-       if (in_kernel) {
+       if (cpu_mode >= CPU_MODE_KERNEL) {
                add_sample_entry(s->eip, s->event);
                return 1;
        } else if (mm) {
@@ -496,10 +516,11 @@ void sync_buffer(int cpu)
        struct mm_struct *mm = NULL;
        struct task_struct * new;
        unsigned long cookie = 0;
-       int in_kernel = 1;
+       int cpu_mode = 1;
        unsigned int i;
        sync_buffer_state state = sb_buffer_start;
        unsigned long available;
+       int domain_switch = 0;
 
        down(&buffer_sem);
  
@@ -512,16 +533,18 @@ void sync_buffer(int cpu)
        for (i = 0; i < available; ++i) {
                struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
  
-               if (is_code(s->eip)) {
-                       if (s->event <= CPU_IS_KERNEL) {
-                               /* kernel/userspace switch */
-                               in_kernel = s->event;
+               if (is_code(s->eip) && !domain_switch) {
+                       if (s->event <= CPU_MODE_XEN) {
+                               /* xen/kernel/userspace switch */
+                               cpu_mode = s->event;
                                if (state == sb_buffer_start)
                                        state = sb_sample_start;
-                               add_kernel_ctx_switch(s->event);
+                               add_cpu_mode_switch(s->event);
                        } else if (s->event == CPU_TRACE_BEGIN) {
                                state = sb_bt_start;
                                add_trace_begin();
+                       } else if (s->event == CPU_DOMAIN_SWITCH) {
+                               domain_switch = 1;
                        } else {
                                struct mm_struct * oldmm = mm;
 
@@ -535,11 +558,16 @@ void sync_buffer(int cpu)
                                add_user_ctx_switch(new, cookie);
                        }
                } else {
-                       if (state >= sb_bt_start &&
-                           !add_sample(mm, s, in_kernel)) {
-                               if (state == sb_bt_start) {
-                                       state = sb_bt_ignore;
-                                       atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+                       if (domain_switch) {
+                               add_domain_switch(s->eip);
+                               domain_switch = 0;
+                       } else {
+                               if (state >= sb_bt_start &&
+                                   !add_sample(mm, s, cpu_mode)) {
+                                       if (state == sb_bt_start) {
+                                               state = sb_bt_ignore;
+                                               atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+                                       }
                                }
                        }
                }
index fc4bc9b..a59878e 100644 (file)
@@ -6,6 +6,10 @@
  *
  * @author John Levon <levon@movementarian.org>
  *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ *
  * Each CPU has a local buffer that stores PC value/event
  * pairs. We also log context switches when we notice them.
  * Eventually each CPU's buffer is processed into the global
@@ -34,6 +38,8 @@ static void wq_sync_buffer(void *);
 #define DEFAULT_TIMER_EXPIRE (HZ / 10)
 static int work_enabled;
 
+static int32_t current_domain = COORDINATOR_DOMAIN;
+
 void free_cpu_buffers(void)
 {
        int i;
@@ -57,7 +63,7 @@ int alloc_cpu_buffers(void)
                        goto fail;
  
                b->last_task = NULL;
-               b->last_is_kernel = -1;
+               b->last_cpu_mode = -1;
                b->tracing = 0;
                b->buffer_size = buffer_size;
                b->tail_pos = 0;
@@ -113,7 +119,7 @@ void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf)
         * collected will populate the buffer with proper
         * values to initialize the buffer
         */
-       cpu_buf->last_is_kernel = -1;
+       cpu_buf->last_cpu_mode = -1;
        cpu_buf->last_task = NULL;
 }
 
@@ -163,13 +169,13 @@ add_code(struct oprofile_cpu_buffer * buffer, unsigned long value)
  * because of the head/tail separation of the writer and reader
  * of the CPU buffer.
  *
- * is_kernel is needed because on some architectures you cannot
+ * cpu_mode is needed because on some architectures you cannot
  * tell if you are in kernel or user space simply by looking at
- * pc. We tag this in the buffer by generating kernel enter/exit
- * events whenever is_kernel changes
+ * pc. We tag this in the buffer by generating kernel/user (and xen)
+ * enter events whenever cpu_mode changes
  */
 static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
-                     int is_kernel, unsigned long event)
+                     int cpu_mode, unsigned long event)
 {
        struct task_struct * task;
 
@@ -180,18 +186,20 @@ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc,
                return 0;
        }
 
-       is_kernel = !!is_kernel;
+       WARN_ON(cpu_mode > CPU_MODE_XEN);
 
        task = current;
 
        /* notice a switch from user->kernel or vice versa */
-       if (cpu_buf->last_is_kernel != is_kernel) {
-               cpu_buf->last_is_kernel = is_kernel;
-               add_code(cpu_buf, is_kernel);
+       if (cpu_buf->last_cpu_mode != cpu_mode) {
+               cpu_buf->last_cpu_mode = cpu_mode;
+               add_code(cpu_buf, cpu_mode);
        }
-
+
        /* notice a task switch */
-       if (cpu_buf->last_task != task) {
+       /* if not processing other domain samples */
+       if ((cpu_buf->last_task != task) &&
+           (current_domain == COORDINATOR_DOMAIN)) {
                cpu_buf->last_task = task;
                add_code(cpu_buf, (unsigned long)task);
        }
@@ -275,6 +283,25 @@ void oprofile_add_trace(unsigned long pc)
        add_sample(cpu_buf, pc, 0);
 }
 
+int oprofile_add_domain_switch(int32_t domain_id)
+{
+       struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[smp_processor_id()];
+
+       /* should have space for switching into and out of domain
+          (2 slots each) plus one sample and one cpu mode switch */
+       if (((nr_available_slots(cpu_buf) < 6) && 
+            (domain_id != COORDINATOR_DOMAIN)) ||
+           (nr_available_slots(cpu_buf) < 2))
+               return 0;
+
+       add_code(cpu_buf, CPU_DOMAIN_SWITCH);
+       add_sample(cpu_buf, domain_id, 0);
+
+       current_domain = domain_id;
+
+       return 1;
+}
+
 /*
  * This serves to avoid cpu buffer overflow, and makes sure
  * the task mortuary progresses
index 09abb80..cd94735 100644 (file)
@@ -36,7 +36,7 @@ struct oprofile_cpu_buffer {
        volatile unsigned long tail_pos;
        unsigned long buffer_size;
        struct task_struct * last_task;
-       int last_is_kernel;
+       int last_cpu_mode;
        int tracing;
        struct op_sample * buffer;
        unsigned long sample_received;
@@ -51,7 +51,10 @@ extern struct oprofile_cpu_buffer cpu_buffer[];
 void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf);
 
 /* transient events for the CPU buffer -> event buffer */
-#define CPU_IS_KERNEL 1
-#define CPU_TRACE_BEGIN 2
+#define CPU_MODE_USER           0
+#define CPU_MODE_KERNEL         1
+#define CPU_MODE_XEN            2
+#define CPU_TRACE_BEGIN         3
+#define CPU_DOMAIN_SWITCH       4
 
 #endif /* OPROFILE_CPU_BUFFER_H */
index 0180236..0c33ee7 100644 (file)
@@ -29,15 +29,20 @@ void wake_up_buffer_waiter(void);
 #define CPU_SWITCH_CODE                2
 #define COOKIE_SWITCH_CODE             3
 #define KERNEL_ENTER_SWITCH_CODE       4
-#define KERNEL_EXIT_SWITCH_CODE                5
+#define USER_ENTER_SWITCH_CODE         5
 #define MODULE_LOADED_CODE             6
 #define CTX_TGID_CODE                  7
 #define TRACE_BEGIN_CODE               8
 #define TRACE_END_CODE                 9
+#define XEN_ENTER_SWITCH_CODE          10
+#define DOMAIN_SWITCH_CODE             11
  
 #define INVALID_COOKIE ~0UL
 #define NO_COOKIE 0UL
 
+/* Constant used to refer to coordinator domain (Xen) */
+#define COORDINATOR_DOMAIN -1
+
 /* add data to the event buffer */
 void add_event_entry(unsigned long data);
  
index b3f1cd6..76bac8d 100644 (file)
@@ -5,6 +5,10 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #include <linux/kernel.h>
@@ -19,7 +23,7 @@
 #include "cpu_buffer.h"
 #include "buffer_sync.h"
 #include "oprofile_stats.h"
+
 struct oprofile_operations oprofile_ops;
 
 unsigned long oprofile_started;
@@ -33,6 +37,34 @@ static DECLARE_MUTEX(start_sem);
  */
 static int timer = 0;
 
+#ifdef CONFIG_XEN
+int oprofile_set_active(int active_domains[], unsigned int adomains)
+{
+       int err;
+
+       if (!oprofile_ops.set_active)
+               return -EINVAL;
+
+       down(&start_sem);
+       err = oprofile_ops.set_active(active_domains, adomains);
+       up(&start_sem);
+       return err;
+}
+
+int oprofile_set_passive(int passive_domains[], unsigned int pdomains)
+{
+       int err;
+
+       if (!oprofile_ops.set_passive)
+               return -EINVAL;
+
+       down(&start_sem);
+       err = oprofile_ops.set_passive(passive_domains, pdomains);
+       up(&start_sem);
+       return err;
+}
+#endif
+
 int oprofile_setup(void)
 {
        int err;
index 1832365..587db2b 100644 (file)
@@ -35,5 +35,10 @@ void oprofile_create_files(struct super_block * sb, struct dentry * root);
 void oprofile_timer_init(struct oprofile_operations * ops);
 
 int oprofile_set_backtrace(unsigned long depth);
+
+#ifdef CONFIG_XEN
+int oprofile_set_active(int active_domains[], unsigned int adomains);
+int oprofile_set_passive(int passive_domains[], unsigned int pdomains);
+#endif
+
 #endif /* OPROF_H */
index a72006c..3888465 100644 (file)
@@ -5,15 +5,21 @@
  * @remark Read the file COPYING
  *
  * @author John Levon <levon@movementarian.org>
+ *
+ * Modified by Aravind Menon for Xen
+ * These modifications are:
+ * Copyright (C) 2005 Hewlett-Packard Co.
  */
 
 #include <linux/fs.h>
 #include <linux/oprofile.h>
+#include <asm/uaccess.h>
+#include <linux/ctype.h>
 
 #include "event_buffer.h"
 #include "oprofile_stats.h"
 #include "oprof.h"
+
 unsigned long fs_buffer_size = 131072;
 unsigned long fs_cpu_buffer_size = 8192;
 unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
@@ -117,11 +123,206 @@ static ssize_t dump_write(struct file * file, char const __user * buf, size_t co
 static struct file_operations dump_fops = {
        .write          = dump_write,
 };
+
+#define TMPBUFSIZE 512
+
+#ifdef CONFIG_XEN
+static unsigned int adomains = 0;
+static int active_domains[MAX_OPROF_DOMAINS + 1];
+static DEFINE_MUTEX(adom_mutex);
+
+static ssize_t adomain_write(struct file * file, char const __user * buf, 
+                            size_t count, loff_t * offset)
+{
+       char *tmpbuf;
+       char *startp, *endp;
+       int i;
+       unsigned long val;
+       ssize_t retval = count;
+       
+       if (*offset)
+               return -EINVAL; 
+       if (count > TMPBUFSIZE - 1)
+               return -EINVAL;
+
+       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
+               return -ENOMEM;
+
+       if (copy_from_user(tmpbuf, buf, count)) {
+               kfree(tmpbuf);
+               return -EFAULT;
+       }
+       tmpbuf[count] = 0;
+
+       mutex_lock(&adom_mutex);
+
+       startp = tmpbuf;
+       /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
+       for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
+               val = simple_strtoul(startp, &endp, 0);
+               if (endp == startp)
+                       break;
+               while (ispunct(*endp) || isspace(*endp))
+                       endp++;
+               active_domains[i] = val;
+               if (active_domains[i] != val)
+                       /* Overflow, force error below */
+                       i = MAX_OPROF_DOMAINS + 1;
+               startp = endp;
+       }
+       /* Force error on trailing junk */
+       adomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
+
+       kfree(tmpbuf);
+
+       if (adomains > MAX_OPROF_DOMAINS
+           || oprofile_set_active(active_domains, adomains)) {
+               adomains = 0;
+               retval = -EINVAL;
+       }
+
+       mutex_unlock(&adom_mutex);
+       return retval;
+}
+
+static ssize_t adomain_read(struct file * file, char __user * buf, 
+                           size_t count, loff_t * offset)
+{
+       char * tmpbuf;
+       size_t len;
+       int i;
+       ssize_t retval;
+
+       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
+               return -ENOMEM;
+
+       mutex_lock(&adom_mutex);
+
+       len = 0;
+       for (i = 0; i < adomains; i++)
+               len += snprintf(tmpbuf + len,
+                               len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
+                               "%u ", active_domains[i]);
+       WARN_ON(len > TMPBUFSIZE);
+       if (len != 0 && len <= TMPBUFSIZE)
+               tmpbuf[len-1] = '\n';
+
+       mutex_unlock(&adom_mutex);
+
+       retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
+
+       kfree(tmpbuf);
+       return retval;
+}
+
+
+static struct file_operations active_domain_ops = {
+       .read           = adomain_read,
+       .write          = adomain_write,
+};
+
+static unsigned int pdomains = 0;
+static int passive_domains[MAX_OPROF_DOMAINS];
+static DEFINE_MUTEX(pdom_mutex);
+
+static ssize_t pdomain_write(struct file * file, char const __user * buf, 
+                            size_t count, loff_t * offset)
+{
+       char *tmpbuf;
+       char *startp, *endp;
+       int i;
+       unsigned long val;
+       ssize_t retval = count;
+       
+       if (*offset)
+               return -EINVAL; 
+       if (count > TMPBUFSIZE - 1)
+               return -EINVAL;
+
+       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
+               return -ENOMEM;
+
+       if (copy_from_user(tmpbuf, buf, count)) {
+               kfree(tmpbuf);
+               return -EFAULT;
+       }
+       tmpbuf[count] = 0;
+
+       mutex_lock(&pdom_mutex);
+
+       startp = tmpbuf;
+       /* Parse one more than MAX_OPROF_DOMAINS, for easy error checking */
+       for (i = 0; i <= MAX_OPROF_DOMAINS; i++) {
+               val = simple_strtoul(startp, &endp, 0);
+               if (endp == startp)
+                       break;
+               while (ispunct(*endp) || isspace(*endp))
+                       endp++;
+               passive_domains[i] = val;
+               if (passive_domains[i] != val)
+                       /* Overflow, force error below */
+                       i = MAX_OPROF_DOMAINS + 1;
+               startp = endp;
+       }
+       /* Force error on trailing junk */
+       pdomains = *startp ? MAX_OPROF_DOMAINS + 1 : i;
+
+       kfree(tmpbuf);
+
+       if (pdomains > MAX_OPROF_DOMAINS
+           || oprofile_set_passive(passive_domains, pdomains)) {
+               pdomains = 0;
+               retval = -EINVAL;
+       }
+
+       mutex_unlock(&pdom_mutex);
+       return retval;
+}
+
+static ssize_t pdomain_read(struct file * file, char __user * buf, 
+                           size_t count, loff_t * offset)
+{
+       char * tmpbuf;
+       size_t len;
+       int i;
+       ssize_t retval;
+
+       if (!(tmpbuf = kmalloc(TMPBUFSIZE, GFP_KERNEL)))
+               return -ENOMEM;
+
+       mutex_lock(&pdom_mutex);
+
+       len = 0;
+       for (i = 0; i < pdomains; i++)
+               len += snprintf(tmpbuf + len,
+                               len < TMPBUFSIZE ? TMPBUFSIZE - len : 0,
+                               "%u ", passive_domains[i]);
+       WARN_ON(len > TMPBUFSIZE);
+       if (len != 0 && len <= TMPBUFSIZE)
+               tmpbuf[len-1] = '\n';
+
+       mutex_unlock(&pdom_mutex);
+
+       retval = simple_read_from_buffer(buf, count, offset, tmpbuf, len);
+
+       kfree(tmpbuf);
+       return retval;
+}
+
+static struct file_operations passive_domain_ops = {
+       .read           = pdomain_read,
+       .write          = pdomain_write,
+};
+#endif /* CONFIG_XEN */
+
 void oprofile_create_files(struct super_block * sb, struct dentry * root)
 {
        oprofilefs_create_file(sb, root, "enable", &enable_fops);
        oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
+#ifdef CONFIG_XEN
+       oprofilefs_create_file(sb, root, "active_domains", &active_domain_ops);
+       oprofilefs_create_file(sb, root, "passive_domains", &passive_domain_ops);
+#endif
        oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);
        oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size);
        oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed);
index 4d762fc..065ea43 100644 (file)
@@ -5,6 +5,7 @@ config PCI_MSI
        bool "Message Signaled Interrupts (MSI and MSI-X)"
        depends on PCI
        depends on (X86_LOCAL_APIC && X86_IO_APIC) || IA64
+       depends on !XEN
        help
           This allows device drivers to enable MSI (Message Signaled
           Interrupts).  Message Signaled Interrupts enable a device to
index d378478..6e3786f 100644 (file)
@@ -427,6 +427,7 @@ static void __devinit quirk_ich6_lpc_acpi(struct pci_dev *dev)
        pci_read_config_dword(dev, 0x48, &region);
        quirk_io_region(dev, region, 64, PCI_BRIDGE_RESOURCES+1, "ICH6 GPIO");
 }
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_ICH6_0, quirk_ich6_lpc_acpi );
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_ICH6_1, quirk_ich6_lpc_acpi );
 
 /*
@@ -1043,7 +1044,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,     PCI_DEVICE_ID_INTEL_82801BA_0,  asu
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82801CA_12, asus_hides_smbus_lpc );
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82801DB_12, asus_hides_smbus_lpc );
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_82801EB_0,  asus_hides_smbus_lpc );
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL,  PCI_DEVICE_ID_INTEL_ICH6_1,     asus_hides_smbus_lpc );
 
 static void __init asus_hides_smbus_lpc_ich6(struct pci_dev *dev)
 {
index 6ded527..688421d 100644 (file)
@@ -396,7 +396,8 @@ int pnp_check_irq(struct pnp_dev * dev, int idx)
        /* check if the resource is already in use, skip if the
         * device is active because it itself may be in use */
        if(!dev->active) {
-               if (request_irq(*irq, pnp_test_handler, SA_INTERRUPT, "pnp", NULL))
+               if (request_irq(*irq, pnp_test_handler,
+                               SA_INTERRUPT|SA_PROBEIRQ, "pnp", NULL))
                        return 0;
                free_irq(*irq, NULL);
        }
index f94f1f2..38aad83 100644 (file)
@@ -1,6 +1,5 @@
 /*
- *
- * linux/drivers/s390/net/qeth_eddp.c ($Revision: 1.13 $)
+ * linux/drivers/s390/net/qeth_eddp.c
  *
  * Enhanced Device Driver Packing (EDDP) support for the qeth driver.
  *
@@ -8,8 +7,6 @@
  *
  *    Author(s): Thomas Spatzier <tspat@de.ibm.com>
  *
- *    $Revision: 1.13 $         $Date: 2005/05/04 20:19:18 $
- *
  */
 #include <linux/config.h>
 #include <linux/errno.h>
@@ -62,8 +59,7 @@ qeth_eddp_free_context(struct qeth_eddp_context *ctx)
        for (i = 0; i < ctx->num_pages; ++i)
                free_page((unsigned long)ctx->pages[i]);
        kfree(ctx->pages);
-       if (ctx->elements != NULL)
-               kfree(ctx->elements);
+       kfree(ctx->elements);
        kfree(ctx);
 }
 
@@ -85,7 +81,7 @@ void
 qeth_eddp_buf_release_contexts(struct qeth_qdio_out_buffer *buf)
 {
        struct qeth_eddp_context_reference *ref;
-       
+
        QETH_DBF_TEXT(trace, 6, "eddprctx");
        while (!list_empty(&buf->ctx_list)){
                ref = list_entry(buf->ctx_list.next,
@@ -139,7 +135,7 @@ qeth_eddp_fill_buffer(struct qeth_qdio_out_q *queue,
                                           "buffer!\n");
                                goto out;
                        }
-               }               
+               }
                /* check if the whole next skb fits into current buffer */
                if ((QETH_MAX_BUFFER_ELEMENTS(queue->card) -
                                        buf->next_element_to_fill)
@@ -152,7 +148,7 @@ qeth_eddp_fill_buffer(struct qeth_qdio_out_q *queue,
                         * and increment ctx's refcnt */
                        must_refcnt = 1;
                        continue;
-               }       
+               }
                if (must_refcnt){
                        must_refcnt = 0;
                        if (qeth_eddp_buf_ref_context(buf, ctx)){
@@ -270,7 +266,7 @@ qeth_eddp_copy_data_tcp(char *dst, struct qeth_eddp_data *eddp, int len,
        int left_in_frag;
        int copy_len;
        u8 *src;
-       
+
        QETH_DBF_TEXT(trace, 5, "eddpcdtc");
        if (skb_shinfo(eddp->skb)->nr_frags == 0) {
                memcpy(dst, eddp->skb->data + eddp->skb_offset, len);
@@ -393,9 +389,8 @@ qeth_eddp_create_eddp_data(struct qeth_hdr *qh, u8 *nh, u8 nhl, u8 *th, u8 thl)
        struct qeth_eddp_data *eddp;
 
        QETH_DBF_TEXT(trace, 5, "eddpcrda");
-       eddp = kmalloc(sizeof(struct qeth_eddp_data), GFP_ATOMIC);
+       eddp = kzalloc(sizeof(struct qeth_eddp_data), GFP_ATOMIC);
        if (eddp){
-               memset(eddp, 0, sizeof(struct qeth_eddp_data));
                eddp->nhl = nhl;
                eddp->thl = thl;
                memcpy(&eddp->qh, qh, sizeof(struct qeth_hdr));
@@ -413,12 +408,19 @@ __qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
        struct tcphdr *tcph;
        int data_len;
        u32 hcsum;
-       
+
        QETH_DBF_TEXT(trace, 5, "eddpftcp");
        eddp->skb_offset = sizeof(struct qeth_hdr) + eddp->nhl + eddp->thl;
+       if (eddp->qh.hdr.l2.id == QETH_HEADER_TYPE_LAYER2) {
+               eddp->skb_offset += sizeof(struct ethhdr);
+#ifdef CONFIG_QETH_VLAN
+               if (eddp->mac.h_proto == __constant_htons(ETH_P_8021Q))
+                       eddp->skb_offset += VLAN_HLEN;
+#endif /* CONFIG_QETH_VLAN */
+       }
        tcph = eddp->skb->h.th;
        while (eddp->skb_offset < eddp->skb->len) {
-               data_len = min((int)skb_shinfo(eddp->skb)->tso_size,
+               data_len = min((int)skb_shinfo(eddp->skb)->gso_size,
                               (int)(eddp->skb->len - eddp->skb_offset));
                /* prepare qdio hdr */
                if (eddp->qh.hdr.l2.id == QETH_HEADER_TYPE_LAYER2){
@@ -463,13 +465,13 @@ __qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
                eddp->th.tcp.h.seq += data_len;
        }
 }
-                          
+
 static inline int
 qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
                           struct sk_buff *skb, struct qeth_hdr *qhdr)
 {
        struct qeth_eddp_data *eddp = NULL;
-       
+
        QETH_DBF_TEXT(trace, 5, "eddpficx");
        /* create our segmentation headers and copy original headers */
        if (skb->protocol == ETH_P_IP)
@@ -486,6 +488,7 @@ qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx,
                return -ENOMEM;
        }
        if (qhdr->hdr.l2.id == QETH_HEADER_TYPE_LAYER2) {
+               skb->mac.raw = (skb->data) + sizeof(struct qeth_hdr);
                memcpy(&eddp->mac, eth_hdr(skb), ETH_HLEN);
 #ifdef CONFIG_QETH_VLAN
                if (eddp->mac.h_proto == __constant_htons(ETH_P_8021Q)) {
@@ -509,23 +512,23 @@ qeth_eddp_calc_num_pages(struct qeth_eddp_context *ctx, struct sk_buff *skb,
                         int hdr_len)
 {
        int skbs_per_page;
-       
+
        QETH_DBF_TEXT(trace, 5, "eddpcanp");
        /* can we put multiple skbs in one page? */
-       skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->tso_size + hdr_len);
+       skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->gso_size + hdr_len);
        if (skbs_per_page > 1){
-               ctx->num_pages = (skb_shinfo(skb)->tso_segs + 1) /
+               ctx->num_pages = (skb_shinfo(skb)->gso_segs + 1) /
                                 skbs_per_page + 1;
                ctx->elements_per_skb = 1;
        } else {
                /* no -> how many elements per skb? */
-               ctx->elements_per_skb = (skb_shinfo(skb)->tso_size + hdr_len +
+               ctx->elements_per_skb = (skb_shinfo(skb)->gso_size + hdr_len +
                                     PAGE_SIZE) >> PAGE_SHIFT;
                ctx->num_pages = ctx->elements_per_skb *
-                                (skb_shinfo(skb)->tso_segs + 1);
+                                (skb_shinfo(skb)->gso_segs + 1);
        }
        ctx->num_elements = ctx->elements_per_skb *
-                           (skb_shinfo(skb)->tso_segs + 1);
+                           (skb_shinfo(skb)->gso_segs + 1);
 }
 
 static inline struct qeth_eddp_context *
@@ -538,12 +541,11 @@ qeth_eddp_create_context_generic(struct qeth_card *card, struct sk_buff *skb,
 
        QETH_DBF_TEXT(trace, 5, "creddpcg");
        /* create the context and allocate pages */
-       ctx = kmalloc(sizeof(struct qeth_eddp_context), GFP_ATOMIC);
+       ctx = kzalloc(sizeof(struct qeth_eddp_context), GFP_ATOMIC);
        if (ctx == NULL){
                QETH_DBF_TEXT(trace, 2, "ceddpcn1");
                return NULL;
        }
-       memset(ctx, 0, sizeof(struct qeth_eddp_context));
        ctx->type = QETH_LARGE_SEND_EDDP;
        qeth_eddp_calc_num_pages(ctx, skb, hdr_len);
        if (ctx->elements_per_skb > QETH_MAX_BUFFER_ELEMENTS(card)){
@@ -551,13 +553,12 @@ qeth_eddp_create_context_generic(struct qeth_card *card, struct sk_buff *skb,
                kfree(ctx);
                return NULL;
        }
-       ctx->pages = kmalloc(ctx->num_pages * sizeof(u8 *), GFP_ATOMIC);
+       ctx->pages = kcalloc(ctx->num_pages, sizeof(u8 *), GFP_ATOMIC);
        if (ctx->pages == NULL){
                QETH_DBF_TEXT(trace, 2, "ceddpcn2");
                kfree(ctx);
                return NULL;
        }
-       memset(ctx->pages, 0, ctx->num_pages * sizeof(u8 *));
        for (i = 0; i < ctx->num_pages; ++i){
                addr = (u8 *)__get_free_page(GFP_ATOMIC);
                if (addr == NULL){
@@ -569,15 +570,13 @@ qeth_eddp_create_context_generic(struct qeth_card *card, struct sk_buff *skb,
                memset(addr, 0, PAGE_SIZE);
                ctx->pages[i] = addr;
        }
-       ctx->elements = kmalloc(ctx->num_elements *
+       ctx->elements = kcalloc(ctx->num_elements,
                                sizeof(struct qeth_eddp_element), GFP_ATOMIC);
        if (ctx->elements == NULL){
                QETH_DBF_TEXT(trace, 2, "ceddpcn4");
                qeth_eddp_free_context(ctx);
                return NULL;
        }
-       memset(ctx->elements, 0,
-              ctx->num_elements * sizeof(struct qeth_eddp_element));
        /* reset num_elements; will be incremented again in fill_buffer to
         * reflect number of actually used elements */
        ctx->num_elements = 0;
@@ -589,7 +588,7 @@ qeth_eddp_create_context_tcp(struct qeth_card *card, struct sk_buff *skb,
                             struct qeth_hdr *qhdr)
 {
        struct qeth_eddp_context *ctx = NULL;
-       
+
        QETH_DBF_TEXT(trace, 5, "creddpct");
        if (skb->protocol == ETH_P_IP)
                ctx = qeth_eddp_create_context_generic(card, skb,
index 9e671a4..2908669 100644 (file)
@@ -4417,7 +4417,6 @@ qeth_send_packet(struct qeth_card *card, struct sk_buff *skb)
        struct qeth_eddp_context *ctx = NULL;
        int tx_bytes = skb->len;
        unsigned short nr_frags = skb_shinfo(skb)->nr_frags;
-       unsigned short tso_size = skb_shinfo(skb)->tso_size;
        int rc;
 
        QETH_DBF_TEXT(trace, 6, "sendpkt");
@@ -4453,7 +4452,7 @@ qeth_send_packet(struct qeth_card *card, struct sk_buff *skb)
        queue = card->qdio.out_qs
                [qeth_get_priority_queue(card, skb, ipv, cast_type)];
 
-       if (skb_shinfo(skb)->tso_size)
+       if (skb_is_gso(skb))
                large_send = card->options.large_send;
 
        /*are we able to do TSO ? If so ,prepare and send it from here */
@@ -4500,9 +4499,8 @@ qeth_send_packet(struct qeth_card *card, struct sk_buff *skb)
                card->stats.tx_packets++;
                card->stats.tx_bytes += tx_bytes;
 #ifdef CONFIG_QETH_PERF_STATS
-               if (tso_size &&
-                  !(large_send == QETH_LARGE_SEND_NO)) {
-                       card->perf_stats.large_send_bytes += tx_bytes;
+               if (skb_is_gso(skb) && !(large_send == QETH_LARGE_SEND_NO)) {
+                       card->perf_stats.large_send_bytes += skb->len;
                        card->perf_stats.large_send_cnt++;
                }
                if (nr_frags > 0){
index ad33e6f..593f298 100644 (file)
@@ -1,13 +1,11 @@
 /*
- * linux/drivers/s390/net/qeth_tso.h ($Revision: 1.7 $)
+ * linux/drivers/s390/net/qeth_tso.h
  *
  * Header file for qeth TCP Segmentation Offload support.
  *
  * Copyright 2004 IBM Corporation
  *
- *    Author(s): Frank Pavlic <pavlic@de.ibm.com>
- *
- *    $Revision: 1.7 $  $Date: 2005/05/04 20:19:18 $
+ *    Author(s): Frank Pavlic <fpavlic@de.ibm.com>
  *
  */
 #ifndef __QETH_TSO_H__
@@ -53,7 +51,7 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb)
        hdr->ext.hdr_version = 1;
        hdr->ext.hdr_len     = 28;
        /*insert non-fix values */
-       hdr->ext.mss = skb_shinfo(skb)->tso_size;
+       hdr->ext.mss = skb_shinfo(skb)->gso_size;
        hdr->ext.dg_hdr_len = (__u16)(iph->ihl*4 + tcph->doff*4);
        hdr->ext.payload_len = (__u16)(skb->len - hdr->ext.dg_hdr_len -
                                       sizeof(struct qeth_hdr_tso));
@@ -119,11 +117,11 @@ __qeth_fill_buffer_frag(struct sk_buff *skb, struct qdio_buffer *buffer,
        int fragno;
        unsigned long addr;
        int element, cnt, dlen;
-       
+
        fragno = skb_shinfo(skb)->nr_frags;
        element = *next_element_to_fill;
        dlen = 0;
-       
+
        if (is_tso)
                buffer->element[element].flags =
                        SBAL_FLAGS_MIDDLE_FRAG;
index a262e38..81803a1 100644 (file)
@@ -33,7 +33,7 @@ obj-$(CONFIG_SCSI_FC_ATTRS)   += scsi_transport_fc.o
 obj-$(CONFIG_SCSI_ISCSI_ATTRS) += scsi_transport_iscsi.o
 obj-$(CONFIG_SCSI_SAS_ATTRS)   += scsi_transport_sas.o
 
-obj-$(CONFIG_ISCSI_TCP)                += libiscsi.o   iscsi_tcp.o
+obj-$(CONFIG_ISCSI_TCP)        += iscsi_tcp.o
 obj-$(CONFIG_SCSI_AMIGA7XX)    += amiga7xx.o   53c7xx.o
 obj-$(CONFIG_A3000_SCSI)       += a3000.o      wd33c93.o
 obj-$(CONFIG_A2091_SCSI)       += a2091.o      wd33c93.o
index b4743a9..2068b66 100644 (file)
@@ -3,8 +3,7 @@
  *
  * Copyright (C) 2004 Dmitry Yusupov
  * Copyright (C) 2004 Alex Aizman
- * Copyright (C) 2005 - 2006 Mike Christie
- * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2005 Mike Christie
  * maintained by open-iscsi@googlegroups.com
  *
  * This program is free software; you can redistribute it and/or modify
 #include <linux/mutex.h>
 #include <net/tcp.h>
 #include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_eh.h>
+#include <scsi/scsi_request.h>
+#include <scsi/scsi_tcq.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_transport_iscsi.h>
 
 #include "iscsi_tcp.h"
 
-#define ISCSI_TCP_VERSION "1.0-595"
-
 MODULE_AUTHOR("Dmitry Yusupov <dmitry_yus@yahoo.com>, "
              "Alex Aizman <itn780@yahoo.com>");
 MODULE_DESCRIPTION("iSCSI/TCP data-path");
 MODULE_LICENSE("GPL");
-MODULE_VERSION(ISCSI_TCP_VERSION);
+MODULE_VERSION("0:4.445");
 /* #define DEBUG_TCP */
+/* #define DEBUG_SCSI */
 #define DEBUG_ASSERT
 
 #ifdef DEBUG_TCP
-#define debug_tcp(fmt...) printk(KERN_INFO "tcp: " fmt)
+#define debug_tcp(fmt...) printk(KERN_DEBUG "tcp: " fmt)
 #else
 #define debug_tcp(fmt...)
 #endif
 
+#ifdef DEBUG_SCSI
+#define debug_scsi(fmt...) printk(KERN_DEBUG "scsi: " fmt)
+#else
+#define debug_scsi(fmt...)
+#endif
+
 #ifndef DEBUG_ASSERT
 #ifdef BUG_ON
 #undef BUG_ON
@@ -66,9 +74,22 @@ MODULE_VERSION(ISCSI_TCP_VERSION);
 #define BUG_ON(expr)
 #endif
 
+#define INVALID_SN_DELTA       0xffff
+
 static unsigned int iscsi_max_lun = 512;
 module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
 
+/* global data */
+static kmem_cache_t *taskcache;
+
+static inline void
+iscsi_buf_init_virt(struct iscsi_buf *ibuf, char *vbuf, int size)
+{
+       sg_init_one(&ibuf->sg, (u8 *)vbuf, size);
+       ibuf->sent = 0;
+       ibuf->use_sendmsg = 0;
+}
+
 static inline void
 iscsi_buf_init_iov(struct iscsi_buf *ibuf, char *vbuf, int size)
 {
@@ -109,39 +130,68 @@ static inline void
 iscsi_hdr_digest(struct iscsi_conn *conn, struct iscsi_buf *buf,
                 u8* crc)
 {
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-
-       crypto_digest_digest(tcp_conn->tx_tfm, &buf->sg, 1, crc);
+       crypto_digest_digest(conn->tx_tfm, &buf->sg, 1, crc);
        buf->sg.length += sizeof(uint32_t);
 }
 
+static void
+iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err)
+{
+       struct iscsi_session *session = conn->session;
+       unsigned long flags;
+
+       spin_lock_irqsave(&session->lock, flags);
+       if (session->conn_cnt == 1 || session->leadconn == conn)
+               session->state = ISCSI_STATE_FAILED;
+       spin_unlock_irqrestore(&session->lock, flags);
+       set_bit(SUSPEND_BIT, &conn->suspend_tx);
+       set_bit(SUSPEND_BIT, &conn->suspend_rx);
+       iscsi_conn_error(conn->cls_conn, err);
+}
+
 static inline int
-iscsi_hdr_extract(struct iscsi_tcp_conn *tcp_conn)
+iscsi_check_assign_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
 {
-       struct sk_buff *skb = tcp_conn->in.skb;
+       uint32_t max_cmdsn = be32_to_cpu(hdr->max_cmdsn);
+       uint32_t exp_cmdsn = be32_to_cpu(hdr->exp_cmdsn);
+
+       if (max_cmdsn < exp_cmdsn -1 &&
+           max_cmdsn > exp_cmdsn - INVALID_SN_DELTA)
+               return ISCSI_ERR_MAX_CMDSN;
+       if (max_cmdsn > session->max_cmdsn ||
+           max_cmdsn < session->max_cmdsn - INVALID_SN_DELTA)
+               session->max_cmdsn = max_cmdsn;
+       if (exp_cmdsn > session->exp_cmdsn ||
+           exp_cmdsn < session->exp_cmdsn - INVALID_SN_DELTA)
+               session->exp_cmdsn = exp_cmdsn;
+
+       return 0;
+}
 
-       tcp_conn->in.zero_copy_hdr = 0;
+static inline int
+iscsi_hdr_extract(struct iscsi_conn *conn)
+{
+       struct sk_buff *skb = conn->in.skb;
 
-       if (tcp_conn->in.copy >= tcp_conn->hdr_size &&
-           tcp_conn->in_progress == IN_PROGRESS_WAIT_HEADER) {
+       if (conn->in.copy >= conn->hdr_size &&
+           conn->in_progress == IN_PROGRESS_WAIT_HEADER) {
                /*
                 * Zero-copy PDU Header: using connection context
                 * to store header pointer.
                 */
                if (skb_shinfo(skb)->frag_list == NULL &&
-                   !skb_shinfo(skb)->nr_frags) {
-                       tcp_conn->in.hdr = (struct iscsi_hdr *)
-                               ((char*)skb->data + tcp_conn->in.offset);
-                       tcp_conn->in.zero_copy_hdr = 1;
-               } else {
+                   !skb_shinfo(skb)->nr_frags)
+                       conn->in.hdr = (struct iscsi_hdr *)
+                               ((char*)skb->data + conn->in.offset);
+               else {
                        /* ignoring return code since we checked
                         * in.copy before */
-                       skb_copy_bits(skb, tcp_conn->in.offset,
-                               &tcp_conn->hdr, tcp_conn->hdr_size);
-                       tcp_conn->in.hdr = &tcp_conn->hdr;
+                       skb_copy_bits(skb, conn->in.offset,
+                               &conn->hdr, conn->hdr_size);
+                       conn->in.hdr = &conn->hdr;
                }
-               tcp_conn->in.offset += tcp_conn->hdr_size;
-               tcp_conn->in.copy -= tcp_conn->hdr_size;
+               conn->in.offset += conn->hdr_size;
+               conn->in.copy -= conn->hdr_size;
        } else {
                int hdr_remains;
                int copylen;
@@ -151,51 +201,118 @@ iscsi_hdr_extract(struct iscsi_tcp_conn *tcp_conn)
                 * copying it... This'll happen quite rarely.
                 */
 
-               if (tcp_conn->in_progress == IN_PROGRESS_WAIT_HEADER)
-                       tcp_conn->in.hdr_offset = 0;
+               if (conn->in_progress == IN_PROGRESS_WAIT_HEADER)
+                       conn->in.hdr_offset = 0;
 
-               hdr_remains = tcp_conn->hdr_size - tcp_conn->in.hdr_offset;
+               hdr_remains = conn->hdr_size - conn->in.hdr_offset;
                BUG_ON(hdr_remains <= 0);
 
-               copylen = min(tcp_conn->in.copy, hdr_remains);
-               skb_copy_bits(skb, tcp_conn->in.offset,
-                       (char*)&tcp_conn->hdr + tcp_conn->in.hdr_offset,
-                       copylen);
+               copylen = min(conn->in.copy, hdr_remains);
+               skb_copy_bits(skb, conn->in.offset,
+                       (char*)&conn->hdr + conn->in.hdr_offset, copylen);
 
                debug_tcp("PDU gather offset %d bytes %d in.offset %d "
-                      "in.copy %d\n", tcp_conn->in.hdr_offset, copylen,
-                      tcp_conn->in.offset, tcp_conn->in.copy);
+                      "in.copy %d\n", conn->in.hdr_offset, copylen,
+                      conn->in.offset, conn->in.copy);
 
-               tcp_conn->in.offset += copylen;
-               tcp_conn->in.copy -= copylen;
+               conn->in.offset += copylen;
+               conn->in.copy -= copylen;
                if (copylen < hdr_remains)  {
-                       tcp_conn->in_progress = IN_PROGRESS_HEADER_GATHER;
-                       tcp_conn->in.hdr_offset += copylen;
+                       conn->in_progress = IN_PROGRESS_HEADER_GATHER;
+                       conn->in.hdr_offset += copylen;
                        return -EAGAIN;
                }
-               tcp_conn->in.hdr = &tcp_conn->hdr;
-               tcp_conn->discontiguous_hdr_cnt++;
-               tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
+               conn->in.hdr = &conn->hdr;
+               conn->discontiguous_hdr_cnt++;
+               conn->in_progress = IN_PROGRESS_WAIT_HEADER;
        }
 
        return 0;
 }
 
-/*
- * must be called with session lock
- */
-static void
-__iscsi_ctask_cleanup(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+static inline void
+iscsi_ctask_cleanup(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-       struct scsi_cmnd *sc;
+       struct scsi_cmnd *sc = ctask->sc;
+       struct iscsi_session *session = conn->session;
 
-       sc = ctask->sc;
-       if (unlikely(!sc))
+       spin_lock(&session->lock);
+       if (unlikely(!sc)) {
+               spin_unlock(&session->lock);
                return;
+       }
+       if (sc->sc_data_direction == DMA_TO_DEVICE) {
+               struct iscsi_data_task *dtask, *n;
+               /* WRITE: cleanup Data-Out's if any */
+               list_for_each_entry_safe(dtask, n, &ctask->dataqueue, item) {
+                       list_del(&dtask->item);
+                       mempool_free(dtask, ctask->datapool);
+               }
+       }
+       ctask->xmstate = XMSTATE_IDLE;
+       ctask->r2t = NULL;
+       ctask->sc = NULL;
+       __kfifo_put(session->cmdpool.queue, (void*)&ctask, sizeof(void*));
+       spin_unlock(&session->lock);
+}
+
+/**
+ * iscsi_cmd_rsp - SCSI Command Response processing
+ * @conn: iscsi connection
+ * @ctask: scsi command task
+ **/
+static int
+iscsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+{
+       int rc;
+       struct iscsi_cmd_rsp *rhdr = (struct iscsi_cmd_rsp *)conn->in.hdr;
+       struct iscsi_session *session = conn->session;
+       struct scsi_cmnd *sc = ctask->sc;
+
+       rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr);
+       if (rc) {
+               sc->result = (DID_ERROR << 16);
+               goto out;
+       }
+
+       conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
+
+       sc->result = (DID_OK << 16) | rhdr->cmd_status;
+
+       if (rhdr->response != ISCSI_STATUS_CMD_COMPLETED) {
+               sc->result = (DID_ERROR << 16);
+               goto out;
+       }
+
+       if (rhdr->cmd_status == SAM_STAT_CHECK_CONDITION && conn->senselen) {
+               int sensecopy = min(conn->senselen, SCSI_SENSE_BUFFERSIZE);
 
-       tcp_ctask->xmstate = XMSTATE_IDLE;
-       tcp_ctask->r2t = NULL;
+               memcpy(sc->sense_buffer, conn->data + 2, sensecopy);
+               debug_scsi("copied %d bytes of sense\n", sensecopy);
+       }
+
+       if (sc->sc_data_direction == DMA_TO_DEVICE)
+               goto out;
+
+       if (rhdr->flags & ISCSI_FLAG_CMD_UNDERFLOW) {
+               int res_count = be32_to_cpu(rhdr->residual_count);
+
+               if (res_count > 0 && res_count <= sc->request_bufflen)
+                       sc->resid = res_count;
+               else
+                       sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
+       } else if (rhdr->flags & ISCSI_FLAG_CMD_BIDI_UNDERFLOW)
+               sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
+       else if (rhdr->flags & ISCSI_FLAG_CMD_OVERFLOW)
+               sc->resid = be32_to_cpu(rhdr->residual_count);
+
+out:
+       debug_scsi("done [sc %lx res %d itt 0x%x]\n",
+                  (long)sc, sc->result, ctask->itt);
+       conn->scsirsp_pdus_cnt++;
+       iscsi_ctask_cleanup(conn, ctask);
+       sc->scsi_done(sc);
+       return rc;
 }
 
 /**
@@ -207,9 +324,7 @@ static int
 iscsi_data_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
        int rc;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-       struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
+       struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)conn->in.hdr;
        struct iscsi_session *session = conn->session;
        int datasn = be32_to_cpu(rhdr->datasn);
 
@@ -219,9 +334,9 @@ iscsi_data_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
        /*
         * setup Data-In byte counter (gets decremented..)
         */
-       ctask->data_count = tcp_conn->in.datalen;
+       ctask->data_count = conn->in.datalen;
 
-       if (tcp_conn->in.datalen == 0)
+       if (conn->in.datalen == 0)
                return 0;
 
        if (ctask->datasn != datasn)
@@ -229,8 +344,8 @@ iscsi_data_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 
        ctask->datasn++;
 
-       tcp_ctask->data_offset = be32_to_cpu(rhdr->offset);
-       if (tcp_ctask->data_offset + tcp_conn->in.datalen > ctask->total_length)
+       ctask->data_offset = be32_to_cpu(rhdr->offset);
+       if (ctask->data_offset + conn->in.datalen > ctask->total_length)
                return ISCSI_ERR_DATA_OFFSET;
 
        if (rhdr->flags & ISCSI_FLAG_DATA_STATUS) {
@@ -275,17 +390,19 @@ iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
                        struct iscsi_r2t_info *r2t)
 {
        struct iscsi_data *hdr;
+       struct iscsi_data_task *dtask;
        struct scsi_cmnd *sc = ctask->sc;
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
 
-       hdr = &r2t->dtask.hdr;
+       dtask = mempool_alloc(ctask->datapool, GFP_ATOMIC);
+       BUG_ON(!dtask);
+       hdr = &dtask->hdr;
        memset(hdr, 0, sizeof(struct iscsi_data));
        hdr->ttt = r2t->ttt;
        hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
        r2t->solicit_datasn++;
        hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
-       memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
-       hdr->itt = ctask->hdr->itt;
+       memcpy(hdr->lun, ctask->hdr.lun, sizeof(hdr->lun));
+       hdr->itt = ctask->hdr.itt;
        hdr->exp_statsn = r2t->exp_statsn;
        hdr->offset = cpu_to_be32(r2t->data_offset);
        if (r2t->data_length > conn->max_xmit_dlength) {
@@ -301,9 +418,11 @@ iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
 
        r2t->sent = 0;
 
-       iscsi_buf_init_iov(&r2t->headbuf, (char*)hdr,
+       iscsi_buf_init_virt(&r2t->headbuf, (char*)hdr,
                           sizeof(struct iscsi_hdr));
 
+       r2t->dtask = dtask;
+
        if (sc->use_sg) {
                int i, sg_count = 0;
                struct scatterlist *sg = sc->request_buffer;
@@ -332,9 +451,11 @@ iscsi_solicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
                }
                BUG_ON(r2t->sg == NULL);
        } else
-               iscsi_buf_init_iov(&tcp_ctask->sendbuf,
+               iscsi_buf_init_iov(&ctask->sendbuf,
                            (char*)sc->request_buffer + r2t->data_offset,
                            r2t->data_count);
+
+       list_add(&dtask->item, &ctask->dataqueue);
 }
 
 /**
@@ -347,16 +468,17 @@ iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
        struct iscsi_r2t_info *r2t;
        struct iscsi_session *session = conn->session;
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
+       struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)conn->in.hdr;
        int r2tsn = be32_to_cpu(rhdr->r2tsn);
        int rc;
 
-       if (tcp_conn->in.datalen)
+       if (conn->in.ahslen)
+               return ISCSI_ERR_AHSLEN;
+
+       if (conn->in.datalen)
                return ISCSI_ERR_DATALEN;
 
-       if (tcp_ctask->exp_r2tsn && tcp_ctask->exp_r2tsn != r2tsn)
+       if (ctask->exp_r2tsn && ctask->exp_r2tsn != r2tsn)
                return ISCSI_ERR_R2TSN;
 
        rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr);
@@ -374,7 +496,7 @@ iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
                spin_unlock(&session->lock);
                return 0;
        }
-       rc = __kfifo_get(tcp_ctask->r2tpool.queue, (void*)&r2t, sizeof(void*));
+       rc = __kfifo_get(ctask->r2tpool.queue, (void*)&r2t, sizeof(void*));
        BUG_ON(!rc);
 
        r2t->exp_statsn = rhdr->statsn;
@@ -396,10 +518,10 @@ iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 
        iscsi_solicit_data_init(conn, ctask, r2t);
 
-       tcp_ctask->exp_r2tsn = r2tsn + 1;
-       tcp_ctask->xmstate |= XMSTATE_SOL_HDR;
-       __kfifo_put(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*));
-       __kfifo_put(conn->xmitqueue, (void*)&ctask, sizeof(void*));
+       ctask->exp_r2tsn = r2tsn + 1;
+       ctask->xmstate |= XMSTATE_SOL_HDR;
+       __kfifo_put(ctask->r2tqueue, (void*)&r2t, sizeof(void*));
+       __kfifo_put(conn->writequeue, (void*)&ctask, sizeof(void*));
 
        scsi_queue_work(session->host, &conn->xmitwork);
        conn->r2t_pdus_cnt++;
@@ -409,136 +531,258 @@ iscsi_r2t_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 }
 
 static int
-iscsi_tcp_hdr_recv(struct iscsi_conn *conn)
+iscsi_hdr_recv(struct iscsi_conn *conn)
 {
-       int rc = 0, opcode, ahslen;
+       int rc = 0;
        struct iscsi_hdr *hdr;
+       struct iscsi_cmd_task *ctask;
        struct iscsi_session *session = conn->session;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       uint32_t cdgst, rdgst = 0, itt;
+       uint32_t cdgst, rdgst = 0;
 
-       hdr = tcp_conn->in.hdr;
+       hdr = conn->in.hdr;
 
        /* verify PDU length */
-       tcp_conn->in.datalen = ntoh24(hdr->dlength);
-       if (tcp_conn->in.datalen > conn->max_recv_dlength) {
+       conn->in.datalen = ntoh24(hdr->dlength);
+       if (conn->in.datalen > conn->max_recv_dlength) {
                printk(KERN_ERR "iscsi_tcp: datalen %d > %d\n",
-                      tcp_conn->in.datalen, conn->max_recv_dlength);
+                      conn->in.datalen, conn->max_recv_dlength);
                return ISCSI_ERR_DATALEN;
        }
-       tcp_conn->data_copied = 0;
+       conn->data_copied = 0;
 
        /* read AHS */
-       ahslen = hdr->hlength << 2;
-       tcp_conn->in.offset += ahslen;
-       tcp_conn->in.copy -= ahslen;
-       if (tcp_conn->in.copy < 0) {
+       conn->in.ahslen = hdr->hlength * 4;
+       conn->in.offset += conn->in.ahslen;
+       conn->in.copy -= conn->in.ahslen;
+       if (conn->in.copy < 0) {
                printk(KERN_ERR "iscsi_tcp: can't handle AHS with length "
-                      "%d bytes\n", ahslen);
+                      "%d bytes\n", conn->in.ahslen);
                return ISCSI_ERR_AHSLEN;
        }
 
        /* calculate read padding */
-       tcp_conn->in.padding = tcp_conn->in.datalen & (ISCSI_PAD_LEN-1);
-       if (tcp_conn->in.padding) {
-               tcp_conn->in.padding = ISCSI_PAD_LEN - tcp_conn->in.padding;
-               debug_scsi("read padding %d bytes\n", tcp_conn->in.padding);
+       conn->in.padding = conn->in.datalen & (ISCSI_PAD_LEN-1);
+       if (conn->in.padding) {
+               conn->in.padding = ISCSI_PAD_LEN - conn->in.padding;
+               debug_scsi("read padding %d bytes\n", conn->in.padding);
        }
 
        if (conn->hdrdgst_en) {
                struct scatterlist sg;
 
                sg_init_one(&sg, (u8 *)hdr,
-                           sizeof(struct iscsi_hdr) + ahslen);
-               crypto_digest_digest(tcp_conn->rx_tfm, &sg, 1, (u8 *)&cdgst);
+                           sizeof(struct iscsi_hdr) + conn->in.ahslen);
+               crypto_digest_digest(conn->rx_tfm, &sg, 1, (u8 *)&cdgst);
                rdgst = *(uint32_t*)((char*)hdr + sizeof(struct iscsi_hdr) +
-                                    ahslen);
+                                    conn->in.ahslen);
                if (cdgst != rdgst) {
-                       printk(KERN_ERR "iscsi_tcp: hdrdgst error "
-                              "recv 0x%x calc 0x%x\n", rdgst, cdgst);
+                       printk(KERN_ERR "iscsi_tcp: itt %x: hdrdgst error "
+                              "recv 0x%x calc 0x%x\n", conn->in.itt, rdgst,
+                              cdgst);
                        return ISCSI_ERR_HDR_DGST;
                }
        }
 
-       opcode = hdr->opcode & ISCSI_OPCODE_MASK;
+       /* save opcode for later */
+       conn->in.opcode = hdr->opcode & ISCSI_OPCODE_MASK;
+
        /* verify itt (itt encoding: age+cid+itt) */
-       rc = iscsi_verify_itt(conn, hdr, &itt);
-       if (rc == ISCSI_ERR_NO_SCSI_CMD) {
-               tcp_conn->in.datalen = 0; /* force drop */
-               return 0;
-       } else if (rc)
-               return rc;
+       if (hdr->itt != cpu_to_be32(ISCSI_RESERVED_TAG)) {
+               if ((hdr->itt & AGE_MASK) !=
+                               (session->age << AGE_SHIFT)) {
+                       printk(KERN_ERR "iscsi_tcp: received itt %x expected "
+                               "session age (%x)\n", hdr->itt,
+                               session->age & AGE_MASK);
+                       return ISCSI_ERR_BAD_ITT;
+               }
+
+               if ((hdr->itt & CID_MASK) != (conn->id << CID_SHIFT)) {
+                       printk(KERN_ERR "iscsi_tcp: received itt %x, expected "
+                               "CID (%x)\n", hdr->itt, conn->id);
+                       return ISCSI_ERR_BAD_ITT;
+               }
+               conn->in.itt = hdr->itt & ITT_MASK;
+       } else
+               conn->in.itt = hdr->itt;
 
        debug_tcp("opcode 0x%x offset %d copy %d ahslen %d datalen %d\n",
-                 opcode, tcp_conn->in.offset, tcp_conn->in.copy,
-                 ahslen, tcp_conn->in.datalen);
+                 hdr->opcode, conn->in.offset, conn->in.copy,
+                 conn->in.ahslen, conn->in.datalen);
 
-       switch(opcode) {
-       case ISCSI_OP_SCSI_DATA_IN:
-               tcp_conn->in.ctask = session->cmds[itt];
-               rc = iscsi_data_rsp(conn, tcp_conn->in.ctask);
-               /* fall through */
-       case ISCSI_OP_SCSI_CMD_RSP:
-               tcp_conn->in.ctask = session->cmds[itt];
-               if (tcp_conn->in.datalen)
-                       goto copy_hdr;
-
-               spin_lock(&session->lock);
-               __iscsi_ctask_cleanup(conn, tcp_conn->in.ctask);
-               rc = __iscsi_complete_pdu(conn, hdr, NULL, 0);
-               spin_unlock(&session->lock);
-               break;
-       case ISCSI_OP_R2T:
-               tcp_conn->in.ctask = session->cmds[itt];
-               if (ahslen)
-                       rc = ISCSI_ERR_AHSLEN;
-               else if (tcp_conn->in.ctask->sc->sc_data_direction ==
-                                                               DMA_TO_DEVICE)
-                       rc = iscsi_r2t_rsp(conn, tcp_conn->in.ctask);
-               else
-                       rc = ISCSI_ERR_PROTO;
-               break;
-       case ISCSI_OP_LOGIN_RSP:
-       case ISCSI_OP_TEXT_RSP:
-       case ISCSI_OP_LOGOUT_RSP:
-       case ISCSI_OP_NOOP_IN:
-       case ISCSI_OP_REJECT:
-       case ISCSI_OP_ASYNC_EVENT:
-               if (tcp_conn->in.datalen)
-                       goto copy_hdr;
-       /* fall through */
-       case ISCSI_OP_SCSI_TMFUNC_RSP:
-               rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
-               break;
-       default:
-               rc = ISCSI_ERR_BAD_OPCODE;
-               break;
-       }
+       if (conn->in.itt < session->cmds_max) {
+               ctask = (struct iscsi_cmd_task *)session->cmds[conn->in.itt];
 
-       return rc;
+               if (!ctask->sc) {
+                       printk(KERN_INFO "iscsi_tcp: dropping ctask with "
+                              "itt 0x%x\n", ctask->itt);
+                       conn->in.datalen = 0; /* force drop */
+                       return 0;
+               }
 
-copy_hdr:
-       /*
-        * if we did zero copy for the header but we will need multiple
-        * skbs to complete the command then we have to copy the header
-        * for later use
-        */
-       if (tcp_conn->in.zero_copy_hdr && tcp_conn->in.copy <
-          (tcp_conn->in.datalen + tcp_conn->in.padding +
-           (conn->datadgst_en ? 4 : 0))) {
-               debug_tcp("Copying header for later use. in.copy %d in.datalen"
-                         " %d\n", tcp_conn->in.copy, tcp_conn->in.datalen);
-               memcpy(&tcp_conn->hdr, tcp_conn->in.hdr,
-                      sizeof(struct iscsi_hdr));
-               tcp_conn->in.hdr = &tcp_conn->hdr;
-               tcp_conn->in.zero_copy_hdr = 0;
-       }
-       return 0;
+               if (ctask->sc->SCp.phase != session->age) {
+                       printk(KERN_ERR "iscsi_tcp: ctask's session age %d, "
+                               "expected %d\n", ctask->sc->SCp.phase,
+                               session->age);
+                       return ISCSI_ERR_SESSION_FAILED;
+               }
+
+               conn->in.ctask = ctask;
+
+               debug_scsi("rsp [op 0x%x cid %d sc %lx itt 0x%x len %d]\n",
+                          hdr->opcode, conn->id, (long)ctask->sc,
+                          ctask->itt, conn->in.datalen);
+
+               switch(conn->in.opcode) {
+               case ISCSI_OP_SCSI_CMD_RSP:
+                       BUG_ON((void*)ctask != ctask->sc->SCp.ptr);
+                       if (!conn->in.datalen)
+                               rc = iscsi_cmd_rsp(conn, ctask);
+                       else
+                               /*
+                                * got sense or response data; copying PDU
+                                * Header to the connection's header
+                                * placeholder
+                                */
+                               memcpy(&conn->hdr, hdr,
+                                      sizeof(struct iscsi_hdr));
+                       break;
+               case ISCSI_OP_SCSI_DATA_IN:
+                       BUG_ON((void*)ctask != ctask->sc->SCp.ptr);
+                       /* save flags for non-exceptional status */
+                       conn->in.flags = hdr->flags;
+                       /* save cmd_status for sense data */
+                       conn->in.cmd_status =
+                               ((struct iscsi_data_rsp*)hdr)->cmd_status;
+                       rc = iscsi_data_rsp(conn, ctask);
+                       break;
+               case ISCSI_OP_R2T:
+                       BUG_ON((void*)ctask != ctask->sc->SCp.ptr);
+                       if (ctask->sc->sc_data_direction == DMA_TO_DEVICE)
+                               rc = iscsi_r2t_rsp(conn, ctask);
+                       else
+                               rc = ISCSI_ERR_PROTO;
+                       break;
+               default:
+                       rc = ISCSI_ERR_BAD_OPCODE;
+                       break;
+               }
+       } else if (conn->in.itt >= ISCSI_MGMT_ITT_OFFSET &&
+                  conn->in.itt < ISCSI_MGMT_ITT_OFFSET +
+                                       session->mgmtpool_max) {
+               struct iscsi_mgmt_task *mtask = (struct iscsi_mgmt_task *)
+                                       session->mgmt_cmds[conn->in.itt -
+                                               ISCSI_MGMT_ITT_OFFSET];
+
+               debug_scsi("immrsp [op 0x%x cid %d itt 0x%x len %d]\n",
+                          conn->in.opcode, conn->id, mtask->itt,
+                          conn->in.datalen);
+
+               switch(conn->in.opcode) {
+               case ISCSI_OP_LOGIN_RSP:
+               case ISCSI_OP_TEXT_RSP:
+               case ISCSI_OP_LOGOUT_RSP:
+                       rc = iscsi_check_assign_cmdsn(session,
+                                                (struct iscsi_nopin*)hdr);
+                       if (rc)
+                               break;
+
+                       if (!conn->in.datalen) {
+                               rc = iscsi_recv_pdu(conn->cls_conn, hdr,
+                                                   NULL, 0);
+                               if (conn->login_mtask != mtask) {
+                                       spin_lock(&session->lock);
+                                       __kfifo_put(session->mgmtpool.queue,
+                                           (void*)&mtask, sizeof(void*));
+                                       spin_unlock(&session->lock);
+                               }
+                       }
+                       break;
+               case ISCSI_OP_SCSI_TMFUNC_RSP:
+                       rc = iscsi_check_assign_cmdsn(session,
+                                                (struct iscsi_nopin*)hdr);
+                       if (rc)
+                               break;
+
+                       if (conn->in.datalen || conn->in.ahslen) {
+                               rc = ISCSI_ERR_PROTO;
+                               break;
+                       }
+                       conn->tmfrsp_pdus_cnt++;
+                       spin_lock(&session->lock);
+                       if (conn->tmabort_state == TMABORT_INITIAL) {
+                               __kfifo_put(session->mgmtpool.queue,
+                                               (void*)&mtask, sizeof(void*));
+                               conn->tmabort_state =
+                                       ((struct iscsi_tm_rsp *)hdr)->
+                                       response == ISCSI_TMF_RSP_COMPLETE ?
+                                               TMABORT_SUCCESS:TMABORT_FAILED;
+                               /* unblock eh_abort() */
+                               wake_up(&conn->ehwait);
+                       }
+                       spin_unlock(&session->lock);
+                       break;
+               case ISCSI_OP_NOOP_IN:
+                       if (hdr->ttt != ISCSI_RESERVED_TAG) {
+                               rc = ISCSI_ERR_PROTO;
+                               break;
+                       }
+                       rc = iscsi_check_assign_cmdsn(session,
+                                               (struct iscsi_nopin*)hdr);
+                       if (rc)
+                               break;
+                       conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1;
+
+                       if (!conn->in.datalen) {
+                               struct iscsi_mgmt_task *mtask;
+
+                               rc = iscsi_recv_pdu(conn->cls_conn, hdr,
+                                                   NULL, 0);
+                               mtask = (struct iscsi_mgmt_task *)
+                                       session->mgmt_cmds[conn->in.itt -
+                                                       ISCSI_MGMT_ITT_OFFSET];
+                               if (conn->login_mtask != mtask) {
+                                       spin_lock(&session->lock);
+                                       __kfifo_put(session->mgmtpool.queue,
+                                                 (void*)&mtask, sizeof(void*));
+                                       spin_unlock(&session->lock);
+                               }
+                       }
+                       break;
+               default:
+                       rc = ISCSI_ERR_BAD_OPCODE;
+                       break;
+               }
+       } else if (conn->in.itt == ISCSI_RESERVED_TAG) {
+               switch(conn->in.opcode) {
+               case ISCSI_OP_NOOP_IN:
+                       if (!conn->in.datalen) {
+                               rc = iscsi_check_assign_cmdsn(session,
+                                                (struct iscsi_nopin*)hdr);
+                               if (!rc && hdr->ttt != ISCSI_RESERVED_TAG)
+                                       rc = iscsi_recv_pdu(conn->cls_conn,
+                                                           hdr, NULL, 0);
+                       } else
+                               rc = ISCSI_ERR_PROTO;
+                       break;
+               case ISCSI_OP_REJECT:
+                       /* we need sth like iscsi_reject_rsp()*/
+               case ISCSI_OP_ASYNC_EVENT:
+                       /* we need sth like iscsi_async_event_rsp() */
+                       rc = ISCSI_ERR_BAD_OPCODE;
+                       break;
+               default:
+                       rc = ISCSI_ERR_BAD_OPCODE;
+                       break;
+               }
+       } else
+               rc = ISCSI_ERR_BAD_ITT;
+
+       return rc;
 }
 
 /**
  * iscsi_ctask_copy - copy skb bits to the destanation cmd task
- * @conn: iscsi tcp connection
+ * @conn: iscsi connection
  * @ctask: scsi command task
  * @buf: buffer to copy to
  * @buf_size: size of buffer
@@ -560,113 +804,110 @@ copy_hdr:
  *     buf_left                left to copy from in progress buffer
  **/
 static inline int
-iscsi_ctask_copy(struct iscsi_tcp_conn *tcp_conn, struct iscsi_cmd_task *ctask,
+iscsi_ctask_copy(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
                void *buf, int buf_size, int offset)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-       int buf_left = buf_size - (tcp_conn->data_copied + offset);
-       int size = min(tcp_conn->in.copy, buf_left);
+       int buf_left = buf_size - (conn->data_copied + offset);
+       int size = min(conn->in.copy, buf_left);
        int rc;
 
        size = min(size, ctask->data_count);
 
        debug_tcp("ctask_copy %d bytes at offset %d copied %d\n",
-              size, tcp_conn->in.offset, tcp_conn->in.copied);
+              size, conn->in.offset, conn->in.copied);
 
        BUG_ON(size <= 0);
-       BUG_ON(tcp_ctask->sent + size > ctask->total_length);
+       BUG_ON(ctask->sent + size > ctask->total_length);
 
-       rc = skb_copy_bits(tcp_conn->in.skb, tcp_conn->in.offset,
-                          (char*)buf + (offset + tcp_conn->data_copied), size);
+       rc = skb_copy_bits(conn->in.skb, conn->in.offset,
+                          (char*)buf + (offset + conn->data_copied), size);
        /* must fit into skb->len */
        BUG_ON(rc);
 
-       tcp_conn->in.offset += size;
-       tcp_conn->in.copy -= size;
-       tcp_conn->in.copied += size;
-       tcp_conn->data_copied += size;
-       tcp_ctask->sent += size;
+       conn->in.offset += size;
+       conn->in.copy -= size;
+       conn->in.copied += size;
+       conn->data_copied += size;
+       ctask->sent += size;
        ctask->data_count -= size;
 
-       BUG_ON(tcp_conn->in.copy < 0);
+       BUG_ON(conn->in.copy < 0);
        BUG_ON(ctask->data_count < 0);
 
-       if (buf_size != (tcp_conn->data_copied + offset)) {
+       if (buf_size != (conn->data_copied + offset)) {
                if (!ctask->data_count) {
-                       BUG_ON(buf_size - tcp_conn->data_copied < 0);
+                       BUG_ON(buf_size - conn->data_copied < 0);
                        /* done with this PDU */
-                       return buf_size - tcp_conn->data_copied;
+                       return buf_size - conn->data_copied;
                }
                return -EAGAIN;
        }
 
        /* done with this buffer or with both - PDU and buffer */
-       tcp_conn->data_copied = 0;
+       conn->data_copied = 0;
        return 0;
 }
 
 /**
  * iscsi_tcp_copy - copy skb bits to the destanation buffer
- * @conn: iscsi tcp connection
+ * @conn: iscsi connection
+ * @buf: buffer to copy to
+ * @buf_size: number of bytes to copy
  *
  * Notes:
  *     The function calls skb_copy_bits() and updates per-connection
  *     byte counters.
  **/
 static inline int
-iscsi_tcp_copy(struct iscsi_tcp_conn *tcp_conn)
+iscsi_tcp_copy(struct iscsi_conn *conn, void *buf, int buf_size)
 {
-       void *buf = tcp_conn->data;
-       int buf_size = tcp_conn->in.datalen;
-       int buf_left = buf_size - tcp_conn->data_copied;
-       int size = min(tcp_conn->in.copy, buf_left);
+       int buf_left = buf_size - conn->data_copied;
+       int size = min(conn->in.copy, buf_left);
        int rc;
 
        debug_tcp("tcp_copy %d bytes at offset %d copied %d\n",
-              size, tcp_conn->in.offset, tcp_conn->data_copied);
+              size, conn->in.offset, conn->data_copied);
        BUG_ON(size <= 0);
 
-       rc = skb_copy_bits(tcp_conn->in.skb, tcp_conn->in.offset,
-                          (char*)buf + tcp_conn->data_copied, size);
+       rc = skb_copy_bits(conn->in.skb, conn->in.offset,
+                          (char*)buf + conn->data_copied, size);
        BUG_ON(rc);
 
-       tcp_conn->in.offset += size;
-       tcp_conn->in.copy -= size;
-       tcp_conn->in.copied += size;
-       tcp_conn->data_copied += size;
+       conn->in.offset += size;
+       conn->in.copy -= size;
+       conn->in.copied += size;
+       conn->data_copied += size;
 
-       if (buf_size != tcp_conn->data_copied)
+       if (buf_size != conn->data_copied)
                return -EAGAIN;
 
        return 0;
 }
 
 static inline void
-partial_sg_digest_update(struct iscsi_tcp_conn *tcp_conn,
-                        struct scatterlist *sg, int offset, int length)
+partial_sg_digest_update(struct iscsi_conn *conn, struct scatterlist *sg,
+                        int offset, int length)
 {
        struct scatterlist temp;
 
        memcpy(&temp, sg, sizeof(struct scatterlist));
        temp.offset = offset;
        temp.length = length;
-       crypto_digest_update(tcp_conn->data_rx_tfm, &temp, 1);
+       crypto_digest_update(conn->data_rx_tfm, &temp, 1);
 }
 
 static void
-iscsi_recv_digest_update(struct iscsi_tcp_conn *tcp_conn, char* buf, int len)
+iscsi_recv_digest_update(struct iscsi_conn *conn, char* buf, int len)
 {
        struct scatterlist tmp;
 
        sg_init_one(&tmp, buf, len);
-       crypto_digest_update(tcp_conn->data_rx_tfm, &tmp, 1);
+       crypto_digest_update(conn->data_rx_tfm, &tmp, 1);
 }
 
 static int iscsi_scsi_data_in(struct iscsi_conn *conn)
 {
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       struct iscsi_cmd_task *ctask = tcp_conn->in.ctask;
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+       struct iscsi_cmd_task *ctask = conn->in.ctask;
        struct scsi_cmnd *sc = ctask->sc;
        struct scatterlist *sg;
        int i, offset, rc = 0;
@@ -678,33 +919,31 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn)
         */
        if (!sc->use_sg) {
                i = ctask->data_count;
-               rc = iscsi_ctask_copy(tcp_conn, ctask, sc->request_buffer,
-                                     sc->request_bufflen,
-                                     tcp_ctask->data_offset);
+               rc = iscsi_ctask_copy(conn, ctask, sc->request_buffer,
+                                     sc->request_bufflen, ctask->data_offset);
                if (rc == -EAGAIN)
                        return rc;
                if (conn->datadgst_en)
-                       iscsi_recv_digest_update(tcp_conn, sc->request_buffer,
-                                                i);
+                       iscsi_recv_digest_update(conn, sc->request_buffer, i);
                rc = 0;
                goto done;
        }
 
-       offset = tcp_ctask->data_offset;
+       offset = ctask->data_offset;
        sg = sc->request_buffer;
 
-       if (tcp_ctask->data_offset)
-               for (i = 0; i < tcp_ctask->sg_count; i++)
+       if (ctask->data_offset)
+               for (i = 0; i < ctask->sg_count; i++)
                        offset -= sg[i].length;
        /* we've passed through partial sg*/
        if (offset < 0)
                offset = 0;
 
-       for (i = tcp_ctask->sg_count; i < sc->use_sg; i++) {
+       for (i = ctask->sg_count; i < sc->use_sg; i++) {
                char *dest;
 
                dest = kmap_atomic(sg[i].page, KM_SOFTIRQ0);
-               rc = iscsi_ctask_copy(tcp_conn, ctask, dest + sg[i].offset,
+               rc = iscsi_ctask_copy(conn, ctask, dest + sg[i].offset,
                                      sg[i].length, offset);
                kunmap_atomic(dest, KM_SOFTIRQ0);
                if (rc == -EAGAIN)
@@ -713,17 +952,15 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn)
                if (!rc) {
                        if (conn->datadgst_en) {
                                if (!offset)
-                                       crypto_digest_update(
-                                                       tcp_conn->data_rx_tfm,
-                                                       &sg[i], 1);
+                                       crypto_digest_update(conn->data_rx_tfm,
+                                                            &sg[i], 1);
                                else
-                                       partial_sg_digest_update(tcp_conn,
-                                                       &sg[i],
+                                       partial_sg_digest_update(conn, &sg[i],
                                                        sg[i].offset + offset,
                                                        sg[i].length - offset);
                        }
                        offset = 0;
-                       tcp_ctask->sg_count++;
+                       ctask->sg_count++;
                }
 
                if (!ctask->data_count) {
@@ -731,26 +968,25 @@ static int iscsi_scsi_data_in(struct iscsi_conn *conn)
                                /*
                                 * data-in is complete, but buffer not...
                                 */
-                               partial_sg_digest_update(tcp_conn, &sg[i],
+                               partial_sg_digest_update(conn, &sg[i],
                                                sg[i].offset, sg[i].length-rc);
                        rc = 0;
                        break;
                }
 
-               if (!tcp_conn->in.copy)
+               if (!conn->in.copy)
                        return -EAGAIN;
        }
        BUG_ON(ctask->data_count);
 
 done:
        /* check for non-exceptional status */
-       if (tcp_conn->in.hdr->flags & ISCSI_FLAG_DATA_STATUS) {
+       if (conn->in.flags & ISCSI_FLAG_DATA_STATUS) {
                debug_scsi("done [sc %lx res %d itt 0x%x]\n",
                           (long)sc, sc->result, ctask->itt);
-               spin_lock(&conn->session->lock);
-               __iscsi_ctask_cleanup(conn, ctask);
-               __iscsi_complete_pdu(conn, tcp_conn->in.hdr, NULL, 0);
-               spin_unlock(&conn->session->lock);
+               conn->scsirsp_pdus_cnt++;
+               iscsi_ctask_cleanup(conn, ctask);
+               sc->scsi_done(sc);
        }
 
        return rc;
@@ -759,38 +995,71 @@ done:
 static int
 iscsi_data_recv(struct iscsi_conn *conn)
 {
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       int rc = 0, opcode;
+       struct iscsi_session *session = conn->session;
+       int rc = 0;
 
-       opcode = tcp_conn->in.hdr->opcode & ISCSI_OPCODE_MASK;
-       switch (opcode) {
+       switch(conn->in.opcode) {
        case ISCSI_OP_SCSI_DATA_IN:
                rc = iscsi_scsi_data_in(conn);
                break;
-       case ISCSI_OP_SCSI_CMD_RSP:
-               spin_lock(&conn->session->lock);
-               __iscsi_ctask_cleanup(conn, tcp_conn->in.ctask);
-               spin_unlock(&conn->session->lock);
+       case ISCSI_OP_SCSI_CMD_RSP: {
+               /*
+                * SCSI Sense Data:
+                * copying the entire Data Segment.
+                */
+               if (iscsi_tcp_copy(conn, conn->data, conn->in.datalen)) {
+                       rc = -EAGAIN;
+                       goto exit;
+               }
+
+               /*
+                * check for sense
+                */
+               conn->in.hdr = &conn->hdr;
+               conn->senselen = (conn->data[0] << 8) | conn->data[1];
+               rc = iscsi_cmd_rsp(conn, conn->in.ctask);
+               if (!rc && conn->datadgst_en)
+                       iscsi_recv_digest_update(conn, conn->data,
+                                                conn->in.datalen);
+       }
+       break;
        case ISCSI_OP_TEXT_RSP:
        case ISCSI_OP_LOGIN_RSP:
-       case ISCSI_OP_NOOP_IN:
-       case ISCSI_OP_ASYNC_EVENT:
-       case ISCSI_OP_REJECT:
+       case ISCSI_OP_NOOP_IN: {
+               struct iscsi_mgmt_task *mtask = NULL;
+
+               if (conn->in.itt != ISCSI_RESERVED_TAG)
+                       mtask = (struct iscsi_mgmt_task *)
+                               session->mgmt_cmds[conn->in.itt -
+                                       ISCSI_MGMT_ITT_OFFSET];
+
                /*
                 * Collect data segment to the connection's data
                 * placeholder
                 */
-               if (iscsi_tcp_copy(tcp_conn)) {
+               if (iscsi_tcp_copy(conn, conn->data, conn->in.datalen)) {
                        rc = -EAGAIN;
                        goto exit;
                }
 
-               rc = iscsi_complete_pdu(conn, tcp_conn->in.hdr, tcp_conn->data,
-                                       tcp_conn->in.datalen);
-               if (!rc && conn->datadgst_en && opcode != ISCSI_OP_LOGIN_RSP)
-                       iscsi_recv_digest_update(tcp_conn, tcp_conn->data,
-                                               tcp_conn->in.datalen);
-               break;
+               rc = iscsi_recv_pdu(conn->cls_conn, conn->in.hdr,
+                                   conn->data, conn->in.datalen);
+
+               if (!rc && conn->datadgst_en &&
+                       conn->in.opcode != ISCSI_OP_LOGIN_RSP)
+                       iscsi_recv_digest_update(conn, conn->data,
+                                               conn->in.datalen);
+
+               if (mtask && conn->login_mtask != mtask) {
+                       spin_lock(&session->lock);
+                       __kfifo_put(session->mgmtpool.queue, (void*)&mtask,
+                                   sizeof(void*));
+                       spin_unlock(&session->lock);
+               }
+       }
+       break;
+       case ISCSI_OP_ASYNC_EVENT:
+       case ISCSI_OP_REJECT:
        default:
                BUG_ON(1);
        }
@@ -811,7 +1080,6 @@ iscsi_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
 {
        int rc;
        struct iscsi_conn *conn = rd_desc->arg.data;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
        int processed;
        char pad[ISCSI_PAD_LEN];
        struct scatterlist sg;
@@ -820,15 +1088,15 @@ iscsi_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb,
         * Save current SKB and its offset in the corresponding
         * connection context.
         */
-       tcp_conn->in.copy = skb->len - offset;
-       tcp_conn->in.offset = offset;
-       tcp_conn->in.skb = skb;
-       tcp_conn->in.len = tcp_conn->in.copy;
-       BUG_ON(tcp_conn->in.copy <= 0);
-       debug_tcp("in %d bytes\n", tcp_conn->in.copy);
+       conn->in.copy = skb->len - offset;
+       conn->in.offset = offset;
+       conn->in.skb = skb;
+       conn->in.len = conn->in.copy;
+       BUG_ON(conn->in.copy <= 0);
+       debug_tcp("in %d bytes\n", conn->in.copy);
 
 more:
-       tcp_conn->in.copied = 0;
+       conn->in.copied = 0;
        rc = 0;
 
        if (unlikely(conn->suspend_rx)) {
@@ -836,9 +1104,9 @@ more:
                return 0;
        }
 
-       if (tcp_conn->in_progress == IN_PROGRESS_WAIT_HEADER ||
-           tcp_conn->in_progress == IN_PROGRESS_HEADER_GATHER) {
-               rc = iscsi_hdr_extract(tcp_conn);
+       if (conn->in_progress == IN_PROGRESS_WAIT_HEADER ||
+           conn->in_progress == IN_PROGRESS_HEADER_GATHER) {
+               rc = iscsi_hdr_extract(conn);
                if (rc) {
                       if (rc == -EAGAIN)
                                goto nomore;
@@ -851,91 +1119,90 @@ more:
                /*
                 * Verify and process incoming PDU header.
                 */
-               rc = iscsi_tcp_hdr_recv(conn);
-               if (!rc && tcp_conn->in.datalen) {
+               rc = iscsi_hdr_recv(conn);
+               if (!rc && conn->in.datalen) {
                        if (conn->datadgst_en) {
-                               BUG_ON(!tcp_conn->data_rx_tfm);
-                               crypto_digest_init(tcp_conn->data_rx_tfm);
+                               BUG_ON(!conn->data_rx_tfm);
+                               crypto_digest_init(conn->data_rx_tfm);
                        }
-                       tcp_conn->in_progress = IN_PROGRESS_DATA_RECV;
+                       conn->in_progress = IN_PROGRESS_DATA_RECV;
                } else if (rc) {
                        iscsi_conn_failure(conn, rc);
                        return 0;
                }
        }
 
-       if (tcp_conn->in_progress == IN_PROGRESS_DDIGEST_RECV) {
+       if (conn->in_progress == IN_PROGRESS_DDIGEST_RECV) {
                uint32_t recv_digest;
-
                debug_tcp("extra data_recv offset %d copy %d\n",
-                         tcp_conn->in.offset, tcp_conn->in.copy);
-               skb_copy_bits(tcp_conn->in.skb, tcp_conn->in.offset,
+                         conn->in.offset, conn->in.copy);
+               skb_copy_bits(conn->in.skb, conn->in.offset,
                                &recv_digest, 4);
-               tcp_conn->in.offset += 4;
-               tcp_conn->in.copy -= 4;
-               if (recv_digest != tcp_conn->in.datadgst) {
+               conn->in.offset += 4;
+               conn->in.copy -= 4;
+               if (recv_digest != conn->in.datadgst) {
                        debug_tcp("iscsi_tcp: data digest error!"
                                  "0x%x != 0x%x\n", recv_digest,
-                                 tcp_conn->in.datadgst);
+                                 conn->in.datadgst);
                        iscsi_conn_failure(conn, ISCSI_ERR_DATA_DGST);
                        return 0;
                } else {
                        debug_tcp("iscsi_tcp: data digest match!"
                                  "0x%x == 0x%x\n", recv_digest,
-                                 tcp_conn->in.datadgst);
-                       tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
+                                 conn->in.datadgst);
+                       conn->in_progress = IN_PROGRESS_WAIT_HEADER;
                }
        }
 
-       if (tcp_conn->in_progress == IN_PROGRESS_DATA_RECV &&
-          tcp_conn->in.copy) {
+       if (conn->in_progress == IN_PROGRESS_DATA_RECV && conn->in.copy) {
 
                debug_tcp("data_recv offset %d copy %d\n",
-                      tcp_conn->in.offset, tcp_conn->in.copy);
+                      conn->in.offset, conn->in.copy);
 
                rc = iscsi_data_recv(conn);
                if (rc) {
-                       if (rc == -EAGAIN)
+                       if (rc == -EAGAIN) {
+                               rd_desc->count = conn->in.datalen -
+                                               conn->in.ctask->data_count;
                                goto again;
+                       }
                        iscsi_conn_failure(conn, rc);
                        return 0;
                }
-               tcp_conn->in.copy -= tcp_conn->in.padding;
-               tcp_conn->in.offset += tcp_conn->in.padding;
+               conn->in.copy -= conn->in.padding;
+               conn->in.offset += conn->in.padding;
                if (conn->datadgst_en) {
-                       if (tcp_conn->in.padding) {
-                               debug_tcp("padding -> %d\n",
-                                         tcp_conn->in.padding);
-                               memset(pad, 0, tcp_conn->in.padding);
-                               sg_init_one(&sg, pad, tcp_conn->in.padding);
-                               crypto_digest_update(tcp_conn->data_rx_tfm,
-                                                    &sg, 1);
+                       if (conn->in.padding) {
+                               debug_tcp("padding -> %d\n", conn->in.padding);
+                               memset(pad, 0, conn->in.padding);
+                               sg_init_one(&sg, pad, conn->in.padding);
+                               crypto_digest_update(conn->data_rx_tfm, &sg, 1);
                        }
-                       crypto_digest_final(tcp_conn->data_rx_tfm,
-                                           (u8 *) & tcp_conn->in.datadgst);
-                       debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst);
-                       tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV;
+                       crypto_digest_final(conn->data_rx_tfm,
+                                           (u8 *) & conn->in.datadgst);
+                       debug_tcp("rx digest 0x%x\n", conn->in.datadgst);
+                       conn->in_progress = IN_PROGRESS_DDIGEST_RECV;
                } else
-                       tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
+                       conn->in_progress = IN_PROGRESS_WAIT_HEADER;
        }
 
        debug_tcp("f, processed %d from out of %d padding %d\n",
-              tcp_conn->in.offset - offset, (int)len, tcp_conn->in.padding);
-       BUG_ON(tcp_conn->in.offset - offset > len);
+              conn->in.offset - offset, (int)len, conn->in.padding);
+       BUG_ON(conn->in.offset - offset > len);
 
-       if (tcp_conn->in.offset - offset != len) {
+       if (conn->in.offset - offset != len) {
                debug_tcp("continue to process %d bytes\n",
-                      (int)len - (tcp_conn->in.offset - offset));
+                      (int)len - (conn->in.offset - offset));
                goto more;
        }
 
 nomore:
-       processed = tcp_conn->in.offset - offset;
+       processed = conn->in.offset - offset;
        BUG_ON(processed == 0);
        return processed;
 
 again:
-       processed = tcp_conn->in.offset - offset;
+       processed = conn->in.offset - offset;
        debug_tcp("c, processed %d from out of %d rd_desc_cnt %d\n",
                  processed, (int)len, (int)rd_desc->count);
        BUG_ON(processed == 0);
@@ -953,14 +1220,9 @@ iscsi_tcp_data_ready(struct sock *sk, int flag)
 
        read_lock(&sk->sk_callback_lock);
 
-       /*
-        * Use rd_desc to pass 'conn' to iscsi_tcp_data_recv.
-        * We set count to 1 because we want the network layer to
-        * hand us all the skbs that are available. iscsi_tcp_data_recv
-        * handled pdus that cross buffers or pdus that still need data.
-        */
+       /* use rd_desc to pass 'conn' to iscsi_tcp_data_recv */
        rd_desc.arg.data = conn;
-       rd_desc.count = 1;
+       rd_desc.count = 0;
        tcp_read_sock(sk, &rd_desc, iscsi_tcp_data_recv);
 
        read_unlock(&sk->sk_callback_lock);
@@ -969,7 +1231,6 @@ iscsi_tcp_data_ready(struct sock *sk, int flag)
 static void
 iscsi_tcp_state_change(struct sock *sk)
 {
-       struct iscsi_tcp_conn *tcp_conn;
        struct iscsi_conn *conn;
        struct iscsi_session *session;
        void (*old_state_change)(struct sock *);
@@ -986,8 +1247,7 @@ iscsi_tcp_state_change(struct sock *sk)
                iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
        }
 
-       tcp_conn = conn->dd_data;
-       old_state_change = tcp_conn->old_state_change;
+       old_state_change = conn->old_state_change;
 
        read_unlock(&sk->sk_callback_lock);
 
@@ -1002,25 +1262,23 @@ static void
 iscsi_write_space(struct sock *sk)
 {
        struct iscsi_conn *conn = (struct iscsi_conn*)sk->sk_user_data;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-
-       tcp_conn->old_write_space(sk);
+       conn->old_write_space(sk);
        debug_tcp("iscsi_write_space: cid %d\n", conn->id);
+       clear_bit(SUSPEND_BIT, &conn->suspend_tx);
        scsi_queue_work(conn->session->host, &conn->xmitwork);
 }
 
 static void
 iscsi_conn_set_callbacks(struct iscsi_conn *conn)
 {
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       struct sock *sk = tcp_conn->sock->sk;
+       struct sock *sk = conn->sock->sk;
 
        /* assign new callbacks */
        write_lock_bh(&sk->sk_callback_lock);
        sk->sk_user_data = conn;
-       tcp_conn->old_data_ready = sk->sk_data_ready;
-       tcp_conn->old_state_change = sk->sk_state_change;
-       tcp_conn->old_write_space = sk->sk_write_space;
+       conn->old_data_ready = sk->sk_data_ready;
+       conn->old_state_change = sk->sk_state_change;
+       conn->old_write_space = sk->sk_write_space;
        sk->sk_data_ready = iscsi_tcp_data_ready;
        sk->sk_state_change = iscsi_tcp_state_change;
        sk->sk_write_space = iscsi_write_space;
@@ -1030,15 +1288,14 @@ iscsi_conn_set_callbacks(struct iscsi_conn *conn)
 static void
 iscsi_conn_restore_callbacks(struct iscsi_conn *conn)
 {
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       struct sock *sk = tcp_conn->sock->sk;
+       struct sock *sk = conn->sock->sk;
 
        /* restore socket callbacks, see also: iscsi_conn_set_callbacks() */
        write_lock_bh(&sk->sk_callback_lock);
        sk->sk_user_data    = NULL;
-       sk->sk_data_ready   = tcp_conn->old_data_ready;
-       sk->sk_state_change = tcp_conn->old_state_change;
-       sk->sk_write_space  = tcp_conn->old_write_space;
+       sk->sk_data_ready   = conn->old_data_ready;
+       sk->sk_state_change = conn->old_state_change;
+       sk->sk_write_space  = conn->old_write_space;
        sk->sk_no_check  = 0;
        write_unlock_bh(&sk->sk_callback_lock);
 }
@@ -1053,9 +1310,8 @@ iscsi_conn_restore_callbacks(struct iscsi_conn *conn)
 static inline int
 iscsi_send(struct iscsi_conn *conn, struct iscsi_buf *buf, int size, int flags)
 {
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       struct socket *sk = tcp_conn->sock;
-       int offset = buf->sg.offset + buf->sent, res;
+       struct socket *sk = conn->sock;
+       int offset = buf->sg.offset + buf->sent;
 
        /*
         * if we got use_sg=0 or are sending something we kmallocd
@@ -1066,22 +1322,9 @@ iscsi_send(struct iscsi_conn *conn, struct iscsi_buf *buf, int size, int flags)
         * slab case.
         */
        if (buf->use_sendmsg)
-               res = sock_no_sendpage(sk, buf->sg.page, offset, size, flags);
-       else
-               res = tcp_conn->sendpage(sk, buf->sg.page, offset, size, flags);
-
-       if (res >= 0) {
-               conn->txdata_octets += res;
-               buf->sent += res;
-               return res;
-       }
-
-       tcp_conn->sendpage_failures_cnt++;
-       if (res == -EAGAIN)
-               res = -ENOBUFS;
+               return sock_no_sendpage(sk, buf->sg.page, offset, size, flags);
        else
-               iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
-       return res;
+               return conn->sendpage(sk, buf->sg.page, offset, size, flags);
 }
 
 /**
@@ -1107,10 +1350,16 @@ iscsi_sendhdr(struct iscsi_conn *conn, struct iscsi_buf *buf, int datalen)
        res = iscsi_send(conn, buf, size, flags);
        debug_tcp("sendhdr %d bytes, sent %d res %d\n", size, buf->sent, res);
        if (res >= 0) {
+               conn->txdata_octets += res;
+               buf->sent += res;
                if (size != res)
                        return -EAGAIN;
                return 0;
-       }
+       } else if (res == -EAGAIN) {
+               conn->sendpage_failures_cnt++;
+               set_bit(SUSPEND_BIT, &conn->suspend_tx);
+       } else if (res == -EPIPE)
+               iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
 
        return res;
 }
@@ -1143,46 +1392,47 @@ iscsi_sendpage(struct iscsi_conn *conn, struct iscsi_buf *buf,
        debug_tcp("sendpage: %d bytes, sent %d left %d sent %d res %d\n",
                  size, buf->sent, *count, *sent, res);
        if (res >= 0) {
+               conn->txdata_octets += res;
+               buf->sent += res;
                *count -= res;
                *sent += res;
                if (size != res)
                        return -EAGAIN;
                return 0;
-       }
+       } else if (res == -EAGAIN) {
+               conn->sendpage_failures_cnt++;
+               set_bit(SUSPEND_BIT, &conn->suspend_tx);
+       } else if (res == -EPIPE)
+               iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
 
        return res;
 }
 
 static inline void
-iscsi_data_digest_init(struct iscsi_tcp_conn *tcp_conn,
-                     struct iscsi_cmd_task *ctask)
+iscsi_data_digest_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-
-       BUG_ON(!tcp_conn->data_tx_tfm);
-       crypto_digest_init(tcp_conn->data_tx_tfm);
-       tcp_ctask->digest_count = 4;
+       BUG_ON(!conn->data_tx_tfm);
+       crypto_digest_init(conn->data_tx_tfm);
+       ctask->digest_count = 4;
 }
 
 static int
 iscsi_digest_final_send(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
                        struct iscsi_buf *buf, uint32_t *digest, int final)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
        int rc = 0;
        int sent = 0;
 
        if (final)
-               crypto_digest_final(tcp_conn->data_tx_tfm, (u8*)digest);
+               crypto_digest_final(conn->data_tx_tfm, (u8*)digest);
 
-       iscsi_buf_init_iov(buf, (char*)digest, 4);
-       rc = iscsi_sendpage(conn, buf, &tcp_ctask->digest_count, &sent);
+       iscsi_buf_init_virt(buf, (char*)digest, 4);
+       rc = iscsi_sendpage(conn, buf, &ctask->digest_count, &sent);
        if (rc) {
-               tcp_ctask->datadigest = *digest;
-               tcp_ctask->xmstate |= XMSTATE_DATA_DIGEST;
+               ctask->datadigest = *digest;
+               ctask->xmstate |= XMSTATE_DATA_DIGEST;
        } else
-               tcp_ctask->digest_count = 4;
+               ctask->digest_count = 4;
        return rc;
 }
 
@@ -1203,19 +1453,21 @@ static void
 iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
                        struct iscsi_r2t_info *r2t, int left)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
        struct iscsi_data *hdr;
+       struct iscsi_data_task *dtask;
        struct scsi_cmnd *sc = ctask->sc;
        int new_offset;
 
-       hdr = &r2t->dtask.hdr;
+       dtask = mempool_alloc(ctask->datapool, GFP_ATOMIC);
+       BUG_ON(!dtask);
+       hdr = &dtask->hdr;
        memset(hdr, 0, sizeof(struct iscsi_data));
        hdr->ttt = r2t->ttt;
        hdr->datasn = cpu_to_be32(r2t->solicit_datasn);
        r2t->solicit_datasn++;
        hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
-       memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
-       hdr->itt = ctask->hdr->itt;
+       memcpy(hdr->lun, ctask->hdr.lun, sizeof(hdr->lun));
+       hdr->itt = ctask->hdr.itt;
        hdr->exp_statsn = r2t->exp_statsn;
        new_offset = r2t->data_offset + r2t->sent;
        hdr->offset = cpu_to_be32(new_offset);
@@ -1229,98 +1481,181 @@ iscsi_solicit_data_cont(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
        }
        conn->dataout_pdus_cnt++;
 
-       iscsi_buf_init_iov(&r2t->headbuf, (char*)hdr,
+       iscsi_buf_init_virt(&r2t->headbuf, (char*)hdr,
                           sizeof(struct iscsi_hdr));
 
+       r2t->dtask = dtask;
+
        if (sc->use_sg && !iscsi_buf_left(&r2t->sendbuf)) {
-               BUG_ON(tcp_ctask->bad_sg == r2t->sg);
+               BUG_ON(ctask->bad_sg == r2t->sg);
                iscsi_buf_init_sg(&r2t->sendbuf, r2t->sg);
                r2t->sg += 1;
        } else
-               iscsi_buf_init_iov(&tcp_ctask->sendbuf,
+               iscsi_buf_init_iov(&ctask->sendbuf,
                            (char*)sc->request_buffer + new_offset,
                            r2t->data_count);
+
+       list_add(&dtask->item, &ctask->dataqueue);
 }
 
 static void
 iscsi_unsolicit_data_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+       struct iscsi_data *hdr;
        struct iscsi_data_task *dtask;
 
-       dtask = tcp_ctask->dtask = &tcp_ctask->unsol_dtask;
-       iscsi_prep_unsolicit_data_pdu(ctask, &dtask->hdr,
-                                     tcp_ctask->r2t_data_count);
-       iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)&dtask->hdr,
+       dtask = mempool_alloc(ctask->datapool, GFP_ATOMIC);
+       BUG_ON(!dtask);
+       hdr = &dtask->hdr;
+       memset(hdr, 0, sizeof(struct iscsi_data));
+       hdr->ttt = cpu_to_be32(ISCSI_RESERVED_TAG);
+       hdr->datasn = cpu_to_be32(ctask->unsol_datasn);
+       ctask->unsol_datasn++;
+       hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
+       memcpy(hdr->lun, ctask->hdr.lun, sizeof(hdr->lun));
+       hdr->itt = ctask->hdr.itt;
+       hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
+       hdr->offset = cpu_to_be32(ctask->total_length -
+                                 ctask->r2t_data_count -
+                                 ctask->unsol_count);
+       if (ctask->unsol_count > conn->max_xmit_dlength) {
+               hton24(hdr->dlength, conn->max_xmit_dlength);
+               ctask->data_count = conn->max_xmit_dlength;
+               hdr->flags = 0;
+       } else {
+               hton24(hdr->dlength, ctask->unsol_count);
+               ctask->data_count = ctask->unsol_count;
+               hdr->flags = ISCSI_FLAG_CMD_FINAL;
+       }
+
+       iscsi_buf_init_virt(&ctask->headbuf, (char*)hdr,
                           sizeof(struct iscsi_hdr));
+
+       list_add(&dtask->item, &ctask->dataqueue);
+
+       ctask->dtask = dtask;
 }
 
 /**
- * iscsi_tcp_cmd_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
+ * iscsi_cmd_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
  * @conn: iscsi connection
  * @ctask: scsi command task
  * @sc: scsi command
  **/
 static void
-iscsi_tcp_cmd_init(struct iscsi_cmd_task *ctask)
+iscsi_cmd_init(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
+               struct scsi_cmnd *sc)
 {
-       struct scsi_cmnd *sc = ctask->sc;
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+       struct iscsi_session *session = conn->session;
 
-       BUG_ON(__kfifo_len(tcp_ctask->r2tqueue));
+       BUG_ON(__kfifo_len(ctask->r2tqueue));
 
-       tcp_ctask->sent = 0;
-       tcp_ctask->sg_count = 0;
+       ctask->sc = sc;
+       ctask->conn = conn;
+       ctask->hdr.opcode = ISCSI_OP_SCSI_CMD;
+       ctask->hdr.flags = ISCSI_ATTR_SIMPLE;
+       int_to_scsilun(sc->device->lun, (struct scsi_lun *)ctask->hdr.lun);
+       ctask->hdr.itt = ctask->itt | (conn->id << CID_SHIFT) |
+                        (session->age << AGE_SHIFT);
+       ctask->hdr.data_length = cpu_to_be32(sc->request_bufflen);
+       ctask->hdr.cmdsn = cpu_to_be32(session->cmdsn); session->cmdsn++;
+       ctask->hdr.exp_statsn = cpu_to_be32(conn->exp_statsn);
+       memcpy(ctask->hdr.cdb, sc->cmnd, sc->cmd_len);
+       memset(&ctask->hdr.cdb[sc->cmd_len], 0, MAX_COMMAND_SIZE - sc->cmd_len);
+
+       ctask->mtask = NULL;
+       ctask->sent = 0;
+       ctask->sg_count = 0;
+
+       ctask->total_length = sc->request_bufflen;
 
        if (sc->sc_data_direction == DMA_TO_DEVICE) {
-               tcp_ctask->xmstate = XMSTATE_W_HDR;
-               tcp_ctask->exp_r2tsn = 0;
+               ctask->exp_r2tsn = 0;
+               ctask->hdr.flags |= ISCSI_FLAG_CMD_WRITE;
                BUG_ON(ctask->total_length == 0);
-
                if (sc->use_sg) {
                        struct scatterlist *sg = sc->request_buffer;
 
-                       iscsi_buf_init_sg(&tcp_ctask->sendbuf,
-                                         &sg[tcp_ctask->sg_count++]);
-                       tcp_ctask->sg = sg;
-                       tcp_ctask->bad_sg = sg + sc->use_sg;
-               } else
-                       iscsi_buf_init_iov(&tcp_ctask->sendbuf,
-                                          sc->request_buffer,
-                                          sc->request_bufflen);
-
-               if (ctask->imm_count)
-                       tcp_ctask->xmstate |= XMSTATE_IMM_DATA;
+                       iscsi_buf_init_sg(&ctask->sendbuf,
+                                         &sg[ctask->sg_count++]);
+                       ctask->sg = sg;
+                       ctask->bad_sg = sg + sc->use_sg;
+               } else {
+                       iscsi_buf_init_iov(&ctask->sendbuf, sc->request_buffer,
+                                       sc->request_bufflen);
+               }
 
-               tcp_ctask->pad_count = ctask->total_length & (ISCSI_PAD_LEN-1);
-               if (tcp_ctask->pad_count) {
-                       tcp_ctask->pad_count = ISCSI_PAD_LEN -
-                                                       tcp_ctask->pad_count;
+               /*
+                * Write counters:
+                *
+                *      imm_count       bytes to be sent right after
+                *                      SCSI PDU Header
+                *
+                *      unsol_count     bytes(as Data-Out) to be sent
+                *                      without R2T ack right after
+                *                      immediate data
+                *
+                *      r2t_data_count  bytes to be sent via R2T ack's
+                *
+                *      pad_count       bytes to be sent as zero-padding
+                */
+               ctask->imm_count = 0;
+               ctask->unsol_count = 0;
+               ctask->unsol_datasn = 0;
+               ctask->xmstate = XMSTATE_W_HDR;
+               /* calculate write padding */
+               ctask->pad_count = ctask->total_length & (ISCSI_PAD_LEN-1);
+               if (ctask->pad_count) {
+                       ctask->pad_count = ISCSI_PAD_LEN - ctask->pad_count;
                        debug_scsi("write padding %d bytes\n",
-                                  tcp_ctask->pad_count);
-                       tcp_ctask->xmstate |= XMSTATE_W_PAD;
+                               ctask->pad_count);
+                       ctask->xmstate |= XMSTATE_W_PAD;
                }
+               if (session->imm_data_en) {
+                       if (ctask->total_length >= session->first_burst)
+                               ctask->imm_count = min(session->first_burst,
+                                                       conn->max_xmit_dlength);
+                       else
+                               ctask->imm_count = min(ctask->total_length,
+                                                       conn->max_xmit_dlength);
+                       hton24(ctask->hdr.dlength, ctask->imm_count);
+                       ctask->xmstate |= XMSTATE_IMM_DATA;
+               } else
+                       zero_data(ctask->hdr.dlength);
+
+               if (!session->initial_r2t_en)
+                       ctask->unsol_count = min(session->first_burst,
+                               ctask->total_length) - ctask->imm_count;
+               if (!ctask->unsol_count)
+                       /* No unsolicit Data-Out's */
+                       ctask->hdr.flags |= ISCSI_FLAG_CMD_FINAL;
+               else
+                       ctask->xmstate |= XMSTATE_UNS_HDR | XMSTATE_UNS_INIT;
 
-               if (ctask->unsol_count)
-                       tcp_ctask->xmstate |= XMSTATE_UNS_HDR |
-                                               XMSTATE_UNS_INIT;
-               tcp_ctask->r2t_data_count = ctask->total_length -
+               ctask->r2t_data_count = ctask->total_length -
                                    ctask->imm_count -
                                    ctask->unsol_count;
 
                debug_scsi("cmd [itt %x total %d imm %d imm_data %d "
                           "r2t_data %d]\n",
                           ctask->itt, ctask->total_length, ctask->imm_count,
-                          ctask->unsol_count, tcp_ctask->r2t_data_count);
-       } else
-               tcp_ctask->xmstate = XMSTATE_R_HDR;
+                          ctask->unsol_count, ctask->r2t_data_count);
+       } else {
+               ctask->hdr.flags |= ISCSI_FLAG_CMD_FINAL;
+               if (sc->sc_data_direction == DMA_FROM_DEVICE)
+                       ctask->hdr.flags |= ISCSI_FLAG_CMD_READ;
+               ctask->datasn = 0;
+               ctask->xmstate = XMSTATE_R_HDR;
+               zero_data(ctask->hdr.dlength);
+       }
 
-       iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)ctask->hdr,
+       iscsi_buf_init_virt(&ctask->headbuf, (char*)&ctask->hdr,
                            sizeof(struct iscsi_hdr));
+       conn->scsicmd_pdus_cnt++;
 }
 
 /**
- * iscsi_tcp_mtask_xmit - xmit management(immediate) task
+ * iscsi_mtask_xmit - xmit management(immediate) task
  * @conn: iscsi connection
  * @mtask: task management task
  *
@@ -1334,167 +1669,132 @@ iscsi_tcp_cmd_init(struct iscsi_cmd_task *ctask)
  *             IN_PROGRESS_IMM_DATA - PDU Data xmit in progress
  **/
 static int
-iscsi_tcp_mtask_xmit(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
+iscsi_mtask_xmit(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
 {
-       struct iscsi_tcp_mgmt_task *tcp_mtask = mtask->dd_data;
-       int rc;
 
        debug_scsi("mtask deq [cid %d state %x itt 0x%x]\n",
-               conn->id, tcp_mtask->xmstate, mtask->itt);
+               conn->id, mtask->xmstate, mtask->itt);
 
-       if (tcp_mtask->xmstate & XMSTATE_IMM_HDR) {
-               tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR;
+       if (mtask->xmstate & XMSTATE_IMM_HDR) {
+               mtask->xmstate &= ~XMSTATE_IMM_HDR;
                if (mtask->data_count)
-                       tcp_mtask->xmstate |= XMSTATE_IMM_DATA;
+                       mtask->xmstate |= XMSTATE_IMM_DATA;
                if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE &&
-                   conn->stop_stage != STOP_CONN_RECOVER &&
+                   conn->stop_stage != STOP_CONN_RECOVER &&
                    conn->hdrdgst_en)
-                       iscsi_hdr_digest(conn, &tcp_mtask->headbuf,
-                                       (u8*)tcp_mtask->hdrext);
-               rc = iscsi_sendhdr(conn, &tcp_mtask->headbuf,
-                                  mtask->data_count);
-               if (rc) {
-                       tcp_mtask->xmstate |= XMSTATE_IMM_HDR;
+                       iscsi_hdr_digest(conn, &mtask->headbuf,
+                                       (u8*)mtask->hdrext);
+               if (iscsi_sendhdr(conn, &mtask->headbuf, mtask->data_count)) {
+                       mtask->xmstate |= XMSTATE_IMM_HDR;
                        if (mtask->data_count)
-                               tcp_mtask->xmstate &= ~XMSTATE_IMM_DATA;
-                       return rc;
+                               mtask->xmstate &= ~XMSTATE_IMM_DATA;
+                       return -EAGAIN;
                }
        }
 
-       if (tcp_mtask->xmstate & XMSTATE_IMM_DATA) {
+       if (mtask->xmstate & XMSTATE_IMM_DATA) {
                BUG_ON(!mtask->data_count);
-               tcp_mtask->xmstate &= ~XMSTATE_IMM_DATA;
+               mtask->xmstate &= ~XMSTATE_IMM_DATA;
                /* FIXME: implement.
                 * Virtual buffer could be spreaded across multiple pages...
                 */
                do {
-                       int rc;
-
-                       rc = iscsi_sendpage(conn, &tcp_mtask->sendbuf,
-                                       &mtask->data_count, &tcp_mtask->sent);
-                       if (rc) {
-                               tcp_mtask->xmstate |= XMSTATE_IMM_DATA;
-                               return rc;
+                       if (iscsi_sendpage(conn, &mtask->sendbuf,
+                                  &mtask->data_count, &mtask->sent)) {
+                               mtask->xmstate |= XMSTATE_IMM_DATA;
+                               return -EAGAIN;
                        }
                } while (mtask->data_count);
        }
 
-       BUG_ON(tcp_mtask->xmstate != XMSTATE_IDLE);
-       if (mtask->hdr->itt == cpu_to_be32(ISCSI_RESERVED_TAG)) {
-               struct iscsi_session *session = conn->session;
-
-               spin_lock_bh(&session->lock);
-               list_del(&conn->mtask->running);
-               __kfifo_put(session->mgmtpool.queue, (void*)&conn->mtask,
-                           sizeof(void*));
-               spin_unlock_bh(&session->lock);
-       }
+       BUG_ON(mtask->xmstate != XMSTATE_IDLE);
        return 0;
 }
 
 static inline int
-handle_xmstate_r_hdr(struct iscsi_conn *conn,
-                    struct iscsi_tcp_cmd_task *tcp_ctask)
+handle_xmstate_r_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
-       int rc;
-
-       tcp_ctask->xmstate &= ~XMSTATE_R_HDR;
+       ctask->xmstate &= ~XMSTATE_R_HDR;
        if (conn->hdrdgst_en)
-               iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
-                                (u8*)tcp_ctask->hdrext);
-       rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, 0);
-       if (!rc) {
-               BUG_ON(tcp_ctask->xmstate != XMSTATE_IDLE);
+               iscsi_hdr_digest(conn, &ctask->headbuf, (u8*)ctask->hdrext);
+       if (!iscsi_sendhdr(conn, &ctask->headbuf, 0)) {
+               BUG_ON(ctask->xmstate != XMSTATE_IDLE);
                return 0; /* wait for Data-In */
        }
-       tcp_ctask->xmstate |= XMSTATE_R_HDR;
-       return rc;
+       ctask->xmstate |= XMSTATE_R_HDR;
+       return -EAGAIN;
 }
 
 static inline int
-handle_xmstate_w_hdr(struct iscsi_conn *conn,
-                    struct iscsi_cmd_task *ctask)
+handle_xmstate_w_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-       int rc;
-
-       tcp_ctask->xmstate &= ~XMSTATE_W_HDR;
+       ctask->xmstate &= ~XMSTATE_W_HDR;
        if (conn->hdrdgst_en)
-               iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
-                                (u8*)tcp_ctask->hdrext);
-       rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->imm_count);
-       if (rc)
-               tcp_ctask->xmstate |= XMSTATE_W_HDR;
-       return rc;
+               iscsi_hdr_digest(conn, &ctask->headbuf, (u8*)ctask->hdrext);
+       if (iscsi_sendhdr(conn, &ctask->headbuf, ctask->imm_count)) {
+               ctask->xmstate |= XMSTATE_W_HDR;
+               return -EAGAIN;
+       }
+       return 0;
 }
 
 static inline int
 handle_xmstate_data_digest(struct iscsi_conn *conn,
                           struct iscsi_cmd_task *ctask)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-       int rc;
-
-       tcp_ctask->xmstate &= ~XMSTATE_DATA_DIGEST;
-       debug_tcp("resent data digest 0x%x\n", tcp_ctask->datadigest);
-       rc = iscsi_digest_final_send(conn, ctask, &tcp_ctask->immbuf,
-                                   &tcp_ctask->datadigest, 0);
-       if (rc) {
-               tcp_ctask->xmstate |= XMSTATE_DATA_DIGEST;
+       ctask->xmstate &= ~XMSTATE_DATA_DIGEST;
+       debug_tcp("resent data digest 0x%x\n", ctask->datadigest);
+       if (iscsi_digest_final_send(conn, ctask, &ctask->immbuf,
+                                   &ctask->datadigest, 0)) {
+               ctask->xmstate |= XMSTATE_DATA_DIGEST;
                debug_tcp("resent data digest 0x%x fail!\n",
-                         tcp_ctask->datadigest);
+                         ctask->datadigest);
+               return -EAGAIN;
        }
-
-       return rc;
+       return 0;
 }
 
 static inline int
 handle_xmstate_imm_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       int rc;
-
        BUG_ON(!ctask->imm_count);
-       tcp_ctask->xmstate &= ~XMSTATE_IMM_DATA;
+       ctask->xmstate &= ~XMSTATE_IMM_DATA;
 
        if (conn->datadgst_en) {
-               iscsi_data_digest_init(tcp_conn, ctask);
-               tcp_ctask->immdigest = 0;
+               iscsi_data_digest_init(conn, ctask);
+               ctask->immdigest = 0;
        }
 
        for (;;) {
-               rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf,
-                                  &ctask->imm_count, &tcp_ctask->sent);
-               if (rc) {
-                       tcp_ctask->xmstate |= XMSTATE_IMM_DATA;
+               if (iscsi_sendpage(conn, &ctask->sendbuf, &ctask->imm_count,
+                                  &ctask->sent)) {
+                       ctask->xmstate |= XMSTATE_IMM_DATA;
                        if (conn->datadgst_en) {
-                               crypto_digest_final(tcp_conn->data_tx_tfm,
-                                               (u8*)&tcp_ctask->immdigest);
+                               crypto_digest_final(conn->data_tx_tfm,
+                                               (u8*)&ctask->immdigest);
                                debug_tcp("tx imm sendpage fail 0x%x\n",
-                                         tcp_ctask->datadigest);
+                                         ctask->datadigest);
                        }
-                       return rc;
+                       return -EAGAIN;
                }
                if (conn->datadgst_en)
-                       crypto_digest_update(tcp_conn->data_tx_tfm,
-                                            &tcp_ctask->sendbuf.sg, 1);
+                       crypto_digest_update(conn->data_tx_tfm,
+                                            &ctask->sendbuf.sg, 1);
 
                if (!ctask->imm_count)
                        break;
-               iscsi_buf_init_sg(&tcp_ctask->sendbuf,
-                                 &tcp_ctask->sg[tcp_ctask->sg_count++]);
+               iscsi_buf_init_sg(&ctask->sendbuf,
+                                 &ctask->sg[ctask->sg_count++]);
        }
 
-       if (conn->datadgst_en && !(tcp_ctask->xmstate & XMSTATE_W_PAD)) {
-               rc = iscsi_digest_final_send(conn, ctask, &tcp_ctask->immbuf,
-                                           &tcp_ctask->immdigest, 1);
-               if (rc) {
+       if (conn->datadgst_en && !(ctask->xmstate & XMSTATE_W_PAD)) {
+               if (iscsi_digest_final_send(conn, ctask, &ctask->immbuf,
+                                           &ctask->immdigest, 1)) {
                        debug_tcp("sending imm digest 0x%x fail!\n",
-                                 tcp_ctask->immdigest);
-                       return rc;
+                                 ctask->immdigest);
+                       return -EAGAIN;
                }
-               debug_tcp("sending imm digest 0x%x\n", tcp_ctask->immdigest);
+               debug_tcp("sending imm digest 0x%x\n", ctask->immdigest);
        }
 
        return 0;
@@ -1503,81 +1803,74 @@ handle_xmstate_imm_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 static inline int
 handle_xmstate_uns_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
        struct iscsi_data_task *dtask;
-       int rc;
 
-       tcp_ctask->xmstate |= XMSTATE_UNS_DATA;
-       if (tcp_ctask->xmstate & XMSTATE_UNS_INIT) {
+       ctask->xmstate |= XMSTATE_UNS_DATA;
+       if (ctask->xmstate & XMSTATE_UNS_INIT) {
                iscsi_unsolicit_data_init(conn, ctask);
-               dtask = tcp_ctask->dtask;
+               BUG_ON(!ctask->dtask);
+               dtask = ctask->dtask;
                if (conn->hdrdgst_en)
-                       iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
+                       iscsi_hdr_digest(conn, &ctask->headbuf,
                                        (u8*)dtask->hdrext);
-               tcp_ctask->xmstate &= ~XMSTATE_UNS_INIT;
+               ctask->xmstate &= ~XMSTATE_UNS_INIT;
        }
-
-       rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->data_count);
-       if (rc) {
-               tcp_ctask->xmstate &= ~XMSTATE_UNS_DATA;
-               tcp_ctask->xmstate |= XMSTATE_UNS_HDR;
-               return rc;
+       if (iscsi_sendhdr(conn, &ctask->headbuf, ctask->data_count)) {
+               ctask->xmstate &= ~XMSTATE_UNS_DATA;
+               ctask->xmstate |= XMSTATE_UNS_HDR;
+               return -EAGAIN;
        }
 
        debug_scsi("uns dout [itt 0x%x dlen %d sent %d]\n",
-                  ctask->itt, ctask->unsol_count, tcp_ctask->sent);
+                  ctask->itt, ctask->unsol_count, ctask->sent);
        return 0;
 }
 
 static inline int
 handle_xmstate_uns_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-       struct iscsi_data_task *dtask = tcp_ctask->dtask;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       int rc;
+       struct iscsi_data_task *dtask = ctask->dtask;
 
        BUG_ON(!ctask->data_count);
-       tcp_ctask->xmstate &= ~XMSTATE_UNS_DATA;
+       ctask->xmstate &= ~XMSTATE_UNS_DATA;
 
        if (conn->datadgst_en) {
-               iscsi_data_digest_init(tcp_conn, ctask);
+               iscsi_data_digest_init(conn, ctask);
                dtask->digest = 0;
        }
 
        for (;;) {
-               int start = tcp_ctask->sent;
+               int start = ctask->sent;
 
-               rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf,
-                                  &ctask->data_count, &tcp_ctask->sent);
-               if (rc) {
-                       ctask->unsol_count -= tcp_ctask->sent - start;
-                       tcp_ctask->xmstate |= XMSTATE_UNS_DATA;
+               if (iscsi_sendpage(conn, &ctask->sendbuf, &ctask->data_count,
+                                  &ctask->sent)) {
+                       ctask->unsol_count -= ctask->sent - start;
+                       ctask->xmstate |= XMSTATE_UNS_DATA;
                        /* will continue with this ctask later.. */
                        if (conn->datadgst_en) {
-                               crypto_digest_final(tcp_conn->data_tx_tfm,
+                               crypto_digest_final(conn->data_tx_tfm,
                                                (u8 *)&dtask->digest);
                                debug_tcp("tx uns data fail 0x%x\n",
                                          dtask->digest);
                        }
-                       return rc;
+                       return -EAGAIN;
                }
 
-               BUG_ON(tcp_ctask->sent > ctask->total_length);
-               ctask->unsol_count -= tcp_ctask->sent - start;
+               BUG_ON(ctask->sent > ctask->total_length);
+               ctask->unsol_count -= ctask->sent - start;
 
                /*
                 * XXX:we may run here with un-initial sendbuf.
                 * so pass it
                 */
-               if (conn->datadgst_en && tcp_ctask->sent - start > 0)
-                       crypto_digest_update(tcp_conn->data_tx_tfm,
-                                            &tcp_ctask->sendbuf.sg, 1);
+               if (conn->datadgst_en && ctask->sent - start > 0)
+                       crypto_digest_update(conn->data_tx_tfm,
+                                            &ctask->sendbuf.sg, 1);
 
                if (!ctask->data_count)
                        break;
-               iscsi_buf_init_sg(&tcp_ctask->sendbuf,
-                                 &tcp_ctask->sg[tcp_ctask->sg_count++]);
+               iscsi_buf_init_sg(&ctask->sendbuf,
+                                 &ctask->sg[ctask->sg_count++]);
        }
        BUG_ON(ctask->unsol_count < 0);
 
@@ -1587,29 +1880,27 @@ handle_xmstate_uns_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
         */
        if (ctask->unsol_count) {
                if (conn->datadgst_en) {
-                       rc = iscsi_digest_final_send(conn, ctask,
+                       if (iscsi_digest_final_send(conn, ctask,
                                                    &dtask->digestbuf,
-                                                   &dtask->digest, 1);
-                       if (rc) {
+                                                   &dtask->digest, 1)) {
                                debug_tcp("send uns digest 0x%x fail\n",
                                          dtask->digest);
-                               return rc;
+                               return -EAGAIN;
                        }
                        debug_tcp("sending uns digest 0x%x, more uns\n",
                                  dtask->digest);
                }
-               tcp_ctask->xmstate |= XMSTATE_UNS_INIT;
+               ctask->xmstate |= XMSTATE_UNS_INIT;
                return 1;
        }
 
-       if (conn->datadgst_en && !(tcp_ctask->xmstate & XMSTATE_W_PAD)) {
-               rc = iscsi_digest_final_send(conn, ctask,
+       if (conn->datadgst_en && !(ctask->xmstate & XMSTATE_W_PAD)) {
+               if (iscsi_digest_final_send(conn, ctask,
                                            &dtask->digestbuf,
-                                           &dtask->digest, 1);
-               if (rc) {
+                                           &dtask->digest, 1)) {
                        debug_tcp("send last uns digest 0x%x fail\n",
                                   dtask->digest);
-                       return rc;
+                       return -EAGAIN;
                }
                debug_tcp("sending uns digest 0x%x\n",dtask->digest);
        }
@@ -1621,17 +1912,15 @@ static inline int
 handle_xmstate_sol_data(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
        struct iscsi_session *session = conn->session;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-       struct iscsi_r2t_info *r2t = tcp_ctask->r2t;
-       struct iscsi_data_task *dtask = &r2t->dtask;
-       int left, rc;
+       struct iscsi_r2t_info *r2t = ctask->r2t;
+       struct iscsi_data_task *dtask = r2t->dtask;
+       int left;
 
-       tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
-       tcp_ctask->dtask = dtask;
+       ctask->xmstate &= ~XMSTATE_SOL_DATA;
+       ctask->dtask = dtask;
 
        if (conn->datadgst_en) {
-               iscsi_data_digest_init(tcp_conn, ctask);
+               iscsi_data_digest_init(conn, ctask);
                dtask->digest = 0;
        }
 solicit_again:
@@ -1641,27 +1930,25 @@ solicit_again:
        if (!r2t->data_count)
                goto data_out_done;
 
-       rc = iscsi_sendpage(conn, &r2t->sendbuf, &r2t->data_count, &r2t->sent);
-       if (rc) {
-               tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
+       if (iscsi_sendpage(conn, &r2t->sendbuf, &r2t->data_count, &r2t->sent)) {
+               ctask->xmstate |= XMSTATE_SOL_DATA;
                /* will continue with this ctask later.. */
                if (conn->datadgst_en) {
-                       crypto_digest_final(tcp_conn->data_tx_tfm,
+                       crypto_digest_final(conn->data_tx_tfm,
                                          (u8 *)&dtask->digest);
                        debug_tcp("r2t data send fail 0x%x\n", dtask->digest);
                }
-               return rc;
+               return -EAGAIN;
        }
 
        BUG_ON(r2t->data_count < 0);
        if (conn->datadgst_en)
-               crypto_digest_update(tcp_conn->data_tx_tfm, &r2t->sendbuf.sg,
-                                    1);
+               crypto_digest_update(conn->data_tx_tfm, &r2t->sendbuf.sg, 1);
 
        if (r2t->data_count) {
                BUG_ON(ctask->sc->use_sg == 0);
                if (!iscsi_buf_left(&r2t->sendbuf)) {
-                       BUG_ON(tcp_ctask->bad_sg == r2t->sg);
+                       BUG_ON(ctask->bad_sg == r2t->sg);
                        iscsi_buf_init_sg(&r2t->sendbuf, r2t->sg);
                        r2t->sg += 1;
                }
@@ -1677,20 +1964,19 @@ data_out_done:
        left = r2t->data_length - r2t->sent;
        if (left) {
                if (conn->datadgst_en) {
-                       rc = iscsi_digest_final_send(conn, ctask,
+                       if (iscsi_digest_final_send(conn, ctask,
                                                    &dtask->digestbuf,
-                                                   &dtask->digest, 1);
-                       if (rc) {
+                                                   &dtask->digest, 1)) {
                                debug_tcp("send r2t data digest 0x%x"
                                          "fail\n", dtask->digest);
-                               return rc;
+                               return -EAGAIN;
                        }
                        debug_tcp("r2t data send digest 0x%x\n",
                                  dtask->digest);
                }
                iscsi_solicit_data_cont(conn, ctask, r2t, left);
-               tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
-               tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
+               ctask->xmstate |= XMSTATE_SOL_DATA;
+               ctask->xmstate &= ~XMSTATE_SOL_HDR;
                return 1;
        }
 
@@ -1698,27 +1984,26 @@ data_out_done:
         * Done with this R2T. Check if there are more
         * outstanding R2Ts ready to be processed.
         */
-       BUG_ON(tcp_ctask->r2t_data_count - r2t->data_length < 0);
+       BUG_ON(ctask->r2t_data_count - r2t->data_length < 0);
        if (conn->datadgst_en) {
-               rc = iscsi_digest_final_send(conn, ctask, &dtask->digestbuf,
-                                           &dtask->digest, 1);
-               if (rc) {
+               if (iscsi_digest_final_send(conn, ctask, &dtask->digestbuf,
+                                           &dtask->digest, 1)) {
                        debug_tcp("send last r2t data digest 0x%x"
                                  "fail\n", dtask->digest);
-                       return rc;
+                       return -EAGAIN;
                }
                debug_tcp("r2t done dout digest 0x%x\n", dtask->digest);
        }
 
-       tcp_ctask->r2t_data_count -= r2t->data_length;
-       tcp_ctask->r2t = NULL;
+       ctask->r2t_data_count -= r2t->data_length;
+       ctask->r2t = NULL;
        spin_lock_bh(&session->lock);
-       __kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t, sizeof(void*));
+       __kfifo_put(ctask->r2tpool.queue, (void*)&r2t, sizeof(void*));
        spin_unlock_bh(&session->lock);
-       if (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*))) {
-               tcp_ctask->r2t = r2t;
-               tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
-               tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
+       if (__kfifo_get(ctask->r2tqueue, (void*)&r2t, sizeof(void*))) {
+               ctask->r2t = r2t;
+               ctask->xmstate |= XMSTATE_SOL_DATA;
+               ctask->xmstate &= ~XMSTATE_SOL_HDR;
                return 1;
        }
 
@@ -1728,44 +2013,36 @@ data_out_done:
 static inline int
 handle_xmstate_w_pad(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       struct iscsi_data_task *dtask = tcp_ctask->dtask;
-       int sent, rc;
-
-       tcp_ctask->xmstate &= ~XMSTATE_W_PAD;
-       iscsi_buf_init_iov(&tcp_ctask->sendbuf, (char*)&tcp_ctask->pad,
-                           tcp_ctask->pad_count);
-       rc = iscsi_sendpage(conn, &tcp_ctask->sendbuf, &tcp_ctask->pad_count,
-                          &sent);
-       if (rc) {
-               tcp_ctask->xmstate |= XMSTATE_W_PAD;
-               return rc;
+       struct iscsi_data_task *dtask = ctask->dtask;
+       int sent;
+
+       ctask->xmstate &= ~XMSTATE_W_PAD;
+       iscsi_buf_init_virt(&ctask->sendbuf, (char*)&ctask->pad,
+                           ctask->pad_count);
+       if (iscsi_sendpage(conn, &ctask->sendbuf, &ctask->pad_count, &sent)) {
+               ctask->xmstate |= XMSTATE_W_PAD;
+               return -EAGAIN;
        }
 
        if (conn->datadgst_en) {
-               crypto_digest_update(tcp_conn->data_tx_tfm,
-                                    &tcp_ctask->sendbuf.sg, 1);
+               crypto_digest_update(conn->data_tx_tfm, &ctask->sendbuf.sg, 1);
                /* imm data? */
                if (!dtask) {
-                       rc = iscsi_digest_final_send(conn, ctask,
-                                                   &tcp_ctask->immbuf,
-                                                   &tcp_ctask->immdigest, 1);
-                       if (rc) {
+                       if (iscsi_digest_final_send(conn, ctask, &ctask->immbuf,
+                                                   &ctask->immdigest, 1)) {
                                debug_tcp("send padding digest 0x%x"
-                                         "fail!\n", tcp_ctask->immdigest);
-                               return rc;
+                                         "fail!\n", ctask->immdigest);
+                               return -EAGAIN;
                        }
                        debug_tcp("done with padding, digest 0x%x\n",
-                                 tcp_ctask->datadigest);
+                                 ctask->datadigest);
                } else {
-                       rc = iscsi_digest_final_send(conn, ctask,
+                       if (iscsi_digest_final_send(conn, ctask,
                                                    &dtask->digestbuf,
-                                                   &dtask->digest, 1);
-                       if (rc) {
+                                                   &dtask->digest, 1)) {
                                debug_tcp("send padding digest 0x%x"
                                          "fail\n", dtask->digest);
-                               return rc;
+                               return -EAGAIN;
                        }
                        debug_tcp("done with padding, digest 0x%x\n",
                                  dtask->digest);
@@ -1776,13 +2053,12 @@ handle_xmstate_w_pad(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 }
 
 static int
-iscsi_tcp_ctask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+iscsi_ctask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
        int rc = 0;
 
        debug_scsi("ctask deq [cid %d xmstate %x itt 0x%x]\n",
-               conn->id, tcp_ctask->xmstate, ctask->itt);
+               conn->id, ctask->xmstate, ctask->itt);
 
        /*
         * serialize with TMF AbortTask
@@ -1790,38 +2066,40 @@ iscsi_tcp_ctask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
        if (ctask->mtask)
                return rc;
 
-       if (tcp_ctask->xmstate & XMSTATE_R_HDR)
-               return handle_xmstate_r_hdr(conn, tcp_ctask);
+       if (ctask->xmstate & XMSTATE_R_HDR) {
+               rc = handle_xmstate_r_hdr(conn, ctask);
+               return rc;
+       }
 
-       if (tcp_ctask->xmstate & XMSTATE_W_HDR) {
+       if (ctask->xmstate & XMSTATE_W_HDR) {
                rc = handle_xmstate_w_hdr(conn, ctask);
                if (rc)
                        return rc;
        }
 
        /* XXX: for data digest xmit recover */
-       if (tcp_ctask->xmstate & XMSTATE_DATA_DIGEST) {
+       if (ctask->xmstate & XMSTATE_DATA_DIGEST) {
                rc = handle_xmstate_data_digest(conn, ctask);
                if (rc)
                        return rc;
        }
 
-       if (tcp_ctask->xmstate & XMSTATE_IMM_DATA) {
+       if (ctask->xmstate & XMSTATE_IMM_DATA) {
                rc = handle_xmstate_imm_data(conn, ctask);
                if (rc)
                        return rc;
        }
 
-       if (tcp_ctask->xmstate & XMSTATE_UNS_HDR) {
+       if (ctask->xmstate & XMSTATE_UNS_HDR) {
                BUG_ON(!ctask->unsol_count);
-               tcp_ctask->xmstate &= ~XMSTATE_UNS_HDR;
+               ctask->xmstate &= ~XMSTATE_UNS_HDR;
 unsolicit_head_again:
                rc = handle_xmstate_uns_hdr(conn, ctask);
                if (rc)
                        return rc;
        }
 
-       if (tcp_ctask->xmstate & XMSTATE_UNS_DATA) {
+       if (ctask->xmstate & XMSTATE_UNS_DATA) {
                rc = handle_xmstate_uns_data(conn, ctask);
                if (rc == 1)
                        goto unsolicit_head_again;
@@ -1830,24 +2108,23 @@ unsolicit_head_again:
                goto done;
        }
 
-       if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) {
+       if (ctask->xmstate & XMSTATE_SOL_HDR) {
                struct iscsi_r2t_info *r2t;
 
-               tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
-               tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
-               if (!tcp_ctask->r2t)
-                       __kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t,
+               ctask->xmstate &= ~XMSTATE_SOL_HDR;
+               ctask->xmstate |= XMSTATE_SOL_DATA;
+               if (!ctask->r2t)
+                       __kfifo_get(ctask->r2tqueue, (void*)&ctask->r2t,
                                    sizeof(void*));
 solicit_head_again:
-               r2t = tcp_ctask->r2t;
+               r2t = ctask->r2t;
                if (conn->hdrdgst_en)
                        iscsi_hdr_digest(conn, &r2t->headbuf,
-                                       (u8*)r2t->dtask.hdrext);
-               rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count);
-               if (rc) {
-                       tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
-                       tcp_ctask->xmstate |= XMSTATE_SOL_HDR;
-                       return rc;
+                                       (u8*)r2t->dtask->hdrext);
+               if (iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count)) {
+                       ctask->xmstate &= ~XMSTATE_SOL_DATA;
+                       ctask->xmstate |= XMSTATE_SOL_HDR;
+                       return -EAGAIN;
                }
 
                debug_scsi("sol dout [dsn %d itt 0x%x dlen %d sent %d]\n",
@@ -1855,7 +2132,7 @@ solicit_head_again:
                        r2t->sent);
        }
 
-       if (tcp_ctask->xmstate & XMSTATE_SOL_DATA) {
+       if (ctask->xmstate & XMSTATE_SOL_DATA) {
                rc = handle_xmstate_sol_data(conn, ctask);
                if (rc == 1)
                        goto solicit_head_again;
@@ -1868,199 +2145,1022 @@ done:
         * Last thing to check is whether we need to send write
         * padding. Note that we check for xmstate equality, not just the bit.
         */
-       if (tcp_ctask->xmstate == XMSTATE_W_PAD)
+       if (ctask->xmstate == XMSTATE_W_PAD)
                rc = handle_xmstate_w_pad(conn, ctask);
 
        return rc;
 }
 
+/**
+ * iscsi_data_xmit - xmit any command into the scheduled connection
+ * @conn: iscsi connection
+ *
+ * Notes:
+ *     The function can return -EAGAIN in which case the caller must
+ *     re-schedule it again later or recover. '0' return code means
+ *     successful xmit.
+ **/
+static int
+iscsi_data_xmit(struct iscsi_conn *conn)
+{
+       if (unlikely(conn->suspend_tx)) {
+               debug_tcp("conn %d Tx suspended!\n", conn->id);
+               return 0;
+       }
+
+       /*
+        * Transmit in the following order:
+        *
+        * 1) un-finished xmit (ctask or mtask)
+        * 2) immediate control PDUs
+        * 3) write data
+        * 4) SCSI commands
+        * 5) non-immediate control PDUs
+        *
+        * No need to lock around __kfifo_get as long as
+        * there's one producer and one consumer.
+        */
+
+       BUG_ON(conn->ctask && conn->mtask);
+
+       if (conn->ctask) {
+               if (iscsi_ctask_xmit(conn, conn->ctask))
+                       goto again;
+               /* done with this in-progress ctask */
+               conn->ctask = NULL;
+       }
+       if (conn->mtask) {
+               if (iscsi_mtask_xmit(conn, conn->mtask))
+                       goto again;
+               /* done with this in-progress mtask */
+               conn->mtask = NULL;
+       }
+
+       /* process immediate first */
+        if (unlikely(__kfifo_len(conn->immqueue))) {
+               struct iscsi_session *session = conn->session;
+               while (__kfifo_get(conn->immqueue, (void*)&conn->mtask,
+                                  sizeof(void*))) {
+                       if (iscsi_mtask_xmit(conn, conn->mtask))
+                               goto again;
+
+                       if (conn->mtask->hdr.itt ==
+                                       cpu_to_be32(ISCSI_RESERVED_TAG)) {
+                               spin_lock_bh(&session->lock);
+                               __kfifo_put(session->mgmtpool.queue,
+                                           (void*)&conn->mtask, sizeof(void*));
+                               spin_unlock_bh(&session->lock);
+                       }
+               }
+               /* done with this mtask */
+               conn->mtask = NULL;
+       }
+
+       /* process write queue */
+       while (__kfifo_get(conn->writequeue, (void*)&conn->ctask,
+                          sizeof(void*))) {
+               if (iscsi_ctask_xmit(conn, conn->ctask))
+                       goto again;
+       }
+
+       /* process command queue */
+       while (__kfifo_get(conn->xmitqueue, (void*)&conn->ctask,
+                          sizeof(void*))) {
+               if (iscsi_ctask_xmit(conn, conn->ctask))
+                       goto again;
+       }
+       /* done with this ctask */
+       conn->ctask = NULL;
+
+       /* process the rest control plane PDUs, if any */
+        if (unlikely(__kfifo_len(conn->mgmtqueue))) {
+               struct iscsi_session *session = conn->session;
+
+               while (__kfifo_get(conn->mgmtqueue, (void*)&conn->mtask,
+                                  sizeof(void*))) {
+                       if (iscsi_mtask_xmit(conn, conn->mtask))
+                               goto again;
+
+                       if (conn->mtask->hdr.itt ==
+                                       cpu_to_be32(ISCSI_RESERVED_TAG)) {
+                               spin_lock_bh(&session->lock);
+                               __kfifo_put(session->mgmtpool.queue,
+                                           (void*)&conn->mtask,
+                                           sizeof(void*));
+                               spin_unlock_bh(&session->lock);
+                       }
+               }
+               /* done with this mtask */
+               conn->mtask = NULL;
+       }
+
+       return 0;
+
+again:
+       if (unlikely(conn->suspend_tx))
+               return 0;
+
+       return -EAGAIN;
+}
+
+static void
+iscsi_xmitworker(void *data)
+{
+       struct iscsi_conn *conn = data;
+
+       /*
+        * serialize Xmit worker on a per-connection basis.
+        */
+       mutex_lock(&conn->xmitmutex);
+       if (iscsi_data_xmit(conn))
+               scsi_queue_work(conn->session->host, &conn->xmitwork);
+       mutex_unlock(&conn->xmitmutex);
+}
+
+#define FAILURE_BAD_HOST               1
+#define FAILURE_SESSION_FAILED         2
+#define FAILURE_SESSION_FREED          3
+#define FAILURE_WINDOW_CLOSED          4
+#define FAILURE_SESSION_TERMINATE      5
+
+static int
+iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
+{
+       struct Scsi_Host *host;
+       int reason = 0;
+       struct iscsi_session *session;
+       struct iscsi_conn *conn = NULL;
+       struct iscsi_cmd_task *ctask = NULL;
+
+       sc->scsi_done = done;
+       sc->result = 0;
+
+       host = sc->device->host;
+       session = iscsi_hostdata(host->hostdata);
+       BUG_ON(host != session->host);
+
+       spin_lock(&session->lock);
+
+       if (session->state != ISCSI_STATE_LOGGED_IN) {
+               if (session->state == ISCSI_STATE_FAILED) {
+                       reason = FAILURE_SESSION_FAILED;
+                       goto reject;
+               } else if (session->state == ISCSI_STATE_TERMINATE) {
+                       reason = FAILURE_SESSION_TERMINATE;
+                       goto fault;
+               }
+               reason = FAILURE_SESSION_FREED;
+               goto fault;
+       }
+
+       /*
+        * Check for iSCSI window and take care of CmdSN wrap-around
+        */
+       if ((int)(session->max_cmdsn - session->cmdsn) < 0) {
+               reason = FAILURE_WINDOW_CLOSED;
+               goto reject;
+       }
+
+       conn = session->leadconn;
+
+       __kfifo_get(session->cmdpool.queue, (void*)&ctask, sizeof(void*));
+       BUG_ON(ctask->sc);
+
+       sc->SCp.phase = session->age;
+       sc->SCp.ptr = (char*)ctask;
+       iscsi_cmd_init(conn, ctask, sc);
+
+       __kfifo_put(conn->xmitqueue, (void*)&ctask, sizeof(void*));
+       debug_scsi(
+              "ctask enq [%s cid %d sc %lx itt 0x%x len %d cmdsn %d win %d]\n",
+               sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read",
+               conn->id, (long)sc, ctask->itt, sc->request_bufflen,
+               session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1);
+       spin_unlock(&session->lock);
+
+       scsi_queue_work(host, &conn->xmitwork);
+       return 0;
+
+reject:
+       spin_unlock(&session->lock);
+       debug_scsi("cmd 0x%x rejected (%d)\n", sc->cmnd[0], reason);
+       return SCSI_MLQUEUE_HOST_BUSY;
+
+fault:
+       spin_unlock(&session->lock);
+       printk(KERN_ERR "iscsi_tcp: cmd 0x%x is not queued (%d)\n",
+              sc->cmnd[0], reason);
+       sc->sense_buffer[0] = 0x70;
+       sc->sense_buffer[2] = NOT_READY;
+       sc->sense_buffer[7] = 0x6;
+       sc->sense_buffer[12] = 0x08;
+       sc->sense_buffer[13] = 0x00;
+       sc->result = (DID_NO_CONNECT << 16);
+       sc->resid = sc->request_bufflen;
+       sc->scsi_done(sc);
+       return 0;
+}
+
+static int
+iscsi_change_queue_depth(struct scsi_device *sdev, int depth)
+{
+       if (depth > ISCSI_MAX_CMD_PER_LUN)
+               depth = ISCSI_MAX_CMD_PER_LUN;
+       scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth);
+       return sdev->queue_depth;
+}
+
+static int
+iscsi_pool_init(struct iscsi_queue *q, int max, void ***items, int item_size)
+{
+       int i;
+
+       *items = kmalloc(max * sizeof(void*), GFP_KERNEL);
+       if (*items == NULL)
+               return -ENOMEM;
+
+       q->max = max;
+       q->pool = kmalloc(max * sizeof(void*), GFP_KERNEL);
+       if (q->pool == NULL) {
+               kfree(*items);
+               return -ENOMEM;
+       }
+
+       q->queue = kfifo_init((void*)q->pool, max * sizeof(void*),
+                             GFP_KERNEL, NULL);
+       if (q->queue == ERR_PTR(-ENOMEM)) {
+               kfree(q->pool);
+               kfree(*items);
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < max; i++) {
+               q->pool[i] = kmalloc(item_size, GFP_KERNEL);
+               if (q->pool[i] == NULL) {
+                       int j;
+
+                       for (j = 0; j < i; j++)
+                               kfree(q->pool[j]);
+
+                       kfifo_free(q->queue);
+                       kfree(q->pool);
+                       kfree(*items);
+                       return -ENOMEM;
+               }
+               memset(q->pool[i], 0, item_size);
+               (*items)[i] = q->pool[i];
+               __kfifo_put(q->queue, (void*)&q->pool[i], sizeof(void*));
+       }
+       return 0;
+}
+
+static void
+iscsi_pool_free(struct iscsi_queue *q, void **items)
+{
+       int i;
+
+       for (i = 0; i < q->max; i++)
+               kfree(items[i]);
+       kfree(q->pool);
+       kfree(items);
+}
+
 static struct iscsi_cls_conn *
-iscsi_tcp_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
+iscsi_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
 {
+       struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
+       struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
        struct iscsi_conn *conn;
        struct iscsi_cls_conn *cls_conn;
-       struct iscsi_tcp_conn *tcp_conn;
 
-       cls_conn = iscsi_conn_setup(cls_session, conn_idx);
+       cls_conn = iscsi_create_conn(cls_session, conn_idx);
        if (!cls_conn)
                return NULL;
        conn = cls_conn->dd_data;
-       /*
-        * due to strange issues with iser these are not set
-        * in iscsi_conn_setup
-        */
-       conn->max_recv_dlength = DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH;
+       memset(conn, 0, sizeof(*conn));
 
-       tcp_conn = kzalloc(sizeof(*tcp_conn), GFP_KERNEL);
-       if (!tcp_conn)
-               goto tcp_conn_alloc_fail;
+       conn->cls_conn = cls_conn;
+       conn->c_stage = ISCSI_CONN_INITIAL_STAGE;
+       conn->in_progress = IN_PROGRESS_WAIT_HEADER;
+       conn->id = conn_idx;
+       conn->exp_statsn = 0;
+       conn->tmabort_state = TMABORT_INITIAL;
 
-       conn->dd_data = tcp_conn;
-       tcp_conn->iscsi_conn = conn;
-       tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
        /* initial operational parameters */
-       tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
-       tcp_conn->data_size = DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH;
+       conn->hdr_size = sizeof(struct iscsi_hdr);
+       conn->data_size = DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH;
+       conn->max_recv_dlength = DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH;
+
+       /* initialize general xmit PDU commands queue */
+       conn->xmitqueue = kfifo_alloc(session->cmds_max * sizeof(void*),
+                                       GFP_KERNEL, NULL);
+       if (conn->xmitqueue == ERR_PTR(-ENOMEM))
+               goto xmitqueue_alloc_fail;
+
+       /* initialize write response PDU commands queue */
+       conn->writequeue = kfifo_alloc(session->cmds_max * sizeof(void*),
+                                       GFP_KERNEL, NULL);
+       if (conn->writequeue == ERR_PTR(-ENOMEM))
+               goto writequeue_alloc_fail;
+
+       /* initialize general immediate & non-immediate PDU commands queue */
+       conn->immqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*),
+                                       GFP_KERNEL, NULL);
+       if (conn->immqueue == ERR_PTR(-ENOMEM))
+               goto immqueue_alloc_fail;
+
+       conn->mgmtqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*),
+                                       GFP_KERNEL, NULL);
+       if (conn->mgmtqueue == ERR_PTR(-ENOMEM))
+               goto mgmtqueue_alloc_fail;
+
+       INIT_WORK(&conn->xmitwork, iscsi_xmitworker, conn);
+
+       /* allocate login_mtask used for the login/text sequences */
+       spin_lock_bh(&session->lock);
+       if (!__kfifo_get(session->mgmtpool.queue,
+                         (void*)&conn->login_mtask,
+                        sizeof(void*))) {
+               spin_unlock_bh(&session->lock);
+               goto login_mtask_alloc_fail;
+       }
+       spin_unlock_bh(&session->lock);
 
        /* allocate initial PDU receive place holder */
-       if (tcp_conn->data_size <= PAGE_SIZE)
-               tcp_conn->data = kmalloc(tcp_conn->data_size, GFP_KERNEL);
+       if (conn->data_size <= PAGE_SIZE)
+               conn->data = kmalloc(conn->data_size, GFP_KERNEL);
        else
-               tcp_conn->data = (void*)__get_free_pages(GFP_KERNEL,
-                                       get_order(tcp_conn->data_size));
-       if (!tcp_conn->data)
+               conn->data = (void*)__get_free_pages(GFP_KERNEL,
+                                       get_order(conn->data_size));
+       if (!conn->data)
                goto max_recv_dlenght_alloc_fail;
 
+       init_timer(&conn->tmabort_timer);
+       mutex_init(&conn->xmitmutex);
+       init_waitqueue_head(&conn->ehwait);
+
        return cls_conn;
 
 max_recv_dlenght_alloc_fail:
-       kfree(tcp_conn);
-tcp_conn_alloc_fail:
-       iscsi_conn_teardown(cls_conn);
+       spin_lock_bh(&session->lock);
+       __kfifo_put(session->mgmtpool.queue, (void*)&conn->login_mtask,
+                   sizeof(void*));
+       spin_unlock_bh(&session->lock);
+login_mtask_alloc_fail:
+       kfifo_free(conn->mgmtqueue);
+mgmtqueue_alloc_fail:
+       kfifo_free(conn->immqueue);
+immqueue_alloc_fail:
+       kfifo_free(conn->writequeue);
+writequeue_alloc_fail:
+       kfifo_free(conn->xmitqueue);
+xmitqueue_alloc_fail:
+       iscsi_destroy_conn(cls_conn);
        return NULL;
 }
 
 static void
-iscsi_tcp_conn_destroy(struct iscsi_cls_conn *cls_conn)
+iscsi_conn_destroy(struct iscsi_cls_conn *cls_conn)
 {
        struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       int digest = 0;
+       struct iscsi_session *session = conn->session;
+       unsigned long flags;
+
+       mutex_lock(&conn->xmitmutex);
+       set_bit(SUSPEND_BIT, &conn->suspend_tx);
+       if (conn->c_stage == ISCSI_CONN_INITIAL_STAGE && conn->sock) {
+               struct sock *sk = conn->sock->sk;
+
+               /*
+                * conn_start() has never been called!
+                * need to cleanup the socket.
+                */
+               write_lock_bh(&sk->sk_callback_lock);
+               set_bit(SUSPEND_BIT, &conn->suspend_rx);
+               write_unlock_bh(&sk->sk_callback_lock);
+
+               sock_hold(conn->sock->sk);
+               iscsi_conn_restore_callbacks(conn);
+               sock_put(conn->sock->sk);
+               sock_release(conn->sock);
+               conn->sock = NULL;
+       }
 
-       if (conn->hdrdgst_en || conn->datadgst_en)
-               digest = 1;
+       spin_lock_bh(&session->lock);
+       conn->c_stage = ISCSI_CONN_CLEANUP_WAIT;
+       if (session->leadconn == conn) {
+               /*
+                * leading connection? then give up on recovery.
+                */
+               session->state = ISCSI_STATE_TERMINATE;
+               wake_up(&conn->ehwait);
+       }
+       spin_unlock_bh(&session->lock);
+
+       mutex_unlock(&conn->xmitmutex);
 
-       iscsi_conn_teardown(cls_conn);
+       /*
+        * Block until all in-progress commands for this connection
+        * time out or fail.
+        */
+       for (;;) {
+               spin_lock_irqsave(session->host->host_lock, flags);
+               if (!session->host->host_busy) { /* OK for ERL == 0 */
+                       spin_unlock_irqrestore(session->host->host_lock, flags);
+                       break;
+               }
+               spin_unlock_irqrestore(session->host->host_lock, flags);
+               msleep_interruptible(500);
+               printk("conn_destroy(): host_busy %d host_failed %d\n",
+                       session->host->host_busy, session->host->host_failed);
+               /*
+                * force eh_abort() to unblock
+                */
+               wake_up(&conn->ehwait);
+       }
 
-       /* now free tcp_conn */
-       if (digest) {
-               if (tcp_conn->tx_tfm)
-                       crypto_free_tfm(tcp_conn->tx_tfm);
-               if (tcp_conn->rx_tfm)
-                       crypto_free_tfm(tcp_conn->rx_tfm);
-               if (tcp_conn->data_tx_tfm)
-                       crypto_free_tfm(tcp_conn->data_tx_tfm);
-               if (tcp_conn->data_rx_tfm)
-                       crypto_free_tfm(tcp_conn->data_rx_tfm);
+       /* now free crypto */
+       if (conn->hdrdgst_en || conn->datadgst_en) {
+               if (conn->tx_tfm)
+                       crypto_free_tfm(conn->tx_tfm);
+               if (conn->rx_tfm)
+                       crypto_free_tfm(conn->rx_tfm);
+               if (conn->data_tx_tfm)
+                       crypto_free_tfm(conn->data_tx_tfm);
+               if (conn->data_rx_tfm)
+                       crypto_free_tfm(conn->data_rx_tfm);
        }
 
        /* free conn->data, size = MaxRecvDataSegmentLength */
-       if (tcp_conn->data_size <= PAGE_SIZE)
-               kfree(tcp_conn->data);
+       if (conn->data_size <= PAGE_SIZE)
+               kfree(conn->data);
        else
-               free_pages((unsigned long)tcp_conn->data,
-                          get_order(tcp_conn->data_size));
-       kfree(tcp_conn);
+               free_pages((unsigned long)conn->data,
+                                       get_order(conn->data_size));
+
+       spin_lock_bh(&session->lock);
+       __kfifo_put(session->mgmtpool.queue, (void*)&conn->login_mtask,
+                   sizeof(void*));
+       list_del(&conn->item);
+       if (list_empty(&session->connections))
+               session->leadconn = NULL;
+       if (session->leadconn && session->leadconn == conn)
+               session->leadconn = container_of(session->connections.next,
+                       struct iscsi_conn, item);
+
+       if (session->leadconn == NULL)
+               /* no connections remain; reset command sequencing */
+               session->cmdsn = session->max_cmdsn = session->exp_cmdsn = 1;
+       spin_unlock_bh(&session->lock);
+
+       kfifo_free(conn->xmitqueue);
+       kfifo_free(conn->writequeue);
+       kfifo_free(conn->immqueue);
+       kfifo_free(conn->mgmtqueue);
+
+       iscsi_destroy_conn(cls_conn);
 }
 
 static int
-iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
-                   struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
-                   int is_leading)
+iscsi_conn_bind(struct iscsi_cls_session *cls_session,
+               struct iscsi_cls_conn *cls_conn, uint32_t transport_fd,
+               int is_leading)
 {
-       struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+       struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
+       struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
+       struct iscsi_conn *tmp = ERR_PTR(-EEXIST), *conn = cls_conn->dd_data;
        struct sock *sk;
        struct socket *sock;
        int err;
 
        /* lookup for existing socket */
-       sock = sockfd_lookup((int)transport_eph, &err);
+       sock = sockfd_lookup(transport_fd, &err);
        if (!sock) {
                printk(KERN_ERR "iscsi_tcp: sockfd_lookup failed %d\n", err);
                return -EEXIST;
        }
 
-       err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
-       if (err)
-               return err;
+       /* lookup for existing connection */
+       spin_lock_bh(&session->lock);
+       list_for_each_entry(tmp, &session->connections, item) {
+               if (tmp == conn) {
+                       if (conn->c_stage != ISCSI_CONN_STOPPED ||
+                           conn->stop_stage == STOP_CONN_TERM) {
+                               printk(KERN_ERR "iscsi_tcp: can't bind "
+                                      "non-stopped connection (%d:%d)\n",
+                                      conn->c_stage, conn->stop_stage);
+                               spin_unlock_bh(&session->lock);
+                               return -EIO;
+                       }
+                       break;
+               }
+       }
+       if (tmp != conn) {
+               /* bind new iSCSI connection to session */
+               conn->session = session;
+
+               list_add(&conn->item, &session->connections);
+       }
+       spin_unlock_bh(&session->lock);
 
-       /* bind iSCSI connection and socket */
-       tcp_conn->sock = sock;
+       if (conn->stop_stage != STOP_CONN_SUSPEND) {
+               /* bind iSCSI connection and socket */
+               conn->sock = sock;
 
-       /* setup Socket parameters */
-       sk = sock->sk;
-       sk->sk_reuse = 1;
-       sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
-       sk->sk_allocation = GFP_ATOMIC;
+               /* setup Socket parameters */
+               sk = sock->sk;
+               sk->sk_reuse = 1;
+               sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
+               sk->sk_allocation = GFP_ATOMIC;
 
-       /* FIXME: disable Nagle's algorithm */
+               /* FIXME: disable Nagle's algorithm */
+
+               /*
+                * Intercept TCP callbacks for sendfile like receive
+                * processing.
+                */
+               iscsi_conn_set_callbacks(conn);
+
+               conn->sendpage = conn->sock->ops->sendpage;
+
+               /*
+                * set receive state machine into initial state
+                */
+               conn->in_progress = IN_PROGRESS_WAIT_HEADER;
+       }
+
+       if (is_leading)
+               session->leadconn = conn;
 
        /*
-        * Intercept TCP callbacks for sendfile like receive
-        * processing.
-        */
-       conn->recv_lock = &sk->sk_callback_lock;
-       iscsi_conn_set_callbacks(conn);
-       tcp_conn->sendpage = tcp_conn->sock->ops->sendpage;
-       /*
-        * set receive state machine into initial state
+        * Unblock xmitworker(), Login Phase will pass through.
         */
-       tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
+       clear_bit(SUSPEND_BIT, &conn->suspend_rx);
+       clear_bit(SUSPEND_BIT, &conn->suspend_tx);
 
        return 0;
 }
 
-static void
-iscsi_tcp_cleanup_ctask(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+static int
+iscsi_conn_start(struct iscsi_cls_conn *cls_conn)
 {
-       struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-       struct iscsi_r2t_info *r2t;
+       struct iscsi_conn *conn = cls_conn->dd_data;
+       struct iscsi_session *session = conn->session;
+       struct sock *sk;
 
-       /* flush ctask's r2t queues */
-       while (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*)))
-               __kfifo_put(tcp_ctask->r2tpool.queue, (void*)&r2t,
-                           sizeof(void*));
+       /* FF phase warming up... */
 
-       __iscsi_ctask_cleanup(conn, ctask);
+       if (session == NULL) {
+               printk(KERN_ERR "iscsi_tcp: can't start unbound connection\n");
+               return -EPERM;
+       }
+
+       sk = conn->sock->sk;
+
+       write_lock_bh(&sk->sk_callback_lock);
+       spin_lock_bh(&session->lock);
+       conn->c_stage = ISCSI_CONN_STARTED;
+       session->state = ISCSI_STATE_LOGGED_IN;
+
+       switch(conn->stop_stage) {
+       case STOP_CONN_RECOVER:
+               /*
+                * unblock eh_abort() if it is blocked. re-try all
+                * commands after successful recovery
+                */
+               session->conn_cnt++;
+               conn->stop_stage = 0;
+               conn->tmabort_state = TMABORT_INITIAL;
+               session->age++;
+               wake_up(&conn->ehwait);
+               break;
+       case STOP_CONN_TERM:
+               session->conn_cnt++;
+               conn->stop_stage = 0;
+               break;
+       case STOP_CONN_SUSPEND:
+               conn->stop_stage = 0;
+               clear_bit(SUSPEND_BIT, &conn->suspend_rx);
+               clear_bit(SUSPEND_BIT, &conn->suspend_tx);
+               break;
+       default:
+               break;
+       }
+       spin_unlock_bh(&session->lock);
+       write_unlock_bh(&sk->sk_callback_lock);
+
+       return 0;
 }
 
 static void
-iscsi_tcp_suspend_conn_rx(struct iscsi_conn *conn)
+iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
 {
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+       struct iscsi_conn *conn = cls_conn->dd_data;
+       struct iscsi_session *session = conn->session;
        struct sock *sk;
+       unsigned long flags;
 
-       if (!tcp_conn->sock)
-               return;
-
-       sk = tcp_conn->sock->sk;
+       BUG_ON(!conn->sock);
+       sk = conn->sock->sk;
        write_lock_bh(&sk->sk_callback_lock);
-       set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
+       set_bit(SUSPEND_BIT, &conn->suspend_rx);
        write_unlock_bh(&sk->sk_callback_lock);
+
+       mutex_lock(&conn->xmitmutex);
+
+       spin_lock_irqsave(session->host->host_lock, flags);
+       spin_lock(&session->lock);
+       conn->stop_stage = flag;
+       conn->c_stage = ISCSI_CONN_STOPPED;
+       set_bit(SUSPEND_BIT, &conn->suspend_tx);
+
+       if (flag != STOP_CONN_SUSPEND)
+               session->conn_cnt--;
+
+       if (session->conn_cnt == 0 || session->leadconn == conn)
+               session->state = ISCSI_STATE_FAILED;
+
+       spin_unlock(&session->lock);
+       spin_unlock_irqrestore(session->host->host_lock, flags);
+
+       if (flag == STOP_CONN_TERM || flag == STOP_CONN_RECOVER) {
+               struct iscsi_cmd_task *ctask;
+               struct iscsi_mgmt_task *mtask;
+
+               /*
+                * Socket must go now.
+                */
+               sock_hold(conn->sock->sk);
+               iscsi_conn_restore_callbacks(conn);
+               sock_put(conn->sock->sk);
+
+               /*
+                * flush xmit queues.
+                */
+               spin_lock_bh(&session->lock);
+               while (__kfifo_get(conn->writequeue, (void*)&ctask,
+                           sizeof(void*)) ||
+                       __kfifo_get(conn->xmitqueue, (void*)&ctask,
+                           sizeof(void*))) {
+                       struct iscsi_r2t_info *r2t;
+
+                       /*
+                        * flush ctask's r2t queues
+                        */
+                       while (__kfifo_get(ctask->r2tqueue, (void*)&r2t,
+                               sizeof(void*)))
+                               __kfifo_put(ctask->r2tpool.queue, (void*)&r2t,
+                                           sizeof(void*));
+
+                       spin_unlock_bh(&session->lock);
+                       local_bh_disable();
+                       iscsi_ctask_cleanup(conn, ctask);
+                       local_bh_enable();
+                       spin_lock_bh(&session->lock);
+               }
+               conn->ctask = NULL;
+               while (__kfifo_get(conn->immqueue, (void*)&mtask,
+                          sizeof(void*)) ||
+                       __kfifo_get(conn->mgmtqueue, (void*)&mtask,
+                          sizeof(void*))) {
+                       __kfifo_put(session->mgmtpool.queue,
+                                   (void*)&mtask, sizeof(void*));
+               }
+               conn->mtask = NULL;
+               spin_unlock_bh(&session->lock);
+
+               /*
+                * release socket only after we stopped data_xmit()
+                * activity and flushed all outstanding commands
+                */
+               sock_release(conn->sock);
+               conn->sock = NULL;
+
+               /*
+                * for connection level recovery we should not calculate
+                * header digest. conn->hdr_size used for optimization
+                * in hdr_extract() and will be re-negotiated at
+                * set_param() time.
+                */
+               if (flag == STOP_CONN_RECOVER) {
+                       conn->hdr_size = sizeof(struct iscsi_hdr);
+                       conn->hdrdgst_en = 0;
+                       conn->datadgst_en = 0;
+               }
+       }
+       mutex_unlock(&conn->xmitmutex);
 }
 
-static void
-iscsi_tcp_terminate_conn(struct iscsi_conn *conn)
+static int
+iscsi_conn_send_generic(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+                       char *data, uint32_t data_size)
 {
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+       struct iscsi_session *session = conn->session;
+       struct iscsi_nopout *nop = (struct iscsi_nopout *)hdr;
+       struct iscsi_mgmt_task *mtask;
 
-       if (!tcp_conn->sock)
-               return;
+       spin_lock_bh(&session->lock);
+       if (session->state == ISCSI_STATE_TERMINATE) {
+               spin_unlock_bh(&session->lock);
+               return -EPERM;
+       }
+       if (hdr->opcode == (ISCSI_OP_LOGIN | ISCSI_OP_IMMEDIATE) ||
+           hdr->opcode == (ISCSI_OP_TEXT | ISCSI_OP_IMMEDIATE))
+               /*
+                * Login and Text are sent serially, in
+                * request-followed-by-response sequence.
+                * Same mtask can be used. Same ITT must be used.
+                * Note that login_mtask is preallocated at conn_create().
+                */
+               mtask = conn->login_mtask;
+       else {
+               BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
+               BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
+
+               if (!__kfifo_get(session->mgmtpool.queue,
+                                (void*)&mtask, sizeof(void*))) {
+                       spin_unlock_bh(&session->lock);
+                       return -ENOSPC;
+               }
+       }
+
+       /*
+        * pre-format CmdSN and ExpStatSN for outgoing PDU.
+        */
+       if (hdr->itt != cpu_to_be32(ISCSI_RESERVED_TAG)) {
+               hdr->itt = mtask->itt | (conn->id << CID_SHIFT) |
+                          (session->age << AGE_SHIFT);
+               nop->cmdsn = cpu_to_be32(session->cmdsn);
+               if (conn->c_stage == ISCSI_CONN_STARTED &&
+                   !(hdr->opcode & ISCSI_OP_IMMEDIATE))
+                       session->cmdsn++;
+       } else
+               /* do not advance CmdSN */
+               nop->cmdsn = cpu_to_be32(session->cmdsn);
+
+       nop->exp_statsn = cpu_to_be32(conn->exp_statsn);
+
+       memcpy(&mtask->hdr, hdr, sizeof(struct iscsi_hdr));
+
+       iscsi_buf_init_virt(&mtask->headbuf, (char*)&mtask->hdr,
+                                   sizeof(struct iscsi_hdr));
+
+       spin_unlock_bh(&session->lock);
+
+       if (data_size) {
+               memcpy(mtask->data, data, data_size);
+               mtask->data_count = data_size;
+       } else
+               mtask->data_count = 0;
+
+       mtask->xmstate = XMSTATE_IMM_HDR;
+
+       if (mtask->data_count) {
+               iscsi_buf_init_iov(&mtask->sendbuf, (char*)mtask->data,
+                                   mtask->data_count);
+       }
+
+       debug_scsi("mgmtpdu [op 0x%x hdr->itt 0x%x datalen %d]\n",
+                  hdr->opcode, hdr->itt, data_size);
 
-       sock_hold(tcp_conn->sock->sk);
-       iscsi_conn_restore_callbacks(conn);
-       sock_put(tcp_conn->sock->sk);
+       /*
+        * since send_pdu() could be called at least from two contexts,
+        * we need to serialize __kfifo_put, so we don't have to take
+        * additional lock on fast data-path
+        */
+       if (hdr->opcode & ISCSI_OP_IMMEDIATE)
+               __kfifo_put(conn->immqueue, (void*)&mtask, sizeof(void*));
+       else
+               __kfifo_put(conn->mgmtqueue, (void*)&mtask, sizeof(void*));
 
-       sock_release(tcp_conn->sock);
-       tcp_conn->sock = NULL;
-       conn->recv_lock = NULL;
+       scsi_queue_work(session->host, &conn->xmitwork);
+       return 0;
+}
+
+static int
+iscsi_eh_host_reset(struct scsi_cmnd *sc)
+{
+       struct iscsi_cmd_task *ctask = (struct iscsi_cmd_task *)sc->SCp.ptr;
+       struct iscsi_conn *conn = ctask->conn;
+       struct iscsi_session *session = conn->session;
+
+       spin_lock_bh(&session->lock);
+       if (session->state == ISCSI_STATE_TERMINATE) {
+               debug_scsi("failing host reset: session terminated "
+                          "[CID %d age %d]", conn->id, session->age);
+               spin_unlock_bh(&session->lock);
+               return FAILED;
+       }
+       spin_unlock_bh(&session->lock);
+
+       debug_scsi("failing connection CID %d due to SCSI host reset "
+                  "[itt 0x%x age %d]", conn->id, ctask->itt,
+                  session->age);
+       iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+
+       return SUCCESS;
 }
 
-/* called with host lock */
 static void
-iscsi_tcp_mgmt_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask,
-                   char *data, uint32_t data_size)
+iscsi_tmabort_timedout(unsigned long data)
 {
-       struct iscsi_tcp_mgmt_task *tcp_mtask = mtask->dd_data;
+       struct iscsi_cmd_task *ctask = (struct iscsi_cmd_task *)data;
+       struct iscsi_conn *conn = ctask->conn;
+       struct iscsi_session *session = conn->session;
 
-       iscsi_buf_init_iov(&tcp_mtask->headbuf, (char*)mtask->hdr,
-                          sizeof(struct iscsi_hdr));
-       tcp_mtask->xmstate = XMSTATE_IMM_HDR;
+       spin_lock(&session->lock);
+       if (conn->tmabort_state == TMABORT_INITIAL) {
+               __kfifo_put(session->mgmtpool.queue,
+                               (void*)&ctask->mtask, sizeof(void*));
+               conn->tmabort_state = TMABORT_TIMEDOUT;
+               debug_scsi("tmabort timedout [sc %lx itt 0x%x]\n",
+                       (long)ctask->sc, ctask->itt);
+               /* unblock eh_abort() */
+               wake_up(&conn->ehwait);
+       }
+       spin_unlock(&session->lock);
+}
 
-       if (mtask->data_count)
-               iscsi_buf_init_iov(&tcp_mtask->sendbuf, (char*)mtask->data,
-                                   mtask->data_count);
+static int
+iscsi_eh_abort(struct scsi_cmnd *sc)
+{
+       int rc;
+       struct iscsi_cmd_task *ctask = (struct iscsi_cmd_task *)sc->SCp.ptr;
+       struct iscsi_conn *conn = ctask->conn;
+       struct iscsi_session *session = conn->session;
+
+       conn->eh_abort_cnt++;
+       debug_scsi("aborting [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
+
+       /*
+        * two cases for ERL=0 here:
+        *
+        * 1) connection-level failure;
+        * 2) recovery due to protocol error;
+        */
+       mutex_lock(&conn->xmitmutex);
+       spin_lock_bh(&session->lock);
+       if (session->state != ISCSI_STATE_LOGGED_IN) {
+               if (session->state == ISCSI_STATE_TERMINATE) {
+                       spin_unlock_bh(&session->lock);
+                       mutex_unlock(&conn->xmitmutex);
+                       goto failed;
+               }
+               spin_unlock_bh(&session->lock);
+       } else {
+               struct iscsi_tm *hdr = &conn->tmhdr;
+
+               /*
+                * Still LOGGED_IN...
+                */
+
+               if (!ctask->sc || sc->SCp.phase != session->age) {
+                       /*
+                        * 1) ctask completed before time out. But session
+                        *    is still ok => Happy Retry.
+                        * 2) session was re-open during time out of ctask.
+                        */
+                       spin_unlock_bh(&session->lock);
+                       mutex_unlock(&conn->xmitmutex);
+                       goto success;
+               }
+               conn->tmabort_state = TMABORT_INITIAL;
+               spin_unlock_bh(&session->lock);
+
+               /*
+                * ctask timed out but session is OK
+                * ERL=0 requires task mgmt abort to be issued on each
+                * failed command. requests must be serialized.
+                */
+               memset(hdr, 0, sizeof(struct iscsi_tm));
+               hdr->opcode = ISCSI_OP_SCSI_TMFUNC | ISCSI_OP_IMMEDIATE;
+               hdr->flags = ISCSI_TM_FUNC_ABORT_TASK;
+               hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+               memcpy(hdr->lun, ctask->hdr.lun, sizeof(hdr->lun));
+               hdr->rtt = ctask->hdr.itt;
+               hdr->refcmdsn = ctask->hdr.cmdsn;
+
+               rc = iscsi_conn_send_generic(conn, (struct iscsi_hdr *)hdr,
+                                            NULL, 0);
+               if (rc) {
+                       iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+                       debug_scsi("abort sent failure [itt 0x%x]", ctask->itt);
+               } else {
+                       struct iscsi_r2t_info *r2t;
+
+                       /*
+                        * TMF abort vs. TMF response race logic
+                        */
+                       spin_lock_bh(&session->lock);
+                       ctask->mtask = (struct iscsi_mgmt_task *)
+                               session->mgmt_cmds[(hdr->itt & ITT_MASK) -
+                                                       ISCSI_MGMT_ITT_OFFSET];
+                       /*
+                        * have to flush r2tqueue to avoid r2t leaks
+                        */
+                       while (__kfifo_get(ctask->r2tqueue, (void*)&r2t,
+                               sizeof(void*))) {
+                               __kfifo_put(ctask->r2tpool.queue, (void*)&r2t,
+                                       sizeof(void*));
+                       }
+                       if (conn->tmabort_state == TMABORT_INITIAL) {
+                               conn->tmfcmd_pdus_cnt++;
+                               conn->tmabort_timer.expires = 3*HZ + jiffies;
+                               conn->tmabort_timer.function =
+                                               iscsi_tmabort_timedout;
+                               conn->tmabort_timer.data = (unsigned long)ctask;
+                               add_timer(&conn->tmabort_timer);
+                               debug_scsi("abort sent [itt 0x%x]", ctask->itt);
+                       } else {
+                               if (!ctask->sc ||
+                                   conn->tmabort_state == TMABORT_SUCCESS) {
+                                       conn->tmabort_state = TMABORT_INITIAL;
+                                       spin_unlock_bh(&session->lock);
+                                       mutex_unlock(&conn->xmitmutex);
+                                       goto success;
+                               }
+                               conn->tmabort_state = TMABORT_INITIAL;
+                               iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+                       }
+                       spin_unlock_bh(&session->lock);
+               }
+       }
+       mutex_unlock(&conn->xmitmutex);
+
+
+       /*
+        * block eh thread until:
+        *
+        * 1) abort response;
+        * 2) abort timeout;
+        * 3) session re-opened;
+        * 4) session terminated;
+        */
+       for (;;) {
+               int p_state = session->state;
+
+               rc = wait_event_interruptible(conn->ehwait,
+                       (p_state == ISCSI_STATE_LOGGED_IN ?
+                        (session->state == ISCSI_STATE_TERMINATE ||
+                         conn->tmabort_state != TMABORT_INITIAL) :
+                        (session->state == ISCSI_STATE_TERMINATE ||
+                         session->state == ISCSI_STATE_LOGGED_IN)));
+               if (rc) {
+                       /* shutdown.. */
+                       session->state = ISCSI_STATE_TERMINATE;
+                       goto failed;
+               }
+
+               if (signal_pending(current))
+                       flush_signals(current);
+
+               if (session->state == ISCSI_STATE_TERMINATE)
+                       goto failed;
+
+               spin_lock_bh(&session->lock);
+               if (sc->SCp.phase == session->age &&
+                  (conn->tmabort_state == TMABORT_TIMEDOUT ||
+                   conn->tmabort_state == TMABORT_FAILED)) {
+                       conn->tmabort_state = TMABORT_INITIAL;
+                       if (!ctask->sc) {
+                               /*
+                                * ctask completed before tmf abort response or
+                                * time out.
+                                * But session is still ok => Happy Retry.
+                                */
+                               spin_unlock_bh(&session->lock);
+                               break;
+                       }
+                       spin_unlock_bh(&session->lock);
+                       iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+                       continue;
+               }
+               spin_unlock_bh(&session->lock);
+               break;
+       }
+
+success:
+       debug_scsi("abort success [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
+       rc = SUCCESS;
+       goto exit;
+
+failed:
+       debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
+       rc = FAILED;
+
+exit:
+       del_timer_sync(&conn->tmabort_timer);
+
+       mutex_lock(&conn->xmitmutex);
+       if (conn->sock) {
+               struct sock *sk = conn->sock->sk;
+
+               write_lock_bh(&sk->sk_callback_lock);
+               iscsi_ctask_cleanup(conn, ctask);
+               write_unlock_bh(&sk->sk_callback_lock);
+       }
+       mutex_unlock(&conn->xmitmutex);
+       return rc;
 }
 
 static int
@@ -2074,7 +3174,6 @@ iscsi_r2tpool_alloc(struct iscsi_session *session)
         */
        for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
                struct iscsi_cmd_task *ctask = session->cmds[cmd_i];
-               struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
 
                /*
                 * pre-allocated x4 as much r2ts to handle race when
@@ -2083,32 +3182,42 @@ iscsi_r2tpool_alloc(struct iscsi_session *session)
                 */
 
                /* R2T pool */
-               if (iscsi_pool_init(&tcp_ctask->r2tpool, session->max_r2t * 4,
-                                   (void***)&tcp_ctask->r2ts,
-                                   sizeof(struct iscsi_r2t_info))) {
+               if (iscsi_pool_init(&ctask->r2tpool, session->max_r2t * 4,
+                       (void***)&ctask->r2ts, sizeof(struct iscsi_r2t_info))) {
                        goto r2t_alloc_fail;
                }
 
                /* R2T xmit queue */
-               tcp_ctask->r2tqueue = kfifo_alloc(
+               ctask->r2tqueue = kfifo_alloc(
                      session->max_r2t * 4 * sizeof(void*), GFP_KERNEL, NULL);
-               if (tcp_ctask->r2tqueue == ERR_PTR(-ENOMEM)) {
-                       iscsi_pool_free(&tcp_ctask->r2tpool,
-                                       (void**)tcp_ctask->r2ts);
+               if (ctask->r2tqueue == ERR_PTR(-ENOMEM)) {
+                       iscsi_pool_free(&ctask->r2tpool, (void**)ctask->r2ts);
                        goto r2t_alloc_fail;
                }
+
+               /*
+                * The number of Data-Out PDUs within an R2T sequence
+                * can be quite big, so allocate them from a mempool
+                * rather than a fixed-size pool.
+                */
+               ctask->datapool = mempool_create_slab_pool(ISCSI_DTASK_DEFAULT_MAX,
+                                                          taskcache);
+               if (ctask->datapool == NULL) {
+                       kfifo_free(ctask->r2tqueue);
+                       iscsi_pool_free(&ctask->r2tpool, (void**)ctask->r2ts);
+                       goto r2t_alloc_fail;
+               }
+               INIT_LIST_HEAD(&ctask->dataqueue);
        }
 
        return 0;
 
 r2t_alloc_fail:
        for (i = 0; i < cmd_i; i++) {
-               struct iscsi_cmd_task *ctask = session->cmds[i];
-               struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-
-               kfifo_free(tcp_ctask->r2tqueue);
-               iscsi_pool_free(&tcp_ctask->r2tpool,
-                               (void**)tcp_ctask->r2ts);
+               mempool_destroy(session->cmds[i]->datapool);
+               kfifo_free(session->cmds[i]->r2tqueue);
+               iscsi_pool_free(&session->cmds[i]->r2tpool,
+                               (void**)session->cmds[i]->r2ts);
        }
        return -ENOMEM;
 }
@@ -2119,13 +3228,127 @@ iscsi_r2tpool_free(struct iscsi_session *session)
        int i;
 
        for (i = 0; i < session->cmds_max; i++) {
-               struct iscsi_cmd_task *ctask = session->cmds[i];
-               struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+               mempool_destroy(session->cmds[i]->datapool);
+               kfifo_free(session->cmds[i]->r2tqueue);
+               iscsi_pool_free(&session->cmds[i]->r2tpool,
+                               (void**)session->cmds[i]->r2ts);
+       }
+}
+
+static struct scsi_host_template iscsi_sht = {
+       .name                   = "iSCSI Initiator over TCP/IP, v."
+                                 ISCSI_VERSION_STR,
+       .queuecommand           = iscsi_queuecommand,
+       .change_queue_depth     = iscsi_change_queue_depth,
+       .can_queue              = ISCSI_XMIT_CMDS_MAX - 1,
+       .sg_tablesize           = ISCSI_SG_TABLESIZE,
+       .cmd_per_lun            = ISCSI_DEF_CMD_PER_LUN,
+       .eh_abort_handler       = iscsi_eh_abort,
+       .eh_host_reset_handler  = iscsi_eh_host_reset,
+       .use_clustering         = DISABLE_CLUSTERING,
+       .proc_name              = "iscsi_tcp",
+       .this_id                = -1,
+};
+
+static struct iscsi_transport iscsi_tcp_transport;
+
+static struct iscsi_cls_session *
+iscsi_session_create(struct scsi_transport_template *scsit,
+                    uint32_t initial_cmdsn, uint32_t *sid)
+{
+       struct Scsi_Host *shost;
+       struct iscsi_session *session;
+       int cmd_i;
+
+       shost = iscsi_transport_create_session(scsit, &iscsi_tcp_transport);
+       if (!shost)
+               return NULL;
+
+       session = iscsi_hostdata(shost->hostdata);
+       memset(session, 0, sizeof(struct iscsi_session));
+       session->host = shost;
+       session->state = ISCSI_STATE_FREE;
+       session->mgmtpool_max = ISCSI_MGMT_CMDS_MAX;
+       session->cmds_max = ISCSI_XMIT_CMDS_MAX;
+       session->cmdsn = initial_cmdsn;
+       session->exp_cmdsn = initial_cmdsn + 1;
+       session->max_cmdsn = initial_cmdsn + 1;
+       session->max_r2t = 1;
+       *sid = shost->host_no;
+
+       /* initialize SCSI PDU commands pool */
+       if (iscsi_pool_init(&session->cmdpool, session->cmds_max,
+               (void***)&session->cmds, sizeof(struct iscsi_cmd_task)))
+               goto cmdpool_alloc_fail;
+
+       /* pre-format cmds pool with ITT */
+       for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++)
+               session->cmds[cmd_i]->itt = cmd_i;
+
+       spin_lock_init(&session->lock);
+       INIT_LIST_HEAD(&session->connections);
+
+       /* initialize immediate command pool */
+       if (iscsi_pool_init(&session->mgmtpool, session->mgmtpool_max,
+               (void***)&session->mgmt_cmds, sizeof(struct iscsi_mgmt_task)))
+               goto mgmtpool_alloc_fail;
+
+
+       /* pre-format immediate cmds pool with ITT */
+       for (cmd_i = 0; cmd_i < session->mgmtpool_max; cmd_i++) {
+               session->mgmt_cmds[cmd_i]->itt = ISCSI_MGMT_ITT_OFFSET + cmd_i;
+               session->mgmt_cmds[cmd_i]->data = kmalloc(
+                       DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH, GFP_KERNEL);
+               if (!session->mgmt_cmds[cmd_i]->data) {
+                       int j;
+
+                       for (j = 0; j < cmd_i; j++)
+                               kfree(session->mgmt_cmds[j]->data);
+                       goto immdata_alloc_fail;
+               }
+       }
+
+       if (iscsi_r2tpool_alloc(session))
+               goto r2tpool_alloc_fail;
 
-               kfifo_free(tcp_ctask->r2tqueue);
-               iscsi_pool_free(&tcp_ctask->r2tpool,
-                               (void**)tcp_ctask->r2ts);
+       return hostdata_session(shost->hostdata);
+
+r2tpool_alloc_fail:
+       for (cmd_i = 0; cmd_i < session->mgmtpool_max; cmd_i++)
+               kfree(session->mgmt_cmds[cmd_i]->data);
+immdata_alloc_fail:
+       iscsi_pool_free(&session->mgmtpool, (void**)session->mgmt_cmds);
+mgmtpool_alloc_fail:
+       iscsi_pool_free(&session->cmdpool, (void**)session->cmds);
+cmdpool_alloc_fail:
+       iscsi_transport_destroy_session(shost);
+       return NULL;
+}
+
+static void
+iscsi_session_destroy(struct iscsi_cls_session *cls_session)
+{
+       struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
+       struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
+       int cmd_i;
+       struct iscsi_data_task *dtask, *n;
+
+       for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
+               struct iscsi_cmd_task *ctask = session->cmds[cmd_i];
+               list_for_each_entry_safe(dtask, n, &ctask->dataqueue, item) {
+                       list_del(&dtask->item);
+                       mempool_free(dtask, ctask->datapool);
+               }
        }
+
+       for (cmd_i = 0; cmd_i < session->mgmtpool_max; cmd_i++)
+               kfree(session->mgmt_cmds[cmd_i]->data);
+
+       iscsi_r2tpool_free(session);
+       iscsi_pool_free(&session->mgmtpool, (void**)session->mgmt_cmds);
+       iscsi_pool_free(&session->cmdpool, (void**)session->cmds);
+
+       iscsi_transport_destroy_session(shost);
 }
 
 static int
@@ -2134,14 +3357,23 @@ iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
 {
        struct iscsi_conn *conn = cls_conn->dd_data;
        struct iscsi_session *session = conn->session;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+
+       spin_lock_bh(&session->lock);
+       if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE &&
+           conn->stop_stage != STOP_CONN_RECOVER) {
+               printk(KERN_ERR "iscsi_tcp: can not change parameter [%d]\n",
+                      param);
+               spin_unlock_bh(&session->lock);
+               return 0;
+       }
+       spin_unlock_bh(&session->lock);
 
        switch(param) {
        case ISCSI_PARAM_MAX_RECV_DLENGTH: {
-               char *saveptr = tcp_conn->data;
+               char *saveptr = conn->data;
                gfp_t flags = GFP_KERNEL;
 
-               if (tcp_conn->data_size >= value) {
+               if (conn->data_size >= value) {
                        conn->max_recv_dlength = value;
                        break;
                }
@@ -2152,21 +3384,21 @@ iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
                spin_unlock_bh(&session->lock);
 
                if (value <= PAGE_SIZE)
-                       tcp_conn->data = kmalloc(value, flags);
+                       conn->data = kmalloc(value, flags);
                else
-                       tcp_conn->data = (void*)__get_free_pages(flags,
+                       conn->data = (void*)__get_free_pages(flags,
                                                             get_order(value));
-               if (tcp_conn->data == NULL) {
-                       tcp_conn->data = saveptr;
+               if (conn->data == NULL) {
+                       conn->data = saveptr;
                        return -ENOMEM;
                }
-               if (tcp_conn->data_size <= PAGE_SIZE)
+               if (conn->data_size <= PAGE_SIZE)
                        kfree(saveptr);
                else
                        free_pages((unsigned long)saveptr,
-                                  get_order(tcp_conn->data_size));
+                                  get_order(conn->data_size));
                conn->max_recv_dlength = value;
-               tcp_conn->data_size = value;
+               conn->data_size = value;
                }
                break;
        case ISCSI_PARAM_MAX_XMIT_DLENGTH:
@@ -2174,51 +3406,49 @@ iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
                break;
        case ISCSI_PARAM_HDRDGST_EN:
                conn->hdrdgst_en = value;
-               tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
+               conn->hdr_size = sizeof(struct iscsi_hdr);
                if (conn->hdrdgst_en) {
-                       tcp_conn->hdr_size += sizeof(__u32);
-                       if (!tcp_conn->tx_tfm)
-                               tcp_conn->tx_tfm = crypto_alloc_tfm("crc32c",
-                                                                   0);
-                       if (!tcp_conn->tx_tfm)
+                       conn->hdr_size += sizeof(__u32);
+                       if (!conn->tx_tfm)
+                               conn->tx_tfm = crypto_alloc_tfm("crc32c", 0);
+                       if (!conn->tx_tfm)
                                return -ENOMEM;
-                       if (!tcp_conn->rx_tfm)
-                               tcp_conn->rx_tfm = crypto_alloc_tfm("crc32c",
-                                                                   0);
-                       if (!tcp_conn->rx_tfm) {
-                               crypto_free_tfm(tcp_conn->tx_tfm);
+                       if (!conn->rx_tfm)
+                               conn->rx_tfm = crypto_alloc_tfm("crc32c", 0);
+                       if (!conn->rx_tfm) {
+                               crypto_free_tfm(conn->tx_tfm);
                                return -ENOMEM;
                        }
                } else {
-                       if (tcp_conn->tx_tfm)
-                               crypto_free_tfm(tcp_conn->tx_tfm);
-                       if (tcp_conn->rx_tfm)
-                               crypto_free_tfm(tcp_conn->rx_tfm);
+                       if (conn->tx_tfm)
+                               crypto_free_tfm(conn->tx_tfm);
+                       if (conn->rx_tfm)
+                               crypto_free_tfm(conn->rx_tfm);
                }
                break;
        case ISCSI_PARAM_DATADGST_EN:
                conn->datadgst_en = value;
                if (conn->datadgst_en) {
-                       if (!tcp_conn->data_tx_tfm)
-                               tcp_conn->data_tx_tfm =
+                       if (!conn->data_tx_tfm)
+                               conn->data_tx_tfm =
                                    crypto_alloc_tfm("crc32c", 0);
-                       if (!tcp_conn->data_tx_tfm)
+                       if (!conn->data_tx_tfm)
                                return -ENOMEM;
-                       if (!tcp_conn->data_rx_tfm)
-                               tcp_conn->data_rx_tfm =
+                       if (!conn->data_rx_tfm)
+                               conn->data_rx_tfm =
                                    crypto_alloc_tfm("crc32c", 0);
-                       if (!tcp_conn->data_rx_tfm) {
-                               crypto_free_tfm(tcp_conn->data_tx_tfm);
+                       if (!conn->data_rx_tfm) {
+                               crypto_free_tfm(conn->data_tx_tfm);
                                return -ENOMEM;
                        }
                } else {
-                       if (tcp_conn->data_tx_tfm)
-                               crypto_free_tfm(tcp_conn->data_tx_tfm);
-                       if (tcp_conn->data_rx_tfm)
-                               crypto_free_tfm(tcp_conn->data_rx_tfm);
+                       if (conn->data_tx_tfm)
+                               crypto_free_tfm(conn->data_tx_tfm);
+                       if (conn->data_rx_tfm)
+                               crypto_free_tfm(conn->data_rx_tfm);
                }
-               tcp_conn->sendpage = conn->datadgst_en ?
-                       sock_no_sendpage : tcp_conn->sock->ops->sendpage;
+               conn->sendpage = conn->datadgst_en ?
+                       sock_no_sendpage : conn->sock->ops->sendpage;
                break;
        case ISCSI_PARAM_INITIAL_R2T_EN:
                session->initial_r2t_en = value;
@@ -2259,9 +3489,6 @@ iscsi_conn_set_param(struct iscsi_cls_conn *cls_conn, enum iscsi_param param,
                BUG_ON(value);
                session->ofmarker_en = value;
                break;
-       case ISCSI_PARAM_EXP_STATSN:
-               conn->exp_statsn = value;
-               break;
        default:
                break;
        }
@@ -2308,7 +3535,7 @@ iscsi_session_get_param(struct iscsi_cls_session *cls_session,
                *value = session->ofmarker_en;
                break;
        default:
-               return -EINVAL;
+               return ISCSI_ERR_PARAM_NOT_FOUND;
        }
 
        return 0;
@@ -2319,8 +3546,6 @@ iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn,
                     enum iscsi_param param, uint32_t *value)
 {
        struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       struct inet_sock *inet;
 
        switch(param) {
        case ISCSI_PARAM_MAX_RECV_DLENGTH:
@@ -2335,70 +3560,17 @@ iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn,
        case ISCSI_PARAM_DATADGST_EN:
                *value = conn->datadgst_en;
                break;
-       case ISCSI_PARAM_CONN_PORT:
-               mutex_lock(&conn->xmitmutex);
-               if (!tcp_conn->sock) {
-                       mutex_unlock(&conn->xmitmutex);
-                       return -EINVAL;
-               }
-
-               inet = inet_sk(tcp_conn->sock->sk);
-               *value = be16_to_cpu(inet->dport);
-               mutex_unlock(&conn->xmitmutex);
-       case ISCSI_PARAM_EXP_STATSN:
-               *value = conn->exp_statsn;
-               break;
        default:
-               return -EINVAL;
+               return ISCSI_ERR_PARAM_NOT_FOUND;
        }
 
        return 0;
 }
 
-static int
-iscsi_conn_get_str_param(struct iscsi_cls_conn *cls_conn,
-                        enum iscsi_param param, char *buf)
-{
-       struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       struct sock *sk;
-       struct inet_sock *inet;
-       struct ipv6_pinfo *np;
-       int len = 0;
-
-       switch (param) {
-       case ISCSI_PARAM_CONN_ADDRESS:
-               mutex_lock(&conn->xmitmutex);
-               if (!tcp_conn->sock) {
-                       mutex_unlock(&conn->xmitmutex);
-                       return -EINVAL;
-               }
-
-               sk = tcp_conn->sock->sk;
-               if (sk->sk_family == PF_INET) {
-                       inet = inet_sk(sk);
-                       len = sprintf(buf, "%u.%u.%u.%u\n",
-                                     NIPQUAD(inet->daddr));
-               } else {
-                       np = inet6_sk(sk);
-                       len = sprintf(buf,
-                               "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
-                               NIP6(np->daddr));
-               }
-               mutex_unlock(&conn->xmitmutex);
-               break;
-       default:
-               return -EINVAL;
-       }
-
-       return len;
-}
-
 static void
 iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
 {
        struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
 
        stats->txdata_octets = conn->txdata_octets;
        stats->rxdata_octets = conn->rxdata_octets;
@@ -2411,141 +3583,68 @@ iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
        stats->tmfrsp_pdus = conn->tmfrsp_pdus_cnt;
        stats->custom_length = 3;
        strcpy(stats->custom[0].desc, "tx_sendpage_failures");
-       stats->custom[0].value = tcp_conn->sendpage_failures_cnt;
+       stats->custom[0].value = conn->sendpage_failures_cnt;
        strcpy(stats->custom[1].desc, "rx_discontiguous_hdr");
-       stats->custom[1].value = tcp_conn->discontiguous_hdr_cnt;
+       stats->custom[1].value = conn->discontiguous_hdr_cnt;
        strcpy(stats->custom[2].desc, "eh_abort_cnt");
        stats->custom[2].value = conn->eh_abort_cnt;
 }
 
-static struct iscsi_cls_session *
-iscsi_tcp_session_create(struct iscsi_transport *iscsit,
-                        struct scsi_transport_template *scsit,
-                        uint32_t initial_cmdsn, uint32_t *hostno)
+static int
+iscsi_conn_send_pdu(struct iscsi_cls_conn *cls_conn, struct iscsi_hdr *hdr,
+                   char *data, uint32_t data_size)
 {
-       struct iscsi_cls_session *cls_session;
-       struct iscsi_session *session;
-       uint32_t hn;
-       int cmd_i;
-
-       cls_session = iscsi_session_setup(iscsit, scsit,
-                                        sizeof(struct iscsi_tcp_cmd_task),
-                                        sizeof(struct iscsi_tcp_mgmt_task),
-                                        initial_cmdsn, &hn);
-       if (!cls_session)
-               return NULL;
-       *hostno = hn;
-
-       session = class_to_transport_session(cls_session);
-       for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
-               struct iscsi_cmd_task *ctask = session->cmds[cmd_i];
-               struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
-
-               ctask->hdr = &tcp_ctask->hdr;
-       }
-
-       for (cmd_i = 0; cmd_i < session->mgmtpool_max; cmd_i++) {
-               struct iscsi_mgmt_task *mtask = session->mgmt_cmds[cmd_i];
-               struct iscsi_tcp_mgmt_task *tcp_mtask = mtask->dd_data;
-
-               mtask->hdr = &tcp_mtask->hdr;
-       }
-
-       if (iscsi_r2tpool_alloc(class_to_transport_session(cls_session)))
-               goto r2tpool_alloc_fail;
-
-       return cls_session;
+       struct iscsi_conn *conn = cls_conn->dd_data;
+       int rc;
 
-r2tpool_alloc_fail:
-       iscsi_session_teardown(cls_session);
-       return NULL;
-}
+       mutex_lock(&conn->xmitmutex);
+       rc = iscsi_conn_send_generic(conn, hdr, data, data_size);
+       mutex_unlock(&conn->xmitmutex);
 
-static void iscsi_tcp_session_destroy(struct iscsi_cls_session *cls_session)
-{
-       iscsi_r2tpool_free(class_to_transport_session(cls_session));
-       iscsi_session_teardown(cls_session);
+       return rc;
 }
 
-static struct scsi_host_template iscsi_sht = {
-       .name                   = "iSCSI Initiator over TCP/IP, v"
-                                 ISCSI_TCP_VERSION,
-       .queuecommand           = iscsi_queuecommand,
-       .change_queue_depth     = iscsi_change_queue_depth,
-       .can_queue              = ISCSI_XMIT_CMDS_MAX - 1,
-       .sg_tablesize           = ISCSI_SG_TABLESIZE,
-       .cmd_per_lun            = ISCSI_DEF_CMD_PER_LUN,
-       .eh_abort_handler       = iscsi_eh_abort,
-       .eh_host_reset_handler  = iscsi_eh_host_reset,
-       .use_clustering         = DISABLE_CLUSTERING,
-       .proc_name              = "iscsi_tcp",
-       .this_id                = -1,
-};
-
 static struct iscsi_transport iscsi_tcp_transport = {
        .owner                  = THIS_MODULE,
        .name                   = "tcp",
        .caps                   = CAP_RECOVERY_L0 | CAP_MULTI_R2T | CAP_HDRDGST
                                  | CAP_DATADGST,
-       .param_mask             = ISCSI_MAX_RECV_DLENGTH |
-                                 ISCSI_MAX_XMIT_DLENGTH |
-                                 ISCSI_HDRDGST_EN |
-                                 ISCSI_DATADGST_EN |
-                                 ISCSI_INITIAL_R2T_EN |
-                                 ISCSI_MAX_R2T |
-                                 ISCSI_IMM_DATA_EN |
-                                 ISCSI_FIRST_BURST |
-                                 ISCSI_MAX_BURST |
-                                 ISCSI_PDU_INORDER_EN |
-                                 ISCSI_DATASEQ_INORDER_EN |
-                                 ISCSI_ERL |
-                                 ISCSI_CONN_PORT |
-                                 ISCSI_CONN_ADDRESS |
-                                 ISCSI_EXP_STATSN,
        .host_template          = &iscsi_sht,
+       .hostdata_size          = sizeof(struct iscsi_session),
        .conndata_size          = sizeof(struct iscsi_conn),
        .max_conn               = 1,
        .max_cmd_len            = ISCSI_TCP_MAX_CMD_LEN,
-       /* session management */
-       .create_session         = iscsi_tcp_session_create,
-       .destroy_session        = iscsi_tcp_session_destroy,
-       /* connection management */
-       .create_conn            = iscsi_tcp_conn_create,
-       .bind_conn              = iscsi_tcp_conn_bind,
-       .destroy_conn           = iscsi_tcp_conn_destroy,
+       .create_session         = iscsi_session_create,
+       .destroy_session        = iscsi_session_destroy,
+       .create_conn            = iscsi_conn_create,
+       .bind_conn              = iscsi_conn_bind,
+       .destroy_conn           = iscsi_conn_destroy,
        .set_param              = iscsi_conn_set_param,
        .get_conn_param         = iscsi_conn_get_param,
-       .get_conn_str_param     = iscsi_conn_get_str_param,
        .get_session_param      = iscsi_session_get_param,
        .start_conn             = iscsi_conn_start,
        .stop_conn              = iscsi_conn_stop,
-       /* these are called as part of conn recovery */
-       .suspend_conn_recv      = iscsi_tcp_suspend_conn_rx,
-       .terminate_conn         = iscsi_tcp_terminate_conn,
-       /* IO */
        .send_pdu               = iscsi_conn_send_pdu,
        .get_stats              = iscsi_conn_get_stats,
-       .init_cmd_task          = iscsi_tcp_cmd_init,
-       .init_mgmt_task         = iscsi_tcp_mgmt_init,
-       .xmit_cmd_task          = iscsi_tcp_ctask_xmit,
-       .xmit_mgmt_task         = iscsi_tcp_mtask_xmit,
-       .cleanup_cmd_task       = iscsi_tcp_cleanup_ctask,
-       /* recovery */
-       .session_recovery_timedout = iscsi_session_recovery_timedout,
 };
 
 static int __init
 iscsi_tcp_init(void)
 {
        if (iscsi_max_lun < 1) {
-               printk(KERN_ERR "iscsi_tcp: Invalid max_lun value of %u\n",
-                      iscsi_max_lun);
+               printk(KERN_ERR "Invalid max_lun value of %u\n", iscsi_max_lun);
                return -EINVAL;
        }
        iscsi_tcp_transport.max_lun = iscsi_max_lun;
 
+       taskcache = kmem_cache_create("iscsi_taskcache",
+                       sizeof(struct iscsi_data_task), 0,
+                       SLAB_HWCACHE_ALIGN, NULL, NULL);
+       if (!taskcache)
+               return -ENOMEM;
+
        if (!iscsi_register_transport(&iscsi_tcp_transport))
-               return -ENODEV;
+               kmem_cache_destroy(taskcache);
 
        return 0;
 }
@@ -2554,6 +3653,7 @@ static void __exit
 iscsi_tcp_exit(void)
 {
        iscsi_unregister_transport(&iscsi_tcp_transport);
+       kmem_cache_destroy(taskcache);  /* release the data-task slab cache created in iscsi_tcp_init() */
 }
 
 module_init(iscsi_tcp_init);
index 8083028..ba26741 100644 (file)
@@ -2,8 +2,7 @@
  * iSCSI Initiator TCP Transport
  * Copyright (C) 2004 Dmitry Yusupov
  * Copyright (C) 2004 Alex Aizman
- * Copyright (C) 2005 - 2006 Mike Christie
- * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
+ * Copyright (C) 2005 Mike Christie
  * maintained by open-iscsi@googlegroups.com
  *
  * This program is free software; you can redistribute it and/or modify
 #ifndef ISCSI_TCP_H
 #define ISCSI_TCP_H
 
-#include <scsi/libiscsi.h>
+/* Session's states */
+#define ISCSI_STATE_FREE               1
+#define ISCSI_STATE_LOGGED_IN          2
+#define ISCSI_STATE_FAILED             3
+#define ISCSI_STATE_TERMINATE          4
+
+/* Connection's states */
+#define ISCSI_CONN_INITIAL_STAGE       0
+#define ISCSI_CONN_STARTED             1
+#define ISCSI_CONN_STOPPED             2
+#define ISCSI_CONN_CLEANUP_WAIT                3
+
+/* Connection suspend "bit" */
+#define SUSPEND_BIT                    1
 
 /* Socket's Receive state machine */
 #define IN_PROGRESS_WAIT_HEADER                0x0
 #define IN_PROGRESS_DATA_RECV          0x2
 #define IN_PROGRESS_DDIGEST_RECV       0x3
 
+/* Task Mgmt states */
+#define        TMABORT_INITIAL                 0x0
+#define        TMABORT_SUCCESS                 0x1
+#define        TMABORT_FAILED                  0x2
+#define        TMABORT_TIMEDOUT                0x3
+
 /* xmit state machine */
 #define        XMSTATE_IDLE                    0x0
 #define        XMSTATE_R_HDR                   0x1
 #define        XMSTATE_W_PAD                   0x200
 #define XMSTATE_DATA_DIGEST            0x400
 
+#define ISCSI_CONN_MAX                 1
 #define ISCSI_CONN_RCVBUF_MIN          262144
 #define ISCSI_CONN_SNDBUF_MIN          262144
 #define ISCSI_PAD_LEN                  4
 #define ISCSI_R2T_MAX                  16
+#define ISCSI_XMIT_CMDS_MAX            128     /* must be power of 2 */
+#define ISCSI_MGMT_CMDS_MAX            32      /* must be power of 2 */
+#define ISCSI_MGMT_ITT_OFFSET          0xa00
 #define ISCSI_SG_TABLESIZE             SG_ALL
+#define ISCSI_DEF_CMD_PER_LUN          32
+#define ISCSI_MAX_CMD_PER_LUN          128
 #define ISCSI_TCP_MAX_CMD_LEN          16
 
-struct socket;
+#define ITT_MASK                       (0xfff)
+#define CID_SHIFT                      12
+#define CID_MASK                       (0xffff<<CID_SHIFT)
+#define AGE_SHIFT                      28
+#define AGE_MASK                       (0xf<<AGE_SHIFT)
+
+struct iscsi_queue {
+       struct kfifo            *queue;         /* FIFO Queue */
+       void                    **pool;         /* Pool of elements */
+       int                     max;            /* Max number of elements */
+};
+
+struct iscsi_session;
+struct iscsi_cmd_task;
+struct iscsi_mgmt_task;
 
 /* Socket connection recieve helper */
 struct iscsi_tcp_recv {
@@ -66,32 +104,48 @@ struct iscsi_tcp_recv {
        struct iscsi_cmd_task   *ctask;         /* current cmd in progress */
 
        /* copied and flipped values */
+       int                     opcode;
+       int                     flags;
+       int                     cmd_status;
+       int                     ahslen;
        int                     datalen;
+       uint32_t                itt;
        int                     datadgst;
-       char                    zero_copy_hdr;
 };
 
-struct iscsi_tcp_conn {
-       struct iscsi_conn       *iscsi_conn;
-       struct socket           *sock;
+struct iscsi_cls_conn;
+
+struct iscsi_conn {
+       struct iscsi_cls_conn   *cls_conn;      /* ptr to class connection */
        struct iscsi_hdr        hdr;            /* header placeholder */
        char                    hdrext[4*sizeof(__u16) +
                                    sizeof(__u32)];
        int                     data_copied;
        char                    *data;          /* data placeholder */
+       struct socket           *sock;          /* TCP socket */
        int                     data_size;      /* actual recv_dlength */
        int                     stop_stage;     /* conn_stop() flag: *
                                                 * stop to recover,  *
                                                 * stop to terminate */
        /* iSCSI connection-wide sequencing */
+       uint32_t                exp_statsn;
        int                     hdr_size;       /* PDU header size */
+       unsigned long           suspend_rx;     /* suspend Rx */
 
        struct crypto_tfm       *rx_tfm;        /* CRC32C (Rx) */
        struct crypto_tfm       *data_rx_tfm;   /* CRC32C (Rx) for data */
 
        /* control data */
+       int                     senselen;       /* scsi sense length */
+       int                     id;             /* CID */
        struct iscsi_tcp_recv   in;             /* TCP receive context */
+       struct iscsi_session    *session;       /* parent session */
+       struct list_head        item;           /* maintains list of conns */
        int                     in_progress;    /* connection state machine */
+       int                     c_stage;        /* connection state */
+       struct iscsi_mgmt_task  *login_mtask;   /* mtask used for login/text */
+       struct iscsi_mgmt_task  *mtask;         /* xmit mtask in progress */
+       struct iscsi_cmd_task   *ctask;         /* xmit ctask in progress */
 
        /* old values for socket callbacks */
        void                    (*old_data_ready)(struct sock *, int);
@@ -101,14 +155,93 @@ struct iscsi_tcp_conn {
        /* xmit */
        struct crypto_tfm       *tx_tfm;        /* CRC32C (Tx) */
        struct crypto_tfm       *data_tx_tfm;   /* CRC32C (Tx) for data */
+       struct kfifo            *writequeue;    /* write cmds for Data-Outs */
+       struct kfifo            *immqueue;      /* immediate xmit queue */
+       struct kfifo            *mgmtqueue;     /* mgmt (control) xmit queue */
+       struct kfifo            *xmitqueue;     /* data-path cmd queue */
+       struct work_struct      xmitwork;       /* per-conn. xmit workqueue */
+       struct mutex            xmitmutex;      /* serializes connection xmit,
+                                                * access to kfifos:      *
+                                                * xmitqueue, writequeue, *
+                                                * immqueue, mgmtqueue    */
+       unsigned long           suspend_tx;     /* suspend Tx */
+
+       /* abort */
+       wait_queue_head_t       ehwait;         /* used in eh_abort()     */
+       struct iscsi_tm         tmhdr;
+       struct timer_list       tmabort_timer;  /* abort timer */
+       int                     tmabort_state;  /* see TMABORT_INITIAL, etc.*/
+
+       /* negotiated params */
+       int                     max_recv_dlength;
+       int                     max_xmit_dlength;
+       int                     hdrdgst_en;
+       int                     datadgst_en;
 
-       /* MIB custom statistics */
+       /* MIB-statistics */
+       uint64_t                txdata_octets;
+       uint64_t                rxdata_octets;
+       uint32_t                scsicmd_pdus_cnt;
+       uint32_t                dataout_pdus_cnt;
+       uint32_t                scsirsp_pdus_cnt;
+       uint32_t                datain_pdus_cnt;
+       uint32_t                r2t_pdus_cnt;
+       uint32_t                tmfcmd_pdus_cnt;
+       int32_t                 tmfrsp_pdus_cnt;
+
+       /* custom statistics */
        uint32_t                sendpage_failures_cnt;
        uint32_t                discontiguous_hdr_cnt;
+       uint32_t                eh_abort_cnt;
 
        ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int);
 };
 
+struct iscsi_session {
+       /* iSCSI session-wide sequencing */
+       uint32_t                cmdsn;          /* CmdSN assigned to next cmd PDU (incremented on send) */
+       uint32_t                exp_cmdsn;      /* latest ExpCmdSN reported by target */
+       uint32_t                max_cmdsn;      /* latest MaxCmdSN reported by target */
+
+       /* configuration */
+       int                     initial_r2t_en;
+       int                     max_r2t;
+       int                     imm_data_en;
+       int                     first_burst;
+       int                     max_burst;
+       int                     time2wait;
+       int                     time2retain;
+       int                     pdu_inorder_en;
+       int                     dataseq_inorder_en;
+       int                     erl;
+       int                     ifmarker_en;
+       int                     ofmarker_en;
+
+       /* control data */
+       struct Scsi_Host        *host;
+       int                     id;
+       struct iscsi_conn       *leadconn;      /* leading connection */
+       spinlock_t              lock;           /* protects session state, *
+                                                * sequence numbers,       *
+                                                * session resources:      *
+                                                * - cmdpool,              *
+                                                * - mgmtpool,             *
+                                                * - r2tpool               */
+       int                     state;          /* session state           */
+       struct list_head        item;
+       void                    *auth_client;
+       int                     conn_cnt;
+       int                     age;            /* counts session re-opens */
+
+       struct list_head        connections;    /* list of connections */
+       int                     cmds_max;       /* size of cmds array */
+       struct iscsi_cmd_task   **cmds;         /* Original Cmds arr */
+       struct iscsi_queue      cmdpool;        /* PDU's pool */
+       int                     mgmtpool_max;   /* size of mgmt array */
+       struct iscsi_mgmt_task  **mgmt_cmds;    /* Original mgmt arr */
+       struct iscsi_queue      mgmtpool;       /* Mgmt PDU's pool */
+};
+
 struct iscsi_buf {
        struct scatterlist      sg;
        unsigned int            sent;
@@ -118,17 +251,22 @@ struct iscsi_buf {
 struct iscsi_data_task {
        struct iscsi_data       hdr;                    /* PDU */
        char                    hdrext[sizeof(__u32)];  /* Header-Digest */
+       struct list_head        item;                   /* data queue item */
        struct iscsi_buf        digestbuf;              /* digest buffer */
        uint32_t                digest;                 /* data digest */
 };
+#define ISCSI_DTASK_DEFAULT_MAX        ISCSI_SG_TABLESIZE * PAGE_SIZE / 512
 
-struct iscsi_tcp_mgmt_task {
-       struct iscsi_hdr        hdr;
-       char                    hdrext[sizeof(__u32)]; /* Header-Digest */
+struct iscsi_mgmt_task {
+       struct iscsi_hdr        hdr;            /* mgmt. PDU */
+       char                    hdrext[sizeof(__u32)];  /* Header-Digest */
+       char                    *data;          /* mgmt payload */
        int                     xmstate;        /* mgmt xmit progress */
+       int                     data_count;     /* counts data to be sent */
        struct iscsi_buf        headbuf;        /* header buffer */
        struct iscsi_buf        sendbuf;        /* in progress buffer */
        int                     sent;
+       uint32_t                itt;            /* this ITT */
 };
 
 struct iscsi_r2t_info {
@@ -142,36 +280,48 @@ struct iscsi_r2t_info {
        int                     data_count;     /* DATA-Out payload progress */
        struct scatterlist      *sg;            /* per-R2T SG list */
        int                     solicit_datasn;
-       struct iscsi_data_task   dtask;        /* which data task */
+       struct iscsi_data_task   *dtask;        /* which data task */
 };
 
-struct iscsi_tcp_cmd_task {
-       struct iscsi_cmd        hdr;
+struct iscsi_cmd_task {
+       struct iscsi_cmd        hdr;                    /* iSCSI PDU header */
        char                    hdrext[4*sizeof(__u16)+ /* AHS */
                                    sizeof(__u32)];     /* HeaderDigest */
        char                    pad[ISCSI_PAD_LEN];
-       int                     pad_count;              /* padded bytes */
+       int                     itt;                    /* this ITT */
+       int                     datasn;                 /* DataSN */
        struct iscsi_buf        headbuf;                /* header buf (xmit) */
        struct iscsi_buf        sendbuf;                /* in progress buffer*/
-       int                     xmstate;                /* xmit xtate machine */
        int                     sent;
        struct scatterlist      *sg;                    /* per-cmd SG list  */
        struct scatterlist      *bad_sg;                /* assert statement */
        int                     sg_count;               /* SG's to process  */
+       uint32_t                unsol_datasn;
        uint32_t                exp_r2tsn;
+       int                     xmstate;                /* xmit state machine */
+       int                     imm_count;              /* imm-data (bytes)   */
+       int                     unsol_count;            /* unsolicited (bytes)*/
        int                     r2t_data_count;         /* R2T Data-Out bytes */
+       int                     data_count;             /* remaining Data-Out */
+       int                     pad_count;              /* padded bytes */
+       struct scsi_cmnd        *sc;                    /* associated SCSI cmd*/
+       int                     total_length;
        int                     data_offset;
+       struct iscsi_conn       *conn;                  /* used connection    */
+       struct iscsi_mgmt_task  *mtask;                 /* tmf mtask in progr */
+
        struct iscsi_r2t_info   *r2t;                   /* in progress R2T    */
        struct iscsi_queue      r2tpool;
        struct kfifo            *r2tqueue;
        struct iscsi_r2t_info   **r2ts;
+       struct list_head        dataqueue;              /* Data-Out dataqueue */
+       mempool_t               *datapool;
        uint32_t                datadigest;             /* for recover digest */
        int                     digest_count;
        uint32_t                immdigest;              /* for imm data */
        struct iscsi_buf        immbuf;                 /* for imm data digest */
-       struct iscsi_data_task  *dtask;         /* data task in progress*/
-       struct iscsi_data_task  unsol_dtask;    /* unsol data task */
-       int                     digest_offset;  /* for partial buff digest */
+       struct iscsi_data_task   *dtask;                /* data task in progress */
+       int                     digest_offset;          /* for partial buff digest */
 };
 
 #endif /* ISCSI_H */
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
deleted file mode 100644 (file)
index 2673a11..0000000
+++ /dev/null
@@ -1,1702 +0,0 @@
-/*
- * iSCSI lib functions
- *
- * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
- * Copyright (C) 2004 - 2006 Mike Christie
- * Copyright (C) 2004 - 2005 Dmitry Yusupov
- * Copyright (C) 2004 - 2005 Alex Aizman
- * maintained by open-iscsi@googlegroups.com
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-#include <linux/types.h>
-#include <linux/mutex.h>
-#include <linux/kfifo.h>
-#include <linux/delay.h>
-#include <net/tcp.h>
-#include <scsi/scsi_cmnd.h>
-#include <scsi/scsi_device.h>
-#include <scsi/scsi_eh.h>
-#include <scsi/scsi_tcq.h>
-#include <scsi/scsi_host.h>
-#include <scsi/scsi.h>
-#include <scsi/iscsi_proto.h>
-#include <scsi/scsi_transport.h>
-#include <scsi/scsi_transport_iscsi.h>
-#include <scsi/libiscsi.h>
-
-struct iscsi_session *
-class_to_transport_session(struct iscsi_cls_session *cls_session)
-{
-       struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
-       return iscsi_hostdata(shost->hostdata);
-}
-EXPORT_SYMBOL_GPL(class_to_transport_session);
-
-#define INVALID_SN_DELTA       0xffff
-
-int
-iscsi_check_assign_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
-{
-       uint32_t max_cmdsn = be32_to_cpu(hdr->max_cmdsn);
-       uint32_t exp_cmdsn = be32_to_cpu(hdr->exp_cmdsn);
-
-       if (max_cmdsn < exp_cmdsn -1 &&
-           max_cmdsn > exp_cmdsn - INVALID_SN_DELTA)
-               return ISCSI_ERR_MAX_CMDSN;
-       if (max_cmdsn > session->max_cmdsn ||
-           max_cmdsn < session->max_cmdsn - INVALID_SN_DELTA)
-               session->max_cmdsn = max_cmdsn;
-       if (exp_cmdsn > session->exp_cmdsn ||
-           exp_cmdsn < session->exp_cmdsn - INVALID_SN_DELTA)
-               session->exp_cmdsn = exp_cmdsn;
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(iscsi_check_assign_cmdsn);
-
-void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *ctask,
-                                  struct iscsi_data *hdr,
-                                  int transport_data_cnt)
-{
-       struct iscsi_conn *conn = ctask->conn;
-
-       memset(hdr, 0, sizeof(struct iscsi_data));
-       hdr->ttt = cpu_to_be32(ISCSI_RESERVED_TAG);
-       hdr->datasn = cpu_to_be32(ctask->unsol_datasn);
-       ctask->unsol_datasn++;
-       hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
-       memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
-
-       hdr->itt = ctask->hdr->itt;
-       hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
-
-       hdr->offset = cpu_to_be32(ctask->total_length -
-                                 transport_data_cnt -
-                                 ctask->unsol_count);
-
-       if (ctask->unsol_count > conn->max_xmit_dlength) {
-               hton24(hdr->dlength, conn->max_xmit_dlength);
-               ctask->data_count = conn->max_xmit_dlength;
-               hdr->flags = 0;
-       } else {
-               hton24(hdr->dlength, ctask->unsol_count);
-               ctask->data_count = ctask->unsol_count;
-               hdr->flags = ISCSI_FLAG_CMD_FINAL;
-       }
-}
-EXPORT_SYMBOL_GPL(iscsi_prep_unsolicit_data_pdu);
-
-/**
- * iscsi_prep_scsi_cmd_pdu - prep iscsi scsi cmd pdu
- * @ctask: iscsi cmd task
- *
- * Prep basic iSCSI PDU fields for a scsi cmd pdu. The LLD should set
- * fields like dlength or final based on how much data it sends
- */
-static void iscsi_prep_scsi_cmd_pdu(struct iscsi_cmd_task *ctask)
-{
-       struct iscsi_conn *conn = ctask->conn;
-       struct iscsi_session *session = conn->session;
-       struct iscsi_cmd *hdr = ctask->hdr;
-       struct scsi_cmnd *sc = ctask->sc;
-
-        hdr->opcode = ISCSI_OP_SCSI_CMD;
-        hdr->flags = ISCSI_ATTR_SIMPLE;
-        int_to_scsilun(sc->device->lun, (struct scsi_lun *)hdr->lun);
-        hdr->itt = ctask->itt | (conn->id << ISCSI_CID_SHIFT) |
-                         (session->age << ISCSI_AGE_SHIFT);
-        hdr->data_length = cpu_to_be32(sc->request_bufflen);
-        hdr->cmdsn = cpu_to_be32(session->cmdsn);
-        session->cmdsn++;
-        hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
-        memcpy(hdr->cdb, sc->cmnd, sc->cmd_len);
-        memset(&hdr->cdb[sc->cmd_len], 0, MAX_COMMAND_SIZE - sc->cmd_len);
-
-       if (sc->sc_data_direction == DMA_TO_DEVICE) {
-               hdr->flags |= ISCSI_FLAG_CMD_WRITE;
-               /*
-                * Write counters:
-                *
-                *      imm_count       bytes to be sent right after
-                *                      SCSI PDU Header
-                *
-                *      unsol_count     bytes(as Data-Out) to be sent
-                *                      without R2T ack right after
-                *                      immediate data
-                *
-                *      r2t_data_count  bytes to be sent via R2T ack's
-                *
-                *      pad_count       bytes to be sent as zero-padding
-                */
-               ctask->imm_count = 0;
-               ctask->unsol_count = 0;
-               ctask->unsol_datasn = 0;
-
-               if (session->imm_data_en) {
-                       if (ctask->total_length >= session->first_burst)
-                               ctask->imm_count = min(session->first_burst,
-                                                       conn->max_xmit_dlength);
-                       else
-                               ctask->imm_count = min(ctask->total_length,
-                                                       conn->max_xmit_dlength);
-                       hton24(ctask->hdr->dlength, ctask->imm_count);
-               } else
-                       zero_data(ctask->hdr->dlength);
-
-               if (!session->initial_r2t_en)
-                       ctask->unsol_count = min(session->first_burst,
-                               ctask->total_length) - ctask->imm_count;
-               if (!ctask->unsol_count)
-                       /* No unsolicit Data-Out's */
-                       ctask->hdr->flags |= ISCSI_FLAG_CMD_FINAL;
-       } else {
-               ctask->datasn = 0;
-               hdr->flags |= ISCSI_FLAG_CMD_FINAL;
-               zero_data(hdr->dlength);
-
-               if (sc->sc_data_direction == DMA_FROM_DEVICE)
-                       hdr->flags |= ISCSI_FLAG_CMD_READ;
-       }
-
-       conn->scsicmd_pdus_cnt++;
-}
-EXPORT_SYMBOL_GPL(iscsi_prep_scsi_cmd_pdu);
-
-/**
- * iscsi_complete_command - return command back to scsi-ml
- * @session: iscsi session
- * @ctask: iscsi cmd task
- *
- * Must be called with session lock.
- * This function returns the scsi command to scsi-ml and returns
- * the cmd task to the pool of available cmd tasks.
- */
-static void iscsi_complete_command(struct iscsi_session *session,
-                                  struct iscsi_cmd_task *ctask)
-{
-       struct scsi_cmnd *sc = ctask->sc;
-
-       ctask->sc = NULL;
-       list_del_init(&ctask->running);
-       __kfifo_put(session->cmdpool.queue, (void*)&ctask, sizeof(void*));
-       sc->scsi_done(sc);
-}
-
-/**
- * iscsi_cmd_rsp - SCSI Command Response processing
- * @conn: iscsi connection
- * @hdr: iscsi header
- * @ctask: scsi command task
- * @data: cmd data buffer
- * @datalen: len of buffer
- *
- * iscsi_cmd_rsp sets up the scsi_cmnd fields based on the PDU and
- * then completes the command and task.
- **/
-static int iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
-                             struct iscsi_cmd_task *ctask, char *data,
-                             int datalen)
-{
-       int rc;
-       struct iscsi_cmd_rsp *rhdr = (struct iscsi_cmd_rsp *)hdr;
-       struct iscsi_session *session = conn->session;
-       struct scsi_cmnd *sc = ctask->sc;
-
-       rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr);
-       if (rc) {
-               sc->result = DID_ERROR << 16;
-               goto out;
-       }
-
-       conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
-
-       sc->result = (DID_OK << 16) | rhdr->cmd_status;
-
-       if (rhdr->response != ISCSI_STATUS_CMD_COMPLETED) {
-               sc->result = DID_ERROR << 16;
-               goto out;
-       }
-
-       if (rhdr->cmd_status == SAM_STAT_CHECK_CONDITION) {
-               int senselen;
-
-               if (datalen < 2) {
-invalid_datalen:
-                       printk(KERN_ERR "iscsi: Got CHECK_CONDITION but "
-                              "invalid data buffer size of %d\n", datalen);
-                       sc->result = DID_BAD_TARGET << 16;
-                       goto out;
-               }
-
-               senselen = (data[0] << 8) | data[1];
-               if (datalen < senselen)
-                       goto invalid_datalen;
-
-               memcpy(sc->sense_buffer, data + 2,
-                      min(senselen, SCSI_SENSE_BUFFERSIZE));
-               debug_scsi("copied %d bytes of sense\n",
-                          min(senselen, SCSI_SENSE_BUFFERSIZE));
-       }
-
-       if (sc->sc_data_direction == DMA_TO_DEVICE)
-               goto out;
-
-       if (rhdr->flags & ISCSI_FLAG_CMD_UNDERFLOW) {
-               int res_count = be32_to_cpu(rhdr->residual_count);
-
-               if (res_count > 0 && res_count <= sc->request_bufflen)
-                       sc->resid = res_count;
-               else
-                       sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
-       } else if (rhdr->flags & ISCSI_FLAG_CMD_BIDI_UNDERFLOW)
-               sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
-       else if (rhdr->flags & ISCSI_FLAG_CMD_OVERFLOW)
-               sc->resid = be32_to_cpu(rhdr->residual_count);
-
-out:
-       debug_scsi("done [sc %lx res %d itt 0x%x]\n",
-                  (long)sc, sc->result, ctask->itt);
-       conn->scsirsp_pdus_cnt++;
-
-       iscsi_complete_command(conn->session, ctask);
-       return rc;
-}
-
-/**
- * __iscsi_complete_pdu - complete pdu
- * @conn: iscsi conn
- * @hdr: iscsi header
- * @data: data buffer
- * @datalen: len of data buffer
- *
- * Completes pdu processing by freeing any resources allocated at
- * queuecommand or send generic. session lock must be held and verify
- * itt must have been called.
- */
-int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
-                        char *data, int datalen)
-{
-       struct iscsi_session *session = conn->session;
-       int opcode = hdr->opcode & ISCSI_OPCODE_MASK, rc = 0;
-       struct iscsi_cmd_task *ctask;
-       struct iscsi_mgmt_task *mtask;
-       uint32_t itt;
-
-       if (hdr->itt != cpu_to_be32(ISCSI_RESERVED_TAG))
-               itt = hdr->itt & ISCSI_ITT_MASK;
-       else
-               itt = hdr->itt;
-
-       if (itt < session->cmds_max) {
-               ctask = session->cmds[itt];
-
-               debug_scsi("cmdrsp [op 0x%x cid %d itt 0x%x len %d]\n",
-                          opcode, conn->id, ctask->itt, datalen);
-
-               switch(opcode) {
-               case ISCSI_OP_SCSI_CMD_RSP:
-                       BUG_ON((void*)ctask != ctask->sc->SCp.ptr);
-                       rc = iscsi_scsi_cmd_rsp(conn, hdr, ctask, data,
-                                               datalen);
-                       break;
-               case ISCSI_OP_SCSI_DATA_IN:
-                       BUG_ON((void*)ctask != ctask->sc->SCp.ptr);
-                       if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
-                               conn->scsirsp_pdus_cnt++;
-                               iscsi_complete_command(session, ctask);
-                       }
-                       break;
-               case ISCSI_OP_R2T:
-                       /* LLD handles this for now */
-                       break;
-               default:
-                       rc = ISCSI_ERR_BAD_OPCODE;
-                       break;
-               }
-       } else if (itt >= ISCSI_MGMT_ITT_OFFSET &&
-                  itt < ISCSI_MGMT_ITT_OFFSET + session->mgmtpool_max) {
-               mtask = session->mgmt_cmds[itt - ISCSI_MGMT_ITT_OFFSET];
-
-               debug_scsi("immrsp [op 0x%x cid %d itt 0x%x len %d]\n",
-                          opcode, conn->id, mtask->itt, datalen);
-
-               rc = iscsi_check_assign_cmdsn(session,
-                                             (struct iscsi_nopin*)hdr);
-               if (rc)
-                       goto done;
-
-               switch(opcode) {
-               case ISCSI_OP_LOGOUT_RSP:
-                       conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1;
-                       /* fall through */
-               case ISCSI_OP_LOGIN_RSP:
-               case ISCSI_OP_TEXT_RSP:
-                       /*
-                        * login related PDU's exp_statsn is handled in
-                        * userspace
-                        */
-                       rc = iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen);
-                       list_del(&mtask->running);
-                       if (conn->login_mtask != mtask)
-                               __kfifo_put(session->mgmtpool.queue,
-                                           (void*)&mtask, sizeof(void*));
-                       break;
-               case ISCSI_OP_SCSI_TMFUNC_RSP:
-                       if (datalen) {
-                               rc = ISCSI_ERR_PROTO;
-                               break;
-                       }
-
-                       conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1;
-                       conn->tmfrsp_pdus_cnt++;
-                       if (conn->tmabort_state == TMABORT_INITIAL) {
-                               conn->tmabort_state =
-                                       ((struct iscsi_tm_rsp *)hdr)->
-                                       response == ISCSI_TMF_RSP_COMPLETE ?
-                                               TMABORT_SUCCESS:TMABORT_FAILED;
-                               /* unblock eh_abort() */
-                               wake_up(&conn->ehwait);
-                       }
-                       break;
-               case ISCSI_OP_NOOP_IN:
-                       if (hdr->ttt != ISCSI_RESERVED_TAG) {
-                               rc = ISCSI_ERR_PROTO;
-                               break;
-                       }
-                       conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1;
-
-                       rc = iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen);
-                       list_del(&mtask->running);
-                       if (conn->login_mtask != mtask)
-                               __kfifo_put(session->mgmtpool.queue,
-                                           (void*)&mtask, sizeof(void*));
-                       break;
-               default:
-                       rc = ISCSI_ERR_BAD_OPCODE;
-                       break;
-               }
-       } else if (itt == ISCSI_RESERVED_TAG) {
-               switch(opcode) {
-               case ISCSI_OP_NOOP_IN:
-                       if (!datalen) {
-                               rc = iscsi_check_assign_cmdsn(session,
-                                                (struct iscsi_nopin*)hdr);
-                               if (!rc && hdr->ttt != ISCSI_RESERVED_TAG)
-                                       rc = iscsi_recv_pdu(conn->cls_conn,
-                                                           hdr, NULL, 0);
-                       } else
-                               rc = ISCSI_ERR_PROTO;
-                       break;
-               case ISCSI_OP_REJECT:
-                       /* we need sth like iscsi_reject_rsp()*/
-               case ISCSI_OP_ASYNC_EVENT:
-                       conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1;
-                       /* we need sth like iscsi_async_event_rsp() */
-                       rc = ISCSI_ERR_BAD_OPCODE;
-                       break;
-               default:
-                       rc = ISCSI_ERR_BAD_OPCODE;
-                       break;
-               }
-       } else
-               rc = ISCSI_ERR_BAD_ITT;
-
-done:
-       return rc;
-}
-EXPORT_SYMBOL_GPL(__iscsi_complete_pdu);
-
-int iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
-                      char *data, int datalen)
-{
-       int rc;
-
-       spin_lock(&conn->session->lock);
-       rc = __iscsi_complete_pdu(conn, hdr, data, datalen);
-       spin_unlock(&conn->session->lock);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(iscsi_complete_pdu);
-
-/* verify itt (itt encoding: age+cid+itt) */
-int iscsi_verify_itt(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
-                    uint32_t *ret_itt)
-{
-       struct iscsi_session *session = conn->session;
-       struct iscsi_cmd_task *ctask;
-       uint32_t itt;
-
-       if (hdr->itt != cpu_to_be32(ISCSI_RESERVED_TAG)) {
-               if ((hdr->itt & ISCSI_AGE_MASK) !=
-                   (session->age << ISCSI_AGE_SHIFT)) {
-                       printk(KERN_ERR "iscsi: received itt %x expected "
-                               "session age (%x)\n", hdr->itt,
-                               session->age & ISCSI_AGE_MASK);
-                       return ISCSI_ERR_BAD_ITT;
-               }
-
-               if ((hdr->itt & ISCSI_CID_MASK) !=
-                   (conn->id << ISCSI_CID_SHIFT)) {
-                       printk(KERN_ERR "iscsi: received itt %x, expected "
-                               "CID (%x)\n", hdr->itt, conn->id);
-                       return ISCSI_ERR_BAD_ITT;
-               }
-               itt = hdr->itt & ISCSI_ITT_MASK;
-       } else
-               itt = hdr->itt;
-
-       if (itt < session->cmds_max) {
-               ctask = session->cmds[itt];
-
-               if (!ctask->sc) {
-                       printk(KERN_INFO "iscsi: dropping ctask with "
-                              "itt 0x%x\n", ctask->itt);
-                       /* force drop */
-                       return ISCSI_ERR_NO_SCSI_CMD;
-               }
-
-               if (ctask->sc->SCp.phase != session->age) {
-                       printk(KERN_ERR "iscsi: ctask's session age %d, "
-                               "expected %d\n", ctask->sc->SCp.phase,
-                               session->age);
-                       return ISCSI_ERR_SESSION_FAILED;
-               }
-       }
-
-       *ret_itt = itt;
-       return 0;
-}
-EXPORT_SYMBOL_GPL(iscsi_verify_itt);
-
-void iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err)
-{
-       struct iscsi_session *session = conn->session;
-       unsigned long flags;
-
-       spin_lock_irqsave(&session->lock, flags);
-       if (session->state == ISCSI_STATE_FAILED) {
-               spin_unlock_irqrestore(&session->lock, flags);
-               return;
-       }
-
-       if (conn->stop_stage == 0)
-               session->state = ISCSI_STATE_FAILED;
-       spin_unlock_irqrestore(&session->lock, flags);
-       set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
-       set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
-       iscsi_conn_error(conn->cls_conn, err);
-}
-EXPORT_SYMBOL_GPL(iscsi_conn_failure);
-
-/**
- * iscsi_data_xmit - xmit any command into the scheduled connection
- * @conn: iscsi connection
- *
- * Notes:
- *     The function can return -EAGAIN in which case the caller must
- *     re-schedule it again later or recover. '0' return code means
- *     successful xmit.
- **/
-static int iscsi_data_xmit(struct iscsi_conn *conn)
-{
-       struct iscsi_transport *tt;
-       int rc = 0;
-
-       if (unlikely(conn->suspend_tx)) {
-               debug_scsi("conn %d Tx suspended!\n", conn->id);
-               return -ENODATA;
-       }
-       tt = conn->session->tt;
-
-       /*
-        * Transmit in the following order:
-        *
-        * 1) un-finished xmit (ctask or mtask)
-        * 2) immediate control PDUs
-        * 3) write data
-        * 4) SCSI commands
-        * 5) non-immediate control PDUs
-        *
-        * No need to lock around __kfifo_get as long as
-        * there's one producer and one consumer.
-        */
-
-       BUG_ON(conn->ctask && conn->mtask);
-
-       if (conn->ctask) {
-               rc = tt->xmit_cmd_task(conn, conn->ctask);
-               if (rc)
-                       goto again;
-               /* done with this in-progress ctask */
-               conn->ctask = NULL;
-       }
-       if (conn->mtask) {
-               rc = tt->xmit_mgmt_task(conn, conn->mtask);
-               if (rc)
-                       goto again;
-               /* done with this in-progress mtask */
-               conn->mtask = NULL;
-       }
-
-       /* process immediate first */
-        if (unlikely(__kfifo_len(conn->immqueue))) {
-               while (__kfifo_get(conn->immqueue, (void*)&conn->mtask,
-                                  sizeof(void*))) {
-                       spin_lock_bh(&conn->session->lock);
-                       list_add_tail(&conn->mtask->running,
-                                     &conn->mgmt_run_list);
-                       spin_unlock_bh(&conn->session->lock);
-                       rc = tt->xmit_mgmt_task(conn, conn->mtask);
-                       if (rc)
-                               goto again;
-               }
-               /* done with this mtask */
-               conn->mtask = NULL;
-       }
-
-       /* process command queue */
-       while (__kfifo_get(conn->xmitqueue, (void*)&conn->ctask,
-                          sizeof(void*))) {
-               /*
-                * iscsi tcp may readd the task to the xmitqueue to send
-                * write data
-                */
-               spin_lock_bh(&conn->session->lock);
-               if (list_empty(&conn->ctask->running))
-                       list_add_tail(&conn->ctask->running, &conn->run_list);
-               spin_unlock_bh(&conn->session->lock);
-               rc = tt->xmit_cmd_task(conn, conn->ctask);
-               if (rc)
-                       goto again;
-       }
-       /* done with this ctask */
-       conn->ctask = NULL;
-
-       /* process the rest control plane PDUs, if any */
-        if (unlikely(__kfifo_len(conn->mgmtqueue))) {
-               while (__kfifo_get(conn->mgmtqueue, (void*)&conn->mtask,
-                                  sizeof(void*))) {
-                       spin_lock_bh(&conn->session->lock);
-                       list_add_tail(&conn->mtask->running,
-                                     &conn->mgmt_run_list);
-                       spin_unlock_bh(&conn->session->lock);
-                       rc = tt->xmit_mgmt_task(conn, conn->mtask);
-                       if (rc)
-                               goto again;
-               }
-               /* done with this mtask */
-               conn->mtask = NULL;
-       }
-
-       return -ENODATA;
-
-again:
-       if (unlikely(conn->suspend_tx))
-               return -ENODATA;
-
-       return rc;
-}
-
-static void iscsi_xmitworker(void *data)
-{
-       struct iscsi_conn *conn = data;
-       int rc;
-       /*
-        * serialize Xmit worker on a per-connection basis.
-        */
-       mutex_lock(&conn->xmitmutex);
-       do {
-               rc = iscsi_data_xmit(conn);
-       } while (rc >= 0 || rc == -EAGAIN);
-       mutex_unlock(&conn->xmitmutex);
-}
-
-enum {
-       FAILURE_BAD_HOST = 1,
-       FAILURE_SESSION_FAILED,
-       FAILURE_SESSION_FREED,
-       FAILURE_WINDOW_CLOSED,
-       FAILURE_SESSION_TERMINATE,
-       FAILURE_SESSION_IN_RECOVERY,
-       FAILURE_SESSION_RECOVERY_TIMEOUT,
-};
-
-int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
-{
-       struct Scsi_Host *host;
-       int reason = 0;
-       struct iscsi_session *session;
-       struct iscsi_conn *conn;
-       struct iscsi_cmd_task *ctask = NULL;
-
-       sc->scsi_done = done;
-       sc->result = 0;
-
-       host = sc->device->host;
-       session = iscsi_hostdata(host->hostdata);
-
-       spin_lock(&session->lock);
-
-       /*
-        * ISCSI_STATE_FAILED is a temp. state. The recovery
-        * code will decide what is best to do with command queued
-        * during this time
-        */
-       if (session->state != ISCSI_STATE_LOGGED_IN &&
-           session->state != ISCSI_STATE_FAILED) {
-               /*
-                * to handle the race between when we set the recovery state
-                * and block the session we requeue here (commands could
-                * be entering our queuecommand while a block is starting
-                * up because the block code is not locked)
-                */
-               if (session->state == ISCSI_STATE_IN_RECOVERY) {
-                       reason = FAILURE_SESSION_IN_RECOVERY;
-                       goto reject;
-               }
-
-               if (session->state == ISCSI_STATE_RECOVERY_FAILED)
-                       reason = FAILURE_SESSION_RECOVERY_TIMEOUT;
-               else if (session->state == ISCSI_STATE_TERMINATE)
-                       reason = FAILURE_SESSION_TERMINATE;
-               else
-                       reason = FAILURE_SESSION_FREED;
-               goto fault;
-       }
-
-       /*
-        * Check for iSCSI window and take care of CmdSN wrap-around
-        */
-       if ((int)(session->max_cmdsn - session->cmdsn) < 0) {
-               reason = FAILURE_WINDOW_CLOSED;
-               goto reject;
-       }
-
-       conn = session->leadconn;
-
-       __kfifo_get(session->cmdpool.queue, (void*)&ctask, sizeof(void*));
-       sc->SCp.phase = session->age;
-       sc->SCp.ptr = (char *)ctask;
-
-       ctask->mtask = NULL;
-       ctask->conn = conn;
-       ctask->sc = sc;
-       INIT_LIST_HEAD(&ctask->running);
-       ctask->total_length = sc->request_bufflen;
-       iscsi_prep_scsi_cmd_pdu(ctask);
-
-       session->tt->init_cmd_task(ctask);
-
-       __kfifo_put(conn->xmitqueue, (void*)&ctask, sizeof(void*));
-       debug_scsi(
-              "ctask enq [%s cid %d sc %lx itt 0x%x len %d cmdsn %d win %d]\n",
-               sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read",
-               conn->id, (long)sc, ctask->itt, sc->request_bufflen,
-               session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1);
-       spin_unlock(&session->lock);
-
-       scsi_queue_work(host, &conn->xmitwork);
-       return 0;
-
-reject:
-       spin_unlock(&session->lock);
-       debug_scsi("cmd 0x%x rejected (%d)\n", sc->cmnd[0], reason);
-       return SCSI_MLQUEUE_HOST_BUSY;
-
-fault:
-       spin_unlock(&session->lock);
-       printk(KERN_ERR "iscsi: cmd 0x%x is not queued (%d)\n",
-              sc->cmnd[0], reason);
-       sc->result = (DID_NO_CONNECT << 16);
-       sc->resid = sc->request_bufflen;
-       sc->scsi_done(sc);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(iscsi_queuecommand);
-
-int iscsi_change_queue_depth(struct scsi_device *sdev, int depth)
-{
-       if (depth > ISCSI_MAX_CMD_PER_LUN)
-               depth = ISCSI_MAX_CMD_PER_LUN;
-       scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth);
-       return sdev->queue_depth;
-}
-EXPORT_SYMBOL_GPL(iscsi_change_queue_depth);
-
-static int
-iscsi_conn_send_generic(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
-                       char *data, uint32_t data_size)
-{
-       struct iscsi_session *session = conn->session;
-       struct iscsi_nopout *nop = (struct iscsi_nopout *)hdr;
-       struct iscsi_mgmt_task *mtask;
-
-       spin_lock_bh(&session->lock);
-       if (session->state == ISCSI_STATE_TERMINATE) {
-               spin_unlock_bh(&session->lock);
-               return -EPERM;
-       }
-       if (hdr->opcode == (ISCSI_OP_LOGIN | ISCSI_OP_IMMEDIATE) ||
-           hdr->opcode == (ISCSI_OP_TEXT | ISCSI_OP_IMMEDIATE))
-               /*
-                * Login and Text are sent serially, in
-                * request-followed-by-response sequence.
-                * Same mtask can be used. Same ITT must be used.
-                * Note that login_mtask is preallocated at conn_create().
-                */
-               mtask = conn->login_mtask;
-       else {
-               BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
-               BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
-
-               nop->exp_statsn = cpu_to_be32(conn->exp_statsn);
-               if (!__kfifo_get(session->mgmtpool.queue,
-                                (void*)&mtask, sizeof(void*))) {
-                       spin_unlock_bh(&session->lock);
-                       return -ENOSPC;
-               }
-       }
-
-       /*
-        * pre-format CmdSN for outgoing PDU.
-        */
-       if (hdr->itt != cpu_to_be32(ISCSI_RESERVED_TAG)) {
-               hdr->itt = mtask->itt | (conn->id << ISCSI_CID_SHIFT) |
-                          (session->age << ISCSI_AGE_SHIFT);
-               nop->cmdsn = cpu_to_be32(session->cmdsn);
-               if (conn->c_stage == ISCSI_CONN_STARTED &&
-                   !(hdr->opcode & ISCSI_OP_IMMEDIATE))
-                       session->cmdsn++;
-       } else
-               /* do not advance CmdSN */
-               nop->cmdsn = cpu_to_be32(session->cmdsn);
-
-       if (data_size) {
-               memcpy(mtask->data, data, data_size);
-               mtask->data_count = data_size;
-       } else
-               mtask->data_count = 0;
-
-       INIT_LIST_HEAD(&mtask->running);
-       memcpy(mtask->hdr, hdr, sizeof(struct iscsi_hdr));
-       if (session->tt->init_mgmt_task)
-               session->tt->init_mgmt_task(conn, mtask, data, data_size);
-       spin_unlock_bh(&session->lock);
-
-       debug_scsi("mgmtpdu [op 0x%x hdr->itt 0x%x datalen %d]\n",
-                  hdr->opcode, hdr->itt, data_size);
-
-       /*
-        * since send_pdu() could be called at least from two contexts,
-        * we need to serialize __kfifo_put, so we don't have to take
-        * additional lock on fast data-path
-        */
-        if (hdr->opcode & ISCSI_OP_IMMEDIATE)
-               __kfifo_put(conn->immqueue, (void*)&mtask, sizeof(void*));
-       else
-               __kfifo_put(conn->mgmtqueue, (void*)&mtask, sizeof(void*));
-
-       scsi_queue_work(session->host, &conn->xmitwork);
-       return 0;
-}
-
-int iscsi_conn_send_pdu(struct iscsi_cls_conn *cls_conn, struct iscsi_hdr *hdr,
-                       char *data, uint32_t data_size)
-{
-       struct iscsi_conn *conn = cls_conn->dd_data;
-       int rc;
-
-       mutex_lock(&conn->xmitmutex);
-       rc = iscsi_conn_send_generic(conn, hdr, data, data_size);
-       mutex_unlock(&conn->xmitmutex);
-
-       return rc;
-}
-EXPORT_SYMBOL_GPL(iscsi_conn_send_pdu);
-
-void iscsi_session_recovery_timedout(struct iscsi_cls_session *cls_session)
-{
-       struct iscsi_session *session = class_to_transport_session(cls_session);
-       struct iscsi_conn *conn = session->leadconn;
-
-       spin_lock_bh(&session->lock);
-       if (session->state != ISCSI_STATE_LOGGED_IN) {
-               session->state = ISCSI_STATE_RECOVERY_FAILED;
-               if (conn)
-                       wake_up(&conn->ehwait);
-       }
-       spin_unlock_bh(&session->lock);
-}
-EXPORT_SYMBOL_GPL(iscsi_session_recovery_timedout);
-
-int iscsi_eh_host_reset(struct scsi_cmnd *sc)
-{
-       struct Scsi_Host *host = sc->device->host;
-       struct iscsi_session *session = iscsi_hostdata(host->hostdata);
-       struct iscsi_conn *conn = session->leadconn;
-       int fail_session = 0;
-
-       spin_lock_bh(&session->lock);
-       if (session->state == ISCSI_STATE_TERMINATE) {
-failed:
-               debug_scsi("failing host reset: session terminated "
-                          "[CID %d age %d]", conn->id, session->age);
-               spin_unlock_bh(&session->lock);
-               return FAILED;
-       }
-
-       if (sc->SCp.phase == session->age) {
-               debug_scsi("failing connection CID %d due to SCSI host reset",
-                          conn->id);
-               fail_session = 1;
-       }
-       spin_unlock_bh(&session->lock);
-
-       /*
-        * we drop the lock here but the leadconn cannot be destoyed while
-        * we are in the scsi eh
-        */
-       if (fail_session)
-               iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
-
-       debug_scsi("iscsi_eh_host_reset wait for relogin\n");
-       wait_event_interruptible(conn->ehwait,
-                                session->state == ISCSI_STATE_TERMINATE ||
-                                session->state == ISCSI_STATE_LOGGED_IN ||
-                                session->state == ISCSI_STATE_RECOVERY_FAILED);
-       if (signal_pending(current))
-               flush_signals(current);
-
-       spin_lock_bh(&session->lock);
-       if (session->state == ISCSI_STATE_LOGGED_IN)
-               printk(KERN_INFO "iscsi: host reset succeeded\n");
-       else
-               goto failed;
-       spin_unlock_bh(&session->lock);
-
-       return SUCCESS;
-}
-EXPORT_SYMBOL_GPL(iscsi_eh_host_reset);
-
-static void iscsi_tmabort_timedout(unsigned long data)
-{
-       struct iscsi_cmd_task *ctask = (struct iscsi_cmd_task *)data;
-       struct iscsi_conn *conn = ctask->conn;
-       struct iscsi_session *session = conn->session;
-
-       spin_lock(&session->lock);
-       if (conn->tmabort_state == TMABORT_INITIAL) {
-               conn->tmabort_state = TMABORT_TIMEDOUT;
-               debug_scsi("tmabort timedout [sc %p itt 0x%x]\n",
-                       ctask->sc, ctask->itt);
-               /* unblock eh_abort() */
-               wake_up(&conn->ehwait);
-       }
-       spin_unlock(&session->lock);
-}
-
-/* must be called with the mutex lock */
-static int iscsi_exec_abort_task(struct scsi_cmnd *sc,
-                                struct iscsi_cmd_task *ctask)
-{
-       struct iscsi_conn *conn = ctask->conn;
-       struct iscsi_session *session = conn->session;
-       struct iscsi_tm *hdr = &conn->tmhdr;
-       int rc;
-
-       /*
-        * ctask timed out but session is OK requests must be serialized.
-        */
-       memset(hdr, 0, sizeof(struct iscsi_tm));
-       hdr->opcode = ISCSI_OP_SCSI_TMFUNC | ISCSI_OP_IMMEDIATE;
-       hdr->flags = ISCSI_TM_FUNC_ABORT_TASK;
-       hdr->flags |= ISCSI_FLAG_CMD_FINAL;
-       memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
-       hdr->rtt = ctask->hdr->itt;
-       hdr->refcmdsn = ctask->hdr->cmdsn;
-
-       rc = iscsi_conn_send_generic(conn, (struct iscsi_hdr *)hdr,
-                                    NULL, 0);
-       if (rc) {
-               iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
-               debug_scsi("abort sent failure [itt 0x%x] %d", ctask->itt, rc);
-               return rc;
-       }
-
-       debug_scsi("abort sent [itt 0x%x]\n", ctask->itt);
-
-       spin_lock_bh(&session->lock);
-       ctask->mtask = (struct iscsi_mgmt_task *)
-                       session->mgmt_cmds[(hdr->itt & ISCSI_ITT_MASK) -
-                                       ISCSI_MGMT_ITT_OFFSET];
-
-       if (conn->tmabort_state == TMABORT_INITIAL) {
-               conn->tmfcmd_pdus_cnt++;
-               conn->tmabort_timer.expires = 10*HZ + jiffies;
-               conn->tmabort_timer.function = iscsi_tmabort_timedout;
-               conn->tmabort_timer.data = (unsigned long)ctask;
-               add_timer(&conn->tmabort_timer);
-               debug_scsi("abort set timeout [itt 0x%x]", ctask->itt);
-       }
-       spin_unlock_bh(&session->lock);
-       mutex_unlock(&conn->xmitmutex);
-
-       /*
-        * block eh thread until:
-        *
-        * 1) abort response
-        * 2) abort timeout
-        * 3) session is terminated or restarted or userspace has
-        * given up on recovery
-        */
-       wait_event_interruptible(conn->ehwait,
-                                sc->SCp.phase != session->age ||
-                                session->state != ISCSI_STATE_LOGGED_IN ||
-                                conn->tmabort_state != TMABORT_INITIAL);
-       if (signal_pending(current))
-               flush_signals(current);
-       del_timer_sync(&conn->tmabort_timer);
-
-       mutex_lock(&conn->xmitmutex);
-       return 0;
-}
-
-/*
- * xmit mutex and session lock must be held
- */
-#define iscsi_remove_task(tasktype)                                    \
-static struct iscsi_##tasktype *                                       \
-iscsi_remove_##tasktype(struct kfifo *fifo, uint32_t itt)              \
-{                                                                      \
-       int i, nr_tasks = __kfifo_len(fifo) / sizeof(void*);            \
-       struct iscsi_##tasktype *task;                                  \
-                                                                       \
-       debug_scsi("searching %d tasks\n", nr_tasks);                   \
-                                                                       \
-       for (i = 0; i < nr_tasks; i++) {                                \
-               __kfifo_get(fifo, (void*)&task, sizeof(void*));         \
-               debug_scsi("check task %u\n", task->itt);               \
-                                                                       \
-               if (task->itt == itt) {                                 \
-                       debug_scsi("matched task\n");                   \
-                       return task;                                    \
-               }                                                       \
-                                                                       \
-               __kfifo_put(fifo, (void*)&task, sizeof(void*));         \
-       }                                                               \
-       return NULL;                                                    \
-}
-
-iscsi_remove_task(mgmt_task);
-iscsi_remove_task(cmd_task);
-
-static int iscsi_ctask_mtask_cleanup(struct iscsi_cmd_task *ctask)
-{
-       struct iscsi_conn *conn = ctask->conn;
-       struct iscsi_session *session = conn->session;
-
-       if (!ctask->mtask)
-               return -EINVAL;
-
-       if (!iscsi_remove_mgmt_task(conn->immqueue, ctask->mtask->itt))
-               list_del(&ctask->mtask->running);
-       __kfifo_put(session->mgmtpool.queue, (void*)&ctask->mtask,
-                   sizeof(void*));
-       ctask->mtask = NULL;
-       return 0;
-}
-
-/*
- * session lock and xmitmutex must be held
- */
-static void fail_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
-                        int err)
-{
-       struct scsi_cmnd *sc;
-
-       conn->session->tt->cleanup_cmd_task(conn, ctask);
-       iscsi_ctask_mtask_cleanup(ctask);
-
-       sc = ctask->sc;
-       if (!sc)
-               return;
-       sc->result = err;
-       sc->resid = sc->request_bufflen;
-       iscsi_complete_command(conn->session, ctask);
-}
-
-int iscsi_eh_abort(struct scsi_cmnd *sc)
-{
-       struct iscsi_cmd_task *ctask = (struct iscsi_cmd_task *)sc->SCp.ptr;
-       struct iscsi_conn *conn = ctask->conn;
-       struct iscsi_session *session = conn->session;
-       struct iscsi_cmd_task *pending_ctask;
-       int rc;
-
-       conn->eh_abort_cnt++;
-       debug_scsi("aborting [sc %p itt 0x%x]\n", sc, ctask->itt);
-
-       mutex_lock(&conn->xmitmutex);
-       spin_lock_bh(&session->lock);
-
-       /*
-        * If we are not logged in or we have started a new session
-        * then let the host reset code handle this
-        */
-       if (session->state != ISCSI_STATE_LOGGED_IN ||
-           sc->SCp.phase != session->age)
-               goto failed;
-
-       /* ctask completed before time out */
-       if (!ctask->sc)
-               goto success;
-
-       /* what should we do here ? */
-       if (conn->ctask == ctask) {
-               printk(KERN_INFO "iscsi: sc %p itt 0x%x partially sent. "
-                      "Failing abort\n", sc, ctask->itt);
-               goto failed;
-       }
-
-       /* check for the easy pending cmd abort */
-       pending_ctask = iscsi_remove_cmd_task(conn->xmitqueue, ctask->itt);
-       if (pending_ctask) {
-               /* iscsi_tcp queues write transfers on the xmitqueue */
-               if (list_empty(&pending_ctask->running)) {
-                       debug_scsi("found pending task\n");
-                       goto success;
-               } else
-                       __kfifo_put(conn->xmitqueue, (void*)&pending_ctask,
-                                   sizeof(void*));
-       }
-
-       conn->tmabort_state = TMABORT_INITIAL;
-
-       spin_unlock_bh(&session->lock);
-       rc = iscsi_exec_abort_task(sc, ctask);
-       spin_lock_bh(&session->lock);
-
-       iscsi_ctask_mtask_cleanup(ctask);
-       if (rc || sc->SCp.phase != session->age ||
-           session->state != ISCSI_STATE_LOGGED_IN)
-               goto failed;
-
-       /* ctask completed before tmf abort response */
-       if (!ctask->sc) {
-               debug_scsi("sc completed while abort in progress\n");
-               goto success;
-       }
-
-       if (conn->tmabort_state != TMABORT_SUCCESS) {
-               spin_unlock_bh(&session->lock);
-               iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
-               spin_lock_bh(&session->lock);
-               goto failed;
-       }
-
-success:
-       debug_scsi("abort success [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
-       spin_unlock_bh(&session->lock);
-
-       /*
-        * clean up task if aborted. we have the xmitmutex so grab
-        * the recv lock as a writer
-        */
-       write_lock_bh(conn->recv_lock);
-       spin_lock(&session->lock);
-       fail_command(conn, ctask, DID_ABORT << 16);
-       spin_unlock(&session->lock);
-       write_unlock_bh(conn->recv_lock);
-
-       mutex_unlock(&conn->xmitmutex);
-       return SUCCESS;
-
-failed:
-       spin_unlock_bh(&session->lock);
-       mutex_unlock(&conn->xmitmutex);
-
-       debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
-       return FAILED;
-}
-EXPORT_SYMBOL_GPL(iscsi_eh_abort);
-
-int
-iscsi_pool_init(struct iscsi_queue *q, int max, void ***items, int item_size)
-{
-       int i;
-
-       *items = kmalloc(max * sizeof(void*), GFP_KERNEL);
-       if (*items == NULL)
-               return -ENOMEM;
-
-       q->max = max;
-       q->pool = kmalloc(max * sizeof(void*), GFP_KERNEL);
-       if (q->pool == NULL) {
-               kfree(*items);
-               return -ENOMEM;
-       }
-
-       q->queue = kfifo_init((void*)q->pool, max * sizeof(void*),
-                             GFP_KERNEL, NULL);
-       if (q->queue == ERR_PTR(-ENOMEM)) {
-               kfree(q->pool);
-               kfree(*items);
-               return -ENOMEM;
-       }
-
-       for (i = 0; i < max; i++) {
-               q->pool[i] = kmalloc(item_size, GFP_KERNEL);
-               if (q->pool[i] == NULL) {
-                       int j;
-
-                       for (j = 0; j < i; j++)
-                               kfree(q->pool[j]);
-
-                       kfifo_free(q->queue);
-                       kfree(q->pool);
-                       kfree(*items);
-                       return -ENOMEM;
-               }
-               memset(q->pool[i], 0, item_size);
-               (*items)[i] = q->pool[i];
-               __kfifo_put(q->queue, (void*)&q->pool[i], sizeof(void*));
-       }
-       return 0;
-}
-EXPORT_SYMBOL_GPL(iscsi_pool_init);
-
-void iscsi_pool_free(struct iscsi_queue *q, void **items)
-{
-       int i;
-
-       for (i = 0; i < q->max; i++)
-               kfree(items[i]);
-       kfree(q->pool);
-       kfree(items);
-}
-EXPORT_SYMBOL_GPL(iscsi_pool_free);
-
-/*
- * iSCSI Session's hostdata organization:
- *
- *    *------------------* <== hostdata_session(host->hostdata)
- *    | ptr to class sess|
- *    |------------------| <== iscsi_hostdata(host->hostdata)
- *    | iscsi_session    |
- *    *------------------*
- */
-
-#define hostdata_privsize(_sz) (sizeof(unsigned long) + _sz + \
-                                _sz % sizeof(unsigned long))
-
-#define hostdata_session(_hostdata) (iscsi_ptr(*(unsigned long *)_hostdata))
-
-/**
- * iscsi_session_setup - create iscsi cls session and host and session
- * @scsit: scsi transport template
- * @iscsit: iscsi transport template
- * @initial_cmdsn: initial CmdSN
- * @hostno: host no allocated
- *
- * This can be used by software iscsi_transports that allocate
- * a session per scsi host.
- **/
-struct iscsi_cls_session *
-iscsi_session_setup(struct iscsi_transport *iscsit,
-                   struct scsi_transport_template *scsit,
-                   int cmd_task_size, int mgmt_task_size,
-                   uint32_t initial_cmdsn, uint32_t *hostno)
-{
-       struct Scsi_Host *shost;
-       struct iscsi_session *session;
-       struct iscsi_cls_session *cls_session;
-       int cmd_i;
-
-       shost = scsi_host_alloc(iscsit->host_template,
-                               hostdata_privsize(sizeof(*session)));
-       if (!shost)
-               return NULL;
-
-       shost->max_id = 1;
-       shost->max_channel = 0;
-       shost->max_lun = iscsit->max_lun;
-       shost->max_cmd_len = iscsit->max_cmd_len;
-       shost->transportt = scsit;
-       shost->transportt->create_work_queue = 1;
-       *hostno = shost->host_no;
-
-       session = iscsi_hostdata(shost->hostdata);
-       memset(session, 0, sizeof(struct iscsi_session));
-       session->host = shost;
-       session->state = ISCSI_STATE_FREE;
-       session->mgmtpool_max = ISCSI_MGMT_CMDS_MAX;
-       session->cmds_max = ISCSI_XMIT_CMDS_MAX;
-       session->cmdsn = initial_cmdsn;
-       session->exp_cmdsn = initial_cmdsn + 1;
-       session->max_cmdsn = initial_cmdsn + 1;
-       session->max_r2t = 1;
-       session->tt = iscsit;
-
-       /* initialize SCSI PDU commands pool */
-       if (iscsi_pool_init(&session->cmdpool, session->cmds_max,
-                           (void***)&session->cmds,
-                           cmd_task_size + sizeof(struct iscsi_cmd_task)))
-               goto cmdpool_alloc_fail;
-
-       /* pre-format cmds pool with ITT */
-       for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
-               struct iscsi_cmd_task *ctask = session->cmds[cmd_i];
-
-               if (cmd_task_size)
-                       ctask->dd_data = &ctask[1];
-               ctask->itt = cmd_i;
-       }
-
-       spin_lock_init(&session->lock);
-       INIT_LIST_HEAD(&session->connections);
-
-       /* initialize immediate command pool */
-       if (iscsi_pool_init(&session->mgmtpool, session->mgmtpool_max,
-                          (void***)&session->mgmt_cmds,
-                          mgmt_task_size + sizeof(struct iscsi_mgmt_task)))
-               goto mgmtpool_alloc_fail;
-
-
-       /* pre-format immediate cmds pool with ITT */
-       for (cmd_i = 0; cmd_i < session->mgmtpool_max; cmd_i++) {
-               struct iscsi_mgmt_task *mtask = session->mgmt_cmds[cmd_i];
-
-               if (mgmt_task_size)
-                       mtask->dd_data = &mtask[1];
-               mtask->itt = ISCSI_MGMT_ITT_OFFSET + cmd_i;
-       }
-
-       if (scsi_add_host(shost, NULL))
-               goto add_host_fail;
-
-       cls_session = iscsi_create_session(shost, iscsit, 0);
-       if (!cls_session)
-               goto cls_session_fail;
-       *(unsigned long*)shost->hostdata = (unsigned long)cls_session;
-
-       return cls_session;
-
-cls_session_fail:
-       scsi_remove_host(shost);
-add_host_fail:
-       iscsi_pool_free(&session->mgmtpool, (void**)session->mgmt_cmds);
-mgmtpool_alloc_fail:
-       iscsi_pool_free(&session->cmdpool, (void**)session->cmds);
-cmdpool_alloc_fail:
-       scsi_host_put(shost);
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(iscsi_session_setup);
-
-/**
- * iscsi_session_teardown - destroy session, host, and cls_session
- * shost: scsi host
- *
- * This can be used by software iscsi_transports that allocate
- * a session per scsi host.
- **/
-void iscsi_session_teardown(struct iscsi_cls_session *cls_session)
-{
-       struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
-       struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
-
-       scsi_remove_host(shost);
-
-       iscsi_pool_free(&session->mgmtpool, (void**)session->mgmt_cmds);
-       iscsi_pool_free(&session->cmdpool, (void**)session->cmds);
-
-       iscsi_destroy_session(cls_session);
-       scsi_host_put(shost);
-}
-EXPORT_SYMBOL_GPL(iscsi_session_teardown);
-
-/**
- * iscsi_conn_setup - create iscsi_cls_conn and iscsi_conn
- * @cls_session: iscsi_cls_session
- * @conn_idx: cid
- **/
-struct iscsi_cls_conn *
-iscsi_conn_setup(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
-{
-       struct iscsi_session *session = class_to_transport_session(cls_session);
-       struct iscsi_conn *conn;
-       struct iscsi_cls_conn *cls_conn;
-       char *data;
-
-       cls_conn = iscsi_create_conn(cls_session, conn_idx);
-       if (!cls_conn)
-               return NULL;
-       conn = cls_conn->dd_data;
-       memset(conn, 0, sizeof(*conn));
-
-       conn->session = session;
-       conn->cls_conn = cls_conn;
-       conn->c_stage = ISCSI_CONN_INITIAL_STAGE;
-       conn->id = conn_idx;
-       conn->exp_statsn = 0;
-       conn->tmabort_state = TMABORT_INITIAL;
-       INIT_LIST_HEAD(&conn->run_list);
-       INIT_LIST_HEAD(&conn->mgmt_run_list);
-
-       /* initialize general xmit PDU commands queue */
-       conn->xmitqueue = kfifo_alloc(session->cmds_max * sizeof(void*),
-                                       GFP_KERNEL, NULL);
-       if (conn->xmitqueue == ERR_PTR(-ENOMEM))
-               goto xmitqueue_alloc_fail;
-
-       /* initialize general immediate & non-immediate PDU commands queue */
-       conn->immqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*),
-                                       GFP_KERNEL, NULL);
-       if (conn->immqueue == ERR_PTR(-ENOMEM))
-               goto immqueue_alloc_fail;
-
-       conn->mgmtqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*),
-                                       GFP_KERNEL, NULL);
-       if (conn->mgmtqueue == ERR_PTR(-ENOMEM))
-               goto mgmtqueue_alloc_fail;
-
-       INIT_WORK(&conn->xmitwork, iscsi_xmitworker, conn);
-
-       /* allocate login_mtask used for the login/text sequences */
-       spin_lock_bh(&session->lock);
-       if (!__kfifo_get(session->mgmtpool.queue,
-                         (void*)&conn->login_mtask,
-                        sizeof(void*))) {
-               spin_unlock_bh(&session->lock);
-               goto login_mtask_alloc_fail;
-       }
-       spin_unlock_bh(&session->lock);
-
-       data = kmalloc(DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH, GFP_KERNEL);
-       if (!data)
-               goto login_mtask_data_alloc_fail;
-       conn->login_mtask->data = data;
-
-       init_timer(&conn->tmabort_timer);
-       mutex_init(&conn->xmitmutex);
-       init_waitqueue_head(&conn->ehwait);
-
-       return cls_conn;
-
-login_mtask_data_alloc_fail:
-       __kfifo_put(session->mgmtpool.queue, (void*)&conn->login_mtask,
-                   sizeof(void*));
-login_mtask_alloc_fail:
-       kfifo_free(conn->mgmtqueue);
-mgmtqueue_alloc_fail:
-       kfifo_free(conn->immqueue);
-immqueue_alloc_fail:
-       kfifo_free(conn->xmitqueue);
-xmitqueue_alloc_fail:
-       iscsi_destroy_conn(cls_conn);
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(iscsi_conn_setup);
-
-/**
- * iscsi_conn_teardown - teardown iscsi connection
- * cls_conn: iscsi class connection
- *
- * TODO: we may need to make this into a two step process
- * like scsi-mls remove + put host
- */
-void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
-{
-       struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iscsi_session *session = conn->session;
-       unsigned long flags;
-
-       set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
-       mutex_lock(&conn->xmitmutex);
-       if (conn->c_stage == ISCSI_CONN_INITIAL_STAGE) {
-               if (session->tt->suspend_conn_recv)
-                       session->tt->suspend_conn_recv(conn);
-
-               session->tt->terminate_conn(conn);
-       }
-
-       spin_lock_bh(&session->lock);
-       conn->c_stage = ISCSI_CONN_CLEANUP_WAIT;
-       if (session->leadconn == conn) {
-               /*
-                * leading connection? then give up on recovery.
-                */
-               session->state = ISCSI_STATE_TERMINATE;
-               wake_up(&conn->ehwait);
-       }
-       spin_unlock_bh(&session->lock);
-
-       mutex_unlock(&conn->xmitmutex);
-
-       /*
-        * Block until all in-progress commands for this connection
-        * time out or fail.
-        */
-       for (;;) {
-               spin_lock_irqsave(session->host->host_lock, flags);
-               if (!session->host->host_busy) { /* OK for ERL == 0 */
-                       spin_unlock_irqrestore(session->host->host_lock, flags);
-                       break;
-               }
-               spin_unlock_irqrestore(session->host->host_lock, flags);
-               msleep_interruptible(500);
-               printk(KERN_INFO "iscsi: scsi conn_destroy(): host_busy %d "
-                      "host_failed %d\n", session->host->host_busy,
-                      session->host->host_failed);
-               /*
-                * force eh_abort() to unblock
-                */
-               wake_up(&conn->ehwait);
-       }
-
-       spin_lock_bh(&session->lock);
-       kfree(conn->login_mtask->data);
-       __kfifo_put(session->mgmtpool.queue, (void*)&conn->login_mtask,
-                   sizeof(void*));
-       list_del(&conn->item);
-       if (list_empty(&session->connections))
-               session->leadconn = NULL;
-       if (session->leadconn && session->leadconn == conn)
-               session->leadconn = container_of(session->connections.next,
-                       struct iscsi_conn, item);
-
-       if (session->leadconn == NULL)
-               /* no connections exits.. reset sequencing */
-               session->cmdsn = session->max_cmdsn = session->exp_cmdsn = 1;
-       spin_unlock_bh(&session->lock);
-
-       kfifo_free(conn->xmitqueue);
-       kfifo_free(conn->immqueue);
-       kfifo_free(conn->mgmtqueue);
-
-       iscsi_destroy_conn(cls_conn);
-}
-EXPORT_SYMBOL_GPL(iscsi_conn_teardown);
-
-int iscsi_conn_start(struct iscsi_cls_conn *cls_conn)
-{
-       struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iscsi_session *session = conn->session;
-
-       if (session == NULL) {
-               printk(KERN_ERR "iscsi: can't start unbound connection\n");
-               return -EPERM;
-       }
-
-       spin_lock_bh(&session->lock);
-       conn->c_stage = ISCSI_CONN_STARTED;
-       session->state = ISCSI_STATE_LOGGED_IN;
-
-       switch(conn->stop_stage) {
-       case STOP_CONN_RECOVER:
-               /*
-                * unblock eh_abort() if it is blocked. re-try all
-                * commands after successful recovery
-                */
-               conn->stop_stage = 0;
-               conn->tmabort_state = TMABORT_INITIAL;
-               session->age++;
-               spin_unlock_bh(&session->lock);
-
-               iscsi_unblock_session(session_to_cls(session));
-               wake_up(&conn->ehwait);
-               return 0;
-       case STOP_CONN_TERM:
-               conn->stop_stage = 0;
-               break;
-       default:
-               break;
-       }
-       spin_unlock_bh(&session->lock);
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(iscsi_conn_start);
-
-static void
-flush_control_queues(struct iscsi_session *session, struct iscsi_conn *conn)
-{
-       struct iscsi_mgmt_task *mtask, *tmp;
-
-       /* handle pending */
-       while (__kfifo_get(conn->immqueue, (void*)&mtask, sizeof(void*)) ||
-              __kfifo_get(conn->mgmtqueue, (void*)&mtask, sizeof(void*))) {
-               if (mtask == conn->login_mtask)
-                       continue;
-               debug_scsi("flushing pending mgmt task itt 0x%x\n", mtask->itt);
-               __kfifo_put(session->mgmtpool.queue, (void*)&mtask,
-                           sizeof(void*));
-       }
-
-       /* handle running */
-       list_for_each_entry_safe(mtask, tmp, &conn->mgmt_run_list, running) {
-               debug_scsi("flushing running mgmt task itt 0x%x\n", mtask->itt);
-               list_del(&mtask->running);
-
-               if (mtask == conn->login_mtask)
-                       continue;
-               __kfifo_put(session->mgmtpool.queue, (void*)&mtask,
-                          sizeof(void*));
-       }
-
-       conn->mtask = NULL;
-}
-
-/* Fail commands. Mutex and session lock held and recv side suspended */
-static void fail_all_commands(struct iscsi_conn *conn)
-{
-       struct iscsi_cmd_task *ctask, *tmp;
-
-       /* flush pending */
-       while (__kfifo_get(conn->xmitqueue, (void*)&ctask, sizeof(void*))) {
-               debug_scsi("failing pending sc %p itt 0x%x\n", ctask->sc,
-                          ctask->itt);
-               fail_command(conn, ctask, DID_BUS_BUSY << 16);
-       }
-
-       /* fail all other running */
-       list_for_each_entry_safe(ctask, tmp, &conn->run_list, running) {
-               debug_scsi("failing in progress sc %p itt 0x%x\n",
-                          ctask->sc, ctask->itt);
-               fail_command(conn, ctask, DID_BUS_BUSY << 16);
-       }
-
-       conn->ctask = NULL;
-}
-
-static void iscsi_start_session_recovery(struct iscsi_session *session,
-                                        struct iscsi_conn *conn, int flag)
-{
-       int old_stop_stage;
-
-       spin_lock_bh(&session->lock);
-       if (conn->stop_stage == STOP_CONN_TERM) {
-               spin_unlock_bh(&session->lock);
-               return;
-       }
-
-       /*
-        * When this is called for the in_login state, we only want to clean
-        * up the login task and connection. We do not need to block and set
-        * the recovery state again
-        */
-       if (flag == STOP_CONN_TERM)
-               session->state = ISCSI_STATE_TERMINATE;
-       else if (conn->stop_stage != STOP_CONN_RECOVER)
-               session->state = ISCSI_STATE_IN_RECOVERY;
-
-       old_stop_stage = conn->stop_stage;
-       conn->stop_stage = flag;
-       conn->c_stage = ISCSI_CONN_STOPPED;
-       set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
-       spin_unlock_bh(&session->lock);
-
-       if (session->tt->suspend_conn_recv)
-               session->tt->suspend_conn_recv(conn);
-
-       mutex_lock(&conn->xmitmutex);
-       /*
-        * for connection level recovery we should not calculate
-        * header digest. conn->hdr_size used for optimization
-        * in hdr_extract() and will be re-negotiated at
-        * set_param() time.
-        */
-       if (flag == STOP_CONN_RECOVER) {
-               conn->hdrdgst_en = 0;
-               conn->datadgst_en = 0;
-               if (session->state == ISCSI_STATE_IN_RECOVERY &&
-                   old_stop_stage != STOP_CONN_RECOVER) {
-                       debug_scsi("blocking session\n");
-                       iscsi_block_session(session_to_cls(session));
-               }
-       }
-
-       session->tt->terminate_conn(conn);
-       /*
-        * flush queues.
-        */
-       spin_lock_bh(&session->lock);
-       fail_all_commands(conn);
-       flush_control_queues(session, conn);
-       spin_unlock_bh(&session->lock);
-
-       mutex_unlock(&conn->xmitmutex);
-}
-
-void iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
-{
-       struct iscsi_conn *conn = cls_conn->dd_data;
-       struct iscsi_session *session = conn->session;
-
-       switch (flag) {
-       case STOP_CONN_RECOVER:
-       case STOP_CONN_TERM:
-               iscsi_start_session_recovery(session, conn, flag);
-               break;
-       default:
-               printk(KERN_ERR "iscsi: invalid stop flag %d\n", flag);
-       }
-}
-EXPORT_SYMBOL_GPL(iscsi_conn_stop);
-
-int iscsi_conn_bind(struct iscsi_cls_session *cls_session,
-                   struct iscsi_cls_conn *cls_conn, int is_leading)
-{
-       struct iscsi_session *session = class_to_transport_session(cls_session);
-       struct iscsi_conn *tmp = ERR_PTR(-EEXIST), *conn = cls_conn->dd_data;
-
-       /* lookup for existing connection */
-       spin_lock_bh(&session->lock);
-       list_for_each_entry(tmp, &session->connections, item) {
-               if (tmp == conn) {
-                       if (conn->c_stage != ISCSI_CONN_STOPPED ||
-                           conn->stop_stage == STOP_CONN_TERM) {
-                               printk(KERN_ERR "iscsi: can't bind "
-                                      "non-stopped connection (%d:%d)\n",
-                                      conn->c_stage, conn->stop_stage);
-                               spin_unlock_bh(&session->lock);
-                               return -EIO;
-                       }
-                       break;
-               }
-       }
-       if (tmp != conn) {
-               /* bind new iSCSI connection to session */
-               conn->session = session;
-               list_add(&conn->item, &session->connections);
-       }
-       spin_unlock_bh(&session->lock);
-
-       if (is_leading)
-               session->leadconn = conn;
-
-       /*
-        * Unblock xmitworker(), Login Phase will pass through.
-        */
-       clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
-       clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
-       return 0;
-}
-EXPORT_SYMBOL_GPL(iscsi_conn_bind);
-
-MODULE_AUTHOR("Mike Christie");
-MODULE_DESCRIPTION("iSCSI library functions");
-MODULE_LICENSE("GPL");
index a89aff6..46ababf 100644 (file)
@@ -110,11 +110,8 @@ static int ioctl_internal_command(struct scsi_device *sdev, char *cmd,
                                       sshdr.asc, sshdr.ascq);
                        break;
                case NOT_READY: /* This happens if there is no disc in drive */
-                       if (sdev->removable && (cmd[0] != TEST_UNIT_READY)) {
-                               printk(KERN_INFO "Device not ready. Make sure"
-                                      " there is a disc in the drive.\n");
+                       if (sdev->removable)
                                break;
-                       }
                case UNIT_ATTENTION:
                        if (sdev->removable) {
                                sdev->changed = 1;
index 8bb8222..2730d50 100644 (file)
@@ -1,8 +1,10 @@
-/* 
+/*
  * iSCSI transport class definitions
  *
  * Copyright (C) IBM Corporation, 2004
- * Copyright (C) Mike Christie, 2004
+ * Copyright (C) Mike Christie, 2004 - 2005
+ * Copyright (C) Dmitry Yusupov, 2004 - 2005
+ * Copyright (C) Alex Aizman, 2004 - 2005
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 #include <linux/module.h>
+#include <linux/mempool.h>
+#include <linux/mutex.h>
+#include <net/tcp.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_transport.h>
 #include <scsi/scsi_transport_iscsi.h>
+#include <scsi/iscsi_if.h>
 
-#define ISCSI_SESSION_ATTRS 20
-#define ISCSI_HOST_ATTRS 2
+#define ISCSI_SESSION_ATTRS 8
+#define ISCSI_CONN_ATTRS 6
 
 struct iscsi_internal {
        struct scsi_transport_template t;
-       struct iscsi_function_template *fnt;
+       struct iscsi_transport *iscsi_transport;
+       struct list_head list;
+       /*
+        * based on transport capabilities, at register time we set these
+        * bits to tell the transport class it wants attributes displayed
+        * in sysfs or that it can support different iSCSI Data-Path
+        * capabilities
+        */
+       uint32_t param_mask;
+
+       struct class_device cdev;
        /*
         * We do not have any private or other attrs.
         */
+       struct transport_container conn_cont;
+       struct class_device_attribute *conn_attrs[ISCSI_CONN_ATTRS + 1];
+       struct transport_container session_cont;
        struct class_device_attribute *session_attrs[ISCSI_SESSION_ATTRS + 1];
-       struct class_device_attribute *host_attrs[ISCSI_HOST_ATTRS + 1];
 };
 
-#define to_iscsi_internal(tmpl) container_of(tmpl, struct iscsi_internal, t)
+/*
+ * list of registered transports and lock that must
+ * be held while accessing list. The iscsi_transport_lock must
+ * be acquired after the rx_queue_mutex.
+ */
+static LIST_HEAD(iscsi_transports);
+static DEFINE_SPINLOCK(iscsi_transport_lock);
+
+#define to_iscsi_internal(tmpl) \
+       container_of(tmpl, struct iscsi_internal, t)
+
+#define cdev_to_iscsi_internal(_cdev) \
+       container_of(_cdev, struct iscsi_internal, cdev)
+
+static void iscsi_transport_release(struct class_device *cdev)
+{
+       struct iscsi_internal *priv = cdev_to_iscsi_internal(cdev);
+       kfree(priv);
+}
+
+/*
+ * iscsi_transport_class represents the iscsi_transports that are
+ * registered.
+ */
+static struct class iscsi_transport_class = {
+       .name = "iscsi_transport",
+       .release = iscsi_transport_release,
+};
+
+static ssize_t
+show_transport_handle(struct class_device *cdev, char *buf)
+{
+       struct iscsi_internal *priv = cdev_to_iscsi_internal(cdev);
+       return sprintf(buf, "%llu\n", (unsigned long long)iscsi_handle(priv->iscsi_transport));
+}
+static CLASS_DEVICE_ATTR(handle, S_IRUGO, show_transport_handle, NULL);
+
+#define show_transport_attr(name, format)                              \
+static ssize_t                                                         \
+show_transport_##name(struct class_device *cdev, char *buf)            \
+{                                                                      \
+       struct iscsi_internal *priv = cdev_to_iscsi_internal(cdev);     \
+       return sprintf(buf, format"\n", priv->iscsi_transport->name);   \
+}                                                                      \
+static CLASS_DEVICE_ATTR(name, S_IRUGO, show_transport_##name, NULL);
+
+show_transport_attr(caps, "0x%x");
+show_transport_attr(max_lun, "%d");
+show_transport_attr(max_conn, "%d");
+show_transport_attr(max_cmd_len, "%d");
+
+static struct attribute *iscsi_transport_attrs[] = {
+       &class_device_attr_handle.attr,
+       &class_device_attr_caps.attr,
+       &class_device_attr_max_lun.attr,
+       &class_device_attr_max_conn.attr,
+       &class_device_attr_max_cmd_len.attr,
+       NULL,
+};
+
+static struct attribute_group iscsi_transport_group = {
+       .attrs = iscsi_transport_attrs,
+};
 
-static DECLARE_TRANSPORT_CLASS(iscsi_transport_class,
-                              "iscsi_transport",
+static DECLARE_TRANSPORT_CLASS(iscsi_session_class,
+                              "iscsi_session",
                               NULL,
                               NULL,
                               NULL);
 
-static DECLARE_TRANSPORT_CLASS(iscsi_host_class,
-                              "iscsi_host",
+static DECLARE_TRANSPORT_CLASS(iscsi_connection_class,
+                              "iscsi_connection",
                               NULL,
                               NULL,
                               NULL);
+
+static struct sock *nls;
+static int daemon_pid;
+static DEFINE_MUTEX(rx_queue_mutex);
+
+struct mempool_zone {
+       mempool_t *pool;
+       atomic_t allocated;
+       int size;
+       int hiwat;
+       struct list_head freequeue;
+       spinlock_t freelock;
+};
+
+static struct mempool_zone *z_reply;
+
 /*
- * iSCSI target and session attrs
+ * Z_MAX_* - actual mempool size allocated at the mempool_zone_init() time
+ * Z_HIWAT_* - zone's high watermark when if_error bit will be set to -ENOMEM
+ *             so daemon will notice OOM on NETLINK tranposrt level and will
+ *             be able to predict or change operational behavior
  */
-#define iscsi_session_show_fn(field, format)                           \
-                                                                       \
-static ssize_t                                                         \
-show_session_##field(struct class_device *cdev, char *buf)             \
-{                                                                      \
-       struct scsi_target *starget = transport_class_to_starget(cdev); \
-       struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);    \
-       struct iscsi_internal *i = to_iscsi_internal(shost->transportt); \
-                                                                       \
-       if (i->fnt->get_##field)                                        \
-               i->fnt->get_##field(starget);                           \
-       return snprintf(buf, 20, format"\n", iscsi_##field(starget));   \
+#define Z_MAX_REPLY    8
+#define Z_HIWAT_REPLY  6
+#define Z_MAX_PDU      8
+#define Z_HIWAT_PDU    6
+#define Z_MAX_ERROR    16
+#define Z_HIWAT_ERROR  12
+
+static LIST_HEAD(sesslist);
+static DEFINE_SPINLOCK(sesslock);
+static LIST_HEAD(connlist);
+static DEFINE_SPINLOCK(connlock);
+
+static struct iscsi_cls_session *iscsi_session_lookup(uint64_t handle)
+{
+       unsigned long flags;
+       struct iscsi_cls_session *sess;
+
+       spin_lock_irqsave(&sesslock, flags);
+       list_for_each_entry(sess, &sesslist, sess_list) {
+               if (sess == iscsi_ptr(handle)) {
+                       spin_unlock_irqrestore(&sesslock, flags);
+                       return sess;
+               }
+       }
+       spin_unlock_irqrestore(&sesslock, flags);
+       return NULL;
 }
 
-#define iscsi_session_rd_attr(field, format)                           \
-       iscsi_session_show_fn(field, format)                            \
-static CLASS_DEVICE_ATTR(field, S_IRUGO, show_session_##field, NULL);
+static struct iscsi_cls_conn *iscsi_conn_lookup(uint64_t handle)
+{
+       unsigned long flags;
+       struct iscsi_cls_conn *conn;
 
-iscsi_session_rd_attr(tpgt, "%hu");
-iscsi_session_rd_attr(tsih, "%2x");
-iscsi_session_rd_attr(max_recv_data_segment_len, "%u");
-iscsi_session_rd_attr(max_burst_len, "%u");
-iscsi_session_rd_attr(first_burst_len, "%u");
-iscsi_session_rd_attr(def_time2wait, "%hu");
-iscsi_session_rd_attr(def_time2retain, "%hu");
-iscsi_session_rd_attr(max_outstanding_r2t, "%hu");
-iscsi_session_rd_attr(erl, "%d");
+       spin_lock_irqsave(&connlock, flags);
+       list_for_each_entry(conn, &connlist, conn_list) {
+               if (conn == iscsi_ptr(handle)) {
+                       spin_unlock_irqrestore(&connlock, flags);
+                       return conn;
+               }
+       }
+       spin_unlock_irqrestore(&connlock, flags);
+       return NULL;
+}
 
+/*
+ * The following functions can be used by LLDs that allocate
+ * their own scsi_hosts or by software iscsi LLDs
+ */
+static void iscsi_session_release(struct device *dev)
+{
+       struct iscsi_cls_session *session = iscsi_dev_to_session(dev);
+       struct iscsi_transport *transport = session->transport;
+       struct Scsi_Host *shost;
 
-#define iscsi_session_show_bool_fn(field)                              \
-                                                                       \
-static ssize_t                                                         \
-show_session_bool_##field(struct class_device *cdev, char *buf)                \
-{                                                                      \
-       struct scsi_target *starget = transport_class_to_starget(cdev); \
-       struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);    \
-       struct iscsi_internal *i = to_iscsi_internal(shost->transportt); \
-                                                                       \
-       if (i->fnt->get_##field)                                        \
-               i->fnt->get_##field(starget);                           \
-                                                                       \
-       if (iscsi_##field(starget))                                     \
-               return sprintf(buf, "Yes\n");                           \
-       return sprintf(buf, "No\n");                                    \
+       shost = iscsi_session_to_shost(session);
+       scsi_host_put(shost);
+       kfree(session);
+       module_put(transport->owner);
+}
+
+static int iscsi_is_session_dev(const struct device *dev)
+{
+       return dev->release == iscsi_session_release;
 }
 
-#define iscsi_session_rd_bool_attr(field)                              \
-       iscsi_session_show_bool_fn(field)                               \
-static CLASS_DEVICE_ATTR(field, S_IRUGO, show_session_bool_##field, NULL);
+/**
+ * iscsi_create_session - create iscsi class session
+ * @shost: scsi host
+ * @transport: iscsi transport
+ *
+ * This can be called from a LLD or iscsi_transport
+ **/
+struct iscsi_cls_session *
+iscsi_create_session(struct Scsi_Host *shost, struct iscsi_transport *transport)
+{
+       struct iscsi_cls_session *session;
+       int err;
 
-iscsi_session_rd_bool_attr(initial_r2t);
-iscsi_session_rd_bool_attr(immediate_data);
-iscsi_session_rd_bool_attr(data_pdu_in_order);
-iscsi_session_rd_bool_attr(data_sequence_in_order);
+       if (!try_module_get(transport->owner))
+               return NULL;
 
-#define iscsi_session_show_digest_fn(field)                            \
-                                                                       \
-static ssize_t                                                         \
-show_##field(struct class_device *cdev, char *buf)                     \
-{                                                                      \
-       struct scsi_target *starget = transport_class_to_starget(cdev); \
-       struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);    \
-       struct iscsi_internal *i = to_iscsi_internal(shost->transportt); \
-                                                                       \
-       if (i->fnt->get_##field)                                        \
-               i->fnt->get_##field(starget);                           \
-                                                                       \
-       if (iscsi_##field(starget))                                     \
-               return sprintf(buf, "CRC32C\n");                        \
-       return sprintf(buf, "None\n");                                  \
+       session = kzalloc(sizeof(*session), GFP_KERNEL);
+       if (!session)
+               goto module_put;
+       session->transport = transport;
+
+       /* this is released in the dev's release function */
+       scsi_host_get(shost);
+       snprintf(session->dev.bus_id, BUS_ID_SIZE, "session%u", shost->host_no);
+       session->dev.parent = &shost->shost_gendev;
+       session->dev.release = iscsi_session_release;
+       err = device_register(&session->dev);
+       if (err) {
+               dev_printk(KERN_ERR, &session->dev, "iscsi: could not "
+                          "register session's dev\n");
+               goto free_session;
+       }
+       transport_register_device(&session->dev);
+
+       return session;
+
+free_session:
+       kfree(session);
+module_put:
+       module_put(transport->owner);
+       return NULL;
 }
 
-#define iscsi_session_rd_digest_attr(field)                            \
-       iscsi_session_show_digest_fn(field)                             \
-static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL);
+EXPORT_SYMBOL_GPL(iscsi_create_session);
 
-iscsi_session_rd_digest_attr(header_digest);
-iscsi_session_rd_digest_attr(data_digest);
+/**
+ * iscsi_destroy_session - destroy iscsi session
+ * @session: iscsi_session
+ *
+ * Can be called by a LLD or iscsi_transport. There must not be
+ * any running connections.
+ **/
+int iscsi_destroy_session(struct iscsi_cls_session *session)
+{
+       transport_unregister_device(&session->dev);
+       device_unregister(&session->dev);
+       return 0;
+}
 
-static ssize_t
-show_port(struct class_device *cdev, char *buf)
+EXPORT_SYMBOL_GPL(iscsi_destroy_session);
+
+static void iscsi_conn_release(struct device *dev)
 {
-       struct scsi_target *starget = transport_class_to_starget(cdev);
-       struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
-       struct iscsi_internal *i = to_iscsi_internal(shost->transportt);
+       struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev);
+       struct device *parent = conn->dev.parent;
 
-       if (i->fnt->get_port)
-               i->fnt->get_port(starget);
+       kfree(conn);
+       put_device(parent);
+}
 
-       return snprintf(buf, 20, "%hu\n", ntohs(iscsi_port(starget)));
+static int iscsi_is_conn_dev(const struct device *dev)
+{
+       return dev->release == iscsi_conn_release;
 }
-static CLASS_DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
 
-static ssize_t
-show_ip_address(struct class_device *cdev, char *buf)
+/**
+ * iscsi_create_conn - create iscsi class connection
+ * @session: iscsi cls session
+ * @cid: connection id
+ *
+ * This can be called from a LLD or iscsi_transport. The connection
+ * is child of the session so cid must be unique for all connections
+ * on the session.
+ **/
+struct iscsi_cls_conn *
+iscsi_create_conn(struct iscsi_cls_session *session, uint32_t cid)
 {
-       struct scsi_target *starget = transport_class_to_starget(cdev);
-       struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
-       struct iscsi_internal *i = to_iscsi_internal(shost->transportt);
+       struct iscsi_transport *transport = session->transport;
+       struct Scsi_Host *shost = iscsi_session_to_shost(session);
+       struct iscsi_cls_conn *conn;
+       int err;
+
+       conn = kzalloc(sizeof(*conn) + transport->conndata_size, GFP_KERNEL);
+       if (!conn)
+               return NULL;
+
+       if (transport->conndata_size)
+               conn->dd_data = &conn[1];
+
+       INIT_LIST_HEAD(&conn->conn_list);
+       conn->transport = transport;
 
-       if (i->fnt->get_ip_address)
-               i->fnt->get_ip_address(starget);
+       /* this is released in the dev's release function */
+       if (!get_device(&session->dev))
+               goto free_conn;
+       snprintf(conn->dev.bus_id, BUS_ID_SIZE, "connection%d:%u",
+                shost->host_no, cid);
+       conn->dev.parent = &session->dev;
+       conn->dev.release = iscsi_conn_release;
+       err = device_register(&conn->dev);
+       if (err) {
+               dev_printk(KERN_ERR, &conn->dev, "iscsi: could not register "
+                          "connection's dev\n");
+               goto release_parent_ref;
+       }
+       transport_register_device(&conn->dev);
+       return conn;
 
-       if (iscsi_addr_type(starget) == AF_INET)
-               return sprintf(buf, "%u.%u.%u.%u\n",
-                              NIPQUAD(iscsi_sin_addr(starget)));
-       else if(iscsi_addr_type(starget) == AF_INET6)
-               return sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
-                              NIP6(iscsi_sin6_addr(starget)));
-       return -EINVAL;
+release_parent_ref:
+       put_device(&session->dev);
+free_conn:
+       kfree(conn);
+       return NULL;
 }
-static CLASS_DEVICE_ATTR(ip_address, S_IRUGO, show_ip_address, NULL);
 
-static ssize_t
-show_isid(struct class_device *cdev, char *buf)
+EXPORT_SYMBOL_GPL(iscsi_create_conn);
+
+/**
+ * iscsi_destroy_conn - destroy iscsi class connection
+ * @session: iscsi cls session
+ *
+ * This can be called from a LLD or iscsi_transport.
+ **/
+int iscsi_destroy_conn(struct iscsi_cls_conn *conn)
+{
+       transport_unregister_device(&conn->dev);
+       device_unregister(&conn->dev);
+       return 0;
+}
+
+EXPORT_SYMBOL_GPL(iscsi_destroy_conn);
+
+/*
+ * These functions are used only by software iscsi_transports
+ * which do not allocate and more their scsi_hosts since this
+ * is initiated from userspace.
+ */
+
+/*
+ * iSCSI Session's hostdata organization:
+ *
+ *    *------------------* <== hostdata_session(host->hostdata)
+ *    | ptr to class sess|
+ *    |------------------| <== iscsi_hostdata(host->hostdata)
+ *    | transport's data |
+ *    *------------------*
+ */
+
+#define hostdata_privsize(_t)  (sizeof(unsigned long) + _t->hostdata_size + \
+                                _t->hostdata_size % sizeof(unsigned long))
+
+#define hostdata_session(_hostdata) (iscsi_ptr(*(unsigned long *)_hostdata))
+
+/**
+ * iscsi_transport_create_session - create iscsi cls session and host
+ * @scsit: scsi transport template
+ * @transport: iscsi transport template
+ *
+ * This can be used by software iscsi_transports that allocate
+ * a session per scsi host.
+ **/
+struct Scsi_Host *
+iscsi_transport_create_session(struct scsi_transport_template *scsit,
+                              struct iscsi_transport *transport)
 {
-       struct scsi_target *starget = transport_class_to_starget(cdev);
-       struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
-       struct iscsi_internal *i = to_iscsi_internal(shost->transportt);
+       struct iscsi_cls_session *session;
+       struct Scsi_Host *shost;
+       unsigned long flags;
+
+       shost = scsi_host_alloc(transport->host_template,
+                               hostdata_privsize(transport));
+       if (!shost) {
+               printk(KERN_ERR "iscsi: can not allocate SCSI host for "
+                       "session\n");
+               return NULL;
+       }
+
+       shost->max_id = 1;
+       shost->max_channel = 0;
+       shost->max_lun = transport->max_lun;
+       shost->max_cmd_len = transport->max_cmd_len;
+       shost->transportt = scsit;
+       shost->transportt->create_work_queue = 1;
+
+       if (scsi_add_host(shost, NULL))
+               goto free_host;
+
+       session = iscsi_create_session(shost, transport);
+       if (!session)
+               goto remove_host;
+
+       *(unsigned long*)shost->hostdata = (unsigned long)session;
+       spin_lock_irqsave(&sesslock, flags);
+       list_add(&session->sess_list, &sesslist);
+       spin_unlock_irqrestore(&sesslock, flags);
+       return shost;
+
+remove_host:
+       scsi_remove_host(shost);
+free_host:
+       scsi_host_put(shost);
+       return NULL;
+}
 
-       if (i->fnt->get_isid)
-               i->fnt->get_isid(starget);
+EXPORT_SYMBOL_GPL(iscsi_transport_create_session);
 
-       return sprintf(buf, "%02x%02x%02x%02x%02x%02x\n",
-                      iscsi_isid(starget)[0], iscsi_isid(starget)[1],
-                      iscsi_isid(starget)[2], iscsi_isid(starget)[3],
-                      iscsi_isid(starget)[4], iscsi_isid(starget)[5]);
+/**
+ * iscsi_transport_destroy_session - destroy session and scsi host
+ * @shost: scsi host
+ *
+ * This can be used by software iscsi_transports that allocate
+ * a session per scsi host.
+ **/
+int iscsi_transport_destroy_session(struct Scsi_Host *shost)
+{
+       struct iscsi_cls_session *session;
+       unsigned long flags;
+
+       scsi_remove_host(shost);
+       session = hostdata_session(shost->hostdata);
+       spin_lock_irqsave(&sesslock, flags);
+       list_del(&session->sess_list);
+       spin_unlock_irqrestore(&sesslock, flags);
+       iscsi_destroy_session(session);
+       /* ref from host alloc */
+       scsi_host_put(shost);
+       return 0;
 }
-static CLASS_DEVICE_ATTR(isid, S_IRUGO, show_isid, NULL);
+
+EXPORT_SYMBOL_GPL(iscsi_transport_destroy_session);
 
 /*
- * This is used for iSCSI names. Normally, we follow
- * the transport class convention of having the lld
- * set the field, but in these cases the value is
- * too large.
+ * iscsi interface functions
  */
-#define iscsi_session_show_str_fn(field)                               \
-                                                                       \
+static struct iscsi_internal *
+iscsi_if_transport_lookup(struct iscsi_transport *tt)
+{
+       struct iscsi_internal *priv;
+       unsigned long flags;
+
+       spin_lock_irqsave(&iscsi_transport_lock, flags);
+       list_for_each_entry(priv, &iscsi_transports, list) {
+               if (tt == priv->iscsi_transport) {
+                       spin_unlock_irqrestore(&iscsi_transport_lock, flags);
+                       return priv;
+               }
+       }
+       spin_unlock_irqrestore(&iscsi_transport_lock, flags);
+       return NULL;
+}
+
+static inline struct list_head *skb_to_lh(struct sk_buff *skb)
+{
+       return (struct list_head *)&skb->cb;
+}
+
+static void*
+mempool_zone_alloc_skb(gfp_t gfp_mask, void *pool_data)
+{
+       struct mempool_zone *zone = pool_data;
+
+       return alloc_skb(zone->size, gfp_mask);
+}
+
+static void
+mempool_zone_free_skb(void *element, void *pool_data)
+{
+       kfree_skb(element);
+}
+
+static void
+mempool_zone_complete(struct mempool_zone *zone)
+{
+       unsigned long flags;
+       struct list_head *lh, *n;
+
+       spin_lock_irqsave(&zone->freelock, flags);
+       list_for_each_safe(lh, n, &zone->freequeue) {
+               struct sk_buff *skb = (struct sk_buff *)((char *)lh -
+                               offsetof(struct sk_buff, cb));
+               if (!skb_shared(skb)) {
+                       list_del(skb_to_lh(skb));
+                       mempool_free(skb, zone->pool);
+                       atomic_dec(&zone->allocated);
+               }
+       }
+       spin_unlock_irqrestore(&zone->freelock, flags);
+}
+
+static struct mempool_zone *
+mempool_zone_init(unsigned max, unsigned size, unsigned hiwat)
+{
+       struct mempool_zone *zp;
+
+       zp = kzalloc(sizeof(*zp), GFP_KERNEL);
+       if (!zp)
+               return NULL;
+
+       zp->size = size;
+       zp->hiwat = hiwat;
+       INIT_LIST_HEAD(&zp->freequeue);
+       spin_lock_init(&zp->freelock);
+       atomic_set(&zp->allocated, 0);
+
+       zp->pool = mempool_create(max, mempool_zone_alloc_skb,
+                                 mempool_zone_free_skb, zp);
+       if (!zp->pool) {
+               kfree(zp);
+               return NULL;
+       }
+
+       return zp;
+}
+
+static void mempool_zone_destroy(struct mempool_zone *zp)
+{
+       mempool_destroy(zp->pool);
+       kfree(zp);
+}
+
+static struct sk_buff*
+mempool_zone_get_skb(struct mempool_zone *zone)
+{
+       struct sk_buff *skb;
+
+       skb = mempool_alloc(zone->pool, GFP_ATOMIC);
+       if (skb)
+               atomic_inc(&zone->allocated);
+       return skb;
+}
+
+static int
+iscsi_unicast_skb(struct mempool_zone *zone, struct sk_buff *skb)
+{
+       unsigned long flags;
+       int rc;
+
+       skb_get(skb);
+       rc = netlink_unicast(nls, skb, daemon_pid, MSG_DONTWAIT);
+       if (rc < 0) {
+               mempool_free(skb, zone->pool);
+               printk(KERN_ERR "iscsi: can not unicast skb (%d)\n", rc);
+               return rc;
+       }
+
+       spin_lock_irqsave(&zone->freelock, flags);
+       list_add(skb_to_lh(skb), &zone->freequeue);
+       spin_unlock_irqrestore(&zone->freelock, flags);
+
+       return 0;
+}
+
+int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
+                  char *data, uint32_t data_size)
+{
+       struct nlmsghdr *nlh;
+       struct sk_buff *skb;
+       struct iscsi_uevent *ev;
+       char *pdu;
+       int len = NLMSG_SPACE(sizeof(*ev) + sizeof(struct iscsi_hdr) +
+                             data_size);
+
+       mempool_zone_complete(conn->z_pdu);
+
+       skb = mempool_zone_get_skb(conn->z_pdu);
+       if (!skb) {
+               iscsi_conn_error(conn, ISCSI_ERR_CONN_FAILED);
+               dev_printk(KERN_ERR, &conn->dev, "iscsi: can not deliver "
+                          "control PDU: OOM\n");
+               return -ENOMEM;
+       }
+
+       nlh = __nlmsg_put(skb, daemon_pid, 0, 0, (len - sizeof(*nlh)), 0);
+       ev = NLMSG_DATA(nlh);
+       memset(ev, 0, sizeof(*ev));
+       ev->transport_handle = iscsi_handle(conn->transport);
+       ev->type = ISCSI_KEVENT_RECV_PDU;
+       if (atomic_read(&conn->z_pdu->allocated) >= conn->z_pdu->hiwat)
+               ev->iferror = -ENOMEM;
+       ev->r.recv_req.conn_handle = iscsi_handle(conn);
+       pdu = (char*)ev + sizeof(*ev);
+       memcpy(pdu, hdr, sizeof(struct iscsi_hdr));
+       memcpy(pdu + sizeof(struct iscsi_hdr), data, data_size);
+
+       return iscsi_unicast_skb(conn->z_pdu, skb);
+}
+EXPORT_SYMBOL_GPL(iscsi_recv_pdu);
+
+void iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error)
+{
+       struct nlmsghdr *nlh;
+       struct sk_buff  *skb;
+       struct iscsi_uevent *ev;
+       int len = NLMSG_SPACE(sizeof(*ev));
+
+       mempool_zone_complete(conn->z_error);
+
+       skb = mempool_zone_get_skb(conn->z_error);
+       if (!skb) {
+               dev_printk(KERN_ERR, &conn->dev, "iscsi: gracefully ignored "
+                         "conn error (%d)\n", error);
+               return;
+       }
+
+       nlh = __nlmsg_put(skb, daemon_pid, 0, 0, (len - sizeof(*nlh)), 0);
+       ev = NLMSG_DATA(nlh);
+       ev->transport_handle = iscsi_handle(conn->transport);
+       ev->type = ISCSI_KEVENT_CONN_ERROR;
+       if (atomic_read(&conn->z_error->allocated) >= conn->z_error->hiwat)
+               ev->iferror = -ENOMEM;
+       ev->r.connerror.error = error;
+       ev->r.connerror.conn_handle = iscsi_handle(conn);
+
+       iscsi_unicast_skb(conn->z_error, skb);
+
+       dev_printk(KERN_INFO, &conn->dev, "iscsi: detected conn error (%d)\n",
+                  error);
+}
+EXPORT_SYMBOL_GPL(iscsi_conn_error);
+
+static int
+iscsi_if_send_reply(int pid, int seq, int type, int done, int multi,
+                     void *payload, int size)
+{
+       struct sk_buff  *skb;
+       struct nlmsghdr *nlh;
+       int len = NLMSG_SPACE(size);
+       int flags = multi ? NLM_F_MULTI : 0;
+       int t = done ? NLMSG_DONE : type;
+
+       mempool_zone_complete(z_reply);
+
+       skb = mempool_zone_get_skb(z_reply);
+       /*
+        * FIXME:
+        * user is supposed to react on iferror == -ENOMEM;
+        * see iscsi_if_rx().
+        */
+       BUG_ON(!skb);
+
+       nlh = __nlmsg_put(skb, pid, seq, t, (len - sizeof(*nlh)), 0);
+       nlh->nlmsg_flags = flags;
+       memcpy(NLMSG_DATA(nlh), payload, size);
+       return iscsi_unicast_skb(z_reply, skb);
+}
+
+static int
+iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh)
+{
+       struct iscsi_uevent *ev = NLMSG_DATA(nlh);
+       struct iscsi_stats *stats;
+       struct sk_buff *skbstat;
+       struct iscsi_cls_conn *conn;
+       struct nlmsghdr *nlhstat;
+       struct iscsi_uevent *evstat;
+       int len = NLMSG_SPACE(sizeof(*ev) +
+                             sizeof(struct iscsi_stats) +
+                             sizeof(struct iscsi_stats_custom) *
+                             ISCSI_STATS_CUSTOM_MAX);
+       int err = 0;
+
+       conn = iscsi_conn_lookup(ev->u.get_stats.conn_handle);
+       if (!conn)
+               return -EEXIST;
+
+       do {
+               int actual_size;
+
+               mempool_zone_complete(conn->z_pdu);
+
+               skbstat = mempool_zone_get_skb(conn->z_pdu);
+               if (!skbstat) {
+                       dev_printk(KERN_ERR, &conn->dev, "iscsi: can not "
+                                  "deliver stats: OOM\n");
+                       return -ENOMEM;
+               }
+
+               nlhstat = __nlmsg_put(skbstat, daemon_pid, 0, 0,
+                                     (len - sizeof(*nlhstat)), 0);
+               evstat = NLMSG_DATA(nlhstat);
+               memset(evstat, 0, sizeof(*evstat));
+               evstat->transport_handle = iscsi_handle(conn->transport);
+               evstat->type = nlh->nlmsg_type;
+               if (atomic_read(&conn->z_pdu->allocated) >= conn->z_pdu->hiwat)
+                       evstat->iferror = -ENOMEM;
+               evstat->u.get_stats.conn_handle =
+                       ev->u.get_stats.conn_handle;
+               stats = (struct iscsi_stats *)
+                       ((char*)evstat + sizeof(*evstat));
+               memset(stats, 0, sizeof(*stats));
+
+               transport->get_stats(conn, stats);
+               actual_size = NLMSG_SPACE(sizeof(struct iscsi_uevent) +
+                                         sizeof(struct iscsi_stats) +
+                                         sizeof(struct iscsi_stats_custom) *
+                                         stats->custom_length);
+               actual_size -= sizeof(*nlhstat);
+               actual_size = NLMSG_LENGTH(actual_size);
+               skb_trim(skbstat, NLMSG_ALIGN(actual_size));
+               nlhstat->nlmsg_len = actual_size;
+
+               err = iscsi_unicast_skb(conn->z_pdu, skbstat);
+       } while (err < 0 && err != -ECONNREFUSED);
+
+       return err;
+}
+
+static int
+iscsi_if_create_session(struct iscsi_internal *priv, struct iscsi_uevent *ev)
+{
+       struct iscsi_transport *transport = priv->iscsi_transport;
+       struct iscsi_cls_session *session;
+       uint32_t sid;
+
+       session = transport->create_session(&priv->t,
+                                           ev->u.c_session.initial_cmdsn,
+                                           &sid);
+       if (!session)
+               return -ENOMEM;
+
+       ev->r.c_session_ret.session_handle = iscsi_handle(session);
+       ev->r.c_session_ret.sid = sid;
+       return 0;
+}
+
+static int
+iscsi_if_create_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev)
+{
+       struct iscsi_cls_conn *conn;
+       struct iscsi_cls_session *session;
+       unsigned long flags;
+
+       session = iscsi_session_lookup(ev->u.c_conn.session_handle);
+       if (!session)
+               return -EINVAL;
+
+       conn = transport->create_conn(session, ev->u.c_conn.cid);
+       if (!conn)
+               return -ENOMEM;
+
+       conn->z_pdu = mempool_zone_init(Z_MAX_PDU,
+                       NLMSG_SPACE(sizeof(struct iscsi_uevent) +
+                                   sizeof(struct iscsi_hdr) +
+                                   DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH),
+                       Z_HIWAT_PDU);
+       if (!conn->z_pdu) {
+               dev_printk(KERN_ERR, &conn->dev, "iscsi: can not allocate "
+                          "pdu zone for new conn\n");
+               goto destroy_conn;
+       }
+
+       conn->z_error = mempool_zone_init(Z_MAX_ERROR,
+                       NLMSG_SPACE(sizeof(struct iscsi_uevent)),
+                       Z_HIWAT_ERROR);
+       if (!conn->z_error) {
+               dev_printk(KERN_ERR, &conn->dev, "iscsi: can not allocate "
+                          "error zone for new conn\n");
+               goto free_pdu_pool;
+       }
+
+       ev->r.handle = iscsi_handle(conn);
+
+       spin_lock_irqsave(&connlock, flags);
+       list_add(&conn->conn_list, &connlist);
+       conn->active = 1;
+       spin_unlock_irqrestore(&connlock, flags);
+
+       return 0;
+
+free_pdu_pool:
+       mempool_zone_destroy(conn->z_pdu);
+destroy_conn:
+       if (transport->destroy_conn)
+               transport->destroy_conn(conn->dd_data);
+       return -ENOMEM;
+}
+
+static int
+iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev)
+{
+       unsigned long flags;
+       struct iscsi_cls_conn *conn;
+       struct mempool_zone *z_error, *z_pdu;
+
+       conn = iscsi_conn_lookup(ev->u.d_conn.conn_handle);
+       if (!conn)
+               return -EINVAL;
+       spin_lock_irqsave(&connlock, flags);
+       conn->active = 0;
+       list_del(&conn->conn_list);
+       spin_unlock_irqrestore(&connlock, flags);
+
+       z_pdu = conn->z_pdu;
+       z_error = conn->z_error;
+
+       if (transport->destroy_conn)
+               transport->destroy_conn(conn);
+
+       mempool_zone_destroy(z_pdu);
+       mempool_zone_destroy(z_error);
+
+       return 0;
+}
+
+static int
+iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+       int err = 0;
+       struct iscsi_uevent *ev = NLMSG_DATA(nlh);
+       struct iscsi_transport *transport = NULL;
+       struct iscsi_internal *priv;
+       struct iscsi_cls_session *session;
+       struct iscsi_cls_conn *conn;
+
+       priv = iscsi_if_transport_lookup(iscsi_ptr(ev->transport_handle));
+       if (!priv)
+               return -EINVAL;
+       transport = priv->iscsi_transport;
+
+       if (!try_module_get(transport->owner))
+               return -EINVAL;
+
+       switch (nlh->nlmsg_type) {
+       case ISCSI_UEVENT_CREATE_SESSION:
+               err = iscsi_if_create_session(priv, ev);
+               break;
+       case ISCSI_UEVENT_DESTROY_SESSION:
+               session = iscsi_session_lookup(ev->u.d_session.session_handle);
+               if (session)
+                       transport->destroy_session(session);
+               else
+                       err = -EINVAL;
+               break;
+       case ISCSI_UEVENT_CREATE_CONN:
+               err = iscsi_if_create_conn(transport, ev);
+               break;
+       case ISCSI_UEVENT_DESTROY_CONN:
+               err = iscsi_if_destroy_conn(transport, ev);
+               break;
+       case ISCSI_UEVENT_BIND_CONN:
+               session = iscsi_session_lookup(ev->u.b_conn.session_handle);
+               conn = iscsi_conn_lookup(ev->u.b_conn.conn_handle);
+
+               if (session && conn)
+                       ev->r.retcode = transport->bind_conn(session, conn,
+                                       ev->u.b_conn.transport_fd,
+                                       ev->u.b_conn.is_leading);
+               else
+                       err = -EINVAL;
+               break;
+       case ISCSI_UEVENT_SET_PARAM:
+               conn = iscsi_conn_lookup(ev->u.set_param.conn_handle);
+               if (conn)
+                       ev->r.retcode = transport->set_param(conn,
+                               ev->u.set_param.param, ev->u.set_param.value);
+               else
+                       err = -EINVAL;
+               break;
+       case ISCSI_UEVENT_START_CONN:
+               conn = iscsi_conn_lookup(ev->u.start_conn.conn_handle);
+               if (conn)
+                       ev->r.retcode = transport->start_conn(conn);
+               else
+                       err = -EINVAL;
+
+               break;
+       case ISCSI_UEVENT_STOP_CONN:
+               conn = iscsi_conn_lookup(ev->u.stop_conn.conn_handle);
+               if (conn)
+                       transport->stop_conn(conn, ev->u.stop_conn.flag);
+               else
+                       err = -EINVAL;
+               break;
+       case ISCSI_UEVENT_SEND_PDU:
+               conn = iscsi_conn_lookup(ev->u.send_pdu.conn_handle);
+               if (conn)
+                       ev->r.retcode = transport->send_pdu(conn,
+                               (struct iscsi_hdr*)((char*)ev + sizeof(*ev)),
+                               (char*)ev + sizeof(*ev) + ev->u.send_pdu.hdr_size,
+                               ev->u.send_pdu.data_size);
+               else
+                       err = -EINVAL;
+               break;
+       case ISCSI_UEVENT_GET_STATS:
+               err = iscsi_if_get_stats(transport, nlh);
+               break;
+       default:
+               err = -EINVAL;
+               break;
+       }
+
+       module_put(transport->owner);
+       return err;
+}
+
+/* Get message from skb (based on rtnetlink_rcv_skb).  Each message is
+ * processed by iscsi_if_recv_msg.  Malformed skbs with wrong length
+ * or invalid creds are discarded silently.  */
+static void
+iscsi_if_rx(struct sock *sk, int len)
+{
+       struct sk_buff *skb;
+
+       mutex_lock(&rx_queue_mutex);
+       while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+               if (NETLINK_CREDS(skb)->uid) {
+                       skb_pull(skb, skb->len);
+                       goto free_skb;
+               }
+               daemon_pid = NETLINK_CREDS(skb)->pid;
+
+               while (skb->len >= NLMSG_SPACE(0)) {
+                       int err;
+                       uint32_t rlen;
+                       struct nlmsghdr *nlh;
+                       struct iscsi_uevent *ev;
+
+                       nlh = (struct nlmsghdr *)skb->data;
+                       if (nlh->nlmsg_len < sizeof(*nlh) ||
+                           skb->len < nlh->nlmsg_len) {
+                               break;
+                       }
+
+                       ev = NLMSG_DATA(nlh);
+                       rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+                       if (rlen > skb->len)
+                               rlen = skb->len;
+
+                       err = iscsi_if_recv_msg(skb, nlh);
+                       if (err) {
+                               ev->type = ISCSI_KEVENT_IF_ERROR;
+                               ev->iferror = err;
+                       }
+                       do {
+                               /*
+                                * special case for GET_STATS:
+                                * on success - sending reply and stats from
+                                * inside of if_recv_msg(),
+                                * on error - fall through.
+                                */
+                               if (ev->type == ISCSI_UEVENT_GET_STATS && !err)
+                                       break;
+                               err = iscsi_if_send_reply(
+                                       NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq,
+                                       nlh->nlmsg_type, 0, 0, ev, sizeof(*ev));
+                               if (atomic_read(&z_reply->allocated) >=
+                                               z_reply->hiwat)
+                                       ev->iferror = -ENOMEM;
+                       } while (err < 0 && err != -ECONNREFUSED);
+                       skb_pull(skb, rlen);
+               }
+free_skb:
+               kfree_skb(skb);
+       }
+       mutex_unlock(&rx_queue_mutex);
+}
+
+#define iscsi_cdev_to_conn(_cdev) \
+       iscsi_dev_to_conn(_cdev->dev)
+
+/*
+ * iSCSI connection attrs
+ */
+#define iscsi_conn_int_attr_show(param, format)                                \
 static ssize_t                                                         \
-show_session_str_##field(struct class_device *cdev, char *buf)         \
+show_conn_int_param_##param(struct class_device *cdev, char *buf)      \
 {                                                                      \
-       ssize_t ret = 0;                                                \
-       struct scsi_target *starget = transport_class_to_starget(cdev); \
-       struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);    \
-       struct iscsi_internal *i = to_iscsi_internal(shost->transportt); \
+       uint32_t value = 0;                                             \
+       struct iscsi_cls_conn *conn = iscsi_cdev_to_conn(cdev);         \
+       struct iscsi_transport *t = conn->transport;                    \
                                                                        \
-       if (i->fnt->get_##field)                                        \
-               ret = i->fnt->get_##field(starget, buf, PAGE_SIZE);     \
-       return ret;                                                     \
+       t->get_conn_param(conn, param, &value);                         \
+       return snprintf(buf, 20, format"\n", value);                    \
 }
 
-#define iscsi_session_rd_str_attr(field)                               \
-       iscsi_session_show_str_fn(field)                                \
-static CLASS_DEVICE_ATTR(field, S_IRUGO, show_session_str_##field, NULL);
+#define iscsi_conn_int_attr(field, param, format)                      \
+       iscsi_conn_int_attr_show(param, format)                         \
+static CLASS_DEVICE_ATTR(field, S_IRUGO, show_conn_int_param_##param, NULL);
 
-iscsi_session_rd_str_attr(target_name);
-iscsi_session_rd_str_attr(target_alias);
+iscsi_conn_int_attr(max_recv_dlength, ISCSI_PARAM_MAX_RECV_DLENGTH, "%u");
+iscsi_conn_int_attr(max_xmit_dlength, ISCSI_PARAM_MAX_XMIT_DLENGTH, "%u");
+iscsi_conn_int_attr(header_digest, ISCSI_PARAM_HDRDGST_EN, "%d");
+iscsi_conn_int_attr(data_digest, ISCSI_PARAM_DATADGST_EN, "%d");
+iscsi_conn_int_attr(ifmarker, ISCSI_PARAM_IFMARKER_EN, "%d");
+iscsi_conn_int_attr(ofmarker, ISCSI_PARAM_OFMARKER_EN, "%d");
 
-/*
- * iSCSI host attrs
- */
+#define iscsi_cdev_to_session(_cdev) \
+       iscsi_dev_to_session(_cdev->dev)
 
 /*
- * Again, this is used for iSCSI names. Normally, we follow
- * the transport class convention of having the lld set
- * the field, but in these cases the value is too large.
+ * iSCSI session attrs
  */
-#define iscsi_host_show_str_fn(field)                                  \
-                                                                       \
+#define iscsi_session_int_attr_show(param, format)                     \
 static ssize_t                                                         \
-show_host_str_##field(struct class_device *cdev, char *buf)            \
+show_session_int_param_##param(struct class_device *cdev, char *buf)   \
 {                                                                      \
-       int ret = 0;                                                    \
-       struct Scsi_Host *shost = transport_class_to_shost(cdev);       \
-       struct iscsi_internal *i = to_iscsi_internal(shost->transportt); \
+       uint32_t value = 0;                                             \
+       struct iscsi_cls_session *session = iscsi_cdev_to_session(cdev);        \
+       struct iscsi_transport *t = session->transport;                 \
                                                                        \
-       if (i->fnt->get_##field)                                        \
-               ret = i->fnt->get_##field(shost, buf, PAGE_SIZE);       \
-       return ret;                                                     \
+       t->get_session_param(session, param, &value);                   \
+       return snprintf(buf, 20, format"\n", value);                    \
 }
 
-#define iscsi_host_rd_str_attr(field)                                  \
-       iscsi_host_show_str_fn(field)                                   \
-static CLASS_DEVICE_ATTR(field, S_IRUGO, show_host_str_##field, NULL);
+#define iscsi_session_int_attr(field, param, format)                   \
+       iscsi_session_int_attr_show(param, format)                      \
+static CLASS_DEVICE_ATTR(field, S_IRUGO, show_session_int_param_##param, NULL);
 
-iscsi_host_rd_str_attr(initiator_name);
-iscsi_host_rd_str_attr(initiator_alias);
+iscsi_session_int_attr(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN, "%d");
+iscsi_session_int_attr(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T, "%hu");
+iscsi_session_int_attr(immediate_data, ISCSI_PARAM_IMM_DATA_EN, "%d");
+iscsi_session_int_attr(first_burst_len, ISCSI_PARAM_FIRST_BURST, "%u");
+iscsi_session_int_attr(max_burst_len, ISCSI_PARAM_MAX_BURST, "%u");
+iscsi_session_int_attr(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN, "%d");
+iscsi_session_int_attr(data_seq_in_order, ISCSI_PARAM_DATASEQ_INORDER_EN, "%d");
+iscsi_session_int_attr(erl, ISCSI_PARAM_ERL, "%d");
 
-#define SETUP_SESSION_RD_ATTR(field)                                   \
-       if (i->fnt->show_##field) {                                     \
-               i->session_attrs[count] = &class_device_attr_##field;   \
+#define SETUP_SESSION_RD_ATTR(field, param)                            \
+       if (priv->param_mask & (1 << param)) {                          \
+               priv->session_attrs[count] = &class_device_attr_##field;\
                count++;                                                \
        }
 
-#define SETUP_HOST_RD_ATTR(field)                                      \
-       if (i->fnt->show_##field) {                                     \
-               i->host_attrs[count] = &class_device_attr_##field;      \
+#define SETUP_CONN_RD_ATTR(field, param)                               \
+       if (priv->param_mask & (1 << param)) {                          \
+               priv->conn_attrs[count] = &class_device_attr_##field;   \
                count++;                                                \
        }
 
-static int iscsi_host_match(struct attribute_container *cont,
-                         struct device *dev)
+static int iscsi_session_match(struct attribute_container *cont,
+                          struct device *dev)
 {
+       struct iscsi_cls_session *session;
        struct Scsi_Host *shost;
-       struct iscsi_internal *i;
+       struct iscsi_internal *priv;
 
-       if (!scsi_is_host_device(dev))
+       if (!iscsi_is_session_dev(dev))
                return 0;
 
-       shost = dev_to_shost(dev);
-       if (!shost->transportt  || shost->transportt->host_attrs.ac.class
-           != &iscsi_host_class.class)
+       session = iscsi_dev_to_session(dev);
+       shost = iscsi_session_to_shost(session);
+       if (!shost->transportt)
                return 0;
 
-       i = to_iscsi_internal(shost->transportt);
-       
-       return &i->t.host_attrs.ac == cont;
+       priv = to_iscsi_internal(shost->transportt);
+       if (priv->session_cont.ac.class != &iscsi_session_class.class)
+               return 0;
+
+       return &priv->session_cont.ac == cont;
 }
 
-static int iscsi_target_match(struct attribute_container *cont,
-                           struct device *dev)
+static int iscsi_conn_match(struct attribute_container *cont,
+                          struct device *dev)
 {
+       struct iscsi_cls_session *session;
+       struct iscsi_cls_conn *conn;
        struct Scsi_Host *shost;
-       struct iscsi_internal *i;
+       struct iscsi_internal *priv;
 
-       if (!scsi_is_target_device(dev))
+       if (!iscsi_is_conn_dev(dev))
                return 0;
 
-       shost = dev_to_shost(dev->parent);
-       if (!shost->transportt  || shost->transportt->host_attrs.ac.class
-           != &iscsi_host_class.class)
+       conn = iscsi_dev_to_conn(dev);
+       session = iscsi_dev_to_session(conn->dev.parent);
+       shost = iscsi_session_to_shost(session);
+
+       if (!shost->transportt)
                return 0;
 
-       i = to_iscsi_internal(shost->transportt);
-       
-       return &i->t.target_attrs.ac == cont;
+       priv = to_iscsi_internal(shost->transportt);
+       if (priv->conn_cont.ac.class != &iscsi_connection_class.class)
+               return 0;
+
+       return &priv->conn_cont.ac == cont;
 }
 
 struct scsi_transport_template *
-iscsi_attach_transport(struct iscsi_function_template *fnt)
+iscsi_register_transport(struct iscsi_transport *tt)
 {
-       struct iscsi_internal *i = kmalloc(sizeof(struct iscsi_internal),
-                                          GFP_KERNEL);
-       int count = 0;
+       struct iscsi_internal *priv;
+       unsigned long flags;
+       int count = 0, err;
+
+       BUG_ON(!tt);
+
+       priv = iscsi_if_transport_lookup(tt);
+       if (priv)
+               return NULL;
 
-       if (unlikely(!i))
+       priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+       if (!priv)
                return NULL;
+       INIT_LIST_HEAD(&priv->list);
+       priv->iscsi_transport = tt;
 
-       memset(i, 0, sizeof(struct iscsi_internal));
-       i->fnt = fnt;
-
-       i->t.target_attrs.ac.attrs = &i->session_attrs[0];
-       i->t.target_attrs.ac.class = &iscsi_transport_class.class;
-       i->t.target_attrs.ac.match = iscsi_target_match;
-       transport_container_register(&i->t.target_attrs);
-       i->t.target_size = sizeof(struct iscsi_class_session);
-
-       SETUP_SESSION_RD_ATTR(tsih);
-       SETUP_SESSION_RD_ATTR(isid);
-       SETUP_SESSION_RD_ATTR(header_digest);
-       SETUP_SESSION_RD_ATTR(data_digest);
-       SETUP_SESSION_RD_ATTR(target_name);
-       SETUP_SESSION_RD_ATTR(target_alias);
-       SETUP_SESSION_RD_ATTR(port);
-       SETUP_SESSION_RD_ATTR(tpgt);
-       SETUP_SESSION_RD_ATTR(ip_address);
-       SETUP_SESSION_RD_ATTR(initial_r2t);
-       SETUP_SESSION_RD_ATTR(immediate_data);
-       SETUP_SESSION_RD_ATTR(max_recv_data_segment_len);
-       SETUP_SESSION_RD_ATTR(max_burst_len);
-       SETUP_SESSION_RD_ATTR(first_burst_len);
-       SETUP_SESSION_RD_ATTR(def_time2wait);
-       SETUP_SESSION_RD_ATTR(def_time2retain);
-       SETUP_SESSION_RD_ATTR(max_outstanding_r2t);
-       SETUP_SESSION_RD_ATTR(data_pdu_in_order);
-       SETUP_SESSION_RD_ATTR(data_sequence_in_order);
-       SETUP_SESSION_RD_ATTR(erl);
+       priv->cdev.class = &iscsi_transport_class;
+       snprintf(priv->cdev.class_id, BUS_ID_SIZE, "%s", tt->name);
+       err = class_device_register(&priv->cdev);
+       if (err)
+               goto free_priv;
 
-       BUG_ON(count > ISCSI_SESSION_ATTRS);
-       i->session_attrs[count] = NULL;
+       err = sysfs_create_group(&priv->cdev.kobj, &iscsi_transport_group);
+       if (err)
+               goto unregister_cdev;
 
-       i->t.host_attrs.ac.attrs = &i->host_attrs[0];
-       i->t.host_attrs.ac.class = &iscsi_host_class.class;
-       i->t.host_attrs.ac.match = iscsi_host_match;
-       transport_container_register(&i->t.host_attrs);
-       i->t.host_size = 0;
+       /* setup parameters mask */
+       priv->param_mask = 0xFFFFFFFF;
+       if (!(tt->caps & CAP_MULTI_R2T))
+               priv->param_mask &= ~(1 << ISCSI_PARAM_MAX_R2T);
+       if (!(tt->caps & CAP_HDRDGST))
+               priv->param_mask &= ~(1 << ISCSI_PARAM_HDRDGST_EN);
+       if (!(tt->caps & CAP_DATADGST))
+               priv->param_mask &= ~(1 << ISCSI_PARAM_DATADGST_EN);
+       if (!(tt->caps & CAP_MARKERS)) {
+               priv->param_mask &= ~(1 << ISCSI_PARAM_IFMARKER_EN);
+               priv->param_mask &= ~(1 << ISCSI_PARAM_OFMARKER_EN);
+       }
 
+       /* connection parameters */
+       priv->conn_cont.ac.attrs = &priv->conn_attrs[0];
+       priv->conn_cont.ac.class = &iscsi_connection_class.class;
+       priv->conn_cont.ac.match = iscsi_conn_match;
+       transport_container_register(&priv->conn_cont);
+
+       SETUP_CONN_RD_ATTR(max_recv_dlength, ISCSI_PARAM_MAX_RECV_DLENGTH);
+       SETUP_CONN_RD_ATTR(max_xmit_dlength, ISCSI_PARAM_MAX_XMIT_DLENGTH);
+       SETUP_CONN_RD_ATTR(header_digest, ISCSI_PARAM_HDRDGST_EN);
+       SETUP_CONN_RD_ATTR(data_digest, ISCSI_PARAM_DATADGST_EN);
+       SETUP_CONN_RD_ATTR(ifmarker, ISCSI_PARAM_IFMARKER_EN);
+       SETUP_CONN_RD_ATTR(ofmarker, ISCSI_PARAM_OFMARKER_EN);
+
+       BUG_ON(count > ISCSI_CONN_ATTRS);
+       priv->conn_attrs[count] = NULL;
        count = 0;
-       SETUP_HOST_RD_ATTR(initiator_name);
-       SETUP_HOST_RD_ATTR(initiator_alias);
 
-       BUG_ON(count > ISCSI_HOST_ATTRS);
-       i->host_attrs[count] = NULL;
+       /* session parameters */
+       priv->session_cont.ac.attrs = &priv->session_attrs[0];
+       priv->session_cont.ac.class = &iscsi_session_class.class;
+       priv->session_cont.ac.match = iscsi_session_match;
+       transport_container_register(&priv->session_cont);
+
+       SETUP_SESSION_RD_ATTR(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN);
+       SETUP_SESSION_RD_ATTR(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T);
+       SETUP_SESSION_RD_ATTR(immediate_data, ISCSI_PARAM_IMM_DATA_EN);
+       SETUP_SESSION_RD_ATTR(first_burst_len, ISCSI_PARAM_FIRST_BURST);
+       SETUP_SESSION_RD_ATTR(max_burst_len, ISCSI_PARAM_MAX_BURST);
+       SETUP_SESSION_RD_ATTR(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN);
+       SETUP_SESSION_RD_ATTR(data_seq_in_order,ISCSI_PARAM_DATASEQ_INORDER_EN)
+       SETUP_SESSION_RD_ATTR(erl, ISCSI_PARAM_ERL);
+
+       BUG_ON(count > ISCSI_SESSION_ATTRS);
+       priv->session_attrs[count] = NULL;
+
+       spin_lock_irqsave(&iscsi_transport_lock, flags);
+       list_add(&priv->list, &iscsi_transports);
+       spin_unlock_irqrestore(&iscsi_transport_lock, flags);
+
+       printk(KERN_NOTICE "iscsi: registered transport (%s)\n", tt->name);
+       return &priv->t;
 
-       return &i->t;
+unregister_cdev:
+       class_device_unregister(&priv->cdev);
+free_priv:
+       kfree(priv);
+       return NULL;
 }
+EXPORT_SYMBOL_GPL(iscsi_register_transport);
+
+int iscsi_unregister_transport(struct iscsi_transport *tt)
+{
+       struct iscsi_internal *priv;
+       unsigned long flags;
+
+       BUG_ON(!tt);
+
+       mutex_lock(&rx_queue_mutex);
+
+       priv = iscsi_if_transport_lookup(tt);
+       BUG_ON (!priv);
+
+       spin_lock_irqsave(&iscsi_transport_lock, flags);
+       list_del(&priv->list);
+       spin_unlock_irqrestore(&iscsi_transport_lock, flags);
 
-EXPORT_SYMBOL(iscsi_attach_transport);
+       transport_container_unregister(&priv->conn_cont);
+       transport_container_unregister(&priv->session_cont);
 
-void iscsi_release_transport(struct scsi_transport_template *t)
+       sysfs_remove_group(&priv->cdev.kobj, &iscsi_transport_group);
+       class_device_unregister(&priv->cdev);
+       mutex_unlock(&rx_queue_mutex);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(iscsi_unregister_transport);
+
+static int
+iscsi_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr)
 {
-       struct iscsi_internal *i = to_iscsi_internal(t);
+       struct netlink_notify *n = ptr;
 
-       transport_container_unregister(&i->t.target_attrs);
-       transport_container_unregister(&i->t.host_attrs);
-  
-       kfree(i);
+       if (event == NETLINK_URELEASE &&
+           n->protocol == NETLINK_ISCSI && n->pid) {
+               struct iscsi_cls_conn *conn;
+               unsigned long flags;
+
+               mempool_zone_complete(z_reply);
+               spin_lock_irqsave(&connlock, flags);
+               list_for_each_entry(conn, &connlist, conn_list) {
+                       mempool_zone_complete(conn->z_error);
+                       mempool_zone_complete(conn->z_pdu);
+               }
+               spin_unlock_irqrestore(&connlock, flags);
+       }
+
+       return NOTIFY_DONE;
 }
 
-EXPORT_SYMBOL(iscsi_release_transport);
+static struct notifier_block iscsi_nl_notifier = {
+       .notifier_call  = iscsi_rcv_nl_event,
+};
 
 static __init int iscsi_transport_init(void)
 {
-       int err = transport_class_register(&iscsi_transport_class);
+       int err;
 
+       err = class_register(&iscsi_transport_class);
        if (err)
                return err;
-       return transport_class_register(&iscsi_host_class);
+
+       err = transport_class_register(&iscsi_connection_class);
+       if (err)
+               goto unregister_transport_class;
+
+       err = transport_class_register(&iscsi_session_class);
+       if (err)
+               goto unregister_conn_class;
+
+       err = netlink_register_notifier(&iscsi_nl_notifier);
+       if (err)
+               goto unregister_session_class;
+
+       nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx,
+                       THIS_MODULE);
+       if (!nls) {
+               err = -ENOBUFS;
+               goto unregister_notifier;
+       }
+
+       z_reply = mempool_zone_init(Z_MAX_REPLY,
+               NLMSG_SPACE(sizeof(struct iscsi_uevent)), Z_HIWAT_REPLY);
+       if (z_reply)
+               return 0;
+
+       sock_release(nls->sk_socket);
+unregister_notifier:
+       netlink_unregister_notifier(&iscsi_nl_notifier);
+unregister_session_class:
+       transport_class_unregister(&iscsi_session_class);
+unregister_conn_class:
+       transport_class_unregister(&iscsi_connection_class);
+unregister_transport_class:
+       class_unregister(&iscsi_transport_class);
+       return err;
 }
 
 static void __exit iscsi_transport_exit(void)
 {
-       transport_class_unregister(&iscsi_host_class);
-       transport_class_unregister(&iscsi_transport_class);
+       mempool_zone_destroy(z_reply);
+       sock_release(nls->sk_socket);
+       netlink_unregister_notifier(&iscsi_nl_notifier);
+       transport_class_unregister(&iscsi_connection_class);
+       transport_class_unregister(&iscsi_session_class);
+       class_unregister(&iscsi_transport_class);
 }
 
 module_init(iscsi_transport_init);
 module_exit(iscsi_transport_exit);
 
-MODULE_AUTHOR("Mike Christie");
-MODULE_DESCRIPTION("iSCSI Transport Attributes");
+MODULE_AUTHOR("Mike Christie <michaelc@cs.wisc.edu>, "
+             "Dmitry Yusupov <dmitry_yus@yahoo.com>, "
+             "Alex Aizman <itn780@yahoo.com>");
+MODULE_DESCRIPTION("iSCSI Transport Interface");
 MODULE_LICENSE("GPL");
index fbb53ad..9685977 100644 (file)
@@ -7,6 +7,9 @@
  *
  *  Copyright (C) 2001 Russell King.
  *
+ *  2005/09/16: Enabled higher baud rates for 16C95x.
+ *             (Mathias Adam <a2@adamis.de>)
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -1745,6 +1748,14 @@ static unsigned int serial8250_get_divisor(struct uart_port *port, unsigned int
        else if ((port->flags & UPF_MAGIC_MULTIPLIER) &&
                 baud == (port->uartclk/8))
                quot = 0x8002;
+       /*
+        * For 16C950s UART_TCR is used in combination with divisor==1
+        * to achieve baud rates up to baud_base*4.
+        */
+       else if ((port->type == PORT_16C950) &&
+                baud > (port->uartclk/16))
+               quot = 1;
+
        else
                quot = uart_get_divisor(port, baud);
 
@@ -1758,7 +1769,7 @@ serial8250_set_termios(struct uart_port *port, struct termios *termios,
        struct uart_8250_port *up = (struct uart_8250_port *)port;
        unsigned char cval, fcr = 0;
        unsigned long flags;
-       unsigned int baud, quot;
+       unsigned int baud, quot, max_baud;
 
        switch (termios->c_cflag & CSIZE) {
        case CS5:
@@ -1790,7 +1801,8 @@ serial8250_set_termios(struct uart_port *port, struct termios *termios,
        /*
         * Ask the core to calculate the divisor for us.
         */
-       baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16); 
+       max_baud = (up->port.type == PORT_16C950 ? port->uartclk/4 : port->uartclk/16);
+       baud = uart_get_baud_rate(port, termios, old, 0, max_baud); 
        quot = serial8250_get_divisor(port, baud);
 
        /*
@@ -1826,6 +1838,19 @@ serial8250_set_termios(struct uart_port *port, struct termios *termios,
         */
        spin_lock_irqsave(&up->port.lock, flags);
 
+       /* 
+        * 16C950 supports additional prescaler ratios between 1:16 and 1:4
+        * thus increasing max baud rate to uartclk/4.
+        */
+       if (up->port.type == PORT_16C950) {
+               if (baud == port->uartclk/4)
+                       serial_icr_write(up, UART_TCR, 0x4);
+               else if (baud == port->uartclk/8)
+                       serial_icr_write(up, UART_TCR, 0x8);
+               else
+                       serial_icr_write(up, UART_TCR, 0);
+       }
+       
        /*
         * Update the per-port timeout.
         */
@@ -2243,10 +2268,14 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count)
 
        touch_nmi_watchdog();
 
-       if (oops_in_progress) {
-               locked = spin_trylock_irqsave(&up->port.lock, flags);
+       local_irq_save(flags);
+       if (up->port.sysrq) {
+               /* serial8250_handle_port() already took the lock */
+               locked = 0;
+       } else if (oops_in_progress) {
+               locked = spin_trylock(&up->port.lock);
        } else
-               spin_lock_irqsave(&up->port.lock, flags);
+               spin_lock(&up->port.lock);
 
        /*
         *      First save the IER then disable the interrupts
@@ -2268,7 +2297,8 @@ serial8250_console_write(struct console *co, const char *s, unsigned int count)
        serial_out(up, UART_IER, ier);
 
        if (locked)
-               spin_unlock_irqrestore(&up->port.lock, flags);
+               spin_unlock(&up->port.lock);
+       local_irq_restore(flags);
 }
 
 static int serial8250_console_setup(struct console *co, char *options)
index 7d22dc0..d271617 100644 (file)
@@ -11,6 +11,7 @@ menu "Serial drivers"
 config SERIAL_8250
        tristate "8250/16550 and compatible serial support"
        depends on (BROKEN || !SPARC)
+       depends on !XEN_DISABLE_SERIAL
        select SERIAL_CORE
        ---help---
          This selects whether you want to include the driver for the standard
@@ -803,6 +804,7 @@ config SERIAL_MPC52xx
        tristate "Freescale MPC52xx family PSC serial support"
        depends on PPC_MPC52xx
        select SERIAL_CORE
+       select FW_LOADER
        help
          This drivers support the MPC52xx PSC serial ports. If you would
          like to use them, you must answer Y or M to this option. Not that
index 7abe532..17839e7 100644 (file)
@@ -1968,16 +1968,16 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *port)
                struct termios termios;
 
                /*
-                * Get the termios for this line
+                * First try to use the console cflag setting.
                 */
-               tty_get_termios(drv->tty_driver, port->line, &termios);
+               memset(&termios, 0, sizeof(struct termios));
+               termios.c_cflag = port->cons->cflag;
 
                /*
-                * If the console cflag is still set, subsitute that
-                * for the termios cflag.
+                * If that's unset, use the tty termios setting.
                 */
-               if (port->cons->cflag)
-                       termios.c_cflag = port->cons->cflag;
+               if (state->info && state->info->tty && termios.c_cflag == 0)
+                       termios = *state->info->tty->termios;
 
                port->ops->set_termios(port, &termios, NULL);
                console_start(port->cons);
index 2a7c195..5f44354 100644 (file)
  * (C) Copyright 2000 Yggdrasil Computing, Inc. (port of new PCI interface
  *               support from usb-ohci.c by Adam Richter, adam@yggdrasil.com).
  * (C) Copyright 1999 Gregory P. Smith (from usb-ohci.c)
- * (C) Copyright 2004 Alan Stern, stern@rowland.harvard.edu
+ * (C) Copyright 2004-2005 Alan Stern, stern@rowland.harvard.edu
  */
 
-static int uhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb);
-static void uhci_unlink_generic(struct uhci_hcd *uhci, struct urb *urb);
-static void uhci_remove_pending_urbps(struct uhci_hcd *uhci);
-static void uhci_free_pending_qhs(struct uhci_hcd *uhci);
 static void uhci_free_pending_tds(struct uhci_hcd *uhci);
 
 /*
@@ -30,8 +26,10 @@ static void uhci_free_pending_tds(struct uhci_hcd *uhci);
  * games with the FSBR code to make sure we get the correct order in all
  * the cases. I don't think it's worth the effort
  */
-static inline void uhci_set_next_interrupt(struct uhci_hcd *uhci)
+static void uhci_set_next_interrupt(struct uhci_hcd *uhci)
 {
+       if (uhci->is_stopped)
+               mod_timer(&uhci_to_hcd(uhci)->rh_timer, jiffies);
        uhci->term_td->status |= cpu_to_le32(TD_CTRL_IOC); 
 }
 
@@ -40,13 +38,7 @@ static inline void uhci_clear_next_interrupt(struct uhci_hcd *uhci)
        uhci->term_td->status &= ~cpu_to_le32(TD_CTRL_IOC);
 }
 
-static inline void uhci_moveto_complete(struct uhci_hcd *uhci, 
-                                       struct urb_priv *urbp)
-{
-       list_move_tail(&urbp->urb_list, &uhci->complete_list);
-}
-
-static struct uhci_td *uhci_alloc_td(struct uhci_hcd *uhci, struct usb_device *dev)
+static struct uhci_td *uhci_alloc_td(struct uhci_hcd *uhci)
 {
        dma_addr_t dma_handle;
        struct uhci_td *td;
@@ -56,22 +48,27 @@ static struct uhci_td *uhci_alloc_td(struct uhci_hcd *uhci, struct usb_device *d
                return NULL;
 
        td->dma_handle = dma_handle;
-
-       td->link = UHCI_PTR_TERM;
-       td->buffer = 0;
-
        td->frame = -1;
-       td->dev = dev;
 
        INIT_LIST_HEAD(&td->list);
        INIT_LIST_HEAD(&td->remove_list);
        INIT_LIST_HEAD(&td->fl_list);
 
-       usb_get_dev(dev);
-
        return td;
 }
 
+static void uhci_free_td(struct uhci_hcd *uhci, struct uhci_td *td)
+{
+       if (!list_empty(&td->list))
+               dev_warn(uhci_dev(uhci), "td %p still in list!\n", td);
+       if (!list_empty(&td->remove_list))
+               dev_warn(uhci_dev(uhci), "td %p still in remove_list!\n", td);
+       if (!list_empty(&td->fl_list))
+               dev_warn(uhci_dev(uhci), "td %p still in fl_list!\n", td);
+
+       dma_pool_free(uhci->td_pool, td, td->dma_handle);
+}
+
 static inline void uhci_fill_td(struct uhci_td *td, u32 status,
                u32 token, u32 buffer)
 {
@@ -81,19 +78,20 @@ static inline void uhci_fill_td(struct uhci_td *td, u32 status,
 }
 
 /*
- * We insert Isochronous URB's directly into the frame list at the beginning
+ * We insert Isochronous URBs directly into the frame list at the beginning
  */
-static void uhci_insert_td_frame_list(struct uhci_hcd *uhci, struct uhci_td *td, unsigned framenum)
+static inline void uhci_insert_td_in_frame_list(struct uhci_hcd *uhci,
+               struct uhci_td *td, unsigned framenum)
 {
        framenum &= (UHCI_NUMFRAMES - 1);
 
        td->frame = framenum;
 
        /* Is there a TD already mapped there? */
-       if (uhci->fl->frame_cpu[framenum]) {
+       if (uhci->frame_cpu[framenum]) {
                struct uhci_td *ftd, *ltd;
 
-               ftd = uhci->fl->frame_cpu[framenum];
+               ftd = uhci->frame_cpu[framenum];
                ltd = list_entry(ftd->fl_list.prev, struct uhci_td, fl_list);
 
                list_add_tail(&td->fl_list, &ftd->fl_list);
@@ -102,29 +100,32 @@ static void uhci_insert_td_frame_list(struct uhci_hcd *uhci, struct uhci_td *td,
                wmb();
                ltd->link = cpu_to_le32(td->dma_handle);
        } else {
-               td->link = uhci->fl->frame[framenum];
+               td->link = uhci->frame[framenum];
                wmb();
-               uhci->fl->frame[framenum] = cpu_to_le32(td->dma_handle);
-               uhci->fl->frame_cpu[framenum] = td;
+               uhci->frame[framenum] = cpu_to_le32(td->dma_handle);
+               uhci->frame_cpu[framenum] = td;
        }
 }
 
-static void uhci_remove_td(struct uhci_hcd *uhci, struct uhci_td *td)
+static inline void uhci_remove_td_from_frame_list(struct uhci_hcd *uhci,
+               struct uhci_td *td)
 {
        /* If it's not inserted, don't remove it */
-       if (td->frame == -1 && list_empty(&td->fl_list))
+       if (td->frame == -1) {
+               WARN_ON(!list_empty(&td->fl_list));
                return;
+       }
 
-       if (td->frame != -1 && uhci->fl->frame_cpu[td->frame] == td) {
+       if (uhci->frame_cpu[td->frame] == td) {
                if (list_empty(&td->fl_list)) {
-                       uhci->fl->frame[td->frame] = td->link;
-                       uhci->fl->frame_cpu[td->frame] = NULL;
+                       uhci->frame[td->frame] = td->link;
+                       uhci->frame_cpu[td->frame] = NULL;
                } else {
                        struct uhci_td *ntd;
 
                        ntd = list_entry(td->fl_list.next, struct uhci_td, fl_list);
-                       uhci->fl->frame[td->frame] = cpu_to_le32(ntd->dma_handle);
-                       uhci->fl->frame_cpu[td->frame] = ntd;
+                       uhci->frame[td->frame] = cpu_to_le32(ntd->dma_handle);
+                       uhci->frame_cpu[td->frame] = ntd;
                }
        } else {
                struct uhci_td *ptd;
@@ -133,48 +134,25 @@ static void uhci_remove_td(struct uhci_hcd *uhci, struct uhci_td *td)
                ptd->link = td->link;
        }
 
-       wmb();
-       td->link = UHCI_PTR_TERM;
-
        list_del_init(&td->fl_list);
        td->frame = -1;
 }
 
 /*
- * Inserts a td list into qh.
+ * Remove all the TDs for an Isochronous URB from the frame list
  */
-static void uhci_insert_tds_in_qh(struct uhci_qh *qh, struct urb *urb, __le32 breadth)
+static void uhci_unlink_isochronous_tds(struct uhci_hcd *uhci, struct urb *urb)
 {
-       struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
+       struct urb_priv *urbp = (struct urb_priv *) urb->hcpriv;
        struct uhci_td *td;
-       __le32 *plink;
 
-       /* Ordering isn't important here yet since the QH hasn't been */
-       /* inserted into the schedule yet */
-       plink = &qh->element;
-       list_for_each_entry(td, &urbp->td_list, list) {
-               *plink = cpu_to_le32(td->dma_handle) | breadth;
-               plink = &td->link;
-       }
-       *plink = UHCI_PTR_TERM;
-}
-
-static void uhci_free_td(struct uhci_hcd *uhci, struct uhci_td *td)
-{
-       if (!list_empty(&td->list))
-               dev_warn(uhci_dev(uhci), "td %p still in list!\n", td);
-       if (!list_empty(&td->remove_list))
-               dev_warn(uhci_dev(uhci), "td %p still in remove_list!\n", td);
-       if (!list_empty(&td->fl_list))
-               dev_warn(uhci_dev(uhci), "td %p still in fl_list!\n", td);
-
-       if (td->dev)
-               usb_put_dev(td->dev);
-
-       dma_pool_free(uhci->td_pool, td, td->dma_handle);
+       list_for_each_entry(td, &urbp->td_list, list)
+               uhci_remove_td_from_frame_list(uhci, td);
+       wmb();
 }
 
-static struct uhci_qh *uhci_alloc_qh(struct uhci_hcd *uhci, struct usb_device *dev)
+static struct uhci_qh *uhci_alloc_qh(struct uhci_hcd *uhci,
+               struct usb_device *udev, struct usb_host_endpoint *hep)
 {
        dma_addr_t dma_handle;
        struct uhci_qh *qh;
@@ -188,262 +166,217 @@ static struct uhci_qh *uhci_alloc_qh(struct uhci_hcd *uhci, struct usb_device *d
        qh->element = UHCI_PTR_TERM;
        qh->link = UHCI_PTR_TERM;
 
-       qh->dev = dev;
-       qh->urbp = NULL;
-
-       INIT_LIST_HEAD(&qh->list);
-       INIT_LIST_HEAD(&qh->remove_list);
-
-       usb_get_dev(dev);
+       INIT_LIST_HEAD(&qh->queue);
+       INIT_LIST_HEAD(&qh->node);
 
+       if (udev) {             /* Normal QH */
+               qh->dummy_td = uhci_alloc_td(uhci);
+               if (!qh->dummy_td) {
+                       dma_pool_free(uhci->qh_pool, qh, dma_handle);
+                       return NULL;
+               }
+               qh->state = QH_STATE_IDLE;
+               qh->hep = hep;
+               qh->udev = udev;
+               hep->hcpriv = qh;
+
+       } else {                /* Skeleton QH */
+               qh->state = QH_STATE_ACTIVE;
+               qh->udev = NULL;
+       }
        return qh;
 }
 
 static void uhci_free_qh(struct uhci_hcd *uhci, struct uhci_qh *qh)
 {
-       if (!list_empty(&qh->list))
+       WARN_ON(qh->state != QH_STATE_IDLE && qh->udev);
+       if (!list_empty(&qh->queue))
                dev_warn(uhci_dev(uhci), "qh %p list not empty!\n", qh);
-       if (!list_empty(&qh->remove_list))
-               dev_warn(uhci_dev(uhci), "qh %p still in remove_list!\n", qh);
-
-       if (qh->dev)
-               usb_put_dev(qh->dev);
 
+       list_del(&qh->node);
+       if (qh->udev) {
+               qh->hep->hcpriv = NULL;
+               uhci_free_td(uhci, qh->dummy_td);
+       }
        dma_pool_free(uhci->qh_pool, qh, qh->dma_handle);
 }
 
 /*
- * Append this urb's qh after the last qh in skelqh->list
- *
- * Note that urb_priv.queue_list doesn't have a separate queue head;
- * it's a ring with every element "live".
+ * When the currently executing URB is dequeued, save its current toggle value
  */
-static void uhci_insert_qh(struct uhci_hcd *uhci, struct uhci_qh *skelqh, struct urb *urb)
+static void uhci_save_toggle(struct uhci_qh *qh, struct urb *urb)
 {
-       struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
-       struct urb_priv *turbp;
-       struct uhci_qh *lqh;
+       struct urb_priv *urbp = (struct urb_priv *) urb->hcpriv;
+       struct uhci_td *td;
 
-       /* Grab the last QH */
-       lqh = list_entry(skelqh->list.prev, struct uhci_qh, list);
+       /* If the QH element pointer is UHCI_PTR_TERM then then currently
+        * executing URB has already been unlinked, so this one isn't it. */
+       if (qh_element(qh) == UHCI_PTR_TERM ||
+                               qh->queue.next != &urbp->node)
+               return;
+       qh->element = UHCI_PTR_TERM;
 
-       /* Point to the next skelqh */
-       urbp->qh->link = lqh->link;
-       wmb();                          /* Ordering is important */
+       /* Only bulk and interrupt pipes have to worry about toggles */
+       if (!(usb_pipetype(urb->pipe) == PIPE_BULK ||
+                       usb_pipetype(urb->pipe) == PIPE_INTERRUPT))
+               return;
 
-       /*
-        * Patch QHs for previous endpoint's queued URBs?  HC goes
-        * here next, not to the next skelqh it now points to.
-        *
-        *    lqh --> td ... --> qh ... --> td --> qh ... --> td
-        *     |                 |                 |
-        *     v                 v                 v
-        *     +<----------------+-----------------+
-        *     v
-        *    newqh --> td ... --> td
-        *     |
-        *     v
-        *    ...
-        *
-        * The HC could see (and use!) any of these as we write them.
-        */
-       lqh->link = cpu_to_le32(urbp->qh->dma_handle) | UHCI_PTR_QH;
-       if (lqh->urbp) {
-               list_for_each_entry(turbp, &lqh->urbp->queue_list, queue_list)
-                       turbp->qh->link = lqh->link;
+       /* Find the first active TD; that's the device's toggle state */
+       list_for_each_entry(td, &urbp->td_list, list) {
+               if (td_status(td) & TD_CTRL_ACTIVE) {
+                       qh->needs_fixup = 1;
+                       qh->initial_toggle = uhci_toggle(td_token(td));
+                       return;
+               }
        }
 
-       list_add_tail(&urbp->qh->list, &skelqh->list);
+       WARN_ON(1);
 }
 
 /*
- * Start removal of QH from schedule; it finishes next frame.
- * TDs should be unlinked before this is called.
+ * Fix up the data toggles for URBs in a queue, when one of them
+ * terminates early (short transfer, error, or dequeued).
  */
-static void uhci_remove_qh(struct uhci_hcd *uhci, struct uhci_qh *qh)
+static void uhci_fixup_toggles(struct uhci_qh *qh, int skip_first)
 {
-       struct uhci_qh *pqh;
-       __le32 newlink;
-
-       if (!qh)
-               return;
-
-       /*
-        * Only go through the hoops if it's actually linked in
-        */
-       if (!list_empty(&qh->list)) {
-
-               /* If our queue is nonempty, make the next URB the head */
-               if (!list_empty(&qh->urbp->queue_list)) {
-                       struct urb_priv *nurbp;
-
-                       nurbp = list_entry(qh->urbp->queue_list.next,
-                                       struct urb_priv, queue_list);
-                       nurbp->queued = 0;
-                       list_add(&nurbp->qh->list, &qh->list);
-                       newlink = cpu_to_le32(nurbp->qh->dma_handle) | UHCI_PTR_QH;
-               } else
-                       newlink = qh->link;
-
-               /* Fix up the previous QH's queue to link to either
-                * the new head of this queue or the start of the
-                * next endpoint's queue. */
-               pqh = list_entry(qh->list.prev, struct uhci_qh, list);
-               pqh->link = newlink;
-               if (pqh->urbp) {
-                       struct urb_priv *turbp;
-
-                       list_for_each_entry(turbp, &pqh->urbp->queue_list,
-                                       queue_list)
-                               turbp->qh->link = newlink;
+       struct urb_priv *urbp = NULL;
+       struct uhci_td *td;
+       unsigned int toggle = qh->initial_toggle;
+       unsigned int pipe;
+
+       /* Fixups for a short transfer start with the second URB in the
+        * queue (the short URB is the first). */
+       if (skip_first)
+               urbp = list_entry(qh->queue.next, struct urb_priv, node);
+
+       /* When starting with the first URB, if the QH element pointer is
+        * still valid then we know the URB's toggles are okay. */
+       else if (qh_element(qh) != UHCI_PTR_TERM)
+               toggle = 2;
+
+       /* Fix up the toggle for the URBs in the queue.  Normally this
+        * loop won't run more than once: When an error or short transfer
+        * occurs, the queue usually gets emptied. */
+       urbp = list_prepare_entry(urbp, &qh->queue, node);
+       list_for_each_entry_continue(urbp, &qh->queue, node) {
+
+               /* If the first TD has the right toggle value, we don't
+                * need to change any toggles in this URB */
+               td = list_entry(urbp->td_list.next, struct uhci_td, list);
+               if (toggle > 1 || uhci_toggle(td_token(td)) == toggle) {
+                       td = list_entry(urbp->td_list.prev, struct uhci_td,
+                                       list);
+                       toggle = uhci_toggle(td_token(td)) ^ 1;
+
+               /* Otherwise all the toggles in the URB have to be switched */
+               } else {
+                       list_for_each_entry(td, &urbp->td_list, list) {
+                               td->token ^= __constant_cpu_to_le32(
+                                                       TD_TOKEN_TOGGLE);
+                               toggle ^= 1;
+                       }
                }
-               wmb();
-
-               /* Leave qh->link in case the HC is on the QH now, it will */
-               /* continue the rest of the schedule */
-               qh->element = UHCI_PTR_TERM;
-
-               list_del_init(&qh->list);
-       }
-
-       list_del_init(&qh->urbp->queue_list);
-       qh->urbp = NULL;
-
-       uhci_get_current_frame_number(uhci);
-       if (uhci->frame_number + uhci->is_stopped != uhci->qh_remove_age) {
-               uhci_free_pending_qhs(uhci);
-               uhci->qh_remove_age = uhci->frame_number;
        }
 
-       /* Check to see if the remove list is empty. Set the IOC bit */
-       /* to force an interrupt so we can remove the QH */
-       if (list_empty(&uhci->qh_remove_list))
-               uhci_set_next_interrupt(uhci);
-
-       list_add(&qh->remove_list, &uhci->qh_remove_list);
+       wmb();
+       pipe = list_entry(qh->queue.next, struct urb_priv, node)->urb->pipe;
+       usb_settoggle(qh->udev, usb_pipeendpoint(pipe),
+                       usb_pipeout(pipe), toggle);
+       qh->needs_fixup = 0;
 }
 
-static int uhci_fixup_toggle(struct urb *urb, unsigned int toggle)
+/*
+ * Put a QH on the schedule in both hardware and software
+ */
+static void uhci_activate_qh(struct uhci_hcd *uhci, struct uhci_qh *qh)
 {
-       struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
-       struct uhci_td *td;
-
-       list_for_each_entry(td, &urbp->td_list, list) {
-               if (toggle)
-                       td->token |= cpu_to_le32(TD_TOKEN_TOGGLE);
-               else
-                       td->token &= ~cpu_to_le32(TD_TOKEN_TOGGLE);
-
-               toggle ^= 1;
-       }
-
-       return toggle;
-}
+       struct uhci_qh *pqh;
 
-/* This function will append one URB's QH to another URB's QH. This is for */
-/* queuing interrupt, control or bulk transfers */
-static void uhci_append_queued_urb(struct uhci_hcd *uhci, struct urb *eurb, struct urb *urb)
-{
-       struct urb_priv *eurbp, *urbp, *furbp, *lurbp;
-       struct uhci_td *lltd;
+       WARN_ON(list_empty(&qh->queue));
 
-       eurbp = eurb->hcpriv;
-       urbp = urb->hcpriv;
+       /* Set the element pointer if it isn't set already.
+        * This isn't needed for Isochronous queues, but it doesn't hurt. */
+       if (qh_element(qh) == UHCI_PTR_TERM) {
+               struct urb_priv *urbp = list_entry(qh->queue.next,
+                               struct urb_priv, node);
+               struct uhci_td *td = list_entry(urbp->td_list.next,
+                               struct uhci_td, list);
 
-       /* Find the first URB in the queue */
-       furbp = eurbp;
-       if (eurbp->queued) {
-               list_for_each_entry(furbp, &eurbp->queue_list, queue_list)
-                       if (!furbp->queued)
-                               break;
+               qh->element = cpu_to_le32(td->dma_handle);
        }
 
-       lurbp = list_entry(furbp->queue_list.prev, struct urb_priv, queue_list);
-
-       lltd = list_entry(lurbp->td_list.prev, struct uhci_td, list);
-
-       /* Control transfers always start with toggle 0 */
-       if (!usb_pipecontrol(urb->pipe))
-               usb_settoggle(urb->dev, usb_pipeendpoint(urb->pipe),
-                               usb_pipeout(urb->pipe),
-                               uhci_fixup_toggle(urb,
-                                       uhci_toggle(td_token(lltd)) ^ 1));
-
-       /* All qh's in the queue need to link to the next queue */
-       urbp->qh->link = eurbp->qh->link;
-
-       wmb();                  /* Make sure we flush everything */
-
-       lltd->link = cpu_to_le32(urbp->qh->dma_handle) | UHCI_PTR_QH;
-
-       list_add_tail(&urbp->queue_list, &furbp->queue_list);
-
-       urbp->queued = 1;
+       if (qh->state == QH_STATE_ACTIVE)
+               return;
+       qh->state = QH_STATE_ACTIVE;
+
+       /* Move the QH from its old list to the end of the appropriate
+        * skeleton's list */
+       if (qh == uhci->next_qh)
+               uhci->next_qh = list_entry(qh->node.next, struct uhci_qh,
+                               node);
+       list_move_tail(&qh->node, &qh->skel->node);
+
+       /* Link it into the schedule */
+       pqh = list_entry(qh->node.prev, struct uhci_qh, node);
+       qh->link = pqh->link;
+       wmb();
+       pqh->link = UHCI_PTR_QH | cpu_to_le32(qh->dma_handle);
 }
 
-static void uhci_delete_queued_urb(struct uhci_hcd *uhci, struct urb *urb)
+/*
+ * Take a QH off the hardware schedule
+ */
+static void uhci_unlink_qh(struct uhci_hcd *uhci, struct uhci_qh *qh)
 {
-       struct urb_priv *urbp, *nurbp, *purbp, *turbp;
-       struct uhci_td *pltd;
-       unsigned int toggle;
-
-       urbp = urb->hcpriv;
+       struct uhci_qh *pqh;
 
-       if (list_empty(&urbp->queue_list))
+       if (qh->state == QH_STATE_UNLINKING)
                return;
+       WARN_ON(qh->state != QH_STATE_ACTIVE || !qh->udev);
+       qh->state = QH_STATE_UNLINKING;
 
-       nurbp = list_entry(urbp->queue_list.next, struct urb_priv, queue_list);
+       /* Unlink the QH from the schedule and record when we did it */
+       pqh = list_entry(qh->node.prev, struct uhci_qh, node);
+       pqh->link = qh->link;
+       mb();
 
-       /*
-        * Fix up the toggle for the following URBs in the queue.
-        * Only needed for bulk and interrupt: control and isochronous
-        * endpoints don't propagate toggles between messages.
-        */
-       if (usb_pipebulk(urb->pipe) || usb_pipeint(urb->pipe)) {
-               if (!urbp->queued)
-                       /* We just set the toggle in uhci_unlink_generic */
-                       toggle = usb_gettoggle(urb->dev,
-                                       usb_pipeendpoint(urb->pipe),
-                                       usb_pipeout(urb->pipe));
-               else {
-                       /* If we're in the middle of the queue, grab the */
-                       /* toggle from the TD previous to us */
-                       purbp = list_entry(urbp->queue_list.prev,
-                                       struct urb_priv, queue_list);
-                       pltd = list_entry(purbp->td_list.prev,
-                                       struct uhci_td, list);
-                       toggle = uhci_toggle(td_token(pltd)) ^ 1;
-               }
+       uhci_get_current_frame_number(uhci);
+       qh->unlink_frame = uhci->frame_number;
 
-               list_for_each_entry(turbp, &urbp->queue_list, queue_list) {
-                       if (!turbp->queued)
-                               break;
-                       toggle = uhci_fixup_toggle(turbp->urb, toggle);
-               }
+       /* Force an interrupt so we know when the QH is fully unlinked */
+       if (list_empty(&uhci->skel_unlink_qh->node))
+               uhci_set_next_interrupt(uhci);
 
-               usb_settoggle(urb->dev, usb_pipeendpoint(urb->pipe),
-                               usb_pipeout(urb->pipe), toggle);
-       }
+       /* Move the QH from its old list to the end of the unlinking list */
+       if (qh == uhci->next_qh)
+               uhci->next_qh = list_entry(qh->node.next, struct uhci_qh,
+                               node);
+       list_move_tail(&qh->node, &uhci->skel_unlink_qh->node);
+}
 
-       if (urbp->queued) {
-               /* We're somewhere in the middle (or end).  The case where
-                * we're at the head is handled in uhci_remove_qh(). */
-               purbp = list_entry(urbp->queue_list.prev, struct urb_priv,
-                               queue_list);
+/*
+ * When we and the controller are through with a QH, it becomes IDLE.
+ * This happens when a QH has been off the schedule (on the unlinking
+ * list) for more than one frame, or when an error occurs while adding
+ * the first URB onto a new QH.
+ */
+static void uhci_make_qh_idle(struct uhci_hcd *uhci, struct uhci_qh *qh)
+{
+       WARN_ON(qh->state == QH_STATE_ACTIVE);
 
-               pltd = list_entry(purbp->td_list.prev, struct uhci_td, list);
-               if (nurbp->queued)
-                       pltd->link = cpu_to_le32(nurbp->qh->dma_handle) | UHCI_PTR_QH;
-               else
-                       /* The next URB happens to be the beginning, so */
-                       /*  we're the last, end the chain */
-                       pltd->link = UHCI_PTR_TERM;
-       }
+       if (qh == uhci->next_qh)
+               uhci->next_qh = list_entry(qh->node.next, struct uhci_qh,
+                               node);
+       list_move(&qh->node, &uhci->idle_qh_list);
+       qh->state = QH_STATE_IDLE;
 
-       /* urbp->queue_list is handled in uhci_remove_qh() */
+       /* If anyone is waiting for a QH to become idle, wake them up */
+       if (uhci->num_waiting)
+               wake_up_all(&uhci->waitqh);
 }
 
-static struct urb_priv *uhci_alloc_urb_priv(struct uhci_hcd *uhci, struct urb *urb)
+static inline struct urb_priv *uhci_alloc_urb_priv(struct uhci_hcd *uhci,
+               struct urb *urb)
 {
        struct urb_priv *urbp;
 
@@ -453,17 +386,11 @@ static struct urb_priv *uhci_alloc_urb_priv(struct uhci_hcd *uhci, struct urb *u
 
        memset((void *)urbp, 0, sizeof(*urbp));
 
-       urbp->inserttime = jiffies;
-       urbp->fsbrtime = jiffies;
        urbp->urb = urb;
+       urb->hcpriv = urbp;
        
+       INIT_LIST_HEAD(&urbp->node);
        INIT_LIST_HEAD(&urbp->td_list);
-       INIT_LIST_HEAD(&urbp->queue_list);
-       INIT_LIST_HEAD(&urbp->urb_list);
-
-       list_add_tail(&urbp->urb_list, &uhci->urb_list);
-
-       urb->hcpriv = urbp;
 
        return urbp;
 }
@@ -472,8 +399,6 @@ static void uhci_add_td_to_urb(struct urb *urb, struct uhci_td *td)
 {
        struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
 
-       td->urb = urb;
-
        list_add_tail(&td->list, &urbp->td_list);
 }
 
@@ -483,22 +408,16 @@ static void uhci_remove_td_from_urb(struct uhci_td *td)
                return;
 
        list_del_init(&td->list);
-
-       td->urb = NULL;
 }
 
-static void uhci_destroy_urb_priv(struct uhci_hcd *uhci, struct urb *urb)
+static void uhci_free_urb_priv(struct uhci_hcd *uhci,
+               struct urb_priv *urbp)
 {
        struct uhci_td *td, *tmp;
-       struct urb_priv *urbp;
-
-       urbp = (struct urb_priv *)urb->hcpriv;
-       if (!urbp)
-               return;
 
-       if (!list_empty(&urbp->urb_list))
-               dev_warn(uhci_dev(uhci), "urb %p still on uhci->urb_list "
-                               "or uhci->remove_list!\n", urb);
+       if (!list_empty(&urbp->node))
+               dev_warn(uhci_dev(uhci), "urb %p still on QH's list!\n",
+                               urbp->urb);
 
        uhci_get_current_frame_number(uhci);
        if (uhci->frame_number + uhci->is_stopped != uhci->td_remove_age) {
@@ -507,17 +426,16 @@ static void uhci_destroy_urb_priv(struct uhci_hcd *uhci, struct urb *urb)
        }
 
        /* Check to see if the remove list is empty. Set the IOC bit */
-       /* to force an interrupt so we can remove the TD's*/
+       /* to force an interrupt so we can remove the TDs. */
        if (list_empty(&uhci->td_remove_list))
                uhci_set_next_interrupt(uhci);
 
        list_for_each_entry_safe(td, tmp, &urbp->td_list, list) {
                uhci_remove_td_from_urb(td);
-               uhci_remove_td(uhci, td);
                list_add(&td->remove_list, &uhci->td_remove_list);
        }
 
-       urb->hcpriv = NULL;
+       urbp->urb->hcpriv = NULL;
        kmem_cache_free(uhci_up_cachep, urbp);
 }
 
@@ -576,34 +494,33 @@ static int uhci_map_status(int status, int dir_out)
 /*
  * Control transfers
  */
-static int uhci_submit_control(struct uhci_hcd *uhci, struct urb *urb, struct urb *eurb)
+static int uhci_submit_control(struct uhci_hcd *uhci, struct urb *urb,
+               struct uhci_qh *qh)
 {
-       struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
        struct uhci_td *td;
-       struct uhci_qh *qh, *skelqh;
        unsigned long destination, status;
-       int maxsze = usb_maxpacket(urb->dev, urb->pipe, usb_pipeout(urb->pipe));
+       int maxsze = le16_to_cpu(qh->hep->desc.wMaxPacketSize);
        int len = urb->transfer_buffer_length;
        dma_addr_t data = urb->transfer_dma;
+       __le32 *plink;
 
        /* The "pipe" thing contains the destination in bits 8--18 */
        destination = (urb->pipe & PIPE_DEVEP_MASK) | USB_PID_SETUP;
 
-       /* 3 errors */
-       status = TD_CTRL_ACTIVE | uhci_maxerr(3);
+       /* 3 errors, dummy TD remains inactive */
+       status = uhci_maxerr(3);
        if (urb->dev->speed == USB_SPEED_LOW)
                status |= TD_CTRL_LS;
 
        /*
         * Build the TD for the control request setup packet
         */
-       td = uhci_alloc_td(uhci, urb->dev);
-       if (!td)
-               return -ENOMEM;
-
+       td = qh->dummy_td;
        uhci_add_td_to_urb(urb, td);
-       uhci_fill_td(td, status, destination | uhci_explen(7),
-               urb->setup_dma);
+       uhci_fill_td(td, status, destination | uhci_explen(8),
+                       urb->setup_dma);
+       plink = &td->link;
+       status |= TD_CTRL_ACTIVE;
 
        /*
         * If direction is "send", change the packet ID from SETUP (0x2D)
@@ -618,24 +535,23 @@ static int uhci_submit_control(struct uhci_hcd *uhci, struct urb *urb, struct ur
        }
 
        /*
-        * Build the DATA TD's
+        * Build the DATA TDs
         */
        while (len > 0) {
-               int pktsze = len;
-
-               if (pktsze > maxsze)
-                       pktsze = maxsze;
+               int pktsze = min(len, maxsze);
 
-               td = uhci_alloc_td(uhci, urb->dev);
+               td = uhci_alloc_td(uhci);
                if (!td)
-                       return -ENOMEM;
+                       goto nomem;
+               *plink = cpu_to_le32(td->dma_handle);
 
                /* Alternate Data0/1 (start with Data1) */
                destination ^= TD_TOKEN_TOGGLE;
        
                uhci_add_td_to_urb(urb, td);
-               uhci_fill_td(td, status, destination | uhci_explen(pktsze - 1),
-                       data);
+               uhci_fill_td(td, status, destination | uhci_explen(pktsze),
+                               data);
+               plink = &td->link;
 
                data += pktsze;
                len -= pktsze;
@@ -644,9 +560,10 @@ static int uhci_submit_control(struct uhci_hcd *uhci, struct urb *urb, struct ur
        /*
         * Build the final TD for control status 
         */
-       td = uhci_alloc_td(uhci, urb->dev);
+       td = uhci_alloc_td(uhci);
        if (!td)
-               return -ENOMEM;
+               goto nomem;
+       *plink = cpu_to_le32(td->dma_handle);
 
        /*
         * It's IN if the pipe is an output pipe or we're not expecting
@@ -664,35 +581,39 @@ static int uhci_submit_control(struct uhci_hcd *uhci, struct urb *urb, struct ur
 
        uhci_add_td_to_urb(urb, td);
        uhci_fill_td(td, status | TD_CTRL_IOC,
-               destination | uhci_explen(UHCI_NULL_DATA_SIZE), 0);
-
-       qh = uhci_alloc_qh(uhci, urb->dev);
-       if (!qh)
-               return -ENOMEM;
+                       destination | uhci_explen(0), 0);
+       plink = &td->link;
 
-       urbp->qh = qh;
-       qh->urbp = urbp;
+       /*
+        * Build the new dummy TD and activate the old one
+        */
+       td = uhci_alloc_td(uhci);
+       if (!td)
+               goto nomem;
+       *plink = cpu_to_le32(td->dma_handle);
 
-       uhci_insert_tds_in_qh(qh, urb, UHCI_PTR_BREADTH);
+       uhci_fill_td(td, 0, USB_PID_OUT | uhci_explen(0), 0);
+       wmb();
+       qh->dummy_td->status |= __constant_cpu_to_le32(TD_CTRL_ACTIVE);
+       qh->dummy_td = td;
 
        /* Low-speed transfers get a different queue, and won't hog the bus.
         * Also, some devices enumerate better without FSBR; the easiest way
         * to do that is to put URBs on the low-speed queue while the device
-        * is in the DEFAULT state. */
+        * isn't in the CONFIGURED state. */
        if (urb->dev->speed == USB_SPEED_LOW ||
-                       urb->dev->state == USB_STATE_DEFAULT)
-               skelqh = uhci->skel_ls_control_qh;
+                       urb->dev->state != USB_STATE_CONFIGURED)
+               qh->skel = uhci->skel_ls_control_qh;
        else {
-               skelqh = uhci->skel_fs_control_qh;
+               qh->skel = uhci->skel_fs_control_qh;
                uhci_inc_fsbr(uhci, urb);
        }
+       return 0;
 
-       if (eurb)
-               uhci_append_queued_urb(uhci, eurb, urb);
-       else
-               uhci_insert_qh(uhci, skelqh, urb);
-
-       return -EINPROGRESS;
+nomem:
+       /* Remove the dummy TD from the td_list so it doesn't get freed */
+       uhci_remove_td_from_urb(qh->dummy_td);
+       return -ENOMEM;
 }
 
 /*
@@ -709,7 +630,7 @@ static int usb_control_retrigger_status(struct uhci_hcd *uhci, struct urb *urb)
        struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
        struct uhci_td *td;
 
-       urbp->short_control_packet = 1;
+       urbp->short_transfer = 1;
 
        td = list_entry(urbp->td_list.prev, struct uhci_td, list);
        urbp->qh->element = cpu_to_le32(td->dma_handle);
@@ -726,16 +647,14 @@ static int uhci_result_control(struct uhci_hcd *uhci, struct urb *urb)
        unsigned int status;
        int ret = 0;
 
-       if (list_empty(&urbp->td_list))
-               return -EINVAL;
-
        head = &urbp->td_list;
-
-       if (urbp->short_control_packet) {
+       if (urbp->short_transfer) {
                tmp = head->prev;
                goto status_stage;
        }
 
+       urb->actual_length = 0;
+
        tmp = head->next;
        td = list_entry(tmp, struct uhci_td, list);
 
@@ -748,9 +667,7 @@ static int uhci_result_control(struct uhci_hcd *uhci, struct urb *urb)
        if (status)
                goto td_error;
 
-       urb->actual_length = 0;
-
-       /* The rest of the TD's (but the last) are data */
+       /* The rest of the TDs (but the last) are data */
        tmp = tmp->next;
        while (tmp != head && tmp->next != head) {
                unsigned int ctrlstat;
@@ -776,10 +693,7 @@ static int uhci_result_control(struct uhci_hcd *uhci, struct urb *urb)
                                goto err;
                        }
 
-                       if (uhci_packetid(td_token(td)) == USB_PID_IN)
-                               return usb_control_retrigger_status(uhci, urb);
-                       else
-                               return 0;
+                       return usb_control_retrigger_status(uhci, urb);
                }
        }
 
@@ -820,66 +734,76 @@ err:
                if (errbuf) {
                        /* Print the chain for debugging purposes */
                        uhci_show_qh(urbp->qh, errbuf, ERRBUF_LEN, 0);
-
                        lprintk(errbuf);
                }
        }
 
+       /* Note that the queue has stopped */
+       urbp->qh->element = UHCI_PTR_TERM;
+       urbp->qh->is_stopped = 1;
        return ret;
 }
 
 /*
  * Common submit for bulk and interrupt
  */
-static int uhci_submit_common(struct uhci_hcd *uhci, struct urb *urb, struct urb *eurb, struct uhci_qh *skelqh)
+static int uhci_submit_common(struct uhci_hcd *uhci, struct urb *urb,
+               struct uhci_qh *qh)
 {
        struct uhci_td *td;
-       struct uhci_qh *qh;
        unsigned long destination, status;
-       int maxsze = usb_maxpacket(urb->dev, urb->pipe, usb_pipeout(urb->pipe));
+       int maxsze = le16_to_cpu(qh->hep->desc.wMaxPacketSize);
        int len = urb->transfer_buffer_length;
-       struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
        dma_addr_t data = urb->transfer_dma;
+       __le32 *plink;
+       unsigned int toggle;
 
        if (len < 0)
                return -EINVAL;
 
        /* The "pipe" thing contains the destination in bits 8--18 */
        destination = (urb->pipe & PIPE_DEVEP_MASK) | usb_packetid(urb->pipe);
+       toggle = usb_gettoggle(urb->dev, usb_pipeendpoint(urb->pipe),
+                        usb_pipeout(urb->pipe));
 
-       status = uhci_maxerr(3) | TD_CTRL_ACTIVE;
+       /* 3 errors, dummy TD remains inactive */
+       status = uhci_maxerr(3);
        if (urb->dev->speed == USB_SPEED_LOW)
                status |= TD_CTRL_LS;
        if (usb_pipein(urb->pipe))
                status |= TD_CTRL_SPD;
 
        /*
-        * Build the DATA TD's
+        * Build the DATA TDs
         */
+       plink = NULL;
+       td = qh->dummy_td;
        do {    /* Allow zero length packets */
                int pktsze = maxsze;
 
-               if (pktsze >= len) {
+               if (len <= pktsze) {            /* The last packet */
                        pktsze = len;
                        if (!(urb->transfer_flags & URB_SHORT_NOT_OK))
                                status &= ~TD_CTRL_SPD;
                }
 
-               td = uhci_alloc_td(uhci, urb->dev);
-               if (!td)
-                       return -ENOMEM;
-
+               if (plink) {
+                       td = uhci_alloc_td(uhci);
+                       if (!td)
+                               goto nomem;
+                       *plink = cpu_to_le32(td->dma_handle);
+               }
                uhci_add_td_to_urb(urb, td);
-               uhci_fill_td(td, status, destination | uhci_explen(pktsze - 1) |
-                       (usb_gettoggle(urb->dev, usb_pipeendpoint(urb->pipe),
-                        usb_pipeout(urb->pipe)) << TD_TOKEN_TOGGLE_SHIFT),
-                       data);
+               uhci_fill_td(td, status,
+                               destination | uhci_explen(pktsze) |
+                                       (toggle << TD_TOKEN_TOGGLE_SHIFT),
+                               data);
+               plink = &td->link;
+               status |= TD_CTRL_ACTIVE;
 
                data += pktsze;
                len -= maxsze;
-
-               usb_dotoggle(urb->dev, usb_pipeendpoint(urb->pipe),
-                       usb_pipeout(urb->pipe));
+               toggle ^= 1;
        } while (len > 0);
 
        /*
@@ -889,20 +813,22 @@ static int uhci_submit_common(struct uhci_hcd *uhci, struct urb *urb, struct urb
         * however, if transfer_length == 0, the zero packet was already
         * prepared above.
         */
-       if (usb_pipeout(urb->pipe) && (urb->transfer_flags & URB_ZERO_PACKET) &&
-           !len && urb->transfer_buffer_length) {
-               td = uhci_alloc_td(uhci, urb->dev);
+       if ((urb->transfer_flags & URB_ZERO_PACKET) &&
+                       usb_pipeout(urb->pipe) && len == 0 &&
+                       urb->transfer_buffer_length > 0) {
+               td = uhci_alloc_td(uhci);
                if (!td)
-                       return -ENOMEM;
+                       goto nomem;
+               *plink = cpu_to_le32(td->dma_handle);
 
                uhci_add_td_to_urb(urb, td);
-               uhci_fill_td(td, status, destination | uhci_explen(UHCI_NULL_DATA_SIZE) |
-                       (usb_gettoggle(urb->dev, usb_pipeendpoint(urb->pipe),
-                        usb_pipeout(urb->pipe)) << TD_TOKEN_TOGGLE_SHIFT),
-                       data);
+               uhci_fill_td(td, status,
+                               destination | uhci_explen(0) |
+                                       (toggle << TD_TOKEN_TOGGLE_SHIFT),
+                               data);
+               plink = &td->link;
 
-               usb_dotoggle(urb->dev, usb_pipeendpoint(urb->pipe),
-                       usb_pipeout(urb->pipe));
+               toggle ^= 1;
        }
 
        /* Set the interrupt-on-completion flag on the last packet.
@@ -911,24 +837,29 @@ static int uhci_submit_common(struct uhci_hcd *uhci, struct urb *urb, struct urb
         * fast side but not enough to justify delaying an interrupt
         * more than 2 or 3 URBs, so we will ignore the URB_NO_INTERRUPT
         * flag setting. */
-       td->status |= cpu_to_le32(TD_CTRL_IOC);
-
-       qh = uhci_alloc_qh(uhci, urb->dev);
-       if (!qh)
-               return -ENOMEM;
+       td->status |= __constant_cpu_to_le32(TD_CTRL_IOC);
 
-       urbp->qh = qh;
-       qh->urbp = urbp;
+       /*
+        * Build the new dummy TD and activate the old one
+        */
+       td = uhci_alloc_td(uhci);
+       if (!td)
+               goto nomem;
+       *plink = cpu_to_le32(td->dma_handle);
 
-       /* Always breadth first */
-       uhci_insert_tds_in_qh(qh, urb, UHCI_PTR_BREADTH);
+       uhci_fill_td(td, 0, USB_PID_OUT | uhci_explen(0), 0);
+       wmb();
+       qh->dummy_td->status |= __constant_cpu_to_le32(TD_CTRL_ACTIVE);
+       qh->dummy_td = td;
 
-       if (eurb)
-               uhci_append_queued_urb(uhci, eurb, urb);
-       else
-               uhci_insert_qh(uhci, skelqh, urb);
+       usb_settoggle(urb->dev, usb_pipeendpoint(urb->pipe),
+                       usb_pipeout(urb->pipe), toggle);
+       return 0;
 
-       return -EINPROGRESS;
+nomem:
+       /* Remove the dummy TD from the td_list so it doesn't get freed */
+       uhci_remove_td_from_urb(qh->dummy_td);
+       return -ENOMEM;
 }
 
 /*
@@ -960,8 +891,29 @@ static int uhci_result_common(struct uhci_hcd *uhci, struct urb *urb)
                        if (urb->transfer_flags & URB_SHORT_NOT_OK) {
                                ret = -EREMOTEIO;
                                goto err;
-                       } else
-                               return 0;
+                       }
+
+                       /*
+                        * This URB stopped short of its end.  We have to
+                        * fix up the toggles of the following URBs on the
+                        * queue and restart the queue.  But only if this
+                        * TD isn't the last one in the URB.
+                        *
+                        * Do this only the first time we encounter the
+                        * short URB.
+                        */
+                       if (!urbp->short_transfer &&
+                                       &td->list != urbp->td_list.prev) {
+                               urbp->short_transfer = 1;
+                               urbp->qh->initial_toggle =
+                                               uhci_toggle(td_token(td)) ^ 1;
+                               uhci_fixup_toggles(urbp->qh, 1);
+
+                               td = list_entry(urbp->td_list.prev,
+                                               struct uhci_td, list);
+                               urbp->qh->element = td->link;
+                       }
+                       break;
                }
        }
 
@@ -970,31 +922,30 @@ static int uhci_result_common(struct uhci_hcd *uhci, struct urb *urb)
 td_error:
        ret = uhci_map_status(status, uhci_packetout(td_token(td)));
 
-err:
-       /* 
-        * Enable this chunk of code if you want to see some more debugging.
-        * But be careful, it has the tendancy to starve out khubd and prevent
-        * disconnects from happening successfully if you have a slow debug
-        * log interface (like a serial console.
-        */
-#if 0
        if ((debug == 1 && ret != -EPIPE) || debug > 1) {
                /* Some debugging code */
                dev_dbg(uhci_dev(uhci), "%s: failed with status %x\n",
                                __FUNCTION__, status);
 
-               if (errbuf) {
+               if (debug > 1 && errbuf) {
                        /* Print the chain for debugging purposes */
                        uhci_show_qh(urbp->qh, errbuf, ERRBUF_LEN, 0);
-
                        lprintk(errbuf);
                }
        }
-#endif
+err:
+
+       /* Note that the queue has stopped and save the next toggle value */
+       urbp->qh->element = UHCI_PTR_TERM;
+       urbp->qh->is_stopped = 1;
+       urbp->qh->needs_fixup = 1;
+       urbp->qh->initial_toggle = uhci_toggle(td_token(td)) ^
+                       (ret == -EREMOTEIO);
        return ret;
 }
 
-static inline int uhci_submit_bulk(struct uhci_hcd *uhci, struct urb *urb, struct urb *eurb)
+static inline int uhci_submit_bulk(struct uhci_hcd *uhci, struct urb *urb,
+               struct uhci_qh *qh)
 {
        int ret;
 
@@ -1002,115 +953,86 @@ static inline int uhci_submit_bulk(struct uhci_hcd *uhci, struct urb *urb, struc
        if (urb->dev->speed == USB_SPEED_LOW)
                return -EINVAL;
 
-       ret = uhci_submit_common(uhci, urb, eurb, uhci->skel_bulk_qh);
-       if (ret == -EINPROGRESS)
+       qh->skel = uhci->skel_bulk_qh;
+       ret = uhci_submit_common(uhci, urb, qh);
+       if (ret == 0)
                uhci_inc_fsbr(uhci, urb);
-
        return ret;
 }
 
-static inline int uhci_submit_interrupt(struct uhci_hcd *uhci, struct urb *urb, struct urb *eurb)
+static inline int uhci_submit_interrupt(struct uhci_hcd *uhci, struct urb *urb,
+               struct uhci_qh *qh)
 {
-       /* USB 1.1 interrupt transfers only involve one packet per interval;
-        * that's the uhci_submit_common() "breadth first" policy.  Drivers
-        * can submit urbs of any length, but longer ones might need many
-        * intervals to complete.
+       /* USB 1.1 interrupt transfers only involve one packet per interval.
+        * Drivers can submit URBs of any length, but longer ones will need
+        * multiple intervals to complete.
         */
-       return uhci_submit_common(uhci, urb, eurb, uhci->skelqh[__interval_to_skel(urb->interval)]);
+       qh->skel = uhci->skelqh[__interval_to_skel(urb->interval)];
+       return uhci_submit_common(uhci, urb, qh);
 }
 
 /*
  * Isochronous transfers
  */
-static int isochronous_find_limits(struct uhci_hcd *uhci, struct urb *urb, unsigned int *start, unsigned int *end)
+static int uhci_submit_isochronous(struct uhci_hcd *uhci, struct urb *urb,
+               struct uhci_qh *qh)
 {
-       struct urb *last_urb = NULL;
-       struct urb_priv *up;
-       int ret = 0;
-
-       list_for_each_entry(up, &uhci->urb_list, urb_list) {
-               struct urb *u = up->urb;
-
-               /* look for pending URB's with identical pipe handle */
-               if ((urb->pipe == u->pipe) && (urb->dev == u->dev) &&
-                   (u->status == -EINPROGRESS) && (u != urb)) {
-                       if (!last_urb)
-                               *start = u->start_frame;
-                       last_urb = u;
-               }
-       }
-
-       if (last_urb) {
-               *end = (last_urb->start_frame + last_urb->number_of_packets *
-                               last_urb->interval) & (UHCI_NUMFRAMES-1);
-               ret = 0;
-       } else
-               ret = -1;       /* no previous urb found */
-
-       return ret;
-}
-
-static int isochronous_find_start(struct uhci_hcd *uhci, struct urb *urb)
-{
-       int limits;
-       unsigned int start = 0, end = 0;
+       struct uhci_td *td = NULL;      /* Since urb->number_of_packets > 0 */
+       int i, frame;
+       unsigned long destination, status;
+       struct urb_priv *urbp = (struct urb_priv *) urb->hcpriv;
 
        if (urb->number_of_packets > 900)       /* 900? Why? */
                return -EFBIG;
 
-       limits = isochronous_find_limits(uhci, urb, &start, &end);
+       status = TD_CTRL_ACTIVE | TD_CTRL_IOS;
+       destination = (urb->pipe & PIPE_DEVEP_MASK) | usb_packetid(urb->pipe);
 
+       /* Figure out the starting frame number */
        if (urb->transfer_flags & URB_ISO_ASAP) {
-               if (limits) {
+               if (list_empty(&qh->queue)) {
                        uhci_get_current_frame_number(uhci);
-                       urb->start_frame = (uhci->frame_number + 10)
-                                       & (UHCI_NUMFRAMES - 1);
-               } else
-                       urb->start_frame = end;
+                       urb->start_frame = (uhci->frame_number + 10);
+
+               } else {                /* Go right after the last one */
+                       struct urb *last_urb;
+
+                       last_urb = list_entry(qh->queue.prev,
+                                       struct urb_priv, node)->urb;
+                       urb->start_frame = (last_urb->start_frame +
+                                       last_urb->number_of_packets *
+                                       last_urb->interval);
+               }
        } else {
-               urb->start_frame &= (UHCI_NUMFRAMES - 1);
                /* FIXME: Sanity check */
        }
+       urb->start_frame &= (UHCI_NUMFRAMES - 1);
 
-       return 0;
-}
-
-/*
- * Isochronous transfers
- */
-static int uhci_submit_isochronous(struct uhci_hcd *uhci, struct urb *urb)
-{
-       struct uhci_td *td;
-       int i, ret, frame;
-       int status, destination;
-
-       status = TD_CTRL_ACTIVE | TD_CTRL_IOS;
-       destination = (urb->pipe & PIPE_DEVEP_MASK) | usb_packetid(urb->pipe);
-
-       ret = isochronous_find_start(uhci, urb);
-       if (ret)
-               return ret;
-
-       frame = urb->start_frame;
-       for (i = 0; i < urb->number_of_packets; i++, frame += urb->interval) {
-               if (!urb->iso_frame_desc[i].length)
-                       continue;
-
-               td = uhci_alloc_td(uhci, urb->dev);
+       for (i = 0; i < urb->number_of_packets; i++) {
+               td = uhci_alloc_td(uhci);
                if (!td)
                        return -ENOMEM;
 
                uhci_add_td_to_urb(urb, td);
-               uhci_fill_td(td, status, destination | uhci_explen(urb->iso_frame_desc[i].length - 1),
-                       urb->transfer_dma + urb->iso_frame_desc[i].offset);
+               uhci_fill_td(td, status, destination |
+                               uhci_explen(urb->iso_frame_desc[i].length),
+                               urb->transfer_dma +
+                                       urb->iso_frame_desc[i].offset);
+       }
+
+       /* Set the interrupt-on-completion flag on the last packet. */
+       td->status |= __constant_cpu_to_le32(TD_CTRL_IOC);
 
-               if (i + 1 >= urb->number_of_packets)
-                       td->status |= cpu_to_le32(TD_CTRL_IOC);
+       qh->skel = uhci->skel_iso_qh;
 
-               uhci_insert_td_frame_list(uhci, td, frame);
+       /* Add the TDs to the frame list */
+       frame = urb->start_frame;
+       list_for_each_entry(td, &urbp->td_list, list) {
+               uhci_insert_td_in_frame_list(uhci, td, frame);
+               frame += urb->interval;
        }
 
-       return -EINPROGRESS;
+       return 0;
 }
 
 static int uhci_result_isochronous(struct uhci_hcd *uhci, struct urb *urb)
@@ -1120,7 +1042,7 @@ static int uhci_result_isochronous(struct uhci_hcd *uhci, struct urb *urb)
        int status;
        int i, ret = 0;
 
-       urb->actual_length = 0;
+       urb->actual_length = urb->error_count = 0;
 
        i = 0;
        list_for_each_entry(td, &urbp->td_list, list) {
@@ -1148,75 +1070,63 @@ static int uhci_result_isochronous(struct uhci_hcd *uhci, struct urb *urb)
        return ret;
 }
 
-static struct urb *uhci_find_urb_ep(struct uhci_hcd *uhci, struct urb *urb)
-{
-       struct urb_priv *up;
-
-       /* We don't match Isoc transfers since they are special */
-       if (usb_pipeisoc(urb->pipe))
-               return NULL;
-
-       list_for_each_entry(up, &uhci->urb_list, urb_list) {
-               struct urb *u = up->urb;
-
-               if (u->dev == urb->dev && u->status == -EINPROGRESS) {
-                       /* For control, ignore the direction */
-                       if (usb_pipecontrol(urb->pipe) &&
-                           (u->pipe & ~USB_DIR_IN) == (urb->pipe & ~USB_DIR_IN))
-                               return u;
-                       else if (u->pipe == urb->pipe)
-                               return u;
-               }
-       }
-
-       return NULL;
-}
-
 static int uhci_urb_enqueue(struct usb_hcd *hcd,
-               struct usb_host_endpoint *ep,
-               struct urb *urb, int mem_flags)
+               struct usb_host_endpoint *hep,
+               struct urb *urb, gfp_t mem_flags)
 {
        int ret;
        struct uhci_hcd *uhci = hcd_to_uhci(hcd);
        unsigned long flags;
-       struct urb *eurb;
+       struct urb_priv *urbp;
+       struct uhci_qh *qh;
        int bustime;
 
        spin_lock_irqsave(&uhci->lock, flags);
 
        ret = urb->status;
        if (ret != -EINPROGRESS)                /* URB already unlinked! */
-               goto out;
+               goto done;
 
-       eurb = uhci_find_urb_ep(uhci, urb);
+       ret = -ENOMEM;
+       urbp = uhci_alloc_urb_priv(uhci, urb);
+       if (!urbp)
+               goto done;
 
-       if (!uhci_alloc_urb_priv(uhci, urb)) {
-               ret = -ENOMEM;
-               goto out;
+       if (hep->hcpriv)
+               qh = (struct uhci_qh *) hep->hcpriv;
+       else {
+               qh = uhci_alloc_qh(uhci, urb->dev, hep);
+               if (!qh)
+                       goto err_no_qh;
        }
+       urbp->qh = qh;
 
        switch (usb_pipetype(urb->pipe)) {
        case PIPE_CONTROL:
-               ret = uhci_submit_control(uhci, urb, eurb);
+               ret = uhci_submit_control(uhci, urb, qh);
+               break;
+       case PIPE_BULK:
+               ret = uhci_submit_bulk(uhci, urb, qh);
                break;
        case PIPE_INTERRUPT:
-               if (!eurb) {
+               if (list_empty(&qh->queue)) {
                        bustime = usb_check_bandwidth(urb->dev, urb);
                        if (bustime < 0)
                                ret = bustime;
                        else {
-                               ret = uhci_submit_interrupt(uhci, urb, eurb);
-                               if (ret == -EINPROGRESS)
+                               ret = uhci_submit_interrupt(uhci, urb, qh);
+                               if (ret == 0)
                                        usb_claim_bandwidth(urb->dev, urb, bustime, 0);
                        }
                } else {        /* inherit from parent */
-                       urb->bandwidth = eurb->bandwidth;
-                       ret = uhci_submit_interrupt(uhci, urb, eurb);
+                       struct urb_priv *eurbp;
+
+                       eurbp = list_entry(qh->queue.prev, struct urb_priv,
+                                       node);
+                       urb->bandwidth = eurbp->urb->bandwidth;
+                       ret = uhci_submit_interrupt(uhci, urb, qh);
                }
                break;
-       case PIPE_BULK:
-               ret = uhci_submit_bulk(uhci, urb, eurb);
-               break;
        case PIPE_ISOCHRONOUS:
                bustime = usb_check_bandwidth(urb->dev, urb);
                if (bustime < 0) {
@@ -1224,219 +1134,208 @@ static int uhci_urb_enqueue(struct usb_hcd *hcd,
                        break;
                }
 
-               ret = uhci_submit_isochronous(uhci, urb);
-               if (ret == -EINPROGRESS)
+               ret = uhci_submit_isochronous(uhci, urb, qh);
+               if (ret == 0)
                        usb_claim_bandwidth(urb->dev, urb, bustime, 1);
                break;
        }
+       if (ret != 0)
+               goto err_submit_failed;
 
-       if (ret != -EINPROGRESS) {
-               /* Submit failed, so delete it from the urb_list */
-               struct urb_priv *urbp = urb->hcpriv;
+       /* Add this URB to the QH */
+       urbp->qh = qh;
+       list_add_tail(&urbp->node, &qh->queue);
+
+       /* If the new URB is the first and only one on this QH then either
+        * the QH is new and idle or else it's unlinked and waiting to
+        * become idle, so we can activate it right away. */
+       if (qh->queue.next == &urbp->node)
+               uhci_activate_qh(uhci, qh);
+       goto done;
+
+err_submit_failed:
+       if (qh->state == QH_STATE_IDLE)
+               uhci_make_qh_idle(uhci, qh);    /* Reclaim unused QH */
 
-               list_del_init(&urbp->urb_list);
-               uhci_destroy_urb_priv(uhci, urb);
-       } else
-               ret = 0;
+err_no_qh:
+       uhci_free_urb_priv(uhci, urbp);
 
-out:
+done:
        spin_unlock_irqrestore(&uhci->lock, flags);
        return ret;
 }
 
-/*
- * Return the result of a transfer
- */
-static void uhci_transfer_result(struct uhci_hcd *uhci, struct urb *urb)
+static int uhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb)
 {
-       int ret = -EINPROGRESS;
+       struct uhci_hcd *uhci = hcd_to_uhci(hcd);
+       unsigned long flags;
        struct urb_priv *urbp;
 
-       spin_lock(&urb->lock);
+       spin_lock_irqsave(&uhci->lock, flags);
+       urbp = urb->hcpriv;
+       if (!urbp)                      /* URB was never linked! */
+               goto done;
+
+       /* Remove Isochronous TDs from the frame list ASAP */
+       if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS)
+               uhci_unlink_isochronous_tds(uhci, urb);
+       uhci_unlink_qh(uhci, urbp->qh);
 
-       urbp = (struct urb_priv *)urb->hcpriv;
+done:
+       spin_unlock_irqrestore(&uhci->lock, flags);
+       return 0;
+}
 
-       if (urb->status != -EINPROGRESS)        /* URB already dequeued */
-               goto out;
+/*
+ * Finish unlinking an URB and give it back
+ */
+static void uhci_giveback_urb(struct uhci_hcd *uhci, struct uhci_qh *qh,
+               struct urb *urb, struct pt_regs *regs)
+__releases(uhci->lock)
+__acquires(uhci->lock)
+{
+       struct urb_priv *urbp = (struct urb_priv *) urb->hcpriv;
+
+       /* Isochronous TDs get unlinked directly from the frame list */
+       if (usb_pipetype(urb->pipe) == PIPE_ISOCHRONOUS)
+               uhci_unlink_isochronous_tds(uhci, urb);
+
+       /* If the URB isn't first on its queue, adjust the link pointer
+        * of the last TD in the previous URB. */
+       else if (qh->queue.next != &urbp->node) {
+               struct urb_priv *purbp;
+               struct uhci_td *ptd, *ltd;
+
+               purbp = list_entry(urbp->node.prev, struct urb_priv, node);
+               ptd = list_entry(purbp->td_list.prev, struct uhci_td,
+                               list);
+               ltd = list_entry(urbp->td_list.prev, struct uhci_td,
+                               list);
+               ptd->link = ltd->link;
+       }
 
-       switch (usb_pipetype(urb->pipe)) {
-       case PIPE_CONTROL:
-               ret = uhci_result_control(uhci, urb);
-               break;
-       case PIPE_BULK:
-       case PIPE_INTERRUPT:
-               ret = uhci_result_common(uhci, urb);
-               break;
-       case PIPE_ISOCHRONOUS:
-               ret = uhci_result_isochronous(uhci, urb);
-               break;
+       /* Take the URB off the QH's queue.  If the queue is now empty,
+        * this is a perfect time for a toggle fixup. */
+       list_del_init(&urbp->node);
+       if (list_empty(&qh->queue) && qh->needs_fixup) {
+               usb_settoggle(urb->dev, usb_pipeendpoint(urb->pipe),
+                               usb_pipeout(urb->pipe), qh->initial_toggle);
+               qh->needs_fixup = 0;
        }
 
-       if (ret == -EINPROGRESS)
-               goto out;
-       urb->status = ret;
+       uhci_dec_fsbr(uhci, urb);       /* Safe since it checks */
+       uhci_free_urb_priv(uhci, urbp);
 
        switch (usb_pipetype(urb->pipe)) {
-       case PIPE_CONTROL:
-       case PIPE_BULK:
        case PIPE_ISOCHRONOUS:
                /* Release bandwidth for Interrupt or Isoc. transfers */
                if (urb->bandwidth)
                        usb_release_bandwidth(urb->dev, urb, 1);
-               uhci_unlink_generic(uhci, urb);
                break;
        case PIPE_INTERRUPT:
                /* Release bandwidth for Interrupt or Isoc. transfers */
                /* Make sure we don't release if we have a queued URB */
-               if (list_empty(&urbp->queue_list) && urb->bandwidth)
+               if (list_empty(&qh->queue) && urb->bandwidth)
                        usb_release_bandwidth(urb->dev, urb, 0);
                else
                        /* bandwidth was passed on to queued URB, */
                        /* so don't let usb_unlink_urb() release it */
                        urb->bandwidth = 0;
-               uhci_unlink_generic(uhci, urb);
                break;
-       default:
-               dev_info(uhci_dev(uhci), "%s: unknown pipe type %d "
-                               "for urb %p\n",
-                               __FUNCTION__, usb_pipetype(urb->pipe), urb);
        }
 
-       /* Move it from uhci->urb_list to uhci->complete_list */
-       uhci_moveto_complete(uhci, urbp);
-
-out:
-       spin_unlock(&urb->lock);
-}
-
-static void uhci_unlink_generic(struct uhci_hcd *uhci, struct urb *urb)
-{
-       struct list_head *head;
-       struct uhci_td *td;
-       struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
-       int prevactive = 0;
+       spin_unlock(&uhci->lock);
+       usb_hcd_giveback_urb(uhci_to_hcd(uhci), urb, regs);
+       spin_lock(&uhci->lock);
 
-       uhci_dec_fsbr(uhci, urb);       /* Safe since it checks */
+       /* If the queue is now empty, we can unlink the QH and give up its
+        * reserved bandwidth. */
+       if (list_empty(&qh->queue)) {
+               uhci_unlink_qh(uhci, qh);
 
-       /*
-        * Now we need to find out what the last successful toggle was
-        * so we can update the local data toggle for the next transfer
-        *
-        * There are 2 ways the last successful completed TD is found:
-        *
-        * 1) The TD is NOT active and the actual length < expected length
-        * 2) The TD is NOT active and it's the last TD in the chain
-        *
-        * and a third way the first uncompleted TD is found:
-        *
-        * 3) The TD is active and the previous TD is NOT active
-        *
-        * Control and Isochronous ignore the toggle, so this is safe
-        * for all types
-        *
-        * FIXME: The toggle fixups won't be 100% reliable until we
-        * change over to using a single queue for each endpoint and
-        * stop the queue before unlinking.
-        */
-       head = &urbp->td_list;
-       list_for_each_entry(td, head, list) {
-               unsigned int ctrlstat = td_status(td);
-
-               if (!(ctrlstat & TD_CTRL_ACTIVE) &&
-                               (uhci_actual_length(ctrlstat) <
-                                uhci_expected_length(td_token(td)) ||
-                               td->list.next == head))
-                       usb_settoggle(urb->dev, uhci_endpoint(td_token(td)),
-                               uhci_packetout(td_token(td)),
-                               uhci_toggle(td_token(td)) ^ 1);
-               else if ((ctrlstat & TD_CTRL_ACTIVE) && !prevactive)
-                       usb_settoggle(urb->dev, uhci_endpoint(td_token(td)),
-                               uhci_packetout(td_token(td)),
-                               uhci_toggle(td_token(td)));
-
-               prevactive = ctrlstat & TD_CTRL_ACTIVE;
+               /* Bandwidth stuff not yet implemented */
        }
-
-       uhci_delete_queued_urb(uhci, urb);
-
-       /* The interrupt loop will reclaim the QH's */
-       uhci_remove_qh(uhci, urbp->qh);
-       urbp->qh = NULL;
 }
 
-static int uhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb)
+/*
+ * Scan the URBs in a QH's queue
+ */
+#define QH_FINISHED_UNLINKING(qh)                      \
+               (qh->state == QH_STATE_UNLINKING &&     \
+               uhci->frame_number + uhci->is_stopped != qh->unlink_frame)
+
+static void uhci_scan_qh(struct uhci_hcd *uhci, struct uhci_qh *qh,
+               struct pt_regs *regs)
 {
-       struct uhci_hcd *uhci = hcd_to_uhci(hcd);
-       unsigned long flags;
        struct urb_priv *urbp;
+       struct urb *urb;
+       int status;
 
-       spin_lock_irqsave(&uhci->lock, flags);
-       urbp = urb->hcpriv;
-       if (!urbp)                      /* URB was never linked! */
-               goto done;
-       list_del_init(&urbp->urb_list);
-
-       uhci_unlink_generic(uhci, urb);
-
-       uhci_get_current_frame_number(uhci);
-       if (uhci->frame_number + uhci->is_stopped != uhci->urb_remove_age) {
-               uhci_remove_pending_urbps(uhci);
-               uhci->urb_remove_age = uhci->frame_number;
-       }
-
-       /* If we're the first, set the next interrupt bit */
-       if (list_empty(&uhci->urb_remove_list))
-               uhci_set_next_interrupt(uhci);
-       list_add_tail(&urbp->urb_list, &uhci->urb_remove_list);
-
-done:
-       spin_unlock_irqrestore(&uhci->lock, flags);
-       return 0;
-}
+       while (!list_empty(&qh->queue)) {
+               urbp = list_entry(qh->queue.next, struct urb_priv, node);
+               urb = urbp->urb;
 
-static int uhci_fsbr_timeout(struct uhci_hcd *uhci, struct urb *urb)
-{
-       struct urb_priv *urbp = (struct urb_priv *)urb->hcpriv;
-       struct list_head *head;
-       struct uhci_td *td;
-       int count = 0;
+               switch (usb_pipetype(urb->pipe)) {
+               case PIPE_CONTROL:
+                       status = uhci_result_control(uhci, urb);
+                       break;
+               case PIPE_ISOCHRONOUS:
+                       status = uhci_result_isochronous(uhci, urb);
+                       break;
+               default:        /* PIPE_BULK or PIPE_INTERRUPT */
+                       status = uhci_result_common(uhci, urb);
+                       break;
+               }
+               if (status == -EINPROGRESS)
+                       break;
 
-       uhci_dec_fsbr(uhci, urb);
+               spin_lock(&urb->lock);
+               if (urb->status == -EINPROGRESS)        /* Not dequeued */
+                       urb->status = status;
+               else
+                       status = -ECONNRESET;
+               spin_unlock(&urb->lock);
 
-       urbp->fsbr_timeout = 1;
+               /* Dequeued but completed URBs can't be given back unless
+                * the QH is stopped or has finished unlinking. */
+               if (status == -ECONNRESET &&
+                               !(qh->is_stopped || QH_FINISHED_UNLINKING(qh)))
+                       return;
 
-       /*
-        * Ideally we would want to fix qh->element as well, but it's
-        * read/write by the HC, so that can introduce a race. It's not
-        * really worth the hassle
-        */
-
-       head = &urbp->td_list;
-       list_for_each_entry(td, head, list) {
-               /*
-                * Make sure we don't do the last one (since it'll have the
-                * TERM bit set) as well as we skip every so many TD's to
-                * make sure it doesn't hog the bandwidth
-                */
-               if (td->list.next != head && (count % DEPTH_INTERVAL) ==
-                               (DEPTH_INTERVAL - 1))
-                       td->link |= UHCI_PTR_DEPTH;
-
-               count++;
+               uhci_giveback_urb(uhci, qh, urb, regs);
+               if (qh->is_stopped)
+                       break;
        }
 
-       return 0;
-}
-
-static void uhci_free_pending_qhs(struct uhci_hcd *uhci)
-{
-       struct uhci_qh *qh, *tmp;
-
-       list_for_each_entry_safe(qh, tmp, &uhci->qh_remove_list, remove_list) {
-               list_del_init(&qh->remove_list);
+       /* If the QH is neither stopped nor finished unlinking (normal case),
+        * our work here is done. */
+ restart:
+       if (!(qh->is_stopped || QH_FINISHED_UNLINKING(qh)))
+               return;
 
-               uhci_free_qh(uhci, qh);
+       /* Otherwise give back each of the dequeued URBs */
+       list_for_each_entry(urbp, &qh->queue, node) {
+               urb = urbp->urb;
+               if (urb->status != -EINPROGRESS) {
+                       uhci_save_toggle(qh, urb);
+                       uhci_giveback_urb(uhci, qh, urb, regs);
+                       goto restart;
+               }
+       }
+       qh->is_stopped = 0;
+
+       /* There are no more dequeued URBs.  If there are still URBs on the
+        * queue, the QH can now be re-activated. */
+       if (!list_empty(&qh->queue)) {
+               if (qh->needs_fixup)
+                       uhci_fixup_toggles(qh, 0);
+               uhci_activate_qh(uhci, qh);
        }
+
+       /* The queue is empty.  The QH can become idle if it is fully
+        * unlinked. */
+       else if (QH_FINISHED_UNLINKING(qh))
+               uhci_make_qh_idle(uhci, qh);
 }
 
 static void uhci_free_pending_tds(struct uhci_hcd *uhci)
@@ -1450,43 +1349,13 @@ static void uhci_free_pending_tds(struct uhci_hcd *uhci)
        }
 }
 
-static void
-uhci_finish_urb(struct usb_hcd *hcd, struct urb *urb, struct pt_regs *regs)
-__releases(uhci->lock)
-__acquires(uhci->lock)
-{
-       struct uhci_hcd *uhci = hcd_to_uhci(hcd);
-
-       uhci_destroy_urb_priv(uhci, urb);
-
-       spin_unlock(&uhci->lock);
-       usb_hcd_giveback_urb(hcd, urb, regs);
-       spin_lock(&uhci->lock);
-}
-
-static void uhci_finish_completion(struct uhci_hcd *uhci, struct pt_regs *regs)
-{
-       struct urb_priv *urbp, *tmp;
-
-       list_for_each_entry_safe(urbp, tmp, &uhci->complete_list, urb_list) {
-               struct urb *urb = urbp->urb;
-
-               list_del_init(&urbp->urb_list);
-               uhci_finish_urb(uhci_to_hcd(uhci), urb, regs);
-       }
-}
-
-static void uhci_remove_pending_urbps(struct uhci_hcd *uhci)
-{
-
-       /* Splice the urb_remove_list onto the end of the complete_list */
-       list_splice_init(&uhci->urb_remove_list, uhci->complete_list.prev);
-}
-
-/* Process events in the schedule, but only in one thread at a time */
+/*
+ * Process events in the schedule, but only in one thread at a time
+ */
 static void uhci_scan_schedule(struct uhci_hcd *uhci, struct pt_regs *regs)
 {
-       struct urb_priv *urbp, *tmp;
+       int i;
+       struct uhci_qh *qh;
 
        /* Don't allow re-entrant calls */
        if (uhci->scan_in_progress) {
@@ -1497,43 +1366,46 @@ static void uhci_scan_schedule(struct uhci_hcd *uhci, struct pt_regs *regs)
  rescan:
        uhci->need_rescan = 0;
 
+       uhci_clear_next_interrupt(uhci);
        uhci_get_current_frame_number(uhci);
 
-       if (uhci->frame_number + uhci->is_stopped != uhci->qh_remove_age)
-               uhci_free_pending_qhs(uhci);
        if (uhci->frame_number + uhci->is_stopped != uhci->td_remove_age)
                uhci_free_pending_tds(uhci);
-       if (uhci->frame_number + uhci->is_stopped != uhci->urb_remove_age)
-               uhci_remove_pending_urbps(uhci);
-
-       /* Walk the list of pending URBs to see which ones completed
-        * (must be _safe because uhci_transfer_result() dequeues URBs) */
-       list_for_each_entry_safe(urbp, tmp, &uhci->urb_list, urb_list) {
-               struct urb *urb = urbp->urb;
 
-               /* Checks the status and does all of the magic necessary */
-               uhci_transfer_result(uhci, urb);
-       }
-       uhci_finish_completion(uhci, regs);
-
-       /* If the controller is stopped, we can finish these off right now */
-       if (uhci->is_stopped) {
-               uhci_free_pending_qhs(uhci);
-               uhci_free_pending_tds(uhci);
-               uhci_remove_pending_urbps(uhci);
+       /* Go through all the QH queues and process the URBs in each one */
+       for (i = 0; i < UHCI_NUM_SKELQH - 1; ++i) {
+               uhci->next_qh = list_entry(uhci->skelqh[i]->node.next,
+                               struct uhci_qh, node);
+               while ((qh = uhci->next_qh) != uhci->skelqh[i]) {
+                       uhci->next_qh = list_entry(qh->node.next,
+                                       struct uhci_qh, node);
+                       uhci_scan_qh(uhci, qh, regs);
+               }
        }
 
        if (uhci->need_rescan)
                goto rescan;
        uhci->scan_in_progress = 0;
 
-       if (list_empty(&uhci->urb_remove_list) &&
-           list_empty(&uhci->td_remove_list) &&
-           list_empty(&uhci->qh_remove_list))
+       /* If the controller is stopped, we can finish these off right now */
+       if (uhci->is_stopped)
+               uhci_free_pending_tds(uhci);
+
+       if (list_empty(&uhci->td_remove_list) &&
+                       list_empty(&uhci->skel_unlink_qh->node))
                uhci_clear_next_interrupt(uhci);
        else
                uhci_set_next_interrupt(uhci);
+}
 
-       /* Wake up anyone waiting for an URB to complete */
-       wake_up_all(&uhci->waitqh);
+static void check_fsbr(struct uhci_hcd *uhci)
+{
+       /* For now, don't scan URBs for FSBR timeouts.
+        * Add it back in later... */
+
+       /* Really disable FSBR */
+       if (!uhci->fsbr && uhci->fsbrtimeout && time_after_eq(jiffies, uhci->fsbrtimeout)) {
+               uhci->fsbrtimeout = 0;
+               uhci->skel_term_qh->link = UHCI_PTR_TERM;
+       }
 }
index 986d762..ad9ddec 100644 (file)
@@ -553,6 +553,10 @@ struct ftdi_private {
 
        int force_baud;         /* if non-zero, force the baud rate to this value */
        int force_rtscts;       /* if non-zero, force RTS-CTS to always be enabled */
+
+       spinlock_t tx_lock;     /* spinlock for transmit state */
+       unsigned long tx_outstanding_bytes;
+       unsigned long tx_outstanding_urbs;
 };
 
 /* Used for TIOCMIWAIT */
@@ -626,6 +630,9 @@ static struct usb_serial_driver ftdi_sio_device = {
 #define HIGH 1
 #define LOW 0
 
+/* number of outstanding urbs to prevent userspace DoS from happening */
+#define URB_UPPER_LIMIT        42
+
 /*
  * ***************************************************************************
  * Utlity functions
@@ -1156,6 +1163,7 @@ static int ftdi_sio_attach (struct usb_serial *serial)
        }
 
        spin_lock_init(&priv->rx_lock);
+       spin_lock_init(&priv->tx_lock);
         init_waitqueue_head(&priv->delta_msr_wait);
        /* This will push the characters through immediately rather
           than queue a task to deliver them */
@@ -1372,6 +1380,7 @@ static int ftdi_write (struct usb_serial_port *port,
        int data_offset ;       /* will be 1 for the SIO and 0 otherwise */
        int status;
        int transfer_size;
+       unsigned long flags;
 
        dbg("%s port %d, %d bytes", __FUNCTION__, port->number, count);
 
@@ -1379,6 +1388,13 @@ static int ftdi_write (struct usb_serial_port *port,
                dbg("write request of 0 bytes");
                return 0;
        }
+       spin_lock_irqsave(&priv->tx_lock, flags);
+       if (priv->tx_outstanding_urbs > URB_UPPER_LIMIT) {
+               spin_unlock_irqrestore(&priv->tx_lock, flags);
+               dbg("%s - write limit hit\n", __FUNCTION__);
+               return 0;
+       }
+       spin_unlock_irqrestore(&priv->tx_lock, flags);
        
        data_offset = priv->write_offset;
         dbg("data_offset set to %d",data_offset);
@@ -1445,6 +1461,11 @@ static int ftdi_write (struct usb_serial_port *port,
                err("%s - failed submitting write urb, error %d", __FUNCTION__, status);
                count = status;
                kfree (buffer);
+       } else {
+               spin_lock_irqsave(&priv->tx_lock, flags);
+               ++priv->tx_outstanding_urbs;
+               priv->tx_outstanding_bytes += count;
+               spin_unlock_irqrestore(&priv->tx_lock, flags);
        }
 
        /* we are done with this urb, so let the host driver
@@ -1460,7 +1481,11 @@ static int ftdi_write (struct usb_serial_port *port,
 
 static void ftdi_write_bulk_callback (struct urb *urb, struct pt_regs *regs)
 {
+       unsigned long flags;
        struct usb_serial_port *port = (struct usb_serial_port *)urb->context;
+       struct ftdi_private *priv;
+       int data_offset;       /* will be 1 for the SIO and 0 otherwise */
+       unsigned long countback;
 
        /* free up the transfer buffer, as usb_free_urb() does not do this */
        kfree (urb->transfer_buffer);
@@ -1472,34 +1497,67 @@ static void ftdi_write_bulk_callback (struct urb *urb, struct pt_regs *regs)
                return;
        }
 
+       priv = usb_get_serial_port_data(port);
+       if (!priv) {
+               dbg("%s - bad port private data pointer - exiting", __FUNCTION__);
+               return;
+       }
+       /* account for transferred data */
+       countback = urb->actual_length;
+       data_offset = priv->write_offset;
+       if (data_offset > 0) {
+               /* Subtract the control bytes */
+               countback -= (data_offset * ((countback + (PKTSZ - 1)) / PKTSZ));
+       }
+       spin_lock_irqsave(&priv->tx_lock, flags);
+       --priv->tx_outstanding_urbs;
+       priv->tx_outstanding_bytes -= countback;
+       spin_unlock_irqrestore(&priv->tx_lock, flags);
+
        schedule_work(&port->work);
 } /* ftdi_write_bulk_callback */
 
 
 static int ftdi_write_room( struct usb_serial_port *port )
 {
+       struct ftdi_private *priv = usb_get_serial_port_data(port);
+       int room;
+       unsigned long flags;
+
        dbg("%s - port %d", __FUNCTION__, port->number);
 
-       /*
-        * We really can take anything the user throws at us
-        * but let's pick a nice big number to tell the tty
-        * layer that we have lots of free space
-        */
-       return 2048;
+       spin_lock_irqsave(&priv->tx_lock, flags);
+       if (priv->tx_outstanding_urbs < URB_UPPER_LIMIT) {
+               /*
+                * We really can take anything the user throws at us
+                * but let's pick a nice big number to tell the tty
+                * layer that we have lots of free space
+                */
+               room = 2048;
+       } else {
+               room = 0;
+       }
+       spin_unlock_irqrestore(&priv->tx_lock, flags);
+       return room;
 } /* ftdi_write_room */
 
 
 static int ftdi_chars_in_buffer (struct usb_serial_port *port)
 { /* ftdi_chars_in_buffer */
+       struct ftdi_private *priv = usb_get_serial_port_data(port);
+       int buffered;
+       unsigned long flags;
+
        dbg("%s - port %d", __FUNCTION__, port->number);
 
-       /* 
-        * We can't really account for how much data we
-        * have sent out, but hasn't made it through to the
-        * device, so just tell the tty layer that everything
-        * is flushed.
-        */
-       return 0;
+       spin_lock_irqsave(&priv->tx_lock, flags);
+       buffered = (int)priv->tx_outstanding_bytes;
+       spin_unlock_irqrestore(&priv->tx_lock, flags);
+       if (buffered < 0) {
+               err("%s outstanding tx bytes is negative!", __FUNCTION__);
+               buffered = 0;
+       }
+       return buffered;
 } /* ftdi_chars_in_buffer */
 
 
index 2de6528..a1a8302 100644 (file)
@@ -113,6 +113,12 @@ UNUSUAL_DEV(  0x0411, 0x001c, 0x0113, 0x0113,
                US_SC_DEVICE, US_PR_DEVICE, NULL,
                US_FL_FIX_INQUIRY ),
 
+/* Reported by Orgad Shaneh <orgads@gmail.com> */
+UNUSUAL_DEV(  0x0419, 0xaace, 0x0100, 0x0100,
+               "Samsung", "MP3 Player",
+               US_SC_DEVICE, US_PR_DEVICE, NULL,
+               US_FL_IGNORE_RESIDUE),
+
 /* Reported by Christian Leber <christian@leber.de> */
 UNUSUAL_DEV(  0x0419, 0xaaf5, 0x0100, 0x0100,
                "TrekStor",
@@ -133,12 +139,6 @@ UNUSUAL_DEV(  0x0420, 0x0001, 0x0100, 0x0100,
                US_SC_DEVICE, US_PR_DEVICE, NULL,
                US_FL_IGNORE_RESIDUE ),
 
-/* Reported by Pete Zaitcev <zaitcev@redhat.com>, bz#176584 */
-UNUSUAL_DEV(  0x0420, 0x0001, 0x0100, 0x0100,
-               "GENERIC", "MP3 PLAYER", /* MyMusix PD-205 on the outside. */
-               US_SC_DEVICE, US_PR_DEVICE, NULL,
-               US_FL_IGNORE_RESIDUE ),
-
 /* Reported by Olaf Hering <olh@suse.de> from novell bug #105878 */
 UNUSUAL_DEV(  0x0424, 0x0fdc, 0x0210, 0x0210,
                "SMSC",
index dd10863..0142fe8 100644 (file)
@@ -593,6 +593,15 @@ static int get_transport(struct us_data *us)
                break;
 #endif
 
+#ifdef CONFIG_USB_STORAGE_ALAUDA
+       case US_PR_ALAUDA:
+               us->transport_name  = "Alauda Control/Bulk";
+               us->transport = alauda_transport;
+               us->transport_reset = usb_stor_Bulk_reset;
+               us->max_lun = 1;
+               break;
+#endif
+
        default:
                return -EIO;
        }
@@ -648,15 +657,6 @@ static int get_protocol(struct us_data *us)
                break;
 #endif
 
-#ifdef CONFIG_USB_STORAGE_ALAUDA
-       case US_PR_ALAUDA:
-               us->transport_name  = "Alauda Control/Bulk";
-               us->transport = alauda_transport;
-               us->transport_reset = usb_stor_Bulk_reset;
-               us->max_lun = 1;
-               break;
-#endif
-
        default:
                return -EIO;
        }
index 4587087..2fd9ab7 100644 (file)
@@ -513,7 +513,7 @@ config FB_HGA_ACCEL
 
 config VIDEO_SELECT
        bool
-       depends on (FB = y) && X86
+       depends on (FB = y) && X86 && !XEN
        default y
 
 config FB_SGIVW
index 0a2ed2f..2ade752 100644 (file)
@@ -27,7 +27,7 @@ int soft_cursor(struct fb_info *info, struct fb_cursor *cursor)
        struct fb_image *image;
        u8 *dst;
        static u8 *src=NULL;
-       static int allocsize=0;
+       static int allocsize = 0;
 
        if (info->state != FBINFO_STATE_RUNNING)
                return 0;
@@ -41,8 +41,10 @@ int soft_cursor(struct fb_info *info, struct fb_cursor *cursor)
                allocsize = dsize + sizeof(struct fb_image);
 
                src = kmalloc(allocsize, GFP_ATOMIC);
-               if (!src)
+               if (!src) {
+                       allocsize = 0;
                        return -ENOMEM;
+               }
        }
 
        image = (struct fb_image *) (src + dsize);
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
new file mode 100644 (file)
index 0000000..79eddee
--- /dev/null
@@ -0,0 +1,17 @@
+obj-y  += core/
+obj-y  += console/
+obj-y  += evtchn/
+obj-y  += privcmd/
+obj-y  += xenbus/
+
+obj-$(CONFIG_XEN_UTIL)                 += util.o
+obj-$(CONFIG_XEN_BALLOON)              += balloon/
+obj-$(CONFIG_XEN_DEVMEM)               += char/
+obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += blkback/
+obj-$(CONFIG_XEN_BLKDEV_TAP)           += blktap/
+obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
+obj-$(CONFIG_XEN_TPMDEV_BACKEND)       += tpmback/
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      += blkfront/
+obj-$(CONFIG_XEN_NETDEV_FRONTEND)      += netfront/
+obj-$(CONFIG_XEN_PCIDEV_BACKEND)       += pciback/
+obj-$(CONFIG_XEN_PCIDEV_FRONTEND)      += pcifront/
diff --git a/drivers/xen/balloon/Makefile b/drivers/xen/balloon/Makefile
new file mode 100644 (file)
index 0000000..0e3a348
--- /dev/null
@@ -0,0 +1,2 @@
+
+obj-y += balloon.o
diff --git a/drivers/xen/balloon/balloon.c b/drivers/xen/balloon/balloon.c
new file mode 100644 (file)
index 0000000..3226753
--- /dev/null
@@ -0,0 +1,617 @@
+/******************************************************************************
+ * balloon.c
+ *
+ * Xen balloon driver - enables returning/claiming memory to/from Xen.
+ *
+ * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
+ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/smp_lock.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <xen/xen_proc.h>
+#include <asm/hypervisor.h>
+#include <xen/balloon.h>
+#include <xen/interface/memory.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+#include <linux/list.h>
+
+#include <xen/xenbus.h>
+
+#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
+
+#ifdef CONFIG_PROC_FS
+static struct proc_dir_entry *balloon_pde;
+#endif
+
+static DECLARE_MUTEX(balloon_mutex);
+
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and driver_pages, and
+ * balloon lists.
+ */
+DEFINE_SPINLOCK(balloon_lock);
+
+/* We aim for 'current allocation' == 'target allocation'. */
+static unsigned long current_pages;
+static unsigned long target_pages;
+
+/* We increase/decrease in batches which fit in a page */
+static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; 
+
+/* VM /proc information for memory */
+extern unsigned long totalram_pages;
+
+/* We may hit the hard limit in Xen. If we do then we remember it. */
+static unsigned long hard_limit;
+
+/*
+ * Drivers may alter the memory reservation independently, but they must
+ * inform the balloon driver so that we can avoid hitting the hard limit.
+ */
+static unsigned long driver_pages;
+
+/* List of ballooned pages, threaded through the mem_map array. */
+static LIST_HEAD(ballooned_pages);
+static unsigned long balloon_low, balloon_high;
+
+/* Main work function, always executed in process context. */
+static void balloon_process(void *unused);
+static DECLARE_WORK(balloon_worker, balloon_process, NULL);
+static struct timer_list balloon_timer;
+
+/* When ballooning out (allocating memory to return to Xen) we don't really 
+   want the kernel to try too hard since that can trigger the oom killer. */
+#define GFP_BALLOON \
+       (GFP_HIGHUSER | __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC)
+
+#define PAGE_TO_LIST(p) (&(p)->lru)
+#define LIST_TO_PAGE(l) list_entry((l), struct page, lru)
+#define UNLIST_PAGE(p)                         \
+       do {                                    \
+               list_del(PAGE_TO_LIST(p));      \
+               PAGE_TO_LIST(p)->next = NULL;   \
+               PAGE_TO_LIST(p)->prev = NULL;   \
+       } while(0)
+
+#define IPRINTK(fmt, args...) \
+       printk(KERN_INFO "xen_mem: " fmt, ##args)
+#define WPRINTK(fmt, args...) \
+       printk(KERN_WARNING "xen_mem: " fmt, ##args)
+
+/* balloon_append: add the given page to the balloon. */
+static void balloon_append(struct page *page)
+{
+       /* Lowmem is re-populated first, so highmem pages go at list tail. */
+       if (PageHighMem(page)) {
+               list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
+               balloon_high++;
+       } else {
+               list_add(PAGE_TO_LIST(page), &ballooned_pages);
+               balloon_low++;
+       }
+}
+
+/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
+static struct page *balloon_retrieve(void)
+{
+       struct page *page;
+
+       if (list_empty(&ballooned_pages))
+               return NULL;
+
+       page = LIST_TO_PAGE(ballooned_pages.next);
+       UNLIST_PAGE(page);
+
+       if (PageHighMem(page))
+               balloon_high--;
+       else
+               balloon_low--;
+
+       return page;
+}
+
+static struct page *balloon_first_page(void)
+{
+       if (list_empty(&ballooned_pages))
+               return NULL;
+       return LIST_TO_PAGE(ballooned_pages.next);
+}
+
+static struct page *balloon_next_page(struct page *page)
+{
+       struct list_head *next = PAGE_TO_LIST(page)->next;
+       if (next == &ballooned_pages)
+               return NULL;
+       return LIST_TO_PAGE(next);
+}
+
+static void balloon_alarm(unsigned long unused)
+{
+       schedule_work(&balloon_worker);
+}
+
+static unsigned long current_target(void)
+{
+       unsigned long target = min(target_pages, hard_limit);
+       if (target > (current_pages + balloon_low + balloon_high))
+               target = current_pages + balloon_low + balloon_high;
+       return target;
+}
+
+static int increase_reservation(unsigned long nr_pages)
+{
+       unsigned long  pfn, i, flags;
+       struct page   *page;
+       long           rc;
+       struct xen_memory_reservation reservation = {
+               .address_bits = 0,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
+
+       if (nr_pages > ARRAY_SIZE(frame_list))
+               nr_pages = ARRAY_SIZE(frame_list);
+
+       balloon_lock(flags);
+
+       page = balloon_first_page();
+       for (i = 0; i < nr_pages; i++) {
+               BUG_ON(page == NULL);
+               frame_list[i] = page_to_pfn(page);
+               page = balloon_next_page(page);
+       }
+
+       set_xen_guest_handle(reservation.extent_start, frame_list);
+       reservation.nr_extents   = nr_pages;
+       rc = HYPERVISOR_memory_op(
+               XENMEM_populate_physmap, &reservation);
+       if (rc < nr_pages) {
+               if (rc > 0) {
+                       int ret;
+
+                       /* We hit the Xen hard limit: reprobe. */
+                       reservation.nr_extents = rc;
+                       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+                                       &reservation);
+                       BUG_ON(ret != rc);
+               }
+               if (rc >= 0)
+                       hard_limit = current_pages + rc - driver_pages;
+               goto out;
+       }
+
+       for (i = 0; i < nr_pages; i++) {
+               page = balloon_retrieve();
+               BUG_ON(page == NULL);
+
+               pfn = page_to_pfn(page);
+               BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
+                      phys_to_machine_mapping_valid(pfn));
+
+               /* Update P->M and M->P tables. */
+               set_phys_to_machine(pfn, frame_list[i]);
+               xen_machphys_update(frame_list[i], pfn);
+
+               /* Link back into the page tables if not highmem. */
+               if (pfn < max_low_pfn) {
+                       int ret;
+                       ret = HYPERVISOR_update_va_mapping(
+                               (unsigned long)__va(pfn << PAGE_SHIFT),
+                               pfn_pte_ma(frame_list[i], PAGE_KERNEL),
+                               0);
+                       BUG_ON(ret);
+               }
+
+               /* Relinquish the page back to the allocator. */
+               ClearPageReserved(page);
+               init_page_count(page);
+               __free_page(page);
+       }
+
+       current_pages += nr_pages;
+       totalram_pages = current_pages;
+
+ out:
+       balloon_unlock(flags);
+
+       return 0;
+}
+
+static int decrease_reservation(unsigned long nr_pages)
+{
+       unsigned long  pfn, i, flags;
+       struct page   *page;
+       void          *v;
+       int            need_sleep = 0;
+       int ret;
+       struct xen_memory_reservation reservation = {
+               .address_bits = 0,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
+
+       if (nr_pages > ARRAY_SIZE(frame_list))
+               nr_pages = ARRAY_SIZE(frame_list);
+
+       for (i = 0; i < nr_pages; i++) {
+               if ((page = alloc_page(GFP_BALLOON)) == NULL) {
+                       nr_pages = i;
+                       need_sleep = 1;
+                       break;
+               }
+
+               pfn = page_to_pfn(page);
+               frame_list[i] = pfn_to_mfn(pfn);
+
+               if (!PageHighMem(page)) {
+                       v = phys_to_virt(pfn << PAGE_SHIFT);
+                       scrub_pages(v, 1);
+                       ret = HYPERVISOR_update_va_mapping(
+                               (unsigned long)v, __pte_ma(0), 0);
+                       BUG_ON(ret);
+               }
+#ifdef CONFIG_XEN_SCRUB_PAGES
+               else {
+                       v = kmap(page);
+                       scrub_pages(v, 1);
+                       kunmap(page);
+               }
+#endif
+       }
+
+       /* Ensure that ballooned highmem pages don't have kmaps. */
+       kmap_flush_unused();
+       flush_tlb_all();
+
+       balloon_lock(flags);
+
+       /* No more mappings: invalidate P2M and add to balloon. */
+       for (i = 0; i < nr_pages; i++) {
+               pfn = mfn_to_pfn(frame_list[i]);
+               set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+               balloon_append(pfn_to_page(pfn));
+       }
+
+       set_xen_guest_handle(reservation.extent_start, frame_list);
+       reservation.nr_extents   = nr_pages;
+       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+       BUG_ON(ret != nr_pages);
+
+       current_pages -= nr_pages;
+       totalram_pages = current_pages;
+
+       balloon_unlock(flags);
+
+       return need_sleep;
+}
+
+/*
+ * We avoid multiple worker processes conflicting via the balloon mutex.
+ * We may of course race updates of the target counts (which are protected
+ * by the balloon lock), or with changes to the Xen hard limit, but we will
+ * recover from these in time.
+ */
+static void balloon_process(void *unused)
+{
+       int need_sleep = 0;
+       long credit;
+
+       down(&balloon_mutex);
+
+       do {
+               credit = current_target() - current_pages;
+               if (credit > 0)
+                       need_sleep = (increase_reservation(credit) != 0);
+               if (credit < 0)
+                       need_sleep = (decrease_reservation(-credit) != 0);
+
+#ifndef CONFIG_PREEMPT
+               if (need_resched())
+                       schedule();
+#endif
+       } while ((credit != 0) && !need_sleep);
+
+       /* Schedule more work if there is some still to be done. */
+       if (current_target() != current_pages)
+               mod_timer(&balloon_timer, jiffies + HZ);
+
+       up(&balloon_mutex);
+}
+
+/* Resets the Xen limit, sets new target, and kicks off processing. */
+static void set_new_target(unsigned long target)
+{
+       /* No need for lock. Not read-modify-write updates. */
+       hard_limit   = ~0UL;
+       target_pages = target;
+       schedule_work(&balloon_worker);
+}
+
+static struct xenbus_watch target_watch =
+{
+       .node = "memory/target"
+};
+
+/* React to a change in the target key */
+static void watch_target(struct xenbus_watch *watch,
+                        const char **vec, unsigned int len)
+{
+       unsigned long long new_target;
+       int err;
+
+       err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
+       if (err != 1) {
+               /* This is ok (for domain0 at least) - so just return */
+               return;
+       }
+
+       /* The given memory/target value is in KiB, so it needs converting to
+        * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
+        */
+       set_new_target(new_target >> (PAGE_SHIFT - 10));
+}
+
+static int balloon_init_watcher(struct notifier_block *notifier,
+                               unsigned long event,
+                               void *data)
+{
+       int err;
+
+       err = register_xenbus_watch(&target_watch);
+       if (err)
+               printk(KERN_ERR "Failed to set balloon watcher\n");
+
+       return NOTIFY_DONE;
+}
+
+#ifdef CONFIG_PROC_FS
+static int balloon_write(struct file *file, const char __user *buffer,
+                        unsigned long count, void *data)
+{
+       char memstring[64], *endchar;
+       unsigned long long target_bytes;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (count <= 1)
+               return -EBADMSG; /* runt */
+       if (count >= sizeof(memstring))
+               return -EFBIG;   /* too long */
+
+       if (copy_from_user(memstring, buffer, count))
+               return -EFAULT;
+       memstring[count] = '\0';
+
+       target_bytes = memparse(memstring, &endchar);
+       set_new_target(target_bytes >> PAGE_SHIFT);
+
+       return count;
+}
+
+static int balloon_read(char *page, char **start, off_t off,
+                       int count, int *eof, void *data)
+{
+       int len;
+
+       len = sprintf(
+               page,
+               "Current allocation: %8lu kB\n"
+               "Requested target:   %8lu kB\n"
+               "Low-mem balloon:    %8lu kB\n"
+               "High-mem balloon:   %8lu kB\n"
+               "Xen hard limit:     ",
+               PAGES2KB(current_pages), PAGES2KB(target_pages), 
+               PAGES2KB(balloon_low), PAGES2KB(balloon_high));
+
+       if (hard_limit != ~0UL) {
+               len += sprintf(
+                       page + len, 
+                       "%8lu kB (inc. %8lu kB driver headroom)\n",
+                       PAGES2KB(hard_limit), PAGES2KB(driver_pages));
+       } else {
+               len += sprintf(
+                       page + len,
+                       "     ??? kB\n");
+       }
+
+       *eof = 1;
+       return len;
+}
+#endif
+
+static struct notifier_block xenstore_notifier;
+
+static int __init balloon_init(void)
+{
+       unsigned long pfn;
+       struct page *page;
+
+       if (!is_running_on_xen())
+               return -ENODEV;
+
+       IPRINTK("Initialising balloon driver.\n");
+
+       current_pages = min(xen_start_info->nr_pages, max_pfn);
+       totalram_pages = current_pages;
+       target_pages  = current_pages;
+       balloon_low   = 0;
+       balloon_high  = 0;
+       driver_pages  = 0UL;
+       hard_limit    = ~0UL;
+
+       init_timer(&balloon_timer);
+       balloon_timer.data = 0;
+       balloon_timer.function = balloon_alarm;
+    
+#ifdef CONFIG_PROC_FS
+       if ((balloon_pde = create_xen_proc_entry("balloon", 0644)) == NULL) {
+               WPRINTK("Unable to create /proc/xen/balloon.\n");
+               return -1;
+       }
+
+       balloon_pde->read_proc  = balloon_read;
+       balloon_pde->write_proc = balloon_write;
+#endif
+    
+       /* Initialise the balloon with excess memory space. */
+       for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+               page = pfn_to_page(pfn);
+               if (!PageReserved(page))
+                       balloon_append(page);
+       }
+
+       target_watch.callback = watch_target;
+       xenstore_notifier.notifier_call = balloon_init_watcher;
+
+       register_xenstore_notifier(&xenstore_notifier);
+    
+       return 0;
+}
+
+subsys_initcall(balloon_init);
+
+void balloon_update_driver_allowance(long delta)
+{
+       unsigned long flags;
+
+       balloon_lock(flags);
+       driver_pages += delta;
+       balloon_unlock(flags);
+}
+
+static int dealloc_pte_fn(
+       pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
+{
+       unsigned long mfn = pte_mfn(*pte);
+       int ret;
+       struct xen_memory_reservation reservation = {
+               .nr_extents   = 1,
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
+       set_xen_guest_handle(reservation.extent_start, &mfn);
+       set_pte_at(&init_mm, addr, pte, __pte_ma(0));
+       set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
+       ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+       BUG_ON(ret != 1);
+       return 0;
+}
+
+struct page *balloon_alloc_empty_page_range(unsigned long nr_pages)
+{
+       unsigned long vstart, flags;
+       unsigned int  order = get_order(nr_pages * PAGE_SIZE);
+       int ret;
+       unsigned long i;
+       struct page *page;
+
+       vstart = __get_free_pages(GFP_KERNEL, order);
+       if (vstart == 0)
+               return NULL;
+
+       scrub_pages(vstart, 1 << order);
+
+       balloon_lock(flags);
+       if (xen_feature(XENFEAT_auto_translated_physmap)) {
+               unsigned long gmfn = __pa(vstart) >> PAGE_SHIFT;
+               struct xen_memory_reservation reservation = {
+                       .nr_extents   = 1,
+                       .extent_order = order,
+                       .domid        = DOMID_SELF
+               };
+               set_xen_guest_handle(reservation.extent_start, &gmfn);
+               ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+                                          &reservation);
+               if (ret == -ENOSYS)
+                       goto err;
+               BUG_ON(ret != 1);
+       } else {
+               ret = apply_to_page_range(&init_mm, vstart, PAGE_SIZE << order,
+                                         dealloc_pte_fn, NULL);
+               if (ret == -ENOSYS)
+                       goto err;
+               BUG_ON(ret);
+       }
+       current_pages -= 1UL << order;
+       totalram_pages = current_pages;
+       balloon_unlock(flags);
+
+       schedule_work(&balloon_worker);
+
+       flush_tlb_all();
+
+       page = virt_to_page(vstart);
+
+       for (i = 0; i < (1UL << order); i++)
+               init_page_count(page + i);
+
+       return page;
+
+ err:
+       free_pages(vstart, order);
+       balloon_unlock(flags);
+       return NULL;
+}
+
+void balloon_dealloc_empty_page_range(
+       struct page *page, unsigned long nr_pages)
+{
+       unsigned long i, flags;
+       unsigned int  order = get_order(nr_pages * PAGE_SIZE);
+
+       balloon_lock(flags);
+       for (i = 0; i < (1UL << order); i++) {
+               BUG_ON(page_count(page + i) != 1);
+               balloon_append(page + i);
+       }
+       balloon_unlock(flags);
+
+       schedule_work(&balloon_worker);
+}
+
+EXPORT_SYMBOL_GPL(balloon_update_driver_allowance);
+EXPORT_SYMBOL_GPL(balloon_alloc_empty_page_range);
+EXPORT_SYMBOL_GPL(balloon_dealloc_empty_page_range);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/blkback/Makefile b/drivers/xen/blkback/Makefile
new file mode 100644 (file)
index 0000000..8bab63d
--- /dev/null
@@ -0,0 +1,3 @@
+obj-$(CONFIG_XEN_BLKDEV_BACKEND) := blkbk.o
+
+blkbk-y        := blkback.o xenbus.o interface.o vbd.o
diff --git a/drivers/xen/blkback/blkback.c b/drivers/xen/blkback/blkback.c
new file mode 100644 (file)
index 0000000..c5b3eee
--- /dev/null
@@ -0,0 +1,566 @@
+/******************************************************************************
+ * arch/xen/drivers/blkif/backend/main.c
+ * 
+ * Back-end of the driver for virtual block devices. This portion of the
+ * driver exports a 'unified' block-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A 
+ * reference front-end implementation can be found in:
+ *  arch/xen/drivers/blkif/frontend
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Copyright (c) 2005, Christopher Clark
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <xen/balloon.h>
+#include <asm/hypervisor.h>
+#include "common.h"
+
+/*
+ * These are rather arbitrary. They are fairly large because adjacent requests
+ * pulled from a communication ring are quite likely to end up being part of
+ * the same scatter/gather request at the disc.
+ * 
+ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
+ * 
+ * This will increase the chances of being able to write whole tracks.
+ * 64 should be enough to keep us competitive with Linux.
+ */
+static int blkif_reqs = 64;
+module_param_named(reqs, blkif_reqs, int, 0);
+MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate");
+
+static int mmap_pages;
+
+/* Run-time switchable: /sys/module/blkback/parameters/ */
+static unsigned int log_stats = 0;
+static unsigned int debug_lvl = 0;
+module_param(log_stats, int, 0644);
+module_param(debug_lvl, int, 0644);
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a 
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements 
+ * the pendcnt towards zero. When it hits zero, the specified domain has a 
+ * response queued for it, with the saved 'id' passed back.
+ */
+typedef struct {
+       blkif_t       *blkif;
+       unsigned long  id;
+       int            nr_pages;
+       atomic_t       pendcnt;
+       unsigned short operation;
+       int            status;
+       struct list_head free_list;
+} pending_req_t;
+
+static pending_req_t *pending_reqs;
+static struct list_head pending_free;
+static DEFINE_SPINLOCK(pending_free_lock);
+static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq);
+
+#define BLKBACK_INVALID_HANDLE (~0)
+
+static unsigned long mmap_vstart;
+static unsigned long *pending_vaddrs;
+static grant_handle_t *pending_grant_handles;
+
+static inline int vaddr_pagenr(pending_req_t *req, int seg)
+{
+       return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
+}
+
+static inline unsigned long vaddr(pending_req_t *req, int seg)
+{
+       return pending_vaddrs[vaddr_pagenr(req, seg)];
+}
+
+#define pending_handle(_req, _seg) \
+       (pending_grant_handles[vaddr_pagenr(_req, _seg)])
+
+
+static int do_block_io_op(blkif_t *blkif);
+static void dispatch_rw_block_io(blkif_t *blkif,
+                                blkif_request_t *req,
+                                pending_req_t *pending_req);
+static void make_response(blkif_t *blkif, unsigned long id, 
+                         unsigned short op, int st);
+
+/******************************************************************
+ * misc small helpers
+ */
+static pending_req_t* alloc_req(void)
+{
+       pending_req_t *req = NULL;
+       unsigned long flags;
+
+       spin_lock_irqsave(&pending_free_lock, flags);
+       if (!list_empty(&pending_free)) {
+               req = list_entry(pending_free.next, pending_req_t, free_list);
+               list_del(&req->free_list);
+       }
+       spin_unlock_irqrestore(&pending_free_lock, flags);
+       return req;
+}
+
+static void free_req(pending_req_t *req)
+{
+       unsigned long flags;
+       int was_empty;
+
+       spin_lock_irqsave(&pending_free_lock, flags);
+       was_empty = list_empty(&pending_free);
+       list_add(&req->free_list, &pending_free);
+       spin_unlock_irqrestore(&pending_free_lock, flags);
+       if (was_empty)
+               wake_up(&pending_free_wq);
+}
+
+static void unplug_queue(blkif_t *blkif)
+{
+       if (blkif->plug == NULL)
+               return;
+       if (blkif->plug->unplug_fn)
+               blkif->plug->unplug_fn(blkif->plug);
+       blk_put_queue(blkif->plug);
+       blkif->plug = NULL;
+}
+
+static void plug_queue(blkif_t *blkif, struct bio *bio)
+{
+       request_queue_t *q = bdev_get_queue(bio->bi_bdev);
+
+       if (q == blkif->plug)
+               return;
+       unplug_queue(blkif);
+       blk_get_queue(q);
+       blkif->plug = q;
+}
+
+static void fast_flush_area(pending_req_t *req)
+{
+       struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       unsigned int i, invcount = 0;
+       grant_handle_t handle;
+       int ret;
+
+       for (i = 0; i < req->nr_pages; i++) {
+               handle = pending_handle(req, i);
+               if (handle == BLKBACK_INVALID_HANDLE)
+                       continue;
+               gnttab_set_unmap_op(&unmap[i], vaddr(req, i), GNTMAP_host_map,
+                                   handle);
+               pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
+               invcount++;
+       }
+
+       ret = HYPERVISOR_grant_table_op(
+               GNTTABOP_unmap_grant_ref, unmap, invcount);
+       BUG_ON(ret);
+}
+
+/******************************************************************
+ * SCHEDULER FUNCTIONS
+ */
+
+static void print_stats(blkif_t *blkif)
+{
+       printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d\n",
+              current->comm, blkif->st_oo_req,
+              blkif->st_rd_req, blkif->st_wr_req);
+       blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+       blkif->st_rd_req = 0;
+       blkif->st_wr_req = 0;
+       blkif->st_oo_req = 0;
+}
+
+int blkif_schedule(void *arg)
+{
+       blkif_t *blkif = arg;
+
+       blkif_get(blkif);
+
+       if (debug_lvl)
+               printk(KERN_DEBUG "%s: started\n", current->comm);
+
+       while (!kthread_should_stop()) {
+               wait_event_interruptible(
+                       blkif->wq,
+                       blkif->waiting_reqs || kthread_should_stop());
+               wait_event_interruptible(
+                       pending_free_wq,
+                       !list_empty(&pending_free) || kthread_should_stop());
+
+               blkif->waiting_reqs = 0;
+               smp_mb(); /* clear flag *before* checking for work */
+
+               if (do_block_io_op(blkif))
+                       blkif->waiting_reqs = 1;
+               unplug_queue(blkif);
+
+               if (log_stats && time_after(jiffies, blkif->st_print))
+                       print_stats(blkif);
+       }
+
+       if (log_stats)
+               print_stats(blkif);
+       if (debug_lvl)
+               printk(KERN_DEBUG "%s: exiting\n", current->comm);
+
+       blkif->xenblkd = NULL;
+       blkif_put(blkif);
+
+       return 0;
+}
+
+/******************************************************************
+ * COMPLETION CALLBACK -- Called as bh->b_end_io()
+ */
+
+static void __end_block_io_op(pending_req_t *pending_req, int uptodate)
+{
+       /* An error fails the entire request. */
+       if (!uptodate) {
+               DPRINTK("Buffer not up-to-date at end of operation\n");
+               pending_req->status = BLKIF_RSP_ERROR;
+       }
+
+       if (atomic_dec_and_test(&pending_req->pendcnt)) {
+               fast_flush_area(pending_req);
+               make_response(pending_req->blkif, pending_req->id,
+                             pending_req->operation, pending_req->status);
+               blkif_put(pending_req->blkif);
+               free_req(pending_req);
+       }
+}
+
+static int end_block_io_op(struct bio *bio, unsigned int done, int error)
+{
+       if (bio->bi_size != 0)
+               return 1;
+       __end_block_io_op(bio->bi_private, !error);
+       bio_put(bio);
+       return error;
+}
+
+
+/******************************************************************************
+ * NOTIFICATION FROM GUEST OS.
+ */
+
+static void blkif_notify_work(blkif_t *blkif)
+{
+       blkif->waiting_reqs = 1;
+       wake_up(&blkif->wq);
+}
+
+irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+       blkif_notify_work(dev_id);
+       return IRQ_HANDLED;
+}
+
+
+
+/******************************************************************
+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
+ */
+
+static int do_block_io_op(blkif_t *blkif)
+{
+       blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+       blkif_request_t *req;
+       pending_req_t *pending_req;
+       RING_IDX rc, rp;
+       int more_to_do = 0;
+
+       rc = blk_ring->req_cons;
+       rp = blk_ring->sring->req_prod;
+       rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+       while ((rc != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) {
+
+               pending_req = alloc_req();
+               if (NULL == pending_req) {
+                       blkif->st_oo_req++;
+                       more_to_do = 1;
+                       break;
+               }
+
+               req = RING_GET_REQUEST(blk_ring, rc);
+               blk_ring->req_cons = ++rc; /* before make_response() */
+
+               switch (req->operation) {
+               case BLKIF_OP_READ:
+                       blkif->st_rd_req++;
+                       dispatch_rw_block_io(blkif, req, pending_req);
+                       break;
+               case BLKIF_OP_WRITE:
+                       blkif->st_wr_req++;
+                       dispatch_rw_block_io(blkif, req, pending_req);
+                       break;
+               default:
+                       DPRINTK("error: unknown block io operation [%d]\n",
+                               req->operation);
+                       make_response(blkif, req->id, req->operation,
+                                     BLKIF_RSP_ERROR);
+                       free_req(pending_req);
+                       break;
+               }
+       }
+       return more_to_do;
+}
+
+static void dispatch_rw_block_io(blkif_t *blkif,
+                                blkif_request_t *req,
+                                pending_req_t *pending_req)
+{
+       extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
+       int operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
+       struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       struct phys_req preq;
+       struct { 
+               unsigned long buf; unsigned int nsec;
+       } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       unsigned int nseg;
+       struct bio *bio = NULL, *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+       int ret, i, nbio = 0;
+
+       /* Check that number of segments is sane. */
+       nseg = req->nr_segments;
+       if (unlikely(nseg == 0) || 
+           unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+               DPRINTK("Bad number of segments in request (%d)\n", nseg);
+               goto fail_response;
+       }
+
+       preq.dev           = req->handle;
+       preq.sector_number = req->sector_number;
+       preq.nr_sects      = 0;
+
+       pending_req->blkif     = blkif;
+       pending_req->id        = req->id;
+       pending_req->operation = operation;
+       pending_req->status    = BLKIF_RSP_OKAY;
+       pending_req->nr_pages  = nseg;
+
+       for (i = 0; i < nseg; i++) {
+               uint32_t flags;
+
+               seg[i].nsec = req->seg[i].last_sect -
+                       req->seg[i].first_sect + 1;
+
+               if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
+                   (seg[i].nsec <= 0))
+                       goto fail_response;
+               preq.nr_sects += seg[i].nsec;
+
+               flags = GNTMAP_host_map;
+               if ( operation == WRITE )
+                       flags |= GNTMAP_readonly;
+               gnttab_set_map_op(&map[i], vaddr(pending_req, i), flags,
+                                 req->seg[i].gref, blkif->domid);
+       }
+
+       ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
+       BUG_ON(ret);
+
+       for (i = 0; i < nseg; i++) {
+               if (unlikely(map[i].status != 0)) {
+                       DPRINTK("invalid buffer -- could not remap it\n");
+                       goto fail_flush;
+               }
+
+               pending_handle(pending_req, i) = map[i].handle;
+               set_phys_to_machine(__pa(vaddr(
+                       pending_req, i)) >> PAGE_SHIFT,
+                       FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
+               seg[i].buf  = map[i].dev_bus_addr | 
+                       (req->seg[i].first_sect << 9);
+       }
+
+       if (vbd_translate(&preq, blkif, operation) != 0) {
+               DPRINTK("access denied: %s of [%llu,%llu] on dev=%04x\n", 
+                       operation == READ ? "read" : "write",
+                       preq.sector_number,
+                       preq.sector_number + preq.nr_sects, preq.dev); 
+               goto fail_flush;
+       }
+
+       for (i = 0; i < nseg; i++) {
+               if (((int)preq.sector_number|(int)seg[i].nsec) &
+                   ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
+                       DPRINTK("Misaligned I/O request from domain %d",
+                               blkif->domid);
+                       goto fail_put_bio;
+               }
+
+               while ((bio == NULL) ||
+                      (bio_add_page(bio,
+                                    virt_to_page(vaddr(pending_req, i)),
+                                    seg[i].nsec << 9,
+                                    seg[i].buf & ~PAGE_MASK) == 0)) {
+                       bio = biolist[nbio++] = bio_alloc(GFP_KERNEL, nseg-i);
+                       if (unlikely(bio == NULL))
+                               goto fail_put_bio;
+
+                       bio->bi_bdev    = preq.bdev;
+                       bio->bi_private = pending_req;
+                       bio->bi_end_io  = end_block_io_op;
+                       bio->bi_sector  = preq.sector_number;
+               }
+
+               preq.sector_number += seg[i].nsec;
+       }
+
+       plug_queue(blkif, bio);
+       atomic_set(&pending_req->pendcnt, nbio);
+       blkif_get(blkif);
+
+       for (i = 0; i < nbio; i++)
+               submit_bio(operation, biolist[i]);
+
+       return;
+
+ fail_put_bio:
+       while (nbio-- > 0)
+               if (biolist[nbio]) bio_put(biolist[nbio]);
+ fail_flush:
+       fast_flush_area(pending_req);
+ fail_response:
+       make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+       free_req(pending_req);
+} 
+
+
+
+/******************************************************************
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
+ */
+
+
+static void make_response(blkif_t *blkif, unsigned long id, 
+                         unsigned short op, int st)
+{
+       blkif_response_t *resp;
+       unsigned long     flags;
+       blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+       int more_to_do = 0;
+       int notify;
+
+       spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+
+       /* Place on the response ring for the relevant domain. */ 
+       resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
+       resp->id        = id;
+       resp->operation = op;
+       resp->status    = st;
+       blk_ring->rsp_prod_pvt++;
+       RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);
+
+       if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) {
+               /*
+                * Tail check for pending requests. Allows frontend to avoid
+                * notifications if requests are already in flight (lower
+                * overheads and promotes batching).
+                */
+               RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do);
+
+       } else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) {
+               more_to_do = 1;
+
+       }
+       spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+
+       if (more_to_do)
+               blkif_notify_work(blkif);
+       if (notify)
+               notify_remote_via_irq(blkif->irq);
+}
+
+static int __init blkif_init(void)
+{
+       struct page *page;
+       int i;
+
+       if (!is_running_on_xen())
+               return -ENODEV;
+
+       mmap_pages            = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+
+#ifdef CONFIG_XEN_IA64_DOM0_NON_VP
+       extern unsigned long alloc_empty_foreign_map_page_range(
+               unsigned long pages);
+       mmap_vstart = (unsigned long)
+               alloc_empty_foreign_map_page_range(mmap_pages);
+#else /* ! ia64 */
+       page = balloon_alloc_empty_page_range(mmap_pages);
+       if (page == NULL)
+               return -ENOMEM;
+       mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+#endif
+
+       pending_reqs          = kmalloc(sizeof(pending_reqs[0]) *
+                                       blkif_reqs, GFP_KERNEL);
+       pending_grant_handles = kmalloc(sizeof(pending_grant_handles[0]) *
+                                       mmap_pages, GFP_KERNEL);
+       pending_vaddrs        = kmalloc(sizeof(pending_vaddrs[0]) *
+                                       mmap_pages, GFP_KERNEL);
+       if (!pending_reqs || !pending_grant_handles || !pending_vaddrs) {
+               kfree(pending_reqs);
+               kfree(pending_grant_handles);
+               kfree(pending_vaddrs);
+               printk(KERN_ERR "%s: out of memory\n", __FUNCTION__);
+               return -ENOMEM;
+       }
+
+       blkif_interface_init();
+       
+       printk(KERN_INFO "%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
+              __FUNCTION__, blkif_reqs, mmap_pages, mmap_vstart);
+       BUG_ON(mmap_vstart == 0);
+       for (i = 0; i < mmap_pages; i++) {
+               pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
+               pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
+       }
+
+       memset(pending_reqs, 0, sizeof(pending_reqs[0]) * blkif_reqs);
+       INIT_LIST_HEAD(&pending_free);
+
+       for (i = 0; i < blkif_reqs; i++)
+               list_add_tail(&pending_reqs[i].free_list, &pending_free);
+    
+       blkif_xenbus_init();
+
+       return 0;
+}
+
+module_init(blkif_init);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/blkback/common.h b/drivers/xen/blkback/common.h
new file mode 100644 (file)
index 0000000..d057f79
--- /dev/null
@@ -0,0 +1,134 @@
+/* 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __BLKIF__BACKEND__COMMON_H__
+#define __BLKIF__BACKEND__COMMON_H__
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/blkdev.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <asm/io.h>
+#include <asm/setup.h>
+#include <asm/pgalloc.h>
+#include <xen/evtchn.h>
+#include <asm/hypervisor.h>
+#include <xen/interface/io/blkif.h>
+#include <xen/interface/io/ring.h>
+#include <xen/gnttab.h>
+#include <xen/driver_util.h>
+
+#define DPRINTK(_f, _a...)                     \
+       pr_debug("(file=%s, line=%d) " _f,      \
+                __FILE__ , __LINE__ , ## _a )
+
+struct vbd {
+       blkif_vdev_t   handle;      /* what the domain refers to this vbd as */
+       unsigned char  readonly;    /* Non-zero -> read-only */
+       unsigned char  type;        /* VDISK_xxx */
+       u32            pdevice;     /* phys device that this vbd maps to */
+       struct block_device *bdev;
+}; 
+
+struct backend_info; 
+
+typedef struct blkif_st {
+       /* Unique identifier for this interface. */
+       domid_t           domid;
+       unsigned int      handle;
+       /* Physical parameters of the comms window. */
+       unsigned int      evtchn;
+       unsigned int      irq;
+       /* Comms information. */
+       blkif_back_ring_t blk_ring;
+       struct vm_struct *blk_ring_area;
+       /* The VBD attached to this interface. */
+       struct vbd        vbd;
+       /* Back pointer to the backend_info. */
+       struct backend_info *be; 
+       /* Private fields. */
+       spinlock_t       blk_ring_lock;
+       atomic_t         refcnt;
+
+       wait_queue_head_t   wq;
+       struct task_struct  *xenblkd;
+       unsigned int        waiting_reqs;
+       request_queue_t     *plug;
+
+       /* statistics */
+       unsigned long       st_print;
+       int                 st_rd_req;
+       int                 st_wr_req;
+       int                 st_oo_req;
+
+       wait_queue_head_t waiting_to_free;
+
+       grant_handle_t shmem_handle;
+       grant_ref_t    shmem_ref;
+} blkif_t;
+
+blkif_t *blkif_alloc(domid_t domid);
+void blkif_disconnect(blkif_t *blkif);
+void blkif_free(blkif_t *blkif);
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn);
+
+#define blkif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define blkif_put(_b)                                  \
+       do {                                            \
+               if (atomic_dec_and_test(&(_b)->refcnt)) \
+                       wake_up(&(_b)->waiting_to_free);\
+       } while (0)
+
+/* Create a vbd. */
+int vbd_create(blkif_t *blkif, blkif_vdev_t vdevice, unsigned major,
+              unsigned minor, int readonly);
+void vbd_free(struct vbd *vbd);
+
+unsigned long vbd_size(struct vbd *vbd);
+unsigned int vbd_info(struct vbd *vbd);
+unsigned long vbd_secsize(struct vbd *vbd);
+
+struct phys_req {
+       unsigned short       dev;
+       unsigned short       nr_sects;
+       struct block_device *bdev;
+       blkif_sector_t       sector_number;
+};
+
+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation); 
+
+void blkif_interface_init(void);
+
+void blkif_xenbus_init(void);
+
+irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+int blkif_schedule(void *arg);
+
+#endif /* __BLKIF__BACKEND__COMMON_H__ */
diff --git a/drivers/xen/blkback/interface.c b/drivers/xen/blkback/interface.c
new file mode 100644 (file)
index 0000000..53b4764
--- /dev/null
@@ -0,0 +1,171 @@
+/******************************************************************************
+ * arch/xen/drivers/blkif/backend/interface.c
+ * 
+ * Block-device interface management.
+ * 
+ * Copyright (c) 2004, Keir Fraser
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+#include <xen/evtchn.h>
+#include <linux/kthread.h>
+
+static kmem_cache_t *blkif_cachep;
+
+blkif_t *blkif_alloc(domid_t domid)
+{
+       blkif_t *blkif;
+
+       blkif = kmem_cache_alloc(blkif_cachep, GFP_KERNEL);
+       if (!blkif)
+               return ERR_PTR(-ENOMEM);
+
+       memset(blkif, 0, sizeof(*blkif));
+       blkif->domid = domid;
+       spin_lock_init(&blkif->blk_ring_lock);
+       atomic_set(&blkif->refcnt, 1);
+       init_waitqueue_head(&blkif->wq);
+       blkif->st_print = jiffies;
+       init_waitqueue_head(&blkif->waiting_to_free);
+
+       return blkif;
+}
+
+static int map_frontend_page(blkif_t *blkif, unsigned long shared_page)
+{
+       struct gnttab_map_grant_ref op;
+       int ret;
+
+       gnttab_set_map_op(&op, (unsigned long)blkif->blk_ring_area->addr,
+                         GNTMAP_host_map, shared_page, blkif->domid);
+
+       lock_vm_area(blkif->blk_ring_area);
+       ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
+       unlock_vm_area(blkif->blk_ring_area);
+       BUG_ON(ret);
+
+       if (op.status) {
+               DPRINTK(" Grant table operation failure !\n");
+               return op.status;
+       }
+
+       blkif->shmem_ref = shared_page;
+       blkif->shmem_handle = op.handle;
+
+       return 0;
+}
+
+static void unmap_frontend_page(blkif_t *blkif)
+{
+       struct gnttab_unmap_grant_ref op;
+       int ret;
+
+       gnttab_set_unmap_op(&op, (unsigned long)blkif->blk_ring_area->addr,
+                           GNTMAP_host_map, blkif->shmem_handle);
+
+       lock_vm_area(blkif->blk_ring_area);
+       ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
+       unlock_vm_area(blkif->blk_ring_area);
+       BUG_ON(ret);
+}
+
+int blkif_map(blkif_t *blkif, unsigned long shared_page, unsigned int evtchn)
+{
+       blkif_sring_t *sring;
+       int err;
+       struct evtchn_bind_interdomain bind_interdomain;
+
+       /* Already connected through? */
+       if (blkif->irq)
+               return 0;
+
+       if ( (blkif->blk_ring_area = alloc_vm_area(PAGE_SIZE)) == NULL )
+               return -ENOMEM;
+
+       err = map_frontend_page(blkif, shared_page);
+       if (err) {
+               free_vm_area(blkif->blk_ring_area);
+               return err;
+       }
+
+       bind_interdomain.remote_dom  = blkif->domid;
+       bind_interdomain.remote_port = evtchn;
+
+       err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+                                         &bind_interdomain);
+       if (err) {
+               unmap_frontend_page(blkif);
+               free_vm_area(blkif->blk_ring_area);
+               return err;
+       }
+
+       blkif->evtchn = bind_interdomain.local_port;
+
+       sring = (blkif_sring_t *)blkif->blk_ring_area->addr;
+       BACK_RING_INIT(&blkif->blk_ring, sring, PAGE_SIZE);
+
+       blkif->irq = bind_evtchn_to_irqhandler(
+               blkif->evtchn, blkif_be_int, 0, "blkif-backend", blkif);
+
+       return 0;
+}
+
+void blkif_disconnect(blkif_t *blkif)
+{
+       if (blkif->xenblkd) {
+               kthread_stop(blkif->xenblkd);
+               blkif->xenblkd = NULL;
+       }
+
+       atomic_dec(&blkif->refcnt);
+       wait_event(blkif->waiting_to_free, atomic_read(&blkif->refcnt) == 0);
+       atomic_inc(&blkif->refcnt);
+
+       if (blkif->irq) {
+               unbind_from_irqhandler(blkif->irq, blkif);
+               blkif->irq = 0;
+       }
+
+       if (blkif->blk_ring.sring) {
+               unmap_frontend_page(blkif);
+               free_vm_area(blkif->blk_ring_area);
+               blkif->blk_ring.sring = NULL;
+       }
+}
+
+void blkif_free(blkif_t *blkif)
+{
+       if (!atomic_dec_and_test(&blkif->refcnt))
+               BUG();
+       kmem_cache_free(blkif_cachep, blkif);
+}
+
+void __init blkif_interface_init(void)
+{
+       blkif_cachep = kmem_cache_create("blkif_cache", sizeof(blkif_t), 
+                                        0, 0, NULL, NULL);
+}
diff --git a/drivers/xen/blkback/vbd.c b/drivers/xen/blkback/vbd.c
new file mode 100644 (file)
index 0000000..a809b04
--- /dev/null
@@ -0,0 +1,119 @@
+/******************************************************************************
+ * blkback/vbd.c
+ * 
+ * Routines for managing virtual block devices (VBDs).
+ * 
+ * Copyright (c) 2003-2005, Keir Fraser & Steve Hand
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+#include <xen/xenbus.h>
+
+#define vbd_sz(_v)   ((_v)->bdev->bd_part ?                            \
+       (_v)->bdev->bd_part->nr_sects : (_v)->bdev->bd_disk->capacity)
+
+unsigned long vbd_size(struct vbd *vbd)
+{
+       return vbd_sz(vbd);
+}
+
+unsigned int vbd_info(struct vbd *vbd)
+{
+       return vbd->type | (vbd->readonly?VDISK_READONLY:0);
+}
+
+unsigned long vbd_secsize(struct vbd *vbd)
+{
+       return bdev_hardsect_size(vbd->bdev);
+}
+
+int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
+              unsigned minor, int readonly)
+{
+       struct vbd *vbd;
+       struct block_device *bdev;
+
+       vbd = &blkif->vbd;
+       vbd->handle   = handle; 
+       vbd->readonly = readonly;
+       vbd->type     = 0;
+
+       vbd->pdevice  = MKDEV(major, minor);
+
+       bdev = open_by_devnum(vbd->pdevice,
+                             vbd->readonly ? FMODE_READ : FMODE_WRITE);
+
+       if (IS_ERR(bdev)) {
+               DPRINTK("vbd_creat: device %08x could not be opened.\n",
+                       vbd->pdevice);
+               return -ENOENT;
+       }
+
+       vbd->bdev = bdev;
+
+       if (vbd->bdev->bd_disk == NULL) {
+               DPRINTK("vbd_creat: device %08x doesn't exist.\n",
+                       vbd->pdevice);
+               vbd_free(vbd);
+               return -ENOENT;
+       }
+
+       if (vbd->bdev->bd_disk->flags & GENHD_FL_CD)
+               vbd->type |= VDISK_CDROM;
+       if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
+               vbd->type |= VDISK_REMOVABLE;
+
+       DPRINTK("Successful creation of handle=%04x (dom=%u)\n",
+               handle, blkif->domid);
+       return 0;
+}
+
+void vbd_free(struct vbd *vbd)
+{
+       if (vbd->bdev)
+               blkdev_put(vbd->bdev);
+       vbd->bdev = NULL;
+}
+
+int vbd_translate(struct phys_req *req, blkif_t *blkif, int operation)
+{
+       struct vbd *vbd = &blkif->vbd;
+       int rc = -EACCES;
+
+       if ((operation == WRITE) && vbd->readonly)
+               goto out;
+
+       if (unlikely((req->sector_number + req->nr_sects) > vbd_sz(vbd)))
+               goto out;
+
+       req->dev  = vbd->pdevice;
+       req->bdev = vbd->bdev;
+       rc = 0;
+
+ out:
+       return rc;
+}
diff --git a/drivers/xen/blkfront/Kconfig b/drivers/xen/blkfront/Kconfig
new file mode 100644 (file)
index 0000000..edde837
--- /dev/null
@@ -0,0 +1,6 @@
+
+config XENBLOCK
+       tristate "Block device driver"
+       depends on ARCH_XEN
+       help
+         Block device driver for Xen
diff --git a/drivers/xen/blkfront/Makefile b/drivers/xen/blkfront/Makefile
new file mode 100644 (file)
index 0000000..182ef65
--- /dev/null
@@ -0,0 +1,5 @@
+
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      := xenblk.o
+
+xenblk-objs := blkfront.o vbd.o
+
diff --git a/drivers/xen/blkfront/blkfront.c b/drivers/xen/blkfront/blkfront.c
new file mode 100644 (file)
index 0000000..a911c76
--- /dev/null
@@ -0,0 +1,841 @@
+/******************************************************************************
+ * blkfront.c
+ * 
+ * XenLinux virtual block-device driver.
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004, Christian Limpach
+ * Copyright (c) 2004, Andrew Warfield
+ * Copyright (c) 2005, Christopher Clark
+ * Copyright (c) 2005, XenSource Ltd
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/version.h>
+#include "block.h"
+#include <linux/cdrom.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <scsi/scsi.h>
+#include <xen/evtchn.h>
+#include <xen/xenbus.h>
+#include <xen/interface/grant_table.h>
+#include <xen/gnttab.h>
+#include <asm/hypervisor.h>
+
+#define BLKIF_STATE_DISCONNECTED 0
+#define BLKIF_STATE_CONNECTED    1
+#define BLKIF_STATE_SUSPENDED    2
+
+#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
+    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
+#define GRANT_INVALID_REF      0
+
+static void connect(struct blkfront_info *);
+static void blkfront_closing(struct xenbus_device *);
+static int blkfront_remove(struct xenbus_device *);
+static int talk_to_backend(struct xenbus_device *, struct blkfront_info *);
+static int setup_blkring(struct xenbus_device *, struct blkfront_info *);
+
+static void kick_pending_request_queues(struct blkfront_info *);
+
+static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
+static void blkif_restart_queue(void *arg);
+static void blkif_recover(struct blkfront_info *);
+static void blkif_completion(struct blk_shadow *);
+static void blkif_free(struct blkfront_info *, int);
+
+
+/**
+ * Entry point to this code when a new device is created.  Allocate the basic
+ * structures and the ring buffer for communication with the backend, and
+ * inform the backend of the appropriate details for those.  Switch to
+ * Initialised state.
+ */
+static int blkfront_probe(struct xenbus_device *dev,
+                         const struct xenbus_device_id *id)
+{
+       int err, vdevice, i;
+       struct blkfront_info *info;
+
+       /* FIXME: Use dynamic device id if this is not set. */
+       err = xenbus_scanf(XBT_NIL, dev->nodename,
+                          "virtual-device", "%i", &vdevice);
+       if (err != 1) {
+               xenbus_dev_fatal(dev, err, "reading virtual-device");
+               return err;
+       }
+
+       info = kzalloc(sizeof(*info), GFP_KERNEL);
+       if (!info) {
+               xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
+               return -ENOMEM;
+       }
+
+       info->xbdev = dev;
+       info->vdevice = vdevice;
+       info->connected = BLKIF_STATE_DISCONNECTED;
+       INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
+
+       for (i = 0; i < BLK_RING_SIZE; i++)
+               info->shadow[i].req.id = i+1;
+       info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+       /* Front end dir is a number, which is used as the id. */
+       info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
+       dev->dev.driver_data = info;
+
+       err = talk_to_backend(dev, info);
+       if (err) {
+               kfree(info);
+               dev->dev.driver_data = NULL;
+               return err;
+       }
+
+       return 0;
+}
+
+
+/**
+ * We are reconnecting to the backend, due to a suspend/resume, or a backend
+ * driver restart.  We tear down our blkif structure and recreate it, but
+ * leave the device-layer structures intact so that this is transparent to the
+ * rest of the kernel.
+ */
+static int blkfront_resume(struct xenbus_device *dev)
+{
+       struct blkfront_info *info = dev->dev.driver_data;
+       int err;
+
+       DPRINTK("blkfront_resume: %s\n", dev->nodename);
+
+       blkif_free(info, 1);
+
+       err = talk_to_backend(dev, info);
+       if (!err)
+               blkif_recover(info);
+
+       return err;
+}
+
+
+/* Common code used when first setting up, and when resuming. */
+static int talk_to_backend(struct xenbus_device *dev,
+                          struct blkfront_info *info)
+{
+       const char *message = NULL;
+       struct xenbus_transaction xbt;
+       int err;
+
+       /* Create shared ring, alloc event channel. */
+       err = setup_blkring(dev, info);
+       if (err)
+               goto out;
+
+again:
+       err = xenbus_transaction_start(&xbt);
+       if (err) {
+               xenbus_dev_fatal(dev, err, "starting transaction");
+               goto destroy_blkring;
+       }
+
+       err = xenbus_printf(xbt, dev->nodename,
+                           "ring-ref","%u", info->ring_ref);
+       if (err) {
+               message = "writing ring-ref";
+               goto abort_transaction;
+       }
+       err = xenbus_printf(xbt, dev->nodename,
+                           "event-channel", "%u", info->evtchn);
+       if (err) {
+               message = "writing event-channel";
+               goto abort_transaction;
+       }
+
+       err = xenbus_transaction_end(xbt, 0);
+       if (err) {
+               if (err == -EAGAIN)
+                       goto again;
+               xenbus_dev_fatal(dev, err, "completing transaction");
+               goto destroy_blkring;
+       }
+
+       xenbus_switch_state(dev, XenbusStateInitialised);
+
+       return 0;
+
+ abort_transaction:
+       xenbus_transaction_end(xbt, 1);
+       if (message)
+               xenbus_dev_fatal(dev, err, "%s", message);
+ destroy_blkring:
+       blkif_free(info, 0);
+ out:
+       return err;
+}
+
+
+static int setup_blkring(struct xenbus_device *dev,
+                        struct blkfront_info *info)
+{
+       blkif_sring_t *sring;
+       int err;
+
+       info->ring_ref = GRANT_INVALID_REF;
+
+       sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
+       if (!sring) {
+               xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring");
+               return -ENOMEM;
+       }
+       SHARED_RING_INIT(sring);
+       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+       err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
+       if (err < 0) {
+               free_page((unsigned long)sring);
+               info->ring.sring = NULL;
+               goto fail;
+       }
+       info->ring_ref = err;
+
+       err = xenbus_alloc_evtchn(dev, &info->evtchn);
+       if (err)
+               goto fail;
+
+       err = bind_evtchn_to_irqhandler(
+               info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
+       if (err <= 0) {
+               xenbus_dev_fatal(dev, err,
+                                "bind_evtchn_to_irqhandler failed");
+               goto fail;
+       }
+       info->irq = err;
+
+       return 0;
+fail:
+       blkif_free(info, 0);
+       return err;
+}
+
+
+/**
+ * Callback received when the backend's state changes.
+ */
+static void backend_changed(struct xenbus_device *dev,
+                           enum xenbus_state backend_state)
+{
+       struct blkfront_info *info = dev->dev.driver_data;
+       struct block_device *bd;
+
+       DPRINTK("blkfront:backend_changed.\n");
+
+       switch (backend_state) {
+       case XenbusStateUnknown:
+       case XenbusStateInitialising:
+       case XenbusStateInitWait:
+       case XenbusStateInitialised:
+       case XenbusStateClosed:
+               break;
+
+       case XenbusStateConnected:
+               connect(info);
+               break;
+
+       case XenbusStateClosing:
+               bd = bdget(info->dev);
+               if (bd == NULL)
+                       xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
+
+               mutex_lock(&bd->bd_mutex);
+               if (info->users > 0)
+                       xenbus_dev_error(dev, -EBUSY,
+                                        "Device in use; refusing to close");
+               else
+                       blkfront_closing(dev);
+               mutex_unlock(&bd->bd_mutex);
+               bdput(bd);
+               break;
+       }
+}
+
+
+/* ** Connection ** */
+
+
+/*
+ * Invoked when the backend is finally 'ready' (and has produced
+ * the details about the physical device - #sectors, size, etc).
+ */
+static void connect(struct blkfront_info *info)
+{
+       unsigned long sectors, sector_size;
+       unsigned int binfo;
+       int err;
+
+       if ((info->connected == BLKIF_STATE_CONNECTED) ||
+           (info->connected == BLKIF_STATE_SUSPENDED) )
+               return;
+
+       DPRINTK("blkfront.c:connect:%s.\n", info->xbdev->otherend);
+
+       err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+                           "sectors", "%lu", &sectors,
+                           "info", "%u", &binfo,
+                           "sector-size", "%lu", &sector_size,
+                           NULL);
+       if (err) {
+               xenbus_dev_fatal(info->xbdev, err,
+                                "reading backend fields at %s",
+                                info->xbdev->otherend);
+               return;
+       }
+
+       err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
+       if (err) {
+               xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
+                                info->xbdev->otherend);
+               return;
+       }
+
+       (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+       /* Kick pending requests. */
+       spin_lock_irq(&blkif_io_lock);
+       info->connected = BLKIF_STATE_CONNECTED;
+       kick_pending_request_queues(info);
+       spin_unlock_irq(&blkif_io_lock);
+
+       add_disk(info->gd);
+}
+
+/**
+ * Handle the change of state of the backend to Closing.  We must delete our
+ * device-layer structures now, to ensure that writes are flushed through to
+ * the backend.  Once this is done, we can switch to Closed in
+ * acknowledgement.
+ */
+static void blkfront_closing(struct xenbus_device *dev)
+{
+       struct blkfront_info *info = dev->dev.driver_data;
+       unsigned long flags;
+
+       DPRINTK("blkfront_closing: %s removed\n", dev->nodename);
+
+       if (info->rq == NULL)
+               return;
+
+       spin_lock_irqsave(&blkif_io_lock, flags);
+       /* No more blkif_request(). */
+       blk_stop_queue(info->rq);
+       /* No more gnttab callback work. */
+       gnttab_cancel_free_callback(&info->callback);
+       flush_scheduled_work();
+       spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+       xlvbd_del(info);
+
+       xenbus_switch_state(dev, XenbusStateClosed);
+}
+
+
+static int blkfront_remove(struct xenbus_device *dev)
+{
+       struct blkfront_info *info = dev->dev.driver_data;
+
+       DPRINTK("blkfront_remove: %s removed\n", dev->nodename);
+
+       blkif_free(info, 0);
+
+       kfree(info);
+
+       return 0;
+}
+
+
+static inline int GET_ID_FROM_FREELIST(
+       struct blkfront_info *info)
+{
+       unsigned long free = info->shadow_free;
+       BUG_ON(free > BLK_RING_SIZE);
+       info->shadow_free = info->shadow[free].req.id;
+       info->shadow[free].req.id = 0x0fffffee; /* debug */
+       return free;
+}
+
+static inline void ADD_ID_TO_FREELIST(
+       struct blkfront_info *info, unsigned long id)
+{
+       info->shadow[id].req.id  = info->shadow_free;
+       info->shadow[id].request = 0;
+       info->shadow_free = id;
+}
+
+static inline void flush_requests(struct blkfront_info *info)
+{
+       int notify;
+
+       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify);
+
+       if (notify)
+               notify_remote_via_irq(info->irq);
+}
+
+static void kick_pending_request_queues(struct blkfront_info *info)
+{
+       if (!RING_FULL(&info->ring)) {
+               /* Re-enable calldowns. */
+               blk_start_queue(info->rq);
+               /* Kick things off immediately. */
+               do_blkif_request(info->rq);
+       }
+}
+
+static void blkif_restart_queue(void *arg)
+{
+       struct blkfront_info *info = (struct blkfront_info *)arg;
+       spin_lock_irq(&blkif_io_lock);
+       if (info->connected == BLKIF_STATE_CONNECTED)
+               kick_pending_request_queues(info);
+       spin_unlock_irq(&blkif_io_lock);
+}
+
+static void blkif_restart_queue_callback(void *arg)
+{
+       struct blkfront_info *info = (struct blkfront_info *)arg;
+       schedule_work(&info->work);
+}
+
+int blkif_open(struct inode *inode, struct file *filep)
+{
+       struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
+       info->users++;
+       return 0;
+}
+
+
+int blkif_release(struct inode *inode, struct file *filep)
+{
+       struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
+       info->users--;
+       if (info->users == 0) {
+               /* Check whether we have been instructed to close.  We will
+                  have ignored this request initially, as the device was
+                  still mounted. */
+               struct xenbus_device * dev = info->xbdev;
+               enum xenbus_state state = xenbus_read_driver_state(dev->otherend);
+
+               if (state == XenbusStateClosing)
+                       blkfront_closing(dev);
+       }
+       return 0;
+}
+
+
+int blkif_ioctl(struct inode *inode, struct file *filep,
+               unsigned command, unsigned long argument)
+{
+       int i;
+
+       DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
+                     command, (long)argument, inode->i_rdev);
+
+       switch (command) {
+       case CDROMMULTISESSION:
+               DPRINTK("FIXME: support multisession CDs later\n");
+               for (i = 0; i < sizeof(struct cdrom_multisession); i++)
+                       if (put_user(0, (char __user *)(argument + i)))
+                               return -EFAULT;
+               return 0;
+
+       default:
+               /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
+                 command);*/
+               return -EINVAL; /* same return as native Linux */
+       }
+
+       return 0;
+}
+
+
+int blkif_getgeo(struct block_device *bd, struct hd_geometry *hg)
+{
+       /* We don't have real geometry info, but let's at least return
+          values consistent with the size of the device */
+       sector_t nsect = get_capacity(bd->bd_disk);
+       sector_t cylinders = nsect;
+
+       hg->heads = 0xff;
+       hg->sectors = 0x3f;
+       sector_div(cylinders, hg->heads * hg->sectors);
+       hg->cylinders = cylinders;
+       if ((sector_t)(hg->cylinders + 1) * hg->heads * hg->sectors < nsect)
+               hg->cylinders = 0xffff;
+       return 0;
+}
+
+
+/*
+ * blkif_queue_request
+ *
+ * request block io
+ *
+ * id: for guest use only.
+ * operation: BLKIF_OP_{READ,WRITE,PROBE}
+ * buffer: buffer to read/write into. this should be a
+ *   virtual address in the guest os.
+ */
+static int blkif_queue_request(struct request *req)
+{
+       struct blkfront_info *info = req->rq_disk->private_data;
+       unsigned long buffer_mfn;
+       blkif_request_t *ring_req;
+       struct bio *bio;
+       struct bio_vec *bvec;
+       int idx;
+       unsigned long id;
+       unsigned int fsect, lsect;
+       int ref;
+       grant_ref_t gref_head;
+
+       if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
+               return 1;
+
+       if (gnttab_alloc_grant_references(
+               BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
+               gnttab_request_free_callback(
+                       &info->callback,
+                       blkif_restart_queue_callback,
+                       info,
+                       BLKIF_MAX_SEGMENTS_PER_REQUEST);
+               return 1;
+       }
+
+       /* Fill out a communications ring structure. */
+       ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
+       id = GET_ID_FROM_FREELIST(info);
+       info->shadow[id].request = (unsigned long)req;
+
+       ring_req->id = id;
+       ring_req->operation = rq_data_dir(req) ?
+               BLKIF_OP_WRITE : BLKIF_OP_READ;
+       ring_req->sector_number = (blkif_sector_t)req->sector;
+       ring_req->handle = info->handle;
+
+       ring_req->nr_segments = 0;
+       rq_for_each_bio (bio, req) {
+               bio_for_each_segment (bvec, bio, idx) {
+                       BUG_ON(ring_req->nr_segments
+                              == BLKIF_MAX_SEGMENTS_PER_REQUEST);
+                       buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
+                       fsect = bvec->bv_offset >> 9;
+                       lsect = fsect + (bvec->bv_len >> 9) - 1;
+                       /* install a grant reference. */
+                       ref = gnttab_claim_grant_reference(&gref_head);
+                       BUG_ON(ref == -ENOSPC);
+
+                       gnttab_grant_foreign_access_ref(
+                               ref,
+                               info->xbdev->otherend_id,
+                               buffer_mfn,
+                               rq_data_dir(req) );
+
+                       info->shadow[id].frame[ring_req->nr_segments] =
+                               mfn_to_pfn(buffer_mfn);
+
+                       ring_req->seg[ring_req->nr_segments] =
+                               (struct blkif_request_segment) {
+                                       .gref       = ref,
+                                       .first_sect = fsect,
+                                       .last_sect  = lsect };
+
+                       ring_req->nr_segments++;
+               }
+       }
+
+       info->ring.req_prod_pvt++;
+
+       /* Keep a private copy so we can reissue requests when recovering. */
+       info->shadow[id].req = *ring_req;
+
+       gnttab_free_grant_references(gref_head);
+
+       return 0;
+}
+
+/*
+ * do_blkif_request
+ *  read a block; request is in a request queue
+ */
+void do_blkif_request(request_queue_t *rq)
+{
+       struct blkfront_info *info = NULL;
+       struct request *req;
+       int queued;
+
+       DPRINTK("Entered do_blkif_request\n");
+
+       queued = 0;
+
+       while ((req = elv_next_request(rq)) != NULL) {
+               info = req->rq_disk->private_data;
+               if (!blk_fs_request(req)) {
+                       end_request(req, 0);
+                       continue;
+               }
+
+               if (RING_FULL(&info->ring))
+                       goto wait;
+
+               DPRINTK("do_blk_req %p: cmd %p, sec %lx, "
+                       "(%u/%li) buffer:%p [%s]\n",
+                       req, req->cmd, req->sector, req->current_nr_sectors,
+                       req->nr_sectors, req->buffer,
+                       rq_data_dir(req) ? "write" : "read");
+
+
+               blkdev_dequeue_request(req);
+               if (blkif_queue_request(req)) {
+                       blk_requeue_request(rq, req);
+               wait:
+                       /* Avoid pointless unplugs. */
+                       blk_stop_queue(rq);
+                       break;
+               }
+
+               queued++;
+       }
+
+       if (queued != 0)
+               flush_requests(info);
+}
+
+
+static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+       struct request *req;
+       blkif_response_t *bret;
+       RING_IDX i, rp;
+       unsigned long flags;
+       struct blkfront_info *info = (struct blkfront_info *)dev_id;
+
+       spin_lock_irqsave(&blkif_io_lock, flags);
+
+       if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
+               spin_unlock_irqrestore(&blkif_io_lock, flags);
+               return IRQ_HANDLED;
+       }
+
+ again:
+       rp = info->ring.sring->rsp_prod;
+       rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+       for (i = info->ring.rsp_cons; i != rp; i++) {
+               unsigned long id;
+               int ret;
+
+               bret = RING_GET_RESPONSE(&info->ring, i);
+               id   = bret->id;
+               req  = (struct request *)info->shadow[id].request;
+
+               blkif_completion(&info->shadow[id]);
+
+               ADD_ID_TO_FREELIST(info, id);
+
+               switch (bret->operation) {
+               case BLKIF_OP_READ:
+               case BLKIF_OP_WRITE:
+                       if (unlikely(bret->status != BLKIF_RSP_OKAY))
+                               DPRINTK("Bad return from blkdev data "
+                                       "request: %x\n", bret->status);
+
+                       ret = end_that_request_first(
+                               req, (bret->status == BLKIF_RSP_OKAY),
+                               req->hard_nr_sectors);
+                       BUG_ON(ret);
+                       end_that_request_last(
+                               req, (bret->status == BLKIF_RSP_OKAY));
+                       break;
+               default:
+                       BUG();
+               }
+       }
+
+       info->ring.rsp_cons = i;
+
+       if (i != info->ring.req_prod_pvt) {
+               int more_to_do;
+               RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+               if (more_to_do)
+                       goto again;
+       } else
+               info->ring.sring->rsp_event = i + 1;
+
+       kick_pending_request_queues(info);
+
+       spin_unlock_irqrestore(&blkif_io_lock, flags);
+
+       return IRQ_HANDLED;
+}
+
+static void blkif_free(struct blkfront_info *info, int suspend)
+{
+       /* Prevent new requests being issued until we fix things up. */
+       spin_lock_irq(&blkif_io_lock);
+       info->connected = suspend ?
+               BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
+       /* No more blkif_request(). */
+       if (info->rq)
+               blk_stop_queue(info->rq);
+       /* No more gnttab callback work. */
+       gnttab_cancel_free_callback(&info->callback);
+       flush_scheduled_work();
+       spin_unlock_irq(&blkif_io_lock);
+
+       /* Free resources associated with old device channel. */
+       if (info->ring_ref != GRANT_INVALID_REF) {
+               gnttab_end_foreign_access(info->ring_ref, 0,
+                                         (unsigned long)info->ring.sring);
+               info->ring_ref = GRANT_INVALID_REF;
+               info->ring.sring = NULL;
+       }
+       if (info->irq)
+               unbind_from_irqhandler(info->irq, info);
+       info->evtchn = info->irq = 0;
+
+}
+
+static void blkif_completion(struct blk_shadow *s)
+{
+       int i;
+       for (i = 0; i < s->req.nr_segments; i++)
+               gnttab_end_foreign_access(s->req.seg[i].gref, 0, 0UL);
+}
+
+static void blkif_recover(struct blkfront_info *info)
+{
+       int i;
+       blkif_request_t *req;
+       struct blk_shadow *copy;
+       int j;
+
+       /* Stage 1: Make a safe copy of the shadow state. */
+       copy = kmalloc(sizeof(info->shadow), GFP_KERNEL | __GFP_NOFAIL);
+       memcpy(copy, info->shadow, sizeof(info->shadow));
+
+       /* Stage 2: Set up free list. */
+       memset(&info->shadow, 0, sizeof(info->shadow));
+       for (i = 0; i < BLK_RING_SIZE; i++)
+               info->shadow[i].req.id = i+1;
+       info->shadow_free = info->ring.req_prod_pvt;
+       info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+       /* Stage 3: Find pending requests and requeue them. */
+       for (i = 0; i < BLK_RING_SIZE; i++) {
+               /* Not in use? */
+               if (copy[i].request == 0)
+                       continue;
+
+               /* Grab a request slot and copy shadow state into it. */
+               req = RING_GET_REQUEST(
+                       &info->ring, info->ring.req_prod_pvt);
+               *req = copy[i].req;
+
+               /* We get a new request id, and must reset the shadow state. */
+               req->id = GET_ID_FROM_FREELIST(info);
+               memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
+
+               /* Rewrite any grant references invalidated by susp/resume. */
+               for (j = 0; j < req->nr_segments; j++)
+                       gnttab_grant_foreign_access_ref(
+                               req->seg[j].gref,
+                               info->xbdev->otherend_id,
+                               pfn_to_mfn(info->shadow[req->id].frame[j]),
+                               rq_data_dir(
+                                       (struct request *)
+                                       info->shadow[req->id].request));
+               info->shadow[req->id].req = *req;
+
+               info->ring.req_prod_pvt++;
+       }
+
+       kfree(copy);
+
+       (void)xenbus_switch_state(info->xbdev, XenbusStateConnected);
+
+       spin_lock_irq(&blkif_io_lock);
+
+       /* Now safe for us to use the shared ring */
+       info->connected = BLKIF_STATE_CONNECTED;
+
+       /* Send off requeued requests */
+       flush_requests(info);
+
+       /* Kick any other new requests queued since we resumed */
+       kick_pending_request_queues(info);
+
+       spin_unlock_irq(&blkif_io_lock);
+}
+
+
+/* ** Driver Registration ** */
+
+
+static struct xenbus_device_id blkfront_ids[] = {
+       { "vbd" },
+       { "" }
+};
+
+
+static struct xenbus_driver blkfront = {
+       .name = "vbd",
+       .owner = THIS_MODULE,
+       .ids = blkfront_ids,
+       .probe = blkfront_probe,
+       .remove = blkfront_remove,
+       .resume = blkfront_resume,
+       .otherend_changed = backend_changed,
+};
+
+
+static int __init xlblk_init(void)
+{
+       if (!is_running_on_xen())
+               return -ENODEV;
+
+       return xenbus_register_frontend(&blkfront);
+}
+module_init(xlblk_init);
+
+
+static void xlblk_exit(void)
+{
+       return xenbus_unregister_driver(&blkfront);
+}
+module_exit(xlblk_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/blkfront/block.h b/drivers/xen/blkfront/block.h
new file mode 100644 (file)
index 0000000..5ba3d1e
--- /dev/null
@@ -0,0 +1,156 @@
+/******************************************************************************
+ * block.h
+ * 
+ * Shared definitions between all levels of XenLinux Virtual block devices.
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004-2005, Christian Limpach
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_DRIVERS_BLOCK_H__
+#define __XEN_DRIVERS_BLOCK_H__
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/hdreg.h>
+#include <linux/blkdev.h>
+#include <linux/major.h>
+#include <linux/devfs_fs_kernel.h>
+#include <asm/hypervisor.h>
+#include <xen/xenbus.h>
+#include <xen/gnttab.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/io/blkif.h>
+#include <xen/interface/io/ring.h>
+#include <asm/io.h>
+#include <asm/atomic.h>
+#include <asm/uaccess.h>
+
+#if 1
+#define IPRINTK(fmt, args...)                          \
+       printk(KERN_INFO "xen_blk: " fmt, ##args)
+#else
+#define IPRINTK(fmt, args...) ((void)0)
+#endif
+
+#if 1
+#define WPRINTK(fmt, args...)                          \
+       printk(KERN_WARNING "xen_blk: " fmt, ##args)
+#else
+#define WPRINTK(fmt, args...) ((void)0)
+#endif
+
+#define DPRINTK(_f, _a...) pr_debug(_f, ## _a)
+
+#if 0
+#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a)
+#else
+#define DPRINTK_IOCTL(_f, _a...) ((void)0)
+#endif
+
+struct xlbd_type_info
+{
+       int partn_shift;
+       int disks_per_major;
+       char *devname;
+       char *diskname;
+};
+
+struct xlbd_major_info
+{
+       int major;
+       int index;
+       int usage;
+       struct xlbd_type_info *type;
+};
+
+struct blk_shadow {
+       blkif_request_t req;
+       unsigned long request;
+       unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
+
+/*
+ * We have one of these per vbd, whether ide, scsi or 'other'.  They
+ * hang in private_data off the gendisk structure. We may end up
+ * putting all kinds of interesting stuff here :-)
+ */
+struct blkfront_info
+{
+       struct xenbus_device *xbdev;
+       dev_t dev;
+       struct gendisk *gd;
+       int vdevice;
+       blkif_vdev_t handle;
+       int connected;
+       int ring_ref;
+       blkif_front_ring_t ring;
+       unsigned int evtchn, irq;
+       struct xlbd_major_info *mi;
+       request_queue_t *rq;
+       struct work_struct work;
+       struct gnttab_free_callback callback;
+       struct blk_shadow shadow[BLK_RING_SIZE];
+       unsigned long shadow_free;
+
+       /**
+        * The number of people holding this device open.  We won't allow a
+        * hot-unplug unless this is 0.
+        */
+       int users;
+};
+
+extern spinlock_t blkif_io_lock;
+
+extern int blkif_open(struct inode *inode, struct file *filep);
+extern int blkif_release(struct inode *inode, struct file *filep);
+extern int blkif_ioctl(struct inode *inode, struct file *filep,
+                      unsigned command, unsigned long argument);
+extern int blkif_getgeo(struct block_device *, struct hd_geometry *);
+extern int blkif_check(dev_t dev);
+extern int blkif_revalidate(dev_t dev);
+extern void do_blkif_request (request_queue_t *rq);
+
+/* Virtual block-device subsystem. */
+/* Note that xlvbd_add doesn't call add_disk for you: you're expected
+   to call add_disk on info->gd once the disk is properly connected
+   up. */
+int xlvbd_add(blkif_sector_t capacity, int device,
+             u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
+void xlvbd_del(struct blkfront_info *info);
+
+#endif /* __XEN_DRIVERS_BLOCK_H__ */
diff --git a/drivers/xen/blkfront/vbd.c b/drivers/xen/blkfront/vbd.c
new file mode 100644 (file)
index 0000000..8aa453d
--- /dev/null
@@ -0,0 +1,318 @@
+/******************************************************************************
+ * vbd.c
+ * 
+ * XenLinux virtual block-device driver (xvd).
+ * 
+ * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
+ * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
+ * Copyright (c) 2004-2005, Christian Limpach
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "block.h"
+#include <linux/blkdev.h>
+#include <linux/list.h>
+
+#define BLKIF_MAJOR(dev) ((dev)>>8)
+#define BLKIF_MINOR(dev) ((dev) & 0xff)
+
+/*
+ * For convenience we distinguish between ide, scsi and 'other' (i.e.,
+ * potentially combinations of the two) in the naming scheme and in a few other
+ * places.
+ */
+
+#define NUM_IDE_MAJORS 10
+#define NUM_SCSI_MAJORS 9
+#define NUM_VBD_MAJORS 1
+
+static struct xlbd_type_info xlbd_ide_type = {
+       .partn_shift = 6,
+       .disks_per_major = 2,
+       .devname = "ide",
+       .diskname = "hd",
+};
+
+static struct xlbd_type_info xlbd_scsi_type = {
+       .partn_shift = 4,
+       .disks_per_major = 16,
+       .devname = "sd",
+       .diskname = "sd",
+};
+
+static struct xlbd_type_info xlbd_vbd_type = {
+       .partn_shift = 4,
+       .disks_per_major = 16,
+       .devname = "xvd",
+       .diskname = "xvd",
+};
+
+static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
+                                        NUM_VBD_MAJORS];
+
+#define XLBD_MAJOR_IDE_START   0
+#define XLBD_MAJOR_SCSI_START  (NUM_IDE_MAJORS)
+#define XLBD_MAJOR_VBD_START   (NUM_IDE_MAJORS + NUM_SCSI_MAJORS)
+
+#define XLBD_MAJOR_IDE_RANGE   XLBD_MAJOR_IDE_START ... XLBD_MAJOR_SCSI_START - 1
+#define XLBD_MAJOR_SCSI_RANGE  XLBD_MAJOR_SCSI_START ... XLBD_MAJOR_VBD_START - 1
+#define XLBD_MAJOR_VBD_RANGE   XLBD_MAJOR_VBD_START ... XLBD_MAJOR_VBD_START + NUM_VBD_MAJORS - 1
+
+/* Information about our VBDs. */
+#define MAX_VBDS 64
+static LIST_HEAD(vbds_list);
+
+static struct block_device_operations xlvbd_block_fops =
+{
+       .owner = THIS_MODULE,
+       .open = blkif_open,
+       .release = blkif_release,
+       .ioctl  = blkif_ioctl,
+       .getgeo = blkif_getgeo
+};
+
+DEFINE_SPINLOCK(blkif_io_lock);
+
+static struct xlbd_major_info *
+xlbd_alloc_major_info(int major, int minor, int index)
+{
+       struct xlbd_major_info *ptr;
+
+       ptr = kzalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
+       if (ptr == NULL)
+               return NULL;
+
+       ptr->major = major;
+
+       switch (index) {
+       case XLBD_MAJOR_IDE_RANGE:
+               ptr->type = &xlbd_ide_type;
+               ptr->index = index - XLBD_MAJOR_IDE_START;
+               break;
+       case XLBD_MAJOR_SCSI_RANGE:
+               ptr->type = &xlbd_scsi_type;
+               ptr->index = index - XLBD_MAJOR_SCSI_START;
+               break;
+       case XLBD_MAJOR_VBD_RANGE:
+               ptr->type = &xlbd_vbd_type;
+               ptr->index = index - XLBD_MAJOR_VBD_START;
+               break;
+       }
+
+       printk("Registering block device major %i\n", ptr->major);
+       if (register_blkdev(ptr->major, ptr->type->devname)) {
+               WPRINTK("can't get major %d with name %s\n",
+                       ptr->major, ptr->type->devname);
+               kfree(ptr);
+               return NULL;
+       }
+
+       devfs_mk_dir(ptr->type->devname);
+       major_info[index] = ptr;
+       return ptr;
+}
+
+static struct xlbd_major_info *
+xlbd_get_major_info(int vdevice)
+{
+       struct xlbd_major_info *mi;
+       int major, minor, index;
+
+       major = BLKIF_MAJOR(vdevice);
+       minor = BLKIF_MINOR(vdevice);
+
+       switch (major) {
+       case IDE0_MAJOR: index = 0; break;
+       case IDE1_MAJOR: index = 1; break;
+       case IDE2_MAJOR: index = 2; break;
+       case IDE3_MAJOR: index = 3; break;
+       case IDE4_MAJOR: index = 4; break;
+       case IDE5_MAJOR: index = 5; break;
+       case IDE6_MAJOR: index = 6; break;
+       case IDE7_MAJOR: index = 7; break;
+       case IDE8_MAJOR: index = 8; break;
+       case IDE9_MAJOR: index = 9; break;
+       case SCSI_DISK0_MAJOR: index = 10; break;
+       case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
+               index = 11 + major - SCSI_DISK1_MAJOR;
+               break;
+       case SCSI_CDROM_MAJOR: index = 18; break;
+       default: index = 19; break;
+       }
+
+       mi = ((major_info[index] != NULL) ? major_info[index] :
+             xlbd_alloc_major_info(major, minor, index));
+       if (mi)
+               mi->usage++;
+       return mi;
+}
+
+static void
+xlbd_put_major_info(struct xlbd_major_info *mi)
+{
+       mi->usage--;
+       /* XXX: release major if 0 */
+}
+
+static int
+xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
+{
+       request_queue_t *rq;
+
+       rq = blk_init_queue(do_blkif_request, &blkif_io_lock);
+       if (rq == NULL)
+               return -1;
+
+       elevator_init(rq, "noop");
+
+       /* Hard sector size and max sectors impersonate the equiv. hardware. */
+       blk_queue_hardsect_size(rq, sector_size);
+       blk_queue_max_sectors(rq, 512);
+
+       /* Each segment in a request is up to an aligned page in size. */
+       blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
+       blk_queue_max_segment_size(rq, PAGE_SIZE);
+
+       /* Ensure a merged request will fit in a single I/O ring slot. */
+       blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+       blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+
+       /* Make sure buffer addresses are sector-aligned. */
+       blk_queue_dma_alignment(rq, 511);
+
+       gd->queue = rq;
+
+       return 0;
+}
+
+static int
+xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, int vdevice,
+                   u16 vdisk_info, u16 sector_size,
+                   struct blkfront_info *info)
+{
+       struct gendisk *gd;
+       struct xlbd_major_info *mi;
+       int nr_minors = 1;
+       int err = -ENODEV;
+
+       BUG_ON(info->gd != NULL);
+       BUG_ON(info->mi != NULL);
+       BUG_ON(info->rq != NULL);
+
+       mi = xlbd_get_major_info(vdevice);
+       if (mi == NULL)
+               goto out;
+       info->mi = mi;
+
+       if ((minor & ((1 << mi->type->partn_shift) - 1)) == 0)
+               nr_minors = 1 << mi->type->partn_shift;
+
+       gd = alloc_disk(nr_minors);
+       if (gd == NULL)
+               goto out;
+
+       if (nr_minors > 1)
+               sprintf(gd->disk_name, "%s%c", mi->type->diskname,
+                       'a' + mi->index * mi->type->disks_per_major +
+                       (minor >> mi->type->partn_shift));
+       else
+               sprintf(gd->disk_name, "%s%c%d", mi->type->diskname,
+                       'a' + mi->index * mi->type->disks_per_major +
+                       (minor >> mi->type->partn_shift),
+                       minor & ((1 << mi->type->partn_shift) - 1));
+
+       gd->major = mi->major;
+       gd->first_minor = minor;
+       gd->fops = &xlvbd_block_fops;
+       gd->private_data = info;
+       gd->driverfs_dev = &(info->xbdev->dev);
+       set_capacity(gd, capacity);
+
+       if (xlvbd_init_blk_queue(gd, sector_size)) {
+               del_gendisk(gd);
+               goto out;
+       }
+
+       info->rq = gd->queue;
+
+       if (vdisk_info & VDISK_READONLY)
+               set_disk_ro(gd, 1);
+
+       if (vdisk_info & VDISK_REMOVABLE)
+               gd->flags |= GENHD_FL_REMOVABLE;
+
+       if (vdisk_info & VDISK_CDROM)
+               gd->flags |= GENHD_FL_CD;
+
+       info->gd = gd;
+
+       return 0;
+
+ out:
+       if (mi)
+               xlbd_put_major_info(mi);
+       info->mi = NULL;
+       return err;
+}
+
+int
+xlvbd_add(blkif_sector_t capacity, int vdevice, u16 vdisk_info,
+         u16 sector_size, struct blkfront_info *info)
+{
+       struct block_device *bd;
+       int err = 0;
+
+       info->dev = MKDEV(BLKIF_MAJOR(vdevice), BLKIF_MINOR(vdevice));
+
+       bd = bdget(info->dev);
+       if (bd == NULL)
+               return -ENODEV;
+
+       err = xlvbd_alloc_gendisk(BLKIF_MINOR(vdevice), capacity, vdevice,
+                                 vdisk_info, sector_size, info);
+
+       bdput(bd);
+       return err;
+}
+
+void
+xlvbd_del(struct blkfront_info *info)
+{
+       if (info->mi == NULL)
+               return;
+
+       BUG_ON(info->gd == NULL);
+       del_gendisk(info->gd);
+       put_disk(info->gd);
+       info->gd = NULL;
+
+       xlbd_put_major_info(info->mi);
+       info->mi = NULL;
+
+       BUG_ON(info->rq == NULL);
+       blk_cleanup_queue(info->rq);
+       info->rq = NULL;
+}
diff --git a/drivers/xen/blktap/Makefile b/drivers/xen/blktap/Makefile
new file mode 100644 (file)
index 0000000..409b078
--- /dev/null
@@ -0,0 +1,3 @@
+LINUXINCLUDE += -I../xen/include/public/io
+obj-y  := xenbus.o interface.o blktap.o 
+
diff --git a/drivers/xen/blktap/blktap.c b/drivers/xen/blktap/blktap.c
new file mode 100644 (file)
index 0000000..8c5cf68
--- /dev/null
@@ -0,0 +1,1450 @@
+/******************************************************************************
+ * drivers/xen/blktap/blktap.c
+ * 
+ * Back-end driver for user level virtual block devices. This portion of the
+ * driver exports a 'unified' block-device interface that can be accessed
+ * by any operating system that implements a compatible front end. Requests
+ * are remapped to a user-space memory region.
+ *
+ * Based on the blkback driver code.
+ * 
+ * Copyright (c) 2004-2005, Andrew Warfield and Julian Chesterfield
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <asm/hypervisor.h>
+#include "common.h"
+#include <xen/balloon.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/miscdevice.h>
+#include <linux/errno.h>
+#include <linux/major.h>
+#include <linux/gfp.h>
+#include <linux/poll.h>
+#include <asm/tlbflush.h>
+#include <linux/devfs_fs_kernel.h>
+
+#define MAX_TAP_DEV 100     /*the maximum number of tapdisk ring devices    */
+#define MAX_DEV_NAME 100    /*the max tapdisk ring device name e.g. blktap0 */
+
+/*
+ * The maximum number of requests that can be outstanding at any time
+ * is determined by 
+ *
+ *   [mmap_alloc * MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST] 
+ *
+ * where mmap_alloc < MAX_DYNAMIC_MEM.
+ *
+ * TODO:
+ * mmap_alloc is initialised to 2 and should be adjustable on the fly via
+ * sysfs.
+ */
+#define MAX_DYNAMIC_MEM 64
+#define MAX_PENDING_REQS 64   
+#define MMAP_PAGES (MAX_PENDING_REQS * BLKIF_MAX_SEGMENTS_PER_REQUEST)
+#define MMAP_VADDR(_start, _req,_seg)                                   \
+        (_start +                                                       \
+         ((_req) * BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE) +        \
+         ((_seg) * PAGE_SIZE))
+static int blkif_reqs = MAX_PENDING_REQS;
+static int mmap_pages = MMAP_PAGES;
+
+#define RING_PAGES 1 /* BLKTAP - immediately before the mmap area, we
+                     * have a bunch of pages reserved for shared
+                     * memory rings.
+                     */
+
+/*Data struct associated with each of the tapdisk devices*/
+typedef struct tap_blkif {
+       struct vm_area_struct *vma;   /*Shared memory area                   */
+       unsigned long rings_vstart;   /*Kernel memory mapping                */
+       unsigned long user_vstart;    /*User memory mapping                  */
+       unsigned long dev_inuse;      /*One process opens device at a time.  */
+       unsigned long dev_pending;    /*In process of being opened           */
+       unsigned long ring_ok;        /*make this ring->state                */
+       blkif_front_ring_t ufe_ring;  /*Rings up to user space.              */
+       wait_queue_head_t wait;       /*for poll                             */
+       unsigned long mode;           /*current switching mode               */
+       int minor;                    /*Minor number for tapdisk device      */
+       pid_t pid;                    /*tapdisk process id                   */
+       enum { RUNNING, CLEANSHUTDOWN } status; /*Detect a clean userspace 
+                                                 shutdown                   */
+       unsigned long *idx_map;       /*Record the user ring id to kern 
+                                       [req id, idx] tuple                  */
+       blkif_t *blkif;               /*Associate blkif with tapdev          */
+} tap_blkif_t;
+
+/*Private data struct associated with the inode*/
+typedef struct private_info {
+       int idx;
+} private_info_t;
+
+/*Data struct handed back to userspace for tapdisk device to VBD mapping*/
+typedef struct domid_translate {
+       unsigned short domid;
+       unsigned short busid;
+} domid_translate_t ;
+
+
+domid_translate_t  translate_domid[MAX_TAP_DEV];
+tap_blkif_t *tapfds[MAX_TAP_DEV];
+
+static int __init set_blkif_reqs(char *str)
+{
+       get_option(&str, &blkif_reqs);
+       return 1;
+}
+__setup("blkif_reqs=", set_blkif_reqs);
+
+/* Run-time switchable: /sys/module/blktap/parameters/ */
+static unsigned int log_stats = 0;
+static unsigned int debug_lvl = 0;
+module_param(log_stats, int, 0644);
+module_param(debug_lvl, int, 0644);
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a 
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements 
+ * the pendcnt towards zero. When it hits zero, the specified domain has a 
+ * response queued for it, with the saved 'id' passed back.
+ */
+typedef struct {
+       blkif_t       *blkif;
+       unsigned long  id;
+       unsigned short mem_idx;
+       int            nr_pages;
+       atomic_t       pendcnt;
+       unsigned short operation;
+       int            status;
+       struct list_head free_list;
+       int            inuse;
+} pending_req_t;
+
+static pending_req_t *pending_reqs[MAX_PENDING_REQS];
+static struct list_head pending_free;
+static DEFINE_SPINLOCK(pending_free_lock);
+static DECLARE_WAIT_QUEUE_HEAD (pending_free_wq);
+static int alloc_pending_reqs;
+
+typedef unsigned int PEND_RING_IDX;
+
+/* Wrap a ring index into [0, MAX_PENDING_REQS); relies on
+ * MAX_PENDING_REQS being a power of two. */
+static inline int MASK_PEND_IDX(int i) { 
+       return (i & (MAX_PENDING_REQS-1)); 
+}
+
+/* Recover the index of a pending_req_t within the pending_reqs[idx]
+ * array from its pointer (inverse of &pending_reqs[idx][n]). */
+static inline unsigned int RTN_PEND_IDX(pending_req_t *req, int idx) {
+       return (req - pending_reqs[idx]);
+}
+
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+#define BLKBACK_INVALID_HANDLE (~0)
+
+typedef struct mmap_page {
+       unsigned long start;
+       struct page *mpage;
+} mmap_page_t;
+
+static mmap_page_t mmap_start[MAX_DYNAMIC_MEM];
+static unsigned short mmap_alloc = 0;
+static unsigned short mmap_lock = 0;
+static unsigned short mmap_inuse = 0;
+static unsigned long *pending_addrs[MAX_DYNAMIC_MEM];
+
+/******************************************************************
+ * GRANT HANDLES
+ */
+
+/* When using grant tables to map a frame for device access then the
+ * handle returned must be used to unmap the frame. This is needed to
+ * drop the ref count on the frame.
+ */
+struct grant_handle_pair
+{
+        grant_handle_t kernel;
+        grant_handle_t user;
+};
+
+static struct grant_handle_pair 
+    pending_grant_handles[MAX_DYNAMIC_MEM][MMAP_PAGES];
+#define pending_handle(_id, _idx, _i) \
+    (pending_grant_handles[_id][((_idx) * BLKIF_MAX_SEGMENTS_PER_REQUEST) \
+    + (_i)])
+
+
+static int blktap_read_ufe_ring(int idx); /*local prototypes*/
+
+#define BLKTAP_MINOR 0  /*/dev/xen/blktap resides at device number
+                         major=254, minor numbers begin at 0            */ 
+#define BLKTAP_DEV_MAJOR 254         /* TODO: Make major number dynamic  *
+                                      * and create devices in the kernel *
+                                     */
+#define BLKTAP_DEV_DIR  "/dev/xen"
+
+/* blktap IOCTLs: */
+#define BLKTAP_IOCTL_KICK_FE         1
+#define BLKTAP_IOCTL_KICK_BE         2 /* currently unused */
+#define BLKTAP_IOCTL_SETMODE         3
+#define BLKTAP_IOCTL_SENDPID        4
+#define BLKTAP_IOCTL_NEWINTF        5
+#define BLKTAP_IOCTL_MINOR          6
+#define BLKTAP_IOCTL_MAJOR          7
+#define BLKTAP_QUERY_ALLOC_REQS      8
+#define BLKTAP_IOCTL_FREEINTF        9
+#define BLKTAP_IOCTL_PRINT_IDXS      100  
+
+/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE)             */
+#define BLKTAP_MODE_PASSTHROUGH      0x00000000  /* default            */
+#define BLKTAP_MODE_INTERCEPT_FE     0x00000001
+#define BLKTAP_MODE_INTERCEPT_BE     0x00000002  /* unimp.             */
+
+#define BLKTAP_MODE_INTERPOSE \
+           (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE)
+
+
+/* Accept only the implemented switching modes: passthrough, FE intercept,
+ * or both (interpose).  BLKTAP_MODE_INTERCEPT_BE alone is unimplemented
+ * and therefore rejected. */
+static inline int BLKTAP_MODE_VALID(unsigned long arg)
+{
+       return ((arg == BLKTAP_MODE_PASSTHROUGH ) ||
+               (arg == BLKTAP_MODE_INTERCEPT_FE) ||
+                (arg == BLKTAP_MODE_INTERPOSE   ));
+}
+
+/* Requests passing through the tap to userspace are re-assigned an ID.
+ * We must record a mapping between the BE [IDX,ID] tuple and the userspace
+ * ring ID. 
+ */
+
+/* Pack a frontend domain id (high 16 bits) and a masked pending-ring
+ * index (low 16 bits) into the opaque id handed to userspace. */
+static inline unsigned long MAKE_ID(domid_t fe_dom, PEND_RING_IDX idx)
+{
+        return ((fe_dom << 16) | MASK_PEND_IDX(idx));
+}
+
+/* Inverse of MAKE_ID: extract the pending-ring index (low 16 bits).
+ * Declared 'static inline' (the original said 'extern inline'): under
+ * GNU89 semantics 'extern inline' never emits an out-of-line definition,
+ * so any call the compiler declines to inline becomes an unresolved
+ * symbol at link time.  'static inline' matches MAKE_ID above and is
+ * always safe. */
+static inline PEND_RING_IDX ID_TO_IDX(unsigned long id)
+{
+        return (PEND_RING_IDX)(id & 0x0000ffff);
+}
+
+/* Inverse of MAKE_ID: extract the frontend domain id (high 16 bits). */
+static inline int ID_TO_MIDX(unsigned long id)
+{
+        return (int)(id >> 16);
+}
+
+#define INVALID_REQ 0xdead0000
+
+/*TODO: Convert to a free list*/
+/* Linear scan for the first free slot in a per-device idx_map (a slot is
+ * free when it holds INVALID_REQ).  Returns the slot index, or INVALID_REQ
+ * itself when all MAX_PENDING_REQS slots are busy — callers must compare
+ * against INVALID_REQ, not test for < 0. */
+static inline int GET_NEXT_REQ(unsigned long *idx_map)
+{
+       int i;
+       for (i = 0; i < MAX_PENDING_REQS; i++)
+               if (idx_map[i] == INVALID_REQ) return i;
+
+       return INVALID_REQ;
+}
+
+
+#define BLKTAP_INVALID_HANDLE(_g) \
+    (((_g->kernel) == 0xFFFF) && ((_g->user) == 0xFFFF))
+
+#define BLKTAP_INVALIDATE_HANDLE(_g) do {       \
+    (_g)->kernel = 0xFFFF; (_g)->user = 0xFFFF; \
+    } while(0)
+
+
+/******************************************************************
+ * BLKTAP VM OPS
+ */
+
+/*
+ * blktap_nopage - fault handler for the tapdisk mmap region.  Pages are
+ * installed explicitly by the driver (via the VM_FOREIGN map set up in
+ * blktap_mmap), never demand-faulted; a fault on an unmapped address is
+ * therefore always an error and is answered with SIGBUS.
+ */
+static struct page *blktap_nopage(struct vm_area_struct *vma,
+                                 unsigned long address,
+                                 int *type)
+{
+       /*
+        * if the page has not been mapped in by the driver then return
+        * NOPAGE_SIGBUS to the domain.
+        */
+
+       return NOPAGE_SIGBUS;
+}
+
+/* VM ops for the tapdisk mapping: only a (rejecting) fault handler. */
+struct vm_operations_struct blktap_vm_ops = {
+       nopage:   blktap_nopage,
+};
+
+/******************************************************************
+ * BLKTAP FILE OPS
+ */
+/*Function Declarations*/
+static int get_next_free_dev(void);
+static int blktap_open(struct inode *inode, struct file *filp);
+static int blktap_release(struct inode *inode, struct file *filp);
+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
+static int blktap_ioctl(struct inode *inode, struct file *filp,
+                        unsigned int cmd, unsigned long arg);
+static unsigned int blktap_poll(struct file *file, poll_table *wait);
+
+struct miscdevice *set_misc(int minor, char *name, int dev);
+
+static struct file_operations blktap_fops = {
+       .owner   = THIS_MODULE,
+       .poll    = blktap_poll,
+       .ioctl   = blktap_ioctl,
+       .open    = blktap_open,
+       .release = blktap_release,
+       .mmap    = blktap_mmap,
+};
+
+
+/*
+ * get_next_free_dev - pick the first registered tap device that is neither
+ * open (dev_inuse) nor mid-open (dev_pending), reserve it by setting
+ * dev_pending, and return its index.  Returns -1 when every slot is taken.
+ * The scan and the reservation are atomic under pending_free_lock.
+ */
+static int get_next_free_dev(void)
+{
+       tap_blkif_t *tapinfo;
+       int minor, found = -1;
+       unsigned long flags;
+
+       spin_lock_irqsave(&pending_free_lock, flags);
+
+       for (minor = 0; minor < MAX_TAP_DEV; minor++) {
+               tapinfo = tapfds[minor];
+               if (tapinfo == NULL)
+                       continue;
+               if (tapinfo->dev_inuse != 0 || tapinfo->dev_pending != 0)
+                       continue;
+               tapinfo->dev_pending = 1;
+               found = minor;
+               break;
+       }
+
+       spin_unlock_irqrestore(&pending_free_lock, flags);
+       return found;
+}
+
+/*
+ * dom_to_devid - find the tap device previously bound (via
+ * BLKTAP_IOCTL_NEWINTF) to the (domid, xenbus_id) pair, attach the blkif
+ * to it, and mark it RUNNING.  Returns the device index or -1 if no
+ * binding exists.
+ *
+ * NOTE(review): tapfds[i] is dereferenced without a NULL check here —
+ * presumably a translate_domid[] match implies the slot is populated;
+ * verify against the NEWINTF/creation path.
+ */
+int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif) 
+{
+       int i;
+               
+       for (i = 0; i < MAX_TAP_DEV; i++)
+               if ( (translate_domid[i].domid == domid)
+                   && (translate_domid[i].busid == xenbus_id) ) {
+                       tapfds[i]->blkif = blkif;
+                       tapfds[i]->status = RUNNING;
+                       return i;
+               }
+       return -1;
+}
+
+/*
+ * signal_tapdisk - notify the userspace tapdisk bound to slot 'idx' that
+ * its backend is going away: flag a clean shutdown (if the process still
+ * exists) and detach the blkif.
+ *
+ * Fixed: the original indexed and dereferenced tapfds[idx] BEFORE the
+ * range check, and wrote info->blkif unconditionally — an out-of-bounds
+ * read for a bad idx and a NULL dereference for an empty slot.
+ */
+void signal_tapdisk(int idx) 
+{
+       tap_blkif_t *info;
+       struct task_struct *ptask;
+
+       if ((idx <= 0) || (idx >= MAX_TAP_DEV))
+               return;
+
+       info = tapfds[idx];
+       if (info == NULL)
+               return;
+
+       if (info->pid > 0) {
+               ptask = find_task_by_pid(info->pid);
+               if (ptask)
+                       info->status = CLEANSHUTDOWN;
+       }
+       info->blkif = NULL;
+       return;
+}
+
+/*
+ * blktap_open - open a tapdisk char device: claim exclusive access,
+ * allocate and share the frontend ring page, and set up per-open private
+ * data plus the user-ring id map.
+ *
+ * Fixed: the kzalloc() of prv and the kmalloc() of info->idx_map were
+ * unchecked (NULL dereference on OOM), and the failure path leaked the
+ * ring page and left dev_inuse set, wedging the device as busy forever.
+ */
+static int blktap_open(struct inode *inode, struct file *filp)
+{
+       blkif_sring_t *sring;
+       int idx = iminor(inode) - BLKTAP_MINOR;
+       tap_blkif_t *info;
+       private_info_t *prv;
+       int i;
+       
+       if (tapfds[idx] == NULL) {
+               WPRINTK("Unable to open device /dev/xen/blktap%d\n",
+                      idx);
+               return -ENOMEM;
+       }
+       DPRINTK("Opening device /dev/xen/blktap%d\n",idx);
+       
+       info = tapfds[idx];
+       
+       /*Only one process can access device at a time*/
+       if (test_and_set_bit(0, &info->dev_inuse))
+               return -EBUSY;
+
+       info->dev_pending = 0;
+           
+       /* Allocate the fe ring. */
+       sring = (blkif_sring_t *)get_zeroed_page(GFP_KERNEL);
+       if (sring == NULL)
+               goto fail_nomem;
+
+       SetPageReserved(virt_to_page(sring));
+    
+       SHARED_RING_INIT(sring);
+       FRONT_RING_INIT(&info->ufe_ring, sring, PAGE_SIZE);
+       
+       prv = kzalloc(sizeof(private_info_t),GFP_KERNEL);
+       if (prv == NULL)
+               goto fail_free_ring;
+       prv->idx = idx;
+       filp->private_data = prv;
+       info->vma = NULL;
+
+       info->idx_map = kmalloc(sizeof(unsigned long) * MAX_PENDING_REQS, 
+                               GFP_KERNEL);
+       if (info->idx_map == NULL)
+               goto fail_free_prv;
+       
+       if (idx > 0) {
+               init_waitqueue_head(&info->wait);
+               /* All user-ring slots start free. */
+               for (i = 0; i < MAX_PENDING_REQS; i++) 
+                       info->idx_map[i] = INVALID_REQ;
+       }
+
+       DPRINTK("Tap open: device /dev/xen/blktap%d\n",idx);
+       return 0;
+
+ fail_free_prv:
+       kfree(prv);
+       filp->private_data = NULL;
+ fail_free_ring:
+       ClearPageReserved(virt_to_page(sring));
+       free_page((unsigned long)sring);
+ fail_nomem:
+       /* Release the exclusive-open claim so a later open can succeed. */
+       clear_bit(0, &info->dev_inuse);
+       return -ENOMEM;
+}
+
+/*
+ * blktap_release - final close of a tapdisk char device.  Frees the shared
+ * ring page, tears down any user mapping, and stops the backend kthread
+ * if userspace did not shut down cleanly.
+ *
+ * Fixed: idx (derived from the inode minor) is now range-checked before
+ * indexing tapfds[] (out-of-bounds read otherwise); the redundant NULL
+ * guard around kfree() is gone and private_data is cleared to avoid a
+ * dangling pointer.
+ */
+static int blktap_release(struct inode *inode, struct file *filp)
+{
+       int idx = iminor(inode) - BLKTAP_MINOR;
+       tap_blkif_t *info;
+       
+       if (idx < 0 || idx >= MAX_TAP_DEV || tapfds[idx] == NULL) {
+               WPRINTK("Trying to free device that doesn't exist "
+                      "[/dev/xen/blktap%d]\n",idx);
+               return -1;
+       }
+       info = tapfds[idx];
+       info->dev_inuse = 0;
+       DPRINTK("Freeing device [/dev/xen/blktap%d]\n",idx);
+
+       /* Free the ring page. */
+       ClearPageReserved(virt_to_page(info->ufe_ring.sring));
+       free_page((unsigned long) info->ufe_ring.sring);
+
+       /* Clear any active mappings and free foreign map table */
+       if (info->vma) {
+               zap_page_range(
+                       info->vma, info->vma->vm_start, 
+                       info->vma->vm_end - info->vma->vm_start, NULL);
+               info->vma = NULL;
+       }
+       
+       /* kfree(NULL) is a no-op — no guard needed. */
+       kfree(filp->private_data);
+       filp->private_data = NULL;
+
+       if ( (info->status != CLEANSHUTDOWN) && (info->blkif != NULL) ) {
+               kthread_stop(info->blkif->xenblkd);
+               info->blkif->xenblkd = NULL;
+               info->status = CLEANSHUTDOWN;
+       }       
+       return 0;
+}
+
+
+/* Note on mmap:
+ * We need to map pages to user space in a way that will allow the block
+ * subsystem set up direct IO to them.  This couldn't be done before, because
+ * there isn't really a sane way to translate a user virtual address down to a 
+ * physical address when the page belongs to another domain.
+ *
+ * My first approach was to map the page in to kernel memory, add an entry
+ * for it in the physical frame list (using alloc_lomem_region as in blkback)
+ * and then attempt to map that page up to user space.  This is disallowed
+ * by xen though, which realizes that we don't really own the machine frame
+ * underlying the physical page.
+ *
+ * The new approach is to provide explicit support for this in xen linux.
+ * The VMA now has a flag, VM_FOREIGN, to indicate that it contains pages
+ * mapped from other vms.  vma->vm_private_data is set up as a mapping 
+ * from pages to actual page structs.  There is a new clause in get_user_pages
+ * that does the right thing for this sort of mapping.
+ */
+/*
+ * blktap_mmap - map the shared ring plus the foreign-data area into the
+ * tapdisk process.  Layout: RING_PAGES of ring first, then mmap_pages of
+ * data; the caller must map exactly that many pages.
+ *
+ * Fixed: the VM_FOREIGN table element type is 'struct page *', not the
+ * nonexistent 'struct page_struct *' (the original compiled only because
+ * all object pointers happen to share a size); the explicit NULL-fill
+ * loop after kzalloc() was redundant (kzalloc zeroes) and is dropped.
+ */
+static int blktap_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+       int size;
+       struct page **map;
+       private_info_t *prv;
+       tap_blkif_t *info;
+
+       /*Retrieve the dev info*/
+       prv = (private_info_t *)filp->private_data;
+       if (prv == NULL) {
+               WPRINTK("blktap: mmap, retrieving idx failed\n");
+               return -ENOMEM;
+       }
+       info = tapfds[prv->idx];
+       
+       vma->vm_flags |= VM_RESERVED;
+       vma->vm_ops = &blktap_vm_ops;
+
+       size = vma->vm_end - vma->vm_start;
+       if (size != ((mmap_pages + RING_PAGES) << PAGE_SHIFT)) {
+               WPRINTK("you _must_ map exactly %d pages!\n",
+                      mmap_pages + RING_PAGES);
+               return -EAGAIN;
+       }
+
+       size >>= PAGE_SHIFT;
+       info->rings_vstart = vma->vm_start;
+       info->user_vstart  = info->rings_vstart + (RING_PAGES << PAGE_SHIFT);
+    
+       /* Map the ring pages to the start of the region and reserve it. */
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+       if (remap_pfn_range(vma, vma->vm_start, 
+                           __pa(info->ufe_ring.sring) >> PAGE_SHIFT, 
+                           PAGE_SIZE, vma->vm_page_prot)) {
+               WPRINTK("Mapping user ring failed!\n");
+               goto fail;
+       }
+
+       /* Mark this VM as containing foreign pages, and set up mappings. */
+       map = kzalloc(((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)
+                     * sizeof(struct page *),
+                     GFP_KERNEL);
+       if (map == NULL) {
+               WPRINTK("Couldn't alloc VM_FOREIGN map.\n");
+               goto fail;
+       }
+
+       vma->vm_private_data = map;
+       vma->vm_flags |= VM_FOREIGN;
+
+       info->vma = vma;
+       info->ring_ok = 1;
+       return 0;
+ fail:
+       /* Clear any active mappings. */
+       zap_page_range(vma, vma->vm_start, 
+                      vma->vm_end - vma->vm_start, NULL);
+
+       return -ENOMEM;
+}
+
+
+/*
+ * blktap_ioctl - control interface for tapdisk: ring kicks, mode setting,
+ * pid registration, device allocation/release, and debug queries.
+ *
+ * Fixed: SETMODE, PRINT_IDXS and SENDPID read tapfds[idx] BEFORE checking
+ * that idx is in range — an out-of-bounds array read for a stray minor.
+ * The slot is now only read after validation; observable behavior for
+ * valid minors is unchanged.
+ */
+static int blktap_ioctl(struct inode *inode, struct file *filp,
+                        unsigned int cmd, unsigned long arg)
+{
+       int idx = iminor(inode) - BLKTAP_MINOR;
+
+       switch(cmd) {
+       case BLKTAP_IOCTL_KICK_FE: 
+               /* There are fe messages to process. */
+               return blktap_read_ufe_ring(idx);
+
+       case BLKTAP_IOCTL_SETMODE:
+       {
+               tap_blkif_t *info;
+
+               if ( (idx > 0) && (idx < MAX_TAP_DEV)
+                    && ((info = tapfds[idx]) != NULL) )
+               {
+                       if (BLKTAP_MODE_VALID(arg)) {
+                               info->mode = arg;
+                               /* XXX: may need to flush rings here. */
+                               DPRINTK("blktap: set mode to %lx\n", 
+                                      arg);
+                       }
+               }
+               return 0;
+       }
+       case BLKTAP_IOCTL_PRINT_IDXS:
+        {
+               tap_blkif_t *info;
+
+               if ( (idx > 0) && (idx < MAX_TAP_DEV)
+                    && ((info = tapfds[idx]) != NULL) )
+               {
+                       printk("User Rings: \n-----------\n");
+                       printk("UF: rsp_cons: %2d, req_prod_prv: %2d "
+                               "| req_prod: %2d, rsp_prod: %2d\n",
+                               info->ufe_ring.rsp_cons,
+                               info->ufe_ring.req_prod_pvt,
+                               info->ufe_ring.sring->req_prod,
+                               info->ufe_ring.sring->rsp_prod);
+               }
+               return 0;
+        }
+       case BLKTAP_IOCTL_SENDPID:
+       {
+               tap_blkif_t *info;
+
+               if ( (idx > 0) && (idx < MAX_TAP_DEV)
+                    && ((info = tapfds[idx]) != NULL) )
+               {
+                       info->pid = (pid_t)arg;
+                       DPRINTK("blktap: pid received %d\n", 
+                              info->pid);
+               }
+               return 0;
+       }
+       case BLKTAP_IOCTL_NEWINTF:
+       {               
+               /* arg carries a packed domid_translate_t by value. */
+               uint64_t val = (uint64_t)arg;
+               domid_translate_t *tr = (domid_translate_t *)&val;
+               int newdev;
+
+               DPRINTK("NEWINTF Req for domid %d and bus id %d\n", 
+                      tr->domid, tr->busid);
+               newdev = get_next_free_dev();
+               if (newdev < 1) {
+                       WPRINTK("Error initialising /dev/xen/blktap - "
+                               "No more devices\n");
+                       return -1;
+               }
+               translate_domid[newdev].domid = tr->domid;
+               translate_domid[newdev].busid = tr->busid;
+               return newdev;
+       }
+       case BLKTAP_IOCTL_FREEINTF:
+       {
+               unsigned long dev = arg;
+               tap_blkif_t *info = NULL;
+
+               if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev];
+
+               if ( (info != NULL) && (info->dev_pending) )
+                       info->dev_pending = 0;
+               return 0;
+       }
+       case BLKTAP_IOCTL_MINOR:
+       {
+               unsigned long dev = arg;
+               tap_blkif_t *info = NULL;
+               
+               if ( (dev > 0) && (dev < MAX_TAP_DEV) ) info = tapfds[dev];
+               
+               return (info != NULL) ? info->minor : -1;
+       }
+       case BLKTAP_IOCTL_MAJOR:
+               return BLKTAP_DEV_MAJOR;
+
+       case BLKTAP_QUERY_ALLOC_REQS:
+       {
+               WPRINTK("BLKTAP_QUERY_ALLOC_REQS ioctl: %d/%d\n",
+                      alloc_pending_reqs, blkif_reqs);
+               return (alloc_pending_reqs/blkif_reqs) * 100;
+       }
+       }
+       return -ENOIOCTLCMD;
+}
+
+/*
+ * blktap_poll - poll() handler for tapdisk.  Reports the device readable
+ * whenever unpushed requests are sitting in the private ring; pushes them
+ * to the shared ring as a side effect so userspace sees them on wakeup.
+ * Device 0 (the control device) never signals readiness.
+ */
+static unsigned int blktap_poll(struct file *file, poll_table *wait)
+{
+       private_info_t *prv;
+       tap_blkif_t *info;
+       
+       /*Retrieve the dev info*/
+       prv = (private_info_t *)file->private_data;
+       if (prv == NULL) {
+               WPRINTK(" poll, retrieving idx failed\n");
+               return 0;
+       }
+       
+       if (prv->idx == 0) return 0;
+       
+       info = tapfds[prv->idx];
+       
+       poll_wait(file, &info->wait, wait);
+       if (info->ufe_ring.req_prod_pvt != info->ufe_ring.sring->req_prod) {
+               /* Make the foreign mappings visible before userspace runs. */
+               flush_tlb_all();
+               RING_PUSH_REQUESTS(&info->ufe_ring);
+               return POLLIN | POLLRDNORM;
+       }
+       return 0;
+}
+
+/*
+ * blktap_kick_user - wake any tapdisk process sleeping in poll() on
+ * device 'idx'.  Device 0 is the control device and is never kicked;
+ * unpopulated slots are silently ignored.
+ */
+void blktap_kick_user(int idx)
+{
+       tap_blkif_t *tapinfo;
+
+       if (idx == 0)
+               return;
+
+       tapinfo = tapfds[idx];
+       if (tapinfo == NULL)
+               return;
+
+       wake_up_interruptible(&tapinfo->wait);
+}
+
+static int do_block_io_op(blkif_t *blkif);
+static void dispatch_rw_block_io(blkif_t *blkif,
+                                blkif_request_t *req,
+                                pending_req_t *pending_req);
+static void make_response(blkif_t *blkif, unsigned long id, 
+                          unsigned short op, int st);
+
+/******************************************************************
+ * misc small helpers
+ */
+/*
+ * req_increase - grow the request pool by one chunk: reserve an empty
+ * page range from the balloon driver, allocate blkif_reqs request
+ * descriptors plus their per-page address table, thread the descriptors
+ * onto the global free list, and invalidate all grant handles.
+ * Runs entirely under pending_free_lock.
+ *
+ * Returns 0 on success, -EINVAL if the pool is full or locked for a
+ * shrink, -ENOMEM on allocation failure.
+ *
+ * NOTE(review): the "pool full" guard compares mmap_alloc against
+ * MAX_PENDING_REQS; the arrays it indexes are sized MAX_DYNAMIC_MEM.
+ * Both are 64 today, but the intended bound looks like MAX_DYNAMIC_MEM —
+ * confirm before changing either constant.
+ */
+static int req_increase(void)
+{
+       int i, j;
+       struct page *page;
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&pending_free_lock, flags);
+
+       ret = -EINVAL;
+       if (mmap_alloc >= MAX_PENDING_REQS || mmap_lock) 
+               goto done;
+
+#ifdef __ia64__
+       extern unsigned long alloc_empty_foreign_map_page_range(
+               unsigned long pages);
+       mmap_start[mmap_alloc].start = (unsigned long)
+               alloc_empty_foreign_map_page_range(mmap_pages);
+#else /* ! ia64 */
+       page = balloon_alloc_empty_page_range(mmap_pages);
+       ret = -ENOMEM;
+       if (page == NULL) {
+               printk("%s balloon_alloc_empty_page_range gave NULL\n", __FUNCTION__);
+               goto done;
+       }
+
+       /* Pin all of the pages. */
+       for (i=0; i<mmap_pages; i++)
+               get_page(&page[i]);
+
+       mmap_start[mmap_alloc].start = 
+               (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+       mmap_start[mmap_alloc].mpage = page;
+
+#endif
+
+       pending_reqs[mmap_alloc]  = kzalloc(sizeof(pending_req_t) *
+                                       blkif_reqs, GFP_KERNEL);
+       pending_addrs[mmap_alloc] = kzalloc(sizeof(unsigned long) *
+                                       mmap_pages, GFP_KERNEL);
+
+       ret = -ENOMEM;
+       if (!pending_reqs[mmap_alloc] || !pending_addrs[mmap_alloc]) {
+               kfree(pending_reqs[mmap_alloc]);
+               kfree(pending_addrs[mmap_alloc]);
+               WPRINTK("%s: out of memory\n", __FUNCTION__); 
+               ret = -ENOMEM;
+               goto done;
+       }
+
+       ret = 0;
+
+       DPRINTK("%s: reqs=%d, pages=%d, mmap_vstart=0x%lx\n",
+               __FUNCTION__, blkif_reqs, mmap_pages, 
+              mmap_start[mmap_alloc].start);
+
+       BUG_ON(mmap_start[mmap_alloc].start == 0);
+
+       /* Precompute each page's kernel virtual address for this chunk. */
+       for (i = 0; i < mmap_pages; i++) 
+               pending_addrs[mmap_alloc][i] = 
+                       mmap_start[mmap_alloc].start + (i << PAGE_SHIFT);
+
+       /* Put every new descriptor on the free list and reset its
+        * kernel/user grant handles to the invalid sentinel. */
+       for (i = 0; i < MAX_PENDING_REQS ; i++) {
+               list_add_tail(&pending_reqs[mmap_alloc][i].free_list, 
+                             &pending_free);
+               pending_reqs[mmap_alloc][i].mem_idx = mmap_alloc;
+               for (j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++)
+                       BLKTAP_INVALIDATE_HANDLE(&pending_handle(mmap_alloc, 
+                                                                i, j));
+       }
+
+       mmap_alloc++;
+       DPRINTK("# MMAPs increased to %d\n",mmap_alloc);
+ done:
+       spin_unlock_irqrestore(&pending_free_lock, flags);
+       return ret;
+}
+
+/*
+ * mmap_req_del - free one pool chunk: release its descriptor and address
+ * arrays, unpin and return its pages to the balloon driver, and clear the
+ * shrink lock.  Caller must hold pending_free_lock.
+ *
+ * Fixed: mmap_alloc is now decremented before the "decreased to" message
+ * (the original logged the pre-decrement count), and the freed array
+ * slots are cleared so no dangling pointers remain.
+ */
+static void mmap_req_del(int mmap)
+{
+       int i;
+       struct page *page;
+
+       /*Spinlock already acquired*/
+       kfree(pending_reqs[mmap]);
+       pending_reqs[mmap] = NULL;
+       kfree(pending_addrs[mmap]);
+       pending_addrs[mmap] = NULL;
+
+#ifdef __ia64__
+       /*Not sure what goes here yet!*/
+#else
+
+       /* Unpin all of the pages. */
+       page = mmap_start[mmap].mpage;
+       for (i=0; i<mmap_pages; i++)
+               put_page(&page[i]);
+
+       balloon_dealloc_empty_page_range(mmap_start[mmap].mpage, mmap_pages);
+#endif
+
+       mmap_lock = 0;
+       mmap_alloc--;
+       DPRINTK("# MMAPs decreased to %d\n",mmap_alloc);
+}
+
+/*N.B. Currently unused - will be accessed via sysfs*/
+static void req_decrease(void)
+{
+       pending_req_t *req;
+       int i;
+       unsigned long flags;
+
+       spin_lock_irqsave(&pending_free_lock, flags);
+
+       DPRINTK("Req decrease called.\n");
+       if (mmap_lock || mmap_alloc == 1) 
+               goto done;
+
+       mmap_lock = 1;
+       mmap_inuse = MAX_PENDING_REQS;
+       
+        /*Go through reqs and remove any that aren't in use*/
+       for (i = 0; i < MAX_PENDING_REQS ; i++) {
+               req = &pending_reqs[mmap_alloc-1][i];
+               if (req->inuse == 0) {
+                       list_del(&req->free_list);
+                       mmap_inuse--;
+               }
+       }
+       if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1);
+ done:
+       spin_unlock_irqrestore(&pending_free_lock, flags);
+       return;
+}
+
+/*
+ * alloc_req - pop one request descriptor off the global free list and
+ * mark it in use.  Returns NULL when the list is empty; callers sleep on
+ * pending_free_wq and retry.  alloc_pending_reqs tracks outstanding
+ * descriptors for the QUERY_ALLOC_REQS ioctl.
+ */
+static pending_req_t* alloc_req(void)
+{
+       unsigned long flags;
+       pending_req_t *out = NULL;
+
+       spin_lock_irqsave(&pending_free_lock, flags);
+
+       if (!list_empty(&pending_free)) {
+               out = list_entry(pending_free.next, pending_req_t, free_list);
+               list_del(&out->free_list);
+               out->inuse = 1;
+               alloc_pending_reqs++;
+       }
+
+       spin_unlock_irqrestore(&pending_free_lock, flags);
+
+       return out;
+}
+
+/*
+ * free_req - return a completed descriptor.  Normally it goes back on the
+ * free list (waking sleepers only on the empty -> non-empty transition);
+ * but if a pool shrink is in progress and this descriptor belongs to the
+ * chunk being drained, it is retired instead, and the chunk is destroyed
+ * once its last descriptor comes home.
+ */
+static void free_req(pending_req_t *req)
+{
+       unsigned long flags;
+       int was_empty;
+
+       spin_lock_irqsave(&pending_free_lock, flags);
+
+       alloc_pending_reqs--;
+       req->inuse = 0;
+       /* Shrink path: count this req against the draining chunk rather
+        * than recycling it. */
+       if (mmap_lock && (req->mem_idx == mmap_alloc-1)) {
+               mmap_inuse--;
+               if (mmap_inuse == 0) mmap_req_del(mmap_alloc-1);
+               spin_unlock_irqrestore(&pending_free_lock, flags);
+               return;
+       }
+       was_empty = list_empty(&pending_free);
+       list_add(&req->free_list, &pending_free);
+
+       spin_unlock_irqrestore(&pending_free_lock, flags);
+
+       /* Wake waiters only when the list just became non-empty. */
+       if (was_empty)
+               wake_up(&pending_free_wq);
+}
+
+/*
+ * fast_flush_area - tear down the grant mappings of one request: for each
+ * page, queue an unmap of both the kernel-side mapping (by virtual
+ * address) and the user-side mapping (by pte address), submit them in a
+ * single hypercall, then zap the user address range.
+ *
+ * NOTE(review): the mid-loop 'return' on create_lookup_pte_addr() failure
+ * abandons the unmap ops already queued in unmap[] without submitting
+ * them, leaking grant references — confirm whether that path is reachable.
+ */
+static void fast_flush_area(pending_req_t *req, int k_idx, int u_idx, int 
+                           tapidx)
+{
+       struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+       unsigned int i, invcount = 0;
+       struct grant_handle_pair *khandle;
+       uint64_t ptep;
+       int ret, mmap_idx;
+       unsigned long kvaddr, uvaddr;
+
+       tap_blkif_t *info = tapfds[tapidx];
+       
+       if (info == NULL) {
+               WPRINTK("fast_flush: Couldn't get info!\n");
+               return;
+       }
+       mmap_idx = req->mem_idx;
+
+       for (i = 0; i < req->nr_pages; i++) {
+               kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i);
+               uvaddr = MMAP_VADDR(info->user_vstart, u_idx, i);
+
+               khandle = &pending_handle(mmap_idx, k_idx, i);
+               /* Already-invalid handles were never mapped; skip them. */
+               if (BLKTAP_INVALID_HANDLE(khandle)) {
+                       WPRINTK("BLKTAP_INVALID_HANDLE\n");
+                       continue;
+               }
+               gnttab_set_unmap_op(&unmap[invcount], 
+                       MMAP_VADDR(mmap_start[mmap_idx].start, k_idx, i), 
+                                   GNTMAP_host_map, khandle->kernel);
+               invcount++;
+
+               if (create_lookup_pte_addr(
+                   info->vma->vm_mm,
+                   MMAP_VADDR(info->user_vstart, u_idx, i), 
+                   &ptep) !=0) {
+                       WPRINTK("Couldn't get a pte addr!\n");
+                       return;
+               }
+
+               gnttab_set_unmap_op(&unmap[invcount], 
+                       ptep, GNTMAP_host_map,
+                       khandle->user);
+               invcount++;
+            
+               BLKTAP_INVALIDATE_HANDLE(khandle);
+       }
+       /* One batched hypercall for all queued unmaps. */
+       ret = HYPERVISOR_grant_table_op(
+               GNTTABOP_unmap_grant_ref, unmap, invcount);
+       BUG_ON(ret);
+       
+       if (info->vma != NULL)
+               zap_page_range(info->vma, 
+                              MMAP_VADDR(info->user_vstart, u_idx, 0), 
+                              req->nr_pages << PAGE_SHIFT, NULL);
+}
+
+/******************************************************************
+ * SCHEDULER FUNCTIONS
+ */
+
+/*
+ * Emit per-interface request counters (out-of-descriptor, read, write)
+ * to the kernel log, then reset them and schedule the next report for
+ * 10 seconds from now.
+ */
+static void print_stats(blkif_t *blkif)
+{
+       printk(KERN_DEBUG "%s: oo %3d  |  rd %4d  |  wr %4d\n",
+              current->comm, blkif->st_oo_req,
+              blkif->st_rd_req, blkif->st_wr_req);
+       blkif->st_print = jiffies + msecs_to_jiffies(10 * 1000);
+       blkif->st_rd_req = 0;
+       blkif->st_wr_req = 0;
+       blkif->st_oo_req = 0;
+}
+
+/*
+ * Per-blkif kernel thread main loop.  Sleeps until the frontend posts
+ * work (blkif->wq) AND a pending_req is available (pending_free_wq),
+ * then drains the ring via do_block_io_op().  Exits when the thread is
+ * asked to stop; drops its blkif reference on the way out.
+ */
+int tap_blkif_schedule(void *arg)
+{
+       blkif_t *blkif = arg;
+
+       blkif_get(blkif);
+
+       if (debug_lvl)
+               printk(KERN_DEBUG "%s: started\n", current->comm);
+
+       while (!kthread_should_stop()) {
+               wait_event_interruptible(
+                       blkif->wq,
+                       blkif->waiting_reqs || kthread_should_stop());
+               wait_event_interruptible(
+                       pending_free_wq,
+                       !list_empty(&pending_free) || kthread_should_stop());
+
+               blkif->waiting_reqs = 0;
+               smp_mb(); /* clear flag *before* checking for work */
+
+               /* Re-arm the flag if the ring still has unconsumed work. */
+               if (do_block_io_op(blkif))
+                       blkif->waiting_reqs = 1;
+
+               if (log_stats && time_after(jiffies, blkif->st_print))
+                       print_stats(blkif);
+       }
+
+       if (log_stats)
+               print_stats(blkif);
+       if (debug_lvl)
+               printk(KERN_DEBUG "%s: exiting\n", current->comm);
+
+       blkif->xenblkd = NULL;
+       blkif_put(blkif);
+
+       return 0;
+}
+
+/******************************************************************
+ * COMPLETION CALLBACK -- Called by user level ioctl()
+ */
+
+/*
+ * Consume responses the user-space daemon has placed on the UFE ring
+ * for tap device `idx`: for each response, release the user-page
+ * mappings, flush grant mappings, forward a response to the guest
+ * frontend, and recycle the pending_req.  Returns 0 always.
+ * NOTE(review): when the idx_map sanity check fails only a warning is
+ * printed and the (bogus) indices are still used below -- confirm this
+ * cannot be reached with attacker-controlled resp->id.
+ */
+static int blktap_read_ufe_ring(int idx)
+{
+       /* This is called to read responses from the UFE ring. */
+       RING_IDX i, j, rp;
+       blkif_response_t *resp;
+       blkif_t *blkif=NULL;
+       int pending_idx, usr_idx, mmap_idx;
+       pending_req_t *pending_req;
+       tap_blkif_t *info;
+       
+       info = tapfds[idx];
+       if (info == NULL) {
+               return 0;
+       }
+
+       /* We currently only forward packets in INTERCEPT_FE mode. */
+       if (!(info->mode & BLKTAP_MODE_INTERCEPT_FE))
+               return 0;
+
+       /* for each outstanding message on the UFEring  */
+       rp = info->ufe_ring.sring->rsp_prod;
+       rmb();
+        
+       for (i = info->ufe_ring.rsp_cons; i != rp; i++) {
+               resp = RING_GET_RESPONSE(&info->ufe_ring, i);
+               ++info->ufe_ring.rsp_cons;
+
+               /*retrieve [usr_idx] to [mmap_idx,pending_idx] mapping*/
+               usr_idx = (int)resp->id;
+               pending_idx = MASK_PEND_IDX(ID_TO_IDX(info->idx_map[usr_idx]));
+               mmap_idx = ID_TO_MIDX(info->idx_map[usr_idx]);
+
+               if ( (mmap_idx >= mmap_alloc) || 
+                  (ID_TO_IDX(info->idx_map[usr_idx]) >= MAX_PENDING_REQS) )
+                       WPRINTK("Incorrect req map"
+                              "[%d], internal map [%d,%d (%d)]\n", 
+                              usr_idx, mmap_idx, 
+                              ID_TO_IDX(info->idx_map[usr_idx]),
+                              MASK_PEND_IDX(
+                                      ID_TO_IDX(info->idx_map[usr_idx])));
+
+               pending_req = &pending_reqs[mmap_idx][pending_idx];
+               blkif = pending_req->blkif;
+
+               /* Detach each user page from the mmap'd window. */
+               for (j = 0; j < pending_req->nr_pages; j++) {
+
+                       unsigned long kvaddr, uvaddr;
+                       struct page **map = info->vma->vm_private_data;
+                       struct page *pg;
+                       int offset; 
+
+                       uvaddr  = MMAP_VADDR(info->user_vstart, usr_idx, j);
+                       kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
+                                           pending_idx, j);
+
+                       pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+                       ClearPageReserved(pg);
+                       offset = (uvaddr - info->vma->vm_start) 
+                               >> PAGE_SHIFT;
+                       map[offset] = NULL;
+               }
+               fast_flush_area(pending_req, pending_idx, usr_idx, idx); 
+               /* Relay the daemon's status back to the guest frontend. */
+               make_response(blkif, pending_req->id, resp->operation,
+                             resp->status);
+               info->idx_map[usr_idx] = INVALID_REQ;
+               blkif_put(pending_req->blkif);
+               free_req(pending_req);
+       }
+               
+       return 0;
+}
+
+
+/******************************************************************************
+ * NOTIFICATION FROM GUEST OS.
+ */
+
+/* Flag the blkif as having pending work and wake its scheduler thread. */
+static void blkif_notify_work(blkif_t *blkif)
+{
+       blkif->waiting_reqs = 1;
+       wake_up(&blkif->wq);
+}
+
+/* Event-channel interrupt handler: the guest has posted new requests. */
+irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+       blkif_notify_work(dev_id);
+       return IRQ_HANDLED;
+}
+
+
+
+/******************************************************************
+ * DOWNWARD CALLS -- These interface with the block-device layer proper.
+ */
+/* One-shot flag so the "no UE ring" warnings below print only once. */
+static int print_dbug = 1;
+/*
+ * Drain the guest-facing block ring: for each request, allocate a
+ * pending_req and dispatch it toward user space, then kick the daemon.
+ * Returns nonzero ("more to do") if the loop stopped early -- user ring
+ * full, consumer overflow, or pending_req exhaustion -- so the caller
+ * re-arms waiting_reqs.
+ */
+static int do_block_io_op(blkif_t *blkif)
+{
+       blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+       blkif_request_t *req;
+       pending_req_t *pending_req;
+       RING_IDX rc, rp;
+       int more_to_do = 0;
+       tap_blkif_t *info;
+
+       rc = blk_ring->req_cons;
+       rp = blk_ring->sring->req_prod;
+       rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+       /*Check blkif has corresponding UE ring*/
+       if (blkif->dev_num == -1) {
+               /*oops*/
+               if (print_dbug) {
+                       WPRINTK("Corresponding UE " 
+                              "ring does not exist!\n");
+                       print_dbug = 0; /*We only print this message once*/
+               }
+               return 1; 
+       }
+
+       info = tapfds[blkif->dev_num];
+       if (info == NULL || !info->dev_inuse) {
+               if (print_dbug) {
+                       WPRINTK("Can't get UE info!\n");
+                       print_dbug = 0;
+               }
+               return 1;
+       }
+
+       while (rc != rp) {
+               
+               if (RING_FULL(&info->ufe_ring)) {
+                       WPRINTK("RING_FULL! More to do\n");
+                       more_to_do = 1;
+                       break;
+               }
+               
+               if (RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) {
+                       WPRINTK("RING_REQUEST_CONS_OVERFLOW!"
+                              " More to do\n");
+                       more_to_do = 1;
+                       break;          
+               }
+
+               pending_req = alloc_req();
+               if (NULL == pending_req) {
+                       blkif->st_oo_req++;
+                       more_to_do = 1;
+                       break;
+               }
+
+               req = RING_GET_REQUEST(blk_ring, rc);
+               blk_ring->req_cons = ++rc; /* before make_response() */ 
+
+               switch (req->operation) {
+               case BLKIF_OP_READ:
+                       blkif->st_rd_req++;
+                       dispatch_rw_block_io(blkif, req, pending_req);
+                       break;
+
+               case BLKIF_OP_WRITE:
+                       blkif->st_wr_req++;
+                       dispatch_rw_block_io(blkif, req, pending_req);
+                       break;
+
+               default:
+                       /* Unsupported op: fail it back to the frontend. */
+                       WPRINTK("unknown operation [%d]\n",
+                               req->operation);
+                       make_response(blkif, req->id, req->operation,
+                                     BLKIF_RSP_ERROR);
+                       free_req(pending_req);
+                       break;
+               }
+       }
+               
+       /* Notify the user-space daemon that requests are waiting. */
+       blktap_kick_user(blkif->dev_num);
+
+       return more_to_do;
+}
+
+/*
+ * Map a guest read/write request's grant pages into both kernel space
+ * and the tap daemon's user address space, then forward the request on
+ * the user (UFE) ring.  On any failure the mappings are flushed and an
+ * error response is returned to the frontend.
+ * NOTE(review): the usr_idx == INVALID_REQ path jumps to fail_flush,
+ * which calls fast_flush_area() with that invalid index before anything
+ * was mapped -- confirm fail_response would not be the correct target.
+ */
+static void dispatch_rw_block_io(blkif_t *blkif,
+                                blkif_request_t *req,
+                                pending_req_t *pending_req)
+{
+       /* NOTE(review): this extern appears unused in the body below. */
+       extern void ll_rw_block(int rw, int nr, struct buffer_head * bhs[]); 
+       int op, operation = (req->operation == BLKIF_OP_WRITE) ? WRITE : READ;
+       struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST*2];
+       unsigned int nseg;
+       int ret, i;
+       tap_blkif_t *info = tapfds[blkif->dev_num];
+       uint64_t sector;
+       
+       blkif_request_t *target;
+       int pending_idx = RTN_PEND_IDX(pending_req,pending_req->mem_idx);
+       int usr_idx = GET_NEXT_REQ(info->idx_map);
+       uint16_t mmap_idx = pending_req->mem_idx;
+
+       /*Check we have space on user ring - should never fail*/
+       if(usr_idx == INVALID_REQ) goto fail_flush;
+       
+       /* Check that number of segments is sane. */
+       nseg = req->nr_segments;
+       if ( unlikely(nseg == 0) || 
+           unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST) ) {
+               WPRINTK("Bad number of segments in request (%d)\n", nseg);
+               goto fail_response;
+       }
+       
+       /* Make sure userspace is ready. */
+       if (!info->ring_ok) {
+               WPRINTK("blktap: ring not ready for requests!\n");
+               goto fail_response;
+       }
+
+       if (RING_FULL(&info->ufe_ring)) {
+               WPRINTK("blktap: fe_ring is full, can't add "
+                       "IO Request will be dropped. %d %d\n",
+                       RING_SIZE(&info->ufe_ring),
+                       RING_SIZE(&blkif->blk_ring));
+               goto fail_response;
+       }
+
+       pending_req->blkif     = blkif;
+       pending_req->id        = req->id;
+       pending_req->operation = operation;
+       pending_req->status    = BLKIF_RSP_OKAY;
+       pending_req->nr_pages  = nseg;
+       op = 0;
+       /* Build two map ops per segment: kernel mapping + user PTE. */
+       for (i = 0; i < nseg; i++) {
+               unsigned long uvaddr;
+               unsigned long kvaddr;
+               uint64_t ptep;
+               struct page *page;
+               uint32_t flags;
+
+               uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i);
+               kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
+                                   pending_idx, i);
+               page = virt_to_page(kvaddr);
+
+               /* 8 sectors per page: assumes 512B sectors, 4K pages. */
+               sector = req->sector_number + (8*i);
+               if( (blkif->sectors > 0) && (sector >= blkif->sectors) ) {
+                       WPRINTK("BLKTAP: Sector request greater" 
+                              "than size\n");
+                       WPRINTK("BLKTAP: %s request sector" 
+                              "[%llu,%llu], Total [%llu]\n",
+                              (req->operation == 
+                               BLKIF_OP_WRITE ? "WRITE" : "READ"),
+                               (long long unsigned) sector,
+                               (long long unsigned) sector>>9,
+                               blkif->sectors);
+               }
+
+               flags = GNTMAP_host_map;
+               if (operation == WRITE)
+                       flags |= GNTMAP_readonly;
+               gnttab_set_map_op(&map[op], kvaddr, flags,
+                                 req->seg[i].gref, blkif->domid);
+               op++;
+
+               /* Now map it to user. */
+               ret = create_lookup_pte_addr(info->vma->vm_mm, 
+                                            uvaddr, &ptep);
+               if (ret) {
+                       WPRINTK("Couldn't get a pte addr!\n");
+                       fast_flush_area(pending_req, pending_idx, usr_idx, 
+                                       blkif->dev_num);
+                       goto fail_flush;
+               }
+
+               flags = GNTMAP_host_map | GNTMAP_application_map
+                       | GNTMAP_contains_pte;
+               if (operation == WRITE)
+                       flags |= GNTMAP_readonly;
+               gnttab_set_map_op(&map[op], ptep, flags,
+                                 req->seg[i].gref, blkif->domid);
+               op++;
+       }
+
+       /* Issue all map ops in one hypercall. */
+       ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, op);
+       BUG_ON(ret);
+
+       /* Check per-op status; pairs are (kernel map, user map). */
+       for (i = 0; i < (nseg*2); i+=2) {
+               unsigned long uvaddr;
+               unsigned long kvaddr;
+               unsigned long offset;
+               struct page *pg;
+
+               uvaddr = MMAP_VADDR(info->user_vstart, usr_idx, i/2);
+               kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
+                                   pending_idx, i/2);
+
+               if (unlikely(map[i].status != 0)) {
+                       WPRINTK("invalid kernel buffer -- "
+                               "could not remap it\n");
+                       goto fail_flush;
+               }
+
+               if (unlikely(map[i+1].status != 0)) {
+                       WPRINTK("invalid user buffer -- "
+                               "could not remap it\n");
+                       goto fail_flush;
+               }
+
+               pending_handle(mmap_idx, pending_idx, i/2).kernel 
+                       = map[i].handle;
+               pending_handle(mmap_idx, pending_idx, i/2).user   
+                       = map[i+1].handle;
+               /* Record the foreign machine frame behind this pfn. */
+               set_phys_to_machine(__pa(kvaddr) >> PAGE_SHIFT,
+                       FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT));
+               offset = (uvaddr - info->vma->vm_start) >> PAGE_SHIFT;
+               pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+               ((struct page **)info->vma->vm_private_data)[offset] =
+                       pg;
+       }
+       /* Mark mapped pages as reserved: */
+       for (i = 0; i < req->nr_segments; i++) {
+               unsigned long kvaddr;
+               struct page *pg;
+
+               kvaddr = MMAP_VADDR(mmap_start[mmap_idx].start, 
+                                   pending_idx, i);
+               pg = pfn_to_page(__pa(kvaddr) >> PAGE_SHIFT);
+               SetPageReserved(pg);
+       }
+       
+       /*record [mmap_idx,pending_idx] to [usr_idx] mapping*/
+       info->idx_map[usr_idx] = MAKE_ID(mmap_idx, pending_idx);
+
+       blkif_get(blkif);
+       /* Finally, write the request message to the user ring. */
+       target = RING_GET_REQUEST(&info->ufe_ring,
+                                 info->ufe_ring.req_prod_pvt);
+       memcpy(target, req, sizeof(*req));
+       target->id = usr_idx;
+       info->ufe_ring.req_prod_pvt++;
+       return;
+
+ fail_flush:
+       WPRINTK("Reached Fail_flush\n");
+       fast_flush_area(pending_req, pending_idx, usr_idx, blkif->dev_num);
+ fail_response:
+       make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+       free_req(pending_req);
+} 
+
+
+
+/******************************************************************
+ * MISCELLANEOUS SETUP / TEARDOWN / DEBUGGING
+ */
+
+
+/*
+ * Queue a response on the guest-facing ring under blk_ring_lock, then
+ * (outside the lock) re-kick the scheduler if more requests are pending
+ * and notify the frontend via its event channel if required.
+ */
+static void make_response(blkif_t *blkif, unsigned long id, 
+                          unsigned short op, int st)
+{
+       blkif_response_t *resp;
+       unsigned long     flags;
+       blkif_back_ring_t *blk_ring = &blkif->blk_ring;
+       int more_to_do = 0;
+       int notify;
+
+       spin_lock_irqsave(&blkif->blk_ring_lock, flags);
+       /* Place on the response ring for the relevant domain. */ 
+       resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
+       resp->id        = id;
+       resp->operation = op;
+       resp->status    = st;
+       blk_ring->rsp_prod_pvt++;
+       RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);
+
+       if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) {
+               /*
+                * Tail check for pending requests. Allows frontend to avoid
+                * notifications if requests are already in flight (lower
+                * overheads and promotes batching).
+                */
+               RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do);
+       } else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) {
+               more_to_do = 1;
+
+       }       
+       spin_unlock_irqrestore(&blkif->blk_ring_lock, flags);
+       if (more_to_do)
+               blkif_notify_work(blkif);
+       if (notify)
+               notify_remote_via_irq(blkif->irq);
+}
+
+/*
+ * Module init: pre-allocate two batches of pending_reqs, initialize the
+ * backend interface and xenbus hooks, register the blktap character
+ * device, and create MAX_TAP_DEV per-device state/devfs nodes.
+ * NOTE(review): the error paths below return without unregistering the
+ * chrdev or freeing earlier tapfds[] allocations -- confirm whether
+ * cleanup is intentionally omitted for this built-in init.
+ */
+static int __init blkif_init(void)
+{
+       int i,ret,blktap_dir;
+       tap_blkif_t *info;
+
+       if (!is_running_on_xen())
+               return -ENODEV;
+
+       INIT_LIST_HEAD(&pending_free);
+        for(i = 0; i < 2; i++) {
+               ret = req_increase();
+               if (ret)
+                       break;
+       }
+       /* Fail only if not even the first batch could be allocated. */
+       if (i == 0)
+               return ret;
+
+       tap_blkif_interface_init();
+
+       alloc_pending_reqs = 0;
+
+       tap_blkif_xenbus_init();
+
+       /*Create the blktap devices, but do not map memory or waitqueue*/
+       for(i = 0; i < MAX_TAP_DEV; i++) translate_domid[i].domid = 0xFFFF;
+
+       ret = register_chrdev(BLKTAP_DEV_MAJOR,"blktap",&blktap_fops);
+       blktap_dir = devfs_mk_dir(NULL, "xen", 0, NULL);
+
+       if ( (ret < 0)||(blktap_dir < 0) ) {
+               WPRINTK("Couldn't register /dev/xen/blktap\n");
+               return -ENOMEM;
+       }       
+       
+       for(i = 0; i < MAX_TAP_DEV; i++ ) {
+               info = tapfds[i] = kzalloc(sizeof(tap_blkif_t),GFP_KERNEL);
+               if(tapfds[i] == NULL) return -ENOMEM;
+               info->minor = i;
+               info->pid = 0;
+               info->blkif = NULL;
+
+               ret = devfs_mk_cdev(MKDEV(BLKTAP_DEV_MAJOR, i),
+                       S_IFCHR|S_IRUGO|S_IWUSR, "xen/blktap%d", i);
+
+               if(ret != 0) return -ENOMEM;
+               info->dev_pending = info->dev_inuse = 0;
+
+               DPRINTK("Created misc_dev [/dev/xen/blktap%d]\n",i);
+       }
+       
+       DPRINTK("Blktap device successfully created\n");
+
+       return 0;
+}
+
+module_init(blkif_init);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/console/Makefile b/drivers/xen/console/Makefile
new file mode 100644 (file)
index 0000000..35de3e9
--- /dev/null
@@ -0,0 +1,2 @@
+
+obj-y  := console.o xencons_ring.o
diff --git a/drivers/xen/console/console.c b/drivers/xen/console/console.c
new file mode 100644 (file)
index 0000000..ec7dd9e
--- /dev/null
@@ -0,0 +1,644 @@
+/******************************************************************************
+ * console.c
+ * 
+ * Virtual console driver.
+ * 
+ * Copyright (c) 2002-2004, K A Fraser.
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial.h>
+#include <linux/major.h>
+#include <linux/ptrace.h>
+#include <linux/ioport.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/bootmem.h>
+#include <linux/sysrq.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/uaccess.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/event_channel.h>
+#include <asm/hypervisor.h>
+#include <xen/evtchn.h>
+#include <xen/xencons.h>
+
+/*
+ * Modes:
+ *  'xencons=off'  [XC_OFF]:     Console is disabled.
+ *  'xencons=tty'  [XC_TTY]:     Console attached to '/dev/tty[0-9]+'.
+ *  'xencons=ttyS' [XC_SERIAL]:  Console attached to '/dev/ttyS[0-9]+'.
+ *                 [XC_DEFAULT]: DOM0 -> XC_SERIAL ; all others -> XC_TTY.
+ * 
+ * NB. In mode XC_TTY, we create dummy consoles for tty2-63. This suppresses
+ * warnings from standard distro startup scripts.
+ */
+/* Selected console mode (see the mode table above) and optional index. */
+static enum { XC_OFF, XC_DEFAULT, XC_TTY, XC_SERIAL } xc_mode = XC_DEFAULT;
+static int xc_num = -1;
+
+#ifdef CONFIG_MAGIC_SYSRQ
+static unsigned long sysrq_requested;
+extern int sysrq_enabled;
+#endif
+
+/*
+ * Parse the "xencons=" boot parameter: "off", "tty[N]" or "ttyS[N]".
+ * A trailing number, when present, selects the device index (xc_num).
+ */
+static int __init xencons_setup(char *str)
+{
+       char *q;
+       int n;
+
+       /* "ttyS" must be tested before "tty" -- it is a prefix match. */
+       if (!strncmp(str, "ttyS", 4))
+               xc_mode = XC_SERIAL;
+       else if (!strncmp(str, "tty", 3))
+               xc_mode = XC_TTY;
+       else if (!strncmp(str, "off", 3))
+               xc_mode = XC_OFF;
+
+       switch (xc_mode) {
+       case XC_SERIAL:
+               n = simple_strtol(str+4, &q, 10);
+               if (q > (str + 4))
+                       xc_num = n;
+               break;
+       case XC_TTY:
+               n = simple_strtol(str+3, &q, 10);
+               if (q > (str + 3))
+                       xc_num = n;
+               break;
+       default:
+               break;
+       }
+
+       return 1;
+}
+__setup("xencons=", xencons_setup);
+
+/* The kernel and user-land drivers share a common transmit buffer. */
+static unsigned int wbuf_size = 4096;
+/* wbuf_size is always a power of two, so masking wraps the index. */
+#define WBUF_MASK(_i) ((_i)&(wbuf_size-1))
+static char *wbuf;
+static unsigned int wc, wp; /* write_cons, write_prod */
+
+/*
+ * Parse "xencons_bufsz=": grow the transmit buffer to at least the
+ * requested size, rounded up to a power of two (never shrink it).
+ */
+static int __init xencons_bufsz_setup(char *str)
+{
+       unsigned int goal;
+       goal = simple_strtoul(str, NULL, 0);
+       if (goal) {
+               goal = roundup_pow_of_two(goal);
+               if (wbuf_size < goal)
+                       wbuf_size = goal;
+       }
+       return 1;
+}
+__setup("xencons_bufsz=", xencons_bufsz_setup);
+
+/* This lock protects accesses to the common transmit buffer. */
+static DEFINE_SPINLOCK(xencons_lock);
+
+/* Common transmit-kick routine. */
+static void __xencons_tx_flush(void);
+
+static struct tty_driver *xencons_driver;
+
+/******************** Kernel console driver ********************************/
+
+/*
+ * Kernel console write for non-dom0: copy into the shared transmit
+ * ring (expanding '\n' to "\n\r") and flush; loops until all `count`
+ * bytes are buffered, flushing whenever the ring fills.
+ */
+static void kcons_write(struct console *c, const char *s, unsigned int count)
+{
+       int           i = 0;
+       unsigned long flags;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+
+       while (i < count) {
+               for (; i < count; i++) {
+                       /* Leave one slot spare for a possible '\r'. */
+                       if ((wp - wc) >= (wbuf_size - 1))
+                               break;
+                       if ((wbuf[WBUF_MASK(wp++)] = s[i]) == '\n')
+                               wbuf[WBUF_MASK(wp++)] = '\r';
+               }
+
+               __xencons_tx_flush();
+       }
+
+       spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
+/*
+ * Kernel console write for dom0: emit synchronously via the
+ * CONSOLEIO_write hypercall, retrying partial writes until done or the
+ * hypercall reports an error.
+ */
+static void kcons_write_dom0(struct console *c, const char *s, unsigned int count)
+{
+
+       while (count > 0) {
+               int rc;
+               rc = HYPERVISOR_console_io( CONSOLEIO_write, count, (char *)s);
+               if (rc <= 0)
+                       break;
+               count -= rc;
+               s += rc;
+       }
+}
+
+/* Map the kernel console onto index 0 of the xencons tty driver. */
+static struct tty_driver *kcons_device(struct console *c, int *index)
+{
+       *index = 0;
+       return xencons_driver;
+}
+
+/* Kernel console descriptor; .name and .write are filled in at init. */
+static struct console kcons_info = {
+       .device = kcons_device,
+       .flags  = CON_PRINTBUFFER | CON_ENABLED,
+       .index  = -1,
+};
+
+#define __RETCODE 0
+/*
+ * Early console setup: pick the write path (hypercall for dom0, shared
+ * ring otherwise), resolve XC_DEFAULT to the domain-appropriate mode,
+ * allocate the transmit buffer from bootmem, and register the console.
+ */
+static int __init xen_console_init(void)
+{
+       if (!is_running_on_xen())
+               return __RETCODE;
+
+       if (xen_start_info->flags & SIF_INITDOMAIN) {
+               if (xc_mode == XC_DEFAULT)
+                       xc_mode = XC_SERIAL;
+               kcons_info.write = kcons_write_dom0;
+       } else {
+               if (xc_mode == XC_DEFAULT)
+                       xc_mode = XC_TTY;
+               kcons_info.write = kcons_write;
+       }
+
+       switch (xc_mode) {
+       case XC_SERIAL:
+               strcpy(kcons_info.name, "ttyS");
+               if (xc_num == -1)
+                       xc_num = 0;
+               break;
+
+       case XC_TTY:
+               strcpy(kcons_info.name, "tty");
+               if (xc_num == -1)
+                       xc_num = 1;
+               break;
+
+       default:
+               /* XC_OFF: console disabled; register nothing. */
+               return __RETCODE;
+       }
+
+       wbuf = alloc_bootmem(wbuf_size);
+
+       register_console(&kcons_info);
+
+       return __RETCODE;
+}
+console_initcall(xen_console_init);
+
+/*** Useful function for console debugging -- goes straight to Xen. ***/
+asmlinkage int xprintk(const char *fmt, ...)
+{
+       va_list args;
+       int printk_len;
+       static char printk_buf[1024];
+
+       /* Emit the output into the temporary buffer */
+       va_start(args, fmt);
+       printk_len = vsnprintf(printk_buf, sizeof(printk_buf), fmt, args);
+       va_end(args);
+
+       /* Send the processed output directly to Xen. */
+       kcons_write_dom0(NULL, printk_buf, printk_len);
+
+       return 0;
+}
+
+/*** Forcibly flush console data before dying. ***/
+void xencons_force_flush(void)
+{
+       int sz;
+
+       /* Emergency console is synchronous, so there's nothing to flush. */
+       if (xen_start_info->flags & SIF_INITDOMAIN)
+               return;
+
+       /* Spin until console data is flushed through to the daemon. */
+       while (wc != wp) {
+               int sent = 0;
+               if ((sz = wp - wc) == 0)
+                       continue;
+               sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
+               if (sent > 0)
+                       wc += sent;
+       }
+}
+
+
+/******************** User-space console driver (/dev/console) ************/
+
+#define DRV(_d)         (_d)
+#define DUMMY_TTY(_tty) ((xc_mode != XC_SERIAL) &&             \
+                        ((_tty)->index != (xc_num - 1)))
+
+static struct termios *xencons_termios[MAX_NR_CONSOLES];
+static struct termios *xencons_termios_locked[MAX_NR_CONSOLES];
+static struct tty_struct *xencons_tty;
+static int xencons_priv_irq;
+static char x_char;
+
+/*
+ * Receive path: push `len` incoming bytes into the attached tty's flip
+ * buffer.  When magic-sysrq is enabled, a ^O prefix arms a 2-second
+ * window during which the next character is handled as a sysrq key
+ * (the lock is dropped around handle_sysrq, which may sleep/print).
+ */
+void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
+{
+       int           i;
+       unsigned long flags;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+       if (xencons_tty == NULL)
+               goto out;
+
+       for (i = 0; i < len; i++) {
+#ifdef CONFIG_MAGIC_SYSRQ
+               if (sysrq_enabled) {
+                       if (buf[i] == '\x0f') { /* ^O */
+                               sysrq_requested = jiffies;
+                               continue; /* don't print the sysrq key */
+                       } else if (sysrq_requested) {
+                               unsigned long sysrq_timeout =
+                                       sysrq_requested + HZ*2;
+                               sysrq_requested = 0;
+                               if (time_before(jiffies, sysrq_timeout)) {
+                                       spin_unlock_irqrestore(
+                                               &xencons_lock, flags);
+                                       handle_sysrq(
+                                               buf[i], regs, xencons_tty);
+                                       spin_lock_irqsave(
+                                               &xencons_lock, flags);
+                                       continue;
+                               }
+                       }
+               }
+#endif
+               tty_insert_flip_char(xencons_tty, buf[i], 0);
+       }
+       tty_flip_buffer_push(xencons_tty);
+
+ out:
+       spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
+/*
+ * Core transmit: drain a pending XON/XOFF x_char first, then push the
+ * wbuf ring contents out -- synchronously via hypercall for dom0, or
+ * onto the shared ring otherwise.  Wakes tty writers if progress was
+ * made.  Caller must hold xencons_lock.
+ */
+static void __xencons_tx_flush(void)
+{
+       int sent, sz, work_done = 0;
+
+       if (x_char) {
+               if (xen_start_info->flags & SIF_INITDOMAIN)
+                       kcons_write_dom0(NULL, &x_char, 1);
+               else
+                       while (x_char)
+                               if (xencons_ring_send(&x_char, 1) == 1)
+                                       break;
+               x_char = 0;
+               work_done = 1;
+       }
+
+       while (wc != wp) {
+               sz = wp - wc;
+               /* Clamp to the contiguous run before the ring wraps. */
+               if (sz > (wbuf_size - WBUF_MASK(wc)))
+                       sz = wbuf_size - WBUF_MASK(wc);
+               if (xen_start_info->flags & SIF_INITDOMAIN) {
+                       kcons_write_dom0(NULL, &wbuf[WBUF_MASK(wc)], sz);
+                       wc += sz;
+               } else {
+                       sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
+                       if (sent == 0)
+                               break;
+                       wc += sent;
+               }
+               work_done = 1;
+       }
+
+       if (work_done && (xencons_tty != NULL)) {
+               wake_up_interruptible(&xencons_tty->write_wait);
+               if ((xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
+                   (xencons_tty->ldisc.write_wakeup != NULL))
+                       (xencons_tty->ldisc.write_wakeup)(xencons_tty);
+       }
+}
+
+/* Locked wrapper around __xencons_tx_flush() for external callers. */
+void xencons_tx(void)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+       __xencons_tx_flush();
+       spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
+/* Privileged receive callback and transmit kicker. */
+static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
+                                         struct pt_regs *regs)
+{
+       static char rbuf[16];
+       int         l;
+
+       while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
+               xencons_rx(rbuf, l, regs);
+
+       xencons_tx();
+
+       return IRQ_HANDLED;
+}
+
+/* Free space remaining in the shared transmit buffer. */
+static int xencons_write_room(struct tty_struct *tty)
+{
+       return wbuf_size - (wp - wc);
+}
+
+/* Bytes currently queued in the shared transmit buffer. */
+static int xencons_chars_in_buffer(struct tty_struct *tty)
+{
+       return wp - wc;
+}
+
+/*
+ * Queue a high-priority control character (XON/XOFF) that jumps ahead
+ * of normal buffered output, and flush immediately.
+ */
+static void xencons_send_xchar(struct tty_struct *tty, char ch)
+{
+       unsigned long flags;
+
+       if (DUMMY_TTY(tty))
+               return;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+       x_char = ch;
+       __xencons_tx_flush();
+       spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
+/* Software flow control: ask the remote end to stop sending (XOFF). */
+static void xencons_throttle(struct tty_struct *tty)
+{
+       if (DUMMY_TTY(tty))
+               return;
+
+       if (I_IXOFF(tty))
+               xencons_send_xchar(tty, STOP_CHAR(tty));
+}
+
+/*
+ * Software flow control resume: cancel a still-pending XOFF if it was
+ * never sent, otherwise send an explicit XON (START_CHAR).
+ */
+static void xencons_unthrottle(struct tty_struct *tty)
+{
+       if (DUMMY_TTY(tty))
+               return;
+
+       if (I_IXOFF(tty)) {
+               if (x_char != 0)
+                       x_char = 0;
+               else
+                       xencons_send_xchar(tty, START_CHAR(tty));
+       }
+}
+
+/* Discard all unsent output by resetting the ring indices. */
+static void xencons_flush_buffer(struct tty_struct *tty)
+{
+       unsigned long flags;
+
+       if (DUMMY_TTY(tty))
+               return;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+       wc = wp = 0;
+       spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
+/*
+ * Append one character to the transmit ring.  Caller must hold
+ * xencons_lock.  Returns 1 on success, 0 if the ring is full.
+ */
+static inline int __xencons_put_char(int ch)
+{
+       char _ch = (char)ch;
+       if ((wp - wc) == wbuf_size)
+               return 0;
+       wbuf[WBUF_MASK(wp++)] = _ch;
+       return 1;
+}
+
+/*
+ * tty write: copy as many bytes as fit into the transmit ring, kick a
+ * flush, and return the number of bytes accepted (may be < count when the
+ * ring fills).  A dummy tty pretends to accept everything.
+ */
+static int xencons_write(
+       struct tty_struct *tty,
+       const unsigned char *buf,
+       int count)
+{
+       int i;
+       unsigned long flags;
+
+       if (DUMMY_TTY(tty))
+               return count;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+
+       for (i = 0; i < count; i++)
+               if (!__xencons_put_char(buf[i]))
+                       break;
+
+       /* Only bother flushing if something was actually queued. */
+       if (i != 0)
+               __xencons_tx_flush();
+
+       spin_unlock_irqrestore(&xencons_lock, flags);
+
+       return i;
+}
+
+/*
+ * Queue a single character.  The character is silently dropped if the
+ * ring is full (void return — tty core calls flush_chars later).
+ */
+static void xencons_put_char(struct tty_struct *tty, u_char ch)
+{
+       unsigned long flags;
+
+       if (DUMMY_TTY(tty))
+               return;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+       (void)__xencons_put_char(ch);
+       spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
+/* Push out characters queued via put_char. */
+static void xencons_flush_chars(struct tty_struct *tty)
+{
+       unsigned long flags;
+
+       if (DUMMY_TTY(tty))
+               return;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+       __xencons_tx_flush();
+       spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
+/*
+ * Sleep (interruptibly, one tick at a time) until the transmit ring
+ * drains, a signal arrives, or 'timeout' jiffies elapse (0 = no limit).
+ */
+static void xencons_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+       unsigned long orig_jiffies = jiffies;
+
+       if (DUMMY_TTY(tty))
+               return;
+
+       while (DRV(tty->driver)->chars_in_buffer(tty)) {
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(1);
+               if (signal_pending(current))
+                       break;
+               if (timeout && time_after(jiffies, orig_jiffies + timeout))
+                       break;
+       }
+
+       set_current_state(TASK_RUNNING);
+}
+
+/*
+ * tty open: the first opener becomes the receive target (xencons_tty);
+ * subsequent opens share it.  Kick a flush in case output was buffered
+ * while no tty was attached.
+ */
+static int xencons_open(struct tty_struct *tty, struct file *filp)
+{
+       unsigned long flags;
+
+       if (DUMMY_TTY(tty))
+               return 0;
+
+       spin_lock_irqsave(&xencons_lock, flags);
+       tty->driver_data = NULL;
+       if (xencons_tty == NULL)
+               xencons_tty = tty;
+       __xencons_tx_flush();
+       spin_unlock_irqrestore(&xencons_lock, flags);
+
+       return 0;
+}
+
+/*
+ * tty close: only the final close (tty->count == 1) tears down — it waits
+ * for output to drain, flushes driver and line-discipline buffers, and
+ * detaches the receive target.  tty_mutex guards the count check against
+ * a racing re-open.
+ */
+static void xencons_close(struct tty_struct *tty, struct file *filp)
+{
+       unsigned long flags;
+
+       if (DUMMY_TTY(tty))
+               return;
+
+       mutex_lock(&tty_mutex);
+
+       if (tty->count != 1) {
+               mutex_unlock(&tty_mutex);
+               return;
+       }
+
+       /* Prevent other threads from re-opening this tty. */
+       set_bit(TTY_CLOSING, &tty->flags);
+       mutex_unlock(&tty_mutex);
+
+       tty->closing = 1;
+       tty_wait_until_sent(tty, 0);
+       if (DRV(tty->driver)->flush_buffer != NULL)
+               DRV(tty->driver)->flush_buffer(tty);
+       if (tty->ldisc.flush_buffer != NULL)
+               tty->ldisc.flush_buffer(tty);
+       tty->closing = 0;
+       spin_lock_irqsave(&xencons_lock, flags);
+       xencons_tty = NULL;
+       spin_unlock_irqrestore(&xencons_lock, flags);
+}
+
+/* tty_operations for the Xen virtual console driver. */
+static struct tty_operations xencons_ops = {
+       .open = xencons_open,
+       .close = xencons_close,
+       .write = xencons_write,
+       .write_room = xencons_write_room,
+       .put_char = xencons_put_char,
+       .flush_chars = xencons_flush_chars,
+       .chars_in_buffer = xencons_chars_in_buffer,
+       .send_xchar = xencons_send_xchar,
+       .flush_buffer = xencons_flush_buffer,
+       .throttle = xencons_throttle,
+       .unthrottle = xencons_unthrottle,
+       .wait_until_sent = xencons_wait_until_sent,
+};
+
+/*
+ * Register the Xen virtual console as a tty driver.  In XC_SERIAL mode a
+ * single ttyS<xc_num> device is exposed; otherwise the driver provides
+ * tty1..MAX_NR_CONSOLES.  In the initial domain, VIRQ_CONSOLE is also
+ * bound so privileged console input/output flows through
+ * xencons_priv_interrupt().  Returns 0 on success or a negative errno.
+ */
+static int __init xencons_init(void)
+{
+       int rc;
+
+       if (!is_running_on_xen())
+               return -ENODEV;
+
+       /* Console explicitly disabled on the command line. */
+       if (xc_mode == XC_OFF)
+               return 0;
+
+       xencons_ring_init();
+
+       xencons_driver = alloc_tty_driver((xc_mode == XC_SERIAL) ?
+                                         1 : MAX_NR_CONSOLES);
+       if (xencons_driver == NULL)
+               return -ENOMEM;
+
+       DRV(xencons_driver)->name            = "xencons";
+       DRV(xencons_driver)->major           = TTY_MAJOR;
+       DRV(xencons_driver)->type            = TTY_DRIVER_TYPE_SERIAL;
+       DRV(xencons_driver)->subtype         = SERIAL_TYPE_NORMAL;
+       DRV(xencons_driver)->init_termios    = tty_std_termios;
+       DRV(xencons_driver)->flags           =
+               TTY_DRIVER_REAL_RAW |
+               TTY_DRIVER_RESET_TERMIOS;
+       DRV(xencons_driver)->termios         = xencons_termios;
+       DRV(xencons_driver)->termios_locked  = xencons_termios_locked;
+
+       if (xc_mode == XC_SERIAL) {
+               /* Appear as ttyS<xc_num> (serial minors start at 64). */
+               DRV(xencons_driver)->name        = "ttyS";
+               DRV(xencons_driver)->minor_start = 64 + xc_num;
+               DRV(xencons_driver)->name_base   = 0 + xc_num;
+       } else {
+               DRV(xencons_driver)->name        = "tty";
+               DRV(xencons_driver)->minor_start = 1;
+               DRV(xencons_driver)->name_base   = 1;
+       }
+
+       tty_set_operations(xencons_driver, &xencons_ops);
+
+       if ((rc = tty_register_driver(DRV(xencons_driver))) != 0) {
+               printk("WARNING: Failed to register Xen virtual "
+                      "console driver as '%s%d'\n",
+                      DRV(xencons_driver)->name,
+                      DRV(xencons_driver)->name_base);
+               put_tty_driver(xencons_driver);
+               xencons_driver = NULL;
+               return rc;
+       }
+
+       if (xen_start_info->flags & SIF_INITDOMAIN) {
+               xencons_priv_irq = bind_virq_to_irqhandler(
+                       VIRQ_CONSOLE,
+                       0,
+                       xencons_priv_interrupt,
+                       0,
+                       "console",
+                       NULL);
+               BUG_ON(xencons_priv_irq < 0);
+       }
+
+       printk("Xen virtual console successfully installed as %s%d\n",
+              DRV(xencons_driver)->name, xc_num);
+
+       return 0;
+}
+
+module_init(xencons_init);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/evtchn/Makefile b/drivers/xen/evtchn/Makefile
new file mode 100644 (file)
index 0000000..7b082a0
--- /dev/null
@@ -0,0 +1,2 @@
+
+obj-y  := evtchn.o
diff --git a/drivers/xen/evtchn/evtchn.c b/drivers/xen/evtchn/evtchn.c
new file mode 100644 (file)
index 0000000..76bfab8
--- /dev/null
@@ -0,0 +1,458 @@
+/******************************************************************************
+ * evtchn.c
+ * 
+ * Driver for receiving and demuxing event-channel signals.
+ * 
+ * Copyright (c) 2004-2005, K A Fraser
+ * Multi-process extensions Copyright (c) 2004, Steven Smith
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/miscdevice.h>
+#include <linux/major.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/poll.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/gfp.h>
+#include <xen/evtchn.h>
+#include <xen/public/evtchn.h>
+
+/* Per-open-file state for /dev/xen/evtchn. */
+struct per_user_data {
+       /* Notification ring, accessed via /dev/xen/evtchn. */
+#define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+       evtchn_port_t *ring;
+       /* ring_overflow latches until cleared by IOCTL_EVTCHN_RESET. */
+       unsigned int ring_cons, ring_prod, ring_overflow;
+
+       /* Processes wait on this queue when ring is empty. */
+       wait_queue_head_t evtchn_wait;
+       struct fasync_struct *evtchn_async_queue;
+};
+
+/* Who's bound to each port? */
+static struct per_user_data *port_user[NR_EVENT_CHANNELS];
+/* Protects port_user[] and the per-user notification rings. */
+static spinlock_t port_user_lock;
+
+/*
+ * Event-channel upcall for user-bound ports (interrupt context).  Masks
+ * and acknowledges the port, then queues it on the owner's notification
+ * ring.  Readers are woken only on the empty->non-empty transition; a
+ * full ring latches ring_overflow instead of dropping silently.
+ */
+void evtchn_device_upcall(int port)
+{
+       struct per_user_data *u;
+
+       spin_lock(&port_user_lock);
+
+       mask_evtchn(port);
+       clear_evtchn(port);
+
+       if ((u = port_user[port]) != NULL) {
+               if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
+                       u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port;
+                       if (u->ring_cons == u->ring_prod++) {
+                               wake_up_interruptible(&u->evtchn_wait);
+                               kill_fasync(&u->evtchn_async_queue,
+                                           SIGIO, POLL_IN);
+                       }
+               } else {
+                       u->ring_overflow = 1;
+               }
+       }
+
+       spin_unlock(&port_user_lock);
+}
+
+/*
+ * Read fired ports as an array of evtchn_port_t.  Blocks until at least
+ * one port is queued unless O_NONBLOCK is set.  Returns -EFBIG once the
+ * ring has overflowed (cleared via IOCTL_EVTCHN_RESET).
+ *
+ * NOTE(review): ring_cons/ring_prod are read without port_user_lock —
+ * presumably safe for a single concurrent reader; confirm.
+ */
+static ssize_t evtchn_read(struct file *file, char __user *buf,
+                          size_t count, loff_t *ppos)
+{
+       int rc;
+       unsigned int c, p, bytes1 = 0, bytes2 = 0;
+       struct per_user_data *u = file->private_data;
+
+       /* Whole number of ports. */
+       count &= ~(sizeof(evtchn_port_t)-1);
+
+       if (count == 0)
+               return 0;
+
+       if (count > PAGE_SIZE)
+               count = PAGE_SIZE;
+
+       for (;;) {
+               if (u->ring_overflow)
+                       return -EFBIG;
+
+               if ((c = u->ring_cons) != (p = u->ring_prod))
+                       break;
+
+               if (file->f_flags & O_NONBLOCK)
+                       return -EAGAIN;
+
+               rc = wait_event_interruptible(
+                       u->evtchn_wait, u->ring_cons != u->ring_prod);
+               if (rc)
+                       return rc;
+       }
+
+       /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
+       if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
+               bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
+                       sizeof(evtchn_port_t);
+               bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
+       } else {
+               bytes1 = (p - c) * sizeof(evtchn_port_t);
+               bytes2 = 0;
+       }
+
+       /* Truncate chunks according to caller's maximum byte count. */
+       if (bytes1 > count) {
+               bytes1 = count;
+               bytes2 = 0;
+       } else if ((bytes1 + bytes2) > count) {
+               bytes2 = count - bytes1;
+       }
+
+       if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
+           ((bytes2 != 0) &&
+            copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
+               return -EFAULT;
+
+       u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
+
+       return bytes1 + bytes2;
+}
+
+/*
+ * Writing an array of evtchn_port_t to the device re-enables (unmasks)
+ * delivery on each listed port, but only for ports bound to this file;
+ * out-of-range or foreign ports are silently skipped.  Returns the number
+ * of bytes consumed (rounded down to whole ports, capped at one page).
+ */
+static ssize_t evtchn_write(struct file *file, const char __user *buf,
+                           size_t count, loff_t *ppos)
+{
+       int  rc, i;
+       evtchn_port_t *kbuf = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
+       struct per_user_data *u = file->private_data;
+
+       if (kbuf == NULL)
+               return -ENOMEM;
+
+       /* Whole number of ports. */
+       count &= ~(sizeof(evtchn_port_t)-1);
+
+       if (count == 0) {
+               rc = 0;
+               goto out;
+       }
+
+       if (count > PAGE_SIZE)
+               count = PAGE_SIZE;
+
+       if (copy_from_user(kbuf, buf, count) != 0) {
+               rc = -EFAULT;
+               goto out;
+       }
+
+       spin_lock_irq(&port_user_lock);
+       for (i = 0; i < (count/sizeof(evtchn_port_t)); i++)
+               if ((kbuf[i] < NR_EVENT_CHANNELS) && (port_user[kbuf[i]] == u))
+                       unmask_evtchn(kbuf[i]);
+       spin_unlock_irq(&port_user_lock);
+
+       rc = count;
+
+ out:
+       free_page((unsigned long)kbuf);
+       return rc;
+}
+
+/*
+ * Record 'u' as the owner of 'port' and unmask it.  The port must be
+ * freshly allocated by the hypervisor, hence unowned (BUG otherwise).
+ */
+static void evtchn_bind_to_user(struct per_user_data *u, int port)
+{
+       spin_lock_irq(&port_user_lock);
+       BUG_ON(port_user[port] != NULL);
+       port_user[port] = u;
+       unmask_evtchn(port);
+       spin_unlock_irq(&port_user_lock);
+}
+
+/*
+ * ioctl interface: bind VIRQs, interdomain channels and unbound ports to
+ * this file handle; unbind/close a port; notify a remote end; or reset
+ * the notification ring.  Successful bind calls return the local port.
+ */
+static int evtchn_ioctl(struct inode *inode, struct file *file,
+                       unsigned int cmd, unsigned long arg)
+{
+       int rc;
+       struct per_user_data *u = file->private_data;
+       void __user *uarg = (void __user *) arg;
+
+       switch (cmd) {
+       case IOCTL_EVTCHN_BIND_VIRQ: {
+               struct ioctl_evtchn_bind_virq bind;
+               struct evtchn_bind_virq bind_virq;
+
+               rc = -EFAULT;
+               if (copy_from_user(&bind, uarg, sizeof(bind)))
+                       break;
+
+               /* Always bound on VCPU0. */
+               bind_virq.virq = bind.virq;
+               bind_virq.vcpu = 0;
+               rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+                                                &bind_virq);
+               if (rc != 0)
+                       break;
+
+               rc = bind_virq.port;
+               evtchn_bind_to_user(u, rc);
+               break;
+       }
+
+       case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
+               struct ioctl_evtchn_bind_interdomain bind;
+               struct evtchn_bind_interdomain bind_interdomain;
+
+               rc = -EFAULT;
+               if (copy_from_user(&bind, uarg, sizeof(bind)))
+                       break;
+
+               bind_interdomain.remote_dom  = bind.remote_domain;
+               bind_interdomain.remote_port = bind.remote_port;
+               rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+                                                &bind_interdomain);
+               if (rc != 0)
+                       break;
+
+               rc = bind_interdomain.local_port;
+               evtchn_bind_to_user(u, rc);
+               break;
+       }
+
+       case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
+               struct ioctl_evtchn_bind_unbound_port bind;
+               struct evtchn_alloc_unbound alloc_unbound;
+
+               rc = -EFAULT;
+               if (copy_from_user(&bind, uarg, sizeof(bind)))
+                       break;
+
+               alloc_unbound.dom        = DOMID_SELF;
+               alloc_unbound.remote_dom = bind.remote_domain;
+               rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+                                                &alloc_unbound);
+               if (rc != 0)
+                       break;
+
+               rc = alloc_unbound.port;
+               evtchn_bind_to_user(u, rc);
+               break;
+       }
+
+       case IOCTL_EVTCHN_UNBIND: {
+               struct ioctl_evtchn_unbind unbind;
+               struct evtchn_close close;
+               int ret;
+
+               rc = -EFAULT;
+               if (copy_from_user(&unbind, uarg, sizeof(unbind)))
+                       break;
+
+               rc = -EINVAL;
+               if (unbind.port >= NR_EVENT_CHANNELS)
+                       break;
+
+               spin_lock_irq(&port_user_lock);
+    
+               rc = -ENOTCONN;
+               if (port_user[unbind.port] != u) {
+                       spin_unlock_irq(&port_user_lock);
+                       break;
+               }
+
+               port_user[unbind.port] = NULL;
+               mask_evtchn(unbind.port);
+
+               spin_unlock_irq(&port_user_lock);
+
+               /* Close the channel in the hypervisor after dropping the lock. */
+               close.port = unbind.port;
+               ret = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+               BUG_ON(ret);
+
+               rc = 0;
+               break;
+       }
+
+       case IOCTL_EVTCHN_NOTIFY: {
+               struct ioctl_evtchn_notify notify;
+
+               rc = -EFAULT;
+               if (copy_from_user(&notify, uarg, sizeof(notify)))
+                       break;
+
+               if (notify.port >= NR_EVENT_CHANNELS) {
+                       rc = -EINVAL;
+               } else if (port_user[notify.port] != u) {
+                       rc = -ENOTCONN;
+               } else {
+                       notify_remote_via_evtchn(notify.port);
+                       rc = 0;
+               }
+               break;
+       }
+
+       case IOCTL_EVTCHN_RESET: {
+               /* Initialise the ring to empty. Clear errors. */
+               spin_lock_irq(&port_user_lock);
+               u->ring_cons = u->ring_prod = u->ring_overflow = 0;
+               spin_unlock_irq(&port_user_lock);
+               rc = 0;
+               break;
+       }
+
+       default:
+               rc = -ENOSYS;
+               break;
+       }
+
+       return rc;
+}
+
+/*
+ * poll: always writable; readable when ports are queued; POLLERR alone
+ * once the ring has overflowed.
+ */
+static unsigned int evtchn_poll(struct file *file, poll_table *wait)
+{
+       unsigned int mask = POLLOUT | POLLWRNORM;
+       struct per_user_data *u = file->private_data;
+
+       poll_wait(file, &u->evtchn_wait, wait);
+       if (u->ring_cons != u->ring_prod)
+               mask |= POLLIN | POLLRDNORM;
+       if (u->ring_overflow)
+               mask = POLLERR;
+       return mask;
+}
+
+/* Register/unregister for SIGIO delivery on ring activity. */
+static int evtchn_fasync(int fd, struct file *filp, int on)
+{
+       struct per_user_data *u = filp->private_data;
+       return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
+}
+
+/*
+ * open: allocate per-user state plus a one-page notification ring.
+ * Both allocations are checked; the ring page is freed on release.
+ */
+static int evtchn_open(struct inode *inode, struct file *filp)
+{
+       struct per_user_data *u;
+
+       if ((u = kmalloc(sizeof(*u), GFP_KERNEL)) == NULL)
+               return -ENOMEM;
+
+       memset(u, 0, sizeof(*u));
+       init_waitqueue_head(&u->evtchn_wait);
+
+       u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
+       if (u->ring == NULL) {
+               kfree(u);
+               return -ENOMEM;
+       }
+
+       filp->private_data = u;
+
+       return 0;
+}
+
+/*
+ * release: unbind every port still owned by this user (masking it and
+ * closing the channel in the hypervisor), then free the ring page and
+ * the per-user state.
+ */
+static int evtchn_release(struct inode *inode, struct file *filp)
+{
+       int i;
+       struct per_user_data *u = filp->private_data;
+       struct evtchn_close close;
+
+       spin_lock_irq(&port_user_lock);
+
+       free_page((unsigned long)u->ring);
+
+       for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+               int ret;
+               if (port_user[i] != u)
+                       continue;
+
+               port_user[i] = NULL;
+               mask_evtchn(i);
+
+               close.port = i;
+               ret = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+               BUG_ON(ret);
+       }
+
+       spin_unlock_irq(&port_user_lock);
+
+       kfree(u);
+
+       return 0;
+}
+
+/* File operations for the /dev/xen/evtchn character device. */
+static struct file_operations evtchn_fops = {
+       .owner   = THIS_MODULE,
+       .read    = evtchn_read,
+       .write   = evtchn_write,
+       .ioctl   = evtchn_ioctl,
+       .poll    = evtchn_poll,
+       .fasync  = evtchn_fasync,
+       .open    = evtchn_open,
+       .release = evtchn_release,
+};
+
+/* Misc-device registration record (/dev/misc/evtchn via devfs). */
+static struct miscdevice evtchn_miscdev = {
+       .minor        = EVTCHN_MINOR,
+       .name         = "evtchn",
+       .fops         = &evtchn_fops,
+       .devfs_name   = "misc/evtchn",
+};
+
+/* Module init: register the event-channel misc device (Xen guests only). */
+static int __init evtchn_init(void)
+{
+       int err;
+
+       if (!is_running_on_xen())
+               return -ENODEV;
+
+       spin_lock_init(&port_user_lock);
+       memset(port_user, 0, sizeof(port_user));
+
+       /* Create '/dev/misc/evtchn'. */
+       err = misc_register(&evtchn_miscdev);
+       if (err != 0) {
+               printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+               return err;
+       }
+
+       printk("Event-channel device installed.\n");
+
+       return 0;
+}
+
+/* Module exit: remove the misc device. */
+static void evtchn_cleanup(void)
+{
+       misc_deregister(&evtchn_miscdev);
+}
+
+module_init(evtchn_init);
+module_exit(evtchn_cleanup);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/netback/Makefile b/drivers/xen/netback/Makefile
new file mode 100644 (file)
index 0000000..d5d2328
--- /dev/null
@@ -0,0 +1,5 @@
+obj-$(CONFIG_XEN_NETDEV_BACKEND) := netbk.o
+obj-$(CONFIG_XEN_NETDEV_LOOPBACK) += netloop.o
+
+netbk-y   := netback.o xenbus.o interface.o
+netloop-y := loopback.o
diff --git a/drivers/xen/netback/common.h b/drivers/xen/netback/common.h
new file mode 100644 (file)
index 0000000..c5ec300
--- /dev/null
@@ -0,0 +1,138 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/backend/common.h
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __NETIF__BACKEND__COMMON_H__
+#define __NETIF__BACKEND__COMMON_H__
+
+#include <linux/config.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/wait.h>
+#include <xen/evtchn.h>
+#include <xen/interface/io/netif.h>
+#include <asm/io.h>
+#include <asm/pgalloc.h>
+#include <xen/interface/grant_table.h>
+#include <xen/gnttab.h>
+#include <xen/driver_util.h>
+
+#define DPRINTK(_f, _a...)                     \
+       pr_debug("(file=%s, line=%d) " _f,      \
+                __FILE__ , __LINE__ , ## _a )
+#define IPRINTK(fmt, args...)                          \
+       printk(KERN_INFO "xen_net: " fmt, ##args)
+#define WPRINTK(fmt, args...)                          \
+       printk(KERN_WARNING "xen_net: " fmt, ##args)
+
+/* Per-interface backend state: one instance per frontend vif. */
+typedef struct netif_st {
+       /* Unique identifier for this interface. */
+       domid_t          domid;
+       unsigned int     handle;
+
+       /* MAC address presented to the frontend. */
+       u8               fe_dev_addr[6];
+
+       /* Physical parameters of the comms window. */
+       grant_handle_t   tx_shmem_handle;
+       grant_ref_t      tx_shmem_ref; 
+       grant_handle_t   rx_shmem_handle;
+       grant_ref_t      rx_shmem_ref; 
+       unsigned int     evtchn;
+       unsigned int     irq;
+
+       /* The shared rings and indexes. */
+       netif_tx_back_ring_t tx;
+       netif_rx_back_ring_t rx;
+       struct vm_struct *tx_comms_area;
+       struct vm_struct *rx_comms_area;
+
+       /* Set of features that can be turned on in dev->features. */
+       int features;
+       int can_queue;
+
+       /* Allow netif_be_start_xmit() to peek ahead in the rx request ring. */
+       RING_IDX rx_req_cons_peek;
+
+       /* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
+       unsigned long   credit_bytes;
+       unsigned long   credit_usec;
+       unsigned long   remaining_credit;
+       struct timer_list credit_timeout;
+
+       /* Miscellaneous private stuff. */
+       struct list_head list;  /* scheduling list */
+       atomic_t         refcnt;
+       struct net_device *dev;
+       struct net_device_stats stats;
+
+       /* Woken when refcnt drops to zero (see netif_put). */
+       wait_queue_head_t waiting_to_free;
+} netif_t;
+
+/* Ring sizes derived from the shared-ring layout over one page. */
+#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+
+void netif_disconnect(netif_t *netif);
+
+netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN]);
+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
+             unsigned long rx_ring_ref, unsigned int evtchn);
+
+/* Reference counting: the last netif_put wakes waiters in waiting_to_free. */
+#define netif_get(_b) (atomic_inc(&(_b)->refcnt))
+#define netif_put(_b)                                          \
+       do {                                                    \
+               if ( atomic_dec_and_test(&(_b)->refcnt) )       \
+                       wake_up(&(_b)->waiting_to_free);        \
+       } while (0)
+
+void netif_xenbus_init(void);
+
+void netif_schedule_work(netif_t *netif);
+void netif_deschedule_work(netif_t *netif);
+
+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
+struct net_device_stats *netif_be_get_stats(struct net_device *dev);
+irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
+
+/* Whether this backend interface may queue packets (frontend-negotiated). */
+static inline int netbk_can_queue(struct net_device *dev)
+{
+       netif_t *netif = netdev_priv(dev);
+       return netif->can_queue;
+}
+
+/* Whether scatter-gather is enabled in the negotiated feature set. */
+static inline int netbk_can_sg(struct net_device *dev)
+{
+       netif_t *netif = netdev_priv(dev);
+       return netif->features & NETIF_F_SG;
+}
+
+#endif /* __NETIF__BACKEND__COMMON_H__ */
diff --git a/drivers/xen/netback/interface.c b/drivers/xen/netback/interface.c
new file mode 100644 (file)
index 0000000..d60b23b
--- /dev/null
@@ -0,0 +1,339 @@
+/******************************************************************************
+ * arch/xen/drivers/netif/backend/interface.c
+ * 
+ * Network-device interface management.
+ * 
+ * Copyright (c) 2004-2005, Keir Fraser
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+#include <linux/ethtool.h>
+#include <linux/rtnetlink.h>
+
+/* Start servicing the interface: unmask its event-channel IRQ and kick
+ * the TX scheduler in case requests are already pending. */
+static void __netif_up(netif_t *netif)
+{
+       enable_irq(netif->irq);
+       netif_schedule_work(netif);
+}
+
+/* Stop servicing the interface: mask its event-channel IRQ and remove
+ * it from the TX scheduling list. */
+static void __netif_down(netif_t *netif)
+{
+       disable_irq(netif->irq);
+       netif_deschedule_work(netif);
+}
+
+/* net_device open hook: only bring the backend up if the frontend is
+ * already connected (carrier on). */
+static int net_open(struct net_device *dev)
+{
+       netif_t *be = netdev_priv(dev);
+
+       if (!netif_carrier_ok(dev))
+               return 0;
+
+       __netif_up(be);
+       return 0;
+}
+
+/* net_device stop hook: only tear down servicing if the frontend is
+ * connected (carrier on). */
+static int net_close(struct net_device *dev)
+{
+       netif_t *be = netdev_priv(dev);
+
+       if (!netif_carrier_ok(dev))
+               return 0;
+
+       __netif_down(be);
+       return 0;
+}
+
+/* MTU change hook: with SG the limit is the maximal IP datagram minus
+ * the Ethernet header, otherwise the standard Ethernet payload size. */
+static int netbk_change_mtu(struct net_device *dev, int mtu)
+{
+       int limit = ETH_DATA_LEN;
+
+       if (netbk_can_sg(dev))
+               limit = 65535 - ETH_HLEN;
+
+       if (mtu > limit)
+               return -EINVAL;
+
+       dev->mtu = mtu;
+       return 0;
+}
+
+/* ethtool set-sg hook: refuse to enable SG unless the frontend
+ * negotiated it. */
+static int netbk_set_sg(struct net_device *dev, u32 data)
+{
+       netif_t *be = netdev_priv(dev);
+
+       if (data && !(be->features & NETIF_F_SG))
+               return -ENOSYS;
+
+       return ethtool_op_set_sg(dev, data);
+}
+
+/* ethtool set-tso hook: refuse to enable TSO unless the frontend
+ * negotiated it. */
+static int netbk_set_tso(struct net_device *dev, u32 data)
+{
+       netif_t *be = netdev_priv(dev);
+
+       if (data && !(be->features & NETIF_F_TSO))
+               return -ENOSYS;
+
+       return ethtool_op_set_tso(dev, data);
+}
+
+/* ethtool ops: stock helpers except SG/TSO, which are gated on the
+ * features the frontend negotiated (see netbk_set_sg/netbk_set_tso). */
+static struct ethtool_ops network_ethtool_ops =
+{
+       .get_tx_csum = ethtool_op_get_tx_csum,
+       .set_tx_csum = ethtool_op_set_tx_csum,
+       .get_sg = ethtool_op_get_sg,
+       .set_sg = netbk_set_sg,
+       .get_tso = ethtool_op_get_tso,
+       .set_tso = netbk_set_tso,
+       .get_link = ethtool_op_get_link,
+};
+
+/*
+ * Allocate and register the backend net_device ("vif<domid>.<handle>")
+ * for a frontend domain.  The device starts carrier-off; netif_map()
+ * turns the carrier on once the shared rings are connected.  Returns
+ * the new netif_t, or an ERR_PTR() value on failure.
+ */
+netif_t *netif_alloc(domid_t domid, unsigned int handle, u8 be_mac[ETH_ALEN])
+{
+       int err = 0, i;
+       struct net_device *dev;
+       netif_t *netif;
+       char name[IFNAMSIZ] = {};
+
+       snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
+       dev = alloc_netdev(sizeof(netif_t), name, ether_setup);
+       if (dev == NULL) {
+               DPRINTK("Could not create netif: out of memory\n");
+               return ERR_PTR(-ENOMEM);
+       }
+
+       /* Carrier stays off until netif_map() connects the rings. */
+       netif_carrier_off(dev);
+
+       netif = netdev_priv(dev);
+       memset(netif, 0, sizeof(*netif));
+       netif->domid  = domid;
+       netif->handle = handle;
+       /* Initial reference; dropped by netif_free(). */
+       atomic_set(&netif->refcnt, 1);
+       init_waitqueue_head(&netif->waiting_to_free);
+       netif->dev = dev;
+
+       /* TX shaping disabled by default: effectively unlimited credit. */
+       netif->credit_bytes = netif->remaining_credit = ~0UL;
+       netif->credit_usec  = 0UL;
+       init_timer(&netif->credit_timeout);
+
+       dev->hard_start_xmit = netif_be_start_xmit;
+       dev->get_stats       = netif_be_get_stats;
+       dev->open            = net_open;
+       dev->stop            = net_close;
+       dev->change_mtu      = netbk_change_mtu;
+       dev->features        = NETIF_F_IP_CSUM;
+
+       SET_ETHTOOL_OPS(dev, &network_ethtool_ops);
+
+       /*
+        * Reduce default TX queuelen so that each guest interface only
+        * allows it to eat around 6.4MB of host memory.
+        */
+       dev->tx_queue_len = 100;
+
+       /* Use the caller-supplied MAC unless it is all-zero. */
+       for (i = 0; i < ETH_ALEN; i++)
+               if (be_mac[i] != 0)
+                       break;
+       if (i == ETH_ALEN) {
+               /*
+                * Initialise a dummy MAC address. We choose the numerically
+                * largest non-broadcast address to prevent the address getting
+                * stolen by an Ethernet bridge for STP purposes.
+                * (FE:FF:FF:FF:FF:FF)
+                */ 
+               memset(dev->dev_addr, 0xFF, ETH_ALEN);
+               dev->dev_addr[0] &= ~0x01;
+       } else
+               memcpy(dev->dev_addr, be_mac, ETH_ALEN);
+
+       rtnl_lock();
+       err = register_netdevice(dev);
+       rtnl_unlock();
+       if (err) {
+               DPRINTK("Could not register new net device %s: err=%d\n",
+                       dev->name, err);
+               free_netdev(dev);
+               return ERR_PTR(err);
+       }
+
+       DPRINTK("Successfully created netif\n");
+       return netif;
+}
+
+/*
+ * Grant-map the frontend's TX and RX shared-ring pages into the
+ * backend's tx_comms_area/rx_comms_area.  On success records the grant
+ * refs and handles in the netif for later unmapping.  Returns 0 on
+ * success or the (grant-table) failure status.
+ *
+ * Fix vs. original: if the RX ring mapping fails after the TX ring was
+ * mapped, undo the TX mapping before returning -- previously the TX
+ * grant mapping was leaked on this path.
+ */
+static int map_frontend_pages(
+       netif_t *netif, grant_ref_t tx_ring_ref, grant_ref_t rx_ring_ref)
+{
+       struct gnttab_map_grant_ref op;
+       struct gnttab_unmap_grant_ref unop;
+       int ret;
+
+       gnttab_set_map_op(&op, (unsigned long)netif->tx_comms_area->addr,
+                         GNTMAP_host_map, tx_ring_ref, netif->domid);
+
+       lock_vm_area(netif->tx_comms_area);
+       ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
+       unlock_vm_area(netif->tx_comms_area);
+       BUG_ON(ret);
+
+       if (op.status) { 
+               DPRINTK(" Gnttab failure mapping tx_ring_ref!\n");
+               return op.status;
+       }
+
+       netif->tx_shmem_ref    = tx_ring_ref;
+       netif->tx_shmem_handle = op.handle;
+
+       gnttab_set_map_op(&op, (unsigned long)netif->rx_comms_area->addr,
+                         GNTMAP_host_map, rx_ring_ref, netif->domid);
+
+       lock_vm_area(netif->rx_comms_area);
+       ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
+       unlock_vm_area(netif->rx_comms_area);
+       BUG_ON(ret);
+
+       if (op.status) {
+               /* Undo the TX ring mapping established above. */
+               gnttab_set_unmap_op(&unop,
+                                   (unsigned long)netif->tx_comms_area->addr,
+                                   GNTMAP_host_map, netif->tx_shmem_handle);
+               lock_vm_area(netif->tx_comms_area);
+               ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+                                               &unop, 1);
+               unlock_vm_area(netif->tx_comms_area);
+               BUG_ON(ret);
+               DPRINTK(" Gnttab failure mapping rx_ring_ref!\n");
+               return op.status;
+       }
+
+       netif->rx_shmem_ref    = rx_ring_ref;
+       netif->rx_shmem_handle = op.handle;
+
+       return 0;
+}
+
+/*
+ * Undo map_frontend_pages(): unmap both shared-ring grant mappings
+ * using the handles recorded at map time.  The hypercalls must succeed
+ * (BUG on failure).
+ */
+static void unmap_frontend_pages(netif_t *netif)
+{
+       struct gnttab_unmap_grant_ref op;
+       int ret;
+
+       gnttab_set_unmap_op(&op, (unsigned long)netif->tx_comms_area->addr,
+                           GNTMAP_host_map, netif->tx_shmem_handle);
+
+       lock_vm_area(netif->tx_comms_area);
+       ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
+       unlock_vm_area(netif->tx_comms_area);
+       BUG_ON(ret);
+
+       gnttab_set_unmap_op(&op, (unsigned long)netif->rx_comms_area->addr,
+                           GNTMAP_host_map, netif->rx_shmem_handle);
+
+       lock_vm_area(netif->rx_comms_area);
+       ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
+       unlock_vm_area(netif->rx_comms_area);
+       BUG_ON(ret);
+}
+
+/*
+ * Connect to the frontend: map its TX/RX shared rings, bind the
+ * interdomain event channel, initialise the back rings and turn the
+ * carrier on.  Idempotent -- returns 0 immediately if already
+ * connected (netif->irq != 0).
+ *
+ * NOTE(review): the return value of bind_evtchn_to_irqhandler() is not
+ * checked -- confirm it cannot fail after a successful event-channel
+ * bind.  Also, on map failure the returned value is a grant-table
+ * status from map_frontend_pages(), not a -errno; callers should not
+ * interpret it as one.
+ */
+int netif_map(netif_t *netif, unsigned long tx_ring_ref,
+             unsigned long rx_ring_ref, unsigned int evtchn)
+{
+       int err = -ENOMEM;
+       netif_tx_sring_t *txs;
+       netif_rx_sring_t *rxs;
+       struct evtchn_bind_interdomain bind_interdomain;
+
+       /* Already connected through? */
+       if (netif->irq)
+               return 0;
+
+       netif->tx_comms_area = alloc_vm_area(PAGE_SIZE);
+       if (netif->tx_comms_area == NULL)
+               return -ENOMEM;
+       netif->rx_comms_area = alloc_vm_area(PAGE_SIZE);
+       if (netif->rx_comms_area == NULL)
+               goto err_rx;
+
+       err = map_frontend_pages(netif, tx_ring_ref, rx_ring_ref);
+       if (err)
+               goto err_map;
+
+       bind_interdomain.remote_dom = netif->domid;
+       bind_interdomain.remote_port = evtchn;
+
+       err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+                                         &bind_interdomain);
+       if (err)
+               goto err_hypervisor;
+
+       netif->evtchn = bind_interdomain.local_port;
+
+       /* IRQ stays disabled until net_open()/__netif_up(). */
+       netif->irq = bind_evtchn_to_irqhandler(
+               netif->evtchn, netif_be_int, 0, netif->dev->name, netif);
+       disable_irq(netif->irq);
+
+       txs = (netif_tx_sring_t *)netif->tx_comms_area->addr;
+       BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+
+       rxs = (netif_rx_sring_t *)
+               ((char *)netif->rx_comms_area->addr);
+       BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
+
+       netif->rx_req_cons_peek = 0;
+
+       /* Connection reference; released by netif_disconnect(). */
+       netif_get(netif);
+
+       rtnl_lock();
+       netif_carrier_on(netif->dev);
+       if (netif_running(netif->dev))
+               __netif_up(netif);
+       rtnl_unlock();
+
+       return 0;
+
+       /* Unwind in reverse order of setup. */
+err_hypervisor:
+       unmap_frontend_pages(netif);
+err_map:
+       free_vm_area(netif->rx_comms_area);
+err_rx:
+       free_vm_area(netif->tx_comms_area);
+       return err;
+}
+
+/*
+ * Final teardown.  Drops the initial reference taken in netif_alloc()
+ * and sleeps until all other holders call netif_put(), then unbinds the
+ * IRQ, unregisters the device, unmaps the shared rings (if they were
+ * ever mapped -- tx.sring non-NULL) and frees the net_device.
+ */
+static void netif_free(netif_t *netif)
+{
+       atomic_dec(&netif->refcnt);
+       wait_event(netif->waiting_to_free, atomic_read(&netif->refcnt) == 0);
+
+       if (netif->irq)
+               unbind_from_irqhandler(netif->irq, netif);
+       
+       unregister_netdev(netif->dev);
+
+       if (netif->tx.sring) {
+               unmap_frontend_pages(netif);
+               free_vm_area(netif->tx_comms_area);
+               free_vm_area(netif->rx_comms_area);
+       }
+
+       free_netdev(netif->dev);
+}
+
+/*
+ * Disconnect from the frontend and destroy the interface.  If the
+ * carrier is on (i.e. netif_map() completed), take it down and drop the
+ * connection reference netif_map() acquired, then free everything via
+ * netif_free() (which blocks until the refcount reaches zero).
+ */
+void netif_disconnect(netif_t *netif)
+{
+       if (netif_carrier_ok(netif->dev)) {
+               rtnl_lock();
+               netif_carrier_off(netif->dev);
+               if (netif_running(netif->dev))
+                       __netif_down(netif);
+               rtnl_unlock();
+               netif_put(netif);
+       }
+       netif_free(netif);
+}
diff --git a/drivers/xen/netback/netback.c b/drivers/xen/netback/netback.c
new file mode 100644 (file)
index 0000000..0316f3b
--- /dev/null
@@ -0,0 +1,1345 @@
+/******************************************************************************
+ * drivers/xen/netback/netback.c
+ * 
+ * Back-end of the driver for virtual network devices. This portion of the
+ * driver exports a 'unified' network-device interface that can be accessed
+ * by any operating system that implements a compatible front end. A 
+ * reference front-end implementation can be found in:
+ *  drivers/xen/netfront/netfront.c
+ * 
+ * Copyright (c) 2002-2005, K A Fraser
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+#include <xen/balloon.h>
+#include <xen/interface/memory.h>
+
+/*#define NETBE_DEBUG_INTERRUPT*/
+
+/* Per-slot bookkeeping for guest-bound (RX) packet pages. */
+struct netbk_rx_meta {
+       skb_frag_t frag;
+       int id;
+};
+
+static void netif_idx_release(u16 pending_idx);
+static void netif_page_release(struct page *page);
+static void make_tx_response(netif_t *netif, 
+                            netif_tx_request_t *txp,
+                            s8       st);
+static netif_rx_response_t *make_rx_response(netif_t *netif, 
+                                            u16      id, 
+                                            s8       st,
+                                            u16      offset,
+                                            u16      size,
+                                            u16      flags);
+
+static void net_tx_action(unsigned long unused);
+static DECLARE_TASKLET(net_tx_tasklet, net_tx_action, 0);
+
+static void net_rx_action(unsigned long unused);
+static DECLARE_TASKLET(net_rx_tasklet, net_rx_action, 0);
+
+/* Back-off timer used by net_rx_action() on memory squeeze. */
+static struct timer_list net_timer;
+
+/* Maximum guest TX requests in flight at once (power of two). */
+#define MAX_PENDING_REQS 256
+
+/* Batched state for the RX (guest-receive) path. */
+static struct sk_buff_head rx_queue;
+static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
+static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
+static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
+static unsigned char rx_notify[NR_IRQS];
+
+/* Base of the VA region into which guest TX pages are grant-mapped. */
+static unsigned long mmap_vstart;
+#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))
+
+#define PKT_PROT_LEN 64
+
+static struct {
+       netif_tx_request_t req;
+       netif_t *netif;
+} pending_tx_info[MAX_PENDING_REQS];
+static u16 pending_ring[MAX_PENDING_REQS];
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)
+
+/* Freed TX SKBs get batched on this ring before return to pending_ring. */
+static u16 dealloc_ring[MAX_PENDING_REQS];
+static PEND_RING_IDX dealloc_prod, dealloc_cons;
+
+static struct sk_buff_head tx_queue;
+
+static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
+static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
+
+/* Interfaces with pending TX work, protected by net_schedule_list_lock. */
+static struct list_head net_schedule_list;
+static spinlock_t net_schedule_list_lock;
+
+/* Pool of machine frames used to replace pages transferred to guests. */
+#define MAX_MFN_ALLOC 64
+static unsigned long mfn_list[MAX_MFN_ALLOC];
+static unsigned int alloc_index = 0;
+
+/* Pop one machine frame from the pre-filled pool.  Caller must have
+ * guaranteed availability beforehand via check_mfn(). */
+static inline unsigned long alloc_mfn(void)
+{
+       unsigned long mfn = mfn_list[alloc_index - 1];
+
+       alloc_index--;
+       return mfn;
+}
+
+/*
+ * Ensure at least @nr machine frames are available in mfn_list, topping
+ * the pool up from Xen (XENMEM_increase_reservation) when short.
+ * Returns 0 on success, -ENOMEM if the hypervisor could not supply
+ * enough frames.
+ */
+static int check_mfn(int nr)
+{
+       struct xen_memory_reservation reservation = {
+               .extent_order = 0,
+               .domid        = DOMID_SELF
+       };
+
+       if (likely(alloc_index >= nr))
+               return 0;
+
+       set_xen_guest_handle(reservation.extent_start, mfn_list + alloc_index);
+       reservation.nr_extents = MAX_MFN_ALLOC - alloc_index;
+       /* The hypercall returns how many extents were actually granted. */
+       alloc_index += HYPERVISOR_memory_op(XENMEM_increase_reservation,
+                                           &reservation);
+
+       return alloc_index >= nr ? 0 : -ENOMEM;
+}
+
+/* Kick the TX tasklet, but only when enough pending-request slots are
+ * free and there is at least one interface queued for service. */
+static inline void maybe_schedule_tx_action(void)
+{
+       smp_mb();
+
+       if (NR_PENDING_REQS >= (MAX_PENDING_REQS/2))
+               return;
+       if (list_empty(&net_schedule_list))
+               return;
+
+       tasklet_schedule(&net_tx_tasklet);
+}
+
+/*
+ * A gross way of confirming the origin of an skb data page. The slab
+ * allocator abuses a field in the page struct to cache the kmem_cache_t ptr.
+ */
+static inline int is_xen_skb(struct sk_buff *skb)
+{
+       extern kmem_cache_t *skbuff_cachep;
+       /* NOTE(review): depends on the slab allocator stashing the
+        * kmem_cache_t pointer in page->lru.next -- fragile against mm
+        * changes; confirm against the matching skbuff/slab patch. */
+       kmem_cache_t *cp = (kmem_cache_t *)virt_to_page(skb->head)->lru.next;
+       return (cp == skbuff_cachep);
+}
+
+/*
+ * Copy @skb into freshly allocated memory: up to SKB_MAX_HEAD(0) bytes
+ * into the new linear area, the remainder into whole-page fragments.
+ * Returns the copy, or NULL if allocation fails or more than
+ * MAX_SKB_FRAGS pages would be needed.
+ */
+static struct sk_buff *netbk_copy_skb(struct sk_buff *skb)
+{
+       struct skb_shared_info *ninfo;
+       struct sk_buff *nskb;
+       unsigned long offset;
+       int ret;
+       int len;
+       int headlen;
+
+       nskb = alloc_skb(SKB_MAX_HEAD(0), GFP_ATOMIC);
+       if (unlikely(!nskb))
+               goto err;
+
+       skb_reserve(nskb, 16);
+       headlen = nskb->end - nskb->data;
+       if (headlen > skb_headlen(skb))
+               headlen = skb_headlen(skb);
+       ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
+       BUG_ON(ret);
+
+       /* Preserve GSO metadata on the copy. */
+       ninfo = skb_shinfo(nskb);
+       ninfo->gso_size = skb_shinfo(skb)->gso_size;
+       ninfo->gso_type = skb_shinfo(skb)->gso_type;
+
+       offset = headlen;
+       len = skb->len - headlen;
+
+       nskb->len = skb->len;
+       nskb->data_len = len;
+       nskb->truesize += len;
+
+       while (len) {
+               struct page *page;
+               int copy;
+               int zero;
+
+               if (unlikely(ninfo->nr_frags >= MAX_SKB_FRAGS)) {
+                       dump_stack();
+                       goto err_free;
+               }
+
+               /* Zero partially-filled pages -- presumably so no stale
+                * data is handed to the guest; full pages are overwritten
+                * entirely anyway. */
+               copy = len >= PAGE_SIZE ? PAGE_SIZE : len;
+               zero = len >= PAGE_SIZE ? 0 : __GFP_ZERO;
+
+               page = alloc_page(GFP_ATOMIC | zero);
+               if (unlikely(!page))
+                       goto err_free;
+
+               ret = skb_copy_bits(skb, offset, page_address(page), copy);
+               BUG_ON(ret);
+
+               ninfo->frags[ninfo->nr_frags].page = page;
+               ninfo->frags[ninfo->nr_frags].page_offset = 0;
+               ninfo->frags[ninfo->nr_frags].size = copy;
+               ninfo->nr_frags++;
+
+               offset += copy;
+               len -= copy;
+       }
+
+       /* Re-base the protocol header pointers onto the new buffer. */
+       offset = nskb->data - skb->data;
+
+       nskb->h.raw = skb->h.raw + offset;
+       nskb->nh.raw = skb->nh.raw + offset;
+       nskb->mac.raw = skb->mac.raw + offset;
+
+       return nskb;
+
+ err_free:
+       kfree_skb(nskb);
+ err:
+       return NULL;
+}
+
+/* Worst-case number of RX ring slots one packet can consume for this
+ * interface. */
+static inline int netbk_max_required_rx_slots(netif_t *netif)
+{
+       if (!(netif->features & (NETIF_F_SG|NETIF_F_TSO)))
+               return 1; /* all in one */
+
+       return MAX_SKB_FRAGS + 2; /* header + extra_info + frags */
+}
+
+/*
+ * True if the frontend's RX ring cannot be guaranteed to take one more
+ * worst-case packet: either too few unconsumed requests beyond our
+ * peeked consumer index, or not enough response-slot headroom.
+ */
+static inline int netbk_queue_full(netif_t *netif)
+{
+       RING_IDX peek   = netif->rx_req_cons_peek;
+       RING_IDX needed = netbk_max_required_rx_slots(netif);
+
+       return ((netif->rx.sring->req_prod - peek) < needed) ||
+              ((netif->rx.rsp_prod_pvt + NET_RX_RING_SIZE - peek) < needed);
+}
+
+/*
+ * hard_start_xmit hook: queue a guest-bound packet for delivery by the
+ * RX tasklet.  Packets are dropped (never blocked) when the interface
+ * is down or the frontend has no ring space, unless queueing was
+ * negotiated, in which case the device queue is stopped instead.
+ */
+int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       netif_t *netif = netdev_priv(dev);
+
+       BUG_ON(skb->dev != dev);
+
+       /* Drop the packet if the target domain has no receive buffers. */
+       if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev)))
+               goto drop;
+
+       if (unlikely(netbk_queue_full(netif))) {
+               /* Not a BUG_ON() -- misbehaving netfront can trigger this. */
+               if (netbk_can_queue(dev))
+                       DPRINTK("Queue full but not stopped!\n");
+               goto drop;
+       }
+
+       /*
+        * We do not copy the packet unless:
+        *  1. The data is shared; or
+        *  2. The data is not allocated from our special cache.
+        *  3. The data is fragmented.
+        */
+       if (skb_cloned(skb) || skb_is_nonlinear(skb) || !is_xen_skb(skb)) {
+               struct sk_buff *nskb = netbk_copy_skb(skb);
+               if ( unlikely(nskb == NULL) )
+                       goto drop;
+               /* Copy only the header fields we use in this driver. */
+               nskb->dev = skb->dev;
+               nskb->ip_summed = skb->ip_summed;
+               nskb->proto_data_valid = skb->proto_data_valid;
+               dev_kfree_skb(skb);
+               skb = nskb;
+       }
+
+       /* Reserve ring slots: one per frag, one for the header, one for
+        * the GSO extra-info slot if needed. */
+       netif->rx_req_cons_peek += skb_shinfo(skb)->nr_frags + 1 +
+                                  !!skb_shinfo(skb)->gso_size;
+       /* Reference dropped by net_rx_action() after delivery. */
+       netif_get(netif);
+
+       if (netbk_can_queue(dev) && netbk_queue_full(netif)) {
+               netif->rx.sring->req_event = netif->rx_req_cons_peek +
+                       netbk_max_required_rx_slots(netif);
+               mb(); /* request notification /then/ check & stop the queue */
+               if (netbk_queue_full(netif))
+                       netif_stop_queue(dev);
+       }
+
+       skb_queue_tail(&rx_queue, skb);
+       tasklet_schedule(&net_rx_tasklet);
+
+       return 0;
+
+ drop:
+       netif->stats.tx_dropped++;
+       dev_kfree_skb(skb);
+       return 0;
+}
+
+/* Disabled hooks for integrating with a NAPI physical-NIC driver. */
+#if 0
+static void xen_network_done_notify(void)
+{
+       static struct net_device *eth0_dev = NULL;
+       if (unlikely(eth0_dev == NULL))
+               eth0_dev = __dev_get_by_name("eth0");
+       netif_rx_schedule(eth0_dev);
+}
+/* 
+ * Add following to poll() function in NAPI driver (Tigon3 is example):
+ *  if ( xen_network_done() )
+ *      tg3_enable_ints(tp); 
+ */
+int xen_network_done(void)
+{
+       return skb_queue_empty(&rx_queue);
+}
+#endif
+
+/*
+ * Queue the grant-transfer of one page to the frontend: take a
+ * replacement MFN from the pool, switch the P2M entry and kernel
+ * mapping over to it (batched into rx_mcl/rx_mmu at index @count), and
+ * fill in the transfer op for the old frame.  @i indexes the frontend
+ * RX request beyond rx.req_cons.  Returns that request's id.
+ */
+static u16 netbk_gop_frag(netif_t *netif, struct page *page, int count, int i)
+{
+       multicall_entry_t *mcl = rx_mcl + count;
+       mmu_update_t *mmu = rx_mmu + count;
+       gnttab_transfer_t *gop = grant_rx_op + count;
+       netif_rx_request_t *req;
+       unsigned long old_mfn, new_mfn;
+
+       old_mfn = virt_to_mfn(page_address(page));
+
+       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+               new_mfn = alloc_mfn();
+
+               /*
+                * Set the new P2M table entry before reassigning
+                * the old data page. Heed the comment in
+                * pgtable-2level.h:pte_page(). :-)
+                */
+               set_phys_to_machine(page_to_pfn(page), new_mfn);
+
+               MULTI_update_va_mapping(mcl, (unsigned long)page_address(page),
+                                       pfn_pte_ma(new_mfn, PAGE_KERNEL), 0);
+
+               mmu->ptr = ((maddr_t)new_mfn << PAGE_SHIFT) |
+                       MMU_MACHPHYS_UPDATE;
+               mmu->val = page_to_pfn(page);
+       }
+
+       req = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons + i);
+       gop->mfn = old_mfn;
+       gop->domid = netif->domid;
+       gop->ref = req->gref;
+       return req->id;
+}
+
+/*
+ * Queue transfer ops for every page of @skb (header + frags), filling
+ * meta[] starting at @count.  meta[count] doubles as the packet's GSO
+ * descriptor: frag.size holds gso_size (0 = no GSO) and
+ * frag.page_offset holds gso_type.
+ */
+static void netbk_gop_skb(struct sk_buff *skb, struct netbk_rx_meta *meta,
+                         int count)
+{
+       netif_t *netif = netdev_priv(skb->dev);
+       int nr_frags = skb_shinfo(skb)->nr_frags;
+       int i;
+       int extra;
+
+       meta[count].frag.page_offset = skb_shinfo(skb)->gso_type;
+       meta[count].frag.size = skb_shinfo(skb)->gso_size;
+       /* Ring slots per packet beyond the frags: header, plus an
+        * extra-info slot when GSO metadata must be sent. */
+       extra = !!meta[count].frag.size + 1;
+
+       for (i = 0; i < nr_frags; i++) {
+               meta[++count].frag = skb_shinfo(skb)->frags[i];
+               meta[count].id = netbk_gop_frag(netif, meta[count].frag.page,
+                                               count, i + extra);
+       }
+
+       /*
+        * This must occur at the end to ensure that we don't trash
+        * skb_shinfo until we're done.
+        */
+       meta[count - nr_frags].id = netbk_gop_frag(netif,
+                                                  virt_to_page(skb->data),
+                                                  count - nr_frags, 0);
+       netif->rx.req_cons += nr_frags + extra;
+}
+
+/* Release our reference on each fragment page of a delivered packet. */
+static inline void netbk_free_pages(int nr_frags, struct netbk_rx_meta *meta)
+{
+       int f;
+
+       for (f = 0; f < nr_frags; f++)
+               put_page(meta[f].frag.page);
+}
+
+/*
+ * Audit the batched multicall/transfer results for one packet (header
+ * plus @nr_frags pages starting at index @count).  Returns
+ * NETIF_RSP_OKAY, or NETIF_RSP_ERROR if any grant transfer failed.
+ */
+static int netbk_check_gop(int nr_frags, domid_t domid, int count)
+{
+       multicall_entry_t *mcl = rx_mcl + count;
+       gnttab_transfer_t *gop = grant_rx_op + count;
+       int status = NETIF_RSP_OKAY;
+       int i;
+
+       for (i = 0; i <= nr_frags; i++) {
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                       /* The update_va_mapping() must not fail. */
+                       BUG_ON(mcl->result != 0);
+                       mcl++;
+               }
+
+               /* Check the reassignment error code. */
+               if (gop->status != 0) { 
+                       DPRINTK("Bad status %d from grant transfer to DOM%u\n",
+                               gop->status, domid);
+                       /*
+                        * Page no longer belongs to us unless GNTST_bad_page,
+                        * but that should be a fatal error anyway.
+                        */
+                       BUG_ON(gop->status == GNTST_bad_page);
+                       status = NETIF_RSP_ERROR; 
+               }
+               gop++;
+       }
+
+       return status;
+}
+
+/* Emit one RX response per fragment; every fragment except the last
+ * carries NETRXF_more_data. */
+static void netbk_add_frag_responses(netif_t *netif, int status,
+                                    struct netbk_rx_meta *meta, int nr_frags)
+{
+       int i;
+
+       for (i = 0; i < nr_frags; i++) {
+               int flags = NETRXF_more_data;
+
+               if (i == nr_frags - 1)
+                       flags = 0;
+
+               make_rx_response(netif, meta[i].id, status,
+                                meta[i].frag.page_offset,
+                                meta[i].frag.size, flags);
+       }
+}
+
+/*
+ * RX tasklet: deliver queued guest-bound skbs to their frontends.
+ * Each data page is *transferred* (not copied) to the guest via
+ * GNTTABOP_transfer; replacement frames come from the pool refilled by
+ * check_mfn().  Runs in three phases: batch the ops, issue them in one
+ * set of hypercalls, then write responses and notify.
+ */
+static void net_rx_action(unsigned long unused)
+{
+       netif_t *netif = NULL; 
+       s8 status;
+       u16 id, irq, flags;
+       netif_rx_response_t *resp;
+       struct netif_extra_info *extra;
+       multicall_entry_t *mcl;
+       struct sk_buff_head rxq;
+       struct sk_buff *skb;
+       int notify_nr = 0;
+       int ret;
+       int nr_frags;
+       int count;
+
+       /*
+        * Putting hundreds of bytes on the stack is considered rude.
+        * Static works because a tasklet can only be on one CPU at any time.
+        */
+       static u16 notify_list[NET_RX_RING_SIZE];
+       static struct netbk_rx_meta meta[NET_RX_RING_SIZE];
+
+       skb_queue_head_init(&rxq);
+
+       count = 0;
+
+       /* Phase 1: batch transfer/MMU ops for as many skbs as fit. */
+       while ((skb = skb_dequeue(&rx_queue)) != NULL) {
+               nr_frags = skb_shinfo(skb)->nr_frags;
+               /* Stash the frag count in skb->cb for phase 3. */
+               *(int *)skb->cb = nr_frags;
+
+               if (!xen_feature(XENFEAT_auto_translated_physmap) &&
+                   check_mfn(nr_frags + 1)) {
+                       /* Memory squeeze? Back off for an arbitrary while. */
+                       if ( net_ratelimit() )
+                               WPRINTK("Memory squeeze in netback "
+                                       "driver.\n");
+                       mod_timer(&net_timer, jiffies + HZ);
+                       skb_queue_head(&rx_queue, skb);
+                       break;
+               }
+
+               netbk_gop_skb(skb, meta, count);
+
+               count += nr_frags + 1;
+
+               __skb_queue_tail(&rxq, skb);
+
+               /* Filled the batch queue? */
+               if (count + MAX_SKB_FRAGS >= NET_RX_RING_SIZE)
+                       break;
+       }
+
+       if (!count)
+               return;
+
+       /* Phase 2: issue the batched MMU updates and grant transfers. */
+       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+               mcl = rx_mcl + count;
+
+               /* Flush the TLB once, on the final va_mapping update. */
+               mcl[-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+
+               mcl->op = __HYPERVISOR_mmu_update;
+               mcl->args[0] = (unsigned long)rx_mmu;
+               mcl->args[1] = count;
+               mcl->args[2] = 0;
+               mcl->args[3] = DOMID_SELF;
+
+               ret = HYPERVISOR_multicall(rx_mcl, count + 1);
+               BUG_ON(ret != 0);
+       }
+
+       ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, count);
+       BUG_ON(ret != 0);
+
+       /* Phase 3: build responses, free pages, notify frontends. */
+       count = 0;
+       while ((skb = __skb_dequeue(&rxq)) != NULL) {
+               nr_frags = *(int *)skb->cb;
+
+               /* The frag pages were transferred away; detach them so
+                * kfree_skb() below does not touch them again. */
+               atomic_set(&(skb_shinfo(skb)->dataref), 1);
+               skb_shinfo(skb)->nr_frags = 0;
+               skb_shinfo(skb)->frag_list = NULL;
+
+               netif = netdev_priv(skb->dev);
+               netif->stats.tx_bytes += skb->len;
+               netif->stats.tx_packets++;
+
+               netbk_free_pages(nr_frags, meta + count + 1);
+               status = netbk_check_gop(nr_frags, netif->domid, count);
+
+               id = meta[count].id;
+               flags = nr_frags ? NETRXF_more_data : 0;
+
+               if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
+                       flags |= NETRXF_csum_blank | NETRXF_data_validated;
+               else if (skb->proto_data_valid) /* remote but checksummed? */
+                       flags |= NETRXF_data_validated;
+
+               resp = make_rx_response(netif, id, status,
+                                       offset_in_page(skb->data),
+                                       skb_headlen(skb), flags);
+
+               extra = NULL;
+
+               /* Non-zero gso_size (stashed in meta[count].frag.size by
+                * netbk_gop_skb) means a GSO extra-info slot is needed. */
+               if (meta[count].frag.size) {
+                       struct netif_extra_info *gso =
+                               (struct netif_extra_info *)
+                               RING_GET_RESPONSE(&netif->rx,
+                                                 netif->rx.rsp_prod_pvt++);
+
+                       if (extra)
+                               extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
+                       else
+                               resp->flags |= NETRXF_extra_info;
+
+                       gso->u.gso.size = meta[count].frag.size;
+                       gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+                       gso->u.gso.pad = 0;
+                       gso->u.gso.features = 0;
+
+                       gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
+                       gso->flags = 0;
+                       extra = gso;
+               }
+
+               netbk_add_frag_responses(netif, status, meta + count + 1,
+                                        nr_frags);
+
+               RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, ret);
+               irq = netif->irq;
+               /* Coalesce notifications: at most one per IRQ per pass. */
+               if (ret && !rx_notify[irq]) {
+                       rx_notify[irq] = 1;
+                       notify_list[notify_nr++] = irq;
+               }
+
+               if (netif_queue_stopped(netif->dev) &&
+                   !netbk_queue_full(netif))
+                       netif_wake_queue(netif->dev);
+
+               /* Drop the reference taken in netif_be_start_xmit(). */
+               netif_put(netif);
+               dev_kfree_skb(skb);
+               count += nr_frags + 1;
+       }
+
+       while (notify_nr != 0) {
+               irq = notify_list[--notify_nr];
+               rx_notify[irq] = 0;
+               notify_remote_via_irq(irq);
+       }
+
+       /* More work to do? */
+       if (!skb_queue_empty(&rx_queue) && !timer_pending(&net_timer))
+               tasklet_schedule(&net_rx_tasklet);
+#if 0
+       else
+               xen_network_done_notify();
+#endif
+}
+
+/* net_timer callback: retry RX processing after a memory-squeeze
+ * back-off (see net_rx_action()). */
+static void net_alarm(unsigned long unused)
+{
+       tasklet_schedule(&net_rx_tasklet);
+}
+
+/* get_stats hook: statistics live inside the per-interface netif_t. */
+struct net_device_stats *netif_be_get_stats(struct net_device *dev)
+{
+       netif_t *be = netdev_priv(dev);
+
+       return &be->stats;
+}
+
+/* list.next is NULLed on removal, so it doubles as the membership
+ * flag for net_schedule_list. */
+static int __on_net_schedule_list(netif_t *netif)
+{
+       return netif->list.next != NULL;
+}
+
+/* Take the interface off the TX scheduling list (if present) and drop
+ * the reference held while it was listed. */
+static void remove_from_net_schedule_list(netif_t *netif)
+{
+       spin_lock_irq(&net_schedule_list_lock);
+       if (likely(__on_net_schedule_list(netif))) {
+               list_del(&netif->list);
+               netif->list.next = NULL;
+               netif_put(netif);
+       }
+       spin_unlock_irq(&net_schedule_list_lock);
+}
+
+/*
+ * Queue a netif at the tail of the transmit schedule list, taking a
+ * reference on its behalf.  The unlocked pre-check is just a cheap
+ * fast path; the test is repeated under the lock before linking.
+ */
+static void add_to_net_schedule_list_tail(netif_t *netif)
+{
+       /* Fast path: already queued, nothing to do. */
+       if (__on_net_schedule_list(netif))
+               return;
+
+       spin_lock_irq(&net_schedule_list_lock);
+       if (!__on_net_schedule_list(netif) &&
+           likely(netif_running(netif->dev) &&
+                  netif_carrier_ok(netif->dev))) {
+               list_add_tail(&netif->list, &net_schedule_list);
+               netif_get(netif); /* the list now holds a reference */
+       }
+       spin_unlock_irq(&net_schedule_list_lock);
+}
+
+/*
+ * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
+ * If this driver is pipelining transmit requests then we can be very
+ * aggressive in avoiding new-packet notifications -- frontend only needs to
+ * send a notification if there are no outstanding unreceived responses.
+ * If we may be buffering transmit requests for any reason then we must be
+ * rather more conservative and treat this as the final check for pending work.
+ */
+void netif_schedule_work(netif_t *netif)
+{
+       int more_to_do;
+
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+       /* Pipelined mode: a plain unconsumed-request check suffices. */
+       more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
+#else
+       /* Final check: also re-arms the ring event so the frontend will
+        * notify us about any request that arrives after this point. */
+       RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+#endif
+
+       if (more_to_do) {
+               add_to_net_schedule_list_tail(netif);
+               maybe_schedule_tx_action();
+       }
+}
+
+/* Take the netif off the transmit schedule list (e.g. on disconnect). */
+void netif_deschedule_work(netif_t *netif)
+{
+       remove_from_net_schedule_list(netif);
+}
+
+
+/*
+ * Credit-timer callback: the rate-limit period has elapsed, so refill
+ * the interface's byte credit and reschedule it if work is pending.
+ */
+static void tx_credit_callback(unsigned long data)
+{
+       netif_t *nif = (netif_t *)data;
+
+       nif->remaining_credit = nif->credit_bytes;
+       netif_schedule_work(nif);
+}
+
+/*
+ * Unmap grants for completed transmit requests, send NETIF_RSP_OKAY
+ * responses to the owning frontends, and recycle the pending-ring
+ * slots.  Consumes dealloc_ring entries queued by netif_idx_release().
+ * (Specifier order fixed to the idiomatic "static inline".)
+ */
+static inline void net_tx_action_dealloc(void)
+{
+       gnttab_unmap_grant_ref_t *gop;
+       u16 pending_idx;
+       PEND_RING_IDX dc, dp;
+       netif_t *netif;
+       int ret;
+
+       dc = dealloc_cons;
+       dp = dealloc_prod;
+
+       /* Ensure we see all indexes enqueued by netif_idx_release(). */
+       smp_rmb();
+
+       /*
+        * Free up any grants we have finished using
+        */
+       gop = tx_unmap_ops;
+       while (dc != dp) {
+               pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+               gnttab_set_unmap_op(gop, MMAP_VADDR(pending_idx),
+                                   GNTMAP_host_map,
+                                   grant_tx_handle[pending_idx]);
+               gop++;
+       }
+       /* Batch all unmaps into a single hypercall. */
+       ret = HYPERVISOR_grant_table_op(
+               GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
+       BUG_ON(ret);
+
+       while (dealloc_cons != dp) {
+               pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
+
+               netif = pending_tx_info[pending_idx].netif;
+
+               make_tx_response(netif, &pending_tx_info[pending_idx].req, 
+                                NETIF_RSP_OKAY);
+
+               /* Slot is free again: return it to the pending ring. */
+               pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+
+               netif_put(netif);
+       }
+}
+
+/*
+ * Fail a (possibly multi-fragment) transmit request: send NETIF_RSP_ERROR
+ * for the head request and every follow-on request up to ring index 'end',
+ * then advance req_cons and drop the reference taken for this packet.
+ */
+static void netbk_tx_err(netif_t *netif, netif_tx_request_t *txp, RING_IDX end)
+{
+       RING_IDX cons = netif->tx.req_cons;
+
+       do {
+               make_tx_response(netif, txp, NETIF_RSP_ERROR);
+               if (cons >= end)
+                       break;
+               txp = RING_GET_REQUEST(&netif->tx, cons++);
+       } while (1);
+       netif->tx.req_cons = cons;
+       netif_schedule_work(netif);
+       netif_put(netif);
+}
+
+/*
+ * Count and validate the extra fragment requests chained after 'txp'
+ * via NETTXF_more_data.  Returns the fragment count on success, or the
+ * negated count of fragments consumed so far on failure (so the caller
+ * can error out the right number of ring slots).  Side effect:
+ * 'first->size' is reduced to the size of the head fragment alone.
+ */
+static int netbk_count_requests(netif_t *netif, netif_tx_request_t *txp,
+                               int work_to_do)
+{
+       netif_tx_request_t *first = txp;
+       RING_IDX cons = netif->tx.req_cons;
+       int frags = 0;
+
+       while (txp->flags & NETTXF_more_data) {
+               if (frags >= work_to_do) {
+                       DPRINTK("Need more frags\n");
+                       return -frags;
+               }
+
+               txp = RING_GET_REQUEST(&netif->tx, cons + frags);
+               /* Each fragment must fit within the remaining total size. */
+               if (txp->size > first->size) {
+                       DPRINTK("Frags galore\n");
+                       return -frags;
+               }
+
+               first->size -= txp->size;
+               frags++;
+
+               /* A fragment may not cross a page boundary. */
+               if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) {
+                       DPRINTK("txp->offset: %x, size: %u\n",
+                               txp->offset, txp->size);
+                       return -frags;
+               }
+       }
+
+       return frags;
+}
+
+/*
+ * Emit one grant-map operation per remaining skb fragment, claiming a
+ * pending slot for each and stashing the request and netif so the
+ * completion path can respond later.  Each frag's 'page' field
+ * temporarily stores the pending index; netbk_fill_frags() converts it
+ * to the real mapped page.  Returns the next free map-op slot.
+ */
+static gnttab_map_grant_ref_t *netbk_get_requests(netif_t *netif,
+                                                 struct sk_buff *skb,
+                                                 gnttab_map_grant_ref_t *mop)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       skb_frag_t *frags = shinfo->frags;
+       netif_tx_request_t *txp;
+       unsigned long pending_idx = *((u16 *)skb->data);
+       RING_IDX cons = netif->tx.req_cons;
+       int i, start;
+
+       /* Skip first skb fragment if it is on same page as header fragment. */
+       start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+       for (i = start; i < shinfo->nr_frags; i++) {
+               txp = RING_GET_REQUEST(&netif->tx, cons++);
+               pending_idx = pending_ring[MASK_PEND_IDX(pending_cons++)];
+
+               gnttab_set_map_op(mop++, MMAP_VADDR(pending_idx),
+                                 GNTMAP_host_map | GNTMAP_readonly,
+                                 txp->gref, netif->domid);
+
+               memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
+               /* One reference per outstanding fragment. */
+               netif_get(netif);
+               pending_tx_info[pending_idx].netif = netif;
+               frags[i].page = (void *)pending_idx;
+       }
+
+       return mop;
+}
+
+/*
+ * Check the status of the grant-map operations for one transmitted skb.
+ * On success, record the grant handles and fix up the phys-to-machine
+ * entries.  On the first failure, respond NETIF_RSP_ERROR for the bad
+ * fragment and invalidate the header plus every previously-mapped
+ * fragment; later fragments of the same skb are invalidated as they
+ * are seen.  Returns the first error (0 if all mapped) and advances
+ * *mopp past the consumed operations.
+ *
+ * Bug fix: the first-error cleanup loop previously released
+ * shinfo->frags[i].page on every iteration (the current fragment,
+ * repeatedly) instead of frags[j] (each preceding fragment), leaking
+ * pending slots and grant handles.
+ */
+static int netbk_tx_check_mop(struct sk_buff *skb,
+                              gnttab_map_grant_ref_t **mopp)
+{
+       gnttab_map_grant_ref_t *mop = *mopp;
+       int pending_idx = *((u16 *)skb->data);
+       netif_t *netif = pending_tx_info[pending_idx].netif;
+       netif_tx_request_t *txp;
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       int nr_frags = shinfo->nr_frags;
+       int i, err, start;
+
+       /* Check status of header. */
+       err = mop->status;
+       if (unlikely(err)) {
+               txp = &pending_tx_info[pending_idx].req;
+               make_tx_response(netif, txp, NETIF_RSP_ERROR);
+               pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+               netif_put(netif);
+       } else {
+               set_phys_to_machine(
+                       __pa(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT,
+                       FOREIGN_FRAME(mop->dev_bus_addr >> PAGE_SHIFT));
+               grant_tx_handle[pending_idx] = mop->handle;
+       }
+
+       /* Skip first skb fragment if it is on same page as header fragment. */
+       start = ((unsigned long)shinfo->frags[0].page == pending_idx);
+
+       for (i = start; i < nr_frags; i++) {
+               int j, newerr;
+
+               pending_idx = (unsigned long)shinfo->frags[i].page;
+
+               /* Check error status: if okay then remember grant handle. */
+               newerr = (++mop)->status;
+               if (likely(!newerr)) {
+                       set_phys_to_machine(
+                               __pa(MMAP_VADDR(pending_idx))>>PAGE_SHIFT,
+                               FOREIGN_FRAME(mop->dev_bus_addr>>PAGE_SHIFT));
+                       grant_tx_handle[pending_idx] = mop->handle;
+                       /* Had a previous error? Invalidate this fragment. */
+                       if (unlikely(err))
+                               netif_idx_release(pending_idx);
+                       continue;
+               }
+
+               /* Error on this fragment: respond to client with an error. */
+               txp = &pending_tx_info[pending_idx].req;
+               make_tx_response(netif, txp, NETIF_RSP_ERROR);
+               pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+               netif_put(netif);
+
+               /* Not the first error? Preceding frags already invalidated. */
+               if (err)
+                       continue;
+
+               /* First error: invalidate header and preceding fragments. */
+               pending_idx = *((u16 *)skb->data);
+               netif_idx_release(pending_idx);
+               for (j = start; j < i; j++) {
+                       pending_idx = (unsigned long)shinfo->frags[j].page;
+                       netif_idx_release(pending_idx);
+               }
+
+               /* Remember the error: invalidate all subsequent fragments. */
+               err = newerr;
+       }
+
+       *mopp = mop + 1;
+       return err;
+}
+
+/*
+ * Convert each fragment's stashed pending index into the real mapped
+ * page, offset and size from the original request, and update the
+ * skb's length accounting to match.
+ */
+static void netbk_fill_frags(struct sk_buff *skb)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       int nr_frags = shinfo->nr_frags;
+       int i;
+
+       for (i = 0; i < nr_frags; i++) {
+               skb_frag_t *frag = shinfo->frags + i;
+               netif_tx_request_t *txp;
+               unsigned long pending_idx;
+
+               /* frag->page currently holds the pending index (see
+                * netbk_get_requests()); translate it back to a page. */
+               pending_idx = (unsigned long)frag->page;
+               txp = &pending_tx_info[pending_idx].req;
+               frag->page = virt_to_page(MMAP_VADDR(pending_idx));
+               frag->size = txp->size;
+               frag->page_offset = txp->offset;
+
+               skb->len += txp->size;
+               skb->data_len += txp->size;
+               skb->truesize += txp->size;
+       }
+}
+
+/*
+ * Consume the chain of extra-info slots that follows a request flagged
+ * NETTXF_extra_info, copying each into 'extras' indexed by (type - 1).
+ * Returns the remaining work_to_do budget, or a negative errno for a
+ * truncated chain (-EBADR) or an out-of-range type (-EINVAL).
+ * req_cons is advanced past every slot consumed, including a bad one.
+ */
+int netbk_get_extras(netif_t *netif, struct netif_extra_info *extras,
+                    int work_to_do)
+{
+       struct netif_extra_info *extra;
+       RING_IDX cons = netif->tx.req_cons;
+
+       do {
+               if (unlikely(work_to_do-- <= 0)) {
+                       DPRINTK("Missing extra info\n");
+                       return -EBADR;
+               }
+
+               extra = (struct netif_extra_info *)
+                       RING_GET_REQUEST(&netif->tx, cons);
+               if (unlikely(!extra->type ||
+                            extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+                       netif->tx.req_cons = ++cons;
+                       DPRINTK("Invalid extra type: %d\n", extra->type);
+                       return -EINVAL;
+               }
+
+               memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
+               netif->tx.req_cons = ++cons;
+       } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
+
+       return work_to_do;
+}
+
+/*
+ * Apply a frontend-supplied GSO extra-info record to the skb.
+ * Returns 0 on success or -EINVAL for a zero size / unsupported type.
+ */
+static int netbk_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+
+       if (gso->u.gso.size == 0) {
+               DPRINTK("GSO size must not be zero.\n");
+               return -EINVAL;
+       }
+
+       /* Currently only TCPv4 S.O. is supported. */
+       if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+               DPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
+               return -EINVAL;
+       }
+
+       shinfo->gso_size = gso->u.gso.size;
+       /* SKB_GSO_DODGY: header must be checked, and gso_segs computed. */
+       shinfo->gso_type = SKB_GSO_TCPV4 | SKB_GSO_DODGY;
+       shinfo->gso_segs = 0;
+
+       return 0;
+}
+
+/* Called after netfront has transmitted */
+/*
+ * Tasklet: process queued transmit requests from all scheduled netifs.
+ * Phase 1 validates requests and builds one batched list of grant-map
+ * operations (a single hypercall); phase 2 checks the mappings, copies
+ * the protocol headers, and delivers completed skbs via netif_rx().
+ */
+static void net_tx_action(unsigned long unused)
+{
+       struct list_head *ent;
+       struct sk_buff *skb;
+       netif_t *netif;
+       netif_tx_request_t txreq;
+       struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+       u16 pending_idx;
+       RING_IDX i;
+       gnttab_map_grant_ref_t *mop;
+       unsigned int data_len;
+       int ret, work_to_do;
+
+       /* First recycle any slots whose grants have been released. */
+       if (dealloc_cons != dealloc_prod)
+               net_tx_action_dealloc();
+
+       mop = tx_map_ops;
+       /* Loop while a full worst-case packet's worth of slots remains. */
+       while (((NR_PENDING_REQS + MAX_SKB_FRAGS) < MAX_PENDING_REQS) &&
+               !list_empty(&net_schedule_list)) {
+               /* Get a netif from the list with work to do. */
+               ent = net_schedule_list.next;
+               netif = list_entry(ent, netif_t, list);
+               netif_get(netif);
+               remove_from_net_schedule_list(netif);
+
+               RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
+               if (!work_to_do) {
+                       netif_put(netif);
+                       continue;
+               }
+
+               i = netif->tx.req_cons;
+               rmb(); /* Ensure that we see the request before we copy it. */
+               memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+               /* Credit-based scheduling. */
+               if (txreq.size > netif->remaining_credit) {
+                       unsigned long now = jiffies;
+                       unsigned long next_credit = 
+                               netif->credit_timeout.expires +
+                               msecs_to_jiffies(netif->credit_usec / 1000);
+
+                       /* Timer could already be pending in rare cases. */
+                       if (timer_pending(&netif->credit_timeout))
+                               break;
+
+                       /* Passed the point where we can replenish credit? */
+                       if (time_after_eq(now, next_credit)) {
+                               netif->credit_timeout.expires = now;
+                               netif->remaining_credit = netif->credit_bytes;
+                       }
+
+                       /* Still too big to send right now? Set a callback. */
+                       if (txreq.size > netif->remaining_credit) {
+                               netif->remaining_credit = 0;
+                               netif->credit_timeout.data     =
+                                       (unsigned long)netif;
+                               netif->credit_timeout.function =
+                                       tx_credit_callback;
+                               __mod_timer(&netif->credit_timeout,
+                                           next_credit);
+                               break;
+                       }
+               }
+               netif->remaining_credit -= txreq.size;
+
+               work_to_do--;
+               netif->tx.req_cons = ++i;
+
+               /* Collect any extra-info slots that follow the request. */
+               memset(extras, 0, sizeof(extras));
+               if (txreq.flags & NETTXF_extra_info) {
+                       work_to_do = netbk_get_extras(netif, extras,
+                                                     work_to_do);
+                       i = netif->tx.req_cons;
+                       if (unlikely(work_to_do < 0)) {
+                               netbk_tx_err(netif, &txreq, i);
+                               continue;
+                       }
+               }
+
+               /* Validate the fragment chain; ret = number of fragments. */
+               ret = netbk_count_requests(netif, &txreq, work_to_do);
+               if (unlikely(ret < 0)) {
+                       netbk_tx_err(netif, &txreq, i - ret);
+                       continue;
+               }
+               i += ret;
+
+               if (unlikely(ret > MAX_SKB_FRAGS)) {
+                       DPRINTK("Too many frags\n");
+                       netbk_tx_err(netif, &txreq, i);
+                       continue;
+               }
+
+               if (unlikely(txreq.size < ETH_HLEN)) {
+                       DPRINTK("Bad packet size: %d\n", txreq.size);
+                       netbk_tx_err(netif, &txreq, i);
+                       continue; 
+               }
+
+               /* No crossing a page as the payload mustn't fragment. */
+               if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
+                       DPRINTK("txreq.offset: %x, size: %u, end: %lu\n", 
+                               txreq.offset, txreq.size, 
+                               (txreq.offset &~PAGE_MASK) + txreq.size);
+                       netbk_tx_err(netif, &txreq, i);
+                       continue;
+               }
+
+               pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+
+               /* Copy at most PKT_PROT_LEN into the linear area; the
+                * remainder (if any) travels as the first fragment. */
+               data_len = (txreq.size > PKT_PROT_LEN &&
+                           ret < MAX_SKB_FRAGS) ?
+                       PKT_PROT_LEN : txreq.size;
+
+               skb = alloc_skb(data_len+16, GFP_ATOMIC);
+               if (unlikely(skb == NULL)) {
+                       DPRINTK("Can't allocate a skb in start_xmit.\n");
+                       netbk_tx_err(netif, &txreq, i);
+                       break;
+               }
+
+               /* Packets passed to netif_rx() must have some headroom. */
+               skb_reserve(skb, 16);
+
+               if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+                       struct netif_extra_info *gso;
+                       gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+
+                       if (netbk_set_skb_gso(skb, gso)) {
+                               kfree_skb(skb);
+                               netbk_tx_err(netif, &txreq, i);
+                               continue;
+                       }
+               }
+
+               /* Map the header page; fragments are mapped below. */
+               gnttab_set_map_op(mop, MMAP_VADDR(pending_idx),
+                                 GNTMAP_host_map | GNTMAP_readonly,
+                                 txreq.gref, netif->domid);
+               mop++;
+
+               memcpy(&pending_tx_info[pending_idx].req,
+                      &txreq, sizeof(txreq));
+               pending_tx_info[pending_idx].netif = netif;
+               /* Stash the pending index in the skb head for phase 2. */
+               *((u16 *)skb->data) = pending_idx;
+
+               __skb_put(skb, data_len);
+
+               skb_shinfo(skb)->nr_frags = ret;
+               if (data_len < txreq.size) {
+                       /* Leftover header-page payload becomes frag 0. */
+                       skb_shinfo(skb)->nr_frags++;
+                       skb_shinfo(skb)->frags[0].page =
+                               (void *)(unsigned long)pending_idx;
+               }
+
+               __skb_queue_tail(&tx_queue, skb);
+
+               pending_cons++;
+
+               mop = netbk_get_requests(netif, skb, mop);
+
+               netif->tx.req_cons = i;
+               netif_schedule_work(netif);
+
+               if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
+                       break;
+       }
+
+       if (mop == tx_map_ops)
+               return;
+
+       /* One hypercall maps every grant gathered above. */
+       ret = HYPERVISOR_grant_table_op(
+               GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
+       BUG_ON(ret);
+
+       /* Phase 2: complete each queued skb now its grants are mapped. */
+       mop = tx_map_ops;
+       while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
+               netif_tx_request_t *txp;
+
+               pending_idx = *((u16 *)skb->data);
+               netif       = pending_tx_info[pending_idx].netif;
+               txp         = &pending_tx_info[pending_idx].req;
+
+               /* Check the remap error code. */
+               if (unlikely(netbk_tx_check_mop(skb, &mop))) {
+                       printk(KERN_ALERT "#### netback grant fails\n");
+                       skb_shinfo(skb)->nr_frags = 0;
+                       kfree_skb(skb);
+                       continue;
+               }
+
+               /* Copy the protocol header into the linear area. */
+               data_len = skb->len;
+               memcpy(skb->data, 
+                      (void *)(MMAP_VADDR(pending_idx)|txp->offset),
+                      data_len);
+               if (data_len < txp->size) {
+                       /* Append the packet payload as a fragment. */
+                       txp->offset += data_len;
+                       txp->size -= data_len;
+               } else {
+                       /* Schedule a response immediately. */
+                       netif_idx_release(pending_idx);
+               }
+
+               /*
+                * Old frontends do not assert data_validated but we
+                * can infer it from csum_blank so test both flags.
+                */
+               if (txp->flags & (NETTXF_data_validated|NETTXF_csum_blank)) {
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+                       skb->proto_data_valid = 1;
+               } else {
+                       skb->ip_summed = CHECKSUM_NONE;
+                       skb->proto_data_valid = 0;
+               }
+               skb->proto_csum_blank = !!(txp->flags & NETTXF_csum_blank);
+
+               netbk_fill_frags(skb);
+
+               skb->dev      = netif->dev;
+               skb->protocol = eth_type_trans(skb, skb->dev);
+
+               netif->stats.rx_bytes += skb->len;
+               netif->stats.rx_packets++;
+
+               netif_rx(skb);
+               netif->dev->last_rx = jiffies;
+       }
+}
+
+/*
+ * Queue a pending-ring index for deallocation and kick the TX tasklet.
+ * The local spinlock serializes concurrent producers; smp_wmb() orders
+ * the slot write before the producer increment, pairing with the
+ * smp_rmb() in net_tx_action_dealloc().
+ */
+static void netif_idx_release(u16 pending_idx)
+{
+       static DEFINE_SPINLOCK(_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&_lock, flags);
+       dealloc_ring[MASK_PEND_IDX(dealloc_prod)] = pending_idx;
+       /* Sync with net_tx_action_dealloc: insert idx /then/ incr producer. */
+       smp_wmb();
+       dealloc_prod++;
+       spin_unlock_irqrestore(&_lock, flags);
+
+       tasklet_schedule(&net_tx_tasklet);
+}
+
+/* Foreign-page destructor: recycle the page and queue its TX slot. */
+static void netif_page_release(struct page *page)
+{
+       /* Index of this page within the contiguous mmap_vstart region. */
+       u16 pending_idx = page - virt_to_page(mmap_vstart);
+
+       /* Ready for next use. */
+       init_page_count(page);
+
+       netif_idx_release(pending_idx);
+}
+
+/*
+ * Event-channel interrupt from the frontend: schedule transmit work
+ * and restart our transmit queue if ring space has been freed up.
+ */
+irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
+{
+       netif_t *netif = dev_id;
+
+       add_to_net_schedule_list_tail(netif);
+       maybe_schedule_tx_action();
+
+       if (netif_queue_stopped(netif->dev) && !netbk_queue_full(netif))
+               netif_wake_queue(netif->dev);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Queue a transmit response with status 'st' for request 'txp' and
+ * notify the frontend if it asked for an event.
+ */
+static void make_tx_response(netif_t *netif, 
+                            netif_tx_request_t *txp,
+                            s8       st)
+{
+       RING_IDX i = netif->tx.rsp_prod_pvt;
+       netif_tx_response_t *resp;
+       int notify;
+
+       resp = RING_GET_RESPONSE(&netif->tx, i);
+       resp->id     = txp->id;
+       resp->status = st;
+
+       /* A request with extra info consumed two slots; NULL the second. */
+       if (txp->flags & NETTXF_extra_info)
+               RING_GET_RESPONSE(&netif->tx, ++i)->status = NETIF_RSP_NULL;
+
+       netif->tx.rsp_prod_pvt = ++i;
+       RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
+       if (notify)
+               notify_remote_via_irq(netif->irq);
+
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+       /* Caught up with the frontend? Re-check for late requests. */
+       if (i == netif->tx.req_cons) {
+               int more_to_do;
+               RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+               if (more_to_do)
+                       add_to_net_schedule_list_tail(netif);
+       }
+#endif
+}
+
+/*
+ * Fill in the next receive-ring response slot and advance the private
+ * producer.  A negative status 'st' overrides the size in the status
+ * field.  Returns a pointer to the response written.
+ */
+static netif_rx_response_t *make_rx_response(netif_t *netif, 
+                                            u16      id, 
+                                            s8       st,
+                                            u16      offset,
+                                            u16      size,
+                                            u16      flags)
+{
+       RING_IDX idx = netif->rx.rsp_prod_pvt;
+       netif_rx_response_t *resp = RING_GET_RESPONSE(&netif->rx, idx);
+
+       resp->id     = id;
+       resp->offset = offset;
+       resp->flags  = flags;
+       resp->status = (st < 0) ? (s16)st : (s16)size;
+
+       netif->rx.rsp_prod_pvt = idx + 1;
+
+       return resp;
+}
+
+#ifdef NETBE_DEBUG_INTERRUPT
+/*
+ * VIRQ_DEBUG handler: dump every scheduled netif's private and shared
+ * ring indices to the console for post-mortem debugging.
+ */
+static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
+{
+       struct list_head *ent;
+       netif_t *netif;
+       int i = 0;
+
+       printk(KERN_ALERT "netif_schedule_list:\n");
+       spin_lock_irq(&net_schedule_list_lock);
+
+       list_for_each (ent, &net_schedule_list) {
+               netif = list_entry(ent, netif_t, list);
+               printk(KERN_ALERT " %d: private(rx_req_cons=%08x "
+                      "rx_resp_prod=%08x\n",
+                      i, netif->rx.req_cons, netif->rx.rsp_prod_pvt);
+               printk(KERN_ALERT "   tx_req_cons=%08x tx_resp_prod=%08x)\n",
+                      netif->tx.req_cons, netif->tx.rsp_prod_pvt);
+               printk(KERN_ALERT "   shared(rx_req_prod=%08x "
+                      "rx_resp_prod=%08x\n",
+                      netif->rx.sring->req_prod, netif->rx.sring->rsp_prod);
+               printk(KERN_ALERT "   rx_event=%08x tx_req_prod=%08x\n",
+                      netif->rx.sring->rsp_event, netif->tx.sring->req_prod);
+               printk(KERN_ALERT "   tx_resp_prod=%08x, tx_event=%08x)\n",
+                      netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event);
+               i++;
+       }
+
+       spin_unlock_irq(&net_schedule_list_lock);
+       printk(KERN_ALERT " ** End of netif_schedule_list **\n");
+
+       return IRQ_HANDLED;
+}
+#endif
+
+/*
+ * Module init: reserve and prepare the grant-mapping page range,
+ * initialise the pending ring and schedule list, and register with
+ * xenbus.  Returns 0 on success, -ENODEV off-Xen, -ENOMEM on failure.
+ */
+static int __init netback_init(void)
+{
+       int i;
+       struct page *page;
+
+       if (!is_running_on_xen())
+               return -ENODEV;
+
+       /* We can increase reservation by this much in net_rx_action(). */
+       balloon_update_driver_allowance(NET_RX_RING_SIZE);
+
+       skb_queue_head_init(&rx_queue);
+       skb_queue_head_init(&tx_queue);
+
+       init_timer(&net_timer);
+       net_timer.data = 0;
+       net_timer.function = net_alarm;
+    
+       /* NOTE(review): the driver allowance granted above is not rolled
+        * back if this allocation fails — confirm whether that matters. */
+       page = balloon_alloc_empty_page_range(MAX_PENDING_REQS);
+       if (page == NULL)
+               return -ENOMEM;
+
+       mmap_vstart = (unsigned long)pfn_to_kaddr(page_to_pfn(page));
+
+       /* Mark every reserved page foreign so the release hook recycles
+        * its pending slot when the network stack frees the page. */
+       for (i = 0; i < MAX_PENDING_REQS; i++) {
+               page = virt_to_page(MMAP_VADDR(i));
+               init_page_count(page);
+               SetPageForeign(page, netif_page_release);
+       }
+
+       /* All MAX_PENDING_REQS slots start out free. */
+       pending_cons = 0;
+       pending_prod = MAX_PENDING_REQS;
+       for (i = 0; i < MAX_PENDING_REQS; i++)
+               pending_ring[i] = i;
+
+       spin_lock_init(&net_schedule_list_lock);
+       INIT_LIST_HEAD(&net_schedule_list);
+
+       netif_xenbus_init();
+
+#ifdef NETBE_DEBUG_INTERRUPT
+       (void)bind_virq_to_irqhandler(
+               VIRQ_DEBUG,
+               0,
+               netif_be_dbg,
+               SA_SHIRQ, 
+               "net-be-dbg",
+               &netif_be_dbg);
+#endif
+
+       return 0;
+}
+
+/* Register the init routine; no module_exit is defined in this file. */
+module_init(netback_init);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/netfront/Kconfig b/drivers/xen/netfront/Kconfig
new file mode 100644 (file)
index 0000000..334e6c3
--- /dev/null
@@ -0,0 +1,6 @@
+
+config XENNET
+       tristate "Xen network driver"
+       depends on NETDEVICES && ARCH_XEN
+       help
+         Network driver for Xen
diff --git a/drivers/xen/netfront/Makefile b/drivers/xen/netfront/Makefile
new file mode 100644 (file)
index 0000000..dc22829
--- /dev/null
@@ -0,0 +1,4 @@
+
+obj-$(CONFIG_XEN_NETDEV_FRONTEND)      := xennet.o
+
+xennet-objs := netfront.o
diff --git a/drivers/xen/netfront/netfront.c b/drivers/xen/netfront/netfront.c
new file mode 100644 (file)
index 0000000..ef6e134
--- /dev/null
@@ -0,0 +1,1841 @@
+/******************************************************************************
+ * Virtual network driver for conversing with remote driver backends.
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ * Copyright (c) 2005, XenSource Ltd
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/bitops.h>
+#include <linux/ethtool.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/io.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <net/arp.h>
+#include <net/route.h>
+#include <asm/uaccess.h>
+#include <xen/evtchn.h>
+#include <xen/xenbus.h>
+#include <xen/interface/io/netif.h>
+#include <xen/interface/memory.h>
+#include <xen/balloon.h>
+#include <asm/page.h>
+#include <asm/uaccess.h>
+#include <xen/interface/grant_table.h>
+#include <xen/gnttab.h>
+
+#define RX_COPY_THRESHOLD 256
+
+#define GRANT_INVALID_REF      0
+
+#define NET_TX_RING_SIZE __RING_SIZE((struct netif_tx_sring *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((struct netif_rx_sring *)0, PAGE_SIZE)
+
+struct netfront_info {
+       struct list_head list;
+       struct net_device *netdev;
+
+       struct net_device_stats stats;
+
+       struct netif_tx_front_ring tx;
+       struct netif_rx_front_ring rx;
+
+       spinlock_t   tx_lock;
+       spinlock_t   rx_lock;
+
+       unsigned int handle;
+       unsigned int evtchn, irq;
+
+       /* Receive-ring batched refills. */
+#define RX_MIN_TARGET 8
+#define RX_DFL_MIN_TARGET 64
+#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+       unsigned rx_min_target, rx_max_target, rx_target;
+       struct sk_buff_head rx_batch;
+
+       struct timer_list rx_refill_timer;
+
+       /*
+        * {tx,rx}_skbs store outstanding skbuffs. The first entry in tx_skbs
+        * is an index into a chain of free entries.
+        */
+       struct sk_buff *tx_skbs[NET_TX_RING_SIZE+1];
+       struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
+
+#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
+       grant_ref_t gref_tx_head;
+       grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
+       grant_ref_t gref_rx_head;
+       grant_ref_t grant_rx_ref[NET_TX_RING_SIZE];
+
+       struct xenbus_device *xbdev;
+       int tx_ring_ref;
+       int rx_ring_ref;
+       u8 mac[ETH_ALEN];
+
+       unsigned long rx_pfn_array[NET_RX_RING_SIZE];
+       struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
+       struct mmu_update rx_mmu[NET_RX_RING_SIZE];
+};
+
+struct netfront_rx_info {
+       struct netif_rx_response rx;
+       struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+};
+
+/*
+ * Access macros for acquiring/freeing slots in tx_skbs[].
+ */
+
+static inline void add_id_to_freelist(struct sk_buff **list, unsigned short id)
+{
+       list[id] = list[0];
+       list[0]  = (void *)(unsigned long)id;
+}
+
+static inline unsigned short get_id_from_freelist(struct sk_buff **list)
+{
+       unsigned int id = (unsigned int)(unsigned long)list[0];
+       list[0] = list[id];
+       return id;
+}
+
+static inline int xennet_rxidx(RING_IDX idx)
+{
+       return idx & (NET_RX_RING_SIZE - 1);
+}
+
+static inline struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
+                                               RING_IDX ri)
+{
+       int i = xennet_rxidx(ri);
+       struct sk_buff *skb = np->rx_skbs[i];
+       np->rx_skbs[i] = NULL;
+       return skb;
+}
+
+static inline grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
+                                           RING_IDX ri)
+{
+       int i = xennet_rxidx(ri);
+       grant_ref_t ref = np->grant_rx_ref[i];
+       np->grant_rx_ref[i] = GRANT_INVALID_REF;
+       return ref;
+}
+
+#define DPRINTK(fmt, args...)                          \
+       pr_debug("netfront (%s:%d) " fmt,               \
+                __FUNCTION__, __LINE__, ##args)
+#define IPRINTK(fmt, args...)                          \
+       printk(KERN_INFO "netfront: " fmt, ##args)
+#define WPRINTK(fmt, args...)                          \
+       printk(KERN_WARNING "netfront: " fmt, ##args)
+
+static int talk_to_backend(struct xenbus_device *, struct netfront_info *);
+static int setup_device(struct xenbus_device *, struct netfront_info *);
+static struct net_device *create_netdev(int, struct xenbus_device *);
+
+static void netfront_closing(struct xenbus_device *);
+
+static void end_access(int, void *);
+static void netif_disconnect_backend(struct netfront_info *);
+static void close_netdev(struct netfront_info *);
+static void netif_free(struct netfront_info *);
+
+static void network_connect(struct net_device *);
+static void network_tx_buf_gc(struct net_device *);
+static void network_alloc_rx_buffers(struct net_device *);
+static int send_fake_arp(struct net_device *);
+
+static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
+
+#ifdef CONFIG_SYSFS
+static int xennet_sysfs_addif(struct net_device *netdev);
+static void xennet_sysfs_delif(struct net_device *netdev);
+#else /* !CONFIG_SYSFS */
+#define xennet_sysfs_addif(dev) (0)
+#define xennet_sysfs_delif(dev) do { } while(0)
+#endif
+
+static inline int xennet_can_sg(struct net_device *dev)
+{
+       return dev->features & NETIF_F_SG;
+}
+
+/**
+ * Entry point to this code when a new device is created.  Allocate the basic
+ * structures and the ring buffers for communication with the backend, and
+ * inform the backend of the appropriate details for those.  Switch to
+ * Connected state.
+ */
+static int __devinit netfront_probe(struct xenbus_device *dev,
+                                   const struct xenbus_device_id *id)
+{
+       int err;
+       struct net_device *netdev;
+       struct netfront_info *info;
+       unsigned int handle;
+
+       err = xenbus_scanf(XBT_NIL, dev->nodename, "handle", "%u", &handle);
+       if (err != 1) {
+               xenbus_dev_fatal(dev, err, "reading handle");
+               return err;
+       }
+
+       netdev = create_netdev(handle, dev);
+       if (IS_ERR(netdev)) {
+               err = PTR_ERR(netdev);
+               xenbus_dev_fatal(dev, err, "creating netdev");
+               return err;
+       }
+
+       info = netdev_priv(netdev);
+       dev->dev.driver_data = info;
+
+       err = talk_to_backend(dev, info);
+       if (err) {
+               xennet_sysfs_delif(info->netdev);
+               unregister_netdev(netdev);
+               free_netdev(netdev);
+               dev->dev.driver_data = NULL;
+               return err;
+       }
+
+       return 0;
+}
+
+
+/**
+ * We are reconnecting to the backend, due to a suspend/resume, or a backend
+ * driver restart.  We tear down our netif structure and recreate it, but
+ * leave the device-layer structures intact so that this is transparent to the
+ * rest of the kernel.
+ */
+static int netfront_resume(struct xenbus_device *dev)
+{
+       struct netfront_info *info = dev->dev.driver_data;
+
+       DPRINTK("%s\n", dev->nodename);
+
+       netif_disconnect_backend(info);
+       return talk_to_backend(dev, info);
+}
+
+static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
+{
+       char *s, *e, *macstr;
+       int i;
+
+       macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
+       if (IS_ERR(macstr))
+               return PTR_ERR(macstr);
+
+       for (i = 0; i < ETH_ALEN; i++) {
+               mac[i] = simple_strtoul(s, &e, 16);
+               if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
+                       kfree(macstr);
+                       return -ENOENT;
+               }
+               s = e+1;
+       }
+
+       kfree(macstr);
+       return 0;
+}
+
+/* Common code used when first setting up, and when resuming. */
+static int talk_to_backend(struct xenbus_device *dev,
+                          struct netfront_info *info)
+{
+       const char *message;
+       struct xenbus_transaction xbt;
+       int err;
+
+       err = xen_net_read_mac(dev, info->mac);
+       if (err) {
+               xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
+               goto out;
+       }
+
+       /* Create shared ring, alloc event channel. */
+       err = setup_device(dev, info);
+       if (err)
+               goto out;
+
+again:
+       err = xenbus_transaction_start(&xbt);
+       if (err) {
+               xenbus_dev_fatal(dev, err, "starting transaction");
+               goto destroy_ring;
+       }
+
+       err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
+                           info->tx_ring_ref);
+       if (err) {
+               message = "writing tx ring-ref";
+               goto abort_transaction;
+       }
+       err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
+                           info->rx_ring_ref);
+       if (err) {
+               message = "writing rx ring-ref";
+               goto abort_transaction;
+       }
+       err = xenbus_printf(xbt, dev->nodename,
+                           "event-channel", "%u", info->evtchn);
+       if (err) {
+               message = "writing event-channel";
+               goto abort_transaction;
+       }
+
+       err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
+       if (err) {
+               message = "writing feature-rx-notify";
+               goto abort_transaction;
+       }
+
+       err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
+       if (err) {
+               message = "writing feature-sg";
+               goto abort_transaction;
+       }
+
+       err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
+       if (err) {
+               message = "writing feature-gso-tcpv4";
+               goto abort_transaction;
+       }
+
+       err = xenbus_transaction_end(xbt, 0);
+       if (err) {
+               if (err == -EAGAIN)
+                       goto again;
+               xenbus_dev_fatal(dev, err, "completing transaction");
+               goto destroy_ring;
+       }
+
+       return 0;
+
+ abort_transaction:
+       xenbus_transaction_end(xbt, 1);
+       xenbus_dev_fatal(dev, err, "%s", message);
+ destroy_ring:
+       netif_free(info);
+ out:
+       return err;
+}
+
+
+static int setup_device(struct xenbus_device *dev, struct netfront_info *info)
+{
+       struct netif_tx_sring *txs;
+       struct netif_rx_sring *rxs;
+       int err;
+       struct net_device *netdev = info->netdev;
+
+       info->tx_ring_ref = GRANT_INVALID_REF;
+       info->rx_ring_ref = GRANT_INVALID_REF;
+       info->rx.sring = NULL;
+       info->tx.sring = NULL;
+       info->irq = 0;
+
+       txs = (struct netif_tx_sring *)get_zeroed_page(GFP_KERNEL);
+       if (!txs) {
+               err = -ENOMEM;
+               xenbus_dev_fatal(dev, err, "allocating tx ring page");
+               goto fail;
+       }
+       SHARED_RING_INIT(txs);
+       FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
+
+       err = xenbus_grant_ring(dev, virt_to_mfn(txs));
+       if (err < 0) {
+               free_page((unsigned long)txs);
+               goto fail;
+       }
+       info->tx_ring_ref = err;
+
+       rxs = (struct netif_rx_sring *)get_zeroed_page(GFP_KERNEL);
+       if (!rxs) {
+               err = -ENOMEM;
+               xenbus_dev_fatal(dev, err, "allocating rx ring page");
+               goto fail;
+       }
+       SHARED_RING_INIT(rxs);
+       FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
+
+       err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
+       if (err < 0) {
+               free_page((unsigned long)rxs);
+               goto fail;
+       }
+       info->rx_ring_ref = err;
+
+       err = xenbus_alloc_evtchn(dev, &info->evtchn);
+       if (err)
+               goto fail;
+
+       memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
+       err = bind_evtchn_to_irqhandler(info->evtchn, netif_int,
+                                       SA_SAMPLE_RANDOM, netdev->name, netdev);
+       if (err < 0)
+               goto fail;
+       info->irq = err;
+       return 0;
+
+ fail:
+       netif_free(info);
+       return err;
+}
+
+
+/**
+ * Callback received when the backend's state changes.
+ */
+static void backend_changed(struct xenbus_device *dev,
+                           enum xenbus_state backend_state)
+{
+       struct netfront_info *np = dev->dev.driver_data;
+       struct net_device *netdev = np->netdev;
+
+       DPRINTK("\n");
+
+       switch (backend_state) {
+       case XenbusStateInitialising:
+       case XenbusStateInitialised:
+       case XenbusStateConnected:
+       case XenbusStateUnknown:
+       case XenbusStateClosed:
+               break;
+
+       case XenbusStateInitWait:
+               network_connect(netdev);
+               xenbus_switch_state(dev, XenbusStateConnected);
+               (void)send_fake_arp(netdev);
+               break;
+
+       case XenbusStateClosing:
+               netfront_closing(dev);
+               break;
+       }
+}
+
+
+/** Send a packet on a net device to encourage switches to learn the
+ * MAC. We send a fake ARP request.
+ *
+ * @param dev device
+ * @return 0 on success, error code otherwise
+ */
+static int send_fake_arp(struct net_device *dev)
+{
+       struct sk_buff *skb;
+       u32             src_ip, dst_ip;
+
+       dst_ip = INADDR_BROADCAST;
+       src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
+
+       /* No IP? Then nothing to do. */
+       if (src_ip == 0)
+               return 0;
+
+       skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
+                        dst_ip, dev, src_ip,
+                        /*dst_hw*/ NULL, /*src_hw*/ NULL,
+                        /*target_hw*/ dev->dev_addr);
+       if (skb == NULL)
+               return -ENOMEM;
+
+       return dev_queue_xmit(skb);
+}
+
+
+static int network_open(struct net_device *dev)
+{
+       struct netfront_info *np = netdev_priv(dev);
+
+       memset(&np->stats, 0, sizeof(np->stats));
+
+       network_alloc_rx_buffers(dev);
+       np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
+
+       if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
+               netif_rx_schedule(dev);
+
+       netif_start_queue(dev);
+
+       return 0;
+}
+
+static inline int netfront_tx_slot_available(struct netfront_info *np)
+{
+       return RING_FREE_REQUESTS(&np->tx) >= MAX_SKB_FRAGS + 2;
+}
+
+static inline void network_maybe_wake_tx(struct net_device *dev)
+{
+       struct netfront_info *np = netdev_priv(dev);
+
+       if (unlikely(netif_queue_stopped(dev)) &&
+           netfront_tx_slot_available(np) &&
+           likely(netif_running(dev)))
+               netif_wake_queue(dev);
+}
+
+static void network_tx_buf_gc(struct net_device *dev)
+{
+       RING_IDX cons, prod;
+       unsigned short id;
+       struct netfront_info *np = netdev_priv(dev);
+       struct sk_buff *skb;
+
+       if (unlikely(!netif_carrier_ok(dev)))
+               return;
+
+       do {
+               prod = np->tx.sring->rsp_prod;
+               rmb(); /* Ensure we see responses up to 'rp'. */
+
+               for (cons = np->tx.rsp_cons; cons != prod; cons++) {
+                       struct netif_tx_response *txrsp;
+
+                       txrsp = RING_GET_RESPONSE(&np->tx, cons);
+                       if (txrsp->status == NETIF_RSP_NULL)
+                               continue;
+
+                       id  = txrsp->id;
+                       skb = np->tx_skbs[id];
+                       if (unlikely(gnttab_query_foreign_access(
+                               np->grant_tx_ref[id]) != 0)) {
+                               printk(KERN_ALERT "network_tx_buf_gc: warning "
+                                      "-- grant still in use by backend "
+                                      "domain.\n");
+                               BUG();
+                       }
+                       gnttab_end_foreign_access_ref(
+                               np->grant_tx_ref[id], GNTMAP_readonly);
+                       gnttab_release_grant_reference(
+                               &np->gref_tx_head, np->grant_tx_ref[id]);
+                       np->grant_tx_ref[id] = GRANT_INVALID_REF;
+                       add_id_to_freelist(np->tx_skbs, id);
+                       dev_kfree_skb_irq(skb);
+               }
+
+               np->tx.rsp_cons = prod;
+
+               /*
+                * Set a new event, then check for race with update of tx_cons.
+                * Note that it is essential to schedule a callback, no matter
+                * how few buffers are pending. Even if there is space in the
+                * transmit ring, higher layers may be blocked because too much
+                * data is outstanding: in such cases notification from Xen is
+                * likely to be the only kick that we'll get.
+                */
+               np->tx.sring->rsp_event =
+                       prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
+               mb();
+       } while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
+
+       network_maybe_wake_tx(dev);
+}
+
+
+static void rx_refill_timeout(unsigned long data)
+{
+       struct net_device *dev = (struct net_device *)data;
+       netif_rx_schedule(dev);
+}
+
+
+static void network_alloc_rx_buffers(struct net_device *dev)
+{
+       unsigned short id;
+       struct netfront_info *np = netdev_priv(dev);
+       struct sk_buff *skb;
+       struct page *page;
+       int i, batch_target, notify;
+       RING_IDX req_prod = np->rx.req_prod_pvt;
+       struct xen_memory_reservation reservation;
+       grant_ref_t ref;
+       unsigned long pfn;
+       void *vaddr;
+
+       if (unlikely(!netif_carrier_ok(dev)))
+               return;
+
+       /*
+        * Allocate skbuffs greedily, even though we batch updates to the
+        * receive ring. This creates a less bursty demand on the memory
+        * allocator, so should reduce the chance of failed allocation requests
+        * both for ourself and for other kernel subsystems.
+        */
+       batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
+       for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
+               /*
+                * Allocate an skb and a page. Do not use __dev_alloc_skb as
+                * that will allocate page-sized buffers which is not
+                * necessary here.
+                * 16 bytes added as necessary headroom for netif_receive_skb.
+                */
+               skb = alloc_skb(RX_COPY_THRESHOLD + 16,
+                               GFP_ATOMIC | __GFP_NOWARN);
+               if (unlikely(!skb))
+                       goto no_skb;
+
+               page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+               if (!page) {
+                       kfree_skb(skb);
+no_skb:
+                       /* Any skbuffs queued for refill? Force them out. */
+                       if (i != 0)
+                               goto refill;
+                       /* Could not allocate any skbuffs. Try again later. */
+                       mod_timer(&np->rx_refill_timer,
+                                 jiffies + (HZ/10));
+                       break;
+               }
+
+               skb_reserve(skb, 16); /* mimic dev_alloc_skb() */
+               skb_shinfo(skb)->frags[0].page = page;
+               skb_shinfo(skb)->nr_frags = 1;
+               __skb_queue_tail(&np->rx_batch, skb);
+       }
+
+       /* Is the batch large enough to be worthwhile? */
+       if (i < (np->rx_target/2)) {
+               if (req_prod > np->rx.sring->req_prod)
+                       goto push;
+               return;
+       }
+
+       /* Adjust our fill target if we risked running out of buffers. */
+       if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
+           ((np->rx_target *= 2) > np->rx_max_target))
+               np->rx_target = np->rx_max_target;
+
+ refill:
+       for (i = 0; ; i++) {
+               if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
+                       break;
+
+               skb->dev = dev;
+
+               id = xennet_rxidx(req_prod + i);
+
+               BUG_ON(np->rx_skbs[id]);
+               np->rx_skbs[id] = skb;
+
+               RING_GET_REQUEST(&np->rx, req_prod + i)->id = id;
+               ref = gnttab_claim_grant_reference(&np->gref_rx_head);
+               BUG_ON((signed short)ref < 0);
+               np->grant_rx_ref[id] = ref;
+
+               pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
+               vaddr = page_address(skb_shinfo(skb)->frags[0].page);
+
+               gnttab_grant_foreign_transfer_ref(ref,
+                                                 np->xbdev->otherend_id, pfn);
+               RING_GET_REQUEST(&np->rx, req_prod + i)->gref = ref;
+               np->rx_pfn_array[i] = pfn_to_mfn(pfn);
+
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                       /* Remove this page before passing back to Xen. */
+                       set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+                       MULTI_update_va_mapping(np->rx_mcl+i,
+                                               (unsigned long)vaddr,
+                                               __pte(0), 0);
+               }
+       }
+
+       /* Tell the balloon driver what is going on. */
+       balloon_update_driver_allowance(i);
+
+       set_xen_guest_handle(reservation.extent_start, np->rx_pfn_array);
+       reservation.nr_extents   = i;
+       reservation.extent_order = 0;
+       reservation.address_bits = 0;
+       reservation.domid        = DOMID_SELF;
+
+       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+               /* After all PTEs have been zapped, flush the TLB. */
+               np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
+                       UVMF_TLB_FLUSH|UVMF_ALL;
+
+               /* Give away a batch of pages. */
+               np->rx_mcl[i].op = __HYPERVISOR_memory_op;
+               np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+               np->rx_mcl[i].args[1] = (unsigned long)&reservation;
+
+               /* Zap PTEs and give away pages in one big multicall. */
+               (void)HYPERVISOR_multicall(np->rx_mcl, i+1);
+
+               /* Check return status of HYPERVISOR_memory_op(). */
+               if (unlikely(np->rx_mcl[i].result != i))
+                       panic("Unable to reduce memory reservation\n");
+       } else
+               if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+                                        &reservation) != i)
+                       panic("Unable to reduce memory reservation\n");
+
+       /* Above is a suitable barrier to ensure backend will see requests. */
+       np->rx.req_prod_pvt = req_prod + i;
+ push:
+       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
+       if (notify)
+               notify_remote_via_irq(np->irq);
+}
+
+static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
+                             struct netif_tx_request *tx)
+{
+       struct netfront_info *np = netdev_priv(dev);
+       char *data = skb->data;
+       unsigned long mfn;
+       RING_IDX prod = np->tx.req_prod_pvt;
+       int frags = skb_shinfo(skb)->nr_frags;
+       unsigned int offset = offset_in_page(data);
+       unsigned int len = skb_headlen(skb);
+       unsigned int id;
+       grant_ref_t ref;
+       int i;
+
+       while (len > PAGE_SIZE - offset) {
+               tx->size = PAGE_SIZE - offset;
+               tx->flags |= NETTXF_more_data;
+               len -= tx->size;
+               data += tx->size;
+               offset = 0;
+
+               id = get_id_from_freelist(np->tx_skbs);
+               np->tx_skbs[id] = skb_get(skb);
+               tx = RING_GET_REQUEST(&np->tx, prod++);
+               tx->id = id;
+               ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+               BUG_ON((signed short)ref < 0);
+
+               mfn = virt_to_mfn(data);
+               gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+                                               mfn, GNTMAP_readonly);
+
+               tx->gref = np->grant_tx_ref[id] = ref;
+               tx->offset = offset;
+               tx->size = len;
+               tx->flags = 0;
+       }
+
+       for (i = 0; i < frags; i++) {
+               skb_frag_t *frag = skb_shinfo(skb)->frags + i;
+
+               tx->flags |= NETTXF_more_data;
+
+               id = get_id_from_freelist(np->tx_skbs);
+               np->tx_skbs[id] = skb_get(skb);
+               tx = RING_GET_REQUEST(&np->tx, prod++);
+               tx->id = id;
+               ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+               BUG_ON((signed short)ref < 0);
+
+               mfn = pfn_to_mfn(page_to_pfn(frag->page));
+               gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
+                                               mfn, GNTMAP_readonly);
+
+               tx->gref = np->grant_tx_ref[id] = ref;
+               tx->offset = frag->page_offset;
+               tx->size = frag->size;
+               tx->flags = 0;
+       }
+
+       np->tx.req_prod_pvt = prod;
+}
+
+static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       unsigned short id;
+       struct netfront_info *np = netdev_priv(dev);
+       struct netif_tx_request *tx;
+       struct netif_extra_info *extra;
+       char *data = skb->data;
+       RING_IDX i;
+       grant_ref_t ref;
+       unsigned long mfn;
+       int notify;
+       int frags = skb_shinfo(skb)->nr_frags;
+       unsigned int offset = offset_in_page(data);
+       unsigned int len = skb_headlen(skb);
+
+       frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
+       if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
+               printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
+                      frags);
+               dump_stack();
+               goto drop;
+       }
+
+       spin_lock_irq(&np->tx_lock);
+
+       if (unlikely(!netif_carrier_ok(dev) ||
+                    (frags > 1 && !xennet_can_sg(dev)) ||
+                    netif_needs_gso(dev, skb))) {
+               spin_unlock_irq(&np->tx_lock);
+               goto drop;
+       }
+
+       i = np->tx.req_prod_pvt;
+
+       id = get_id_from_freelist(np->tx_skbs);
+       np->tx_skbs[id] = skb;
+
+       tx = RING_GET_REQUEST(&np->tx, i);
+
+       tx->id   = id;
+       ref = gnttab_claim_grant_reference(&np->gref_tx_head);
+       BUG_ON((signed short)ref < 0);
+       mfn = virt_to_mfn(data);
+       gnttab_grant_foreign_access_ref(
+               ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
+       tx->gref = np->grant_tx_ref[id] = ref;
+       tx->offset = offset;
+       tx->size = len;
+
+       tx->flags = 0;
+       extra = NULL;
+
+       if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
+               tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
+       if (skb->proto_data_valid) /* remote but checksummed? */
+               tx->flags |= NETTXF_data_validated;
+
+       if (skb_shinfo(skb)->gso_size) {
+               struct netif_extra_info *gso = (struct netif_extra_info *)
+                       RING_GET_REQUEST(&np->tx, ++i);
+
+               if (extra)
+                       extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
+               else
+                       tx->flags |= NETTXF_extra_info;
+
+               gso->u.gso.size = skb_shinfo(skb)->gso_size;
+               gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
+               gso->u.gso.pad = 0;
+               gso->u.gso.features = 0;
+
+               gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
+               gso->flags = 0;
+               extra = gso;
+       }
+
+       np->tx.req_prod_pvt = i + 1;
+
+       xennet_make_frags(skb, dev, tx);
+       tx->size = skb->len;
+
+       RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
+       if (notify)
+               notify_remote_via_irq(np->irq);
+
+       network_tx_buf_gc(dev);
+
+       if (!netfront_tx_slot_available(np))
+               netif_stop_queue(dev);
+
+       spin_unlock_irq(&np->tx_lock);
+
+       np->stats.tx_bytes += skb->len;
+       np->stats.tx_packets++;
+
+       return 0;
+
+ drop:
+       np->stats.tx_dropped++;
+       dev_kfree_skb(skb);
+       return 0;
+}
+
+static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
+{
+       struct net_device *dev = dev_id;
+       struct netfront_info *np = netdev_priv(dev);
+       unsigned long flags;
+
+       spin_lock_irqsave(&np->tx_lock, flags);
+       network_tx_buf_gc(dev);
+       spin_unlock_irqrestore(&np->tx_lock, flags);
+
+       if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx) &&
+           likely(netif_running(dev)))
+               netif_rx_schedule(dev);
+
+       return IRQ_HANDLED;
+}
+
+static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
+                               grant_ref_t ref)
+{
+       int new = xennet_rxidx(np->rx.req_prod_pvt);
+
+       BUG_ON(np->rx_skbs[new]);
+       np->rx_skbs[new] = skb;
+       np->grant_rx_ref[new] = ref;
+       RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
+       RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
+       np->rx.req_prod_pvt++;
+}
+
+int xennet_get_extras(struct netfront_info *np,
+                     struct netif_extra_info *extras, RING_IDX rp)
+
+{
+       struct netif_extra_info *extra;
+       RING_IDX cons = np->rx.rsp_cons;
+       int err = 0;
+
+       do {
+               struct sk_buff *skb;
+               grant_ref_t ref;
+
+               if (unlikely(cons + 1 == rp)) {
+                       if (net_ratelimit())
+                               WPRINTK("Missing extra info\n");
+                       err = -EBADR;
+                       break;
+               }
+
+               extra = (struct netif_extra_info *)
+                       RING_GET_RESPONSE(&np->rx, ++cons);
+
+               if (unlikely(!extra->type ||
+                            extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+                       if (net_ratelimit())
+                               WPRINTK("Invalid extra type: %d\n",
+                                       extra->type);
+                       err = -EINVAL;
+               } else
+                       memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
+
+               skb = xennet_get_rx_skb(np, cons);
+               ref = xennet_get_rx_ref(np, cons);
+               xennet_move_rx_slot(np, skb, ref);
+       } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
+
+       np->rx.rsp_cons = cons;
+       return err;
+}
+
+static int xennet_get_responses(struct netfront_info *np,
+                               struct netfront_rx_info *rinfo, RING_IDX rp,
+                               struct sk_buff_head *list, int count)
+{
+       struct mmu_update *mmu = np->rx_mmu + count;
+       struct multicall_entry *mcl = np->rx_mcl + count;
+       struct netif_rx_response *rx = &rinfo->rx;
+       struct netif_extra_info *extras = rinfo->extras;
+       RING_IDX cons = np->rx.rsp_cons;
+       struct sk_buff *skb = xennet_get_rx_skb(np, cons);
+       grant_ref_t ref = xennet_get_rx_ref(np, cons);
+       int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
+       int frags = 1;
+       int err = 0;
+
+       if (rx->flags & NETRXF_extra_info) {
+               err = xennet_get_extras(np, extras, rp);
+               cons = np->rx.rsp_cons;
+       }
+
+       for (;;) {
+               unsigned long mfn;
+
+               if (unlikely(rx->status < 0 ||
+                            rx->offset + rx->status > PAGE_SIZE)) {
+                       if (net_ratelimit())
+                               WPRINTK("rx->offset: %x, size: %u\n",
+                                       rx->offset, rx->status);
+                       err = -EINVAL;
+               }
+
+               /*
+                * This definitely indicates a bug, either in this driver or in
+                * the backend driver. In future this should flag the bad
+                * situation to the system controller to reboot the backend.
+                */
+               if (ref == GRANT_INVALID_REF) {
+                       WPRINTK("Bad rx response id %d.\n", rx->id);
+                       err = -EINVAL;
+                       goto next;
+               }
+
+               /* Memory pressure, insufficient buffer headroom, ... */
+               if ((mfn = gnttab_end_foreign_transfer_ref(ref)) == 0) {
+                       if (net_ratelimit())
+                               WPRINTK("Unfulfilled rx req (id=%d, st=%d).\n",
+                                       rx->id, rx->status);
+                       xennet_move_rx_slot(np, skb, ref);
+                       err = -ENOMEM;
+                       goto next;
+               }
+
+               gnttab_release_grant_reference(&np->gref_rx_head, ref);
+
+               if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+                       /* Remap the page. */
+                       struct page *page = skb_shinfo(skb)->frags[0].page;
+                       unsigned long pfn = page_to_pfn(page);
+                       void *vaddr = page_address(page);
+
+                       MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
+                                               pfn_pte_ma(mfn, PAGE_KERNEL),
+                                               0);
+                       mcl++;
+                       mmu->ptr = ((maddr_t)mfn << PAGE_SHIFT)
+                               | MMU_MACHPHYS_UPDATE;
+                       mmu->val = pfn;
+                       mmu++;
+
+                       set_phys_to_machine(pfn, mfn);
+               }
+
+               __skb_queue_tail(list, skb);
+
+next:
+               if (!(rx->flags & NETRXF_more_data))
+                       break;
+
+               if (cons + frags == rp) {
+                       if (net_ratelimit())
+                               WPRINTK("Need more frags\n");
+                       err = -ENOENT;
+                       break;
+               }
+
+               rx = RING_GET_RESPONSE(&np->rx, cons + frags);
+               skb = xennet_get_rx_skb(np, cons + frags);
+               ref = xennet_get_rx_ref(np, cons + frags);
+               frags++;
+       }
+
+       if (unlikely(frags > max)) {
+               if (net_ratelimit())
+                       WPRINTK("Too many frags\n");
+               err = -E2BIG;
+       }
+
+       return err;
+}
+
+static RING_IDX xennet_fill_frags(struct netfront_info *np,
+                                 struct sk_buff *skb,
+                                 struct sk_buff_head *list)
+{
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       int nr_frags = shinfo->nr_frags;
+       RING_IDX cons = np->rx.rsp_cons;
+       skb_frag_t *frag = shinfo->frags + nr_frags;
+       struct sk_buff *nskb;
+
+       while ((nskb = __skb_dequeue(list))) {
+               struct netif_rx_response *rx =
+                       RING_GET_RESPONSE(&np->rx, ++cons);
+
+               frag->page = skb_shinfo(nskb)->frags[0].page;
+               frag->page_offset = rx->offset;
+               frag->size = rx->status;
+
+               skb->data_len += rx->status;
+
+               skb_shinfo(nskb)->nr_frags = 0;
+               kfree_skb(nskb);
+
+               frag++;
+               nr_frags++;
+       }
+
+       shinfo->nr_frags = nr_frags;
+       return cons;
+}
+
+static int xennet_set_skb_gso(struct sk_buff *skb, struct netif_extra_info *gso)
+{
+       if (!gso->u.gso.size) {
+               if (net_ratelimit())
+                       WPRINTK("GSO size must not be zero.\n");
+               return -EINVAL;
+       }
+
+       /* Currently only TCPv4 S.O. is supported. */
+       if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
+               if (net_ratelimit())
+                       WPRINTK("Bad GSO type %d.\n", gso->u.gso.type);
+               return -EINVAL;
+       }
+
+       skb_shinfo(skb)->gso_size = gso->u.gso.size;
+       skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+
+       /* Header must be checked, and gso_segs computed. */
+       skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
+       skb_shinfo(skb)->gso_segs = 0;
+
+       return 0;
+}
+
+static int netif_poll(struct net_device *dev, int *pbudget)
+{
+       struct netfront_info *np = netdev_priv(dev);
+       struct sk_buff *skb;
+       struct netfront_rx_info rinfo;
+       struct netif_rx_response *rx = &rinfo.rx;
+       struct netif_extra_info *extras = rinfo.extras;
+       RING_IDX i, rp;
+       struct multicall_entry *mcl;
+       int work_done, budget, more_to_do = 1;
+       struct sk_buff_head rxq;
+       struct sk_buff_head errq;
+       struct sk_buff_head tmpq;
+       unsigned long flags;
+       unsigned int len;
+       int pages_done;
+       int err;
+
+       spin_lock(&np->rx_lock);
+
+       if (unlikely(!netif_carrier_ok(dev))) {
+               spin_unlock(&np->rx_lock);
+               return 0;
+       }
+
+       skb_queue_head_init(&rxq);
+       skb_queue_head_init(&errq);
+       skb_queue_head_init(&tmpq);
+
+       if ((budget = *pbudget) > dev->quota)
+               budget = dev->quota;
+       rp = np->rx.sring->rsp_prod;
+       rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+       for (i = np->rx.rsp_cons, work_done = 0, pages_done = 0;
+            (i != rp) && (work_done < budget);
+            np->rx.rsp_cons = ++i, work_done++) {
+               memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
+               memset(extras, 0, sizeof(rinfo.extras)); /* whole array, not sizeof(ptr) */
+
+               err = xennet_get_responses(np, &rinfo, rp, &tmpq, pages_done);
+               pages_done += skb_queue_len(&tmpq);
+
+               if (unlikely(err)) {
+err:
+                       i = np->rx.rsp_cons + skb_queue_len(&tmpq) - 1;
+                       work_done--;
+                       while ((skb = __skb_dequeue(&tmpq)))
+                               __skb_queue_tail(&errq, skb);
+                       np->stats.rx_errors++;
+                       continue;
+               }
+
+               skb = __skb_dequeue(&tmpq);
+
+               if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
+                       struct netif_extra_info *gso;
+                       gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+
+                       if (unlikely(xennet_set_skb_gso(skb, gso))) {
+                               __skb_queue_head(&tmpq, skb);
+                               goto err;
+                       }
+               }
+
+               skb->nh.raw = (void *)skb_shinfo(skb)->frags[0].page;
+               skb->h.raw = skb->nh.raw + rx->offset;
+
+               len = rx->status;
+               if (len > RX_COPY_THRESHOLD)
+                       len = RX_COPY_THRESHOLD;
+               skb_put(skb, len);
+
+               if (rx->status > len) {
+                       skb_shinfo(skb)->frags[0].page_offset =
+                               rx->offset + len;
+                       skb_shinfo(skb)->frags[0].size = rx->status - len;
+                       skb->data_len = rx->status - len;
+               } else {
+                       skb_shinfo(skb)->frags[0].page = NULL;
+                       skb_shinfo(skb)->nr_frags = 0;
+               }
+
+               i = xennet_fill_frags(np, skb, &tmpq);
+               skb->truesize += skb->data_len;
+               skb->len += skb->data_len;
+
+               /*
+                * Old backends do not assert data_validated but we
+                * can infer it from csum_blank so test both flags.
+                */
+               if (rx->flags & (NETRXF_data_validated|NETRXF_csum_blank)) {
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
+                       skb->proto_data_valid = 1;
+               } else {
+                       skb->ip_summed = CHECKSUM_NONE;
+                       skb->proto_data_valid = 0;
+               }
+               skb->proto_csum_blank = !!(rx->flags & NETRXF_csum_blank);
+
+               np->stats.rx_packets++;
+               np->stats.rx_bytes += skb->len;
+
+               __skb_queue_tail(&rxq, skb);
+       }
+
+       /* Some pages are no longer absent... */
+       balloon_update_driver_allowance(-pages_done);
+
+       /* Do all the remapping work, and M2P updates, in one big hypercall. */
+       if (likely(pages_done)) {
+               mcl = np->rx_mcl + pages_done;
+               mcl->op = __HYPERVISOR_mmu_update;
+               mcl->args[0] = (unsigned long)np->rx_mmu;
+               mcl->args[1] = pages_done;
+               mcl->args[2] = 0;
+               mcl->args[3] = DOMID_SELF;
+               (void)HYPERVISOR_multicall(np->rx_mcl, pages_done + 1);
+       }
+
+       while ((skb = __skb_dequeue(&errq)))
+               kfree_skb(skb);
+
+       while ((skb = __skb_dequeue(&rxq)) != NULL) {
+               struct page *page = (struct page *)skb->nh.raw;
+               void *vaddr = page_address(page);
+
+               memcpy(skb->data, vaddr + (skb->h.raw - skb->nh.raw),
+                      skb_headlen(skb));
+
+               if (page != skb_shinfo(skb)->frags[0].page)
+                       __free_page(page);
+
+               /* Ethernet work: Delayed to here as it peeks the header. */
+               skb->protocol = eth_type_trans(skb, dev);
+
+               /* Pass it up. */
+               netif_receive_skb(skb);
+               dev->last_rx = jiffies;
+       }
+
+       /* If we get a callback with very few responses, reduce fill target. */
+       /* NB. Note exponential increase, linear decrease. */
+       if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
+            ((3*np->rx_target) / 4)) &&
+           (--np->rx_target < np->rx_min_target))
+               np->rx_target = np->rx_min_target;
+
+       network_alloc_rx_buffers(dev);
+
+       *pbudget   -= work_done;
+       dev->quota -= work_done;
+
+       if (work_done < budget) {
+               local_irq_save(flags);
+
+               RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
+               if (!more_to_do)
+                       __netif_rx_complete(dev);
+
+               local_irq_restore(flags);
+       }
+
+       spin_unlock(&np->rx_lock);
+
+       return more_to_do;
+}
+
+
+static int network_close(struct net_device *dev)
+{
+       struct netfront_info *np = netdev_priv(dev);
+       netif_stop_queue(np->netdev);
+       return 0;
+}
+
+
+static struct net_device_stats *network_get_stats(struct net_device *dev)
+{
+       struct netfront_info *np = netdev_priv(dev);
+       return &np->stats;
+}
+
+static int xennet_change_mtu(struct net_device *dev, int mtu)
+{
+       int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
+
+       if (mtu > max)
+               return -EINVAL;
+       dev->mtu = mtu;
+       return 0;
+}
+
+static int xennet_set_sg(struct net_device *dev, u32 data)
+{
+       if (data) {
+               struct netfront_info *np = netdev_priv(dev);
+               int val;
+
+               if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
+                                "%d", &val) < 0)
+                       val = 0;
+               if (!val)
+                       return -ENOSYS;
+       } else if (dev->mtu > ETH_DATA_LEN)
+               dev->mtu = ETH_DATA_LEN;
+
+       return ethtool_op_set_sg(dev, data);
+}
+
+static int xennet_set_tso(struct net_device *dev, u32 data)
+{
+       if (data) {
+               struct netfront_info *np = netdev_priv(dev);
+               int val;
+
+               if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
+                                "feature-gso-tcpv4", "%d", &val) < 0)
+                       val = 0;
+               if (!val)
+                       return -ENOSYS;
+       }
+
+       return ethtool_op_set_tso(dev, data);
+}
+
+static void xennet_set_features(struct net_device *dev)
+{
+       /* Turn off all GSO bits except ROBUST. */
+       dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
+       dev->features |= NETIF_F_GSO_ROBUST;
+       xennet_set_sg(dev, 0);
+
+       if (!xennet_set_sg(dev, 1))
+               xennet_set_tso(dev, 1);
+}
+
+static void network_connect(struct net_device *dev)
+{
+       struct netfront_info *np = netdev_priv(dev);
+       int i, requeue_idx;
+       struct sk_buff *skb;
+       grant_ref_t ref;
+
+       xennet_set_features(dev);
+
+       spin_lock_irq(&np->tx_lock);
+       spin_lock(&np->rx_lock);
+
+       /*
+         * Recovery procedure:
+        *  NB. Freelist index entries are always going to be less than
+        *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
+        *  greater than PAGE_OFFSET: we use this property to distinguish
+        *  them.
+         */
+
+       /* Step 1: Discard all pending TX packet fragments. */
+       for (requeue_idx = 0, i = 1; i <= NET_TX_RING_SIZE; i++) {
+               if ((unsigned long)np->tx_skbs[i] < PAGE_OFFSET)
+                       continue;
+
+               skb = np->tx_skbs[i];
+               gnttab_end_foreign_access_ref(
+                       np->grant_tx_ref[i], GNTMAP_readonly);
+               gnttab_release_grant_reference(
+                       &np->gref_tx_head, np->grant_tx_ref[i]);
+               np->grant_tx_ref[i] = GRANT_INVALID_REF;
+               add_id_to_freelist(np->tx_skbs, i);
+               dev_kfree_skb_irq(skb);
+       }
+
+       /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
+       for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
+               if (!np->rx_skbs[i])
+                       continue;
+
+               skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
+               ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
+
+               gnttab_grant_foreign_transfer_ref(
+                       ref, np->xbdev->otherend_id,
+                       page_to_pfn(skb_shinfo(skb)->frags->page));
+
+               RING_GET_REQUEST(&np->rx, requeue_idx)->gref = ref;
+               RING_GET_REQUEST(&np->rx, requeue_idx)->id   = requeue_idx;
+
+               requeue_idx++;
+       }
+
+       np->rx.req_prod_pvt = requeue_idx;
+
+       /*
+        * Step 3: All public and private state should now be sane.  Get
+        * ready to start sending and receiving packets and give the driver
+        * domain a kick because we've probably just requeued some
+        * packets.
+        */
+       netif_carrier_on(dev);
+       notify_remote_via_irq(np->irq);
+       network_tx_buf_gc(dev);
+       network_alloc_rx_buffers(dev);
+
+       spin_unlock(&np->rx_lock);
+       spin_unlock_irq(&np->tx_lock);
+}
+
+static void netif_uninit(struct net_device *dev)
+{
+       struct netfront_info *np = netdev_priv(dev);
+       gnttab_free_grant_references(np->gref_tx_head);
+       gnttab_free_grant_references(np->gref_rx_head);
+}
+
+static struct ethtool_ops network_ethtool_ops =
+{
+       .get_tx_csum = ethtool_op_get_tx_csum,
+       .set_tx_csum = ethtool_op_set_tx_csum,
+       .get_sg = ethtool_op_get_sg,
+       .set_sg = xennet_set_sg,
+       .get_tso = ethtool_op_get_tso,
+       .set_tso = xennet_set_tso,
+       .get_link = ethtool_op_get_link,
+};
+
+#ifdef CONFIG_SYSFS
+static ssize_t show_rxbuf_min(struct class_device *cd, char *buf)
+{
+       struct net_device *netdev = container_of(cd, struct net_device,
+                                                class_dev);
+       struct netfront_info *info = netdev_priv(netdev);
+
+       return sprintf(buf, "%u\n", info->rx_min_target);
+}
+
+static ssize_t store_rxbuf_min(struct class_device *cd,
+                              const char *buf, size_t len)
+{
+       struct net_device *netdev = container_of(cd, struct net_device,
+                                                class_dev);
+       struct netfront_info *np = netdev_priv(netdev);
+       char *endp;
+       unsigned long target;
+
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
+       target = simple_strtoul(buf, &endp, 0);
+       if (endp == buf)
+               return -EBADMSG;
+
+       if (target < RX_MIN_TARGET)
+               target = RX_MIN_TARGET;
+       if (target > RX_MAX_TARGET)
+               target = RX_MAX_TARGET;
+
+       spin_lock(&np->rx_lock);
+       if (target > np->rx_max_target)
+               np->rx_max_target = target;
+       np->rx_min_target = target;
+       if (target > np->rx_target)
+               np->rx_target = target;
+
+       network_alloc_rx_buffers(netdev);
+
+       spin_unlock(&np->rx_lock);
+       return len;
+}
+
+static ssize_t show_rxbuf_max(struct class_device *cd, char *buf)
+{
+       struct net_device *netdev = container_of(cd, struct net_device,
+                                                class_dev);
+       struct netfront_info *info = netdev_priv(netdev);
+
+       return sprintf(buf, "%u\n", info->rx_max_target);
+}
+
+static ssize_t store_rxbuf_max(struct class_device *cd,
+                              const char *buf, size_t len)
+{
+       struct net_device *netdev = container_of(cd, struct net_device,
+                                                class_dev);
+       struct netfront_info *np = netdev_priv(netdev);
+       char *endp;
+       unsigned long target;
+
+       if (!capable(CAP_NET_ADMIN))
+               return -EPERM;
+
+       target = simple_strtoul(buf, &endp, 0);
+       if (endp == buf)
+               return -EBADMSG;
+
+       if (target < RX_MIN_TARGET)
+               target = RX_MIN_TARGET;
+       if (target > RX_MAX_TARGET)
+               target = RX_MAX_TARGET;
+
+       spin_lock(&np->rx_lock);
+       if (target < np->rx_min_target)
+               np->rx_min_target = target;
+       np->rx_max_target = target;
+       if (target < np->rx_target)
+               np->rx_target = target;
+
+       network_alloc_rx_buffers(netdev);
+
+       spin_unlock(&np->rx_lock);
+       return len;
+}
+
+static ssize_t show_rxbuf_cur(struct class_device *cd, char *buf)
+{
+       struct net_device *netdev = container_of(cd, struct net_device,
+                                                class_dev);
+       struct netfront_info *info = netdev_priv(netdev);
+
+       return sprintf(buf, "%u\n", info->rx_target);
+}
+
+static const struct class_device_attribute xennet_attrs[] = {
+       __ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
+       __ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
+       __ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
+};
+
+static int xennet_sysfs_addif(struct net_device *netdev)
+{
+       int i;
+       int error = 0;
+
+       for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
+               error = class_device_create_file(&netdev->class_dev, 
+                                                &xennet_attrs[i]);
+               if (error)
+                       goto fail;
+       }
+       return 0;
+
+ fail:
+       while (--i >= 0)
+               class_device_remove_file(&netdev->class_dev,
+                                        &xennet_attrs[i]);
+       return error;
+}
+
+static void xennet_sysfs_delif(struct net_device *netdev)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
+               class_device_remove_file(&netdev->class_dev,
+                                        &xennet_attrs[i]);
+       }
+}
+
+#endif /* CONFIG_SYSFS */
+
+
+/*
+ * Nothing to do here. Virtual interface is point-to-point and the
+ * physical interface is probably promiscuous anyway.
+ */
+static void network_set_multicast_list(struct net_device *dev)
+{
+}
+
+/** Create a network device.
+ * @param handle device handle
+ * @param val return parameter for created device
+ * @return 0 on success, error code otherwise
+ */
+static struct net_device * __devinit create_netdev(int handle,
+                                                  struct xenbus_device *dev)
+{
+       int i, err = 0;
+       struct net_device *netdev = NULL;
+       struct netfront_info *np = NULL;
+
+       netdev = alloc_etherdev(sizeof(struct netfront_info));
+       if (!netdev) {
+               printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
+                      __FUNCTION__);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       np                = netdev_priv(netdev);
+       np->handle        = handle;
+       np->xbdev         = dev;
+
+       netif_carrier_off(netdev);
+
+       spin_lock_init(&np->tx_lock);
+       spin_lock_init(&np->rx_lock);
+
+       skb_queue_head_init(&np->rx_batch);
+       np->rx_target     = RX_DFL_MIN_TARGET;
+       np->rx_min_target = RX_DFL_MIN_TARGET;
+       np->rx_max_target = RX_MAX_TARGET;
+
+       init_timer(&np->rx_refill_timer);
+       np->rx_refill_timer.data = (unsigned long)netdev;
+       np->rx_refill_timer.function = rx_refill_timeout;
+
+       /* Initialise {tx,rx}_skbs as a free chain containing every entry. */
+       for (i = 0; i <= NET_TX_RING_SIZE; i++) {
+               np->tx_skbs[i] = (void *)((unsigned long) i+1);
+               np->grant_tx_ref[i] = GRANT_INVALID_REF;
+       }
+
+       for (i = 0; i < NET_RX_RING_SIZE; i++) {
+               np->rx_skbs[i] = NULL;
+               np->grant_rx_ref[i] = GRANT_INVALID_REF;
+       }
+
+       /* A grant for every tx ring slot */
+       if (gnttab_alloc_grant_references(TX_MAX_TARGET,
+                                         &np->gref_tx_head) < 0) {
+               printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
+               err = -ENOMEM;
+               goto exit;
+       }
+       /* A grant for every rx ring slot */
+       if (gnttab_alloc_grant_references(RX_MAX_TARGET,
+                                         &np->gref_rx_head) < 0) {
+               printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
+               err = -ENOMEM;
+               goto exit_free_tx;
+       }
+
+       netdev->open            = network_open;
+       netdev->hard_start_xmit = network_start_xmit;
+       netdev->stop            = network_close;
+       netdev->get_stats       = network_get_stats;
+       netdev->poll            = netif_poll;
+       netdev->set_multicast_list = network_set_multicast_list;
+       netdev->uninit          = netif_uninit;
+       netdev->change_mtu      = xennet_change_mtu;
+       netdev->weight          = 64;
+       netdev->features        = NETIF_F_IP_CSUM;
+
+       SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
+       SET_MODULE_OWNER(netdev);
+       SET_NETDEV_DEV(netdev, &dev->dev);
+
+       err = register_netdev(netdev);
+       if (err) {
+               printk(KERN_WARNING "%s> register_netdev err=%d\n",
+                      __FUNCTION__, err);
+               goto exit_free_rx;
+       }
+
+       err = xennet_sysfs_addif(netdev);
+       if (err) {
+               /* This can be non-fatal: it only means no tuning parameters */
+               printk(KERN_WARNING "%s> add sysfs failed err=%d\n",
+                      __FUNCTION__, err);
+       }
+
+       np->netdev = netdev;
+
+       return netdev;
+
+
+ exit_free_rx:
+       gnttab_free_grant_references(np->gref_rx_head);
+ exit_free_tx:
+       gnttab_free_grant_references(np->gref_tx_head);
+ exit:
+       free_netdev(netdev);
+       return ERR_PTR(err);
+}
+
+/*
+ * We use this notifier to send out a fake ARP reply to reset switches and
+ * router ARP caches when an IP interface is brought up on a VIF.
+ */
+static int
+inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
+{
+       struct in_ifaddr  *ifa = (struct in_ifaddr *)ptr;
+       struct net_device *dev = ifa->ifa_dev->dev;
+
+       /* UP event and is it one of our devices? */
+       if (event == NETDEV_UP && dev->open == network_open)
+               (void)send_fake_arp(dev);
+
+       return NOTIFY_DONE;
+}
+
+
+/* ** Close down ** */
+
+
+/**
+ * Handle the change of state of the backend to Closing.  We must delete our
+ * device-layer structures now, to ensure that writes are flushed through to
+ * the backend.  Once this is done, we can switch to Closed in
+ * acknowledgement.
+ */
+static void netfront_closing(struct xenbus_device *dev)
+{
+       struct netfront_info *info = dev->dev.driver_data;
+
+       DPRINTK("netfront_closing: %s removed\n", dev->nodename);
+
+       close_netdev(info);
+
+       xenbus_switch_state(dev, XenbusStateClosed);
+}
+
+
+static int __devexit netfront_remove(struct xenbus_device *dev)
+{
+       struct netfront_info *info = dev->dev.driver_data;
+
+       DPRINTK("%s\n", dev->nodename);
+
+       netif_disconnect_backend(info);
+       free_netdev(info->netdev);
+
+       return 0;
+}
+
+
+static void close_netdev(struct netfront_info *info)
+{
+       del_timer_sync(&info->rx_refill_timer);
+
+       xennet_sysfs_delif(info->netdev);
+       unregister_netdev(info->netdev);
+}
+
+
+static void netif_disconnect_backend(struct netfront_info *info)
+{
+       /* Stop old i/f to prevent errors whilst we rebuild the state. */
+       spin_lock_irq(&info->tx_lock);
+       spin_lock(&info->rx_lock);
+       netif_carrier_off(info->netdev);
+       spin_unlock(&info->rx_lock);
+       spin_unlock_irq(&info->tx_lock);
+
+       if (info->irq)
+               unbind_from_irqhandler(info->irq, info->netdev);
+       info->evtchn = info->irq = 0;
+
+       end_access(info->tx_ring_ref, info->tx.sring);
+       end_access(info->rx_ring_ref, info->rx.sring);
+       info->tx_ring_ref = GRANT_INVALID_REF;
+       info->rx_ring_ref = GRANT_INVALID_REF;
+       info->tx.sring = NULL;
+       info->rx.sring = NULL;
+}
+
+
+static void netif_free(struct netfront_info *info)
+{
+       close_netdev(info);
+       netif_disconnect_backend(info);
+       free_netdev(info->netdev);
+}
+
+
+static void end_access(int ref, void *page)
+{
+       if (ref != GRANT_INVALID_REF)
+               gnttab_end_foreign_access(ref, 0, (unsigned long)page);
+}
+
+
+/* ** Driver registration ** */
+
+
+static struct xenbus_device_id netfront_ids[] = {
+       { "vif" },
+       { "" }
+};
+
+
+static struct xenbus_driver netfront = {
+       .name = "vif",
+       .owner = THIS_MODULE,
+       .ids = netfront_ids,
+       .probe = netfront_probe,
+       .remove = __devexit_p(netfront_remove),
+       .resume = netfront_resume,
+       .otherend_changed = backend_changed,
+};
+
+
+static struct notifier_block notifier_inetdev = {
+       .notifier_call  = inetdev_notify,
+       .next           = NULL,
+       .priority       = 0
+};
+
+static int __init netif_init(void)
+{
+       if (!is_running_on_xen())
+               return -ENODEV;
+
+       if (xen_start_info->flags & SIF_INITDOMAIN)
+               return 0;
+
+       IPRINTK("Initialising virtual ethernet driver.\n");
+
+       (void)register_inetaddr_notifier(&notifier_inetdev);
+
+       return xenbus_register_frontend(&netfront);
+}
+module_init(netif_init);
+
+
+static void __exit netif_exit(void)
+{
+       unregister_inetaddr_notifier(&notifier_inetdev);
+
+       return xenbus_unregister_driver(&netfront);
+}
+module_exit(netif_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/xen/privcmd/Makefile b/drivers/xen/privcmd/Makefile
new file mode 100644 (file)
index 0000000..de090ba
--- /dev/null
@@ -0,0 +1,2 @@
+
+obj-$(CONFIG_XEN_PRIVCMD)      := privcmd.o
diff --git a/drivers/xen/privcmd/privcmd.c b/drivers/xen/privcmd/privcmd.c
new file mode 100644 (file)
index 0000000..3c9bb9f
--- /dev/null
@@ -0,0 +1,286 @@
+/******************************************************************************
+ * privcmd.c
+ * 
+ * Interface to privileged domain-0 commands.
+ * 
+ * Copyright (c) 2002-2004, K A Fraser, B Dragovic
+ */
+
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/swap.h>
+#include <linux/smp_lock.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/seq_file.h>
+#include <linux/kthread.h>
+#include <asm/hypervisor.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/uaccess.h>
+#include <asm/tlb.h>
+#include <asm/hypervisor.h>
+#include <xen/public/privcmd.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/dom0_ops.h>
+#include <xen/xen_proc.h>
+
+static struct proc_dir_entry *privcmd_intf;
+static struct proc_dir_entry *capabilities_intf;
+
+#define NR_HYPERCALLS 64
+static DECLARE_BITMAP(hypercall_permission_map, NR_HYPERCALLS);
+
+static int privcmd_ioctl(struct inode *inode, struct file *file,
+                        unsigned int cmd, unsigned long data)
+{
+       int ret = -ENOSYS;
+       void __user *udata = (void __user *) data;
+
+       switch (cmd) {
+       case IOCTL_PRIVCMD_HYPERCALL: {
+               privcmd_hypercall_t hypercall;
+  
+               if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
+                       return -EFAULT;
+
+               /* Check hypercall number for validity. */
+               if (hypercall.op >= NR_HYPERCALLS)
+                       return -EINVAL;
+               if (!test_bit(hypercall.op, hypercall_permission_map))
+                       return -EINVAL;
+
+#if defined(__i386__)
+               __asm__ __volatile__ (
+                       "pushl %%ebx; pushl %%ecx; pushl %%edx; "
+                       "pushl %%esi; pushl %%edi; "
+                       "movl  8(%%eax),%%ebx ;"
+                       "movl 16(%%eax),%%ecx ;"
+                       "movl 24(%%eax),%%edx ;"
+                       "movl 32(%%eax),%%esi ;"
+                       "movl 40(%%eax),%%edi ;"
+                       "movl   (%%eax),%%eax ;"
+                       "shll $5,%%eax ;"
+                       "addl $hypercall_page,%%eax ;"
+                       "call *%%eax ;"
+                       "popl %%edi; popl %%esi; popl %%edx; "
+                       "popl %%ecx; popl %%ebx"
+                       : "=a" (ret) : "0" (&hypercall) : "memory" );
+#elif defined (__x86_64__)
+               {
+                       long ign1, ign2, ign3;
+                       __asm__ __volatile__ (
+                               "movq %8,%%r10; movq %9,%%r8;"
+                               "shlq $5,%%rax ;"
+                               "addq $hypercall_page,%%rax ;"
+                               "call *%%rax"
+                               : "=a" (ret), "=D" (ign1),
+                                 "=S" (ign2), "=d" (ign3)
+                               : "0" ((unsigned long)hypercall.op), 
+                               "1" ((unsigned long)hypercall.arg[0]), 
+                               "2" ((unsigned long)hypercall.arg[1]),
+                               "3" ((unsigned long)hypercall.arg[2]), 
+                               "g" ((unsigned long)hypercall.arg[3]),
+                               "g" ((unsigned long)hypercall.arg[4])
+                               : "r8", "r10", "memory" );
+               }
+#elif defined (__ia64__)
+               __asm__ __volatile__ (
+                       ";; mov r14=%2; mov r15=%3; "
+                       "mov r16=%4; mov r17=%5; mov r18=%6;"
+                       "mov r2=%1; break 0x1000;; mov %0=r8 ;;"
+                       : "=r" (ret)
+                       : "r" (hypercall.op),
+                       "r" (hypercall.arg[0]),
+                       "r" (hypercall.arg[1]),
+                       "r" (hypercall.arg[2]),
+                       "r" (hypercall.arg[3]),
+                       "r" (hypercall.arg[4])
+                       : "r14","r15","r16","r17","r18","r2","r8","memory");
+#endif
+       }
+       break;
+
+#if defined(CONFIG_XEN_PRIVILEGED_GUEST)
+       case IOCTL_PRIVCMD_MMAP: {
+#define PRIVCMD_MMAP_SZ 32
+               privcmd_mmap_t mmapcmd;
+               privcmd_mmap_entry_t msg[PRIVCMD_MMAP_SZ];
+               privcmd_mmap_entry_t __user *p;
+               int i, rc;
+
+               if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
+                       return -EFAULT;
+
+               p = mmapcmd.entry;
+
+               for (i = 0; i < mmapcmd.num;
+                    i += PRIVCMD_MMAP_SZ, p += PRIVCMD_MMAP_SZ) {
+                       int j, n = ((mmapcmd.num-i)>PRIVCMD_MMAP_SZ)?
+                               PRIVCMD_MMAP_SZ:(mmapcmd.num-i);
+
+                       if (copy_from_user(&msg, p,
+                                          n*sizeof(privcmd_mmap_entry_t)))
+                               return -EFAULT;
+     
+                       for (j = 0; j < n; j++) {
+                               struct vm_area_struct *vma = 
+                                       find_vma( current->mm, msg[j].va );
+
+                               if (!vma)
+                                       return -EINVAL;
+
+                               if (msg[j].va > PAGE_OFFSET)
+                                       return -EINVAL;
+
+                               if ((msg[j].va + (msg[j].npages << PAGE_SHIFT))
+                                   > vma->vm_end )
+                                       return -EINVAL;
+
+                               if ((rc = direct_remap_pfn_range(
+                                       vma,
+                                       msg[j].va&PAGE_MASK, 
+                                       msg[j].mfn, 
+                                       msg[j].npages<<PAGE_SHIFT, 
+                                       vma->vm_page_prot,
+                                       mmapcmd.dom)) < 0)
+                                       return rc;
+                       }
+               }
+               ret = 0;
+       }
+       break;
+
+       case IOCTL_PRIVCMD_MMAPBATCH: {
+               privcmd_mmapbatch_t m;
+               struct vm_area_struct *vma = NULL;
+               xen_pfn_t __user *p;
+               unsigned long addr, mfn; 
+               int i;
+
+               if (copy_from_user(&m, udata, sizeof(m))) {
+                       ret = -EFAULT;
+                       goto batch_err;
+               }
+
+               if (m.dom == DOMID_SELF) {
+                       ret = -EINVAL;
+                       goto batch_err;
+               }
+
+               vma = find_vma(current->mm, m.addr);
+               if (!vma) {
+                       ret = -EINVAL;
+                       goto batch_err;
+               }
+
+               if (m.addr > PAGE_OFFSET) {
+                       ret = -EFAULT;
+                       goto batch_err;
+               }
+
+               if ((m.addr + (m.num<<PAGE_SHIFT)) > vma->vm_end) {
+                       ret = -EFAULT;
+                       goto batch_err;
+               }
+
+               p = m.arr;
+               addr = m.addr;
+               for (i = 0; i < m.num; i++, addr += PAGE_SIZE, p++) {
+                       if (get_user(mfn, p))
+                               return -EFAULT;
+
+                       ret = direct_remap_pfn_range(vma, addr & PAGE_MASK,
+                                                    mfn, PAGE_SIZE,
+                                                    vma->vm_page_prot, m.dom);
+                       if (ret < 0)
+                               put_user(0xF0000000 | mfn, p);
+               }
+
+               ret = 0;
+               break;
+
+       batch_err:
+               printk("batch_err ret=%d vma=%p addr=%lx "
+                      "num=%d arr=%p %lx-%lx\n", 
+                      ret, vma, (unsigned long)m.addr, m.num, m.arr,
+                      vma ? vma->vm_start : 0, vma ? vma->vm_end : 0);
+               break;
+       }
+       break;
+#endif
+
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static int privcmd_mmap(struct file * file, struct vm_area_struct * vma)
+{
+       /* DONTCOPY is essential for Xen as copy_page_range is broken. */
+       vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+
+       return 0;
+}
+#endif
+
+static struct file_operations privcmd_file_ops = {
+       .ioctl = privcmd_ioctl,
+       .mmap  = privcmd_mmap,
+};
+
+static int capabilities_read(char *page, char **start, off_t off,
+                            int count, int *eof, void *data)
+{
+       int len = 0;
+       *page = 0;
+
+       if (xen_start_info->flags & SIF_INITDOMAIN)
+               len = sprintf( page, "control_d\n" );
+
+       *eof = 1;
+       return len;
+}
+
+static int __init privcmd_init(void)
+{
+       if (!is_running_on_xen())
+               return -ENODEV;
+
+       /* Set of hypercalls that privileged applications may execute. */
+       set_bit(__HYPERVISOR_acm_op,           hypercall_permission_map);
+       set_bit(__HYPERVISOR_dom0_op,          hypercall_permission_map);
+       set_bit(__HYPERVISOR_event_channel_op, hypercall_permission_map);
+       set_bit(__HYPERVISOR_memory_op,        hypercall_permission_map);
+       set_bit(__HYPERVISOR_mmu_update,       hypercall_permission_map);
+       set_bit(__HYPERVISOR_mmuext_op,        hypercall_permission_map);
+       set_bit(__HYPERVISOR_xen_version,      hypercall_permission_map);
+       set_bit(__HYPERVISOR_sched_op,         hypercall_permission_map);
+       set_bit(__HYPERVISOR_sched_op_compat,  hypercall_permission_map);
+       set_bit(__HYPERVISOR_event_channel_op_compat,
+               hypercall_permission_map);
+       set_bit(__HYPERVISOR_hvm_op,           hypercall_permission_map);
+
+       privcmd_intf = create_xen_proc_entry("privcmd", 0400);
+       if (privcmd_intf != NULL)
+               privcmd_intf->proc_fops = &privcmd_file_ops;
+
+       capabilities_intf = create_xen_proc_entry("capabilities", 0400 );
+       if (capabilities_intf != NULL)
+               capabilities_intf->read_proc = capabilities_read;
+
+       return 0;
+}
+
+__initcall(privcmd_init);
index 0167990..1d0cf40 100644 (file)
@@ -842,6 +842,7 @@ config TMPFS
 config HUGETLBFS
        bool "HugeTLB file system support"
        depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
+       depends !XEN
        help
          hugetlbfs is a filesystem backing for HugeTLB pages, based on
          ramfs. For architectures that support it, say Y here and read
index 68ebd10..aa50dc6 100644 (file)
@@ -512,7 +512,11 @@ befs_utf2nls(struct super_block *sb, const char *in,
        wchar_t uni;
        int unilen, utflen;
        char *result;
-       int maxlen = in_len; /* The utf8->nls conversion can't make more chars */
+       /* The utf8->nls conversion won't make the final NLS string bigger
+        * than the UTF-8 one, but if the string is pure ASCII they'll have
+        * the same width, and an extra char is needed to hold the trailing \0
+        */
+       int maxlen = in_len + 1;
 
        befs_debug(sb, "---> utf2nls()");
 
@@ -588,7 +592,10 @@ befs_nls2utf(struct super_block *sb, const char *in,
        wchar_t uni;
        int unilen, utflen;
        char *result;
-       int maxlen = 3 * in_len;
+       /* There are NLS characters that will translate to 3-chars-wide UTF-8
+        * characters; an additional byte is needed to hold the final \0
+        * in special cases */
+       int maxlen = (3 * in_len) + 1;
 
        befs_debug(sb, "---> nls2utf()\n");
 
index 093645b..479ee5f 100644 (file)
@@ -88,7 +88,7 @@ static struct linux_binfmt elf_format = {
                .min_coredump   = ELF_EXEC_PAGESIZE
 };
 
-#define BAD_ADDR(x)    ((unsigned long)(x) > PAGE_MASK)
+#define BAD_ADDR(x)    ((unsigned long)(x) >= PAGE_MASK)
 
 static int set_brk(unsigned long start, unsigned long end)
 {
@@ -441,7 +441,7 @@ static unsigned long load_elf_interp(struct elfhdr * interp_elf_ex,
             * <= p_memsize so it is only necessary to check p_memsz.
             */
            k = load_addr + eppnt->p_vaddr;
-           if (k > TASK_SIZE || eppnt->p_filesz > eppnt->p_memsz ||
+           if (BAD_ADDR(k) || eppnt->p_filesz > eppnt->p_memsz ||
                eppnt->p_memsz > TASK_SIZE || TASK_SIZE - eppnt->p_memsz < k) {
                error = -ENOMEM;
                goto out_close;
@@ -944,7 +944,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
-               if (k > TASK_SIZE || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
+               if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work.  Avoid overflows.  */
@@ -1005,10 +1005,9 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
                        }
                }
                if (BAD_ADDR(elf_entry)) {
-                       printk(KERN_ERR "Unable to load interpreter %.128s\n",
-                               elf_interpreter);
                        force_sig(SIGSEGV, current);
-                       retval = -ENOEXEC; /* Nobody gets to see this, but.. */
+                       retval = IS_ERR((void *)elf_entry) ?
+                                       (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;
@@ -1019,8 +1018,8 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
-                       send_sig(SIGSEGV, current, 0);
-                       retval = -ENOEXEC; /* Nobody gets to see this, but.. */
+                       force_sig(SIGSEGV, current);
+                       retval = -EINVAL;
                        goto out_free_dentry;
                }
        }
index 23f1f3a..7f6d659 100644 (file)
@@ -473,13 +473,18 @@ out:
    pass does the actual I/O. */
 void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
 {
+       struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+       if (mapping->nrpages == 0)
+               return;
+
        invalidate_bh_lrus();
        /*
         * FIXME: what about destroy_dirty_buffers?
         * We really want to use invalidate_inode_pages2() for
         * that, but not until that's cleaned up.
         */
-       invalidate_inode_pages(bdev->bd_inode->i_mapping);
+       invalidate_inode_pages(mapping);
 }
 
 /*
index f05b2cf..2299325 100644 (file)
@@ -252,6 +252,46 @@ static struct super_operations ext2_sops = {
 #endif
 };
 
+static struct dentry *ext2_get_dentry(struct super_block *sb, void *vobjp)
+{
+       __u32 *objp = vobjp;
+       unsigned long ino = objp[0];
+       __u32 generation = objp[1];
+       struct inode *inode;
+       struct dentry *result;
+
+       if (ino != EXT2_ROOT_INO && ino < EXT2_FIRST_INO(sb))
+               return ERR_PTR(-ESTALE);
+       if (ino > le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count))
+               return ERR_PTR(-ESTALE);
+
+       /* iget isn't really right if the inode is currently unallocated!!
+        * ext2_read_inode currently does appropriate checks, but
+        * it might be "neater" to call ext2_get_inode first and check
+        * if the inode is valid.....
+        */
+       inode = iget(sb, ino);
+       if (inode == NULL)
+               return ERR_PTR(-ENOMEM);
+       if (is_bad_inode(inode)
+           || (generation && inode->i_generation != generation)
+               ) {
+               /* we didn't find the right inode.. */
+               iput(inode);
+               return ERR_PTR(-ESTALE);
+       }
+       /* now to find a dentry.
+        * If possible, get a well-connected one
+        */
+       result = d_alloc_anon(inode);
+       if (!result) {
+               iput(inode);
+               return ERR_PTR(-ENOMEM);
+       }
+       return result;
+}
+
+
 /* Yes, most of these are left as NULL!!
  * A NULL value implies the default, which works with ext2-like file
  * systems, but can be improved upon.
@@ -259,6 +299,7 @@ static struct super_operations ext2_sops = {
  */
 static struct export_operations ext2_export_ops = {
        .get_parent = ext2_get_parent,
+       .get_dentry = ext2_get_dentry,
 };
 
 static unsigned long get_sb_block(void **data)
index bdd65f4..4ce217b 100644 (file)
@@ -1160,7 +1160,7 @@ retry:
                ret = PTR_ERR(handle);
                goto out;
        }
-       if (test_opt(inode->i_sb, NOBH))
+       if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
                ret = nobh_prepare_write(page, from, to, ext3_get_block);
        else
                ret = block_prepare_write(page, from, to, ext3_get_block);
@@ -1246,7 +1246,7 @@ static int ext3_writeback_commit_write(struct file *file, struct page *page,
        if (new_i_size > EXT3_I(inode)->i_disksize)
                EXT3_I(inode)->i_disksize = new_i_size;
 
-       if (test_opt(inode->i_sb, NOBH))
+       if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
                ret = nobh_commit_write(file, page, from, to);
        else
                ret = generic_commit_write(file, page, from, to);
@@ -1496,7 +1496,7 @@ static int ext3_writeback_writepage(struct page *page,
                goto out_fail;
        }
 
-       if (test_opt(inode->i_sb, NOBH))
+       if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode))
                ret = nobh_writepage(page, ext3_get_block, wbc);
        else
                ret = block_write_full_page(page, ext3_get_block, wbc);
@@ -2403,14 +2403,15 @@ static unsigned long ext3_get_inode_block(struct super_block *sb,
        struct buffer_head *bh;
        struct ext3_group_desc * gdp;
 
-
-       if ((ino != EXT3_ROOT_INO && ino != EXT3_JOURNAL_INO &&
-               ino != EXT3_RESIZE_INO && ino < EXT3_FIRST_INO(sb)) ||
-               ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) {
-               ext3_error(sb, "ext3_get_inode_block",
-                           "bad inode number: %lu", ino);
+       if (!ext3_valid_inum(sb, ino)) {
+               /*
+                * This error is already checked for in namei.c unless we are
+                * looking at an NFS filehandle, in which case no error
+                * report is needed
+                */
                return 0;
        }
+
        block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb);
        if (block_group >= EXT3_SB(sb)->s_groups_count) {
                ext3_error(sb,"ext3_get_inode_block","group >= groups count");
index 329eae3..045f588 100644 (file)
@@ -1001,7 +1001,12 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
        if (bh) {
                unsigned long ino = le32_to_cpu(de->inode);
                brelse (bh);
-               inode = iget(dir->i_sb, ino);
+               if (!ext3_valid_inum(dir->i_sb, ino)) {
+                       ext3_error(dir->i_sb, "ext3_lookup",
+                                  "bad inode number: %lu", ino);
+                       inode = NULL;
+               } else
+                       inode = iget(dir->i_sb, ino);
 
                if (!inode)
                        return ERR_PTR(-EACCES);
@@ -1030,7 +1035,13 @@ struct dentry *ext3_get_parent(struct dentry *child)
                return ERR_PTR(-ENOENT);
        ino = le32_to_cpu(de->inode);
        brelse(bh);
-       inode = iget(child->d_inode->i_sb, ino);
+
+       if (!ext3_valid_inum(child->d_inode->i_sb, ino)) {
+               ext3_error(child->d_inode->i_sb, "ext3_get_parent",
+                          "bad inode number: %lu", ino);
+               inode = NULL;
+       } else
+               inode = iget(child->d_inode->i_sb, ino);
 
        if (!inode)
                return ERR_PTR(-EACCES);
index 2a0cb4c..ed22106 100644 (file)
@@ -620,8 +620,48 @@ static struct super_operations ext3_sops = {
 #endif
 };
 
+static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp)
+{
+       __u32 *objp = vobjp;
+       unsigned long ino = objp[0];
+       __u32 generation = objp[1];
+       struct inode *inode;
+       struct dentry *result;
+
+       if (ino != EXT3_ROOT_INO && ino < EXT3_FIRST_INO(sb))
+               return ERR_PTR(-ESTALE);
+       if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
+               return ERR_PTR(-ESTALE);
+
+       /* iget isn't really right if the inode is currently unallocated!!
+        * ext3_read_inode currently does appropriate checks, but
+        * it might be "neater" to call ext3_get_inode first and check
+        * if the inode is valid.....
+        */
+       inode = iget(sb, ino);
+       if (inode == NULL)
+               return ERR_PTR(-ENOMEM);
+       if (is_bad_inode(inode)
+           || (generation && inode->i_generation != generation)
+               ) {
+               /* we didn't find the right inode.. */
+               iput(inode);
+               return ERR_PTR(-ESTALE);
+       }
+       /* now to find a dentry.
+        * If possible, get a well-connected one
+        */
+       result = d_alloc_anon(inode);
+       if (!result) {
+               iput(inode);
+               return ERR_PTR(-ENOMEM);
+       }
+       return result;
+}
+
 static struct export_operations ext3_export_ops = {
        .get_parent = ext3_get_parent,
+       .get_dentry = ext3_get_dentry,
 };
 
 enum {
index 55f4e70..4c2cacc 100644 (file)
--- a/fs/file.c
+++ b/fs/file.c
@@ -277,11 +277,13 @@ static struct fdtable *alloc_fdtable(int nr)
        } while (nfds <= nr);
        new_fds = alloc_fd_array(nfds);
        if (!new_fds)
-               goto out;
+               goto out2;
        fdt->fd = new_fds;
        fdt->max_fds = nfds;
        fdt->free_files = NULL;
        return fdt;
+out2:
+       nfds = fdt->max_fdset;
 out:
        if (new_openset)
                free_fdset(new_openset, nfds);
index 7ec121c..7189834 100644 (file)
@@ -1409,8 +1409,9 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
        if (!leases_enable)
                goto out;
 
-       error = lease_alloc(filp, arg, &fl);
-       if (error)
+       error = -ENOMEM;
+       fl = locks_alloc_lock();
+       if (fl == NULL)
                goto out;
 
        locks_copy_lock(fl, lease);
@@ -1418,6 +1419,7 @@ static int __setlease(struct file *filp, long arg, struct file_lock **flp)
        locks_insert_lock(before, fl);
 
        *flp = fl;
+       error = 0;
 out:
        return error;
 }
index 4a36030..75649bf 100644 (file)
@@ -1791,8 +1791,14 @@ do_link:
        if (error)
                goto exit_dput;
        error = __do_follow_link(&path, nd);
-       if (error)
+       if (error) {
+               /* Does someone understand code flow here? Or it is only
+                * me so stupid? Anathema to whoever designed this non-sense
+                * with "intent.open".
+                */
+               release_open_intent(nd);
                return error;
+       }
        nd->flags &= ~LOOKUP_PARENT;
        if (nd->last_type == LAST_BIND)
                goto ok;
index 8c88012..f6e0c62 100644 (file)
@@ -631,6 +631,27 @@ static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
        return proc_check_root(inode);
 }
 
+static int proc_setattr(struct dentry *dentry, struct iattr *attr)
+{
+       int error;
+       struct inode *inode = dentry->d_inode;
+
+       if (attr->ia_valid & ATTR_MODE)
+               return -EPERM;
+
+       error = inode_change_ok(inode, attr);
+       if (!error) {
+               error = security_inode_setattr(dentry, attr);
+               if (!error)
+                       error = inode_setattr(inode, attr);
+       }
+       return error;
+}
+
+static struct inode_operations proc_def_inode_operations = {
+       .setattr        = proc_setattr,
+};
+
 static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd)
 {
        struct dentry *root;
@@ -1022,6 +1043,7 @@ static struct file_operations proc_oom_adjust_operations = {
 
 static struct inode_operations proc_mem_inode_operations = {
        .permission     = proc_permission,
+       .setattr        = proc_setattr,
 };
 
 #ifdef CONFIG_AUDITSYSCALL
@@ -1219,7 +1241,8 @@ out:
 
 static struct inode_operations proc_pid_link_inode_operations = {
        .readlink       = proc_pid_readlink,
-       .follow_link    = proc_pid_follow_link
+       .follow_link    = proc_pid_follow_link,
+       .setattr        = proc_setattr,
 };
 
 #define NUMBUF 10
@@ -1400,6 +1423,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
        ei->task = NULL;
        inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
        inode->i_ino = fake_ino(task->pid, ino);
+       inode->i_op = &proc_def_inode_operations;
 
        if (!pid_alive(task))
                goto out_unlock;
@@ -1455,6 +1479,7 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
                        inode->i_uid = 0;
                        inode->i_gid = 0;
                }
+               inode->i_mode &= ~(S_ISUID | S_ISGID);
                security_task_to_inode(task, inode);
                return 1;
        }
@@ -1483,6 +1508,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
                                inode->i_uid = 0;
                                inode->i_gid = 0;
                        }
+                       inode->i_mode &= ~(S_ISUID | S_ISGID);
                        security_task_to_inode(task, inode);
                        return 1;
                }
@@ -1628,11 +1654,13 @@ static struct file_operations proc_task_operations = {
 static struct inode_operations proc_fd_inode_operations = {
        .lookup         = proc_lookupfd,
        .permission     = proc_permission,
+       .setattr        = proc_setattr,
 };
 
 static struct inode_operations proc_task_inode_operations = {
        .lookup         = proc_task_lookup,
        .permission     = proc_task_permission,
+       .setattr        = proc_setattr,
 };
 
 #ifdef CONFIG_SECURITY
@@ -1943,10 +1971,12 @@ static struct file_operations proc_tid_base_operations = {
 
 static struct inode_operations proc_tgid_base_inode_operations = {
        .lookup         = proc_tgid_base_lookup,
+       .setattr        = proc_setattr,
 };
 
 static struct inode_operations proc_tid_base_inode_operations = {
        .lookup         = proc_tid_base_lookup,
+       .setattr        = proc_setattr,
 };
 
 #ifdef CONFIG_SECURITY
@@ -1988,10 +2018,12 @@ static struct dentry *proc_tid_attr_lookup(struct inode *dir,
 
 static struct inode_operations proc_tgid_attr_inode_operations = {
        .lookup         = proc_tgid_attr_lookup,
+       .setattr        = proc_setattr,
 };
 
 static struct inode_operations proc_tid_attr_inode_operations = {
        .lookup         = proc_tid_attr_lookup,
+       .setattr        = proc_setattr,
 };
 #endif
 
@@ -2016,6 +2048,7 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
 static struct inode_operations proc_self_inode_operations = {
        .readlink       = proc_self_readlink,
        .follow_link    = proc_self_follow_link,
+       .setattr        = proc_setattr,
 };
 
 /**
index a285fd7..8fef667 100644 (file)
@@ -1294,6 +1294,85 @@ asmlinkage long sys_splice(int fd_in, loff_t __user *off_in,
        return error;
 }
 
+/*
+ * Make sure there's data to read. Wait for input if we can, otherwise
+ * return an appropriate error.
+ */
+static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+{
+       int ret;
+
+       /*
+        * Check ->nrbufs without the inode lock first. This function
+        * is speculative anyways, so missing one is ok.
+        */
+       if (pipe->nrbufs)
+               return 0;
+
+       ret = 0;
+       mutex_lock(&pipe->inode->i_mutex);
+
+       while (!pipe->nrbufs) {
+               if (signal_pending(current)) {
+                       ret = -ERESTARTSYS;
+                       break;
+               }
+               if (!pipe->writers)
+                       break;
+               if (!pipe->waiting_writers) {
+                       if (flags & SPLICE_F_NONBLOCK) {
+                               ret = -EAGAIN;
+                               break;
+                       }
+               }
+               pipe_wait(pipe);
+       }
+
+       mutex_unlock(&pipe->inode->i_mutex);
+       return ret;
+}
+
+/*
+ * Make sure there's writeable room. Wait for room if we can, otherwise
+ * return an appropriate error.
+ */
+static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
+{
+       int ret;
+
+       /*
+        * Check ->nrbufs without the inode lock first. This function
+        * is speculative anyways, so missing one is ok.
+        */
+       if (pipe->nrbufs < PIPE_BUFFERS)
+               return 0;
+
+       ret = 0;
+       mutex_lock(&pipe->inode->i_mutex);
+
+       while (pipe->nrbufs >= PIPE_BUFFERS) {
+               if (!pipe->readers) {
+                       send_sig(SIGPIPE, current, 0);
+                       ret = -EPIPE;
+                       break;
+               }
+               if (flags & SPLICE_F_NONBLOCK) {
+                       ret = -EAGAIN;
+                       break;
+               }
+               if (signal_pending(current)) {
+                       ret = -ERESTARTSYS;
+                       break;
+               }
+               pipe->waiting_writers++;
+               pipe_wait(pipe);
+               pipe->waiting_writers--;
+       }
+
+       mutex_unlock(&pipe->inode->i_mutex);
+       return ret;
+}
+
 /*
  * Link contents of ipipe to opipe.
  */
@@ -1302,9 +1381,7 @@ static int link_pipe(struct pipe_inode_info *ipipe,
                     size_t len, unsigned int flags)
 {
        struct pipe_buffer *ibuf, *obuf;
-       int ret, do_wakeup, i, ipipe_first;
-
-       ret = do_wakeup = ipipe_first = 0;
+       int ret = 0, i = 0, nbuf;
 
        /*
         * Potential ABBA deadlock, work around it by ordering lock
@@ -1312,7 +1389,6 @@ static int link_pipe(struct pipe_inode_info *ipipe,
         * could deadlock (one doing tee from A -> B, the other from B -> A).
         */
        if (ipipe->inode < opipe->inode) {
-               ipipe_first = 1;
                mutex_lock(&ipipe->inode->i_mutex);
                mutex_lock(&opipe->inode->i_mutex);
        } else {
@@ -1320,118 +1396,55 @@ static int link_pipe(struct pipe_inode_info *ipipe,
                mutex_lock(&ipipe->inode->i_mutex);
        }
 
-       for (i = 0;; i++) {
+       do {
                if (!opipe->readers) {
                        send_sig(SIGPIPE, current, 0);
                        if (!ret)
                                ret = -EPIPE;
                        break;
                }
-               if (ipipe->nrbufs - i) {
-                       ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
 
-                       /*
-                        * If we have room, fill this buffer
-                        */
-                       if (opipe->nrbufs < PIPE_BUFFERS) {
-                               int nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
-
-                               /*
-                                * Get a reference to this pipe buffer,
-                                * so we can copy the contents over.
-                                */
-                               ibuf->ops->get(ipipe, ibuf);
-
-                               obuf = opipe->bufs + nbuf;
-                               *obuf = *ibuf;
-
-                               /*
-                                * Don't inherit the gift flag, we need to
-                                * prevent multiple steals of this page.
-                                */
-                               obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
-
-                               if (obuf->len > len)
-                                       obuf->len = len;
-
-                               opipe->nrbufs++;
-                               do_wakeup = 1;
-                               ret += obuf->len;
-                               len -= obuf->len;
-
-                               if (!len)
-                                       break;
-                               if (opipe->nrbufs < PIPE_BUFFERS)
-                                       continue;
-                       }
-
-                       /*
-                        * We have input available, but no output room.
-                        * If we already copied data, return that. If we
-                        * need to drop the opipe lock, it must be ordered
-                        * last to avoid deadlocks.
-                        */
-                       if ((flags & SPLICE_F_NONBLOCK) || !ipipe_first) {
-                               if (!ret)
-                                       ret = -EAGAIN;
-                               break;
-                       }
-                       if (signal_pending(current)) {
-                               if (!ret)
-                                       ret = -ERESTARTSYS;
-                               break;
-                       }
-                       if (do_wakeup) {
-                               smp_mb();
-                               if (waitqueue_active(&opipe->wait))
-                                       wake_up_interruptible(&opipe->wait);
-                               kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
-                               do_wakeup = 0;
-                       }
+               /*
+                * If we have iterated all input buffers or ran out of
+                * output room, break.
+                */
+               if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
+                       break;
 
-                       opipe->waiting_writers++;
-                       pipe_wait(opipe);
-                       opipe->waiting_writers--;
-                       continue;
-               }
+               ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
+               nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);
 
                /*
-                * No input buffers, do the usual checks for available
-                * writers and blocking and wait if necessary
+                * Get a reference to this pipe buffer,
+                * so we can copy the contents over.
                 */
-               if (!ipipe->writers)
-                       break;
-               if (!ipipe->waiting_writers) {
-                       if (ret)
-                               break;
-               }
+               ibuf->ops->get(ipipe, ibuf);
+
+               obuf = opipe->bufs + nbuf;
+               *obuf = *ibuf;
+
                /*
-                * pipe_wait() drops the ipipe mutex. To avoid deadlocks
-                * with another process, we can only safely do that if
-                * the ipipe lock is ordered last.
+                * Don't inherit the gift flag, we need to
+                * prevent multiple steals of this page.
                 */
-               if ((flags & SPLICE_F_NONBLOCK) || ipipe_first) {
-                       if (!ret)
-                               ret = -EAGAIN;
-                       break;
-               }
-               if (signal_pending(current)) {
-                       if (!ret)
-                               ret = -ERESTARTSYS;
-                       break;
-               }
+               obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
 
-               if (waitqueue_active(&ipipe->wait))
-                       wake_up_interruptible_sync(&ipipe->wait);
-               kill_fasync(&ipipe->fasync_writers, SIGIO, POLL_OUT);
+               if (obuf->len > len)
+                       obuf->len = len;
 
-               pipe_wait(ipipe);
-       }
+               opipe->nrbufs++;
+               ret += obuf->len;
+               len -= obuf->len;
+               i++;
+       } while (len);
 
        mutex_unlock(&ipipe->inode->i_mutex);
        mutex_unlock(&opipe->inode->i_mutex);
 
-       if (do_wakeup) {
+       /*
+        * If we put data in the output pipe, wakeup any potential readers.
+        */
+       if (ret > 0) {
                smp_mb();
                if (waitqueue_active(&opipe->wait))
                        wake_up_interruptible(&opipe->wait);
@@ -1452,14 +1465,29 @@ static long do_tee(struct file *in, struct file *out, size_t len,
 {
        struct pipe_inode_info *ipipe = in->f_dentry->d_inode->i_pipe;
        struct pipe_inode_info *opipe = out->f_dentry->d_inode->i_pipe;
+       int ret = -EINVAL;
 
        /*
-        * Link ipipe to the two output pipes, consuming as we go along.
+        * Duplicate the contents of ipipe to opipe without actually
+        * copying the data.
         */
-       if (ipipe && opipe)
-               return link_pipe(ipipe, opipe, len, flags);
+       if (ipipe && opipe && ipipe != opipe) {
+               /*
+                * Keep going, unless we encounter an error. The ipipe/opipe
+                * ordering doesn't really matter.
+                */
+               ret = link_ipipe_prep(ipipe, flags);
+               if (!ret) {
+                       ret = link_opipe_prep(opipe, flags);
+                       if (!ret) {
+                               ret = link_pipe(ipipe, opipe, len, flags);
+                               if (!ret && (flags & SPLICE_F_NONBLOCK))
+                                       ret = -EAGAIN;
+                       }
+               }
+       }
 
-       return -EINVAL;
+       return ret;
 }
 
 asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags)
index e45789f..73fc0d8 100644 (file)
@@ -1653,7 +1653,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
                iput(inode);
                goto error_out;
        }
-       sb->s_maxbytes = MAX_LFS_FILESIZE;
+       sb->s_maxbytes = 1<<30;
        return 0;
 
 error_out:
index e1b0e8c..0abd66c 100644 (file)
@@ -239,37 +239,51 @@ void udf_truncate_extents(struct inode * inode)
        {
                if (offset)
                {
-                       extoffset -= adsize;
-                       etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1);
-                       if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))
-                       {
-                               extoffset -= adsize;
-                               elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset);
-                               udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0);
+                       /*
+                        *  OK, there is not extent covering inode->i_size and
+                        *  no extent above inode->i_size => truncate is
+                        *  extending the file by 'offset'.
+                        */
+                       if ((!bh && extoffset == udf_file_entry_alloc_offset(inode)) ||
+                           (bh && extoffset == sizeof(struct allocExtDesc))) {
+                               /* File has no extents at all! */
+                               memset(&eloc, 0x00, sizeof(kernel_lb_addr));
+                               elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
+                               udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
                        }
-                       else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30))
-                       {
-                               kernel_lb_addr neloc = { 0, 0 };
+                       else {
                                extoffset -= adsize;
-                               nelen = EXT_NOT_RECORDED_NOT_ALLOCATED |
-                                       ((elen + offset + inode->i_sb->s_blocksize - 1) &
-                                       ~(inode->i_sb->s_blocksize - 1));
-                               udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1);
-                               udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1);
-                       }
-                       else
-                       {
-                               if (elen & (inode->i_sb->s_blocksize - 1))
+                               etype = udf_next_aext(inode, &bloc, &extoffset, &eloc, &elen, &bh, 1);
+                               if (etype == (EXT_NOT_RECORDED_NOT_ALLOCATED >> 30))
+                               {
+                                       extoffset -= adsize;
+                                       elen = EXT_NOT_RECORDED_NOT_ALLOCATED | (elen + offset);
+                                       udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 0);
+                               }
+                               else if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30))
                                {
+                                       kernel_lb_addr neloc = { 0, 0 };
                                        extoffset -= adsize;
-                                       elen = EXT_RECORDED_ALLOCATED |
-                                               ((elen + inode->i_sb->s_blocksize - 1) &
+                                       nelen = EXT_NOT_RECORDED_NOT_ALLOCATED |
+                                               ((elen + offset + inode->i_sb->s_blocksize - 1) &
                                                ~(inode->i_sb->s_blocksize - 1));
-                                       udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1);
+                                       udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1);
+                                       udf_add_aext(inode, &bloc, &extoffset, eloc, (etype << 30) | elen, &bh, 1);
+                               }
+                               else
+                               {
+                                       if (elen & (inode->i_sb->s_blocksize - 1))
+                                       {
+                                               extoffset -= adsize;
+                                               elen = EXT_RECORDED_ALLOCATED |
+                                                       ((elen + inode->i_sb->s_blocksize - 1) &
+                                                       ~(inode->i_sb->s_blocksize - 1));
+                                               udf_write_aext(inode, bloc, &extoffset, eloc, elen, bh, 1);
+                                       }
+                                       memset(&eloc, 0x00, sizeof(kernel_lb_addr));
+                                       elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
+                                       udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
                                }
-                               memset(&eloc, 0x00, sizeof(kernel_lb_addr));
-                               elen = EXT_NOT_RECORDED_NOT_ALLOCATED | offset;
-                               udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &bh, 1);
                        }
                }
        }
index ac511ab..0655cc3 100644 (file)
@@ -970,7 +970,7 @@ xfs_dir2_leafn_remove(
                        /*
                         * One less used entry in the free table.
                         */
-                       free->hdr.nused = cpu_to_be32(-1);
+                       be32_add(&free->hdr.nused, -1);
                        xfs_dir2_free_log_header(tp, fbp);
                        /*
                         * If this was the last entry in the table, we can
index 3b41d2b..010ced7 100644 (file)
 #define MAP_ANON       MAP_ANONYMOUS
 #define MAP_FILE       0
 
+#ifdef __KERNEL__
+#ifndef arch_mmap_check
+#define arch_mmap_check(addr, len, flags)      (0)
+#endif
+#endif
+
 #endif
index 288233f..87476de 100644 (file)
@@ -132,10 +132,12 @@ extern unsigned int nmi_watchdog;
 
 extern int disable_timer_pin_1;
 
+#ifndef CONFIG_XEN
 void smp_send_timer_broadcast_ipi(struct pt_regs *regs);
 void switch_APIC_timer_to_ipi(void *cpumask);
 void switch_ipi_to_APIC_timer(void *cpumask);
 #define ARCH_APICTIMER_STOPS_ON_C3     1
+#endif
 
 extern int timer_over_8254;
 
index aa37153..fdecfa0 100644 (file)
@@ -170,7 +170,7 @@ static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
 }
 
 #define load_user_cs_desc(cpu, mm) \
-       get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs
+       get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs
 
 extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);
 extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
index cfb1c61..f11513a 100644 (file)
@@ -20,7 +20,7 @@
  * Leave one empty page between vmalloc'ed areas and
  * the start of the fixmap.
  */
-#define __FIXADDR_TOP  0xfffff000
+extern unsigned long __FIXADDR_TOP;
 
 #ifndef __ASSEMBLY__
 #include <linux/kernel.h>
@@ -95,6 +95,8 @@ enum fixed_addresses {
 extern void __set_fixmap (enum fixed_addresses idx,
                                        unsigned long phys, pgprot_t flags);
 
+extern void set_fixaddr_top(unsigned long top);
+
 #define set_fixmap(idx, phys) \
                __set_fixmap(idx, phys, PAGE_KERNEL)
 /*
index 625438b..4e34a91 100644 (file)
@@ -15,6 +15,18 @@ static inline void clear_mem_error(unsigned char reason)
        outb(reason, 0x61);
 }
 
+static inline void clear_io_check_error(unsigned char reason)
+{
+       unsigned long i;
+
+       reason = (reason & 0xf) | 8;
+       outb(reason, 0x61);
+       i = 2000;
+       while (--i) udelay(1000);
+       reason &= ~8;
+       outb(reason, 0x61);
+}
+
 static inline unsigned char get_nmi_reason(void)
 {
        return inb(0x61);
index 104653b..2b2683e 100644 (file)
@@ -128,7 +128,7 @@ extern int devmem_is_allowed(unsigned long pagenr);
 
 #define PAGE_OFFSET            ((unsigned long)__PAGE_OFFSET)
 #define VMALLOC_RESERVE                ((unsigned long)__VMALLOC_RESERVE)
-#define MAXMEM                 (-__PAGE_OFFSET-__VMALLOC_RESERVE)
+#define MAXMEM                 (__FIXADDR_TOP-__PAGE_OFFSET-__VMALLOC_RESERVE)
 #define __pa(x)                        ((unsigned long)(x)-PAGE_OFFSET)
 #define __va(x)                        ((void *)((unsigned long)(x)+PAGE_OFFSET))
 #define pfn_to_kaddr(pfn)      __va((pfn) << PAGE_SHIFT)
index 0251807..bd6346f 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _I386_PGTABLE_2LEVEL_DEFS_H
 #define _I386_PGTABLE_2LEVEL_DEFS_H
 
+#define HAVE_SHARED_KERNEL_PMD 0
+
 /*
  * traditional i386 two-level paging structure:
  */
index eb3a1ea..0c75992 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _I386_PGTABLE_3LEVEL_DEFS_H
 #define _I386_PGTABLE_3LEVEL_DEFS_H
 
+#define HAVE_SHARED_KERNEL_PMD 1
+
 /*
  * PGDIR_SHIFT determines what a top-level page table entry can map
  */
index d76b769..6d794a3 100644 (file)
@@ -36,7 +36,7 @@
        "\n1:\t" \
        "lock ; decb %0\n\t" \
        "jns 5f\n" \
-       "2:\t" \
+       "2:\t"                 \
        "testl $0x200, %1\n\t" \
        "jz 4f\n\t" \
        "sti\n" \
index 6ba179f..b242f95 100644 (file)
@@ -8,6 +8,14 @@
  *     David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
  */
 
+#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
+#define arch_mmap_check        ia64_map_check_rgn
+int ia64_map_check_rgn(unsigned long addr, unsigned long len,
+               unsigned long flags);
+#endif
+#endif
+
 #include <asm-generic/mman.h>
 
 #define MAP_GROWSDOWN  0x00100         /* stack-like segment */
index 9902185..6f807e0 100644 (file)
@@ -16,9 +16,9 @@
 #define _ASM_IA64_SN_XP_H
 
 
-#include <linux/version.h>
 #include <linux/cache.h>
 #include <linux/hardirq.h>
+#include <linux/mutex.h>
 #include <asm/sn/types.h>
 #include <asm/sn/bte.h>
 
@@ -50,7 +50,7 @@
  * C-brick nasids, thus the need for bitmaps which don't account for
  * odd-numbered (non C-brick) nasids.
  */
-#define XP_MAX_PHYSNODE_ID     (MAX_PHYSNODE_ID / 2)
+#define XP_MAX_PHYSNODE_ID     (MAX_NUMALINK_NODES / 2)
 #define XP_NASID_MASK_BYTES    ((XP_MAX_PHYSNODE_ID + 7) / 8)
 #define XP_NASID_MASK_WORDS    ((XP_MAX_PHYSNODE_ID + 63) / 64)
 
  * the bte_copy() once in the hope that the failure was due to a temporary
  * aberration (i.e., the link going down temporarily).
  *
- * See bte_copy for definition of the input parameters.
+ *     src - physical address of the source of the transfer.
+ *     vdst - virtual address of the destination of the transfer.
+ *     len - number of bytes to transfer from source to destination.
+ *     mode - see bte_copy() for definition.
+ *     notification - see bte_copy() for definition.
  *
  * Note: xp_bte_copy() should never be called while holding a spinlock.
  */
 static inline bte_result_t
-xp_bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
+xp_bte_copy(u64 src, u64 vdst, u64 len, u64 mode, void *notification)
 {
        bte_result_t ret;
+       u64 pdst = ia64_tpa(vdst);
 
 
-       ret = bte_copy(src, dest, len, mode, notification);
+       /*
+        * Ensure that the physically mapped memory is contiguous.
+        *
+        * We do this by ensuring that the memory is from region 7 only.
+        * If the need should arise to use memory from one of the other
+        * regions, then modify the BUG_ON() statement to ensure that the
+        * memory from that region is always physically contiguous.
+        */
+       BUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL);
 
+       ret = bte_copy(src, pdst, len, mode, notification);
        if (ret != BTE_SUCCESS) {
                if (!in_interrupt()) {
                        cond_resched();
                }
-               ret = bte_copy(src, dest, len, mode, notification);
+               ret = bte_copy(src, pdst, len, mode, notification);
        }
 
        return ret;
@@ -218,7 +232,19 @@ enum xpc_retval {
        xpcInvalidPartid,       /* 42: invalid partition ID */
        xpcLocalPartid,         /* 43: local partition ID */
 
-       xpcUnknownReason        /* 44: unknown reason -- must be last in list */
+       xpcOtherGoingDown,      /* 44: other side going down, reason unknown */
+       xpcSystemGoingDown,     /* 45: system is going down, reason unknown */
+       xpcSystemHalt,          /* 46: system is being halted */
+       xpcSystemReboot,        /* 47: system is being rebooted */
+       xpcSystemPoweroff,      /* 48: system is being powered off */
+
+       xpcDisconnecting,       /* 49: channel disconnecting (closing) */
+
+       xpcOpenCloseError,      /* 50: channel open/close protocol error */
+
+       xpcDisconnected,        /* 51: channel disconnected (closed) */
+
+       xpcUnknownReason        /* 52: unknown reason -- must be last in list */
 };
 
 
@@ -343,12 +369,12 @@ typedef void (*xpc_notify_func)(enum xpc_retval reason, partid_t partid,
  *
  * The 'func' field points to the function to call when aynchronous
  * notification is required for such events as: a connection established/lost,
- * or an incomming message received, or an error condition encountered. A
+ * or an incoming message received, or an error condition encountered. A
  * non-NULL 'func' field indicates that there is an active registration for
  * the channel.
  */
 struct xpc_registration {
-       struct semaphore sema;
+       struct mutex mutex;
        xpc_channel_func func;          /* function to call */
        void *key;                      /* pointer to user's key */
        u16 nentries;                   /* #of msg entries in local msg queue */
index aa3b8ac..b454ad4 100644 (file)
@@ -684,7 +684,9 @@ extern struct xpc_vars *xpc_vars;
 extern struct xpc_rsvd_page *xpc_rsvd_page;
 extern struct xpc_vars_part *xpc_vars_part;
 extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
-extern char xpc_remote_copy_buffer[];
+extern char *xpc_remote_copy_buffer;
+extern void *xpc_remote_copy_buffer_base;
+extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
 extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
 extern void xpc_allow_IPI_ops(void);
 extern void xpc_restrict_IPI_ops(void);
index 1802775..ffedf14 100644 (file)
@@ -98,9 +98,10 @@ futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 
        if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int)))
                return -EFAULT;
-       asm volatile("   cs   %1,%4,0(%5)\n"
+       asm volatile("   sacf 256\n"
+                    "   cs   %1,%4,0(%5)\n"
                     "0: lr   %0,%1\n"
-                    "1:\n"
+                    "1: sacf 0\n"
 #ifndef __s390x__
                     ".section __ex_table,\"a\"\n"
                     "   .align 4\n"
index 88d1886..95ecab5 100644 (file)
@@ -2,6 +2,12 @@
 #ifndef __SPARC_MMAN_H__
 #define __SPARC_MMAN_H__
 
+#ifdef __KERNEL__
+#define arch_mmap_check        sparc_mmap_check
+int sparc_mmap_check(unsigned long addr, unsigned long len,
+               unsigned long flags);
+#endif
+
 #include <asm-generic/mman.h>
 
 /* SunOS'ified... */
index 6fd878e..b300276 100644 (file)
@@ -2,6 +2,12 @@
 #ifndef __SPARC64_MMAN_H__
 #define __SPARC64_MMAN_H__
 
+#ifdef __KERNEL__
+#define arch_mmap_check        sparc64_mmap_check
+int sparc64_mmap_check(unsigned long addr, unsigned long len,
+               unsigned long flags);
+#endif
+
 #include <asm-generic/mman.h>
 
 /* SunOS'ified... */
index 5015bb8..89d4243 100644 (file)
@@ -34,7 +34,7 @@
 #define _FP_MUL_MEAT_D(R,X,Y)                                  \
   _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
 #define _FP_MUL_MEAT_Q(R,X,Y)                                  \
-  _FP_MUL_MEAT_2_wide_3mul(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
+  _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
 
 #define _FP_DIV_MEAT_S(R,X,Y)  _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm)
 #define _FP_DIV_MEAT_D(R,X,Y)  _FP_DIV_MEAT_1_udiv_norm(D,R,X,Y)
index 0f7229c..4593114 100644 (file)
@@ -116,7 +116,7 @@ extern struct page *arch_validate(struct page *page, gfp_t mask, int order);
 #define HAVE_ARCH_VALIDATE
 #define devmem_is_allowed(x) 1
 
-extern void arch_free_page(struct page *page, int order);
+extern int arch_free_page(struct page *page, int order);
 #define HAVE_ARCH_FREE_PAGE
 
 #include <asm-generic/memory_model.h>
index bdbd893..79db75f 100644 (file)
@@ -105,11 +105,13 @@ extern int disable_timer_pin_1;
 
 extern void setup_threshold_lvt(unsigned long lvt_off);
 
+#ifndef CONFIG_XEN
 void smp_send_timer_broadcast_ipi(void);
 void switch_APIC_timer_to_ipi(void *cpumask);
 void switch_ipi_to_APIC_timer(void *cpumask);
 
 #define ARCH_APICTIMER_STOPS_ON_C3     1
+#endif
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
index 0df1715..91f751b 100644 (file)
@@ -127,7 +127,7 @@ asmlinkage void IRQ_NAME(nr); \
 __asm__( \
 "\n.p2align\n" \
 "IRQ" #nr "_interrupt:\n\t" \
-       "push $" #nr "-256 ; " \
+       "push $~(" #nr ") ; " \
        "jmp common_interrupt");
 
 #if defined(CONFIG_X86_IO_APIC)
index 2a5c162..be4bc76 100644 (file)
@@ -49,6 +49,7 @@ static inline int __prepare_ICR2 (unsigned int mask)
        return SET_APIC_DEST_FIELD(mask);
 }
 
+#ifndef CONFIG_XEN_UNPRIVILEGED_GUEST
 static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest)
 {
        /*
@@ -113,5 +114,6 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
        }
        local_irq_restore(flags);
 }
+#endif /* CONFIG_XEN_UNPRIVILEGED_GUEST */
 
 #endif /* __ASM_IPI_H */
index 31e83c3..195952b 100644 (file)
@@ -21,7 +21,7 @@ extern unsigned long __supported_pte_mask;
 
 #define swapper_pg_dir init_level4_pgt
 
-extern int nonx_setup(char *str);
+extern void nonx_setup(const char *str);
 extern void paging_init(void);
 extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
 
index 93535f0..9269df7 100644 (file)
@@ -408,6 +408,8 @@ struct ethtool_ops {
 #define ETHTOOL_GPERMADDR      0x00000020 /* Get permanent hardware address */
 #define ETHTOOL_GUFO           0x00000021 /* Get UFO enable (ethtool_value) */
 #define ETHTOOL_SUFO           0x00000022 /* Set UFO enable (ethtool_value) */
+#define ETHTOOL_GGSO           0x00000023 /* Get GSO enable (ethtool_value) */
+#define ETHTOOL_SGSO           0x00000024 /* Set GSO enable (ethtool_value) */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET         ETHTOOL_GSET
index 75c1dc8..33bc72c 100644 (file)
@@ -508,6 +508,15 @@ static inline struct ext3_inode_info *EXT3_I(struct inode *inode)
 {
        return container_of(inode, struct ext3_inode_info, vfs_inode);
 }
+
+static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino)
+{
+       return ino == EXT3_ROOT_INO ||
+               ino == EXT3_JOURNAL_INO ||
+               ino == EXT3_RESIZE_INO ||
+               (ino >= EXT3_FIRST_INO(sb) &&
+                ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count));
+}
 #else
 /* Assume that user mode programs are passing in an ext3fs superblock, not
  * a kernel struct super_block.  This will allow us to call the feature-test
index a308228..83ece62 100644 (file)
@@ -100,7 +100,11 @@ static inline int gfp_zone(gfp_t gfp)
  */
 
 #ifndef HAVE_ARCH_FREE_PAGE
-static inline void arch_free_page(struct page *page, int order) { }
+/*
+ * If arch_free_page returns non-zero then the generic free_page code can
+ * immediately bail: the arch-specific function has done all the work.
+ */
+static inline int arch_free_page(struct page *page, int order) { return 0; }
 #endif
 
 extern struct page *
index 892c4ea..8f2bcfb 100644 (file)
@@ -25,10 +25,16 @@ static inline void flush_kernel_dcache_page(struct page *page)
 
 /* declarations for linux/mm/highmem.c */
 unsigned int nr_free_highpages(void);
+#ifdef CONFIG_XEN
+void kmap_flush_unused(void);
+#endif
 
 #else /* CONFIG_HIGHMEM */
 
 static inline unsigned int nr_free_highpages(void) { return 0; }
+#ifdef CONFIG_XEN
+static inline void kmap_flush_unused(void) { }
+#endif
 
 static inline void *kmap(struct page *page)
 {
index d37c8d8..f559a71 100644 (file)
@@ -78,6 +78,7 @@ void *idr_find(struct idr *idp, int id);
 int idr_pre_get(struct idr *idp, gfp_t gfp_mask);
 int idr_get_new(struct idr *idp, void *ptr, int *id);
 int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id);
+void *idr_replace(struct idr *idp, void *ptr, int id);
 void idr_remove(struct idr *idp, int id);
 void idr_destroy(struct idr *idp);
 void idr_init(struct idr *idp);
index 2c08fdc..d55ff51 100644 (file)
@@ -58,6 +58,12 @@ extern void disable_irq(unsigned int irq);
 extern void enable_irq(unsigned int irq);
 #endif
 
+#ifdef CONFIG_HAVE_IRQ_IGNORE_UNHANDLED
+int irq_ignore_unhandled(unsigned int irq);
+#else
+#define irq_ignore_unhandled(irq) 0
+#endif
+
 #ifndef __ARCH_SET_SOFTIRQ_PENDING
 #define set_softirq_pending(x) (local_softirq_pending() = (x))
 #define or_softirq_pending(x)  (local_softirq_pending() |= (x))
index e4183fa..51eea5b 100644 (file)
@@ -166,6 +166,9 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_NONLINEAR   0x00800000      /* Is non-linear (remap_file_pages) */
 #define VM_MAPPED_COPY 0x01000000      /* T if mapped copy of data (nommu mmap) */
 #define VM_INSERTPAGE  0x02000000      /* The vma has had "vm_insert_page()" done on it */
+#ifdef CONFIG_XEN
+#define VM_FOREIGN     0x04000000      /* Has pages belonging to another VM */
+#endif
 
 #ifndef VM_STACK_DEFAULT_FLAGS         /* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
@@ -1026,6 +1029,13 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_GET       0x04    /* do get_page on page */
 #define FOLL_ANON      0x08    /* give ZERO_PAGE if no pgtable */
 
+#ifdef CONFIG_XEN
+typedef int (*pte_fn_t)(pte_t *pte, struct page *pmd_page, unsigned long addr,
+                       void *data);
+extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
+                              unsigned long size, pte_fn_t fn, void *data);
+#endif
+
 #ifdef CONFIG_PROC_FS
 void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
 #else
index f4169bb..c5b97cf 100644 (file)
@@ -232,6 +232,7 @@ enum netdev_state_t
        __LINK_STATE_RX_SCHED,
        __LINK_STATE_LINKWATCH_PENDING,
        __LINK_STATE_DORMANT,
+       __LINK_STATE_QDISC_RUNNING,
 };
 
 
@@ -307,9 +308,17 @@ struct net_device
 #define NETIF_F_HW_VLAN_RX     256     /* Receive VLAN hw acceleration */
 #define NETIF_F_HW_VLAN_FILTER 512     /* Receive filtering on VLAN */
 #define NETIF_F_VLAN_CHALLENGED        1024    /* Device cannot handle VLAN packets */
-#define NETIF_F_TSO            2048    /* Can offload TCP/IP segmentation */
+#define NETIF_F_GSO            2048    /* Enable software GSO. */
 #define NETIF_F_LLTX           4096    /* LockLess TX */
-#define NETIF_F_UFO             8192    /* Can offload UDP Large Send*/
+
+       /* Segmentation offload features */
+#define NETIF_F_GSO_SHIFT      16
+#define NETIF_F_TSO            (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
+#define NETIF_F_UFO            (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT)
+#define NETIF_F_GSO_ROBUST     (SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
+
+#define NETIF_F_GEN_CSUM       (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
+#define NETIF_F_ALL_CSUM       (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
 
        struct net_device       *next_sched;
 
@@ -398,6 +407,9 @@ struct net_device
        struct list_head        qdisc_list;
        unsigned long           tx_queue_len;   /* Max frames per queue allowed */
 
+       /* Partially transmitted GSO packet. */
+       struct sk_buff          *gso_skb;
+
        /* ingress path synchronizer */
        spinlock_t              ingress_lock;
        struct Qdisc            *qdisc_ingress;
@@ -406,7 +418,7 @@ struct net_device
  * One part is mostly used on xmit path (device)
  */
        /* hard_start_xmit synchronizer */
-       spinlock_t              xmit_lock ____cacheline_aligned_in_smp;
+       spinlock_t              _xmit_lock ____cacheline_aligned_in_smp;
        /* cpu id of processor entered to hard_start_xmit or -1,
           if nobody entered there.
         */
@@ -532,6 +544,9 @@ struct packet_type {
                                         struct net_device *,
                                         struct packet_type *,
                                         struct net_device *);
+       struct sk_buff          *(*gso_segment)(struct sk_buff *skb,
+                                               int features);
+       int                     (*gso_send_check)(struct sk_buff *skb);
        void                    *af_packet_priv;
        struct list_head        list;
 };
@@ -679,7 +694,8 @@ extern int          dev_change_name(struct net_device *, char *);
 extern int             dev_set_mtu(struct net_device *, int);
 extern int             dev_set_mac_address(struct net_device *,
                                            struct sockaddr *);
-extern void            dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
+extern int             dev_hard_start_xmit(struct sk_buff *skb,
+                                           struct net_device *dev);
 
 extern void            dev_init(void);
 
@@ -889,11 +905,43 @@ static inline void __netif_rx_complete(struct net_device *dev)
        clear_bit(__LINK_STATE_RX_SCHED, &dev->state);
 }
 
+static inline void netif_tx_lock(struct net_device *dev)
+{
+       spin_lock(&dev->_xmit_lock);
+       dev->xmit_lock_owner = smp_processor_id();
+}
+
+static inline void netif_tx_lock_bh(struct net_device *dev)
+{
+       spin_lock_bh(&dev->_xmit_lock);
+       dev->xmit_lock_owner = smp_processor_id();
+}
+
+static inline int netif_tx_trylock(struct net_device *dev)
+{
+       int ok = spin_trylock(&dev->_xmit_lock);
+       if (likely(ok))
+               dev->xmit_lock_owner = smp_processor_id();
+       return ok;
+}
+
+static inline void netif_tx_unlock(struct net_device *dev)
+{
+       dev->xmit_lock_owner = -1;
+       spin_unlock(&dev->_xmit_lock);
+}
+
+static inline void netif_tx_unlock_bh(struct net_device *dev)
+{
+       dev->xmit_lock_owner = -1;
+       spin_unlock_bh(&dev->_xmit_lock);
+}
+
 static inline void netif_tx_disable(struct net_device *dev)
 {
-       spin_lock_bh(&dev->xmit_lock);
+       netif_tx_lock_bh(dev);
        netif_stop_queue(dev);
-       spin_unlock_bh(&dev->xmit_lock);
+       netif_tx_unlock_bh(dev);
 }
 
 /* These functions live elsewhere (drivers/net/net_init.c, but related) */
@@ -921,6 +969,7 @@ extern int          netdev_max_backlog;
 extern int             weight_p;
 extern int             netdev_set_master(struct net_device *dev, struct net_device *master);
 extern int skb_checksum_help(struct sk_buff *skb, int inward);
+extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features);
 #ifdef CONFIG_BUG
 extern void netdev_rx_csum_fault(struct net_device *dev);
 #else
@@ -940,6 +989,19 @@ extern void dev_seq_stop(struct seq_file *seq, void *v);
 
 extern void linkwatch_run_queue(void);
 
+static inline int skb_gso_ok(struct sk_buff *skb, int features)
+{
+       int feature = skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT;
+       return (features & feature) == feature;
+}
+
+static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb)
+{
+       return skb_is_gso(skb) &&
+              (!skb_gso_ok(skb, dev->features) ||
+               unlikely(skb->ip_summed != CHECKSUM_HW));
+}
+
 #endif /* __KERNEL__ */
 
 #endif /* _LINUX_DEV_H */
index a75b84b..178a97e 100644 (file)
@@ -47,18 +47,26 @@ enum nf_br_hook_priorities {
 #define BRNF_BRIDGED                   0x08
 #define BRNF_NF_BRIDGE_PREROUTING      0x10
 
-
 /* Only used in br_forward.c */
-static inline
-void nf_bridge_maybe_copy_header(struct sk_buff *skb)
+static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb)
 {
+       int err;
+
        if (skb->nf_bridge) {
                if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
+                       err = skb_cow(skb, 18);
+                       if (err)
+                               return err;
                        memcpy(skb->data - 18, skb->nf_bridge->data, 18);
                        skb_push(skb, 4);
-               } else
+               } else {
+                       err = skb_cow(skb, 16);
+                       if (err)
+                               return err;
                        memcpy(skb->data - 16, skb->nf_bridge->data, 16);
+               }
        }
+       return 0;
 }
 
 /* This is called by the IP fragmenting code and it ensures there is
index 0d514b2..6c33ee6 100644 (file)
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <asm/atomic.h>
+
+#ifdef CONFIG_XEN
+#include <xen/interface/xenoprof.h>
+#endif
  
 struct super_block;
 struct dentry;
@@ -27,6 +31,11 @@ struct oprofile_operations {
        /* create any necessary configuration files in the oprofile fs.
         * Optional. */
        int (*create_files)(struct super_block * sb, struct dentry * root);
+       /* setup active domains with Xen */
+       int (*set_active)(int *active_domains, unsigned int adomains);
+        /* setup passive domains with Xen */
+        int (*set_passive)(int *passive_domains, unsigned int pdomains);
+       
        /* Do any necessary interrupt setup. Optional. */
        int (*setup)(void);
        /* Do any necessary interrupt shutdown. Optional. */
@@ -78,6 +87,8 @@ void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event);
 /* add a backtrace entry, to be called from the ->backtrace callback */
 void oprofile_add_trace(unsigned long eip);
 
+/* add a domain switch entry */
+int oprofile_add_domain_switch(int32_t domain_id);
 
 /**
  * Create a file of the given name as a child of the given root, with
index 4550ab0..2bd4cba 100644 (file)
 #define PCI_DEVICE_ID_VIA_PX8X0_0      0x0259
 #define PCI_DEVICE_ID_VIA_3269_0       0x0269
 #define PCI_DEVICE_ID_VIA_K8T800PRO_0  0x0282
+#define PCI_DEVICE_ID_VIA_3296_0       0x0296
 #define PCI_DEVICE_ID_VIA_8363_0       0x0305
 #define PCI_DEVICE_ID_VIA_P4M800CE     0x0314
 #define PCI_DEVICE_ID_VIA_8371_0       0x0391
 #define PCI_DEVICE_ID_VIA_82C561       0x0561
 #define PCI_DEVICE_ID_VIA_82C586_1     0x0571
 #define PCI_DEVICE_ID_VIA_82C576       0x0576
+#define PCI_DEVICE_ID_VIA_SATA_EIDE    0x0581
 #define PCI_DEVICE_ID_VIA_82C586_0     0x0586
 #define PCI_DEVICE_ID_VIA_82C596       0x0596
 #define PCI_DEVICE_ID_VIA_82C597_0     0x0597
 #define PCI_DEVICE_ID_VIA_8783_0       0x3208
 #define PCI_DEVICE_ID_VIA_8237         0x3227
 #define PCI_DEVICE_ID_VIA_8251         0x3287
-#define PCI_DEVICE_ID_VIA_3296_0       0x0296
+#define PCI_DEVICE_ID_VIA_8237A                0x3337
 #define PCI_DEVICE_ID_VIA_8231         0x8231
 #define PCI_DEVICE_ID_VIA_8231_4       0x8235
 #define PCI_DEVICE_ID_VIA_8365_1       0x8305
+#define PCI_DEVICE_ID_VIA_CX700                0x8324
 #define PCI_DEVICE_ID_VIA_8371_1       0x8391
 #define PCI_DEVICE_ID_VIA_82C598_1     0x8598
 #define PCI_DEVICE_ID_VIA_838X_1       0xB188
index bb01f8b..ba2a866 100644 (file)
@@ -4,6 +4,6 @@
 #define PFN_ALIGN(x)   (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)
 #define PFN_UP(x)      (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
 #define PFN_DOWN(x)    ((x) >> PAGE_SHIFT)
-#define PFN_PHYS(x)    ((x) << PAGE_SHIFT)
+#define PFN_PHYS(x)    ((unsigned long long)(x) << PAGE_SHIFT)
 
 #endif
index c7e0f55..68ed670 100644 (file)
@@ -134,9 +134,10 @@ struct skb_frag_struct {
 struct skb_shared_info {
        atomic_t        dataref;
        unsigned short  nr_frags;
-       unsigned short  tso_size;
-       unsigned short  tso_segs;
-       unsigned short  ufo_size;
+       unsigned short  gso_size;
+       /* Warning: this field is not always filled in (UFO)! */
+       unsigned short  gso_segs;
+       unsigned short  gso_type;
        unsigned int    ip6_frag_id;
        struct sk_buff  *frag_list;
        skb_frag_t      frags[MAX_SKB_FRAGS];
@@ -168,6 +169,14 @@ enum {
        SKB_FCLONE_CLONE,
 };
 
+enum {
+       SKB_GSO_TCPV4 = 1 << 0,
+       SKB_GSO_UDPV4 = 1 << 1,
+
+       /* This indicates the skb is from an untrusted source. */
+       SKB_GSO_DODGY = 1 << 2,
+};
+
 /** 
  *     struct sk_buff - socket buffer
  *     @next: Next buffer in list
@@ -189,6 +198,8 @@ enum {
  *     @local_df: allow local fragmentation
  *     @cloned: Head may be cloned (check refcnt to be sure)
  *     @nohdr: Payload reference only, must not modify header
+ *     @proto_data_valid: Protocol data validated since arriving at localhost
+ *     @proto_csum_blank: Protocol csum must be added before leaving localhost
  *     @pkt_type: Packet class
  *     @fclone: skbuff clone status
  *     @ip_summed: Driver fed us an IP checksum
@@ -265,7 +276,13 @@ struct sk_buff {
                                nfctinfo:3;
        __u8                    pkt_type:3,
                                fclone:2,
+#ifndef CONFIG_XEN
                                ipvs_property:1;
+#else
+                               ipvs_property:1,
+                               proto_data_valid:1,
+                               proto_csum_blank:1;
+#endif
        __be16                  protocol;
 
        void                    (*destructor)(struct sk_buff *skb);
@@ -324,7 +341,8 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 
 extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
                                            unsigned int size,
-                                           gfp_t priority);
+                                           gfp_t priority,
+                                           int fclone);
 extern void           kfree_skbmem(struct sk_buff *skb);
 extern struct sk_buff *skb_clone(struct sk_buff *skb,
                                 gfp_t priority);
@@ -969,15 +987,16 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
 #define NET_SKB_PAD    16
 #endif
 
-extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc);
+extern int ___pskb_trim(struct sk_buff *skb, unsigned int len);
 
 static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
 {
-       if (!skb->data_len) {
-               skb->len  = len;
-               skb->tail = skb->data + len;
-       } else
-               ___pskb_trim(skb, len, 0);
+       if (unlikely(skb->data_len)) {
+               WARN_ON(1);
+               return;
+       }
+       skb->len  = len;
+       skb->tail = skb->data + len;
 }
 
 /**
@@ -987,6 +1006,7 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len)
  *
  *     Cut the length of a buffer down by removing data from the tail. If
  *     the buffer is already under the length specified it is not modified.
+ *     The skb must be linear.
  */
 static inline void skb_trim(struct sk_buff *skb, unsigned int len)
 {
@@ -997,12 +1017,10 @@ static inline void skb_trim(struct sk_buff *skb, unsigned int len)
 
 static inline int __pskb_trim(struct sk_buff *skb, unsigned int len)
 {
-       if (!skb->data_len) {
-               skb->len  = len;
-               skb->tail = skb->data+len;
-               return 0;
-       }
-       return ___pskb_trim(skb, len, 1);
+       if (skb->data_len)
+               return ___pskb_trim(skb, len);
+       __skb_trim(skb, len);
+       return 0;
 }
 
 static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
@@ -1010,6 +1028,21 @@ static inline int pskb_trim(struct sk_buff *skb, unsigned int len)
        return (len < skb->len) ? __pskb_trim(skb, len) : 0;
 }
 
+/**
+ *     pskb_trim_unique - remove end from a paged unique (not cloned) buffer
+ *     @skb: buffer to alter
+ *     @len: new length
+ *
+ *     This is identical to pskb_trim except that the caller knows that
+ *     the skb is not cloned so we should never get an error due to out-
+ *     of-memory.
+ */
+static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len)
+{
+       int err = pskb_trim(skb, len);
+       BUG_ON(err);
+}
+
 /**
  *     skb_orphan - orphan a buffer
  *     @skb: buffer to orphan
@@ -1064,7 +1097,7 @@ static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
        return skb;
 }
 #else
-extern struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask);
+extern struct sk_buff *__dev_alloc_skb(unsigned int length, gfp_t gfp_mask);
 #endif
 
 /**
@@ -1163,18 +1196,34 @@ static inline int skb_can_coalesce(struct sk_buff *skb, int i,
        return 0;
 }
 
+static inline int __skb_linearize(struct sk_buff *skb)
+{
+       return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM;
+}
+
 /**
  *     skb_linearize - convert paged skb to linear one
  *     @skb: buffer to linarize
- *     @gfp: allocation mode
  *
  *     If there is no free memory -ENOMEM is returned, otherwise zero
  *     is returned and the old skb data released.
  */
-extern int __skb_linearize(struct sk_buff *skb, gfp_t gfp);
-static inline int skb_linearize(struct sk_buff *skb, gfp_t gfp)
+static inline int skb_linearize(struct sk_buff *skb)
+{
+       return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
+}
+
+/**
+ *     skb_linearize_cow - make sure skb is linear and writable
+ *     @skb: buffer to process
+ *
+ *     If there is no free memory -ENOMEM is returned, otherwise zero
+ *     is returned and the old skb data released.
+ */
+static inline int skb_linearize_cow(struct sk_buff *skb)
 {
-       return __skb_linearize(skb, gfp);
+       return skb_is_nonlinear(skb) || skb_cloned(skb) ?
+              __skb_linearize(skb) : 0;
 }
 
 /**
@@ -1271,6 +1320,7 @@ extern void              skb_split(struct sk_buff *skb,
                                 struct sk_buff *skb1, const u32 len);
 
 extern void           skb_release_data(struct sk_buff *skb);
+extern struct sk_buff *skb_segment(struct sk_buff *skb, int features);
 
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
                                       int len, void *buffer)
@@ -1400,5 +1450,10 @@ static inline void nf_reset(struct sk_buff *skb)
 static inline void nf_reset(struct sk_buff *skb) {}
 #endif /* CONFIG_NETFILTER */
 
+static inline int skb_is_gso(const struct sk_buff *skb)
+{
+       return skb_shinfo(skb)->gso_size;
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SKBUFF_H */
index 8793155..f13f49a 100644 (file)
@@ -297,8 +297,6 @@ extern int tty_read_raw_data(struct tty_struct *tty, unsigned char *bufp,
                             int buflen);
 extern void tty_write_message(struct tty_struct *tty, char *msg);
 
-extern void tty_get_termios(struct tty_driver *drv, int idx, struct termios *tio);
-
 extern int is_orphaned_pgrp(int pgrp);
 extern int is_ignored(int sig);
 extern int tty_signal(int sig, struct tty_struct *tty);
index b94d1ad..75b5b93 100644 (file)
@@ -218,12 +218,13 @@ extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
                struct rtattr *tab);
 extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
 
-extern int qdisc_restart(struct net_device *dev);
+extern void __qdisc_run(struct net_device *dev);
 
 static inline void qdisc_run(struct net_device *dev)
 {
-       while (!netif_queue_stopped(dev) && qdisc_restart(dev) < 0)
-               /* NOTHING */;
+       if (!netif_queue_stopped(dev) &&
+           !test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
+               __qdisc_run(dev);
 }
 
 extern int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
index 6dc5970..d516c58 100644 (file)
@@ -37,6 +37,9 @@
 struct net_protocol {
        int                     (*handler)(struct sk_buff *skb);
        void                    (*err_handler)(struct sk_buff *skb, u32 info);
+       int                     (*gso_send_check)(struct sk_buff *skb);
+       struct sk_buff         *(*gso_segment)(struct sk_buff *skb,
+                                              int features);
        int                     no_policy;
 };
 
index aa6033c..225dcea 100644 (file)
@@ -405,19 +405,6 @@ static inline int sctp_list_single_entry(struct list_head *head)
        return ((head->next != head) && (head->next == head->prev));
 }
 
-/* Calculate the size (in bytes) occupied by the data of an iovec.  */
-static inline size_t get_user_iov_size(struct iovec *iov, int iovlen)
-{
-       size_t retval = 0;
-
-       for (; iovlen > 0; --iovlen) {
-               retval += iov->iov_len;
-               iov++;
-       }
-
-       return retval;
-}
-
 /* Generate a random jitter in the range of -50% ~ +50% of input RTO. */
 static inline __s32 sctp_jitter(__u32 rto)
 {
index 1eac3d0..de313de 100644 (file)
@@ -221,8 +221,7 @@ struct sctp_chunk *sctp_make_abort_no_data(const struct sctp_association *,
                                      const struct sctp_chunk *,
                                      __u32 tsn);
 struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *,
-                                  const struct sctp_chunk *,
-                                  const struct msghdr *);
+                                       const struct msghdr *, size_t msg_len);
 struct sctp_chunk *sctp_make_abort_violation(const struct sctp_association *,
                                   const struct sctp_chunk *,
                                   const __u8 *,
index 21950e7..f937108 100644 (file)
@@ -1042,9 +1042,13 @@ static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 {
        __sk_dst_set(sk, dst);
        sk->sk_route_caps = dst->dev->features;
+       if (sk->sk_route_caps & NETIF_F_GSO)
+               sk->sk_route_caps |= NETIF_F_TSO;
        if (sk->sk_route_caps & NETIF_F_TSO) {
                if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
                        sk->sk_route_caps &= ~NETIF_F_TSO;
+               else 
+                       sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
        }
 }
 
index c0f8840..8fa2462 100644 (file)
@@ -568,13 +568,13 @@ struct tcp_skb_cb {
  */
 static inline int tcp_skb_pcount(const struct sk_buff *skb)
 {
-       return skb_shinfo(skb)->tso_segs;
+       return skb_shinfo(skb)->gso_segs;
 }
 
 /* This is valid iff tcp_skb_pcount() > 1. */
 static inline int tcp_skb_mss(const struct sk_buff *skb)
 {
-       return skb_shinfo(skb)->tso_size;
+       return skb_shinfo(skb)->gso_size;
 }
 
 static inline void tcp_dec_pcount_approx(__u32 *count,
@@ -1079,6 +1079,9 @@ extern struct request_sock_ops tcp_request_sock_ops;
 
 extern int tcp_v4_destroy_sock(struct sock *sk);
 
+extern int tcp_v4_gso_send_check(struct sk_buff *skb);
+extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features);
+
 #ifdef CONFIG_PROC_FS
 extern int  tcp4_proc_init(void);
 extern void tcp4_proc_exit(void);
index 0196291..e5618b9 100644 (file)
@@ -43,10 +43,6 @@ enum iscsi_uevent_e {
        ISCSI_UEVENT_GET_STATS          = UEVENT_BASE + 10,
        ISCSI_UEVENT_GET_PARAM          = UEVENT_BASE + 11,
 
-       ISCSI_UEVENT_TRANSPORT_EP_CONNECT       = UEVENT_BASE + 12,
-       ISCSI_UEVENT_TRANSPORT_EP_POLL          = UEVENT_BASE + 13,
-       ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT    = UEVENT_BASE + 14,
-
        /* up events */
        ISCSI_KEVENT_RECV_PDU           = KEVENT_BASE + 1,
        ISCSI_KEVENT_CONN_ERROR         = KEVENT_BASE + 2,
@@ -64,83 +60,61 @@ struct iscsi_uevent {
                        uint32_t        initial_cmdsn;
                } c_session;
                struct msg_destroy_session {
+                       uint64_t        session_handle;
                        uint32_t        sid;
                } d_session;
                struct msg_create_conn {
-                       uint32_t        sid;
+                       uint64_t        session_handle;
                        uint32_t        cid;
+                       uint32_t        sid;
                } c_conn;
                struct msg_bind_conn {
-                       uint32_t        sid;
-                       uint32_t        cid;
-                       uint64_t        transport_eph;
+                       uint64_t        session_handle;
+                       uint64_t        conn_handle;
+                       uint32_t        transport_fd;
                        uint32_t        is_leading;
                } b_conn;
                struct msg_destroy_conn {
-                       uint32_t        sid;
+                       uint64_t        conn_handle;
                        uint32_t        cid;
                } d_conn;
                struct msg_send_pdu {
-                       uint32_t        sid;
-                       uint32_t        cid;
                        uint32_t        hdr_size;
                        uint32_t        data_size;
+                       uint64_t        conn_handle;
                } send_pdu;
                struct msg_set_param {
-                       uint32_t        sid;
-                       uint32_t        cid;
+                       uint64_t        conn_handle;
                        uint32_t        param; /* enum iscsi_param */
-                       uint32_t        len;
+                       uint32_t        value;
                } set_param;
                struct msg_start_conn {
-                       uint32_t        sid;
-                       uint32_t        cid;
+                       uint64_t        conn_handle;
                } start_conn;
                struct msg_stop_conn {
-                       uint32_t        sid;
-                       uint32_t        cid;
                        uint64_t        conn_handle;
                        uint32_t        flag;
                } stop_conn;
                struct msg_get_stats {
-                       uint32_t        sid;
-                       uint32_t        cid;
+                       uint64_t        conn_handle;
                } get_stats;
-               struct msg_transport_connect {
-                       uint32_t        non_blocking;
-               } ep_connect;
-               struct msg_transport_poll {
-                       uint64_t        ep_handle;
-                       uint32_t        timeout_ms;
-               } ep_poll;
-               struct msg_transport_disconnect {
-                       uint64_t        ep_handle;
-               } ep_disconnect;
        } u;
        union {
                /* messages k -> u */
+               uint64_t                handle;
                int                     retcode;
                struct msg_create_session_ret {
+                       uint64_t        session_handle;
                        uint32_t        sid;
-                       uint32_t        host_no;
                } c_session_ret;
-               struct msg_create_conn_ret {
-                       uint32_t        sid;
-                       uint32_t        cid;
-               } c_conn_ret;
                struct msg_recv_req {
-                       uint32_t        sid;
-                       uint32_t        cid;
                        uint64_t        recv_handle;
+                       uint64_t        conn_handle;
                } recv_req;
                struct msg_conn_error {
-                       uint32_t        sid;
-                       uint32_t        cid;
+                       uint64_t        conn_handle;
                        uint32_t        error; /* enum iscsi_err */
                } connerror;
-               struct msg_transport_connect_ret {
-                       uint64_t        handle;
-               } ep_connect_ret;
        } r;
 } __attribute__ ((aligned (sizeof(uint64_t))));
 
@@ -165,66 +139,29 @@ enum iscsi_err {
        ISCSI_ERR_SESSION_FAILED        = ISCSI_ERR_BASE + 13,
        ISCSI_ERR_HDR_DGST              = ISCSI_ERR_BASE + 14,
        ISCSI_ERR_DATA_DGST             = ISCSI_ERR_BASE + 15,
-       ISCSI_ERR_PARAM_NOT_FOUND       = ISCSI_ERR_BASE + 16,
-       ISCSI_ERR_NO_SCSI_CMD           = ISCSI_ERR_BASE + 17,
+       ISCSI_ERR_PARAM_NOT_FOUND       = ISCSI_ERR_BASE + 16
 };
 
 /*
  * iSCSI Parameters (RFC3720)
  */
 enum iscsi_param {
-       /* passed in using netlink set param */
-       ISCSI_PARAM_MAX_RECV_DLENGTH,
-       ISCSI_PARAM_MAX_XMIT_DLENGTH,
-       ISCSI_PARAM_HDRDGST_EN,
-       ISCSI_PARAM_DATADGST_EN,
-       ISCSI_PARAM_INITIAL_R2T_EN,
-       ISCSI_PARAM_MAX_R2T,
-       ISCSI_PARAM_IMM_DATA_EN,
-       ISCSI_PARAM_FIRST_BURST,
-       ISCSI_PARAM_MAX_BURST,
-       ISCSI_PARAM_PDU_INORDER_EN,
-       ISCSI_PARAM_DATASEQ_INORDER_EN,
-       ISCSI_PARAM_ERL,
-       ISCSI_PARAM_IFMARKER_EN,
-       ISCSI_PARAM_OFMARKER_EN,
-       ISCSI_PARAM_EXP_STATSN,
-       ISCSI_PARAM_TARGET_NAME,
-       ISCSI_PARAM_TPGT,
-       ISCSI_PARAM_PERSISTENT_ADDRESS,
-       ISCSI_PARAM_PERSISTENT_PORT,
-       ISCSI_PARAM_SESS_RECOVERY_TMO,
-
-       /* pased in through bind conn using transport_fd */
-       ISCSI_PARAM_CONN_PORT,
-       ISCSI_PARAM_CONN_ADDRESS,
-
-       /* must always be last */
-       ISCSI_PARAM_MAX,
+       ISCSI_PARAM_MAX_RECV_DLENGTH    = 0,
+       ISCSI_PARAM_MAX_XMIT_DLENGTH    = 1,
+       ISCSI_PARAM_HDRDGST_EN          = 2,
+       ISCSI_PARAM_DATADGST_EN         = 3,
+       ISCSI_PARAM_INITIAL_R2T_EN      = 4,
+       ISCSI_PARAM_MAX_R2T             = 5,
+       ISCSI_PARAM_IMM_DATA_EN         = 6,
+       ISCSI_PARAM_FIRST_BURST         = 7,
+       ISCSI_PARAM_MAX_BURST           = 8,
+       ISCSI_PARAM_PDU_INORDER_EN      = 9,
+       ISCSI_PARAM_DATASEQ_INORDER_EN  = 10,
+       ISCSI_PARAM_ERL                 = 11,
+       ISCSI_PARAM_IFMARKER_EN         = 12,
+       ISCSI_PARAM_OFMARKER_EN         = 13,
 };
-
-#define ISCSI_MAX_RECV_DLENGTH         (1 << ISCSI_PARAM_MAX_RECV_DLENGTH)
-#define ISCSI_MAX_XMIT_DLENGTH         (1 << ISCSI_PARAM_MAX_XMIT_DLENGTH)
-#define ISCSI_HDRDGST_EN               (1 << ISCSI_PARAM_HDRDGST_EN)
-#define ISCSI_DATADGST_EN              (1 << ISCSI_PARAM_DATADGST_EN)
-#define ISCSI_INITIAL_R2T_EN           (1 << ISCSI_PARAM_INITIAL_R2T_EN)
-#define ISCSI_MAX_R2T                  (1 << ISCSI_PARAM_MAX_R2T)
-#define ISCSI_IMM_DATA_EN              (1 << ISCSI_PARAM_IMM_DATA_EN)
-#define ISCSI_FIRST_BURST              (1 << ISCSI_PARAM_FIRST_BURST)
-#define ISCSI_MAX_BURST                        (1 << ISCSI_PARAM_MAX_BURST)
-#define ISCSI_PDU_INORDER_EN           (1 << ISCSI_PARAM_PDU_INORDER_EN)
-#define ISCSI_DATASEQ_INORDER_EN       (1 << ISCSI_PARAM_DATASEQ_INORDER_EN)
-#define ISCSI_ERL                      (1 << ISCSI_PARAM_ERL)
-#define ISCSI_IFMARKER_EN              (1 << ISCSI_PARAM_IFMARKER_EN)
-#define ISCSI_OFMARKER_EN              (1 << ISCSI_PARAM_OFMARKER_EN)
-#define ISCSI_EXP_STATSN               (1 << ISCSI_PARAM_EXP_STATSN)
-#define ISCSI_TARGET_NAME              (1 << ISCSI_PARAM_TARGET_NAME)
-#define ISCSI_TPGT                     (1 << ISCSI_PARAM_TPGT)
-#define ISCSI_PERSISTENT_ADDRESS       (1 << ISCSI_PARAM_PERSISTENT_ADDRESS)
-#define ISCSI_PERSISTENT_PORT          (1 << ISCSI_PARAM_PERSISTENT_PORT)
-#define ISCSI_SESS_RECOVERY_TMO                (1 << ISCSI_PARAM_SESS_RECOVERY_TMO)
-#define ISCSI_CONN_PORT                        (1 << ISCSI_PARAM_CONN_PORT)
-#define ISCSI_CONN_ADDRESS             (1 << ISCSI_PARAM_CONN_ADDRESS)
+#define ISCSI_PARAM_MAX                        14
 
 #define iscsi_ptr(_handle) ((void*)(unsigned long)_handle)
 #define iscsi_handle(_ptr) ((uint64_t)(unsigned long)_ptr)
@@ -253,6 +190,7 @@ enum iscsi_param {
  * These flags describes reason of stop_conn() call
  */
 #define STOP_CONN_TERM         0x1
+#define STOP_CONN_SUSPEND      0x2
 #define STOP_CONN_RECOVER      0x3
 
 #define ISCSI_STATS_CUSTOM_MAX         32
index 02f6e4b..4feda05 100644 (file)
@@ -21,6 +21,8 @@
 #ifndef ISCSI_PROTO_H
 #define ISCSI_PROTO_H
 
+#define ISCSI_VERSION_STR      "0.3"
+#define ISCSI_DATE_STR         "22-Apr-2005"
 #define ISCSI_DRAFT20_VERSION  0x00
 
 /* default iSCSI listen port for incoming connections */
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
deleted file mode 100644 (file)
index cbf7e58..0000000
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * iSCSI lib definitions
- *
- * Copyright (C) 2006 Red Hat, Inc.  All rights reserved.
- * Copyright (C) 2004 - 2006 Mike Christie
- * Copyright (C) 2004 - 2005 Dmitry Yusupov
- * Copyright (C) 2004 - 2005 Alex Aizman
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-#ifndef LIBISCSI_H
-#define LIBISCSI_H
-
-#include <linux/types.h>
-#include <linux/mutex.h>
-#include <scsi/iscsi_proto.h>
-#include <scsi/iscsi_if.h>
-
-struct scsi_transport_template;
-struct scsi_device;
-struct Scsi_Host;
-struct scsi_cmnd;
-struct socket;
-struct iscsi_transport;
-struct iscsi_cls_session;
-struct iscsi_cls_conn;
-struct iscsi_session;
-struct iscsi_nopin;
-
-/* #define DEBUG_SCSI */
-#ifdef DEBUG_SCSI
-#define debug_scsi(fmt...) printk(KERN_INFO "iscsi: " fmt)
-#else
-#define debug_scsi(fmt...)
-#endif
-
-#define ISCSI_XMIT_CMDS_MAX    128     /* must be power of 2 */
-#define ISCSI_MGMT_CMDS_MAX    32      /* must be power of 2 */
-#define ISCSI_CONN_MAX                 1
-
-#define ISCSI_MGMT_ITT_OFFSET  0xa00
-
-#define ISCSI_DEF_CMD_PER_LUN          32
-#define ISCSI_MAX_CMD_PER_LUN          128
-
-/* Task Mgmt states */
-#define TMABORT_INITIAL                        0x0
-#define TMABORT_SUCCESS                        0x1
-#define TMABORT_FAILED                 0x2
-#define TMABORT_TIMEDOUT               0x3
-
-/* Connection suspend "bit" */
-#define ISCSI_SUSPEND_BIT              1
-
-#define ISCSI_ITT_MASK                 (0xfff)
-#define ISCSI_CID_SHIFT                        12
-#define ISCSI_CID_MASK                 (0xffff << ISCSI_CID_SHIFT)
-#define ISCSI_AGE_SHIFT                        28
-#define ISCSI_AGE_MASK                 (0xf << ISCSI_AGE_SHIFT)
-
-struct iscsi_mgmt_task {
-       /*
-        * Becuae LLDs allocate their hdr differently, this is a pointer to
-        * that storage. It must be setup at session creation time.
-        */
-       struct iscsi_hdr        *hdr;
-       char                    *data;          /* mgmt payload */
-       int                     data_count;     /* counts data to be sent */
-       uint32_t                itt;            /* this ITT */
-       void                    *dd_data;       /* driver/transport data */
-       struct list_head        running;
-};
-
-struct iscsi_cmd_task {
-       /*
-        * Becuae LLDs allocate their hdr differently, this is a pointer to
-        * that storage. It must be setup at session creation time.
-        */
-       struct iscsi_cmd        *hdr;
-       int                     itt;            /* this ITT */
-       int                     datasn;         /* DataSN */
-
-       uint32_t                unsol_datasn;
-       int                     imm_count;      /* imm-data (bytes)   */
-       int                     unsol_count;    /* unsolicited (bytes)*/
-       int                     data_count;     /* remaining Data-Out */
-       struct scsi_cmnd        *sc;            /* associated SCSI cmd*/
-       int                     total_length;
-       struct iscsi_conn       *conn;          /* used connection    */
-       struct iscsi_mgmt_task  *mtask;         /* tmf mtask in progr */
-
-       struct list_head        running;        /* running cmd list */
-       void                    *dd_data;       /* driver/transport data */
-};
-
-struct iscsi_conn {
-       struct iscsi_cls_conn   *cls_conn;      /* ptr to class connection */
-       void                    *dd_data;       /* iscsi_transport data */
-       struct iscsi_session    *session;       /* parent session */
-       /*
-        * LLDs should set this lock. It protects the transport recv
-        * code
-        */
-       rwlock_t                *recv_lock;
-       /*
-        * conn_stop() flag: stop to recover, stop to terminate
-        */
-        int                    stop_stage;
-
-       /* iSCSI connection-wide sequencing */
-       uint32_t                exp_statsn;
-
-       /* control data */
-       int                     id;             /* CID */
-       struct list_head        item;           /* maintains list of conns */
-       int                     c_stage;        /* connection state */
-       struct iscsi_mgmt_task  *login_mtask;   /* mtask used for login/text */
-       struct iscsi_mgmt_task  *mtask;         /* xmit mtask in progress */
-       struct iscsi_cmd_task   *ctask;         /* xmit ctask in progress */
-
-       /* xmit */
-       struct kfifo            *immqueue;      /* immediate xmit queue */
-       struct kfifo            *mgmtqueue;     /* mgmt (control) xmit queue */
-       struct list_head        mgmt_run_list;  /* list of control tasks */
-       struct kfifo            *xmitqueue;     /* data-path cmd queue */
-       struct list_head        run_list;       /* list of cmds in progress */
-       struct work_struct      xmitwork;       /* per-conn. xmit workqueue */
-       /*
-        * serializes connection xmit, access to kfifos:
-        * xmitqueue, immqueue, mgmtqueue
-        */
-       struct mutex            xmitmutex;
-
-       unsigned long           suspend_tx;     /* suspend Tx */
-       unsigned long           suspend_rx;     /* suspend Rx */
-
-       /* abort */
-       wait_queue_head_t       ehwait;         /* used in eh_abort() */
-       struct iscsi_tm         tmhdr;
-       struct timer_list       tmabort_timer;
-       int                     tmabort_state;  /* see TMABORT_INITIAL, etc.*/
-
-       /* negotiated params */
-       int                     max_recv_dlength; /* initiator_max_recv_dsl*/
-       int                     max_xmit_dlength; /* target_max_recv_dsl */
-       int                     hdrdgst_en;
-       int                     datadgst_en;
-
-       /* MIB-statistics */
-       uint64_t                txdata_octets;
-       uint64_t                rxdata_octets;
-       uint32_t                scsicmd_pdus_cnt;
-       uint32_t                dataout_pdus_cnt;
-       uint32_t                scsirsp_pdus_cnt;
-       uint32_t                datain_pdus_cnt;
-       uint32_t                r2t_pdus_cnt;
-       uint32_t                tmfcmd_pdus_cnt;
-       int32_t                 tmfrsp_pdus_cnt;
-
-       /* custom statistics */
-       uint32_t                eh_abort_cnt;
-};
-
-struct iscsi_queue {
-       struct kfifo            *queue;         /* FIFO Queue */
-       void                    **pool;         /* Pool of elements */
-       int                     max;            /* Max number of elements */
-};
-
-struct iscsi_session {
-       /* iSCSI session-wide sequencing */
-       uint32_t                cmdsn;
-       uint32_t                exp_cmdsn;
-       uint32_t                max_cmdsn;
-
-       /* configuration */
-       int                     initial_r2t_en;
-       int                     max_r2t;
-       int                     imm_data_en;
-       int                     first_burst;
-       int                     max_burst;
-       int                     time2wait;
-       int                     time2retain;
-       int                     pdu_inorder_en;
-       int                     dataseq_inorder_en;
-       int                     erl;
-       int                     ifmarker_en;
-       int                     ofmarker_en;
-
-       /* control data */
-       struct iscsi_transport  *tt;
-       struct Scsi_Host        *host;
-       struct iscsi_conn       *leadconn;      /* leading connection */
-       spinlock_t              lock;           /* protects session state, *
-                                                * sequence numbers,       *
-                                                * session resources:      *
-                                                * - cmdpool,              *
-                                                * - mgmtpool,             *
-                                                * - r2tpool               */
-       int                     state;          /* session state           */
-       struct list_head        item;
-       int                     age;            /* counts session re-opens */
-
-       struct list_head        connections;    /* list of connections */
-       int                     cmds_max;       /* size of cmds array */
-       struct iscsi_cmd_task   **cmds;         /* Original Cmds arr */
-       struct iscsi_queue      cmdpool;        /* PDU's pool */
-       int                     mgmtpool_max;   /* size of mgmt array */
-       struct iscsi_mgmt_task  **mgmt_cmds;    /* Original mgmt arr */
-       struct iscsi_queue      mgmtpool;       /* Mgmt PDU's pool */
-};
-
-/*
- * scsi host template
- */
-extern int iscsi_change_queue_depth(struct scsi_device *sdev, int depth);
-extern int iscsi_eh_abort(struct scsi_cmnd *sc);
-extern int iscsi_eh_host_reset(struct scsi_cmnd *sc);
-extern int iscsi_queuecommand(struct scsi_cmnd *sc,
-                             void (*done)(struct scsi_cmnd *));
-
-/*
- * session management
- */
-extern struct iscsi_cls_session *
-iscsi_session_setup(struct iscsi_transport *, struct scsi_transport_template *,
-                   int, int, uint32_t, uint32_t *);
-extern void iscsi_session_teardown(struct iscsi_cls_session *);
-extern struct iscsi_session *class_to_transport_session(struct iscsi_cls_session *);
-extern void iscsi_session_recovery_timedout(struct iscsi_cls_session *);
-
-#define session_to_cls(_sess) \
-       hostdata_session(_sess->host->hostdata)
-
-/*
- * connection management
- */
-extern struct iscsi_cls_conn *iscsi_conn_setup(struct iscsi_cls_session *,
-                                              uint32_t);
-extern void iscsi_conn_teardown(struct iscsi_cls_conn *);
-extern int iscsi_conn_start(struct iscsi_cls_conn *);
-extern void iscsi_conn_stop(struct iscsi_cls_conn *, int);
-extern int iscsi_conn_bind(struct iscsi_cls_session *, struct iscsi_cls_conn *,
-                          int);
-extern void iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err);
-
-/*
- * pdu and task processing
- */
-extern int iscsi_check_assign_cmdsn(struct iscsi_session *,
-                                   struct iscsi_nopin *);
-extern void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *,
-                                       struct iscsi_data *hdr,
-                                       int transport_data_cnt);
-extern int iscsi_conn_send_pdu(struct iscsi_cls_conn *, struct iscsi_hdr *,
-                               char *, uint32_t);
-extern int iscsi_complete_pdu(struct iscsi_conn *, struct iscsi_hdr *,
-                             char *, int);
-extern int __iscsi_complete_pdu(struct iscsi_conn *, struct iscsi_hdr *,
-                               char *, int);
-extern int iscsi_verify_itt(struct iscsi_conn *, struct iscsi_hdr *,
-                           uint32_t *);
-
-/*
- * generic helpers
- */
-extern void iscsi_pool_free(struct iscsi_queue *, void **);
-extern int iscsi_pool_init(struct iscsi_queue *, int, void ***, int);
-
-#endif
index 1b26a6c..b41cf07 100644 (file)
@@ -1,8 +1,10 @@
-/* 
+/*
  * iSCSI transport class definitions
  *
  * Copyright (C) IBM Corporation, 2004
- * Copyright (C) Mike Christie, 2004
+ * Copyright (C) Mike Christie, 2004 - 2005
+ * Copyright (C) Dmitry Yusupov, 2004 - 2005
+ * Copyright (C) Alex Aizman, 2004 - 2005
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
 #ifndef SCSI_TRANSPORT_ISCSI_H
 #define SCSI_TRANSPORT_ISCSI_H
 
-#include <linux/config.h>
-#include <linux/in6.h>
-#include <linux/in.h>
+#include <linux/device.h>
+#include <scsi/iscsi_if.h>
 
 struct scsi_transport_template;
+struct Scsi_Host;
+struct mempool_zone;
+struct iscsi_cls_conn;
 
-struct iscsi_class_session {
-       uint8_t isid[6];
-       uint16_t tsih;
-       int header_digest;              /* 1 CRC32, 0 None */
-       int data_digest;                /* 1 CRC32, 0 None */
-       uint16_t tpgt;
-       union {
-               struct in6_addr sin6_addr;
-               struct in_addr sin_addr;
-       } u;
-       sa_family_t addr_type;          /* must be AF_INET or AF_INET6 */
-       uint16_t port;                  /* must be in network byte order */
-       int initial_r2t;                /* 1 Yes, 0 No */
-       int immediate_data;             /* 1 Yes, 0 No */
-       uint32_t max_recv_data_segment_len;
-       uint32_t max_burst_len;
-       uint32_t first_burst_len;
-       uint16_t def_time2wait;
-       uint16_t def_time2retain;
-       uint16_t max_outstanding_r2t;
-       int data_pdu_in_order;          /* 1 Yes, 0 No */
-       int data_sequence_in_order;     /* 1 Yes, 0 No */
-       int erl;
+/**
+ * struct iscsi_transport - iSCSI Transport template
+ *
+ * @name:              transport name
+ * @caps:              iSCSI Data-Path capabilities
+ * @create_session:    create new iSCSI session object
+ * @destroy_session:   destroy existing iSCSI session object
+ * @create_conn:       create new iSCSI connection
+ * @bind_conn:         associate this connection with existing iSCSI session
+ *                     and specified transport descriptor
+ * @destroy_conn:      destroy inactive iSCSI connection
+ * @set_param:         set iSCSI Data-Path operational parameter
+ * @start_conn:                set connection to be operational
+ * @stop_conn:         suspend/recover/terminate connection
+ * @send_pdu:          send iSCSI PDU, Login, Logout, NOP-Out, Reject, Text.
+ *
+ * Template API provided by iSCSI Transport
+ */
+struct iscsi_transport {
+       struct module *owner;
+       char *name;
+       unsigned int caps;
+       struct scsi_host_template *host_template;
+       /* LLD session/scsi_host data size */
+       int hostdata_size;
+       /* LLD iscsi_host data size */
+       int ihostdata_size;
+       /* LLD connection data size */
+       int conndata_size;
+       int max_lun;
+       unsigned int max_conn;
+       unsigned int max_cmd_len;
+       struct iscsi_cls_session *(*create_session)
+               (struct scsi_transport_template *t, uint32_t sn, uint32_t *sid);
+       void (*destroy_session) (struct iscsi_cls_session *session);
+       struct iscsi_cls_conn *(*create_conn) (struct iscsi_cls_session *sess,
+                               uint32_t cid);
+       int (*bind_conn) (struct iscsi_cls_session *session,
+                         struct iscsi_cls_conn *cls_conn,
+                         uint32_t transport_fd, int is_leading);
+       int (*start_conn) (struct iscsi_cls_conn *conn);
+       void (*stop_conn) (struct iscsi_cls_conn *conn, int flag);
+       void (*destroy_conn) (struct iscsi_cls_conn *conn);
+       int (*set_param) (struct iscsi_cls_conn *conn, enum iscsi_param param,
+                         uint32_t value);
+       int (*get_conn_param) (struct iscsi_cls_conn *conn,
+                              enum iscsi_param param,
+                              uint32_t *value);
+       int (*get_session_param) (struct iscsi_cls_session *session,
+                                 enum iscsi_param param, uint32_t *value);
+       int (*send_pdu) (struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
+                        char *data, uint32_t data_size);
+       void (*get_stats) (struct iscsi_cls_conn *conn,
+                          struct iscsi_stats *stats);
 };
 
 /*
- * accessor macros
+ * transport registration upcalls
  */
-#define iscsi_isid(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->isid)
-#define iscsi_tsih(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->tsih)
-#define iscsi_header_digest(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->header_digest)
-#define iscsi_data_digest(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->data_digest)
-#define iscsi_port(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->port)
-#define iscsi_addr_type(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->addr_type)
-#define iscsi_sin_addr(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->u.sin_addr)
-#define iscsi_sin6_addr(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->u.sin6_addr)
-#define iscsi_tpgt(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->tpgt)
-#define iscsi_initial_r2t(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->initial_r2t)
-#define iscsi_immediate_data(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->immediate_data)
-#define iscsi_max_recv_data_segment_len(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->max_recv_data_segment_len)
-#define iscsi_max_burst_len(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->max_burst_len)
-#define iscsi_first_burst_len(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->first_burst_len)
-#define iscsi_def_time2wait(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->def_time2wait)
-#define iscsi_def_time2retain(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->def_time2retain)
-#define iscsi_max_outstanding_r2t(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->max_outstanding_r2t)
-#define iscsi_data_pdu_in_order(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->data_pdu_in_order)
-#define iscsi_data_sequence_in_order(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->data_sequence_in_order)
-#define iscsi_erl(x) \
-       (((struct iscsi_class_session *)&(x)->starget_data)->erl)
+extern struct scsi_transport_template *iscsi_register_transport(struct iscsi_transport *tt);
+extern int iscsi_unregister_transport(struct iscsi_transport *tt);
 
 /*
- * The functions by which the transport class and the driver communicate
+ * control plane upcalls
  */
-struct iscsi_function_template {
-       /*
-        * target attrs
-        */
-       void (*get_isid)(struct scsi_target *);
-       void (*get_tsih)(struct scsi_target *);
-       void (*get_header_digest)(struct scsi_target *);
-       void (*get_data_digest)(struct scsi_target *);
-       void (*get_port)(struct scsi_target *);
-       void (*get_tpgt)(struct scsi_target *);
-       /*
-        * In get_ip_address the lld must set the address and
-        * the address type
-        */
-       void (*get_ip_address)(struct scsi_target *);
-       /*
-        * The lld should snprintf the name or alias to the buffer
-        */
-       ssize_t (*get_target_name)(struct scsi_target *, char *, ssize_t);
-       ssize_t (*get_target_alias)(struct scsi_target *, char *, ssize_t);
-       void (*get_initial_r2t)(struct scsi_target *);
-       void (*get_immediate_data)(struct scsi_target *);
-       void (*get_max_recv_data_segment_len)(struct scsi_target *);
-       void (*get_max_burst_len)(struct scsi_target *);
-       void (*get_first_burst_len)(struct scsi_target *);
-       void (*get_def_time2wait)(struct scsi_target *);
-       void (*get_def_time2retain)(struct scsi_target *);
-       void (*get_max_outstanding_r2t)(struct scsi_target *);
-       void (*get_data_pdu_in_order)(struct scsi_target *);
-       void (*get_data_sequence_in_order)(struct scsi_target *);
-       void (*get_erl)(struct scsi_target *);
+extern void iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error);
+extern int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
+                         char *data, uint32_t data_size);
 
-       /*
-        * host atts
-        */
+struct iscsi_cls_conn {
+       struct list_head conn_list;     /* item in connlist */
+       void *dd_data;                  /* LLD private data */
+       struct iscsi_transport *transport;
+       int active;                     /* must be accessed with the connlock */
+       struct device dev;              /* sysfs transport/container device */
+       struct mempool_zone *z_error;
+       struct mempool_zone *z_pdu;
+       struct list_head freequeue;
+};
+
+#define iscsi_dev_to_conn(_dev) \
+       container_of(_dev, struct iscsi_cls_conn, dev)
 
-       /*
-        * The lld should snprintf the name or alias to the buffer
-        */
-       ssize_t (*get_initiator_alias)(struct Scsi_Host *, char *, ssize_t);
-       ssize_t (*get_initiator_name)(struct Scsi_Host *, char *, ssize_t);
-       /*
-        * The driver sets these to tell the transport class it
-        * wants the attributes displayed in sysfs.  If the show_ flag
-        * is not set, the attribute will be private to the transport
-        * class. We could probably just test if a get_ fn was set
-        * since we only use the values for sysfs but this is how
-        * fc does it too.
-        */
-       unsigned long show_isid:1;
-       unsigned long show_tsih:1;
-       unsigned long show_header_digest:1;
-       unsigned long show_data_digest:1;
-       unsigned long show_port:1;
-       unsigned long show_tpgt:1;
-       unsigned long show_ip_address:1;
-       unsigned long show_target_name:1;
-       unsigned long show_target_alias:1;
-       unsigned long show_initial_r2t:1;
-       unsigned long show_immediate_data:1;
-       unsigned long show_max_recv_data_segment_len:1;
-       unsigned long show_max_burst_len:1;
-       unsigned long show_first_burst_len:1;
-       unsigned long show_def_time2wait:1;
-       unsigned long show_def_time2retain:1;
-       unsigned long show_max_outstanding_r2t:1;
-       unsigned long show_data_pdu_in_order:1;
-       unsigned long show_data_sequence_in_order:1;
-       unsigned long show_erl:1;
-       unsigned long show_initiator_name:1;
-       unsigned long show_initiator_alias:1;
+struct iscsi_cls_session {
+       struct list_head sess_list;             /* item in session_list */
+       struct iscsi_transport *transport;
+       struct device dev;      /* sysfs transport/container device */
 };
 
-struct scsi_transport_template *iscsi_attach_transport(struct iscsi_function_template *);
-void iscsi_release_transport(struct scsi_transport_template *);
+#define iscsi_dev_to_session(_dev) \
+       container_of(_dev, struct iscsi_cls_session, dev)
+
+#define iscsi_session_to_shost(_session) \
+       dev_to_shost(_session->dev.parent)
+
+/*
+ * session and connection functions that can be used by HW iSCSI LLDs
+ */
+extern struct iscsi_cls_session *iscsi_create_session(struct Scsi_Host *shost,
+                               struct iscsi_transport *t);
+extern int iscsi_destroy_session(struct iscsi_cls_session *session);
+extern struct iscsi_cls_conn *iscsi_create_conn(struct iscsi_cls_session *sess,
+                                           uint32_t cid);
+extern int iscsi_destroy_conn(struct iscsi_cls_conn *conn);
+
+/*
+ * session functions used by software iscsi
+ */
+extern struct Scsi_Host *
+iscsi_transport_create_session(struct scsi_transport_template *scsit,
+                               struct iscsi_transport *transport);
+extern int iscsi_transport_destroy_session(struct Scsi_Host *shost);
 
 #endif
index d29e3d3..d45170b 100644 (file)
@@ -62,7 +62,8 @@ static int snd_legacy_find_free_irq(int *irq_table)
 {
        while (*irq_table != -1) {
                if (!request_irq(*irq_table, snd_legacy_empty_irq_handler,
-                                SA_INTERRUPT, "ALSA Test IRQ", (void *) irq_table)) {
+                                SA_INTERRUPT | SA_PROBEIRQ, "ALSA Test IRQ",
+                                (void *) irq_table)) {
                        free_irq(*irq_table, (void *) irq_table);
                        return *irq_table;
                }
index 0b46a5d..17ab322 100644 (file)
@@ -35,6 +35,7 @@ config PREEMPT_VOLUNTARY
 
 config PREEMPT
        bool "Preemptible Kernel (Low-Latency Desktop)"
+       depends on !XEN
        help
          This option reduces the latency of the kernel by making
          all kernel code (that is not executing in a critical section)
index fcaaa99..471d19f 100644 (file)
@@ -280,6 +280,9 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                if (retval)
                        goto out;
        }
+#ifdef arch_dup_mmap
+       arch_dup_mmap(mm, oldmm);
+#endif
        retval = 0;
 out:
        up_write(&mm->mmap_sem);
index 9ddf501..c27bb56 100644 (file)
@@ -594,6 +594,7 @@ static int unqueue_me(struct futex_q *q)
        /* In the common case we don't take the spinlock, which is nice. */
  retry:
        lock_ptr = q->lock_ptr;
+       barrier();
        if (lock_ptr != 0) {
                spin_lock(lock_ptr);
                /*
index 1279e34..97d5559 100644 (file)
@@ -204,14 +204,10 @@ int setup_irq(unsigned int irq, struct irqaction * new)
        p = &desc->action;
        if ((old = *p) != NULL) {
                /* Can't share interrupts unless both agree to */
-               if (!(old->flags & new->flags & SA_SHIRQ))
-                       goto mismatch;
-
-#if defined(ARCH_HAS_IRQ_PER_CPU) && defined(SA_PERCPU_IRQ)
-               /* All handlers must agree on per-cpuness */
-               if ((old->flags & IRQ_PER_CPU) != (new->flags & IRQ_PER_CPU))
-                       goto mismatch;
-#endif
+               if (!(old->flags & new->flags & SA_SHIRQ)) {
+                       spin_unlock_irqrestore(&desc->lock,flags);
+                       return -EBUSY;
+               }
 
                /* add new interrupt at end of irq queue */
                do {
@@ -222,10 +218,7 @@ int setup_irq(unsigned int irq, struct irqaction * new)
        }
 
        *p = new;
-#if defined(ARCH_HAS_IRQ_PER_CPU) && defined(SA_PERCPU_IRQ)
-       if (new->flags & SA_PERCPU_IRQ)
-               desc->status |= IRQ_PER_CPU;
-#endif
+
        if (!shared) {
                desc->depth = 0;
                desc->status &= ~(IRQ_DISABLED | IRQ_AUTODETECT |
@@ -243,14 +236,6 @@ int setup_irq(unsigned int irq, struct irqaction * new)
        register_handler_proc(irq, new);
 
        return 0;
-
-mismatch:
-       spin_unlock_irqrestore(&desc->lock, flags);
-       if (!(new->flags & SA_PROBEIRQ)) {
-               printk(KERN_ERR "%s: irq handler mismatch\n", __FUNCTION__);
-               dump_stack();
-       }
-       return -EBUSY;
 }
 
 /**
@@ -273,7 +258,6 @@ void free_irq(unsigned int irq, void *dev_id)
        struct irqaction **p;
        unsigned long flags;
 
-       WARN_ON(in_interrupt());
        if (irq >= NR_IRQS)
                return;
 
index 7df9abd..f9353e9 100644 (file)
@@ -137,7 +137,8 @@ void note_interrupt(unsigned int irq, irq_desc_t *desc, irqreturn_t action_ret,
                        struct pt_regs *regs)
 {
        if (action_ret != IRQ_HANDLED) {
-               desc->irqs_unhandled++;
+               if (!irq_ignore_unhandled(irq))
+                       desc->irqs_unhandled++;
                if (action_ret != IRQ_NONE)
                        report_bad_irq(irq, desc, action_ret);
        }
index 4d20106..eeaa858 100644 (file)
@@ -4238,17 +4238,22 @@ asmlinkage long sys_sched_yield(void)
        return 0;
 }
 
-static inline void __cond_resched(void)
+static inline int __resched_legal(int expected_preempt_count)
+{
+       if (unlikely(preempt_count() != expected_preempt_count))
+               return 0;
+       if (unlikely(system_state != SYSTEM_RUNNING))
+               return 0;
+       return 1;
+}
+
+static void __cond_resched(void)
 {
        /*
         * The BKS might be reacquired before we have dropped
         * PREEMPT_ACTIVE, which could trigger a second
         * cond_resched() call.
         */
-       if (unlikely(preempt_count()))
-               return;
-       if (unlikely(system_state != SYSTEM_RUNNING))
-               return;
        do {
                add_preempt_count(PREEMPT_ACTIVE);
                schedule();
@@ -4258,13 +4263,12 @@ static inline void __cond_resched(void)
 
 int __sched cond_resched(void)
 {
-       if (need_resched()) {
+       if (need_resched() && __resched_legal(0)) {
                __cond_resched();
                return 1;
        }
        return 0;
 }
-
 EXPORT_SYMBOL(cond_resched);
 
 /*
@@ -4285,7 +4289,7 @@ int cond_resched_lock(spinlock_t *lock)
                ret = 1;
                spin_lock(lock);
        }
-       if (need_resched()) {
+       if (need_resched() && __resched_legal(1)) {
                _raw_spin_unlock(lock);
                preempt_enable_no_resched();
                __cond_resched();
@@ -4294,14 +4298,13 @@ int cond_resched_lock(spinlock_t *lock)
        }
        return ret;
 }
-
 EXPORT_SYMBOL(cond_resched_lock);
 
 int __sched cond_resched_softirq(void)
 {
        BUG_ON(!in_softirq());
 
-       if (need_resched()) {
+       if (need_resched() && __resched_legal(0)) {
                __local_bh_enable();
                __cond_resched();
                local_bh_disable();
@@ -4309,10 +4312,8 @@ int __sched cond_resched_softirq(void)
        }
        return 0;
 }
-
 EXPORT_SYMBOL(cond_resched_softirq);
 
-
 /**
  * yield - yield the current processor to other threads.
  *
index dcfb5d7..51cacd1 100644 (file)
@@ -111,7 +111,6 @@ static int stop_machine(void)
        /* If some failed, kill them all. */
        if (ret < 0) {
                stopmachine_set_state(STOPMACHINE_EXIT);
-               up(&stopmachine_mutex);
                return ret;
        }
 
index 2f8cdbe..20a4bca 100644 (file)
@@ -543,6 +543,22 @@ found:
        }
        spin_unlock(&base->lock);
 
+       /*
+        * It can happen that other CPUs service timer IRQs and increment
+        * jiffies, but we have not yet got a local timer tick to process
+        * the timer wheels.  In that case, the expiry time can be before
+        * jiffies, but since the high-resolution timer here is relative to
+        * jiffies, the default expression when high-resolution timers are
+        * not active,
+        *
+        *   time_before(MAX_JIFFY_OFFSET + jiffies, expires)
+        *
+        * would falsely evaluate to true.  If that is the case, just
+        * return jiffies so that we can immediately fire the local timer
+        */
+       if (time_before(expires, jiffies))
+               return jiffies;
+
        /*
         * It can happen that other CPUs service timer IRQs and increment
         * jiffies, but we have not yet got a local timer tick to process
@@ -971,46 +987,18 @@ asmlinkage long sys_getpid(void)
 }
 
 /*
- * Accessing ->group_leader->real_parent is not SMP-safe, it could
- * change from under us. However, rather than getting any lock
- * we can use an optimistic algorithm: get the parent
- * pid, and go back and check that the parent is still
- * the same. If it has changed (which is extremely unlikely
- * indeed), we just try again..
- *
- * NOTE! This depends on the fact that even if we _do_
- * get an old value of "parent", we can happily dereference
- * the pointer (it was and remains a dereferencable kernel pointer
- * no matter what): we just can't necessarily trust the result
- * until we know that the parent pointer is valid.
- *
- * NOTE2: ->group_leader never changes from under us.
+ * Accessing ->real_parent is not SMP-safe, it could
+ * change from under us. However, we can use a stale
+ * value of ->real_parent under rcu_read_lock(), see
+ * release_task()->call_rcu(delayed_put_task_struct).
  */
 asmlinkage long sys_getppid(void)
 {
        int pid;
-       struct task_struct *me = current;
-       struct task_struct *parent;
 
-       parent = me->group_leader->real_parent;
-       for (;;) {
-               pid = parent->tgid;
-#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
-{
-               struct task_struct *old = parent;
-
-               /*
-                * Make sure we read the pid before re-reading the
-                * parent pointer:
-                */
-               smp_rmb();
-               parent = me->group_leader->real_parent;
-               if (old != parent)
-                       continue;
-}
-#endif
-               break;
-       }
+       rcu_read_lock();
+       pid = rcu_dereference(current->real_parent)->tgid;
+       rcu_read_unlock();
        return vx_map_pid(pid);
 }
 
@@ -1025,7 +1013,7 @@ asmlinkage long do_getxpid(long *ppid)
        *ppid = sys_getppid();
        return sys_getpid();
 }
-  
+
 #else /* _alpha_ */
 
 asmlinkage long sys_getuid(void)
index b830c9a..033ff7c 100644 (file)
@@ -48,6 +48,9 @@ obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
 obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
 
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
+ifneq ($(CONFIG_XEN_IA64_DOM0_NON_VP),y)
+swiotlb-$(CONFIG_XEN) := ../arch/i386/kernel/swiotlb.o
+endif
 
 hostprogs-y    := gen_crc32table
 clean-files    := crc32table.h
index de19030..4d09681 100644 (file)
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -29,6 +29,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #endif
+#include <linux/err.h>
 #include <linux/string.h>
 #include <linux/idr.h>
 
@@ -398,6 +399,48 @@ void *idr_find(struct idr *idp, int id)
 }
 EXPORT_SYMBOL(idr_find);
 
+/**
+ * idr_replace - replace pointer for given id
+ * @idp: idr handle
+ * @ptr: pointer you want associated with the id
+ * @id: lookup key
+ *
+ * Replace the pointer registered with an id and return the old value.
+ * A -ENOENT return indicates that @id was not found.
+ * A -EINVAL return indicates that @id was not within valid constraints.
+ *
+ * The caller must serialize vs idr_find(), idr_get_new(), and idr_remove().
+ */
+void *idr_replace(struct idr *idp, void *ptr, int id)
+{
+       int n;
+       struct idr_layer *p, *old_p;
+
+       n = idp->layers * IDR_BITS;
+       p = idp->top;
+
+       id &= MAX_ID_MASK;
+
+       if (id >= (1 << n))
+               return ERR_PTR(-EINVAL);
+
+       n -= IDR_BITS;
+       while ((n > 0) && p) {
+               p = p->ary[(id >> n) & IDR_MASK];
+               n -= IDR_BITS;
+       }
+
+       n = id & IDR_MASK;
+       if (unlikely(p == NULL || !test_bit(n, &p->bitmap)))
+               return ERR_PTR(-ENOENT);
+
+       old_p = p->ary[n];
+       p->ary[n] = ptr;
+
+       return old_p;
+}
+EXPORT_SYMBOL(idr_replace);
+
 static void idr_cache_ctor(void * idr_layer, kmem_cache_t *idr_layer_cache,
                unsigned long flags)
 {
index 34022dd..8ebd8ea 100644 (file)
@@ -150,6 +150,7 @@ static void rwlock_bug(rwlock_t *lock, const char *msg)
 
 #define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg)
 
+#if 0          /* __write_lock_debug() can lock up - maybe this can too? */
 static void __read_lock_debug(rwlock_t *lock)
 {
        int print_once = 1;
@@ -171,12 +172,12 @@ static void __read_lock_debug(rwlock_t *lock)
                }
        }
 }
+#endif
 
 void _raw_read_lock(rwlock_t *lock)
 {
        RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
-       if (unlikely(!__raw_read_trylock(&lock->raw_lock)))
-               __read_lock_debug(lock);
+       __raw_read_lock(&lock->raw_lock);
 }
 
 int _raw_read_trylock(rwlock_t *lock)
@@ -222,6 +223,7 @@ static inline void debug_write_unlock(rwlock_t *lock)
        lock->owner_cpu = -1;
 }
 
+#if 0          /* This can cause lockups */
 static void __write_lock_debug(rwlock_t *lock)
 {
        int print_once = 1;
@@ -243,12 +245,12 @@ static void __write_lock_debug(rwlock_t *lock)
                }
        }
 }
+#endif
 
 void _raw_write_lock(rwlock_t *lock)
 {
        debug_write_lock_before(lock);
-       if (unlikely(!__raw_write_trylock(&lock->raw_lock)))
-               __write_lock_debug(lock);
+       __raw_write_lock(&lock->raw_lock);
        debug_write_lock_after(lock);
 }
 
index c4c1ac5..7917265 100644 (file)
@@ -112,15 +112,14 @@ static int subpattern(u8 *pattern, int i, int j, int g)
        return ret;
 }
 
-static void compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern,
-                              unsigned int len)
+static void compute_prefix_tbl(struct ts_bm *bm)
 {
        int i, j, g;
 
        for (i = 0; i < ASIZE; i++)
-               bm->bad_shift[i] = len;
-       for (i = 0; i < len - 1; i++)
-               bm->bad_shift[pattern[i]] = len - 1 - i;
+               bm->bad_shift[i] = bm->patlen;
+       for (i = 0; i < bm->patlen - 1; i++)
+               bm->bad_shift[bm->pattern[i]] = bm->patlen - 1 - i;
 
        /* Compute the good shift array, used to match reocurrences 
         * of a subpattern */
@@ -151,8 +150,8 @@ static struct ts_config *bm_init(const void *pattern, unsigned int len,
        bm = ts_config_priv(conf);
        bm->patlen = len;
        bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len;
-       compute_prefix_tbl(bm, pattern, len);
        memcpy(bm->pattern, pattern, len);
+       compute_prefix_tbl(bm);
 
        return conf;
 }
index 67b3f74..ab10075 100644 (file)
@@ -115,7 +115,7 @@ config SPARSEMEM_EXTREME
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
        bool "Allow for memory hot-add"
-       depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND
+       depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND && ARCH_ENABLE_MEMORY_HOTPLUG
 
 comment "Memory hotplug is currently incompatible with Software Suspend"
        depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
@@ -126,12 +126,16 @@ comment "Memory hotplug is currently incompatible with Software Suspend"
 # Default to 4 for wider testing, though 8 might be more appropriate.
 # ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
 # PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
+# XEN on x86 architecture uses the mapping field on pagetable pages to store a
+# pointer to the destructor. This conflicts with pte_lock_deinit().
 #
 config SPLIT_PTLOCK_CPUS
        int
        default "4096" if ARM && !CPU_CACHE_VIPT
        default "4096" if PARISC && !PA20
-       default "4096"
+       default "4096" if X86_XEN || X86_64_XEN
+       default "4096" if !64BIT
+       default "4"
 
 #
 # support for page migration
index 4b04495..96f354e 100644 (file)
@@ -518,18 +518,11 @@ EXPORT_SYMBOL(unlock_page);
  */
 void end_page_writeback(struct page *page)
 {
-       struct zone *zone = page_zone(page);
        if (!TestClearPageReclaim(page) || rotate_reclaimable_page(page)) {
                if (!test_clear_page_writeback(page))
                        BUG();
        }
        smp_mb__after_clear_bit();
-       if (zone->all_unreclaimable) {
-               spin_lock(&zone->lock);
-               zone->all_unreclaimable = 0;
-               zone->pages_scanned = 0;
-               spin_unlock(&zone->lock);
-       }
        wake_up_page(page, PG_writeback);
 }
 EXPORT_SYMBOL(end_page_writeback);
@@ -2022,14 +2015,21 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
        do {
                unsigned long index;
                unsigned long offset;
-               unsigned long maxlen;
                size_t copied;
 
                offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
                index = pos >> PAGE_CACHE_SHIFT;
                bytes = PAGE_CACHE_SIZE - offset;
-               if (bytes > count)
-                       bytes = count;
+
+               /* Limit the size of the copy to the caller's write size */
+               bytes = min(bytes, count);
+
+               /*
+                * Limit the size of the copy to that of the current segment,
+                * because fault_in_pages_readable() doesn't know how to walk
+                * segments.
+                */
+               bytes = min(bytes, cur_iov->iov_len - iov_base);
 
                /*
                 * Bring in the user page that we will copy from _first_.
@@ -2037,10 +2037,7 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                 * same page as we're writing to, without it being marked
                 * up-to-date.
                 */
-               maxlen = cur_iov->iov_len - iov_base;
-               if (maxlen > bytes)
-                       maxlen = bytes;
-               fault_in_pages_readable(buf, maxlen);
+               fault_in_pages_readable(buf, bytes);
 
                page = __grab_cache_page(mapping,index,&cached_page,&lru_pvec);
                if (!page) {
@@ -2048,6 +2045,12 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                        break;
                }
 
+               if (unlikely(bytes == 0)) {
+                       status = 0;
+                       copied = 0;
+                       goto zero_length_segment;
+               }
+
                status = a_ops->prepare_write(file, page, offset, offset+bytes);
                if (unlikely(status)) {
                        loff_t isize = i_size_read(inode);
@@ -2077,7 +2080,8 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
                        page_cache_release(page);
                        continue;
                }
-               if (likely(copied > 0)) {
+zero_length_segment:
+               if (likely(copied >= 0)) {
                        if (!status)
                                status = copied;
 
index 13793ba..efd0142 100644 (file)
@@ -78,7 +78,7 @@ filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
        const struct iovec *iov = *iovp;
        size_t base = *basep;
 
-       while (bytes) {
+       do {
                int copy = min(bytes, iov->iov_len - base);
 
                bytes -= copy;
@@ -87,7 +87,7 @@ filemap_set_next_iovec(const struct iovec **iovp, size_t *basep, size_t bytes)
                        iov++;
                        base = 0;
                }
-       }
+       } while (bytes);
        *iovp = iov;
        *basep = base;
 }
index 9b274fd..a02c6de 100644 (file)
@@ -142,6 +142,17 @@ start:
        return vaddr;
 }
 
+#ifdef CONFIG_XEN
+void kmap_flush_unused(void)
+{
+       spin_lock(&kmap_lock);
+       flush_all_zero_pkmaps();
+       spin_unlock(&kmap_lock);
+}
+
+EXPORT_SYMBOL(kmap_flush_unused);
+#endif
+
 void fastcall *kmap_high(struct page *page)
 {
        unsigned long vaddr;
index 1bd0404..9f75bb2 100644 (file)
@@ -402,7 +402,8 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
         * and that the resulting page looks ok.
         */
        if (unlikely(!pfn_valid(pfn))) {
-               print_bad_pte(vma, pte, addr);
+               if (!(vma->vm_flags & VM_RESERVED))
+                       print_bad_pte(vma, pte, addr);
                return NULL;
        }
 
@@ -1017,6 +1018,26 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
                        continue;
                }
 
+#ifdef CONFIG_XEN
+               if (vma && (vma->vm_flags & VM_FOREIGN)) {
+                       struct page **map = vma->vm_private_data;
+                       int offset = (start - vma->vm_start) >> PAGE_SHIFT;
+                       if (map[offset] != NULL) {
+                               if (pages) {
+                                       struct page *page = map[offset];
+                                       
+                                       pages[i] = page;
+                                       get_page(page);
+                               }
+                               if (vmas)
+                                       vmas[i] = vma;
+                               i++;
+                               start += PAGE_SIZE;
+                               len--;
+                               continue;
+                       }
+               }
+#endif
                if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
                                || !(vm_flags & vma->vm_flags))
                        return i ? : -EFAULT;
@@ -1356,6 +1377,102 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL(remap_pfn_range);
 
+#ifdef CONFIG_XEN
+static inline int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
+                                    unsigned long addr, unsigned long end,
+                                    pte_fn_t fn, void *data)
+{
+       pte_t *pte;
+       int err;
+       struct page *pmd_page;
+       spinlock_t *ptl;
+
+       pte = (mm == &init_mm) ?
+               pte_alloc_kernel(pmd, addr) :
+               pte_alloc_map_lock(mm, pmd, addr, &ptl);
+       if (!pte)
+               return -ENOMEM;
+
+       BUG_ON(pmd_huge(*pmd));
+
+       pmd_page = pmd_page(*pmd);
+
+       do {
+               err = fn(pte, pmd_page, addr, data);
+               if (err)
+                       break;
+       } while (pte++, addr += PAGE_SIZE, addr != end);
+
+       if (mm != &init_mm)
+               pte_unmap_unlock(pte-1, ptl);
+       return err;
+}
+
+static inline int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
+                                    unsigned long addr, unsigned long end,
+                                    pte_fn_t fn, void *data)
+{
+       pmd_t *pmd;
+       unsigned long next;
+       int err;
+
+       pmd = pmd_alloc(mm, pud, addr);
+       if (!pmd)
+               return -ENOMEM;
+       do {
+               next = pmd_addr_end(addr, end);
+               err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
+               if (err)
+                       break;
+       } while (pmd++, addr = next, addr != end);
+       return err;
+}
+
+static inline int apply_to_pud_range(struct mm_struct *mm, pgd_t *pgd,
+                                    unsigned long addr, unsigned long end,
+                                    pte_fn_t fn, void *data)
+{
+       pud_t *pud;
+       unsigned long next;
+       int err;
+
+       pud = pud_alloc(mm, pgd, addr);
+       if (!pud)
+               return -ENOMEM;
+       do {
+               next = pud_addr_end(addr, end);
+               err = apply_to_pmd_range(mm, pud, addr, next, fn, data);
+               if (err)
+                       break;
+       } while (pud++, addr = next, addr != end);
+       return err;
+}
+
+/*
+ * Scan a region of virtual memory, filling in page tables as necessary
+ * and calling a provided function on each leaf page table.
+ */
+int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+                       unsigned long size, pte_fn_t fn, void *data)
+{
+       pgd_t *pgd;
+       unsigned long next;
+       unsigned long end = addr + size;
+       int err;
+
+       BUG_ON(addr >= end);
+       pgd = pgd_offset(mm, addr);
+       do {
+               next = pgd_addr_end(addr, end);
+               err = apply_to_pud_range(mm, pgd, addr, next, fn, data);
+               if (err)
+                       break;
+       } while (pgd++, addr = next, addr != end);
+       return err;
+}
+EXPORT_SYMBOL_GPL(apply_to_page_range);
+#endif
+
 /*
  * handle_pte_fault chooses page fault handler according to an entry
  * which was read non-atomically.  Before making any commitment, on
index c34750c..f6940d1 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -920,6 +920,10 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
        if (!len)
                return -EINVAL;
 
+       error = arch_mmap_check(addr, len, flags);
+       if (error)
+               return error;
+
        /* Careful about overflows.. */
        len = PAGE_ALIGN(len);
        if (!len || len > TASK_SIZE)
@@ -1940,6 +1944,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
        unsigned long flags;
        struct rb_node ** rb_link, * rb_parent;
        pgoff_t pgoff = addr >> PAGE_SHIFT;
+       int error;
 
        len = PAGE_ALIGN(len);
        if (!len)
@@ -1948,6 +1953,12 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
        if ((addr + len) > TASK_SIZE || (addr + len) < addr)
                return -EINVAL;
 
+       flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
+
+       error = arch_mmap_check(addr, len, flags);
+       if (error)
+               return error;
+
        /*
         * mlock MCL_FUTURE?
         */
@@ -1991,8 +2002,6 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
                !vx_vmpages_avail(mm, len >> PAGE_SHIFT))
                return -ENOMEM;
 
-       flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
-
        /* Can we just expand an old private anonymous mapping? */
        if (vma_merge(mm, prev, addr, addr + len, flags,
                                        NULL, NULL, pgoff, NULL))
@@ -2033,6 +2042,10 @@ void exit_mmap(struct mm_struct *mm)
        unsigned long nr_accounted = 0;
        unsigned long end;
 
+#ifdef arch_exit_mmap
+       arch_exit_mmap(mm);
+#endif
+
        lru_add_drain();
        flush_cache_mm(mm);
        tlb = tlb_gather_mmu(mm, 1);
index b747cbf..5581dd1 100644 (file)
@@ -439,7 +439,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
        int i;
        int reserved = 0;
 
-       arch_free_page(page, order);
+       if (arch_free_page(page, order))
+               return;
        if (!PageHighMem(page))
                mutex_debug_check_no_locks_freed(page_address(page),
                                                 PAGE_SIZE<<order);
@@ -734,7 +735,8 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
        struct per_cpu_pages *pcp;
        unsigned long flags;
 
-       arch_free_page(page, 0);
+       if (arch_free_page(page, 0))
+               return;
 
        if (PageAnon(page))
                page->mapping = NULL;
@@ -751,11 +753,6 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
        if (pcp->count >= pcp->high) {
                free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
                pcp->count -= pcp->batch;
-       } else if (zone->all_unreclaimable) {
-               spin_lock(&zone->lock);
-               zone->all_unreclaimable = 0;
-               zone->pages_scanned = 0;
-               spin_unlock(&zone->lock);
        }
        local_irq_restore(flags);
        put_cpu();
index c4b6d0a..4842716 100644 (file)
@@ -104,21 +104,20 @@ static int __pdflush(struct pdflush_work *my_work)
                list_move(&my_work->list, &pdflush_list);
                my_work->when_i_went_to_sleep = jiffies;
                spin_unlock_irq(&pdflush_lock);
-
                schedule();
-               if (try_to_freeze()) {
-                       spin_lock_irq(&pdflush_lock);
-                       continue;
-               }
-
+               try_to_freeze();
                spin_lock_irq(&pdflush_lock);
                if (!list_empty(&my_work->list)) {
-                       printk("pdflush: bogus wakeup!\n");
+                       /*
+                        * Someone woke us up, but without removing our control
+                        * structure from the global list.  swsusp will do this
+                        * in try_to_freeze()->refrigerator().  Handle it.
+                        */
                        my_work->fn = NULL;
                        continue;
                }
                if (my_work->fn == NULL) {
-                       printk("pdflush: NULL work function\n");
+                       printk("pdflush: bogus wakeup\n");
                        continue;
                }
                spin_unlock_irq(&pdflush_lock);
index 234b2f1..a3bed3f 100644 (file)
@@ -441,11 +441,12 @@ int swap_type_of(dev_t device)
 
                if (!(swap_info[i].flags & SWP_WRITEOK))
                        continue;
+
                if (!device) {
                        spin_unlock(&swap_lock);
                        return i;
                }
-               inode = swap_info->swap_file->f_dentry->d_inode;
+               inode = swap_info[i].swap_file->f_dentry->d_inode;
                if (S_ISBLK(inode->i_mode) &&
                    device == MKDEV(imajor(inode), iminor(inode))) {
                        spin_unlock(&swap_lock);
index 3948949..729abc4 100644 (file)
@@ -67,10 +67,6 @@ static struct packet_type vlan_packet_type = {
        .func = vlan_skb_recv, /* VLAN receive method */
 };
 
-/* Bits of netdev state that are propagated from real device to virtual */
-#define VLAN_LINK_STATE_MASK \
-       ((1<<__LINK_STATE_PRESENT)|(1<<__LINK_STATE_NOCARRIER)|(1<<__LINK_STATE_DORMANT))
-
 /* End of global variables definitions. */
 
 /*
@@ -470,7 +466,9 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
        new_dev->flags = real_dev->flags;
        new_dev->flags &= ~IFF_UP;
 
-       new_dev->state = real_dev->state & ~(1<<__LINK_STATE_START);
+       new_dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
+                                            (1<<__LINK_STATE_DORMANT))) |
+                        (1<<__LINK_STATE_PRESENT);
 
        /* need 4 bytes for extra VLAN header info,
         * hope the underlying device can handle it.
index 72d8529..4f44e98 100644 (file)
@@ -98,7 +98,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
                printk(KERN_CRIT "!clip_vcc->entry (clip_vcc %p)\n", clip_vcc);
                return;
        }
-       spin_lock_bh(&entry->neigh->dev->xmit_lock);    /* block clip_start_xmit() */
+       netif_tx_lock_bh(entry->neigh->dev);    /* block clip_start_xmit() */
        entry->neigh->used = jiffies;
        for (walk = &entry->vccs; *walk; walk = &(*walk)->next)
                if (*walk == clip_vcc) {
@@ -120,9 +120,9 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
                        goto out;
                }
        printk(KERN_CRIT "ATMARP: unlink_clip_vcc failed (entry %p, vcc "
-              "0x%p)\n", entry, clip_vcc);
-      out:
-       spin_unlock_bh(&entry->neigh->dev->xmit_lock);
+         "0x%p)\n",entry,clip_vcc);
+out:
+       netif_tx_unlock_bh(entry->neigh->dev);
 }
 
 /* The neighbour entry n->lock is held. */
index 0c88a2a..f8dbcee 100644 (file)
@@ -145,9 +145,9 @@ static int br_set_tx_csum(struct net_device *dev, u32 data)
        struct net_bridge *br = netdev_priv(dev);
 
        if (data)
-               br->feature_mask |= NETIF_F_IP_CSUM;
+               br->feature_mask |= NETIF_F_NO_CSUM;
        else
-               br->feature_mask &= ~NETIF_F_IP_CSUM;
+               br->feature_mask &= ~NETIF_F_ALL_CSUM;
 
        br_features_recompute(br);
        return 0;
@@ -184,6 +184,6 @@ void br_dev_setup(struct net_device *dev)
        dev->set_mac_address = br_set_mac_address;
        dev->priv_flags = IFF_EBRIDGE;
 
-       dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
-               | NETIF_F_HIGHDMA | NETIF_F_TSO | NETIF_F_IP_CSUM;
+       dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
+                       NETIF_F_TSO | NETIF_F_NO_CSUM | NETIF_F_GSO_ROBUST;
 }
index 56f3aa4..056df7c 100644 (file)
@@ -38,16 +38,20 @@ static inline unsigned packet_length(const struct sk_buff *skb)
 int br_dev_queue_push_xmit(struct sk_buff *skb)
 {
        /* drop mtu oversized packets except tso */
-       if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->tso_size)
+       if (skb->len > skb->dev->mtu && !skb_is_gso(skb))
                kfree_skb(skb);
        else {
 #ifdef CONFIG_BRIDGE_NETFILTER
                /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
-               nf_bridge_maybe_copy_header(skb);
+               if (nf_bridge_maybe_copy_header(skb))
+                       kfree_skb(skb);
+               else
 #endif
-               skb_push(skb, ETH_HLEN);
+               {
+                       skb_push(skb, ETH_HLEN);
 
-               dev_queue_xmit(skb);
+                       dev_queue_xmit(skb);
+               }
        }
 
        return 0;
index f5d47bf..f55ef68 100644 (file)
@@ -372,17 +372,28 @@ void br_features_recompute(struct net_bridge *br)
        struct net_bridge_port *p;
        unsigned long features, checksum;
 
-       features = br->feature_mask &~ NETIF_F_IP_CSUM;
-       checksum = br->feature_mask & NETIF_F_IP_CSUM;
+       checksum = br->feature_mask & NETIF_F_ALL_CSUM ? NETIF_F_NO_CSUM : 0;
+       features = br->feature_mask & ~NETIF_F_ALL_CSUM;
 
        list_for_each_entry(p, &br->port_list, list) {
-               if (!(p->dev->features 
-                     & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM)))
+               unsigned long feature = p->dev->features;
+
+               if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM))
+                       checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
+               if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM))
+                       checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM;
+               if (!(feature & NETIF_F_IP_CSUM))
                        checksum = 0;
-               features &= p->dev->features;
+
+               if (feature & NETIF_F_GSO)
+                       feature |= NETIF_F_TSO;
+               feature |= NETIF_F_GSO;
+
+               features &= feature;
        }
 
-       br->dev->features = features | checksum | NETIF_F_LLTX;
+       br->dev->features = features | checksum | NETIF_F_LLTX |
+                           NETIF_F_GSO_ROBUST;
 }
 
 /* called with RTNL */
index 3da9264..a642727 100644 (file)
@@ -769,7 +769,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
        if (skb->protocol == htons(ETH_P_IP) &&
            skb->len > skb->dev->mtu &&
-           !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
+           !skb_is_gso(skb))
                return ip_fragment(skb, br_dev_queue_push_xmit);
        else
                return br_dev_queue_push_xmit(skb);
@@ -877,8 +877,9 @@ static unsigned int ip_sabotage_out(unsigned int hook, struct sk_buff **pskb,
        struct sk_buff *skb = *pskb;
 
        if ((out->hard_start_xmit == br_dev_xmit &&
-            okfn != br_nf_forward_finish &&
-            okfn != br_nf_local_out_finish && okfn != br_nf_dev_queue_xmit)
+           okfn != br_nf_forward_finish &&
+           okfn != br_nf_local_out_finish &&
+           okfn != br_nf_dev_queue_xmit)
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
            || ((out->priv_flags & IFF_802_1Q_VLAN) &&
                VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit)
index d051e2c..1b37fa7 100644 (file)
@@ -75,6 +75,9 @@ static void ulog_send(unsigned int nlgroup)
        if (timer_pending(&ub->timer))
                del_timer(&ub->timer);
 
+       if (!ub->skb)
+               return;
+
        /* last nlmsg needs NLMSG_DONE */
        if (ub->qlen > 1)
                ub->lastnlh->nlmsg_type = NLMSG_DONE;
index 2795393..cbc26ac 100644 (file)
 #include <net/iw_handler.h>
 #include <asm/current.h>
 #include <linux/audit.h>
+#include <linux/err.h>
 #include <linux/vs_network.h>
 
+#ifdef CONFIG_XEN
+#include <net/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#endif
+
 /*
  *     The list of packet types we will receive (as opposed to discard)
  *     and the routines to invoke.
@@ -1042,7 +1049,7 @@ static inline void net_timestamp(struct sk_buff *skb)
  *     taps currently in use.
  */
 
-void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 {
        struct packet_type *ptype;
 
@@ -1180,6 +1187,45 @@ out:
        return ret;
 }
 
+/**
+ *     skb_gso_segment - Perform segmentation on skb.
+ *     @skb: buffer to segment
+ *     @features: features for the output path (see dev->features)
+ *
+ *     This function segments the given skb and returns a list of segments.
+ *
+ *     It may return NULL if the skb requires no segmentation.  This is
+ *     only possible when GSO is used for verifying header integrity.
+ */
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
+{
+       struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+       struct packet_type *ptype;
+       int type = skb->protocol;
+
+       BUG_ON(skb_shinfo(skb)->frag_list);
+       BUG_ON(skb->ip_summed != CHECKSUM_HW);
+
+       skb->mac.raw = skb->data;
+       skb->mac_len = skb->nh.raw - skb->data;
+       __skb_pull(skb, skb->mac_len);
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
+               if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+                       segs = ptype->gso_segment(skb, features);
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       __skb_push(skb, skb->data - skb->mac.raw);
+
+       return segs;
+}
+
+EXPORT_SYMBOL(skb_gso_segment);
+
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
 void netdev_rx_csum_fault(struct net_device *dev)
@@ -1216,78 +1262,148 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 #define illegal_highdma(dev, skb)      (0)
 #endif
 
-/* Keep head the same: replace data */
-int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
-{
-       unsigned int size;
-       u8 *data;
-       long offset;
-       struct skb_shared_info *ninfo;
-       int headerlen = skb->data - skb->head;
-       int expand = (skb->tail + skb->data_len) - skb->end;
-
-       if (skb_shared(skb))
-               BUG();
-
-       if (expand <= 0)
-               expand = 0;
-
-       size = skb->end - skb->head + expand;
-       size = SKB_DATA_ALIGN(size);
-       data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
-       if (!data)
-               return -ENOMEM;
-
-       /* Copy entire thing */
-       if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
-               BUG();
-
-       /* Set up shinfo */
-       ninfo = (struct skb_shared_info*)(data + size);
-       atomic_set(&ninfo->dataref, 1);
-       ninfo->tso_size = skb_shinfo(skb)->tso_size;
-       ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
-       ninfo->nr_frags = 0;
-       ninfo->frag_list = NULL;
-
-       /* Offset between the two in bytes */
-       offset = data - skb->head;
-
-       /* Free old data. */
-       skb_release_data(skb);
-
-       skb->head = data;
-       skb->end  = data + size;
-
-       /* Set up new pointers */
-       skb->h.raw   += offset;
-       skb->nh.raw  += offset;
-       skb->mac.raw += offset;
-       skb->tail    += offset;
-       skb->data    += offset;
-
-       /* We are no longer a clone, even if we were. */
-       skb->cloned    = 0;
-
-       skb->tail     += skb->data_len;
-       skb->data_len  = 0;
+struct dev_gso_cb {
+       void (*destructor)(struct sk_buff *skb);
+};
+
+#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
+
+static void dev_gso_skb_destructor(struct sk_buff *skb)
+{
+       struct dev_gso_cb *cb;
+
+       do {
+               struct sk_buff *nskb = skb->next;
+
+               skb->next = nskb->next;
+               nskb->next = NULL;
+               kfree_skb(nskb);
+       } while (skb->next);
+
+       cb = DEV_GSO_CB(skb);
+       if (cb->destructor)
+               cb->destructor(skb);
+}
+
+/**
+ *     dev_gso_segment - Perform emulated hardware segmentation on skb.
+ *     @skb: buffer to segment
+ *
+ *     This function segments the given skb and stores the list of segments
+ *     in skb->next.
+ */
+static int dev_gso_segment(struct sk_buff *skb)
+{
+       struct net_device *dev = skb->dev;
+       struct sk_buff *segs;
+       int features = dev->features & ~(illegal_highdma(dev, skb) ?
+                                        NETIF_F_SG : 0);
+
+       segs = skb_gso_segment(skb, features);
+
+       /* Verifying header integrity only. */
+       if (!segs)
+               return 0;
+
+       if (unlikely(IS_ERR(segs)))
+               return PTR_ERR(segs);
+
+       skb->next = segs;
+       DEV_GSO_CB(skb)->destructor = skb->destructor;
+       skb->destructor = dev_gso_skb_destructor;
+
+       return 0;
+}
+
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+       if (likely(!skb->next)) {
+               if (netdev_nit)
+                       dev_queue_xmit_nit(skb, dev);
+
+               if (netif_needs_gso(dev, skb)) {
+                       if (unlikely(dev_gso_segment(skb)))
+                               goto out_kfree_skb;
+                       if (skb->next)
+                               goto gso;
+               }
+
+               return dev->hard_start_xmit(skb, dev);
+       }
+
+gso:
+       do {
+               struct sk_buff *nskb = skb->next;
+               int rc;
+
+               skb->next = nskb->next;
+               nskb->next = NULL;
+               rc = dev->hard_start_xmit(nskb, dev);
+               if (unlikely(rc)) {
+                       nskb->next = skb->next;
+                       skb->next = nskb;
+                       return rc;
+               }
+               if (unlikely(netif_queue_stopped(dev) && skb->next))
+                       return NETDEV_TX_BUSY;
+       } while (skb->next);
+       
+       skb->destructor = DEV_GSO_CB(skb)->destructor;
+
+out_kfree_skb:
+       kfree_skb(skb);
        return 0;
 }
 
 #define HARD_TX_LOCK(dev, cpu) {                       \
        if ((dev->features & NETIF_F_LLTX) == 0) {      \
-               spin_lock(&dev->xmit_lock);             \
-               dev->xmit_lock_owner = cpu;             \
+               netif_tx_lock(dev);                     \
        }                                               \
 }
 
 #define HARD_TX_UNLOCK(dev) {                          \
        if ((dev->features & NETIF_F_LLTX) == 0) {      \
-               dev->xmit_lock_owner = -1;              \
-               spin_unlock(&dev->xmit_lock);           \
+               netif_tx_unlock(dev);                   \
        }                                               \
 }
 
+#ifdef CONFIG_XEN
+inline int skb_checksum_setup(struct sk_buff *skb)
+{
+       if (skb->proto_csum_blank) {
+               if (skb->protocol != htons(ETH_P_IP))
+                       goto out;
+               skb->h.raw = (unsigned char *)skb->nh.iph + 4*skb->nh.iph->ihl;
+               if (skb->h.raw >= skb->tail)
+                       goto out;
+               switch (skb->nh.iph->protocol) {
+               case IPPROTO_TCP:
+                       skb->csum = offsetof(struct tcphdr, check);
+                       break;
+               case IPPROTO_UDP:
+                       skb->csum = offsetof(struct udphdr, check);
+                       break;
+               default:
+                       if (net_ratelimit())
+                               printk(KERN_ERR "Attempting to checksum a non-"
+                                      "TCP/UDP packet, dropping a protocol"
+                                      " %d packet\n", skb->nh.iph->protocol);
+                       goto out;
+               }
+               if ((skb->h.raw + skb->csum + 2) > skb->tail)
+                       goto out;
+               skb->ip_summed = CHECKSUM_HW;
+               skb->proto_csum_blank = 0;
+       }
+       return 0;
+out:
+       return -EPROTO;
+}
+#else
+inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
+#endif
+
+
 /**
  *     dev_queue_xmit - transmit a buffer
  *     @skb: buffer to transmit
@@ -1320,9 +1436,19 @@ int dev_queue_xmit(struct sk_buff *skb)
        struct Qdisc *q;
        int rc = -ENOMEM;
 
+       /* If a checksum-deferred packet is forwarded to a device that needs a
+        * checksum, correct the pointers and force checksumming.
+        */
+       if (skb_checksum_setup(skb))
+               goto out_kfree_skb;
+
+       /* GSO will handle the following emulations directly. */
+       if (netif_needs_gso(dev, skb))
+               goto gso;
+
        if (skb_shinfo(skb)->frag_list &&
            !(dev->features & NETIF_F_FRAGLIST) &&
-           __skb_linearize(skb, GFP_ATOMIC))
+           __skb_linearize(skb))
                goto out_kfree_skb;
 
        /* Fragmented skb is linearized if device does not support SG,
@@ -1331,25 +1457,26 @@ int dev_queue_xmit(struct sk_buff *skb)
         */
        if (skb_shinfo(skb)->nr_frags &&
            (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
-           __skb_linearize(skb, GFP_ATOMIC))
+           __skb_linearize(skb))
                goto out_kfree_skb;
 
        /* If packet is not checksummed and device does not support
         * checksumming for this protocol, complete checksumming here.
         */
        if (skb->ip_summed == CHECKSUM_HW &&
-           (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
+           (!(dev->features & NETIF_F_GEN_CSUM) &&
             (!(dev->features & NETIF_F_IP_CSUM) ||
              skb->protocol != htons(ETH_P_IP))))
                if (skb_checksum_help(skb, 0))
                        goto out_kfree_skb;
 
+gso:
        spin_lock_prefetch(&dev->queue_lock);
 
        /* Disable soft irqs for various locks below. Also 
         * stops preemption for RCU. 
         */
-       local_bh_disable(); 
+       rcu_read_lock_bh(); 
 
        /* Updates of qdisc are serialized by queue_lock. 
         * The struct Qdisc which is pointed to by qdisc is now a 
@@ -1383,8 +1510,8 @@ int dev_queue_xmit(struct sk_buff *skb)
        /* The device has no queue. Common case for software devices:
           loopback, all the sorts of tunnels...
 
-          Really, it is unlikely that xmit_lock protection is necessary here.
-          (f.e. loopback and IP tunnels are clean ignoring statistics
+          Really, it is unlikely that netif_tx_lock protection is necessary
+          here.  (f.e. loopback and IP tunnels are clean ignoring statistics
           counters.)
           However, it is possible, that they rely on protection
           made by us here.
@@ -1400,11 +1527,8 @@ int dev_queue_xmit(struct sk_buff *skb)
                        HARD_TX_LOCK(dev, cpu);
 
                        if (!netif_queue_stopped(dev)) {
-                               if (netdev_nit)
-                                       dev_queue_xmit_nit(skb, dev);
-
                                rc = 0;
-                               if (!dev->hard_start_xmit(skb, dev)) {
+                               if (!dev_hard_start_xmit(skb, dev)) {
                                        HARD_TX_UNLOCK(dev);
                                        goto out;
                                }
@@ -1423,13 +1547,13 @@ int dev_queue_xmit(struct sk_buff *skb)
        }
 
        rc = -ENETDOWN;
-       local_bh_enable();
+       rcu_read_unlock_bh();
 
 out_kfree_skb:
        kfree_skb(skb);
        return rc;
 out:
-       local_bh_enable();
+       rcu_read_unlock_bh();
        return rc;
 }
 
@@ -1707,6 +1831,19 @@ int netif_receive_skb(struct sk_buff *skb)
        }
 #endif
 
+#ifdef CONFIG_XEN
+       switch (skb->ip_summed) {
+       case CHECKSUM_UNNECESSARY:
+               skb->proto_data_valid = 1;
+               break;
+       case CHECKSUM_HW:
+               /* XXX Implement me. */
+       default:
+               skb->proto_data_valid = 0;
+               break;
+       }
+#endif
+
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
                if (!ptype->dev || ptype->dev == skb->dev) {
                        if (pt_prev) 
@@ -2793,7 +2930,7 @@ int register_netdevice(struct net_device *dev)
        BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
 
        spin_lock_init(&dev->queue_lock);
-       spin_lock_init(&dev->xmit_lock);
+       spin_lock_init(&dev->_xmit_lock);
        dev->xmit_lock_owner = -1;
 #ifdef CONFIG_NET_CLS_ACT
        spin_lock_init(&dev->ingress_lock);
@@ -2837,9 +2974,7 @@ int register_netdevice(struct net_device *dev)
 
        /* Fix illegal SG+CSUM combinations. */
        if ((dev->features & NETIF_F_SG) &&
-           !(dev->features & (NETIF_F_IP_CSUM |
-                              NETIF_F_NO_CSUM |
-                              NETIF_F_HW_CSUM))) {
+           !(dev->features & NETIF_F_ALL_CSUM)) {
                printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
                       dev->name);
                dev->features &= ~NETIF_F_SG;
@@ -3379,7 +3514,6 @@ subsys_initcall(net_dev_init);
 EXPORT_SYMBOL(__dev_get_by_index);
 EXPORT_SYMBOL(__dev_get_by_name);
 EXPORT_SYMBOL(__dev_remove_pack);
-EXPORT_SYMBOL(__skb_linearize);
 EXPORT_SYMBOL(dev_valid_name);
 EXPORT_SYMBOL(dev_add_pack);
 EXPORT_SYMBOL(dev_alloc_name);
@@ -3414,6 +3548,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
 EXPORT_SYMBOL(net_enable_timestamp);
 EXPORT_SYMBOL(net_disable_timestamp);
 EXPORT_SYMBOL(dev_get_flags);
+EXPORT_SYMBOL(skb_checksum_setup);
 
 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
 EXPORT_SYMBOL(br_handle_frame_hook);
index 05d6085..c57d887 100644 (file)
@@ -62,7 +62,7 @@
  *     Device mc lists are changed by bh at least if IPv6 is enabled,
  *     so that it must be bh protected.
  *
- *     We block accesses to device mc filters with dev->xmit_lock.
+ *     We block accesses to device mc filters with netif_tx_lock.
  */
 
 /*
@@ -93,9 +93,9 @@ static void __dev_mc_upload(struct net_device *dev)
 
 void dev_mc_upload(struct net_device *dev)
 {
-       spin_lock_bh(&dev->xmit_lock);
+       netif_tx_lock_bh(dev);
        __dev_mc_upload(dev);
-       spin_unlock_bh(&dev->xmit_lock);
+       netif_tx_unlock_bh(dev);
 }
 
 /*
@@ -107,7 +107,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
        int err = 0;
        struct dev_mc_list *dmi, **dmip;
 
-       spin_lock_bh(&dev->xmit_lock);
+       netif_tx_lock_bh(dev);
 
        for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
                /*
@@ -139,13 +139,13 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
                         */
                        __dev_mc_upload(dev);
                        
-                       spin_unlock_bh(&dev->xmit_lock);
+                       netif_tx_unlock_bh(dev);
                        return 0;
                }
        }
        err = -ENOENT;
 done:
-       spin_unlock_bh(&dev->xmit_lock);
+       netif_tx_unlock_bh(dev);
        return err;
 }
 
@@ -160,7 +160,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
 
        dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC);
 
-       spin_lock_bh(&dev->xmit_lock);
+       netif_tx_lock_bh(dev);
        for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
                if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
                    dmi->dmi_addrlen == alen) {
@@ -176,7 +176,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
        }
 
        if ((dmi = dmi1) == NULL) {
-               spin_unlock_bh(&dev->xmit_lock);
+               netif_tx_unlock_bh(dev);
                return -ENOMEM;
        }
        memcpy(dmi->dmi_addr, addr, alen);
@@ -189,11 +189,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
 
        __dev_mc_upload(dev);
        
-       spin_unlock_bh(&dev->xmit_lock);
+       netif_tx_unlock_bh(dev);
        return 0;
 
 done:
-       spin_unlock_bh(&dev->xmit_lock);
+       netif_tx_unlock_bh(dev);
        kfree(dmi1);
        return err;
 }
@@ -204,7 +204,7 @@ done:
 
 void dev_mc_discard(struct net_device *dev)
 {
-       spin_lock_bh(&dev->xmit_lock);
+       netif_tx_lock_bh(dev);
        
        while (dev->mc_list != NULL) {
                struct dev_mc_list *tmp = dev->mc_list;
@@ -215,7 +215,7 @@ void dev_mc_discard(struct net_device *dev)
        }
        dev->mc_count = 0;
 
-       spin_unlock_bh(&dev->xmit_lock);
+       netif_tx_unlock_bh(dev);
 }
 
 #ifdef CONFIG_PROC_FS
@@ -250,7 +250,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
        struct dev_mc_list *m;
        struct net_device *dev = v;
 
-       spin_lock_bh(&dev->xmit_lock);
+       netif_tx_lock_bh(dev);
        for (m = dev->mc_list; m; m = m->next) {
                int i;
 
@@ -262,7 +262,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
 
                seq_putc(seq, '\n');
        }
-       spin_unlock_bh(&dev->xmit_lock);
+       netif_tx_unlock_bh(dev);
        return 0;
 }
 
index 470c05b..1a5e49d 100644 (file)
@@ -95,12 +95,11 @@ static void dst_run_gc(unsigned long dummy)
                dst_gc_timer_inc = DST_GC_INC;
                dst_gc_timer_expires = DST_GC_MIN;
        }
-       dst_gc_timer.expires = jiffies + dst_gc_timer_expires;
 #if RT_CACHE_DEBUG >= 2
        printk("dst_total: %d/%d %ld\n",
               atomic_read(&dst_total), delayed,  dst_gc_timer_expires);
 #endif
-       add_timer(&dst_gc_timer);
+       mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
 
 out:
        spin_unlock(&dst_lock);
index c680b7e..987ccca 100644 (file)
@@ -30,7 +30,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
 
 u32 ethtool_op_get_tx_csum(struct net_device *dev)
 {
-       return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0;
+       return (dev->features & NETIF_F_ALL_CSUM) != 0;
 }
 
 int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
@@ -437,7 +437,7 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
 {
        struct ethtool_pauseparam pauseparam;
 
-       if (!dev->ethtool_ops->get_pauseparam)
+       if (!dev->ethtool_ops->set_pauseparam)
                return -EOPNOTSUPP;
 
        if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam)))
@@ -551,9 +551,7 @@ static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
                return -EFAULT;
 
        if (edata.data && 
-           !(dev->features & (NETIF_F_IP_CSUM |
-                              NETIF_F_NO_CSUM |
-                              NETIF_F_HW_CSUM)))
+           !(dev->features & NETIF_F_ALL_CSUM))
                return -EINVAL;
 
        return __ethtool_set_sg(dev, edata.data);
@@ -561,7 +559,7 @@ static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
 
 static int ethtool_get_tso(struct net_device *dev, char __user *useraddr)
 {
-       struct ethtool_value edata = { ETHTOOL_GTSO };
+       struct ethtool_value edata = { ETHTOOL_GTSO };
 
        if (!dev->ethtool_ops->get_tso)
                return -EOPNOTSUPP;
@@ -616,6 +614,29 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
        return dev->ethtool_ops->set_ufo(dev, edata.data);
 }
 
+static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
+{
+       struct ethtool_value edata = { ETHTOOL_GGSO };
+
+       edata.data = dev->features & NETIF_F_GSO;
+       if (copy_to_user(useraddr, &edata, sizeof(edata)))
+               return -EFAULT;
+       return 0;
+}
+
+static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
+{
+       struct ethtool_value edata;
+
+       if (copy_from_user(&edata, useraddr, sizeof(edata)))
+               return -EFAULT;
+       if (edata.data)
+               dev->features |= NETIF_F_GSO;
+       else
+               dev->features &= ~NETIF_F_GSO;
+       return 0;
+}
+
 static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
 {
        struct ethtool_test test;
@@ -907,6 +928,12 @@ int dev_ethtool(struct ifreq *ifr)
        case ETHTOOL_SUFO:
                rc = ethtool_set_ufo(dev, useraddr);
                break;
+       case ETHTOOL_GGSO:
+               rc = ethtool_get_gso(dev, useraddr);
+               break;
+       case ETHTOOL_SGSO:
+               rc = ethtool_set_gso(dev, useraddr);
+               break;
        default:
                rc =  -EOPNOTSUPP;
        }
index e8e05ce..9cb7818 100644 (file)
@@ -273,24 +273,21 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 
        do {
                npinfo->tries--;
-               spin_lock(&np->dev->xmit_lock);
-               np->dev->xmit_lock_owner = smp_processor_id();
+               netif_tx_lock(np->dev);
 
                /*
                 * network drivers do not expect to be called if the queue is
                 * stopped.
                 */
                if (netif_queue_stopped(np->dev)) {
-                       np->dev->xmit_lock_owner = -1;
-                       spin_unlock(&np->dev->xmit_lock);
+                       netif_tx_unlock(np->dev);
                        netpoll_poll(np);
                        udelay(50);
                        continue;
                }
 
                status = np->dev->hard_start_xmit(skb, np->dev);
-               np->dev->xmit_lock_owner = -1;
-               spin_unlock(&np->dev->xmit_lock);
+               netif_tx_unlock(np->dev);
 
                /* success */
                if(!status) {
index c23e9c0..4e7437a 100644 (file)
@@ -2149,6 +2149,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
        skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32);
        skb->dev = odev;
        skb->pkt_type = PACKET_HOST;
+       skb->nh.iph = iph;
+       skb->h.uh = udph;
 
        if (pkt_dev->nfrags <= 0)
                pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2460,6 +2462,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
        skb->protocol = protocol;
        skb->dev = odev;
        skb->pkt_type = PACKET_HOST;
+       skb->nh.ipv6h = iph;
+       skb->h.uh = udph;
 
        if (pkt_dev->nfrags <= 0)
                pgh = (struct pktgen_hdr *)skb_put(skb, datalen);
@@ -2896,8 +2900,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
                        pkt_dev->clone_count = 0;       /* reset counter */
                }
        }
-
-       spin_lock_bh(&odev->xmit_lock);
+       
+       netif_tx_lock_bh(odev);
        if (!netif_queue_stopped(odev)) {
 
                atomic_inc(&(pkt_dev->skb->users));
@@ -2942,8 +2946,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
                pkt_dev->next_tx_ns = 0;
        }
 
-       spin_unlock_bh(&odev->xmit_lock);
-
+       netif_tx_unlock_bh(odev);
+       
        /* If pkt_dev->count is zero, then run forever */
        if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
                if (atomic_read(&(pkt_dev->skb->users)) != 1) {
index cfd2d1e..375dd90 100644 (file)
@@ -398,6 +398,9 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
        }
 
        if (ida[IFLA_ADDRESS - 1]) {
+               struct sockaddr *sa;
+               int len;
+
                if (!dev->set_mac_address) {
                        err = -EOPNOTSUPP;
                        goto out;
@@ -409,7 +412,17 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
                if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len))
                        goto out;
 
-               err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1]));
+               len = sizeof(sa_family_t) + dev->addr_len;
+               sa = kmalloc(len, GFP_KERNEL);
+               if (!sa) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+               sa->sa_family = dev->type;
+               memcpy(sa->sa_data, RTA_DATA(ida[IFLA_ADDRESS - 1]),
+                      dev->addr_len);
+               err = dev->set_mac_address(dev, sa);
+               kfree(sa);
                if (err)
                        goto out;
                send_addr_notify = 1;
index a304412..e5b26c2 100644 (file)
@@ -140,6 +140,7 @@ EXPORT_SYMBOL(skb_truesize_bug);
  *     Buffers may only be allocated from interrupts using a @gfp_mask of
  *     %GFP_ATOMIC.
  */
+#ifndef CONFIG_HAVE_ARCH_ALLOC_SKB
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
                            int fclone)
 {
@@ -172,9 +173,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
        shinfo = skb_shinfo(skb);
        atomic_set(&shinfo->dataref, 1);
        shinfo->nr_frags  = 0;
-       shinfo->tso_size = 0;
-       shinfo->tso_segs = 0;
-       shinfo->ufo_size = 0;
+       shinfo->gso_size = 0;
+       shinfo->gso_segs = 0;
+       shinfo->gso_type = 0;
        shinfo->ip6_frag_id = 0;
        shinfo->frag_list = NULL;
 
@@ -194,6 +195,7 @@ nodata:
        skb = NULL;
        goto out;
 }
+#endif /* !CONFIG_HAVE_ARCH_ALLOC_SKB */
 
 /**
  *     alloc_skb_from_cache    -       allocate a network buffer
@@ -211,14 +213,18 @@ nodata:
  */
 struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
                                     unsigned int size,
-                                    gfp_t gfp_mask)
+                                    gfp_t gfp_mask,
+                                    int fclone)
 {
+       kmem_cache_t *cache;
+       struct skb_shared_info *shinfo;
        struct sk_buff *skb;
        u8 *data;
 
+       cache = fclone ? skbuff_fclone_cache : skbuff_head_cache;
+
        /* Get the HEAD */
-       skb = kmem_cache_alloc(skbuff_head_cache,
-                              gfp_mask & ~__GFP_DMA);
+       skb = kmem_cache_alloc(cache, gfp_mask & ~__GFP_DMA);
        if (!skb)
                goto out;
 
@@ -235,26 +241,39 @@ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
        skb->data = data;
        skb->tail = data;
        skb->end  = data + size;
+       /* make sure we initialize shinfo sequentially */
+       shinfo = skb_shinfo(skb);
+       atomic_set(&shinfo->dataref, 1);
+       shinfo->nr_frags  = 0;
+       shinfo->gso_size = 0;
+       shinfo->gso_segs = 0;
+       shinfo->gso_type = 0;
+       shinfo->ip6_frag_id = 0;
+       shinfo->frag_list = NULL;
 
-       atomic_set(&(skb_shinfo(skb)->dataref), 1);
-       skb_shinfo(skb)->nr_frags  = 0;
-       skb_shinfo(skb)->tso_size = 0;
-       skb_shinfo(skb)->tso_segs = 0;
-       skb_shinfo(skb)->frag_list = NULL;
+       if (fclone) {
+               struct sk_buff *child = skb + 1;
+               atomic_t *fclone_ref = (atomic_t *) (child + 1);
+
+               skb->fclone = SKB_FCLONE_ORIG;
+               atomic_set(fclone_ref, 1);
+
+               child->fclone = SKB_FCLONE_UNAVAILABLE;
+       }
 out:
        return skb;
 nodata:
-       kmem_cache_free(skbuff_head_cache, skb);
+       kmem_cache_free(cache, skb);
        skb = NULL;
        goto out;
 }
 
 
-static void skb_drop_fraglist(struct sk_buff *skb)
+static void skb_drop_list(struct sk_buff **listp)
 {
-       struct sk_buff *list = skb_shinfo(skb)->frag_list;
+       struct sk_buff *list = *listp;
 
-       skb_shinfo(skb)->frag_list = NULL;
+       *listp = NULL;
 
        do {
                struct sk_buff *this = list;
@@ -263,6 +282,11 @@ static void skb_drop_fraglist(struct sk_buff *skb)
        } while (list);
 }
 
+static inline void skb_drop_fraglist(struct sk_buff *skb)
+{
+       skb_drop_list(&skb_shinfo(skb)->frag_list);
+}
+
 static void skb_clone_fraglist(struct sk_buff *skb)
 {
        struct sk_buff *list;
@@ -434,6 +458,10 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
        C(local_df);
        n->cloned = 1;
        n->nohdr = 0;
+#ifdef CONFIG_XEN
+       C(proto_data_valid);
+       C(proto_csum_blank);
+#endif
        C(pkt_type);
        C(ip_summed);
        C(priority);
@@ -533,8 +561,9 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
        new->xid        = old->xid;
 #endif
        atomic_set(&new->users, 1);
-       skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
-       skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
+       skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
+       skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
+       skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
 }
 
 /**
@@ -806,49 +835,86 @@ struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
        return nskb;
 }      
  
-/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
- * If realloc==0 and trimming is impossible without change of data,
- * it is BUG().
+/* Trims skb to length len. It can change skb pointers.
  */
 
-int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+int ___pskb_trim(struct sk_buff *skb, unsigned int len)
 {
+       struct sk_buff **fragp;
+       struct sk_buff *frag;
        int offset = skb_headlen(skb);
        int nfrags = skb_shinfo(skb)->nr_frags;
        int i;
+       int err;
 
-       for (i = 0; i < nfrags; i++) {
+       if (skb_cloned(skb) &&
+           unlikely((err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))))
+               return err;
+
+       i = 0;
+       if (offset >= len)
+               goto drop_pages;
+
+       for (; i < nfrags; i++) {
                int end = offset + skb_shinfo(skb)->frags[i].size;
-               if (end > len) {
-                       if (skb_cloned(skb)) {
-                               BUG_ON(!realloc);
-                               if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-                                       return -ENOMEM;
-                       }
-                       if (len <= offset) {
-                               put_page(skb_shinfo(skb)->frags[i].page);
-                               skb_shinfo(skb)->nr_frags--;
-                       } else {
-                               skb_shinfo(skb)->frags[i].size = len - offset;
-                       }
+
+               if (end < len) {
+                       offset = end;
+                       continue;
                }
-               offset = end;
+
+               skb_shinfo(skb)->frags[i++].size = len - offset;
+
+drop_pages:
+               skb_shinfo(skb)->nr_frags = i;
+
+               for (; i < nfrags; i++)
+                       put_page(skb_shinfo(skb)->frags[i].page);
+
+               if (skb_shinfo(skb)->frag_list)
+                       skb_drop_fraglist(skb);
+               goto done;
        }
 
-       if (offset < len) {
+       for (fragp = &skb_shinfo(skb)->frag_list; (frag = *fragp);
+            fragp = &frag->next) {
+               int end = offset + frag->len;
+
+               if (skb_shared(frag)) {
+                       struct sk_buff *nfrag;
+
+                       nfrag = skb_clone(frag, GFP_ATOMIC);
+                       if (unlikely(!nfrag))
+                               return -ENOMEM;
+
+                       nfrag->next = frag->next;
+                       kfree_skb(frag);
+                       frag = nfrag;
+                       *fragp = frag;
+               }
+
+               if (end < len) {
+                       offset = end;
+                       continue;
+               }
+
+               if (end > len &&
+                   unlikely((err = pskb_trim(frag, len - offset))))
+                       return err;
+
+               if (frag->next)
+                       skb_drop_list(&frag->next);
+               break;
+       }
+
+done:
+       if (len > skb_headlen(skb)) {
                skb->data_len -= skb->len - len;
                skb->len       = len;
        } else {
-               if (len <= skb_headlen(skb)) {
-                       skb->len      = len;
-                       skb->data_len = 0;
-                       skb->tail     = skb->data + len;
-                       if (skb_shinfo(skb)->frag_list && !skb_cloned(skb))
-                               skb_drop_fraglist(skb);
-               } else {
-                       skb->data_len -= skb->len - len;
-                       skb->len       = len;
-               }
+               skb->len       = len;
+               skb->data_len  = 0;
+               skb->tail      = skb->data + len;
        }
 
        return 0;
@@ -1832,6 +1898,133 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 
 EXPORT_SYMBOL_GPL(skb_pull_rcsum);
 
+/**
+ *     skb_segment - Perform protocol segmentation on skb.
+ *     @skb: buffer to segment
+ *     @features: features for the output path (see dev->features)
+ *
+ *     This function performs segmentation on the given skb.  It returns
+ *     the segment at the given position.  It returns NULL if there are
+ *     no more segments to generate, or when an error is encountered.
+ */
+struct sk_buff *skb_segment(struct sk_buff *skb, int features)
+{
+       struct sk_buff *segs = NULL;
+       struct sk_buff *tail = NULL;
+       unsigned int mss = skb_shinfo(skb)->gso_size;
+       unsigned int doffset = skb->data - skb->mac.raw;
+       unsigned int offset = doffset;
+       unsigned int headroom;
+       unsigned int len;
+       int sg = features & NETIF_F_SG;
+       int nfrags = skb_shinfo(skb)->nr_frags;
+       int err = -ENOMEM;
+       int i = 0;
+       int pos;
+
+       __skb_push(skb, doffset);
+       headroom = skb_headroom(skb);
+       pos = skb_headlen(skb);
+
+       do {
+               struct sk_buff *nskb;
+               skb_frag_t *frag;
+               int hsize, nsize;
+               int k;
+               int size;
+
+               len = skb->len - offset;
+               if (len > mss)
+                       len = mss;
+
+               hsize = skb_headlen(skb) - offset;
+               if (hsize < 0)
+                       hsize = 0;
+               nsize = hsize + doffset;
+               if (nsize > len + doffset || !sg)
+                       nsize = len + doffset;
+
+               nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
+               if (unlikely(!nskb))
+                       goto err;
+
+               if (segs)
+                       tail->next = nskb;
+               else
+                       segs = nskb;
+               tail = nskb;
+
+               nskb->dev = skb->dev;
+               nskb->priority = skb->priority;
+               nskb->protocol = skb->protocol;
+               nskb->dst = dst_clone(skb->dst);
+               memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+               nskb->pkt_type = skb->pkt_type;
+               nskb->mac_len = skb->mac_len;
+
+               skb_reserve(nskb, headroom);
+               nskb->mac.raw = nskb->data;
+               nskb->nh.raw = nskb->data + skb->mac_len;
+               nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
+               memcpy(skb_put(nskb, doffset), skb->data, doffset);
+
+               if (!sg) {
+                       nskb->csum = skb_copy_and_csum_bits(skb, offset,
+                                                           skb_put(nskb, len),
+                                                           len, 0);
+                       continue;
+               }
+
+               frag = skb_shinfo(nskb)->frags;
+               k = 0;
+
+               nskb->ip_summed = CHECKSUM_HW;
+               nskb->csum = skb->csum;
+               memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
+
+               while (pos < offset + len) {
+                       BUG_ON(i >= nfrags);
+
+                       *frag = skb_shinfo(skb)->frags[i];
+                       get_page(frag->page);
+                       size = frag->size;
+
+                       if (pos < offset) {
+                               frag->page_offset += offset - pos;
+                               frag->size -= offset - pos;
+                       }
+
+                       k++;
+
+                       if (pos + size <= offset + len) {
+                               i++;
+                               pos += size;
+                       } else {
+                               frag->size -= pos + size - (offset + len);
+                               break;
+                       }
+
+                       frag++;
+               }
+
+               skb_shinfo(nskb)->nr_frags = k;
+               nskb->data_len = len - hsize;
+               nskb->len += nskb->data_len;
+               nskb->truesize += nskb->data_len;
+       } while ((offset += len) < skb->len);
+
+       return segs;
+
+err:
+       while ((skb = segs)) {
+               segs = skb->next;
+               kfree_skb(skb);
+       }
+       return ERR_PTR(err);
+}
+
+EXPORT_SYMBOL_GPL(skb_segment);
+
 void __init skb_init(void)
 {
        skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
index 547523b..a2ba9db 100644 (file)
@@ -801,8 +801,7 @@ got_it:
                 * We linearize everything except data segments here.
                 */
                if (cb->nsp_flags & ~0x60) {
-                       if (unlikely(skb_is_nonlinear(skb)) &&
-                           skb_linearize(skb, GFP_ATOMIC) != 0)
+                       if (unlikely(skb_linearize(skb)))
                                goto free_out;
                }
 
index e172cf9..5abf705 100644 (file)
@@ -629,8 +629,7 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
                        padlen);
 
         if (flags & DN_RT_PKT_CNTL) {
-               if (unlikely(skb_is_nonlinear(skb)) &&
-                   skb_linearize(skb, GFP_ATOMIC) != 0)
+               if (unlikely(skb_linearize(skb)))
                        goto dump_it;
 
                 switch(flags & DN_RT_CNTL_MSK) {
index 446faaf..2440d05 100644 (file)
@@ -400,9 +400,10 @@ int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
        rcu_read_lock();
        hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) {
                if (idx < s_idx)
-                       continue;
+                       goto next;
                if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0)
                        break;
+next:
                idx++;
        }
        rcu_read_unlock();
index dbb0852..f7e84e9 100644 (file)
@@ -58,6 +58,7 @@ config IEEE80211_CRYPT_TKIP
        depends on IEEE80211 && NET_RADIO
        select CRYPTO
        select CRYPTO_MICHAEL_MIC
+       select CRC32
        ---help---
        Include software based cipher suites in support of IEEE 802.11i
        (aka TGi, WPA, WPA2, WPA-PSK, etc.) for use with TKIP enabled
index c8c6a86..7351fa1 100644 (file)
@@ -68,6 +68,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/socket.h>
@@ -1157,6 +1158,88 @@ int inet_sk_rebuild_header(struct sock *sk)
 
 EXPORT_SYMBOL(inet_sk_rebuild_header);
 
+static int inet_gso_send_check(struct sk_buff *skb)
+{
+       struct iphdr *iph;
+       struct net_protocol *ops;
+       int proto;
+       int ihl;
+       int err = -EINVAL;
+
+       if (unlikely(!pskb_may_pull(skb, sizeof(*iph))))
+               goto out;
+
+       iph = skb->nh.iph;
+       ihl = iph->ihl * 4;
+       if (ihl < sizeof(*iph))
+               goto out;
+
+       if (unlikely(!pskb_may_pull(skb, ihl)))
+               goto out;
+
+       skb->h.raw = __skb_pull(skb, ihl);
+       iph = skb->nh.iph;
+       proto = iph->protocol & (MAX_INET_PROTOS - 1);
+       err = -EPROTONOSUPPORT;
+
+       rcu_read_lock();
+       ops = rcu_dereference(inet_protos[proto]);
+       if (likely(ops && ops->gso_send_check))
+               err = ops->gso_send_check(skb);
+       rcu_read_unlock();
+
+out:
+       return err;
+}
+
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int features)
+{
+       struct sk_buff *segs = ERR_PTR(-EINVAL);
+       struct iphdr *iph;
+       struct net_protocol *ops;
+       int proto;
+       int ihl;
+       int id;
+
+       if (!pskb_may_pull(skb, sizeof(*iph)))
+               goto out;
+
+       iph = skb->nh.iph;
+       ihl = iph->ihl * 4;
+       if (ihl < sizeof(*iph))
+               goto out;
+
+       if (!pskb_may_pull(skb, ihl))
+               goto out;
+
+       skb->h.raw = __skb_pull(skb, ihl);
+       iph = skb->nh.iph;
+       id = ntohs(iph->id);
+       proto = iph->protocol & (MAX_INET_PROTOS - 1);
+       segs = ERR_PTR(-EPROTONOSUPPORT);
+
+       rcu_read_lock();
+       ops = rcu_dereference(inet_protos[proto]);
+       if (ops && ops->gso_segment)
+               segs = ops->gso_segment(skb, features);
+       rcu_read_unlock();
+
+       if (!segs || unlikely(IS_ERR(segs)))
+               goto out;
+
+       skb = segs;
+       do {
+               iph = skb->nh.iph;
+               iph->id = htons(id++);
+               iph->tot_len = htons(skb->len - skb->mac_len);
+               iph->check = 0;
+               iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
+       } while ((skb = skb->next));
+
+out:
+       return segs;
+}
+
 #ifdef CONFIG_IP_MULTICAST
 static struct net_protocol igmp_protocol = {
        .handler =      igmp_rcv,
@@ -1166,6 +1249,8 @@ static struct net_protocol igmp_protocol = {
 static struct net_protocol tcp_protocol = {
        .handler =      tcp_v4_rcv,
        .err_handler =  tcp_v4_err,
+       .gso_send_check = tcp_v4_gso_send_check,
+       .gso_segment =  tcp_tso_segment,
        .no_policy =    1,
 };
 
@@ -1211,6 +1296,8 @@ static int ipv4_proc_init(void);
 static struct packet_type ip_packet_type = {
        .type = __constant_htons(ETH_P_IP),
        .func = ip_rcv,
+       .gso_send_check = inet_gso_send_check,
+       .gso_segment = inet_gso_segment,
 };
 
 static int __init inet_init(void)
index ec566f3..a66c96a 100644 (file)
@@ -458,13 +458,13 @@ int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb)
 
        rcu_read_lock();
        hlist_for_each_entry(r, node, &fib_rules, hlist) {
-
                if (idx < s_idx)
-                       continue;
+                       goto next;
                if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid,
                                   cb->nlh->nlmsg_seq,
                                   RTM_NEWRULE, NLM_F_MULTI) < 0)
                        break;
+next:
                idx++;
        }
        rcu_read_unlock();
index 0f4145b..1de08c1 100644 (file)
@@ -160,7 +160,7 @@ void free_fib_info(struct fib_info *fi)
 
 void fib_release_info(struct fib_info *fi)
 {
-       write_lock(&fib_info_lock);
+       write_lock_bh(&fib_info_lock);
        if (fi && --fi->fib_treeref == 0) {
                hlist_del(&fi->fib_hash);
                if (fi->fib_prefsrc)
@@ -173,7 +173,7 @@ void fib_release_info(struct fib_info *fi)
                fi->fib_dead = 1;
                fib_info_put(fi);
        }
-       write_unlock(&fib_info_lock);
+       write_unlock_bh(&fib_info_lock);
 }
 
 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
@@ -599,7 +599,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
        unsigned int old_size = fib_hash_size;
        unsigned int i, bytes;
 
-       write_lock(&fib_info_lock);
+       write_lock_bh(&fib_info_lock);
        old_info_hash = fib_info_hash;
        old_laddrhash = fib_info_laddrhash;
        fib_hash_size = new_size;
@@ -640,7 +640,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
        }
        fib_info_laddrhash = new_laddrhash;
 
-       write_unlock(&fib_info_lock);
+       write_unlock_bh(&fib_info_lock);
 
        bytes = old_size * sizeof(struct hlist_head *);
        fib_hash_free(old_info_hash, bytes);
@@ -822,7 +822,7 @@ link_it:
 
        fi->fib_treeref++;
        atomic_inc(&fi->fib_clntref);
-       write_lock(&fib_info_lock);
+       write_lock_bh(&fib_info_lock);
        hlist_add_head(&fi->fib_hash,
                       &fib_info_hash[fib_info_hashfn(fi)]);
        if (fi->fib_prefsrc) {
@@ -841,7 +841,7 @@ link_it:
                head = &fib_info_devhash[hash];
                hlist_add_head(&nh->nh_hash, head);
        } endfor_nexthops(fi)
-       write_unlock(&fib_info_lock);
+       write_unlock_bh(&fib_info_lock);
        return fi;
 
 err_inval:
index cff9c3a..20b1704 100644 (file)
@@ -210,8 +210,7 @@ static inline int ip_finish_output(struct sk_buff *skb)
                return dst_output(skb);
        }
 #endif
-       if (skb->len > dst_mtu(skb->dst) &&
-           !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size))
+       if (skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb))
                return ip_fragment(skb, ip_finish_output2);
        else
                return ip_finish_output2(skb);
@@ -362,7 +361,7 @@ packet_routed:
        }
 
        ip_select_ident_more(iph, &rt->u.dst, sk,
-                            (skb_shinfo(skb)->tso_segs ?: 1) - 1);
+                            (skb_shinfo(skb)->gso_segs ?: 1) - 1);
 
        /* Add an IP checksum. */
        ip_send_check(iph);
@@ -743,7 +742,8 @@ static inline int ip_ufo_append_data(struct sock *sk,
                               (length - transhdrlen));
        if (!err) {
                /* specify the length of each IP datagram fragment*/
-               skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+               skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
+               skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
                __skb_queue_tail(&sk->sk_write_queue, skb);
 
                return 0;
@@ -839,7 +839,7 @@ int ip_append_data(struct sock *sk,
         */
        if (transhdrlen &&
            length + fragheaderlen <= mtu &&
-           rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
+           rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
            !exthdrlen)
                csummode = CHECKSUM_HW;
 
@@ -946,7 +946,7 @@ alloc_new_skb:
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
-                               skb_trim(skb_prev, maxfraglen);
+                               pskb_trim_unique(skb_prev, maxfraglen);
                        }
 
                        copy = datalen - transhdrlen - fraggap;
@@ -1086,14 +1086,16 @@ ssize_t ip_append_page(struct sock *sk, struct page *page,
 
        inet->cork.length += size;
        if ((sk->sk_protocol == IPPROTO_UDP) &&
-           (rt->u.dst.dev->features & NETIF_F_UFO))
-               skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen);
+           (rt->u.dst.dev->features & NETIF_F_UFO)) {
+               skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
+               skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
+       }
 
 
        while (size > 0) {
                int i;
 
-               if (skb_shinfo(skb)->ufo_size)
+               if (skb_is_gso(skb))
                        len = size;
                else {
 
@@ -1139,7 +1141,7 @@ ssize_t   ip_append_page(struct sock *sk, struct page *page,
                                        data, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
-                               skb_trim(skb_prev, maxfraglen);
+                               pskb_trim_unique(skb_prev, maxfraglen);
                        }
 
                        /*
index 95278b2..098d103 100644 (file)
@@ -84,7 +84,7 @@ out:
 static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
 {
        u8 nexthdr;
-       int err = 0;
+       int err = -ENOMEM;
        struct iphdr *iph;
        union {
                struct iphdr    iph;
@@ -92,11 +92,8 @@ static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
        } tmp_iph;
 
 
-       if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-           skb_linearize(skb, GFP_ATOMIC) != 0) {
-               err = -ENOMEM;
+       if (skb_linearize_cow(skb))
                goto out;
-       }
 
        skb->ip_summed = CHECKSUM_NONE;
 
@@ -171,10 +168,8 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
                goto out_ok;
        }
 
-       if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-           skb_linearize(skb, GFP_ATOMIC) != 0) {
+       if (skb_linearize_cow(skb))
                goto out_ok;
-       }
        
        err = ipcomp_compress(x, skb);
        iph = skb->nh.iph;
index d0d1919..92adfeb 100644 (file)
@@ -237,7 +237,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
        struct arpt_entry *e, *back;
        const char *indev, *outdev;
        void *table_base;
-       struct xt_table_info *private = table->private;
+       struct xt_table_info *private;
 
        /* ARP header, plus 2 device addresses, plus 2 IP addresses.  */
        if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
@@ -249,6 +249,7 @@ unsigned int arpt_do_table(struct sk_buff **pskb,
        outdev = out ? out->name : nulldevname;
 
        read_lock_bh(&table->lock);
+       private = table->private;
        table_base = (void *)private->entries[smp_processor_id()];
        e = get_entry(table_base, private->hook_entry[hook]);
        back = get_entry(table_base, private->underflow[hook]);
index 518f581..853a3d5 100644 (file)
@@ -1092,7 +1092,7 @@ static struct ip_conntrack_expect *find_expect(struct ip_conntrack *ct,
        tuple.dst.protonum = IPPROTO_TCP;
 
        exp = __ip_conntrack_expect_find(&tuple);
-       if (exp->master == ct)
+       if (exp && exp->master == ct)
                return exp;
        return NULL;
 }
index a3d1407..35d1d90 100644 (file)
@@ -129,7 +129,12 @@ tcp_manip_pkt(struct sk_buff **pskb,
        if (hdrsize < sizeof(*hdr))
                return 1;
 
-       hdr->check = ip_nat_cheat_check(~oldip, newip,
+#ifdef CONFIG_XEN
+       if ((*pskb)->proto_csum_blank)
+               hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
+       else
+#endif
+               hdr->check = ip_nat_cheat_check(~oldip, newip,
                                        ip_nat_cheat_check(oldport ^ 0xFFFF,
                                                           newport,
                                                           hdr->check));
index ec6053f..e8eaf6a 100644 (file)
@@ -113,11 +113,17 @@ udp_manip_pkt(struct sk_buff **pskb,
                newport = tuple->dst.u.udp.port;
                portptr = &hdr->dest;
        }
-       if (hdr->check) /* 0 is a special case meaning no checksum */
-               hdr->check = ip_nat_cheat_check(~oldip, newip,
+       if (hdr->check) { /* 0 is a special case meaning no checksum */
+#ifdef CONFIG_XEN
+               if ((*pskb)->proto_csum_blank)
+                       hdr->check = ip_nat_cheat_check(oldip, ~newip, hdr->check);
+               else
+#endif
+                       hdr->check = ip_nat_cheat_check(~oldip, newip,
                                        ip_nat_cheat_check(*portptr ^ 0xFFFF,
                                                           newport,
                                                           hdr->check));
+       }
        *portptr = newport;
        return 1;
 }
index cee3397..71871c4 100644 (file)
@@ -231,7 +231,7 @@ ipt_do_table(struct sk_buff **pskb,
        const char *indev, *outdev;
        void *table_base;
        struct ipt_entry *e, *back;
-       struct xt_table_info *private = table->private;
+       struct xt_table_info *private;
 
        /* Initialization */
        ip = (*pskb)->nh.iph;
@@ -248,6 +248,7 @@ ipt_do_table(struct sk_buff **pskb,
 
        read_lock_bh(&table->lock);
        IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+       private = table->private;
        table_base = (void *)private->entries[smp_processor_id()];
        e = get_entry(table_base, private->hook_entry[hook]);
 
index f2b2171..2a79b61 100644 (file)
@@ -116,6 +116,11 @@ static void ulog_send(unsigned int nlgroupnum)
                del_timer(&ub->timer);
        }
 
+       if (!ub->skb) {
+               DEBUGP("ipt_ULOG: ulog_send: nothing to send\n");
+               return;
+       }
+
        /* last nlmsg needs NLMSG_DONE */
        if (ub->qlen > 1)
                ub->lastnlh->nlmsg_type = NLMSG_DONE;
index cc9423d..5fe2fcf 100644 (file)
@@ -3144,7 +3144,7 @@ int __init ip_rt_init(void)
                                        rhash_entries,
                                        (num_physpages >= 128 * 1024) ?
                                        15 : 17,
-                                       HASH_HIGHMEM,
+                                       0,
                                        &rt_hash_log,
                                        &rt_hash_mask,
                                        0);
index 6a22831..e369c0b 100644 (file)
 #include <linux/random.h>
 #include <linux/bootmem.h>
 #include <linux/cache.h>
+#include <linux/err.h>
 #include <linux/in.h>
 
 #include <net/icmp.h>
@@ -572,7 +573,7 @@ new_segment:
                skb->ip_summed = CHECKSUM_HW;
                tp->write_seq += copy;
                TCP_SKB_CB(skb)->end_seq += copy;
-               skb_shinfo(skb)->tso_segs = 0;
+               skb_shinfo(skb)->gso_segs = 0;
 
                if (!copied)
                        TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
@@ -623,14 +624,10 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
        ssize_t res;
        struct sock *sk = sock->sk;
 
-#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
-
        if (!(sk->sk_route_caps & NETIF_F_SG) ||
-           !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS))
+           !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
                return sock_no_sendpage(sock, page, offset, size, flags);
 
-#undef TCP_ZC_CSUM_FLAGS
-
        lock_sock(sk);
        TCP_CHECK_TIMER(sk);
        res = do_tcp_sendpages(sk, &page, offset, size, flags);
@@ -727,9 +724,7 @@ new_segment:
                                /*
                                 * Check whether we can use HW checksum.
                                 */
-                               if (sk->sk_route_caps &
-                                   (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
-                                    NETIF_F_HW_CSUM))
+                               if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
                                        skb->ip_summed = CHECKSUM_HW;
 
                                skb_entail(sk, tp, skb);
@@ -825,7 +820,7 @@ new_segment:
 
                        tp->write_seq += copy;
                        TCP_SKB_CB(skb)->end_seq += copy;
-                       skb_shinfo(skb)->tso_segs = 0;
+                       skb_shinfo(skb)->gso_segs = 0;
 
                        from += copy;
                        copied += copy;
@@ -2072,6 +2067,77 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL(compat_tcp_getsockopt);
 #endif
 
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
+{
+       struct sk_buff *segs = ERR_PTR(-EINVAL);
+       struct tcphdr *th;
+       unsigned thlen;
+       unsigned int seq;
+       unsigned int delta;
+       unsigned int oldlen;
+       unsigned int len;
+
+       if (!pskb_may_pull(skb, sizeof(*th)))
+               goto out;
+
+       th = skb->h.th;
+       thlen = th->doff * 4;
+       if (thlen < sizeof(*th))
+               goto out;
+
+       if (!pskb_may_pull(skb, thlen))
+               goto out;
+
+       oldlen = (u16)~skb->len;
+       __skb_pull(skb, thlen);
+
+       if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+               /* Packet is from an untrusted source, reset gso_segs. */
+               int mss = skb_shinfo(skb)->gso_size;
+
+               skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss;
+
+               segs = NULL;
+               goto out;
+       }
+
+       segs = skb_segment(skb, features);
+       if (IS_ERR(segs))
+               goto out;
+
+       len = skb_shinfo(skb)->gso_size;
+       delta = htonl(oldlen + (thlen + len));
+
+       skb = segs;
+       th = skb->h.th;
+       seq = ntohl(th->seq);
+
+       do {
+               th->fin = th->psh = 0;
+
+               th->check = ~csum_fold(th->check + delta);
+               if (skb->ip_summed != CHECKSUM_HW)
+                       th->check = csum_fold(csum_partial(skb->h.raw, thlen,
+                                                          skb->csum));
+
+               seq += len;
+               skb = skb->next;
+               th = skb->h.th;
+
+               th->seq = htonl(seq);
+               th->cwr = 0;
+       } while (skb->next);
+
+       delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
+       th->check = ~csum_fold(th->check + delta);
+       if (skb->ip_summed != CHECKSUM_HW)
+               th->check = csum_fold(csum_partial(skb->h.raw, thlen,
+                                                  skb->csum));
+
+out:
+       return segs;
+}
+
 extern void __skb_cb_too_small_for_tcp(int, int);
 extern struct tcp_congestion_ops tcp_reno;
 
index 662bd24..0eadd3a 100644 (file)
@@ -1072,7 +1072,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                                else
                                        pkt_len = (end_seq -
                                                   TCP_SKB_CB(skb)->seq);
-                               if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size))
+                               if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size))
                                        break;
                                pcount = tcp_skb_pcount(skb);
                        }
index 14cffde..17f5857 100644 (file)
@@ -496,6 +496,24 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
        }
 }
 
+int tcp_v4_gso_send_check(struct sk_buff *skb)
+{
+       struct iphdr *iph;
+       struct tcphdr *th;
+
+       if (!pskb_may_pull(skb, sizeof(*th)))
+               return -EINVAL;
+
+       iph = skb->nh.iph;
+       th = skb->h.th;
+
+       th->check = 0;
+       th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0);
+       skb->csum = offsetof(struct tcphdr, check);
+       skb->ip_summed = CHECKSUM_HW;
+       return 0;
+}
+
 /*
  *     This routine will send an RST to the other tcp.
  *
index 1e848ad..ba50b52 100644 (file)
@@ -197,6 +197,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
                 * See RFC1323 for an explanation of the limit to 14 
                 */
                space = max_t(u32, sysctl_tcp_rmem[2], sysctl_rmem_max);
+               space = min_t(u32, space, *window_clamp);
                while (space > 65535 && (*rcv_wscale) < 14) {
                        space >>= 1;
                        (*rcv_wscale)++;
@@ -511,15 +512,17 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
                /* Avoid the costly divide in the normal
                 * non-TSO case.
                 */
-               skb_shinfo(skb)->tso_segs = 1;
-               skb_shinfo(skb)->tso_size = 0;
+               skb_shinfo(skb)->gso_segs = 1;
+               skb_shinfo(skb)->gso_size = 0;
+               skb_shinfo(skb)->gso_type = 0;
        } else {
                unsigned int factor;
 
                factor = skb->len + (mss_now - 1);
                factor /= mss_now;
-               skb_shinfo(skb)->tso_segs = factor;
-               skb_shinfo(skb)->tso_size = mss_now;
+               skb_shinfo(skb)->gso_segs = factor;
+               skb_shinfo(skb)->gso_size = mss_now;
+               skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
        }
 }
 
@@ -912,7 +915,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int
 
        if (!tso_segs ||
            (tso_segs > 1 &&
-            skb_shinfo(skb)->tso_size != mss_now)) {
+            tcp_skb_mss(skb) != mss_now)) {
                tcp_set_skb_tso_segs(sk, skb, mss_now);
                tso_segs = tcp_skb_pcount(skb);
        }
@@ -1723,8 +1726,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
           tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
                if (!pskb_trim(skb, 0)) {
                        TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
-                       skb_shinfo(skb)->tso_segs = 1;
-                       skb_shinfo(skb)->tso_size = 0;
+                       skb_shinfo(skb)->gso_segs = 1;
+                       skb_shinfo(skb)->gso_size = 0;
+                       skb_shinfo(skb)->gso_type = 0;
                        skb->ip_summed = CHECKSUM_NONE;
                        skb->csum = 0;
                }
@@ -1929,8 +1933,9 @@ void tcp_send_fin(struct sock *sk)
                skb->csum = 0;
                TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
                TCP_SKB_CB(skb)->sacked = 0;
-               skb_shinfo(skb)->tso_segs = 1;
-               skb_shinfo(skb)->tso_size = 0;
+               skb_shinfo(skb)->gso_segs = 1;
+               skb_shinfo(skb)->gso_size = 0;
+               skb_shinfo(skb)->gso_type = 0;
 
                /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
                TCP_SKB_CB(skb)->seq = tp->write_seq;
@@ -1962,8 +1967,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
        skb->csum = 0;
        TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
        TCP_SKB_CB(skb)->sacked = 0;
-       skb_shinfo(skb)->tso_segs = 1;
-       skb_shinfo(skb)->tso_size = 0;
+       skb_shinfo(skb)->gso_segs = 1;
+       skb_shinfo(skb)->gso_size = 0;
+       skb_shinfo(skb)->gso_type = 0;
 
        /* Send it off. */
        TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
@@ -2046,8 +2052,9 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
        TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
        TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
        TCP_SKB_CB(skb)->sacked = 0;
-       skb_shinfo(skb)->tso_segs = 1;
-       skb_shinfo(skb)->tso_size = 0;
+       skb_shinfo(skb)->gso_segs = 1;
+       skb_shinfo(skb)->gso_size = 0;
+       skb_shinfo(skb)->gso_type = 0;
        th->seq = htonl(TCP_SKB_CB(skb)->seq);
        th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
        if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
@@ -2151,8 +2158,9 @@ int tcp_connect(struct sock *sk)
        TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
        TCP_ECN_send_syn(sk, tp, buff);
        TCP_SKB_CB(buff)->sacked = 0;
-       skb_shinfo(buff)->tso_segs = 1;
-       skb_shinfo(buff)->tso_size = 0;
+       skb_shinfo(buff)->gso_segs = 1;
+       skb_shinfo(buff)->gso_size = 0;
+       skb_shinfo(buff)->gso_type = 0;
        buff->csum = 0;
        TCP_SKB_CB(buff)->seq = tp->write_seq++;
        TCP_SKB_CB(buff)->end_seq = tp->write_seq;
@@ -2256,8 +2264,9 @@ void tcp_send_ack(struct sock *sk)
                buff->csum = 0;
                TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
                TCP_SKB_CB(buff)->sacked = 0;
-               skb_shinfo(buff)->tso_segs = 1;
-               skb_shinfo(buff)->tso_size = 0;
+               skb_shinfo(buff)->gso_segs = 1;
+               skb_shinfo(buff)->gso_size = 0;
+               skb_shinfo(buff)->gso_type = 0;
 
                /* Send it off, this clears delayed acks for us. */
                TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
@@ -2292,8 +2301,9 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
        skb->csum = 0;
        TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
        TCP_SKB_CB(skb)->sacked = urgent;
-       skb_shinfo(skb)->tso_segs = 1;
-       skb_shinfo(skb)->tso_size = 0;
+       skb_shinfo(skb)->gso_segs = 1;
+       skb_shinfo(skb)->gso_size = 0;
+       skb_shinfo(skb)->gso_type = 0;
 
        /* Use a previous sequence.  This should cause the other
         * end to send an ack.  Don't queue or clone SKB, just
index af2392a..ecc4119 100644 (file)
@@ -8,13 +8,19 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/compiler.h>
+#include <linux/if_ether.h>
+#include <linux/kernel.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
+#include <linux/netfilter_ipv4.h>
 #include <net/inet_ecn.h>
 #include <net/ip.h>
 #include <net/xfrm.h>
 #include <net/icmp.h>
 
+extern int skb_checksum_setup(struct sk_buff *skb);
+
 /* Add encapsulation header.
  *
  * In transport mode, the IP header will be moved forward to make space
@@ -33,6 +39,7 @@ static void xfrm4_encap(struct sk_buff *skb)
        struct dst_entry *dst = skb->dst;
        struct xfrm_state *x = dst->xfrm;
        struct iphdr *iph, *top_iph;
+       int flags;
 
        iph = skb->nh.iph;
        skb->h.ipiph = iph;
@@ -51,12 +58,15 @@ static void xfrm4_encap(struct sk_buff *skb)
 
        /* DS disclosed */
        top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
-       if (x->props.flags & XFRM_STATE_NOECN)
+
+       flags = x->props.flags;
+       if (flags & XFRM_STATE_NOECN)
                IP_ECN_clear(top_iph);
 
-       top_iph->frag_off = iph->frag_off & htons(IP_DF);
+       top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
+               0 : (iph->frag_off & htons(IP_DF));
        if (!top_iph->frag_off)
-               __ip_select_ident(top_iph, dst, 0);
+               __ip_select_ident(top_iph, dst->child, 0);
 
        top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);
 
@@ -91,12 +101,16 @@ out:
        return ret;
 }
 
-int xfrm4_output(struct sk_buff *skb)
+static int xfrm4_output_one(struct sk_buff *skb)
 {
        struct dst_entry *dst = skb->dst;
        struct xfrm_state *x = dst->xfrm;
        int err;
        
+       err = skb_checksum_setup(skb);
+       if (err)
+               goto error_nolock;
+
        if (skb->ip_summed == CHECKSUM_HW) {
                err = skb_checksum_help(skb, 0);
                if (err)
@@ -109,27 +123,33 @@ int xfrm4_output(struct sk_buff *skb)
                        goto error_nolock;
        }
 
-       spin_lock_bh(&x->lock);
-       err = xfrm_state_check(x, skb);
-       if (err)
-               goto error;
+       do {
+               spin_lock_bh(&x->lock);
+               err = xfrm_state_check(x, skb);
+               if (err)
+                       goto error;
 
-       xfrm4_encap(skb);
+               xfrm4_encap(skb);
 
-       err = x->type->output(x, skb);
-       if (err)
-               goto error;
+               err = x->type->output(x, skb);
+               if (err)
+                       goto error;
 
-       x->curlft.bytes += skb->len;
-       x->curlft.packets++;
+               x->curlft.bytes += skb->len;
+               x->curlft.packets++;
 
-       spin_unlock_bh(&x->lock);
+               spin_unlock_bh(&x->lock);
        
-       if (!(skb->dst = dst_pop(dst))) {
-               err = -EHOSTUNREACH;
-               goto error_nolock;
-       }
-       err = NET_XMIT_BYPASS;
+               if (!(skb->dst = dst_pop(dst))) {
+                       err = -EHOSTUNREACH;
+                       goto error_nolock;
+               }
+               dst = skb->dst;
+               x = dst->xfrm;
+       } while (x && !x->props.mode);
+
+       IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
+       err = 0;
 
 out_exit:
        return err;
@@ -139,3 +159,76 @@ error_nolock:
        kfree_skb(skb);
        goto out_exit;
 }
+
+static int xfrm4_output_finish2(struct sk_buff *skb)
+{
+       int err;
+
+       while (likely((err = xfrm4_output_one(skb)) == 0)) {
+               nf_reset(skb);
+
+               err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
+                             skb->dst->dev, dst_output);
+               if (unlikely(err != 1))
+                       break;
+
+               if (!skb->dst->xfrm)
+                       return dst_output(skb);
+
+               err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
+                             skb->dst->dev, xfrm4_output_finish2);
+               if (unlikely(err != 1))
+                       break;
+       }
+
+       return err;
+}
+
+static int xfrm4_output_finish(struct sk_buff *skb)
+{
+       struct sk_buff *segs;
+
+#ifdef CONFIG_NETFILTER
+       if (!skb->dst->xfrm) {
+               IPCB(skb)->flags |= IPSKB_REROUTED;
+               return dst_output(skb);
+       }
+#endif
+
+       if (!skb_is_gso(skb))
+               return xfrm4_output_finish2(skb);
+
+       skb->protocol = htons(ETH_P_IP);
+       segs = skb_gso_segment(skb, 0);
+       kfree_skb(skb);
+       if (unlikely(IS_ERR(segs)))
+               return PTR_ERR(segs);
+
+       do {
+               struct sk_buff *nskb = segs->next;
+               int err;
+
+               segs->next = NULL;
+               err = xfrm4_output_finish2(segs);
+
+               if (unlikely(err)) {
+                       while ((segs = nskb)) {
+                               nskb = segs->next;
+                               segs->next = NULL;
+                               kfree_skb(segs);
+                       }
+                       return err;
+               }
+
+               segs = nskb;
+       } while (segs);
+
+       return 0;
+}
+
+int xfrm4_output(struct sk_buff *skb)
+{
+       return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
+                           xfrm4_output_finish,
+                           !(IPCB(skb)->flags & IPSKB_REROUTED));
+}
index 1f5e0c6..386ca5c 100644 (file)
@@ -2470,6 +2470,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
        spin_lock_bh(&ifp->lock);
 
        if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
+           !(dev->flags&IFF_MULTICAST) ||
            !(ifp->flags&IFA_F_TENTATIVE)) {
                ifp->flags &= ~IFA_F_TENTATIVE;
                spin_unlock_bh(&ifp->lock);
@@ -2554,6 +2555,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
        if (ifp->idev->cnf.forwarding == 0 &&
            ifp->idev->cnf.rtr_solicits > 0 &&
            (dev->flags&IFF_LOOPBACK) == 0 &&
+           (dev->flags & IFF_MULTICAST) &&
            (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
                struct in6_addr all_routers;
 
index a18d425..9ca783d 100644 (file)
@@ -635,14 +635,17 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
        struct ipv6_txoptions *opt2;
        int err;
 
-       if (newtype != IPV6_HOPOPTS && opt->hopopt)
-               tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt));
-       if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt)
-               tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt));
-       if (newtype != IPV6_RTHDR && opt->srcrt)
-               tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt));
-       if (newtype != IPV6_DSTOPTS && opt->dst1opt)
-               tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt));
+       if (opt) {
+               if (newtype != IPV6_HOPOPTS && opt->hopopt)
+                       tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt));
+               if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt)
+                       tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt));
+               if (newtype != IPV6_RTHDR && opt->srcrt)
+                       tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt));
+               if (newtype != IPV6_DSTOPTS && opt->dst1opt)
+                       tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt));
+       }
+
        if (newopt && newoptlen)
                tot_len += CMSG_ALIGN(newoptlen);
 
@@ -659,25 +662,25 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
        opt2->tot_len = tot_len;
        p = (char *)(opt2 + 1);
 
-       err = ipv6_renew_option(opt->hopopt, newopt, newoptlen,
+       err = ipv6_renew_option(opt ? opt->hopopt : NULL, newopt, newoptlen,
                                newtype != IPV6_HOPOPTS,
                                &opt2->hopopt, &p);
        if (err)
                goto out;
 
-       err = ipv6_renew_option(opt->dst0opt, newopt, newoptlen,
+       err = ipv6_renew_option(opt ? opt->dst0opt : NULL, newopt, newoptlen,
                                newtype != IPV6_RTHDRDSTOPTS,
                                &opt2->dst0opt, &p);
        if (err)
                goto out;
 
-       err = ipv6_renew_option(opt->srcrt, newopt, newoptlen,
+       err = ipv6_renew_option(opt ? opt->srcrt : NULL, newopt, newoptlen,
                                newtype != IPV6_RTHDR,
-                               (struct ipv6_opt_hdr **)opt2->srcrt, &p);
+                               (struct ipv6_opt_hdr **)&opt2->srcrt, &p);
        if (err)
                goto out;
 
-       err = ipv6_renew_option(opt->dst1opt, newopt, newoptlen,
+       err = ipv6_renew_option(opt ? opt->dst1opt : NULL, newopt, newoptlen,
                                newtype != IPV6_DSTOPTS,
                                &opt2->dst1opt, &p);
        if (err)
index e460489..1d5dfeb 100644 (file)
@@ -147,7 +147,7 @@ static int ip6_output2(struct sk_buff *skb)
 
 int ip6_output(struct sk_buff *skb)
 {
-       if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) ||
+       if ((skb->len > dst_mtu(skb->dst) && !skb_is_gso(skb)) ||
                                dst_allfrag(skb->dst))
                return ip6_fragment(skb, ip6_output2);
        else
@@ -830,8 +830,9 @@ static inline int ip6_ufo_append_data(struct sock *sk,
                struct frag_hdr fhdr;
 
                /* specify the length of each IP datagram fragment*/
-               skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) - 
-                                               sizeof(struct frag_hdr);
+               skb_shinfo(skb)->gso_size = mtu - fragheaderlen - 
+                                           sizeof(struct frag_hdr);
+               skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4;
                ipv6_select_ident(skb, &fhdr);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);
@@ -1047,7 +1048,7 @@ alloc_new_skb:
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
-                               skb_trim(skb_prev, maxfraglen);
+                               pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        copy = datalen - transhdrlen - fraggap;
                        if (copy < 0) {
index 4863643..96cac9a 100644 (file)
@@ -65,7 +65,7 @@ static LIST_HEAD(ipcomp6_tfms_list);
 
 static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
 {
-       int err = 0;
+       int err = -ENOMEM;
        u8 nexthdr = 0;
        int hdr_len = skb->h.raw - skb->nh.raw;
        unsigned char *tmp_hdr = NULL;
@@ -76,11 +76,8 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb)
        struct crypto_tfm *tfm;
        int cpu;
 
-       if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-               skb_linearize(skb, GFP_ATOMIC) != 0) {
-               err = -ENOMEM;
+       if (skb_linearize_cow(skb))
                goto out;
-       }
 
        skb->ip_summed = CHECKSUM_NONE;
 
@@ -159,10 +156,8 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb)
                goto out_ok;
        }
 
-       if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
-               skb_linearize(skb, GFP_ATOMIC) != 0) {
+       if (skb_linearize_cow(skb))
                goto out_ok;
-       }
 
        /* compression */
        plen = skb->len - hdr_len;
index 6b98677..e9ea338 100644 (file)
@@ -9,9 +9,11 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/compiler.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/icmpv6.h>
+#include <linux/netfilter_ipv6.h>
 #include <net/dsfield.h>
 #include <net/inet_ecn.h>
 #include <net/ipv6.h>
@@ -92,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
        return ret;
 }
 
-int xfrm6_output(struct sk_buff *skb)
+static int xfrm6_output_one(struct sk_buff *skb)
 {
        struct dst_entry *dst = skb->dst;
        struct xfrm_state *x = dst->xfrm;
@@ -110,29 +112,35 @@ int xfrm6_output(struct sk_buff *skb)
                        goto error_nolock;
        }
 
-       spin_lock_bh(&x->lock);
-       err = xfrm_state_check(x, skb);
-       if (err)
-               goto error;
+       do {
+               spin_lock_bh(&x->lock);
+               err = xfrm_state_check(x, skb);
+               if (err)
+                       goto error;
 
-       xfrm6_encap(skb);
+               xfrm6_encap(skb);
 
-       err = x->type->output(x, skb);
-       if (err)
-               goto error;
+               err = x->type->output(x, skb);
+               if (err)
+                       goto error;
 
-       x->curlft.bytes += skb->len;
-       x->curlft.packets++;
+               x->curlft.bytes += skb->len;
+               x->curlft.packets++;
 
-       spin_unlock_bh(&x->lock);
+               spin_unlock_bh(&x->lock);
 
-       skb->nh.raw = skb->data;
-       
-       if (!(skb->dst = dst_pop(dst))) {
-               err = -EHOSTUNREACH;
-               goto error_nolock;
-       }
-       err = NET_XMIT_BYPASS;
+               skb->nh.raw = skb->data;
+               
+               if (!(skb->dst = dst_pop(dst))) {
+                       err = -EHOSTUNREACH;
+                       goto error_nolock;
+               }
+               dst = skb->dst;
+               x = dst->xfrm;
+       } while (x && !x->props.mode);
+
+       IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
+       err = 0;
 
 out_exit:
        return err;
@@ -142,3 +150,68 @@ error_nolock:
        kfree_skb(skb);
        goto out_exit;
 }
+
+static int xfrm6_output_finish2(struct sk_buff *skb)
+{
+       int err;
+
+       while (likely((err = xfrm6_output_one(skb)) == 0)) {
+               nf_reset(skb);
+       
+               err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL,
+                             skb->dst->dev, dst_output);
+               if (unlikely(err != 1))
+                       break;
+
+               if (!skb->dst->xfrm)
+                       return dst_output(skb);
+
+               err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL,
+                             skb->dst->dev, xfrm6_output_finish2);
+               if (unlikely(err != 1))
+                       break;
+       }
+
+       return err;
+}
+
+static int xfrm6_output_finish(struct sk_buff *skb)
+{
+       struct sk_buff *segs;
+
+       if (!skb_is_gso(skb))
+               return xfrm6_output_finish2(skb);
+
+       skb->protocol = htons(ETH_P_IP);
+       segs = skb_gso_segment(skb, 0);
+       kfree_skb(skb);
+       if (unlikely(IS_ERR(segs)))
+               return PTR_ERR(segs);
+
+       do {
+               struct sk_buff *nskb = segs->next;
+               int err;
+
+               segs->next = NULL;
+               err = xfrm6_output_finish2(segs);
+
+               if (unlikely(err)) {
+                       while ((segs = nskb)) {
+                               nskb = segs->next;
+                               segs->next = NULL;
+                               kfree_skb(segs);
+                       }
+                       return err;
+               }
+
+               segs = nskb;
+       } while (segs);
+
+       return 0;
+}
+
+int xfrm6_output(struct sk_buff *skb)
+{
+       return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev,
+                      xfrm6_output_finish);
+}
index 811d998..e6a50e8 100644 (file)
@@ -1647,7 +1647,8 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty
        ipx_pktsize     = ntohs(ipx->ipx_pktsize);
        
        /* Too small or invalid header? */
-       if (ipx_pktsize < sizeof(struct ipxhdr) || ipx_pktsize > skb->len)
+       if (ipx_pktsize < sizeof(struct ipxhdr)
+          || !pskb_may_pull(skb, ipx_pktsize))
                goto drop;
                         
        if (ipx->ipx_checksum != IPX_NO_CHECKSUM &&
index 9cc07e6..23af261 100644 (file)
@@ -366,6 +366,9 @@ __nfulnl_send(struct nfulnl_instance *inst)
        if (timer_pending(&inst->timer))
                del_timer(&inst->timer);
 
+       if (!inst->skb)
+               return 0;
+
        if (inst->qlen > 1)
                inst->lastnlh->nlmsg_type = NLMSG_DONE;
 
index 914c85f..eb7dc29 100644 (file)
@@ -34,7 +34,7 @@
 #include <net/sch_generic.h>
 #include <net/act_api.h>
 
-#if 1 /* control */
+#if 0 /* control */
 #define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args)
 #else
 #define DPRINTK(format, args...)
@@ -165,7 +165,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act,
        while ((a = act) != NULL) {
 repeat:
                if (a->ops && a->ops->act) {
-                       ret = a->ops->act(&skb, a);
+                       ret = a->ops->act(skb, a, res);
                        if (TC_MUNGED & skb->tc_verd) {
                                /* copied already, allow trampling */
                                skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
@@ -179,11 +179,6 @@ repeat:
                act = a->next;
        }
 exec_done:
-       if (skb->tc_classid > 0) {
-               res->classid = skb->tc_classid;
-               res->class = 0;
-               skb->tc_classid = 0;
-       }
        return ret;
 }
 
@@ -256,15 +251,17 @@ tcf_action_dump(struct sk_buff *skb, struct tc_action *act, int bind, int ref)
                RTA_PUT(skb, a->order, 0, NULL);
                err = tcf_action_dump_1(skb, a, bind, ref);
                if (err < 0)
-                       goto rtattr_failure;
+                       goto errout;
                r->rta_len = skb->tail - (u8*)r;
        }
 
        return 0;
 
 rtattr_failure:
+       err = -EINVAL;
+errout:
        skb_trim(skb, b - skb->data);
-       return -err;
+       return err;
 }
 
 struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
@@ -295,7 +292,7 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
        if (a_o == NULL) {
 #ifdef CONFIG_KMOD
                rtnl_unlock();
-               request_module(act_name);
+               request_module("act_%s", act_name);
                rtnl_lock();
 
                a_o = tc_lookup_action_n(act_name);
@@ -311,6 +308,7 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est,
                        goto err_mod;
                }
 #endif
+               *err = -ENOENT;
                goto err_out;
        }
 
@@ -428,17 +426,19 @@ errout:
 
 static int
 tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq,
-             unsigned flags, int event, int bind, int ref)
+             u16 flags, int event, int bind, int ref)
 {
        struct tcamsg *t;
        struct nlmsghdr *nlh;
        unsigned char *b = skb->tail;
        struct rtattr *x;
 
-       nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t));
-       nlh->nlmsg_flags = flags;
+       nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
+
        t = NLMSG_DATA(nlh);
        t->tca_family = AF_UNSPEC;
+       t->tca__pad1 = 0;
+       t->tca__pad2 = 0;
        
        x = (struct rtattr*) skb->tail;
        RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
@@ -580,6 +580,8 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
        nlh = NLMSG_PUT(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t));
        t = NLMSG_DATA(nlh);
        t->tca_family = AF_UNSPEC;
+       t->tca__pad1 = 0;
+       t->tca__pad2 = 0;
 
        x = (struct rtattr *) skb->tail;
        RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
@@ -594,7 +596,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid)
        nlh->nlmsg_flags |= NLM_F_ROOT;
        module_put(a->ops->owner);
        kfree(a);
-       err = rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+       err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
        if (err > 0)
                return 0;
 
@@ -657,7 +659,7 @@ tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event)
 
                /* now do the delete */
                tcf_action_destroy(head, 0);
-               ret = rtnetlink_send(skb, pid, RTMGRP_TC,
+               ret = rtnetlink_send(skb, pid, RTNLGRP_TC,
                                     n->nlmsg_flags&NLM_F_ECHO);
                if (ret > 0)
                        return 0;
@@ -669,7 +671,7 @@ err:
 }
 
 static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
-                          unsigned flags)
+                          u16 flags)
 {
        struct tcamsg *t;
        struct nlmsghdr *nlh;
@@ -684,11 +686,12 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
 
        b = (unsigned char *)skb->tail;
 
-       nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*t));
-       nlh->nlmsg_flags = flags;
+       nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*t), flags);
        t = NLMSG_DATA(nlh);
        t->tca_family = AF_UNSPEC;
-       
+       t->tca__pad1 = 0;
+       t->tca__pad2 = 0;
+
        x = (struct rtattr*) skb->tail;
        RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
 
@@ -698,16 +701,16 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
        x->rta_len = skb->tail - (u8*)x;
        
        nlh->nlmsg_len = skb->tail - b;
-       NETLINK_CB(skb).dst_groups = RTMGRP_TC;
+       NETLINK_CB(skb).dst_group = RTNLGRP_TC;
        
-       err = rtnetlink_send(skb, pid, RTMGRP_TC, flags&NLM_F_ECHO);
+       err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
        if (err > 0)
                err = 0;
        return err;
 
 rtattr_failure:
 nlmsg_failure:
-       skb_trim(skb, b - skb->data);
+       kfree_skb(skb);
        return -1;
 }
 
@@ -777,7 +780,7 @@ replay:
        return ret;
 }
 
-static char *
+static struct rtattr *
 find_dump_kind(struct nlmsghdr *n)
 {
        struct rtattr *tb1, *tb2[TCA_ACT_MAX+1];
@@ -805,7 +808,7 @@ find_dump_kind(struct nlmsghdr *n)
                return NULL;
        kind = tb2[TCA_ACT_KIND-1];
 
-       return (char *) RTA_DATA(kind);
+       return kind;
 }
 
 static int
@@ -818,16 +821,15 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
        struct tc_action a;
        int ret = 0;
        struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
-       char *kind = find_dump_kind(cb->nlh);
+       struct rtattr *kind = find_dump_kind(cb->nlh);
 
        if (kind == NULL) {
                printk("tc_dump_action: action bad kind\n");
                return 0;
        }
 
-       a_o = tc_lookup_action_n(kind);
+       a_o = tc_lookup_action(kind);
        if (a_o == NULL) {
-               printk("failed to find %s\n", kind);
                return 0;
        }
 
@@ -835,7 +837,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
        a.ops = a_o;
 
        if (a_o->walk == NULL) {
-               printk("tc_dump_action: %s !capable of dumping table\n", kind);
+               printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind);
                goto rtattr_failure;
        }
 
@@ -843,6 +845,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
                        cb->nlh->nlmsg_type, sizeof(*t));
        t = NLMSG_DATA(nlh);
        t->tca_family = AF_UNSPEC;
+       t->tca__pad1 = 0;
+       t->tca__pad2 = 0;
 
        x = (struct rtattr *) skb->tail;
        RTA_PUT(skb, TCA_ACT_TAB, 0, NULL);
index 138ea92..74d4a1d 100644 (file)
@@ -72,9 +72,9 @@ void qdisc_unlock_tree(struct net_device *dev)
    dev->queue_lock serializes queue accesses for this device
    AND dev->qdisc pointer itself.
 
-   dev->xmit_lock serializes accesses to device driver.
+   netif_tx_lock serializes accesses to device driver.
 
-   dev->queue_lock and dev->xmit_lock are mutually exclusive,
+   dev->queue_lock and netif_tx_lock are mutually exclusive,
    if one is grabbed, another must be free.
  */
 
@@ -90,14 +90,17 @@ void qdisc_unlock_tree(struct net_device *dev)
    NOTE: Called under dev->queue_lock with locally disabled BH.
 */
 
-int qdisc_restart(struct net_device *dev)
+static inline int qdisc_restart(struct net_device *dev)
 {
        struct Qdisc *q = dev->qdisc;
        struct sk_buff *skb;
 
        /* Dequeue packet */
-       if ((skb = q->dequeue(q)) != NULL) {
+       if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
                unsigned nolock = (dev->features & NETIF_F_LLTX);
+
+               dev->gso_skb = NULL;
+
                /*
                 * When the driver has LLTX set it does its own locking
                 * in start_xmit. No need to add additional overhead by
@@ -108,7 +111,7 @@ int qdisc_restart(struct net_device *dev)
                 * will be requeued.
                 */
                if (!nolock) {
-                       if (!spin_trylock(&dev->xmit_lock)) {
+                       if (!netif_tx_trylock(dev)) {
                        collision:
                                /* So, someone grabbed the driver. */
                                
@@ -126,8 +129,6 @@ int qdisc_restart(struct net_device *dev)
                                __get_cpu_var(netdev_rx_stat).cpu_collision++;
                                goto requeue;
                        }
-                       /* Remember that the driver is grabbed by us. */
-                       dev->xmit_lock_owner = smp_processor_id();
                }
                
                {
@@ -136,14 +137,11 @@ int qdisc_restart(struct net_device *dev)
 
                        if (!netif_queue_stopped(dev)) {
                                int ret;
-                               if (netdev_nit)
-                                       dev_queue_xmit_nit(skb, dev);
 
-                               ret = dev->hard_start_xmit(skb, dev);
+                               ret = dev_hard_start_xmit(skb, dev);
                                if (ret == NETDEV_TX_OK) { 
                                        if (!nolock) {
-                                               dev->xmit_lock_owner = -1;
-                                               spin_unlock(&dev->xmit_lock);
+                                               netif_tx_unlock(dev);
                                        }
                                        spin_lock(&dev->queue_lock);
                                        return -1;
@@ -157,8 +155,7 @@ int qdisc_restart(struct net_device *dev)
                        /* NETDEV_TX_BUSY - we need to requeue */
                        /* Release the driver */
                        if (!nolock) { 
-                               dev->xmit_lock_owner = -1;
-                               spin_unlock(&dev->xmit_lock);
+                               netif_tx_unlock(dev);
                        } 
                        spin_lock(&dev->queue_lock);
                        q = dev->qdisc;
@@ -175,7 +172,10 @@ int qdisc_restart(struct net_device *dev)
                 */
 
 requeue:
-               q->ops->requeue(skb, q);
+               if (skb->next)
+                       dev->gso_skb = skb;
+               else
+                       q->ops->requeue(skb, q);
                netif_schedule(dev);
                return 1;
        }
@@ -183,11 +183,23 @@ requeue:
        return q->q.qlen;
 }
 
+void __qdisc_run(struct net_device *dev)
+{
+       if (unlikely(dev->qdisc == &noop_qdisc))
+               goto out;
+
+       while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev))
+               /* NOTHING */;
+
+out:
+       clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state);
+}
+
 static void dev_watchdog(unsigned long arg)
 {
        struct net_device *dev = (struct net_device *)arg;
 
-       spin_lock(&dev->xmit_lock);
+       netif_tx_lock(dev);
        if (dev->qdisc != &noop_qdisc) {
                if (netif_device_present(dev) &&
                    netif_running(dev) &&
@@ -203,7 +215,7 @@ static void dev_watchdog(unsigned long arg)
                                dev_hold(dev);
                }
        }
-       spin_unlock(&dev->xmit_lock);
+       netif_tx_unlock(dev);
 
        dev_put(dev);
 }
@@ -227,17 +239,17 @@ void __netdev_watchdog_up(struct net_device *dev)
 
 static void dev_watchdog_up(struct net_device *dev)
 {
-       spin_lock_bh(&dev->xmit_lock);
+       netif_tx_lock_bh(dev);
        __netdev_watchdog_up(dev);
-       spin_unlock_bh(&dev->xmit_lock);
+       netif_tx_unlock_bh(dev);
 }
 
 static void dev_watchdog_down(struct net_device *dev)
 {
-       spin_lock_bh(&dev->xmit_lock);
+       netif_tx_lock_bh(dev);
        if (del_timer(&dev->watchdog_timer))
                dev_put(dev);
-       spin_unlock_bh(&dev->xmit_lock);
+       netif_tx_unlock_bh(dev);
 }
 
 void netif_carrier_on(struct net_device *dev)
@@ -579,10 +591,17 @@ void dev_deactivate(struct net_device *dev)
 
        dev_watchdog_down(dev);
 
-       while (test_bit(__LINK_STATE_SCHED, &dev->state))
+       /* Wait for outstanding dev_queue_xmit calls. */
+       synchronize_rcu();
+
+       /* Wait for outstanding qdisc_run calls. */
+       while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state))
                yield();
 
-       spin_unlock_wait(&dev->xmit_lock);
+       if (dev->gso_skb) {
+               kfree_skb(dev->gso_skb);
+               dev->gso_skb = NULL;
+       }
 }
 
 void dev_init_scheduler(struct net_device *dev)
@@ -624,6 +643,5 @@ EXPORT_SYMBOL(qdisc_create_dflt);
 EXPORT_SYMBOL(qdisc_alloc);
 EXPORT_SYMBOL(qdisc_destroy);
 EXPORT_SYMBOL(qdisc_reset);
-EXPORT_SYMBOL(qdisc_restart);
 EXPORT_SYMBOL(qdisc_lock_tree);
 EXPORT_SYMBOL(qdisc_unlock_tree);
index 79b8ef3..4c16ad5 100644 (file)
@@ -302,20 +302,17 @@ restart:
 
                switch (teql_resolve(skb, skb_res, slave)) {
                case 0:
-                       if (spin_trylock(&slave->xmit_lock)) {
-                               slave->xmit_lock_owner = smp_processor_id();
+                       if (netif_tx_trylock(slave)) {
                                if (!netif_queue_stopped(slave) &&
                                    slave->hard_start_xmit(skb, slave) == 0) {
-                                       slave->xmit_lock_owner = -1;
-                                       spin_unlock(&slave->xmit_lock);
+                                       netif_tx_unlock(slave);
                                        master->slaves = NEXT_SLAVE(q);
                                        netif_wake_queue(dev);
                                        master->stats.tx_packets++;
                                        master->stats.tx_bytes += len;
                                        return 0;
                                }
-                               slave->xmit_lock_owner = -1;
-                               spin_unlock(&slave->xmit_lock);
+                               netif_tx_unlock(slave);
                        }
                        if (netif_queue_stopped(dev))
                                busy = 1;
index 5e0de3c..b9b9070 100644 (file)
@@ -806,38 +806,26 @@ no_mem:
 
 /* Helper to create ABORT with a SCTP_ERROR_USER_ABORT error.  */
 struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc,
-                                  const struct sctp_chunk *chunk,
-                                  const struct msghdr *msg)
+                                       const struct msghdr *msg,
+                                       size_t paylen)
 {
        struct sctp_chunk *retval;
-       void *payload = NULL, *payoff;
-       size_t paylen = 0;
-       struct iovec *iov = NULL;
-       int iovlen = 0;
-
-       if (msg) {
-               iov = msg->msg_iov;
-               iovlen = msg->msg_iovlen;
-               paylen = get_user_iov_size(iov, iovlen);
-       }
+       void *payload = NULL;
+       int err;
 
-       retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen);
+       retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen);
        if (!retval)
                goto err_chunk;
 
        if (paylen) {
                /* Put the msg_iov together into payload.  */
-               payload = kmalloc(paylen, GFP_ATOMIC);
+               payload = kmalloc(paylen, GFP_KERNEL);
                if (!payload)
                        goto err_payload;
-               payoff = payload;
 
-               for (; iovlen > 0; --iovlen) {
-                       if (copy_from_user(payoff, iov->iov_base,iov->iov_len))
-                               goto err_copy;
-                       payoff += iov->iov_len;
-                       iov++;
-               }
+               err = memcpy_fromiovec(payload, msg->msg_iov, paylen);
+               if (err < 0)
+                       goto err_copy;
        }
 
        sctp_init_cause(retval, SCTP_ERROR_USER_ABORT, payload, paylen);
index 9e58144..66e9c5b 100644 (file)
@@ -4026,18 +4026,12 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
         * from its upper layer, but retransmits data to the far end
         * if necessary to fill gaps.
         */
-       struct msghdr *msg = arg;
-       struct sctp_chunk *abort;
+       struct sctp_chunk *abort = arg;
        sctp_disposition_t retval;
 
        retval = SCTP_DISPOSITION_CONSUME;
 
-       /* Generate ABORT chunk to send the peer.  */
-       abort = sctp_make_abort_user(asoc, NULL, msg);
-       if (!abort)
-               retval = SCTP_DISPOSITION_NOMEM;
-       else
-               sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+       sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
 
        /* Even if we can't send the ABORT due to low memory delete the
         * TCB.  This is a departure from our typical NOMEM handling.
@@ -4161,8 +4155,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
        void *arg,
        sctp_cmd_seq_t *commands)
 {
-       struct msghdr *msg = arg;
-       struct sctp_chunk *abort;
+       struct sctp_chunk *abort = arg;
        sctp_disposition_t retval;
 
        /* Stop T1-init timer */
@@ -4170,12 +4163,7 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
                        SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
        retval = SCTP_DISPOSITION_CONSUME;
 
-       /* Generate ABORT chunk to send the peer */
-       abort = sctp_make_abort_user(asoc, NULL, msg);
-       if (!abort)
-               retval = SCTP_DISPOSITION_NOMEM;
-       else
-               sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+       sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
 
        sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
                        SCTP_STATE(SCTP_STATE_CLOSED));
index b811691..5b1c837 100644 (file)
@@ -1246,9 +1246,13 @@ SCTP_STATIC void sctp_close(struct sock *sk, long timeout)
                        }
                }
 
-               if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)
-                       sctp_primitive_ABORT(asoc, NULL);
-               else
+               if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+                       struct sctp_chunk *chunk;
+
+                       chunk = sctp_make_abort_user(asoc, NULL, 0);
+                       if (chunk)
+                               sctp_primitive_ABORT(asoc, chunk);
+               } else
                        sctp_primitive_SHUTDOWN(asoc, NULL);
        }
 
@@ -1477,8 +1481,16 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
                        goto out_unlock;
                }
                if (sinfo_flags & SCTP_ABORT) {
+                       struct sctp_chunk *chunk;
+
+                       chunk = sctp_make_abort_user(asoc, msg, msg_len);
+                       if (!chunk) {
+                               err = -ENOMEM;
+                               goto out_unlock;
+                       }
+
                        SCTP_DEBUG_PRINTK("Aborting association: %p\n", asoc);
-                       sctp_primitive_ABORT(asoc, msg);
+                       sctp_primitive_ABORT(asoc, chunk);
                        err = 0;
                        goto out_unlock;
                }
index 7026b08..00cb388 100644 (file)
@@ -71,7 +71,12 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
        new = detail->alloc();
        if (!new)
                return NULL;
+       /* must fully initialise 'new', else
+        * we might get lose if we need to
+        * cache_put it soon.
+        */
        cache_init(new);
+       detail->init(new, key);
 
        write_lock(&detail->hash_lock);
 
@@ -85,7 +90,6 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
                        return tmp;
                }
        }
-       detail->init(new, key);
        new->next = *head;
        *head = new;
        detail->entries++;
index 7f393a0..0691aca 100644 (file)
@@ -1745,6 +1745,8 @@ static int snd_pcm_oss_open_file(struct file *file,
        for (idx = 0; idx < 2; idx++) {
                if (setup[idx].disable)
                        continue;
+               if (! pcm->streams[idx].substream_count)
+                       continue; /* no matching substream */
                if (idx == SNDRV_PCM_STREAM_PLAYBACK) {
                        if (! (f_mode & FMODE_WRITE))
                                continue;
index cdeeb63..4585600 100644 (file)
@@ -628,8 +628,9 @@ static void snd_timer_tasklet(unsigned long arg)
        struct snd_timer_instance *ti;
        struct list_head *p;
        unsigned long resolution, ticks;
+       unsigned long flags;
 
-       spin_lock(&timer->lock);
+       spin_lock_irqsave(&timer->lock, flags);
        /* now process all callbacks */
        while (!list_empty(&timer->sack_list_head)) {
                p = timer->sack_list_head.next;         /* get first item */
@@ -649,7 +650,7 @@ static void snd_timer_tasklet(unsigned long arg)
                spin_lock(&timer->lock);
                ti->flags &= ~SNDRV_TIMER_IFLG_CALLBACK;
        }
-       spin_unlock(&timer->lock);
+       spin_unlock_irqrestore(&timer->lock, flags);
 }
 
 /*
index d2afaea..2fb4f74 100644 (file)
@@ -11,6 +11,7 @@ snd-cs4236-objs := cs4236.o
 
 # Toplevel Module Dependency
 obj-$(CONFIG_SND_AZT2320) += snd-cs4231-lib.o
+obj-$(CONFIG_SND_MIRO) += snd-cs4231-lib.o
 obj-$(CONFIG_SND_OPL3SA2) += snd-cs4231-lib.o
 obj-$(CONFIG_SND_CS4231) += snd-cs4231.o snd-cs4231-lib.o
 obj-$(CONFIG_SND_CS4232) += snd-cs4232.o snd-cs4231-lib.o
index a208180..8f34986 100644 (file)
@@ -318,17 +318,19 @@ config SND_FM801
          To compile this driver as a module, choose M here: the module
          will be called snd-fm801.
 
-config SND_FM801_TEA575X
-       tristate "ForteMedia FM801 + TEA5757 tuner"
+config SND_FM801_TEA575X_BOOL
+       bool "ForteMedia FM801 + TEA5757 tuner"
        depends on SND_FM801
-        select VIDEO_DEV
        help
          Say Y here to include support for soundcards based on the ForteMedia
          FM801 chip with a TEA5757 tuner connected to GPIO1-3 pins (Media
-         Forte SF256-PCS-02).
+         Forte SF256-PCS-02) into the snd-fm801 driver.
 
-         To compile this driver as a module, choose M here: the module
-         will be called snd-fm801-tea575x.
+config SND_FM801_TEA575X
+       tristate
+       depends on SND_FM801_TEA575X_BOOL
+       default SND_FM801
+       select VIDEO_DEV
 
 config SND_HDA_INTEL
        tristate "Intel HD Audio"
index 873f486..118dcc7 100644 (file)
@@ -47,7 +47,7 @@ static int __devinit snd_vortex_midi(vortex_t * vortex)
        struct snd_rawmidi *rmidi;
        int temp, mode;
        struct snd_mpu401 *mpu;
-       int port;
+       unsigned long port;
 
 #ifdef VORTEX_MPU401_LEGACY
        /* EnableHardCodedMPU401Port() */
index d72fc28..09a2885 100644 (file)
@@ -35,7 +35,7 @@
 
 #include <asm/io.h>
 
-#if (defined(CONFIG_SND_FM801_TEA575X) || defined(CONFIG_SND_FM801_TEA575X_MODULE)) && (defined(CONFIG_VIDEO_DEV) || defined(CONFIG_VIDEO_DEV_MODULE))
+#ifdef CONFIG_SND_FM801_TEA575X_BOOL
 #include <sound/tea575x-tuner.h>
 #define TEA575X_RADIO 1
 #endif
index 959953c..9dd541d 100644 (file)
 #include <asm/io.h>
 #include <linux/delay.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
+#include <linux/mutex.h>
 #include <sound/core.h>
 #include <sound/initval.h>
 #include "hda_codec.h"
 
 
-static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
-static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
-static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
-static char *model[SNDRV_CARDS];
+static int index = SNDRV_DEFAULT_IDX1;
+static char *id = SNDRV_DEFAULT_STR1;
+static char *model;
+static int position_fix;
+static int probe_mask = -1;
+static int single_cmd;
 
-module_param_array(index, int, NULL, 0444);
+module_param(index, int, 0444);
 MODULE_PARM_DESC(index, "Index value for Intel HD audio interface.");
-module_param_array(id, charp, NULL, 0444);
+module_param(id, charp, 0444);
 MODULE_PARM_DESC(id, "ID string for Intel HD audio interface.");
-module_param_array(enable, bool, NULL, 0444);
-MODULE_PARM_DESC(enable, "Enable Intel HD audio interface.");
-module_param_array(model, charp, NULL, 0444);
+module_param(model, charp, 0444);
 MODULE_PARM_DESC(model, "Use the given board model.");
+module_param(position_fix, int, 0444);
+MODULE_PARM_DESC(position_fix, "Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size).");
+module_param(probe_mask, int, 0444);
+MODULE_PARM_DESC(probe_mask, "Bitmask to probe codecs (default = -1).");
+module_param(single_cmd, bool, 0444);
+MODULE_PARM_DESC(single_cmd, "Use single command to communicate with codecs (for debugging only).");
+
+
+/* just for backward compatibility */
+static int enable;
+module_param(enable, bool, 0444);
 
 MODULE_LICENSE("GPL");
 MODULE_SUPPORTED_DEVICE("{{Intel, ICH6},"
                         "{Intel, ICH6M},"
                         "{Intel, ICH7},"
-                        "{Intel, ESB2}}");
+                        "{Intel, ESB2},"
+                        "{Intel, ICH8},"
+                        "{ATI, SB450},"
+                        "{ATI, SB600},"
+                        "{VIA, VT8251},"
+                        "{VIA, VT8237A},"
+                        "{SiS, SIS966},"
+                        "{ULI, M5461}}");
 MODULE_DESCRIPTION("Intel HDA driver");
 
 #define SFX    "hda-intel: "
@@ -135,13 +155,30 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
  */
 
 /* max number of SDs */
-#define MAX_ICH6_DEV           8
+/* ICH, ATI and VIA have 4 playback and 4 capture */
+#define ICH6_CAPTURE_INDEX     0
+#define ICH6_NUM_CAPTURE       4
+#define ICH6_PLAYBACK_INDEX    4
+#define ICH6_NUM_PLAYBACK      4
+
+/* ULI has 6 playback and 5 capture */
+#define ULI_CAPTURE_INDEX      0
+#define ULI_NUM_CAPTURE                5
+#define ULI_PLAYBACK_INDEX     5
+#define ULI_NUM_PLAYBACK       6
+
+/* this number is statically defined for simplicity */
+#define MAX_AZX_DEV            16
+
 /* max number of fragments - we may use more if allocating more pages for BDL */
-#define AZX_MAX_FRAG           (PAGE_SIZE / (MAX_ICH6_DEV * 16))
+#define BDL_SIZE               PAGE_ALIGN(8192)
+#define AZX_MAX_FRAG           (BDL_SIZE / (MAX_AZX_DEV * 16))
 /* max buffer size - no h/w limit, you can increase as you like */
 #define AZX_MAX_BUF_SIZE       (1024*1024*1024)
 /* max number of PCM devics per card */
-#define AZX_MAX_PCMS           8
+#define AZX_MAX_AUDIO_PCMS     6
+#define AZX_MAX_MODEM_PCMS     2
+#define AZX_MAX_PCMS           (AZX_MAX_AUDIO_PCMS + AZX_MAX_MODEM_PCMS)
 
 /* RIRB int mask: overrun[2], response[0] */
 #define RIRB_INT_RESPONSE      0x01
@@ -150,7 +187,7 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
 
 /* STATESTS int mask: SD2,SD1,SD0 */
 #define STATESTS_INT_MASK      0x07
-#define AZX_MAX_CODECS         3
+#define AZX_MAX_CODECS         4
 
 /* SD_CTL bits */
 #define SD_CTL_STREAM_RESET    0x01    /* stream reset bit */
@@ -172,6 +209,9 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
 #define ICH6_INT_CTRL_EN       0x40000000      /* controller interrupt enable bit */
 #define ICH6_INT_GLOBAL_EN     0x80000000      /* global interrupt enable bit */
 
+/* GCTL unsolicited response enable bit */
+#define ICH6_GCTL_UREN         (1<<8)
+
 /* GCTL reset bit */
 #define ICH6_GCTL_RESET                (1<<0)
 
@@ -183,27 +223,26 @@ enum { SDI0, SDI1, SDI2, SDI3, SDO0, SDO1, SDO2, SDO3 };
 #define ICH6_MAX_CORB_ENTRIES  256
 #define ICH6_MAX_RIRB_ENTRIES  256
 
+/* position fix mode */
+enum {
+       POS_FIX_AUTO,
+       POS_FIX_NONE,
+       POS_FIX_POSBUF,
+       POS_FIX_FIFO,
+};
 
-/*
- * Use CORB/RIRB for communication from/to codecs.
- * This is the way recommended by Intel (see below).
- */
-#define USE_CORB_RIRB
+/* Defines for ATI HD Audio support in SB450 south bridge */
+#define ATI_SB450_HDAUDIO_MISC_CNTR2_ADDR   0x42
+#define ATI_SB450_HDAUDIO_ENABLE_SNOOP      0x02
 
-/*
- * Define this if use the position buffer instead of reading SD_LPIB
- * It's not used as default since SD_LPIB seems to give more accurate position
- */
-/* #define USE_POSBUF */
+/* Defines for Nvidia HDA support */
+#define NVIDIA_HDA_TRANSREG_ADDR      0x4e
+#define NVIDIA_HDA_ENABLE_COHBITS     0x0f
 
 /*
  */
 
-typedef struct snd_azx azx_t;
-typedef struct snd_azx_rb azx_rb_t;
-typedef struct snd_azx_dev azx_dev_t;
-
-struct snd_azx_dev {
+struct azx_dev {
        u32 *bdl;                       /* virtual address of the BDL */
        dma_addr_t bdl_addr;            /* physical address of the BDL */
        volatile u32 *posbuf;                   /* position buffer pointer */
@@ -218,17 +257,19 @@ struct snd_azx_dev {
        u32 sd_int_sta_mask;            /* stream int status mask */
 
        /* pcm support */
-       snd_pcm_substream_t *substream; /* assigned substream, set in PCM open */
+       struct snd_pcm_substream *substream;    /* assigned substream, set in PCM open */
        unsigned int format_val;        /* format value to be set in the controller and the codec */
        unsigned char stream_tag;       /* assigned stream */
        unsigned char index;            /* stream index */
+       /* for sanity check of position buffer */
+       unsigned int period_intr;
 
        unsigned int opened: 1;
        unsigned int running: 1;
 };
 
 /* CORB/RIRB */
-struct snd_azx_rb {
+struct azx_rb {
        u32 *buf;               /* CORB/RIRB buffer
                                 * Each CORB entry is 4byte, RIRB is 8byte
                                 */
@@ -239,10 +280,18 @@ struct snd_azx_rb {
        u32 res;                /* last read value */
 };
 
-struct snd_azx {
-       snd_card_t *card;
+struct azx {
+       struct snd_card *card;
        struct pci_dev *pci;
 
+       /* chip type specific */
+       int driver_type;
+       int playback_streams;
+       int playback_index_offset;
+       int capture_streams;
+       int capture_index_offset;
+       int num_streams;
+
        /* pci resources */
        unsigned long addr;
        void __iomem *remap_addr;
@@ -250,27 +299,51 @@ struct snd_azx {
 
        /* locks */
        spinlock_t reg_lock;
-       struct semaphore open_mutex;
+       struct mutex open_mutex;
 
-       /* streams */
-       azx_dev_t azx_dev[MAX_ICH6_DEV];
+       /* streams (x num_streams) */
+       struct azx_dev *azx_dev;
 
        /* PCM */
        unsigned int pcm_devs;
-       snd_pcm_t *pcm[AZX_MAX_PCMS];
+       struct snd_pcm *pcm[AZX_MAX_PCMS];
 
        /* HD codec */
        unsigned short codec_mask;
        struct hda_bus *bus;
 
        /* CORB/RIRB */
-       azx_rb_t corb;
-       azx_rb_t rirb;
+       struct azx_rb corb;
+       struct azx_rb rirb;
 
        /* BDL, CORB/RIRB and position buffers */
        struct snd_dma_buffer bdl;
        struct snd_dma_buffer rb;
        struct snd_dma_buffer posbuf;
+
+       /* flags */
+       int position_fix;
+       unsigned int initialized: 1;
+       unsigned int single_cmd: 1;
+};
+
+/* driver types */
+enum {
+       AZX_DRIVER_ICH,
+       AZX_DRIVER_ATI,
+       AZX_DRIVER_VIA,
+       AZX_DRIVER_SIS,
+       AZX_DRIVER_ULI,
+       AZX_DRIVER_NVIDIA,
+};
+
+static char *driver_short_names[] __devinitdata = {
+       [AZX_DRIVER_ICH] = "HDA Intel",
+       [AZX_DRIVER_ATI] = "HDA ATI SB",
+       [AZX_DRIVER_VIA] = "HDA VIA VT82xx",
+       [AZX_DRIVER_SIS] = "HDA SIS966",
+       [AZX_DRIVER_ULI] = "HDA ULI M5461",
+       [AZX_DRIVER_NVIDIA] = "HDA NVidia",
 };
 
 /*
@@ -303,7 +376,7 @@ struct snd_azx {
        readb((dev)->sd_addr + ICH6_REG_##reg)
 
 /* for pcm support */
-#define get_azx_dev(substream) (azx_dev_t*)(substream->runtime->private_data)
+#define get_azx_dev(substream) (substream->runtime->private_data)
 
 /* Get the upper 32bit of the given dma_addr_t
  * Compiler should optimize and eliminate the code if dma_addr_t is 32bit
@@ -315,11 +388,10 @@ struct snd_azx {
  * Interface for HD codec
  */
 
-#ifdef USE_CORB_RIRB
 /*
  * CORB / RIRB interface
  */
-static int azx_alloc_cmd_io(azx_t *chip)
+static int azx_alloc_cmd_io(struct azx *chip)
 {
        int err;
 
@@ -333,7 +405,7 @@ static int azx_alloc_cmd_io(azx_t *chip)
        return 0;
 }
 
-static void azx_init_cmd_io(azx_t *chip)
+static void azx_init_cmd_io(struct azx *chip)
 {
        /* CORB set up */
        chip->corb.addr = chip->rb.addr;
@@ -341,6 +413,8 @@ static void azx_init_cmd_io(azx_t *chip)
        azx_writel(chip, CORBLBASE, (u32)chip->corb.addr);
        azx_writel(chip, CORBUBASE, upper_32bit(chip->corb.addr));
 
+       /* set the corb size to 256 entries (ULI requires explicitly) */
+       azx_writeb(chip, CORBSIZE, 0x02);
        /* set the corb write pointer to 0 */
        azx_writew(chip, CORBWP, 0);
        /* reset the corb hw read pointer */
@@ -354,20 +428,18 @@ static void azx_init_cmd_io(azx_t *chip)
        azx_writel(chip, RIRBLBASE, (u32)chip->rirb.addr);
        azx_writel(chip, RIRBUBASE, upper_32bit(chip->rirb.addr));
 
+       /* set the rirb size to 256 entries (ULI requires explicitly) */
+       azx_writeb(chip, RIRBSIZE, 0x02);
        /* reset the rirb hw write pointer */
        azx_writew(chip, RIRBWP, ICH6_RBRWP_CLR);
        /* set N=1, get RIRB response interrupt for new entry */
        azx_writew(chip, RINTCNT, 1);
        /* enable rirb dma and response irq */
-#ifdef USE_CORB_RIRB
        azx_writeb(chip, RIRBCTL, ICH6_RBCTL_DMA_EN | ICH6_RBCTL_IRQ_EN);
-#else
-       azx_writeb(chip, RIRBCTL, ICH6_RBCTL_DMA_EN);
-#endif
        chip->rirb.rp = chip->rirb.cmds = 0;
 }
 
-static void azx_free_cmd_io(azx_t *chip)
+static void azx_free_cmd_io(struct azx *chip)
 {
        /* disable ringbuffer DMAs */
        azx_writeb(chip, RIRBCTL, 0);
@@ -375,10 +447,10 @@ static void azx_free_cmd_io(azx_t *chip)
 }
 
 /* send a command */
-static int azx_send_cmd(struct hda_codec *codec, hda_nid_t nid, int direct,
-                       unsigned int verb, unsigned int para)
+static int azx_corb_send_cmd(struct hda_codec *codec, hda_nid_t nid, int direct,
+                            unsigned int verb, unsigned int para)
 {
-       azx_t *chip = codec->bus->private_data;
+       struct azx *chip = codec->bus->private_data;
        unsigned int wp;
        u32 val;
 
@@ -405,7 +477,7 @@ static int azx_send_cmd(struct hda_codec *codec, hda_nid_t nid, int direct,
 #define ICH6_RIRB_EX_UNSOL_EV  (1<<4)
 
 /* retrieve RIRB entry - called from interrupt handler */
-static void azx_update_rirb(azx_t *chip)
+static void azx_update_rirb(struct azx *chip)
 {
        unsigned int rp, wp;
        u32 res, res_ex;
@@ -432,16 +504,21 @@ static void azx_update_rirb(azx_t *chip)
 }
 
 /* receive a response */
-static unsigned int azx_get_response(struct hda_codec *codec)
+static unsigned int azx_rirb_get_response(struct hda_codec *codec)
 {
-       azx_t *chip = codec->bus->private_data;
+       struct azx *chip = codec->bus->private_data;
        int timeout = 50;
 
        while (chip->rirb.cmds) {
                if (! --timeout) {
-                       snd_printk(KERN_ERR "azx_get_response timeout\n");
+                       snd_printk(KERN_ERR
+                                  "hda_intel: azx_get_response timeout, "
+                                  "switching to single_cmd mode...\n");
                        chip->rirb.rp = azx_readb(chip, RIRBWP);
                        chip->rirb.cmds = 0;
+                       /* switch to single_cmd mode */
+                       chip->single_cmd = 1;
+                       azx_free_cmd_io(chip);
                        return -1;
                }
                msleep(1);
@@ -449,7 +526,6 @@ static unsigned int azx_get_response(struct hda_codec *codec)
        return chip->rirb.res; /* the last value */
 }
 
-#else
 /*
  * Use the single immediate command instead of CORB/RIRB for simplicity
  *
@@ -460,15 +536,12 @@ static unsigned int azx_get_response(struct hda_codec *codec)
  *       I left the codes, however, for debugging/testing purposes.
  */
 
-#define azx_alloc_cmd_io(chip) 0
-#define azx_init_cmd_io(chip)
-#define azx_free_cmd_io(chip)
-
 /* send a command */
-static int azx_send_cmd(struct hda_codec *codec, hda_nid_t nid, int direct,
-                       unsigned int verb, unsigned int para)
+static int azx_single_send_cmd(struct hda_codec *codec, hda_nid_t nid,
+                              int direct, unsigned int verb,
+                              unsigned int para)
 {
-       azx_t *chip = codec->bus->private_data;
+       struct azx *chip = codec->bus->private_data;
        u32 val;
        int timeout = 50;
 
@@ -494,9 +567,9 @@ static int azx_send_cmd(struct hda_codec *codec, hda_nid_t nid, int direct,
 }
 
 /* receive a response */
-static unsigned int azx_get_response(struct hda_codec *codec)
+static unsigned int azx_single_get_response(struct hda_codec *codec)
 {
-       azx_t *chip = codec->bus->private_data;
+       struct azx *chip = codec->bus->private_data;
        int timeout = 50;
 
        while (timeout--) {
@@ -509,12 +582,38 @@ static unsigned int azx_get_response(struct hda_codec *codec)
        return (unsigned int)-1;
 }
 
-#define azx_update_rirb(chip)
+/*
+ * The below are the main callbacks from hda_codec.
+ *
+ * They are just the skeleton to call sub-callbacks according to the
+ * current setting of chip->single_cmd.
+ */
+
+/* send a command */
+static int azx_send_cmd(struct hda_codec *codec, hda_nid_t nid,
+                       int direct, unsigned int verb,
+                       unsigned int para)
+{
+       struct azx *chip = codec->bus->private_data;
+       if (chip->single_cmd)
+               return azx_single_send_cmd(codec, nid, direct, verb, para);
+       else
+               return azx_corb_send_cmd(codec, nid, direct, verb, para);
+}
+
+/* get a response */
+static unsigned int azx_get_response(struct hda_codec *codec)
+{
+       struct azx *chip = codec->bus->private_data;
+       if (chip->single_cmd)
+               return azx_single_get_response(codec);
+       else
+               return azx_rirb_get_response(codec);
+}
 
-#endif /* USE_CORB_RIRB */
 
 /* reset codec link */
-static int azx_reset(azx_t *chip)
+static int azx_reset(struct azx *chip)
 {
        int count;
 
@@ -546,6 +645,9 @@ static int azx_reset(azx_t *chip)
                return -EBUSY;
        }
 
+       /* Accept unsolicited responses */
+       azx_writel(chip, GCTL, azx_readl(chip, GCTL) | ICH6_GCTL_UREN);
+
        /* detect codecs */
        if (! chip->codec_mask) {
                chip->codec_mask = azx_readw(chip, STATESTS);
@@ -561,7 +663,7 @@ static int azx_reset(azx_t *chip)
  */  
 
 /* enable interrupts */
-static void azx_int_enable(azx_t *chip)
+static void azx_int_enable(struct azx *chip)
 {
        /* enable controller CIE and GIE */
        azx_writel(chip, INTCTL, azx_readl(chip, INTCTL) |
@@ -569,13 +671,13 @@ static void azx_int_enable(azx_t *chip)
 }
 
 /* disable interrupts */
-static void azx_int_disable(azx_t *chip)
+static void azx_int_disable(struct azx *chip)
 {
        int i;
 
        /* disable interrupts in stream descriptor */
-       for (i = 0; i < MAX_ICH6_DEV; i++) {
-               azx_dev_t *azx_dev = &chip->azx_dev[i];
+       for (i = 0; i < chip->num_streams; i++) {
+               struct azx_dev *azx_dev = &chip->azx_dev[i];
                azx_sd_writeb(azx_dev, SD_CTL,
                              azx_sd_readb(azx_dev, SD_CTL) & ~SD_INT_MASK);
        }
@@ -589,13 +691,13 @@ static void azx_int_disable(azx_t *chip)
 }
 
 /* clear interrupts */
-static void azx_int_clear(azx_t *chip)
+static void azx_int_clear(struct azx *chip)
 {
        int i;
 
        /* clear stream status */
-       for (i = 0; i < MAX_ICH6_DEV; i++) {
-               azx_dev_t *azx_dev = &chip->azx_dev[i];
+       for (i = 0; i < chip->num_streams; i++) {
+               struct azx_dev *azx_dev = &chip->azx_dev[i];
                azx_sd_writeb(azx_dev, SD_STS, SD_INT_MASK);
        }
 
@@ -610,7 +712,7 @@ static void azx_int_clear(azx_t *chip)
 }
 
 /* start a stream */
-static void azx_stream_start(azx_t *chip, azx_dev_t *azx_dev)
+static void azx_stream_start(struct azx *chip, struct azx_dev *azx_dev)
 {
        /* enable SIE */
        azx_writeb(chip, INTCTL,
@@ -621,7 +723,7 @@ static void azx_stream_start(azx_t *chip, azx_dev_t *azx_dev)
 }
 
 /* stop a stream */
-static void azx_stream_stop(azx_t *chip, azx_dev_t *azx_dev)
+static void azx_stream_stop(struct azx *chip, struct azx_dev *azx_dev)
 {
        /* stop DMA */
        azx_sd_writeb(azx_dev, SD_CTL, azx_sd_readb(azx_dev, SD_CTL) &
@@ -636,16 +738,16 @@ static void azx_stream_stop(azx_t *chip, azx_dev_t *azx_dev)
 /*
  * initialize the chip
  */
-static void azx_init_chip(azx_t *chip)
+static void azx_init_chip(struct azx *chip)
 {
-       unsigned char tcsel_reg;
+       unsigned char reg;
 
        /* Clear bits 0-2 of PCI register TCSEL (at offset 0x44)
         * TCSEL == Traffic Class Select Register, which sets PCI express QOS
         * Ensuring these bits are 0 clears playback static on some HD Audio codecs
         */
-       pci_read_config_byte (chip->pci, ICH6_PCIREG_TCSEL, &tcsel_reg);
-       pci_write_config_byte(chip->pci, ICH6_PCIREG_TCSEL, tcsel_reg & 0xf8);
+       pci_read_config_byte (chip->pci, ICH6_PCIREG_TCSEL, &reg);
+       pci_write_config_byte(chip->pci, ICH6_PCIREG_TCSEL, reg & 0xf8);
 
        /* reset controller */
        azx_reset(chip);
@@ -655,13 +757,28 @@ static void azx_init_chip(azx_t *chip)
        azx_int_enable(chip);
 
        /* initialize the codec command I/O */
-       azx_init_cmd_io(chip);
+       if (! chip->single_cmd)
+               azx_init_cmd_io(chip);
 
-#ifdef USE_POSBUF
        /* program the position buffer */
        azx_writel(chip, DPLBASE, (u32)chip->posbuf.addr);
        azx_writel(chip, DPUBASE, upper_32bit(chip->posbuf.addr));
-#endif
+
+       switch (chip->driver_type) {
+       case AZX_DRIVER_ATI:
+               /* For ATI SB450 azalia HD audio, we need to enable snoop */
+               pci_read_config_byte(chip->pci, ATI_SB450_HDAUDIO_MISC_CNTR2_ADDR, 
+                                    &reg);
+               pci_write_config_byte(chip->pci, ATI_SB450_HDAUDIO_MISC_CNTR2_ADDR, 
+                                     (reg & 0xf8) | ATI_SB450_HDAUDIO_ENABLE_SNOOP);
+               break;
+       case AZX_DRIVER_NVIDIA:
+               /* For NVIDIA HDA, enable snoop */
+               pci_read_config_byte(chip->pci,NVIDIA_HDA_TRANSREG_ADDR, &reg);
+               pci_write_config_byte(chip->pci,NVIDIA_HDA_TRANSREG_ADDR,
+                                     (reg & 0xf0) | NVIDIA_HDA_ENABLE_COHBITS);
+               break;
+        }
 }
 
 
@@ -670,8 +787,8 @@ static void azx_init_chip(azx_t *chip)
  */
 static irqreturn_t azx_interrupt(int irq, void* dev_id, struct pt_regs *regs)
 {
-       azx_t *chip = dev_id;
-       azx_dev_t *azx_dev;
+       struct azx *chip = dev_id;
+       struct azx_dev *azx_dev;
        u32 status;
        int i;
 
@@ -683,11 +800,12 @@ static irqreturn_t azx_interrupt(int irq, void* dev_id, struct pt_regs *regs)
                return IRQ_NONE;
        }
        
-       for (i = 0; i < MAX_ICH6_DEV; i++) {
+       for (i = 0; i < chip->num_streams; i++) {
                azx_dev = &chip->azx_dev[i];
                if (status & azx_dev->sd_int_sta_mask) {
                        azx_sd_writeb(azx_dev, SD_STS, SD_INT_MASK);
                        if (azx_dev->substream && azx_dev->running) {
+                               azx_dev->period_intr++;
                                spin_unlock(&chip->reg_lock);
                                snd_pcm_period_elapsed(azx_dev->substream);
                                spin_lock(&chip->reg_lock);
@@ -698,7 +816,7 @@ static irqreturn_t azx_interrupt(int irq, void* dev_id, struct pt_regs *regs)
        /* clear rirb int */
        status = azx_readb(chip, RIRBSTS);
        if (status & RIRB_INT_MASK) {
-               if (status & RIRB_INT_RESPONSE)
+               if (! chip->single_cmd && (status & RIRB_INT_RESPONSE))
                        azx_update_rirb(chip);
                azx_writeb(chip, RIRBSTS, RIRB_INT_MASK);
        }
@@ -717,7 +835,7 @@ static irqreturn_t azx_interrupt(int irq, void* dev_id, struct pt_regs *regs)
 /*
  * set up BDL entries
  */
-static void azx_setup_periods(azx_dev_t *azx_dev)
+static void azx_setup_periods(struct azx_dev *azx_dev)
 {
        u32 *bdl = azx_dev->bdl;
        dma_addr_t dma_addr = azx_dev->substream->runtime->dma_addr;
@@ -746,7 +864,7 @@ static void azx_setup_periods(azx_dev_t *azx_dev)
 /*
  * set up the SD for streaming
  */
-static int azx_setup_controller(azx_t *chip, azx_dev_t *azx_dev)
+static int azx_setup_controller(struct azx *chip, struct azx_dev *azx_dev)
 {
        unsigned char val;
        int timeout;
@@ -791,11 +909,10 @@ static int azx_setup_controller(azx_t *chip, azx_dev_t *azx_dev)
        /* upper BDL address */
        azx_sd_writel(azx_dev, SD_BDLPU, upper_32bit(azx_dev->bdl_addr));
 
-#ifdef USE_POSBUF
        /* enable the position buffer */
        if (! (azx_readl(chip, DPLBASE) & ICH6_DPLBASE_ENABLE))
                azx_writel(chip, DPLBASE, (u32)chip->posbuf.addr | ICH6_DPLBASE_ENABLE);
-#endif
+
        /* set the interrupt enable bits in the descriptor control register */
        azx_sd_writel(azx_dev, SD_CTL, azx_sd_readl(azx_dev, SD_CTL) | SD_INT_MASK);
 
@@ -807,7 +924,7 @@ static int azx_setup_controller(azx_t *chip, azx_dev_t *azx_dev)
  * Codec initialization
  */
 
-static int __devinit azx_codec_create(azx_t *chip, const char *model)
+static int __devinit azx_codec_create(struct azx *chip, const char *model)
 {
        struct hda_bus_template bus_temp;
        int c, codecs, err;
@@ -824,7 +941,7 @@ static int __devinit azx_codec_create(azx_t *chip, const char *model)
 
        codecs = 0;
        for (c = 0; c < AZX_MAX_CODECS; c++) {
-               if (chip->codec_mask & (1 << c)) {
+               if ((chip->codec_mask & (1 << c)) & probe_mask) {
                        err = snd_hda_codec_new(chip->bus, c, NULL);
                        if (err < 0)
                                continue;
@@ -845,11 +962,17 @@ static int __devinit azx_codec_create(azx_t *chip, const char *model)
  */
 
 /* assign a stream for the PCM */
-static inline azx_dev_t *azx_assign_device(azx_t *chip, int stream)
+static inline struct azx_dev *azx_assign_device(struct azx *chip, int stream)
 {
-       int dev, i;
-       dev = stream == SNDRV_PCM_STREAM_PLAYBACK ? 4 : 0;
-       for (i = 0; i < 4; i++, dev++)
+       int dev, i, nums;
+       if (stream == SNDRV_PCM_STREAM_PLAYBACK) {
+               dev = chip->playback_index_offset;
+               nums = chip->playback_streams;
+       } else {
+               dev = chip->capture_index_offset;
+               nums = chip->capture_streams;
+       }
+       for (i = 0; i < nums; i++, dev++)
                if (! chip->azx_dev[dev].opened) {
                        chip->azx_dev[dev].opened = 1;
                        return &chip->azx_dev[dev];
@@ -858,17 +981,17 @@ static inline azx_dev_t *azx_assign_device(azx_t *chip, int stream)
 }
 
 /* release the assigned stream */
-static inline void azx_release_device(azx_dev_t *azx_dev)
+static inline void azx_release_device(struct azx_dev *azx_dev)
 {
        azx_dev->opened = 0;
 }
 
-static snd_pcm_hardware_t azx_pcm_hw = {
+static struct snd_pcm_hardware azx_pcm_hw = {
        .info =                 (SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED |
                                 SNDRV_PCM_INFO_BLOCK_TRANSFER |
                                 SNDRV_PCM_INFO_MMAP_VALID |
-                                SNDRV_PCM_INFO_PAUSE |
-                                SNDRV_PCM_INFO_RESUME),
+                                SNDRV_PCM_INFO_PAUSE /*|*/
+                                /*SNDRV_PCM_INFO_RESUME*/),
        .formats =              SNDRV_PCM_FMTBIT_S16_LE,
        .rates =                SNDRV_PCM_RATE_48000,
        .rate_min =             48000,
@@ -884,25 +1007,25 @@ static snd_pcm_hardware_t azx_pcm_hw = {
 };
 
 struct azx_pcm {
-       azx_t *chip;
+       struct azx *chip;
        struct hda_codec *codec;
        struct hda_pcm_stream *hinfo[2];
 };
 
-static int azx_pcm_open(snd_pcm_substream_t *substream)
+static int azx_pcm_open(struct snd_pcm_substream *substream)
 {
        struct azx_pcm *apcm = snd_pcm_substream_chip(substream);
        struct hda_pcm_stream *hinfo = apcm->hinfo[substream->stream];
-       azx_t *chip = apcm->chip;
-       azx_dev_t *azx_dev;
-       snd_pcm_runtime_t *runtime = substream->runtime;
+       struct azx *chip = apcm->chip;
+       struct azx_dev *azx_dev;
+       struct snd_pcm_runtime *runtime = substream->runtime;
        unsigned long flags;
        int err;
 
-       down(&chip->open_mutex);
+       mutex_lock(&chip->open_mutex);
        azx_dev = azx_assign_device(chip, substream->stream);
        if (azx_dev == NULL) {
-               up(&chip->open_mutex);
+               mutex_unlock(&chip->open_mutex);
                return -EBUSY;
        }
        runtime->hw = azx_pcm_hw;
@@ -914,7 +1037,7 @@ static int azx_pcm_open(snd_pcm_substream_t *substream)
        snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS);
        if ((err = hinfo->ops.open(hinfo, apcm->codec, substream)) < 0) {
                azx_release_device(azx_dev);
-               up(&chip->open_mutex);
+               mutex_unlock(&chip->open_mutex);
                return err;
        }
        spin_lock_irqsave(&chip->reg_lock, flags);
@@ -923,38 +1046,38 @@ static int azx_pcm_open(snd_pcm_substream_t *substream)
        spin_unlock_irqrestore(&chip->reg_lock, flags);
 
        runtime->private_data = azx_dev;
-       up(&chip->open_mutex);
+       mutex_unlock(&chip->open_mutex);
        return 0;
 }
 
-static int azx_pcm_close(snd_pcm_substream_t *substream)
+static int azx_pcm_close(struct snd_pcm_substream *substream)
 {
        struct azx_pcm *apcm = snd_pcm_substream_chip(substream);
        struct hda_pcm_stream *hinfo = apcm->hinfo[substream->stream];
-       azx_t *chip = apcm->chip;
-       azx_dev_t *azx_dev = get_azx_dev(substream);
+       struct azx *chip = apcm->chip;
+       struct azx_dev *azx_dev = get_azx_dev(substream);
        unsigned long flags;
 
-       down(&chip->open_mutex);
+       mutex_lock(&chip->open_mutex);
        spin_lock_irqsave(&chip->reg_lock, flags);
        azx_dev->substream = NULL;
        azx_dev->running = 0;
        spin_unlock_irqrestore(&chip->reg_lock, flags);
        azx_release_device(azx_dev);
        hinfo->ops.close(hinfo, apcm->codec, substream);
-       up(&chip->open_mutex);
+       mutex_unlock(&chip->open_mutex);
        return 0;
 }
 
-static int azx_pcm_hw_params(snd_pcm_substream_t *substream, snd_pcm_hw_params_t *hw_params)
+static int azx_pcm_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *hw_params)
 {
        return snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(hw_params));
 }
 
-static int azx_pcm_hw_free(snd_pcm_substream_t *substream)
+static int azx_pcm_hw_free(struct snd_pcm_substream *substream)
 {
        struct azx_pcm *apcm = snd_pcm_substream_chip(substream);
-       azx_dev_t *azx_dev = get_azx_dev(substream);
+       struct azx_dev *azx_dev = get_azx_dev(substream);
        struct hda_pcm_stream *hinfo = apcm->hinfo[substream->stream];
 
        /* reset BDL address */
@@ -967,13 +1090,13 @@ static int azx_pcm_hw_free(snd_pcm_substream_t *substream)
        return snd_pcm_lib_free_pages(substream);
 }
 
-static int azx_pcm_prepare(snd_pcm_substream_t *substream)
+static int azx_pcm_prepare(struct snd_pcm_substream *substream)
 {
        struct azx_pcm *apcm = snd_pcm_substream_chip(substream);
-       azx_t *chip = apcm->chip;
-       azx_dev_t *azx_dev = get_azx_dev(substream);
+       struct azx *chip = apcm->chip;
+       struct azx_dev *azx_dev = get_azx_dev(substream);
        struct hda_pcm_stream *hinfo = apcm->hinfo[substream->stream];
-       snd_pcm_runtime_t *runtime = substream->runtime;
+       struct snd_pcm_runtime *runtime = substream->runtime;
 
        azx_dev->bufsize = snd_pcm_lib_buffer_bytes(substream);
        azx_dev->fragsize = snd_pcm_lib_period_bytes(substream);
@@ -1001,11 +1124,11 @@ static int azx_pcm_prepare(snd_pcm_substream_t *substream)
                                  azx_dev->format_val, substream);
 }
 
-static int azx_pcm_trigger(snd_pcm_substream_t *substream, int cmd)
+static int azx_pcm_trigger(struct snd_pcm_substream *substream, int cmd)
 {
        struct azx_pcm *apcm = snd_pcm_substream_chip(substream);
-       azx_dev_t *azx_dev = get_azx_dev(substream);
-       azx_t *chip = apcm->chip;
+       struct azx_dev *azx_dev = get_azx_dev(substream);
+       struct azx *chip = apcm->chip;
        int err = 0;
 
        spin_lock(&chip->reg_lock);
@@ -1017,6 +1140,7 @@ static int azx_pcm_trigger(snd_pcm_substream_t *substream, int cmd)
                azx_dev->running = 1;
                break;
        case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+       case SNDRV_PCM_TRIGGER_SUSPEND:
        case SNDRV_PCM_TRIGGER_STOP:
                azx_stream_stop(chip, azx_dev);
                azx_dev->running = 0;
@@ -1026,6 +1150,7 @@ static int azx_pcm_trigger(snd_pcm_substream_t *substream, int cmd)
        }
        spin_unlock(&chip->reg_lock);
        if (cmd == SNDRV_PCM_TRIGGER_PAUSE_PUSH ||
+           cmd == SNDRV_PCM_TRIGGER_SUSPEND ||
            cmd == SNDRV_PCM_TRIGGER_STOP) {
                int timeout = 5000;
                while (azx_sd_readb(azx_dev, SD_CTL) & SD_CTL_DMA_START && --timeout)
@@ -1034,24 +1159,38 @@ static int azx_pcm_trigger(snd_pcm_substream_t *substream, int cmd)
        return err;
 }
 
-static snd_pcm_uframes_t azx_pcm_pointer(snd_pcm_substream_t *substream)
+static snd_pcm_uframes_t azx_pcm_pointer(struct snd_pcm_substream *substream)
 {
-       azx_dev_t *azx_dev = get_azx_dev(substream);
+       struct azx_pcm *apcm = snd_pcm_substream_chip(substream);
+       struct azx *chip = apcm->chip;
+       struct azx_dev *azx_dev = get_azx_dev(substream);
        unsigned int pos;
 
-#ifdef USE_POSBUF
-       /* use the position buffer */
-       pos = *azx_dev->posbuf;
-#else
-       /* read LPIB */
-       pos = azx_sd_readl(azx_dev, SD_LPIB) + azx_dev->fifo_size;
-#endif
+       if (chip->position_fix == POS_FIX_POSBUF ||
+           chip->position_fix == POS_FIX_AUTO) {
+               /* use the position buffer */
+               pos = *azx_dev->posbuf;
+               if (chip->position_fix == POS_FIX_AUTO &&
+                   azx_dev->period_intr == 1 && ! pos) {
+                       printk(KERN_WARNING
+                              "hda-intel: Invalid position buffer, "
+                              "using LPIB read method instead.\n");
+                       chip->position_fix = POS_FIX_NONE;
+                       goto read_lpib;
+               }
+       } else {
+       read_lpib:
+               /* read LPIB */
+               pos = azx_sd_readl(azx_dev, SD_LPIB);
+               if (chip->position_fix == POS_FIX_FIFO)
+                       pos += azx_dev->fifo_size;
+       }
        if (pos >= azx_dev->bufsize)
                pos = 0;
        return bytes_to_frames(substream->runtime, pos);
 }
 
-static snd_pcm_ops_t azx_pcm_ops = {
+static struct snd_pcm_ops azx_pcm_ops = {
        .open = azx_pcm_open,
        .close = azx_pcm_close,
        .ioctl = snd_pcm_lib_ioctl,
@@ -1062,16 +1201,16 @@ static snd_pcm_ops_t azx_pcm_ops = {
        .pointer = azx_pcm_pointer,
 };
 
-static void azx_pcm_free(snd_pcm_t *pcm)
+static void azx_pcm_free(struct snd_pcm *pcm)
 {
        kfree(pcm->private_data);
 }
 
-static int __devinit create_codec_pcm(azx_t *chip, struct hda_codec *codec,
+static int __devinit create_codec_pcm(struct azx *chip, struct hda_codec *codec,
                                      struct hda_pcm *cpcm, int pcm_dev)
 {
        int err;
-       snd_pcm_t *pcm;
+       struct snd_pcm *pcm;
        struct azx_pcm *apcm;
 
        snd_assert(cpcm->stream[0].substreams || cpcm->stream[1].substreams, return -EINVAL);
@@ -1100,11 +1239,12 @@ static int __devinit create_codec_pcm(azx_t *chip, struct hda_codec *codec,
                                              snd_dma_pci_data(chip->pci),
                                              1024 * 64, 1024 * 128);
        chip->pcm[pcm_dev] = pcm;
+       chip->pcm_devs = pcm_dev + 1;
 
        return 0;
 }
 
-static int __devinit azx_pcm_create(azx_t *chip)
+static int __devinit azx_pcm_create(struct azx *chip)
 {
        struct list_head *p;
        struct hda_codec *codec;
@@ -1114,17 +1254,39 @@ static int __devinit azx_pcm_create(azx_t *chip)
        if ((err = snd_hda_build_pcms(chip->bus)) < 0)
                return err;
 
+       /* create audio PCMs */
        pcm_dev = 0;
        list_for_each(p, &chip->bus->codec_list) {
                codec = list_entry(p, struct hda_codec, list);
                for (c = 0; c < codec->num_pcms; c++) {
+                       if (codec->pcm_info[c].is_modem)
+                               continue; /* create later */
+                       if (pcm_dev >= AZX_MAX_AUDIO_PCMS) {
+                               snd_printk(KERN_ERR SFX "Too many audio PCMs\n");
+                               return -EINVAL;
+                       }
+                       err = create_codec_pcm(chip, codec, &codec->pcm_info[c], pcm_dev);
+                       if (err < 0)
+                               return err;
+                       pcm_dev++;
+               }
+       }
+
+       /* create modem PCMs */
+       pcm_dev = AZX_MAX_AUDIO_PCMS;
+       list_for_each(p, &chip->bus->codec_list) {
+               codec = list_entry(p, struct hda_codec, list);
+               for (c = 0; c < codec->num_pcms; c++) {
+                       if (! codec->pcm_info[c].is_modem)
+                               continue; /* already created */
                        if (pcm_dev >= AZX_MAX_PCMS) {
-                               snd_printk(KERN_ERR SFX "Too many PCMs\n");
+                               snd_printk(KERN_ERR SFX "Too many modem PCMs\n");
                                return -EINVAL;
                        }
                        err = create_codec_pcm(chip, codec, &codec->pcm_info[c], pcm_dev);
                        if (err < 0)
                                return err;
+                       chip->pcm[pcm_dev]->dev_class = SNDRV_PCM_CLASS_MODEM;
                        pcm_dev++;
                }
        }
@@ -1134,7 +1296,7 @@ static int __devinit azx_pcm_create(azx_t *chip)
 /*
  * mixer creation - all stuff is implemented in hda module
  */
-static int __devinit azx_mixer_create(azx_t *chip)
+static int __devinit azx_mixer_create(struct azx *chip)
 {
        return snd_hda_build_controls(chip->bus);
 }
@@ -1143,21 +1305,19 @@ static int __devinit azx_mixer_create(azx_t *chip)
 /*
  * initialize SD streams
  */
-static int __devinit azx_init_stream(azx_t *chip)
+static int __devinit azx_init_stream(struct azx *chip)
 {
        int i;
 
        /* initialize each stream (aka device)
         * assign the starting bdl address to each stream (device) and initialize
         */
-       for (i = 0; i < MAX_ICH6_DEV; i++) {
+       for (i = 0; i < chip->num_streams; i++) {
                unsigned int off = sizeof(u32) * (i * AZX_MAX_FRAG * 4);
-               azx_dev_t *azx_dev = &chip->azx_dev[i];
+               struct azx_dev *azx_dev = &chip->azx_dev[i];
                azx_dev->bdl = (u32 *)(chip->bdl.area + off);
                azx_dev->bdl_addr = chip->bdl.addr + off;
-#ifdef USE_POSBUF
                azx_dev->posbuf = (volatile u32 *)(chip->posbuf.area + i * 8);
-#endif
                /* offset: SDI0=0x80, SDI1=0xa0, ... SDO3=0x160 */
                azx_dev->sd_addr = chip->remap_addr + (0x20 * i + 0x80);
                /* int mask: SDI0=0x01, SDI1=0x02, ... SDO3=0x80 */
@@ -1175,28 +1335,33 @@ static int __devinit azx_init_stream(azx_t *chip)
 /*
  * power management
  */
-static int azx_suspend(snd_card_t *card, pm_message_t state)
+static int azx_suspend(struct pci_dev *pci, pm_message_t state)
 {
-       azx_t *chip = card->pm_private_data;
+       struct snd_card *card = pci_get_drvdata(pci);
+       struct azx *chip = card->private_data;
        int i;
 
+       snd_power_change_state(card, SNDRV_CTL_POWER_D3hot);
        for (i = 0; i < chip->pcm_devs; i++)
-               if (chip->pcm[i])
-                       snd_pcm_suspend_all(chip->pcm[i]);
+               snd_pcm_suspend_all(chip->pcm[i]);
        snd_hda_suspend(chip->bus, state);
        azx_free_cmd_io(chip);
-       pci_disable_device(chip->pci);
+       pci_disable_device(pci);
+       pci_save_state(pci);
        return 0;
 }
 
-static int azx_resume(snd_card_t *card)
+static int azx_resume(struct pci_dev *pci)
 {
-       azx_t *chip = card->pm_private_data;
+       struct snd_card *card = pci_get_drvdata(pci);
+       struct azx *chip = card->private_data;
 
-       pci_enable_device(chip->pci);
-       pci_set_master(chip->pci);
+       pci_restore_state(pci);
+       pci_enable_device(pci);
+       pci_set_master(pci);
        azx_init_chip(chip);
        snd_hda_resume(chip->bus);
+       snd_power_change_state(card, SNDRV_CTL_POWER_D0);
        return 0;
 }
 #endif /* CONFIG_PM */
@@ -1205,12 +1370,12 @@ static int azx_resume(snd_card_t *card)
 /*
  * destructor
  */
-static int azx_free(azx_t *chip)
+static int azx_free(struct azx *chip)
 {
-       if (chip->remap_addr) {
+       if (chip->initialized) {
                int i;
 
-               for (i = 0; i < MAX_ICH6_DEV; i++)
+               for (i = 0; i < chip->num_streams; i++)
                        azx_stream_stop(chip, &chip->azx_dev[i]);
 
                /* disable interrupts */
@@ -1226,29 +1391,28 @@ static int azx_free(azx_t *chip)
 
                /* wait a little for interrupts to finish */
                msleep(1);
-
-               iounmap(chip->remap_addr);
        }
 
        if (chip->irq >= 0)
                free_irq(chip->irq, (void*)chip);
+       if (chip->remap_addr)
+               iounmap(chip->remap_addr);
 
        if (chip->bdl.area)
                snd_dma_free_pages(&chip->bdl);
        if (chip->rb.area)
                snd_dma_free_pages(&chip->rb);
-#ifdef USE_POSBUF
        if (chip->posbuf.area)
                snd_dma_free_pages(&chip->posbuf);
-#endif
        pci_release_regions(chip->pci);
        pci_disable_device(chip->pci);
+       kfree(chip->azx_dev);
        kfree(chip);
 
        return 0;
 }
 
-static int azx_dev_free(snd_device_t *device)
+static int azx_dev_free(struct snd_device *device)
 {
        return azx_free(device->device_data);
 }
@@ -1256,11 +1420,13 @@ static int azx_dev_free(snd_device_t *device)
 /*
  * constructor
  */
-static int __devinit azx_create(snd_card_t *card, struct pci_dev *pci, azx_t **rchip)
+static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci,
+                               int driver_type,
+                               struct azx **rchip)
 {
-       azx_t *chip;
+       struct azx *chip;
        int err = 0;
-       static snd_device_ops_t ops = {
+       static struct snd_device_ops ops = {
                .dev_free = azx_dev_free,
        };
 
@@ -1269,7 +1435,7 @@ static int __devinit azx_create(snd_card_t *card, struct pci_dev *pci, azx_t **r
        if ((err = pci_enable_device(pci)) < 0)
                return err;
 
-       chip = kcalloc(1, sizeof(*chip), GFP_KERNEL);
+       chip = kzalloc(sizeof(*chip), GFP_KERNEL);
        
        if (NULL == chip) {
                snd_printk(KERN_ERR SFX "cannot allocate chip\n");
@@ -1278,10 +1444,24 @@ static int __devinit azx_create(snd_card_t *card, struct pci_dev *pci, azx_t **r
        }
 
        spin_lock_init(&chip->reg_lock);
-       init_MUTEX(&chip->open_mutex);
+       mutex_init(&chip->open_mutex);
        chip->card = card;
        chip->pci = pci;
        chip->irq = -1;
+       chip->driver_type = driver_type;
+
+       chip->position_fix = position_fix;
+       chip->single_cmd = single_cmd;
+
+#if BITS_PER_LONG != 64
+       /* Fix up base address on ULI M5461 */
+       if (chip->driver_type == AZX_DRIVER_ULI) {
+               u16 tmp3;
+               pci_read_config_word(pci, 0x40, &tmp3);
+               pci_write_config_word(pci, 0x40, tmp3 | 0x10);
+               pci_write_config_dword(pci, PCI_BASE_ADDRESS_1, 0);
+       }
+#endif
 
        if ((err = pci_request_regions(pci, "ICH HD audio")) < 0) {
                kfree(chip);
@@ -1308,23 +1488,43 @@ static int __devinit azx_create(snd_card_t *card, struct pci_dev *pci, azx_t **r
        pci_set_master(pci);
        synchronize_irq(chip->irq);
 
+       switch (chip->driver_type) {
+       case AZX_DRIVER_ULI:
+               chip->playback_streams = ULI_NUM_PLAYBACK;
+               chip->capture_streams = ULI_NUM_CAPTURE;
+               chip->playback_index_offset = ULI_PLAYBACK_INDEX;
+               chip->capture_index_offset = ULI_CAPTURE_INDEX;
+               break;
+       default:
+               chip->playback_streams = ICH6_NUM_PLAYBACK;
+               chip->capture_streams = ICH6_NUM_CAPTURE;
+               chip->playback_index_offset = ICH6_PLAYBACK_INDEX;
+               chip->capture_index_offset = ICH6_CAPTURE_INDEX;
+               break;
+       }
+       chip->num_streams = chip->playback_streams + chip->capture_streams;
+       chip->azx_dev = kcalloc(chip->num_streams, sizeof(*chip->azx_dev), GFP_KERNEL);
+       if (! chip->azx_dev) {
+               snd_printk(KERN_ERR "cannot malloc azx_dev\n");
+               goto errout;
+       }
+
        /* allocate memory for the BDL for each stream */
        if ((err = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(chip->pci),
-                                      PAGE_SIZE, &chip->bdl)) < 0) {
+                                      BDL_SIZE, &chip->bdl)) < 0) {
                snd_printk(KERN_ERR SFX "cannot allocate BDL\n");
                goto errout;
        }
-#ifdef USE_POSBUF
        /* allocate memory for the position buffer */
        if ((err = snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV, snd_dma_pci_data(chip->pci),
-                                      MAX_ICH6_DEV * 8, &chip->posbuf)) < 0) {
+                                      chip->num_streams * 8, &chip->posbuf)) < 0) {
                snd_printk(KERN_ERR SFX "cannot allocate posbuf\n");
                goto errout;
        }
-#endif
        /* allocate CORB/RIRB */
-       if ((err = azx_alloc_cmd_io(chip)) < 0)
-               goto errout;
+       if (! chip->single_cmd)
+               if ((err = azx_alloc_cmd_io(chip)) < 0)
+                       goto errout;
 
        /* initialize streams */
        azx_init_stream(chip);
@@ -1332,6 +1532,8 @@ static int __devinit azx_create(snd_card_t *card, struct pci_dev *pci, azx_t **r
        /* initialize chip */
        azx_init_chip(chip);
 
+       chip->initialized = 1;
+
        /* codec detection */
        if (! chip->codec_mask) {
                snd_printk(KERN_ERR SFX "no codecs found!\n");
@@ -1344,6 +1546,10 @@ static int __devinit azx_create(snd_card_t *card, struct pci_dev *pci, azx_t **r
                goto errout;
        }
 
+       strcpy(card->driver, "HDA-Intel");
+       strcpy(card->shortname, driver_short_names[chip->driver_type]);
+       sprintf(card->longname, "%s at 0x%lx irq %i", card->shortname, chip->addr, chip->irq);
+
        *rchip = chip;
        return 0;
 
@@ -1354,35 +1560,25 @@ static int __devinit azx_create(snd_card_t *card, struct pci_dev *pci, azx_t **r
 
 static int __devinit azx_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 {
-       static int dev;
-       snd_card_t *card;
-       azx_t *chip;
+       struct snd_card *card;
+       struct azx *chip;
        int err = 0;
 
-       if (dev >= SNDRV_CARDS)
-               return -ENODEV;
-       if (! enable[dev]) {
-               dev++;
-               return -ENOENT;
-       }
-
-       card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0);
+       card = snd_card_new(index, id, THIS_MODULE, 0);
        if (NULL == card) {
                snd_printk(KERN_ERR SFX "Error creating card!\n");
                return -ENOMEM;
        }
 
-       if ((err = azx_create(card, pci, &chip)) < 0) {
+       if ((err = azx_create(card, pci, pci_id->driver_data,
+                             &chip)) < 0) {
                snd_card_free(card);
                return err;
        }
-
-       strcpy(card->driver, "HDA-Intel");
-       strcpy(card->shortname, "HDA Intel");
-       sprintf(card->longname, "%s at 0x%lx irq %i", card->shortname, chip->addr, chip->irq);
+       card->private_data = chip;
 
        /* create codec instances */
-       if ((err = azx_codec_create(chip, model[dev])) < 0) {
+       if ((err = azx_codec_create(chip, model)) < 0) {
                snd_card_free(card);
                return err;
        }
@@ -1399,7 +1595,6 @@ static int __devinit azx_probe(struct pci_dev *pci, const struct pci_device_id *
                return err;
        }
 
-       snd_card_set_pm_callback(card, azx_suspend, azx_resume, chip);
        snd_card_set_dev(card, &pci->dev);
 
        if ((err = snd_card_register(card)) < 0) {
@@ -1408,7 +1603,6 @@ static int __devinit azx_probe(struct pci_dev *pci, const struct pci_device_id *
        }
 
        pci_set_drvdata(pci, card);
-       dev++;
 
        return err;
 }
@@ -1420,10 +1614,18 @@ static void __devexit azx_remove(struct pci_dev *pci)
 }
 
 /* PCI IDs */
-static struct pci_device_id azx_ids[] = {
-       { 0x8086, 0x2668, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* ICH6 */
-       { 0x8086, 0x27d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* ICH7 */
-       { 0x8086, 0x269a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* ESB2 */
+static struct pci_device_id azx_ids[] __devinitdata = {
+       { 0x8086, 0x2668, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ICH }, /* ICH6 */
+       { 0x8086, 0x27d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ICH }, /* ICH7 */
+       { 0x8086, 0x269a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ICH }, /* ESB2 */
+       { 0x8086, 0x284b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ICH }, /* ICH8 */
+       { 0x1002, 0x437b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ATI }, /* ATI SB450 */
+       { 0x1002, 0x4383, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ATI }, /* ATI SB600 */
+       { 0x1106, 0x3288, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_VIA }, /* VIA VT8251/VT8237A */
+       { 0x1039, 0x7502, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_SIS }, /* SIS966 */
+       { 0x10b9, 0x5461, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_ULI }, /* ULI M5461 */
+       { 0x10de, 0x026c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_NVIDIA }, /* NVIDIA 026c */
+       { 0x10de, 0x0371, PCI_ANY_ID, PCI_ANY_ID, 0, 0, AZX_DRIVER_NVIDIA }, /* NVIDIA 0371 */
        { 0, }
 };
 MODULE_DEVICE_TABLE(pci, azx_ids);
@@ -1434,12 +1636,15 @@ static struct pci_driver driver = {
        .id_table = azx_ids,
        .probe = azx_probe,
        .remove = __devexit_p(azx_remove),
-       SND_PCI_PM_CALLBACKS
+#ifdef CONFIG_PM
+       .suspend = azx_suspend,
+       .resume = azx_resume,
+#endif
 };
 
 static int __init alsa_card_azx_init(void)
 {
-       return pci_module_init(&driver);
+       return pci_register_driver(&driver);
 }
 
 static void __exit alsa_card_azx_exit(void)
index 75d2384..d7343dc 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * HD audio interface patch for AD1986A
+ * HD audio interface patch for AD1981HD, AD1983, AD1986A, AD1988
  *
  * Copyright (c) 2005 Takashi Iwai <tiwai@suse.de>
  *
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
+#include <linux/mutex.h>
+
 #include <sound/core.h>
 #include "hda_codec.h"
 #include "hda_local.h"
 
-struct ad1986a_spec {
-       struct semaphore amp_mutex;     /* PCM volume/mute control mutex */
-       struct hda_multi_out multiout;  /* playback */
-       unsigned int cur_mux;           /* capture source */
-       struct hda_pcm pcm_rec[2];      /* PCM information */
+struct ad198x_spec {
+       struct snd_kcontrol_new *mixers[5];
+       int num_mixers;
+
+       const struct hda_verb *init_verbs[5];   /* initialization verbs
+                                                * don't forget NULL termination!
+                                                */
+       unsigned int num_init_verbs;
+
+       /* playback */
+       struct hda_multi_out multiout;  /* playback set-up
+                                        * max_channels, dacs must be set
+                                        * dig_out_nid and hp_nid are optional
+                                        */
+       unsigned int cur_eapd;
+       unsigned int need_dac_fix;
+
+       /* capture */
+       unsigned int num_adc_nids;
+       hda_nid_t *adc_nids;
+       hda_nid_t dig_in_nid;           /* digital-in NID; optional */
+
+       /* capture source */
+       const struct hda_input_mux *input_mux;
+       hda_nid_t *capsrc_nids;
+       unsigned int cur_mux[3];
+
+       /* channel model */
+       const struct hda_channel_mode *channel_mode;
+       int num_channel_mode;
+
+       /* PCM information */
+       struct hda_pcm pcm_rec[2];      /* used in alc_build_pcms() */
+
+       struct mutex amp_mutex; /* PCM volume/mute control mutex */
+       unsigned int spdif_route;
+
+       /* dynamic controls, init_verbs and input_mux */
+       struct auto_pin_cfg autocfg;
+       unsigned int num_kctl_alloc, num_kctl_used;
+       struct snd_kcontrol_new *kctl_alloc;
+       struct hda_input_mux private_imux;
+       hda_nid_t private_dac_nids[4];
+};
+
+/*
+ * input MUX handling (common part)
+ */
+static int ad198x_mux_enum_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct ad198x_spec *spec = codec->spec;
+
+       return snd_hda_input_mux_info(spec->input_mux, uinfo);
+}
+
+static int ad198x_mux_enum_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct ad198x_spec *spec = codec->spec;
+       unsigned int adc_idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
+
+       ucontrol->value.enumerated.item[0] = spec->cur_mux[adc_idx];
+       return 0;
+}
+
+static int ad198x_mux_enum_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct ad198x_spec *spec = codec->spec;
+       unsigned int adc_idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
+
+       return snd_hda_input_mux_put(codec, spec->input_mux, ucontrol,
+                                    spec->capsrc_nids[adc_idx],
+                                    &spec->cur_mux[adc_idx]);
+}
+
+/*
+ * initialization (common callbacks)
+ */
+static int ad198x_init(struct hda_codec *codec)
+{
+       struct ad198x_spec *spec = codec->spec;
+       int i;
+
+       for (i = 0; i < spec->num_init_verbs; i++)
+               snd_hda_sequence_write(codec, spec->init_verbs[i]);
+       return 0;
+}
+
+static int ad198x_build_controls(struct hda_codec *codec)
+{
+       struct ad198x_spec *spec = codec->spec;
+       unsigned int i;
+       int err;
+
+       for (i = 0; i < spec->num_mixers; i++) {
+               err = snd_hda_add_new_ctls(codec, spec->mixers[i]);
+               if (err < 0)
+                       return err;
+       }
+       if (spec->multiout.dig_out_nid) {
+               err = snd_hda_create_spdif_out_ctls(codec, spec->multiout.dig_out_nid);
+               if (err < 0)
+                       return err;
+       } 
+       if (spec->dig_in_nid) {
+               err = snd_hda_create_spdif_in_ctls(codec, spec->dig_in_nid);
+               if (err < 0)
+                       return err;
+       }
+       return 0;
+}
+
+/*
+ * Analog playback callbacks
+ */
+static int ad198x_playback_pcm_open(struct hda_pcm_stream *hinfo,
+                                   struct hda_codec *codec,
+                                   struct snd_pcm_substream *substream)
+{
+       struct ad198x_spec *spec = codec->spec;
+       return snd_hda_multi_out_analog_open(codec, &spec->multiout, substream);
+}
+
+static int ad198x_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
+                                      struct hda_codec *codec,
+                                      unsigned int stream_tag,
+                                      unsigned int format,
+                                      struct snd_pcm_substream *substream)
+{
+       struct ad198x_spec *spec = codec->spec;
+       return snd_hda_multi_out_analog_prepare(codec, &spec->multiout, stream_tag,
+                                               format, substream);
+}
+
+static int ad198x_playback_pcm_cleanup(struct hda_pcm_stream *hinfo,
+                                      struct hda_codec *codec,
+                                      struct snd_pcm_substream *substream)
+{
+       struct ad198x_spec *spec = codec->spec;
+       return snd_hda_multi_out_analog_cleanup(codec, &spec->multiout);
+}
+
+/*
+ * Digital out
+ */
+static int ad198x_dig_playback_pcm_open(struct hda_pcm_stream *hinfo,
+                                       struct hda_codec *codec,
+                                       struct snd_pcm_substream *substream)
+{
+       struct ad198x_spec *spec = codec->spec;
+       return snd_hda_multi_out_dig_open(codec, &spec->multiout);
+}
+
+static int ad198x_dig_playback_pcm_close(struct hda_pcm_stream *hinfo,
+                                        struct hda_codec *codec,
+                                        struct snd_pcm_substream *substream)
+{
+       struct ad198x_spec *spec = codec->spec;
+       return snd_hda_multi_out_dig_close(codec, &spec->multiout);
+}
+
+/*
+ * Analog capture
+ */
+static int ad198x_capture_pcm_prepare(struct hda_pcm_stream *hinfo,
+                                     struct hda_codec *codec,
+                                     unsigned int stream_tag,
+                                     unsigned int format,
+                                     struct snd_pcm_substream *substream)
+{
+       struct ad198x_spec *spec = codec->spec;
+       snd_hda_codec_setup_stream(codec, spec->adc_nids[substream->number],
+                                  stream_tag, 0, format);
+       return 0;
+}
+
+static int ad198x_capture_pcm_cleanup(struct hda_pcm_stream *hinfo,
+                                     struct hda_codec *codec,
+                                     struct snd_pcm_substream *substream)
+{
+       struct ad198x_spec *spec = codec->spec;
+       snd_hda_codec_setup_stream(codec, spec->adc_nids[substream->number],
+                                  0, 0, 0);
+       return 0;
+}
+
+
+/*
+ */
+static struct hda_pcm_stream ad198x_pcm_analog_playback = {
+       .substreams = 1,
+       .channels_min = 2,
+       .channels_max = 6, /* changed later */
+       .nid = 0, /* fill later */
+       .ops = {
+               .open = ad198x_playback_pcm_open,
+               .prepare = ad198x_playback_pcm_prepare,
+               .cleanup = ad198x_playback_pcm_cleanup
+       },
+};
+
+static struct hda_pcm_stream ad198x_pcm_analog_capture = {
+       .substreams = 1,
+       .channels_min = 2,
+       .channels_max = 2,
+       .nid = 0, /* fill later */
+       .ops = {
+               .prepare = ad198x_capture_pcm_prepare,
+               .cleanup = ad198x_capture_pcm_cleanup
+       },
+};
+
+static struct hda_pcm_stream ad198x_pcm_digital_playback = {
+       .substreams = 1,
+       .channels_min = 2,
+       .channels_max = 2,
+       .nid = 0, /* fill later */
+       .ops = {
+               .open = ad198x_dig_playback_pcm_open,
+               .close = ad198x_dig_playback_pcm_close
+       },
+};
+
+static struct hda_pcm_stream ad198x_pcm_digital_capture = {
+       .substreams = 1,
+       .channels_min = 2,
+       .channels_max = 2,
+       /* NID is set in alc_build_pcms */
+};
+
+static int ad198x_build_pcms(struct hda_codec *codec)
+{
+       struct ad198x_spec *spec = codec->spec;
+       struct hda_pcm *info = spec->pcm_rec;
+
+       codec->num_pcms = 1;
+       codec->pcm_info = info;
+
+       info->name = "AD198x Analog";
+       info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ad198x_pcm_analog_playback;
+       info->stream[SNDRV_PCM_STREAM_PLAYBACK].channels_max = spec->multiout.max_channels;
+       info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = spec->multiout.dac_nids[0];
+       info->stream[SNDRV_PCM_STREAM_CAPTURE] = ad198x_pcm_analog_capture;
+       info->stream[SNDRV_PCM_STREAM_CAPTURE].substreams = spec->num_adc_nids;
+       info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adc_nids[0];
+
+       if (spec->multiout.dig_out_nid) {
+               info++;
+               codec->num_pcms++;
+               info->name = "AD198x Digital";
+               info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ad198x_pcm_digital_playback;
+               info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = spec->multiout.dig_out_nid;
+               if (spec->dig_in_nid) {
+                       info->stream[SNDRV_PCM_STREAM_CAPTURE] = ad198x_pcm_digital_capture;
+                       info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->dig_in_nid;
+               }
+       }
+
+       return 0;
+}
+
+static void ad198x_free(struct hda_codec *codec)
+{
+       struct ad198x_spec *spec = codec->spec;
+       unsigned int i;
+
+       if (spec->kctl_alloc) {
+               for (i = 0; i < spec->num_kctl_used; i++)
+                       kfree(spec->kctl_alloc[i].name);
+               kfree(spec->kctl_alloc);
+       }
+       kfree(codec->spec);
+}
+
+#ifdef CONFIG_PM
+static int ad198x_resume(struct hda_codec *codec)
+{
+       struct ad198x_spec *spec = codec->spec;
+       int i;
+
+       codec->patch_ops.init(codec);
+       for (i = 0; i < spec->num_mixers; i++)
+               snd_hda_resume_ctls(codec, spec->mixers[i]);
+       if (spec->multiout.dig_out_nid)
+               snd_hda_resume_spdif_out(codec);
+       if (spec->dig_in_nid)
+               snd_hda_resume_spdif_in(codec);
+       return 0;
+}
+#endif
+
+static struct hda_codec_ops ad198x_patch_ops = {
+       .build_controls = ad198x_build_controls,
+       .build_pcms = ad198x_build_pcms,
+       .init = ad198x_init,
+       .free = ad198x_free,
+#ifdef CONFIG_PM
+       .resume = ad198x_resume,
+#endif
 };
 
+
+/*
+ * EAPD control
+ * the private value = nid | (invert << 8)
+ */
+static int ad198x_eapd_info(struct snd_kcontrol *kcontrol,
+                           struct snd_ctl_elem_info *uinfo)
+{
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
+       uinfo->count = 1;
+       uinfo->value.integer.min = 0;
+       uinfo->value.integer.max = 1;
+       return 0;
+}
+
+static int ad198x_eapd_get(struct snd_kcontrol *kcontrol,
+                          struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct ad198x_spec *spec = codec->spec;
+       int invert = (kcontrol->private_value >> 8) & 1;
+       if (invert)
+               ucontrol->value.integer.value[0] = ! spec->cur_eapd;
+       else
+               ucontrol->value.integer.value[0] = spec->cur_eapd;
+       return 0;
+}
+
+static int ad198x_eapd_put(struct snd_kcontrol *kcontrol,
+                          struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct ad198x_spec *spec = codec->spec;
+       int invert = (kcontrol->private_value >> 8) & 1;
+       hda_nid_t nid = kcontrol->private_value & 0xff;
+       unsigned int eapd;
+       eapd = ucontrol->value.integer.value[0];
+       if (invert)
+               eapd = !eapd;
+       if (eapd == spec->cur_eapd && ! codec->in_resume)
+               return 0;
+       spec->cur_eapd = eapd;
+       snd_hda_codec_write(codec, nid,
+                           0, AC_VERB_SET_EAPD_BTLENABLE,
+                           eapd ? 0x02 : 0x00);
+       return 1;
+}
+
+static int ad198x_ch_mode_info(struct snd_kcontrol *kcontrol,
+                              struct snd_ctl_elem_info *uinfo);
+static int ad198x_ch_mode_get(struct snd_kcontrol *kcontrol,
+                             struct snd_ctl_elem_value *ucontrol);
+static int ad198x_ch_mode_put(struct snd_kcontrol *kcontrol,
+                             struct snd_ctl_elem_value *ucontrol);
+
+
+/*
+ * AD1986A specific
+ */
+
 #define AD1986A_SPDIF_OUT      0x02
 #define AD1986A_FRONT_DAC      0x03
 #define AD1986A_SURR_DAC       0x04
@@ -43,6 +401,8 @@ struct ad1986a_spec {
 static hda_nid_t ad1986a_dac_nids[3] = {
        AD1986A_FRONT_DAC, AD1986A_SURR_DAC, AD1986A_CLFE_DAC
 };
+static hda_nid_t ad1986a_adc_nids[1] = { AD1986A_ADC };
+static hda_nid_t ad1986a_capsrc_nids[1] = { 0x12 };
 
 static struct hda_input_mux ad1986a_capture_source = {
        .num_items = 7,
@@ -65,92 +425,66 @@ static struct hda_input_mux ad1986a_capture_source = {
 
 #define ad1986a_pcm_amp_vol_info       snd_hda_mixer_amp_volume_info
 
-static int ad1986a_pcm_amp_vol_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+static int ad1986a_pcm_amp_vol_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
        struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-       struct ad1986a_spec *ad = codec->spec;
+       struct ad198x_spec *ad = codec->spec;
 
-       down(&ad->amp_mutex);
+       mutex_lock(&ad->amp_mutex);
        snd_hda_mixer_amp_volume_get(kcontrol, ucontrol);
-       up(&ad->amp_mutex);
+       mutex_unlock(&ad->amp_mutex);
        return 0;
 }
 
-static int ad1986a_pcm_amp_vol_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+static int ad1986a_pcm_amp_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
        struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-       struct ad1986a_spec *ad = codec->spec;
+       struct ad198x_spec *ad = codec->spec;
        int i, change = 0;
 
-       down(&ad->amp_mutex);
+       mutex_lock(&ad->amp_mutex);
        for (i = 0; i < ARRAY_SIZE(ad1986a_dac_nids); i++) {
                kcontrol->private_value = HDA_COMPOSE_AMP_VAL(ad1986a_dac_nids[i], 3, 0, HDA_OUTPUT);
                change |= snd_hda_mixer_amp_volume_put(kcontrol, ucontrol);
        }
        kcontrol->private_value = HDA_COMPOSE_AMP_VAL(AD1986A_FRONT_DAC, 3, 0, HDA_OUTPUT);
-       up(&ad->amp_mutex);
+       mutex_unlock(&ad->amp_mutex);
        return change;
 }
 
-#define ad1986a_pcm_amp_sw_info                snd_hda_mixer_amp_volume_info
+#define ad1986a_pcm_amp_sw_info                snd_hda_mixer_amp_switch_info
 
-static int ad1986a_pcm_amp_sw_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+static int ad1986a_pcm_amp_sw_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
        struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-       struct ad1986a_spec *ad = codec->spec;
+       struct ad198x_spec *ad = codec->spec;
 
-       down(&ad->amp_mutex);
+       mutex_lock(&ad->amp_mutex);
        snd_hda_mixer_amp_switch_get(kcontrol, ucontrol);
-       up(&ad->amp_mutex);
+       mutex_unlock(&ad->amp_mutex);
        return 0;
 }
 
-static int ad1986a_pcm_amp_sw_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+static int ad1986a_pcm_amp_sw_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
        struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-       struct ad1986a_spec *ad = codec->spec;
+       struct ad198x_spec *ad = codec->spec;
        int i, change = 0;
 
-       down(&ad->amp_mutex);
+       mutex_lock(&ad->amp_mutex);
        for (i = 0; i < ARRAY_SIZE(ad1986a_dac_nids); i++) {
                kcontrol->private_value = HDA_COMPOSE_AMP_VAL(ad1986a_dac_nids[i], 3, 0, HDA_OUTPUT);
                change |= snd_hda_mixer_amp_switch_put(kcontrol, ucontrol);
        }
        kcontrol->private_value = HDA_COMPOSE_AMP_VAL(AD1986A_FRONT_DAC, 3, 0, HDA_OUTPUT);
-       up(&ad->amp_mutex);
+       mutex_unlock(&ad->amp_mutex);
        return change;
 }
 
-/*
- * input MUX handling
- */
-static int ad1986a_mux_enum_info(snd_kcontrol_t *kcontrol, snd_ctl_elem_info_t *uinfo)
-{
-       return snd_hda_input_mux_info(&ad1986a_capture_source, uinfo);
-}
-
-static int ad1986a_mux_enum_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
-{
-       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-       struct ad1986a_spec *spec = codec->spec;
-
-       ucontrol->value.enumerated.item[0] = spec->cur_mux;
-       return 0;
-}
-
-static int ad1986a_mux_enum_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
-{
-       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-       struct ad1986a_spec *spec = codec->spec;
-
-       return snd_hda_input_mux_put(codec, &ad1986a_capture_source, ucontrol,
-                                    AD1986A_ADC, &spec->cur_mux);
-}
-
 /*
  * mixers
  */
-static snd_kcontrol_new_t ad1986a_mixers[] = {
+static struct snd_kcontrol_new ad1986a_mixers[] = {
        {
                .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
                .name = "PCM Playback Volume",
@@ -194,14 +528,151 @@ static snd_kcontrol_new_t ad1986a_mixers[] = {
        {
                .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
                .name = "Capture Source",
-               .info = ad1986a_mux_enum_info,
-               .get = ad1986a_mux_enum_get,
-               .put = ad1986a_mux_enum_put,
+               .info = ad198x_mux_enum_info,
+               .get = ad198x_mux_enum_get,
+               .put = ad198x_mux_enum_put,
        },
        HDA_CODEC_MUTE("Stereo Downmix Switch", 0x09, 0x0, HDA_OUTPUT),
        { } /* end */
 };
 
+/* additional mixers for 3stack mode */
+static struct snd_kcontrol_new ad1986a_3st_mixers[] = {
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Channel Mode",
+               .info = ad198x_ch_mode_info,
+               .get = ad198x_ch_mode_get,
+               .put = ad198x_ch_mode_put,
+       },
+       { } /* end */
+};
+
+/* laptop model - 2ch only */
+static hda_nid_t ad1986a_laptop_dac_nids[1] = { AD1986A_FRONT_DAC };
+
+static struct snd_kcontrol_new ad1986a_laptop_mixers[] = {
+       HDA_CODEC_VOLUME("PCM Playback Volume", 0x03, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("PCM Playback Switch", 0x03, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Master Playback Volume", 0x1b, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Master Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
+       /* HDA_CODEC_VOLUME("Headphone Playback Volume", 0x1a, 0x0, HDA_OUTPUT),
+          HDA_CODEC_MUTE("Headphone Playback Switch", 0x1a, 0x0, HDA_OUTPUT), */
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x15, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x15, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Line Playback Volume", 0x17, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Line Playback Switch", 0x17, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Aux Playback Volume", 0x16, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Aux Playback Switch", 0x16, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x13, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x13, 0x0, HDA_OUTPUT),
+       /* HDA_CODEC_VOLUME("PC Speaker Playback Volume", 0x18, 0x0, HDA_OUTPUT),
+          HDA_CODEC_MUTE("PC Speaker Playback Switch", 0x18, 0x0, HDA_OUTPUT),
+          HDA_CODEC_VOLUME("Mono Playback Volume", 0x1e, 0x0, HDA_OUTPUT),
+          HDA_CODEC_MUTE("Mono Playback Switch", 0x1e, 0x0, HDA_OUTPUT), */
+       HDA_CODEC_VOLUME("Capture Volume", 0x12, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x12, 0x0, HDA_OUTPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Capture Source",
+               .info = ad198x_mux_enum_info,
+               .get = ad198x_mux_enum_get,
+               .put = ad198x_mux_enum_put,
+       },
+       { } /* end */
+};
+
+/* laptop-eapd model - 2ch only */
+
+/* master controls both pins 0x1a and 0x1b */
+static int ad1986a_laptop_master_vol_put(struct snd_kcontrol *kcontrol,
+                                        struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       long *valp = ucontrol->value.integer.value;
+       int change;
+
+       change = snd_hda_codec_amp_update(codec, 0x1a, 0, HDA_OUTPUT, 0,
+                                         0x7f, valp[0] & 0x7f);
+       change |= snd_hda_codec_amp_update(codec, 0x1a, 1, HDA_OUTPUT, 0,
+                                          0x7f, valp[1] & 0x7f);
+       snd_hda_codec_amp_update(codec, 0x1b, 0, HDA_OUTPUT, 0,
+                                0x7f, valp[0] & 0x7f);
+       snd_hda_codec_amp_update(codec, 0x1b, 1, HDA_OUTPUT, 0,
+                                0x7f, valp[1] & 0x7f);
+       return change;
+}
+
+static int ad1986a_laptop_master_sw_put(struct snd_kcontrol *kcontrol,
+                                       struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       long *valp = ucontrol->value.integer.value;
+       int change;
+
+       change = snd_hda_codec_amp_update(codec, 0x1a, 0, HDA_OUTPUT, 0,
+                                         0x80, valp[0] ? 0 : 0x80);
+       change |= snd_hda_codec_amp_update(codec, 0x1a, 1, HDA_OUTPUT, 0,
+                                          0x80, valp[1] ? 0 : 0x80);
+       snd_hda_codec_amp_update(codec, 0x1b, 0, HDA_OUTPUT, 0,
+                                0x80, valp[0] ? 0 : 0x80);
+       snd_hda_codec_amp_update(codec, 0x1b, 1, HDA_OUTPUT, 0,
+                                0x80, valp[1] ? 0 : 0x80);
+       return change;
+}
+
+static struct hda_input_mux ad1986a_laptop_eapd_capture_source = {
+       .num_items = 3,
+       .items = {
+               { "Mic", 0x0 },
+               { "Internal Mic", 0x4 },
+               { "Mix", 0x5 },
+       },
+};
+
+static struct snd_kcontrol_new ad1986a_laptop_eapd_mixers[] = {
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Master Playback Volume",
+               .info = snd_hda_mixer_amp_volume_info,
+               .get = snd_hda_mixer_amp_volume_get,
+               .put = ad1986a_laptop_master_vol_put,
+               .private_value = HDA_COMPOSE_AMP_VAL(0x1a, 3, 0, HDA_OUTPUT),
+       },
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Master Playback Switch",
+               .info = snd_hda_mixer_amp_switch_info,
+               .get = snd_hda_mixer_amp_switch_get,
+               .put = ad1986a_laptop_master_sw_put,
+               .private_value = HDA_COMPOSE_AMP_VAL(0x1a, 3, 0, HDA_OUTPUT),
+       },
+       HDA_CODEC_VOLUME("PCM Playback Volume", 0x03, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("PCM Playback Switch", 0x03, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Internal Mic Playback Volume", 0x17, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Internal Mic Playback Switch", 0x17, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x13, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x13, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Capture Volume", 0x12, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x12, 0x0, HDA_OUTPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Capture Source",
+               .info = ad198x_mux_enum_info,
+               .get = ad198x_mux_enum_get,
+               .put = ad198x_mux_enum_put,
+       },
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "External Amplifier",
+               .info = ad198x_eapd_info,
+               .get = ad198x_eapd_get,
+               .put = ad198x_eapd_put,
+               .private_value = 0x1b | (1 << 8), /* port-D, inversed */
+       },
+       { } /* end */
+};
+
 /*
  * initialization verbs
  */
@@ -241,205 +712,1924 @@ static struct hda_verb ad1986a_init_verbs[] = {
        {0x1c, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
        {0x1d, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
        {0x1e, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       /* HP Pin */
+       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0 },
+       /* Front, Surround, CLFE Pins */
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       {0x1d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       /* Mono Pin */
+       {0x1e, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       /* Mic Pin */
+       {0x1f, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+       /* Line, Aux, CD, Beep-In Pin */
+       {0x20, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+       {0x21, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+       {0x22, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+       {0x23, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+       {0x24, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
        { } /* end */
 };
 
+/* additional verbs for 3-stack model */
+static struct hda_verb ad1986a_3st_init_verbs[] = {
+       /* Mic and line-in selectors */
+       {0x0f, AC_VERB_SET_CONNECT_SEL, 0x2},
+       {0x10, AC_VERB_SET_CONNECT_SEL, 0x1},
+       { } /* end */
+};
 
-static int ad1986a_init(struct hda_codec *codec)
-{
-       snd_hda_sequence_write(codec, ad1986a_init_verbs);
-       return 0;
-}
+static struct hda_verb ad1986a_ch2_init[] = {
+       /* Surround out -> Line In */
+       { 0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+       { 0x1c, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       /* CLFE -> Mic in */
+       { 0x1d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+       { 0x1d, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       { } /* end */
+};
 
-static int ad1986a_build_controls(struct hda_codec *codec)
-{
-       int err;
+static struct hda_verb ad1986a_ch4_init[] = {
+       /* Surround out -> Surround */
+       { 0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       { 0x1c, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+       /* CLFE -> Mic in */
+       { 0x1d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+       { 0x1d, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       { } /* end */
+};
 
-       err = snd_hda_add_new_ctls(codec, ad1986a_mixers);
-       if (err < 0)
-               return err;
-       err = snd_hda_create_spdif_out_ctls(codec, AD1986A_SPDIF_OUT);
-       if (err < 0)
-               return err;
-       return 0;
-}
+static struct hda_verb ad1986a_ch6_init[] = {
+       /* Surround out -> Surround out */
+       { 0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       { 0x1c, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+       /* CLFE -> CLFE */
+       { 0x1d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       { 0x1d, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+       { } /* end */
+};
+
+static struct hda_channel_mode ad1986a_modes[3] = {
+       { 2, ad1986a_ch2_init },
+       { 4, ad1986a_ch4_init },
+       { 6, ad1986a_ch6_init },
+};
+
+/* eapd initialization */
+static struct hda_verb ad1986a_eapd_init_verbs[] = {
+       {0x1b, AC_VERB_SET_EAPD_BTLENABLE, 0x00},
+       {}
+};
+
+/* models */
+enum { AD1986A_6STACK, AD1986A_3STACK, AD1986A_LAPTOP, AD1986A_LAPTOP_EAPD };
+
+static struct hda_board_config ad1986a_cfg_tbl[] = {
+       { .modelname = "6stack",        .config = AD1986A_6STACK },
+       { .modelname = "3stack",        .config = AD1986A_3STACK },
+       { .pci_subvendor = 0x10de, .pci_subdevice = 0xcb84,
+         .config = AD1986A_3STACK }, /* ASUS A8N-VM CSM */
+       { .modelname = "laptop",        .config = AD1986A_LAPTOP },
+       { .pci_subvendor = 0x144d, .pci_subdevice = 0xc01e,
+         .config = AD1986A_LAPTOP }, /* FSC V2060 */
+       { .pci_subvendor = 0x17c0, .pci_subdevice = 0x2017,
+         .config = AD1986A_LAPTOP }, /* Samsung M50 */
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x818f,
+         .config = AD1986A_LAPTOP }, /* ASUS P5GV-MX */
+       { .modelname = "laptop-eapd",   .config = AD1986A_LAPTOP_EAPD },
+       { .pci_subvendor = 0x144d, .pci_subdevice = 0xc024,
+         .config = AD1986A_LAPTOP_EAPD }, /* Samsung R65-T2300 Charis */
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x1153,
+         .config = AD1986A_LAPTOP_EAPD }, /* ASUS M9 */
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x1213,
+         .config = AD1986A_LAPTOP_EAPD }, /* ASUS A6J */
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x11f7,
+         .config = AD1986A_LAPTOP_EAPD }, /* ASUS U5A */
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x1297,
+         .config = AD1986A_LAPTOP_EAPD }, /* ASUS Z62F */
+       { .pci_subvendor = 0x103c, .pci_subdevice = 0x30af,
+         .config = AD1986A_LAPTOP_EAPD }, /* HP Compaq Presario B2800 */
+       {}
+};
+
+static int patch_ad1986a(struct hda_codec *codec)
+{
+       struct ad198x_spec *spec;
+       int board_config;
+
+       spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+       if (spec == NULL)
+               return -ENOMEM;
+
+       mutex_init(&spec->amp_mutex);
+       codec->spec = spec;
+
+       spec->multiout.max_channels = 6;
+       spec->multiout.num_dacs = ARRAY_SIZE(ad1986a_dac_nids);
+       spec->multiout.dac_nids = ad1986a_dac_nids;
+       spec->multiout.dig_out_nid = AD1986A_SPDIF_OUT;
+       spec->num_adc_nids = 1;
+       spec->adc_nids = ad1986a_adc_nids;
+       spec->capsrc_nids = ad1986a_capsrc_nids;
+       spec->input_mux = &ad1986a_capture_source;
+       spec->num_mixers = 1;
+       spec->mixers[0] = ad1986a_mixers;
+       spec->num_init_verbs = 1;
+       spec->init_verbs[0] = ad1986a_init_verbs;
+
+       codec->patch_ops = ad198x_patch_ops;
+
+       /* override some parameters */
+       board_config = snd_hda_check_board_config(codec, ad1986a_cfg_tbl);
+       switch (board_config) {
+       case AD1986A_3STACK:
+               spec->num_mixers = 2;
+               spec->mixers[1] = ad1986a_3st_mixers;
+               spec->num_init_verbs = 3;
+               spec->init_verbs[1] = ad1986a_3st_init_verbs;
+               spec->init_verbs[2] = ad1986a_ch2_init;
+               spec->channel_mode = ad1986a_modes;
+               spec->num_channel_mode = ARRAY_SIZE(ad1986a_modes);
+               spec->need_dac_fix = 1;
+               spec->multiout.max_channels = 2;
+               spec->multiout.num_dacs = 1;
+               break;
+       case AD1986A_LAPTOP:
+               spec->mixers[0] = ad1986a_laptop_mixers;
+               spec->multiout.max_channels = 2;
+               spec->multiout.num_dacs = 1;
+               spec->multiout.dac_nids = ad1986a_laptop_dac_nids;
+               break;
+       case AD1986A_LAPTOP_EAPD:
+               spec->mixers[0] = ad1986a_laptop_eapd_mixers;
+               spec->num_init_verbs = 2;
+               spec->init_verbs[1] = ad1986a_eapd_init_verbs;
+               spec->multiout.max_channels = 2;
+               spec->multiout.num_dacs = 1;
+               spec->multiout.dac_nids = ad1986a_laptop_dac_nids;
+               spec->multiout.dig_out_nid = 0;
+               spec->input_mux = &ad1986a_laptop_eapd_capture_source;
+               break;
+       }
+
+       return 0;
+}
 
 /*
- * Analog playback callbacks
+ * AD1983 specific
  */
-static int ad1986a_playback_pcm_open(struct hda_pcm_stream *hinfo,
-                                    struct hda_codec *codec,
-                                    snd_pcm_substream_t *substream)
+
+#define AD1983_SPDIF_OUT       0x02
+#define AD1983_DAC             0x03
+#define AD1983_ADC             0x04
+
+static hda_nid_t ad1983_dac_nids[1] = { AD1983_DAC };
+static hda_nid_t ad1983_adc_nids[1] = { AD1983_ADC };
+static hda_nid_t ad1983_capsrc_nids[1] = { 0x15 };
+
+static struct hda_input_mux ad1983_capture_source = {
+       .num_items = 4,
+       .items = {
+               { "Mic", 0x0 },
+               { "Line", 0x1 },
+               { "Mix", 0x2 },
+               { "Mix Mono", 0x3 },
+       },
+};
+
+/*
+ * SPDIF playback route
+ */
+static int ad1983_spdif_route_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
-       struct ad1986a_spec *spec = codec->spec;
-       return snd_hda_multi_out_analog_open(codec, &spec->multiout, substream);
+       static char *texts[] = { "PCM", "ADC" };
+
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+       uinfo->count = 1;
+       uinfo->value.enumerated.items = 2;
+       if (uinfo->value.enumerated.item > 1)
+               uinfo->value.enumerated.item = 1;
+       strcpy(uinfo->value.enumerated.name, texts[uinfo->value.enumerated.item]);
+       return 0;
 }
 
-static int ad1986a_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
-                                       struct hda_codec *codec,
-                                       unsigned int stream_tag,
-                                       unsigned int format,
-                                       snd_pcm_substream_t *substream)
+static int ad1983_spdif_route_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
-       struct ad1986a_spec *spec = codec->spec;
-       return snd_hda_multi_out_analog_prepare(codec, &spec->multiout, stream_tag,
-                                               format, substream);
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct ad198x_spec *spec = codec->spec;
+
+       ucontrol->value.enumerated.item[0] = spec->spdif_route;
+       return 0;
 }
 
-static int ad1986a_playback_pcm_cleanup(struct hda_pcm_stream *hinfo,
-                                       struct hda_codec *codec,
-                                       snd_pcm_substream_t *substream)
+static int ad1983_spdif_route_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
-       struct ad1986a_spec *spec = codec->spec;
-       return snd_hda_multi_out_analog_cleanup(codec, &spec->multiout);
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct ad198x_spec *spec = codec->spec;
+
+       if (spec->spdif_route != ucontrol->value.enumerated.item[0]) {
+               spec->spdif_route = ucontrol->value.enumerated.item[0];
+               snd_hda_codec_write(codec, spec->multiout.dig_out_nid, 0,
+                                   AC_VERB_SET_CONNECT_SEL, spec->spdif_route);
+               return 1;
+       }
+       return 0;
 }
 
+static struct snd_kcontrol_new ad1983_mixers[] = {
+       HDA_CODEC_VOLUME("Front Playback Volume", 0x05, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Front Playback Switch", 0x05, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Headphone Playback Volume", 0x06, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Headphone Playback Switch", 0x06, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("Mono Playback Volume", 0x07, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE_MONO("Mono Playback Switch", 0x07, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("PCM Playback Volume", 0x11, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("PCM Playback Switch", 0x11, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x12, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x12, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Line Playback Volume", 0x13, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Line Playback Switch", 0x13, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("PC Speaker Playback Volume", 0x10, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE_MONO("PC Speaker Playback Switch", 0x10, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Mic Boost", 0x0c, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Capture Volume", 0x15, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x15, 0x0, HDA_OUTPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Capture Source",
+               .info = ad198x_mux_enum_info,
+               .get = ad198x_mux_enum_get,
+               .put = ad198x_mux_enum_put,
+       },
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,NONE) "Route",
+               .info = ad1983_spdif_route_info,
+               .get = ad1983_spdif_route_get,
+               .put = ad1983_spdif_route_put,
+       },
+       { } /* end */
+};
+
+static struct hda_verb ad1983_init_verbs[] = {
+       /* Front, HP, Mono; mute as default */
+       {0x05, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x06, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       /* Beep, PCM, Mic, Line-In: mute */
+       {0x10, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x11, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x12, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x13, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       /* Front, HP selectors; from Mix */
+       {0x05, AC_VERB_SET_CONNECT_SEL, 0x01},
+       {0x06, AC_VERB_SET_CONNECT_SEL, 0x01},
+       /* Mono selector; from Mix */
+       {0x0b, AC_VERB_SET_CONNECT_SEL, 0x03},
+       /* Mic selector; Mic */
+       {0x0c, AC_VERB_SET_CONNECT_SEL, 0x0},
+       /* Line-in selector: Line-in */
+       {0x0d, AC_VERB_SET_CONNECT_SEL, 0x0},
+       /* Mic boost: 0dB */
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+       /* Record selector: mic */
+       {0x15, AC_VERB_SET_CONNECT_SEL, 0x0},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       /* SPDIF route: PCM */
+       {0x02, AC_VERB_SET_CONNECT_SEL, 0x0},
+       /* Front Pin */
+       {0x05, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       /* HP Pin */
+       {0x06, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0 },
+       /* Mono Pin */
+       {0x07, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       /* Mic Pin */
+       {0x08, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+       /* Line Pin */
+       {0x09, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+       { } /* end */
+};
+
+
+static int patch_ad1983(struct hda_codec *codec)
+{
+       struct ad198x_spec *spec;
+
+       spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+       if (spec == NULL)
+               return -ENOMEM;
+
+       mutex_init(&spec->amp_mutex);
+       codec->spec = spec;
+
+       spec->multiout.max_channels = 2;
+       spec->multiout.num_dacs = ARRAY_SIZE(ad1983_dac_nids);
+       spec->multiout.dac_nids = ad1983_dac_nids;
+       spec->multiout.dig_out_nid = AD1983_SPDIF_OUT;
+       spec->num_adc_nids = 1;
+       spec->adc_nids = ad1983_adc_nids;
+       spec->capsrc_nids = ad1983_capsrc_nids;
+       spec->input_mux = &ad1983_capture_source;
+       spec->num_mixers = 1;
+       spec->mixers[0] = ad1983_mixers;
+       spec->num_init_verbs = 1;
+       spec->init_verbs[0] = ad1983_init_verbs;
+       spec->spdif_route = 0;
+
+       codec->patch_ops = ad198x_patch_ops;
+
+       return 0;
+}
+
+
 /*
- * Digital out
+ * AD1981 HD specific
  */
-static int ad1986a_dig_playback_pcm_open(struct hda_pcm_stream *hinfo,
-                                        struct hda_codec *codec,
-                                        snd_pcm_substream_t *substream)
+
+#define AD1981_SPDIF_OUT       0x02
+#define AD1981_DAC             0x03
+#define AD1981_ADC             0x04
+
+static hda_nid_t ad1981_dac_nids[1] = { AD1981_DAC };
+static hda_nid_t ad1981_adc_nids[1] = { AD1981_ADC };
+static hda_nid_t ad1981_capsrc_nids[1] = { 0x15 };
+
+/* 0x0c, 0x09, 0x0e, 0x0f, 0x19, 0x05, 0x18, 0x17 */
+static struct hda_input_mux ad1981_capture_source = {
+       .num_items = 7,
+       .items = {
+               { "Front Mic", 0x0 },
+               { "Line", 0x1 },
+               { "Mix", 0x2 },
+               { "Mix Mono", 0x3 },
+               { "CD", 0x4 },
+               { "Mic", 0x6 },
+               { "Aux", 0x7 },
+       },
+};
+
+static struct snd_kcontrol_new ad1981_mixers[] = {
+       HDA_CODEC_VOLUME("Front Playback Volume", 0x05, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Front Playback Switch", 0x05, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Headphone Playback Volume", 0x06, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Headphone Playback Switch", 0x06, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("Mono Playback Volume", 0x07, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE_MONO("Mono Playback Switch", 0x07, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("PCM Playback Volume", 0x11, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("PCM Playback Switch", 0x11, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Front Mic Playback Volume", 0x12, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Front Mic Playback Switch", 0x12, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Line Playback Volume", 0x13, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Line Playback Switch", 0x13, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Aux Playback Volume", 0x1b, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Aux Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x1c, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x1c, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x1d, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x1d, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("PC Speaker Playback Volume", 0x0d, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE_MONO("PC Speaker Playback Switch", 0x0d, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Front Mic Boost", 0x08, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Boost", 0x18, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME("Capture Volume", 0x15, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x15, 0x0, HDA_OUTPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Capture Source",
+               .info = ad198x_mux_enum_info,
+               .get = ad198x_mux_enum_get,
+               .put = ad198x_mux_enum_put,
+       },
+       /* identical with AD1983 */
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = SNDRV_CTL_NAME_IEC958("",PLAYBACK,NONE) "Route",
+               .info = ad1983_spdif_route_info,
+               .get = ad1983_spdif_route_get,
+               .put = ad1983_spdif_route_put,
+       },
+       { } /* end */
+};
+
+static struct hda_verb ad1981_init_verbs[] = {
+       /* Front, HP, Mono; mute as default */
+       {0x05, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x06, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       /* Beep, PCM, Front Mic, Line, Rear Mic, Aux, CD-In: mute */
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x11, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x12, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x13, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x1c, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x1d, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       /* Front, HP selectors; from Mix */
+       {0x05, AC_VERB_SET_CONNECT_SEL, 0x01},
+       {0x06, AC_VERB_SET_CONNECT_SEL, 0x01},
+       /* Mono selector; from Mix */
+       {0x0b, AC_VERB_SET_CONNECT_SEL, 0x03},
+       /* Mic Mixer; select Front Mic */
+       {0x1e, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+       {0x1f, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       /* Mic boost: 0dB */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+       /* Record selector: Front mic */
+       {0x15, AC_VERB_SET_CONNECT_SEL, 0x0},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       /* SPDIF route: PCM */
+       {0x02, AC_VERB_SET_CONNECT_SEL, 0x0},
+       /* Front Pin */
+       {0x05, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       /* HP Pin */
+       {0x06, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0 },
+       /* Mono Pin */
+       {0x07, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       /* Front & Rear Mic Pins */
+       {0x08, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+       /* Line Pin */
+       {0x09, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+       /* Digital Beep */
+       {0x0d, AC_VERB_SET_CONNECT_SEL, 0x00},
+       /* Line-Out as Input: disabled */
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       { } /* end */
+};
+
+/*
+ * Patch for HP nx6320
+ *
+ * nx6320 uses EAPD in the reverse way - EAPD-on means the internal
+ * speaker output enabled _and_ mute-LED off.
+ */
+
+#define AD1981_HP_EVENT                0x37
+#define AD1981_MIC_EVENT       0x38
+
+static struct hda_verb ad1981_hp_init_verbs[] = {
+       {0x05, AC_VERB_SET_EAPD_BTLENABLE, 0x00 }, /* default off */
+       /* pin sensing on HP and Mic jacks */
+       {0x06, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | AD1981_HP_EVENT},
+       {0x08, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | AD1981_MIC_EVENT},
+       {}
+};
+
+/* turn on/off EAPD (+ mute HP) as a master switch */
+static int ad1981_hp_master_sw_put(struct snd_kcontrol *kcontrol,
+                                  struct snd_ctl_elem_value *ucontrol)
 {
-       struct ad1986a_spec *spec = codec->spec;
-       return snd_hda_multi_out_dig_open(codec, &spec->multiout);
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct ad198x_spec *spec = codec->spec;
+
+       if (! ad198x_eapd_put(kcontrol, ucontrol))
+               return 0;
+
+       /* toggle HP mute appropriately */
+       snd_hda_codec_amp_update(codec, 0x06, 0, HDA_OUTPUT, 0,
+                                0x80, spec->cur_eapd ? 0 : 0x80);
+       snd_hda_codec_amp_update(codec, 0x06, 1, HDA_OUTPUT, 0,
+                                0x80, spec->cur_eapd ? 0 : 0x80);
+       return 1;
 }
 
-static int ad1986a_dig_playback_pcm_close(struct hda_pcm_stream *hinfo,
-                                         struct hda_codec *codec,
-                                         snd_pcm_substream_t *substream)
+/* bind volumes of both NID 0x05 and 0x06 */
+static int ad1981_hp_master_vol_put(struct snd_kcontrol *kcontrol,
+                                   struct snd_ctl_elem_value *ucontrol)
 {
-       struct ad1986a_spec *spec = codec->spec;
-       return snd_hda_multi_out_dig_close(codec, &spec->multiout);
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       long *valp = ucontrol->value.integer.value;
+       int change;
+
+       change = snd_hda_codec_amp_update(codec, 0x05, 0, HDA_OUTPUT, 0,
+                                         0x7f, valp[0] & 0x7f);
+       change |= snd_hda_codec_amp_update(codec, 0x05, 1, HDA_OUTPUT, 0,
+                                          0x7f, valp[1] & 0x7f);
+       snd_hda_codec_amp_update(codec, 0x06, 0, HDA_OUTPUT, 0,
+                                0x7f, valp[0] & 0x7f);
+       snd_hda_codec_amp_update(codec, 0x06, 1, HDA_OUTPUT, 0,
+                                0x7f, valp[1] & 0x7f);
+       return change;
 }
 
-/*
- * Analog capture
- */
-static int ad1986a_capture_pcm_prepare(struct hda_pcm_stream *hinfo,
-                                      struct hda_codec *codec,
-                                      unsigned int stream_tag,
-                                      unsigned int format,
-                                      snd_pcm_substream_t *substream)
+/* mute internal speaker if HP is plugged */
+static void ad1981_hp_automute(struct hda_codec *codec)
+{
+       unsigned int present;
+
+       present = snd_hda_codec_read(codec, 0x06, 0,
+                                    AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
+       snd_hda_codec_amp_update(codec, 0x05, 0, HDA_OUTPUT, 0,
+                                0x80, present ? 0x80 : 0);
+       snd_hda_codec_amp_update(codec, 0x05, 1, HDA_OUTPUT, 0,
+                                0x80, present ? 0x80 : 0);
+}
+
+/* toggle input of built-in and mic jack appropriately */
+static void ad1981_hp_automic(struct hda_codec *codec)
 {
-       snd_hda_codec_setup_stream(codec, AD1986A_ADC, stream_tag, 0, format);
+       static struct hda_verb mic_jack_on[] = {
+               {0x1f, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+               {0x1e, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+               {}
+       };
+       static struct hda_verb mic_jack_off[] = {
+               {0x1e, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+               {0x1f, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+               {}
+       };
+       unsigned int present;
+
+       present = snd_hda_codec_read(codec, 0x08, 0,
+                                AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
+       if (present)
+               snd_hda_sequence_write(codec, mic_jack_on);
+       else
+               snd_hda_sequence_write(codec, mic_jack_off);
+}
+
+/* unsolicited event for HP jack sensing */
+static void ad1981_hp_unsol_event(struct hda_codec *codec,
+                                 unsigned int res)
+{
+       res >>= 26;
+       switch (res) {
+       case AD1981_HP_EVENT:
+               ad1981_hp_automute(codec);
+               break;
+       case AD1981_MIC_EVENT:
+               ad1981_hp_automic(codec);
+               break;
+       }
+}
+
+static struct hda_input_mux ad1981_hp_capture_source = {
+       .num_items = 3,
+       .items = {
+               { "Mic", 0x0 },
+               { "Docking-Station", 0x1 },
+               { "Mix", 0x2 },
+       },
+};
+
+static struct snd_kcontrol_new ad1981_hp_mixers[] = {
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Master Playback Volume",
+               .info = snd_hda_mixer_amp_volume_info,
+               .get = snd_hda_mixer_amp_volume_get,
+               .put = ad1981_hp_master_vol_put,
+               .private_value = HDA_COMPOSE_AMP_VAL(0x05, 3, 0, HDA_OUTPUT),
+       },
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Master Playback Switch",
+               .info = ad198x_eapd_info,
+               .get = ad198x_eapd_get,
+               .put = ad1981_hp_master_sw_put,
+               .private_value = 0x05,
+       },
+       HDA_CODEC_VOLUME("PCM Playback Volume", 0x11, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("PCM Playback Switch", 0x11, 0x0, HDA_OUTPUT),
+#if 0
+       /* FIXME: analog mic/line loopback doesn't work with my tests...
+        *        (although recording is OK)
+        */
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x12, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x12, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Docking-Station Playback Volume", 0x13, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Docking-Station Playback Switch", 0x13, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Internal Mic Playback Volume", 0x1c, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Internal Mic Playback Switch", 0x1c, 0x0, HDA_OUTPUT),
+       /* FIXME: does this laptop have analog CD connection? */
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x1d, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x1d, 0x0, HDA_OUTPUT),
+#endif
+       HDA_CODEC_VOLUME("Mic Boost", 0x08, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME("Internal Mic Boost", 0x18, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME("Capture Volume", 0x15, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x15, 0x0, HDA_OUTPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Capture Source",
+               .info = ad198x_mux_enum_info,
+               .get = ad198x_mux_enum_get,
+               .put = ad198x_mux_enum_put,
+       },
+       { } /* end */
+};
+
+/* initialize jack-sensing, too */
+static int ad1981_hp_init(struct hda_codec *codec)
+{
+       ad198x_init(codec);
+       ad1981_hp_automute(codec);
+       ad1981_hp_automic(codec);
        return 0;
 }
 
-static int ad1986a_capture_pcm_cleanup(struct hda_pcm_stream *hinfo,
-                                      struct hda_codec *codec,
-                                      snd_pcm_substream_t *substream)
+/* models */
+enum { AD1981_BASIC, AD1981_HP };
+
+static struct hda_board_config ad1981_cfg_tbl[] = {
+       { .modelname = "hp", .config = AD1981_HP },
+       /* All HP models */
+       { .pci_subvendor = 0x103c, .config = AD1981_HP },
+       { .modelname = "basic", .config = AD1981_BASIC },
+       {}
+};
+
+static int patch_ad1981(struct hda_codec *codec)
 {
-       snd_hda_codec_setup_stream(codec, AD1986A_ADC, 0, 0, 0);
+       struct ad198x_spec *spec;
+       int board_config;
+
+       spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+       if (spec == NULL)
+               return -ENOMEM;
+
+       mutex_init(&spec->amp_mutex);
+       codec->spec = spec;
+
+       spec->multiout.max_channels = 2;
+       spec->multiout.num_dacs = ARRAY_SIZE(ad1981_dac_nids);
+       spec->multiout.dac_nids = ad1981_dac_nids;
+       spec->multiout.dig_out_nid = AD1981_SPDIF_OUT;
+       spec->num_adc_nids = 1;
+       spec->adc_nids = ad1981_adc_nids;
+       spec->capsrc_nids = ad1981_capsrc_nids;
+       spec->input_mux = &ad1981_capture_source;
+       spec->num_mixers = 1;
+       spec->mixers[0] = ad1981_mixers;
+       spec->num_init_verbs = 1;
+       spec->init_verbs[0] = ad1981_init_verbs;
+       spec->spdif_route = 0;
+
+       codec->patch_ops = ad198x_patch_ops;
+
+       /* override some parameters */
+       board_config = snd_hda_check_board_config(codec, ad1981_cfg_tbl);
+       switch (board_config) {
+       case AD1981_HP:
+               spec->mixers[0] = ad1981_hp_mixers;
+               spec->num_init_verbs = 2;
+               spec->init_verbs[1] = ad1981_hp_init_verbs;
+               spec->multiout.dig_out_nid = 0;
+               spec->input_mux = &ad1981_hp_capture_source;
+
+               codec->patch_ops.init = ad1981_hp_init;
+               codec->patch_ops.unsol_event = ad1981_hp_unsol_event;
+               break;
+       }
+
        return 0;
 }
 
 
 /*
+ * AD1988
+ *
+ * Output pins and routes
+ *
+ *        Pin               Mix     Sel     DAC (*)
+ * port-A 0x11 (mute/hp) <- 0x22 <- 0x37 <- 03/04/06
+ * port-B 0x14 (mute/hp) <- 0x2b <- 0x30 <- 03/04/06
+ * port-C 0x15 (mute)    <- 0x2c <- 0x31 <- 05/0a
+ * port-D 0x12 (mute/hp) <- 0x29         <- 04
+ * port-E 0x17 (mute/hp) <- 0x26 <- 0x32 <- 05/0a
+ * port-F 0x16 (mute)    <- 0x2a         <- 06
+ * port-G 0x24 (mute)    <- 0x27         <- 05
+ * port-H 0x25 (mute)    <- 0x28         <- 0a
+ * mono   0x13 (mute/amp)<- 0x1e <- 0x36 <- 03/04/06
+ *
+ * DAC0 = 03h, DAC1 = 04h, DAC2 = 05h, DAC3 = 06h, DAC4 = 0ah
+ * (*) DAC2/3/4 are swapped to DAC3/4/2 on AD1988A rev.2 due to a h/w bug.
+ *
+ * Input pins and routes
+ *
+ *        pin     boost   mix input # / adc input #
+ * port-A 0x11 -> 0x38 -> mix 2, ADC 0
+ * port-B 0x14 -> 0x39 -> mix 0, ADC 1
+ * port-C 0x15 -> 0x3a -> 33:0 - mix 1, ADC 2
+ * port-D 0x12 -> 0x3d -> mix 3, ADC 8
+ * port-E 0x17 -> 0x3c -> 34:0 - mix 4, ADC 4
+ * port-F 0x16 -> 0x3b -> mix 5, ADC 3
+ * port-G 0x24 -> N/A  -> 33:1 - mix 1, 34:1 - mix 4, ADC 6
+ * port-H 0x25 -> N/A  -> 33:2 - mix 1, 34:2 - mix 4, ADC 7
+ *
+ *
+ * DAC assignment
+ *   6stack - front/surr/CLFE/side/opt DACs - 04/06/05/0a/03
+ *   3stack - front/surr/CLFE/opt DACs - 04/05/0a/03
+ *
+ * Inputs of Analog Mix (0x20)
+ *   0:Port-B (front mic)
+ *   1:Port-C/G/H (line-in)
+ *   2:Port-A
+ *   3:Port-D (line-in/2)
+ *   4:Port-E/G/H (mic-in)
+ *   5:Port-F (mic2-in)
+ *   6:CD
+ *   7:Beep
+ *
+ * ADC selection
+ *   0:Port-A
+ *   1:Port-B (front mic-in)
+ *   2:Port-C (line-in)
+ *   3:Port-F (mic2-in)
+ *   4:Port-E (mic-in)
+ *   5:CD
+ *   6:Port-G
+ *   7:Port-H
+ *   8:Port-D (line-in/2)
+ *   9:Mix
+ *
+ * Proposed pin assignments by the datasheet
+ *
+ * 6-stack
+ * Port-A front headphone
+ *      B front mic-in
+ *      C rear line-in
+ *      D rear front-out
+ *      E rear mic-in
+ *      F rear surround
+ *      G rear CLFE
+ *      H rear side
+ *
+ * 3-stack
+ * Port-A front headphone
+ *      B front mic
+ *      C rear line-in/surround
+ *      D rear front-out
+ *      E rear mic-in/CLFE
+ *
+ * laptop
+ * Port-A headphone
+ *      B mic-in
+ *      C docking station
+ *      D internal speaker (with EAPD)
+ *      E/F quad mic array
+ */
+
+
+/* models */
+enum {
+       AD1988_6STACK,
+       AD1988_6STACK_DIG,
+       AD1988_3STACK,
+       AD1988_3STACK_DIG,
+       AD1988_LAPTOP,
+       AD1988_LAPTOP_DIG,
+       AD1988_AUTO,
+       AD1988_MODEL_LAST,
+};
+
+/* revision id to check workarounds */
+#define AD1988A_REV2           0x100200
+
+#define is_rev2(codec) \
+       ((codec)->vendor_id == 0x11d41988 && \
+        (codec)->revision_id == AD1988A_REV2)
+
+/*
+ * mixers
  */
-static struct hda_pcm_stream ad1986a_pcm_analog_playback = {
-       .substreams = 1,
-       .channels_min = 2,
-       .channels_max = 6,
-       .nid = AD1986A_FRONT_DAC, /* NID to query formats and rates */
-       .ops = {
-               .open = ad1986a_playback_pcm_open,
-               .prepare = ad1986a_playback_pcm_prepare,
-               .cleanup = ad1986a_playback_pcm_cleanup
+
+static hda_nid_t ad1988_6stack_dac_nids[4] = {
+       0x04, 0x06, 0x05, 0x0a
+};
+
+static hda_nid_t ad1988_3stack_dac_nids[3] = {
+       0x04, 0x05, 0x0a
+};
+
+/* for AD1988A revision-2, DAC2-4 are swapped */
+static hda_nid_t ad1988_6stack_dac_nids_rev2[4] = {
+       0x04, 0x05, 0x0a, 0x06
+};
+
+static hda_nid_t ad1988_3stack_dac_nids_rev2[3] = {
+       0x04, 0x0a, 0x06
+};
+
+static hda_nid_t ad1988_adc_nids[3] = {
+       0x08, 0x09, 0x0f
+};
+
+static hda_nid_t ad1988_capsrc_nids[3] = {
+       0x0c, 0x0d, 0x0e
+};
+
+#define AD1988_SPDIF_OUT       0x02
+#define AD1988_SPDIF_IN                0x07
+
+static struct hda_input_mux ad1988_6stack_capture_source = {
+       .num_items = 5,
+       .items = {
+               { "Front Mic", 0x0 },
+               { "Line", 0x1 },
+               { "Mic", 0x4 },
+               { "CD", 0x5 },
+               { "Mix", 0x9 },
        },
 };
 
-static struct hda_pcm_stream ad1986a_pcm_analog_capture = {
-       .substreams = 2,
-       .channels_min = 2,
-       .channels_max = 2,
-       .nid = AD1986A_ADC, /* NID to query formats and rates */
-       .ops = {
-               .prepare = ad1986a_capture_pcm_prepare,
-               .cleanup = ad1986a_capture_pcm_cleanup
+static struct hda_input_mux ad1988_laptop_capture_source = {
+       .num_items = 3,
+       .items = {
+               { "Mic/Line", 0x0 },
+               { "CD", 0x5 },
+               { "Mix", 0x9 },
        },
 };
 
-static struct hda_pcm_stream ad1986a_pcm_digital_playback = {
-       .substreams = 1,
-       .channels_min = 2,
-       .channels_max = 2,
-       .nid = AD1986A_SPDIF_OUT, 
-       .ops = {
-               .open = ad1986a_dig_playback_pcm_open,
-               .close = ad1986a_dig_playback_pcm_close
+/*
+ */
+static int ad198x_ch_mode_info(struct snd_kcontrol *kcontrol,
+                              struct snd_ctl_elem_info *uinfo)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct ad198x_spec *spec = codec->spec;
+       return snd_hda_ch_mode_info(codec, uinfo, spec->channel_mode,
+                                   spec->num_channel_mode);
+}
+
+static int ad198x_ch_mode_get(struct snd_kcontrol *kcontrol,
+                             struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct ad198x_spec *spec = codec->spec;
+       return snd_hda_ch_mode_get(codec, ucontrol, spec->channel_mode,
+                                  spec->num_channel_mode, spec->multiout.max_channels);
+}
+
+static int ad198x_ch_mode_put(struct snd_kcontrol *kcontrol,
+                             struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct ad198x_spec *spec = codec->spec;
+       if (spec->need_dac_fix)
+               spec->multiout.num_dacs = spec->multiout.max_channels / 2;
+       return snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode,
+                                  spec->num_channel_mode, &spec->multiout.max_channels);
+}
+
+/* 6-stack mode */
+static struct snd_kcontrol_new ad1988_6stack_mixers1[] = {
+       HDA_CODEC_VOLUME("Front Playback Volume", 0x04, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Surround Playback Volume", 0x06, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x05, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x05, 2, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Side Playback Volume", 0x0a, 0x0, HDA_OUTPUT),
+       { } /* end */
+};
+
+static struct snd_kcontrol_new ad1988_6stack_mixers1_rev2[] = {
+       HDA_CODEC_VOLUME("Front Playback Volume", 0x04, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Surround Playback Volume", 0x05, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x0a, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x0a, 2, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Side Playback Volume", 0x06, 0x0, HDA_OUTPUT),
+       { } /* end */
+};
+
+static struct snd_kcontrol_new ad1988_6stack_mixers2[] = {
+       HDA_BIND_MUTE("Front Playback Switch", 0x29, 2, HDA_INPUT),
+       HDA_BIND_MUTE("Surround Playback Switch", 0x2a, 2, HDA_INPUT),
+       HDA_BIND_MUTE_MONO("Center Playback Switch", 0x27, 1, 2, HDA_INPUT),
+       HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x27, 2, 2, HDA_INPUT),
+       HDA_BIND_MUTE("Side Playback Switch", 0x28, 2, HDA_INPUT),
+       HDA_BIND_MUTE("Headphone Playback Switch", 0x22, 2, HDA_INPUT),
+       HDA_BIND_MUTE("Mono Playback Switch", 0x1e, 2, HDA_INPUT),
+
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x20, 0x6, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x20, 0x6, HDA_INPUT),
+       HDA_CODEC_VOLUME("Front Mic Playback Volume", 0x20, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Front Mic Playback Switch", 0x20, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME("Line Playback Volume", 0x20, 0x1, HDA_INPUT),
+       HDA_CODEC_MUTE("Line Playback Switch", 0x20, 0x1, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x20, 0x4, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x20, 0x4, HDA_INPUT),
+
+       HDA_CODEC_VOLUME("Beep Playback Volume", 0x10, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Beep Playback Switch", 0x10, 0x0, HDA_OUTPUT),
+
+       HDA_CODEC_VOLUME("Analog Mix Playback Volume", 0x21, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Analog Mix Playback Switch", 0x21, 0x0, HDA_OUTPUT),
+
+       HDA_CODEC_VOLUME("Front Mic Boost", 0x39, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Mic Boost", 0x3c, 0x0, HDA_OUTPUT),
+
+       { } /* end */
+};
+
+/* 3-stack mode */
+static struct snd_kcontrol_new ad1988_3stack_mixers1[] = {
+       HDA_CODEC_VOLUME("Front Playback Volume", 0x04, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Surround Playback Volume", 0x0a, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x05, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x05, 2, 0x0, HDA_OUTPUT),
+       { } /* end */
+};
+
+static struct snd_kcontrol_new ad1988_3stack_mixers1_rev2[] = {
+       HDA_CODEC_VOLUME("Front Playback Volume", 0x04, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Surround Playback Volume", 0x0a, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x06, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x06, 2, 0x0, HDA_OUTPUT),
+       { } /* end */
+};
+
+static struct snd_kcontrol_new ad1988_3stack_mixers2[] = {
+       HDA_BIND_MUTE("Front Playback Switch", 0x29, 2, HDA_INPUT),
+       HDA_BIND_MUTE("Surround Playback Switch", 0x2c, 2, HDA_INPUT),
+       HDA_BIND_MUTE_MONO("Center Playback Switch", 0x26, 1, 2, HDA_INPUT),
+       HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x26, 2, 2, HDA_INPUT),
+       HDA_BIND_MUTE("Headphone Playback Switch", 0x22, 2, HDA_INPUT),
+       HDA_BIND_MUTE("Mono Playback Switch", 0x1e, 2, HDA_INPUT),
+
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x20, 0x6, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x20, 0x6, HDA_INPUT),
+       HDA_CODEC_VOLUME("Front Mic Playback Volume", 0x20, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Front Mic Playback Switch", 0x20, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME("Line Playback Volume", 0x20, 0x1, HDA_INPUT),
+       HDA_CODEC_MUTE("Line Playback Switch", 0x20, 0x1, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x20, 0x4, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x20, 0x4, HDA_INPUT),
+
+       HDA_CODEC_VOLUME("Beep Playback Volume", 0x10, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Beep Playback Switch", 0x10, 0x0, HDA_OUTPUT),
+
+       HDA_CODEC_VOLUME("Analog Mix Playback Volume", 0x21, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Analog Mix Playback Switch", 0x21, 0x0, HDA_OUTPUT),
+
+       HDA_CODEC_VOLUME("Front Mic Boost", 0x39, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Mic Boost", 0x3c, 0x0, HDA_OUTPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Channel Mode",
+               .info = ad198x_ch_mode_info,
+               .get = ad198x_ch_mode_get,
+               .put = ad198x_ch_mode_put,
        },
+
+       { } /* end */
+};
+
+/* laptop mode */
+static struct snd_kcontrol_new ad1988_laptop_mixers[] = {
+       HDA_CODEC_VOLUME("PCM Playback Volume", 0x04, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("PCM Playback Switch", 0x29, 0x0, HDA_INPUT),
+       HDA_BIND_MUTE("Mono Playback Switch", 0x1e, 2, HDA_INPUT),
+
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x20, 0x6, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x20, 0x6, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x20, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x20, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME("Line Playback Volume", 0x20, 0x1, HDA_INPUT),
+       HDA_CODEC_MUTE("Line Playback Switch", 0x20, 0x1, HDA_INPUT),
+
+       HDA_CODEC_VOLUME("Beep Playback Volume", 0x10, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Beep Playback Switch", 0x10, 0x0, HDA_OUTPUT),
+
+       HDA_CODEC_VOLUME("Analog Mix Playback Volume", 0x21, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Analog Mix Playback Switch", 0x21, 0x0, HDA_OUTPUT),
+
+       HDA_CODEC_VOLUME("Mic Boost", 0x39, 0x0, HDA_OUTPUT),
+
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "External Amplifier",
+               .info = ad198x_eapd_info,
+               .get = ad198x_eapd_get,
+               .put = ad198x_eapd_put,
+               .private_value = 0x12 | (1 << 8), /* port-D, inverted */
+       },
+
+       { } /* end */
+};
+
+/* capture */
+static struct snd_kcontrol_new ad1988_capture_mixers[] = {
+       HDA_CODEC_VOLUME("Capture Volume", 0x0c, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x0c, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_IDX("Capture Volume", 1, 0x0d, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE_IDX("Capture Switch", 1, 0x0d, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_IDX("Capture Volume", 2, 0x0e, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE_IDX("Capture Switch", 2, 0x0e, 0x0, HDA_OUTPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               /* The multiple "Capture Source" controls confuse alsamixer,
+                * so we give this one a slightly different name.
+                * FIXME: the controls appear in the "playback" view!
+                */
+                */
+               /* .name = "Capture Source", */
+               .name = "Input Source",
+               .count = 3,
+               .info = ad198x_mux_enum_info,
+               .get = ad198x_mux_enum_get,
+               .put = ad198x_mux_enum_put,
+       },
+       { } /* end */
 };
 
-static int ad1986a_build_pcms(struct hda_codec *codec)
+static int ad1988_spdif_playback_source_info(struct snd_kcontrol *kcontrol,
+                                            struct snd_ctl_elem_info *uinfo)
 {
-       struct ad1986a_spec *spec = codec->spec;
-       struct hda_pcm *info = spec->pcm_rec;
+       static char *texts[] = {
+               "PCM", "ADC1", "ADC2", "ADC3"
+       };
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+       uinfo->count = 1;
+       uinfo->value.enumerated.items = 4;
+       if (uinfo->value.enumerated.item >= 4)
+               uinfo->value.enumerated.item = 3;
+       strcpy(uinfo->value.enumerated.name, texts[uinfo->value.enumerated.item]);
+       return 0;
+}
 
-       codec->num_pcms = 2;
-       codec->pcm_info = info;
+static int ad1988_spdif_playback_source_get(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       unsigned int sel;
 
-       info->name = "AD1986A Analog";
-       info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ad1986a_pcm_analog_playback;
-       info->stream[SNDRV_PCM_STREAM_CAPTURE] = ad1986a_pcm_analog_capture;
-       info++;
+       sel = snd_hda_codec_read(codec, 0x02, 0, AC_VERB_GET_CONNECT_SEL, 0);
+       if (sel > 0) {
+               sel = snd_hda_codec_read(codec, 0x0b, 0, AC_VERB_GET_CONNECT_SEL, 0);
+               if (sel <= 3)
+                       sel++;
+               else
+                       sel = 0;
+       }
+       ucontrol->value.enumerated.item[0] = sel;
+       return 0;
+}
 
-       info->name = "AD1986A Digital";
-       info->stream[SNDRV_PCM_STREAM_PLAYBACK] = ad1986a_pcm_digital_playback;
+static int ad1988_spdif_playback_source_put(struct snd_kcontrol *kcontrol,
+                                           struct snd_ctl_elem_value *ucontrol) /* select SPDIF source: 0=PCM, 1..=ADCn */
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       unsigned int sel;
+       int change;
+
+       sel = snd_hda_codec_read(codec, 0x02, 0, AC_VERB_GET_CONNECT_SEL, 0); /* current 0x02 input: 0=PCM, 1=ADC path */
+       if (! ucontrol->value.enumerated.item[0]) {
+               change = sel != 0;
+               if (change)
+                       snd_hda_codec_write(codec, 0x02, 0, AC_VERB_SET_CONNECT_SEL, 0); /* back to PCM */
+       } else {
+               change = sel == 0;
+               if (change)
+                       snd_hda_codec_write(codec, 0x02, 0, AC_VERB_SET_CONNECT_SEL, 1); /* route 0x02 via ADC path */
+               sel = snd_hda_codec_read(codec, 0x0b, 0, AC_VERB_GET_CONNECT_SEL, 0) + 1; /* current ADC as enum item */
+               change |= sel != ucontrol->value.enumerated.item[0]; /* was '==': inverted, never switched between ADCs */
+               if (change)
+                       snd_hda_codec_write(codec, 0x0b, 0, AC_VERB_SET_CONNECT_SEL, /* ADC mux is NID 0x0b, not 0x02 */
+                                           ucontrol->value.enumerated.item[0] - 1);
+       }
+       return change;
+}
+
+static struct snd_kcontrol_new ad1988_spdif_out_mixers[] = {
+       HDA_CODEC_VOLUME("IEC958 Playback Volume", 0x1b, 0x0, HDA_OUTPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "IEC958 Playback Source",
+               .info = ad1988_spdif_playback_source_info,
+               .get = ad1988_spdif_playback_source_get,
+               .put = ad1988_spdif_playback_source_put,
+       },
+       { } /* end */
+};
+
+static struct snd_kcontrol_new ad1988_spdif_in_mixers[] = {
+       HDA_CODEC_VOLUME("IEC958 Capture Volume", 0x1c, 0x0, HDA_INPUT),
+       { } /* end */
+};
+
+
+/*
+ * initialization verbs
+ */
+
+/*
+ * for 6-stack (+dig)
+ */
+static struct hda_verb ad1988_6stack_init_verbs[] = {
+       /* Front, Surround, CLFE, side DAC; unmute as default */
+       {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x06, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Port-A front headphone path */
+       {0x37, AC_VERB_SET_CONNECT_SEL, 0x01}, /* DAC1:04h */
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x11, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       /* Port-D line-out path */
+       {0x29, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x29, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x12, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       /* Port-F surround path */
+       {0x2a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x2a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       /* Port-G CLFE path */
+       {0x27, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x27, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x24, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       /* Port-H side path */
+       {0x28, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x28, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x25, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x25, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       /* Mono out path */
+       {0x36, AC_VERB_SET_CONNECT_SEL, 0x1}, /* DAC1:04h */
+       {0x1e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x1e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x13, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x13, AC_VERB_SET_AMP_GAIN_MUTE, 0xb01f}, /* unmute, 0dB */
+       /* Port-B front mic-in path */
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x39, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       /* Port-C line-in path */
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       {0x3a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x33, AC_VERB_SET_CONNECT_SEL, 0x0},
+       /* Port-E mic-in path */
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x3c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x34, AC_VERB_SET_CONNECT_SEL, 0x0},
+
+       { }
+};
+
+static struct hda_verb ad1988_capture_init_verbs[] = {
+       /* mute analog mix */
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(5)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(6)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(7)},
+       /* select ADCs - front-mic */
+       {0x0c, AC_VERB_SET_CONNECT_SEL, 0x1},
+       {0x0d, AC_VERB_SET_CONNECT_SEL, 0x1},
+       {0x0e, AC_VERB_SET_CONNECT_SEL, 0x1},
+       /* ADCs; muted */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+
+       { }
+};
+
+static struct hda_verb ad1988_spdif_init_verbs[] = {
+       /* SPDIF out sel */
+       {0x02, AC_VERB_SET_CONNECT_SEL, 0x0}, /* PCM */
+       {0x0b, AC_VERB_SET_CONNECT_SEL, 0x0}, /* ADC1 */
+       {0x1d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x1d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       /* SPDIF out pin */
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE | 0x27}, /* 0dB */
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0) | 0x17}, /* 0dB */
+
+       { }
+};
 
+/*
+ * verbs for 3stack (+dig)
+ */
+static struct hda_verb ad1988_3stack_ch2_init[] = {
+       /* set port-C to line-in */
+       { 0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
+       { 0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN },
+       /* set port-E to mic-in */
+       { 0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
+       { 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 },
+       { } /* end */
+};
+
+static struct hda_verb ad1988_3stack_ch6_init[] = {
+       /* set port-C to surround out */
+       { 0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+       { 0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+       /* set port-E to CLFE out */
+       { 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+       { 0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+       { } /* end */
+};
+
+static struct hda_channel_mode ad1988_3stack_modes[2] = {
+       { 2, ad1988_3stack_ch2_init },
+       { 6, ad1988_3stack_ch6_init },
+};
+
+static struct hda_verb ad1988_3stack_init_verbs[] = {
+       /* Front, Surround, CLFE, side DAC; unmute as default */
+       {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x06, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Port-A front headphone path */
+       {0x37, AC_VERB_SET_CONNECT_SEL, 0x01}, /* DAC1:04h */
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x11, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       /* Port-D line-out path */
+       {0x29, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x29, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x12, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       /* Mono out path */
+       {0x36, AC_VERB_SET_CONNECT_SEL, 0x1}, /* DAC1:04h */
+       {0x1e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x1e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x13, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x13, AC_VERB_SET_AMP_GAIN_MUTE, 0xb01f}, /* unmute, 0dB */
+       /* Port-B front mic-in path */
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x39, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       /* Port-C line-in/surround path - 6ch mode as default */
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x3a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x31, AC_VERB_SET_CONNECT_SEL, 0x0}, /* output sel: DAC 0x05 */
+       {0x33, AC_VERB_SET_CONNECT_SEL, 0x0},
+       /* Port-E mic-in/CLFE path - 6ch mode as default */
+       {0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x3c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x32, AC_VERB_SET_CONNECT_SEL, 0x1}, /* output sel: DAC 0x0a */
+       {0x34, AC_VERB_SET_CONNECT_SEL, 0x0},
+       /* mute analog mix */
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(5)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(6)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(7)},
+       /* select ADCs - front-mic */
+       {0x0c, AC_VERB_SET_CONNECT_SEL, 0x1},
+       {0x0d, AC_VERB_SET_CONNECT_SEL, 0x1},
+       {0x0e, AC_VERB_SET_CONNECT_SEL, 0x1},
+       /* ADCs; muted */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       { }
+};
+
+/*
+ * verbs for laptop mode (+dig)
+ */
+static struct hda_verb ad1988_laptop_hp_on[] = {
+       /* unmute port-A and mute port-D */
+       { 0x11, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+       { 0x12, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
+       { } /* end */
+};
+static struct hda_verb ad1988_laptop_hp_off[] = {
+       /* mute port-A and unmute port-D */
+       { 0x11, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
+       { 0x12, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+       { } /* end */
+};
+
+#define AD1988_HP_EVENT        0x01
+
+static struct hda_verb ad1988_laptop_init_verbs[] = {
+       /* Front, Surround, CLFE, side DAC; unmute as default */
+       {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x06, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Port-A front headphone path */
+       {0x37, AC_VERB_SET_CONNECT_SEL, 0x01}, /* DAC1:04h */
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x11, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       /* unsolicited event for pin-sense */
+       {0x11, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | AD1988_HP_EVENT },
+       /* Port-D line-out path + EAPD */
+       {0x29, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x29, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x12, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x12, AC_VERB_SET_EAPD_BTLENABLE, 0x00}, /* EAPD-off */
+       /* Mono out path */
+       {0x36, AC_VERB_SET_CONNECT_SEL, 0x1}, /* DAC1:04h */
+       {0x1e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x1e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x13, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x13, AC_VERB_SET_AMP_GAIN_MUTE, 0xb01f}, /* unmute, 0dB */
+       /* Port-B mic-in path */
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x39, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       /* Port-C docking station - try to output */
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x3a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x33, AC_VERB_SET_CONNECT_SEL, 0x0},
+       /* mute analog mix */
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(5)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(6)},
+       {0x20, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(7)},
+       /* select ADCs - mic */
+       {0x0c, AC_VERB_SET_CONNECT_SEL, 0x1},
+       {0x0d, AC_VERB_SET_CONNECT_SEL, 0x1},
+       {0x0e, AC_VERB_SET_CONNECT_SEL, 0x1},
+       /* ADCs; muted */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       { }
+};
+
+static void ad1988_laptop_unsol_event(struct hda_codec *codec, unsigned int res) /* HP jack plug/unplug handler */
+{
+       if ((res >> 26) != AD1988_HP_EVENT) /* event tag sits in the top bits of res */
+               return;
+       if (snd_hda_codec_read(codec, 0x11, 0, AC_VERB_GET_PIN_SENSE, 0) & (1U << 31)) /* presence bit; 1U avoids signed-shift UB */
+               snd_hda_sequence_write(codec, ad1988_laptop_hp_on);
+       else
+               snd_hda_sequence_write(codec, ad1988_laptop_hp_off);
+}
+
+
+/*
+ * Automatic parse of I/O pins from the BIOS configuration
+ */
+
+#define NUM_CONTROL_ALLOC      32
+#define NUM_VERB_ALLOC         32
+
+enum {
+       AD_CTL_WIDGET_VOL,
+       AD_CTL_WIDGET_MUTE,
+       AD_CTL_BIND_MUTE,
+};
+static struct snd_kcontrol_new ad1988_control_templates[] = {
+       HDA_CODEC_VOLUME(NULL, 0, 0, 0),
+       HDA_CODEC_MUTE(NULL, 0, 0, 0),
+       HDA_BIND_MUTE(NULL, 0, 0, 0),
+};
+
+/* add dynamic controls */
+static int add_control(struct ad198x_spec *spec, int type, const char *name,
+                      unsigned long val) /* append one kcontrol built from template 'type' to spec->kctl_alloc */
+{
+       struct snd_kcontrol_new *knew;
+
+       if (spec->num_kctl_used >= spec->num_kctl_alloc) {
+               int num = spec->num_kctl_alloc + NUM_CONTROL_ALLOC; /* grow in fixed-size chunks */
+
+               knew = kcalloc(num + 1, sizeof(*knew), GFP_KERNEL); /* array + terminator */
+               if (! knew)
+                       return -ENOMEM;
+               if (spec->kctl_alloc) {
+                       memcpy(knew, spec->kctl_alloc, sizeof(*knew) * spec->num_kctl_alloc); /* carry over old entries */
+                       kfree(spec->kctl_alloc);
+               }
+               spec->kctl_alloc = knew;
+               spec->num_kctl_alloc = num;
+       }
+
+       knew = &spec->kctl_alloc[spec->num_kctl_used];
+       *knew = ad1988_control_templates[type]; /* AD_CTL_WIDGET_VOL / AD_CTL_WIDGET_MUTE / AD_CTL_BIND_MUTE */
+       knew->name = kstrdup(name, GFP_KERNEL); /* owned copy; presumably freed with the spec -- verify */
+       if (! knew->name)
+               return -ENOMEM;
+       knew->private_value = val;
+       spec->num_kctl_used++;
        return 0;
 }
 
-static void ad1986a_free(struct hda_codec *codec)
+#define AD1988_PIN_CD_NID              0x18
+#define AD1988_PIN_BEEP_NID            0x10
+
+static hda_nid_t ad1988_mixer_nids[8] = {
+       /* A     B     C     D     E     F     G     H */
+       0x22, 0x2b, 0x2c, 0x29, 0x26, 0x2a, 0x27, 0x28
+};
+
+static inline hda_nid_t ad1988_idx_to_dac(struct hda_codec *codec, int idx)
 {
-       kfree(codec->spec);
+       static hda_nid_t idx_to_dac[8] = {
+               /* A     B     C     D     E     F     G     H */
+               0x04, 0x06, 0x05, 0x04, 0x0a, 0x06, 0x05, 0x0a
+       };
+       static hda_nid_t idx_to_dac_rev2[8] = {
+               /* A     B     C     D     E     F     G     H */
+               0x04, 0x05, 0x0a, 0x04, 0x06, 0x05, 0x0a, 0x06
+       };
+       if (is_rev2(codec))
+               return idx_to_dac_rev2[idx];
+       else
+               return idx_to_dac[idx];
 }
 
-#ifdef CONFIG_PM
-static int ad1986a_resume(struct hda_codec *codec)
+static hda_nid_t ad1988_boost_nids[8] = {
+       0x38, 0x39, 0x3a, 0x3d, 0x3c, 0x3b, 0, 0
+};
+
+static int ad1988_pin_idx(hda_nid_t nid)
 {
-       ad1986a_init(codec);
-       snd_hda_resume_ctls(codec, ad1986a_mixers);
-       snd_hda_resume_spdif_out(codec);
+       static hda_nid_t ad1988_io_pins[8] = {
+               0x11, 0x14, 0x15, 0x12, 0x17, 0x16, 0x24, 0x25
+       };
+       int i;
+       for (i = 0; i < ARRAY_SIZE(ad1988_io_pins); i++)
+               if (ad1988_io_pins[i] == nid)
+                       return i;
+       return 0; /* should be -1 */
+}
+
+static int ad1988_pin_to_loopback_idx(hda_nid_t nid)
+{
+       static int loopback_idx[8] = {
+               2, 0, 1, 3, 4, 5, 1, 4
+       };
+       switch (nid) {
+       case AD1988_PIN_CD_NID:
+               return 6;
+       default:
+               return loopback_idx[ad1988_pin_idx(nid)];
+       }
+}
+
+static int ad1988_pin_to_adc_idx(hda_nid_t nid)
+{
+       static int adc_idx[8] = {
+               0, 1, 2, 8, 4, 3, 6, 7
+       };
+       switch (nid) {
+       case AD1988_PIN_CD_NID:
+               return 5;
+       default:
+               return adc_idx[ad1988_pin_idx(nid)];
+       }
+}
+
+/* fill in the dac_nids table from the parsed pin configuration */
+static int ad1988_auto_fill_dac_nids(struct hda_codec *codec,
+                                    const struct auto_pin_cfg *cfg)
+{
+       struct ad198x_spec *spec = codec->spec;
+       int i, idx;
+
+       spec->multiout.dac_nids = spec->private_dac_nids;
+
+       /* check the pins hardwired to audio widget */
+       for (i = 0; i < cfg->line_outs; i++) {
+               idx = ad1988_pin_idx(cfg->line_out_pins[i]);
+               spec->multiout.dac_nids[i] = ad1988_idx_to_dac(codec, idx);
+       }
+       spec->multiout.num_dacs = cfg->line_outs;
        return 0;
 }
-#endif
 
-static struct hda_codec_ops ad1986a_patch_ops = {
-       .build_controls = ad1986a_build_controls,
-       .build_pcms = ad1986a_build_pcms,
-       .init = ad1986a_init,
-       .free = ad1986a_free,
-#ifdef CONFIG_PM
-       .resume = ad1986a_resume,
-#endif
+/* add playback controls from the parsed DAC table */
+static int ad1988_auto_create_multi_out_ctls(struct ad198x_spec *spec,
+                                            const struct auto_pin_cfg *cfg)
+{
+       char name[32];
+       static const char *chname[4] = { "Front", "Surround", NULL /*CLFE*/, "Side" };
+       hda_nid_t nid;
+       int i, err;
+
+       for (i = 0; i < cfg->line_outs; i++) {
+               hda_nid_t dac = spec->multiout.dac_nids[i];
+               if (! dac)
+                       continue;
+               nid = ad1988_mixer_nids[ad1988_pin_idx(cfg->line_out_pins[i])];
+               if (i == 2) {
+                       /* Center/LFE */
+                       err = add_control(spec, AD_CTL_WIDGET_VOL,
+                                         "Center Playback Volume",
+                                         HDA_COMPOSE_AMP_VAL(dac, 1, 0, HDA_OUTPUT));
+                       if (err < 0)
+                               return err;
+                       err = add_control(spec, AD_CTL_WIDGET_VOL,
+                                         "LFE Playback Volume",
+                                         HDA_COMPOSE_AMP_VAL(dac, 2, 0, HDA_OUTPUT));
+                       if (err < 0)
+                               return err;
+                       err = add_control(spec, AD_CTL_BIND_MUTE,
+                                         "Center Playback Switch",
+                                         HDA_COMPOSE_AMP_VAL(nid, 1, 2, HDA_INPUT));
+                       if (err < 0)
+                               return err;
+                       err = add_control(spec, AD_CTL_BIND_MUTE,
+                                         "LFE Playback Switch",
+                                         HDA_COMPOSE_AMP_VAL(nid, 2, 2, HDA_INPUT));
+                       if (err < 0)
+                               return err;
+               } else {
+                       sprintf(name, "%s Playback Volume", chname[i]);
+                       err = add_control(spec, AD_CTL_WIDGET_VOL, name,
+                                         HDA_COMPOSE_AMP_VAL(dac, 3, 0, HDA_OUTPUT));
+                       if (err < 0)
+                               return err;
+                       sprintf(name, "%s Playback Switch", chname[i]);
+                       err = add_control(spec, AD_CTL_BIND_MUTE, name,
+                                         HDA_COMPOSE_AMP_VAL(nid, 3, 2, HDA_INPUT));
+                       if (err < 0)
+                               return err;
+               }
+       }
+       return 0;
+}
+
+/* add playback controls for speaker and HP outputs */
+static int ad1988_auto_create_extra_out(struct hda_codec *codec, hda_nid_t pin,
+                                       const char *pfx)
+{
+       struct ad198x_spec *spec = codec->spec;
+       hda_nid_t nid;
+       int idx, err;
+       char name[32];
+
+       if (! pin)
+               return 0;
+
+       idx = ad1988_pin_idx(pin);
+       nid = ad1988_idx_to_dac(codec, idx);
+       /* specify the DAC as the extra output */
+       if (! spec->multiout.hp_nid)
+               spec->multiout.hp_nid = nid;
+       else
+               spec->multiout.extra_out_nid[0] = nid;
+       /* control HP volume/switch on the output mixer amp */
+       sprintf(name, "%s Playback Volume", pfx);
+       if ((err = add_control(spec, AD_CTL_WIDGET_VOL, name,
+                              HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
+               return err;
+       nid = ad1988_mixer_nids[idx];
+       sprintf(name, "%s Playback Switch", pfx);
+       if ((err = add_control(spec, AD_CTL_BIND_MUTE, name,
+                              HDA_COMPOSE_AMP_VAL(nid, 3, 2, HDA_INPUT))) < 0)
+               return err;
+       return 0;
+}
+
+/* create input playback/capture controls for the given pin */
+static int new_analog_input(struct ad198x_spec *spec, hda_nid_t pin,
+                           const char *ctlname, int boost)
+{
+       char name[32];
+       int err, idx;
+
+       sprintf(name, "%s Playback Volume", ctlname);
+       idx = ad1988_pin_to_loopback_idx(pin);
+       if ((err = add_control(spec, AD_CTL_WIDGET_VOL, name,
+                              HDA_COMPOSE_AMP_VAL(0x20, 3, idx, HDA_INPUT))) < 0)
+               return err;
+       sprintf(name, "%s Playback Switch", ctlname);
+       if ((err = add_control(spec, AD_CTL_WIDGET_MUTE, name,
+                              HDA_COMPOSE_AMP_VAL(0x20, 3, idx, HDA_INPUT))) < 0)
+               return err;
+       if (boost) {
+               hda_nid_t bnid;
+               idx = ad1988_pin_idx(pin);
+               bnid = ad1988_boost_nids[idx];
+               if (bnid) {
+                       sprintf(name, "%s Boost", ctlname);
+                       return add_control(spec, AD_CTL_WIDGET_VOL, name,
+                                          HDA_COMPOSE_AMP_VAL(bnid, 3, idx, HDA_OUTPUT));
+
+               }
+       }
+       return 0;
+}
+
+/* create playback/capture controls for input pins */
+static int ad1988_auto_create_analog_input_ctls(struct ad198x_spec *spec,
+                                               const struct auto_pin_cfg *cfg)
+{
+       struct hda_input_mux *imux = &spec->private_imux;
+       int i, err;
+
+       for (i = 0; i < AUTO_PIN_LAST; i++) {
+               err = new_analog_input(spec, cfg->input_pins[i],
+                                      auto_pin_cfg_labels[i],
+                                      i <= AUTO_PIN_FRONT_MIC);
+               if (err < 0)
+                       return err;
+               imux->items[imux->num_items].label = auto_pin_cfg_labels[i];
+               imux->items[imux->num_items].index = ad1988_pin_to_adc_idx(cfg->input_pins[i]);
+               imux->num_items++;
+       }
+       imux->items[imux->num_items].label = "Mix";
+       imux->items[imux->num_items].index = 9;
+       imux->num_items++;
+
+       if ((err = add_control(spec, AD_CTL_WIDGET_VOL,
+                              "Analog Mix Playback Volume",
+                              HDA_COMPOSE_AMP_VAL(0x21, 3, 0x0, HDA_OUTPUT))) < 0)
+               return err;
+       if ((err = add_control(spec, AD_CTL_WIDGET_MUTE,
+                              "Analog Mix Playback Switch",
+                              HDA_COMPOSE_AMP_VAL(0x21, 3, 0x0, HDA_OUTPUT))) < 0)
+               return err;
+
+       return 0;
+}
+
+static void ad1988_auto_set_output_and_unmute(struct hda_codec *codec,
+                                             hda_nid_t nid, int pin_type,
+                                             int dac_idx)
+{
+       /* set as output */
+       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, pin_type);
+       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE);
+       switch (nid) {
+       case 0x11: /* port-A - DAC 04 */
+               snd_hda_codec_write(codec, 0x37, 0, AC_VERB_SET_CONNECT_SEL, 0x01);
+               break;
+       case 0x14: /* port-B - DAC 06 */
+               snd_hda_codec_write(codec, 0x30, 0, AC_VERB_SET_CONNECT_SEL, 0x02);
+               break;
+       case 0x15: /* port-C - DAC 05 */
+               snd_hda_codec_write(codec, 0x31, 0, AC_VERB_SET_CONNECT_SEL, 0x00);
+               break;
+       case 0x17: /* port-E - DAC 0a */
+               snd_hda_codec_write(codec, 0x32, 0, AC_VERB_SET_CONNECT_SEL, 0x01);
+               break;
+       case 0x13: /* mono - DAC 04 */
+               snd_hda_codec_write(codec, 0x36, 0, AC_VERB_SET_CONNECT_SEL, 0x01);
+               break;
+       }
+}
+
+static void ad1988_auto_init_multi_out(struct hda_codec *codec)
+{
+       struct ad198x_spec *spec = codec->spec;
+       int i;
+
+       for (i = 0; i < spec->autocfg.line_outs; i++) {
+               hda_nid_t nid = spec->autocfg.line_out_pins[i];
+               ad1988_auto_set_output_and_unmute(codec, nid, PIN_OUT, i);
+       }
+}
+
+static void ad1988_auto_init_extra_out(struct hda_codec *codec)
+{
+       struct ad198x_spec *spec = codec->spec;
+       hda_nid_t pin;
+
+       pin = spec->autocfg.speaker_pins[0];
+       if (pin) /* connect to front */
+               ad1988_auto_set_output_and_unmute(codec, pin, PIN_OUT, 0);
+       pin = spec->autocfg.hp_pin;
+       if (pin) /* connect to front */
+               ad1988_auto_set_output_and_unmute(codec, pin, PIN_HP, 0);
+}
+
+static void ad1988_auto_init_analog_input(struct hda_codec *codec)
+{
+       struct ad198x_spec *spec = codec->spec;
+       int i, idx;
+
+       for (i = 0; i < AUTO_PIN_LAST; i++) {
+               hda_nid_t nid = spec->autocfg.input_pins[i];
+               if (! nid)
+                       continue;
+               switch (nid) {
+               case 0x15: /* port-C */
+                       snd_hda_codec_write(codec, 0x33, 0, AC_VERB_SET_CONNECT_SEL, 0x0);
+                       break;
+               case 0x17: /* port-E */
+                       snd_hda_codec_write(codec, 0x34, 0, AC_VERB_SET_CONNECT_SEL, 0x0);
+                       break;
+               }
+               snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
+                                   i <= AUTO_PIN_FRONT_MIC ? PIN_VREF80 : PIN_IN);
+               if (nid != AD1988_PIN_CD_NID)
+                       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE,
+                                           AMP_OUT_MUTE);
+               idx = ad1988_pin_idx(nid);
+               if (ad1988_boost_nids[idx])
+                       snd_hda_codec_write(codec, ad1988_boost_nids[idx], 0,
+                                           AC_VERB_SET_AMP_GAIN_MUTE,
+                                           AMP_OUT_ZERO);
+       }
+}
+
+/* parse the BIOS configuration and set up the alc_spec */
+/* return 1 if successful, 0 if the proper config is not found, or a negative error code */
+static int ad1988_parse_auto_config(struct hda_codec *codec)
+{
+       struct ad198x_spec *spec = codec->spec;
+       int err;
+
+       if ((err = snd_hda_parse_pin_def_config(codec, &spec->autocfg, NULL)) < 0)
+               return err;
+       if ((err = ad1988_auto_fill_dac_nids(codec, &spec->autocfg)) < 0)
+               return err;
+       if (! spec->autocfg.line_outs)
+               return 0; /* can't find valid BIOS pin config */
+       if ((err = ad1988_auto_create_multi_out_ctls(spec, &spec->autocfg)) < 0 ||
+           (err = ad1988_auto_create_extra_out(codec,
+                                               spec->autocfg.speaker_pins[0],
+                                               "Speaker")) < 0 ||
+           (err = ad1988_auto_create_extra_out(codec, spec->autocfg.hp_pin,
+                                               "Headphone")) < 0 ||
+           (err = ad1988_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0)
+               return err;
+
+       spec->multiout.max_channels = spec->multiout.num_dacs * 2;
+
+       if (spec->autocfg.dig_out_pin)
+               spec->multiout.dig_out_nid = AD1988_SPDIF_OUT;
+       if (spec->autocfg.dig_in_pin)
+               spec->dig_in_nid = AD1988_SPDIF_IN;
+
+       if (spec->kctl_alloc)
+               spec->mixers[spec->num_mixers++] = spec->kctl_alloc;
+
+       spec->init_verbs[spec->num_init_verbs++] = ad1988_6stack_init_verbs;
+
+       spec->input_mux = &spec->private_imux;
+
+       return 1;
+}
+
+/* init callback for auto-configuration model -- overriding the default init */
+static int ad1988_auto_init(struct hda_codec *codec)
+{
+       ad198x_init(codec);
+       ad1988_auto_init_multi_out(codec);
+       ad1988_auto_init_extra_out(codec);
+       ad1988_auto_init_analog_input(codec);
+       return 0;
+}
+
+
+/*
+ */
+
+static struct hda_board_config ad1988_cfg_tbl[] = {
+       { .modelname = "6stack",        .config = AD1988_6STACK },
+       { .modelname = "6stack-dig",    .config = AD1988_6STACK_DIG },
+       { .modelname = "3stack",        .config = AD1988_3STACK },
+       { .modelname = "3stack-dig",    .config = AD1988_3STACK_DIG },
+       { .modelname = "laptop",        .config = AD1988_LAPTOP },
+       { .modelname = "laptop-dig",    .config = AD1988_LAPTOP_DIG },
+       { .modelname = "auto",          .config = AD1988_AUTO },
+       {}
 };
 
-static int patch_ad1986a(struct hda_codec *codec)
+static int patch_ad1988(struct hda_codec *codec)
 {
-       struct ad1986a_spec *spec;
+       struct ad198x_spec *spec;
+       int board_config;
 
-       spec = kcalloc(1, sizeof(*spec), GFP_KERNEL);
+       spec = kzalloc(sizeof(*spec), GFP_KERNEL);
        if (spec == NULL)
                return -ENOMEM;
 
-       init_MUTEX(&spec->amp_mutex);
+       mutex_init(&spec->amp_mutex);
        codec->spec = spec;
 
-       spec->multiout.max_channels = 6;
-       spec->multiout.num_dacs = ARRAY_SIZE(ad1986a_dac_nids);
-       spec->multiout.dac_nids = ad1986a_dac_nids;
-       spec->multiout.dig_out_nid = AD1986A_SPDIF_OUT;
+       if (is_rev2(codec))
+               snd_printk(KERN_INFO "patch_analog: AD1988A rev.2 is detected, enable workarounds\n");
+
+       board_config = snd_hda_check_board_config(codec, ad1988_cfg_tbl);
+       if (board_config < 0 || board_config >= AD1988_MODEL_LAST) {
+               printk(KERN_INFO "hda_codec: Unknown model for AD1988, trying auto-probe from BIOS...\n");
+               board_config = AD1988_AUTO;
+       }
 
-       codec->patch_ops = ad1986a_patch_ops;
+       if (board_config == AD1988_AUTO) {
+               /* automatic parse from the BIOS config */
+               int err = ad1988_parse_auto_config(codec);
+               if (err < 0) {
+                       ad198x_free(codec);
+                       return err;
+               } else if (! err) {
+                       printk(KERN_INFO "hda_codec: Cannot set up configuration from BIOS.  Using 6-stack mode...\n");
+                       board_config = AD1988_6STACK;
+               }
+       }
+
+       switch (board_config) {
+       case AD1988_6STACK:
+       case AD1988_6STACK_DIG:
+               spec->multiout.max_channels = 8;
+               spec->multiout.num_dacs = 4;
+               if (is_rev2(codec))
+                       spec->multiout.dac_nids = ad1988_6stack_dac_nids_rev2;
+               else
+                       spec->multiout.dac_nids = ad1988_6stack_dac_nids;
+               spec->input_mux = &ad1988_6stack_capture_source;
+               spec->num_mixers = 2;
+               if (is_rev2(codec))
+                       spec->mixers[0] = ad1988_6stack_mixers1_rev2;
+               else
+                       spec->mixers[0] = ad1988_6stack_mixers1;
+               spec->mixers[1] = ad1988_6stack_mixers2;
+               spec->num_init_verbs = 1;
+               spec->init_verbs[0] = ad1988_6stack_init_verbs;
+               if (board_config == AD1988_6STACK_DIG) {
+                       spec->multiout.dig_out_nid = AD1988_SPDIF_OUT;
+                       spec->dig_in_nid = AD1988_SPDIF_IN;
+               }
+               break;
+       case AD1988_3STACK:
+       case AD1988_3STACK_DIG:
+               spec->multiout.max_channels = 6;
+               spec->multiout.num_dacs = 3;
+               if (is_rev2(codec))
+                       spec->multiout.dac_nids = ad1988_3stack_dac_nids_rev2;
+               else
+                       spec->multiout.dac_nids = ad1988_3stack_dac_nids;
+               spec->input_mux = &ad1988_6stack_capture_source;
+               spec->channel_mode = ad1988_3stack_modes;
+               spec->num_channel_mode = ARRAY_SIZE(ad1988_3stack_modes);
+               spec->num_mixers = 2;
+               if (is_rev2(codec))
+                       spec->mixers[0] = ad1988_3stack_mixers1_rev2;
+               else
+                       spec->mixers[0] = ad1988_3stack_mixers1;
+               spec->mixers[1] = ad1988_3stack_mixers2;
+               spec->num_init_verbs = 1;
+               spec->init_verbs[0] = ad1988_3stack_init_verbs;
+               if (board_config == AD1988_3STACK_DIG)
+                       spec->multiout.dig_out_nid = AD1988_SPDIF_OUT;
+               break;
+       case AD1988_LAPTOP:
+       case AD1988_LAPTOP_DIG:
+               spec->multiout.max_channels = 2;
+               spec->multiout.num_dacs = 1;
+               spec->multiout.dac_nids = ad1988_3stack_dac_nids;
+               spec->input_mux = &ad1988_laptop_capture_source;
+               spec->num_mixers = 1;
+               spec->mixers[0] = ad1988_laptop_mixers;
+               spec->num_init_verbs = 1;
+               spec->init_verbs[0] = ad1988_laptop_init_verbs;
+               if (board_config == AD1988_LAPTOP_DIG)
+                       spec->multiout.dig_out_nid = AD1988_SPDIF_OUT;
+               break;
+       }
+
+       spec->num_adc_nids = ARRAY_SIZE(ad1988_adc_nids);
+       spec->adc_nids = ad1988_adc_nids;
+       spec->capsrc_nids = ad1988_capsrc_nids;
+       spec->mixers[spec->num_mixers++] = ad1988_capture_mixers;
+       spec->init_verbs[spec->num_init_verbs++] = ad1988_capture_init_verbs;
+       if (spec->multiout.dig_out_nid) {
+               spec->mixers[spec->num_mixers++] = ad1988_spdif_out_mixers;
+               spec->init_verbs[spec->num_init_verbs++] = ad1988_spdif_init_verbs;
+       }
+       if (spec->dig_in_nid)
+               spec->mixers[spec->num_mixers++] = ad1988_spdif_in_mixers;
+
+       codec->patch_ops = ad198x_patch_ops;
+       switch (board_config) {
+       case AD1988_AUTO:
+               codec->patch_ops.init = ad1988_auto_init;
+               break;
+       case AD1988_LAPTOP:
+       case AD1988_LAPTOP_DIG:
+               codec->patch_ops.unsol_event = ad1988_laptop_unsol_event;
+               break;
+       }
 
        return 0;
 }
 
+
 /*
  * patch entries
  */
 struct hda_codec_preset snd_hda_preset_analog[] = {
+       { .id = 0x11d41981, .name = "AD1981", .patch = patch_ad1981 },
+       { .id = 0x11d41983, .name = "AD1983", .patch = patch_ad1983 },
        { .id = 0x11d41986, .name = "AD1986A", .patch = patch_ad1986a },
+       { .id = 0x11d41988, .name = "AD1988", .patch = patch_ad1988 },
+       { .id = 0x11d4198b, .name = "AD1988B", .patch = patch_ad1988 },
        {} /* terminator */
 };
index 17c5062..94cf292 100644 (file)
@@ -3,8 +3,10 @@
  *
  * HD audio interface patch for ALC 260/880/882 codecs
  *
- * Copyright (c) 2004 PeiSen Hou <pshou@realtek.com.tw>
+ * Copyright (c) 2004 Kailang Yang <kailang@realtek.com.tw>
+ *                    PeiSen Hou <pshou@realtek.com.tw>
  *                    Takashi Iwai <tiwai@suse.de>
+ *                    Jonathan Woithe <jwoithe@physics.adelaide.edu.au>
  *
  *  This driver is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
 
 /* ALC880 board config type */
 enum {
-       ALC880_MINIMAL,
        ALC880_3ST,
        ALC880_3ST_DIG,
        ALC880_5ST,
        ALC880_5ST_DIG,
        ALC880_W810,
+       ALC880_Z71V,
+       ALC880_6ST,
+       ALC880_6ST_DIG,
+       ALC880_F1734,
+       ALC880_ASUS,
+       ALC880_ASUS_DIG,
+       ALC880_ASUS_W1V,
+       ALC880_ASUS_DIG2,
+       ALC880_UNIWILL_DIG,
+       ALC880_CLEVO,
+       ALC880_TCL_S700,
+       ALC880_LG,
+       ALC880_LG_LW,
+#ifdef CONFIG_SND_DEBUG
+       ALC880_TEST,
+#endif
+       ALC880_AUTO,
+       ALC880_MODEL_LAST /* last tag */
+};
+
+/* ALC260 models */
+enum {
+       ALC260_BASIC,
+       ALC260_HP,
+       ALC260_HP_3013,
+       ALC260_FUJITSU_S702X,
+       ALC260_ACER,
+#ifdef CONFIG_SND_DEBUG
+       ALC260_TEST,
+#endif
+       ALC260_AUTO,
+       ALC260_MODEL_LAST /* last tag */
+};
+
+/* ALC262 models */
+enum {
+       ALC262_BASIC,
+       ALC262_FUJITSU,
+       ALC262_AUTO,
+       ALC262_MODEL_LAST /* last tag */
 };
 
+/* ALC861 models */
+enum {
+       ALC861_3ST,
+       ALC861_3ST_DIG,
+       ALC861_6ST_DIG,
+       ALC861_AUTO,
+       ALC861_MODEL_LAST,
+};
+
+/* ALC882 models */
+enum {
+       ALC882_3ST_DIG,
+       ALC882_6ST_DIG,
+       ALC882_AUTO,
+       ALC882_MODEL_LAST,
+};
+
+/* for GPIO Poll */
+#define GPIO_MASK      0x03
+
 struct alc_spec {
        /* codec parameterization */
-       unsigned int front_panel: 1;
-
-       snd_kcontrol_new_t* mixers[2];
+       struct snd_kcontrol_new *mixers[5];     /* mixer arrays */
        unsigned int num_mixers;
 
-       struct hda_verb *init_verbs;
+       const struct hda_verb *init_verbs[5];   /* initialization verbs
+                                                * don't forget NULL termination!
+                                                */
+       unsigned int num_init_verbs;
 
-       char* stream_name_analog;
+       char *stream_name_analog;       /* analog PCM stream */
        struct hda_pcm_stream *stream_analog_playback;
        struct hda_pcm_stream *stream_analog_capture;
 
-       char* stream_name_digital;
+       char *stream_name_digital;      /* digital PCM stream */
        struct hda_pcm_stream *stream_digital_playback;
        struct hda_pcm_stream *stream_digital_capture;
 
        /* playback */
-       struct hda_multi_out multiout;
+       struct hda_multi_out multiout;  /* playback set-up
+                                        * max_channels, dacs must be set
+                                        * dig_out_nid and hp_nid are optional
+                                        */
 
        /* capture */
        unsigned int num_adc_nids;
        hda_nid_t *adc_nids;
-       hda_nid_t dig_in_nid;
+       hda_nid_t dig_in_nid;           /* digital-in NID; optional */
 
        /* capture source */
+       unsigned int num_mux_defs;
        const struct hda_input_mux *input_mux;
        unsigned int cur_mux[3];
 
        /* channel model */
-       const struct alc_channel_mode *channel_mode;
+       const struct hda_channel_mode *channel_mode;
        int num_channel_mode;
 
        /* PCM information */
-       struct hda_pcm pcm_rec[2];
-};
-
-/* DAC/ADC assignment */
-
-static hda_nid_t alc880_dac_nids[4] = {
-       /* front, rear, clfe, rear_surr */
-       0x02, 0x05, 0x04, 0x03
-};
-
-static hda_nid_t alc880_w810_dac_nids[3] = {
-       /* front, rear/surround, clfe */
-       0x02, 0x03, 0x04
-};
-
-static hda_nid_t alc880_adc_nids[3] = {
-       /* ADC0-2 */
-       0x07, 0x08, 0x09,
-};
-
-#define ALC880_DIGOUT_NID      0x06
-#define ALC880_DIGIN_NID       0x0a
-
-static hda_nid_t alc260_dac_nids[1] = {
-       /* front */
-       0x02,
-};
-
-static hda_nid_t alc260_adc_nids[2] = {
-       /* ADC0-1 */
-       0x04, 0x05,
+       struct hda_pcm pcm_rec[3];      /* used in alc_build_pcms() */
+
+       /* dynamic controls, init_verbs and input_mux */
+       struct auto_pin_cfg autocfg;
+       unsigned int num_kctl_alloc, num_kctl_used;
+       struct snd_kcontrol_new *kctl_alloc;
+       struct hda_input_mux private_imux;
+       hda_nid_t private_dac_nids[5];
+
+       /* hooks */
+       void (*init_hook)(struct hda_codec *codec);
+       void (*unsol_event)(struct hda_codec *codec, unsigned int res);
+
+       /* for pin sensing */
+       unsigned int sense_updated: 1;
+       unsigned int jack_present: 1;
 };
 
-#define ALC260_DIGOUT_NID      0x03
-#define ALC260_DIGIN_NID       0x06
-
-static struct hda_input_mux alc880_capture_source = {
-       .num_items = 4,
-       .items = {
-               { "Mic", 0x0 },
-               { "Front Mic", 0x3 },
-               { "Line", 0x2 },
-               { "CD", 0x4 },
-       },
+/*
+ * configuration template - to be copied to the spec instance
+ */
+struct alc_config_preset {
+       struct snd_kcontrol_new *mixers[5]; /* should be identical size with spec */
+       const struct hda_verb *init_verbs[5];
+       unsigned int num_dacs;
+       hda_nid_t *dac_nids;
+       hda_nid_t dig_out_nid;          /* optional */
+       hda_nid_t hp_nid;               /* optional */
+       unsigned int num_adc_nids;
+       hda_nid_t *adc_nids;
+       hda_nid_t dig_in_nid;
+       unsigned int num_channel_mode;
+       const struct hda_channel_mode *channel_mode;
+       unsigned int num_mux_defs;
+       const struct hda_input_mux *input_mux;
+       void (*unsol_event)(struct hda_codec *, unsigned int);
+       void (*init_hook)(struct hda_codec *);
 };
 
-static struct hda_input_mux alc260_capture_source = {
-       .num_items = 4,
-       .items = {
-               { "Mic", 0x0 },
-               { "Front Mic", 0x1 },
-               { "Line", 0x2 },
-               { "CD", 0x4 },
-       },
-};
 
 /*
  * input MUX handling
  */
-static int alc_mux_enum_info(snd_kcontrol_t *kcontrol, snd_ctl_elem_info_t *uinfo)
+static int alc_mux_enum_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
 {
        struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
        struct alc_spec *spec = codec->spec;
-       return snd_hda_input_mux_info(spec->input_mux, uinfo);
+       unsigned int mux_idx = snd_ctl_get_ioffidx(kcontrol, &uinfo->id);
+       if (mux_idx >= spec->num_mux_defs)
+               mux_idx = 0;
+       return snd_hda_input_mux_info(&spec->input_mux[mux_idx], uinfo);
 }
 
-static int alc_mux_enum_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+static int alc_mux_enum_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
        struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
        struct alc_spec *spec = codec->spec;
@@ -151,205 +204,392 @@ static int alc_mux_enum_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucon
        return 0;
 }
 
-static int alc_mux_enum_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+static int alc_mux_enum_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
        struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
        struct alc_spec *spec = codec->spec;
        unsigned int adc_idx = snd_ctl_get_ioffidx(kcontrol, &ucontrol->id);
-       return snd_hda_input_mux_put(codec, spec->input_mux, ucontrol,
+       unsigned int mux_idx = adc_idx >= spec->num_mux_defs ? 0 : adc_idx;
+       return snd_hda_input_mux_put(codec, &spec->input_mux[mux_idx], ucontrol,
                                     spec->adc_nids[adc_idx], &spec->cur_mux[adc_idx]);
 }
 
+
 /*
  * channel mode setting
  */
-struct alc_channel_mode {
-       int channels;
-       const struct hda_verb *sequence;
-};
-
+static int alc_ch_mode_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct alc_spec *spec = codec->spec;
+       return snd_hda_ch_mode_info(codec, uinfo, spec->channel_mode,
+                                   spec->num_channel_mode);
+}
 
-/*
- * channel source setting (2/6 channel selection for 3-stack)
- */
+static int alc_ch_mode_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct alc_spec *spec = codec->spec;
+       return snd_hda_ch_mode_get(codec, ucontrol, spec->channel_mode,
+                                  spec->num_channel_mode, spec->multiout.max_channels);
+}
 
-/*
- * set the path ways for 2 channel output
- * need to set the codec line out and mic 1 pin widgets to inputs
- */
-static struct hda_verb alc880_threestack_ch2_init[] = {
-       /* set pin widget 1Ah (line in) for input */
-       { 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
-       /* set pin widget 18h (mic1) for input, for mic also enable the vref */
-       { 0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
-       /* mute the output for Line In PW */
-       { 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080 },
-       /* mute for Mic1 PW */
-       { 0x18, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080 },
-       { } /* end */
-};
+static int alc_ch_mode_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       struct alc_spec *spec = codec->spec;
+       return snd_hda_ch_mode_put(codec, ucontrol, spec->channel_mode,
+                                  spec->num_channel_mode, &spec->multiout.max_channels);
+}
 
 /*
- * 6ch mode
- * need to set the codec line out and mic 1 pin widgets to outputs
+ * Control the mode of pin widget settings via the mixer.  "pc" is used
+ * instead of "%" to avoid consequences of accidentally treating the % as
+ * being part of a format specifier.  Maximum allowed length of a value is
+ * 63 characters plus NULL terminator.
+ *
+ * Note: some retasking pin complexes seem to ignore requests for input
+ * states other than HiZ (eg: PIN_VREFxx) and revert to HiZ if any of these
+ * are requested.  Therefore order this list so that this behaviour will not
+ * cause problems when mixer clients move through the enum sequentially.
+ * NIDs 0x0f and 0x10 have been observed to have this behaviour as of
+ * March 2006.
  */
-static struct hda_verb alc880_threestack_ch6_init[] = {
-       /* set pin widget 1Ah (line in) for output */
-       { 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
-       /* set pin widget 18h (mic1) for output */
-       { 0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
-       /* unmute the output for Line In PW */
-       { 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000 },
-       /* unmute for Mic1 PW */
-       { 0x18, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000 },
-       /* for rear channel output using Line In 1
-        * set select widget connection (nid = 0x12) - to summer node
-        * for rear NID = 0x0f...offset 3 in connection list
-        */
-       { 0x12, AC_VERB_SET_CONNECT_SEL, 0x3 },
-       /* for Mic1 - retask for center/lfe */
-       /* set select widget connection (nid = 0x10) - to summer node for
-        * front CLFE NID = 0x0e...offset 2 in connection list
-        */
-       { 0x10, AC_VERB_SET_CONNECT_SEL, 0x2 },
-       { } /* end */
+static char *alc_pin_mode_names[] = {
+       "Mic 50pc bias", "Mic 80pc bias",
+       "Line in", "Line out", "Headphone out",
 };
-
-static struct alc_channel_mode alc880_threestack_modes[2] = {
-       { 2, alc880_threestack_ch2_init },
-       { 6, alc880_threestack_ch6_init },
+static unsigned char alc_pin_mode_values[] = {
+       PIN_VREF50, PIN_VREF80, PIN_IN, PIN_OUT, PIN_HP,
 };
-
-
-/*
- * channel source setting (6/8 channel selection for 5-stack)
+/* The control can present all 5 options, or it can limit the options based
+ * on the pin being assumed to be exclusively an input or an output pin.  In
+ * addition, "input" pins may or may not process the mic bias option
+ * depending on actual widget capability (NIDs 0x0f and 0x10 don't seem to
+ * accept requests for bias as of chip versions up to March 2006) and/or
+ * wiring in the computer.
  */
-
-/* set the path ways for 6 channel output
- * need to set the codec line out and mic 1 pin widgets to inputs
+#define ALC_PIN_DIR_IN              0x00
+#define ALC_PIN_DIR_OUT             0x01
+#define ALC_PIN_DIR_INOUT           0x02
+#define ALC_PIN_DIR_IN_NOMICBIAS    0x03
+#define ALC_PIN_DIR_INOUT_NOMICBIAS 0x04
+
+/* Info about the pin modes supported by the different pin direction modes. 
+ * For each direction the minimum and maximum values are given.
  */
-static struct hda_verb alc880_fivestack_ch6_init[] = {
-       /* set pin widget 1Ah (line in) for input */
-       { 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
-       /* mute the output for Line In PW */
-       { 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080 },
-       { } /* end */
+static signed char alc_pin_mode_dir_info[5][2] = {
+       { 0, 2 },    /* ALC_PIN_DIR_IN */
+       { 3, 4 },    /* ALC_PIN_DIR_OUT */
+       { 0, 4 },    /* ALC_PIN_DIR_INOUT */
+       { 2, 2 },    /* ALC_PIN_DIR_IN_NOMICBIAS */
+       { 2, 4 },    /* ALC_PIN_DIR_INOUT_NOMICBIAS */
 };
+#define alc_pin_mode_min(_dir) (alc_pin_mode_dir_info[_dir][0])
+#define alc_pin_mode_max(_dir) (alc_pin_mode_dir_info[_dir][1])
+#define alc_pin_mode_n_items(_dir) \
+       (alc_pin_mode_max(_dir)-alc_pin_mode_min(_dir)+1)
 
-/* need to set the codec line out and mic 1 pin widgets to outputs */
-static struct hda_verb alc880_fivestack_ch8_init[] = {
-       /* set pin widget 1Ah (line in) for output */
-       { 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
-       /* unmute the output for Line In PW */
-       { 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000 },
-       /* output for surround channel output using Line In 1 */
-       /* set select widget connection (nid = 0x12) - to summer node
-        * for surr_rear NID = 0x0d...offset 1 in connection list
-        */
-       { 0x12, AC_VERB_SET_CONNECT_SEL, 0x1 },
-       { } /* end */
-};
+static int alc_pin_mode_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
+{
+       unsigned int item_num = uinfo->value.enumerated.item;
+       unsigned char dir = (kcontrol->private_value >> 16) & 0xff;
 
-static struct alc_channel_mode alc880_fivestack_modes[2] = {
-       { 6, alc880_fivestack_ch6_init },
-       { 8, alc880_fivestack_ch8_init },
-};
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+       uinfo->count = 1;
+       uinfo->value.enumerated.items = alc_pin_mode_n_items(dir);
 
-/*
- * channel source setting for W810 system
- *
- * W810 has rear IO for:
- * Front (DAC 02)
- * Surround (DAC 03)
- * Center/LFE (DAC 04)
- * Digital out (06)
- *
- * The system also has a pair of internal speakers, and a headphone jack.
- * These are both connected to Line2 on the codec, hence to DAC 02.
- * 
- * There is a variable resistor to control the speaker or headphone
- * volume. This is a hardware-only device without a software API.
- *
- * Plugging headphones in will disable the internal speakers. This is
- * implemented in hardware, not via the driver using jack sense. In
- * a similar fashion, plugging into the rear socket marked "front" will
- * disable both the speakers and headphones.
- *
- * For input, there's a microphone jack, and an "audio in" jack.
- * These may not do anything useful with this driver yet, because I
- * haven't setup any initialization verbs for these yet...
- */
+       if (item_num<alc_pin_mode_min(dir) || item_num>alc_pin_mode_max(dir))
+               item_num = alc_pin_mode_min(dir);
+       strcpy(uinfo->value.enumerated.name, alc_pin_mode_names[item_num]);
+       return 0;
+}
 
-static struct alc_channel_mode alc880_w810_modes[1] = {
-       { 6, NULL }
-};
+static int alc_pin_mode_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+       unsigned int i;
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       hda_nid_t nid = kcontrol->private_value & 0xffff;
+       unsigned char dir = (kcontrol->private_value >> 16) & 0xff;
+       long *valp = ucontrol->value.integer.value;
+       unsigned int pinctl = snd_hda_codec_read(codec,nid,0,AC_VERB_GET_PIN_WIDGET_CONTROL,0x00);
+
+       /* Find enumerated value for current pinctl setting */
+       i = alc_pin_mode_min(dir);
+       while (alc_pin_mode_values[i]!=pinctl && i<=alc_pin_mode_max(dir))
+               i++;
+       *valp = i<=alc_pin_mode_max(dir)?i:alc_pin_mode_min(dir);
+       return 0;
+}
 
-/*
- */
-static int alc880_ch_mode_info(snd_kcontrol_t *kcontrol, snd_ctl_elem_info_t *uinfo)
+static int alc_pin_mode_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
+       signed int change;
        struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-       struct alc_spec *spec = codec->spec;
+       hda_nid_t nid = kcontrol->private_value & 0xffff;
+       unsigned char dir = (kcontrol->private_value >> 16) & 0xff;
+       long val = *ucontrol->value.integer.value;
+       unsigned int pinctl = snd_hda_codec_read(codec,nid,0,AC_VERB_GET_PIN_WIDGET_CONTROL,0x00);
+
+       if (val<alc_pin_mode_min(dir) || val>alc_pin_mode_max(dir)) 
+               val = alc_pin_mode_min(dir);
+
+       change = pinctl != alc_pin_mode_values[val];
+       if (change) {
+               /* Set pin mode to that requested */
+               snd_hda_codec_write(codec,nid,0,AC_VERB_SET_PIN_WIDGET_CONTROL,
+                       alc_pin_mode_values[val]);
+
+               /* Also enable the retasking pin's input/output as required 
+                * for the requested pin mode.  Enum values of 2 or less are
+                * input modes.
+                *
+                * Dynamically switching the input/output buffers probably
+                * reduces noise slightly (particularly on input) so we'll
+                * do it.  However, having both input and output buffers
+                * enabled simultaneously doesn't seem to be problematic if
+                * this turns out to be necessary in the future.
+                */
+               if (val <= 2) {
+                       snd_hda_codec_write(codec,nid,0,AC_VERB_SET_AMP_GAIN_MUTE,
+                               AMP_OUT_MUTE);
+                       snd_hda_codec_write(codec,nid,0,AC_VERB_SET_AMP_GAIN_MUTE,
+                               AMP_IN_UNMUTE(0));
+               } else {
+                       snd_hda_codec_write(codec,nid,0,AC_VERB_SET_AMP_GAIN_MUTE,
+                               AMP_IN_MUTE(0));
+                       snd_hda_codec_write(codec,nid,0,AC_VERB_SET_AMP_GAIN_MUTE,
+                               AMP_OUT_UNMUTE);
+               }
+       }
+       return change;
+}
 
-       snd_assert(spec->channel_mode, return -ENXIO);
-       uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+#define ALC_PIN_MODE(xname, nid, dir) \
+       { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = 0,  \
+         .info = alc_pin_mode_info, \
+         .get = alc_pin_mode_get, \
+         .put = alc_pin_mode_put, \
+         .private_value = nid | (dir<<16) }
+
+/* A switch control for ALC260 GPIO pins.  Multiple GPIOs can be ganged
+ * together using a mask with more than one bit set.  This control is
+ * currently used only by the ALC260 test model.  At this stage they are not
+ * needed for any "production" models.
+ */
+#ifdef CONFIG_SND_DEBUG
+static int alc_gpio_data_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
+{
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
        uinfo->count = 1;
-       uinfo->value.enumerated.items = 2;
-       if (uinfo->value.enumerated.item >= 2)
-               uinfo->value.enumerated.item = 1;
-       sprintf(uinfo->value.enumerated.name, "%dch",
-               spec->channel_mode[uinfo->value.enumerated.item].channels);
+       uinfo->value.integer.min = 0;
+       uinfo->value.integer.max = 1;
        return 0;
-}
-
-static int alc880_ch_mode_get(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+}                                
+static int alc_gpio_data_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
        struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-       struct alc_spec *spec = codec->spec;
+       hda_nid_t nid = kcontrol->private_value & 0xffff;
+       unsigned char mask = (kcontrol->private_value >> 16) & 0xff;
+       long *valp = ucontrol->value.integer.value;
+       unsigned int val = snd_hda_codec_read(codec,nid,0,AC_VERB_GET_GPIO_DATA,0x00);
 
-       snd_assert(spec->channel_mode, return -ENXIO);
-       ucontrol->value.enumerated.item[0] =
-               (spec->multiout.max_channels == spec->channel_mode[0].channels) ? 0 : 1;
+       *valp = (val & mask) != 0;
        return 0;
 }
+static int alc_gpio_data_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+       signed int change;
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       hda_nid_t nid = kcontrol->private_value & 0xffff;
+       unsigned char mask = (kcontrol->private_value >> 16) & 0xff;
+       long val = *ucontrol->value.integer.value;
+       unsigned int gpio_data = snd_hda_codec_read(codec,nid,0,AC_VERB_GET_GPIO_DATA,0x00);
+
+       /* Set/unset the masked GPIO bit(s) as needed */
+       change = (val==0?0:mask) != (gpio_data & mask);
+       if (val==0)
+               gpio_data &= ~mask;
+       else
+               gpio_data |= mask;
+       snd_hda_codec_write(codec,nid,0,AC_VERB_SET_GPIO_DATA,gpio_data);
+
+       return change;
+}
+#define ALC_GPIO_DATA_SWITCH(xname, nid, mask) \
+       { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = 0,  \
+         .info = alc_gpio_data_info, \
+         .get = alc_gpio_data_get, \
+         .put = alc_gpio_data_put, \
+         .private_value = nid | (mask<<16) }
+#endif   /* CONFIG_SND_DEBUG */
+
+/* A switch control to allow the enabling of the digital IO pins on the
+ * ALC260.  This is incredibly simplistic; the intention of this control is
+ * to provide something in the test model allowing digital outputs to be
+ * identified if present.  If models are found which can utilise these
+ * outputs a more complete mixer control can be devised for those models if
+ * necessary.
+ */
+#ifdef CONFIG_SND_DEBUG
+static int alc_spdif_ctrl_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
+{
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
+       uinfo->count = 1;
+       uinfo->value.integer.min = 0;
+       uinfo->value.integer.max = 1;
+       return 0;
+}                                
+static int alc_spdif_ctrl_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       hda_nid_t nid = kcontrol->private_value & 0xffff;
+       unsigned char mask = (kcontrol->private_value >> 16) & 0xff;
+       long *valp = ucontrol->value.integer.value;
+       unsigned int val = snd_hda_codec_read(codec,nid,0,AC_VERB_GET_DIGI_CONVERT,0x00);
 
-static int alc880_ch_mode_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+       *valp = (val & mask) != 0;
+       return 0;
+}
+static int alc_spdif_ctrl_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
+       signed int change;
        struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
-       struct alc_spec *spec = codec->spec;
-       int mode;
+       hda_nid_t nid = kcontrol->private_value & 0xffff;
+       unsigned char mask = (kcontrol->private_value >> 16) & 0xff;
+       long val = *ucontrol->value.integer.value;
+       unsigned int ctrl_data = snd_hda_codec_read(codec,nid,0,AC_VERB_GET_DIGI_CONVERT,0x00);
+
+       /* Set/unset the masked control bit(s) as needed */
+       change = (val==0?0:mask) != (ctrl_data & mask);
+       if (val==0)
+               ctrl_data &= ~mask;
+       else
+               ctrl_data |= mask;
+       snd_hda_codec_write(codec,nid,0,AC_VERB_SET_DIGI_CONVERT_1,ctrl_data);
+
+       return change;
+}
+#define ALC_SPDIF_CTRL_SWITCH(xname, nid, mask) \
+       { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, .index = 0,  \
+         .info = alc_spdif_ctrl_info, \
+         .get = alc_spdif_ctrl_get, \
+         .put = alc_spdif_ctrl_put, \
+         .private_value = nid | (mask<<16) }
+#endif   /* CONFIG_SND_DEBUG */
 
-       snd_assert(spec->channel_mode, return -ENXIO);
-       mode = ucontrol->value.enumerated.item[0] ? 1 : 0;
-       if (spec->multiout.max_channels == spec->channel_mode[mode].channels &&
-           ! codec->in_resume)
-               return 0;
+/*
+ * set up from the preset table
+ */
+static void setup_preset(struct alc_spec *spec, const struct alc_config_preset *preset)
+{
+       int i;
 
-       /* change the current channel setting */
-       spec->multiout.max_channels = spec->channel_mode[mode].channels;
-       if (spec->channel_mode[mode].sequence)
-               snd_hda_sequence_write(codec, spec->channel_mode[mode].sequence);
+       for (i = 0; i < ARRAY_SIZE(preset->mixers) && preset->mixers[i]; i++)
+               spec->mixers[spec->num_mixers++] = preset->mixers[i];
+       for (i = 0; i < ARRAY_SIZE(preset->init_verbs) && preset->init_verbs[i]; i++)
+               spec->init_verbs[spec->num_init_verbs++] = preset->init_verbs[i];
+       
+       spec->channel_mode = preset->channel_mode;
+       spec->num_channel_mode = preset->num_channel_mode;
 
-       return 1;
-}
+       spec->multiout.max_channels = spec->channel_mode[0].channels;
 
+       spec->multiout.num_dacs = preset->num_dacs;
+       spec->multiout.dac_nids = preset->dac_nids;
+       spec->multiout.dig_out_nid = preset->dig_out_nid;
+       spec->multiout.hp_nid = preset->hp_nid;
+       
+       spec->num_mux_defs = preset->num_mux_defs;
+       if (! spec->num_mux_defs)
+               spec->num_mux_defs = 1;
+       spec->input_mux = preset->input_mux;
+
+       spec->num_adc_nids = preset->num_adc_nids;
+       spec->adc_nids = preset->adc_nids;
+       spec->dig_in_nid = preset->dig_in_nid;
+
+       spec->unsol_event = preset->unsol_event;
+       spec->init_hook = preset->init_hook;
+}
 
 /*
+ * ALC880 3-stack model
+ *
+ * DAC: Front = 0x02 (0x0c), Surr = 0x05 (0x0f), CLFE = 0x04 (0x0e)
+ * Pin assignment: Front = 0x14, Line-In/Surr = 0x1a, Mic/CLFE = 0x18, F-Mic = 0x1b
+ *                 HP = 0x19
  */
 
-/* 3-stack mode
- * Pin assignment: Front=0x14, Line-In/Rear=0x1a, Mic/CLFE=0x18, F-Mic=0x1b
- *                 HP=0x19
+static hda_nid_t alc880_dac_nids[4] = {
+       /* front, rear, clfe, rear_surr */
+       0x02, 0x05, 0x04, 0x03
+};
+
+static hda_nid_t alc880_adc_nids[3] = {
+       /* ADC0-2 */
+       0x07, 0x08, 0x09,
+};
+
+/* The datasheet says the node 0x07 is connected from inputs,
+ * but it shows zero connection in the real implementation on some devices.
+ * Note: this is a 915GAV bug, fixed on 915GLV
  */
-static snd_kcontrol_new_t alc880_base_mixer[] = {
+static hda_nid_t alc880_adc_nids_alt[2] = {
+       /* ADC1-2 */
+       0x08, 0x09,
+};
+
+#define ALC880_DIGOUT_NID      0x06
+#define ALC880_DIGIN_NID       0x0a
+
+static struct hda_input_mux alc880_capture_source = {
+       .num_items = 4,
+       .items = {
+               { "Mic", 0x0 },
+               { "Front Mic", 0x3 },
+               { "Line", 0x2 },
+               { "CD", 0x4 },
+       },
+};
+
+/* channel source setting (2/6 channel selection for 3-stack) */
+/* 2ch mode */
+static struct hda_verb alc880_threestack_ch2_init[] = {
+       /* set line-in to input, mute it */
+       { 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN },
+       { 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
+       /* set mic-in to input vref 80%, mute it */
+       { 0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 },
+       { 0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
+       { } /* end */
+};
+
+/* 6ch mode */
+static struct hda_verb alc880_threestack_ch6_init[] = {
+       /* set line-in to output, unmute it */
+       { 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+       { 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+       /* set mic-in to output, unmute it */
+       { 0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+       { 0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+       { } /* end */
+};
+
+static struct hda_channel_mode alc880_threestack_modes[2] = {
+       { 2, alc880_threestack_ch2_init },
+       { 6, alc880_threestack_ch6_init },
+};
+
+static struct snd_kcontrol_new alc880_three_stack_mixer[] = {
        HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE("Front Playback Switch", 0x14, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
        HDA_CODEC_VOLUME("Surround Playback Volume", 0x0f, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE("Surround Playback Switch", 0x1a, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Surround Playback Switch", 0x0f, 2, HDA_INPUT),
        HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x0e, 1, 0x0, HDA_OUTPUT),
        HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x0e, 2, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE_MONO("Center Playback Switch", 0x18, 1, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE_MONO("LFE Playback Switch", 0x18, 2, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE_MONO("Center Playback Switch", 0x0e, 1, 2, HDA_INPUT),
+       HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x0e, 2, 2, HDA_INPUT),
        HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
        HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
        HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
@@ -360,12 +600,25 @@ static snd_kcontrol_new_t alc880_base_mixer[] = {
        HDA_CODEC_MUTE("Front Mic Playback Switch", 0x0b, 0x3, HDA_INPUT),
        HDA_CODEC_VOLUME("PC Speaker Playback Volume", 0x0b, 0x05, HDA_INPUT),
        HDA_CODEC_MUTE("PC Speaker Playback Switch", 0x0b, 0x05, HDA_INPUT),
-       HDA_CODEC_VOLUME("Headphone Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
        HDA_CODEC_MUTE("Headphone Playback Switch", 0x19, 0x0, HDA_OUTPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Channel Mode",
+               .info = alc_ch_mode_info,
+               .get = alc_ch_mode_get,
+               .put = alc_ch_mode_put,
+       },
+       { } /* end */
+};
+
+/* capture mixer elements */
+static struct snd_kcontrol_new alc880_capture_mixer[] = {
        HDA_CODEC_VOLUME("Capture Volume", 0x07, 0x0, HDA_INPUT),
        HDA_CODEC_MUTE("Capture Switch", 0x07, 0x0, HDA_INPUT),
        HDA_CODEC_VOLUME_IDX("Capture Volume", 1, 0x08, 0x0, HDA_INPUT),
        HDA_CODEC_MUTE_IDX("Capture Switch", 1, 0x08, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME_IDX("Capture Volume", 2, 0x09, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE_IDX("Capture Switch", 2, 0x09, 0x0, HDA_INPUT),
        {
                .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
                /* The multiple "Capture Source" controls confuse alsamixer
@@ -374,91 +627,313 @@ static snd_kcontrol_new_t alc880_base_mixer[] = {
                 */
                /* .name = "Capture Source", */
                .name = "Input Source",
-               .count = 2,
+               .count = 3,
                .info = alc_mux_enum_info,
                .get = alc_mux_enum_get,
                .put = alc_mux_enum_put,
        },
+       { } /* end */
+};
+
+/* capture mixer elements (in case NID 0x07 not available) */
+static struct snd_kcontrol_new alc880_capture_alt_mixer[] = {
+       HDA_CODEC_VOLUME("Capture Volume", 0x08, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x08, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME_IDX("Capture Volume", 1, 0x09, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE_IDX("Capture Switch", 1, 0x09, 0x0, HDA_INPUT),
        {
                .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-               .name = "Channel Mode",
-               .info = alc880_ch_mode_info,
-               .get = alc880_ch_mode_get,
-               .put = alc880_ch_mode_put,
+               /* The multiple "Capture Source" controls confuse alsamixer
+                * So call somewhat different..
+                * FIXME: the controls appear in the "playback" view!
+                */
+               /* .name = "Capture Source", */
+               .name = "Input Source",
+               .count = 2,
+               .info = alc_mux_enum_info,
+               .get = alc_mux_enum_get,
+               .put = alc_mux_enum_put,
        },
        { } /* end */
 };
 
-/* 5-stack mode
- * Pin assignment: Front=0x14, Rear=0x17, CLFE=0x16
- *                 Line-In/Side=0x1a, Mic=0x18, F-Mic=0x1b, HP=0x19
- */
-static snd_kcontrol_new_t alc880_five_stack_mixer[] = {
-       HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE("Front Playback Switch", 0x14, 0x0, HDA_OUTPUT),
-       HDA_CODEC_VOLUME("Surround Playback Volume", 0x0f, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE("Surround Playback Switch", 0x17, 0x0, HDA_OUTPUT),
+
+
+/*
+ * ALC880 5-stack model
+ *
+ * DAC: Front = 0x02 (0x0c), Surr = 0x05 (0x0f), CLFE = 0x04 (0x0d), Side = 0x02 (0xd)
+ * Pin assignment: Front = 0x14, Surr = 0x17, CLFE = 0x16
+ *                 Line-In/Side = 0x1a, Mic = 0x18, F-Mic = 0x1b, HP = 0x19
+ */
+
+/* additional mixers to alc880_three_stack_mixer */
+static struct snd_kcontrol_new alc880_five_stack_mixer[] = {
+       HDA_CODEC_VOLUME("Side Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Side Playback Switch", 0x0d, 2, HDA_INPUT),
+       { } /* end */
+};
+
+/* channel source setting (6/8 channel selection for 5-stack) */
+/* 6ch mode */
+static struct hda_verb alc880_fivestack_ch6_init[] = {
+       /* set line-in to input, mute it */
+       { 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN },
+       { 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE },
+       { } /* end */
+};
+
+/* 8ch mode */
+static struct hda_verb alc880_fivestack_ch8_init[] = {
+       /* set line-in to output, unmute it */
+       { 0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+       { 0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE },
+       { } /* end */
+};
+
+static struct hda_channel_mode alc880_fivestack_modes[2] = {
+       { 6, alc880_fivestack_ch6_init },
+       { 8, alc880_fivestack_ch8_init },
+};
+
+
+/*
+ * ALC880 6-stack model
+ *
+ * DAC: Front = 0x02 (0x0c), Surr = 0x03 (0x0d), CLFE = 0x04 (0x0e), Side = 0x05 (0x0f)
+ * Pin assignment: Front = 0x14, Surr = 0x15, CLFE = 0x16, Side = 0x17,
+ *   Mic = 0x18, F-Mic = 0x19, Line = 0x1a, HP = 0x1b
+ */
+
+static hda_nid_t alc880_6st_dac_nids[4] = {
+       /* front, rear, clfe, rear_surr */
+       0x02, 0x03, 0x04, 0x05
+};     
+
+static struct hda_input_mux alc880_6stack_capture_source = {
+       .num_items = 4,
+       .items = {
+               { "Mic", 0x0 },
+               { "Front Mic", 0x1 },
+               { "Line", 0x2 },
+               { "CD", 0x4 },
+       },
+};
+
+/* fixed 8-channels */
+static struct hda_channel_mode alc880_sixstack_modes[1] = {
+       { 8, NULL },
+};
+
+static struct snd_kcontrol_new alc880_six_stack_mixer[] = {
+       HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("Surround Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Surround Playback Switch", 0x0d, 2, HDA_INPUT),
        HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x0e, 1, 0x0, HDA_OUTPUT),
        HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x0e, 2, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE_MONO("Center Playback Switch", 0x16, 1, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE_MONO("LFE Playback Switch", 0x16, 2, 0x0, HDA_OUTPUT),
-       HDA_CODEC_VOLUME("Side Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE("Side Playback Switch", 0x1a, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE_MONO("Center Playback Switch", 0x0e, 1, 2, HDA_INPUT),
+       HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x0e, 2, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("Side Playback Volume", 0x0f, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Side Playback Switch", 0x0f, 2, HDA_INPUT),
        HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
        HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
        HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
        HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
        HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
        HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
-       HDA_CODEC_VOLUME("Front Mic Playback Volume", 0x0b, 0x3, HDA_INPUT),
-       HDA_CODEC_MUTE("Front Mic Playback Switch", 0x0b, 0x3, HDA_INPUT),
+       HDA_CODEC_VOLUME("Front Mic Playback Volume", 0x0b, 0x1, HDA_INPUT),
+       HDA_CODEC_MUTE("Front Mic Playback Switch", 0x0b, 0x1, HDA_INPUT),
        HDA_CODEC_VOLUME("PC Speaker Playback Volume", 0x0b, 0x05, HDA_INPUT),
        HDA_CODEC_MUTE("PC Speaker Playback Switch", 0x0b, 0x05, HDA_INPUT),
-       HDA_CODEC_VOLUME("Headphone Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE("Headphone Playback Switch", 0x19, 0x0, HDA_OUTPUT),
-       HDA_CODEC_VOLUME("Capture Volume", 0x07, 0x0, HDA_INPUT),
-       HDA_CODEC_MUTE("Capture Switch", 0x07, 0x0, HDA_INPUT),
-       HDA_CODEC_VOLUME_IDX("Capture Volume", 1, 0x08, 0x0, HDA_INPUT),
-       HDA_CODEC_MUTE_IDX("Capture Switch", 1, 0x08, 0x0, HDA_INPUT),
-       {
-               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-               /* The multiple "Capture Source" controls confuse alsamixer
-                * So call somewhat different..
-                * FIXME: the controls appear in the "playback" view!
-                */
-               /* .name = "Capture Source", */
-               .name = "Input Source",
-               .count = 2,
-               .info = alc_mux_enum_info,
-               .get = alc_mux_enum_get,
-               .put = alc_mux_enum_put,
-       },
        {
                .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
                .name = "Channel Mode",
-               .info = alc880_ch_mode_info,
-               .get = alc880_ch_mode_get,
-               .put = alc880_ch_mode_put,
+               .info = alc_ch_mode_info,
+               .get = alc_ch_mode_get,
+               .put = alc_ch_mode_put,
        },
        { } /* end */
 };
 
-static snd_kcontrol_new_t alc880_w810_base_mixer[] = {
+
+/*
+ * ALC880 W810 model
+ *
+ * W810 has rear IO for:
+ * Front (DAC 02)
+ * Surround (DAC 03)
+ * Center/LFE (DAC 04)
+ * Digital out (06)
+ *
+ * The system also has a pair of internal speakers, and a headphone jack.
+ * These are both connected to Line2 on the codec, hence to DAC 02.
+ * 
+ * There is a variable resistor to control the speaker or headphone
+ * volume. This is a hardware-only device without a software API.
+ *
+ * Plugging headphones in will disable the internal speakers. This is
+ * implemented in hardware, not via the driver using jack sense. In
+ * a similar fashion, plugging into the rear socket marked "front" will
+ * disable both the speakers and headphones.
+ *
+ * For input, there's a microphone jack, and an "audio in" jack.
+ * These may not do anything useful with this driver yet, because I
+ * haven't setup any initialization verbs for these yet...
+ */
+
+static hda_nid_t alc880_w810_dac_nids[3] = {
+       /* front, rear/surround, clfe */
+       0x02, 0x03, 0x04
+};
+
+/* fixed 6 channels */
+static struct hda_channel_mode alc880_w810_modes[1] = {
+       { 6, NULL }
+};
+
+/* Pin assignment: Front = 0x14, Surr = 0x15, CLFE = 0x16, HP = 0x1b */
+static struct snd_kcontrol_new alc880_w810_base_mixer[] = {
        HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE("Front Playback Switch", 0x14, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
        HDA_CODEC_VOLUME("Surround Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE("Surround Playback Switch", 0x15, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Surround Playback Switch", 0x0d, 2, HDA_INPUT),
        HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x0e, 1, 0x0, HDA_OUTPUT),
        HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x0e, 2, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE_MONO("Center Playback Switch", 0x16, 1, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE_MONO("LFE Playback Switch", 0x16, 2, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE_MONO("Center Playback Switch", 0x0e, 1, 2, HDA_INPUT),
+       HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x0e, 2, 2, HDA_INPUT),
        HDA_CODEC_MUTE("Headphone Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
-       HDA_CODEC_VOLUME("Capture Volume", 0x07, 0x0, HDA_INPUT),
-       HDA_CODEC_MUTE("Capture Switch", 0x07, 0x0, HDA_INPUT),
-       HDA_CODEC_VOLUME_IDX("Capture Volume", 1, 0x08, 0x0, HDA_INPUT),
-       HDA_CODEC_MUTE_IDX("Capture Switch", 1, 0x08, 0x0, HDA_INPUT),
-       HDA_CODEC_VOLUME_IDX("Capture Volume", 2, 0x09, 0x0, HDA_INPUT),
-       HDA_CODEC_MUTE_IDX("Capture Switch", 2, 0x09, 0x0, HDA_INPUT),
+       { } /* end */
+};
+
+
+/*
+ * Z710V model
+ *
+ * DAC: Front = 0x02 (0x0c), HP = 0x03 (0x0d)
+ * Pin assignment: Front = 0x14, HP = 0x15, Mic = 0x18, Mic2 = 0x19(?), Line = 0x1a
+ */
+
+static hda_nid_t alc880_z71v_dac_nids[1] = {
+       0x02
+};
+#define ALC880_Z71V_HP_DAC     0x03
+
+/* fixed 2 channels */
+static struct hda_channel_mode alc880_2_jack_modes[1] = {
+       { 2, NULL }
+};
+
+static struct snd_kcontrol_new alc880_z71v_mixer[] = {
+       HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("Headphone Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Headphone Playback Switch", 0x0d, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+       { } /* end */
+};
+
+
+/* FIXME! */
+/*
+ * ALC880 F1734 model
+ *
+ * DAC: HP = 0x02 (0x0c), Front = 0x03 (0x0d)
+ * Pin assignment: HP = 0x14, Front = 0x15, Mic = 0x18
+ */
+
+static hda_nid_t alc880_f1734_dac_nids[1] = {
+       0x03
+};
+#define ALC880_F1734_HP_DAC    0x02
+
+static struct snd_kcontrol_new alc880_f1734_mixer[] = {
+       HDA_CODEC_VOLUME("Headphone Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Headphone Playback Switch", 0x0c, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("Internal Speaker Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Internal Speaker Playback Switch", 0x0d, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+       { } /* end */
+};
+
+
+/* FIXME! */
+/*
+ * ALC880 ASUS model
+ *
+ * DAC: HP/Front = 0x02 (0x0c), Surr = 0x03 (0x0d), CLFE = 0x04 (0x0e)
+ * Pin assignment: HP/Front = 0x14, Surr = 0x15, CLFE = 0x16,
+ *  Mic = 0x18, Line = 0x1a
+ */
+
+#define alc880_asus_dac_nids   alc880_w810_dac_nids    /* identical with w810 */
+#define alc880_asus_modes      alc880_threestack_modes /* 2/6 channel mode */
+
+static struct snd_kcontrol_new alc880_asus_mixer[] = {
+       HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("Surround Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Surround Playback Switch", 0x0d, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x0e, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x0e, 2, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE_MONO("Center Playback Switch", 0x0e, 1, 2, HDA_INPUT),
+       HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x0e, 2, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
+       HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
+       HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Channel Mode",
+               .info = alc_ch_mode_info,
+               .get = alc_ch_mode_get,
+               .put = alc_ch_mode_put,
+       },
+       { } /* end */
+};
+
+/* FIXME! */
+/*
+ * ALC880 ASUS W1V model
+ *
+ * DAC: HP/Front = 0x02 (0x0c), Surr = 0x03 (0x0d), CLFE = 0x04 (0x0e)
+ * Pin assignment: HP/Front = 0x14, Surr = 0x15, CLFE = 0x16,
+ *  Mic = 0x18, Line = 0x1a, Line2 = 0x1b
+ */
+
+/* additional mixers to alc880_asus_mixer */
+static struct snd_kcontrol_new alc880_asus_w1v_mixer[] = {
+       HDA_CODEC_VOLUME("Line2 Playback Volume", 0x0b, 0x03, HDA_INPUT),
+       HDA_CODEC_MUTE("Line2 Playback Switch", 0x0b, 0x03, HDA_INPUT),
+       { } /* end */
+};
+
+/* additional mixers to alc880_asus_mixer */
+static struct snd_kcontrol_new alc880_pcbeep_mixer[] = {
+       HDA_CODEC_VOLUME("PC Speaker Playback Volume", 0x0b, 0x05, HDA_INPUT),
+       HDA_CODEC_MUTE("PC Speaker Playback Switch", 0x0b, 0x05, HDA_INPUT),
+       { } /* end */
+};
+
+/* TCL S700 */
+static struct snd_kcontrol_new alc880_tcl_s700_mixer[] = {
+       HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Front Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Headphone Playback Switch", 0x14, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x0B, 0x04, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x0B, 0x04, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x0B, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x0B, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME("Capture Volume", 0x08, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x08, 0x0, HDA_INPUT),
        {
                .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
                /* The multiple "Capture Source" controls confuse alsamixer
@@ -467,7 +942,7 @@ static snd_kcontrol_new_t alc880_w810_base_mixer[] = {
                 */
                /* .name = "Capture Source", */
                .name = "Input Source",
-               .count = 3,
+               .count = 1,
                .info = alc_mux_enum_info,
                .get = alc_mux_enum_get,
                .put = alc_mux_enum_put,
@@ -476,6 +951,7 @@ static snd_kcontrol_new_t alc880_w810_base_mixer[] = {
 };
 
 /*
+ * build control elements
  */
 static int alc_build_controls(struct hda_codec *codec)
 {
@@ -502,230 +978,576 @@ static int alc_build_controls(struct hda_codec *codec)
        return 0;
 }
 
+
 /*
  * initialize the codec volumes, etc
  */
 
-static struct hda_verb alc880_init_verbs_three_stack[] = {
-       /* Line In pin widget for input */
-       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
-       /* CD pin widget for input */
-       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
-       /* Mic1 (rear panel) pin widget for input and vref at 80% */
-       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
-       /* Mic2 (front panel) pin widget for input and vref at 80% */
-       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
-       /* unmute amp left and right */
-       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000},
-       /* set connection select to line in (default select for this ADC) */
-       {0x07, AC_VERB_SET_CONNECT_SEL, 0x02},
-       /* unmute front mixer amp left (volume = 0) */
-       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-       /* mute pin widget amp left and right (no gain on this amp) */
-       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
-       /* unmute rear mixer amp left and right (volume = 0) */
-       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-       /* mute pin widget amp left and right (no gain on this amp) */
-       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
-       /* unmute rear mixer amp left and right (volume = 0) */
-       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-       /* mute pin widget amp left and right (no gain on this amp) */
-       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
-
-       /* using rear surround as the path for headphone output */
-       /* unmute rear surround mixer amp left and right (volume = 0) */
-       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-       /* PASD 3 stack boards use the Mic 2 as the headphone output */
-       /* need to program the selector associated with the Mic 2 pin widget to
-        * surround path (index 0x01) for headphone output */
-       {0x11, AC_VERB_SET_CONNECT_SEL, 0x01},
-       /* mute pin widget amp left and right (no gain on this amp) */
-       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
-       /* need to retask the Mic 2 pin widget to output */
-       {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
-
-       /* Unmute input amps (CD, Line In, Mic 1 & Mic 2) for mixer widget(nid=0x0B)
-        * to support the input path of analog loopback
+/*
+ * generic initialization of ADC, input mixers and output mixers
+ */
+static struct hda_verb alc880_volume_init_verbs[] = {
+       /*
+        * Unmute ADC0-2 and set the default input to mic-in
+        */
+       {0x07, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x08, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x09, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+
+       /* Unmute input amps (CD, Line In, Mic 1 & Mic 2) of the analog-loopback
+        * mixer widget
         * Note: PASD motherboards uses the Line In 2 as the input for front panel
         * mic (mic 2)
         */
-       /* Amp Indexes: CD = 0x04, Line In 1 = 0x02, Mic 1 = 0x00 & Line In 2 = 0x03 */
-       /* unmute CD */
-       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x04 << 8))},
-       /* unmute Line In */
-       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x02 << 8))},
-       /* unmute Mic 1 */
-       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-       /* unmute Line In 2 (for PASD boards Mic 2) */
-       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x03 << 8))},
-
-       /* Unmute input amps for the line out paths to support the output path of
-        * analog loopback
-        * the mixers on the output path has 2 inputs, one from the DAC and one
-        * from the mixer
+       /* Amp Indices: Mic1 = 0, Mic2 = 1, Line1 = 2, Line2 = 3, CD = 4 */
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(3)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(4)},
+
+       /*
+        * Set up output mixers (0x0c - 0x0f)
         */
-       /* Amp Indexes: DAC = 0x01 & mixer = 0x00 */
-       /* Unmute Front out path */
-       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8))},
-       /* Unmute Surround (used as HP) out path */
-       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8))},
-       /* Unmute C/LFE out path */
-       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x01 << 8))}, /* mute */
-       /* Unmute rear Surround out path */
-       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8))},
+       /* set vol=0 to output mixers */
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       /* set up input amps for analog loopback */
+       /* Amp Indices: DAC = 0, mixer = 1 */
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
 
        { }
 };
 
-static struct hda_verb alc880_init_verbs_five_stack[] = {
-       /* Line In pin widget for input */
-       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
-       /* CD pin widget for input */
-       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
-       /* Mic1 (rear panel) pin widget for input and vref at 80% */
-       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
-       /* Mic2 (front panel) pin widget for input and vref at 80% */
-       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
-       /* unmute amp left and right */
-       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000},
-       /* set connection select to line in (default select for this ADC) */
-       {0x07, AC_VERB_SET_CONNECT_SEL, 0x02},
-       /* unmute front mixer amp left and right (volume = 0) */
-       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-       /* mute pin widget amp left and right (no gain on this amp) */
-       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
-       /* five rear and clfe */
-       /* unmute rear mixer amp left and right (volume = 0)  */
-       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-       /* mute pin widget amp left and right (no gain on this amp) */
-       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
-       /* unmute clfe mixer amp left and right (volume = 0) */
-       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-       /* mute pin widget amp left and right (no gain on this amp) */
-       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
-
-       /* using rear surround as the path for headphone output */
-       /* unmute rear surround mixer amp left and right (volume = 0) */
-       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-       /* PASD 3 stack boards use the Mic 2 as the headphone output */
-       /* need to program the selector associated with the Mic 2 pin widget to
-        * surround path (index 0x01) for headphone output
+/*
+ * 3-stack pin configuration:
+ * front = 0x14, mic/clfe = 0x18, HP = 0x19, line/surr = 0x1a, f-mic = 0x1b
+ */
+static struct hda_verb alc880_pin_3stack_init_verbs[] = {
+       /*
+        * preset connection lists of input pins
+        * 0 = front, 1 = rear_surr, 2 = CLFE, 3 = surround
         */
-       {0x11, AC_VERB_SET_CONNECT_SEL, 0x01},
-       /* mute pin widget amp left and right (no gain on this amp) */
-       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
-       /* need to retask the Mic 2 pin widget to output */
-       {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+       {0x10, AC_VERB_SET_CONNECT_SEL, 0x02}, /* mic/clfe */
+       {0x11, AC_VERB_SET_CONNECT_SEL, 0x00}, /* HP */
+       {0x12, AC_VERB_SET_CONNECT_SEL, 0x03}, /* line/surround */
 
-       /* Unmute input amps (CD, Line In, Mic 1 & Mic 2) for mixer
-        * widget(nid=0x0B) to support the input path of analog loopback
-        */
-       /* Note: PASD motherboards uses the Line In 2 as the input for front panel mic (mic 2) */
-       /* Amp Indexes: CD = 0x04, Line In 1 = 0x02, Mic 1 = 0x00 & Line In 2 = 0x03*/
-       /* unmute CD */
-       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x04 << 8))},
-       /* unmute Line In */
-       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x02 << 8))},
-       /* unmute Mic 1 */
-       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-       /* unmute Line In 2 (for PASD boards Mic 2) */
-       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x03 << 8))},
-
-       /* Unmute input amps for the line out paths to support the output path of
-        * analog loopback
-        * the mixers on the output path has 2 inputs, one from the DAC and
-        * one from the mixer
+       /*
+        * Set pin mode and muting
         */
-       /* Amp Indexes: DAC = 0x01 & mixer = 0x00 */
-       /* Unmute Front out path */
-       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8))},
-       /* Unmute Surround (used as HP) out path */
-       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8))},
-       /* Unmute C/LFE out path */
-       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x01 << 8))}, /* mute */
-       /* Unmute rear Surround out path */
-       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8))},
+       /* set front pin widgets 0x14 for output */
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Mic1 (rear panel) pin widget for input and vref at 80% */
+       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       /* Mic2 (as headphone out) for HP output */
+       {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Line In pin widget for input */
+       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       /* Line2 (as front mic) pin widget for input and vref at 80% */
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       /* CD pin widget for input */
+       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
 
        { }
 };
 
-static struct hda_verb alc880_w810_init_verbs[] = {
-       /* front channel selector/amp: input 0: DAC: unmuted, (no volume selection) */
-       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000},
+/*
+ * 5-stack pin configuration:
+ * front = 0x14, surround = 0x17, clfe = 0x16, mic = 0x18, HP = 0x19,
+ * line-in/side = 0x1a, f-mic = 0x1b
+ */
+static struct hda_verb alc880_pin_5stack_init_verbs[] = {
+       /*
+        * preset connection lists of input pins
+        * 0 = front, 1 = rear_surr, 2 = CLFE, 3 = surround
+        */
+       {0x11, AC_VERB_SET_CONNECT_SEL, 0x00}, /* HP */
+       {0x12, AC_VERB_SET_CONNECT_SEL, 0x01}, /* line/side */
 
-       /* front channel selector/amp: input 1: capture mix: muted, (no volume selection) */
-       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, 0x7180},
+       /*
+        * Set pin mode and muting
+        */
+       /* set pin widgets 0x14-0x17 for output */
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       /* unmute pins for output (no gain on this amp) */
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
 
-       /* front channel selector/amp: output 0: unmuted, max volume */
-       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+       /* Mic1 (rear panel) pin widget for input and vref at 80% */
+       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       /* Mic2 (as headphone out) for HP output */
+       {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Line In pin widget for input */
+       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       /* Line2 (as front mic) pin widget for input and vref at 80% */
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       /* CD pin widget for input */
+       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
 
-       /* front out pin: muted, (no volume selection)  */
-       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       { }
+};
 
-       /* front out pin: NOT headphone enable, out enable, vref disabled */
-       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+/*
+ * W810 pin configuration:
+ * front = 0x14, surround = 0x15, clfe = 0x16, HP = 0x1b
+ */
+static struct hda_verb alc880_pin_w810_init_verbs[] = {
+       /* hphone/speaker input selector: front DAC */
+       {0x13, AC_VERB_SET_CONNECT_SEL, 0x0},
 
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
 
-       /* surround channel selector/amp: input 0: DAC: unmuted, (no volume selection) */
-       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000},
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
 
-       /* surround channel selector/amp: input 1: capture mix: muted, (no volume selection) */
-       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, 0x7180},
+       { }
+};
 
-       /* surround channel selector/amp: output 0: unmuted, max volume */
-       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+/*
+ * Z71V pin configuration:
+ * Speaker-out = 0x14, HP = 0x15, Mic = 0x18, Line-in = 0x1a, Mic2 = 0x1b (?)
+ */
+static struct hda_verb alc880_pin_z71v_init_verbs[] = {
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
 
-       /* surround out pin: muted, (no volume selection)  */
-       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
 
-       /* surround out pin: NOT headphone enable, out enable, vref disabled */
-       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+       { }
+};
 
+/*
+ * 6-stack pin configuration:
+ * front = 0x14, surr = 0x15, clfe = 0x16, side = 0x17, mic = 0x18, f-mic = 0x19,
+ * line = 0x1a, HP = 0x1b
+ */
+static struct hda_verb alc880_pin_6stack_init_verbs[] = {
+       {0x13, AC_VERB_SET_CONNECT_SEL, 0x00}, /* HP */
+
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+
+       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+
+       { }
+};
 
-       /* c/lfe channel selector/amp: input 0: DAC: unmuted, (no volume selection) */
-       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000},
+/* FIXME! */
+/*
+ * F1734 pin configuration:
+ * HP = 0x14, speaker-out = 0x15, mic = 0x18
+ */
+static struct hda_verb alc880_pin_f1734_init_verbs[] = {
+       {0x10, AC_VERB_SET_CONNECT_SEL, 0x02},
+       {0x11, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x12, AC_VERB_SET_CONNECT_SEL, 0x01},
+       {0x13, AC_VERB_SET_CONNECT_SEL, 0x00},
+
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+
+       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
 
-       /* c/lfe channel selector/amp: input 1: capture mix: muted, (no volume selection) */
-       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, 0x7180},
+       { }
+};
 
-       /* c/lfe channel selector/amp: output 0: unmuted, max volume */
-       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+/* FIXME! */
+/*
+ * ASUS pin configuration:
+ * HP/front = 0x14, surr = 0x15, clfe = 0x16, mic = 0x18, line = 0x1a
+ */
+static struct hda_verb alc880_pin_asus_init_verbs[] = {
+       {0x10, AC_VERB_SET_CONNECT_SEL, 0x02},
+       {0x11, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x12, AC_VERB_SET_CONNECT_SEL, 0x01},
+       {0x13, AC_VERB_SET_CONNECT_SEL, 0x00},
+
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+
+       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+
+       { }
+};
 
-       /* c/lfe out pin: muted, (no volume selection)  */
-       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+/* Enable GPIO mask and set output */
+static struct hda_verb alc880_gpio1_init_verbs[] = {
+       {0x01, AC_VERB_SET_GPIO_MASK, 0x01},
+       {0x01, AC_VERB_SET_GPIO_DIRECTION, 0x01},
+       {0x01, AC_VERB_SET_GPIO_DATA, 0x01},
 
-       /* c/lfe out pin: NOT headphone enable, out enable, vref disabled */
-       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+       { }
+};
 
+/* Enable GPIO mask and set output */
+static struct hda_verb alc880_gpio2_init_verbs[] = {
+       {0x01, AC_VERB_SET_GPIO_MASK, 0x02},
+       {0x01, AC_VERB_SET_GPIO_DIRECTION, 0x02},
+       {0x01, AC_VERB_SET_GPIO_DATA, 0x02},
 
-       /* hphone/speaker input selector: front DAC */
-       {0x13, AC_VERB_SET_CONNECT_SEL, 0x0},
+       { }
+};
+
+/* Clevo m520g init */
+static struct hda_verb alc880_pin_clevo_init_verbs[] = {
+       /* headphone output */
+       {0x11, AC_VERB_SET_CONNECT_SEL, 0x01},
+       /* line-out */
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Line-in */
+       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* CD */
+       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       {0x1c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Mic1 (rear panel) */
+       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Mic2 (front panel) */
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* headphone */
+       {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+        /* change to EAPD mode */
+       {0x20, AC_VERB_SET_COEF_INDEX, 0x07},
+       {0x20, AC_VERB_SET_PROC_COEF,  0x3060},
+
+       { }
+};
+
+static struct hda_verb alc880_pin_tcl_S700_init_verbs[] = {
+       /* Headphone output */
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       /* Front output*/
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x1b, AC_VERB_SET_CONNECT_SEL, 0x00},
+
+       /* Line In pin widget for input */
+       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       /* CD pin widget for input */
+       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       /* Mic1 (rear panel) pin widget for input and vref at 80% */
+       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+
+       /* change to EAPD mode */
+       {0x20, AC_VERB_SET_COEF_INDEX, 0x07},
+       {0x20, AC_VERB_SET_PROC_COEF,  0x3070},
+
+       { }
+};
+
+/*
+ * LG m1 express dual
+ *
+ * Pin assignment:
+ *   Rear Line-In/Out (blue): 0x14
+ *   Build-in Mic-In: 0x15
+ *   Speaker-out: 0x17
+ *   HP-Out (green): 0x1b
+ *   Mic-In/Out (red): 0x19
+ *   SPDIF-Out: 0x1e
+ */
+
+/* To make 5.1 output working (green=Front, blue=Surr, red=CLFE) */
+static hda_nid_t alc880_lg_dac_nids[3] = {
+       0x05, 0x02, 0x03
+};
+
+/* seems analog CD is not working */
+static struct hda_input_mux alc880_lg_capture_source = {
+       .num_items = 3,
+       .items = {
+               { "Mic", 0x1 },
+               { "Line", 0x5 },
+               { "Internal Mic", 0x6 },
+       },
+};
+
+/* 2,4,6 channel modes */
+static struct hda_verb alc880_lg_ch2_init[] = {
+       /* set line-in and mic-in to input */
+       { 0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN },
+       { 0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 },
+       { }
+};
+
+static struct hda_verb alc880_lg_ch4_init[] = {
+       /* set line-in to out and mic-in to input */
+       { 0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP },
+       { 0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80 },
+       { }
+};
+
+static struct hda_verb alc880_lg_ch6_init[] = {
+       /* set line-in and mic-in to output */
+       { 0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP },
+       { 0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP },
+       { }
+};
+
+static struct hda_channel_mode alc880_lg_ch_modes[3] = {
+       { 2, alc880_lg_ch2_init },
+       { 4, alc880_lg_ch4_init },
+       { 6, alc880_lg_ch6_init },
+};
+
+static struct snd_kcontrol_new alc880_lg_mixer[] = {
+       /* FIXME: it's not really "master" but front channels */
+       HDA_CODEC_VOLUME("Master Playback Volume", 0x0f, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Master Playback Switch", 0x0f, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("Surround Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Surround Playback Switch", 0x0c, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x0d, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x0d, 2, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE_MONO("Center Playback Switch", 0x0d, 1, 2, HDA_INPUT),
+       HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x0d, 2, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x1, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x1, HDA_INPUT),
+       HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x06, HDA_INPUT),
+       HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x06, HDA_INPUT),
+       HDA_CODEC_VOLUME("Internal Mic Playback Volume", 0x0b, 0x07, HDA_INPUT),
+       HDA_CODEC_MUTE("Internal Mic Playback Switch", 0x0b, 0x07, HDA_INPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Channel Mode",
+               .info = alc_ch_mode_info,
+               .get = alc_ch_mode_get,
+               .put = alc_ch_mode_put,
+       },
+       { } /* end */
+};
 
-       /* hphone/speaker out pin: muted, (no volume selection)  */
-       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+static struct hda_verb alc880_lg_init_verbs[] = {
+       /* set capture source to mic-in */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       /* NOTE(review): verbs UNMUTE mixer inputs 5-7, but old comment said "mute all" -- confirm intent */
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(5)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(6)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(7)},
+       /* line-in to input */
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* built-in mic */
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* speaker-out */
+       {0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* mic-in to input */
+       {0x11, AC_VERB_SET_CONNECT_SEL, 0x01},
+       {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* HP-out */
+       {0x13, AC_VERB_SET_CONNECT_SEL, 0x03},
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* jack sense */
+       {0x1b, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | 0x1},
+       { }
+};
+
+/* toggle speaker-output according to the hp-jack state */
+static void alc880_lg_automute(struct hda_codec *codec)
+{
+       unsigned int present;
+
+       present = snd_hda_codec_read(codec, 0x1b, 0,
+                                    AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
+       snd_hda_codec_amp_update(codec, 0x17, 0, HDA_OUTPUT, 0,
+                                0x80, present ? 0x80 : 0);
+       snd_hda_codec_amp_update(codec, 0x17, 1, HDA_OUTPUT, 0,
+                                0x80, present ? 0x80 : 0);
+}
+
+static void alc880_lg_unsol_event(struct hda_codec *codec, unsigned int res)
+{
+       /* Looks like the unsol event is incompatible with the standard
+        * definition.  4bit tag is placed at 28 bit!
+        */
+       if ((res >> 28) == 0x01)
+               alc880_lg_automute(codec);
+}
+
+/*
+ * LG LW20
+ *
+ * Pin assignment:
+ *   Speaker-out: 0x14
+ *   Mic-In: 0x18
+ *   Built-in Mic-In: 0x19 (?)
+ *   HP-Out: 0x1b
+ *   SPDIF-Out: 0x1e
+ */
 
-       /* hphone/speaker out pin: NOT headphone enable, out enable, vref disabled */
-       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+/* seems analog CD is not working */
+static struct hda_input_mux alc880_lg_lw_capture_source = {
+       .num_items = 2,
+       .items = {
+               { "Mic", 0x0 },
+               { "Internal Mic", 0x1 },
+       },
+};
 
+static struct snd_kcontrol_new alc880_lg_lw_mixer[] = {
+       HDA_CODEC_VOLUME("Master Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Master Playback Switch", 0x0c, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME("Internal Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
+       HDA_CODEC_MUTE("Internal Mic Playback Switch", 0x0b, 0x01, HDA_INPUT),
+       { } /* end */
+};
 
+static struct hda_verb alc880_lg_lw_init_verbs[] = {
+       /* set capture source to mic-in */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(7)},
+       /* speaker-out */
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* HP-out */
+       {0x13, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* mic-in to input */
+       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* built-in mic */
+       {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* jack sense */
+       {0x1b, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | 0x1},
        { }
 };
 
+/* toggle speaker-output according to the hp-jack state */
+static void alc880_lg_lw_automute(struct hda_codec *codec)
+{
+       unsigned int present;
+
+       present = snd_hda_codec_read(codec, 0x1b, 0,
+                                    AC_VERB_GET_PIN_SENSE, 0) & 0x80000000;
+       snd_hda_codec_amp_update(codec, 0x14, 0, HDA_OUTPUT, 0,
+                                0x80, present ? 0x80 : 0);
+       snd_hda_codec_amp_update(codec, 0x14, 1, HDA_OUTPUT, 0,
+                                0x80, present ? 0x80 : 0);
+}
+
+static void alc880_lg_lw_unsol_event(struct hda_codec *codec, unsigned int res)
+{
+       /* Looks like the unsol event is incompatible with the standard
+        * definition.  4bit tag is placed at 28 bit!
+        */
+       if ((res >> 28) == 0x01)
+               alc880_lg_lw_automute(codec);
+}
+
+/*
+ * Common callbacks
+ */
+
 static int alc_init(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
-       snd_hda_sequence_write(codec, spec->init_verbs);
+       unsigned int i;
+
+       for (i = 0; i < spec->num_init_verbs; i++)
+               snd_hda_sequence_write(codec, spec->init_verbs[i]);
+
+       if (spec->init_hook)
+               spec->init_hook(codec);
+
        return 0;
 }
 
+static void alc_unsol_event(struct hda_codec *codec, unsigned int res)
+{
+       struct alc_spec *spec = codec->spec;
+
+       if (spec->unsol_event)
+               spec->unsol_event(codec, res);
+}
+
 #ifdef CONFIG_PM
 /*
  * resume
@@ -736,9 +1558,8 @@ static int alc_resume(struct hda_codec *codec)
        int i;
 
        alc_init(codec);
-       for (i = 0; i < spec->num_mixers; i++) {
+       for (i = 0; i < spec->num_mixers; i++)
                snd_hda_resume_ctls(codec, spec->mixers[i]);
-       }
        if (spec->multiout.dig_out_nid)
                snd_hda_resume_spdif_out(codec);
        if (spec->dig_in_nid)
@@ -753,7 +1574,7 @@ static int alc_resume(struct hda_codec *codec)
  */
 static int alc880_playback_pcm_open(struct hda_pcm_stream *hinfo,
                                    struct hda_codec *codec,
-                                   snd_pcm_substream_t *substream)
+                                   struct snd_pcm_substream *substream)
 {
        struct alc_spec *spec = codec->spec;
        return snd_hda_multi_out_analog_open(codec, &spec->multiout, substream);
@@ -763,7 +1584,7 @@ static int alc880_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
                                       struct hda_codec *codec,
                                       unsigned int stream_tag,
                                       unsigned int format,
-                                      snd_pcm_substream_t *substream)
+                                      struct snd_pcm_substream *substream)
 {
        struct alc_spec *spec = codec->spec;
        return snd_hda_multi_out_analog_prepare(codec, &spec->multiout, stream_tag,
@@ -772,7 +1593,7 @@ static int alc880_playback_pcm_prepare(struct hda_pcm_stream *hinfo,
 
 static int alc880_playback_pcm_cleanup(struct hda_pcm_stream *hinfo,
                                       struct hda_codec *codec,
-                                      snd_pcm_substream_t *substream)
+                                      struct snd_pcm_substream *substream)
 {
        struct alc_spec *spec = codec->spec;
        return snd_hda_multi_out_analog_cleanup(codec, &spec->multiout);
@@ -783,7 +1604,7 @@ static int alc880_playback_pcm_cleanup(struct hda_pcm_stream *hinfo,
  */
 static int alc880_dig_playback_pcm_open(struct hda_pcm_stream *hinfo,
                                        struct hda_codec *codec,
-                                       snd_pcm_substream_t *substream)
+                                       struct snd_pcm_substream *substream)
 {
        struct alc_spec *spec = codec->spec;
        return snd_hda_multi_out_dig_open(codec, &spec->multiout);
@@ -791,7 +1612,7 @@ static int alc880_dig_playback_pcm_open(struct hda_pcm_stream *hinfo,
 
 static int alc880_dig_playback_pcm_close(struct hda_pcm_stream *hinfo,
                                         struct hda_codec *codec,
-                                        snd_pcm_substream_t *substream)
+                                        struct snd_pcm_substream *substream)
 {
        struct alc_spec *spec = codec->spec;
        return snd_hda_multi_out_dig_close(codec, &spec->multiout);
@@ -804,7 +1625,7 @@ static int alc880_capture_pcm_prepare(struct hda_pcm_stream *hinfo,
                                      struct hda_codec *codec,
                                      unsigned int stream_tag,
                                      unsigned int format,
-                                     snd_pcm_substream_t *substream)
+                                     struct snd_pcm_substream *substream)
 {
        struct alc_spec *spec = codec->spec;
 
@@ -815,7 +1636,7 @@ static int alc880_capture_pcm_prepare(struct hda_pcm_stream *hinfo,
 
 static int alc880_capture_pcm_cleanup(struct hda_pcm_stream *hinfo,
                                      struct hda_codec *codec,
-                                     snd_pcm_substream_t *substream)
+                                     struct snd_pcm_substream *substream)
 {
        struct alc_spec *spec = codec->spec;
 
@@ -830,7 +1651,7 @@ static struct hda_pcm_stream alc880_pcm_analog_playback = {
        .substreams = 1,
        .channels_min = 2,
        .channels_max = 8,
-       .nid = 0x02, /* NID to query formats and rates */
+       /* NID is set in alc_build_pcms */
        .ops = {
                .open = alc880_playback_pcm_open,
                .prepare = alc880_playback_pcm_prepare,
@@ -842,7 +1663,7 @@ static struct hda_pcm_stream alc880_pcm_analog_capture = {
        .substreams = 2,
        .channels_min = 2,
        .channels_max = 2,
-       .nid = 0x07, /* NID to query formats and rates */
+       /* NID is set in alc_build_pcms */
        .ops = {
                .prepare = alc880_capture_pcm_prepare,
                .cleanup = alc880_capture_pcm_cleanup
@@ -867,6 +1688,13 @@ static struct hda_pcm_stream alc880_pcm_digital_capture = {
        /* NID is set in alc_build_pcms */
 };
 
+/* Used by alc_build_pcms to flag that a PCM has no playback stream */
+static struct hda_pcm_stream alc_pcm_null_playback = {
+       .substreams = 0,
+       .channels_min = 0,
+       .channels_max = 0,
+};
+
 static int alc_build_pcms(struct hda_codec *codec)
 {
        struct alc_spec *spec = codec->spec;
@@ -877,13 +1705,40 @@ static int alc_build_pcms(struct hda_codec *codec)
        codec->pcm_info = info;
 
        info->name = spec->stream_name_analog;
-       info->stream[SNDRV_PCM_STREAM_PLAYBACK] = *(spec->stream_analog_playback);
-       info->stream[SNDRV_PCM_STREAM_CAPTURE] = *(spec->stream_analog_capture);
+       if (spec->stream_analog_playback) {
+               snd_assert(spec->multiout.dac_nids, return -EINVAL);
+               info->stream[SNDRV_PCM_STREAM_PLAYBACK] = *(spec->stream_analog_playback);
+               info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = spec->multiout.dac_nids[0];
+       }
+       if (spec->stream_analog_capture) {
+               snd_assert(spec->adc_nids, return -EINVAL);
+               info->stream[SNDRV_PCM_STREAM_CAPTURE] = *(spec->stream_analog_capture);
+               info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adc_nids[0];
+       }
 
-       info->stream[SNDRV_PCM_STREAM_PLAYBACK].channels_max = 0;
-       for (i = 0; i < spec->num_channel_mode; i++) {
-               if (spec->channel_mode[i].channels > info->stream[SNDRV_PCM_STREAM_PLAYBACK].channels_max) {
-                   info->stream[SNDRV_PCM_STREAM_PLAYBACK].channels_max = spec->channel_mode[i].channels;
+       if (spec->channel_mode) {
+               info->stream[SNDRV_PCM_STREAM_PLAYBACK].channels_max = 0;
+               for (i = 0; i < spec->num_channel_mode; i++) {
+                       if (spec->channel_mode[i].channels > info->stream[SNDRV_PCM_STREAM_PLAYBACK].channels_max) {
+                               info->stream[SNDRV_PCM_STREAM_PLAYBACK].channels_max = spec->channel_mode[i].channels;
+                       }
+               }
+       }
+
+       /* If the use of more than one ADC is requested for the current
+        * model, configure a second analog capture-only PCM.
+        */
+       if (spec->num_adc_nids > 1) {
+               codec->num_pcms++;
+               info++;
+               info->name = spec->stream_name_analog;
+               /* No playback stream for second PCM */
+               info->stream[SNDRV_PCM_STREAM_PLAYBACK] = alc_pcm_null_playback;
+               info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = 0;
+               if (spec->stream_analog_capture) {
+                       snd_assert(spec->adc_nids, return -EINVAL);
+                       info->stream[SNDRV_PCM_STREAM_CAPTURE] = *(spec->stream_analog_capture);
+                       info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->adc_nids[1];
                }
        }
 
@@ -891,11 +1746,13 @@ static int alc_build_pcms(struct hda_codec *codec)
                codec->num_pcms++;
                info++;
                info->name = spec->stream_name_digital;
-               if (spec->multiout.dig_out_nid) {
+               if (spec->multiout.dig_out_nid &&
+                   spec->stream_digital_playback) {
                        info->stream[SNDRV_PCM_STREAM_PLAYBACK] = *(spec->stream_digital_playback);
                        info->stream[SNDRV_PCM_STREAM_PLAYBACK].nid = spec->multiout.dig_out_nid;
                }
-               if (spec->dig_in_nid) {
+               if (spec->dig_in_nid &&
+                   spec->stream_digital_capture) {
                        info->stream[SNDRV_PCM_STREAM_CAPTURE] = *(spec->stream_digital_capture);
                        info->stream[SNDRV_PCM_STREAM_CAPTURE].nid = spec->dig_in_nid;
                }
@@ -906,7 +1763,18 @@ static int alc_build_pcms(struct hda_codec *codec)
 
 static void alc_free(struct hda_codec *codec)
 {
-       kfree(codec->spec);
+       struct alc_spec *spec = codec->spec;
+       unsigned int i;
+
+       if (! spec)
+               return;
+
+       if (spec->kctl_alloc) {
+               for (i = 0; i < spec->num_kctl_used; i++)
+                       kfree(spec->kctl_alloc[i].name);
+               kfree(spec->kctl_alloc);
+       }
+       kfree(spec);
 }
 
 /*
@@ -916,211 +1784,1139 @@ static struct hda_codec_ops alc_patch_ops = {
        .build_pcms = alc_build_pcms,
        .init = alc_init,
        .free = alc_free,
+       .unsol_event = alc_unsol_event,
 #ifdef CONFIG_PM
        .resume = alc_resume,
 #endif
 };
 
+
+/*
+ * Test configuration for debugging
+ *
+ * Almost all inputs/outputs are enabled.  I/O pins can be configured via
+ * enum controls.
+ */
+#ifdef CONFIG_SND_DEBUG
+static hda_nid_t alc880_test_dac_nids[4] = {
+       0x02, 0x03, 0x04, 0x05
+};
+
+static struct hda_input_mux alc880_test_capture_source = {
+       .num_items = 7,
+       .items = {
+               { "In-1", 0x0 },
+               { "In-2", 0x1 },
+               { "In-3", 0x2 },
+               { "In-4", 0x3 },
+               { "CD", 0x4 },
+               { "Front", 0x5 },
+               { "Surround", 0x6 },
+       },
+};
+
+static struct hda_channel_mode alc880_test_modes[4] = {
+       { 2, NULL },
+       { 4, NULL },
+       { 6, NULL },
+       { 8, NULL },
+};
+
+static int alc_test_pin_ctl_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
+{
+       static char *texts[] = {
+               "N/A", "Line Out", "HP Out",
+               "In Hi-Z", "In 50%", "In Grd", "In 80%", "In 100%"
+       };
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+       uinfo->count = 1;
+       uinfo->value.enumerated.items = 8;
+       if (uinfo->value.enumerated.item >= 8)
+               uinfo->value.enumerated.item = 7;
+       strcpy(uinfo->value.enumerated.name, texts[uinfo->value.enumerated.item]);
+       return 0;
+}
+
+static int alc_test_pin_ctl_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       hda_nid_t nid = (hda_nid_t)kcontrol->private_value;
+       unsigned int pin_ctl, item = 0;
+
+       pin_ctl = snd_hda_codec_read(codec, nid, 0,
+                                    AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+       if (pin_ctl & AC_PINCTL_OUT_EN) {
+               if (pin_ctl & AC_PINCTL_HP_EN)
+                       item = 2;
+               else
+                       item = 1;
+       } else if (pin_ctl & AC_PINCTL_IN_EN) {
+               switch (pin_ctl & AC_PINCTL_VREFEN) {
+               case AC_PINCTL_VREF_HIZ: item = 3; break;
+               case AC_PINCTL_VREF_50:  item = 4; break;
+               case AC_PINCTL_VREF_GRD: item = 5; break;
+               case AC_PINCTL_VREF_80:  item = 6; break;
+               case AC_PINCTL_VREF_100: item = 7; break;
+               }
+       }
+       ucontrol->value.enumerated.item[0] = item;
+       return 0;
+}
+
+static int alc_test_pin_ctl_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       hda_nid_t nid = (hda_nid_t)kcontrol->private_value;
+       static unsigned int ctls[] = {
+               0, AC_PINCTL_OUT_EN, AC_PINCTL_OUT_EN | AC_PINCTL_HP_EN,
+               AC_PINCTL_IN_EN | AC_PINCTL_VREF_HIZ,
+               AC_PINCTL_IN_EN | AC_PINCTL_VREF_50,
+               AC_PINCTL_IN_EN | AC_PINCTL_VREF_GRD,
+               AC_PINCTL_IN_EN | AC_PINCTL_VREF_80,
+               AC_PINCTL_IN_EN | AC_PINCTL_VREF_100,
+       };
+       unsigned int old_ctl, new_ctl;
+
+       old_ctl = snd_hda_codec_read(codec, nid, 0,
+                                    AC_VERB_GET_PIN_WIDGET_CONTROL, 0);
+       new_ctl = ctls[ucontrol->value.enumerated.item[0]];
+       if (old_ctl != new_ctl) {
+               snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, new_ctl);
+               snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE,
+                                   ucontrol->value.enumerated.item[0] >= 3 ? 0xb080 : 0xb000);
+               return 1;
+       }
+       return 0;
+}
+
+static int alc_test_pin_src_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *uinfo)
+{
+       static char *texts[] = {
+               "Front", "Surround", "CLFE", "Side"
+       };
+       uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+       uinfo->count = 1;
+       uinfo->value.enumerated.items = 4;
+       if (uinfo->value.enumerated.item >= 4)
+               uinfo->value.enumerated.item = 3;
+       strcpy(uinfo->value.enumerated.name, texts[uinfo->value.enumerated.item]);
+       return 0;
+}
+
+static int alc_test_pin_src_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       hda_nid_t nid = (hda_nid_t)kcontrol->private_value;
+       unsigned int sel;
+
+       sel = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_CONNECT_SEL, 0);
+       ucontrol->value.enumerated.item[0] = sel & 3;
+       return 0;
+}
+
+static int alc_test_pin_src_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       hda_nid_t nid = (hda_nid_t)kcontrol->private_value;
+       unsigned int sel;
+
+       sel = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_CONNECT_SEL, 0) & 3;
+       if (ucontrol->value.enumerated.item[0] != sel) {
+               sel = ucontrol->value.enumerated.item[0] & 3;
+               snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_CONNECT_SEL, sel);
+               return 1;
+       }
+       return 0;
+}
+
+#define PIN_CTL_TEST(xname,nid) {                      \
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,    \
+                       .name = xname,                 \
+                       .info = alc_test_pin_ctl_info, \
+                       .get = alc_test_pin_ctl_get,   \
+                       .put = alc_test_pin_ctl_put,   \
+                       .private_value = nid           \
+                       }
+
+#define PIN_SRC_TEST(xname,nid) {                      \
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,    \
+                       .name = xname,                 \
+                       .info = alc_test_pin_src_info, \
+                       .get = alc_test_pin_src_get,   \
+                       .put = alc_test_pin_src_put,   \
+                       .private_value = nid           \
+                       }
+
+static struct snd_kcontrol_new alc880_test_mixer[] = {
+       HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Surround Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("CLFE Playback Volume", 0x0e, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Side Playback Volume", 0x0f, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
+       HDA_BIND_MUTE("Surround Playback Switch", 0x0d, 2, HDA_INPUT),
+       HDA_BIND_MUTE("CLFE Playback Switch", 0x0e, 2, HDA_INPUT),
+       HDA_BIND_MUTE("Side Playback Switch", 0x0f, 2, HDA_INPUT),
+       PIN_CTL_TEST("Front Pin Mode", 0x14),
+       PIN_CTL_TEST("Surround Pin Mode", 0x15),
+       PIN_CTL_TEST("CLFE Pin Mode", 0x16),
+       PIN_CTL_TEST("Side Pin Mode", 0x17),
+       PIN_CTL_TEST("In-1 Pin Mode", 0x18),
+       PIN_CTL_TEST("In-2 Pin Mode", 0x19),
+       PIN_CTL_TEST("In-3 Pin Mode", 0x1a),
+       PIN_CTL_TEST("In-4 Pin Mode", 0x1b),
+       PIN_SRC_TEST("In-1 Pin Source", 0x18),
+       PIN_SRC_TEST("In-2 Pin Source", 0x19),
+       PIN_SRC_TEST("In-3 Pin Source", 0x1a),
+       PIN_SRC_TEST("In-4 Pin Source", 0x1b),
+       HDA_CODEC_VOLUME("In-1 Playback Volume", 0x0b, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("In-1 Playback Switch", 0x0b, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME("In-2 Playback Volume", 0x0b, 0x1, HDA_INPUT),
+       HDA_CODEC_MUTE("In-2 Playback Switch", 0x0b, 0x1, HDA_INPUT),
+       HDA_CODEC_VOLUME("In-3 Playback Volume", 0x0b, 0x2, HDA_INPUT),
+       HDA_CODEC_MUTE("In-3 Playback Switch", 0x0b, 0x2, HDA_INPUT),
+       HDA_CODEC_VOLUME("In-4 Playback Volume", 0x0b, 0x3, HDA_INPUT),
+       HDA_CODEC_MUTE("In-4 Playback Switch", 0x0b, 0x3, HDA_INPUT),
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x4, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x4, HDA_INPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Channel Mode",
+               .info = alc_ch_mode_info,
+               .get = alc_ch_mode_get,
+               .put = alc_ch_mode_put,
+       },
+       { } /* end */
+};
+
+static struct hda_verb alc880_test_init_verbs[] = {
+       /* Unmute inputs of 0x0c - 0x0f */
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       /* Vol output for 0x0c-0x0f */
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       /* Set output pins 0x14-0x17 */
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       /* Unmute output pins 0x14-0x17 */
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Set input pins 0x18-0x1c */
+       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       /* Mute input pins 0x18-0x1b */
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       /* ADC set up */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x07, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x08, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x09, AC_VERB_SET_CONNECT_SEL, 0x00},
+       /* Analog input/passthru */
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)},
+       { }
+};
+#endif
+
 /*
  */
 
 static struct hda_board_config alc880_cfg_tbl[] = {
        /* Back 3 jack, front 2 jack */
        { .modelname = "3stack", .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe200, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe201, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe202, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe203, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe204, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe205, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe206, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe207, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe208, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe209, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe20a, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe20b, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe20c, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe20d, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe20e, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe20f, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe210, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe211, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe214, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe302, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe303, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe304, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe306, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe307, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xe404, .config = ALC880_3ST },
-       { .pci_vendor = 0x8086, .pci_device = 0xa101, .config = ALC880_3ST },
-       { .pci_vendor = 0x107b, .pci_device = 0x3031, .config = ALC880_3ST },
-       { .pci_vendor = 0x107b, .pci_device = 0x4036, .config = ALC880_3ST },
-       { .pci_vendor = 0x107b, .pci_device = 0x4037, .config = ALC880_3ST },
-       { .pci_vendor = 0x107b, .pci_device = 0x4038, .config = ALC880_3ST },
-       { .pci_vendor = 0x107b, .pci_device = 0x4040, .config = ALC880_3ST },
-       { .pci_vendor = 0x107b, .pci_device = 0x4041, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe200, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe201, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe202, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe203, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe204, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe205, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe206, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe207, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe208, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe209, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe20a, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe20b, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe20c, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe20d, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe20e, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe20f, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe210, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe211, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe214, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe302, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe303, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe304, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe306, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe307, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe404, .config = ALC880_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xa101, .config = ALC880_3ST },
+       { .pci_subvendor = 0x107b, .pci_subdevice = 0x3031, .config = ALC880_3ST },
+       { .pci_subvendor = 0x107b, .pci_subdevice = 0x4036, .config = ALC880_3ST },
+       { .pci_subvendor = 0x107b, .pci_subdevice = 0x4037, .config = ALC880_3ST },
+       { .pci_subvendor = 0x107b, .pci_subdevice = 0x4038, .config = ALC880_3ST },
+       { .pci_subvendor = 0x107b, .pci_subdevice = 0x4040, .config = ALC880_3ST },
+       { .pci_subvendor = 0x107b, .pci_subdevice = 0x4041, .config = ALC880_3ST },
+       /* TCL S700 */
+       { .pci_subvendor = 0x19db, .pci_subdevice = 0x4188, .config = ALC880_TCL_S700 },
 
        /* Back 3 jack, front 2 jack (Internal add Aux-In) */
-       { .pci_vendor = 0x1025, .pci_device = 0xe310, .config = ALC880_3ST },
+       { .pci_subvendor = 0x1025, .pci_subdevice = 0xe310, .config = ALC880_3ST },
+       { .pci_subvendor = 0x104d, .pci_subdevice = 0x81d6, .config = ALC880_3ST }, 
+       { .pci_subvendor = 0x104d, .pci_subdevice = 0x81a0, .config = ALC880_3ST },
 
        /* Back 3 jack plus 1 SPDIF out jack, front 2 jack */
        { .modelname = "3stack-digout", .config = ALC880_3ST_DIG },
-       { .pci_vendor = 0x8086, .pci_device = 0xe308, .config = ALC880_3ST_DIG },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe308, .config = ALC880_3ST_DIG },
+       { .pci_subvendor = 0x1025, .pci_subdevice = 0x0070, .config = ALC880_3ST_DIG },
+       /* Clevo m520G NB */
+       { .pci_subvendor = 0x1558, .pci_subdevice = 0x0520, .config = ALC880_CLEVO },
 
        /* Back 3 jack plus 1 SPDIF out jack, front 2 jack (Internal add Aux-In)*/
-       { .pci_vendor = 0x8086, .pci_device = 0xe305, .config = ALC880_3ST_DIG },
-       { .pci_vendor = 0x8086, .pci_device = 0xd402, .config = ALC880_3ST_DIG },
-       { .pci_vendor = 0x1025, .pci_device = 0xe309, .config = ALC880_3ST_DIG },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe305, .config = ALC880_3ST_DIG },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xd402, .config = ALC880_3ST_DIG },
+       { .pci_subvendor = 0x1025, .pci_subdevice = 0xe309, .config = ALC880_3ST_DIG },
 
        /* Back 5 jack, front 2 jack */
        { .modelname = "5stack", .config = ALC880_5ST },
-       { .pci_vendor = 0x107b, .pci_device = 0x3033, .config = ALC880_5ST },
-       { .pci_vendor = 0x107b, .pci_device = 0x4039, .config = ALC880_5ST },
-       { .pci_vendor = 0x107b, .pci_device = 0x3032, .config = ALC880_5ST },
-       { .pci_vendor = 0x103c, .pci_device = 0x2a09, .config = ALC880_5ST },
+       { .pci_subvendor = 0x107b, .pci_subdevice = 0x3033, .config = ALC880_5ST },
+       { .pci_subvendor = 0x107b, .pci_subdevice = 0x4039, .config = ALC880_5ST },
+       { .pci_subvendor = 0x107b, .pci_subdevice = 0x3032, .config = ALC880_5ST },
+       { .pci_subvendor = 0x103c, .pci_subdevice = 0x2a09, .config = ALC880_5ST },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x814e, .config = ALC880_5ST },
 
        /* Back 5 jack plus 1 SPDIF out jack, front 2 jack */
        { .modelname = "5stack-digout", .config = ALC880_5ST_DIG },
-       { .pci_vendor = 0x8086, .pci_device = 0xe224, .config = ALC880_5ST_DIG },
-       { .pci_vendor = 0x8086, .pci_device = 0xe400, .config = ALC880_5ST_DIG },
-       { .pci_vendor = 0x8086, .pci_device = 0xe401, .config = ALC880_5ST_DIG },
-       { .pci_vendor = 0x8086, .pci_device = 0xe402, .config = ALC880_5ST_DIG },
-       { .pci_vendor = 0x8086, .pci_device = 0xd400, .config = ALC880_5ST_DIG },
-       { .pci_vendor = 0x8086, .pci_device = 0xd401, .config = ALC880_5ST_DIG },
-       { .pci_vendor = 0x8086, .pci_device = 0xa100, .config = ALC880_5ST_DIG },
-       { .pci_vendor = 0x1565, .pci_device = 0x8202, .config = ALC880_5ST_DIG },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe224, .config = ALC880_5ST_DIG },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe400, .config = ALC880_5ST_DIG },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe401, .config = ALC880_5ST_DIG },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xe402, .config = ALC880_5ST_DIG },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xd400, .config = ALC880_5ST_DIG },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xd401, .config = ALC880_5ST_DIG },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xa100, .config = ALC880_5ST_DIG },
+       { .pci_subvendor = 0x1565, .pci_subdevice = 0x8202, .config = ALC880_5ST_DIG },
+       { .pci_subvendor = 0x1019, .pci_subdevice = 0xa880, .config = ALC880_5ST_DIG },
+       { .pci_subvendor = 0xa0a0, .pci_subdevice = 0x0560,
+         .config = ALC880_5ST_DIG }, /* Aopen i915GMm-HFS */
+       /* { .pci_subvendor = 0x1019, .pci_subdevice = 0xa884, .config = ALC880_5ST_DIG }, */ /* conflict with 6stack */
+       { .pci_subvendor = 0x1695, .pci_subdevice = 0x400d, .config = ALC880_5ST_DIG },
+       /* note subvendor = 0 below */
+       /* { .pci_subvendor = 0x0000, .pci_subdevice = 0x8086, .config = ALC880_5ST_DIG }, */
 
        { .modelname = "w810", .config = ALC880_W810 },
-       { .pci_vendor = 0x161f, .pci_device = 0x203d, .config = ALC880_W810 },
+       { .pci_subvendor = 0x161f, .pci_subdevice = 0x203d, .config = ALC880_W810 },
+
+       { .modelname = "z71v", .config = ALC880_Z71V },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x1964, .config = ALC880_Z71V },
+
+       { .modelname = "6stack", .config = ALC880_6ST },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x8196, .config = ALC880_6ST }, /* ASUS P5GD1-HVM */
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x81b4, .config = ALC880_6ST },
+       { .pci_subvendor = 0x1019, .pci_subdevice = 0xa884, .config = ALC880_6ST }, /* Acer APFV */
+       { .pci_subvendor = 0x1458, .pci_subdevice = 0xa102, .config = ALC880_6ST }, /* Gigabyte K8N51 */
+
+       { .modelname = "6stack-digout", .config = ALC880_6ST_DIG },
+       { .pci_subvendor = 0x2668, .pci_subdevice = 0x8086, .config = ALC880_6ST_DIG },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0x2668, .config = ALC880_6ST_DIG },
+       { .pci_subvendor = 0x1462, .pci_subdevice = 0x1150, .config = ALC880_6ST_DIG },
+       { .pci_subvendor = 0xe803, .pci_subdevice = 0x1019, .config = ALC880_6ST_DIG },
+       { .pci_subvendor = 0x1039, .pci_subdevice = 0x1234, .config = ALC880_6ST_DIG },
+       { .pci_subvendor = 0x1025, .pci_subdevice = 0x0077, .config = ALC880_6ST_DIG },
+       { .pci_subvendor = 0x1025, .pci_subdevice = 0x0078, .config = ALC880_6ST_DIG },
+       { .pci_subvendor = 0x1025, .pci_subdevice = 0x0087, .config = ALC880_6ST_DIG },
+       { .pci_subvendor = 0x1297, .pci_subdevice = 0xc790, .config = ALC880_6ST_DIG }, /* Shuttle ST20G5 */
+       { .pci_subvendor = 0x1509, .pci_subdevice = 0x925d, .config = ALC880_6ST_DIG }, /* FIC P4M-915GD1 */
+       { .pci_subvendor = 0x1695, .pci_subdevice = 0x4012, .config = ALC880_5ST_DIG }, /* Epox EP-5LDA+ GLi */
+
+       { .modelname = "asus", .config = ALC880_ASUS },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x1964, .config = ALC880_ASUS_DIG },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x1973, .config = ALC880_ASUS_DIG },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x19b3, .config = ALC880_ASUS_DIG },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x1113, .config = ALC880_ASUS_DIG },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x1173, .config = ALC880_ASUS_DIG },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x1993, .config = ALC880_ASUS },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x10c3, .config = ALC880_ASUS_DIG },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x1133, .config = ALC880_ASUS },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x1123, .config = ALC880_ASUS_DIG },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x1143, .config = ALC880_ASUS },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x10b3, .config = ALC880_ASUS_W1V },
+       { .pci_subvendor = 0x1043, .pci_subdevice = 0x8181, .config = ALC880_ASUS_DIG }, /* ASUS P4GPL-X */
+       { .pci_subvendor = 0x1558, .pci_subdevice = 0x5401, .config = ALC880_ASUS_DIG2 },
+
+       { .modelname = "uniwill", .config = ALC880_UNIWILL_DIG },
+       { .pci_subvendor = 0x1584, .pci_subdevice = 0x9050, .config = ALC880_UNIWILL_DIG },     
+
+       { .modelname = "F1734", .config = ALC880_F1734 },
+       { .pci_subvendor = 0x1734, .pci_subdevice = 0x107c, .config = ALC880_F1734 },
+       { .pci_subvendor = 0x1584, .pci_subdevice = 0x9054, .config = ALC880_F1734 },
+
+       { .modelname = "lg", .config = ALC880_LG },
+       { .pci_subvendor = 0x1854, .pci_subdevice = 0x003b, .config = ALC880_LG },
+
+       { .modelname = "lg-lw", .config = ALC880_LG_LW },
+       { .pci_subvendor = 0x1854, .pci_subdevice = 0x0018, .config = ALC880_LG_LW },
+
+#ifdef CONFIG_SND_DEBUG
+       { .modelname = "test", .config = ALC880_TEST },
+#endif
+       { .modelname = "auto", .config = ALC880_AUTO },
 
        {}
 };
 
-static int patch_alc880(struct hda_codec *codec)
-{
-       struct alc_spec *spec;
-       int board_config;
-
-       spec = kcalloc(1, sizeof(*spec), GFP_KERNEL);
-       if (spec == NULL)
-               return -ENOMEM;
+/*
+ * ALC880 codec presets
+ */
+static struct alc_config_preset alc880_presets[] = {
+       [ALC880_3ST] = {
+               .mixers = { alc880_three_stack_mixer },
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_3stack_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_dac_nids),
+               .dac_nids = alc880_dac_nids,
+               .num_channel_mode = ARRAY_SIZE(alc880_threestack_modes),
+               .channel_mode = alc880_threestack_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_3ST_DIG] = {
+               .mixers = { alc880_three_stack_mixer },
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_3stack_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_dac_nids),
+               .dac_nids = alc880_dac_nids,
+               .dig_out_nid = ALC880_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc880_threestack_modes),
+               .channel_mode = alc880_threestack_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_TCL_S700] = {
+               .mixers = { alc880_tcl_s700_mixer },
+               .init_verbs = { alc880_volume_init_verbs,
+                               alc880_pin_tcl_S700_init_verbs,
+                               alc880_gpio2_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_dac_nids),
+               .dac_nids = alc880_dac_nids,
+               .hp_nid = 0x03,
+               .num_channel_mode = ARRAY_SIZE(alc880_2_jack_modes),
+               .channel_mode = alc880_2_jack_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_5ST] = {
+               .mixers = { alc880_three_stack_mixer, alc880_five_stack_mixer},
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_5stack_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_dac_nids),
+               .dac_nids = alc880_dac_nids,
+               .num_channel_mode = ARRAY_SIZE(alc880_fivestack_modes),
+               .channel_mode = alc880_fivestack_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_5ST_DIG] = {
+               .mixers = { alc880_three_stack_mixer, alc880_five_stack_mixer },
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_5stack_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_dac_nids),
+               .dac_nids = alc880_dac_nids,
+               .dig_out_nid = ALC880_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc880_fivestack_modes),
+               .channel_mode = alc880_fivestack_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_6ST] = {
+               .mixers = { alc880_six_stack_mixer },
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_6stack_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_6st_dac_nids),
+               .dac_nids = alc880_6st_dac_nids,
+               .num_channel_mode = ARRAY_SIZE(alc880_sixstack_modes),
+               .channel_mode = alc880_sixstack_modes,
+               .input_mux = &alc880_6stack_capture_source,
+       },
+       [ALC880_6ST_DIG] = {
+               .mixers = { alc880_six_stack_mixer },
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_6stack_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_6st_dac_nids),
+               .dac_nids = alc880_6st_dac_nids,
+               .dig_out_nid = ALC880_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc880_sixstack_modes),
+               .channel_mode = alc880_sixstack_modes,
+               .input_mux = &alc880_6stack_capture_source,
+       },
+       [ALC880_W810] = {
+               .mixers = { alc880_w810_base_mixer },
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_w810_init_verbs,
+                               alc880_gpio2_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_w810_dac_nids),
+               .dac_nids = alc880_w810_dac_nids,
+               .dig_out_nid = ALC880_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc880_w810_modes),
+               .channel_mode = alc880_w810_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_Z71V] = {
+               .mixers = { alc880_z71v_mixer },
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_z71v_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_z71v_dac_nids),
+               .dac_nids = alc880_z71v_dac_nids,
+               .dig_out_nid = ALC880_DIGOUT_NID,
+               .hp_nid = 0x03,
+               .num_channel_mode = ARRAY_SIZE(alc880_2_jack_modes),
+               .channel_mode = alc880_2_jack_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_F1734] = {
+               .mixers = { alc880_f1734_mixer },
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_f1734_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_f1734_dac_nids),
+               .dac_nids = alc880_f1734_dac_nids,
+               .hp_nid = 0x02,
+               .num_channel_mode = ARRAY_SIZE(alc880_2_jack_modes),
+               .channel_mode = alc880_2_jack_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_ASUS] = {
+               .mixers = { alc880_asus_mixer },
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_asus_init_verbs,
+                               alc880_gpio1_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_asus_dac_nids),
+               .dac_nids = alc880_asus_dac_nids,
+               .num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
+               .channel_mode = alc880_asus_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_ASUS_DIG] = {
+               .mixers = { alc880_asus_mixer },
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_asus_init_verbs,
+                               alc880_gpio1_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_asus_dac_nids),
+               .dac_nids = alc880_asus_dac_nids,
+               .dig_out_nid = ALC880_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
+               .channel_mode = alc880_asus_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_ASUS_DIG2] = {
+               .mixers = { alc880_asus_mixer },
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_asus_init_verbs,
+                               alc880_gpio2_init_verbs }, /* use GPIO2 */
+               .num_dacs = ARRAY_SIZE(alc880_asus_dac_nids),
+               .dac_nids = alc880_asus_dac_nids,
+               .dig_out_nid = ALC880_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
+               .channel_mode = alc880_asus_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_ASUS_W1V] = {
+               .mixers = { alc880_asus_mixer, alc880_asus_w1v_mixer },
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_asus_init_verbs,
+                               alc880_gpio1_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_asus_dac_nids),
+               .dac_nids = alc880_asus_dac_nids,
+               .dig_out_nid = ALC880_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
+               .channel_mode = alc880_asus_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_UNIWILL_DIG] = {
+               .mixers = { alc880_asus_mixer, alc880_pcbeep_mixer },
+               .init_verbs = { alc880_volume_init_verbs, alc880_pin_asus_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_asus_dac_nids),
+               .dac_nids = alc880_asus_dac_nids,
+               .dig_out_nid = ALC880_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc880_asus_modes),
+               .channel_mode = alc880_asus_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_CLEVO] = {
+               .mixers = { alc880_three_stack_mixer },
+               .init_verbs = { alc880_volume_init_verbs,
+                               alc880_pin_clevo_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_dac_nids),
+               .dac_nids = alc880_dac_nids,
+               .hp_nid = 0x03,
+               .num_channel_mode = ARRAY_SIZE(alc880_threestack_modes),
+               .channel_mode = alc880_threestack_modes,
+               .input_mux = &alc880_capture_source,
+       },
+       [ALC880_LG] = {
+               .mixers = { alc880_lg_mixer },
+               .init_verbs = { alc880_volume_init_verbs,
+                               alc880_lg_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_lg_dac_nids),
+               .dac_nids = alc880_lg_dac_nids,
+               .dig_out_nid = ALC880_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc880_lg_ch_modes),
+               .channel_mode = alc880_lg_ch_modes,
+               .input_mux = &alc880_lg_capture_source,
+               .unsol_event = alc880_lg_unsol_event,
+               .init_hook = alc880_lg_automute,
+       },
+       [ALC880_LG_LW] = {
+               .mixers = { alc880_lg_lw_mixer },
+               .init_verbs = { alc880_volume_init_verbs,
+                               alc880_lg_lw_init_verbs },
+               .num_dacs = 1, 
+               .dac_nids = alc880_dac_nids,
+               .dig_out_nid = ALC880_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc880_2_jack_modes),
+               .channel_mode = alc880_2_jack_modes,
+               .input_mux = &alc880_lg_lw_capture_source,
+               .unsol_event = alc880_lg_lw_unsol_event,
+               .init_hook = alc880_lg_lw_automute,
+       },
+#ifdef CONFIG_SND_DEBUG
+       [ALC880_TEST] = {
+               .mixers = { alc880_test_mixer },
+               .init_verbs = { alc880_test_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc880_test_dac_nids),
+               .dac_nids = alc880_test_dac_nids,
+               .dig_out_nid = ALC880_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc880_test_modes),
+               .channel_mode = alc880_test_modes,
+               .input_mux = &alc880_test_capture_source,
+       },
+#endif
+};
 
-       codec->spec = spec;
+/*
+ * Automatic parse of I/O pins from the BIOS configuration
+ */
 
-       board_config = snd_hda_check_board_config(codec, alc880_cfg_tbl);
-       if (board_config < 0) {
-               snd_printd(KERN_INFO "hda_codec: Unknown model for ALC880\n");
-               board_config = ALC880_MINIMAL;
-       }
+#define NUM_CONTROL_ALLOC      32
+#define NUM_VERB_ALLOC         32
 
-       switch (board_config) {
-       case ALC880_W810:
-               spec->mixers[spec->num_mixers] = alc880_w810_base_mixer;
-               spec->num_mixers++;
-               break;
-       case ALC880_5ST:
-       case ALC880_5ST_DIG:
-               spec->mixers[spec->num_mixers] = alc880_five_stack_mixer;
-               spec->num_mixers++;
-               break;
-       default:
-               spec->mixers[spec->num_mixers] = alc880_base_mixer;
-               spec->num_mixers++;
-               break;
-       }
+enum {
+       ALC_CTL_WIDGET_VOL,
+       ALC_CTL_WIDGET_MUTE,
+       ALC_CTL_BIND_MUTE,
+};
+static struct snd_kcontrol_new alc880_control_templates[] = {
+       HDA_CODEC_VOLUME(NULL, 0, 0, 0),
+       HDA_CODEC_MUTE(NULL, 0, 0, 0),
+       HDA_BIND_MUTE(NULL, 0, 0, 0),
+};
 
-       switch (board_config) {
-       case ALC880_3ST_DIG:
-       case ALC880_5ST_DIG:
-       case ALC880_W810:
-               spec->multiout.dig_out_nid = ALC880_DIGOUT_NID;
-               break;
-       default:
-               break;
-       }
+/* add dynamic controls */
+static int add_control(struct alc_spec *spec, int type, const char *name, unsigned long val)
+{
+       struct snd_kcontrol_new *knew;
 
-       switch (board_config) {
-       case ALC880_3ST:
-       case ALC880_3ST_DIG:
-       case ALC880_5ST:
-       case ALC880_5ST_DIG:
-       case ALC880_W810:
-               spec->front_panel = 1;
-               break;
-       default:
-               break;
-       }
+       if (spec->num_kctl_used >= spec->num_kctl_alloc) {
+               int num = spec->num_kctl_alloc + NUM_CONTROL_ALLOC;
 
-       switch (board_config) {
-       case ALC880_5ST:
-       case ALC880_5ST_DIG:
-               spec->init_verbs = alc880_init_verbs_five_stack;
-               spec->channel_mode = alc880_fivestack_modes;
-               spec->num_channel_mode = ARRAY_SIZE(alc880_fivestack_modes);
-               break;
-       case ALC880_W810:
-               spec->init_verbs = alc880_w810_init_verbs;
-               spec->channel_mode = alc880_w810_modes;
-               spec->num_channel_mode = ARRAY_SIZE(alc880_w810_modes);
-               break;
-       default:
-               spec->init_verbs = alc880_init_verbs_three_stack;
-               spec->channel_mode = alc880_threestack_modes;
-               spec->num_channel_mode = ARRAY_SIZE(alc880_threestack_modes);
-               break;
+               knew = kcalloc(num + 1, sizeof(*knew), GFP_KERNEL); /* array + terminator */
+               if (! knew)
+                       return -ENOMEM;
+               if (spec->kctl_alloc) {
+                       memcpy(knew, spec->kctl_alloc, sizeof(*knew) * spec->num_kctl_alloc);
+                       kfree(spec->kctl_alloc);
+               }
+               spec->kctl_alloc = knew;
+               spec->num_kctl_alloc = num;
        }
 
-       spec->stream_name_analog = "ALC880 Analog";
-       spec->stream_analog_playback = &alc880_pcm_analog_playback;
-       spec->stream_analog_capture = &alc880_pcm_analog_capture;
-
-       spec->stream_name_digital = "ALC880 Digital";
-       spec->stream_digital_playback = &alc880_pcm_digital_playback;
-       spec->stream_digital_capture = &alc880_pcm_digital_capture;
-
-       spec->multiout.max_channels = spec->channel_mode[0].channels;
+       knew = &spec->kctl_alloc[spec->num_kctl_used];
+       *knew = alc880_control_templates[type];
+       knew->name = kstrdup(name, GFP_KERNEL);
+       if (! knew->name)
+               return -ENOMEM;
+       knew->private_value = val;
+       spec->num_kctl_used++;
+       return 0;
+}
 
-       switch (board_config) {
-       case ALC880_W810:
-               spec->multiout.num_dacs = ARRAY_SIZE(alc880_w810_dac_nids);
-               spec->multiout.dac_nids = alc880_w810_dac_nids;
-               // No dedicated headphone socket - it's shared with built-in speakers.
-               break;
-       default:
-               spec->multiout.num_dacs = ARRAY_SIZE(alc880_dac_nids);
-               spec->multiout.dac_nids = alc880_dac_nids;
-               spec->multiout.hp_nid = 0x03; /* rear-surround NID */
-               break;
+#define alc880_is_fixed_pin(nid)       ((nid) >= 0x14 && (nid) <= 0x17)
+#define alc880_fixed_pin_idx(nid)      ((nid) - 0x14)
+#define alc880_is_multi_pin(nid)       ((nid) >= 0x18)
+#define alc880_multi_pin_idx(nid)      ((nid) - 0x18)
+#define alc880_is_input_pin(nid)       ((nid) >= 0x18)
+#define alc880_input_pin_idx(nid)      ((nid) - 0x18)
+#define alc880_idx_to_dac(nid)         ((nid) + 0x02)
+#define alc880_dac_to_idx(nid)         ((nid) - 0x02)
+#define alc880_idx_to_mixer(nid)       ((nid) + 0x0c)
+#define alc880_idx_to_selector(nid)    ((nid) + 0x10)
+#define ALC880_PIN_CD_NID              0x1c
+
+/* fill in the dac_nids table from the parsed pin configuration */
+static int alc880_auto_fill_dac_nids(struct alc_spec *spec, const struct auto_pin_cfg *cfg)
+{
+       hda_nid_t nid;
+       int assigned[4];
+       int i, j;
+
+       memset(assigned, 0, sizeof(assigned));
+       spec->multiout.dac_nids = spec->private_dac_nids;
+
+       /* check the pins hardwired to audio widget */
+       for (i = 0; i < cfg->line_outs; i++) {
+               nid = cfg->line_out_pins[i];
+               if (alc880_is_fixed_pin(nid)) {
+                       int idx = alc880_fixed_pin_idx(nid);
+                       spec->multiout.dac_nids[i] = alc880_idx_to_dac(idx);
+                       assigned[idx] = 1;
+               }
        }
-
-       spec->input_mux = &alc880_capture_source;
-       spec->num_adc_nids = ARRAY_SIZE(alc880_adc_nids);
-       spec->adc_nids = alc880_adc_nids;
-
-       codec->patch_ops = alc_patch_ops;
-
+       /* left pins can be connect to any audio widget */
+       for (i = 0; i < cfg->line_outs; i++) {
+               nid = cfg->line_out_pins[i];
+               if (alc880_is_fixed_pin(nid))
+                       continue;
+               /* search for an empty channel */
+               for (j = 0; j < cfg->line_outs; j++) {
+                       if (! assigned[j]) {
+                               spec->multiout.dac_nids[i] = alc880_idx_to_dac(j);
+                               assigned[j] = 1;
+                               break;
+                       }
+               }
+       }
+       spec->multiout.num_dacs = cfg->line_outs;
        return 0;
 }
 
-/*
+/* add playback controls from the parsed DAC table */
+static int alc880_auto_create_multi_out_ctls(struct alc_spec *spec,
+                                            const struct auto_pin_cfg *cfg)
+{
+       char name[32];
+       static const char *chname[4] = { "Front", "Surround", NULL /*CLFE*/, "Side" };
+       hda_nid_t nid;
+       int i, err;
+
+       for (i = 0; i < cfg->line_outs; i++) {
+               if (! spec->multiout.dac_nids[i])
+                       continue;
+               nid = alc880_idx_to_mixer(alc880_dac_to_idx(spec->multiout.dac_nids[i]));
+               if (i == 2) {
+                       /* Center/LFE */
+                       if ((err = add_control(spec, ALC_CTL_WIDGET_VOL, "Center Playback Volume",
+                                              HDA_COMPOSE_AMP_VAL(nid, 1, 0, HDA_OUTPUT))) < 0)
+                               return err;
+                       if ((err = add_control(spec, ALC_CTL_WIDGET_VOL, "LFE Playback Volume",
+                                              HDA_COMPOSE_AMP_VAL(nid, 2, 0, HDA_OUTPUT))) < 0)
+                               return err;
+                       if ((err = add_control(spec, ALC_CTL_BIND_MUTE, "Center Playback Switch",
+                                              HDA_COMPOSE_AMP_VAL(nid, 1, 2, HDA_INPUT))) < 0)
+                               return err;
+                       if ((err = add_control(spec, ALC_CTL_BIND_MUTE, "LFE Playback Switch",
+                                              HDA_COMPOSE_AMP_VAL(nid, 2, 2, HDA_INPUT))) < 0)
+                               return err;
+               } else {
+                       sprintf(name, "%s Playback Volume", chname[i]);
+                       if ((err = add_control(spec, ALC_CTL_WIDGET_VOL, name,
+                                              HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
+                               return err;
+                       sprintf(name, "%s Playback Switch", chname[i]);
+                       if ((err = add_control(spec, ALC_CTL_BIND_MUTE, name,
+                                              HDA_COMPOSE_AMP_VAL(nid, 3, 2, HDA_INPUT))) < 0)
+                               return err;
+               }
+       }
+       return 0;
+}
+
+/* add playback controls for speaker and HP outputs */
+static int alc880_auto_create_extra_out(struct alc_spec *spec, hda_nid_t pin,
+                                       const char *pfx)
+{
+       hda_nid_t nid;
+       int err;
+       char name[32];
+
+       if (! pin)
+               return 0;
+
+       if (alc880_is_fixed_pin(pin)) {
+               nid = alc880_idx_to_dac(alc880_fixed_pin_idx(pin));
+               /* specify the DAC as the extra output */
+               if (! spec->multiout.hp_nid)
+                       spec->multiout.hp_nid = nid;
+               else
+                       spec->multiout.extra_out_nid[0] = nid;
+               /* control HP volume/switch on the output mixer amp */
+               nid = alc880_idx_to_mixer(alc880_fixed_pin_idx(pin));
+               sprintf(name, "%s Playback Volume", pfx);
+               if ((err = add_control(spec, ALC_CTL_WIDGET_VOL, name,
+                                      HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
+                       return err;
+               sprintf(name, "%s Playback Switch", pfx);
+               if ((err = add_control(spec, ALC_CTL_BIND_MUTE, name,
+                                      HDA_COMPOSE_AMP_VAL(nid, 3, 2, HDA_INPUT))) < 0)
+                       return err;
+       } else if (alc880_is_multi_pin(pin)) {
+               /* set manual connection */
+               /* we have only a switch on HP-out PIN */
+               sprintf(name, "%s Playback Switch", pfx);
+               if ((err = add_control(spec, ALC_CTL_WIDGET_MUTE, name,
+                                      HDA_COMPOSE_AMP_VAL(pin, 3, 0, HDA_OUTPUT))) < 0)
+                       return err;
+       }
+       return 0;
+}
+
+/* create input playback/capture controls for the given pin */
+static int new_analog_input(struct alc_spec *spec, hda_nid_t pin, const char *ctlname,
+                           int idx, hda_nid_t mix_nid)
+{
+       char name[32];
+       int err;
+
+       sprintf(name, "%s Playback Volume", ctlname);
+       if ((err = add_control(spec, ALC_CTL_WIDGET_VOL, name,
+                              HDA_COMPOSE_AMP_VAL(mix_nid, 3, idx, HDA_INPUT))) < 0)
+               return err;
+       sprintf(name, "%s Playback Switch", ctlname);
+       if ((err = add_control(spec, ALC_CTL_WIDGET_MUTE, name,
+                              HDA_COMPOSE_AMP_VAL(mix_nid, 3, idx, HDA_INPUT))) < 0)
+               return err;
+       return 0;
+}
+
+/* create playback/capture controls for input pins */
+static int alc880_auto_create_analog_input_ctls(struct alc_spec *spec,
+                                               const struct auto_pin_cfg *cfg)
+{
+       struct hda_input_mux *imux = &spec->private_imux;
+       int i, err, idx;
+
+       for (i = 0; i < AUTO_PIN_LAST; i++) {
+               if (alc880_is_input_pin(cfg->input_pins[i])) {
+                       idx = alc880_input_pin_idx(cfg->input_pins[i]);
+                       err = new_analog_input(spec, cfg->input_pins[i],
+                                              auto_pin_cfg_labels[i],
+                                              idx, 0x0b);
+                       if (err < 0)
+                               return err;
+                       imux->items[imux->num_items].label = auto_pin_cfg_labels[i];
+                       imux->items[imux->num_items].index = alc880_input_pin_idx(cfg->input_pins[i]);
+                       imux->num_items++;
+               }
+       }
+       return 0;
+}
+
+static void alc880_auto_set_output_and_unmute(struct hda_codec *codec,
+                                             hda_nid_t nid, int pin_type,
+                                             int dac_idx)
+{
+       /* set as output */
+       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, pin_type);
+       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE);
+       /* need the manual connection? */
+       if (alc880_is_multi_pin(nid)) {
+               struct alc_spec *spec = codec->spec;
+               int idx = alc880_multi_pin_idx(nid);
+               snd_hda_codec_write(codec, alc880_idx_to_selector(idx), 0,
+                                   AC_VERB_SET_CONNECT_SEL,
+                                   alc880_dac_to_idx(spec->multiout.dac_nids[dac_idx]));
+       }
+}
+
+static void alc880_auto_init_multi_out(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       int i;
+
+       for (i = 0; i < spec->autocfg.line_outs; i++) {
+               hda_nid_t nid = spec->autocfg.line_out_pins[i];
+               alc880_auto_set_output_and_unmute(codec, nid, PIN_OUT, i);
+       }
+}
+
+static void alc880_auto_init_extra_out(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       hda_nid_t pin;
+
+       pin = spec->autocfg.speaker_pins[0];
+       if (pin) /* connect to front */
+               alc880_auto_set_output_and_unmute(codec, pin, PIN_OUT, 0);
+       pin = spec->autocfg.hp_pin;
+       if (pin) /* connect to front */
+               alc880_auto_set_output_and_unmute(codec, pin, PIN_HP, 0);
+}
+
+static void alc880_auto_init_analog_input(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       int i;
+
+       for (i = 0; i < AUTO_PIN_LAST; i++) {
+               hda_nid_t nid = spec->autocfg.input_pins[i];
+               if (alc880_is_input_pin(nid)) {
+                       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
+                                           i <= AUTO_PIN_FRONT_MIC ? PIN_VREF80 : PIN_IN);
+                       if (nid != ALC880_PIN_CD_NID)
+                               snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE,
+                                                   AMP_OUT_MUTE);
+               }
+       }
+}
+
+/* parse the BIOS configuration and set up the alc_spec */
+/* return 1 if successful, 0 if the proper config is not found, or a negative error code */
+static int alc880_parse_auto_config(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       int err;
+       static hda_nid_t alc880_ignore[] = { 0x1d, 0 };
+
+       if ((err = snd_hda_parse_pin_def_config(codec, &spec->autocfg,
+                                               alc880_ignore)) < 0)
+               return err;
+       if (! spec->autocfg.line_outs)
+               return 0; /* can't find valid BIOS pin config */
+
+       if ((err = alc880_auto_fill_dac_nids(spec, &spec->autocfg)) < 0 ||
+           (err = alc880_auto_create_multi_out_ctls(spec, &spec->autocfg)) < 0 ||
+           (err = alc880_auto_create_extra_out(spec,
+                                               spec->autocfg.speaker_pins[0],
+                                               "Speaker")) < 0 ||
+           (err = alc880_auto_create_extra_out(spec, spec->autocfg.hp_pin,
+                                               "Headphone")) < 0 ||
+           (err = alc880_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0)
+               return err;
+
+       spec->multiout.max_channels = spec->multiout.num_dacs * 2;
+
+       if (spec->autocfg.dig_out_pin)
+               spec->multiout.dig_out_nid = ALC880_DIGOUT_NID;
+       if (spec->autocfg.dig_in_pin)
+               spec->dig_in_nid = ALC880_DIGIN_NID;
+
+       if (spec->kctl_alloc)
+               spec->mixers[spec->num_mixers++] = spec->kctl_alloc;
+
+       spec->init_verbs[spec->num_init_verbs++] = alc880_volume_init_verbs;
+
+       spec->num_mux_defs = 1;
+       spec->input_mux = &spec->private_imux;
+
+       return 1;
+}
+
+/* additional initialization for auto-configuration model */
+static void alc880_auto_init(struct hda_codec *codec)
+{
+       alc880_auto_init_multi_out(codec);
+       alc880_auto_init_extra_out(codec);
+       alc880_auto_init_analog_input(codec);
+}
+
+/*
+ * OK, here we have finally the patch for ALC880
+ */
+
+static int patch_alc880(struct hda_codec *codec)
+{
+       struct alc_spec *spec;
+       int board_config;
+       int err;
+
+       spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+       if (spec == NULL)
+               return -ENOMEM;
+
+       codec->spec = spec;
+
+       board_config = snd_hda_check_board_config(codec, alc880_cfg_tbl);
+       if (board_config < 0 || board_config >= ALC880_MODEL_LAST) {
+               printk(KERN_INFO "hda_codec: Unknown model for ALC880, trying auto-probe from BIOS...\n");
+               board_config = ALC880_AUTO;
+       }
+
+       if (board_config == ALC880_AUTO) {
+               /* automatic parse from the BIOS config */
+               err = alc880_parse_auto_config(codec);
+               if (err < 0) {
+                       alc_free(codec);
+                       return err;
+               } else if (! err) {
+                       printk(KERN_INFO "hda_codec: Cannot set up configuration from BIOS.  Using 3-stack mode...\n");
+                       board_config = ALC880_3ST;
+               }
+       }
+
+       if (board_config != ALC880_AUTO)
+               setup_preset(spec, &alc880_presets[board_config]);
+
+       spec->stream_name_analog = "ALC880 Analog";
+       spec->stream_analog_playback = &alc880_pcm_analog_playback;
+       spec->stream_analog_capture = &alc880_pcm_analog_capture;
+
+       spec->stream_name_digital = "ALC880 Digital";
+       spec->stream_digital_playback = &alc880_pcm_digital_playback;
+       spec->stream_digital_capture = &alc880_pcm_digital_capture;
+
+       if (! spec->adc_nids && spec->input_mux) {
+               /* check whether NID 0x07 is valid */
+               unsigned int wcap = get_wcaps(codec, alc880_adc_nids[0]);
+               wcap = (wcap & AC_WCAP_TYPE) >> AC_WCAP_TYPE_SHIFT; /* get type */
+               if (wcap != AC_WID_AUD_IN) {
+                       spec->adc_nids = alc880_adc_nids_alt;
+                       spec->num_adc_nids = ARRAY_SIZE(alc880_adc_nids_alt);
+                       spec->mixers[spec->num_mixers] = alc880_capture_alt_mixer;
+                       spec->num_mixers++;
+               } else {
+                       spec->adc_nids = alc880_adc_nids;
+                       spec->num_adc_nids = ARRAY_SIZE(alc880_adc_nids);
+                       spec->mixers[spec->num_mixers] = alc880_capture_mixer;
+                       spec->num_mixers++;
+               }
+       }
+
+       codec->patch_ops = alc_patch_ops;
+       if (board_config == ALC880_AUTO)
+               spec->init_hook = alc880_auto_init;
+
+       return 0;
+}
+
+
+/*
  * ALC260 support
  */
 
+static hda_nid_t alc260_dac_nids[1] = {
+       /* front */
+       0x02,
+};
+
+static hda_nid_t alc260_adc_nids[1] = {
+       /* ADC0 */
+       0x04,
+};
+
+static hda_nid_t alc260_adc_nids_alt[1] = {
+       /* ADC1 */
+       0x05,
+};
+
+static hda_nid_t alc260_hp_adc_nids[2] = {
+       /* ADC1, 0 */
+       0x05, 0x04
+};
+
+/* NIDs used when simultaneous access to both ADCs makes sense.  Note that
+ * alc260_capture_mixer assumes ADC0 (nid 0x04) is the first ADC.
+ */
+static hda_nid_t alc260_dual_adc_nids[2] = {
+       /* ADC0, ADC1 */
+       0x04, 0x05
+};
+
+#define ALC260_DIGOUT_NID      0x03
+#define ALC260_DIGIN_NID       0x06
+
+static struct hda_input_mux alc260_capture_source = {
+       .num_items = 4,
+       .items = {
+               { "Mic", 0x0 },
+               { "Front Mic", 0x1 },
+               { "Line", 0x2 },
+               { "CD", 0x4 },
+       },
+};
+
+/* On Fujitsu S702x laptops capture only makes sense from Mic/LineIn jack,
+ * headphone jack and the internal CD lines since these are the only pins at
+ * which audio can appear.  For flexibility, also allow the option of
+ * recording the mixer output on the second ADC (ADC0 doesn't have a
+ * connection to the mixer output).
+ */
+static struct hda_input_mux alc260_fujitsu_capture_sources[2] = {
+       {
+               .num_items = 3,
+               .items = {
+                       { "Mic/Line", 0x0 },
+                       { "CD", 0x4 },
+                       { "Headphone", 0x2 },
+               },
+       },
+       {
+               .num_items = 4,
+               .items = {
+                       { "Mic/Line", 0x0 },
+                       { "CD", 0x4 },
+                       { "Headphone", 0x2 },
+                       { "Mixer", 0x5 },
+               },
+       },
+
+};
+
+/* Acer TravelMate(/Extensa/Aspire) notebooks have similar configuration to
+ * the Fujitsu S702x, but jacks are marked differently.
+ */
+static struct hda_input_mux alc260_acer_capture_sources[2] = {
+       {
+               .num_items = 4,
+               .items = {
+                       { "Mic", 0x0 },
+                       { "Line", 0x2 },
+                       { "CD", 0x4 },
+                       { "Headphone", 0x5 },
+               },
+       },
+       {
+               .num_items = 5,
+               .items = {
+                       { "Mic", 0x0 },
+                       { "Line", 0x2 },
+                       { "CD", 0x4 },
+                       { "Headphone", 0x6 },
+                       { "Mixer", 0x5 },
+               },
+       },
+};
 /*
  * This is just place-holder, so there's something for alc_build_pcms to look
  * at when it calculates the maximum number of channels. ALC260 has no mixer
  * element which allows changing the channel mode, so the verb list is
  * never used.
  */
-static struct alc_channel_mode alc260_modes[1] = {
+static struct hda_channel_mode alc260_modes[1] = {
        { 2, NULL },
 };
 
-snd_kcontrol_new_t alc260_base_mixer[] = {
+
+/* Mixer combinations
+ *
+ * basic: base_output + input + pc_beep + capture
+ * HP: base_output + input + capture_alt
+ * HP_3013: hp_3013 + input + capture
+ * fujitsu: fujitsu + capture
+ * acer: acer + capture
+ */
+
+static struct snd_kcontrol_new alc260_base_output_mixer[] = {
        HDA_CODEC_VOLUME("Front Playback Volume", 0x08, 0x0, HDA_OUTPUT),
-       /* use LINE2 for the output */
-       /* HDA_CODEC_MUTE("Front Playback Switch", 0x0f, 0x0, HDA_OUTPUT), */
-       HDA_CODEC_MUTE("Front Playback Switch", 0x15, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Front Playback Switch", 0x08, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("Headphone Playback Volume", 0x09, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Headphone Playback Switch", 0x09, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME_MONO("Mono Playback Volume", 0x0a, 1, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE_MONO("Mono Playback Switch", 0x0a, 1, 2, HDA_INPUT),
+       { } /* end */
+};     
+
+static struct snd_kcontrol_new alc260_input_mixer[] = {
        HDA_CODEC_VOLUME("CD Playback Volume", 0x07, 0x04, HDA_INPUT),
        HDA_CODEC_MUTE("CD Playback Switch", 0x07, 0x04, HDA_INPUT),
        HDA_CODEC_VOLUME("Line Playback Volume", 0x07, 0x02, HDA_INPUT),
@@ -1129,17 +2925,96 @@ snd_kcontrol_new_t alc260_base_mixer[] = {
        HDA_CODEC_MUTE("Mic Playback Switch", 0x07, 0x0, HDA_INPUT),
        HDA_CODEC_VOLUME("Front Mic Playback Volume", 0x07, 0x01, HDA_INPUT),
        HDA_CODEC_MUTE("Front Mic Playback Switch", 0x07, 0x01, HDA_INPUT),
+       { } /* end */
+};
+
+static struct snd_kcontrol_new alc260_pc_beep_mixer[] = {
        HDA_CODEC_VOLUME("PC Speaker Playback Volume", 0x07, 0x05, HDA_INPUT),
        HDA_CODEC_MUTE("PC Speaker Playback Switch", 0x07, 0x05, HDA_INPUT),
-       HDA_CODEC_VOLUME("Headphone Playback Volume", 0x09, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE("Headphone Playback Switch", 0x10, 0x0, HDA_OUTPUT),
-       HDA_CODEC_VOLUME_MONO("Mono Playback Volume", 0x0a, 1, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE_MONO("Mono Playback Switch", 0x11, 1, 0x0, HDA_OUTPUT),
+       { } /* end */
+};
+
+static struct snd_kcontrol_new alc260_hp_3013_mixer[] = {
+       HDA_CODEC_VOLUME("Front Playback Volume", 0x09, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Front Playback Switch", 0x10, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("Aux-In Playback Volume", 0x07, 0x06, HDA_INPUT),
+       HDA_CODEC_MUTE("Aux-In Playback Switch", 0x07, 0x06, HDA_INPUT),
+       HDA_CODEC_VOLUME("Headphone Playback Volume", 0x08, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Headphone Playback Switch", 0x15, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("iSpeaker Playback Volume", 0x0a, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE_MONO("iSpeaker Playback Switch", 0x11, 1, 0x0, HDA_OUTPUT),
+       { } /* end */
+};
+
+/* Fujitsu S702x series laptops.  ALC260 pin usage: Mic/Line jack = 0x12, 
+ * HP jack = 0x14, CD audio =  0x16, internal speaker = 0x10.
+ */
+static struct snd_kcontrol_new alc260_fujitsu_mixer[] = {
+       HDA_CODEC_VOLUME("Headphone Playback Volume", 0x08, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Headphone Playback Switch", 0x08, 2, HDA_INPUT),
+       ALC_PIN_MODE("Headphone Jack Mode", 0x14, ALC_PIN_DIR_INOUT),
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x07, 0x04, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x07, 0x04, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic/Line Playback Volume", 0x07, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic/Line Playback Switch", 0x07, 0x0, HDA_INPUT),
+       ALC_PIN_MODE("Mic/Line Jack Mode", 0x12, ALC_PIN_DIR_IN),
+       HDA_CODEC_VOLUME("Beep Playback Volume", 0x07, 0x05, HDA_INPUT),
+       HDA_CODEC_MUTE("Beep Playback Switch", 0x07, 0x05, HDA_INPUT),
+       HDA_CODEC_VOLUME("Internal Speaker Playback Volume", 0x09, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Internal Speaker Playback Switch", 0x09, 2, HDA_INPUT),
+       { } /* end */
+};
+
+/* Mixer for Acer TravelMate(/Extensa/Aspire) notebooks.  Note that current
+ * versions of the ALC260 don't act on requests to enable mic bias from NID
+ * 0x0f (used to drive the headphone jack in these laptops).  The ALC260
+ * datasheet doesn't mention this restriction.  At this stage it's not clear
+ * whether this behaviour is intentional or is a hardware bug in chip
+ * revisions available in early 2006.  Therefore for now allow the
+ * "Headphone Jack Mode" control to span all choices, but if it turns out
+ * that the lack of mic bias for this NID is intentional we could change the
+ * mode from ALC_PIN_DIR_INOUT to ALC_PIN_DIR_INOUT_NOMICBIAS.
+ *
+ * In addition, Acer TravelMate(/Extensa/Aspire) notebooks in early 2006
+ * don't appear to make the mic bias available from the "line" jack, even
+ * though the NID used for this jack (0x14) can supply it.  The theory is
+ * that perhaps Acer have included blocking capacitors between the ALC260
+ * and the output jack.  If this turns out to be the case for all such
+ * models the "Line Jack Mode" mode could be changed from ALC_PIN_DIR_INOUT
+ * to ALC_PIN_DIR_INOUT_NOMICBIAS.
+ */
+static struct snd_kcontrol_new alc260_acer_mixer[] = {
+       HDA_CODEC_VOLUME("Master Playback Volume", 0x08, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Master Playback Switch", 0x08, 2, HDA_INPUT),
+       ALC_PIN_MODE("Headphone Jack Mode", 0x0f, ALC_PIN_DIR_INOUT),
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x07, 0x04, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x07, 0x04, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x07, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x07, 0x0, HDA_INPUT),
+       ALC_PIN_MODE("Mic Jack Mode", 0x12, ALC_PIN_DIR_IN),
+       HDA_CODEC_VOLUME("Line Playback Volume", 0x07, 0x02, HDA_INPUT),
+       HDA_CODEC_MUTE("Line Playback Switch", 0x07, 0x02, HDA_INPUT),
+       ALC_PIN_MODE("Line Jack Mode", 0x14, ALC_PIN_DIR_INOUT),
+       HDA_CODEC_VOLUME("Beep Playback Volume", 0x07, 0x05, HDA_INPUT),
+       HDA_CODEC_MUTE("Beep Playback Switch", 0x07, 0x05, HDA_INPUT),
+       { } /* end */
+};
+
+/* capture mixer elements */
+static struct snd_kcontrol_new alc260_capture_mixer[] = {
        HDA_CODEC_VOLUME("Capture Volume", 0x04, 0x0, HDA_INPUT),
        HDA_CODEC_MUTE("Capture Switch", 0x04, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME_IDX("Capture Volume", 1, 0x05, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE_IDX("Capture Switch", 1, 0x05, 0x0, HDA_INPUT),
        {
                .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
-               .name = "Capture Source",
+               /* The multiple "Capture Source" controls confuse alsamixer
+                * So call somewhat different..
+                * FIXME: the controls appear in the "playback" view!
+                */
+               /* .name = "Capture Source", */
+               .name = "Input Source",
+               .count = 2,
                .info = alc_mux_enum_info,
                .get = alc_mux_enum_get,
                .put = alc_mux_enum_put,
@@ -1147,25 +3022,104 @@ snd_kcontrol_new_t alc260_base_mixer[] = {
        { } /* end */
 };
 
+static struct snd_kcontrol_new alc260_capture_alt_mixer[] = {
+       HDA_CODEC_VOLUME("Capture Volume", 0x05, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x05, 0x0, HDA_INPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               /* The multiple "Capture Source" controls confuse alsamixer
+                * So call somewhat different..
+                * FIXME: the controls appear in the "playback" view!
+                */
+               /* .name = "Capture Source", */
+               .name = "Input Source",
+               .count = 1,
+               .info = alc_mux_enum_info,
+               .get = alc_mux_enum_get,
+               .put = alc_mux_enum_put,
+       },
+       { } /* end */
+};
+
+/*
+ * initialization verbs
+ */
 static struct hda_verb alc260_init_verbs[] = {
        /* Line In pin widget for input */
-       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
        /* CD pin widget for input */
-       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
+       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
        /* Mic1 (rear panel) pin widget for input and vref at 80% */
-       {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
+       {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
        /* Mic2 (front panel) pin widget for input and vref at 80% */
-       {0x13, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
+       {0x13, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
        /* LINE-2 is used for line-out in rear */
-       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
        /* select line-out */
-       {0x15, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x0e, AC_VERB_SET_CONNECT_SEL, 0x00},
        /* LINE-OUT pin */
-       {0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+       {0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
        /* enable HP */
-       {0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+       {0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
        /* enable Mono */
+       {0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       /* mute capture amp left and right */
+       {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       /* set connection select to line in (default select for this ADC) */
+       {0x04, AC_VERB_SET_CONNECT_SEL, 0x02},
+       /* mute capture amp left and right */
+       {0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       /* set connection select to line in (default select for this ADC) */
+       {0x05, AC_VERB_SET_CONNECT_SEL, 0x02},
+       /* set vol=0 Line-Out mixer amp left and right */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       /* unmute pin widget amp left and right (no gain on this amp) */
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* set vol=0 HP mixer amp left and right */
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       /* unmute pin widget amp left and right (no gain on this amp) */
+       {0x10, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* set vol=0 Mono mixer amp left and right */
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       /* unmute pin widget amp left and right (no gain on this amp) */
+       {0x11, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* unmute LINE-2 out pin */
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Amp Indexes: CD = 0x04, Line In 1 = 0x02, Mic 1 = 0x00 & Line In 2 = 0x03 */
+       /* mute CD */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(4)},
+       /* mute Line In */
+       {0x07,  AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+       /* mute Mic */
+       {0x07,  AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       /* Amp Indexes: DAC = 0x01 & mixer = 0x00 */
+       /* mute Front out path */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       /* mute Headphone out path */
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       /* mute Mono out path */
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       { }
+};
+
+static struct hda_verb alc260_hp_init_verbs[] = {
+       /* Headphone and output */
+       {0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0},
+       /* mono output */
        {0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+       /* Mic1 (rear panel) pin widget for input and vref at 80% */
+       {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
+       /* Mic2 (front panel) pin widget for input and vref at 80% */
+       {0x13, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
+       /* Line In pin widget for input */
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
+       /* Line-2 pin widget for output */
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+       /* CD pin widget for input */
+       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
        /* unmute amp left and right */
        {0x04, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000},
        /* set connection select to line in (default select for this ADC) */
@@ -1173,17 +3127,11 @@ static struct hda_verb alc260_init_verbs[] = {
        /* unmute Line-Out mixer amp left and right (volume = 0) */
        {0x08, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
        /* mute pin widget amp left and right (no gain on this amp) */
-       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
        /* unmute HP mixer amp left and right (volume = 0) */
        {0x09, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
        /* mute pin widget amp left and right (no gain on this amp) */
-       {0x10, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
-       /* unmute Mono mixer amp left and right (volume = 0) */
-       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-       /* mute pin widget amp left and right (no gain on this amp) */
-       {0x11, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
-       /* mute LINE-2 out */
-       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080},
+       {0x10, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
        /* Amp Indexes: CD = 0x04, Line In 1 = 0x02, Mic 1 = 0x00 & Line In 2 = 0x03 */
        /* unmute CD */
        {0x07, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x04 << 8))},
@@ -1204,54 +3152,829 @@ static struct hda_verb alc260_init_verbs[] = {
        { }
 };
 
-static struct hda_pcm_stream alc260_pcm_analog_playback = {
-       .substreams = 1,
-       .channels_min = 2,
-       .channels_max = 2,
-       .nid = 0x2,
-};
-
-static struct hda_pcm_stream alc260_pcm_analog_capture = {
-       .substreams = 1,
-       .channels_min = 2,
-       .channels_max = 2,
-       .nid = 0x4,
-};
-
-static int patch_alc260(struct hda_codec *codec)
-{
-       struct alc_spec *spec;
-
-       spec = kcalloc(1, sizeof(*spec), GFP_KERNEL);
-       if (spec == NULL)
-               return -ENOMEM;
-
-       codec->spec = spec;
-
-       spec->mixers[spec->num_mixers] = alc260_base_mixer;
+static struct hda_verb alc260_hp_3013_init_verbs[] = {
+       /* Line out and output */
+       {0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+       /* mono output */
+       {0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+       /* Mic1 (rear panel) pin widget for input and vref at 80% */
+       {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
+       /* Mic2 (front panel) pin widget for input and vref at 80% */
+       {0x13, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
+       /* Line In pin widget for input */
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
+       /* Headphone pin widget for output */
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0},
+       /* CD pin widget for input */
+       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
+       /* unmute amp left and right */
+       {0x04, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000},
+       /* set connection select to line in (default select for this ADC) */
+       {0x04, AC_VERB_SET_CONNECT_SEL, 0x02},
+       /* unmute Line-Out mixer amp left and right (volume = 0) */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+       /* mute pin widget amp left and right (no gain on this amp) */
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
+       /* unmute HP mixer amp left and right (volume = 0) */
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
+       /* mute pin widget amp left and right (no gain on this amp) */
+       {0x10, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
+       /* Amp Indexes: CD = 0x04, Line In 1 = 0x02, Mic 1 = 0x00 & Line In 2 = 0x03 */
+       /* unmute CD */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x04 << 8))},
+       /* unmute Line In */
+       {0x07,  AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x02 << 8))},
+       /* unmute Mic */
+       {0x07,  AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       /* Amp Indexes: DAC = 0x01 & mixer = 0x00 */
+       /* Unmute Front out path */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8))},
+       /* Unmute Headphone out path */
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8))},
+       /* Unmute Mono out path */
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8))},
+       { }
+};
+
+/* Initialisation sequence for ALC260 as configured in Fujitsu S702x
+ * laptops.  ALC260 pin usage: Mic/Line jack = 0x12, HP jack = 0x14, CD
+ * audio = 0x16, internal speaker = 0x10.
+ */
+static struct hda_verb alc260_fujitsu_init_verbs[] = {
+       /* Disable all GPIOs */
+       {0x01, AC_VERB_SET_GPIO_MASK, 0},
+       /* Internal speaker is connected to headphone pin */
+       {0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       /* Headphone/Line-out jack connects to Line1 pin; make it an output */
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       /* Mic/Line-in jack is connected to mic1 pin, so make it an input */
+       {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       /* Ensure all other unused pins are disabled and muted. */
+       {0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, 0},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, 0},
+       {0x11, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x13, AC_VERB_SET_PIN_WIDGET_CONTROL, 0},
+       {0x13, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+
+       /* Disable digital (SPDIF) pins */
+       {0x03, AC_VERB_SET_DIGI_CONVERT_1, 0},
+       {0x06, AC_VERB_SET_DIGI_CONVERT_1, 0},
+
+       /* Ensure Line1 pin widget takes its input from the OUT1 sum bus 
+        * when acting as an output.
+        */
+       {0x0d, AC_VERB_SET_CONNECT_SEL, 0},
+
+       /* Start with output sum widgets muted and their output gains at min */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+
+       /* Unmute HP pin widget amp left and right (no equiv mixer ctrl) */
+       {0x10, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Unmute Line1 pin widget output buffer since it starts as an output.
+        * If the pin mode is changed by the user the pin mode control will
+        * take care of enabling the pin's input/output buffers as needed.
+        * Therefore there's no need to enable the input buffer at this
+        * stage.
+        */
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Unmute input buffer of pin widget used for Line-in (no equiv 
+        * mixer ctrl)
+        */
+       {0x12, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+
+       /* Mute capture amp left and right */
+       {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       /* Set ADC connection select to match default mixer setting - line 
+        * in (on mic1 pin)
+        */
+       {0x04, AC_VERB_SET_CONNECT_SEL, 0x00},
+
+       /* Do the same for the second ADC: mute capture input amp and
+        * set ADC connection to line in (on mic1 pin)
+        */
+       {0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x05, AC_VERB_SET_CONNECT_SEL, 0x00},
+
+       /* Mute all inputs to mixer widget (even unconnected ones) */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)}, /* mic1 pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)}, /* mic2 pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)}, /* line1 pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)}, /* line2 pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)}, /* CD pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(5)}, /* Beep-gen pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(6)}, /* Line-out pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(7)}, /* HP-pin pin */
+
+       { }
+};
+
+/* Initialisation sequence for ALC260 as configured in Acer TravelMate and
+ * similar laptops (adapted from Fujitsu init verbs).
+ */
+static struct hda_verb alc260_acer_init_verbs[] = {
+       /* On TravelMate laptops, GPIO 0 enables the internal speaker and
+        * the headphone jack.  Turn this on and rely on the standard mute
+        * methods whenever the user wants to turn these outputs off.
+        */
+       {0x01, AC_VERB_SET_GPIO_MASK, 0x01},
+       {0x01, AC_VERB_SET_GPIO_DIRECTION, 0x01},
+       {0x01, AC_VERB_SET_GPIO_DATA, 0x01},
+       /* Internal speaker/Headphone jack is connected to Line-out pin */
+       {0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       /* Internal microphone/Mic jack is connected to Mic1 pin */
+       {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF50},
+       /* Line In jack is connected to Line1 pin */
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       /* Ensure all other unused pins are disabled and muted. */
+       {0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, 0},
+       {0x10, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, 0},
+       {0x11, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x13, AC_VERB_SET_PIN_WIDGET_CONTROL, 0},
+       {0x13, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       /* Disable digital (SPDIF) pins */
+       {0x03, AC_VERB_SET_DIGI_CONVERT_1, 0},
+       {0x06, AC_VERB_SET_DIGI_CONVERT_1, 0},
+
+       /* Ensure Mic1 and Line1 pin widgets take input from the OUT1 sum 
+        * bus when acting as outputs.
+        */
+       {0x0b, AC_VERB_SET_CONNECT_SEL, 0},
+       {0x0d, AC_VERB_SET_CONNECT_SEL, 0},
+
+       /* Start with output sum widgets muted and their output gains at min */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+
+       /* Unmute Line-out pin widget amp left and right (no equiv mixer ctrl) */
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Unmute Mic1 and Line1 pin widget input buffers since they start as
+        * inputs. If the pin mode is changed by the user the pin mode control
+        * will take care of enabling the pin's input/output buffers as needed.
+        * Therefore there's no need to enable the input buffer at this
+        * stage.
+        */
+       {0x12, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+
+       /* Mute capture amp left and right */
+       {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       /* Set ADC connection select to match default mixer setting - mic
+        * (on mic1 pin)
+        */
+       {0x04, AC_VERB_SET_CONNECT_SEL, 0x00},
+
+       /* Do similar with the second ADC: mute capture input amp and
+        * set ADC connection to mic to match ALSA's default state.
+        */
+       {0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x05, AC_VERB_SET_CONNECT_SEL, 0x00},
+
+       /* Mute all inputs to mixer widget (even unconnected ones) */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)}, /* mic1 pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)}, /* mic2 pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)}, /* line1 pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)}, /* line2 pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)}, /* CD pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(5)}, /* Beep-gen pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(6)}, /* Line-out pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(7)}, /* HP-pin pin */
+
+       { }
+};
+
+/* Test configuration for debugging, modelled after the ALC880 test
+ * configuration.
+ */
+#ifdef CONFIG_SND_DEBUG
+static hda_nid_t alc260_test_dac_nids[1] = {
+       0x02,
+};
+static hda_nid_t alc260_test_adc_nids[2] = {
+       0x04, 0x05,
+};
+/* For testing the ALC260, each input MUX needs its own definition since
+ * the signal assignments are different.  This assumes that the first ADC 
+ * is NID 0x04.
+ */
+static struct hda_input_mux alc260_test_capture_sources[2] = {
+       {
+               .num_items = 7,
+               .items = {
+                       { "MIC1 pin", 0x0 },
+                       { "MIC2 pin", 0x1 },
+                       { "LINE1 pin", 0x2 },
+                       { "LINE2 pin", 0x3 },
+                       { "CD pin", 0x4 },
+                       { "LINE-OUT pin", 0x5 },
+                       { "HP-OUT pin", 0x6 },
+               },
+        },
+       {
+               .num_items = 8,
+               .items = {
+                       { "MIC1 pin", 0x0 },
+                       { "MIC2 pin", 0x1 },
+                       { "LINE1 pin", 0x2 },
+                       { "LINE2 pin", 0x3 },
+                       { "CD pin", 0x4 },
+                       { "Mixer", 0x5 },
+                       { "LINE-OUT pin", 0x6 },
+                       { "HP-OUT pin", 0x7 },
+               },
+        },
+};
+static struct snd_kcontrol_new alc260_test_mixer[] = {
+       /* Output driver widgets */
+       HDA_CODEC_VOLUME_MONO("Mono Playback Volume", 0x0a, 1, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE_MONO("Mono Playback Switch", 0x0a, 1, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("LOUT2 Playback Volume", 0x09, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("LOUT2 Playback Switch", 0x09, 2, HDA_INPUT),
+       HDA_CODEC_VOLUME("LOUT1 Playback Volume", 0x08, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("LOUT1 Playback Switch", 0x08, 2, HDA_INPUT),
+
+       /* Modes for retasking pin widgets
+        * Note: the ALC260 doesn't seem to act on requests to enable mic
+         * bias from NIDs 0x0f and 0x10.  The ALC260 datasheet doesn't
+         * mention this restriction.  At this stage it's not clear whether
+         * this behaviour is intentional or is a hardware bug in chip
+         * revisions available at least up until early 2006.  Therefore for
+         * now allow the "HP-OUT" and "LINE-OUT" Mode controls to span all
+         * choices, but if it turns out that the lack of mic bias for these
+         * NIDs is intentional we could change their modes from
+         * ALC_PIN_DIR_INOUT to ALC_PIN_DIR_INOUT_NOMICBIAS.
+        */
+       ALC_PIN_MODE("HP-OUT pin mode", 0x10, ALC_PIN_DIR_INOUT),
+       ALC_PIN_MODE("LINE-OUT pin mode", 0x0f, ALC_PIN_DIR_INOUT),
+       ALC_PIN_MODE("LINE2 pin mode", 0x15, ALC_PIN_DIR_INOUT),
+       ALC_PIN_MODE("LINE1 pin mode", 0x14, ALC_PIN_DIR_INOUT),
+       ALC_PIN_MODE("MIC2 pin mode", 0x13, ALC_PIN_DIR_INOUT),
+       ALC_PIN_MODE("MIC1 pin mode", 0x12, ALC_PIN_DIR_INOUT),
+
+       /* Loopback mixer controls */
+       HDA_CODEC_VOLUME("MIC1 Playback Volume", 0x07, 0x00, HDA_INPUT),
+       HDA_CODEC_MUTE("MIC1 Playback Switch", 0x07, 0x00, HDA_INPUT),
+       HDA_CODEC_VOLUME("MIC2 Playback Volume", 0x07, 0x01, HDA_INPUT),
+       HDA_CODEC_MUTE("MIC2 Playback Switch", 0x07, 0x01, HDA_INPUT),
+       HDA_CODEC_VOLUME("LINE1 Playback Volume", 0x07, 0x02, HDA_INPUT),
+       HDA_CODEC_MUTE("LINE1 Playback Switch", 0x07, 0x02, HDA_INPUT),
+       HDA_CODEC_VOLUME("LINE2 Playback Volume", 0x07, 0x03, HDA_INPUT),
+       HDA_CODEC_MUTE("LINE2 Playback Switch", 0x07, 0x03, HDA_INPUT),
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x07, 0x04, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x07, 0x04, HDA_INPUT),
+       HDA_CODEC_VOLUME("Beep Playback Volume", 0x07, 0x05, HDA_INPUT),
+       HDA_CODEC_MUTE("Beep Playback Switch", 0x07, 0x05, HDA_INPUT),
+       HDA_CODEC_VOLUME("LINE-OUT loopback Playback Volume", 0x07, 0x06, HDA_INPUT),
+       HDA_CODEC_MUTE("LINE-OUT loopback Playback Switch", 0x07, 0x06, HDA_INPUT),
+       HDA_CODEC_VOLUME("HP-OUT loopback Playback Volume", 0x07, 0x7, HDA_INPUT),
+       HDA_CODEC_MUTE("HP-OUT loopback Playback Switch", 0x07, 0x7, HDA_INPUT),
+
+       /* Controls for GPIO pins, assuming they are configured as outputs */
+       ALC_GPIO_DATA_SWITCH("GPIO pin 0", 0x01, 0x01),
+       ALC_GPIO_DATA_SWITCH("GPIO pin 1", 0x01, 0x02),
+       ALC_GPIO_DATA_SWITCH("GPIO pin 2", 0x01, 0x04),
+       ALC_GPIO_DATA_SWITCH("GPIO pin 3", 0x01, 0x08),
+
+       /* Switches to allow the digital IO pins to be enabled.  The datasheet
+        * is ambigious as to which NID is which; testing on laptops which
+        * make this output available should provide clarification. 
+        */
+       ALC_SPDIF_CTRL_SWITCH("SPDIF Playback Switch", 0x03, 0x01),
+       ALC_SPDIF_CTRL_SWITCH("SPDIF Capture Switch", 0x06, 0x01),
+
+       { } /* end */
+};
+static struct hda_verb alc260_test_init_verbs[] = {
+       /* Enable all GPIOs as outputs with an initial value of 0 */
+       {0x01, AC_VERB_SET_GPIO_DIRECTION, 0x0f},
+       {0x01, AC_VERB_SET_GPIO_DATA, 0x00},
+       {0x01, AC_VERB_SET_GPIO_MASK, 0x0f},
+
+       /* Enable retasking pins as output, initially without power amp */
+       {0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x13, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+
+       /* Disable digital (SPDIF) pins initially, but users can enable
+        * them via a mixer switch.  In the case of SPDIF-out, this initverb
+        * payload also sets the generation to 0, output to be in "consumer"
+        * PCM format, copyright asserted, no pre-emphasis and no validity
+        * control.
+        */
+       {0x03, AC_VERB_SET_DIGI_CONVERT_1, 0},
+       {0x06, AC_VERB_SET_DIGI_CONVERT_1, 0},
+
+       /* Ensure mic1, mic2, line1 and line2 pin widgets take input from the 
+        * OUT1 sum bus when acting as an output.
+        */
+       {0x0b, AC_VERB_SET_CONNECT_SEL, 0},
+       {0x0c, AC_VERB_SET_CONNECT_SEL, 0},
+       {0x0d, AC_VERB_SET_CONNECT_SEL, 0},
+       {0x0e, AC_VERB_SET_CONNECT_SEL, 0},
+
+       /* Start with output sum widgets muted and their output gains at min */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+
+       /* Unmute retasking pin widget output buffers since the default
+        * state appears to be output.  As the pin mode is changed by the
+        * user the pin mode control will take care of enabling the pin's
+        * input/output buffers as needed.
+        */
+       {0x10, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x13, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x12, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       /* Also unmute the mono-out pin widget */
+       {0x11, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+
+       /* Mute capture amp left and right */
+       {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       /* Set ADC connection select to match default mixer setting (mic1
+        * pin)
+        */
+       {0x04, AC_VERB_SET_CONNECT_SEL, 0x00},
+
+       /* Do the same for the second ADC: mute capture input amp and
+        * set ADC connection to mic1 pin
+        */
+       {0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x05, AC_VERB_SET_CONNECT_SEL, 0x00},
+
+       /* Mute all inputs to mixer widget (even unconnected ones) */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)}, /* mic1 pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)}, /* mic2 pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)}, /* line1 pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)}, /* line2 pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)}, /* CD pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(5)}, /* Beep-gen pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(6)}, /* Line-out pin */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(7)}, /* HP-pin pin */
+
+       { }
+};
+#endif
+
+static struct hda_pcm_stream alc260_pcm_analog_playback = {
+       .substreams = 1,
+       .channels_min = 2,
+       .channels_max = 2,
+};
+
+static struct hda_pcm_stream alc260_pcm_analog_capture = {
+       .substreams = 1,
+       .channels_min = 2,
+       .channels_max = 2,
+};
+
+#define alc260_pcm_digital_playback    alc880_pcm_digital_playback
+#define alc260_pcm_digital_capture     alc880_pcm_digital_capture
+
+/*
+ * for BIOS auto-configuration
+ */
+
+static int alc260_add_playback_controls(struct alc_spec *spec, hda_nid_t nid,
+                                       const char *pfx)
+{
+       hda_nid_t nid_vol;
+       unsigned long vol_val, sw_val;
+       char name[32];
+       int err;
+
+       if (nid >= 0x0f && nid < 0x11) {
+               nid_vol = nid - 0x7;
+               vol_val = HDA_COMPOSE_AMP_VAL(nid_vol, 3, 0, HDA_OUTPUT);
+               sw_val = HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT);
+       } else if (nid == 0x11) {
+               nid_vol = nid - 0x7;
+               vol_val = HDA_COMPOSE_AMP_VAL(nid_vol, 2, 0, HDA_OUTPUT);
+               sw_val = HDA_COMPOSE_AMP_VAL(nid, 2, 0, HDA_OUTPUT);
+       } else if (nid >= 0x12 && nid <= 0x15) {
+               nid_vol = 0x08;
+               vol_val = HDA_COMPOSE_AMP_VAL(nid_vol, 3, 0, HDA_OUTPUT);
+               sw_val = HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT);
+       } else
+               return 0; /* N/A */
+       
+       snprintf(name, sizeof(name), "%s Playback Volume", pfx);
+       if ((err = add_control(spec, ALC_CTL_WIDGET_VOL, name, vol_val)) < 0)
+               return err;
+       snprintf(name, sizeof(name), "%s Playback Switch", pfx);
+       if ((err = add_control(spec, ALC_CTL_WIDGET_MUTE, name, sw_val)) < 0)
+               return err;
+       return 1;
+}
+
+/* add playback controls from the parsed DAC table */
+static int alc260_auto_create_multi_out_ctls(struct alc_spec *spec,
+                                            const struct auto_pin_cfg *cfg)
+{
+       hda_nid_t nid;
+       int err;
+
+       spec->multiout.num_dacs = 1;
+       spec->multiout.dac_nids = spec->private_dac_nids;
+       spec->multiout.dac_nids[0] = 0x02;
+
+       nid = cfg->line_out_pins[0];
+       if (nid) {
+               err = alc260_add_playback_controls(spec, nid, "Front");
+               if (err < 0)
+                       return err;
+       }
+
+       nid = cfg->speaker_pins[0];
+       if (nid) {
+               err = alc260_add_playback_controls(spec, nid, "Speaker");
+               if (err < 0)
+                       return err;
+       }
+
+       nid = cfg->hp_pin;
+       if (nid) {
+               err = alc260_add_playback_controls(spec, nid, "Headphone");
+               if (err < 0)
+                       return err;
+       }
+       return 0;       
+}
+
+/* create playback/capture controls for input pins */
+static int alc260_auto_create_analog_input_ctls(struct alc_spec *spec,
+                                               const struct auto_pin_cfg *cfg)
+{
+       struct hda_input_mux *imux = &spec->private_imux;
+       int i, err, idx;
+
+       for (i = 0; i < AUTO_PIN_LAST; i++) {
+               if (cfg->input_pins[i] >= 0x12) {
+                       idx = cfg->input_pins[i] - 0x12;
+                       err = new_analog_input(spec, cfg->input_pins[i],
+                                              auto_pin_cfg_labels[i], idx, 0x07);
+                       if (err < 0)
+                               return err;
+                       imux->items[imux->num_items].label = auto_pin_cfg_labels[i];
+                       imux->items[imux->num_items].index = idx;
+                       imux->num_items++;
+               }
+               if ((cfg->input_pins[i] >= 0x0f) && (cfg->input_pins[i] <= 0x10)){
+                       idx = cfg->input_pins[i] - 0x09;
+                       err = new_analog_input(spec, cfg->input_pins[i],
+                                              auto_pin_cfg_labels[i], idx, 0x07);
+                       if (err < 0)
+                               return err;
+                       imux->items[imux->num_items].label = auto_pin_cfg_labels[i];
+                       imux->items[imux->num_items].index = idx;
+                       imux->num_items++;
+               }
+       }
+       return 0;
+}
+
+static void alc260_auto_set_output_and_unmute(struct hda_codec *codec,
+                                             hda_nid_t nid, int pin_type,
+                                             int sel_idx)
+{
+       /* set as output */
+       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, pin_type);
+       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE);
+       /* need the manual connection? */
+       if (nid >= 0x12) {
+               int idx = nid - 0x12;
+               snd_hda_codec_write(codec, idx + 0x0b, 0,
+                                   AC_VERB_SET_CONNECT_SEL, sel_idx);
+                                   
+       }
+}
+
+static void alc260_auto_init_multi_out(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       hda_nid_t nid;
+
+       nid = spec->autocfg.line_out_pins[0];   
+       if (nid)
+               alc260_auto_set_output_and_unmute(codec, nid, PIN_OUT, 0);
+       
+       nid = spec->autocfg.speaker_pins[0];
+       if (nid)
+               alc260_auto_set_output_and_unmute(codec, nid, PIN_OUT, 0);
+
+       nid = spec->autocfg.hp_pin;
+       if (nid)
+               alc260_auto_set_output_and_unmute(codec, nid, PIN_OUT, 0);
+}      
+
+#define ALC260_PIN_CD_NID              0x16
+static void alc260_auto_init_analog_input(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       int i;
+
+       for (i = 0; i < AUTO_PIN_LAST; i++) {
+               hda_nid_t nid = spec->autocfg.input_pins[i];
+               if (nid >= 0x12) {
+                       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
+                                           i <= AUTO_PIN_FRONT_MIC ? PIN_VREF80 : PIN_IN);
+                       if (nid != ALC260_PIN_CD_NID)
+                               snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE,
+                                                   AMP_OUT_MUTE);
+               }
+       }
+}
+
+/*
+ * generic initialization of ADC, input mixers and output mixers
+ */
+static struct hda_verb alc260_volume_init_verbs[] = {
+       /*
+        * Unmute ADC0-1 and set the default input to mic-in
+        */
+       {0x04, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x05, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       
+       /* Unmute input amps (CD, Line In, Mic 1 & Mic 2) of the analog-loopback
+        * mixer widget
+        * Note: PASD motherboards use the Line In 2 as the input for front panel
+        * mic (mic 2)
+        */
+       /* Amp Indices: Mic1 = 0, Mic2 = 1, Line1 = 2, Line2 = 3, CD = 4 */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(3)},
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(4)},
+
+       /*
+        * Set up output mixers (0x08 - 0x0a)
+        */
+       /* set vol=0 to output mixers */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       /* set up input amps for analog loopback */
+       /* Amp Indices: DAC = 0, mixer = 1 */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       
+       { }
+};
+
+static int alc260_parse_auto_config(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       unsigned int wcap;
+       int err;
+       static hda_nid_t alc260_ignore[] = { 0x17, 0 };
+
+       if ((err = snd_hda_parse_pin_def_config(codec, &spec->autocfg,
+                                               alc260_ignore)) < 0)
+               return err;
+       if ((err = alc260_auto_create_multi_out_ctls(spec, &spec->autocfg)) < 0)
+               return err;
+       if (! spec->kctl_alloc)
+               return 0; /* can't find valid BIOS pin config */
+       if ((err = alc260_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0)
+               return err;
+
+       spec->multiout.max_channels = 2;
+
+       if (spec->autocfg.dig_out_pin)
+               spec->multiout.dig_out_nid = ALC260_DIGOUT_NID;
+       if (spec->kctl_alloc)
+               spec->mixers[spec->num_mixers++] = spec->kctl_alloc;
+
+       spec->init_verbs[spec->num_init_verbs++] = alc260_volume_init_verbs;
+
+       spec->num_mux_defs = 1;
+       spec->input_mux = &spec->private_imux;
+
+       /* check whether NID 0x04 is valid */
+       wcap = get_wcaps(codec, 0x04);
+       wcap = (wcap & AC_WCAP_TYPE) >> AC_WCAP_TYPE_SHIFT; /* get type */
+       if (wcap != AC_WID_AUD_IN) {
+               spec->adc_nids = alc260_adc_nids_alt;
+               spec->num_adc_nids = ARRAY_SIZE(alc260_adc_nids_alt);
+               spec->mixers[spec->num_mixers] = alc260_capture_alt_mixer;
+       } else {
+               spec->adc_nids = alc260_adc_nids;
+               spec->num_adc_nids = ARRAY_SIZE(alc260_adc_nids);
+               spec->mixers[spec->num_mixers] = alc260_capture_mixer;
+       }
        spec->num_mixers++;
 
-       spec->init_verbs = alc260_init_verbs;
-       spec->channel_mode = alc260_modes;
-       spec->num_channel_mode = ARRAY_SIZE(alc260_modes);
+       return 1;
+}
+
+/* additional initialization for auto-configuration model */
+static void alc260_auto_init(struct hda_codec *codec)
+{
+       alc260_auto_init_multi_out(codec);
+       alc260_auto_init_analog_input(codec);
+}
+
+/*
+ * ALC260 configurations
+ */
+static struct hda_board_config alc260_cfg_tbl[] = {
+       { .modelname = "basic", .config = ALC260_BASIC },
+       { .pci_subvendor = 0x104d, .pci_subdevice = 0x81bb,
+         .config = ALC260_BASIC }, /* Sony VAIO */
+       { .pci_subvendor = 0x152d, .pci_subdevice = 0x0729,
+         .config = ALC260_BASIC }, /* CTL Travel Master U553W */
+       { .modelname = "hp", .config = ALC260_HP },
+       { .pci_subvendor = 0x103c, .pci_subdevice = 0x3010, .config = ALC260_HP },
+       { .pci_subvendor = 0x103c, .pci_subdevice = 0x3011, .config = ALC260_HP },
+       { .pci_subvendor = 0x103c, .pci_subdevice = 0x3012, .config = ALC260_HP_3013 },
+       { .pci_subvendor = 0x103c, .pci_subdevice = 0x3013, .config = ALC260_HP_3013 },
+       { .pci_subvendor = 0x103c, .pci_subdevice = 0x3014, .config = ALC260_HP },
+       { .pci_subvendor = 0x103c, .pci_subdevice = 0x3015, .config = ALC260_HP },
+       { .pci_subvendor = 0x103c, .pci_subdevice = 0x3016, .config = ALC260_HP },
+       { .modelname = "fujitsu", .config = ALC260_FUJITSU_S702X },
+       { .pci_subvendor = 0x10cf, .pci_subdevice = 0x1326, .config = ALC260_FUJITSU_S702X },
+       { .modelname = "acer", .config = ALC260_ACER },
+       { .pci_subvendor = 0x1025, .pci_subdevice = 0x008f, .config = ALC260_ACER },
+#ifdef CONFIG_SND_DEBUG
+       { .modelname = "test", .config = ALC260_TEST },
+#endif
+       { .modelname = "auto", .config = ALC260_AUTO },
+       {}
+};
+
+static struct alc_config_preset alc260_presets[] = {
+       [ALC260_BASIC] = {
+               .mixers = { alc260_base_output_mixer,
+                           alc260_input_mixer,
+                           alc260_pc_beep_mixer,
+                           alc260_capture_mixer },
+               .init_verbs = { alc260_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc260_dac_nids),
+               .dac_nids = alc260_dac_nids,
+               .num_adc_nids = ARRAY_SIZE(alc260_adc_nids),
+               .adc_nids = alc260_adc_nids,
+               .num_channel_mode = ARRAY_SIZE(alc260_modes),
+               .channel_mode = alc260_modes,
+               .input_mux = &alc260_capture_source,
+       },
+       [ALC260_HP] = {
+               .mixers = { alc260_base_output_mixer,
+                           alc260_input_mixer,
+                           alc260_capture_alt_mixer },
+               .init_verbs = { alc260_hp_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc260_dac_nids),
+               .dac_nids = alc260_dac_nids,
+               .num_adc_nids = ARRAY_SIZE(alc260_hp_adc_nids),
+               .adc_nids = alc260_hp_adc_nids,
+               .num_channel_mode = ARRAY_SIZE(alc260_modes),
+               .channel_mode = alc260_modes,
+               .input_mux = &alc260_capture_source,
+       },
+       [ALC260_HP_3013] = {
+               .mixers = { alc260_hp_3013_mixer,
+                           alc260_input_mixer,
+                           alc260_capture_alt_mixer },
+               .init_verbs = { alc260_hp_3013_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc260_dac_nids),
+               .dac_nids = alc260_dac_nids,
+               .num_adc_nids = ARRAY_SIZE(alc260_hp_adc_nids),
+               .adc_nids = alc260_hp_adc_nids,
+               .num_channel_mode = ARRAY_SIZE(alc260_modes),
+               .channel_mode = alc260_modes,
+               .input_mux = &alc260_capture_source,
+       },
+       [ALC260_FUJITSU_S702X] = {
+               .mixers = { alc260_fujitsu_mixer,
+                           alc260_capture_mixer },
+               .init_verbs = { alc260_fujitsu_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc260_dac_nids),
+               .dac_nids = alc260_dac_nids,
+               .num_adc_nids = ARRAY_SIZE(alc260_dual_adc_nids),
+               .adc_nids = alc260_dual_adc_nids,
+               .num_channel_mode = ARRAY_SIZE(alc260_modes),
+               .channel_mode = alc260_modes,
+               .num_mux_defs = ARRAY_SIZE(alc260_fujitsu_capture_sources),
+               .input_mux = alc260_fujitsu_capture_sources,
+       },
+       [ALC260_ACER] = {
+               .mixers = { alc260_acer_mixer,
+                           alc260_capture_mixer },
+               .init_verbs = { alc260_acer_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc260_dac_nids),
+               .dac_nids = alc260_dac_nids,
+               .num_adc_nids = ARRAY_SIZE(alc260_dual_adc_nids),
+               .adc_nids = alc260_dual_adc_nids,
+               .num_channel_mode = ARRAY_SIZE(alc260_modes),
+               .channel_mode = alc260_modes,
+               .num_mux_defs = ARRAY_SIZE(alc260_acer_capture_sources),
+               .input_mux = alc260_acer_capture_sources,
+       },
+#ifdef CONFIG_SND_DEBUG
+       [ALC260_TEST] = {
+               .mixers = { alc260_test_mixer,
+                           alc260_capture_mixer },
+               .init_verbs = { alc260_test_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc260_test_dac_nids),
+               .dac_nids = alc260_test_dac_nids,
+               .num_adc_nids = ARRAY_SIZE(alc260_test_adc_nids),
+               .adc_nids = alc260_test_adc_nids,
+               .num_channel_mode = ARRAY_SIZE(alc260_modes),
+               .channel_mode = alc260_modes,
+               .num_mux_defs = ARRAY_SIZE(alc260_test_capture_sources),
+               .input_mux = alc260_test_capture_sources,
+       },
+#endif
+};
+
+static int patch_alc260(struct hda_codec *codec)
+{
+       struct alc_spec *spec;
+       int err, board_config;
+
+       spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+       if (spec == NULL)
+               return -ENOMEM;
+
+       codec->spec = spec;
+
+       board_config = snd_hda_check_board_config(codec, alc260_cfg_tbl);
+       if (board_config < 0 || board_config >= ALC260_MODEL_LAST) {
+               snd_printd(KERN_INFO "hda_codec: Unknown model for ALC260\n");
+               board_config = ALC260_AUTO;
+       }
+
+       if (board_config == ALC260_AUTO) {
+               /* automatic parse from the BIOS config */
+               err = alc260_parse_auto_config(codec);
+               if (err < 0) {
+                       alc_free(codec);
+                       return err;
+               } else if (! err) {
+                       printk(KERN_INFO "hda_codec: Cannot set up configuration from BIOS.  Using base mode...\n");
+                       board_config = ALC260_BASIC;
+               }
+       }
+
+       if (board_config != ALC260_AUTO)
+               setup_preset(spec, &alc260_presets[board_config]);
 
        spec->stream_name_analog = "ALC260 Analog";
        spec->stream_analog_playback = &alc260_pcm_analog_playback;
        spec->stream_analog_capture = &alc260_pcm_analog_capture;
 
-       spec->multiout.max_channels = spec->channel_mode[0].channels;
-       spec->multiout.num_dacs = ARRAY_SIZE(alc260_dac_nids);
-       spec->multiout.dac_nids = alc260_dac_nids;
-
-       spec->input_mux = &alc260_capture_source;
-       spec->num_adc_nids = ARRAY_SIZE(alc260_adc_nids);
-       spec->adc_nids = alc260_adc_nids;
+       spec->stream_name_digital = "ALC260 Digital";
+       spec->stream_digital_playback = &alc260_pcm_digital_playback;
+       spec->stream_digital_capture = &alc260_pcm_digital_capture;
 
        codec->patch_ops = alc_patch_ops;
+       if (board_config == ALC260_AUTO)
+               spec->init_hook = alc260_auto_init;
 
        return 0;
 }
 
+
 /*
  * ALC882 support
  *
@@ -1263,8 +3986,10 @@ static int patch_alc260(struct hda_codec *codec)
  * In addition, an independent DAC for the multi-playback (not used in this
  * driver yet).
  */
+#define ALC882_DIGOUT_NID      0x06
+#define ALC882_DIGIN_NID       0x0a
 
-static struct alc_channel_mode alc882_ch_modes[1] = {
+static struct hda_channel_mode alc882_ch_modes[1] = {
        { 8, NULL }
 };
 
@@ -1273,10 +3998,9 @@ static hda_nid_t alc882_dac_nids[4] = {
        0x02, 0x03, 0x04, 0x05
 };
 
-static hda_nid_t alc882_adc_nids[3] = {
-       /* ADC0-2 */
-       0x07, 0x08, 0x09,
-};
+/* identical with ALC880 */
+#define alc882_adc_nids                alc880_adc_nids
+#define alc882_adc_nids_alt    alc880_adc_nids_alt
 
 /* input MUX */
 /* FIXME: should be a matrix-type input source selection */
@@ -1290,11 +4014,10 @@ static struct hda_input_mux alc882_capture_source = {
                { "CD", 0x4 },
        },
 };
-
 #define alc882_mux_enum_info alc_mux_enum_info
 #define alc882_mux_enum_get alc_mux_enum_get
 
-static int alc882_mux_enum_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *ucontrol)
+static int alc882_mux_enum_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol)
 {
        struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
        struct alc_spec *spec = codec->spec;
@@ -1319,20 +4042,47 @@ static int alc882_mux_enum_put(snd_kcontrol_t *kcontrol, snd_ctl_elem_value_t *u
        return 1;
 }
 
+/*
+ * 6ch mode
+ */
+static struct hda_verb alc882_sixstack_ch6_init[] = {
+       { 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00 },
+       { 0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+       { 0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+       { 0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+       { } /* end */
+};
+
+/*
+ * 8ch mode
+ */
+static struct hda_verb alc882_sixstack_ch8_init[] = {
+       { 0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+       { 0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+       { 0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+       { 0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT },
+       { } /* end */
+};
+
+static struct hda_channel_mode alc882_sixstack_modes[2] = {
+       { 6, alc882_sixstack_ch6_init },
+       { 8, alc882_sixstack_ch8_init },
+};
+
 /* Pin assignment: Front=0x14, Rear=0x15, CLFE=0x16, Side=0x17
  *                 Mic=0x18, Front Mic=0x19, Line-In=0x1a, HP=0x1b
  */
-static snd_kcontrol_new_t alc882_base_mixer[] = {
+static struct snd_kcontrol_new alc882_base_mixer[] = {
        HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE("Front Playback Switch", 0x14, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Front Playback Switch", 0x0c, 2, HDA_INPUT),
        HDA_CODEC_VOLUME("Surround Playback Volume", 0x0d, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE("Surround Playback Switch", 0x15, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Surround Playback Switch", 0x0d, 2, HDA_INPUT),
        HDA_CODEC_VOLUME_MONO("Center Playback Volume", 0x0e, 1, 0x0, HDA_OUTPUT),
        HDA_CODEC_VOLUME_MONO("LFE Playback Volume", 0x0e, 2, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE_MONO("Center Playback Switch", 0x16, 1, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE_MONO("LFE Playback Switch", 0x16, 2, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE_MONO("Center Playback Switch", 0x0e, 1, 2, HDA_INPUT),
+       HDA_BIND_MUTE_MONO("LFE Playback Switch", 0x0e, 2, 2, HDA_INPUT),
        HDA_CODEC_VOLUME("Side Playback Volume", 0x0f, 0x0, HDA_OUTPUT),
-       HDA_CODEC_MUTE("Side Playback Switch", 0x17, 0x0, HDA_OUTPUT),
+       HDA_BIND_MUTE("Side Playback Switch", 0x0f, 2, HDA_INPUT),
        HDA_CODEC_MUTE("Headphone Playback Switch", 0x1b, 0x0, HDA_OUTPUT),
        HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
        HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
@@ -1362,133 +4112,1602 @@ static snd_kcontrol_new_t alc882_base_mixer[] = {
        { } /* end */
 };
 
+static struct snd_kcontrol_new alc882_chmode_mixer[] = {
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Channel Mode",
+               .info = alc_ch_mode_info,
+               .get = alc_ch_mode_get,
+               .put = alc_ch_mode_put,
+       },
+       { } /* end */
+};
+
 static struct hda_verb alc882_init_verbs[] = {
        /* Front mixer: unmute input/output amp left and right (volume = 0) */
-       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
        /* Rear mixer */
-       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
        /* CLFE mixer */
-       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
        /* Side mixer */
-       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000},
-       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
 
-       /* Front Pin: to output mode */
-       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
-       /* Front Pin: mute amp left and right (no volume) */
-       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
-       /* select Front mixer (0x0c, index 0) */
+       /* Front Pin: output 0 (0x0c) */
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
        {0x14, AC_VERB_SET_CONNECT_SEL, 0x00},
-       /* Rear Pin */
-       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
-       /* Rear Pin: mute amp left and right (no volume) */
-       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
-       /* select Rear mixer (0x0d, index 1) */
+       /* Rear Pin: output 1 (0x0d) */
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
        {0x15, AC_VERB_SET_CONNECT_SEL, 0x01},
-       /* CLFE Pin */
-       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
-       /* CLFE Pin: mute amp left and right (no volume) */
-       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
-       /* select CLFE mixer (0x0e, index 2) */
+       /* CLFE Pin: output 2 (0x0e) */
+       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
        {0x16, AC_VERB_SET_CONNECT_SEL, 0x02},
-       /* Side Pin */
-       {0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
-       /* Side Pin: mute amp left and right (no volume) */
-       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
-       /* select Side mixer (0x0f, index 3) */
+       /* Side Pin: output 3 (0x0f) */
+       {0x17, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
        {0x17, AC_VERB_SET_CONNECT_SEL, 0x03},
-       /* Headphone Pin */
-       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
-       /* Headphone Pin: mute amp left and right (no volume) */
-       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
-       /* select Front mixer (0x0c, index 0) */
+       /* Mic (rear) pin: input vref at 80% */
+       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       /* Front Mic pin: input vref at 80% */
+       {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       /* Line In pin: input */
+       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       /* Line-2 In: Headphone output (output 0 - 0x0c) */
+       {0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
        {0x1b, AC_VERB_SET_CONNECT_SEL, 0x00},
-       /* Mic (rear) pin widget for input and vref at 80% */
-       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
-       /* Front Mic pin widget for input and vref at 80% */
-       {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
-       /* Line In pin widget for input */
-       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
        /* CD pin widget for input */
-       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
+       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
 
        /* FIXME: use matrix-type input source selection */
        /* Mixer elements: 0x18, 19, 1a, 1b, 1c, 1d, 14, 15, 16, 17, 0b */
        /* Input mixer1: unmute Mic, F-Mic, Line, CD inputs */
-       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
-       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
-       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)},
        /* Input mixer2 */
-       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
-       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
-       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)},
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)},
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)},
        /* Input mixer3 */
-       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
-       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
-       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
-       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
-       /* ADC1: unmute amp left and right */
-       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000},
-       /* ADC2: unmute amp left and right */
-       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000},
-       /* ADC3: unmute amp left and right */
-       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, 0x7000},
-
-       /* Unmute front loopback */
-       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8))},
-       /* Unmute rear loopback */
-       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8))},
-       /* Mute CLFE loopback */
-       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x01 << 8))},
-       /* Unmute side loopback */
-       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8))},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(3)},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(2)},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(4)},
+       /* ADC1: mute amp left and right */
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x07, AC_VERB_SET_CONNECT_SEL, 0x00},
+       /* ADC2: mute amp left and right */
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x08, AC_VERB_SET_CONNECT_SEL, 0x00},
+       /* ADC3: mute amp left and right */
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x09, AC_VERB_SET_CONNECT_SEL, 0x00},
 
        { }
 };
 
-static int patch_alc882(struct hda_codec *codec)
-{
+/*
+ * generic initialization of ADC, input mixers and output mixers
+ */
+static struct hda_verb alc882_auto_init_verbs[] = {
+       /*
+        * Unmute ADC0-2 and set the default input to mic-in
+        */
+       {0x07, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x08, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x09, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+
+       /* Unmute input amps (CD, Line In, Mic 1 & Mic 2) of the analog-loopback
+        * mixer widget
+        * Note: PASD motherboards use the Line In 2 as the input for front panel
+        * mic (mic 2)
+        */
+       /* Amp Indices: Mic1 = 0, Mic2 = 1, Line1 = 2, Line2 = 3, CD = 4 */
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(3)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(4)},
+
+       /*
+        * Set up output mixers (0x0c - 0x0f)
+        */
+       /* set vol=0 to output mixers */
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       /* set up input amps for analog loopback */
+       /* Amp Indices: DAC = 0, mixer = 1 */
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0f, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x26, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x26, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+
+       /* FIXME: use matrix-type input source selection */
+       /* Mixer elements: 0x18, 19, 1a, 1b, 1c, 1d, 14, 15, 16, 17, 0b */
+       /* Input mixer1: unmute Mic, F-Mic, Line, CD inputs */
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+       /* Input mixer2 */
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+       /* Input mixer3 */
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+
+       { }
+};
+
+/* capture mixer elements */
+static struct snd_kcontrol_new alc882_capture_alt_mixer[] = {
+       HDA_CODEC_VOLUME("Capture Volume", 0x08, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x08, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME_IDX("Capture Volume", 1, 0x09, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE_IDX("Capture Switch", 1, 0x09, 0x0, HDA_INPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               /* The multiple "Capture Source" controls confuse alsamixer
+                * So call somewhat different..
+                * FIXME: the controls appear in the "playback" view!
+                */
+               /* .name = "Capture Source", */
+               .name = "Input Source",
+               .count = 2,
+               .info = alc882_mux_enum_info,
+               .get = alc882_mux_enum_get,
+               .put = alc882_mux_enum_put,
+       },
+       { } /* end */
+};
+
+static struct snd_kcontrol_new alc882_capture_mixer[] = {
+       HDA_CODEC_VOLUME("Capture Volume", 0x07, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x07, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME_IDX("Capture Volume", 1, 0x08, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE_IDX("Capture Switch", 1, 0x08, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME_IDX("Capture Volume", 2, 0x09, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE_IDX("Capture Switch", 2, 0x09, 0x0, HDA_INPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               /* The multiple "Capture Source" controls confuse alsamixer
+                * So call somewhat different..
+                * FIXME: the controls appear in the "playback" view!
+                */
+               /* .name = "Capture Source", */
+               .name = "Input Source",
+               .count = 3,
+               .info = alc882_mux_enum_info,
+               .get = alc882_mux_enum_get,
+               .put = alc882_mux_enum_put,
+       },
+       { } /* end */
+};
+
+/* pcm configuration: identical with ALC880 */
+#define alc882_pcm_analog_playback     alc880_pcm_analog_playback
+#define alc882_pcm_analog_capture      alc880_pcm_analog_capture
+#define alc882_pcm_digital_playback    alc880_pcm_digital_playback
+#define alc882_pcm_digital_capture     alc880_pcm_digital_capture
+
+/*
+ * configuration and preset
+ */
+static struct hda_board_config alc882_cfg_tbl[] = {
+       { .modelname = "3stack-dig", .config = ALC882_3ST_DIG },
+       { .modelname = "6stack-dig", .config = ALC882_6ST_DIG },
+       { .pci_subvendor = 0x1462, .pci_subdevice = 0x6668, .config = ALC882_6ST_DIG }, /* MSI  */
+       { .pci_subvendor = 0x105b, .pci_subdevice = 0x6668, .config = ALC882_6ST_DIG }, /* Foxconn */
+       { .pci_subvendor = 0x1019, .pci_subdevice = 0x6668, .config = ALC882_6ST_DIG }, /* ECS */
+       { .modelname = "auto", .config = ALC882_AUTO },
+       {}
+};
+
+static struct alc_config_preset alc882_presets[] = {
+       [ALC882_3ST_DIG] = {
+               .mixers = { alc882_base_mixer },
+               .init_verbs = { alc882_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc882_dac_nids),
+               .dac_nids = alc882_dac_nids,
+               .dig_out_nid = ALC882_DIGOUT_NID,
+               .num_adc_nids = ARRAY_SIZE(alc882_adc_nids),
+               .adc_nids = alc882_adc_nids,
+               .dig_in_nid = ALC882_DIGIN_NID,
+               .num_channel_mode = ARRAY_SIZE(alc882_ch_modes),
+               .channel_mode = alc882_ch_modes,
+               .input_mux = &alc882_capture_source,
+       },
+       [ALC882_6ST_DIG] = {
+               .mixers = { alc882_base_mixer, alc882_chmode_mixer },
+               .init_verbs = { alc882_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc882_dac_nids),
+               .dac_nids = alc882_dac_nids,
+               .dig_out_nid = ALC882_DIGOUT_NID,
+               .num_adc_nids = ARRAY_SIZE(alc882_adc_nids),
+               .adc_nids = alc882_adc_nids,
+               .dig_in_nid = ALC882_DIGIN_NID,
+               .num_channel_mode = ARRAY_SIZE(alc882_sixstack_modes),
+               .channel_mode = alc882_sixstack_modes,
+               .input_mux = &alc882_capture_source,
+       },
+};
+
+
+/*
+ * BIOS auto configuration
+ */
+/* set up one output pin found by the BIOS parser: program the pin widget
+ * control (pin_type), unmute the pin's output amp, and select the
+ * connection that routes it to the DAC chosen by dac_idx
+ */
+static void alc882_auto_set_output_and_unmute(struct hda_codec *codec,
+                                             hda_nid_t nid, int pin_type,
+                                             int dac_idx)
+{
+       /* set as output */
+       struct alc_spec *spec = codec->spec;
+       int idx; 
+       
+       /* connection-select index: DAC NIDs 0x02..0x05 map to 0..3,
+        * the extra DAC 0x25 maps to index 4
+        */
+       if (spec->multiout.dac_nids[dac_idx] == 0x25)
+               idx = 4;
+       else
+               idx = spec->multiout.dac_nids[dac_idx] - 2;
+
+       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, pin_type);
+       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE);
+       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_CONNECT_SEL, idx);
+
+}
+
+/* initialize all BIOS-detected line-out pins (front .. side); each pin
+ * is routed to the DAC of the same channel index
+ */
+static void alc882_auto_init_multi_out(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       int i;
+
+       for (i = 0; i <= HDA_SIDE; i++) {
+               hda_nid_t nid = spec->autocfg.line_out_pins[i]; 
+               if (nid)
+                       alc882_auto_set_output_and_unmute(codec, nid, PIN_OUT, i);
+       }
+}
+
+/* initialize the BIOS-detected headphone pin, routed to DAC 0 so it
+ * mirrors the front channel
+ */
+static void alc882_auto_init_hp_out(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       hda_nid_t pin;
+
+       pin = spec->autocfg.hp_pin;
+       if (pin) /* connect to front */
+               alc882_auto_set_output_and_unmute(codec, pin, PIN_HP, 0); /* use dac 0 */
+}
+
+/* the ALC882 input-pin and CD-pin layout matches the ALC880 */
+#define alc882_is_input_pin(nid)       alc880_is_input_pin(nid)
+#define ALC882_PIN_CD_NID              ALC880_PIN_CD_NID
+
+/* initialize the BIOS-detected input pins: set each as an input (mic
+ * pins additionally get the 80% VREF bias) and mute the pin's output amp
+ * (the CD pin has no output amp, hence the exception)
+ */
+static void alc882_auto_init_analog_input(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       int i;
+
+       for (i = 0; i < AUTO_PIN_LAST; i++) {
+               hda_nid_t nid = spec->autocfg.input_pins[i];
+               if (alc882_is_input_pin(nid)) {
+                       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
+                                           i <= AUTO_PIN_FRONT_MIC ? PIN_VREF80 : PIN_IN);
+                       if (nid != ALC882_PIN_CD_NID)
+                               snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE,
+                                                   AMP_OUT_MUTE);
+               }
+       }
+}
+
+/* almost identical with ALC880 parser... */
+/* returns <0 on error, 0 when no valid BIOS pin config was found, >0 on
+ * success; on success the ALC880 init verbs are replaced by the
+ * ALC882-specific auto-init verbs
+ */
+static int alc882_parse_auto_config(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       int err = alc880_parse_auto_config(codec);
+
+       if (err < 0)
+               return err;
+       else if (err > 0)
+               /* hack - override the init verbs */
+               spec->init_verbs[0] = alc882_auto_init_verbs;
+       return err;
+}
+
+/* additional initialization for auto-configuration model */
+/* installed as spec->init_hook by patch_alc882() when the board config
+ * is ALC882_AUTO
+ */
+static void alc882_auto_init(struct hda_codec *codec)
+{
+       alc882_auto_init_multi_out(codec);
+       alc882_auto_init_hp_out(codec);
+       alc882_auto_init_analog_input(codec);
+}
+
+/*
+ *  ALC882 Headphone poll in 3.5.1a or 3.5.2
+ */
+
+/* create the codec instance for an ALC882: pick a preset from the PCI
+ * subsystem id (or fall back to BIOS auto-probing), then set up the PCM
+ * streams and the ADC/capture mixers; returns 0 or a negative errno
+ */
+static int patch_alc882(struct hda_codec *codec)
+{
+       struct alc_spec *spec;
+       int err, board_config;
+
+       spec = kzalloc(sizeof(*spec), GFP_KERNEL);
+       if (spec == NULL)
+               return -ENOMEM;
+
+       codec->spec = spec;
+
+       board_config = snd_hda_check_board_config(codec, alc882_cfg_tbl);
+
+       if (board_config < 0 || board_config >= ALC882_MODEL_LAST) {
+               printk(KERN_INFO "hda_codec: Unknown model for ALC882, trying auto-probe from BIOS...\n");
+               board_config = ALC882_AUTO;
+       }
+
+       if (board_config == ALC882_AUTO) {
+               /* automatic parse from the BIOS config */
+               err = alc882_parse_auto_config(codec);
+               if (err < 0) {
+                       alc_free(codec);
+                       return err;
+               } else if (! err) {
+                       printk(KERN_INFO "hda_codec: Cannot set up configuration from BIOS.  Using base mode...\n");
+                       board_config = ALC882_3ST_DIG;
+               }
+       }
+
+       if (board_config != ALC882_AUTO)
+               setup_preset(spec, &alc882_presets[board_config]);
+
+       spec->stream_name_analog = "ALC882 Analog";
+       spec->stream_analog_playback = &alc882_pcm_analog_playback;
+       spec->stream_analog_capture = &alc882_pcm_analog_capture;
+
+       spec->stream_name_digital = "ALC882 Digital";
+       spec->stream_digital_playback = &alc882_pcm_digital_playback;
+       spec->stream_digital_capture = &alc882_pcm_digital_capture;
+
+       /* when the preset did not provide ADCs, choose the ADC set
+        * according to whether NID 0x07 is a valid audio-in widget
+        */
+       if (! spec->adc_nids && spec->input_mux) {
+               /* check whether NID 0x07 is valid */
+               unsigned int wcap = get_wcaps(codec, 0x07);
+               wcap = (wcap & AC_WCAP_TYPE) >> AC_WCAP_TYPE_SHIFT; /* get type */
+               if (wcap != AC_WID_AUD_IN) {
+                       spec->adc_nids = alc882_adc_nids_alt;
+                       spec->num_adc_nids = ARRAY_SIZE(alc882_adc_nids_alt);
+                       spec->mixers[spec->num_mixers] = alc882_capture_alt_mixer;
+                       spec->num_mixers++;
+               } else {
+                       spec->adc_nids = alc882_adc_nids;
+                       spec->num_adc_nids = ARRAY_SIZE(alc882_adc_nids);
+                       spec->mixers[spec->num_mixers] = alc882_capture_mixer;
+                       spec->num_mixers++;
+               }
+       }
+
+       codec->patch_ops = alc_patch_ops;
+       if (board_config == ALC882_AUTO)
+               spec->init_hook = alc882_auto_init;
+
+       return 0;
+}
+
+/*
+ * ALC262 support
+ */
+
+/* the ALC262 shares its digital I/O NIDs with the ALC880, its DACs and
+ * channel modes with the ALC260, and its ADCs/capture source with the
+ * ALC882
+ */
+#define ALC262_DIGOUT_NID      ALC880_DIGOUT_NID
+#define ALC262_DIGIN_NID       ALC880_DIGIN_NID
+
+#define alc262_dac_nids                alc260_dac_nids
+#define alc262_adc_nids                alc882_adc_nids
+#define alc262_adc_nids_alt    alc882_adc_nids_alt
+
+#define alc262_modes           alc260_modes
+#define alc262_capture_source  alc882_capture_source
+
+/* base mixer for the ALC262: front/headphone/mono outputs plus the
+ * analog-loopback inputs on mixer NID 0x0b
+ */
+static struct snd_kcontrol_new alc262_base_mixer[] = {
+       HDA_CODEC_VOLUME("Front Playback Volume", 0x0c, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Front Playback Switch", 0x14, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
+       HDA_CODEC_VOLUME("Line Playback Volume", 0x0b, 0x02, HDA_INPUT),
+       HDA_CODEC_MUTE("Line Playback Switch", 0x0b, 0x02, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME("Front Mic Playback Volume", 0x0b, 0x01, HDA_INPUT),
+       HDA_CODEC_MUTE("Front Mic Playback Switch", 0x0b, 0x01, HDA_INPUT),
+       /* HDA_CODEC_VOLUME("PC Beep Playback Volume", 0x0b, 0x05, HDA_INPUT),
+          HDA_CODEC_MUTE("PC Beep Playback Switch", 0x0b, 0x05, HDA_INPUT), */
+       HDA_CODEC_VOLUME("Headphone Playback Volume", 0x0D, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Headphone Playback Switch", 0x15, 0x0, HDA_OUTPUT),
+       HDA_CODEC_VOLUME_MONO("Mono Playback Volume", 0x0e, 2, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE_MONO("Mono Playback Switch", 0x16, 2, 0x0, HDA_OUTPUT),
+       { } /* end */
+};
+
+/* capture mixers are shared with the ALC882 */
+#define alc262_capture_mixer           alc882_capture_mixer
+#define alc262_capture_alt_mixer       alc882_capture_alt_mixer
+
+/*
+ * generic initialization of ADC, input mixers and output mixers
+ */
+static struct hda_verb alc262_init_verbs[] = {
+       /*
+        * Unmute ADC0-2 and set the default input to mic-in
+        */
+       {0x07, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x08, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x09, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+
+       /* Unmute input amps (CD, Line In, Mic 1 & Mic 2) of the analog-loopback
+        * mixer widget
+        * Note: PASD motherboards uses the Line In 2 as the input for front panel
+        * mic (mic 2)
+        */
+       /* Amp Indices: Mic1 = 0, Mic2 = 1, Line1 = 2, Line2 = 3, CD = 4 */
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(3)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(4)},
+
+       /*
+        * Set up output mixers (0x0c - 0x0e)
+        */
+       /* set vol=0 to output mixers */
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       /* set up input amps for analog loopback */
+       /* Amp Indices: DAC = 0, mixer = 1 */
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+
+       /* pin widget control: 0x40 = output, 0xc0 = headphone out,
+        * 0x24 = input + VREF80 (mic bias), 0x20 = input
+        */
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+       {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0},
+       {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
+       {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
+       {0x1a, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
+       {0x1c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
+
+       {0x14, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, 0x0000},
+       
+       {0x14, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x15, AC_VERB_SET_CONNECT_SEL, 0x01},
+       
+       /* FIXME: use matrix-type input source selection */
+       /* Mixer elements: 0x18, 19, 1a, 1b, 1c, 1d, 14, 15, 16, 17, 0b */
+       /* Input mixer1: unmute Mic, F-Mic, Line, CD inputs */
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+       /* Input mixer2 */
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+       /* Input mixer3 */
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},      
+
+       { }
+};
+
+/*
+ * fujitsu model
+ *  0x14 = headphone/spdif-out, 0x15 = internal speaker
+ */
+
+/* unsolicited-event tag used for headphone jack sensing */
+#define ALC_HP_EVENT   0x37
+
+/* enable jack-sense unsolicited events on the headphone pin (0x14) */
+static struct hda_verb alc262_fujitsu_unsol_verbs[] = {
+       {0x14, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | ALC_HP_EVENT},
+       {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+       {}
+};
+
+/* capture source for the fujitsu model: mic (mux index 0) and CD (4) */
+static struct hda_input_mux alc262_fujitsu_capture_source = {
+       .num_items = 2,
+       .items = {
+               { "Mic", 0x0 },
+               { "CD", 0x4 },
+       },
+};
+
+/* mute/unmute internal speaker according to the hp jack and mute state */
+/* the jack state read from pin 0x14 is cached in spec->jack_present;
+ * pass force=1 to re-read it (as the unsolicited event handler does)
+ */
+static void alc262_fujitsu_automute(struct hda_codec *codec, int force)
+{
+       struct alc_spec *spec = codec->spec;
+       unsigned int mute;
+
+       if (force || ! spec->sense_updated) {
+               unsigned int present;
+               /* need to execute and sync at first */
+               snd_hda_codec_read(codec, 0x14, 0, AC_VERB_SET_PIN_SENSE, 0);
+               present = snd_hda_codec_read(codec, 0x14, 0,
+                                        AC_VERB_GET_PIN_SENSE, 0);
+               /* bit 31 of the pin-sense response = presence detect */
+               spec->jack_present = (present & 0x80000000) != 0;
+               spec->sense_updated = 1;
+       }
+       if (spec->jack_present) {
+               /* mute internal speaker */
+               snd_hda_codec_amp_update(codec, 0x15, 0, HDA_OUTPUT, 0,
+                                        0x80, 0x80);
+               snd_hda_codec_amp_update(codec, 0x15, 1, HDA_OUTPUT, 0,
+                                        0x80, 0x80);
+       } else {
+               /* unmute internal speaker if necessary */
+               /* copy the headphone amp's mute bit (0x80) per channel */
+               mute = snd_hda_codec_amp_read(codec, 0x14, 0, HDA_OUTPUT, 0);
+               snd_hda_codec_amp_update(codec, 0x15, 0, HDA_OUTPUT, 0,
+                                        0x80, mute & 0x80);
+               mute = snd_hda_codec_amp_read(codec, 0x14, 1, HDA_OUTPUT, 0);
+               snd_hda_codec_amp_update(codec, 0x15, 1, HDA_OUTPUT, 0,
+                                        0x80, mute & 0x80);
+       }
+}
+
+/* unsolicited event for HP jack sensing */
+static void alc262_fujitsu_unsol_event(struct hda_codec *codec,
+                                      unsigned int res)
+{
+       /* the event tag lives in the top bits (31:26) of the response */
+       if ((res >> 26) != ALC_HP_EVENT)
+               return;
+       alc262_fujitsu_automute(codec, 1);
+}
+
+/* bind volumes of both NID 0x0c and 0x0d */
+/* the reported change status comes from the primary amp (0x0c) only;
+ * 0x0d is kept in sync with the same per-channel volume (mask 0x7f)
+ */
+static int alc262_fujitsu_master_vol_put(struct snd_kcontrol *kcontrol,
+                                        struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       long *valp = ucontrol->value.integer.value;
+       int change;
+
+       change = snd_hda_codec_amp_update(codec, 0x0c, 0, HDA_OUTPUT, 0,
+                                         0x7f, valp[0] & 0x7f);
+       change |= snd_hda_codec_amp_update(codec, 0x0c, 1, HDA_OUTPUT, 0,
+                                          0x7f, valp[1] & 0x7f);
+       snd_hda_codec_amp_update(codec, 0x0d, 0, HDA_OUTPUT, 0,
+                                0x7f, valp[0] & 0x7f);
+       snd_hda_codec_amp_update(codec, 0x0d, 1, HDA_OUTPUT, 0,
+                                0x7f, valp[1] & 0x7f);
+       return change;
+}
+
+/* bind hp and internal speaker mute (with plug check) */
+/* after updating the headphone (0x14) mute bits, re-run the automute so
+ * the internal speaker follows; during resume the jack is re-sensed
+ */
+static int alc262_fujitsu_master_sw_put(struct snd_kcontrol *kcontrol,
+                                        struct snd_ctl_elem_value *ucontrol)
+{
+       struct hda_codec *codec = snd_kcontrol_chip(kcontrol);
+       long *valp = ucontrol->value.integer.value;
+       int change;
+
+       change = snd_hda_codec_amp_update(codec, 0x14, 0, HDA_OUTPUT, 0,
+                                         0x80, valp[0] ? 0 : 0x80);
+       change |= snd_hda_codec_amp_update(codec, 0x14, 1, HDA_OUTPUT, 0,
+                                          0x80, valp[1] ? 0 : 0x80);
+       if (change || codec->in_resume)
+               alc262_fujitsu_automute(codec, codec->in_resume);
+       return change;
+}
+
+/* mixer for the fujitsu model; the Master controls are bound across the
+ * headphone and internal-speaker amps via the put callbacks above
+ */
+static struct snd_kcontrol_new alc262_fujitsu_mixer[] = {
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Master Playback Volume",
+               .info = snd_hda_mixer_amp_volume_info,
+               .get = snd_hda_mixer_amp_volume_get,
+               .put = alc262_fujitsu_master_vol_put,
+               .private_value = HDA_COMPOSE_AMP_VAL(0x0c, 3, 0, HDA_OUTPUT),
+       },
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Master Playback Switch",
+               .info = snd_hda_mixer_amp_switch_info,
+               .get = snd_hda_mixer_amp_switch_get,
+               .put = alc262_fujitsu_master_sw_put,
+               .private_value = HDA_COMPOSE_AMP_VAL(0x14, 3, 0, HDA_OUTPUT),
+       },
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x0b, 0x04, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+       { } /* end */
+};
+
+/* add playback controls from the parsed DAC table */
+/* only a single DAC (NID 0x02) is used; volumes live on mixer 0x0c (or
+ * the mono mixer 0x0e for pin 0x16), mutes sit on the pins themselves
+ */
+static int alc262_auto_create_multi_out_ctls(struct alc_spec *spec, const struct auto_pin_cfg *cfg)
+{
+       hda_nid_t nid;
+       int err;
+
+       spec->multiout.num_dacs = 1;    /* only use one dac */
+       spec->multiout.dac_nids = spec->private_dac_nids;
+       spec->multiout.dac_nids[0] = 2;
+
+       nid = cfg->line_out_pins[0];
+       if (nid) {
+               if ((err = add_control(spec, ALC_CTL_WIDGET_VOL, "Front Playback Volume",
+                                      HDA_COMPOSE_AMP_VAL(0x0c, 3, 0, HDA_OUTPUT))) < 0)
+                       return err;
+               if ((err = add_control(spec, ALC_CTL_WIDGET_MUTE, "Front Playback Switch",
+                                      HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
+                       return err;
+       }
+
+       nid = cfg->speaker_pins[0];
+       if (nid) {
+               /* pin 0x16 is the mono pin: use mono (chs = 2) amp values */
+               if (nid == 0x16) {
+                       if ((err = add_control(spec, ALC_CTL_WIDGET_VOL, "Speaker Playback Volume",
+                                              HDA_COMPOSE_AMP_VAL(0x0e, 2, 0, HDA_OUTPUT))) < 0)
+                               return err;
+                       if ((err = add_control(spec, ALC_CTL_WIDGET_MUTE, "Speaker Playback Switch",
+                                              HDA_COMPOSE_AMP_VAL(nid, 2, 0, HDA_OUTPUT))) < 0)
+                               return err;
+               } else {
+                       if ((err = add_control(spec, ALC_CTL_WIDGET_MUTE, "Speaker Playback Switch",
+                                              HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
+                               return err;
+               }
+       }
+       nid = cfg->hp_pin;
+       if (nid) {
+               /* spec->multiout.hp_nid = 2; */
+               if (nid == 0x16) {
+                       if ((err = add_control(spec, ALC_CTL_WIDGET_VOL, "Headphone Playback Volume",
+                                              HDA_COMPOSE_AMP_VAL(0x0e, 2, 0, HDA_OUTPUT))) < 0)
+                               return err;
+                       if ((err = add_control(spec, ALC_CTL_WIDGET_MUTE, "Headphone Playback Switch",
+                                              HDA_COMPOSE_AMP_VAL(nid, 2, 0, HDA_OUTPUT))) < 0)
+                               return err;
+               } else {
+                       if ((err = add_control(spec, ALC_CTL_WIDGET_MUTE, "Headphone Playback Switch",
+                                              HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
+                               return err;
+               }
+       }
+       return 0;       
+}
+
+/* identical with ALC880 */
+#define alc262_auto_create_analog_input_ctls alc880_auto_create_analog_input_ctls
+
+/*
+ * generic initialization of ADC, input mixers and output mixers
+ * (used by the BIOS auto-configuration; unlike alc262_init_verbs this
+ * leaves the pin widgets to alc262_auto_init)
+ */
+static struct hda_verb alc262_volume_init_verbs[] = {
+       /*
+        * Unmute ADC0-2 and set the default input to mic-in
+        */
+       {0x07, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x08, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x09, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x09, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+
+       /* Unmute input amps (CD, Line In, Mic 1 & Mic 2) of the analog-loopback
+        * mixer widget
+        * Note: PASD motherboards uses the Line In 2 as the input for front panel
+        * mic (mic 2)
+        */
+       /* Amp Indices: Mic1 = 0, Mic2 = 1, Line1 = 2, Line2 = 3, CD = 4 */
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(3)},
+       {0x0b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(4)},
+
+       /*
+        * Set up output mixers (0x0c - 0x0e)
+        */
+       /* set vol=0 to output mixers */
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+       
+       /* set up input amps for analog loopback */
+       /* Amp Indices: DAC = 0, mixer = 1 */
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0d, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x0e, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+
+       /* FIXME: use matrix-type input source selection */
+       /* Mixer elements: 0x18, 19, 1a, 1b, 1c, 1d, 14, 15, 16, 17, 0b */
+       /* Input mixer1: unmute Mic, F-Mic, Line, CD inputs */
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+       {0x24, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+       /* Input mixer2 */
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+       {0x23, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+       /* Input mixer3 */
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x00 << 8))},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x03 << 8))},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8))},
+       {0x22, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x04 << 8))},
+
+       { }
+};
+
+/* pcm configuration: identical with ALC880 */
+#define alc262_pcm_analog_playback     alc880_pcm_analog_playback
+#define alc262_pcm_analog_capture      alc880_pcm_analog_capture
+#define alc262_pcm_digital_playback    alc880_pcm_digital_playback
+#define alc262_pcm_digital_capture     alc880_pcm_digital_capture
+
+/*
+ * BIOS auto configuration
+ */
+/* returns <0 on error, 0 when no usable pin config was found in the
+ * BIOS, 1 on success
+ */
+static int alc262_parse_auto_config(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       int err;
+       static hda_nid_t alc262_ignore[] = { 0x1d, 0 };
+
+       if ((err = snd_hda_parse_pin_def_config(codec, &spec->autocfg,
+                                               alc262_ignore)) < 0)
+               return err;
+       if (! spec->autocfg.line_outs)
+               return 0; /* can't find valid BIOS pin config */
+       if ((err = alc262_auto_create_multi_out_ctls(spec, &spec->autocfg)) < 0 ||
+           (err = alc262_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0)
+               return err;
+
+       spec->multiout.max_channels = spec->multiout.num_dacs * 2;
+
+       if (spec->autocfg.dig_out_pin)
+               spec->multiout.dig_out_nid = ALC262_DIGOUT_NID;
+       if (spec->autocfg.dig_in_pin)
+               spec->dig_in_nid = ALC262_DIGIN_NID;
+
+       if (spec->kctl_alloc)
+               spec->mixers[spec->num_mixers++] = spec->kctl_alloc;
+
+       spec->init_verbs[spec->num_init_verbs++] = alc262_volume_init_verbs;
+       spec->num_mux_defs = 1;
+       spec->input_mux = &spec->private_imux;
+
+       return 1;
+}
+
+/* the pin auto-init helpers are identical with the ALC882 ones */
+#define alc262_auto_init_multi_out     alc882_auto_init_multi_out
+#define alc262_auto_init_hp_out                alc882_auto_init_hp_out
+#define alc262_auto_init_analog_input  alc882_auto_init_analog_input
+
+
+/* init callback for auto-configuration model -- overriding the default init */
+static void alc262_auto_init(struct hda_codec *codec)
+{
+       alc262_auto_init_multi_out(codec);
+       alc262_auto_init_hp_out(codec);
+       alc262_auto_init_analog_input(codec);
+}
+
+/*
+ * configuration and preset
+ */
+/* board-config lookup table: matched by model name or PCI subsystem id */
+static struct hda_board_config alc262_cfg_tbl[] = {
+       { .modelname = "basic", .config = ALC262_BASIC },
+       { .modelname = "fujitsu", .config = ALC262_FUJITSU },
+       { .pci_subvendor = 0x10cf, .pci_subdevice = 0x1397, .config = ALC262_FUJITSU },
+       { .modelname = "auto", .config = ALC262_AUTO },
+       {}
+};
+
+/* preset configurations for the static ALC262 models */
+static struct alc_config_preset alc262_presets[] = {
+       [ALC262_BASIC] = {
+               .mixers = { alc262_base_mixer },
+               .init_verbs = { alc262_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc262_dac_nids),
+               .dac_nids = alc262_dac_nids,
+               .hp_nid = 0x03,
+               .num_channel_mode = ARRAY_SIZE(alc262_modes),
+               .channel_mode = alc262_modes,
+               .input_mux = &alc262_capture_source,
+       },
+       [ALC262_FUJITSU] = {
+               .mixers = { alc262_fujitsu_mixer },
+               .init_verbs = { alc262_init_verbs, alc262_fujitsu_unsol_verbs },
+               .num_dacs = ARRAY_SIZE(alc262_dac_nids),
+               .dac_nids = alc262_dac_nids,
+               .hp_nid = 0x03,
+               .dig_out_nid = ALC262_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc262_modes),
+               .channel_mode = alc262_modes,
+               .input_mux = &alc262_fujitsu_capture_source,
+               .unsol_event = alc262_fujitsu_unsol_event,
+       },
+};
+
+/* create the codec instance for an ALC262: pick a preset via the PCI
+ * subsystem id (or BIOS auto-probe), then set up the PCM streams and
+ * the ADC/capture mixers; returns 0 or a negative errno
+ */
+static int patch_alc262(struct hda_codec *codec)
+{
        struct alc_spec *spec;
+       int board_config;
+       int err;
 
        spec = kcalloc(1, sizeof(*spec), GFP_KERNEL);
        if (spec == NULL)
                return -ENOMEM;
 
        codec->spec = spec;
+#if 0
+       /* pshou 07/11/05  set a zero PCM sample to DAC when FIFO is under-run */
+       {
+       int tmp;
+       snd_hda_codec_write(codec, 0x1a, 0, AC_VERB_SET_COEF_INDEX, 7);
+       tmp = snd_hda_codec_read(codec, 0x20, 0, AC_VERB_GET_PROC_COEF, 0);
+       snd_hda_codec_write(codec, 0x1a, 0, AC_VERB_SET_COEF_INDEX, 7);
+       snd_hda_codec_write(codec, 0x1a, 0, AC_VERB_SET_PROC_COEF, tmp | 0x80);
+       }
+#endif
+
+       board_config = snd_hda_check_board_config(codec, alc262_cfg_tbl);
+       if (board_config < 0 || board_config >= ALC262_MODEL_LAST) {
+               printk(KERN_INFO "hda_codec: Unknown model for ALC262, trying auto-probe from BIOS...\n");
+               board_config = ALC262_AUTO;
+       }
+
+       if (board_config == ALC262_AUTO) {
+               /* automatic parse from the BIOS config */
+               err = alc262_parse_auto_config(codec);
+               if (err < 0) {
+                       alc_free(codec);
+                       return err;
+               } else if (! err) {
+                       printk(KERN_INFO "hda_codec: Cannot set up configuration from BIOS.  Using base mode...\n");
+                       board_config = ALC262_BASIC;
+               }
+       }
+
+       if (board_config != ALC262_AUTO)
+               setup_preset(spec, &alc262_presets[board_config]);
+
+       spec->stream_name_analog = "ALC262 Analog";
+       spec->stream_analog_playback = &alc262_pcm_analog_playback;
+       spec->stream_analog_capture = &alc262_pcm_analog_capture;
+               
+       spec->stream_name_digital = "ALC262 Digital";
+       spec->stream_digital_playback = &alc262_pcm_digital_playback;
+       spec->stream_digital_capture = &alc262_pcm_digital_capture;
+
+       /* when the preset did not provide ADCs, choose the ADC set
+        * according to whether NID 0x07 is a valid audio-in widget
+        */
+       if (! spec->adc_nids && spec->input_mux) {
+               /* check whether NID 0x07 is valid */
+               unsigned int wcap = get_wcaps(codec, 0x07);
+
+               wcap = (wcap & AC_WCAP_TYPE) >> AC_WCAP_TYPE_SHIFT; /* get type */
+               if (wcap != AC_WID_AUD_IN) {
+                       spec->adc_nids = alc262_adc_nids_alt;
+                       spec->num_adc_nids = ARRAY_SIZE(alc262_adc_nids_alt);
+                       spec->mixers[spec->num_mixers] = alc262_capture_alt_mixer;
+                       spec->num_mixers++;
+               } else {
+                       spec->adc_nids = alc262_adc_nids;
+                       spec->num_adc_nids = ARRAY_SIZE(alc262_adc_nids);
+                       spec->mixers[spec->num_mixers] = alc262_capture_mixer;
+                       spec->num_mixers++;
+               }
+       }
+
+       codec->patch_ops = alc_patch_ops;
+       if (board_config == ALC262_AUTO)
+               spec->init_hook = alc262_auto_init;
+               
+       return 0;
+}
+
+
+/*
+ *  ALC861 channel source setting (2/6 channel selection for 3-stack)
+ */
+
+/*
+ * set the path ways for 2 channel output
+ * need to set the codec line out and mic 1 pin widgets to inputs
+ */
+static struct hda_verb alc861_threestack_ch2_init[] = {
+       /* set pin widget 1Ah (line in) for input */
+       { 0x0c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+       /* set pin widget 18h (mic1/2) for input, for mic also enable the vref */
+       { 0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+
+       /* (0x7000 | (idx << 8)) unmutes input #idx on mixer 0x15;
+        * NOTE(review): the meaning of the 0xb00c payload is not obvious
+        * from here -- confirm against the ALC861 datasheet
+        */
+        { 0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb00c },
+        { 0x15, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x01 << 8)) }, //mic
+        { 0x15, AC_VERB_SET_AMP_GAIN_MUTE, (0x7000 | (0x02 << 8)) }, //line in
+       { } /* end */
+};
+/*
+ * 6ch mode
+ * need to set the codec line out and mic 1 pin widgets to outputs
+ */
+static struct hda_verb alc861_threestack_ch6_init[] = {
+       /* set pin widget 1Ah (line in) for output (Back Surround)*/
+       { 0x0c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       /* set pin widget 18h (mic1) for output (CLFE)*/
+       { 0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+
+       { 0x0c, AC_VERB_SET_CONNECT_SEL, 0x00 },
+        { 0x0d, AC_VERB_SET_CONNECT_SEL, 0x00 },
+
+       /* (0x7080 | (idx << 8)) mutes input #idx on mixer 0x15, since the
+        * mic/line-in pins are now used as outputs
+        */
+        { 0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb080 },
+        { 0x15, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x01 << 8)) }, //mic
+        { 0x15, AC_VERB_SET_AMP_GAIN_MUTE, (0x7080 | (0x02 << 8)) }, //line in
+       { } /* end */
+};
+
+/* channel-mode table for the 3-stack model: 2ch or 6ch */
+static struct hda_channel_mode alc861_threestack_modes[2] = {
+       { 2, alc861_threestack_ch2_init },
+       { 6, alc861_threestack_ch6_init },
+};
+
+/* patch-ALC861 */
+
+/* base mixer for the ALC861 (full 6-channel output set) */
+static struct snd_kcontrol_new alc861_base_mixer[] = {
+        /* output mixer control */
+       HDA_CODEC_MUTE("Front Playback Switch", 0x03, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Surround Playback Switch", 0x06, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE_MONO("Center Playback Switch", 0x05, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE_MONO("LFE Playback Switch", 0x05, 2, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Side Playback Switch", 0x04, 0x0, HDA_OUTPUT),
+
+        /*Input mixer control */
+       /* HDA_CODEC_VOLUME("Input Playback Volume", 0x15, 0x0, HDA_OUTPUT),
+          HDA_CODEC_MUTE("Input Playback Switch", 0x15, 0x0, HDA_OUTPUT), */
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x15, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x15, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME("Line Playback Volume", 0x15, 0x02, HDA_INPUT),
+       HDA_CODEC_MUTE("Line Playback Switch", 0x15, 0x02, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x15, 0x01, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x15, 0x01, HDA_INPUT),
+       HDA_CODEC_MUTE("Front Mic Playback Switch", 0x10, 0x01, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Headphone Playback Switch", 0x1a, 0x03, HDA_INPUT),
+        /* Capture mixer control */
+       HDA_CODEC_VOLUME("Capture Volume", 0x08, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x08, 0x0, HDA_INPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Capture Source",
+               .count = 1,
+               .info = alc_mux_enum_info,
+               .get = alc_mux_enum_get,
+               .put = alc_mux_enum_put,
+       },
+       { } /* end */
+};
+
+/* mixer for the 3-stack model: like the base mixer but without the Side
+ * output and with the added Channel Mode (2ch/6ch) control
+ */
+static struct snd_kcontrol_new alc861_3ST_mixer[] = {
+        /* output mixer control */
+       HDA_CODEC_MUTE("Front Playback Switch", 0x03, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Surround Playback Switch", 0x06, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE_MONO("Center Playback Switch", 0x05, 1, 0x0, HDA_OUTPUT),
+       HDA_CODEC_MUTE_MONO("LFE Playback Switch", 0x05, 2, 0x0, HDA_OUTPUT),
+       /*HDA_CODEC_MUTE("Side Playback Switch", 0x04, 0x0, HDA_OUTPUT), */
+
+       /* Input mixer control */
+       /* HDA_CODEC_VOLUME("Input Playback Volume", 0x15, 0x0, HDA_OUTPUT),
+          HDA_CODEC_MUTE("Input Playback Switch", 0x15, 0x0, HDA_OUTPUT), */
+       HDA_CODEC_VOLUME("CD Playback Volume", 0x15, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("CD Playback Switch", 0x15, 0x0, HDA_INPUT),
+       HDA_CODEC_VOLUME("Line Playback Volume", 0x15, 0x02, HDA_INPUT),
+       HDA_CODEC_MUTE("Line Playback Switch", 0x15, 0x02, HDA_INPUT),
+       HDA_CODEC_VOLUME("Mic Playback Volume", 0x15, 0x01, HDA_INPUT),
+       HDA_CODEC_MUTE("Mic Playback Switch", 0x15, 0x01, HDA_INPUT),
+       HDA_CODEC_MUTE("Front Mic Playback Switch", 0x10, 0x01, HDA_OUTPUT),
+       HDA_CODEC_MUTE("Headphone Playback Switch", 0x1a, 0x03, HDA_INPUT),
+       /* Capture mixer control */
+       HDA_CODEC_VOLUME("Capture Volume", 0x08, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x08, 0x0, HDA_INPUT),
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Capture Source",
+               .count = 1,
+               .info = alc_mux_enum_info,
+               .get = alc_mux_enum_get,
+               .put = alc_mux_enum_put,
+       },
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               .name = "Channel Mode",
+               .info = alc_ch_mode_info,
+               .get = alc_ch_mode_get,
+               .put = alc_ch_mode_put,
+                .private_value = ARRAY_SIZE(alc861_threestack_modes),
+       },
+       { } /* end */
+};                     
+       
+/*
+ * generic initialization of ADC, input mixers and output mixers
+ */
+static struct hda_verb alc861_base_init_verbs[] = {
+       /*
+        * Unmute ADC0 and set the default input to mic-in
+        */
+       /* port-A for surround (rear panel) */
+       { 0x0e, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       { 0x0e, AC_VERB_SET_CONNECT_SEL, 0x00 },
+       /* port-B for mic-in (rear panel) with vref */
+       { 0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+       /* port-C for line-in (rear panel) */
+       { 0x0c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+       /* port-D for Front */
+       { 0x0b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       { 0x0b, AC_VERB_SET_CONNECT_SEL, 0x00 },
+       /* port-E for HP out (front panel) */
+       { 0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0 },
+       /* route front PCM to HP */
+       { 0x0f, AC_VERB_SET_CONNECT_SEL, 0x01 },
+       /* port-F for mic-in (front panel) with vref */
+       { 0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+       /* port-G for CLFE (rear panel) */
+       { 0x1f, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       { 0x1f, AC_VERB_SET_CONNECT_SEL, 0x00 },
+       /* port-H for side (rear panel) */
+       { 0x20, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       { 0x20, AC_VERB_SET_CONNECT_SEL, 0x00 },
+       /* CD-in */
+       { 0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+       /* route front mic to ADC1*/
+       {0x08, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       
+       /* Unmute DAC0~3 & spdif out*/
+       {0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x06, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       
+       /* Unmute Mixer 14 (mic) 1c (Line in)*/
+       {0x014, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+        {0x014, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x01c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+        {0x01c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       
+       /* Unmute Stereo Mixer 15 */
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb00c          }, //Output 0~12 step
+
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(3)}, // hp used DAC 3 (Front)
+        {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+
+       { }
+};
+
+static struct hda_verb alc861_threestack_init_verbs[] = {
+       /*
+        * Unmute ADC0 and set the default input to mic-in
+        */
+       /* port-A for surround (rear panel) */
+       { 0x0e, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00 },
+       /* port-B for mic-in (rear panel) with vref */
+       { 0x0d, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+       /* port-C for line-in (rear panel) */
+       { 0x0c, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+       /* port-D for Front */
+       { 0x0b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40 },
+       { 0x0b, AC_VERB_SET_CONNECT_SEL, 0x00 },
+       /* port-E for HP out (front panel) */
+       { 0x0f, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0 },
+       /* route front PCM to HP */
+       { 0x0f, AC_VERB_SET_CONNECT_SEL, 0x01 },
+       /* port-F for mic-in (front panel) with vref */
+       { 0x10, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 },
+       /* port-G for CLFE (rear panel) */
+       { 0x1f, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00 },
+       /* port-H for side (rear panel) */
+       { 0x20, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x00 },
+       /* CD-in */
+       { 0x11, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20 },
+       /* route front mic to ADC1*/
+       {0x08, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       /* Unmute DAC0~3 & spdif out*/
+       {0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x06, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       
+       /* Unmute Mixer 14 (mic) 1c (Line in)*/
+       {0x014, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+        {0x014, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x01c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+        {0x01c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       
+       /* Unmute Stereo Mixer 15 */
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb00c          }, //Output 0~12 step
+
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(3)}, // hp used DAC 3 (Front)
+        {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+       { }
+};
+/*
+ * generic initialization of ADC, input mixers and output mixers
+ */
+static struct hda_verb alc861_auto_init_verbs[] = {
+       /*
+        * Unmute ADC0 and set the default input to mic-in
+        */
+//     {0x08, AC_VERB_SET_CONNECT_SEL, 0x00},
+       {0x08, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       
+       /* Mute DAC0~3 (0x03-0x06); unmute only spdif out (0x07) */
+       {0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x05, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x06, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+       {0x07, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+       
+       /* Unmute Mixer 14 (mic) 1c (Line in)*/
+       {0x014, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x014, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x01c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x01c, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       
+       /* Unmute Stereo Mixer 15 */
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},
+       {0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb00c},
+
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x17, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(0)},
+       {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(1)},
+
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},    
+       {0x1a, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(3)},            
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(0)},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_MUTE(1)},
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(2)},    
+       {0x1b, AC_VERB_SET_AMP_GAIN_MUTE, AMP_IN_UNMUTE(3)},    
+
+       {0x08, AC_VERB_SET_CONNECT_SEL, 0x00},  // set Mic 1
+
+       { }
+};
+
+/* pcm configuration: identical with ALC880 */
+#define alc861_pcm_analog_playback     alc880_pcm_analog_playback
+#define alc861_pcm_analog_capture      alc880_pcm_analog_capture
+#define alc861_pcm_digital_playback    alc880_pcm_digital_playback
+#define alc861_pcm_digital_capture     alc880_pcm_digital_capture
+
+
+#define ALC861_DIGOUT_NID      0x07
+
+static struct hda_channel_mode alc861_8ch_modes[1] = {
+       { 8, NULL }
+};
+
+static hda_nid_t alc861_dac_nids[4] = {
+       /* front, surround, clfe, side */
+       0x03, 0x06, 0x05, 0x04
+};
+
+static hda_nid_t alc861_adc_nids[1] = {
+       /* ADC0-2 */
+       0x08,
+};
+
+static struct hda_input_mux alc861_capture_source = {
+       .num_items = 5,
+       .items = {
+               { "Mic", 0x0 },
+               { "Front Mic", 0x3 },
+               { "Line", 0x1 },
+               { "CD", 0x4 },
+               { "Mixer", 0x5 },
+       },
+};
+
+/* fill in the dac_nids table from the parsed pin configuration */
+static int alc861_auto_fill_dac_nids(struct alc_spec *spec, const struct auto_pin_cfg *cfg)
+{
+       int i;
+       hda_nid_t nid;
+
+       spec->multiout.dac_nids = spec->private_dac_nids;
+       for (i = 0; i < cfg->line_outs; i++) {
+               nid = cfg->line_out_pins[i];
+               if (nid) {
+                       if (i >= ARRAY_SIZE(alc861_dac_nids))
+                               continue;
+                       spec->multiout.dac_nids[i] = alc861_dac_nids[i];
+               }
+       }
+       spec->multiout.num_dacs = cfg->line_outs;
+       return 0;
+}
+
+/* add playback controls from the parsed DAC table */
+static int alc861_auto_create_multi_out_ctls(struct alc_spec *spec,
+                                            const struct auto_pin_cfg *cfg)
+{
+       char name[32];
+       static const char *chname[4] = { "Front", "Surround", NULL /*CLFE*/, "Side" };
+       hda_nid_t nid;
+       int i, idx, err;
+
+       for (i = 0; i < cfg->line_outs; i++) {
+               nid = spec->multiout.dac_nids[i];
+               if (! nid)
+                       continue;
+               if (nid == 0x05) {
+                       /* Center/LFE */
+                       if ((err = add_control(spec, ALC_CTL_BIND_MUTE, "Center Playback Switch",
+                                              HDA_COMPOSE_AMP_VAL(nid, 1, 0, HDA_OUTPUT))) < 0)
+                               return err;
+                       if ((err = add_control(spec, ALC_CTL_BIND_MUTE, "LFE Playback Switch",
+                                              HDA_COMPOSE_AMP_VAL(nid, 2, 0, HDA_OUTPUT))) < 0)
+                               return err;
+               } else {
+                       for (idx = 0; idx < ARRAY_SIZE(alc861_dac_nids) - 1; idx++)
+                               if (nid == alc861_dac_nids[idx])
+                                       break;
+                       sprintf(name, "%s Playback Switch", chname[idx]);
+                       if ((err = add_control(spec, ALC_CTL_BIND_MUTE, name,
+                                              HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
+                               return err;
+               }
+       }
+       return 0;
+}
+
+static int alc861_auto_create_hp_ctls(struct alc_spec *spec, hda_nid_t pin)
+{
+       int err;
+       hda_nid_t nid;
+
+       if (! pin)
+               return 0;
+
+       if ((pin >= 0x0b && pin <= 0x10) || pin == 0x1f || pin == 0x20) {
+               nid = 0x03;
+               if ((err = add_control(spec, ALC_CTL_WIDGET_MUTE, "Headphone Playback Switch",
+                                      HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0)
+                       return err;
+               spec->multiout.hp_nid = nid;
+       }
+       return 0;
+}
+
+/* create playback/capture controls for input pins */
+static int alc861_auto_create_analog_input_ctls(struct alc_spec *spec, const struct auto_pin_cfg *cfg)
+{
+       struct hda_input_mux *imux = &spec->private_imux;
+       int i, err, idx, idx1;
+
+       for (i = 0; i < AUTO_PIN_LAST; i++) {
+               switch(cfg->input_pins[i]) {
+               case 0x0c:
+                       idx1 = 1;
+                       idx = 2;        // Line In
+                       break;
+               case 0x0f:
+                       idx1 = 2;
+                       idx = 2;        // Line In
+                       break;
+               case 0x0d:
+                       idx1 = 0;
+                       idx = 1;        // Mic In 
+                       break;
+               case 0x10:      
+                       idx1 = 3;
+                       idx = 1;        // Mic In 
+                       break;
+               case 0x11:
+                       idx1 = 4;
+                       idx = 0;        // CD
+                       break;
+               default:
+                       continue;
+               }
+
+               err = new_analog_input(spec, cfg->input_pins[i],
+                                      auto_pin_cfg_labels[i], idx, 0x15);
+               if (err < 0)
+                       return err;
+
+               imux->items[imux->num_items].label = auto_pin_cfg_labels[i];
+               imux->items[imux->num_items].index = idx1;
+               imux->num_items++;      
+       }
+       return 0;
+}
+
+static struct snd_kcontrol_new alc861_capture_mixer[] = {
+       HDA_CODEC_VOLUME("Capture Volume", 0x08, 0x0, HDA_INPUT),
+       HDA_CODEC_MUTE("Capture Switch", 0x08, 0x0, HDA_INPUT),
+
+       {
+               .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+               /* The multiple "Capture Source" controls confuse alsamixer
+                * So call somewhat different..
+                *FIXME: the controls appear in the "playback" view!
+                */
+               /* .name = "Capture Source", */
+               .name = "Input Source",
+               .count = 1,
+               .info = alc_mux_enum_info,
+               .get = alc_mux_enum_get,
+               .put = alc_mux_enum_put,
+       },
+       { } /* end */
+};
+
+static void alc861_auto_set_output_and_unmute(struct hda_codec *codec, hda_nid_t nid,
+                                             int pin_type, int dac_idx)
+{
+       /* set as output */
+
+       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, pin_type);
+       snd_hda_codec_write(codec, dac_idx, 0, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE);
+
+}
+
+static void alc861_auto_init_multi_out(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       int i;
+
+       for (i = 0; i < spec->autocfg.line_outs; i++) {
+               hda_nid_t nid = spec->autocfg.line_out_pins[i];
+               if (nid)
+                       alc861_auto_set_output_and_unmute(codec, nid, PIN_OUT, spec->multiout.dac_nids[i]);
+       }
+}
+
+static void alc861_auto_init_hp_out(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       hda_nid_t pin;
+
+       pin = spec->autocfg.hp_pin;
+       if (pin) /* connect to front */
+               alc861_auto_set_output_and_unmute(codec, pin, PIN_HP, spec->multiout.dac_nids[0]);
+}
+
+static void alc861_auto_init_analog_input(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       int i;
+
+       for (i = 0; i < AUTO_PIN_LAST; i++) {
+               hda_nid_t nid = spec->autocfg.input_pins[i];
+               if ((nid>=0x0c) && (nid <=0x11)) {
+                       snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
+                                           i <= AUTO_PIN_FRONT_MIC ? PIN_VREF80 : PIN_IN);
+               }
+       }
+}
+
+/* parse the BIOS configuration and set up the alc_spec */
+/* return 1 if successful, 0 if the proper config is not found, or a negative error code */
+static int alc861_parse_auto_config(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       int err;
+       static hda_nid_t alc861_ignore[] = { 0x1d, 0 };
+
+       if ((err = snd_hda_parse_pin_def_config(codec, &spec->autocfg,
+                                               alc861_ignore)) < 0)
+               return err;
+       if (! spec->autocfg.line_outs)
+               return 0; /* can't find valid BIOS pin config */
+
+       if ((err = alc861_auto_fill_dac_nids(spec, &spec->autocfg)) < 0 ||
+           (err = alc861_auto_create_multi_out_ctls(spec, &spec->autocfg)) < 0 ||
+           (err = alc861_auto_create_hp_ctls(spec, spec->autocfg.hp_pin)) < 0 ||
+           (err = alc861_auto_create_analog_input_ctls(spec, &spec->autocfg)) < 0)
+               return err;
+
+       spec->multiout.max_channels = spec->multiout.num_dacs * 2;
+
+       if (spec->autocfg.dig_out_pin)
+               spec->multiout.dig_out_nid = ALC861_DIGOUT_NID;
+
+       if (spec->kctl_alloc)
+               spec->mixers[spec->num_mixers++] = spec->kctl_alloc;
+
+       spec->init_verbs[spec->num_init_verbs++] = alc861_auto_init_verbs;
 
-       spec->mixers[spec->num_mixers] = alc882_base_mixer;
+       spec->num_mux_defs = 1;
+       spec->input_mux = &spec->private_imux;
+
+       spec->adc_nids = alc861_adc_nids;
+       spec->num_adc_nids = ARRAY_SIZE(alc861_adc_nids);
+       spec->mixers[spec->num_mixers] = alc861_capture_mixer;
        spec->num_mixers++;
 
-       spec->multiout.dig_out_nid = ALC880_DIGOUT_NID;
-       spec->dig_in_nid = ALC880_DIGIN_NID;
-       spec->front_panel = 1;
-       spec->init_verbs = alc882_init_verbs;
-       spec->channel_mode = alc882_ch_modes;
-       spec->num_channel_mode = ARRAY_SIZE(alc882_ch_modes);
+       return 1;
+}
 
-       spec->stream_name_analog = "ALC882 Analog";
-       spec->stream_analog_playback = &alc880_pcm_analog_playback;
-       spec->stream_analog_capture = &alc880_pcm_analog_capture;
+/* additional initialization for auto-configuration model */
+static void alc861_auto_init(struct hda_codec *codec)
+{
+       alc861_auto_init_multi_out(codec);
+       alc861_auto_init_hp_out(codec);
+       alc861_auto_init_analog_input(codec);
+}
 
-       spec->stream_name_digital = "ALC882 Digital";
-       spec->stream_digital_playback = &alc880_pcm_digital_playback;
-       spec->stream_digital_capture = &alc880_pcm_digital_capture;
 
-       spec->multiout.max_channels = spec->channel_mode[0].channels;
-       spec->multiout.num_dacs = ARRAY_SIZE(alc882_dac_nids);
-       spec->multiout.dac_nids = alc882_dac_nids;
+/*
+ * configuration and preset
+ */
+static struct hda_board_config alc861_cfg_tbl[] = {
+       { .modelname = "3stack", .config = ALC861_3ST },
+       { .pci_subvendor = 0x8086, .pci_subdevice = 0xd600, .config = ALC861_3ST },
+       { .modelname = "3stack-dig", .config = ALC861_3ST_DIG },
+       { .modelname = "6stack-dig", .config = ALC861_6ST_DIG },
+       { .modelname = "auto", .config = ALC861_AUTO },
+       {}
+};
 
-       spec->input_mux = &alc882_capture_source;
-       spec->num_adc_nids = ARRAY_SIZE(alc882_adc_nids);
-       spec->adc_nids = alc882_adc_nids;
+static struct alc_config_preset alc861_presets[] = {
+       [ALC861_3ST] = {
+               .mixers = { alc861_3ST_mixer },
+               .init_verbs = { alc861_threestack_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc861_dac_nids),
+               .dac_nids = alc861_dac_nids,
+               .num_channel_mode = ARRAY_SIZE(alc861_threestack_modes),
+               .channel_mode = alc861_threestack_modes,
+               .num_adc_nids = ARRAY_SIZE(alc861_adc_nids),
+               .adc_nids = alc861_adc_nids,
+               .input_mux = &alc861_capture_source,
+       },
+       [ALC861_3ST_DIG] = {
+               .mixers = { alc861_base_mixer },
+               .init_verbs = { alc861_threestack_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc861_dac_nids),
+               .dac_nids = alc861_dac_nids,
+               .dig_out_nid = ALC861_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc861_threestack_modes),
+               .channel_mode = alc861_threestack_modes,
+               .num_adc_nids = ARRAY_SIZE(alc861_adc_nids),
+               .adc_nids = alc861_adc_nids,
+               .input_mux = &alc861_capture_source,
+       },
+       [ALC861_6ST_DIG] = {
+               .mixers = { alc861_base_mixer },
+               .init_verbs = { alc861_base_init_verbs },
+               .num_dacs = ARRAY_SIZE(alc861_dac_nids),
+               .dac_nids = alc861_dac_nids,
+               .dig_out_nid = ALC861_DIGOUT_NID,
+               .num_channel_mode = ARRAY_SIZE(alc861_8ch_modes),
+               .channel_mode = alc861_8ch_modes,
+               .num_adc_nids = ARRAY_SIZE(alc861_adc_nids),
+               .adc_nids = alc861_adc_nids,
+               .input_mux = &alc861_capture_source,
+       },
+};     
 
-       codec->patch_ops = alc_patch_ops;
 
+static int patch_alc861(struct hda_codec *codec)
+{
+       struct alc_spec *spec;
+       int board_config;
+       int err;
+
+       spec = kcalloc(1, sizeof(*spec), GFP_KERNEL);
+       if (spec == NULL)
+               return -ENOMEM;
+
+       codec->spec = spec;     
+
+        board_config = snd_hda_check_board_config(codec, alc861_cfg_tbl);
+       if (board_config < 0 || board_config >= ALC861_MODEL_LAST) {
+               printk(KERN_INFO "hda_codec: Unknown model for ALC861, trying auto-probe from BIOS...\n");
+               board_config = ALC861_AUTO;
+       }
+
+       if (board_config == ALC861_AUTO) {
+               /* automatic parse from the BIOS config */
+               err = alc861_parse_auto_config(codec);
+               if (err < 0) {
+                       alc_free(codec);
+                       return err;
+               } else if (! err) {
+                       printk(KERN_INFO "hda_codec: Cannot set up configuration from BIOS.  Using base mode...\n");
+                  board_config = ALC861_3ST_DIG;
+               }
+       }
+
+       if (board_config != ALC861_AUTO)
+               setup_preset(spec, &alc861_presets[board_config]);
+
+       spec->stream_name_analog = "ALC861 Analog";
+       spec->stream_analog_playback = &alc861_pcm_analog_playback;
+       spec->stream_analog_capture = &alc861_pcm_analog_capture;
+
+       spec->stream_name_digital = "ALC861 Digital";
+       spec->stream_digital_playback = &alc861_pcm_digital_playback;
+       spec->stream_digital_capture = &alc861_pcm_digital_capture;
+
+       codec->patch_ops = alc_patch_ops;
+       if (board_config == ALC861_AUTO)
+               spec->init_hook = alc861_auto_init;
+               
        return 0;
 }
 
@@ -1497,7 +5716,11 @@ static int patch_alc882(struct hda_codec *codec)
  */
 struct hda_codec_preset snd_hda_preset_realtek[] = {
        { .id = 0x10ec0260, .name = "ALC260", .patch = patch_alc260 },
+       { .id = 0x10ec0262, .name = "ALC262", .patch = patch_alc262 },
        { .id = 0x10ec0880, .name = "ALC880", .patch = patch_alc880 },
        { .id = 0x10ec0882, .name = "ALC882", .patch = patch_alc882 },
+       { .id = 0x10ec0883, .name = "ALC883", .patch = patch_alc882 },
+       { .id = 0x10ec0885, .name = "ALC885", .patch = patch_alc882 },
+       { .id = 0x10ec0861, .name = "ALC861", .patch = patch_alc861 },
        {} /* terminator */
 };
index 8c440fb..d862295 100644 (file)
@@ -1262,13 +1262,13 @@ static int vaio_master_sw_put(struct snd_kcontrol *kcontrol,
        int change;
 
        change = snd_hda_codec_amp_update(codec, 0x02, 0, HDA_OUTPUT, 0,
-                                         0x80, valp[0] & 0x80);
+                                         0x80, (valp[0] ? 0 : 0x80));
        change |= snd_hda_codec_amp_update(codec, 0x02, 1, HDA_OUTPUT, 0,
-                                          0x80, valp[1] & 0x80);
+                                          0x80, (valp[1] ? 0 : 0x80));
        snd_hda_codec_amp_update(codec, 0x05, 0, HDA_OUTPUT, 0,
-                                0x80, valp[0] & 0x80);
+                                0x80, (valp[0] ? 0 : 0x80));
        snd_hda_codec_amp_update(codec, 0x05, 1, HDA_OUTPUT, 0,
-                                0x80, valp[1] & 0x80);
+                                0x80, (valp[1] ? 0 : 0x80));
        return change;
 }
 
index 61f82f0..10586e4 100644 (file)
@@ -389,7 +389,7 @@ MODULE_SUPPORTED_DEVICE("{{RME Hammerfall-DSP},"
 
 /* use hotplug firmeare loader? */
 #if defined(CONFIG_FW_LOADER) || defined(CONFIG_FW_LOADER_MODULE)
-#ifndef HDSP_USE_HWDEP_LOADER
+#if !defined(HDSP_USE_HWDEP_LOADER) && !defined(CONFIG_SND_HDSP)
 #define HDSP_FW_LOADER
 #endif
 #endif
@@ -3169,9 +3169,10 @@ snd_hdsp_proc_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer)
        char *clock_source;
        int x;
 
-       if (hdsp_check_for_iobox (hdsp))
+       if (hdsp_check_for_iobox (hdsp)) {
                snd_iprintf(buffer, "No I/O box connected.\nPlease connect one and upload firmware.\n");
                return;
+        }
 
        if (hdsp_check_for_firmware(hdsp, 0)) {
                if (hdsp->state & HDSP_FirmwareCached) {