From: Sapan Bhatia <sapanb@cs.princeton.edu>
Date: Thu, 20 Dec 2007 04:42:15 +0000 (+0000)
Subject: Finally...
X-Git-Tag: trellis-2.6.22-Jan-2009~80
X-Git-Url: http://git.onelab.eu/?a=commitdiff_plain;h=2917f3848c08300057f2d9f9523327a3fd6b4e76;p=linux-2.6.git

Finally...
---

diff --git a/trellis-mm.patch b/trellis-mm.patch
new file mode 100644
index 000000000..95353ad2a
--- /dev/null
+++ b/trellis-mm.patch
@@ -0,0 +1,165910 @@
+diff -Nurb linux-2.6.22-570/Documentation/DocBook/Makefile linux-2.6.22-try2/Documentation/DocBook/Makefile
+--- linux-2.6.22-570/Documentation/DocBook/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/Documentation/DocBook/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -11,7 +11,7 @@
+ 	    procfs-guide.xml writing_usb_driver.xml \
+ 	    kernel-api.xml filesystems.xml lsm.xml usb.xml \
+ 	    gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
+-	    genericirq.xml
++	    genericirq.xml kgdb.xml
+ 
+ ###
+ # The build process is as follows (targets):
+diff -Nurb linux-2.6.22-570/Documentation/DocBook/kgdb.tmpl linux-2.6.22-try2/Documentation/DocBook/kgdb.tmpl
+--- linux-2.6.22-570/Documentation/DocBook/kgdb.tmpl	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/Documentation/DocBook/kgdb.tmpl	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,250 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
++	"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
++
++<book id="kgdbInternals">
++ <bookinfo>
++  <title>KGDB Internals</title>
++
++  <authorgroup>
++   <author>
++    <firstname>Tom</firstname>
++    <surname>Rini</surname>
++    <affiliation>
++     <address>
++      <email>trini@kernel.crashing.org</email>
++     </address>
++    </affiliation>
++   </author>
++  </authorgroup>
++
++  <authorgroup>
++   <author>
++    <firstname>Amit S.</firstname>
++    <surname>Kale</surname>
++    <affiliation>
++     <address>
++      <email>amitkale@linsyssoft.com</email>
++     </address>
++    </affiliation>
++   </author>
++  </authorgroup>
++
++  <copyright>
++   <year>2004-2005</year>
++   <holder>MontaVista Software, Inc.</holder>
++  </copyright>
++  <copyright>
++   <year>2004</year>
++   <holder>Amit S. Kale</holder>
++  </copyright>
++
++  <legalnotice>
++   <para>
++   This file is licensed under the terms of the GNU General Public License
++   version 2. This program is licensed "as is" without any warranty of any
++   kind, whether express or implied.
++   </para>
++
++  </legalnotice>
++ </bookinfo>
++
++<toc></toc>
++  <chapter id="Introduction">
++    <title>Introduction</title>
++    <para>
++    kgdb is a source-level debugger for the Linux kernel. It is used along
++    with gdb to debug a Linux kernel. With kgdb, kernel developers can debug
++    a kernel much as they would an application program. It makes it
++    possible to place breakpoints in kernel code, step through the code
++    and observe variables.
++    </para>
++    <para>
++    Two machines are required for using kgdb. One of these machines is a
++    development machine and the other is a test machine. The machines are
++    typically connected through a serial line, a null-modem cable which
++    connects their serial ports.  It is also possible, however, to use an
++    ethernet connection between the machines.  The kernel to be debugged
++    runs on the test machine. gdb runs on the development machine. The
++    serial line or ethernet connection is used by gdb to communicate to
++    the kernel being debugged.
++    </para>
++  </chapter>
++  <chapter id="CompilingAKernel">
++    <title>Compiling a kernel</title>
++    <para>
++    To enable <symbol>CONFIG_KGDB</symbol>, look under the "Kernel debugging"
++    menu and then select "KGDB: kernel debugging with remote gdb".
++    </para>
++    <para>
++    The first choice for I/O is <symbol>CONFIG_KGDB_ONLY_MODULES</symbol>.
++    This means that you will only be able to use KGDB after loading a
++    kernel module that defines how you want to be able to talk with
++    KGDB.  There are two other choices (more on some architectures) that
++    can be enabled as modules later, if not picked here.
++    </para>
++    <para>The first of these is <symbol>CONFIG_KGDB_8250_NOMODULE</symbol>.
++    This has sub-options such as <symbol>CONFIG_KGDB_SIMPLE_SERIAL</symbol>
++    which toggles choosing the serial port by ttyS number or by specifying
++    a port and IRQ number.
++    </para>
++    <para>
++    The second of these choices for I/O on most systems is
++    <symbol>CONFIG_KGDBOE</symbol>. This requires that the machine to be
++    debugged has an ethernet card which supports the netpoll API, such as
++    the cards supported by <symbol>CONFIG_E100</symbol>.  There are no
++    sub-options for this, but a kernel command line option is required.
++    </para>
++  </chapter>
++  <chapter id="BootingTheKernel">
++    <title>Booting the kernel</title>
++    <para>
++    The kernel command line option <constant>kgdbwait</constant> makes kgdb
++    wait for a gdb connection during booting of a kernel.  If the
++    <symbol>CONFIG_KGDB_8250</symbol> driver is used (or if applicable,
++    another serial driver) this breakpoint will happen very early on, before
++    console output.  If you wish to change serial port information and you
++    have enabled both <symbol>CONFIG_KGDB_8250</symbol> and
++    <symbol>CONFIG_KGDB_SIMPLE_SERIAL</symbol> then you must pass the option
++    <constant>kgdb8250=&lt;io or mmio&gt;,&lt;address&gt;,&lt;baud
++    rate&gt;,&lt;irq&gt;</constant> before <constant>kgdbwait</constant>.
++    The values <constant>io</constant> and <constant>mmio</constant> indicate
++    whether the address being passed next needs to be memory mapped
++    (<constant>mmio</constant>) or not. The <constant>address</constant> must
++    be passed in hex; it is the hardware address, and will be remapped if
++    passed as <constant>mmio</constant>. The values
++    <constant>baud rate</constant> and <constant>irq</constant> are base-10.
++    The supported values for <constant>baud rate</constant> are
++    <constant>9600</constant>, <constant>19200</constant>,
++    <constant>38400</constant>, <constant>57600</constant>, and
++    <constant>115200</constant>.
++    </para>
++    <para>
++    To have KGDB stop the kernel and wait, with the compiled values for the
++    serial driver, pass in: <constant>kgdbwait</constant>.
++    </para>
++    <para>
++    To specify the values of the SH SCI(F) serial port at boot:
++    <constant>kgdbsci=0,115200</constant>.
++    </para>
++    <para>
++    To specify the values of the serial port at boot:
++    <constant>kgdb8250=io,3f8,115200,3</constant>.
++    On IA64 this could also be:
++    <constant>kgdb8250=mmio,0xff5e0000,115200,74</constant>
++    And to have KGDB also stop the kernel and wait for GDB to connect, pass in
++    <constant>kgdbwait</constant> after this argument.
++    </para>
++    <para>
++    To configure the <symbol>CONFIG_KGDBOE</symbol> driver, pass in
++    <constant>kgdboe=[src-port]@&lt;src-ip&gt;/[dev],[tgt-port]@&lt;tgt-ip&gt;/[tgt-macaddr]</constant>
++    where:
++    <itemizedlist>
++      <listitem><para>src-port (optional): source for UDP packets (defaults to <constant>6443</constant>)</para></listitem>
++      <listitem><para>src-ip: source IP to use (interface address)</para></listitem>
++      <listitem><para>dev (optional): network interface (<constant>eth0</constant>)</para></listitem>
++      <listitem><para>tgt-port (optional): port GDB will use (defaults to <constant>6442</constant>)</para></listitem>
++      <listitem><para>tgt-ip: IP address GDB will be connecting from</para></listitem>
++      <listitem><para>tgt-macaddr (optional): ethernet MAC address for logging agent (default is broadcast)</para></listitem>
++    </itemizedlist>
++    </para>
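++    <para>
++    For example (these addresses are illustrative, chosen to match the
++    gdb example later in this document, not mandated defaults):
++    <constant>kgdboe=@192.168.2.2/,@192.168.2.1/</constant> uses the
++    default ports and broadcast MAC address, with the kernel being
++    debugged at 192.168.2.2 and gdb connecting from 192.168.2.1.
++    </para>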
++    <para>
++    The <symbol>CONFIG_KGDBOE</symbol> driver can be reconfigured at run
++    time, if <symbol>CONFIG_SYSFS</symbol> and
++    <symbol>CONFIG_MODULES</symbol> are enabled, by echoing a new config
++    string to <constant>/sys/module/kgdboe/parameter/kgdboe</constant>.  The
++    driver can be unconfigured with the special string
++    <constant>not_configured</constant>.
++    </para>
++  </chapter>
++  <chapter id="ConnectingGDB">
++  <title>Connecting gdb</title>
++    <para>
++    If you have used any of the methods described in the previous chapter
++    to have KGDB stop and create an initial breakpoint, kgdb prints the
++    message "Waiting for connection from remote gdb..." on the console
++    and waits for a connection from gdb. At this point you connect gdb to kgdb.
++    </para>
++    <para>
++    Example (serial):
++    </para>
++    <programlisting>
++    % gdb ./vmlinux
++    (gdb) set remotebaud 115200
++    (gdb) target remote /dev/ttyS0
++    </programlisting>
++    <para>
++    Example (ethernet):
++    </para>
++    <programlisting>
++    % gdb ./vmlinux
++    (gdb) target remote udp:192.168.2.2:6443
++    </programlisting>
++    <para>
++    Once connected, you can debug a kernel the way you would debug an
++    application program.
++    </para>
++  </chapter>
++  <chapter id="ArchitectureNotes">
++    <title>Architecture specific notes</title>
++      <para>
++      SuperH: The NMI switch found on some boards can be used to trigger an
++      initial breakpoint.  Subsequent triggers do nothing.  If console
++      is enabled on the SCI(F) serial port, and that is the port being used
++      for KGDB, then you must trigger a breakpoint via sysrq, NMI, or
++      some other method prior to connecting, or echo a control-c to the
++      serial port.  Also, to use the SCI(F) port for KGDB, the
++      <symbol>CONFIG_SERIAL_SH_SCI</symbol> driver must be enabled.
++      </para>
++  </chapter>
++  <chapter id="CommonBackEndReq">
++    <title>The common backend (required)</title>
++      <para>
++      There are a few flags which must be set on every architecture in
++      their &lt;asm/kgdb.h&gt; file.  These are:
++      <itemizedlist>
++        <listitem>
++	  <para>
++	  NUMREGBYTES: The size in bytes of all of the registers, so
++	  that we can ensure they will all fit into a packet.
++	  </para>
++	  <para>
++	  BUFMAX: The size in bytes of the buffer GDB will read into.
++	  This must be larger than NUMREGBYTES.
++	  </para>
++	  <para>
++	  CACHE_FLUSH_IS_SAFE: Set to one if it is always safe to call
++	  flush_cache_range or flush_icache_range.  On some architectures,
++	  these functions may not be safe to call on SMP since we keep other
++	  CPUs in a holding pattern.
++	  </para>
++	</listitem>
++      </itemizedlist>
++      </para>
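++      <para>
++      As a hedged illustration (the values below are invented for the
++      example, not taken from any real port), an architecture's
++      &lt;asm/kgdb.h&gt; might contain:
++      </para>
++      <programlisting>
++      /* Illustrative values only; each architecture chooses its own. */
++      #define NUMREGBYTES		(16 * 4)  /* all registers, in bytes */
++      #define BUFMAX		1024      /* GDB packet buffer; larger than NUMREGBYTES */
++      #define CACHE_FLUSH_IS_SAFE	1         /* cache flushes safe while CPUs are held */
++      </programlisting>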
++      <para>
++      There are also the following functions for the common backend,
++      found in kernel/kgdb.c, that must be supplied by the
++      architecture-specific backend.  No weak version of these is provided.
++      </para>
++!Iinclude/linux/kgdb.h
++  </chapter>
++  <chapter id="CommonBackEndOpt">
++    <title>The common backend (optional)</title>
++      <para>
++      These functions are part of the common backend, found in kernel/kgdb.c
++      and are optionally implemented.  Some functions (with _hw_ in the name)
++      end up being required on arches which use hardware breakpoints.
++      </para>
++!Ikernel/kgdb.c
++  </chapter>
++  <chapter id="DriverSpecificFunctions">
++    <title>Driver-Specific Functions</title>
++      <para>
++      Some of the I/O drivers have additional functions, specific to the
++      driver, that can be called.  Calls from other places
++      to these functions must be wrapped in #ifdefs for the driver in
++      question.
++      </para>
++!Idrivers/serial/8250_kgdb.c
++   </chapter>
++</book>
+diff -Nurb linux-2.6.22-570/Documentation/accounting/getdelays.c linux-2.6.22-try2/Documentation/accounting/getdelays.c
+--- linux-2.6.22-570/Documentation/accounting/getdelays.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/Documentation/accounting/getdelays.c	2007-12-19 15:29:24.000000000 -0500
+@@ -49,6 +49,7 @@
+ int dbg;
+ int print_delays;
+ int print_io_accounting;
++int print_task_context_switch_counts;
+ __u64 stime, utime;
+ 
+ #define PRINTF(fmt, arg...) {			\
+@@ -195,7 +196,7 @@
+ 	       "IO    %15s%15s\n"
+ 	       "      %15llu%15llu\n"
+ 	       "MEM   %15s%15s\n"
+-	       "      %15llu%15llu\n\n",
++	       "      %15llu%15llu\n"
+ 	       "count", "real total", "virtual total", "delay total",
+ 	       t->cpu_count, t->cpu_run_real_total, t->cpu_run_virtual_total,
+ 	       t->cpu_delay_total,
+@@ -204,6 +205,14 @@
+ 	       "count", "delay total", t->swapin_count, t->swapin_delay_total);
+ }
+ 
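++/*
++ * Print the per-task voluntary (nvcsw) and involuntary (nivcsw) context
++ * switch counts reported in the taskstats struct.
++ */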
++void task_context_switch_counts(struct taskstats *t)
++{
++	printf("\n\nTask   %15s%15s\n"
++	       "       %15lu%15lu\n",
++	       "voluntary", "nonvoluntary",
++	       t->nvcsw, t->nivcsw);
++}
++
+ void print_ioacct(struct taskstats *t)
+ {
+ 	printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
+@@ -235,7 +244,7 @@
+ 	struct msgtemplate msg;
+ 
+ 	while (1) {
+-		c = getopt(argc, argv, "diw:r:m:t:p:vl");
++		c = getopt(argc, argv, "qdiw:r:m:t:p:vl");
+ 		if (c < 0)
+ 			break;
+ 
+@@ -248,6 +257,10 @@
+ 			printf("printing IO accounting\n");
+ 			print_io_accounting = 1;
+ 			break;
++		case 'q':
++			printf("printing task/process context switch rates\n");
++			print_task_context_switch_counts = 1;
++			break;
+ 		case 'w':
+ 			logfile = strdup(optarg);
+ 			printf("write to file %s\n", logfile);
+@@ -389,6 +402,8 @@
+ 							print_delayacct((struct taskstats *) NLA_DATA(na));
+ 						if (print_io_accounting)
+ 							print_ioacct((struct taskstats *) NLA_DATA(na));
++						if (print_task_context_switch_counts)
++							task_context_switch_counts((struct taskstats *) NLA_DATA(na));
+ 						if (fd) {
+ 							if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
+ 								err(1,"write error\n");
+diff -Nurb linux-2.6.22-570/Documentation/accounting/taskstats-struct.txt linux-2.6.22-try2/Documentation/accounting/taskstats-struct.txt
+--- linux-2.6.22-570/Documentation/accounting/taskstats-struct.txt	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/Documentation/accounting/taskstats-struct.txt	2007-12-19 15:29:24.000000000 -0500
+@@ -22,6 +22,8 @@
+     /* Extended accounting fields end */
+     Their values are collected if CONFIG_TASK_XACCT is set.
+ 
++4) Per-task and per-thread context switch count statistics
++
+ Future extension should add fields to the end of the taskstats struct, and
+ should not change the relative position of each field within the struct.
+ 
+@@ -158,4 +160,8 @@
+ 
+ 	/* Extended accounting fields end */
+ 
++4) Per-task and per-thread statistics
++	__u64	nvcsw;			/* voluntary context switch counter */
++	__u64	nivcsw;			/* involuntary context switch counter */
++
+ }
+diff -Nurb linux-2.6.22-570/Documentation/cachetlb.txt linux-2.6.22-try2/Documentation/cachetlb.txt
+--- linux-2.6.22-570/Documentation/cachetlb.txt	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/Documentation/cachetlb.txt	2007-12-19 15:29:22.000000000 -0500
+@@ -253,7 +253,7 @@
+ 
+ 	The first of these two routines is invoked after map_vm_area()
+ 	has installed the page table entries.  The second is invoked
+-	before unmap_vm_area() deletes the page table entries.
++	before unmap_kernel_range() deletes the page table entries.
+ 
+ There exists another whole class of cpu cache issues which currently
+ require a whole different set of interfaces to handle properly.
+diff -Nurb linux-2.6.22-570/Documentation/containers.txt linux-2.6.22-try2/Documentation/containers.txt
+--- linux-2.6.22-570/Documentation/containers.txt	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/Documentation/containers.txt	2007-12-19 15:29:25.000000000 -0500
+@@ -0,0 +1,543 @@
++				CONTAINERS
++				----------
++
++Written by Paul Menage <menage@google.com> based on Documentation/cpusets.txt
++
++Original copyright statements from cpusets.txt:
++Portions Copyright (C) 2004 BULL SA.
++Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
++Modified by Paul Jackson <pj@sgi.com>
++Modified by Christoph Lameter <clameter@sgi.com>
++
++CONTENTS:
++=========
++
++1. Containers
++  1.1 What are containers ?
++  1.2 Why are containers needed ?
++  1.3 How are containers implemented ?
++  1.4 What does notify_on_release do ?
++  1.5 How do I use containers ?
++2. Usage Examples and Syntax
++  2.1 Basic Usage
++  2.2 Attaching processes
++3. Kernel API
++  3.1 Overview
++  3.2 Synchronization
++  3.3 Subsystem API
++4. Questions
++
++1. Containers
++==========
++
++1.1 What are containers ?
++----------------------
++
++Containers provide a mechanism for aggregating/partitioning sets of
++tasks, and all their future children, into hierarchical groups with
++specialized behaviour.
++
++Definitions:
++
++A *container* associates a set of tasks with a set of parameters for one
++or more subsystems.
++
++A *subsystem* is a module that makes use of the task grouping
++facilities provided by containers to treat groups of tasks in
++particular ways. A subsystem is typically a "resource controller" that
++schedules a resource or applies per-container limits, but it may be
++anything that wants to act on a group of processes, e.g. a
++virtualization subsystem.
++
++A *hierarchy* is a set of containers arranged in a tree, such that
++every task in the system is in exactly one of the containers in the
++hierarchy, and a set of subsystems; each subsystem has system-specific
++state attached to each container in the hierarchy.  Each hierarchy has
++an instance of the container virtual filesystem associated with it.
++
++At any one time there may be multiple active hierarchies of task
++containers. Each hierarchy is a partition of all tasks in the system.
++
++User level code may create and destroy containers by name in an
++instance of the container virtual file system, specify and query to
++which container a task is assigned, and list the task pids assigned to
++a container. Those creations and assignments only affect the hierarchy
++associated with that instance of the container file system.
++
++On their own, the only use for containers is for simple job
++tracking. The intention is that other subsystems hook into the generic
++container support to provide new attributes for containers, such as
++accounting/limiting the resources which processes in a container can
++access. For example, cpusets (see Documentation/cpusets.txt) allows
++you to associate a set of CPUs and a set of memory nodes with the
++tasks in each container.
++
++1.2 Why are containers needed ?
++----------------------------
++
++There are multiple efforts to provide process aggregations in the
++Linux kernel, mainly for resource tracking purposes. Such efforts
++include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
++namespaces. These all require the basic notion of a
++grouping/partitioning of processes, with newly forked processes ending
++in the same group (container) as their parent process.
++
++The kernel container patch provides the minimum essential kernel
++mechanisms required to efficiently implement such groups. It has
++minimal impact on the system fast paths, and provides hooks for
++specific subsystems such as cpusets to provide additional behaviour as
++desired.
++
++Multiple hierarchy support is provided to allow for situations where
++the division of tasks into containers is distinctly different for
++different subsystems - having parallel hierarchies allows each
++hierarchy to be a natural division of tasks, without having to handle
++complex combinations of tasks that would be present if several
++unrelated subsystems needed to be forced into the same tree of
++containers.
++
++At one extreme, each resource controller or subsystem could be in a
++separate hierarchy; at the other extreme, all subsystems
++would be attached to the same hierarchy.
++
++As an example of a scenario (originally proposed by vatsa@in.ibm.com)
++that can benefit from multiple hierarchies, consider a large
++university server with various users - students, professors, system
++tasks etc. The resource planning for this server could be along the
++following lines:
++
++       CPU :           Top cpuset
++                       /       \
++               CPUSet1         CPUSet2
++                  |              |
++               (Profs)         (Students)
++
++               In addition (system tasks) are attached to topcpuset (so
++               that they can run anywhere) with a limit of 20%
++
++       Memory : Professors (50%), students (30%), system (20%)
++
++       Disk : Prof (50%), students (30%), system (20%)
++
++       Network : WWW browsing (20%), Network File System (60%), others (20%)
++                               / \
++                       Prof (15%) students (5%)
++
++Browsers like firefox/lynx go into the WWW network class, while (k)nfsd go
++into NFS network class.
++
++At the same time firefox/lynx will share an appropriate CPU/Memory class
++depending on who launched it (prof/student).
++
++With the ability to classify tasks differently for different resources
++(by putting those resource subsystems in different hierarchies) then
++the admin can easily set up a script which receives exec notifications
++and, depending on who is launching the browser, he can run
++
++       # echo browser_pid > /mnt/<restype>/<userclass>/tasks
++
++With only a single hierarchy, he would now potentially have to create
++a separate container for every browser launched and associate it with
++the appropriate network and other resource classes.  This may lead to a
++proliferation of such containers.
++
++Also, let's say that the administrator would like to give enhanced network
++access temporarily to a student's browser (since it is night and the user
++wants to do online gaming :) OR give one of the student's simulation
++apps enhanced CPU power.
++
++With the ability to write pids directly to resource classes, it's just a
++matter of:
++
++       # echo pid > /mnt/network/<new_class>/tasks
++       (after some time)
++       # echo pid > /mnt/network/<orig_class>/tasks
++
++Without this ability, he would have to split the container into
++multiple separate ones and then associate the new containers with the
++new resource classes.
++
++
++
++1.3 How are containers implemented ?
++---------------------------------
++
++Containers extend the kernel as follows:
++
++ - Each task in the system has a reference-counted pointer to a
++   css_group.
++
++ - A css_group contains a set of reference-counted pointers to
++   container_subsys_state objects, one for each container subsystem
++   registered in the system. There is no direct link from a task to
++   the container of which it's a member in each hierarchy, but this
++   can be determined by following pointers through the
++   container_subsys_state objects. This is because accessing the
++   subsystem state is something that's expected to happen frequently
++   and in performance-critical code, whereas operations that require a
++   task's actual container assignments (in particular, moving between
++   containers) are less common. A linked list runs through the cg_list
++   field of each task_struct using the css_group, anchored at
++   css_group->tasks.
++
++ - A container hierarchy filesystem can be mounted for browsing and
++   manipulation from user space.
++
++ - You can list all the tasks (by pid) attached to any container.
++
++The implementation of containers requires a few, simple hooks
++into the rest of the kernel, none in performance critical paths:
++
++ - in init/main.c, to initialize the root containers and initial
++   css_group at system boot.
++
++ - in fork and exit, to attach and detach a task from its css_group.
++
++In addition a new file system, of type "container" may be mounted, to
++enable browsing and modifying the containers presently known to the
++kernel.  When mounting a container hierarchy, you may specify a
++comma-separated list of subsystems to mount as the filesystem mount
++options.  By default, mounting the container filesystem attempts to
++mount a hierarchy containing all registered subsystems.
++
++If an active hierarchy with exactly the same set of subsystems already
++exists, it will be reused for the new mount. If no existing hierarchy
++matches, and any of the requested subsystems are in use in an existing
++hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
++is activated, associated with the requested subsystems.
++
++It's not currently possible to bind a new subsystem to an active
++container hierarchy, or to unbind a subsystem from an active container
++hierarchy. This may be possible in future, but is fraught with nasty
++error-recovery issues.
++
++When a container filesystem is unmounted, if there are any
++subcontainers created below the top-level container, that hierarchy
++will remain active even though unmounted; if there are no
++subcontainers then the hierarchy will be deactivated.
++
++No new system calls are added for containers - all support for
++querying and modifying containers is via this container file system.
++
++Each task under /proc has an added file named 'container' displaying,
++for each active hierarchy, the subsystem names and the container name
++as the path relative to the root of the container file system.
++
++Each container is represented by a directory in the container file system
++containing the following files describing that container:
++
++ - tasks: list of tasks (by pid) attached to that container
++ - notify_on_release flag: run /sbin/container_release_agent on exit?
++
++Other subsystems such as cpusets may add additional files in each
++container dir.
++
++New containers are created using the mkdir system call or shell
++command.  The properties of a container, such as its flags, are
++modified by writing to the appropriate file in that container's
++directory, as listed above.
++
++The named hierarchical structure of nested containers allows partitioning
++a large system into nested, dynamically changeable, "soft-partitions".
++
++The attachment of each task, automatically inherited at fork by any
++children of that task, to a container allows organizing the work load
++on a system into related sets of tasks.  A task may be re-attached to
++any other container, if allowed by the permissions on the necessary
++container file system directories.
++
++When a task is moved from one container to another, it gets a new
++css_group pointer - if there's an already existing css_group with the
++desired collection of containers then that group is reused, else a new
++css_group is allocated. Note that the current implementation uses a
++linear search to locate an appropriate existing css_group, so isn't
++very efficient. A future version will use a hash table for better
++performance.
++
++To allow access from a container to the css_groups (and hence tasks)
++that comprise it, a set of cg_container_link objects forms a lattice;
++each cg_container_link is linked into a list of cg_container_links for
++a single container on its cont_link_list field, and a list of
++cg_container_links for a single css_group on its cg_link_list.
++
++Thus the set of tasks in a container can be listed by iterating over
++each css_group that references the container, and sub-iterating over
++each css_group's task set.
++
++The use of a Linux virtual file system (vfs) to represent the
++container hierarchy provides for a familiar permission and name space
++for containers, with a minimum of additional kernel code.
++
++1.4 What does notify_on_release do ?
++------------------------------------
++
++*** notify_on_release is disabled in the current patch set. It will be
++*** reactivated in a future patch in a less-intrusive manner
++
++If the notify_on_release flag is enabled (1) in a container, then
++whenever the last task in the container leaves (exits or attaches to
++some other container) and the last child container of that container
++is removed, then the kernel runs the command specified by the contents
++of the "release_agent" file in that hierarchy's root directory,
++supplying the pathname (relative to the mount point of the container
++file system) of the abandoned container.  This enables automatic
++removal of abandoned containers.  The default value of
++notify_on_release in the root container at system boot is disabled
++(0).  The default value of other containers at creation is the current
++value of their parent's notify_on_release setting. The default value of
++a container hierarchy's release_agent path is empty.
++
++1.5 How do I use containers ?
++--------------------------
++
++To start a new job that is to be contained within a container, using
++the "cpuset" container subsystem, the steps are something like:
++
++ 1) mkdir /dev/container
++ 2) mount -t container -ocpuset cpuset /dev/container
++ 3) Create the new container by doing mkdir's and write's (or echo's) in
++    the /dev/container virtual file system.
++ 4) Start a task that will be the "founding father" of the new job.
++ 5) Attach that task to the new container by writing its pid to the
++    /dev/container tasks file for that container.
++ 6) fork, exec or clone the job tasks from this founding father task.
++
++For example, the following sequence of commands will set up a container
++named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
++and then start a subshell 'sh' in that container:
++
++  mount -t container cpuset -ocpuset /dev/container
++  cd /dev/container
++  mkdir Charlie
++  cd Charlie
++  /bin/echo $$ > tasks
++  sh
++  # The subshell 'sh' is now running in container Charlie
++  # The next line should display '/Charlie'
++  cat /proc/self/container
++
++2. Usage Examples and Syntax
++============================
++
++2.1 Basic Usage
++---------------
++
++Creating, modifying, and using containers can be done through the container
++virtual filesystem.
++
++To mount a container hierarchy with all available subsystems, type:
++# mount -t container xxx /dev/container
++
++The "xxx" is not interpreted by the container code, but will appear in
++/proc/mounts so may be any useful identifying string that you like.
++
++To mount a container hierarchy with just the cpuset and numtasks
++subsystems, type:
++# mount -t container -o cpuset,numtasks hier1 /dev/container
++
++To change the set of subsystems bound to a mounted hierarchy, just
++remount with different options:
++
++# mount -o remount,cpuset,ns  /dev/container
++
++Note that changing the set of subsystems is currently only supported
++when the hierarchy consists of a single (root) container. Supporting
++the ability to arbitrarily bind/unbind subsystems from an existing
++container hierarchy is intended to be implemented in the future.
++
++Then under /dev/container you can find a tree that corresponds to the
++tree of the containers in the system. For instance, /dev/container
++is the container that holds the whole system.
++
++If you want to create a new container under /dev/container:
++# cd /dev/container
++# mkdir my_container
++
++Now you want to do something with this container.
++# cd my_container
++
++In this directory you can find several files:
++# ls
++notify_on_release release_agent tasks
++(plus whatever files are added by the attached subsystems)
++
++Now attach your shell to this container:
++# /bin/echo $$ > tasks
++
++You can also create containers inside your container by using mkdir in this
++directory.
++# mkdir my_sub_cs
++
++To remove a container, just use rmdir:
++# rmdir my_sub_cs
++
++This will fail if the container is in use (has containers inside, or
++has processes attached, or is held alive by some other subsystem-specific
++reference).
++
++2.2 Attaching processes
++-----------------------
++
++# /bin/echo PID > tasks
++
++Note that it is PID, not PIDs. You can only attach ONE task at a time.
++If you have several tasks to attach, you have to do it one after another:
++
++# /bin/echo PID1 > tasks
++# /bin/echo PID2 > tasks
++	...
++# /bin/echo PIDn > tasks
++
++3. Kernel API
++=============
++
++3.1 Overview
++------------
++
++Each kernel subsystem that wants to hook into the generic container
++system needs to create a container_subsys object. This contains
++various methods, which are callbacks from the container system, along
++with a subsystem id which will be assigned by the container system.
++
++Other fields in the container_subsys object include:
++
++- subsys_id: a unique array index for the subsystem, indicating which
++  entry in container->subsys[] this subsystem should be
++  managing. Initialized by container_register_subsys(); prior to this
++  it should be initialized to -1
++
++- hierarchy: an index indicating which hierarchy, if any, this
++  subsystem is currently attached to. If this is -1, then the
++  subsystem is not attached to any hierarchy, and all tasks should be
++  considered to be members of the subsystem's top_container. It should
++  be initialized to -1.
++
++- name: should be initialized to a unique subsystem name prior to
++  calling container_register_subsystem. Should be no longer than
++  MAX_CONTAINER_TYPE_NAMELEN
++
++Each container object created by the system has an array of pointers,
++indexed by subsystem id; this pointer is entirely managed by the
++subsystem; the generic container code will never touch this pointer.
++
++3.2 Synchronization
++-------------------
++
++There is a global mutex, container_mutex, used by the container
++system. This should be taken by anything that wants to modify a
++container. It may also be taken to prevent containers from being
++modified, but more specific locks may be more appropriate in that
++situation.
++
++See kernel/container.c for more details.
++
++Subsystems can take/release the container_mutex via the functions
++container_lock()/container_unlock(), and can
++take/release the callback_mutex via the functions
++container_lock()/container_unlock().
++
++Accessing a task's container pointer may be done in the following ways:
++- while holding container_mutex
++- while holding the task's alloc_lock (via task_lock())
++- inside an rcu_read_lock() section via rcu_dereference()
++
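++As a hedged sketch of the rcu_read_lock() approach (the task_struct and
++css_group field names below are assumptions; this document does not name
++them):
++
++	struct css_group *cg;
++	struct container_subsys_state *css;
++
++	rcu_read_lock();
++	cg = rcu_dereference(tsk->containers);	/* the task's css_group */
++	css = cg->subsys[ss->subsys_id];	/* this subsystem's state */
++	/* ... read-only use of css ... */
++	rcu_read_unlock();
++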
++3.3 Subsystem API
++--------------------------
++
++Each subsystem should:
++
++- add an entry in linux/container_subsys.h
++- define a container_subsys object called <name>_subsys
++
++Each subsystem may export the following methods. The only mandatory
++methods are create/destroy. Any others that are null are presumed to
++be successful no-ops.
++
++int create(struct container *cont)
++LL=container_mutex
++
++Called to create a subsystem state object for a container. The
++subsystem should set its subsystem pointer for the passed container,
++returning 0 on success or a negative error code. On success, the
++subsystem pointer should point to a structure of type
++container_subsys_state (typically embedded in a larger
++subsystem-specific object), which will be initialized by the container
++system. Note that this will be called at initialization to create the
++root subsystem state for this subsystem; this case can be identified
++by the passed container object having a NULL parent (since it's the
++root of the hierarchy) and may be an appropriate place for
++initialization code.
++
++void destroy(struct container *cont)
++LL=container_mutex
++
++The container system is about to destroy the passed container; the
++subsystem should do any necessary cleanup.
++
++int can_attach(struct container_subsys *ss, struct container *cont,
++	       struct task_struct *task)
++LL=container_mutex
++
++Called prior to moving a task into a container; if the subsystem
++returns an error, this will abort the attach operation.  If a NULL
++task is passed, then a successful result indicates that *any*
++unspecified task can be moved into the container. Note that this isn't
++called on a fork. If this method returns 0 (success) then this should
++remain valid while the caller holds container_mutex.
++
++void attach(struct container_subsys *ss, struct container *cont,
++	    struct container *old_cont, struct task_struct *task)
++LL=container_mutex
++
++Called after the task has been attached to the container, to allow any
++post-attachment activity that requires memory allocations or blocking.
++
++void fork(struct container_subsys *ss, struct task_struct *task)
++LL=callback_mutex, maybe read_lock(tasklist_lock)
++
++Called when a task is forked into a container. Also called during
++registration for all existing tasks.
++
++void exit(struct container_subsys *ss, struct task_struct *task)
++LL=callback_mutex
++
++Called during task exit.
++
++int populate(struct container_subsys *ss, struct container *cont)
++LL=none
++
++Called after creation of a container to allow a subsystem to populate
++the container directory with file entries.  The subsystem should make
++calls to container_add_file() with objects of type cftype (see
++include/linux/container.h for details).  Note that although this
++method can return an error code, the error code is currently not
++always handled well.
++
++void post_clone(struct container_subsys *ss, struct container *cont)
++
++Called at the end of container_clone() to do any parameter
++initialization which might be required before a task could attach.  For
++example in cpusets, no task may attach before 'cpus' and 'mems' are set
++up.
++
++void bind(struct container_subsys *ss, struct container *root)
++LL=callback_mutex
++
++Called when a container subsystem is rebound to a different hierarchy
++and root container. Currently this will only involve movement between
++the default hierarchy (which never has sub-containers) and a hierarchy
++that is being created/destroyed (and hence has no sub-containers).
++
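++As a worked illustration of the API above, here is a hedged sketch of a
++minimal subsystem (the "demo" name, the state layout and the helper
++calls are illustrative assumptions; the method prototypes and the
++subsys_id/hierarchy/name fields follow the descriptions above):
++
++	#include <linux/container.h>
++	#include <linux/slab.h>
++
++	struct demo_state {
++		struct container_subsys_state css;
++		int demo_limit;		/* illustrative per-container data */
++	};
++
++	static struct container_subsys demo_subsys;
++
++	static int demo_create(struct container *cont)
++	{
++		struct demo_state *ds = kzalloc(sizeof(*ds), GFP_KERNEL);
++		if (!ds)
++			return -ENOMEM;
++		/* publish our state in the pointer slot this subsystem manages */
++		cont->subsys[demo_subsys.subsys_id] = &ds->css;
++		return 0;
++	}
++
++	static void demo_destroy(struct container *cont)
++	{
++		kfree(container_of(cont->subsys[demo_subsys.subsys_id],
++				   struct demo_state, css));
++	}
++
++	static struct container_subsys demo_subsys = {
++		.name = "demo",
++		.subsys_id = -1,	/* assigned by container_register_subsys() */
++		.hierarchy = -1,	/* not attached to any hierarchy yet */
++		.create = demo_create,
++		.destroy = demo_destroy,
++	};
++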
++4. Questions
++============
++
++Q: what's up with this '/bin/echo' ?
++A: bash's builtin 'echo' command does not check calls to write() against
++   errors. If you use it in the container file system, you won't be
++   able to tell whether a command succeeded or failed.
++
++Q: When I attach processes, only the first one on the line actually gets attached!
++A: We can only return one error code per call to write(). So you should also
++   put only ONE pid.
++
+diff -Nurb linux-2.6.22-570/Documentation/cpuidle/core.txt linux-2.6.22-try2/Documentation/cpuidle/core.txt
+--- linux-2.6.22-570/Documentation/cpuidle/core.txt	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/Documentation/cpuidle/core.txt	2007-12-19 15:29:19.000000000 -0500
+@@ -0,0 +1,17 @@
++
++		Supporting multiple CPU idle levels in kernel
++
++				cpuidle
++
++General Information:
++
++Various CPUs today support multiple idle levels that are differentiated
++by varying exit latencies and power consumption during idle.
++cpuidle is a generic in-kernel infrastructure that separates
++idle policy (governor) from idle mechanism (driver) and provides a
++standardized infrastructure to support independent development of
++governors and drivers.
++
++cpuidle resides under drivers/cpuidle.
++
++
+diff -Nurb linux-2.6.22-570/Documentation/cpuidle/driver.txt linux-2.6.22-try2/Documentation/cpuidle/driver.txt
+--- linux-2.6.22-570/Documentation/cpuidle/driver.txt	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/Documentation/cpuidle/driver.txt	2007-12-19 15:29:19.000000000 -0500
+@@ -0,0 +1,24 @@
++
++
++		Supporting multiple CPU idle levels in kernel
++
++				cpuidle drivers
++
++
++
++
++A cpuidle driver supports capability detection for a particular system. The
++init and exit routines will be called for each online CPU, with a percpu
++cpuidle_driver object, and the driver should fill in the cpuidle_states
++inside cpuidle_driver depending on the CPU's capability.
++
++A driver can handle dynamic state changes (like battery<->AC) by calling
++the force_redetect interface.
++
++It is possible to have more than one driver registered at the same time, and
++the user can switch between drivers using the sysfs interface.
++
++Interfaces:
++int cpuidle_register_driver(struct cpuidle_driver *drv);
++void cpuidle_unregister_driver(struct cpuidle_driver *drv);
++int cpuidle_force_redetect(struct cpuidle_device *dev);
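++
++A hedged sketch of how a driver module might use these interfaces (the
++header name and the contents of cpuidle_driver are assumptions; only
++the three calls above are specified in this document):
++
++	#include <linux/module.h>
++	#include <linux/cpuidle.h>	/* assumed header */
++
++	/* the driver's init/exit hooks run per online CPU and fill in
++	 * cpuidle_states according to the detected capability */
++	static struct cpuidle_driver demo_idle_driver;
++
++	static int __init demo_idle_init(void)
++	{
++		return cpuidle_register_driver(&demo_idle_driver);
++	}
++
++	static void __exit demo_idle_exit(void)
++	{
++		cpuidle_unregister_driver(&demo_idle_driver);
++	}
++
++	module_init(demo_idle_init);
++	module_exit(demo_idle_exit);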
+diff -Nurb linux-2.6.22-570/Documentation/cpuidle/governor.txt linux-2.6.22-try2/Documentation/cpuidle/governor.txt
+--- linux-2.6.22-570/Documentation/cpuidle/governor.txt	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/Documentation/cpuidle/governor.txt	2007-12-19 15:29:19.000000000 -0500
+@@ -0,0 +1,24 @@
++
++
++
++		Supporting multiple CPU idle levels in kernel
++
++				cpuidle governors
++
++
++
++
++A cpuidle governor is the policy routine that decides what idle state to
++enter at any given time. The cpuidle core uses the following callbacks to
++the governor while handling idle entry:
++* select_state callback where governor can determine next idle state to enter
++* prepare_idle callback is called before entering an idle state
++* scan callback is called after a driver forces redetection of the states
++
++More than one governor can be registered at the same time, and the
++user can switch between governors using the sysfs interface.
++
++Interfaces:
++int cpuidle_register_governor(struct cpuidle_governor *gov);
++void cpuidle_unregister_governor(struct cpuidle_governor *gov);
++
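++A governor registers the same way as a driver; below is a hedged sketch
++(the contents of cpuidle_governor, including the select_state,
++prepare_idle and scan callbacks named above, are not specified in this
++document and are omitted):
++
++	#include <linux/module.h>
++	#include <linux/cpuidle.h>	/* assumed header */
++
++	static struct cpuidle_governor demo_governor;
++
++	static int __init demo_gov_init(void)
++	{
++		return cpuidle_register_governor(&demo_governor);
++	}
++
++	static void __exit demo_gov_exit(void)
++	{
++		cpuidle_unregister_governor(&demo_governor);
++	}
++
++	module_init(demo_gov_init);
++	module_exit(demo_gov_exit);
++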
+diff -Nurb linux-2.6.22-570/Documentation/cpuidle/sysfs.txt linux-2.6.22-try2/Documentation/cpuidle/sysfs.txt
+--- linux-2.6.22-570/Documentation/cpuidle/sysfs.txt	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/Documentation/cpuidle/sysfs.txt	2007-12-19 15:29:19.000000000 -0500
+@@ -0,0 +1,27 @@
++
++
++		Supporting multiple CPU idle levels in kernel
++
++				cpuidle sysfs
++
++System-global cpuidle information is under
++/sys/devices/system/cpu/cpuidle
++
++The current interfaces in this directory have self-explanatory names:
++* available_drivers
++* available_governors
++* current_driver
++* current_governor
++
++Per-logical-CPU cpuidle information is under
++/sys/devices/system/cpu/cpuX/cpuidle
++for each online cpu X.
++
++Under this percpu directory, there is a directory for each idle state supported
++by the driver, which in turn has
++* latency
++* power
++* time
++* usage
++
++
+diff -Nurb linux-2.6.22-570/Documentation/cpusets.txt linux-2.6.22-try2/Documentation/cpusets.txt
+--- linux-2.6.22-570/Documentation/cpusets.txt	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/Documentation/cpusets.txt	2007-12-19 15:29:24.000000000 -0500
+@@ -7,6 +7,7 @@
+ Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+ Modified by Paul Jackson <pj@sgi.com>
+ Modified by Christoph Lameter <clameter@sgi.com>
++Modified by Paul Menage <menage@google.com>
+ 
+ CONTENTS:
+ =========
+@@ -16,10 +17,9 @@
+   1.2 Why are cpusets needed ?
+   1.3 How are cpusets implemented ?
+   1.4 What are exclusive cpusets ?
+-  1.5 What does notify_on_release do ?
+-  1.6 What is memory_pressure ?
+-  1.7 What is memory spread ?
+-  1.8 How do I use cpusets ?
++  1.5 What is memory_pressure ?
++  1.6 What is memory spread ?
++  1.7 How do I use cpusets ?
+ 2. Usage Examples and Syntax
+   2.1 Basic Usage
+   2.2 Adding/removing cpus
+@@ -43,18 +43,19 @@
+ hooks, beyond what is already present, required to manage dynamic
+ job placement on large systems.
+ 
+-Each task has a pointer to a cpuset.  Multiple tasks may reference
+-the same cpuset.  Requests by a task, using the sched_setaffinity(2)
+-system call to include CPUs in its CPU affinity mask, and using the
+-mbind(2) and set_mempolicy(2) system calls to include Memory Nodes
+-in its memory policy, are both filtered through that tasks cpuset,
+-filtering out any CPUs or Memory Nodes not in that cpuset.  The
+-scheduler will not schedule a task on a CPU that is not allowed in
+-its cpus_allowed vector, and the kernel page allocator will not
+-allocate a page on a node that is not allowed in the requesting tasks
+-mems_allowed vector.
++Cpusets use the generic container subsystem described in
++Documentation/containers.txt.
+ 
+-User level code may create and destroy cpusets by name in the cpuset
++Requests by a task, using the sched_setaffinity(2) system call to
++include CPUs in its CPU affinity mask, and using the mbind(2) and
++set_mempolicy(2) system calls to include Memory Nodes in its memory
++policy, are both filtered through that task's cpuset, filtering out any
++CPUs or Memory Nodes not in that cpuset.  The scheduler will not
++schedule a task on a CPU that is not allowed in its cpus_allowed
++vector, and the kernel page allocator will not allocate a page on a
++node that is not allowed in the requesting task's mems_allowed vector.
++
++User level code may create and destroy cpusets by name in the container
+ virtual file system, manage the attributes and permissions of these
+ cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
+ specify and query to which cpuset a task is assigned, and list the
+@@ -86,9 +87,6 @@
+       and a database), or
+     * NUMA systems running large HPC applications with demanding
+       performance characteristics.
+-    * Also cpu_exclusive cpusets are useful for servers running orthogonal
+-      workloads such as RT applications requiring low latency and HPC
+-      applications that are throughput sensitive
+ 
+ These subsets, or "soft partitions" must be able to be dynamically
+ adjusted, as the job mix changes, without impacting other concurrently
+@@ -117,7 +115,7 @@
+  - Cpusets are sets of allowed CPUs and Memory Nodes, known to the
+    kernel.
+  - Each task in the system is attached to a cpuset, via a pointer
+-   in the task structure to a reference counted cpuset structure.
++   in the task structure to a reference counted container structure.
+  - Calls to sched_setaffinity are filtered to just those CPUs
+    allowed in that tasks cpuset.
+  - Calls to mbind and set_mempolicy are filtered to just
+@@ -131,8 +129,6 @@
+  - A cpuset may be marked exclusive, which ensures that no other
+    cpuset (except direct ancestors and descendents) may contain
+    any overlapping CPUs or Memory Nodes.
+-   Also a cpu_exclusive cpuset would be associated with a sched
+-   domain.
+  - You can list all the tasks (by pid) attached to any cpuset.
+ 
+ The implementation of cpusets requires a few, simple hooks
+@@ -144,23 +140,15 @@
+    allowed in that tasks cpuset.
+  - in sched.c migrate_all_tasks(), to keep migrating tasks within
+    the CPUs allowed by their cpuset, if possible.
+- - in sched.c, a new API partition_sched_domains for handling
+-   sched domain changes associated with cpu_exclusive cpusets
+-   and related changes in both sched.c and arch/ia64/kernel/domain.c
+  - in the mbind and set_mempolicy system calls, to mask the requested
+    Memory Nodes by what's allowed in that tasks cpuset.
+  - in page_alloc.c, to restrict memory to allowed nodes.
+  - in vmscan.c, to restrict page recovery to the current cpuset.
+ 
+-In addition a new file system, of type "cpuset" may be mounted,
+-typically at /dev/cpuset, to enable browsing and modifying the cpusets
+-presently known to the kernel.  No new system calls are added for
+-cpusets - all support for querying and modifying cpusets is via
+-this cpuset file system.
+-
+-Each task under /proc has an added file named 'cpuset', displaying
+-the cpuset name, as the path relative to the root of the cpuset file
+-system.
++You should mount the "container" filesystem type in order to enable
++browsing and modifying the cpusets presently known to the kernel.  No
++new system calls are added for cpusets - all support for querying and
++modifying cpusets is via this cpuset file system.
+ 
+ The /proc/<pid>/status file for each task has two added lines,
+ displaying the tasks cpus_allowed (on which CPUs it may be scheduled)
+@@ -170,16 +158,15 @@
+   Cpus_allowed:   ffffffff,ffffffff,ffffffff,ffffffff
+   Mems_allowed:   ffffffff,ffffffff
+ 
+-Each cpuset is represented by a directory in the cpuset file system
+-containing the following files describing that cpuset:
++Each cpuset is represented by a directory in the container file system
++containing (on top of the standard container files) the following
++files describing that cpuset:
+ 
+  - cpus: list of CPUs in that cpuset
+  - mems: list of Memory Nodes in that cpuset
+  - memory_migrate flag: if set, move pages to cpusets nodes
+  - cpu_exclusive flag: is cpu placement exclusive?
+  - mem_exclusive flag: is memory placement exclusive?
+- - tasks: list of tasks (by pid) attached to that cpuset
+- - notify_on_release flag: run /sbin/cpuset_release_agent on exit?
+  - memory_pressure: measure of how much paging pressure in cpuset
+ 
+ In addition, the root cpuset only has the following file:
+@@ -231,15 +218,6 @@
+ a direct ancestor or descendent, may share any of the same CPUs or
+ Memory Nodes.
+ 
+-A cpuset that is cpu_exclusive has a scheduler (sched) domain
+-associated with it.  The sched domain consists of all CPUs in the
+-current cpuset that are not part of any exclusive child cpusets.
+-This ensures that the scheduler load balancing code only balances
+-against the CPUs that are in the sched domain as defined above and
+-not all of the CPUs in the system. This removes any overhead due to
+-load balancing code trying to pull tasks outside of the cpu_exclusive
+-cpuset only to be prevented by the tasks' cpus_allowed mask.
+-
+ A cpuset that is mem_exclusive restricts kernel allocations for
+ page, buffer and other data commonly shared by the kernel across
+ multiple users.  All cpusets, whether mem_exclusive or not, restrict
+@@ -253,21 +231,7 @@
+ outside even a mem_exclusive cpuset.
+ 
+ 
+-1.5 What does notify_on_release do ?
+-------------------------------------
+-
+-If the notify_on_release flag is enabled (1) in a cpuset, then whenever
+-the last task in the cpuset leaves (exits or attaches to some other
+-cpuset) and the last child cpuset of that cpuset is removed, then
+-the kernel runs the command /sbin/cpuset_release_agent, supplying the
+-pathname (relative to the mount point of the cpuset file system) of the
+-abandoned cpuset.  This enables automatic removal of abandoned cpusets.
+-The default value of notify_on_release in the root cpuset at system
+-boot is disabled (0).  The default value of other cpusets at creation
+-is the current value of their parents notify_on_release setting.
+-
+-
+-1.6 What is memory_pressure ?
++1.5 What is memory_pressure ?
+ -----------------------------
+ The memory_pressure of a cpuset provides a simple per-cpuset metric
+ of the rate that the tasks in a cpuset are attempting to free up in
+@@ -324,7 +288,7 @@
+ times 1000.
+ 
+ 
+-1.7 What is memory spread ?
++1.6 What is memory spread ?
+ ---------------------------
+ There are two boolean flag files per cpuset that control where the
+ kernel allocates pages for the file system buffers and related in
+@@ -395,7 +359,7 @@
+ can become very uneven.
+ 
+ 
+-1.8 How do I use cpusets ?
++1.7 How do I use cpusets ?
+ --------------------------
+ 
+ In order to minimize the impact of cpusets on critical kernel
+@@ -485,7 +449,7 @@
+ To start a new job that is to be contained within a cpuset, the steps are:
+ 
+  1) mkdir /dev/cpuset
+- 2) mount -t cpuset none /dev/cpuset
++ 2) mount -t container -ocpuset cpuset /dev/cpuset
+  3) Create the new cpuset by doing mkdir's and write's (or echo's) in
+     the /dev/cpuset virtual file system.
+  4) Start a task that will be the "founding father" of the new job.
+@@ -497,7 +461,7 @@
+ named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
+ and then start a subshell 'sh' in that cpuset:
+ 
+-  mount -t cpuset none /dev/cpuset
++  mount -t container -ocpuset cpuset /dev/cpuset
+   cd /dev/cpuset
+   mkdir Charlie
+   cd Charlie
+@@ -529,7 +493,7 @@
+ virtual filesystem.
+ 
+ To mount it, type:
+-# mount -t cpuset none /dev/cpuset
++# mount -t container -o cpuset cpuset /dev/cpuset
+ 
+ Then under /dev/cpuset you can find a tree that corresponds to the
+ tree of the cpusets in the system. For instance, /dev/cpuset
+@@ -572,6 +536,18 @@
+ This will fail if the cpuset is in use (has cpusets inside, or has
+ processes attached).
+ 
++Note that for legacy reasons, the "cpuset" filesystem exists as a
++wrapper around the container filesystem.
++
++The command
++
++mount -t cpuset X /dev/cpuset
++
++is equivalent to
++
++mount -t container -ocpuset X /dev/cpuset
++echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent
++
+ 2.2 Adding/removing cpus
+ ------------------------
+ 
+diff -Nurb linux-2.6.22-570/Documentation/feature-removal-schedule.txt linux-2.6.22-try2/Documentation/feature-removal-schedule.txt
+--- linux-2.6.22-570/Documentation/feature-removal-schedule.txt	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/Documentation/feature-removal-schedule.txt	2007-12-19 15:29:24.000000000 -0500
+@@ -162,6 +162,33 @@
+ 
+ ---------------------------
+ 
++What:	filemap_nopage, filemap_populate
++When:	April 2007
++Why:	These legacy interfaces no longer have any callers in the kernel and
++	any functionality provided can be provided with filemap_fault. The
++	removal schedule is short because they are a big maintainence burden
++	and have some bugs.
++Who:	Nick Piggin <npiggin@suse.de>
++
++---------------------------
++
++What:	vm_ops.populate, install_page
++When:	April 2007
++Why:	These legacy interfaces no longer have any callers in the kernel and
++	any functionality provided can be provided with vm_ops.fault.
++Who:	Nick Piggin <npiggin@suse.de>
++
++---------------------------
++
++What:	vm_ops.nopage
++When:	February 2008, provided in-kernel callers have been converted
++Why:	This interface is replaced by vm_ops.fault, but it has been around
++	forever, is used by a lot of drivers, and doesn't cost much to
++	maintain.
++Who:	Nick Piggin <npiggin@suse.de>
++
++---------------------------
++
+ What:	Interrupt only SA_* flags
+ When:	September 2007
+ Why:	The interrupt related SA_* flags are replaced by IRQF_* to move them
+@@ -280,25 +307,6 @@
+ 
+ ---------------------------
+ 
+-What:	Multipath cached routing support in ipv4
+-When:	in 2.6.23
+-Why:	Code was merged, then submitter immediately disappeared leaving
+-	us with no maintainer and lots of bugs.  The code should not have
+-	been merged in the first place, and many aspects of it's
+-	implementation are blocking more critical core networking
+-	development.  It's marked EXPERIMENTAL and no distribution
+-	enables it because it cause obscure crashes due to unfixable bugs
+-	(interfaces don't return errors so memory allocation can't be
+-	handled, calling contexts of these interfaces make handling
+-	errors impossible too because they get called after we've
+-	totally commited to creating a route object, for example).
+-	This problem has existed for years and no forward progress
+-	has ever been made, and nobody steps up to try and salvage
+-	this code, so we're going to finally just get rid of it.
+-Who:	David S. Miller <davem@davemloft.net>
+-
+----------------------------
+-
+ What:	read_dev_chars(), read_conf_data{,_lpm}() (s390 common I/O layer)
+ When:	December 2007
+ Why:	These functions are a leftover from 2.4 times. They have several
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/00-INDEX linux-2.6.22-try2/Documentation/filesystems/00-INDEX
+--- linux-2.6.22-570/Documentation/filesystems/00-INDEX	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/Documentation/filesystems/00-INDEX	2007-12-19 15:29:23.000000000 -0500
+@@ -84,6 +84,8 @@
+ 	- info and mount options for the UDF filesystem.
+ ufs.txt
+ 	- info on the ufs filesystem.
++unionfs/
++	- info on the unionfs filesystem.
+ vfat.txt
+ 	- info on using the VFAT filesystem used in Windows NT and Windows 95
+ vfs.txt
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/Locking linux-2.6.22-try2/Documentation/filesystems/Locking
+--- linux-2.6.22-570/Documentation/filesystems/Locking	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/Documentation/filesystems/Locking	2007-12-19 15:29:24.000000000 -0500
+@@ -510,12 +510,14 @@
+ prototypes:
+ 	void (*open)(struct vm_area_struct*);
+ 	void (*close)(struct vm_area_struct*);
++	struct page *(*fault)(struct vm_area_struct*, struct fault_data *);
+ 	struct page *(*nopage)(struct vm_area_struct*, unsigned long, int *);
+ 
+ locking rules:
+ 		BKL	mmap_sem
+ open:		no	yes
+ close:		no	yes
++fault:		no	yes
+ nopage:		no	yes
+ 
+ ================================================================================
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/configfs/configfs.txt linux-2.6.22-try2/Documentation/filesystems/configfs/configfs.txt
+--- linux-2.6.22-570/Documentation/filesystems/configfs/configfs.txt	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/Documentation/filesystems/configfs/configfs.txt	2007-12-19 15:29:23.000000000 -0500
+@@ -238,6 +238,8 @@
+ 		struct config_group *(*make_group)(struct config_group *group,
+ 						   const char *name);
+ 		int (*commit_item)(struct config_item *item);
++		void (*disconnect_notify)(struct config_group *group,
++					  struct config_item *item);
+ 		void (*drop_item)(struct config_group *group,
+ 				  struct config_item *item);
+ 	};
+@@ -268,6 +270,16 @@
+ for the item to actually disappear from the subsystem's usage.  But it
+ is gone from configfs.
+ 
++When drop_item() is called, the item's linkage has already been torn
++down.  It no longer has a reference on its parent and has no place in
++the item hierarchy.  If a client needs to do some cleanup before this
++teardown happens, the subsystem can implement the
++ct_group_ops->disconnect_notify() method.  The method is called after
++configfs has removed the item from the filesystem view but before the
++item is removed from its parent group.  Like drop_item(),
++disconnect_notify() is void and cannot fail.  Client subsystems should
++not drop any references here, as they still must do it in drop_item().
++
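++As an illustration, here is a minimal sketch of group operations wiring
++up the notification (the my_* names are hypothetical; only the ops
++fields come from configfs):
++
++	static void my_disconnect_notify(struct config_group *group,
++					 struct config_item *item)
++	{
++		/* The item is still linked here; do any pre-teardown
++		 * cleanup, but do not drop references -- that still
++		 * happens in drop_item(). */
++	}
++
++	static struct configfs_group_operations my_group_ops = {
++		.disconnect_notify	= my_disconnect_notify,
++		.drop_item		= my_drop_item,
++	};
++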
+ A config_group cannot be removed while it still has child items.  This
+ is implemented in the configfs rmdir(2) code.  ->drop_item() will not be
+ called, as the item has not been dropped.  rmdir(2) will fail, as the
+@@ -386,6 +398,33 @@
+ rmdir(2).  They also are not considered when rmdir(2) on the parent
+ group is checking for children.
+ 
++[Dependent Subsystems]
++
++Sometimes other drivers depend on particular configfs items.  For
++example, ocfs2 mounts depend on a heartbeat region item.  If that
++region item is removed with rmdir(2), the ocfs2 mount must BUG or go
++readonly.  Not happy.
++
++configfs provides two additional API calls: configfs_depend_item() and
++configfs_undepend_item().  A client driver can call
++configfs_depend_item() on an existing item to tell configfs that it is
++depended on.  configfs will then return -EBUSY from rmdir(2) for that
++item.  When the item is no longer depended on, the client driver calls
++configfs_undepend_item() on it.
++
++These APIs cannot be called underneath any configfs callbacks, as
++they will conflict.  They can block and allocate.  A client driver
++probably shouldn't call them of its own gumption.  Rather, it should
++be providing an API that external subsystems call.
++
++How does this work?  Imagine the ocfs2 mount process.  When it mounts,
++it asks for a heartbeat region item.  This is done via a call into the
++heartbeat code.  Inside the heartbeat code, the region item is looked
++up.  Here, the heartbeat code calls configfs_depend_item().  If it
++succeeds, then heartbeat knows the region is safe to give to ocfs2.
++If it fails, it was being torn down anyway, and heartbeat can gracefully
++pass up an error.
++
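++To make the pattern concrete, here is a sketch of what the
++heartbeat-side pinning could look like (the hb_* names are
++hypothetical; the two configfs_*_item() calls are the API described
++above):
++
++	int hb_region_pin(struct configfs_subsystem *subsys,
++			  struct config_item *region)
++	{
++		/* On success, rmdir(2) on the region returns -EBUSY
++		 * until hb_region_unpin() is called. */
++		return configfs_depend_item(subsys, region);
++	}
++
++	void hb_region_unpin(struct configfs_subsystem *subsys,
++			     struct config_item *region)
++	{
++		configfs_undepend_item(subsys, region);
++	}
++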
+ [Committable Items]
+ 
+ NOTE: Committable items are currently unimplemented.
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/00-INDEX linux-2.6.22-try2/Documentation/filesystems/unionfs/00-INDEX
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/00-INDEX	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/Documentation/filesystems/unionfs/00-INDEX	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,10 @@
++00-INDEX
++	- this file.
++concepts.txt
++	- A brief introduction of concepts.
++issues.txt
++	- A summary of known issues with unionfs.
++rename.txt
++	- Information regarding rename operations.
++usage.txt
++	- Usage information and examples.
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/concepts.txt linux-2.6.22-try2/Documentation/filesystems/unionfs/concepts.txt
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/concepts.txt	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/Documentation/filesystems/unionfs/concepts.txt	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,75 @@
++Unionfs 2.0 CONCEPTS:
++=====================
++
++This file describes the concepts needed by a namespace unification file
++system.
++
++Branch Priority:
++================
++
++Each branch is assigned a unique priority - starting from 0 (highest
++priority).  No two branches can have the same priority.
++
++
++Branch Mode:
++============
++
++Each branch is assigned a mode - read-write or read-only. This allows
++directories on media mounted read-write to be used in a read-only manner.
++
++
++Whiteouts:
++==========
++
++A whiteout removes a file name from the namespace. Whiteouts are needed when
++one attempts to remove a file on a read-only branch.
++
++Suppose we have a two-branch union, where branch 0 is read-write and branch
++1 is read-only, and a file 'foo' exists on branch 1:
++
++./b0/
++./b1/
++./b1/foo
++
++The unified view would simply be:
++
++./union/
++./union/foo
++
++Since 'foo' is stored on a read-only branch, it cannot be removed. A
++whiteout is used to remove the name 'foo' from the unified namespace. Again,
++since branch 1 is read-only, the whiteout cannot be created there. So, we
++try on a higher priority (lower numerically) branch and create the whiteout
++there.
++
++./b0/
++./b0/.wh.foo
++./b1/
++./b1/foo
++
++Later, when Unionfs traverses branches (due to lookup or readdir), it
++eliminates 'foo' from the namespace (as well as the whiteout itself).
++
++
++Duplicate Elimination:
++======================
++
++It is possible for files on different branches to have the same name.
++Unionfs then has to select which instance of the file to show to the user.
++Given the fact that each branch has a priority associated with it, the
++simplest solution is to take the instance from the highest priority
++(numerically lowest value) and "hide" the others.
++
++
++Copyup:
++=======
++
++When a change is made to the contents of a file's data or meta-data, it
++has to be stored somewhere. The best way is to create a copy of the
++original file on a branch that is writable, and then redirect the write
++through to this copy. The copy must be made on a higher priority branch so
++that lookup and readdir return this newer "version" of the file rather than
++the original (see duplicate elimination).
++
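++A quick way to observe copyup in action (the paths here are
++illustrative; see usage.txt for the mount syntax):
++
++# mount -t unionfs -o dirs=/b0=rw:/b1=ro none /union
++# echo change >> /union/foo	# 'foo' originally exists only on /b1
++# ls /b0
++foo				# the copied-up version now masks /b1/foo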
++
++For more information, see <http://unionfs.filesystems.org/>.
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/issues.txt linux-2.6.22-try2/Documentation/filesystems/unionfs/issues.txt
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/issues.txt	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/Documentation/filesystems/unionfs/issues.txt	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,39 @@
++KNOWN Unionfs 2.0 ISSUES:
++=========================
++
++1. The NFS server returns -EACCES for read-only exports, instead of -EROFS.
++   This means we can't reliably detect a read-only NFS export.
++
++2. Modifying a Unionfs branch directly, while the union is mounted, is
++   currently unsupported, because it could cause a cache incoherency between
++   the union layer and the lower file systems (for that reason, Unionfs
++   currently prohibits using branches which overlap with each other, even
++   partially).  We have tested Unionfs under such conditions, and fixed any
++   bugs we found (Unionfs comes with an extensive regression test suite).
++   However, it may still be possible that changes made to lower branches
++   directly could cause cache incoherency which, in the worst case, may cause
++   an oops.
++
++   Unionfs 2.0 has a temporary workaround for this.  You can force Unionfs
++   to increase the superblock generation number, and hence purge all cached
++   Unionfs objects, which would then be re-gotten from the lower branches.
++   This should ensure cache consistency.  To increase the generation number,
++   execute the command:
++
++	mount -t unionfs -o remount,incgen none MOUNTPOINT
++
++   Note that the older way of incrementing the generation number using an
++   ioctl is no longer supported in Unionfs 2.0.  Ioctls in general are not
++   encouraged.  Plus, an ioctl is a per-file concept, whereas the generation
++   number is a per-file-system concept.  Worse, such an ioctl requires an
++   open file, which then has to be invalidated by the very nature of the
++   generation number increase (read: the old generation increase ioctl was
++   pretty racy).
++
++3. Unionfs should not use lookup_one_len() on the underlying f/s as it
++   confuses NFS.  Currently, unionfs_lookup() passes lookup intents to the
++   lower file-system, which eliminates part of the problem.  The remaining
++   calls to lookup_one_len may need to be changed to pass an intent.
++
++
++For more information, see <http://unionfs.filesystems.org/>.
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/rename.txt linux-2.6.22-try2/Documentation/filesystems/unionfs/rename.txt
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/rename.txt	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/Documentation/filesystems/unionfs/rename.txt	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,31 @@
++Rename is a complex beast. The following table shows which rename(2) operations
++should succeed and which should fail.
++
++o: success
++E: error (either unionfs or vfs)
++X: EXDEV
++
++none = file does not exist
++file = file is a file
++dir  = file is an empty directory
++child= file is a non-empty directory
++wh   = file is a directory containing only whiteouts; this makes it logically
++		empty
++
++                      none    file    dir     child   wh
++file                  o       o       E       E       E
++dir                   o       E       o       E       o
++child                 X       E       X       E       X
++wh                    o       E       o       E       o
++
++
++Renaming directories:
++=====================
++
++Whenever an empty (either physically or logically) directory is being renamed,
++the following sequence of events should take place:
++
++1) Remove whiteouts from both source and destination directory
++2) Rename source to destination
++3) Make destination opaque to prevent anything under it from showing up
++
+diff -Nurb linux-2.6.22-570/Documentation/filesystems/unionfs/usage.txt linux-2.6.22-try2/Documentation/filesystems/unionfs/usage.txt
+--- linux-2.6.22-570/Documentation/filesystems/unionfs/usage.txt	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/Documentation/filesystems/unionfs/usage.txt	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,90 @@
++Unionfs is a stackable unification file system, which can appear to merge
++the contents of several directories (branches), while keeping their physical
++content separate.  Unionfs is useful for unified source tree management,
++merged contents of a split CD-ROM, merged separate software package
++directories, data grids, and more.  Unionfs allows any mix of read-only and
++read-write branches, as well as insertion and deletion of branches anywhere
++in the fan-out.  To maintain Unix semantics, Unionfs handles elimination of
++duplicates, partial-error conditions, and more.
++
++# mount -t unionfs -o branch-option[,union-options[,...]] none MOUNTPOINT
++
++The available branch-option for the mount command is:
++
++	dirs=branch[=ro|=rw][:...]
++
++specifies a colon-separated list of the directories that compose the union.
++Directories that come earlier in the list have a higher precedence than
++those which come later. Additionally, read-only or read-write permissions of
++the branch can be specified by appending =ro or =rw (default) to each
++directory.
++
++Syntax:
++
++	dirs=/branch1[=ro|=rw]:/branch2[=ro|=rw]:...:/branchN[=ro|=rw]
++
++Example:
++
++	dirs=/writable_branch=rw:/read-only_branch=ro
++
++
++DYNAMIC BRANCH MANAGEMENT AND REMOUNTS
++======================================
++
++You can remount a union and change its overall mode, or reconfigure the
++branches, as follows.
++
++To downgrade a union from read-write to read-only:
++
++# mount -t unionfs -o remount,ro none MOUNTPOINT
++
++To upgrade a union from read-only to read-write:
++
++# mount -t unionfs -o remount,rw none MOUNTPOINT
++
++To delete a branch /foo, regardless of where it is in the current union:
++
++# mount -t unionfs -o remount,del=/foo none MOUNTPOINT
++
++To insert (add) a branch /foo before /bar:
++
++# mount -t unionfs -o remount,add=/bar:/foo none MOUNTPOINT
++
++To insert (add) a branch /foo (with the "rw" mode flag) before /bar:
++
++# mount -t unionfs -o remount,add=/bar:/foo=rw none MOUNTPOINT
++
++To insert (add) a branch /foo (in "rw" mode) at the very beginning (i.e., a
++new highest-priority branch), you can use the above syntax, or use a
++shorthand version as follows:
++
++# mount -t unionfs -o remount,add=/foo none MOUNTPOINT
++
++To append a branch to the very end (new lowest-priority branch):
++
++# mount -t unionfs -o remount,add=:/foo none MOUNTPOINT
++
++To append a branch to the very end (new lowest-priority branch), in
++read-only mode:
++
++# mount -t unionfs -o remount,add=:/foo=ro none MOUNTPOINT
++
++Finally, to change the mode of one existing branch, say /foo, from read-only
++to read-write, and change /bar from read-write to read-only:
++
++# mount -t unionfs -o remount,mode=/foo=rw,mode=/bar=ro none MOUNTPOINT
++
++
++CACHE CONSISTENCY
++=================
++
++If you modify any file on any of the lower branches directly, while there is
++a Unionfs 2.0 mounted above any of those branches, you should tell Unionfs
++to purge its caches and re-get the objects.  To do that, you have to
++increment the generation number of the superblock using the following
++command:
++
++# mount -t unionfs -o remount,incgen none MOUNTPOINT
++
++
++For more information, see <http://unionfs.filesystems.org/>.
+diff -Nurb linux-2.6.22-570/Documentation/firmware_class/firmware_sample_firmware_class.c linux-2.6.22-try2/Documentation/firmware_class/firmware_sample_firmware_class.c
+--- linux-2.6.22-570/Documentation/firmware_class/firmware_sample_firmware_class.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/Documentation/firmware_class/firmware_sample_firmware_class.c	2007-12-19 15:29:19.000000000 -0500
+@@ -78,6 +78,7 @@
+ 			 firmware_loading_show, firmware_loading_store);
+ 
+ static ssize_t firmware_data_read(struct kobject *kobj,
++				  struct bin_attribute *bin_attr,
+ 				  char *buffer, loff_t offset, size_t count)
+ {
+ 	struct class_device *class_dev = to_class_dev(kobj);
+@@ -88,6 +89,7 @@
+ 	return count;
+ }
+ static ssize_t firmware_data_write(struct kobject *kobj,
++				   struct bin_attribute *bin_attr,
+ 				   char *buffer, loff_t offset, size_t count)
+ {
+ 	struct class_device *class_dev = to_class_dev(kobj);
+diff -Nurb linux-2.6.22-570/Documentation/power/freezing-of-tasks.txt linux-2.6.22-try2/Documentation/power/freezing-of-tasks.txt
+--- linux-2.6.22-570/Documentation/power/freezing-of-tasks.txt	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/Documentation/power/freezing-of-tasks.txt	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,160 @@
++Freezing of tasks
++	(C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
++
++I. What is the freezing of tasks?
++
++The freezing of tasks is a mechanism by which user space processes and some
++kernel threads are controlled during hibernation or system-wide suspend (on some
++architectures).
++
++II. How does it work?
++
++There are four per-task flags used for that: PF_NOFREEZE, PF_FROZEN, TIF_FREEZE
++and PF_FREEZER_SKIP (the last one is auxiliary).  The tasks that have
++PF_NOFREEZE unset (all user space processes and some kernel threads) are
++regarded as 'freezable' and treated in a special way before the system enters a
++suspend state as well as before a hibernation image is created (in what follows
++we only consider hibernation, but the description also applies to suspend).
++
++Namely, as the first step of the hibernation procedure the function
++freeze_processes() (defined in kernel/power/process.c) is called.  It executes
++try_to_freeze_tasks() that sets TIF_FREEZE for all of the freezable tasks and
++sends a fake signal to each of them.  A task that receives such a signal and has
++TIF_FREEZE set, should react to it by calling the refrigerator() function
++(defined in kernel/power/process.c), which sets the task's PF_FROZEN flag,
++changes its state to TASK_UNINTERRUPTIBLE and makes it loop until PF_FROZEN is
++cleared for it.  Then, we say that the task is 'frozen' and therefore the set of
++functions handling this mechanism is called 'the freezer' (these functions are
++defined in kernel/power/process.c and include/linux/freezer.h).  User space
++processes are generally frozen before kernel threads.
++
++It is not recommended to call refrigerator() directly.  Instead, it is
++recommended to use the try_to_freeze() function (defined in
++include/linux/freezer.h), that checks the task's TIF_FREEZE flag and makes the
++task enter refrigerator() if the flag is set.
++
++For user space processes try_to_freeze() is called automatically from the
++signal-handling code, but the freezable kernel threads need to call it
++explicitly in suitable places.  The code to do this may look like the following:
++
++	do {
++		hub_events();
++		wait_event_interruptible(khubd_wait,
++					!list_empty(&hub_event_list));
++		try_to_freeze();
++	} while (!signal_pending(current));
++
++(from drivers/usb/core/hub.c::hub_thread()).
++
++If a freezable kernel thread fails to call try_to_freeze() after the freezer has
++set TIF_FREEZE for it, the freezing of tasks will fail and the entire
++hibernation operation will be cancelled.  For this reason, freezable kernel
++threads must call try_to_freeze() somewhere.
++
++After the system memory state has been restored from a hibernation image and
++devices have been reinitialized, the function thaw_processes() is called in
++order to clear the PF_FROZEN flag for each frozen task.  Then, the tasks that
++have been frozen leave refrigerator() and continue running.
++
++III. Which kernel threads are freezable?
++
++Kernel threads are not freezable by default.  However, a kernel thread may clear
++PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE
++directly is strongly discouraged).  From this point it is regarded as freezable
++and must call try_to_freeze() in a suitable place.
++
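++Putting this together with the try_to_freeze() example above, a
++freezable kernel thread could be structured as follows (a sketch only;
++my_thread() and do_some_work() are illustrative names):
++
++	static int my_thread(void *unused)
++	{
++		set_freezable();	/* clear PF_NOFREEZE for this thread */
++		while (!kthread_should_stop()) {
++			try_to_freeze();	/* enter refrigerator() if asked */
++			do_some_work();
++			schedule_timeout_interruptible(HZ);
++		}
++		return 0;
++	}
++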
++IV. Why do we do that?
++
++Generally speaking, there are a couple of reasons to use the freezing of tasks:
++
++1. The principal reason is to prevent filesystems from being damaged after
++hibernation.  At the moment we have no simple means of checkpointing
++filesystems, so if there are any modifications made to filesystem data and/or
++metadata on disks, we cannot bring them back to the state from before the
++modifications.  At the same time each hibernation image contains some
++filesystem-related information that must be consistent with the state of the
++on-disk data and metadata after the system memory state has been restored from
++the image (otherwise the filesystems will be damaged in a nasty way, usually
++making them almost impossible to repair).  We therefore freeze tasks that might
++cause the on-disk filesystems' data and metadata to be modified after the
++hibernation image has been created and before the system is finally powered off.
++The majority of these are user space processes, but if any of the kernel threads
++may cause something like this to happen, they have to be freezable.
++
++2. The second reason is to prevent user space processes and some kernel threads
++from interfering with the suspending and resuming of devices.  A user space
++process running on a second CPU while we are suspending devices may, for
++example, be troublesome and without the freezing of tasks we would need some
++safeguards against race conditions that might occur in such a case.
++
++Although Linus Torvalds doesn't like the freezing of tasks, he said this in one
++of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608):
++
++"RJW:> Why we freeze tasks at all or why we freeze kernel threads?
++
++Linus: In many ways, 'at all'.
++
++I _do_ realize the IO request queue issues, and that we cannot actually do
++s2ram with some devices in the middle of a DMA.  So we want to be able to
++avoid *that*, there's no question about that.  And I suspect that stopping
++user threads and then waiting for a sync is practically one of the easier
++ways to do so.
++
++So in practice, the 'at all' may become a 'why freeze kernel threads?' and
++freezing user threads I don't find really objectionable."
++
++Still, there are kernel threads that may want to be freezable.  For example, if
++a kernel thread that belongs to a device driver accesses the device directly, it in
++principle needs to know when the device is suspended, so that it doesn't try to
++access it at that time.  However, if the kernel thread is freezable, it will be
++frozen before the driver's .suspend() callback is executed and it will be
++thawed after the driver's .resume() callback has run, so it won't be accessing
++the device while it's suspended.
++
++3. Another reason for freezing tasks is to prevent user space processes from
++realizing that a hibernation (or suspend) operation is taking place.  Ideally, user
++space processes should not notice that such a system-wide operation has occurred
++and should continue running without any problems after the restore (or resume
++from suspend).  Unfortunately, in the most general case this is quite difficult
++to achieve without the freezing of tasks.  Consider, for example, a process
++that depends on all CPUs being online while it's running.  Since we need to
++disable nonboot CPUs during the hibernation, if this process is not frozen, it
++may notice that the number of CPUs has changed and may start to work incorrectly
++because of that.
++
++V. Are there any problems related to the freezing of tasks?
++
++Yes, there are.
++
++First of all, the freezing of kernel threads may be tricky if they depend one
++on another.  For example, if kernel thread A waits for a completion (in the
++TASK_UNINTERRUPTIBLE state) that needs to be done by freezable kernel thread B
++and B is frozen in the meantime, then A will be blocked until B is thawed, which
++may be undesirable.  That's why kernel threads are not freezable by default.
++
++Second, there are the following two problems related to the freezing of user
++space processes:
++1. Putting processes into an uninterruptible sleep distorts the load average.
++2. Now that we have FUSE, plus the framework for doing device drivers in
++userspace, it gets even more complicated because some userspace processes are
++now doing the sorts of things that kernel threads do
++(https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012309.html).
++
++Problem 1 seems to be fixable, although it hasn't been fixed so far.  The
++other one is more serious, but it seems that we can work around it by using
++hibernation (and suspend) notifiers (in that case, though, we won't be able to
++avoid the realization by the user space processes that the hibernation is taking
++place).
++
++There are also problems that the freezing of tasks tends to expose, although
++they are not directly related to it.  For example, if request_firmware() is
++called from a device driver's .resume() routine, it will time out and eventually
++fail, because the user land process that should respond to the request is frozen
++at this point.  So, seemingly, the failure is due to the freezing of tasks.
++Suppose, however, that the firmware file is located on a filesystem accessible
++only through another device that hasn't been resumed yet.  In that case,
++request_firmware() will fail regardless of whether or not the freezing of tasks
++is used.  Consequently, the problem is not really related to the freezing of
++tasks, since it generally exists anyway.  [The solution to this particular
++problem is to keep the firmware in memory after it's loaded for the first time
++and upload it from memory to the device whenever necessary.]
+diff -Nurb linux-2.6.22-570/Documentation/power/kernel_threads.txt linux-2.6.22-try2/Documentation/power/kernel_threads.txt
+--- linux-2.6.22-570/Documentation/power/kernel_threads.txt	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/Documentation/power/kernel_threads.txt	1969-12-31 19:00:00.000000000 -0500
+@@ -1,40 +0,0 @@
+-KERNEL THREADS
+-
+-
+-Freezer
+-
+-Upon entering a suspended state the system will freeze all
+-tasks. This is done by delivering pseudosignals. This affects
+-kernel threads, too. To successfully freeze a kernel thread
+-the thread has to check for the pseudosignal and enter the
+-refrigerator. Code to do this looks like this:
+-
+-	do {
+-		hub_events();
+-		wait_event_interruptible(khubd_wait, !list_empty(&hub_event_list));
+-		try_to_freeze();
+-	} while (!signal_pending(current));
+-
+-from drivers/usb/core/hub.c::hub_thread()
+-
+-
+-The Unfreezable
+-
+-Some kernel threads however, must not be frozen. The kernel must
+-be able to finish pending IO operations and later on be able to
+-write the memory image to disk. Kernel threads needed to do IO
+-must stay awake. Such threads must mark themselves unfreezable
+-like this:
+-
+-	/*
+-	 * This thread doesn't need any user-level access,
+-	 * so get rid of all our resources.
+-	 */
+-	daemonize("usb-storage");
+-
+-	current->flags |= PF_NOFREEZE;
+-
+-from drivers/usb/storage/usb.c::usb_stor_control_thread()
+-
+-Such drivers are themselves responsible for staying quiet during
+-the actual snapshotting.
+diff -Nurb linux-2.6.22-570/Documentation/power/swsusp.txt linux-2.6.22-try2/Documentation/power/swsusp.txt
+--- linux-2.6.22-570/Documentation/power/swsusp.txt	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/Documentation/power/swsusp.txt	2007-12-19 15:29:24.000000000 -0500
+@@ -140,21 +140,11 @@
+ website, and not to the Linux Kernel Mailing List. We are working
+ toward merging suspend2 into the mainline kernel.
+ 
+-Q: A kernel thread must voluntarily freeze itself (call 'refrigerator').
+-I found some kernel threads that don't do it, and they don't freeze
+-so the system can't sleep. Is this a known behavior?
+-
+-A: All such kernel threads need to be fixed, one by one. Select the
+-place where the thread is safe to be frozen (no kernel semaphores
+-should be held at that point and it must be safe to sleep there), and
+-add:
+-
+-       try_to_freeze();
+-
+-If the thread is needed for writing the image to storage, you should
+-instead set the PF_NOFREEZE process flag when creating the thread (and
+-be very careful).
++Q: What is the freezing of tasks and why are we using it?
+ 
++A: The freezing of tasks is a mechanism by which user space processes and some
++kernel threads are controlled during hibernation or system-wide suspend (on some
++architectures).  See freezing-of-tasks.txt for details.
+ 
+ Q: What is the difference between "platform" and "shutdown"?
+ 
+diff -Nurb linux-2.6.22-570/Documentation/scsi/scsi_fc_transport.txt linux-2.6.22-try2/Documentation/scsi/scsi_fc_transport.txt
+--- linux-2.6.22-570/Documentation/scsi/scsi_fc_transport.txt	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/Documentation/scsi/scsi_fc_transport.txt	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,450 @@
++                             SCSI FC Transport
++                 =============================================
++
++Date:  4/12/2007
++Kernel Revisions for features:
++  rports : <<TBS>>
++  vports : 2.6.22 (? TBD)
++
++
++Introduction
++============
++This file documents the features and components of the SCSI FC Transport.
++It also documents the API between the transport and FC LLDDs.
++The FC transport can be found at:
++  drivers/scsi/scsi_transport_fc.c
++  include/scsi/scsi_transport_fc.h
++  include/scsi/scsi_netlink_fc.h
++
++This file is found at Documentation/scsi/scsi_fc_transport.txt
++
++
++FC Remote Ports (rports)
++========================================================================
++<< To Be Supplied >>
++
++
++FC Virtual Ports (vports)
++========================================================================
++
++Overview:
++-------------------------------
++
++  New FC standards have defined mechanisms which allow a single physical
++  port to appear as multiple communication ports. Using the N_Port Id
++  Virtualization (NPIV) mechanism, a point-to-point connection to a Fabric
++  can be assigned more than 1 N_Port_ID.  Each N_Port_ID appears as a
++  separate port to other endpoints on the fabric, even though it shares one
++  physical link to the switch for communication. Each N_Port_ID can have a
++  unique view of the fabric based on fabric zoning and array lun-masking
++  (just like a normal non-NPIV adapter).  Using the Virtual Fabric (VF)
++  mechanism, adding a fabric header to each frame allows the port to
++  interact with the Fabric Port to join multiple fabrics. The port will
++  obtain an N_Port_ID on each fabric it joins. Each fabric will have its
++  own unique view of endpoints and configuration parameters.  NPIV may be
++  used together with VF so that the port can obtain multiple N_Port_IDs
++  on each virtual fabric.
++
++  The FC transport is now recognizing a new object - a vport.  A vport is
++  an entity that has a world-wide unique World Wide Port Name (wwpn) and
++  World Wide Node Name (wwnn). The transport also allows the FC4 roles to
++  be specified for the vport, with FCP_Initiator being the primary role
++  expected. Once instantiated by one of the above methods, it will have a
++  distinct N_Port_ID and view of fabric endpoints and storage entities.
++  The fc_host associated with the physical adapter will export the ability
++  to create vports. The transport will create the vport object within the
++  Linux device tree, and instruct the fc_host's driver to instantiate the
++  virtual port. Typically, the driver will create a new scsi_host instance
++  on the vport, resulting in a unique <H,C,T,L> namespace for the vport.
++  Thus, whether a FC port is based on a physical port or on a virtual port,
++  each will appear as a unique scsi_host with its own target and lun space.
++
++  Note: At this time, the transport is written to create only NPIV-based
++    vports. However, consideration was given to VF-based vports and it
++    should be a minor change to add support if needed.  The remaining
++    discussion will concentrate on NPIV.
++
++  Note: World Wide Name assignment (and uniqueness guarantees) are left
++    up to an administrative entity controlling the vport. For example,
++    if vports are to be associated with virtual machines, a XEN mgmt
++    utility would be responsible for creating wwpn/wwnn's for the vport,
++    using its own naming authority and OUI. (Note: it already does this
++    for virtual MAC addresses).
++
++
++Device Trees and Vport Objects:
++-------------------------------
++
++  Today, the device tree typically contains the scsi_host object,
++  with rports and scsi target objects underneath it. Currently the FC
++  transport creates the vport object and places it under the scsi_host
++  object corresponding to the physical adapter.  The LLDD will allocate
++  a new scsi_host for the vport and link its object under the vport.
++  The remainder of the tree under the vports scsi_host is the same
++  as the non-NPIV case. The transport is currently written to easily
++  allow the parent of the vport to be something other than the scsi_host.
++  This could be used in the future to link the object onto a vm-specific
++  device tree. If the vport's parent is not the physical port's scsi_host,
++  a symbolic link to the vport object will be placed in the physical
++  port's scsi_host.
++
++  Here's what to expect in the device tree :
++   The typical Physical Port's Scsi_Host:
++     /sys/devices/.../host17/
++   and it has the typical descendant tree:
++     /sys/devices/.../host17/rport-17:0-0/target17:0:0/17:0:0:0:
++   and then the vport is created on the Physical Port:
++     /sys/devices/.../host17/vport-17:0-0
++   and the vport's Scsi_Host is then created:
++     /sys/devices/.../host17/vport-17:0-0/host18
++   and then the rest of the tree progresses, such as:
++     /sys/devices/.../host17/vport-17:0-0/host18/rport-18:0-0/target18:0:0/18:0:0:0:
++
++  Here's what to expect in the sysfs tree :
++   scsi_hosts:
++     /sys/class/scsi_host/host17                physical port's scsi_host
++     /sys/class/scsi_host/host18                vport's scsi_host
++   fc_hosts:
++     /sys/class/fc_host/host17                  physical port's fc_host
++     /sys/class/fc_host/host18                  vport's fc_host
++   fc_vports:
++     /sys/class/fc_vports/vport-17:0-0          the vport's fc_vport
++   fc_rports:
++     /sys/class/fc_remote_ports/rport-17:0-0    rport on the physical port
++     /sys/class/fc_remote_ports/rport-18:0-0    rport on the vport
++
++
++Vport Attributes:
++-------------------------------
++
++  The new fc_vport class object has the following attributes
++
++     node_name:                                                 Read_Only
++       The WWNN of the vport
++
++     port_name:                                                 Read_Only
++       The WWPN of the vport
++
++     roles:                                                     Read_Only
++       Indicates the FC4 roles enabled on the vport.
++
++     symbolic_name:                                             Read_Write
++       A string, appended to the driver's symbolic port name string, which
++       is registered with the switch to identify the vport. For example,
++       a hypervisor could set this string to "Xen Domain 2 VM 5 Vport 2",
++       and this set of identifiers can be seen on switch management screens
++       to identify the port.
++
++     vport_delete:                                              Write_Only
++       When written with a "1", will tear down the vport.
++
++     vport_disable:                                             Write_Only
++       When written with a "1", will transition the vport to a disabled.
++       state.  The vport will still be instantiated with the Linux kernel,
++       but it will not be active on the FC link.
++       When written with a "0", will enable the vport.
++
++     vport_last_state:                                          Read_Only
++       Indicates the previous state of the vport.  See the section below on
++       "Vport States".
++
++     vport_state:                                               Read_Only
++       Indicates the state of the vport.  See the section below on
++       "Vport States".
++
++     vport_type:                                                Read_Only
++       Reflects the FC mechanism used to create the virtual port.
++       Only NPIV is supported currently.
++
++
++  For the fc_host class object, the following attributes are added for vports:
++
++     max_npiv_vports:                                           Read_Only
++       Indicates the maximum number of NPIV-based vports that the
++       driver/adapter can support on the fc_host.
++
++     npiv_vports_inuse:                                         Read_Only
++       Indicates how many NPIV-based vports have been instantiated on the
++       fc_host.
++
++     vport_create:                                              Write_Only
++       A "simple" create interface to instantiate a vport on an fc_host.
++       A "<WWPN>:<WWNN>" string is written to the attribute. The transport
++       then instantiates the vport object and calls the LLDD to create the
++       vport with the role of FCP_Initiator.  Each WWN is specified as 16
++       hex characters and may *not* contain any prefixes (e.g. 0x, x, etc).
++
++     vport_delete:                                              Write_Only
++        A "simple" delete interface to teardown a vport. A "<WWPN>:<WWNN>"
++        string is written to the attribute. The transport will locate the
++        vport on the fc_host with the same WWNs and tear it down.  Each WWN
++        is specified as 16 hex characters and may *not* contain any prefixes
++        (e.g. 0x, x, etc).
++
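++     For example, to create and later delete a vport through sysfs (the
++     WWNs and host number are illustrative):
++
++     # echo '2101001b32a9d5cf:2001001b32a9d5cf' > \
++           /sys/class/fc_host/host17/vport_create
++     # echo '2101001b32a9d5cf:2001001b32a9d5cf' > \
++           /sys/class/fc_host/host17/vport_delete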
++
++Vport States:
++-------------------------------
++
++  Vport instantiation consists of two parts:
++    - Creation with the kernel and LLDD. This means all transport and
++      driver data structures are built up, and device objects created.
++      This is equivalent to a driver "attach" on an adapter, which is
++      independent of the adapter's link state.
++    - Instantiation of the vport on the FC link via ELS traffic, etc.
++      This is equivalent to a "link up" and successful link initialization.
++  Further information can be found in the interfaces section below for
++  Vport Creation.
++
++  Once a vport has been instantiated with the kernel/LLDD, a vport state
++  can be reported via the sysfs attribute. The following states exist:
++
++    FC_VPORT_UNKNOWN            - Unknown
++      A temporary state, typically set only while the vport is being
++      instantiated with the kernel and LLDD.
++
++    FC_VPORT_ACTIVE             - Active
++      The vport has been successfully created on the FC link.
++      It is fully functional.
++
++    FC_VPORT_DISABLED           - Disabled
++      The vport is instantiated, but "disabled". The vport is not instantiated
++      on the FC link. This is equivalent to a physical port with the
++      link "down".
++
++    FC_VPORT_LINKDOWN           - Linkdown
++      The vport is not operational as the physical link is not operational.
++
++    FC_VPORT_INITIALIZING       - Initializing
++      The vport is in the process of instantiating on the FC link.
++      The LLDD will set this state just prior to starting the ELS traffic
++      to create the vport. This state will persist until the vport is
++      successfully created (state becomes FC_VPORT_ACTIVE) or it fails
++      (state is one of the values below).  As this state is transitory,
++      it will not be preserved in the "vport_last_state".
++
++    FC_VPORT_NO_FABRIC_SUPP     - No Fabric Support
++      The vport is not operational. One of the following conditions was
++      encountered:
++       - The FC topology is not Point-to-Point
++       - The FC port is not connected to an F_Port
++       - The F_Port has indicated that NPIV is not supported.
++
++    FC_VPORT_NO_FABRIC_RSCS     - No Fabric Resources
++      The vport is not operational. The Fabric failed FDISC with a status
++      indicating that it does not have sufficient resources to complete
++      the operation.
++
++    FC_VPORT_FABRIC_LOGOUT      - Fabric Logout
++      The vport is not operational. The Fabric has LOGO'd the N_Port_ID
++      associated with the vport.
++
++    FC_VPORT_FABRIC_REJ_WWN     - Fabric Rejected WWN
++      The vport is not operational. The Fabric failed FDISC with a status
++      indicating that the WWN's are not valid.
++
++    FC_VPORT_FAILED             - VPort Failed
++      The vport is not operational. This is a catchall for all other
++      error conditions.
++
++
++  The following state table indicates the different state transitions:
++
++    State              Event                            New State
++    --------------------------------------------------------------------
++     n/a                Initialization                  Unknown
++    Unknown:            Link Down                       Linkdown
++                        Link Up & Loop                  No Fabric Support
++                        Link Up & no Fabric             No Fabric Support
++                        Link Up & FLOGI response        No Fabric Support
++                          indicates no NPIV support
++                        Link Up & FDISC being sent      Initializing
++                        Disable request                 Disable
++    Linkdown:           Link Up                         Unknown
++    Initializing:       FDISC ACC                       Active
++                        FDISC LS_RJT w/ no resources    No Fabric Resources
++                        FDISC LS_RJT w/ invalid         Fabric Rejected WWN
++                          pname or invalid nport_id
++                        FDISC LS_RJT failed for         Vport Failed
++                          other reasons
++                        Link Down                       Linkdown
++                        Disable request                 Disable
++    Disable:            Enable request                  Unknown
++    Active:             LOGO received from fabric       Fabric Logout
++                        Link Down                       Linkdown
++                        Disable request                 Disable
++    Fabric Logout:      Link still up                   Unknown
++
++         The following 4 error states all have the same transitions:
++    No Fabric Support:
++    No Fabric Resources:
++    Fabric Rejected WWN:
++    Vport Failed:
++                        Disable request                 Disable
++                        Link goes down                  Linkdown
++
++
++Transport <-> LLDD Interfaces :
++-------------------------------
++
++Vport support by LLDD:
++
++  The LLDD indicates support for vports by supplying a vport_create()
++  function in the transport template.  The presence of this function will
++  cause the creation of the new attributes on the fc_host.  As part of
++  the physical port completing its initialization relative to the
++  transport, it should set the max_npiv_vports attribute to indicate the
++  maximum number of vports the driver and/or adapter supports.
++
++
++Vport Creation:
++
++  The LLDD vport_create() syntax is:
++
++      int vport_create(struct fc_vport *vport, bool disable)
++
++    where:
++      vport:    Is the newly allocated vport object
++      disable:  If "true", the vport is to be created in a disabled stated.
++                If "false", the vport is to be enabled upon creation.
++
++  When a request is made to create a new vport (via sgio/netlink, or the
++  vport_create fc_host attribute), the transport will validate that the LLDD
++  can support another vport (e.g. max_npiv_vports > npiv_vports_inuse).
++  If not, the create request will be failed.  If space remains, the transport
++  will increment the vport count, create the vport object, and then call the
++  LLDD's vport_create() function with the newly allocated vport object.
++
++  As mentioned above, vport creation is divided into two parts:
++    - Creation with the kernel and LLDD. This means all transport and
++      driver data structures are built up, and device objects created.
++      This is equivalent to a driver "attach" on an adapter, which is
++      independent of the adapter's link state.
++    - Instantiation of the vport on the FC link via ELS traffic, etc.
++      This is equivalent to a "link up" and successful link initialization.
++
++  The LLDD's vport_create() function will not synchronously wait for both
++  parts to be fully completed before returning. It must validate that the
++  infrastructure exists to support NPIV, and complete the first part of
++  vport creation (data structure build up) before returning.  We do not
++  hinge vport_create() on the link-side operation mainly because:
++    - The link may be down. It is not a failure if it is. It simply
++      means the vport is in an inoperable state until the link comes up.
++      This is consistent with the link bouncing post vport creation.
++    - The vport may be created in a disabled state.
++    - This is consistent with a model where:  the vport equates to a
++      FC adapter. The vport_create is synonymous with driver attachment
++      to the adapter, which is independent of link state.
++
++    Note: special error codes have been defined to delineate infrastructure
++      failure cases for quicker resolution.
++
++  The expected behavior for the LLDD's vport_create() function is:
++    - Validate Infrastructure:
++        - If the driver or adapter cannot support another vport, whether
++            due to improper firmware, (a lie about) max_npiv, or a lack of
++            some other resource - return VPCERR_UNSUPPORTED.
++        - If the driver validates the WWN's against those already active on
++            the adapter and detects an overlap - return VPCERR_BAD_WWN.
++        - If the driver detects the topology is loop, non-fabric, or the
++            FLOGI did not support NPIV - return VPCERR_NO_FABRIC_SUPP.
++    - Allocate data structures. If errors are encountered, such as out
++        of memory conditions, return the respective negative Exxx error code.
++    - If the role is FCP Initiator, the LLDD is to :
++        - Call scsi_host_alloc() to allocate a scsi_host for the vport.
++        - Call scsi_add_host(new_shost, &vport->dev) to start the scsi_host
++          and bind it as a child of the vport device.
++        - Initialize the fc_host attribute values.
++    - Kick off further vport state transitions based on the disable flag and
++        link state - and return success (zero).
++
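++  A skeletal vport_create() following the steps above might look like
++  this (the my_* helpers and private-data type are hypothetical; the
++  scsi_* calls and VPCERR_* codes are the ones referenced in this
++  document):
++
++	static int my_vport_create(struct fc_vport *vport, bool disable)
++	{
++		struct Scsi_Host *shost;
++
++		if (!my_hw_supports_npiv())
++			return VPCERR_UNSUPPORTED;
++		if (my_wwns_overlap(vport->port_name, vport->node_name))
++			return VPCERR_BAD_WWN;
++
++		shost = scsi_host_alloc(&my_vport_template,
++					sizeof(struct my_vport_priv));
++		if (!shost)
++			return -ENOMEM;
++		if (scsi_add_host(shost, &vport->dev)) {
++			scsi_host_put(shost);
++			return -EIO;
++		}
++
++		/* Initialize the fc_host attributes, then kick off the
++		 * link-side FDISC state machine unless a disabled vport
++		 * was requested. */
++		if (!disable)
++			my_start_fdisc(vport);
++		return 0;
++	}
++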
++  LLDD Implementers Notes:
++  - It is suggested that there be different fc_function_templates for
++    the physical port and the virtual port.  The physical port's template
++    would have the vport_create, vport_delete, and vport_disable functions,
++    while the vports would not.
++  - It is suggested that there be different scsi_host_templates
++    for the physical port and virtual port. Likely, there are driver
++    attributes, embedded into the scsi_host_template, that are applicable
++    for the physical port only (link speed, topology setting, etc). This
++    ensures that the attributes are applicable to the respective scsi_host.
++
++
++Vport Disable/Enable:
++
++  The LLDD vport_disable() syntax is:
++
++      int vport_disable(struct fc_vport *vport, bool disable)
++
++    where:
++      vport:    Is the vport to be enabled or disabled
++      disable:  If "true", the vport is to be disabled.
++                If "false", the vport is to be enabled.
++
++  When a request is made to change the disabled state on a vport, the
++  transport will validate the request against the existing vport state.
++  If the request is to disable and the vport is already disabled, the
++  request will fail. Similarly, if the request is to enable, and the
++  vport is not in a disabled state, the request will fail.  If the request
++  is valid for the vport state, the transport will call the LLDD to
++  change the vport's state.
++
++  Within the LLDD, if a vport is disabled, it remains instantiated with
++  the kernel and LLDD, but it is not active or visible on the FC link in
++  any way. (see Vport Creation and the 2 part instantiation discussion).
++  The vport will remain in this state until it is deleted or re-enabled.
++  When enabling a vport, the LLDD reinstantiates the vport on the FC
++  link - essentially restarting the LLDD state machine (see Vport States
++  above).
++
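++  A sketch of the corresponding LLDD handler (the my_* helpers are
++  hypothetical; only the calling convention comes from the transport):
++
++	static int my_vport_disable(struct fc_vport *vport, bool disable)
++	{
++		if (disable)
++			my_logout_vport(vport);	/* drop off the FC link */
++		else
++			my_start_fdisc(vport);	/* re-run link instantiation */
++		return 0;
++	}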
++
++Vport Deletion:
++
++  The LLDD vport_delete() syntax is:
++
++      int vport_delete(struct fc_vport *vport)
++
++    where:
++      vport:    Is the vport to delete
++
++  When a request is made to delete a vport (via sgio/netlink, or via the
++  fc_host or fc_vport vport_delete attributes), the transport will call
++  the LLDD to terminate the vport on the FC link, and tear down all other
++  data structures and references.  If the LLDD completes successfully,
++  the transport will tear down the vport objects and complete the vport
++  removal.  If the LLDD delete request fails, the vport object will remain,
++  but will be in an indeterminate state.
++
++  Within the LLDD, the normal code paths for a scsi_host teardown should
++  be followed.  E.g. if the vport has an FCP Initiator role, the LLDD
++  will call fc_remove_host() for the vport's scsi_host, followed by
++  scsi_remove_host() and scsi_host_put() for the vport's scsi_host.
++
++
++Other:
++  fc_host port_type attribute:
++    There is a new fc_host port_type value - FC_PORTTYPE_NPIV. This value
++    must be set on all vport-based fc_hosts.  Normally, on a physical port,
++    the port_type attribute would be set to NPORT, NLPORT, etc based on the
++    topology type and existence of the fabric. As this is not applicable to
++    a vport, it makes more sense to report the FC mechanism used to create
++    the vport.
++
++  Driver unload:
++    FC drivers are required to call fc_remove_host() prior to calling
++    scsi_remove_host().  This allows the fc_host to tear down all remote
++    ports prior to the scsi_host being torn down.  The fc_remove_host() call
++    was updated to remove all vports for the fc_host as well.
++
++
++Credits
++=======
++The following people have contributed to this document:
++
++James Smart
++james.smart@emulex.com
++
+diff -Nurb linux-2.6.22-570/Documentation/sysctl/kernel.txt linux-2.6.22-try2/Documentation/sysctl/kernel.txt
+--- linux-2.6.22-570/Documentation/sysctl/kernel.txt	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/Documentation/sysctl/kernel.txt	2007-12-19 15:29:23.000000000 -0500
+@@ -29,6 +29,7 @@
+ - java-interpreter            [ binfmt_java, obsolete ]
+ - kstack_depth_to_print       [ X86 only ]
+ - l2cr                        [ PPC only ]
++- mmap_min_addr
+ - modprobe                    ==> Documentation/kmod.txt
+ - msgmax
+ - msgmnb
+@@ -178,6 +179,19 @@
+ 
+ ==============================================================
+ 
++mmap_min_addr
++
++This file indicates the amount of address space which a user process will be
++restricted from mmapping.  Since kernel null dereference bugs could
++accidentally operate based on the information in the first couple of pages of
++memory, userspace processes should not be allowed to write to them.  By
++default this value is set to 0 and no protections will be enforced by the
++security module.  Setting this value to something like 64k will allow the
++vast majority of applications to work correctly and provide defense in depth
++against future potential kernel bugs.
++
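++For example, to raise the floor to 64k at runtime (assuming the sysctl
++is exposed as /proc/sys/vm/mmap_min_addr, where mainline kernels place
++it):
++
++# echo 65536 > /proc/sys/vm/mmap_min_addr
++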
++==============================================================
++
+ osrelease, ostype & version:
+ 
+ # cat osrelease
+diff -Nurb linux-2.6.22-570/MAINTAINERS linux-2.6.22-try2/MAINTAINERS
+--- linux-2.6.22-570/MAINTAINERS	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/MAINTAINERS	2007-12-19 15:29:23.000000000 -0500
+@@ -232,15 +232,15 @@
+ S:	Supported
+ 
+ ACPI BATTERY DRIVERS
+-P:	Vladimir P. Lebedev
+-M:	vladimir.p.lebedev@intel.com
++P:	Alexey Starikovskiy
++M:	astarikovskiy@suse.de
+ L:	linux-acpi@vger.kernel.org
+ W:	http://acpi.sourceforge.net/
+ S:	Supported
+ 
+ ACPI EC DRIVER
+ P:	Alexey Starikovskiy
+-M:	alexey.y.starikovskiy@linux.intel.com
++M:	astarikovskiy@suse.de
+ L:	linux-acpi@vger.kernel.org
+ W:	http://acpi.sourceforge.net/
+ S:	Supported
+@@ -2127,6 +2127,15 @@
+ L:	kexec@lists.infradead.org
+ S:	Maintained
+ 
++KGDB
++P:	Jason Wessel
++M:	jason.wessel@windriver.com
++P:	Amit S. Kale
++M:	amitkale@linsyssoft.com
++W:	http://sourceforge.net/projects/kgdb
++L:	kgdb-bugreport@lists.sourceforge.net
++S:	Maintained
++
+ KPROBES
+ P:	Prasanna S Panchamukhi
+ M:	prasanna@in.ibm.com
+@@ -3593,6 +3602,15 @@
+ W:	http://www.kernel.dk
+ S:	Maintained
+ 
++UNIONFS
++P:	Erez Zadok
++M:	ezk@cs.sunysb.edu
++P:	Josef "Jeff" Sipek
++M:	jsipek@cs.sunysb.edu
++L:	unionfs@filesystems.org
++W:	http://unionfs.filesystems.org
++S:	Maintained
++
+ USB ACM DRIVER
+ P:	Oliver Neukum
+ M:	oliver@neukum.name
+diff -Nurb linux-2.6.22-570/Makefile linux-2.6.22-try2/Makefile
+--- linux-2.6.22-570/Makefile	2007-12-12 18:08:43.000000000 -0500
++++ linux-2.6.22-try2/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -496,6 +496,11 @@
+ CFLAGS		+= -fomit-frame-pointer
+ endif
+ 
++ifdef CONFIG_UNWIND_INFO
++CFLAGS		+= -fasynchronous-unwind-tables
++LDFLAGS_vmlinux	+= --eh-frame-hdr
++endif
++
+ ifdef CONFIG_DEBUG_INFO
+ CFLAGS		+= -g
+ endif
+diff -Nurb linux-2.6.22-570/arch/arm/Kconfig linux-2.6.22-try2/arch/arm/Kconfig
+--- linux-2.6.22-570/arch/arm/Kconfig	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/arm/Kconfig	2007-12-19 15:29:23.000000000 -0500
+@@ -1034,6 +1034,8 @@
+ 
+ source "drivers/rtc/Kconfig"
+ 
++source "drivers/dma/Kconfig"
++
+ endmenu
+ 
+ source "fs/Kconfig"
+diff -Nurb linux-2.6.22-570/arch/arm/boot/.gitignore.rej linux-2.6.22-try2/arch/arm/boot/.gitignore.rej
+--- linux-2.6.22-570/arch/arm/boot/.gitignore.rej	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/arm/boot/.gitignore.rej	2007-12-19 15:28:52.000000000 -0500
+@@ -0,0 +1,10 @@
++***************
++*** 1,2 ****
++  Image
++  zImage
++--- 1,5 ----
++  Image
++  zImage
+++ xipImage
+++ bootpImage
+++ uImage
+Files linux-2.6.22-570/arch/arm/boot/compressed/.head.S.rej.swp and linux-2.6.22-try2/arch/arm/boot/compressed/.head.S.rej.swp differ
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/Makefile linux-2.6.22-try2/arch/arm/kernel/Makefile
+--- linux-2.6.22-570/arch/arm/kernel/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/kernel/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -20,6 +20,7 @@
+ obj-$(CONFIG_SMP)		+= smp.o
+ obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
+ obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
++obj-$(CONFIG_KGDB)		+= kgdb.o kgdb-jmp.o
+ 
+ obj-$(CONFIG_CRUNCH)		+= crunch.o crunch-bits.o
+ AFLAGS_crunch-bits.o		:= -Wa,-mcpu=ep9312
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/kgdb-jmp.S linux-2.6.22-try2/arch/arm/kernel/kgdb-jmp.S
+--- linux-2.6.22-570/arch/arm/kernel/kgdb-jmp.S	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/arm/kernel/kgdb-jmp.S	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,32 @@
++/*
++ * arch/arm/kernel/kgdb-jmp.S
++ *
++ * Trivial setjmp and longjmp procedures to support bus error recovery
++ * which may occur during kgdb memory read/write operations.
++ *
++ * Author: MontaVista Software, Inc. <source@mvista.com>
++ *         source@mvista.com
++ *
++ * 2002-2005 (c) MontaVista Software, Inc.  This file is licensed under the
++ * terms of the GNU General Public License version 2. This program is licensed
++ * "as is" without any warranty of any kind, whether express or implied.
++ */
++#include <linux/linkage.h>
++
++ENTRY (kgdb_fault_setjmp)
++	/* Save registers */
++	stmia	r0, {r0-r14}
++	str	lr,[r0, #60]
++	mrs	r1,cpsr
++	str	r1,[r0,#64]
++	ldr	r1,[r0,#4]
++	mov	r0, #0
++	mov	pc,lr
++
++ENTRY (kgdb_fault_longjmp)
++	/* Restore registers */
++	mov	r1,#1
++	str	r1,[r0]
++	ldr     r1,[r0, #64]
++	msr     spsr,r1
++	ldmia	r0,{r0-pc}^
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/kgdb.c linux-2.6.22-try2/arch/arm/kernel/kgdb.c
+--- linux-2.6.22-570/arch/arm/kernel/kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/arm/kernel/kgdb.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,202 @@
++/*
++ * arch/arm/kernel/kgdb.c
++ *
++ * ARM KGDB support
++ *
++ * Copyright (c) 2002-2004 MontaVista Software, Inc
++ *
++ * Authors:  George Davis <davis_g@mvista.com>
++ *           Deepak Saxena <dsaxena@plexity.net>
++ */
++#include <linux/types.h>
++#include <linux/kernel.h>
++#include <linux/signal.h>
++#include <linux/sched.h>
++#include <linux/mm.h>
++#include <linux/spinlock.h>
++#include <linux/personality.h>
++#include <linux/ptrace.h>
++#include <linux/elf.h>
++#include <linux/interrupt.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++
++#include <asm/atomic.h>
++#include <asm/io.h>
++#include <asm/pgtable.h>
++#include <asm/system.h>
++#include <asm/uaccess.h>
++#include <asm/unistd.h>
++#include <asm/ptrace.h>
++#include <asm/traps.h>
++
++/* Make a local copy of the registers passed into the handler (bletch) */
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs)
++{
++	int regno;
++
++	/* Initialize all to zero (??) */
++	for (regno = 0; regno < GDB_MAX_REGS; regno++)
++		gdb_regs[regno] = 0;
++
++	gdb_regs[_R0] = kernel_regs->ARM_r0;
++	gdb_regs[_R1] = kernel_regs->ARM_r1;
++	gdb_regs[_R2] = kernel_regs->ARM_r2;
++	gdb_regs[_R3] = kernel_regs->ARM_r3;
++	gdb_regs[_R4] = kernel_regs->ARM_r4;
++	gdb_regs[_R5] = kernel_regs->ARM_r5;
++	gdb_regs[_R6] = kernel_regs->ARM_r6;
++	gdb_regs[_R7] = kernel_regs->ARM_r7;
++	gdb_regs[_R8] = kernel_regs->ARM_r8;
++	gdb_regs[_R9] = kernel_regs->ARM_r9;
++	gdb_regs[_R10] = kernel_regs->ARM_r10;
++	gdb_regs[_FP] = kernel_regs->ARM_fp;
++	gdb_regs[_IP] = kernel_regs->ARM_ip;
++	gdb_regs[_SP] = kernel_regs->ARM_sp;
++	gdb_regs[_LR] = kernel_regs->ARM_lr;
++	gdb_regs[_PC] = kernel_regs->ARM_pc;
++	gdb_regs[_CPSR] = kernel_regs->ARM_cpsr;
++}
++
++/* Copy local gdb registers back to kgdb regs, for later copy to kernel */
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *kernel_regs)
++{
++	kernel_regs->ARM_r0 = gdb_regs[_R0];
++	kernel_regs->ARM_r1 = gdb_regs[_R1];
++	kernel_regs->ARM_r2 = gdb_regs[_R2];
++	kernel_regs->ARM_r3 = gdb_regs[_R3];
++	kernel_regs->ARM_r4 = gdb_regs[_R4];
++	kernel_regs->ARM_r5 = gdb_regs[_R5];
++	kernel_regs->ARM_r6 = gdb_regs[_R6];
++	kernel_regs->ARM_r7 = gdb_regs[_R7];
++	kernel_regs->ARM_r8 = gdb_regs[_R8];
++	kernel_regs->ARM_r9 = gdb_regs[_R9];
++	kernel_regs->ARM_r10 = gdb_regs[_R10];
++	kernel_regs->ARM_fp = gdb_regs[_FP];
++	kernel_regs->ARM_ip = gdb_regs[_IP];
++	kernel_regs->ARM_sp = gdb_regs[_SP];
++	kernel_regs->ARM_lr = gdb_regs[_LR];
++	kernel_regs->ARM_pc = gdb_regs[_PC];
++	kernel_regs->ARM_cpsr = gdb_regs[_CPSR];
++}
++
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs,
++				 struct task_struct *task)
++{
++	int regno;
++	struct pt_regs *thread_regs;
++
++	/* Just making sure... */
++	if (task == NULL)
++		return;
++
++	/* Initialize to zero */
++	for (regno = 0; regno < GDB_MAX_REGS; regno++)
++		gdb_regs[regno] = 0;
++
++	/* Otherwise, we have only some registers from switch_to() */
++	thread_regs = task_pt_regs(task);
++	gdb_regs[_R0] = thread_regs->ARM_r0;	/* Not really valid? */
++	gdb_regs[_R1] = thread_regs->ARM_r1;	/* "               " */
++	gdb_regs[_R2] = thread_regs->ARM_r2;	/* "               " */
++	gdb_regs[_R3] = thread_regs->ARM_r3;	/* "               " */
++	gdb_regs[_R4] = thread_regs->ARM_r4;
++	gdb_regs[_R5] = thread_regs->ARM_r5;
++	gdb_regs[_R6] = thread_regs->ARM_r6;
++	gdb_regs[_R7] = thread_regs->ARM_r7;
++	gdb_regs[_R8] = thread_regs->ARM_r8;
++	gdb_regs[_R9] = thread_regs->ARM_r9;
++	gdb_regs[_R10] = thread_regs->ARM_r10;
++	gdb_regs[_FP] = thread_regs->ARM_fp;
++	gdb_regs[_IP] = thread_regs->ARM_ip;
++	gdb_regs[_SP] = thread_regs->ARM_sp;
++	gdb_regs[_LR] = thread_regs->ARM_lr;
++	gdb_regs[_PC] = thread_regs->ARM_pc;
++	gdb_regs[_CPSR] = thread_regs->ARM_cpsr;
++}
++
++static int compiled_break;
++
++int kgdb_arch_handle_exception(int exception_vector, int signo,
++			       int err_code, char *remcom_in_buffer,
++			       char *remcom_out_buffer,
++			       struct pt_regs *linux_regs)
++{
++	long addr;
++	char *ptr;
++
++	switch (remcom_in_buffer[0]) {
++	case 'D':
++	case 'k':
++	case 'c':
++		kgdb_contthread = NULL;
++
++		/*
++		 * Try to read optional parameter, pc unchanged if no parm.
++		 * If this was a compiled breakpoint, we need to move
++		 * to the next instruction or we will just breakpoint
++		 * over and over again.
++		 */
++		ptr = &remcom_in_buffer[1];
++		if (kgdb_hex2long(&ptr, &addr)) {
++			linux_regs->ARM_pc = addr;
++		} else if (compiled_break == 1) {
++			linux_regs->ARM_pc += 4;
++		}
++
++		compiled_break = 0;
++
++		return 0;
++	}
++
++	return -1;
++}
++
++static int kgdb_brk_fn(struct pt_regs *regs, unsigned int instr)
++{
++	kgdb_handle_exception(1, SIGTRAP, 0, regs);
++
++	return 0;
++}
++
++static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr)
++{
++	compiled_break = 1;
++	kgdb_handle_exception(1, SIGTRAP, 0, regs);
++
++	return 0;
++}
++
++static struct undef_hook kgdb_brkpt_hook = {
++	.instr_mask = 0xffffffff,
++	.instr_val = KGDB_BREAKINST,
++	.fn = kgdb_brk_fn
++};
++
++static struct undef_hook kgdb_compiled_brkpt_hook = {
++	.instr_mask = 0xffffffff,
++	.instr_val = KGDB_COMPILED_BREAK,
++	.fn = kgdb_compiled_brk_fn
++};
++
++/*
++ * Register our undef instruction hooks with ARM undef core.
++ * We register a hook specifically looking for the KGDB break instruction
++ * and we handle the normal undef case within the do_undefinstr
++ * handler.
++ */
++int kgdb_arch_init(void)
++{
++	register_undef_hook(&kgdb_brkpt_hook);
++	register_undef_hook(&kgdb_compiled_brkpt_hook);
++
++	return 0;
++}
++
++struct kgdb_arch arch_kgdb_ops = {
++#ifndef __ARMEB__
++	.gdb_bpt_instr = {0xfe, 0xde, 0xff, 0xe7}
++#else
++	.gdb_bpt_instr = {0xe7, 0xff, 0xde, 0xfe}
++#endif
++};
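[Annotation: kgdb_arch_handle_exception() above services the remote protocol's
detach ('D'), kill ('k') and continue ('c') packets; a continue packet may carry an
optional hex resume address, which kgdb_hex2long() parses into ARM_pc. Two example
packets as seen on the wire (the resume address is illustrative):

	$c#63            continue at the current pc
	$cc000fabc#e2    continue at 0xc000fabc
]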
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/setup.c linux-2.6.22-try2/arch/arm/kernel/setup.c
+--- linux-2.6.22-570/arch/arm/kernel/setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/kernel/setup.c	2007-12-19 15:29:23.000000000 -0500
+@@ -832,6 +832,11 @@
+ 	conswitchp = &dummy_con;
+ #endif
+ #endif
++
++#if	defined(CONFIG_KGDB)
++	extern void __init early_trap_init(void);
++	early_trap_init();
++#endif
+ }
+ 
+ 
+diff -Nurb linux-2.6.22-570/arch/arm/kernel/traps.c linux-2.6.22-try2/arch/arm/kernel/traps.c
+--- linux-2.6.22-570/arch/arm/kernel/traps.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/arm/kernel/traps.c	2007-12-19 15:29:23.000000000 -0500
+@@ -301,6 +301,7 @@
+ 	unsigned int instr;
+ 	struct undef_hook *hook;
+ 	siginfo_t info;
++	mm_segment_t fs;
+ 	void __user *pc;
+ 	unsigned long flags;
+ 
+@@ -311,6 +312,8 @@
+ 	 */
+ 	regs->ARM_pc -= correction;
+ 
++	fs = get_fs();
++	set_fs(KERNEL_DS);
+ 	pc = (void __user *)instruction_pointer(regs);
+ 
+ 	if (processor_mode(regs) == SVC_MODE) {
+@@ -320,6 +323,7 @@
+ 	} else {
+ 		get_user(instr, (u32 __user *)pc);
+ 	}
++	set_fs(fs);
+ 
+ 	spin_lock_irqsave(&undef_lock, flags);
+ 	list_for_each_entry(hook, &undef_hook, node) {
+@@ -707,6 +711,13 @@
+ 
+ void __init trap_init(void)
+ {
++#if   defined(CONFIG_KGDB)
++	return;
++}
++
++void __init early_trap_init(void)
++{
++#endif
+ 	unsigned long vectors = CONFIG_VECTORS_BASE;
+ 	extern char __stubs_start[], __stubs_end[];
+ 	extern char __vectors_start[], __vectors_end[];
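[Annotation: the traps.c hunk above is a preprocessor splice: with CONFIG_KGDB enabled,
trap_init() ends at the early return and the original vector-copying body is re-homed
under early_trap_init(), which the setup.c hunk calls from setup_arch(), well before
trap_init() would normally run. With CONFIG_KGDB=y the result expands to:

	void __init trap_init(void)
	{
		return;
	}

	void __init early_trap_init(void)
	{
		unsigned long vectors = CONFIG_VECTORS_BASE;
		...
	}
]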
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop13xx/setup.c linux-2.6.22-try2/arch/arm/mach-iop13xx/setup.c
+--- linux-2.6.22-570/arch/arm/mach-iop13xx/setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-iop13xx/setup.c	2007-12-19 15:29:23.000000000 -0500
+@@ -25,6 +25,7 @@
+ #include <asm/hardware.h>
+ #include <asm/irq.h>
+ #include <asm/io.h>
++#include <asm/hardware/iop_adma.h>
+ 
+ #define IOP13XX_UART_XTAL 33334000
+ #define IOP13XX_SETUP_DEBUG 0
+@@ -236,19 +237,143 @@
+ }
+ #endif
+ 
++/* ADMA Channels */
++static struct resource iop13xx_adma_0_resources[] = {
++	[0] = {
++		.start = IOP13XX_ADMA_PHYS_BASE(0),
++		.end = IOP13XX_ADMA_UPPER_PA(0),
++		.flags = IORESOURCE_MEM,
++	},
++	[1] = {
++		.start = IRQ_IOP13XX_ADMA0_EOT,
++		.end = IRQ_IOP13XX_ADMA0_EOT,
++		.flags = IORESOURCE_IRQ
++	},
++	[2] = {
++		.start = IRQ_IOP13XX_ADMA0_EOC,
++		.end = IRQ_IOP13XX_ADMA0_EOC,
++		.flags = IORESOURCE_IRQ
++	},
++	[3] = {
++		.start = IRQ_IOP13XX_ADMA0_ERR,
++		.end = IRQ_IOP13XX_ADMA0_ERR,
++		.flags = IORESOURCE_IRQ
++	}
++};
++
++static struct resource iop13xx_adma_1_resources[] = {
++	[0] = {
++		.start = IOP13XX_ADMA_PHYS_BASE(1),
++		.end = IOP13XX_ADMA_UPPER_PA(1),
++		.flags = IORESOURCE_MEM,
++	},
++	[1] = {
++		.start = IRQ_IOP13XX_ADMA1_EOT,
++		.end = IRQ_IOP13XX_ADMA1_EOT,
++		.flags = IORESOURCE_IRQ
++	},
++	[2] = {
++		.start = IRQ_IOP13XX_ADMA1_EOC,
++		.end = IRQ_IOP13XX_ADMA1_EOC,
++		.flags = IORESOURCE_IRQ
++	},
++	[3] = {
++		.start = IRQ_IOP13XX_ADMA1_ERR,
++		.end = IRQ_IOP13XX_ADMA1_ERR,
++		.flags = IORESOURCE_IRQ
++	}
++};
++
++static struct resource iop13xx_adma_2_resources[] = {
++	[0] = {
++		.start = IOP13XX_ADMA_PHYS_BASE(2),
++		.end = IOP13XX_ADMA_UPPER_PA(2),
++		.flags = IORESOURCE_MEM,
++	},
++	[1] = {
++		.start = IRQ_IOP13XX_ADMA2_EOT,
++		.end = IRQ_IOP13XX_ADMA2_EOT,
++		.flags = IORESOURCE_IRQ
++	},
++	[2] = {
++		.start = IRQ_IOP13XX_ADMA2_EOC,
++		.end = IRQ_IOP13XX_ADMA2_EOC,
++		.flags = IORESOURCE_IRQ
++	},
++	[3] = {
++		.start = IRQ_IOP13XX_ADMA2_ERR,
++		.end = IRQ_IOP13XX_ADMA2_ERR,
++		.flags = IORESOURCE_IRQ
++	}
++};
++
++static u64 iop13xx_adma_dmamask = DMA_64BIT_MASK;
++static struct iop_adma_platform_data iop13xx_adma_0_data = {
++	.hw_id = 0,
++	.pool_size = PAGE_SIZE,
++};
++
++static struct iop_adma_platform_data iop13xx_adma_1_data = {
++	.hw_id = 1,
++	.pool_size = PAGE_SIZE,
++};
++
++static struct iop_adma_platform_data iop13xx_adma_2_data = {
++	.hw_id = 2,
++	.pool_size = PAGE_SIZE,
++};
++
++/* The ids are fixed up later in iop13xx_platform_init */
++static struct platform_device iop13xx_adma_0_channel = {
++	.name = "iop-adma",
++	.id = 0,
++	.num_resources = 4,
++	.resource = iop13xx_adma_0_resources,
++	.dev = {
++		.dma_mask = &iop13xx_adma_dmamask,
++		.coherent_dma_mask = DMA_64BIT_MASK,
++		.platform_data = (void *) &iop13xx_adma_0_data,
++	},
++};
++
++static struct platform_device iop13xx_adma_1_channel = {
++	.name = "iop-adma",
++	.id = 0,
++	.num_resources = 4,
++	.resource = iop13xx_adma_1_resources,
++	.dev = {
++		.dma_mask = &iop13xx_adma_dmamask,
++		.coherent_dma_mask = DMA_64BIT_MASK,
++		.platform_data = (void *) &iop13xx_adma_1_data,
++	},
++};
++
++static struct platform_device iop13xx_adma_2_channel = {
++	.name = "iop-adma",
++	.id = 0,
++	.num_resources = 4,
++	.resource = iop13xx_adma_2_resources,
++	.dev = {
++		.dma_mask = &iop13xx_adma_dmamask,
++		.coherent_dma_mask = DMA_64BIT_MASK,
++		.platform_data = (void *) &iop13xx_adma_2_data,
++	},
++};
++
+ void __init iop13xx_map_io(void)
+ {
+ 	/* Initialize the Static Page Table maps */
+ 	iotable_init(iop13xx_std_desc, ARRAY_SIZE(iop13xx_std_desc));
+ }
+ 
+-static int init_uart = 0;
+-static int init_i2c = 0;
++static int init_uart;
++static int init_i2c;
++static int init_adma;
+ 
+ void __init iop13xx_platform_init(void)
+ {
+ 	int i;
+-	u32 uart_idx, i2c_idx, plat_idx;
++	u32 uart_idx, i2c_idx, adma_idx, plat_idx;
+ 	struct platform_device *iop13xx_devices[IQ81340_MAX_PLAT_DEVICES];
+ 
+ 	/* set the bases so we can read the device id */
+@@ -294,6 +419,12 @@
+ 		}
+ 	}
+ 
++	if (init_adma == IOP13XX_INIT_ADMA_DEFAULT) {
++		init_adma |= IOP13XX_INIT_ADMA_0;
++		init_adma |= IOP13XX_INIT_ADMA_1;
++		init_adma |= IOP13XX_INIT_ADMA_2;
++	}
++
+ 	plat_idx = 0;
+ 	uart_idx = 0;
+ 	i2c_idx = 0;
+@@ -332,6 +463,56 @@
+ 		}
+ 	}
+ 
++	/* initialize adma channel ids and capabilities */
++	adma_idx = 0;
++	for (i = 0; i < IQ81340_NUM_ADMA; i++) {
++		struct iop_adma_platform_data *plat_data;
++		if ((init_adma & (1 << i)) && IOP13XX_SETUP_DEBUG)
++			printk(KERN_INFO
++				"Adding adma%d to platform device list\n", i);
++		switch (init_adma & (1 << i)) {
++		case IOP13XX_INIT_ADMA_0:
++			iop13xx_adma_0_channel.id = adma_idx++;
++			iop13xx_devices[plat_idx++] = &iop13xx_adma_0_channel;
++			plat_data = &iop13xx_adma_0_data;
++			dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
++			dma_cap_set(DMA_XOR, plat_data->cap_mask);
++			dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
++			dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
++			dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
++			dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
++			dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
++			break;
++		case IOP13XX_INIT_ADMA_1:
++			iop13xx_adma_1_channel.id = adma_idx++;
++			iop13xx_devices[plat_idx++] = &iop13xx_adma_1_channel;
++			plat_data = &iop13xx_adma_1_data;
++			dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
++			dma_cap_set(DMA_XOR, plat_data->cap_mask);
++			dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
++			dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
++			dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
++			dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
++			dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
++			break;
++		case IOP13XX_INIT_ADMA_2:
++			iop13xx_adma_2_channel.id = adma_idx++;
++			iop13xx_devices[plat_idx++] = &iop13xx_adma_2_channel;
++			plat_data = &iop13xx_adma_2_data;
++			dma_cap_set(DMA_MEMCPY, plat_data->cap_mask);
++			dma_cap_set(DMA_XOR, plat_data->cap_mask);
++			dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask);
++			dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask);
++			dma_cap_set(DMA_MEMSET, plat_data->cap_mask);
++			dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask);
++			dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask);
++			dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask);
++			dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask);
++			dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask);
++			break;
++		}
++	}
++
+ #ifdef CONFIG_MTD_PHYSMAP
+ 	iq8134x_flash_resource.end = iq8134x_flash_resource.start +
+ 				iq8134x_probe_flash_size() - 1;
+@@ -399,5 +580,35 @@
+ 	return 1;
+ }
+ 
++static int __init iop13xx_init_adma_setup(char *str)
++{
++	if (str) {
++		while (*str != '\0') {
++			switch (*str) {
++			case '0':
++				init_adma |= IOP13XX_INIT_ADMA_0;
++				break;
++			case '1':
++				init_adma |= IOP13XX_INIT_ADMA_1;
++				break;
++			case '2':
++				init_adma |= IOP13XX_INIT_ADMA_2;
++				break;
++			case ',':
++			case '=':
++				break;
++			default:
++				PRINTK("\"iop13xx_init_adma\" malformed"
++					    " at character: \'%c\'", *str);
++				*(str + 1) = '\0';
++				init_adma = IOP13XX_INIT_ADMA_DEFAULT;
++			}
++			str++;
++		}
++	}
++	return 1;
++}
++
++__setup("iop13xx_init_adma", iop13xx_init_adma_setup);
+ __setup("iop13xx_init_uart", iop13xx_init_uart_setup);
+ __setup("iop13xx_init_i2c", iop13xx_init_i2c_setup);
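[Annotation: the new __setup hook gives ADMA the same command-line control the existing
uart/i2c hooks provide: the parser accepts the digits 0-2 (',' and '=' pass through as
separators) and reverts to enabling all three channels on a malformed string or when the
option is absent. A boot line enabling only the first two channels would read:

	iop13xx_init_adma=0,1
]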
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/glantank.c linux-2.6.22-try2/arch/arm/mach-iop32x/glantank.c
+--- linux-2.6.22-570/arch/arm/mach-iop32x/glantank.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-iop32x/glantank.c	2007-12-19 15:29:23.000000000 -0500
+@@ -180,6 +180,8 @@
+ 	platform_device_register(&iop3xx_i2c1_device);
+ 	platform_device_register(&glantank_flash_device);
+ 	platform_device_register(&glantank_serial_device);
++	platform_device_register(&iop3xx_dma_0_channel);
++	platform_device_register(&iop3xx_dma_1_channel);
+ 
+ 	pm_power_off = glantank_power_off;
+ }
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/iq31244.c linux-2.6.22-try2/arch/arm/mach-iop32x/iq31244.c
+--- linux-2.6.22-570/arch/arm/mach-iop32x/iq31244.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-iop32x/iq31244.c	2007-12-19 15:29:23.000000000 -0500
+@@ -298,9 +298,14 @@
+ 	platform_device_register(&iop3xx_i2c1_device);
+ 	platform_device_register(&iq31244_flash_device);
+ 	platform_device_register(&iq31244_serial_device);
++	platform_device_register(&iop3xx_dma_0_channel);
++	platform_device_register(&iop3xx_dma_1_channel);
+ 
+ 	if (is_ep80219())
+ 		pm_power_off = ep80219_power_off;
++
++	if (!is_80219())
++		platform_device_register(&iop3xx_aau_channel);
+ }
+ 
+ static int __init force_ep80219_setup(char *str)
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/iq80321.c linux-2.6.22-try2/arch/arm/mach-iop32x/iq80321.c
+--- linux-2.6.22-570/arch/arm/mach-iop32x/iq80321.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-iop32x/iq80321.c	2007-12-19 15:29:23.000000000 -0500
+@@ -181,6 +181,9 @@
+ 	platform_device_register(&iop3xx_i2c1_device);
+ 	platform_device_register(&iq80321_flash_device);
+ 	platform_device_register(&iq80321_serial_device);
++	platform_device_register(&iop3xx_dma_0_channel);
++	platform_device_register(&iop3xx_dma_1_channel);
++	platform_device_register(&iop3xx_aau_channel);
+ }
+ 
+ MACHINE_START(IQ80321, "Intel IQ80321")
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop32x/n2100.c linux-2.6.22-try2/arch/arm/mach-iop32x/n2100.c
+--- linux-2.6.22-570/arch/arm/mach-iop32x/n2100.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-iop32x/n2100.c	2007-12-19 15:29:23.000000000 -0500
+@@ -245,6 +245,8 @@
+ 	platform_device_register(&iop3xx_i2c0_device);
+ 	platform_device_register(&n2100_flash_device);
+ 	platform_device_register(&n2100_serial_device);
++	platform_device_register(&iop3xx_dma_0_channel);
++	platform_device_register(&iop3xx_dma_1_channel);
+ 
+ 	pm_power_off = n2100_power_off;
+ 
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop33x/iq80331.c linux-2.6.22-try2/arch/arm/mach-iop33x/iq80331.c
+--- linux-2.6.22-570/arch/arm/mach-iop33x/iq80331.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-iop33x/iq80331.c	2007-12-19 15:29:23.000000000 -0500
+@@ -136,6 +136,9 @@
+ 	platform_device_register(&iop33x_uart0_device);
+ 	platform_device_register(&iop33x_uart1_device);
+ 	platform_device_register(&iq80331_flash_device);
++	platform_device_register(&iop3xx_dma_0_channel);
++	platform_device_register(&iop3xx_dma_1_channel);
++	platform_device_register(&iop3xx_aau_channel);
+ }
+ 
+ MACHINE_START(IQ80331, "Intel IQ80331")
+diff -Nurb linux-2.6.22-570/arch/arm/mach-iop33x/iq80332.c linux-2.6.22-try2/arch/arm/mach-iop33x/iq80332.c
+--- linux-2.6.22-570/arch/arm/mach-iop33x/iq80332.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-iop33x/iq80332.c	2007-12-19 15:29:23.000000000 -0500
+@@ -136,6 +136,9 @@
+ 	platform_device_register(&iop33x_uart0_device);
+ 	platform_device_register(&iop33x_uart1_device);
+ 	platform_device_register(&iq80332_flash_device);
++	platform_device_register(&iop3xx_dma_0_channel);
++	platform_device_register(&iop3xx_dma_1_channel);
++	platform_device_register(&iop3xx_aau_channel);
+ }
+ 
+ MACHINE_START(IQ80332, "Intel IQ80332")
+diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp2000/core.c linux-2.6.22-try2/arch/arm/mach-ixp2000/core.c
+--- linux-2.6.22-570/arch/arm/mach-ixp2000/core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-ixp2000/core.c	2007-12-19 15:29:23.000000000 -0500
+@@ -34,6 +34,7 @@
+ #include <asm/system.h>
+ #include <asm/tlbflush.h>
+ #include <asm/pgtable.h>
++#include <asm/kgdb.h>
+ 
+ #include <asm/mach/map.h>
+ #include <asm/mach/time.h>
+@@ -184,6 +185,9 @@
+ void __init ixp2000_uart_init(void)
+ {
+ 	platform_device_register(&ixp2000_serial_device);
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_platform_port(0, &ixp2000_serial_port);
++#endif
+ }
+ 
+ 
+diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp2000/ixdp2x01.c linux-2.6.22-try2/arch/arm/mach-ixp2000/ixdp2x01.c
+--- linux-2.6.22-570/arch/arm/mach-ixp2000/ixdp2x01.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-ixp2000/ixdp2x01.c	2007-12-19 15:29:23.000000000 -0500
+@@ -38,6 +38,7 @@
+ #include <asm/system.h>
+ #include <asm/hardware.h>
+ #include <asm/mach-types.h>
++#include <asm/kgdb.h>
+ 
+ #include <asm/mach/pci.h>
+ #include <asm/mach/map.h>
+@@ -413,6 +414,11 @@
+ 	platform_add_devices(ixdp2x01_devices, ARRAY_SIZE(ixdp2x01_devices));
+ 	ixp2000_uart_init();
+ 	ixdp2x01_uart_init();
++
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_platform_port(0, ixdp2x01_serial_port1);
++	kgdb8250_add_platform_port(1, ixdp2x01_serial_port2);
++#endif
+ }
+ 
+ 
+diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp4xx/coyote-setup.c linux-2.6.22-try2/arch/arm/mach-ixp4xx/coyote-setup.c
+--- linux-2.6.22-570/arch/arm/mach-ixp4xx/coyote-setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-ixp4xx/coyote-setup.c	2007-12-19 15:29:23.000000000 -0500
+@@ -96,6 +96,10 @@
+ 	}
+ 
+ 	platform_add_devices(coyote_devices, ARRAY_SIZE(coyote_devices));
++
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_platform_port(0, &coyote_uart_data);
++#endif
+ }
+ 
+ #ifdef CONFIG_ARCH_ADI_COYOTE
+diff -Nurb linux-2.6.22-570/arch/arm/mach-ixp4xx/ixdp425-setup.c linux-2.6.22-try2/arch/arm/mach-ixp4xx/ixdp425-setup.c
+--- linux-2.6.22-570/arch/arm/mach-ixp4xx/ixdp425-setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-ixp4xx/ixdp425-setup.c	2007-12-19 15:29:23.000000000 -0500
+@@ -76,7 +76,8 @@
+ 		.mapbase	= IXP4XX_UART1_BASE_PHYS,
+ 		.membase	= (char *)IXP4XX_UART1_BASE_VIRT + REG_OFFSET,
+ 		.irq		= IRQ_IXP4XX_UART1,
+-		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
++		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
++					UPF_SHARE_IRQ,
+ 		.iotype		= UPIO_MEM,
+ 		.regshift	= 2,
+ 		.uartclk	= IXP4XX_UART_XTAL,
+@@ -85,7 +86,8 @@
+ 		.mapbase	= IXP4XX_UART2_BASE_PHYS,
+ 		.membase	= (char *)IXP4XX_UART2_BASE_VIRT + REG_OFFSET,
+ 		.irq		= IRQ_IXP4XX_UART2,
+-		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST,
++		.flags		= UPF_BOOT_AUTOCONF | UPF_SKIP_TEST |
++					UPF_SHARE_IRQ,
+ 		.iotype		= UPIO_MEM,
+ 		.regshift	= 2,
+ 		.uartclk	= IXP4XX_UART_XTAL,
+@@ -123,12 +125,22 @@
+ 	platform_add_devices(ixdp425_devices, ARRAY_SIZE(ixdp425_devices));
+ }
+ 
++static void __init ixdp425_map_io(void)
++{
++	ixp4xx_map_io();
++
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_platform_port(0, &ixdp425_uart_data[0]);
++	kgdb8250_add_platform_port(1, &ixdp425_uart_data[1]);
++#endif
++}
++
+ #ifdef CONFIG_ARCH_IXDP425
+ MACHINE_START(IXDP425, "Intel IXDP425 Development Platform")
+ 	/* Maintainer: MontaVista Software, Inc. */
+ 	.phys_io	= IXP4XX_PERIPHERAL_BASE_PHYS,
+ 	.io_pg_offst	= ((IXP4XX_PERIPHERAL_BASE_VIRT) >> 18) & 0xfffc,
+-	.map_io		= ixp4xx_map_io,
++	.map_io		= ixdp425_map_io,
+ 	.init_irq	= ixp4xx_init_irq,
+ 	.timer		= &ixp4xx_timer,
+ 	.boot_params	= 0x0100,
+@@ -141,7 +153,7 @@
+ 	/* Maintainer: MontaVista Software, Inc. */
+ 	.phys_io	= IXP4XX_PERIPHERAL_BASE_PHYS,
+ 	.io_pg_offst	= ((IXP4XX_PERIPHERAL_BASE_VIRT) >> 18) & 0xfffc,
+-	.map_io		= ixp4xx_map_io,
++	.map_io		= ixdp425_map_io,
+ 	.init_irq	= ixp4xx_init_irq,
+ 	.timer		= &ixp4xx_timer,
+ 	.boot_params	= 0x0100,
+diff -Nurb linux-2.6.22-570/arch/arm/mach-omap1/serial.c linux-2.6.22-try2/arch/arm/mach-omap1/serial.c
+--- linux-2.6.22-570/arch/arm/mach-omap1/serial.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-omap1/serial.c	2007-12-19 15:29:23.000000000 -0500
+@@ -15,6 +15,7 @@
+ #include <linux/delay.h>
+ #include <linux/serial.h>
+ #include <linux/tty.h>
++#include <linux/kgdb.h>
+ #include <linux/serial_8250.h>
+ #include <linux/serial_reg.h>
+ #include <linux/clk.h>
+@@ -199,6 +200,9 @@
+ 			break;
+ 		}
+ 		omap_serial_reset(&serial_platform_data[i]);
++#ifdef CONFIG_KGDB_8250
++		kgdb8250_add_platform_port(i, &serial_platform_data[i]);
++#endif
+ 	}
+ }
+ 
+diff -Nurb linux-2.6.22-570/arch/arm/mach-pnx4008/core.c linux-2.6.22-try2/arch/arm/mach-pnx4008/core.c
+--- linux-2.6.22-570/arch/arm/mach-pnx4008/core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-pnx4008/core.c	2007-12-19 15:29:23.000000000 -0500
+@@ -224,6 +224,10 @@
+ 	spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info));
+ 	/* Switch on the UART clocks */
+ 	pnx4008_uart_init();
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_platform_port(0, &platform_serial_ports[0]);
++	kgdb8250_add_platform_port(1, &platform_serial_ports[1]);
++#endif
+ }
+ 
+ static struct map_desc pnx4008_io_desc[] __initdata = {
+diff -Nurb linux-2.6.22-570/arch/arm/mach-pxa/Makefile linux-2.6.22-try2/arch/arm/mach-pxa/Makefile
+--- linux-2.6.22-570/arch/arm/mach-pxa/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-pxa/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -31,6 +31,7 @@
+ # Misc features
+ obj-$(CONFIG_PM) += pm.o sleep.o
+ obj-$(CONFIG_PXA_SSP) += ssp.o
++obj-$(CONFIG_KGDB_PXA_SERIAL) += kgdb-serial.o
+ 
+ ifeq ($(CONFIG_PXA27x),y)
+ obj-$(CONFIG_PM) += standby.o
+diff -Nurb linux-2.6.22-570/arch/arm/mach-pxa/kgdb-serial.c linux-2.6.22-try2/arch/arm/mach-pxa/kgdb-serial.c
+--- linux-2.6.22-570/arch/arm/mach-pxa/kgdb-serial.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/arm/mach-pxa/kgdb-serial.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,97 @@
++/*
++ * linux/arch/arm/mach-pxa/kgdb-serial.c
++ *
++ * Provides low level kgdb serial support hooks for PXA2xx boards
++ *
++ * Author:	Nicolas Pitre
++ * Copyright:	(C) 2002-2005 MontaVista Software Inc.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include <linux/serial_reg.h>
++#include <linux/kgdb.h>
++#include <asm/processor.h>
++#include <asm/hardware.h>
++#include <asm/arch/pxa-regs.h>
++
++#if   defined(CONFIG_KGDB_PXA_FFUART)
++
++#define UART		FFUART
++#define CKEN_UART	CKEN6_FFUART
++#define GPIO_RX_MD	GPIO34_FFRXD_MD
++#define GPIO_TX_MD	GPIO39_FFTXD_MD
++
++#elif defined(CONFIG_KGDB_PXA_BTUART)
++
++#define UART		BTUART
++#define CKEN_UART	CKEN7_BTUART
++#define GPIO_RX_MD	GPIO42_BTRXD_MD
++#define GPIO_TX_MD	GPIO43_BTTXD_MD
++
++#elif defined(CONFIG_KGDB_PXA_STUART)
++
++#define UART		STUART
++#define CKEN_UART	CKEN5_STUART
++#define GPIO_RX_MD	GPIO46_STRXD_MD
++#define GPIO_TX_MD	GPIO47_STTXD_MD
++
++#endif
++
++#define UART_BAUDRATE	(CONFIG_KGDB_BAUDRATE)
++
++static volatile unsigned long *port = (unsigned long *)&UART;
++
++static int kgdb_serial_init(void)
++{
++	pxa_set_cken(CKEN_UART, 1);
++	pxa_gpio_mode(GPIO_RX_MD);
++	pxa_gpio_mode(GPIO_TX_MD);
++
++	port[UART_IER] = 0;
++	port[UART_LCR] = LCR_DLAB;
++	port[UART_DLL] = ((921600 / UART_BAUDRATE) & 0xff);
++	port[UART_DLM] = ((921600 / UART_BAUDRATE) >> 8);
++	port[UART_LCR] = LCR_WLS1 | LCR_WLS0;
++	port[UART_MCR] = 0;
++	port[UART_IER] = IER_UUE;
++	port[UART_FCR] = FCR_ITL_16;
++
++	return 0;
++}
++
++static void kgdb_serial_putchar(u8 c)
++{
++	if (!(CKEN & CKEN_UART) || port[UART_IER] != IER_UUE)
++		kgdb_serial_init();
++	while (!(port[UART_LSR] & LSR_TDRQ))
++		cpu_relax();
++	port[UART_TX] = c;
++}
++
++static void kgdb_serial_flush(void)
++{
++	if ((CKEN & CKEN_UART) && (port[UART_IER] & IER_UUE))
++		while (!(port[UART_LSR] & LSR_TEMT))
++			cpu_relax();
++}
++
++static int kgdb_serial_getchar(void)
++{
++	unsigned char c;
++	if (!(CKEN & CKEN_UART) || port[UART_IER] != IER_UUE)
++		kgdb_serial_init();
++	while (!(port[UART_LSR] & UART_LSR_DR))
++		cpu_relax();
++	c = port[UART_RX];
++	return c;
++}
++
++struct kgdb_io kgdb_io_ops = {
++	.init = kgdb_serial_init,
++	.write_char = kgdb_serial_putchar,
++	.flush = kgdb_serial_flush,
++	.read_char = kgdb_serial_getchar,
++};
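[Annotation: the divisor-latch writes in kgdb_serial_init() follow from the PXA2xx UART
input clock of 14.7456 MHz pre-divided by 16, i.e. 921600 Hz. For the common
CONFIG_KGDB_BAUDRATE of 115200 the programmed divisor works out to:

	921600 / 115200 = 8   ->   UART_DLL = 0x08, UART_DLM = 0x00
]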
+diff -Nurb linux-2.6.22-570/arch/arm/mach-versatile/core.c linux-2.6.22-try2/arch/arm/mach-versatile/core.c
+--- linux-2.6.22-570/arch/arm/mach-versatile/core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mach-versatile/core.c	2007-12-19 15:29:23.000000000 -0500
+@@ -184,6 +184,14 @@
+ 		.type		= MT_DEVICE
+ 	},
+ #endif
++#ifdef CONFIG_KGDB_AMBA_PL011
++	{
++		.virtual	=  IO_ADDRESS(CONFIG_KGDB_AMBA_BASE),
++		.pfn		= __phys_to_pfn(CONFIG_KGDB_AMBA_BASE),
++		.length		= SZ_4K,
++		.type		= MT_DEVICE
++	},
++#endif
+ #ifdef CONFIG_PCI
+  	{
+ 		.virtual	=  IO_ADDRESS(VERSATILE_PCI_CORE_BASE),
+diff -Nurb linux-2.6.22-570/arch/arm/mm/extable.c linux-2.6.22-try2/arch/arm/mm/extable.c
+--- linux-2.6.22-570/arch/arm/mm/extable.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/mm/extable.c	2007-12-19 15:29:23.000000000 -0500
+@@ -2,6 +2,7 @@
+  *  linux/arch/arm/mm/extable.c
+  */
+ #include <linux/module.h>
++#include <linux/kgdb.h>
+ #include <asm/uaccess.h>
+ 
+ int fixup_exception(struct pt_regs *regs)
+@@ -11,6 +12,12 @@
+ 	fixup = search_exception_tables(instruction_pointer(regs));
+ 	if (fixup)
+ 		regs->ARM_pc = fixup->fixup;
++#ifdef CONFIG_KGDB
++	if (atomic_read(&debugger_active) && kgdb_may_fault)
++		/* Restore our previous state. */
++		kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++		/* Not reached. */
++#endif
+ 
+ 	return fixup != NULL;
+ }
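[Annotation: together with the kgdb-jmp.S helpers earlier in this patch, this hunk closes
the fault-recovery loop for debugger memory probes:

	kgdb memory probe  ->  kgdb_fault_setjmp() returns 0
	                   ->  faulting load/store
	                   ->  do_page_fault() -> fixup_exception()
	                   ->  kgdb_fault_longjmp()   (debugger_active && kgdb_may_fault)
	                   ->  kgdb_fault_setjmp() returns again, this time nonzero
]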
+diff -Nurb linux-2.6.22-570/arch/arm/plat-iop/Makefile linux-2.6.22-try2/arch/arm/plat-iop/Makefile
+--- linux-2.6.22-570/arch/arm/plat-iop/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/arm/plat-iop/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -12,6 +12,7 @@
+ obj-$(CONFIG_ARCH_IOP32X) += time.o
+ obj-$(CONFIG_ARCH_IOP32X) += io.o
+ obj-$(CONFIG_ARCH_IOP32X) += cp6.o
++obj-$(CONFIG_ARCH_IOP32X) += adma.o
+ 
+ # IOP33X
+ obj-$(CONFIG_ARCH_IOP33X) += gpio.o
+@@ -21,6 +22,7 @@
+ obj-$(CONFIG_ARCH_IOP33X) += time.o
+ obj-$(CONFIG_ARCH_IOP33X) += io.o
+ obj-$(CONFIG_ARCH_IOP33X) += cp6.o
++obj-$(CONFIG_ARCH_IOP33X) += adma.o
+ 
+ # IOP13XX
+ obj-$(CONFIG_ARCH_IOP13XX) += cp6.o
+diff -Nurb linux-2.6.22-570/arch/arm/plat-iop/adma.c linux-2.6.22-try2/arch/arm/plat-iop/adma.c
+--- linux-2.6.22-570/arch/arm/plat-iop/adma.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/arm/plat-iop/adma.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,209 @@
++/*
++ * platform device definitions for the iop3xx dma/xor engines
++ * Copyright © 2006, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#include <linux/platform_device.h>
++#include <asm/hardware/iop3xx.h>
++#include <linux/dma-mapping.h>
++#include <asm/arch/adma.h>
++#include <asm/hardware/iop_adma.h>
++
++#ifdef CONFIG_ARCH_IOP32X
++#define IRQ_DMA0_EOT IRQ_IOP32X_DMA0_EOT
++#define IRQ_DMA0_EOC IRQ_IOP32X_DMA0_EOC
++#define IRQ_DMA0_ERR IRQ_IOP32X_DMA0_ERR
++
++#define IRQ_DMA1_EOT IRQ_IOP32X_DMA1_EOT
++#define IRQ_DMA1_EOC IRQ_IOP32X_DMA1_EOC
++#define IRQ_DMA1_ERR IRQ_IOP32X_DMA1_ERR
++
++#define IRQ_AA_EOT IRQ_IOP32X_AA_EOT
++#define IRQ_AA_EOC IRQ_IOP32X_AA_EOC
++#define IRQ_AA_ERR IRQ_IOP32X_AA_ERR
++#endif
++#ifdef CONFIG_ARCH_IOP33X
++#define IRQ_DMA0_EOT IRQ_IOP33X_DMA0_EOT
++#define IRQ_DMA0_EOC IRQ_IOP33X_DMA0_EOC
++#define IRQ_DMA0_ERR IRQ_IOP33X_DMA0_ERR
++
++#define IRQ_DMA1_EOT IRQ_IOP33X_DMA1_EOT
++#define IRQ_DMA1_EOC IRQ_IOP33X_DMA1_EOC
++#define IRQ_DMA1_ERR IRQ_IOP33X_DMA1_ERR
++
++#define IRQ_AA_EOT IRQ_IOP33X_AA_EOT
++#define IRQ_AA_EOC IRQ_IOP33X_AA_EOC
++#define IRQ_AA_ERR IRQ_IOP33X_AA_ERR
++#endif
++/* AAU and DMA Channels */
++static struct resource iop3xx_dma_0_resources[] = {
++	[0] = {
++		.start = IOP3XX_DMA_PHYS_BASE(0),
++		.end = IOP3XX_DMA_UPPER_PA(0),
++		.flags = IORESOURCE_MEM,
++	},
++	[1] = {
++		.start = IRQ_DMA0_EOT,
++		.end = IRQ_DMA0_EOT,
++		.flags = IORESOURCE_IRQ
++	},
++	[2] = {
++		.start = IRQ_DMA0_EOC,
++		.end = IRQ_DMA0_EOC,
++		.flags = IORESOURCE_IRQ
++	},
++	[3] = {
++		.start = IRQ_DMA0_ERR,
++		.end = IRQ_DMA0_ERR,
++		.flags = IORESOURCE_IRQ
++	}
++};
++
++static struct resource iop3xx_dma_1_resources[] = {
++	[0] = {
++		.start = IOP3XX_DMA_PHYS_BASE(1),
++		.end = IOP3XX_DMA_UPPER_PA(1),
++		.flags = IORESOURCE_MEM,
++	},
++	[1] = {
++		.start = IRQ_DMA1_EOT,
++		.end = IRQ_DMA1_EOT,
++		.flags = IORESOURCE_IRQ
++	},
++	[2] = {
++		.start = IRQ_DMA1_EOC,
++		.end = IRQ_DMA1_EOC,
++		.flags = IORESOURCE_IRQ
++	},
++	[3] = {
++		.start = IRQ_DMA1_ERR,
++		.end = IRQ_DMA1_ERR,
++		.flags = IORESOURCE_IRQ
++	}
++};
++
++
++static struct resource iop3xx_aau_resources[] = {
++	[0] = {
++		.start = IOP3XX_AAU_PHYS_BASE,
++		.end = IOP3XX_AAU_UPPER_PA,
++		.flags = IORESOURCE_MEM,
++	},
++	[1] = {
++		.start = IRQ_AA_EOT,
++		.end = IRQ_AA_EOT,
++		.flags = IORESOURCE_IRQ
++	},
++	[2] = {
++		.start = IRQ_AA_EOC,
++		.end = IRQ_AA_EOC,
++		.flags = IORESOURCE_IRQ
++	},
++	[3] = {
++		.start = IRQ_AA_ERR,
++		.end = IRQ_AA_ERR,
++		.flags = IORESOURCE_IRQ
++	}
++};
++
++static u64 iop3xx_adma_dmamask = DMA_32BIT_MASK;
++
++static struct iop_adma_platform_data iop3xx_dma_0_data = {
++	.hw_id = DMA0_ID,
++	.pool_size = PAGE_SIZE,
++};
++
++static struct iop_adma_platform_data iop3xx_dma_1_data = {
++	.hw_id = DMA1_ID,
++	.pool_size = PAGE_SIZE,
++};
++
++static struct iop_adma_platform_data iop3xx_aau_data = {
++	.hw_id = AAU_ID,
++	.pool_size = 3 * PAGE_SIZE,
++};
++
++struct platform_device iop3xx_dma_0_channel = {
++	.name = "iop-adma",
++	.id = 0,
++	.num_resources = 4,
++	.resource = iop3xx_dma_0_resources,
++	.dev = {
++		.dma_mask = &iop3xx_adma_dmamask,
++		.coherent_dma_mask = DMA_64BIT_MASK,
++		.platform_data = (void *) &iop3xx_dma_0_data,
++	},
++};
++
++struct platform_device iop3xx_dma_1_channel = {
++	.name = "iop-adma",
++	.id = 1,
++	.num_resources = 4,
++	.resource = iop3xx_dma_1_resources,
++	.dev = {
++		.dma_mask = &iop3xx_adma_dmamask,
++		.coherent_dma_mask = DMA_64BIT_MASK,
++		.platform_data = (void *) &iop3xx_dma_1_data,
++	},
++};
++
++struct platform_device iop3xx_aau_channel = {
++	.name = "iop-adma",
++	.id = 2,
++	.num_resources = 4,
++	.resource = iop3xx_aau_resources,
++	.dev = {
++		.dma_mask = &iop3xx_adma_dmamask,
++		.coherent_dma_mask = DMA_64BIT_MASK,
++		.platform_data = (void *) &iop3xx_aau_data,
++	},
++};
++
++static int __init iop3xx_adma_cap_init(void)
++{
++	#ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */
++	dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
++	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
++	#else
++	dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask);
++	dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask);
++	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask);
++	#endif
++
++	#ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */
++	dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
++	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
++	#else
++	dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask);
++	dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask);
++	dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask);
++	#endif
++
++	#ifdef CONFIG_ARCH_IOP32X /* the 32x AAU does not perform zero sum */
++	dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
++	dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
++	dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
++	#else
++	dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask);
++	dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask);
++	dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask);
++	dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask);
++	#endif
++
++	return 0;
++}
++
++arch_initcall(iop3xx_adma_cap_init);
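[Annotation: the capability bits set here are what the dmaengine core matches when handing
out channels. A client-side sketch, assuming the dma_has_cap() helper from the dmaengine
series this patch builds on (use_channel() is illustrative):

	/* accept only channels that advertise XOR offload */
	if (dma_has_cap(DMA_XOR, chan->device->cap_mask))
		use_channel(chan);
]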
+diff -Nurb linux-2.6.22-570/arch/i386/Kconfig linux-2.6.22-try2/arch/i386/Kconfig
+--- linux-2.6.22-570/arch/i386/Kconfig	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/i386/Kconfig	2007-12-19 15:29:19.000000000 -0500
+@@ -1053,6 +1053,8 @@
+ 
+ source "arch/i386/kernel/cpu/cpufreq/Kconfig"
+ 
++source "drivers/cpuidle/Kconfig"
++
+ endmenu
+ 
+ menu "Bus options (PCI, PCMCIA, EISA, MCA, ISA)"
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/Makefile linux-2.6.22-try2/arch/i386/kernel/Makefile
+--- linux-2.6.22-570/arch/i386/kernel/Makefile	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/arch/i386/kernel/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -39,6 +39,7 @@
+ obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+ obj-$(CONFIG_HPET_TIMER) 	+= hpet.o
+ obj-$(CONFIG_K8_NB)		+= k8.o
++obj-$(CONFIG_STACK_UNWIND)	+= unwind.o
+ 
+ obj-$(CONFIG_VMI)		+= vmi.o vmiclock.o
+ obj-$(CONFIG_PARAVIRT)		+= paravirt.o
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/acpi/boot.c linux-2.6.22-try2/arch/i386/kernel/acpi/boot.c
+--- linux-2.6.22-570/arch/i386/kernel/acpi/boot.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/i386/kernel/acpi/boot.c	2007-12-19 15:29:19.000000000 -0500
+@@ -950,14 +950,6 @@
+ 	 },
+ 	{
+ 	 .callback = force_acpi_ht,
+-	 .ident = "DELL GX240",
+-	 .matches = {
+-		     DMI_MATCH(DMI_BOARD_VENDOR, "Dell Computer Corporation"),
+-		     DMI_MATCH(DMI_BOARD_NAME, "OptiPlex GX240"),
+-		     },
+-	 },
+-	{
+-	 .callback = force_acpi_ht,
+ 	 .ident = "HP VISUALIZE NT Workstation",
+ 	 .matches = {
+ 		     DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/apm.c linux-2.6.22-try2/arch/i386/kernel/apm.c
+--- linux-2.6.22-570/arch/i386/kernel/apm.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/i386/kernel/apm.c	2007-12-19 15:29:24.000000000 -0500
+@@ -222,6 +222,7 @@
+ #include <linux/capability.h>
+ #include <linux/device.h>
+ #include <linux/kernel.h>
++#include <linux/freezer.h>
+ #include <linux/smp.h>
+ #include <linux/dmi.h>
+ #include <linux/suspend.h>
+@@ -2311,7 +2312,6 @@
+ 		remove_proc_entry("apm", NULL);
+ 		return err;
+ 	}
+-	kapmd_task->flags |= PF_NOFREEZE;
+ 	wake_up_process(kapmd_task);
+ 
+ 	if (num_online_cpus() > 1 && !smp ) {
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/io_apic.c linux-2.6.22-try2/arch/i386/kernel/io_apic.c
+--- linux-2.6.22-570/arch/i386/kernel/io_apic.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/arch/i386/kernel/io_apic.c	2007-12-19 15:29:24.000000000 -0500
+@@ -667,6 +667,7 @@
+ 		set_pending_irq(i, cpumask_of_cpu(0));
+ 	}
+ 
++	set_freezable();
+ 	for ( ; ; ) {
+ 		time_remaining = schedule_timeout_interruptible(time_remaining);
+ 		try_to_freeze();
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/kgdb-jmp.S linux-2.6.22-try2/arch/i386/kernel/kgdb-jmp.S
+--- linux-2.6.22-570/arch/i386/kernel/kgdb-jmp.S	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/i386/kernel/kgdb-jmp.S	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,74 @@
++/*
++ * arch/i386/kernel/kgdb-jmp.S
++ *
++ * Save and restore system registers so that within a limited frame we
++ * may have a fault and "jump back" to a known safe location.
++ *
++ * Author: George Anzinger <george@mvista.com>
++ *
++ * Cribbed from glibc, which carries the following:
++ * Copyright (C) 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
++ * Copyright (C) 2005 by MontaVista Software.
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of
++ * any kind, whether express or implied.
++ */
++
++#include <linux/linkage.h>
++
++#define PCOFF		0
++#define LINKAGE		4		/* just the return address */
++#define PTR_SIZE	4
++#define PARMS		LINKAGE		/* no space for saved regs */
++#define JMPBUF		PARMS
++#define VAL		JMPBUF+PTR_SIZE
++
++#define JB_BX		0
++#define JB_SI		1
++#define JB_DI		2
++#define JB_BP		3
++#define JB_SP		4
++#define JB_PC		5
++
++/* This must be called prior to kgdb_fault_longjmp and
++ * kgdb_fault_longjmp must not be called outside of the context of the
++ * last call to kgdb_fault_setjmp.
++ * kgdb_fault_setjmp(int *jmp_buf[6])
++ */
++ENTRY(kgdb_fault_setjmp)
++	movl JMPBUF(%esp), %eax
++
++	/* Save registers.  */
++	movl	%ebx, (JB_BX*4)(%eax)
++	movl	%esi, (JB_SI*4)(%eax)
++	movl	%edi, (JB_DI*4)(%eax)
++	/* Save SP as it will be after we return.  */
++	leal	JMPBUF(%esp), %ecx
++	movl	%ecx, (JB_SP*4)(%eax)
++	movl	PCOFF(%esp), %ecx	/* Save PC we are returning to now.  */
++	movl	%ecx, (JB_PC*4)(%eax)
++	movl	%ebp, (JB_BP*4)(%eax)	/* Save caller's frame pointer.  */
++
++	/* Restore state so we can now try the access. */
++	movl	JMPBUF(%esp), %ecx	/* User's jmp_buf in %ecx.  */
++	/* Save the return address now.  */
++	movl	(JB_PC*4)(%ecx), %edx
++	/* Restore registers.  */
++	movl	$0, %eax
++	movl	(JB_SP*4)(%ecx), %esp
++	jmp	*%edx		/* Jump to saved PC. */
++
++/* kgdb_fault_longjmp(int *jmp_buf[6]) */
++ENTRY(kgdb_fault_longjmp)
++	movl	JMPBUF(%esp), %ecx	/* User's jmp_buf in %ecx.  */
++	/* Save the return address now.  */
++	movl	(JB_PC*4)(%ecx), %edx
++	/* Restore registers.  */
++	movl	(JB_BX*4)(%ecx), %ebx
++	movl	(JB_SI*4)(%ecx), %esi
++	movl	(JB_DI*4)(%ecx), %edi
++	movl	(JB_BP*4)(%ecx), %ebp
++	movl	$1, %eax
++	movl	(JB_SP*4)(%ecx), %esp
++	jmp	*%edx		/* Jump to saved PC. */
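[Annotation: both routines operate on the six-slot buffer indexed by the JB_* constants
above:

	jmp_buf[0..5] = { EBX, ESI, EDI, EBP, ESP (as after return), PC }

kgdb_fault_setjmp deliberately exits through the same reload-and-jump sequence as
kgdb_fault_longjmp, with %eax = 0 on the initial call and %eax = 1 when re-entered
after a fault.]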
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/kgdb.c linux-2.6.22-try2/arch/i386/kernel/kgdb.c
+--- linux-2.6.22-570/arch/i386/kernel/kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/i386/kernel/kgdb.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,388 @@
++/*
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * General Public License for more details.
++ *
++ */
++
++/*
++ * Copyright (C) 2000-2001 VERITAS Software Corporation.
++ * Copyright (C) 2007 Wind River Systems, Inc.
++ */
++/*
++ *  Contributor:     Lake Stevens Instrument Division$
++ *  Written by:      Glenn Engel $
++ *  Updated by:	     Amit Kale<akale@veritas.com>
++ *  Updated by:	     Tom Rini <trini@kernel.crashing.org>
++ *  Updated by:	     Jason Wessel <jason.wessel@windriver.com>
++ *  Modified for 386 by Jim Kingdon, Cygnus Support.
++ *  Original kgdb, compatibility with 2.1.xx kernel by
++ *  David Grothe <dave@gcom.com>
++ *  Additional support from Tigran Aivazian <tigran@sco.com>
++ */
++
++#include <linux/string.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/delay.h>
++#include <asm/vm86.h>
++#include <asm/system.h>
++#include <asm/ptrace.h>		/* for linux pt_regs struct */
++#include <linux/kgdb.h>
++#include <linux/init.h>
++#include <linux/kdebug.h>
++#include <asm/apicdef.h>
++#include <asm/desc.h>
++#include <asm/kdebug.h>
++
++#include "mach_ipi.h"
++
++/* Put the error code here just in case the user cares.  */
++int gdb_i386errcode;
++/* Likewise, the vector number here (since GDB only gets the signal
++   number through the usual means, and that's not very specific).  */
++int gdb_i386vector = -1;
++
++extern atomic_t cpu_doing_single_step;
++
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++	gdb_regs[_EAX] = regs->eax;
++	gdb_regs[_EBX] = regs->ebx;
++	gdb_regs[_ECX] = regs->ecx;
++	gdb_regs[_EDX] = regs->edx;
++	gdb_regs[_ESI] = regs->esi;
++	gdb_regs[_EDI] = regs->edi;
++	gdb_regs[_EBP] = regs->ebp;
++	gdb_regs[_DS] = regs->xds;
++	gdb_regs[_ES] = regs->xes;
++	gdb_regs[_PS] = regs->eflags;
++	gdb_regs[_CS] = regs->xcs;
++	gdb_regs[_PC] = regs->eip;
++	gdb_regs[_ESP] = (int)(&regs->esp);
++	gdb_regs[_SS] = __KERNEL_DS;
++	gdb_regs[_FS] = 0xFFFF;
++	gdb_regs[_GS] = 0xFFFF;
++}
++
++/*
++ * Extracts ebp, esp and eip values understandable by gdb from the values
++ * saved by switch_to.
++ * thread.esp points to ebp. flags and ebp are pushed in switch_to hence esp
++ * prior to entering switch_to is 8 greater than the value that is saved.
++ * If switch_to changes, change following code appropriately.
++ */
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++	gdb_regs[_EAX] = 0;
++	gdb_regs[_EBX] = 0;
++	gdb_regs[_ECX] = 0;
++	gdb_regs[_EDX] = 0;
++	gdb_regs[_ESI] = 0;
++	gdb_regs[_EDI] = 0;
++	gdb_regs[_EBP] = *(unsigned long *)p->thread.esp;
++	gdb_regs[_DS] = __KERNEL_DS;
++	gdb_regs[_ES] = __KERNEL_DS;
++	gdb_regs[_PS] = 0;
++	gdb_regs[_CS] = __KERNEL_CS;
++	gdb_regs[_PC] = p->thread.eip;
++	gdb_regs[_ESP] = p->thread.esp;
++	gdb_regs[_SS] = __KERNEL_DS;
++	gdb_regs[_FS] = 0xFFFF;
++	gdb_regs[_GS] = 0xFFFF;
++}
++
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++	regs->eax = gdb_regs[_EAX];
++	regs->ebx = gdb_regs[_EBX];
++	regs->ecx = gdb_regs[_ECX];
++	regs->edx = gdb_regs[_EDX];
++	regs->esi = gdb_regs[_ESI];
++	regs->edi = gdb_regs[_EDI];
++	regs->ebp = gdb_regs[_EBP];
++	regs->xds = gdb_regs[_DS];
++	regs->xes = gdb_regs[_ES];
++	regs->eflags = gdb_regs[_PS];
++	regs->xcs = gdb_regs[_CS];
++	regs->eip = gdb_regs[_PC];
++}
++
++static struct hw_breakpoint {
++	unsigned enabled;
++	unsigned type;
++	unsigned len;
++	unsigned addr;
++} breakinfo[4] = {
++	{ .enabled = 0 },
++	{ .enabled = 0 },
++	{ .enabled = 0 },
++	{ .enabled = 0 },
++};
++
++static void kgdb_correct_hw_break(void)
++{
++	int breakno;
++	int correctit;
++	int breakbit;
++	unsigned long dr7;
++
++	get_debugreg(dr7, 7);
++	correctit = 0;
++	for (breakno = 0; breakno < 4; breakno++) {
++		breakbit = 2 << (breakno << 1);
++		if (!(dr7 & breakbit) && breakinfo[breakno].enabled) {
++			correctit = 1;
++			dr7 |= breakbit;
++			dr7 &= ~(0xf0000 << (breakno << 2));
++			dr7 |= (((breakinfo[breakno].len << 2) |
++				 breakinfo[breakno].type) << 16) <<
++			    (breakno << 2);
++			switch (breakno) {
++			case 0:
++				set_debugreg(breakinfo[breakno].addr, 0);
++				break;
++
++			case 1:
++				set_debugreg(breakinfo[breakno].addr, 1);
++				break;
++
++			case 2:
++				set_debugreg(breakinfo[breakno].addr, 2);
++				break;
++
++			case 3:
++				set_debugreg(breakinfo[breakno].addr, 3);
++				break;
++			}
++		} else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) {
++			correctit = 1;
++			dr7 &= ~breakbit;
++			dr7 &= ~(0xf0000 << (breakno << 2));
++		}
++	}
++	if (correctit)
++		set_debugreg(dr7, 7);
++}
++
++static int kgdb_remove_hw_break(unsigned long addr, int len,
++						 enum kgdb_bptype bptype)
++{
++	int i, idx = -1;
++	for (i = 0; i < 4; i++) {
++		if (breakinfo[i].addr == addr && breakinfo[i].enabled) {
++			idx = i;
++			break;
++		}
++	}
++	if (idx == -1)
++		return -1;
++
++	breakinfo[idx].enabled = 0;
++	return 0;
++}
++
++static void kgdb_remove_all_hw_break(void)
++{
++	int i;
++
++	for (i = 0; i < 4; i++) {
++		memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint));
++	}
++}
++
++static int kgdb_set_hw_break(unsigned long addr, int len,
++					  enum kgdb_bptype bptype)
++{
++	int i, idx = -1;
++	for (i = 0; i < 4; i++) {
++		if (!breakinfo[i].enabled) {
++			idx = i;
++			break;
++		}
++	}
++	if (idx == -1)
++		return -1;
++	if (bptype == bp_hardware_breakpoint) {
++		breakinfo[idx].type = 0;
++		breakinfo[idx].len = 0;
++	} else if (bptype == bp_write_watchpoint) {
++		breakinfo[idx].type = 1;
++		if (len == 1 || len == 2 || len == 4)
++			breakinfo[idx].len = len - 1;
++		else
++			return -1;
++	} else if (bptype == bp_access_watchpoint) {
++		breakinfo[idx].type = 3;
++		if (len == 1 || len == 2 || len == 4)
++			breakinfo[idx].len = len - 1;
++		else
++			return -1;
++	} else
++		return -1;
++	breakinfo[idx].enabled = 1;
++	breakinfo[idx].addr = addr;
++	return 0;
++}
++
++void kgdb_disable_hw_debug(struct pt_regs *regs)
++{
++	/* Disable hardware debugging while we are in kgdb */
++	set_debugreg(0, 7);
++}
++
++void kgdb_post_master_code(struct pt_regs *regs, int e_vector, int err_code)
++{
++	/* Master processor is completely in the debugger */
++	gdb_i386vector = e_vector;
++	gdb_i386errcode = err_code;
++}
++
++#ifdef CONFIG_SMP
++void kgdb_roundup_cpus(unsigned long flags)
++{
++	send_IPI_allbutself(APIC_DM_NMI);
++}
++#endif
++
++int kgdb_arch_handle_exception(int e_vector, int signo,
++			       int err_code, char *remcom_in_buffer,
++			       char *remcom_out_buffer,
++			       struct pt_regs *linux_regs)
++{
++	long addr;
++	char *ptr;
++	int newPC, dr6;
++
++	switch (remcom_in_buffer[0]) {
++	case 'c':
++	case 's':
++		/* try to read optional parameter, pc unchanged if no parm */
++		ptr = &remcom_in_buffer[1];
++		if (kgdb_hex2long(&ptr, &addr))
++			linux_regs->eip = addr;
++		newPC = linux_regs->eip;
++
++		/* clear the trace bit */
++		linux_regs->eflags &= ~TF_MASK;
++		atomic_set(&cpu_doing_single_step, -1);
++
++		/* set the trace bit if we're stepping */
++		if (remcom_in_buffer[0] == 's') {
++			linux_regs->eflags |= TF_MASK;
++			debugger_step = 1;
++			atomic_set(&cpu_doing_single_step, raw_smp_processor_id());
++		}
++
++		get_debugreg(dr6, 6);
++		if (!(dr6 & 0x4000)) {
++			long breakno;
++			for (breakno = 0; breakno < 4; ++breakno) {
++				if (dr6 & (1 << breakno) &&
++				    breakinfo[breakno].type == 0) {
++					/* Set restore flag */
++					linux_regs->eflags |= X86_EFLAGS_RF;
++					break;
++				}
++			}
++		}
++		set_debugreg(0, 6);
++		kgdb_correct_hw_break();
++
++		return (0);
++	}			/* switch */
++	/* this means that we do not want to exit from the handler */
++	return -1;
++}
++
++/* Register KGDB with the i386die_chain so that we hook into all of the right
++ * spots. */
++static int kgdb_notify(struct notifier_block *self, unsigned long cmd,
++		       void *ptr)
++{
++	struct die_args *args = ptr;
++	struct pt_regs *regs = args->regs;
++
++	/* Bad memory access? */
++	if (cmd == DIE_PAGE_FAULT_NO_CONTEXT && atomic_read(&debugger_active)
++		&& kgdb_may_fault) {
++		kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++		return NOTIFY_STOP;
++	} else if (cmd == DIE_PAGE_FAULT)
++		/* A normal page fault, ignore. */
++		return NOTIFY_DONE;
++	else if ((cmd == DIE_NMI || cmd == DIE_NMI_IPI ||
++			  cmd == DIE_NMIWATCHDOG) && atomic_read(&debugger_active)) {
++		/* CPU roundup */
++		kgdb_nmihook(raw_smp_processor_id(), regs);
++		return NOTIFY_STOP;
++	} else if (cmd == DIE_DEBUG
++			   && atomic_read(&cpu_doing_single_step) == raw_smp_processor_id()
++			   && user_mode(regs)) {
++		/* single step exception from kernel space to user space so
++		 * eat the exception and continue the process
++		 */
++		printk(KERN_ERR "KGDB: trap/step from kernel to user space, resuming...\n");
++		kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c","",regs);
++		return NOTIFY_STOP;
++	} else if (cmd == DIE_NMI_IPI || cmd == DIE_NMI || user_mode(regs) ||
++			   (cmd == DIE_DEBUG && atomic_read(&debugger_active)))
++		/* Normal watchdog event or userspace debugging, or spurious
++		 * debug exception, ignore. */
++		return NOTIFY_DONE;
++
++	kgdb_handle_exception(args->trapnr, args->signr, args->err, regs);
++
++	return NOTIFY_STOP;
++}
++
++static struct notifier_block kgdb_notifier = {
++	.notifier_call = kgdb_notify,
++};
++
++int kgdb_arch_init(void)
++{
++	register_die_notifier(&kgdb_notifier);
++	return 0;
++}
++
++/*
++ * Skip an int3 exception when it occurs after a breakpoint has been
++ * removed. Backtrack eip by 1 since the int3 would have caused it to
++ * increment by 1.
++ */
++
++int kgdb_skipexception(int exception, struct pt_regs *regs)
++{
++	if (exception == 3 && kgdb_isremovedbreak(regs->eip - 1)) {
++		regs->eip -= 1;
++		return 1;
++	}
++	return 0;
++}
++
++unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs)
++{
++	if (exception == 3) {
++		return instruction_pointer(regs) - 1;
++	}
++	return instruction_pointer(regs);
++}
++
++struct kgdb_arch arch_kgdb_ops = {
++	.gdb_bpt_instr = {0xcc},
++	.flags = KGDB_HW_BREAKPOINT,
++	.set_hw_breakpoint = kgdb_set_hw_break,
++	.remove_hw_breakpoint = kgdb_remove_hw_break,
++	.remove_all_hw_break = kgdb_remove_all_hw_break,
++	.correct_hw_break = kgdb_correct_hw_break,
++};
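[Annotation: kgdb_correct_hw_break() packs each slot into DR7 the way the x86 debug
architecture expects: a global-enable bit at 2 << (n * 2) and a 4-bit len/type field
at bit 16 + n * 4. Worked through for slot 1 holding a 4-byte write watchpoint
(type = 1, len = 4 - 1 = 3):

	breakbit       = 2 << (1 << 1)                      = 0x8
	len/type field = (((3 << 2) | 1) << 16) << (1 << 2) = 0x00d00000
]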
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/process.c linux-2.6.22-try2/arch/i386/kernel/process.c
+--- linux-2.6.22-570/arch/i386/kernel/process.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/i386/kernel/process.c	2007-12-19 15:29:19.000000000 -0500
+@@ -179,13 +179,13 @@
+ 
+ 	/* endless idle loop with no priority at all */
+ 	while (1) {
+-		tick_nohz_stop_sched_tick();
+ 		while (!need_resched()) {
+ 			void (*idle)(void);
+ 
+ 			if (__get_cpu_var(cpu_idle_state))
+ 				__get_cpu_var(cpu_idle_state) = 0;
+ 
++			tick_nohz_stop_sched_tick();
+ 			check_pgt_cache();
+ 			rmb();
+ 			idle = pm_idle;
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/setup.c linux-2.6.22-try2/arch/i386/kernel/setup.c
+--- linux-2.6.22-570/arch/i386/kernel/setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/i386/kernel/setup.c	2007-12-19 15:29:23.000000000 -0500
+@@ -124,6 +124,7 @@
+ #endif
+ 
+ extern void early_cpu_init(void);
++extern void early_trap_init(void);
+ extern int root_mountflags;
+ 
+ unsigned long saved_videomode;
+@@ -514,6 +515,7 @@
+ 	memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
+ 	pre_setup_arch_hook();
+ 	early_cpu_init();
++	early_trap_init();
+ 
+ 	/*
+ 	 * FIXME: This isn't an official loader_type right
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/signal.c linux-2.6.22-try2/arch/i386/kernel/signal.c
+--- linux-2.6.22-570/arch/i386/kernel/signal.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/i386/kernel/signal.c	2007-12-19 15:29:23.000000000 -0500
+@@ -199,6 +199,13 @@
+ 	return eax;
+ 
+ badframe:
++	if (show_unhandled_signals && printk_ratelimit())
++		printk("%s%s[%d] bad frame in sigreturn frame:%p eip:%lx"
++		       " esp:%lx oeax:%lx\n",
++		    current->pid > 1 ? KERN_INFO : KERN_EMERG,
++		    current->comm, current->pid, frame, regs->eip,
++		    regs->esp, regs->orig_eax);
++
+ 	force_sig(SIGSEGV, current);
+ 	return 0;
+ }	
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/syscall_table.S linux-2.6.22-try2/arch/i386/kernel/syscall_table.S
+--- linux-2.6.22-570/arch/i386/kernel/syscall_table.S	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/i386/kernel/syscall_table.S	2007-12-19 15:29:24.000000000 -0500
+@@ -323,3 +323,6 @@
+ 	.long sys_signalfd
+ 	.long sys_timerfd
+ 	.long sys_eventfd
++	.long sys_revokeat
++	.long sys_frevoke		/* 325 */
++	.long sys_fallocate
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/traps.c linux-2.6.22-try2/arch/i386/kernel/traps.c
+--- linux-2.6.22-570/arch/i386/kernel/traps.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/i386/kernel/traps.c	2007-12-19 15:29:23.000000000 -0500
+@@ -97,6 +97,11 @@
+ 
+ int kstack_depth_to_print = 24;
+ static unsigned int code_bytes = 64;
++#ifdef CONFIG_STACK_UNWIND
++static int call_trace = 1;
++#else
++#define call_trace (-1)
++#endif
+ 
+ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+ {
+@@ -136,6 +141,34 @@
+ 	return ebp;
+ }
+ 
++struct ops_and_data {
++	struct stacktrace_ops *ops;
++	void *data;
++};
++
++static asmlinkage int
++dump_trace_unwind(struct unwind_frame_info *info, void *data)
++{
++	struct ops_and_data *oad = (struct ops_and_data *)data;
++	int n = 0;
++	unsigned long sp = UNW_SP(info);
++
++	if (arch_unw_user_mode(info))
++		return -1;
++	while (unwind(info) == 0 && UNW_PC(info)) {
++		n++;
++		oad->ops->address(oad->data, UNW_PC(info));
++		if (arch_unw_user_mode(info))
++			break;
++		if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
++		    && sp > UNW_SP(info))
++			break;
++		sp = UNW_SP(info);
++		touch_nmi_watchdog();
++	}
++	return n;
++}
++
+ #define MSG(msg) ops->warning(data, msg)
+ 
+ void dump_trace(struct task_struct *task, struct pt_regs *regs,
+@@ -147,6 +180,41 @@
+ 	if (!task)
+ 		task = current;
+ 
++	if (call_trace >= 0) {
++		int unw_ret = 0;
++		struct unwind_frame_info info;
++		struct ops_and_data oad = { .ops = ops, .data = data };
++
++		if (regs) {
++			if (unwind_init_frame_info(&info, task, regs) == 0)
++				unw_ret = dump_trace_unwind(&info, &oad);
++		} else if (task == current)
++			unw_ret = unwind_init_running(&info, dump_trace_unwind,
++						      &oad);
++		else {
++			if (unwind_init_blocked(&info, task) == 0)
++				unw_ret = dump_trace_unwind(&info, &oad);
++		}
++		if (unw_ret > 0) {
++			if (call_trace == 1 && !arch_unw_user_mode(&info)) {
++				ops->warning_symbol(data,
++					     "DWARF2 unwinder stuck at %s",
++					     UNW_PC(&info));
++				if (UNW_SP(&info) >= PAGE_OFFSET) {
++					MSG("Leftover inexact backtrace:");
++					stack = (void *)UNW_SP(&info);
++					if (!stack)
++						return;
++					ebp = UNW_FP(&info);
++				} else
++					MSG("Full inexact backtrace again:");
++			} else if (call_trace >= 1)
++				return;
++			else
++				MSG("Full inexact backtrace again:");
++		} else
++			MSG("Inexact backtrace:");
++	}
+ 	if (!stack) {
+ 		unsigned long dummy;
+ 		stack = &dummy;
+@@ -614,6 +682,13 @@
+ 
+ 	current->thread.error_code = error_code;
+ 	current->thread.trap_no = 13;
++	if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
++	    printk_ratelimit())
++		printk(KERN_INFO
++		    "%s[%d] general protection eip:%lx esp:%lx error:%lx\n",
++		    current->comm, current->pid,
++		    regs->eip, regs->esp, error_code);
++
+ 	force_sig(SIGSEGV, current);
+ 	return;
+ 
+@@ -854,6 +929,7 @@
+ 	 */
+ clear_dr7:
+ 	set_debugreg(0, 7);
++	notify_die(DIE_DEBUG, "debug2", regs, condition, error_code, SIGTRAP);
+ 	return;
+ 
+ debug_vm86:
+@@ -1118,6 +1194,12 @@
+ 	_set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3));
+ }
+ 
++/* Some traps need to be set early. */
++void __init early_trap_init(void) {
++	set_intr_gate(1,&debug);
++	set_system_intr_gate(3, &int3); /* int3 can be called from all */
++	set_intr_gate(14,&page_fault);
++}
+ 
+ void __init trap_init(void)
+ {
+@@ -1134,10 +1216,8 @@
+ #endif
+ 
+ 	set_trap_gate(0,&divide_error);
+-	set_intr_gate(1,&debug);
+ 	set_intr_gate(2,&nmi);
+-	set_system_intr_gate(3, &int3); /* int3/4 can be called from all */
+-	set_system_gate(4,&overflow);
++	set_system_gate(4,&overflow); /* int4/5 can be called from all */
+ 	set_trap_gate(5,&bounds);
+ 	set_trap_gate(6,&invalid_op);
+ 	set_trap_gate(7,&device_not_available);
+@@ -1147,7 +1227,6 @@
+ 	set_trap_gate(11,&segment_not_present);
+ 	set_trap_gate(12,&stack_segment);
+ 	set_trap_gate(13,&general_protection);
+-	set_intr_gate(14,&page_fault);
+ 	set_trap_gate(15,&spurious_interrupt_bug);
+ 	set_trap_gate(16,&coprocessor_error);
+ 	set_trap_gate(17,&alignment_check);
+@@ -1204,3 +1283,19 @@
+ 	return 1;
+ }
+ __setup("code_bytes=", code_bytes_setup);
++
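++/* Boot parameter: call_trace=old|both|newfallback|new */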
++#ifdef CONFIG_STACK_UNWIND
++static int __init call_trace_setup(char *s)
++{
++	if (strcmp(s, "old") == 0)
++		call_trace = -1;
++	else if (strcmp(s, "both") == 0)
++		call_trace = 0;
++	else if (strcmp(s, "newfallback") == 0)
++		call_trace = 1;
++	else if (strcmp(s, "new") == 2)
++		call_trace = 2;
++	return 1;
++}
++__setup("call_trace=", call_trace_setup);
++#endif
+diff -Nurb linux-2.6.22-570/arch/i386/kernel/unwind.S linux-2.6.22-try2/arch/i386/kernel/unwind.S
+--- linux-2.6.22-570/arch/i386/kernel/unwind.S	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/i386/kernel/unwind.S	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,36 @@
++/* Assembler support code for dwarf2 unwinder */
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++#include <asm/ptrace-abi.h>
++#include <asm/segment.h>
++#include <asm/asm-offsets.h>
++
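++/*
++ * arch_unwind_init_running(info, callback, arg): capture the caller's
++ * register state into the pt_regs area of *info, then jump to
++ * callback(info, arg) so unwinding can start from a live frame.
++ */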
++ENTRY(arch_unwind_init_running)
++	CFI_STARTPROC
++	movl	4(%esp), %edx
++	movl	(%esp), %ecx
++	leal	4(%esp), %eax
++	movl	%ebx, PT_EBX(%edx)
++	xorl	%ebx, %ebx
++	movl	%ebx, PT_ECX(%edx)
++	movl	%ebx, PT_EDX(%edx)
++	movl	%esi, PT_ESI(%edx)
++	movl	%edi, PT_EDI(%edx)
++	movl	%ebp, PT_EBP(%edx)
++	movl	%ebx, PT_EAX(%edx)
++	movl	$__USER_DS, PT_DS(%edx)
++	movl	$__USER_DS, PT_ES(%edx)
++	movl	$0, PT_FS(%edx)
++	movl	%ebx, PT_ORIG_EAX(%edx)
++	movl	%ecx, PT_EIP(%edx)
++	movl	12(%esp), %ecx
++	movl	$__KERNEL_CS, PT_CS(%edx)
++	movl	%ebx, PT_EFLAGS(%edx)
++	movl	%eax, PT_OLDESP(%edx)
++	movl	8(%esp), %eax
++	movl	%ecx, 8(%esp)
++	movl	PT_EBX(%edx), %ebx
++	movl	$__KERNEL_DS, PT_OLDSS(%edx)
++	jmpl	*%eax
++	CFI_ENDPROC
++ENDPROC(arch_unwind_init_running)
+diff -Nurb linux-2.6.22-570/arch/i386/mach-voyager/voyager_thread.c linux-2.6.22-try2/arch/i386/mach-voyager/voyager_thread.c
+--- linux-2.6.22-570/arch/i386/mach-voyager/voyager_thread.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/i386/mach-voyager/voyager_thread.c	2007-12-19 15:29:23.000000000 -0500
+@@ -52,7 +52,7 @@
+ 		NULL,
+ 	};
+ 
+-	if ((ret = call_usermodehelper(argv[0], argv, envp, 1)) != 0) {
++	if ((ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) {
+ 		printk(KERN_ERR "Voyager failed to run \"%s\": %i\n",
+ 		       string, ret);
+ 	}
+diff -Nurb linux-2.6.22-570/arch/i386/mm/fault.c linux-2.6.22-try2/arch/i386/mm/fault.c
+--- linux-2.6.22-570/arch/i386/mm/fault.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/i386/mm/fault.c	2007-12-19 15:29:23.000000000 -0500
+@@ -284,6 +284,8 @@
+ 	return 0;
+ }
+ 
++int show_unhandled_signals = 1;
++
+ /*
+  * This routine handles page faults.  It determines the address,
+  * and the problem, and then passes it off to one of the appropriate
+@@ -471,6 +473,14 @@
+ 		if (is_prefetch(regs, address, error_code))
+ 			return;
+ 
++		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
++		    printk_ratelimit()) {
++			printk("%s%s[%d]: segfault at %08lx eip %08lx "
++			    "esp %08lx error %lx\n",
++			    tsk->pid > 1 ? KERN_INFO : KERN_EMERG,
++			    tsk->comm, tsk->pid, address, regs->eip,
++			    regs->esp, error_code);
++		}
+ 		tsk->thread.cr2 = address;
+ 		/* Kernel addresses are always protection faults */
+ 		tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+@@ -508,6 +518,10 @@
+  	if (is_prefetch(regs, address, error_code))
+  		return;
+ 
++	if (notify_die(DIE_PAGE_FAULT_NO_CONTEXT, "no context", regs,
++				error_code, 14, SIGSEGV) == NOTIFY_STOP)
++		return;
++
+ /*
+  * Oops. The kernel tried to access some bad page. We'll have to
+  * terminate things with extreme prejudice.
+diff -Nurb linux-2.6.22-570/arch/ia64/kernel/Makefile linux-2.6.22-try2/arch/ia64/kernel/Makefile
+--- linux-2.6.22-570/arch/ia64/kernel/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ia64/kernel/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -35,6 +35,7 @@
+ obj-$(CONFIG_PCI_MSI)		+= msi_ia64.o
+ mca_recovery-y			+= mca_drv.o mca_drv_asm.o
+ obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o
++obj-$(CONFIG_KGDB)		+= kgdb.o kgdb-jmp.o
+ 
+ obj-$(CONFIG_IA64_ESI)		+= esi.o
+ ifneq ($(CONFIG_IA64_ESI),)
+diff -Nurb linux-2.6.22-570/arch/ia64/kernel/kgdb-jmp.S linux-2.6.22-try2/arch/ia64/kernel/kgdb-jmp.S
+--- linux-2.6.22-570/arch/ia64/kernel/kgdb-jmp.S	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/ia64/kernel/kgdb-jmp.S	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,238 @@
++/* setjmp() and longjmp() assembler support for kdb on ia64.
++
++   This code was copied from glibc CVS as of 2001-06-27 and modified where
++   necessary to fit the kernel.
++   Keith Owens <kaos@melbourne.sgi.com> 2001-06-27
++ */
++
++/* Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
++   Contributed by David Mosberger-Tang <davidm@hpl.hp.com>.
++
++   The GNU C Library is free software; you can redistribute it and/or
++   modify it under the terms of the GNU Library General Public License as
++   published by the Free Software Foundation; either version 2 of the
++   License, or (at your option) any later version.
++
++   The GNU C Library is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++   Library General Public License for more details.
++
++   You should have received a copy of the GNU Library General Public
++   License along with the GNU C Library; see the file COPYING.LIB.  If
++   not, write to the Free Software Foundation, Inc.,
++   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++*/
++
++#include <asm/asmmacro.h>
++GLOBAL_ENTRY(kgdb_fault_setjmp)
++	.prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
++	alloc loc1=ar.pfs,2,2,2,0
++	mov r16=ar.unat
++	;;
++	mov r17=ar.fpsr
++	mov r2=in0
++	add r3=8,in0
++	;;
++.mem.offset 0,0;
++	st8.spill.nta [r2]=sp,16	// r12 (sp)
++.mem.offset 8,0;
++	st8.spill.nta [r3]=gp,16	// r1 (gp)
++	;;
++	st8.nta [r2]=r16,16		// save caller's unat
++	st8.nta [r3]=r17,16		// save fpsr
++	add r8=0xa0,in0
++	;;
++.mem.offset 160,0;
++	st8.spill.nta [r2]=r4,16	// r4
++.mem.offset 168,0;
++	st8.spill.nta [r3]=r5,16	// r5
++	add r9=0xb0,in0
++	;;
++	stf.spill.nta [r8]=f2,32
++	stf.spill.nta [r9]=f3,32
++	mov loc0=rp
++	.body
++	;;
++	stf.spill.nta [r8]=f4,32
++	stf.spill.nta [r9]=f5,32
++	mov r17=b1
++	;;
++	stf.spill.nta [r8]=f16,32
++	stf.spill.nta [r9]=f17,32
++	mov r18=b2
++	;;
++	stf.spill.nta [r8]=f18,32
++	stf.spill.nta [r9]=f19,32
++	mov r19=b3
++	;;
++	stf.spill.nta [r8]=f20,32
++	stf.spill.nta [r9]=f21,32
++	mov r20=b4
++	;;
++	stf.spill.nta [r8]=f22,32
++	stf.spill.nta [r9]=f23,32
++	mov r21=b5
++	;;
++	stf.spill.nta [r8]=f24,32
++	stf.spill.nta [r9]=f25,32
++	mov r22=ar.lc
++	;;
++	stf.spill.nta [r8]=f26,32
++	stf.spill.nta [r9]=f27,32
++	mov r24=pr
++	;;
++	stf.spill.nta [r8]=f28,32
++	stf.spill.nta [r9]=f29,32
++	;;
++	stf.spill.nta [r8]=f30
++	stf.spill.nta [r9]=f31
++
++.mem.offset 0,0;
++	st8.spill.nta [r2]=r6,16	// r6
++.mem.offset 8,0;
++	st8.spill.nta [r3]=r7,16	// r7
++	;;
++	mov r23=ar.bsp
++	mov r25=ar.unat
++	st8.nta [r2]=loc0,16		// b0
++	st8.nta [r3]=r17,16		// b1
++	;;
++	st8.nta [r2]=r18,16		// b2
++	st8.nta [r3]=r19,16		// b3
++	;;
++	st8.nta [r2]=r20,16		// b4
++	st8.nta [r3]=r21,16		// b5
++	;;
++	st8.nta [r2]=loc1,16		// ar.pfs
++	st8.nta [r3]=r22,16		// ar.lc
++	;;
++	st8.nta [r2]=r24,16		// pr
++	st8.nta [r3]=r23,16		// ar.bsp
++	;;
++	st8.nta [r2]=r25		// ar.unat
++	st8.nta [r3]=in0		// &__jmp_buf
++	mov r8=0
++	mov rp=loc0
++	mov ar.pfs=loc1
++	br.ret.sptk.few rp
++END(kgdb_fault_setjmp)
++#define	pPos	p6	/* is rotate count positive? */
++#define	pNeg	p7	/* is rotate count negative? */
++GLOBAL_ENTRY(kgdb_fault_longjmp)
++	alloc r8=ar.pfs,2,1,0,0
++	mov r27=ar.rsc
++	add r2=0x98,in0		// r2 <- &jmpbuf.orig_jmp_buf_addr
++	;;
++	ld8 r8=[r2],-16		// r8 <- orig_jmp_buf_addr
++	mov r10=ar.bsp
++	and r11=~0x3,r27	// clear ar.rsc.mode
++	;;
++	flushrs			// flush dirty regs to backing store (must be first in insn grp)
++	ld8 r23=[r2],8		// r23 <- jmpbuf.ar_bsp
++	sub r8=r8,in0		// r8 <- &orig_jmpbuf - &jmpbuf
++	;;
++	ld8 r25=[r2]		// r25 <- jmpbuf.ar_unat
++	extr.u r8=r8,3,6	// r8 <- (&orig_jmpbuf - &jmpbuf)/8 & 0x3f
++	;;
++	cmp.lt pNeg,pPos=r8,r0
++	mov r2=in0
++	;;
++(pPos)	mov r16=r8
++(pNeg)	add r16=64,r8
++(pPos)	sub r17=64,r8
++(pNeg)	sub r17=r0,r8
++	;;
++	mov ar.rsc=r11		// put RSE in enforced lazy mode
++	shr.u r8=r25,r16
++	add r3=8,in0		// r3 <- &jmpbuf.r1
++	shl r9=r25,r17
++	;;
++	or r25=r8,r9
++	;;
++	mov r26=ar.rnat
++	mov ar.unat=r25		// setup ar.unat (NaT bits for r1, r4-r7, and r12)
++	;;
++	ld8.fill.nta sp=[r2],16	// r12 (sp)
++	ld8.fill.nta gp=[r3],16		// r1 (gp)
++	dep r11=-1,r23,3,6	// r11 <- ia64_rse_rnat_addr(jmpbuf.ar_bsp)
++	;;
++	ld8.nta r16=[r2],16		// caller's unat
++	ld8.nta r17=[r3],16		// fpsr
++	;;
++	ld8.fill.nta r4=[r2],16	// r4
++	ld8.fill.nta r5=[r3],16		// r5 (gp)
++	cmp.geu p8,p0=r10,r11	// p8 <- (ar.bsp >= jmpbuf.ar_bsp)
++	;;
++	ld8.fill.nta r6=[r2],16	// r6
++	ld8.fill.nta r7=[r3],16		// r7
++	;;
++	mov ar.unat=r16			// restore caller's unat
++	mov ar.fpsr=r17			// restore fpsr
++	;;
++	ld8.nta r16=[r2],16		// b0
++	ld8.nta r17=[r3],16		// b1
++	;;
++(p8)	ld8 r26=[r11]		// r26 <- *ia64_rse_rnat_addr(jmpbuf.ar_bsp)
++	mov ar.bspstore=r23	// restore ar.bspstore
++	;;
++	ld8.nta r18=[r2],16		// b2
++	ld8.nta r19=[r3],16		// b3
++	;;
++	ld8.nta r20=[r2],16		// b4
++	ld8.nta r21=[r3],16		// b5
++	;;
++	ld8.nta r11=[r2],16		// ar.pfs
++	ld8.nta r22=[r3],56		// ar.lc
++	;;
++	ld8.nta r24=[r2],32		// pr
++	mov b0=r16
++	;;
++	ldf.fill.nta f2=[r2],32
++	ldf.fill.nta f3=[r3],32
++	mov b1=r17
++	;;
++	ldf.fill.nta f4=[r2],32
++	ldf.fill.nta f5=[r3],32
++	mov b2=r18
++	;;
++	ldf.fill.nta f16=[r2],32
++	ldf.fill.nta f17=[r3],32
++	mov b3=r19
++	;;
++	ldf.fill.nta f18=[r2],32
++	ldf.fill.nta f19=[r3],32
++	mov b4=r20
++	;;
++	ldf.fill.nta f20=[r2],32
++	ldf.fill.nta f21=[r3],32
++	mov b5=r21
++	;;
++	ldf.fill.nta f22=[r2],32
++	ldf.fill.nta f23=[r3],32
++	mov ar.lc=r22
++	;;
++	ldf.fill.nta f24=[r2],32
++	ldf.fill.nta f25=[r3],32
++	cmp.eq p8,p9=0,in1
++	;;
++	ldf.fill.nta f26=[r2],32
++	ldf.fill.nta f27=[r3],32
++	mov ar.pfs=r11
++	;;
++	ldf.fill.nta f28=[r2],32
++	ldf.fill.nta f29=[r3],32
++	;;
++	ldf.fill.nta f30=[r2]
++	ldf.fill.nta f31=[r3]
++(p8)	mov r8=1
++
++	mov ar.rnat=r26		// restore ar.rnat
++	;;
++	mov ar.rsc=r27		// restore ar.rsc
++(p9)	mov r8=in1
++
++	invala			// virt. -> phys. regnum mapping may change
++	mov pr=r24,-1
++	br.ret.sptk.few rp
++END(kgdb_fault_longjmp)
+diff -Nurb linux-2.6.22-570/arch/ia64/kernel/kgdb.c linux-2.6.22-try2/arch/ia64/kernel/kgdb.c
+--- linux-2.6.22-570/arch/ia64/kernel/kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/ia64/kernel/kgdb.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,944 @@
++/*
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * General Public License for more details.
++ *
++ */
++
++/*
++ * Copyright (C) 2000-2001 VERITAS Software Corporation.
++ * (c) Copyright 2005 Hewlett-Packard Development Company, L.P.
++ *     Bob Picco <bob.picco@hp.com>
++ */
++/*
++ *  Contributor:     Lake Stevens Instrument Division
++ *  Written by:      Glenn Engel
++ *  Updated by:      Amit Kale <akale@veritas.com>
++ *  Modified for 386 by Jim Kingdon, Cygnus Support.
++ *  Original kgdb, compatibility with 2.1.xx kernel by David Grothe <dave@gcom.com>
++ */
++
++#include <linux/string.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/delay.h>
++#include <asm/system.h>
++#include <asm/ptrace.h>		/* for linux pt_regs struct */
++#include <asm/unwind.h>
++#include <asm/rse.h>
++#include <linux/kgdb.h>
++#include <linux/init.h>
++#include <asm/cacheflush.h>
++#include <asm/kdebug.h>
++
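++/*
++ * Register numbering follows gdb's ia64 layout: r0-r127, f0-f127
++ * (16 bytes each), p0-p63, b0-b7, then special and application
++ * registers, with the NaT bits at the end.
++ */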
++#define NUM_REGS 590
++#define REGISTER_BYTES (NUM_REGS*8+128*8)
++#define REGISTER_BYTE(N) (((N) * 8)                                    \
++	+ ((N) <= IA64_FR0_REGNUM ?                                     \
++	0 : 8 * (((N) > IA64_FR127_REGNUM) ? 128 : (N) - IA64_FR0_REGNUM)))
++#define REGISTER_SIZE(N)                                               \
++	(((N) >= IA64_FR0_REGNUM && (N) <= IA64_FR127_REGNUM) ? 16 : 8)
++#define IA64_GR0_REGNUM         0
++#define IA64_FR0_REGNUM         128
++#define IA64_FR127_REGNUM       (IA64_FR0_REGNUM+127)
++#define IA64_PR0_REGNUM         256
++#define IA64_BR0_REGNUM         320
++#define IA64_VFP_REGNUM         328
++#define IA64_PR_REGNUM          330
++#define IA64_IP_REGNUM          331
++#define IA64_PSR_REGNUM         332
++#define IA64_CFM_REGNUM         333
++#define IA64_AR0_REGNUM         334
++#define IA64_NAT0_REGNUM        462
++#define IA64_NAT31_REGNUM       (IA64_NAT0_REGNUM+31)
++#define IA64_NAT32_REGNUM       (IA64_NAT0_REGNUM+32)
++#define IA64_RSC_REGNUM		(IA64_AR0_REGNUM+16)
++#define IA64_BSP_REGNUM		(IA64_AR0_REGNUM+17)
++#define IA64_BSPSTORE_REGNUM	(IA64_AR0_REGNUM+18)
++#define IA64_RNAT_REGNUM	(IA64_AR0_REGNUM+19)
++#define IA64_FCR_REGNUM		(IA64_AR0_REGNUM+21)
++#define IA64_EFLAG_REGNUM	(IA64_AR0_REGNUM+24)
++#define IA64_CSD_REGNUM		(IA64_AR0_REGNUM+25)
++#define IA64_SSD_REGNUM		(IA64_AR0_REGNUM+26)
++#define IA64_CFLG_REGNUM	(IA64_AR0_REGNUM+27)
++#define IA64_FSR_REGNUM		(IA64_AR0_REGNUM+28)
++#define IA64_FIR_REGNUM		(IA64_AR0_REGNUM+29)
++#define IA64_FDR_REGNUM		(IA64_AR0_REGNUM+30)
++#define IA64_CCV_REGNUM		(IA64_AR0_REGNUM+32)
++#define IA64_UNAT_REGNUM	(IA64_AR0_REGNUM+36)
++#define IA64_FPSR_REGNUM	(IA64_AR0_REGNUM+40)
++#define IA64_ITC_REGNUM		(IA64_AR0_REGNUM+44)
++#define IA64_PFS_REGNUM		(IA64_AR0_REGNUM+64)
++#define IA64_LC_REGNUM		(IA64_AR0_REGNUM+65)
++#define IA64_EC_REGNUM		(IA64_AR0_REGNUM+66)
++
++#define	REGISTER_INDEX(N)	(REGISTER_BYTE(N) / sizeof (unsigned long))
++#define BREAK_INSTR_ALIGN	(~0xfULL)
++
++#define	ptoff(V)	offsetof(struct pt_regs, V)
++struct reg_to_ptreg_index {
++	unsigned int reg;
++	unsigned int ptregoff;
++};
++
++static struct reg_to_ptreg_index gr_reg_to_ptreg_index[] = {
++	{IA64_GR0_REGNUM + 1, ptoff(r1)},
++	{IA64_GR0_REGNUM + 2, ptoff(r2)},
++	{IA64_GR0_REGNUM + 3, ptoff(r3)},
++	{IA64_GR0_REGNUM + 8, ptoff(r8)},
++	{IA64_GR0_REGNUM + 9, ptoff(r9)},
++	{IA64_GR0_REGNUM + 10, ptoff(r10)},
++	{IA64_GR0_REGNUM + 11, ptoff(r11)},
++	{IA64_GR0_REGNUM + 12, ptoff(r12)},
++	{IA64_GR0_REGNUM + 13, ptoff(r13)},
++	{IA64_GR0_REGNUM + 14, ptoff(r14)},
++	{IA64_GR0_REGNUM + 15, ptoff(r15)},
++	{IA64_GR0_REGNUM + 16, ptoff(r16)},
++	{IA64_GR0_REGNUM + 17, ptoff(r17)},
++	{IA64_GR0_REGNUM + 18, ptoff(r18)},
++	{IA64_GR0_REGNUM + 19, ptoff(r19)},
++	{IA64_GR0_REGNUM + 20, ptoff(r20)},
++	{IA64_GR0_REGNUM + 21, ptoff(r21)},
++	{IA64_GR0_REGNUM + 22, ptoff(r22)},
++	{IA64_GR0_REGNUM + 23, ptoff(r23)},
++	{IA64_GR0_REGNUM + 24, ptoff(r24)},
++	{IA64_GR0_REGNUM + 25, ptoff(r25)},
++	{IA64_GR0_REGNUM + 26, ptoff(r26)},
++	{IA64_GR0_REGNUM + 27, ptoff(r27)},
++	{IA64_GR0_REGNUM + 28, ptoff(r28)},
++	{IA64_GR0_REGNUM + 29, ptoff(r29)},
++	{IA64_GR0_REGNUM + 30, ptoff(r30)},
++	{IA64_GR0_REGNUM + 31, ptoff(r31)},
++};
++
++static struct reg_to_ptreg_index br_reg_to_ptreg_index[] = {
++	{IA64_BR0_REGNUM, ptoff(b0)},
++	{IA64_BR0_REGNUM + 6, ptoff(b6)},
++	{IA64_BR0_REGNUM + 7, ptoff(b7)},
++};
++
++static struct reg_to_ptreg_index ar_reg_to_ptreg_index[] = {
++	{IA64_PFS_REGNUM, ptoff(ar_pfs)},
++	{IA64_UNAT_REGNUM, ptoff(ar_unat)},
++	{IA64_RNAT_REGNUM, ptoff(ar_rnat)},
++	{IA64_BSPSTORE_REGNUM, ptoff(ar_bspstore)},
++	{IA64_RSC_REGNUM, ptoff(ar_rsc)},
++	{IA64_CSD_REGNUM, ptoff(ar_csd)},
++	{IA64_SSD_REGNUM, ptoff(ar_ssd)},
++	{IA64_FPSR_REGNUM, ptoff(ar_fpsr)},
++	{IA64_CCV_REGNUM, ptoff(ar_ccv)},
++};
++
++extern atomic_t cpu_doing_single_step;
++
++static int kgdb_gr_reg(int regnum, struct unw_frame_info *info,
++	unsigned long *reg, int rw)
++{
++	char nat;
++
++	if ((regnum >= IA64_GR0_REGNUM && regnum <= (IA64_GR0_REGNUM + 1)) ||
++		(regnum >= (IA64_GR0_REGNUM + 4) &&
++		regnum <= (IA64_GR0_REGNUM + 7)))
++		return !unw_access_gr(info, regnum - IA64_GR0_REGNUM,
++		reg, &nat, rw);
++	else
++		return 0;
++}
++static int kgdb_gr_ptreg(int regnum, struct pt_regs * ptregs,
++	struct unw_frame_info *info, unsigned long *reg, int rw)
++{
++	int i, result = 1;
++	char nat;
++
++	if (!((regnum >= (IA64_GR0_REGNUM + 2) &&
++		regnum <= (IA64_GR0_REGNUM + 3)) ||
++		(regnum >= (IA64_GR0_REGNUM + 8) &&
++		regnum <= (IA64_GR0_REGNUM + 15)) ||
++		(regnum >= (IA64_GR0_REGNUM + 16) &&
++		regnum <= (IA64_GR0_REGNUM + 31))))
++		return 0;
++	else if (rw && ptregs) {
++		for (i = 0; i < ARRAY_SIZE(gr_reg_to_ptreg_index); i++)
++			if (gr_reg_to_ptreg_index[i].reg == regnum) {
++				*((unsigned long *)(((void *)ptregs) +
++				gr_reg_to_ptreg_index[i].ptregoff)) = *reg;
++				break;
++			}
++	} else if (!rw && ptregs) {
++		for (i = 0; i < ARRAY_SIZE(gr_reg_to_ptreg_index); i++)
++			if (gr_reg_to_ptreg_index[i].reg == regnum) {
++				*reg = *((unsigned long *)
++				(((void *)ptregs) +
++				 gr_reg_to_ptreg_index[i].ptregoff));
++				break;
++			}
++	} else
++		result = !unw_access_gr(info, regnum - IA64_GR0_REGNUM,
++					reg, &nat, rw);
++	return result;
++}
++
++static int kgdb_br_reg(int regnum, struct pt_regs * ptregs,
++	struct unw_frame_info *info, unsigned long *reg, int rw)
++{
++	int i, result = 1;
++
++	if (!(regnum >= IA64_BR0_REGNUM && regnum <= (IA64_BR0_REGNUM + 7)))
++		return 0;
++
++	switch (regnum) {
++	case IA64_BR0_REGNUM:
++	case IA64_BR0_REGNUM + 6:
++	case IA64_BR0_REGNUM + 7:
++		if (rw) {
++			for (i = 0; i < ARRAY_SIZE(br_reg_to_ptreg_index); i++)
++				if (br_reg_to_ptreg_index[i].reg == regnum) {
++					*((unsigned long *)
++					(((void *)ptregs) +
++					br_reg_to_ptreg_index[i].ptregoff)) =
++					*reg;
++					break;
++				}
++		} else
++			for (i = 0; i < ARRAY_SIZE(br_reg_to_ptreg_index); i++)
++				if (br_reg_to_ptreg_index[i].reg == regnum) {
++						*reg = *((unsigned long *)
++						(((void *)ptregs) +
++						br_reg_to_ptreg_index[i].
++						ptregoff));
++						break;
++				}
++		break;
++	case IA64_BR0_REGNUM + 1:
++	case IA64_BR0_REGNUM + 2:
++	case IA64_BR0_REGNUM + 3:
++	case IA64_BR0_REGNUM + 4:
++	case IA64_BR0_REGNUM + 5:
++		result = !unw_access_br(info, regnum - IA64_BR0_REGNUM,
++				reg, rw);
++		break;
++	}
++
++	return result;
++}
++
++static int kgdb_fr_reg(int regnum, char *inbuffer, struct pt_regs * ptregs,
++	struct unw_frame_info *info, unsigned long *reg,
++	struct ia64_fpreg *freg, int rw)
++{
++	int result = 1;
++
++	if (!(regnum >= IA64_FR0_REGNUM && regnum <= (IA64_FR0_REGNUM + 127)))
++		return 0;
++
++	switch (regnum) {
++	case IA64_FR0_REGNUM + 6:
++	case IA64_FR0_REGNUM + 7:
++	case IA64_FR0_REGNUM + 8:
++	case IA64_FR0_REGNUM + 9:
++	case IA64_FR0_REGNUM + 10:
++	case IA64_FR0_REGNUM + 11:
++	case IA64_FR0_REGNUM + 12:
++		if (rw) {
++			char *ptr = inbuffer;
++
++			freg->u.bits[0] = *reg;
++			kgdb_hex2long(&ptr, &freg->u.bits[1]);
++			*(&ptregs->f6 + (regnum - (IA64_FR0_REGNUM + 6))) =
++				*freg;
++			break;
++		} else if (!ptregs)
++			result = !unw_access_fr(info, regnum - IA64_FR0_REGNUM,
++				freg, rw);
++		else
++			*freg =
++			*(&ptregs->f6 + (regnum - (IA64_FR0_REGNUM + 6)));
++		break;
++	default:
++		if (!rw)
++			result = !unw_access_fr(info, regnum - IA64_FR0_REGNUM,
++				freg, rw);
++		else
++			result = 0;
++		break;
++	}
++
++	return result;
++}
++
++static int kgdb_ar_reg(int regnum, struct pt_regs * ptregs,
++	struct unw_frame_info *info, unsigned long *reg, int rw)
++{
++	int result = 0, i;
++
++	if (!(regnum >= IA64_AR0_REGNUM && regnum <= IA64_EC_REGNUM))
++		return 0;
++
++	if (rw && ptregs) {
++		for (i = 0; i < ARRAY_SIZE(ar_reg_to_ptreg_index); i++)
++			if (ar_reg_to_ptreg_index[i].reg == regnum) {
++				*((unsigned long *) (((void *)ptregs) +
++				ar_reg_to_ptreg_index[i].ptregoff)) =
++					*reg;
++				result = 1;
++				break;
++			}
++	} else if (ptregs) {
++		for (i = 0; i < ARRAY_SIZE(ar_reg_to_ptreg_index); i++)
++			if (ar_reg_to_ptreg_index[i].reg == regnum) {
++				*reg = *((unsigned long *) (((void *)ptregs) +
++					ar_reg_to_ptreg_index[i].ptregoff));
++					result = 1;
++				break;
++			}
++	}
++
++	if (result)
++		return result;
++
++	result = 1;
++
++	switch (regnum) {
++	case IA64_CSD_REGNUM:
++		result = !unw_access_ar(info, UNW_AR_CSD, reg, rw);
++		break;
++	case IA64_SSD_REGNUM:
++		result = !unw_access_ar(info, UNW_AR_SSD, reg, rw);
++		break;
++	case IA64_UNAT_REGNUM:
++		result = !unw_access_ar(info, UNW_AR_UNAT, reg, rw);
++		break;
++	case IA64_RNAT_REGNUM:
++		result = !unw_access_ar(info, UNW_AR_RNAT, reg, rw);
++		break;
++	case IA64_BSPSTORE_REGNUM:
++		result = !unw_access_ar(info, UNW_AR_BSPSTORE, reg, rw);
++		break;
++	case IA64_PFS_REGNUM:
++		result = !unw_access_ar(info, UNW_AR_PFS, reg, rw);
++		break;
++	case IA64_LC_REGNUM:
++		result = !unw_access_ar(info, UNW_AR_LC, reg, rw);
++		break;
++	case IA64_EC_REGNUM:
++		result = !unw_access_ar(info, UNW_AR_EC, reg, rw);
++		break;
++	case IA64_FPSR_REGNUM:
++		result = !unw_access_ar(info, UNW_AR_FPSR, reg, rw);
++		break;
++	case IA64_RSC_REGNUM:
++		result = !unw_access_ar(info, UNW_AR_RSC, reg, rw);
++		break;
++	case IA64_CCV_REGNUM:
++		result = !unw_access_ar(info, UNW_AR_CCV, reg, rw);
++		break;
++	default:
++		result = 0;
++	}
++
++	return result;
++}
++
++void kgdb_get_reg(char *outbuffer, int regnum, struct unw_frame_info *info,
++	struct pt_regs *ptregs)
++{
++	unsigned long reg, size = 0, *mem = &reg;
++	struct ia64_fpreg freg;
++
++	if (kgdb_gr_reg(regnum, info, &reg, 0) ||
++		kgdb_gr_ptreg(regnum, ptregs, info, &reg, 0) ||
++		kgdb_br_reg(regnum, ptregs, info, &reg, 0) ||
++		kgdb_ar_reg(regnum, ptregs, info, &reg, 0))
++			size = sizeof(reg);
++	else if (kgdb_fr_reg(regnum, NULL, ptregs, info, &reg, &freg, 0)) {
++		size = sizeof(freg);
++		mem = (unsigned long *)&freg;
++	} else if (regnum == IA64_IP_REGNUM) {
++		if (!ptregs) {
++			unw_get_ip(info, &reg);
++			size = sizeof(reg);
++		} else {
++			reg = ptregs->cr_iip;
++			size = sizeof(reg);
++		}
++	} else if (regnum == IA64_CFM_REGNUM) {
++		if (!ptregs)
++			unw_get_cfm(info, &reg);
++		else
++			reg = ptregs->cr_ifs;
++		size = sizeof(reg);
++	} else if (regnum == IA64_PSR_REGNUM) {
++		if (!ptregs && kgdb_usethread)
++			ptregs = (struct pt_regs *)
++			((unsigned long)kgdb_usethread +
++			IA64_STK_OFFSET) - 1;
++		if (ptregs)
++			reg = ptregs->cr_ipsr;
++		size = sizeof(reg);
++	} else if (regnum == IA64_PR_REGNUM) {
++		if (ptregs)
++			reg = ptregs->pr;
++		else
++			unw_access_pr(info, &reg, 0);
++		size = sizeof(reg);
++	} else if (regnum == IA64_BSP_REGNUM) {
++		unw_get_bsp(info, &reg);
++		size = sizeof(reg);
++	}
++
++	if (size) {
++		kgdb_mem2hex((char *) mem, outbuffer, size);
++		outbuffer[size*2] = 0;
++	}
++	else
++		strcpy(outbuffer, "E0");
++
++	return;
++}
++
++void kgdb_put_reg(char *inbuffer, char *outbuffer, int regnum,
++		  struct unw_frame_info *info, struct pt_regs *ptregs)
++{
++	unsigned long reg;
++	struct ia64_fpreg freg;
++	char *ptr = inbuffer;
++
++	kgdb_hex2long(&ptr, &reg);
++	strcpy(outbuffer, "OK");
++
++	if (kgdb_gr_reg(regnum, info, &reg, 1) ||
++		kgdb_gr_ptreg(regnum, ptregs, info, &reg, 1) ||
++		kgdb_br_reg(regnum, ptregs, info, &reg, 1) ||
++		kgdb_fr_reg(regnum, inbuffer, ptregs, info, &reg, &freg, 1) ||
++		kgdb_ar_reg(regnum, ptregs, info, &reg, 1)) ;
++	else if (regnum == IA64_IP_REGNUM)
++		ptregs->cr_iip = reg;
++	else if (regnum == IA64_CFM_REGNUM)
++		ptregs->cr_ifs = reg;
++	else if (regnum == IA64_PSR_REGNUM)
++		ptregs->cr_ipsr = reg;
++	else if (regnum == IA64_PR_REGNUM)
++		ptregs->pr = reg;
++	else
++		strcpy(outbuffer, "E01");
++	return;
++}
++
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++}
++
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++}
++
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++
++}
++
++#define	MAX_HW_BREAKPOINT	(20)
++long hw_break_total_dbr, hw_break_total_ibr;
++#define	HW_BREAKPOINT	(hw_break_total_dbr + hw_break_total_ibr)
++#define	WATCH_INSTRUCTION	0x0
++#define WATCH_WRITE		0x1
++#define	WATCH_READ		0x2
++#define	WATCH_ACCESS		0x3
++
++#define	HWCAP_DBR	((1 << WATCH_WRITE) | (1 << WATCH_READ))
++#define	HWCAP_IBR	(1 << WATCH_INSTRUCTION)
++struct hw_breakpoint {
++	unsigned enabled;
++	unsigned long capable;
++	unsigned long type;
++	unsigned long mask;
++	unsigned long addr;
++} *breakinfo;
++
++static struct hw_breakpoint hwbreaks[MAX_HW_BREAKPOINT];
++
++enum instruction_type { A, I, M, F, B, L, X, u };
++
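++/* Instruction slot types for each of the 32 IA-64 bundle templates. */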
++static enum instruction_type bundle_encoding[32][3] = {
++	{M, I, I},		/* 00 */
++	{M, I, I},		/* 01 */
++	{M, I, I},		/* 02 */
++	{M, I, I},		/* 03 */
++	{M, L, X},		/* 04 */
++	{M, L, X},		/* 05 */
++	{u, u, u},		/* 06 */
++	{u, u, u},		/* 07 */
++	{M, M, I},		/* 08 */
++	{M, M, I},		/* 09 */
++	{M, M, I},		/* 0A */
++	{M, M, I},		/* 0B */
++	{M, F, I},		/* 0C */
++	{M, F, I},		/* 0D */
++	{M, M, F},		/* 0E */
++	{M, M, F},		/* 0F */
++	{M, I, B},		/* 10 */
++	{M, I, B},		/* 11 */
++	{M, B, B},		/* 12 */
++	{M, B, B},		/* 13 */
++	{u, u, u},		/* 14 */
++	{u, u, u},		/* 15 */
++	{B, B, B},		/* 16 */
++	{B, B, B},		/* 17 */
++	{M, M, B},		/* 18 */
++	{M, M, B},		/* 19 */
++	{u, u, u},		/* 1A */
++	{u, u, u},		/* 1B */
++	{M, F, B},		/* 1C */
++	{M, F, B},		/* 1D */
++	{u, u, u},		/* 1E */
++	{u, u, u},		/* 1F */
++};
++
++int kgdb_validate_break_address(unsigned long addr)
++{
++	int error;
++	char tmp_variable[BREAK_INSTR_SIZE];
++	error = kgdb_get_mem((char *)(addr & BREAK_INSTR_ALIGN), tmp_variable,
++		BREAK_INSTR_SIZE);
++	return error;
++}
++
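++/*
++ * Plant a break instruction in the bundle slot containing addr, after
++ * saving the original bundle so the breakpoint can be removed again.
++ * The kernel entry point (_start) is deliberately left untouched.
++ */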
++int kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
++{
++	extern unsigned long _start[];
++	unsigned long slot = addr & BREAK_INSTR_ALIGN, bundle_addr;
++	unsigned long template;
++	struct bundle {
++		struct {
++			unsigned long long template:5;
++			unsigned long long slot0:41;
++			unsigned long long slot1_p0:64 - 46;
++		} quad0;
++		struct {
++			unsigned long long slot1_p1:41 - (64 - 46);
++			unsigned long long slot2:41;
++		} quad1;
++	} bundle;
++	int ret;
++
++	bundle_addr = addr & ~0xFULL;
++
++	if (bundle_addr == (unsigned long)_start)
++		return 0;
++
++	ret = kgdb_get_mem((char *)bundle_addr, (char *)&bundle,
++			   BREAK_INSTR_SIZE);
++	if (ret < 0)
++		return ret;
++
++	if (slot > 2)
++		slot = 0;
++
++	memcpy(saved_instr, &bundle, BREAK_INSTR_SIZE);
++	template = bundle.quad0.template;
++
++	if (slot == 1 && bundle_encoding[template][1] == L)
++		slot = 2;
++
++	switch (slot) {
++	case 0:
++		bundle.quad0.slot0 = BREAKNUM;
++		break;
++	case 1:
++		bundle.quad0.slot1_p0 = BREAKNUM;
++		bundle.quad1.slot1_p1 = (BREAKNUM >> (64 - 46));
++		break;
++	case 2:
++		bundle.quad1.slot2 = BREAKNUM;
++		break;
++	}
++
++	return kgdb_set_mem((char *)bundle_addr, (char *)&bundle,
++			    BREAK_INSTR_SIZE);
++}
++
++int kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle)
++{
++	extern unsigned long _start[];
++
++	addr = addr & BREAK_INSTR_ALIGN;
++	if (addr == (unsigned long)_start)
++		return 0;
++	return kgdb_set_mem((char *)addr, (char *)bundle, BREAK_INSTR_SIZE);
++}
++
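++/*
++ * Per-CPU unwind state captured while CPUs are rounded up; the value
++ * (struct unw_frame_info *)1 marks a CPU that stopped in user mode.
++ */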
++static volatile struct smp_unw {
++	struct unw_frame_info *unw;
++	struct task_struct *task;
++} smp_unw[NR_CPUS];
++
++static inline int kgdb_get_blocked_state(struct task_struct *p,
++					 struct unw_frame_info *unw)
++{
++	unsigned long ip;
++	int count = 0;
++
++	unw_init_from_blocked_task(unw, p);
++	ip = 0UL;
++	do {
++		if (unw_unwind(unw) < 0)
++			return -1;
++		unw_get_ip(unw, &ip);
++		if (!in_sched_functions(ip))
++			break;
++	} while (count++ < 16);
++
++	if (!ip)
++		return -1;
++	else
++		return 0;
++}
++
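++/*
++ * Park this CPU in the debugger.  psr.db (hardware breakpoints) is
++ * cleared while we wait so the stub itself cannot trip a watchpoint.
++ */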
++static inline void kgdb_wait(struct pt_regs *regs)
++{
++	unsigned long hw_breakpoint_status = ia64_getreg(_IA64_REG_PSR);
++	if (hw_breakpoint_status & IA64_PSR_DB)
++		ia64_setreg(_IA64_REG_PSR_L,
++			    hw_breakpoint_status ^ IA64_PSR_DB);
++	kgdb_nmihook(smp_processor_id(), regs);
++	if (hw_breakpoint_status & IA64_PSR_DB)
++		ia64_setreg(_IA64_REG_PSR_L, hw_breakpoint_status);
++
++	return;
++}
++
++static inline void normalize(struct unw_frame_info *running,
++			     struct pt_regs *regs)
++{
++	unsigned long sp;
++
++	do {
++		unw_get_sp(running, &sp);
++		if ((sp + 0x10) >= (unsigned long)regs)
++			break;
++	} while (unw_unwind(running) >= 0);
++
++	return;
++}
++
++static void kgdb_init_running(struct unw_frame_info *unw, void *data)
++{
++	struct pt_regs *regs;
++
++	regs = data;
++	normalize(unw, regs);
++	smp_unw[smp_processor_id()].unw = unw;
++	kgdb_wait(regs);
++}
++
++void kgdb_wait_ipi(struct pt_regs *regs)
++{
++	struct unw_frame_info unw;
++
++	smp_unw[smp_processor_id()].task = current;
++
++	if (user_mode(regs)) {
++		smp_unw[smp_processor_id()].unw = (struct unw_frame_info *)1;
++		kgdb_wait(regs);
++	} else {
++		if (current->state == TASK_RUNNING)
++			unw_init_running(kgdb_init_running, regs);
++		else {
++			if (kgdb_get_blocked_state(current, &unw))
++				smp_unw[smp_processor_id()].unw =
++				    (struct unw_frame_info *)1;
++			else
++				smp_unw[smp_processor_id()].unw = &unw;
++			kgdb_wait(regs);
++		}
++	}
++
++	smp_unw[smp_processor_id()].unw = NULL;
++	return;
++}
++
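++/* Pull all other online CPUs into the debugger via the KGDB IPI. */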
++void kgdb_roundup_cpus(unsigned long flags)
++{
++	if (num_online_cpus() > 1)
++		smp_send_nmi_allbutself();
++}
++
++static volatile int kgdb_hwbreak_sstep[NR_CPUS];
++
++static int kgdb_notify(struct notifier_block *self, unsigned long cmd,
++	void *ptr)
++{
++	struct die_args *args = ptr;
++	struct pt_regs *regs = args->regs;
++	unsigned long err = args->err;
++
++	switch (cmd) {
++	default:
++		return NOTIFY_DONE;
++	case DIE_PAGE_FAULT_NO_CONTEXT:
++		if (atomic_read(&debugger_active) && kgdb_may_fault) {
++			kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++			return NOTIFY_STOP;
++		}
++		break;
++	case DIE_BREAK:
++		if (user_mode(regs) || err == 0x80001)
++			return NOTIFY_DONE;
++		break;
++	case DIE_FAULT:
++		if (user_mode(regs))
++			return NOTIFY_DONE;
++		else if (err == 36 && kgdb_hwbreak_sstep[smp_processor_id()]) {
++			kgdb_hwbreak_sstep[smp_processor_id()] = 0;
++			regs->cr_ipsr &= ~IA64_PSR_SS;
++			return NOTIFY_STOP;
++		}
++	case DIE_MCA_MONARCH_PROCESS:
++	case DIE_INIT_MONARCH_PROCESS:
++		break;
++	}
++
++	kgdb_handle_exception(args->trapnr, args->signr, args->err, regs);
++	return NOTIFY_STOP;
++}
++
++static struct notifier_block kgdb_notifier = {
++	.notifier_call = kgdb_notify,
++};
++
++int kgdb_arch_init(void)
++{
++	atomic_notifier_chain_register(&ia64die_chain, &kgdb_notifier);
++	return 0;
++}
++
++static void do_kgdb_handle_exception(struct unw_frame_info *, void *data);
++
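++/* Exception state bundled up so it can pass through unw_init_running(). */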
++struct kgdb_state {
++	int e_vector;
++	int signo;
++	unsigned long err_code;
++	struct pt_regs *regs;
++	struct unw_frame_info *unw;
++	char *inbuf;
++	char *outbuf;
++	int unwind;
++	int ret;
++};
++
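++/* Set the resume address: bundle address in cr_iip, slot number in psr.ri. */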
++static inline void kgdb_pc(struct pt_regs *regs, unsigned long pc)
++{
++	regs->cr_iip = pc & ~0xf;
++	ia64_psr(regs)->ri = pc & 0x3;
++	return;
++}
++
++int kgdb_arch_handle_exception(int e_vector, int signo,
++			       int err_code, char *remcom_in_buffer,
++			       char *remcom_out_buffer,
++			       struct pt_regs *linux_regs)
++{
++	struct kgdb_state info;
++
++	info.e_vector = e_vector;
++	info.signo = signo;
++	info.err_code = err_code;
++	info.unw = (void *)0;
++	info.inbuf = remcom_in_buffer;
++	info.outbuf = remcom_out_buffer;
++	info.unwind = 0;
++	info.ret = -1;
++
++	if (remcom_in_buffer[0] == 'c' || remcom_in_buffer[0] == 's') {
++		info.regs = linux_regs;
++		do_kgdb_handle_exception(NULL, &info);
++	} else if (kgdb_usethread == current) {
++		info.regs = linux_regs;
++		info.unwind = 1;
++		unw_init_running(do_kgdb_handle_exception, &info);
++	} else if (kgdb_usethread->state != TASK_RUNNING) {
++		struct unw_frame_info unw_info;
++
++		if (kgdb_get_blocked_state(kgdb_usethread, &unw_info)) {
++			info.ret = 1;
++			goto bad;
++		}
++		info.regs = NULL;
++		do_kgdb_handle_exception(&unw_info, &info);
++	} else {
++		int i;
++
++		for (i = 0; i < NR_CPUS; i++)
++			if (smp_unw[i].task == kgdb_usethread && smp_unw[i].unw
++			    && smp_unw[i].unw != (struct unw_frame_info *)1) {
++				info.regs = NULL;
++				do_kgdb_handle_exception(smp_unw[i].unw, &info);
++				break;
++			} else {
++				info.ret = 1;
++				goto bad;
++			}
++	}
++
++bad:
++	if (info.ret != -1 && remcom_in_buffer[0] == 'p') {
++		unsigned long bad = 0xbad4badbadbadbadUL;
++
++		printk("kgdb_arch_handle_exception: p packet bad (%s)\n",
++		       remcom_in_buffer);
++		kgdb_mem2hex((char *)&bad, remcom_out_buffer, sizeof(bad));
++		remcom_out_buffer[sizeof(bad) * 2] = 0;
++		info.ret = -1;
++	}
++	return info.ret;
++}
++
++/*
++ * This is done because I evidently made an incorrect 'p' encoding
++ * when my patch for gdb was committed. It was later corrected. This
++ * check supports both my wrong encoding of the register number and
++ * the correct encoding. Eventually this should be eliminated and
++ * kgdb_hex2long should be demarshalling the regnum.
++ */
++static inline int check_packet(unsigned int regnum, char *packet)
++{
++	static int check_done, swap;
++	unsigned long reglong;
++
++	if (likely(check_done)) {
++		if (swap) {
++			kgdb_hex2long(&packet, &reglong);
++			regnum = (int) reglong;
++		}
++
++	} else {
++		if (regnum > NUM_REGS) {
++			kgdb_hex2long(&packet, &reglong);
++			regnum = (int) reglong;
++			swap = 1;
++		}
++		check_done = 1;
++	}
++	return regnum;
++}
++
++static void do_kgdb_handle_exception(struct unw_frame_info *unw_info,
++	void *data)
++{
++	long addr;
++	char *ptr;
++	unsigned long newPC;
++	int e_vector, signo;
++	unsigned long err_code;
++	struct pt_regs *linux_regs;
++	struct kgdb_state *info;
++	char *remcom_in_buffer, *remcom_out_buffer;
++
++	info = data;
++	info->unw = unw_info;
++	e_vector = info->e_vector;
++	signo = info->signo;
++	err_code = info->err_code;
++	remcom_in_buffer = info->inbuf;
++	remcom_out_buffer = info->outbuf;
++	linux_regs = info->regs;
++
++	if (info->unwind)
++		normalize(unw_info, linux_regs);
++
++	switch (remcom_in_buffer[0]) {
++	case 'p':
++		{
++			unsigned int regnum;
++
++			kgdb_hex2mem(&remcom_in_buffer[1], (char *)&regnum,
++				     sizeof(regnum));
++			regnum = check_packet(regnum, &remcom_in_buffer[1]);
++			if (regnum >= NUM_REGS) {
++				remcom_out_buffer[0] = 'E';
++				remcom_out_buffer[1] = 0;
++			} else
++				kgdb_get_reg(remcom_out_buffer, regnum,
++					     unw_info, linux_regs);
++			break;
++		}
++	case 'P':
++		{
++			unsigned int regno;
++			long v;
++			char *ptr;
++
++			ptr = &remcom_in_buffer[1];
++			if ((!kgdb_usethread || kgdb_usethread == current) &&
++			    kgdb_hex2long(&ptr, &v) &&
++			    *ptr++ == '=' && (v >= 0)) {
++				regno = (unsigned int)v;
++				regno = (regno >= NUM_REGS ? 0 : regno);
++				kgdb_put_reg(ptr, remcom_out_buffer, regno,
++					     unw_info, linux_regs);
++			} else
++				strcpy(remcom_out_buffer, "E01");
++			break;
++		}
++	case 'c':
++	case 's':
++		if (e_vector == TRAP_BRKPT && err_code == KGDBBREAKNUM) {
++			if (ia64_psr(linux_regs)->ri < 2)
++				kgdb_pc(linux_regs, linux_regs->cr_iip +
++					ia64_psr(linux_regs)->ri + 1);
++			else
++				kgdb_pc(linux_regs, linux_regs->cr_iip + 16);
++		}
++
++		/* try to read optional parameter, pc unchanged if no parm */
++		ptr = &remcom_in_buffer[1];
++		if (kgdb_hex2long(&ptr, &addr)) {
++			linux_regs->cr_iip = addr;
++		}
++		newPC = linux_regs->cr_iip;
++
++		/* clear the trace bit */
++		linux_regs->cr_ipsr &= ~IA64_PSR_SS;
++
++		atomic_set(&cpu_doing_single_step, -1);
++
++		/* set the trace bit if we're stepping or took a hardware break */
++		if (remcom_in_buffer[0] == 's' || e_vector == TRAP_HWBKPT) {
++			linux_regs->cr_ipsr |= IA64_PSR_SS;
++			debugger_step = 1;
++			if (kgdb_contthread)
++				atomic_set(&cpu_doing_single_step,
++					   smp_processor_id());
++		}
++
++		kgdb_correct_hw_break();
++
++		/* if not hardware breakpoint, then reenable them */
++		if (e_vector != TRAP_HWBKPT)
++			linux_regs->cr_ipsr |= IA64_PSR_DB;
++		else {
++			kgdb_hwbreak_sstep[smp_processor_id()] = 1;
++			linux_regs->cr_ipsr &= ~IA64_PSR_DB;
++		}
++
++		info->ret = 0;
++		break;
++	default:
++		break;
++	}
++
++	return;
++}
++
++struct kgdb_arch arch_kgdb_ops = {
++	.gdb_bpt_instr = {0xcc},
++};
+diff -Nurb linux-2.6.22-570/arch/ia64/kernel/smp.c linux-2.6.22-try2/arch/ia64/kernel/smp.c
+--- linux-2.6.22-570/arch/ia64/kernel/smp.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ia64/kernel/smp.c	2007-12-19 15:29:23.000000000 -0500
+@@ -48,6 +48,7 @@
+ #include <asm/tlbflush.h>
+ #include <asm/unistd.h>
+ #include <asm/mca.h>
++#include <linux/kgdb.h>
+ 
+ /*
+  * Note: alignment of 4 entries/cacheline was empirically determined
+@@ -79,6 +80,9 @@
+ 
+ #define IPI_CALL_FUNC		0
+ #define IPI_CPU_STOP		1
++#ifdef	CONFIG_KGDB
++#define	IPI_KGDB_INTERRUPT	2
++#endif
+ #define IPI_KDUMP_CPU_STOP	3
+ 
+ /* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
+@@ -169,6 +173,11 @@
+ 			      case IPI_CPU_STOP:
+ 				stop_this_cpu();
+ 				break;
++#ifdef	CONFIG_KGDB
++			      case IPI_KGDB_INTERRUPT:
++				kgdb_wait_ipi(get_irq_regs());
++				break;
++#endif
+ #ifdef CONFIG_KEXEC
+ 			      case IPI_KDUMP_CPU_STOP:
+ 				unw_init_running(kdump_cpu_freeze, NULL);
+@@ -399,6 +408,14 @@
+ }
+ EXPORT_SYMBOL(smp_call_function_single);
+ 
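++/* Despite the name, this sends the (maskable) KGDB IPI, not an NMI. */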
++#ifdef	CONFIG_KGDB
++void
++smp_send_nmi_allbutself(void)
++{
++	send_IPI_allbutself(IPI_KGDB_INTERRUPT);
++}
++#endif
++
+ /*
+  * this function sends a 'generic call function' IPI to all other CPUs
+  * in the system.
+diff -Nurb linux-2.6.22-570/arch/ia64/kernel/traps.c linux-2.6.22-try2/arch/ia64/kernel/traps.c
+--- linux-2.6.22-570/arch/ia64/kernel/traps.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/ia64/kernel/traps.c	2007-12-19 15:29:23.000000000 -0500
+@@ -155,8 +155,12 @@
+ 		break;
+ 
+ 	      default:
+-		if (break_num < 0x40000 || break_num > 0x100000)
++		if (break_num < 0x40000 || break_num > 0x100000) {
++			if (notify_die(DIE_BREAK, "bad break", regs,
++				break_num, TRAP_BRKPT, SIGTRAP) == NOTIFY_STOP)
++				return;
+ 			die_if_kernel("Bad break", regs, break_num);
++		}
+ 
+ 		if (break_num < 0x80000) {
+ 			sig = SIGILL; code = __ILL_BREAK;
+diff -Nurb linux-2.6.22-570/arch/ia64/mm/extable.c linux-2.6.22-try2/arch/ia64/mm/extable.c
+--- linux-2.6.22-570/arch/ia64/mm/extable.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ia64/mm/extable.c	2007-12-19 15:29:23.000000000 -0500
+@@ -6,6 +6,7 @@
+  */
+ 
+ #include <linux/sort.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/module.h>
+@@ -73,6 +74,11 @@
+                 else
+                         last = mid - 1;
+         }
++#ifdef CONFIG_KGDB
++	if (atomic_read(&debugger_active) && kgdb_may_fault)
++		kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++		/* Not reached. */
++#endif
+         return NULL;
+ }
+ 
+diff -Nurb linux-2.6.22-570/arch/ia64/mm/fault.c linux-2.6.22-try2/arch/ia64/mm/fault.c
+--- linux-2.6.22-570/arch/ia64/mm/fault.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/ia64/mm/fault.c	2007-12-19 15:29:23.000000000 -0500
+@@ -255,6 +255,10 @@
+ 	 */
+ 	bust_spinlocks(1);
+ 
++	if (notify_die(DIE_PAGE_FAULT_NO_CONTEXT, "no context", regs,
++			isr, 14, SIGSEGV) == NOTIFY_STOP)
++		return;
++
+ 	if (address < PAGE_SIZE)
+ 		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address);
+ 	else
+diff -Nurb linux-2.6.22-570/arch/mips/Kconfig linux-2.6.22-try2/arch/mips/Kconfig
+--- linux-2.6.22-570/arch/mips/Kconfig	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/mips/Kconfig	2007-12-19 15:29:23.000000000 -0500
+@@ -30,7 +30,6 @@
+ 	select SYS_SUPPORTS_32BIT_KERNEL
+ 	select SYS_SUPPORTS_64BIT_KERNEL
+ 	select SYS_SUPPORTS_BIG_ENDIAN
+-	select SYS_SUPPORTS_KGDB
+ 	help
+ 	  The eXcite is a smart camera platform manufactured by
+ 	  Basler Vision Technologies AG.
+@@ -98,7 +97,6 @@
+ 	select SYS_SUPPORTS_32BIT_KERNEL
+ 	select SYS_SUPPORTS_64BIT_KERNEL
+ 	select SYS_SUPPORTS_BIG_ENDIAN
+-	select SYS_SUPPORTS_KGDB
+ 	help
+ 	  This is an evaluation board based on the Galileo GT-64120
+ 	  single-chip system controller that contains a MIPS R5000 compatible
+@@ -269,7 +267,6 @@
+ 	select SYS_SUPPORTS_32BIT_KERNEL
+ 	select SYS_SUPPORTS_64BIT_KERNEL
+ 	select SYS_SUPPORTS_BIG_ENDIAN
+-	select SYS_SUPPORTS_KGDB
+ 	help
+ 	  The Ocelot is a MIPS-based Single Board Computer (SBC) made by
+ 	  Momentum Computer <http://www.momenco.com/>.
+@@ -331,8 +328,6 @@
+ 	select SYS_HAS_CPU_R5432
+ 	select SYS_SUPPORTS_32BIT_KERNEL
+ 	select SYS_SUPPORTS_64BIT_KERNEL if EXPERIMENTAL
+-	select SYS_SUPPORTS_KGDB
+-	select SYS_SUPPORTS_KGDB
+ 	select SYS_SUPPORTS_LITTLE_ENDIAN
+ 	help
+ 	  This enables support for the R5432-based NEC DDB Vrc-5477,
+@@ -360,7 +355,6 @@
+ 	select SYS_SUPPORTS_64BIT_KERNEL
+ 	select SYS_SUPPORTS_BIG_ENDIAN
+ 	select SYS_SUPPORTS_HIGHMEM
+-	select SYS_SUPPORTS_KGDB
+ 	select SYS_SUPPORTS_SMP
+ 	help
+ 	  Yosemite is an evaluation board for the RM9000x2 processor
+@@ -440,7 +434,6 @@
+ 	select SYS_HAS_CPU_R10000
+ 	select SYS_SUPPORTS_64BIT_KERNEL
+ 	select SYS_SUPPORTS_BIG_ENDIAN
+-	select SYS_SUPPORTS_KGDB
+ 	select SYS_SUPPORTS_NUMA
+ 	select SYS_SUPPORTS_SMP
+ 	select GENERIC_HARDIRQS_NO__DO_IRQ
+@@ -490,7 +483,6 @@
+ 	select SYS_HAS_CPU_SB1
+ 	select SYS_SUPPORTS_BIG_ENDIAN
+ 	select SYS_SUPPORTS_HIGHMEM
+-	select SYS_SUPPORTS_KGDB
+ 	select SYS_SUPPORTS_LITTLE_ENDIAN
+ 
+ config SIBYTE_SENTOSA
+@@ -631,7 +623,6 @@
+ 	select SYS_SUPPORTS_64BIT_KERNEL
+ 	select SYS_SUPPORTS_LITTLE_ENDIAN
+ 	select SYS_SUPPORTS_BIG_ENDIAN
+-	select SYS_SUPPORTS_KGDB
+ 	select GENERIC_HARDIRQS_NO__DO_IRQ
+ 	help
+ 	  This Toshiba board is based on the TX4927 processor. Say Y here to
+@@ -650,7 +641,6 @@
+ 	select SYS_SUPPORTS_32BIT_KERNEL
+ 	select SYS_SUPPORTS_LITTLE_ENDIAN
+ 	select SYS_SUPPORTS_BIG_ENDIAN
+-	select SYS_SUPPORTS_KGDB
+ 	select GENERIC_HARDIRQS_NO__DO_IRQ
+ 	help
+ 	  This Toshiba board is based on the TX4938 processor. Say Y here to
+@@ -826,7 +816,6 @@
+ 
+ config DDB5XXX_COMMON
+ 	bool
+-	select SYS_SUPPORTS_KGDB
+ 
+ config MIPS_BOARDS_GEN
+ 	bool
+@@ -862,7 +851,6 @@
+ 	select SYS_HAS_EARLY_PRINTK
+ 	select SYS_SUPPORTS_32BIT_KERNEL
+ 	select GENERIC_HARDIRQS_NO__DO_IRQ
+-	select SYS_SUPPORTS_KGDB
+ 
+ config SWAP_IO_SPACE
+ 	bool
+diff -Nurb linux-2.6.22-570/arch/mips/Kconfig.debug linux-2.6.22-try2/arch/mips/Kconfig.debug
+--- linux-2.6.22-570/arch/mips/Kconfig.debug	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/Kconfig.debug	2007-12-19 15:29:23.000000000 -0500
+@@ -46,28 +46,6 @@
+ 	  arch/mips/kernel/smtc.c.  This debugging option result in significant
+ 	  overhead so should be disabled in production kernels.
+ 
+-config KGDB
+-	bool "Remote GDB kernel debugging"
+-	depends on DEBUG_KERNEL && SYS_SUPPORTS_KGDB
+-	select DEBUG_INFO
+-	help
+-	  If you say Y here, it will be possible to remotely debug the MIPS
+-	  kernel using gdb. This enlarges your kernel image disk size by
+-	  several megabytes and requires a machine with more than 16 MB,
+-	  better 32 MB RAM to avoid excessive linking time. This is only
+-	  useful for kernel hackers. If unsure, say N.
+-
+-config SYS_SUPPORTS_KGDB
+-	bool
+-
+-config GDB_CONSOLE
+-	bool "Console output to GDB"
+-	depends on KGDB
+-	help
+-	  If you are using GDB for remote debugging over a serial port and
+-	  would like kernel messages to be formatted into GDB $O packets so
+-	  that GDB prints them as program output, say 'Y'.
+-
+ config SB1XXX_CORELIS
+ 	bool "Corelis Debugger"
+ 	depends on SIBYTE_SB1xxx_SOC
+diff -Nurb linux-2.6.22-570/arch/mips/au1000/common/Makefile linux-2.6.22-try2/arch/mips/au1000/common/Makefile
+--- linux-2.6.22-570/arch/mips/au1000/common/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/au1000/common/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -10,5 +10,4 @@
+ 	au1xxx_irqmap.o clocks.o platform.o power.o setup.o \
+ 	sleeper.o cputable.o dma.o dbdma.o gpio.o
+ 
+-obj-$(CONFIG_KGDB)		+= dbg_io.o
+ obj-$(CONFIG_PCI)		+= pci.o
+diff -Nurb linux-2.6.22-570/arch/mips/au1000/common/dbg_io.c linux-2.6.22-try2/arch/mips/au1000/common/dbg_io.c
+--- linux-2.6.22-570/arch/mips/au1000/common/dbg_io.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/au1000/common/dbg_io.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,121 +0,0 @@
+-
+-#include <asm/io.h>
+-#include <asm/mach-au1x00/au1000.h>
+-
+-#ifdef CONFIG_KGDB
+-
+-/*
+- * FIXME the user should be able to select the
+- * uart to be used for debugging.
+- */
+-#define DEBUG_BASE  UART_DEBUG_BASE
+-/**/
+-
+-/* we need uint32 uint8 */
+-/* #include "types.h" */
+-typedef         unsigned char uint8;
+-typedef         unsigned int  uint32;
+-
+-#define         UART16550_BAUD_2400             2400
+-#define         UART16550_BAUD_4800             4800
+-#define         UART16550_BAUD_9600             9600
+-#define         UART16550_BAUD_19200            19200
+-#define         UART16550_BAUD_38400            38400
+-#define         UART16550_BAUD_57600            57600
+-#define         UART16550_BAUD_115200           115200
+-
+-#define         UART16550_PARITY_NONE           0
+-#define         UART16550_PARITY_ODD            0x08
+-#define         UART16550_PARITY_EVEN           0x18
+-#define         UART16550_PARITY_MARK           0x28
+-#define         UART16550_PARITY_SPACE          0x38
+-
+-#define         UART16550_DATA_5BIT             0x0
+-#define         UART16550_DATA_6BIT             0x1
+-#define         UART16550_DATA_7BIT             0x2
+-#define         UART16550_DATA_8BIT             0x3
+-
+-#define         UART16550_STOP_1BIT             0x0
+-#define         UART16550_STOP_2BIT             0x4
+-
+-
+-#define UART_RX		0	/* Receive buffer */
+-#define UART_TX		4	/* Transmit buffer */
+-#define UART_IER	8	/* Interrupt Enable Register */
+-#define UART_IIR	0xC	/* Interrupt ID Register */
+-#define UART_FCR	0x10	/* FIFO Control Register */
+-#define UART_LCR	0x14	/* Line Control Register */
+-#define UART_MCR	0x18	/* Modem Control Register */
+-#define UART_LSR	0x1C	/* Line Status Register */
+-#define UART_MSR	0x20	/* Modem Status Register */
+-#define UART_CLK	0x28	/* Baud Rat4e Clock Divider */
+-#define UART_MOD_CNTRL	0x100	/* Module Control */
+-
+-/* memory-mapped read/write of the port */
+-#define UART16550_READ(y)    (au_readl(DEBUG_BASE + y) & 0xff)
+-#define UART16550_WRITE(y,z) (au_writel(z&0xff, DEBUG_BASE + y))
+-
+-extern unsigned long get_au1x00_uart_baud_base(void);
+-extern unsigned long cal_r4koff(void);
+-
+-void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop)
+-{
+-
+-	if (UART16550_READ(UART_MOD_CNTRL) != 0x3) {
+-		UART16550_WRITE(UART_MOD_CNTRL, 3);
+-	}
+-	cal_r4koff();
+-
+-	/* disable interrupts */
+-	UART16550_WRITE(UART_IER, 0);
+-
+-	/* set up baud rate */
+-	{
+-		uint32 divisor;
+-
+-		/* set divisor */
+-		divisor = get_au1x00_uart_baud_base() / baud;
+-		UART16550_WRITE(UART_CLK, divisor & 0xffff);
+-	}
+-
+-	/* set data format */
+-	UART16550_WRITE(UART_LCR, (data | parity | stop));
+-}
+-
+-static int remoteDebugInitialized = 0;
+-
+-uint8 getDebugChar(void)
+-{
+-	if (!remoteDebugInitialized) {
+-		remoteDebugInitialized = 1;
+-		debugInit(UART16550_BAUD_115200,
+-			  UART16550_DATA_8BIT,
+-			  UART16550_PARITY_NONE,
+-			  UART16550_STOP_1BIT);
+-	}
+-
+-	while((UART16550_READ(UART_LSR) & 0x1) == 0);
+-	return UART16550_READ(UART_RX);
+-}
+-
+-
+-int putDebugChar(uint8 byte)
+-{
+-//	int i;
+-
+-	if (!remoteDebugInitialized) {
+-		remoteDebugInitialized = 1;
+-		debugInit(UART16550_BAUD_115200,
+-			  UART16550_DATA_8BIT,
+-			  UART16550_PARITY_NONE,
+-			  UART16550_STOP_1BIT);
+-	}
+-
+-	while ((UART16550_READ(UART_LSR)&0x40) == 0);
+-	UART16550_WRITE(UART_TX, byte);
+-	//for (i=0;i<0xfff;i++);
+-
+-	return 1;
+-}
+-
+-#endif
+diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/Makefile linux-2.6.22-try2/arch/mips/basler/excite/Makefile
+--- linux-2.6.22-570/arch/mips/basler/excite/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/basler/excite/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -5,5 +5,4 @@
+ obj-$(CONFIG_BASLER_EXCITE)	+= excite_irq.o excite_prom.o excite_setup.o \
+ 				   excite_device.o excite_procfs.o
+ 
+-obj-$(CONFIG_KGDB)		+= excite_dbg_io.o
+ obj-m				+= excite_iodev.o
+diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/excite_dbg_io.c linux-2.6.22-try2/arch/mips/basler/excite/excite_dbg_io.c
+--- linux-2.6.22-570/arch/mips/basler/excite/excite_dbg_io.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/basler/excite/excite_dbg_io.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,121 +0,0 @@
+-/*
+- *  Copyright (C) 2004 by Basler Vision Technologies AG
+- *  Author: Thomas Koeller <thomas.koeller@baslerweb.com>
+- *
+- *  This program is free software; you can redistribute it and/or modify
+- *  it under the terms of the GNU General Public License as published by
+- *  the Free Software Foundation; either version 2 of the License, or
+- *  (at your option) any later version.
+- *
+- *  This program is distributed in the hope that it will be useful,
+- *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+- *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+- *  GNU General Public License for more details.
+- *
+- *  You should have received a copy of the GNU General Public License
+- *  along with this program; if not, write to the Free Software
+- *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+- */
+-
+-#include <linux/linkage.h>
+-#include <linux/init.h>
+-#include <linux/kernel.h>
+-#include <asm/gdb-stub.h>
+-#include <asm/rm9k-ocd.h>
+-#include <excite.h>
+-
+-#if defined(CONFIG_SERIAL_8250) && CONFIG_SERIAL_8250_NR_UARTS > 1
+-#error Debug port used by serial driver
+-#endif
+-
+-#define UART_CLK		25000000
+-#define BASE_BAUD		(UART_CLK / 16)
+-#define REGISTER_BASE_0		0x0208UL
+-#define REGISTER_BASE_1		0x0238UL
+-
+-#define REGISTER_BASE_DBG	REGISTER_BASE_1
+-
+-#define CPRR	0x0004
+-#define UACFG	0x0200
+-#define UAINTS	0x0204
+-#define UARBR	(REGISTER_BASE_DBG + 0x0000)
+-#define UATHR	(REGISTER_BASE_DBG + 0x0004)
+-#define UADLL	(REGISTER_BASE_DBG + 0x0008)
+-#define UAIER	(REGISTER_BASE_DBG + 0x000c)
+-#define UADLH	(REGISTER_BASE_DBG + 0x0010)
+-#define UAIIR	(REGISTER_BASE_DBG + 0x0014)
+-#define UAFCR	(REGISTER_BASE_DBG + 0x0018)
+-#define UALCR	(REGISTER_BASE_DBG + 0x001c)
+-#define UAMCR	(REGISTER_BASE_DBG + 0x0020)
+-#define UALSR	(REGISTER_BASE_DBG + 0x0024)
+-#define UAMSR	(REGISTER_BASE_DBG + 0x0028)
+-#define UASCR	(REGISTER_BASE_DBG + 0x002c)
+-
+-#define	PARITY_NONE	0
+-#define	PARITY_ODD	0x08
+-#define	PARITY_EVEN	0x18
+-#define	PARITY_MARK	0x28
+-#define	PARITY_SPACE	0x38
+-
+-#define	DATA_5BIT	0x0
+-#define	DATA_6BIT	0x1
+-#define	DATA_7BIT	0x2
+-#define	DATA_8BIT	0x3
+-
+-#define	STOP_1BIT	0x0
+-#define	STOP_2BIT	0x4
+-
+-#define BAUD_DBG	57600
+-#define	PARITY_DBG	PARITY_NONE
+-#define	DATA_DBG	DATA_8BIT
+-#define	STOP_DBG	STOP_1BIT
+-
+-/* Initialize the serial port for KGDB debugging */
+-void __init excite_kgdb_init(void)
+-{
+-	const u32 divisor = BASE_BAUD / BAUD_DBG;
+-
+-	/* Take the UART out of reset */
+-	titan_writel(0x00ff1cff, CPRR);
+-	titan_writel(0x00000000, UACFG);
+-	titan_writel(0x00000002, UACFG);
+-
+-	titan_writel(0x0, UALCR);
+-	titan_writel(0x0, UAIER);
+-
+-	/* Disable FIFOs */
+-	titan_writel(0x00, UAFCR);
+-
+-	titan_writel(0x80, UALCR);
+-	titan_writel(divisor & 0xff, UADLL);
+-	titan_writel((divisor & 0xff00) >> 8, UADLH);
+-	titan_writel(0x0, UALCR);
+-
+-	titan_writel(DATA_DBG | PARITY_DBG | STOP_DBG, UALCR);
+-
+-	/* Enable receiver interrupt */
+-	titan_readl(UARBR);
+-	titan_writel(0x1, UAIER);
+-}
+-
+-int getDebugChar(void)
+-{
+-	while (!(titan_readl(UALSR) & 0x1));
+-	return titan_readl(UARBR);
+-}
+-
+-int putDebugChar(int data)
+-{
+-	while (!(titan_readl(UALSR) & 0x20));
+-	titan_writel(data, UATHR);
+-	return 1;
+-}
+-
+-/* KGDB interrupt handler */
+-asmlinkage void excite_kgdb_inthdl(void)
+-{
+-	if (unlikely(
+-		((titan_readl(UAIIR) & 0x7) == 4)
+-		&& ((titan_readl(UARBR) & 0xff) == 0x3)))
+-			set_async_breakpoint(&regs->cp0_epc);
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/excite_irq.c linux-2.6.22-try2/arch/mips/basler/excite/excite_irq.c
+--- linux-2.6.22-570/arch/mips/basler/excite/excite_irq.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/basler/excite/excite_irq.c	2007-12-19 15:29:23.000000000 -0500
+@@ -50,10 +50,6 @@
+ 	mips_cpu_irq_init();
+ 	rm7k_cpu_irq_init();
+ 	rm9k_cpu_irq_init();
+-
+-#ifdef CONFIG_KGDB
+-	excite_kgdb_init();
+-#endif
+ }
+ 
+ asmlinkage void plat_irq_dispatch(void)
+@@ -90,9 +86,6 @@
+ 	msgint	    = msgintflags & msgintmask & (0x1 << (TITAN_MSGINT % 0x20));
+ 	if ((pending & (1 << TITAN_IRQ)) && msgint) {
+ 		ocd_writel(msgint, INTP0Clear0 + (TITAN_MSGINT / 0x20 * 0x10));
+-#if defined(CONFIG_KGDB)
+-		excite_kgdb_inthdl();
+-#endif
+ 		do_IRQ(TITAN_IRQ);
+ 		return;
+ 	}
+diff -Nurb linux-2.6.22-570/arch/mips/basler/excite/excite_setup.c linux-2.6.22-try2/arch/mips/basler/excite/excite_setup.c
+--- linux-2.6.22-570/arch/mips/basler/excite/excite_setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/basler/excite/excite_setup.c	2007-12-19 15:29:23.000000000 -0500
+@@ -95,13 +95,13 @@
+ 	/* Take the DUART out of reset */
+ 	titan_writel(0x00ff1cff, CPRR);
+ 
+-#if defined(CONFIG_KGDB) || (CONFIG_SERIAL_8250_NR_UARTS > 1)
++#if (CONFIG_SERIAL_8250_NR_UARTS > 1)
+ 	/* Enable both ports */
+ 	titan_writel(MASK_SER0 | MASK_SER1, UACFG);
+ #else
+ 	/* Enable port #0 only */
+ 	titan_writel(MASK_SER0, UACFG);
+-#endif	/* defined(CONFIG_KGDB) */
++#endif
+ 
+  	/*
+ 	 * Set up serial port #0. Do not use autodetection; the result is
+diff -Nurb linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/Makefile linux-2.6.22-try2/arch/mips/ddb5xxx/ddb5477/Makefile
+--- linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/ddb5xxx/ddb5477/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -5,4 +5,3 @@
+ obj-y	 		+= irq.o irq_5477.o setup.o lcd44780.o
+ 
+ obj-$(CONFIG_RUNTIME_DEBUG) 	+= debug.o
+-obj-$(CONFIG_KGDB)		+= kgdb_io.o
+diff -Nurb linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/kgdb_io.c linux-2.6.22-try2/arch/mips/ddb5xxx/ddb5477/kgdb_io.c
+--- linux-2.6.22-570/arch/mips/ddb5xxx/ddb5477/kgdb_io.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/ddb5xxx/ddb5477/kgdb_io.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,136 +0,0 @@
+-/*
+- * kgdb io functions for DDB5477.  We use the second serial port (upper one).
+- *
+- * Copyright (C) 2001 MontaVista Software Inc.
+- * Author: jsun@mvista.com or jsun@junsun.net
+- *
+- * This program is free software; you can redistribute  it and/or modify it
+- * under  the terms of  the GNU General  Public License as published by the
+- * Free Software Foundation;  either version 2 of the  License, or (at your
+- * option) any later version.
+- *
+- */
+-
+-/* ======================= CONFIG ======================== */
+-
+-/* [jsun] we use the second serial port for kdb */
+-#define         BASE                    0xbfa04240
+-#define         MAX_BAUD                115200
+-
+-/* distance in bytes between two serial registers */
+-#define         REG_OFFSET              8
+-
+-/*
+- * 0 - kgdb does serial init
+- * 1 - kgdb skip serial init
+- */
+-static int remoteDebugInitialized = 0;
+-
+-/*
+- * the default baud rate *if* kgdb does serial init
+- */
+-#define		BAUD_DEFAULT		UART16550_BAUD_38400
+-
+-/* ======================= END OF CONFIG ======================== */
+-
+-typedef unsigned char uint8;
+-typedef unsigned int uint32;
+-
+-#define         UART16550_BAUD_2400             2400
+-#define         UART16550_BAUD_4800             4800
+-#define         UART16550_BAUD_9600             9600
+-#define         UART16550_BAUD_19200            19200
+-#define         UART16550_BAUD_38400            38400
+-#define         UART16550_BAUD_57600            57600
+-#define         UART16550_BAUD_115200           115200
+-
+-#define         UART16550_PARITY_NONE           0
+-#define         UART16550_PARITY_ODD            0x08
+-#define         UART16550_PARITY_EVEN           0x18
+-#define         UART16550_PARITY_MARK           0x28
+-#define         UART16550_PARITY_SPACE          0x38
+-
+-#define         UART16550_DATA_5BIT             0x0
+-#define         UART16550_DATA_6BIT             0x1
+-#define         UART16550_DATA_7BIT             0x2
+-#define         UART16550_DATA_8BIT             0x3
+-
+-#define         UART16550_STOP_1BIT             0x0
+-#define         UART16550_STOP_2BIT             0x4
+-
+-/* register offset */
+-#define         OFS_RCV_BUFFER          0
+-#define         OFS_TRANS_HOLD          0
+-#define         OFS_SEND_BUFFER         0
+-#define         OFS_INTR_ENABLE         (1*REG_OFFSET)
+-#define         OFS_INTR_ID             (2*REG_OFFSET)
+-#define         OFS_DATA_FORMAT         (3*REG_OFFSET)
+-#define         OFS_LINE_CONTROL        (3*REG_OFFSET)
+-#define         OFS_MODEM_CONTROL       (4*REG_OFFSET)
+-#define         OFS_RS232_OUTPUT        (4*REG_OFFSET)
+-#define         OFS_LINE_STATUS         (5*REG_OFFSET)
+-#define         OFS_MODEM_STATUS        (6*REG_OFFSET)
+-#define         OFS_RS232_INPUT         (6*REG_OFFSET)
+-#define         OFS_SCRATCH_PAD         (7*REG_OFFSET)
+-
+-#define         OFS_DIVISOR_LSB         (0*REG_OFFSET)
+-#define         OFS_DIVISOR_MSB         (1*REG_OFFSET)
+-
+-
+-/* memory-mapped read/write of the port */
+-#define         UART16550_READ(y)    (*((volatile uint8*)(BASE + y)))
+-#define         UART16550_WRITE(y, z)  ((*((volatile uint8*)(BASE + y))) = z)
+-
+-void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop)
+-{
+-        /* disable interrupts */
+-        UART16550_WRITE(OFS_INTR_ENABLE, 0);
+-
+-        /* set up baud rate */
+-        {
+-                uint32 divisor;
+-
+-                /* set DIAB bit */
+-                UART16550_WRITE(OFS_LINE_CONTROL, 0x80);
+-
+-                /* set divisor */
+-                divisor = MAX_BAUD / baud;
+-                UART16550_WRITE(OFS_DIVISOR_LSB, divisor & 0xff);
+-                UART16550_WRITE(OFS_DIVISOR_MSB, (divisor & 0xff00) >> 8);
+-
+-                /* clear DIAB bit */
+-                UART16550_WRITE(OFS_LINE_CONTROL, 0x0);
+-        }
+-
+-        /* set data format */
+-        UART16550_WRITE(OFS_DATA_FORMAT, data | parity | stop);
+-}
+-
+-
+-uint8 getDebugChar(void)
+-{
+-        if (!remoteDebugInitialized) {
+-                remoteDebugInitialized = 1;
+-                debugInit(BAUD_DEFAULT,
+-                          UART16550_DATA_8BIT,
+-                          UART16550_PARITY_NONE, UART16550_STOP_1BIT);
+-        }
+-
+-        while ((UART16550_READ(OFS_LINE_STATUS) & 0x1) == 0);
+-        return UART16550_READ(OFS_RCV_BUFFER);
+-}
+-
+-
+-int putDebugChar(uint8 byte)
+-{
+-        if (!remoteDebugInitialized) {
+-                remoteDebugInitialized = 1;
+-                debugInit(BAUD_DEFAULT,
+-                          UART16550_DATA_8BIT,
+-                          UART16550_PARITY_NONE, UART16550_STOP_1BIT);
+-        }
+-
+-        while ((UART16550_READ(OFS_LINE_STATUS) & 0x20) == 0);
+-        UART16550_WRITE(OFS_SEND_BUFFER, byte);
+-        return 1;
+-}
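+
+For reference, the removed helpers above program the 16550 in the standard
+sequence: set the divisor-latch access bit (DLAB; the "DIAB" in the comments
+is a typo for DLAB) in the line-control register, write the two divisor
+bytes, then clear DLAB and set the data format. A minimal sketch of that
+sequence, reusing the register macros above (BASE, REG_OFFSET and MAX_BAUD
+remain board-specific placeholders):
+
+	static void uart16550_set_baud(unsigned int baud)
+	{
+		unsigned int divisor = MAX_BAUD / baud;
+
+		/* mask UART interrupts while reprogramming */
+		UART16550_WRITE(OFS_INTR_ENABLE, 0);
+		/* set DLAB to expose the divisor latch */
+		UART16550_WRITE(OFS_LINE_CONTROL, 0x80);
+		UART16550_WRITE(OFS_DIVISOR_LSB, divisor & 0xff);
+		UART16550_WRITE(OFS_DIVISOR_MSB, (divisor >> 8) & 0xff);
+		/* clear DLAB and select 8 data bits, no parity, 1 stop bit */
+		UART16550_WRITE(OFS_LINE_CONTROL,
+				UART16550_DATA_8BIT | UART16550_PARITY_NONE |
+				UART16550_STOP_1BIT);
+	}
+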
+diff -Nurb linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/Makefile linux-2.6.22-try2/arch/mips/gt64120/momenco_ocelot/Makefile
+--- linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/gt64120/momenco_ocelot/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -3,5 +3,3 @@
+ #
+ 
+ obj-y	 		+= irq.o prom.o reset.o setup.o
+-
+-obj-$(CONFIG_KGDB)	+= dbg_io.o
+diff -Nurb linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/dbg_io.c linux-2.6.22-try2/arch/mips/gt64120/momenco_ocelot/dbg_io.c
+--- linux-2.6.22-570/arch/mips/gt64120/momenco_ocelot/dbg_io.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/gt64120/momenco_ocelot/dbg_io.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,121 +0,0 @@
+-
+-#include <asm/serial.h> /* For the serial port location and base baud */
+-
+-/* --- CONFIG --- */
+-
+-typedef unsigned char uint8;
+-typedef unsigned int uint32;
+-
+-/* --- END OF CONFIG --- */
+-
+-#define         UART16550_BAUD_2400             2400
+-#define         UART16550_BAUD_4800             4800
+-#define         UART16550_BAUD_9600             9600
+-#define         UART16550_BAUD_19200            19200
+-#define         UART16550_BAUD_38400            38400
+-#define         UART16550_BAUD_57600            57600
+-#define         UART16550_BAUD_115200           115200
+-
+-#define         UART16550_PARITY_NONE           0
+-#define         UART16550_PARITY_ODD            0x08
+-#define         UART16550_PARITY_EVEN           0x18
+-#define         UART16550_PARITY_MARK           0x28
+-#define         UART16550_PARITY_SPACE          0x38
+-
+-#define         UART16550_DATA_5BIT             0x0
+-#define         UART16550_DATA_6BIT             0x1
+-#define         UART16550_DATA_7BIT             0x2
+-#define         UART16550_DATA_8BIT             0x3
+-
+-#define         UART16550_STOP_1BIT             0x0
+-#define         UART16550_STOP_2BIT             0x4
+-
+-/* ----------------------------------------------------- */
+-
+-/* === CONFIG === */
+-
+-/* [jsun] we use the second serial port for kdb */
+-#define         BASE                    OCELOT_SERIAL1_BASE
+-#define         MAX_BAUD                OCELOT_BASE_BAUD
+-
+-/* === END OF CONFIG === */
+-
+-#define         REG_OFFSET              4
+-
+-/* register offset */
+-#define         OFS_RCV_BUFFER          0
+-#define         OFS_TRANS_HOLD          0
+-#define         OFS_SEND_BUFFER         0
+-#define         OFS_INTR_ENABLE         (1*REG_OFFSET)
+-#define         OFS_INTR_ID             (2*REG_OFFSET)
+-#define         OFS_DATA_FORMAT         (3*REG_OFFSET)
+-#define         OFS_LINE_CONTROL        (3*REG_OFFSET)
+-#define         OFS_MODEM_CONTROL       (4*REG_OFFSET)
+-#define         OFS_RS232_OUTPUT        (4*REG_OFFSET)
+-#define         OFS_LINE_STATUS         (5*REG_OFFSET)
+-#define         OFS_MODEM_STATUS        (6*REG_OFFSET)
+-#define         OFS_RS232_INPUT         (6*REG_OFFSET)
+-#define         OFS_SCRATCH_PAD         (7*REG_OFFSET)
+-
+-#define         OFS_DIVISOR_LSB         (0*REG_OFFSET)
+-#define         OFS_DIVISOR_MSB         (1*REG_OFFSET)
+-
+-
+-/* memory-mapped read/write of the port */
+-#define         UART16550_READ(y)    (*((volatile uint8*)(BASE + y)))
+-#define         UART16550_WRITE(y, z)  ((*((volatile uint8*)(BASE + y))) = z)
+-
+-void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop)
+-{
+-	/* disable interrupts */
+-	UART16550_WRITE(OFS_INTR_ENABLE, 0);
+-
+-	/* set up baud rate */
+-	{
+-		uint32 divisor;
+-
+-		/* set DIAB bit */
+-		UART16550_WRITE(OFS_LINE_CONTROL, 0x80);
+-
+-		/* set divisor */
+-		divisor = MAX_BAUD / baud;
+-		UART16550_WRITE(OFS_DIVISOR_LSB, divisor & 0xff);
+-		UART16550_WRITE(OFS_DIVISOR_MSB, (divisor & 0xff00) >> 8);
+-
+-		/* clear DIAB bit */
+-		UART16550_WRITE(OFS_LINE_CONTROL, 0x0);
+-	}
+-
+-	/* set data format */
+-	UART16550_WRITE(OFS_DATA_FORMAT, data | parity | stop);
+-}
+-
+-static int remoteDebugInitialized = 0;
+-
+-uint8 getDebugChar(void)
+-{
+-	if (!remoteDebugInitialized) {
+-		remoteDebugInitialized = 1;
+-		debugInit(UART16550_BAUD_38400,
+-			  UART16550_DATA_8BIT,
+-			  UART16550_PARITY_NONE, UART16550_STOP_1BIT);
+-	}
+-
+-	while ((UART16550_READ(OFS_LINE_STATUS) & 0x1) == 0);
+-	return UART16550_READ(OFS_RCV_BUFFER);
+-}
+-
+-
+-int putDebugChar(uint8 byte)
+-{
+-	if (!remoteDebugInitialized) {
+-		remoteDebugInitialized = 1;
+-		debugInit(UART16550_BAUD_38400,
+-			  UART16550_DATA_8BIT,
+-			  UART16550_PARITY_NONE, UART16550_STOP_1BIT);
+-	}
+-
+-	while ((UART16550_READ(OFS_LINE_STATUS) & 0x20) == 0);
+-	UART16550_WRITE(OFS_SEND_BUFFER, byte);
+-	return 1;
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/Makefile linux-2.6.22-try2/arch/mips/jmr3927/rbhma3100/Makefile
+--- linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/jmr3927/rbhma3100/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -3,4 +3,3 @@
+ #
+ 
+ obj-y	 			+= init.o irq.o setup.o
+-obj-$(CONFIG_KGDB)		+= kgdb_io.o
+diff -Nurb linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/kgdb_io.c linux-2.6.22-try2/arch/mips/jmr3927/rbhma3100/kgdb_io.c
+--- linux-2.6.22-570/arch/mips/jmr3927/rbhma3100/kgdb_io.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/jmr3927/rbhma3100/kgdb_io.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,105 +0,0 @@
+-/*
+- * BRIEF MODULE DESCRIPTION
+- *	Low level uart routines to directly access a TX[34]927 SIO.
+- *
+- * Copyright 2001 MontaVista Software Inc.
+- * Author: MontaVista Software, Inc.
+- *         	ahennessy@mvista.com or source@mvista.com
+- *
+- * Based on arch/mips/ddb5xxx/ddb5477/kgdb_io.c
+- *
+- * Copyright (C) 2000-2001 Toshiba Corporation
+- *
+- *  This program is free software; you can redistribute  it and/or modify it
+- *  under  the terms of  the GNU General  Public License as published by the
+- *  Free Software Foundation;  either version 2 of the  License, or (at your
+- *  option) any later version.
+- *
+- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- *
+- *  You should have received a copy of the  GNU General Public License along
+- *  with this program; if not, write  to the Free Software Foundation, Inc.,
+- *  675 Mass Ave, Cambridge, MA 02139, USA.
+- */
+-
+-#include <asm/jmr3927/jmr3927.h>
+-
+-#define TIMEOUT       0xffffff
+-
+-static int remoteDebugInitialized = 0;
+-static void debugInit(int baud);
+-
+-int putDebugChar(unsigned char c)
+-{
+-        int i = 0;
+-
+-	if (!remoteDebugInitialized) {
+-		remoteDebugInitialized = 1;
+-		debugInit(38400);
+-	}
+-
+-        do {
+-            slow_down();
+-            i++;
+-            if (i>TIMEOUT) {
+-                break;
+-            }
+-        } while (!(tx3927_sioptr(0)->cisr & TXx927_SICISR_TXALS));
+-	tx3927_sioptr(0)->tfifo = c;
+-
+-	return 1;
+-}
+-
+-unsigned char getDebugChar(void)
+-{
+-        int i = 0;
+-	int dicr;
+-	char c;
+-
+-	if (!remoteDebugInitialized) {
+-		remoteDebugInitialized = 1;
+-		debugInit(38400);
+-	}
+-
+-	/* diable RX int. */
+-	dicr = tx3927_sioptr(0)->dicr;
+-	tx3927_sioptr(0)->dicr = 0;
+-
+-        do {
+-            slow_down();
+-            i++;
+-            if (i>TIMEOUT) {
+-                break;
+-            }
+-        } while (tx3927_sioptr(0)->disr & TXx927_SIDISR_UVALID)
+-		;
+-	c = tx3927_sioptr(0)->rfifo;
+-
+-	/* clear RX int. status */
+-	tx3927_sioptr(0)->disr &= ~TXx927_SIDISR_RDIS;
+-	/* enable RX int. */
+-	tx3927_sioptr(0)->dicr = dicr;
+-
+-	return c;
+-}
+-
+-static void debugInit(int baud)
+-{
+-	tx3927_sioptr(0)->lcr = 0x020;
+-	tx3927_sioptr(0)->dicr = 0;
+-	tx3927_sioptr(0)->disr = 0x4100;
+-	tx3927_sioptr(0)->cisr = 0x014;
+-	tx3927_sioptr(0)->fcr = 0;
+-	tx3927_sioptr(0)->flcr = 0x02;
+-	tx3927_sioptr(0)->bgr = ((JMR3927_BASE_BAUD + baud / 2) / baud) |
+-		TXx927_SIBGR_BCLK_T0;
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/Makefile linux-2.6.22-try2/arch/mips/kernel/Makefile
+--- linux-2.6.22-570/arch/mips/kernel/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/kernel/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -57,7 +57,8 @@
+ obj-$(CONFIG_MIPS32_N32)	+= binfmt_elfn32.o scall64-n32.o signal_n32.o
+ obj-$(CONFIG_MIPS32_O32)	+= binfmt_elfo32.o scall64-o32.o
+ 
+-obj-$(CONFIG_KGDB)		+= gdb-low.o gdb-stub.o
++obj-$(CONFIG_KGDB)		+= kgdb_handler.o kgdb.o kgdb-jmp.o	\
++					kgdb-setjmp.o
+ obj-$(CONFIG_PROC_FS)		+= proc.o
+ 
+ obj-$(CONFIG_64BIT)		+= cpu-bugs64.o
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/cpu-probe.c linux-2.6.22-try2/arch/mips/kernel/cpu-probe.c
+--- linux-2.6.22-570/arch/mips/kernel/cpu-probe.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/kernel/cpu-probe.c	2007-12-19 15:28:56.000000000 -0500
+@@ -177,6 +177,17 @@
+ 
+ 		cpu_wait = r4k_wait;
+ 		break;
++	case CPU_20KC:
++		/*
++		 * WAIT on Rev1.0 has E1, E2, E3 and E16.
++		 * WAIT on Rev2.0 and Rev3.0 has E16.
++		 * Rev3.1 WAIT is nop, why bother
++		 */
++		if ((c->processor_id & 0xff) <= 0x64)
++			break;
++
++		cpu_wait = r4k_wait;
++		break;
+ 	case CPU_RM9000:
+ 		if ((c->processor_id & 0x00ff) >= 0x40)
+ 			cpu_wait = r4k_wait;
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/gdb-low.S linux-2.6.22-try2/arch/mips/kernel/gdb-low.S
+--- linux-2.6.22-570/arch/mips/kernel/gdb-low.S	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/kernel/gdb-low.S	1969-12-31 19:00:00.000000000 -0500
+@@ -1,394 +0,0 @@
+-/*
+- * gdb-low.S contains the low-level trap handler for the GDB stub.
+- *
+- * Copyright (C) 1995 Andreas Busse
+- */
+-#include <linux/sys.h>
+-
+-#include <asm/asm.h>
+-#include <asm/errno.h>
+-#include <asm/irqflags.h>
+-#include <asm/mipsregs.h>
+-#include <asm/regdef.h>
+-#include <asm/stackframe.h>
+-#include <asm/gdb-stub.h>
+-
+-#ifdef CONFIG_32BIT
+-#define DMFC0	mfc0
+-#define DMTC0	mtc0
+-#define LDC1	lwc1
+-#define SDC1	lwc1
+-#endif
+-#ifdef CONFIG_64BIT
+-#define DMFC0	dmfc0
+-#define DMTC0	dmtc0
+-#define LDC1	ldc1
+-#define SDC1	ldc1
+-#endif
+-
+-/*
+- * [jsun] We reserves about 2x GDB_FR_SIZE in stack.  The lower (addressed)
+- * part is used to store registers and passed to exception handler.
+- * The upper part is reserved for "call func" feature where gdb client
+- * saves some of the regs, setups call frame and passes args.
+- *
+- * A trace shows about 200 bytes are used to store about half of all regs.
+- * The rest should be big enough for frame setup and passing args.
+- */
+-
+-/*
+- * The low level trap handler
+- */
+-		.align 	5
+-		NESTED(trap_low, GDB_FR_SIZE, sp)
+-		.set	noat
+-		.set 	noreorder
+-
+-		mfc0	k0, CP0_STATUS
+-		sll	k0, 3     		/* extract cu0 bit */
+-		bltz	k0, 1f
+-		move	k1, sp
+-
+-		/*
+-		 * Called from user mode, go somewhere else.
+-		 */
+-		mfc0	k0, CP0_CAUSE
+-		andi	k0, k0, 0x7c
+-#ifdef CONFIG_64BIT
+-		dsll	k0, k0, 1
+-#endif
+-		PTR_L	k1, saved_vectors(k0)
+-		jr	k1
+-		nop
+-1:
+-		move	k0, sp
+-		PTR_SUBU sp, k1, GDB_FR_SIZE*2	# see comment above
+-		LONG_S	k0, GDB_FR_REG29(sp)
+-		LONG_S	$2, GDB_FR_REG2(sp)
+-
+-/*
+- * First save the CP0 and special registers
+- */
+-
+-		mfc0	v0, CP0_STATUS
+-		LONG_S	v0, GDB_FR_STATUS(sp)
+-		mfc0	v0, CP0_CAUSE
+-		LONG_S	v0, GDB_FR_CAUSE(sp)
+-		DMFC0	v0, CP0_EPC
+-		LONG_S	v0, GDB_FR_EPC(sp)
+-		DMFC0	v0, CP0_BADVADDR
+-		LONG_S	v0, GDB_FR_BADVADDR(sp)
+-		mfhi	v0
+-		LONG_S	v0, GDB_FR_HI(sp)
+-		mflo	v0
+-		LONG_S	v0, GDB_FR_LO(sp)
+-
+-/*
+- * Now the integer registers
+- */
+-
+-		LONG_S	zero, GDB_FR_REG0(sp)		/* I know... */
+-		LONG_S	$1, GDB_FR_REG1(sp)
+-		/* v0 already saved */
+-		LONG_S	$3, GDB_FR_REG3(sp)
+-		LONG_S	$4, GDB_FR_REG4(sp)
+-		LONG_S	$5, GDB_FR_REG5(sp)
+-		LONG_S	$6, GDB_FR_REG6(sp)
+-		LONG_S	$7, GDB_FR_REG7(sp)
+-		LONG_S	$8, GDB_FR_REG8(sp)
+-		LONG_S	$9, GDB_FR_REG9(sp)
+-		LONG_S	$10, GDB_FR_REG10(sp)
+-		LONG_S	$11, GDB_FR_REG11(sp)
+-		LONG_S	$12, GDB_FR_REG12(sp)
+-		LONG_S	$13, GDB_FR_REG13(sp)
+-		LONG_S	$14, GDB_FR_REG14(sp)
+-		LONG_S	$15, GDB_FR_REG15(sp)
+-		LONG_S	$16, GDB_FR_REG16(sp)
+-		LONG_S	$17, GDB_FR_REG17(sp)
+-		LONG_S	$18, GDB_FR_REG18(sp)
+-		LONG_S	$19, GDB_FR_REG19(sp)
+-		LONG_S	$20, GDB_FR_REG20(sp)
+-		LONG_S	$21, GDB_FR_REG21(sp)
+-		LONG_S	$22, GDB_FR_REG22(sp)
+-		LONG_S	$23, GDB_FR_REG23(sp)
+-		LONG_S	$24, GDB_FR_REG24(sp)
+-		LONG_S	$25, GDB_FR_REG25(sp)
+-		LONG_S	$26, GDB_FR_REG26(sp)
+-		LONG_S	$27, GDB_FR_REG27(sp)
+-		LONG_S	$28, GDB_FR_REG28(sp)
+-		/* sp already saved */
+-		LONG_S	$30, GDB_FR_REG30(sp)
+-		LONG_S	$31, GDB_FR_REG31(sp)
+-
+-		CLI				/* disable interrupts */
+-		TRACE_IRQS_OFF
+-
+-/*
+- * Followed by the floating point registers
+- */
+-		mfc0	v0, CP0_STATUS		/* FPU enabled? */
+-		srl	v0, v0, 16
+-		andi	v0, v0, (ST0_CU1 >> 16)
+-
+-		beqz	v0,2f			/* disabled, skip */
+-		 nop
+-
+-		SDC1	$0, GDB_FR_FPR0(sp)
+-		SDC1	$1, GDB_FR_FPR1(sp)
+-		SDC1	$2, GDB_FR_FPR2(sp)
+-		SDC1	$3, GDB_FR_FPR3(sp)
+-		SDC1	$4, GDB_FR_FPR4(sp)
+-		SDC1	$5, GDB_FR_FPR5(sp)
+-		SDC1	$6, GDB_FR_FPR6(sp)
+-		SDC1	$7, GDB_FR_FPR7(sp)
+-		SDC1	$8, GDB_FR_FPR8(sp)
+-		SDC1	$9, GDB_FR_FPR9(sp)
+-		SDC1	$10, GDB_FR_FPR10(sp)
+-		SDC1	$11, GDB_FR_FPR11(sp)
+-		SDC1	$12, GDB_FR_FPR12(sp)
+-		SDC1	$13, GDB_FR_FPR13(sp)
+-		SDC1	$14, GDB_FR_FPR14(sp)
+-		SDC1	$15, GDB_FR_FPR15(sp)
+-		SDC1	$16, GDB_FR_FPR16(sp)
+-		SDC1	$17, GDB_FR_FPR17(sp)
+-		SDC1	$18, GDB_FR_FPR18(sp)
+-		SDC1	$19, GDB_FR_FPR19(sp)
+-		SDC1	$20, GDB_FR_FPR20(sp)
+-		SDC1	$21, GDB_FR_FPR21(sp)
+-		SDC1	$22, GDB_FR_FPR22(sp)
+-		SDC1	$23, GDB_FR_FPR23(sp)
+-		SDC1	$24, GDB_FR_FPR24(sp)
+-		SDC1	$25, GDB_FR_FPR25(sp)
+-		SDC1	$26, GDB_FR_FPR26(sp)
+-		SDC1	$27, GDB_FR_FPR27(sp)
+-		SDC1	$28, GDB_FR_FPR28(sp)
+-		SDC1	$29, GDB_FR_FPR29(sp)
+-		SDC1	$30, GDB_FR_FPR30(sp)
+-		SDC1	$31, GDB_FR_FPR31(sp)
+-
+-/*
+- * FPU control registers
+- */
+-
+-		cfc1	v0, CP1_STATUS
+-		LONG_S	v0, GDB_FR_FSR(sp)
+-		cfc1	v0, CP1_REVISION
+-		LONG_S	v0, GDB_FR_FIR(sp)
+-
+-/*
+- * Current stack frame ptr
+- */
+-
+-2:
+-		LONG_S	sp, GDB_FR_FRP(sp)
+-
+-/*
+- * CP0 registers (R4000/R4400 unused registers skipped)
+- */
+-
+-		mfc0	v0, CP0_INDEX
+-		LONG_S	v0, GDB_FR_CP0_INDEX(sp)
+-		mfc0	v0, CP0_RANDOM
+-		LONG_S	v0, GDB_FR_CP0_RANDOM(sp)
+-		DMFC0	v0, CP0_ENTRYLO0
+-		LONG_S	v0, GDB_FR_CP0_ENTRYLO0(sp)
+-		DMFC0	v0, CP0_ENTRYLO1
+-		LONG_S	v0, GDB_FR_CP0_ENTRYLO1(sp)
+-		DMFC0	v0, CP0_CONTEXT
+-		LONG_S	v0, GDB_FR_CP0_CONTEXT(sp)
+-		mfc0	v0, CP0_PAGEMASK
+-		LONG_S	v0, GDB_FR_CP0_PAGEMASK(sp)
+-		mfc0	v0, CP0_WIRED
+-		LONG_S	v0, GDB_FR_CP0_WIRED(sp)
+-		DMFC0	v0, CP0_ENTRYHI
+-		LONG_S	v0, GDB_FR_CP0_ENTRYHI(sp)
+-		mfc0	v0, CP0_PRID
+-		LONG_S	v0, GDB_FR_CP0_PRID(sp)
+-
+-		.set	at
+-
+-/*
+- * Continue with the higher level handler
+- */
+-
+-		move	a0,sp
+-
+-		jal	handle_exception
+-		 nop
+-
+-/*
+- * Restore all writable registers, in reverse order
+- */
+-
+-		.set	noat
+-
+-		LONG_L	v0, GDB_FR_CP0_ENTRYHI(sp)
+-		LONG_L	v1, GDB_FR_CP0_WIRED(sp)
+-		DMTC0	v0, CP0_ENTRYHI
+-		mtc0	v1, CP0_WIRED
+-		LONG_L	v0, GDB_FR_CP0_PAGEMASK(sp)
+-		LONG_L	v1, GDB_FR_CP0_ENTRYLO1(sp)
+-		mtc0	v0, CP0_PAGEMASK
+-		DMTC0	v1, CP0_ENTRYLO1
+-		LONG_L	v0, GDB_FR_CP0_ENTRYLO0(sp)
+-		LONG_L	v1, GDB_FR_CP0_INDEX(sp)
+-		DMTC0	v0, CP0_ENTRYLO0
+-		LONG_L	v0, GDB_FR_CP0_CONTEXT(sp)
+-		mtc0	v1, CP0_INDEX
+-		DMTC0	v0, CP0_CONTEXT
+-
+-
+-/*
+- * Next, the floating point registers
+- */
+-		mfc0	v0, CP0_STATUS		/* check if the FPU is enabled */
+-		srl	v0, v0, 16
+-		andi	v0, v0, (ST0_CU1 >> 16)
+-
+-		beqz	v0, 3f			/* disabled, skip */
+-		 nop
+-
+-		LDC1	$31, GDB_FR_FPR31(sp)
+-		LDC1	$30, GDB_FR_FPR30(sp)
+-		LDC1	$29, GDB_FR_FPR29(sp)
+-		LDC1	$28, GDB_FR_FPR28(sp)
+-		LDC1	$27, GDB_FR_FPR27(sp)
+-		LDC1	$26, GDB_FR_FPR26(sp)
+-		LDC1	$25, GDB_FR_FPR25(sp)
+-		LDC1	$24, GDB_FR_FPR24(sp)
+-		LDC1	$23, GDB_FR_FPR23(sp)
+-		LDC1	$22, GDB_FR_FPR22(sp)
+-		LDC1	$21, GDB_FR_FPR21(sp)
+-		LDC1	$20, GDB_FR_FPR20(sp)
+-		LDC1	$19, GDB_FR_FPR19(sp)
+-		LDC1	$18, GDB_FR_FPR18(sp)
+-		LDC1	$17, GDB_FR_FPR17(sp)
+-		LDC1	$16, GDB_FR_FPR16(sp)
+-		LDC1	$15, GDB_FR_FPR15(sp)
+-		LDC1	$14, GDB_FR_FPR14(sp)
+-		LDC1	$13, GDB_FR_FPR13(sp)
+-		LDC1	$12, GDB_FR_FPR12(sp)
+-		LDC1	$11, GDB_FR_FPR11(sp)
+-		LDC1	$10, GDB_FR_FPR10(sp)
+-		LDC1	$9, GDB_FR_FPR9(sp)
+-		LDC1	$8, GDB_FR_FPR8(sp)
+-		LDC1	$7, GDB_FR_FPR7(sp)
+-		LDC1	$6, GDB_FR_FPR6(sp)
+-		LDC1	$5, GDB_FR_FPR5(sp)
+-		LDC1	$4, GDB_FR_FPR4(sp)
+-		LDC1	$3, GDB_FR_FPR3(sp)
+-		LDC1	$2, GDB_FR_FPR2(sp)
+-		LDC1	$1, GDB_FR_FPR1(sp)
+-		LDC1	$0, GDB_FR_FPR0(sp)
+-
+-/*
+- * Now the CP0 and integer registers
+- */
+-
+-3:
+-#ifdef CONFIG_MIPS_MT_SMTC
+-		/* Read-modify write of Status must be atomic */
+-		mfc0	t2, CP0_TCSTATUS
+-		ori	t1, t2, TCSTATUS_IXMT
+-		mtc0	t1, CP0_TCSTATUS
+-		andi	t2, t2, TCSTATUS_IXMT
+-		_ehb
+-		DMT	9				# dmt	t1
+-		jal	mips_ihb
+-		nop
+-#endif /* CONFIG_MIPS_MT_SMTC */
+-		mfc0	t0, CP0_STATUS
+-		ori	t0, 0x1f
+-		xori	t0, 0x1f
+-		mtc0	t0, CP0_STATUS
+-#ifdef CONFIG_MIPS_MT_SMTC
+-        	andi    t1, t1, VPECONTROL_TE
+-        	beqz    t1, 9f
+-		nop
+-        	EMT					# emt
+-9:
+-		mfc0	t1, CP0_TCSTATUS
+-		xori	t1, t1, TCSTATUS_IXMT
+-		or	t1, t1, t2
+-		mtc0	t1, CP0_TCSTATUS
+-		_ehb
+-#endif /* CONFIG_MIPS_MT_SMTC */
+-		LONG_L	v0, GDB_FR_STATUS(sp)
+-		LONG_L	v1, GDB_FR_EPC(sp)
+-		mtc0	v0, CP0_STATUS
+-		DMTC0	v1, CP0_EPC
+-		LONG_L	v0, GDB_FR_HI(sp)
+-		LONG_L	v1, GDB_FR_LO(sp)
+-		mthi	v0
+-		mtlo	v1
+-		LONG_L	$31, GDB_FR_REG31(sp)
+-		LONG_L	$30, GDB_FR_REG30(sp)
+-		LONG_L	$28, GDB_FR_REG28(sp)
+-		LONG_L	$27, GDB_FR_REG27(sp)
+-		LONG_L	$26, GDB_FR_REG26(sp)
+-		LONG_L	$25, GDB_FR_REG25(sp)
+-		LONG_L	$24, GDB_FR_REG24(sp)
+-		LONG_L	$23, GDB_FR_REG23(sp)
+-		LONG_L	$22, GDB_FR_REG22(sp)
+-		LONG_L	$21, GDB_FR_REG21(sp)
+-		LONG_L	$20, GDB_FR_REG20(sp)
+-		LONG_L	$19, GDB_FR_REG19(sp)
+-		LONG_L	$18, GDB_FR_REG18(sp)
+-		LONG_L	$17, GDB_FR_REG17(sp)
+-		LONG_L	$16, GDB_FR_REG16(sp)
+-		LONG_L	$15, GDB_FR_REG15(sp)
+-		LONG_L	$14, GDB_FR_REG14(sp)
+-		LONG_L	$13, GDB_FR_REG13(sp)
+-		LONG_L	$12, GDB_FR_REG12(sp)
+-		LONG_L	$11, GDB_FR_REG11(sp)
+-		LONG_L	$10, GDB_FR_REG10(sp)
+-		LONG_L	$9, GDB_FR_REG9(sp)
+-		LONG_L	$8, GDB_FR_REG8(sp)
+-		LONG_L	$7, GDB_FR_REG7(sp)
+-		LONG_L	$6, GDB_FR_REG6(sp)
+-		LONG_L	$5, GDB_FR_REG5(sp)
+-		LONG_L	$4, GDB_FR_REG4(sp)
+-		LONG_L	$3, GDB_FR_REG3(sp)
+-		LONG_L	$2, GDB_FR_REG2(sp)
+-		LONG_L	$1, GDB_FR_REG1(sp)
+-#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
+-		LONG_L	k0, GDB_FR_EPC(sp)
+-		LONG_L	$29, GDB_FR_REG29(sp)		/* Deallocate stack */
+-		jr	k0
+-		rfe
+-#else
+-		LONG_L	sp, GDB_FR_REG29(sp)		/* Deallocate stack */
+-
+-		.set	mips3
+-		eret
+-		.set	mips0
+-#endif
+-		.set	at
+-		.set	reorder
+-		END(trap_low)
+-
+-LEAF(kgdb_read_byte)
+-4:		lb	t0, (a0)
+-		sb	t0, (a1)
+-		li	v0, 0
+-		jr	ra
+-		.section __ex_table,"a"
+-		PTR	4b, kgdbfault
+-		.previous
+-		END(kgdb_read_byte)
+-
+-LEAF(kgdb_write_byte)
+-5:		sb	a0, (a1)
+-		li	v0, 0
+-		jr	ra
+-		.section __ex_table,"a"
+-		PTR	5b, kgdbfault
+-		.previous
+-		END(kgdb_write_byte)
+-
+-		.type	kgdbfault@function
+-		.ent	kgdbfault
+-
+-kgdbfault:	li	v0, -EFAULT
+-		jr	ra
+-		.end	kgdbfault
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/gdb-stub.c linux-2.6.22-try2/arch/mips/kernel/gdb-stub.c
+--- linux-2.6.22-570/arch/mips/kernel/gdb-stub.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/kernel/gdb-stub.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,1154 +0,0 @@
+-/*
+- *  arch/mips/kernel/gdb-stub.c
+- *
+- *  Originally written by Glenn Engel, Lake Stevens Instrument Division
+- *
+- *  Contributed by HP Systems
+- *
+- *  Modified for SPARC by Stu Grossman, Cygnus Support.
+- *
+- *  Modified for Linux/MIPS (and MIPS in general) by Andreas Busse
+- *  Send complaints, suggestions etc. to <andy@waldorf-gmbh.de>
+- *
+- *  Copyright (C) 1995 Andreas Busse
+- *
+- *  Copyright (C) 2003 MontaVista Software Inc.
+- *  Author: Jun Sun, jsun@mvista.com or jsun@junsun.net
+- */
+-
+-/*
+- *  To enable debugger support, two things need to happen.  One, a
+- *  call to set_debug_traps() is necessary in order to allow any breakpoints
+- *  or error conditions to be properly intercepted and reported to gdb.
+- *  Two, a breakpoint needs to be generated to begin communication.  This
+- *  is most easily accomplished by a call to breakpoint().  Breakpoint()
+- *  simulates a breakpoint by executing a BREAK instruction.
+- *
+- *
+- *    The following gdb commands are supported:
+- *
+- * command          function                               Return value
+- *
+- *    g             return the value of the CPU registers  hex data or ENN
+- *    G             set the value of the CPU registers     OK or ENN
+- *
+- *    mAA..AA,LLLL  Read LLLL bytes at address AA..AA      hex data or ENN
+- *    MAA..AA,LLLL: Write LLLL bytes at address AA.AA      OK or ENN
+- *
+- *    c             Resume at current address              SNN   ( signal NN)
+- *    cAA..AA       Continue at address AA..AA             SNN
+- *
+- *    s             Step one instruction                   SNN
+- *    sAA..AA       Step one instruction from AA..AA       SNN
+- *
+- *    k             kill
+- *
+- *    ?             What was the last sigval ?             SNN   (signal NN)
+- *
+- *    bBB..BB	    Set baud rate to BB..BB		   OK or BNN, then sets
+- *							   baud rate
+- *
+- * All commands and responses are sent with a packet which includes a
+- * checksum.  A packet consists of
+- *
+- * $<packet info>#<checksum>.
+- *
+- * where
+- * <packet info> :: <characters representing the command or response>
+- * <checksum>    :: < two hex digits computed as modulo 256 sum of <packetinfo>>
+- *
+- * When a packet is received, it is first acknowledged with either '+' or '-'.
+- * '+' indicates a successful transfer.  '-' indicates a failed transfer.
+- *
+- * Example:
+- *
+- * Host:                  Reply:
+- * $m0,10#2a               +$00010203040506070809101112131415#42
+- *
+- *
+- *  ==============
+- *  MORE EXAMPLES:
+- *  ==============
+- *
+- *  For reference -- the following are the steps that one
+- *  company took (RidgeRun Inc) to get remote gdb debugging
+- *  going. In this scenario the host machine was a PC and the
+- *  target platform was a Galileo EVB64120A MIPS evaluation
+- *  board.
+- *
+- *  Step 1:
+- *  First download gdb-5.0.tar.gz from the internet.
+- *  and then build/install the package.
+- *
+- *  Example:
+- *    $ tar zxf gdb-5.0.tar.gz
+- *    $ cd gdb-5.0
+- *    $ ./configure --target=mips-linux-elf
+- *    $ make
+- *    $ install
+- *    $ which mips-linux-elf-gdb
+- *    /usr/local/bin/mips-linux-elf-gdb
+- *
+- *  Step 2:
+- *  Configure linux for remote debugging and build it.
+- *
+- *  Example:
+- *    $ cd ~/linux
+- *    $ make menuconfig <go to "Kernel Hacking" and turn on remote debugging>
+- *    $ make
+- *
+- *  Step 3:
+- *  Download the kernel to the remote target and start
+- *  the kernel running. It will promptly halt and wait
+- *  for the host gdb session to connect. It does this
+- *  since the "Kernel Hacking" option has defined
+- *  CONFIG_KGDB which in turn enables your calls
+- *  to:
+- *     set_debug_traps();
+- *     breakpoint();
+- *
+- *  Step 4:
+- *  Start the gdb session on the host.
+- *
+- *  Example:
+- *    $ mips-linux-elf-gdb vmlinux
+- *    (gdb) set remotebaud 115200
+- *    (gdb) target remote /dev/ttyS1
+- *    ...at this point you are connected to
+- *       the remote target and can use gdb
+- *       in the normal fasion. Setting
+- *       breakpoints, single stepping,
+- *       printing variables, etc.
+- */
+-#include <linux/string.h>
+-#include <linux/kernel.h>
+-#include <linux/signal.h>
+-#include <linux/sched.h>
+-#include <linux/mm.h>
+-#include <linux/console.h>
+-#include <linux/init.h>
+-#include <linux/smp.h>
+-#include <linux/spinlock.h>
+-#include <linux/slab.h>
+-#include <linux/reboot.h>
+-
+-#include <asm/asm.h>
+-#include <asm/cacheflush.h>
+-#include <asm/mipsregs.h>
+-#include <asm/pgtable.h>
+-#include <asm/system.h>
+-#include <asm/gdb-stub.h>
+-#include <asm/inst.h>
+-#include <asm/smp.h>
+-
+-/*
+- * external low-level support routines
+- */
+-
+-extern int putDebugChar(char c);    /* write a single character      */
+-extern char getDebugChar(void);     /* read and return a single char */
+-extern void trap_low(void);
+-
+-/*
+- * breakpoint and test functions
+- */
+-extern void breakpoint(void);
+-extern void breakinst(void);
+-extern void async_breakpoint(void);
+-extern void async_breakinst(void);
+-extern void adel(void);
+-
+-/*
+- * local prototypes
+- */
+-
+-static void getpacket(char *buffer);
+-static void putpacket(char *buffer);
+-static int computeSignal(int tt);
+-static int hex(unsigned char ch);
+-static int hexToInt(char **ptr, int *intValue);
+-static int hexToLong(char **ptr, long *longValue);
+-static unsigned char *mem2hex(char *mem, char *buf, int count, int may_fault);
+-void handle_exception(struct gdb_regs *regs);
+-
+-int kgdb_enabled;
+-
+-/*
+- * spin locks for smp case
+- */
+-static DEFINE_SPINLOCK(kgdb_lock);
+-static raw_spinlock_t kgdb_cpulock[NR_CPUS] = {
+-	[0 ... NR_CPUS-1] = __RAW_SPIN_LOCK_UNLOCKED,
+-};
+-
+-/*
+- * BUFMAX defines the maximum number of characters in inbound/outbound buffers
+- * at least NUMREGBYTES*2 are needed for register packets
+- */
+-#define BUFMAX 2048
+-
+-static char input_buffer[BUFMAX];
+-static char output_buffer[BUFMAX];
+-static int initialized;	/* !0 means we've been initialized */
+-static int kgdb_started;
+-static const char hexchars[]="0123456789abcdef";
+-
+-/* Used to prevent crashes in memory access.  Note that they'll crash anyway if
+-   we haven't set up fault handlers yet... */
+-int kgdb_read_byte(unsigned char *address, unsigned char *dest);
+-int kgdb_write_byte(unsigned char val, unsigned char *dest);
+-
+-/*
+- * Convert ch from a hex digit to an int
+- */
+-static int hex(unsigned char ch)
+-{
+-	if (ch >= 'a' && ch <= 'f')
+-		return ch-'a'+10;
+-	if (ch >= '0' && ch <= '9')
+-		return ch-'0';
+-	if (ch >= 'A' && ch <= 'F')
+-		return ch-'A'+10;
+-	return -1;
+-}
+-
+-/*
+- * scan for the sequence $<data>#<checksum>
+- */
+-static void getpacket(char *buffer)
+-{
+-	unsigned char checksum;
+-	unsigned char xmitcsum;
+-	int i;
+-	int count;
+-	unsigned char ch;
+-
+-	do {
+-		/*
+-		 * wait around for the start character,
+-		 * ignore all other characters
+-		 */
+-		while ((ch = (getDebugChar() & 0x7f)) != '$') ;
+-
+-		checksum = 0;
+-		xmitcsum = -1;
+-		count = 0;
+-
+-		/*
+-		 * now, read until a # or end of buffer is found
+-		 */
+-		while (count < BUFMAX) {
+-			ch = getDebugChar();
+-			if (ch == '#')
+-				break;
+-			checksum = checksum + ch;
+-			buffer[count] = ch;
+-			count = count + 1;
+-		}
+-
+-		if (count >= BUFMAX)
+-			continue;
+-
+-		buffer[count] = 0;
+-
+-		if (ch == '#') {
+-			xmitcsum = hex(getDebugChar() & 0x7f) << 4;
+-			xmitcsum |= hex(getDebugChar() & 0x7f);
+-
+-			if (checksum != xmitcsum)
+-				putDebugChar('-');	/* failed checksum */
+-			else {
+-				putDebugChar('+'); /* successful transfer */
+-
+-				/*
+-				 * if a sequence char is present,
+-				 * reply the sequence ID
+-				 */
+-				if (buffer[2] == ':') {
+-					putDebugChar(buffer[0]);
+-					putDebugChar(buffer[1]);
+-
+-					/*
+-					 * remove sequence chars from buffer
+-					 */
+-					count = strlen(buffer);
+-					for (i=3; i <= count; i++)
+-						buffer[i-3] = buffer[i];
+-				}
+-			}
+-		}
+-	}
+-	while (checksum != xmitcsum);
+-}
+-
+-/*
+- * send the packet in buffer.
+- */
+-static void putpacket(char *buffer)
+-{
+-	unsigned char checksum;
+-	int count;
+-	unsigned char ch;
+-
+-	/*
+-	 * $<packet info>#<checksum>.
+-	 */
+-
+-	do {
+-		putDebugChar('$');
+-		checksum = 0;
+-		count = 0;
+-
+-		while ((ch = buffer[count]) != 0) {
+-			if (!(putDebugChar(ch)))
+-				return;
+-			checksum += ch;
+-			count += 1;
+-		}
+-
+-		putDebugChar('#');
+-		putDebugChar(hexchars[checksum >> 4]);
+-		putDebugChar(hexchars[checksum & 0xf]);
+-
+-	}
+-	while ((getDebugChar() & 0x7f) != '+');
+-}
+-
+-
+-/*
+- * Convert the memory pointed to by mem into hex, placing result in buf.
+- * Return a pointer to the last char put in buf (null), in case of mem fault,
+- * return 0.
+- * may_fault is non-zero if we are reading from arbitrary memory, but is currently
+- * not used.
+- */
+-static unsigned char *mem2hex(char *mem, char *buf, int count, int may_fault)
+-{
+-	unsigned char ch;
+-
+-	while (count-- > 0) {
+-		if (kgdb_read_byte(mem++, &ch) != 0)
+-			return 0;
+-		*buf++ = hexchars[ch >> 4];
+-		*buf++ = hexchars[ch & 0xf];
+-	}
+-
+-	*buf = 0;
+-
+-	return buf;
+-}
+-
+-/*
+- * convert the hex array pointed to by buf into binary to be placed in mem
+- * return a pointer to the character AFTER the last byte written
+- * may_fault is non-zero if we are reading from arbitrary memory, but is currently
+- * not used.
+- */
+-static char *hex2mem(char *buf, char *mem, int count, int binary, int may_fault)
+-{
+-	int i;
+-	unsigned char ch;
+-
+-	for (i=0; i<count; i++)
+-	{
+-		if (binary) {
+-			ch = *buf++;
+-			if (ch == 0x7d)
+-				ch = 0x20 ^ *buf++;
+-		}
+-		else {
+-			ch = hex(*buf++) << 4;
+-			ch |= hex(*buf++);
+-		}
+-		if (kgdb_write_byte(ch, mem++) != 0)
+-			return 0;
+-	}
+-
+-	return mem;
+-}
+-
+-/*
+- * This table contains the mapping between SPARC hardware trap types, and
+- * signals, which are primarily what GDB understands.  It also indicates
+- * which hardware traps we need to commandeer when initializing the stub.
+- */
+-static struct hard_trap_info {
+-	unsigned char tt;		/* Trap type code for MIPS R3xxx and R4xxx */
+-	unsigned char signo;		/* Signal that we map this trap into */
+-} hard_trap_info[] = {
+-	{ 6, SIGBUS },			/* instruction bus error */
+-	{ 7, SIGBUS },			/* data bus error */
+-	{ 9, SIGTRAP },			/* break */
+-	{ 10, SIGILL },			/* reserved instruction */
+-/*	{ 11, SIGILL },		*/	/* CPU unusable */
+-	{ 12, SIGFPE },			/* overflow */
+-	{ 13, SIGTRAP },		/* trap */
+-	{ 14, SIGSEGV },		/* virtual instruction cache coherency */
+-	{ 15, SIGFPE },			/* floating point exception */
+-	{ 23, SIGSEGV },		/* watch */
+-	{ 31, SIGSEGV },		/* virtual data cache coherency */
+-	{ 0, 0}				/* Must be last */
+-};
+-
+-/* Save the normal trap handlers for user-mode traps. */
+-void *saved_vectors[32];
+-
+-/*
+- * Set up exception handlers for tracing and breakpoints
+- */
+-void set_debug_traps(void)
+-{
+-	struct hard_trap_info *ht;
+-	unsigned long flags;
+-	unsigned char c;
+-
+-	local_irq_save(flags);
+-	for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+-		saved_vectors[ht->tt] = set_except_vector(ht->tt, trap_low);
+-
+-	putDebugChar('+'); /* 'hello world' */
+-	/*
+-	 * In case GDB is started before us, ack any packets
+-	 * (presumably "$?#xx") sitting there.
+-	 */
+-	while((c = getDebugChar()) != '$');
+-	while((c = getDebugChar()) != '#');
+-	c = getDebugChar(); /* eat first csum byte */
+-	c = getDebugChar(); /* eat second csum byte */
+-	putDebugChar('+'); /* ack it */
+-
+-	initialized = 1;
+-	local_irq_restore(flags);
+-}
+-
+-void restore_debug_traps(void)
+-{
+-	struct hard_trap_info *ht;
+-	unsigned long flags;
+-
+-	local_irq_save(flags);
+-	for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+-		set_except_vector(ht->tt, saved_vectors[ht->tt]);
+-	local_irq_restore(flags);
+-}
+-
+-/*
+- * Convert the MIPS hardware trap type code to a Unix signal number.
+- */
+-static int computeSignal(int tt)
+-{
+-	struct hard_trap_info *ht;
+-
+-	for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+-		if (ht->tt == tt)
+-			return ht->signo;
+-
+-	return SIGHUP;		/* default for things we don't know about */
+-}
+-
+-/*
+- * While we find nice hex chars, build an int.
+- * Return number of chars processed.
+- */
+-static int hexToInt(char **ptr, int *intValue)
+-{
+-	int numChars = 0;
+-	int hexValue;
+-
+-	*intValue = 0;
+-
+-	while (**ptr) {
+-		hexValue = hex(**ptr);
+-		if (hexValue < 0)
+-			break;
+-
+-		*intValue = (*intValue << 4) | hexValue;
+-		numChars ++;
+-
+-		(*ptr)++;
+-	}
+-
+-	return (numChars);
+-}
+-
+-static int hexToLong(char **ptr, long *longValue)
+-{
+-	int numChars = 0;
+-	int hexValue;
+-
+-	*longValue = 0;
+-
+-	while (**ptr) {
+-		hexValue = hex(**ptr);
+-		if (hexValue < 0)
+-			break;
+-
+-		*longValue = (*longValue << 4) | hexValue;
+-		numChars ++;
+-
+-		(*ptr)++;
+-	}
+-
+-	return numChars;
+-}
+-
+-
+-#if 0
+-/*
+- * Print registers (on target console)
+- * Used only to debug the stub...
+- */
+-void show_gdbregs(struct gdb_regs * regs)
+-{
+-	/*
+-	 * Saved main processor registers
+-	 */
+-	printk("$0 : %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n",
+-	       regs->reg0, regs->reg1, regs->reg2, regs->reg3,
+-	       regs->reg4, regs->reg5, regs->reg6, regs->reg7);
+-	printk("$8 : %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n",
+-	       regs->reg8, regs->reg9, regs->reg10, regs->reg11,
+-	       regs->reg12, regs->reg13, regs->reg14, regs->reg15);
+-	printk("$16: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n",
+-	       regs->reg16, regs->reg17, regs->reg18, regs->reg19,
+-	       regs->reg20, regs->reg21, regs->reg22, regs->reg23);
+-	printk("$24: %08lx %08lx %08lx %08lx %08lx %08lx %08lx %08lx\n",
+-	       regs->reg24, regs->reg25, regs->reg26, regs->reg27,
+-	       regs->reg28, regs->reg29, regs->reg30, regs->reg31);
+-
+-	/*
+-	 * Saved cp0 registers
+-	 */
+-	printk("epc  : %08lx\nStatus: %08lx\nCause : %08lx\n",
+-	       regs->cp0_epc, regs->cp0_status, regs->cp0_cause);
+-}
+-#endif /* dead code */
+-
+-/*
+- * We single-step by setting breakpoints. When an exception
+- * is handled, we need to restore the instructions hoisted
+- * when the breakpoints were set.
+- *
+- * This is where we save the original instructions.
+- */
+-static struct gdb_bp_save {
+-	unsigned long addr;
+-	unsigned int val;
+-} step_bp[2];
+-
+-#define BP 0x0000000d  /* break opcode */
+-
+-/*
+- * Set breakpoint instructions for single stepping.
+- */
+-static void single_step(struct gdb_regs *regs)
+-{
+-	union mips_instruction insn;
+-	unsigned long targ;
+-	int is_branch, is_cond, i;
+-
+-	targ = regs->cp0_epc;
+-	insn.word = *(unsigned int *)targ;
+-	is_branch = is_cond = 0;
+-
+-	switch (insn.i_format.opcode) {
+-	/*
+-	 * jr and jalr are in r_format format.
+-	 */
+-	case spec_op:
+-		switch (insn.r_format.func) {
+-		case jalr_op:
+-		case jr_op:
+-			targ = *(&regs->reg0 + insn.r_format.rs);
+-			is_branch = 1;
+-			break;
+-		}
+-		break;
+-
+-	/*
+-	 * This group contains:
+-	 * bltz_op, bgez_op, bltzl_op, bgezl_op,
+-	 * bltzal_op, bgezal_op, bltzall_op, bgezall_op.
+-	 */
+-	case bcond_op:
+-		is_branch = is_cond = 1;
+-		targ += 4 + (insn.i_format.simmediate << 2);
+-		break;
+-
+-	/*
+-	 * These are unconditional and in j_format.
+-	 */
+-	case jal_op:
+-	case j_op:
+-		is_branch = 1;
+-		targ += 4;
+-		targ >>= 28;
+-		targ <<= 28;
+-		targ |= (insn.j_format.target << 2);
+-		break;
+-
+-	/*
+-	 * These are conditional.
+-	 */
+-	case beq_op:
+-	case beql_op:
+-	case bne_op:
+-	case bnel_op:
+-	case blez_op:
+-	case blezl_op:
+-	case bgtz_op:
+-	case bgtzl_op:
+-	case cop0_op:
+-	case cop1_op:
+-	case cop2_op:
+-	case cop1x_op:
+-		is_branch = is_cond = 1;
+-		targ += 4 + (insn.i_format.simmediate << 2);
+-		break;
+-	}
+-
+-	if (is_branch) {
+-		i = 0;
+-		if (is_cond && targ != (regs->cp0_epc + 8)) {
+-			step_bp[i].addr = regs->cp0_epc + 8;
+-			step_bp[i++].val = *(unsigned *)(regs->cp0_epc + 8);
+-			*(unsigned *)(regs->cp0_epc + 8) = BP;
+-		}
+-		step_bp[i].addr = targ;
+-		step_bp[i].val  = *(unsigned *)targ;
+-		*(unsigned *)targ = BP;
+-	} else {
+-		step_bp[0].addr = regs->cp0_epc + 4;
+-		step_bp[0].val  = *(unsigned *)(regs->cp0_epc + 4);
+-		*(unsigned *)(regs->cp0_epc + 4) = BP;
+-	}
+-}
+-
+-/*
+- *  If asynchronously interrupted by gdb, then we need to set a breakpoint
+- *  at the interrupted instruction so that we wind up stopped with a
+- *  reasonable stack frame.
+- */
+-static struct gdb_bp_save async_bp;
+-
+-/*
+- * Swap the interrupted EPC with our asynchronous breakpoint routine.
+- * This is safer than stuffing the breakpoint in-place, since no cache
+- * flushes (or resulting smp_call_functions) are required.  The
+- * assumption is that only one CPU will be handling asynchronous bp's,
+- * and only one can be active at a time.
+- */
+-extern spinlock_t smp_call_lock;
+-
+-void set_async_breakpoint(unsigned long *epc)
+-{
+-	/* skip breaking into userland */
+-	if ((*epc & 0x80000000) == 0)
+-		return;
+-
+-#ifdef CONFIG_SMP
+-	/* avoid deadlock if someone is make IPC */
+-	if (spin_is_locked(&smp_call_lock))
+-		return;
+-#endif
+-
+-	async_bp.addr = *epc;
+-	*epc = (unsigned long)async_breakpoint;
+-}
+-
+-static void kgdb_wait(void *arg)
+-{
+-	unsigned flags;
+-	int cpu = smp_processor_id();
+-
+-	local_irq_save(flags);
+-
+-	__raw_spin_lock(&kgdb_cpulock[cpu]);
+-	__raw_spin_unlock(&kgdb_cpulock[cpu]);
+-
+-	local_irq_restore(flags);
+-}
+-
+-/*
+- * GDB stub needs to call kgdb_wait on all processor with interrupts
+- * disabled, so it uses it's own special variant.
+- */
+-static int kgdb_smp_call_kgdb_wait(void)
+-{
+-#ifdef CONFIG_SMP
+-	struct call_data_struct data;
+-	int i, cpus = num_online_cpus() - 1;
+-	int cpu = smp_processor_id();
+-
+-	/*
+-	 * Can die spectacularly if this CPU isn't yet marked online
+-	 */
+-	BUG_ON(!cpu_online(cpu));
+-
+-	if (!cpus)
+-		return 0;
+-
+-	if (spin_is_locked(&smp_call_lock)) {
+-		/*
+-		 * Some other processor is trying to make us do something
+-		 * but we're not going to respond... give up
+-		 */
+-		return -1;
+-		}
+-
+-	/*
+-	 * We will continue here, accepting the fact that
+-	 * the kernel may deadlock if another CPU attempts
+-	 * to call smp_call_function now...
+-	 */
+-
+-	data.func = kgdb_wait;
+-	data.info = NULL;
+-	atomic_set(&data.started, 0);
+-	data.wait = 0;
+-
+-	spin_lock(&smp_call_lock);
+-	call_data = &data;
+-	mb();
+-
+-	/* Send a message to all other CPUs and wait for them to respond */
+-	for (i = 0; i < NR_CPUS; i++)
+-		if (cpu_online(i) && i != cpu)
+-			core_send_ipi(i, SMP_CALL_FUNCTION);
+-
+-	/* Wait for response */
+-	/* FIXME: lock-up detection, backtrace on lock-up */
+-	while (atomic_read(&data.started) != cpus)
+-		barrier();
+-
+-	call_data = NULL;
+-	spin_unlock(&smp_call_lock);
+-#endif
+-
+-	return 0;
+-}
+-
+-/*
+- * This function does all command processing for interfacing to gdb.  It
+- * returns 1 if you should skip the instruction at the trap address, 0
+- * otherwise.
+- */
+-void handle_exception (struct gdb_regs *regs)
+-{
+-	int trap;			/* Trap type */
+-	int sigval;
+-	long addr;
+-	int length;
+-	char *ptr;
+-	unsigned long *stack;
+-	int i;
+-	int bflag = 0;
+-
+-	kgdb_started = 1;
+-
+-	/*
+-	 * acquire the big kgdb spinlock
+-	 */
+-	if (!spin_trylock(&kgdb_lock)) {
+-		/*
+-		 * some other CPU has the lock, we should go back to
+-		 * receive the gdb_wait IPC
+-		 */
+-		return;
+-	}
+-
+-	/*
+-	 * If we're in async_breakpoint(), restore the real EPC from
+-	 * the breakpoint.
+-	 */
+-	if (regs->cp0_epc == (unsigned long)async_breakinst) {
+-		regs->cp0_epc = async_bp.addr;
+-		async_bp.addr = 0;
+-	}
+-
+-	/*
+-	 * acquire the CPU spinlocks
+-	 */
+-	for (i = num_online_cpus()-1; i >= 0; i--)
+-		if (__raw_spin_trylock(&kgdb_cpulock[i]) == 0)
+-			panic("kgdb: couldn't get cpulock %d\n", i);
+-
+-	/*
+-	 * force other cpus to enter kgdb
+-	 */
+-	kgdb_smp_call_kgdb_wait();
+-
+-	/*
+-	 * If we're in breakpoint() increment the PC
+-	 */
+-	trap = (regs->cp0_cause & 0x7c) >> 2;
+-	if (trap == 9 && regs->cp0_epc == (unsigned long)breakinst)
+-		regs->cp0_epc += 4;
+-
+-	/*
+-	 * If we were single_stepping, restore the opcodes hoisted
+-	 * for the breakpoint[s].
+-	 */
+-	if (step_bp[0].addr) {
+-		*(unsigned *)step_bp[0].addr = step_bp[0].val;
+-		step_bp[0].addr = 0;
+-
+-		if (step_bp[1].addr) {
+-			*(unsigned *)step_bp[1].addr = step_bp[1].val;
+-			step_bp[1].addr = 0;
+-		}
+-	}
+-
+-	stack = (long *)regs->reg29;			/* stack ptr */
+-	sigval = computeSignal(trap);
+-
+-	/*
+-	 * reply to host that an exception has occurred
+-	 */
+-	ptr = output_buffer;
+-
+-	/*
+-	 * Send trap type (converted to signal)
+-	 */
+-	*ptr++ = 'T';
+-	*ptr++ = hexchars[sigval >> 4];
+-	*ptr++ = hexchars[sigval & 0xf];
+-
+-	/*
+-	 * Send Error PC
+-	 */
+-	*ptr++ = hexchars[REG_EPC >> 4];
+-	*ptr++ = hexchars[REG_EPC & 0xf];
+-	*ptr++ = ':';
+-	ptr = mem2hex((char *)&regs->cp0_epc, ptr, sizeof(long), 0);
+-	*ptr++ = ';';
+-
+-	/*
+-	 * Send frame pointer
+-	 */
+-	*ptr++ = hexchars[REG_FP >> 4];
+-	*ptr++ = hexchars[REG_FP & 0xf];
+-	*ptr++ = ':';
+-	ptr = mem2hex((char *)&regs->reg30, ptr, sizeof(long), 0);
+-	*ptr++ = ';';
+-
+-	/*
+-	 * Send stack pointer
+-	 */
+-	*ptr++ = hexchars[REG_SP >> 4];
+-	*ptr++ = hexchars[REG_SP & 0xf];
+-	*ptr++ = ':';
+-	ptr = mem2hex((char *)&regs->reg29, ptr, sizeof(long), 0);
+-	*ptr++ = ';';
+-
+-	*ptr++ = 0;
+-	putpacket(output_buffer);	/* send it off... */
+-
+-	/*
+-	 * Wait for input from remote GDB
+-	 */
+-	while (1) {
+-		output_buffer[0] = 0;
+-		getpacket(input_buffer);
+-
+-		switch (input_buffer[0])
+-		{
+-		case '?':
+-			output_buffer[0] = 'S';
+-			output_buffer[1] = hexchars[sigval >> 4];
+-			output_buffer[2] = hexchars[sigval & 0xf];
+-			output_buffer[3] = 0;
+-			break;
+-
+-		/*
+-		 * Detach debugger; let CPU run
+-		 */
+-		case 'D':
+-			putpacket(output_buffer);
+-			goto finish_kgdb;
+-			break;
+-
+-		case 'd':
+-			/* toggle debug flag */
+-			break;
+-
+-		/*
+-		 * Return the value of the CPU registers
+-		 */
+-		case 'g':
+-			ptr = output_buffer;
+-			ptr = mem2hex((char *)&regs->reg0, ptr, 32*sizeof(long), 0); /* r0...r31 */
+-			ptr = mem2hex((char *)&regs->cp0_status, ptr, 6*sizeof(long), 0); /* cp0 */
+-			ptr = mem2hex((char *)&regs->fpr0, ptr, 32*sizeof(long), 0); /* f0...31 */
+-			ptr = mem2hex((char *)&regs->cp1_fsr, ptr, 2*sizeof(long), 0); /* cp1 */
+-			ptr = mem2hex((char *)&regs->frame_ptr, ptr, 2*sizeof(long), 0); /* frp */
+-			ptr = mem2hex((char *)&regs->cp0_index, ptr, 16*sizeof(long), 0); /* cp0 */
+-			break;
+-
+-		/*
+-		 * set the value of the CPU registers - return OK
+-		 */
+-		case 'G':
+-		{
+-			ptr = &input_buffer[1];
+-			hex2mem(ptr, (char *)&regs->reg0, 32*sizeof(long), 0, 0);
+-			ptr += 32*(2*sizeof(long));
+-			hex2mem(ptr, (char *)&regs->cp0_status, 6*sizeof(long), 0, 0);
+-			ptr += 6*(2*sizeof(long));
+-			hex2mem(ptr, (char *)&regs->fpr0, 32*sizeof(long), 0, 0);
+-			ptr += 32*(2*sizeof(long));
+-			hex2mem(ptr, (char *)&regs->cp1_fsr, 2*sizeof(long), 0, 0);
+-			ptr += 2*(2*sizeof(long));
+-			hex2mem(ptr, (char *)&regs->frame_ptr, 2*sizeof(long), 0, 0);
+-			ptr += 2*(2*sizeof(long));
+-			hex2mem(ptr, (char *)&regs->cp0_index, 16*sizeof(long), 0, 0);
+-			strcpy(output_buffer,"OK");
+-		 }
+-		break;
+-
+-		/*
+-		 * mAA..AA,LLLL  Read LLLL bytes at address AA..AA
+-		 */
+-		case 'm':
+-			ptr = &input_buffer[1];
+-
+-			if (hexToLong(&ptr, &addr)
+-				&& *ptr++ == ','
+-				&& hexToInt(&ptr, &length)) {
+-				if (mem2hex((char *)addr, output_buffer, length, 1))
+-					break;
+-				strcpy (output_buffer, "E03");
+-			} else
+-				strcpy(output_buffer,"E01");
+-			break;
+-
+-		/*
+-		 * XAA..AA,LLLL: Write LLLL escaped binary bytes at address AA.AA
+-		 */
+-		case 'X':
+-			bflag = 1;
+-			/* fall through */
+-
+-		/*
+-		 * MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK
+-		 */
+-		case 'M':
+-			ptr = &input_buffer[1];
+-
+-			if (hexToLong(&ptr, &addr)
+-				&& *ptr++ == ','
+-				&& hexToInt(&ptr, &length)
+-				&& *ptr++ == ':') {
+-				if (hex2mem(ptr, (char *)addr, length, bflag, 1))
+-					strcpy(output_buffer, "OK");
+-				else
+-					strcpy(output_buffer, "E03");
+-			}
+-			else
+-				strcpy(output_buffer, "E02");
+-			break;
+-
+-		/*
+-		 * cAA..AA    Continue at address AA..AA(optional)
+-		 */
+-		case 'c':
+-			/* try to read optional parameter, pc unchanged if no parm */
+-
+-			ptr = &input_buffer[1];
+-			if (hexToLong(&ptr, &addr))
+-				regs->cp0_epc = addr;
+-
+-			goto exit_kgdb_exception;
+-			break;
+-
+-		/*
+-		 * kill the program; let us try to restart the machine
+-		 * Reset the whole machine.
+-		 */
+-		case 'k':
+-		case 'r':
+-			machine_restart("kgdb restarts machine");
+-			break;
+-
+-		/*
+-		 * Step to next instruction
+-		 */
+-		case 's':
+-			/*
+-			 * There is no single step insn in the MIPS ISA, so we
+-			 * use breakpoints and continue, instead.
+-			 */
+-			single_step(regs);
+-			goto exit_kgdb_exception;
+-			/* NOTREACHED */
+-			break;
+-
+-		/*
+-		 * Set baud rate (bBB)
+-		 * FIXME: Needs to be written
+-		 */
+-		case 'b':
+-		{
+-#if 0
+-			int baudrate;
+-			extern void set_timer_3();
+-
+-			ptr = &input_buffer[1];
+-			if (!hexToInt(&ptr, &baudrate))
+-			{
+-				strcpy(output_buffer,"B01");
+-				break;
+-			}
+-
+-			/* Convert baud rate to uart clock divider */
+-
+-			switch (baudrate)
+-			{
+-				case 38400:
+-					baudrate = 16;
+-					break;
+-				case 19200:
+-					baudrate = 33;
+-					break;
+-				case 9600:
+-					baudrate = 65;
+-					break;
+-				default:
+-					baudrate = 0;
+-					strcpy(output_buffer,"B02");
+-					goto x1;
+-			}
+-
+-			if (baudrate) {
+-				putpacket("OK");	/* Ack before changing speed */
+-				set_timer_3(baudrate); /* Set it */
+-			}
+-#endif
+-		}
+-		break;
+-
+-		}			/* switch */
+-
+-		/*
+-		 * reply to the request
+-		 */
+-
+-		putpacket(output_buffer);
+-
+-	} /* while */
+-
+-	return;
+-
+-finish_kgdb:
+-	restore_debug_traps();
+-
+-exit_kgdb_exception:
+-	/* release locks so other CPUs can go */
+-	for (i = num_online_cpus()-1; i >= 0; i--)
+-		__raw_spin_unlock(&kgdb_cpulock[i]);
+-	spin_unlock(&kgdb_lock);
+-
+-	__flush_cache_all();
+-	return;
+-}
+-
+-/*
+- * This function will generate a breakpoint exception.  It is used at the
+- * beginning of a program to sync up with a debugger and can be used
+- * otherwise as a quick means to stop program execution and "break" into
+- * the debugger.
+- */
+-void breakpoint(void)
+-{
+-	if (!initialized)
+-		return;
+-
+-	__asm__ __volatile__(
+-			".globl	breakinst\n\t"
+-			".set\tnoreorder\n\t"
+-			"nop\n"
+-			"breakinst:\tbreak\n\t"
+-			"nop\n\t"
+-			".set\treorder"
+-			);
+-}
+-
+-/* Nothing but the break; don't pollute any registers */
+-void async_breakpoint(void)
+-{
+-	__asm__ __volatile__(
+-			".globl	async_breakinst\n\t"
+-			".set\tnoreorder\n\t"
+-			"nop\n"
+-			"async_breakinst:\tbreak\n\t"
+-			"nop\n\t"
+-			".set\treorder"
+-			);
+-}
+-
+-void adel(void)
+-{
+-	__asm__ __volatile__(
+-			".globl\tadel\n\t"
+-			"lui\t$8,0x8000\n\t"
+-			"lw\t$9,1($8)\n\t"
+-			);
+-}
+-
+-/*
+- * malloc is needed by gdb client in "call func()", even a private one
+- * will make gdb happy
+- */
+-static void * __attribute_used__ malloc(size_t size)
+-{
+-	return kmalloc(size, GFP_ATOMIC);
+-}
+-
+-static void __attribute_used__ free (void *where)
+-{
+-	kfree(where);
+-}
+-
+-#ifdef CONFIG_GDB_CONSOLE
+-
+-void gdb_putsn(const char *str, int l)
+-{
+-	char outbuf[18];
+-
+-	if (!kgdb_started)
+-		return;
+-
+-	outbuf[0]='O';
+-
+-	while(l) {
+-		int i = (l>8)?8:l;
+-		mem2hex((char *)str, &outbuf[1], i, 0);
+-		outbuf[(i*2)+1]=0;
+-		putpacket(outbuf);
+-		str += i;
+-		l -= i;
+-	}
+-}
+-
+-static void gdb_console_write(struct console *con, const char *s, unsigned n)
+-{
+-	gdb_putsn(s, n);
+-}
+-
+-static struct console gdb_console = {
+-	.name	= "gdb",
+-	.write	= gdb_console_write,
+-	.flags	= CON_PRINTBUFFER,
+-	.index	= -1
+-};
+-
+-static int __init register_gdb_console(void)
+-{
+-	register_console(&gdb_console);
+-
+-	return 0;
+-}
+-
+-console_initcall(register_gdb_console);
+-
+-#endif
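+
+The stub removed above frames every exchange as $<data>#<checksum>, the
+checksum being the modulo-256 sum of the payload bytes, sent as two hex
+digits. A self-contained, host-side sketch of that framing (illustrative
+only, not part of the stub):
+
+	#include <stdio.h>
+	#include <string.h>
+
+	/* Write "$<data>#<checksum>" into out; out must hold
+	 * strlen(data) + 5 bytes. */
+	static void frame_packet(const char *data, char *out)
+	{
+		unsigned char sum = 0;
+		size_t i;
+
+		for (i = 0; i < strlen(data); i++)
+			sum += (unsigned char)data[i];
+		sprintf(out, "$%s#%02x", data, sum);
+	}
+
+Framing "m0,10" this way yields "$m0,10#2a", matching the example in the
+comments above.
+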
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/irq.c linux-2.6.22-try2/arch/mips/kernel/irq.c
+--- linux-2.6.22-570/arch/mips/kernel/irq.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/kernel/irq.c	2007-12-19 15:29:23.000000000 -0500
+@@ -25,6 +25,10 @@
+ #include <asm/atomic.h>
+ #include <asm/system.h>
+ #include <asm/uaccess.h>
++#include <asm/kgdb.h>
++
++/* Keep track of whether we've done certain initialization already. */
++int kgdb_early_setup;
+ 
+ static unsigned long irq_map[NR_IRQS / BITS_PER_LONG];
+ 
+@@ -138,28 +142,23 @@
+ 	atomic_inc(&irq_err_count);
+ }
+ 
+-#ifdef CONFIG_KGDB
+-extern void breakpoint(void);
+-extern void set_debug_traps(void);
+-
+-static int kgdb_flag = 1;
+-static int __init nokgdb(char *str)
++void __init init_IRQ(void)
+ {
+-	kgdb_flag = 0;
+-	return 1;
+-}
+-__setup("nokgdb", nokgdb);
++
++#ifdef CONFIG_KGDB
++	if (kgdb_early_setup)
++		return;
+ #endif
+ 
+-void __init init_IRQ(void)
+-{
+ 	arch_init_irq();
+ 
++
+ #ifdef CONFIG_KGDB
+-	if (kgdb_flag) {
+-		printk("Wait for gdb client connection ...\n");
+-		set_debug_traps();
+-		breakpoint();
+-	}
++	/*
++	 * We have been called before kgdb_arch_init(). Hence,
++	 * we don't want the traps to be reinitialized
++	 */
++	if (kgdb_early_setup == 0)
++		kgdb_early_setup = 1;
+ #endif
+ }
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb-jmp.c linux-2.6.22-try2/arch/mips/kernel/kgdb-jmp.c
+--- linux-2.6.22-570/arch/mips/kernel/kgdb-jmp.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/mips/kernel/kgdb-jmp.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,110 @@
++/*
++ * arch/mips/kernel/kgdb-jmp.c
++ *
++ * Save and restore system registers so that within a limited frame we
++ * may have a fault and "jump back" to a known safe location.
++ *
++ * Author: Tom Rini <trini@kernel.crashing.org>
++ * Author: Manish Lachwani <mlachwani@mvista.com>
++ *
++ * Cribbed from glibc, which carries the following:
++ * Copyright (C) 1996, 1997, 2000, 2002, 2003 Free Software Foundation, Inc.
++ * Copyright (C) 2005-2006 by MontaVista Software.
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program as licensed "as is" without any warranty of
++ * any kind, whether express or implied.
++ */
++
++#include <linux/kgdb.h>
++
++#ifdef CONFIG_64BIT
++/*
++ * MIPS 64-bit
++ */
++
++int kgdb_fault_setjmp_aux(unsigned long *curr_context, unsigned long sp, unsigned long fp)
++{
++	__asm__ __volatile__ ("sd $gp, %0" : : "m" (curr_context[0]));
++	__asm__ __volatile__ ("sd $16, %0" : : "m" (curr_context[1]));
++	__asm__ __volatile__ ("sd $17, %0" : : "m" (curr_context[2]));
++	__asm__ __volatile__ ("sd $18, %0" : : "m" (curr_context[3]));
++	__asm__ __volatile__ ("sd $19, %0" : : "m" (curr_context[4]));
++	__asm__ __volatile__ ("sd $20, %0" : : "m" (curr_context[5]));
++	__asm__ __volatile__ ("sd $21, %0" : : "m" (curr_context[6]));
++	__asm__ __volatile__ ("sd $22, %0" : : "m" (curr_context[7]));
++	__asm__ __volatile__ ("sd $23, %0" : : "m" (curr_context[8]));
++	__asm__ __volatile__ ("sd $31, %0" : : "m" (curr_context[9]));
++	curr_context[10] = sp;
++	curr_context[11] = fp;
++
++	return 0;
++}
++
++void kgdb_fault_longjmp(unsigned long *curr_context)
++{
++	__asm__ __volatile__ ("ld $gp, %0" : : "m" (curr_context[0]));
++	__asm__ __volatile__ ("ld $16, %0" : : "m" (curr_context[1]));
++	__asm__ __volatile__ ("ld $17, %0" : : "m" (curr_context[2]));
++	__asm__ __volatile__ ("ld $18, %0" : : "m" (curr_context[3]));
++	__asm__ __volatile__ ("ld $19, %0" : : "m" (curr_context[4]));
++	__asm__ __volatile__ ("ld $20, %0" : : "m" (curr_context[5]));
++	__asm__ __volatile__ ("ld $21, %0" : : "m" (curr_context[6]));
++	__asm__ __volatile__ ("ld $22, %0" : : "m" (curr_context[7]));
++	__asm__ __volatile__ ("ld $23, %0" : : "m" (curr_context[8]));
++	__asm__ __volatile__ ("ld $25, %0" : : "m" (curr_context[9]));
++	__asm__ __volatile__ ("ld $29, %0\n\t"
++			      "ld $30, %1\n\t" : :
++			      "m" (curr_context[10]), "m" (curr_context[11]));
++
++	__asm__ __volatile__ ("dli $2, 1");
++	__asm__ __volatile__ ("j $25");
++
++	for (;;);
++}
++#else
++/*
++ * MIPS 32-bit
++ */
++
++int kgdb_fault_setjmp_aux(unsigned long *curr_context, unsigned long sp, unsigned long fp)
++{
++	__asm__ __volatile__("sw $gp, %0" : : "m" (curr_context[0]));
++	__asm__ __volatile__("sw $16, %0" : : "m" (curr_context[1]));
++	__asm__ __volatile__("sw $17, %0" : : "m" (curr_context[2]));
++	__asm__ __volatile__("sw $18, %0" : : "m" (curr_context[3]));
++	__asm__ __volatile__("sw $19, %0" : : "m" (curr_context[4]));
++	__asm__ __volatile__("sw $20, %0" : : "m" (curr_context[5]));
++	__asm__ __volatile__("sw $21, %0" : : "m" (curr_context[6]));
++	__asm__ __volatile__("sw $22, %0" : : "m" (curr_context[7]));
++	__asm__ __volatile__("sw $23, %0" : : "m" (curr_context[8]));
++	__asm__ __volatile__("sw $31, %0" : : "m" (curr_context[9]));
++	curr_context[10] = sp;
++	curr_context[11] = fp;
++
++	return 0;
++}
++
++void kgdb_fault_longjmp(unsigned long *curr_context)
++{
++	__asm__ __volatile__("lw $gp, %0" : : "m" (curr_context[0]));
++	__asm__ __volatile__("lw $16, %0" : : "m" (curr_context[1]));
++	__asm__ __volatile__("lw $17, %0" : : "m" (curr_context[2]));
++	__asm__ __volatile__("lw $18, %0" : : "m" (curr_context[3]));
++	__asm__ __volatile__("lw $19, %0" : : "m" (curr_context[4]));
++	__asm__ __volatile__("lw $20, %0" : : "m" (curr_context[5]));
++	__asm__ __volatile__("lw $21, %0" : : "m" (curr_context[6]));
++	__asm__ __volatile__("lw $22, %0" : : "m" (curr_context[7]));
++	__asm__ __volatile__("lw $23, %0" : : "m" (curr_context[8]));
++	__asm__ __volatile__("lw $25, %0" : : "m" (curr_context[9]));
++
++	__asm__ __volatile__("lw $29, %0\n\t"
++			     "lw $30, %1\n\t" : :
++			     "m" (curr_context[10]), "m" (curr_context[11]));
++
++	__asm__ __volatile__("li $2, 1");
++	__asm__ __volatile__("jr $25");
++
++	for (;;);
++}
++#endif
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb-setjmp.S linux-2.6.22-try2/arch/mips/kernel/kgdb-setjmp.S
+--- linux-2.6.22-570/arch/mips/kernel/kgdb-setjmp.S	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/mips/kernel/kgdb-setjmp.S	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,28 @@
++/*
++ * arch/mips/kernel/kgdb-setjmp.S
++ *
++ * Save and restore system registers so that within a limited frame we
++ * may have a fault and "jump back" to a known safe location.
++ *
++ * Copyright (C) 2005 by MontaVista Software.
++ * Author: Manish Lachwani (mlachwani@mvista.com)
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of
++ * any kind, whether express or implied.
++ */
++
++#include <asm/asm.h>
++#include <asm/mipsregs.h>
++#include <asm/regdef.h>
++#include <asm/stackframe.h>
++
++	.ent	kgdb_fault_setjmp,0
++ENTRY (kgdb_fault_setjmp)
++	move    a1, sp
++	move	a2, fp
++#ifdef CONFIG_64BIT
++	nop
++#endif
++	j	kgdb_fault_setjmp_aux
++	.end	kgdb_fault_setjmp
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb.c linux-2.6.22-try2/arch/mips/kernel/kgdb.c
+--- linux-2.6.22-570/arch/mips/kernel/kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/mips/kernel/kgdb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,310 @@
++/*
++ * arch/mips/kernel/kgdb.c
++ *
++ *  Originally written by Glenn Engel, Lake Stevens Instrument Division
++ *
++ *  Contributed by HP Systems
++ *
++ *  Modified for SPARC by Stu Grossman, Cygnus Support.
++ *
++ *  Modified for Linux/MIPS (and MIPS in general) by Andreas Busse
++ *  Send complaints, suggestions etc. to <andy@waldorf-gmbh.de>
++ *
++ *  Copyright (C) 1995 Andreas Busse
++ *
++ *  Copyright (C) 2003 MontaVista Software Inc.
++ *  Author: Jun Sun, jsun@mvista.com or jsun@junsun.net
++ *
++ *  Copyright (C) 2004-2005 MontaVista Software Inc.
++ *  Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com
++ *
++ *  This file is licensed under the terms of the GNU General Public License
++ *  version 2. This program is licensed "as is" without any warranty of any
++ *  kind, whether express or implied.
++ */
++
++#include <linux/string.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/delay.h>
++#include <asm/system.h>
++#include <asm/ptrace.h>		/* for linux pt_regs struct */
++#include <linux/kgdb.h>
++#include <linux/init.h>
++#include <linux/kdebug.h>
++#include <asm/inst.h>
++#include <asm/gdb-stub.h>
++#include <asm/cacheflush.h>
++#include <asm/kdebug.h>
++
++static struct hard_trap_info {
++	unsigned char tt;	/* Trap type code for MIPS R3xxx and R4xxx */
++	unsigned char signo;	/* Signal that we map this trap into */
++} hard_trap_info[] = {
++	{ 6, SIGBUS },		/* instruction bus error */
++	{ 7, SIGBUS },		/* data bus error */
++	{ 9, SIGTRAP },		/* break */
++/*	{ 11, SIGILL },	*/	/* CPU unusable */
++	{ 12, SIGFPE },		/* overflow */
++	{ 13, SIGTRAP },	/* trap */
++	{ 14, SIGSEGV },	/* virtual instruction cache coherency */
++	{ 15, SIGFPE },		/* floating point exception */
++	{ 23, SIGSEGV },	/* watch */
++	{ 31, SIGSEGV },	/* virtual data cache coherency */
++	{ 0, 0}			/* Must be last */
++};
++
++/* Save the normal trap handlers for user-mode traps. */
++void *saved_vectors[32];
++
++extern void trap_low(void);
++extern void breakinst(void);
++extern void init_IRQ(void);
++
++void kgdb_call_nmi_hook(void *ignored)
++{
++	kgdb_nmihook(smp_processor_id(), (void *)0);
++}
++
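++/*
++ * smp_call_function() can deadlock if called with interrupts disabled,
++ * so re-enable them briefly while the cross-call pulls the other CPUs
++ * into the debugger.
++ */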
++void kgdb_roundup_cpus(unsigned long flags)
++{
++	local_irq_enable();
++	smp_call_function(kgdb_call_nmi_hook, 0, 0, 0);
++	local_irq_disable();
++}
++
++static int compute_signal(int tt)
++{
++	struct hard_trap_info *ht;
++
++	for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
++		if (ht->tt == tt)
++			return ht->signo;
++
++	return SIGHUP;		/* default for things we don't know about */
++}
++
++/*
++ * Handle exceptions taken for tracing and breakpoints
++ */
++void handle_exception(struct pt_regs *regs)
++{
++	int trap = (regs->cp0_cause & 0x7c) >> 2;
++
++	if (fixup_exception(regs)) {
++		return;
++	}
++
++	if (atomic_read(&debugger_active))
++		kgdb_nmihook(smp_processor_id(), regs);
++
++	if (atomic_read(&kgdb_setting_breakpoint))
++		if ((trap == 9) && (regs->cp0_epc == (unsigned long)breakinst))
++			regs->cp0_epc += 4;
++
++	kgdb_handle_exception(0, compute_signal(trap), 0, regs);
++
++	/* In SMP mode, __flush_cache_all does IPI */
++	local_irq_enable();
++	__flush_cache_all();
++}
++
++void set_debug_traps(void)
++{
++	struct hard_trap_info *ht;
++	unsigned long flags;
++
++	local_irq_save(flags);
++
++	for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
++		saved_vectors[ht->tt] = set_except_vector(ht->tt, trap_low);
++
++	local_irq_restore(flags);
++}
++
++#if 0
++/* This should be called before we exit kgdb_handle_exception() I believe.
++ * -- Tom
++ */
++void restore_debug_traps(void)
++{
++	struct hard_trap_info *ht;
++	unsigned long flags;
++
++	local_irq_save(flags);
++	for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
++		set_except_vector(ht->tt, saved_vectors[ht->tt]);
++	local_irq_restore(flags);
++}
++#endif
++
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++	int reg;
++	gdb_reg_t *ptr = (gdb_reg_t*)gdb_regs;
++
++	for (reg = 0; reg < 32; reg++)
++		*(ptr++) = regs->regs[reg];
++
++	*(ptr++) = regs->cp0_status;
++	*(ptr++) = regs->lo;
++	*(ptr++) = regs->hi;
++	*(ptr++) = regs->cp0_badvaddr;
++	*(ptr++) = regs->cp0_cause;
++	*(ptr++) = regs->cp0_epc;
++
++	return;
++}
++
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++
++	int reg;
++	const gdb_reg_t *ptr = (gdb_reg_t*)gdb_regs;
++
++	for (reg = 0; reg < 32; reg++)
++		regs->regs[reg] = *(ptr++);
++
++	regs->cp0_status = *(ptr++);
++	regs->lo = *(ptr++);
++	regs->hi = *(ptr++);
++	regs->cp0_badvaddr = *(ptr++);
++	regs->cp0_cause = *(ptr++);
++	regs->cp0_epc = *(ptr++);
++
++	return;
++}
++
++/*
++ * Similar to regs_to_gdb_regs() except that the process is sleeping, so
++ * we may not be able to get all of the info.
++ */
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++	int reg;
++	struct thread_info *ti = task_thread_info(p);
++	unsigned long ksp = (unsigned long)ti + THREAD_SIZE - 32;
++	struct pt_regs *regs = (struct pt_regs *)ksp - 1;
++	gdb_reg_t *ptr = (gdb_reg_t*)gdb_regs;
++
++	for (reg = 0; reg < 16; reg++)
++		*(ptr++) = regs->regs[reg];
++
++	/* S0 - S7 */
++	for (reg = 16; reg < 24; reg++)
++		*(ptr++) = regs->regs[reg];
++
++	for (reg = 24; reg < 28; reg++)
++		*(ptr++) = 0;
++
++	/* GP, SP, FP, RA */
++	for (reg = 28; reg < 32; reg++)
++		*(ptr++) = regs->regs[reg];
++
++	*(ptr++) = regs->cp0_status;
++	*(ptr++) = regs->lo;
++	*(ptr++) = regs->hi;
++	*(ptr++) = regs->cp0_badvaddr;
++	*(ptr++) = regs->cp0_cause;
++	*(ptr++) = regs->cp0_epc;
++
++	return;
++}
++
++/*
++ * Called from the die notifier chain before the kernel dies.  If KGDB
++ * is enabled, try to fall into the debugger.
++ */
++static int kgdb_mips_notify(struct notifier_block *self, unsigned long cmd,
++			    void *ptr)
++{
++	struct die_args *args = (struct die_args *)ptr;
++	struct pt_regs *regs = args->regs;
++	int trap = (regs->cp0_cause & 0x7c) >> 2;
++
++	/* See if KGDB is interested. */
++	if (user_mode(regs))
++		/* Userspace events, ignore. */
++		return NOTIFY_DONE;
++
++	kgdb_handle_exception(trap, compute_signal(trap), 0, regs);
++	return NOTIFY_OK;
++}
++
++static struct notifier_block kgdb_notifier = {
++	.notifier_call = kgdb_mips_notify,
++};
++
++/*
++ * Handle the 's' and 'c' commands
++ */
++int kgdb_arch_handle_exception(int vector, int signo, int err_code,
++			       char *remcom_in_buffer, char *remcom_out_buffer,
++			       struct pt_regs *regs)
++{
++	char *ptr;
++	unsigned long address;
++	int cpu = smp_processor_id();
++
++	switch (remcom_in_buffer[0]) {
++	case 's':
++	case 'c':
++		/* handle the optional parameter */
++		ptr = &remcom_in_buffer[1];
++		if (kgdb_hex2long(&ptr, &address))
++			regs->cp0_epc = address;
++
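++		/* Default to no CPU single-stepping; 's' records this CPU below. */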
++		atomic_set(&cpu_doing_single_step, -1);
++		if (remcom_in_buffer[0] == 's')
++			if (kgdb_contthread)
++				atomic_set(&cpu_doing_single_step, cpu);
++
++		return 0;
++	}
++
++	return -1;
++}
++
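++/*
++ * The MIPS "break" instruction is 0x0000000d; gdb_bpt_instr holds it
++ * in memory byte order for the configured endianness so the stub can
++ * copy it directly over the instruction at a breakpoint address.
++ */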
++struct kgdb_arch arch_kgdb_ops = {
++#ifdef CONFIG_CPU_LITTLE_ENDIAN
++	.gdb_bpt_instr = {0xd},
++#else
++	.gdb_bpt_instr = {0x00, 0x00, 0x00, 0x0d},
++#endif
++};
++
++/*
++ * We use kgdb_early_setup so that functions we need to call now don't
++ * cause trouble when called again later.
++ */
++__init int kgdb_arch_init(void)
++{
++	/* Board-specifics. */
++	/* Force some calls to happen earlier. */
++	if (kgdb_early_setup == 0) {
++		trap_init();
++		init_IRQ();
++		kgdb_early_setup = 1;
++	}
++
++	/* Set our traps. */
++	/* This needs to be done more finely grained again, paired in
++	 * a before/after in kgdb_handle_exception(...) -- Tom */
++	set_debug_traps();
++	register_die_notifier(&kgdb_notifier);
++
++	return 0;
++}
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/kgdb_handler.S linux-2.6.22-try2/arch/mips/kernel/kgdb_handler.S
+--- linux-2.6.22-570/arch/mips/kernel/kgdb_handler.S	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/mips/kernel/kgdb_handler.S	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,345 @@
++/*
++ * arch/mips/kernel/kgdb_handler.S
++ *
++ * Copyright (C) 2007 Wind River Systems, Inc
++ *
++ * Copyright (C) 2004-2005 MontaVista Software Inc.
++ * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++/*
++ * Trap handler for the new KGDB framework.  The main KGDB entry point,
++ * handle_exception(), is called from here.
++ *
++ */
++
++#include <linux/sys.h>
++
++#include <asm/asm.h>
++#include <asm/errno.h>
++#include <asm/mipsregs.h>
++#include <asm/regdef.h>
++#include <asm/stackframe.h>
++#include <asm/gdb-stub.h>
++
++#ifdef CONFIG_32BIT
++#define DMFC0	mfc0
++#define DMTC0	mtc0
++#define LDC1	lwc1
++#define SDC1	swc1
++#endif
++#ifdef CONFIG_64BIT
++#define DMFC0	dmfc0
++#define DMTC0	dmtc0
++#define LDC1	ldc1
++#define SDC1	sdc1
++#endif
++
++#include <asm/asmmacro.h>
++
++/*
++ * [jsun] We reserve about 2x GDB_FR_SIZE of stack.  The lower (addressed)
++ * part is used to store registers and is passed to the exception handler.
++ * The upper part is reserved for the "call func" feature, where the gdb
++ * client saves some of the regs, sets up a call frame and passes args.
++ *
++ * A trace shows about 200 bytes are used to store about half of all regs.
++ * The rest should be big enough for frame setup and passing args.
++ */
++
++/*
++ * The low level trap handler
++ */
++		.align 	5
++		NESTED(trap_low, GDB_FR_SIZE, sp)
++		.set	noat
++		.set	noreorder
++
++		mfc0	k0, CP0_STATUS
++		sll	k0, 3     		/* extract cu0 bit */
++		bltz	k0, 1f
++		move	k1, sp
++
++		/*
++		 * Called from user mode, go somewhere else.
++		 */
++#if defined(CONFIG_32BIT)
++		lui	k1, %hi(saved_vectors)
++		mfc0	k0, CP0_CAUSE
++		andi	k0, k0, 0x7c
++		add	k1, k1, k0
++		lw	k0, %lo(saved_vectors)(k1)
++#elif defined(CONFIG_64BIT) && defined(CONFIG_BUILD_ELF64)
++		DMFC0	k0, CP0_CAUSE
++		lui	k1, %highest(saved_vectors)
++		andi	k0, k0, 0x7c		/* mask exception type */
++		dsll	k0, 1			/* turn into byte offset */
++		daddiu	k1, %higher(saved_vectors)
++		dsll	k1, k1, 16
++		daddiu	k1, %hi(saved_vectors)
++		dsll	k1, k1, 16
++		daddu	k1, k1, k0
++		LONG_L	k0, %lo(saved_vectors)(k1)
++#else
++#error "MIPS configuration is unsupported for kgdb!!"
++#endif
++		jr	k0
++		nop
++1:
++		move	k0, sp
++		PTR_SUBU sp, k1, GDB_FR_SIZE*2	# see comment above
++		LONG_S	k0, GDB_FR_REG29(sp)
++		LONG_S	$2, GDB_FR_REG2(sp)
++
++/*
++ * First save the CP0 and special registers
++ */
++
++		mfc0	v0, CP0_STATUS
++		LONG_S	v0, GDB_FR_STATUS(sp)
++		mfc0	v0, CP0_CAUSE
++		LONG_S	v0, GDB_FR_CAUSE(sp)
++		DMFC0	v0, CP0_EPC
++		LONG_S	v0, GDB_FR_EPC(sp)
++		DMFC0	v0, CP0_BADVADDR
++		LONG_S	v0, GDB_FR_BADVADDR(sp)
++		mfhi	v0
++		LONG_S	v0, GDB_FR_HI(sp)
++		mflo	v0
++		LONG_S	v0, GDB_FR_LO(sp)
++
++/*
++ * Now the integer registers
++ */
++
++		LONG_S	zero, GDB_FR_REG0(sp)		/* $0 is always zero, but gdb expects a slot for it */
++		LONG_S	$1, GDB_FR_REG1(sp)
++		/* v0 already saved */
++		LONG_S	$3, GDB_FR_REG3(sp)
++		LONG_S	$4, GDB_FR_REG4(sp)
++		LONG_S	$5, GDB_FR_REG5(sp)
++		LONG_S	$6, GDB_FR_REG6(sp)
++		LONG_S	$7, GDB_FR_REG7(sp)
++		LONG_S	$8, GDB_FR_REG8(sp)
++		LONG_S	$9, GDB_FR_REG9(sp)
++		LONG_S	$10, GDB_FR_REG10(sp)
++		LONG_S	$11, GDB_FR_REG11(sp)
++		LONG_S	$12, GDB_FR_REG12(sp)
++		LONG_S	$13, GDB_FR_REG13(sp)
++		LONG_S	$14, GDB_FR_REG14(sp)
++		LONG_S	$15, GDB_FR_REG15(sp)
++		LONG_S	$16, GDB_FR_REG16(sp)
++		LONG_S	$17, GDB_FR_REG17(sp)
++		LONG_S	$18, GDB_FR_REG18(sp)
++		LONG_S	$19, GDB_FR_REG19(sp)
++		LONG_S	$20, GDB_FR_REG20(sp)
++		LONG_S	$21, GDB_FR_REG21(sp)
++		LONG_S	$22, GDB_FR_REG22(sp)
++		LONG_S	$23, GDB_FR_REG23(sp)
++		LONG_S	$24, GDB_FR_REG24(sp)
++		LONG_S	$25, GDB_FR_REG25(sp)
++		LONG_S	$26, GDB_FR_REG26(sp)
++		LONG_S	$27, GDB_FR_REG27(sp)
++		LONG_S	$28, GDB_FR_REG28(sp)
++		/* sp already saved */
++		LONG_S	$30, GDB_FR_REG30(sp)
++		LONG_S	$31, GDB_FR_REG31(sp)
++
++		CLI				/* disable interrupts */
++
++/*
++ * Followed by the floating point registers
++ */
++		mfc0	v0, CP0_STATUS		/* FPU enabled? */
++		srl	v0, v0, 16
++		andi	v0, v0, (ST0_CU1 >> 16)
++
++		beqz	v0,3f			/* disabled, skip */
++		 nop
++
++		li	t0, 0
++#ifdef CONFIG_64BIT
++		mfc0	t0, CP0_STATUS
++#endif
++		fpu_save_double_kgdb sp t0 t1	# clobbers t1
++
++
++/*
++ * Current stack frame ptr
++ */
++
++3:
++		LONG_S	sp, GDB_FR_FRP(sp)
++
++/*
++ * CP0 registers (R4000/R4400 unused registers skipped)
++ */
++
++		mfc0	v0, CP0_INDEX
++		LONG_S	v0, GDB_FR_CP0_INDEX(sp)
++		mfc0	v0, CP0_RANDOM
++		LONG_S	v0, GDB_FR_CP0_RANDOM(sp)
++		DMFC0	v0, CP0_ENTRYLO0
++		LONG_S	v0, GDB_FR_CP0_ENTRYLO0(sp)
++		DMFC0	v0, CP0_ENTRYLO1
++		LONG_S	v0, GDB_FR_CP0_ENTRYLO1(sp)
++		DMFC0	v0, CP0_CONTEXT
++		LONG_S	v0, GDB_FR_CP0_CONTEXT(sp)
++		mfc0	v0, CP0_PAGEMASK
++		LONG_S	v0, GDB_FR_CP0_PAGEMASK(sp)
++		mfc0	v0, CP0_WIRED
++		LONG_S	v0, GDB_FR_CP0_WIRED(sp)
++		DMFC0	v0, CP0_ENTRYHI
++		LONG_S	v0, GDB_FR_CP0_ENTRYHI(sp)
++		mfc0	v0, CP0_PRID
++		LONG_S	v0, GDB_FR_CP0_PRID(sp)
++
++		.set	at
++
++/*
++ * Continue with the higher level handler
++ */
++
++		move	a0,sp
++
++		jal	handle_exception
++		 nop
++
++/*
++ * Restore all writable registers, in reverse order
++ */
++
++		.set	noat
++
++		LONG_L	v0, GDB_FR_CP0_ENTRYHI(sp)
++		LONG_L	v1, GDB_FR_CP0_WIRED(sp)
++		DMTC0	v0, CP0_ENTRYHI
++		mtc0	v1, CP0_WIRED
++		LONG_L	v0, GDB_FR_CP0_PAGEMASK(sp)
++		LONG_L	v1, GDB_FR_CP0_ENTRYLO1(sp)
++		mtc0	v0, CP0_PAGEMASK
++		DMTC0	v1, CP0_ENTRYLO1
++		LONG_L	v0, GDB_FR_CP0_ENTRYLO0(sp)
++		LONG_L	v1, GDB_FR_CP0_INDEX(sp)
++		DMTC0	v0, CP0_ENTRYLO0
++		LONG_L	v0, GDB_FR_CP0_CONTEXT(sp)
++		mtc0	v1, CP0_INDEX
++		DMTC0	v0, CP0_CONTEXT
++
++
++/*
++ * Next, the floating point registers
++ */
++		mfc0	v0, CP0_STATUS		/* check if the FPU is enabled */
++		srl	v0, v0, 16
++		andi	v0, v0, (ST0_CU1 >> 16)
++
++		beqz	v0, 3f			/* disabled, skip */
++		 nop
++
++		li	t0, 0
++#ifdef CONFIG_64BIT
++		mfc0	t0, CP0_STATUS
++#endif
++		fpu_restore_double_kgdb sp t0 t1 # clobbers t1
++
++
++/*
++ * Now the CP0 and integer registers
++ */
++
++3:
++		mfc0	t0, CP0_STATUS
++		ori	t0, 0x1f
++		xori	t0, 0x1f
++		mtc0	t0, CP0_STATUS
++
++		LONG_L	v0, GDB_FR_STATUS(sp)
++		LONG_L	v1, GDB_FR_EPC(sp)
++		mtc0	v0, CP0_STATUS
++		DMTC0	v1, CP0_EPC
++		LONG_L	v0, GDB_FR_HI(sp)
++		LONG_L	v1, GDB_FR_LO(sp)
++		mthi	v0
++		mtlo	v1
++		LONG_L	$31, GDB_FR_REG31(sp)
++		LONG_L	$30, GDB_FR_REG30(sp)
++		LONG_L	$28, GDB_FR_REG28(sp)
++		LONG_L	$27, GDB_FR_REG27(sp)
++		LONG_L	$26, GDB_FR_REG26(sp)
++		LONG_L	$25, GDB_FR_REG25(sp)
++		LONG_L	$24, GDB_FR_REG24(sp)
++		LONG_L	$23, GDB_FR_REG23(sp)
++		LONG_L	$22, GDB_FR_REG22(sp)
++		LONG_L	$21, GDB_FR_REG21(sp)
++		LONG_L	$20, GDB_FR_REG20(sp)
++		LONG_L	$19, GDB_FR_REG19(sp)
++		LONG_L	$18, GDB_FR_REG18(sp)
++		LONG_L	$17, GDB_FR_REG17(sp)
++		LONG_L	$16, GDB_FR_REG16(sp)
++		LONG_L	$15, GDB_FR_REG15(sp)
++		LONG_L	$14, GDB_FR_REG14(sp)
++		LONG_L	$13, GDB_FR_REG13(sp)
++		LONG_L	$12, GDB_FR_REG12(sp)
++		LONG_L	$11, GDB_FR_REG11(sp)
++		LONG_L	$10, GDB_FR_REG10(sp)
++		LONG_L	$9, GDB_FR_REG9(sp)
++		LONG_L	$8, GDB_FR_REG8(sp)
++		LONG_L	$7, GDB_FR_REG7(sp)
++		LONG_L	$6, GDB_FR_REG6(sp)
++		LONG_L	$5, GDB_FR_REG5(sp)
++		LONG_L	$4, GDB_FR_REG4(sp)
++		LONG_L	$3, GDB_FR_REG3(sp)
++		LONG_L	$2, GDB_FR_REG2(sp)
++		LONG_L	$1, GDB_FR_REG1(sp)
++#if defined(CONFIG_CPU_R3000) || defined(CONFIG_CPU_TX39XX)
++		LONG_L	k0, GDB_FR_EPC(sp)
++		LONG_L	$29, GDB_FR_REG29(sp)		/* Deallocate stack */
++		jr	k0
++		rfe
++#else
++		LONG_L	sp, GDB_FR_REG29(sp)		/* Deallocate stack */
++
++		.set	mips3
++		eret
++		.set	mips0
++#endif
++		.set	at
++		.set	reorder
++		END(trap_low)
++
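++/*
++ * Single-byte accessors used by the stub to touch arbitrary memory.
++ * The __ex_table entries below register the faulting instructions, so
++ * a bad address unwinds to kgdbfault and the helpers return -EFAULT
++ * instead of taking the machine down.
++ */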
++LEAF(kgdb_read_byte)
++4:		lb	t0, (a0)
++		sb	t0, (a1)
++		li	v0, 0
++		jr	ra
++		.section __ex_table,"a"
++		PTR	4b, kgdbfault
++		.previous
++		END(kgdb_read_byte)
++
++LEAF(kgdb_write_byte)
++5:		sb	a0, (a1)
++		li	v0, 0
++		jr	ra
++		.section __ex_table,"a"
++		PTR	5b, kgdbfault
++		.previous
++		END(kgdb_write_byte)
++
++		.type	kgdbfault@function
++		.ent	kgdbfault
++
++kgdbfault:	li	v0, -EFAULT
++		jr	ra
++		.end	kgdbfault
+diff -Nurb linux-2.6.22-570/arch/mips/kernel/traps.c linux-2.6.22-try2/arch/mips/kernel/traps.c
+--- linux-2.6.22-570/arch/mips/kernel/traps.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/mips/kernel/traps.c	2007-12-19 15:29:24.000000000 -0500
+@@ -10,6 +10,8 @@
+  * Kevin D. Kissell, kevink@mips.com and Carsten Langgaard, carstenl@mips.com
+  * Copyright (C) 2000, 01 MIPS Technologies, Inc.
+  * Copyright (C) 2002, 2003, 2004, 2005  Maciej W. Rozycki
++ *
++ * KGDB-specific changes - Manish Lachwani (mlachwani@mvista.com)
+  */
+ #include <linux/bug.h>
+ #include <linux/init.h>
+@@ -21,6 +23,7 @@
+ #include <linux/kallsyms.h>
+ #include <linux/bootmem.h>
+ #include <linux/interrupt.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/bootinfo.h>
+ #include <asm/branch.h>
+@@ -42,6 +45,7 @@
+ #include <asm/watch.h>
+ #include <asm/types.h>
+ #include <asm/stacktrace.h>
++#include <asm/kdebug.h>
+ 
+ extern asmlinkage void handle_int(void);
+ extern asmlinkage void handle_tlbm(void);
+@@ -1445,6 +1449,11 @@
+ 	extern char except_vec4;
+ 	unsigned long i;
+ 
++#if defined(CONFIG_KGDB)
++	if (kgdb_early_setup)
++		return;	/* Already done */
++#endif
++
+ 	if (cpu_has_veic || cpu_has_vint)
+ 		ebase = (unsigned long) alloc_bootmem_low_pages (0x200 + VECTORSPACING*64);
+ 	else
+diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/atlas/Makefile linux-2.6.22-try2/arch/mips/mips-boards/atlas/Makefile
+--- linux-2.6.22-570/arch/mips/mips-boards/atlas/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/mips-boards/atlas/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -17,4 +17,3 @@
+ #
+ 
+ obj-y			:= atlas_int.o atlas_setup.o
+-obj-$(CONFIG_KGDB)	+= atlas_gdb.o
+diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_gdb.c linux-2.6.22-try2/arch/mips/mips-boards/atlas/atlas_gdb.c
+--- linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_gdb.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/mips-boards/atlas/atlas_gdb.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,97 +0,0 @@
+-/*
+- * Carsten Langgaard, carstenl@mips.com
+- * Copyright (C) 2000 MIPS Technologies, Inc.  All rights reserved.
+- *
+- *  This program is free software; you can distribute it and/or modify it
+- *  under the terms of the GNU General Public License (Version 2) as
+- *  published by the Free Software Foundation.
+- *
+- *  This program is distributed in the hope it will be useful, but WITHOUT
+- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+- *  for more details.
+- *
+- *  You should have received a copy of the GNU General Public License along
+- *  with this program; if not, write to the Free Software Foundation, Inc.,
+- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+- *
+- * This is the interface to the remote debugger stub.
+- */
+-#include <asm/io.h>
+-#include <asm/mips-boards/atlas.h>
+-#include <asm/mips-boards/saa9730_uart.h>
+-
+-#define INB(a)     inb((unsigned long)a)
+-#define OUTB(x,a)  outb(x,(unsigned long)a)
+-
+-/*
+- * This is the interface to the remote debugger stub
+- * if the Philips part is used for the debug port,
+- * called from the platform setup code.
+- */
+-void *saa9730_base = (void *)ATLAS_SAA9730_REG;
+-
+-static int saa9730_kgdb_active = 0;
+-
+-#define SAA9730_BAUDCLOCK(baud) (((ATLAS_SAA9730_BAUDCLOCK/(baud))/16)-1)
+-
+-int saa9730_kgdb_hook(int speed)
+-{
+-	int baudclock;
+-	t_uart_saa9730_regmap *kgdb_uart = (t_uart_saa9730_regmap *)(saa9730_base + SAA9730_UART_REGS_ADDR);
+-
+-        /*
+-         * Clear all interrupts
+-         */
+-	(void) INB(&kgdb_uart->Lsr);
+-	(void) INB(&kgdb_uart->Msr);
+-	(void) INB(&kgdb_uart->Thr_Rbr);
+-	(void) INB(&kgdb_uart->Iir_Fcr);
+-
+-        /*
+-         * Now, initialize the UART
+-         */
+-	/* 8 data bits, one stop bit, no parity */
+-	OUTB(SAA9730_LCR_DATA8, &kgdb_uart->Lcr);
+-
+-	baudclock = SAA9730_BAUDCLOCK(speed);
+-
+-	OUTB((baudclock >> 16) & 0xff, &kgdb_uart->BaudDivMsb);
+-	OUTB( baudclock        & 0xff, &kgdb_uart->BaudDivLsb);
+-
+-	/* Set RTS/DTR active */
+-	OUTB(SAA9730_MCR_DTR | SAA9730_MCR_RTS, &kgdb_uart->Mcr);
+-	saa9730_kgdb_active = 1;
+-
+-	return speed;
+-}
+-
+-int saa9730_putDebugChar(char c)
+-{
+-	t_uart_saa9730_regmap *kgdb_uart = (t_uart_saa9730_regmap *)(saa9730_base + SAA9730_UART_REGS_ADDR);
+-
+-        if (!saa9730_kgdb_active) {     /* need to init device first */
+-                return 0;
+-        }
+-
+-        while (!(INB(&kgdb_uart->Lsr) & SAA9730_LSR_THRE))
+-                ;
+-	OUTB(c, &kgdb_uart->Thr_Rbr);
+-
+-        return 1;
+-}
+-
+-char saa9730_getDebugChar(void)
+-{
+-	t_uart_saa9730_regmap *kgdb_uart = (t_uart_saa9730_regmap *)(saa9730_base + SAA9730_UART_REGS_ADDR);
+-	char c;
+-
+-        if (!saa9730_kgdb_active) {     /* need to init device first */
+-                return 0;
+-        }
+-        while (!(INB(&kgdb_uart->Lsr) & SAA9730_LSR_DR))
+-                ;
+-
+-	c = INB(&kgdb_uart->Thr_Rbr);
+-	return(c);
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_setup.c linux-2.6.22-try2/arch/mips/mips-boards/atlas/atlas_setup.c
+--- linux-2.6.22-570/arch/mips/mips-boards/atlas/atlas_setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/mips-boards/atlas/atlas_setup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -37,10 +37,6 @@
+ extern void mips_time_init(void);
+ extern unsigned long mips_rtc_get_time(void);
+ 
+-#ifdef CONFIG_KGDB
+-extern void kgdb_config(void);
+-#endif
+-
+ static void __init serial_init(void);
+ 
+ const char *get_system_type(void)
+@@ -58,9 +54,6 @@
+ 
+ 	serial_init ();
+ 
+-#ifdef CONFIG_KGDB
+-	kgdb_config();
+-#endif
+ 	mips_reboot_setup();
+ 
+ 	board_time_init = mips_time_init;
+diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/generic/gdb_hook.c linux-2.6.22-try2/arch/mips/mips-boards/generic/gdb_hook.c
+--- linux-2.6.22-570/arch/mips/mips-boards/generic/gdb_hook.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/mips-boards/generic/gdb_hook.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,133 +0,0 @@
+-/*
+- * Carsten Langgaard, carstenl@mips.com
+- * Copyright (C) 2000 MIPS Technologies, Inc.  All rights reserved.
+- *
+- *  This program is free software; you can distribute it and/or modify it
+- *  under the terms of the GNU General Public License (Version 2) as
+- *  published by the Free Software Foundation.
+- *
+- *  This program is distributed in the hope it will be useful, but WITHOUT
+- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+- *  for more details.
+- *
+- *  You should have received a copy of the GNU General Public License along
+- *  with this program; if not, write to the Free Software Foundation, Inc.,
+- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+- *
+- * This is the interface to the remote debugger stub.
+- */
+-#include <linux/types.h>
+-#include <linux/serial.h>
+-#include <linux/serialP.h>
+-#include <linux/serial_reg.h>
+-
+-#include <asm/serial.h>
+-#include <asm/io.h>
+-
+-static struct serial_state rs_table[] = {
+-	SERIAL_PORT_DFNS	/* Defined in serial.h */
+-};
+-
+-static struct async_struct kdb_port_info = {0};
+-
+-int (*generic_putDebugChar)(char);
+-char (*generic_getDebugChar)(void);
+-
+-static __inline__ unsigned int serial_in(struct async_struct *info, int offset)
+-{
+-	return inb(info->port + offset);
+-}
+-
+-static __inline__ void serial_out(struct async_struct *info, int offset,
+-				int value)
+-{
+-	outb(value, info->port+offset);
+-}
+-
+-int rs_kgdb_hook(int tty_no, int speed) {
+-	int t;
+-	struct serial_state *ser = &rs_table[tty_no];
+-
+-	kdb_port_info.state = ser;
+-	kdb_port_info.magic = SERIAL_MAGIC;
+-	kdb_port_info.port = ser->port;
+-	kdb_port_info.flags = ser->flags;
+-
+-	/*
+-	 * Clear all interrupts
+-	 */
+-	serial_in(&kdb_port_info, UART_LSR);
+-	serial_in(&kdb_port_info, UART_RX);
+-	serial_in(&kdb_port_info, UART_IIR);
+-	serial_in(&kdb_port_info, UART_MSR);
+-
+-	/*
+-	 * Now, initialize the UART
+-	 */
+-	serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8);	/* reset DLAB */
+-	if (kdb_port_info.flags & ASYNC_FOURPORT) {
+-		kdb_port_info.MCR = UART_MCR_DTR | UART_MCR_RTS;
+-		t = UART_MCR_DTR | UART_MCR_OUT1;
+-	} else {
+-		kdb_port_info.MCR
+-			= UART_MCR_DTR | UART_MCR_RTS | UART_MCR_OUT2;
+-		t = UART_MCR_DTR | UART_MCR_RTS;
+-	}
+-
+-	kdb_port_info.MCR = t;		/* no interrupts, please */
+-	serial_out(&kdb_port_info, UART_MCR, kdb_port_info.MCR);
+-
+-	/*
+-	 * and set the speed of the serial port
+-	 */
+-	if (speed == 0)
+-		speed = 9600;
+-
+-	t = kdb_port_info.state->baud_base / speed;
+-	/* set DLAB */
+-	serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8 | UART_LCR_DLAB);
+-	serial_out(&kdb_port_info, UART_DLL, t & 0xff);/* LS of divisor */
+-	serial_out(&kdb_port_info, UART_DLM, t >> 8);  /* MS of divisor */
+-	/* reset DLAB */
+-	serial_out(&kdb_port_info, UART_LCR, UART_LCR_WLEN8);
+-
+-	return speed;
+-}
+-
+-int putDebugChar(char c)
+-{
+-	return generic_putDebugChar(c);
+-}
+-
+-char getDebugChar(void)
+-{
+-	return generic_getDebugChar();
+-}
+-
+-int rs_putDebugChar(char c)
+-{
+-
+-	if (!kdb_port_info.state) { 	/* need to init device first */
+-		return 0;
+-	}
+-
+-	while ((serial_in(&kdb_port_info, UART_LSR) & UART_LSR_THRE) == 0)
+-		;
+-
+-	serial_out(&kdb_port_info, UART_TX, c);
+-
+-	return 1;
+-}
+-
+-char rs_getDebugChar(void)
+-{
+-	if (!kdb_port_info.state) { 	/* need to init device first */
+-		return 0;
+-	}
+-
+-	while (!(serial_in(&kdb_port_info, UART_LSR) & 1))
+-		;
+-
+-	return serial_in(&kdb_port_info, UART_RX);
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/generic/init.c linux-2.6.22-try2/arch/mips/mips-boards/generic/init.c
+--- linux-2.6.22-570/arch/mips/mips-boards/generic/init.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/mips-boards/generic/init.c	2007-12-19 15:29:24.000000000 -0500
+@@ -37,15 +37,6 @@
+ 
+ #include <asm/mips-boards/malta.h>
+ 
+-#ifdef CONFIG_KGDB
+-extern int rs_kgdb_hook(int, int);
+-extern int rs_putDebugChar(char);
+-extern char rs_getDebugChar(void);
+-extern int saa9730_kgdb_hook(int);
+-extern int saa9730_putDebugChar(char);
+-extern char saa9730_getDebugChar(void);
+-#endif
+-
+ int prom_argc;
+ int *_prom_argv, *_prom_envp;
+ 
+@@ -173,59 +164,6 @@
+ }
+ #endif
+ 
+-#ifdef CONFIG_KGDB
+-void __init kgdb_config (void)
+-{
+-	extern int (*generic_putDebugChar)(char);
+-	extern char (*generic_getDebugChar)(void);
+-	char *argptr;
+-	int line, speed;
+-
+-	argptr = prom_getcmdline();
+-	if ((argptr = strstr(argptr, "kgdb=ttyS")) != NULL) {
+-		argptr += strlen("kgdb=ttyS");
+-		if (*argptr != '0' && *argptr != '1')
+-			printk("KGDB: Unknown serial line /dev/ttyS%c, "
+-			       "falling back to /dev/ttyS1\n", *argptr);
+-		line = *argptr == '0' ? 0 : 1;
+-		printk("KGDB: Using serial line /dev/ttyS%d for session\n", line);
+-
+-		speed = 0;
+-		if (*++argptr == ',')
+-		{
+-			int c;
+-			while ((c = *++argptr) && ('0' <= c && c <= '9'))
+-				speed = speed * 10 + c - '0';
+-		}
+-#ifdef CONFIG_MIPS_ATLAS
+-		if (line == 1) {
+-			speed = saa9730_kgdb_hook(speed);
+-			generic_putDebugChar = saa9730_putDebugChar;
+-			generic_getDebugChar = saa9730_getDebugChar;
+-		}
+-		else
+-#endif
+-		{
+-			speed = rs_kgdb_hook(line, speed);
+-			generic_putDebugChar = rs_putDebugChar;
+-			generic_getDebugChar = rs_getDebugChar;
+-		}
+-
+-		pr_info("KGDB: Using serial line /dev/ttyS%d at %d for "
+-		        "session, please connect your debugger\n",
+-		        line ? 1 : 0, speed);
+-
+-		{
+-			char *s;
+-			for (s = "Please connect GDB to this port\r\n"; *s; )
+-				generic_putDebugChar (*s++);
+-		}
+-
+-		/* Breakpoint is invoked after interrupts are initialised */
+-	}
+-}
+-#endif
+-
+ void __init mips_nmi_setup (void)
+ {
+ 	void *base;
+diff -Nurb linux-2.6.22-570/arch/mips/mips-boards/malta/malta_setup.c linux-2.6.22-try2/arch/mips/mips-boards/malta/malta_setup.c
+--- linux-2.6.22-570/arch/mips/mips-boards/malta/malta_setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/mips-boards/malta/malta_setup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -39,10 +39,6 @@
+ extern void mips_time_init(void);
+ extern unsigned long mips_rtc_get_time(void);
+ 
+-#ifdef CONFIG_KGDB
+-extern void kgdb_config(void);
+-#endif
+-
+ struct resource standard_io_resources[] = {
+ 	{ .name = "dma1", .start = 0x00, .end = 0x1f, .flags = IORESOURCE_BUSY },
+ 	{ .name = "timer", .start = 0x40, .end = 0x5f, .flags = IORESOURCE_BUSY },
+@@ -99,10 +95,6 @@
+ 	 */
+ 	enable_dma(4);
+ 
+-#ifdef CONFIG_KGDB
+-	kgdb_config ();
+-#endif
+-
+ 	if (mips_revision_sconid == MIPS_REVISION_SCON_BONITO) {
+ 		char *argptr;
+ 
+diff -Nurb linux-2.6.22-570/arch/mips/mm/extable.c linux-2.6.22-try2/arch/mips/mm/extable.c
+--- linux-2.6.22-570/arch/mips/mm/extable.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/mm/extable.c	2007-12-19 15:29:24.000000000 -0500
+@@ -3,6 +3,7 @@
+  */
+ #include <linux/module.h>
+ #include <linux/spinlock.h>
++#include <linux/kgdb.h>
+ #include <asm/branch.h>
+ #include <asm/uaccess.h>
+ 
+@@ -16,6 +17,12 @@
+ 
+ 		return 1;
+ 	}
++#ifdef CONFIG_KGDB
++	if (atomic_read(&debugger_active) && kgdb_may_fault)
++		/* Restore our previous state. */
++		kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++		/* Not reached. */
++#endif
+ 
+ 	return 0;
+ }
+diff -Nurb linux-2.6.22-570/arch/mips/momentum/ocelot_c/Makefile linux-2.6.22-try2/arch/mips/momentum/ocelot_c/Makefile
+--- linux-2.6.22-570/arch/mips/momentum/ocelot_c/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/momentum/ocelot_c/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -4,5 +4,3 @@
+ 
+ obj-y	 		+= cpci-irq.o irq.o platform.o prom.o reset.o \
+ 			   setup.o uart-irq.o
+-
+-obj-$(CONFIG_KGDB)	+= dbg_io.o
+diff -Nurb linux-2.6.22-570/arch/mips/momentum/ocelot_c/dbg_io.c linux-2.6.22-try2/arch/mips/momentum/ocelot_c/dbg_io.c
+--- linux-2.6.22-570/arch/mips/momentum/ocelot_c/dbg_io.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/momentum/ocelot_c/dbg_io.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,121 +0,0 @@
+-
+-#include <asm/serial.h> /* For the serial port location and base baud */
+-
+-/* --- CONFIG --- */
+-
+-typedef unsigned char uint8;
+-typedef unsigned int uint32;
+-
+-/* --- END OF CONFIG --- */
+-
+-#define         UART16550_BAUD_2400             2400
+-#define         UART16550_BAUD_4800             4800
+-#define         UART16550_BAUD_9600             9600
+-#define         UART16550_BAUD_19200            19200
+-#define         UART16550_BAUD_38400            38400
+-#define         UART16550_BAUD_57600            57600
+-#define         UART16550_BAUD_115200           115200
+-
+-#define         UART16550_PARITY_NONE           0
+-#define         UART16550_PARITY_ODD            0x08
+-#define         UART16550_PARITY_EVEN           0x18
+-#define         UART16550_PARITY_MARK           0x28
+-#define         UART16550_PARITY_SPACE          0x38
+-
+-#define         UART16550_DATA_5BIT             0x0
+-#define         UART16550_DATA_6BIT             0x1
+-#define         UART16550_DATA_7BIT             0x2
+-#define         UART16550_DATA_8BIT             0x3
+-
+-#define         UART16550_STOP_1BIT             0x0
+-#define         UART16550_STOP_2BIT             0x4
+-
+-/* ----------------------------------------------------- */
+-
+-/* === CONFIG === */
+-
+-/* [jsun] we use the second serial port for kdb */
+-#define         BASE                    OCELOT_SERIAL1_BASE
+-#define         MAX_BAUD                OCELOT_BASE_BAUD
+-
+-/* === END OF CONFIG === */
+-
+-#define         REG_OFFSET              4
+-
+-/* register offset */
+-#define         OFS_RCV_BUFFER          0
+-#define         OFS_TRANS_HOLD          0
+-#define         OFS_SEND_BUFFER         0
+-#define         OFS_INTR_ENABLE         (1*REG_OFFSET)
+-#define         OFS_INTR_ID             (2*REG_OFFSET)
+-#define         OFS_DATA_FORMAT         (3*REG_OFFSET)
+-#define         OFS_LINE_CONTROL        (3*REG_OFFSET)
+-#define         OFS_MODEM_CONTROL       (4*REG_OFFSET)
+-#define         OFS_RS232_OUTPUT        (4*REG_OFFSET)
+-#define         OFS_LINE_STATUS         (5*REG_OFFSET)
+-#define         OFS_MODEM_STATUS        (6*REG_OFFSET)
+-#define         OFS_RS232_INPUT         (6*REG_OFFSET)
+-#define         OFS_SCRATCH_PAD         (7*REG_OFFSET)
+-
+-#define         OFS_DIVISOR_LSB         (0*REG_OFFSET)
+-#define         OFS_DIVISOR_MSB         (1*REG_OFFSET)
+-
+-
+-/* memory-mapped read/write of the port */
+-#define         UART16550_READ(y)    (*((volatile uint8*)(BASE + y)))
+-#define         UART16550_WRITE(y, z)  ((*((volatile uint8*)(BASE + y))) = z)
+-
+-void debugInit(uint32 baud, uint8 data, uint8 parity, uint8 stop)
+-{
+-	/* disable interrupts */
+-	UART16550_WRITE(OFS_INTR_ENABLE, 0);
+-
+-	/* set up baud rate */
+-	{
+-		uint32 divisor;
+-
+-		/* set DIAB bit */
+-		UART16550_WRITE(OFS_LINE_CONTROL, 0x80);
+-
+-		/* set divisor */
+-		divisor = MAX_BAUD / baud;
+-		UART16550_WRITE(OFS_DIVISOR_LSB, divisor & 0xff);
+-		UART16550_WRITE(OFS_DIVISOR_MSB, (divisor & 0xff00) >> 8);
+-
+-		/* clear DIAB bit */
+-		UART16550_WRITE(OFS_LINE_CONTROL, 0x0);
+-	}
+-
+-	/* set data format */
+-	UART16550_WRITE(OFS_DATA_FORMAT, data | parity | stop);
+-}
+-
+-static int remoteDebugInitialized = 0;
+-
+-uint8 getDebugChar(void)
+-{
+-	if (!remoteDebugInitialized) {
+-		remoteDebugInitialized = 1;
+-		debugInit(UART16550_BAUD_38400,
+-			  UART16550_DATA_8BIT,
+-			  UART16550_PARITY_NONE, UART16550_STOP_1BIT);
+-	}
+-
+-	while ((UART16550_READ(OFS_LINE_STATUS) & 0x1) == 0);
+-	return UART16550_READ(OFS_RCV_BUFFER);
+-}
+-
+-
+-int putDebugChar(uint8 byte)
+-{
+-	if (!remoteDebugInitialized) {
+-		remoteDebugInitialized = 1;
+-		debugInit(UART16550_BAUD_38400,
+-			  UART16550_DATA_8BIT,
+-			  UART16550_PARITY_NONE, UART16550_STOP_1BIT);
+-	}
+-
+-	while ((UART16550_READ(OFS_LINE_STATUS) & 0x20) == 0);
+-	UART16550_WRITE(OFS_SEND_BUFFER, byte);
+-	return 1;
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/pci/fixup-atlas.c linux-2.6.22-try2/arch/mips/pci/fixup-atlas.c
+--- linux-2.6.22-570/arch/mips/pci/fixup-atlas.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/pci/fixup-atlas.c	2007-12-19 15:29:24.000000000 -0500
+@@ -68,24 +68,3 @@
+ {
+ 	return 0;
+ }
+-
+-#ifdef CONFIG_KGDB
+-/*
+- * The PCI scan may have moved the saa9730 I/O address, so reread
+- * the address here.
+- * This does mean that it's not possible to debug the PCI bus configuration
+- * code, but it is better than nothing...
+- */
+-
+-static void atlas_saa9730_base_fixup (struct pci_dev *pdev)
+-{
+-	extern void *saa9730_base;
+-	if (pdev->bus == 0 && PCI_SLOT(pdev->devfn) == 19)
+-		(void) pci_read_config_dword (pdev, 0x14, (u32 *)&saa9730_base);
+-	printk ("saa9730_base = %x\n", saa9730_base);
+-}
+-
+-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_PHILIPS, PCI_DEVICE_ID_PHILIPS_SAA9730,
+-	 atlas_saa9730_base_fixup);
+-
+-#endif
+diff -Nurb linux-2.6.22-570/arch/mips/philips/pnx8550/common/Makefile linux-2.6.22-try2/arch/mips/philips/pnx8550/common/Makefile
+--- linux-2.6.22-570/arch/mips/philips/pnx8550/common/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/philips/pnx8550/common/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -24,4 +24,3 @@
+ 
+ obj-y := setup.o prom.o int.o reset.o time.o proc.o platform.o
+ obj-$(CONFIG_PCI) += pci.o
+-obj-$(CONFIG_KGDB) += gdb_hook.o
+diff -Nurb linux-2.6.22-570/arch/mips/philips/pnx8550/common/gdb_hook.c linux-2.6.22-try2/arch/mips/philips/pnx8550/common/gdb_hook.c
+--- linux-2.6.22-570/arch/mips/philips/pnx8550/common/gdb_hook.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/philips/pnx8550/common/gdb_hook.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,109 +0,0 @@
+-/*
+- * Carsten Langgaard, carstenl@mips.com
+- * Copyright (C) 2000 MIPS Technologies, Inc.  All rights reserved.
+- *
+- * ########################################################################
+- *
+- *  This program is free software; you can distribute it and/or modify it
+- *  under the terms of the GNU General Public License (Version 2) as
+- *  published by the Free Software Foundation.
+- *
+- *  This program is distributed in the hope it will be useful, but WITHOUT
+- *  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+- *  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+- *  for more details.
+- *
+- *  You should have received a copy of the GNU General Public License along
+- *  with this program; if not, write to the Free Software Foundation, Inc.,
+- *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+- *
+- * ########################################################################
+- *
+- * This is the interface to the remote debugger stub.
+- *
+- */
+-#include <linux/types.h>
+-#include <linux/serial.h>
+-#include <linux/serialP.h>
+-#include <linux/serial_reg.h>
+-#include <linux/serial_ip3106.h>
+-
+-#include <asm/serial.h>
+-#include <asm/io.h>
+-
+-#include <uart.h>
+-
+-static struct serial_state rs_table[IP3106_NR_PORTS] = {
+-};
+-static struct async_struct kdb_port_info = {0};
+-
+-void rs_kgdb_hook(int tty_no)
+-{
+-	struct serial_state *ser = &rs_table[tty_no];
+-
+-	kdb_port_info.state = ser;
+-	kdb_port_info.magic = SERIAL_MAGIC;
+-	kdb_port_info.port  = tty_no;
+-	kdb_port_info.flags = ser->flags;
+-
+-	/*
+-	 * Clear all interrupts
+-	 */
+-	/* Clear all the transmitter FIFO counters (pointer and status) */
+-	ip3106_lcr(UART_BASE, tty_no) |= IP3106_UART_LCR_TX_RST;
+-	/* Clear all the receiver FIFO counters (pointer and status) */
+-	ip3106_lcr(UART_BASE, tty_no) |= IP3106_UART_LCR_RX_RST;
+-	/* Clear all interrupts */
+-	ip3106_iclr(UART_BASE, tty_no) = IP3106_UART_INT_ALLRX |
+-		IP3106_UART_INT_ALLTX;
+-
+-	/*
+-	 * Now, initialize the UART
+-	 */
+-	ip3106_lcr(UART_BASE, tty_no) = IP3106_UART_LCR_8BIT;
+-	ip3106_baud(UART_BASE, tty_no) = 5; // 38400 Baud
+-}
+-
+-int putDebugChar(char c)
+-{
+-	/* Wait until FIFO not full */
+-	while (((ip3106_fifo(UART_BASE, kdb_port_info.port) & IP3106_UART_FIFO_TXFIFO) >> 16) >= 16)
+-		;
+-	/* Send one char */
+-	ip3106_fifo(UART_BASE, kdb_port_info.port) = c;
+-
+-	return 1;
+-}
+-
+-char getDebugChar(void)
+-{
+-	char ch;
+-
+-	/* Wait until there is a char in the FIFO */
+-	while (!((ip3106_fifo(UART_BASE, kdb_port_info.port) &
+-					IP3106_UART_FIFO_RXFIFO) >> 8))
+-		;
+-	/* Read one char */
+-	ch = ip3106_fifo(UART_BASE, kdb_port_info.port) &
+-		IP3106_UART_FIFO_RBRTHR;
+-	/* Advance the RX FIFO read pointer */
+-	ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_RX_NEXT;
+-	return (ch);
+-}
+-
+-void rs_disable_debug_interrupts(void)
+-{
+-	ip3106_ien(UART_BASE, kdb_port_info.port) = 0; /* Disable all interrupts */
+-}
+-
+-void rs_enable_debug_interrupts(void)
+-{
+-	/* Clear all the transmitter FIFO counters (pointer and status) */
+-	ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_TX_RST;
+-	/* Clear all the receiver FIFO counters (pointer and status) */
+-	ip3106_lcr(UART_BASE, kdb_port_info.port) |= IP3106_UART_LCR_RX_RST;
+-	/* Clear all interrupts */
+-	ip3106_iclr(UART_BASE, kdb_port_info.port) = IP3106_UART_INT_ALLRX |
+-		IP3106_UART_INT_ALLTX;
+-	ip3106_ien(UART_BASE, kdb_port_info.port)  = IP3106_UART_INT_ALLRX; /* Enable RX interrupts */
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/philips/pnx8550/common/setup.c linux-2.6.22-try2/arch/mips/philips/pnx8550/common/setup.c
+--- linux-2.6.22-570/arch/mips/philips/pnx8550/common/setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/philips/pnx8550/common/setup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -145,16 +145,5 @@
+ 		ip3106_baud(UART_BASE, pnx8550_console_port) = 5;
+ 	}
+ 
+-#ifdef CONFIG_KGDB
+-	argptr = prom_getcmdline();
+-	if ((argptr = strstr(argptr, "kgdb=ttyS")) != NULL) {
+-		int line;
+-		argptr += strlen("kgdb=ttyS");
+-		line = *argptr == '0' ? 0 : 1;
+-		rs_kgdb_hook(line);
+-		pr_info("KGDB: Using ttyS%i for session, "
+-		        "please connect your debugger\n", line ? 1 : 0);
+-	}
+-#endif
+ 	return;
+ }
+diff -Nurb linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/Makefile linux-2.6.22-try2/arch/mips/pmc-sierra/yosemite/Makefile
+--- linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/pmc-sierra/yosemite/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -4,5 +4,4 @@
+ 
+ obj-y    += irq.o i2c-yosemite.o prom.o py-console.o setup.o
+ 
+-obj-$(CONFIG_KGDB)		+= dbg_io.o
+ obj-$(CONFIG_SMP)		+= smp.o
+diff -Nurb linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/dbg_io.c linux-2.6.22-try2/arch/mips/pmc-sierra/yosemite/dbg_io.c
+--- linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/dbg_io.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/pmc-sierra/yosemite/dbg_io.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,180 +0,0 @@
+-/*
+- * Copyright 2003 PMC-Sierra
+- * Author: Manish Lachwani (lachwani@pmc-sierra.com)
+- *
+- *  This program is free software; you can redistribute  it and/or modify it
+- *  under  the terms of  the GNU General  Public License as published by the
+- *  Free Software Foundation;  either version 2 of the  License, or (at your
+- *  option) any later version.
+- *
+- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- *
+- *  You should have received a copy of the  GNU General Public License along
+- *  with this program; if not, write  to the Free Software Foundation, Inc.,
+- *  675 Mass Ave, Cambridge, MA 02139, USA.
+- */
+-
+-/*
+- * Support for KGDB for the Yosemite board. We make use of single serial
+- * port to be used for KGDB as well as console. The second serial port
+- * seems to be having a problem. Single IRQ is allocated for both the
+- * ports. Hence, the interrupt routing code needs to figure out whether
+- * the interrupt came from channel A or B.
+- */
+-
+-#include <asm/serial.h>
+-
+-/*
+- * Baud rate, Parity, Data and Stop bit settings for the
+- * serial port on the Yosemite. Note that the Early printk
+- * patch has been added. So, we should be all set to go
+- */
+-#define	YOSEMITE_BAUD_2400	2400
+-#define	YOSEMITE_BAUD_4800	4800
+-#define	YOSEMITE_BAUD_9600	9600
+-#define	YOSEMITE_BAUD_19200	19200
+-#define	YOSEMITE_BAUD_38400	38400
+-#define	YOSEMITE_BAUD_57600	57600
+-#define	YOSEMITE_BAUD_115200	115200
+-
+-#define	YOSEMITE_PARITY_NONE	0
+-#define	YOSEMITE_PARITY_ODD	0x08
+-#define	YOSEMITE_PARITY_EVEN	0x18
+-#define	YOSEMITE_PARITY_MARK	0x28
+-#define	YOSEMITE_PARITY_SPACE	0x38
+-
+-#define	YOSEMITE_DATA_5BIT	0x0
+-#define	YOSEMITE_DATA_6BIT	0x1
+-#define	YOSEMITE_DATA_7BIT	0x2
+-#define	YOSEMITE_DATA_8BIT	0x3
+-
+-#define	YOSEMITE_STOP_1BIT	0x0
+-#define	YOSEMITE_STOP_2BIT	0x4
+-
+-/* This is crucial */
+-#define	SERIAL_REG_OFS		0x1
+-
+-#define	SERIAL_RCV_BUFFER	0x0
+-#define	SERIAL_TRANS_HOLD	0x0
+-#define	SERIAL_SEND_BUFFER	0x0
+-#define	SERIAL_INTR_ENABLE	(1 * SERIAL_REG_OFS)
+-#define	SERIAL_INTR_ID		(2 * SERIAL_REG_OFS)
+-#define	SERIAL_DATA_FORMAT	(3 * SERIAL_REG_OFS)
+-#define	SERIAL_LINE_CONTROL	(3 * SERIAL_REG_OFS)
+-#define	SERIAL_MODEM_CONTROL	(4 * SERIAL_REG_OFS)
+-#define	SERIAL_RS232_OUTPUT	(4 * SERIAL_REG_OFS)
+-#define	SERIAL_LINE_STATUS	(5 * SERIAL_REG_OFS)
+-#define	SERIAL_MODEM_STATUS	(6 * SERIAL_REG_OFS)
+-#define	SERIAL_RS232_INPUT	(6 * SERIAL_REG_OFS)
+-#define	SERIAL_SCRATCH_PAD	(7 * SERIAL_REG_OFS)
+-
+-#define	SERIAL_DIVISOR_LSB	(0 * SERIAL_REG_OFS)
+-#define	SERIAL_DIVISOR_MSB	(1 * SERIAL_REG_OFS)
+-
+-/*
+- * Functions to READ and WRITE to serial port 0
+- */
+-#define	SERIAL_READ(ofs)		(*((volatile unsigned char*)	\
+-					(TITAN_SERIAL_BASE + ofs)))
+-
+-#define	SERIAL_WRITE(ofs, val)		((*((volatile unsigned char*)	\
+-					(TITAN_SERIAL_BASE + ofs))) = val)
+-
+-/*
+- * Functions to READ and WRITE to serial port 1
+- */
+-#define	SERIAL_READ_1(ofs)		(*((volatile unsigned char*)	\
+-					(TITAN_SERIAL_BASE_1 + ofs)))
+-
+-#define	SERIAL_WRITE_1(ofs, val)	((*((volatile unsigned char*)	\
+-					(TITAN_SERIAL_BASE_1 + ofs))) = val)
+-
+-/*
+- * Second serial port initialization
+- */
+-void init_second_port(void)
+-{
+-	/* Disable Interrupts */
+-	SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x0);
+-	SERIAL_WRITE_1(SERIAL_INTR_ENABLE, 0x0);
+-
+-	{
+-		unsigned int divisor;
+-
+-		SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x80);
+-		divisor = TITAN_SERIAL_BASE_BAUD / YOSEMITE_BAUD_115200;
+-		SERIAL_WRITE_1(SERIAL_DIVISOR_LSB, divisor & 0xff);
+-
+-		SERIAL_WRITE_1(SERIAL_DIVISOR_MSB,
+-			       (divisor & 0xff00) >> 8);
+-		SERIAL_WRITE_1(SERIAL_LINE_CONTROL, 0x0);
+-	}
+-
+-	SERIAL_WRITE_1(SERIAL_DATA_FORMAT, YOSEMITE_DATA_8BIT |
+-		       YOSEMITE_PARITY_NONE | YOSEMITE_STOP_1BIT);
+-
+-	/* Enable Interrupts */
+-	SERIAL_WRITE_1(SERIAL_INTR_ENABLE, 0xf);
+-}
+-
+-/* Initialize the serial port for KGDB debugging */
+-void debugInit(unsigned int baud, unsigned char data, unsigned char parity,
+-	       unsigned char stop)
+-{
+-	/* Disable Interrupts */
+-	SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x0);
+-	SERIAL_WRITE(SERIAL_INTR_ENABLE, 0x0);
+-
+-	{
+-		unsigned int divisor;
+-
+-		SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x80);
+-
+-		divisor = TITAN_SERIAL_BASE_BAUD / baud;
+-		SERIAL_WRITE(SERIAL_DIVISOR_LSB, divisor & 0xff);
+-
+-		SERIAL_WRITE(SERIAL_DIVISOR_MSB, (divisor & 0xff00) >> 8);
+-		SERIAL_WRITE(SERIAL_LINE_CONTROL, 0x0);
+-	}
+-
+-	SERIAL_WRITE(SERIAL_DATA_FORMAT, data | parity | stop);
+-}
+-
+-static int remoteDebugInitialized = 0;
+-
+-unsigned char getDebugChar(void)
+-{
+-	if (!remoteDebugInitialized) {
+-		remoteDebugInitialized = 1;
+-		debugInit(YOSEMITE_BAUD_115200,
+-			  YOSEMITE_DATA_8BIT,
+-			  YOSEMITE_PARITY_NONE, YOSEMITE_STOP_1BIT);
+-	}
+-
+-	while ((SERIAL_READ(SERIAL_LINE_STATUS) & 0x1) == 0);
+-	return SERIAL_READ(SERIAL_RCV_BUFFER);
+-}
+-
+-int putDebugChar(unsigned char byte)
+-{
+-	if (!remoteDebugInitialized) {
+-		remoteDebugInitialized = 1;
+-		debugInit(YOSEMITE_BAUD_115200,
+-			  YOSEMITE_DATA_8BIT,
+-			  YOSEMITE_PARITY_NONE, YOSEMITE_STOP_1BIT);
+-	}
+-
+-	while ((SERIAL_READ(SERIAL_LINE_STATUS) & 0x20) == 0);
+-	SERIAL_WRITE(SERIAL_SEND_BUFFER, byte);
+-
+-	return 1;
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/irq.c linux-2.6.22-try2/arch/mips/pmc-sierra/yosemite/irq.c
+--- linux-2.6.22-570/arch/mips/pmc-sierra/yosemite/irq.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/pmc-sierra/yosemite/irq.c	2007-12-19 15:29:24.000000000 -0500
+@@ -137,10 +137,6 @@
+ 	}
+ }
+ 
+-#ifdef CONFIG_KGDB
+-extern void init_second_port(void);
+-#endif
+-
+ /*
+  * Initialize the next level interrupt handler
+  */
+@@ -152,11 +148,6 @@
+ 	rm7k_cpu_irq_init();
+ 	rm9k_cpu_irq_init();
+ 
+-#ifdef CONFIG_KGDB
+-	/* At this point, initialize the second serial port */
+-	init_second_port();
+-#endif
+-
+ #ifdef CONFIG_GDB_CONSOLE
+ 	register_gdb_console();
+ #endif
+diff -Nurb linux-2.6.22-570/arch/mips/sgi-ip22/ip22-setup.c linux-2.6.22-try2/arch/mips/sgi-ip22/ip22-setup.c
+--- linux-2.6.22-570/arch/mips/sgi-ip22/ip22-setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/sgi-ip22/ip22-setup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -101,30 +101,6 @@
+ 		add_preferred_console("arc", 0, NULL);
+ 	}
+ 
+-#ifdef CONFIG_KGDB
+-	{
+-	char *kgdb_ttyd = prom_getcmdline();
+-
+-	if ((kgdb_ttyd = strstr(kgdb_ttyd, "kgdb=ttyd")) != NULL) {
+-		int line;
+-		kgdb_ttyd += strlen("kgdb=ttyd");
+-		if (*kgdb_ttyd != '1' && *kgdb_ttyd != '2')
+-			printk(KERN_INFO "KGDB: Uknown serial line /dev/ttyd%c"
+-			       ", falling back to /dev/ttyd1\n", *kgdb_ttyd);
+-		line = *kgdb_ttyd == '2' ? 0 : 1;
+-		printk(KERN_INFO "KGDB: Using serial line /dev/ttyd%d for "
+-		       "session\n", line ? 1 : 2);
+-		rs_kgdb_hook(line);
+-
+-		printk(KERN_INFO "KGDB: Using serial line /dev/ttyd%d for "
+-		       "session, please connect your debugger\n", line ? 1:2);
+-
+-		kgdb_enabled = 1;
+-		/* Breakpoints and stuff are in sgi_irq_setup() */
+-	}
+-	}
+-#endif
+-
+ #if defined(CONFIG_VT) && defined(CONFIG_SGI_NEWPORT_CONSOLE)
+ 	{
+ 		ULONG *gfxinfo;
+diff -Nurb linux-2.6.22-570/arch/mips/sgi-ip27/Makefile linux-2.6.22-try2/arch/mips/sgi-ip27/Makefile
+--- linux-2.6.22-570/arch/mips/sgi-ip27/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/sgi-ip27/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -7,5 +7,4 @@
+ 	   ip27-xtalk.o
+ 
+ obj-$(CONFIG_EARLY_PRINTK)	+= ip27-console.o
+-obj-$(CONFIG_KGDB)		+= ip27-dbgio.o
+ obj-$(CONFIG_SMP)		+= ip27-smp.o
+diff -Nurb linux-2.6.22-570/arch/mips/sgi-ip27/ip27-dbgio.c linux-2.6.22-try2/arch/mips/sgi-ip27/ip27-dbgio.c
+--- linux-2.6.22-570/arch/mips/sgi-ip27/ip27-dbgio.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/sgi-ip27/ip27-dbgio.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,60 +0,0 @@
+-/*
+- *  This program is free software; you can redistribute  it and/or modify it
+- *  under  the terms of  the GNU General  Public License as published by the
+- *  Free Software Foundation;  either version 2 of the  License, or (at your
+- *  option) any later version.
+- *
+- *  THIS  SOFTWARE  IS PROVIDED   ``AS  IS'' AND   ANY  EXPRESS OR IMPLIED
+- *  WARRANTIES,   INCLUDING, BUT NOT  LIMITED  TO, THE IMPLIED WARRANTIES OF
+- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
+- *  NO  EVENT  SHALL   THE AUTHOR  BE    LIABLE FOR ANY   DIRECT, INDIRECT,
+- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+- *  NOT LIMITED   TO, PROCUREMENT OF  SUBSTITUTE GOODS  OR SERVICES; LOSS OF
+- *  USE, DATA,  OR PROFITS; OR  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+- *  ANY THEORY OF LIABILITY, WHETHER IN  CONTRACT, STRICT LIABILITY, OR TORT
+- *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+- *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- *
+- *  You should have received a copy of the  GNU General Public License along
+- *  with this program; if not, write  to the Free Software Foundation, Inc.,
+- *  675 Mass Ave, Cambridge, MA 02139, USA.
+- *
+- * Copyright 2004 Ralf Baechle <ralf@linux-mips.org>
+- */
+-#include <asm/sn/addrs.h>
+-#include <asm/sn/sn0/hub.h>
+-#include <asm/sn/klconfig.h>
+-#include <asm/sn/ioc3.h>
+-#include <asm/sn/sn_private.h>
+-
+-#include <linux/serial.h>
+-#include <linux/serial_core.h>
+-#include <linux/serial_reg.h>
+-
+-#define IOC3_CLK        (22000000 / 3)
+-#define IOC3_FLAGS      (0)
+-
+-static inline struct ioc3_uartregs *console_uart(void)
+-{
+-	struct ioc3 *ioc3;
+-
+-	ioc3 = (struct ioc3 *)KL_CONFIG_CH_CONS_INFO(get_nasid())->memory_base;
+-
+-	return &ioc3->sregs.uarta;
+-}
+-
+-unsigned char getDebugChar(void)
+-{
+-	struct ioc3_uartregs *uart = console_uart();
+-
+-	while ((uart->iu_lsr & UART_LSR_DR) == 0);
+-	return uart->iu_rbr;
+-}
+-
+-void putDebugChar(unsigned char c)
+-{
+-	struct ioc3_uartregs *uart = console_uart();
+-
+-	while ((uart->iu_lsr & UART_LSR_THRE) == 0);
+-	uart->iu_thr = c;
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/bcm1480/irq.c linux-2.6.22-try2/arch/mips/sibyte/bcm1480/irq.c
+--- linux-2.6.22-570/arch/mips/sibyte/bcm1480/irq.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/sibyte/bcm1480/irq.c	2007-12-19 15:29:24.000000000 -0500
+@@ -57,30 +57,6 @@
+ extern unsigned long ht_eoi_space;
+ #endif
+ 
+-#ifdef CONFIG_KGDB
+-#include <asm/gdb-stub.h>
+-extern void breakpoint(void);
+-static int kgdb_irq;
+-#ifdef CONFIG_GDB_CONSOLE
+-extern void register_gdb_console(void);
+-#endif
+-
+-/* kgdb is on when configured.  Pass "nokgdb" kernel arg to turn it off */
+-static int kgdb_flag = 1;
+-static int __init nokgdb(char *str)
+-{
+-	kgdb_flag = 0;
+-	return 1;
+-}
+-__setup("nokgdb", nokgdb);
+-
+-/* Default to UART1 */
+-int kgdb_port = 1;
+-#ifdef CONFIG_SIBYTE_SB1250_DUART
+-extern char sb1250_duart_present[];
+-#endif
+-#endif
+-
+ static struct irq_chip bcm1480_irq_type = {
+ 	.name = "BCM1480-IMR",
+ 	.ack = ack_bcm1480_irq,
+@@ -394,62 +370,11 @@
+ 	 * does its own management of IP7.
+ 	 */
+ 
+-#ifdef CONFIG_KGDB
+-	imask |= STATUSF_IP6;
+-#endif
+ 	/* Enable necessary IPs, disable the rest */
+ 	change_c0_status(ST0_IM, imask);
+ 
+-#ifdef CONFIG_KGDB
+-	if (kgdb_flag) {
+-		kgdb_irq = K_BCM1480_INT_UART_0 + kgdb_port;
+-
+-#ifdef CONFIG_SIBYTE_SB1250_DUART
+-		sb1250_duart_present[kgdb_port] = 0;
+-#endif
+-		/* Setup uart 1 settings, mapper */
+-		/* QQQ FIXME */
+-		__raw_writeq(M_DUART_IMR_BRK, IO_SPACE_BASE + A_DUART_IMRREG(kgdb_port));
+-
+-		bcm1480_steal_irq(kgdb_irq);
+-		__raw_writeq(IMR_IP6_VAL,
+-			     IO_SPACE_BASE + A_BCM1480_IMR_REGISTER(0, R_BCM1480_IMR_INTERRUPT_MAP_BASE_H) +
+-			     (kgdb_irq<<3));
+-		bcm1480_unmask_irq(0, kgdb_irq);
+-
+-#ifdef CONFIG_GDB_CONSOLE
+-		register_gdb_console();
+-#endif
+-		printk("Waiting for GDB on UART port %d\n", kgdb_port);
+-		set_debug_traps();
+-		breakpoint();
+-	}
+-#endif
+ }
+ 
+-#ifdef CONFIG_KGDB
+-
+-#include <linux/delay.h>
+-
+-#define duart_out(reg, val)     csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
+-#define duart_in(reg)           csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
+-
+-static void bcm1480_kgdb_interrupt(void)
+-{
+-	/*
+-	 * Clear break-change status (allow some time for the remote
+-	 * host to stop the break, since we would see another
+-	 * interrupt on the end-of-break too)
+-	 */
+-	kstat.irqs[smp_processor_id()][kgdb_irq]++;
+-	mdelay(500);
+-	duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT |
+-				M_DUART_RX_EN | M_DUART_TX_EN);
+-	set_async_breakpoint(&get_irq_regs()->cp0_epc);
+-}
+-
+-#endif 	/* CONFIG_KGDB */
+-
+ extern void bcm1480_timer_interrupt(void);
+ extern void bcm1480_mailbox_interrupt(void);
+ 
+@@ -478,11 +403,6 @@
+ 		bcm1480_mailbox_interrupt();
+ #endif
+ 
+-#ifdef CONFIG_KGDB
+-	else if (pending & CAUSEF_IP6)
+-		bcm1480_kgdb_interrupt();		/* KGDB (uart 1) */
+-#endif
+-
+ 	else if (pending & CAUSEF_IP2) {
+ 		unsigned long long mask_h, mask_l;
+ 		unsigned long base;
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/cfe/setup.c linux-2.6.22-try2/arch/mips/sibyte/cfe/setup.c
+--- linux-2.6.22-570/arch/mips/sibyte/cfe/setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/sibyte/cfe/setup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -58,10 +58,6 @@
+ extern unsigned long initrd_start, initrd_end;
+ #endif
+ 
+-#ifdef CONFIG_KGDB
+-extern int kgdb_port;
+-#endif
+-
+ static void ATTRIB_NORET cfe_linux_exit(void *arg)
+ {
+ 	int warm = *(int *)arg;
+@@ -242,9 +238,6 @@
+ 	int argc = fw_arg0;
+ 	char **envp = (char **) fw_arg2;
+ 	int *prom_vec = (int *) fw_arg3;
+-#ifdef CONFIG_KGDB
+-	char *arg;
+-#endif
+ 
+ 	_machine_restart   = cfe_linux_restart;
+ 	_machine_halt      = cfe_linux_halt;
+@@ -308,13 +301,6 @@
+ 		}
+ 	}
+ 
+-#ifdef CONFIG_KGDB
+-	if ((arg = strstr(arcs_cmdline,"kgdb=duart")) != NULL)
+-		kgdb_port = (arg[10] == '0') ? 0 : 1;
+-	else
+-		kgdb_port = 1;
+-#endif
+-
+ #ifdef CONFIG_BLK_DEV_INITRD
+ 	{
+ 		char *ptr;
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/sb1250/Makefile linux-2.6.22-try2/arch/mips/sibyte/sb1250/Makefile
+--- linux-2.6.22-570/arch/mips/sibyte/sb1250/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/sibyte/sb1250/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -3,3 +3,4 @@
+ obj-$(CONFIG_SMP)			+= smp.o
+ obj-$(CONFIG_SIBYTE_STANDALONE)		+= prom.o
+ obj-$(CONFIG_SIBYTE_BUS_WATCHER)	+= bus_watcher.o
++obj-$(CONFIG_KGDB_SIBYTE)		+= kgdb_sibyte.o
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/sb1250/irq.c linux-2.6.22-try2/arch/mips/sibyte/sb1250/irq.c
+--- linux-2.6.22-570/arch/mips/sibyte/sb1250/irq.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/sibyte/sb1250/irq.c	2007-12-19 15:29:24.000000000 -0500
+@@ -29,6 +29,7 @@
+ #include <asm/signal.h>
+ #include <asm/system.h>
+ #include <asm/io.h>
++#include <asm/kgdb.h>
+ 
+ #include <asm/sibyte/sb1250_regs.h>
+ #include <asm/sibyte/sb1250_int.h>
+@@ -56,16 +57,6 @@
+ extern unsigned long ldt_eoi_space;
+ #endif
+ 
+-#ifdef CONFIG_KGDB
+-static int kgdb_irq;
+-
+-/* Default to UART1 */
+-int kgdb_port = 1;
+-#ifdef CONFIG_SIBYTE_SB1250_DUART
+-extern char sb1250_duart_present[];
+-#endif
+-#endif
+-
+ static struct irq_chip sb1250_irq_type = {
+ 	.name = "SB1250-IMR",
+ 	.ack = ack_sb1250_irq,
+@@ -304,6 +295,11 @@
+ 	unsigned int imask = STATUSF_IP4 | STATUSF_IP3 | STATUSF_IP2 |
+ 		STATUSF_IP1 | STATUSF_IP0;
+ 
++#ifdef CONFIG_KGDB
++	if (kgdb_early_setup)
++		return;
++#endif
++
+ 	/* Default everything to IP2 */
+ 	for (i = 0; i < SB1250_NR_IRQS; i++) {	/* was I0 */
+ 		__raw_writeq(IMR_IP2_VAL,
+@@ -349,58 +345,16 @@
+ 	 * does its own management of IP7.
+ 	 */
+ 
+-#ifdef CONFIG_KGDB
++#ifdef CONFIG_KGDB_SIBYTE
+ 	imask |= STATUSF_IP6;
+ #endif
+ 	/* Enable necessary IPs, disable the rest */
+ 	change_c0_status(ST0_IM, imask);
+-
+-#ifdef CONFIG_KGDB
+-	if (kgdb_flag) {
+-		kgdb_irq = K_INT_UART_0 + kgdb_port;
+-
+-#ifdef CONFIG_SIBYTE_SB1250_DUART
+-		sb1250_duart_present[kgdb_port] = 0;
+-#endif
+-		/* Setup uart 1 settings, mapper */
+-		__raw_writeq(M_DUART_IMR_BRK,
+-			     IOADDR(A_DUART_IMRREG(kgdb_port)));
+-
+-		sb1250_steal_irq(kgdb_irq);
+-		__raw_writeq(IMR_IP6_VAL,
+-			     IOADDR(A_IMR_REGISTER(0,
+-						   R_IMR_INTERRUPT_MAP_BASE) +
+-				    (kgdb_irq << 3)));
+-		sb1250_unmask_irq(0, kgdb_irq);
+-	}
+-#endif
+ }
+ 
+-#ifdef CONFIG_KGDB
+-
+-#include <linux/delay.h>
+-
+-#define duart_out(reg, val)     csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
+-#define duart_in(reg)           csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
+-
+-static void sb1250_kgdb_interrupt(void)
+-{
+-	/*
+-	 * Clear break-change status (allow some time for the remote
+-	 * host to stop the break, since we would see another
+-	 * interrupt on the end-of-break too)
+-	 */
+-	kstat_this_cpu.irqs[kgdb_irq]++;
+-	mdelay(500);
+-	duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT |
+-				M_DUART_RX_EN | M_DUART_TX_EN);
+-	set_async_breakpoint(&get_irq_regs()->cp0_epc);
+-}
+-
+-#endif 	/* CONFIG_KGDB */
+-
+ extern void sb1250_timer_interrupt(void);
+ extern void sb1250_mailbox_interrupt(void);
++extern void sb1250_kgdb_interrupt(void);
+ 
+ asmlinkage void plat_irq_dispatch(void)
+ {
+@@ -437,7 +391,7 @@
+ 		sb1250_mailbox_interrupt();
+ #endif
+ 
+-#ifdef CONFIG_KGDB
++#ifdef CONFIG_KGDB_SIBYTE
+ 	else if (pending & CAUSEF_IP6)			/* KGDB (uart 1) */
+ 		sb1250_kgdb_interrupt();
+ #endif
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/sb1250/kgdb_sibyte.c linux-2.6.22-try2/arch/mips/sibyte/sb1250/kgdb_sibyte.c
+--- linux-2.6.22-570/arch/mips/sibyte/sb1250/kgdb_sibyte.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/mips/sibyte/sb1250/kgdb_sibyte.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,144 @@
++/*
++ * arch/mips/sibyte/sb1250/kgdb_sibyte.c
++ *
++ * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com
++ *
++ * 2004 (c) MontaVista Software, Inc. This file is licensed under
++ * the terms of the GNU General Public License version 2. This program
++ * is licensed "as is" without any warranty of any kind, whether express
++ * or implied.
++ */
++
++/*
++ * Support for KGDB on the Broadcom SiByte. The SWARM board,
++ * for example, does not have an 8250/16550-compatible serial
++ * port. Hence, we need a driver for the serial
++ * ports to handle KGDB.  This board needs nothing in addition
++ * to what is normally provided by the gdb portion of the stub.
++ */
++
++#include <linux/delay.h>
++#include <linux/kernel_stat.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++
++#include <asm/io.h>
++#include <asm/sibyte/sb1250.h>
++#include <asm/sibyte/sb1250_regs.h>
++#include <asm/sibyte/sb1250_uart.h>
++#include <asm/sibyte/sb1250_int.h>
++#include <asm/addrspace.h>
++
++int kgdb_port = 1;
++static int kgdb_irq;
++
++extern char sb1250_duart_present[];
++extern int sb1250_steal_irq(int irq);
++
++/* Forward declarations. */
++static void kgdbsibyte_init_duart(void);
++static int kgdb_init_io(void);
++
++#define IMR_IP6_VAL	K_INT_MAP_I4
++#define	duart_out(reg, val)	csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
++#define duart_in(reg)		csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
++
++static void kgdbsibyte_write_char(u8 c)
++{
++	while ((duart_in(R_DUART_STATUS) & M_DUART_TX_RDY) == 0) ;
++	duart_out(R_DUART_TX_HOLD, c);
++}
++
++static int kgdbsibyte_read_char(void)
++{
++	int ret_char;
++	unsigned int status;
++
++	do {
++		status = duart_in(R_DUART_STATUS);
++	} while ((status & M_DUART_RX_RDY) == 0);
++
++	/*
++	 * Check for framing error
++	 */
++	if (status & M_DUART_FRM_ERR) {
++		kgdbsibyte_init_duart();
++		kgdbsibyte_write_char('-');
++		return '-';
++	}
++
++	ret_char = duart_in(R_DUART_RX_HOLD);
++
++	return ret_char;
++}
++
++void sb1250_kgdb_interrupt(void)
++{
++	int kgdb_irq = K_INT_UART_0 + kgdb_port;
++
++	/*
++	 * Clear break-change status (allow some time for the remote
++	 * host to stop the break, since we would see another
++	 * interrupt on the end-of-break too)
++	 */
++	kstat_this_cpu.irqs[kgdb_irq]++;
++	mdelay(500);
++	duart_out(R_DUART_CMD, V_DUART_MISC_CMD_RESET_BREAK_INT |
++		  M_DUART_RX_EN | M_DUART_TX_EN);
++	breakpoint();
++
++}
++
++/*
++ * We use port #1 and set it to 115200 baud, 8n1.
++ */
++static void kgdbsibyte_init_duart(void)
++{
++	/* Set 8n1. */
++	duart_out(R_DUART_MODE_REG_1,
++		  V_DUART_BITS_PER_CHAR_8 | V_DUART_PARITY_MODE_NONE);
++	duart_out(R_DUART_MODE_REG_2, M_DUART_STOP_BIT_LEN_1);
++	/* Set baud rate of 115200. */
++	duart_out(R_DUART_CLK_SEL, V_DUART_BAUD_RATE(115200));
++	/* Enable rx and tx */
++	duart_out(R_DUART_CMD, M_DUART_RX_EN | M_DUART_TX_EN);
++}
++
++static int kgdb_init_io(void)
++{
++#ifdef CONFIG_SIBYTE_SB1250_DUART
++	sb1250_duart_present[kgdb_port] = 0;
++#endif
++
++	kgdbsibyte_init_duart();
++
++	return 0;
++}
++
++/*
++ * Hook up our IRQ line.  We will already have been initialized at
++ * this point.
++ */
++static void __init kgdbsibyte_hookup_irq(void)
++{
++	/* Steal the IRQ. */
++	kgdb_irq = K_INT_UART_0 + kgdb_port;
++
++	/* Setup uart 1 settings, mapper */
++	__raw_writeq(M_DUART_IMR_BRK, IOADDR(A_DUART_IMRREG(kgdb_port)));
++
++	sb1250_steal_irq(kgdb_irq);
++
++	__raw_writeq(IMR_IP6_VAL,
++		     IOADDR(A_IMR_REGISTER(0, R_IMR_INTERRUPT_MAP_BASE) +
++			    (kgdb_irq << 3)));
++
++	sb1250_unmask_irq(0, kgdb_irq);
++}
++
++struct kgdb_io kgdb_io_ops = {
++	.read_char  = kgdbsibyte_read_char,
++	.write_char = kgdbsibyte_write_char,
++	.init = kgdb_init_io,
++	.late_init  = kgdbsibyte_hookup_irq,
++};
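The kgdb_io table above is the whole board-side contract under the unified stub this patch introduces: init() brings the channel up early, read_char()/write_char() are polled from the trap handler, and late_init() claims the break IRQ once the interrupt layer is alive (as kgdbsibyte_hookup_irq() does here). A minimal skeleton of another backend, only to show the boundary; my_uart_rx(), my_uart_tx() and my_uart_setup() are hypothetical helpers:

	static int my_read_char(void)
	{
		return my_uart_rx();	/* block (poll) until a byte arrives */
	}

	static void my_write_char(u8 c)
	{
		my_uart_tx(c);
	}

	static int my_init_io(void)
	{
		my_uart_setup();	/* speed, 8n1, rx/tx enables */
		return 0;
	}

	struct kgdb_io kgdb_io_ops = {
		.read_char  = my_read_char,
		.write_char = my_write_char,
		.init       = my_init_io,
		/* .late_init left unset; the sb1250 backend uses it to
		   claim its break interrupt */
	};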
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/swarm/Makefile linux-2.6.22-try2/arch/mips/sibyte/swarm/Makefile
+--- linux-2.6.22-570/arch/mips/sibyte/swarm/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/sibyte/swarm/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -1,3 +1 @@
+ lib-y				= setup.o rtc_xicor1241.o rtc_m41t81.o
+-
+-lib-$(CONFIG_KGDB)		+= dbg_io.o
+diff -Nurb linux-2.6.22-570/arch/mips/sibyte/swarm/dbg_io.c linux-2.6.22-try2/arch/mips/sibyte/swarm/dbg_io.c
+--- linux-2.6.22-570/arch/mips/sibyte/swarm/dbg_io.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/sibyte/swarm/dbg_io.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,76 +0,0 @@
+-/*
+- * kgdb debug routines for SiByte boards.
+- *
+- * Copyright (C) 2001 MontaVista Software Inc.
+- * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net
+- *
+- * This program is free software; you can redistribute  it and/or modify it
+- * under  the terms of  the GNU General  Public License as published by the
+- * Free Software Foundation;  either version 2 of the  License, or (at your
+- * option) any later version.
+- *
+- */
+-
+-/* -------------------- BEGINNING OF CONFIG --------------------- */
+-
+-#include <linux/delay.h>
+-#include <asm/io.h>
+-#include <asm/sibyte/sb1250.h>
+-#include <asm/sibyte/sb1250_regs.h>
+-#include <asm/sibyte/sb1250_uart.h>
+-#include <asm/sibyte/sb1250_int.h>
+-#include <asm/addrspace.h>
+-
+-/*
+- * We use the second serial port for kgdb traffic.
+- * 	115200, 8, N, 1.
+- */
+-
+-#define	BAUD_RATE		115200
+-#define	CLK_DIVISOR		V_DUART_BAUD_RATE(BAUD_RATE)
+-#define	DATA_BITS		V_DUART_BITS_PER_CHAR_8		/* or 7    */
+-#define	PARITY			V_DUART_PARITY_MODE_NONE	/* or even */
+-#define	STOP_BITS		M_DUART_STOP_BIT_LEN_1		/* or 2    */
+-
+-static int duart_initialized = 0;	/* 0: need to be init'ed by kgdb */
+-
+-/* -------------------- END OF CONFIG --------------------- */
+-extern int kgdb_port;
+-
+-#define	duart_out(reg, val)	csr_out32(val, IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
+-#define duart_in(reg)		csr_in32(IOADDR(A_DUART_CHANREG(kgdb_port,reg)))
+-
+-void putDebugChar(unsigned char c);
+-unsigned char getDebugChar(void);
+-static void
+-duart_init(int clk_divisor, int data, int parity, int stop)
+-{
+-	duart_out(R_DUART_MODE_REG_1, data | parity);
+-	duart_out(R_DUART_MODE_REG_2, stop);
+-	duart_out(R_DUART_CLK_SEL, clk_divisor);
+-
+-	duart_out(R_DUART_CMD, M_DUART_RX_EN | M_DUART_TX_EN);	/* enable rx and tx */
+-}
+-
+-void
+-putDebugChar(unsigned char c)
+-{
+-	if (!duart_initialized) {
+-		duart_initialized = 1;
+-		duart_init(CLK_DIVISOR, DATA_BITS, PARITY, STOP_BITS);
+-	}
+-	while ((duart_in(R_DUART_STATUS) & M_DUART_TX_RDY) == 0);
+-	duart_out(R_DUART_TX_HOLD, c);
+-}
+-
+-unsigned char
+-getDebugChar(void)
+-{
+-	if (!duart_initialized) {
+-		duart_initialized = 1;
+-		duart_init(CLK_DIVISOR, DATA_BITS, PARITY, STOP_BITS);
+-	}
+-	while ((duart_in(R_DUART_STATUS) & M_DUART_RX_RDY) == 0) ;
+-	return duart_in(R_DUART_RX_HOLD);
+-}
+-
+diff -Nurb linux-2.6.22-570/arch/mips/tx4927/common/Makefile linux-2.6.22-try2/arch/mips/tx4927/common/Makefile
+--- linux-2.6.22-570/arch/mips/tx4927/common/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/tx4927/common/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -9,4 +9,3 @@
+ obj-y	+= tx4927_prom.o tx4927_setup.o tx4927_irq.o
+ 
+ obj-$(CONFIG_TOSHIBA_FPCIB0)	   += smsc_fdc37m81x.o
+-obj-$(CONFIG_KGDB)                 += tx4927_dbgio.o
+diff -Nurb linux-2.6.22-570/arch/mips/tx4927/common/tx4927_dbgio.c linux-2.6.22-try2/arch/mips/tx4927/common/tx4927_dbgio.c
+--- linux-2.6.22-570/arch/mips/tx4927/common/tx4927_dbgio.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/tx4927/common/tx4927_dbgio.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,47 +0,0 @@
+-/*
+- * linux/arch/mips/tx4927/common/tx4927_dbgio.c
+- *
+- * kgdb interface for gdb
+- *
+- * Author: MontaVista Software, Inc.
+- *         source@mvista.com
+- *
+- * Copyright 2001-2002 MontaVista Software Inc.
+- *
+- *  This program is free software; you can redistribute it and/or modify it
+- *  under the terms of the GNU General Public License as published by the
+- *  Free Software Foundation; either version 2 of the License, or (at your
+- *  option) any later version.
+- *
+- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+- *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+- *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+- *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+- *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+- *  TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+- *  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- *
+- *  You should have received a copy of the GNU General Public License along
+- *  with this program; if not, write to the Free Software Foundation, Inc.,
+- *  675 Mass Ave, Cambridge, MA 02139, USA.
+- */
+-
+-#include <asm/mipsregs.h>
+-#include <asm/system.h>
+-#include <asm/tx4927/tx4927_mips.h>
+-
+-u8 getDebugChar(void)
+-{
+-	extern u8 txx9_sio_kdbg_rd(void);
+-	return (txx9_sio_kdbg_rd());
+-}
+-
+-
+-int putDebugChar(u8 byte)
+-{
+-	extern int txx9_sio_kdbg_wr( u8 ch );
+-	return (txx9_sio_kdbg_wr(byte));
+-}
+diff -Nurb linux-2.6.22-570/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c linux-2.6.22-try2/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c
+--- linux-2.6.22-570/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/tx4927/toshiba_rbtx4927/toshiba_rbtx4927_setup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -76,7 +76,7 @@
+ #include <linux/hdreg.h>
+ #include <linux/ide.h>
+ #endif
+-#ifdef CONFIG_SERIAL_TXX9
++#if defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9)
+ #include <linux/tty.h>
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
+@@ -973,9 +973,10 @@
+ 
+ #endif /* CONFIG_PCI */
+ 
+-#ifdef CONFIG_SERIAL_TXX9
++#if defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9)
+ 	{
+ 		extern int early_serial_txx9_setup(struct uart_port *port);
++		extern int txx9_kgdb_add_port(int n, struct uart_port *port);
+ 		int i;
+ 		struct uart_port req;
+ 		for(i = 0; i < 2; i++) {
+@@ -987,7 +988,12 @@
+ 			req.irq = 32 + i;
+ 			req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/;
+ 			req.uartclk = 50000000;
++#ifdef CONFIG_SERIAL_TXX9
+ 			early_serial_txx9_setup(&req);
++#endif
++#ifdef CONFIG_KGDB_TXX9
++			txx9_kgdb_add_port(i, &req);
++#endif
+ 		}
+ 	}
+ #ifdef CONFIG_SERIAL_TXX9_CONSOLE
+@@ -996,7 +1002,7 @@
+                 strcat(argptr, " console=ttyS0,38400");
+         }
+ #endif
+-#endif
++#endif /* defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9) */
+ 
+ #ifdef CONFIG_ROOT_NFS
+         argptr = prom_getcmdline();
+diff -Nurb linux-2.6.22-570/arch/mips/tx4938/common/Makefile linux-2.6.22-try2/arch/mips/tx4938/common/Makefile
+--- linux-2.6.22-570/arch/mips/tx4938/common/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/tx4938/common/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -7,5 +7,4 @@
+ #
+ 
+ obj-y	+= prom.o setup.o irq.o rtc_rx5c348.o
+-obj-$(CONFIG_KGDB) += dbgio.o
+ 
+diff -Nurb linux-2.6.22-570/arch/mips/tx4938/common/dbgio.c linux-2.6.22-try2/arch/mips/tx4938/common/dbgio.c
+--- linux-2.6.22-570/arch/mips/tx4938/common/dbgio.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/tx4938/common/dbgio.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,50 +0,0 @@
+-/*
+- * linux/arch/mips/tx4938/common/dbgio.c
+- *
+- * kgdb interface for gdb
+- *
+- * Author: MontaVista Software, Inc.
+- *         source@mvista.com
+- *
+- * Copyright 2005 MontaVista Software Inc.
+- *
+- *  This program is free software; you can redistribute it and/or modify it
+- *  under the terms of the GNU General Public License as published by the
+- *  Free Software Foundation; either version 2 of the License, or (at your
+- *  option) any later version.
+- *
+- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+- *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+- *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+- *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+- *  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+- *  TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+- *  USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+- *
+- *  You should have received a copy of the GNU General Public License along
+- *  with this program; if not, write to the Free Software Foundation, Inc.,
+- *  675 Mass Ave, Cambridge, MA 02139, USA.
+- *
+- * Support for TX4938 in 2.6 - Hiroshi DOYU <Hiroshi_DOYU@montavista.co.jp>
+- */
+-
+-#include <asm/mipsregs.h>
+-#include <asm/system.h>
+-#include <asm/tx4938/tx4938_mips.h>
+-
+-extern u8 txx9_sio_kdbg_rd(void);
+-extern int txx9_sio_kdbg_wr( u8 ch );
+-
+-u8 getDebugChar(void)
+-{
+-	return (txx9_sio_kdbg_rd());
+-}
+-
+-int putDebugChar(u8 byte)
+-{
+-	return (txx9_sio_kdbg_wr(byte));
+-}
+-
+diff -Nurb linux-2.6.22-570/arch/mips/tx4938/toshiba_rbtx4938/setup.c linux-2.6.22-try2/arch/mips/tx4938/toshiba_rbtx4938/setup.c
+--- linux-2.6.22-570/arch/mips/tx4938/toshiba_rbtx4938/setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/mips/tx4938/toshiba_rbtx4938/setup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -30,7 +30,7 @@
+ #include <asm/io.h>
+ #include <asm/bootinfo.h>
+ #include <asm/tx4938/rbtx4938.h>
+-#ifdef CONFIG_SERIAL_TXX9
++#if defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9)
+ #include <linux/tty.h>
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
+@@ -924,9 +924,10 @@
+ 	set_io_port_base(RBTX4938_ETHER_BASE);
+ #endif
+ 
+-#ifdef CONFIG_SERIAL_TXX9
++#if defined(CONFIG_SERIAL_TXX9) || defined(CONFIG_KGDB_TXX9)
+ 	{
+ 		extern int early_serial_txx9_setup(struct uart_port *port);
++		extern int txx9_kgdb_add_port(int n, struct uart_port *port);
+ 		int i;
+ 		struct uart_port req;
+ 		for(i = 0; i < 2; i++) {
+@@ -938,7 +939,12 @@
+ 			req.irq = 32 + i;
+ 			req.flags |= UPF_BUGGY_UART /*HAVE_CTS_LINE*/;
+ 			req.uartclk = 50000000;
++#ifdef CONFIG_SERIAL_TXX9
+ 			early_serial_txx9_setup(&req);
++#endif
++#ifdef CONFIG_KGDB_TXX9
++			txx9_kgdb_add_port(i, &req);
++#endif
+ 		}
+ 	}
+ #ifdef CONFIG_SERIAL_TXX9_CONSOLE
+diff -Nurb linux-2.6.22-570/arch/powerpc/Kconfig linux-2.6.22-try2/arch/powerpc/Kconfig
+--- linux-2.6.22-570/arch/powerpc/Kconfig	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/Kconfig	2007-12-19 15:29:22.000000000 -0500
+@@ -4,12 +4,7 @@
+ 
+ mainmenu "Linux/PowerPC Kernel Configuration"
+ 
+-config PPC64
+-	bool "64-bit kernel"
+-	default n
+-	help
+-	  This option selects whether a 32-bit or a 64-bit kernel
+-	  will be built.
++source "arch/powerpc/platforms/Kconfig.cputype"
+ 
+ config PPC_PM_NEEDS_RTC_LIB
+ 	bool
+@@ -132,123 +127,6 @@
+ 	depends on PPC64 && (BROKEN || (PPC_PMAC64 && EXPERIMENTAL))
+ 	default y
+ 
+-menu "Processor support"
+-choice
+-	prompt "Processor Type"
+-	depends on PPC32
+-	default 6xx
+-
+-config CLASSIC32
+-	bool "52xx/6xx/7xx/74xx"
+-	select PPC_FPU
+-	select 6xx
+-	help
+-	  There are four families of PowerPC chips supported.  The more common
+-	  types (601, 603, 604, 740, 750, 7400), the Motorola embedded
+-	  versions (821, 823, 850, 855, 860, 52xx, 82xx, 83xx), the AMCC
+-	  embedded versions (403 and 405) and the high end 64 bit Power
+-	  processors (POWER 3, POWER4, and IBM PPC970 also known as G5).
+-
+-	  This option is the catch-all for 6xx types, including some of the
+-	  embedded versions.  Unless there is see an option for the specific
+-	  chip family you are using, you want this option.
+-	  
+-	  You do not want this if you are building a kernel for a 64 bit
+-	  IBM RS/6000 or an Apple G5, choose 6xx.
+-	  
+-	  If unsure, select this option
+-	  
+-	  Note that the kernel runs in 32-bit mode even on 64-bit chips.
+-
+-config PPC_82xx
+-	bool "Freescale 82xx"
+-	select 6xx
+-	select PPC_FPU
+-
+-config PPC_83xx
+-	bool "Freescale 83xx"
+-	select 6xx
+-	select FSL_SOC
+-	select 83xx
+-	select PPC_FPU
+-	select WANT_DEVICE_TREE
+-
+-config PPC_85xx
+-	bool "Freescale 85xx"
+-	select E500
+-	select FSL_SOC
+-	select 85xx
+-	select WANT_DEVICE_TREE
+-
+-config PPC_86xx
+-	bool "Freescale 86xx"
+-	select 6xx
+-	select FSL_SOC
+-	select FSL_PCIE
+-	select PPC_FPU
+-	select ALTIVEC
+-	help
+-	  The Freescale E600 SoCs have 74xx cores.
+-
+-config PPC_8xx
+-	bool "Freescale 8xx"
+-	select FSL_SOC
+-	select 8xx
+-
+-config 40x
+-	bool "AMCC 40x"
+-	select PPC_DCR_NATIVE
+-
+-config 44x
+-	bool "AMCC 44x"
+-	select PPC_DCR_NATIVE
+-	select WANT_DEVICE_TREE
+-
+-config E200
+-	bool "Freescale e200"
+-
+-endchoice
+-
+-config POWER4_ONLY
+-	bool "Optimize for POWER4"
+-	depends on PPC64
+-	default n
+-	---help---
+-	  Cause the compiler to optimize for POWER4/POWER5/PPC970 processors.
+-	  The resulting binary will not work on POWER3 or RS64 processors
+-	  when compiled with binutils 2.15 or later.
+-
+-config POWER3
+-	bool
+-	depends on PPC64
+-	default y if !POWER4_ONLY
+-
+-config POWER4
+-	depends on PPC64
+-	def_bool y
+-
+-config 6xx
+-	bool
+-
+-# this is temp to handle compat with arch=ppc
+-config 8xx
+-	bool
+-
+-# this is temp to handle compat with arch=ppc
+-config 83xx
+-	bool
+-
+-# this is temp to handle compat with arch=ppc
+-config 85xx
+-	bool
+-
+-config E500
+-	bool
+-
+-config PPC_FPU
+-	bool
+-	default y if PPC64
+-
+ config PPC_DCR_NATIVE
+ 	bool
+ 	default n
+@@ -267,134 +145,6 @@
+ 	depends on PPC64 # not supported on 32 bits yet
+ 	default n
+ 
+-config 4xx
+-	bool
+-	depends on 40x || 44x
+-	default y
+-
+-config BOOKE
+-	bool
+-	depends on E200 || E500 || 44x
+-	default y
+-
+-config FSL_BOOKE
+-	bool
+-	depends on E200 || E500
+-	default y
+-
+-config PTE_64BIT
+-	bool
+-	depends on 44x || E500
+-	default y if 44x
+-	default y if E500 && PHYS_64BIT
+-
+-config PHYS_64BIT
+-	bool 'Large physical address support' if E500
+-	depends on 44x || E500
+-	select RESOURCES_64BIT
+-	default y if 44x
+-	---help---
+-	  This option enables kernel support for larger than 32-bit physical
+-	  addresses.  This features is not be available on all e500 cores.
+-
+-	  If in doubt, say N here.
+-
+-config ALTIVEC
+-	bool "AltiVec Support"
+-	depends on CLASSIC32 || POWER4
+-	---help---
+-	  This option enables kernel support for the Altivec extensions to the
+-	  PowerPC processor. The kernel currently supports saving and restoring
+-	  altivec registers, and turning on the 'altivec enable' bit so user
+-	  processes can execute altivec instructions.
+-
+-	  This option is only usefully if you have a processor that supports
+-	  altivec (G4, otherwise known as 74xx series), but does not have
+-	  any affect on a non-altivec cpu (it does, however add code to the
+-	  kernel).
+-
+-	  If in doubt, say Y here.
+-
+-config SPE
+-	bool "SPE Support"
+-	depends on E200 || E500
+-	default y
+-	---help---
+-	  This option enables kernel support for the Signal Processing
+-	  Extensions (SPE) to the PowerPC processor. The kernel currently
+-	  supports saving and restoring SPE registers, and turning on the
+-	  'spe enable' bit so user processes can execute SPE instructions.
+-
+-	  This option is only useful if you have a processor that supports
+-	  SPE (e500, otherwise known as 85xx series), but does not have any
+-	  effect on a non-spe cpu (it does, however add code to the kernel).
+-
+-	  If in doubt, say Y here.
+-
+-config PPC_STD_MMU
+-	bool
+-	depends on 6xx || POWER3 || POWER4 || PPC64
+-	default y
+-
+-config PPC_STD_MMU_32
+-	def_bool y
+-	depends on PPC_STD_MMU && PPC32
+-
+-config PPC_MM_SLICES
+-	bool
+-	default y if HUGETLB_PAGE
+-	default n
+-
+-config VIRT_CPU_ACCOUNTING
+-	bool "Deterministic task and CPU time accounting"
+-	depends on PPC64
+-	default y
+-	help
+-	  Select this option to enable more accurate task and CPU time
+-	  accounting.  This is done by reading a CPU counter on each
+-	  kernel entry and exit and on transitions within the kernel
+-	  between system, softirq and hardirq state, so there is a
+-	  small performance impact.  This also enables accounting of
+-	  stolen time on logically-partitioned systems running on
+-	  IBM POWER5-based machines.
+-
+-	  If in doubt, say Y here.
+-
+-config SMP
+-	depends on PPC_STD_MMU
+-	bool "Symmetric multi-processing support"
+-	---help---
+-	  This enables support for systems with more than one CPU. If you have
+-	  a system with only one CPU, say N. If you have a system with more
+-	  than one CPU, say Y.  Note that the kernel does not currently
+-	  support SMP machines with 603/603e/603ev or PPC750 ("G3") processors
+-	  since they have inadequate hardware support for multiprocessor
+-	  operation.
+-
+-	  If you say N here, the kernel will run on single and multiprocessor
+-	  machines, but will use only one CPU of a multiprocessor machine. If
+-	  you say Y here, the kernel will run on single-processor machines.
+-	  On a single-processor machine, the kernel will run faster if you say
+-	  N here.
+-
+-	  If you don't know what to do here, say N.
+-
+-config NR_CPUS
+-	int "Maximum number of CPUs (2-128)"
+-	range 2 128
+-	depends on SMP
+-	default "32" if PPC64
+-	default "4"
+-
+-config NOT_COHERENT_CACHE
+-	bool
+-	depends on 4xx || 8xx || E200
+-	default y
+-
+-config CONFIG_CHECK_CACHE_COHERENCY
+-	bool
+-endmenu
+-
+ source "init/Kconfig"
+ 
+ source "arch/powerpc/platforms/Kconfig"
+@@ -686,9 +436,9 @@
+ 	bool "PCI support" if 40x || CPM2 || PPC_83xx || PPC_85xx || PPC_86xx \
+ 		|| PPC_MPC52xx || (EMBEDDED && (PPC_PSERIES || PPC_ISERIES)) \
+ 		|| MPC7448HPC2 || PPC_PS3 || PPC_HOLLY
+-	default y if !40x && !CPM2 && !8xx && !APUS && !PPC_83xx \
++	default y if !40x && !CPM2 && !8xx && !PPC_83xx \
+ 		&& !PPC_85xx && !PPC_86xx
+-	default PCI_PERMEDIA if !4xx && !CPM2 && !8xx && APUS
++	default PCI_PERMEDIA if !4xx && !CPM2 && !8xx
+ 	default PCI_QSPAN if !4xx && !CPM2 && 8xx
+ 	select ARCH_SUPPORTS_MSI
+ 	help
+diff -Nurb linux-2.6.22-570/arch/powerpc/Kconfig.debug linux-2.6.22-try2/arch/powerpc/Kconfig.debug
+--- linux-2.6.22-570/arch/powerpc/Kconfig.debug	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/Kconfig.debug	2007-12-19 15:29:24.000000000 -0500
+@@ -41,52 +41,9 @@
+ 	  This option will add a small amount of overhead to all hypervisor
+ 	  calls.
+ 
+-config DEBUGGER
+-	bool "Enable debugger hooks"
+-	depends on DEBUG_KERNEL
+-	help
+-	  Include in-kernel hooks for kernel debuggers. Unless you are
+-	  intending to debug the kernel, say N here.
+-
+-config KGDB
+-	bool "Include kgdb kernel debugger"
+-	depends on DEBUGGER && (BROKEN || PPC_GEN550 || 4xx)
+-	select DEBUG_INFO
+-	help
+-	  Include in-kernel hooks for kgdb, the Linux kernel source level
+-	  debugger.  See <http://kgdb.sourceforge.net/> for more information.
+-	  Unless you are intending to debug the kernel, say N here.
+-
+-choice
+-	prompt "Serial Port"
+-	depends on KGDB
+-	default KGDB_TTYS1
+-
+-config KGDB_TTYS0
+-	bool "ttyS0"
+-
+-config KGDB_TTYS1
+-	bool "ttyS1"
+-
+-config KGDB_TTYS2
+-	bool "ttyS2"
+-
+-config KGDB_TTYS3
+-	bool "ttyS3"
+-
+-endchoice
+-
+-config KGDB_CONSOLE
+-	bool "Enable serial console thru kgdb port"
+-	depends on KGDB && 8xx || CPM2
+-	help
+-	  If you enable this, all serial console messages will be sent
+-	  over the gdb stub.
+-	  If unsure, say N.
+-
+ config XMON
+ 	bool "Include xmon kernel debugger"
+-	depends on DEBUGGER
++	depends on DEBUG_KERNEL
+ 	help
+ 	  Include in-kernel hooks for the xmon kernel monitor/debugger.
+ 	  Unless you are intending to debug the kernel, say N here.
+@@ -116,6 +73,11 @@
+ 	  to say Y here, unless you're building for a memory-constrained
+ 	  system.
+ 
++config DEBUGGER
++	bool
++	depends on KGDB || XMON
++	default y
++
+ config IRQSTACKS
+ 	bool "Use separate kernel stacks when processing interrupts"
+ 	depends on PPC64
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/44x.c linux-2.6.22-try2/arch/powerpc/boot/44x.c
+--- linux-2.6.22-570/arch/powerpc/boot/44x.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/boot/44x.c	2007-12-19 15:29:22.000000000 -0500
+@@ -38,3 +38,48 @@
+ 
+ 	dt_fixup_memory(0, memsize);
+ }
++
++#define SPRN_DBCR0		0x134
++#define   DBCR0_RST_SYSTEM	0x30000000
++
++void ibm44x_dbcr_reset(void)
++{
++	unsigned long tmp;
++
++	asm volatile (
++		"mfspr	%0,%1\n"
++		"oris	%0,%0,%2@h\n"
++		"mtspr	%1,%0"
++		: "=&r"(tmp) : "i"(SPRN_DBCR0), "i"(DBCR0_RST_SYSTEM)
++		);
++
++}
++
++/* Read 4xx EBC bus bridge registers to get mappings of the peripheral
++ * banks into the OPB address space */
++void ibm4xx_fixup_ebc_ranges(const char *ebc)
++{
++	void *devp;
++	u32 bxcr;
++	u32 ranges[EBC_NUM_BANKS*4];
++	u32 *p = ranges;
++	int i;
++
++	for (i = 0; i < EBC_NUM_BANKS; i++) {
++		mtdcr(DCRN_EBC0_CFGADDR, EBC_BXCR(i));
++		bxcr = mfdcr(DCRN_EBC0_CFGDATA);
++
++		if ((bxcr & EBC_BXCR_BU) != EBC_BXCR_BU_OFF) {
++			*p++ = i;
++			*p++ = 0;
++			*p++ = bxcr & EBC_BXCR_BAS;
++			*p++ = EBC_BXCR_BANK_SIZE(bxcr);
++		}
++	}
++
++	devp = finddevice(ebc);
++	if (! devp)
++		fatal("Couldn't locate EBC node %s\n\r", ebc);
++
++	setprop(devp, "ranges", ranges, (p - ranges) * sizeof(u32));
++}
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/44x.h linux-2.6.22-try2/arch/powerpc/boot/44x.h
+--- linux-2.6.22-570/arch/powerpc/boot/44x.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/boot/44x.h	2007-12-19 15:29:22.000000000 -0500
+@@ -11,6 +11,9 @@
+ #define _PPC_BOOT_44X_H_
+ 
+ void ibm44x_fixup_memsize(void);
++void ibm4xx_fixup_ebc_ranges(const char *ebc);
++
++void ibm44x_dbcr_reset(void);
+ void ebony_init(void *mac0, void *mac1);
+ 
+ #endif /* _PPC_BOOT_44X_H_ */
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/Makefile linux-2.6.22-try2/arch/powerpc/boot/Makefile
+--- linux-2.6.22-570/arch/powerpc/boot/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/boot/Makefile	2007-12-19 15:29:22.000000000 -0500
+@@ -43,8 +43,8 @@
+ 
+ src-wlib := string.S crt0.S stdio.c main.c flatdevtree.c flatdevtree_misc.c \
+ 		ns16550.c serial.c simple_alloc.c div64.S util.S \
+-		gunzip_util.c elf_util.c $(zlib) devtree.c \
+-		44x.c ebony.c mv64x60.c mpsc.c mv64x60_i2c.c
++		gunzip_util.c elf_util.c $(zlib) devtree.c oflib.c ofconsole.c \
++		44x.c ebony.c mv64x60.c mpsc.c mv64x60_i2c.c cuboot.c
+ src-plat := of.c cuboot-83xx.c cuboot-85xx.c holly.c \
+ 		cuboot-ebony.c treeboot-ebony.c prpmc2800.c
+ src-boot := $(src-wlib) $(src-plat) empty.c
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot-83xx.c linux-2.6.22-try2/arch/powerpc/boot/cuboot-83xx.c
+--- linux-2.6.22-570/arch/powerpc/boot/cuboot-83xx.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/boot/cuboot-83xx.c	2007-12-19 15:29:22.000000000 -0500
+@@ -12,12 +12,12 @@
+ 
+ #include "ops.h"
+ #include "stdio.h"
++#include "cuboot.h"
+ 
+ #define TARGET_83xx
+ #include "ppcboot.h"
+ 
+ static bd_t bd;
+-extern char _end[];
+ extern char _dtb_start[], _dtb_end[];
+ 
+ static void platform_fixups(void)
+@@ -52,16 +52,7 @@
+ void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                    unsigned long r6, unsigned long r7)
+ {
+-	unsigned long end_of_ram = bd.bi_memstart + bd.bi_memsize;
+-	unsigned long avail_ram = end_of_ram - (unsigned long)_end;
+-
+-	memcpy(&bd, (bd_t *)r3, sizeof(bd));
+-	loader_info.initrd_addr = r4;
+-	loader_info.initrd_size = r4 ? r5 - r4 : 0;
+-	loader_info.cmdline = (char *)r6;
+-	loader_info.cmdline_len = r7 - r6;
+-
+-	simple_alloc_init(_end, avail_ram - 1024*1024, 32, 64);
++	CUBOOT_INIT();
+ 	ft_init(_dtb_start, _dtb_end - _dtb_start, 32);
+ 	serial_console_init();
+ 	platform_ops.fixups = platform_fixups;
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot-85xx.c linux-2.6.22-try2/arch/powerpc/boot/cuboot-85xx.c
+--- linux-2.6.22-570/arch/powerpc/boot/cuboot-85xx.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/boot/cuboot-85xx.c	2007-12-19 15:29:22.000000000 -0500
+@@ -12,12 +12,12 @@
+ 
+ #include "ops.h"
+ #include "stdio.h"
++#include "cuboot.h"
+ 
+ #define TARGET_85xx
+ #include "ppcboot.h"
+ 
+ static bd_t bd;
+-extern char _end[];
+ extern char _dtb_start[], _dtb_end[];
+ 
+ static void platform_fixups(void)
+@@ -53,16 +53,7 @@
+ void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                    unsigned long r6, unsigned long r7)
+ {
+-	unsigned long end_of_ram = bd.bi_memstart + bd.bi_memsize;
+-	unsigned long avail_ram = end_of_ram - (unsigned long)_end;
+-
+-	memcpy(&bd, (bd_t *)r3, sizeof(bd));
+-	loader_info.initrd_addr = r4;
+-	loader_info.initrd_size = r4 ? r5 - r4 : 0;
+-	loader_info.cmdline = (char *)r6;
+-	loader_info.cmdline_len = r7 - r6;
+-
+-	simple_alloc_init(_end, avail_ram - 1024*1024, 32, 64);
++	CUBOOT_INIT();
+ 	ft_init(_dtb_start, _dtb_end - _dtb_start, 32);
+ 	serial_console_init();
+ 	platform_ops.fixups = platform_fixups;
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot-ebony.c linux-2.6.22-try2/arch/powerpc/boot/cuboot-ebony.c
+--- linux-2.6.22-570/arch/powerpc/boot/cuboot-ebony.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/boot/cuboot-ebony.c	2007-12-19 15:29:22.000000000 -0500
+@@ -15,28 +15,16 @@
+ #include "ops.h"
+ #include "stdio.h"
+ #include "44x.h"
++#include "cuboot.h"
+ 
+ #define TARGET_44x
+ #include "ppcboot.h"
+ 
+ static bd_t bd;
+-extern char _end[];
+-
+-BSS_STACK(4096);
+ 
+ void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+                    unsigned long r6, unsigned long r7)
+ {
+-	unsigned long end_of_ram = bd.bi_memstart + bd.bi_memsize;
+-	unsigned long avail_ram = end_of_ram - (unsigned long)_end;
+-
+-	memcpy(&bd, (bd_t *)r3, sizeof(bd));
+-	loader_info.initrd_addr = r4;
+-	loader_info.initrd_size = r4 ? r5 : 0;
+-	loader_info.cmdline = (char *)r6;
+-	loader_info.cmdline_len = r7 - r6;
+-
+-	simple_alloc_init(_end, avail_ram, 32, 64);
+-
++	CUBOOT_INIT();
+ 	ebony_init(&bd.bi_enetaddr, &bd.bi_enet1addr);
+ }
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot.c linux-2.6.22-try2/arch/powerpc/boot/cuboot.c
+--- linux-2.6.22-570/arch/powerpc/boot/cuboot.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/boot/cuboot.c	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,35 @@
++/*
++ * Compatibility for old (not device tree aware) U-Boot versions
++ *
++ * Author: Scott Wood <scottwood@freescale.com>
++ * Consolidated using macros by David Gibson <david@gibson.dropbear.id.au>
++ *
++ * Copyright 2007 David Gibson, IBM Corporation.
++ * Copyright (c) 2007 Freescale Semiconductor, Inc.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation.
++ */
++
++#include "ops.h"
++#include "stdio.h"
++
++#include "ppcboot.h"
++
++extern char _end[];
++extern char _dtb_start[], _dtb_end[];
++
++void cuboot_init(unsigned long r4, unsigned long r5,
++		 unsigned long r6, unsigned long r7,
++		 unsigned long end_of_ram)
++{
++	unsigned long avail_ram = end_of_ram - (unsigned long)_end;
++
++	loader_info.initrd_addr = r4;
++	loader_info.initrd_size = r4 ? r5 - r4 : 0;
++	loader_info.cmdline = (char *)r6;
++	loader_info.cmdline_len = r7 - r6;
++
++	simple_alloc_init(_end, avail_ram - 1024*1024, 32, 64);
++}
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/cuboot.h linux-2.6.22-try2/arch/powerpc/boot/cuboot.h
+--- linux-2.6.22-570/arch/powerpc/boot/cuboot.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/boot/cuboot.h	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,14 @@
++#ifndef _PPC_BOOT_CUBOOT_H_
++#define _PPC_BOOT_CUBOOT_H_
++
++void cuboot_init(unsigned long r4, unsigned long r5,
++		 unsigned long r6, unsigned long r7,
++		 unsigned long end_of_ram);
++
++#define CUBOOT_INIT() \
++	do { \
++		memcpy(&bd, (bd_t *)r3, sizeof(bd)); \
++		cuboot_init(r4, r5, r6, r7, bd.bi_memstart + bd.bi_memsize); \
++	} while (0)
++
++#endif /* _PPC_BOOT_CUBOOT_H_ */
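CUBOOT_INIT() collapses the boilerplate each cuboot-*.c wrapper above used to open-code: copy the board-info block U-Boot passes in r3, record the initrd and command-line bounds from r4-r7, and size the heap from _end to end-of-RAM minus 1MB of slack. A converted board file then reduces to a few lines, mirroring the 83xx/85xx conversions in this patch (sketch; bd is the file-scope bd_t the macro expects):

	#include "ops.h"
	#include "stdio.h"
	#include "cuboot.h"
	#include "ppcboot.h"

	static bd_t bd;
	extern char _dtb_start[], _dtb_end[];

	void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
			   unsigned long r6, unsigned long r7)
	{
		CUBOOT_INIT();	/* bd copy, loader_info, simple_alloc_init() */
		ft_init(_dtb_start, _dtb_end - _dtb_start, 32);
		serial_console_init();
	}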
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dcr.h linux-2.6.22-try2/arch/powerpc/boot/dcr.h
+--- linux-2.6.22-570/arch/powerpc/boot/dcr.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/boot/dcr.h	2007-12-19 15:29:22.000000000 -0500
+@@ -26,6 +26,43 @@
+ #define			SDRAM_CONFIG_BANK_SIZE(reg)	\
+ 	(0x00400000 << ((reg & SDRAM_CONFIG_SIZE_MASK) >> 17))
+ 
++/* 440GP External Bus Controller (EBC) */
++#define DCRN_EBC0_CFGADDR				0x012
++#define DCRN_EBC0_CFGDATA				0x013
++#define   EBC_NUM_BANKS					  8
++#define   EBC_B0CR					  0x00
++#define   EBC_B1CR					  0x01
++#define   EBC_B2CR					  0x02
++#define   EBC_B3CR					  0x03
++#define   EBC_B4CR					  0x04
++#define   EBC_B5CR					  0x05
++#define   EBC_B6CR					  0x06
++#define   EBC_B7CR					  0x07
++#define   EBC_BXCR(n)					  (n)
++#define	    EBC_BXCR_BAS				    0xfff00000
++#define	    EBC_BXCR_BS				  	    0x000e0000
++#define	    EBC_BXCR_BANK_SIZE(reg) \
++	(0x100000 << (((reg) & EBC_BXCR_BS) >> 17))
++#define	    EBC_BXCR_BU				  	    0x00018000
++#define	      EBC_BXCR_BU_OFF			  	      0x00000000
++#define	      EBC_BXCR_BU_RO			  	      0x00008000
++#define	      EBC_BXCR_BU_WO			  	      0x00010000
++#define	      EBC_BXCR_BU_RW			  	      0x00018000
++#define	    EBC_BXCR_BW				  	    0x00006000
++#define   EBC_B0AP					  0x10
++#define   EBC_B1AP					  0x11
++#define   EBC_B2AP					  0x12
++#define   EBC_B3AP					  0x13
++#define   EBC_B4AP					  0x14
++#define   EBC_B5AP					  0x15
++#define   EBC_B6AP					  0x16
++#define   EBC_B7AP					  0x17
++#define   EBC_BXAP(n)					  (0x10+(n))
++#define   EBC_BEAR					  0x20
++#define   EBC_BESR					  0x21
++#define   EBC_CFG					  0x23
++#define   EBC_CID					  0x24
++
+ /* 440GP Clock, PM, chip control */
+ #define DCRN_CPC0_SR					0x0b0
+ #define DCRN_CPC0_ER					0x0b1
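The BnCR decode above is compact: BAS holds the bank base address, BU the enable/read-write field, and BS (bits 19:17) the bank size as a power of two starting at 1MB, which is what EBC_BXCR_BANK_SIZE() expands to. A worked decode, equivalent to that macro (BS of 0 gives a 1MB bank, 7 gives 128MB):

	static unsigned long ebc_bank_size(u32 bxcr)
	{
		/* 0x100000 (1MB) shifted left by the 3-bit BS field */
		return 0x100000ul << ((bxcr & EBC_BXCR_BS) >> 17);
	}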
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dts/ebony.dts linux-2.6.22-try2/arch/powerpc/boot/dts/ebony.dts
+--- linux-2.6.22-570/arch/powerpc/boot/dts/ebony.dts	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/boot/dts/ebony.dts	2007-12-19 15:29:22.000000000 -0500
+@@ -135,11 +135,9 @@
+ 				#address-cells = <2>;
+ 				#size-cells = <1>;
+ 				clock-frequency = <0>; // Filled in by zImage
+-				ranges = <0 00000000 fff00000 100000
+-					  1 00000000 48000000 100000
+-					  2 00000000 ff800000 400000
+-					  3 00000000 48200000 100000
+-					  7 00000000 48300000 100000>;
++				// ranges property is supplied by zImage
++				// based on firmware's configuration of the
++				// EBC bridge
+ 				interrupts = <5 4>;
+ 				interrupt-parent = <&UIC1>;
+ 
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dts/holly.dts linux-2.6.22-try2/arch/powerpc/boot/dts/holly.dts
+--- linux-2.6.22-570/arch/powerpc/boot/dts/holly.dts	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/boot/dts/holly.dts	2007-12-19 15:29:22.000000000 -0500
+@@ -46,7 +46,7 @@
+ 
+   	tsi109@c0000000 {
+ 		device_type = "tsi-bridge";
+-		compatible = "tsi-bridge";
++		compatible = "tsi109-bridge", "tsi108-bridge";
+ 		#address-cells = <1>;
+ 		#size-cells = <1>;
+ 		ranges = <00000000 c0000000 00010000>;
+@@ -54,52 +54,55 @@
+ 
+ 		i2c@7000 {
+ 			device_type = "i2c";
+-			compatible  = "tsi-i2c";
+-			interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++			compatible  = "tsi109-i2c", "tsi108-i2c";
++			interrupt-parent = <&MPIC>;
+ 			interrupts = <e 2>;
+ 			reg = <7000 400>;
+ 		};
+ 
+-		mdio@6000 {
++		MDIO: mdio@6000 {
+ 			device_type = "mdio";
+-			compatible = "tsi-ethernet";
+-
+-			PHY1: ethernet-phy@6000 {
+-				device_type = "ethernet-phy";
+-				compatible = "bcm54xx";
++			compatible = "tsi109-mdio", "tsi108-mdio";
+ 				reg = <6000 50>;
+-				phy-id = <1>;
++			#address-cells = <1>;
++			#size-cells = <0>;
++
++			PHY1: ethernet-phy@1 {
++				compatible = "bcm5461a";
++				reg = <1>;
++				txc-rxc-delay-disable;
+ 			};
+ 
+-			PHY2: ethernet-phy@6400 {
+-				device_type = "ethernet-phy";
+-				compatible = "bcm54xx";
+-				reg = <6000 50>;
+-				phy-id = <2>;
++			PHY2: ethernet-phy@2 {
++				compatible = "bcm5461a";
++				reg = <2>;
++				txc-rxc-delay-disable;
+ 			};
+ 		};
+ 
+ 		ethernet@6200 {
+ 			device_type = "network";
+-			compatible = "tsi-ethernet";
++			compatible = "tsi109-ethernet", "tsi108-ethernet";
+ 			#address-cells = <1>;
+ 			#size-cells = <0>;
+ 			reg = <6000 200>;
+ 			local-mac-address = [ 00 00 00 00 00 00 ];
+-			interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++			interrupt-parent = <&MPIC>;
+ 			interrupts = <10 2>;
++			mdio-handle = <&MDIO>;
+ 			phy-handle = <&PHY1>;
+ 		};
+ 
+ 		ethernet@6600 {
+ 			device_type = "network";
+-			compatible = "tsi-ethernet";
++			compatible = "tsi109-ethernet", "tsi108-ethernet";
+ 			#address-cells = <1>;
+ 			#size-cells = <0>;
+ 			reg = <6400 200>;
+ 			local-mac-address = [ 00 00 00 00 00 00 ];
+-			interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++			interrupt-parent = <&MPIC>;
+ 			interrupts = <11 2>;
++			mdio-handle = <&MDIO>;
+ 			phy-handle = <&PHY2>;
+ 		};
+ 
+@@ -110,7 +113,7 @@
+ 			virtual-reg = <c0007808>;
+ 			clock-frequency = <3F9C6000>;
+ 			current-speed = <1c200>;
+-			interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++			interrupt-parent = <&MPIC>;
+ 			interrupts = <c 2>;
+ 		};
+ 
+@@ -121,7 +124,7 @@
+ 			virtual-reg = <c0007c08>;
+ 			clock-frequency = <3F9C6000>;
+ 			current-speed = <1c200>;
+-			interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++			interrupt-parent = <&MPIC>;
+ 			interrupts = <d 2>;
+ 		};
+ 
+@@ -136,7 +139,7 @@
+ 
+ 		pci@1000 {
+ 			device_type = "pci";
+-			compatible = "tsi109";
++			compatible = "tsi109-pci", "tsi108-pci";
+ 			#interrupt-cells = <1>;
+ 			#size-cells = <2>;
+ 			#address-cells = <3>;
+@@ -150,7 +153,7 @@
+ 			ranges = <02000000 0 40000000 40000000 0 10000000
+ 				  01000000 0 00000000 7e000000 0 00010000>;
+ 			clock-frequency = <7f28154>;
+-			interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++			interrupt-parent = <&MPIC>;
+ 			interrupts = <17 2>;
+ 			interrupt-map-mask = <f800 0 0 7>;
+ 			/*----------------------------------------------------+
+@@ -186,13 +189,12 @@
+  				#address-cells = <0>;
+  				#interrupt-cells = <2>;
+  				interrupts = <17 2>;
+-				interrupt-parent = < &/tsi109@c0000000/pic@7400 >;
++				interrupt-parent = <&MPIC>;
+ 			};
+ 		};
+ 	};
+ 
+ 	chosen {
+ 		linux,stdout-path = "/tsi109@c0000000/serial@7808";
+-		bootargs = "console=ttyS0,115200";
+ 	};
+ };
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/dts/mpc7448hpc2.dts linux-2.6.22-try2/arch/powerpc/boot/dts/mpc7448hpc2.dts
+--- linux-2.6.22-570/arch/powerpc/boot/dts/mpc7448hpc2.dts	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/boot/dts/mpc7448hpc2.dts	2007-12-19 15:29:22.000000000 -0500
+@@ -45,7 +45,7 @@
+ 		#address-cells = <1>;
+ 		#size-cells = <1>;
+ 		#interrupt-cells = <2>;
+-		device_type = "tsi-bridge";
++		device_type = "tsi108-bridge";
+ 		ranges = <00000000 c0000000 00010000>;
+ 		reg = <c0000000 00010000>;
+ 		bus-frequency = <0>;
+@@ -55,27 +55,26 @@
+ 			interrupts = <E 0>;
+ 			reg = <7000 400>;
+ 			device_type = "i2c";
+-			compatible  = "tsi-i2c";
++			compatible  = "tsi108-i2c";
+ 		};
+ 
+-		mdio@6000 {
++		MDIO: mdio@6000 {
+ 			device_type = "mdio";
+-			compatible = "tsi-ethernet";
++			compatible = "tsi108-mdio";
++			reg = <6000 50>;
++			#address-cells = <1>;
++			#size-cells = <0>;
+ 
+-			phy8: ethernet-phy@6000 {
++			phy8: ethernet-phy@8 {
+ 				interrupt-parent = <&mpic>;
+ 				interrupts = <2 1>;
+-				reg = <6000 50>;
+-				phy-id = <8>;
+-				device_type = "ethernet-phy";
++				reg = <8>;
+ 			};
+ 
+-			phy9: ethernet-phy@6400 {
++			phy9: ethernet-phy@9 {
+ 				interrupt-parent = <&mpic>;
+ 				interrupts = <2 1>;
+-				reg = <6000 50>;
+-				phy-id = <9>;
+-				device_type = "ethernet-phy";
++				reg = <9>;
+ 			};
+ 
+ 		};
+@@ -83,12 +82,12 @@
+ 		ethernet@6200 {
+ 			#size-cells = <0>;
+ 			device_type = "network";
+-			model = "TSI-ETH";
+-			compatible = "tsi-ethernet";
++			compatible = "tsi108-ethernet";
+ 			reg = <6000 200>;
+ 			address = [ 00 06 D2 00 00 01 ];
+ 			interrupts = <10 2>;
+ 			interrupt-parent = <&mpic>;
++			mdio-handle = <&MDIO>;
+ 			phy-handle = <&phy8>;
+ 		};
+ 
+@@ -96,12 +95,12 @@
+ 			#address-cells = <1>;
+ 			#size-cells = <0>;
+ 			device_type = "network";
+-			model = "TSI-ETH";
+-			compatible = "tsi-ethernet";
++			compatible = "tsi108-ethernet";
+ 			reg = <6400 200>;
+ 			address = [ 00 06 D2 00 00 02 ];
+ 			interrupts = <11 2>;
+ 			interrupt-parent = <&mpic>;
++			mdio-handle = <&MDIO>;
+ 			phy-handle = <&phy9>;
+ 		};
+ 
+@@ -135,7 +134,7 @@
+                        	big-endian;
+ 		};
+ 		pci@1000 {
+-			compatible = "tsi10x";
++			compatible = "tsi108-pci";
+ 			device_type = "pci";
+ 			#interrupt-cells = <1>;
+ 			#size-cells = <2>;
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/ebony.c linux-2.6.22-try2/arch/powerpc/boot/ebony.c
+--- linux-2.6.22-570/arch/powerpc/boot/ebony.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/boot/ebony.c	2007-12-19 15:29:22.000000000 -0500
+@@ -100,28 +100,13 @@
+ 	ibm440gp_fixup_clocks(sysclk, 6 * 1843200);
+ 	ibm44x_fixup_memsize();
+ 	dt_fixup_mac_addresses(ebony_mac0, ebony_mac1);
+-}
+-
+-#define SPRN_DBCR0		0x134
+-#define   DBCR0_RST_SYSTEM	0x30000000
+-
+-static void ebony_exit(void)
+-{
+-	unsigned long tmp;
+-
+-	asm volatile (
+-		"mfspr	%0,%1\n"
+-		"oris	%0,%0,%2@h\n"
+-		"mtspr	%1,%0"
+-		: "=&r"(tmp) : "i"(SPRN_DBCR0), "i"(DBCR0_RST_SYSTEM)
+-		);
+-
++	ibm4xx_fixup_ebc_ranges("/plb/opb/ebc");
+ }
+ 
+ void ebony_init(void *mac0, void *mac1)
+ {
+ 	platform_ops.fixups = ebony_fixups;
+-	platform_ops.exit = ebony_exit;
++	platform_ops.exit = ibm44x_dbcr_reset;
+ 	ebony_mac0 = mac0;
+ 	ebony_mac1 = mac1;
+ 	ft_init(_dtb_start, _dtb_end - _dtb_start, 32);
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/of.c linux-2.6.22-try2/arch/powerpc/boot/of.c
+--- linux-2.6.22-570/arch/powerpc/boot/of.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/boot/of.c	2007-12-19 15:29:22.000000000 -0500
+@@ -15,8 +15,7 @@
+ #include "page.h"
+ #include "ops.h"
+ 
+-typedef void *ihandle;
+-typedef void *phandle;
++#include "of.h"
+ 
+ extern char _end[];
+ 
+@@ -25,154 +24,10 @@
+ #define RAM_END		(512<<20)	/* Fixme: use OF */
+ #define	ONE_MB		0x100000
+ 
+-int (*prom) (void *);
+ 
+ 
+ static unsigned long claim_base;
+ 
+-static int call_prom(const char *service, int nargs, int nret, ...)
+-{
+-	int i;
+-	struct prom_args {
+-		const char *service;
+-		int nargs;
+-		int nret;
+-		unsigned int args[12];
+-	} args;
+-	va_list list;
+-
+-	args.service = service;
+-	args.nargs = nargs;
+-	args.nret = nret;
+-
+-	va_start(list, nret);
+-	for (i = 0; i < nargs; i++)
+-		args.args[i] = va_arg(list, unsigned int);
+-	va_end(list);
+-
+-	for (i = 0; i < nret; i++)
+-		args.args[nargs+i] = 0;
+-
+-	if (prom(&args) < 0)
+-		return -1;
+-
+-	return (nret > 0)? args.args[nargs]: 0;
+-}
+-
+-static int call_prom_ret(const char *service, int nargs, int nret,
+-		  unsigned int *rets, ...)
+-{
+-	int i;
+-	struct prom_args {
+-		const char *service;
+-		int nargs;
+-		int nret;
+-		unsigned int args[12];
+-	} args;
+-	va_list list;
+-
+-	args.service = service;
+-	args.nargs = nargs;
+-	args.nret = nret;
+-
+-	va_start(list, rets);
+-	for (i = 0; i < nargs; i++)
+-		args.args[i] = va_arg(list, unsigned int);
+-	va_end(list);
+-
+-	for (i = 0; i < nret; i++)
+-		args.args[nargs+i] = 0;
+-
+-	if (prom(&args) < 0)
+-		return -1;
+-
+-	if (rets != (void *) 0)
+-		for (i = 1; i < nret; ++i)
+-			rets[i-1] = args.args[nargs+i];
+-
+-	return (nret > 0)? args.args[nargs]: 0;
+-}
+-
+-/*
+- * Older OF's require that when claiming a specific range of addresses,
+- * we claim the physical space in the /memory node and the virtual
+- * space in the chosen mmu node, and then do a map operation to
+- * map virtual to physical.
+- */
+-static int need_map = -1;
+-static ihandle chosen_mmu;
+-static phandle memory;
+-
+-/* returns true if s2 is a prefix of s1 */
+-static int string_match(const char *s1, const char *s2)
+-{
+-	for (; *s2; ++s2)
+-		if (*s1++ != *s2)
+-			return 0;
+-	return 1;
+-}
+-
+-static int check_of_version(void)
+-{
+-	phandle oprom, chosen;
+-	char version[64];
+-
+-	oprom = finddevice("/openprom");
+-	if (oprom == (phandle) -1)
+-		return 0;
+-	if (getprop(oprom, "model", version, sizeof(version)) <= 0)
+-		return 0;
+-	version[sizeof(version)-1] = 0;
+-	printf("OF version = '%s'\r\n", version);
+-	if (!string_match(version, "Open Firmware, 1.")
+-	    && !string_match(version, "FirmWorks,3."))
+-		return 0;
+-	chosen = finddevice("/chosen");
+-	if (chosen == (phandle) -1) {
+-		chosen = finddevice("/chosen@0");
+-		if (chosen == (phandle) -1) {
+-			printf("no chosen\n");
+-			return 0;
+-		}
+-	}
+-	if (getprop(chosen, "mmu", &chosen_mmu, sizeof(chosen_mmu)) <= 0) {
+-		printf("no mmu\n");
+-		return 0;
+-	}
+-	memory = (ihandle) call_prom("open", 1, 1, "/memory");
+-	if (memory == (ihandle) -1) {
+-		memory = (ihandle) call_prom("open", 1, 1, "/memory@0");
+-		if (memory == (ihandle) -1) {
+-			printf("no memory node\n");
+-			return 0;
+-		}
+-	}
+-	printf("old OF detected\r\n");
+-	return 1;
+-}
+-
+-static void *claim(unsigned long virt, unsigned long size, unsigned long align)
+-{
+-	int ret;
+-	unsigned int result;
+-
+-	if (need_map < 0)
+-		need_map = check_of_version();
+-	if (align || !need_map)
+-		return (void *) call_prom("claim", 3, 1, virt, size, align);
+-
+-	ret = call_prom_ret("call-method", 5, 2, &result, "claim", memory,
+-			    align, size, virt);
+-	if (ret != 0 || result == -1)
+-		return (void *) -1;
+-	ret = call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu,
+-			    align, size, virt);
+-	/* 0x12 == coherent + read/write */
+-	ret = call_prom("call-method", 6, 1, "map", chosen_mmu,
+-			0x12, size, virt, virt);
+-	return (void *) virt;
+-}
+-
+ static void *of_try_claim(unsigned long size)
+ {
+ 	unsigned long addr = 0;
+@@ -184,7 +39,7 @@
+ #ifdef DEBUG
+ 		printf("    trying: 0x%08lx\n\r", claim_base);
+ #endif
+-		addr = (unsigned long)claim(claim_base, size, 0);
++		addr = (unsigned long)of_claim(claim_base, size, 0);
+ 		if ((void *)addr != (void *)-1)
+ 			break;
+ 	}
+@@ -218,52 +73,24 @@
+ 	return p;
+ }
+ 
+-static void of_exit(void)
+-{
+-	call_prom("exit", 0, 0);
+-}
+-
+ /*
+  * OF device tree routines
+  */
+ static void *of_finddevice(const char *name)
+ {
+-	return (phandle) call_prom("finddevice", 1, 1, name);
++	return (phandle) of_call_prom("finddevice", 1, 1, name);
+ }
+ 
+ static int of_getprop(const void *phandle, const char *name, void *buf,
+ 		const int buflen)
+ {
+-	return call_prom("getprop", 4, 1, phandle, name, buf, buflen);
++	return of_call_prom("getprop", 4, 1, phandle, name, buf, buflen);
+ }
+ 
+ static int of_setprop(const void *phandle, const char *name, const void *buf,
+ 		const int buflen)
+ {
+-	return call_prom("setprop", 4, 1, phandle, name, buf, buflen);
+-}
+-
+-/*
+- * OF console routines
+- */
+-static void *of_stdout_handle;
+-
+-static int of_console_open(void)
+-{
+-	void *devp;
+-
+-	if (((devp = finddevice("/chosen")) != NULL)
+-			&& (getprop(devp, "stdout", &of_stdout_handle,
+-				sizeof(of_stdout_handle))
+-				== sizeof(of_stdout_handle)))
+-		return 0;
+-
+-	return -1;
+-}
+-
+-static void of_console_write(char *buf, int len)
+-{
+-	call_prom("write", 3, 1, of_stdout_handle, buf, len);
++	return of_call_prom("setprop", 4, 1, phandle, name, buf, buflen);
+ }
+ 
+ void platform_init(unsigned long a1, unsigned long a2, void *promptr)
+@@ -277,10 +104,9 @@
+ 	dt_ops.getprop = of_getprop;
+ 	dt_ops.setprop = of_setprop;
+ 
+-	console_ops.open = of_console_open;
+-	console_ops.write = of_console_write;
++	of_console_init();
+ 
+-	prom = (int (*)(void *))promptr;
++	of_init(promptr);
+ 	loader_info.promptr = promptr;
+ 	if (a1 && a2 && a2 != 0xdeadbeef) {
+ 		loader_info.initrd_addr = a1;
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/of.h linux-2.6.22-try2/arch/powerpc/boot/of.h
+--- linux-2.6.22-570/arch/powerpc/boot/of.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/boot/of.h	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,15 @@
++#ifndef _PPC_BOOT_OF_H_
++#define _PPC_BOOT_OF_H_
++
++typedef void *phandle;
++typedef void *ihandle;
++
++void of_init(void *promptr);
++int of_call_prom(const char *service, int nargs, int nret, ...);
++void *of_claim(unsigned long virt, unsigned long size, unsigned long align);
++void of_exit(void);
++
++/* Console functions */
++void of_console_init(void);
++
++#endif /* _PPC_BOOT_OF_H_ */
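of_call_prom() keeps the classic Open Firmware client-interface convention: the service name, the number of input arguments, the number of return cells, then the arguments themselves; the first return cell comes back as the result, or -1 if the prom call fails. The of.c wrappers above thus reduce to one-liners; a sketch (devp is a hypothetical phandle):

	/* Fetch the "model" property through the prom entry point;
	   mirrors of_getprop() in of.c above. */
	static int example_get_model(phandle devp, char *buf, int buflen)
	{
		return of_call_prom("getprop", 4, 1, devp, "model", buf, buflen);
	}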
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/ofconsole.c linux-2.6.22-try2/arch/powerpc/boot/ofconsole.c
+--- linux-2.6.22-570/arch/powerpc/boot/ofconsole.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/boot/ofconsole.c	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,45 @@
++/*
++ * OF console routines
++ *
++ * Copyright (C) Paul Mackerras 1997.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++#include <stddef.h>
++#include "types.h"
++#include "elf.h"
++#include "string.h"
++#include "stdio.h"
++#include "page.h"
++#include "ops.h"
++
++#include "of.h"
++
++static void *of_stdout_handle;
++
++static int of_console_open(void)
++{
++	void *devp;
++
++	if (((devp = finddevice("/chosen")) != NULL)
++			&& (getprop(devp, "stdout", &of_stdout_handle,
++				sizeof(of_stdout_handle))
++				== sizeof(of_stdout_handle)))
++		return 0;
++
++	return -1;
++}
++
++static void of_console_write(char *buf, int len)
++{
++	of_call_prom("write", 3, 1, of_stdout_handle, buf, len);
++}
++
++void of_console_init(void)
++{
++	console_ops.open = of_console_open;
++	console_ops.write = of_console_write;
++}
+diff -Nurb linux-2.6.22-570/arch/powerpc/boot/oflib.c linux-2.6.22-try2/arch/powerpc/boot/oflib.c
+--- linux-2.6.22-570/arch/powerpc/boot/oflib.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/boot/oflib.c	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,172 @@
++/*
++ * Copyright (C) Paul Mackerras 1997.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++#include <stddef.h>
++#include "types.h"
++#include "elf.h"
++#include "string.h"
++#include "stdio.h"
++#include "page.h"
++#include "ops.h"
++
++#include "of.h"
++
++static int (*prom) (void *);
++
++void of_init(void *promptr)
++{
++	prom = (int (*)(void *))promptr;
++}
++
++int of_call_prom(const char *service, int nargs, int nret, ...)
++{
++	int i;
++	struct prom_args {
++		const char *service;
++		int nargs;
++		int nret;
++		unsigned int args[12];
++	} args;
++	va_list list;
++
++	args.service = service;
++	args.nargs = nargs;
++	args.nret = nret;
++
++	va_start(list, nret);
++	for (i = 0; i < nargs; i++)
++		args.args[i] = va_arg(list, unsigned int);
++	va_end(list);
++
++	for (i = 0; i < nret; i++)
++		args.args[nargs+i] = 0;
++
++	if (prom(&args) < 0)
++		return -1;
++
++	return (nret > 0)? args.args[nargs]: 0;
++}
++
++static int of_call_prom_ret(const char *service, int nargs, int nret,
++			    unsigned int *rets, ...)
++{
++	int i;
++	struct prom_args {
++		const char *service;
++		int nargs;
++		int nret;
++		unsigned int args[12];
++	} args;
++	va_list list;
++
++	args.service = service;
++	args.nargs = nargs;
++	args.nret = nret;
++
++	va_start(list, rets);
++	for (i = 0; i < nargs; i++)
++		args.args[i] = va_arg(list, unsigned int);
++	va_end(list);
++
++	for (i = 0; i < nret; i++)
++		args.args[nargs+i] = 0;
++
++	if (prom(&args) < 0)
++		return -1;
++
++	if (rets != (void *) 0)
++		for (i = 1; i < nret; ++i)
++			rets[i-1] = args.args[nargs+i];
++
++	return (nret > 0)? args.args[nargs]: 0;
++}
++
++/* returns true if s2 is a prefix of s1 */
++static int string_match(const char *s1, const char *s2)
++{
++	for (; *s2; ++s2)
++		if (*s1++ != *s2)
++			return 0;
++	return 1;
++}
++
++/*
++ * Older OF's require that when claiming a specific range of addresses,
++ * we claim the physical space in the /memory node and the virtual
++ * space in the chosen mmu node, and then do a map operation to
++ * map virtual to physical.
++ */
++static int need_map = -1;
++static ihandle chosen_mmu;
++static phandle memory;
++
++static int check_of_version(void)
++{
++	phandle oprom, chosen;
++	char version[64];
++
++	oprom = finddevice("/openprom");
++	if (oprom == (phandle) -1)
++		return 0;
++	if (getprop(oprom, "model", version, sizeof(version)) <= 0)
++		return 0;
++	version[sizeof(version)-1] = 0;
++	printf("OF version = '%s'\r\n", version);
++	if (!string_match(version, "Open Firmware, 1.")
++	    && !string_match(version, "FirmWorks,3."))
++		return 0;
++	chosen = finddevice("/chosen");
++	if (chosen == (phandle) -1) {
++		chosen = finddevice("/chosen@0");
++		if (chosen == (phandle) -1) {
++			printf("no chosen\n");
++			return 0;
++		}
++	}
++	if (getprop(chosen, "mmu", &chosen_mmu, sizeof(chosen_mmu)) <= 0) {
++		printf("no mmu\n");
++		return 0;
++	}
++	memory = (ihandle) of_call_prom("open", 1, 1, "/memory");
++	if (memory == (ihandle) -1) {
++		memory = (ihandle) of_call_prom("open", 1, 1, "/memory@0");
++		if (memory == (ihandle) -1) {
++			printf("no memory node\n");
++			return 0;
++		}
++	}
++	printf("old OF detected\r\n");
++	return 1;
++}
++
++void *of_claim(unsigned long virt, unsigned long size, unsigned long align)
++{
++	int ret;
++	unsigned int result;
++
++	if (need_map < 0)
++		need_map = check_of_version();
++	if (align || !need_map)
++		return (void *) of_call_prom("claim", 3, 1, virt, size, align);
++
++	ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", memory,
++			       align, size, virt);
++	if (ret != 0 || result == -1)
++		return (void *) -1;
++	ret = of_call_prom_ret("call-method", 5, 2, &result, "claim", chosen_mmu,
++			       align, size, virt);
++	/* 0x12 == coherent + read/write */
++	ret = of_call_prom("call-method", 6, 1, "map", chosen_mmu,
++			   0x12, size, virt, virt);
++	return (void *) virt;
++}
++
++void of_exit(void)
++{
++	of_call_prom("exit", 0, 0);
++}
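A sketch of the client-interface convention of_call_prom() implements, not part of the patch: the service name and the argument/return counts mirror the of_getprop() wrapper above; the property name is illustrative and the phandle is assumed to come from finddevice().

	#include "of.h"

	static int example_model(phandle node)
	{
		char model[64];

		/* "getprop" takes 4 in-args (phandle, name, buf, buflen)
		 * and 1 return cell; of_call_prom() packs them into
		 * prom_args, zeroes the return cell, hands the block to
		 * the OF entry point installed by of_init(), and returns
		 * that cell (-1 on failure). */
		return of_call_prom("getprop", 4, 1, node, "model",
				    model, sizeof(model));
	}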
+diff -Nurb linux-2.6.22-570/arch/powerpc/configs/holly_defconfig linux-2.6.22-try2/arch/powerpc/configs/holly_defconfig
+--- linux-2.6.22-570/arch/powerpc/configs/holly_defconfig	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/configs/holly_defconfig	2007-12-19 15:29:22.000000000 -0500
+@@ -190,7 +190,8 @@
+ # CONFIG_RESOURCES_64BIT is not set
+ CONFIG_ZONE_DMA_FLAG=1
+ CONFIG_PROC_DEVICETREE=y
+-# CONFIG_CMDLINE_BOOL is not set
++CONFIG_CMDLINE_BOOL=y
++CONFIG_CMDLINE="console=ttyS0,115200"
+ # CONFIG_PM is not set
+ # CONFIG_SECCOMP is not set
+ # CONFIG_WANT_DEVICE_TREE is not set
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/Makefile linux-2.6.22-try2/arch/powerpc/kernel/Makefile
+--- linux-2.6.22-570/arch/powerpc/kernel/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/kernel/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -12,7 +12,8 @@
+ 
+ obj-y				:= semaphore.o cputable.o ptrace.o syscalls.o \
+ 				   irq.o align.o signal_32.o pmc.o vdso.o \
+-				   init_task.o process.o systbl.o idle.o
++				   init_task.o process.o systbl.o idle.o \
++				   signal.o
+ obj-y				+= vdso32/
+ obj-$(CONFIG_PPC64)		+= setup_64.o binfmt_elf32.o sys_ppc32.o \
+ 				   signal_64.o ptrace32.o \
+@@ -62,10 +63,16 @@
+ obj-$(CONFIG_KPROBES)		+= kprobes.o
+ obj-$(CONFIG_PPC_UDBG_16550)	+= legacy_serial.o udbg_16550.o
+ 
++ifeq ($(CONFIG_PPC32),y)
++obj-$(CONFIG_KGDB)		+= kgdb.o kgdb_setjmp32.o
++else
++obj-$(CONFIG_KGDB)		+= kgdb.o kgdb_setjmp64.o
++endif
++
+ module-$(CONFIG_PPC64)		+= module_64.o
+ obj-$(CONFIG_MODULES)		+= $(module-y)
+ 
+-pci64-$(CONFIG_PPC64)		+= pci_64.o pci_dn.o
++pci64-$(CONFIG_PPC64)		+= pci_64.o pci_dn.o isa-bridge.o
+ pci32-$(CONFIG_PPC32)		:= pci_32.o
+ obj-$(CONFIG_PCI)		+= $(pci64-y) $(pci32-y)
+ obj-$(CONFIG_PCI_MSI)		+= msi.o
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/head_32.S linux-2.6.22-try2/arch/powerpc/kernel/head_32.S
+--- linux-2.6.22-570/arch/powerpc/kernel/head_32.S	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/kernel/head_32.S	2007-12-19 15:29:22.000000000 -0500
+@@ -9,7 +9,6 @@
+  *  rewritten by Paul Mackerras.
+  *    Copyright (C) 1996 Paul Mackerras.
+  *  MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *
+  *  This file contains the low-level support and setup for the
+  *  PowerPC platform, including trap and interrupt dispatch.
+@@ -32,10 +31,6 @@
+ #include <asm/ppc_asm.h>
+ #include <asm/asm-offsets.h>
+ 
+-#ifdef CONFIG_APUS
+-#include <asm/amigappc.h>
+-#endif
+-
+ /* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
+ #define LOAD_BAT(n, reg, RA, RB)	\
+ 	/* see the comment for clear_bats() -- Cort */ \
+@@ -92,11 +87,6 @@
+  *  r4: virtual address of boot_infos_t
+  *  r5: 0
+  *
+- * APUS
+- *   r3: 'APUS'
+- *   r4: physical address of memory base
+- *   Linux/m68k style BootInfo structure at &_end.
+- *
+  * PREP
+  * This is jumped to on prep systems right after the kernel is relocated
+  * to its proper place in memory by the boot loader.  The expected layout
+@@ -150,14 +140,6 @@
+  */
+ 	bl	early_init
+ 
+-#ifdef CONFIG_APUS
+-/* On APUS the __va/__pa constants need to be set to the correct
+- * values before continuing.
+- */
+-	mr	r4,r30
+-	bl	fix_mem_constants
+-#endif /* CONFIG_APUS */
+-
+ /* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains
+  * the physical address we are running at, returned by early_init()
+  */
+@@ -167,7 +149,7 @@
+ 	bl	flush_tlbs
+ 
+ 	bl	initial_bats
+-#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT)
++#if defined(CONFIG_BOOTX_TEXT)
+ 	bl	setup_disp_bat
+ #endif
+ 
+@@ -183,7 +165,6 @@
+ #endif /* CONFIG_6xx */
+ 
+ 
+-#ifndef CONFIG_APUS
+ /*
+  * We need to run with _start at physical address 0.
+  * On CHRP, we are loaded at 0x10000 since OF on CHRP uses
+@@ -196,7 +177,6 @@
+ 	addis	r4,r3,KERNELBASE@h	/* current address of _start */
+ 	cmpwi	0,r4,0			/* are we already running at 0? */
+ 	bne	relocate_kernel
+-#endif /* CONFIG_APUS */
+ /*
+  * we now have the 1st 16M of ram mapped with the bats.
+  * prep needs the mmu to be turned on here, but pmac already has it on.
+@@ -881,85 +861,6 @@
+ 	addi	r6,r6,4
+ 	blr
+ 
+-#ifdef CONFIG_APUS
+-/*
+- * On APUS the physical base address of the kernel is not known at compile
+- * time, which means the __pa/__va constants used are incorrect. In the
+- * __init section is recorded the virtual addresses of instructions using
+- * these constants, so all that has to be done is fix these before
+- * continuing the kernel boot.
+- *
+- * r4 = The physical address of the kernel base.
+- */
+-fix_mem_constants:
+-	mr	r10,r4
+-	addis	r10,r10,-KERNELBASE@h    /* virt_to_phys constant */
+-	neg	r11,r10	                 /* phys_to_virt constant */
+-
+-	lis	r12,__vtop_table_begin@h
+-	ori	r12,r12,__vtop_table_begin@l
+-	add	r12,r12,r10	         /* table begin phys address */
+-	lis	r13,__vtop_table_end@h
+-	ori	r13,r13,__vtop_table_end@l
+-	add	r13,r13,r10	         /* table end phys address */
+-	subi	r12,r12,4
+-	subi	r13,r13,4
+-1:	lwzu	r14,4(r12)               /* virt address of instruction */
+-	add     r14,r14,r10              /* phys address of instruction */
+-	lwz     r15,0(r14)               /* instruction, now insert top */
+-	rlwimi  r15,r10,16,16,31         /* half of vp const in low half */
+-	stw	r15,0(r14)               /* of instruction and restore. */
+-	dcbst	r0,r14			 /* write it to memory */
+-	sync
+-	icbi	r0,r14			 /* flush the icache line */
+-	cmpw	r12,r13
+-	bne     1b
+-	sync				/* additional sync needed on g4 */
+-	isync
+-
+-/*
+- * Map the memory where the exception handlers will
+- * be copied to when hash constants have been patched.
+- */
+-#ifdef CONFIG_APUS_FAST_EXCEPT
+-	lis	r8,0xfff0
+-#else
+-	lis	r8,0
+-#endif
+-	ori	r8,r8,0x2		/* 128KB, supervisor */
+-	mtspr	SPRN_DBAT3U,r8
+-	mtspr	SPRN_DBAT3L,r8
+-
+-	lis	r12,__ptov_table_begin@h
+-	ori	r12,r12,__ptov_table_begin@l
+-	add	r12,r12,r10	         /* table begin phys address */
+-	lis	r13,__ptov_table_end@h
+-	ori	r13,r13,__ptov_table_end@l
+-	add	r13,r13,r10	         /* table end phys address */
+-	subi	r12,r12,4
+-	subi	r13,r13,4
+-1:	lwzu	r14,4(r12)               /* virt address of instruction */
+-	add     r14,r14,r10              /* phys address of instruction */
+-	lwz     r15,0(r14)               /* instruction, now insert top */
+-	rlwimi  r15,r11,16,16,31         /* half of pv const in low half*/
+-	stw	r15,0(r14)               /* of instruction and restore. */
+-	dcbst	r0,r14			 /* write it to memory */
+-	sync
+-	icbi	r0,r14			 /* flush the icache line */
+-	cmpw	r12,r13
+-	bne     1b
+-
+-	sync				/* additional sync needed on g4 */
+-	isync				/* No speculative loading until now */
+-	blr
+-
+-/***********************************************************************
+- *  Please note that on APUS the exception handlers are located at the
+- *  physical address 0xfff0000. For this reason, the exception handlers
+- *  cannot use relative branches to access the code below.
+- ***********************************************************************/
+-#endif /* CONFIG_APUS */
+-
+ #ifdef CONFIG_SMP
+ #ifdef CONFIG_GEMINI
+ 	.globl	__secondary_start_gemini
+@@ -1135,19 +1036,6 @@
+ 	bl	__save_cpu_setup
+ 	bl	MMU_init
+ 
+-#ifdef CONFIG_APUS
+-	/* Copy exception code to exception vector base on APUS. */
+-	lis	r4,KERNELBASE@h
+-#ifdef CONFIG_APUS_FAST_EXCEPT
+-	lis	r3,0xfff0		/* Copy to 0xfff00000 */
+-#else
+-	lis	r3,0			/* Copy to 0x00000000 */
+-#endif
+-	li	r5,0x4000		/* # bytes of memory to copy */
+-	li	r6,0
+-	bl	copy_and_flush		/* copy the first 0x4000 bytes */
+-#endif  /* CONFIG_APUS */
+-
+ /*
+  * Go back to running unmapped so we can load up new values
+  * for SDR1 (hash table pointer) and the segment registers
+@@ -1324,11 +1212,7 @@
+ #else
+ 	ori	r8,r8,2			/* R/W access */
+ #endif /* CONFIG_SMP */
+-#ifdef CONFIG_APUS
+-	ori	r11,r11,BL_8M<<2|0x2	/* set up 8MB BAT registers for 604 */
+-#else
+ 	ori	r11,r11,BL_256M<<2|0x2	/* set up BAT registers for 604 */
+-#endif /* CONFIG_APUS */
+ 
+ 	mtspr	SPRN_DBAT0L,r8		/* N.B. 6xx (not 601) have valid */
+ 	mtspr	SPRN_DBAT0U,r11		/* bit in upper BAT register */
+@@ -1338,7 +1222,7 @@
+ 	blr
+ 
+ 
+-#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT)
++#ifdef CONFIG_BOOTX_TEXT
+ setup_disp_bat:
+ 	/*
+ 	 * setup the display bat prepared for us in prom.c
+@@ -1362,7 +1246,7 @@
+ 1:	mtspr	SPRN_IBAT3L,r8
+ 	mtspr	SPRN_IBAT3U,r11
+ 	blr
+-#endif /* !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) */
++#endif /* CONFIG_BOOTX_TEXT */
+ 
+ #ifdef CONFIG_8260
+ /* Jump into the system reset for the rom.
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/irq.c linux-2.6.22-try2/arch/powerpc/kernel/irq.c
+--- linux-2.6.22-570/arch/powerpc/kernel/irq.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/kernel/irq.c	2007-12-19 15:29:22.000000000 -0500
+@@ -7,7 +7,6 @@
+  *    Copyright (C) 1996-2001 Cort Dougan
+  *  Adapted for Power Macintosh by Paul Mackerras
+  *    Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au)
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *
+  * This program is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU General Public License
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/isa-bridge.c linux-2.6.22-try2/arch/powerpc/kernel/isa-bridge.c
+--- linux-2.6.22-570/arch/powerpc/kernel/isa-bridge.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/kernel/isa-bridge.c	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,268 @@
++/*
++ * Routines for tracking a legacy ISA bridge
++ *
++ * Copyright 2007 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp.
++ *
++ * Some bits and pieces moved over from pci_64.c
++ *
++ * Copyright 2003 Anton Blanchard <anton@au.ibm.com>, IBM Corp.
++ *
++ *      This program is free software; you can redistribute it and/or
++ *      modify it under the terms of the GNU General Public License
++ *      as published by the Free Software Foundation; either version
++ *      2 of the License, or (at your option) any later version.
++ */
++
++#define DEBUG
++
++#include <linux/kernel.h>
++#include <linux/pci.h>
++#include <linux/string.h>
++#include <linux/init.h>
++#include <linux/mm.h>
++#include <linux/notifier.h>
++
++#include <asm/processor.h>
++#include <asm/io.h>
++#include <asm/prom.h>
++#include <asm/pci-bridge.h>
++#include <asm/machdep.h>
++#include <asm/ppc-pci.h>
++#include <asm/firmware.h>
++
++unsigned long isa_io_base;	/* NULL if no ISA bus */
++EXPORT_SYMBOL(isa_io_base);
++
++/* Cached ISA bridge dev. */
++static struct device_node *isa_bridge_devnode;
++struct pci_dev *isa_bridge_pcidev;
++EXPORT_SYMBOL_GPL(isa_bridge_pcidev);
++
++#define ISA_SPACE_MASK 0x1
++#define ISA_SPACE_IO 0x1
++
++static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
++						unsigned long phb_io_base_phys)
++{
++	/* We should get some saner parsing here and remove these structs */
++	struct pci_address {
++		u32 a_hi;
++		u32 a_mid;
++		u32 a_lo;
++	};
++
++	struct isa_address {
++		u32 a_hi;
++		u32 a_lo;
++	};
++
++	struct isa_range {
++		struct isa_address isa_addr;
++		struct pci_address pci_addr;
++		unsigned int size;
++	};
++
++	const struct isa_range *range;
++	unsigned long pci_addr;
++	unsigned int isa_addr;
++	unsigned int size;
++	int rlen = 0;
++
++	range = of_get_property(isa_node, "ranges", &rlen);
++	if (range == NULL || (rlen < sizeof(struct isa_range)))
++		goto inval_range;
++
++	/* From "ISA Binding to 1275"
++	 * The ranges property is laid out as an array of elements,
++	 * each of which comprises:
++	 *   cells 0 - 1:	an ISA address
++	 *   cells 2 - 4:	a PCI address
++	 *			(size depending on dev->n_addr_cells)
++	 *   cell 5:		the size of the range
++	 */
++	if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) {
++		range++;
++		rlen -= sizeof(struct isa_range);
++		if (rlen < sizeof(struct isa_range))
++			goto inval_range;
++	}
++	if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO)
++		goto inval_range;
++
++	isa_addr = range->isa_addr.a_lo;
++	pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
++		range->pci_addr.a_lo;
++
++	/* Assume these are both zero. Note: We could fix that and
++	 * do a proper parsing instead ... oh well, that will do for
++	 * now as nobody uses fancy mappings for ISA bridges
++	 */
++	if ((pci_addr != 0) || (isa_addr != 0)) {
++		printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
++		       __FUNCTION__);
++		return;
++	}
++
++	/* Align size and make sure it's cropped to 64K */
++	size = PAGE_ALIGN(range->size);
++	if (size > 0x10000)
++		size = 0x10000;
++
++	printk(KERN_ERR "no ISA IO ranges or unexpected isa range,"
++	       "mapping 64k\n");
++
++	__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
++		     size, _PAGE_NO_CACHE|_PAGE_GUARDED);
++	return;
++
++inval_range:
++	printk(KERN_ERR "no ISA IO ranges or unexpected isa range,"
++	       "mapping 64k\n");
++	__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
++		     0x10000, _PAGE_NO_CACHE|_PAGE_GUARDED);
++}
++
++
++/**
++ * isa_bridge_find_early - Find and map the ISA IO space early before
++ *                         main PCI discovery. This is optionally called by
++ *                         the arch code when adding PCI PHBs to get early
++ *                         access to ISA IO ports
++ */
++void __init isa_bridge_find_early(struct pci_controller *hose)
++{
++	struct device_node *np, *parent = NULL, *tmp;
++
++	/* If we already have an ISA bridge, bail off */
++	if (isa_bridge_devnode != NULL)
++		return;
++
++	/* For each "isa" node in the system. Note : we do a search by
++	 * type and not by name. It might be better to do by name but that's
++	 * what the code used to do and I don't want to break too much at
++	 * once. We can look into changing that separately
++	 */
++	for_each_node_by_type(np, "isa") {
++		/* Look for our hose being a parent */
++		for (parent = of_get_parent(np); parent;) {
++			if (parent == hose->arch_data) {
++				of_node_put(parent);
++				break;
++			}
++			tmp = parent;
++			parent = of_get_parent(parent);
++			of_node_put(tmp);
++		}
++		if (parent != NULL)
++			break;
++	}
++	if (np == NULL)
++		return;
++	isa_bridge_devnode = np;
++
++	/* Now parse the "ranges" property and setup the ISA mapping */
++	pci_process_ISA_OF_ranges(np, hose->io_base_phys);
++
++	/* Set the global ISA io base to indicate we have an ISA bridge */
++	isa_io_base = ISA_IO_BASE;
++
++	pr_debug("ISA bridge (early) is %s\n", np->full_name);
++}
++
++/**
++ * isa_bridge_find_late - Find and map the ISA IO space upon discovery of
++ *                        a new ISA bridge
++ */
++static void __devinit isa_bridge_find_late(struct pci_dev *pdev,
++					   struct device_node *devnode)
++{
++	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
++
++	/* Store ISA device node and PCI device */
++	isa_bridge_devnode = of_node_get(devnode);
++	isa_bridge_pcidev = pdev;
++
++	/* Now parse the "ranges" property and setup the ISA mapping */
++	pci_process_ISA_OF_ranges(devnode, hose->io_base_phys);
++
++	/* Set the global ISA io base to indicate we have an ISA bridge */
++	isa_io_base = ISA_IO_BASE;
++
++	pr_debug("ISA bridge (late) is %s on %s\n",
++		 devnode->full_name, pci_name(pdev));
++}
++
++/**
++ * isa_bridge_remove - Remove/unmap an ISA bridge
++ */
++static void isa_bridge_remove(void)
++{
++	pr_debug("ISA bridge removed !\n");
++
++	/* Clear the global ISA io base to indicate that we have no more
++	 * ISA bridge. Note that drivers don't quite handle that, though
++	 * we should probably do something about it. But do we ever really
++	 * have ISA bridges being removed on machines using legacy devices ?
++	 */
++	isa_io_base = ISA_IO_BASE;
++
++	/* Clear references to the bridge */
++	of_node_put(isa_bridge_devnode);
++	isa_bridge_devnode = NULL;
++	isa_bridge_pcidev = NULL;
++
++	/* Unmap the ISA area */
++	__iounmap_at((void *)ISA_IO_BASE, 0x10000);
++}
++
++/**
++ * isa_bridge_notify - Get notified of PCI devices addition/removal
++ */
++static int __devinit isa_bridge_notify(struct notifier_block *nb,
++				       unsigned long action, void *data)
++{
++	struct device *dev = data;
++	struct pci_dev *pdev = to_pci_dev(dev);
++	struct device_node *devnode = pci_device_to_OF_node(pdev);
++
++	switch(action) {
++	case BUS_NOTIFY_ADD_DEVICE:
++		/* Check if we have an early ISA device, without PCI dev */
++		if (isa_bridge_devnode && isa_bridge_devnode == devnode &&
++		    !isa_bridge_pcidev) {
++			pr_debug("ISA bridge PCI attached: %s\n",
++				 pci_name(pdev));
++			isa_bridge_pcidev = pdev;
++		}
++
++		/* Check if we have no ISA device, and this happens to be one,
++		 * register it as such if it has an OF device
++		 */
++		if (!isa_bridge_devnode && devnode && devnode->type &&
++		    !strcmp(devnode->type, "isa"))
++			isa_bridge_find_late(pdev, devnode);
++
++		return 0;
++	case BUS_NOTIFY_DEL_DEVICE:
++		/* Check if this is our existing ISA device */
++		if (pdev == isa_bridge_pcidev ||
++		    (devnode && devnode == isa_bridge_devnode))
++			isa_bridge_remove();
++		return 0;
++	}
++	return 0;
++}
++
++static struct notifier_block isa_bridge_notifier = {
++	.notifier_call = isa_bridge_notify
++};
++
++/**
++ * isa_bridge_init - register to be notified of ISA bridge addition/removal
++ *
++ */
++static int __init isa_bridge_init(void)
++{
++	if (firmware_has_feature(FW_FEATURE_ISERIES))
++		return 0;
++	bus_register_notifier(&pci_bus_type, &isa_bridge_notifier);
++	return 0;
++}
++arch_initcall(isa_bridge_init);
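A sketch, not part of the patch, of one "ranges" entry in the layout the comment in pci_process_ISA_OF_ranges() describes; the cell values are hypothetical, and the helper restates the bitwise space test the code performs.

	/* Hypothetical entry: ISA IO space at offset 0 -> PCI IO address 0,
	 * 64K long (2 ISA cells, 3 PCI cells, 1 size cell):
	 *
	 *	ranges = <0x1 0x0  0x01000000 0x0 0x0  0x10000>;
	 */
	static int example_is_io_range(unsigned int a_hi)
	{
		/* the low bit of the first ISA cell selects IO space */
		return (a_hi & 0x1 /* ISA_SPACE_MASK */)
			== 0x1 /* ISA_SPACE_IO */;
	}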
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/kgdb.c linux-2.6.22-try2/arch/powerpc/kernel/kgdb.c
+--- linux-2.6.22-570/arch/powerpc/kernel/kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/kernel/kgdb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,499 @@
++/*
++ * arch/powerpc/kernel/kgdb.c
++ *
++ * PowerPC backend to the KGDB stub.
++ *
++ * Maintainer: Tom Rini <trini@kernel.crashing.org>
++ *
++ * Copied from arch/ppc/kernel/kgdb.c, updated for ppc64
++ *
++ * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu)
++ * Copyright (C) 2003 Timesys Corporation.
++ * Copyright (C) 2004-2006 MontaVista Software, Inc.
++ * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com)
++ * PPC32 support restored by Vitaly Wool <vwool@ru.mvista.com> and
++ * Sergei Shtylyov <sshtylyov@ru.mvista.com>
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++#include <linux/smp.h>
++#include <linux/signal.h>
++#include <linux/ptrace.h>
++#include <asm/current.h>
++#include <asm/ptrace.h>
++#include <asm/processor.h>
++#include <asm/machdep.h>
++
++/*
++ * This table contains the mapping between PowerPC hardware trap types, and
++ * signals, which are primarily what GDB understands.  GDB and the kernel
++ * don't always agree on values, so we use constants taken from gdb-6.2.
++ */
++static struct hard_trap_info
++{
++	unsigned int tt;		/* Trap type code for powerpc */
++	unsigned char signo;		/* Signal that we map this trap into */
++} hard_trap_info[] = {
++	{ 0x0100, 0x02 /* SIGINT */  },		/* system reset */
++	{ 0x0200, 0x0b /* SIGSEGV */ },		/* machine check */
++	{ 0x0300, 0x0b /* SIGSEGV */ },		/* data access */
++	{ 0x0400, 0x0b /* SIGSEGV */ },		/* instruction access */
++	{ 0x0500, 0x02 /* SIGINT */  },		/* external interrupt */
++	{ 0x0600, 0x0a /* SIGBUS */  },		/* alignment */
++	{ 0x0700, 0x05 /* SIGTRAP */ },		/* program check */
++	{ 0x0800, 0x08 /* SIGFPE */  },		/* fp unavailable */
++	{ 0x0900, 0x0e /* SIGALRM */ },		/* decrementer */
++	{ 0x0c00, 0x14 /* SIGCHLD */ },		/* system call */
++#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
++	{ 0x2002, 0x05 /* SIGTRAP */ },		/* debug */
++#if defined(CONFIG_FSL_BOOKE)
++	{ 0x2010, 0x08 /* SIGFPE */  },		/* spe unavailable */
++	{ 0x2020, 0x08 /* SIGFPE */  },		/* spe unavailable */
++	{ 0x2030, 0x08 /* SIGFPE */  },		/* spe fp data */
++	{ 0x2040, 0x08 /* SIGFPE */  },		/* spe fp data */
++	{ 0x2050, 0x08 /* SIGFPE */  },		/* spe fp round */
++	{ 0x2060, 0x04 /* SIGILL */  },		/* performance monitor */
++	{ 0x2900, 0x08 /* SIGFPE */  },		/* apu unavailable */
++	{ 0x3100, 0x0e /* SIGALRM */ },		/* fixed interval timer */
++	{ 0x3200, 0x02 /* SIGINT */  }, 	/* watchdog */
++#else
++	{ 0x1000, 0x0e /* SIGALRM */ },		/* programmable interval timer */
++	{ 0x1010, 0x0e /* SIGALRM */ },		/* fixed interval timer */
++	{ 0x1020, 0x02 /* SIGINT */  }, 	/* watchdog */
++	{ 0x2010, 0x08 /* SIGFPE */  },		/* fp unavailable */
++	{ 0x2020, 0x08 /* SIGFPE */  },		/* ap unavailable */
++#endif
++#else
++	{ 0x0d00, 0x05 /* SIGTRAP */ },		/* single-step */
++#if defined(CONFIG_8xx)
++	{ 0x1000, 0x04 /* SIGILL */  },		/* software emulation */
++#else
++	{ 0x0f00, 0x04 /* SIGILL */  },		/* performance monitor */
++	{ 0x0f20, 0x08 /* SIGFPE */  },		/* altivec unavailable */
++	{ 0x1300, 0x05 /* SIGTRAP */ }, 	/* instruction address break */
++#if defined(CONFIG_PPC64)
++	{ 0x1200, 0x04 /* SIGILL */  },		/* system error */
++	{ 0x1500, 0x04 /* SIGILL */  },		/* soft patch */
++	{ 0x1600, 0x04 /* SIGILL */  },		/* maintenance */
++	{ 0x1700, 0x08 /* SIGFPE */  },		/* altivec assist */
++	{ 0x1800, 0x04 /* SIGILL */  },		/* thermal */
++#else
++	{ 0x1400, 0x02 /* SIGINT */  },		/* SMI */
++	{ 0x1600, 0x08 /* SIGFPE */  },		/* altivec assist */
++	{ 0x1700, 0x04 /* SIGILL */  },		/* TAU */
++	{ 0x2000, 0x05 /* SIGTRAP */ },		/* run mode */
++#endif
++#endif
++#endif
++	{ 0x0000, 0x00 }			/* Must be last */
++};
++
++extern atomic_t cpu_doing_single_step;
++
++static int computeSignal(unsigned int tt)
++{
++	struct hard_trap_info *ht;
++
++	for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
++		if (ht->tt == tt)
++			return ht->signo;
++
++	return SIGHUP;		/* default for things we don't know about */
++}
++
++static int kgdb_call_nmi_hook(struct pt_regs *regs)
++{
++	kgdb_nmihook(smp_processor_id(), regs);
++	return 0;
++}
++
++#ifdef CONFIG_SMP
++void kgdb_roundup_cpus(unsigned long flags)
++{
++	smp_send_debugger_break(MSG_ALL_BUT_SELF);
++}
++#endif
++
++/* KGDB functions to use existing PowerPC64 hooks. */
++static int kgdb_debugger(struct pt_regs *regs)
++{
++	return kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
++}
++
++static int kgdb_breakpoint(struct pt_regs *regs)
++{
++	if (user_mode(regs))
++		return 0;
++
++	kgdb_handle_exception(0, SIGTRAP, 0, regs);
++
++	if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
++		regs->nip += 4;
++
++	return 1;
++}
++
++static int kgdb_singlestep(struct pt_regs *regs)
++{
++	struct thread_info *thread_info, *exception_thread_info;
++
++	if (user_mode(regs))
++		return 0;
++
++	/*
++	 * On Book E and perhaps other processors, singlestep is handled on
++	 * the critical exception stack.  This causes current_thread_info()
++	 * to fail, since it locates the thread_info by masking off
++	 * the low bits of the current stack pointer.  We work around
++	 * this issue by copying the thread_info from the kernel stack
++	 * before calling kgdb_handle_exception, and copying it back
++	 * afterwards.  On most processors the copy is avoided since
++	 * exception_thread_info == thread_info.
++	 */
++	thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1));
++	exception_thread_info = current_thread_info();
++
++	if (thread_info != exception_thread_info)
++		memcpy(exception_thread_info, thread_info, sizeof *thread_info);
++
++	kgdb_handle_exception(0, SIGTRAP, 0, regs);
++
++	if (thread_info != exception_thread_info)
++		memcpy(thread_info, exception_thread_info, sizeof *thread_info);
++
++	return 1;
++}
++
++int kgdb_iabr_match(struct pt_regs *regs)
++{
++	if (user_mode(regs))
++		return 0;
++
++	kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
++	return 1;
++}
++
++int kgdb_dabr_match(struct pt_regs *regs)
++{
++	if (user_mode(regs))
++		return 0;
++
++	kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
++	return 1;
++}
++
++#define PACK64(ptr,src) do { *(ptr++) = (src); } while(0)
++
++#define PACK32(ptr,src) do {          \
++	u32 *ptr32;                   \
++	ptr32 = (u32 *)ptr;           \
++	*(ptr32++) = (src);           \
++	ptr = (unsigned long *)ptr32; \
++	} while(0)
++
++
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++	unsigned long *ptr = gdb_regs;
++	int reg;
++
++	memset(gdb_regs, 0, NUMREGBYTES);
++
++	for (reg = 0; reg < 32; reg++)
++		PACK64(ptr, regs->gpr[reg]);
++
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++	for (reg = 0; reg < 32; reg++)
++		PACK64(ptr, current->thread.evr[reg]);
++#else
++	ptr += 32;
++#endif
++#else
++	/* fp registers not used by kernel, leave zero */
++	ptr += 32 * 8 / sizeof(long);
++#endif
++
++	PACK64(ptr, regs->nip);
++	PACK64(ptr, regs->msr);
++	PACK32(ptr, regs->ccr);
++	PACK64(ptr, regs->link);
++	PACK64(ptr, regs->ctr);
++	PACK32(ptr, regs->xer);
++
++#if 0
++	Following are in struct thread_struct, not struct pt_regs,
++	ignoring for now since kernel does not use them.  Would it
++	make sense to get them from the thread that kgdb is set to?
++
++	If this code is enabled, update the definition of NUMREGBYTES to
++	include the vector registers and vector state registers.
++
++	PACK32(ptr, current->thread->fpscr);
++
++	/* vr registers not used by kernel, leave zero */
++	ptr += 32 * 16 / sizeof(long);
++
++#ifdef CONFIG_ALTIVEC
++	PACK32(ptr, current->thread->vscr);
++	PACK32(ptr, current->thread->vrsave);
++#else
++	ptr += 2 * 4 / sizeof(long);
++#endif
++#else
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++	/* u64 acc */
++	PACK32(ptr, current->thread.acc >> 32);
++	PACK32(ptr, current->thread.acc & 0xffffffff);
++	PACK64(ptr, current->thread.spefscr);
++#else
++	ptr += 2 + 1;
++#endif
++#else
++	/* fpscr not used by kernel, leave zero */
++	PACK32(ptr, 0);
++#endif
++#endif
++
++	BUG_ON((unsigned long)ptr >
++	       (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
++}
++
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++	struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp +
++						  STACK_FRAME_OVERHEAD);
++	unsigned long *ptr = gdb_regs;
++	int reg;
++
++	memset(gdb_regs, 0, NUMREGBYTES);
++
++	/* Regs GPR0-2 */
++	for (reg = 0; reg < 3; reg++)
++		PACK64(ptr, regs->gpr[reg]);
++
++	/* Regs GPR3-13 are caller saved, not in regs->gpr[] */
++	ptr += 11;
++
++	/* Regs GPR14-31 */
++	for (reg = 14; reg < 32; reg++)
++		PACK64(ptr, regs->gpr[reg]);
++
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++	for (reg = 0; reg < 32; reg++)
++		PACK64(ptr, p->thread.evr[reg]);
++#else
++	ptr += 32;
++#endif
++#else
++	/* fp registers not used by kernel, leave zero */
++	ptr += 32 * 8 / sizeof(long);
++#endif
++
++	PACK64(ptr, regs->nip);
++	PACK64(ptr, regs->msr);
++	PACK32(ptr, regs->ccr);
++	PACK64(ptr, regs->link);
++	PACK64(ptr, regs->ctr);
++	PACK32(ptr, regs->xer);
++
++#if 0
++	Following are in struct thread_struct, not struct pt_regs,
++	ignoring for now since kernel does not use them.  Would it
++	make sense to get them from the thread that kgdb is set to?
++
++	If this code is enabled, update the definition of NUMREGBYTES to
++	include the vector registers and vector state registers.
++
++	PACK32(ptr, p->thread->fpscr);
++
++	/* vr registers not used by kernel, leave zero */
++	ptr += 32 * 16 / sizeof(long);
++
++#ifdef CONFIG_ALTIVEC
++	PACK32(ptr, p->thread->vscr);
++	PACK32(ptr, p->thread->vrsave);
++#else
++	ptr += 2 * 4 / sizeof(long);
++#endif
++#else
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++	/* u64 acc */
++	PACK32(ptr, p->thread.acc >> 32);
++	PACK32(ptr, p->thread.acc & 0xffffffff);
++	PACK64(ptr, p->thread.spefscr);
++#else
++	ptr += 2 + 1;
++#endif
++#else
++	/* fpscr not used by kernel, leave zero */
++	PACK32(ptr, 0);
++#endif
++#endif
++
++	BUG_ON((unsigned long)ptr >
++	       (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
++}
++
++#define UNPACK64(dest,ptr) do { dest = *(ptr++); } while(0)
++
++#define UNPACK32(dest,ptr) do {       \
++	u32 *ptr32;                   \
++	ptr32 = (u32 *)ptr;           \
++	dest = *(ptr32++);            \
++	ptr = (unsigned long *)ptr32; \
++	} while(0)
++
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++	unsigned long *ptr = gdb_regs;
++	int reg;
++#ifdef CONFIG_SPE
++	union {
++		u32 v32[2];
++		u64 v64;
++	} acc;
++#endif
++
++	for (reg = 0; reg < 32; reg++)
++		UNPACK64(regs->gpr[reg], ptr);
++
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++	for (reg = 0; reg < 32; reg++)
++		UNPACK64(current->thread.evr[reg], ptr);
++#else
++	ptr += 32;
++#endif
++#else
++	/* fp registers not used by kernel, leave zero */
++	ptr += 32 * 8 / sizeof(long);
++#endif
++
++	UNPACK64(regs->nip, ptr);
++	UNPACK64(regs->msr, ptr);
++	UNPACK32(regs->ccr, ptr);
++	UNPACK64(regs->link, ptr);
++	UNPACK64(regs->ctr, ptr);
++	UNPACK32(regs->xer, ptr);
++
++#if 0
++	Following are in struct thread_struct, not struct pt_regs,
++	ignoring for now since kernel does not use them.  Would it
++	make sense to get them from the thread that kgdb is set to?
++
++	If this code is enabled, update the definition of NUMREGBYTES to
++	include the vector registers and vector state registers.
++
++	/* fpscr, vscr, vrsave not used by kernel, leave unchanged */
++
++	UNPACK32(current->thread->fpscr, ptr);
++
++	/* vr registers not used by kernel, leave zero */
++	ptr += 32 * 16 / sizeof(long);
++
++#ifdef CONFIG_ALTIVEC
++	UNPACK32(current->thread->vscr, ptr);
++	UNPACK32(current->thread->vrsave, ptr);
++#else
++	ptr += 2 * 4 / sizeof(long);
++#endif
++#else
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++	/* u64 acc */
++	UNPACK32(acc.v32[0], ptr);
++	UNPACK32(acc.v32[1], ptr);
++	current->thread.acc = acc.v64;
++	UNPACK64(current->thread.spefscr, ptr);
++#else
++	ptr += 2 + 1;
++#endif
++#endif
++#endif
++
++	BUG_ON((unsigned long)ptr >
++	       (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
++}
++
++/*
++ * This function does PowerPC specific processing for interfacing to gdb.
++ */
++int kgdb_arch_handle_exception(int vector, int signo, int err_code,
++			       char *remcom_in_buffer, char *remcom_out_buffer,
++			       struct pt_regs *linux_regs)
++{
++	char *ptr = &remcom_in_buffer[1];
++	unsigned long addr;
++
++	switch (remcom_in_buffer[0]) {
++		/*
++		 * sAA..AA   Step one instruction from AA..AA
++		 * This will return an error to gdb ..
++		 */
++	case 's':
++	case 'c':
++		/* handle the optional parameter */
++		if (kgdb_hex2long(&ptr, &addr))
++			linux_regs->nip = addr;
++
++		atomic_set(&cpu_doing_single_step, -1);
++		/* set the trace bit if we're stepping */
++		if (remcom_in_buffer[0] == 's') {
++#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
++			mtspr(SPRN_DBCR0,
++			      mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
++			linux_regs->msr |= MSR_DE;
++#else
++			linux_regs->msr |= MSR_SE;
++#endif
++			debugger_step = 1;
++			if (kgdb_contthread)
++				atomic_set(&cpu_doing_single_step,
++					   smp_processor_id());
++		}
++		return 0;
++	}
++
++	return -1;
++}
++
++/*
++ * Global data
++ */
++struct kgdb_arch arch_kgdb_ops = {
++	.gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
++};
++
++int kgdb_not_implemented(struct pt_regs *regs)
++{
++	return 0;
++}
++
++int kgdb_arch_init(void)
++{
++#ifdef CONFIG_XMON
++#error Both XMON and KGDB selected in .config.  Unselect one of them.
++#endif
++
++	__debugger_ipi = kgdb_call_nmi_hook;
++	__debugger = kgdb_debugger;
++	__debugger_bpt = kgdb_breakpoint;
++	__debugger_sstep = kgdb_singlestep;
++	__debugger_iabr_match = kgdb_iabr_match;
++	__debugger_dabr_match = kgdb_dabr_match;
++	__debugger_fault_handler = kgdb_not_implemented;
++
++	return 0;
++}
++
++arch_initcall(kgdb_arch_init);
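A sketch, not part of the patch, of how the trap table above is consumed (written as if inside kgdb.c, since computeSignal() is static). The vector and the expected signal number are taken from the table itself, using the gdb-6.2 numbering its comment cites.

	static void example_trap_mapping(void)
	{
		/* 0x0700 (program check) is the vector raised by the
		 * breakpoint instruction installed via arch_kgdb_ops, so
		 * it must map to 0x05, gdb's SIGTRAP. */
		BUG_ON(computeSignal(0x0700) != 0x05);
	}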
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp32.S linux-2.6.22-try2/arch/powerpc/kernel/kgdb_setjmp32.S
+--- linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp32.S	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/kernel/kgdb_setjmp32.S	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,41 @@
++/*
++ * Copyright (C) 1996 Paul Mackerras
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <asm/processor.h>
++#include <asm/ppc_asm.h>
++
++	.text
++
++/*
++ * Save/restore state in case a memory access causes a fault.
++ *
++ * int kgdb_fault_setjmp(unsigned long *curr_context);
++ * void kgdb_fault_longjmp(unsigned long *curr_context);
++ */
++
++_GLOBAL(kgdb_fault_setjmp)
++	mflr	r0
++	stw	r0,0(r3)
++	stw	r1,4(r3)
++	stw	r2,8(r3)
++	mfcr	r0
++	stw	r0,12(r3)
++	stmw	r13,16(r3)
++	li	r3,0
++	blr
++
++_GLOBAL(kgdb_fault_longjmp)
++	lmw     r13,16(r3)
++	lwz     r0,12(r3)
++	mtcrf   0x38,r0
++	lwz     r0,0(r3)
++	lwz     r1,4(r3)
++	lwz     r2,8(r3)
++	mtlr    r0
++	mr      r3,r1
++	blr
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp64.S linux-2.6.22-try2/arch/powerpc/kernel/kgdb_setjmp64.S
+--- linux-2.6.22-570/arch/powerpc/kernel/kgdb_setjmp64.S	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/kernel/kgdb_setjmp64.S	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,77 @@
++/*
++ * Copyright (C) 1996 Paul Mackerras
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <asm/processor.h>
++#include <asm/ppc_asm.h>
++
++	.text
++
++/*
++ * Save/restore state in case a memory access causes a fault.
++ *
++ * int kgdb_fault_setjmp(unsigned long *curr_context);
++ * void kgdb_fault_longjmp(unsigned long *curr_context);
++ */
++
++_GLOBAL(kgdb_fault_setjmp)
++	mflr	r0
++	std	r0,0(r3)
++	std	r1,8(r3)
++	std	r2,16(r3)
++	mfcr	r0
++	std	r0,24(r3)
++	std	r13,32(r3)
++	std	r14,40(r3)
++	std	r15,48(r3)
++	std	r16,56(r3)
++	std	r17,64(r3)
++	std	r18,72(r3)
++	std	r19,80(r3)
++	std	r20,88(r3)
++	std	r21,96(r3)
++	std	r22,104(r3)
++	std	r23,112(r3)
++	std	r24,120(r3)
++	std	r25,128(r3)
++	std	r26,136(r3)
++	std	r27,144(r3)
++	std	r28,152(r3)
++	std	r29,160(r3)
++	std	r30,168(r3)
++	std	r31,176(r3)
++	li	r3,0
++	blr
++
++_GLOBAL(kgdb_fault_longjmp)
++	ld	r13,32(r3)
++	ld	r14,40(r3)
++	ld	r15,48(r3)
++	ld	r16,56(r3)
++	ld	r17,64(r3)
++	ld	r18,72(r3)
++	ld	r19,80(r3)
++	ld	r20,88(r3)
++	ld	r21,96(r3)
++	ld	r22,104(r3)
++	ld	r23,112(r3)
++	ld	r24,120(r3)
++	ld	r25,128(r3)
++	ld	r26,136(r3)
++	ld	r27,144(r3)
++	ld	r28,152(r3)
++	ld	r29,160(r3)
++	ld	r30,168(r3)
++	ld	r31,176(r3)
++	ld	r0,24(r3)
++	mtcrf	0x38,r0
++	ld	r0,0(r3)
++	ld	r1,8(r3)
++	ld	r2,16(r3)
++	mtlr	r0
++	mr	r3,r1
++	blr
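A sketch, not part of the patch, of the C-level calling pattern the two routines above support, mirroring setjmp()/longjmp(); it assumes the generic kgdb core calls kgdb_fault_longjmp() from its fault hook when the probed access faults. Buffer size and probe signature are illustrative.

	extern int kgdb_fault_setjmp(unsigned long *curr_context);
	extern void kgdb_fault_longjmp(unsigned long *curr_context);

	static unsigned long fault_jmp_buf[32];	/* 184 bytes used on ppc64 */

	static int example_probe_word(unsigned long *addr, unsigned long *val)
	{
		if (kgdb_fault_setjmp(fault_jmp_buf) != 0)
			return -1;	/* longjmp target: the access faulted */
		*val = *addr;		/* may fault */
		return 0;
	}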
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/legacy_serial.c linux-2.6.22-try2/arch/powerpc/kernel/legacy_serial.c
+--- linux-2.6.22-570/arch/powerpc/kernel/legacy_serial.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/kernel/legacy_serial.c	2007-12-19 15:29:24.000000000 -0500
+@@ -11,6 +11,9 @@
+ #include <asm/udbg.h>
+ #include <asm/pci-bridge.h>
+ #include <asm/ppc-pci.h>
++#ifdef CONFIG_KGDB_8250
++#include <linux/kgdb.h>
++#endif
+ 
+ #undef DEBUG
+ 
+@@ -487,6 +490,9 @@
+ 			fixup_port_pio(i, np, port);
+ 		if ((port->iotype == UPIO_MEM) || (port->iotype == UPIO_TSI))
+ 			fixup_port_mmio(i, np, port);
++#ifdef CONFIG_KGDB_8250
++		kgdb8250_add_platform_port(i, port);
++#endif
+ 	}
+ 
+ 	DBG("Registering platform serial ports\n");
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/misc_32.S linux-2.6.22-try2/arch/powerpc/kernel/misc_32.S
+--- linux-2.6.22-570/arch/powerpc/kernel/misc_32.S	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/kernel/misc_32.S	2007-12-19 15:29:22.000000000 -0500
+@@ -392,7 +392,7 @@
+ 	mtspr   SPRN_L1CSR0,r3
+ 	isync
+ 	blr
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
+ 	mfspr	r3,SPRN_L1CSR1
+ 	ori	r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
+ 	mtspr	SPRN_L1CSR1,r3
+@@ -419,7 +419,7 @@
+ _GLOBAL(__flush_icache_range)
+ BEGIN_FTR_SECTION
+ 	blr				/* for 601, do nothing */
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ 	li	r5,L1_CACHE_BYTES-1
+ 	andc	r3,r3,r5
+ 	subf	r4,r3,r4
+@@ -514,8 +514,8 @@
+  */
+ _GLOBAL(__flush_dcache_icache)
+ BEGIN_FTR_SECTION
+-	blr					/* for 601, do nothing */
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++	blr
++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ 	rlwinm	r3,r3,0,0,19			/* Get page base address */
+ 	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
+ 	mtctr	r4
+@@ -543,7 +543,7 @@
+ _GLOBAL(__flush_dcache_icache_phys)
+ BEGIN_FTR_SECTION
+ 	blr					/* for 601, do nothing */
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ 	mfmsr	r10
+ 	rlwinm	r0,r10,0,28,26			/* clear DR */
+ 	mtmsr	r0
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/of_platform.c linux-2.6.22-try2/arch/powerpc/kernel/of_platform.c
+--- linux-2.6.22-570/arch/powerpc/kernel/of_platform.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/kernel/of_platform.c	2007-12-19 15:29:22.000000000 -0500
+@@ -427,14 +427,6 @@
+ 	/* Process "ranges" property */
+ 	pci_process_bridge_OF_ranges(phb, dev->node, 0);
+ 
+-	/* Setup IO space. We use the non-dynamic version of that code here,
+-	 * which doesn't quite support unplugging. Next kernel release will
+-	 * have a better fix for this.
+-	 * Note also that we don't do ISA, this will also be fixed with a
+-	 * more massive rework.
+-	 */
+-	pci_setup_phb_io(phb, pci_io_base == 0);
+-
+ 	/* Init pci_dn data structures */
+ 	pci_devs_phb_init_dynamic(phb);
+ 
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/pci_64.c linux-2.6.22-try2/arch/powerpc/kernel/pci_64.c
+--- linux-2.6.22-570/arch/powerpc/kernel/pci_64.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/kernel/pci_64.c	2007-12-19 15:29:22.000000000 -0500
+@@ -11,7 +11,7 @@
+  *      2 of the License, or (at your option) any later version.
+  */
+ 
+-#undef DEBUG
++#define DEBUG
+ 
+ #include <linux/kernel.h>
+ #include <linux/pci.h>
+@@ -22,6 +22,7 @@
+ #include <linux/list.h>
+ #include <linux/syscalls.h>
+ #include <linux/irq.h>
++#include <linux/vmalloc.h>
+ 
+ #include <asm/processor.h>
+ #include <asm/io.h>
+@@ -41,35 +42,26 @@
+ 
+ unsigned long pci_probe_only = 1;
+ int pci_assign_all_buses = 0;
+-static int pci_initial_scan_done;
+ 
+ static void fixup_resource(struct resource *res, struct pci_dev *dev);
+ static void do_bus_setup(struct pci_bus *bus);
+-static void phbs_remap_io(void);
+ 
+ /* pci_io_base -- the base address from which io bars are offsets.
+  * This is the lowest I/O base address (so bar values are always positive),
+  * and it *must* be the start of ISA space if an ISA bus exists because
+- * ISA drivers use hard coded offsets.  If no ISA bus exists a dummy
+- * page is mapped and isa_io_limit prevents access to it.
++ * ISA drivers use hard coded offsets.  If no ISA bus exists nothing
++ * is mapped on the first 64K of IO space
+  */
+-unsigned long isa_io_base;	/* NULL if no ISA bus */
+-EXPORT_SYMBOL(isa_io_base);
+-unsigned long pci_io_base;
++unsigned long pci_io_base = ISA_IO_BASE;
+ EXPORT_SYMBOL(pci_io_base);
+ 
+-void iSeries_pcibios_init(void);
+-
+ LIST_HEAD(hose_list);
+ 
+ static struct dma_mapping_ops *pci_dma_ops;
+ 
++/* XXX kill that some day ... */
+ int global_phb_number;		/* Global phb counter */
+ 
+-/* Cached ISA bridge dev. */
+-struct pci_dev *ppc64_isabridge_dev = NULL;
+-EXPORT_SYMBOL_GPL(ppc64_isabridge_dev);
+-
+ void set_pci_dma_ops(struct dma_mapping_ops *dma_ops)
+ {
+ 	pci_dma_ops = dma_ops;
+@@ -100,7 +92,7 @@
+ 		return;
+ 
+ 	if (res->flags & IORESOURCE_IO)
+-	        offset = (unsigned long)hose->io_base_virt - pci_io_base;
++	        offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ 
+ 	if (res->flags & IORESOURCE_MEM)
+ 		offset = hose->pci_mem_offset;
+@@ -119,7 +111,7 @@
+ 		return;
+ 
+ 	if (res->flags & IORESOURCE_IO)
+-	        offset = (unsigned long)hose->io_base_virt - pci_io_base;
++	        offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ 
+ 	if (res->flags & IORESOURCE_MEM)
+ 		offset = hose->pci_mem_offset;
+@@ -156,7 +148,7 @@
+ 
+ 	if (res->flags & IORESOURCE_IO) {
+ 	        unsigned long offset = (unsigned long)hose->io_base_virt -
+-					pci_io_base;
++					_IO_BASE;
+ 		/* Make sure we start at our min on all hoses */
+ 		if (start - offset < PCIBIOS_MIN_IO)
+ 			start = PCIBIOS_MIN_IO + offset;
+@@ -535,10 +527,16 @@
+ 	bus->secondary = hose->first_busno;
+ 	hose->bus = bus;
+ 
++	if (!firmware_has_feature(FW_FEATURE_ISERIES))
++		pcibios_map_io_space(bus);
++
+ 	bus->resource[0] = res = &hose->io_resource;
+-	if (res->flags && request_resource(&ioport_resource, res))
++	if (res->flags && request_resource(&ioport_resource, res)) {
+ 		printk(KERN_ERR "Failed to request PCI IO region "
+ 		       "on PCI domain %04x\n", hose->global_number);
++		DBG("res->start = 0x%016lx, res->end = 0x%016lx\n",
++		    res->start, res->end);
++	}
+ 
+ 	for (i = 0; i < 3; ++i) {
+ 		res = &hose->mem_resources[i];
+@@ -596,17 +594,6 @@
+ 	if (ppc_md.pcibios_fixup)
+ 		ppc_md.pcibios_fixup();
+ 
+-	/* Cache the location of the ISA bridge (if we have one) */
+-	ppc64_isabridge_dev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
+-	if (ppc64_isabridge_dev != NULL)
+-		printk(KERN_DEBUG "ISA bridge at %s\n", pci_name(ppc64_isabridge_dev));
+-
+-	if (!firmware_has_feature(FW_FEATURE_ISERIES))
+-		/* map in PCI I/O space */
+-		phbs_remap_io();
+-
+-	pci_initial_scan_done = 1;
+-
+ 	printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
+ 
+ 	return 0;
+@@ -711,7 +698,7 @@
+ #endif
+ 		res_bit = IORESOURCE_MEM;
+ 	} else {
+-		io_offset = (unsigned long)hose->io_base_virt - pci_io_base;
++		io_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ 		*offset += io_offset;
+ 		res_bit = IORESOURCE_IO;
+ 	}
+@@ -881,76 +868,6 @@
+ 	device_create_file(&pdev->dev, &dev_attr_devspec);
+ }
+ 
+-#define ISA_SPACE_MASK 0x1
+-#define ISA_SPACE_IO 0x1
+-
+-static void __devinit pci_process_ISA_OF_ranges(struct device_node *isa_node,
+-				      unsigned long phb_io_base_phys,
+-				      void __iomem * phb_io_base_virt)
+-{
+-	/* Remove these asap */
+-
+-	struct pci_address {
+-		u32 a_hi;
+-		u32 a_mid;
+-		u32 a_lo;
+-	};
+-
+-	struct isa_address {
+-		u32 a_hi;
+-		u32 a_lo;
+-	};
+-
+-	struct isa_range {
+-		struct isa_address isa_addr;
+-		struct pci_address pci_addr;
+-		unsigned int size;
+-	};
+-
+-	const struct isa_range *range;
+-	unsigned long pci_addr;
+-	unsigned int isa_addr;
+-	unsigned int size;
+-	int rlen = 0;
+-
+-	range = of_get_property(isa_node, "ranges", &rlen);
+-	if (range == NULL || (rlen < sizeof(struct isa_range))) {
+-		printk(KERN_ERR "no ISA ranges or unexpected isa range size,"
+-		       "mapping 64k\n");
+-		__ioremap_explicit(phb_io_base_phys,
+-				   (unsigned long)phb_io_base_virt,
+-				   0x10000, _PAGE_NO_CACHE | _PAGE_GUARDED);
+-		return;	
+-	}
+-	
+-	/* From "ISA Binding to 1275"
+-	 * The ranges property is laid out as an array of elements,
+-	 * each of which comprises:
+-	 *   cells 0 - 1:	an ISA address
+-	 *   cells 2 - 4:	a PCI address 
+-	 *			(size depending on dev->n_addr_cells)
+-	 *   cell 5:		the size of the range
+-	 */
+-	if ((range->isa_addr.a_hi && ISA_SPACE_MASK) == ISA_SPACE_IO) {
+-		isa_addr = range->isa_addr.a_lo;
+-		pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | 
+-			range->pci_addr.a_lo;
+-
+-		/* Assume these are both zero */
+-		if ((pci_addr != 0) || (isa_addr != 0)) {
+-			printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
+-					__FUNCTION__);
+-			return;
+-		}
+-		
+-		size = PAGE_ALIGN(range->size);
+-
+-		__ioremap_explicit(phb_io_base_phys, 
+-				   (unsigned long) phb_io_base_virt, 
+-				   size, _PAGE_NO_CACHE | _PAGE_GUARDED);
+-	}
+-}
+-
+ void __devinit pci_process_bridge_OF_ranges(struct pci_controller *hose,
+ 					    struct device_node *dev, int prim)
+ {
+@@ -1045,155 +962,122 @@
+ 	}
+ }
+ 
+-void __devinit pci_setup_phb_io(struct pci_controller *hose, int primary)
+-{
+-	unsigned long size = hose->pci_io_size;
+-	unsigned long io_virt_offset;
+-	struct resource *res;
+-	struct device_node *isa_dn;
+-
+-	if (size == 0)
+-		return;
+-
+-	hose->io_base_virt = reserve_phb_iospace(size);
+-	DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
+-		hose->global_number, hose->io_base_phys,
+-		(unsigned long) hose->io_base_virt);
+-
+-	if (primary) {
+-		pci_io_base = (unsigned long)hose->io_base_virt;
+-		isa_dn = of_find_node_by_type(NULL, "isa");
+-		if (isa_dn) {
+-			isa_io_base = pci_io_base;
+-			pci_process_ISA_OF_ranges(isa_dn, hose->io_base_phys,
+-						hose->io_base_virt);
+-			of_node_put(isa_dn);
+-		}
+-	}
+-
+-	io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+-	res = &hose->io_resource;
+-	res->start += io_virt_offset;
+-	res->end += io_virt_offset;
+-
+-	/* If this is called after the initial PCI scan, then we need to
+-	 * proceed to IO mappings now
+-	 */
+-	if (pci_initial_scan_done)
+-		__ioremap_explicit(hose->io_base_phys,
+-				   (unsigned long)hose->io_base_virt,
+-				   hose->pci_io_size,
+-				   _PAGE_NO_CACHE | _PAGE_GUARDED);
+-}
++#ifdef CONFIG_HOTPLUG
+ 
+-void __devinit pci_setup_phb_io_dynamic(struct pci_controller *hose,
+-					int primary)
++int pcibios_unmap_io_space(struct pci_bus *bus)
+ {
+-	unsigned long size = hose->pci_io_size;
+-	unsigned long io_virt_offset;
+-	struct resource *res;
++	struct pci_controller *hose;
+ 
+-	if (size == 0)
+-		return;
++	WARN_ON(bus == NULL);
+ 
+-	hose->io_base_virt = __ioremap(hose->io_base_phys, size,
+-					_PAGE_NO_CACHE | _PAGE_GUARDED);
+-	DBG("phb%d io_base_phys 0x%lx io_base_virt 0x%lx\n",
+-		hose->global_number, hose->io_base_phys,
+-		(unsigned long) hose->io_base_virt);
+-
+-	if (primary)
+-		pci_io_base = (unsigned long)hose->io_base_virt;
+-
+-	io_virt_offset = (unsigned long)hose->io_base_virt - pci_io_base;
+-	res = &hose->io_resource;
+-	res->start += io_virt_offset;
+-	res->end += io_virt_offset;
+-}
++	/* If this is not a PHB, we only flush the hash table over
++	 * the area mapped by this bridge. We don't play with the PTE
++	 * mappings since we might have to deal with sub-page alignments
++	 * so flushing the hash table is the only sane way to make sure
++	 * that no hash entries are covering that removed bridge area
++	 * while still allowing other busses overlapping those pages
++	 */
++	if (bus->self) {
++		struct resource *res = bus->resource[0];
+ 
++		DBG("IO unmapping for PCI-PCI bridge %s\n",
++		    pci_name(bus->self));
+ 
+-static int get_bus_io_range(struct pci_bus *bus, unsigned long *start_phys,
+-				unsigned long *start_virt, unsigned long *size)
+-{
+-	struct pci_controller *hose = pci_bus_to_host(bus);
+-	struct resource *res;
++		__flush_hash_table_range(&init_mm, res->start + _IO_BASE,
++					 res->end - res->start + 1);
++		return 0;
++	}
+ 
+-	if (bus->self)
+-		res = bus->resource[0];
+-	else
+-		/* Root Bus */
+-		res = &hose->io_resource;
++	/* Get the host bridge */
++	hose = pci_bus_to_host(bus);
+ 
+-	if (res->end == 0 && res->start == 0)
+-		return 1;
++	/* Check if we have IOs allocated */
++	if (hose->io_base_alloc == 0)
++		return 0;
+ 
+-	*start_virt = pci_io_base + res->start;
+-	*start_phys = *start_virt + hose->io_base_phys
+-		- (unsigned long) hose->io_base_virt;
++	DBG("IO unmapping for PHB %s\n",
++	    ((struct device_node *)hose->arch_data)->full_name);
++	DBG("  alloc=0x%p\n", hose->io_base_alloc);
+ 
+-	if (res->end > res->start)
+-		*size = res->end - res->start + 1;
+-	else {
+-		printk("%s(): unexpected region 0x%lx->0x%lx\n",
+-		       __FUNCTION__, res->start, res->end);
+-		return 1;
+-	}
++	/* This is a PHB, we fully unmap the IO area */
++	vunmap(hose->io_base_alloc);
+ 
+ 	return 0;
+ }
++EXPORT_SYMBOL_GPL(pcibios_unmap_io_space);
+ 
+-int unmap_bus_range(struct pci_bus *bus)
+-{
+-	unsigned long start_phys;
+-	unsigned long start_virt;
+-	unsigned long size;
++#endif /* CONFIG_HOTPLUG */
+ 
+-	if (!bus) {
+-		printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+-		return 1;
+-	}
++int __devinit pcibios_map_io_space(struct pci_bus *bus)
++{
++	struct vm_struct *area;
++	unsigned long phys_page;
++	unsigned long size_page;
++	unsigned long io_virt_offset;
++	struct pci_controller *hose;
+ 	
+-	if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+-		return 1;
+-	if (__iounmap_explicit((void __iomem *) start_virt, size))
+-		return 1;
++	WARN_ON(bus == NULL);
+ 
++	/* If this is not a PHB, nothing to do, page tables still exist and
++	 * thus HPTEs will be faulted in when needed
++	 */
++	if (bus->self) {
++		DBG("IO mapping for PCI-PCI bridge %s\n",
++		    pci_name(bus->self));
++		DBG("  virt=0x%016lx...0x%016lx\n",
++		    bus->resource[0]->start + _IO_BASE,
++		    bus->resource[0]->end + _IO_BASE);
+ 	return 0;
+-}
+-EXPORT_SYMBOL(unmap_bus_range);
+-
+-int remap_bus_range(struct pci_bus *bus)
+-{
+-	unsigned long start_phys;
+-	unsigned long start_virt;
+-	unsigned long size;
+-
+-	if (!bus) {
+-		printk(KERN_ERR "%s() expected bus\n", __FUNCTION__);
+-		return 1;
+ 	}
+ 	
++	/* Get the host bridge */
++	hose = pci_bus_to_host(bus);
++	phys_page = _ALIGN_DOWN(hose->io_base_phys, PAGE_SIZE);
++	size_page = _ALIGN_UP(hose->pci_io_size, PAGE_SIZE);
+ 	
+-	if (get_bus_io_range(bus, &start_phys, &start_virt, &size))
+-		return 1;
+-	if (start_phys == 0)
+-		return 1;
+-	printk(KERN_DEBUG "mapping IO %lx -> %lx, size: %lx\n", start_phys, start_virt, size);
+-	if (__ioremap_explicit(start_phys, start_virt, size,
+-			       _PAGE_NO_CACHE | _PAGE_GUARDED))
+-		return 1;
++	/* Make sure IO area address is clear */
++	hose->io_base_alloc = NULL;
+ 
++	/* If there's no IO to map on that bus, get away too */
++	if (hose->pci_io_size == 0 || hose->io_base_phys == 0)
+ 	return 0;
+-}
+-EXPORT_SYMBOL(remap_bus_range);
+ 
+-static void phbs_remap_io(void)
+-{
+-	struct pci_controller *hose, *tmp;
++	/* Let's allocate some IO space for that guy. We don't pass
++	 * VM_IOREMAP because we don't care about alignment tricks that
++	 * the core does in that case. Maybe we should, due to stupid cards
++	 * with incomplete address decoding, but I'd rather not deal with
++	 * those outside of the reserved 64K legacy region.
++	 */
++	area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END);
++	if (area == NULL)
++		return -ENOMEM;
++	hose->io_base_alloc = area->addr;
++	hose->io_base_virt = (void __iomem *)(area->addr +
++					      hose->io_base_phys - phys_page);
++
++	DBG("IO mapping for PHB %s\n",
++	    ((struct device_node *)hose->arch_data)->full_name);
++	DBG("  phys=0x%016lx, virt=0x%p (alloc=0x%p)\n",
++	    hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc);
++	DBG("  size=0x%016lx (alloc=0x%016lx)\n",
++	    hose->pci_io_size, size_page);
++
++	/* Establish the mapping */
++	if (__ioremap_at(phys_page, area->addr, size_page,
++			 _PAGE_NO_CACHE | _PAGE_GUARDED) == NULL)
++		return -ENOMEM;
++
++	/* Fixup hose IO resource */
++	io_virt_offset = (unsigned long)hose->io_base_virt - _IO_BASE;
++	hose->io_resource.start += io_virt_offset;
++	hose->io_resource.end += io_virt_offset;
+ 
+-	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+-		remap_bus_range(hose->bus);
++	DBG("  hose->io_resource=0x%016lx...0x%016lx\n",
++	    hose->io_resource.start, hose->io_resource.end);
++
++	return 0;
+ }
++EXPORT_SYMBOL_GPL(pcibios_map_io_space);
+ 
+ static void __devinit fixup_resource(struct resource *res, struct pci_dev *dev)
+ {
+@@ -1201,8 +1085,7 @@
+ 	unsigned long offset;
+ 
+ 	if (res->flags & IORESOURCE_IO) {
+-		offset = (unsigned long)hose->io_base_virt - pci_io_base;
+-
++		offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ 		res->start += offset;
+ 		res->end += offset;
+ 	} else if (res->flags & IORESOURCE_MEM) {
+@@ -1217,9 +1100,20 @@
+ 	/* Update device resources.  */
+ 	int i;
+ 
+-	for (i = 0; i < PCI_NUM_RESOURCES; i++)
+-		if (dev->resource[i].flags)
+-			fixup_resource(&dev->resource[i], dev);
++	DBG("%s: Fixup resources:\n", pci_name(dev));
++	for (i = 0; i < PCI_NUM_RESOURCES; i++) {
++		struct resource *res = &dev->resource[i];
++		if (!res->flags)
++			continue;
++
++		DBG("  0x%02x < %08lx:0x%016lx...0x%016lx\n",
++		    i, res->flags, res->start, res->end);
++
++		fixup_resource(res, dev);
++
++		DBG("       > %08lx:0x%016lx...0x%016lx\n",
++		    res->flags, res->start, res->end);
++	}
+ }
+ EXPORT_SYMBOL(pcibios_fixup_device_resources);
+ 
+@@ -1360,7 +1254,7 @@
+ 		return;
+ 
+ 	if (rsrc->flags & IORESOURCE_IO)
+-		offset = (unsigned long)hose->io_base_virt - pci_io_base;
++		offset = (unsigned long)hose->io_base_virt - _IO_BASE;
+ 
+ 	/* We pass a fully fixed up address to userland for MMIO instead of
+ 	 * a BAR value because X is lame and expects to be able to use that
+@@ -1410,7 +1304,7 @@
+ 		if (address >= hose->io_base_phys &&
+ 		    address < (hose->io_base_phys + hose->pci_io_size)) {
+ 			unsigned long base =
+-				(unsigned long)hose->io_base_virt - pci_io_base;
++				(unsigned long)hose->io_base_virt - _IO_BASE;
+ 			return base + (address - hose->io_base_phys);
+ 		}
+ 	}
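Note the common thread in the hunks above: the global pci_io_base reference point is retired in favour of _IO_BASE, so once pcibios_map_io_space() has placed io_base_virt, every IO fixup reduces to adding one per-hose constant. A hedged sketch of that idiom (hose_io_offset and fixup_io_res_sketch are illustrative names; the fields are the ones used in the patch):

/* Per-hose IO offset, as computed three times in the hunks above */
static unsigned long hose_io_offset(struct pci_controller *hose)
{
	return (unsigned long)hose->io_base_virt - _IO_BASE;
}

/* Applying it to a resource, as pcibios_fixup_device_resources() does */
static void fixup_io_res_sketch(struct pci_controller *hose,
				struct resource *res)
{
	unsigned long offset = hose_io_offset(hose);

	res->start += offset;
	res->end += offset;
}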
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/process.c linux-2.6.22-try2/arch/powerpc/kernel/process.c
+--- linux-2.6.22-570/arch/powerpc/kernel/process.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/kernel/process.c	2007-12-19 15:29:22.000000000 -0500
+@@ -219,22 +219,26 @@
+ }
+ #endif /* CONFIG_SMP */
+ 
+-#ifdef CONFIG_PPC_MERGE		/* XXX for now */
+ int set_dabr(unsigned long dabr)
+ {
++#ifdef CONFIG_PPC_MERGE		/* XXX for now */
+ 	if (ppc_md.set_dabr)
+ 		return ppc_md.set_dabr(dabr);
++#endif
+ 
++	/* XXX should we have a CPU_FTR_HAS_DABR ? */
++#if defined(CONFIG_PPC64) || defined(CONFIG_6xx)
+ 	mtspr(SPRN_DABR, dabr);
++#endif
+ 	return 0;
+ }
+-#endif
+ 
+ #ifdef CONFIG_PPC64
+ DEFINE_PER_CPU(struct cpu_usage, cpu_usage_array);
+-static DEFINE_PER_CPU(unsigned long, current_dabr);
+ #endif
+ 
++static DEFINE_PER_CPU(unsigned long, current_dabr);
++
+ struct task_struct *__switch_to(struct task_struct *prev,
+ 	struct task_struct *new)
+ {
+@@ -299,12 +303,10 @@
+ 
+ #endif /* CONFIG_SMP */
+ 
+-#ifdef CONFIG_PPC64	/* for now */
+ 	if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) {
+ 		set_dabr(new->thread.dabr);
+ 		__get_cpu_var(current_dabr) = new->thread.dabr;
+ 	}
+-#endif /* CONFIG_PPC64 */
+ 
+ 	new_thread = &new->thread;
+ 	old_thread = &current->thread;
+@@ -474,12 +476,10 @@
+ 
+ 	discard_lazy_cpu_state();
+ 
+-#ifdef CONFIG_PPC64	/* for now */
+ 	if (current->thread.dabr) {
+ 		current->thread.dabr = 0;
+ 		set_dabr(0);
+ 	}
+-#endif
+ }
+ 
+ void
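The process.c hunks above make set_dabr() available on every PowerPC flavour and cache the last value written on each CPU, so __switch_to() only touches the (possibly hypervisor-mediated) register when the incoming thread actually wants a different watchpoint. A reduced sketch of that caching; cached_dabr and maybe_switch_dabr are illustrative names, the rest mirrors the patch:

static DEFINE_PER_CPU(unsigned long, cached_dabr);

static void maybe_switch_dabr(struct task_struct *new)
{
	/* Skip the SPR write (or ppc_md.set_dabr hcall) when unchanged */
	if (unlikely(__get_cpu_var(cached_dabr) != new->thread.dabr)) {
		set_dabr(new->thread.dabr);
		__get_cpu_var(cached_dabr) = new->thread.dabr;
	}
}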
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/prom_init.c linux-2.6.22-try2/arch/powerpc/kernel/prom_init.c
+--- linux-2.6.22-570/arch/powerpc/kernel/prom_init.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/kernel/prom_init.c	2007-12-19 15:29:22.000000000 -0500
+@@ -635,6 +635,7 @@
+ /* ibm,dynamic-reconfiguration-memory property supported */
+ #define OV5_DRCONF_MEMORY	0x20
+ #define OV5_LARGE_PAGES		0x10	/* large pages supported */
++#define OV5_DONATE_DEDICATE_CPU 0x02	/* donate dedicated CPU support */
+ /* PCIe/MSI support.  Without MSI full PCIe is not supported */
+ #ifdef CONFIG_PCI_MSI
+ #define OV5_MSI			0x01	/* PCIe/MSI support */
+@@ -685,7 +686,8 @@
+ 	/* option vector 5: PAPR/OF options */
+ 	3 - 2,				/* length */
+ 	0,				/* don't ignore, don't halt */
+-	OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY | OV5_MSI,
++	OV5_LPAR | OV5_SPLPAR | OV5_LARGE_PAGES | OV5_DRCONF_MEMORY |
++	OV5_DONATE_DEDICATE_CPU | OV5_MSI,
+ };
+ 
+ /* Old method - ELF header with PT_NOTE sections */
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/ptrace-common.h linux-2.6.22-try2/arch/powerpc/kernel/ptrace-common.h
+--- linux-2.6.22-570/arch/powerpc/kernel/ptrace-common.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/kernel/ptrace-common.h	1969-12-31 19:00:00.000000000 -0500
+@@ -1,161 +0,0 @@
+-/*
+- *    Copyright (c) 2002 Stephen Rothwell, IBM Coproration
+- *    Extracted from ptrace.c and ptrace32.c
+- *
+- * This file is subject to the terms and conditions of the GNU General
+- * Public License.  See the file README.legal in the main directory of
+- * this archive for more details.
+- */
+-
+-#ifndef _PPC64_PTRACE_COMMON_H
+-#define _PPC64_PTRACE_COMMON_H
+-
+-#include <asm/system.h>
+-
+-/*
+- * Set of msr bits that gdb can change on behalf of a process.
+- */
+-#define MSR_DEBUGCHANGE	(MSR_FE0 | MSR_SE | MSR_BE | MSR_FE1)
+-
+-/*
+- * Get contents of register REGNO in task TASK.
+- */
+-static inline unsigned long get_reg(struct task_struct *task, int regno)
+-{
+-	unsigned long tmp = 0;
+-
+-	/*
+-	 * Put the correct FP bits in, they might be wrong as a result
+-	 * of our lazy FP restore.
+-	 */
+-	if (regno == PT_MSR) {
+-		tmp = ((unsigned long *)task->thread.regs)[PT_MSR];
+-		tmp |= task->thread.fpexc_mode;
+-	} else if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) {
+-		tmp = ((unsigned long *)task->thread.regs)[regno];
+-	}
+-
+-	return tmp;
+-}
+-
+-/*
+- * Write contents of register REGNO in task TASK.
+- */
+-static inline int put_reg(struct task_struct *task, int regno,
+-			  unsigned long data)
+-{
+-	if (regno < PT_SOFTE) {
+-		if (regno == PT_MSR)
+-			data = (data & MSR_DEBUGCHANGE)
+-				| (task->thread.regs->msr & ~MSR_DEBUGCHANGE);
+-		((unsigned long *)task->thread.regs)[regno] = data;
+-		return 0;
+-	}
+-	return -EIO;
+-}
+-
+-static inline void set_single_step(struct task_struct *task)
+-{
+-	struct pt_regs *regs = task->thread.regs;
+-	if (regs != NULL)
+-		regs->msr |= MSR_SE;
+-	set_tsk_thread_flag(task, TIF_SINGLESTEP);
+-}
+-
+-static inline void clear_single_step(struct task_struct *task)
+-{
+-	struct pt_regs *regs = task->thread.regs;
+-	if (regs != NULL)
+-		regs->msr &= ~MSR_SE;
+-	clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+-}
+-
+-#ifdef CONFIG_ALTIVEC
+-/*
+- * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
+- * The transfer totals 34 quadword.  Quadwords 0-31 contain the
+- * corresponding vector registers.  Quadword 32 contains the vscr as the
+- * last word (offset 12) within that quadword.  Quadword 33 contains the
+- * vrsave as the first word (offset 0) within the quadword.
+- *
+- * This definition of the VMX state is compatible with the current PPC32
+- * ptrace interface.  This allows signal handling and ptrace to use the
+- * same structures.  This also simplifies the implementation of a bi-arch
+- * (combined (32- and 64-bit) gdb.
+- */
+-
+-/*
+- * Get contents of AltiVec register state in task TASK
+- */
+-static inline int get_vrregs(unsigned long __user *data,
+-			     struct task_struct *task)
+-{
+-	unsigned long regsize;
+-
+-	/* copy AltiVec registers VR[0] .. VR[31] */
+-	regsize = 32 * sizeof(vector128);
+-	if (copy_to_user(data, task->thread.vr, regsize))
+-		return -EFAULT;
+-	data += (regsize / sizeof(unsigned long));
+-
+-	/* copy VSCR */
+-	regsize = 1 * sizeof(vector128);
+-	if (copy_to_user(data, &task->thread.vscr, regsize))
+-		return -EFAULT;
+-	data += (regsize / sizeof(unsigned long));
+-
+-	/* copy VRSAVE */
+-	if (put_user(task->thread.vrsave, (u32 __user *)data))
+-		return -EFAULT;
+-
+-	return 0;
+-}
+-
+-/*
+- * Write contents of AltiVec register state into task TASK.
+- */
+-static inline int set_vrregs(struct task_struct *task,
+-			     unsigned long __user *data)
+-{
+-	unsigned long regsize;
+-
+-	/* copy AltiVec registers VR[0] .. VR[31] */
+-	regsize = 32 * sizeof(vector128);
+-	if (copy_from_user(task->thread.vr, data, regsize))
+-		return -EFAULT;
+-	data += (regsize / sizeof(unsigned long));
+-
+-	/* copy VSCR */
+-	regsize = 1 * sizeof(vector128);
+-	if (copy_from_user(&task->thread.vscr, data, regsize))
+-		return -EFAULT;
+-	data += (regsize / sizeof(unsigned long));
+-
+-	/* copy VRSAVE */
+-	if (get_user(task->thread.vrsave, (u32 __user *)data))
+-		return -EFAULT;
+-
+-	return 0;
+-}
+-#endif
+-
+-static inline int ptrace_set_debugreg(struct task_struct *task,
+-				      unsigned long addr, unsigned long data)
+-{
+-	/* We only support one DABR and no IABRS at the moment */
+-	if (addr > 0)
+-		return -EINVAL;
+-
+-	/* The bottom 3 bits are flags */
+-	if ((data & ~0x7UL) >= TASK_SIZE)
+-		return -EIO;
+-
+-	/* Ensure translation is on */
+-	if (data && !(data & DABR_TRANSLATION))
+-		return -EIO;
+-
+-	task->thread.dabr = data;
+-	return 0;
+-}
+-
+-#endif /* _PPC64_PTRACE_COMMON_H */
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/ptrace.c linux-2.6.22-try2/arch/powerpc/kernel/ptrace.c
+--- linux-2.6.22-570/arch/powerpc/kernel/ptrace.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/kernel/ptrace.c	2007-12-19 15:29:22.000000000 -0500
+@@ -35,11 +35,11 @@
+ #include <asm/pgtable.h>
+ #include <asm/system.h>
+ 
+-#ifdef CONFIG_PPC64
+-#include "ptrace-common.h"
+-#endif
++/*
++ * This does not yet catch signals sent when the child dies;
++ * that would have to be done in exit.c or in signal.c.
++ */
+ 
+-#ifdef CONFIG_PPC32
+ /*
+  * Set of msr bits that gdb can change on behalf of a process.
+  */
+@@ -48,65 +48,117 @@
+ #else
+ #define MSR_DEBUGCHANGE	(MSR_SE | MSR_BE)
+ #endif
+-#endif /* CONFIG_PPC32 */
+ 
+ /*
+- * does not yet catch signals sent when the child dies.
+- * in exit.c or in signal.c.
++ * Max register writeable via put_reg
+  */
+-
+ #ifdef CONFIG_PPC32
++#define PT_MAX_PUT_REG	PT_MQ
++#else
++#define PT_MAX_PUT_REG	PT_CCR
++#endif
++
+ /*
+  * Get contents of register REGNO in task TASK.
+  */
+-static inline unsigned long get_reg(struct task_struct *task, int regno)
++unsigned long ptrace_get_reg(struct task_struct *task, int regno)
+ {
+-	if (regno < sizeof(struct pt_regs) / sizeof(unsigned long)
+-	    && task->thread.regs != NULL)
++	unsigned long tmp = 0;
++
++	if (task->thread.regs == NULL)
++		return -EIO;
++
++	if (regno == PT_MSR) {
++		tmp = ((unsigned long *)task->thread.regs)[PT_MSR];
++		return tmp | task->thread.fpexc_mode;
++	}
++
++	if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long)))
+ 		return ((unsigned long *)task->thread.regs)[regno];
+-	return (0);
++
++	return -EIO;
+ }
+ 
+ /*
+  * Write contents of register REGNO in task TASK.
+  */
+-static inline int put_reg(struct task_struct *task, int regno,
+-			  unsigned long data)
++int ptrace_put_reg(struct task_struct *task, int regno, unsigned long data)
+ {
+-	if (regno <= PT_MQ && task->thread.regs != NULL) {
++	if (task->thread.regs == NULL)
++		return -EIO;
++
++	if (regno <= PT_MAX_PUT_REG || regno == PT_TRAP) {
+ 		if (regno == PT_MSR)
+ 			data = (data & MSR_DEBUGCHANGE)
+ 				| (task->thread.regs->msr & ~MSR_DEBUGCHANGE);
++		/* We prevent mucking around with the reserved area of trap,
++		 * which is used internally by the kernel.
++		 */
++		if (regno == PT_TRAP)
++			data &= 0xfff0;
+ 		((unsigned long *)task->thread.regs)[regno] = data;
+ 		return 0;
+ 	}
+ 	return -EIO;
+ }
+ 
++
++static int get_fpregs(void __user *data, struct task_struct *task,
++		      int has_fpscr)
++{
++	unsigned int count = has_fpscr ? 33 : 32;
++
++	if (copy_to_user(data, task->thread.fpr, count * sizeof(double)))
++		return -EFAULT;
++	return 0;
++}
++
++static int set_fpregs(void __user *data, struct task_struct *task,
++		      int has_fpscr)
++{
++	unsigned int count = has_fpscr ? 33 : 32;
++
++	if (copy_from_user(task->thread.fpr, data, count * sizeof(double)))
++		return -EFAULT;
++	return 0;
++}
++
++
+ #ifdef CONFIG_ALTIVEC
+ /*
++ * Get/set all the altivec registers vr0..vr31, vscr, vrsave, in one go.
++ * The transfer totals 34 quadwords.  Quadwords 0-31 contain the
++ * corresponding vector registers.  Quadword 32 contains the vscr as the
++ * last word (offset 12) within that quadword.  Quadword 33 contains the
++ * vrsave as the first word (offset 0) within the quadword.
++ *
++ * This definition of the VMX state is compatible with the current PPC32
++ * ptrace interface.  This allows signal handling and ptrace to use the
++ * same structures.  This also simplifies the implementation of a bi-arch
++ * (combined 32- and 64-bit) gdb.
++ */
++
++/*
+  * Get contents of AltiVec register state in task TASK
+  */
+-static inline int get_vrregs(unsigned long __user *data, struct task_struct *task)
++static int get_vrregs(unsigned long __user *data, struct task_struct *task)
+ {
+-	int i, j;
+-
+-	if (!access_ok(VERIFY_WRITE, data, 133 * sizeof(unsigned long)))
+-		return -EFAULT;
++	unsigned long regsize;
+ 
+ 	/* copy AltiVec registers VR[0] .. VR[31] */
+-	for (i = 0; i < 32; i++)
+-		for (j = 0; j < 4; j++, data++)
+-			if (__put_user(task->thread.vr[i].u[j], data))
++	regsize = 32 * sizeof(vector128);
++	if (copy_to_user(data, task->thread.vr, regsize))
+ 				return -EFAULT;
++	data += (regsize / sizeof(unsigned long));
+ 
+ 	/* copy VSCR */
+-	for (i = 0; i < 4; i++, data++)
+-		if (__put_user(task->thread.vscr.u[i], data))
++	regsize = 1 * sizeof(vector128);
++	if (copy_to_user(data, &task->thread.vscr, regsize))
+ 			return -EFAULT;
++	data += (regsize / sizeof(unsigned long));
+ 
+         /* copy VRSAVE */
+-	if (__put_user(task->thread.vrsave, data))
++	if (put_user(task->thread.vrsave, (u32 __user *)data))
+ 		return -EFAULT;
+ 
+ 	return 0;
+@@ -115,31 +167,29 @@
+ /*
+  * Write contents of AltiVec register state into task TASK.
+  */
+-static inline int set_vrregs(struct task_struct *task, unsigned long __user *data)
++static int set_vrregs(struct task_struct *task, unsigned long __user *data)
+ {
+-	int i, j;
+-
+-	if (!access_ok(VERIFY_READ, data, 133 * sizeof(unsigned long)))
+-		return -EFAULT;
++	unsigned long regsize;
+ 
+ 	/* copy AltiVec registers VR[0] .. VR[31] */
+-	for (i = 0; i < 32; i++)
+-		for (j = 0; j < 4; j++, data++)
+-			if (__get_user(task->thread.vr[i].u[j], data))
++	regsize = 32 * sizeof(vector128);
++	if (copy_from_user(task->thread.vr, data, regsize))
+ 				return -EFAULT;
++	data += (regsize / sizeof(unsigned long));
+ 
+ 	/* copy VSCR */
+-	for (i = 0; i < 4; i++, data++)
+-		if (__get_user(task->thread.vscr.u[i], data))
++	regsize = 1 * sizeof(vector128);
++	if (copy_from_user(&task->thread.vscr, data, regsize))
+ 			return -EFAULT;
++	data += (regsize / sizeof(unsigned long));
+ 
+ 	/* copy VRSAVE */
+-	if (__get_user(task->thread.vrsave, data))
++	if (get_user(task->thread.vrsave, (u32 __user *)data))
+ 		return -EFAULT;
+ 
+ 	return 0;
+ }
+-#endif
++#endif /* CONFIG_ALTIVEC */
+ 
+ #ifdef CONFIG_SPE
+ 
+@@ -156,7 +206,7 @@
+ /*
+  * Get contents of SPE register state in task TASK.
+  */
+-static inline int get_evrregs(unsigned long *data, struct task_struct *task)
++static int get_evrregs(unsigned long *data, struct task_struct *task)
+ {
+ 	int i;
+ 
+@@ -182,7 +232,7 @@
+ /*
+  * Write contents of SPE register state into task TASK.
+  */
+-static inline int set_evrregs(struct task_struct *task, unsigned long *data)
++static int set_evrregs(struct task_struct *task, unsigned long *data)
+ {
+ 	int i;
+ 
+@@ -205,8 +255,8 @@
+ }
+ #endif /* CONFIG_SPE */
+ 
+-static inline void
+-set_single_step(struct task_struct *task)
++
++static void set_single_step(struct task_struct *task)
+ {
+ 	struct pt_regs *regs = task->thread.regs;
+ 
+@@ -221,8 +271,7 @@
+ 	set_tsk_thread_flag(task, TIF_SINGLESTEP);
+ }
+ 
+-static inline void
+-clear_single_step(struct task_struct *task)
++static void clear_single_step(struct task_struct *task)
+ {
+ 	struct pt_regs *regs = task->thread.regs;
+ 
+@@ -236,7 +285,25 @@
+ 	}
+ 	clear_tsk_thread_flag(task, TIF_SINGLESTEP);
+ }
+-#endif /* CONFIG_PPC32 */
++
++static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
++			       unsigned long data)
++{
++	/* We only support one DABR and no IABRS at the moment */
++	if (addr > 0)
++		return -EINVAL;
++
++	/* The bottom 3 bits are flags */
++	if ((data & ~0x7UL) >= TASK_SIZE)
++		return -EIO;
++
++	/* Ensure translation is on */
++	if (data && !(data & DABR_TRANSLATION))
++		return -EIO;
++
++	task->thread.dabr = data;
++	return 0;
++}
+ 
+ /*
+  * Called by kernel/ptrace.c when detaching..
+@@ -249,6 +316,62 @@
+ 	clear_single_step(child);
+ }
+ 
++/*
++ * Here are the old "legacy" powerpc-specific getregs/setregs ptrace calls;
++ * we mark them as obsolete now and they will be removed in a future version.
++ */
++static long arch_ptrace_old(struct task_struct *child, long request, long addr,
++			    long data)
++{
++	int ret = -EPERM;
++
++	switch(request) {
++	case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */
++		int i;
++		unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
++		unsigned long __user *tmp = (unsigned long __user *)addr;
++
++		for (i = 0; i < 32; i++) {
++			ret = put_user(*reg, tmp);
++			if (ret)
++				break;
++			reg++;
++			tmp++;
++		}
++		break;
++	}
++
++	case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */
++		int i;
++		unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
++		unsigned long __user *tmp = (unsigned long __user *)addr;
++
++		for (i = 0; i < 32; i++) {
++			ret = get_user(*reg, tmp);
++			if (ret)
++				break;
++			reg++;
++			tmp++;
++		}
++		break;
++	}
++
++	case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */
++		flush_fp_to_thread(child);
++		ret = get_fpregs((void __user *)addr, child, 0);
++		break;
++	}
++
++	case PPC_PTRACE_SETFPREGS: { /* Set FPRs 0 - 31. */
++		flush_fp_to_thread(child);
++		ret = set_fpregs((void __user *)addr, child, 0);
++		break;
++	}
++
++	}
++	return ret;
++}
++
+ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+ {
+ 	int ret = -EPERM;
+@@ -284,11 +407,9 @@
+ #endif
+ 			break;
+ 
+-#ifdef CONFIG_PPC32
+ 		CHECK_FULL_REGS(child->thread.regs);
+-#endif
+ 		if (index < PT_FPR0) {
+-			tmp = get_reg(child, (int) index);
++			tmp = ptrace_get_reg(child, (int) index);
+ 		} else {
+ 			flush_fp_to_thread(child);
+ 			tmp = ((unsigned long *)child->thread.fpr)[index - PT_FPR0];
+@@ -323,13 +444,9 @@
+ #endif
+ 			break;
+ 
+-#ifdef CONFIG_PPC32
+ 		CHECK_FULL_REGS(child->thread.regs);
+-#endif
+-		if (index == PT_ORIG_R3)
+-			break;
+ 		if (index < PT_FPR0) {
+-			ret = put_reg(child, index, data);
++			ret = ptrace_put_reg(child, index, data);
+ 		} else {
+ 			flush_fp_to_thread(child);
+ 			((unsigned long *)child->thread.fpr)[index - PT_FPR0] = data;
+@@ -384,7 +501,6 @@
+ 		break;
+ 	}
+ 
+-#ifdef CONFIG_PPC64
+ 	case PTRACE_GET_DEBUGREG: {
+ 		ret = -EINVAL;
+ 		/* We only support one DABR and no IABRS at the moment */
+@@ -398,73 +514,61 @@
+ 	case PTRACE_SET_DEBUGREG:
+ 		ret = ptrace_set_debugreg(child, addr, data);
+ 		break;
+-#endif
+ 
+ 	case PTRACE_DETACH:
+ 		ret = ptrace_detach(child, data);
+ 		break;
+ 
+-	case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */
+-		int i;
+-		unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
+-		unsigned long __user *tmp = (unsigned long __user *)addr;
+-
+-		for (i = 0; i < 32; i++) {
+-			ret = put_user(*reg, tmp);
+-			if (ret)
++#ifdef CONFIG_PPC64
++	case PTRACE_GETREGS64:
++#endif
++	case PTRACE_GETREGS: { /* Get all pt_regs from the child. */
++		int ui;
++	  	if (!access_ok(VERIFY_WRITE, (void __user *)data,
++			       sizeof(struct pt_regs))) {
++			ret = -EIO;
+ 				break;
+-			reg++;
+-			tmp++;
+ 		}
+-		break;
++		ret = 0;
++		for (ui = 0; ui < PT_REGS_COUNT; ui ++) {
++			ret |= __put_user(ptrace_get_reg(child, ui),
++					  (unsigned long __user *) data);
++			data += sizeof(long);
+ 	}
+-
+-	case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */
+-		int i;
+-		unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
+-		unsigned long __user *tmp = (unsigned long __user *)addr;
+-
+-		for (i = 0; i < 32; i++) {
+-			ret = get_user(*reg, tmp);
+-			if (ret)
+ 				break;
+-			reg++;
+-			tmp++;
+ 		}
++
++#ifdef CONFIG_PPC64
++	case PTRACE_SETREGS64:
++#endif
++	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
++		unsigned long tmp;
++		int ui;
++	  	if (!access_ok(VERIFY_READ, (void __user *)data,
++			       sizeof(struct pt_regs))) {
++			ret = -EIO;
+ 		break;
+ 	}
+-
+-	case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */
+-		int i;
+-		unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
+-		unsigned long __user *tmp = (unsigned long __user *)addr;
+-
+-		flush_fp_to_thread(child);
+-
+-		for (i = 0; i < 32; i++) {
+-			ret = put_user(*reg, tmp);
++		ret = 0;
++		for (ui = 0; ui < PT_REGS_COUNT; ui ++) {
++			ret = __get_user(tmp, (unsigned long __user *) data);
+ 			if (ret)
+ 				break;
+-			reg++;
+-			tmp++;
++			ptrace_put_reg(child, ui, tmp);
++			data += sizeof(long);
+ 		}
+ 		break;
+ 	}
+ 
+-	case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. */
+-		int i;
+-		unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
+-		unsigned long __user *tmp = (unsigned long __user *)addr;
+-
++	case PTRACE_GETFPREGS: { /* Get the child FPU state (FPR0...31 + FPSCR) */
+ 		flush_fp_to_thread(child);
+-
+-		for (i = 0; i < 32; i++) {
+-			ret = get_user(*reg, tmp);
+-			if (ret)
++		ret = get_fpregs((void __user *)data, child, 1);
+ 				break;
+-			reg++;
+-			tmp++;
+ 		}
++
++	case PTRACE_SETFPREGS: { /* Set the child FPU state (FPR0...31 + FPSCR) */
++		flush_fp_to_thread(child);
++		ret = set_fpregs((void __user *)data, child, 1);
+ 		break;
+ 	}
+ 
+@@ -499,11 +603,18 @@
+ 		break;
+ #endif
+ 
++	/* Old reverse-args ptrace calls */
++	case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
++	case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
++	case PPC_PTRACE_GETFPREGS: /* Get FPRs 0 - 31. */
++	case PPC_PTRACE_SETFPREGS: /* Set FPRs 0 - 31. */
++		ret = arch_ptrace_old(child, request, addr, data);
++		break;
++
+ 	default:
+ 		ret = ptrace_request(child, request, addr, data);
+ 		break;
+ 	}
+-
+ 	return ret;
+ }
+ 
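To make the reworked interface concrete, here is a hedged user-space sketch of driving the PTRACE_GETREGS request implemented above. It assumes <asm/ptrace.h> exports PT_REGS_COUNT (introduced alongside this rework) and that the full pt_regs image comes back through the ptrace data argument, word by word, as in the kernel loop above.

#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <asm/ptrace.h>		/* PT_REGS_COUNT, assumed available */

static int dump_gprs(pid_t child)
{
	unsigned long regs[PT_REGS_COUNT];
	int i;

	/* Child must already be ptrace-attached and stopped */
	if (ptrace(PTRACE_GETREGS, child, NULL, regs) == -1)
		return -1;
	for (i = 0; i < 32; i++)	/* GPRs are the first 32 slots */
		printf("r%-2d = 0x%016lx\n", i, regs[i]);
	return 0;
}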
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/ptrace32.c linux-2.6.22-try2/arch/powerpc/kernel/ptrace32.c
+--- linux-2.6.22-570/arch/powerpc/kernel/ptrace32.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/kernel/ptrace32.c	2007-12-19 15:29:22.000000000 -0500
+@@ -33,13 +33,55 @@
+ #include <asm/pgtable.h>
+ #include <asm/system.h>
+ 
+-#include "ptrace-common.h"
+-
+ /*
+  * does not yet catch signals sent when the child dies.
+  * in exit.c or in signal.c.
+  */
+ 
++/*
++ * Here are the old "legacy" powerpc-specific getregs/setregs ptrace calls;
++ * we mark them as obsolete now and they will be removed in a future version.
++ */
++static long compat_ptrace_old(struct task_struct *child, long request,
++			      long addr, long data)
++{
++	int ret = -EPERM;
++
++	switch(request) {
++	case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */
++		int i;
++		unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
++		unsigned int __user *tmp = (unsigned int __user *)addr;
++
++		for (i = 0; i < 32; i++) {
++			ret = put_user(*reg, tmp);
++			if (ret)
++				break;
++			reg++;
++			tmp++;
++		}
++		break;
++	}
++
++	case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */
++		int i;
++		unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
++		unsigned int __user *tmp = (unsigned int __user *)addr;
++
++		for (i = 0; i < 32; i++) {
++			ret = get_user(*reg, tmp);
++			if (ret)
++				break;
++			reg++;
++			tmp++;
++		}
++		break;
++	}
++
++	}
++	return ret;
++}
++
+ long compat_sys_ptrace(int request, int pid, unsigned long addr,
+ 		       unsigned long data)
+ {
+@@ -123,7 +165,7 @@
+ 			break;
+ 
+ 		if (index < PT_FPR0) {
+-			tmp = get_reg(child, index);
++			tmp = ptrace_get_reg(child, index);
+ 		} else {
+ 			flush_fp_to_thread(child);
+ 			/*
+@@ -162,7 +204,9 @@
+ 		else
+ 			part = 0;  /* want the 1st half of the register (left-most). */
+ 
+-		/* Validate the input - check to see if address is on the wrong boundary or beyond the end of the user area */
++		/* Validate the input - check to see if the address is on the
++		 * wrong boundary or beyond the end of the user area
++		 */
+ 		if ((addr & 3) || numReg > PT_FPSCR)
+ 			break;
+ 
+@@ -170,7 +214,7 @@
+ 			flush_fp_to_thread(child);
+ 			tmp = ((unsigned long int *)child->thread.fpr)[numReg - PT_FPR0];
+ 		} else { /* register within PT_REGS struct */
+-			tmp = get_reg(child, numReg);
++			tmp = ptrace_get_reg(child, numReg);
+ 		} 
+ 		reg32bits = ((u32*)&tmp)[part];
+ 		ret = put_user(reg32bits, (u32 __user *)data);
+@@ -226,10 +270,8 @@
+ 		if ((addr & 3) || (index > PT_FPSCR32))
+ 			break;
+ 
+-		if (index == PT_ORIG_R3)
+-			break;
+ 		if (index < PT_FPR0) {
+-			ret = put_reg(child, index, data);
++			ret = ptrace_put_reg(child, index, data);
+ 		} else {
+ 			flush_fp_to_thread(child);
+ 			/*
+@@ -258,70 +300,25 @@
+ 		/* Determine which register the user wants */
+ 		index = (u64)addr >> 2;
+ 		numReg = index / 2;
++
+ 		/*
+ 		 * Validate the input - check to see if address is on the
+ 		 * wrong boundary or beyond the end of the user area
+ 		 */
+ 		if ((addr & 3) || (numReg > PT_FPSCR))
+ 			break;
+-		/* Insure it is a register we let them change */
+-		if ((numReg == PT_ORIG_R3)
+-				|| ((numReg > PT_CCR) && (numReg < PT_FPR0)))
+-			break;
+-		if (numReg >= PT_FPR0) {
+-			flush_fp_to_thread(child);
+-		}
+-		if (numReg == PT_MSR)
+-			data = (data & MSR_DEBUGCHANGE)
+-				| (child->thread.regs->msr & ~MSR_DEBUGCHANGE);
+-		((u32*)child->thread.regs)[index] = data;
+-		ret = 0;
+-		break;
+-	}
+-
+-	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
+-	case PTRACE_CONT: { /* restart after signal. */
+-		ret = -EIO;
+-		if (!valid_signal(data))
+-			break;
+-		if (request == PTRACE_SYSCALL)
+-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
++		if (numReg < PT_FPR0) {
++			unsigned long freg = ptrace_get_reg(child, numReg);
++			if (index % 2)
++				freg = (freg & ~0xfffffffful) | (data & 0xfffffffful);
+ 		else
+-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+-		child->exit_code = data;
+-		/* make sure the single step bit is not set. */
+-		clear_single_step(child);
+-		wake_up_process(child);
+-		ret = 0;
+-		break;
+-	}
+-
+-	/*
+-	 * make the child exit.  Best I can do is send it a sigkill.
+-	 * perhaps it should be put in the status that it wants to
+-	 * exit.
+-	 */
+-	case PTRACE_KILL: {
++				freg = (freg & 0xfffffffful) | (data << 32);
++			ret = ptrace_put_reg(child, numReg, freg);
++		} else {
++			flush_fp_to_thread(child);
++			((unsigned int *)child->thread.regs)[index] = data;
+ 		ret = 0;
+-		if (child->exit_state == EXIT_ZOMBIE)	/* already dead */
+-			break;
+-		child->exit_code = SIGKILL;
+-		/* make sure the single step bit is not set. */
+-		clear_single_step(child);
+-		wake_up_process(child);
+-		break;
+ 	}
+-
+-	case PTRACE_SINGLESTEP: {  /* set the trap flag. */
+-		ret = -EIO;
+-		if (!valid_signal(data))
+-			break;
+-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+-		set_single_step(child);
+-		child->exit_code = data;
+-		/* give it a chance to run. */
+-		wake_up_process(child);
+-		ret = 0;
+ 		break;
+ 	}
+ 
+@@ -334,95 +331,67 @@
+ 		break;
+ 	}
+ 
+-	case PTRACE_SET_DEBUGREG:
+-		ret = ptrace_set_debugreg(child, addr, data);
+-		break;
+-
+-	case PTRACE_DETACH:
+-		ret = ptrace_detach(child, data);
++	case PTRACE_GETEVENTMSG:
++		ret = put_user(child->ptrace_message, (unsigned int __user *) data);
+ 		break;
+ 
+-	case PPC_PTRACE_GETREGS: { /* Get GPRs 0 - 31. */
+-		int i;
+-		unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
+-		unsigned int __user *tmp = (unsigned int __user *)addr;
+-
+-		for (i = 0; i < 32; i++) {
+-			ret = put_user(*reg, tmp);
+-			if (ret)
+-				break;
+-			reg++;
+-			tmp++;
+-		}
++	case PTRACE_GETREGS: { /* Get all pt_regs from the child. */
++		int ui;
++	  	if (!access_ok(VERIFY_WRITE, (void __user *)data,
++			       PT_REGS_COUNT * sizeof(int))) {
++			ret = -EIO;
+ 		break;
+ 	}
+-
+-	case PPC_PTRACE_SETREGS: { /* Set GPRs 0 - 31. */
+-		int i;
+-		unsigned long *reg = &((unsigned long *)child->thread.regs)[0];
+-		unsigned int __user *tmp = (unsigned int __user *)addr;
+-
+-		for (i = 0; i < 32; i++) {
+-			ret = get_user(*reg, tmp);
+-			if (ret)
+-				break;
+-			reg++;
+-			tmp++;
++		ret = 0;
++		for (ui = 0; ui < PT_REGS_COUNT; ui ++) {
++			ret |= __put_user(ptrace_get_reg(child, ui),
++					  (unsigned int __user *) data);
++			data += sizeof(int);
+ 		}
+ 		break;
+ 	}
+ 
+-	case PPC_PTRACE_GETFPREGS: { /* Get FPRs 0 - 31. */
+-		int i;
+-		unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
+-		unsigned int __user *tmp = (unsigned int __user *)addr;
+-
+-		flush_fp_to_thread(child);
+-
+-		for (i = 0; i < 32; i++) {
+-			ret = put_user(*reg, tmp);
+-			if (ret)
+-				break;
+-			reg++;
+-			tmp++;
+-		}
++	case PTRACE_SETREGS: { /* Set all gp regs in the child. */
++		unsigned long tmp;
++		int ui;
++	  	if (!access_ok(VERIFY_READ, (void __user *)data,
++			       PT_REGS_COUNT * sizeof(int))) {
++			ret = -EIO;
+ 		break;
+ 	}
+-
+-	case PPC_PTRACE_SETFPREGS: { /* Get FPRs 0 - 31. */
+-		int i;
+-		unsigned long *reg = &((unsigned long *)child->thread.fpr)[0];
+-		unsigned int __user *tmp = (unsigned int __user *)addr;
+-
+-		flush_fp_to_thread(child);
+-
+-		for (i = 0; i < 32; i++) {
+-			ret = get_user(*reg, tmp);
++		ret = 0;
++		for (ui = 0; ui < PT_REGS_COUNT; ui ++) {
++			ret = __get_user(tmp, (unsigned int __user *) data);
+ 			if (ret)
+ 				break;
+-			reg++;
+-			tmp++;
++			ptrace_put_reg(child, ui, tmp);
++			data += sizeof(int);
+ 		}
+ 		break;
+ 	}
+ 
+-	case PTRACE_GETEVENTMSG:
+-		ret = put_user(child->ptrace_message, (unsigned int __user *) data);
+-		break;
+-
+-#ifdef CONFIG_ALTIVEC
++	case PTRACE_GETFPREGS:
++	case PTRACE_SETFPREGS:
+ 	case PTRACE_GETVRREGS:
+-		/* Get the child altivec register state. */
+-		flush_altivec_to_thread(child);
+-		ret = get_vrregs((unsigned long __user *)data, child);
++	case PTRACE_SETVRREGS:
++	case PTRACE_GETREGS64:
++	case PTRACE_SETREGS64:
++	case PPC_PTRACE_GETFPREGS:
++	case PPC_PTRACE_SETFPREGS:
++	case PTRACE_KILL:
++	case PTRACE_SINGLESTEP:
++	case PTRACE_DETACH:
++	case PTRACE_SET_DEBUGREG:
++	case PTRACE_SYSCALL:
++	case PTRACE_CONT:
++		ret = arch_ptrace(child, request, addr, data);
+ 		break;
+ 
+-	case PTRACE_SETVRREGS:
+-		/* Set the child altivec register state. */
+-		flush_altivec_to_thread(child);
+-		ret = set_vrregs(child, (unsigned long __user *)data);
++	/* Old reverse-args ptrace calls */
++	case PPC_PTRACE_GETREGS: /* Get GPRs 0 - 31. */
++	case PPC_PTRACE_SETREGS: /* Set GPRs 0 - 31. */
++		ret = compat_ptrace_old(child, request, addr, data);
+ 		break;
+-#endif
+ 
+ 	default:
+ 		ret = ptrace_request(child, request, addr, data);
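The compat path above repeatedly hands 64-bit registers to a 32-bit debugger one word at a time. When the debugger stores a word back, the other half must be preserved; here is a hedged sketch of that merge, mirroring the freg logic in the store hunk above (merge_half and low_word are illustrative names):

static unsigned long merge_half(unsigned long reg64, unsigned int data,
				int low_word)
{
	if (low_word)	/* index % 2: replace the right-most word */
		return (reg64 & ~0xffffffffUL) | (data & 0xffffffffUL);
	/* otherwise replace the left-most word */
	return (reg64 & 0xffffffffUL) | ((unsigned long)data << 32);
}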
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/rtas_pci.c linux-2.6.22-try2/arch/powerpc/kernel/rtas_pci.c
+--- linux-2.6.22-570/arch/powerpc/kernel/rtas_pci.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/kernel/rtas_pci.c	2007-12-19 15:29:22.000000000 -0500
+@@ -278,10 +278,8 @@
+ {
+ 	struct device_node *node;
+ 	struct pci_controller *phb;
+-	unsigned int index;
+ 	struct device_node *root = of_find_node_by_path("/");
+ 
+-	index = 0;
+ 	for (node = of_get_next_child(root, NULL);
+ 	     node != NULL;
+ 	     node = of_get_next_child(root, node)) {
+@@ -295,8 +293,7 @@
+ 			continue;
+ 		rtas_setup_phb(phb);
+ 		pci_process_bridge_OF_ranges(phb, node, 0);
+-		pci_setup_phb_io(phb, index == 0);
+-		index++;
++		isa_bridge_find_early(phb);
+ 	}
+ 
+ 	of_node_put(root);
+@@ -335,7 +332,7 @@
+ 		return 1;
+ 	}
+ 
+-	rc = unmap_bus_range(b);
++	rc = pcibios_unmap_io_space(b);
+ 	if (rc) {
+ 		printk(KERN_ERR "%s: failed to unmap IO on bus %s\n",
+ 			__FUNCTION__, b->name);
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/setup_32.c linux-2.6.22-try2/arch/powerpc/kernel/setup_32.c
+--- linux-2.6.22-570/arch/powerpc/kernel/setup_32.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/kernel/setup_32.c	2007-12-19 15:29:24.000000000 -0500
+@@ -45,10 +45,6 @@
+ 
+ #define DBG(fmt...)
+ 
+-#if defined CONFIG_KGDB
+-#include <asm/kgdb.h>
+-#endif
+-
+ extern void bootx_init(unsigned long r4, unsigned long phys);
+ 
+ struct ide_machdep_calls ppc_ide_md;
+@@ -245,30 +241,16 @@
+ 
+ 	xmon_setup();
+ 
+-#if defined(CONFIG_KGDB)
+-	if (ppc_md.kgdb_map_scc)
+-		ppc_md.kgdb_map_scc();
+-	set_debug_traps();
+-	if (strstr(cmd_line, "gdb")) {
+-		if (ppc_md.progress)
+-			ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000);
+-		printk("kgdb breakpoint activated\n");
+-		breakpoint();
+-	}
+-#endif
+-
+ 	/*
+ 	 * Set cache line size based on type of cpu as a default.
+ 	 * Systems with OF can look in the properties on the cpu node(s)
+ 	 * for a possibly more accurate value.
+ 	 */
+-	if (cpu_has_feature(CPU_FTR_SPLIT_ID_CACHE)) {
+ 		dcache_bsize = cur_cpu_spec->dcache_bsize;
+ 		icache_bsize = cur_cpu_spec->icache_bsize;
+ 		ucache_bsize = 0;
+-	} else
+-		ucache_bsize = dcache_bsize = icache_bsize
+-			= cur_cpu_spec->dcache_bsize;
++	if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
++		ucache_bsize = icache_bsize = dcache_bsize;
+ 
+ 	/* reboot on panic */
+ 	panic_timeout = 180;
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal.c linux-2.6.22-try2/arch/powerpc/kernel/signal.c
+--- linux-2.6.22-570/arch/powerpc/kernel/signal.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/kernel/signal.c	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,180 @@
++/*
++ * Common signal handling code for both 32 and 64 bits
++ *
++ *    Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
++ *    Extracted from signal_32.c and signal_64.c
++ *
++ * This file is subject to the terms and conditions of the GNU General
++ * Public License.  See the file README.legal in the main directory of
++ * this archive for more details.
++ */
++
++#include <linux/ptrace.h>
++#include <linux/signal.h>
++#include <asm/uaccess.h>
++#include <asm/unistd.h>
++
++#include "signal.h"
++
++/*
++ * Allocate space for the signal frame
++ */
++void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
++			   size_t frame_size)
++{
++	unsigned long oldsp, newsp;
++
++	/* Default to using normal stack */
++	oldsp = regs->gpr[1];
++
++	/* Check for alt stack */
++	if ((ka->sa.sa_flags & SA_ONSTACK) &&
++	    current->sas_ss_size && !on_sig_stack(oldsp))
++		oldsp = (current->sas_ss_sp + current->sas_ss_size);
++
++	/* Get aligned frame */
++	newsp = (oldsp - frame_size) & ~0xFUL;
++
++	/* Check access */
++	if (!access_ok(VERIFY_WRITE, (void __user *)newsp, oldsp - newsp))
++		return NULL;
++
++	return (void __user *)newsp;
++}
++
++
++/*
++ * Restore the user process's signal mask
++ */
++void restore_sigmask(sigset_t *set)
++{
++	sigdelsetmask(set, ~_BLOCKABLE);
++	spin_lock_irq(&current->sighand->siglock);
++	current->blocked = *set;
++	recalc_sigpending();
++	spin_unlock_irq(&current->sighand->siglock);
++}
++
++static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
++				  int has_handler)
++{
++	unsigned long ret = regs->gpr[3];
++	int restart = 1;
++
++	/* syscall ? */
++	if (TRAP(regs) != 0x0C00)
++		return;
++
++	/* error signalled ? */
++	if (!(regs->ccr & 0x10000000))
++		return;
++
++	switch (ret) {
++	case ERESTART_RESTARTBLOCK:
++	case ERESTARTNOHAND:
++		/* ERESTARTNOHAND means that the syscall should only be
++		 * restarted if there was no handler for the signal, so we
++		 * restart only when no handler is about to run.
++		 */
++		restart = !has_handler;
++		break;
++	case ERESTARTSYS:
++		/* ERESTARTSYS means to restart the syscall if there is no
++		 * handler or the handler was registered with SA_RESTART
++		 */
++		restart = !has_handler || (ka->sa.sa_flags & SA_RESTART) != 0;
++		break;
++	case ERESTARTNOINTR:
++		/* ERESTARTNOINTR means that the syscall should be
++		 * called again after the signal handler returns.
++		 */
++		break;
++	default:
++		return;
++	}
++	if (restart) {
++		if (ret == ERESTART_RESTARTBLOCK)
++			regs->gpr[0] = __NR_restart_syscall;
++		else
++			regs->gpr[3] = regs->orig_gpr3;
++		regs->nip -= 4;
++		regs->result = 0;
++	} else {
++		regs->result = -EINTR;
++		regs->gpr[3] = EINTR;
++		regs->ccr |= 0x10000000;
++	}
++}
++
++int do_signal(sigset_t *oldset, struct pt_regs *regs)
++{
++	siginfo_t info;
++	int signr;
++	struct k_sigaction ka;
++	int ret;
++	int is32 = is_32bit_task();
++
++	if (test_thread_flag(TIF_RESTORE_SIGMASK))
++		oldset = &current->saved_sigmask;
++	else if (!oldset)
++		oldset = &current->blocked;
++
++	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
++
++	/* Is there any syscall restart business here? */
++	check_syscall_restart(regs, &ka, signr > 0);
++
++	if (signr <= 0) {
++		/* No signal to deliver -- put the saved sigmask back */
++		if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
++			clear_thread_flag(TIF_RESTORE_SIGMASK);
++			sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
++		}
++		return 0;               /* no signals delivered */
++	}
++
++	/*
++	 * Reenable the DABR before delivering the signal to
++	 * user space. The DABR will have been cleared if it
++	 * triggered inside the kernel.
++	 */
++	if (current->thread.dabr)
++		set_dabr(current->thread.dabr);
++
++	if (is32) {
++		if (ka.sa.sa_flags & SA_SIGINFO)
++			ret = handle_rt_signal32(signr, &ka, &info, oldset,
++					regs);
++		else
++			ret = handle_signal32(signr, &ka, &info, oldset,
++					regs);
++	} else {
++		ret = handle_rt_signal64(signr, &ka, &info, oldset, regs);
++	}
++
++	if (ret) {
++		spin_lock_irq(&current->sighand->siglock);
++		sigorsets(&current->blocked, &current->blocked,
++			  &ka.sa.sa_mask);
++		if (!(ka.sa.sa_flags & SA_NODEFER))
++			sigaddset(&current->blocked, signr);
++		recalc_sigpending();
++		spin_unlock_irq(&current->sighand->siglock);
++
++		/*
++		 * A signal was successfully delivered; the saved sigmask is in
++		 * its frame, and we can clear the TIF_RESTORE_SIGMASK flag.
++		 */
++		if (test_thread_flag(TIF_RESTORE_SIGMASK))
++			clear_thread_flag(TIF_RESTORE_SIGMASK);
++	}
++
++	return ret;
++}
++
++long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
++		unsigned long r5, unsigned long r6, unsigned long r7,
++		unsigned long r8, struct pt_regs *regs)
++{
++	return do_sigaltstack(uss, uoss, regs->gpr[1]);
++}
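The restart policy implemented by check_syscall_restart() above condenses to one decision: given the positive ERESTART* code left in gpr[3] and whether a handler will run, either restart the syscall or fail it with EINTR. A hedged restatement as a standalone predicate (should_restart is an illustrative name; the ERESTART* values are the kernel-internal ones from <linux/errno.h>):

#include <linux/errno.h>

static int should_restart(unsigned long ret, int has_handler, int sa_restart)
{
	switch (ret) {
	case ERESTART_RESTARTBLOCK:
	case ERESTARTNOHAND:
		return !has_handler;	/* restart only when no handler runs */
	case ERESTARTSYS:
		return !has_handler || sa_restart;
	case ERESTARTNOINTR:
		return 1;		/* always restart */
	default:
		return 0;		/* not a restartable error */
	}
}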
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal.h linux-2.6.22-try2/arch/powerpc/kernel/signal.h
+--- linux-2.6.22-570/arch/powerpc/kernel/signal.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/kernel/signal.h	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,55 @@
++/*
++ *    Copyright (c) 2007 Benjamin Herrenschmidt, IBM Corporation
++ *    Extracted from signal_32.c and signal_64.c
++ *
++ * This file is subject to the terms and conditions of the GNU General
++ * Public License.  See the file README.legal in the main directory of
++ * this archive for more details.
++ */
++
++#ifndef _POWERPC_ARCH_SIGNAL_H
++#define _POWERPC_ARCH_SIGNAL_H
++
++#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
++
++extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
++				  size_t frame_size);
++extern void restore_sigmask(sigset_t *set);
++
++extern int handle_signal32(unsigned long sig, struct k_sigaction *ka,
++			   siginfo_t *info, sigset_t *oldset,
++			   struct pt_regs *regs);
++
++extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
++			      siginfo_t *info, sigset_t *oldset,
++			      struct pt_regs *regs);
++
++
++#ifdef CONFIG_PPC64
++
++static inline int is_32bit_task(void)
++{
++	return test_thread_flag(TIF_32BIT);
++}
++
++extern int handle_rt_signal64(int signr, struct k_sigaction *ka,
++			      siginfo_t *info, sigset_t *set,
++			      struct pt_regs *regs);
++
++#else /* CONFIG_PPC64 */
++
++static inline int is_32bit_task(void)
++{
++	return 1;
++}
++
++static inline int handle_rt_signal64(int signr, struct k_sigaction *ka,
++				     siginfo_t *info, sigset_t *set,
++				     struct pt_regs *regs)
++{
++	return -EFAULT;
++}
++
++#endif /* !defined(CONFIG_PPC64) */
++
++#endif  /* _POWERPC_ARCH_SIGNAL_H */
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal_32.c linux-2.6.22-try2/arch/powerpc/kernel/signal_32.c
+--- linux-2.6.22-570/arch/powerpc/kernel/signal_32.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/kernel/signal_32.c	2007-12-19 15:29:22.000000000 -0500
+@@ -51,12 +51,11 @@
+ #include <asm/pgtable.h>
+ #endif
+ 
+-#undef DEBUG_SIG
++#include "signal.h"
+ 
+-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
++#undef DEBUG_SIG
+ 
+ #ifdef CONFIG_PPC64
+-#define do_signal	do_signal32
+ #define sys_sigsuspend	compat_sys_sigsuspend
+ #define sys_rt_sigsuspend	compat_sys_rt_sigsuspend
+ #define sys_rt_sigreturn	compat_sys_rt_sigreturn
+@@ -231,8 +230,6 @@
+ 
+ #endif /* CONFIG_PPC64 */
+ 
+-int do_signal(sigset_t *oldset, struct pt_regs *regs);
+-
+ /*
+  * Atomically swap in the new signal mask, and wait for a signal.
+  */
+@@ -251,14 +248,6 @@
+  	return -ERESTARTNOHAND;
+ }
+ 
+-#ifdef CONFIG_PPC32
+-long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, int r5,
+-		int r6, int r7, int r8, struct pt_regs *regs)
+-{
+-	return do_sigaltstack(uss, uoss, regs->gpr[1]);
+-}
+-#endif
+-
+ long sys_sigaction(int sig, struct old_sigaction __user *act,
+ 		struct old_sigaction __user *oact)
+ {
+@@ -293,14 +282,17 @@
+ /*
+  * When we have signals to deliver, we set up on the
+  * user stack, going down from the original stack pointer:
+- *	a sigregs struct
++ *	an ABI gap of 56 words
++ *	an mcontext struct
+  *	a sigcontext struct
+  *	a gap of __SIGNAL_FRAMESIZE bytes
+  *
+- * Each of these things must be a multiple of 16 bytes in size.
++ * Each of these things must be a multiple of 16 bytes in size. The following
++ * structure represents all of this except the __SIGNAL_FRAMESIZE gap.
+  *
+  */
+-struct sigregs {
++struct sigframe {
++	struct sigcontext sctx;		/* the sigcontext */
+ 	struct mcontext	mctx;		/* all the register values */
+ 	/*
+ 	 * Programs using the rs6000/xcoff abi can save up to 19 gp
+@@ -703,44 +695,22 @@
+ }
+ #endif /* CONFIG_PPC64 */
+ 
+-
+-/*
+- * Restore the user process's signal mask
+- */
+-#ifdef CONFIG_PPC64
+-extern void restore_sigmask(sigset_t *set);
+-#else /* CONFIG_PPC64 */
+-static void restore_sigmask(sigset_t *set)
+-{
+-	sigdelsetmask(set, ~_BLOCKABLE);
+-	spin_lock_irq(&current->sighand->siglock);
+-	current->blocked = *set;
+-	recalc_sigpending();
+-	spin_unlock_irq(&current->sighand->siglock);
+-}
+-#endif
+-
+ /*
+  * Set up a signal frame for a "real-time" signal handler
+  * (one which gets siginfo).
+  */
+-static int handle_rt_signal(unsigned long sig, struct k_sigaction *ka,
++int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
+ 		siginfo_t *info, sigset_t *oldset,
+-		struct pt_regs *regs, unsigned long newsp)
++		struct pt_regs *regs)
+ {
+ 	struct rt_sigframe __user *rt_sf;
+ 	struct mcontext __user *frame;
+-	unsigned long origsp = newsp;
++	unsigned long newsp = 0;
+ 
+ 	/* Set up Signal Frame */
+ 	/* Put a Real Time Context onto stack */
+-	newsp -= sizeof(*rt_sf);
+-	rt_sf = (struct rt_sigframe __user *)newsp;
+-
+-	/* create a stack frame for the caller of the handler */
+-	newsp -= __SIGNAL_FRAMESIZE + 16;
+-
+-	if (!access_ok(VERIFY_WRITE, (void __user *)newsp, origsp - newsp))
++	rt_sf = get_sigframe(ka, regs, sizeof(*rt_sf));
++	if (unlikely(rt_sf == NULL))
+ 		goto badframe;
+ 
+ 	/* Put the siginfo & fill in most of the ucontext */
+@@ -770,8 +740,12 @@
+ 
+ 	current->thread.fpscr.val = 0;	/* turn off all fp exceptions */
+ 
++	/* create a stack frame for the caller of the handler */
++	newsp = ((unsigned long)rt_sf) - (__SIGNAL_FRAMESIZE + 16);
+ 	if (put_user(regs->gpr[1], (u32 __user *)newsp))
+ 		goto badframe;
++
++	/* Fill registers for signal handler */
+ 	regs->gpr[1] = newsp;
+ 	regs->gpr[3] = sig;
+ 	regs->gpr[4] = (unsigned long) &rt_sf->info;
+@@ -1015,27 +989,18 @@
+ /*
+  * OK, we're invoking a handler
+  */
+-static int handle_signal(unsigned long sig, struct k_sigaction *ka,
+-		siginfo_t *info, sigset_t *oldset, struct pt_regs *regs,
+-		unsigned long newsp)
++int handle_signal32(unsigned long sig, struct k_sigaction *ka,
++		    siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
+ {
+ 	struct sigcontext __user *sc;
+-	struct sigregs __user *frame;
+-	unsigned long origsp = newsp;
++	struct sigframe __user *frame;
++	unsigned long newsp = 0;
+ 
+ 	/* Set up Signal Frame */
+-	newsp -= sizeof(struct sigregs);
+-	frame = (struct sigregs __user *) newsp;
+-
+-	/* Put a sigcontext on the stack */
+-	newsp -= sizeof(*sc);
+-	sc = (struct sigcontext __user *) newsp;
+-
+-	/* create a stack frame for the caller of the handler */
+-	newsp -= __SIGNAL_FRAMESIZE;
+-
+-	if (!access_ok(VERIFY_WRITE, (void __user *) newsp, origsp - newsp))
++	frame = get_sigframe(ka, regs, sizeof(*frame));
++	if (unlikely(frame == NULL))
+ 		goto badframe;
++	sc = (struct sigcontext __user *) &frame->sctx;
+ 
+ #if _NSIG != 64
+ #error "Please adjust handle_signal()"
+@@ -1047,7 +1012,7 @@
+ #else
+ 	    || __put_user(oldset->sig[1], &sc->_unused[3])
+ #endif
+-	    || __put_user(to_user_ptr(frame), &sc->regs)
++	    || __put_user(to_user_ptr(&frame->mctx), &sc->regs)
+ 	    || __put_user(sig, &sc->signal))
+ 		goto badframe;
+ 
+@@ -1063,8 +1028,11 @@
+ 
+ 	current->thread.fpscr.val = 0;	/* turn off all fp exceptions */
+ 
++	/* create a stack frame for the caller of the handler */
++	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
+ 	if (put_user(regs->gpr[1], (u32 __user *)newsp))
+ 		goto badframe;
++
+ 	regs->gpr[1] = newsp;
+ 	regs->gpr[3] = sig;
+ 	regs->gpr[4] = (unsigned long) sc;
+@@ -1126,106 +1094,3 @@
+ 	force_sig(SIGSEGV, current);
+ 	return 0;
+ }
+-
+-/*
+- * Note that 'init' is a special process: it doesn't get signals it doesn't
+- * want to handle. Thus you cannot kill init even with a SIGKILL even by
+- * mistake.
+- */
+-int do_signal(sigset_t *oldset, struct pt_regs *regs)
+-{
+-	siginfo_t info;
+-	struct k_sigaction ka;
+-	unsigned int newsp;
+-	int signr, ret;
+-
+-#ifdef CONFIG_PPC32
+-	if (try_to_freeze()) {
+-		signr = 0;
+-		if (!signal_pending(current))
+-			goto no_signal;
+-	}
+-#endif
+-
+-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+-		oldset = &current->saved_sigmask;
+-	else if (!oldset)
+-		oldset = &current->blocked;
+-
+-	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+-#ifdef CONFIG_PPC32
+-no_signal:
+-#endif
+-	if (TRAP(regs) == 0x0C00		/* System Call! */
+-	    && regs->ccr & 0x10000000		/* error signalled */
+-	    && ((ret = regs->gpr[3]) == ERESTARTSYS
+-		|| ret == ERESTARTNOHAND || ret == ERESTARTNOINTR
+-		|| ret == ERESTART_RESTARTBLOCK)) {
+-
+-		if (signr > 0
+-		    && (ret == ERESTARTNOHAND || ret == ERESTART_RESTARTBLOCK
+-			|| (ret == ERESTARTSYS
+-			    && !(ka.sa.sa_flags & SA_RESTART)))) {
+-			/* make the system call return an EINTR error */
+-			regs->result = -EINTR;
+-			regs->gpr[3] = EINTR;
+-			/* note that the cr0.SO bit is already set */
+-		} else {
+-			regs->nip -= 4;	/* Back up & retry system call */
+-			regs->result = 0;
+-			regs->trap = 0;
+-			if (ret == ERESTART_RESTARTBLOCK)
+-				regs->gpr[0] = __NR_restart_syscall;
+-			else
+-				regs->gpr[3] = regs->orig_gpr3;
+-		}
+-	}
+-
+-	if (signr == 0) {
+-		/* No signal to deliver -- put the saved sigmask back */
+-		if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+-			clear_thread_flag(TIF_RESTORE_SIGMASK);
+-			sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+-		}
+-		return 0;		/* no signals delivered */
+-	}
+-
+-	if ((ka.sa.sa_flags & SA_ONSTACK) && current->sas_ss_size
+-	    && !on_sig_stack(regs->gpr[1]))
+-		newsp = current->sas_ss_sp + current->sas_ss_size;
+-	else
+-		newsp = regs->gpr[1];
+-	newsp &= ~0xfUL;
+-
+-#ifdef CONFIG_PPC64
+-	/*
+-	 * Reenable the DABR before delivering the signal to
+-	 * user space. The DABR will have been cleared if it
+-	 * triggered inside the kernel.
+-	 */
+-	if (current->thread.dabr)
+-		set_dabr(current->thread.dabr);
+-#endif
+-
+-	/* Whee!  Actually deliver the signal.  */
+-	if (ka.sa.sa_flags & SA_SIGINFO)
+-		ret = handle_rt_signal(signr, &ka, &info, oldset, regs, newsp);
+-	else
+-		ret = handle_signal(signr, &ka, &info, oldset, regs, newsp);
+-
+-	if (ret) {
+-		spin_lock_irq(&current->sighand->siglock);
+-		sigorsets(&current->blocked, &current->blocked,
+-			  &ka.sa.sa_mask);
+-		if (!(ka.sa.sa_flags & SA_NODEFER))
+-			sigaddset(&current->blocked, signr);
+-		recalc_sigpending();
+-		spin_unlock_irq(&current->sighand->siglock);
+-		/* A signal was successfully delivered; the saved sigmask is in
+-		   its frame, and we can clear the TIF_RESTORE_SIGMASK flag */
+-		if (test_thread_flag(TIF_RESTORE_SIGMASK))
+-			clear_thread_flag(TIF_RESTORE_SIGMASK);
+-	}
+-
+-	return ret;
+-}
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/signal_64.c linux-2.6.22-try2/arch/powerpc/kernel/signal_64.c
+--- linux-2.6.22-570/arch/powerpc/kernel/signal_64.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/kernel/signal_64.c	2007-12-19 15:29:22.000000000 -0500
+@@ -34,9 +34,9 @@
+ #include <asm/syscalls.h>
+ #include <asm/vdso.h>
+ 
+-#define DEBUG_SIG 0
++#include "signal.h"
+ 
+-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
++#define DEBUG_SIG 0
+ 
+ #define GP_REGS_SIZE	min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
+ #define FP_REGS_SIZE	sizeof(elf_fpregset_t)
+@@ -64,14 +64,6 @@
+ 	char abigap[288];
+ } __attribute__ ((aligned (16)));
+ 
+-long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r5,
+-		     unsigned long r6, unsigned long r7, unsigned long r8,
+-		     struct pt_regs *regs)
+-{
+-	return do_sigaltstack(uss, uoss, regs->gpr[1]);
+-}
+-
+-
+ /*
+  * Set up the sigcontext for the signal frame.
+  */
+@@ -208,25 +200,6 @@
+ }
+ 
+ /*
+- * Allocate space for the signal frame
+- */
+-static inline void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
+-				  size_t frame_size)
+-{
+-        unsigned long newsp;
+-
+-        /* Default to using normal stack */
+-        newsp = regs->gpr[1];
+-
+-	if ((ka->sa.sa_flags & SA_ONSTACK) && current->sas_ss_size) {
+-		if (! on_sig_stack(regs->gpr[1]))
+-			newsp = (current->sas_ss_sp + current->sas_ss_size);
+-	}
+-
+-        return (void __user *)((newsp - frame_size) & -16ul);
+-}
+-
+-/*
+  * Setup the trampoline code on the stack
+  */
+ static long setup_trampoline(unsigned int syscall, unsigned int __user *tramp)
+@@ -253,19 +226,6 @@
+ }
+ 
+ /*
+- * Restore the user process's signal mask (also used by signal32.c)
+- */
+-void restore_sigmask(sigset_t *set)
+-{
+-	sigdelsetmask(set, ~_BLOCKABLE);
+-	spin_lock_irq(&current->sighand->siglock);
+-	current->blocked = *set;
+-	recalc_sigpending();
+-	spin_unlock_irq(&current->sighand->siglock);
+-}
+-
+-
+-/*
+  * Handle {get,set,swap}_context operations
+  */
+ int sys_swapcontext(struct ucontext __user *old_ctx,
+@@ -359,7 +319,7 @@
+ 	return 0;
+ }
+ 
+-static int setup_rt_frame(int signr, struct k_sigaction *ka, siginfo_t *info,
++int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
+ 		sigset_t *set, struct pt_regs *regs)
+ {
+ 	/* Handler is *really* a pointer to the function descriptor for
+@@ -373,8 +333,7 @@
+ 	long err = 0;
+ 
+ 	frame = get_sigframe(ka, regs, sizeof(*frame));
+-
+-	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
++	if (unlikely(frame == NULL))
+ 		goto badframe;
+ 
+ 	err |= __put_user(&frame->info, &frame->pinfo);
+@@ -411,7 +370,7 @@
+ 	funct_desc_ptr = (func_descr_t __user *) ka->sa.sa_handler;
+ 
+ 	/* Allocate a dummy caller frame for the signal handler. */
+-	newsp = (unsigned long)frame - __SIGNAL_FRAMESIZE;
++	newsp = ((unsigned long)frame) - __SIGNAL_FRAMESIZE;
+ 	err |= put_user(regs->gpr[1], (unsigned long __user *)newsp);
+ 
+ 	/* Set up "regs" so we "return" to the signal handler. */
+@@ -442,134 +401,3 @@
+ 	force_sigsegv(signr, current);
+ 	return 0;
+ }
+-
+-
+-/*
+- * OK, we're invoking a handler
+- */
+-static int handle_signal(unsigned long sig, struct k_sigaction *ka,
+-			 siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
+-{
+-	int ret;
+-
+-	/* Set up Signal Frame */
+-	ret = setup_rt_frame(sig, ka, info, oldset, regs);
+-
+-	if (ret) {
+-		spin_lock_irq(&current->sighand->siglock);
+-		sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
+-		if (!(ka->sa.sa_flags & SA_NODEFER))
+-			sigaddset(&current->blocked,sig);
+-		recalc_sigpending();
+-		spin_unlock_irq(&current->sighand->siglock);
+-	}
+-
+-	return ret;
+-}
+-
+-static inline void syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
+-{
+-	switch ((int)regs->result) {
+-	case -ERESTART_RESTARTBLOCK:
+-	case -ERESTARTNOHAND:
+-		/* ERESTARTNOHAND means that the syscall should only be
+-		 * restarted if there was no handler for the signal, and since
+-		 * we only get here if there is a handler, we dont restart.
+-		 */
+-		regs->result = -EINTR;
+-		regs->gpr[3] = EINTR;
+-		regs->ccr |= 0x10000000;
+-		break;
+-	case -ERESTARTSYS:
+-		/* ERESTARTSYS means to restart the syscall if there is no
+-		 * handler or the handler was registered with SA_RESTART
+-		 */
+-		if (!(ka->sa.sa_flags & SA_RESTART)) {
+-			regs->result = -EINTR;
+-			regs->gpr[3] = EINTR;
+-			regs->ccr |= 0x10000000;
+-			break;
+-		}
+-		/* fallthrough */
+-	case -ERESTARTNOINTR:
+-		/* ERESTARTNOINTR means that the syscall should be
+-		 * called again after the signal handler returns.
+-		 */
+-		regs->gpr[3] = regs->orig_gpr3;
+-		regs->nip -= 4;
+-		regs->result = 0;
+-		break;
+-	}
+-}
+-
+-/*
+- * Note that 'init' is a special process: it doesn't get signals it doesn't
+- * want to handle. Thus you cannot kill init even with a SIGKILL even by
+- * mistake.
+- */
+-int do_signal(sigset_t *oldset, struct pt_regs *regs)
+-{
+-	siginfo_t info;
+-	int signr;
+-	struct k_sigaction ka;
+-
+-	/*
+-	 * If the current thread is 32 bit - invoke the
+-	 * 32 bit signal handling code
+-	 */
+-	if (test_thread_flag(TIF_32BIT))
+-		return do_signal32(oldset, regs);
+-
+-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
+-		oldset = &current->saved_sigmask;
+-	else if (!oldset)
+-		oldset = &current->blocked;
+-
+-	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+-	if (signr > 0) {
+-		int ret;
+-
+-		/* Whee!  Actually deliver the signal.  */
+-		if (TRAP(regs) == 0x0C00)
+-			syscall_restart(regs, &ka);
+-
+-		/*
+-		 * Reenable the DABR before delivering the signal to
+-		 * user space. The DABR will have been cleared if it
+-		 * triggered inside the kernel.
+-		 */
+-		if (current->thread.dabr)
+-			set_dabr(current->thread.dabr);
+-
+-		ret = handle_signal(signr, &ka, &info, oldset, regs);
+-
+-		/* If a signal was successfully delivered, the saved sigmask is in
+-		   its frame, and we can clear the TIF_RESTORE_SIGMASK flag */
+-		if (ret && test_thread_flag(TIF_RESTORE_SIGMASK))
+-			clear_thread_flag(TIF_RESTORE_SIGMASK);
+-
+-		return ret;
+-	}
+-
+-	if (TRAP(regs) == 0x0C00) {	/* System Call! */
+-		if ((int)regs->result == -ERESTARTNOHAND ||
+-		    (int)regs->result == -ERESTARTSYS ||
+-		    (int)regs->result == -ERESTARTNOINTR) {
+-			regs->gpr[3] = regs->orig_gpr3;
+-			regs->nip -= 4; /* Back up & retry system call */
+-			regs->result = 0;
+-		} else if ((int)regs->result == -ERESTART_RESTARTBLOCK) {
+-			regs->gpr[0] = __NR_restart_syscall;
+-			regs->nip -= 4;
+-			regs->result = 0;
+-		}
+-	}
+-	/* No signal to deliver -- put the saved sigmask back */
+-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
+-		clear_thread_flag(TIF_RESTORE_SIGMASK);
+-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
+-	}
+-
+-	return 0;
+-}
+-EXPORT_SYMBOL(do_signal);
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/sys_ppc32.c linux-2.6.22-try2/arch/powerpc/kernel/sys_ppc32.c
+--- linux-2.6.22-570/arch/powerpc/kernel/sys_ppc32.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/kernel/sys_ppc32.c	2007-12-19 15:29:24.000000000 -0500
+@@ -773,6 +773,13 @@
+ 	return sys_truncate(path, (high << 32) | low);
+ }
+ 
++asmlinkage long compat_sys_fallocate(int fd, int mode, u32 offhi, u32 offlo,
++				     u32 lenhi, u32 lenlo)
++{
++	return sys_fallocate(fd, mode, ((loff_t)offhi << 32) | offlo,
++			     ((loff_t)lenhi << 32) | lenlo);
++}
++
+ asmlinkage int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long high,
+ 				 unsigned long low)
+ {
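compat_sys_fallocate() above rebuilds each 64-bit argument from the high/low pair a 32-bit caller passes in two registers. The idiom in isolation, as a hedged sketch (join32 is an illustrative name):

static inline long long join32(unsigned int hi, unsigned int lo)
{
	return ((long long)hi << 32) | lo;	/* hi:lo -> one 64-bit value */
}

The offset is then join32(offhi, offlo) and the length join32(lenhi, lenlo), exactly as in the wrapper above.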
+diff -Nurb linux-2.6.22-570/arch/powerpc/kernel/vdso.c linux-2.6.22-try2/arch/powerpc/kernel/vdso.c
+--- linux-2.6.22-570/arch/powerpc/kernel/vdso.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/kernel/vdso.c	2007-12-19 15:29:22.000000000 -0500
+@@ -671,7 +671,7 @@
+ 	/*
+ 	 * Fill up the "systemcfg" stuff for backward compatiblity
+ 	 */
+-	strcpy(vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
++	strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
+ 	vdso_data->version.major = SYSTEMCFG_MAJOR;
+ 	vdso_data->version.minor = SYSTEMCFG_MINOR;
+ 	vdso_data->processor = mfspr(SPRN_PVR);
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/44x_mmu.c linux-2.6.22-try2/arch/powerpc/mm/44x_mmu.c
+--- linux-2.6.22-570/arch/powerpc/mm/44x_mmu.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/44x_mmu.c	2007-12-19 15:29:22.000000000 -0500
+@@ -12,7 +12,6 @@
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *
+  *  Derived from "arch/i386/mm/init.c"
+  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/4xx_mmu.c linux-2.6.22-try2/arch/powerpc/mm/4xx_mmu.c
+--- linux-2.6.22-570/arch/powerpc/mm/4xx_mmu.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/4xx_mmu.c	2007-12-19 15:29:22.000000000 -0500
+@@ -9,7 +9,6 @@
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *
+  *  Derived from "arch/i386/mm/init.c"
+  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/Makefile linux-2.6.22-try2/arch/powerpc/mm/Makefile
+--- linux-2.6.22-570/arch/powerpc/mm/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/Makefile	2007-12-19 15:29:22.000000000 -0500
+@@ -11,8 +11,7 @@
+ hash-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
+ obj-$(CONFIG_PPC64)		+= init_64.o pgtable_64.o mmu_context_64.o \
+ 				   hash_utils_64.o hash_low_64.o tlb_64.o \
+-				   slb_low.o slb.o stab.o mmap.o imalloc.o \
+-				   $(hash-y)
++				   slb_low.o slb.o stab.o mmap.o $(hash-y)
+ obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o hash_low_32.o tlb_32.o
+ obj-$(CONFIG_40x)		+= 4xx_mmu.o
+ obj-$(CONFIG_44x)		+= 44x_mmu.o
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/fault.c linux-2.6.22-try2/arch/powerpc/mm/fault.c
+--- linux-2.6.22-570/arch/powerpc/mm/fault.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/mm/fault.c	2007-12-19 15:29:24.000000000 -0500
+@@ -29,6 +29,7 @@
+ #include <linux/module.h>
+ #include <linux/kprobes.h>
+ #include <linux/kdebug.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+@@ -381,7 +382,7 @@
+ 	printk("VM: killing process %s(%d:#%u)\n",
+ 		current->comm, current->pid, current->xid);
+ 	if (user_mode(regs))
+-		do_exit(SIGKILL);
++		do_group_exit(SIGKILL);
+ 	return SIGKILL;
+ 
+ do_sigbus:
+@@ -412,6 +413,13 @@
+ 		return;
+ 	}
+ 
++#ifdef CONFIG_KGDB
++	if (atomic_read(&debugger_active) && kgdb_may_fault)
++		/* Restore our previous state. */
++		kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++		/* Not reached. */
++#endif
++
+ 	/* kernel has accessed a bad area */
+ 
+ 	switch (regs->trap) {
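
The fault.c hook added above lets KGDB survive a bad memory access while the debugger is active: instead of oopsing, the handler long-jumps back to register state saved before the risky access was attempted. A userspace analogue of that save-and-bail-out pattern using standard setjmp/longjmp; apart from the names echoing the hunk, everything here is illustrative:

	#include <setjmp.h>
	#include <stdio.h>

	static jmp_buf fault_jmp_regs;  /* analogue of kgdb_fault_jmp_regs */
	static int may_fault;           /* analogue of kgdb_may_fault */

	/* Stands in for the fault handler: bail out of a guarded access
	 * that went wrong. */
	static void fault_longjmp(void)
	{
		if (may_fault)
			longjmp(fault_jmp_regs, 1);
		/* Not reached when may_fault is set. */
	}

	static int guarded_access(int trigger_fault)
	{
		may_fault = 1;
		if (setjmp(fault_jmp_regs)) {
			may_fault = 0;
			return -1;          /* faulted; previous state restored */
		}
		if (trigger_fault)
			fault_longjmp();    /* simulate the bad access */
		may_fault = 0;
		return 0;
	}

	int main(void)
	{
		printf("clean access:    %d\n", guarded_access(0));
		printf("faulting access: %d\n", guarded_access(1));
		return 0;
	}
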
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/fsl_booke_mmu.c linux-2.6.22-try2/arch/powerpc/mm/fsl_booke_mmu.c
+--- linux-2.6.22-570/arch/powerpc/mm/fsl_booke_mmu.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/fsl_booke_mmu.c	2007-12-19 15:29:22.000000000 -0500
+@@ -14,7 +14,6 @@
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *
+  *  Derived from "arch/i386/mm/init.c"
+  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/hash_native_64.c linux-2.6.22-try2/arch/powerpc/mm/hash_native_64.c
+--- linux-2.6.22-570/arch/powerpc/mm/hash_native_64.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/hash_native_64.c	2007-12-19 15:29:22.000000000 -0500
+@@ -104,7 +104,7 @@
+ 		spin_unlock(&native_tlbie_lock);
+ }
+ 
+-static inline void native_lock_hpte(hpte_t *hptep)
++static inline void native_lock_hpte(struct hash_pte *hptep)
+ {
+ 	unsigned long *word = &hptep->v;
+ 
+@@ -116,7 +116,7 @@
+ 	}
+ }
+ 
+-static inline void native_unlock_hpte(hpte_t *hptep)
++static inline void native_unlock_hpte(struct hash_pte *hptep)
+ {
+ 	unsigned long *word = &hptep->v;
+ 
+@@ -128,7 +128,7 @@
+ 			unsigned long pa, unsigned long rflags,
+ 			unsigned long vflags, int psize)
+ {
+-	hpte_t *hptep = htab_address + hpte_group;
++	struct hash_pte *hptep = htab_address + hpte_group;
+ 	unsigned long hpte_v, hpte_r;
+ 	int i;
+ 
+@@ -177,7 +177,7 @@
+ 
+ static long native_hpte_remove(unsigned long hpte_group)
+ {
+-	hpte_t *hptep;
++	struct hash_pte *hptep;
+ 	int i;
+ 	int slot_offset;
+ 	unsigned long hpte_v;
+@@ -217,7 +217,7 @@
+ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
+ 				 unsigned long va, int psize, int local)
+ {
+-	hpte_t *hptep = htab_address + slot;
++	struct hash_pte *hptep = htab_address + slot;
+ 	unsigned long hpte_v, want_v;
+ 	int ret = 0;
+ 
+@@ -233,15 +233,14 @@
+ 	/* Even if we miss, we need to invalidate the TLB */
+ 	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
+ 		DBG_LOW(" -> miss\n");
+-		native_unlock_hpte(hptep);
+ 		ret = -1;
+ 	} else {
+ 		DBG_LOW(" -> hit\n");
+ 		/* Update the HPTE */
+ 		hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
+ 			(newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C));
+-		native_unlock_hpte(hptep);
+ 	}
++	native_unlock_hpte(hptep);
+ 
+ 	/* Ensure it is out of the tlb too. */
+ 	tlbie(va, psize, local);
+@@ -251,7 +250,7 @@
+ 
+ static long native_hpte_find(unsigned long va, int psize)
+ {
+-	hpte_t *hptep;
++	struct hash_pte *hptep;
+ 	unsigned long hash;
+ 	unsigned long i, j;
+ 	long slot;
+@@ -294,7 +293,7 @@
+ {
+ 	unsigned long vsid, va;
+ 	long slot;
+-	hpte_t *hptep;
++	struct hash_pte *hptep;
+ 
+ 	vsid = get_kernel_vsid(ea);
+ 	va = (vsid << 28) | (ea & 0x0fffffff);
+@@ -315,7 +314,7 @@
+ static void native_hpte_invalidate(unsigned long slot, unsigned long va,
+ 				   int psize, int local)
+ {
+-	hpte_t *hptep = htab_address + slot;
++	struct hash_pte *hptep = htab_address + slot;
+ 	unsigned long hpte_v;
+ 	unsigned long want_v;
+ 	unsigned long flags;
+@@ -345,7 +344,7 @@
+ #define LP_BITS		8
+ #define LP_MASK(i)	((0xFF >> (i)) << LP_SHIFT)
+ 
+-static void hpte_decode(hpte_t *hpte, unsigned long slot,
++static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
+ 			int *psize, unsigned long *va)
+ {
+ 	unsigned long hpte_r = hpte->r;
+@@ -415,7 +414,7 @@
+ static void native_hpte_clear(void)
+ {
+ 	unsigned long slot, slots, flags;
+-	hpte_t *hptep = htab_address;
++	struct hash_pte *hptep = htab_address;
+ 	unsigned long hpte_v, va;
+ 	unsigned long pteg_count;
+ 	int psize;
+@@ -462,7 +461,7 @@
+ static void native_flush_hash_range(unsigned long number, int local)
+ {
+ 	unsigned long va, hash, index, hidx, shift, slot;
+-	hpte_t *hptep;
++	struct hash_pte *hptep;
+ 	unsigned long hpte_v;
+ 	unsigned long want_v;
+ 	unsigned long flags;
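
Among the hash_native_64.c changes above, native_hpte_updatepp() is reworked so the hit and miss branches fall through to a single native_unlock_hpte() call rather than each branch unlocking on its own. The same single-unlock-site shape sketched with a pthread mutex; this illustrates the pattern only, not the kernel's locking primitives:

	#include <pthread.h>
	#include <stdio.h>

	static pthread_mutex_t hpte_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Update an entry if it matches; the lock is released at exactly
	 * one place regardless of which branch was taken. */
	static int update_if_match(unsigned long *entry, unsigned long want,
				   unsigned long newval)
	{
		int ret;

		pthread_mutex_lock(&hpte_lock);
		if (*entry != want) {
			ret = -1;           /* miss */
		} else {
			*entry = newval;    /* hit: update under the lock */
			ret = 0;
		}
		pthread_mutex_unlock(&hpte_lock);   /* single unlock site */
		return ret;
	}

	int main(void)
	{
		unsigned long entry = 42;

		printf("hit:  %d\n", update_if_match(&entry, 42, 99));
		printf("miss: %d\n", update_if_match(&entry, 42, 7));
		return 0;
	}
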
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/hash_utils_64.c linux-2.6.22-try2/arch/powerpc/mm/hash_utils_64.c
+--- linux-2.6.22-570/arch/powerpc/mm/hash_utils_64.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/hash_utils_64.c	2007-12-19 15:29:22.000000000 -0500
+@@ -87,7 +87,7 @@
+ static unsigned long _SDR1;
+ struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+ 
+-hpte_t *htab_address;
++struct hash_pte *htab_address;
+ unsigned long htab_size_bytes;
+ unsigned long htab_hash_mask;
+ int mmu_linear_psize = MMU_PAGE_4K;
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/imalloc.c linux-2.6.22-try2/arch/powerpc/mm/imalloc.c
+--- linux-2.6.22-570/arch/powerpc/mm/imalloc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/imalloc.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,313 +0,0 @@
+-/*
+- * c 2001 PPC 64 Team, IBM Corp
+- * 
+- *      This program is free software; you can redistribute it and/or
+- *      modify it under the terms of the GNU General Public License
+- *      as published by the Free Software Foundation; either version
+- *      2 of the License, or (at your option) any later version.
+- */
+-
+-#include <linux/slab.h>
+-#include <linux/vmalloc.h>
+-
+-#include <asm/uaccess.h>
+-#include <asm/pgalloc.h>
+-#include <asm/pgtable.h>
+-#include <linux/mutex.h>
+-#include <asm/cacheflush.h>
+-
+-#include "mmu_decl.h"
+-
+-static DEFINE_MUTEX(imlist_mutex);
+-struct vm_struct * imlist = NULL;
+-
+-static int get_free_im_addr(unsigned long size, unsigned long *im_addr)
+-{
+-	unsigned long addr;
+-	struct vm_struct **p, *tmp;
+-
+-	addr = ioremap_bot;
+-	for (p = &imlist; (tmp = *p) ; p = &tmp->next) {
+-		if (size + addr < (unsigned long) tmp->addr)
+-			break;
+-		if ((unsigned long)tmp->addr >= ioremap_bot)
+-			addr = tmp->size + (unsigned long) tmp->addr;
+-		if (addr >= IMALLOC_END-size)
+-			return 1;
+-	}
+-	*im_addr = addr;
+-
+-	return 0;
+-}
+-
+-/* Return whether the region described by v_addr and size is a subset
+- * of the region described by parent
+- */
+-static inline int im_region_is_subset(unsigned long v_addr, unsigned long size,
+-			struct vm_struct *parent)
+-{
+-	return (int) (v_addr >= (unsigned long) parent->addr &&
+-	              v_addr < (unsigned long) parent->addr + parent->size &&
+-	    	      size < parent->size);
+-}
+-
+-/* Return whether the region described by v_addr and size is a superset
+- * of the region described by child
+- */
+-static int im_region_is_superset(unsigned long v_addr, unsigned long size,
+-		struct vm_struct *child)
+-{
+-	struct vm_struct parent;
+-
+-	parent.addr = (void *) v_addr;
+-	parent.size = size;
+-
+-	return im_region_is_subset((unsigned long) child->addr, child->size,
+-			&parent);
+-}
+-
+-/* Return whether the region described by v_addr and size overlaps
+- * the region described by vm.  Overlapping regions meet the
+- * following conditions:
+- * 1) The regions share some part of the address space
+- * 2) The regions aren't identical
+- * 3) Neither region is a subset of the other
+- */
+-static int im_region_overlaps(unsigned long v_addr, unsigned long size,
+-		     struct vm_struct *vm)
+-{
+-	if (im_region_is_superset(v_addr, size, vm))
+-		return 0;
+-
+-	return (v_addr + size > (unsigned long) vm->addr + vm->size &&
+-		v_addr < (unsigned long) vm->addr + vm->size) ||
+-	       (v_addr < (unsigned long) vm->addr &&
+-		v_addr + size > (unsigned long) vm->addr);
+-}
+-
+-/* Determine imalloc status of region described by v_addr and size.
+- * Can return one of the following:
+- * IM_REGION_UNUSED   -  Entire region is unallocated in imalloc space.
+- * IM_REGION_SUBSET -    Region is a subset of a region that is already
+- * 			 allocated in imalloc space.
+- * 		         vm will be assigned to a ptr to the parent region.
+- * IM_REGION_EXISTS -    Exact region already allocated in imalloc space.
+- *                       vm will be assigned to a ptr to the existing imlist
+- *                       member.
+- * IM_REGION_OVERLAPS -  Region overlaps an allocated region in imalloc space.
+- * IM_REGION_SUPERSET -  Region is a superset of a region that is already
+- *                       allocated in imalloc space.
+- */
+-static int im_region_status(unsigned long v_addr, unsigned long size,
+-		    struct vm_struct **vm)
+-{
+-	struct vm_struct *tmp;
+-
+-	for (tmp = imlist; tmp; tmp = tmp->next)
+-		if (v_addr < (unsigned long) tmp->addr + tmp->size)
+-			break;
+-
+-	*vm = NULL;
+-	if (tmp) {
+-		if (im_region_overlaps(v_addr, size, tmp))
+-			return IM_REGION_OVERLAP;
+-
+-		*vm = tmp;
+-		if (im_region_is_subset(v_addr, size, tmp)) {
+-			/* Return with tmp pointing to superset */
+-			return IM_REGION_SUBSET;
+-		}
+-		if (im_region_is_superset(v_addr, size, tmp)) {
+-			/* Return with tmp pointing to first subset */
+-			return IM_REGION_SUPERSET;
+-		}
+-		else if (v_addr == (unsigned long) tmp->addr &&
+-		 	 size == tmp->size) {
+-			/* Return with tmp pointing to exact region */
+-			return IM_REGION_EXISTS;
+-		}
+-	}
+-
+-	return IM_REGION_UNUSED;
+-}
+-
+-static struct vm_struct * split_im_region(unsigned long v_addr, 
+-		unsigned long size, struct vm_struct *parent)
+-{
+-	struct vm_struct *vm1 = NULL;
+-	struct vm_struct *vm2 = NULL;
+-	struct vm_struct *new_vm = NULL;
+-	
+-	vm1 = kmalloc(sizeof(*vm1), GFP_KERNEL);
+-	if (vm1	== NULL) {
+-		printk(KERN_ERR "%s() out of memory\n", __FUNCTION__);
+-		return NULL;
+-	}
+-
+-	if (v_addr == (unsigned long) parent->addr) {
+-	        /* Use existing parent vm_struct to represent child, allocate
+-		 * new one for the remainder of parent range
+-		 */
+-		vm1->size = parent->size - size;
+-		vm1->addr = (void *) (v_addr + size);
+-		vm1->next = parent->next;
+-
+-		parent->size = size;
+-		parent->next = vm1;
+-		new_vm = parent;
+-	} else if (v_addr + size == (unsigned long) parent->addr + 
+-			parent->size) {
+-		/* Allocate new vm_struct to represent child, use existing
+-		 * parent one for remainder of parent range
+-		 */
+-		vm1->size = size;
+-		vm1->addr = (void *) v_addr;
+-		vm1->next = parent->next;
+-		new_vm = vm1;
+-
+-		parent->size -= size;
+-		parent->next = vm1;
+-	} else {
+-	        /* Allocate two new vm_structs for the new child and 
+-		 * uppermost remainder, and use existing parent one for the
+-		 * lower remainder of parent range
+-		 */
+-		vm2 = kmalloc(sizeof(*vm2), GFP_KERNEL);
+-		if (vm2 == NULL) {
+-			printk(KERN_ERR "%s() out of memory\n", __FUNCTION__);
+-			kfree(vm1);
+-			return NULL;
+-		}
+-
+-		vm1->size = size;
+-		vm1->addr = (void *) v_addr;
+-		vm1->next = vm2;
+-		new_vm = vm1;
+-
+-		vm2->size = ((unsigned long) parent->addr + parent->size) - 
+-				(v_addr + size);
+-		vm2->addr = (void *) v_addr + size;
+-		vm2->next = parent->next;
+-
+-		parent->size = v_addr - (unsigned long) parent->addr;
+-		parent->next = vm1;
+-	}
+-
+-	return new_vm;
+-}
+-
+-static struct vm_struct * __add_new_im_area(unsigned long req_addr, 
+-					    unsigned long size)
+-{
+-	struct vm_struct **p, *tmp, *area;
+-		
+-	for (p = &imlist; (tmp = *p) ; p = &tmp->next) {
+-		if (req_addr + size <= (unsigned long)tmp->addr)
+-			break;
+-	}
+-	
+-	area = kmalloc(sizeof(*area), GFP_KERNEL);
+-	if (!area)
+-		return NULL;
+-	area->flags = 0;
+-	area->addr = (void *)req_addr;
+-	area->size = size;
+-	area->next = *p;
+-	*p = area;
+-
+-	return area;
+-}
+-
+-static struct vm_struct * __im_get_area(unsigned long req_addr, 
+-					unsigned long size,
+-					int criteria)
+-{
+-	struct vm_struct *tmp;
+-	int status;
+-
+-	status = im_region_status(req_addr, size, &tmp);
+-	if ((criteria & status) == 0) {
+-		return NULL;
+-	}
+-	
+-	switch (status) {
+-	case IM_REGION_UNUSED:
+-		tmp = __add_new_im_area(req_addr, size);
+-		break;
+-	case IM_REGION_SUBSET:
+-		tmp = split_im_region(req_addr, size, tmp);
+-		break;
+-	case IM_REGION_EXISTS:
+-		/* Return requested region */
+-		break;
+-	case IM_REGION_SUPERSET:
+-		/* Return first existing subset of requested region */
+-		break;
+-	default:
+-		printk(KERN_ERR "%s() unexpected imalloc region status\n",
+-				__FUNCTION__);
+-		tmp = NULL;
+-	}
+-
+-	return tmp;
+-}
+-
+-struct vm_struct * im_get_free_area(unsigned long size)
+-{
+-	struct vm_struct *area;
+-	unsigned long addr;
+-	
+-	mutex_lock(&imlist_mutex);
+-	if (get_free_im_addr(size, &addr)) {
+-		printk(KERN_ERR "%s() cannot obtain addr for size 0x%lx\n",
+-				__FUNCTION__, size);
+-		area = NULL;
+-		goto next_im_done;
+-	}
+-
+-	area = __im_get_area(addr, size, IM_REGION_UNUSED);
+-	if (area == NULL) {
+-		printk(KERN_ERR 
+-		       "%s() cannot obtain area for addr 0x%lx size 0x%lx\n",
+-			__FUNCTION__, addr, size);
+-	}
+-next_im_done:
+-	mutex_unlock(&imlist_mutex);
+-	return area;
+-}
+-
+-struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
+-		int criteria)
+-{
+-	struct vm_struct *area;
+-
+-	mutex_lock(&imlist_mutex);
+-	area = __im_get_area(v_addr, size, criteria);
+-	mutex_unlock(&imlist_mutex);
+-	return area;
+-}
+-
+-void im_free(void * addr)
+-{
+-	struct vm_struct **p, *tmp;
+-  
+-	if (!addr)
+-		return;
+-	if ((unsigned long) addr & ~PAGE_MASK) {
+-		printk(KERN_ERR "Trying to %s bad address (%p)\n", __FUNCTION__,			addr);
+-		return;
+-	}
+-	mutex_lock(&imlist_mutex);
+-	for (p = &imlist ; (tmp = *p) ; p = &tmp->next) {
+-		if (tmp->addr == addr) {
+-			*p = tmp->next;
+-			unmap_vm_area(tmp);
+-			kfree(tmp);
+-			mutex_unlock(&imlist_mutex);
+-			return;
+-		}
+-	}
+-	mutex_unlock(&imlist_mutex);
+-	printk(KERN_ERR "Trying to %s nonexistent area (%p)\n", __FUNCTION__,
+-			addr);
+-}
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/init_32.c linux-2.6.22-try2/arch/powerpc/mm/init_32.c
+--- linux-2.6.22-570/arch/powerpc/mm/init_32.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/init_32.c	2007-12-19 15:29:22.000000000 -0500
+@@ -5,7 +5,6 @@
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *  PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
+  *
+  *  Derived from "arch/i386/mm/init.c"
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/init_64.c linux-2.6.22-try2/arch/powerpc/mm/init_64.c
+--- linux-2.6.22-570/arch/powerpc/mm/init_64.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/init_64.c	2007-12-19 15:29:22.000000000 -0500
+@@ -5,7 +5,6 @@
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *
+  *  Derived from "arch/i386/mm/init.c"
+  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/mem.c linux-2.6.22-try2/arch/powerpc/mm/mem.c
+--- linux-2.6.22-570/arch/powerpc/mm/mem.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/mem.c	2007-12-19 15:29:22.000000000 -0500
+@@ -5,7 +5,6 @@
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *  PPC44x/36-bit changes by Matt Porter (mporter@mvista.com)
+  *
+  *  Derived from "arch/i386/mm/init.c"
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/mmu_context_32.c linux-2.6.22-try2/arch/powerpc/mm/mmu_context_32.c
+--- linux-2.6.22-570/arch/powerpc/mm/mmu_context_32.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/mmu_context_32.c	2007-12-19 15:29:22.000000000 -0500
+@@ -11,7 +11,6 @@
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *
+  *  Derived from "arch/i386/mm/init.c"
+  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/mmu_decl.h linux-2.6.22-try2/arch/powerpc/mm/mmu_decl.h
+--- linux-2.6.22-570/arch/powerpc/mm/mmu_decl.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/mmu_decl.h	2007-12-19 15:29:22.000000000 -0500
+@@ -8,7 +8,6 @@
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *
+  *  Derived from "arch/i386/mm/init.c"
+  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+@@ -40,8 +39,8 @@
+ extern unsigned long ioremap_base;
+ extern unsigned int rtas_data, rtas_size;
+ 
+-struct _PTE;
+-extern struct _PTE *Hash, *Hash_end;
++struct hash_pte;
++extern struct hash_pte *Hash, *Hash_end;
+ extern unsigned long Hash_size, Hash_mask;
+ 
+ extern unsigned int num_tlbcam_entries;
+@@ -90,16 +89,4 @@
+ 	else
+ 		_tlbie(va);
+ }
+-#else /* CONFIG_PPC64 */
+-/* imalloc region types */
+-#define IM_REGION_UNUSED	0x1
+-#define IM_REGION_SUBSET	0x2
+-#define IM_REGION_EXISTS	0x4
+-#define IM_REGION_OVERLAP	0x8
+-#define IM_REGION_SUPERSET	0x10
+-
+-extern struct vm_struct * im_get_free_area(unsigned long size);
+-extern struct vm_struct * im_get_area(unsigned long v_addr, unsigned long size,
+-				      int region_type);
+-extern void im_free(void *addr);
+ #endif
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/pgtable_32.c linux-2.6.22-try2/arch/powerpc/mm/pgtable_32.c
+--- linux-2.6.22-570/arch/powerpc/mm/pgtable_32.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/pgtable_32.c	2007-12-19 15:29:22.000000000 -0500
+@@ -8,7 +8,6 @@
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *
+  *  Derived from "arch/i386/mm/init.c"
+  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+@@ -37,7 +36,6 @@
+ unsigned long ioremap_base;
+ unsigned long ioremap_bot;
+ EXPORT_SYMBOL(ioremap_bot);	/* aka VMALLOC_END */
+-int io_bat_index;
+ 
+ #if defined(CONFIG_6xx) || defined(CONFIG_POWER3)
+ #define HAVE_BATS	1
+@@ -300,51 +298,6 @@
+ 	}
+ }
+ 
+-/* is x a power of 4? */
+-#define is_power_of_4(x)	is_power_of_2(x) && (ffs(x) & 1)
+-
+-/*
+- * Set up a mapping for a block of I/O.
+- * virt, phys, size must all be page-aligned.
+- * This should only be called before ioremap is called.
+- */
+-void __init io_block_mapping(unsigned long virt, phys_addr_t phys,
+-			     unsigned int size, int flags)
+-{
+-	int i;
+-
+-	if (virt > KERNELBASE && virt < ioremap_bot)
+-		ioremap_bot = ioremap_base = virt;
+-
+-#ifdef HAVE_BATS
+-	/*
+-	 * Use a BAT for this if possible...
+-	 */
+-	if (io_bat_index < 2 && is_power_of_2(size)
+-	    && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) {
+-		setbat(io_bat_index, virt, phys, size, flags);
+-		++io_bat_index;
+-		return;
+-	}
+-#endif /* HAVE_BATS */
+-
+-#ifdef HAVE_TLBCAM
+-	/*
+-	 * Use a CAM for this if possible...
+-	 */
+-	if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size)
+-	    && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) {
+-		settlbcam(tlbcam_index, virt, phys, size, flags, 0);
+-		++tlbcam_index;
+-		return;
+-	}
+-#endif /* HAVE_TLBCAM */
+-
+-	/* No BATs available, put it in the page tables. */
+-	for (i = 0; i < size; i += PAGE_SIZE)
+-		map_page(virt + i, phys + i, flags);
+-}
+-
+ /* Scan the real Linux page tables and return a PTE pointer for
+  * a virtual address in a context.
+  * Returns true (1) if PTE was found, zero otherwise.  The pointer to
+@@ -379,82 +332,6 @@
+         return(retval);
+ }
+ 
+-/* Find physical address for this virtual address.  Normally used by
+- * I/O functions, but anyone can call it.
+- */
+-unsigned long iopa(unsigned long addr)
+-{
+-	unsigned long pa;
+-
+-	/* I don't know why this won't work on PMacs or CHRP.  It
+-	 * appears there is some bug, or there is some implicit
+-	 * mapping done not properly represented by BATs or in page
+-	 * tables.......I am actively working on resolving this, but
+-	 * can't hold up other stuff.  -- Dan
+-	 */
+-	pte_t *pte;
+-	struct mm_struct *mm;
+-
+-	/* Check the BATs */
+-	pa = v_mapped_by_bats(addr);
+-	if (pa)
+-		return pa;
+-
+-	/* Allow mapping of user addresses (within the thread)
+-	 * for DMA if necessary.
+-	 */
+-	if (addr < TASK_SIZE)
+-		mm = current->mm;
+-	else
+-		mm = &init_mm;
+-
+-	pa = 0;
+-	if (get_pteptr(mm, addr, &pte, NULL)) {
+-		pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK);
+-		pte_unmap(pte);
+-	}
+-
+-	return(pa);
+-}
+-
+-/* This is will find the virtual address for a physical one....
+- * Swiped from APUS, could be dangerous :-).
+- * This is only a placeholder until I really find a way to make this
+- * work.  -- Dan
+- */
+-unsigned long
+-mm_ptov (unsigned long paddr)
+-{
+-	unsigned long ret;
+-#if 0
+-	if (paddr < 16*1024*1024)
+-		ret = ZTWO_VADDR(paddr);
+-	else {
+-		int i;
+-
+-		for (i = 0; i < kmap_chunk_count;){
+-			unsigned long phys = kmap_chunks[i++];
+-			unsigned long size = kmap_chunks[i++];
+-			unsigned long virt = kmap_chunks[i++];
+-			if (paddr >= phys
+-			    && paddr < (phys + size)){
+-				ret = virt + paddr - phys;
+-				goto exit;
+-			}
+-		}
+-	
+-		ret = (unsigned long) __va(paddr);
+-	}
+-exit:
+-#ifdef DEBUGPV
+-	printk ("PTOV(%lx)=%lx\n", paddr, ret);
+-#endif
+-#else
+-	ret = (unsigned long)paddr + KERNELBASE;
+-#endif
+-	return ret;
+-}
+-
+ #ifdef CONFIG_DEBUG_PAGEALLOC
+ 
+ static int __change_page_attr(struct page *page, pgprot_t prot)
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/pgtable_64.c linux-2.6.22-try2/arch/powerpc/mm/pgtable_64.c
+--- linux-2.6.22-570/arch/powerpc/mm/pgtable_64.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/pgtable_64.c	2007-12-19 15:29:22.000000000 -0500
+@@ -7,7 +7,6 @@
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@samba.org)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *
+  *  Derived from "arch/i386/mm/init.c"
+  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+@@ -34,41 +33,27 @@
+ #include <linux/stddef.h>
+ #include <linux/vmalloc.h>
+ #include <linux/init.h>
+-#include <linux/delay.h>
+-#include <linux/bootmem.h>
+-#include <linux/highmem.h>
+-#include <linux/idr.h>
+-#include <linux/nodemask.h>
+-#include <linux/module.h>
+ 
+ #include <asm/pgalloc.h>
+ #include <asm/page.h>
+ #include <asm/prom.h>
+-#include <asm/lmb.h>
+-#include <asm/rtas.h>
+ #include <asm/io.h>
+ #include <asm/mmu_context.h>
+ #include <asm/pgtable.h>
+ #include <asm/mmu.h>
+-#include <asm/uaccess.h>
+ #include <asm/smp.h>
+ #include <asm/machdep.h>
+ #include <asm/tlb.h>
+-#include <asm/eeh.h>
+ #include <asm/processor.h>
+-#include <asm/mmzone.h>
+ #include <asm/cputable.h>
+ #include <asm/sections.h>
+ #include <asm/system.h>
+-#include <asm/iommu.h>
+ #include <asm/abs_addr.h>
+-#include <asm/vdso.h>
+ #include <asm/firmware.h>
+ 
+ #include "mmu_decl.h"
+ 
+-unsigned long ioremap_bot = IMALLOC_BASE;
+-static unsigned long phbs_io_bot = PHBS_IO_BASE;
++unsigned long ioremap_bot = IOREMAP_BASE;
+ 
+ /*
+  * map_io_page currently only called by __ioremap
+@@ -102,8 +87,8 @@
+ 		 * entry in the hardware page table.
+ 		 *
+ 		 */
+-		if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
+-				      mmu_io_psize)) {
++		if (htab_bolt_mapping(ea, (unsigned long)ea + PAGE_SIZE,
++				      pa, flags, mmu_io_psize)) {
+ 			printk(KERN_ERR "Failed to do bolted mapping IO "
+ 			       "memory at %016lx !\n", pa);
+ 			return -ENOMEM;
+@@ -113,8 +98,11 @@
+ }
+ 
+ 
+-static void __iomem * __ioremap_com(phys_addr_t addr, unsigned long pa,
+-			    unsigned long ea, unsigned long size,
++/**
++ * __ioremap_at - Low level function to establish the page tables
++ *                for an IO mapping
++ */
++void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
+ 			    unsigned long flags)
+ {
+ 	unsigned long i;
+@@ -122,17 +110,35 @@
+ 	if ((flags & _PAGE_PRESENT) == 0)
+ 		flags |= pgprot_val(PAGE_KERNEL);
+ 
++	WARN_ON(pa & ~PAGE_MASK);
++	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
++	WARN_ON(size & ~PAGE_MASK);
++
+ 	for (i = 0; i < size; i += PAGE_SIZE)
+-		if (map_io_page(ea+i, pa+i, flags))
++		if (map_io_page((unsigned long)ea+i, pa+i, flags))
+ 			return NULL;
+ 
+-	return (void __iomem *) (ea + (addr & ~PAGE_MASK));
++	return (void __iomem *)ea;
++}
++
++/**
++ * __iounmap_at - Low level function to tear down the page tables
++ *                  for an IO mapping. This is used for mappings that
++ *                  are manipulated manually, like partial unmapping of
++ *                  PCI IOs or ISA space.
++ */
++void __iounmap_at(void *ea, unsigned long size)
++{
++	WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
++	WARN_ON(size & ~PAGE_MASK);
++
++	unmap_kernel_range((unsigned long)ea, size);
+ }
+ 
+ void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
+ 			 unsigned long flags)
+ {
+-	unsigned long pa, ea;
++	phys_addr_t paligned;
+ 	void __iomem *ret;
+ 
+ 	/*
+@@ -144,27 +150,30 @@
+ 	 * IMALLOC_END
+ 	 * 
+ 	 */
+-	pa = addr & PAGE_MASK;
+-	size = PAGE_ALIGN(addr + size) - pa;
++	paligned = addr & PAGE_MASK;
++	size = PAGE_ALIGN(addr + size) - paligned;
+ 
+-	if ((size == 0) || (pa == 0))
++	if ((size == 0) || (paligned == 0))
+ 		return NULL;
+ 
+ 	if (mem_init_done) {
+ 		struct vm_struct *area;
+-		area = im_get_free_area(size);
++
++		area = __get_vm_area(size, VM_IOREMAP,
++				     ioremap_bot, IOREMAP_END);
+ 		if (area == NULL)
+ 			return NULL;
+-		ea = (unsigned long)(area->addr);
+-		ret = __ioremap_com(addr, pa, ea, size, flags);
++		ret = __ioremap_at(paligned, area->addr, size, flags);
+ 		if (!ret)
+-			im_free(area->addr);
++			vunmap(area->addr);
+ 	} else {
+-		ea = ioremap_bot;
+-		ret = __ioremap_com(addr, pa, ea, size, flags);
++		ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
+ 		if (ret)
+ 			ioremap_bot += size;
+ 	}
++
++	if (ret)
++		ret += addr & ~PAGE_MASK;
+ 	return ret;
+ }
+ 
+@@ -187,62 +196,9 @@
+ }
+ 
+ 
+-#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK))
+-
+-int __ioremap_explicit(phys_addr_t pa, unsigned long ea,
+-		       unsigned long size, unsigned long flags)
+-{
+-	struct vm_struct *area;
+-	void __iomem *ret;
+-	
+-	/* For now, require page-aligned values for pa, ea, and size */
+-	if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) ||
+-	    !IS_PAGE_ALIGNED(size)) {
+-		printk(KERN_ERR	"unaligned value in %s\n", __FUNCTION__);
+-		return 1;
+-	}
+-	
+-	if (!mem_init_done) {
+-		/* Two things to consider in this case:
+-		 * 1) No records will be kept (imalloc, etc) that the region
+-		 *    has been remapped
+-		 * 2) It won't be easy to iounmap() the region later (because
+-		 *    of 1)
+-		 */
+-		;
+-	} else {
+-		area = im_get_area(ea, size,
+-			IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS);
+-		if (area == NULL) {
+-			/* Expected when PHB-dlpar is in play */
+-			return 1;
+-		}
+-		if (ea != (unsigned long) area->addr) {
+-			printk(KERN_ERR "unexpected addr return from "
+-			       "im_get_area\n");
+-			return 1;
+-		}
+-	}
+-	
+-	ret = __ioremap_com(pa, pa, ea, size, flags);
+-	if (ret == NULL) {
+-		printk(KERN_ERR "ioremap_explicit() allocation failure !\n");
+-		return 1;
+-	}
+-	if (ret != (void *) ea) {
+-		printk(KERN_ERR "__ioremap_com() returned unexpected addr\n");
+-		return 1;
+-	}
+-
+-	return 0;
+-}
+-
+ /*  
+  * Unmap an IO region and remove it from imalloc'd list.
+  * Access to IO memory should be serialized by driver.
+- * This code is modeled after vmalloc code - unmap_vm_area()
+- *
+- * XXX	what about calls before mem_init_done (ie python_countermeasures())
+  */
+ void __iounmap(volatile void __iomem *token)
+ {
+@@ -251,9 +207,14 @@
+ 	if (!mem_init_done)
+ 		return;
+ 	
+-	addr = (void *) ((unsigned long __force) token & PAGE_MASK);
+-
+-	im_free(addr);
++	addr = (void *) ((unsigned long __force)
++			 PCI_FIX_ADDR(token) & PAGE_MASK);
++	if ((unsigned long)addr < ioremap_bot) {
++		printk(KERN_WARNING "Attempt to iounmap early bolted mapping"
++		       " at 0x%p\n", addr);
++		return;
++	}
++	vunmap(addr);
+ }
+ 
+ void iounmap(volatile void __iomem *token)
+@@ -264,77 +225,8 @@
+ 		__iounmap(token);
+ }
+ 
+-static int iounmap_subset_regions(unsigned long addr, unsigned long size)
+-{
+-	struct vm_struct *area;
+-
+-	/* Check whether subsets of this region exist */
+-	area = im_get_area(addr, size, IM_REGION_SUPERSET);
+-	if (area == NULL)
+-		return 1;
+-
+-	while (area) {
+-		iounmap((void __iomem *) area->addr);
+-		area = im_get_area(addr, size,
+-				IM_REGION_SUPERSET);
+-	}
+-
+-	return 0;
+-}
+-
+-int __iounmap_explicit(volatile void __iomem *start, unsigned long size)
+-{
+-	struct vm_struct *area;
+-	unsigned long addr;
+-	int rc;
+-	
+-	addr = (unsigned long __force) start & PAGE_MASK;
+-
+-	/* Verify that the region either exists or is a subset of an existing
+-	 * region.  In the latter case, split the parent region to create 
+-	 * the exact region 
+-	 */
+-	area = im_get_area(addr, size, 
+-			    IM_REGION_EXISTS | IM_REGION_SUBSET);
+-	if (area == NULL) {
+-		/* Determine whether subset regions exist.  If so, unmap */
+-		rc = iounmap_subset_regions(addr, size);
+-		if (rc) {
+-			printk(KERN_ERR
+-			       "%s() cannot unmap nonexistent range 0x%lx\n",
+- 				__FUNCTION__, addr);
+-			return 1;
+-		}
+-	} else {
+-		iounmap((void __iomem *) area->addr);
+-	}
+-	/*
+-	 * FIXME! This can't be right:
+-	iounmap(area->addr);
+-	 * Maybe it should be "iounmap(area);"
+-	 */
+-	return 0;
+-}
+-
+ EXPORT_SYMBOL(ioremap);
+ EXPORT_SYMBOL(ioremap_flags);
+ EXPORT_SYMBOL(__ioremap);
+ EXPORT_SYMBOL(iounmap);
+ EXPORT_SYMBOL(__iounmap);
+-
+-static DEFINE_SPINLOCK(phb_io_lock);
+-
+-void __iomem * reserve_phb_iospace(unsigned long size)
+-{
+-	void __iomem *virt_addr;
+-		
+-	if (phbs_io_bot >= IMALLOC_BASE) 
+-		panic("reserve_phb_iospace(): phb io space overflow\n");
+-			
+-	spin_lock(&phb_io_lock);
+-	virt_addr = (void __iomem *) phbs_io_bot;
+-	phbs_io_bot += size;
+-	spin_unlock(&phb_io_lock);
+-
+-	return virt_addr;
+-}
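
The reworked __ioremap() above page-aligns the physical address and size before mapping, then adds the sub-page offset back onto the pointer it returns ("ret += addr & ~PAGE_MASK"). The alignment arithmetic in isolation, assuming 4K pages and a made-up address:

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT    12
	#define PAGE_SIZE     (1ULL << PAGE_SHIFT)
	#define PAGE_MASK     (~(PAGE_SIZE - 1))
	#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)

	int main(void)
	{
		uint64_t addr = 0x80001234ULL;  /* example unaligned MMIO address */
		uint64_t size = 0x20;

		uint64_t paligned = addr & PAGE_MASK;
		uint64_t asize = PAGE_ALIGN(addr + size) - paligned;

		/* The mapping itself covers whole pages; the caller gets the
		 * sub-page offset added back, as in the hunk above. */
		printf("map %#llx + %#llx, returned offset %#llx\n",
		       (unsigned long long)paligned,
		       (unsigned long long)asize,
		       (unsigned long long)(addr & ~PAGE_MASK));
		return 0;
	}
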
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/ppc_mmu_32.c linux-2.6.22-try2/arch/powerpc/mm/ppc_mmu_32.c
+--- linux-2.6.22-570/arch/powerpc/mm/ppc_mmu_32.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/ppc_mmu_32.c	2007-12-19 15:29:22.000000000 -0500
+@@ -11,7 +11,6 @@
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *
+  *  Derived from "arch/i386/mm/init.c"
+  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+@@ -35,12 +34,12 @@
+ 
+ #include "mmu_decl.h"
+ 
+-PTE *Hash, *Hash_end;
++struct hash_pte *Hash, *Hash_end;
+ unsigned long Hash_size, Hash_mask;
+ unsigned long _SDR1;
+ 
+ union ubat {			/* BAT register values to be loaded */
+-	BAT	bat;
++	struct ppc_bat bat;
+ 	u32	word[2];
+ } BATS[8][2];			/* 8 pairs of IBAT, DBAT */
+ 
+@@ -245,7 +244,7 @@
+ 	cacheable_memzero(Hash, Hash_size);
+ 	_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
+ 
+-	Hash_end = (PTE *) ((unsigned long)Hash + Hash_size);
++	Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
+ 
+ 	printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n",
+ 	       total_memory >> 20, Hash_size >> 10, Hash);
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/tlb_32.c linux-2.6.22-try2/arch/powerpc/mm/tlb_32.c
+--- linux-2.6.22-570/arch/powerpc/mm/tlb_32.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/tlb_32.c	2007-12-19 15:29:22.000000000 -0500
+@@ -11,7 +11,6 @@
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *
+  *  Derived from "arch/i386/mm/init.c"
+  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+diff -Nurb linux-2.6.22-570/arch/powerpc/mm/tlb_64.c linux-2.6.22-try2/arch/powerpc/mm/tlb_64.c
+--- linux-2.6.22-570/arch/powerpc/mm/tlb_64.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/mm/tlb_64.c	2007-12-19 15:29:22.000000000 -0500
+@@ -8,7 +8,6 @@
+  *  Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
+  *  and Cort Dougan (PReP) (cort@cs.nmt.edu)
+  *    Copyright (C) 1996 Paul Mackerras
+- *  Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk).
+  *
+  *  Derived from "arch/i386/mm/init.c"
+  *    Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+@@ -239,3 +238,59 @@
+ 	pte_free_submit(*batchp);
+ 	*batchp = NULL;
+ }
++
++/**
++ * __flush_hash_table_range - Flush all HPTEs for a given address range
++ *                            from the hash table (and the TLB). But keeps
++ *                            the linux PTEs intact.
++ *
++ * @mm		: mm_struct of the target address space (generally init_mm)
++ * @start	: starting address
++ * @end         : ending address (not included in the flush)
++ *
++ * This function is mostly to be used by some IO hotplug code in order
++ * to remove all hash entries from a given address range used to map IO
++ * space on a removed PCI-PCI bridge without tearing down the full mapping
++ * since 64K pages may overlap with other bridges when using 64K pages
++ * with 4K HW pages on IO space.
++ *
++ * Because of that usage pattern, it's only available with CONFIG_HOTPLUG
++ * and is implemented for small size rather than speed.
++ */
++#ifdef CONFIG_HOTPLUG
++
++void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
++			      unsigned long end)
++{
++	unsigned long flags;
++
++	start = _ALIGN_DOWN(start, PAGE_SIZE);
++	end = _ALIGN_UP(end, PAGE_SIZE);
++
++	BUG_ON(!mm->pgd);
++
++	/* Note: Normally, we should only ever use a batch within a
++	 * PTE locked section. This violates the rule, but will work
++	 * since we don't actually modify the PTEs, we just flush the
++	 * hash while leaving the PTEs intact (including their reference
++	 * to being hashed). This is not the most performance-oriented
++	 * way to do things but is fine for our needs here.
++	 */
++	local_irq_save(flags);
++	arch_enter_lazy_mmu_mode();
++	for (; start < end; start += PAGE_SIZE) {
++		pte_t *ptep = find_linux_pte(mm->pgd, start);
++		unsigned long pte;
++
++		if (ptep == NULL)
++			continue;
++		pte = pte_val(*ptep);
++		if (!(pte & _PAGE_HASHPTE))
++			continue;
++		hpte_need_flush(mm, start, ptep, pte, 0);
++	}
++	arch_leave_lazy_mmu_mode();
++	local_irq_restore(flags);
++}
++
++#endif /* CONFIG_HOTPLUG */
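
__flush_hash_table_range() above first widens the range outward to page boundaries (_ALIGN_DOWN on start, _ALIGN_UP on end) and then visits one page per iteration, skipping pages whose PTE is missing or not marked _PAGE_HASHPTE. The same walk skeleton with a stand-in lookup; everything except the align-and-skip structure is invented for illustration:

	#include <stdio.h>

	#define PAGE_SIZE     0x1000UL
	#define ALIGN_DOWN(x) ((x) & ~(PAGE_SIZE - 1))
	#define ALIGN_UP(x)   (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
	#define PAGE_HASHPTE  0x400UL   /* illustrative flag bit */

	/* Stand-in for find_linux_pte(): pretend only even-numbered pages
	 * are mapped and hashed; 0 means "no PTE". */
	static unsigned long fake_pte_lookup(unsigned long va)
	{
		return ((va / PAGE_SIZE) % 2 == 0) ? PAGE_HASHPTE : 0;
	}

	static void flush_range(unsigned long start, unsigned long end)
	{
		start = ALIGN_DOWN(start);
		end = ALIGN_UP(end);

		for (; start < end; start += PAGE_SIZE) {
			unsigned long pte = fake_pte_lookup(start);

			if (!(pte & PAGE_HASHPTE))
				continue;   /* nothing hashed here, skip */
			printf("flush hash entry for page %#lx\n", start);
		}
	}

	int main(void)
	{
		flush_range(0x10234, 0x13800);  /* example unaligned range */
		return 0;
	}
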
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/Kconfig linux-2.6.22-try2/arch/powerpc/platforms/Kconfig
+--- linux-2.6.22-570/arch/powerpc/platforms/Kconfig	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/Kconfig	2007-12-19 15:29:22.000000000 -0500
+@@ -16,13 +16,6 @@
+ 	bool "Embedded 6xx/7xx/7xxx-based board"
+ 	depends on PPC32 && (BROKEN||BROKEN_ON_SMP)
+ 
+-config APUS
+-	bool "Amiga-APUS"
+-	depends on PPC32 && BROKEN
+-	help
+-	  Select APUS if configuring for a PowerUP Amiga.
+-	  More information is available at:
+-	  <http://linux-apus.sourceforge.net/>.
+ endchoice
+ 
+ source "arch/powerpc/platforms/pseries/Kconfig"
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/Kconfig.cputype linux-2.6.22-try2/arch/powerpc/platforms/Kconfig.cputype
+--- linux-2.6.22-570/arch/powerpc/platforms/Kconfig.cputype	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/platforms/Kconfig.cputype	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,252 @@
++config PPC64
++	bool "64-bit kernel"
++	default n
++	help
++	  This option selects whether a 32-bit or a 64-bit kernel
++	  will be built.
++
++menu "Processor support"
++choice
++	prompt "Processor Type"
++	depends on PPC32
++	default 6xx
++
++config CLASSIC32
++	bool "52xx/6xx/7xx/74xx"
++	select PPC_FPU
++	select 6xx
++	help
++	  There are four families of PowerPC chips supported.  The more common
++	  types (601, 603, 604, 740, 750, 7400), the Motorola embedded
++	  versions (821, 823, 850, 855, 860, 52xx, 82xx, 83xx), the AMCC
++	  embedded versions (403 and 405) and the high end 64 bit Power
++	  processors (POWER 3, POWER4, and IBM PPC970 also known as G5).
++
++	  This option is the catch-all for 6xx types, including some of the
++	  embedded versions.  Unless you see an option for the specific
++	  chip family you are using, you want this option.
++
++	  You do not want this if you are building a kernel for a 64-bit
++	  IBM RS/6000 or an Apple G5; for those, build a 64-bit kernel.
++
++	  If unsure, select this option.
++
++	  Note that the kernel runs in 32-bit mode even on 64-bit chips.
++
++config PPC_82xx
++	bool "Freescale 82xx"
++	select 6xx
++	select PPC_FPU
++
++config PPC_83xx
++	bool "Freescale 83xx"
++	select 6xx
++	select FSL_SOC
++	select 83xx
++	select PPC_FPU
++	select WANT_DEVICE_TREE
++
++config PPC_85xx
++	bool "Freescale 85xx"
++	select E500
++	select FSL_SOC
++	select 85xx
++	select WANT_DEVICE_TREE
++
++config PPC_86xx
++	bool "Freescale 86xx"
++	select 6xx
++	select FSL_SOC
++	select FSL_PCIE
++	select PPC_FPU
++	select ALTIVEC
++	help
++	  The Freescale E600 SoCs have 74xx cores.
++
++config PPC_8xx
++	bool "Freescale 8xx"
++	select FSL_SOC
++	select 8xx
++
++config 40x
++	bool "AMCC 40x"
++	select PPC_DCR_NATIVE
++
++config 44x
++	bool "AMCC 44x"
++	select PPC_DCR_NATIVE
++	select WANT_DEVICE_TREE
++
++config E200
++	bool "Freescale e200"
++
++endchoice
++
++config POWER4_ONLY
++	bool "Optimize for POWER4"
++	depends on PPC64
++	default n
++	---help---
++	  Cause the compiler to optimize for POWER4/POWER5/PPC970 processors.
++	  The resulting binary will not work on POWER3 or RS64 processors
++	  when compiled with binutils 2.15 or later.
++
++config POWER3
++	bool
++	depends on PPC64
++	default y if !POWER4_ONLY
++
++config POWER4
++	depends on PPC64
++	def_bool y
++
++config 6xx
++	bool
++
++# this is temp to handle compat with arch=ppc
++config 8xx
++	bool
++
++# this is temp to handle compat with arch=ppc
++config 83xx
++	bool
++
++# this is temp to handle compat with arch=ppc
++config 85xx
++	bool
++
++config E500
++	bool
++
++config PPC_FPU
++	bool
++	default y if PPC64
++
++config 4xx
++	bool
++	depends on 40x || 44x
++	default y
++
++config BOOKE
++	bool
++	depends on E200 || E500 || 44x
++	default y
++
++config FSL_BOOKE
++	bool
++	depends on E200 || E500
++	default y
++
++config PTE_64BIT
++	bool
++	depends on 44x || E500
++	default y if 44x
++	default y if E500 && PHYS_64BIT
++
++config PHYS_64BIT
++	bool 'Large physical address support' if E500
++	depends on 44x || E500
++	select RESOURCES_64BIT
++	default y if 44x
++	---help---
++	  This option enables kernel support for larger than 32-bit physical
++	  addresses.  This feature is not available on all e500 cores.
++
++	  If in doubt, say N here.
++
++config ALTIVEC
++	bool "AltiVec Support"
++	depends on CLASSIC32 || POWER4
++	---help---
++	  This option enables kernel support for the Altivec extensions to the
++	  PowerPC processor. The kernel currently supports saving and restoring
++	  altivec registers, and turning on the 'altivec enable' bit so user
++	  processes can execute altivec instructions.
++
++	  This option is only useful if you have a processor that supports
++	  altivec (G4, otherwise known as 74xx series), but does not have
++	  any effect on a non-altivec cpu (it does, however, add code to
++	  the kernel).
++
++	  If in doubt, say Y here.
++
++config SPE
++	bool "SPE Support"
++	depends on E200 || E500
++	default y
++	---help---
++	  This option enables kernel support for the Signal Processing
++	  Extensions (SPE) to the PowerPC processor. The kernel currently
++	  supports saving and restoring SPE registers, and turning on the
++	  'spe enable' bit so user processes can execute SPE instructions.
++
++	  This option is only useful if you have a processor that supports
++	  SPE (e500, otherwise known as 85xx series), but does not have any
++	  effect on a non-spe cpu (it does, however, add code to the kernel).
++
++	  If in doubt, say Y here.
++
++config PPC_STD_MMU
++	bool
++	depends on 6xx || POWER3 || POWER4 || PPC64
++	default y
++
++config PPC_STD_MMU_32
++	def_bool y
++	depends on PPC_STD_MMU && PPC32
++
++config PPC_MM_SLICES
++	bool
++	default y if HUGETLB_PAGE
++	default n
++
++config VIRT_CPU_ACCOUNTING
++	bool "Deterministic task and CPU time accounting"
++	depends on PPC64
++	default y
++	help
++	  Select this option to enable more accurate task and CPU time
++	  accounting.  This is done by reading a CPU counter on each
++	  kernel entry and exit and on transitions within the kernel
++	  between system, softirq and hardirq state, so there is a
++	  small performance impact.  This also enables accounting of
++	  stolen time on logically-partitioned systems running on
++	  IBM POWER5-based machines.
++
++	  If in doubt, say Y here.
++
++config SMP
++	depends on PPC_STD_MMU
++	bool "Symmetric multi-processing support"
++	---help---
++	  This enables support for systems with more than one CPU. If you have
++	  a system with only one CPU, say N. If you have a system with more
++	  than one CPU, say Y.  Note that the kernel does not currently
++	  support SMP machines with 603/603e/603ev or PPC750 ("G3") processors
++	  since they have inadequate hardware support for multiprocessor
++	  operation.
++
++	  If you say N here, the kernel will run on single and multiprocessor
++	  machines, but will use only one CPU of a multiprocessor machine. If
++	  you say Y here, the kernel will run on single-processor machines.
++	  On a single-processor machine, the kernel will run faster if you say
++	  N here.
++
++	  If you don't know what to do here, say N.
++
++config NR_CPUS
++	int "Maximum number of CPUs (2-128)"
++	range 2 128
++	depends on SMP
++	default "32" if PPC64
++	default "4"
++
++config NOT_COHERENT_CACHE
++	bool
++	depends on 4xx || 8xx || E200
++	default y
++
++config CHECK_CACHE_COHERENCY
++	bool
++
++endmenu
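
In the new Kconfig.cputype above, PHYS_64BIT selects RESOURCES_64BIT and pairs with PTE_64BIT because a physical address wider than 32 bits only fits if the PTE itself is 64 bits wide. A toy demonstration of why the wider type is needed; the layout is illustrative, not the real e500 PTE format:

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT 12

	typedef uint64_t pte64_t;   /* what CONFIG_PTE_64BIT makes room for */

	int main(void)
	{
		uint64_t phys = 0x1A0003000ULL; /* 36-bit physical address, above 4GiB */
		pte64_t pte = phys & ~((1ULL << PAGE_SHIFT) - 1);   /* keep frame bits */

		/* In a 32-bit PTE the upper frame bits simply would not fit. */
		printf("frame %#llx needs %s PTEs\n",
		       (unsigned long long)(pte >> PAGE_SHIFT),
		       (pte >> 32) ? "64-bit" : "32-bit");
		return 0;
	}
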
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/apus/Kconfig linux-2.6.22-try2/arch/powerpc/platforms/apus/Kconfig
+--- linux-2.6.22-570/arch/powerpc/platforms/apus/Kconfig	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/apus/Kconfig	1969-12-31 19:00:00.000000000 -0500
+@@ -1,130 +0,0 @@
+-
+-config AMIGA
+-	bool
+-	depends on APUS
+-	default y
+-	help
+-	  This option enables support for the Amiga series of computers.
+-
+-config ZORRO
+-	bool
+-	depends on APUS
+-	default y
+-	help
+-	  This enables support for the Zorro bus in the Amiga. If you have
+-	  expansion cards in your Amiga that conform to the Amiga
+-	  AutoConfig(tm) specification, say Y, otherwise N. Note that even
+-	  expansion cards that do not fit in the Zorro slots but fit in e.g.
+-	  the CPU slot may fall in this category, so you have to say Y to let
+-	  Linux use these.
+-
+-config ABSTRACT_CONSOLE
+-	bool
+-	depends on APUS
+-	default y
+-
+-config APUS_FAST_EXCEPT
+-	bool
+-	depends on APUS
+-	default y
+-
+-config AMIGA_PCMCIA
+-	bool "Amiga 1200/600 PCMCIA support"
+-	depends on APUS && EXPERIMENTAL
+-	help
+-	  Include support in the kernel for pcmcia on Amiga 1200 and Amiga
+-	  600. If you intend to use pcmcia cards say Y; otherwise say N.
+-
+-config AMIGA_BUILTIN_SERIAL
+-	tristate "Amiga builtin serial support"
+-	depends on APUS
+-	help
+-	  If you want to use your Amiga's built-in serial port in Linux,
+-	  answer Y.
+-
+-	  To compile this driver as a module, choose M here.
+-
+-config GVPIOEXT
+-	tristate "GVP IO-Extender support"
+-	depends on APUS
+-	help
+-	  If you want to use a GVP IO-Extender serial card in Linux, say Y.
+-	  Otherwise, say N.
+-
+-config GVPIOEXT_LP
+-	tristate "GVP IO-Extender parallel printer support"
+-	depends on GVPIOEXT
+-	help
+-	  Say Y to enable driving a printer from the parallel port on your
+-	  GVP IO-Extender card, N otherwise.
+-
+-config GVPIOEXT_PLIP
+-	tristate "GVP IO-Extender PLIP support"
+-	depends on GVPIOEXT
+-	help
+-	  Say Y to enable doing IP over the parallel port on your GVP
+-	  IO-Extender card, N otherwise.
+-
+-config MULTIFACE_III_TTY
+-	tristate "Multiface Card III serial support"
+-	depends on APUS
+-	help
+-	  If you want to use a Multiface III card's serial port in Linux,
+-	  answer Y.
+-
+-	  To compile this driver as a module, choose M here.
+-
+-config A2232
+-	tristate "Commodore A2232 serial support (EXPERIMENTAL)"
+-	depends on EXPERIMENTAL && APUS
+-	---help---
+-	  This option supports the 2232 7-port serial card shipped with the
+-	  Amiga 2000 and other Zorro-bus machines, dating from 1989.  At
+-	  a max of 19,200 bps, the ports are served by a 6551 ACIA UART chip
+-	  each, plus a 8520 CIA, and a master 6502 CPU and buffer as well. The
+-	  ports were connected with 8 pin DIN connectors on the card bracket,
+-	  for which 8 pin to DB25 adapters were supplied. The card also had
+-	  jumpers internally to toggle various pinning configurations.
+-
+-	  This driver can be built as a module; but then "generic_serial"
+-	  will also be built as a module. This has to be loaded before
+-	  "ser_a2232". If you want to do this, answer M here.
+-
+-config WHIPPET_SERIAL
+-	tristate "Hisoft Whippet PCMCIA serial support"
+-	depends on AMIGA_PCMCIA
+-	help
+-	  HiSoft has a web page at <http://www.hisoft.co.uk/>, but there
+-	  is no listing for the Whippet in their Amiga section.
+-
+-config APNE
+-	tristate "PCMCIA NE2000 support"
+-	depends on AMIGA_PCMCIA
+-	help
+-	  If you have a PCMCIA NE2000 compatible adapter, say Y.  Otherwise,
+-	  say N.
+-
+-	  To compile this driver as a module, choose M here: the
+-	  module will be called apne.
+-
+-config SERIAL_CONSOLE
+-	bool "Support for serial port console"
+-	depends on APUS && (AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y)
+-
+-config HEARTBEAT
+-	bool "Use power LED as a heartbeat"
+-	depends on APUS
+-	help
+-	  Use the power-on LED on your machine as a load meter.  The exact
+-	  behavior is platform-dependent, but normally the flash frequency is
+-	  a hyperbolic function of the 5-minute load average.
+-
+-config PROC_HARDWARE
+-	bool "/proc/hardware support"
+-	depends on APUS
+-
+-source "drivers/zorro/Kconfig"
+-
+-config PCI_PERMEDIA
+-	bool "PCI for Permedia2"
+-	depends on !4xx && !8xx && APUS
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/cell/io-workarounds.c linux-2.6.22-try2/arch/powerpc/platforms/cell/io-workarounds.c
+--- linux-2.6.22-570/arch/powerpc/platforms/cell/io-workarounds.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/cell/io-workarounds.c	2007-12-19 15:29:22.000000000 -0500
+@@ -102,7 +102,7 @@
+ 		vaddr = (unsigned long)PCI_FIX_ADDR(addr);
+ 
+ 		/* Check if it's in allowed range for  PIO */
+-		if (vaddr < PHBS_IO_BASE || vaddr >= IMALLOC_BASE)
++		if (vaddr < PHB_IO_BASE || vaddr > PHB_IO_END)
+ 			return;
+ 
+ 		/* Try to find a PTE. If not, clear the paddr, we'll do
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/file.c linux-2.6.22-try2/arch/powerpc/platforms/cell/spufs/file.c
+--- linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/file.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/cell/spufs/file.c	2007-12-19 15:29:22.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/poll.h>
+ #include <linux/ptrace.h>
++#include <linux/seq_file.h>
+ 
+ #include <asm/io.h>
+ #include <asm/semaphore.h>
+@@ -39,6 +40,7 @@
+ 
+ #define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000)
+ 
++
+ static int
+ spufs_mem_open(struct inode *inode, struct file *file)
+ {
+@@ -1797,6 +1799,29 @@
+ 	return 0;
+ }
+ 
++static int spufs_caps_show(struct seq_file *s, void *private)
++{
++	struct spu_context *ctx = s->private;
++
++	if (!(ctx->flags & SPU_CREATE_NOSCHED))
++		seq_puts(s, "sched\n");
++	if (!(ctx->flags & SPU_CREATE_ISOLATE))
++		seq_puts(s, "step\n");
++	return 0;
++}
++
++static int spufs_caps_open(struct inode *inode, struct file *file)
++{
++	return single_open(file, spufs_caps_show, SPUFS_I(inode)->i_ctx);
++}
++
++static const struct file_operations spufs_caps_fops = {
++	.open		= spufs_caps_open,
++	.read		= seq_read,
++	.llseek		= seq_lseek,
++	.release	= single_release,
++};
++
+ static ssize_t __spufs_mbox_info_read(struct spu_context *ctx,
+ 			char __user *buf, size_t len, loff_t *pos)
+ {
+@@ -2015,6 +2040,7 @@
+ };
+ 
+ struct tree_descr spufs_dir_contents[] = {
++	{ "capabilities", &spufs_caps_fops, 0444, },
+ 	{ "mem",  &spufs_mem_fops,  0666, },
+ 	{ "regs", &spufs_regs_fops,  0666, },
+ 	{ "mbox", &spufs_mbox_fops, 0444, },
+@@ -2050,6 +2076,7 @@
+ };
+ 
+ struct tree_descr spufs_dir_nosched_contents[] = {
++	{ "capabilities", &spufs_caps_fops, 0444, },
+ 	{ "mem",  &spufs_mem_fops,  0666, },
+ 	{ "mbox", &spufs_mbox_fops, 0444, },
+ 	{ "ibox", &spufs_ibox_fops, 0444, },
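
The new spufs "capabilities" file above is a standard read-only seq_file: single_open() binds spufs_caps_show() to the file, and the show routine emits a line only when the corresponding SPU_CREATE_* flag is *not* set (a NOSCHED context cannot be scheduled, an isolated one cannot be single-stepped). The flag-decoding logic in standalone form; the flag values below are placeholders, not the kernel's:

	#include <stdio.h>

	#define SPU_CREATE_NOSCHED 0x1  /* placeholder values */
	#define SPU_CREATE_ISOLATE 0x2

	/* Mirrors spufs_caps_show(): a capability is reported when its
	 * inhibiting flag is absent. */
	static void show_caps(FILE *s, unsigned int flags)
	{
		if (!(flags & SPU_CREATE_NOSCHED))
			fputs("sched\n", s);
		if (!(flags & SPU_CREATE_ISOLATE))
			fputs("step\n", s);
	}

	int main(void)
	{
		puts("flags=0:");
		show_caps(stdout, 0);
		puts("flags=NOSCHED:");
		show_caps(stdout, SPU_CREATE_NOSCHED);
		return 0;
	}
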
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/run.c linux-2.6.22-try2/arch/powerpc/platforms/cell/spufs/run.c
+--- linux-2.6.22-570/arch/powerpc/platforms/cell/spufs/run.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/cell/spufs/run.c	2007-12-19 15:29:22.000000000 -0500
+@@ -142,8 +142,12 @@
+ 			runcntl = SPU_RUNCNTL_RUNNABLE;
+ 		ctx->ops->runcntl_write(ctx, runcntl);
+ 	} else {
++		unsigned long mode = SPU_PRIVCNTL_MODE_NORMAL;
+ 		spu_start_tick(ctx);
+ 		ctx->ops->npc_write(ctx, *npc);
++		if (test_thread_flag(TIF_SINGLESTEP))
++			mode = SPU_PRIVCNTL_MODE_SINGLE_STEP;
++		out_be64(&ctx->spu->priv2->spu_privcntl_RW, mode);
+ 		ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE);
+ 	}
+ 
+@@ -334,7 +338,8 @@
+ 		ret = spu_process_events(ctx);
+ 
+ 	} while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP |
+-				      SPU_STATUS_STOPPED_BY_HALT)));
++				      SPU_STATUS_STOPPED_BY_HALT |
++				       SPU_STATUS_SINGLE_STEP)));
+ 
+ 	ctx->ops->master_stop(ctx);
+ 	ret = spu_run_fini(ctx, npc, &status);
+@@ -344,10 +349,15 @@
+ 	if ((ret == 0) ||
+ 	    ((ret == -ERESTARTSYS) &&
+ 	     ((status & SPU_STATUS_STOPPED_BY_HALT) ||
++	      (status & SPU_STATUS_SINGLE_STEP) ||
+ 	      ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+ 	       (status >> SPU_STOP_STATUS_SHIFT != 0x2104)))))
+ 		ret = status;
+ 
++	/* Note: we don't need to force_sig SIGTRAP on single-step,
++	 * since TIF_SINGLESTEP is set and the kernel will deliver the
++	 * signal upon return from the syscall anyway.
++	 */
+ 	if ((status & SPU_STATUS_STOPPED_BY_STOP)
+ 	    && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff) {
+ 		force_sig(SIGTRAP, current);
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/iseries/call_hpt.h linux-2.6.22-try2/arch/powerpc/platforms/iseries/call_hpt.h
+--- linux-2.6.22-570/arch/powerpc/platforms/iseries/call_hpt.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/iseries/call_hpt.h	2007-12-19 15:29:22.000000000 -0500
+@@ -76,24 +76,25 @@
+ 	return compressedStatus;
+ }
+ 
+-static inline u64 HvCallHpt_findValid(hpte_t *hpte, u64 vpn)
++static inline u64 HvCallHpt_findValid(struct hash_pte *hpte, u64 vpn)
+ {
+ 	return HvCall3Ret16(HvCallHptFindValid, hpte, vpn, 0, 0);
+ }
+ 
+-static inline u64 HvCallHpt_findNextValid(hpte_t *hpte, u32 hpteIndex,
++static inline u64 HvCallHpt_findNextValid(struct hash_pte *hpte, u32 hpteIndex,
+ 		u8 bitson, u8 bitsoff)
+ {
+ 	return HvCall3Ret16(HvCallHptFindNextValid, hpte, hpteIndex,
+ 			bitson, bitsoff);
+ }
+ 
+-static inline void HvCallHpt_get(hpte_t *hpte, u32 hpteIndex)
++static inline void HvCallHpt_get(struct hash_pte *hpte, u32 hpteIndex)
+ {
+ 	HvCall2Ret16(HvCallHptGet, hpte, hpteIndex, 0);
+ }
+ 
+-static inline void HvCallHpt_addValidate(u32 hpteIndex, u32 hBit, hpte_t *hpte)
++static inline void HvCallHpt_addValidate(u32 hpteIndex, u32 hBit,
++					 struct hash_pte *hpte)
+ {
+ 	HvCall4(HvCallHptAddValidate, hpteIndex, hBit, hpte->v, hpte->r);
+ }
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/iseries/htab.c linux-2.6.22-try2/arch/powerpc/platforms/iseries/htab.c
+--- linux-2.6.22-570/arch/powerpc/platforms/iseries/htab.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/iseries/htab.c	2007-12-19 15:29:22.000000000 -0500
+@@ -44,7 +44,7 @@
+ 			 unsigned long vflags, int psize)
+ {
+ 	long slot;
+-	hpte_t lhpte;
++	struct hash_pte lhpte;
+ 	int secondary = 0;
+ 
+ 	BUG_ON(psize != MMU_PAGE_4K);
+@@ -99,7 +99,7 @@
+ 
+ static unsigned long iSeries_hpte_getword0(unsigned long slot)
+ {
+-	hpte_t hpte;
++	struct hash_pte hpte;
+ 
+ 	HvCallHpt_get(&hpte, slot);
+ 	return hpte.v;
+@@ -144,7 +144,7 @@
+ static long iSeries_hpte_updatepp(unsigned long slot, unsigned long newpp,
+ 				  unsigned long va, int psize, int local)
+ {
+-	hpte_t hpte;
++	struct hash_pte hpte;
+ 	unsigned long want_v;
+ 
+ 	iSeries_hlock(slot);
+@@ -176,7 +176,7 @@
+  */
+ static long iSeries_hpte_find(unsigned long vpn)
+ {
+-	hpte_t hpte;
++	struct hash_pte hpte;
+ 	long slot;
+ 
+ 	/*
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/iseries/pci.c linux-2.6.22-try2/arch/powerpc/platforms/iseries/pci.c
+--- linux-2.6.22-570/arch/powerpc/platforms/iseries/pci.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/iseries/pci.c	2007-12-19 15:29:22.000000000 -0500
+@@ -742,6 +742,11 @@
+ 	/* Install IO hooks */
+ 	ppc_pci_io = iseries_pci_io;
+ 
++	/* iSeries has no IO space in the common sense, so it needs to set
++	 * the IO base to 0
++	 */
++	pci_io_base = 0;
++
+ 	if (root == NULL) {
+ 		printk(KERN_CRIT "iSeries_pcibios_init: can't find root "
+ 				"of device tree\n");
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/maple/pci.c linux-2.6.22-try2/arch/powerpc/platforms/maple/pci.c
+--- linux-2.6.22-570/arch/powerpc/platforms/maple/pci.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/maple/pci.c	2007-12-19 15:29:22.000000000 -0500
+@@ -519,23 +519,6 @@
+ 	DBG(" <- maple_pci_irq_fixup\n");
+ }
+ 
+-static void __init maple_fixup_phb_resources(void)
+-{
+-	struct pci_controller *hose, *tmp;
+-	
+-	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+-		unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base;
+-
+-		hose->io_resource.start += offset;
+-		hose->io_resource.end += offset;
+-
+-		printk(KERN_INFO "PCI Host %d, io start: %llx; io end: %llx\n",
+-		       hose->global_number,
+-		       (unsigned long long)hose->io_resource.start,
+-		       (unsigned long long)hose->io_resource.end);
+-	}
+-}
+-
+ void __init maple_pci_init(void)
+ {
+ 	struct device_node *np, *root;
+@@ -573,24 +556,6 @@
+ 	if (ht && add_bridge(ht) != 0)
+ 		of_node_put(ht);
+ 
+-        /*
+-         * We need to call pci_setup_phb_io for the HT bridge first
+-         * so it gets the I/O port numbers starting at 0, and we
+-         * need to call it for the AGP bridge after that so it gets
+-         * small positive I/O port numbers.
+-         */
+-        if (u3_ht)
+-                pci_setup_phb_io(u3_ht, 1);
+-        if (u3_agp)
+-                pci_setup_phb_io(u3_agp, 0);
+-        if (u4_pcie)
+-                pci_setup_phb_io(u4_pcie, 0);
+-
+-	/* Fixup the IO resources on our host bridges as the common code
+-	 * does it only for childs of the host bridges
+-	 */
+-	maple_fixup_phb_resources();
+-
+ 	/* Setup the linkage between OF nodes and PHBs */ 
+ 	pci_devs_phb_init();
+ 
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pasemi/pci.c linux-2.6.22-try2/arch/powerpc/platforms/pasemi/pci.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pasemi/pci.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/pasemi/pci.c	2007-12-19 15:29:22.000000000 -0500
+@@ -150,29 +150,11 @@
+ 	printk(KERN_INFO "Found PA-PXP PCI host bridge.\n");
+ 
+ 	/* Interpret the "ranges" property */
+-	/* This also maps the I/O region and sets isa_io/mem_base */
+ 	pci_process_bridge_OF_ranges(hose, dev, 1);
+-	pci_setup_phb_io(hose, 1);
+ 
+ 	return 0;
+ }
+ 
+-
+-static void __init pas_fixup_phb_resources(void)
+-{
+-	struct pci_controller *hose, *tmp;
+-
+-	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+-		unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base;
+-		hose->io_resource.start += offset;
+-		hose->io_resource.end += offset;
+-		printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n",
+-		       hose->global_number,
+-		       hose->io_resource.start, hose->io_resource.end);
+-	}
+-}
+-
+-
+ void __init pas_pci_init(void)
+ {
+ 	struct device_node *np, *root;
+@@ -190,8 +172,6 @@
+ 
+ 	of_node_put(root);
+ 
+-	pas_fixup_phb_resources();
+-
+ 	/* Setup the linkage between OF nodes and PHBs */
+ 	pci_devs_phb_init();
+ 
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/powermac/pci.c linux-2.6.22-try2/arch/powerpc/platforms/powermac/pci.c
+--- linux-2.6.22-570/arch/powerpc/platforms/powermac/pci.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/powermac/pci.c	2007-12-19 15:29:22.000000000 -0500
+@@ -1006,19 +1006,6 @@
+ #endif /* CONFIG_PPC32 */
+ }
+ 
+-#ifdef CONFIG_PPC64
+-static void __init pmac_fixup_phb_resources(void)
+-{
+-	struct pci_controller *hose, *tmp;
+-
+-	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+-		printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n",
+-		       hose->global_number,
+-		       hose->io_resource.start, hose->io_resource.end);
+-	}
+-}
+-#endif
+-
+ void __init pmac_pci_init(void)
+ {
+ 	struct device_node *np, *root;
+@@ -1053,25 +1040,6 @@
+ 	if (ht && add_bridge(ht) != 0)
+ 		of_node_put(ht);
+ 
+-	/*
+-	 * We need to call pci_setup_phb_io for the HT bridge first
+-	 * so it gets the I/O port numbers starting at 0, and we
+-	 * need to call it for the AGP bridge after that so it gets
+-	 * small positive I/O port numbers.
+-	 */
+-	if (u3_ht)
+-		pci_setup_phb_io(u3_ht, 1);
+-	if (u3_agp)
+-		pci_setup_phb_io(u3_agp, 0);
+-	if (u4_pcie)
+-		pci_setup_phb_io(u4_pcie, 0);
+-
+-	/*
+-	 * On ppc64, fixup the IO resources on our host bridges as
+-	 * the common code does it only for children of the host bridges
+-	 */
+-	pmac_fixup_phb_resources();
+-
+ 	/* Setup the linkage between OF nodes and PHBs */
+ 	pci_devs_phb_init();
+ 
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/powermac/setup.c linux-2.6.22-try2/arch/powerpc/platforms/powermac/setup.c
+--- linux-2.6.22-570/arch/powerpc/platforms/powermac/setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/powermac/setup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -98,8 +98,6 @@
+ int sccdbg;
+ #endif
+ 
+-extern void zs_kgdb_hook(int tty_num);
+-
+ sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN;
+ EXPORT_SYMBOL(sys_ctrler);
+ 
+@@ -330,10 +328,6 @@
+ 	l2cr_init();
+ #endif /* CONFIG_PPC32 */
+ 
+-#ifdef CONFIG_KGDB
+-	zs_kgdb_hook(0);
+-#endif
+-
+ 	find_via_cuda();
+ 	find_via_pmu();
+ 	smu_init();
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/ps3/htab.c linux-2.6.22-try2/arch/powerpc/platforms/ps3/htab.c
+--- linux-2.6.22-570/arch/powerpc/platforms/ps3/htab.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/ps3/htab.c	2007-12-19 15:29:22.000000000 -0500
+@@ -34,7 +34,7 @@
+ #define DBG(fmt...) do{if(0)printk(fmt);}while(0)
+ #endif
+ 
+-static hpte_t *htab;
++static struct hash_pte *htab;
+ static unsigned long htab_addr;
+ static unsigned char *bolttab;
+ static unsigned char *inusetab;
+@@ -44,8 +44,8 @@
+ #define debug_dump_hpte(_a, _b, _c, _d, _e, _f, _g) \
+ 	_debug_dump_hpte(_a, _b, _c, _d, _e, _f, _g, __func__, __LINE__)
+ static void _debug_dump_hpte(unsigned long pa, unsigned long va,
+-	unsigned long group, unsigned long bitmap, hpte_t lhpte, int psize,
+-	unsigned long slot, const char* func, int line)
++	unsigned long group, unsigned long bitmap, struct hash_pte lhpte,
++	int psize, unsigned long slot, const char* func, int line)
+ {
+ 	DBG("%s:%d: pa     = %lxh\n", func, line, pa);
+ 	DBG("%s:%d: lpar   = %lxh\n", func, line,
+@@ -63,7 +63,7 @@
+ 	unsigned long pa, unsigned long rflags, unsigned long vflags, int psize)
+ {
+ 	unsigned long slot;
+-	hpte_t lhpte;
++	struct hash_pte lhpte;
+ 	int secondary = 0;
+ 	unsigned long result;
+ 	unsigned long bitmap;
+@@ -255,7 +255,7 @@
+ 
+ 	ppc64_pft_size = __ilog2(htab_size);
+ 
+-	bitmap_size = htab_size / sizeof(hpte_t) / 8;
++	bitmap_size = htab_size / sizeof(struct hash_pte) / 8;
+ 
+ 	bolttab = __va(lmb_alloc(bitmap_size, 1));
+ 	inusetab = __va(lmb_alloc(bitmap_size, 1));
+@@ -273,7 +273,7 @@
+ 
+ 	result = lv1_map_htab(0, &htab_addr);
+ 
+-	htab = (hpte_t *)__ioremap(htab_addr, htab_size,
++	htab = (struct hash_pte *)__ioremap(htab_addr, htab_size,
+ 				   pgprot_val(PAGE_READONLY_X));
+ 
+ 	DBG("%s:%d: lpar %016lxh, virt %016lxh\n", __func__, __LINE__,
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/Makefile linux-2.6.22-try2/arch/powerpc/platforms/pseries/Makefile
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/pseries/Makefile	2007-12-19 15:29:22.000000000 -0500
+@@ -8,7 +8,7 @@
+ obj-$(CONFIG_SMP)	+= smp.o
+ obj-$(CONFIG_XICS)	+= xics.o
+ obj-$(CONFIG_SCANLOG)	+= scanlog.o
+-obj-$(CONFIG_EEH)	+= eeh.o eeh_cache.o eeh_driver.o eeh_event.o
++obj-$(CONFIG_EEH)	+= eeh.o eeh_cache.o eeh_driver.o eeh_event.o eeh_sysfs.o
+ obj-$(CONFIG_KEXEC)	+= kexec.o
+ obj-$(CONFIG_PCI)	+= pci.o pci_dlpar.o
+ obj-$(CONFIG_PCI_MSI)	+= msi.o
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh.c linux-2.6.22-try2/arch/powerpc/platforms/pseries/eeh.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/pseries/eeh.c	2007-12-19 15:29:22.000000000 -0500
+@@ -1,6 +1,8 @@
+ /*
+  * eeh.c
+- * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation
++ * Copyright IBM Corporation 2001, 2005, 2006
++ * Copyright Dave Engebretsen & Todd Inglett 2001
++ * Copyright Linas Vepstas 2005, 2006
+  *
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License as published by
+@@ -15,6 +17,8 @@
+  * You should have received a copy of the GNU General Public License
+  * along with this program; if not, write to the Free Software
+  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
++ *
++ * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+  */
+ 
+ #include <linux/delay.h>
+@@ -117,7 +121,6 @@
+ static unsigned long ignored_check;
+ static unsigned long total_mmio_ffs;
+ static unsigned long false_positives;
+-static unsigned long ignored_failures;
+ static unsigned long slot_resets;
+ 
+ #define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
+@@ -505,6 +508,7 @@
+ 		printk(KERN_WARNING "EEH: read_slot_reset_state() failed; rc=%d dn=%s\n",
+ 		       ret, dn->full_name);
+ 		false_positives++;
++		pdn->eeh_false_positives++;
+ 		rc = 0;
+ 		goto dn_unlock;
+ 	}
+@@ -513,6 +517,7 @@
+ 	 * they are empty when they don't have children. */
+ 	if ((rets[0] == 5) && (dn->child == NULL)) {
+ 		false_positives++;
++		pdn->eeh_false_positives++;
+ 		rc = 0;
+ 		goto dn_unlock;
+ 	}
+@@ -522,6 +527,7 @@
+ 		printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
+ 		       ret, dn->full_name);
+ 		false_positives++;
++		pdn->eeh_false_positives++;
+ 		rc = 0;
+ 		goto dn_unlock;
+ 	}
+@@ -529,6 +535,7 @@
+ 	/* If not the kind of error we know about, punt. */
+ 	if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
+ 		false_positives++;
++		pdn->eeh_false_positives++;
+ 		rc = 0;
+ 		goto dn_unlock;
+ 	}
+@@ -921,6 +928,7 @@
+ 	pdn->eeh_mode = 0;
+ 	pdn->eeh_check_count = 0;
+ 	pdn->eeh_freeze_count = 0;
++	pdn->eeh_false_positives = 0;
+ 
+ 	if (status && strcmp(status, "ok") != 0)
+ 		return NULL;	/* ignore devices with bad status */
+@@ -1139,7 +1147,8 @@
+ 	pdn = PCI_DN(dn);
+ 	pdn->pcidev = dev;
+ 
+-	pci_addr_cache_insert_device (dev);
++	pci_addr_cache_insert_device(dev);
++	eeh_sysfs_add_device(dev);
+ }
+ 
+ void eeh_add_device_tree_late(struct pci_bus *bus)
+@@ -1178,6 +1187,7 @@
+ 	printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev));
+ #endif
+ 	pci_addr_cache_remove_device(dev);
++	eeh_sysfs_remove_device(dev);
+ 
+ 	dn = pci_device_to_OF_node(dev);
+ 	if (PCI_DN(dn)->pcidev) {
+@@ -1214,11 +1224,10 @@
+ 				"check not wanted=%ld\n"
+ 				"eeh_total_mmio_ffs=%ld\n"
+ 				"eeh_false_positives=%ld\n"
+-				"eeh_ignored_failures=%ld\n"
+ 				"eeh_slot_resets=%ld\n",
+ 				no_device, no_dn, no_cfg_addr, 
+ 				ignored_check, total_mmio_ffs, 
+-				false_positives, ignored_failures, 
++				false_positives,
+ 				slot_resets);
+ 	}
+ 
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_cache.c linux-2.6.22-try2/arch/powerpc/platforms/pseries/eeh_cache.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_cache.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/pseries/eeh_cache.c	2007-12-19 15:29:22.000000000 -0500
+@@ -2,7 +2,8 @@
+  * eeh_cache.c
+  * PCI address cache; allows the lookup of PCI devices based on I/O address
+  *
+- * Copyright (C) 2004 Linas Vepstas <linas@austin.ibm.com> IBM Corporation
++ * Copyright IBM Corporation 2004
++ * Copyright Linas Vepstas <linas@austin.ibm.com> 2004
+  *
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License as published by
+@@ -295,6 +296,8 @@
+ 			continue;
+ 		pci_dev_get (dev);  /* matching put is in eeh_remove_device() */
+ 		PCI_DN(dn)->pcidev = dev;
++
++		eeh_sysfs_add_device(dev);
+ 	}
+ 
+ #ifdef DEBUG
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_driver.c linux-2.6.22-try2/arch/powerpc/platforms/pseries/eeh_driver.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_driver.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/pseries/eeh_driver.c	2007-12-19 15:29:22.000000000 -0500
+@@ -1,6 +1,7 @@
+ /*
+  * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
+- * Copyright (C) 2004, 2005 Linas Vepstas <linas@linas.org>
++ * Copyright IBM Corp. 2004 2005
++ * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
+  *
+  * All rights reserved.
+  *
+@@ -19,8 +20,7 @@
+  * along with this program; if not, write to the Free Software
+  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+  *
+- * Send feedback to <linas@us.ibm.com>
+- *
++ * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
+  */
+ #include <linux/delay.h>
+ #include <linux/interrupt.h>
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_sysfs.c linux-2.6.22-try2/arch/powerpc/platforms/pseries/eeh_sysfs.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/eeh_sysfs.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/powerpc/platforms/pseries/eeh_sysfs.c	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,87 @@
++/*
++ * Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
++ * Copyright IBM Corporation 2007
++ * Copyright Linas Vepstas <linas@austin.ibm.com> 2007
++ *
++ * All rights reserved.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or (at
++ * your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
++ * NON INFRINGEMENT.  See the GNU General Public License for more
++ * details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
++ *
++ * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
++ */
++#include <linux/pci.h>
++#include <asm/ppc-pci.h>
++#include <asm/pci-bridge.h>
++#include <linux/kobject.h>
++
++/**
++ * EEH_SHOW_ATTR -- create sysfs entry for eeh statistic
++ * @_name: name of file in sysfs directory
++ * @_memb: name of member in struct pci_dn to access
++ * @_format: printf format for display
++ *
++ * All of the attributes look very similar, so just
++ * auto-gen a cut-n-paste routine to display them.
++ */
++#define EEH_SHOW_ATTR(_name,_memb,_format)               \
++static ssize_t eeh_show_##_name(struct device *dev,      \
++		struct device_attribute *attr, char *buf)          \
++{                                                        \
++	struct pci_dev *pdev = to_pci_dev(dev);               \
++	struct device_node *dn = pci_device_to_OF_node(pdev); \
++	struct pci_dn *pdn;                                   \
++	                                                      \
++	if (!dn || PCI_DN(dn) == NULL)                        \
++		return 0;                                          \
++	                                                      \
++	pdn = PCI_DN(dn);                                     \
++	return sprintf(buf, _format "\n", pdn->_memb);        \
++}                                                        \
++static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
++
++
++EEH_SHOW_ATTR(eeh_mode, eeh_mode, "0x%x");
++EEH_SHOW_ATTR(eeh_config_addr, eeh_config_addr, "0x%x");
++EEH_SHOW_ATTR(eeh_pe_config_addr, eeh_pe_config_addr, "0x%x");
++EEH_SHOW_ATTR(eeh_check_count, eeh_check_count, "%d");
++EEH_SHOW_ATTR(eeh_freeze_count, eeh_freeze_count, "%d");
++EEH_SHOW_ATTR(eeh_false_positives, eeh_false_positives, "%d");
++
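++/*
++ * The attributes above appear as read-only (S_IRUGO) files in each
++ * PCI device's sysfs directory.
++ */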
++void eeh_sysfs_add_device(struct pci_dev *pdev)
++{
++	int rc = 0;
++
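++	/* device_create_file() returns 0 or a negative errno, so any
++	 * failure leaves rc non-zero */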
++	rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
++	rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
++	rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
++	rc += device_create_file(&pdev->dev, &dev_attr_eeh_check_count);
++	rc += device_create_file(&pdev->dev, &dev_attr_eeh_false_positives);
++	rc += device_create_file(&pdev->dev, &dev_attr_eeh_freeze_count);
++
++	if (rc)
++		printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
++}
++
++void eeh_sysfs_remove_device(struct pci_dev *pdev)
++{
++	device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
++	device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
++	device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
++	device_remove_file(&pdev->dev, &dev_attr_eeh_check_count);
++	device_remove_file(&pdev->dev, &dev_attr_eeh_false_positives);
++	device_remove_file(&pdev->dev, &dev_attr_eeh_freeze_count);
++}
++
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/pci_dlpar.c linux-2.6.22-try2/arch/powerpc/platforms/pseries/pci_dlpar.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/pci_dlpar.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/pseries/pci_dlpar.c	2007-12-19 15:29:22.000000000 -0500
+@@ -110,8 +110,6 @@
+ 			}
+ 		}
+ 	}
+-
+-	eeh_add_device_tree_late(bus);
+ }
+ EXPORT_SYMBOL_GPL(pcibios_fixup_new_pci_devices);
+ 
+@@ -139,6 +137,8 @@
+ 
+ 	/* Make the discovered devices available */
+ 	pci_bus_add_devices(child_bus);
++
++	eeh_add_device_tree_late(child_bus);
+ 	return 0;
+ }
+ 
+@@ -171,6 +171,7 @@
+ 		if (!list_empty(&bus->devices)) {
+ 			pcibios_fixup_new_pci_devices(bus, 0);
+ 			pci_bus_add_devices(bus);
++			eeh_add_device_tree_late(bus);
+ 		}
+ 	} else if (mode == PCI_PROBE_NORMAL) {
+ 		/* use legacy probe */
+@@ -179,6 +180,7 @@
+ 		if (num) {
+ 			pcibios_fixup_new_pci_devices(bus, 1);
+ 			pci_bus_add_devices(bus);
++			eeh_add_device_tree_late(bus);
+ 		}
+ 
+ 		list_for_each_entry(dev, &bus->devices, bus_list)
+@@ -200,8 +202,6 @@
+ 	rtas_setup_phb(phb);
+ 	pci_process_bridge_OF_ranges(phb, dn, 0);
+ 
+-	pci_setup_phb_io_dynamic(phb, primary);
+-
+ 	pci_devs_phb_init_dynamic(phb);
+ 
+ 	if (dn->child)
+@@ -210,6 +210,7 @@
+ 	scan_phb(phb);
+ 	pcibios_fixup_new_pci_devices(phb->bus, 0);
+ 	pci_bus_add_devices(phb->bus);
++	eeh_add_device_tree_late(phb->bus);
+ 
+ 	return phb;
+ }
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/pseries.h linux-2.6.22-try2/arch/powerpc/platforms/pseries/pseries.h
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/pseries.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/pseries/pseries.h	2007-12-19 15:29:22.000000000 -0500
+@@ -33,6 +33,8 @@
+ static inline void setup_kexec_cpu_down_mpic(void) { }
+ #endif
+ 
++extern void pSeries_final_fixup(void);
++
+ /* Poweron flag used for enabling auto ups restart */
+ extern unsigned long rtas_poweron_auto;
+ 
+diff -Nurb linux-2.6.22-570/arch/powerpc/platforms/pseries/setup.c linux-2.6.22-try2/arch/powerpc/platforms/pseries/setup.c
+--- linux-2.6.22-570/arch/powerpc/platforms/pseries/setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/platforms/pseries/setup.c	2007-12-19 15:29:22.000000000 -0500
+@@ -399,6 +399,7 @@
+ 	 * a good time to find other work to dispatch.
+ 	 */
+ 	get_lppaca()->idle = 1;
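++	/* tell the hypervisor it may reclaim this dedicated CPU's idle cycles */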
++	get_lppaca()->donate_dedicated_cpu = 1;
+ 
+ 	/*
+ 	 * We come in with interrupts disabled, and need_resched()
+@@ -431,6 +432,7 @@
+ 
+ out:
+ 	HMT_medium();
++	get_lppaca()->donate_dedicated_cpu = 0;
+ 	get_lppaca()->idle = 0;
+ }
+ 
+diff -Nurb linux-2.6.22-570/arch/powerpc/sysdev/tsi108_dev.c linux-2.6.22-try2/arch/powerpc/sysdev/tsi108_dev.c
+--- linux-2.6.22-570/arch/powerpc/sysdev/tsi108_dev.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/powerpc/sysdev/tsi108_dev.c	2007-12-19 15:29:22.000000000 -0500
+@@ -72,12 +72,11 @@
+ 	int ret;
+ 
+ 	for (np = NULL, i = 0;
+-	     (np = of_find_compatible_node(np, "network", "tsi-ethernet")) != NULL;
++	     (np = of_find_compatible_node(np, "network", "tsi108-ethernet")) != NULL;
+ 	     i++) {
+ 		struct resource r[2];
+-		struct device_node *phy;
++		struct device_node *phy, *mdio;
+ 		hw_info tsi_eth_data;
+-		const unsigned int *id;
+ 		const unsigned int *phy_id;
+ 		const void *mac_addr;
+ 		const phandle *ph;
+@@ -111,6 +110,13 @@
+ 		if (mac_addr)
+ 			memcpy(tsi_eth_data.mac_addr, mac_addr, 6);
+ 
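++		/* the PHY register window now comes from the mdio node */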
++		ph = of_get_property(np, "mdio-handle", NULL);
++		mdio = of_find_node_by_phandle(*ph);
++		ret = of_address_to_resource(mdio, 0, &res);
++		of_node_put(mdio);
++		if (ret)
++			goto unreg;
++
+ 		ph = of_get_property(np, "phy-handle", NULL);
+ 		phy = of_find_node_by_phandle(*ph);
+ 
+@@ -119,20 +125,25 @@
+ 			goto unreg;
+ 		}
+ 
+-		id = of_get_property(phy, "reg", NULL);
+-		phy_id = of_get_property(phy, "phy-id", NULL);
+-		ret = of_address_to_resource(phy, 0, &res);
+-		if (ret) {
+-			of_node_put(phy);
+-			goto unreg;
+-		}
++		phy_id = of_get_property(phy, "reg", NULL);
++
+ 		tsi_eth_data.regs = r[0].start;
+ 		tsi_eth_data.phyregs = res.start;
+ 		tsi_eth_data.phy = *phy_id;
+ 		tsi_eth_data.irq_num = irq_of_parse_and_map(np, 0);
+-		if (of_device_is_compatible(phy, "bcm54xx"))
++
++		/* Some boards with the TSI108 bridge (e.g. Holly)
++		 * have a miswiring of the ethernet PHYs which
++		 * requires a workaround.  The special
++		 * "txc-rxc-delay-disable" property enables this
++		 * workaround.  FIXME: Need to port the tsi108_eth
++		 * driver itself to phylib and use a non-misleading
++		 * name for the workaround flag - it's not actually to
++		 * do with the model of PHY in use */
++		if (of_get_property(phy, "txc-rxc-delay-disable", NULL))
+ 			tsi_eth_data.phy_type = TSI108_PHY_BCM54XX;
+ 		of_node_put(phy);
++
+ 		ret =
+ 		    platform_device_add_data(tsi_eth_dev, &tsi_eth_data,
+ 					     sizeof(hw_info));
+diff -Nurb linux-2.6.22-570/arch/ppc/8260_io/enet.c linux-2.6.22-try2/arch/ppc/8260_io/enet.c
+--- linux-2.6.22-570/arch/ppc/8260_io/enet.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/8260_io/enet.c	2007-12-19 15:29:23.000000000 -0500
+@@ -477,9 +477,9 @@
+ 		}
+ 		else {
+ 			skb_put(skb,pkt_len-4);	/* Make room */
+-			eth_copy_and_sum(skb,
++			skb_copy_to_linear_data(skb,
+ 				(unsigned char *)__va(bdp->cbd_bufaddr),
+-				pkt_len-4, 0);
++				pkt_len-4);
+ 			skb->protocol=eth_type_trans(skb,dev);
+ 			netif_rx(skb);
+ 		}
+diff -Nurb linux-2.6.22-570/arch/ppc/8260_io/fcc_enet.c linux-2.6.22-try2/arch/ppc/8260_io/fcc_enet.c
+--- linux-2.6.22-570/arch/ppc/8260_io/fcc_enet.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/8260_io/fcc_enet.c	2007-12-19 15:29:23.000000000 -0500
+@@ -734,9 +734,9 @@
+ 		}
+ 		else {
+ 			skb_put(skb,pkt_len);	/* Make room */
+-			eth_copy_and_sum(skb,
++			skb_copy_to_linear_data(skb,
+ 				(unsigned char *)__va(bdp->cbd_bufaddr),
+-				pkt_len, 0);
++				pkt_len);
+ 			skb->protocol=eth_type_trans(skb,dev);
+ 			netif_rx(skb);
+ 		}
+diff -Nurb linux-2.6.22-570/arch/ppc/8xx_io/enet.c linux-2.6.22-try2/arch/ppc/8xx_io/enet.c
+--- linux-2.6.22-570/arch/ppc/8xx_io/enet.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/8xx_io/enet.c	2007-12-19 15:29:23.000000000 -0500
+@@ -506,9 +506,9 @@
+ 		}
+ 		else {
+ 			skb_put(skb,pkt_len-4);	/* Make room */
+-			eth_copy_and_sum(skb,
++			skb_copy_to_linear_data(skb,
+ 				cep->rx_vaddr[bdp - cep->rx_bd_base],
+-				pkt_len-4, 0);
++				pkt_len-4);
+ 			skb->protocol=eth_type_trans(skb,dev);
+ 			netif_rx(skb);
+ 		}
+diff -Nurb linux-2.6.22-570/arch/ppc/8xx_io/fec.c linux-2.6.22-try2/arch/ppc/8xx_io/fec.c
+--- linux-2.6.22-570/arch/ppc/8xx_io/fec.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/8xx_io/fec.c	2007-12-19 15:29:23.000000000 -0500
+@@ -725,7 +725,7 @@
+ 		fep->stats.rx_dropped++;
+ 	} else {
+ 		skb_put(skb,pkt_len-4);	/* Make room */
+-		eth_copy_and_sum(skb, data, pkt_len-4, 0);
++		skb_copy_to_linear_data(skb, data, pkt_len-4);
+ 		skb->protocol=eth_type_trans(skb,dev);
+ 		netif_rx(skb);
+ 	}
+diff -Nurb linux-2.6.22-570/arch/ppc/Kconfig.debug linux-2.6.22-try2/arch/ppc/Kconfig.debug
+--- linux-2.6.22-570/arch/ppc/Kconfig.debug	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/Kconfig.debug	2007-12-19 15:29:24.000000000 -0500
+@@ -2,42 +2,6 @@
+ 
+ source "lib/Kconfig.debug"
+ 
+-config KGDB
+-	bool "Include kgdb kernel debugger"
+-	depends on DEBUG_KERNEL && (BROKEN || PPC_GEN550 || 4xx)
+-	select DEBUG_INFO
+-	help
+-	  Include in-kernel hooks for kgdb, the Linux kernel source level
+-	  debugger.  See <http://kgdb.sourceforge.net/> for more information.
+-	  Unless you are intending to debug the kernel, say N here.
+-
+-choice
+-	prompt "Serial Port"
+-	depends on KGDB
+-	default KGDB_TTYS1
+-
+-config KGDB_TTYS0
+-	bool "ttyS0"
+-
+-config KGDB_TTYS1
+-	bool "ttyS1"
+-
+-config KGDB_TTYS2
+-	bool "ttyS2"
+-
+-config KGDB_TTYS3
+-	bool "ttyS3"
+-
+-endchoice
+-
+-config KGDB_CONSOLE
+-	bool "Enable serial console thru kgdb port"
+-	depends on KGDB && 8xx || CPM2
+-	help
+-	  If you enable this, all serial console messages will be sent
+-	  over the gdb stub.
+-	  If unsure, say N.
+-
+ config XMON
+ 	bool "Include xmon kernel debugger"
+ 	depends on DEBUG_KERNEL
+diff -Nurb linux-2.6.22-570/arch/ppc/amiga/config.c linux-2.6.22-try2/arch/ppc/amiga/config.c
+--- linux-2.6.22-570/arch/ppc/amiga/config.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/amiga/config.c	2007-12-19 15:29:24.000000000 -0500
+@@ -753,17 +753,11 @@
+ void amiga_serial_console_write(struct console *co, const char *s,
+ 				       unsigned int count)
+ {
+-#if 0 /* def CONFIG_KGDB */
+-	/* FIXME:APUS GDB doesn't seem to like O-packages before it is
+-           properly connected with the target. */
+-	__gdb_output_string (s, count);
+-#else
+ 	while (count--) {
+ 		if (*s == '\n')
+ 			amiga_serial_putc('\r');
+ 		amiga_serial_putc(*s++);
+ 	}
+-#endif
+ }
+ 
+ #ifdef CONFIG_SERIAL_CONSOLE
+diff -Nurb linux-2.6.22-570/arch/ppc/kernel/Makefile linux-2.6.22-try2/arch/ppc/kernel/Makefile
+--- linux-2.6.22-570/arch/ppc/kernel/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/kernel/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -14,7 +14,7 @@
+ obj-$(CONFIG_MODULES)		+= ppc_ksyms.o
+ obj-$(CONFIG_PCI)		+= pci.o
+ obj-$(CONFIG_RAPIDIO)		+= rio.o
+-obj-$(CONFIG_KGDB)		+= ppc-stub.o
++obj-$(CONFIG_KGDB)		+= kgdb.o kgdb_setjmp32.o
+ obj-$(CONFIG_SMP)		+= smp.o smp-tbsync.o
+ obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o
+ 
+diff -Nurb linux-2.6.22-570/arch/ppc/kernel/kgdb.c linux-2.6.22-try2/arch/ppc/kernel/kgdb.c
+--- linux-2.6.22-570/arch/ppc/kernel/kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/ppc/kernel/kgdb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,348 @@
++/*
++ * arch/ppc/kernel/kgdb.c
++ *
++ * PowerPC backend to the KGDB stub.
++ *
++ * Maintainer: Tom Rini <trini@kernel.crashing.org>
++ *
++ * 1998 (c) Michael AK Tesch (tesch@cs.wisc.edu)
++ * Copyright (C) 2003 Timesys Corporation.
++ * Copyright (C) 2004, 2006 MontaVista Software, Inc.
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++#include <linux/smp.h>
++#include <linux/signal.h>
++#include <linux/ptrace.h>
++#include <asm/current.h>
++#include <asm/ptrace.h>
++#include <asm/processor.h>
++#include <asm/machdep.h>
++
++/*
++ * This table contains the mapping between PowerPC hardware trap types and
++ * signals, which are primarily what GDB understands.  GDB and the kernel
++ * don't always agree on values, so we use constants taken from gdb-6.2.
++ */
++static struct hard_trap_info
++{
++	unsigned int tt;		/* Trap type code for powerpc */
++	unsigned char signo;		/* Signal that we map this trap into */
++} hard_trap_info[] = {
++	{ 0x0100, 0x02 /* SIGINT */  },		/* system reset */
++	{ 0x0200, 0x0b /* SIGSEGV */ },		/* machine check */
++	{ 0x0300, 0x0b /* SIGSEGV */ },		/* data access */
++	{ 0x0400, 0x0b /* SIGSEGV */ },		/* instruction access */
++	{ 0x0500, 0x02 /* SIGINT */  },		/* external interrupt */
++	{ 0x0600, 0x0a /* SIGBUS */  },		/* alignment */
++	{ 0x0700, 0x05 /* SIGTRAP */ },		/* program check */
++	{ 0x0800, 0x08 /* SIGFPE */  },		/* fp unavailable */
++	{ 0x0900, 0x0e /* SIGALRM */ },		/* decrementer */
++	{ 0x0c00, 0x14 /* SIGCHLD */ },		/* system call */
++#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
++	{ 0x2002, 0x05 /* SIGTRAP */ },		/* debug */
++#if defined(CONFIG_FSL_BOOKE)
++	{ 0x2010, 0x08 /* SIGFPE */  },		/* spe unavailable */
++	{ 0x2020, 0x08 /* SIGFPE */  },		/* spe unavailable */
++	{ 0x2030, 0x08 /* SIGFPE */  },		/* spe fp data */
++	{ 0x2040, 0x08 /* SIGFPE */  },		/* spe fp data */
++	{ 0x2050, 0x08 /* SIGFPE */  },		/* spe fp round */
++	{ 0x2060, 0x0e /* SIGALRM */ },		/* performance monitor */
++	{ 0x2900, 0x08 /* SIGFPE */  },		/* apu unavailable */
++	{ 0x3100, 0x0e /* SIGALRM */ },		/* fixed interval timer */
++	{ 0x3200, 0x02 /* SIGINT */  }, 	/* watchdog */
++#else
++	{ 0x1000, 0x0e /* SIGALRM */ },		/* programmable interval timer */
++	{ 0x1010, 0x0e /* SIGALRM */ },		/* fixed interval timer */
++	{ 0x1020, 0x02 /* SIGINT */  }, 	/* watchdog */
++	{ 0x2010, 0x08 /* SIGFPE */  },		/* fp unavailable */
++	{ 0x2020, 0x08 /* SIGFPE */  },		/* ap unavailable */
++#endif
++#else
++	{ 0x0d00, 0x05 /* SIGTRAP */ },		/* single-step */
++#if defined(CONFIG_8xx)
++	{ 0x1000, 0x04 /* SIGILL */  },		/* software emulation */
++#else
++	{ 0x0f00, 0x04 /* SIGILL */  },		/* performance monitor */
++	{ 0x0f20, 0x08 /* SIGFPE */  },		/* altivec unavailable */
++	{ 0x1300, 0x05 /* SIGTRAP */ }, 	/* instruction address break */
++	{ 0x1400, 0x02 /* SIGINT */  },		/* SMI */
++	{ 0x1600, 0x08 /* SIGFPE */  },		/* altivec assist */
++	{ 0x1700, 0x04 /* SIGILL */  },		/* TAU */
++	{ 0x2000, 0x05 /* SIGTRAP */ },		/* run mode */
++#endif
++#endif
++	{ 0x0000, 0x00 }			/* Must be last */
++};
++
++extern atomic_t cpu_doing_single_step;
++
++static int computeSignal(unsigned int tt)
++{
++	struct hard_trap_info *ht;
++
++	for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
++		if (ht->tt == tt)
++			return ht->signo;
++
++	return SIGHUP;		/* default for things we don't know about */
++}
++
++/* KGDB functions to use existing PowerPC hooks. */
++static void kgdb_debugger(struct pt_regs *regs)
++{
++	kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
++}
++
++static int kgdb_breakpoint(struct pt_regs *regs)
++{
++	if (user_mode(regs))
++		return 0;
++
++	kgdb_handle_exception(0, SIGTRAP, 0, regs);
++
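++	/* if we stopped at our own breakpoint instruction, step past it */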
++	if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
++		regs->nip += 4;
++
++	return 1;
++}
++
++static int kgdb_singlestep(struct pt_regs *regs)
++{
++	struct thread_info *thread_info, *exception_thread_info;
++
++	if (user_mode(regs))
++		return 0;
++	/*
++	* On Book E and perhaps other processors, singlestep is handled on
++	* the critical exception stack.  This causes current_thread_info()
++	* to fail, since it locates the thread_info by masking off
++	* the low bits of the current stack pointer.  We work around
++	* this issue by copying the thread_info from the kernel stack
++	* before calling kgdb_handle_exception, and copying it back
++	* afterwards.  On most processors the copy is avoided since
++	* exception_thread_info == thread_info.
++	*/
++	thread_info = (struct thread_info *)(regs->gpr[1] & ~(THREAD_SIZE-1));
++	exception_thread_info = current_thread_info();
++
++	if (thread_info != exception_thread_info)
++		memcpy(exception_thread_info, thread_info, sizeof *thread_info);
++
++	kgdb_handle_exception(0, SIGTRAP, 0, regs);
++
++	if (thread_info != exception_thread_info)
++		memcpy(thread_info, exception_thread_info, sizeof *thread_info);
++
++	return 1;
++}
++
++int kgdb_iabr_match(struct pt_regs *regs)
++{
++	if (user_mode(regs))
++		return 0;
++
++	kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
++	return 1;
++}
++
++int kgdb_dabr_match(struct pt_regs *regs)
++{
++	if (user_mode(regs))
++		return 0;
++
++	kgdb_handle_exception(0, computeSignal(TRAP(regs)), 0, regs);
++	return 1;
++}
++
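++/*
++ * Pack a pt_regs into the flat register array gdb expects: r0-r31,
++ * space for the 32 floating-point (or SPE) registers, then nip, msr,
++ * ccr, lr, ctr and xer.
++ */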
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++	unsigned long *ptr = gdb_regs;
++	int reg;
++
++	memset(gdb_regs, 0, MAXREG * 4);
++
++	for (reg = 0; reg < 32; reg++)
++		*(ptr++) = regs->gpr[reg];
++
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++	for (reg = 0; reg < 32; reg++)
++		*(ptr++) = current->thread.evr[reg];
++#else
++	ptr += 32;
++#endif
++#else
++	ptr += 64;
++#endif
++
++	*(ptr++) = regs->nip;
++	*(ptr++) = regs->msr;
++	*(ptr++) = regs->ccr;
++	*(ptr++) = regs->link;
++	*(ptr++) = regs->ctr;
++	*(ptr++) = regs->xer;
++
++#ifdef CONFIG_SPE
++	/* u64 acc */
++	*(ptr++) = current->thread.acc >> 32;
++	*(ptr++) = current->thread.acc & 0xffffffff;
++	*(ptr++) = current->thread.spefscr;
++#endif
++}
++
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++	struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp +
++						  STACK_FRAME_OVERHEAD);
++	unsigned long *ptr = gdb_regs;
++	int reg;
++
++	memset(gdb_regs, 0, MAXREG * 4);
++
++	/* Regs GPR0-2 */
++	for (reg = 0; reg < 3; reg++)
++		*(ptr++) = regs->gpr[reg];
++
++	/* Regs GPR3-13 are not saved */
++	ptr += 11;
++
++	/* Regs GPR14-31 */
++	for (reg = 14; reg < 32; reg++)
++		*(ptr++) = regs->gpr[reg];
++
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++	for (reg = 0; reg < 32; reg++)
++		*(ptr++) = p->thread.evr[reg];
++#else
++	ptr += 32;
++#endif
++#else
++	ptr += 64;
++#endif
++
++	*(ptr++) = regs->nip;
++	*(ptr++) = regs->msr;
++	*(ptr++) = regs->ccr;
++	*(ptr++) = regs->link;
++	*(ptr++) = regs->ctr;
++	*(ptr++) = regs->xer;
++
++#ifdef CONFIG_SPE
++	/* u64 acc */
++	*(ptr++) = p->thread.acc >> 32;
++	*(ptr++) = p->thread.acc & 0xffffffff;
++	*(ptr++) = p->thread.spefscr;
++#endif
++}
++
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++	unsigned long *ptr = gdb_regs;
++	int reg;
++#ifdef CONFIG_SPE
++	union {
++		u32 v32[2];
++		u64 v64;
++	} acc;
++#endif
++
++	for (reg = 0; reg < 32; reg++)
++		regs->gpr[reg] = *(ptr++);
++
++#ifdef CONFIG_FSL_BOOKE
++#ifdef CONFIG_SPE
++	for (reg = 0; reg < 32; reg++)
++		current->thread.evr[reg] = *(ptr++);
++#else
++	ptr += 32;
++#endif
++#else
++	ptr += 64;
++#endif
++
++	regs->nip = *(ptr++);
++	regs->msr = *(ptr++);
++	regs->ccr = *(ptr++);
++	regs->link = *(ptr++);
++	regs->ctr = *(ptr++);
++	regs->xer = *(ptr++);
++
++#ifdef CONFIG_SPE
++	/* u64 acc */
++	acc.v32[0] = *(ptr++);
++	acc.v32[1] = *(ptr++);
++	current->thread.acc = acc.v64;
++	current->thread.spefscr = *(ptr++);
++#endif
++}
++
++/*
++ * This function does PowerPC specific processing for interfacing to gdb.
++ */
++int kgdb_arch_handle_exception(int vector, int signo, int err_code,
++			       char *remcom_in_buffer, char *remcom_out_buffer,
++			       struct pt_regs *linux_regs)
++{
++	char *ptr = &remcom_in_buffer[1];
++	unsigned long addr;
++
++	switch (remcom_in_buffer[0])
++		{
++		/*
++		 * sAA..AA   Step one instruction from AA..AA (address optional)
++		 * cAA..AA   Continue at address AA..AA (address optional)
++		 */
++		case 's':
++		case 'c':
++			/* handle the optional parameter */
++			if (kgdb_hex2long(&ptr, &addr))
++				linux_regs->nip = addr;
++
++			atomic_set(&cpu_doing_single_step, -1);
++			/* set the trace bit if we're stepping */
++			if (remcom_in_buffer[0] == 's') {
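++				/* 4xx/Book E parts single-step via the
++				 * debug control register; classic parts
++				 * use the MSR[SE] bit */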
++#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
++				mtspr(SPRN_DBCR0,
++				      mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
++				linux_regs->msr |= MSR_DE;
++#else
++				linux_regs->msr |= MSR_SE;
++#endif
++				debugger_step = 1;
++				if (kgdb_contthread)
++					atomic_set(&cpu_doing_single_step,
++							smp_processor_id());
++			}
++			return 0;
++	}
++
++	return -1;
++}
++
++/*
++ * Global data
++ */
++struct kgdb_arch arch_kgdb_ops = {
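++	/* 0x7d821008 is "twge r2, r2", the trap instruction used for breakpoints */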
++	.gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
++};
++
++int kgdb_arch_init(void)
++{
++	debugger = kgdb_debugger;
++	debugger_bpt = kgdb_breakpoint;
++	debugger_sstep = kgdb_singlestep;
++	debugger_iabr_match = kgdb_iabr_match;
++	debugger_dabr_match = kgdb_dabr_match;
++
++	return 0;
++}
++
++arch_initcall(kgdb_arch_init);
+diff -Nurb linux-2.6.22-570/arch/ppc/kernel/kgdb_setjmp32.S linux-2.6.22-try2/arch/ppc/kernel/kgdb_setjmp32.S
+--- linux-2.6.22-570/arch/ppc/kernel/kgdb_setjmp32.S	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/ppc/kernel/kgdb_setjmp32.S	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,41 @@
++/*
++ * Copyright (C) 1996 Paul Mackerras
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <asm/processor.h>
++#include <asm/ppc_asm.h>
++
++	.text
++
++/*
++ * Save/restore state in case a memory access causes a fault.
++ *
++ * int kgdb_fault_setjmp(unsigned long *curr_context);
++ * void kgdb_fault_longjmp(unsigned long *curr_context);
++ */
++
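++/*
++ * curr_context layout (word offsets): 0 = LR, 1 = r1 (stack pointer),
++ * 2 = r2, 3 = CR, 4 onwards = r13-r31 (saved/restored by stmw/lmw).
++ */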
++_GLOBAL(kgdb_fault_setjmp)
++	mflr	r0
++	stw	r0,0(r3)
++	stw	r1,4(r3)
++	stw	r2,8(r3)
++	mfcr	r0
++	stw	r0,12(r3)
++	stmw	r13,16(r3)
++	li	r3,0
++	blr
++
++_GLOBAL(kgdb_fault_longjmp)
++	lmw     r13,16(r3)
++	lwz     r0,12(r3)
++	mtcrf   0x38,r0
++	lwz     r0,0(r3)
++	lwz     r1,4(r3)
++	lwz     r2,8(r3)
++	mtlr    r0
++	mr      r3,r1
++	blr
+diff -Nurb linux-2.6.22-570/arch/ppc/kernel/misc.S linux-2.6.22-try2/arch/ppc/kernel/misc.S
+--- linux-2.6.22-570/arch/ppc/kernel/misc.S	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/ppc/kernel/misc.S	2007-12-19 15:29:22.000000000 -0500
+@@ -328,7 +328,7 @@
+ 	mtspr   SPRN_L1CSR0,r3
+ 	isync
+ 	blr
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
+ 	mfspr	r3,SPRN_L1CSR1
+ 	ori	r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
+ 	mtspr	SPRN_L1CSR1,r3
+@@ -355,7 +355,7 @@
+ _GLOBAL(__flush_icache_range)
+ BEGIN_FTR_SECTION
+ 	blr				/* for 601, do nothing */
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ 	li	r5,L1_CACHE_BYTES-1
+ 	andc	r3,r3,r5
+ 	subf	r4,r3,r4
+@@ -472,7 +472,7 @@
+ _GLOBAL(__flush_dcache_icache)
+ BEGIN_FTR_SECTION
+ 	blr					/* for 601, do nothing */
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ 	rlwinm	r3,r3,0,0,19			/* Get page base address */
+ 	li	r4,4096/L1_CACHE_BYTES	/* Number of lines in a page */
+ 	mtctr	r4
+@@ -500,7 +500,7 @@
+ _GLOBAL(__flush_dcache_icache_phys)
+ BEGIN_FTR_SECTION
+ 	blr					/* for 601, do nothing */
+-END_FTR_SECTION_IFCLR(CPU_FTR_SPLIT_ID_CACHE)
++END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ 	mfmsr	r10
+ 	rlwinm	r0,r10,0,28,26			/* clear DR */
+ 	mtmsr	r0
+diff -Nurb linux-2.6.22-570/arch/ppc/kernel/ppc-stub.c linux-2.6.22-try2/arch/ppc/kernel/ppc-stub.c
+--- linux-2.6.22-570/arch/ppc/kernel/ppc-stub.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/kernel/ppc-stub.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,866 +0,0 @@
+-/*
+- * ppc-stub.c:  KGDB support for the Linux kernel.
+- *
+- * adapted from arch/sparc/kernel/sparc-stub.c for the PowerPC
+- * some stuff borrowed from Paul Mackerras' xmon
+- * Copyright (C) 1998 Michael AK Tesch (tesch@cs.wisc.edu)
+- *
+- * Modifications to run under Linux
+- * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+- *
+- * This file originally came from the gdb sources, and the
+- * copyright notices have been retained below.
+- */
+-
+-/****************************************************************************
+-
+-		THIS SOFTWARE IS NOT COPYRIGHTED
+-
+-   HP offers the following for use in the public domain.  HP makes no
+-   warranty with regard to the software or its performance and the
+-   user accepts the software "AS IS" with all faults.
+-
+-   HP DISCLAIMS ANY WARRANTIES, EXPRESS OR IMPLIED, WITH REGARD
+-   TO THIS SOFTWARE INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+-   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+-
+-****************************************************************************/
+-
+-/****************************************************************************
+- *  Header: remcom.c,v 1.34 91/03/09 12:29:49 glenne Exp $
+- *
+- *  Module name: remcom.c $
+- *  Revision: 1.34 $
+- *  Date: 91/03/09 12:29:49 $
+- *  Contributor:     Lake Stevens Instrument Division$
+- *
+- *  Description:     low level support for gdb debugger. $
+- *
+- *  Considerations:  only works on target hardware $
+- *
+- *  Written by:      Glenn Engel $
+- *  ModuleState:     Experimental $
+- *
+- *  NOTES:           See Below $
+- *
+- *  Modified for SPARC by Stu Grossman, Cygnus Support.
+- *
+- *  This code has been extensively tested on the Fujitsu SPARClite demo board.
+- *
+- *  To enable debugger support, two things need to happen.  One, a
+- *  call to set_debug_traps() is necessary in order to allow any breakpoints
+- *  or error conditions to be properly intercepted and reported to gdb.
+- *  Two, a breakpoint needs to be generated to begin communication.  This
+- *  is most easily accomplished by a call to breakpoint().  Breakpoint()
+- *  simulates a breakpoint by executing a trap #1.
+- *
+- *************
+- *
+- *    The following gdb commands are supported:
+- *
+- * command          function		          Return value
+- *
+- *    g             return the value of the CPU registers  hex data or ENN
+- *    G             set the value of the CPU registers     OK or ENN
+- *    qOffsets      Get section offsets.  Reply is Text=xxx;Data=yyy;Bss=zzz
+- *
+- *    mAA..AA,LLLL  Read LLLL bytes at address AA..AA      hex data or ENN
+- *    MAA..AA,LLLL: Write LLLL bytes at address AA.AA      OK or ENN
+- *
+- *    c             Resume at current address              SNN   ( signal NN)
+- *    cAA..AA       Continue at address AA..AA             SNN
+- *
+- *    s             Step one instruction                   SNN
+- *    sAA..AA       Step one instruction from AA..AA       SNN
+- *
+- *    k             kill
+- *
+- *    ?             What was the last sigval ?             SNN   (signal NN)
+- *
+- *    bBB..BB	    Set baud rate to BB..BB		   OK or BNN, then sets
+- *							   baud rate
+- *
+- * All commands and responses are sent with a packet which includes a
+- * checksum.  A packet consists of
+- *
+- * $<packet info>#<checksum>.
+- *
+- * where
+- * <packet info> :: <characters representing the command or response>
+- * <checksum>    :: <two hex digits computed as modulo 256 sum of <packetinfo>>
+- *
+- * When a packet is received, it is first acknowledged with either '+' or '-'.
+- * '+' indicates a successful transfer.  '-' indicates a failed transfer.
+- *
+- * Example:
+- *
+- * Host:                  Reply:
+- * $m0,10#2a               +$00010203040506070809101112131415#42
+- *
+- ****************************************************************************/
+-
+-#include <linux/kernel.h>
+-#include <linux/string.h>
+-#include <linux/mm.h>
+-#include <linux/smp.h>
+-#include <linux/smp_lock.h>
+-#include <linux/init.h>
+-#include <linux/sysrq.h>
+-
+-#include <asm/cacheflush.h>
+-#include <asm/system.h>
+-#include <asm/signal.h>
+-#include <asm/kgdb.h>
+-#include <asm/pgtable.h>
+-#include <asm/ptrace.h>
+-
+-void breakinst(void);
+-
+-/*
+- * BUFMAX defines the maximum number of characters in inbound/outbound buffers
+- * at least NUMREGBYTES*2 are needed for register packets
+- */
+-#define BUFMAX 2048
+-static char remcomInBuffer[BUFMAX];
+-static char remcomOutBuffer[BUFMAX];
+-
+-static int initialized;
+-static int kgdb_active;
+-static int kgdb_started;
+-static u_int fault_jmp_buf[100];
+-static int kdebug;
+-
+-
+-static const char hexchars[]="0123456789abcdef";
+-
+-/* Place where we save old trap entries for restoration - sparc*/
+-/* struct tt_entry kgdb_savettable[256]; */
+-/* typedef void (*trapfunc_t)(void); */
+-
+-static void kgdb_fault_handler(struct pt_regs *regs);
+-static int handle_exception (struct pt_regs *regs);
+-
+-#if 0
+-/* Install an exception handler for kgdb */
+-static void exceptionHandler(int tnum, unsigned int *tfunc)
+-{
+-	/* We are dorking with a live trap table, all irqs off */
+-}
+-#endif
+-
+-int
+-kgdb_setjmp(long *buf)
+-{
+-	asm ("mflr 0; stw 0,0(%0);"
+-	     "stw 1,4(%0); stw 2,8(%0);"
+-	     "mfcr 0; stw 0,12(%0);"
+-	     "stmw 13,16(%0)"
+-	     : : "r" (buf));
+-	/* XXX should save fp regs as well */
+-	return 0;
+-}
+-void
+-kgdb_longjmp(long *buf, int val)
+-{
+-	if (val == 0)
+-		val = 1;
+-	asm ("lmw 13,16(%0);"
+-	     "lwz 0,12(%0); mtcrf 0x38,0;"
+-	     "lwz 0,0(%0); lwz 1,4(%0); lwz 2,8(%0);"
+-	     "mtlr 0; mr 3,%1"
+-	     : : "r" (buf), "r" (val));
+-}
+-/* Convert ch from a hex digit to an int */
+-static int
+-hex(unsigned char ch)
+-{
+-	if (ch >= 'a' && ch <= 'f')
+-		return ch-'a'+10;
+-	if (ch >= '0' && ch <= '9')
+-		return ch-'0';
+-	if (ch >= 'A' && ch <= 'F')
+-		return ch-'A'+10;
+-	return -1;
+-}
+-
+-/* Convert the memory pointed to by mem into hex, placing result in buf.
+- * Return a pointer to the last char put in buf (null), in case of mem fault,
+- * return 0.
+- */
+-static unsigned char *
+-mem2hex(const char *mem, char *buf, int count)
+-{
+-	unsigned char ch;
+-	unsigned short tmp_s;
+-	unsigned long tmp_l;
+-
+-	if (kgdb_setjmp((long*)fault_jmp_buf) == 0) {
+-		debugger_fault_handler = kgdb_fault_handler;
+-
+-		/* Accessing 16 bit and 32 bit objects in a single
+-		** load instruction is required to avoid bad side
+-		** effects for some IO registers.
+-		*/
+-
+-		if ((count == 2) && (((long)mem & 1) == 0)) {
+-			tmp_s = *(unsigned short *)mem;
+-			mem += 2;
+-			*buf++ = hexchars[(tmp_s >> 12) & 0xf];
+-			*buf++ = hexchars[(tmp_s >> 8) & 0xf];
+-			*buf++ = hexchars[(tmp_s >> 4) & 0xf];
+-			*buf++ = hexchars[tmp_s & 0xf];
+-
+-		} else if ((count == 4) && (((long)mem & 3) == 0)) {
+-			tmp_l = *(unsigned int *)mem;
+-			mem += 4;
+-			*buf++ = hexchars[(tmp_l >> 28) & 0xf];
+-			*buf++ = hexchars[(tmp_l >> 24) & 0xf];
+-			*buf++ = hexchars[(tmp_l >> 20) & 0xf];
+-			*buf++ = hexchars[(tmp_l >> 16) & 0xf];
+-			*buf++ = hexchars[(tmp_l >> 12) & 0xf];
+-			*buf++ = hexchars[(tmp_l >> 8) & 0xf];
+-			*buf++ = hexchars[(tmp_l >> 4) & 0xf];
+-			*buf++ = hexchars[tmp_l & 0xf];
+-
+-		} else {
+-			while (count-- > 0) {
+-				ch = *mem++;
+-				*buf++ = hexchars[ch >> 4];
+-				*buf++ = hexchars[ch & 0xf];
+-			}
+-		}
+-
+-	} else {
+-		/* error condition */
+-	}
+-	debugger_fault_handler = NULL;
+-	*buf = 0;
+-	return buf;
+-}
+-
+-/* convert the hex array pointed to by buf into binary to be placed in mem
+- * return a pointer to the character AFTER the last byte written.
+-*/
+-static char *
+-hex2mem(char *buf, char *mem, int count)
+-{
+-	unsigned char ch;
+-	int i;
+-	char *orig_mem;
+-	unsigned short tmp_s;
+-	unsigned long tmp_l;
+-
+-	orig_mem = mem;
+-
+-	if (kgdb_setjmp((long*)fault_jmp_buf) == 0) {
+-		debugger_fault_handler = kgdb_fault_handler;
+-
+-		/* Accessing 16 bit and 32 bit objects in a single
+-		** store instruction is required to avoid bad side
+-		** effects for some IO registers.
+-		*/
+-
+-		if ((count == 2) && (((long)mem & 1) == 0)) {
+-			tmp_s = hex(*buf++) << 12;
+-			tmp_s |= hex(*buf++) << 8;
+-			tmp_s |= hex(*buf++) << 4;
+-			tmp_s |= hex(*buf++);
+-
+-			*(unsigned short *)mem = tmp_s;
+-			mem += 2;
+-
+-		} else if ((count == 4) && (((long)mem & 3) == 0)) {
+-			tmp_l = hex(*buf++) << 28;
+-			tmp_l |= hex(*buf++) << 24;
+-			tmp_l |= hex(*buf++) << 20;
+-			tmp_l |= hex(*buf++) << 16;
+-			tmp_l |= hex(*buf++) << 12;
+-			tmp_l |= hex(*buf++) << 8;
+-			tmp_l |= hex(*buf++) << 4;
+-			tmp_l |= hex(*buf++);
+-
+-			*(unsigned long *)mem = tmp_l;
+-			mem += 4;
+-
+-		} else {
+-			for (i=0; i<count; i++) {
+-				ch = hex(*buf++) << 4;
+-				ch |= hex(*buf++);
+-				*mem++ = ch;
+-			}
+-		}
+-
+-
+-		/*
+-		** Flush the data cache, invalidate the instruction cache.
+-		*/
+-		flush_icache_range((int)orig_mem, (int)orig_mem + count - 1);
+-
+-	} else {
+-		/* error condition */
+-	}
+-	debugger_fault_handler = NULL;
+-	return mem;
+-}
+-
+-/*
+- * While we find nice hex chars, build an int.
+- * Return number of chars processed.
+- */
+-static int
+-hexToInt(char **ptr, int *intValue)
+-{
+-	int numChars = 0;
+-	int hexValue;
+-
+-	*intValue = 0;
+-
+-	if (kgdb_setjmp((long*)fault_jmp_buf) == 0) {
+-		debugger_fault_handler = kgdb_fault_handler;
+-		while (**ptr) {
+-			hexValue = hex(**ptr);
+-			if (hexValue < 0)
+-				break;
+-
+-			*intValue = (*intValue << 4) | hexValue;
+-			numChars ++;
+-
+-			(*ptr)++;
+-		}
+-	} else {
+-		/* error condition */
+-	}
+-	debugger_fault_handler = NULL;
+-
+-	return (numChars);
+-}
+-
+-/* scan for the sequence $<data>#<checksum> */
+-static void
+-getpacket(char *buffer)
+-{
+-	unsigned char checksum;
+-	unsigned char xmitcsum;
+-	int i;
+-	int count;
+-	unsigned char ch;
+-
+-	do {
+-		/* wait around for the start character, ignore all other
+-		 * characters */
+-		while ((ch = (getDebugChar() & 0x7f)) != '$') ;
+-
+-		checksum = 0;
+-		xmitcsum = -1;
+-
+-		count = 0;
+-
+-		/* now, read until a # or end of buffer is found */
+-		while (count < BUFMAX) {
+-			ch = getDebugChar() & 0x7f;
+-			if (ch == '#')
+-				break;
+-			checksum = checksum + ch;
+-			buffer[count] = ch;
+-			count = count + 1;
+-		}
+-
+-		if (count >= BUFMAX)
+-			continue;
+-
+-		buffer[count] = 0;
+-
+-		if (ch == '#') {
+-			xmitcsum = hex(getDebugChar() & 0x7f) << 4;
+-			xmitcsum |= hex(getDebugChar() & 0x7f);
+-			if (checksum != xmitcsum)
+-				putDebugChar('-');	/* failed checksum */
+-			else {
+-				putDebugChar('+'); /* successful transfer */
+-				/* if a sequence char is present, reply the ID */
+-				if (buffer[2] == ':') {
+-					putDebugChar(buffer[0]);
+-					putDebugChar(buffer[1]);
+-					/* remove sequence chars from buffer */
+-					count = strlen(buffer);
+-					for (i=3; i <= count; i++)
+-						buffer[i-3] = buffer[i];
+-				}
+-			}
+-		}
+-	} while (checksum != xmitcsum);
+-}
+-
+-/* send the packet in buffer. */
+-static void putpacket(unsigned char *buffer)
+-{
+-	unsigned char checksum;
+-	int count;
+-	unsigned char ch, recv;
+-
+-	/* $<packet info>#<checksum>. */
+-	do {
+-		putDebugChar('$');
+-		checksum = 0;
+-		count = 0;
+-
+-		while ((ch = buffer[count])) {
+-			putDebugChar(ch);
+-			checksum += ch;
+-			count += 1;
+-		}
+-
+-		putDebugChar('#');
+-		putDebugChar(hexchars[checksum >> 4]);
+-		putDebugChar(hexchars[checksum & 0xf]);
+-		recv = getDebugChar();
+-	} while ((recv & 0x7f) != '+');
+-}
+-
+-static void kgdb_flush_cache_all(void)
+-{
+-	flush_instruction_cache();
+-}
+-
+-/* Set up exception handlers for tracing and breakpoints
+- * [could be called kgdb_init()]
+- */
+-void set_debug_traps(void)
+-{
+-#if 0
+-	unsigned char c;
+-
+-	save_and_cli(flags);
+-
+-	/* In case GDB is started before us, ack any packets (presumably
+-	 * "$?#xx") sitting there.
+-	 *
+-	 * I've found this code causes more problems than it solves,
+-	 * so that's why it's commented out.  GDB seems to work fine
+-	 * now starting either before or after the kernel   -bwb
+-	 */
+-
+-	while((c = getDebugChar()) != '$');
+-	while((c = getDebugChar()) != '#');
+-	c = getDebugChar(); /* eat first csum byte */
+-	c = getDebugChar(); /* eat second csum byte */
+-	putDebugChar('+'); /* ack it */
+-#endif
+-	debugger = kgdb;
+-	debugger_bpt = kgdb_bpt;
+-	debugger_sstep = kgdb_sstep;
+-	debugger_iabr_match = kgdb_iabr_match;
+-	debugger_dabr_match = kgdb_dabr_match;
+-
+-	initialized = 1;
+-}
+-
+-static void kgdb_fault_handler(struct pt_regs *regs)
+-{
+-	kgdb_longjmp((long*)fault_jmp_buf, 1);
+-}
+-
+-int kgdb_bpt(struct pt_regs *regs)
+-{
+-	return handle_exception(regs);
+-}
+-
+-int kgdb_sstep(struct pt_regs *regs)
+-{
+-	return handle_exception(regs);
+-}
+-
+-void kgdb(struct pt_regs *regs)
+-{
+-	handle_exception(regs);
+-}
+-
+-int kgdb_iabr_match(struct pt_regs *regs)
+-{
+-	printk(KERN_ERR "kgdb doesn't support iabr, what?!?\n");
+-	return handle_exception(regs);
+-}
+-
+-int kgdb_dabr_match(struct pt_regs *regs)
+-{
+-	printk(KERN_ERR "kgdb doesn't support dabr, what?!?\n");
+-	return handle_exception(regs);
+-}
+-
+-/* Convert the hardware trap type code to a unix signal number. */
+-/*
+- * This table contains the mapping between PowerPC hardware trap types, and
+- * signals, which are primarily what GDB understands.
+- */
+-static struct hard_trap_info
+-{
+-	unsigned int tt;		/* Trap type code for powerpc */
+-	unsigned char signo;		/* Signal that we map this trap into */
+-} hard_trap_info[] = {
+-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+-	{ 0x100, SIGINT  },		/* critical input interrupt */
+-	{ 0x200, SIGSEGV },		/* machine check */
+-	{ 0x300, SIGSEGV },		/* data storage */
+-	{ 0x400, SIGBUS  },		/* instruction storage */
+-	{ 0x500, SIGINT  },		/* interrupt */
+-	{ 0x600, SIGBUS  },		/* alignment */
+-	{ 0x700, SIGILL  },		/* program */
+-	{ 0x800, SIGILL  },		/* reserved */
+-	{ 0x900, SIGILL  },		/* reserved */
+-	{ 0xa00, SIGILL  },		/* reserved */
+-	{ 0xb00, SIGILL  },		/* reserved */
+-	{ 0xc00, SIGCHLD },		/* syscall */
+-	{ 0xd00, SIGILL  },		/* reserved */
+-	{ 0xe00, SIGILL  },		/* reserved */
+-	{ 0xf00, SIGILL  },		/* reserved */
+-	/*
+-	** 0x1000  PIT
+-	** 0x1010  FIT
+-	** 0x1020  watchdog
+-	** 0x1100  data TLB miss
+-	** 0x1200  instruction TLB miss
+-	*/
+-	{ 0x2002, SIGTRAP},		/* debug */
+-#else
+-	{ 0x200, SIGSEGV },		/* machine check */
+-	{ 0x300, SIGSEGV },		/* address error (store) */
+-	{ 0x400, SIGBUS },		/* instruction bus error */
+-	{ 0x500, SIGINT },		/* interrupt */
+-	{ 0x600, SIGBUS },		/* alingment */
+-	{ 0x700, SIGTRAP },		/* breakpoint trap */
+-	{ 0x800, SIGFPE },		/* fpu unavail */
+-	{ 0x900, SIGALRM },		/* decrementer */
+-	{ 0xa00, SIGILL },		/* reserved */
+-	{ 0xb00, SIGILL },		/* reserved */
+-	{ 0xc00, SIGCHLD },		/* syscall */
+-	{ 0xd00, SIGTRAP },		/* single-step/watch */
+-	{ 0xe00, SIGFPE },		/* fp assist */
+-#endif
+-	{ 0, 0}				/* Must be last */
+-
+-};
+-
+-static int computeSignal(unsigned int tt)
+-{
+-	struct hard_trap_info *ht;
+-
+-	for (ht = hard_trap_info; ht->tt && ht->signo; ht++)
+-		if (ht->tt == tt)
+-			return ht->signo;
+-
+-	return SIGHUP; /* default for things we don't know about */
+-}
+-
+-#define PC_REGNUM 64
+-#define SP_REGNUM 1
+-
+-/*
+- * This function does all command processing for interfacing to gdb.
+- */
+-static int
+-handle_exception (struct pt_regs *regs)
+-{
+-	int sigval;
+-	int addr;
+-	int length;
+-	char *ptr;
+-	unsigned int msr;
+-
+-	/* We don't handle user-mode breakpoints. */
+-	if (user_mode(regs))
+-		return 0;
+-
+-	if (debugger_fault_handler) {
+-		debugger_fault_handler(regs);
+-		panic("kgdb longjump failed!\n");
+-	}
+-	if (kgdb_active) {
+-		printk(KERN_ERR "interrupt while in kgdb, returning\n");
+-		return 0;
+-	}
+-
+-	kgdb_active = 1;
+-	kgdb_started = 1;
+-
+-#ifdef KGDB_DEBUG
+-	printk("kgdb: entering handle_exception; trap [0x%x]\n",
+-			(unsigned int)regs->trap);
+-#endif
+-
+-	kgdb_interruptible(0);
+-	lock_kernel();
+-	msr = mfmsr();
+-	mtmsr(msr & ~MSR_EE);	/* disable interrupts */
+-
+-	if (regs->nip == (unsigned long)breakinst) {
+-		/* Skip over breakpoint trap insn */
+-		regs->nip += 4;
+-	}
+-
+-	/* reply to host that an exception has occurred */
+-	sigval = computeSignal(regs->trap);
+-	ptr = remcomOutBuffer;
+-
+-	*ptr++ = 'T';
+-	*ptr++ = hexchars[sigval >> 4];
+-	*ptr++ = hexchars[sigval & 0xf];
+-	*ptr++ = hexchars[PC_REGNUM >> 4];
+-	*ptr++ = hexchars[PC_REGNUM & 0xf];
+-	*ptr++ = ':';
+-	ptr = mem2hex((char *)&regs->nip, ptr, 4);
+-	*ptr++ = ';';
+-	*ptr++ = hexchars[SP_REGNUM >> 4];
+-	*ptr++ = hexchars[SP_REGNUM & 0xf];
+-	*ptr++ = ':';
+-	ptr = mem2hex(((char *)regs) + SP_REGNUM*4, ptr, 4);
+-	*ptr++ = ';';
+-	*ptr++ = 0;
+-
+-	putpacket(remcomOutBuffer);
+-	if (kdebug)
+-		printk("remcomOutBuffer: %s\n", remcomOutBuffer);
+-
+-	/* XXX We may want to add some features dealing with poking the
+-	 * XXX page tables, ... (look at sparc-stub.c for more info)
+-	 * XXX also required hacking to the gdb sources directly...
+-	 */
+-
+-	while (1) {
+-		remcomOutBuffer[0] = 0;
+-
+-		getpacket(remcomInBuffer);
+-		switch (remcomInBuffer[0]) {
+-		case '?': /* report most recent signal */
+-			remcomOutBuffer[0] = 'S';
+-			remcomOutBuffer[1] = hexchars[sigval >> 4];
+-			remcomOutBuffer[2] = hexchars[sigval & 0xf];
+-			remcomOutBuffer[3] = 0;
+-			break;
+-#if 0
+-		case 'q': /* this screws up gdb for some reason...*/
+-		{
+-			extern long _start, sdata, __bss_start;
+-
+-			ptr = &remcomInBuffer[1];
+-			if (strncmp(ptr, "Offsets", 7) != 0)
+-				break;
+-
+-			ptr = remcomOutBuffer;
+-			sprintf(ptr, "Text=%8.8x;Data=%8.8x;Bss=%8.8x",
+-				&_start, &sdata, &__bss_start);
+-			break;
+-		}
+-#endif
+-		case 'd':
+-			/* toggle debug flag */
+-			kdebug ^= 1;
+-			break;
+-
+-		case 'g':	/* return the value of the CPU registers.
+-				 * some of them are non-PowerPC names :(
+-				 * they are stored in gdb like:
+-				 * struct {
+-				 *     u32 gpr[32];
+-				 *     f64 fpr[32];
+-				 *     u32 pc, ps, cnd, lr; (ps=msr)
+-				 *     u32 cnt, xer, mq;
+-				 * }
+-				 */
+-		{
+-			int i;
+-			ptr = remcomOutBuffer;
+-			/* General Purpose Regs */
+-			ptr = mem2hex((char *)regs, ptr, 32 * 4);
+-			/* Floating Point Regs - FIXME */
+-			/*ptr = mem2hex((char *), ptr, 32 * 8);*/
+-			for(i=0; i<(32*8*2); i++) { /* 2chars/byte */
+-				ptr[i] = '0';
+-			}
+-			ptr += 32*8*2;
+-			/* pc, msr, cr, lr, ctr, xer, (mq is unused) */
+-			ptr = mem2hex((char *)&regs->nip, ptr, 4);
+-			ptr = mem2hex((char *)&regs->msr, ptr, 4);
+-			ptr = mem2hex((char *)&regs->ccr, ptr, 4);
+-			ptr = mem2hex((char *)&regs->link, ptr, 4);
+-			ptr = mem2hex((char *)&regs->ctr, ptr, 4);
+-			ptr = mem2hex((char *)&regs->xer, ptr, 4);
+-		}
+-			break;
+-
+-		case 'G': /* set the value of the CPU registers */
+-		{
+-			ptr = &remcomInBuffer[1];
+-
+-			/*
+-			 * If the stack pointer has moved, you should pray.
+-			 * (cause only god can help you).
+-			 */
+-
+-			/* General Purpose Regs */
+-			hex2mem(ptr, (char *)regs, 32 * 4);
+-
+-			/* Floating Point Regs - FIXME?? */
+-			/*ptr = hex2mem(ptr, ??, 32 * 8);*/
+-			ptr += 32*8*2;
+-
+-			/* pc, msr, cr, lr, ctr, xer, (mq is unused) */
+-			ptr = hex2mem(ptr, (char *)&regs->nip, 4);
+-			ptr = hex2mem(ptr, (char *)&regs->msr, 4);
+-			ptr = hex2mem(ptr, (char *)&regs->ccr, 4);
+-			ptr = hex2mem(ptr, (char *)&regs->link, 4);
+-			ptr = hex2mem(ptr, (char *)&regs->ctr, 4);
+-			ptr = hex2mem(ptr, (char *)&regs->xer, 4);
+-
+-			strcpy(remcomOutBuffer,"OK");
+-		}
+-			break;
+-		case 'H':
+-			/* don't do anything, yet, just acknowledge */
+-			hexToInt(&ptr, &addr);
+-			strcpy(remcomOutBuffer,"OK");
+-			break;
+-
+-		case 'm':	/* mAA..AA,LLLL  Read LLLL bytes at address AA..AA */
+-				/* Try to read %x,%x.  */
+-
+-			ptr = &remcomInBuffer[1];
+-
+-			if (hexToInt(&ptr, &addr) && *ptr++ == ','
+-					&& hexToInt(&ptr, &length)) {
+-				if (mem2hex((char *)addr, remcomOutBuffer,
+-							length))
+-					break;
+-				strcpy(remcomOutBuffer, "E03");
+-			} else
+-				strcpy(remcomOutBuffer, "E01");
+-			break;
+-
+-		case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA.AA return OK */
+-			/* Try to read '%x,%x:'.  */
+-
+-			ptr = &remcomInBuffer[1];
+-
+-			if (hexToInt(&ptr, &addr) && *ptr++ == ','
+-					&& hexToInt(&ptr, &length)
+-					&& *ptr++ == ':') {
+-				if (hex2mem(ptr, (char *)addr, length))
+-					strcpy(remcomOutBuffer, "OK");
+-				else
+-					strcpy(remcomOutBuffer, "E03");
+-				flush_icache_range(addr, addr+length);
+-			} else
+-				strcpy(remcomOutBuffer, "E02");
+-			break;
+-
+-
+-		case 'k': /* kill the program, actually just continue */
+-		case 'c': /* cAA..AA  Continue; address AA..AA optional */
+-			/* try to read optional parameter, pc unchanged if no parm */
+-
+-			ptr = &remcomInBuffer[1];
+-			if (hexToInt(&ptr, &addr))
+-				regs->nip = addr;
+-
+-/* Need to flush the instruction cache here, as we may have deposited a
+- * breakpoint, and the icache probably has no way of knowing that a data ref to
+- * some location may have changed something that is in the instruction cache.
+- */
+-			kgdb_flush_cache_all();
+-			mtmsr(msr);
+-
+-			kgdb_interruptible(1);
+-			unlock_kernel();
+-			kgdb_active = 0;
+-			if (kdebug) {
+-				printk("remcomInBuffer: %s\n", remcomInBuffer);
+-				printk("remcomOutBuffer: %s\n", remcomOutBuffer);
+-			}
+-			return 1;
+-
+-		case 's':
+-			kgdb_flush_cache_all();
+-#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
+-			mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) | DBCR0_IC);
+-			regs->msr |= MSR_DE;
+-#else
+-			regs->msr |= MSR_SE;
+-#endif
+-			unlock_kernel();
+-			kgdb_active = 0;
+-			if (kdebug) {
+-				printk("remcomInBuffer: %s\n", remcomInBuffer);
+-				printk("remcomOutBuffer: %s\n", remcomOutBuffer);
+-			}
+-			return 1;
+-
+-		case 'r':		/* Reset (if user process..exit ???)*/
+-			panic("kgdb reset.");
+-			break;
+-		}			/* switch */
+-		if (remcomOutBuffer[0] && kdebug) {
+-			printk("remcomInBuffer: %s\n", remcomInBuffer);
+-			printk("remcomOutBuffer: %s\n", remcomOutBuffer);
+-		}
+-		/* reply to the request */
+-		putpacket(remcomOutBuffer);
+-	} /* while(1) */
+-}
+-
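
The reply assembled above is a GDB remote-protocol 'T' stop packet: two hex digits of signal number, then regno:value; pairs for the PC (gdb register 64) and the SP (gdb register 1). A stand-alone sketch of the same layout (format_stop_reply is a hypothetical helper; putpacket's $...#cc framing is omitted and big-endian 32-bit register images are assumed):

#include <stdio.h>

/* Mirrors the hand-rolled hex emission in handle_exception() above. */
static void format_stop_reply(char *out, int sigval, unsigned int pc,
			      unsigned int sp)
{
	sprintf(out, "T%02x%02x:%08x;%02x:%08x;",
		sigval, 64 /* PC_REGNUM */, pc, 1 /* SP_REGNUM */, sp);
}

int main(void)
{
	char buf[64];

	format_stop_reply(buf, 5 /* SIGTRAP */, 0xc0001234, 0xc5678000);
	puts(buf);	/* prints T0540:c0001234;01:c5678000; */
	return 0;
}
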
+-/* This function will generate a breakpoint exception.  It is used at the
+-   beginning of a program to sync up with a debugger and can be used
+-   otherwise as a quick means to stop program execution and "break" into
+-   the debugger. */
+-
+-void
+-breakpoint(void)
+-{
+-	if (!initialized) {
+-		printk("breakpoint() called b4 kgdb init\n");
+-		return;
+-	}
+-
+-	asm("	.globl breakinst	\n\
+-	     breakinst: .long 0x7d821008");
+-}
+-
+-#ifdef CONFIG_KGDB_CONSOLE
+-/* Output string in GDB O-packet format if GDB has connected. If nothing
+-   output, returns 0 (caller must then handle output). */
+-int
+-kgdb_output_string (const char* s, unsigned int count)
+-{
+-	char buffer[512];
+-
+-	if (!kgdb_started)
+-		return 0;
+-
+-	count = (count <= (sizeof(buffer) / 2 - 2))
+-		? count : (sizeof(buffer) / 2 - 2);
+-
+-	buffer[0] = 'O';
+-	mem2hex (s, &buffer[1], count);
+-	putpacket(buffer);
+-
+-	return 1;
+-}
+-#endif
+-
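
The 'O' packet built by kgdb_output_string() is simply the letter O followed by the message bytes in hex; gdb decodes and prints them on the host console. A minimal encoding sketch (packet framing again left to putpacket):

#include <stdio.h>
#include <string.h>

int main(void)
{
	const char *s = "Hi\n";
	char pkt[64] = "O";
	size_t i;

	for (i = 0; i < strlen(s); i++)
		sprintf(pkt + 1 + 2 * i, "%02x", (unsigned char)s[i]);
	puts(pkt);	/* prints O48690a; gdb shows "Hi" */
	return 0;
}
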
+-static void sysrq_handle_gdb(int key, struct pt_regs *pt_regs,
+-			     struct tty_struct *tty)
+-{
+-	printk("Entering GDB stub\n");
+-	breakpoint();
+-}
+-static struct sysrq_key_op sysrq_gdb_op = {
+-        .handler        = sysrq_handle_gdb,
+-        .help_msg       = "Gdb",
+-        .action_msg     = "GDB",
+-};
+-
+-static int gdb_register_sysrq(void)
+-{
+-	printk("Registering GDB sysrq handler\n");
+-	register_sysrq_key('g', &sysrq_gdb_op);
+-	return 0;
+-}
+-module_init(gdb_register_sysrq);
+diff -Nurb linux-2.6.22-570/arch/ppc/kernel/setup.c linux-2.6.22-try2/arch/ppc/kernel/setup.c
+--- linux-2.6.22-570/arch/ppc/kernel/setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/kernel/setup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -48,10 +48,6 @@
+ #include <asm/ppc_sys.h>
+ #endif
+ 
+-#if defined CONFIG_KGDB
+-#include <asm/kgdb.h>
+-#endif
+-
+ extern void platform_init(unsigned long r3, unsigned long r4,
+ 		unsigned long r5, unsigned long r6, unsigned long r7);
+ extern void reloc_got2(unsigned long offset);
+@@ -509,24 +505,12 @@
+ #endif /* CONFIG_XMON */
+ 	if ( ppc_md.progress ) ppc_md.progress("setup_arch: enter", 0x3eab);
+ 
+-#if defined(CONFIG_KGDB)
+-	if (ppc_md.kgdb_map_scc)
+-		ppc_md.kgdb_map_scc();
+-	set_debug_traps();
+-	if (strstr(cmd_line, "gdb")) {
+-		if (ppc_md.progress)
+-			ppc_md.progress("setup_arch: kgdb breakpoint", 0x4000);
+-		printk("kgdb breakpoint activated\n");
+-		breakpoint();
+-	}
+-#endif
+-
+ 	/*
+ 	 * Set cache line size based on type of cpu as a default.
+ 	 * Systems with OF can look in the properties on the cpu node(s)
+ 	 * for a possibly more accurate value.
+ 	 */
+-	if (cpu_has_feature(CPU_FTR_SPLIT_ID_CACHE)) {
++	if (!cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE)) {
+ 		dcache_bsize = cur_cpu_spec->dcache_bsize;
+ 		icache_bsize = cur_cpu_spec->icache_bsize;
+ 		ucache_bsize = 0;
+diff -Nurb linux-2.6.22-570/arch/ppc/mm/fault.c linux-2.6.22-try2/arch/ppc/mm/fault.c
+--- linux-2.6.22-570/arch/ppc/mm/fault.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/ppc/mm/fault.c	2007-12-19 15:29:24.000000000 -0500
+@@ -25,6 +25,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/highmem.h>
+ #include <linux/module.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/page.h>
+ #include <asm/pgtable.h>
+@@ -330,6 +331,14 @@
+ 		return;
+ 	}
+ 
++#ifdef CONFIG_KGDB
++	if (atomic_read(&debugger_active) && kgdb_may_fault) {
++		/* Restore our previous state. */
++		kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++		/* Not reached. */
++	}
++#endif
++
+ 	/* kernel has accessed a bad area */
+ #if defined(CONFIG_XMON) || defined(CONFIG_KGDB)
+ 	if (debugger_kernel_faults)
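
The hunk above lets the stub probe arbitrary addresses without oopsing: the generic kgdb code arms kgdb_may_fault around a risky access after saving state with kgdb_fault_setjmp(), and this fault-handler branch unwinds straight back to it. A runnable user-space analogue of the pattern, with a SIGSEGV handler standing in for the page-fault hook (all names here are illustrative):

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static sigjmp_buf fault_jmp;
static volatile int may_fault;

static void segv(int sig)
{
	if (may_fault)
		siglongjmp(fault_jmp, 1);	/* like kgdb_fault_longjmp() */
	_exit(1);
}

static int guarded_read(const char *addr, char *buf, int len)
{
	may_fault = 1;
	if (sigsetjmp(fault_jmp, 1) == 0) {	/* like kgdb_fault_setjmp() */
		memcpy(buf, addr, len);		/* may fault */
		may_fault = 0;
		return 0;
	}
	may_fault = 0;
	return -1;				/* gdb would get an E03 reply */
}

int main(void)
{
	char buf[4];

	signal(SIGSEGV, segv);
	printf("%d\n", guarded_read((char *)16, buf, 4)); /* -1 on typical systems */
	printf("%d\n", guarded_read("ok", buf, 3));	   /* 0 */
	return 0;
}
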
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/bamboo.c linux-2.6.22-try2/arch/ppc/platforms/4xx/bamboo.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/bamboo.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/4xx/bamboo.c	2007-12-19 15:29:24.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
+ #include <linux/ethtool.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -337,10 +338,13 @@
+ 		printk("Early serial init of port 0 failed\n");
+ 	}
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	/* Configure debug serial access */
+ 	gen550_init(0, &port);
+ #endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(0, &port);
++#endif
+ 
+ 	port.membase = ioremap64(PPC440EP_UART1_ADDR, 8);
+ 	port.irq = 1;
+@@ -351,10 +355,13 @@
+ 		printk("Early serial init of port 1 failed\n");
+ 	}
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	/* Configure debug serial access */
+ 	gen550_init(1, &port);
+ #endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(1, &port);
++#endif
+ 
+ 	port.membase = ioremap64(PPC440EP_UART2_ADDR, 8);
+ 	port.irq = 3;
+@@ -365,10 +372,13 @@
+ 		printk("Early serial init of port 2 failed\n");
+ 	}
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	/* Configure debug serial access */
+ 	gen550_init(2, &port);
+ #endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(2, &port);
++#endif
+ 
+ 	port.membase = ioremap64(PPC440EP_UART3_ADDR, 8);
+ 	port.irq = 4;
+@@ -378,6 +388,10 @@
+ 	if (early_serial_setup(&port) != 0) {
+ 		printk("Early serial init of port 3 failed\n");
+ 	}
++
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(3, &port);
++#endif
+ }
+ 
+ static void __init
+@@ -435,8 +449,5 @@
+ 
+ 	ppc_md.nvram_read_val = todc_direct_read_val;
+ 	ppc_md.nvram_write_val = todc_direct_write_val;
+-#ifdef CONFIG_KGDB
+-	ppc_md.early_serial_map = bamboo_early_serial_map;
+-#endif
+ }
+ 
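
The same three-way split recurs in every board file below: early_serial_setup() registers the port with the serial core only when the 8250 driver is configured in, gen550_init() serves CONFIG_SERIAL_TEXT_DEBUG, and the new kgdb8250_add_port() hands the port to the KGDB 8250 I/O driver. The per-port pattern, condensed from the bamboo code above (constants are board-specific):

	port.membase = ioremap64(PPC440EP_UART0_ADDR, 8);	/* board-specific base */
	port.line = 0;

#ifdef CONFIG_SERIAL_8250
	if (early_serial_setup(&port) != 0)	/* console/tty registration */
		printk("Early serial init of port 0 failed\n");
#endif
#ifdef CONFIG_SERIAL_TEXT_DEBUG
	gen550_init(0, &port);			/* ppc_md.progress output */
#endif
#ifdef CONFIG_KGDB_8250
	kgdb8250_add_port(0, &port);		/* register with the kgdb stub */
#endif
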
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/bubinga.c linux-2.6.22-try2/arch/ppc/platforms/4xx/bubinga.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/bubinga.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/4xx/bubinga.c	2007-12-19 15:29:24.000000000 -0500
+@@ -4,7 +4,7 @@
+  * Author: SAW (IBM), derived from walnut.c.
+  *         Maintained by MontaVista Software <source@mvista.com>
+  *
+- * 2003 (c) MontaVista Software Inc.  This file is licensed under the
++ * 2003-2004 (c) MontaVista Software Inc.  This file is licensed under the
+  * terms of the GNU General Public License version 2. This program is
+  * licensed "as is" without any warranty of any kind, whether express
+  * or implied.
+@@ -21,6 +21,7 @@
+ #include <linux/tty.h>
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/system.h>
+ #include <asm/pci-bridge.h>
+@@ -30,7 +31,6 @@
+ #include <asm/time.h>
+ #include <asm/io.h>
+ #include <asm/todc.h>
+-#include <asm/kgdb.h>
+ #include <asm/ocp.h>
+ #include <asm/ibm_ocp_pci.h>
+ 
+@@ -100,17 +100,26 @@
+ 	port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST;
+ 	port.line = 0;
+ 
+-	if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++	if (early_serial_setup(&port) != 0)
+ 		printk("Early serial init of port 0 failed\n");
+-	}
++#endif
++
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(0, &port);
++#endif
+ 
+ 	port.membase = (void*)ACTING_UART1_IO_BASE;
+ 	port.irq = ACTING_UART1_INT;
+ 	port.line = 1;
+ 
+-	if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++	if (early_serial_setup(&port) != 0)
+ 		printk("Early serial init of port 1 failed\n");
+-	}
++#endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(1, &port);
++#endif
+ }
+ 
+ void __init
+@@ -257,8 +266,4 @@
+ 	ppc_md.nvram_read_val = todc_direct_read_val;
+ 	ppc_md.nvram_write_val = todc_direct_write_val;
+ #endif
+-#ifdef CONFIG_KGDB
+-	ppc_md.early_serial_map = bubinga_early_serial_map;
+-#endif
+ }
+-
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/ebony.c linux-2.6.22-try2/arch/ppc/platforms/4xx/ebony.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/ebony.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/4xx/ebony.c	2007-12-19 15:29:24.000000000 -0500
+@@ -32,6 +32,7 @@
+ #include <linux/tty.h>
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -226,14 +227,20 @@
+ 	port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST;
+ 	port.line = 0;
+ 
+-	if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++	if (early_serial_setup(&port) != 0)
+ 		printk("Early serial init of port 0 failed\n");
+-	}
++#endif
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	/* Configure debug serial access */
+ 	gen550_init(0, &port);
++#endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(0, &port);
++#endif
+ 
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ 	/* Purge TLB entry added in head_44x.S for early serial access */
+ 	_tlbie(UART0_IO_BASE);
+ #endif
+@@ -243,14 +250,18 @@
+ 	port.uartclk = clocks.uart1;
+ 	port.line = 1;
+ 
+-	if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++	if (early_serial_setup(&port) != 0)
+ 		printk("Early serial init of port 1 failed\n");
+-	}
++#endif
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	/* Configure debug serial access */
+ 	gen550_init(1, &port);
+ #endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(1, &port);
++#endif
+ }
+ 
+ static void __init
+@@ -327,8 +338,4 @@
+ 
+ 	ppc_md.nvram_read_val = todc_direct_read_val;
+ 	ppc_md.nvram_write_val = todc_direct_write_val;
+-#ifdef CONFIG_KGDB
+-	ppc_md.early_serial_map = ebony_early_serial_map;
+-#endif
+ }
+-
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/luan.c linux-2.6.22-try2/arch/ppc/platforms/4xx/luan.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/luan.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/4xx/luan.c	2007-12-19 15:29:24.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include <linux/tty.h>
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -283,6 +284,9 @@
+ 	if (early_serial_setup(&port) != 0) {
+ 		printk("Early serial init of port 0 failed\n");
+ 	}
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(0, &port);
++#endif
+ 
+ 	port.membase = ioremap64(PPC440SP_UART1_ADDR, 8);
+ 	port.irq = UART1_INT;
+@@ -292,6 +296,9 @@
+ 	if (early_serial_setup(&port) != 0) {
+ 		printk("Early serial init of port 1 failed\n");
+ 	}
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(1, &port);
++#endif
+ 
+ 	port.membase = ioremap64(PPC440SP_UART2_ADDR, 8);
+ 	port.irq = UART2_INT;
+@@ -301,6 +308,9 @@
+ 	if (early_serial_setup(&port) != 0) {
+ 		printk("Early serial init of port 2 failed\n");
+ 	}
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(2, &port);
++#endif
+ }
+ 
+ static void __init
+@@ -360,7 +370,4 @@
+ 	ppc_md.get_irq = NULL;		/* Set in ppc4xx_pic_init() */
+ 
+ 	ppc_md.calibrate_decr = luan_calibrate_decr;
+-#ifdef CONFIG_KGDB
+-	ppc_md.early_serial_map = luan_early_serial_map;
+-#endif
+ }
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/ocotea.c linux-2.6.22-try2/arch/ppc/platforms/4xx/ocotea.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/ocotea.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/4xx/ocotea.c	2007-12-19 15:29:24.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include <linux/tty.h>
+ #include <linux/serial.h>
+ #include <linux/serial_core.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -249,14 +250,20 @@
+ 	port.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST;
+ 	port.line = 0;
+ 
+-	if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++	if (early_serial_setup(&port) != 0)
+ 		printk("Early serial init of port 0 failed\n");
+-	}
++#endif
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	/* Configure debug serial access */
+ 	gen550_init(0, &port);
++#endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(0, &port);
++#endif
+ 
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ 	/* Purge TLB entry added in head_44x.S for early serial access */
+ 	_tlbie(UART0_IO_BASE);
+ #endif
+@@ -266,14 +273,18 @@
+ 	port.uartclk = clocks.uart1;
+ 	port.line = 1;
+ 
+-	if (early_serial_setup(&port) != 0) {
++#ifdef CONFIG_SERIAL_8250
++	if (early_serial_setup(&port) != 0)
+ 		printk("Early serial init of port 1 failed\n");
+-	}
++#endif
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	/* Configure debug serial access */
+ 	gen550_init(1, &port);
+ #endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(1, &port);
++#endif
+ }
+ 
+ static void __init
+@@ -343,8 +354,5 @@
+ 
+ 	ppc_md.nvram_read_val = todc_direct_read_val;
+ 	ppc_md.nvram_write_val = todc_direct_write_val;
+-#ifdef CONFIG_KGDB
+-	ppc_md.early_serial_map = ocotea_early_serial_map;
+-#endif
+ 	ppc_md.init = ocotea_init;
+ }
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/taishan.c linux-2.6.22-try2/arch/ppc/platforms/4xx/taishan.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/taishan.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/4xx/taishan.c	2007-12-19 15:29:24.000000000 -0500
+@@ -310,7 +310,7 @@
+ 	if (early_serial_setup(&port) != 0)
+ 		printk("Early serial init of port 0 failed\n");
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	/* Configure debug serial access */
+ 	gen550_init(0, &port);
+ 
+@@ -326,7 +326,7 @@
+ 	if (early_serial_setup(&port) != 0)
+ 		printk("Early serial init of port 1 failed\n");
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	/* Configure debug serial access */
+ 	gen550_init(1, &port);
+ #endif
+@@ -387,9 +387,6 @@
+ 
+ 	ppc_md.calibrate_decr = taishan_calibrate_decr;
+ 
+-#ifdef CONFIG_KGDB
+-	ppc_md.early_serial_map = taishan_early_serial_map;
+-#endif
+ 	ppc_md.init = taishan_init;
+ }
+ 
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml300.c linux-2.6.22-try2/arch/ppc/platforms/4xx/xilinx_ml300.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml300.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/4xx/xilinx_ml300.c	2007-12-19 15:29:24.000000000 -0500
+@@ -16,6 +16,8 @@
+ #include <linux/serial_core.h>
+ #include <linux/serial_8250.h>
+ #include <linux/serialP.h>
++#include <linux/kgdb.h>
++
+ #include <asm/io.h>
+ #include <asm/machdep.h>
+ 
+@@ -41,9 +43,6 @@
+  *      ppc4xx_map_io				arch/ppc/syslib/ppc4xx_setup.c
+  *  start_kernel				init/main.c
+  *    setup_arch				arch/ppc/kernel/setup.c
+- * #if defined(CONFIG_KGDB)
+- *      *ppc_md.kgdb_map_scc() == gen550_kgdb_map_scc
+- * #endif
+  *      *ppc_md.setup_arch == ml300_setup_arch	this file
+  *        ppc4xx_setup_arch			arch/ppc/syslib/ppc4xx_setup.c
+  *          ppc4xx_find_bridges			arch/ppc/syslib/ppc405_pci.c
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml403.c linux-2.6.22-try2/arch/ppc/platforms/4xx/xilinx_ml403.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/xilinx_ml403.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/4xx/xilinx_ml403.c	2007-12-19 15:29:24.000000000 -0500
+@@ -43,9 +43,6 @@
+  *      ppc4xx_map_io				arch/ppc/syslib/ppc4xx_setup.c
+  *  start_kernel				init/main.c
+  *    setup_arch				arch/ppc/kernel/setup.c
+- * #if defined(CONFIG_KGDB)
+- *      *ppc_md.kgdb_map_scc() == gen550_kgdb_map_scc
+- * #endif
+  *      *ppc_md.setup_arch == ml403_setup_arch	this file
+  *        ppc4xx_setup_arch			arch/ppc/syslib/ppc4xx_setup.c
+  *          ppc4xx_find_bridges			arch/ppc/syslib/ppc405_pci.c
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/4xx/yucca.c linux-2.6.22-try2/arch/ppc/platforms/4xx/yucca.c
+--- linux-2.6.22-570/arch/ppc/platforms/4xx/yucca.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/4xx/yucca.c	2007-12-19 15:29:24.000000000 -0500
+@@ -386,7 +386,4 @@
+ 	ppc_md.get_irq = NULL;		/* Set in ppc4xx_pic_init() */
+ 
+ 	ppc_md.calibrate_decr = yucca_calibrate_decr;
+-#ifdef CONFIG_KGDB
+-	ppc_md.early_serial_map = yucca_early_serial_map;
+-#endif
+ }
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/83xx/mpc834x_sys.c linux-2.6.22-try2/arch/ppc/platforms/83xx/mpc834x_sys.c
+--- linux-2.6.22-570/arch/ppc/platforms/83xx/mpc834x_sys.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/83xx/mpc834x_sys.c	2007-12-19 15:29:24.000000000 -0500
+@@ -42,11 +42,11 @@
+ #include <asm/pci-bridge.h>
+ #include <asm/mpc83xx.h>
+ #include <asm/irq.h>
+-#include <asm/kgdb.h>
+ #include <asm/ppc_sys.h>
+ #include <mm/mmu_decl.h>
+ 
+ #include <syslib/ppc83xx_setup.h>
++#include <syslib/gen550.h>
+ 
+ #ifndef CONFIG_PCI
+ unsigned long isa_io_base = 0;
+@@ -114,7 +114,9 @@
+ 	/* setup PCI host bridges */
+ 	mpc83xx_setup_hose();
+ #endif
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ 	mpc83xx_early_serial_map();
++#endif
+ 
+ 	/* setup the board related info for the MDIO bus */
+ 	mdata = (struct gianfar_mdio_data *) ppc_sys_get_pdata(MPC83xx_MDIO);
+@@ -334,7 +336,6 @@
+ 	ppc_md.get_rtc_time = NULL;
+ 	ppc_md.calibrate_decr = mpc83xx_calibrate_decr;
+ 
+-	ppc_md.early_serial_map = mpc83xx_early_serial_map;
+ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG)
+ 	ppc_md.progress = gen550_progress;
+ #endif	/* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8540_ads.c linux-2.6.22-try2/arch/ppc/platforms/85xx/mpc8540_ads.c
+--- linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8540_ads.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/85xx/mpc8540_ads.c	2007-12-19 15:29:24.000000000 -0500
+@@ -43,11 +43,11 @@
+ #include <asm/mpc85xx.h>
+ #include <asm/irq.h>
+ #include <asm/immap_85xx.h>
+-#include <asm/kgdb.h>
+ #include <asm/ppc_sys.h>
+ #include <mm/mmu_decl.h>
+ 
+ #include <syslib/ppc85xx_setup.h>
++#include <syslib/gen550.h>
+ 
+ /* ************************************************************************
+  *
+@@ -77,7 +77,7 @@
+ 	mpc85xx_setup_hose();
+ #endif
+ 
+-#ifdef CONFIG_SERIAL_8250
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ 	mpc85xx_early_serial_map();
+ #endif
+ 
+@@ -215,9 +215,6 @@
+ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG)
+ 	ppc_md.progress = gen550_progress;
+ #endif	/* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */
+-#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB)
+-	ppc_md.early_serial_map = mpc85xx_early_serial_map;
+-#endif	/* CONFIG_SERIAL_8250 && CONFIG_KGDB */
+ 
+ 	if (ppc_md.progress)
+ 		ppc_md.progress("mpc8540ads_init(): exit", 0);
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8560_ads.c linux-2.6.22-try2/arch/ppc/platforms/85xx/mpc8560_ads.c
+--- linux-2.6.22-570/arch/ppc/platforms/85xx/mpc8560_ads.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/85xx/mpc8560_ads.c	2007-12-19 15:29:24.000000000 -0500
+@@ -44,7 +44,6 @@
+ #include <asm/mpc85xx.h>
+ #include <asm/irq.h>
+ #include <asm/immap_85xx.h>
+-#include <asm/kgdb.h>
+ #include <asm/ppc_sys.h>
+ #include <asm/cpm2.h>
+ #include <mm/mmu_decl.h>
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/mpc85xx_cds_common.c linux-2.6.22-try2/arch/ppc/platforms/85xx/mpc85xx_cds_common.c
+--- linux-2.6.22-570/arch/ppc/platforms/85xx/mpc85xx_cds_common.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/85xx/mpc85xx_cds_common.c	2007-12-19 15:29:24.000000000 -0500
+@@ -47,12 +47,12 @@
+ #include <asm/immap_85xx.h>
+ #include <asm/cpm2.h>
+ #include <asm/ppc_sys.h>
+-#include <asm/kgdb.h>
+ 
+ #include <mm/mmu_decl.h>
+ #include <syslib/cpm2_pic.h>
+ #include <syslib/ppc85xx_common.h>
+ #include <syslib/ppc85xx_setup.h>
++#include <syslib/gen550.h>
+ 
+ 
+ #ifndef CONFIG_PCI
+@@ -436,7 +436,7 @@
+ 	mpc85xx_setup_hose();
+ #endif
+ 
+-#ifdef CONFIG_SERIAL_8250
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ 	mpc85xx_early_serial_map();
+ #endif
+ 
+@@ -590,9 +590,6 @@
+ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG)
+ 	ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */
+-#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB)
+-	ppc_md.early_serial_map = mpc85xx_early_serial_map;
+-#endif	/* CONFIG_SERIAL_8250 && CONFIG_KGDB */
+ 
+ 	if (ppc_md.progress)
+ 		ppc_md.progress("mpc85xx_cds_init(): exit", 0);
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/sbc8560.c linux-2.6.22-try2/arch/ppc/platforms/85xx/sbc8560.c
+--- linux-2.6.22-570/arch/ppc/platforms/85xx/sbc8560.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/85xx/sbc8560.c	2007-12-19 15:29:24.000000000 -0500
+@@ -29,6 +29,7 @@
+ #include <linux/initrd.h>
+ #include <linux/module.h>
+ #include <linux/fsl_devices.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -43,14 +44,13 @@
+ #include <asm/mpc85xx.h>
+ #include <asm/irq.h>
+ #include <asm/immap_85xx.h>
+-#include <asm/kgdb.h>
+ #include <asm/ppc_sys.h>
+ #include <mm/mmu_decl.h>
+ 
+ #include <syslib/ppc85xx_common.h>
+ #include <syslib/ppc85xx_setup.h>
++#include <syslib/gen550.h>
+ 
+-#ifdef CONFIG_SERIAL_8250
+ static void __init
+ sbc8560_early_serial_map(void)
+ {
+@@ -66,12 +66,16 @@
+         uart_req.membase = ioremap(uart_req.mapbase, MPC85xx_UART0_SIZE);
+ 	uart_req.type = PORT_16650;
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
+-        gen550_init(0, &uart_req);
+-#endif
+- 
++#ifdef CONFIG_SERIAL_8250
+         if (early_serial_setup(&uart_req) != 0)
+                 printk("Early serial init of port 0 failed\n");
++#endif
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
++	gen550_init(0, &uart_req);
++#endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(0, &uart_req);
++#endif
+  
+         /* Assume early_serial_setup() doesn't modify uart_req */
+ 	uart_req.line = 1;
+@@ -79,14 +83,17 @@
+         uart_req.membase = ioremap(uart_req.mapbase, MPC85xx_UART1_SIZE);
+ 	uart_req.irq = MPC85xx_IRQ_EXT10;
+  
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
+-        gen550_init(1, &uart_req);
+-#endif
+- 
++#ifdef CONFIG_SERIAL_8250
+         if (early_serial_setup(&uart_req) != 0)
+-                printk("Early serial init of port 1 failed\n");
+-}
++		printk("Early serial init of port 1 failed\n");
+ #endif
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
++	gen550_init(1, &uart_req);
++#endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(1, &uart_req);
++#endif
++}
+ 
+ /* ************************************************************************
+  *
+@@ -115,9 +122,7 @@
+ 	/* setup PCI host bridges */
+ 	mpc85xx_setup_hose();
+ #endif
+-#ifdef CONFIG_SERIAL_8250
+ 	sbc8560_early_serial_map();
+-#endif
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	/* Invalidate the entry we stole earlier; the serial ports
+ 	 * should be properly mapped */ 
+@@ -224,9 +229,6 @@
+ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG)
+ 	ppc_md.progress = gen550_progress;
+ #endif	/* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */
+-#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB)
+-	ppc_md.early_serial_map = sbc8560_early_serial_map;
+-#endif	/* CONFIG_SERIAL_8250 && CONFIG_KGDB */
+ 
+ 	if (ppc_md.progress)
+ 		ppc_md.progress("sbc8560_init(): exit", 0);
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/85xx/tqm85xx.c linux-2.6.22-try2/arch/ppc/platforms/85xx/tqm85xx.c
+--- linux-2.6.22-570/arch/ppc/platforms/85xx/tqm85xx.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/85xx/tqm85xx.c	2007-12-19 15:29:24.000000000 -0500
+@@ -46,7 +46,6 @@
+ #include <asm/mpc85xx.h>
+ #include <asm/irq.h>
+ #include <asm/immap_85xx.h>
+-#include <asm/kgdb.h>
+ #include <asm/ppc_sys.h>
+ #include <asm/cpm2.h>
+ #include <mm/mmu_decl.h>
+@@ -55,6 +54,7 @@
+ #include <syslib/cpm2_pic.h>
+ #include <syslib/ppc85xx_common.h>
+ #include <syslib/ppc85xx_rio.h>
++#include <syslib/gen550.h>
+ 
+ #ifndef CONFIG_PCI
+ unsigned long isa_io_base = 0;
+@@ -121,7 +121,7 @@
+ #endif
+ 
+ #ifndef CONFIG_MPC8560
+-#if defined(CONFIG_SERIAL_8250)
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ 	mpc85xx_early_serial_map();
+ #endif
+ 
+@@ -400,9 +400,6 @@
+ #if defined(CONFIG_SERIAL_8250) && defined(CONFIG_SERIAL_TEXT_DEBUG)
+ 	ppc_md.progress = gen550_progress;
+ #endif	/* CONFIG_SERIAL_8250 && CONFIG_SERIAL_TEXT_DEBUG */
+-#if defined(CONFIG_SERIAL_8250) && defined(CONFIG_KGDB)
+-	ppc_md.early_serial_map = mpc85xx_early_serial_map;
+-#endif	/* CONFIG_SERIAL_8250 && CONFIG_KGDB */
+ #endif /* CONFIG_MPC8560 */
+ 
+ 	if (ppc_md.progress)
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/apus_setup.c linux-2.6.22-try2/arch/ppc/platforms/apus_setup.c
+--- linux-2.6.22-570/arch/ppc/platforms/apus_setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/apus_setup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -598,12 +598,6 @@
+ 	ciab.ddra |= (SER_DTR | SER_RTS);   /* outputs */
+ 	ciab.ddra &= ~(SER_DCD | SER_CTS | SER_DSR);  /* inputs */
+ 
+-#ifdef CONFIG_KGDB
+-	/* turn Rx interrupts on for GDB */
+-	amiga_custom.intena = IF_SETCLR | IF_RBF;
+-	ser_RTSon();
+-#endif
+-
+ 	return 0;
+ }
+ 
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/chestnut.c linux-2.6.22-try2/arch/ppc/platforms/chestnut.c
+--- linux-2.6.22-570/arch/ppc/platforms/chestnut.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/chestnut.c	2007-12-19 15:29:24.000000000 -0500
+@@ -34,9 +34,9 @@
+ #include <asm/io.h>
+ #include <asm/hw_irq.h>
+ #include <asm/machdep.h>
+-#include <asm/kgdb.h>
+ #include <asm/bootinfo.h>
+ #include <asm/mv64x60.h>
++#include <syslib/gen550.h>
+ #include <platforms/chestnut.h>
+ 
+ static void __iomem *sram_base; /* Virtual addr of Internal SRAM */
+@@ -492,7 +492,7 @@
+ static void __init
+ chestnut_map_io(void)
+ {
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ 	io_block_mapping(CHESTNUT_UART_BASE, CHESTNUT_UART_BASE, 0x100000,
+ 		_PAGE_IO);
+ #endif
+@@ -566,9 +566,6 @@
+ #if defined(CONFIG_SERIAL_TEXT_DEBUG)
+ 	ppc_md.progress = gen550_progress;
+ #endif
+-#if defined(CONFIG_KGDB)
+-	ppc_md.kgdb_map_scc = gen550_kgdb_map_scc;
+-#endif
+ 
+ 	if (ppc_md.progress)
+                 ppc_md.progress("chestnut_init(): exit", 0);
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/ev64260.c linux-2.6.22-try2/arch/ppc/platforms/ev64260.c
+--- linux-2.6.22-570/arch/ppc/platforms/ev64260.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/ev64260.c	2007-12-19 15:29:24.000000000 -0500
+@@ -330,7 +330,7 @@
+ 		port.iotype = UPIO_MEM;
+ 		port.flags = STD_COM_FLAGS;
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 		gen550_init(0, &port);
+ #endif
+ 
+@@ -568,7 +568,7 @@
+ 	return;
+ }
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ static void __init
+ ev64260_map_io(void)
+ {
+@@ -624,20 +624,12 @@
+ 	ppc_md.setup_io_mappings = ev64260_map_io;
+ 	ppc_md.progress = gen550_progress;
+ #endif
+-#if defined(CONFIG_KGDB)
+-	ppc_md.setup_io_mappings = ev64260_map_io;
+-	ppc_md.early_serial_map = ev64260_early_serial_map;
+-#endif
+ #elif defined(CONFIG_SERIAL_MPSC_CONSOLE)
+ #ifdef	CONFIG_SERIAL_TEXT_DEBUG
+ 	ppc_md.setup_io_mappings = ev64260_map_io;
+ 	ppc_md.progress = mv64x60_mpsc_progress;
+ 	mv64x60_progress_init(CONFIG_MV64X60_NEW_BASE);
+ #endif	/* CONFIG_SERIAL_TEXT_DEBUG */
+-#ifdef	CONFIG_KGDB
+-	ppc_md.setup_io_mappings = ev64260_map_io;
+-	ppc_md.early_serial_map = ev64260_early_serial_map;
+-#endif	/* CONFIG_KGDB */
+ 
+ #endif
+ 
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/hdpu.c linux-2.6.22-try2/arch/ppc/platforms/hdpu.c
+--- linux-2.6.22-570/arch/ppc/platforms/hdpu.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/hdpu.c	2007-12-19 15:29:24.000000000 -0500
+@@ -281,25 +281,6 @@
+ #if defined(CONFIG_SERIAL_MPSC_CONSOLE)
+ static void __init hdpu_early_serial_map(void)
+ {
+-#ifdef	CONFIG_KGDB
+-	static char first_time = 1;
+-
+-#if defined(CONFIG_KGDB_TTYS0)
+-#define KGDB_PORT 0
+-#elif defined(CONFIG_KGDB_TTYS1)
+-#define KGDB_PORT 1
+-#else
+-#error "Invalid kgdb_tty port"
+-#endif
+-
+-	if (first_time) {
+-		gt_early_mpsc_init(KGDB_PORT,
+-				   B9600 | CS8 | CREAD | HUPCL | CLOCAL);
+-		first_time = 0;
+-	}
+-
+-	return;
+-#endif
+ }
+ #endif
+ 
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/lopec.c linux-2.6.22-try2/arch/ppc/platforms/lopec.c
+--- linux-2.6.22-570/arch/ppc/platforms/lopec.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/lopec.c	2007-12-19 15:29:24.000000000 -0500
+@@ -32,7 +32,8 @@
+ #include <asm/mpc10x.h>
+ #include <asm/hw_irq.h>
+ #include <asm/prep_nvram.h>
+-#include <asm/kgdb.h>
++
++#include <syslib/gen550.h>
+ 
+ /*
+  * Define all of the IRQ senses and polarities.  Taken from the
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/pplus.c linux-2.6.22-try2/arch/ppc/platforms/pplus.c
+--- linux-2.6.22-570/arch/ppc/platforms/pplus.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/pplus.c	2007-12-19 15:29:24.000000000 -0500
+@@ -35,9 +35,9 @@
+ #include <asm/hawk.h>
+ #include <asm/todc.h>
+ #include <asm/bootinfo.h>
+-#include <asm/kgdb.h>
+ #include <asm/reg.h>
+ 
++#include <syslib/gen550.h>
+ #include "pplus.h"
+ 
+ #undef DUMP_DBATS
+@@ -893,9 +893,6 @@
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	ppc_md.progress = gen550_progress;
+ #endif				/* CONFIG_SERIAL_TEXT_DEBUG */
+-#ifdef CONFIG_KGDB
+-	ppc_md.kgdb_map_scc = gen550_kgdb_map_scc;
+-#endif
+ #ifdef CONFIG_SMP
+ 	smp_ops = &pplus_smp_ops;
+ #endif				/* CONFIG_SMP */
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/radstone_ppc7d.c linux-2.6.22-try2/arch/ppc/platforms/radstone_ppc7d.c
+--- linux-2.6.22-570/arch/ppc/platforms/radstone_ppc7d.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/radstone_ppc7d.c	2007-12-19 15:29:24.000000000 -0500
+@@ -84,7 +84,7 @@
+  * Serial port code
+  *****************************************************************************/
+ 
+-#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ static void __init ppc7d_early_serial_map(void)
+ {
+ #if defined(CONFIG_SERIAL_MPSC_CONSOLE)
+@@ -113,10 +113,10 @@
+ 	if (early_serial_setup(&serial_req) != 0)
+ 		printk(KERN_ERR "Early serial init of port 1 failed\n");
+ #else
+-#error CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX
++#error CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX
+ #endif
+ }
+-#endif /* CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG */
++#endif /* CONFIG_SERIAL_TEXT_DEBUG */
+ 
+ /*****************************************************************************
+  * Low-level board support code
+@@ -1459,18 +1459,16 @@
+ 	     PPC7D_CPLD_COMS_COM4_TXEN, PPC7D_CPLD_COMS);
+ #endif /* CONFIG_SERIAL_MPSC */
+ 
+-#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)
+-	ppc7d_early_serial_map();
+ #ifdef  CONFIG_SERIAL_TEXT_DEBUG
++	ppc7d_early_serial_map();
+ #if defined(CONFIG_SERIAL_MPSC_CONSOLE)
+ 	ppc_md.progress = mv64x60_mpsc_progress;
+ #elif defined(CONFIG_SERIAL_8250)
+ 	ppc_md.progress = gen550_progress;
+ #else
+-#error CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX
++#error CONFIG_SERIAL_TEXT_DEBUG has no supported CONFIG_SERIAL_XXX
+ #endif /* CONFIG_SERIAL_8250 */
+ #endif /* CONFIG_SERIAL_TEXT_DEBUG */
+-#endif /* CONFIG_KGDB || CONFIG_SERIAL_TEXT_DEBUG */
+ 
+ 	/* Enable write access to user flash.  This is necessary for
+ 	 * flash probe.
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/sandpoint.c linux-2.6.22-try2/arch/ppc/platforms/sandpoint.c
+--- linux-2.6.22-570/arch/ppc/platforms/sandpoint.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/sandpoint.c	2007-12-19 15:29:24.000000000 -0500
+@@ -95,9 +95,9 @@
+ #include <asm/bootinfo.h>
+ #include <asm/mpc10x.h>
+ #include <asm/pci-bridge.h>
+-#include <asm/kgdb.h>
+ #include <asm/ppc_sys.h>
+ 
++#include <syslib/gen550.h>
+ #include "sandpoint.h"
+ 
+ /* Set non-zero if an X2 Sandpoint detected. */
+@@ -730,9 +730,6 @@
+ 	ppc_md.nvram_read_val = todc_mc146818_read_val;
+ 	ppc_md.nvram_write_val = todc_mc146818_write_val;
+ 
+-#ifdef CONFIG_KGDB
+-	ppc_md.kgdb_map_scc = gen550_kgdb_map_scc;
+-#endif
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	ppc_md.progress = gen550_progress;
+ #endif
+diff -Nurb linux-2.6.22-570/arch/ppc/platforms/spruce.c linux-2.6.22-try2/arch/ppc/platforms/spruce.c
+--- linux-2.6.22-570/arch/ppc/platforms/spruce.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/platforms/spruce.c	2007-12-19 15:29:24.000000000 -0500
+@@ -27,6 +27,7 @@
+ #include <linux/serial.h>
+ #include <linux/tty.h>
+ #include <linux/serial_core.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/system.h>
+ #include <asm/pgtable.h>
+@@ -37,9 +38,9 @@
+ #include <asm/time.h>
+ #include <asm/todc.h>
+ #include <asm/bootinfo.h>
+-#include <asm/kgdb.h>
+ 
+ #include <syslib/cpc700.h>
++#include <syslib/gen550.h>
+ 
+ #include "spruce.h"
+ 
+@@ -178,26 +179,32 @@
+ 	serial_req.membase = (u_char *)UART0_IO_BASE;
+ 	serial_req.regshift = 0;
+ 
+-#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)
+-	gen550_init(0, &serial_req);
+-#endif
+ #ifdef CONFIG_SERIAL_8250
+ 	if (early_serial_setup(&serial_req) != 0)
+ 		printk("Early serial init of port 0 failed\n");
+ #endif
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
++	gen550_init(0, &serial_req);
++#endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(0, &serial_req);
++#endif
+ 
+ 	/* Assume early_serial_setup() doesn't modify serial_req */
+ 	serial_req.line = 1;
+ 	serial_req.irq = UART1_INT;
+ 	serial_req.membase = (u_char *)UART1_IO_BASE;
+ 
+-#if defined(CONFIG_KGDB) || defined(CONFIG_SERIAL_TEXT_DEBUG)
+-	gen550_init(1, &serial_req);
+-#endif
+ #ifdef CONFIG_SERIAL_8250
+ 	if (early_serial_setup(&serial_req) != 0)
+ 		printk("Early serial init of port 1 failed\n");
+ #endif
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
++	gen550_init(1, &serial_req);
++#endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(1, &serial_req);
++#endif
+ }
+ 
+ TODC_ALLOC();
+@@ -316,7 +323,4 @@
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_TEXT_DEBUG */
+-#ifdef CONFIG_KGDB
+-	ppc_md.kgdb_map_scc = gen550_kgdb_map_scc;
+-#endif
+ }
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/Makefile linux-2.6.22-try2/arch/ppc/syslib/Makefile
+--- linux-2.6.22-570/arch/ppc/syslib/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/syslib/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -77,7 +77,6 @@
+ obj-$(CONFIG_8260_PCI9)		+= m8260_pci_erratum9.o
+ obj-$(CONFIG_CPM2)		+= cpm2_common.o cpm2_pic.o
+ ifeq ($(CONFIG_PPC_GEN550),y)
+-obj-$(CONFIG_KGDB)		+= gen550_kgdb.o gen550_dbg.o
+ obj-$(CONFIG_SERIAL_TEXT_DEBUG)	+= gen550_dbg.o
+ endif
+ ifeq ($(CONFIG_SERIAL_MPSC_CONSOLE),y)
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/gen550.h linux-2.6.22-try2/arch/ppc/syslib/gen550.h
+--- linux-2.6.22-570/arch/ppc/syslib/gen550.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/syslib/gen550.h	2007-12-19 15:29:24.000000000 -0500
+@@ -11,4 +11,3 @@
+ 
+ extern void gen550_progress(char *, unsigned short);
+ extern void gen550_init(int, struct uart_port *);
+-extern void gen550_kgdb_map_scc(void);
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/gen550_kgdb.c linux-2.6.22-try2/arch/ppc/syslib/gen550_kgdb.c
+--- linux-2.6.22-570/arch/ppc/syslib/gen550_kgdb.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/syslib/gen550_kgdb.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,83 +0,0 @@
+-/*
+- * Generic 16550 kgdb support intended to be useful on a variety
+- * of platforms.  To enable this support, it is necessary to set
+- * the CONFIG_GEN550 option.  Any virtual mapping of the serial
+- * port(s) to be used can be accomplished by setting
+- * ppc_md.early_serial_map to a platform-specific mapping function.
+- *
+- * Adapted from ppc4xx_kgdb.c.
+- *
+- * Author: Matt Porter <mporter@kernel.crashing.org>
+- *
+- * 2002-2004 (c) MontaVista Software, Inc.  This file is licensed under
+- * the terms of the GNU General Public License version 2.  This program
+- * is licensed "as is" without any warranty of any kind, whether express
+- * or implied.
+- */
+-
+-#include <linux/types.h>
+-#include <linux/kernel.h>
+-
+-#include <asm/machdep.h>
+-
+-extern unsigned long serial_init(int, void *);
+-extern unsigned long serial_getc(unsigned long);
+-extern unsigned long serial_putc(unsigned long, unsigned char);
+-
+-#if defined(CONFIG_KGDB_TTYS0)
+-#define KGDB_PORT 0
+-#elif defined(CONFIG_KGDB_TTYS1)
+-#define KGDB_PORT 1
+-#elif defined(CONFIG_KGDB_TTYS2)
+-#define KGDB_PORT 2
+-#elif defined(CONFIG_KGDB_TTYS3)
+-#define KGDB_PORT 3
+-#else
+-#error "invalid kgdb_tty port"
+-#endif
+-
+-static volatile unsigned int kgdb_debugport;
+-
+-void putDebugChar(unsigned char c)
+-{
+-	if (kgdb_debugport == 0)
+-		kgdb_debugport = serial_init(KGDB_PORT, NULL);
+-
+-	serial_putc(kgdb_debugport, c);
+-}
+-
+-int getDebugChar(void)
+-{
+-	if (kgdb_debugport == 0)
+-		kgdb_debugport = serial_init(KGDB_PORT, NULL);
+-
+-	return(serial_getc(kgdb_debugport));
+-}
+-
+-void kgdb_interruptible(int enable)
+-{
+-	return;
+-}
+-
+-void putDebugString(char* str)
+-{
+-	while (*str != '\0') {
+-		putDebugChar(*str);
+-		str++;
+-	}
+-	putDebugChar('\r');
+-	return;
+-}
+-
+-/*
+- * Note: gen550_init() must be called already on the port we are going
+- * to use.
+- */
+-void
+-gen550_kgdb_map_scc(void)
+-{
+-	printk(KERN_DEBUG "kgdb init\n");
+-	if (ppc_md.early_serial_map)
+-		ppc_md.early_serial_map();
+-	kgdb_debugport = serial_init(KGDB_PORT, NULL);
+-}
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ibm44x_common.c linux-2.6.22-try2/arch/ppc/syslib/ibm44x_common.c
+--- linux-2.6.22-570/arch/ppc/syslib/ibm44x_common.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/syslib/ibm44x_common.c	2007-12-19 15:29:24.000000000 -0500
+@@ -192,9 +192,6 @@
+ #ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	ppc_md.progress = gen550_progress;
+ #endif /* CONFIG_SERIAL_TEXT_DEBUG */
+-#ifdef CONFIG_KGDB
+-	ppc_md.kgdb_map_scc = gen550_kgdb_map_scc;
+-#endif
+ 
+ 	/*
+ 	 * The Abatron BDI JTAG debugger does not tolerate others
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/mv64x60.c linux-2.6.22-try2/arch/ppc/syslib/mv64x60.c
+--- linux-2.6.22-570/arch/ppc/syslib/mv64x60.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/syslib/mv64x60.c	2007-12-19 15:29:24.000000000 -0500
+@@ -241,6 +241,12 @@
+ 		.end	= MV64x60_IRQ_SDMA_0,
+ 		.flags	= IORESOURCE_IRQ,
+ 	},
++	[4] = {
++		.name	= "mpsc 0 irq",
++		.start	= MV64x60_IRQ_MPSC_0,
++		.end	= MV64x60_IRQ_MPSC_0,
++		.flags	= IORESOURCE_IRQ,
++	},
+ };
+ 
+ static struct platform_device mpsc0_device = {
+@@ -298,6 +304,12 @@
+ 		.end	= MV64360_IRQ_SDMA_1,
+ 		.flags	= IORESOURCE_IRQ,
+ 	},
++	[4] = {
++		.name	= "mpsc 1 irq",
++		.start	= MV64360_IRQ_MPSC_1,
++		.end	= MV64360_IRQ_MPSC_1,
++		.flags	= IORESOURCE_IRQ,
++	},
+ };
+ 
+ static struct platform_device mpsc1_device = {
+@@ -1432,12 +1444,46 @@
+ static int __init
+ mv64x60_add_pds(void)
+ {
+-	return platform_add_devices(mv64x60_pd_devs,
+-		ARRAY_SIZE(mv64x60_pd_devs));
++	int i, ret = 0;
++
++	for (i = 0; i < ARRAY_SIZE(mv64x60_pd_devs); i++) {
++		if (mv64x60_pd_devs[i]) {
++			ret = platform_device_register(mv64x60_pd_devs[i]);
++		}
++		if (ret) {
++			while (--i >= 0)
++				platform_device_unregister(mv64x60_pd_devs[i]);
++			break;
++		}
++	}
++	return ret;
+ }
+ arch_initcall(mv64x60_add_pds);
+ 
+ /*
++ * mv64x60_early_get_pdev_data()
++ *
++ * Get the data associated with a platform device by name and number.
++ */
++struct platform_device * __init
++mv64x60_early_get_pdev_data(const char *name, int id, int remove)
++{
++	int i;
++	struct platform_device *pdev;
++
++	for (i = 0; i < ARRAY_SIZE(mv64x60_pd_devs); i++) {
++		if ((pdev = mv64x60_pd_devs[i]) &&
++			pdev->id == id &&
++			!strcmp(pdev->name, name)) {
++			if (remove)
++				mv64x60_pd_devs[i] = NULL;
++			return pdev;
++		}
++	}
++	return NULL;
++}
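
A hypothetical early caller would be a polled console that needs the MPSC platform data before the devices are registered; passing remove=1 pulls the entry out of the array so mv64x60_add_pds() skips it later (the device name and consumer below are illustrative):

	struct platform_device *pdev;

	pdev = mv64x60_early_get_pdev_data("mpsc", 0, 1);
	if (pdev)
		early_mpsc_setup(pdev->dev.platform_data);	/* illustrative consumer */
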
++
++/*
+  *****************************************************************************
+  *
+  *	GT64260-Specific Routines
+@@ -1770,6 +1816,11 @@
+ 		r->start = MV64x60_IRQ_SDMA_0;
+ 		r->end = MV64x60_IRQ_SDMA_0;
+ 	}
++	if ((r = platform_get_resource(&mpsc1_device, IORESOURCE_IRQ, 1))
++			!= NULL) {
++		r->start = GT64260_IRQ_MPSC_1;
++		r->end = GT64260_IRQ_MPSC_1;
++	}
+ #endif
+ }
+ 
+@@ -2415,7 +2466,6 @@
+ 	.attr = {
+ 		.name = "hs_reg",
+ 		.mode = S_IRUGO | S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size  = VAL_LEN_MAX,
+ 	.read  = mv64xxx_hs_reg_read,
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/mv64x60_dbg.c linux-2.6.22-try2/arch/ppc/syslib/mv64x60_dbg.c
+--- linux-2.6.22-570/arch/ppc/syslib/mv64x60_dbg.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/syslib/mv64x60_dbg.c	2007-12-19 15:29:24.000000000 -0500
+@@ -34,7 +34,7 @@
+ void
+ mv64x60_progress_init(u32 base)
+ {
+-	mv64x60_dbg_bh.v_base = base;
++	mv64x60_dbg_bh.v_base = (void*)base;
+ 	return;
+ }
+ 
+@@ -69,53 +69,3 @@
+ 	return;
+ }
+ #endif	/* CONFIG_SERIAL_TEXT_DEBUG */
+-
+-
+-#if defined(CONFIG_KGDB)
+-
+-#if defined(CONFIG_KGDB_TTYS0)
+-#define KGDB_PORT 0
+-#elif defined(CONFIG_KGDB_TTYS1)
+-#define KGDB_PORT 1
+-#else
+-#error "Invalid kgdb_tty port"
+-#endif
+-
+-void
+-putDebugChar(unsigned char c)
+-{
+-	mv64x60_polled_putc(KGDB_PORT, (char)c);
+-}
+-
+-int
+-getDebugChar(void)
+-{
+-	unsigned char	c;
+-
+-	while (!mv64x60_polled_getc(KGDB_PORT, &c));
+-	return (int)c;
+-}
+-
+-void
+-putDebugString(char* str)
+-{
+-	while (*str != '\0') {
+-		putDebugChar(*str);
+-		str++;
+-	}
+-	putDebugChar('\r');
+-	return;
+-}
+-
+-void
+-kgdb_interruptible(int enable)
+-{
+-}
+-
+-void
+-kgdb_map_scc(void)
+-{
+-	if (ppc_md.early_serial_map)
+-		ppc_md.early_serial_map();
+-}
+-#endif	/* CONFIG_KGDB */
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ppc4xx_setup.c linux-2.6.22-try2/arch/ppc/syslib/ppc4xx_setup.c
+--- linux-2.6.22-570/arch/ppc/syslib/ppc4xx_setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/syslib/ppc4xx_setup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -32,7 +32,6 @@
+ #include <asm/processor.h>
+ #include <asm/machdep.h>
+ #include <asm/page.h>
+-#include <asm/kgdb.h>
+ #include <asm/ibm4xx.h>
+ #include <asm/time.h>
+ #include <asm/todc.h>
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ppc83xx_setup.c linux-2.6.22-try2/arch/ppc/syslib/ppc83xx_setup.c
+--- linux-2.6.22-570/arch/ppc/syslib/ppc83xx_setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/syslib/ppc83xx_setup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -30,12 +30,12 @@
+ #include <linux/tty.h>	/* for linux/serial_core.h */
+ #include <linux/serial_core.h>
+ #include <linux/serial_8250.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/time.h>
+ #include <asm/mpc83xx.h>
+ #include <asm/mmu.h>
+ #include <asm/ppc_sys.h>
+-#include <asm/kgdb.h>
+ #include <asm/delay.h>
+ #include <asm/machdep.h>
+ 
+@@ -44,6 +44,7 @@
+ #include <asm/delay.h>
+ #include <syslib/ppc83xx_pci.h>
+ #endif
++#include <syslib/gen550.h>
+ 
+ phys_addr_t immrbar;
+ 
+@@ -87,11 +88,11 @@
+ 	tb_to_us = mulhwu_scale_factor(freq / divisor, 1000000);
+ }
+ 
+-#ifdef CONFIG_SERIAL_8250
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ void __init
+ mpc83xx_early_serial_map(void)
+ {
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ 	struct uart_port serial_req;
+ #endif
+ 	struct plat_serial8250_port *pdata;
+@@ -103,27 +104,40 @@
+ 	pdata[0].mapbase += binfo->bi_immr_base;
+ 	pdata[0].membase = ioremap(pdata[0].mapbase, 0x100);
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ 	memset(&serial_req, 0, sizeof (serial_req));
+ 	serial_req.iotype = UPIO_MEM;
+ 	serial_req.mapbase = pdata[0].mapbase;
+ 	serial_req.membase = pdata[0].membase;
+ 	serial_req.regshift = 0;
++	serial_req.irq = pdata[0].irq;
++	serial_req.flags = pdata[0].flags;
++	serial_req.uartclk = pdata[0].uartclk;
+ 
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	gen550_init(0, &serial_req);
+ #endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(0, &serial_req);
++#endif
++#endif
+ 
+ 	pdata[1].uartclk = binfo->bi_busfreq;
+ 	pdata[1].mapbase += binfo->bi_immr_base;
+ 	pdata[1].membase = ioremap(pdata[1].mapbase, 0x100);
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ 	/* Assume gen550_init() doesn't modify serial_req */
+ 	serial_req.mapbase = pdata[1].mapbase;
+ 	serial_req.membase = pdata[1].membase;
+ 
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	gen550_init(1, &serial_req);
+ #endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(1, &serial_req);
++#endif
++#endif
+ }
+ #endif
+ 
+diff -Nurb linux-2.6.22-570/arch/ppc/syslib/ppc85xx_setup.c linux-2.6.22-try2/arch/ppc/syslib/ppc85xx_setup.c
+--- linux-2.6.22-570/arch/ppc/syslib/ppc85xx_setup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/ppc/syslib/ppc85xx_setup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -19,16 +19,17 @@
+ #include <linux/tty.h>	/* for linux/serial_core.h */
+ #include <linux/serial_core.h>
+ #include <linux/serial_8250.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/time.h>
+ #include <asm/mpc85xx.h>
+ #include <asm/immap_85xx.h>
+ #include <asm/mmu.h>
+ #include <asm/ppc_sys.h>
+-#include <asm/kgdb.h>
+ #include <asm/machdep.h>
+ 
+ #include <syslib/ppc85xx_setup.h>
++#include <syslib/gen550.h>
+ 
+ extern void abort(void);
+ 
+@@ -69,11 +70,11 @@
+ 	mtspr(SPRN_TCR, TCR_DIE);
+ }
+ 
+-#ifdef CONFIG_SERIAL_8250
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_KGDB_8250)
+ void __init
+ mpc85xx_early_serial_map(void)
+ {
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ 	struct uart_port serial_req;
+ #endif
+ 	struct plat_serial8250_port *pdata;
+@@ -85,27 +86,40 @@
+ 	pdata[0].mapbase += binfo->bi_immr_base;
+ 	pdata[0].membase = ioremap(pdata[0].mapbase, MPC85xx_UART0_SIZE);
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ 	memset(&serial_req, 0, sizeof (serial_req));
+ 	serial_req.iotype = UPIO_MEM;
+ 	serial_req.mapbase = pdata[0].mapbase;
+ 	serial_req.membase = pdata[0].membase;
+ 	serial_req.regshift = 0;
++	serial_req.irq = pdata[0].irq;
++	serial_req.flags = pdata[0].flags;
++	serial_req.uartclk = pdata[0].uartclk;
+ 
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	gen550_init(0, &serial_req);
+ #endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(0, &serial_req);
++#endif
++#endif
+ 
+ 	pdata[1].uartclk = binfo->bi_busfreq;
+ 	pdata[1].mapbase += binfo->bi_immr_base;
+ 	pdata[1].membase = ioremap(pdata[1].mapbase, MPC85xx_UART0_SIZE);
+ 
+-#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB)
++#if defined(CONFIG_SERIAL_TEXT_DEBUG) || defined(CONFIG_KGDB_8250)
+ 	/* Assume gen550_init() doesn't modify serial_req */
+ 	serial_req.mapbase = pdata[1].mapbase;
+ 	serial_req.membase = pdata[1].membase;
+ 
++#ifdef CONFIG_SERIAL_TEXT_DEBUG
+ 	gen550_init(1, &serial_req);
+ #endif
++#ifdef CONFIG_KGDB_8250
++	kgdb8250_add_port(1, &serial_req);
++#endif
++#endif
+ }
+ #endif
+ 
+@@ -363,5 +377,3 @@
+ 	return;
+ }
+ #endif /* CONFIG_PCI */
+-
+-
+diff -Nurb linux-2.6.22-570/arch/s390/kernel/ipl.c linux-2.6.22-try2/arch/s390/kernel/ipl.c
+--- linux-2.6.22-570/arch/s390/kernel/ipl.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/s390/kernel/ipl.c	2007-12-19 15:29:22.000000000 -0500
+@@ -314,7 +314,6 @@
+ 	.attr = {
+ 		.name = "binary_parameter",
+ 		.mode = S_IRUGO,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = PAGE_SIZE,
+ 	.read = &ipl_parameter_read,
+@@ -338,7 +337,6 @@
+ 	.attr = {
+ 		.name = "scp_data",
+ 		.mode = S_IRUGO,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = PAGE_SIZE,
+ 	.read = &ipl_scp_data_read,
+diff -Nurb linux-2.6.22-570/arch/sh/Kconfig.debug linux-2.6.22-try2/arch/sh/Kconfig.debug
+--- linux-2.6.22-570/arch/sh/Kconfig.debug	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/sh/Kconfig.debug	2007-12-19 15:29:24.000000000 -0500
+@@ -78,82 +78,4 @@
+ 	  on the VM subsystem for higher order allocations. This option
+ 	  will also use IRQ stacks to compensate for the reduced stackspace.
+ 
+-config SH_KGDB
+-	bool "Include KGDB kernel debugger"
+-	select FRAME_POINTER
+-	select DEBUG_INFO
+-	help
+-	  Include in-kernel hooks for kgdb, the Linux kernel source level
+-	  debugger.  See <http://kgdb.sourceforge.net/> for more information.
+-	  Unless you are intending to debug the kernel, say N here.
+-
+-menu "KGDB configuration options"
+-	depends on SH_KGDB
+-
+-config MORE_COMPILE_OPTIONS
+-	bool "Add any additional compile options"
+-	help
+-	  If you want to add additional CFLAGS to the kernel build, enable this
+-	  option and then enter what you would like to add in the next question.
+-	  Note however that -g is already appended with the selection of KGDB.
+-
+-config COMPILE_OPTIONS
+-	string "Additional compile arguments"
+-	depends on MORE_COMPILE_OPTIONS
+-
+-config KGDB_NMI
+-	bool "Enter KGDB on NMI"
+-	default n
+-
+-config SH_KGDB_CONSOLE
+-	bool "Console messages through GDB"
+-	depends on !SERIAL_SH_SCI_CONSOLE
+-	select SERIAL_CORE_CONSOLE
+-	default n
+-
+-config KGDB_SYSRQ
+-	bool "Allow SysRq 'G' to enter KGDB"
+-	default y
+-
+-comment "Serial port setup"
+-
+-config KGDB_DEFPORT
+-	int "Port number (ttySCn)"
+-	default "1"
+-
+-config KGDB_DEFBAUD
+-	int "Baud rate"
+-	default "115200"
+-
+-choice
+-	prompt "Parity"
+-	depends on SH_KGDB
+-	default KGDB_DEFPARITY_N
+-
+-config KGDB_DEFPARITY_N
+-	bool "None"
+-
+-config KGDB_DEFPARITY_E
+-	bool "Even"
+-
+-config KGDB_DEFPARITY_O
+-	bool "Odd"
+-
+-endchoice
+-
+-choice
+-	prompt "Data bits"
+-	depends on SH_KGDB
+-	default KGDB_DEFBITS_8
+-
+-config KGDB_DEFBITS_8
+-	bool "8"
+-
+-config KGDB_DEFBITS_7
+-	bool "7"
+-
+-endchoice
+-
+-endmenu
+-
+ endmenu
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/Makefile linux-2.6.22-try2/arch/sh/kernel/Makefile
+--- linux-2.6.22-570/arch/sh/kernel/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/sh/kernel/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -15,7 +15,7 @@
+ obj-$(CONFIG_SMP)		+= smp.o
+ obj-$(CONFIG_CF_ENABLER)	+= cf-enabler.o
+ obj-$(CONFIG_SH_STANDARD_BIOS)	+= sh_bios.o
+-obj-$(CONFIG_SH_KGDB)		+= kgdb_stub.o kgdb_jmp.o
++obj-$(CONFIG_KGDB)		+= kgdb.o kgdb-jmp.o
+ obj-$(CONFIG_SH_CPU_FREQ)	+= cpufreq.o
+ obj-$(CONFIG_MODULES)		+= module.o
+ obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/cpu/sh3/ex.S linux-2.6.22-try2/arch/sh/kernel/cpu/sh3/ex.S
+--- linux-2.6.22-570/arch/sh/kernel/cpu/sh3/ex.S	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/sh/kernel/cpu/sh3/ex.S	2007-12-19 15:29:24.000000000 -0500
+@@ -45,7 +45,7 @@
+ 	.long	exception_error	! reserved_instruction (filled by trap_init) /* 180 */
+ 	.long	exception_error	! illegal_slot_instruction (filled by trap_init) /*1A0*/
+ ENTRY(nmi_slot)
+-#if defined (CONFIG_KGDB_NMI)
++#if defined (CONFIG_KGDB)
+ 	.long	debug_enter	/* 1C0 */	! Allow trap to debugger
+ #else
+ 	.long	exception_none	/* 1C0 */	! Not implemented yet
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb-jmp.S linux-2.6.22-try2/arch/sh/kernel/kgdb-jmp.S
+--- linux-2.6.22-570/arch/sh/kernel/kgdb-jmp.S	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/sh/kernel/kgdb-jmp.S	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,32 @@
++#include <linux/linkage.h>
++
++ENTRY(kgdb_fault_setjmp)
++	add	#(9*4), r4
++	sts.l	pr, @-r4
++	mov.l	r15, @-r4
++	mov.l	r14, @-r4
++	mov.l	r13, @-r4
++	mov.l	r12, @-r4
++	mov.l	r11, @-r4
++	mov.l	r10, @-r4
++	mov.l	r9, @-r4
++	mov.l	r8, @-r4
++	rts
++	 mov	#0, r0
++
++ENTRY(kgdb_fault_longjmp)
++	mov.l	@r4+, r8
++	mov.l	@r4+, r9
++	mov.l	@r4+, r10
++	mov.l	@r4+, r11
++	mov.l	@r4+, r12
++	mov.l	@r4+, r13
++	mov.l	@r4+, r14
++	mov.l	@r4+, r15
++	lds.l	@r4+, pr
++	mov	r5, r0
++	tst	r0, r0
++	bf	1f
++	mov	#1, r0
++1:	rts
++	 nop
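+
+(Note on the pair above: kgdb_fault_setjmp() returns 0 on the direct call
+and nonzero when control comes back through kgdb_fault_longjmp(), so the
+stub can guard memory accesses made on behalf of GDB.  A minimal C sketch
+of the pattern -- probe_value() is a hypothetical caller, not part of this
+patch; kgdb_may_fault and kgdb_fault_jmp_regs are the symbols used by the
+arch/sh/mm/extable.c hunk further down:
+
+	static int probe_value(unsigned long *addr, unsigned long *val)
+	{
+		if (kgdb_fault_setjmp(kgdb_fault_jmp_regs) != 0) {
+			/* Re-entered via kgdb_fault_longjmp(): we faulted. */
+			kgdb_may_fault = 0;
+			return -1;
+		}
+		kgdb_may_fault = 1;	/* tell fixup_exception() to longjmp */
+		*val = *addr;		/* the access that may fault */
+		kgdb_may_fault = 0;
+		return 0;
+	}
+)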
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb.c linux-2.6.22-try2/arch/sh/kernel/kgdb.c
+--- linux-2.6.22-570/arch/sh/kernel/kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/sh/kernel/kgdb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,363 @@
++/*
++ * arch/sh/kernel/kgdb.c
++ *
++ * Contains SH-specific low-level support for KGDB.
++ *
++ * Contains extracts from code by Glenn Engel, Jim Kingdon,
++ * David Grothe <dave@gcom.com>, Tigran Aivazian <tigran@sco.com>,
++ * Amit S. Kale <akale@veritas.com>,  William Gatliff <bgat@open-widgets.com>,
++ * Ben Lee, Steve Chamberlain and Benoit Miller <fulg@iname.com>,
++ * Henry Bell <henry.bell@st.com> and Jeremy Siegel <jsiegel@mvista.com>
++ *
++ * Maintainer: Tom Rini <trini@kernel.crashing.org>
++ *
++ * 2004 (c) MontaVista Software, Inc. This file is licensed under
++ * the terms of the GNU General Public License version 2. This program
++ * is licensed "as is" without any warranty of any kind, whether express
++ * or implied.
++ */
++
++#include <linux/string.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/delay.h>
++#include <linux/linkage.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++
++#include <asm/system.h>
++#include <asm/current.h>
++#include <asm/signal.h>
++#include <asm/pgtable.h>
++#include <asm/ptrace.h>
++
++extern void per_cpu_trap_init(void);
++extern atomic_t cpu_doing_single_step;
++
++/* Function pointers for linkage */
++static struct kgdb_regs trap_registers;
++
++/* Globals. */
++char in_nmi;			/* Set during NMI to prevent reentry */
++
++/* TRA differs sh3/4 */
++#if defined(CONFIG_CPU_SH3)
++#define TRA 0xffffffd0
++#elif defined(CONFIG_CPU_SH4)
++#define TRA 0xff000020
++#endif
++
++/* Macros for single step instruction identification */
++#define OPCODE_BT(op)         (((op) & 0xff00) == 0x8900)
++#define OPCODE_BF(op)         (((op) & 0xff00) == 0x8b00)
++#define OPCODE_BTF_DISP(op)   (((op) & 0x80) ? (((op) | 0xffffff80) << 1) : \
++			      (((op) & 0x7f ) << 1))
++#define OPCODE_BFS(op)        (((op) & 0xff00) == 0x8f00)
++#define OPCODE_BTS(op)        (((op) & 0xff00) == 0x8d00)
++#define OPCODE_BRA(op)        (((op) & 0xf000) == 0xa000)
++#define OPCODE_BRA_DISP(op)   (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \
++			      (((op) & 0x7ff) << 1))
++#define OPCODE_BRAF(op)       (((op) & 0xf0ff) == 0x0023)
++#define OPCODE_BRAF_REG(op)   (((op) & 0x0f00) >> 8)
++#define OPCODE_BSR(op)        (((op) & 0xf000) == 0xb000)
++#define OPCODE_BSR_DISP(op)   (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \
++			      (((op) & 0x7ff) << 1))
++#define OPCODE_BSRF(op)       (((op) & 0xf0ff) == 0x0003)
++#define OPCODE_BSRF_REG(op)   (((op) >> 8) & 0xf)
++#define OPCODE_JMP(op)        (((op) & 0xf0ff) == 0x402b)
++#define OPCODE_JMP_REG(op)    (((op) >> 8) & 0xf)
++#define OPCODE_JSR(op)        (((op) & 0xf0ff) == 0x400b)
++#define OPCODE_JSR_REG(op)    (((op) >> 8) & 0xf)
++#define OPCODE_RTS(op)        ((op) == 0xb)
++#define OPCODE_RTE(op)        ((op) == 0x2b)
++
++#define SR_T_BIT_MASK           0x1
++#define STEP_OPCODE             0xc320
++#define BIOS_CALL_TRAP          0x3f
++
++/* Exception codes as per SH-4 core manual */
++#define ADDRESS_ERROR_LOAD_VEC   7
++#define ADDRESS_ERROR_STORE_VEC  8
++#define TRAP_VEC                 11
++#define INVALID_INSN_VEC         12
++#define INVALID_SLOT_VEC         13
++#define NMI_VEC                  14
++#define SERIAL_BREAK_VEC         58
++
++/* Misc static */
++static int stepped_address;
++static short stepped_opcode;
++
++/* Translate SH-3/4 exception numbers to unix-like signal values */
++static int compute_signal(const int excep_code)
++{
++	switch (excep_code) {
++	case INVALID_INSN_VEC:
++	case INVALID_SLOT_VEC:
++		return SIGILL;
++	case ADDRESS_ERROR_LOAD_VEC:
++	case ADDRESS_ERROR_STORE_VEC:
++		return SIGSEGV;
++	case SERIAL_BREAK_VEC:
++	case NMI_VEC:
++		return SIGINT;
++	default:
++		/* Act like it was a break/trap. */
++		return SIGTRAP;
++	}
++}
++
++/*
++ * Translate the registers of the system into the format that GDB wants.  Since
++ * we use a local structure to store things, instead of getting them out
++ * of pt_regs, we can just do a memcpy.
++ */
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *ign)
++{
++	memcpy(gdb_regs, &trap_registers, sizeof(trap_registers));
++}
++
++/*
++ * On SH we save: r1 (prev->thread.sp) r2 (prev->thread.pc) r4 (prev) r5 (next)
++ * r6 (next->thread.sp) r7 (next->thread.pc)
++ */
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++	int count;
++
++	for (count = 0; count < 16; count++)
++		*(gdb_regs++) = 0;
++	*(gdb_regs++) = p->thread.pc;
++	*(gdb_regs++) = 0;
++	*(gdb_regs++) = 0;
++	*(gdb_regs++) = 0;
++	*(gdb_regs++) = 0;
++	*(gdb_regs++) = 0;
++	*(gdb_regs++) = 0;
++}
++
++/*
++ * Translate the registers values that GDB has given us back into the
++ * format of the system.  See the comment above about memcpy.
++ */
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *ign)
++{
++	memcpy(&trap_registers, gdb_regs, sizeof(trap_registers));
++}
++
++/* Calculate the new address for after a step */
++static short *get_step_address(void)
++{
++	short op = *(short *)trap_registers.pc;
++	long addr;
++
++	/* BT */
++	if (OPCODE_BT(op)) {
++		if (trap_registers.sr & SR_T_BIT_MASK)
++			addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
++		else
++			addr = trap_registers.pc + 2;
++	}
++
++	/* BTS */
++	else if (OPCODE_BTS(op)) {
++		if (trap_registers.sr & SR_T_BIT_MASK)
++			addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
++		else
++			addr = trap_registers.pc + 4;	/* Not in delay slot */
++	}
++
++	/* BF */
++	else if (OPCODE_BF(op)) {
++		if (!(trap_registers.sr & SR_T_BIT_MASK))
++			addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
++		else
++			addr = trap_registers.pc + 2;
++	}
++
++	/* BFS */
++	else if (OPCODE_BFS(op)) {
++		if (!(trap_registers.sr & SR_T_BIT_MASK))
++			addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
++		else
++			addr = trap_registers.pc + 4;	/* Not in delay slot */
++	}
++
++	/* BRA */
++	else if (OPCODE_BRA(op))
++		addr = trap_registers.pc + 4 + OPCODE_BRA_DISP(op);
++
++	/* BRAF */
++	else if (OPCODE_BRAF(op))
++		addr = trap_registers.pc + 4
++		    + trap_registers.regs[OPCODE_BRAF_REG(op)];
++
++	/* BSR */
++	else if (OPCODE_BSR(op))
++		addr = trap_registers.pc + 4 + OPCODE_BSR_DISP(op);
++
++	/* BSRF */
++	else if (OPCODE_BSRF(op))
++		addr = trap_registers.pc + 4
++		    + trap_registers.regs[OPCODE_BSRF_REG(op)];
++
++	/* JMP */
++	else if (OPCODE_JMP(op))
++		addr = trap_registers.regs[OPCODE_JMP_REG(op)];
++
++	/* JSR */
++	else if (OPCODE_JSR(op))
++		addr = trap_registers.regs[OPCODE_JSR_REG(op)];
++
++	/* RTS */
++	else if (OPCODE_RTS(op))
++		addr = trap_registers.pr;
++
++	/* RTE */
++	else if (OPCODE_RTE(op))
++		addr = trap_registers.regs[15];
++
++	/* Other */
++	else
++		addr = trap_registers.pc + 2;
++
++	kgdb_flush_icache_range(addr, addr + 2);
++	return (short *)addr;
++}
++
++/* The command loop, read and act on requests */
++int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
++			       char *remcom_in_buffer, char *remcom_out_buffer,
++			       struct pt_regs *ign)
++{
++	unsigned long addr;
++	char *ptr = &remcom_in_buffer[1];
++
++	/* Examine first char of buffer to see what we need to do */
++	switch (remcom_in_buffer[0]) {
++	case 'c':		/* Continue at address AA..AA (optional) */
++	case 's':		/* Step one instruction from AA..AA */
++		/* Try to read optional parameter, PC unchanged if none */
++		if (kgdb_hex2long(&ptr, &addr))
++			trap_registers.pc = addr;
++
++		atomic_set(&cpu_doing_single_step, -1);
++		if (remcom_in_buffer[0] == 's') {
++			/* Replace the instruction immediately after the
++			 * current instruction (i.e. next in the expected
++			 * flow of control) with a trap instruction, so that
++			 * returning will cause only a single instruction to
++			 * be executed. Note that this model is slightly
++			 * broken for instructions with delay slots
++			 * (e.g. B[TF]S, BSR, BRA etc), where both the branch
++			 * and the instruction in the delay slot will be
++			 * executed.
++			 */
++			/* Determine where the target instruction will send
++			 * us to */
++			unsigned short *next_addr = get_step_address();
++			stepped_address = (int)next_addr;
++
++			/* Replace it */
++			stepped_opcode = *(short *)next_addr;
++			*next_addr = STEP_OPCODE;
++
++			/* Flush and return */
++			kgdb_flush_icache_range((long)next_addr,
++						(long)next_addr + 2);
++			if (kgdb_contthread)
++				atomic_set(&cpu_doing_single_step,
++					   smp_processor_id());
++		}
++		return 0;
++	}
++	return -1;
++}
++
++/*
++ * When an exception has occurred, we are called.  We need to set things
++ * up so that we can call kgdb_handle_exception to handle requests from
++ * the remote GDB.
++ */
++void kgdb_exception_handler(struct pt_regs *regs)
++{
++	int excep_code, vbr_val;
++	int count;
++
++	/* Copy kernel regs (from stack) */
++	for (count = 0; count < 16; count++)
++		trap_registers.regs[count] = regs->regs[count];
++	trap_registers.pc = regs->pc;
++	trap_registers.pr = regs->pr;
++	trap_registers.sr = regs->sr;
++	trap_registers.gbr = regs->gbr;
++	trap_registers.mach = regs->mach;
++	trap_registers.macl = regs->macl;
++
++	__asm__ __volatile__("stc vbr, %0":"=r"(vbr_val));
++	trap_registers.vbr = vbr_val;
++
++	/* Get the exception code. */
++	__asm__ __volatile__("stc r2_bank, %0":"=r"(excep_code));
++
++	excep_code >>= 5;
++
++	/* If we got an NMI, and KGDB is not yet initialized, call
++	 * breakpoint() to try and initialize everything for us. */
++	if (excep_code == NMI_VEC && !kgdb_initialized) {
++		breakpoint();
++		return;
++	}
++
++	/* TRAP_VEC exception indicates a software trap inserted in place of
++	 * code by GDB so back up PC by one instruction, as this instruction
++	 * will later be replaced by its original one.  Do NOT do this for
++	 * trap 0xff, since that indicates a compiled-in breakpoint which
++	 * will not be replaced (and we would retake the trap forever) */
++	if (excep_code == TRAP_VEC &&
++	    (*(volatile unsigned long *)TRA != (0xff << 2)))
++		trap_registers.pc -= 2;
++
++	/* If we have been single-stepping, put back the old instruction.
++	 * We use stepped_address in case we have stopped more than one
++	 * instruction away. */
++	if (stepped_opcode != 0) {
++		*(short *)stepped_address = stepped_opcode;
++		kgdb_flush_icache_range(stepped_address, stepped_address + 2);
++	}
++	stepped_opcode = 0;
++
++	/* Call the stub to do the processing.  Note that not everything we
++	 * need to send back and forth lives in pt_regs. */
++	kgdb_handle_exception(excep_code, compute_signal(excep_code), 0, regs);
++
++	/* Copy back the (maybe modified) registers */
++	for (count = 0; count < 16; count++)
++		regs->regs[count] = trap_registers.regs[count];
++	regs->pc = trap_registers.pc;
++	regs->pr = trap_registers.pr;
++	regs->sr = trap_registers.sr;
++	regs->gbr = trap_registers.gbr;
++	regs->mach = trap_registers.mach;
++	regs->macl = trap_registers.macl;
++
++	vbr_val = trap_registers.vbr;
++	__asm__ __volatile__("ldc %0, vbr": :"r"(vbr_val));
++}
++
++int __init kgdb_arch_init(void)
++{
++	per_cpu_trap_init();
++
++	return 0;
++}
++
++struct kgdb_arch arch_kgdb_ops = {
++#ifdef CONFIG_CPU_LITTLE_ENDIAN
++	.gdb_bpt_instr = {0xff, 0xc3},
++#else
++	.gdb_bpt_instr = {0xc3, 0xff},
++#endif
++};
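+
+(A worked example of the displacement macros that drive get_step_address()
+above -- illustration only, with a made-up PC value, not part of the patch:
+opcode 0x8905 decodes as BT with displacement field 0x05, so with SR.T set
+the branch target is pc + 4 + (0x05 << 1):
+
+	unsigned short op = 0x8905;
+	long pc = 0x8c001000;	/* hypothetical */
+	/* OPCODE_BT(op): (0x8905 & 0xff00) == 0x8900 -> true */
+	/* OPCODE_BTF_DISP(op): bit 7 clear, so (0x05 & 0x7f) << 1 == 10 */
+	long target = pc + 4 + OPCODE_BTF_DISP(op);	/* == 0x8c00100e */
+)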
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb_jmp.S linux-2.6.22-try2/arch/sh/kernel/kgdb_jmp.S
+--- linux-2.6.22-570/arch/sh/kernel/kgdb_jmp.S	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/sh/kernel/kgdb_jmp.S	1969-12-31 19:00:00.000000000 -0500
+@@ -1,33 +0,0 @@
+-#include <linux/linkage.h>
+-
+-ENTRY(setjmp)
+-	add	#(9*4), r4
+-	sts.l	pr, @-r4
+-	mov.l	r15, @-r4
+-	mov.l	r14, @-r4
+-	mov.l	r13, @-r4
+-	mov.l	r12, @-r4
+-	mov.l	r11, @-r4
+-	mov.l	r10, @-r4
+-	mov.l	r9, @-r4
+-	mov.l	r8, @-r4
+-	rts
+-	 mov	#0, r0
+-
+-ENTRY(longjmp)
+-	mov.l	@r4+, r8
+-	mov.l	@r4+, r9
+-	mov.l	@r4+, r10
+-	mov.l	@r4+, r11
+-	mov.l	@r4+, r12
+-	mov.l	@r4+, r13
+-	mov.l	@r4+, r14
+-	mov.l	@r4+, r15
+-	lds.l	@r4+, pr
+-	mov	r5, r0
+-	tst	r0, r0
+-	bf	1f
+-	mov	#1, r0	! in case val==0
+-1:	rts
+-	 nop
+-
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/kgdb_stub.c linux-2.6.22-try2/arch/sh/kernel/kgdb_stub.c
+--- linux-2.6.22-570/arch/sh/kernel/kgdb_stub.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/sh/kernel/kgdb_stub.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,1093 +0,0 @@
+-/*
+- * May be copied or modified under the terms of the GNU General Public
+- * License.  See linux/COPYING for more information.
+- *
+- * Contains extracts from code by Glenn Engel, Jim Kingdon,
+- * David Grothe <dave@gcom.com>, Tigran Aivazian <tigran@sco.com>,
+- * Amit S. Kale <akale@veritas.com>,  William Gatliff <bgat@open-widgets.com>,
+- * Ben Lee, Steve Chamberlain and Benoit Miller <fulg@iname.com>.
+- *
+- * This version by Henry Bell <henry.bell@st.com>
+- * Minor modifications by Jeremy Siegel <jsiegel@mvista.com>
+- *
+- * Contains low-level support for remote debug using GDB.
+- *
+- * To enable debugger support, two things need to happen. A call to
+- * set_debug_traps() is necessary in order to allow any breakpoints
+- * or error conditions to be properly intercepted and reported to gdb.
+- * A breakpoint also needs to be generated to begin communication.  This
+- * is most easily accomplished by a call to breakpoint() which does
+- * a trapa if the initialisation phase has been successfully completed.
+- *
+- * In this case, set_debug_traps() is not used to "take over" exceptions;
+- * other kernel code is modified instead to enter the kgdb functions here
+- * when appropriate (see entry.S for breakpoint traps and NMI interrupts,
+- * see traps.c for kernel error exceptions).
+- *
+- * The following gdb commands are supported:
+- *
+- *    Command       Function                               Return value
+- *
+- *    g             return the value of the CPU registers  hex data or ENN
+- *    G             set the value of the CPU registers     OK or ENN
+- *
+- *    mAA..AA,LLLL  Read LLLL bytes at address AA..AA      hex data or ENN
+- *    MAA..AA,LLLL: Write LLLL bytes at address AA.AA      OK or ENN
+- *    XAA..AA,LLLL: Same, but data is binary (not hex)     OK or ENN
+- *
+- *    c             Resume at current address              SNN   ( signal NN)
+- *    cAA..AA       Continue at address AA..AA             SNN
+- *    CNN;          Resume at current address with signal  SNN
+- *    CNN;AA..AA    Resume at address AA..AA with signal   SNN
+- *
+- *    s             Step one instruction                   SNN
+- *    sAA..AA       Step one instruction from AA..AA       SNN
+- *    SNN;          Step one instruction with signal       SNN
+- *    SNNAA..AA     Step one instruction from AA..AA w/NN  SNN
+- *
+- *    k             kill (Detach GDB)
+- *
+- *    d             Toggle debug flag
+- *    D             Detach GDB
+- *
+- *    Hct           Set thread t for operations,           OK or ENN
+- *                  c = 'c' (step, cont), c = 'g' (other
+- *                  operations)
+- *
+- *    qC            Query current thread ID                QCpid
+- *    qfThreadInfo  Get list of current threads (first)    m<id>
+- *    qsThreadInfo   "    "  "     "      "   (subsequent)
+- *    qOffsets      Get section offsets                  Text=x;Data=y;Bss=z
+- *
+- *    TXX           Find if thread XX is alive             OK or ENN
+- *    ?             What was the last sigval ?             SNN   (signal NN)
+- *    O             Output to GDB console
+- *
+- * Remote communication protocol.
+- *
+- *    A debug packet whose contents are <data> is encapsulated for
+- *    transmission in the form:
+- *
+- *       $ <data> # CSUM1 CSUM2
+- *
+- *       <data> must be ASCII alphanumeric and cannot include characters
+- *       '$' or '#'.  If <data> starts with two characters followed by
+- *       ':', then the existing stubs interpret this as a sequence number.
+- *
+- *       CSUM1 and CSUM2 are ascii hex representation of an 8-bit
+- *       checksum of <data>, the most significant nibble is sent first.
+- *       the hex digits 0-9,a-f are used.
+- *
+- *    Receiver responds with:
+- *
+- *       +       - if CSUM is correct and ready for next packet
+- *       -       - if CSUM is incorrect
+- *
+- * Responses can be run-length encoded to save space.  A '*' means that
+- * the next character is an ASCII encoding giving a repeat count which
+- * stands for that many repetitions of the character preceding the '*'.
+- * The encoding is n+29, yielding a printable character where n >=3
+- * (which is where RLE starts to win).  Don't use an n > 126.
+- *
+- * So "0* " means the same as "0000".
+- */
+-
+-#include <linux/string.h>
+-#include <linux/kernel.h>
+-#include <linux/sched.h>
+-#include <linux/smp.h>
+-#include <linux/spinlock.h>
+-#include <linux/delay.h>
+-#include <linux/linkage.h>
+-#include <linux/init.h>
+-#include <linux/console.h>
+-#include <linux/sysrq.h>
+-#include <asm/system.h>
+-#include <asm/cacheflush.h>
+-#include <asm/current.h>
+-#include <asm/signal.h>
+-#include <asm/pgtable.h>
+-#include <asm/ptrace.h>
+-#include <asm/kgdb.h>
+-#include <asm/io.h>
+-
+-/* Function pointers for linkage */
+-kgdb_debug_hook_t *kgdb_debug_hook;
+-kgdb_bus_error_hook_t *kgdb_bus_err_hook;
+-
+-int (*kgdb_getchar)(void);
+-void (*kgdb_putchar)(int);
+-
+-static void put_debug_char(int c)
+-{
+-	if (!kgdb_putchar)
+-		return;
+-	(*kgdb_putchar)(c);
+-}
+-static int get_debug_char(void)
+-{
+-	if (!kgdb_getchar)
+-		return -1;
+-	return (*kgdb_getchar)();
+-}
+-
+-/* Num chars in in/out bound buffers, register packets need NUMREGBYTES * 2 */
+-#define BUFMAX 1024
+-#define NUMREGBYTES (MAXREG*4)
+-#define OUTBUFMAX (NUMREGBYTES*2+512)
+-
+-enum regs {
+-	R0 = 0, R1,  R2,  R3,   R4,   R5,  R6, R7,
+-	R8, R9, R10, R11, R12,  R13,  R14, R15,
+-	PC, PR, GBR, VBR, MACH, MACL, SR,
+-	/*  */
+-	MAXREG
+-};
+-
+-static unsigned int registers[MAXREG];
+-struct kgdb_regs trap_registers;
+-
+-char kgdb_in_gdb_mode;
+-char in_nmi;			/* Set during NMI to prevent reentry */
+-int kgdb_nofault;		/* Boolean to ignore bus errs (i.e. in GDB) */
+-int kgdb_enabled = 1;		/* Default to enabled, cmdline can disable */
+-
+-/* Exposed for user access */
+-struct task_struct *kgdb_current;
+-unsigned int kgdb_g_imask;
+-int kgdb_trapa_val;
+-int kgdb_excode;
+-
+-/* Default values for SCI (can override via kernel args in setup.c) */
+-#ifndef CONFIG_KGDB_DEFPORT
+-#define CONFIG_KGDB_DEFPORT 1
+-#endif
+-
+-#ifndef CONFIG_KGDB_DEFBAUD
+-#define CONFIG_KGDB_DEFBAUD 115200
+-#endif
+-
+-#if defined(CONFIG_KGDB_DEFPARITY_E)
+-#define CONFIG_KGDB_DEFPARITY 'E'
+-#elif defined(CONFIG_KGDB_DEFPARITY_O)
+-#define CONFIG_KGDB_DEFPARITY 'O'
+-#else /* CONFIG_KGDB_DEFPARITY_N */
+-#define CONFIG_KGDB_DEFPARITY 'N'
+-#endif
+-
+-#ifdef CONFIG_KGDB_DEFBITS_7
+-#define CONFIG_KGDB_DEFBITS '7'
+-#else /* CONFIG_KGDB_DEFBITS_8 */
+-#define CONFIG_KGDB_DEFBITS '8'
+-#endif
+-
+-/* SCI/UART settings, used in kgdb_console_setup() */
+-int  kgdb_portnum = CONFIG_KGDB_DEFPORT;
+-int  kgdb_baud = CONFIG_KGDB_DEFBAUD;
+-char kgdb_parity = CONFIG_KGDB_DEFPARITY;
+-char kgdb_bits = CONFIG_KGDB_DEFBITS;
+-
+-/* Jump buffer for setjmp/longjmp */
+-static jmp_buf rem_com_env;
+-
+-/* TRA differs sh3/4 */
+-#if defined(CONFIG_CPU_SH3)
+-#define TRA 0xffffffd0
+-#elif defined(CONFIG_CPU_SH4)
+-#define TRA 0xff000020
+-#endif
+-
+-/* Macros for single step instruction identification */
+-#define OPCODE_BT(op)         (((op) & 0xff00) == 0x8900)
+-#define OPCODE_BF(op)         (((op) & 0xff00) == 0x8b00)
+-#define OPCODE_BTF_DISP(op)   (((op) & 0x80) ? (((op) | 0xffffff80) << 1) : \
+-			      (((op) & 0x7f ) << 1))
+-#define OPCODE_BFS(op)        (((op) & 0xff00) == 0x8f00)
+-#define OPCODE_BTS(op)        (((op) & 0xff00) == 0x8d00)
+-#define OPCODE_BRA(op)        (((op) & 0xf000) == 0xa000)
+-#define OPCODE_BRA_DISP(op)   (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \
+-			      (((op) & 0x7ff) << 1))
+-#define OPCODE_BRAF(op)       (((op) & 0xf0ff) == 0x0023)
+-#define OPCODE_BRAF_REG(op)   (((op) & 0x0f00) >> 8)
+-#define OPCODE_BSR(op)        (((op) & 0xf000) == 0xb000)
+-#define OPCODE_BSR_DISP(op)   (((op) & 0x800) ? (((op) | 0xfffff800) << 1) : \
+-			      (((op) & 0x7ff) << 1))
+-#define OPCODE_BSRF(op)       (((op) & 0xf0ff) == 0x0003)
+-#define OPCODE_BSRF_REG(op)   (((op) >> 8) & 0xf)
+-#define OPCODE_JMP(op)        (((op) & 0xf0ff) == 0x402b)
+-#define OPCODE_JMP_REG(op)    (((op) >> 8) & 0xf)
+-#define OPCODE_JSR(op)        (((op) & 0xf0ff) == 0x400b)
+-#define OPCODE_JSR_REG(op)    (((op) >> 8) & 0xf)
+-#define OPCODE_RTS(op)        ((op) == 0xb)
+-#define OPCODE_RTE(op)        ((op) == 0x2b)
+-
+-#define SR_T_BIT_MASK           0x1
+-#define STEP_OPCODE             0xc320
+-#define BIOS_CALL_TRAP          0x3f
+-
+-/* Exception codes as per SH-4 core manual */
+-#define ADDRESS_ERROR_LOAD_VEC   7
+-#define ADDRESS_ERROR_STORE_VEC  8
+-#define TRAP_VEC                 11
+-#define INVALID_INSN_VEC         12
+-#define INVALID_SLOT_VEC         13
+-#define NMI_VEC                  14
+-#define USER_BREAK_VEC           15
+-#define SERIAL_BREAK_VEC         58
+-
+-/* Misc static */
+-static int stepped_address;
+-static short stepped_opcode;
+-static char in_buffer[BUFMAX];
+-static char out_buffer[OUTBUFMAX];
+-
+-static void kgdb_to_gdb(const char *s);
+-
+-/* Convert ch to hex */
+-static int hex(const char ch)
+-{
+-	if ((ch >= 'a') && (ch <= 'f'))
+-		return (ch - 'a' + 10);
+-	if ((ch >= '0') && (ch <= '9'))
+-		return (ch - '0');
+-	if ((ch >= 'A') && (ch <= 'F'))
+-		return (ch - 'A' + 10);
+-	return (-1);
+-}
+-
+-/* Convert the memory pointed to by mem into hex, placing result in buf.
+-   Returns a pointer to the last char put in buf (null) */
+-static char *mem_to_hex(const char *mem, char *buf, const int count)
+-{
+-	int i;
+-	int ch;
+-	unsigned short s_val;
+-	unsigned long l_val;
+-
+-	/* Check for 16 or 32 */
+-	if (count == 2 && ((long) mem & 1) == 0) {
+-		s_val = *(unsigned short *) mem;
+-		mem = (char *) &s_val;
+-	} else if (count == 4 && ((long) mem & 3) == 0) {
+-		l_val = *(unsigned long *) mem;
+-		mem = (char *) &l_val;
+-	}
+-	for (i = 0; i < count; i++) {
+-		ch = *mem++;
+-		*buf++ = highhex(ch);
+-		*buf++ = lowhex(ch);
+-	}
+-	*buf = 0;
+-	return (buf);
+-}
+-
+-/* Convert the hex array pointed to by buf into binary, to be placed in mem.
+-   Return a pointer to the character after the last byte written */
+-static char *hex_to_mem(const char *buf, char *mem, const int count)
+-{
+-	int i;
+-	unsigned char ch;
+-
+-	for (i = 0; i < count; i++) {
+-		ch = hex(*buf++) << 4;
+-		ch = ch + hex(*buf++);
+-		*mem++ = ch;
+-	}
+-	return (mem);
+-}
+-
+-/* While finding valid hex chars, convert to an integer, then return it */
+-static int hex_to_int(char **ptr, int *int_value)
+-{
+-	int num_chars = 0;
+-	int hex_value;
+-
+-	*int_value = 0;
+-
+-	while (**ptr) {
+-		hex_value = hex(**ptr);
+-		if (hex_value >= 0) {
+-			*int_value = (*int_value << 4) | hex_value;
+-			num_chars++;
+-		} else
+-			break;
+-		(*ptr)++;
+-	}
+-	return num_chars;
+-}
+-
+-/*  Copy the binary array pointed to by buf into mem.  Fix $, #,
+-    and 0x7d escaped with 0x7d.  Return a pointer to the character
+-    after the last byte written. */
+-static char *ebin_to_mem(const char *buf, char *mem, int count)
+-{
+-	for (; count > 0; count--, buf++) {
+-		if (*buf == 0x7d)
+-			*mem++ = *(++buf) ^ 0x20;
+-		else
+-			*mem++ = *buf;
+-	}
+-	return mem;
+-}
+-
+-/* Pack a hex byte */
+-static char *pack_hex_byte(char *pkt, int byte)
+-{
+-	*pkt++ = hexchars[(byte >> 4) & 0xf];
+-	*pkt++ = hexchars[(byte & 0xf)];
+-	return pkt;
+-}
+-
+-/* Scan for the start char '$', read the packet and check the checksum */
+-static void get_packet(char *buffer, int buflen)
+-{
+-	unsigned char checksum;
+-	unsigned char xmitcsum;
+-	int i;
+-	int count;
+-	char ch;
+-
+-	do {
+-		/* Ignore everything until the start character */
+-		while ((ch = get_debug_char()) != '$');
+-
+-		checksum = 0;
+-		xmitcsum = -1;
+-		count = 0;
+-
+-		/* Now, read until a # or end of buffer is found */
+-		while (count < (buflen - 1)) {
+-			ch = get_debug_char();
+-
+-			if (ch == '#')
+-				break;
+-
+-			checksum = checksum + ch;
+-			buffer[count] = ch;
+-			count = count + 1;
+-		}
+-
+-		buffer[count] = 0;
+-
+-		/* Continue to read checksum following # */
+-		if (ch == '#') {
+-			xmitcsum = hex(get_debug_char()) << 4;
+-			xmitcsum += hex(get_debug_char());
+-
+-			/* Checksum */
+-			if (checksum != xmitcsum)
+-				put_debug_char('-');	/* Failed checksum */
+-			else {
+-				/* Ack successful transfer */
+-				put_debug_char('+');
+-
+-				/* If a sequence char is present, reply
+-				   the sequence ID */
+-				if (buffer[2] == ':') {
+-					put_debug_char(buffer[0]);
+-					put_debug_char(buffer[1]);
+-
+-					/* Remove sequence chars from buffer */
+-					count = strlen(buffer);
+-					for (i = 3; i <= count; i++)
+-						buffer[i - 3] = buffer[i];
+-				}
+-			}
+-		}
+-	}
+-	while (checksum != xmitcsum);	/* Keep trying while we fail */
+-}
+-
+-/* Send the packet in the buffer with run-length encoding */
+-static void put_packet(char *buffer)
+-{
+-	int checksum;
+-	char *src;
+-	int runlen;
+-	int encode;
+-
+-	do {
+-		src = buffer;
+-		put_debug_char('$');
+-		checksum = 0;
+-
+-		/* Continue while we still have chars left */
+-		while (*src) {
+-			/* Check for runs up to 99 chars long */
+-			for (runlen = 1; runlen < 99; runlen++) {
+-				if (src[0] != src[runlen])
+-					break;
+-			}
+-
+-			if (runlen > 3) {
+-				/* Got a useful amount, send encoding */
+-				encode = runlen + ' ' - 4;
+-				put_debug_char(*src);   checksum += *src;
+-				put_debug_char('*');    checksum += '*';
+-				put_debug_char(encode); checksum += encode;
+-				src += runlen;
+-			} else {
+-				/* Otherwise just send the current char */
+-				put_debug_char(*src);   checksum += *src;
+-				src += 1;
+-			}
+-		}
+-
+-		/* '#' Separator, put high and low components of checksum */
+-		put_debug_char('#');
+-		put_debug_char(highhex(checksum));
+-		put_debug_char(lowhex(checksum));
+-	}
+-	while ((get_debug_char()) != '+');	/* While no ack */
+-}
+-
+-/* A bus error has occurred - perform a longjmp to return execution and
+-   allow handling of the error */
+-static void kgdb_handle_bus_error(void)
+-{
+-	longjmp(rem_com_env, 1);
+-}
+-
+-/* Translate SH-3/4 exception numbers to unix-like signal values */
+-static int compute_signal(const int excep_code)
+-{
+-	int sigval;
+-
+-	switch (excep_code) {
+-
+-	case INVALID_INSN_VEC:
+-	case INVALID_SLOT_VEC:
+-		sigval = SIGILL;
+-		break;
+-	case ADDRESS_ERROR_LOAD_VEC:
+-	case ADDRESS_ERROR_STORE_VEC:
+-		sigval = SIGSEGV;
+-		break;
+-
+-	case SERIAL_BREAK_VEC:
+-	case NMI_VEC:
+-		sigval = SIGINT;
+-		break;
+-
+-	case USER_BREAK_VEC:
+-	case TRAP_VEC:
+-		sigval = SIGTRAP;
+-		break;
+-
+-	default:
+-		sigval = SIGBUS;	/* "software generated" */
+-		break;
+-	}
+-
+-	return (sigval);
+-}
+-
+-/* Make a local copy of the registers passed into the handler (bletch) */
+-static void kgdb_regs_to_gdb_regs(const struct kgdb_regs *regs,
+-				  int *gdb_regs)
+-{
+-	gdb_regs[R0] = regs->regs[R0];
+-	gdb_regs[R1] = regs->regs[R1];
+-	gdb_regs[R2] = regs->regs[R2];
+-	gdb_regs[R3] = regs->regs[R3];
+-	gdb_regs[R4] = regs->regs[R4];
+-	gdb_regs[R5] = regs->regs[R5];
+-	gdb_regs[R6] = regs->regs[R6];
+-	gdb_regs[R7] = regs->regs[R7];
+-	gdb_regs[R8] = regs->regs[R8];
+-	gdb_regs[R9] = regs->regs[R9];
+-	gdb_regs[R10] = regs->regs[R10];
+-	gdb_regs[R11] = regs->regs[R11];
+-	gdb_regs[R12] = regs->regs[R12];
+-	gdb_regs[R13] = regs->regs[R13];
+-	gdb_regs[R14] = regs->regs[R14];
+-	gdb_regs[R15] = regs->regs[R15];
+-	gdb_regs[PC] = regs->pc;
+-	gdb_regs[PR] = regs->pr;
+-	gdb_regs[GBR] = regs->gbr;
+-	gdb_regs[MACH] = regs->mach;
+-	gdb_regs[MACL] = regs->macl;
+-	gdb_regs[SR] = regs->sr;
+-	gdb_regs[VBR] = regs->vbr;
+-}
+-
+-/* Copy local gdb registers back to kgdb regs, for later copy to kernel */
+-static void gdb_regs_to_kgdb_regs(const int *gdb_regs,
+-				  struct kgdb_regs *regs)
+-{
+-	regs->regs[R0] = gdb_regs[R0];
+-	regs->regs[R1] = gdb_regs[R1];
+-	regs->regs[R2] = gdb_regs[R2];
+-	regs->regs[R3] = gdb_regs[R3];
+-	regs->regs[R4] = gdb_regs[R4];
+-	regs->regs[R5] = gdb_regs[R5];
+-	regs->regs[R6] = gdb_regs[R6];
+-	regs->regs[R7] = gdb_regs[R7];
+-	regs->regs[R8] = gdb_regs[R8];
+-	regs->regs[R9] = gdb_regs[R9];
+-	regs->regs[R10] = gdb_regs[R10];
+-	regs->regs[R11] = gdb_regs[R11];
+-	regs->regs[R12] = gdb_regs[R12];
+-	regs->regs[R13] = gdb_regs[R13];
+-	regs->regs[R14] = gdb_regs[R14];
+-	regs->regs[R15] = gdb_regs[R15];
+-	regs->pc = gdb_regs[PC];
+-	regs->pr = gdb_regs[PR];
+-	regs->gbr = gdb_regs[GBR];
+-	regs->mach = gdb_regs[MACH];
+-	regs->macl = gdb_regs[MACL];
+-	regs->sr = gdb_regs[SR];
+-	regs->vbr = gdb_regs[VBR];
+-}
+-
+-/* Calculate the new address for after a step */
+-static short *get_step_address(void)
+-{
+-	short op = *(short *) trap_registers.pc;
+-	long addr;
+-
+-	/* BT */
+-	if (OPCODE_BT(op)) {
+-		if (trap_registers.sr & SR_T_BIT_MASK)
+-			addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
+-		else
+-			addr = trap_registers.pc + 2;
+-	}
+-
+-	/* BTS */
+-	else if (OPCODE_BTS(op)) {
+-		if (trap_registers.sr & SR_T_BIT_MASK)
+-			addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
+-		else
+-			addr = trap_registers.pc + 4;	/* Not in delay slot */
+-	}
+-
+-	/* BF */
+-	else if (OPCODE_BF(op)) {
+-		if (!(trap_registers.sr & SR_T_BIT_MASK))
+-			addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
+-		else
+-			addr = trap_registers.pc + 2;
+-	}
+-
+-	/* BFS */
+-	else if (OPCODE_BFS(op)) {
+-		if (!(trap_registers.sr & SR_T_BIT_MASK))
+-			addr = trap_registers.pc + 4 + OPCODE_BTF_DISP(op);
+-		else
+-			addr = trap_registers.pc + 4;	/* Not in delay slot */
+-	}
+-
+-	/* BRA */
+-	else if (OPCODE_BRA(op))
+-		addr = trap_registers.pc + 4 + OPCODE_BRA_DISP(op);
+-
+-	/* BRAF */
+-	else if (OPCODE_BRAF(op))
+-		addr = trap_registers.pc + 4
+-		    + trap_registers.regs[OPCODE_BRAF_REG(op)];
+-
+-	/* BSR */
+-	else if (OPCODE_BSR(op))
+-		addr = trap_registers.pc + 4 + OPCODE_BSR_DISP(op);
+-
+-	/* BSRF */
+-	else if (OPCODE_BSRF(op))
+-		addr = trap_registers.pc + 4
+-		    + trap_registers.regs[OPCODE_BSRF_REG(op)];
+-
+-	/* JMP */
+-	else if (OPCODE_JMP(op))
+-		addr = trap_registers.regs[OPCODE_JMP_REG(op)];
+-
+-	/* JSR */
+-	else if (OPCODE_JSR(op))
+-		addr = trap_registers.regs[OPCODE_JSR_REG(op)];
+-
+-	/* RTS */
+-	else if (OPCODE_RTS(op))
+-		addr = trap_registers.pr;
+-
+-	/* RTE */
+-	else if (OPCODE_RTE(op))
+-		addr = trap_registers.regs[15];
+-
+-	/* Other */
+-	else
+-		addr = trap_registers.pc + 2;
+-
+-	kgdb_flush_icache_range(addr, addr + 2);
+-	return (short *) addr;
+-}
+-
+-/* Set up a single-step.  Replace the instruction immediately after the
+-   current instruction (i.e. next in the expected flow of control) with a
+-   trap instruction, so that returning will cause only a single instruction
+-   to be executed. Note that this model is slightly broken for instructions
+-   with delay slots (e.g. B[TF]S, BSR, BRA etc), where both the branch
+-   and the instruction in the delay slot will be executed. */
+-static void do_single_step(void)
+-{
+-	unsigned short *addr = 0;
+-
+-	/* Determine where the target instruction will send us to */
+-	addr = get_step_address();
+-	stepped_address = (int)addr;
+-
+-	/* Replace it */
+-	stepped_opcode = *(short *)addr;
+-	*addr = STEP_OPCODE;
+-
+-	/* Flush and return */
+-	kgdb_flush_icache_range((long) addr, (long) addr + 2);
+-	return;
+-}
+-
+-/* Undo a single step */
+-static void undo_single_step(void)
+-{
+-	/* If we have stepped, put back the old instruction */
+-	/* Use stepped_address in case we stopped elsewhere */
+-	if (stepped_opcode != 0) {
+-		*(short*)stepped_address = stepped_opcode;
+-		kgdb_flush_icache_range(stepped_address, stepped_address + 2);
+-	}
+-	stepped_opcode = 0;
+-}
+-
+-/* Send a signal message */
+-static void send_signal_msg(const int signum)
+-{
+-	out_buffer[0] = 'S';
+-	out_buffer[1] = highhex(signum);
+-	out_buffer[2] = lowhex(signum);
+-	out_buffer[3] = 0;
+-	put_packet(out_buffer);
+-}
+-
+-/* Reply that all was well */
+-static void send_ok_msg(void)
+-{
+-	strcpy(out_buffer, "OK");
+-	put_packet(out_buffer);
+-}
+-
+-/* Reply that an error occurred */
+-static void send_err_msg(void)
+-{
+-	strcpy(out_buffer, "E01");
+-	put_packet(out_buffer);
+-}
+-
+-/* Empty message indicates unrecognised command */
+-static void send_empty_msg(void)
+-{
+-	put_packet("");
+-}
+-
+-/* Read memory due to 'm' message */
+-static void read_mem_msg(void)
+-{
+-	char *ptr;
+-	int addr;
+-	int length;
+-
+-	/* Jmp, disable bus error handler */
+-	if (setjmp(rem_com_env) == 0) {
+-
+-		kgdb_nofault = 1;
+-
+-		/* Walk through, have m<addr>,<length> */
+-		ptr = &in_buffer[1];
+-		if (hex_to_int(&ptr, &addr) && (*ptr++ == ','))
+-			if (hex_to_int(&ptr, &length)) {
+-				ptr = 0;
+-				if (length * 2 > OUTBUFMAX)
+-					length = OUTBUFMAX / 2;
+-				mem_to_hex((char *) addr, out_buffer, length);
+-			}
+-		if (ptr)
+-			send_err_msg();
+-		else
+-			put_packet(out_buffer);
+-	} else
+-		send_err_msg();
+-
+-	/* Restore bus error handler */
+-	kgdb_nofault = 0;
+-}
+-
+-/* Write memory due to 'M' or 'X' message */
+-static void write_mem_msg(int binary)
+-{
+-	char *ptr;
+-	int addr;
+-	int length;
+-
+-	if (setjmp(rem_com_env) == 0) {
+-
+-		kgdb_nofault = 1;
+-
+-		/* Walk through, have M<addr>,<length>:<data> */
+-		ptr = &in_buffer[1];
+-		if (hex_to_int(&ptr, &addr) && (*ptr++ == ','))
+-			if (hex_to_int(&ptr, &length) && (*ptr++ == ':')) {
+-				if (binary)
+-					ebin_to_mem(ptr, (char*)addr, length);
+-				else
+-					hex_to_mem(ptr, (char*)addr, length);
+-				kgdb_flush_icache_range(addr, addr + length);
+-				ptr = 0;
+-				send_ok_msg();
+-			}
+-		if (ptr)
+-			send_err_msg();
+-	} else
+-		send_err_msg();
+-
+-	/* Restore bus error handler */
+-	kgdb_nofault = 0;
+-}
+-
+-/* Continue message  */
+-static void continue_msg(void)
+-{
+-	/* Try to read optional parameter, PC unchanged if none */
+-	char *ptr = &in_buffer[1];
+-	int addr;
+-
+-	if (hex_to_int(&ptr, &addr))
+-		trap_registers.pc = addr;
+-}
+-
+-/* Continue message with signal */
+-static void continue_with_sig_msg(void)
+-{
+-	int signal;
+-	char *ptr = &in_buffer[1];
+-	int addr;
+-
+-	/* Report limitation */
+-	kgdb_to_gdb("Cannot force signal in kgdb, continuing anyway.\n");
+-
+-	/* Signal */
+-	hex_to_int(&ptr, &signal);
+-	if (*ptr == ';')
+-		ptr++;
+-
+-	/* Optional address */
+-	if (hex_to_int(&ptr, &addr))
+-		trap_registers.pc = addr;
+-}
+-
+-/* Step message */
+-static void step_msg(void)
+-{
+-	continue_msg();
+-	do_single_step();
+-}
+-
+-/* Step message with signal */
+-static void step_with_sig_msg(void)
+-{
+-	continue_with_sig_msg();
+-	do_single_step();
+-}
+-
+-/* Send register contents */
+-static void send_regs_msg(void)
+-{
+-	kgdb_regs_to_gdb_regs(&trap_registers, registers);
+-	mem_to_hex((char *) registers, out_buffer, NUMREGBYTES);
+-	put_packet(out_buffer);
+-}
+-
+-/* Set register contents - currently can't set other thread's registers */
+-static void set_regs_msg(void)
+-{
+-	kgdb_regs_to_gdb_regs(&trap_registers, registers);
+-	hex_to_mem(&in_buffer[1], (char *) registers, NUMREGBYTES);
+-	gdb_regs_to_kgdb_regs(registers, &trap_registers);
+-	send_ok_msg();
+-}
+-
+-#ifdef CONFIG_SH_KGDB_CONSOLE
+-/*
+- * Bring up the ports..
+- */
+-static int kgdb_serial_setup(void)
+-{
+-	extern int kgdb_console_setup(struct console *co, char *options);
+-	struct console dummy;
+-
+-	kgdb_console_setup(&dummy, 0);
+-
+-	return 0;
+-}
+-#else
+-#define kgdb_serial_setup()	0
+-#endif
+-
+-/* The command loop, read and act on requests */
+-static void kgdb_command_loop(const int excep_code, const int trapa_value)
+-{
+-	int sigval;
+-
+-	if (excep_code == NMI_VEC) {
+-#ifndef CONFIG_KGDB_NMI
+-		printk(KERN_NOTICE "KGDB: Ignoring unexpected NMI?\n");
+-		return;
+-#else /* CONFIG_KGDB_NMI */
+-		if (!kgdb_enabled) {
+-			kgdb_enabled = 1;
+-			kgdb_init();
+-		}
+-#endif /* CONFIG_KGDB_NMI */
+-	}
+-
+-	/* Ignore if we're disabled */
+-	if (!kgdb_enabled)
+-		return;
+-
+-	/* Enter GDB mode (e.g. after detach) */
+-	if (!kgdb_in_gdb_mode) {
+-		/* Do serial setup, notify user, issue preemptive ack */
+-		printk(KERN_NOTICE "KGDB: Waiting for GDB\n");
+-		kgdb_in_gdb_mode = 1;
+-		put_debug_char('+');
+-	}
+-
+-	/* Reply to host that an exception has occurred */
+-	sigval = compute_signal(excep_code);
+-	send_signal_msg(sigval);
+-
+-	/* TRAP_VEC exception indicates a software trap inserted in place of
+-	   code by GDB so back up PC by one instruction, as this instruction
+-	   will later be replaced by its original one.  Do NOT do this for
+-	   trap 0xff, since that indicates a compiled-in breakpoint which
+-	   will not be replaced (and we would retake the trap forever) */
+-	if ((excep_code == TRAP_VEC) && (trapa_value != (0x3c << 2)))
+-		trap_registers.pc -= 2;
+-
+-	/* Undo any stepping we may have done */
+-	undo_single_step();
+-
+-	while (1) {
+-		out_buffer[0] = 0;
+-		get_packet(in_buffer, BUFMAX);
+-
+-		/* Examine first char of buffer to see what we need to do */
+-		switch (in_buffer[0]) {
+-		case '?':	/* Send which signal we've received */
+-			send_signal_msg(sigval);
+-			break;
+-
+-		case 'g':	/* Return the values of the CPU registers */
+-			send_regs_msg();
+-			break;
+-
+-		case 'G':	/* Set the value of the CPU registers */
+-			set_regs_msg();
+-			break;
+-
+-		case 'm':	/* Read LLLL bytes address AA..AA */
+-			read_mem_msg();
+-			break;
+-
+-		case 'M':	/* Write LLLL bytes address AA..AA, ret OK */
+-			write_mem_msg(0);	/* 0 = data in hex */
+-			break;
+-
+-		case 'X':	/* Write LLLL bytes esc bin address AA..AA */
+-			if (kgdb_bits == '8')
+-				write_mem_msg(1); /* 1 = data in binary */
+-			else
+-				send_empty_msg();
+-			break;
+-
+-		case 'C':	/* Continue, signum included, we ignore it */
+-			continue_with_sig_msg();
+-			return;
+-
+-		case 'c':	/* Continue at address AA..AA (optional) */
+-			continue_msg();
+-			return;
+-
+-		case 'S':	/* Step, signum included, we ignore it */
+-			step_with_sig_msg();
+-			return;
+-
+-		case 's':	/* Step one instruction from AA..AA */
+-			step_msg();
+-			return;
+-
+-		case 'k':	/* 'Kill the program' with a kernel ? */
+-			break;
+-
+-		case 'D':	/* Detach from program, send reply OK */
+-			kgdb_in_gdb_mode = 0;
+-			send_ok_msg();
+-			get_debug_char();
+-			return;
+-
+-		default:
+-			send_empty_msg();
+-			break;
+-		}
+-	}
+-}
+-
+-/* There has been an exception, most likely a breakpoint. */
+-static void handle_exception(struct pt_regs *regs)
+-{
+-	int excep_code, vbr_val;
+-	int count;
+-	int trapa_value = ctrl_inl(TRA);
+-
+-	/* Copy kernel regs (from stack) */
+-	for (count = 0; count < 16; count++)
+-		trap_registers.regs[count] = regs->regs[count];
+-	trap_registers.pc = regs->pc;
+-	trap_registers.pr = regs->pr;
+-	trap_registers.sr = regs->sr;
+-	trap_registers.gbr = regs->gbr;
+-	trap_registers.mach = regs->mach;
+-	trap_registers.macl = regs->macl;
+-
+-	asm("stc vbr, %0":"=r"(vbr_val));
+-	trap_registers.vbr = vbr_val;
+-
+-	/* Get excode for command loop call, user access */
+-	asm("stc r2_bank, %0":"=r"(excep_code));
+-	kgdb_excode = excep_code;
+-
+-	/* Other interesting environment items for reference */
+-	asm("stc r6_bank, %0":"=r"(kgdb_g_imask));
+-	kgdb_current = current;
+-	kgdb_trapa_val = trapa_value;
+-
+-	/* Act on the exception */
+-	kgdb_command_loop(excep_code, trapa_value);
+-
+-	kgdb_current = NULL;
+-
+-	/* Copy back the (maybe modified) registers */
+-	for (count = 0; count < 16; count++)
+-		regs->regs[count] = trap_registers.regs[count];
+-	regs->pc = trap_registers.pc;
+-	regs->pr = trap_registers.pr;
+-	regs->sr = trap_registers.sr;
+-	regs->gbr = trap_registers.gbr;
+-	regs->mach = trap_registers.mach;
+-	regs->macl = trap_registers.macl;
+-
+-	vbr_val = trap_registers.vbr;
+-	asm("ldc %0, vbr": :"r"(vbr_val));
+-}
+-
+-asmlinkage void kgdb_handle_exception(unsigned long r4, unsigned long r5,
+-				      unsigned long r6, unsigned long r7,
+-				      struct pt_regs __regs)
+-{
+-	struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
+-	handle_exception(regs);
+-}
+-
+-/* Initialise the KGDB data structures and serial configuration */
+-int kgdb_init(void)
+-{
+-	if (!kgdb_enabled)
+-		return 1;
+-
+-	in_nmi = 0;
+-	kgdb_nofault = 0;
+-	stepped_opcode = 0;
+-	kgdb_in_gdb_mode = 0;
+-
+-	if (kgdb_serial_setup() != 0) {
+-		printk(KERN_NOTICE "KGDB: serial setup error\n");
+-		return -1;
+-	}
+-
+-	/* Init ptr to exception handler */
+-	kgdb_debug_hook = handle_exception;
+-	kgdb_bus_err_hook = kgdb_handle_bus_error;
+-
+-	/* Enter kgdb now if requested, or just report init done */
+-	printk(KERN_NOTICE "KGDB: stub is initialized.\n");
+-
+-	return 0;
+-}
+-
+-/* Make function available for "user messages"; console will use it too. */
+-
+-char gdbmsgbuf[BUFMAX];
+-#define MAXOUT ((BUFMAX-2)/2)
+-
+-static void kgdb_msg_write(const char *s, unsigned count)
+-{
+-	int i;
+-	int wcount;
+-	char *bufptr;
+-
+-	/* 'O'utput */
+-	gdbmsgbuf[0] = 'O';
+-
+-	/* Fill and send buffers... */
+-	while (count > 0) {
+-		bufptr = gdbmsgbuf + 1;
+-
+-		/* Calculate how many this time */
+-		wcount = (count > MAXOUT) ? MAXOUT : count;
+-
+-		/* Pack in hex chars */
+-		for (i = 0; i < wcount; i++)
+-			bufptr = pack_hex_byte(bufptr, s[i]);
+-		*bufptr = '\0';
+-
+-		/* Move up */
+-		s += wcount;
+-		count -= wcount;
+-
+-		/* Write packet */
+-		put_packet(gdbmsgbuf);
+-	}
+-}
+-
+-static void kgdb_to_gdb(const char *s)
+-{
+-	kgdb_msg_write(s, strlen(s));
+-}
+-
+-#ifdef CONFIG_SH_KGDB_CONSOLE
+-void kgdb_console_write(struct console *co, const char *s, unsigned count)
+-{
+-	/* Bail if we're not talking to GDB */
+-	if (!kgdb_in_gdb_mode)
+-		return;
+-
+-	kgdb_msg_write(s, count);
+-}
+-#endif
+-
+-#ifdef CONFIG_KGDB_SYSRQ
+-static void sysrq_handle_gdb(int key, struct tty_struct *tty)
+-{
+-	printk("Entering GDB stub\n");
+-	breakpoint();
+-}
+-
+-static struct sysrq_key_op sysrq_gdb_op = {
+-        .handler        = sysrq_handle_gdb,
+-        .help_msg       = "Gdb",
+-        .action_msg     = "GDB",
+-};
+-
+-static int gdb_register_sysrq(void)
+-{
+-	printk("Registering GDB sysrq handler\n");
+-	register_sysrq_key('g', &sysrq_gdb_op);
+-	return 0;
+-}
+-module_init(gdb_register_sysrq);
+-#endif
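+
+(For reference, the $<data>#CS framing documented in the stub removed above
+still describes what goes over the wire: a request "m4000,4" is sent as
+$m4000,4#91, where 0x91 is the low byte of the sum of the payload bytes
+(0x191 here).  A small illustrative sketch of the checksum step:
+
+	static unsigned char packet_csum(const char *data)
+	{
+		unsigned char sum = 0;
+
+		while (*data)
+			sum += *data++;
+		return sum;	/* emitted as two hex digits after '#' */
+	}
+)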
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/time.c linux-2.6.22-try2/arch/sh/kernel/time.c
+--- linux-2.6.22-570/arch/sh/kernel/time.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/sh/kernel/time.c	2007-12-19 15:29:24.000000000 -0500
+@@ -259,11 +259,4 @@
+ 		       ((sh_hpt_frequency + 500) / 1000) / 1000,
+ 		       ((sh_hpt_frequency + 500) / 1000) % 1000);
+ 
+-#if defined(CONFIG_SH_KGDB)
+-	/*
+-	 * Set up kgdb as requested. We do it here because the serial
+-	 * init uses the timer vars we just set up for figuring baud.
+-	 */
+-	kgdb_init();
+-#endif
+ }
+diff -Nurb linux-2.6.22-570/arch/sh/kernel/traps.c linux-2.6.22-try2/arch/sh/kernel/traps.c
+--- linux-2.6.22-570/arch/sh/kernel/traps.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/sh/kernel/traps.c	2007-12-19 15:29:24.000000000 -0500
+@@ -25,16 +25,10 @@
+ #include <linux/limits.h>
+ #include <asm/system.h>
+ #include <asm/uaccess.h>
++#include <linux/kgdb.h>
+ 
+-#ifdef CONFIG_SH_KGDB
+-#include <asm/kgdb.h>
+-#define CHK_REMOTE_DEBUG(regs)			\
+-{						\
+-	if (kgdb_debug_hook && !user_mode(regs))\
+-		(*kgdb_debug_hook)(regs);       \
+-}
+-#else
+-#define CHK_REMOTE_DEBUG(regs)
++#ifndef CONFIG_KGDB
++#define kgdb_handle_exception(t, s, e, r)
+ #endif
+ 
+ #ifdef CONFIG_CPU_SH2
+@@ -91,7 +85,9 @@
+ 
+ 	printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
+ 
+-	CHK_REMOTE_DEBUG(regs);
++#ifdef CONFIG_KGDB
++	kgdb_handle_exception(1, SIGTRAP, err, regs);
++#endif
+ 	print_modules();
+ 	show_regs(regs);
+ 
+@@ -700,7 +696,9 @@
+ 	lookup_exception_vector(error_code);
+ 
+ 	local_irq_enable();
+-	CHK_REMOTE_DEBUG(regs);
++#ifdef CONFIG_KGDB
++	kgdb_handle_exception(1, SIGILL, error_code, regs);
++#endif
+ 	force_sig(SIGILL, tsk);
+ 	die_if_no_fixup("reserved instruction", regs, error_code);
+ }
+@@ -771,7 +769,9 @@
+ 	lookup_exception_vector(error_code);
+ 
+ 	local_irq_enable();
+-	CHK_REMOTE_DEBUG(regs);
++#ifdef CONFIG_KGDB
++	kgdb_handle_exception(1, SIGILL, error_code, regs);
++#endif
+ 	force_sig(SIGILL, tsk);
+ 	die_if_no_fixup("illegal slot instruction", regs, error_code);
+ }
+diff -Nurb linux-2.6.22-570/arch/sh/mm/extable.c linux-2.6.22-try2/arch/sh/mm/extable.c
+--- linux-2.6.22-570/arch/sh/mm/extable.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/sh/mm/extable.c	2007-12-19 15:29:24.000000000 -0500
+@@ -5,6 +5,7 @@
+  */
+ 
+ #include <linux/module.h>
++#include <linux/kgdb.h>
+ #include <asm/uaccess.h>
+ 
+ int fixup_exception(struct pt_regs *regs)
+@@ -16,6 +17,12 @@
+ 		regs->pc = fixup->fixup;
+ 		return 1;
+ 	}
++#ifdef CONFIG_KGDB
++	if (atomic_read(&debugger_active) && kgdb_may_fault)
++		/* Restore our previous state. */
++		kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++		/* Never reached. */
++#endif
+ 
+ 	return 0;
+ }
+diff -Nurb linux-2.6.22-570/arch/sh/mm/fault-nommu.c linux-2.6.22-try2/arch/sh/mm/fault-nommu.c
+--- linux-2.6.22-570/arch/sh/mm/fault-nommu.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/sh/mm/fault-nommu.c	2007-12-19 15:29:24.000000000 -0500
+@@ -28,10 +28,6 @@
+ #include <asm/mmu_context.h>
+ #include <asm/cacheflush.h>
+ 
+-#if defined(CONFIG_SH_KGDB)
+-#include <asm/kgdb.h>
+-#endif
+-
+ extern void die(const char *,struct pt_regs *,long);
+ 
+ /*
+@@ -42,11 +38,6 @@
+ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
+ 			      unsigned long address)
+ {
+-#if defined(CONFIG_SH_KGDB)
+-	if (kgdb_nofault && kgdb_bus_err_hook)
+-		kgdb_bus_err_hook();
+-#endif
+-
+ 	/*
+ 	 * Oops. The kernel tried to access some bad page. We'll have to
+ 	 * terminate things with extreme prejudice.
+@@ -68,11 +59,6 @@
+ asmlinkage int __do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
+ 			       unsigned long address)
+ {
+-#if defined(CONFIG_SH_KGDB)
+-	if (kgdb_nofault && kgdb_bus_err_hook)
+-		kgdb_bus_err_hook();
+-#endif
+-
+ 	if (address >= TASK_SIZE)
+ 		return 1;
+ 
+diff -Nurb linux-2.6.22-570/arch/sh/mm/fault.c linux-2.6.22-try2/arch/sh/mm/fault.c
+--- linux-2.6.22-570/arch/sh/mm/fault.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/sh/mm/fault.c	2007-12-19 15:29:24.000000000 -0500
+@@ -18,7 +18,6 @@
+ #include <asm/system.h>
+ #include <asm/mmu_context.h>
+ #include <asm/tlbflush.h>
+-#include <asm/kgdb.h>
+ 
+ /*
+  * This routine handles page faults.  It determines the address,
+@@ -39,11 +38,6 @@
+ 	trace_hardirqs_on();
+ 	local_irq_enable();
+ 
+-#ifdef CONFIG_SH_KGDB
+-	if (kgdb_nofault && kgdb_bus_err_hook)
+-		kgdb_bus_err_hook();
+-#endif
+-
+ 	tsk = current;
+ 	mm = tsk->mm;
+ 	si_code = SEGV_MAPERR;
+@@ -189,6 +183,7 @@
+ 	}
++	dump_stack();
+ 	die("Oops", regs, writeaccess);
+ 	do_exit(SIGKILL);
+ 
+ /*
+  * We ran out of memory, or some other thing happened to us that made
+@@ -252,11 +247,6 @@
+ 	spinlock_t *ptl = NULL;
+ 	int ret = 1;
+ 
+-#ifdef CONFIG_SH_KGDB
+-	if (kgdb_nofault && kgdb_bus_err_hook)
+-		kgdb_bus_err_hook();
+-#endif
+-
+ 	/*
+ 	 * We don't take page faults for P1, P2, and parts of P4, these
+ 	 * are always mapped, whether it be due to legacy behaviour in
+diff -Nurb linux-2.6.22-570/arch/sparc64/kernel/power.c linux-2.6.22-try2/arch/sparc64/kernel/power.c
+--- linux-2.6.22-570/arch/sparc64/kernel/power.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/sparc64/kernel/power.c	2007-12-19 15:29:23.000000000 -0500
+@@ -13,6 +13,7 @@
+ #include <linux/interrupt.h>
+ #include <linux/pm.h>
+ #include <linux/syscalls.h>
++#include <linux/reboot.h>
+ 
+ #include <asm/system.h>
+ #include <asm/auxio.h>
+@@ -33,14 +34,13 @@
+ #include <linux/pci.h>
+ static void __iomem *power_reg;
+ 
+-static DECLARE_WAIT_QUEUE_HEAD(powerd_wait);
+ static int button_pressed;
+ 
+ static irqreturn_t power_handler(int irq, void *dev_id)
+ {
+ 	if (button_pressed == 0) {
+ 		button_pressed = 1;
+-		wake_up(&powerd_wait);
++		orderly_poweroff(true);
+ 	}
+ 
+ 	/* FIXME: Check registers for status... */
+@@ -77,36 +77,6 @@
+ EXPORT_SYMBOL(pm_power_off);
+ 
+ #ifdef CONFIG_PCI
+-static int powerd(void *__unused)
+-{
+-	static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
+-	char *argv[] = { "/sbin/shutdown", "-h", "now", NULL };
+-	DECLARE_WAITQUEUE(wait, current);
+-
+-	daemonize("powerd");
+-
+-	add_wait_queue(&powerd_wait, &wait);
+-again:
+-	for (;;) {
+-		set_task_state(current, TASK_INTERRUPTIBLE);
+-		if (button_pressed)
+-			break;
+-		flush_signals(current);
+-		schedule();
+-	}
+-	__set_current_state(TASK_RUNNING);
+-	remove_wait_queue(&powerd_wait, &wait);
+-
+-	/* Ok, down we go... */
+-	button_pressed = 0;
+-	if (kernel_execve("/sbin/shutdown", argv, envp) < 0) {
+-		printk("powerd: shutdown execution failed\n");
+-		add_wait_queue(&powerd_wait, &wait);
+-		goto again;
+-	}
+-	return 0;
+-}
+-
+ static int __init has_button_interrupt(unsigned int irq, struct device_node *dp)
+ {
+ 	if (irq == PCI_IRQ_NONE)
+@@ -130,12 +100,6 @@
+ 	poweroff_method = machine_halt;  /* able to use the standard halt */
+ 
+ 	if (has_button_interrupt(irq, op->node)) {
+-		if (kernel_thread(powerd, NULL, CLONE_FS) < 0) {
+-			printk("Failed to start power daemon.\n");
+-			return 0;
+-		}
+-		printk("powerd running.\n");
+-
+ 		if (request_irq(irq,
+ 				power_handler, 0, "power", NULL) < 0)
+ 			printk("power: Error, cannot register IRQ handler.\n");
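+
+(The power.c change above replaces the hand-rolled powerd kernel thread
+with orderly_poweroff(true), which runs the userspace shutdown helper and
+falls back to an emergency halt when forced.  Roughly, the removed thread
+did the same thing by hand -- run_shutdown() is a sketch only, reusing the
+argv/envp from the deleted code and the 2.6.22-era call_usermodehelper()
+signature:
+
+	static int run_shutdown(void)
+	{
+		char *argv[] = { "/sbin/shutdown", "-h", "now", NULL };
+		char *envp[] = { "HOME=/", "TERM=linux",
+				 "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
+
+		return call_usermodehelper(argv[0], argv, envp, 1 /* wait */);
+	}
+)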
+diff -Nurb linux-2.6.22-570/arch/um/Kconfig.debug linux-2.6.22-try2/arch/um/Kconfig.debug
+--- linux-2.6.22-570/arch/um/Kconfig.debug	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/um/Kconfig.debug	2007-12-19 15:29:24.000000000 -0500
+@@ -47,4 +47,13 @@
+         If you're involved in UML kernel development and want to use gcov,
+         say Y.  If you're unsure, say N.
+ 
++config DEBUG_STACK_USAGE
++	bool "Stack utilization instrumentation"
++	default n
++	help
++	  Track the maximum kernel stack usage - this will look at each
++	  kernel stack at process exit and log it if it's the deepest
++	  stack seen so far.
++
++	  This option will slow down process creation and destruction somewhat.
+ endmenu
+diff -Nurb linux-2.6.22-570/arch/um/defconfig linux-2.6.22-try2/arch/um/defconfig
+--- linux-2.6.22-570/arch/um/defconfig	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/um/defconfig	2007-12-19 15:29:24.000000000 -0500
+@@ -527,3 +527,4 @@
+ # CONFIG_RCU_TORTURE_TEST is not set
+ # CONFIG_GPROF is not set
+ # CONFIG_GCOV is not set
++# CONFIG_DEBUG_STACK_USAGE is not set
+diff -Nurb linux-2.6.22-570/arch/x86_64/Kconfig linux-2.6.22-try2/arch/x86_64/Kconfig
+--- linux-2.6.22-570/arch/x86_64/Kconfig	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/x86_64/Kconfig	2007-12-19 15:29:19.000000000 -0500
+@@ -698,6 +698,8 @@
+ 
+ source "arch/x86_64/kernel/cpufreq/Kconfig"
+ 
++source "drivers/cpuidle/Kconfig"
++
+ endmenu
+ 
+ menu "Bus options (PCI etc.)"
+diff -Nurb linux-2.6.22-570/arch/x86_64/Kconfig.debug linux-2.6.22-try2/arch/x86_64/Kconfig.debug
+--- linux-2.6.22-570/arch/x86_64/Kconfig.debug	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/x86_64/Kconfig.debug	2007-12-19 15:29:24.000000000 -0500
+@@ -55,7 +55,4 @@
+ 
+ 	  This option will slow down process creation somewhat.
+ 
+-#config X86_REMOTE_DEBUG
+-#       bool "kgdb debugging stub"
+-
+ endmenu
+diff -Nurb linux-2.6.22-570/arch/x86_64/Makefile linux-2.6.22-try2/arch/x86_64/Makefile
+--- linux-2.6.22-570/arch/x86_64/Makefile	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/arch/x86_64/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -41,7 +41,9 @@
+ cflags-y += -mcmodel=kernel
+ cflags-y += -pipe
+ cflags-y += -Wno-sign-compare
++ifneq ($(CONFIG_UNWIND_INFO),y)
+ cflags-y += -fno-asynchronous-unwind-tables
++endif
+ ifneq ($(CONFIG_DEBUG_INFO),y)
+ # -fweb shrinks the kernel a bit, but the difference is very small
+ # it also messes up debugging, so don't use it for now.
+diff -Nurb linux-2.6.22-570/arch/x86_64/ia32/ia32entry.S linux-2.6.22-try2/arch/x86_64/ia32/ia32entry.S
+--- linux-2.6.22-570/arch/x86_64/ia32/ia32entry.S	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/x86_64/ia32/ia32entry.S	2007-12-19 15:29:24.000000000 -0500
+@@ -731,4 +731,7 @@
+ 	.quad compat_sys_signalfd
+ 	.quad compat_sys_timerfd
+ 	.quad sys_eventfd
++	.quad sys_revokeat
++	.quad sys_frevoke		/* 325 */
++	.quad sys_fallocate
+ ia32_syscall_end:
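+
+(With the three entries appended above, this tree numbers the ia32 compat
+syscalls revokeat = 324, frevoke = 325 (per the /* 325 */ marker) and
+fallocate = 326.  Hypothetical userspace usage, assuming the
+revokeat(dfd, pathname) signature from the revoke patch series -- the
+numbers are specific to this tree, not mainline:
+
+	/* from a 32-bit process on x86_64 */
+	long ret = syscall(324, AT_FDCWD, "/dev/ttyS0");
+)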
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/Makefile linux-2.6.22-try2/arch/x86_64/kernel/Makefile
+--- linux-2.6.22-570/arch/x86_64/kernel/Makefile	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/arch/x86_64/kernel/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -33,10 +33,12 @@
+ obj-$(CONFIG_CALGARY_IOMMU)	+= pci-calgary.o tce.o
+ obj-$(CONFIG_SWIOTLB)		+= pci-swiotlb.o
+ obj-$(CONFIG_KPROBES)		+= kprobes.o
++obj-$(CONFIG_KGDB)		+= kgdb.o kgdb-jmp.o
+ obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer.o
+ obj-$(CONFIG_X86_VSMP)		+= vsmp.o
+ obj-$(CONFIG_K8_NB)		+= k8.o
+ obj-$(CONFIG_AUDIT)		+= audit.o
++obj-$(CONFIG_STACK_UNWIND)	+= unwind.o
+ 
+ obj-$(CONFIG_MODULES)		+= module.o
+ obj-$(CONFIG_PCI)		+= early-quirks.o
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/kgdb-jmp.S linux-2.6.22-try2/arch/x86_64/kernel/kgdb-jmp.S
+--- linux-2.6.22-570/arch/x86_64/kernel/kgdb-jmp.S	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/x86_64/kernel/kgdb-jmp.S	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,65 @@
++/*
++ * arch/x86_64/kernel/kgdb-jmp.S
++ *
++ * Save and restore system registers so that within a limited frame we
++ * may have a fault and "jump back" to a known safe location.
++ *
++ * Author: Tom Rini <trini@kernel.crashing.org>
++ *
++ * Cribbed from glibc, which carries the following:
++ * Copyright (C) 2001, 2003, 2004 Free Software Foundation, Inc.
++ * Copyright (C) 2005 by MontaVista Software.
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program as licensed "as is" without any warranty of
++ * any kind, whether express or implied.
++ */
++
++#include <linux/linkage.h>
++
++#define JB_RBX		0
++#define JB_RBP		1
++#define JB_R12		2
++#define JB_R13		3
++#define JB_R14		4
++#define JB_R15		5
++#define JB_RSP		6
++#define JB_PC		7
++
++	.code64
++
++/* This must be called prior to kgdb_fault_longjmp and
++ * kgdb_fault_longjmp must not be called outside of the context of the
++ * last call to kgdb_fault_setjmp.
++ */
++ENTRY(kgdb_fault_setjmp)
++	/* Save registers. */
++	movq %rbx, (JB_RBX*8)(%rdi)
++	movq %rbp, (JB_RBP*8)(%rdi)
++	movq %r12, (JB_R12*8)(%rdi)
++	movq %r13, (JB_R13*8)(%rdi)
++	movq %r14, (JB_R14*8)(%rdi)
++	movq %r15, (JB_R15*8)(%rdi)
++	leaq 8(%rsp), %rdx	/* Save SP as it will be after we return. */
++	movq %rdx, (JB_RSP*8)(%rdi)
++	movq (%rsp), %rax	/* Save PC we are returning to now. */
++	movq %rax, (JB_PC*8)(%rdi)
++	/* Set return value for setjmp. */
++	mov $0,%eax
++	movq (JB_PC*8)(%rdi),%rdx
++	movq (JB_RSP*8)(%rdi),%rsp
++	jmpq *%rdx
++
++ENTRY(kgdb_fault_longjmp)
++	/* Restore registers. */
++	movq (JB_RBX*8)(%rdi),%rbx
++	movq (JB_RBP*8)(%rdi),%rbp
++	movq (JB_R12*8)(%rdi),%r12
++	movq (JB_R13*8)(%rdi),%r13
++	movq (JB_R14*8)(%rdi),%r14
++	movq (JB_R15*8)(%rdi),%r15
++	/* Set return value for setjmp. */
++	movq (JB_PC*8)(%rdi),%rdx
++	movq (JB_RSP*8)(%rdi),%rsp
++	mov $1,%eax
++	jmpq *%rdx
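
[Editor's note: a minimal sketch, not part of the patch, of how the kgdb core is expected to use the setjmp/longjmp pair above. It assumes the declarations the kgdb core provides (kgdb_fault_setjmp, kgdb_may_fault, kgdb_fault_jmp_regs, all referenced by the notifier code in kgdb.c below); kgdb_safe_read, addr and val are hypothetical names.]

	/* Hypothetical helper illustrating the contract documented above:
	 * arm the fault hook, touch possibly-invalid memory, and let the
	 * DIE_PAGE_FAULT_NO_CONTEXT notifier longjmp back on a fault. */
	static int kgdb_safe_read(unsigned long *addr, unsigned long *val)
	{
		if (kgdb_fault_setjmp(kgdb_fault_jmp_regs)) {
			/* Reached via kgdb_fault_longjmp() from the fault
			 * notifier: the dereference below faulted. */
			kgdb_may_fault = 0;
			return -EFAULT;
		}
		kgdb_may_fault = 1;
		*val = *addr;		/* may fault; the longjmp rescues us */
		kgdb_may_fault = 0;
		return 0;
	}
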
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/kgdb.c linux-2.6.22-try2/arch/x86_64/kernel/kgdb.c
+--- linux-2.6.22-570/arch/x86_64/kernel/kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/x86_64/kernel/kgdb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,461 @@
++/*
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the
++ * Free Software Foundation; either version 2, or (at your option) any
++ * later version.
++ *
++ * This program is distributed in the hope that it will be useful, but
++ * WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * General Public License for more details.
++ *
++ */
++
++/*
++ * Copyright (C) 2004 Amit S. Kale <amitkale@linsyssoft.com>
++ * Copyright (C) 2000-2001 VERITAS Software Corporation.
++ * Copyright (C) 2002 Andi Kleen, SuSE Labs
++ * Copyright (C) 2004 LinSysSoft Technologies Pvt. Ltd.
++ * Copyright (C) 2007 Jason Wessel, Wind River Systems, Inc.
++ */
++/****************************************************************************
++ *  Contributor:     Lake Stevens Instrument Division
++ *  Written by:      Glenn Engel
++ *  Updated by:      Amit Kale <akale@veritas.com>
++ *  Modified for 386 by Jim Kingdon, Cygnus Support.
++ *  Original kgdb, compatibility with 2.1.xx kernel by
++ *  David Grothe <dave@gcom.com>
++ *  Integrated into 2.2.5 kernel by Tigran Aivazian <tigran@sco.com>
++ *  X86_64 changes from Andi Kleen's patch merged by Jim Houston
++ */
++
++#include <linux/string.h>
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/delay.h>
++#include <asm/system.h>
++#include <asm/ptrace.h>		/* for linux pt_regs struct */
++#include <linux/kgdb.h>
++#include <linux/init.h>
++#include <linux/kdebug.h>
++#include <asm/apicdef.h>
++#include <asm/mach_apic.h>
++#include <asm/kdebug.h>
++#include <asm/debugreg.h>
++
++/* Put the error code here just in case the user cares.  */
++int gdb_x86_64errcode;
++/* Likewise, the vector number here (since GDB only gets the signal
++   number through the usual means, and that's not very specific).  */
++int gdb_x86_64vector = -1;
++
++extern atomic_t cpu_doing_single_step;
++
++void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++	gdb_regs[_RAX] = regs->rax;
++	gdb_regs[_RBX] = regs->rbx;
++	gdb_regs[_RCX] = regs->rcx;
++	gdb_regs[_RDX] = regs->rdx;
++	gdb_regs[_RSI] = regs->rsi;
++	gdb_regs[_RDI] = regs->rdi;
++	gdb_regs[_RBP] = regs->rbp;
++	gdb_regs[_PS] = regs->eflags;
++	gdb_regs[_PC] = regs->rip;
++	gdb_regs[_R8] = regs->r8;
++	gdb_regs[_R9] = regs->r9;
++	gdb_regs[_R10] = regs->r10;
++	gdb_regs[_R11] = regs->r11;
++	gdb_regs[_R12] = regs->r12;
++	gdb_regs[_R13] = regs->r13;
++	gdb_regs[_R14] = regs->r14;
++	gdb_regs[_R15] = regs->r15;
++	gdb_regs[_RSP] = regs->rsp;
++}
++
++extern void thread_return(void);
++void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
++{
++	gdb_regs[_RAX] = 0;
++	gdb_regs[_RBX] = 0;
++	gdb_regs[_RCX] = 0;
++	gdb_regs[_RDX] = 0;
++	gdb_regs[_RSI] = 0;
++	gdb_regs[_RDI] = 0;
++	gdb_regs[_RBP] = *(unsigned long *)p->thread.rsp;
++	gdb_regs[_PS] = *(unsigned long *)(p->thread.rsp + 8);
++	gdb_regs[_PC] = (unsigned long)&thread_return;
++	gdb_regs[_R8] = 0;
++	gdb_regs[_R9] = 0;
++	gdb_regs[_R10] = 0;
++	gdb_regs[_R11] = 0;
++	gdb_regs[_R12] = 0;
++	gdb_regs[_R13] = 0;
++	gdb_regs[_R14] = 0;
++	gdb_regs[_R15] = 0;
++	gdb_regs[_RSP] = p->thread.rsp;
++}
++
++void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs)
++{
++	regs->rax = gdb_regs[_RAX];
++	regs->rbx = gdb_regs[_RBX];
++	regs->rcx = gdb_regs[_RCX];
++	regs->rdx = gdb_regs[_RDX];
++	regs->rsi = gdb_regs[_RSI];
++	regs->rdi = gdb_regs[_RDI];
++	regs->rbp = gdb_regs[_RBP];
++	regs->eflags = gdb_regs[_PS];
++	regs->rip = gdb_regs[_PC];
++	regs->r8 = gdb_regs[_R8];
++	regs->r9 = gdb_regs[_R9];
++	regs->r10 = gdb_regs[_R10];
++	regs->r11 = gdb_regs[_R11];
++	regs->r12 = gdb_regs[_R12];
++	regs->r13 = gdb_regs[_R13];
++	regs->r14 = gdb_regs[_R14];
++	regs->r15 = gdb_regs[_R15];
++#if 0				/* can't change these */
++	regs->rsp = gdb_regs[_RSP];
++	regs->ss = gdb_regs[_SS];
++	regs->fs = gdb_regs[_FS];
++	regs->gs = gdb_regs[_GS];
++#endif
++
++}				/* gdb_regs_to_regs */
++
++struct hw_breakpoint {
++	unsigned enabled;
++	unsigned type;
++	unsigned len;
++	unsigned long addr;
++} breakinfo[4] = {
++	{ .enabled = 0 }, { .enabled = 0 },
++	{ .enabled = 0 }, { .enabled = 0 }
++};
++
++static void kgdb_correct_hw_break(void)
++{
++	int breakno;
++	int correctit;
++	int breakbit;
++	unsigned long dr7;
++
++	get_debugreg(dr7, 7);
++	correctit = 0;
++	/* walk all four debug address registers (dr0-dr3) */
++	for (breakno = 0; breakno < 4; breakno++) {
++		breakbit = 2 << (breakno << 1);
++		if (!(dr7 & breakbit) && breakinfo[breakno].enabled) {
++			correctit = 1;
++			dr7 |= breakbit;
++			dr7 &= ~(0xf0000 << (breakno << 2));
++			dr7 |= (((breakinfo[breakno].len << 2) |
++				 breakinfo[breakno].type) << 16) <<
++			    (breakno << 2);
++			switch (breakno) {
++			case 0:
++				set_debugreg(breakinfo[breakno].addr, 0);
++				break;
++
++			case 1:
++				set_debugreg(breakinfo[breakno].addr, 1);
++				break;
++
++			case 2:
++				set_debugreg(breakinfo[breakno].addr, 2);
++				break;
++
++			case 3:
++				set_debugreg(breakinfo[breakno].addr, 3);
++				break;
++			}
++		} else if ((dr7 & breakbit) && !breakinfo[breakno].enabled) {
++			correctit = 1;
++			dr7 &= ~breakbit;
++			dr7 &= ~(0xf0000 << (breakno << 2));
++		}
++	}
++	if (correctit)
++		set_debugreg(dr7, 7);
++}
++
++static int kgdb_remove_hw_break(unsigned long addr, int len,
++						 enum kgdb_bptype bptype)
++{
++	int i, idx = -1;
++	for (i = 0; i < 4; i++) {
++		if (breakinfo[i].addr == addr && breakinfo[i].enabled) {
++			idx = i;
++			break;
++		}
++	}
++	if (idx == -1)
++		return -1;
++
++	breakinfo[idx].enabled = 0;
++	return 0;
++}
++
++static void kgdb_remove_all_hw_break(void)
++{
++	int i;
++
++	for (i = 0; i < 4; i++) {
++		memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint));
++	}
++}
++
++static int kgdb_set_hw_break(unsigned long addr, int len,
++					  enum kgdb_bptype bptype)
++{
++	int i, idx = -1;
++	for (i = 0; i < 4; i++) {
++		if (!breakinfo[i].enabled) {
++			idx = i;
++			break;
++		}
++	}
++	if (idx == -1)
++		return -1;
++	if (bptype == bp_hardware_breakpoint) {
++		breakinfo[idx].type = 0;
++		breakinfo[idx].len = 0;
++	} else if (bptype == bp_write_watchpoint) {
++		breakinfo[idx].type = 1;
++		if (len == 1 || len == 2 || len == 4)
++			breakinfo[idx].len = len - 1;
++		else
++			return -1;
++	} else if (bptype == bp_access_watchpoint) {
++		breakinfo[idx].type = 3;
++		if (len == 1 || len == 2 || len == 4)
++			breakinfo[idx].len = len - 1;
++		else
++			return -1;
++	} else
++		return -1;
++	breakinfo[idx].enabled = 1;
++	breakinfo[idx].addr = addr;
++	return 0;
++}
++
++void kgdb_disable_hw_debug(struct pt_regs *regs)
++{
++	/* Disable hardware debugging while we are in kgdb */
++	set_debugreg(0UL, 7);
++}
++
++void kgdb_post_master_code(struct pt_regs *regs, int e_vector, int err_code)
++{
++	/* Master processor is completely in the debugger */
++	gdb_x86_64vector = e_vector;
++	gdb_x86_64errcode = err_code;
++}
++
++void kgdb_roundup_cpus(unsigned long flags)
++{
++	send_IPI_allbutself(APIC_DM_NMI);
++}
++
++int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
++			       char *remcomInBuffer, char *remcomOutBuffer,
++			       struct pt_regs *linux_regs)
++{
++	unsigned long addr;
++	unsigned long breakno;
++	char *ptr;
++	unsigned long dr6;
++
++	switch (remcomInBuffer[0]) {
++	case 'c':
++	case 's':
++		/* try to read optional parameter, pc unchanged if no parm */
++		ptr = &remcomInBuffer[1];
++		if (kgdb_hex2long(&ptr, &addr))
++			linux_regs->rip = addr;
++
++		/* clear the trace bit */
++		linux_regs->eflags &= ~TF_MASK;
++
++		atomic_set(&cpu_doing_single_step, -1);
++		/* set the trace bit if we're stepping */
++		if (remcomInBuffer[0] == 's') {
++			linux_regs->eflags |= TF_MASK;
++			debugger_step = 1;
++			if (kgdb_contthread)
++				atomic_set(&cpu_doing_single_step,
++					   raw_smp_processor_id());
++
++		}
++
++		get_debugreg(dr6, 6);
++		if (!(dr6 & 0x4000)) {
++			for (breakno = 0; breakno < 4; ++breakno) {
++				if (dr6 & (1 << breakno)) {
++					if (breakinfo[breakno].type == 0) {
++						/* Set restore flag */
++						linux_regs->eflags |=
++						    X86_EFLAGS_RF;
++						break;
++					}
++				}
++			}
++		}
++		set_debugreg(0UL, 6);
++		kgdb_correct_hw_break();
++
++		return 0;
++	}			/* switch */
++	return -1;
++}
++
++static struct pt_regs *in_interrupt_stack(unsigned long rsp, int cpu)
++{
++	struct pt_regs *regs;
++	unsigned long end = (unsigned long)cpu_pda(cpu)->irqstackptr;
++	if (rsp <= end && rsp >= end - IRQSTACKSIZE + 8) {
++		regs = *(((struct pt_regs **)end) - 1);
++		return regs;
++	}
++	return NULL;
++}
++
++static struct pt_regs *in_exception_stack(unsigned long rsp, int cpu)
++{
++	int i;
++	struct tss_struct *init_tss = &__get_cpu_var(init_tss);
++	for (i = 0; i < N_EXCEPTION_STACKS; i++)
++		if (rsp >= init_tss[cpu].ist[i] &&
++		    rsp <= init_tss[cpu].ist[i] + EXCEPTION_STKSZ) {
++			struct pt_regs *r =
++			    (void *)init_tss[cpu].ist[i] + EXCEPTION_STKSZ;
++			return r - 1;
++		}
++	return NULL;
++}
++
++void kgdb_shadowinfo(struct pt_regs *regs, char *buffer, unsigned threadid)
++{
++	static char intr_desc[] = "Stack at interrupt entrypoint";
++	static char exc_desc[] = "Stack at exception entrypoint";
++	struct pt_regs *stregs;
++	int cpu = raw_smp_processor_id();
++
++	if ((stregs = in_interrupt_stack(regs->rsp, cpu)))
++		kgdb_mem2hex(intr_desc, buffer, strlen(intr_desc));
++	else if ((stregs = in_exception_stack(regs->rsp, cpu)))
++		kgdb_mem2hex(exc_desc, buffer, strlen(exc_desc));
++}
++
++struct task_struct *kgdb_get_shadow_thread(struct pt_regs *regs, int threadid)
++{
++	struct pt_regs *stregs;
++	int cpu = raw_smp_processor_id();
++
++	if ((stregs = in_interrupt_stack(regs->rsp, cpu)))
++		return current;
++	else if ((stregs = in_exception_stack(regs->rsp, cpu)))
++		return current;
++
++	return NULL;
++}
++
++struct pt_regs *kgdb_shadow_regs(struct pt_regs *regs, int threadid)
++{
++	struct pt_regs *stregs;
++	int cpu = raw_smp_processor_id();
++
++	if ((stregs = in_interrupt_stack(regs->rsp, cpu)))
++		return stregs;
++	else if ((stregs = in_exception_stack(regs->rsp, cpu)))
++		return stregs;
++
++	return NULL;
++}
++
++/* Register KGDB with the die_chain so that we hook into all of the right
++ * spots. */
++static int kgdb_notify(struct notifier_block *self, unsigned long cmd,
++		       void *ptr)
++{
++	struct die_args *args = ptr;
++	struct pt_regs *regs = args->regs;
++
++	if (cmd == DIE_PAGE_FAULT_NO_CONTEXT && atomic_read(&debugger_active)
++			&& kgdb_may_fault) {
++		kgdb_fault_longjmp(kgdb_fault_jmp_regs);
++		return NOTIFY_STOP;
++	/* CPU roundup? */
++	} else if (atomic_read(&debugger_active) && cmd == DIE_NMI_IPI) {
++		kgdb_nmihook(raw_smp_processor_id(), regs);
++		return NOTIFY_STOP;
++		/* See if KGDB is interested. */
++	} else if (cmd == DIE_DEBUG
++			   && atomic_read(&cpu_doing_single_step) == raw_smp_processor_id()
++			   && user_mode(regs)) {
++		/* single step exception from kernel space to user space so
++		 * eat the exception and continue the process
++		 */
++		printk(KERN_ERR "KGDB: trap/step from kernel to user space, resuming...\n");
++		kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c", "", regs);
++		return NOTIFY_STOP;
++	} else if (cmd == DIE_PAGE_FAULT || user_mode(regs) ||
++		   cmd == DIE_NMI_IPI || (cmd == DIE_DEBUG &&
++					  atomic_read(&debugger_active)))
++		/* Userspace events, normal watchdog event, or spurious
++		 * debug exception.  Ignore. */
++		return NOTIFY_DONE;
++
++	kgdb_handle_exception(args->trapnr, args->signr, args->err, regs);
++
++	return NOTIFY_STOP;
++}
++
++static struct notifier_block kgdb_notifier = {
++	.notifier_call = kgdb_notify,
++	.priority = 0x7fffffff,	/* we need to be notified first */
++};
++
++int kgdb_arch_init(void)
++{
++	register_die_notifier(&kgdb_notifier);
++	return 0;
++}
++/*
++ * Skip an int3 exception when it occurs after a breakpoint has been
++ * removed. Backtrack rip by 1 since the int3 would have caused it to
++ * increment by 1.
++ */
++
++int kgdb_skipexception(int exception, struct pt_regs *regs)
++{
++	if (exception == 3 && kgdb_isremovedbreak(regs->rip - 1)) {
++		regs->rip -= 1;
++		return 1;
++	}
++	return 0;
++}
++
++unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs)
++{
++	if (exception == 3) {
++		return instruction_pointer(regs) - 1;
++	}
++	return instruction_pointer(regs);
++}
++
++struct kgdb_arch arch_kgdb_ops = {
++	.gdb_bpt_instr = {0xcc},
++	.flags = KGDB_HW_BREAKPOINT,
++	.shadowth = 1,
++	.set_hw_breakpoint = kgdb_set_hw_break,
++	.remove_hw_breakpoint = kgdb_remove_hw_break,
++	.remove_all_hw_break = kgdb_remove_all_hw_break,
++	.correct_hw_break = kgdb_correct_hw_break,
++};
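
[Editor's note: as a companion to the registration done in kgdb_arch_init() above, a minimal sketch, not part of the patch, of the shape of a die_chain consumer; kgdb_notify is a larger instance of this pattern. example_die_handler and example_nb are hypothetical names; the types and constants are the kernel's own.]

	#include <linux/kdebug.h>
	#include <linux/notifier.h>

	static int example_die_handler(struct notifier_block *self,
				       unsigned long cmd, void *ptr)
	{
		struct die_args *args = ptr;

		if (cmd != DIE_DEBUG)
			return NOTIFY_DONE;	/* not ours; keep walking the chain */
		printk(KERN_INFO "debug trap at %lx\n",
		       instruction_pointer(args->regs));
		return NOTIFY_STOP;		/* consumed; skip default handling */
	}

	static struct notifier_block example_nb = {
		.notifier_call = example_die_handler,
	};
	/* register_die_notifier(&example_nb) wires it in, as kgdb_arch_init() does. */
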
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/mce.c linux-2.6.22-try2/arch/x86_64/kernel/mce.c
+--- linux-2.6.22-570/arch/x86_64/kernel/mce.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/x86_64/kernel/mce.c	2007-12-19 15:29:23.000000000 -0500
+@@ -174,7 +174,7 @@
+ 	if (events != atomic_read(&mce_logged) && trigger[0]) {
+ 		/* Small race window, but should be harmless.  */
+ 		atomic_set(&mce_logged, events);
+-		call_usermodehelper(trigger, trigger_argv, NULL, -1);
++		call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
+ 	}
+ }
+ 
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/signal.c linux-2.6.22-try2/arch/x86_64/kernel/signal.c
+--- linux-2.6.22-570/arch/x86_64/kernel/signal.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/x86_64/kernel/signal.c	2007-12-19 15:29:23.000000000 -0500
+@@ -480,7 +480,7 @@
+ void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
+ { 
+ 	struct task_struct *me = current; 
+-	if (exception_trace)
++	if (show_unhandled_signals && printk_ratelimit())
+ 		printk("%s[%d] bad frame in %s frame:%p rip:%lx rsp:%lx orax:%lx\n",
+ 	       me->comm,me->pid,where,frame,regs->rip,regs->rsp,regs->orig_rax); 
+ 
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/traps.c linux-2.6.22-try2/arch/x86_64/kernel/traps.c
+--- linux-2.6.22-570/arch/x86_64/kernel/traps.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/x86_64/kernel/traps.c	2007-12-19 15:29:23.000000000 -0500
+@@ -96,6 +96,11 @@
+ }
+ 
+ int kstack_depth_to_print = 12;
++#ifdef CONFIG_STACK_UNWIND
++static int call_trace = 1;
++#else
++#define call_trace (-1)
++#endif
+ 
+ #ifdef CONFIG_KALLSYMS
+ void printk_address(unsigned long address)
+@@ -198,6 +203,33 @@
+ 	return NULL;
+ }
+ 
++struct ops_and_data {
++	struct stacktrace_ops *ops;
++	void *data;
++};
++
++static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
++{
++	struct ops_and_data *oad = (struct ops_and_data *)context;
++	int n = 0;
++	unsigned long sp = UNW_SP(info);
++
++	if (arch_unw_user_mode(info))
++		return -1;
++	while (unwind(info) == 0 && UNW_PC(info)) {
++		n++;
++		oad->ops->address(oad->data, UNW_PC(info));
++		if (arch_unw_user_mode(info))
++			break;
++		if ((sp & ~(PAGE_SIZE - 1)) == (UNW_SP(info) & ~(PAGE_SIZE - 1))
++		    && sp > UNW_SP(info))
++			break;
++		sp = UNW_SP(info);
++		touch_nmi_watchdog();
++	}
++	return n;
++}
++
+ #define MSG(txt) ops->warning(data, txt)
+ 
+ /*
+@@ -225,6 +257,40 @@
+ 	if (!tsk)
+ 		tsk = current;
+ 
++	if (call_trace >= 0) {
++		int unw_ret = 0;
++		struct unwind_frame_info info;
++		struct ops_and_data oad = { .ops = ops, .data = data };
++
++		if (regs) {
++			if (unwind_init_frame_info(&info, tsk, regs) == 0)
++				unw_ret = dump_trace_unwind(&info, &oad);
++		} else if (tsk == current)
++			unw_ret = unwind_init_running(&info, dump_trace_unwind,
++						      &oad);
++		else {
++			if (unwind_init_blocked(&info, tsk) == 0)
++				unw_ret = dump_trace_unwind(&info, &oad);
++		}
++		if (unw_ret > 0) {
++			if (call_trace == 1 && !arch_unw_user_mode(&info)) {
++				ops->warning_symbol(data,
++					     "DWARF2 unwinder stuck at %s",
++					     UNW_PC(&info));
++				if ((long)UNW_SP(&info) < 0) {
++					MSG("Leftover inexact backtrace:");
++					stack = (unsigned long *)UNW_SP(&info);
++					if (!stack)
++						goto out;
++				} else
++					MSG("Full inexact backtrace again:");
++			} else if (call_trace >= 1)
++				goto out;
++			else
++				MSG("Full inexact backtrace again:");
++		} else
++			MSG("Inexact backtrace:");
++	}
+ 	if (!stack) {
+ 		unsigned long dummy;
+ 		stack = &dummy;
+@@ -308,6 +374,7 @@
+ 	tinfo = task_thread_info(tsk);
+ 	HANDLE_STACK (valid_stack_ptr(tinfo, stack));
+ #undef HANDLE_STACK
++out:
+ 	put_cpu();
+ }
+ EXPORT_SYMBOL(dump_trace);
+@@ -585,7 +652,8 @@
+ 		tsk->thread.error_code = error_code;
+ 		tsk->thread.trap_no = trapnr;
+ 
+-		if (exception_trace && unhandled_signal(tsk, signr))
++		if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
++		    printk_ratelimit())
+ 			printk(KERN_INFO
+ 			       "%s[%d:#%u] trap %s rip:%lx rsp:%lx error:%lx\n",
+ 			       tsk->comm, tsk->pid, tsk->xid, str,
+@@ -689,7 +757,8 @@
+ 		tsk->thread.error_code = error_code;
+ 		tsk->thread.trap_no = 13;
+ 
+-		if (exception_trace && unhandled_signal(tsk, SIGSEGV))
++		if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
++		    printk_ratelimit())
+ 			printk(KERN_INFO
+ 		       "%s[%d:#%u] general protection rip:%lx rsp:%lx error:%lx\n",
+ 			       tsk->comm, tsk->pid, tsk->xid,
+@@ -1128,3 +1197,21 @@
+ 	return 0;
+ }
+ early_param("kstack", kstack_setup);
++
++#ifdef CONFIG_STACK_UNWIND
++static int __init call_trace_setup(char *s)
++{
++	if (!s)
++		return -EINVAL;
++	if (strcmp(s, "old") == 0)
++		call_trace = -1;
++	else if (strcmp(s, "both") == 0)
++		call_trace = 0;
++	else if (strcmp(s, "newfallback") == 0)
++		call_trace = 1;
++	else if (strcmp(s, "new") == 0)
++		call_trace = 2;
++	return 0;
++}
++early_param("call_trace", call_trace_setup);
++#endif
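
[Editor's note: for reference, call_trace_setup() above, together with the call_trace variable in traps.c, gives the boot parameter these semantics:]

	call_trace=old          # frame-pointer scanner only (call_trace = -1)
	call_trace=both         # DWARF2 unwinder, then the old scanner (0)
	call_trace=newfallback  # unwinder, old scanner on failure (1, default)
	call_trace=new          # DWARF2 unwinder only (2)
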
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/unwind.S linux-2.6.22-try2/arch/x86_64/kernel/unwind.S
+--- linux-2.6.22-570/arch/x86_64/kernel/unwind.S	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/arch/x86_64/kernel/unwind.S	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,38 @@
++/* Assembler support for dwarf2 unwinder */
++#include <linux/linkage.h>
++#include <asm/dwarf2.h>
++#include <asm/segment.h>
++#include <asm/ptrace.h>
++#include <asm/asm-offsets.h>
++
++ENTRY(arch_unwind_init_running)
++	CFI_STARTPROC
++	movq	%r15, R15(%rdi)
++	movq	%r14, R14(%rdi)
++	xchgq	%rsi, %rdx
++	movq	%r13, R13(%rdi)
++	movq	%r12, R12(%rdi)
++	xorl	%eax, %eax
++	movq	%rbp, RBP(%rdi)
++	movq	%rbx, RBX(%rdi)
++	movq	(%rsp), %rcx
++	movq	%rax, R11(%rdi)
++	movq	%rax, R10(%rdi)
++	movq	%rax, R9(%rdi)
++	movq	%rax, R8(%rdi)
++	movq	%rax, RAX(%rdi)
++	movq	%rax, RCX(%rdi)
++	movq	%rax, RDX(%rdi)
++	movq	%rax, RSI(%rdi)
++	movq	%rax, RDI(%rdi)
++	movq	%rax, ORIG_RAX(%rdi)
++	movq	%rcx, RIP(%rdi)
++	leaq	8(%rsp), %rcx
++	movq	$__KERNEL_CS, CS(%rdi)
++	movq	%rax, EFLAGS(%rdi)
++	movq	%rcx, RSP(%rdi)
++	movq	$__KERNEL_DS, SS(%rdi)
++	jmpq	*%rdx
++	CFI_ENDPROC
++ENDPROC(arch_unwind_init_running)
++
+diff -Nurb linux-2.6.22-570/arch/x86_64/kernel/vmlinux.lds.S linux-2.6.22-try2/arch/x86_64/kernel/vmlinux.lds.S
+--- linux-2.6.22-570/arch/x86_64/kernel/vmlinux.lds.S	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/arch/x86_64/kernel/vmlinux.lds.S	2007-12-19 15:29:23.000000000 -0500
+@@ -219,7 +219,9 @@
+   /* Sections to be discarded */
+   /DISCARD/ : {
+ 	*(.exitcall.exit)
++#ifndef CONFIG_UNWIND_INFO
+ 	*(.eh_frame)
++#endif
+ 	}
+ 
+   STABS_DEBUG
+diff -Nurb linux-2.6.22-570/arch/x86_64/mm/fault.c linux-2.6.22-try2/arch/x86_64/mm/fault.c
+--- linux-2.6.22-570/arch/x86_64/mm/fault.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/arch/x86_64/mm/fault.c	2007-12-19 15:29:24.000000000 -0500
+@@ -221,16 +221,6 @@
+ 	return 0;
+ } 
+ 
+-int unhandled_signal(struct task_struct *tsk, int sig)
+-{
+-	if (is_init(tsk))
+-		return 1;
+-	if (tsk->ptrace & PT_PTRACED)
+-		return 0;
+-	return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) ||
+-		(tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL);
+-}
+-
+ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
+ 				 unsigned long error_code)
+ {
+@@ -302,7 +292,7 @@
+ }
+ 
+ int page_fault_trace = 0;
+-int exception_trace = 1;
++int show_unhandled_signals = 1;
+ 
+ /*
+  * This routine handles page faults.  It determines the address,
+@@ -534,6 +524,10 @@
+ 	if (is_errata93(regs, address))
+ 		return; 
+ 
++	if (notify_die(DIE_PAGE_FAULT_NO_CONTEXT, "no context", regs,
++				error_code, 14, SIGSEGV) == NOTIFY_STOP)
++		return;
++
+ /*
+  * Oops. The kernel tried to access some bad page. We'll have to
+  * terminate things with extreme prejudice.
+diff -Nurb linux-2.6.22-570/arch/x86_64/mm/init.c linux-2.6.22-try2/arch/x86_64/mm/init.c
+--- linux-2.6.22-570/arch/x86_64/mm/init.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/arch/x86_64/mm/init.c	2007-12-19 15:29:23.000000000 -0500
+@@ -697,41 +697,6 @@
+ 	return pfn_valid(pte_pfn(*pte));
+ }
+ 
+-#ifdef CONFIG_SYSCTL
+-#include <linux/sysctl.h>
+-
+-extern int exception_trace, page_fault_trace;
+-
+-static ctl_table debug_table2[] = {
+-	{
+-		.ctl_name	= 99,
+-		.procname	= "exception-trace",
+-		.data		= &exception_trace,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= proc_dointvec
+-	},
+-	{}
+-}; 
+-
+-static ctl_table debug_root_table2[] = { 
+-	{
+-		.ctl_name = CTL_DEBUG,
+-		.procname = "debug",
+-		.mode = 0555,
+-		.child = debug_table2
+-	},
+-	{}
+-}; 
+-
+-static __init int x8664_sysctl_init(void)
+-{ 
+-	register_sysctl_table(debug_root_table2);
+-	return 0;
+-}
+-__initcall(x8664_sysctl_init);
+-#endif
+-
+ /* A pseudo VMA to allow ptrace access for the vsyscall page.  This only
+    covers the 64bit vsyscall page now. 32bit has a real VMA now and does
+    not need special handling anymore. */
+diff -Nurb linux-2.6.22-570/crypto/Kconfig linux-2.6.22-try2/crypto/Kconfig
+--- linux-2.6.22-570/crypto/Kconfig	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/crypto/Kconfig	2007-12-19 15:29:23.000000000 -0500
+@@ -1,7 +1,17 @@
+ #
+-# Cryptographic API Configuration
++# Generic algorithms support
++#
++config XOR_BLOCKS
++	tristate
++
+ #
++# async_tx api: hardware offloaded memory transfer/transform support
++#
++source "crypto/async_tx/Kconfig"
+ 
++#
++# Cryptographic API Configuration
++#
+ menu "Cryptographic options"
+ 
+ config CRYPTO
+diff -Nurb linux-2.6.22-570/crypto/Makefile linux-2.6.22-try2/crypto/Makefile
+--- linux-2.6.22-570/crypto/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/crypto/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -50,3 +50,9 @@
+ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
+ 
+ obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
++
++#
++# generic algorithms and the async_tx api
++#
++obj-$(CONFIG_XOR_BLOCKS) += xor.o
++obj-$(CONFIG_ASYNC_CORE) += async_tx/
+diff -Nurb linux-2.6.22-570/crypto/async_tx/Kconfig linux-2.6.22-try2/crypto/async_tx/Kconfig
+--- linux-2.6.22-570/crypto/async_tx/Kconfig	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/crypto/async_tx/Kconfig	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,16 @@
++config ASYNC_CORE
++	tristate
++
++config ASYNC_MEMCPY
++	tristate
++	select ASYNC_CORE
++
++config ASYNC_XOR
++	tristate
++	select ASYNC_CORE
++	select XOR_BLOCKS
++
++config ASYNC_MEMSET
++	tristate
++	select ASYNC_CORE
++
+diff -Nurb linux-2.6.22-570/crypto/async_tx/Makefile linux-2.6.22-try2/crypto/async_tx/Makefile
+--- linux-2.6.22-570/crypto/async_tx/Makefile	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/crypto/async_tx/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,4 @@
++obj-$(CONFIG_ASYNC_CORE) += async_tx.o
++obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o
++obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o
++obj-$(CONFIG_ASYNC_XOR) += async_xor.o
+diff -Nurb linux-2.6.22-570/crypto/async_tx/async_memcpy.c linux-2.6.22-try2/crypto/async_tx/async_memcpy.c
+--- linux-2.6.22-570/crypto/async_tx/async_memcpy.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/crypto/async_tx/async_memcpy.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,131 @@
++/*
++ * copy offload engine support
++ *
++ * Copyright © 2006, Intel Corporation.
++ *
++ *      Dan Williams <dan.j.williams@intel.com>
++ *
++ *      with architecture considerations by:
++ *      Neil Brown <neilb@suse.de>
++ *      Jeff Garzik <jeff@garzik.org>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#include <linux/kernel.h>
++#include <linux/highmem.h>
++#include <linux/mm.h>
++#include <linux/dma-mapping.h>
++#include <linux/async_tx.h>
++
++/**
++ * async_memcpy - attempt to copy memory with a dma engine.
++ * @dest: destination page
++ * @src: src page
++ * @dest_offset: offset into 'dest' to start transaction
++ * @src_offset: offset into 'src' to start transaction
++ * @len: length in bytes
++ * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK,
++ *	ASYNC_TX_KMAP_SRC, ASYNC_TX_KMAP_DST
++ * @depend_tx: memcpy depends on the result of this transaction
++ * @cb_fn: function to call when the memcpy completes
++ * @cb_param: parameter to pass to the callback routine
++ */
++struct dma_async_tx_descriptor *
++async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
++	unsigned int src_offset, size_t len, enum async_tx_flags flags,
++	struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_param)
++{
++	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY);
++	struct dma_device *device = chan ? chan->device : NULL;
++	int int_en = cb_fn ? 1 : 0;
++	struct dma_async_tx_descriptor *tx = device ?
++		device->device_prep_dma_memcpy(chan, len,
++		int_en) : NULL;
++
++	if (tx) { /* run the memcpy asynchronously */
++		dma_addr_t addr;
++		enum dma_data_direction dir;
++
++		pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
++
++		dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
++			DMA_NONE : DMA_FROM_DEVICE;
++
++		addr = dma_map_page(device->dev, dest, dest_offset, len, dir);
++		tx->tx_set_dest(addr, tx, 0);
++
++		dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
++			DMA_NONE : DMA_TO_DEVICE;
++
++		addr = dma_map_page(device->dev, src, src_offset, len, dir);
++		tx->tx_set_src(addr, tx, 0);
++
++		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
++	} else { /* run the memcpy synchronously */
++		void *dest_buf, *src_buf;
++		pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
++
++		/* wait for any prerequisite operations */
++		if (depend_tx) {
++			/* if ack is already set then we cannot be sure
++			 * we are referring to the correct operation
++			 */
++			BUG_ON(depend_tx->ack);
++			if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
++				panic("%s: DMA_ERROR waiting for depend_tx\n",
++					__FUNCTION__);
++		}
++
++		if (flags & ASYNC_TX_KMAP_DST)
++			dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset;
++		else
++			dest_buf = page_address(dest) + dest_offset;
++
++		if (flags & ASYNC_TX_KMAP_SRC)
++			src_buf = kmap_atomic(src, KM_USER0) + src_offset;
++		else
++			src_buf = page_address(src) + src_offset;
++
++		memcpy(dest_buf, src_buf, len);
++
++		if (flags & ASYNC_TX_KMAP_DST)
++			kunmap_atomic(dest_buf, KM_USER0);
++
++		if (flags & ASYNC_TX_KMAP_SRC)
++			kunmap_atomic(src_buf, KM_USER0);
++
++		async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
++	}
++
++	return tx;
++}
++EXPORT_SYMBOL_GPL(async_memcpy);
++
++static int __init async_memcpy_init(void)
++{
++	return 0;
++}
++
++static void __exit async_memcpy_exit(void)
++{
++	do { } while (0);
++}
++
++module_init(async_memcpy_init);
++module_exit(async_memcpy_exit);
++
++MODULE_AUTHOR("Intel Corporation");
++MODULE_DESCRIPTION("asynchronous memcpy api");
++MODULE_LICENSE("GPL");
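
[Editor's note: a minimal caller-side sketch, not part of the patch, of the async_memcpy() API above. copy_one_page() and copy_done() are hypothetical; note that when no DMA channel is available the copy has already completed synchronously by the time async_memcpy() returns NULL.]

	#include <linux/mm.h>
	#include <linux/async_tx.h>

	static void copy_done(void *unused)
	{
		pr_debug("page copy complete\n");
	}

	static struct dma_async_tx_descriptor *
	copy_one_page(struct page *dst, struct page *src)
	{
		/* No dependency (NULL depend_tx), ack immediately:
		 * fire-and-forget with a completion callback. */
		return async_memcpy(dst, src, 0, 0, PAGE_SIZE,
				    ASYNC_TX_ACK, NULL, copy_done, NULL);
	}
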
+diff -Nurb linux-2.6.22-570/crypto/async_tx/async_memset.c linux-2.6.22-try2/crypto/async_tx/async_memset.c
+--- linux-2.6.22-570/crypto/async_tx/async_memset.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/crypto/async_tx/async_memset.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,109 @@
++/*
++ * memory fill offload engine support
++ *
++ * Copyright © 2006, Intel Corporation.
++ *
++ *      Dan Williams <dan.j.williams@intel.com>
++ *
++ *      with architecture considerations by:
++ *      Neil Brown <neilb@suse.de>
++ *      Jeff Garzik <jeff@garzik.org>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#include <linux/kernel.h>
++#include <linux/interrupt.h>
++#include <linux/mm.h>
++#include <linux/dma-mapping.h>
++#include <linux/async_tx.h>
++
++/**
++ * async_memset - attempt to fill memory with a dma engine.
++ * @dest: destination page
++ * @val: fill value
++ * @offset: offset in pages to start transaction
++ * @len: length in bytes
++ * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
++ * @depend_tx: memset depends on the result of this transaction
++ * @cb_fn: function to call when the memcpy completes
++ * @cb_param: parameter to pass to the callback routine
++ */
++struct dma_async_tx_descriptor *
++async_memset(struct page *dest, int val, unsigned int offset,
++	size_t len, enum async_tx_flags flags,
++	struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_param)
++{
++	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET);
++	struct dma_device *device = chan ? chan->device : NULL;
++	int int_en = cb_fn ? 1 : 0;
++	struct dma_async_tx_descriptor *tx = device ?
++		device->device_prep_dma_memset(chan, val, len,
++			int_en) : NULL;
++
++	if (tx) { /* run the memset asynchronously */
++		dma_addr_t dma_addr;
++		enum dma_data_direction dir;
++
++		pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
++		dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
++			DMA_NONE : DMA_FROM_DEVICE;
++
++		dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
++		tx->tx_set_dest(dma_addr, tx, 0);
++
++		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
++	} else { /* run the memset synchronously */
++		void *dest_buf;
++		pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
++
++		dest_buf = (void *) (((char *) page_address(dest)) + offset);
++
++		/* wait for any prerequisite operations */
++		if (depend_tx) {
++			/* if ack is already set then we cannot be sure
++			 * we are referring to the correct operation
++			 */
++			BUG_ON(depend_tx->ack);
++			if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
++				panic("%s: DMA_ERROR waiting for depend_tx\n",
++					__FUNCTION__);
++		}
++
++		memset(dest_buf, val, len);
++
++		async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
++	}
++
++	return tx;
++}
++EXPORT_SYMBOL_GPL(async_memset);
++
++static int __init async_memset_init(void)
++{
++	return 0;
++}
++
++static void __exit async_memset_exit(void)
++{
++	do { } while (0);
++}
++
++module_init(async_memset_init);
++module_exit(async_memset_exit);
++
++MODULE_AUTHOR("Intel Corporation");
++MODULE_DESCRIPTION("asynchronous memset api");
++MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/crypto/async_tx/async_tx.c linux-2.6.22-try2/crypto/async_tx/async_tx.c
+--- linux-2.6.22-570/crypto/async_tx/async_tx.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/crypto/async_tx/async_tx.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,497 @@
++/*
++ * core routines for the asynchronous memory transfer/transform api
++ *
++ * Copyright © 2006, Intel Corporation.
++ *
++ *	Dan Williams <dan.j.williams@intel.com>
++ *
++ *	with architecture considerations by:
++ *	Neil Brown <neilb@suse.de>
++ *	Jeff Garzik <jeff@garzik.org>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#include <linux/kernel.h>
++#include <linux/async_tx.h>
++
++#ifdef CONFIG_DMA_ENGINE
++static enum dma_state_client
++dma_channel_add_remove(struct dma_client *client,
++	struct dma_chan *chan, enum dma_state state);
++
++static struct dma_client async_tx_dma = {
++	.event_callback = dma_channel_add_remove,
++	/* .cap_mask == 0 defaults to all channels */
++};
++
++/**
++ * dma_cap_mask_all - enable iteration over all operation types
++ */
++static dma_cap_mask_t dma_cap_mask_all;
++
++/**
++ * chan_ref_percpu - tracks channel allocations per core/operation
++ */
++struct chan_ref_percpu {
++	struct dma_chan_ref *ref;
++};
++
++static int channel_table_initialized;
++static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END];
++
++/**
++ * async_tx_lock - protect modification of async_tx_master_list and serialize
++ *	rebalance operations
++ */
++static spinlock_t async_tx_lock;
++
++static struct list_head
++async_tx_master_list = LIST_HEAD_INIT(async_tx_master_list);
++
++/* async_tx_issue_pending_all - start all transactions on all channels */
++void async_tx_issue_pending_all(void)
++{
++	struct dma_chan_ref *ref;
++
++	rcu_read_lock();
++	list_for_each_entry_rcu(ref, &async_tx_master_list, node)
++		ref->chan->device->device_issue_pending(ref->chan);
++	rcu_read_unlock();
++}
++EXPORT_SYMBOL_GPL(async_tx_issue_pending_all);
++
++/* dma_wait_for_async_tx - spin wait for a transaction to complete
++ * @tx: transaction to wait on
++ */
++enum dma_status
++dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
++{
++	enum dma_status status;
++	struct dma_async_tx_descriptor *iter;
++
++	if (!tx)
++		return DMA_SUCCESS;
++
++	/* poll through the dependency chain, return when tx is complete */
++	do {
++		iter = tx;
++		while (iter->cookie == -EBUSY)
++			iter = iter->parent;
++
++		status = dma_sync_wait(iter->chan, iter->cookie);
++	} while (status == DMA_IN_PROGRESS || (iter != tx));
++
++	return status;
++}
++EXPORT_SYMBOL_GPL(dma_wait_for_async_tx);
++
++/* async_tx_run_dependencies - helper routine for dma drivers to process
++ *	(start) dependent operations on their target channel
++ * @tx: transaction with dependencies
++ */
++void
++async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
++{
++	struct dma_async_tx_descriptor *dep_tx, *_dep_tx;
++	struct dma_device *dev;
++	struct dma_chan *chan;
++
++	list_for_each_entry_safe(dep_tx, _dep_tx, &tx->depend_list,
++		depend_node) {
++		chan = dep_tx->chan;
++		dev = chan->device;
++		/* we can't depend on ourselves */
++		BUG_ON(chan == tx->chan);
++		list_del(&dep_tx->depend_node);
++		tx->tx_submit(dep_tx);
++
++		/* we need to poke the engine as client code does not
++		 * know about dependency submission events
++		 */
++		dev->device_issue_pending(chan);
++	}
++}
++EXPORT_SYMBOL_GPL(async_tx_run_dependencies);
++
++static void
++free_dma_chan_ref(struct rcu_head *rcu)
++{
++	struct dma_chan_ref *ref;
++	ref = container_of(rcu, struct dma_chan_ref, rcu);
++	kfree(ref);
++}
++
++static void
++init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan)
++{
++	INIT_LIST_HEAD(&ref->node);
++	INIT_RCU_HEAD(&ref->rcu);
++	ref->chan = chan;
++	atomic_set(&ref->count, 0);
++}
++
++/**
++ * get_chan_ref_by_cap - returns the nth channel of the given capability
++ * 	defaults to returning the channel with the desired capability and the
++ * 	lowest reference count if the index cannot be satisfied
++ * @cap: capability to match
++ * @index: nth channel desired, passing -1 has the effect of forcing the
++ *  default return value
++ */
++static struct dma_chan_ref *
++get_chan_ref_by_cap(enum dma_transaction_type cap, int index)
++{
++	struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref;
++
++	rcu_read_lock();
++	list_for_each_entry_rcu(ref, &async_tx_master_list, node)
++		if (dma_has_cap(cap, ref->chan->device->cap_mask)) {
++			if (!min_ref)
++				min_ref = ref;
++			else if (atomic_read(&ref->count) <
++				atomic_read(&min_ref->count))
++				min_ref = ref;
++
++			if (index-- == 0) {
++				ret_ref = ref;
++				break;
++			}
++		}
++	rcu_read_unlock();
++
++	if (!ret_ref)
++		ret_ref = min_ref;
++
++	if (ret_ref)
++		atomic_inc(&ret_ref->count);
++
++	return ret_ref;
++}
++
++/**
++ * async_tx_rebalance - redistribute the available channels, optimize
++ * for cpu isolation in the SMP case, and operation isolation in the
++ * uniprocessor case
++ */
++static void async_tx_rebalance(void)
++{
++	int cpu, cap, cpu_idx = 0;
++	unsigned long flags;
++
++	if (!channel_table_initialized)
++		return;
++
++	spin_lock_irqsave(&async_tx_lock, flags);
++
++	/* undo the last distribution */
++	for_each_dma_cap_mask(cap, dma_cap_mask_all)
++		for_each_possible_cpu(cpu) {
++			struct dma_chan_ref *ref =
++				per_cpu_ptr(channel_table[cap], cpu)->ref;
++			if (ref) {
++				atomic_set(&ref->count, 0);
++				per_cpu_ptr(channel_table[cap], cpu)->ref =
++									NULL;
++			}
++		}
++
++	for_each_dma_cap_mask(cap, dma_cap_mask_all)
++		for_each_online_cpu(cpu) {
++			struct dma_chan_ref *new;
++			if (NR_CPUS > 1)
++				new = get_chan_ref_by_cap(cap, cpu_idx++);
++			else
++				new = get_chan_ref_by_cap(cap, -1);
++
++			per_cpu_ptr(channel_table[cap], cpu)->ref = new;
++		}
++
++	spin_unlock_irqrestore(&async_tx_lock, flags);
++}
++
++static enum dma_state_client
++dma_channel_add_remove(struct dma_client *client,
++	struct dma_chan *chan, enum dma_state state)
++{
++	unsigned long found, flags;
++	struct dma_chan_ref *master_ref, *ref;
++	enum dma_state_client ack = DMA_DUP; /* default: take no action */
++
++	switch (state) {
++	case DMA_RESOURCE_AVAILABLE:
++		found = 0;
++		rcu_read_lock();
++		list_for_each_entry_rcu(ref, &async_tx_master_list, node)
++			if (ref->chan == chan) {
++				found = 1;
++				break;
++			}
++		rcu_read_unlock();
++
++		pr_debug("async_tx: dma resource available [%s]\n",
++			found ? "old" : "new");
++
++		if (!found)
++			ack = DMA_ACK;
++		else
++			break;
++
++		/* add the channel to the generic management list */
++		master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL);
++		if (master_ref) {
++			/* keep a reference until async_tx is unloaded */
++			dma_chan_get(chan);
++			init_dma_chan_ref(master_ref, chan);
++			spin_lock_irqsave(&async_tx_lock, flags);
++			list_add_tail_rcu(&master_ref->node,
++				&async_tx_master_list);
++			spin_unlock_irqrestore(&async_tx_lock,
++				flags);
++		} else {
++			printk(KERN_WARNING "async_tx: unable to create"
++				" new master entry in response to"
++				" a DMA_RESOURCE_AVAILABLE event"
++				" (-ENOMEM)\n");
++			return 0;
++		}
++
++		async_tx_rebalance();
++		break;
++	case DMA_RESOURCE_REMOVED:
++		found = 0;
++		spin_lock_irqsave(&async_tx_lock, flags);
++		list_for_each_entry_rcu(ref, &async_tx_master_list, node)
++			if (ref->chan == chan) {
++				/* permit backing devices to go away */
++				dma_chan_put(ref->chan);
++				list_del_rcu(&ref->node);
++				call_rcu(&ref->rcu, free_dma_chan_ref);
++				found = 1;
++				break;
++			}
++		spin_unlock_irqrestore(&async_tx_lock, flags);
++
++		pr_debug("async_tx: dma resource removed [%s]\n",
++			found ? "ours" : "not ours");
++
++		if (found)
++			ack = DMA_ACK;
++		else
++			break;
++
++		async_tx_rebalance();
++		break;
++	case DMA_RESOURCE_SUSPEND:
++	case DMA_RESOURCE_RESUME:
++		printk(KERN_WARNING "async_tx: does not support dma channel"
++			" suspend/resume\n");
++		break;
++	default:
++		BUG();
++	}
++
++	return ack;
++}
++
++static int __init
++async_tx_init(void)
++{
++	enum dma_transaction_type cap;
++
++	spin_lock_init(&async_tx_lock);
++	bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END);
++
++	/* an interrupt will never be an explicit operation type.
++	 * clearing this bit prevents allocation to a slot in 'channel_table'
++	 */
++	clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits);
++
++	for_each_dma_cap_mask(cap, dma_cap_mask_all) {
++		channel_table[cap] = alloc_percpu(struct chan_ref_percpu);
++		if (!channel_table[cap])
++			goto err;
++	}
++
++	channel_table_initialized = 1;
++	dma_async_client_register(&async_tx_dma);
++	dma_async_client_chan_request(&async_tx_dma);
++
++	printk(KERN_INFO "async_tx: api initialized (async)\n");
++
++	return 0;
++err:
++	printk(KERN_ERR "async_tx: initialization failure\n");
++
++	while (--cap >= 0)
++		free_percpu(channel_table[cap]);
++
++	return 1;
++}
++
++static void __exit async_tx_exit(void)
++{
++	enum dma_transaction_type cap;
++
++	channel_table_initialized = 0;
++
++	for_each_dma_cap_mask(cap, dma_cap_mask_all)
++		if (channel_table[cap])
++			free_percpu(channel_table[cap]);
++
++	dma_async_client_unregister(&async_tx_dma);
++}
++
++/**
++ * async_tx_find_channel - find a channel to carry out the operation or let
++ *	the transaction execute synchronously
++ * @depend_tx: transaction dependency
++ * @tx_type: transaction type
++ */
++struct dma_chan *
++async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
++	enum dma_transaction_type tx_type)
++{
++	/* see if we can keep the chain on one channel */
++	if (depend_tx &&
++		dma_has_cap(tx_type, depend_tx->chan->device->cap_mask))
++		return depend_tx->chan;
++	else if (likely(channel_table_initialized)) {
++		struct dma_chan_ref *ref;
++		int cpu = get_cpu();
++		ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref;
++		put_cpu();
++		return ref ? ref->chan : NULL;
++	} else
++		return NULL;
++}
++EXPORT_SYMBOL_GPL(async_tx_find_channel);
++#else
++static int __init async_tx_init(void)
++{
++	printk(KERN_INFO "async_tx: api initialized (sync-only)\n");
++	return 0;
++}
++
++static void __exit async_tx_exit(void)
++{
++	do { } while (0);
++}
++#endif
++
++void
++async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
++	enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_param)
++{
++	tx->callback = cb_fn;
++	tx->callback_param = cb_param;
++
++	/* set this new tx to run after depend_tx if:
++	 * 1/ a dependency exists (depend_tx is !NULL)
++	 * 2/ the tx can not be submitted to the current channel
++	 */
++	if (depend_tx && depend_tx->chan != chan) {
++		/* if ack is already set then we cannot be sure
++		 * we are referring to the correct operation
++		 */
++		BUG_ON(depend_tx->ack);
++
++		tx->parent = depend_tx;
++		spin_lock_bh(&depend_tx->lock);
++		list_add_tail(&tx->depend_node, &depend_tx->depend_list);
++		if (depend_tx->cookie == 0) {
++			struct dma_chan *dep_chan = depend_tx->chan;
++			struct dma_device *dep_dev = dep_chan->device;
++			dep_dev->device_dependency_added(dep_chan);
++		}
++		spin_unlock_bh(&depend_tx->lock);
++
++		/* schedule an interrupt to trigger the channel switch */
++		async_trigger_callback(ASYNC_TX_ACK, depend_tx, NULL, NULL);
++	} else {
++		tx->parent = NULL;
++		tx->tx_submit(tx);
++	}
++
++	if (flags & ASYNC_TX_ACK)
++		async_tx_ack(tx);
++
++	if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
++		async_tx_ack(depend_tx);
++}
++EXPORT_SYMBOL_GPL(async_tx_submit);
++
++/**
++ * async_trigger_callback - schedules the callback function to be run after
++ * any dependent operations have been completed.
++ * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
++ * @depend_tx: 'callback' requires the completion of this transaction
++ * @cb_fn: function to call after depend_tx completes
++ * @cb_param: parameter to pass to the callback routine
++ */
++struct dma_async_tx_descriptor *
++async_trigger_callback(enum async_tx_flags flags,
++	struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_param)
++{
++	struct dma_chan *chan;
++	struct dma_device *device;
++	struct dma_async_tx_descriptor *tx;
++
++	if (depend_tx) {
++		chan = depend_tx->chan;
++		device = chan->device;
++
++		/* see if we can schedule an interrupt
++		 * otherwise poll for completion
++		 */
++		if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask))
++			device = NULL;
++
++		tx = device ? device->device_prep_dma_interrupt(chan) : NULL;
++	} else
++		tx = NULL;
++
++	if (tx) {
++		pr_debug("%s: (async)\n", __FUNCTION__);
++
++		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
++	} else {
++		pr_debug("%s: (sync)\n", __FUNCTION__);
++
++		/* wait for any prerequisite operations */
++		if (depend_tx) {
++			/* if ack is already set then we cannot be sure
++			 * we are referring to the correct operation
++			 */
++			BUG_ON(depend_tx->ack);
++			if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR)
++				panic("%s: DMA_ERROR waiting for depend_tx\n",
++					__FUNCTION__);
++		}
++
++		async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
++	}
++
++	return tx;
++}
++EXPORT_SYMBOL_GPL(async_trigger_callback);
++
++module_init(async_tx_init);
++module_exit(async_tx_exit);
++
++MODULE_AUTHOR("Intel Corporation");
++MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API");
++MODULE_LICENSE("GPL");
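
[Editor's note: to make the dependency machinery in async_tx_submit() concrete, a sketch, not part of the patch, chaining a copy into an xor through the depend_tx argument. copy_then_xor() and its buffers are hypothetical, and src_cnt must be at least 2 per async_xor()'s BUG_ON.]

	#include <linux/async_tx.h>

	static void copy_then_xor(struct page *dest, struct page *src,
				  struct page **xor_srcs, int src_cnt,
				  size_t len)
	{
		struct dma_async_tx_descriptor *tx;

		/* Step 1: copy.  No ASYNC_TX_ACK yet: the xor depends on it. */
		tx = async_memcpy(dest, src, 0, 0, len, 0, NULL, NULL, NULL);

		/* Step 2: xor on top once the copy completes.  ASYNC_TX_DEP_ACK
		 * releases our reference on the copy descriptor. */
		tx = async_xor(dest, xor_srcs, 0, src_cnt, len,
			       ASYNC_TX_ACK | ASYNC_TX_DEP_ACK, tx, NULL, NULL);

		/* Poke every channel so queued descriptors actually execute. */
		async_tx_issue_pending_all();
	}
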
+diff -Nurb linux-2.6.22-570/crypto/async_tx/async_xor.c linux-2.6.22-try2/crypto/async_tx/async_xor.c
+--- linux-2.6.22-570/crypto/async_tx/async_xor.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/crypto/async_tx/async_xor.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,327 @@
++/*
++ * xor offload engine api
++ *
++ * Copyright © 2006, Intel Corporation.
++ *
++ *      Dan Williams <dan.j.williams@intel.com>
++ *
++ *      with architecture considerations by:
++ *      Neil Brown <neilb@suse.de>
++ *      Jeff Garzik <jeff@garzik.org>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#include <linux/kernel.h>
++#include <linux/interrupt.h>
++#include <linux/mm.h>
++#include <linux/dma-mapping.h>
++#include <linux/raid/xor.h>
++#include <linux/async_tx.h>
++
++static void
++do_async_xor(struct dma_async_tx_descriptor *tx, struct dma_device *device,
++	struct dma_chan *chan, struct page *dest, struct page **src_list,
++	unsigned int offset, unsigned int src_cnt, size_t len,
++	enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_param)
++{
++	dma_addr_t dma_addr;
++	enum dma_data_direction dir;
++	int i;
++
++	pr_debug("%s: len: %zu\n", __FUNCTION__, len);
++
++	dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
++		DMA_NONE : DMA_FROM_DEVICE;
++
++	dma_addr = dma_map_page(device->dev, dest, offset, len, dir);
++	tx->tx_set_dest(dma_addr, tx, 0);
++
++	dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
++		DMA_NONE : DMA_TO_DEVICE;
++
++	for (i = 0; i < src_cnt; i++) {
++		dma_addr = dma_map_page(device->dev, src_list[i],
++			offset, len, dir);
++		tx->tx_set_src(dma_addr, tx, i);
++	}
++
++	async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
++}
++
++static void
++do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset,
++	unsigned int src_cnt, size_t len, enum async_tx_flags flags,
++	struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_param)
++{
++	void *_dest;
++	int i;
++
++	pr_debug("%s: len: %zu\n", __FUNCTION__, len);
++
++	/* reuse the 'src_list' array to convert to buffer pointers */
++	for (i = 0; i < src_cnt; i++)
++		src_list[i] = (struct page *)
++			(page_address(src_list[i]) + offset);
++
++	/* set destination address */
++	_dest = page_address(dest) + offset;
++
++	if (flags & ASYNC_TX_XOR_ZERO_DST)
++		memset(_dest, 0, len);
++
++	xor_blocks(src_cnt, len, _dest,
++		(void **) src_list);
++
++	async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
++}
++
++/**
++ * async_xor - attempt to xor a set of blocks with a dma engine.
++ *	xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST
++ *	flag must be set to not include dest data in the calculation.  The
++ *	assumption with dma engines is that they only use the destination
++ *	buffer as a source when it is explicitly specified in the source list.
++ * @dest: destination page
++ * @src_list: array of source pages (if the dest is also a source it must be
++ *	at index zero).  The contents of this array may be overwritten.
++ * @offset: offset in pages to start transaction
++ * @src_cnt: number of source pages
++ * @len: length in bytes
++ * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DST,
++ *	ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
++ * @depend_tx: xor depends on the result of this transaction.
++ * @cb_fn: function to call when the xor completes
++ * @cb_param: parameter to pass to the callback routine
++ */
++struct dma_async_tx_descriptor *
++async_xor(struct page *dest, struct page **src_list, unsigned int offset,
++	int src_cnt, size_t len, enum async_tx_flags flags,
++	struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_param)
++{
++	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR);
++	struct dma_device *device = chan ? chan->device : NULL;
++	struct dma_async_tx_descriptor *tx = NULL;
++	dma_async_tx_callback _cb_fn;
++	void *_cb_param;
++	unsigned long local_flags;
++	int xor_src_cnt;
++	int i = 0, src_off = 0, int_en;
++
++	BUG_ON(src_cnt <= 1);
++
++	while (src_cnt) {
++		local_flags = flags;
++		if (device) { /* run the xor asynchronously */
++			xor_src_cnt = min(src_cnt, device->max_xor);
++			/* if we are submitting additional xors
++			 * only set the callback on the last transaction
++			 */
++			if (src_cnt > xor_src_cnt) {
++				local_flags &= ~ASYNC_TX_ACK;
++				_cb_fn = NULL;
++				_cb_param = NULL;
++			} else {
++				_cb_fn = cb_fn;
++				_cb_param = cb_param;
++			}
++
++			int_en = _cb_fn ? 1 : 0;
++
++			tx = device->device_prep_dma_xor(
++				chan, xor_src_cnt, len, int_en);
++
++			if (tx) {
++				do_async_xor(tx, device, chan, dest,
++				&src_list[src_off], offset, xor_src_cnt, len,
++				local_flags, depend_tx, _cb_fn,
++				_cb_param);
++			} else /* fall through */
++				goto xor_sync;
++		} else { /* run the xor synchronously */
++xor_sync:
++			/* in the sync case the dest is an implied source
++			 * (assumes the dest is at the src_off index)
++			 */
++			if (flags & ASYNC_TX_XOR_DROP_DST) {
++				src_cnt--;
++				src_off++;
++			}
++
++			/* process up to 'MAX_XOR_BLOCKS' sources */
++			xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
++
++			/* if we are submitting additional xors
++			 * only set the callback on the last transaction
++			 */
++			if (src_cnt > xor_src_cnt) {
++				local_flags &= ~ASYNC_TX_ACK;
++				_cb_fn = NULL;
++				_cb_param = NULL;
++			} else {
++				_cb_fn = cb_fn;
++				_cb_param = cb_param;
++			}
++
++			/* wait for any prerequisite operations */
++			if (depend_tx) {
++				/* if ack is already set then we cannot be sure
++				 * we are referring to the correct operation
++				 */
++				BUG_ON(depend_tx->ack);
++				if (dma_wait_for_async_tx(depend_tx) ==
++					DMA_ERROR)
++					panic("%s: DMA_ERROR waiting for "
++						"depend_tx\n",
++						__FUNCTION__);
++			}
++
++			do_sync_xor(dest, &src_list[src_off], offset,
++				xor_src_cnt, len, local_flags, depend_tx,
++				_cb_fn, _cb_param);
++		}
++
++		/* the previous tx is hidden from the client,
++		 * so ack it
++		 */
++		if (i && depend_tx)
++			async_tx_ack(depend_tx);
++
++		depend_tx = tx;
++
++		if (src_cnt > xor_src_cnt) {
++			/* drop completed sources */
++			src_cnt -= xor_src_cnt;
++			src_off += xor_src_cnt;
++
++			/* unconditionally preserve the destination */
++			flags &= ~ASYNC_TX_XOR_ZERO_DST;
++
++			/* use the intermediate result as a source, but remember
++			 * it's dropped, because it's implied, in the sync case
++			 */
++			src_list[--src_off] = dest;
++			src_cnt++;
++			flags |= ASYNC_TX_XOR_DROP_DST;
++		} else
++			src_cnt = 0;
++		i++;
++	}
++
++	return tx;
++}
++EXPORT_SYMBOL_GPL(async_xor);
++
++static int page_is_zero(struct page *p, unsigned int offset, size_t len)
++{
++	char *a = page_address(p) + offset;
++	return ((*(u32 *) a) == 0 &&
++		memcmp(a, a + 4, len - 4) == 0);
++}
++
++/**
++ * async_xor_zero_sum - attempt a xor parity check with a dma engine.
++ * @dest: destination page used if the xor is performed synchronously
++ * @src_list: array of source pages.  The dest page must be listed as a source
++ * 	at index zero.  The contents of this array may be overwritten.
++ * @offset: offset in pages to start transaction
++ * @src_cnt: number of source pages
++ * @len: length in bytes
++ * @result: 0 if sum == 0 else non-zero
++ * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK
++ * @depend_tx: xor depends on the result of this transaction.
++ * @cb_fn: function to call when the xor completes
++ * @cb_param: parameter to pass to the callback routine
++ */
++struct dma_async_tx_descriptor *
++async_xor_zero_sum(struct page *dest, struct page **src_list,
++	unsigned int offset, int src_cnt, size_t len,
++	u32 *result, enum async_tx_flags flags,
++	struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_param)
++{
++	struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM);
++	struct dma_device *device = chan ? chan->device : NULL;
++	int int_en = cb_fn ? 1 : 0;
++	struct dma_async_tx_descriptor *tx = device ?
++		device->device_prep_dma_zero_sum(chan, src_cnt, len, result,
++			int_en) : NULL;
++	int i;
++
++	BUG_ON(src_cnt <= 1);
++
++	if (tx) {
++		dma_addr_t dma_addr;
++		enum dma_data_direction dir;
++
++		pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
++
++		dir = (flags & ASYNC_TX_ASSUME_COHERENT) ?
++			DMA_NONE : DMA_TO_DEVICE;
++
++		for (i = 0; i < src_cnt; i++) {
++			dma_addr = dma_map_page(device->dev, src_list[i],
++				offset, len, dir);
++			tx->tx_set_src(dma_addr, tx, i);
++		}
++
++		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
++	} else {
++		unsigned long xor_flags = flags;
++
++		pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len);
++
++		xor_flags |= ASYNC_TX_XOR_DROP_DST;
++		xor_flags &= ~ASYNC_TX_ACK;
++
++		tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags,
++			depend_tx, NULL, NULL);
++
++		if (tx) {
++			if (dma_wait_for_async_tx(tx) == DMA_ERROR)
++				panic("%s: DMA_ERROR waiting for tx\n",
++					__FUNCTION__);
++			async_tx_ack(tx);
++		}
++
++		*result = page_is_zero(dest, offset, len) ? 0 : 1;
++
++		tx = NULL;
++
++		async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param);
++	}
++
++	return tx;
++}
++EXPORT_SYMBOL_GPL(async_xor_zero_sum);
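
A hedged sketch of the zero-sum entry point for a RAID-style parity check;
per the kernel-doc above the destination page must also sit at index zero of
the source list (pq, d0, d1, check_done and ctx are placeholders):

	struct page *blocks[3] = { pq, d0, d1 };	/* pq doubles as dest */
	u32 result;

	tx = async_xor_zero_sum(pq, blocks, 0, 3, PAGE_SIZE, &result,
				ASYNC_TX_ACK, NULL, check_done, ctx);
	/* after completion: result == 0 iff pq ^ d0 ^ d1 == 0, parity ok */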
++
++static int __init async_xor_init(void)
++{
++	return 0;
++}
++
++static void __exit async_xor_exit(void)
++{
++	do { } while (0);
++}
++
++module_init(async_xor_init);
++module_exit(async_xor_exit);
++
++MODULE_AUTHOR("Intel Corporation");
++MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api");
++MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/crypto/xor.c linux-2.6.22-try2/crypto/xor.c
+--- linux-2.6.22-570/crypto/xor.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/crypto/xor.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,155 @@
++/*
++ * xor.c : Multiple Devices driver for Linux
++ *
++ * Copyright (C) 1996, 1997, 1998, 1999, 2000,
++ * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
++ *
++ * Dispatch optimized RAID-5 checksumming functions.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2, or (at your option)
++ * any later version.
++ *
++ * You should have received a copy of the GNU General Public License
++ * (for example /usr/src/linux/COPYING); if not, write to the Free
++ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
++ */
++
++#define BH_TRACE 0
++#include <linux/module.h>
++#include <linux/raid/md.h>
++#include <linux/raid/xor.h>
++#include <asm/xor.h>
++
++/* The xor routines to use.  */
++static struct xor_block_template *active_template;
++
++void
++xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
++{
++	unsigned long *p1, *p2, *p3, *p4;
++
++	p1 = (unsigned long *) srcs[0];
++	if (src_count == 1) {
++		active_template->do_2(bytes, dest, p1);
++		return;
++	}
++
++	p2 = (unsigned long *) srcs[1];
++	if (src_count == 2) {
++		active_template->do_3(bytes, dest, p1, p2);
++		return;
++	}
++
++	p3 = (unsigned long *) srcs[2];
++	if (src_count == 3) {
++		active_template->do_4(bytes, dest, p1, p2, p3);
++		return;
++	}
++
++	p4 = (unsigned long *) srcs[3];
++	active_template->do_5(bytes, dest, p1, p2, p3, p4);
++}
++EXPORT_SYMBOL(xor_blocks);
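
A small usage sketch (buffer names assumed): with src_count == 2 the
dispatcher above resolves to active_template->do_3(), computing
dest ^= s1 ^ s2 over 'bytes' bytes; note this version handles at most four
sources per call (do_5).

	void *srcs[2] = { s1, s2 };

	xor_blocks(2, bytes, dest, srcs);	/* dest ^= s1 ^ s2 */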
++
++/* Set of all registered templates.  */
++static struct xor_block_template *template_list;
++
++#define BENCH_SIZE (PAGE_SIZE)
++
++static void
++do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
++{
++	int speed;
++	unsigned long now;
++	int i, count, max;
++
++	tmpl->next = template_list;
++	template_list = tmpl;
++
++	/*
++	 * Count the number of XORs done during a whole jiffy, and use
++	 * this to calculate the speed of checksumming.  The two benchmark
++	 * buffers live in one order-2 (four-page) allocation, giving them a
++	 * deterministic L1-cache color layout.
++	 */
++	max = 0;
++	for (i = 0; i < 5; i++) {
++		now = jiffies;
++		count = 0;
++		while (jiffies == now) {
++			mb(); /* prevent loop optimization */
++			tmpl->do_2(BENCH_SIZE, b1, b2);
++			mb();
++			count++;
++			mb();
++		}
++		if (count > max)
++			max = count;
++	}
++
++	speed = max * (HZ * BENCH_SIZE / 1024);
++	tmpl->speed = speed;
++
++	printk(KERN_INFO "   %-10s: %5d.%03d MB/sec\n", tmpl->name,
++	       speed / 1000, speed % 1000);
++}
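
To make the units concrete: 'max' iterations of BENCH_SIZE bytes fit in one
jiffy (1/HZ s), so max * HZ * BENCH_SIZE is bytes/s, and the division by 1024
turns that into the KB/s figure held in 'speed'. Illustrative numbers
(assumed, not measured):

	/* HZ = 1000, BENCH_SIZE = 4096, max = 600 loops/jiffy:
	 *   speed = 600 * (1000 * 4096 / 1024) = 600 * 4000 = 2400000 KB/s
	 * printed as "2400.000 MB/sec" by the speed/1000, speed%1000 pair.
	 */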
++
++static int __init
++calibrate_xor_blocks(void)
++{
++	void *b1, *b2;
++	struct xor_block_template *f, *fastest;
++
++	b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
++	if (!b1) {
++		printk(KERN_WARNING "xor: Yikes!  No memory available.\n");
++		return -ENOMEM;
++	}
++	b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
++
++	/*
++	 * If this arch/cpu has a short-circuited selection, don't loop through
++	 * all the possible functions, just test the best one
++	 */
++
++	fastest = NULL;
++
++#ifdef XOR_SELECT_TEMPLATE
++	fastest = XOR_SELECT_TEMPLATE(fastest);
++#endif
++
++#define xor_speed(templ)	do_xor_speed((templ), b1, b2)
++
++	if (fastest) {
++		printk(KERN_INFO "xor: automatically using best "
++			"checksumming function: %s\n",
++			fastest->name);
++		xor_speed(fastest);
++	} else {
++		printk(KERN_INFO "xor: measuring software checksum speed\n");
++		XOR_TRY_TEMPLATES;
++		fastest = template_list;
++		for (f = fastest; f; f = f->next)
++			if (f->speed > fastest->speed)
++				fastest = f;
++	}
++
++	printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n",
++	       fastest->name, fastest->speed / 1000, fastest->speed % 1000);
++
++#undef xor_speed
++
++	free_pages((unsigned long)b1, 2);
++
++	active_template = fastest;
++	return 0;
++}
++
++static __exit void xor_exit(void) { }
++
++MODULE_LICENSE("GPL");
++
++/* when built in, xor.o must initialize before drivers/md/md.o */
++core_initcall(calibrate_xor_blocks);
++module_exit(xor_exit);
+diff -Nurb linux-2.6.22-570/drivers/Makefile linux-2.6.22-try2/drivers/Makefile
+--- linux-2.6.22-570/drivers/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/Makefile	2007-12-19 15:29:19.000000000 -0500
+@@ -70,6 +70,7 @@
+ obj-$(CONFIG_MCA)		+= mca/
+ obj-$(CONFIG_EISA)		+= eisa/
+ obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
++obj-$(CONFIG_CPU_IDLE)		+= cpuidle/
+ obj-$(CONFIG_MMC)		+= mmc/
+ obj-$(CONFIG_NEW_LEDS)		+= leds/
+ obj-$(CONFIG_INFINIBAND)	+= infiniband/
+diff -Nurb linux-2.6.22-570/drivers/acpi/Kconfig linux-2.6.22-try2/drivers/acpi/Kconfig
+--- linux-2.6.22-570/drivers/acpi/Kconfig	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/acpi/Kconfig	2007-12-19 15:29:19.000000000 -0500
+@@ -124,7 +124,7 @@
+ 
+ config ACPI_VIDEO
+ 	tristate "Video"
+-	depends on X86 && BACKLIGHT_CLASS_DEVICE
++	depends on X86 && BACKLIGHT_CLASS_DEVICE && VIDEO_OUTPUT_CONTROL
+ 	help
+ 	  This driver implement the ACPI Extensions For Display Adapters
+ 	  for integrated graphics devices on motherboard, as specified in
+@@ -280,6 +280,14 @@
+ 	  of verbosity. Saying Y enables these statements. This will increase
+ 	  your kernel size by around 50K.
+ 
++config ACPI_DEBUG_FUNC_TRACE
++	bool "Additionally enable ACPI function tracing"
++	default n
++	depends on ACPI_DEBUG
++	help
++	  ACPI debug statements slow down ACPI processing. Function tracing
++	  accounts for about half of that penalty and is rarely useful.
++
+ config ACPI_EC
+ 	bool
+ 	default y
+diff -Nurb linux-2.6.22-570/drivers/acpi/battery.c linux-2.6.22-try2/drivers/acpi/battery.c
+--- linux-2.6.22-570/drivers/acpi/battery.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/acpi/battery.c	2007-12-19 15:29:20.000000000 -0500
+@@ -43,21 +43,30 @@
+ #define ACPI_BATTERY_CLASS		"battery"
+ #define ACPI_BATTERY_HID		"PNP0C0A"
+ #define ACPI_BATTERY_DEVICE_NAME	"Battery"
+-#define ACPI_BATTERY_FILE_INFO		"info"
+-#define ACPI_BATTERY_FILE_STATUS	"state"
+-#define ACPI_BATTERY_FILE_ALARM		"alarm"
+ #define ACPI_BATTERY_NOTIFY_STATUS	0x80
+ #define ACPI_BATTERY_NOTIFY_INFO	0x81
+ #define ACPI_BATTERY_UNITS_WATTS	"mW"
+ #define ACPI_BATTERY_UNITS_AMPS		"mA"
+ 
+ #define _COMPONENT		ACPI_BATTERY_COMPONENT
++
++#define ACPI_BATTERY_UPDATE_TIME	0
++
++#define ACPI_BATTERY_NONE_UPDATE	0
++#define ACPI_BATTERY_EASY_UPDATE	1
++#define ACPI_BATTERY_INIT_UPDATE	2
++
+ ACPI_MODULE_NAME("battery");
+ 
+ MODULE_AUTHOR("Paul Diefenbaugh");
+ MODULE_DESCRIPTION("ACPI Battery Driver");
+ MODULE_LICENSE("GPL");
+ 
++static unsigned int update_time = ACPI_BATTERY_UPDATE_TIME;
++
++/* 0 - refresh on every read; > 0 - refresh at most every update_time seconds */
++module_param(update_time, uint, 0644);
++
+ extern struct proc_dir_entry *acpi_lock_battery_dir(void);
+ extern void *acpi_unlock_battery_dir(struct proc_dir_entry *acpi_battery_dir);
+ 
+@@ -76,7 +85,7 @@
+ 		},
+ };
+ 
+-struct acpi_battery_status {
++struct acpi_battery_state {
+ 	acpi_integer state;
+ 	acpi_integer present_rate;
+ 	acpi_integer remaining_capacity;
+@@ -99,33 +108,111 @@
+ 	acpi_string oem_info;
+ };
+ 
+-struct acpi_battery_flags {
+-	u8 present:1;		/* Bay occupied? */
+-	u8 power_unit:1;	/* 0=watts, 1=apms */
+-	u8 alarm:1;		/* _BTP present? */
+-	u8 reserved:5;
++enum acpi_battery_files{
++	ACPI_BATTERY_INFO = 0,
++	ACPI_BATTERY_STATE,
++	ACPI_BATTERY_ALARM,
++	ACPI_BATTERY_NUMFILES,
+ };
+ 
+-struct acpi_battery_trips {
+-	unsigned long warning;
+-	unsigned long low;
++struct acpi_battery_flags {
++	u8 battery_present_prev;
++	u8 alarm_present;
++	u8 init_update;
++	u8 update[ACPI_BATTERY_NUMFILES];
++	u8 power_unit;
+ };
+ 
+ struct acpi_battery {
+-	struct acpi_device * device;
++	struct mutex mutex;
++	struct acpi_device *device;
+ 	struct acpi_battery_flags flags;
+-	struct acpi_battery_trips trips;
++	struct acpi_buffer bif_data;
++	struct acpi_buffer bst_data;
+ 	unsigned long alarm;
+-	struct acpi_battery_info *info;
++	unsigned long update_time[ACPI_BATTERY_NUMFILES];
+ };
+ 
++inline int acpi_battery_present(struct acpi_battery *battery)
++{
++	return battery->device->status.battery_present;
++}
++
++inline char *acpi_battery_power_units(struct acpi_battery *battery)
++{
++	if (battery->flags.power_unit)
++		return ACPI_BATTERY_UNITS_AMPS;
++	else
++		return ACPI_BATTERY_UNITS_WATTS;
++}
++
++inline acpi_handle acpi_battery_handle(struct acpi_battery *battery)
++{
++	return battery->device->handle;
++}
++
+ /* --------------------------------------------------------------------------
+                                Battery Management
+    -------------------------------------------------------------------------- */
+ 
+-static int
+-acpi_battery_get_info(struct acpi_battery *battery,
+-		      struct acpi_battery_info **bif)
++static void acpi_battery_check_result(struct acpi_battery *battery, int result)
++{
++	if (!battery)
++		return;
++
++	if (result) {
++		battery->flags.init_update = 1;
++	}
++}
++
++static int acpi_battery_extract_package(struct acpi_battery *battery,
++					union acpi_object *package,
++					struct acpi_buffer *format,
++					struct acpi_buffer *data,
++					char *package_name)
++{
++	acpi_status status = AE_OK;
++	struct acpi_buffer data_null = { 0, NULL };
++
++	status = acpi_extract_package(package, format, &data_null);
++	if (status != AE_BUFFER_OVERFLOW) {
++		ACPI_EXCEPTION((AE_INFO, status, "Extracting size %s",
++				package_name));
++		return -ENODEV;
++	}
++
++	if (data_null.length != data->length) {
++		kfree(data->pointer);
++		data->pointer = kzalloc(data_null.length, GFP_KERNEL);
++		if (!data->pointer) {
++			ACPI_EXCEPTION((AE_INFO, AE_NO_MEMORY, "kzalloc()"));
++			return -ENOMEM;
++		}
++		data->length = data_null.length;
++	}
++
++	status = acpi_extract_package(package, format, data);
++	if (ACPI_FAILURE(status)) {
++		ACPI_EXCEPTION((AE_INFO, status, "Extracting %s",
++				package_name));
++		return -ENODEV;
++	}
++
++	return 0;
++}
++
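
acpi_battery_extract_package() above leans on acpi_extract_package()'s
two-call protocol; stripped of the buffer-reuse bookkeeping, the pattern is
the following sketch ('probe' is an illustrative name):

	struct acpi_buffer probe = { 0, NULL };

	/* pass 1: zero-length buffer -> AE_BUFFER_OVERFLOW, and ACPICA
	 * writes the required size into probe.length */
	status = acpi_extract_package(package, format, &probe);
	if (status == AE_BUFFER_OVERFLOW) {
		probe.pointer = kzalloc(probe.length, GFP_KERNEL);
		if (probe.pointer)	/* pass 2: the real extraction */
			status = acpi_extract_package(package, format,
						      &probe);
	}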
++static int acpi_battery_get_status(struct acpi_battery *battery)
++{
++	int result = 0;
++
++	result = acpi_bus_get_status(battery->device);
++	if (result) {
++		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "Evaluating _STA"));
++		return -ENODEV;
++	}
++	return result;
++}
++
++static int acpi_battery_get_info(struct acpi_battery *battery)
+ {
+ 	int result = 0;
+ 	acpi_status status = 0;
+@@ -133,16 +220,20 @@
+ 	struct acpi_buffer format = { sizeof(ACPI_BATTERY_FORMAT_BIF),
+ 		ACPI_BATTERY_FORMAT_BIF
+ 	};
+-	struct acpi_buffer data = { 0, NULL };
+ 	union acpi_object *package = NULL;
++	struct acpi_buffer *data = NULL;
++	struct acpi_battery_info *bif = NULL;
+ 
++	battery->update_time[ACPI_BATTERY_INFO] = get_seconds();
+ 
+-	if (!battery || !bif)
+-		return -EINVAL;
++	if (!acpi_battery_present(battery))
++		return 0;
+ 
+-	/* Evalute _BIF */
++	/* Evaluate _BIF */
+ 
+-	status = acpi_evaluate_object(battery->device->handle, "_BIF", NULL, &buffer);
++	status =
++	    acpi_evaluate_object(acpi_battery_handle(battery), "_BIF", NULL,
++				 &buffer);
+ 	if (ACPI_FAILURE(status)) {
+ 		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BIF"));
+ 		return -ENODEV;
+@@ -150,41 +241,29 @@
+ 
+ 	package = buffer.pointer;
+ 
+-	/* Extract Package Data */
+-
+-	status = acpi_extract_package(package, &format, &data);
+-	if (status != AE_BUFFER_OVERFLOW) {
+-		ACPI_EXCEPTION((AE_INFO, status, "Extracting _BIF"));
+-		result = -ENODEV;
+-		goto end;
+-	}
++	data = &battery->bif_data;
+ 
+-	data.pointer = kzalloc(data.length, GFP_KERNEL);
+-	if (!data.pointer) {
+-		result = -ENOMEM;
+-		goto end;
+-	}
++	/* Extract Package Data */
+ 
+-	status = acpi_extract_package(package, &format, &data);
+-	if (ACPI_FAILURE(status)) {
+-		ACPI_EXCEPTION((AE_INFO, status, "Extracting _BIF"));
+-		kfree(data.pointer);
+-		result = -ENODEV;
++	result =
++	    acpi_battery_extract_package(battery, package, &format, data,
++					 "_BIF");
++	if (result)
+ 		goto end;
+-	}
+ 
+       end:
++
+ 	kfree(buffer.pointer);
+ 
+-	if (!result)
+-		(*bif) = data.pointer;
++	if (!result) {
++		bif = data->pointer;
++		battery->flags.power_unit = bif->power_unit;
++	}
+ 
+ 	return result;
+ }
+ 
+-static int
+-acpi_battery_get_status(struct acpi_battery *battery,
+-			struct acpi_battery_status **bst)
++static int acpi_battery_get_state(struct acpi_battery *battery)
+ {
+ 	int result = 0;
+ 	acpi_status status = 0;
+@@ -192,16 +271,19 @@
+ 	struct acpi_buffer format = { sizeof(ACPI_BATTERY_FORMAT_BST),
+ 		ACPI_BATTERY_FORMAT_BST
+ 	};
+-	struct acpi_buffer data = { 0, NULL };
+ 	union acpi_object *package = NULL;
++	struct acpi_buffer *data = NULL;
+ 
++	battery->update_time[ACPI_BATTERY_STATE] = get_seconds();
+ 
+-	if (!battery || !bst)
+-		return -EINVAL;
++	if (!acpi_battery_present(battery))
++		return 0;
+ 
+-	/* Evalute _BST */
++	/* Evaluate _BST */
+ 
+-	status = acpi_evaluate_object(battery->device->handle, "_BST", NULL, &buffer);
++	status =
++	    acpi_evaluate_object(acpi_battery_handle(battery), "_BST", NULL,
++				 &buffer);
+ 	if (ACPI_FAILURE(status)) {
+ 		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _BST"));
+ 		return -ENODEV;
+@@ -209,55 +291,49 @@
+ 
+ 	package = buffer.pointer;
+ 
+-	/* Extract Package Data */
++	data = &battery->bst_data;
+ 
+-	status = acpi_extract_package(package, &format, &data);
+-	if (status != AE_BUFFER_OVERFLOW) {
+-		ACPI_EXCEPTION((AE_INFO, status, "Extracting _BST"));
+-		result = -ENODEV;
+-		goto end;
+-	}
+-
+-	data.pointer = kzalloc(data.length, GFP_KERNEL);
+-	if (!data.pointer) {
+-		result = -ENOMEM;
+-		goto end;
+-	}
++	/* Extract Package Data */
+ 
+-	status = acpi_extract_package(package, &format, &data);
+-	if (ACPI_FAILURE(status)) {
+-		ACPI_EXCEPTION((AE_INFO, status, "Extracting _BST"));
+-		kfree(data.pointer);
+-		result = -ENODEV;
++	result =
++	    acpi_battery_extract_package(battery, package, &format, data,
++					 "_BST");
++	if (result)
+ 		goto end;
+-	}
+ 
+       end:
+ 	kfree(buffer.pointer);
+ 
+-	if (!result)
+-		(*bst) = data.pointer;
+-
+ 	return result;
+ }
+ 
+-static int
+-acpi_battery_set_alarm(struct acpi_battery *battery, unsigned long alarm)
++static int acpi_battery_get_alarm(struct acpi_battery *battery)
++{
++	battery->update_time[ACPI_BATTERY_ALARM] = get_seconds();
++
++	return 0;
++}
++
++static int acpi_battery_set_alarm(struct acpi_battery *battery,
++				  unsigned long alarm)
+ {
+ 	acpi_status status = 0;
+ 	union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+ 	struct acpi_object_list arg_list = { 1, &arg0 };
+ 
++	battery->update_time[ACPI_BATTERY_ALARM] = get_seconds();
+ 
+-	if (!battery)
+-		return -EINVAL;
++	if (!acpi_battery_present(battery))
++		return -ENODEV;
+ 
+-	if (!battery->flags.alarm)
++	if (!battery->flags.alarm_present)
+ 		return -ENODEV;
+ 
+ 	arg0.integer.value = alarm;
+ 
+-	status = acpi_evaluate_object(battery->device->handle, "_BTP", &arg_list, NULL);
++	status =
++	    acpi_evaluate_object(acpi_battery_handle(battery), "_BTP",
++				 &arg_list, NULL);
+ 	if (ACPI_FAILURE(status))
+ 		return -ENODEV;
+ 
+@@ -268,65 +344,114 @@
+ 	return 0;
+ }
+ 
+-static int acpi_battery_check(struct acpi_battery *battery)
++static int acpi_battery_init_alarm(struct acpi_battery *battery)
+ {
+ 	int result = 0;
+ 	acpi_status status = AE_OK;
+ 	acpi_handle handle = NULL;
+-	struct acpi_device *device = NULL;
+-	struct acpi_battery_info *bif = NULL;
++	struct acpi_battery_info *bif = battery->bif_data.pointer;
++	unsigned long alarm = battery->alarm;
+ 
++	/* See if alarms are supported, and if so, set default */
+ 
+-	if (!battery)
+-		return -EINVAL;
++	status = acpi_get_handle(acpi_battery_handle(battery), "_BTP", &handle);
++	if (ACPI_SUCCESS(status)) {
++		battery->flags.alarm_present = 1;
++		if (!alarm && bif) {
++			alarm = bif->design_capacity_warning;
++		}
++		result = acpi_battery_set_alarm(battery, alarm);
++		if (result)
++			goto end;
++	} else {
++		battery->flags.alarm_present = 0;
++	}
+ 
+-	device = battery->device;
++      end:
+ 
+-	result = acpi_bus_get_status(device);
+-	if (result)
+ 		return result;
++}
+ 
+-	/* Insertion? */
+-
+-	if (!battery->flags.present && device->status.battery_present) {
++static int acpi_battery_init_update(struct acpi_battery *battery)
++{
++	int result = 0;
+ 
+-		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Battery inserted\n"));
++	result = acpi_battery_get_status(battery);
++	if (result)
++		return result;
+ 
+-		/* Evalute _BIF to get certain static information */
++	battery->flags.battery_present_prev = acpi_battery_present(battery);
+ 
+-		result = acpi_battery_get_info(battery, &bif);
++	if (acpi_battery_present(battery)) {
++		result = acpi_battery_get_info(battery);
++		if (result)
++			return result;
++		result = acpi_battery_get_state(battery);
+ 		if (result)
+ 			return result;
+ 
+-		battery->flags.power_unit = bif->power_unit;
+-		battery->trips.warning = bif->design_capacity_warning;
+-		battery->trips.low = bif->design_capacity_low;
+-		kfree(bif);
++		acpi_battery_init_alarm(battery);
++	}
+ 
+-		/* See if alarms are supported, and if so, set default */
++	return result;
++}
+ 
+-		status = acpi_get_handle(battery->device->handle, "_BTP", &handle);
+-		if (ACPI_SUCCESS(status)) {
+-			battery->flags.alarm = 1;
+-			acpi_battery_set_alarm(battery, battery->trips.warning);
++static int acpi_battery_update(struct acpi_battery *battery,
++			       int update, int *update_result_ptr)
++{
++	int result = 0;
++	int update_result = ACPI_BATTERY_NONE_UPDATE;
++
++	if (!acpi_battery_present(battery)) {
++		update = 1;
++	}
++
++	if (battery->flags.init_update) {
++		result = acpi_battery_init_update(battery);
++		if (result)
++			goto end;
++		update_result = ACPI_BATTERY_INIT_UPDATE;
++	} else if (update) {
++		result = acpi_battery_get_status(battery);
++		if (result)
++			goto end;
++		if ((!battery->flags.battery_present_prev && acpi_battery_present(battery))
++		    || (battery->flags.battery_present_prev && !acpi_battery_present(battery))) {
++			result = acpi_battery_init_update(battery);
++			if (result)
++				goto end;
++			update_result = ACPI_BATTERY_INIT_UPDATE;
++		} else {
++			update_result = ACPI_BATTERY_EASY_UPDATE;
+ 		}
+ 	}
+ 
+-	/* Removal? */
++      end:
+ 
+-	else if (battery->flags.present && !device->status.battery_present) {
+-		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Battery removed\n"));
+-	}
++	battery->flags.init_update = (result != 0);
+ 
+-	battery->flags.present = device->status.battery_present;
++	*update_result_ptr = update_result;
+ 
+ 	return result;
+ }
+ 
+-static void acpi_battery_check_present(struct acpi_battery *battery)
++static void acpi_battery_notify_update(struct acpi_battery *battery)
+ {
+-	if (!battery->flags.present) {
+-		acpi_battery_check(battery);
++	acpi_battery_get_status(battery);
++
++	if (battery->flags.init_update) {
++		return;
++	}
++
++	if ((!battery->flags.battery_present_prev &&
++	     acpi_battery_present(battery)) ||
++	    (battery->flags.battery_present_prev &&
++	     !acpi_battery_present(battery))) {
++		battery->flags.init_update = 1;
++	} else {
++		battery->flags.update[ACPI_BATTERY_INFO] = 1;
++		battery->flags.update[ACPI_BATTERY_STATE] = 1;
++		battery->flags.update[ACPI_BATTERY_ALARM] = 1;
+ 	}
+ }
+ 
+@@ -335,37 +460,33 @@
+    -------------------------------------------------------------------------- */
+ 
+ static struct proc_dir_entry *acpi_battery_dir;
+-static int acpi_battery_read_info(struct seq_file *seq, void *offset)
++
++static int acpi_battery_print_info(struct seq_file *seq, int result)
+ {
+-	int result = 0;
+ 	struct acpi_battery *battery = seq->private;
+ 	struct acpi_battery_info *bif = NULL;
+ 	char *units = "?";
+ 
+-
+-	if (!battery)
++	if (result)
+ 		goto end;
+ 
+-	acpi_battery_check_present(battery);
+-
+-	if (battery->flags.present)
++	if (acpi_battery_present(battery))
+ 		seq_printf(seq, "present:                 yes\n");
+ 	else {
+ 		seq_printf(seq, "present:                 no\n");
+ 		goto end;
+ 	}
+ 
+-	/* Battery Info (_BIF) */
+-
+-	result = acpi_battery_get_info(battery, &bif);
+-	if (result || !bif) {
+-		seq_printf(seq, "ERROR: Unable to read battery information\n");
++	bif = battery->bif_data.pointer;
++	if (!bif) {
++		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "BIF buffer is NULL"));
++		result = -ENODEV;
+ 		goto end;
+ 	}
+ 
+-	units =
+-	    bif->
+-	    power_unit ? ACPI_BATTERY_UNITS_AMPS : ACPI_BATTERY_UNITS_WATTS;
++	/* Battery Units */
++
++	units = acpi_battery_power_units(battery);
+ 
+ 	if (bif->design_capacity == ACPI_BATTERY_VALUE_UNKNOWN)
+ 		seq_printf(seq, "design capacity:         unknown\n");
+@@ -396,7 +517,6 @@
+ 	else
+ 		seq_printf(seq, "design voltage:          %d mV\n",
+ 			   (u32) bif->design_voltage);
+-
+ 	seq_printf(seq, "design capacity warning: %d %sh\n",
+ 		   (u32) bif->design_capacity_warning, units);
+ 	seq_printf(seq, "design capacity low:     %d %sh\n",
+@@ -411,50 +531,40 @@
+ 	seq_printf(seq, "OEM info:                %s\n", bif->oem_info);
+ 
+       end:
+-	kfree(bif);
+ 
+-	return 0;
+-}
++	if (result)
++		seq_printf(seq, "ERROR: Unable to read battery info\n");
+ 
+-static int acpi_battery_info_open_fs(struct inode *inode, struct file *file)
+-{
+-	return single_open(file, acpi_battery_read_info, PDE(inode)->data);
++	return result;
+ }
+ 
+-static int acpi_battery_read_state(struct seq_file *seq, void *offset)
++static int acpi_battery_print_state(struct seq_file *seq, int result)
+ {
+-	int result = 0;
+ 	struct acpi_battery *battery = seq->private;
+-	struct acpi_battery_status *bst = NULL;
++	struct acpi_battery_state *bst = NULL;
+ 	char *units = "?";
+ 
+-
+-	if (!battery)
++	if (result)
+ 		goto end;
+ 
+-	acpi_battery_check_present(battery);
+-
+-	if (battery->flags.present)
++	if (acpi_battery_present(battery))
+ 		seq_printf(seq, "present:                 yes\n");
+ 	else {
+ 		seq_printf(seq, "present:                 no\n");
+ 		goto end;
+ 	}
+ 
+-	/* Battery Units */
+-
+-	units =
+-	    battery->flags.
+-	    power_unit ? ACPI_BATTERY_UNITS_AMPS : ACPI_BATTERY_UNITS_WATTS;
+-
+-	/* Battery Status (_BST) */
+-
+-	result = acpi_battery_get_status(battery, &bst);
+-	if (result || !bst) {
+-		seq_printf(seq, "ERROR: Unable to read battery status\n");
++	bst = battery->bst_data.pointer;
++	if (!bst) {
++		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "BST buffer is NULL"));
++		result = -ENODEV;
+ 		goto end;
+ 	}
+ 
++	/* Battery Units */
++
++	units = acpi_battery_power_units(battery);
++
+ 	if (!(bst->state & 0x04))
+ 		seq_printf(seq, "capacity state:          ok\n");
+ 	else
+@@ -490,48 +600,43 @@
+ 			   (u32) bst->present_voltage);
+ 
+       end:
+-	kfree(bst);
+ 
+-	return 0;
+-}
++	if (result) {
++		seq_printf(seq, "ERROR: Unable to read battery state\n");
++	}
+ 
+-static int acpi_battery_state_open_fs(struct inode *inode, struct file *file)
+-{
+-	return single_open(file, acpi_battery_read_state, PDE(inode)->data);
++	return result;
+ }
+ 
+-static int acpi_battery_read_alarm(struct seq_file *seq, void *offset)
++static int acpi_battery_print_alarm(struct seq_file *seq, int result)
+ {
+ 	struct acpi_battery *battery = seq->private;
+ 	char *units = "?";
+ 
+-
+-	if (!battery)
++	if (result)
+ 		goto end;
+ 
+-	acpi_battery_check_present(battery);
+-
+-	if (!battery->flags.present) {
++	if (!acpi_battery_present(battery)) {
+ 		seq_printf(seq, "present:                 no\n");
+ 		goto end;
+ 	}
+ 
+ 	/* Battery Units */
+ 
+-	units =
+-	    battery->flags.
+-	    power_unit ? ACPI_BATTERY_UNITS_AMPS : ACPI_BATTERY_UNITS_WATTS;
+-
+-	/* Battery Alarm */
++	units = acpi_battery_power_units(battery);
+ 
+ 	seq_printf(seq, "alarm:                   ");
+ 	if (!battery->alarm)
+ 		seq_printf(seq, "unsupported\n");
+ 	else
+-		seq_printf(seq, "%d %sh\n", (u32) battery->alarm, units);
++		seq_printf(seq, "%lu %sh\n", battery->alarm, units);
+ 
+       end:
+-	return 0;
++
++	if (result)
++		seq_printf(seq, "ERROR: Unable to read battery alarm\n");
++
++	return result;
+ }
+ 
+ static ssize_t
+@@ -543,27 +648,113 @@
+ 	char alarm_string[12] = { '\0' };
+ 	struct seq_file *m = file->private_data;
+ 	struct acpi_battery *battery = m->private;
+-
++	int update_result = ACPI_BATTERY_NONE_UPDATE;
+ 
+ 	if (!battery || (count > sizeof(alarm_string) - 1))
+ 		return -EINVAL;
+ 
+-	acpi_battery_check_present(battery);
++	mutex_lock(&battery->mutex);
+ 
+-	if (!battery->flags.present)
+-		return -ENODEV;
++	result = acpi_battery_update(battery, 1, &update_result);
++	if (result) {
++		result = -ENODEV;
++		goto end;
++	}
+ 
+-	if (copy_from_user(alarm_string, buffer, count))
+-		return -EFAULT;
++	if (!acpi_battery_present(battery)) {
++		result = -ENODEV;
++		goto end;
++	}
++
++	if (copy_from_user(alarm_string, buffer, count)) {
++		result = -EFAULT;
++		goto end;
++	}
+ 
+ 	alarm_string[count] = '\0';
+ 
+ 	result = acpi_battery_set_alarm(battery,
+ 					simple_strtoul(alarm_string, NULL, 0));
+ 	if (result)
++		goto end;
++
++      end:
++
++	acpi_battery_check_result(battery, result);
++
++	if (!result)
++		result = count;
++
++	mutex_unlock(&battery->mutex);
++
+ 		return result;
++}
++
++typedef int (*print_func)(struct seq_file *seq, int result);
++typedef int (*get_func)(struct acpi_battery *battery);
++
++static struct acpi_read_mux {
++	print_func print;
++	get_func get;
++} acpi_read_funcs[ACPI_BATTERY_NUMFILES] = {
++	{.get = acpi_battery_get_info, .print = acpi_battery_print_info},
++	{.get = acpi_battery_get_state, .print = acpi_battery_print_state},
++	{.get = acpi_battery_get_alarm, .print = acpi_battery_print_alarm},
++};
++
++static int acpi_battery_read(int fid, struct seq_file *seq)
++{
++	struct acpi_battery *battery = seq->private;
++	int result = 0;
++	int update_result = ACPI_BATTERY_NONE_UPDATE;
++	int update = 0;
++
++	mutex_lock(&battery->mutex);
++
++	update = (get_seconds() - battery->update_time[fid] >= update_time);
++	update = (update | battery->flags.update[fid]);
++
++	result = acpi_battery_update(battery, update, &update_result);
++	if (result)
++		goto end;
++
++	if (update_result == ACPI_BATTERY_EASY_UPDATE) {
++		result = acpi_read_funcs[fid].get(battery);
++		if (result)
++			goto end;
++	}
+ 
+-	return count;
++      end:
++	result = acpi_read_funcs[fid].print(seq, result);
++	acpi_battery_check_result(battery, result);
++	battery->flags.update[fid] = result;
++	mutex_unlock(&battery->mutex);
++	return result;
++}
++
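
The staleness test above is what the update_time module parameter governs; a
hypothetical timeline with update_time=30 (the default of 0 refreshes on
every read):

	/* t =  0 s: read state -> cache older than 30 s, _BST evaluated
	 * t = 10 s: read state -> cached _BST printed, no AML executed
	 * t = 35 s: read state -> stale again, _BST re-evaluated
	 * (flags.update[fid], set by the notify path, forces a refresh
	 *  regardless of age)
	 */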
++static int acpi_battery_read_info(struct seq_file *seq, void *offset)
++{
++	return acpi_battery_read(ACPI_BATTERY_INFO, seq);
++}
++
++static int acpi_battery_read_state(struct seq_file *seq, void *offset)
++{
++	return acpi_battery_read(ACPI_BATTERY_STATE, seq);
++}
++
++static int acpi_battery_read_alarm(struct seq_file *seq, void *offset)
++{
++	return acpi_battery_read(ACPI_BATTERY_ALARM, seq);
++}
++
++static int acpi_battery_info_open_fs(struct inode *inode, struct file *file)
++{
++	return single_open(file, acpi_battery_read_info, PDE(inode)->data);
++}
++
++static int acpi_battery_state_open_fs(struct inode *inode, struct file *file)
++{
++	return single_open(file, acpi_battery_read_state, PDE(inode)->data);
+ }
+ 
+ static int acpi_battery_alarm_open_fs(struct inode *inode, struct file *file)
+@@ -571,35 +762,51 @@
+ 	return single_open(file, acpi_battery_read_alarm, PDE(inode)->data);
+ }
+ 
+-static const struct file_operations acpi_battery_info_ops = {
++static struct battery_file {
++	struct file_operations ops;
++	mode_t mode;
++	char *name;
++} acpi_battery_file[] = {
++	{
++	.name = "info",
++	.mode = S_IRUGO,
++	.ops = {
+ 	.open = acpi_battery_info_open_fs,
+ 	.read = seq_read,
+ 	.llseek = seq_lseek,
+ 	.release = single_release,
+ 	.owner = THIS_MODULE,
+-};
+-
+-static const struct file_operations acpi_battery_state_ops = {
++	},
++	},
++	{
++	.name = "state",
++	.mode = S_IRUGO,
++	.ops = {
+ 	.open = acpi_battery_state_open_fs,
+ 	.read = seq_read,
+ 	.llseek = seq_lseek,
+ 	.release = single_release,
+ 	.owner = THIS_MODULE,
+-};
+-
+-static const struct file_operations acpi_battery_alarm_ops = {
++	},
++	},
++	{
++	.name = "alarm",
++	.mode = S_IFREG | S_IRUGO | S_IWUSR,
++	.ops = {
+ 	.open = acpi_battery_alarm_open_fs,
+ 	.read = seq_read,
+ 	.write = acpi_battery_write_alarm,
+ 	.llseek = seq_lseek,
+ 	.release = single_release,
+ 	.owner = THIS_MODULE,
++	},
++	},
+ };
+ 
+ static int acpi_battery_add_fs(struct acpi_device *device)
+ {
+ 	struct proc_dir_entry *entry = NULL;
+-
++	int i;
+ 
+ 	if (!acpi_device_dir(device)) {
+ 		acpi_device_dir(device) = proc_mkdir(acpi_device_bid(device),
+@@ -609,38 +816,16 @@
+ 		acpi_device_dir(device)->owner = THIS_MODULE;
+ 	}
+ 
+-	/* 'info' [R] */
+-	entry = create_proc_entry(ACPI_BATTERY_FILE_INFO,
+-				  S_IRUGO, acpi_device_dir(device));
++	for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) {
++		entry = create_proc_entry(acpi_battery_file[i].name,
++				  acpi_battery_file[i].mode, acpi_device_dir(device));
+ 	if (!entry)
+ 		return -ENODEV;
+ 	else {
+-		entry->proc_fops = &acpi_battery_info_ops;
++			entry->proc_fops = &acpi_battery_file[i].ops;
+ 		entry->data = acpi_driver_data(device);
+ 		entry->owner = THIS_MODULE;
+ 	}
+-
+-	/* 'status' [R] */
+-	entry = create_proc_entry(ACPI_BATTERY_FILE_STATUS,
+-				  S_IRUGO, acpi_device_dir(device));
+-	if (!entry)
+-		return -ENODEV;
+-	else {
+-		entry->proc_fops = &acpi_battery_state_ops;
+-		entry->data = acpi_driver_data(device);
+-		entry->owner = THIS_MODULE;
+-	}
+-
+-	/* 'alarm' [R/W] */
+-	entry = create_proc_entry(ACPI_BATTERY_FILE_ALARM,
+-				  S_IFREG | S_IRUGO | S_IWUSR,
+-				  acpi_device_dir(device));
+-	if (!entry)
+-		return -ENODEV;
+-	else {
+-		entry->proc_fops = &acpi_battery_alarm_ops;
+-		entry->data = acpi_driver_data(device);
+-		entry->owner = THIS_MODULE;
+ 	}
+ 
+ 	return 0;
+@@ -648,15 +833,12 @@
+ 
+ static int acpi_battery_remove_fs(struct acpi_device *device)
+ {
+-
++	int i;
+ 	if (acpi_device_dir(device)) {
+-		remove_proc_entry(ACPI_BATTERY_FILE_ALARM,
++		for (i = 0; i < ACPI_BATTERY_NUMFILES; ++i) {
++			remove_proc_entry(acpi_battery_file[i].name,
+ 				  acpi_device_dir(device));
+-		remove_proc_entry(ACPI_BATTERY_FILE_STATUS,
+-				  acpi_device_dir(device));
+-		remove_proc_entry(ACPI_BATTERY_FILE_INFO,
+-				  acpi_device_dir(device));
+-
++		}
+ 		remove_proc_entry(acpi_device_bid(device), acpi_battery_dir);
+ 		acpi_device_dir(device) = NULL;
+ 	}
+@@ -673,7 +855,6 @@
+ 	struct acpi_battery *battery = data;
+ 	struct acpi_device *device = NULL;
+ 
+-
+ 	if (!battery)
+ 		return;
+ 
+@@ -684,8 +865,10 @@
+ 	case ACPI_BATTERY_NOTIFY_INFO:
+ 	case ACPI_NOTIFY_BUS_CHECK:
+ 	case ACPI_NOTIFY_DEVICE_CHECK:
+-		acpi_battery_check(battery);
+-		acpi_bus_generate_event(device, event, battery->flags.present);
++		device = battery->device;
++		acpi_battery_notify_update(battery);
++		acpi_bus_generate_event(device, event,
++					acpi_battery_present(battery));
+ 		break;
+ 	default:
+ 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+@@ -702,7 +885,6 @@
+ 	acpi_status status = 0;
+ 	struct acpi_battery *battery = NULL;
+ 
+-
+ 	if (!device)
+ 		return -EINVAL;
+ 
+@@ -710,15 +892,21 @@
+ 	if (!battery)
+ 		return -ENOMEM;
+ 
++	mutex_init(&battery->mutex);
++
++	mutex_lock(&battery->mutex);
++
+ 	battery->device = device;
+ 	strcpy(acpi_device_name(device), ACPI_BATTERY_DEVICE_NAME);
+ 	strcpy(acpi_device_class(device), ACPI_BATTERY_CLASS);
+ 	acpi_driver_data(device) = battery;
+ 
+-	result = acpi_battery_check(battery);
++	result = acpi_battery_get_status(battery);
+ 	if (result)
+ 		goto end;
+ 
++	battery->flags.init_update = 1;
++
+ 	result = acpi_battery_add_fs(device);
+ 	if (result)
+ 		goto end;
+@@ -727,6 +915,7 @@
+ 					     ACPI_ALL_NOTIFY,
+ 					     acpi_battery_notify, battery);
+ 	if (ACPI_FAILURE(status)) {
++		ACPI_EXCEPTION((AE_INFO, status, "Installing notify handler"));
+ 		result = -ENODEV;
+ 		goto end;
+ 	}
+@@ -736,11 +925,14 @@
+ 	       device->status.battery_present ? "present" : "absent");
+ 
+       end:
++
+ 	if (result) {
+ 		acpi_battery_remove_fs(device);
+ 		kfree(battery);
+ 	}
+ 
++	mutex_unlock(&battery->mutex);
++
+ 	return result;
+ }
+ 
+@@ -749,18 +941,27 @@
+ 	acpi_status status = 0;
+ 	struct acpi_battery *battery = NULL;
+ 
+-
+ 	if (!device || !acpi_driver_data(device))
+ 		return -EINVAL;
+ 
+ 	battery = acpi_driver_data(device);
+ 
++	mutex_lock(&battery->mutex);
++
+ 	status = acpi_remove_notify_handler(device->handle,
+ 					    ACPI_ALL_NOTIFY,
+ 					    acpi_battery_notify);
+ 
+ 	acpi_battery_remove_fs(device);
+ 
++	kfree(battery->bif_data.pointer);
++
++	kfree(battery->bst_data.pointer);
++
++	mutex_unlock(&battery->mutex);
++
++	mutex_destroy(&battery->mutex);
++
+ 	kfree(battery);
+ 
+ 	return 0;
+@@ -775,7 +976,10 @@
+ 		return -EINVAL;
+ 
+ 	battery = device->driver_data;
+-	return acpi_battery_check(battery);
++
++	battery->flags.init_update = 1;
++
++	return 0;
+ }
+ 
+ static int __init acpi_battery_init(void)
+@@ -800,7 +1004,6 @@
+ 
+ static void __exit acpi_battery_exit(void)
+ {
+-
+ 	acpi_bus_unregister_driver(&acpi_battery_driver);
+ 
+ 	acpi_unlock_battery_dir(acpi_battery_dir);
+diff -Nurb linux-2.6.22-570/drivers/acpi/bay.c linux-2.6.22-try2/drivers/acpi/bay.c
+--- linux-2.6.22-570/drivers/acpi/bay.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/acpi/bay.c	2007-12-19 15:29:20.000000000 -0500
+@@ -288,6 +288,11 @@
+ 	new_bay->pdev = pdev;
+ 	platform_set_drvdata(pdev, new_bay);
+ 
++	/*
++	 * we want the bay driver to be able to send uevents
++	 */
++	pdev->dev.uevent_suppress = 0;
++
+ 	if (acpi_bay_add_fs(new_bay)) {
+ 		platform_device_unregister(new_bay->pdev);
+ 		goto bay_add_err;
+@@ -328,18 +333,12 @@
+ {
+ 	struct bay *bay_dev = (struct bay *)data;
+ 	struct device *dev = &bay_dev->pdev->dev;
++	char event_string[32];	/* "BAY_EVENT=" + u32 digits + "\n" + NUL */
++	char *envp[] = { event_string, NULL };
+ 
+ 	bay_dprintk(handle, "Bay event");
+-
+-	switch(event) {
+-	case ACPI_NOTIFY_BUS_CHECK:
+-	case ACPI_NOTIFY_DEVICE_CHECK:
+-	case ACPI_NOTIFY_EJECT_REQUEST:
+-		kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+-		break;
+-	default:
+-		printk(KERN_ERR PREFIX "Bay: unknown event %d\n", event);
+-	}
++	sprintf(event_string, "BAY_EVENT=%u\n", event);
++	kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
+ }
+ 
+ static acpi_status
+diff -Nurb linux-2.6.22-570/drivers/acpi/dock.c linux-2.6.22-try2/drivers/acpi/dock.c
+--- linux-2.6.22-570/drivers/acpi/dock.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/acpi/dock.c	2007-12-19 15:29:20.000000000 -0500
+@@ -40,8 +40,15 @@
+ MODULE_DESCRIPTION(ACPI_DOCK_DRIVER_DESCRIPTION);
+ MODULE_LICENSE("GPL");
+ 
++static int immediate_undock = 1;
++module_param(immediate_undock, bool, 0644);
++MODULE_PARM_DESC(immediate_undock, "1 (default) will cause the driver to "
++	"undock immediately when the undock button is pressed, 0 will cause"
++	" the driver to wait for userspace to write the undock sysfs file "
++	" before undocking");
++
+ static struct atomic_notifier_head dock_notifier_list;
+-static struct platform_device dock_device;
++static struct platform_device *dock_device;
+ static char dock_device_name[] = "dock";
+ 
+ struct dock_station {
+@@ -63,6 +70,7 @@
+ };
+ 
+ #define DOCK_DOCKING	0x00000001
++#define DOCK_UNDOCKING  0x00000002
+ #define DOCK_EVENT	3
+ #define UNDOCK_EVENT	2
+ 
+@@ -327,12 +335,20 @@
+ 
+ static void dock_event(struct dock_station *ds, u32 event, int num)
+ {
+-	struct device *dev = &dock_device.dev;
++	struct device *dev = &dock_device->dev;
++	char event_string[7];
++	char *envp[] = { event_string, NULL };
++
++	if (num == UNDOCK_EVENT)
++		sprintf(event_string, "UNDOCK");
++	else
++		sprintf(event_string, "DOCK");
++
+ 	/*
+ 	 * Indicate that the status of the dock station has
+ 	 * changed.
+ 	 */
+-	kobject_uevent(&dev->kobj, KOBJ_CHANGE);
++	kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
+ }
+ 
+ /**
+@@ -420,6 +436,16 @@
+ 	ds->last_dock_time = jiffies;
+ }
+ 
++static inline void begin_undock(struct dock_station *ds)
++{
++	ds->flags |= DOCK_UNDOCKING;
++}
++
++static inline void complete_undock(struct dock_station *ds)
++{
++	ds->flags &= ~(DOCK_UNDOCKING);
++}
++
+ /**
+  * dock_in_progress - see if we are in the middle of handling a dock event
+  * @ds: the dock station
+@@ -550,7 +576,7 @@
+ 		printk(KERN_ERR PREFIX "Unable to undock!\n");
+ 		return -EBUSY;
+ 	}
+-
++	complete_undock(ds);
+ 	return 0;
+ }
+ 
+@@ -594,7 +620,11 @@
+ 	 * to the driver who wish to hotplug.
+          */
+ 	case ACPI_NOTIFY_EJECT_REQUEST:
++		begin_undock(ds);
++		if (immediate_undock)
+ 		handle_eject_request(ds, event);
++		else
++			dock_event(ds, event, UNDOCK_EVENT);
+ 		break;
+ 	default:
+ 		printk(KERN_ERR PREFIX "Unknown dock event %d\n", event);
+@@ -653,6 +683,17 @@
+ DEVICE_ATTR(docked, S_IRUGO, show_docked, NULL);
+ 
+ /*
++ * show_flags - read method for flags file in sysfs
++ */
++static ssize_t show_flags(struct device *dev,
++			  struct device_attribute *attr, char *buf)
++{
++	return snprintf(buf, PAGE_SIZE, "%d\n", dock_station->flags);
++
++}
++DEVICE_ATTR(flags, S_IRUGO, show_flags, NULL);
++
++/*
+  * write_undock - write method for "undock" file in sysfs
+  */
+ static ssize_t write_undock(struct device *dev, struct device_attribute *attr,
+@@ -675,16 +716,15 @@
+ 			     struct device_attribute *attr, char *buf)
+ {
+ 	unsigned long lbuf;
+-	acpi_status status = acpi_evaluate_integer(dock_station->handle, "_UID", NULL, &lbuf);
+-	if(ACPI_FAILURE(status)) {
++	acpi_status status = acpi_evaluate_integer(dock_station->handle,
++					"_UID", NULL, &lbuf);
++	if (ACPI_FAILURE(status))
+ 	    return 0;
+-	}
++
+ 	return snprintf(buf, PAGE_SIZE, "%lx\n", lbuf);
+ }
+ DEVICE_ATTR(uid, S_IRUGO, show_dock_uid, NULL);
+ 
+-
+-
+ /**
+  * dock_add - add a new dock station
+  * @handle: the dock station handle
+diff -Nurb linux-2.6.22-570/drivers/acpi/ec.c linux-2.6.22-try2/drivers/acpi/ec.c
+--- linux-2.6.22-570/drivers/acpi/ec.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/acpi/ec.c	2007-12-19 15:29:20.000000000 -0500
+@@ -34,25 +34,26 @@
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+ #include <linux/interrupt.h>
++#include <linux/list.h>
+ #include <asm/io.h>
+ #include <acpi/acpi_bus.h>
+ #include <acpi/acpi_drivers.h>
+ #include <acpi/actypes.h>
+ 
+-#define _COMPONENT		ACPI_EC_COMPONENT
+-ACPI_MODULE_NAME("ec");
+-#define ACPI_EC_COMPONENT		0x00100000
+ #define ACPI_EC_CLASS			"embedded_controller"
+ #define ACPI_EC_HID			"PNP0C09"
+ #define ACPI_EC_DEVICE_NAME		"Embedded Controller"
+ #define ACPI_EC_FILE_INFO		"info"
++
+ #undef PREFIX
+ #define PREFIX				"ACPI: EC: "
++
+ /* EC status register */
+ #define ACPI_EC_FLAG_OBF	0x01	/* Output buffer full */
+ #define ACPI_EC_FLAG_IBF	0x02	/* Input buffer full */
+ #define ACPI_EC_FLAG_BURST	0x10	/* burst mode */
+ #define ACPI_EC_FLAG_SCI	0x20	/* EC-SCI occurred */
++
+ /* EC commands */
+ enum ec_command {
+ 	ACPI_EC_COMMAND_READ = 0x80,
+@@ -61,6 +62,7 @@
+ 	ACPI_EC_BURST_DISABLE = 0x83,
+ 	ACPI_EC_COMMAND_QUERY = 0x84,
+ };
++
+ /* EC events */
+ enum ec_event {
+ 	ACPI_EC_EVENT_OBF_1 = 1,	/* Output buffer full */
+@@ -94,6 +96,16 @@
+ 
+ /* If we find an EC via the ECDT, we need to keep a ptr to its context */
+ /* External interfaces use first EC only, so remember */
++typedef int (*acpi_ec_query_func) (void *data);
++
++struct acpi_ec_query_handler {
++	struct list_head node;
++	acpi_ec_query_func func;
++	acpi_handle handle;
++	void *data;
++	u8 query_bit;
++};
++
+ static struct acpi_ec {
+ 	acpi_handle handle;
+ 	unsigned long gpe;
+@@ -104,6 +116,7 @@
+ 	atomic_t query_pending;
+ 	atomic_t event_count;
+ 	wait_queue_head_t wait;
++	struct list_head list;
+ } *boot_ec, *first_ec;
+ 
+ /* --------------------------------------------------------------------------
+@@ -245,7 +258,7 @@
+ 
+ 	status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBF_0, 0, 0);
+ 	if (status) {
+-		printk(KERN_DEBUG PREFIX
++		printk(KERN_ERR PREFIX
+ 		       "input buffer is not empty, aborting transaction\n");
+ 		goto end;
+ 	}
+@@ -394,21 +407,67 @@
+ /* --------------------------------------------------------------------------
+                                 Event Management
+    -------------------------------------------------------------------------- */
++int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit,
++			      acpi_handle handle, acpi_ec_query_func func,
++			      void *data)
++{
++	struct acpi_ec_query_handler *handler =
++	    kzalloc(sizeof(struct acpi_ec_query_handler), GFP_KERNEL);
++	if (!handler)
++		return -ENOMEM;
++
++	handler->query_bit = query_bit;
++	handler->handle = handle;
++	handler->func = func;
++	handler->data = data;
++	mutex_lock(&ec->lock);
++	list_add_tail(&handler->node, &ec->list);
++	mutex_unlock(&ec->lock);
++	return 0;
++}
++
++EXPORT_SYMBOL_GPL(acpi_ec_add_query_handler);
++
++void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit)
++{
++	struct acpi_ec_query_handler *handler;
++	mutex_lock(&ec->lock);
++	list_for_each_entry(handler, &ec->list, node) {
++		if (query_bit == handler->query_bit) {
++			list_del(&handler->node);
++			kfree(handler);
++			break;
++		}
++	}
++	mutex_unlock(&ec->lock);
++}
++
++EXPORT_SYMBOL_GPL(acpi_ec_remove_query_handler);
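
A hedged sketch of the new query-handler API from a client's point of view
(the handler name, private data and query number 0x34 are hypothetical; 'ec'
is a pointer to the controller obtained elsewhere). A callback takes
precedence over a handle; passing a NULL func with an AML method handle
reproduces the old _Qxx behaviour:

static int my_hotkey_handler(void *data)
{
	/* runs from the EC GPE work item instead of a _Q34 method */
	return 0;
}

	error = acpi_ec_add_query_handler(ec, 0x34, NULL,
					  my_hotkey_handler, my_data);
	/* and on driver teardown: */
	acpi_ec_remove_query_handler(ec, 0x34);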
+ 
+ static void acpi_ec_gpe_query(void *ec_cxt)
+ {
+ 	struct acpi_ec *ec = ec_cxt;
+ 	u8 value = 0;
+-	char object_name[8];
++	struct acpi_ec_query_handler *handler, copy;
+ 
+ 	if (!ec || acpi_ec_query(ec, &value))
+ 		return;
+-
+-	snprintf(object_name, 8, "_Q%2.2X", value);
+-
+-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Evaluating %s", object_name));
+-
+-	acpi_evaluate_object(ec->handle, object_name, NULL, NULL);
++	mutex_lock(&ec->lock);
++	list_for_each_entry(handler, &ec->list, node) {
++		if (value == handler->query_bit) {
++			/* have custom handler for this bit */
++			memcpy(&copy, handler, sizeof(copy));
++			mutex_unlock(&ec->lock);
++			if (copy.func) {
++				copy.func(copy.data);
++			} else if (copy.handle) {
++				acpi_evaluate_object(copy.handle, NULL, NULL, NULL);
++			}
++			return;
++		}
++	}
++	mutex_unlock(&ec->lock);
++	printk(KERN_ERR PREFIX "Handler for query 0x%x is not found!\n", value);
+ }
+ 
+ static u32 acpi_ec_gpe_handler(void *data)
+@@ -427,8 +486,7 @@
+ 	if ((value & ACPI_EC_FLAG_SCI) && !atomic_read(&ec->query_pending)) {
+ 		atomic_set(&ec->query_pending, 1);
+ 		status =
+-		    acpi_os_execute(OSL_EC_BURST_HANDLER, acpi_ec_gpe_query,
+-				    ec);
++		    acpi_os_execute(OSL_EC_BURST_HANDLER, acpi_ec_gpe_query, ec);
+ 	}
+ 
+ 	return status == AE_OK ?
+@@ -454,57 +512,35 @@
+ }
+ 
+ static acpi_status
+-acpi_ec_space_handler(u32 function,
+-		      acpi_physical_address address,
+-		      u32 bit_width,
+-		      acpi_integer * value,
++acpi_ec_space_handler(u32 function, acpi_physical_address address,
++		      u32 bits, acpi_integer *value,
+ 		      void *handler_context, void *region_context)
+ {
+-	int result = 0;
+ 	struct acpi_ec *ec = handler_context;
+-	u64 temp = *value;
+-	acpi_integer f_v = 0;
+-	int i = 0;
++	int result = 0, i = 0;
++	u8 temp = 0;
+ 
+ 	if ((address > 0xFF) || !value || !handler_context)
+ 		return AE_BAD_PARAMETER;
+ 
+-	if (bit_width != 8 && acpi_strict) {
++	if (function != ACPI_READ && function != ACPI_WRITE)
+ 		return AE_BAD_PARAMETER;
+-	}
+ 
+-      next_byte:
+-	switch (function) {
+-	case ACPI_READ:
+-		temp = 0;
+-		result = acpi_ec_read(ec, (u8) address, (u8 *) & temp);
+-		break;
+-	case ACPI_WRITE:
+-		result = acpi_ec_write(ec, (u8) address, (u8) temp);
+-		break;
+-	default:
+-		result = -EINVAL;
+-		goto out;
+-		break;
+-	}
+-
+-	bit_width -= 8;
+-	if (bit_width) {
+-		if (function == ACPI_READ)
+-			f_v |= temp << 8 * i;
+-		if (function == ACPI_WRITE)
+-			temp >>= 8;
+-		i++;
+-		address++;
+-		goto next_byte;
+-	}
++	if (bits != 8 && acpi_strict)
++		return AE_BAD_PARAMETER;
+ 
++	while (bits - i > 0) {
+ 	if (function == ACPI_READ) {
+-		f_v |= temp << 8 * i;
+-		*value = f_v;
++			result = acpi_ec_read(ec, address, &temp);
++			(*value) |= ((acpi_integer)temp) << i;
++		} else {
++			temp = 0xff & ((*value) >> i);
++			result = acpi_ec_write(ec, address, temp);
++		}
++		i += 8;
++		++address;
+ 	}
+ 
+-      out:
+ 	switch (result) {
+ 	case -EINVAL:
+ 		return AE_BAD_PARAMETER;
+@@ -597,9 +633,6 @@
+ static acpi_status
+ ec_parse_io_ports(struct acpi_resource *resource, void *context);
+ 
+-static acpi_status
+-ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval);
+-
+ static struct acpi_ec *make_acpi_ec(void)
+ {
+ 	struct acpi_ec *ec = kzalloc(sizeof(struct acpi_ec), GFP_KERNEL);
+@@ -610,13 +643,52 @@
+ 	atomic_set(&ec->event_count, 1);
+ 	mutex_init(&ec->lock);
+ 	init_waitqueue_head(&ec->wait);
++	INIT_LIST_HEAD(&ec->list);
+ 
+ 	return ec;
+ }
+ 
++static acpi_status
++acpi_ec_register_query_methods(acpi_handle handle, u32 level,
++			       void *context, void **return_value)
++{
++	struct acpi_namespace_node *node = handle;
++	struct acpi_ec *ec = context;
++	int value = 0;
++	if (sscanf(node->name.ascii, "_Q%x", &value) == 1) {
++		acpi_ec_add_query_handler(ec, value, handle, NULL, NULL);
++	}
++	return AE_OK;
++}
++
++static int ec_parse_device(struct acpi_ec *ec, acpi_handle handle)
++{
++	if (ACPI_FAILURE(acpi_walk_resources(handle, METHOD_NAME__CRS,
++				     ec_parse_io_ports, ec)))
++		return -EINVAL;
++
++	/* Get GPE bit assignment (EC events). */
++	/* TODO: Add support for _GPE returning a package */
++	if (ACPI_FAILURE(acpi_evaluate_integer(handle, "_GPE", NULL, &ec->gpe)))
++		return -EINVAL;
++
++	/* Use the global lock for all EC transactions? */
++	acpi_evaluate_integer(handle, "_GLK", NULL, &ec->global_lock);
++
++	/* Find and register all query methods */
++	acpi_walk_namespace(ACPI_TYPE_METHOD, handle, 1,
++			    acpi_ec_register_query_methods, ec, NULL);
++
++	ec->handle = handle;
++
++	printk(KERN_INFO PREFIX "GPE = 0x%lx, I/O: command/status = 0x%lx, data = 0x%lx",
++			  ec->gpe, ec->command_addr, ec->data_addr);
++
++	return 0;
++}
++
+ static int acpi_ec_add(struct acpi_device *device)
+ {
+-	acpi_status status = AE_OK;
+ 	struct acpi_ec *ec = NULL;
+ 
+ 	if (!device)
+@@ -629,8 +701,7 @@
+ 	if (!ec)
+ 		return -ENOMEM;
+ 
+-	status = ec_parse_device(device->handle, 0, ec, NULL);
+-	if (status != AE_CTRL_TERMINATE) {
++	if (ec_parse_device(ec, device->handle)) {
+ 		kfree(ec);
+ 		return -EINVAL;
+ 	}
+@@ -641,6 +712,8 @@
+ 			/* We might have incorrect info for GL at boot time */
+ 			mutex_lock(&boot_ec->lock);
+ 			boot_ec->global_lock = ec->global_lock;
++			/* Copy handlers from new ec into boot ec */
++			list_splice(&ec->list, &boot_ec->list);
+ 			mutex_unlock(&boot_ec->lock);
+ 			kfree(ec);
+ 			ec = boot_ec;
+@@ -651,22 +724,24 @@
+ 	acpi_driver_data(device) = ec;
+ 
+ 	acpi_ec_add_fs(device);
+-
+-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "%s [%s] (gpe %d) interrupt mode.",
+-			  acpi_device_name(device), acpi_device_bid(device),
+-			  (u32) ec->gpe));
+-
+ 	return 0;
+ }
+ 
+ static int acpi_ec_remove(struct acpi_device *device, int type)
+ {
+ 	struct acpi_ec *ec;
++	struct acpi_ec_query_handler *handler;
+ 
+ 	if (!device)
+ 		return -EINVAL;
+ 
+ 	ec = acpi_driver_data(device);
++	mutex_lock(&ec->lock);
++	list_for_each_entry(handler, &ec->list, node) {
++		list_del(&handler->node);
++		kfree(handler);
++	}
++	mutex_unlock(&ec->lock);
+ 	acpi_ec_remove_fs(device);
+ 	acpi_driver_data(device) = NULL;
+ 	if (ec == first_ec)
+@@ -722,15 +797,13 @@
+ 		return -ENODEV;
+ 	}
+ 
+-	/* EC is fully operational, allow queries */
+-	atomic_set(&ec->query_pending, 0);
+-
+ 	return 0;
+ }
+ 
+ static int acpi_ec_start(struct acpi_device *device)
+ {
+ 	struct acpi_ec *ec;
++	int ret = 0;
+ 
+ 	if (!device)
+ 		return -EINVAL;
+@@ -740,14 +813,14 @@
+ 	if (!ec)
+ 		return -EINVAL;
+ 
+-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "gpe=0x%02lx, ports=0x%2lx,0x%2lx",
+-			  ec->gpe, ec->command_addr, ec->data_addr));
+-
+ 	/* Boot EC is already working */
+-	if (ec == boot_ec)
+-		return 0;
++	if (ec != boot_ec)
++		ret = ec_install_handlers(ec);
+ 
+-	return ec_install_handlers(ec);
++	/* EC is fully operational, allow queries */
++	atomic_set(&ec->query_pending, 0);
++
++	return ret;
+ }
+ 
+ static int acpi_ec_stop(struct acpi_device *device, int type)
+@@ -779,34 +852,6 @@
+ 	return 0;
+ }
+ 
+-static acpi_status
+-ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval)
+-{
+-	acpi_status status;
+-
+-	struct acpi_ec *ec = context;
+-	status = acpi_walk_resources(handle, METHOD_NAME__CRS,
+-				     ec_parse_io_ports, ec);
+-	if (ACPI_FAILURE(status))
+-		return status;
+-
+-	/* Get GPE bit assignment (EC events). */
+-	/* TODO: Add support for _GPE returning a package */
+-	status = acpi_evaluate_integer(handle, "_GPE", NULL, &ec->gpe);
+-	if (ACPI_FAILURE(status))
+-		return status;
+-
+-	/* Use the global lock for all EC transactions? */
+-	acpi_evaluate_integer(handle, "_GLK", NULL, &ec->global_lock);
+-
+-	ec->handle = handle;
+-
+-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "GPE=0x%02lx, ports=0x%2lx, 0x%2lx",
+-			  ec->gpe, ec->command_addr, ec->data_addr));
+-
+-	return AE_CTRL_TERMINATE;
+-}
+-
+ int __init acpi_ec_ecdt_probe(void)
+ {
+ 	int ret;
+@@ -825,7 +870,7 @@
+ 	if (ACPI_FAILURE(status))
+ 		goto error;
+ 
+-	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found ECDT"));
++	printk(KERN_INFO PREFIX "EC description table is found, configuring boot EC\n");
+ 
+ 	boot_ec->command_addr = ecdt_ptr->control.address;
+ 	boot_ec->data_addr = ecdt_ptr->data.address;
+diff -Nurb linux-2.6.22-570/drivers/acpi/osl.c linux-2.6.22-try2/drivers/acpi/osl.c
+--- linux-2.6.22-570/drivers/acpi/osl.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/acpi/osl.c	2007-12-19 15:29:21.000000000 -0500
+@@ -77,13 +77,7 @@
+ #define	OSI_STRING_LENGTH_MAX 64	/* arbitrary */
+ static char osi_additional_string[OSI_STRING_LENGTH_MAX];
+ 
+-#define OSI_LINUX_ENABLED
+-#ifdef	OSI_LINUX_ENABLED
+-int osi_linux = 1;	/* enable _OSI(Linux) by default */
+-#else
+ int osi_linux;		/* disable _OSI(Linux) by default */
+-#endif
+-
+ 
+ #ifdef CONFIG_DMI
+ static struct __initdata dmi_system_id acpi_osl_dmi_table[];
+@@ -1056,6 +1050,17 @@
+ 
+ EXPORT_SYMBOL(max_cstate);
+ 
++void (*acpi_do_set_cstate_limit)(void);
++EXPORT_SYMBOL(acpi_do_set_cstate_limit);
++
++void acpi_set_cstate_limit(unsigned int new_limit)
++{
++	max_cstate = new_limit;
++	if (acpi_do_set_cstate_limit)
++		acpi_do_set_cstate_limit();
++}
++EXPORT_SYMBOL(acpi_set_cstate_limit);
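
The function-pointer indirection lets code outside the (possibly modular)
processor driver tighten the C-state ceiling and still trigger cpuidle
re-detection; a sketch of a hypothetical caller:

	/* e.g. from a thermal policy path: forbid C3 and deeper */
	acpi_set_cstate_limit(2);	/* sets max_cstate, then fires the hook */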
++
+ /*
+  * Acquire a spinlock.
+  *
+@@ -1183,17 +1188,10 @@
+ 	if (!strcmp("Linux", interface)) {
+ 		printk(KERN_WARNING PREFIX
+ 			"System BIOS is requesting _OSI(Linux)\n");
+-#ifdef	OSI_LINUX_ENABLED
+-		printk(KERN_WARNING PREFIX
+-			"Please test with \"acpi_osi=!Linux\"\n"
+-			"Please send dmidecode "
+-			"to linux-acpi@vger.kernel.org\n");
+-#else
+ 		printk(KERN_WARNING PREFIX
+ 			"If \"acpi_osi=Linux\" works better,\n"
+ 			"Please send dmidecode "
+ 			"to linux-acpi@vger.kernel.org\n");
+-#endif
+ 		if(osi_linux)
+ 			return AE_OK;
+ 	}
+@@ -1227,36 +1225,14 @@
+ }
+ 
+ #ifdef CONFIG_DMI
+-#ifdef	OSI_LINUX_ENABLED
+-static int dmi_osi_not_linux(struct dmi_system_id *d)
+-{
+-	printk(KERN_NOTICE "%s detected: requires not _OSI(Linux)\n", d->ident);
+-	enable_osi_linux(0);
+-	return 0;
+-}
+-#else
+ static int dmi_osi_linux(struct dmi_system_id *d)
+ {
+-	printk(KERN_NOTICE "%s detected: requires _OSI(Linux)\n", d->ident);
++	printk(KERN_NOTICE "%s detected: enabling _OSI(Linux)\n", d->ident);
+ 	enable_osi_linux(1);
+ 	return 0;
+ }
+-#endif
+ 
+ static struct dmi_system_id acpi_osl_dmi_table[] __initdata = {
+-#ifdef	OSI_LINUX_ENABLED
+-	/*
+-	 * Boxes that need NOT _OSI(Linux)
+-	 */
+-	{
+-	 .callback = dmi_osi_not_linux,
+-	 .ident = "Toshiba Satellite P100",
+-	 .matches = {
+-		     DMI_MATCH(DMI_BOARD_VENDOR, "TOSHIBA"),
+-		     DMI_MATCH(DMI_BOARD_NAME, "Satellite P100"),
+-		     },
+-	 },
+-#else
+ 	/*
+ 	 * Boxes that need _OSI(Linux)
+ 	 */
+@@ -1268,7 +1244,6 @@
+ 		     DMI_MATCH(DMI_BOARD_NAME, "MPAD-MSAE Customer Reference Boards"),
+ 		     },
+ 	 },
+-#endif
+ 	{}
+ };
+ #endif /* CONFIG_DMI */
+diff -Nurb linux-2.6.22-570/drivers/acpi/processor_core.c linux-2.6.22-try2/drivers/acpi/processor_core.c
+--- linux-2.6.22-570/drivers/acpi/processor_core.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/acpi/processor_core.c	2007-12-19 15:29:21.000000000 -0500
+@@ -44,6 +44,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/dmi.h>
+ #include <linux/moduleparam.h>
++#include <linux/cpuidle.h>
+ 
+ #include <asm/io.h>
+ #include <asm/system.h>
+@@ -66,6 +67,7 @@
+ #define ACPI_PROCESSOR_FILE_LIMIT	"limit"
+ #define ACPI_PROCESSOR_NOTIFY_PERFORMANCE 0x80
+ #define ACPI_PROCESSOR_NOTIFY_POWER	0x81
++#define ACPI_PROCESSOR_NOTIFY_THROTTLING	0x82
+ 
+ #define ACPI_PROCESSOR_LIMIT_USER	0
+ #define ACPI_PROCESSOR_LIMIT_THERMAL	1
+@@ -84,6 +86,8 @@
+ static void acpi_processor_notify(acpi_handle handle, u32 event, void *data);
+ static acpi_status acpi_processor_hotadd_init(acpi_handle handle, int *p_cpu);
+ static int acpi_processor_handle_eject(struct acpi_processor *pr);
++extern int acpi_processor_tstate_has_changed(struct acpi_processor *pr);
++
+ 
+ static struct acpi_driver acpi_processor_driver = {
+ 	.name = "processor",
+@@ -701,6 +705,9 @@
+ 		acpi_processor_cst_has_changed(pr);
+ 		acpi_bus_generate_event(device, event, 0);
+ 		break;
++	case ACPI_PROCESSOR_NOTIFY_THROTTLING:
++		acpi_processor_tstate_has_changed(pr);
++		acpi_bus_generate_event(device, event, 0);
+ 	default:
+ 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ 				  "Unsupported event [0x%x]\n", event));
+@@ -1024,11 +1031,15 @@
+ 
+ 	acpi_processor_ppc_init();
+ 
++	cpuidle_register_driver(&acpi_idle_driver);
++	acpi_do_set_cstate_limit = acpi_max_cstate_changed;
+ 	return 0;
+ }
+ 
+ static void __exit acpi_processor_exit(void)
+ {
++	acpi_do_set_cstate_limit = NULL;
++	cpuidle_unregister_driver(&acpi_idle_driver);
+ 
+ 	acpi_processor_ppc_exit();
+ 
+diff -Nurb linux-2.6.22-570/drivers/acpi/processor_idle.c linux-2.6.22-try2/drivers/acpi/processor_idle.c
+--- linux-2.6.22-570/drivers/acpi/processor_idle.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/acpi/processor_idle.c	2007-12-19 15:29:21.000000000 -0500
+@@ -40,6 +40,7 @@
+ #include <linux/sched.h>	/* need_resched() */
+ #include <linux/latency.h>
+ #include <linux/clockchips.h>
++#include <linux/cpuidle.h>
+ 
+ /*
+  * Include the apic definitions for x86 to have the APIC timer related defines
+@@ -62,25 +63,34 @@
+ #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
+ ACPI_MODULE_NAME("processor_idle");
+ #define ACPI_PROCESSOR_FILE_POWER	"power"
+-#define US_TO_PM_TIMER_TICKS(t)		((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
+-#define C2_OVERHEAD			4	/* 1us (3.579 ticks per us) */
+-#define C3_OVERHEAD			4	/* 1us (3.579 ticks per us) */
+-static void (*pm_idle_save) (void) __read_mostly;
+-module_param(max_cstate, uint, 0644);
++#define PM_TIMER_TICKS_TO_US(p)		(((p) * 1000)/(PM_TIMER_FREQUENCY/1000))
++#define C2_OVERHEAD			1	/* 1us */
++#define C3_OVERHEAD			1	/* 1us */
++
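
For scale, the ACPI PM timer runs at PM_TIMER_FREQUENCY = 3579545 Hz
(3.579545 MHz), so one tick is roughly 0.28 us and the conversion above works
out as:

/* PM_TIMER_TICKS_TO_US(3580) = 3580 * 1000 / (3579545 / 1000)
 *			       = 3580000 / 3579 ~= 1000 us
 */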
++void acpi_max_cstate_changed(void)
++{
++	/* Driver will reset devices' max cstate limit */
++	cpuidle_force_redetect_devices(&acpi_idle_driver);
++}
++
++static int change_max_cstate(const char *val, struct kernel_param *kp)
++{
++	int max;
++
++	max = simple_strtol(val, NULL, 0);
++	if (!max)
++		return -EINVAL;
++	max_cstate = max;
++	if (acpi_do_set_cstate_limit)
++		acpi_do_set_cstate_limit();
++	return 0;
++}
++
++module_param_call(max_cstate, change_max_cstate, param_get_uint, &max_cstate, 0644);
+ 
+ static unsigned int nocst __read_mostly;
+ module_param(nocst, uint, 0000);
+ 
+-/*
+- * bm_history -- bit-mask with a bit per jiffy of bus-master activity
+- * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
+- * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
+- * 100 HZ: 0x0000000F: 4 jiffies = 40ms
+- * reduce history for more aggressive entry into C3
+- */
+-static unsigned int bm_history __read_mostly =
+-    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
+-module_param(bm_history, uint, 0644);
+ /* --------------------------------------------------------------------------
+                                 Power Management
+    -------------------------------------------------------------------------- */
+@@ -166,88 +176,6 @@
+ 	{},
+ };
+ 
+-static inline u32 ticks_elapsed(u32 t1, u32 t2)
+-{
+-	if (t2 >= t1)
+-		return (t2 - t1);
+-	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
+-		return (((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
+-	else
+-		return ((0xFFFFFFFF - t1) + t2);
+-}
+-
+-static void
+-acpi_processor_power_activate(struct acpi_processor *pr,
+-			      struct acpi_processor_cx *new)
+-{
+-	struct acpi_processor_cx *old;
+-
+-	if (!pr || !new)
+-		return;
+-
+-	old = pr->power.state;
+-
+-	if (old)
+-		old->promotion.count = 0;
+-	new->demotion.count = 0;
+-
+-	/* Cleanup from old state. */
+-	if (old) {
+-		switch (old->type) {
+-		case ACPI_STATE_C3:
+-			/* Disable bus master reload */
+-			if (new->type != ACPI_STATE_C3 && pr->flags.bm_check)
+-				acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
+-			break;
+-		}
+-	}
+-
+-	/* Prepare to use new state. */
+-	switch (new->type) {
+-	case ACPI_STATE_C3:
+-		/* Enable bus master reload */
+-		if (old->type != ACPI_STATE_C3 && pr->flags.bm_check)
+-			acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
+-		break;
+-	}
+-
+-	pr->power.state = new;
+-
+-	return;
+-}
+-
+-static void acpi_safe_halt(void)
+-{
+-	current_thread_info()->status &= ~TS_POLLING;
+-	/*
+-	 * TS_POLLING-cleared state must be visible before we
+-	 * test NEED_RESCHED:
+-	 */
+-	smp_mb();
+-	if (!need_resched())
+-		safe_halt();
+-	current_thread_info()->status |= TS_POLLING;
+-}
+-
+-static atomic_t c3_cpu_count;
+-
+-/* Common C-state entry for C2, C3, .. */
+-static void acpi_cstate_enter(struct acpi_processor_cx *cstate)
+-{
+-	if (cstate->space_id == ACPI_CSTATE_FFH) {
+-		/* Call into architectural FFH based C-state */
+-		acpi_processor_ffh_cstate_enter(cstate);
+-	} else {
+-		int unused;
+-		/* IO port based C-state */
+-		inb(cstate->address);
+-		/* Dummy wait op - must do something useless after P_LVL2 read
+-		   because chipsets cannot guarantee that STPCLK# signal
+-		   gets asserted in time to freeze execution properly. */
+-		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
+-	}
+-}
+-
+ #ifdef ARCH_APICTIMER_STOPS_ON_C3
+ 
+ /*
+@@ -929,7 +860,7 @@
+ 	 * Normalize the C2 latency to expedite policy
+ 	 */
+ 	cx->valid = 1;
+-	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
++	cx->latency_ticks = cx->latency;
+ 
+ 	return;
+ }
+@@ -1003,7 +934,7 @@
+ 	 * use this in our C3 policy
+ 	 */
+ 	cx->valid = 1;
+-	cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
++	cx->latency_ticks = cx->latency;
+ 
+ 	return;
+ }
+@@ -1069,18 +1000,6 @@
+ 	pr->power.count = acpi_processor_power_verify(pr);
+ 
+ 	/*
+-	 * Set Default Policy
+-	 * ------------------
+-	 * Now that we know which states are supported, set the default
+-	 * policy.  Note that this policy can be changed dynamically
+-	 * (e.g. encourage deeper sleeps to conserve battery life when
+-	 * not on AC).
+-	 */
+-	result = acpi_processor_set_power_policy(pr);
+-	if (result)
+-		return result;
+-
+-	/*
+ 	 * if one state of type C2 or C3 is available, mark this
+ 	 * CPU as being "idle manageable"
+ 	 */
+@@ -1097,9 +1016,6 @@
+ 
+ int acpi_processor_cst_has_changed(struct acpi_processor *pr)
+ {
+-	int result = 0;
+-
+-
+ 	if (!pr)
+ 		return -EINVAL;
+ 
+@@ -1110,16 +1026,9 @@
+ 	if (!pr->flags.power_setup_done)
+ 		return -ENODEV;
+ 
+-	/* Fall back to the default idle loop */
+-	pm_idle = pm_idle_save;
+-	synchronize_sched();	/* Relies on interrupts forcing exit from idle. */
+-
+-	pr->flags.power = 0;
+-	result = acpi_processor_get_power_info(pr);
+-	if ((pr->flags.power == 1) && (pr->flags.power_setup_done))
+-		pm_idle = acpi_processor_idle;
+-
+-	return result;
++	acpi_processor_get_power_info(pr);
++	return cpuidle_force_redetect(per_cpu(cpuidle_devices, pr->id),
++		&acpi_idle_driver);
+ }
+ 
+ /* proc interface */
+@@ -1205,30 +1114,6 @@
+ 	.release = single_release,
+ };
+ 
+-#ifdef CONFIG_SMP
+-static void smp_callback(void *v)
+-{
+-	/* we already woke the CPU up, nothing more to do */
+-}
+-
+-/*
+- * This function gets called when a part of the kernel has a new latency
+- * requirement.  This means we need to get all processors out of their C-state,
+- * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
+- * wakes them all right up.
+- */
+-static int acpi_processor_latency_notify(struct notifier_block *b,
+-		unsigned long l, void *v)
+-{
+-	smp_call_function(smp_callback, NULL, 0, 1);
+-	return NOTIFY_OK;
+-}
+-
+-static struct notifier_block acpi_processor_latency_notifier = {
+-	.notifier_call = acpi_processor_latency_notify,
+-};
+-#endif
+-
+ int __cpuinit acpi_processor_power_init(struct acpi_processor *pr,
+ 			      struct acpi_device *device)
+ {
+@@ -1245,9 +1130,6 @@
+ 			       "ACPI: processor limited to max C-state %d\n",
+ 			       max_cstate);
+ 		first_run++;
+-#ifdef CONFIG_SMP
+-		register_latency_notifier(&acpi_processor_latency_notifier);
+-#endif
+ 	}
+ 
+ 	if (!pr)
+@@ -1264,6 +1146,7 @@
+ 
+ 	acpi_processor_get_power_info(pr);
+ 
++
+ 	/*
+ 	 * Install the idle handler if processor power management is supported.
+ 	 * Note that the previously set idle handler will be used on
+@@ -1276,11 +1159,6 @@
+ 				printk(" C%d[C%d]", i,
+ 				       pr->power.states[i].type);
+ 		printk(")\n");
+-
+-		if (pr->id == 0) {
+-			pm_idle_save = pm_idle;
+-			pm_idle = acpi_processor_idle;
+-		}
+ 	}
+ 
+ 	/* 'power' [R] */
+@@ -1308,21 +1186,332 @@
+ 	if (acpi_device_dir(device))
+ 		remove_proc_entry(ACPI_PROCESSOR_FILE_POWER,
+ 				  acpi_device_dir(device));
++	return 0;
++}
++
++/**
++ * ticks_elapsed - a helper function that returns the time (in microseconds)
++ *		   that has elapsed between two PM Timer timestamps
++ * @t1: the start time
++ * @t2: the end time
++ */
++static inline u32 ticks_elapsed(u32 t1, u32 t2)
++{
++	if (t2 >= t1)
++		return PM_TIMER_TICKS_TO_US(t2 - t1);
++	else if (!(acpi_gbl_FADT.flags & ACPI_FADT_32BIT_TIMER))
++		return PM_TIMER_TICKS_TO_US(((0x00FFFFFF - t1) + t2) & 0x00FFFFFF);
++	else
++		return PM_TIMER_TICKS_TO_US((0xFFFFFFFF - t1) + t2);
++}
+ 
+-	/* Unregister the idle handler when processor #0 is removed. */
+-	if (pr->id == 0) {
+-		pm_idle = pm_idle_save;
++/**
++ * acpi_idle_update_bm_rld - updates the BM_RLD bit depending on target state
++ * @pr: the processor
++ * @target: the new target state
++ */
++static inline void acpi_idle_update_bm_rld(struct acpi_processor *pr,
++					   struct acpi_processor_cx *target)
++{
++	if (pr->flags.bm_rld_set && target->type != ACPI_STATE_C3) {
++		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
++		pr->flags.bm_rld_set = 0;
++	}
+ 
++	if (!pr->flags.bm_rld_set && target->type == ACPI_STATE_C3) {
++		acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
++		pr->flags.bm_rld_set = 1;
++	}
++}
++
++/**
++ * acpi_idle_do_entry - a helper function that does C2 and C3 type entry
++ * @cx: cstate data
++ */
++static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx)
++{
++	if (cx->space_id == ACPI_CSTATE_FFH) {
++		/* Call into architectural FFH based C-state */
++		acpi_processor_ffh_cstate_enter(cx);
++	} else {
++		int unused;
++		/* IO port based C-state */
++		inb(cx->address);
++		/* Dummy wait op - must do something useless after P_LVL2 read
++		   because chipsets cannot guarantee that STPCLK# signal
++		   gets asserted in time to freeze execution properly. */
++		unused = inl(acpi_gbl_FADT.xpm_timer_block.address);
++	}
++}
++
++/**
++ * acpi_idle_enter_c1 - enters an ACPI C1 state-type
++ * @dev: the target CPU
++ * @state: the state data
++ *
++ * This is equivalent to the HALT instruction.
++ */
++static int acpi_idle_enter_c1(struct cpuidle_device *dev,
++			      struct cpuidle_state *state)
++{
++	struct acpi_processor *pr;
++	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
++	pr = processors[smp_processor_id()];
++
++	if (unlikely(!pr))
++		return 0;
++
++	if (pr->flags.bm_check)
++		acpi_idle_update_bm_rld(pr, cx);
++
++	current_thread_info()->status &= ~TS_POLLING;
+ 		/*
+-		 * We are about to unload the current idle thread pm callback
+-		 * (pm_idle), Wait for all processors to update cached/local
+-		 * copies of pm_idle before proceeding.
+-		 */
+-		cpu_idle_wait();
+-#ifdef CONFIG_SMP
+-		unregister_latency_notifier(&acpi_processor_latency_notifier);
++	 * TS_POLLING-cleared state must be visible before we test
++	 * NEED_RESCHED:
++	 */
++	smp_mb();
++	if (!need_resched())
++		safe_halt();
++	current_thread_info()->status |= TS_POLLING;
++
++	cx->usage++;
++
++	return 0;
++}
++
++/**
++ * acpi_idle_enter_c2 - enters an ACPI C2 state-type
++ * @dev: the target CPU
++ * @state: the state data
++ */
++static int acpi_idle_enter_c2(struct cpuidle_device *dev,
++			      struct cpuidle_state *state)
++{
++	struct acpi_processor *pr;
++	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
++	u32 t1, t2;
++	pr = processors[smp_processor_id()];
++
++	if (unlikely(!pr))
++		return 0;
++
++	if (pr->flags.bm_check)
++		acpi_idle_update_bm_rld(pr, cx);
++
++	local_irq_disable();
++	current_thread_info()->status &= ~TS_POLLING;
++	/*
++	 * TS_POLLING-cleared state must be visible before we test
++	 * NEED_RESCHED:
++	 */
++	smp_mb();
++
++	if (unlikely(need_resched())) {
++		current_thread_info()->status |= TS_POLLING;
++		local_irq_enable();
++		return 0;
++	}
++
++	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
++	acpi_state_timer_broadcast(pr, cx, 1);
++	acpi_idle_do_entry(cx);
++	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
++
++#ifdef CONFIG_GENERIC_TIME
++	/* TSC halts in C2, so notify users */
++	mark_tsc_unstable("possible TSC halt in C2");
+ #endif
++
++	local_irq_enable();
++	current_thread_info()->status |= TS_POLLING;
++
++	cx->usage++;
++
++	acpi_state_timer_broadcast(pr, cx, 0);
++	return ticks_elapsed(t1, t2);
++}
++
++static int c3_cpu_count;
++static DEFINE_SPINLOCK(c3_lock);
++
++/**
++ * acpi_idle_enter_c3 - enters an ACPI C3 state-type
++ * @dev: the target CPU
++ * @state: the state data
++ *
++ * Similar to C2 entry, except special bus master handling is needed.
++ */
++static int acpi_idle_enter_c3(struct cpuidle_device *dev,
++			      struct cpuidle_state *state)
++{
++	struct acpi_processor *pr;
++	struct acpi_processor_cx *cx = cpuidle_get_statedata(state);
++	u32 t1, t2;
++	pr = processors[smp_processor_id()];
++
++	if (unlikely(!pr))
++		return 0;
++
++	if (pr->flags.bm_check)
++		acpi_idle_update_bm_rld(pr, cx);
++
++	local_irq_disable();
++	current_thread_info()->status &= ~TS_POLLING;
++	/*
++	 * TS_POLLING-cleared state must be visible before we test
++	 * NEED_RESCHED:
++	 */
++	smp_mb();
++
++	if (unlikely(need_resched())) {
++		current_thread_info()->status |= TS_POLLING;
++		local_irq_enable();
++		return 0;
++	}
++
++	/* disable bus master */
++	if (pr->flags.bm_check) {
++		spin_lock(&c3_lock);
++		c3_cpu_count++;
++		if (c3_cpu_count == num_online_cpus()) {
++			/*
++			 * All CPUs are trying to go to C3
++			 * Disable bus master arbitration
++			 */
++			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1);
++		}
++		spin_unlock(&c3_lock);
++	} else {
++		/* SMP with no shared cache... Invalidate cache  */
++		ACPI_FLUSH_CPU_CACHE();
++	}
++
++	/* Get start time (ticks) */
++	t1 = inl(acpi_gbl_FADT.xpm_timer_block.address);
++	acpi_state_timer_broadcast(pr, cx, 1);
++	acpi_idle_do_entry(cx);
++	t2 = inl(acpi_gbl_FADT.xpm_timer_block.address);
++
++	if (pr->flags.bm_check) {
++		spin_lock(&c3_lock);
++		/* Enable bus master arbitration */
++		if (c3_cpu_count == num_online_cpus())
++			acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0);
++		c3_cpu_count--;
++		spin_unlock(&c3_lock);
+ 	}
+ 
++#ifdef CONFIG_GENERIC_TIME
++	/* TSC halts in C3, so notify users */
++	mark_tsc_unstable("TSC halts in C3");
++#endif
++
++	local_irq_enable();
++	current_thread_info()->status |= TS_POLLING;
++
++	cx->usage++;
++
++	acpi_state_timer_broadcast(pr, cx, 0);
++	return ticks_elapsed(t1, t2);
++}
++
++/**
++ * acpi_idle_bm_check - checks if bus master activity was detected
++ */
++static int acpi_idle_bm_check(void)
++{
++	u32 bm_status = 0;
++
++	acpi_get_register(ACPI_BITREG_BUS_MASTER_STATUS, &bm_status);
++	if (bm_status)
++		acpi_set_register(ACPI_BITREG_BUS_MASTER_STATUS, 1);
++	/*
++	 * PIIX4 Erratum #18: Note that BM_STS doesn't always reflect
++	 * the true state of bus mastering activity; forcing us to
++	 * manually check the BMIDEA bit of each IDE channel.
++	 */
++	else if (errata.piix4.bmisx) {
++		if ((inb_p(errata.piix4.bmisx + 0x02) & 0x01)
++		    || (inb_p(errata.piix4.bmisx + 0x0A) & 0x01))
++			bm_status = 1;
++	}
++	return bm_status;
++}
++
++/**
++ * acpi_idle_init - attaches the driver to a CPU
++ * @dev: the CPU
++ */
++static int acpi_idle_init(struct cpuidle_device *dev)
++{
++	int cpu = dev->cpu;
++	int i, count = 0;
++	struct acpi_processor_cx *cx;
++	struct cpuidle_state *state;
++
++	struct acpi_processor *pr = processors[cpu];
++
++	if (!pr->flags.power_setup_done)
++		return -EINVAL;
++
++	if (pr->flags.power == 0) {
++		return -EINVAL;
++	}
++
++	for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
++		cx = &pr->power.states[i];
++		state = &dev->states[count];
++
++		if (!cx->valid)
++			continue;
++
++#ifdef CONFIG_HOTPLUG_CPU
++		if ((cx->type != ACPI_STATE_C1) && (num_online_cpus() > 1) &&
++		    !pr->flags.has_cst &&
++		    !(acpi_gbl_FADT.flags & ACPI_FADT_C2_MP_SUPPORTED))
++			continue;
++#endif
++		cpuidle_set_statedata(state, cx);
++
++		state->exit_latency = cx->latency;
++		state->target_residency = cx->latency * 6;
++		state->power_usage = cx->power;
++
++		state->flags = 0;
++		switch (cx->type) {
++			case ACPI_STATE_C1:
++			state->flags |= CPUIDLE_FLAG_SHALLOW;
++			state->enter = acpi_idle_enter_c1;
++			break;
++
++			case ACPI_STATE_C2:
++			state->flags |= CPUIDLE_FLAG_BALANCED;
++			state->flags |= CPUIDLE_FLAG_TIME_VALID;
++			state->enter = acpi_idle_enter_c2;
++			break;
++
++			case ACPI_STATE_C3:
++			state->flags |= CPUIDLE_FLAG_DEEP;
++			state->flags |= CPUIDLE_FLAG_TIME_VALID;
++			state->flags |= CPUIDLE_FLAG_CHECK_BM;
++			state->enter = acpi_idle_enter_c3;
++			break;
++		}
++
++		count++;
++	}
++
++	if (!count)
++		return -EINVAL;
++
++	dev->state_count = count;
+ 	return 0;
+ }
++
++struct cpuidle_driver acpi_idle_driver = {
++	.name =		"acpi_idle",
++	.init =		acpi_idle_init,
++	.redetect =	acpi_idle_init,
++	.bm_check =	acpi_idle_bm_check,
++	.owner =	THIS_MODULE,
++};
+diff -Nurb linux-2.6.22-570/drivers/acpi/processor_throttling.c linux-2.6.22-try2/drivers/acpi/processor_throttling.c
+--- linux-2.6.22-570/drivers/acpi/processor_throttling.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/acpi/processor_throttling.c	2007-12-19 15:29:21.000000000 -0500
+@@ -44,17 +44,231 @@
+ #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
+ ACPI_MODULE_NAME("processor_throttling");
+ 
++static int acpi_processor_get_throttling(struct acpi_processor *pr);
++int acpi_processor_set_throttling(struct acpi_processor *pr, int state);
++
++static int acpi_processor_get_platform_limit(struct acpi_processor *pr)
++{
++	acpi_status status = 0;
++	unsigned long tpc = 0;
++
++	if (!pr)
++		return -EINVAL;
++	status = acpi_evaluate_integer(pr->handle, "_TPC", NULL, &tpc);
++	if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) {
++		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TPC"));
++		return -ENODEV;
++	}
++	pr->throttling_platform_limit = (int)tpc;
++	return 0;
++}
++
++int acpi_processor_tstate_has_changed(struct acpi_processor *pr)
++{
++	return acpi_processor_get_platform_limit(pr);
++}
++
++/* --------------------------------------------------------------------------
++                             _PTC, _TSS, _TSD support 
++   -------------------------------------------------------------------------- */
++static int acpi_processor_get_throttling_control(struct acpi_processor *pr)
++{
++	int result = 0;
++	acpi_status status = 0;
++	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
++	union acpi_object *ptc = NULL;
++	union acpi_object obj = { 0 };
++
++	status = acpi_evaluate_object(pr->handle, "_PTC", NULL, &buffer);
++	if (ACPI_FAILURE(status)) {
++		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _PTC"));
++		return -ENODEV;
++	}
++
++	ptc = (union acpi_object *)buffer.pointer;
++	if (!ptc || (ptc->type != ACPI_TYPE_PACKAGE)
++	    || (ptc->package.count != 2)) {
++		printk(KERN_ERR PREFIX "Invalid _PTC data\n");
++		result = -EFAULT;
++		goto end;
++	}
++
++	/*
++	 * control_register
++	 */
++
++	obj = ptc->package.elements[0];
++
++	if ((obj.type != ACPI_TYPE_BUFFER)
++	    || (obj.buffer.length < sizeof(struct acpi_ptc_register))
++	    || (obj.buffer.pointer == NULL)) {
++		printk(KERN_ERR PREFIX
++		       "Invalid _PTC data (control_register)\n");
++		result = -EFAULT;
++		goto end;
++	}
++	memcpy(&pr->throttling.control_register, obj.buffer.pointer,
++	       sizeof(struct acpi_ptc_register));
++
++	/*
++	 * status_register
++	 */
++
++	obj = ptc->package.elements[1];
++
++	if ((obj.type != ACPI_TYPE_BUFFER)
++	    || (obj.buffer.length < sizeof(struct acpi_ptc_register))
++	    || (obj.buffer.pointer == NULL)) {
++		printk(KERN_ERR PREFIX "Invalid _PTC data (status_register)\n");
++		result = -EFAULT;
++		goto end;
++	}
++
++	memcpy(&pr->throttling.status_register, obj.buffer.pointer,
++	       sizeof(struct acpi_ptc_register));
++
++      end:
++	kfree(buffer.pointer);
++
++	return result;
++}
++static int acpi_processor_get_throttling_states(struct acpi_processor *pr)
++{
++	int result = 0;
++	acpi_status status = AE_OK;
++	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
++	struct acpi_buffer format = { sizeof("NNNNN"), "NNNNN" };
++	struct acpi_buffer state = { 0, NULL };
++	union acpi_object *tss = NULL;
++	int i;
++
++	status = acpi_evaluate_object(pr->handle, "_TSS", NULL, &buffer);
++	if (ACPI_FAILURE(status)) {
++		ACPI_EXCEPTION((AE_INFO, status, "Evaluating _TSS"));
++		return -ENODEV;
++	}
++
++	tss = buffer.pointer;
++	if (!tss || (tss->type != ACPI_TYPE_PACKAGE)) {
++		printk(KERN_ERR PREFIX "Invalid _TSS data\n");
++		result = -EFAULT;
++		goto end;
++	}
++
++	ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Found %d throttling states\n",
++			  tss->package.count));
++
++	pr->throttling.state_count = tss->package.count;
++	pr->throttling.states_tss =
++	    kmalloc(sizeof(struct acpi_processor_tx_tss) * tss->package.count,
++		    GFP_KERNEL);
++	if (!pr->throttling.states_tss) {
++		result = -ENOMEM;
++		goto end;
++	}
++
++	for (i = 0; i < pr->throttling.state_count; i++) {
++
++		struct acpi_processor_tx_tss *tx =
++		    (struct acpi_processor_tx_tss *)&(pr->throttling.
++						      states_tss[i]);
++
++		state.length = sizeof(struct acpi_processor_tx_tss);
++		state.pointer = tx;
++
++		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Extracting state %d\n", i));
++
++		status = acpi_extract_package(&(tss->package.elements[i]),
++					      &format, &state);
++		if (ACPI_FAILURE(status)) {
++			ACPI_EXCEPTION((AE_INFO, status, "Invalid _TSS data"));
++			result = -EFAULT;
++			kfree(pr->throttling.states_tss);
++			goto end;
++		}
++
++		if (!tx->freqpercentage) {
++			printk(KERN_ERR PREFIX
++			       "Invalid _TSS data: freq is zero\n");
++			result = -EFAULT;
++			kfree(pr->throttling.states_tss);
++			goto end;
++		}
++	}
++
++      end:
++	kfree(buffer.pointer);
++
++	return result;
++}
++static int acpi_processor_get_tsd(struct acpi_processor *pr)
++{
++	int result = 0;
++	acpi_status status = AE_OK;
++	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
++	struct acpi_buffer format = { sizeof("NNNNN"), "NNNNN" };
++	struct acpi_buffer state = { 0, NULL };
++	union acpi_object *tsd = NULL;
++	struct acpi_tsd_package *pdomain;
++
++	status = acpi_evaluate_object(pr->handle, "_TSD", NULL, &buffer);
++	if (ACPI_FAILURE(status)) {
++		return -ENODEV;
++	}
++
++	tsd = buffer.pointer;
++	if (!tsd || (tsd->type != ACPI_TYPE_PACKAGE)) {
++		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid _TSD data\n"));
++		result = -EFAULT;
++		goto end;
++	}
++
++	if (tsd->package.count != 1) {
++		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid _TSD data\n"));
++		result = -EFAULT;
++		goto end;
++	}
++
++	pdomain = &(pr->throttling.domain_info);
++
++	state.length = sizeof(struct acpi_tsd_package);
++	state.pointer = pdomain;
++
++	status = acpi_extract_package(&(tsd->package.elements[0]),
++				      &format, &state);
++	if (ACPI_FAILURE(status)) {
++		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Invalid _TSD data\n"));
++		result = -EFAULT;
++		goto end;
++	}
++
++	if (pdomain->num_entries != ACPI_TSD_REV0_ENTRIES) {
++		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Unknown _TSD:num_entries\n"));
++		result = -EFAULT;
++		goto end;
++	}
++
++	if (pdomain->revision != ACPI_TSD_REV0_REVISION) {
++		ACPI_DEBUG_PRINT((ACPI_DB_ERROR, "Unknown _TSD:revision\n"));
++		result = -EFAULT;
++		goto end;
++	}
++
++      end:
++	kfree(buffer.pointer);
++	return result;
++}
++
+ /* --------------------------------------------------------------------------
+                               Throttling Control
+    -------------------------------------------------------------------------- */
+-static int acpi_processor_get_throttling(struct acpi_processor *pr)
++static int acpi_processor_get_throttling_fadt(struct acpi_processor *pr)
+ {
+ 	int state = 0;
+ 	u32 value = 0;
+ 	u32 duty_mask = 0;
+ 	u32 duty_value = 0;
+ 
+-
+ 	if (!pr)
+ 		return -EINVAL;
+ 
+@@ -94,13 +308,114 @@
+ 	return 0;
+ }
+ 
+-int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
++static int acpi_read_throttling_status(struct acpi_processor_throttling
++				       *throttling)
++{
++	int value = -1;
++	switch (throttling->status_register.space_id) {
++	case ACPI_ADR_SPACE_SYSTEM_IO:
++		acpi_os_read_port((acpi_io_address) throttling->status_register.
++				  address, &value,
++				  (u32) throttling->status_register.bit_width *
++				  8);
++		break;
++	case ACPI_ADR_SPACE_FIXED_HARDWARE:
++		printk(KERN_ERR PREFIX
++		       "HARDWARE addr space, NOT supported yet\n");
++		break;
++	default:
++		printk(KERN_ERR PREFIX "Unknown addr space %d\n",
++		       (u32) (throttling->status_register.space_id));
++	}
++	return value;
++}
++
++static int acpi_write_throttling_state(struct acpi_processor_throttling
++				       *throttling, int value)
++{
++	int ret = -1;
++
++	switch (throttling->control_register.space_id) {
++	case ACPI_ADR_SPACE_SYSTEM_IO:
++		acpi_os_write_port((acpi_io_address) throttling->
++				   control_register.address, value,
++				   (u32) throttling->control_register.
++				   bit_width * 8);
++		ret = 0;
++		break;
++	case ACPI_ADR_SPACE_FIXED_HARDWARE:
++		printk(KERN_ERR PREFIX
++		       "HARDWARE addr space, NOT supported yet\n");
++		break;
++	default:
++		printk(KERN_ERR PREFIX "Unknown addr space %d\n",
++		       (u32) (throttling->control_register.space_id));
++	}
++	return ret;
++}
++
++static int acpi_get_throttling_state(struct acpi_processor *pr, int value)
++{
++	int i;
++
++	for (i = 0; i < pr->throttling.state_count; i++) {
++		struct acpi_processor_tx_tss *tx =
++		    (struct acpi_processor_tx_tss *)&(pr->throttling.
++						      states_tss[i]);
++		if (tx->control == value)
++			break;
++	}
++	if (i >= pr->throttling.state_count)
++		i = -1;
++	return i;
++}
++
++static int acpi_get_throttling_value(struct acpi_processor *pr, int state)
++{
++	int value = -1;
++	if (state >= 0 && state < pr->throttling.state_count) {
++		struct acpi_processor_tx_tss *tx =
++		    (struct acpi_processor_tx_tss *)&(pr->throttling.
++						      states_tss[state]);
++		value = tx->control;
++	}
++	return value;
++}
++
++static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr)
++{
++	int state = 0;
++	int value = 0;
++
++	if (!pr)
++		return -EINVAL;
++
++	if (!pr->flags.throttling)
++		return -ENODEV;
++
++	pr->throttling.state = 0;
++	local_irq_disable();
++	value = acpi_read_throttling_status(&pr->throttling);
++	if (value >= 0) {
++		state = acpi_get_throttling_state(pr, value);
++		pr->throttling.state = state;
++	}
++	local_irq_enable();
++
++	return 0;
++}
++
++static int acpi_processor_get_throttling(struct acpi_processor *pr)
++{
++	return pr->throttling.acpi_processor_get_throttling(pr);
++}
++
++int acpi_processor_set_throttling_fadt(struct acpi_processor *pr, int state)
+ {
+ 	u32 value = 0;
+ 	u32 duty_mask = 0;
+ 	u32 duty_value = 0;
+ 
+-
+ 	if (!pr)
+ 		return -EINVAL;
+ 
+@@ -113,6 +428,8 @@
+ 	if (state == pr->throttling.state)
+ 		return 0;
+ 
++	if (state < pr->throttling_platform_limit)
++		return -EPERM;
+ 	/*
+ 	 * Calculate the duty_value and duty_mask.
+ 	 */
+@@ -165,12 +482,50 @@
+ 	return 0;
+ }
+ 
++int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, int state)
++{
++	int value = 0;
++
++	if (!pr)
++		return -EINVAL;
++
++	if ((state < 0) || (state > (pr->throttling.state_count - 1)))
++		return -EINVAL;
++
++	if (!pr->flags.throttling)
++		return -ENODEV;
++
++	if (state == pr->throttling.state)
++		return 0;
++
++	if (state < pr->throttling_platform_limit)
++		return -EPERM;
++
++	local_irq_disable();
++
++	value = acpi_get_throttling_value(pr, state);
++	if (value >= 0) {
++		acpi_write_throttling_state(&pr->throttling, value);
++		pr->throttling.state = state;
++	}
++	local_irq_enable();
++
++	return 0;
++}
++
++int acpi_processor_set_throttling(struct acpi_processor *pr, int state)
++{
++	return pr->throttling.acpi_processor_set_throttling(pr, state);
++}
++
+ int acpi_processor_get_throttling_info(struct acpi_processor *pr)
+ {
+ 	int result = 0;
+ 	int step = 0;
+ 	int i = 0;
+-
++	int no_ptc = 0;
++	int no_tss = 0;
++	int no_tsd = 0;
+ 
+ 	ACPI_DEBUG_PRINT((ACPI_DB_INFO,
+ 			  "pblk_address[0x%08x] duty_offset[%d] duty_width[%d]\n",
+@@ -182,6 +537,21 @@
+ 		return -EINVAL;
+ 
+ 	/* TBD: Support ACPI 2.0 objects */
++	no_ptc = acpi_processor_get_throttling_control(pr);
++	no_tss = acpi_processor_get_throttling_states(pr);
++	no_tsd = acpi_processor_get_tsd(pr);
++
++	if (no_ptc || no_tss) {
++		pr->throttling.acpi_processor_get_throttling =
++		    &acpi_processor_get_throttling_fadt;
++		pr->throttling.acpi_processor_set_throttling =
++		    &acpi_processor_set_throttling_fadt;
++	} else {
++		pr->throttling.acpi_processor_get_throttling =
++		    &acpi_processor_get_throttling_ptc;
++		pr->throttling.acpi_processor_set_throttling =
++		    &acpi_processor_set_throttling_ptc;
++	}
+ 
+ 	if (!pr->throttling.address) {
+ 		ACPI_DEBUG_PRINT((ACPI_DB_INFO, "No throttling register\n"));
+@@ -262,7 +632,6 @@
+ 	int i = 0;
+ 	int result = 0;
+ 
+-
+ 	if (!pr)
+ 		goto end;
+ 
+@@ -280,15 +649,27 @@
+ 	}
+ 
+ 	seq_printf(seq, "state count:             %d\n"
+-		   "active state:            T%d\n",
+-		   pr->throttling.state_count, pr->throttling.state);
++		   "active state:            T%d\n"
++		   "states available:        T%d to T%d\n",
++		   pr->throttling.state_count, pr->throttling.state,
++		   pr->throttling_platform_limit,
++		   pr->throttling.state_count - 1);
+ 
+ 	seq_puts(seq, "states:\n");
++	if (pr->throttling.acpi_processor_get_throttling ==
++			acpi_processor_get_throttling_fadt) {
+ 	for (i = 0; i < pr->throttling.state_count; i++)
+ 		seq_printf(seq, "   %cT%d:                  %02d%%\n",
+ 			   (i == pr->throttling.state ? '*' : ' '), i,
+ 			   (pr->throttling.states[i].performance ? pr->
+ 			    throttling.states[i].performance / 10 : 0));
++	} else {
++		for (i = 0; i < pr->throttling.state_count; i++)
++			seq_printf(seq, "   %cT%d:                  %02d%%\n",
++				   (i == pr->throttling.state ? '*' : ' '), i,
++				   (int)pr->throttling.states_tss[i].
++				   freqpercentage);
++	}
+ 
+       end:
+ 	return 0;
+@@ -301,7 +682,7 @@
+ 			   PDE(inode)->data);
+ }
+ 
+-static ssize_t acpi_processor_write_throttling(struct file * file,
++static ssize_t acpi_processor_write_throttling(struct file *file,
+ 					       const char __user * buffer,
+ 					       size_t count, loff_t * data)
+ {
+@@ -310,7 +691,6 @@
+ 	struct acpi_processor *pr = m->private;
+ 	char state_string[12] = { '\0' };
+ 
+-
+ 	if (!pr || (count > sizeof(state_string) - 1))
+ 		return -EINVAL;
+ 
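
For reference on the FADT throttling path kept above: with duty_width bits there are 2^duty_width T-states, state i gates the clock for (count - i) of every count duty cycles, and the duty value is shifted into the P_CNT register at duty_offset. A standalone sketch of that packing and the percentage it implies (register I/O omitted; T0 is special-cased because it simply disables throttling):

#include <stdio.h>
#include <stdint.h>

/* FADT-style throttling: 2^duty_width T-states packed into P_CNT. */
static uint32_t pack_duty(int state, int duty_offset, int duty_width)
{
	int count = 1 << duty_width;			/* number of T-states    */
	uint32_t duty_value = (uint32_t)(count - state);/* never called for T0   */

	return duty_value << duty_offset;		/* field position in P_CNT */
}

static int unpack_state(uint32_t pcnt, int duty_offset, int duty_width)
{
	int count = 1 << duty_width;
	uint32_t duty_value = (pcnt >> duty_offset) & (uint32_t)(count - 1);

	return duty_value ? count - (int)duty_value : 0;
}

int main(void)
{
	int width = 3, offset = 1;	/* 8 T-states, field starting at bit 1 */

	for (int state = 0; state < (1 << width); state++) {
		uint32_t pcnt = state ? pack_duty(state, offset, width) : 0;
		int pct = (100 * ((1 << width) - state)) >> width;

		printf("T%d: P_CNT field 0x%02x, ~%d%% of nominal clock\n",
		       state, (unsigned)pcnt, pct);
		if (unpack_state(pcnt, offset, width) != state)
			printf("  round-trip mismatch!\n");
	}
	return 0;
}
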
+diff -Nurb linux-2.6.22-570/drivers/acpi/sbs.c linux-2.6.22-try2/drivers/acpi/sbs.c
+--- linux-2.6.22-570/drivers/acpi/sbs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/acpi/sbs.c	2007-12-19 15:29:21.000000000 -0500
+@@ -127,7 +127,7 @@
+ static struct acpi_driver acpi_sbs_driver = {
+ 	.name = "sbs",
+ 	.class = ACPI_SBS_CLASS,
+-	.ids = ACPI_SBS_HID,
++	.ids = "ACPI0001,ACPI0005",
+ 	.ops = {
+ 		.add = acpi_sbs_add,
+ 		.remove = acpi_sbs_remove,
+@@ -176,10 +176,8 @@
+ };
+ 
+ struct acpi_sbs {
+-	acpi_handle handle;
+ 	int base;
+ 	struct acpi_device *device;
+-	struct acpi_ec_smbus *smbus;
+ 	struct mutex mutex;
+ 	int sbsm_present;
+ 	int sbsm_batteries_supported;
+@@ -511,7 +509,7 @@
+ 				"acpi_sbs_read_word() failed"));
+ 		goto end;
+ 	}
+-
++	sbs->sbsm_present = 1;
+ 	sbs->sbsm_batteries_supported = battery_system_info & 0x000f;
+ 
+       end:
+@@ -1630,13 +1628,12 @@
+ {
+ 	struct acpi_sbs *sbs = NULL;
+ 	int result = 0, remove_result = 0;
+-	unsigned long sbs_obj;
+ 	int id;
+ 	acpi_status status = AE_OK;
+ 	unsigned long val;
+ 
+ 	status =
+-	    acpi_evaluate_integer(device->parent->handle, "_EC", NULL, &val);
++	    acpi_evaluate_integer(device->handle, "_EC", NULL, &val);
+ 	if (ACPI_FAILURE(status)) {
+ 		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "Error obtaining _EC"));
+ 		return -EIO;
+@@ -1653,7 +1650,7 @@
+ 
+ 	sbs_mutex_lock(sbs);
+ 
+-	sbs->base = (val & 0xff00ull) >> 8;
++	sbs->base = 0xff & (val >> 8);
+ 	sbs->device = device;
+ 
+ 	strcpy(acpi_device_name(device), ACPI_SBS_DEVICE_NAME);
+@@ -1665,24 +1662,10 @@
+ 		ACPI_EXCEPTION((AE_INFO, AE_ERROR, "acpi_ac_add() failed"));
+ 		goto end;
+ 	}
+-	status = acpi_evaluate_integer(device->handle, "_SBS", NULL, &sbs_obj);
+-	if (status) {
+-		ACPI_EXCEPTION((AE_INFO, status,
+-				"acpi_evaluate_integer() failed"));
+-		result = -EIO;
+-		goto end;
+-	}
+-	if (sbs_obj > 0) {
+-		result = acpi_sbsm_get_info(sbs);
+-		if (result) {
+-			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
+-					"acpi_sbsm_get_info() failed"));
+-			goto end;
+-		}
+-		sbs->sbsm_present = 1;
+-	}
+ 
+-	if (sbs->sbsm_present == 0) {
++	acpi_sbsm_get_info(sbs);
++
++	if (!sbs->sbsm_present) {
+ 		result = acpi_battery_add(sbs, 0);
+ 		if (result) {
+ 			ACPI_EXCEPTION((AE_INFO, AE_ERROR,
+@@ -1702,8 +1685,6 @@
+ 		}
+ 	}
+ 
+-	sbs->handle = device->handle;
+-
+ 	init_timer(&sbs->update_timer);
+ 	result = acpi_check_update_proc(sbs);
+ 	if (result)
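
The probe rework above extracts the SMBus host controller offset from the _EC return value with a byte shift-and-mask rather than the earlier 64-bit masking. A two-line standalone illustration of that extraction, assuming the bits 15:8 encoding this driver relies on:

#include <stdio.h>

int main(void)
{
	unsigned long val = 0x2040;		/* example _EC return value       */
	int base = 0xff & (val >> 8);		/* EC offset lives in bits 15:8   */

	printf("SMBus HC base offset: 0x%02x\n", base);	/* prints 0x20 */
	return 0;
}
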
+diff -Nurb linux-2.6.22-570/drivers/acpi/system.c linux-2.6.22-try2/drivers/acpi/system.c
+--- linux-2.6.22-570/drivers/acpi/system.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/acpi/system.c	2007-12-19 15:29:21.000000000 -0500
+@@ -39,15 +39,12 @@
+ 
+ #define ACPI_SYSTEM_CLASS		"system"
+ #define ACPI_SYSTEM_DEVICE_NAME		"System"
+-#define ACPI_SYSTEM_FILE_INFO		"info"
+-#define ACPI_SYSTEM_FILE_EVENT		"event"
+-#define ACPI_SYSTEM_FILE_DSDT		"dsdt"
+-#define ACPI_SYSTEM_FILE_FADT		"fadt"
+ 
+ /*
+  * Make ACPICA version work as module param
+  */
+-static int param_get_acpica_version(char *buffer, struct kernel_param *kp) {
++static int param_get_acpica_version(char *buffer, struct kernel_param *kp)
++{
+ 	int result;
+ 
+ 	result = sprintf(buffer, "%x", ACPI_CA_VERSION);
+@@ -58,9 +55,126 @@
+ module_param_call(acpica_version, NULL, param_get_acpica_version, NULL, 0444);
+ 
+ /* --------------------------------------------------------------------------
++                              FS Interface (/sys)
++   -------------------------------------------------------------------------- */
++static LIST_HEAD(acpi_table_attr_list);
++static struct kobject tables_kobj;
++
++struct acpi_table_attr {
++	struct bin_attribute attr;
++	char name[8];
++	int instance;
++	struct list_head node;
++};
++
++static ssize_t acpi_table_show(struct kobject *kobj,
++			       struct bin_attribute *bin_attr, char *buf,
++			       loff_t offset, size_t count)
++{
++	struct acpi_table_attr *table_attr =
++	    container_of(bin_attr, struct acpi_table_attr, attr);
++	struct acpi_table_header *table_header = NULL;
++	acpi_status status;
++	ssize_t ret_count = count;
++
++	status =
++	    acpi_get_table(table_attr->name, table_attr->instance,
++			   &table_header);
++	if (ACPI_FAILURE(status))
++		return -ENODEV;
++
++	if (offset >= table_header->length) {
++		ret_count = 0;
++		goto end;
++	}
++
++	if (offset + ret_count > table_header->length)
++		ret_count = table_header->length - offset;
++
++	memcpy(buf, ((char *)table_header) + offset, ret_count);
++
++      end:
++	return ret_count;
++}
++
++static void acpi_table_attr_init(struct acpi_table_attr *table_attr,
++				 struct acpi_table_header *table_header)
++{
++	struct acpi_table_header *header = NULL;
++	struct acpi_table_attr *attr = NULL;
++
++	memcpy(table_attr->name, table_header->signature, ACPI_NAME_SIZE);
++
++	list_for_each_entry(attr, &acpi_table_attr_list, node) {
++		if (!memcmp(table_header->signature, attr->name,
++			    ACPI_NAME_SIZE))
++			if (table_attr->instance < attr->instance)
++				table_attr->instance = attr->instance;
++	}
++	table_attr->instance++;
++
++	if (table_attr->instance > 1 || (table_attr->instance == 1 &&
++					 !acpi_get_table(table_header->
++							 signature, 2,
++							 &header)))
++		sprintf(table_attr->name + 4, "%d", table_attr->instance);
++
++	table_attr->attr.size = 0;
++	table_attr->attr.read = acpi_table_show;
++	table_attr->attr.attr.name = table_attr->name;
++	table_attr->attr.attr.mode = 0444;
++	table_attr->attr.attr.owner = THIS_MODULE;
++
++	return;
++}
++
++static int acpi_system_sysfs_init(void)
++{
++	struct acpi_table_attr *table_attr;
++	struct acpi_table_header *table_header = NULL;
++	int table_index = 0;
++	int result;
++
++	tables_kobj.parent = &acpi_subsys.kobj;
++	kobject_set_name(&tables_kobj, "tables");
++	result = kobject_register(&tables_kobj);
++	if (result)
++		return result;
++
++	do {
++		result = acpi_get_table_by_index(table_index, &table_header);
++		if (!result) {
++			table_index++;
++			table_attr = NULL;
++			table_attr =
++			    kzalloc(sizeof(struct acpi_table_attr), GFP_KERNEL);
++			if (!table_attr)
++				return -ENOMEM;
++
++			acpi_table_attr_init(table_attr, table_header);
++			result =
++			    sysfs_create_bin_file(&tables_kobj,
++						  &table_attr->attr);
++			if (result) {
++				kfree(table_attr);
++				return result;
++			} else
++				list_add_tail(&table_attr->node,
++					      &acpi_table_attr_list);
++		}
++	} while (!result);
++
++	return 0;
++}
++
++/* --------------------------------------------------------------------------
+                               FS Interface (/proc)
+    -------------------------------------------------------------------------- */
+ #ifdef CONFIG_ACPI_PROCFS
++#define ACPI_SYSTEM_FILE_INFO		"info"
++#define ACPI_SYSTEM_FILE_EVENT		"event"
++#define ACPI_SYSTEM_FILE_DSDT		"dsdt"
++#define ACPI_SYSTEM_FILE_FADT		"fadt"
+ 
+ static int acpi_system_read_info(struct seq_file *seq, void *offset)
+ {
+@@ -80,7 +194,6 @@
+ 	.llseek = seq_lseek,
+ 	.release = single_release,
+ };
+-#endif
+ 
+ static ssize_t acpi_system_read_dsdt(struct file *, char __user *, size_t,
+ 				     loff_t *);
+@@ -97,13 +210,11 @@
+ 	struct acpi_table_header *dsdt = NULL;
+ 	ssize_t res;
+ 
+-
+ 	status = acpi_get_table(ACPI_SIG_DSDT, 1, &dsdt);
+ 	if (ACPI_FAILURE(status))
+ 		return -ENODEV;
+ 
+-	res = simple_read_from_buffer(buffer, count, ppos,
+-				      dsdt, dsdt->length);
++	res = simple_read_from_buffer(buffer, count, ppos, dsdt, dsdt->length);
+ 
+ 	return res;
+ }
+@@ -123,28 +234,21 @@
+ 	struct acpi_table_header *fadt = NULL;
+ 	ssize_t res;
+ 
+-
+ 	status = acpi_get_table(ACPI_SIG_FADT, 1, &fadt);
+ 	if (ACPI_FAILURE(status))
+ 		return -ENODEV;
+ 
+-	res = simple_read_from_buffer(buffer, count, ppos,
+-				      fadt, fadt->length);
++	res = simple_read_from_buffer(buffer, count, ppos, fadt, fadt->length);
+ 
+ 	return res;
+ }
+ 
+-static int __init acpi_system_init(void)
++static int acpi_system_procfs_init(void)
+ {
+ 	struct proc_dir_entry *entry;
+ 	int error = 0;
+ 	char *name;
+ 
+-
+-	if (acpi_disabled)
+-		return 0;
+-
+-#ifdef CONFIG_ACPI_PROCFS
+ 	/* 'info' [R] */
+ 	name = ACPI_SYSTEM_FILE_INFO;
+ 	entry = create_proc_entry(name, S_IRUGO, acpi_root_dir);
+@@ -153,7 +257,6 @@
+ 	else {
+ 		entry->proc_fops = &acpi_system_info_ops;
+ 	}
+-#endif
+ 
+ 	/* 'dsdt' [R] */
+ 	name = ACPI_SYSTEM_FILE_DSDT;
+@@ -177,12 +280,32 @@
+       Error:
+ 	remove_proc_entry(ACPI_SYSTEM_FILE_FADT, acpi_root_dir);
+ 	remove_proc_entry(ACPI_SYSTEM_FILE_DSDT, acpi_root_dir);
+-#ifdef CONFIG_ACPI_PROCFS
+ 	remove_proc_entry(ACPI_SYSTEM_FILE_INFO, acpi_root_dir);
+-#endif
+ 
+ 	error = -EFAULT;
+ 	goto Done;
+ }
++#else
++static int acpi_system_procfs_init(void)
++{
++	return 0;
++}
++#endif
++
++static int __init acpi_system_init(void)
++{
++	int result = 0;
++
++	if (acpi_disabled)
++		return 0;
++
++	result = acpi_system_procfs_init();
++	if (result)
++		return result;
++
++	result = acpi_system_sysfs_init();
++
++	return result;
++}
+ 
+ subsys_initcall(acpi_system_init);
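
acpi_table_show() above follows the usual bin-attribute read contract: return 0 once the offset reaches the end of the object, otherwise copy at most count bytes starting at offset and report how many were copied. A userspace sketch of just that clamping (hypothetical helper name; the kernel side additionally resolves the table by signature and instance):

#include <stdio.h>
#include <string.h>

/* Windowed read: same clamping as the sysfs bin_attribute handler above. */
static long window_read(char *buf, size_t count, long long offset,
			const char *obj, size_t obj_len)
{
	if ((size_t)offset >= obj_len)
		return 0;			/* at or past EOF           */
	if ((size_t)offset + count > obj_len)
		count = obj_len - (size_t)offset;	/* short read at the tail */
	memcpy(buf, obj + offset, count);
	return (long)count;
}

int main(void)
{
	const char table[] = "FACP....table bytes....";
	char buf[8];
	long long off = 0;
	long n;

	while ((n = window_read(buf, sizeof(buf), off, table, sizeof(table))) > 0) {
		printf("read %ld bytes at offset %lld\n", n, off);
		off += n;
	}
	return 0;
}
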
+diff -Nurb linux-2.6.22-570/drivers/acpi/thermal.c linux-2.6.22-try2/drivers/acpi/thermal.c
+--- linux-2.6.22-570/drivers/acpi/thermal.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/acpi/thermal.c	2007-12-19 15:29:23.000000000 -0500
+@@ -40,6 +40,7 @@
+ #include <linux/jiffies.h>
+ #include <linux/kmod.h>
+ #include <linux/seq_file.h>
++#include <linux/reboot.h>
+ #include <asm/uaccess.h>
+ 
+ #include <acpi/acpi_bus.h>
+@@ -59,7 +60,6 @@
+ #define ACPI_THERMAL_NOTIFY_CRITICAL	0xF0
+ #define ACPI_THERMAL_NOTIFY_HOT		0xF1
+ #define ACPI_THERMAL_MODE_ACTIVE	0x00
+-#define ACPI_THERMAL_PATH_POWEROFF	"/sbin/poweroff"
+ 
+ #define ACPI_THERMAL_MAX_ACTIVE	10
+ #define ACPI_THERMAL_MAX_LIMIT_STR_LEN 65
+@@ -419,26 +419,6 @@
+ 	return 0;
+ }
+ 
+-static int acpi_thermal_call_usermode(char *path)
+-{
+-	char *argv[2] = { NULL, NULL };
+-	char *envp[3] = { NULL, NULL, NULL };
+-
+-
+-	if (!path)
+-		return -EINVAL;
+-
+-	argv[0] = path;
+-
+-	/* minimal command environment */
+-	envp[0] = "HOME=/";
+-	envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+-
+-	call_usermodehelper(argv[0], argv, envp, 0);
+-
+-	return 0;
+-}
+-
+ static int acpi_thermal_critical(struct acpi_thermal *tz)
+ {
+ 	if (!tz || !tz->trips.critical.flags.valid)
+@@ -456,7 +436,7 @@
+ 	acpi_bus_generate_event(tz->device, ACPI_THERMAL_NOTIFY_CRITICAL,
+ 				tz->trips.critical.flags.enabled);
+ 
+-	acpi_thermal_call_usermode(ACPI_THERMAL_PATH_POWEROFF);
++	orderly_poweroff(true);
+ 
+ 	return 0;
+ }
+@@ -1114,7 +1094,6 @@
+ 		break;
+ 	case ACPI_THERMAL_NOTIFY_THRESHOLDS:
+ 		acpi_thermal_get_trip_points(tz);
+-		acpi_thermal_check(tz);
+ 		acpi_bus_generate_event(device, event, 0);
+ 		break;
+ 	case ACPI_THERMAL_NOTIFY_DEVICES:
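
The thermal driver now defers to orderly_poweroff(true) instead of open-coding a usermode helper call. For comparison, a userspace fork/execve equivalent of the minimal argv/envp environment the removed acpi_thermal_call_usermode() used to build (echo substituted for the real poweroff binary):

#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>

int main(void)
{
	char *argv[] = { "/bin/echo", "poweroff (stand-in)", NULL };
	char *envp[] = {		/* same minimal command environment */
		"HOME=/",
		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
		NULL
	};
	pid_t pid = fork();

	if (pid == 0) {
		execve(argv[0], argv, envp);
		_exit(127);		/* exec failed */
	}
	if (pid > 0)
		waitpid(pid, NULL, 0);
	return 0;
}
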
+diff -Nurb linux-2.6.22-570/drivers/acpi/utilities/uteval.c linux-2.6.22-try2/drivers/acpi/utilities/uteval.c
+--- linux-2.6.22-570/drivers/acpi/utilities/uteval.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/acpi/utilities/uteval.c	2007-12-19 15:29:22.000000000 -0500
+@@ -62,16 +62,13 @@
+ static char *acpi_interfaces_supported[] = {
+ 	/* Operating System Vendor Strings */
+ 
+-	"Windows 2000",
+-	"Windows 2001",
+-	"Windows 2001 SP0",
+-	"Windows 2001 SP1",
+-	"Windows 2001 SP2",
+-	"Windows 2001 SP3",
+-	"Windows 2001 SP4",
+-	"Windows 2001.1",
+-	"Windows 2001.1 SP1",	/* Added 03/2006 */
+-	"Windows 2006",		/* Added 03/2006 */
++	"Windows 2000",		/* Windows 2000 */
++	"Windows 2001",		/* Windows XP */
++	"Windows 2001 SP1",	/* Windows XP SP1 */
++	"Windows 2001 SP2",	/* Windows XP SP2 */
++	"Windows 2001.1",	/* Windows Server 2003 */
++	"Windows 2001.1 SP1",	/* Windows Server 2003 SP1 - Added 03/2006 */
++	"Windows 2006",		/* Windows Vista - Added 03/2006 */
+ 
+ 	/* Feature Group Strings */
+ 
+diff -Nurb linux-2.6.22-570/drivers/acpi/video.c linux-2.6.22-try2/drivers/acpi/video.c
+--- linux-2.6.22-570/drivers/acpi/video.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/acpi/video.c	2007-12-19 15:29:22.000000000 -0500
+@@ -33,6 +33,7 @@
+ #include <linux/seq_file.h>
+ 
+ #include <linux/backlight.h>
++#include <linux/video_output.h>
+ #include <asm/uaccess.h>
+ 
+ #include <acpi/acpi_bus.h>
+@@ -169,6 +170,7 @@
+ 	struct acpi_device *dev;
+ 	struct acpi_video_device_brightness *brightness;
+ 	struct backlight_device *backlight;
++	struct output_device *output_dev;
+ };
+ 
+ /* bus */
+@@ -272,6 +274,10 @@
+ 				     u32 level_current, u32 event);
+ static void acpi_video_switch_brightness(struct acpi_video_device *device,
+ 					 int event);
++static int acpi_video_device_get_state(struct acpi_video_device *device,
++			    unsigned long *state);
++static int acpi_video_output_get(struct output_device *od);
++static int acpi_video_device_set_state(struct acpi_video_device *device, int state);
+ 
+ /*backlight device sysfs support*/
+ static int acpi_video_get_brightness(struct backlight_device *bd)
+@@ -297,6 +303,28 @@
+ 	.update_status  = acpi_video_set_brightness,
+ };
+ 
++/*video output device sysfs support*/
++static int acpi_video_output_get(struct output_device *od)
++{
++	unsigned long state;
++	struct acpi_video_device *vd =
++		(struct acpi_video_device *)class_get_devdata(&od->class_dev);
++	acpi_video_device_get_state(vd, &state);
++	return (int)state;
++}
++
++static int acpi_video_output_set(struct output_device *od)
++{
++	unsigned long state = od->request_state;
++	struct acpi_video_device *vd=
++	struct acpi_video_device *vd =
++	return acpi_video_device_set_state(vd, state);
++}
++
++static struct output_properties acpi_output_properties = {
++	.set_state = acpi_video_output_set,
++	.get_status = acpi_video_output_get,
++};
+ /* --------------------------------------------------------------------------
+                                Video Management
+    -------------------------------------------------------------------------- */
+@@ -626,6 +654,17 @@
+ 
+ 		kfree(name);
+ 	}
++	if (device->cap._DCS && device->cap._DSS){
++		static int count = 0;
++		char *name;
++		name = kzalloc(MAX_NAME_LEN, GFP_KERNEL);
++		if (!name)
++			return;
++		sprintf(name, "acpi_video%d", count++);
++		device->output_dev = video_output_register(name,
++				NULL, device, &acpi_output_properties);
++		kfree(name);
++	}
+ 	return;
+ }
+ 
+@@ -1669,6 +1708,7 @@
+ 					    ACPI_DEVICE_NOTIFY,
+ 					    acpi_video_device_notify);
+ 	backlight_device_unregister(device->backlight);
++	video_output_unregister(device->output_dev);
+ 	return 0;
+ }
+ 
+diff -Nurb linux-2.6.22-570/drivers/base/bus.c linux-2.6.22-try2/drivers/base/bus.c
+--- linux-2.6.22-570/drivers/base/bus.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/base/bus.c	2007-12-19 15:29:22.000000000 -0500
+@@ -562,7 +562,6 @@
+ 
+ 	bus->drivers_probe_attr.attr.name = "drivers_probe";
+ 	bus->drivers_probe_attr.attr.mode = S_IWUSR;
+-	bus->drivers_probe_attr.attr.owner = bus->owner;
+ 	bus->drivers_probe_attr.store = store_drivers_probe;
+ 	retval = bus_create_file(bus, &bus->drivers_probe_attr);
+ 	if (retval)
+@@ -570,7 +569,6 @@
+ 
+ 	bus->drivers_autoprobe_attr.attr.name = "drivers_autoprobe";
+ 	bus->drivers_autoprobe_attr.attr.mode = S_IWUSR | S_IRUGO;
+-	bus->drivers_autoprobe_attr.attr.owner = bus->owner;
+ 	bus->drivers_autoprobe_attr.show = show_drivers_autoprobe;
+ 	bus->drivers_autoprobe_attr.store = store_drivers_autoprobe;
+ 	retval = bus_create_file(bus, &bus->drivers_autoprobe_attr);
+diff -Nurb linux-2.6.22-570/drivers/base/class.c linux-2.6.22-try2/drivers/base/class.c
+--- linux-2.6.22-570/drivers/base/class.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/base/class.c	2007-12-19 15:29:22.000000000 -0500
+@@ -312,9 +312,6 @@
+ 
+ 	pr_debug("device class '%s': release.\n", cd->class_id);
+ 
+-	kfree(cd->devt_attr);
+-	cd->devt_attr = NULL;
+-
+ 	if (cd->release)
+ 		cd->release(cd);
+ 	else if (cls->release)
+@@ -547,6 +544,9 @@
+ 	return print_dev_t(buf, class_dev->devt);
+ }
+ 
++static struct class_device_attribute class_devt_attr =
++	__ATTR(dev, S_IRUGO, show_dev, NULL);
++
+ static ssize_t store_uevent(struct class_device *class_dev,
+ 			    const char *buf, size_t count)
+ {
+@@ -554,6 +554,9 @@
+ 	return count;
+ }
+ 
++static struct class_device_attribute class_uevent_attr =
++	__ATTR(uevent, S_IWUSR, NULL, store_uevent);
++
+ void class_device_initialize(struct class_device *class_dev)
+ {
+ 	kobj_set_kset_s(class_dev, class_obj_subsys);
+@@ -603,34 +606,17 @@
+ 				  &parent_class->subsys.kobj, "subsystem");
+ 	if (error)
+ 		goto out3;
+-	class_dev->uevent_attr.attr.name = "uevent";
+-	class_dev->uevent_attr.attr.mode = S_IWUSR;
+-	class_dev->uevent_attr.attr.owner = parent_class->owner;
+-	class_dev->uevent_attr.store = store_uevent;
+-	error = class_device_create_file(class_dev, &class_dev->uevent_attr);
++
++	error = class_device_create_file(class_dev, &class_uevent_attr);
+ 	if (error)
+ 		goto out3;
+ 
+ 	if (MAJOR(class_dev->devt)) {
+-		struct class_device_attribute *attr;
+-		attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+-		if (!attr) {
+-			error = -ENOMEM;
+-			goto out4;
+-		}
+-		attr->attr.name = "dev";
+-		attr->attr.mode = S_IRUGO;
+-		attr->attr.owner = parent_class->owner;
+-		attr->show = show_dev;
+-		error = class_device_create_file(class_dev, attr);
+-		if (error) {
+-			kfree(attr);
++		error = class_device_create_file(class_dev, &class_devt_attr);
++		if (error)
+ 			goto out4;
+ 		}
+ 
+-		class_dev->devt_attr = attr;
+-	}
+-
+ 	error = class_device_add_attrs(class_dev);
+ 	if (error)
+ 		goto out5;
+@@ -671,10 +657,10 @@
+  out6:
+ 	class_device_remove_attrs(class_dev);
+  out5:
+-	if (class_dev->devt_attr)
+-		class_device_remove_file(class_dev, class_dev->devt_attr);
++	if (MAJOR(class_dev->devt))
++		class_device_remove_file(class_dev, &class_devt_attr);
+  out4:
+-	class_device_remove_file(class_dev, &class_dev->uevent_attr);
++	class_device_remove_file(class_dev, &class_uevent_attr);
+  out3:
+ 	kobject_del(&class_dev->kobj);
+  out2:
+@@ -774,9 +760,9 @@
+ 		sysfs_remove_link(&class_dev->kobj, "device");
+ 	}
+ 	sysfs_remove_link(&class_dev->kobj, "subsystem");
+-	class_device_remove_file(class_dev, &class_dev->uevent_attr);
+-	if (class_dev->devt_attr)
+-		class_device_remove_file(class_dev, class_dev->devt_attr);
++	class_device_remove_file(class_dev, &class_uevent_attr);
++	if (MAJOR(class_dev->devt))
++		class_device_remove_file(class_dev, &class_devt_attr);
+ 	class_device_remove_attrs(class_dev);
+ 	class_device_remove_groups(class_dev);
+ 
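
The class.c changes above replace per-device kmalloc'd "dev" and "uevent" attributes with single static ones declared through __ATTR(), dropping the per-attribute owner assignment along the way. A standalone sketch of what such a declaration amounts to, using simplified replicas of the sysfs structs:

#include <stdio.h>

/* Simplified replicas of the sysfs attribute structs. */
struct attribute {
	const char	*name;
	unsigned short	mode;
};

struct demo_attribute {
	struct attribute attr;
	int (*show)(char *buf);
	int (*store)(const char *buf, int count);
};

#define S_IRUGO 0444
#define __ATTR(_name, _mode, _show, _store) {			\
	.attr  = { .name = #_name, .mode = _mode },		\
	.show  = _show,						\
	.store = _store,					\
}

static int show_dev(char *buf) { return sprintf(buf, "254:0\n"); }

/* One static attribute shared by every device, as in the patch. */
static struct demo_attribute devt_attr = __ATTR(dev, S_IRUGO, show_dev, NULL);

int main(void)
{
	char buf[16];

	devt_attr.show(buf);
	printf("attr '%s' mode %o -> %s", devt_attr.attr.name,
	       devt_attr.attr.mode, buf);
	return 0;
}
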
+diff -Nurb linux-2.6.22-570/drivers/base/core.c linux-2.6.22-try2/drivers/base/core.c
+--- linux-2.6.22-570/drivers/base/core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/base/core.c	2007-12-19 15:29:23.000000000 -0500
+@@ -310,6 +310,9 @@
+ 	return count;
+ }
+ 
++static struct device_attribute uevent_attr =
++	__ATTR(uevent, S_IRUGO | S_IWUSR, show_uevent, store_uevent);
++
+ static int device_add_attributes(struct device *dev,
+ 				 struct device_attribute *attrs)
+ {
+@@ -423,6 +426,9 @@
+ 	return print_dev_t(buf, dev->devt);
+ }
+ 
++static struct device_attribute devt_attr =
++	__ATTR(dev, S_IRUGO, show_dev, NULL);
++
+ /*
+  *	devices_subsys - structure to be registered with kobject core.
+  */
+@@ -637,6 +643,80 @@
+ 	return 0;
+ }
+ 
++static int device_add_class_symlinks(struct device *dev)
++{
++	int error;
++
++	if (!dev->class)
++		return 0;
++	error = sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj,
++				  "subsystem");
++	if (error)
++		goto out;
++	/*
++	 * If this is not a "fake" compatible device, then create the
++	 * symlink from the class to the device.
++	 */
++	if (dev->kobj.parent == &dev->class->subsys.kobj)
++		return 0;
++	error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj,
++				  dev->bus_id);
++	if (error)
++		goto out_subsys;
++	/* only bus-device parents get a "device"-link */
++	if (dev->parent && dev->parent->bus) {
++		error = sysfs_create_link(&dev->kobj, &dev->parent->kobj,
++					  "device");
++		if (error)
++			goto out_busid;
++#ifdef CONFIG_SYSFS_DEPRECATED
++		{
++			char * class_name = make_class_name(dev->class->name,
++								&dev->kobj);
++			if (class_name)
++				error = sysfs_create_link(&dev->parent->kobj,
++							&dev->kobj, class_name);
++			kfree(class_name);
++			if (error)
++				goto out_device;
++		}
++#endif
++	}
++	return 0;
++
++#ifdef CONFIG_SYSFS_DEPRECATED
++out_device:
++	if (dev->parent)
++		sysfs_remove_link(&dev->kobj, "device");
++#endif
++out_busid:
++	sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id);
++out_subsys:
++	sysfs_remove_link(&dev->kobj, "subsystem");
++out:
++	return error;
++}
++
++static void device_remove_class_symlinks(struct device *dev)
++{
++	if (!dev->class)
++		return;
++	if (dev->parent) {
++#ifdef CONFIG_SYSFS_DEPRECATED
++		char *class_name;
++
++		class_name = make_class_name(dev->class->name, &dev->kobj);
++		if (class_name) {
++			sysfs_remove_link(&dev->parent->kobj, class_name);
++			kfree(class_name);
++		}
++#endif
++		sysfs_remove_link(&dev->kobj, "device");
++	}
++	sysfs_remove_link(&dev->class->subsys.kobj, dev->bus_id);
++	sysfs_remove_link(&dev->kobj, "subsystem");
++}
++
+ /**
+  *	device_add - add device to device hierarchy.
+  *	@dev:	device.
+@@ -651,7 +731,6 @@
+ int device_add(struct device *dev)
+ {
+ 	struct device *parent = NULL;
+-	char *class_name = NULL;
+ 	struct class_interface *class_intf;
+ 	int error = -EINVAL;
+ 
+@@ -681,58 +760,17 @@
+ 		blocking_notifier_call_chain(&dev->bus->bus_notifier,
+ 					     BUS_NOTIFY_ADD_DEVICE, dev);
+ 
+-	dev->uevent_attr.attr.name = "uevent";
+-	dev->uevent_attr.attr.mode = S_IRUGO | S_IWUSR;
+-	if (dev->driver)
+-		dev->uevent_attr.attr.owner = dev->driver->owner;
+-	dev->uevent_attr.store = store_uevent;
+-	dev->uevent_attr.show = show_uevent;
+-	error = device_create_file(dev, &dev->uevent_attr);
++	error = device_create_file(dev, &uevent_attr);
+ 	if (error)
+ 		goto attrError;
+ 
+ 	if (MAJOR(dev->devt)) {
+-		struct device_attribute *attr;
+-		attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+-		if (!attr) {
+-			error = -ENOMEM;
+-			goto ueventattrError;
+-		}
+-		attr->attr.name = "dev";
+-		attr->attr.mode = S_IRUGO;
+-		if (dev->driver)
+-			attr->attr.owner = dev->driver->owner;
+-		attr->show = show_dev;
+-		error = device_create_file(dev, attr);
+-		if (error) {
+-			kfree(attr);
++		error = device_create_file(dev, &devt_attr);
++		if (error)
+ 			goto ueventattrError;
+ 		}
+-
+-		dev->devt_attr = attr;
+-	}
+-
+-	if (dev->class) {
+-		sysfs_create_link(&dev->kobj, &dev->class->subsys.kobj,
+-				  "subsystem");
+-		/* If this is not a "fake" compatible device, then create the
+-		 * symlink from the class to the device. */
+-		if (dev->kobj.parent != &dev->class->subsys.kobj)
+-			sysfs_create_link(&dev->class->subsys.kobj,
+-					  &dev->kobj, dev->bus_id);
+-		if (parent) {
+-			sysfs_create_link(&dev->kobj, &dev->parent->kobj,
+-							"device");
+-#ifdef CONFIG_SYSFS_DEPRECATED
+-			class_name = make_class_name(dev->class->name,
+-							&dev->kobj);
+-			if (class_name)
+-				sysfs_create_link(&dev->parent->kobj,
+-						  &dev->kobj, class_name);
+-#endif
+-		}
+-	}
+-
++	if ((error = device_add_class_symlinks(dev)))
++		goto SymlinkError;
+ 	if ((error = device_add_attrs(dev)))
+ 		goto AttrsError;
+ 	if ((error = device_pm_add(dev)))
+@@ -756,7 +794,6 @@
+ 		up(&dev->class->sem);
+ 	}
+  Done:
+-	kfree(class_name);
+ 	put_device(dev);
+ 	return error;
+  BusError:
+@@ -767,10 +804,10 @@
+ 					     BUS_NOTIFY_DEL_DEVICE, dev);
+ 	device_remove_attrs(dev);
+  AttrsError:
+-	if (dev->devt_attr) {
+-		device_remove_file(dev, dev->devt_attr);
+-		kfree(dev->devt_attr);
+-	}
++	device_remove_class_symlinks(dev);
++ SymlinkError:
++	if (MAJOR(dev->devt))
++		device_remove_file(dev, &devt_attr);
+ 
+ 	if (dev->class) {
+ 		sysfs_remove_link(&dev->kobj, "subsystem");
+@@ -792,7 +829,7 @@
+ 		}
+ 	}
+  ueventattrError:
+-	device_remove_file(dev, &dev->uevent_attr);
++	device_remove_file(dev, &uevent_attr);
+  attrError:
+ 	kobject_uevent(&dev->kobj, KOBJ_REMOVE);
+ 	kobject_del(&dev->kobj);
+@@ -869,10 +906,8 @@
+ 
+ 	if (parent)
+ 		klist_del(&dev->knode_parent);
+-	if (dev->devt_attr) {
+-		device_remove_file(dev, dev->devt_attr);
+-		kfree(dev->devt_attr);
+-	}
++	if (MAJOR(dev->devt))
++		device_remove_file(dev, &devt_attr);
+ 	if (dev->class) {
+ 		sysfs_remove_link(&dev->kobj, "subsystem");
+ 		/* If this is not a "fake" compatible device, remove the
+@@ -926,7 +961,7 @@
+ 			up(&dev->class->sem);
+ 		}
+ 	}
+-	device_remove_file(dev, &dev->uevent_attr);
++	device_remove_file(dev, &uevent_attr);
+ 	device_remove_attrs(dev);
+ 	bus_remove_device(dev);
+ 
+@@ -1155,7 +1190,7 @@
+ {
+ 	char *old_class_name = NULL;
+ 	char *new_class_name = NULL;
+-	char *old_symlink_name = NULL;
++	char *old_device_name = NULL;
+ 	int error;
+ 
+ 	dev = get_device(dev);
+@@ -1169,42 +1204,49 @@
+ 		old_class_name = make_class_name(dev->class->name, &dev->kobj);
+ #endif
+ 
+-	if (dev->class) {
+-		old_symlink_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL);
+-		if (!old_symlink_name) {
++	old_device_name = kmalloc(BUS_ID_SIZE, GFP_KERNEL);
++	if (!old_device_name) {
+ 			error = -ENOMEM;
+-			goto out_free_old_class;
+-		}
+-		strlcpy(old_symlink_name, dev->bus_id, BUS_ID_SIZE);
++		goto out;
+ 	}
+-
++	strlcpy(old_device_name, dev->bus_id, BUS_ID_SIZE);
+ 	strlcpy(dev->bus_id, new_name, BUS_ID_SIZE);
+ 
+ 	error = kobject_rename(&dev->kobj, new_name);
++	if (error) {
++		strlcpy(dev->bus_id, old_device_name, BUS_ID_SIZE);
++		goto out;
++	}
+ 
+ #ifdef CONFIG_SYSFS_DEPRECATED
+ 	if (old_class_name) {
+ 		new_class_name = make_class_name(dev->class->name, &dev->kobj);
+ 		if (new_class_name) {
+-			sysfs_create_link(&dev->parent->kobj, &dev->kobj,
+-					  new_class_name);
++			error = sysfs_create_link(&dev->parent->kobj,
++						  &dev->kobj, new_class_name);
++			if (error)
++				goto out;
+ 			sysfs_remove_link(&dev->parent->kobj, old_class_name);
+ 		}
+ 	}
+ #endif
+ 
+ 	if (dev->class) {
+-		sysfs_remove_link(&dev->class->subsys.kobj,
+-				  old_symlink_name);
+-		sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj,
++		sysfs_remove_link(&dev->class->subsys.kobj, old_device_name);
++		error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj,
+ 				  dev->bus_id);
++		if (error) {
++			/* Uh... how to unravel this if restoring can fail? */
++			dev_err(dev, "%s: sysfs_create_link failed (%d)\n",
++				__FUNCTION__, error);
+ 	}
++	}
++out:
+ 	put_device(dev);
+ 
+ 	kfree(new_class_name);
+-	kfree(old_symlink_name);
+- out_free_old_class:
+ 	kfree(old_class_name);
++	kfree(old_device_name);
+ 
+ 	return error;
+ }
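
device_add_class_symlinks() above is a goto-unwind ladder: each successfully created link has a label that removes it, and a failure jumps to the label just past the last success, so cleanup runs in reverse creation order. A compilable sketch of the shape, with stub link functions and an injected failure:

#include <stdio.h>

static int fail_at;			/* inject a failure for the demo */

static int create_link(const char *name)
{
	if (--fail_at == 0) {
		printf("create %-9s -> FAIL\n", name);
		return -1;
	}
	printf("create %-9s -> ok\n", name);
	return 0;
}

static void remove_link(const char *name) { printf("remove %s\n", name); }

static int add_symlinks(void)
{
	int error;

	error = create_link("subsystem");
	if (error)
		goto out;
	error = create_link("bus_id");
	if (error)
		goto out_subsys;
	error = create_link("device");
	if (error)
		goto out_busid;
	return 0;

out_busid:				/* unwind in reverse creation order */
	remove_link("bus_id");
out_subsys:
	remove_link("subsystem");
out:
	return error;
}

int main(void)
{
	fail_at = 3;			/* third create_link() call fails */
	return add_symlinks() ? 1 : 0;
}
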
+diff -Nurb linux-2.6.22-570/drivers/base/firmware_class.c linux-2.6.22-try2/drivers/base/firmware_class.c
+--- linux-2.6.22-570/drivers/base/firmware_class.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/base/firmware_class.c	2007-12-19 15:29:22.000000000 -0500
+@@ -175,7 +175,7 @@
+ static DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store);
+ 
+ static ssize_t
+-firmware_data_read(struct kobject *kobj,
++firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+ 		   char *buffer, loff_t offset, size_t count)
+ {
+ 	struct device *dev = to_dev(kobj);
+@@ -240,7 +240,7 @@
+  *	the driver as a firmware image.
+  **/
+ static ssize_t
+-firmware_data_write(struct kobject *kobj,
++firmware_data_write(struct kobject *kobj, struct bin_attribute *bin_attr,
+ 		    char *buffer, loff_t offset, size_t count)
+ {
+ 	struct device *dev = to_dev(kobj);
+@@ -271,7 +271,7 @@
+ }
+ 
+ static struct bin_attribute firmware_attr_data_tmpl = {
+-	.attr = {.name = "data", .mode = 0644, .owner = THIS_MODULE},
++	.attr = {.name = "data", .mode = 0644},
+ 	.size = 0,
+ 	.read = firmware_data_read,
+ 	.write = firmware_data_write,
+diff -Nurb linux-2.6.22-570/drivers/block/acsi_slm.c linux-2.6.22-try2/drivers/block/acsi_slm.c
+--- linux-2.6.22-570/drivers/block/acsi_slm.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/block/acsi_slm.c	2007-12-19 15:29:24.000000000 -0500
+@@ -367,7 +367,7 @@
+ 	int length;
+ 	int end;
+ 
+-	if (!(page = __get_free_page( GFP_KERNEL )))
++	if (!(page = __get_free_page(GFP_TEMPORARY)))
+ 		return( -ENOMEM );
+ 	
+ 	length = slm_getstats( (char *)page, iminor(node) );
+diff -Nurb linux-2.6.22-570/drivers/block/cciss_scsi.c linux-2.6.22-try2/drivers/block/cciss_scsi.c
+--- linux-2.6.22-570/drivers/block/cciss_scsi.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/block/cciss_scsi.c	2007-12-19 15:29:23.000000000 -0500
+@@ -555,7 +555,6 @@
+ {
+ 	struct scsi_cmnd *cmd;
+ 	ctlr_info_t *ctlr;
+-	u64bit addr64;
+ 	ErrorInfo_struct *ei;
+ 
+ 	ei = cp->err_info;
+@@ -569,20 +568,7 @@
+ 	cmd = (struct scsi_cmnd *) cp->scsi_cmd;	
+ 	ctlr = hba[cp->ctlr];
+ 
+-	/* undo the DMA mappings */
+-
+-	if (cmd->use_sg) {
+-		pci_unmap_sg(ctlr->pdev,
+-			cmd->request_buffer, cmd->use_sg,
+-				cmd->sc_data_direction); 
+-	}
+-	else if (cmd->request_bufflen) {
+-		addr64.val32.lower = cp->SG[0].Addr.lower;
+-                addr64.val32.upper = cp->SG[0].Addr.upper;
+-                pci_unmap_single(ctlr->pdev, (dma_addr_t) addr64.val,
+-                	cmd->request_bufflen, 
+-				cmd->sc_data_direction);
+-	}
++	scsi_dma_unmap(cmd);
+ 
+ 	cmd->result = (DID_OK << 16); 		/* host byte */
+ 	cmd->result |= (COMMAND_COMPLETE << 8);	/* msg byte */
+@@ -597,7 +583,7 @@
+ 		ei->SenseLen > SCSI_SENSE_BUFFERSIZE ?
+ 			SCSI_SENSE_BUFFERSIZE : 
+ 			ei->SenseLen);
+-	cmd->resid = ei->ResidualCnt;
++	scsi_set_resid(cmd, ei->ResidualCnt);
+ 
+ 	if(ei->CommandStatus != 0) 
+ 	{ /* an error has occurred */ 
+@@ -1204,46 +1190,29 @@
+ 		CommandList_struct *cp,	
+ 		struct scsi_cmnd *cmd)
+ {
+-	unsigned int use_sg, nsegs=0, len;
+-	struct scatterlist *scatter = (struct scatterlist *) cmd->request_buffer;
++	unsigned int len;
++	struct scatterlist *sg;
+ 	__u64 addr64;
++	int use_sg, i;
+ 
+-	/* is it just one virtual address? */	
+-	if (!cmd->use_sg) {
+-		if (cmd->request_bufflen) {	/* anything to xfer? */
+-
+-			addr64 = (__u64) pci_map_single(pdev, 
+-				cmd->request_buffer, 
+-				cmd->request_bufflen, 
+-				cmd->sc_data_direction); 
++	BUG_ON(scsi_sg_count(cmd) > MAXSGENTRIES);
+ 	
+-			cp->SG[0].Addr.lower = 
++	use_sg = scsi_dma_map(cmd);
++	if (use_sg) {	/* any segments to transfer? */
++		scsi_for_each_sg(cmd, sg, use_sg, i) {
++			addr64 = (__u64) sg_dma_address(sg);
++			len  = sg_dma_len(sg);
++			cp->SG[i].Addr.lower =
+ 			  (__u32) (addr64 & (__u64) 0x00000000FFFFFFFF);
+-			cp->SG[0].Addr.upper =
++			cp->SG[i].Addr.upper =
+ 			  (__u32) ((addr64 >> 32) & (__u64) 0x00000000FFFFFFFF);
+-			cp->SG[0].Len = cmd->request_bufflen;
+-			nsegs=1;
++			cp->SG[i].Len = len;
++			cp->SG[i].Ext = 0;  // we are not chaining
+ 		}
+-	} /* else, must be a list of virtual addresses.... */
+-	else if (cmd->use_sg <= MAXSGENTRIES) {	/* not too many addrs? */
+-
+-		use_sg = pci_map_sg(pdev, cmd->request_buffer, cmd->use_sg,
+-			cmd->sc_data_direction);
+-
+-		for (nsegs=0; nsegs < use_sg; nsegs++) {
+-			addr64 = (__u64) sg_dma_address(&scatter[nsegs]);
+-			len  = sg_dma_len(&scatter[nsegs]);
+-			cp->SG[nsegs].Addr.lower =
+-			  (__u32) (addr64 & (__u64) 0x00000000FFFFFFFF);
+-			cp->SG[nsegs].Addr.upper =
+-			  (__u32) ((addr64 >> 32) & (__u64) 0x00000000FFFFFFFF);
+-			cp->SG[nsegs].Len = len;
+-			cp->SG[nsegs].Ext = 0;  // we are not chaining
+ 		}
+-	} else BUG();
+ 
+-	cp->Header.SGList = (__u8) nsegs;   /* no. SGs contig in this cmd */
+-	cp->Header.SGTotal = (__u16) nsegs; /* total sgs in this cmd list */
++	cp->Header.SGList = (__u8) use_sg;   /* no. SGs contig in this cmd */
++	cp->Header.SGTotal = (__u16) use_sg; /* total sgs in this cmd list */
+ 	return;
+ }
+ 
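
The rewritten mapping loop above fills each hardware scatter-gather descriptor by splitting the 64-bit bus address returned by sg_dma_address() into two 32-bit halves. A minimal standalone C illustration of that split (the sample address is invented):

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

/* Mirrors the lower/upper split written into each cp->SG[i] entry. */
static void split_addr64(uint64_t addr64, uint32_t *lower, uint32_t *upper)
{
	*lower = (uint32_t)(addr64 & 0xFFFFFFFFULL);
	*upper = (uint32_t)((addr64 >> 32) & 0xFFFFFFFFULL);
}

int main(void)
{
	uint32_t lo, hi;

	split_addr64(0x123456789ABCDEF0ULL, &lo, &hi);
	/* prints lower=0x9abcdef0 upper=0x12345678 */
	printf("lower=%#" PRIx32 " upper=%#" PRIx32 "\n", lo, hi);
	return 0;
}
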
+diff -Nurb linux-2.6.22-570/drivers/block/loop.c linux-2.6.22-try2/drivers/block/loop.c
+--- linux-2.6.22-570/drivers/block/loop.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/drivers/block/loop.c	2007-12-19 15:29:24.000000000 -0500
+@@ -68,6 +68,7 @@
+ #include <linux/loop.h>
+ #include <linux/compat.h>
+ #include <linux/suspend.h>
++#include <linux/freezer.h>
+ #include <linux/writeback.h>
+ #include <linux/buffer_head.h>		/* for invalidate_bdev() */
+ #include <linux/completion.h>
+@@ -577,13 +578,6 @@
+ 	struct loop_device *lo = data;
+ 	struct bio *bio;
+ 
+-	/*
+-	 * loop can be used in an encrypted device,
+-	 * hence, it mustn't be stopped at all
+-	 * because it could be indirectly used during suspension
+-	 */
+-	current->flags |= PF_NOFREEZE;
+-
+ 	set_user_nice(current, -20);
+ 
+ 	while (!kthread_should_stop() || lo->lo_bio) {
+diff -Nurb linux-2.6.22-570/drivers/block/pktcdvd.c linux-2.6.22-try2/drivers/block/pktcdvd.c
+--- linux-2.6.22-570/drivers/block/pktcdvd.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/block/pktcdvd.c	2007-12-19 15:29:24.000000000 -0500
+@@ -146,8 +146,7 @@
+  **********************************************************/
+ 
+ #define DEF_ATTR(_obj,_name,_mode) \
+-	static struct attribute _obj = { \
+-		.name = _name, .owner = THIS_MODULE, .mode = _mode }
++	static struct attribute _obj = { .name = _name, .mode = _mode }
+ 
+ /**********************************************************
+   /sys/class/pktcdvd/pktcdvd[0-7]/
+@@ -1594,6 +1593,7 @@
+ 	long min_sleep_time, residue;
+ 
+ 	set_user_nice(current, -20);
++	set_freezable();
+ 
+ 	for (;;) {
+ 		DECLARE_WAITQUEUE(wait, current);
+diff -Nurb linux-2.6.22-570/drivers/char/apm-emulation.c linux-2.6.22-try2/drivers/char/apm-emulation.c
+--- linux-2.6.22-570/drivers/char/apm-emulation.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/char/apm-emulation.c	2007-12-19 15:29:24.000000000 -0500
+@@ -20,6 +20,7 @@
+ #include <linux/sched.h>
+ #include <linux/pm.h>
+ #include <linux/apm-emulation.h>
++#include <linux/freezer.h>
+ #include <linux/device.h>
+ #include <linux/kernel.h>
+ #include <linux/list.h>
+@@ -329,13 +330,8 @@
+ 			/*
+ 			 * Wait for the suspend/resume to complete.  If there
+ 			 * are pending acknowledges, we wait here for them.
+-			 *
+-			 * Note: we need to ensure that the PM subsystem does
+-			 * not kick us out of the wait when it suspends the
+-			 * threads.
+ 			 */
+ 			flags = current->flags;
+-			current->flags |= PF_NOFREEZE;
+ 
+ 			wait_event(apm_suspend_waitqueue,
+ 				   as->suspend_state == SUSPEND_DONE);
+@@ -365,13 +361,8 @@
+ 			/*
+ 			 * Wait for the suspend/resume to complete.  If there
+ 			 * are pending acknowledges, we wait here for them.
+-			 *
+-			 * Note: we need to ensure that the PM subsystem does
+-			 * not kick us out of the wait when it suspends the
+-			 * threads.
+ 			 */
+ 			flags = current->flags;
+-			current->flags |= PF_NOFREEZE;
+ 
+ 			wait_event_interruptible(apm_suspend_waitqueue,
+ 					 as->suspend_state == SUSPEND_DONE);
+@@ -598,7 +589,6 @@
+ 		kapmd_tsk = NULL;
+ 		return ret;
+ 	}
+-	kapmd_tsk->flags |= PF_NOFREEZE;
+ 	wake_up_process(kapmd_tsk);
+ 
+ #ifdef CONFIG_PROC_FS
+diff -Nurb linux-2.6.22-570/drivers/char/hvc_console.c linux-2.6.22-try2/drivers/char/hvc_console.c
+--- linux-2.6.22-570/drivers/char/hvc_console.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/char/hvc_console.c	2007-12-19 15:29:24.000000000 -0500
+@@ -674,11 +674,12 @@
+  * calling hvc_poll() who determines whether a console adapter support
+  * interrupts.
+  */
+-int khvcd(void *unused)
++static int khvcd(void *unused)
+ {
+ 	int poll_mask;
+ 	struct hvc_struct *hp;
+ 
++	set_freezable();
+ 	__set_current_state(TASK_RUNNING);
+ 	do {
+ 		poll_mask = 0;
+diff -Nurb linux-2.6.22-570/drivers/char/ipmi/ipmi_msghandler.c linux-2.6.22-try2/drivers/char/ipmi/ipmi_msghandler.c
+--- linux-2.6.22-570/drivers/char/ipmi/ipmi_msghandler.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/char/ipmi/ipmi_msghandler.c	2007-12-19 15:29:22.000000000 -0500
+@@ -2171,52 +2171,42 @@
+ 	int err;
+ 
+ 	bmc->device_id_attr.attr.name = "device_id";
+-	bmc->device_id_attr.attr.owner = THIS_MODULE;
+ 	bmc->device_id_attr.attr.mode = S_IRUGO;
+ 	bmc->device_id_attr.show = device_id_show;
+ 
+ 	bmc->provides_dev_sdrs_attr.attr.name = "provides_device_sdrs";
+-	bmc->provides_dev_sdrs_attr.attr.owner = THIS_MODULE;
+ 	bmc->provides_dev_sdrs_attr.attr.mode = S_IRUGO;
+ 	bmc->provides_dev_sdrs_attr.show = provides_dev_sdrs_show;
+ 
+ 	bmc->revision_attr.attr.name = "revision";
+-	bmc->revision_attr.attr.owner = THIS_MODULE;
+ 	bmc->revision_attr.attr.mode = S_IRUGO;
+ 	bmc->revision_attr.show = revision_show;
+ 
+ 	bmc->firmware_rev_attr.attr.name = "firmware_revision";
+-	bmc->firmware_rev_attr.attr.owner = THIS_MODULE;
+ 	bmc->firmware_rev_attr.attr.mode = S_IRUGO;
+ 	bmc->firmware_rev_attr.show = firmware_rev_show;
+ 
+ 	bmc->version_attr.attr.name = "ipmi_version";
+-	bmc->version_attr.attr.owner = THIS_MODULE;
+ 	bmc->version_attr.attr.mode = S_IRUGO;
+ 	bmc->version_attr.show = ipmi_version_show;
+ 
+ 	bmc->add_dev_support_attr.attr.name = "additional_device_support";
+-	bmc->add_dev_support_attr.attr.owner = THIS_MODULE;
+ 	bmc->add_dev_support_attr.attr.mode = S_IRUGO;
+ 	bmc->add_dev_support_attr.show = add_dev_support_show;
+ 
+ 	bmc->manufacturer_id_attr.attr.name = "manufacturer_id";
+-	bmc->manufacturer_id_attr.attr.owner = THIS_MODULE;
+ 	bmc->manufacturer_id_attr.attr.mode = S_IRUGO;
+ 	bmc->manufacturer_id_attr.show = manufacturer_id_show;
+ 
+ 	bmc->product_id_attr.attr.name = "product_id";
+-	bmc->product_id_attr.attr.owner = THIS_MODULE;
+ 	bmc->product_id_attr.attr.mode = S_IRUGO;
+ 	bmc->product_id_attr.show = product_id_show;
+ 
+ 	bmc->guid_attr.attr.name = "guid";
+-	bmc->guid_attr.attr.owner = THIS_MODULE;
+ 	bmc->guid_attr.attr.mode = S_IRUGO;
+ 	bmc->guid_attr.show = guid_show;
+ 
+ 	bmc->aux_firmware_rev_attr.attr.name = "aux_firmware_revision";
+-	bmc->aux_firmware_rev_attr.attr.owner = THIS_MODULE;
+ 	bmc->aux_firmware_rev_attr.attr.mode = S_IRUGO;
+ 	bmc->aux_firmware_rev_attr.show = aux_firmware_rev_show;
+ 
+diff -Nurb linux-2.6.22-570/drivers/char/keyboard.c linux-2.6.22-try2/drivers/char/keyboard.c
+--- linux-2.6.22-570/drivers/char/keyboard.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/char/keyboard.c	2007-12-19 15:29:24.000000000 -0500
+@@ -1150,6 +1150,7 @@
+ 		sysrq_down = 0;
+ 	if (sysrq_down && down && !rep) {
+ 		handle_sysrq(kbd_sysrq_xlate[keycode], tty);
++		sysrq_down = 0;		/* In case we miss the 'up' event. */
+ 		return;
+ 	}
+ #endif
+diff -Nurb linux-2.6.22-570/drivers/cpufreq/cpufreq_stats.c linux-2.6.22-try2/drivers/cpufreq/cpufreq_stats.c
+--- linux-2.6.22-570/drivers/cpufreq/cpufreq_stats.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/cpufreq/cpufreq_stats.c	2007-12-19 15:29:22.000000000 -0500
+@@ -25,8 +25,7 @@
+ 
+ #define CPUFREQ_STATDEVICE_ATTR(_name,_mode,_show) \
+ static struct freq_attr _attr_##_name = {\
+-	.attr = {.name = __stringify(_name), .owner = THIS_MODULE, \
+-		.mode = _mode, }, \
++	.attr = {.name = __stringify(_name), .mode = _mode, }, \
+ 	.show = _show,\
+ };
+ 
+diff -Nurb linux-2.6.22-570/drivers/cpufreq/cpufreq_userspace.c linux-2.6.22-try2/drivers/cpufreq/cpufreq_userspace.c
+--- linux-2.6.22-570/drivers/cpufreq/cpufreq_userspace.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/cpufreq/cpufreq_userspace.c	2007-12-19 15:29:22.000000000 -0500
+@@ -120,7 +120,7 @@
+ 
+ static struct freq_attr freq_attr_scaling_setspeed =
+ {
+-	.attr = { .name = "scaling_setspeed", .mode = 0644, .owner = THIS_MODULE },
++	.attr = { .name = "scaling_setspeed", .mode = 0644 },
+ 	.show = show_speed,
+ 	.store = store_speed,
+ };
+diff -Nurb linux-2.6.22-570/drivers/cpufreq/freq_table.c linux-2.6.22-try2/drivers/cpufreq/freq_table.c
+--- linux-2.6.22-570/drivers/cpufreq/freq_table.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/cpufreq/freq_table.c	2007-12-19 15:29:22.000000000 -0500
+@@ -199,7 +199,6 @@
+ struct freq_attr cpufreq_freq_attr_scaling_available_freqs = {
+ 	.attr = { .name = "scaling_available_frequencies",
+ 		  .mode = 0444,
+-		  .owner=THIS_MODULE
+ 		},
+ 	.show = show_available_freqs,
+ };
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/Kconfig linux-2.6.22-try2/drivers/cpuidle/Kconfig
+--- linux-2.6.22-570/drivers/cpuidle/Kconfig	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/cpuidle/Kconfig	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,39 @@
++menu "CPU idle PM support"
++
++config CPU_IDLE
++	bool "CPU idle PM support"
++	help
++	  CPU idle is a generic framework for supporting software-controlled
++	  idle processor power management.  It includes modular cross-platform
++	  governors that can be swapped at runtime.
++
++	  If you're using a mobile platform that supports CPU idle PM (e.g.
++	  an ACPI-capable notebook), you should say Y here.
++
++if CPU_IDLE
++
++comment "Governors"
++
++config CPU_IDLE_GOV_LADDER
++	tristate "'ladder' governor"
++	depends on CPU_IDLE
++	default y
++	help
++	  This cpuidle governor promotes and demotes through the supported idle
++	  states using residency time and bus master activity as metrics.  This
++	  algorithm was originally introduced in the old ACPI processor driver.
++
++config CPU_IDLE_GOV_MENU
++	tristate "'menu' governor"
++	depends on CPU_IDLE && NO_HZ
++	default y
++	help
++	  This cpuidle governor evaluates all available states and chooses the
++	  deepest state that meets all of the following constraints: bus master
++	  (BM) activity, the expected time until the next timer interrupt, and
++	  the time since the last break event.  It is designed to minimize
++	  power consumption.  Dynticks (NO_HZ) is currently required.
++
++endif	# CPU_IDLE
++
++endmenu
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/Makefile linux-2.6.22-try2/drivers/cpuidle/Makefile
+--- linux-2.6.22-570/drivers/cpuidle/Makefile	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/cpuidle/Makefile	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,5 @@
++#
++# Makefile for cpuidle.
++#
++
++obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/cpuidle.c linux-2.6.22-try2/drivers/cpuidle/cpuidle.c
+--- linux-2.6.22-570/drivers/cpuidle/cpuidle.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/cpuidle/cpuidle.c	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,307 @@
++/*
++ * cpuidle.c - core cpuidle infrastructure
++ *
++ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
++ *               Shaohua Li <shaohua.li@intel.com>
++ *               Adam Belay <abelay@novell.com>
++ *
++ * This code is licensed under the GPL.
++ */
++
++#include <linux/kernel.h>
++#include <linux/mutex.h>
++#include <linux/sched.h>
++#include <linux/notifier.h>
++#include <linux/cpu.h>
++#include <linux/latency.h>
++#include <linux/cpuidle.h>
++
++#include "cpuidle.h"
++
++DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
++EXPORT_PER_CPU_SYMBOL_GPL(cpuidle_devices);
++
++DEFINE_MUTEX(cpuidle_lock);
++LIST_HEAD(cpuidle_detected_devices);
++static void (*pm_idle_old)(void);
++
++
++/**
++ * cpuidle_idle_call - the main idle loop
++ *
++ * NOTE: no locks or semaphores should be used here
++ */
++static void cpuidle_idle_call(void)
++{
++	struct cpuidle_device *dev = __get_cpu_var(cpuidle_devices);
++	struct cpuidle_state *target_state;
++	int next_state;
++
++	/* check if the device is ready */
++	if (!dev || dev->status != CPUIDLE_STATUS_DOIDLE) {
++		if (pm_idle_old)
++			pm_idle_old();
++		else
++			local_irq_enable();
++		return;
++	}
++
++	/* ask the governor for the next state */
++	next_state = cpuidle_curr_governor->select(dev);
++	if (need_resched())
++		return;
++	target_state = &dev->states[next_state];
++
++	/* enter the state and update stats */
++	dev->last_residency = target_state->enter(dev, target_state);
++	dev->last_state = target_state;
++	target_state->time += dev->last_residency;
++	target_state->usage++;
++
++	/* give the governor an opportunity to reflect on the outcome */
++	if (cpuidle_curr_governor->reflect)
++		cpuidle_curr_governor->reflect(dev);
++}
++
++/**
++ * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
++ */
++void cpuidle_install_idle_handler(void)
++{
++	if (pm_idle != cpuidle_idle_call) {
++		/* Make sure all changes finished before we switch to new idle */
++		smp_wmb();
++		pm_idle = cpuidle_idle_call;
++	}
++}
++
++/**
++ * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
++ */
++void cpuidle_uninstall_idle_handler(void)
++{
++	if (pm_idle != pm_idle_old) {
++		pm_idle = pm_idle_old;
++		cpu_idle_wait();
++	}
++}
++
++/**
++ * cpuidle_rescan_device - prepares for a new state configuration
++ * @dev: the target device
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++void cpuidle_rescan_device(struct cpuidle_device *dev)
++{
++	int i;
++
++	if (cpuidle_curr_governor->scan)
++		cpuidle_curr_governor->scan(dev);
++
++	for (i = 0; i < dev->state_count; i++) {
++		dev->states[i].usage = 0;
++		dev->states[i].time = 0;
++	}
++}
++
++/**
++ * cpuidle_add_device - attaches the driver to a CPU instance
++ * @sys_dev: the system device (driver model CPU representation)
++ */
++static int cpuidle_add_device(struct sys_device *sys_dev)
++{
++	int cpu = sys_dev->id;
++	struct cpuidle_device *dev;
++
++	dev = per_cpu(cpuidle_devices, cpu);
++
++	mutex_lock(&cpuidle_lock);
++	if (cpu_is_offline(cpu)) {
++		mutex_unlock(&cpuidle_lock);
++		return 0;
++	}
++
++	if (!dev) {
++		dev = kzalloc(sizeof(struct cpuidle_device), GFP_KERNEL);
++		if (!dev) {
++			mutex_unlock(&cpuidle_lock);
++			return -ENOMEM;
++		}
++		init_completion(&dev->kobj_unregister);
++		per_cpu(cpuidle_devices, cpu) = dev;
++	}
++	dev->cpu = cpu;
++
++	if (dev->status & CPUIDLE_STATUS_DETECTED) {
++		mutex_unlock(&cpuidle_lock);
++		return 0;
++	}
++
++	cpuidle_add_sysfs(sys_dev);
++
++	if (cpuidle_curr_driver) {
++		if (cpuidle_attach_driver(dev))
++			goto err_ret;
++	}
++
++	if (cpuidle_curr_governor) {
++		if (cpuidle_attach_governor(dev)) {
++			cpuidle_detach_driver(dev);
++			goto err_ret;
++		}
++	}
++
++	if (cpuidle_device_can_idle(dev))
++		cpuidle_install_idle_handler();
++
++	list_add(&dev->device_list, &cpuidle_detected_devices);
++	dev->status |= CPUIDLE_STATUS_DETECTED;
++
++err_ret:
++	mutex_unlock(&cpuidle_lock);
++
++	return 0;
++}
++
++/**
++ * __cpuidle_remove_device - detaches the driver from a CPU instance
++ * @sys_dev: the system device (driver model CPU representation)
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++static int __cpuidle_remove_device(struct sys_device *sys_dev)
++{
++	struct cpuidle_device *dev;
++
++	dev = per_cpu(cpuidle_devices, sys_dev->id);
++
++	if (!(dev->status & CPUIDLE_STATUS_DETECTED)) {
++		return 0;
++	}
++	dev->status &= ~CPUIDLE_STATUS_DETECTED;
++	/* NOTE: we don't wait because the cpu is already offline */
++	if (cpuidle_curr_governor)
++		cpuidle_detach_governor(dev);
++	if (cpuidle_curr_driver)
++		cpuidle_detach_driver(dev);
++	cpuidle_remove_sysfs(sys_dev);
++	list_del(&dev->device_list);
++	wait_for_completion(&dev->kobj_unregister);
++	per_cpu(cpuidle_devices, sys_dev->id) = NULL;
++	kfree(dev);
++
++	return 0;
++}
++
++/**
++ * cpuidle_remove_device - detaches the driver from a CPU instance
++ * @sys_dev: the system device (driver model CPU representation)
++ */
++static int cpuidle_remove_device(struct sys_device *sys_dev)
++{
++	int ret;
++	mutex_lock(&cpuidle_lock);
++	ret = __cpuidle_remove_device(sys_dev);
++	mutex_unlock(&cpuidle_lock);
++
++	return ret;
++}
++
++static struct sysdev_driver cpuidle_sysdev_driver = {
++	.add		= cpuidle_add_device,
++	.remove		= cpuidle_remove_device,
++};
++
++static int cpuidle_cpu_callback(struct notifier_block *nfb,
++					unsigned long action, void *hcpu)
++{
++	struct sys_device *sys_dev;
++
++	sys_dev = get_cpu_sysdev((unsigned long)hcpu);
++
++	switch (action) {
++	case CPU_ONLINE:
++		cpuidle_add_device(sys_dev);
++		break;
++	case CPU_DOWN_PREPARE:
++		mutex_lock(&cpuidle_lock);
++		break;
++	case CPU_DEAD:
++		__cpuidle_remove_device(sys_dev);
++		mutex_unlock(&cpuidle_lock);
++		break;
++	case CPU_DOWN_FAILED:
++		mutex_unlock(&cpuidle_lock);
++		break;
++	}
++
++	return NOTIFY_OK;
++}
++
++static struct notifier_block __cpuinitdata cpuidle_cpu_notifier = {
++	.notifier_call = cpuidle_cpu_callback,
++};
++
++#ifdef CONFIG_SMP
++
++static void smp_callback(void *v)
++{
++	/* we already woke the CPU up, nothing more to do */
++}
++
++/*
++ * This function gets called when a part of the kernel has a new latency
++ * requirement.  This means we need to get all processors out of their C-state,
++ * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
++ * wakes them all right up.
++ */
++static int cpuidle_latency_notify(struct notifier_block *b,
++		unsigned long l, void *v)
++{
++	smp_call_function(smp_callback, NULL, 0, 1);
++	return NOTIFY_OK;
++}
++
++static struct notifier_block cpuidle_latency_notifier = {
++	.notifier_call = cpuidle_latency_notify,
++};
++
++#define latency_notifier_init(x) do { register_latency_notifier(x); } while (0)
++
++#else /* CONFIG_SMP */
++
++#define latency_notifier_init(x) do { } while (0)
++
++#endif /* CONFIG_SMP */
++
++/**
++ * cpuidle_init - core initializer
++ */
++static int __init cpuidle_init(void)
++{
++	int ret;
++
++	pm_idle_old = pm_idle;
++
++	ret = cpuidle_add_class_sysfs(&cpu_sysdev_class);
++	if (ret)
++		return ret;
++
++	register_hotcpu_notifier(&cpuidle_cpu_notifier);
++
++	ret = sysdev_driver_register(&cpu_sysdev_class, &cpuidle_sysdev_driver);
++
++	if (ret) {
++		cpuidle_remove_class_sysfs(&cpu_sysdev_class);
++		printk(KERN_ERR "cpuidle: failed to initialize\n");
++		return ret;
++	}
++
++	latency_notifier_init(&cpuidle_latency_notifier);
++
++	return 0;
++}
++
++core_initcall(cpuidle_init);
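
cpuidle_idle_call() above ties the pieces together: the governor's select() picks a state index, enter() runs the idle state and reports the residency, the per-state counters are updated, and reflect() lets the governor learn from the outcome. A toy userspace model of that control flow (the state table and all numbers are invented; this is not the kernel API):

#include <stdio.h>

/* Toy per-state bookkeeping standing in for dev->states[]. */
struct state { const char *name; int usage; long time; };

static struct state states[] = { { "C1" }, { "C2" }, { "C3" } };

static int select_state(int step) { return step % 3; }       /* ->select() */
static int enter_state(int idx) { return (idx + 1) * 100; }  /* residency, us */

int main(void)
{
	int step;

	for (step = 0; step < 6; step++) {
		int idx = select_state(step);		/* governor picks a state */
		int residency = enter_state(idx);	/* idle, measure residency */

		states[idx].time += residency;		/* stats, as in */
		states[idx].usage++;			/* cpuidle_idle_call() */
	}
	for (step = 0; step < 3; step++)
		printf("%s: usage=%d time=%ldus\n", states[step].name,
		       states[step].usage, states[step].time);
	return 0;
}
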
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/cpuidle.h linux-2.6.22-try2/drivers/cpuidle/cpuidle.h
+--- linux-2.6.22-570/drivers/cpuidle/cpuidle.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/cpuidle/cpuidle.h	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,50 @@
++/*
++ * cpuidle.h - The internal header file
++ */
++
++#ifndef __DRIVER_CPUIDLE_H
++#define __DRIVER_CPUIDLE_H
++
++#include <linux/sysdev.h>
++
++/* For internal use only */
++extern struct cpuidle_governor *cpuidle_curr_governor;
++extern struct cpuidle_driver *cpuidle_curr_driver;
++extern struct list_head cpuidle_drivers;
++extern struct list_head cpuidle_governors;
++extern struct list_head cpuidle_detected_devices;
++extern struct mutex cpuidle_lock;
++
++/* idle loop */
++extern void cpuidle_install_idle_handler(void);
++extern void cpuidle_uninstall_idle_handler(void);
++extern void cpuidle_rescan_device(struct cpuidle_device *dev);
++
++/* drivers */
++extern int cpuidle_attach_driver(struct cpuidle_device *dev);
++extern void cpuidle_detach_driver(struct cpuidle_device *dev);
++extern int cpuidle_switch_driver(struct cpuidle_driver *drv);
++
++/* governors */
++extern int cpuidle_attach_governor(struct cpuidle_device *dev);
++extern void cpuidle_detach_governor(struct cpuidle_device *dev);
++extern int cpuidle_switch_governor(struct cpuidle_governor *gov);
++
++/* sysfs */
++extern int cpuidle_add_class_sysfs(struct sysdev_class *cls);
++extern void cpuidle_remove_class_sysfs(struct sysdev_class *cls);
++extern int cpuidle_add_driver_sysfs(struct cpuidle_device *device);
++extern void cpuidle_remove_driver_sysfs(struct cpuidle_device *device);
++extern int cpuidle_add_sysfs(struct sys_device *sysdev);
++extern void cpuidle_remove_sysfs(struct sys_device *sysdev);
++
++/**
++ * cpuidle_device_can_idle - determines if a CPU can utilize the idle loop
++ * @dev: the target CPU
++ */
++static inline int cpuidle_device_can_idle(struct cpuidle_device *dev)
++{
++	return (dev->status == CPUIDLE_STATUS_DOIDLE);
++}
++
++#endif /* __DRIVER_CPUIDLE_H */
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/driver.c linux-2.6.22-try2/drivers/cpuidle/driver.c
+--- linux-2.6.22-570/drivers/cpuidle/driver.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/cpuidle/driver.c	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,276 @@
++/*
++ * driver.c - driver support
++ *
++ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
++ *               Shaohua Li <shaohua.li@intel.com>
++ *               Adam Belay <abelay@novell.com>
++ *
++ * This code is licensed under the GPL.
++ */
++
++#include <linux/mutex.h>
++#include <linux/module.h>
++#include <linux/cpuidle.h>
++
++#include "cpuidle.h"
++
++LIST_HEAD(cpuidle_drivers);
++struct cpuidle_driver *cpuidle_curr_driver;
++
++
++/**
++ * cpuidle_attach_driver - attaches a driver to a CPU
++ * @dev: the target CPU
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++int cpuidle_attach_driver(struct cpuidle_device *dev)
++{
++	int ret;
++
++	if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED)
++		return -EIO;
++
++	if (!try_module_get(cpuidle_curr_driver->owner))
++		return -EINVAL;
++
++	ret = cpuidle_curr_driver->init(dev);
++	if (ret) {
++		module_put(cpuidle_curr_driver->owner);
++		printk(KERN_INFO "cpuidle: driver %s failed to attach to "
++			"cpu %d\n", cpuidle_curr_driver->name, dev->cpu);
++	} else {
++		if (dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED)
++			cpuidle_rescan_device(dev);
++		smp_wmb();
++		dev->status |= CPUIDLE_STATUS_DRIVER_ATTACHED;
++		cpuidle_add_driver_sysfs(dev);
++	}
++
++	return ret;
++}
++
++/**
++ * cpuidle_detach_driver - detaches a driver from a CPU
++ * @dev: the target CPU
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++void cpuidle_detach_driver(struct cpuidle_device *dev)
++{
++	if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) {
++		cpuidle_remove_driver_sysfs(dev);
++		dev->status &= ~CPUIDLE_STATUS_DRIVER_ATTACHED;
++		if (cpuidle_curr_driver->exit)
++			cpuidle_curr_driver->exit(dev);
++		module_put(cpuidle_curr_driver->owner);
++	}
++}
++
++/**
++ * __cpuidle_find_driver - finds a driver of the specified name
++ * @str: the name
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++static struct cpuidle_driver * __cpuidle_find_driver(const char *str)
++{
++	struct cpuidle_driver *drv;
++
++	list_for_each_entry(drv, &cpuidle_drivers, driver_list)
++		if (!strnicmp(str, drv->name, CPUIDLE_NAME_LEN))
++			return drv;
++
++	return NULL;
++}
++
++/**
++ * cpuidle_switch_driver - changes the driver
++ * @drv: the new target driver
++ *
++ * NOTE: "drv" can be NULL to specify disabled
++ * Must be called with cpuidle_lock acquired.
++ */
++int cpuidle_switch_driver(struct cpuidle_driver *drv)
++{
++	struct cpuidle_device *dev;
++
++	if (drv == cpuidle_curr_driver)
++		return -EINVAL;
++
++	cpuidle_uninstall_idle_handler();
++
++	if (cpuidle_curr_driver)
++		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
++			cpuidle_detach_driver(dev);
++
++	cpuidle_curr_driver = drv;
++
++	if (drv) {
++		int ret = 1;
++		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
++			if (cpuidle_attach_driver(dev) == 0)
++				ret = 0;
++
++		/* If attach fails on all devices, switch to the NULL driver */
++		if (ret)
++			cpuidle_curr_driver = NULL;
++
++		if (cpuidle_curr_driver && cpuidle_curr_governor) {
++			printk(KERN_INFO "cpuidle: using driver %s\n",
++					drv->name);
++			cpuidle_install_idle_handler();
++		}
++	}
++
++	return 0;
++}
++
++/**
++ * cpuidle_register_driver - registers a driver
++ * @drv: the driver
++ */
++int cpuidle_register_driver(struct cpuidle_driver *drv)
++{
++	int ret = -EEXIST;
++
++	if (!drv || !drv->init)
++		return -EINVAL;
++
++	mutex_lock(&cpuidle_lock);
++	if (__cpuidle_find_driver(drv->name) == NULL) {
++		ret = 0;
++		list_add_tail(&drv->driver_list, &cpuidle_drivers);
++		if (!cpuidle_curr_driver)
++			cpuidle_switch_driver(drv);
++	}
++	mutex_unlock(&cpuidle_lock);
++
++	return ret;
++}
++
++EXPORT_SYMBOL_GPL(cpuidle_register_driver);
++
++/**
++ * cpuidle_unregister_driver - unregisters a driver
++ * @drv: the driver
++ */
++void cpuidle_unregister_driver(struct cpuidle_driver *drv)
++{
++	if (!drv)
++		return;
++
++	mutex_lock(&cpuidle_lock);
++	if (drv == cpuidle_curr_driver)
++		cpuidle_switch_driver(NULL);
++	list_del(&drv->driver_list);
++	mutex_unlock(&cpuidle_lock);
++}
++
++EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
++
++static void __cpuidle_force_redetect(struct cpuidle_device *dev)
++{
++	cpuidle_remove_driver_sysfs(dev);
++	cpuidle_curr_driver->redetect(dev);
++	cpuidle_add_driver_sysfs(dev);
++}
++
++/**
++ * cpuidle_force_redetect - redetects the idle states of a CPU
++ *
++ * @dev: the CPU to redetect
++ * @drv: the target driver
++ *
++ * Generally, the driver will call this when the set of supported states has
++ * changed (e.g. as the result of an ACPI transition to battery power).
++ */
++int cpuidle_force_redetect(struct cpuidle_device *dev,
++		struct cpuidle_driver *drv)
++{
++	int uninstalled = 0;
++
++	mutex_lock(&cpuidle_lock);
++
++	if (drv != cpuidle_curr_driver) {
++		mutex_unlock(&cpuidle_lock);
++		return 0;
++	}
++
++	if (!(dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED) ||
++	    !cpuidle_curr_driver->redetect) {
++		mutex_unlock(&cpuidle_lock);
++		return -EIO;
++	}
++
++	if (cpuidle_device_can_idle(dev)) {
++		uninstalled = 1;
++		cpuidle_uninstall_idle_handler();
++	}
++
++	__cpuidle_force_redetect(dev);
++
++	if (cpuidle_device_can_idle(dev)) {
++		cpuidle_rescan_device(dev);
++		cpuidle_install_idle_handler();
++	}
++
++	/* other devices are still ok */
++	if (uninstalled)
++		cpuidle_install_idle_handler();
++
++	mutex_unlock(&cpuidle_lock);
++
++	return 0;
++}
++
++EXPORT_SYMBOL_GPL(cpuidle_force_redetect);
++
++/**
++ * cpuidle_force_redetect_devices - redetects the idle states of all CPUs
++ *
++ * @drv: the target driver
++ *
++ * Generally, the driver will call this when the set of supported states has
++ * changed (e.g. as the result of an ACPI transition to battery power).
++ */
++int cpuidle_force_redetect_devices(struct cpuidle_driver *drv)
++{
++	struct cpuidle_device *dev;
++	int ret = 0;
++
++	mutex_lock(&cpuidle_lock);
++
++	if (drv != cpuidle_curr_driver)
++		goto out;
++
++	if (!cpuidle_curr_driver->redetect) {
++		ret = -EIO;
++		goto out;
++	}
++
++	cpuidle_uninstall_idle_handler();
++
++	list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
++		__cpuidle_force_redetect(dev);
++
++	cpuidle_install_idle_handler();
++out:
++	mutex_unlock(&cpuidle_lock);
++	return ret;
++}
++
++EXPORT_SYMBOL_GPL(cpuidle_force_redetect_devices);
++
++/**
++ * cpuidle_get_bm_activity - determines if BM activity has occurred
++ */
++int cpuidle_get_bm_activity(void)
++{
++	if (cpuidle_curr_driver->bm_check)
++		return cpuidle_curr_driver->bm_check();
++	else
++		return 0;
++}
++EXPORT_SYMBOL_GPL(cpuidle_get_bm_activity);
++
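cpuidle_register_driver() above appends to a global list under cpuidle_lock, rejects duplicate names, and promotes the first successful registrant to the active driver. A stripped-down userspace model of that registration pattern (driver names and capacities invented):

#include <stdio.h>
#include <string.h>

#define MAX_DRV 4

static const char *drivers[MAX_DRV];
static int ndrivers;
static const char *current_drv;

/* Register pattern: reject duplicates, first registrant becomes current. */
static int register_driver(const char *name)
{
	int i;

	for (i = 0; i < ndrivers; i++)
		if (!strcmp(drivers[i], name))
			return -1;	/* -EEXIST in the kernel code */
	drivers[ndrivers++] = name;
	if (!current_drv)
		current_drv = name;	/* like cpuidle_switch_driver() */
	return 0;
}

int main(void)
{
	register_driver("acpi_idle");
	register_driver("halt_idle");
	register_driver("acpi_idle");	/* duplicate, rejected */
	printf("current driver: %s (%d registered)\n", current_drv, ndrivers);
	return 0;
}
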
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/governor.c linux-2.6.22-try2/drivers/cpuidle/governor.c
+--- linux-2.6.22-570/drivers/cpuidle/governor.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/cpuidle/governor.c	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,160 @@
++/*
++ * governor.c - governor support
++ *
++ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
++ *               Shaohua Li <shaohua.li@intel.com>
++ *               Adam Belay <abelay@novell.com>
++ *
++ * This code is licensed under the GPL.
++ */
++
++#include <linux/mutex.h>
++#include <linux/module.h>
++#include <linux/cpuidle.h>
++
++#include "cpuidle.h"
++
++LIST_HEAD(cpuidle_governors);
++struct cpuidle_governor *cpuidle_curr_governor;
++
++
++/**
++ * cpuidle_attach_governor - attaches a governor to a CPU
++ * @dev: the target CPU
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++int cpuidle_attach_governor(struct cpuidle_device *dev)
++{
++	int ret = 0;
++
++	if (dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED)
++		return -EIO;
++
++	if (!try_module_get(cpuidle_curr_governor->owner))
++		return -EINVAL;
++
++	if (cpuidle_curr_governor->init)
++		ret = cpuidle_curr_governor->init(dev);
++	if (ret) {
++		module_put(cpuidle_curr_governor->owner);
++		printk(KERN_ERR "cpuidle: governor %s failed to attach to cpu %d\n",
++			cpuidle_curr_governor->name, dev->cpu);
++	} else {
++		if (dev->status & CPUIDLE_STATUS_DRIVER_ATTACHED)
++			cpuidle_rescan_device(dev);
++		smp_wmb();
++		dev->status |= CPUIDLE_STATUS_GOVERNOR_ATTACHED;
++	}
++
++	return ret;
++}
++
++/**
++ * cpuidle_detach_governor - detaches a governor from a CPU
++ * @dev: the target CPU
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++void cpuidle_detach_governor(struct cpuidle_device *dev)
++{
++	if (dev->status & CPUIDLE_STATUS_GOVERNOR_ATTACHED) {
++		dev->status &= ~CPUIDLE_STATUS_GOVERNOR_ATTACHED;
++		if (cpuidle_curr_governor->exit)
++			cpuidle_curr_governor->exit(dev);
++		module_put(cpuidle_curr_governor->owner);
++	}
++}
++
++/**
++ * __cpuidle_find_governor - finds a governor of the specified name
++ * @str: the name
++ *
++ * Must be called with cpuidle_lock acquired.
++ */
++static struct cpuidle_governor * __cpuidle_find_governor(const char *str)
++{
++	struct cpuidle_governor *gov;
++
++	list_for_each_entry(gov, &cpuidle_governors, governor_list)
++		if (!strnicmp(str, gov->name, CPUIDLE_NAME_LEN))
++			return gov;
++
++	return NULL;
++}
++
++/**
++ * cpuidle_switch_governor - changes the governor
++ * @gov: the new target governor
++ *
++ * NOTE: "gov" can be NULL to specify disabled
++ * Must be called with cpuidle_lock acquired.
++ */
++int cpuidle_switch_governor(struct cpuidle_governor *gov)
++{
++	struct cpuidle_device *dev;
++
++	if (gov == cpuidle_curr_governor)
++		return -EINVAL;
++
++	cpuidle_uninstall_idle_handler();
++
++	if (cpuidle_curr_governor)
++		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
++			cpuidle_detach_governor(dev);
++
++	cpuidle_curr_governor = gov;
++
++	if (gov) {
++		list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
++			cpuidle_attach_governor(dev);
++		if (cpuidle_curr_driver)
++			cpuidle_install_idle_handler();
++		printk(KERN_INFO "cpuidle: using governor %s\n", gov->name);
++	}
++
++	return 0;
++}
++
++/**
++ * cpuidle_register_governor - registers a governor
++ * @gov: the governor
++ */
++int cpuidle_register_governor(struct cpuidle_governor *gov)
++{
++	int ret = -EEXIST;
++
++	if (!gov || !gov->select)
++		return -EINVAL;
++
++	mutex_lock(&cpuidle_lock);
++	if (__cpuidle_find_governor(gov->name) == NULL) {
++		ret = 0;
++		list_add_tail(&gov->governor_list, &cpuidle_governors);
++		if (!cpuidle_curr_governor)
++			cpuidle_switch_governor(gov);
++	}
++	mutex_unlock(&cpuidle_lock);
++
++	return ret;
++}
++
++EXPORT_SYMBOL_GPL(cpuidle_register_governor);
++
++/**
++ * cpuidle_unregister_governor - unregisters a governor
++ * @gov: the governor
++ */
++void cpuidle_unregister_governor(struct cpuidle_governor *gov)
++{
++	if (!gov)
++		return;
++
++	mutex_lock(&cpuidle_lock);
++	if (gov == cpuidle_curr_governor)
++		cpuidle_switch_governor(NULL);
++	list_del(&gov->governor_list);
++	mutex_unlock(&cpuidle_lock);
++}
++
++EXPORT_SYMBOL_GPL(cpuidle_unregister_governor);
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/governors/Makefile linux-2.6.22-try2/drivers/cpuidle/governors/Makefile
+--- linux-2.6.22-570/drivers/cpuidle/governors/Makefile	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/cpuidle/governors/Makefile	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,6 @@
++#
++# Makefile for cpuidle governors.
++#
++
++obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
++obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/governors/ladder.c linux-2.6.22-try2/drivers/cpuidle/governors/ladder.c
+--- linux-2.6.22-570/drivers/cpuidle/governors/ladder.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/cpuidle/governors/ladder.c	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,227 @@
++/*
++ * ladder.c - the residency ladder algorithm
++ *
++ *  Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
++ *  Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
++ *  Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
++ *
++ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
++ *               Shaohua Li <shaohua.li@intel.com>
++ *               Adam Belay <abelay@novell.com>
++ *
++ * This code is licensed under the GPL.
++ */
++
++#include <linux/kernel.h>
++#include <linux/cpuidle.h>
++#include <linux/latency.h>
++#include <linux/moduleparam.h>
++#include <linux/jiffies.h>
++
++#include <asm/io.h>
++#include <asm/uaccess.h>
++
++#define PROMOTION_COUNT 4
++#define DEMOTION_COUNT 1
++
++/*
++ * bm_history -- bit-mask with a bit per jiffy of bus-master activity
++ * 1000 HZ: 0xFFFFFFFF: 32 jiffies = 32ms
++ * 800 HZ: 0xFFFFFFFF: 32 jiffies = 40ms
++ * 100 HZ: 0x0000000F: 4 jiffies = 40ms
++ * reduce history for more aggressive entry into C3
++ */
++static unsigned int bm_history __read_mostly =
++    (HZ >= 800 ? 0xFFFFFFFF : ((1U << (HZ / 25)) - 1));
++module_param(bm_history, uint, 0644);
++
++struct ladder_device_state {
++	struct {
++		u32 promotion_count;
++		u32 demotion_count;
++		u32 promotion_time;
++		u32 demotion_time;
++		u32 bm;
++	} threshold;
++	struct {
++		int promotion_count;
++		int demotion_count;
++	} stats;
++};
++
++struct ladder_device {
++	struct ladder_device_state states[CPUIDLE_STATE_MAX];
++	unsigned int bm_check:1;
++	unsigned long bm_check_timestamp;
++	unsigned long bm_activity; /* FIXME: bm activity should be global */
++	int last_state_idx;
++};
++
++/**
++ * ladder_do_selection - prepares private data for a state change
++ * @ldev: the ladder device
++ * @old_idx: the current state index
++ * @new_idx: the new target state index
++ */
++static inline void ladder_do_selection(struct ladder_device *ldev,
++				       int old_idx, int new_idx)
++{
++	ldev->states[old_idx].stats.promotion_count = 0;
++	ldev->states[old_idx].stats.demotion_count = 0;
++	ldev->last_state_idx = new_idx;
++}
++
++/**
++ * ladder_select_state - selects the next state to enter
++ * @dev: the CPU
++ */
++static int ladder_select_state(struct cpuidle_device *dev)
++{
++	struct ladder_device *ldev = dev->governor_data;
++	struct ladder_device_state *last_state;
++	int last_residency, last_idx;
++
++	if (unlikely(!ldev))
++		return 0;
++
++	last_idx = ldev->last_state_idx;
++	last_state = &ldev->states[last_idx];
++
++	/* demote if within BM threshold */
++	if (ldev->bm_check) {
++		unsigned long diff;
++
++		diff = jiffies - ldev->bm_check_timestamp;
++		if (diff > 31)
++			diff = 31;
++
++		ldev->bm_activity <<= diff;
++		if (cpuidle_get_bm_activity())
++			ldev->bm_activity |= ((1 << diff) - 1);
++
++		ldev->bm_check_timestamp = jiffies;
++		if ((last_idx > 0) &&
++		    (last_state->threshold.bm & ldev->bm_activity)) {
++			ladder_do_selection(ldev, last_idx, last_idx - 1);
++			return last_idx - 1;
++		}
++	}
++
++	if (dev->states[last_idx].flags & CPUIDLE_FLAG_TIME_VALID)
++		last_residency = cpuidle_get_last_residency(dev) - dev->states[last_idx].exit_latency;
++	else
++		last_residency = last_state->threshold.promotion_time + 1;
++
++	/* consider promotion */
++	if (last_idx < dev->state_count - 1 &&
++	    last_residency > last_state->threshold.promotion_time &&
++	    dev->states[last_idx + 1].exit_latency <= system_latency_constraint()) {
++		last_state->stats.promotion_count++;
++		last_state->stats.demotion_count = 0;
++		if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
++			ladder_do_selection(ldev, last_idx, last_idx + 1);
++			return last_idx + 1;
++		}
++	}
++
++	/* consider demotion */
++	if (last_idx > 0 &&
++	    last_residency < last_state->threshold.demotion_time) {
++		last_state->stats.demotion_count++;
++		last_state->stats.promotion_count = 0;
++		if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
++			ladder_do_selection(ldev, last_idx, last_idx - 1);
++			return last_idx - 1;
++		}
++	}
++
++	/* otherwise remain at the current state */
++	return last_idx;
++}
++
++/**
++ * ladder_scan_device - scans a CPU's states and does setup
++ * @dev: the CPU
++ */
++static void ladder_scan_device(struct cpuidle_device *dev)
++{
++	int i, bm_check = 0;
++	struct ladder_device *ldev = dev->governor_data;
++	struct ladder_device_state *lstate;
++	struct cpuidle_state *state;
++
++	ldev->last_state_idx = 0;
++	ldev->bm_check_timestamp = 0;
++	ldev->bm_activity = 0;
++
++	for (i = 0; i < dev->state_count; i++) {
++		state = &dev->states[i];
++		lstate = &ldev->states[i];
++
++		lstate->stats.promotion_count = 0;
++		lstate->stats.demotion_count = 0;
++
++		lstate->threshold.promotion_count = PROMOTION_COUNT;
++		lstate->threshold.demotion_count = DEMOTION_COUNT;
++
++		if (i < dev->state_count - 1)
++			lstate->threshold.promotion_time = state->exit_latency;
++		if (i > 0)
++			lstate->threshold.demotion_time = state->exit_latency;
++		if (state->flags & CPUIDLE_FLAG_CHECK_BM) {
++			lstate->threshold.bm = bm_history;
++			bm_check = 1;
++		} else
++			lstate->threshold.bm = 0;
++	}
++
++	ldev->bm_check = bm_check;
++}
++
++/**
++ * ladder_init_device - initializes a CPU-instance
++ * @dev: the CPU
++ */
++static int ladder_init_device(struct cpuidle_device *dev)
++{
++	dev->governor_data = kmalloc(sizeof(struct ladder_device), GFP_KERNEL);
++
++	return !dev->governor_data;
++}
++
++/**
++ * ladder_exit_device - exits a CPU-instance
++ * @dev: the CPU
++ */
++static void ladder_exit_device(struct cpuidle_device *dev)
++{
++	kfree(dev->governor_data);
++}
++
++static struct cpuidle_governor ladder_governor = {
++	.name =		"ladder",
++	.init =		ladder_init_device,
++	.exit =		ladder_exit_device,
++	.scan =		ladder_scan_device,
++	.select =	ladder_select_state,
++	.owner =	THIS_MODULE,
++};
++
++/**
++ * init_ladder - initializes the governor
++ */
++static int __init init_ladder(void)
++{
++	return cpuidle_register_governor(&ladder_governor);
++}
++
++/**
++ * exit_ladder - exits the governor
++ */
++static void __exit exit_ladder(void)
++{
++	cpuidle_unregister_governor(&ladder_governor);
++}
++
++MODULE_LICENSE("GPL");
++module_init(init_ladder);
++module_exit(exit_ladder);
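
The ladder governor above moves one rung at a time: residencies longer than the promotion threshold for PROMOTION_COUNT consecutive wakeups climb to a deeper state, and a residency below the demotion threshold steps back down after DEMOTION_COUNT misses. A self-contained C simulation of that rule (the thresholds and the residency trace are invented):

#include <stdio.h>

#define PROMOTION_COUNT 4	/* same values as ladder.c */
#define DEMOTION_COUNT  1

int main(void)
{
	/* invented thresholds (us) and a sample residency trace */
	int promote_above = 200, demote_below = 50;
	int trace[] = { 300, 300, 300, 300, 300, 30, 300 };
	int idx = 0, promo = 0, demo = 0, i;

	for (i = 0; i < (int)(sizeof(trace) / sizeof(trace[0])); i++) {
		if (trace[i] > promote_above) {
			demo = 0;
			if (++promo >= PROMOTION_COUNT && idx < 2) {
				idx++;			/* climb one rung */
				promo = 0;
			}
		} else if (trace[i] < demote_below) {
			promo = 0;
			if (++demo >= DEMOTION_COUNT && idx > 0) {
				idx--;			/* step back down */
				demo = 0;
			}
		}
		printf("residency=%3dus -> state C%d\n", trace[i], idx + 1);
	}
	return 0;
}
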
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/governors/menu.c linux-2.6.22-try2/drivers/cpuidle/governors/menu.c
+--- linux-2.6.22-570/drivers/cpuidle/governors/menu.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/cpuidle/governors/menu.c	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,152 @@
++/*
++ * menu.c - the menu idle governor
++ *
++ * Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
++ *
++ * This code is licensed under the GPL.
++ */
++
++#include <linux/kernel.h>
++#include <linux/cpuidle.h>
++#include <linux/latency.h>
++#include <linux/time.h>
++#include <linux/ktime.h>
++#include <linux/tick.h>
++#include <linux/hrtimer.h>
++
++#define BM_HOLDOFF	20000	/* 20 ms */
++
++struct menu_device {
++	int		last_state_idx;
++	int		deepest_bm_state;
++
++	int		break_last_us;
++	int		break_elapsed_us;
++
++	int		bm_elapsed_us;
++	int		bm_holdoff_us;
++
++	unsigned long	idle_jiffies;
++};
++
++static DEFINE_PER_CPU(struct menu_device, menu_devices);
++
++/**
++ * menu_select - selects the next idle state to enter
++ * @dev: the CPU
++ */
++static int menu_select(struct cpuidle_device *dev)
++{
++	struct menu_device *data = &__get_cpu_var(menu_devices);
++	int i, expected_us, max_state = dev->state_count;
++
++	/* discard BM history because it is sticky */
++	cpuidle_get_bm_activity();
++
++	/* determine the expected residency time */
++	expected_us = (s32) ktime_to_ns(tick_nohz_get_sleep_length()) / 1000;
++	expected_us = min(expected_us, data->break_last_us);
++
++	/* determine the maximum state compatible with current BM status */
++	if (cpuidle_get_bm_activity())
++		data->bm_elapsed_us = 0;
++	if (data->bm_elapsed_us <= data->bm_holdoff_us)
++		max_state = data->deepest_bm_state + 1;
++
++	/* find the deepest idle state that satisfies our constraints */
++	for (i = 1; i < max_state; i++) {
++		struct cpuidle_state *s = &dev->states[i];
++		if (s->target_residency > expected_us)
++			break;
++		if (s->exit_latency > system_latency_constraint())
++			break;
++	}
++
++	data->last_state_idx = i - 1;
++	data->idle_jiffies = tick_nohz_get_idle_jiffies();
++	return i - 1;
++}
++
++/**
++ * menu_reflect - attempts to guess what happened after entry
++ * @dev: the CPU
++ *
++ * NOTE: it's important to be fast here because this operation will add to
++ *       the overall exit latency.
++ */
++static void menu_reflect(struct cpuidle_device *dev)
++{
++	struct menu_device *data = &__get_cpu_var(menu_devices);
++	int last_idx = data->last_state_idx;
++	int measured_us = cpuidle_get_last_residency(dev);
++	struct cpuidle_state *target = &dev->states[last_idx];
++
++	/*
++	 * Ugh, this idle state doesn't support residency measurements, so we
++	 * are basically lost in the dark.  As a compromise, assume we slept
++	 * for one full standard timer tick.  However, be aware that this
++	 * could potentially result in a suboptimal state transition.
++	 */
++	if (!(target->flags & CPUIDLE_FLAG_TIME_VALID))
++		measured_us = USEC_PER_SEC / HZ;
++
++	data->bm_elapsed_us += measured_us;
++	data->break_elapsed_us += measured_us;
++
++	/*
++	 * Did something other than the timer interrupt cause the break event?
++	 */
++	if (tick_nohz_get_idle_jiffies() == data->idle_jiffies) {
++		data->break_last_us = data->break_elapsed_us;
++		data->break_elapsed_us = 0;
++	}
++}
++
++/**
++ * menu_scan_device - scans a CPU's states and does setup
++ * @dev: the CPU
++ */
++static void menu_scan_device(struct cpuidle_device *dev)
++{
++	struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
++	int i;
++
++	data->last_state_idx = 0;
++	data->break_last_us = 0;
++	data->break_elapsed_us = 0;
++	data->bm_elapsed_us = 0;
++	data->bm_holdoff_us = BM_HOLDOFF;
++
++	for (i = 1; i < dev->state_count; i++)
++		if (dev->states[i].flags & CPUIDLE_FLAG_CHECK_BM)
++			break;
++	data->deepest_bm_state = i - 1;
++}
++
++struct cpuidle_governor menu_governor = {
++	.name =		"menu",
++	.scan =		menu_scan_device,
++	.select =	menu_select,
++	.reflect =	menu_reflect,
++	.owner =	THIS_MODULE,
++};
++
++/**
++ * init_menu - initializes the governor
++ */
++static int __init init_menu(void)
++{
++	return cpuidle_register_governor(&menu_governor);
++}
++
++/**
++ * exit_menu - exits the governor
++ */
++static void __exit exit_menu(void)
++{
++	cpuidle_unregister_governor(&menu_governor);
++}
++
++MODULE_LICENSE("GPL");
++module_init(init_menu);
++module_exit(exit_menu);
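
menu_select() above walks the state table from shallow to deep and stops at the first state whose target residency exceeds the expected sleep time or whose exit latency violates the system latency constraint, then returns the state just before it. A standalone sketch of that selection (the three-state table and all numbers are invented):

#include <stdio.h>

struct state { int target_residency; int exit_latency; };	/* usecs */

/* Invented table; index 0 is the shallowest state, as in menu_select(). */
static struct state states[] = { { 0, 1 }, { 100, 20 }, { 1000, 200 } };

static int menu_pick(int expected_us, int latency_limit_us)
{
	int i;

	for (i = 1; i < 3; i++) {
		if (states[i].target_residency > expected_us)
			break;
		if (states[i].exit_latency > latency_limit_us)
			break;
	}
	return i - 1;	/* deepest state that passed both checks */
}

int main(void)
{
	printf("short sleep   -> C%d\n", menu_pick(50, 1000) + 1);
	printf("long sleep    -> C%d\n", menu_pick(5000, 1000) + 1);
	printf("tight latency -> C%d\n", menu_pick(5000, 100) + 1);
	return 0;
}
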
+diff -Nurb linux-2.6.22-570/drivers/cpuidle/sysfs.c linux-2.6.22-try2/drivers/cpuidle/sysfs.c
+--- linux-2.6.22-570/drivers/cpuidle/sysfs.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/cpuidle/sysfs.c	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,373 @@
++/*
++ * sysfs.c - sysfs support
++ *
++ * (C) 2006-2007 Shaohua Li <shaohua.li@intel.com>
++ *
++ * This code is licensed under the GPL.
++ */
++
++#include <linux/kernel.h>
++#include <linux/cpuidle.h>
++#include <linux/sysfs.h>
++#include <linux/cpu.h>
++
++#include "cpuidle.h"
++
++static ssize_t show_available_drivers(struct sys_device *dev, char *buf)
++{
++	ssize_t i = 0;
++	struct cpuidle_driver *tmp;
++
++	mutex_lock(&cpuidle_lock);
++	list_for_each_entry(tmp, &cpuidle_drivers, driver_list) {
++		if (i >= (ssize_t)((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2))
++			goto out;
++		i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name);
++	}
++out:
++	i += sprintf(&buf[i], "\n");
++	mutex_unlock(&cpuidle_lock);
++	return i;
++}
++
++static ssize_t show_available_governors(struct sys_device *dev, char *buf)
++{
++	ssize_t i = 0;
++	struct cpuidle_governor *tmp;
++
++	mutex_lock(&cpuidle_lock);
++	list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
++		if (i >= (ssize_t)((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2))
++			goto out;
++		i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name);
++	}
++	if (list_empty(&cpuidle_governors))
++		i += sprintf(&buf[i], "no governors");
++out:
++	i += sprintf(&buf[i], "\n");
++	mutex_unlock(&cpuidle_lock);
++	return i;
++}
++
++static ssize_t show_current_driver(struct sys_device *dev, char *buf)
++{
++	ssize_t ret;
++
++	mutex_lock(&cpuidle_lock);
++	ret = sprintf(buf, "%s\n", cpuidle_curr_driver->name);
++	mutex_unlock(&cpuidle_lock);
++	return ret;
++}
++
++static ssize_t store_current_driver(struct sys_device *dev,
++	const char *buf, size_t count)
++{
++	char str[CPUIDLE_NAME_LEN];
++	int len = count;
++	struct cpuidle_driver *tmp, *found = NULL;
++
++	if (len > CPUIDLE_NAME_LEN)
++		len = CPUIDLE_NAME_LEN;
++
++	if (sscanf(buf, "%s", str) != 1)
++		return -EINVAL;
++
++	mutex_lock(&cpuidle_lock);
++	list_for_each_entry(tmp, &cpuidle_drivers, driver_list) {
++		if (strncmp(tmp->name, str, CPUIDLE_NAME_LEN) == 0) {
++			found = tmp;
++			break;
++		}
++	}
++	if (found)
++		cpuidle_switch_driver(found);
++	mutex_unlock(&cpuidle_lock);
++
++	return count;
++}
++
++static ssize_t show_current_governor(struct sys_device *dev, char *buf)
++{
++	ssize_t i;
++
++	mutex_lock(&cpuidle_lock);
++	if (cpuidle_curr_governor)
++		i = sprintf(buf, "%s\n", cpuidle_curr_governor->name);
++	else
++		i = sprintf(buf, "no governor\n");
++	mutex_unlock(&cpuidle_lock);
++
++	return i;
++}
++
++static ssize_t store_current_governor(struct sys_device *dev,
++	const char *buf, size_t count)
++{
++	char str[CPUIDLE_NAME_LEN];
++	int len = count;
++	struct cpuidle_governor *tmp, *found = NULL;
++
++	if (len > CPUIDLE_NAME_LEN)
++		len = CPUIDLE_NAME_LEN;
++
++	if (sscanf(buf, "%s", str) != 1)
++		return -EINVAL;
++
++	mutex_lock(&cpuidle_lock);
++	list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
++		if (strncmp(tmp->name, str, CPUIDLE_NAME_LEN) == 0) {
++			found = tmp;
++			break;
++		}
++	}
++	if (found)
++		cpuidle_switch_governor(found);
++	mutex_unlock(&cpuidle_lock);
++
++	return count;
++}
++
++static SYSDEV_ATTR(available_drivers, 0444, show_available_drivers, NULL);
++static SYSDEV_ATTR(available_governors, 0444, show_available_governors, NULL);
++static SYSDEV_ATTR(current_driver, 0644, show_current_driver,
++	store_current_driver);
++static SYSDEV_ATTR(current_governor, 0644, show_current_governor,
++	store_current_governor);
++
++static struct attribute *cpuclass_default_attrs[] = {
++	&attr_available_drivers.attr,
++	&attr_available_governors.attr,
++	&attr_current_driver.attr,
++	&attr_current_governor.attr,
++	NULL
++};
++
++static struct attribute_group cpuclass_attr_group = {
++	.attrs = cpuclass_default_attrs,
++	.name = "cpuidle",
++};
++
++/**
++ * cpuidle_add_class_sysfs - add CPU global sysfs attributes
++ */
++int cpuidle_add_class_sysfs(struct sysdev_class *cls)
++{
++	return sysfs_create_group(&cls->kset.kobj, &cpuclass_attr_group);
++}
++
++/**
++ * cpuidle_remove_class_sysfs - remove CPU global sysfs attributes
++ */
++void cpuidle_remove_class_sysfs(struct sysdev_class *cls)
++{
++	sysfs_remove_group(&cls->kset.kobj, &cpuclass_attr_group);
++}
++
++struct cpuidle_attr {
++	struct attribute attr;
++	ssize_t (*show)(struct cpuidle_device *, char *);
++	ssize_t (*store)(struct cpuidle_device *, const char *, size_t count);
++};
++
++#define define_one_ro(_name, show) \
++	static struct cpuidle_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
++#define define_one_rw(_name, show, store) \
++	static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store)
++
++#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj)
++#define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr)
++static ssize_t cpuidle_show(struct kobject *kobj, struct attribute *attr, char *buf)
++{
++	int ret = -EIO;
++	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
++	struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
++
++	if (cattr->show) {
++		mutex_lock(&cpuidle_lock);
++		ret = cattr->show(dev, buf);
++		mutex_unlock(&cpuidle_lock);
++	}
++	return ret;
++}
++
++static ssize_t cpuidle_store(struct kobject *kobj, struct attribute *attr,
++		     const char *buf, size_t count)
++{
++	int ret = -EIO;
++	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
++	struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
++
++	if (cattr->store) {
++		mutex_lock(&cpuidle_lock);
++		ret = cattr->store(dev, buf, count);
++		mutex_unlock(&cpuidle_lock);
++	}
++	return ret;
++}
++
++static struct sysfs_ops cpuidle_sysfs_ops = {
++	.show = cpuidle_show,
++	.store = cpuidle_store,
++};
++
++static void cpuidle_sysfs_release(struct kobject *kobj)
++{
++	struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
++
++	complete(&dev->kobj_unregister);
++}
++
++static struct kobj_type ktype_cpuidle = {
++	.sysfs_ops = &cpuidle_sysfs_ops,
++	.release = cpuidle_sysfs_release,
++};
++
++struct cpuidle_state_attr {
++	struct attribute attr;
++	ssize_t (*show)(struct cpuidle_state *, char *);
++	ssize_t (*store)(struct cpuidle_state *, const char *, size_t);
++};
++
++#define define_one_state_ro(_name, show) \
++static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
++
++#define define_show_state_function(_name) \
++static ssize_t show_state_##_name(struct cpuidle_state *state, char *buf) \
++{ \
++	return sprintf(buf, "%d\n", state->_name);\
++}
++
++define_show_state_function(exit_latency)
++define_show_state_function(power_usage)
++define_show_state_function(usage)
++define_show_state_function(time)
++define_one_state_ro(latency, show_state_exit_latency);
++define_one_state_ro(power, show_state_power_usage);
++define_one_state_ro(usage, show_state_usage);
++define_one_state_ro(time, show_state_time);
++
++static struct attribute *cpuidle_state_default_attrs[] = {
++	&attr_latency.attr,
++	&attr_power.attr,
++	&attr_usage.attr,
++	&attr_time.attr,
++	NULL
++};
++
++#define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj)
++#define kobj_to_state(k) (kobj_to_state_obj(k)->state)
++#define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
++static ssize_t cpuidle_state_show(struct kobject *kobj,
++	struct attribute *attr, char *buf)
++{
++	int ret = -EIO;
++	struct cpuidle_state *state = kobj_to_state(kobj);
++	struct cpuidle_state_attr * cattr = attr_to_stateattr(attr);
++
++	if (cattr->show)
++		ret = cattr->show(state, buf);
++
++	return ret;
++}
++
++static struct sysfs_ops cpuidle_state_sysfs_ops = {
++	.show = cpuidle_state_show,
++};
++
++static void cpuidle_state_sysfs_release(struct kobject *kobj)
++{
++	struct cpuidle_state_kobj *state_obj = kobj_to_state_obj(kobj);
++
++	complete(&state_obj->kobj_unregister);
++}
++
++static struct kobj_type ktype_state_cpuidle = {
++	.sysfs_ops = &cpuidle_state_sysfs_ops,
++	.default_attrs = cpuidle_state_default_attrs,
++	.release = cpuidle_state_sysfs_release,
++};
++
++static inline void cpuidle_free_state_kobj(struct cpuidle_device *device, int i)
++{
++	kobject_unregister(&device->kobjs[i]->kobj);
++	wait_for_completion(&device->kobjs[i]->kobj_unregister);
++	kfree(device->kobjs[i]);
++	device->kobjs[i] = NULL;
++}
++
++/**
++ * cpuidle_add_driver_sysfs - adds driver-specific sysfs attributes
++ * @device: the target device
++ */
++int cpuidle_add_driver_sysfs(struct cpuidle_device *device)
++{
++	int i, ret;
++	struct cpuidle_state_kobj *kobj;
++
++	/* state statistics */
++	for (i = 0; i < device->state_count; i++) {
++		kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
++		if (!kobj)
++			goto error_state;
++		kobj->state = &device->states[i];
++		init_completion(&kobj->kobj_unregister);
++
++		kobj->kobj.parent = &device->kobj;
++		kobj->kobj.ktype = &ktype_state_cpuidle;
++		kobject_set_name(&kobj->kobj, "state%d", i);
++		ret = kobject_register(&kobj->kobj);
++		if (ret) {
++			kfree(kobj);
++			goto error_state;
++		}
++		device->kobjs[i] = kobj;
++	}
++
++	return 0;
++
++error_state:
++	for (i = i - 1; i >= 0; i--)
++		cpuidle_free_state_kobj(device, i);
++	return ret;
++}
++
++/**
++ * cpuidle_remove_driver_sysfs - removes driver-specific sysfs attributes
++ * @device: the target device
++ */
++void cpuidle_remove_driver_sysfs(struct cpuidle_device *device)
++{
++	int i;
++
++	for (i = 0; i < device->state_count; i++)
++		cpuidle_free_state_kobj(device, i);
++}
++
++/**
++ * cpuidle_add_sysfs - creates a sysfs instance for the target device
++ * @sysdev: the target device
++ */
++int cpuidle_add_sysfs(struct sys_device *sysdev)
++{
++	int cpu = sysdev->id;
++	struct cpuidle_device *dev;
++
++	dev = per_cpu(cpuidle_devices, cpu);
++	dev->kobj.parent = &sysdev->kobj;
++	dev->kobj.ktype = &ktype_cpuidle;
++	kobject_set_name(&dev->kobj, "%s", "cpuidle");
++	return kobject_register(&dev->kobj);
++}
++
++/**
++ * cpuidle_remove_sysfs - deletes a sysfs instance on the target device
++ * @sysdev: the target device
++ */
++void cpuidle_remove_sysfs(struct sys_device *sysdev)
++{
++	int cpu = sysdev->id;
++	struct cpuidle_device *dev;
++
++	dev = per_cpu(cpuidle_devices, cpu);
++	kobject_unregister(&dev->kobj);
++}
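
A note on the macro layer in sysfs.c above: expanding
define_show_state_function(exit_latency) together with
define_one_state_ro(latency, show_state_exit_latency) produces, in effect, the
following (a sketch of the preprocessor output, shown for orientation only;
the attribute surfaces read-only at
/sys/devices/system/cpu/cpuN/cpuidle/stateM/latency):

	static ssize_t show_state_exit_latency(struct cpuidle_state *state,
			char *buf)
	{
		return sprintf(buf, "%d\n", state->exit_latency);
	}
	static struct cpuidle_state_attr attr_latency =
		__ATTR(latency, 0444, show_state_exit_latency, NULL);
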
+diff -Nurb linux-2.6.22-570/drivers/dma/Kconfig linux-2.6.22-try2/drivers/dma/Kconfig
+--- linux-2.6.22-570/drivers/dma/Kconfig	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/dma/Kconfig	2007-12-19 15:29:23.000000000 -0500
+@@ -8,8 +8,8 @@
+ config DMA_ENGINE
+ 	bool "Support for DMA engines"
+ 	---help---
+-	  DMA engines offload copy operations from the CPU to dedicated
+-	  hardware, allowing the copies to happen asynchronously.
++	  DMA engines offload bulk memory operations from the CPU to dedicated
++	  hardware, allowing the operations to happen asynchronously.
+ 
+ comment "DMA Clients"
+ 
+@@ -32,4 +32,11 @@
+ 	---help---
+ 	  Enable support for the Intel(R) I/OAT DMA engine.
+ 
++config INTEL_IOP_ADMA
++	tristate "Intel IOP ADMA support"
++	depends on DMA_ENGINE && (ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX)
++	default m
++	---help---
++	  Enable support for the Intel(R) IOP Series RAID engines.
++
+ endmenu
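
For a quick build sanity check: on a kernel configured for one of the listed
IOP platforms (ARCH_IOP32X/33X/13XX), the new engine is selected with a
fragment like the following (illustrative .config excerpt):

	CONFIG_DMA_ENGINE=y
	CONFIG_INTEL_IOP_ADMA=m
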
+diff -Nurb linux-2.6.22-570/drivers/dma/Makefile linux-2.6.22-try2/drivers/dma/Makefile
+--- linux-2.6.22-570/drivers/dma/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/dma/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -1,3 +1,4 @@
+ obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
+ obj-$(CONFIG_NET_DMA) += iovlock.o
+ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
++obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
+diff -Nurb linux-2.6.22-570/drivers/dma/dmaengine.c linux-2.6.22-try2/drivers/dma/dmaengine.c
+--- linux-2.6.22-570/drivers/dma/dmaengine.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/dma/dmaengine.c	2007-12-19 15:29:23.000000000 -0500
+@@ -37,11 +37,11 @@
+  * Each device has a channels list, which runs unlocked but is never modified
+  * once the device is registered, it's just setup by the driver.
+  *
+- * Each client has a channels list, it's only modified under the client->lock
+- * and in an RCU callback, so it's safe to read under rcu_read_lock().
++ * Each client is responsible for keeping track of the channels it uses.  See
++ * the definition of dma_event_callback in dmaengine.h.
+  *
+  * Each device has a kref, which is initialized to 1 when the device is
+- * registered. A kref_put is done for each class_device registered.  When the
++ * registered. A kref_get is done for each class_device registered.  When the
+  * class_device is released, the coresponding kref_put is done in the release
+  * method. Every time one of the device's channels is allocated to a client,
+  * a kref_get occurs.  When the channel is freed, the coresponding kref_put
+@@ -51,14 +51,17 @@
+  * references to finish.
+  *
+  * Each channel has an open-coded implementation of Rusty Russell's "bigref,"
+- * with a kref and a per_cpu local_t.  A single reference is set when on an
+- * ADDED event, and removed with a REMOVE event.  Net DMA client takes an
+- * extra reference per outstanding transaction.  The relase function does a
+- * kref_put on the device. -ChrisL
++ * with a kref and a per_cpu local_t.  A dma_chan_get is called when a client
++ * signals that it wants to use a channel, and dma_chan_put is called when
++ * a channel is removed or a client using it is unregistered.  A client can
++ * take extra references per outstanding transaction, as is the case with
++ * the NET DMA client.  The release function does a kref_put on the device.
++ *	-ChrisL, DanW
+  */
+ 
+ #include <linux/init.h>
+ #include <linux/module.h>
++#include <linux/mm.h>
+ #include <linux/device.h>
+ #include <linux/dmaengine.h>
+ #include <linux/hardirq.h>
+@@ -66,6 +69,7 @@
+ #include <linux/percpu.h>
+ #include <linux/rcupdate.h>
+ #include <linux/mutex.h>
++#include <linux/jiffies.h>
+ 
+ static DEFINE_MUTEX(dma_list_mutex);
+ static LIST_HEAD(dma_device_list);
+@@ -100,8 +104,19 @@
+ static ssize_t show_in_use(struct class_device *cd, char *buf)
+ {
+ 	struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev);
++	int in_use = 0;
+ 
+-	return sprintf(buf, "%d\n", (chan->client ? 1 : 0));
++	if (unlikely(chan->slow_ref) &&
++		atomic_read(&chan->refcount.refcount) > 1)
++		in_use = 1;
++	else {
++		if (local_read(&(per_cpu_ptr(chan->local,
++			get_cpu())->refcount)) > 0)
++			in_use = 1;
++		put_cpu();
++	}
++
++	return sprintf(buf, "%d\n", in_use);
+ }
+ 
+ static struct class_device_attribute dma_class_attrs[] = {
+@@ -127,43 +142,72 @@
+ 
+ /* --- client and device registration --- */
+ 
++#define dma_chan_satisfies_mask(chan, mask) \
++	__dma_chan_satisfies_mask((chan), &(mask))
++static int
++__dma_chan_satisfies_mask(struct dma_chan *chan, dma_cap_mask_t *want)
++{
++	dma_cap_mask_t has;
++
++	bitmap_and(has.bits, want->bits, chan->device->cap_mask.bits,
++		DMA_TX_TYPE_END);
++	return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END);
++}
++
+ /**
+- * dma_client_chan_alloc - try to allocate a channel to a client
++ * dma_client_chan_alloc - try to allocate channels to a client
+  * @client: &dma_client
+  *
+  * Called with dma_list_mutex held.
+  */
+-static struct dma_chan *dma_client_chan_alloc(struct dma_client *client)
++static void dma_client_chan_alloc(struct dma_client *client)
+ {
+ 	struct dma_device *device;
+ 	struct dma_chan *chan;
+-	unsigned long flags;
+ 	int desc;	/* allocated descriptor count */
++	enum dma_state_client ack;
+ 
+-	/* Find a channel, any DMA engine will do */
+-	list_for_each_entry(device, &dma_device_list, global_node) {
++	/* Find a channel */
++	list_for_each_entry(device, &dma_device_list, global_node)
+ 		list_for_each_entry(chan, &device->channels, device_node) {
+-			if (chan->client)
++			if (!dma_chan_satisfies_mask(chan, client->cap_mask))
+ 				continue;
+ 
+ 			desc = chan->device->device_alloc_chan_resources(chan);
+ 			if (desc >= 0) {
++				ack = client->event_callback(client,
++						chan,
++						DMA_RESOURCE_AVAILABLE);
++
++				/* we are done once this client rejects
++				 * an available resource
++				 */
++				if (ack == DMA_ACK) {
++					dma_chan_get(chan);
+ 				kref_get(&device->refcount);
+-				kref_init(&chan->refcount);
+-				chan->slow_ref = 0;
+-				INIT_RCU_HEAD(&chan->rcu);
+-				chan->client = client;
+-				spin_lock_irqsave(&client->lock, flags);
+-				list_add_tail_rcu(&chan->client_node,
+-				                  &client->channels);
+-				spin_unlock_irqrestore(&client->lock, flags);
+-				return chan;
++				} else if (ack == DMA_NAK)
++					return;
+ 			}
+ 		}
++}
++
++enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
++{
++	enum dma_status status;
++	unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
++
++	dma_async_issue_pending(chan);
++	do {
++		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
++		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
++			printk(KERN_ERR "dma_sync_wait_timeout!\n");
++			return DMA_ERROR;
+ 	}
++	} while (status == DMA_IN_PROGRESS);
+ 
+-	return NULL;
++	return status;
+ }
++EXPORT_SYMBOL(dma_sync_wait);
+ 
+ /**
+  * dma_chan_cleanup - release a DMA channel's resources
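
dma_sync_wait(), added above, gives callers a simple synchronous completion
path on top of the async API. A minimal sketch of its intended use, assuming a
channel and two kernel buffers supplied by the caller (the helper name
copy_and_wait is illustrative; dma_async_memcpy_buf_to_buf is added later in
this patch):

	static int copy_and_wait(struct dma_chan *chan, void *dst,
			void *src, size_t len)
	{
		dma_cookie_t cookie;

		cookie = dma_async_memcpy_buf_to_buf(chan, dst, src, len);
		if (cookie < 0)
			return cookie;	/* descriptor allocation failed */

		/* polls for up to 5s, then reports DMA_ERROR */
		return dma_sync_wait(chan, cookie) == DMA_SUCCESS ? 0 : -EIO;
	}
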
+@@ -173,7 +217,6 @@
+ {
+ 	struct dma_chan *chan = container_of(kref, struct dma_chan, refcount);
+ 	chan->device->device_free_chan_resources(chan);
+-	chan->client = NULL;
+ 	kref_put(&chan->device->refcount, dma_async_device_cleanup);
+ }
+ EXPORT_SYMBOL(dma_chan_cleanup);
+@@ -189,7 +232,7 @@
+ 	kref_put(&chan->refcount, dma_chan_cleanup);
+ }
+ 
+-static void dma_client_chan_free(struct dma_chan *chan)
++static void dma_chan_release(struct dma_chan *chan)
+ {
+ 	atomic_add(0x7FFFFFFF, &chan->refcount.refcount);
+ 	chan->slow_ref = 1;
+@@ -197,70 +240,57 @@
+ }
+ 
+ /**
+- * dma_chans_rebalance - reallocate channels to clients
+- *
+- * When the number of DMA channel in the system changes,
+- * channels need to be rebalanced among clients.
++ * dma_clients_notify_available - broadcast available channels to the clients
+  */
+-static void dma_chans_rebalance(void)
++static void dma_clients_notify_available(void)
+ {
+ 	struct dma_client *client;
+-	struct dma_chan *chan;
+-	unsigned long flags;
+ 
+ 	mutex_lock(&dma_list_mutex);
+ 
+-	list_for_each_entry(client, &dma_client_list, global_node) {
+-		while (client->chans_desired > client->chan_count) {
+-			chan = dma_client_chan_alloc(client);
+-			if (!chan)
+-				break;
+-			client->chan_count++;
+-			client->event_callback(client,
+-	                                       chan,
+-	                                       DMA_RESOURCE_ADDED);
+-		}
+-		while (client->chans_desired < client->chan_count) {
+-			spin_lock_irqsave(&client->lock, flags);
+-			chan = list_entry(client->channels.next,
+-			                  struct dma_chan,
+-			                  client_node);
+-			list_del_rcu(&chan->client_node);
+-			spin_unlock_irqrestore(&client->lock, flags);
+-			client->chan_count--;
+-			client->event_callback(client,
+-			                       chan,
+-			                       DMA_RESOURCE_REMOVED);
+-			dma_client_chan_free(chan);
+-		}
+-	}
++	list_for_each_entry(client, &dma_client_list, global_node)
++		dma_client_chan_alloc(client);
+ 
+ 	mutex_unlock(&dma_list_mutex);
+ }
+ 
+ /**
+- * dma_async_client_register - allocate and register a &dma_client
+- * @event_callback: callback for notification of channel addition/removal
++ * dma_clients_notify_removed - tell the clients that a channel is going away
++ * @chan: channel on its way out
+  */
+-struct dma_client *dma_async_client_register(dma_event_callback event_callback)
++static void dma_clients_notify_removed(struct dma_chan *chan)
+ {
+ 	struct dma_client *client;
++	enum dma_state_client ack;
+ 
+-	client = kzalloc(sizeof(*client), GFP_KERNEL);
+-	if (!client)
+-		return NULL;
++	mutex_lock(&dma_list_mutex);
+ 
+-	INIT_LIST_HEAD(&client->channels);
+-	spin_lock_init(&client->lock);
+-	client->chans_desired = 0;
+-	client->chan_count = 0;
+-	client->event_callback = event_callback;
++	list_for_each_entry(client, &dma_client_list, global_node) {
++		ack = client->event_callback(client, chan,
++				DMA_RESOURCE_REMOVED);
+ 
++		/* client was holding resources for this channel so
++		 * free it
++		 */
++		if (ack == DMA_ACK) {
++			dma_chan_put(chan);
++			kref_put(&chan->device->refcount,
++				dma_async_device_cleanup);
++		}
++	}
++
++	mutex_unlock(&dma_list_mutex);
++}
++
++/**
++ * dma_async_client_register - register a &dma_client
++ * @client: ptr to a client structure with valid 'event_callback' and 'cap_mask'
++ */
++void dma_async_client_register(struct dma_client *client)
++{
+ 	mutex_lock(&dma_list_mutex);
+ 	list_add_tail(&client->global_node, &dma_client_list);
+ 	mutex_unlock(&dma_list_mutex);
+-
+-	return client;
+ }
+ EXPORT_SYMBOL(dma_async_client_register);
+ 
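
Under this reworked registration model a client embeds its own struct
dma_client, fills in cap_mask and event_callback, and answers per-channel
events with DMA_ACK or DMA_NAK. A sketch of the pattern (field and enum names
follow this patch; the type of the callback's third argument is assumed to be
the dma_state enum from the matching dmaengine.h rework):

	static enum dma_state_client
	my_event(struct dma_client *client, struct dma_chan *chan,
			enum dma_state state)
	{
		switch (state) {
		case DMA_RESOURCE_AVAILABLE:
			/* record chan for later use; keep the reference */
			return DMA_ACK;
		case DMA_RESOURCE_REMOVED:
			/* stop using chan; drop the reference */
			return DMA_ACK;
		default:
			return DMA_NAK;	/* decline anything else */
		}
	}

	static struct dma_client my_client = {
		.event_callback = my_event,
	};

	static int __init my_init(void)
	{
		dma_cap_set(DMA_MEMCPY, my_client.cap_mask);
		dma_async_client_register(&my_client);
		dma_async_client_chan_request(&my_client);
		return 0;
	}
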
+@@ -272,40 +302,42 @@
+  */
+ void dma_async_client_unregister(struct dma_client *client)
+ {
++	struct dma_device *device;
+ 	struct dma_chan *chan;
++	enum dma_state_client ack;
+ 
+ 	if (!client)
+ 		return;
+ 
+-	rcu_read_lock();
+-	list_for_each_entry_rcu(chan, &client->channels, client_node)
+-		dma_client_chan_free(chan);
+-	rcu_read_unlock();
+-
+ 	mutex_lock(&dma_list_mutex);
++	/* free all channels the client is holding */
++	list_for_each_entry(device, &dma_device_list, global_node)
++		list_for_each_entry(chan, &device->channels, device_node) {
++			ack = client->event_callback(client, chan,
++				DMA_RESOURCE_REMOVED);
++
++			if (ack == DMA_ACK) {
++				dma_chan_put(chan);
++				kref_put(&chan->device->refcount,
++					dma_async_device_cleanup);
++			}
++		}
++
+ 	list_del(&client->global_node);
+ 	mutex_unlock(&dma_list_mutex);
+-
+-	kfree(client);
+-	dma_chans_rebalance();
+ }
+ EXPORT_SYMBOL(dma_async_client_unregister);
+ 
+ /**
+- * dma_async_client_chan_request - request DMA channels
+- * @client: &dma_client
+- * @number: count of DMA channels requested
+- *
+- * Clients call dma_async_client_chan_request() to specify how many
+- * DMA channels they need, 0 to free all currently allocated.
+- * The resulting allocations/frees are indicated to the client via the
+- * event callback.
++ * dma_async_client_chan_request - send all available channels to the
++ * client that satisfy the capability mask
++ * @client: requester
+  */
+-void dma_async_client_chan_request(struct dma_client *client,
+-			unsigned int number)
++void dma_async_client_chan_request(struct dma_client *client)
+ {
+-	client->chans_desired = number;
+-	dma_chans_rebalance();
++	mutex_lock(&dma_list_mutex);
++	dma_client_chan_alloc(client);
++	mutex_unlock(&dma_list_mutex);
+ }
+ EXPORT_SYMBOL(dma_async_client_chan_request);
+ 
+@@ -322,6 +354,25 @@
+ 	if (!device)
+ 		return -ENODEV;
+ 
++	/* validate device routines */
++	BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) &&
++		!device->device_prep_dma_memcpy);
++	BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
++		!device->device_prep_dma_xor);
++	BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
++		!device->device_prep_dma_zero_sum);
++	BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
++		!device->device_prep_dma_memset);
++	BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
++		!device->device_prep_dma_interrupt);
++
++	BUG_ON(!device->device_alloc_chan_resources);
++	BUG_ON(!device->device_free_chan_resources);
++	BUG_ON(!device->device_dependency_added);
++	BUG_ON(!device->device_is_tx_complete);
++	BUG_ON(!device->device_issue_pending);
++	BUG_ON(!device->dev);
++
+ 	init_completion(&device->done);
+ 	kref_init(&device->refcount);
+ 	device->dev_id = id++;
+@@ -339,6 +390,9 @@
+ 		         device->dev_id, chan->chan_id);
+ 
+ 		kref_get(&device->refcount);
++		kref_init(&chan->refcount);
++		chan->slow_ref = 0;
++		INIT_RCU_HEAD(&chan->rcu);
+ 		class_device_register(&chan->class_dev);
+ 	}
+ 
+@@ -346,7 +400,7 @@
+ 	list_add_tail(&device->global_node, &dma_device_list);
+ 	mutex_unlock(&dma_list_mutex);
+ 
+-	dma_chans_rebalance();
++	dma_clients_notify_available();
+ 
+ 	return 0;
+ }
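
The BUG_ON checks added to the registration path make the provider contract
explicit: every advertised capability needs its prep routine, and the core
channel methods plus dev are mandatory. The ioatdma conversion below is the
in-tree example; in outline, a provider now does something like this at probe
time (a sketch mirroring that conversion; names prefixed my_ are placeholders):

	dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
	device->common.device_alloc_chan_resources = my_alloc_chan_resources;
	device->common.device_free_chan_resources = my_free_chan_resources;
	device->common.device_prep_dma_memcpy = my_prep_dma_memcpy;
	device->common.device_is_tx_complete = my_is_tx_complete;
	device->common.device_issue_pending = my_issue_pending;
	device->common.device_dependency_added = my_dependency_added;
	device->common.dev = &pdev->dev;
	return dma_async_device_register(&device->common);
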
+@@ -371,32 +425,165 @@
+ void dma_async_device_unregister(struct dma_device *device)
+ {
+ 	struct dma_chan *chan;
+-	unsigned long flags;
+ 
+ 	mutex_lock(&dma_list_mutex);
+ 	list_del(&device->global_node);
+ 	mutex_unlock(&dma_list_mutex);
+ 
+ 	list_for_each_entry(chan, &device->channels, device_node) {
+-		if (chan->client) {
+-			spin_lock_irqsave(&chan->client->lock, flags);
+-			list_del(&chan->client_node);
+-			chan->client->chan_count--;
+-			spin_unlock_irqrestore(&chan->client->lock, flags);
+-			chan->client->event_callback(chan->client,
+-			                             chan,
+-			                             DMA_RESOURCE_REMOVED);
+-			dma_client_chan_free(chan);
+-		}
++		dma_clients_notify_removed(chan);
+ 		class_device_unregister(&chan->class_dev);
++		dma_chan_release(chan);
+ 	}
+-	dma_chans_rebalance();
+ 
+ 	kref_put(&device->refcount, dma_async_device_cleanup);
+ 	wait_for_completion(&device->done);
+ }
+ EXPORT_SYMBOL(dma_async_device_unregister);
+ 
++/**
++ * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
++ * @chan: DMA channel to offload copy to
++ * @dest: destination address (virtual)
++ * @src: source address (virtual)
++ * @len: length
++ *
++ * Both @dest and @src must be mappable to a bus address according to the
++ * DMA mapping API rules for streaming mappings.
++ * Both @dest and @src must stay memory resident (kernel memory or locked
++ * user space pages).
++ */
++dma_cookie_t
++dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
++			void *src, size_t len)
++{
++	struct dma_device *dev = chan->device;
++	struct dma_async_tx_descriptor *tx;
++	dma_addr_t addr;
++	dma_cookie_t cookie;
++	int cpu;
++
++	tx = dev->device_prep_dma_memcpy(chan, len, 0);
++	if (!tx)
++		return -ENOMEM;
++
++	tx->ack = 1;
++	tx->callback = NULL;
++	addr = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
++	tx->tx_set_src(addr, tx, 0);
++	addr = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
++	tx->tx_set_dest(addr, tx, 0);
++	cookie = tx->tx_submit(tx);
++
++	cpu = get_cpu();
++	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
++	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
++	put_cpu();
++
++	return cookie;
++}
++EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
++
++/**
++ * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
++ * @chan: DMA channel to offload copy to
++ * @page: destination page
++ * @offset: offset in page to copy to
++ * @kdata: source address (virtual)
++ * @len: length
++ *
++ * Both @page/@offset and @kdata must be mappable to a bus address according
++ * to the DMA mapping API rules for streaming mappings.
++ * Both @page/@offset and @kdata must stay memory resident (kernel memory or
++ * locked user space pages)
++ */
++dma_cookie_t
++dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
++			unsigned int offset, void *kdata, size_t len)
++{
++	struct dma_device *dev = chan->device;
++	struct dma_async_tx_descriptor *tx;
++	dma_addr_t addr;
++	dma_cookie_t cookie;
++	int cpu;
++
++	tx = dev->device_prep_dma_memcpy(chan, len, 0);
++	if (!tx)
++		return -ENOMEM;
++
++	tx->ack = 1;
++	tx->callback = NULL;
++	addr = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
++	tx->tx_set_src(addr, tx, 0);
++	addr = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
++	tx->tx_set_dest(addr, tx, 0);
++	cookie = tx->tx_submit(tx);
++
++	cpu = get_cpu();
++	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
++	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
++	put_cpu();
++
++	return cookie;
++}
++EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
++
++/**
++ * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
++ * @chan: DMA channel to offload copy to
++ * @dest_pg: destination page
++ * @dest_off: offset in page to copy to
++ * @src_pg: source page
++ * @src_off: offset in page to copy from
++ * @len: length
++ *
++ * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
++ * address according to the DMA mapping API rules for streaming mappings.
++ * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
++ * (kernel memory or locked user space pages).
++ */
++dma_cookie_t
++dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
++	unsigned int dest_off, struct page *src_pg, unsigned int src_off,
++	size_t len)
++{
++	struct dma_device *dev = chan->device;
++	struct dma_async_tx_descriptor *tx;
++	dma_addr_t addr;
++	dma_cookie_t cookie;
++	int cpu;
++
++	tx = dev->device_prep_dma_memcpy(chan, len, 0);
++	if (!tx)
++		return -ENOMEM;
++
++	tx->ack = 1;
++	tx->callback = NULL;
++	addr = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
++	tx->tx_set_src(addr, tx, 0);
++	addr = dma_map_page(dev->dev, dest_pg, dest_off, len, DMA_FROM_DEVICE);
++	tx->tx_set_dest(addr, tx, 0);
++	cookie = tx->tx_submit(tx);
++
++	cpu = get_cpu();
++	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
++	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
++	put_cpu();
++
++	return cookie;
++}
++EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
++
++void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
++	struct dma_chan *chan)
++{
++	tx->chan = chan;
++	spin_lock_init(&tx->lock);
++	INIT_LIST_HEAD(&tx->depend_node);
++	INIT_LIST_HEAD(&tx->depend_list);
++}
++EXPORT_SYMBOL(dma_async_tx_descriptor_init);
++
+ static int __init dma_bus_init(void)
+ {
+ 	mutex_init(&dma_list_mutex);
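
The three dma_async_memcpy_* helpers above all wrap the same raw descriptor
flow, which callers can also drive directly: prep, set addresses, submit, then
kick the engine. Condensed (a sketch; src_dma/dst_dma are assumed to be
already-mapped bus addresses, exactly as the helpers obtain them):

	struct dma_async_tx_descriptor *tx;
	dma_cookie_t cookie;

	tx = chan->device->device_prep_dma_memcpy(chan, len, 0);
	if (!tx)
		return -ENOMEM;
	tx->ack = 1;	/* no dependent operations will be attached */
	tx->tx_set_src(src_dma, tx, 0);
	tx->tx_set_dest(dst_dma, tx, 0);
	cookie = tx->tx_submit(tx);
	dma_async_issue_pending(chan);	/* flush the descriptor to hardware */
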
+diff -Nurb linux-2.6.22-570/drivers/dma/ioatdma.c linux-2.6.22-try2/drivers/dma/ioatdma.c
+--- linux-2.6.22-570/drivers/dma/ioatdma.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/dma/ioatdma.c	2007-12-19 15:29:23.000000000 -0500
+@@ -39,6 +39,7 @@
+ #define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
+ #define to_ioat_device(dev) container_of(dev, struct ioat_device, common)
+ #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
++#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
+ 
+ /* internal functions */
+ static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
+@@ -71,13 +72,76 @@
+ 		INIT_LIST_HEAD(&ioat_chan->used_desc);
+ 		/* This should be made common somewhere in dmaengine.c */
+ 		ioat_chan->common.device = &device->common;
+-		ioat_chan->common.client = NULL;
+ 		list_add_tail(&ioat_chan->common.device_node,
+ 		              &device->common.channels);
+ 	}
+ 	return device->common.chancnt;
+ }
+ 
++static void
++ioat_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
++{
++	struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
++	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
++
++	pci_unmap_addr_set(desc, src, addr);
++
++	list_for_each_entry(iter, &desc->group_list, node) {
++		iter->hw->src_addr = addr;
++		addr += ioat_chan->xfercap;
++	}
++
++}
++
++static void
++ioat_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index)
++{
++	struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx);
++	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
++
++	pci_unmap_addr_set(desc, dst, addr);
++
++	list_for_each_entry(iter, &desc->group_list, node) {
++		iter->hw->dst_addr = addr;
++		addr += ioat_chan->xfercap;
++	}
++}
++
++static dma_cookie_t
++ioat_tx_submit(struct dma_async_tx_descriptor *tx)
++{
++	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
++	struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
++	struct ioat_desc_sw *group_start = list_entry(desc->group_list.next,
++		struct ioat_desc_sw, node);
++	int append = 0;
++	dma_cookie_t cookie;
++
++	spin_lock_bh(&ioat_chan->desc_lock);
++	/* cookie incr and addition to used_list must be atomic */
++	cookie = ioat_chan->common.cookie;
++	cookie++;
++	if (cookie < 0)
++		cookie = 1;
++	ioat_chan->common.cookie = desc->async_tx.cookie = cookie;
++
++	/* write address into NextDescriptor field of last desc in chain */
++	to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = group_start->phys;
++	list_splice_init(&desc->group_list, ioat_chan->used_desc.prev);
++
++	ioat_chan->pending += desc->group_count;
++	if (ioat_chan->pending >= 4) {
++		append = 1;
++		ioat_chan->pending = 0;
++	}
++	spin_unlock_bh(&ioat_chan->desc_lock);
++
++	if (append)
++		ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET,
++					IOAT_CHANCMD_APPEND);
++	return cookie;
++}
++
+ static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
+ 	struct ioat_dma_chan *ioat_chan,
+ 	gfp_t flags)
+@@ -99,6 +163,11 @@
+ 	}
+ 
+ 	memset(desc, 0, sizeof(*desc));
++	dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
++	desc_sw->async_tx.tx_set_src = ioat_set_src;
++	desc_sw->async_tx.tx_set_dest = ioat_set_dest;
++	desc_sw->async_tx.tx_submit = ioat_tx_submit;
++	INIT_LIST_HEAD(&desc_sw->group_list);
+ 	desc_sw->hw = desc;
+ 	desc_sw->phys = phys;
+ 
+@@ -215,45 +284,25 @@
+ 	ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl);
+ }
+ 
+-/**
+- * do_ioat_dma_memcpy - actual function that initiates a IOAT DMA transaction
+- * @ioat_chan: IOAT DMA channel handle
+- * @dest: DMA destination address
+- * @src: DMA source address
+- * @len: transaction length in bytes
+- */
+-
+-static dma_cookie_t do_ioat_dma_memcpy(struct ioat_dma_chan *ioat_chan,
+-                                       dma_addr_t dest,
+-                                       dma_addr_t src,
+-                                       size_t len)
+-{
+-	struct ioat_desc_sw *first;
+-	struct ioat_desc_sw *prev;
+-	struct ioat_desc_sw *new;
+-	dma_cookie_t cookie;
++static struct dma_async_tx_descriptor *
++ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en)
++{
++	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
++	struct ioat_desc_sw *first, *prev, *new;
+ 	LIST_HEAD(new_chain);
+ 	u32 copy;
+ 	size_t orig_len;
+-	dma_addr_t orig_src, orig_dst;
+-	unsigned int desc_count = 0;
+-	unsigned int append = 0;
+-
+-	if (!ioat_chan || !dest || !src)
+-		return -EFAULT;
++	int desc_count = 0;
+ 
+ 	if (!len)
+-		return ioat_chan->common.cookie;
++		return NULL;
+ 
+ 	orig_len = len;
+-	orig_src = src;
+-	orig_dst = dest;
+ 
+ 	first = NULL;
+ 	prev = NULL;
+ 
+ 	spin_lock_bh(&ioat_chan->desc_lock);
+-
+ 	while (len) {
+ 		if (!list_empty(&ioat_chan->free_desc)) {
+ 			new = to_ioat_desc(ioat_chan->free_desc.next);
+@@ -270,9 +319,8 @@
+ 
+ 		new->hw->size = copy;
+ 		new->hw->ctl = 0;
+-		new->hw->src_addr = src;
+-		new->hw->dst_addr = dest;
+-		new->cookie = 0;
++		new->async_tx.cookie = 0;
++		new->async_tx.ack = 1;
+ 
+ 		/* chain together the physical address list for the HW */
+ 		if (!first)
+@@ -281,130 +329,26 @@
+ 			prev->hw->next = (u64) new->phys;
+ 
+ 		prev = new;
+-
+ 		len  -= copy;
+-		dest += copy;
+-		src  += copy;
+-
+ 		list_add_tail(&new->node, &new_chain);
+ 		desc_count++;
+ 	}
+-	new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
+-	new->hw->next = 0;
+ 
+-	/* cookie incr and addition to used_list must be atomic */
++	list_splice(&new_chain, &new->group_list);
+ 
+-	cookie = ioat_chan->common.cookie;
+-	cookie++;
+-	if (cookie < 0)
+-		cookie = 1;
+-	ioat_chan->common.cookie = new->cookie = cookie;
++	new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
++	new->hw->next = 0;
++	new->group_count = desc_count;
++	new->async_tx.ack = 0; /* client is in control of this ack */
++	new->async_tx.cookie = -EBUSY;
+ 
+-	pci_unmap_addr_set(new, src, orig_src);
+-	pci_unmap_addr_set(new, dst, orig_dst);
+ 	pci_unmap_len_set(new, src_len, orig_len);
+ 	pci_unmap_len_set(new, dst_len, orig_len);
+-
+-	/* write address into NextDescriptor field of last desc in chain */
+-	to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = first->phys;
+-	list_splice_init(&new_chain, ioat_chan->used_desc.prev);
+-
+-	ioat_chan->pending += desc_count;
+-	if (ioat_chan->pending >= 20) {
+-		append = 1;
+-		ioat_chan->pending = 0;
+-	}
+-
+ 	spin_unlock_bh(&ioat_chan->desc_lock);
+ 
+-	if (append)
+-		ioatdma_chan_write8(ioat_chan,
+-		                    IOAT_CHANCMD_OFFSET,
+-		                    IOAT_CHANCMD_APPEND);
+-	return cookie;
+-}
+-
+-/**
+- * ioat_dma_memcpy_buf_to_buf - wrapper that takes src & dest bufs
+- * @chan: IOAT DMA channel handle
+- * @dest: DMA destination address
+- * @src: DMA source address
+- * @len: transaction length in bytes
+- */
+-
+-static dma_cookie_t ioat_dma_memcpy_buf_to_buf(struct dma_chan *chan,
+-                                               void *dest,
+-                                               void *src,
+-                                               size_t len)
+-{
+-	dma_addr_t dest_addr;
+-	dma_addr_t src_addr;
+-	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+-
+-	dest_addr = pci_map_single(ioat_chan->device->pdev,
+-		dest, len, PCI_DMA_FROMDEVICE);
+-	src_addr = pci_map_single(ioat_chan->device->pdev,
+-		src, len, PCI_DMA_TODEVICE);
+-
+-	return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
+-}
+-
+-/**
+- * ioat_dma_memcpy_buf_to_pg - wrapper, copying from a buf to a page
+- * @chan: IOAT DMA channel handle
+- * @page: pointer to the page to copy to
+- * @offset: offset into that page
+- * @src: DMA source address
+- * @len: transaction length in bytes
+- */
+-
+-static dma_cookie_t ioat_dma_memcpy_buf_to_pg(struct dma_chan *chan,
+-                                              struct page *page,
+-                                              unsigned int offset,
+-                                              void *src,
+-                                              size_t len)
+-{
+-	dma_addr_t dest_addr;
+-	dma_addr_t src_addr;
+-	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+-
+-	dest_addr = pci_map_page(ioat_chan->device->pdev,
+-		page, offset, len, PCI_DMA_FROMDEVICE);
+-	src_addr = pci_map_single(ioat_chan->device->pdev,
+-		src, len, PCI_DMA_TODEVICE);
+-
+-	return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
++	return new ? &new->async_tx : NULL;
+ }
+ 
+-/**
+- * ioat_dma_memcpy_pg_to_pg - wrapper, copying between two pages
+- * @chan: IOAT DMA channel handle
+- * @dest_pg: pointer to the page to copy to
+- * @dest_off: offset into that page
+- * @src_pg: pointer to the page to copy from
+- * @src_off: offset into that page
+- * @len: transaction length in bytes. This is guaranteed not to make a copy
+- *	 across a page boundary.
+- */
+-
+-static dma_cookie_t ioat_dma_memcpy_pg_to_pg(struct dma_chan *chan,
+-                                             struct page *dest_pg,
+-                                             unsigned int dest_off,
+-                                             struct page *src_pg,
+-                                             unsigned int src_off,
+-                                             size_t len)
+-{
+-	dma_addr_t dest_addr;
+-	dma_addr_t src_addr;
+-	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+-
+-	dest_addr = pci_map_page(ioat_chan->device->pdev,
+-		dest_pg, dest_off, len, PCI_DMA_FROMDEVICE);
+-	src_addr = pci_map_page(ioat_chan->device->pdev,
+-		src_pg, src_off, len, PCI_DMA_TODEVICE);
+-
+-	return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len);
+-}
+ 
+ /**
+  * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended descriptors to hw
+@@ -467,8 +411,8 @@
+ 		 * exceeding xfercap, perhaps. If so, only the last one will
+ 		 * have a cookie, and require unmapping.
+ 		 */
+-		if (desc->cookie) {
+-			cookie = desc->cookie;
++		if (desc->async_tx.cookie) {
++			cookie = desc->async_tx.cookie;
+ 
+ 			/* yes we are unmapping both _page and _single alloc'd
+ 			   regions with unmap_page. Is this *really* that bad?
+@@ -484,13 +428,18 @@
+ 		}
+ 
+ 		if (desc->phys != phys_complete) {
+-			/* a completed entry, but not the last, so cleanup */
++			/* a completed entry, but not the last, so cleanup
++			 * if the client is done with the descriptor
++			 */
++			if (desc->async_tx.ack) {
+ 			list_del(&desc->node);
+ 			list_add_tail(&desc->node, &chan->free_desc);
++			} else
++				desc->async_tx.cookie = 0;
+ 		} else {
+ 			/* last used desc. Do not remove, so we can append from
+ 			   it, but don't look at it next time, either */
+-			desc->cookie = 0;
++			desc->async_tx.cookie = 0;
+ 
+ 			/* TODO check status bits? */
+ 			break;
+@@ -506,6 +455,17 @@
+ 	spin_unlock(&chan->cleanup_lock);
+ }
+ 
++static void ioat_dma_dependency_added(struct dma_chan *chan)
++{
++	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
++	spin_lock_bh(&ioat_chan->desc_lock);
++	if (ioat_chan->pending == 0) {
++		spin_unlock_bh(&ioat_chan->desc_lock);
++		ioat_dma_memcpy_cleanup(ioat_chan);
++	} else
++		spin_unlock_bh(&ioat_chan->desc_lock);
++}
++
+ /**
+  * ioat_dma_is_complete - poll the status of a IOAT DMA transaction
+  * @chan: IOAT DMA channel handle
+@@ -607,6 +567,7 @@
+ 
+ 	desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
+ 	desc->hw->next = 0;
++	desc->async_tx.ack = 1;
+ 
+ 	list_add_tail(&desc->node, &ioat_chan->used_desc);
+ 	spin_unlock_bh(&ioat_chan->desc_lock);
+@@ -633,6 +594,8 @@
+ 	u8 *src;
+ 	u8 *dest;
+ 	struct dma_chan *dma_chan;
++	struct dma_async_tx_descriptor *tx;
++	dma_addr_t addr;
+ 	dma_cookie_t cookie;
+ 	int err = 0;
+ 
+@@ -658,7 +621,15 @@
+ 		goto out;
+ 	}
+ 
+-	cookie = ioat_dma_memcpy_buf_to_buf(dma_chan, dest, src, IOAT_TEST_SIZE);
++	tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0);
++	async_tx_ack(tx);
++	addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
++			DMA_TO_DEVICE);
++	ioat_set_src(addr, tx, 0);
++	addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
++			DMA_FROM_DEVICE);
++	ioat_set_dest(addr, tx, 0);
++	cookie = ioat_tx_submit(tx);
+ 	ioat_dma_memcpy_issue_pending(dma_chan);
+ 	msleep(1);
+ 
+@@ -754,13 +725,14 @@
+ 	INIT_LIST_HEAD(&device->common.channels);
+ 	enumerate_dma_channels(device);
+ 
++	dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
+ 	device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources;
+ 	device->common.device_free_chan_resources = ioat_dma_free_chan_resources;
+-	device->common.device_memcpy_buf_to_buf = ioat_dma_memcpy_buf_to_buf;
+-	device->common.device_memcpy_buf_to_pg = ioat_dma_memcpy_buf_to_pg;
+-	device->common.device_memcpy_pg_to_pg = ioat_dma_memcpy_pg_to_pg;
+-	device->common.device_memcpy_complete = ioat_dma_is_complete;
+-	device->common.device_memcpy_issue_pending = ioat_dma_memcpy_issue_pending;
++	device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy;
++	device->common.device_is_tx_complete = ioat_dma_is_complete;
++	device->common.device_issue_pending = ioat_dma_memcpy_issue_pending;
++	device->common.device_dependency_added = ioat_dma_dependency_added;
++	device->common.dev = &pdev->dev;
+ 	printk(KERN_INFO "Intel(R) I/OAT DMA Engine found, %d channels\n",
+ 		device->common.chancnt);
+ 
+diff -Nurb linux-2.6.22-570/drivers/dma/ioatdma.h linux-2.6.22-try2/drivers/dma/ioatdma.h
+--- linux-2.6.22-570/drivers/dma/ioatdma.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/dma/ioatdma.h	2007-12-19 15:29:23.000000000 -0500
+@@ -30,9 +30,6 @@
+ 
+ #define IOAT_LOW_COMPLETION_MASK	0xffffffc0
+ 
+-extern struct list_head dma_device_list;
+-extern struct list_head dma_client_list;
+-
+ /**
+  * struct ioat_device - internal representation of a IOAT device
+  * @pdev: PCI-Express device
+@@ -105,15 +102,20 @@
+ /**
+  * struct ioat_desc_sw - wrapper around hardware descriptor
+  * @hw: hardware DMA descriptor
++ * @async_tx: async transaction descriptor
+  * @node:
++ * @group_list: list of descriptors that make up this transaction
++ * @group_count: number of descriptors in the group
+  * @cookie:
+  * @phys:
+  */
+ 
+ struct ioat_desc_sw {
+ 	struct ioat_dma_descriptor *hw;
++	struct dma_async_tx_descriptor async_tx;
+ 	struct list_head node;
+-	dma_cookie_t cookie;
++	struct list_head group_list;
++	int group_count;
+ 	dma_addr_t phys;
+ 	DECLARE_PCI_UNMAP_ADDR(src)
+ 	DECLARE_PCI_UNMAP_LEN(src_len)
+@@ -122,4 +124,3 @@
+ };
+ 
+ #endif /* IOATDMA_H */
+-
+diff -Nurb linux-2.6.22-570/drivers/dma/iop-adma.c linux-2.6.22-try2/drivers/dma/iop-adma.c
+--- linux-2.6.22-570/drivers/dma/iop-adma.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/dma/iop-adma.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,1465 @@
++/*
++ * offload engine driver for the Intel Xscale series of i/o processors
++ * Copyright © 2006, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++
++/*
++ * This driver supports the asynchronous DMA copy and RAID engines available
++ * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 13xx)
++ */
++
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/async_tx.h>
++#include <linux/delay.h>
++#include <linux/dma-mapping.h>
++#include <linux/spinlock.h>
++#include <linux/interrupt.h>
++#include <linux/platform_device.h>
++#include <linux/memory.h>
++#include <linux/ioport.h>
++
++#include <asm/arch/adma.h>
++
++#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
++#define to_iop_adma_device(dev) \
++	container_of(dev, struct iop_adma_device, common)
++#define tx_to_iop_adma_slot(tx) \
++	container_of(tx, struct iop_adma_desc_slot, async_tx)
++
++/**
++ * iop_adma_free_slots - flags descriptor slots for reuse
++ * @slot: Slot to free
++ * Caller must hold &iop_chan->lock while calling this function
++ */
++static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
++{
++	int stride = slot->slots_per_op;
++
++	while (stride--) {
++		slot->slots_per_op = 0;
++		slot = list_entry(slot->slot_node.next,
++				struct iop_adma_desc_slot,
++				slot_node);
++	}
++}
++
++static dma_cookie_t
++iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
++	struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
++{
++	BUG_ON(desc->async_tx.cookie < 0);
++	spin_lock_bh(&desc->async_tx.lock);
++	if (desc->async_tx.cookie > 0) {
++		cookie = desc->async_tx.cookie;
++		desc->async_tx.cookie = 0;
++
++		/* call the callback (must not sleep or submit new
++		 * operations to this channel)
++		 */
++		if (desc->async_tx.callback)
++			desc->async_tx.callback(
++				desc->async_tx.callback_param);
++
++		/* unmap dma addresses
++		 * (unmap_single vs unmap_page?)
++		 */
++		if (desc->group_head && desc->unmap_len) {
++			struct iop_adma_desc_slot *unmap = desc->group_head;
++			struct device *dev =
++				&iop_chan->device->pdev->dev;
++			u32 len = unmap->unmap_len;
++			u32 src_cnt = unmap->unmap_src_cnt;
++			dma_addr_t addr = iop_desc_get_dest_addr(unmap,
++				iop_chan);
++
++			dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
++			while (src_cnt--) {
++				addr = iop_desc_get_src_addr(unmap,
++							iop_chan,
++							src_cnt);
++				dma_unmap_page(dev, addr, len,
++					DMA_TO_DEVICE);
++			}
++			desc->group_head = NULL;
++		}
++	}
++
++	/* run dependent operations */
++	async_tx_run_dependencies(&desc->async_tx);
++	spin_unlock_bh(&desc->async_tx.lock);
++
++	return cookie;
++}
++
++static int
++iop_adma_clean_slot(struct iop_adma_desc_slot *desc,
++	struct iop_adma_chan *iop_chan)
++{
++	/* the client is allowed to attach dependent operations
++	 * until 'ack' is set
++	 */
++	if (!desc->async_tx.ack)
++		return 0;
++
++	/* leave the last descriptor in the chain
++	 * so we can append to it
++	 */
++	if (desc->chain_node.next == &iop_chan->chain)
++		return 1;
++
++	dev_dbg(iop_chan->device->common.dev,
++		"\tfree slot: %d slots_per_op: %d\n",
++		desc->idx, desc->slots_per_op);
++
++	list_del(&desc->chain_node);
++	iop_adma_free_slots(desc);
++
++	return 0;
++}
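
iop_adma_clean_slot() above encodes the ack contract from the engine's side: a
completed descriptor is recycled only after the client acknowledges it, so a
client holding a descriptor (ack == 0) must eventually call async_tx_ack() or
the slot stays pinned. Roughly, from the client's perspective (a sketch; tx
comes from one of the prep routines below, as in the ioatdma self-test):

	/* done attaching dependent operations; allow slot reuse */
	async_tx_ack(tx);	/* sets tx->ack */
	cookie = tx->tx_submit(tx);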
++
++static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
++{
++	struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL;
++	dma_cookie_t cookie = 0;
++	u32 current_desc = iop_chan_get_current_descriptor(iop_chan);
++	int busy = iop_chan_is_busy(iop_chan);
++	int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
++
++	dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
++	/* free completed slots from the chain starting with
++	 * the oldest descriptor
++	 */
++	list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
++					chain_node) {
++		pr_debug("\tcookie: %d slot: %d busy: %d "
++			"this_desc: %#x next_desc: %#x ack: %d\n",
++			iter->async_tx.cookie, iter->idx, busy, iter->phys,
++			iop_desc_get_next_desc(iter),
++			iter->async_tx.ack);
++		prefetch(_iter);
++		prefetch(&_iter->async_tx);
++
++		/* do not advance past the current descriptor loaded into the
++		 * hardware channel, subsequent descriptors are either in
++		 * process or have not been submitted
++		 */
++		if (seen_current)
++			break;
++
++		/* stop the search if we reach the current descriptor and the
++		 * channel is busy, or if it appears that the current descriptor
++		 * needs to be re-read (i.e. has been appended to)
++		 */
++		if (iter->phys == current_desc) {
++			BUG_ON(seen_current++);
++			if (busy || iop_desc_get_next_desc(iter))
++				break;
++		}
++
++		/* detect the start of a group transaction */
++		if (!slot_cnt && !slots_per_op) {
++			slot_cnt = iter->slot_cnt;
++			slots_per_op = iter->slots_per_op;
++			if (slot_cnt <= slots_per_op) {
++				slot_cnt = 0;
++				slots_per_op = 0;
++			}
++		}
++
++		if (slot_cnt) {
++			pr_debug("\tgroup++\n");
++			if (!grp_start)
++				grp_start = iter;
++			slot_cnt -= slots_per_op;
++		}
++
++		/* all the members of a group are complete */
++		if (slots_per_op != 0 && slot_cnt == 0) {
++			struct iop_adma_desc_slot *grp_iter, *_grp_iter;
++			int end_of_chain = 0;
++			pr_debug("\tgroup end\n");
++
++			/* collect the total results */
++			if (grp_start->xor_check_result) {
++				u32 zero_sum_result = 0;
++				slot_cnt = grp_start->slot_cnt;
++				grp_iter = grp_start;
++
++				list_for_each_entry_from(grp_iter,
++					&iop_chan->chain, chain_node) {
++					zero_sum_result |=
++					    iop_desc_get_zero_result(grp_iter);
++					pr_debug("\titer%d result: %d\n",
++						grp_iter->idx, zero_sum_result);
++					slot_cnt -= slots_per_op;
++					if (slot_cnt == 0)
++						break;
++				}
++				pr_debug("\tgrp_start->xor_check_result: %p\n",
++					grp_start->xor_check_result);
++				*grp_start->xor_check_result = zero_sum_result;
++			}
++
++			/* clean up the group */
++			slot_cnt = grp_start->slot_cnt;
++			grp_iter = grp_start;
++			list_for_each_entry_safe_from(grp_iter, _grp_iter,
++				&iop_chan->chain, chain_node) {
++				cookie = iop_adma_run_tx_complete_actions(
++					grp_iter, iop_chan, cookie);
++
++				slot_cnt -= slots_per_op;
++				end_of_chain = iop_adma_clean_slot(grp_iter,
++					iop_chan);
++
++				if (slot_cnt == 0 || end_of_chain)
++					break;
++			}
++
++			/* the group should be complete at this point */
++			BUG_ON(slot_cnt);
++
++			slots_per_op = 0;
++			grp_start = NULL;
++			if (end_of_chain)
++				break;
++			else
++				continue;
++		} else if (slots_per_op) /* wait for group completion */
++			continue;
++
++		/* write back zero sum results (single descriptor case) */
++		if (iter->xor_check_result && iter->async_tx.cookie)
++			*iter->xor_check_result =
++				iop_desc_get_zero_result(iter);
++
++		cookie = iop_adma_run_tx_complete_actions(
++					iter, iop_chan, cookie);
++
++		if (iop_adma_clean_slot(iter, iop_chan))
++			break;
++	}
++
++	BUG_ON(!seen_current);
++
++	iop_chan_idle(busy, iop_chan);
++
++	if (cookie > 0) {
++		iop_chan->completed_cookie = cookie;
++		pr_debug("\tcompleted cookie %d\n", cookie);
++	}
++}
++
++static void
++iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
++{
++	spin_lock_bh(&iop_chan->lock);
++	__iop_adma_slot_cleanup(iop_chan);
++	spin_unlock_bh(&iop_chan->lock);
++}
++
++static void iop_adma_tasklet(unsigned long data)
++{
++	struct iop_adma_chan *chan = (struct iop_adma_chan *) data;
++	__iop_adma_slot_cleanup(chan);
++}
++
++static struct iop_adma_desc_slot *
++iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots,
++			int slots_per_op)
++{
++	struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL;
++	struct list_head chain = LIST_HEAD_INIT(chain);
++	int slots_found, retry = 0;
++
++	/* start search from the last allocated descriptor;
++	 * if a contiguous allocation cannot be found, start searching
++	 * from the beginning of the list
++	 */
++retry:
++	slots_found = 0;
++	if (retry == 0)
++		iter = iop_chan->last_used;
++	else
++		iter = list_entry(&iop_chan->all_slots,
++			struct iop_adma_desc_slot,
++			slot_node);
++
++	list_for_each_entry_safe_continue(
++		iter, _iter, &iop_chan->all_slots, slot_node) {
++		prefetch(_iter);
++		prefetch(&_iter->async_tx);
++		if (iter->slots_per_op) {
++			/* give up after finding the first busy slot
++			 * on the second pass through the list
++			 */
++			if (retry)
++				break;
++
++			slots_found = 0;
++			continue;
++		}
++
++		/* start the allocation if the slot is correctly aligned */
++		if (!slots_found++) {
++			if (iop_desc_is_aligned(iter, slots_per_op))
++				alloc_start = iter;
++			else {
++				slots_found = 0;
++				continue;
++			}
++		}
++
++		if (slots_found == num_slots) {
++			struct iop_adma_desc_slot *alloc_tail = NULL;
++			struct iop_adma_desc_slot *last_used = NULL;
++			iter = alloc_start;
++			while (num_slots) {
++				int i;
++				dev_dbg(iop_chan->device->common.dev,
++					"allocated slot: %d "
++					"(desc %p phys: %#x) slots_per_op %d\n",
++					iter->idx, iter->hw_desc, iter->phys,
++					slots_per_op);
++
++				/* pre-ack all but the last descriptor */
++				if (num_slots != slots_per_op)
++					iter->async_tx.ack = 1;
++				else
++					iter->async_tx.ack = 0;
++
++				list_add_tail(&iter->chain_node, &chain);
++				alloc_tail = iter;
++				iter->async_tx.cookie = 0;
++				iter->slot_cnt = num_slots;
++				iter->xor_check_result = NULL;
++				for (i = 0; i < slots_per_op; i++) {
++					iter->slots_per_op = slots_per_op - i;
++					last_used = iter;
++					iter = list_entry(iter->slot_node.next,
++						struct iop_adma_desc_slot,
++						slot_node);
++				}
++				num_slots -= slots_per_op;
++			}
++			alloc_tail->group_head = alloc_start;
++			alloc_tail->async_tx.cookie = -EBUSY;
++			list_splice(&chain, &alloc_tail->group_list);
++			iop_chan->last_used = last_used;
++			iop_desc_clear_next_desc(alloc_start);
++			iop_desc_clear_next_desc(alloc_tail);
++			return alloc_tail;
++		}
++	}
++	if (!retry++)
++		goto retry;
++
++	/* try to free some slots if the allocation fails */
++	tasklet_schedule(&iop_chan->irq_tasklet);
++
++	return NULL;
++}
++
++static dma_cookie_t
++iop_desc_assign_cookie(struct iop_adma_chan *iop_chan,
++	struct iop_adma_desc_slot *desc)
++{
++	dma_cookie_t cookie = iop_chan->common.cookie;
++	cookie++;
++	if (cookie < 0)
++		cookie = 1;
++	iop_chan->common.cookie = desc->async_tx.cookie = cookie;
++	return cookie;
++}
++
++static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
++{
++	dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
++		iop_chan->pending);
++
++	if (iop_chan->pending >= IOP_ADMA_THRESHOLD) {
++		iop_chan->pending = 0;
++		iop_chan_append(iop_chan);
++	}
++}
++
++static dma_cookie_t
++iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
++{
++	struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
++	struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
++	struct iop_adma_desc_slot *grp_start, *old_chain_tail;
++	int slot_cnt;
++	int slots_per_op;
++	dma_cookie_t cookie;
++
++	grp_start = sw_desc->group_head;
++	slot_cnt = grp_start->slot_cnt;
++	slots_per_op = grp_start->slots_per_op;
++
++	spin_lock_bh(&iop_chan->lock);
++	cookie = iop_desc_assign_cookie(iop_chan, sw_desc);
++
++	old_chain_tail = list_entry(iop_chan->chain.prev,
++		struct iop_adma_desc_slot, chain_node);
++	list_splice_init(&sw_desc->group_list, &old_chain_tail->chain_node);
++
++	/* fix up the hardware chain */
++	iop_desc_set_next_desc(old_chain_tail, grp_start->phys);
++
++	/* 1/ don't add pre-chained descriptors
++	 * 2/ dummy read to flush next_desc write
++	 */
++	BUG_ON(iop_desc_get_next_desc(sw_desc));
++
++	/* increment the pending count by the number of slots
++	 * memcpy operations have a 1:1 (slot:operation) relation
++	 * other operations are heavier and will pop the threshold
++	 * more often.
++	 */
++	iop_chan->pending += slot_cnt;
++	iop_adma_check_threshold(iop_chan);
++	spin_unlock_bh(&iop_chan->lock);
++
++	dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n",
++		__FUNCTION__, sw_desc->async_tx.cookie, sw_desc->idx);
++
++	return cookie;
++}
++
++static void
++iop_adma_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
++	int index)
++{
++	struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
++	struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
++
++	/* TODO: support transfer lengths > IOP_ADMA_MAX_BYTE_COUNT */
++	iop_desc_set_dest_addr(sw_desc->group_head, iop_chan, addr);
++}
++
++static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
++static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
++
++/* returns the number of allocated descriptors */
++static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
++{
++	char *hw_desc;
++	int idx;
++	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++	struct iop_adma_desc_slot *slot = NULL;
++	int init = iop_chan->slots_allocated ? 0 : 1;
++	struct iop_adma_platform_data *plat_data =
++		iop_chan->device->pdev->dev.platform_data;
++	int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE;
++
++	/* Allocate descriptor slots */
++	do {
++		idx = iop_chan->slots_allocated;
++		if (idx == num_descs_in_pool)
++			break;
++
++		slot = kzalloc(sizeof(*slot), GFP_KERNEL);
++		if (!slot) {
++			printk(KERN_INFO "IOP ADMA Channel only initialized"
++				" %d descriptor slots\n", idx);
++			break;
++		}
++		hw_desc = (char *) iop_chan->device->dma_desc_pool_virt;
++		slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
++
++		dma_async_tx_descriptor_init(&slot->async_tx, chan);
++		slot->async_tx.tx_submit = iop_adma_tx_submit;
++		slot->async_tx.tx_set_dest = iop_adma_set_dest;
++		INIT_LIST_HEAD(&slot->chain_node);
++		INIT_LIST_HEAD(&slot->slot_node);
++		INIT_LIST_HEAD(&slot->group_list);
++		hw_desc = (char *) iop_chan->device->dma_desc_pool;
++		slot->phys = (dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
++		slot->idx = idx;
++
++		spin_lock_bh(&iop_chan->lock);
++		iop_chan->slots_allocated++;
++		list_add_tail(&slot->slot_node, &iop_chan->all_slots);
++		spin_unlock_bh(&iop_chan->lock);
++	} while (iop_chan->slots_allocated < num_descs_in_pool);
++
++	if (idx && !iop_chan->last_used)
++		iop_chan->last_used = list_entry(iop_chan->all_slots.next,
++					struct iop_adma_desc_slot,
++					slot_node);
++
++	dev_dbg(iop_chan->device->common.dev,
++		"allocated %d descriptor slots last_used: %p\n",
++		iop_chan->slots_allocated, iop_chan->last_used);
++
++	/* initialize the channel and the chain with a null operation */
++	if (init) {
++		if (dma_has_cap(DMA_MEMCPY,
++			iop_chan->device->common.cap_mask))
++			iop_chan_start_null_memcpy(iop_chan);
++		else if (dma_has_cap(DMA_XOR,
++			iop_chan->device->common.cap_mask))
++			iop_chan_start_null_xor(iop_chan);
++		else
++			BUG();
++	}
++
++	return (idx > 0) ? idx : -ENOMEM;
++}
++
++static struct dma_async_tx_descriptor *
++iop_adma_prep_dma_interrupt(struct dma_chan *chan)
++{
++	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++	struct iop_adma_desc_slot *sw_desc, *grp_start;
++	int slot_cnt, slots_per_op;
++
++	dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
++
++	spin_lock_bh(&iop_chan->lock);
++	slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan);
++	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++	if (sw_desc) {
++		grp_start = sw_desc->group_head;
++		iop_desc_init_interrupt(grp_start, iop_chan);
++		grp_start->unmap_len = 0;
++	}
++	spin_unlock_bh(&iop_chan->lock);
++
++	return sw_desc ? &sw_desc->async_tx : NULL;
++}
++
++static void
++iop_adma_memcpy_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
++	int index)
++{
++	struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
++	struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
++
++	iop_desc_set_memcpy_src_addr(grp_start, addr);
++}
++
++static struct dma_async_tx_descriptor *
++iop_adma_prep_dma_memcpy(struct dma_chan *chan, size_t len, int int_en)
++{
++	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++	struct iop_adma_desc_slot *sw_desc, *grp_start;
++	int slot_cnt, slots_per_op;
++
++	if (unlikely(!len))
++		return NULL;
++	BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
++
++	dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
++		__FUNCTION__, len);
++
++	spin_lock_bh(&iop_chan->lock);
++	slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op);
++	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++	if (sw_desc) {
++		grp_start = sw_desc->group_head;
++		iop_desc_init_memcpy(grp_start, int_en);
++		iop_desc_set_byte_count(grp_start, iop_chan, len);
++		sw_desc->unmap_src_cnt = 1;
++		sw_desc->unmap_len = len;
++		sw_desc->async_tx.tx_set_src = iop_adma_memcpy_set_src;
++	}
++	spin_unlock_bh(&iop_chan->lock);
++
++	return sw_desc ? &sw_desc->async_tx : NULL;
++}
++
++static struct dma_async_tx_descriptor *
++iop_adma_prep_dma_memset(struct dma_chan *chan, int value, size_t len,
++	int int_en)
++{
++	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++	struct iop_adma_desc_slot *sw_desc, *grp_start;
++	int slot_cnt, slots_per_op;
++
++	if (unlikely(!len))
++		return NULL;
++	BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
++
++	dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
++		__FUNCTION__, len);
++
++	spin_lock_bh(&iop_chan->lock);
++	slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op);
++	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++	if (sw_desc) {
++		grp_start = sw_desc->group_head;
++		iop_desc_init_memset(grp_start, int_en);
++		iop_desc_set_byte_count(grp_start, iop_chan, len);
++		iop_desc_set_block_fill_val(grp_start, value);
++		sw_desc->unmap_src_cnt = 1;
++		sw_desc->unmap_len = len;
++	}
++	spin_unlock_bh(&iop_chan->lock);
++
++	return sw_desc ? &sw_desc->async_tx : NULL;
++}
++
++static void
++iop_adma_xor_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx,
++	int index)
++{
++	struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
++	struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
++
++	iop_desc_set_xor_src_addr(grp_start, index, addr);
++}
++
++static struct dma_async_tx_descriptor *
++iop_adma_prep_dma_xor(struct dma_chan *chan, unsigned int src_cnt, size_t len,
++	int int_en)
++{
++	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++	struct iop_adma_desc_slot *sw_desc, *grp_start;
++	int slot_cnt, slots_per_op;
++
++	if (unlikely(!len))
++		return NULL;
++	BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT));
++
++	dev_dbg(iop_chan->device->common.dev,
++		"%s src_cnt: %d len: %u int_en: %d\n",
++		__FUNCTION__, src_cnt, len, int_en);
++
++	spin_lock_bh(&iop_chan->lock);
++	slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
++	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++	if (sw_desc) {
++		grp_start = sw_desc->group_head;
++		iop_desc_init_xor(grp_start, src_cnt, int_en);
++		iop_desc_set_byte_count(grp_start, iop_chan, len);
++		sw_desc->unmap_src_cnt = src_cnt;
++		sw_desc->unmap_len = len;
++		sw_desc->async_tx.tx_set_src = iop_adma_xor_set_src;
++	}
++	spin_unlock_bh(&iop_chan->lock);
++
++	return sw_desc ? &sw_desc->async_tx : NULL;
++}
++
++static void
++iop_adma_xor_zero_sum_set_src(dma_addr_t addr,
++				struct dma_async_tx_descriptor *tx,
++				int index)
++{
++	struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
++	struct iop_adma_desc_slot *grp_start = sw_desc->group_head;
++
++	iop_desc_set_zero_sum_src_addr(grp_start, index, addr);
++}
++
++static struct dma_async_tx_descriptor *
++iop_adma_prep_dma_zero_sum(struct dma_chan *chan, unsigned int src_cnt,
++	size_t len, u32 *result, int int_en)
++{
++	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++	struct iop_adma_desc_slot *sw_desc, *grp_start;
++	int slot_cnt, slots_per_op;
++
++	if (unlikely(!len))
++		return NULL;
++
++	dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
++		__FUNCTION__, src_cnt, len);
++
++	spin_lock_bh(&iop_chan->lock);
++	slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op);
++	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++	if (sw_desc) {
++		grp_start = sw_desc->group_head;
++		iop_desc_init_zero_sum(grp_start, src_cnt, int_en);
++		iop_desc_set_zero_sum_byte_count(grp_start, len);
++		grp_start->xor_check_result = result;
++		pr_debug("\t%s: grp_start->xor_check_result: %p\n",
++			__FUNCTION__, grp_start->xor_check_result);
++		sw_desc->unmap_src_cnt = src_cnt;
++		sw_desc->unmap_len = len;
++		sw_desc->async_tx.tx_set_src = iop_adma_xor_zero_sum_set_src;
++	}
++	spin_unlock_bh(&iop_chan->lock);
++
++	return sw_desc ? &sw_desc->async_tx : NULL;
++}
++
++static void iop_adma_dependency_added(struct dma_chan *chan)
++{
++	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++	tasklet_schedule(&iop_chan->irq_tasklet);
++}
++
++static void iop_adma_free_chan_resources(struct dma_chan *chan)
++{
++	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++	struct iop_adma_desc_slot *iter, *_iter;
++	int in_use_descs = 0;
++
++	iop_adma_slot_cleanup(iop_chan);
++
++	spin_lock_bh(&iop_chan->lock);
++	list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
++					chain_node) {
++		in_use_descs++;
++		list_del(&iter->chain_node);
++	}
++	list_for_each_entry_safe_reverse(
++		iter, _iter, &iop_chan->all_slots, slot_node) {
++		list_del(&iter->slot_node);
++		kfree(iter);
++		iop_chan->slots_allocated--;
++	}
++	iop_chan->last_used = NULL;
++
++	dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n",
++		__FUNCTION__, iop_chan->slots_allocated);
++	spin_unlock_bh(&iop_chan->lock);
++
++	/* one is ok since we intentionally leave one descriptor on the chain */
++	if (in_use_descs > 1)
++		printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n",
++			in_use_descs - 1);
++}
++
++/**
++ * iop_adma_is_complete - poll the status of an ADMA transaction
++ * @chan: ADMA channel handle
++ * @cookie: ADMA transaction identifier
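++ * @done: if not %NULL, set to the last completed cookie
++ * @used: if not %NULL, set to the last issued cookie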
++ */
++static enum dma_status iop_adma_is_complete(struct dma_chan *chan,
++					dma_cookie_t cookie,
++					dma_cookie_t *done,
++					dma_cookie_t *used)
++{
++	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++	dma_cookie_t last_used;
++	dma_cookie_t last_complete;
++	enum dma_status ret;
++
++	last_used = chan->cookie;
++	last_complete = iop_chan->completed_cookie;
++
++	if (done)
++		*done = last_complete;
++	if (used)
++		*used = last_used;
++
++	ret = dma_async_is_complete(cookie, last_complete, last_used);
++	if (ret == DMA_SUCCESS)
++		return ret;
++
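++	/* not complete on the first pass: run cleanup to advance the
++	 * completed cookie, then sample and test again
++	 */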
++	iop_adma_slot_cleanup(iop_chan);
++
++	last_used = chan->cookie;
++	last_complete = iop_chan->completed_cookie;
++
++	if (done)
++		*done = last_complete;
++	if (used)
++		*used = last_used;
++
++	return dma_async_is_complete(cookie, last_complete, last_used);
++}
++
++static irqreturn_t iop_adma_eot_handler(int irq, void *data)
++{
++	struct iop_adma_chan *chan = data;
++
++	dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__);
++
++	tasklet_schedule(&chan->irq_tasklet);
++
++	iop_adma_device_clear_eot_status(chan);
++
++	return IRQ_HANDLED;
++}
++
++static irqreturn_t iop_adma_eoc_handler(int irq, void *data)
++{
++	struct iop_adma_chan *chan = data;
++
++	dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__);
++
++	tasklet_schedule(&chan->irq_tasklet);
++
++	iop_adma_device_clear_eoc_status(chan);
++
++	return IRQ_HANDLED;
++}
++
++static irqreturn_t iop_adma_err_handler(int irq, void *data)
++{
++	struct iop_adma_chan *chan = data;
++	unsigned long status = iop_chan_get_status(chan);
++
++	dev_printk(KERN_ERR, chan->device->common.dev,
++		"error ( %s%s%s%s%s%s%s)\n",
++		iop_is_err_int_parity(status, chan) ? "int_parity " : "",
++		iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "",
++		iop_is_err_int_tabort(status, chan) ? "int_tabort " : "",
++		iop_is_err_int_mabort(status, chan) ? "int_mabort " : "",
++		iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "",
++		iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "",
++		iop_is_err_split_tx(status, chan) ? "split_tx " : "");
++
++	iop_adma_device_clear_err_status(chan);
++
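++	/* no recovery is attempted; halt rather than let a channel in an
++	 * unknown state keep processing descriptors
++	 */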
++	BUG();
++
++	return IRQ_HANDLED;
++}
++
++static void iop_adma_issue_pending(struct dma_chan *chan)
++{
++	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
++
++	if (iop_chan->pending) {
++		iop_chan->pending = 0;
++		iop_chan_append(iop_chan);
++	}
++}
++
++/*
++ * Perform a transaction to verify the HW works.
++ */
++#define IOP_ADMA_TEST_SIZE 2000
++
++static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
++{
++	int i;
++	void *src, *dest;
++	dma_addr_t src_dma, dest_dma;
++	struct dma_chan *dma_chan;
++	dma_cookie_t cookie;
++	struct dma_async_tx_descriptor *tx;
++	int err = 0;
++	struct iop_adma_chan *iop_chan;
++
++	dev_dbg(device->common.dev, "%s\n", __FUNCTION__);
++
++	src = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL);
++	if (!src)
++		return -ENOMEM;
++	dest = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL);
++	if (!dest) {
++		kfree(src);
++		return -ENOMEM;
++	}
++
++	/* Fill in src buffer */
++	for (i = 0; i < IOP_ADMA_TEST_SIZE; i++)
++		((u8 *) src)[i] = (u8)i;
++
++	memset(dest, 0, IOP_ADMA_TEST_SIZE);
++
++	/* Start copy, using first DMA channel */
++	dma_chan = container_of(device->common.channels.next,
++				struct dma_chan,
++				device_node);
++	if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
++		err = -ENODEV;
++		goto out;
++	}
++
++	tx = iop_adma_prep_dma_memcpy(dma_chan, IOP_ADMA_TEST_SIZE, 1);
++	dest_dma = dma_map_single(dma_chan->device->dev, dest,
++				IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
++	iop_adma_set_dest(dest_dma, tx, 0);
++	src_dma = dma_map_single(dma_chan->device->dev, src,
++				IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE);
++	iop_adma_memcpy_set_src(src_dma, tx, 0);
++
++	cookie = iop_adma_tx_submit(tx);
++	iop_adma_issue_pending(dma_chan);
++	async_tx_ack(tx);
++	msleep(1);
++
++	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
++			DMA_SUCCESS) {
++		dev_printk(KERN_ERR, dma_chan->device->dev,
++			"Self-test copy timed out, disabling\n");
++		err = -ENODEV;
++		goto free_resources;
++	}
++
++	iop_chan = to_iop_adma_chan(dma_chan);
++	dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
++		IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
++	if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) {
++		dev_printk(KERN_ERR, dma_chan->device->dev,
++			"Self-test copy failed compare, disabling\n");
++		err = -ENODEV;
++		goto free_resources;
++	}
++
++free_resources:
++	iop_adma_free_chan_resources(dma_chan);
++out:
++	kfree(src);
++	kfree(dest);
++	return err;
++}
++
++#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
++static int __devinit
++iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
++{
++	int i, src_idx;
++	struct page *dest;
++	struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST];
++	struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
++	dma_addr_t dma_addr, dest_dma;
++	struct dma_async_tx_descriptor *tx;
++	struct dma_chan *dma_chan;
++	dma_cookie_t cookie;
++	u8 cmp_byte = 0;
++	u32 cmp_word;
++	u32 zero_sum_result;
++	int err = 0;
++	struct iop_adma_chan *iop_chan;
++
++	dev_dbg(device->common.dev, "%s\n", __FUNCTION__);
++
++	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
++		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
++		if (!xor_srcs[src_idx]) {
++			while (src_idx--)
++				__free_page(xor_srcs[src_idx]);
++			return -ENOMEM;
++		}
++	}
++
++	dest = alloc_page(GFP_KERNEL);
++	if (!dest) {
++		while (src_idx--)
++			__free_page(xor_srcs[src_idx]);
++		return -ENOMEM;
++	}
++
++	/* Fill in src buffers */
++	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
++		u8 *ptr = page_address(xor_srcs[src_idx]);
++		for (i = 0; i < PAGE_SIZE; i++)
++			ptr[i] = (1 << src_idx);
++	}
++
++	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++)
++		cmp_byte ^= (u8) (1 << src_idx);
++
++	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
++			(cmp_byte << 8) | cmp_byte;
++
++	memset(page_address(dest), 0, PAGE_SIZE);
++
++	dma_chan = container_of(device->common.channels.next,
++				struct dma_chan,
++				device_node);
++	if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
++		err = -ENODEV;
++		goto out;
++	}
++
++	/* test xor */
++	tx = iop_adma_prep_dma_xor(dma_chan, IOP_ADMA_NUM_SRC_TEST,
++				PAGE_SIZE, 1);
++	dest_dma = dma_map_page(dma_chan->device->dev, dest, 0,
++				PAGE_SIZE, DMA_FROM_DEVICE);
++	iop_adma_set_dest(dest_dma, tx, 0);
++
++	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
++		dma_addr = dma_map_page(dma_chan->device->dev, xor_srcs[i], 0,
++			PAGE_SIZE, DMA_TO_DEVICE);
++		iop_adma_xor_set_src(dma_addr, tx, i);
++	}
++
++	cookie = iop_adma_tx_submit(tx);
++	iop_adma_issue_pending(dma_chan);
++	async_tx_ack(tx);
++	msleep(8);
++
++	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
++		DMA_SUCCESS) {
++		dev_printk(KERN_ERR, dma_chan->device->dev,
++			"Self-test xor timed out, disabling\n");
++		err = -ENODEV;
++		goto free_resources;
++	}
++
++	iop_chan = to_iop_adma_chan(dma_chan);
++	dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
++		PAGE_SIZE, DMA_FROM_DEVICE);
++	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
++		u32 *ptr = page_address(dest);
++		if (ptr[i] != cmp_word) {
++			dev_printk(KERN_ERR, dma_chan->device->dev,
++				"Self-test xor failed compare, disabling\n");
++			err = -ENODEV;
++			goto free_resources;
++		}
++	}
++	dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma,
++		PAGE_SIZE, DMA_TO_DEVICE);
++
++	/* skip zero sum if the capability is not present */
++	if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask))
++		goto free_resources;
++
++	/* zero sum the sources with the destination page */
++	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
++		zero_sum_srcs[i] = xor_srcs[i];
++	zero_sum_srcs[i] = dest;
++
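++	/* seed the result with a non-zero value; only the engine clearing
++	 * it counts as a passing zero-sum
++	 */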
++	zero_sum_result = 1;
++
++	tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1,
++		PAGE_SIZE, &zero_sum_result, 1);
++	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) {
++		dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i],
++			0, PAGE_SIZE, DMA_TO_DEVICE);
++		iop_adma_xor_zero_sum_set_src(dma_addr, tx, i);
++	}
++
++	cookie = iop_adma_tx_submit(tx);
++	iop_adma_issue_pending(dma_chan);
++	async_tx_ack(tx);
++	msleep(8);
++
++	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
++		dev_printk(KERN_ERR, dma_chan->device->dev,
++			"Self-test zero sum timed out, disabling\n");
++		err = -ENODEV;
++		goto free_resources;
++	}
++
++	if (zero_sum_result != 0) {
++		dev_printk(KERN_ERR, dma_chan->device->dev,
++			"Self-test zero sum failed compare, disabling\n");
++		err = -ENODEV;
++		goto free_resources;
++	}
++
++	/* test memset */
++	tx = iop_adma_prep_dma_memset(dma_chan, 0, PAGE_SIZE, 1);
++	dma_addr = dma_map_page(dma_chan->device->dev, dest, 0,
++			PAGE_SIZE, DMA_FROM_DEVICE);
++	iop_adma_set_dest(dma_addr, tx, 0);
++
++	cookie = iop_adma_tx_submit(tx);
++	iop_adma_issue_pending(dma_chan);
++	async_tx_ack(tx);
++	msleep(8);
++
++	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
++		dev_printk(KERN_ERR, dma_chan->device->dev,
++			"Self-test memset timed out, disabling\n");
++		err = -ENODEV;
++		goto free_resources;
++	}
++
++	for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
++		u32 *ptr = page_address(dest);
++		if (ptr[i]) {
++			dev_printk(KERN_ERR, dma_chan->device->dev,
++				"Self-test memset failed compare, disabling\n");
++			err = -ENODEV;
++			goto free_resources;
++		}
++	}
++
++	/* test for non-zero parity sum */
++	zero_sum_result = 0;
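++	/* the memset test above zeroed the destination, so the xor across
++	 * the sources plus destination is non-zero and the check must
++	 * report it (expected result: 1)
++	 */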
++	tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1,
++		PAGE_SIZE, &zero_sum_result, 1);
++	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) {
++		dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i],
++			0, PAGE_SIZE, DMA_TO_DEVICE);
++		iop_adma_xor_zero_sum_set_src(dma_addr, tx, i);
++	}
++
++	cookie = iop_adma_tx_submit(tx);
++	iop_adma_issue_pending(dma_chan);
++	async_tx_ack(tx);
++	msleep(8);
++
++	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
++		dev_printk(KERN_ERR, dma_chan->device->dev,
++			"Self-test non-zero sum timed out, disabling\n");
++		err = -ENODEV;
++		goto free_resources;
++	}
++
++	if (zero_sum_result != 1) {
++		dev_printk(KERN_ERR, dma_chan->device->dev,
++			"Self-test non-zero sum failed compare, disabling\n");
++		err = -ENODEV;
++		goto free_resources;
++	}
++
++free_resources:
++	iop_adma_free_chan_resources(dma_chan);
++out:
++	src_idx = IOP_ADMA_NUM_SRC_TEST;
++	while (src_idx--)
++		__free_page(xor_srcs[src_idx]);
++	__free_page(dest);
++	return err;
++}
++
++static int __devexit iop_adma_remove(struct platform_device *dev)
++{
++	struct iop_adma_device *device = platform_get_drvdata(dev);
++	struct dma_chan *chan, *_chan;
++	struct iop_adma_chan *iop_chan;
++	int i;
++	struct iop_adma_platform_data *plat_data = dev->dev.platform_data;
++	struct resource *res;
++
++	dma_async_device_unregister(&device->common);
++
++	for (i = 0; i < 3; i++) {
++		unsigned int irq;
++		irq = platform_get_irq(dev, i);
++		free_irq(irq, device);
++	}
++
++	dma_free_coherent(&dev->dev, plat_data->pool_size,
++			device->dma_desc_pool_virt, device->dma_desc_pool);
++
++	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
++	release_mem_region(res->start, res->end - res->start);
++
++	list_for_each_entry_safe(chan, _chan, &device->common.channels,
++				device_node) {
++		iop_chan = to_iop_adma_chan(chan);
++		list_del(&chan->device_node);
++		kfree(iop_chan);
++	}
++	kfree(device);
++
++	return 0;
++}
++
++static int __devinit iop_adma_probe(struct platform_device *pdev)
++{
++	struct resource *res;
++	int ret = 0, i;
++	struct iop_adma_device *adev;
++	struct iop_adma_chan *iop_chan;
++	struct dma_device *dma_dev;
++	struct iop_adma_platform_data *plat_data = pdev->dev.platform_data;
++
++	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
++	if (!res)
++		return -ENODEV;
++
++	if (!devm_request_mem_region(&pdev->dev, res->start,
++				res->end - res->start, pdev->name))
++		return -EBUSY;
++
++	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
++	if (!adev)
++		return -ENOMEM;
++	dma_dev = &adev->common;
++
++	/* allocate coherent memory for hardware descriptors
++	 * note: writecombine gives slightly better performance, but
++	 * requires that we explicitly flush the writes
++	 */
++	adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
++					plat_data->pool_size,
++					&adev->dma_desc_pool,
++					GFP_KERNEL);
++	if (!adev->dma_desc_pool_virt) {
++		ret = -ENOMEM;
++		goto err_free_adev;
++	}
++
++	dev_dbg(&pdev->dev, "%s: allocted descriptor pool virt %p phys %p\n",
++		__FUNCTION__, adev->dma_desc_pool_virt,
++		(void *) adev->dma_desc_pool);
++
++	adev->id = plat_data->hw_id;
++
++	/* discover transaction capabilities from the platform data */
++	dma_dev->cap_mask = plat_data->cap_mask;
++
++	adev->pdev = pdev;
++	platform_set_drvdata(pdev, adev);
++
++	INIT_LIST_HEAD(&dma_dev->channels);
++
++	/* set base routines */
++	dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources;
++	dma_dev->device_free_chan_resources = iop_adma_free_chan_resources;
++	dma_dev->device_is_tx_complete = iop_adma_is_complete;
++	dma_dev->device_issue_pending = iop_adma_issue_pending;
++	dma_dev->device_dependency_added = iop_adma_dependency_added;
++	dma_dev->dev = &pdev->dev;
++
++	/* set prep routines based on capability */
++	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
++		dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy;
++	if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
++		dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset;
++	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
++		dma_dev->max_xor = iop_adma_get_max_xor();
++		dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
++	}
++	if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask))
++		dma_dev->device_prep_dma_zero_sum =
++			iop_adma_prep_dma_zero_sum;
++	if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
++		dma_dev->device_prep_dma_interrupt =
++			iop_adma_prep_dma_interrupt;
++
++	iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL);
++	if (!iop_chan) {
++		ret = -ENOMEM;
++		goto err_free_dma;
++	}
++	iop_chan->device = adev;
++
++	iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start,
++					res->end - res->start);
++	if (!iop_chan->mmr_base) {
++		ret = -ENOMEM;
++		goto err_free_iop_chan;
++	}
++	tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long)
++		iop_chan);
++
++	/* clear errors before enabling interrupts */
++	iop_adma_device_clear_err_status(iop_chan);
++
++	for (i = 0; i < 3; i++) {
++		irq_handler_t handler[] = { iop_adma_eot_handler,
++					iop_adma_eoc_handler,
++					iop_adma_err_handler };
++		int irq = platform_get_irq(pdev, i);
++		if (irq < 0) {
++			ret = -ENXIO;
++			goto err_free_iop_chan;
++		} else {
++			ret = devm_request_irq(&pdev->dev, irq,
++					handler[i], 0, pdev->name, iop_chan);
++			if (ret)
++				goto err_free_iop_chan;
++		}
++	}
++
++	spin_lock_init(&iop_chan->lock);
++	init_timer(&iop_chan->cleanup_watchdog);
++	iop_chan->cleanup_watchdog.data = (unsigned long) iop_chan;
++	iop_chan->cleanup_watchdog.function = iop_adma_tasklet;
++	INIT_LIST_HEAD(&iop_chan->chain);
++	INIT_LIST_HEAD(&iop_chan->all_slots);
++	INIT_RCU_HEAD(&iop_chan->common.rcu);
++	iop_chan->common.device = dma_dev;
++	list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
++
++	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
++		ret = iop_adma_memcpy_self_test(adev);
++		dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
++		if (ret)
++			goto err_free_iop_chan;
++	}
++
++	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
++		dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
++		ret = iop_adma_xor_zero_sum_self_test(adev);
++		dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
++		if (ret)
++			goto err_free_iop_chan;
++	}
++
++	dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
++	  "( %s%s%s%s%s%s%s%s%s%s)\n",
++	  dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
++	  dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
++	  dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
++	  dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
++	  dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
++	  dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
++	  dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)  ? "fill " : "",
++	  dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
++	  dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
++	  dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
++
++	dma_async_device_register(dma_dev);
++	goto out;
++
++ err_free_iop_chan:
++	kfree(iop_chan);
++ err_free_dma:
++	dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
++			adev->dma_desc_pool_virt, adev->dma_desc_pool);
++ err_free_adev:
++	kfree(adev);
++ out:
++	return ret;
++}
++
++static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
++{
++	struct iop_adma_desc_slot *sw_desc, *grp_start;
++	dma_cookie_t cookie;
++	int slot_cnt, slots_per_op;
++
++	dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
++
++	spin_lock_bh(&iop_chan->lock);
++	slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op);
++	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++	if (sw_desc) {
++		grp_start = sw_desc->group_head;
++
++		list_splice_init(&sw_desc->group_list, &iop_chan->chain);
++		sw_desc->async_tx.ack = 1;
++		iop_desc_init_memcpy(grp_start, 0);
++		iop_desc_set_byte_count(grp_start, iop_chan, 0);
++		iop_desc_set_dest_addr(grp_start, iop_chan, 0);
++		iop_desc_set_memcpy_src_addr(grp_start, 0);
++
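++		/* this null descriptor bypasses the normal tx_submit path,
++		 * so its cookie is assigned by hand
++		 */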
++		cookie = iop_chan->common.cookie;
++		cookie++;
++		if (cookie <= 1)
++			cookie = 2;
++
++		/* initialize the completed cookie to be less than
++		 * the most recently used cookie
++		 */
++		iop_chan->completed_cookie = cookie - 1;
++		iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
++
++		/* channel should not be busy */
++		BUG_ON(iop_chan_is_busy(iop_chan));
++
++		/* clear any prior error-status bits */
++		iop_adma_device_clear_err_status(iop_chan);
++
++		/* disable operation */
++		iop_chan_disable(iop_chan);
++
++		/* set the descriptor address */
++		iop_chan_set_next_descriptor(iop_chan, sw_desc->phys);
++
++		/* 1/ don't add pre-chained descriptors
++		 * 2/ dummy read to flush next_desc write
++		 */
++		BUG_ON(iop_desc_get_next_desc(sw_desc));
++
++		/* run the descriptor */
++		iop_chan_enable(iop_chan);
++	} else
++		dev_printk(KERN_ERR, iop_chan->device->common.dev,
++			 "failed to allocate null descriptor\n");
++	spin_unlock_bh(&iop_chan->lock);
++}
++
++static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
++{
++	struct iop_adma_desc_slot *sw_desc, *grp_start;
++	dma_cookie_t cookie;
++	int slot_cnt, slots_per_op;
++
++	dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__);
++
++	spin_lock_bh(&iop_chan->lock);
++	slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op);
++	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
++	if (sw_desc) {
++		grp_start = sw_desc->group_head;
++		list_splice_init(&sw_desc->group_list, &iop_chan->chain);
++		sw_desc->async_tx.ack = 1;
++		iop_desc_init_null_xor(grp_start, 2, 0);
++		iop_desc_set_byte_count(grp_start, iop_chan, 0);
++		iop_desc_set_dest_addr(grp_start, iop_chan, 0);
++		iop_desc_set_xor_src_addr(grp_start, 0, 0);
++		iop_desc_set_xor_src_addr(grp_start, 1, 0);
++
++		cookie = iop_chan->common.cookie;
++		cookie++;
++		if (cookie <= 1)
++			cookie = 2;
++
++		/* initialize the completed cookie to be less than
++		 * the most recently used cookie
++		 */
++		iop_chan->completed_cookie = cookie - 1;
++		iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
++
++		/* channel should not be busy */
++		BUG_ON(iop_chan_is_busy(iop_chan));
++
++		/* clear any prior error-status bits */
++		iop_adma_device_clear_err_status(iop_chan);
++
++		/* disable operation */
++		iop_chan_disable(iop_chan);
++
++		/* set the descriptor address */
++		iop_chan_set_next_descriptor(iop_chan, sw_desc->phys);
++
++		/* 1/ don't add pre-chained descriptors
++		 * 2/ dummy read to flush next_desc write
++		 */
++		BUG_ON(iop_desc_get_next_desc(sw_desc));
++
++		/* run the descriptor */
++		iop_chan_enable(iop_chan);
++	} else
++		dev_printk(KERN_ERR, iop_chan->device->common.dev,
++			"failed to allocate null descriptor\n");
++	spin_unlock_bh(&iop_chan->lock);
++}
++
++static struct platform_driver iop_adma_driver = {
++	.probe		= iop_adma_probe,
++	.remove		= iop_adma_remove,
++	.driver		= {
++		.owner	= THIS_MODULE,
++		.name	= "iop-adma",
++	},
++};
++
++static int __init iop_adma_init(void)
++{
++	/* it's currently unsafe to unload this module */
++	/* if forced, worst case is that rmmod hangs */
++	__unsafe(THIS_MODULE);
++
++	return platform_driver_register(&iop_adma_driver);
++}
++
++static void __exit iop_adma_exit(void)
++{
++	platform_driver_unregister(&iop_adma_driver);
++}
++
++module_init(iop_adma_init);
++module_exit(iop_adma_exit);
++
++MODULE_AUTHOR("Intel Corporation");
++MODULE_DESCRIPTION("IOP ADMA Engine Driver");
++MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/drivers/edac/edac_mc.c linux-2.6.22-try2/drivers/edac/edac_mc.c
+--- linux-2.6.22-570/drivers/edac/edac_mc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/edac/edac_mc.c	2007-12-19 15:29:24.000000000 -0500
+@@ -1906,6 +1906,7 @@
+ 
+ static int edac_kernel_thread(void *arg)
+ {
++	set_freezable();
+ 	while (!kthread_should_stop()) {
+ 		do_edac_check();
+ 
+diff -Nurb linux-2.6.22-570/drivers/firmware/dcdbas.c linux-2.6.22-try2/drivers/firmware/dcdbas.c
+--- linux-2.6.22-570/drivers/firmware/dcdbas.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/firmware/dcdbas.c	2007-12-19 15:29:22.000000000 -0500
+@@ -149,8 +149,9 @@
+ 	return count;
+ }
+ 
+-static ssize_t smi_data_read(struct kobject *kobj, char *buf, loff_t pos,
+-			     size_t count)
++static ssize_t smi_data_read(struct kobject *kobj,
++			     struct bin_attribute *bin_attr,
++			     char *buf, loff_t pos, size_t count)
+ {
+ 	size_t max_read;
+ 	ssize_t ret;
+@@ -170,8 +171,9 @@
+ 	return ret;
+ }
+ 
+-static ssize_t smi_data_write(struct kobject *kobj, char *buf, loff_t pos,
+-			      size_t count)
++static ssize_t smi_data_write(struct kobject *kobj,
++			      struct bin_attribute *bin_attr,
++			      char *buf, loff_t pos, size_t count)
+ {
+ 	ssize_t ret;
+ 
+diff -Nurb linux-2.6.22-570/drivers/firmware/dcdbas.h linux-2.6.22-try2/drivers/firmware/dcdbas.h
+--- linux-2.6.22-570/drivers/firmware/dcdbas.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/firmware/dcdbas.h	2007-12-19 15:29:22.000000000 -0500
+@@ -67,8 +67,7 @@
+ #define DCDBAS_BIN_ATTR_RW(_name) \
+ struct bin_attribute bin_attr_##_name = { \
+ 	.attr =  { .name = __stringify(_name), \
+-		   .mode = 0600, \
+-		   .owner = THIS_MODULE }, \
++		   .mode = 0600 }, \
+ 	.read =  _name##_read, \
+ 	.write = _name##_write, \
+ }
+diff -Nurb linux-2.6.22-570/drivers/firmware/dell_rbu.c linux-2.6.22-try2/drivers/firmware/dell_rbu.c
+--- linux-2.6.22-570/drivers/firmware/dell_rbu.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/firmware/dell_rbu.c	2007-12-19 15:29:22.000000000 -0500
+@@ -543,8 +543,9 @@
+ 	return ret_count;
+ }
+ 
+-static ssize_t read_rbu_data(struct kobject *kobj, char *buffer,
+-	loff_t pos, size_t count)
++static ssize_t read_rbu_data(struct kobject *kobj,
++			     struct bin_attribute *bin_attr,
++			     char *buffer, loff_t pos, size_t count)
+ {
+ 	ssize_t ret_count = 0;
+ 
+@@ -591,8 +592,9 @@
+ 	spin_unlock(&rbu_data.lock);
+ }
+ 
+-static ssize_t read_rbu_image_type(struct kobject *kobj, char *buffer,
+-	loff_t pos, size_t count)
++static ssize_t read_rbu_image_type(struct kobject *kobj,
++				   struct bin_attribute *bin_attr,
++				   char *buffer, loff_t pos, size_t count)
+ {
+ 	int size = 0;
+ 	if (!pos)
+@@ -600,8 +602,9 @@
+ 	return size;
+ }
+ 
+-static ssize_t write_rbu_image_type(struct kobject *kobj, char *buffer,
+-	loff_t pos, size_t count)
++static ssize_t write_rbu_image_type(struct kobject *kobj,
++				    struct bin_attribute *bin_attr,
++				    char *buffer, loff_t pos, size_t count)
+ {
+ 	int rc = count;
+ 	int req_firm_rc = 0;
+@@ -660,8 +663,9 @@
+ 	return rc;
+ }
+ 
+-static ssize_t read_rbu_packet_size(struct kobject *kobj, char *buffer,
+-	loff_t pos, size_t count)
++static ssize_t read_rbu_packet_size(struct kobject *kobj,
++				    struct bin_attribute *bin_attr,
++				    char *buffer, loff_t pos, size_t count)
+ {
+ 	int size = 0;
+ 	if (!pos) {
+@@ -672,8 +676,9 @@
+ 	return size;
+ }
+ 
+-static ssize_t write_rbu_packet_size(struct kobject *kobj, char *buffer,
+-	loff_t pos, size_t count)
++static ssize_t write_rbu_packet_size(struct kobject *kobj,
++				     struct bin_attribute *bin_attr,
++				     char *buffer, loff_t pos, size_t count)
+ {
+ 	unsigned long temp;
+ 	spin_lock(&rbu_data.lock);
+@@ -687,18 +692,18 @@
+ }
+ 
+ static struct bin_attribute rbu_data_attr = {
+-	.attr = {.name = "data",.owner = THIS_MODULE,.mode = 0444},
++	.attr = {.name = "data", .mode = 0444},
+ 	.read = read_rbu_data,
+ };
+ 
+ static struct bin_attribute rbu_image_type_attr = {
+-	.attr = {.name = "image_type",.owner = THIS_MODULE,.mode = 0644},
++	.attr = {.name = "image_type", .mode = 0644},
+ 	.read = read_rbu_image_type,
+ 	.write = write_rbu_image_type,
+ };
+ 
+ static struct bin_attribute rbu_packet_size_attr = {
+-	.attr = {.name = "packet_size",.owner = THIS_MODULE,.mode = 0644},
++	.attr = {.name = "packet_size", .mode = 0644},
+ 	.read = read_rbu_packet_size,
+ 	.write = write_rbu_packet_size,
+ };
+diff -Nurb linux-2.6.22-570/drivers/firmware/edd.c linux-2.6.22-try2/drivers/firmware/edd.c
+--- linux-2.6.22-570/drivers/firmware/edd.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/firmware/edd.c	2007-12-19 15:29:22.000000000 -0500
+@@ -74,7 +74,7 @@
+ 
+ #define EDD_DEVICE_ATTR(_name,_mode,_show,_test) \
+ struct edd_attribute edd_attr_##_name = { 	\
+-	.attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE },	\
++	.attr = {.name = __stringify(_name), .mode = _mode },	\
+ 	.show	= _show,				\
+ 	.test	= _test,				\
+ };
+diff -Nurb linux-2.6.22-570/drivers/firmware/efivars.c linux-2.6.22-try2/drivers/firmware/efivars.c
+--- linux-2.6.22-570/drivers/firmware/efivars.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/firmware/efivars.c	2007-12-19 15:29:22.000000000 -0500
+@@ -131,21 +131,21 @@
+ 
+ #define EFI_ATTR(_name, _mode, _show, _store) \
+ struct subsys_attribute efi_attr_##_name = { \
+-	.attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \
++	.attr = {.name = __stringify(_name), .mode = _mode}, \
+ 	.show = _show, \
+ 	.store = _store, \
+ };
+ 
+ #define EFIVAR_ATTR(_name, _mode, _show, _store) \
+ struct efivar_attribute efivar_attr_##_name = { \
+-	.attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \
++	.attr = {.name = __stringify(_name), .mode = _mode}, \
+ 	.show = _show, \
+ 	.store = _store, \
+ };
+ 
+ #define VAR_SUBSYS_ATTR(_name, _mode, _show, _store) \
+ struct subsys_attribute var_subsys_attr_##_name = { \
+-	.attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \
++	.attr = {.name = __stringify(_name), .mode = _mode}, \
+ 	.show = _show, \
+ 	.store = _store, \
+ };
+diff -Nurb linux-2.6.22-570/drivers/i2c/chips/eeprom.c linux-2.6.22-try2/drivers/i2c/chips/eeprom.c
+--- linux-2.6.22-570/drivers/i2c/chips/eeprom.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/i2c/chips/eeprom.c	2007-12-19 15:29:22.000000000 -0500
+@@ -110,7 +110,8 @@
+ 	mutex_unlock(&data->update_lock);
+ }
+ 
+-static ssize_t eeprom_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t eeprom_read(struct kobject *kobj, struct bin_attribute *bin_attr,
++			   char *buf, loff_t off, size_t count)
+ {
+ 	struct i2c_client *client = to_i2c_client(container_of(kobj, struct device, kobj));
+ 	struct eeprom_data *data = i2c_get_clientdata(client);
+@@ -150,7 +151,6 @@
+ 	.attr = {
+ 		.name = "eeprom",
+ 		.mode = S_IRUGO,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = EEPROM_SIZE,
+ 	.read = eeprom_read,
+diff -Nurb linux-2.6.22-570/drivers/i2c/chips/max6875.c linux-2.6.22-try2/drivers/i2c/chips/max6875.c
+--- linux-2.6.22-570/drivers/i2c/chips/max6875.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/i2c/chips/max6875.c	2007-12-19 15:29:22.000000000 -0500
+@@ -125,8 +125,9 @@
+ 	mutex_unlock(&data->update_lock);
+ }
+ 
+-static ssize_t max6875_read(struct kobject *kobj, char *buf, loff_t off,
+-			    size_t count)
++static ssize_t max6875_read(struct kobject *kobj,
++			    struct bin_attribute *bin_attr,
++			    char *buf, loff_t off, size_t count)
+ {
+ 	struct i2c_client *client = kobj_to_i2c_client(kobj);
+ 	struct max6875_data *data = i2c_get_clientdata(client);
+@@ -152,7 +153,6 @@
+ 	.attr = {
+ 		.name = "eeprom",
+ 		.mode = S_IRUGO,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = USER_EEPROM_SIZE,
+ 	.read = max6875_read,
+diff -Nurb linux-2.6.22-570/drivers/ieee1394/ieee1394_core.c linux-2.6.22-try2/drivers/ieee1394/ieee1394_core.c
+--- linux-2.6.22-570/drivers/ieee1394/ieee1394_core.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/ieee1394/ieee1394_core.c	2007-12-19 15:29:24.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include <linux/moduleparam.h>
+ #include <linux/bitops.h>
+ #include <linux/kdev_t.h>
++#include <linux/freezer.h>
+ #include <linux/suspend.h>
+ #include <linux/kthread.h>
+ #include <linux/preempt.h>
+@@ -1133,8 +1134,6 @@
+ 	struct list_head tmp;
+ 	int may_schedule;
+ 
+-	current->flags |= PF_NOFREEZE;
+-
+ 	while (!kthread_should_stop()) {
+ 
+ 		INIT_LIST_HEAD(&tmp);
+diff -Nurb linux-2.6.22-570/drivers/ieee1394/nodemgr.c linux-2.6.22-try2/drivers/ieee1394/nodemgr.c
+--- linux-2.6.22-570/drivers/ieee1394/nodemgr.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/ieee1394/nodemgr.c	2007-12-19 15:29:24.000000000 -0500
+@@ -1669,6 +1669,7 @@
+ 	unsigned int g, generation = 0;
+ 	int i, reset_cycles = 0;
+ 
++	set_freezable();
+ 	/* Setup our device-model entries */
+ 	nodemgr_create_host_dev_files(host);
+ 
+diff -Nurb linux-2.6.22-570/drivers/ieee1394/sbp2.c linux-2.6.22-try2/drivers/ieee1394/sbp2.c
+--- linux-2.6.22-570/drivers/ieee1394/sbp2.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/ieee1394/sbp2.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1505,69 +1505,6 @@
+ 	}
+ }
+ 
+-static void sbp2_prep_command_orb_no_sg(struct sbp2_command_orb *orb,
+-					struct sbp2_fwhost_info *hi,
+-					struct sbp2_command_info *cmd,
+-					struct scatterlist *sgpnt,
+-					u32 orb_direction,
+-					unsigned int scsi_request_bufflen,
+-					void *scsi_request_buffer,
+-					enum dma_data_direction dma_dir)
+-{
+-	cmd->dma_dir = dma_dir;
+-	cmd->dma_size = scsi_request_bufflen;
+-	cmd->dma_type = CMD_DMA_SINGLE;
+-	cmd->cmd_dma = dma_map_single(hi->host->device.parent,
+-				      scsi_request_buffer,
+-				      cmd->dma_size, cmd->dma_dir);
+-	orb->data_descriptor_hi = ORB_SET_NODE_ID(hi->host->node_id);
+-	orb->misc |= ORB_SET_DIRECTION(orb_direction);
+-
+-	/* handle case where we get a command w/o s/g enabled
+-	 * (but check for transfers larger than 64K) */
+-	if (scsi_request_bufflen <= SBP2_MAX_SG_ELEMENT_LENGTH) {
+-
+-		orb->data_descriptor_lo = cmd->cmd_dma;
+-		orb->misc |= ORB_SET_DATA_SIZE(scsi_request_bufflen);
+-
+-	} else {
+-		/* The buffer is too large. Turn this into page tables. */
+-
+-		struct sbp2_unrestricted_page_table *sg_element =
+-						&cmd->scatter_gather_element[0];
+-		u32 sg_count, sg_len;
+-		dma_addr_t sg_addr;
+-
+-		orb->data_descriptor_lo = cmd->sge_dma;
+-		orb->misc |= ORB_SET_PAGE_TABLE_PRESENT(0x1);
+-
+-		/* fill out our SBP-2 page tables; split up the large buffer */
+-		sg_count = 0;
+-		sg_len = scsi_request_bufflen;
+-		sg_addr = cmd->cmd_dma;
+-		while (sg_len) {
+-			sg_element[sg_count].segment_base_lo = sg_addr;
+-			if (sg_len > SBP2_MAX_SG_ELEMENT_LENGTH) {
+-				sg_element[sg_count].length_segment_base_hi =
+-					PAGE_TABLE_SET_SEGMENT_LENGTH(SBP2_MAX_SG_ELEMENT_LENGTH);
+-				sg_addr += SBP2_MAX_SG_ELEMENT_LENGTH;
+-				sg_len -= SBP2_MAX_SG_ELEMENT_LENGTH;
+-			} else {
+-				sg_element[sg_count].length_segment_base_hi =
+-					PAGE_TABLE_SET_SEGMENT_LENGTH(sg_len);
+-				sg_len = 0;
+-			}
+-			sg_count++;
+-		}
+-
+-		orb->misc |= ORB_SET_DATA_SIZE(sg_count);
+-
+-		sbp2util_cpu_to_be32_buffer(sg_element,
+-				(sizeof(struct sbp2_unrestricted_page_table)) *
+-				sg_count);
+-	}
+-}
+-
+ static void sbp2_create_command_orb(struct sbp2_lu *lu,
+ 				    struct sbp2_command_info *cmd,
+ 				    unchar *scsi_cmd,
+@@ -1611,13 +1548,9 @@
+ 		orb->data_descriptor_hi = 0x0;
+ 		orb->data_descriptor_lo = 0x0;
+ 		orb->misc |= ORB_SET_DIRECTION(1);
+-	} else if (scsi_use_sg)
++	} else
+ 		sbp2_prep_command_orb_sg(orb, hi, cmd, scsi_use_sg, sgpnt,
+ 					 orb_direction, dma_dir);
+-	else
+-		sbp2_prep_command_orb_no_sg(orb, hi, cmd, sgpnt, orb_direction,
+-					    scsi_request_bufflen,
+-					    scsi_request_buffer, dma_dir);
+ 
+ 	sbp2util_cpu_to_be32_buffer(orb, sizeof(*orb));
+ 
+@@ -1706,15 +1639,15 @@
+ 			     void (*done)(struct scsi_cmnd *))
+ {
+ 	unchar *scsi_cmd = (unchar *)SCpnt->cmnd;
+-	unsigned int request_bufflen = SCpnt->request_bufflen;
++	unsigned int request_bufflen = scsi_bufflen(SCpnt);
+ 	struct sbp2_command_info *cmd;
+ 
+ 	cmd = sbp2util_allocate_command_orb(lu, SCpnt, done);
+ 	if (!cmd)
+ 		return -EIO;
+ 
+-	sbp2_create_command_orb(lu, cmd, scsi_cmd, SCpnt->use_sg,
+-				request_bufflen, SCpnt->request_buffer,
++	sbp2_create_command_orb(lu, cmd, scsi_cmd, scsi_sg_count(SCpnt),
++				request_bufflen, scsi_sglist(SCpnt),
+ 				SCpnt->sc_data_direction);
+ 	sbp2_link_orb_command(lu, cmd);
+ 
+diff -Nurb linux-2.6.22-570/drivers/infiniband/core/sysfs.c linux-2.6.22-try2/drivers/infiniband/core/sysfs.c
+--- linux-2.6.22-570/drivers/infiniband/core/sysfs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/infiniband/core/sysfs.c	2007-12-19 15:29:22.000000000 -0500
+@@ -479,7 +479,6 @@
+ 
+ 		element->attr.attr.name  = element->name;
+ 		element->attr.attr.mode  = S_IRUGO;
+-		element->attr.attr.owner = THIS_MODULE;
+ 		element->attr.show       = show;
+ 		element->index		 = i;
+ 
+diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.c linux-2.6.22-try2/drivers/infiniband/ulp/iser/iscsi_iser.c
+--- linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/infiniband/ulp/iser/iscsi_iser.c	2007-12-19 15:29:23.000000000 -0500
+@@ -134,19 +134,9 @@
+ {
+ 	struct iscsi_iser_conn     *iser_conn  = ctask->conn->dd_data;
+ 	struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
+-	struct scsi_cmnd  *sc = ctask->sc;
+ 
+ 	iser_ctask->command_sent = 0;
+ 	iser_ctask->iser_conn    = iser_conn;
+-
+-	if (sc->sc_data_direction == DMA_TO_DEVICE) {
+-		BUG_ON(ctask->total_length == 0);
+-
+-		debug_scsi("cmd [itt %x total %d imm %d unsol_data %d\n",
+-			   ctask->itt, ctask->total_length, ctask->imm_count,
+-			   ctask->unsol_count);
+-	}
+-
+ 	iser_ctask_rdma_init(iser_ctask);
+ }
+ 
+@@ -219,6 +209,14 @@
+ 	struct iscsi_iser_cmd_task *iser_ctask = ctask->dd_data;
+ 	int error = 0;
+ 
++	if (ctask->sc->sc_data_direction == DMA_TO_DEVICE) {
++		BUG_ON(scsi_bufflen(ctask->sc) == 0);
++
++		debug_scsi("cmd [itt %x total %d imm %d unsol_data %d\n",
++			   ctask->itt, scsi_bufflen(ctask->sc),
++			   ctask->imm_count, ctask->unsol_count);
++	}
++
+ 	debug_scsi("ctask deq [cid %d itt 0x%x]\n",
+ 		   conn->id, ctask->itt);
+ 
+@@ -375,6 +373,7 @@
+ static struct iscsi_cls_session *
+ iscsi_iser_session_create(struct iscsi_transport *iscsit,
+ 			 struct scsi_transport_template *scsit,
++			 uint16_t cmds_max, uint16_t qdepth,
+ 			  uint32_t initial_cmdsn, uint32_t *hostno)
+ {
+ 	struct iscsi_cls_session *cls_session;
+@@ -386,7 +385,13 @@
+ 	struct iscsi_iser_cmd_task *iser_ctask;
+ 	struct iser_desc *desc;
+ 
++	/*
++	 * we do not support setting can_queue cmd_per_lun from userspace yet
++	 * because we preallocate so many resources
++	 */
+ 	cls_session = iscsi_session_setup(iscsit, scsit,
++					  ISCSI_DEF_XMIT_CMDS_MAX,
++					  ISCSI_MAX_CMD_PER_LUN,
+ 					  sizeof(struct iscsi_iser_cmd_task),
+ 					  sizeof(struct iser_desc),
+ 					  initial_cmdsn, &hn);
+@@ -545,7 +550,7 @@
+ static struct scsi_host_template iscsi_iser_sht = {
+ 	.name                   = "iSCSI Initiator over iSER, v." DRV_VER,
+ 	.queuecommand           = iscsi_queuecommand,
+-	.can_queue		= ISCSI_XMIT_CMDS_MAX - 1,
++	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,
+ 	.sg_tablesize           = ISCSI_ISER_SG_TABLESIZE,
+ 	.max_sectors		= 1024,
+ 	.cmd_per_lun            = ISCSI_MAX_CMD_PER_LUN,
+@@ -574,8 +579,12 @@
+ 				  ISCSI_EXP_STATSN |
+ 				  ISCSI_PERSISTENT_PORT |
+ 				  ISCSI_PERSISTENT_ADDRESS |
+-				  ISCSI_TARGET_NAME |
+-				  ISCSI_TPGT,
++				  ISCSI_TARGET_NAME | ISCSI_TPGT |
++				  ISCSI_USERNAME | ISCSI_PASSWORD |
++				  ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN,
++	.host_param_mask	= ISCSI_HOST_HWADDRESS |
++				  ISCSI_HOST_NETDEV_NAME |
++				  ISCSI_HOST_INITIATOR_NAME,
+ 	.host_template          = &iscsi_iser_sht,
+ 	.conndata_size		= sizeof(struct iscsi_conn),
+ 	.max_lun                = ISCSI_ISER_MAX_LUN,
+@@ -592,6 +601,9 @@
+ 	.get_session_param	= iscsi_session_get_param,
+ 	.start_conn             = iscsi_iser_conn_start,
+ 	.stop_conn              = iscsi_conn_stop,
++	/* iscsi host params */
++	.get_host_param		= iscsi_host_get_param,
++	.set_host_param		= iscsi_host_set_param,
+ 	/* IO */
+ 	.send_pdu		= iscsi_conn_send_pdu,
+ 	.get_stats		= iscsi_iser_conn_get_stats,
+diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.h linux-2.6.22-try2/drivers/infiniband/ulp/iser/iscsi_iser.h
+--- linux-2.6.22-570/drivers/infiniband/ulp/iser/iscsi_iser.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/infiniband/ulp/iser/iscsi_iser.h	2007-12-19 15:29:23.000000000 -0500
+@@ -98,7 +98,7 @@
+ #define ISER_MAX_TX_MISC_PDUS		6 /* NOOP_OUT(2), TEXT(1),         *
+ 					   * SCSI_TMFUNC(2), LOGOUT(1) */
+ 
+-#define ISER_QP_MAX_RECV_DTOS		(ISCSI_XMIT_CMDS_MAX + \
++#define ISER_QP_MAX_RECV_DTOS		(ISCSI_DEF_XMIT_CMDS_MAX + \
+ 					ISER_MAX_RX_MISC_PDUS    +  \
+ 					ISER_MAX_TX_MISC_PDUS)
+ 
+@@ -110,7 +110,7 @@
+ 
+ #define ISER_INFLIGHT_DATAOUTS		8
+ 
+-#define ISER_QP_MAX_REQ_DTOS		(ISCSI_XMIT_CMDS_MAX *    \
++#define ISER_QP_MAX_REQ_DTOS		(ISCSI_DEF_XMIT_CMDS_MAX *    \
+ 					(1 + ISER_INFLIGHT_DATAOUTS) + \
+ 					ISER_MAX_TX_MISC_PDUS        + \
+ 					ISER_MAX_RX_MISC_PDUS)
+diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_initiator.c linux-2.6.22-try2/drivers/infiniband/ulp/iser/iser_initiator.c
+--- linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_initiator.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/infiniband/ulp/iser/iser_initiator.c	2007-12-19 15:29:23.000000000 -0500
+@@ -351,18 +351,12 @@
+ 	else
+ 		data_buf = &iser_ctask->data[ISER_DIR_OUT];
+ 
+-	if (sc->use_sg) { /* using a scatter list */
+-		data_buf->buf  = sc->request_buffer;
+-		data_buf->size = sc->use_sg;
+-	} else if (sc->request_bufflen) {
+-		/* using a single buffer - convert it into one entry SG */
+-		sg_init_one(&data_buf->sg_single,
+-			    sc->request_buffer, sc->request_bufflen);
+-		data_buf->buf   = &data_buf->sg_single;
+-		data_buf->size  = 1;
++	if (scsi_sg_count(sc)) { /* using a scatter list */
++		data_buf->buf  = scsi_sglist(sc);
++		data_buf->size = scsi_sg_count(sc);
+ 	}
+ 
+-	data_buf->data_len = sc->request_bufflen;
++	data_buf->data_len = scsi_bufflen(sc);
+ 
+ 	if (hdr->flags & ISCSI_FLAG_CMD_READ) {
+ 		err = iser_prepare_read_cmd(ctask, edtl);
+diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_verbs.c linux-2.6.22-try2/drivers/infiniband/ulp/iser/iser_verbs.c
+--- linux-2.6.22-570/drivers/infiniband/ulp/iser/iser_verbs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/infiniband/ulp/iser/iser_verbs.c	2007-12-19 15:29:23.000000000 -0500
+@@ -155,8 +155,8 @@
+ 	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
+ 	/* make the pool size twice the max number of SCSI commands *
+ 	 * the ML is expected to queue, watermark for unmap at 50%  */
+-	params.pool_size	 = ISCSI_XMIT_CMDS_MAX * 2;
+-	params.dirty_watermark	 = ISCSI_XMIT_CMDS_MAX;
++	params.pool_size	 = ISCSI_DEF_XMIT_CMDS_MAX * 2;
++	params.dirty_watermark	 = ISCSI_DEF_XMIT_CMDS_MAX;
+ 	params.cache		 = 0;
+ 	params.flush_function	 = NULL;
+ 	params.access		 = (IB_ACCESS_LOCAL_WRITE  |
+diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.c linux-2.6.22-try2/drivers/infiniband/ulp/srp/ib_srp.c
+--- linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/infiniband/ulp/srp/ib_srp.c	2007-12-19 15:29:23.000000000 -0500
+@@ -455,10 +455,7 @@
+ 			   struct srp_target_port *target,
+ 			   struct srp_request *req)
+ {
+-	struct scatterlist *scat;
+-	int nents;
+-
+-	if (!scmnd->request_buffer ||
++	if (!scsi_sglist(scmnd) ||
+ 	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
+ 	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
+ 		return;
+@@ -468,20 +465,8 @@
+ 		req->fmr = NULL;
+ 	}
+ 
+-	/*
+-	 * This handling of non-SG commands can be killed when the
+-	 * SCSI midlayer no longer generates non-SG commands.
+-	 */
+-	if (likely(scmnd->use_sg)) {
+-		nents = scmnd->use_sg;
+-		scat  = scmnd->request_buffer;
+-	} else {
+-		nents = 1;
+-		scat  = &req->fake_sg;
+-	}
+-
+-	ib_dma_unmap_sg(target->srp_host->dev->dev, scat, nents,
+-			scmnd->sc_data_direction);
++	ib_dma_unmap_sg(target->srp_host->dev->dev, scsi_sglist(scmnd),
++			scsi_sg_count(scmnd), scmnd->sc_data_direction);
+ }
+ 
+ static void srp_remove_req(struct srp_target_port *target, struct srp_request *req)
+@@ -595,6 +580,7 @@
+ 	int ret;
+ 	struct srp_device *dev = target->srp_host->dev;
+ 	struct ib_device *ibdev = dev->dev;
++	struct scatterlist *sg;
+ 
+ 	if (!dev->fmr_pool)
+ 		return -ENODEV;
+@@ -604,16 +590,16 @@
+ 		return -EINVAL;
+ 
+ 	len = page_cnt = 0;
+-	for (i = 0; i < sg_cnt; ++i) {
+-		unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]);
++	scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) {
++		unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
+ 
+-		if (ib_sg_dma_address(ibdev, &scat[i]) & ~dev->fmr_page_mask) {
++		if (ib_sg_dma_address(ibdev, sg) & ~dev->fmr_page_mask) {
+ 			if (i > 0)
+ 				return -EINVAL;
+ 			else
+ 				++page_cnt;
+ 		}
+-		if ((ib_sg_dma_address(ibdev, &scat[i]) + dma_len) &
++		if ((ib_sg_dma_address(ibdev, sg) + dma_len) &
+ 		    ~dev->fmr_page_mask) {
+ 			if (i < sg_cnt - 1)
+ 				return -EINVAL;
+@@ -633,12 +619,12 @@
+ 		return -ENOMEM;
+ 
+ 	page_cnt = 0;
+-	for (i = 0; i < sg_cnt; ++i) {
+-		unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]);
++	scsi_for_each_sg(req->scmnd, sg, sg_cnt, i) {
++		unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
+ 
+ 		for (j = 0; j < dma_len; j += dev->fmr_page_size)
+ 			dma_pages[page_cnt++] =
+-				(ib_sg_dma_address(ibdev, &scat[i]) &
++				(ib_sg_dma_address(ibdev, sg) &
+ 				 dev->fmr_page_mask) + j;
+ 	}
+ 
+@@ -673,7 +659,7 @@
+ 	struct srp_device *dev;
+ 	struct ib_device *ibdev;
+ 
+-	if (!scmnd->request_buffer || scmnd->sc_data_direction == DMA_NONE)
++	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
+ 		return sizeof (struct srp_cmd);
+ 
+ 	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
+@@ -683,18 +669,8 @@
+ 		return -EINVAL;
+ 	}
+ 
+-	/*
+-	 * This handling of non-SG commands can be killed when the
+-	 * SCSI midlayer no longer generates non-SG commands.
+-	 */
+-	if (likely(scmnd->use_sg)) {
+-		nents = scmnd->use_sg;
+-		scat  = scmnd->request_buffer;
+-	} else {
+-		nents = 1;
+-		scat  = &req->fake_sg;
+-		sg_init_one(scat, scmnd->request_buffer, scmnd->request_bufflen);
+-	}
++	nents = scsi_sg_count(scmnd);
++	scat  = scsi_sglist(scmnd);
+ 
+ 	dev = target->srp_host->dev;
+ 	ibdev = dev->dev;
+@@ -724,6 +700,7 @@
+ 		 * descriptor.
+ 		 */
+ 		struct srp_indirect_buf *buf = (void *) cmd->add_data;
++		struct scatterlist *sg;
+ 		u32 datalen = 0;
+ 		int i;
+ 
+@@ -732,11 +709,11 @@
+ 			sizeof (struct srp_indirect_buf) +
+ 			count * sizeof (struct srp_direct_buf);
+ 
+-		for (i = 0; i < count; ++i) {
+-			unsigned int dma_len = ib_sg_dma_len(ibdev, &scat[i]);
++		scsi_for_each_sg(scmnd, sg, count, i) {
++			unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
+ 
+ 			buf->desc_list[i].va  =
+-				cpu_to_be64(ib_sg_dma_address(ibdev, &scat[i]));
++				cpu_to_be64(ib_sg_dma_address(ibdev, sg));
+ 			buf->desc_list[i].key =
+ 				cpu_to_be32(dev->mr->rkey);
+ 			buf->desc_list[i].len = cpu_to_be32(dma_len);
+@@ -802,9 +779,9 @@
+ 		}
+ 
+ 		if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER))
+-			scmnd->resid = be32_to_cpu(rsp->data_out_res_cnt);
++			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
+ 		else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER))
+-			scmnd->resid = be32_to_cpu(rsp->data_in_res_cnt);
++			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
+ 
+ 		if (!req->tsk_mgmt) {
+ 			scmnd->host_scribble = (void *) -1L;
+diff -Nurb linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.h linux-2.6.22-try2/drivers/infiniband/ulp/srp/ib_srp.h
+--- linux-2.6.22-570/drivers/infiniband/ulp/srp/ib_srp.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/infiniband/ulp/srp/ib_srp.h	2007-12-19 15:29:23.000000000 -0500
+@@ -106,11 +106,6 @@
+ 	struct srp_iu	       *cmd;
+ 	struct srp_iu	       *tsk_mgmt;
+ 	struct ib_pool_fmr     *fmr;
+-	/*
+-	 * Fake scatterlist used when scmnd->use_sg==0.  Can be killed
+-	 * when the SCSI midlayer no longer generates non-SG commands.
+-	 */
+-	struct scatterlist	fake_sg;
+ 	struct completion	done;
+ 	short			index;
+ 	u8			cmd_done;
+diff -Nurb linux-2.6.22-570/drivers/input/gameport/gameport.c linux-2.6.22-try2/drivers/input/gameport/gameport.c
+--- linux-2.6.22-570/drivers/input/gameport/gameport.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/input/gameport/gameport.c	2007-12-19 15:29:24.000000000 -0500
+@@ -445,6 +445,7 @@
+ 
+ static int gameport_thread(void *nothing)
+ {
++	set_freezable();
+ 	do {
+ 		gameport_handle_event();
+ 		wait_event_interruptible(gameport_wait,
+diff -Nurb linux-2.6.22-570/drivers/input/mouse/psmouse.h linux-2.6.22-try2/drivers/input/mouse/psmouse.h
+--- linux-2.6.22-570/drivers/input/mouse/psmouse.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/input/mouse/psmouse.h	2007-12-19 15:29:22.000000000 -0500
+@@ -118,7 +118,6 @@
+ 		.attr	= {							\
+ 			.name	= __stringify(_name),				\
+ 			.mode	= _mode,					\
+-			.owner	= THIS_MODULE,					\
+ 		},								\
+ 		.show	= psmouse_attr_show_helper,				\
+ 		.store	= psmouse_attr_set_helper,				\
+diff -Nurb linux-2.6.22-570/drivers/input/serio/serio.c linux-2.6.22-try2/drivers/input/serio/serio.c
+--- linux-2.6.22-570/drivers/input/serio/serio.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/input/serio/serio.c	2007-12-19 15:29:24.000000000 -0500
+@@ -384,6 +384,7 @@
+ 
+ static int serio_thread(void *nothing)
+ {
++	set_freezable();
+ 	do {
+ 		serio_handle_event();
+ 		wait_event_interruptible(serio_wait,
+diff -Nurb linux-2.6.22-570/drivers/input/touchscreen/ucb1400_ts.c linux-2.6.22-try2/drivers/input/touchscreen/ucb1400_ts.c
+--- linux-2.6.22-570/drivers/input/touchscreen/ucb1400_ts.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/input/touchscreen/ucb1400_ts.c	2007-12-19 15:29:24.000000000 -0500
+@@ -292,6 +292,7 @@
+ 
+ 	sched_setscheduler(tsk, SCHED_FIFO, &param);
+ 
++	set_freezable();
+ 	while (!kthread_should_stop()) {
+ 		unsigned int x, y, p;
+ 		long timeout;
+diff -Nurb linux-2.6.22-570/drivers/macintosh/therm_adt746x.c linux-2.6.22-try2/drivers/macintosh/therm_adt746x.c
+--- linux-2.6.22-570/drivers/macintosh/therm_adt746x.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/macintosh/therm_adt746x.c	2007-12-19 15:29:24.000000000 -0500
+@@ -335,6 +335,7 @@
+ {
+ 	struct thermostat* th = arg;
+ 
++	set_freezable();
+ 	while(!kthread_should_stop()) {
+ 		try_to_freeze();
+ 		msleep_interruptible(2000);
+diff -Nurb linux-2.6.22-570/drivers/macintosh/therm_pm72.c linux-2.6.22-try2/drivers/macintosh/therm_pm72.c
+--- linux-2.6.22-570/drivers/macintosh/therm_pm72.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/macintosh/therm_pm72.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1770,7 +1770,8 @@
+ 				"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+ 				NULL };
+ 
+-	return call_usermodehelper(critical_overtemp_path, argv, envp, 0);
++	return call_usermodehelper(critical_overtemp_path,
++				   argv, envp, UMH_WAIT_EXEC);
+ }
+ 
+ 
+diff -Nurb linux-2.6.22-570/drivers/macintosh/windfarm_core.c linux-2.6.22-try2/drivers/macintosh/windfarm_core.c
+--- linux-2.6.22-570/drivers/macintosh/windfarm_core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/macintosh/windfarm_core.c	2007-12-19 15:29:24.000000000 -0500
+@@ -80,7 +80,8 @@
+ 				"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
+ 				NULL };
+ 
+-	return call_usermodehelper(critical_overtemp_path, argv, envp, 0);
++	return call_usermodehelper(critical_overtemp_path,
++				   argv, envp, UMH_WAIT_EXEC);
+ }
+ EXPORT_SYMBOL_GPL(wf_critical_overtemp);
+ 
+@@ -92,6 +93,7 @@
+ 
+ 	DBG("wf: thread started\n");
+ 
++	set_freezable();
+ 	while(!kthread_should_stop()) {
+ 		if (time_after_eq(jiffies, next)) {
+ 			wf_notify(WF_EVENT_TICK, NULL);
+@@ -212,7 +214,6 @@
+ 	list_add(&new_ct->link, &wf_controls);
+ 
+ 	new_ct->attr.attr.name = new_ct->name;
+-	new_ct->attr.attr.owner = THIS_MODULE;
+ 	new_ct->attr.attr.mode = 0644;
+ 	new_ct->attr.show = wf_show_control;
+ 	new_ct->attr.store = wf_store_control;
+@@ -325,7 +326,6 @@
+ 	list_add(&new_sr->link, &wf_sensors);
+ 
+ 	new_sr->attr.attr.name = new_sr->name;
+-	new_sr->attr.attr.owner = THIS_MODULE;
+ 	new_sr->attr.attr.mode = 0444;
+ 	new_sr->attr.show = wf_show_sensor;
+ 	new_sr->attr.store = NULL;
+diff -Nurb linux-2.6.22-570/drivers/md/Kconfig linux-2.6.22-try2/drivers/md/Kconfig
+--- linux-2.6.22-570/drivers/md/Kconfig	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/md/Kconfig	2007-12-19 15:29:23.000000000 -0500
+@@ -109,6 +109,8 @@
+ config MD_RAID456
+ 	tristate "RAID-4/RAID-5/RAID-6 mode"
+ 	depends on BLK_DEV_MD
++	select ASYNC_MEMCPY
++	select ASYNC_XOR
+ 	---help---
+ 	  A RAID-5 set of N drives with a capacity of C MB per drive provides
+ 	  the capacity of C * (N - 1) MB, and protects against a failure
+@@ -271,6 +273,11 @@
+ 
+ 	If unsure, say N.
+ 
++config DM_NETLINK
++	bool "DM netlink events (EXPERIMENTAL)"
++	depends on BLK_DEV_DM && EXPERIMENTAL
++	---help---
++	  Generate netlink events for DM events.
+ endmenu
+ 
+ endif
+diff -Nurb linux-2.6.22-570/drivers/md/Makefile linux-2.6.22-try2/drivers/md/Makefile
+--- linux-2.6.22-570/drivers/md/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/md/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -17,7 +17,7 @@
+ hostprogs-y	:= mktables
+ 
+ # Note: link order is important.  All raid personalities
+-# and xor.o must come before md.o, as they each initialise 
++# must come before md.o, as they each initialise
+ # themselves, and md.o may use the personalities when it 
+ # auto-initialised.
+ 
+@@ -25,7 +25,7 @@
+ obj-$(CONFIG_MD_RAID0)		+= raid0.o
+ obj-$(CONFIG_MD_RAID1)		+= raid1.o
+ obj-$(CONFIG_MD_RAID10)		+= raid10.o
+-obj-$(CONFIG_MD_RAID456)	+= raid456.o xor.o
++obj-$(CONFIG_MD_RAID456)	+= raid456.o
+ obj-$(CONFIG_MD_MULTIPATH)	+= multipath.o
+ obj-$(CONFIG_MD_FAULTY)		+= faulty.o
+ obj-$(CONFIG_BLK_DEV_MD)	+= md-mod.o
+@@ -46,6 +46,10 @@
+ altivec_flags := -maltivec -mabi=altivec
+ endif
+ 
++ifeq ($(CONFIG_DM_NETLINK),y)
++dm-mod-objs			+= dm-netlink.o
++endif
++
+ targets += raid6int1.c
+ $(obj)/raid6int1.c:   UNROLL := 1
+ $(obj)/raid6int1.c:   $(src)/raid6int.uc $(src)/unroll.pl FORCE
+diff -Nurb linux-2.6.22-570/drivers/md/dm-netlink.c linux-2.6.22-try2/drivers/md/dm-netlink.c
+--- linux-2.6.22-570/drivers/md/dm-netlink.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/md/dm-netlink.c	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,103 @@
++/*
++ * Device Mapper Netlink Support (dm-netlink)
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ *
++ * Copyright IBM Corporation, 2005, 2006
++ * 	Author: Mike Anderson <andmike@us.ibm.com>
++ */
++#include <linux/module.h>
++#include <linux/mempool.h>
++#include <linux/time.h>
++#include <linux/jiffies.h>
++#include <linux/security.h>
++#include <net/sock.h>
++#include <net/netlink.h>
++
++#include "dm.h"
++#include "dm-netlink.h"
++
++#define DM_MSG_PREFIX "netlink"
++
++#define DM_EVENT_SKB_SIZE NLMSG_GOODSIZE
++
++struct dm_event_cache {
++	struct kmem_cache *cache;
++	unsigned skb_size;
++};
++
++static struct dm_event_cache _dme_cache;
++
++static int dme_cache_init(struct dm_event_cache *dc, unsigned skb_size)
++{
++	dc->skb_size = skb_size;
++
++	dc->cache = KMEM_CACHE(dm_event, 0);
++	if (!dc->cache)
++		return -ENOMEM;
++
++	return 0;
++}
++
++static void dme_cache_destroy(struct dm_event_cache *dc)
++{
++	kmem_cache_destroy(dc->cache);
++}
++
++static void dme_cache_event_put(struct dm_event *evt)
++{
++	struct dm_event_cache *dc = evt->cdata;
++
++	kmem_cache_free(dc->cache, evt);
++}
++
++static struct dm_event *dme_cache_event_get(struct dm_event_cache *dc,
++					    struct mapped_device *md)
++{
++	struct dm_event *evt;
++
++	evt = kmem_cache_alloc(dc->cache, GFP_ATOMIC);
++	if (!evt)
++		return NULL;
++
++	INIT_LIST_HEAD(&evt->elist);
++	evt->cdata = dc;
++	evt->md = md;
++	evt->skb = alloc_skb(dc->skb_size, GFP_ATOMIC);
++	if (!evt->skb)
++		goto cache_err;
++
++	return evt;
++
++cache_err:
++	dme_cache_event_put(evt);
++	return NULL;
++}
++
++int __init dm_netlink_init(void)
++{
++	int r;
++
++	r = dme_cache_init(&_dme_cache, DM_EVENT_SKB_SIZE);
++	if (!r)
++		DMINFO("version 1.0.0 loaded");
++
++	return r;
++}
++
++void dm_netlink_exit(void)
++{
++	dme_cache_destroy(&_dme_cache);
++}
+diff -Nurb linux-2.6.22-570/drivers/md/dm-netlink.h linux-2.6.22-try2/drivers/md/dm-netlink.h
+--- linux-2.6.22-570/drivers/md/dm-netlink.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/md/dm-netlink.h	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,50 @@
++/*
++ * Device Mapper Netlink Support
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ *
++ * Copyright IBM Corporation, 2005, 2006
++ * 	Author: Mike Anderson <andmike@us.ibm.com>
++ */
++#ifndef DM_NETLINK_H
++#define DM_NETLINK_H
++
++struct dm_event_cache;
++struct mapped_device;
++struct dm_event {
++	struct dm_event_cache *cdata;
++	struct mapped_device *md;
++	struct sk_buff *skb;
++	struct list_head elist;
++};
++
++#ifdef CONFIG_DM_NETLINK
++
++int dm_netlink_init(void);
++void dm_netlink_exit(void);
++
++#else	/* CONFIG_DM_NETLINK */
++
++static inline int __init dm_netlink_init(void)
++{
++	return 0;
++}
++static inline void dm_netlink_exit(void)
++{
++}
++
++#endif	/* CONFIG_DM_NETLINK */
++
++#endif	/* DM_NETLINK_H */
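/*
 * Editorial note: the static-inline stubs above follow the usual kernel
 * pattern for optional features: dm.c (next hunk) can list
 * dm_netlink_init()/dm_netlink_exit() in its _inits[]/_exits[] tables
 * unconditionally, and the calls compile away when CONFIG_DM_NETLINK is
 * not set.
 */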
+diff -Nurb linux-2.6.22-570/drivers/md/dm.c linux-2.6.22-try2/drivers/md/dm.c
+--- linux-2.6.22-570/drivers/md/dm.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/drivers/md/dm.c	2007-12-19 15:29:23.000000000 -0500
+@@ -7,6 +7,7 @@
+ 
+ #include "dm.h"
+ #include "dm-bio-list.h"
++#include "dm-netlink.h"
+ 
+ #include <linux/init.h>
+ #include <linux/module.h>
+@@ -180,6 +181,7 @@
+ 	dm_linear_init,
+ 	dm_stripe_init,
+ 	dm_interface_init,
++	dm_netlink_init,
+ };
+ 
+ void (*_exits[])(void) = {
+@@ -188,6 +190,7 @@
+ 	dm_linear_exit,
+ 	dm_stripe_exit,
+ 	dm_interface_exit,
++	dm_netlink_exit,
+ };
+ 
+ static int __init dm_init(void)
+diff -Nurb linux-2.6.22-570/drivers/md/md.c linux-2.6.22-try2/drivers/md/md.c
+--- linux-2.6.22-570/drivers/md/md.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/md/md.c	2007-12-19 15:29:24.000000000 -0500
+@@ -4642,7 +4642,6 @@
+ 	 * many dirty RAID5 blocks.
+ 	 */
+ 
+-	current->flags |= PF_NOFREEZE;
+ 	allow_signal(SIGKILL);
+ 	while (!kthread_should_stop()) {
+ 
+@@ -5814,7 +5813,7 @@
+ 	}
+ }
+ 
+-module_init(md_init)
++subsys_initcall(md_init);
+ module_exit(md_exit)
+ 
+ static int get_ro(char *buffer, struct kernel_param *kp)
+diff -Nurb linux-2.6.22-570/drivers/md/raid5.c linux-2.6.22-try2/drivers/md/raid5.c
+--- linux-2.6.22-570/drivers/md/raid5.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/md/raid5.c	2007-12-19 15:29:23.000000000 -0500
+@@ -52,6 +52,7 @@
+ #include "raid6.h"
+ 
+ #include <linux/raid/bitmap.h>
++#include <linux/async_tx.h>
+ 
+ /*
+  * Stripe cache
+@@ -80,7 +81,6 @@
+ /*
+  * The following can be used to debug the driver
+  */
+-#define RAID5_DEBUG	0
+ #define RAID5_PARANOIA	1
+ #if RAID5_PARANOIA && defined(CONFIG_SMP)
+ # define CHECK_DEVLOCK() assert_spin_locked(&conf->device_lock)
+@@ -88,8 +88,7 @@
+ # define CHECK_DEVLOCK()
+ #endif
+ 
+-#define PRINTK(x...) ((void)(RAID5_DEBUG && printk(x)))
+-#if RAID5_DEBUG
++#ifdef DEBUG
+ #define inline
+ #define __inline__
+ #endif
+@@ -125,6 +124,7 @@
+ 			}
+ 			md_wakeup_thread(conf->mddev->thread);
+ 		} else {
++			BUG_ON(sh->ops.pending);
+ 			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+ 				atomic_dec(&conf->preread_active_stripes);
+ 				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
+@@ -152,7 +152,8 @@
+ 
+ static inline void remove_hash(struct stripe_head *sh)
+ {
+-	PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector);
++	pr_debug("remove_hash(), stripe %llu\n",
++		(unsigned long long)sh->sector);
+ 
+ 	hlist_del_init(&sh->hash);
+ }
+@@ -161,7 +162,8 @@
+ {
+ 	struct hlist_head *hp = stripe_hash(conf, sh->sector);
+ 
+-	PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector);
++	pr_debug("insert_hash(), stripe %llu\n",
++		(unsigned long long)sh->sector);
+ 
+ 	CHECK_DEVLOCK();
+ 	hlist_add_head(&sh->hash, hp);
+@@ -224,9 +226,10 @@
+ 
+ 	BUG_ON(atomic_read(&sh->count) != 0);
+ 	BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
++	BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete);
+ 	
+ 	CHECK_DEVLOCK();
+-	PRINTK("init_stripe called, stripe %llu\n", 
++	pr_debug("init_stripe called, stripe %llu\n",
+ 		(unsigned long long)sh->sector);
+ 
+ 	remove_hash(sh);
+@@ -240,11 +243,11 @@
+ 	for (i = sh->disks; i--; ) {
+ 		struct r5dev *dev = &sh->dev[i];
+ 
+-		if (dev->toread || dev->towrite || dev->written ||
++		if (dev->toread || dev->read || dev->towrite || dev->written ||
+ 		    test_bit(R5_LOCKED, &dev->flags)) {
+-			printk("sector=%llx i=%d %p %p %p %d\n",
++			printk(KERN_ERR "sector=%llx i=%d %p %p %p %p %d\n",
+ 			       (unsigned long long)sh->sector, i, dev->toread,
+-			       dev->towrite, dev->written,
++			       dev->read, dev->towrite, dev->written,
+ 			       test_bit(R5_LOCKED, &dev->flags));
+ 			BUG();
+ 		}
+@@ -260,11 +263,11 @@
+ 	struct hlist_node *hn;
+ 
+ 	CHECK_DEVLOCK();
+-	PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector);
++	pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector);
+ 	hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
+ 		if (sh->sector == sector && sh->disks == disks)
+ 			return sh;
+-	PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector);
++	pr_debug("__stripe %llu not in cache\n", (unsigned long long)sector);
+ 	return NULL;
+ }
+ 
+@@ -276,7 +279,7 @@
+ {
+ 	struct stripe_head *sh;
+ 
+-	PRINTK("get_stripe, sector %llu\n", (unsigned long long)sector);
++	pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
+ 
+ 	spin_lock_irq(&conf->device_lock);
+ 
+@@ -324,179 +327,762 @@
+ 	return sh;
+ }
+ 
+-static int grow_one_stripe(raid5_conf_t *conf)
++/* test_and_ack_op() ensures that we only dequeue an operation once */
++#define test_and_ack_op(op, pend) \
++do {							\
++	if (test_bit(op, &sh->ops.pending) &&		\
++		!test_bit(op, &sh->ops.complete)) {	\
++		if (test_and_set_bit(op, &sh->ops.ack)) \
++			clear_bit(op, &pend);		\
++		else					\
++			ack++;				\
++	} else						\
++		clear_bit(op, &pend);			\
++} while (0)
++
++/* find new work to run; do not resubmit work that is already
++ * in flight
++ */
++static unsigned long get_stripe_work(struct stripe_head *sh)
++{
++	unsigned long pending;
++	int ack = 0;
++
++	pending = sh->ops.pending;
++
++	test_and_ack_op(STRIPE_OP_BIOFILL, pending);
++	test_and_ack_op(STRIPE_OP_COMPUTE_BLK, pending);
++	test_and_ack_op(STRIPE_OP_PREXOR, pending);
++	test_and_ack_op(STRIPE_OP_BIODRAIN, pending);
++	test_and_ack_op(STRIPE_OP_POSTXOR, pending);
++	test_and_ack_op(STRIPE_OP_CHECK, pending);
++	if (test_and_clear_bit(STRIPE_OP_IO, &sh->ops.pending))
++		ack++;
++
++	sh->ops.count -= ack;
++	BUG_ON(sh->ops.count < 0);
++
++	return pending;
++}
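/*
 * Worked example (editorial): suppose sh->ops.pending has
 * STRIPE_OP_PREXOR and STRIPE_OP_POSTXOR set, and PREXOR was already
 * acked on a previous pass.  test_and_ack_op(STRIPE_OP_PREXOR, pending)
 * finds the ack bit already set and clears PREXOR from the local
 * 'pending' snapshot (it is in flight), while
 * test_and_ack_op(STRIPE_OP_POSTXOR, pending) sets the ack bit, keeps
 * POSTXOR in the snapshot and bumps 'ack'; only POSTXOR is then handed
 * to raid5_run_ops() and ops.count drops by one.
 */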
++
++static int
++raid5_end_read_request(struct bio *bi, unsigned int bytes_done, int error);
++static int
++raid5_end_write_request (struct bio *bi, unsigned int bytes_done, int error);
++
++static void ops_run_io(struct stripe_head *sh)
+ {
+-	struct stripe_head *sh;
+-	sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL);
+-	if (!sh)
+-		return 0;
+-	memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
+-	sh->raid_conf = conf;
+-	spin_lock_init(&sh->lock);
++	raid5_conf_t *conf = sh->raid_conf;
++	int i, disks = sh->disks;
+ 
+-	if (grow_buffers(sh, conf->raid_disks)) {
+-		shrink_buffers(sh, conf->raid_disks);
+-		kmem_cache_free(conf->slab_cache, sh);
+-		return 0;
+-	}
+-	sh->disks = conf->raid_disks;
+-	/* we just created an active stripe so... */
+-	atomic_set(&sh->count, 1);
+-	atomic_inc(&conf->active_stripes);
+-	INIT_LIST_HEAD(&sh->lru);
+-	release_stripe(sh);
+-	return 1;
+-}
++	might_sleep();
+ 
+-static int grow_stripes(raid5_conf_t *conf, int num)
+-{
+-	struct kmem_cache *sc;
+-	int devs = conf->raid_disks;
++	for (i = disks; i--; ) {
++		int rw;
++		struct bio *bi;
++		mdk_rdev_t *rdev;
++		if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
++			rw = WRITE;
++		else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
++			rw = READ;
++		else
++			continue;
+ 
+-	sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev));
+-	sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev));
+-	conf->active_name = 0;
+-	sc = kmem_cache_create(conf->cache_name[conf->active_name],
+-			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
+-			       0, 0, NULL, NULL);
+-	if (!sc)
+-		return 1;
+-	conf->slab_cache = sc;
+-	conf->pool_size = devs;
+-	while (num--)
+-		if (!grow_one_stripe(conf))
+-			return 1;
+-	return 0;
++		bi = &sh->dev[i].req;
++
++		bi->bi_rw = rw;
++		if (rw == WRITE)
++			bi->bi_end_io = raid5_end_write_request;
++		else
++			bi->bi_end_io = raid5_end_read_request;
++
++		rcu_read_lock();
++		rdev = rcu_dereference(conf->disks[i].rdev);
++		if (rdev && test_bit(Faulty, &rdev->flags))
++			rdev = NULL;
++		if (rdev)
++			atomic_inc(&rdev->nr_pending);
++		rcu_read_unlock();
++
++		if (rdev) {
++			if (test_bit(STRIPE_SYNCING, &sh->state) ||
++				test_bit(STRIPE_EXPAND_SOURCE, &sh->state) ||
++				test_bit(STRIPE_EXPAND_READY, &sh->state))
++				md_sync_acct(rdev->bdev, STRIPE_SECTORS);
++
++			bi->bi_bdev = rdev->bdev;
++			pr_debug("%s: for %llu schedule op %ld on disc %d\n",
++				__FUNCTION__, (unsigned long long)sh->sector,
++				bi->bi_rw, i);
++			atomic_inc(&sh->count);
++			bi->bi_sector = sh->sector + rdev->data_offset;
++			bi->bi_flags = 1 << BIO_UPTODATE;
++			bi->bi_vcnt = 1;
++			bi->bi_max_vecs = 1;
++			bi->bi_idx = 0;
++			bi->bi_io_vec = &sh->dev[i].vec;
++			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
++			bi->bi_io_vec[0].bv_offset = 0;
++			bi->bi_size = STRIPE_SIZE;
++			bi->bi_next = NULL;
++			if (rw == WRITE &&
++			    test_bit(R5_ReWrite, &sh->dev[i].flags))
++				atomic_add(STRIPE_SECTORS,
++					&rdev->corrected_errors);
++			generic_make_request(bi);
++		} else {
++			if (rw == WRITE)
++				set_bit(STRIPE_DEGRADED, &sh->state);
++			pr_debug("skip op %ld on disc %d for sector %llu\n",
++				bi->bi_rw, i, (unsigned long long)sh->sector);
++			clear_bit(R5_LOCKED, &sh->dev[i].flags);
++			set_bit(STRIPE_HANDLE, &sh->state);
++		}
++	}
+ }
+ 
+-#ifdef CONFIG_MD_RAID5_RESHAPE
+-static int resize_stripes(raid5_conf_t *conf, int newsize)
++static struct dma_async_tx_descriptor *
++async_copy_data(int frombio, struct bio *bio, struct page *page,
++	sector_t sector, struct dma_async_tx_descriptor *tx)
+ {
+-	/* Make all the stripes able to hold 'newsize' devices.
+-	 * New slots in each stripe get 'page' set to a new page.
+-	 *
+-	 * This happens in stages:
+-	 * 1/ create a new kmem_cache and allocate the required number of
+-	 *    stripe_heads.
+-	 * 2/ gather all the old stripe_heads and tranfer the pages across
+-	 *    to the new stripe_heads.  This will have the side effect of
+-	 *    freezing the array as once all stripe_heads have been collected,
+-	 *    no IO will be possible.  Old stripe heads are freed once their
+-	 *    pages have been transferred over, and the old kmem_cache is
+-	 *    freed when all stripes are done.
+-	 * 3/ reallocate conf->disks to be suitable bigger.  If this fails,
+-	 *    we simple return a failre status - no need to clean anything up.
+-	 * 4/ allocate new pages for the new slots in the new stripe_heads.
+-	 *    If this fails, we don't bother trying the shrink the
+-	 *    stripe_heads down again, we just leave them as they are.
+-	 *    As each stripe_head is processed the new one is released into
+-	 *    active service.
+-	 *
+-	 * Once step2 is started, we cannot afford to wait for a write,
+-	 * so we use GFP_NOIO allocations.
+-	 */
+-	struct stripe_head *osh, *nsh;
+-	LIST_HEAD(newstripes);
+-	struct disk_info *ndisks;
+-	int err = 0;
+-	struct kmem_cache *sc;
++	struct bio_vec *bvl;
++	struct page *bio_page;
+ 	int i;
++	int page_offset;
+ 
+-	if (newsize <= conf->pool_size)
+-		return 0; /* never bother to shrink */
++	if (bio->bi_sector >= sector)
++		page_offset = (signed)(bio->bi_sector - sector) * 512;
++	else
++		page_offset = (signed)(sector - bio->bi_sector) * -512;
++	bio_for_each_segment(bvl, bio, i) {
++		int len = bio_iovec_idx(bio, i)->bv_len;
++		int clen;
++		int b_offset = 0;
+ 
+-	md_allow_write(conf->mddev);
++		if (page_offset < 0) {
++			b_offset = -page_offset;
++			page_offset += b_offset;
++			len -= b_offset;
++		}
+ 
+-	/* Step 1 */
+-	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
+-			       sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
+-			       0, 0, NULL, NULL);
+-	if (!sc)
+-		return -ENOMEM;
++		if (len > 0 && page_offset + len > STRIPE_SIZE)
++			clen = STRIPE_SIZE - page_offset;
++		else
++			clen = len;
+ 
+-	for (i = conf->max_nr_stripes; i; i--) {
+-		nsh = kmem_cache_alloc(sc, GFP_KERNEL);
+-		if (!nsh)
++		if (clen > 0) {
++			b_offset += bio_iovec_idx(bio, i)->bv_offset;
++			bio_page = bio_iovec_idx(bio, i)->bv_page;
++			if (frombio)
++				tx = async_memcpy(page, bio_page, page_offset,
++					b_offset, clen,
++					ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_SRC,
++					tx, NULL, NULL);
++			else
++				tx = async_memcpy(bio_page, page, b_offset,
++					page_offset, clen,
++					ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_DST,
++					tx, NULL, NULL);
++		}
++		if (clen < len) /* hit end of page */
+ 			break;
++		page_offset +=  len;
++	}
+ 
+-		memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
++	return tx;
++}
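/*
 * Worked example (editorial): if bio->bi_sector is 2 sectors below
 * 'sector', page_offset starts at -1024, so the first segment consumes
 * b_offset = 1024 bytes before any copy; only the part of the bio that
 * overlaps [sector, sector + STRIPE_SIZE) is transferred.  A segment
 * running past the stripe page is clipped to STRIPE_SIZE - page_offset
 * and the loop exits ("hit end of page").
 */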
+ 
+-		nsh->raid_conf = conf;
+-		spin_lock_init(&nsh->lock);
++static void ops_complete_biofill(void *stripe_head_ref)
++{
++	struct stripe_head *sh = stripe_head_ref;
++	struct bio *return_bi = NULL, *bi;
++	raid5_conf_t *conf = sh->raid_conf;
++	int i, more_to_read = 0;
+ 
+-		list_add(&nsh->lru, &newstripes);
+-	}
+-	if (i) {
+-		/* didn't get enough, give up */
+-		while (!list_empty(&newstripes)) {
+-			nsh = list_entry(newstripes.next, struct stripe_head, lru);
+-			list_del(&nsh->lru);
+-			kmem_cache_free(sc, nsh);
+-		}
+-		kmem_cache_destroy(sc);
+-		return -ENOMEM;
+-	}
+-	/* Step 2 - Must use GFP_NOIO now.
+-	 * OK, we have enough stripes, start collecting inactive
+-	 * stripes and copying them over
++	pr_debug("%s: stripe %llu\n", __FUNCTION__,
++		(unsigned long long)sh->sector);
++
++	/* clear completed biofills */
++	for (i = sh->disks; i--; ) {
++		struct r5dev *dev = &sh->dev[i];
++		/* check if this stripe has new incoming reads */
++		if (dev->toread)
++			more_to_read++;
++
++		/* acknowledge completion of a biofill operation */
++		/* and check if we need to reply to a read request
+ 	 */
+-	list_for_each_entry(nsh, &newstripes, lru) {
++		if (test_bit(R5_Wantfill, &dev->flags) && !dev->toread) {
++			struct bio *rbi, *rbi2;
++			clear_bit(R5_Wantfill, &dev->flags);
++
++			/* The access to dev->read is outside of the
++			 * spin_lock_irq(&conf->device_lock), but is protected
++			 * by the STRIPE_OP_BIOFILL pending bit
++			 */
++			BUG_ON(!dev->read);
++			rbi = dev->read;
++			dev->read = NULL;
++			while (rbi && rbi->bi_sector <
++				dev->sector + STRIPE_SECTORS) {
++				rbi2 = r5_next_bio(rbi, dev->sector);
+ 		spin_lock_irq(&conf->device_lock);
+-		wait_event_lock_irq(conf->wait_for_stripe,
+-				    !list_empty(&conf->inactive_list),
+-				    conf->device_lock,
+-				    unplug_slaves(conf->mddev)
+-			);
+-		osh = get_free_stripe(conf);
++				if (--rbi->bi_phys_segments == 0) {
++					rbi->bi_next = return_bi;
++					return_bi = rbi;
++				}
+ 		spin_unlock_irq(&conf->device_lock);
+-		atomic_set(&nsh->count, 1);
+-		for(i=0; i<conf->pool_size; i++)
+-			nsh->dev[i].page = osh->dev[i].page;
+-		for( ; i<newsize; i++)
+-			nsh->dev[i].page = NULL;
+-		kmem_cache_free(conf->slab_cache, osh);
++				rbi = rbi2;
+ 	}
+-	kmem_cache_destroy(conf->slab_cache);
++		}
++	}
++	clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack);
++	clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending);
+ 
+-	/* Step 3.
+-	 * At this point, we are holding all the stripes so the array
+-	 * is completely stalled, so now is a good time to resize
+-	 * conf->disks.
+-	 */
+-	ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
+-	if (ndisks) {
+-		for (i=0; i<conf->raid_disks; i++)
+-			ndisks[i] = conf->disks[i];
+-		kfree(conf->disks);
+-		conf->disks = ndisks;
+-	} else
+-		err = -ENOMEM;
++	bi = return_bi;
++	while (bi) {
++		int bytes = bi->bi_size;
+ 
+-	/* Step 4, return new stripes to service */
+-	while(!list_empty(&newstripes)) {
+-		nsh = list_entry(newstripes.next, struct stripe_head, lru);
+-		list_del_init(&nsh->lru);
+-		for (i=conf->raid_disks; i < newsize; i++)
+-			if (nsh->dev[i].page == NULL) {
+-				struct page *p = alloc_page(GFP_NOIO);
+-				nsh->dev[i].page = p;
+-				if (!p)
+-					err = -ENOMEM;
+-			}
+-		release_stripe(nsh);
++		return_bi = bi->bi_next;
++		bi->bi_next = NULL;
++		bi->bi_size = 0;
++		bi->bi_end_io(bi, bytes,
++			test_bit(BIO_UPTODATE, &bi->bi_flags) ? 0 : -EIO);
++		bi = return_bi;
+ 	}
+-	/* critical section pass, GFP_NOIO no longer needed */
+ 
+-	conf->slab_cache = sc;
+-	conf->active_name = 1-conf->active_name;
+-	conf->pool_size = newsize;
+-	return err;
++	if (more_to_read)
++		set_bit(STRIPE_HANDLE, &sh->state);
++	release_stripe(sh);
+ }
+-#endif
+ 
+-static int drop_one_stripe(raid5_conf_t *conf)
++static void ops_run_biofill(struct stripe_head *sh)
+ {
+-	struct stripe_head *sh;
++	struct dma_async_tx_descriptor *tx = NULL;
++	raid5_conf_t *conf = sh->raid_conf;
++	int i;
++
++	pr_debug("%s: stripe %llu\n", __FUNCTION__,
++		(unsigned long long)sh->sector);
++
++	for (i = sh->disks; i--; ) {
++		struct r5dev *dev = &sh->dev[i];
++		if (test_bit(R5_Wantfill, &dev->flags)) {
++			struct bio *rbi;
++			spin_lock_irq(&conf->device_lock);
++			dev->read = rbi = dev->toread;
++			dev->toread = NULL;
++			spin_unlock_irq(&conf->device_lock);
++			while (rbi && rbi->bi_sector <
++				dev->sector + STRIPE_SECTORS) {
++				tx = async_copy_data(0, rbi, dev->page,
++					dev->sector, tx);
++				rbi = r5_next_bio(rbi, dev->sector);
++			}
++		}
++	}
++
++	atomic_inc(&sh->count);
++	async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
++		ops_complete_biofill, sh);
++}
++
++static void ops_complete_compute5(void *stripe_head_ref)
++{
++	struct stripe_head *sh = stripe_head_ref;
++	int target = sh->ops.target;
++	struct r5dev *tgt = &sh->dev[target];
++
++	pr_debug("%s: stripe %llu\n", __FUNCTION__,
++		(unsigned long long)sh->sector);
++
++	set_bit(R5_UPTODATE, &tgt->flags);
++	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
++	clear_bit(R5_Wantcompute, &tgt->flags);
++	set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
++	set_bit(STRIPE_HANDLE, &sh->state);
++	release_stripe(sh);
++}
++
++static struct dma_async_tx_descriptor *
++ops_run_compute5(struct stripe_head *sh, unsigned long pending)
++{
++	/* kernel stack size limits the total number of disks */
++	int disks = sh->disks;
++	struct page *xor_srcs[disks];
++	int target = sh->ops.target;
++	struct r5dev *tgt = &sh->dev[target];
++	struct page *xor_dest = tgt->page;
++	int count = 0;
++	struct dma_async_tx_descriptor *tx;
++	int i;
++
++	pr_debug("%s: stripe %llu block: %d\n",
++		__FUNCTION__, (unsigned long long)sh->sector, target);
++	BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
++
++	for (i = disks; i--; )
++		if (i != target)
++			xor_srcs[count++] = sh->dev[i].page;
++
++	atomic_inc(&sh->count);
++
++	if (unlikely(count == 1))
++		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
++			0, NULL, ops_complete_compute5, sh);
++	else
++		tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
++			ASYNC_TX_XOR_ZERO_DST, NULL,
++			ops_complete_compute5, sh);
++
++	/* ack now if postxor is not set to be run */
++	if (tx && !test_bit(STRIPE_OP_POSTXOR, &pending))
++		async_tx_ack(tx);
++
++	return tx;
++}
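/*
 * Editorial note: this exploits the RAID5 parity identity
 *	P = D0 ^ D1 ^ ... ^ Dn-1
 * so any single missing block is the XOR of all the others, e.g.
 *	D1 = P ^ D0 ^ D2 ^ ... ^ Dn-1
 * With count == 1 (a two-disk array) the XOR degenerates into a plain
 * copy, hence the async_memcpy() fast path above.
 */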
++
++static void ops_complete_prexor(void *stripe_head_ref)
++{
++	struct stripe_head *sh = stripe_head_ref;
++
++	pr_debug("%s: stripe %llu\n", __FUNCTION__,
++		(unsigned long long)sh->sector);
++
++	set_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
++}
++
++static struct dma_async_tx_descriptor *
++ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
++{
++	/* kernel stack size limits the total number of disks */
++	int disks = sh->disks;
++	struct page *xor_srcs[disks];
++	int count = 0, pd_idx = sh->pd_idx, i;
++
++	/* existing parity data subtracted */
++	struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
++
++	pr_debug("%s: stripe %llu\n", __FUNCTION__,
++		(unsigned long long)sh->sector);
++
++	for (i = disks; i--; ) {
++		struct r5dev *dev = &sh->dev[i];
++		/* Only process blocks that are known to be uptodate */
++		if (dev->towrite && test_bit(R5_Wantprexor, &dev->flags))
++			xor_srcs[count++] = dev->page;
++	}
++
++	tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
++		ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx,
++		ops_complete_prexor, sh);
++
++	return tx;
++}
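/*
 * Editorial note: prexor is the read-modify-write parity update, where
 * "subtracting" old data is itself an XOR:
 *	P_tmp = P_old ^ D_old	(this step, ASYNC_TX_XOR_DROP_DST)
 *	P_new = P_tmp ^ D_new	(completed later by ops_run_postxor)
 * which touches only the changed data disks instead of re-reading the
 * whole stripe for a small write.
 */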
++
++static struct dma_async_tx_descriptor *
++ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
++{
++	int disks = sh->disks;
++	int pd_idx = sh->pd_idx, i;
++
++	/* check if prexor is active, which means we only process blocks
++	 * that are part of a read-modify-write (Wantprexor)
++	 */
++	int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
++
++	pr_debug("%s: stripe %llu\n", __FUNCTION__,
++		(unsigned long long)sh->sector);
++
++	for (i = disks; i--; ) {
++		struct r5dev *dev = &sh->dev[i];
++		struct bio *chosen;
++		int towrite;
++
++		towrite = 0;
++		if (prexor) { /* rmw */
++			if (dev->towrite &&
++			    test_bit(R5_Wantprexor, &dev->flags))
++				towrite = 1;
++		} else { /* rcw */
++			if (i != pd_idx && dev->towrite &&
++				test_bit(R5_LOCKED, &dev->flags))
++				towrite = 1;
++		}
++
++		if (towrite) {
++			struct bio *wbi;
++
++			spin_lock(&sh->lock);
++			chosen = dev->towrite;
++			dev->towrite = NULL;
++			BUG_ON(dev->written);
++			wbi = dev->written = chosen;
++			spin_unlock(&sh->lock);
++
++			while (wbi && wbi->bi_sector <
++				dev->sector + STRIPE_SECTORS) {
++				tx = async_copy_data(1, wbi, dev->page,
++					dev->sector, tx);
++				wbi = r5_next_bio(wbi, dev->sector);
++			}
++		}
++	}
++
++	return tx;
++}
++
++static void ops_complete_postxor(void *stripe_head_ref)
++{
++	struct stripe_head *sh = stripe_head_ref;
++
++	pr_debug("%s: stripe %llu\n", __FUNCTION__,
++		(unsigned long long)sh->sector);
++
++	set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
++	set_bit(STRIPE_HANDLE, &sh->state);
++	release_stripe(sh);
++}
++
++static void ops_complete_write(void *stripe_head_ref)
++{
++	struct stripe_head *sh = stripe_head_ref;
++	int disks = sh->disks, i, pd_idx = sh->pd_idx;
++
++	pr_debug("%s: stripe %llu\n", __FUNCTION__,
++		(unsigned long long)sh->sector);
++
++	for (i = disks; i--; ) {
++		struct r5dev *dev = &sh->dev[i];
++		if (dev->written || i == pd_idx)
++			set_bit(R5_UPTODATE, &dev->flags);
++	}
++
++	set_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
++	set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
++
++	set_bit(STRIPE_HANDLE, &sh->state);
++	release_stripe(sh);
++}
++
++static void
++ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
++{
++	/* kernel stack size limits the total number of disks */
++	int disks = sh->disks;
++	struct page *xor_srcs[disks];
++
++	int count = 0, pd_idx = sh->pd_idx, i;
++	struct page *xor_dest;
++	int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
++	unsigned long flags;
++	dma_async_tx_callback callback;
++
++	pr_debug("%s: stripe %llu\n", __FUNCTION__,
++		(unsigned long long)sh->sector);
++
++	/* check if prexor is active, which means we only process blocks
++	 * that are part of a read-modify-write (written)
++	 */
++	if (prexor) {
++		xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
++		for (i = disks; i--; ) {
++			struct r5dev *dev = &sh->dev[i];
++			if (dev->written)
++				xor_srcs[count++] = dev->page;
++		}
++	} else {
++		xor_dest = sh->dev[pd_idx].page;
++		for (i = disks; i--; ) {
++			struct r5dev *dev = &sh->dev[i];
++			if (i != pd_idx)
++				xor_srcs[count++] = dev->page;
++		}
++	}
++
++	/* check whether this postxor is part of a write */
++	callback = test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ?
++		ops_complete_write : ops_complete_postxor;
++
++	/* 1/ if we prexor'd then the dest is reused as a source
++	 * 2/ if we did not prexor then we are redoing the parity
++	 * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST
++	 * for the synchronous xor case
++	 */
++	flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK |
++		(prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST);
++
++	atomic_inc(&sh->count);
++
++	if (unlikely(count == 1)) {
++		flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST);
++		tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE,
++			flags, tx, callback, sh);
++	} else
++		tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
++			flags, tx, callback, sh);
++}
++
++static void ops_complete_check(void *stripe_head_ref)
++{
++	struct stripe_head *sh = stripe_head_ref;
++	int pd_idx = sh->pd_idx;
++
++	pr_debug("%s: stripe %llu\n", __FUNCTION__,
++		(unsigned long long)sh->sector);
++
++	if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) &&
++		sh->ops.zero_sum_result == 0)
++		set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
++
++	set_bit(STRIPE_OP_CHECK, &sh->ops.complete);
++	set_bit(STRIPE_HANDLE, &sh->state);
++	release_stripe(sh);
++}
++
++static void ops_run_check(struct stripe_head *sh)
++{
++	/* kernel stack size limits the total number of disks */
++	int disks = sh->disks;
++	struct page *xor_srcs[disks];
++	struct dma_async_tx_descriptor *tx;
++
++	int count = 0, pd_idx = sh->pd_idx, i;
++	struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page;
++
++	pr_debug("%s: stripe %llu\n", __FUNCTION__,
++		(unsigned long long)sh->sector);
++
++	for (i = disks; i--; ) {
++		struct r5dev *dev = &sh->dev[i];
++		if (i != pd_idx)
++			xor_srcs[count++] = dev->page;
++	}
++
++	tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE,
++		&sh->ops.zero_sum_result, 0, NULL, NULL, NULL);
++
++	if (tx)
++		set_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending);
++	else
++		clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending);
++
++	atomic_inc(&sh->count);
++	tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx,
++		ops_complete_check, sh);
++}
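/*
 * Editorial note: async_xor_zero_sum() folds the parity block together
 * with every data block; on a consistent stripe the result is all-zero
 * bytes, so zero_sum_result == 0 lets ops_complete_check() mark the
 * parity block R5_UPTODATE again.
 */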
++
++static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
++{
++	int overlap_clear = 0, i, disks = sh->disks;
++	struct dma_async_tx_descriptor *tx = NULL;
++
++	if (test_bit(STRIPE_OP_BIOFILL, &pending)) {
++		ops_run_biofill(sh);
++		overlap_clear++;
++	}
++
++	if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending))
++		tx = ops_run_compute5(sh, pending);
++
++	if (test_bit(STRIPE_OP_PREXOR, &pending))
++		tx = ops_run_prexor(sh, tx);
++
++	if (test_bit(STRIPE_OP_BIODRAIN, &pending)) {
++		tx = ops_run_biodrain(sh, tx);
++		overlap_clear++;
++	}
++
++	if (test_bit(STRIPE_OP_POSTXOR, &pending))
++		ops_run_postxor(sh, tx);
++
++	if (test_bit(STRIPE_OP_CHECK, &pending))
++		ops_run_check(sh);
++
++	if (test_bit(STRIPE_OP_IO, &pending))
++		ops_run_io(sh);
++
++	if (overlap_clear)
++		for (i = disks; i--; ) {
++			struct r5dev *dev = &sh->dev[i];
++			if (test_and_clear_bit(R5_Overlap, &dev->flags))
++				wake_up(&sh->raid_conf->wait_for_overlap);
++		}
++}
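/*
 * Editorial note: the 'tx' descriptor threads a dependency chain
 * through the async_tx API (compute -> prexor -> biodrain -> postxor),
 * so each operation is only submitted once the one producing its input
 * has completed; check and io have no such data dependency here and are
 * simply kicked off when their pending bits are set.
 */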
++
++static int grow_one_stripe(raid5_conf_t *conf)
++{
++	struct stripe_head *sh;
++	sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL);
++	if (!sh)
++		return 0;
++	memset(sh, 0, sizeof(*sh) + (conf->raid_disks-1)*sizeof(struct r5dev));
++	sh->raid_conf = conf;
++	spin_lock_init(&sh->lock);
++
++	if (grow_buffers(sh, conf->raid_disks)) {
++		shrink_buffers(sh, conf->raid_disks);
++		kmem_cache_free(conf->slab_cache, sh);
++		return 0;
++	}
++	sh->disks = conf->raid_disks;
++	/* we just created an active stripe so... */
++	atomic_set(&sh->count, 1);
++	atomic_inc(&conf->active_stripes);
++	INIT_LIST_HEAD(&sh->lru);
++	release_stripe(sh);
++	return 1;
++}
++
++static int grow_stripes(raid5_conf_t *conf, int num)
++{
++	struct kmem_cache *sc;
++	int devs = conf->raid_disks;
++
++	sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev));
++	sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev));
++	conf->active_name = 0;
++	sc = kmem_cache_create(conf->cache_name[conf->active_name],
++			       sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
++			       0, 0, NULL, NULL);
++	if (!sc)
++		return 1;
++	conf->slab_cache = sc;
++	conf->pool_size = devs;
++	while (num--)
++		if (!grow_one_stripe(conf))
++			return 1;
++	return 0;
++}
++
++#ifdef CONFIG_MD_RAID5_RESHAPE
++static int resize_stripes(raid5_conf_t *conf, int newsize)
++{
++	/* Make all the stripes able to hold 'newsize' devices.
++	 * New slots in each stripe get 'page' set to a new page.
++	 *
++	 * This happens in stages:
++	 * 1/ create a new kmem_cache and allocate the required number of
++	 *    stripe_heads.
++	 * 2/ gather all the old stripe_heads and transfer the pages across
++	 *    to the new stripe_heads.  This will have the side effect of
++	 *    freezing the array as once all stripe_heads have been collected,
++	 *    no IO will be possible.  Old stripe heads are freed once their
++	 *    pages have been transferred over, and the old kmem_cache is
++	 *    freed when all stripes are done.
++	 * 3/ reallocate conf->disks to be suitably bigger.  If this fails,
++	 *    we simply return a failure status - no need to clean anything up.
++	 * 4/ allocate new pages for the new slots in the new stripe_heads.
++	 *    If this fails, we don't bother trying to shrink the
++	 *    stripe_heads down again, we just leave them as they are.
++	 *    As each stripe_head is processed the new one is released into
++	 *    active service.
++	 *
++	 * Once step2 is started, we cannot afford to wait for a write,
++	 * so we use GFP_NOIO allocations.
++	 */
++	struct stripe_head *osh, *nsh;
++	LIST_HEAD(newstripes);
++	struct disk_info *ndisks;
++	int err = 0;
++	struct kmem_cache *sc;
++	int i;
++
++	if (newsize <= conf->pool_size)
++		return 0; /* never bother to shrink */
++
++	md_allow_write(conf->mddev);
++
++	/* Step 1 */
++	sc = kmem_cache_create(conf->cache_name[1-conf->active_name],
++			       sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev),
++			       0, 0, NULL, NULL);
++	if (!sc)
++		return -ENOMEM;
++
++	for (i = conf->max_nr_stripes; i; i--) {
++		nsh = kmem_cache_alloc(sc, GFP_KERNEL);
++		if (!nsh)
++			break;
++
++		memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev));
++
++		nsh->raid_conf = conf;
++		spin_lock_init(&nsh->lock);
++
++		list_add(&nsh->lru, &newstripes);
++	}
++	if (i) {
++		/* didn't get enough, give up */
++		while (!list_empty(&newstripes)) {
++			nsh = list_entry(newstripes.next, struct stripe_head, lru);
++			list_del(&nsh->lru);
++			kmem_cache_free(sc, nsh);
++		}
++		kmem_cache_destroy(sc);
++		return -ENOMEM;
++	}
++	/* Step 2 - Must use GFP_NOIO now.
++	 * OK, we have enough stripes, start collecting inactive
++	 * stripes and copying them over
++	 */
++	list_for_each_entry(nsh, &newstripes, lru) {
++		spin_lock_irq(&conf->device_lock);
++		wait_event_lock_irq(conf->wait_for_stripe,
++				    !list_empty(&conf->inactive_list),
++				    conf->device_lock,
++				    unplug_slaves(conf->mddev)
++			);
++		osh = get_free_stripe(conf);
++		spin_unlock_irq(&conf->device_lock);
++		atomic_set(&nsh->count, 1);
++		for(i=0; i<conf->pool_size; i++)
++			nsh->dev[i].page = osh->dev[i].page;
++		for( ; i<newsize; i++)
++			nsh->dev[i].page = NULL;
++		kmem_cache_free(conf->slab_cache, osh);
++	}
++	kmem_cache_destroy(conf->slab_cache);
++
++	/* Step 3.
++	 * At this point, we are holding all the stripes so the array
++	 * is completely stalled, so now is a good time to resize
++	 * conf->disks.
++	 */
++	ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO);
++	if (ndisks) {
++		for (i=0; i<conf->raid_disks; i++)
++			ndisks[i] = conf->disks[i];
++		kfree(conf->disks);
++		conf->disks = ndisks;
++	} else
++		err = -ENOMEM;
++
++	/* Step 4, return new stripes to service */
++	while(!list_empty(&newstripes)) {
++		nsh = list_entry(newstripes.next, struct stripe_head, lru);
++		list_del_init(&nsh->lru);
++		for (i=conf->raid_disks; i < newsize; i++)
++			if (nsh->dev[i].page == NULL) {
++				struct page *p = alloc_page(GFP_NOIO);
++				nsh->dev[i].page = p;
++				if (!p)
++					err = -ENOMEM;
++			}
++		release_stripe(nsh);
++	}
++	/* critical section passed, GFP_NOIO no longer needed */
++
++	conf->slab_cache = sc;
++	conf->active_name = 1-conf->active_name;
++	conf->pool_size = newsize;
++	return err;
++}
++#endif
++
++static int drop_one_stripe(raid5_conf_t *conf)
++{
++	struct stripe_head *sh;
+ 
+ 	spin_lock_irq(&conf->device_lock);
+ 	sh = get_free_stripe(conf);
+@@ -537,7 +1123,7 @@
+ 		if (bi == &sh->dev[i].req)
+ 			break;
+ 
+-	PRINTK("end_read_request %llu/%d, count: %d, uptodate %d.\n", 
++	pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n",
+ 		(unsigned long long)sh->sector, i, atomic_read(&sh->count), 
+ 		uptodate);
+ 	if (i == disks) {
+@@ -613,7 +1199,7 @@
+ 		if (bi == &sh->dev[i].req)
+ 			break;
+ 
+-	PRINTK("end_write_request %llu/%d, count %d, uptodate: %d.\n", 
++	pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n",
+ 		(unsigned long long)sh->sector, i, atomic_read(&sh->count),
+ 		uptodate);
+ 	if (i == disks) {
+@@ -658,7 +1244,7 @@
+ {
+ 	char b[BDEVNAME_SIZE];
+ 	raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
+-	PRINTK("raid5: error called\n");
++	pr_debug("raid5: error called\n");
+ 
+ 	if (!test_bit(Faulty, &rdev->flags)) {
+ 		set_bit(MD_CHANGE_DEVS, &mddev->flags);
+@@ -918,135 +1504,11 @@
+ 
+ #define check_xor() 	do { 						\
+ 			   if (count == MAX_XOR_BLOCKS) {		\
+-				xor_block(count, STRIPE_SIZE, ptr);	\
+-				count = 1;				\
++				xor_blocks(count, STRIPE_SIZE, dest, ptr);\
++				count = 0;				  \
+ 			   }						\
+ 			} while(0)
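/*
 * Editorial note: the old xor_block() used ptr[0] as both destination
 * and first source, so the batch counter was primed to 1; xor_blocks()
 * takes an explicit 'dest', so the counter now starts (and restarts)
 * at 0 and a full batch of MAX_XOR_BLOCKS source pages is flushed into
 * 'dest' at a time.
 */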
+ 
+-
+-static void compute_block(struct stripe_head *sh, int dd_idx)
+-{
+-	int i, count, disks = sh->disks;
+-	void *ptr[MAX_XOR_BLOCKS], *p;
+-
+-	PRINTK("compute_block, stripe %llu, idx %d\n", 
+-		(unsigned long long)sh->sector, dd_idx);
+-
+-	ptr[0] = page_address(sh->dev[dd_idx].page);
+-	memset(ptr[0], 0, STRIPE_SIZE);
+-	count = 1;
+-	for (i = disks ; i--; ) {
+-		if (i == dd_idx)
+-			continue;
+-		p = page_address(sh->dev[i].page);
+-		if (test_bit(R5_UPTODATE, &sh->dev[i].flags))
+-			ptr[count++] = p;
+-		else
+-			printk(KERN_ERR "compute_block() %d, stripe %llu, %d"
+-				" not present\n", dd_idx,
+-				(unsigned long long)sh->sector, i);
+-
+-		check_xor();
+-	}
+-	if (count != 1)
+-		xor_block(count, STRIPE_SIZE, ptr);
+-	set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
+-}
+-
+-static void compute_parity5(struct stripe_head *sh, int method)
+-{
+-	raid5_conf_t *conf = sh->raid_conf;
+-	int i, pd_idx = sh->pd_idx, disks = sh->disks, count;
+-	void *ptr[MAX_XOR_BLOCKS];
+-	struct bio *chosen;
+-
+-	PRINTK("compute_parity5, stripe %llu, method %d\n",
+-		(unsigned long long)sh->sector, method);
+-
+-	count = 1;
+-	ptr[0] = page_address(sh->dev[pd_idx].page);
+-	switch(method) {
+-	case READ_MODIFY_WRITE:
+-		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags));
+-		for (i=disks ; i-- ;) {
+-			if (i==pd_idx)
+-				continue;
+-			if (sh->dev[i].towrite &&
+-			    test_bit(R5_UPTODATE, &sh->dev[i].flags)) {
+-				ptr[count++] = page_address(sh->dev[i].page);
+-				chosen = sh->dev[i].towrite;
+-				sh->dev[i].towrite = NULL;
+-
+-				if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+-					wake_up(&conf->wait_for_overlap);
+-
+-				BUG_ON(sh->dev[i].written);
+-				sh->dev[i].written = chosen;
+-				check_xor();
+-			}
+-		}
+-		break;
+-	case RECONSTRUCT_WRITE:
+-		memset(ptr[0], 0, STRIPE_SIZE);
+-		for (i= disks; i-- ;)
+-			if (i!=pd_idx && sh->dev[i].towrite) {
+-				chosen = sh->dev[i].towrite;
+-				sh->dev[i].towrite = NULL;
+-
+-				if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+-					wake_up(&conf->wait_for_overlap);
+-
+-				BUG_ON(sh->dev[i].written);
+-				sh->dev[i].written = chosen;
+-			}
+-		break;
+-	case CHECK_PARITY:
+-		break;
+-	}
+-	if (count>1) {
+-		xor_block(count, STRIPE_SIZE, ptr);
+-		count = 1;
+-	}
+-	
+-	for (i = disks; i--;)
+-		if (sh->dev[i].written) {
+-			sector_t sector = sh->dev[i].sector;
+-			struct bio *wbi = sh->dev[i].written;
+-			while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) {
+-				copy_data(1, wbi, sh->dev[i].page, sector);
+-				wbi = r5_next_bio(wbi, sector);
+-			}
+-
+-			set_bit(R5_LOCKED, &sh->dev[i].flags);
+-			set_bit(R5_UPTODATE, &sh->dev[i].flags);
+-		}
+-
+-	switch(method) {
+-	case RECONSTRUCT_WRITE:
+-	case CHECK_PARITY:
+-		for (i=disks; i--;)
+-			if (i != pd_idx) {
+-				ptr[count++] = page_address(sh->dev[i].page);
+-				check_xor();
+-			}
+-		break;
+-	case READ_MODIFY_WRITE:
+-		for (i = disks; i--;)
+-			if (sh->dev[i].written) {
+-				ptr[count++] = page_address(sh->dev[i].page);
+-				check_xor();
+-			}
+-	}
+-	if (count != 1)
+-		xor_block(count, STRIPE_SIZE, ptr);
+-	
+-	if (method != CHECK_PARITY) {
+-		set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+-		set_bit(R5_LOCKED,   &sh->dev[pd_idx].flags);
+-	} else
+-		clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
+-}
+-
+ static void compute_parity6(struct stripe_head *sh, int method)
+ {
+ 	raid6_conf_t *conf = sh->raid_conf;
+@@ -1058,7 +1520,7 @@
+ 	qd_idx = raid6_next_disk(pd_idx, disks);
+ 	d0_idx = raid6_next_disk(qd_idx, disks);
+ 
+-	PRINTK("compute_parity, stripe %llu, method %d\n",
++	pr_debug("compute_parity, stripe %llu, method %d\n",
+ 		(unsigned long long)sh->sector, method);
+ 
+ 	switch(method) {
+@@ -1132,20 +1594,20 @@
+ static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
+ {
+ 	int i, count, disks = sh->disks;
+-	void *ptr[MAX_XOR_BLOCKS], *p;
++	void *ptr[MAX_XOR_BLOCKS], *dest, *p;
+ 	int pd_idx = sh->pd_idx;
+ 	int qd_idx = raid6_next_disk(pd_idx, disks);
+ 
+-	PRINTK("compute_block_1, stripe %llu, idx %d\n",
++	pr_debug("compute_block_1, stripe %llu, idx %d\n",
+ 		(unsigned long long)sh->sector, dd_idx);
+ 
+ 	if ( dd_idx == qd_idx ) {
+ 		/* We're actually computing the Q drive */
+ 		compute_parity6(sh, UPDATE_PARITY);
+ 	} else {
+-		ptr[0] = page_address(sh->dev[dd_idx].page);
+-		if (!nozero) memset(ptr[0], 0, STRIPE_SIZE);
+-		count = 1;
++		dest = page_address(sh->dev[dd_idx].page);
++		if (!nozero) memset(dest, 0, STRIPE_SIZE);
++		count = 0;
+ 		for (i = disks ; i--; ) {
+ 			if (i == dd_idx || i == qd_idx)
+ 				continue;
+@@ -1159,8 +1621,8 @@
+ 
+ 			check_xor();
+ 		}
+-		if (count != 1)
+-			xor_block(count, STRIPE_SIZE, ptr);
++		if (count)
++			xor_blocks(count, STRIPE_SIZE, dest, ptr);
+ 		if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
+ 		else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
+ 	}
+@@ -1183,7 +1645,7 @@
+ 	BUG_ON(faila == failb);
+ 	if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; }
+ 
+-	PRINTK("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
++	pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n",
+ 	       (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb);
+ 
+ 	if ( failb == disks-1 ) {
+@@ -1229,7 +1691,79 @@
+ 	}
+ }
+ 
++static int
++handle_write_operations5(struct stripe_head *sh, int rcw, int expand)
++{
++	int i, pd_idx = sh->pd_idx, disks = sh->disks;
++	int locked = 0;
++
++		/* if we are not expanding, this is a proper write request, and
++		/* if we are not expanding this is a proper write request, and
++		 * there will be bios with new data to be drained into the
++		 * stripe cache
++		 */
++		if (!expand) {
++			set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
++			sh->ops.count++;
++		}
++
++		set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
++		sh->ops.count++;
++
++		for (i = disks; i--; ) {
++			struct r5dev *dev = &sh->dev[i];
++
++			if (dev->towrite) {
++				set_bit(R5_LOCKED, &dev->flags);
++				if (!expand)
++					clear_bit(R5_UPTODATE, &dev->flags);
++				locked++;
++			}
++		}
++	} else {
++		BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) ||
++			test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags)));
++
++		set_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
++		set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
++		set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
++
++		sh->ops.count += 3;
++
++		for (i = disks; i--; ) {
++			struct r5dev *dev = &sh->dev[i];
++			if (i == pd_idx)
++				continue;
++
++			/* For a read-modify-write there may be blocks that are
++			 * locked for reading while others are ready to be
++			 * written so we distinguish these blocks by the
++			 * R5_Wantprexor bit
++			 */
++			if (dev->towrite &&
++			    (test_bit(R5_UPTODATE, &dev->flags) ||
++			    test_bit(R5_Wantcompute, &dev->flags))) {
++				set_bit(R5_Wantprexor, &dev->flags);
++				set_bit(R5_LOCKED, &dev->flags);
++				clear_bit(R5_UPTODATE, &dev->flags);
++				locked++;
++			}
++		}
++	}
++
++	/* keep the parity disk locked while asynchronous operations
++	 * are in flight
++	 */
++	set_bit(R5_LOCKED, &sh->dev[pd_idx].flags);
++	clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags);
++	locked++;
++
++	pr_debug("%s: stripe %llu locked: %d pending: %lx\n",
++		__FUNCTION__, (unsigned long long)sh->sector,
++		locked, sh->ops.pending);
+ 
++	return locked;
++}
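/*
 * Worked example (editorial): on a 5-disk stripe overwriting a single
 * block with parity already uptodate, the rmw branch (rcw == 0) queues
 * PREXOR + BIODRAIN + POSTXOR (ops.count += 3), flags the one data
 * block R5_Wantprexor and locks it, then locks the parity block, so
 * the function returns locked == 2.
 */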
+ 
+ /*
+  * Each stripe/dev can have one or more bion attached.
+@@ -1242,7 +1776,7 @@
+ 	raid5_conf_t *conf = sh->raid_conf;
+ 	int firstwrite=0;
+ 
+-	PRINTK("adding bh b#%llu to stripe s#%llu\n",
++	pr_debug("adding bh b#%llu to stripe s#%llu\n",
+ 		(unsigned long long)bi->bi_sector,
+ 		(unsigned long long)sh->sector);
+ 
+@@ -1271,7 +1805,7 @@
+ 	spin_unlock_irq(&conf->device_lock);
+ 	spin_unlock(&sh->lock);
+ 
+-	PRINTK("added bi b#%llu to stripe s#%llu, disk %d.\n",
++	pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
+ 		(unsigned long long)bi->bi_sector,
+ 		(unsigned long long)sh->sector, dd_idx);
+ 
+@@ -1326,116 +1860,14 @@
+ 	return pd_idx;
+ }
+ 
+-
+-/*
+- * handle_stripe - do things to a stripe.
+- *
+- * We lock the stripe and then examine the state of various bits
+- * to see what needs to be done.
+- * Possible results:
+- *    return some read request which now have data
+- *    return some write requests which are safely on disc
+- *    schedule a read on some buffers
+- *    schedule a write of some buffers
+- *    return confirmation of parity correctness
+- *
+- * Parity calculations are done inside the stripe lock
+- * buffers are taken off read_list or write_list, and bh_cache buffers
+- * get BH_Lock set before the stripe lock is released.
+- *
+- */
+- 
+-static void handle_stripe5(struct stripe_head *sh)
++static void
++handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh,
++				struct stripe_head_state *s, int disks,
++				struct bio **return_bi)
+ {
+-	raid5_conf_t *conf = sh->raid_conf;
+-	int disks = sh->disks;
+-	struct bio *return_bi= NULL;
+-	struct bio *bi;
+ 	int i;
+-	int syncing, expanding, expanded;
+-	int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
+-	int non_overwrite = 0;
+-	int failed_num=0;
+-	struct r5dev *dev;
+-
+-	PRINTK("handling stripe %llu, cnt=%d, pd_idx=%d\n",
+-		(unsigned long long)sh->sector, atomic_read(&sh->count),
+-		sh->pd_idx);
+-
+-	spin_lock(&sh->lock);
+-	clear_bit(STRIPE_HANDLE, &sh->state);
+-	clear_bit(STRIPE_DELAYED, &sh->state);
+-
+-	syncing = test_bit(STRIPE_SYNCING, &sh->state);
+-	expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+-	expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+-	/* Now to look around and see what can be done */
+-
+-	rcu_read_lock();
+-	for (i=disks; i--; ) {
+-		mdk_rdev_t *rdev;
+-		dev = &sh->dev[i];
+-		clear_bit(R5_Insync, &dev->flags);
+-
+-		PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
+-			i, dev->flags, dev->toread, dev->towrite, dev->written);
+-		/* maybe we can reply to a read */
+-		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
+-			struct bio *rbi, *rbi2;
+-			PRINTK("Return read for disc %d\n", i);
+-			spin_lock_irq(&conf->device_lock);
+-			rbi = dev->toread;
+-			dev->toread = NULL;
+-			if (test_and_clear_bit(R5_Overlap, &dev->flags))
+-				wake_up(&conf->wait_for_overlap);
+-			spin_unlock_irq(&conf->device_lock);
+-			while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
+-				copy_data(0, rbi, dev->page, dev->sector);
+-				rbi2 = r5_next_bio(rbi, dev->sector);
+-				spin_lock_irq(&conf->device_lock);
+-				if (--rbi->bi_phys_segments == 0) {
+-					rbi->bi_next = return_bi;
+-					return_bi = rbi;
+-				}
+-				spin_unlock_irq(&conf->device_lock);
+-				rbi = rbi2;
+-			}
+-		}
+-
+-		/* now count some things */
+-		if (test_bit(R5_LOCKED, &dev->flags)) locked++;
+-		if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++;
+-
+-		
+-		if (dev->toread) to_read++;
+-		if (dev->towrite) {
+-			to_write++;
+-			if (!test_bit(R5_OVERWRITE, &dev->flags))
+-				non_overwrite++;
+-		}
+-		if (dev->written) written++;
+-		rdev = rcu_dereference(conf->disks[i].rdev);
+-		if (!rdev || !test_bit(In_sync, &rdev->flags)) {
+-			/* The ReadError flag will just be confusing now */
+-			clear_bit(R5_ReadError, &dev->flags);
+-			clear_bit(R5_ReWrite, &dev->flags);
+-		}
+-		if (!rdev || !test_bit(In_sync, &rdev->flags)
+-		    || test_bit(R5_ReadError, &dev->flags)) {
+-			failed++;
+-			failed_num = i;
+-		} else
+-			set_bit(R5_Insync, &dev->flags);
+-	}
+-	rcu_read_unlock();
+-	PRINTK("locked=%d uptodate=%d to_read=%d"
+-		" to_write=%d failed=%d failed_num=%d\n",
+-		locked, uptodate, to_read, to_write, failed, failed_num);
+-	/* check if the array has lost two devices and, if so, some requests might
+-	 * need to be failed
+-	 */
+-	if (failed > 1 && to_read+to_write+written) {
+-		for (i=disks; i--; ) {
++	for (i = disks; i--; ) {
++		struct bio *bi;
+ 			int bitmap_end = 0;
+ 
+ 			if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
+@@ -1447,23 +1879,26 @@
+ 					md_error(conf->mddev, rdev);
+ 				rcu_read_unlock();
+ 			}
+-
+ 			spin_lock_irq(&conf->device_lock);
+ 			/* fail all writes first */
+ 			bi = sh->dev[i].towrite;
+ 			sh->dev[i].towrite = NULL;
+-			if (bi) { to_write--; bitmap_end = 1; }
++		if (bi) {
++			s->to_write--;
++			bitmap_end = 1;
++		}
+ 
+ 			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+ 				wake_up(&conf->wait_for_overlap);
+ 
+-			while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
++		while (bi && bi->bi_sector <
++			sh->dev[i].sector + STRIPE_SECTORS) {
+ 				struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
+ 				clear_bit(BIO_UPTODATE, &bi->bi_flags);
+ 				if (--bi->bi_phys_segments == 0) {
+ 					md_write_end(conf->mddev);
+-					bi->bi_next = return_bi;
+-					return_bi = bi;
++				bi->bi_next = *return_bi;
++				*return_bi = bi;
+ 				}
+ 				bi = nextbi;
+ 			}
+@@ -1471,78 +1906,235 @@
+ 			bi = sh->dev[i].written;
+ 			sh->dev[i].written = NULL;
+ 			if (bi) bitmap_end = 1;
+-			while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) {
++		while (bi && bi->bi_sector <
++		       sh->dev[i].sector + STRIPE_SECTORS) {
+ 				struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
+ 				clear_bit(BIO_UPTODATE, &bi->bi_flags);
+ 				if (--bi->bi_phys_segments == 0) {
+ 					md_write_end(conf->mddev);
+-					bi->bi_next = return_bi;
+-					return_bi = bi;
++				bi->bi_next = *return_bi;
++				*return_bi = bi;
++			}
++			bi = bi2;
++		}
++
++		/* fail any reads if this device is non-operational and
++		 * the data has not reached the cache yet.
++		 */
++		if (!test_bit(R5_Wantfill, &sh->dev[i].flags) &&
++		    (!test_bit(R5_Insync, &sh->dev[i].flags) ||
++		      test_bit(R5_ReadError, &sh->dev[i].flags))) {
++			bi = sh->dev[i].toread;
++			sh->dev[i].toread = NULL;
++			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
++				wake_up(&conf->wait_for_overlap);
++			if (bi) s->to_read--;
++			while (bi && bi->bi_sector <
++			       sh->dev[i].sector + STRIPE_SECTORS) {
++				struct bio *nextbi =
++					r5_next_bio(bi, sh->dev[i].sector);
++				clear_bit(BIO_UPTODATE, &bi->bi_flags);
++				if (--bi->bi_phys_segments == 0) {
++					bi->bi_next = *return_bi;
++					*return_bi = bi;
++				}
++				bi = nextbi;
++			}
++		}
++		spin_unlock_irq(&conf->device_lock);
++		if (bitmap_end)
++			bitmap_endwrite(conf->mddev->bitmap, sh->sector,
++					STRIPE_SECTORS, 0, 0);
++	}
++
++}
++
++/* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks
++ * to process
++ */
++static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
++			struct stripe_head_state *s, int disk_idx, int disks)
++{
++	struct r5dev *dev = &sh->dev[disk_idx];
++	struct r5dev *failed_dev = &sh->dev[s->failed_num];
++
++	/* don't schedule compute operations or reads on the parity block while
++	 * a check is in flight
++	 */
++	if ((disk_idx == sh->pd_idx) &&
++	     test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
++		return ~0;
++
++	/* is the data in this block needed, and can we get it? */
++	if (!test_bit(R5_LOCKED, &dev->flags) &&
++	    !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread ||
++	    (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
++	     s->syncing || s->expanding || (s->failed &&
++	     (failed_dev->toread || (failed_dev->towrite &&
++	     !test_bit(R5_OVERWRITE, &failed_dev->flags)
++	     ))))) {
++		/* 1/ We would like to get this block, possibly by computing it,
++		 * but we might not be able to.
++		 *
++		 * 2/ Since parity check operations potentially make the parity
++		 * block !uptodate it will need to be refreshed before any
++		 * compute operations on data disks are scheduled.
++		 *
++		 * 3/ We hold off parity block re-reads until check operations
++		 * have quiesced.
++		 */
++		if ((s->uptodate == disks - 1) &&
++		    !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
++			set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
++			set_bit(R5_Wantcompute, &dev->flags);
++			sh->ops.target = disk_idx;
++			s->req_compute = 1;
++			sh->ops.count++;
++			/* Careful: from this point on 'uptodate' is in the eye
++			 * of raid5_run_ops which services 'compute' operations
++			 * before writes. R5_Wantcompute flags a block that will
++			 * be R5_UPTODATE by the time it is needed for a
++			 * subsequent operation.
++			 */
++			s->uptodate++;
++			return 0; /* uptodate + compute == disks */
++		} else if ((s->uptodate < disks - 1) &&
++			test_bit(R5_Insync, &dev->flags)) {
++			/* Note: we hold off compute operations while checks are
++			 * in flight, but we still prefer 'compute' over 'read'
++			 * hence we only read if (uptodate < disks-1)
++			 */
++			set_bit(R5_LOCKED, &dev->flags);
++			set_bit(R5_Wantread, &dev->flags);
++			if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
++				sh->ops.count++;
++			s->locked++;
++			pr_debug("Reading block %d (sync=%d)\n", disk_idx,
++				s->syncing);
+ 				}
+-				bi = bi2;
+ 			}
+ 
+-			/* fail any reads if this device is non-operational */
+-			if (!test_bit(R5_Insync, &sh->dev[i].flags) ||
+-			    test_bit(R5_ReadError, &sh->dev[i].flags)) {
+-				bi = sh->dev[i].toread;
+-				sh->dev[i].toread = NULL;
+-				if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+-					wake_up(&conf->wait_for_overlap);
+-				if (bi) to_read--;
+-				while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
+-					struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
+-					clear_bit(BIO_UPTODATE, &bi->bi_flags);
+-					if (--bi->bi_phys_segments == 0) {
+-						bi->bi_next = return_bi;
+-						return_bi = bi;
+-					}
+-					bi = nextbi;
++	return ~0;
++}
++
++static void handle_issuing_new_read_requests5(struct stripe_head *sh,
++			struct stripe_head_state *s, int disks)
++{
++	int i;
++
++	/* Clear completed compute operations.  Parity recovery
++	 * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled
++	 * later on in this routine
++	 */
++	if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
++		!test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
++		clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
++		clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
++		clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
++	}
++
++	/* look for blocks to read/compute, skip this if a compute
++	 * is already in flight, or if the stripe contents are in the
++	 * midst of changing due to a write
++	 */
++	if (!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
++		!test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) &&
++		!test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
++		for (i = disks; i--; )
++			if (__handle_issuing_new_read_requests5(
++				sh, s, i, disks) == 0)
++				break;
+ 				}
++	set_bit(STRIPE_HANDLE, &sh->state);
++}
++
++static void handle_issuing_new_read_requests6(struct stripe_head *sh,
++			struct stripe_head_state *s, struct r6_state *r6s,
++			int disks)
++{
++	int i;
++	for (i = disks; i--; ) {
++		struct r5dev *dev = &sh->dev[i];
++		if (!test_bit(R5_LOCKED, &dev->flags) &&
++		    !test_bit(R5_UPTODATE, &dev->flags) &&
++		    (dev->toread || (dev->towrite &&
++		     !test_bit(R5_OVERWRITE, &dev->flags)) ||
++		     s->syncing || s->expanding ||
++		     (s->failed >= 1 &&
++		      (sh->dev[r6s->failed_num[0]].toread ||
++		       s->to_write)) ||
++		     (s->failed >= 2 &&
++		      (sh->dev[r6s->failed_num[1]].toread ||
++		       s->to_write)))) {
++			/* we would like to get this block, possibly
++			 * by computing it, but we might not be able to
++			 */
++			if (s->uptodate == disks-1) {
++				pr_debug("Computing stripe %llu block %d\n",
++				       (unsigned long long)sh->sector, i);
++				compute_block_1(sh, i, 0);
++				s->uptodate++;
++			} else if ( s->uptodate == disks-2 && s->failed >= 2 ) {
++				/* Computing 2-failure is *very* expensive; only
++				 * do it if failed >= 2
++				 */
++				int other;
++				for (other = disks; other--; ) {
++					if (other == i)
++						continue;
++					if (!test_bit(R5_UPTODATE,
++					      &sh->dev[other].flags))
++						break;
+ 			}
+-			spin_unlock_irq(&conf->device_lock);
+-			if (bitmap_end)
+-				bitmap_endwrite(conf->mddev->bitmap, sh->sector,
+-						STRIPE_SECTORS, 0, 0);
++				BUG_ON(other < 0);
++				pr_debug("Computing stripe %llu blocks %d,%d\n",
++				       (unsigned long long)sh->sector,
++				       i, other);
++				compute_block_2(sh, i, other);
++				s->uptodate += 2;
++			} else if (test_bit(R5_Insync, &dev->flags)) {
++				set_bit(R5_LOCKED, &dev->flags);
++				set_bit(R5_Wantread, &dev->flags);
++				s->locked++;
++				pr_debug("Reading block %d (sync=%d)\n",
++					i, s->syncing);
+ 		}
+ 	}
+-	if (failed > 1 && syncing) {
+-		md_done_sync(conf->mddev, STRIPE_SECTORS,0);
+-		clear_bit(STRIPE_SYNCING, &sh->state);
+-		syncing = 0;
+ 	}
++	set_bit(STRIPE_HANDLE, &sh->state);
++}
+ 
+-	/* might be able to return some write requests if the parity block
+-	 * is safe, or on a failed drive
+-	 */
+-	dev = &sh->dev[sh->pd_idx];
+-	if ( written &&
+-	     ( (test_bit(R5_Insync, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
+-		test_bit(R5_UPTODATE, &dev->flags))
+-	       || (failed == 1 && failed_num == sh->pd_idx))
+-	    ) {
+-	    /* any written block on an uptodate or failed drive can be returned.
++
++/* handle_completed_write_requests
++ * any written block on an uptodate or failed drive can be returned.
+ 	     * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but 
+ 	     * never LOCKED, so we don't need to test 'failed' directly.
+ 	     */
+-	    for (i=disks; i--; )
++static void handle_completed_write_requests(raid5_conf_t *conf,
++	struct stripe_head *sh, int disks, struct bio **return_bi)
++{
++	int i;
++	struct r5dev *dev;
++
++	for (i = disks; i--; )
+ 		if (sh->dev[i].written) {
+ 		    dev = &sh->dev[i];
+ 		    if (!test_bit(R5_LOCKED, &dev->flags) &&
+-			 test_bit(R5_UPTODATE, &dev->flags) ) {
++				test_bit(R5_UPTODATE, &dev->flags)) {
+ 			/* We can return any write requests */
+ 			    struct bio *wbi, *wbi2;
+ 			    int bitmap_end = 0;
+-			    PRINTK("Return write for disc %d\n", i);
++				pr_debug("Return write for disc %d\n", i);
+ 			    spin_lock_irq(&conf->device_lock);
+ 			    wbi = dev->written;
+ 			    dev->written = NULL;
+-			    while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
++				while (wbi && wbi->bi_sector <
++					dev->sector + STRIPE_SECTORS) {
+ 				    wbi2 = r5_next_bio(wbi, dev->sector);
+ 				    if (--wbi->bi_phys_segments == 0) {
+ 					    md_write_end(conf->mddev);
+-					    wbi->bi_next = return_bi;
+-					    return_bi = wbi;
++						wbi->bi_next = *return_bi;
++						*return_bi = wbi;
+ 				    }
+ 				    wbi = wbi2;
+ 			    }
+@@ -1550,89 +2142,63 @@
+ 				    bitmap_end = 1;
+ 			    spin_unlock_irq(&conf->device_lock);
+ 			    if (bitmap_end)
+-				    bitmap_endwrite(conf->mddev->bitmap, sh->sector,
++					bitmap_endwrite(conf->mddev->bitmap,
++							sh->sector,
+ 						    STRIPE_SECTORS,
+-						    !test_bit(STRIPE_DEGRADED, &sh->state), 0);
+-		    }
+-		}
+-	}
+-
+-	/* Now we might consider reading some blocks, either to check/generate
+-	 * parity, or to satisfy requests
+-	 * or to load a block that is being partially written.
+-	 */
+-	if (to_read || non_overwrite || (syncing && (uptodate < disks)) || expanding) {
+-		for (i=disks; i--;) {
+-			dev = &sh->dev[i];
+-			if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
+-			    (dev->toread ||
+-			     (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+-			     syncing ||
+-			     expanding ||
+-			     (failed && (sh->dev[failed_num].toread ||
+-					 (sh->dev[failed_num].towrite && !test_bit(R5_OVERWRITE, &sh->dev[failed_num].flags))))
+-				    )
+-				) {
+-				/* we would like to get this block, possibly
+-				 * by computing it, but we might not be able to
+-				 */
+-				if (uptodate == disks-1) {
+-					PRINTK("Computing block %d\n", i);
+-					compute_block(sh, i);
+-					uptodate++;
+-				} else if (test_bit(R5_Insync, &dev->flags)) {
+-					set_bit(R5_LOCKED, &dev->flags);
+-					set_bit(R5_Wantread, &dev->flags);
+-					locked++;
+-					PRINTK("Reading block %d (sync=%d)\n", 
+-						i, syncing);
+-				}
++					 !test_bit(STRIPE_DEGRADED, &sh->state),
++							0);
+ 			}
+ 		}
+-		set_bit(STRIPE_HANDLE, &sh->state);
+-	}
++}
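
The loop above only completes a write bio once its bi_phys_segments count drops to zero, because a single bio can span several stripes and each stripe drops one reference. A toy refcount model of that rule (struct wreq and put_segment() are hypothetical stand-ins):

    /*
     * Toy model of the bi_phys_segments rule: a write bio can span
     * several stripes, each stripe holds one segment reference, and
     * only the stripe that drops the last reference may complete it.
     */
    #include <stdio.h>

    struct wreq { int segments; };

    static int put_segment(struct wreq *w)
    {
        return --w->segments == 0;      /* true when the last stripe is done */
    }

    int main(void)
    {
        struct wreq bio = { .segments = 3 };    /* spans three stripes */

        for (int stripe = 0; stripe < 3; stripe++) {
            if (put_segment(&bio))
                printf("stripe %d drops last ref: complete the bio\n", stripe);
            else
                printf("stripe %d done, bio still pending\n", stripe);
        }
        return 0;
    }
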
+ 
+-	/* now to consider writing and what else, if anything should be read */
+-	if (to_write) {
+-		int rmw=0, rcw=0;
+-		for (i=disks ; i--;) {
++static void handle_issuing_new_write_requests5(raid5_conf_t *conf,
++		struct stripe_head *sh,	struct stripe_head_state *s, int disks)
++{
++	int rmw = 0, rcw = 0, i;
++	for (i = disks; i--; ) {
+ 			/* would I have to read this buffer for read_modify_write */
+-			dev = &sh->dev[i];
++		struct r5dev *dev = &sh->dev[i];
+ 			if ((dev->towrite || i == sh->pd_idx) &&
+-			    (!test_bit(R5_LOCKED, &dev->flags) 
+-				    ) &&
+-			    !test_bit(R5_UPTODATE, &dev->flags)) {
+-				if (test_bit(R5_Insync, &dev->flags)
+-/*				    && !(!mddev->insync && i == sh->pd_idx) */
+-					)
++		    !test_bit(R5_LOCKED, &dev->flags) &&
++		    !(test_bit(R5_UPTODATE, &dev->flags) ||
++		      test_bit(R5_Wantcompute, &dev->flags))) {
++			if (test_bit(R5_Insync, &dev->flags))
+ 					rmw++;
+-				else rmw += 2*disks;  /* cannot read it */
++			else
++				rmw += 2*disks;  /* cannot read it */
+ 			}
+ 			/* Would I have to read this buffer for reconstruct_write */
+ 			if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
+-			    (!test_bit(R5_LOCKED, &dev->flags) 
+-				    ) &&
+-			    !test_bit(R5_UPTODATE, &dev->flags)) {
++		    !test_bit(R5_LOCKED, &dev->flags) &&
++		    !(test_bit(R5_UPTODATE, &dev->flags) ||
++		    test_bit(R5_Wantcompute, &dev->flags))) {
+ 				if (test_bit(R5_Insync, &dev->flags)) rcw++;
+-				else rcw += 2*disks;
++			else
++				rcw += 2*disks;
+ 			}
+ 		}
+-		PRINTK("for sector %llu, rmw=%d rcw=%d\n", 
++	pr_debug("for sector %llu, rmw=%d rcw=%d\n",
+ 			(unsigned long long)sh->sector, rmw, rcw);
+ 		set_bit(STRIPE_HANDLE, &sh->state);
+ 		if (rmw < rcw && rmw > 0)
+ 			/* prefer read-modify-write, but need to get some data */
+-			for (i=disks; i--;) {
+-				dev = &sh->dev[i];
++		for (i = disks; i--; ) {
++			struct r5dev *dev = &sh->dev[i];
+ 				if ((dev->towrite || i == sh->pd_idx) &&
+-				    !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
++			    !test_bit(R5_LOCKED, &dev->flags) &&
++			    !(test_bit(R5_UPTODATE, &dev->flags) ||
++			    test_bit(R5_Wantcompute, &dev->flags)) &&
+ 				    test_bit(R5_Insync, &dev->flags)) {
+-					if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+-					{
+-						PRINTK("Read_old block %d for r-m-w\n", i);
++				if (
++				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
++					pr_debug("Read_old block "
++						"%d for r-m-w\n", i);
+ 						set_bit(R5_LOCKED, &dev->flags);
+ 						set_bit(R5_Wantread, &dev->flags);
+-						locked++;
++					if (!test_and_set_bit(
++						STRIPE_OP_IO, &sh->ops.pending))
++						sh->ops.count++;
++					s->locked++;
+ 					} else {
+ 						set_bit(STRIPE_DELAYED, &sh->state);
+ 						set_bit(STRIPE_HANDLE, &sh->state);
+@@ -1641,165 +2207,367 @@
+ 			}
+ 		if (rcw <= rmw && rcw > 0)
+ 			/* want reconstruct write, but need to get some data */
+-			for (i=disks; i--;) {
+-				dev = &sh->dev[i];
+-				if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
+-				    !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
++		for (i = disks; i--; ) {
++			struct r5dev *dev = &sh->dev[i];
++			if (!test_bit(R5_OVERWRITE, &dev->flags) &&
++			    i != sh->pd_idx &&
++			    !test_bit(R5_LOCKED, &dev->flags) &&
++			    !(test_bit(R5_UPTODATE, &dev->flags) ||
++			    test_bit(R5_Wantcompute, &dev->flags)) &&
+ 				    test_bit(R5_Insync, &dev->flags)) {
+-					if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+-					{
+-						PRINTK("Read_old block %d for Reconstruct\n", i);
++				if (
++				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
++					pr_debug("Read_old block "
++						"%d for Reconstruct\n", i);
+ 						set_bit(R5_LOCKED, &dev->flags);
+ 						set_bit(R5_Wantread, &dev->flags);
+-						locked++;
++					if (!test_and_set_bit(
++						STRIPE_OP_IO, &sh->ops.pending))
++						sh->ops.count++;
++					s->locked++;
++				} else {
++					set_bit(STRIPE_DELAYED, &sh->state);
++					set_bit(STRIPE_HANDLE, &sh->state);
++				}
++			}
++		}
++	/* now if nothing is locked, and if we have enough data,
++	 * we can start a write request
++	 */
++	/* since handle_stripe can be called at any time we need to handle the
++	 * case where a compute block operation has been submitted and then a
++	 * subsequent call wants to start a write request.  raid5_run_ops only
++	 * handles the case where compute block and postxor are requested
++	 * simultaneously.  If this is not the case then new writes need to be
++	 * held off until the compute completes.
++	 */
++	if ((s->req_compute ||
++	    !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) &&
++		(s->locked == 0 && (rcw == 0 || rmw == 0) &&
++		!test_bit(STRIPE_BIT_DELAY, &sh->state)))
++		s->locked += handle_write_operations5(sh, rcw == 0, 0);
++}
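
The rmw/rcw accounting above is a cost model: each strategy is charged one read per block it needs but does not have up to date, and a block with no in-sync copy is charged 2*disks so the other strategy always wins. A compressed userspace sketch of that choice, with an illustrative struct blk in place of struct r5dev:

    /*
     * Compressed sketch of the read-modify-write vs. reconstruct-
     * write cost model.  struct blk and cost() are illustrative
     * stand-ins; the decision rule mirrors the hunk above.
     */
    #include <stdbool.h>
    #include <stdio.h>

    struct blk { bool towrite, uptodate, insync, is_parity; };

    static int cost(const struct blk *b, int disks, bool rcw)
    {
        int c = 0;

        for (int i = 0; i < disks; i++) {
            bool need = rcw ? (!b[i].is_parity && !b[i].towrite)
                            : (b[i].towrite || b[i].is_parity);
            if (need && !b[i].uptodate)
                c += b[i].insync ? 1 : 2 * disks; /* unreadable: huge penalty */
        }
        return c;
    }

    int main(void)
    {
        struct blk stripe[4] = {
            { .towrite = true, .insync = true },    /* block being written */
            { .uptodate = true, .insync = true },   /* already cached      */
            { .insync = true },                     /* would need a read   */
            { .is_parity = true, .insync = true },  /* parity              */
        };
        int rmw = cost(stripe, 4, false), rcw = cost(stripe, 4, true);

        printf("rmw=%d rcw=%d -> %s\n", rmw, rcw,
               rmw < rcw && rmw > 0 ? "read-modify-write" : "reconstruct-write");
        return 0;
    }
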
++
++static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
++		struct stripe_head *sh,	struct stripe_head_state *s,
++		struct r6_state *r6s, int disks)
++{
++	int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i;
++	int qd_idx = r6s->qd_idx;
++	for (i = disks; i--; ) {
++		struct r5dev *dev = &sh->dev[i];
++		/* Would I have to read this buffer for reconstruct_write */
++		if (!test_bit(R5_OVERWRITE, &dev->flags)
++		    && i != pd_idx && i != qd_idx
++		    && (!test_bit(R5_LOCKED, &dev->flags)
++			    ) &&
++		    !test_bit(R5_UPTODATE, &dev->flags)) {
++			if (test_bit(R5_Insync, &dev->flags)) rcw++;
++			else {
++				pr_debug("raid6: must_compute: "
++					"disk %d flags=%#lx\n", i, dev->flags);
++				must_compute++;
++			}
++		}
++	}
++	pr_debug("for sector %llu, rcw=%d, must_compute=%d\n",
++	       (unsigned long long)sh->sector, rcw, must_compute);
++	set_bit(STRIPE_HANDLE, &sh->state);
++
++	if (rcw > 0)
++		/* want reconstruct write, but need to get some data */
++		for (i = disks; i--; ) {
++			struct r5dev *dev = &sh->dev[i];
++			if (!test_bit(R5_OVERWRITE, &dev->flags)
++			    && !(s->failed == 0 && (i == pd_idx || i == qd_idx))
++			    && !test_bit(R5_LOCKED, &dev->flags) &&
++			    !test_bit(R5_UPTODATE, &dev->flags) &&
++			    test_bit(R5_Insync, &dev->flags)) {
++				if (
++				  test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
++					pr_debug("Read_old stripe %llu "
++						"block %d for Reconstruct\n",
++					     (unsigned long long)sh->sector, i);
++					set_bit(R5_LOCKED, &dev->flags);
++					set_bit(R5_Wantread, &dev->flags);
++					s->locked++;
+ 					} else {
++					pr_debug("Request delayed stripe %llu "
++						"block %d for Reconstruct\n",
++					     (unsigned long long)sh->sector, i);
+ 						set_bit(STRIPE_DELAYED, &sh->state);
+ 						set_bit(STRIPE_HANDLE, &sh->state);
+ 					}
+ 				}
+ 			}
+-		/* now if nothing is locked, and if we have enough data, we can start a write request */
+-		if (locked == 0 && (rcw == 0 ||rmw == 0) &&
++	/* now if nothing is locked, and if we have enough data, we can start a
++	 * write request
++	 */
++	if (s->locked == 0 && rcw == 0 &&
+ 		    !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
+-			PRINTK("Computing parity...\n");
+-			compute_parity5(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE);
++		if (must_compute > 0) {
++			/* We have failed blocks and need to compute them */
++			switch (s->failed) {
++			case 0:
++				BUG();
++			case 1:
++				compute_block_1(sh, r6s->failed_num[0], 0);
++				break;
++			case 2:
++				compute_block_2(sh, r6s->failed_num[0],
++						r6s->failed_num[1]);
++				break;
++			default: /* This request should have been failed? */
++				BUG();
++			}
++		}
++
++		pr_debug("Computing parity for stripe %llu\n",
++			(unsigned long long)sh->sector);
++		compute_parity6(sh, RECONSTRUCT_WRITE);
+ 			/* now every locked buffer is ready to be written */
+-			for (i=disks; i--;)
++		for (i = disks; i--; )
+ 				if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
+-					PRINTK("Writing block %d\n", i);
+-					locked++;
++				pr_debug("Writing stripe %llu block %d\n",
++				       (unsigned long long)sh->sector, i);
++				s->locked++;
+ 					set_bit(R5_Wantwrite, &sh->dev[i].flags);
+-					if (!test_bit(R5_Insync, &sh->dev[i].flags)
+-					    || (i==sh->pd_idx && failed == 0))
+-						set_bit(STRIPE_INSYNC, &sh->state);
+ 				}
++		/* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
++		set_bit(STRIPE_INSYNC, &sh->state);
++
+ 			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+ 				atomic_dec(&conf->preread_active_stripes);
+-				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
++			if (atomic_read(&conf->preread_active_stripes) <
++			    IO_THRESHOLD)
+ 					md_wakeup_thread(conf->mddev->thread);
+ 			}
+ 		}
+-	}
++}
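
must_compute in the hunk above counts blocks that cannot be read and must be regenerated before the reconstruct-write; the switch on s->failed then dispatches to the one- or two-block recovery routines, and any other count is a bug. A skeletal analogue (compute_one()/compute_two() are hypothetical placeholders for compute_block_1()/compute_block_2()):

    /*
     * Skeletal analogue of the must_compute dispatch: any failure
     * count other than 1 or 2 means the request should have been
     * failed long before this point.
     */
    #include <assert.h>
    #include <stdio.h>

    static void compute_one(int d)        { printf("recompute block %d\n", d); }
    static void compute_two(int a, int b) { printf("recompute blocks %d,%d\n", a, b); }

    static void recover(int failed, const int failed_num[2])
    {
        switch (failed) {
        case 1:
            compute_one(failed_num[0]);
            break;
        case 2:
            compute_two(failed_num[0], failed_num[1]);
            break;
        default:
            assert(0);  /* mirrors the BUG() cases above */
        }
    }

    int main(void)
    {
        int failed_num[2] = { 1, 3 };

        recover(2, failed_num);
        return 0;
    }
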
+ 
+-	/* maybe we need to check and possibly fix the parity for this stripe
+-	 * Any reads will already have been scheduled, so we just see if enough data
+-	 * is available
+-	 */
+-	if (syncing && locked == 0 &&
+-	    !test_bit(STRIPE_INSYNC, &sh->state)) {
++static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
++				struct stripe_head_state *s, int disks)
++{
+ 		set_bit(STRIPE_HANDLE, &sh->state);
+-		if (failed == 0) {
+-			BUG_ON(uptodate != disks);
+-			compute_parity5(sh, CHECK_PARITY);
+-			uptodate--;
+-			if (page_is_zero(sh->dev[sh->pd_idx].page)) {
+-				/* parity is correct (on disc, not in buffer any more) */
++	/* Take one of the following actions:
++	 * 1/ start a check parity operation if (uptodate == disks)
++	 * 2/ finish a check parity operation and act on the result
++	 * 3/ skip to the writeback section if we previously
++	 *    initiated a recovery operation
++	 */
++	if (s->failed == 0 &&
++	    !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
++		if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
++			BUG_ON(s->uptodate != disks);
++			clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
++			sh->ops.count++;
++			s->uptodate--;
++		} else if (
++		       test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
++			clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
++			clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
++
++			if (sh->ops.zero_sum_result == 0)
++				/* parity is correct (on disc,
++				 * not in buffer any more)
++				 */
+ 				set_bit(STRIPE_INSYNC, &sh->state);
+-			} else {
+-				conf->mddev->resync_mismatches += STRIPE_SECTORS;
+-				if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
++			else {
++				conf->mddev->resync_mismatches +=
++					STRIPE_SECTORS;
++				if (test_bit(
++				     MD_RECOVERY_CHECK, &conf->mddev->recovery))
+ 					/* don't try to repair!! */
+ 					set_bit(STRIPE_INSYNC, &sh->state);
+ 				else {
+-					compute_block(sh, sh->pd_idx);
+-					uptodate++;
++					set_bit(STRIPE_OP_COMPUTE_BLK,
++						&sh->ops.pending);
++					set_bit(STRIPE_OP_MOD_REPAIR_PD,
++						&sh->ops.pending);
++					set_bit(R5_Wantcompute,
++						&sh->dev[sh->pd_idx].flags);
++					sh->ops.target = sh->pd_idx;
++					sh->ops.count++;
++					s->uptodate++;
++				}
++			}
+ 				}
+ 			}
++
++	/* check if we can clear a parity disk reconstruct */
++	if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
++		test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
++
++		clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
++		clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
++		clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
++		clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
+ 		}
+-		if (!test_bit(STRIPE_INSYNC, &sh->state)) {
++
++	/* Wait for check parity and compute block operations to complete
++	 * before write-back
++	 */
++	if (!test_bit(STRIPE_INSYNC, &sh->state) &&
++		!test_bit(STRIPE_OP_CHECK, &sh->ops.pending) &&
++		!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
++		struct r5dev *dev;
+ 			/* either failed parity check, or recovery is happening */
+-			if (failed==0)
+-				failed_num = sh->pd_idx;
+-			dev = &sh->dev[failed_num];
++		if (s->failed == 0)
++			s->failed_num = sh->pd_idx;
++		dev = &sh->dev[s->failed_num];
+ 			BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
+-			BUG_ON(uptodate != disks);
++		BUG_ON(s->uptodate != disks);
+ 
+ 			set_bit(R5_LOCKED, &dev->flags);
+ 			set_bit(R5_Wantwrite, &dev->flags);
++		if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
++			sh->ops.count++;
++
+ 			clear_bit(STRIPE_DEGRADED, &sh->state);
+-			locked++;
++		s->locked++;
+ 			set_bit(STRIPE_INSYNC, &sh->state);
+ 		}
++}
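
handle_parity_checks5() above is driven by the new three-stage flag protocol: handle_stripe marks an operation pending, raid5_run_ops acks it once issued, and the async callback marks it complete so a later handle_stripe pass can retire all three bits together. A toy model of that lifecycle (the flag names mirror the patch; request/issue/finish/reap are invented helpers, not kernel API):

    /*
     * Toy model of the pending/ack/complete protocol behind
     * sh->ops.  An operation is only reaped once the completion
     * stage is reached, and reaping clears all three stages.
     */
    #include <stdio.h>

    enum { OP_CHECK = 1 << 0, OP_COMPUTE_BLK = 1 << 1 };

    struct ops { unsigned long pending, ack, complete; };

    static void request(struct ops *o, unsigned long op) { o->pending  |= op; }
    static void issue(struct ops *o, unsigned long op)   { o->ack      |= op; }
    static void finish(struct ops *o, unsigned long op)  { o->complete |= op; }

    static int reap(struct ops *o, unsigned long op)
    {
        if (!(o->complete & op))
            return 0;                   /* still in flight */
        o->pending  &= ~op;             /* retire all three stages */
        o->ack      &= ~op;
        o->complete &= ~op;
        return 1;
    }

    int main(void)
    {
        struct ops o = { 0, 0, 0 };

        request(&o, OP_CHECK);
        printf("reap before completion: %d\n", reap(&o, OP_CHECK));
        issue(&o, OP_CHECK);
        finish(&o, OP_CHECK);
        printf("reap after completion:  %d\n", reap(&o, OP_CHECK));
        return 0;
    }
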
++
++
++static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh,
++				struct stripe_head_state *s,
++				struct r6_state *r6s, struct page *tmp_page,
++				int disks)
++{
++	int update_p = 0, update_q = 0;
++	struct r5dev *dev;
++	int pd_idx = sh->pd_idx;
++	int qd_idx = r6s->qd_idx;
++
++	set_bit(STRIPE_HANDLE, &sh->state);
++
++	BUG_ON(s->failed > 2);
++	BUG_ON(s->uptodate < disks);
++	/* Want to check and possibly repair P and Q.
++	 * However there could be one 'failed' device, in which
++	 * case we can only check one of them, possibly using the
++	 * other to generate missing data
++	 */
++
++	/* If !tmp_page, we cannot do the calculations,
++	 * but as we have set STRIPE_HANDLE, we will soon be called
++	 * by handle_stripe with a tmp_page - just wait until then.
++	 */
++	if (tmp_page) {
++		if (s->failed == r6s->q_failed) {
++			/* The only possible failed device holds 'Q', so it
++			 * makes sense to check P (if anything else had failed,
++			 * we would have used P to recreate it).
++			 */
++			compute_block_1(sh, pd_idx, 1);
++			if (!page_is_zero(sh->dev[pd_idx].page)) {
++				compute_block_1(sh, pd_idx, 0);
++				update_p = 1;
++			}
+ 	}
+-	if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
+-		md_done_sync(conf->mddev, STRIPE_SECTORS,1);
+-		clear_bit(STRIPE_SYNCING, &sh->state);
++		if (!r6s->q_failed && s->failed < 2) {
++			/* q is not failed, and we didn't use it to generate
++			 * anything, so it makes sense to check it
++			 */
++			memcpy(page_address(tmp_page),
++			       page_address(sh->dev[qd_idx].page),
++			       STRIPE_SIZE);
++			compute_parity6(sh, UPDATE_PARITY);
++			if (memcmp(page_address(tmp_page),
++				   page_address(sh->dev[qd_idx].page),
++				   STRIPE_SIZE) != 0) {
++				clear_bit(STRIPE_INSYNC, &sh->state);
++				update_q = 1;
++			}
++		}
++		if (update_p || update_q) {
++			conf->mddev->resync_mismatches += STRIPE_SECTORS;
++			if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
++				/* don't try to repair!! */
++				update_p = update_q = 0;
+ 	}
+ 
+-	/* If the failed drive is just a ReadError, then we might need to progress
+-	 * the repair/check process
++		/* now write out any block on a failed drive,
++		 * or P or Q if they need it
+ 	 */
+-	if (failed == 1 && ! conf->mddev->ro &&
+-	    test_bit(R5_ReadError, &sh->dev[failed_num].flags)
+-	    && !test_bit(R5_LOCKED, &sh->dev[failed_num].flags)
+-	    && test_bit(R5_UPTODATE, &sh->dev[failed_num].flags)
+-		) {
+-		dev = &sh->dev[failed_num];
+-		if (!test_bit(R5_ReWrite, &dev->flags)) {
++
++		if (s->failed == 2) {
++			dev = &sh->dev[r6s->failed_num[1]];
++			s->locked++;
++			set_bit(R5_LOCKED, &dev->flags);
+ 			set_bit(R5_Wantwrite, &dev->flags);
+-			set_bit(R5_ReWrite, &dev->flags);
++		}
++		if (s->failed >= 1) {
++			dev = &sh->dev[r6s->failed_num[0]];
++			s->locked++;
+ 			set_bit(R5_LOCKED, &dev->flags);
+-			locked++;
+-		} else {
+-			/* let's read it back */
+-			set_bit(R5_Wantread, &dev->flags);
++			set_bit(R5_Wantwrite, &dev->flags);
++		}
++
++		if (update_p) {
++			dev = &sh->dev[pd_idx];
++			s->locked++;
+ 			set_bit(R5_LOCKED, &dev->flags);
+-			locked++;
++			set_bit(R5_Wantwrite, &dev->flags);
+ 		}
++		if (update_q) {
++			dev = &sh->dev[qd_idx];
++			s->locked++;
++			set_bit(R5_LOCKED, &dev->flags);
++			set_bit(R5_Wantwrite, &dev->flags);
+ 	}
++		clear_bit(STRIPE_DEGRADED, &sh->state);
+ 
+-	if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+-		/* Need to write out all blocks after computing parity */
+-		sh->disks = conf->raid_disks;
+-		sh->pd_idx = stripe_to_pdidx(sh->sector, conf, conf->raid_disks);
+-		compute_parity5(sh, RECONSTRUCT_WRITE);
+-		for (i= conf->raid_disks; i--;) {
+-			set_bit(R5_LOCKED, &sh->dev[i].flags);
+-			locked++;
+-			set_bit(R5_Wantwrite, &sh->dev[i].flags);
+-		}
+-		clear_bit(STRIPE_EXPANDING, &sh->state);
+-	} else if (expanded) {
+-		clear_bit(STRIPE_EXPAND_READY, &sh->state);
+-		atomic_dec(&conf->reshape_stripes);
+-		wake_up(&conf->wait_for_overlap);
+-		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
++		set_bit(STRIPE_INSYNC, &sh->state);
+ 	}
++}
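
The tmp_page dance above verifies Q without clobbering it: recompute the expected parity into scratch memory, memcmp it against the on-disk block, and only schedule a repair write on mismatch. A userspace sketch of the same technique, using plain XOR parity in place of the real P/Q syndromes:

    /*
     * Sketch of the scratch-buffer parity check: recompute into tmp
     * space, compare against what the drive holds.  Plain XOR stands
     * in for the Reed-Solomon Q syndrome.
     */
    #include <stdio.h>
    #include <string.h>

    #define BLKSZ 16

    static void gen_parity(unsigned char *out, unsigned char data[][BLKSZ], int n)
    {
        memset(out, 0, BLKSZ);
        for (int d = 0; d < n; d++)
            for (int i = 0; i < BLKSZ; i++)
                out[i] ^= data[d][i];
    }

    int main(void)
    {
        unsigned char data[2][BLKSZ] = { "hello, stripe!", "parity example" };
        unsigned char on_disk[BLKSZ], tmp[BLKSZ];

        gen_parity(on_disk, data, 2);   /* what a clean drive would hold */
        on_disk[3] ^= 0x40;             /* inject a mismatch */

        gen_parity(tmp, data, 2);       /* recompute into scratch space */
        printf("parity %s\n", memcmp(tmp, on_disk, BLKSZ) ?
               "mismatch: schedule a repair write" : "ok");
        return 0;
    }
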
++
++static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
++				struct r6_state *r6s)
++{
++	int i;
+ 
+-	if (expanding && locked == 0) {
+ 		/* We have read all the blocks in this stripe and now we need to
+ 		 * copy some of them into a target stripe for expand.
+ 		 */
++	struct dma_async_tx_descriptor *tx = NULL;
+ 		clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+-		for (i=0; i< sh->disks; i++)
+-			if (i != sh->pd_idx) {
++	for (i = 0; i < sh->disks; i++)
++		if (i != sh->pd_idx && (r6s && i != r6s->qd_idx)) {
+ 				int dd_idx, pd_idx, j;
+ 				struct stripe_head *sh2;
+ 
+ 				sector_t bn = compute_blocknr(sh, i);
+ 				sector_t s = raid5_compute_sector(bn, conf->raid_disks,
+-								  conf->raid_disks-1,
+-								  &dd_idx, &pd_idx, conf);
+-				sh2 = get_active_stripe(conf, s, conf->raid_disks, pd_idx, 1);
++						conf->raid_disks -
++						conf->max_degraded, &dd_idx,
++						&pd_idx, conf);
++			sh2 = get_active_stripe(conf, s, conf->raid_disks,
++						pd_idx, 1);
+ 				if (sh2 == NULL)
+ 					/* so far only the early blocks of this stripe
+ 					 * have been requested.  When later blocks
+ 					 * get requested, we will try again
+ 					 */
+ 					continue;
+-				if(!test_bit(STRIPE_EXPANDING, &sh2->state) ||
++			if (!test_bit(STRIPE_EXPANDING, &sh2->state) ||
+ 				   test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) {
+ 					/* must have already done this block */
+ 					release_stripe(sh2);
+ 					continue;
+ 				}
+-				memcpy(page_address(sh2->dev[dd_idx].page),
+-				       page_address(sh->dev[i].page),
+-				       STRIPE_SIZE);
++
++			/* place all the copies on one channel */
++			tx = async_memcpy(sh2->dev[dd_idx].page,
++				sh->dev[i].page, 0, 0, STRIPE_SIZE,
++				ASYNC_TX_DEP_ACK, tx, NULL, NULL);
++
+ 				set_bit(R5_Expanded, &sh2->dev[dd_idx].flags);
+ 				set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags);
+-				for (j=0; j<conf->raid_disks; j++)
++			for (j = 0; j < conf->raid_disks; j++)
+ 					if (j != sh2->pd_idx &&
++				    (r6s && j != r6s->qd_idx) &&
+ 					    !test_bit(R5_Expanded, &sh2->dev[j].flags))
+ 						break;
+ 				if (j == conf->raid_disks) {
+@@ -1807,153 +2575,91 @@
+ 					set_bit(STRIPE_HANDLE, &sh2->state);
+ 				}
+ 				release_stripe(sh2);
+-			}
+-	}
+-
+-	spin_unlock(&sh->lock);
+-
+-	while ((bi=return_bi)) {
+-		int bytes = bi->bi_size;
+-
+-		return_bi = bi->bi_next;
+-		bi->bi_next = NULL;
+-		bi->bi_size = 0;
+-		bi->bi_end_io(bi, bytes,
+-			      test_bit(BIO_UPTODATE, &bi->bi_flags)
+-			        ? 0 : -EIO);
+-	}
+-	for (i=disks; i-- ;) {
+-		int rw;
+-		struct bio *bi;
+-		mdk_rdev_t *rdev;
+-		if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags))
+-			rw = WRITE;
+-		else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
+-			rw = READ;
+-		else
+-			continue;
+- 
+-		bi = &sh->dev[i].req;
+- 
+-		bi->bi_rw = rw;
+-		if (rw == WRITE)
+-			bi->bi_end_io = raid5_end_write_request;
+-		else
+-			bi->bi_end_io = raid5_end_read_request;
+- 
+-		rcu_read_lock();
+-		rdev = rcu_dereference(conf->disks[i].rdev);
+-		if (rdev && test_bit(Faulty, &rdev->flags))
+-			rdev = NULL;
+-		if (rdev)
+-			atomic_inc(&rdev->nr_pending);
+-		rcu_read_unlock();
+- 
+-		if (rdev) {
+-			if (syncing || expanding || expanded)
+-				md_sync_acct(rdev->bdev, STRIPE_SECTORS);
+ 
+-			bi->bi_bdev = rdev->bdev;
+-			PRINTK("for %llu schedule op %ld on disc %d\n",
+-				(unsigned long long)sh->sector, bi->bi_rw, i);
+-			atomic_inc(&sh->count);
+-			bi->bi_sector = sh->sector + rdev->data_offset;
+-			bi->bi_flags = 1 << BIO_UPTODATE;
+-			bi->bi_vcnt = 1;	
+-			bi->bi_max_vecs = 1;
+-			bi->bi_idx = 0;
+-			bi->bi_io_vec = &sh->dev[i].vec;
+-			bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
+-			bi->bi_io_vec[0].bv_offset = 0;
+-			bi->bi_size = STRIPE_SIZE;
+-			bi->bi_next = NULL;
+-			if (rw == WRITE &&
+-			    test_bit(R5_ReWrite, &sh->dev[i].flags))
+-				atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
+-			generic_make_request(bi);
+-		} else {
+-			if (rw == WRITE)
+-				set_bit(STRIPE_DEGRADED, &sh->state);
+-			PRINTK("skip op %ld on disc %d for sector %llu\n",
+-				bi->bi_rw, i, (unsigned long long)sh->sector);
+-			clear_bit(R5_LOCKED, &sh->dev[i].flags);
+-			set_bit(STRIPE_HANDLE, &sh->state);
++			/* done submitting copies, wait for them to complete */
++			if (i + 1 >= sh->disks) {
++				async_tx_ack(tx);
++				dma_wait_for_async_tx(tx);
+ 		}
+ 	}
+ }
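
The expansion copy loop above chains every async_memcpy() to the previous descriptor with ASYNC_TX_DEP_ACK, so acknowledging and waiting on the last descriptor covers the whole chain. A toy queue showing that one-wait-covers-the-chain shape (submit_copy()/wait_for() are stand-ins, not the async_tx API):

    /*
     * Toy copy-chain queue: every job depends on the previous one,
     * so an in-order drain up to the last handle completes them all.
     */
    #include <stdio.h>
    #include <string.h>

    struct copy_job { void *dst; const void *src; size_t len; };

    static struct copy_job queue[8];
    static int njobs;

    static int submit_copy(void *dst, const void *src, size_t len)
    {
        queue[njobs] = (struct copy_job){ dst, src, len };
        return njobs++;                 /* "descriptor" handle */
    }

    static void wait_for(int last)
    {
        for (int i = 0; i <= last; i++) /* in-order drain = dependency chain */
            memcpy(queue[i].dst, queue[i].src, queue[i].len);
        njobs = 0;
    }

    int main(void)
    {
        char src[3][8] = { "blk-0", "blk-1", "blk-2" }, dst[3][8];
        int tx = -1;

        for (int i = 0; i < 3; i++)
            tx = submit_copy(dst[i], src[i], sizeof(src[i]));
        wait_for(tx);                   /* one wait covers all three copies */
        printf("%s %s %s\n", dst[0], dst[1], dst[2]);
        return 0;
    }
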
+ 
+-static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
++/*
++ * handle_stripe - do things to a stripe.
++ *
++ * We lock the stripe and then examine the state of various bits
++ * to see what needs to be done.
++ * Possible results:
++ *    return some read requests which now have data
++ *    return some write requests which are safely on disc
++ *    schedule a read on some buffers
++ *    schedule a write of some buffers
++ *    return confirmation of parity correctness
++ *
++ * buffers are taken off read_list or write_list, and bh_cache buffers
++ * get BH_Lock set before the stripe lock is released.
++ *
++ */
++
++static void handle_stripe5(struct stripe_head *sh)
+ {
+-	raid6_conf_t *conf = sh->raid_conf;
+-	int disks = sh->disks;
+-	struct bio *return_bi= NULL;
+-	struct bio *bi;
+-	int i;
+-	int syncing, expanding, expanded;
+-	int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
+-	int non_overwrite = 0;
+-	int failed_num[2] = {0, 0};
+-	struct r5dev *dev, *pdev, *qdev;
+-	int pd_idx = sh->pd_idx;
+-	int qd_idx = raid6_next_disk(pd_idx, disks);
+-	int p_failed, q_failed;
++	raid5_conf_t *conf = sh->raid_conf;
++	int disks = sh->disks, i;
++	struct bio *return_bi = NULL, *bi;
++	struct stripe_head_state s;
++	struct r5dev *dev;
++	unsigned long pending = 0;
+ 
+-	PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d, qd_idx=%d\n",
+-	       (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count),
+-	       pd_idx, qd_idx);
++	memset(&s, 0, sizeof(s));
++	pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
++		"ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state,
++		atomic_read(&sh->count), sh->pd_idx,
++		sh->ops.pending, sh->ops.ack, sh->ops.complete);
+ 
+ 	spin_lock(&sh->lock);
+ 	clear_bit(STRIPE_HANDLE, &sh->state);
+ 	clear_bit(STRIPE_DELAYED, &sh->state);
+ 
+-	syncing = test_bit(STRIPE_SYNCING, &sh->state);
+-	expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+-	expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
++	s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
++	s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
++	s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
+ 	/* Now to look around and see what can be done */
+ 
+ 	rcu_read_lock();
+ 	for (i=disks; i--; ) {
+ 		mdk_rdev_t *rdev;
+-		dev = &sh->dev[i];
++		struct r5dev *dev = &sh->dev[i];
+ 		clear_bit(R5_Insync, &dev->flags);
+ 
+-		PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
+-			i, dev->flags, dev->toread, dev->towrite, dev->written);
+-		/* maybe we can reply to a read */
+-		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
+-			struct bio *rbi, *rbi2;
+-			PRINTK("Return read for disc %d\n", i);
+-			spin_lock_irq(&conf->device_lock);
+-			rbi = dev->toread;
+-			dev->toread = NULL;
+-			if (test_and_clear_bit(R5_Overlap, &dev->flags))
+-				wake_up(&conf->wait_for_overlap);
+-			spin_unlock_irq(&conf->device_lock);
+-			while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
+-				copy_data(0, rbi, dev->page, dev->sector);
+-				rbi2 = r5_next_bio(rbi, dev->sector);
+-				spin_lock_irq(&conf->device_lock);
+-				if (--rbi->bi_phys_segments == 0) {
+-					rbi->bi_next = return_bi;
+-					return_bi = rbi;
+-				}
+-				spin_unlock_irq(&conf->device_lock);
+-				rbi = rbi2;
+-			}
+-		}
+-
+-		/* now count some things */
+-		if (test_bit(R5_LOCKED, &dev->flags)) locked++;
+-		if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++;
++		pr_debug("check %d: state 0x%lx toread %p read %p write %p "
++			"written %p\n",	i, dev->flags, dev->toread, dev->read,
++			dev->towrite, dev->written);
+ 
++		/* maybe we can request a biofill operation
++		 *
++		 * new wantfill requests are only permitted while
++		 * STRIPE_OP_BIOFILL is clear
++		 */
++		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread &&
++			!test_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
++			set_bit(R5_Wantfill, &dev->flags);
+ 
+-		if (dev->toread) to_read++;
++		/* now count some things */
++		if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
++		if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
++		if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++;
++
++		if (test_bit(R5_Wantfill, &dev->flags))
++			s.to_fill++;
++		else if (dev->toread)
++			s.to_read++;
+ 		if (dev->towrite) {
+-			to_write++;
++			s.to_write++;
+ 			if (!test_bit(R5_OVERWRITE, &dev->flags))
+-				non_overwrite++;
++				s.non_overwrite++;
+ 		}
+-		if (dev->written) written++;
++		if (dev->written)
++			s.written++;
+ 		rdev = rcu_dereference(conf->disks[i].rdev);
+ 		if (!rdev || !test_bit(In_sync, &rdev->flags)) {
+ 			/* The ReadError flag will just be confusing now */
+@@ -1962,376 +2668,361 @@
+ 		}
+ 		if (!rdev || !test_bit(In_sync, &rdev->flags)
+ 		    || test_bit(R5_ReadError, &dev->flags)) {
+-			if ( failed < 2 )
+-				failed_num[failed] = i;
+-			failed++;
++			s.failed++;
++			s.failed_num = i;
+ 		} else
+ 			set_bit(R5_Insync, &dev->flags);
+ 	}
+ 	rcu_read_unlock();
+-	PRINTK("locked=%d uptodate=%d to_read=%d"
+-	       " to_write=%d failed=%d failed_num=%d,%d\n",
+-	       locked, uptodate, to_read, to_write, failed,
+-	       failed_num[0], failed_num[1]);
+-	/* check if the array has lost >2 devices and, if so, some requests might
+-	 * need to be failed
+-	 */
+-	if (failed > 2 && to_read+to_write+written) {
+-		for (i=disks; i--; ) {
+-			int bitmap_end = 0;
+-
+-			if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
+-				mdk_rdev_t *rdev;
+-				rcu_read_lock();
+-				rdev = rcu_dereference(conf->disks[i].rdev);
+-				if (rdev && test_bit(In_sync, &rdev->flags))
+-					/* multiple read failures in one stripe */
+-					md_error(conf->mddev, rdev);
+-				rcu_read_unlock();
+-			}
+-
+-			spin_lock_irq(&conf->device_lock);
+-			/* fail all writes first */
+-			bi = sh->dev[i].towrite;
+-			sh->dev[i].towrite = NULL;
+-			if (bi) { to_write--; bitmap_end = 1; }
+-
+-			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+-				wake_up(&conf->wait_for_overlap);
+ 
+-			while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
+-				struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
+-				clear_bit(BIO_UPTODATE, &bi->bi_flags);
+-				if (--bi->bi_phys_segments == 0) {
+-					md_write_end(conf->mddev);
+-					bi->bi_next = return_bi;
+-					return_bi = bi;
+-				}
+-				bi = nextbi;
+-			}
+-			/* and fail all 'written' */
+-			bi = sh->dev[i].written;
+-			sh->dev[i].written = NULL;
+-			if (bi) bitmap_end = 1;
+-			while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) {
+-				struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
+-				clear_bit(BIO_UPTODATE, &bi->bi_flags);
+-				if (--bi->bi_phys_segments == 0) {
+-					md_write_end(conf->mddev);
+-					bi->bi_next = return_bi;
+-					return_bi = bi;
+-				}
+-				bi = bi2;
+-			}
++	if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending))
++		sh->ops.count++;
+ 
+-			/* fail any reads if this device is non-operational */
+-			if (!test_bit(R5_Insync, &sh->dev[i].flags) ||
+-			    test_bit(R5_ReadError, &sh->dev[i].flags)) {
+-				bi = sh->dev[i].toread;
+-				sh->dev[i].toread = NULL;
+-				if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
+-					wake_up(&conf->wait_for_overlap);
+-				if (bi) to_read--;
+-				while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){
+-					struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
+-					clear_bit(BIO_UPTODATE, &bi->bi_flags);
+-					if (--bi->bi_phys_segments == 0) {
+-						bi->bi_next = return_bi;
+-						return_bi = bi;
+-					}
+-					bi = nextbi;
+-				}
+-			}
+-			spin_unlock_irq(&conf->device_lock);
+-			if (bitmap_end)
+-				bitmap_endwrite(conf->mddev->bitmap, sh->sector,
+-						STRIPE_SECTORS, 0, 0);
+-		}
+-	}
+-	if (failed > 2 && syncing) {
++	pr_debug("locked=%d uptodate=%d to_read=%d"
++		" to_write=%d failed=%d failed_num=%d\n",
++		s.locked, s.uptodate, s.to_read, s.to_write,
++		s.failed, s.failed_num);
++	/* check if the array has lost two devices and, if so, some requests might
++	 * need to be failed
++	 */
++	if (s.failed > 1 && s.to_read+s.to_write+s.written)
++		handle_requests_to_failed_array(conf, sh, &s, disks,
++						&return_bi);
++	if (s.failed > 1 && s.syncing) {
+ 		md_done_sync(conf->mddev, STRIPE_SECTORS,0);
+ 		clear_bit(STRIPE_SYNCING, &sh->state);
+-		syncing = 0;
++		s.syncing = 0;
+ 	}
+ 
+-	/*
+-	 * might be able to return some write requests if the parity blocks
+-	 * are safe, or on a failed drive
+-	 */
+-	pdev = &sh->dev[pd_idx];
+-	p_failed = (failed >= 1 && failed_num[0] == pd_idx)
+-		|| (failed >= 2 && failed_num[1] == pd_idx);
+-	qdev = &sh->dev[qd_idx];
+-	q_failed = (failed >= 1 && failed_num[0] == qd_idx)
+-		|| (failed >= 2 && failed_num[1] == qd_idx);
+-
+-	if ( written &&
+-	     ( p_failed || ((test_bit(R5_Insync, &pdev->flags)
+-			     && !test_bit(R5_LOCKED, &pdev->flags)
+-			     && test_bit(R5_UPTODATE, &pdev->flags))) ) &&
+-	     ( q_failed || ((test_bit(R5_Insync, &qdev->flags)
+-			     && !test_bit(R5_LOCKED, &qdev->flags)
+-			     && test_bit(R5_UPTODATE, &qdev->flags))) ) ) {
+-		/* any written block on an uptodate or failed drive can be
+-		 * returned.  Note that if we 'wrote' to a failed drive,
+-		 * it will be UPTODATE, but never LOCKED, so we don't need
+-		 * to test 'failed' directly.
++	/* might be able to return some write requests if the parity block
++	 * is safe, or on a failed drive
+ 		 */
+-		for (i=disks; i--; )
+-			if (sh->dev[i].written) {
+-				dev = &sh->dev[i];
+-				if (!test_bit(R5_LOCKED, &dev->flags) &&
+-				    test_bit(R5_UPTODATE, &dev->flags) ) {
+-					/* We can return any write requests */
+-					int bitmap_end = 0;
+-					struct bio *wbi, *wbi2;
+-					PRINTK("Return write for stripe %llu disc %d\n",
+-					       (unsigned long long)sh->sector, i);
+-					spin_lock_irq(&conf->device_lock);
+-					wbi = dev->written;
+-					dev->written = NULL;
+-					while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) {
+-						wbi2 = r5_next_bio(wbi, dev->sector);
+-						if (--wbi->bi_phys_segments == 0) {
+-							md_write_end(conf->mddev);
+-							wbi->bi_next = return_bi;
+-							return_bi = wbi;
+-						}
+-						wbi = wbi2;
+-					}
+-					if (dev->towrite == NULL)
+-						bitmap_end = 1;
+-					spin_unlock_irq(&conf->device_lock);
+-					if (bitmap_end)
+-						bitmap_endwrite(conf->mddev->bitmap, sh->sector,
+-								STRIPE_SECTORS,
+-								!test_bit(STRIPE_DEGRADED, &sh->state), 0);
+-				}
+-			}
+-	}
++	dev = &sh->dev[sh->pd_idx];
++	if ( s.written &&
++	     ((test_bit(R5_Insync, &dev->flags) &&
++	       !test_bit(R5_LOCKED, &dev->flags) &&
++	       test_bit(R5_UPTODATE, &dev->flags)) ||
++	       (s.failed == 1 && s.failed_num == sh->pd_idx)))
++		handle_completed_write_requests(conf, sh, disks, &return_bi);
+ 
+ 	/* Now we might consider reading some blocks, either to check/generate
+ 	 * parity, or to satisfy requests
+ 	 * or to load a block that is being partially written.
+ 	 */
+-	if (to_read || non_overwrite || (to_write && failed) ||
+-	    (syncing && (uptodate < disks)) || expanding) {
+-		for (i=disks; i--;) {
+-			dev = &sh->dev[i];
+-			if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
+-			    (dev->toread ||
+-			     (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) ||
+-			     syncing ||
+-			     expanding ||
+-			     (failed >= 1 && (sh->dev[failed_num[0]].toread || to_write)) ||
+-			     (failed >= 2 && (sh->dev[failed_num[1]].toread || to_write))
+-				    )
+-				) {
+-				/* we would like to get this block, possibly
+-				 * by computing it, but we might not be able to
++	if (s.to_read || s.non_overwrite ||
++	    (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding ||
++	    test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending))
++		handle_issuing_new_read_requests5(sh, &s, disks);
++
++	/* Now we check to see if any write operations have recently
++	 * completed
+ 				 */
+-				if (uptodate == disks-1) {
+-					PRINTK("Computing stripe %llu block %d\n",
+-					       (unsigned long long)sh->sector, i);
+-					compute_block_1(sh, i, 0);
+-					uptodate++;
+-				} else if ( uptodate == disks-2 && failed >= 2 ) {
+-					/* Computing 2-failure is *very* expensive; only do it if failed >= 2 */
+-					int other;
+-					for (other=disks; other--;) {
+-						if ( other == i )
+-							continue;
+-						if ( !test_bit(R5_UPTODATE, &sh->dev[other].flags) )
+-							break;
+-					}
+-					BUG_ON(other < 0);
+-					PRINTK("Computing stripe %llu blocks %d,%d\n",
+-					       (unsigned long long)sh->sector, i, other);
+-					compute_block_2(sh, i, other);
+-					uptodate += 2;
+-				} else if (test_bit(R5_Insync, &dev->flags)) {
+-					set_bit(R5_LOCKED, &dev->flags);
+-					set_bit(R5_Wantread, &dev->flags);
+-					locked++;
+-					PRINTK("Reading block %d (sync=%d)\n",
+-						i, syncing);
+-				}
+-			}
+-		}
+-		set_bit(STRIPE_HANDLE, &sh->state);
++
++	/* leave prexor set until postxor is done, allows us to distinguish
++	 * a rmw from a rcw during biodrain
++	 */
++	if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
++		test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
++
++		clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
++		clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
++		clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
++
++		for (i = disks; i--; )
++			clear_bit(R5_Wantprexor, &sh->dev[i].flags);
+ 	}
+ 
+-	/* now to consider writing and what else, if anything should be read */
+-	if (to_write) {
+-		int rcw=0, must_compute=0;
+-		for (i=disks ; i--;) {
++	/* if only POSTXOR is set then this is an 'expand' postxor */
++	if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) &&
++		test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
++
++		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete);
++		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack);
++		clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending);
++
++		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
++		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
++		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
++
++		/* All the 'written' buffers and the parity block are ready to
++		 * be written back to disk
++		 */
++		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags));
++		for (i = disks; i--; ) {
+ 			dev = &sh->dev[i];
+-			/* Would I have to read this buffer for reconstruct_write */
+-			if (!test_bit(R5_OVERWRITE, &dev->flags)
+-			    && i != pd_idx && i != qd_idx
+-			    && (!test_bit(R5_LOCKED, &dev->flags)
+-				    ) &&
+-			    !test_bit(R5_UPTODATE, &dev->flags)) {
+-				if (test_bit(R5_Insync, &dev->flags)) rcw++;
+-				else {
+-					PRINTK("raid6: must_compute: disk %d flags=%#lx\n", i, dev->flags);
+-					must_compute++;
++			if (test_bit(R5_LOCKED, &dev->flags) &&
++				(i == sh->pd_idx || dev->written)) {
++				pr_debug("Writing block %d\n", i);
++				set_bit(R5_Wantwrite, &dev->flags);
++				if (!test_and_set_bit(
++				    STRIPE_OP_IO, &sh->ops.pending))
++					sh->ops.count++;
++				if (!test_bit(R5_Insync, &dev->flags) ||
++				    (i == sh->pd_idx && s.failed == 0))
++					set_bit(STRIPE_INSYNC, &sh->state);
++			}
+ 				}
++		if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
++			atomic_dec(&conf->preread_active_stripes);
++			if (atomic_read(&conf->preread_active_stripes) <
++				IO_THRESHOLD)
++				md_wakeup_thread(conf->mddev->thread);
+ 			}
+ 		}
+-		PRINTK("for sector %llu, rcw=%d, must_compute=%d\n",
+-		       (unsigned long long)sh->sector, rcw, must_compute);
+-		set_bit(STRIPE_HANDLE, &sh->state);
+ 
+-		if (rcw > 0)
+-			/* want reconstruct write, but need to get some data */
+-			for (i=disks; i--;) {
+-				dev = &sh->dev[i];
+-				if (!test_bit(R5_OVERWRITE, &dev->flags)
+-				    && !(failed == 0 && (i == pd_idx || i == qd_idx))
+-				    && !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
+-				    test_bit(R5_Insync, &dev->flags)) {
+-					if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+-					{
+-						PRINTK("Read_old stripe %llu block %d for Reconstruct\n",
+-						       (unsigned long long)sh->sector, i);
++	/* Now to consider new write requests and what else, if anything
++	 * should be read.  We do not handle new writes when:
++	 * 1/ A 'write' operation (copy+xor) is already in flight.
++	 * 2/ A 'check' operation is in flight, as it may clobber the parity
++	 *    block.
++	 */
++	if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
++			  !test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
++		handle_issuing_new_write_requests5(conf, sh, &s, disks);
++
++	/* maybe we need to check and possibly fix the parity for this stripe
++	 * Any reads will already have been scheduled, so we just see if enough
++	 * data is available.  The parity check is held off while parity
++	 * dependent operations are in flight.
++	 */
++	if ((s.syncing && s.locked == 0 &&
++	     !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
++	     !test_bit(STRIPE_INSYNC, &sh->state)) ||
++	      test_bit(STRIPE_OP_CHECK, &sh->ops.pending) ||
++	      test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending))
++		handle_parity_checks5(conf, sh, &s, disks);
++
++	if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
++		md_done_sync(conf->mddev, STRIPE_SECTORS,1);
++		clear_bit(STRIPE_SYNCING, &sh->state);
++	}
++
++	/* If the failed drive is just a ReadError, then we might need to progress
++	 * the repair/check process
++	 */
++	if (s.failed == 1 && !conf->mddev->ro &&
++	    test_bit(R5_ReadError, &sh->dev[s.failed_num].flags)
++	    && !test_bit(R5_LOCKED, &sh->dev[s.failed_num].flags)
++	    && test_bit(R5_UPTODATE, &sh->dev[s.failed_num].flags)
++		) {
++		dev = &sh->dev[s.failed_num];
++		if (!test_bit(R5_ReWrite, &dev->flags)) {
++			set_bit(R5_Wantwrite, &dev->flags);
++			if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
++				sh->ops.count++;
++			set_bit(R5_ReWrite, &dev->flags);
+ 						set_bit(R5_LOCKED, &dev->flags);
+-						set_bit(R5_Wantread, &dev->flags);
+-						locked++;
++			s.locked++;
+ 					} else {
+-						PRINTK("Request delayed stripe %llu block %d for Reconstruct\n",
+-						       (unsigned long long)sh->sector, i);
+-						set_bit(STRIPE_DELAYED, &sh->state);
+-						set_bit(STRIPE_HANDLE, &sh->state);
++			/* let's read it back */
++			set_bit(R5_Wantread, &dev->flags);
++			if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
++				sh->ops.count++;
++			set_bit(R5_LOCKED, &dev->flags);
++			s.locked++;
+ 					}
+ 				}
++
++	/* Finish postxor operations initiated by the expansion
++	 * process
++	 */
++	if (test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete) &&
++		!test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending)) {
++
++		clear_bit(STRIPE_EXPANDING, &sh->state);
++
++		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending);
++		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack);
++		clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete);
++
++		for (i = conf->raid_disks; i--; ) {
++			set_bit(R5_Wantwrite, &sh->dev[i].flags);
++			if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
++				sh->ops.count++;
+ 			}
+-		/* now if nothing is locked, and if we have enough data, we can start a write request */
+-		if (locked == 0 && rcw == 0 &&
+-		    !test_bit(STRIPE_BIT_DELAY, &sh->state)) {
+-			if ( must_compute > 0 ) {
+-				/* We have failed blocks and need to compute them */
+-				switch ( failed ) {
+-				case 0:	BUG();
+-				case 1: compute_block_1(sh, failed_num[0], 0); break;
+-				case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break;
+-				default: BUG();	/* This request should have been failed? */
+ 				}
++
++	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) &&
++		!test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
++		/* Need to write out all blocks after computing parity */
++		sh->disks = conf->raid_disks;
++		sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
++			conf->raid_disks);
++		s.locked += handle_write_operations5(sh, 0, 1);
++	} else if (s.expanded &&
++		!test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
++		clear_bit(STRIPE_EXPAND_READY, &sh->state);
++		atomic_dec(&conf->reshape_stripes);
++		wake_up(&conf->wait_for_overlap);
++		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
+ 			}
+ 
+-			PRINTK("Computing parity for stripe %llu\n", (unsigned long long)sh->sector);
+-			compute_parity6(sh, RECONSTRUCT_WRITE);
+-			/* now every locked buffer is ready to be written */
+-			for (i=disks; i--;)
+-				if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
+-					PRINTK("Writing stripe %llu block %d\n",
+-					       (unsigned long long)sh->sector, i);
+-					locked++;
+-					set_bit(R5_Wantwrite, &sh->dev[i].flags);
++	if (s.expanding && s.locked == 0)
++		handle_stripe_expansion(conf, sh, NULL);
++
++	if (sh->ops.count)
++		pending = get_stripe_work(sh);
++
++	spin_unlock(&sh->lock);
++
++	if (pending)
++		raid5_run_ops(sh, pending);
++
++	while ((bi=return_bi)) {
++		int bytes = bi->bi_size;
++
++		return_bi = bi->bi_next;
++		bi->bi_next = NULL;
++		bi->bi_size = 0;
++		bi->bi_end_io(bi, bytes,
++			      test_bit(BIO_UPTODATE, &bi->bi_flags)
++			        ? 0 : -EIO);
+ 				}
+-			/* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
+-			set_bit(STRIPE_INSYNC, &sh->state);
++}
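
handle_stripe5() finishes by draining return_bi outside the stripe lock: bios are pushed LIFO onto a singly linked list while the lock is held and completed afterwards. A self-contained sketch of that hand-off (struct req and complete_all() are illustrative stand-ins for struct bio and bi_end_io):

    /*
     * Sketch of the return_bi hand-off: build the completion list
     * under the lock, walk and complete it after dropping the lock.
     */
    #include <stdio.h>
    #include <stddef.h>

    struct req {
        int id, error;
        struct req *next;
    };

    static void push(struct req **list, struct req *r)
    {
        r->next = *list;                /* LIFO, like bi_next chaining */
        *list = r;
    }

    static void complete_all(struct req *list)
    {
        while (list) {
            struct req *next = list->next;  /* save before handing off */

            list->next = NULL;
            printf("completing req %d, error=%d\n", list->id, list->error);
            list = next;
        }
    }

    int main(void)
    {
        struct req a = { .id = 1 }, b = { .id = 2, .error = -5 /* ~ -EIO */ };
        struct req *return_list = NULL;

        push(&return_list, &a);         /* done under the "stripe lock" */
        push(&return_list, &b);
        complete_all(return_list);      /* done after dropping it */
        return 0;
    }
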
+ 
+-			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
+-				atomic_dec(&conf->preread_active_stripes);
+-				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
+-					md_wakeup_thread(conf->mddev->thread);
++static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
++{
++	raid6_conf_t *conf = sh->raid_conf;
++	int disks = sh->disks;
++	struct bio *return_bi = NULL;
++	struct bio *bi;
++	int i, pd_idx = sh->pd_idx;
++	struct stripe_head_state s;
++	struct r6_state r6s;
++	struct r5dev *dev, *pdev, *qdev;
++
++	r6s.qd_idx = raid6_next_disk(pd_idx, disks);
++	pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
++		"pd_idx=%d, qd_idx=%d\n",
++	       (unsigned long long)sh->sector, sh->state,
++	       atomic_read(&sh->count), pd_idx, r6s.qd_idx);
++	memset(&s, 0, sizeof(s));
++
++	spin_lock(&sh->lock);
++	clear_bit(STRIPE_HANDLE, &sh->state);
++	clear_bit(STRIPE_DELAYED, &sh->state);
++
++	s.syncing = test_bit(STRIPE_SYNCING, &sh->state);
++	s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state);
++	s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state);
++	/* Now to look around and see what can be done */
++
++	rcu_read_lock();
++	for (i=disks; i--; ) {
++		mdk_rdev_t *rdev;
++		dev = &sh->dev[i];
++		clear_bit(R5_Insync, &dev->flags);
++
++		pr_debug("check %d: state 0x%lx read %p write %p written %p\n",
++			i, dev->flags, dev->toread, dev->towrite, dev->written);
++		/* maybe we can reply to a read */
++		if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) {
++			struct bio *rbi, *rbi2;
++			pr_debug("Return read for disc %d\n", i);
++			spin_lock_irq(&conf->device_lock);
++			rbi = dev->toread;
++			dev->toread = NULL;
++			if (test_and_clear_bit(R5_Overlap, &dev->flags))
++				wake_up(&conf->wait_for_overlap);
++			spin_unlock_irq(&conf->device_lock);
++			while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) {
++				copy_data(0, rbi, dev->page, dev->sector);
++				rbi2 = r5_next_bio(rbi, dev->sector);
++				spin_lock_irq(&conf->device_lock);
++				if (--rbi->bi_phys_segments == 0) {
++					rbi->bi_next = return_bi;
++					return_bi = rbi;
+ 			}
++				spin_unlock_irq(&conf->device_lock);
++				rbi = rbi2;
+ 		}
+ 	}
+ 
+-	/* maybe we need to check and possibly fix the parity for this stripe
+-	 * Any reads will already have been scheduled, so we just see if enough data
+-	 * is available
+-	 */
+-	if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) {
+-		int update_p = 0, update_q = 0;
+-		struct r5dev *dev;
+-
+-		set_bit(STRIPE_HANDLE, &sh->state);
++		/* now count some things */
++		if (test_bit(R5_LOCKED, &dev->flags)) s.locked++;
++		if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++;
+ 
+-		BUG_ON(failed>2);
+-		BUG_ON(uptodate < disks);
+-		/* Want to check and possibly repair P and Q.
+-		 * However there could be one 'failed' device, in which
+-		 * case we can only check one of them, possibly using the
+-		 * other to generate missing data
+-		 */
+ 
+-		/* If !tmp_page, we cannot do the calculations,
+-		 * but as we have set STRIPE_HANDLE, we will soon be called
+-		 * by stripe_handle with a tmp_page - just wait until then.
+-		 */
+-		if (tmp_page) {
+-			if (failed == q_failed) {
+-				/* The only possible failed device holds 'Q', so it makes
+-				 * sense to check P (If anything else were failed, we would
+-				 * have used P to recreate it).
+-				 */
+-				compute_block_1(sh, pd_idx, 1);
+-				if (!page_is_zero(sh->dev[pd_idx].page)) {
+-					compute_block_1(sh,pd_idx,0);
+-					update_p = 1;
+-				}
++		if (dev->toread)
++			s.to_read++;
++		if (dev->towrite) {
++			s.to_write++;
++			if (!test_bit(R5_OVERWRITE, &dev->flags))
++				s.non_overwrite++;
+ 			}
+-			if (!q_failed && failed < 2) {
+-				/* q is not failed, and we didn't use it to generate
+-				 * anything, so it makes sense to check it
+-				 */
+-				memcpy(page_address(tmp_page),
+-				       page_address(sh->dev[qd_idx].page),
+-				       STRIPE_SIZE);
+-				compute_parity6(sh, UPDATE_PARITY);
+-				if (memcmp(page_address(tmp_page),
+-					   page_address(sh->dev[qd_idx].page),
+-					   STRIPE_SIZE)!= 0) {
+-					clear_bit(STRIPE_INSYNC, &sh->state);
+-					update_q = 1;
++		if (dev->written)
++			s.written++;
++		rdev = rcu_dereference(conf->disks[i].rdev);
++		if (!rdev || !test_bit(In_sync, &rdev->flags)) {
++			/* The ReadError flag will just be confusing now */
++			clear_bit(R5_ReadError, &dev->flags);
++			clear_bit(R5_ReWrite, &dev->flags);
+ 				}
++		if (!rdev || !test_bit(In_sync, &rdev->flags)
++		    || test_bit(R5_ReadError, &dev->flags)) {
++			if (s.failed < 2)
++				r6s.failed_num[s.failed] = i;
++			s.failed++;
++		} else
++			set_bit(R5_Insync, &dev->flags);
+ 			}
+-			if (update_p || update_q) {
+-				conf->mddev->resync_mismatches += STRIPE_SECTORS;
+-				if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+-					/* don't try to repair!! */
+-					update_p = update_q = 0;
++	rcu_read_unlock();
++	pr_debug("locked=%d uptodate=%d to_read=%d"
++	       " to_write=%d failed=%d failed_num=%d,%d\n",
++	       s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
++	       r6s.failed_num[0], r6s.failed_num[1]);
++	/* check if the array has lost >2 devices and, if so, some requests
++	 * might need to be failed
++	 */
++	if (s.failed > 2 && s.to_read+s.to_write+s.written)
++		handle_requests_to_failed_array(conf, sh, &s, disks,
++						&return_bi);
++	if (s.failed > 2 && s.syncing) {
++		md_done_sync(conf->mddev, STRIPE_SECTORS,0);
++		clear_bit(STRIPE_SYNCING, &sh->state);
++		s.syncing = 0;
+ 			}
+ 
+-			/* now write out any block on a failed drive,
+-			 * or P or Q if they need it
++	/*
++	 * might be able to return some write requests if the parity blocks
++	 * are safe, or on a failed drive
+ 			 */
++	pdev = &sh->dev[pd_idx];
++	r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx)
++		|| (s.failed >= 2 && r6s.failed_num[1] == pd_idx);
++	qdev = &sh->dev[r6s.qd_idx];
++	r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == r6s.qd_idx)
++		|| (s.failed >= 2 && r6s.failed_num[1] == r6s.qd_idx);
+ 
+-			if (failed == 2) {
+-				dev = &sh->dev[failed_num[1]];
+-				locked++;
+-				set_bit(R5_LOCKED, &dev->flags);
+-				set_bit(R5_Wantwrite, &dev->flags);
+-			}
+-			if (failed >= 1) {
+-				dev = &sh->dev[failed_num[0]];
+-				locked++;
+-				set_bit(R5_LOCKED, &dev->flags);
+-				set_bit(R5_Wantwrite, &dev->flags);
+-			}
++	if ( s.written &&
++	     ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
++			     && !test_bit(R5_LOCKED, &pdev->flags)
++			     && test_bit(R5_UPTODATE, &pdev->flags)))) &&
++	     ( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
++			     && !test_bit(R5_LOCKED, &qdev->flags)
++			     && test_bit(R5_UPTODATE, &qdev->flags)))))
++		handle_completed_write_requests(conf, sh, disks, &return_bi);
+ 
+-			if (update_p) {
+-				dev = &sh->dev[pd_idx];
+-				locked ++;
+-				set_bit(R5_LOCKED, &dev->flags);
+-				set_bit(R5_Wantwrite, &dev->flags);
+-			}
+-			if (update_q) {
+-				dev = &sh->dev[qd_idx];
+-				locked++;
+-				set_bit(R5_LOCKED, &dev->flags);
+-				set_bit(R5_Wantwrite, &dev->flags);
+-			}
+-			clear_bit(STRIPE_DEGRADED, &sh->state);
++	/* Now we might consider reading some blocks, either to check/generate
++	 * parity, or to satisfy requests
++	 * or to load a block that is being partially written.
++	 */
++	if (s.to_read || s.non_overwrite || (s.to_write && s.failed) ||
++	    (s.syncing && (s.uptodate < disks)) || s.expanding)
++		handle_issuing_new_read_requests6(sh, &s, &r6s, disks);
+ 
+-			set_bit(STRIPE_INSYNC, &sh->state);
+-		}
+-	}
++	/* now to consider writing and what else, if anything should be read */
++	if (s.to_write)
++		handle_issuing_new_write_requests6(conf, sh, &s, &r6s, disks);
++
++	/* maybe we need to check and possibly fix the parity for this stripe
++	 * Any reads will already have been scheduled, so we just see if enough
++	 * data is available
++	 */
++	if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state))
++		handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks);
+ 
+-	if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
++	if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
+ 		md_done_sync(conf->mddev, STRIPE_SECTORS,1);
+ 		clear_bit(STRIPE_SYNCING, &sh->state);
+ 	}
+@@ -2339,9 +3030,9 @@
+ 	/* If the failed drives are just a ReadError, then we might need
+ 	 * to progress the repair/check process
+ 	 */
+-	if (failed <= 2 && ! conf->mddev->ro)
+-		for (i=0; i<failed;i++) {
+-			dev = &sh->dev[failed_num[i]];
++	if (s.failed <= 2 && !conf->mddev->ro)
++		for (i = 0; i < s.failed; i++) {
++			dev = &sh->dev[r6s.failed_num[i]];
+ 			if (test_bit(R5_ReadError, &dev->flags)
+ 			    && !test_bit(R5_LOCKED, &dev->flags)
+ 			    && test_bit(R5_UPTODATE, &dev->flags)
+@@ -2358,7 +3049,7 @@
+ 			}
+ 		}
+ 
+-	if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
++	if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
+ 		/* Need to write out all blocks after computing P&Q */
+ 		sh->disks = conf->raid_disks;
+ 		sh->pd_idx = stripe_to_pdidx(sh->sector, conf,
+@@ -2366,69 +3057,19 @@
+ 		compute_parity6(sh, RECONSTRUCT_WRITE);
+ 		for (i = conf->raid_disks ; i-- ;  ) {
+ 			set_bit(R5_LOCKED, &sh->dev[i].flags);
+-			locked++;
++			s.locked++;
+ 			set_bit(R5_Wantwrite, &sh->dev[i].flags);
+ 		}
+ 		clear_bit(STRIPE_EXPANDING, &sh->state);
+-	} else if (expanded) {
++	} else if (s.expanded) {
+ 		clear_bit(STRIPE_EXPAND_READY, &sh->state);
+ 		atomic_dec(&conf->reshape_stripes);
+ 		wake_up(&conf->wait_for_overlap);
+ 		md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
+ 	}
+ 
+-	if (expanding && locked == 0) {
+-		/* We have read all the blocks in this stripe and now we need to
+-		 * copy some of them into a target stripe for expand.
+-		 */
+-		clear_bit(STRIPE_EXPAND_SOURCE, &sh->state);
+-		for (i = 0; i < sh->disks ; i++)
+-			if (i != pd_idx && i != qd_idx) {
+-				int dd_idx2, pd_idx2, j;
+-				struct stripe_head *sh2;
+-
+-				sector_t bn = compute_blocknr(sh, i);
+-				sector_t s = raid5_compute_sector(
+-					bn, conf->raid_disks,
+-					conf->raid_disks - conf->max_degraded,
+-					&dd_idx2, &pd_idx2, conf);
+-				sh2 = get_active_stripe(conf, s,
+-							conf->raid_disks,
+-						       pd_idx2, 1);
+-				if (sh2 == NULL)
+-					/* so far only the early blocks of
+-					 * this stripe have been requested.
+-					 * When later blocks get requested, we
+-					 * will try again
+-					 */
+-					continue;
+-				if (!test_bit(STRIPE_EXPANDING, &sh2->state) ||
+-				    test_bit(R5_Expanded,
+-					     &sh2->dev[dd_idx2].flags)) {
+-					/* must have already done this block */
+-					release_stripe(sh2);
+-					continue;
+-				}
+-				memcpy(page_address(sh2->dev[dd_idx2].page),
+-				       page_address(sh->dev[i].page),
+-				       STRIPE_SIZE);
+-				set_bit(R5_Expanded, &sh2->dev[dd_idx2].flags);
+-				set_bit(R5_UPTODATE, &sh2->dev[dd_idx2].flags);
+-				for (j = 0 ; j < conf->raid_disks ; j++)
+-					if (j != sh2->pd_idx &&
+-					    j != raid6_next_disk(sh2->pd_idx,
+-							   sh2->disks) &&
+-					    !test_bit(R5_Expanded,
+-						      &sh2->dev[j].flags))
+-						break;
+-				if (j == conf->raid_disks) {
+-					set_bit(STRIPE_EXPAND_READY,
+-						&sh2->state);
+-					set_bit(STRIPE_HANDLE, &sh2->state);
+-				}
+-				release_stripe(sh2);
+-			}
+-	}
++	if (s.expanding && s.locked == 0)
++		handle_stripe_expansion(conf, sh, &r6s);
+ 
+ 	spin_unlock(&sh->lock);
+ 
+@@ -2470,11 +3111,11 @@
+ 		rcu_read_unlock();
+ 
+ 		if (rdev) {
+-			if (syncing || expanding || expanded)
++			if (s.syncing || s.expanding || s.expanded)
+ 				md_sync_acct(rdev->bdev, STRIPE_SECTORS);
+ 
+ 			bi->bi_bdev = rdev->bdev;
+-			PRINTK("for %llu schedule op %ld on disc %d\n",
++			pr_debug("for %llu schedule op %ld on disc %d\n",
+ 				(unsigned long long)sh->sector, bi->bi_rw, i);
+ 			atomic_inc(&sh->count);
+ 			bi->bi_sector = sh->sector + rdev->data_offset;
+@@ -2494,7 +3135,7 @@
+ 		} else {
+ 			if (rw == WRITE)
+ 				set_bit(STRIPE_DEGRADED, &sh->state);
+-			PRINTK("skip op %ld on disc %d for sector %llu\n",
++			pr_debug("skip op %ld on disc %d for sector %llu\n",
+ 				bi->bi_rw, i, (unsigned long long)sh->sector);
+ 			clear_bit(R5_LOCKED, &sh->dev[i].flags);
+ 			set_bit(STRIPE_HANDLE, &sh->state);
+@@ -2738,7 +3379,7 @@
+ 	}
+ 
+ 
+-	PRINTK("raid5_align_endio : io error...handing IO for a retry\n");
++	pr_debug("raid5_align_endio : io error...handing IO for a retry\n");
+ 
+ 	add_bio_to_retry(raid_bi, conf);
+ 	return 0;
+@@ -2776,7 +3417,7 @@
+ 	mdk_rdev_t *rdev;
+ 
+ 	if (!in_chunk_boundary(mddev, raid_bio)) {
+-		PRINTK("chunk_aligned_read : non aligned\n");
++		pr_debug("chunk_aligned_read : non aligned\n");
+ 		return 0;
+ 	}
+ 	/*
+@@ -2900,7 +3541,7 @@
+ 
+  		new_sector = raid5_compute_sector(logical_sector, disks, data_disks,
+ 						  &dd_idx, &pd_idx, conf);
+-		PRINTK("raid5: make_request, sector %llu logical %llu\n",
++		pr_debug("raid5: make_request, sector %llu logical %llu\n",
+ 			(unsigned long long)new_sector, 
+ 			(unsigned long long)logical_sector);
+ 
+@@ -3273,7 +3914,7 @@
+ 	raid5_conf_t *conf = mddev_to_conf(mddev);
+ 	int handled;
+ 
+-	PRINTK("+++ raid5d active\n");
++	pr_debug("+++ raid5d active\n");
+ 
+ 	md_check_recovery(mddev);
+ 
+@@ -3308,8 +3949,10 @@
+ 			handled++;
+ 		}
+ 
+-		if (list_empty(&conf->handle_list))
++		if (list_empty(&conf->handle_list)) {
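++			/* no stripes left to handle: flush any async_tx
++			 * descriptors still pending on the offload channels
++			 */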
++			async_tx_issue_pending_all();
+ 			break;
++		}
+ 
+ 		first = conf->handle_list.next;
+ 		sh = list_entry(first, struct stripe_head, lru);
+@@ -3325,13 +3968,13 @@
+ 
+ 		spin_lock_irq(&conf->device_lock);
+ 	}
+-	PRINTK("%d stripes handled\n", handled);
++	pr_debug("%d stripes handled\n", handled);
+ 
+ 	spin_unlock_irq(&conf->device_lock);
+ 
+ 	unplug_slaves(mddev);
+ 
+-	PRINTK("--- raid5d inactive\n");
++	pr_debug("--- raid5d inactive\n");
+ }
+ 
+ static ssize_t
+@@ -3507,7 +4150,7 @@
+ 	atomic_set(&conf->preread_active_stripes, 0);
+ 	atomic_set(&conf->active_aligned_reads, 0);
+ 
+-	PRINTK("raid5: run(%s) called.\n", mdname(mddev));
++	pr_debug("raid5: run(%s) called.\n", mdname(mddev));
+ 
+ 	ITERATE_RDEV(mddev,rdev,tmp) {
+ 		raid_disk = rdev->raid_disk;
+@@ -3690,7 +4333,7 @@
+ 	return 0;
+ }
+ 
+-#if RAID5_DEBUG
++#ifdef DEBUG
+ static void print_sh (struct seq_file *seq, struct stripe_head *sh)
+ {
+ 	int i;
+@@ -3737,7 +4380,7 @@
+ 			       conf->disks[i].rdev &&
+ 			       test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_");
+ 	seq_printf (seq, "]");
+-#if RAID5_DEBUG
++#ifdef DEBUG
+ 	seq_printf (seq, "\n");
+ 	printall(seq, conf);
+ #endif
+diff -Nurb linux-2.6.22-570/drivers/md/xor.c linux-2.6.22-try2/drivers/md/xor.c
+--- linux-2.6.22-570/drivers/md/xor.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/md/xor.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,154 +0,0 @@
+-/*
+- * xor.c : Multiple Devices driver for Linux
+- *
+- * Copyright (C) 1996, 1997, 1998, 1999, 2000,
+- * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
+- *
+- * Dispatch optimized RAID-5 checksumming functions.
+- *
+- * This program is free software; you can redistribute it and/or modify
+- * it under the terms of the GNU General Public License as published by
+- * the Free Software Foundation; either version 2, or (at your option)
+- * any later version.
+- *
+- * You should have received a copy of the GNU General Public License
+- * (for example /usr/src/linux/COPYING); if not, write to the Free
+- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+- */
+-
+-#define BH_TRACE 0
+-#include <linux/module.h>
+-#include <linux/raid/md.h>
+-#include <linux/raid/xor.h>
+-#include <asm/xor.h>
+-
+-/* The xor routines to use.  */
+-static struct xor_block_template *active_template;
+-
+-void
+-xor_block(unsigned int count, unsigned int bytes, void **ptr)
+-{
+-	unsigned long *p0, *p1, *p2, *p3, *p4;
+-
+-	p0 = (unsigned long *) ptr[0];
+-	p1 = (unsigned long *) ptr[1];
+-	if (count == 2) {
+-		active_template->do_2(bytes, p0, p1);
+-		return;
+-	}
+-
+-	p2 = (unsigned long *) ptr[2];
+-	if (count == 3) {
+-		active_template->do_3(bytes, p0, p1, p2);
+-		return;
+-	}
+-
+-	p3 = (unsigned long *) ptr[3];
+-	if (count == 4) {
+-		active_template->do_4(bytes, p0, p1, p2, p3);
+-		return;
+-	}
+-
+-	p4 = (unsigned long *) ptr[4];
+-	active_template->do_5(bytes, p0, p1, p2, p3, p4);
+-}
+-
+-/* Set of all registered templates.  */
+-static struct xor_block_template *template_list;
+-
+-#define BENCH_SIZE (PAGE_SIZE)
+-
+-static void
+-do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
+-{
+-	int speed;
+-	unsigned long now;
+-	int i, count, max;
+-
+-	tmpl->next = template_list;
+-	template_list = tmpl;
+-
+-	/*
+-	 * Count the number of XORs done during a whole jiffy, and use
+-	 * this to calculate the speed of checksumming.  We use a 2-page
+-	 * allocation to have guaranteed color L1-cache layout.
+-	 */
+-	max = 0;
+-	for (i = 0; i < 5; i++) {
+-		now = jiffies;
+-		count = 0;
+-		while (jiffies == now) {
+-			mb();
+-			tmpl->do_2(BENCH_SIZE, b1, b2);
+-			mb();
+-			count++;
+-			mb();
+-		}
+-		if (count > max)
+-			max = count;
+-	}
+-
+-	speed = max * (HZ * BENCH_SIZE / 1024);
+-	tmpl->speed = speed;
+-
+-	printk("   %-10s: %5d.%03d MB/sec\n", tmpl->name,
+-	       speed / 1000, speed % 1000);
+-}
+-
+-static int
+-calibrate_xor_block(void)
+-{
+-	void *b1, *b2;
+-	struct xor_block_template *f, *fastest;
+-
+-	b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
+-	if (! b1) {
+-		printk("raid5: Yikes!  No memory available.\n");
+-		return -ENOMEM;
+-	}
+-	b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
+-
+-	/*
+-	 * If this arch/cpu has a short-circuited selection, don't loop through all
+-	 * the possible functions, just test the best one
+-	 */
+-
+-	fastest = NULL;
+-
+-#ifdef XOR_SELECT_TEMPLATE
+-		fastest = XOR_SELECT_TEMPLATE(fastest);
+-#endif
+-
+-#define xor_speed(templ)	do_xor_speed((templ), b1, b2)
+-
+-	if (fastest) {
+-		printk(KERN_INFO "raid5: automatically using best checksumming function: %s\n",
+-			fastest->name);
+-		xor_speed(fastest);
+-	} else {
+-		printk(KERN_INFO "raid5: measuring checksumming speed\n");
+-		XOR_TRY_TEMPLATES;
+-		fastest = template_list;
+-		for (f = fastest; f; f = f->next)
+-			if (f->speed > fastest->speed)
+-				fastest = f;
+-	}
+-
+-	printk("raid5: using function: %s (%d.%03d MB/sec)\n",
+-	       fastest->name, fastest->speed / 1000, fastest->speed % 1000);
+-
+-#undef xor_speed
+-
+-	free_pages((unsigned long)b1, 2);
+-
+-	active_template = fastest;
+-	return 0;
+-}
+-
+-static __exit void xor_exit(void) { }
+-
+-EXPORT_SYMBOL(xor_block);
+-MODULE_LICENSE("GPL");
+-
+-module_init(calibrate_xor_block);
+-module_exit(xor_exit);
+diff -Nurb linux-2.6.22-570/drivers/media/dvb/dvb-core/dvb_frontend.c linux-2.6.22-try2/drivers/media/dvb/dvb-core/dvb_frontend.c
+--- linux-2.6.22-570/drivers/media/dvb/dvb-core/dvb_frontend.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/media/dvb/dvb-core/dvb_frontend.c	2007-12-19 15:29:24.000000000 -0500
+@@ -523,6 +523,7 @@
+ 
+ 	dvb_frontend_init(fe);
+ 
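++	/* kernel threads are not freezable by default; opt in so this
++	 * thread is parked across suspend/resume
++	 */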
++	set_freezable();
+ 	while (1) {
+ 		up(&fepriv->sem);	    /* is locked when we enter the thread... */
+ restart:
+diff -Nurb linux-2.6.22-570/drivers/media/video/cx88/cx88-tvaudio.c linux-2.6.22-try2/drivers/media/video/cx88/cx88-tvaudio.c
+--- linux-2.6.22-570/drivers/media/video/cx88/cx88-tvaudio.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/media/video/cx88/cx88-tvaudio.c	2007-12-19 15:29:24.000000000 -0500
+@@ -906,6 +906,7 @@
+ 	u32 mode = 0;
+ 
+ 	dprintk("cx88: tvaudio thread started\n");
++	set_freezable();
+ 	for (;;) {
+ 		msleep_interruptible(1000);
+ 		if (kthread_should_stop())
+diff -Nurb linux-2.6.22-570/drivers/media/video/msp3400-kthreads.c linux-2.6.22-try2/drivers/media/video/msp3400-kthreads.c
+--- linux-2.6.22-570/drivers/media/video/msp3400-kthreads.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/media/video/msp3400-kthreads.c	2007-12-19 15:29:24.000000000 -0500
+@@ -23,6 +23,7 @@
+ #include <linux/module.h>
+ #include <linux/slab.h>
+ #include <linux/i2c.h>
++#include <linux/freezer.h>
+ #include <linux/videodev.h>
+ #include <linux/videodev2.h>
+ #include <media/v4l2-common.h>
+@@ -468,6 +469,7 @@
+ 
+ 
+ 	v4l_dbg(1, msp_debug, client, "msp3400 daemon started\n");
++	set_freezable();
+ 	for (;;) {
+ 		v4l_dbg(2, msp_debug, client, "msp3400 thread: sleep\n");
+ 		msp_sleep(state, -1);
+@@ -646,7 +648,7 @@
+ 	int val, i, std, count;
+ 
+ 	v4l_dbg(1, msp_debug, client, "msp3410 daemon started\n");
+-
++	set_freezable();
+ 	for (;;) {
+ 		v4l_dbg(2, msp_debug, client, "msp3410 thread: sleep\n");
+ 		msp_sleep(state,-1);
+@@ -940,7 +942,7 @@
+ 	int val, i;
+ 
+ 	v4l_dbg(1, msp_debug, client, "msp34xxg daemon started\n");
+-
++	set_freezable();
+ 	for (;;) {
+ 		v4l_dbg(2, msp_debug, client, "msp34xxg thread: sleep\n");
+ 		msp_sleep(state, -1);
+diff -Nurb linux-2.6.22-570/drivers/media/video/tvaudio.c linux-2.6.22-try2/drivers/media/video/tvaudio.c
+--- linux-2.6.22-570/drivers/media/video/tvaudio.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/media/video/tvaudio.c	2007-12-19 15:29:24.000000000 -0500
+@@ -271,7 +271,7 @@
+ 	struct CHIPDESC  *desc = chiplist + chip->type;
+ 
+ 	v4l_dbg(1, debug, &chip->c, "%s: thread started\n", chip->c.name);
+-
++	set_freezable();
+ 	for (;;) {
+ 		set_current_state(TASK_INTERRUPTIBLE);
+ 		if (!kthread_should_stop())
+diff -Nurb linux-2.6.22-570/drivers/media/video/video-buf-dvb.c linux-2.6.22-try2/drivers/media/video/video-buf-dvb.c
+--- linux-2.6.22-570/drivers/media/video/video-buf-dvb.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/media/video/video-buf-dvb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -47,6 +47,7 @@
+ 	int err;
+ 
+ 	dprintk("dvb thread started\n");
++	set_freezable();
+ 	videobuf_read_start(&dvb->dvbq);
+ 
+ 	for (;;) {
+diff -Nurb linux-2.6.22-570/drivers/media/video/vivi.c linux-2.6.22-try2/drivers/media/video/vivi.c
+--- linux-2.6.22-570/drivers/media/video/vivi.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/media/video/vivi.c	2007-12-19 15:29:24.000000000 -0500
+@@ -573,6 +573,7 @@
+ 	dprintk(1,"thread started\n");
+ 
+ 	mod_timer(&dma_q->timeout, jiffies+BUFFER_TIMEOUT);
++	set_freezable();
+ 
+ 	for (;;) {
+ 		vivi_sleep(dma_q);
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/linux_compat.h linux-2.6.22-try2/drivers/message/fusion/linux_compat.h
+--- linux-2.6.22-570/drivers/message/fusion/linux_compat.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/linux_compat.h	1969-12-31 19:00:00.000000000 -0500
+@@ -1,9 +0,0 @@
+-/* drivers/message/fusion/linux_compat.h */
+-
+-#ifndef FUSION_LINUX_COMPAT_H
+-#define FUSION_LINUX_COMPAT_H
+-
+-#include <linux/version.h>
+-#include <scsi/scsi_device.h>
+-
+-#endif /* _LINUX_COMPAT_H */
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi.h linux-2.6.22-try2/drivers/message/fusion/lsi/mpi.h
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/lsi/mpi.h	2007-12-19 15:29:23.000000000 -0500
+@@ -1,12 +1,12 @@
+ /*
+- *  Copyright (c) 2000-2006 LSI Logic Corporation.
++ *  Copyright (c) 2000-2007 LSI Logic Corporation.
+  *
+  *
+  *           Name:  mpi.h
+  *          Title:  MPI Message independent structures and definitions
+  *  Creation Date:  July 27, 2000
+  *
+- *    mpi.h Version:  01.05.12
++ *    mpi.h Version:  01.05.13
+  *
+  *  Version History
+  *  ---------------
+@@ -78,6 +78,7 @@
+  *  08-30-05  01.05.10  Added 2 new IOCStatus codes for Target.
+  *  03-27-06  01.05.11  Bumped MPI_HEADER_VERSION_UNIT.
+  *  10-11-06  01.05.12  Bumped MPI_HEADER_VERSION_UNIT.
++ *  05-24-07  01.05.13  Bumped MPI_HEADER_VERSION_UNIT.
+  *  --------------------------------------------------------------------------
+  */
+ 
+@@ -108,7 +109,7 @@
+ /* Note: The major versions of 0xe0 through 0xff are reserved */
+ 
+ /* versioning for this MPI header set */
+-#define MPI_HEADER_VERSION_UNIT             (0x0E)
++#define MPI_HEADER_VERSION_UNIT             (0x10)
+ #define MPI_HEADER_VERSION_DEV              (0x00)
+ #define MPI_HEADER_VERSION_UNIT_MASK        (0xFF00)
+ #define MPI_HEADER_VERSION_UNIT_SHIFT       (8)
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_cnfg.h linux-2.6.22-try2/drivers/message/fusion/lsi/mpi_cnfg.h
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_cnfg.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/lsi/mpi_cnfg.h	2007-12-19 15:29:23.000000000 -0500
+@@ -1,12 +1,12 @@
+ /*
+- *  Copyright (c) 2000-2006 LSI Logic Corporation.
++ *  Copyright (c) 2000-2007 LSI Logic Corporation.
+  *
+  *
+  *           Name:  mpi_cnfg.h
+  *          Title:  MPI Config message, structures, and Pages
+  *  Creation Date:  July 27, 2000
+  *
+- *    mpi_cnfg.h Version:  01.05.13
++ *    mpi_cnfg.h Version:  01.05.15
+  *
+  *  Version History
+  *  ---------------
+@@ -293,6 +293,21 @@
+  *                      Added more AccessStatus values for SAS Device Page 0.
+  *                      Added bit for SATA Asynchronous Notification Support in
+  *                      Flags field of SAS Device Page 0.
++ *  02-28-07  01.05.14  Added ExtFlags field to Manufacturing Page 4.
++ *                      Added Disable SMART Polling for CapabilitiesFlags of
++ *                      IOC Page 6.
++ *                      Added Disable SMART Polling to DeviceSettings of BIOS
++ *                      Page 1.
++ *                      Added Multi-Port Domain bit for DiscoveryStatus field
++ *                      of SAS IO Unit Page.
++ *                      Added Multi-Port Domain Illegal flag for SAS IO Unit
++ *                      Page 1 AdditionalControlFlags field.
++ *  05-24-07  01.05.15  Added Hide Physical Disks with Non-Integrated RAID
++ *                      Metadata bit to Manufacturing Page 4 ExtFlags field.
++ *                      Added Internal Connector to End Device Present bit to
++ *                      Expander Page 0 Flags field.
++ *                      Fixed define for
++ *                      MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED.
+  *  --------------------------------------------------------------------------
+  */
+ 
+@@ -639,7 +654,7 @@
+     U8                              InfoSize1;          /* 0Bh */
+     U8                              InquirySize;        /* 0Ch */
+     U8                              Flags;              /* 0Dh */
+-    U16                             Reserved2;          /* 0Eh */
++    U16                             ExtFlags;           /* 0Eh */
+     U8                              InquiryData[56];    /* 10h */
+     U32                             ISVolumeSettings;   /* 48h */
+     U32                             IMEVolumeSettings;  /* 4Ch */
+@@ -658,7 +673,7 @@
+ } CONFIG_PAGE_MANUFACTURING_4, MPI_POINTER PTR_CONFIG_PAGE_MANUFACTURING_4,
+   ManufacturingPage4_t, MPI_POINTER pManufacturingPage4_t;
+ 
+-#define MPI_MANUFACTURING4_PAGEVERSION                  (0x04)
++#define MPI_MANUFACTURING4_PAGEVERSION                  (0x05)
+ 
+ /* defines for the Flags field */
+ #define MPI_MANPAGE4_FORCE_BAD_BLOCK_TABLE              (0x80)
+@@ -670,6 +685,12 @@
+ #define MPI_MANPAGE4_IM_RESYNC_CACHE_ENABLE             (0x02)
+ #define MPI_MANPAGE4_IR_NO_MIX_SAS_SATA                 (0x01)
+ 
++/* defines for the ExtFlags field */
++#define MPI_MANPAGE4_EXTFLAGS_HIDE_NON_IR_METADATA      (0x0008)
++#define MPI_MANPAGE4_EXTFLAGS_SAS_CACHE_DISABLE         (0x0004)
++#define MPI_MANPAGE4_EXTFLAGS_SATA_CACHE_DISABLE        (0x0002)
++#define MPI_MANPAGE4_EXTFLAGS_LEGACY_MODE               (0x0001)
++
+ 
+ #ifndef MPI_MANPAGE5_NUM_FORCEWWID
+ #define MPI_MANPAGE5_NUM_FORCEWWID      (1)
+@@ -781,7 +802,7 @@
+ } CONFIG_PAGE_MANUFACTURING_9, MPI_POINTER PTR_CONFIG_PAGE_MANUFACTURING_9,
+   ManufacturingPage9_t, MPI_POINTER pManufacturingPage9_t;
+ 
+-#define MPI_MANUFACTURING6_PAGEVERSION                  (0x00)
++#define MPI_MANUFACTURING9_PAGEVERSION                  (0x00)
+ 
+ 
+ typedef struct _CONFIG_PAGE_MANUFACTURING_10
+@@ -1138,6 +1159,8 @@
+ 
+ /* IOC Page 6 Capabilities Flags */
+ 
++#define MPI_IOCPAGE6_CAP_FLAGS_DISABLE_SMART_POLLING    (0x00000008)
++
+ #define MPI_IOCPAGE6_CAP_FLAGS_MASK_METADATA_SIZE       (0x00000006)
+ #define MPI_IOCPAGE6_CAP_FLAGS_64MB_METADATA_SIZE       (0x00000000)
+ #define MPI_IOCPAGE6_CAP_FLAGS_512MB_METADATA_SIZE      (0x00000002)
+@@ -1208,6 +1231,7 @@
+ #define MPI_BIOSPAGE1_IOCSET_ALTERNATE_CHS              (0x00000008)
+ 
+ /* values for the DeviceSettings field */
++#define MPI_BIOSPAGE1_DEVSET_DISABLE_SMART_POLLING      (0x00000010)
+ #define MPI_BIOSPAGE1_DEVSET_DISABLE_SEQ_LUN            (0x00000008)
+ #define MPI_BIOSPAGE1_DEVSET_DISABLE_RM_LUN             (0x00000004)
+ #define MPI_BIOSPAGE1_DEVSET_DISABLE_NON_RM_LUN         (0x00000002)
+@@ -2281,11 +2305,11 @@
+ typedef struct _CONFIG_PAGE_RAID_VOL_1
+ {
+     CONFIG_PAGE_HEADER      Header;         /* 00h */
+-    U8                      VolumeID;       /* 01h */
+-    U8                      VolumeBus;      /* 02h */
+-    U8                      VolumeIOC;      /* 03h */
+-    U8                      Reserved0;      /* 04h */
+-    U8                      GUID[24];       /* 05h */
++    U8                      VolumeID;       /* 04h */
++    U8                      VolumeBus;      /* 05h */
++    U8                      VolumeIOC;      /* 06h */
++    U8                      Reserved0;      /* 07h */
++    U8                      GUID[24];       /* 08h */
+     U8                      Name[32];       /* 20h */
+     U64                     WWID;           /* 40h */
+     U32                     Reserved1;      /* 48h */
+@@ -2340,7 +2364,7 @@
+ } RAID_PHYS_DISK0_STATUS, MPI_POINTER PTR_RAID_PHYS_DISK0_STATUS,
+   RaidPhysDiskStatus_t, MPI_POINTER pRaidPhysDiskStatus_t;
+ 
+-/* RAID Volume 2 IM Physical Disk DiskStatus flags */
++/* RAID Physical Disk PhysDiskStatus flags */
+ 
+ #define MPI_PHYSDISK0_STATUS_FLAG_OUT_OF_SYNC           (0x01)
+ #define MPI_PHYSDISK0_STATUS_FLAG_QUIESCED              (0x02)
+@@ -2544,6 +2568,7 @@
+ #define MPI_SAS_IOUNIT0_DS_TABLE_LINK                       (0x00000400)
+ #define MPI_SAS_IOUNIT0_DS_UNSUPPORTED_DEVICE               (0x00000800)
+ #define MPI_SAS_IOUNIT0_DS_MAX_SATA_TARGETS                 (0x00001000)
++#define MPI_SAS_IOUNIT0_DS_MULTI_PORT_DOMAIN                (0x00002000)
+ 
+ 
+ typedef struct _MPI_SAS_IO_UNIT1_PHY_DATA
+@@ -2607,6 +2632,7 @@
+ #define MPI_SAS_IOUNIT1_CONTROL_CLEAR_AFFILIATION           (0x0001)
+ 
+ /* values for SAS IO Unit Page 1 AdditionalControlFlags */
++#define MPI_SAS_IOUNIT1_ACONTROL_MULTI_PORT_DOMAIN_ILLEGAL          (0x0080)
+ #define MPI_SAS_IOUNIT1_ACONTROL_SATA_ASYNCHROUNOUS_NOTIFICATION    (0x0040)
+ #define MPI_SAS_IOUNIT1_ACONTROL_HIDE_NONZERO_ATTACHED_PHY_IDENT    (0x0020)
+ #define MPI_SAS_IOUNIT1_ACONTROL_PORT_ENABLE_ONLY_SATA_LINK_RESET   (0x0010)
+@@ -2734,6 +2760,7 @@
+ #define MPI_SAS_EXPANDER0_DS_UNSUPPORTED_DEVICE         (0x00000800)
+ 
+ /* values for SAS Expander Page 0 Flags field */
++#define MPI_SAS_EXPANDER0_FLAGS_CONNECTOR_END_DEVICE    (0x04)
+ #define MPI_SAS_EXPANDER0_FLAGS_ROUTE_TABLE_CONFIG      (0x02)
+ #define MPI_SAS_EXPANDER0_FLAGS_CONFIG_IN_PROGRESS      (0x01)
+ 
+@@ -2774,7 +2801,7 @@
+ /* see mpi_sas.h for values for SAS Expander Page 1 AttachedDeviceInfo values */
+ 
+ /* values for SAS Expander Page 1 DiscoveryInfo field */
+-#define MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY DISABLED     (0x04)
++#define MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED     (0x04)
+ #define MPI_SAS_EXPANDER1_DISCINFO_LINK_STATUS_CHANGE   (0x02)
+ #define MPI_SAS_EXPANDER1_DISCINFO_NO_ROUTING_ENTRIES   (0x01)
+ 
+@@ -2895,11 +2922,11 @@
+     U8                                  AttachedPhyIdentifier;  /* 16h */
+     U8                                  Reserved2;              /* 17h */
+     U32                                 AttachedDeviceInfo;     /* 18h */
+-    U8                                  ProgrammedLinkRate;     /* 20h */
+-    U8                                  HwLinkRate;             /* 21h */
+-    U8                                  ChangeCount;            /* 22h */
+-    U8                                  Flags;                  /* 23h */
+-    U32                                 PhyInfo;                /* 24h */
++    U8                                  ProgrammedLinkRate;     /* 1Ch */
++    U8                                  HwLinkRate;             /* 1Dh */
++    U8                                  ChangeCount;            /* 1Eh */
++    U8                                  Flags;                  /* 1Fh */
++    U32                                 PhyInfo;                /* 20h */
+ } CONFIG_PAGE_SAS_PHY_0, MPI_POINTER PTR_CONFIG_PAGE_SAS_PHY_0,
+   SasPhyPage0_t, MPI_POINTER pSasPhyPage0_t;
+ 
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_history.txt linux-2.6.22-try2/drivers/message/fusion/lsi/mpi_history.txt
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_history.txt	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/lsi/mpi_history.txt	2007-12-19 15:29:23.000000000 -0500
+@@ -3,28 +3,28 @@
+  MPI Header File Change History
+  ==============================
+ 
+- Copyright (c) 2000-2006 LSI Logic Corporation.
++ Copyright (c) 2000-2007 LSI Logic Corporation.
+ 
+  ---------------------------------------
+- Header Set Release Version:    01.05.14
+- Header Set Release Date:       10-11-06
++ Header Set Release Version:    01.05.16
++ Header Set Release Date:       05-24-07
+  ---------------------------------------
+ 
+  Filename               Current version     Prior version
+  ----------             ---------------     -------------
+- mpi.h                  01.05.12            01.05.11
+- mpi_ioc.h              01.05.12            01.05.11
+- mpi_cnfg.h             01.05.13            01.05.12
+- mpi_init.h             01.05.08            01.05.07
++ mpi.h                  01.05.13            01.05.12
++ mpi_ioc.h              01.05.14            01.05.13
++ mpi_cnfg.h             01.05.15            01.05.14
++ mpi_init.h             01.05.09            01.05.09
+  mpi_targ.h             01.05.06            01.05.06
+  mpi_fc.h               01.05.01            01.05.01
+  mpi_lan.h              01.05.01            01.05.01
+- mpi_raid.h             01.05.02            01.05.02
++ mpi_raid.h             01.05.03            01.05.03
+  mpi_tool.h             01.05.03            01.05.03
+  mpi_inb.h              01.05.01            01.05.01
+- mpi_sas.h              01.05.04            01.05.03
++ mpi_sas.h              01.05.04            01.05.04
+  mpi_type.h             01.05.02            01.05.02
+- mpi_history.txt        01.05.14            01.05.13
++ mpi_history.txt        01.05.14            01.05.14
+ 
+ 
+  *  Date      Version   Description
+@@ -95,6 +95,7 @@
+  *  08-30-05  01.05.10  Added 2 new IOCStatus codes for Target.
+  *  03-27-06  01.05.11  Bumped MPI_HEADER_VERSION_UNIT.
+  *  10-11-06  01.05.12  Bumped MPI_HEADER_VERSION_UNIT.
++ *  05-24-07  01.05.13  Bumped MPI_HEADER_VERSION_UNIT.
+  *  --------------------------------------------------------------------------
+ 
+ mpi_ioc.h
+@@ -191,6 +192,13 @@
+  *                      data structure.
+  *                      Added new ImageType values for FWDownload and FWUpload
+  *                      requests.
++ *  02-28-07  01.05.13  Added MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT for SAS
++ *                      Broadcast Event Data (replacing _RESERVED2).
++ *                      For Discovery Error Event Data DiscoveryStatus field,
++ *                      replaced _MULTPL_PATHS with _UNSUPPORTED_DEVICE and
++ *                      added _MULTI_PORT_DOMAIN.
++ *  05-24-07  01.05.14  Added Common Boot Block type to FWDownload Request.
++ *                      Added Common Boot Block type to FWUpload Request.
+  *  --------------------------------------------------------------------------
+ 
+ mpi_cnfg.h
+@@ -473,6 +481,21 @@
+  *                      Added more AccessStatus values for SAS Device Page 0.
+  *                      Added bit for SATA Asynchronous Notification Support in
+  *                      Flags field of SAS Device Page 0.
++ *  02-28-07  01.05.14  Added ExtFlags field to Manufacturing Page 4.
++ *                      Added Disable SMART Polling for CapabilitiesFlags of
++ *                      IOC Page 6.
++ *                      Added Disable SMART Polling to DeviceSettings of BIOS
++ *                      Page 1.
++ *                      Added Multi-Port Domain bit for DiscoveryStatus field
++ *                      of SAS IO Unit Page.
++ *                      Added Multi-Port Domain Illegal flag for SAS IO Unit
++ *                      Page 1 AdditionalControlFlags field.
++ *  05-24-07  01.05.15  Added Hide Physical Disks with Non-Integrated RAID
++ *                      Metadata bit to Manufacturing Page 4 ExtFlags field.
++ *                      Added Internal Connector to End Device Present bit to
++ *                      Expander Page 0 Flags field.
++ *                      Fixed define for
++ *                      MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED.
+  *  --------------------------------------------------------------------------
+ 
+ mpi_init.h
+@@ -517,6 +540,8 @@
+  *                      unique in the first 32 characters.
+  *  03-27-06  01.05.07  Added Task Management type of Clear ACA.
+  *  10-11-06  01.05.08  Shortened define for Task Management type of Clear ACA.
++ *  02-28-07  01.05.09  Defined two new MsgFlags bits for SCSI Task Management
++ *                      Request: Do Not Send Task IU and Soft Reset Option.
+  *  --------------------------------------------------------------------------
+ 
+ mpi_targ.h
+@@ -571,7 +596,7 @@
+  *  11-02-00  01.01.01  Original release for post 1.0 work
+  *  12-04-00  01.01.02  Added messages for Common Transport Send and
+  *                      Primitive Send.
+- *  01-09-01  01.01.03  Modified some of the new flags to have an MPI prefix
++ *  01-09-01  01.01.03  Modified some of the new flags to have an MPI prefix
+  *                      and modified the FcPrimitiveSend flags.
+  *  01-25-01  01.01.04  Move InitiatorIndex in LinkServiceRsp reply to a larger
+  *                      field.
+@@ -634,6 +659,8 @@
+  *  08-19-04  01.05.01  Original release for MPI v1.5.
+  *  01-15-05  01.05.02  Added defines for the two new RAID Actions for
+  *                      _SET_RESYNC_RATE and _SET_DATA_SCRUB_RATE.
++ *  02-28-07  01.05.03  Added new RAID Action, Device FW Update Mode, and
++ *                      associated defines.
+  *  --------------------------------------------------------------------------
+ 
+ mpi_tool.h
+@@ -682,7 +709,22 @@
+ 
+ mpi_history.txt         Parts list history
+ 
+-Filename    01.05.13   01.05.13   01.05.12   01.05.11   01.05.10   01.05.09
++Filename    01.05.15   01.05.16
++----------  --------   --------
++mpi.h       01.05.12   01.05.13
++mpi_ioc.h   01.05.13   01.05.14
++mpi_cnfg.h  01.05.14   01.05.15
++mpi_init.h  01.05.09   01.05.09
++mpi_targ.h  01.05.06   01.05.06
++mpi_fc.h    01.05.01   01.05.01
++mpi_lan.h   01.05.01   01.05.01
++mpi_raid.h  01.05.03   01.05.03
++mpi_tool.h  01.05.03   01.05.03
++mpi_inb.h   01.05.01   01.05.01
++mpi_sas.h   01.05.04   01.05.04
++mpi_type.h  01.05.02   01.05.02
++
++Filename    01.05.14   01.05.13   01.05.12   01.05.11   01.05.10   01.05.09
+ ----------  --------   --------   --------   --------   --------   --------
+ mpi.h       01.05.12   01.05.11   01.05.10   01.05.09   01.05.08   01.05.07
+ mpi_ioc.h   01.05.12   01.05.11   01.05.10   01.05.09   01.05.09   01.05.08
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_inb.h linux-2.6.22-try2/drivers/message/fusion/lsi/mpi_inb.h
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_inb.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/lsi/mpi_inb.h	1969-12-31 19:00:00.000000000 -0500
+@@ -1,221 +0,0 @@
+-/*
+- *  Copyright (c) 2003-2004 LSI Logic Corporation.
+- *
+- *
+- *           Name:  mpi_inb.h
+- *          Title:  MPI Inband structures and definitions
+- *  Creation Date:  September 30, 2003
+- *
+- *    mpi_inb.h Version:  01.05.01
+- *
+- *  Version History
+- *  ---------------
+- *
+- *  Date      Version   Description
+- *  --------  --------  ------------------------------------------------------
+- *  05-11-04  01.03.01  Original release.
+- *  08-19-04  01.05.01  Original release for MPI v1.5.
+- *  --------------------------------------------------------------------------
+- */
+-
+-#ifndef MPI_INB_H
+-#define MPI_INB_H
+-
+-/******************************************************************************
+-*
+-*        I n b a n d    M e s s a g e s
+-*
+-*******************************************************************************/
+-
+-
+-/****************************************************************************/
+-/* Inband Buffer Post Request                                               */
+-/****************************************************************************/
+-
+-typedef struct _MSG_INBAND_BUFFER_POST_REQUEST
+-{
+-    U8                      Reserved1;          /* 00h */
+-    U8                      BufferCount;        /* 01h */
+-    U8                      ChainOffset;        /* 02h */
+-    U8                      Function;           /* 03h */
+-    U16                     Reserved2;          /* 04h */
+-    U8                      Reserved3;          /* 06h */
+-    U8                      MsgFlags;           /* 07h */
+-    U32                     MsgContext;         /* 08h */
+-    U32                     Reserved4;          /* 0Ch */
+-    SGE_TRANS_SIMPLE_UNION  SGL;                /* 10h */
+-} MSG_INBAND_BUFFER_POST_REQUEST, MPI_POINTER PTR_MSG_INBAND_BUFFER_POST_REQUEST,
+-  MpiInbandBufferPostRequest_t , MPI_POINTER pMpiInbandBufferPostRequest_t;
+-
+-
+-typedef struct _WWN_FC_FORMAT
+-{
+-    U64                     NodeName;           /* 00h */
+-    U64                     PortName;           /* 08h */
+-} WWN_FC_FORMAT, MPI_POINTER PTR_WWN_FC_FORMAT,
+-  WwnFcFormat_t, MPI_POINTER pWwnFcFormat_t;
+-
+-typedef struct _WWN_SAS_FORMAT
+-{
+-    U64                     WorldWideID;        /* 00h */
+-    U32                     Reserved1;          /* 08h */
+-    U32                     Reserved2;          /* 0Ch */
+-} WWN_SAS_FORMAT, MPI_POINTER PTR_WWN_SAS_FORMAT,
+-  WwnSasFormat_t, MPI_POINTER pWwnSasFormat_t;
+-
+-typedef union _WWN_INBAND_FORMAT
+-{
+-    WWN_FC_FORMAT           Fc;
+-    WWN_SAS_FORMAT          Sas;
+-} WWN_INBAND_FORMAT, MPI_POINTER PTR_WWN_INBAND_FORMAT,
+-  WwnInbandFormat, MPI_POINTER pWwnInbandFormat;
+-
+-
+-/* Inband Buffer Post reply message */
+-
+-typedef struct _MSG_INBAND_BUFFER_POST_REPLY
+-{
+-    U16                     Reserved1;          /* 00h */
+-    U8                      MsgLength;          /* 02h */
+-    U8                      Function;           /* 03h */
+-    U16                     Reserved2;          /* 04h */
+-    U8                      Reserved3;          /* 06h */
+-    U8                      MsgFlags;           /* 07h */
+-    U32                     MsgContext;         /* 08h */
+-    U16                     Reserved4;          /* 0Ch */
+-    U16                     IOCStatus;          /* 0Eh */
+-    U32                     IOCLogInfo;         /* 10h */
+-    U32                     TransferLength;     /* 14h */
+-    U32                     TransactionContext; /* 18h */
+-    WWN_INBAND_FORMAT       Wwn;                /* 1Ch */
+-    U32                     IOCIdentifier[4];   /* 2Ch */
+-} MSG_INBAND_BUFFER_POST_REPLY, MPI_POINTER PTR_MSG_INBAND_BUFFER_POST_REPLY,
+-  MpiInbandBufferPostReply_t, MPI_POINTER pMpiInbandBufferPostReply_t;
+-
+-
+-/****************************************************************************/
+-/* Inband Send Request                                                      */
+-/****************************************************************************/
+-
+-typedef struct _MSG_INBAND_SEND_REQUEST
+-{
+-    U16                     Reserved1;          /* 00h */
+-    U8                      ChainOffset;        /* 02h */
+-    U8                      Function;           /* 03h */
+-    U16                     Reserved2;          /* 04h */
+-    U8                      Reserved3;          /* 06h */
+-    U8                      MsgFlags;           /* 07h */
+-    U32                     MsgContext;         /* 08h */
+-    U32                     Reserved4;          /* 0Ch */
+-    WWN_INBAND_FORMAT       Wwn;                /* 10h */
+-    U32                     Reserved5;          /* 20h */
+-    SGE_IO_UNION            SGL;                /* 24h */
+-} MSG_INBAND_SEND_REQUEST, MPI_POINTER PTR_MSG_INBAND_SEND_REQUEST,
+-  MpiInbandSendRequest_t , MPI_POINTER pMpiInbandSendRequest_t;
+-
+-
+-/* Inband Send reply message */
+-
+-typedef struct _MSG_INBAND_SEND_REPLY
+-{
+-    U16                     Reserved1;          /* 00h */
+-    U8                      MsgLength;          /* 02h */
+-    U8                      Function;           /* 03h */
+-    U16                     Reserved2;          /* 04h */
+-    U8                      Reserved3;          /* 06h */
+-    U8                      MsgFlags;           /* 07h */
+-    U32                     MsgContext;         /* 08h */
+-    U16                     Reserved4;          /* 0Ch */
+-    U16                     IOCStatus;          /* 0Eh */
+-    U32                     IOCLogInfo;         /* 10h */
+-    U32                     ResponseLength;     /* 14h */
+-} MSG_INBAND_SEND_REPLY, MPI_POINTER PTR_MSG_INBAND_SEND_REPLY,
+-  MpiInbandSendReply_t, MPI_POINTER pMpiInbandSendReply_t;
+-
+-
+-/****************************************************************************/
+-/* Inband Response Request                                                  */
+-/****************************************************************************/
+-
+-typedef struct _MSG_INBAND_RSP_REQUEST
+-{
+-    U16                     Reserved1;          /* 00h */
+-    U8                      ChainOffset;        /* 02h */
+-    U8                      Function;           /* 03h */
+-    U16                     Reserved2;          /* 04h */
+-    U8                      Reserved3;          /* 06h */
+-    U8                      MsgFlags;           /* 07h */
+-    U32                     MsgContext;         /* 08h */
+-    U32                     Reserved4;          /* 0Ch */
+-    WWN_INBAND_FORMAT       Wwn;                /* 10h */
+-    U32                     IOCIdentifier[4];   /* 20h */
+-    U32                     ResponseLength;     /* 30h */
+-    SGE_IO_UNION            SGL;                /* 34h */
+-} MSG_INBAND_RSP_REQUEST, MPI_POINTER PTR_MSG_INBAND_RSP_REQUEST,
+-  MpiInbandRspRequest_t , MPI_POINTER pMpiInbandRspRequest_t;
+-
+-
+-/* Inband Response reply message */
+-
+-typedef struct _MSG_INBAND_RSP_REPLY
+-{
+-    U16                     Reserved1;          /* 00h */
+-    U8                      MsgLength;          /* 02h */
+-    U8                      Function;           /* 03h */
+-    U16                     Reserved2;          /* 04h */
+-    U8                      Reserved3;          /* 06h */
+-    U8                      MsgFlags;           /* 07h */
+-    U32                     MsgContext;         /* 08h */
+-    U16                     Reserved4;          /* 0Ch */
+-    U16                     IOCStatus;          /* 0Eh */
+-    U32                     IOCLogInfo;         /* 10h */
+-} MSG_INBAND_RSP_REPLY, MPI_POINTER PTR_MSG_INBAND_RSP_REPLY,
+-  MpiInbandRspReply_t, MPI_POINTER pMpiInbandRspReply_t;
+-
+-
+-/****************************************************************************/
+-/* Inband Abort Request                                                     */
+-/****************************************************************************/
+-
+-typedef struct _MSG_INBAND_ABORT_REQUEST
+-{
+-    U8                      Reserved1;          /* 00h */
+-    U8                      AbortType;          /* 01h */
+-    U8                      ChainOffset;        /* 02h */
+-    U8                      Function;           /* 03h */
+-    U16                     Reserved2;          /* 04h */
+-    U8                      Reserved3;          /* 06h */
+-    U8                      MsgFlags;           /* 07h */
+-    U32                     MsgContext;         /* 08h */
+-    U32                     Reserved4;          /* 0Ch */
+-    U32                     ContextToAbort;     /* 10h */
+-} MSG_INBAND_ABORT_REQUEST, MPI_POINTER PTR_MSG_INBAND_ABORT_REQUEST,
+-  MpiInbandAbortRequest_t , MPI_POINTER pMpiInbandAbortRequest_t;
+-
+-#define MPI_INBAND_ABORT_TYPE_ALL_BUFFERS       (0x00)
+-#define MPI_INBAND_ABORT_TYPE_EXACT_BUFFER      (0x01)
+-#define MPI_INBAND_ABORT_TYPE_SEND_REQUEST      (0x02)
+-#define MPI_INBAND_ABORT_TYPE_RESPONSE_REQUEST  (0x03)
+-
+-
+-/* Inband Abort reply message */
+-
+-typedef struct _MSG_INBAND_ABORT_REPLY
+-{
+-    U8                      Reserved1;          /* 00h */
+-    U8                      AbortType;          /* 01h */
+-    U8                      MsgLength;          /* 02h */
+-    U8                      Function;           /* 03h */
+-    U16                     Reserved2;          /* 04h */
+-    U8                      Reserved3;          /* 06h */
+-    U8                      MsgFlags;           /* 07h */
+-    U32                     MsgContext;         /* 08h */
+-    U16                     Reserved4;          /* 0Ch */
+-    U16                     IOCStatus;          /* 0Eh */
+-    U32                     IOCLogInfo;         /* 10h */
+-} MSG_INBAND_ABORT_REPLY, MPI_POINTER PTR_MSG_INBAND_ABORT_REPLY,
+-  MpiInbandAbortReply_t, MPI_POINTER pMpiInbandAbortReply_t;
+-
+-
+-#endif
+-
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_init.h linux-2.6.22-try2/drivers/message/fusion/lsi/mpi_init.h
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_init.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/lsi/mpi_init.h	2007-12-19 15:29:23.000000000 -0500
+@@ -1,12 +1,12 @@
+ /*
+- *  Copyright (c) 2000-2006 LSI Logic Corporation.
++ *  Copyright (c) 2000-2007 LSI Logic Corporation.
+  *
+  *
+  *           Name:  mpi_init.h
+  *          Title:  MPI initiator mode messages and structures
+  *  Creation Date:  June 8, 2000
+  *
+- *    mpi_init.h Version:  01.05.08
++ *    mpi_init.h Version:  01.05.09
+  *
+  *  Version History
+  *  ---------------
+@@ -54,6 +54,8 @@
+  *                      unique in the first 32 characters.
+  *  03-27-06  01.05.07  Added Task Management type of Clear ACA.
+  *  10-11-06  01.05.08  Shortened define for Task Management type of Clear ACA.
++ *  02-28-07  01.05.09  Defined two new MsgFlags bits for SCSI Task Management
++ *                      Request: Do Not Send Task IU and Soft Reset Option.
+  *  --------------------------------------------------------------------------
+  */
+ 
+@@ -432,10 +434,14 @@
+ #define MPI_SCSITASKMGMT_TASKTYPE_CLR_ACA               (0x08)
+ 
+ /* MsgFlags bits */
++#define MPI_SCSITASKMGMT_MSGFLAGS_DO_NOT_SEND_TASK_IU   (0x01)
++
+ #define MPI_SCSITASKMGMT_MSGFLAGS_TARGET_RESET_OPTION   (0x00)
+ #define MPI_SCSITASKMGMT_MSGFLAGS_LIP_RESET_OPTION      (0x02)
+ #define MPI_SCSITASKMGMT_MSGFLAGS_LIPRESET_RESET_OPTION (0x04)
+ 
++#define MPI_SCSITASKMGMT_MSGFLAGS_SOFT_RESET_OPTION     (0x08)
++
+ /* SCSI Task Management Reply */
+ typedef struct _MSG_SCSI_TASK_MGMT_REPLY
+ {
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_ioc.h linux-2.6.22-try2/drivers/message/fusion/lsi/mpi_ioc.h
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_ioc.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/lsi/mpi_ioc.h	2007-12-19 15:29:23.000000000 -0500
+@@ -1,12 +1,12 @@
+ /*
+- *  Copyright (c) 2000-2006 LSI Logic Corporation.
++ *  Copyright (c) 2000-2007 LSI Logic Corporation.
+  *
+  *
+  *           Name:  mpi_ioc.h
+  *          Title:  MPI IOC, Port, Event, FW Download, and FW Upload messages
+  *  Creation Date:  August 11, 2000
+  *
+- *    mpi_ioc.h Version:  01.05.12
++ *    mpi_ioc.h Version:  01.05.14
+  *
+  *  Version History
+  *  ---------------
+@@ -106,6 +106,13 @@
+  *                      data structure.
+  *                      Added new ImageType values for FWDownload and FWUpload
+  *                      requests.
++ *  02-28-07  01.05.13  Added MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT for SAS
++ *                      Broadcast Event Data (replacing _RESERVED2).
++ *                      For Discovery Error Event Data DiscoveryStatus field,
++ *                      replaced _MULTPL_PATHS with _UNSUPPORTED_DEVICE and
++ *                      added _MULTI_PORT_DOMAIN.
++ *  05-24-07  01.05.14  Added Common Boot Block type to FWDownload Request.
++ *                      Added Common Boot Block type to FWUpload Request.
+  *  --------------------------------------------------------------------------
+  */
+ 
+@@ -792,7 +799,7 @@
+ 
+ #define MPI_EVENT_PRIMITIVE_CHANGE              (0x01)
+ #define MPI_EVENT_PRIMITIVE_EXPANDER            (0x03)
+-#define MPI_EVENT_PRIMITIVE_RESERVED2           (0x04)
++#define MPI_EVENT_PRIMITIVE_ASYNCHRONOUS_EVENT  (0x04)
+ #define MPI_EVENT_PRIMITIVE_RESERVED3           (0x05)
+ #define MPI_EVENT_PRIMITIVE_RESERVED4           (0x06)
+ #define MPI_EVENT_PRIMITIVE_CHANGE0_RESERVED    (0x07)
+@@ -857,8 +864,9 @@
+ #define MPI_EVENT_DSCVRY_ERR_DS_SMP_CRC_ERROR               (0x00000100)
+ #define MPI_EVENT_DSCVRY_ERR_DS_MULTPL_SUBTRACTIVE          (0x00000200)
+ #define MPI_EVENT_DSCVRY_ERR_DS_TABLE_TO_TABLE              (0x00000400)
+-#define MPI_EVENT_DSCVRY_ERR_DS_MULTPL_PATHS                (0x00000800)
++#define MPI_EVENT_DSCVRY_ERR_DS_UNSUPPORTED_DEVICE          (0x00000800)
+ #define MPI_EVENT_DSCVRY_ERR_DS_MAX_SATA_TARGETS            (0x00001000)
++#define MPI_EVENT_DSCVRY_ERR_DS_MULTI_PORT_DOMAIN           (0x00002000)
+ 
+ /* SAS SMP Error Event data */
+ 
+@@ -990,6 +998,7 @@
+ #define MPI_FW_DOWNLOAD_ITYPE_CONFIG_1          (0x07)
+ #define MPI_FW_DOWNLOAD_ITYPE_CONFIG_2          (0x08)
+ #define MPI_FW_DOWNLOAD_ITYPE_MEGARAID          (0x09)
++#define MPI_FW_DOWNLOAD_ITYPE_COMMON_BOOT_BLOCK (0x0B)
+ 
+ 
+ typedef struct _FWDownloadTCSGE
+@@ -1049,6 +1058,7 @@
+ #define MPI_FW_UPLOAD_ITYPE_CONFIG_2        (0x08)
+ #define MPI_FW_UPLOAD_ITYPE_MEGARAID        (0x09)
+ #define MPI_FW_UPLOAD_ITYPE_COMPLETE        (0x0A)
++#define MPI_FW_UPLOAD_ITYPE_COMMON_BOOT_BLOCK   (0x0B)
+ 
+ typedef struct _FWUploadTCSGE
+ {
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/lsi/mpi_raid.h linux-2.6.22-try2/drivers/message/fusion/lsi/mpi_raid.h
+--- linux-2.6.22-570/drivers/message/fusion/lsi/mpi_raid.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/lsi/mpi_raid.h	2007-12-19 15:29:23.000000000 -0500
+@@ -1,12 +1,12 @@
+ /*
+- *  Copyright (c) 2001-2005 LSI Logic Corporation.
++ *  Copyright (c) 2001-2007 LSI Logic Corporation.
+  *
+  *
+  *           Name:  mpi_raid.h
+  *          Title:  MPI RAID message and structures
+  *  Creation Date:  February 27, 2001
+  *
+- *    mpi_raid.h Version:  01.05.02
++ *    mpi_raid.h Version:  01.05.03
+  *
+  *  Version History
+  *  ---------------
+@@ -32,6 +32,8 @@
+  *  08-19-04  01.05.01  Original release for MPI v1.5.
+  *  01-15-05  01.05.02  Added defines for the two new RAID Actions for
+  *                      _SET_RESYNC_RATE and _SET_DATA_SCRUB_RATE.
++ *  02-28-07  01.05.03  Added new RAID Action, Device FW Update Mode, and
++ *                      associated defines.
+  *  --------------------------------------------------------------------------
+  */
+ 
+@@ -90,6 +92,7 @@
+ #define MPI_RAID_ACTION_INACTIVATE_VOLUME           (0x12)
+ #define MPI_RAID_ACTION_SET_RESYNC_RATE             (0x13)
+ #define MPI_RAID_ACTION_SET_DATA_SCRUB_RATE         (0x14)
++#define MPI_RAID_ACTION_DEVICE_FW_UPDATE_MODE       (0x15)
+ 
+ /* ActionDataWord defines for use with MPI_RAID_ACTION_CREATE_VOLUME action */
+ #define MPI_RAID_ACTION_ADATA_DO_NOT_SYNC           (0x00000001)
+@@ -111,6 +114,10 @@
+ /* ActionDataWord defines for use with MPI_RAID_ACTION_SET_DATA_SCRUB_RATE action */
+ #define MPI_RAID_ACTION_ADATA_DATA_SCRUB_RATE_MASK  (0x000000FF)
+ 
++/* ActionDataWord defines for use with MPI_RAID_ACTION_DEVICE_FW_UPDATE_MODE action */
++#define MPI_RAID_ACTION_ADATA_ENABLE_FW_UPDATE          (0x00000001)
++#define MPI_RAID_ACTION_ADATA_MASK_FW_UPDATE_TIMEOUT    (0x0000FF00)
++#define MPI_RAID_ACTION_ADATA_SHIFT_FW_UPDATE_TIMEOUT   (8)
+ 
+ 
+ /* RAID Action reply message */
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptbase.c linux-2.6.22-try2/drivers/message/fusion/mptbase.c
+--- linux-2.6.22-570/drivers/message/fusion/mptbase.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/mptbase.c	2007-12-19 15:29:23.000000000 -0500
+@@ -6,7 +6,7 @@
+  *      running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+  *
+  *  Copyright (c) 1999-2007 LSI Logic Corporation
+- *  (mailto:mpt_linux_developer@lsi.com)
++ *  (mailto:DL-MPTFusionLinux@lsi.com)
+  *
+  */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+@@ -64,6 +64,7 @@
+ #endif
+ 
+ #include "mptbase.h"
++#include "lsi/mpi_log_fc.h"
+ 
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+ #define my_NAME		"Fusion MPT base driver"
+@@ -6349,14 +6350,37 @@
+ static void
+ mpt_fc_log_info(MPT_ADAPTER *ioc, u32 log_info)
+ {
+-	static char *subcl_str[8] = {
+-		"FCP Initiator", "FCP Target", "LAN", "MPI Message Layer",
+-		"FC Link", "Context Manager", "Invalid Field Offset", "State Change Info"
+-	};
+-	u8 subcl = (log_info >> 24) & 0x7;
++	char *desc = "unknown";
++
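++	/* the subclass code lives in the top byte of log_info */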
++	switch (log_info & 0xFF000000) {
++	case MPI_IOCLOGINFO_FC_INIT_BASE:
++		desc = "FCP Initiator";
++		break;
++	case MPI_IOCLOGINFO_FC_TARGET_BASE:
++		desc = "FCP Target";
++		break;
++	case MPI_IOCLOGINFO_FC_LAN_BASE:
++		desc = "LAN";
++		break;
++	case MPI_IOCLOGINFO_FC_MSG_BASE:
++		desc = "MPI Message Layer";
++		break;
++	case MPI_IOCLOGINFO_FC_LINK_BASE:
++		desc = "FC Link";
++		break;
++	case MPI_IOCLOGINFO_FC_CTX_BASE:
++		desc = "Context Manager";
++		break;
++	case MPI_IOCLOGINFO_FC_INVALID_FIELD_BYTE_OFFSET:
++		desc = "Invalid Field Offset";
++		break;
++	case MPI_IOCLOGINFO_FC_STATE_CHANGE:
++		desc = "State Change Info";
++		break;
++	}
+ 
+-	printk(MYIOC_s_INFO_FMT "LogInfo(0x%08x): SubCl={%s}\n",
+-			ioc->name, log_info, subcl_str[subcl]);
++	printk(MYIOC_s_INFO_FMT "LogInfo(0x%08x): SubClass={%s}, Value=(0x%06x)\n",
++			ioc->name, log_info, desc, (log_info & 0xFFFFFF));
+ }
+ 
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptbase.h linux-2.6.22-try2/drivers/message/fusion/mptbase.h
+--- linux-2.6.22-570/drivers/message/fusion/mptbase.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/mptbase.h	2007-12-19 15:29:23.000000000 -0500
+@@ -6,7 +6,7 @@
+  *      running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+  *
+  *  Copyright (c) 1999-2007 LSI Logic Corporation
+- *  (mailto:mpt_linux_developer@lsi.com)
++ *  (mailto:DL-MPTFusionLinux@lsi.com)
+  *
+  */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+@@ -75,8 +75,8 @@
+ #define COPYRIGHT	"Copyright (c) 1999-2007 " MODULEAUTHOR
+ #endif
+ 
+-#define MPT_LINUX_VERSION_COMMON	"3.04.04"
+-#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.04.04"
++#define MPT_LINUX_VERSION_COMMON	"3.04.05"
++#define MPT_LINUX_PACKAGE_NAME		"@(#)mptlinux-3.04.05"
+ #define WHAT_MAGIC_STRING		"@" "(" "#" ")"
+ 
+ #define show_mptmod_ver(s,ver)  \
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptctl.c linux-2.6.22-try2/drivers/message/fusion/mptctl.c
+--- linux-2.6.22-570/drivers/message/fusion/mptctl.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/mptctl.c	2007-12-19 15:29:23.000000000 -0500
+@@ -5,7 +5,7 @@
+  *      running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+  *
+  *  Copyright (c) 1999-2007 LSI Logic Corporation
+- *  (mailto:mpt_linux_developer@lsi.com)
++ *  (mailto:DL-MPTFusionLinux@lsi.com)
+  *
+  */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptctl.h linux-2.6.22-try2/drivers/message/fusion/mptctl.h
+--- linux-2.6.22-570/drivers/message/fusion/mptctl.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/mptctl.h	2007-12-19 15:29:23.000000000 -0500
+@@ -6,7 +6,7 @@
+  *      running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+  *
+  *  Copyright (c) 1999-2007 LSI Logic Corporation
+- *  (mailto:mpt_linux_developer@lsi.com)
++ *  (mailto:DL-MPTFusionLinux@lsi.com)
+  *
+  */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptfc.c linux-2.6.22-try2/drivers/message/fusion/mptfc.c
+--- linux-2.6.22-570/drivers/message/fusion/mptfc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/mptfc.c	2007-12-19 15:29:23.000000000 -0500
+@@ -4,7 +4,7 @@
+  *      running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+  *
+  *  Copyright (c) 1999-2007 LSI Logic Corporation
+- *  (mailto:mpt_linux_developer@lsi.com)
++ *  (mailto:DL-MPTFusionLinux@lsi.com)
+  *
+  */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+@@ -43,7 +43,6 @@
+     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+-#include "linux_compat.h"	/* linux-2.6 tweaks */
+ #include <linux/module.h>
+ #include <linux/kernel.h>
+ #include <linux/init.h>
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptlan.c linux-2.6.22-try2/drivers/message/fusion/mptlan.c
+--- linux-2.6.22-570/drivers/message/fusion/mptlan.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/mptlan.c	2007-12-19 15:29:23.000000000 -0500
+@@ -5,7 +5,7 @@
+  *      running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+  *
+  *  Copyright (c) 2000-2007 LSI Logic Corporation
+- *  (mailto:mpt_linux_developer@lsi.com)
++ *  (mailto:DL-MPTFusionLinux@lsi.com)
+  *
+  */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptlan.h linux-2.6.22-try2/drivers/message/fusion/mptlan.h
+--- linux-2.6.22-570/drivers/message/fusion/mptlan.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/mptlan.h	2007-12-19 15:29:23.000000000 -0500
+@@ -5,7 +5,7 @@
+  *      running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+  *
+  *  Copyright (c) 2000-2007 LSI Logic Corporation
+- *  (mailto:mpt_linux_developer@lsi.com)
++ *  (mailto:DL-MPTFusionLinux@lsi.com)
+  *
+  */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptsas.c linux-2.6.22-try2/drivers/message/fusion/mptsas.c
+--- linux-2.6.22-570/drivers/message/fusion/mptsas.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/mptsas.c	2007-12-19 15:29:23.000000000 -0500
+@@ -4,7 +4,7 @@
+  *      running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+  *
+  *  Copyright (c) 1999-2007 LSI Logic Corporation
+- *  (mailto:mpt_linux_developer@lsi.com)
++ *  (mailto:DL-MPTFusionLinux@lsi.com)
+  *  Copyright (c) 2005-2007 Dell
+  */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptscsih.c linux-2.6.22-try2/drivers/message/fusion/mptscsih.c
+--- linux-2.6.22-570/drivers/message/fusion/mptscsih.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/mptscsih.c	2007-12-19 15:29:23.000000000 -0500
+@@ -4,7 +4,7 @@
+  *      running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+  *
+  *  Copyright (c) 1999-2007 LSI Logic Corporation
+- *  (mailto:mpt_linux_developer@lsi.com)
++ *  (mailto:DL-MPTFusionLinux@lsi.com)
+  *
+  */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+@@ -44,7 +44,6 @@
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+ 
+-#include "linux_compat.h"	/* linux-2.6 tweaks */
+ #include <linux/module.h>
+ #include <linux/kernel.h>
+ #include <linux/init.h>
+@@ -260,30 +259,13 @@
+ 	/* Map the data portion, if any.
+ 	 * sges_left  = 0 if no data transfer.
+ 	 */
+-	if ( (sges_left = SCpnt->use_sg) ) {
+-		sges_left = pci_map_sg(ioc->pcidev,
+-			       (struct scatterlist *) SCpnt->request_buffer,
+- 			       SCpnt->use_sg,
+-			       SCpnt->sc_data_direction);
+-		if (sges_left == 0)
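++	/* scsi_dma_map() returns the number of mapped SG entries,
++	 * zero when the command carries no data, or a negative
++	 * value if the mapping failed
++	 */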
++	sges_left = scsi_dma_map(SCpnt);
++	if (sges_left < 0)
+ 			return FAILED;
+-	} else if (SCpnt->request_bufflen) {
+-		SCpnt->SCp.dma_handle = pci_map_single(ioc->pcidev,
+-				      SCpnt->request_buffer,
+-				      SCpnt->request_bufflen,
+-				      SCpnt->sc_data_direction);
+-		dsgprintk((MYIOC_s_INFO_FMT "SG: non-SG for %p, len=%d\n",
+-				ioc->name, SCpnt, SCpnt->request_bufflen));
+-		mptscsih_add_sge((char *) &pReq->SGL,
+-			0xD1000000|MPT_SGE_FLAGS_ADDRESSING|sgdir|SCpnt->request_bufflen,
+-			SCpnt->SCp.dma_handle);
+-
+-		return SUCCESS;
+-	}
+ 
+ 	/* Handle the SG case.
+ 	 */
+-	sg = (struct scatterlist *) SCpnt->request_buffer;
++	sg = scsi_sglist(SCpnt);
+ 	sg_done  = 0;
+ 	sgeOffset = sizeof(SCSIIORequest_t) - sizeof(SGE_IO_UNION);
+ 	chainSge = NULL;
+@@ -465,7 +447,12 @@
+ 	MPT_FRAME_HDR *mf;
+ 	SEPRequest_t 	 *SEPMsg;
+ 
+-	if (ioc->bus_type == FC)
++	if (ioc->bus_type != SAS)
++		return;
++
++	/* Not supported for hidden raid components
++	 */
++	if (vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT)
+ 		return;
+ 
+ 	if ((mf = mpt_get_msg_frame(ioc->InternalCtx, ioc)) == NULL) {
+@@ -662,7 +649,7 @@
+ 		scsi_state = pScsiReply->SCSIState;
+ 		scsi_status = pScsiReply->SCSIStatus;
+ 		xfer_cnt = le32_to_cpu(pScsiReply->TransferCount);
+-		sc->resid = sc->request_bufflen - xfer_cnt;
++		scsi_set_resid(sc, scsi_bufflen(sc) - xfer_cnt);
+ 		log_info = le32_to_cpu(pScsiReply->IOCLogInfo);
+ 
+ 		/*
+@@ -767,7 +754,7 @@
+ 			break;
+ 
+ 		case MPI_IOCSTATUS_SCSI_RESIDUAL_MISMATCH:	/* 0x0049 */
+-			sc->resid = sc->request_bufflen - xfer_cnt;
++			scsi_set_resid(sc, scsi_bufflen(sc) - xfer_cnt);
+ 			if((xfer_cnt==0)||(sc->underflow > xfer_cnt))
+ 				sc->result=DID_SOFT_ERROR << 16;
+ 			else /* Sufficient data transfer occurred */
+@@ -816,7 +803,7 @@
+ 			break;
+ 
+ 		case MPI_IOCSTATUS_SCSI_DATA_OVERRUN:		/* 0x0044 */
+-			sc->resid=0;
++			scsi_set_resid(sc, 0);
+ 		case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR:	/* 0x0040 */
+ 		case MPI_IOCSTATUS_SUCCESS:			/* 0x0000 */
+ 			sc->result = (DID_OK << 16) | scsi_status;
+@@ -900,22 +887,17 @@
+ 
+ 			dreplyprintk(("%s: [%d:%d:%d:%d] resid=%d "
+ 			    "bufflen=%d xfer_cnt=%d\n", __FUNCTION__,
+-			    sc->device->host->host_no, sc->device->channel, sc->device->id,
+-			    sc->device->lun, sc->resid, sc->request_bufflen,
+-			    xfer_cnt));
++				      sc->device->host->host_no,
++				      sc->device->channel, sc->device->id,
++				      sc->device->lun, scsi_get_resid(sc),
++				      scsi_bufflen(sc), xfer_cnt));
+ 		}
+ #endif
+ 
+ 	} /* end of address reply case */
+ 
+ 	/* Unmap the DMA buffers, if any. */
+-	if (sc->use_sg) {
+-		pci_unmap_sg(ioc->pcidev, (struct scatterlist *) sc->request_buffer,
+-			    sc->use_sg, sc->sc_data_direction);
+-	} else if (sc->request_bufflen) {
+-		pci_unmap_single(ioc->pcidev, sc->SCp.dma_handle,
+-				sc->request_bufflen, sc->sc_data_direction);
+-	}
++	scsi_dma_unmap(sc);
+ 
+ 	sc->scsi_done(sc);		/* Issue the command callback */
+ 
+@@ -970,17 +952,8 @@
+ 			/* Set status, free OS resources (SG DMA buffers)
+ 			 * Do OS callback
+ 			 */
+-			if (SCpnt->use_sg) {
+-				pci_unmap_sg(ioc->pcidev,
+-					(struct scatterlist *) SCpnt->request_buffer,
+-					SCpnt->use_sg,
+-					SCpnt->sc_data_direction);
+-			} else if (SCpnt->request_bufflen) {
+-				pci_unmap_single(ioc->pcidev,
+-					SCpnt->SCp.dma_handle,
+-					SCpnt->request_bufflen,
+-					SCpnt->sc_data_direction);
+-			}
++			scsi_dma_unmap(SCpnt);
++
+ 			SCpnt->result = DID_RESET << 16;
+ 			SCpnt->host_scribble = NULL;
+ 
+@@ -1023,14 +996,19 @@
+ 			mf = (SCSIIORequest_t *)MPT_INDEX_2_MFPTR(hd->ioc, ii);
+ 			if (mf == NULL)
+ 				continue;
++			/* If the device is a hidden raid component, then its
++			/* If the device is a hidden raid component, then it's
++			 * expected that mf->Function will be RAID_SCSI_IO_PASSTHROUGH
++			if (vdevice->vtarget->tflags &
++			    MPT_TARGET_FLAGS_RAID_COMPONENT && mf->Function !=
++			    MPI_FUNCTION_RAID_SCSI_IO_PASSTHROUGH)
++				continue;
++
+ 			int_to_scsilun(vdevice->lun, &lun);
+ 			if ((mf->Bus != vdevice->vtarget->channel) ||
+ 			    (mf->TargetID != vdevice->vtarget->id) ||
+ 			    memcmp(lun.scsi_lun, mf->LUN, 8))
+ 				continue;
+-			dsprintk(( "search_running: found (sc=%p, mf = %p) "
+-			    "channel %d id %d, lun %d \n", hd->ScsiLookup[ii],
+-			    mf, mf->Bus, mf->TargetID, vdevice->lun));
+ 
+ 			/* Cleanup
+ 			 */
+@@ -1039,19 +1017,12 @@
+ 			mpt_free_msg_frame(hd->ioc, (MPT_FRAME_HDR *)mf);
+ 			if ((unsigned char *)mf != sc->host_scribble)
+ 				continue;
+-			if (sc->use_sg) {
+-				pci_unmap_sg(hd->ioc->pcidev,
+-				(struct scatterlist *) sc->request_buffer,
+-					sc->use_sg,
+-					sc->sc_data_direction);
+-			} else if (sc->request_bufflen) {
+-				pci_unmap_single(hd->ioc->pcidev,
+-					sc->SCp.dma_handle,
+-					sc->request_bufflen,
+-					sc->sc_data_direction);
+-			}
++			scsi_dma_unmap(sc);
+ 			sc->host_scribble = NULL;
+ 			sc->result = DID_NO_CONNECT << 16;
++			dsprintk(( "search_running: found (sc=%p, mf = %p) "
++			    "channel %d id %d, lun %d \n", sc, mf,
++			    vdevice->vtarget->channel, vdevice->vtarget->id, vdevice->lun));
+ 			sc->scsi_done(sc);
+ 		}
+ 	}
+@@ -1380,10 +1351,10 @@
+ 	 *    will be no data transfer!  GRRRRR...
+ 	 */
+ 	if (SCpnt->sc_data_direction == DMA_FROM_DEVICE) {
+-		datalen = SCpnt->request_bufflen;
++		datalen = scsi_bufflen(SCpnt);
+ 		scsidir = MPI_SCSIIO_CONTROL_READ;	/* DATA IN  (host<--ioc<--dev) */
+ 	} else if (SCpnt->sc_data_direction == DMA_TO_DEVICE) {
+-		datalen = SCpnt->request_bufflen;
++		datalen = scsi_bufflen(SCpnt);
+ 		scsidir = MPI_SCSIIO_CONTROL_WRITE;	/* DATA OUT (host-->ioc-->dev) */
+ 	} else {
+ 		datalen = 0;
+@@ -1768,20 +1739,45 @@
+ 	u32		 ctx2abort;
+ 	int		 scpnt_idx;
+ 	int		 retval;
+-	VirtDevice	 *vdev;
++	VirtDevice	 *vdevice;
+ 	ulong	 	 sn = SCpnt->serial_number;
++	MPT_ADAPTER	*ioc;
+ 
+ 	/* If we can't locate our host adapter structure, return FAILED status.
+ 	 */
+ 	if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL) {
+ 		SCpnt->result = DID_RESET << 16;
+ 		SCpnt->scsi_done(SCpnt);
+-		dfailprintk((KERN_INFO MYNAM ": mptscsih_abort: "
+-			   "Can't locate host! (sc=%p)\n",
+-			   SCpnt));
++		dfailprintk((KERN_INFO MYNAM ": mptscsih_abort: Can't locate "
++		    "host! (sc=%p)\n", SCpnt));
+ 		return FAILED;
+ 	}
+ 
++	ioc = hd->ioc;
++	printk(MYIOC_s_INFO_FMT "attempting task abort! (sc=%p)\n",
++	       ioc->name, SCpnt);
++	scsi_print_command(SCpnt);
++
++	vdevice = SCpnt->device->hostdata;
++	if (!vdevice || !vdevice->vtarget) {
++		dtmprintk((MYIOC_s_DEBUG_FMT "task abort: device has been "
++		    "deleted (sc=%p)\n", ioc->name, SCpnt));
++		SCpnt->result = DID_NO_CONNECT << 16;
++		SCpnt->scsi_done(SCpnt);
++		retval = 0;
++		goto out;
++	}
++
++	/* Task aborts are not supported for hidden raid components.
++	 */
++	if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) {
++		dtmprintk((MYIOC_s_DEBUG_FMT "task abort: hidden raid "
++		    "component (sc=%p)\n", ioc->name, SCpnt));
++		SCpnt->result = DID_RESET << 16;
++		retval = FAILED;
++		goto out;
++	}
++
+ 	/* Find this command
+ 	 */
+ 	if ((scpnt_idx = SCPNT_TO_LOOKUP_IDX(SCpnt)) < 0) {
+@@ -1790,21 +1786,20 @@
+ 		 */
+ 		SCpnt->result = DID_RESET << 16;
+ 		dtmprintk((KERN_INFO MYNAM ": %s: mptscsih_abort: "
+-			   "Command not in the active list! (sc=%p)\n",
+-			   hd->ioc->name, SCpnt));
+-		return SUCCESS;
++		   "Command not in the active list! (sc=%p)\n", ioc->name,
++		   SCpnt));
++		retval = 0;
++		goto out;
+ 	}
+ 
+-	if (hd->resetPending)
+-		return FAILED;
++	if (hd->resetPending) {
++		retval = FAILED;
++		goto out;
++	}
+ 
+ 	if (hd->timeouts < -1)
+ 		hd->timeouts++;
+ 
+-	printk(KERN_WARNING MYNAM ": %s: attempting task abort! (sc=%p)\n",
+-	       hd->ioc->name, SCpnt);
+-	scsi_print_command(SCpnt);
+-
+ 	/* Most important!  Set TaskMsgContext to SCpnt's MsgContext!
+ 	 * (the IO to be ABORT'd)
+ 	 *
+@@ -1817,18 +1812,17 @@
+ 
+ 	hd->abortSCpnt = SCpnt;
+ 
+-	vdev = SCpnt->device->hostdata;
+ 	retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK,
+-		vdev->vtarget->channel, vdev->vtarget->id, vdev->lun,
+-		ctx2abort, mptscsih_get_tm_timeout(hd->ioc));
++	    vdevice->vtarget->channel, vdevice->vtarget->id, vdevice->lun,
++	    ctx2abort, mptscsih_get_tm_timeout(ioc));
+ 
+ 	if (SCPNT_TO_LOOKUP_IDX(SCpnt) == scpnt_idx &&
+ 	    SCpnt->serial_number == sn)
+ 		retval = FAILED;
+ 
+-	printk (KERN_WARNING MYNAM ": %s: task abort: %s (sc=%p)\n",
+-		hd->ioc->name,
+-		((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
++ out:
++	printk(MYIOC_s_INFO_FMT "task abort: %s (sc=%p)\n",
++	    ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED"), SCpnt);
+ 
+ 	if (retval == 0)
+ 		return SUCCESS;
+@@ -1850,32 +1844,47 @@
+ {
+ 	MPT_SCSI_HOST	*hd;
+ 	int		 retval;
+-	VirtDevice	 *vdev;
++	VirtDevice	 *vdevice;
++	MPT_ADAPTER	*ioc;
+ 
+ 	/* If we can't locate our host adapter structure, return FAILED status.
+ 	 */
+ 	if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){
+-		dtmprintk((KERN_INFO MYNAM ": mptscsih_dev_reset: "
+-			   "Can't locate host! (sc=%p)\n",
+-			   SCpnt));
++		dtmprintk((KERN_INFO MYNAM ": mptscsih_dev_reset: Can't "
++		    "locate host! (sc=%p)\n", SCpnt));
+ 		return FAILED;
+ 	}
+ 
+-	if (hd->resetPending)
+-		return FAILED;
+-
+-	printk(KERN_WARNING MYNAM ": %s: attempting target reset! (sc=%p)\n",
+-	       hd->ioc->name, SCpnt);
++	ioc = hd->ioc;
++	printk(MYIOC_s_INFO_FMT "attempting target reset! (sc=%p)\n",
++	       ioc->name, SCpnt);
+ 	scsi_print_command(SCpnt);
+ 
+-	vdev = SCpnt->device->hostdata;
++	if (hd->resetPending) {
++		retval = FAILED;
++		goto out;
++	}
++
++	vdevice = SCpnt->device->hostdata;
++	if (!vdevice || !vdevice->vtarget) {
++		retval = 0;
++		goto out;
++	}
++
++	/* Target resets are not supported for hidden raid components.
++	 */
++	if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT) {
++		retval = FAILED;
++		goto out;
++	}
++
+ 	retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET,
+-		vdev->vtarget->channel, vdev->vtarget->id,
+-		0, 0, mptscsih_get_tm_timeout(hd->ioc));
++	    vdevice->vtarget->channel, vdevice->vtarget->id, 0, 0,
++	    mptscsih_get_tm_timeout(ioc));
+ 
+-	printk (KERN_WARNING MYNAM ": %s: target reset: %s (sc=%p)\n",
+-		hd->ioc->name,
+-		((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
++ out:
++	printk(MYIOC_s_INFO_FMT "target reset: %s (sc=%p)\n",
++	    ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED"), SCpnt);
+ 
+ 	if (retval == 0)
+ 		return SUCCESS;
+@@ -1899,18 +1908,19 @@
+ 	MPT_SCSI_HOST	*hd;
+ 	int		 retval;
+ 	VirtDevice	 *vdev;
++	MPT_ADAPTER	*ioc;
+ 
+ 	/* If we can't locate our host adapter structure, return FAILED status.
+ 	 */
+ 	if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){
+-		dtmprintk((KERN_INFO MYNAM ": mptscsih_bus_reset: "
+-			   "Can't locate host! (sc=%p)\n",
+-			   SCpnt ) );
++		dtmprintk((KERN_INFO MYNAM ": mptscsih_bus_reset: Can't "
++		    "locate host! (sc=%p)\n", SCpnt));
+ 		return FAILED;
+ 	}
+ 
+-	printk(KERN_WARNING MYNAM ": %s: attempting bus reset! (sc=%p)\n",
+-	       hd->ioc->name, SCpnt);
++	ioc = hd->ioc;
++	printk(MYIOC_s_INFO_FMT "attempting bus reset! (sc=%p)\n",
++	       ioc->name, SCpnt);
+ 	scsi_print_command(SCpnt);
+ 
+ 	if (hd->timeouts < -1)
+@@ -1918,11 +1928,10 @@
+ 
+ 	vdev = SCpnt->device->hostdata;
+ 	retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS,
+-		vdev->vtarget->channel, 0, 0, 0, mptscsih_get_tm_timeout(hd->ioc));
++	    vdev->vtarget->channel, 0, 0, 0, mptscsih_get_tm_timeout(ioc));
+ 
+-	printk (KERN_WARNING MYNAM ": %s: bus reset: %s (sc=%p)\n",
+-		hd->ioc->name,
+-		((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt);
++	printk(MYIOC_s_INFO_FMT "bus reset: %s (sc=%p)\n",
++	    ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED"), SCpnt);
+ 
+ 	if (retval == 0)
+ 		return SUCCESS;
+@@ -1943,37 +1952,38 @@
+ mptscsih_host_reset(struct scsi_cmnd *SCpnt)
+ {
+ 	MPT_SCSI_HOST *  hd;
+-	int              status = SUCCESS;
++	int              retval;
++	MPT_ADAPTER	*ioc;
+ 
+ 	/*  If we can't locate the host to reset, then we failed. */
+ 	if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){
+-		dtmprintk( ( KERN_INFO MYNAM ": mptscsih_host_reset: "
+-			     "Can't locate host! (sc=%p)\n",
+-			     SCpnt ) );
++		dtmprintk((KERN_INFO MYNAM ": mptscsih_host_reset: Can't "
++		    "locate host! (sc=%p)\n", SCpnt));
+ 		return FAILED;
+ 	}
+ 
+-	printk(KERN_WARNING MYNAM ": %s: Attempting host reset! (sc=%p)\n",
+-	       hd->ioc->name, SCpnt);
++	ioc = hd->ioc;
++	printk(MYIOC_s_INFO_FMT "attempting host reset! (sc=%p)\n",
++	    ioc->name, SCpnt);
+ 
+ 	/*  If our attempts to reset the host failed, then return a failed
+ 	 *  status.  The host will be taken off line by the SCSI mid-layer.
+ 	 */
+-	if (mpt_HardResetHandler(hd->ioc, CAN_SLEEP) < 0){
+-		status = FAILED;
++	if (mpt_HardResetHandler(hd->ioc, CAN_SLEEP) < 0) {
++		retval = FAILED;
+ 	} else {
+ 		/*  Make sure TM pending is cleared and TM state is set to
+ 		 *  NONE.
+ 		 */
++		retval = 0;
+ 		hd->tmPending = 0;
+ 		hd->tmState = TM_STATE_NONE;
+ 	}
+ 
+-	dtmprintk( ( KERN_INFO MYNAM ": mptscsih_host_reset: "
+-		     "Status = %s\n",
+-		     (status == SUCCESS) ? "SUCCESS" : "FAILED" ) );
++	printk(MYIOC_s_INFO_FMT "host reset: %s (sc=%p)\n",
++	    ioc->name, ((retval == 0) ? "SUCCESS" : "FAILED"), SCpnt);
+ 
+-	return status;
++	return retval;
+ }
+ 
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+@@ -3150,6 +3160,16 @@
+ {
+ 	INTERNAL_CMD		 iocmd;
+ 
++	/* Ignore hidden raid components; this case is handled when the
++	 * command is sent to the volume.
++	 */
++	if (vdevice->vtarget->tflags & MPT_TARGET_FLAGS_RAID_COMPONENT)
++		return;
++
++	if (vdevice->vtarget->type != TYPE_DISK || vdevice->vtarget->deleted ||
++	    !vdevice->configured_lun)
++		return;
++
+ 	/* Following parameters will not change
+ 	 * in this routine.
+ 	 */
+@@ -3164,8 +3184,6 @@
+ 	iocmd.id = vdevice->vtarget->id;
+ 	iocmd.lun = vdevice->lun;
+ 
+-	if ((vdevice->vtarget->type == TYPE_DISK) &&
+-	    (vdevice->configured_lun))
+ 		mptscsih_do_cmd(hd, &iocmd);
+ }
+ 
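The mptscsih.c conversions above all apply the same SCSI data-buffer
accessor pattern; a minimal before/after sketch, using the fields and
helpers exactly as they appear in the hunks:

	/* before: open-coded buffer fields and DMA teardown */
	sc->resid = sc->request_bufflen - xfer_cnt;
	if (sc->use_sg)
		pci_unmap_sg(ioc->pcidev,
			     (struct scatterlist *) sc->request_buffer,
			     sc->use_sg, sc->sc_data_direction);

	/* after: accessors hide the sg/single-buffer distinction */
	scsi_set_resid(sc, scsi_bufflen(sc) - xfer_cnt);
	scsi_dma_unmap(sc);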
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptscsih.h linux-2.6.22-try2/drivers/message/fusion/mptscsih.h
+--- linux-2.6.22-570/drivers/message/fusion/mptscsih.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/mptscsih.h	2007-12-19 15:29:23.000000000 -0500
+@@ -6,7 +6,7 @@
+  *      running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+  *
+  *  Copyright (c) 1999-2007 LSI Logic Corporation
+- *  (mailto:mpt_linux_developer@lsi.com)
++ *  (mailto:DL-MPTFusionLinux@lsi.com)
+  *
+  */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+diff -Nurb linux-2.6.22-570/drivers/message/fusion/mptspi.c linux-2.6.22-try2/drivers/message/fusion/mptspi.c
+--- linux-2.6.22-570/drivers/message/fusion/mptspi.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/fusion/mptspi.c	2007-12-19 15:29:23.000000000 -0500
+@@ -4,7 +4,7 @@
+  *      running LSI Logic Fusion MPT (Message Passing Technology) firmware.
+  *
+  *  Copyright (c) 1999-2007 LSI Logic Corporation
+- *  (mailto:mpt_linux_developer@lsi.com)
++ *  (mailto:DL-MPTFusionLinux@lsi.com)
+  *
+  */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+@@ -44,7 +44,6 @@
+ */
+ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
+ 
+-#include "linux_compat.h"	/* linux-2.6 tweaks */
+ #include <linux/module.h>
+ #include <linux/kernel.h>
+ #include <linux/init.h>
+diff -Nurb linux-2.6.22-570/drivers/message/i2o/i2o_scsi.c linux-2.6.22-try2/drivers/message/i2o/i2o_scsi.c
+--- linux-2.6.22-570/drivers/message/i2o/i2o_scsi.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/message/i2o/i2o_scsi.c	2007-12-19 15:29:23.000000000 -0500
+@@ -377,12 +377,8 @@
+ 		osm_err("SCSI error %08x\n", error);
+ 
+ 	dev = &c->pdev->dev;
+-	if (cmd->use_sg)
+-		dma_unmap_sg(dev, cmd->request_buffer, cmd->use_sg,
+-			     cmd->sc_data_direction);
+-	else if (cmd->SCp.dma_handle)
+-		dma_unmap_single(dev, cmd->SCp.dma_handle, cmd->request_bufflen,
+-				 cmd->sc_data_direction);
++
++	scsi_dma_unmap(cmd);
+ 
+ 	cmd->scsi_done(cmd);
+ 
+@@ -664,21 +660,15 @@
+ 
+ 	if (sgl_offset != SGL_OFFSET_0) {
+ 		/* write size of data addressed by SGL */
+-		*mptr++ = cpu_to_le32(SCpnt->request_bufflen);
++		*mptr++ = cpu_to_le32(scsi_bufflen(SCpnt));
+ 
+ 		/* Now fill in the SGList and command */
+-		if (SCpnt->use_sg) {
+-			if (!i2o_dma_map_sg(c, SCpnt->request_buffer,
+-					    SCpnt->use_sg,
++
++		if (scsi_sg_count(SCpnt)) {
++			if (!i2o_dma_map_sg(c, scsi_sglist(SCpnt),
++					    scsi_sg_count(SCpnt),
+ 					    SCpnt->sc_data_direction, &mptr))
+ 				goto nomem;
+-		} else {
+-			SCpnt->SCp.dma_handle =
+-			    i2o_dma_map_single(c, SCpnt->request_buffer,
+-					       SCpnt->request_bufflen,
+-					       SCpnt->sc_data_direction, &mptr);
+-			if (dma_mapping_error(SCpnt->SCp.dma_handle))
+-				goto nomem;
+ 		}
+ 	}
+ 
+diff -Nurb linux-2.6.22-570/drivers/mfd/ucb1x00-ts.c linux-2.6.22-try2/drivers/mfd/ucb1x00-ts.c
+--- linux-2.6.22-570/drivers/mfd/ucb1x00-ts.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mfd/ucb1x00-ts.c	2007-12-19 15:29:24.000000000 -0500
+@@ -209,6 +209,7 @@
+ 	DECLARE_WAITQUEUE(wait, tsk);
+ 	int valid = 0;
+ 
++	set_freezable();
+ 	add_wait_queue(&ts->irq_wait, &wait);
+ 	while (!kthread_should_stop()) {
+ 		unsigned int x, y, p;
+diff -Nurb linux-2.6.22-570/drivers/misc/asus-laptop.c linux-2.6.22-try2/drivers/misc/asus-laptop.c
+--- linux-2.6.22-570/drivers/misc/asus-laptop.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/misc/asus-laptop.c	2007-12-19 15:29:22.000000000 -0500
+@@ -737,8 +737,7 @@
+ 	struct device_attribute dev_attr_##_name = {			\
+ 		.attr = {						\
+ 			.name = __stringify(_name),			\
+-			.mode = 0,					\
+-			.owner = THIS_MODULE },				\
++			.mode = 0 },					\
+ 		.show   = NULL,						\
+ 		.store  = NULL,						\
+ 	}
+diff -Nurb linux-2.6.22-570/drivers/mmc/card/Kconfig linux-2.6.22-try2/drivers/mmc/card/Kconfig
+--- linux-2.6.22-570/drivers/mmc/card/Kconfig	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/card/Kconfig	2007-12-19 15:29:23.000000000 -0500
+@@ -14,3 +14,21 @@
+ 	  mount the filesystem. Almost everyone wishing MMC support
+ 	  should say Y or M here.
+ 
++config MMC_BLOCK_BOUNCE
++	bool "Use bounce buffer for simple hosts"
++	depends on MMC_BLOCK
++	default y
++	help
++	  SD/MMC is a high-latency protocol where it is crucial to
++	  send large requests in order to get high performance. Many
++	  controllers, however, are restricted to contiguous memory
++	  (i.e. they can't do scatter-gather), something the kernel
++	  can rarely provide.
++
++	  Say Y here to help these restricted hosts by bouncing
++	  requests back and forth from a large buffer. You will get
++	  a big performance gain at the cost of up to 64 KiB of
++	  physical memory.
++
++	  If unsure, say Y here.
++
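The bounce path keys off the host's segment limits; a minimal sketch,
assuming a hypothetical controller that can only DMA a single
contiguous region:

	/* Hypothetical host setup that MMC_BLOCK_BOUNCE targets. */
	host->max_hw_segs  = 1;		/* one segment per request         */
	host->max_seg_size = 65536;	/* so at most 64 KiB per transfer, */
	host->max_req_size = 65536;	/* i.e. 65536 / 512 = 128 sectors  */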
+diff -Nurb linux-2.6.22-570/drivers/mmc/card/block.c linux-2.6.22-try2/drivers/mmc/card/block.c
+--- linux-2.6.22-570/drivers/mmc/card/block.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/card/block.c	2007-12-19 15:29:23.000000000 -0500
+@@ -262,7 +262,9 @@
+ 		}
+ 
+ 		brq.data.sg = mq->sg;
+-		brq.data.sg_len = blk_rq_map_sg(req->q, req, brq.data.sg);
++		brq.data.sg_len = mmc_queue_map_sg(mq);
++
++		mmc_queue_bounce_pre(mq);
+ 
+ 		if (brq.data.blocks !=
+ 		    (req->nr_sectors >> (md->block_bits - 9))) {
+@@ -279,6 +281,9 @@
+ 		}
+ 
+ 		mmc_wait_for_req(card->host, &brq.mrq);
++
++		mmc_queue_bounce_post(mq);
++
+ 		if (brq.cmd.error) {
+ 			printk(KERN_ERR "%s: error %d sending read/write command\n",
+ 			       req->rq_disk->disk_name, brq.cmd.error);
+diff -Nurb linux-2.6.22-570/drivers/mmc/card/queue.c linux-2.6.22-try2/drivers/mmc/card/queue.c
+--- linux-2.6.22-570/drivers/mmc/card/queue.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/card/queue.c	2007-12-19 15:29:24.000000000 -0500
+@@ -11,12 +11,15 @@
+  */
+ #include <linux/module.h>
+ #include <linux/blkdev.h>
++#include <linux/freezer.h>
+ #include <linux/kthread.h>
+ 
+ #include <linux/mmc/card.h>
+ #include <linux/mmc/host.h>
+ #include "queue.h"
+ 
++#define MMC_QUEUE_BOUNCESZ	65536
++
+ #define MMC_QUEUE_SUSPENDED	(1 << 0)
+ 
+ /*
+@@ -42,11 +45,7 @@
+ 	struct mmc_queue *mq = d;
+ 	struct request_queue *q = mq->queue;
+ 
+-	/*
+-	 * Set iothread to ensure that we aren't put to sleep by
+-	 * the process freezing.  We handle suspension ourselves.
+-	 */
+-	current->flags |= PF_MEMALLOC|PF_NOFREEZE;
++	current->flags |= PF_MEMALLOC;
+ 
+ 	down(&mq->thread_sem);
+ 	do {
+@@ -118,6 +117,7 @@
+ 	struct mmc_host *host = card->host;
+ 	u64 limit = BLK_BOUNCE_HIGH;
+ 	int ret;
++	unsigned int bouncesz;
+ 
+ 	if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
+ 		limit = *mmc_dev(host)->dma_mask;
+@@ -127,36 +127,83 @@
+ 	if (!mq->queue)
+ 		return -ENOMEM;
+ 
++	mq->queue->queuedata = mq;
++	mq->req = NULL;
++
+ 	blk_queue_prep_rq(mq->queue, mmc_prep_request);
++
++#ifdef CONFIG_MMC_BLOCK_BOUNCE
++	if (host->max_hw_segs == 1) {
++		bouncesz = MMC_QUEUE_BOUNCESZ;
++
++		if (bouncesz > host->max_req_size)
++			bouncesz = host->max_req_size;
++		if (bouncesz > host->max_seg_size)
++			bouncesz = host->max_seg_size;
++
++		mq->bounce_buf = kmalloc(bouncesz, GFP_KERNEL);
++		if (!mq->bounce_buf) {
++			printk(KERN_WARNING "%s: unable to allocate "
++				"bounce buffer\n", mmc_card_name(card));
++		} else {
++			blk_queue_bounce_limit(mq->queue, BLK_BOUNCE_HIGH);
++			blk_queue_max_sectors(mq->queue, bouncesz / 512);
++			blk_queue_max_phys_segments(mq->queue, bouncesz / 512);
++			blk_queue_max_hw_segments(mq->queue, bouncesz / 512);
++			blk_queue_max_segment_size(mq->queue, bouncesz);
++
++			mq->sg = kmalloc(sizeof(struct scatterlist),
++				GFP_KERNEL);
++			if (!mq->sg) {
++				ret = -ENOMEM;
++				goto free_bounce_buf;
++			}
++
++			mq->bounce_sg = kmalloc(sizeof(struct scatterlist) *
++				bouncesz / 512, GFP_KERNEL);
++			if (!mq->bounce_sg) {
++				ret = -ENOMEM;
++				goto free_sg;
++			}
++		}
++	}
++#endif
++
++	if (!mq->bounce_buf) {
+ 	blk_queue_bounce_limit(mq->queue, limit);
+ 	blk_queue_max_sectors(mq->queue, host->max_req_size / 512);
+ 	blk_queue_max_phys_segments(mq->queue, host->max_phys_segs);
+ 	blk_queue_max_hw_segments(mq->queue, host->max_hw_segs);
+ 	blk_queue_max_segment_size(mq->queue, host->max_seg_size);
+ 
+-	mq->queue->queuedata = mq;
+-	mq->req = NULL;
+-
+-	mq->sg = kmalloc(sizeof(struct scatterlist) * host->max_phys_segs,
+-			 GFP_KERNEL);
++		mq->sg = kmalloc(sizeof(struct scatterlist) *
++			host->max_phys_segs, GFP_KERNEL);
+ 	if (!mq->sg) {
+ 		ret = -ENOMEM;
+ 		goto cleanup_queue;
+ 	}
++	}
+ 
+ 	init_MUTEX(&mq->thread_sem);
+ 
+ 	mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd");
+ 	if (IS_ERR(mq->thread)) {
+ 		ret = PTR_ERR(mq->thread);
+-		goto free_sg;
++		goto free_bounce_sg;
+ 	}
+ 
+ 	return 0;
+-
++ free_bounce_sg:
++	kfree(mq->bounce_sg);
++	mq->bounce_sg = NULL;
+  free_sg:
+ 	kfree(mq->sg);
+ 	mq->sg = NULL;
++ free_bounce_buf:
++	kfree(mq->bounce_buf);
++	mq->bounce_buf = NULL;
+  cleanup_queue:
+ 	blk_cleanup_queue(mq->queue);
+ 	return ret;
+@@ -178,9 +225,17 @@
+ 	/* Then terminate our worker thread */
+ 	kthread_stop(mq->thread);
+ 
++	kfree(mq->bounce_sg);
++	mq->bounce_sg = NULL;
++
+ 	kfree(mq->sg);
+ 	mq->sg = NULL;
+ 
++	kfree(mq->bounce_buf);
++	mq->bounce_buf = NULL;
++
+ 	blk_cleanup_queue(mq->queue);
+ 
+ 	mq->card = NULL;
+@@ -231,3 +286,108 @@
+ 	}
+ }
+ 
++static void copy_sg(struct scatterlist *dst, unsigned int dst_len,
++	struct scatterlist *src, unsigned int src_len)
++{
++	unsigned int chunk;
++	char *dst_buf, *src_buf;
++	unsigned int dst_size, src_size;
++
++	dst_buf = NULL;
++	src_buf = NULL;
++	dst_size = 0;
++	src_size = 0;
++
++	while (src_len) {
++		BUG_ON(dst_len == 0);
++
++		if (dst_size == 0) {
++			dst_buf = page_address(dst->page) + dst->offset;
++			dst_size = dst->length;
++		}
++
++		if (src_size == 0) {
++			src_buf = page_address(src->page) + src->offset;
++			src_size = src->length;
++		}
++
++		chunk = min(dst_size, src_size);
++
++		memcpy(dst_buf, src_buf, chunk);
++
++		dst_buf += chunk;
++		src_buf += chunk;
++		dst_size -= chunk;
++		src_size -= chunk;
++
++		if (dst_size == 0) {
++			dst++;
++			dst_len--;
++		}
++
++		if (src_size == 0) {
++			src++;
++			src_len--;
++		}
++	}
++}
++
++unsigned int mmc_queue_map_sg(struct mmc_queue *mq)
++{
++	unsigned int sg_len;
++
++	if (!mq->bounce_buf)
++		return blk_rq_map_sg(mq->queue, mq->req, mq->sg);
++
++	BUG_ON(!mq->bounce_sg);
++
++	sg_len = blk_rq_map_sg(mq->queue, mq->req, mq->bounce_sg);
++
++	mq->bounce_sg_len = sg_len;
++
++	/*
++	 * Shortcut in the event we only get a single entry.
++	 */
++	if (sg_len == 1) {
++		memcpy(mq->sg, mq->bounce_sg, sizeof(struct scatterlist));
++		return 1;
++	}
++
++	mq->sg[0].page = virt_to_page(mq->bounce_buf);
++	mq->sg[0].offset = offset_in_page(mq->bounce_buf);
++	mq->sg[0].length = 0;
++
++	while (sg_len) {
++		mq->sg[0].length += mq->bounce_sg[sg_len - 1].length;
++		sg_len--;
++	}
++
++	return 1;
++}
++
++void mmc_queue_bounce_pre(struct mmc_queue *mq)
++{
++	if (!mq->bounce_buf)
++		return;
++
++	if (mq->bounce_sg_len == 1)
++		return;
++	if (rq_data_dir(mq->req) != WRITE)
++		return;
++
++	copy_sg(mq->sg, 1, mq->bounce_sg, mq->bounce_sg_len);
++}
++
++void mmc_queue_bounce_post(struct mmc_queue *mq)
++{
++	if (!mq->bounce_buf)
++		return;
++
++	if (mq->bounce_sg_len == 1)
++		return;
++	if (rq_data_dir(mq->req) != READ)
++		return;
++
++	copy_sg(mq->bounce_sg, mq->bounce_sg_len, mq->sg, 1);
++}
++
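The intended call sequence for these three helpers is the one the
block.c hunk above adopts; schematically, with error handling elided:

	brq.data.sg     = mq->sg;
	brq.data.sg_len = mmc_queue_map_sg(mq);	/* one entry when bouncing */

	mmc_queue_bounce_pre(mq);		/* WRITE: sg -> bounce_buf */
	mmc_wait_for_req(card->host, &brq.mrq);
	mmc_queue_bounce_post(mq);		/* READ: bounce_buf -> sg  */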
+diff -Nurb linux-2.6.22-570/drivers/mmc/card/queue.h linux-2.6.22-try2/drivers/mmc/card/queue.h
+--- linux-2.6.22-570/drivers/mmc/card/queue.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/card/queue.h	2007-12-19 15:29:23.000000000 -0500
+@@ -14,6 +14,9 @@
+ 	void			*data;
+ 	struct request_queue	*queue;
+ 	struct scatterlist	*sg;
++	char			*bounce_buf;
++	struct scatterlist	*bounce_sg;
++	unsigned int		bounce_sg_len;
+ };
+ 
+ extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *);
+@@ -21,4 +24,8 @@
+ extern void mmc_queue_suspend(struct mmc_queue *);
+ extern void mmc_queue_resume(struct mmc_queue *);
+ 
++extern unsigned int mmc_queue_map_sg(struct mmc_queue *);
++extern void mmc_queue_bounce_pre(struct mmc_queue *);
++extern void mmc_queue_bounce_post(struct mmc_queue *);
++
+ #endif
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/Kconfig linux-2.6.22-try2/drivers/mmc/core/Kconfig
+--- linux-2.6.22-570/drivers/mmc/core/Kconfig	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/core/Kconfig	2007-12-19 15:29:23.000000000 -0500
+@@ -14,3 +14,16 @@
+ 	  This option is usually just for embedded systems which use
+ 	  a MMC/SD card for rootfs. Most people should say N here.
+ 
++config MMC_PASSWORDS
++	boolean "MMC card lock/unlock passwords (EXPERIMENTAL)"
++	depends on EXPERIMENTAL
++	select KEYS
++	help
++	  Say Y here to enable the use of passwords to lock and unlock
++	  MMC cards.  This uses the kernel's key retention service;
++	  request_key() is used to look up the key associated with
++	  each card.
++
++	  For example, if you have an MMC card that was locked using
++	  Symbian OS on your cell phone, you won't be able to read it
++	  on Linux without this support.
++
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/Makefile linux-2.6.22-try2/drivers/mmc/core/Makefile
+--- linux-2.6.22-570/drivers/mmc/core/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/core/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -7,5 +7,6 @@
+ endif
+ 
+ obj-$(CONFIG_MMC)		+= mmc_core.o
+-mmc_core-y			:= core.o sysfs.o mmc.o mmc_ops.o sd.o sd_ops.o
++mmc_core-y			:= core.o sysfs.o bus.o host.o mmc.o mmc_ops.o sd.o sd_ops.o
++mmc_core-$(CONFIG_MMC_PASSWORDS) += lock.o
+ 
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/bus.c linux-2.6.22-try2/drivers/mmc/core/bus.c
+--- linux-2.6.22-570/drivers/mmc/core/bus.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/mmc/core/bus.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,245 @@
++/*
++ *  linux/drivers/mmc/core/bus.c
++ *
++ *  Copyright (C) 2003 Russell King, All Rights Reserved.
++ *  Copyright (C) 2007 Pierre Ossman
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ *  MMC card bus driver model
++ */
++
++#include <linux/device.h>
++#include <linux/err.h>
++
++#include <linux/mmc/card.h>
++#include <linux/mmc/host.h>
++
++#include "sysfs.h"
++#include "bus.h"
++
++#define dev_to_mmc_card(d)	container_of(d, struct mmc_card, dev)
++#define to_mmc_driver(d)	container_of(d, struct mmc_driver, drv)
++
++static ssize_t mmc_type_show(struct device *dev,
++	struct device_attribute *attr, char *buf)
++{
++	struct mmc_card *card = dev_to_mmc_card(dev);
++
++	switch (card->type) {
++	case MMC_TYPE_MMC:
++		return sprintf(buf, "MMC\n");
++	case MMC_TYPE_SD:
++		return sprintf(buf, "SD\n");
++	default:
++		return -EFAULT;
++	}
++}
++
++static struct device_attribute mmc_dev_attrs[] = {
++	MMC_ATTR_RO(type),
++	__ATTR_NULL,
++};
++
++/*
++ * This currently matches any MMC driver to any MMC card - drivers
++ * themselves decide whether to drive a given card in their probe
++ * method.
++ *
++ * We also fail for all locked cards; drivers expect to be able to do
++ * block I/O on probe(), which is not possible while the card is locked.
++ * Device probing must be triggered again later to make the card
++ * available to the block driver.
++ */
++static int mmc_bus_match(struct device *dev, struct device_driver *drv)
++{
++	struct mmc_card *card = dev_to_mmc_card(dev);
++
++	if (mmc_card_locked(card)) {
++		dev_dbg(&card->dev, "card is locked; binding is deferred\n");
++		return 0;
++	}
++
++	return 1;
++}
++
++static int
++mmc_bus_uevent(struct device *dev, char **envp, int num_envp, char *buf,
++		int buf_size)
++{
++	struct mmc_card *card = dev_to_mmc_card(dev);
++	int retval = 0, i = 0, length = 0;
++
++#define add_env(fmt,val) do {					\
++	retval = add_uevent_var(envp, num_envp, &i,		\
++				buf, buf_size, &length,		\
++				fmt, val);			\
++	if (retval)						\
++		return retval;					\
++} while (0)
++
++	switch (card->type) {
++	case MMC_TYPE_MMC:
++		add_env("MMC_TYPE=%s", "MMC");
++		break;
++	case MMC_TYPE_SD:
++		add_env("MMC_TYPE=%s", "SD");
++		break;
++	}
++
++	add_env("MMC_NAME=%s", mmc_card_name(card));
++
++#undef add_env
++
++	envp[i] = NULL;
++
++	return 0;
++}
++
++static int mmc_bus_probe(struct device *dev)
++{
++	struct mmc_driver *drv = to_mmc_driver(dev->driver);
++	struct mmc_card *card = dev_to_mmc_card(dev);
++
++	return drv->probe(card);
++}
++
++static int mmc_bus_remove(struct device *dev)
++{
++	struct mmc_driver *drv = to_mmc_driver(dev->driver);
++	struct mmc_card *card = dev_to_mmc_card(dev);
++
++	drv->remove(card);
++
++	return 0;
++}
++
++static int mmc_bus_suspend(struct device *dev, pm_message_t state)
++{
++	struct mmc_driver *drv = to_mmc_driver(dev->driver);
++	struct mmc_card *card = dev_to_mmc_card(dev);
++	int ret = 0;
++
++	if (dev->driver && drv->suspend)
++		ret = drv->suspend(card, state);
++	return ret;
++}
++
++static int mmc_bus_resume(struct device *dev)
++{
++	struct mmc_driver *drv = to_mmc_driver(dev->driver);
++	struct mmc_card *card = dev_to_mmc_card(dev);
++	int ret = 0;
++
++	if (dev->driver && drv->resume)
++		ret = drv->resume(card);
++	return ret;
++}
++
++static struct bus_type mmc_bus_type = {
++	.name		= "mmc",
++	.dev_attrs	= mmc_dev_attrs,
++	.match		= mmc_bus_match,
++	.uevent		= mmc_bus_uevent,
++	.probe		= mmc_bus_probe,
++	.remove		= mmc_bus_remove,
++	.suspend	= mmc_bus_suspend,
++	.resume		= mmc_bus_resume,
++};
++
++int mmc_register_bus(void)
++{
++	return bus_register(&mmc_bus_type);
++}
++
++void mmc_unregister_bus(void)
++{
++	bus_unregister(&mmc_bus_type);
++}
++
++/**
++ *	mmc_register_driver - register a media driver
++ *	@drv: MMC media driver
++ */
++int mmc_register_driver(struct mmc_driver *drv)
++{
++	drv->drv.bus = &mmc_bus_type;
++	return driver_register(&drv->drv);
++}
++
++EXPORT_SYMBOL(mmc_register_driver);
++
++/**
++ *	mmc_unregister_driver - unregister a media driver
++ *	@drv: MMC media driver
++ */
++void mmc_unregister_driver(struct mmc_driver *drv)
++{
++	drv->drv.bus = &mmc_bus_type;
++	driver_unregister(&drv->drv);
++}
++
++EXPORT_SYMBOL(mmc_unregister_driver);
++
++static void mmc_release_card(struct device *dev)
++{
++	struct mmc_card *card = dev_to_mmc_card(dev);
++
++	kfree(card);
++}
++
++/*
++ * Allocate and initialise a new MMC card structure.
++ */
++struct mmc_card *mmc_alloc_card(struct mmc_host *host)
++{
++	struct mmc_card *card;
++
++	card = kmalloc(sizeof(struct mmc_card), GFP_KERNEL);
++	if (!card)
++		return ERR_PTR(-ENOMEM);
++
++	memset(card, 0, sizeof(struct mmc_card));
++
++	card->host = host;
++
++	device_initialize(&card->dev);
++
++	card->dev.parent = mmc_classdev(host);
++	card->dev.bus = &mmc_bus_type;
++	card->dev.release = mmc_release_card;
++
++	return card;
++}
++
++/*
++ * Register a new MMC card with the driver model.
++ */
++int mmc_add_card(struct mmc_card *card)
++{
++	int ret;
++
++	snprintf(card->dev.bus_id, sizeof(card->dev.bus_id),
++		 "%s:%04x", mmc_hostname(card->host), card->rca);
++
++	ret = device_add(&card->dev);
++	if (ret == 0)
++		mmc_card_set_present(card);
++
++	return ret;
++}
++
++/*
++ * Unregister a new MMC card with the driver model, and
++ * (eventually) free it.
++ */
++void mmc_remove_card(struct mmc_card *card)
++{
++	if (mmc_card_present(card))
++		device_del(&card->dev);
++
++	put_device(&card->dev);
++}
++
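For reference, a media driver binds to this bus through struct
mmc_driver; a minimal sketch, with a hypothetical driver name, based
only on the probe/remove hooks wired up above:

	static int example_probe(struct mmc_card *card)
	{
		/* card is guaranteed unlocked here; see mmc_bus_match() */
		return 0;
	}

	static void example_remove(struct mmc_card *card)
	{
	}

	static struct mmc_driver example_driver = {
		.drv		= {
			.name	= "mmc_example",
		},
		.probe		= example_probe,
		.remove		= example_remove,
	};

	/* module init/exit: mmc_register_driver()/mmc_unregister_driver() */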
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/bus.h linux-2.6.22-try2/drivers/mmc/core/bus.h
+--- linux-2.6.22-570/drivers/mmc/core/bus.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/mmc/core/bus.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,22 @@
++/*
++ *  linux/drivers/mmc/core/bus.h
++ *
++ *  Copyright (C) 2003 Russell King, All Rights Reserved.
++ *  Copyright 2007 Pierre Ossman
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++#ifndef _MMC_CORE_BUS_H
++#define _MMC_CORE_BUS_H
++
++struct mmc_card *mmc_alloc_card(struct mmc_host *host);
++int mmc_add_card(struct mmc_card *card);
++void mmc_remove_card(struct mmc_card *card);
++
++int mmc_register_bus(void);
++void mmc_unregister_bus(void);
++
++#endif
++
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/core.c linux-2.6.22-try2/drivers/mmc/core/core.c
+--- linux-2.6.22-570/drivers/mmc/core/core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/core/core.c	2007-12-19 15:29:23.000000000 -0500
+@@ -27,7 +27,9 @@
+ #include <linux/mmc/sd.h>
+ 
+ #include "core.h"
+-#include "sysfs.h"
++#include "bus.h"
++#include "host.h"
++#include "lock.h"
+ 
+ #include "mmc_ops.h"
+ #include "sd_ops.h"
+@@ -35,6 +37,25 @@
+ extern int mmc_attach_mmc(struct mmc_host *host, u32 ocr);
+ extern int mmc_attach_sd(struct mmc_host *host, u32 ocr);
+ 
++static struct workqueue_struct *workqueue;
++
++/*
++ * Internal function. Schedule delayed work in the MMC work queue.
++ */
++static int mmc_schedule_delayed_work(struct delayed_work *work,
++				     unsigned long delay)
++{
++	return queue_delayed_work(workqueue, work, delay);
++}
++
++/*
++ * Internal function. Flush all scheduled work from the MMC work queue.
++ */
++static void mmc_flush_scheduled_work(void)
++{
++	flush_workqueue(workqueue);
++}
++
+ /**
+  *	mmc_request_done - finish processing an MMC request
+  *	@host: MMC host which completed request
+@@ -369,22 +390,6 @@
+ }
+ 
+ /*
+- * Allocate a new MMC card
+- */
+-struct mmc_card *mmc_alloc_card(struct mmc_host *host)
+-{
+-	struct mmc_card *card;
+-
+-	card = kmalloc(sizeof(struct mmc_card), GFP_KERNEL);
+-	if (!card)
+-		return ERR_PTR(-ENOMEM);
+-
+-	mmc_init_card(card, host);
+-
+-	return card;
+-}
+-
+-/*
+  * Apply power to the MMC stack.  This is a two-stage process.
+  * First, we enable power to the card without the clock running.
+  * We then wait a bit for the power to stabilise.  Finally,
+@@ -512,7 +517,7 @@
+ EXPORT_SYMBOL(mmc_detect_change);
+ 
+ 
+-static void mmc_rescan(struct work_struct *work)
++void mmc_rescan(struct work_struct *work)
+ {
+ 	struct mmc_host *host =
+ 		container_of(work, struct mmc_host, detect.work);
+@@ -561,69 +566,13 @@
+ 	}
+ }
+ 
+-
+-/**
+- *	mmc_alloc_host - initialise the per-host structure.
+- *	@extra: sizeof private data structure
+- *	@dev: pointer to host device model structure
+- *
+- *	Initialise the per-host structure.
+- */
+-struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
+-{
+-	struct mmc_host *host;
+-
+-	host = mmc_alloc_host_sysfs(extra, dev);
+-	if (host) {
+-		spin_lock_init(&host->lock);
+-		init_waitqueue_head(&host->wq);
+-		INIT_DELAYED_WORK(&host->detect, mmc_rescan);
+-
+-		/*
+-		 * By default, hosts do not support SGIO or large requests.
+-		 * They have to set these according to their abilities.
+-		 */
+-		host->max_hw_segs = 1;
+-		host->max_phys_segs = 1;
+-		host->max_seg_size = PAGE_CACHE_SIZE;
+-
+-		host->max_req_size = PAGE_CACHE_SIZE;
+-		host->max_blk_size = 512;
+-		host->max_blk_count = PAGE_CACHE_SIZE / 512;
+-	}
+-
+-	return host;
+-}
+-
+-EXPORT_SYMBOL(mmc_alloc_host);
+-
+-/**
+- *	mmc_add_host - initialise host hardware
+- *	@host: mmc host
+- */
+-int mmc_add_host(struct mmc_host *host)
++void mmc_start_host(struct mmc_host *host)
+ {
+-	int ret;
+-
+-	ret = mmc_add_host_sysfs(host);
+-	if (ret == 0) {
+ 		mmc_power_off(host);
+ 		mmc_detect_change(host, 0);
+-	}
+-
+-	return ret;
+ }
+ 
+-EXPORT_SYMBOL(mmc_add_host);
+-
+-/**
+- *	mmc_remove_host - remove host hardware
+- *	@host: mmc host
+- *
+- *	Unregister and remove all cards associated with this host,
+- *	and power down the MMC bus.
+- */
+-void mmc_remove_host(struct mmc_host *host)
++void mmc_stop_host(struct mmc_host *host)
+ {
+ #ifdef CONFIG_MMC_DEBUG
+ 	unsigned long flags;
+@@ -648,24 +597,8 @@
+ 	BUG_ON(host->card);
+ 
+ 	mmc_power_off(host);
+-	mmc_remove_host_sysfs(host);
+ }
+ 
+-EXPORT_SYMBOL(mmc_remove_host);
+-
+-/**
+- *	mmc_free_host - free the host structure
+- *	@host: mmc host
+- *
+- *	Free the host once all references to it have been dropped.
+- */
+-void mmc_free_host(struct mmc_host *host)
+-{
+-	mmc_free_host_sysfs(host);
+-}
+-
+-EXPORT_SYMBOL(mmc_free_host);
+-
+ #ifdef CONFIG_PM
+ 
+ /**
+@@ -726,4 +659,47 @@
+ 
+ #endif
+ 
++static int __init mmc_init(void)
++{
++	int ret;
++
++	workqueue = create_singlethread_workqueue("kmmcd");
++	if (!workqueue)
++		return -ENOMEM;
++
++	ret = mmc_register_bus();
++	if (ret)
++		goto destroy_workqueue;
++
++	ret = mmc_register_host_class();
++	if (ret)
++		goto unregister_bus;
++
++	ret = mmc_register_key_type();
++	if (ret)
++		goto unregister_host_class;
++
++	return 0;
++
++unregister_host_class:
++	mmc_unregister_host_class();
++unregister_bus:
++	mmc_unregister_bus();
++destroy_workqueue:
++	destroy_workqueue(workqueue);
++
++	return ret;
++}
++
++static void __exit mmc_exit(void)
++{
++	mmc_unregister_key_type();
++	mmc_unregister_host_class();
++	mmc_unregister_bus();
++	destroy_workqueue(workqueue);
++}
++
++module_init(mmc_init);
++module_exit(mmc_exit);
++
+ MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/core.h linux-2.6.22-try2/drivers/mmc/core/core.h
+--- linux-2.6.22-570/drivers/mmc/core/core.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/core/core.h	2007-12-19 15:29:23.000000000 -0500
+@@ -54,8 +54,6 @@
+ u32 mmc_select_voltage(struct mmc_host *host, u32 ocr);
+ void mmc_set_timing(struct mmc_host *host, unsigned int timing);
+ 
+-struct mmc_card *mmc_alloc_card(struct mmc_host *host);
+-
+ static inline void mmc_delay(unsigned int ms)
+ {
+ 	if (ms < 1000 / HZ) {
+@@ -66,5 +64,9 @@
+ 	}
+ }
+ 
++void mmc_rescan(struct work_struct *work);
++void mmc_start_host(struct mmc_host *host);
++void mmc_stop_host(struct mmc_host *host);
++
+ #endif
+ 
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/host.c linux-2.6.22-try2/drivers/mmc/core/host.c
+--- linux-2.6.22-570/drivers/mmc/core/host.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/mmc/core/host.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,156 @@
++/*
++ *  linux/drivers/mmc/core/host.c
++ *
++ *  Copyright (C) 2003 Russell King, All Rights Reserved.
++ *  Copyright (C) 2007 Pierre Ossman
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ *  MMC host class device management
++ */
++
++#include <linux/device.h>
++#include <linux/err.h>
++#include <linux/idr.h>
++#include <linux/pagemap.h>
++
++#include <linux/mmc/host.h>
++
++#include "core.h"
++#include "host.h"
++
++#define cls_dev_to_mmc_host(d)	container_of(d, struct mmc_host, class_dev)
++
++static void mmc_host_classdev_release(struct device *dev)
++{
++	struct mmc_host *host = cls_dev_to_mmc_host(dev);
++	kfree(host);
++}
++
++static struct class mmc_host_class = {
++	.name		= "mmc_host",
++	.dev_release	= mmc_host_classdev_release,
++};
++
++int mmc_register_host_class(void)
++{
++	return class_register(&mmc_host_class);
++}
++
++void mmc_unregister_host_class(void)
++{
++	class_unregister(&mmc_host_class);
++}
++
++static DEFINE_IDR(mmc_host_idr);
++static DEFINE_SPINLOCK(mmc_host_lock);
++
++/**
++ *	mmc_alloc_host - initialise the per-host structure.
++ *	@extra: sizeof private data structure
++ *	@dev: pointer to host device model structure
++ *
++ *	Initialise the per-host structure.
++ */
++struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
++{
++	struct mmc_host *host;
++
++	host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL);
++	if (!host)
++		return NULL;
++
++	memset(host, 0, sizeof(struct mmc_host) + extra);
++
++	host->parent = dev;
++	host->class_dev.parent = dev;
++	host->class_dev.class = &mmc_host_class;
++	device_initialize(&host->class_dev);
++
++	spin_lock_init(&host->lock);
++	init_waitqueue_head(&host->wq);
++	INIT_DELAYED_WORK(&host->detect, mmc_rescan);
++
++	/*
++	 * By default, hosts do not support SGIO or large requests.
++	 * They have to set these according to their abilities.
++	 */
++	host->max_hw_segs = 1;
++	host->max_phys_segs = 1;
++	host->max_seg_size = PAGE_CACHE_SIZE;
++
++	host->max_req_size = PAGE_CACHE_SIZE;
++	host->max_blk_size = 512;
++	host->max_blk_count = PAGE_CACHE_SIZE / 512;
++
++	return host;
++}
++
++EXPORT_SYMBOL(mmc_alloc_host);
++
++/**
++ *	mmc_add_host - initialise host hardware
++ *	@host: mmc host
++ */
++int mmc_add_host(struct mmc_host *host)
++{
++	int err;
++
++	if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL))
++		return -ENOMEM;
++
++	spin_lock(&mmc_host_lock);
++	err = idr_get_new(&mmc_host_idr, host, &host->index);
++	spin_unlock(&mmc_host_lock);
++	if (err)
++		return err;
++
++	snprintf(host->class_dev.bus_id, BUS_ID_SIZE,
++		 "mmc%d", host->index);
++
++	err = device_add(&host->class_dev);
++	if (err)
++		return err;
++
++	mmc_start_host(host);
++
++	return 0;
++}
++
++EXPORT_SYMBOL(mmc_add_host);
++
++/**
++ *	mmc_remove_host - remove host hardware
++ *	@host: mmc host
++ *
++ *	Unregister and remove all cards associated with this host,
++ *	and power down the MMC bus.
++ */
++void mmc_remove_host(struct mmc_host *host)
++{
++	mmc_stop_host(host);
++
++	device_del(&host->class_dev);
++
++	spin_lock(&mmc_host_lock);
++	idr_remove(&mmc_host_idr, host->index);
++	spin_unlock(&mmc_host_lock);
++}
++
++EXPORT_SYMBOL(mmc_remove_host);
++
++/**
++ *	mmc_free_host - free the host structure
++ *	@host: mmc host
++ *
++ *	Free the host once all references to it have been dropped.
++ */
++void mmc_free_host(struct mmc_host *host)
++{
++	put_device(&host->class_dev);
++}
++
++EXPORT_SYMBOL(mmc_free_host);
++
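A host controller driver consumes this class API in the usual
alloc/configure/add order; a minimal sketch, where example_host,
example_ops and pdev are hypothetical:

	struct mmc_host *mmc;

	mmc = mmc_alloc_host(sizeof(struct example_host), &pdev->dev);
	if (!mmc)
		return -ENOMEM;

	mmc->ops          = &example_ops;	/* driver's mmc_host_ops */
	mmc->max_hw_segs  = 1;			/* see MMC_BLOCK_BOUNCE  */
	mmc->max_seg_size = 65536;

	ret = mmc_add_host(mmc);	/* registers and starts the host */
	if (ret)
		mmc_free_host(mmc);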
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/host.h linux-2.6.22-try2/drivers/mmc/core/host.h
+--- linux-2.6.22-570/drivers/mmc/core/host.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/mmc/core/host.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,18 @@
++/*
++ *  linux/drivers/mmc/core/host.h
++ *
++ *  Copyright (C) 2003 Russell King, All Rights Reserved.
++ *  Copyright 2007 Pierre Ossman
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++#ifndef _MMC_CORE_HOST_H
++#define _MMC_CORE_HOST_H
++
++int mmc_register_host_class(void);
++void mmc_unregister_host_class(void);
++
++#endif
++
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/lock.c linux-2.6.22-try2/drivers/mmc/core/lock.c
+--- linux-2.6.22-570/drivers/mmc/core/lock.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/mmc/core/lock.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,199 @@
++/*
++ *  linux/drivers/mmc/core/lock.h
++ *
++ *  Copyright 2006 Instituto Nokia de Tecnologia (INdT), All Rights Reserved.
++ *  Copyright 2007 Pierre Ossman
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ *
++ * MMC password key handling.
++ */
++
++#include <linux/device.h>
++#include <linux/key.h>
++#include <linux/err.h>
++
++#include <linux/mmc/card.h>
++#include <linux/mmc/host.h>
++#include <linux/mmc/mmc.h>
++
++#include "sysfs.h"
++#include "mmc_ops.h"
++#include "lock.h"
++
++#define MMC_KEYLEN_MAXBYTES 32
++
++#define dev_to_mmc_card(d)	container_of(d, struct mmc_card, dev)
++
++static int mmc_key_instantiate(struct key *key, const void *data, size_t datalen)
++{
++	struct mmc_key_payload *mpayload;
++	int ret;
++
++	ret = -EINVAL;
++	if (datalen == 0 || datalen > MMC_KEYLEN_MAXBYTES || !data) {
++		pr_debug("Invalid data\n");
++		goto error;
++	}
++
++	ret = key_payload_reserve(key, datalen);
++	if (ret < 0) {
++		pr_debug("ret = %d\n", ret);
++		goto error;
++	}
++
++	ret = -ENOMEM;
++	mpayload = kmalloc(sizeof(*mpayload) + datalen, GFP_KERNEL);
++	if (!mpayload) {
++		pr_debug("Unable to allocate mpayload structure\n");
++		goto error;
++	}
++	mpayload->datalen = datalen;
++	memcpy(mpayload->data, data, datalen);
++
++	rcu_assign_pointer(key->payload.data, mpayload);
++
++	/* ret = 0 if there is no error */
++	ret = 0;
++
++error:
++	return ret;
++}
++
++static int mmc_key_match(const struct key *key, const void *description)
++{
++	return strcmp(key->description, description) == 0;
++}
++
++/*
++ * dispose of the data dangling from the corpse of a mmc key
++ */
++static void mmc_key_destroy(struct key *key)
++{
++	struct mmc_key_payload *mpayload = key->payload.data;
++
++	kfree(mpayload);
++}
++
++static struct key_type mmc_key_type = {
++	.name		= "mmc",
++	.def_datalen	= MMC_KEYLEN_MAXBYTES,
++	.instantiate	= mmc_key_instantiate,
++	.match		= mmc_key_match,
++	.destroy	= mmc_key_destroy,
++};
++
++int mmc_register_key_type(void)
++{
++	return register_key_type(&mmc_key_type);
++}
++
++void mmc_unregister_key_type(void)
++{
++	unregister_key_type(&mmc_key_type);
++}
++
++static ssize_t
++mmc_lockable_show(struct device *dev, struct device_attribute *att, char *buf)
++{
++	struct mmc_card *card = dev_to_mmc_card(dev);
++
++	return sprintf(buf, "%slocked\n", mmc_card_locked(card) ? "" : "un");
++}
++
++/*
++ * implement MMC password functions: force erase, remove password, change
++ * password, unlock card and assign password.
++ */
++static ssize_t
++mmc_lockable_store(struct device *dev, struct device_attribute *att,
++	const char *data, size_t len)
++{
++	struct mmc_card *card = dev_to_mmc_card(dev);
++	int ret;
++	struct key *mmc_key;
++
++	if (!mmc_card_lockable(card))
++		return -EINVAL;
++
++	mmc_claim_host(card->host);
++
++	ret = -EINVAL;
++	if (mmc_card_locked(card) && !strncmp(data, "erase", 5)) {
++		/* forced erase only works while card is locked */
++		mmc_lock_unlock(card, NULL, MMC_LOCK_MODE_ERASE);
++		ret = len;
++	} else if (!mmc_card_locked(card) && !strncmp(data, "remove", 6)) {
++		/* remove password only works while card is unlocked */
++		mmc_key = request_key(&mmc_key_type, "mmc:key", "remove");
++
++		if (!IS_ERR(mmc_key)) {
++			ret = mmc_lock_unlock(card, mmc_key, MMC_LOCK_MODE_CLR_PWD);
++			if (!ret)
++				ret = len;
++		} else
++			dev_dbg(&card->dev, "request_key returned error %ld\n", PTR_ERR(mmc_key));
++	} else if (!mmc_card_locked(card) && ((!strncmp(data, "assign", 6)) ||
++					      (!strncmp(data, "change", 6)))) {
++		/* assign or change */
++		if (!strncmp(data, "assign", 6))
++			mmc_key = request_key(&mmc_key_type, "mmc:key", "assign");
++		else
++			mmc_key = request_key(&mmc_key_type, "mmc:key", "change");
++
++		if (!IS_ERR(mmc_key)) {
++			ret = mmc_lock_unlock(card, mmc_key, MMC_LOCK_MODE_SET_PWD);
++			if (!ret)
++				ret = len;
++		} else
++			dev_dbg(&card->dev, "request_key returned error %ld\n", PTR_ERR(mmc_key));
++	} else if (mmc_card_locked(card) && !strncmp(data, "unlock", 6)) {
++		/* unlock */
++		mmc_key = request_key(&mmc_key_type, "mmc:key", "unlock");
++		if (!IS_ERR(mmc_key)) {
++			ret = mmc_lock_unlock(card, mmc_key, MMC_LOCK_MODE_UNLOCK);
++			if (ret) {
++				dev_dbg(&card->dev, "Wrong password\n");
++				ret = -EINVAL;
++			} else {
++				mmc_release_host(card->host);
++				device_release_driver(dev);
++				ret = device_attach(dev);
++				if (!ret)
++					return -EINVAL;
++				else
++					return len;
++			}
++		} else
++			dev_dbg(&card->dev, "request_key returned error %ld\n", PTR_ERR(mmc_key));
++	}
++
++	mmc_release_host(card->host);
++	return ret;
++}
++
++static struct device_attribute mmc_dev_attr_lockable[] = {
++	__ATTR(lockable, S_IWUSR | S_IRUGO,
++		mmc_lockable_show, mmc_lockable_store),
++	__ATTR_NULL,
++};
++
++int mmc_lock_add_sysfs(struct mmc_card *card)
++{
++	if (!mmc_card_lockable(card))
++		return 0;
++
++	return mmc_add_attrs(card, mmc_dev_attr_lockable);
++}
++
++void mmc_lock_remove_sysfs(struct mmc_card *card)
++{
++	if (!mmc_card_lockable(card))
++		return;
++
++	mmc_remove_attrs(card, mmc_dev_attr_lockable);
++}
++
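From userspace, the flow this file expects is to install a key of type
"mmc" with description "mmc:key" and then write one of the commands
parsed above to the card's lockable attribute; a sketch using
libkeyutils, where the sysfs path is illustrative:

	#include <keyutils.h>
	#include <stdio.h>

	int mmc_unlock_card(const char *password, size_t len)
	{
		FILE *f;

		/* found by request_key(&mmc_key_type, "mmc:key", "unlock") */
		if (add_key("mmc", "mmc:key", password, len,
			    KEY_SPEC_USER_SESSION_KEYRING) < 0)
			return -1;

		/* bus_id format is "%s:%04x" (host name, RCA); see bus.c */
		f = fopen("/sys/bus/mmc/devices/mmc0:0001/lockable", "w");
		if (!f)
			return -1;
		fputs("unlock", f);
		fclose(f);
		return 0;
	}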
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/lock.h linux-2.6.22-try2/drivers/mmc/core/lock.h
+--- linux-2.6.22-570/drivers/mmc/core/lock.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/mmc/core/lock.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,51 @@
++/*
++ *  linux/drivers/mmc/core/lock.h
++ *
++ *  Copyright 2006 Instituto Nokia de Tecnologia (INdT), All Rights Reserved.
++ *  Copyright 2007 Pierre Ossman
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++#ifndef _MMC_CORE_LOCK_H
++#define _MMC_CORE_LOCK_H
++
++#ifdef CONFIG_MMC_PASSWORDS
++
++/* core-internal data */
++struct mmc_key_payload {
++	struct rcu_head	rcu;		/* RCU destructor */
++	unsigned short	datalen;	/* length of this data */
++	char		data[0];	/* actual data */
++};
++
++int mmc_register_key_type(void);
++void mmc_unregister_key_type(void);
++
++int mmc_lock_add_sysfs(struct mmc_card *card);
++void mmc_lock_remove_sysfs(struct mmc_card *card);
++
++#else
++
++static inline int mmc_register_key_type(void)
++{
++	return 0;
++}
++
++static inline void mmc_unregister_key_type(void)
++{
++}
++
++static inline int mmc_lock_add_sysfs(struct mmc_card *card)
++{
++	return 0;
++}
++
++static inline void mmc_lock_remove_sysfs(struct mmc_card *card)
++{
++}
++
++#endif
++
++#endif
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/mmc.c linux-2.6.22-try2/drivers/mmc/core/mmc.c
+--- linux-2.6.22-570/drivers/mmc/core/mmc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/core/mmc.c	2007-12-19 15:29:23.000000000 -0500
+@@ -18,6 +18,8 @@
+ 
+ #include "core.h"
+ #include "sysfs.h"
++#include "bus.h"
++#include "lock.h"
+ #include "mmc_ops.h"
+ 
+ static const unsigned int tran_exp[] = {
+@@ -230,19 +232,74 @@
+ 	return err;
+ }
+ 
++MMC_ATTR_FN(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1],
++	card->raw_cid[2], card->raw_cid[3]);
++MMC_ATTR_FN(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1],
++	card->raw_csd[2], card->raw_csd[3]);
++MMC_ATTR_FN(date, "%02d/%04d\n", card->cid.month, card->cid.year);
++MMC_ATTR_FN(fwrev, "0x%x\n", card->cid.fwrev);
++MMC_ATTR_FN(hwrev, "0x%x\n", card->cid.hwrev);
++MMC_ATTR_FN(manfid, "0x%06x\n", card->cid.manfid);
++MMC_ATTR_FN(name, "%s\n", card->cid.prod_name);
++MMC_ATTR_FN(oemid, "0x%04x\n", card->cid.oemid);
++MMC_ATTR_FN(serial, "0x%08x\n", card->cid.serial);
++
++static struct device_attribute mmc_dev_attrs[] = {
++	MMC_ATTR_RO(cid),
++	MMC_ATTR_RO(csd),
++	MMC_ATTR_RO(date),
++	MMC_ATTR_RO(fwrev),
++	MMC_ATTR_RO(hwrev),
++	MMC_ATTR_RO(manfid),
++	MMC_ATTR_RO(name),
++	MMC_ATTR_RO(oemid),
++	MMC_ATTR_RO(serial),
++	__ATTR_NULL,
++};
++
++/*
++ * Adds sysfs entries as relevant.
++ */
++static int mmc_sysfs_add(struct mmc_card *card)
++{
++	int ret;
++
++	ret = mmc_add_attrs(card, mmc_dev_attrs);
++	if (ret < 0)
++		return ret;
++
++	ret = mmc_lock_add_sysfs(card);
++	if (ret < 0) {
++		mmc_remove_attrs(card, mmc_dev_attrs);
++		return ret;
++	}
++
++	return 0;
++}
++
++/*
++ * Removes the sysfs entries added by mmc_sysfs_add().
++ */
++static void mmc_sysfs_remove(struct mmc_card *card)
++{
++	mmc_lock_remove_sysfs(card);
++	mmc_remove_attrs(card, mmc_dev_attrs);
++}
++
+ /*
+  * Handle the detection and initialisation of a card.
+  *
+  * In the case of a resume, "curcard" will contain the card
+  * we're trying to reinitialise.
+  */
+-static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
++static int mmc_init_card(struct mmc_host *host, u32 ocr,
+ 	struct mmc_card *oldcard)
+ {
+ 	struct mmc_card *card;
+ 	int err;
+ 	u32 cid[4];
+ 	unsigned int max_dtr;
++	u32 status;
+ 
+ 	BUG_ON(!host);
+ 	BUG_ON(!host->claimed);
+@@ -294,6 +351,15 @@
+ 
+ 	mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL);
+ 
++	/*
++	 * Check if card is locked.
++	 */
++	err = mmc_send_status(card, &status);
++	if (err != MMC_ERR_NONE)
++		goto free_card;
++	if (status & R1_CARD_IS_LOCKED)
++		mmc_card_set_locked(card);
++
+ 	if (!oldcard) {
+ 		/*
+ 		 * Fetch CSD from card.
+@@ -389,6 +455,8 @@
+ 	BUG_ON(!host);
+ 	BUG_ON(!host->card);
+ 
++	mmc_sysfs_remove(host->card);
++
+ 	mmc_remove_card(host->card);
+ 	host->card = NULL;
+ }
+@@ -413,8 +481,7 @@
+ 	mmc_release_host(host);
+ 
+ 	if (err != MMC_ERR_NONE) {
+-		mmc_remove_card(host->card);
+-		host->card = NULL;
++		mmc_remove(host);
+ 
+ 		mmc_claim_host(host);
+ 		mmc_detach_bus(host);
+@@ -434,7 +501,7 @@
+ 
+ 	mmc_claim_host(host);
+ 	mmc_deselect_cards(host);
+-	host->card->state &= ~MMC_STATE_HIGHSPEED;
++	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_LOCKED);
+ 	mmc_release_host(host);
+ }
+ 
+@@ -453,11 +520,9 @@
+ 
+ 	mmc_claim_host(host);
+ 
+-	err = mmc_sd_init_card(host, host->ocr, host->card);
++	err = mmc_init_card(host, host->ocr, host->card);
+ 	if (err != MMC_ERR_NONE) {
+-		mmc_remove_card(host->card);
+-		host->card = NULL;
+-
++		mmc_remove(host);
+ 		mmc_detach_bus(host);
+ 	}
+ 
+@@ -512,13 +577,17 @@
+ 	/*
+ 	 * Detect and init the card.
+ 	 */
+-	err = mmc_sd_init_card(host, host->ocr, NULL);
++	err = mmc_init_card(host, host->ocr, NULL);
+ 	if (err != MMC_ERR_NONE)
+ 		goto err;
+ 
+ 	mmc_release_host(host);
+ 
+-	err = mmc_register_card(host->card);
++	err = mmc_add_card(host->card);
++	if (err)
++		goto reclaim_host;
++
++	err = mmc_sysfs_add(host->card);
+ 	if (err)
+ 		goto reclaim_host;
+ 
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/mmc_ops.c linux-2.6.22-try2/drivers/mmc/core/mmc_ops.c
+--- linux-2.6.22-570/drivers/mmc/core/mmc_ops.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/core/mmc_ops.c	2007-12-19 15:29:23.000000000 -0500
+@@ -2,6 +2,8 @@
+  *  linux/drivers/mmc/mmc_ops.h
+  *
+  *  Copyright 2006-2007 Pierre Ossman
++ *  MMC password protection (C) 2006 Instituto Nokia de Tecnologia (INdT),
++ *     All Rights Reserved.
+  *
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License as published by
+@@ -12,12 +14,14 @@
+ #include <linux/types.h>
+ #include <asm/scatterlist.h>
+ #include <linux/scatterlist.h>
++#include <linux/key.h>
+ 
+ #include <linux/mmc/host.h>
+ #include <linux/mmc/card.h>
+ #include <linux/mmc/mmc.h>
+ 
+ #include "core.h"
++#include "lock.h"
+ #include "mmc_ops.h"
+ 
+ static int _mmc_select_card(struct mmc_host *host, struct mmc_card *card)
+@@ -274,3 +278,114 @@
+ 	return MMC_ERR_NONE;
+ }
+ 
++#ifdef CONFIG_MMC_PASSWORDS
++
++int mmc_lock_unlock(struct mmc_card *card, struct key *key, int mode)
++{
++	struct mmc_request mrq;
++	struct mmc_command cmd;
++	struct mmc_data data;
++	struct scatterlist sg;
++	struct mmc_key_payload *mpayload;
++	unsigned long erase_timeout;
++	int err, data_size;
++	u8 *data_buf;
++
++	mpayload = NULL;
++	data_size = 1;
++	if (!(mode & MMC_LOCK_MODE_ERASE)) {
++		mpayload = rcu_dereference(key->payload.data);
++		data_size = 2 + mpayload->datalen;
++	}
++
++	data_buf = kmalloc(data_size, GFP_KERNEL);
++	if (!data_buf)
++		return -ENOMEM;
++	memset(data_buf, 0, data_size);
++
++	data_buf[0] |= mode;
++	if (mode & MMC_LOCK_MODE_UNLOCK)
++		data_buf[0] &= ~MMC_LOCK_MODE_UNLOCK;
++
++	if (!(mode & MMC_LOCK_MODE_ERASE)) {
++		data_buf[1] = mpayload->datalen;
++		memcpy(data_buf + 2, mpayload->data, mpayload->datalen);
++	}
++
++	memset(&cmd, 0, sizeof(struct mmc_command));
++
++	cmd.opcode = MMC_SET_BLOCKLEN;
++	cmd.arg = data_size;
++	cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
++	err = mmc_wait_for_cmd(card->host, &cmd, MMC_CMD_RETRIES);
++	if (err != MMC_ERR_NONE)
++		goto out;
++
++	memset(&cmd, 0, sizeof(struct mmc_command));
++
++	cmd.opcode = MMC_LOCK_UNLOCK;
++	cmd.arg = 0;
++	cmd.flags = MMC_RSP_R1B | MMC_CMD_ADTC;
++
++	memset(&data, 0, sizeof(struct mmc_data));
++
++	mmc_set_data_timeout(&data, card, 1);
++
++	data.blksz = data_size;
++	data.blocks = 1;
++	data.flags = MMC_DATA_WRITE;
++	data.sg = &sg;
++	data.sg_len = 1;
++
++	memset(&mrq, 0, sizeof(struct mmc_request));
++
++	mrq.cmd = &cmd;
++	mrq.data = &data;
++
++	sg_init_one(&sg, data_buf, data_size);
++	mmc_wait_for_req(card->host, &mrq);
++	err = cmd.error;
++	if (err == MMC_ERR_NONE)
++		err = data.error;
++	if (err != MMC_ERR_NONE)
++		goto out;
++
++	memset(&cmd, 0, sizeof(struct mmc_command));
++
++	cmd.opcode = MMC_SEND_STATUS;
++	cmd.arg = card->rca << 16;
++	cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
++
++	/* set timeout for forced erase operation to 3 min. (see MMC spec) */
++	erase_timeout = jiffies + 180 * HZ;
++	do {
++		/* We cannot use "retries" here because the
++		 * R1_LOCK_UNLOCK_FAILED bit is cleared by subsequent reads
++		 * of the status register, hiding the error condition. */
++		err = mmc_wait_for_cmd(card->host, &cmd, 0);
++		if (err != MMC_ERR_NONE)
++			break;
++		/* the other modes don't need timeout checking */
++		if (!(mode & MMC_LOCK_MODE_ERASE))
++			continue;
++		if (time_after(jiffies, erase_timeout)) {
++			dev_dbg(&card->dev, "forced erase timed out\n");
++			err = MMC_ERR_TIMEOUT;
++			break;
++		}
++	} while (!(cmd.resp[0] & R1_READY_FOR_DATA));
++	if (cmd.resp[0] & R1_LOCK_UNLOCK_FAILED) {
++		dev_dbg(&card->dev, "LOCK_UNLOCK operation failed\n");
++		err = MMC_ERR_FAILED;
++	}
++
++	if (cmd.resp[0] & R1_CARD_IS_LOCKED)
++		mmc_card_set_locked(card);
++	else
++		card->state &= ~MMC_STATE_LOCKED;
++
++out:
++	kfree(data_buf);
++
++	return err;
++}
++
++#endif /* CONFIG_MMC_PASSWORDS */
++
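For reference, the CMD42 data block that mmc_lock_unlock() assembles in
data_buf[] above has this layout (a descriptive sketch only; the driver
builds it by hand):

	struct mmc_lock_block {
		u8 mode;	/* byte 0: MMC_LOCK_MODE_* bits        */
		u8 pwd_len;	/* byte 1: password length in bytes    */
		u8 pwd[0];	/* bytes 2..: password; a forced erase */
	};			/* sends only the single mode byte     */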
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/mmc_ops.h linux-2.6.22-try2/drivers/mmc/core/mmc_ops.h
+--- linux-2.6.22-570/drivers/mmc/core/mmc_ops.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/core/mmc_ops.h	2007-12-19 15:29:23.000000000 -0500
+@@ -12,6 +12,8 @@
+ #ifndef _MMC_MMC_OPS_H
+ #define _MMC_MMC_OPS_H
+ 
++struct key;
++
+ int mmc_select_card(struct mmc_card *card);
+ int mmc_deselect_cards(struct mmc_host *host);
+ int mmc_go_idle(struct mmc_host *host);
+@@ -22,6 +24,7 @@
+ int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd);
+ int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value);
+ int mmc_send_status(struct mmc_card *card, u32 *status);
++int mmc_lock_unlock(struct mmc_card *card, struct key *key, int mode);
+ 
+ #endif
+ 
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/sd.c linux-2.6.22-try2/drivers/mmc/core/sd.c
+--- linux-2.6.22-570/drivers/mmc/core/sd.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/core/sd.c	2007-12-19 15:29:23.000000000 -0500
+@@ -19,11 +19,11 @@
+ 
+ #include "core.h"
+ #include "sysfs.h"
++#include "bus.h"
++#include "lock.h"
+ #include "mmc_ops.h"
+ #include "sd_ops.h"
+ 
+-#include "core.h"
+-
+ static const unsigned int tran_exp[] = {
+ 	10000,		100000,		1000000,	10000000,
+ 	0,		0,		0,		0
+@@ -280,6 +280,62 @@
+ 	return err;
+ }
+ 
++MMC_ATTR_FN(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1],
++	card->raw_cid[2], card->raw_cid[3]);
++MMC_ATTR_FN(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1],
++	card->raw_csd[2], card->raw_csd[3]);
++MMC_ATTR_FN(scr, "%08x%08x\n", card->raw_scr[0], card->raw_scr[1]);
++MMC_ATTR_FN(date, "%02d/%04d\n", card->cid.month, card->cid.year);
++MMC_ATTR_FN(fwrev, "0x%x\n", card->cid.fwrev);
++MMC_ATTR_FN(hwrev, "0x%x\n", card->cid.hwrev);
++MMC_ATTR_FN(manfid, "0x%06x\n", card->cid.manfid);
++MMC_ATTR_FN(name, "%s\n", card->cid.prod_name);
++MMC_ATTR_FN(oemid, "0x%04x\n", card->cid.oemid);
++MMC_ATTR_FN(serial, "0x%08x\n", card->cid.serial);
++
++static struct device_attribute mmc_sd_dev_attrs[] = {
++	MMC_ATTR_RO(cid),
++	MMC_ATTR_RO(csd),
++	MMC_ATTR_RO(scr),
++	MMC_ATTR_RO(date),
++	MMC_ATTR_RO(fwrev),
++	MMC_ATTR_RO(hwrev),
++	MMC_ATTR_RO(manfid),
++	MMC_ATTR_RO(name),
++	MMC_ATTR_RO(oemid),
++	MMC_ATTR_RO(serial),
++	__ATTR_NULL,
++};
++
++/*
++ * Adds sysfs entries as relevant.
++ */
++static int mmc_sd_sysfs_add(struct mmc_card *card)
++{
++	int ret;
++
++	ret = mmc_add_attrs(card, mmc_sd_dev_attrs);
++	if (ret < 0)
++		return ret;
++
++	ret = mmc_lock_add_sysfs(card);
++	if (ret < 0) {
++		mmc_remove_attrs(card, mmc_sd_dev_attrs);
++		return ret;
++	}
++
++	return 0;
++}
++
++/*
++ * Removes the sysfs entries added by mmc_sd_sysfs_add().
++ */
++static void mmc_sd_sysfs_remove(struct mmc_card *card)
++{
++	mmc_lock_remove_sysfs(card);
++	mmc_remove_attrs(card, mmc_sd_dev_attrs);
++}
++
+ /*
+  * Handle the detection and initialisation of a card.
+  *
+@@ -293,6 +349,7 @@
+ 	int err;
+ 	u32 cid[4];
+ 	unsigned int max_dtr;
++	u32 status;
+ 
+ 	BUG_ON(!host);
+ 	BUG_ON(!host->claimed);
+@@ -352,6 +409,15 @@
+ 
+ 	mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL);
+ 
++	/*
++	 * Check if card is locked.
++	 */
++	err = mmc_send_status(card, &status);
++	if (err != MMC_ERR_NONE)
++		goto free_card;
++	if (status & R1_CARD_IS_LOCKED)
++		mmc_card_set_locked(card);
++
+ 	if (!oldcard) {
+ 		/*
+ 		 * Fetch CSD from card.
+@@ -463,6 +529,8 @@
+ 	BUG_ON(!host);
+ 	BUG_ON(!host->card);
+ 
++	mmc_sd_sysfs_remove(host->card);
++
+ 	mmc_remove_card(host->card);
+ 	host->card = NULL;
+ }
+@@ -487,8 +555,7 @@
+ 	mmc_release_host(host);
+ 
+ 	if (err != MMC_ERR_NONE) {
+-		mmc_remove_card(host->card);
+-		host->card = NULL;
++		mmc_sd_remove(host);
+ 
+ 		mmc_claim_host(host);
+ 		mmc_detach_bus(host);
+@@ -508,7 +575,7 @@
+ 
+ 	mmc_claim_host(host);
+ 	mmc_deselect_cards(host);
+-	host->card->state &= ~MMC_STATE_HIGHSPEED;
++	host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_LOCKED);
+ 	mmc_release_host(host);
+ }
+ 
+@@ -529,9 +596,7 @@
+ 
+ 	err = mmc_sd_init_card(host, host->ocr, host->card);
+ 	if (err != MMC_ERR_NONE) {
+-		mmc_remove_card(host->card);
+-		host->card = NULL;
+-
++		mmc_sd_remove(host);
+ 		mmc_detach_bus(host);
+ 	}
+ 
+@@ -599,7 +664,11 @@
+ 
+ 	mmc_release_host(host);
+ 
+-	err = mmc_register_card(host->card);
++	err = mmc_add_card(host->card);
++	if (err)
++		goto reclaim_host;
++
++	err = mmc_sd_sysfs_add(host->card);
+ 	if (err)
+ 		goto reclaim_host;
+ 
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/sysfs.c linux-2.6.22-try2/drivers/mmc/core/sysfs.c
+--- linux-2.6.22-570/drivers/mmc/core/sysfs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/core/sysfs.c	2007-12-19 15:29:23.000000000 -0500
+@@ -2,6 +2,7 @@
+  *  linux/drivers/mmc/core/sysfs.c
+  *
+  *  Copyright (C) 2003 Russell King, All Rights Reserved.
++ *  Copyright 2007 Pierre Ossman
+  *
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License version 2 as
+@@ -9,352 +10,34 @@
+  *
+  *  MMC sysfs/driver model support.
+  */
+-#include <linux/module.h>
+-#include <linux/init.h>
+ #include <linux/device.h>
+-#include <linux/idr.h>
+-#include <linux/workqueue.h>
+ 
+ #include <linux/mmc/card.h>
+-#include <linux/mmc/host.h>
+ 
+ #include "sysfs.h"
+ 
+-#define dev_to_mmc_card(d)	container_of(d, struct mmc_card, dev)
+-#define to_mmc_driver(d)	container_of(d, struct mmc_driver, drv)
+-#define cls_dev_to_mmc_host(d)	container_of(d, struct mmc_host, class_dev)
+-
+-#define MMC_ATTR(name, fmt, args...)					\
+-static ssize_t mmc_##name##_show (struct device *dev, struct device_attribute *attr, char *buf)	\
+-{									\
+-	struct mmc_card *card = dev_to_mmc_card(dev);			\
+-	return sprintf(buf, fmt, args);					\
+-}
+-
+-MMC_ATTR(cid, "%08x%08x%08x%08x\n", card->raw_cid[0], card->raw_cid[1],
+-	card->raw_cid[2], card->raw_cid[3]);
+-MMC_ATTR(csd, "%08x%08x%08x%08x\n", card->raw_csd[0], card->raw_csd[1],
+-	card->raw_csd[2], card->raw_csd[3]);
+-MMC_ATTR(scr, "%08x%08x\n", card->raw_scr[0], card->raw_scr[1]);
+-MMC_ATTR(date, "%02d/%04d\n", card->cid.month, card->cid.year);
+-MMC_ATTR(fwrev, "0x%x\n", card->cid.fwrev);
+-MMC_ATTR(hwrev, "0x%x\n", card->cid.hwrev);
+-MMC_ATTR(manfid, "0x%06x\n", card->cid.manfid);
+-MMC_ATTR(name, "%s\n", card->cid.prod_name);
+-MMC_ATTR(oemid, "0x%04x\n", card->cid.oemid);
+-MMC_ATTR(serial, "0x%08x\n", card->cid.serial);
+-
+-#define MMC_ATTR_RO(name) __ATTR(name, S_IRUGO, mmc_##name##_show, NULL)
+-
+-static struct device_attribute mmc_dev_attrs[] = {
+-	MMC_ATTR_RO(cid),
+-	MMC_ATTR_RO(csd),
+-	MMC_ATTR_RO(date),
+-	MMC_ATTR_RO(fwrev),
+-	MMC_ATTR_RO(hwrev),
+-	MMC_ATTR_RO(manfid),
+-	MMC_ATTR_RO(name),
+-	MMC_ATTR_RO(oemid),
+-	MMC_ATTR_RO(serial),
+-	__ATTR_NULL
+-};
+-
+-static struct device_attribute mmc_dev_attr_scr = MMC_ATTR_RO(scr);
+-
+-
+-static void mmc_release_card(struct device *dev)
+-{
+-	struct mmc_card *card = dev_to_mmc_card(dev);
+-
+-	kfree(card);
+-}
+-
+-/*
+- * This currently matches any MMC driver to any MMC card - drivers
+- * themselves make the decision whether to drive this card in their
+- * probe method.
+- */
+-static int mmc_bus_match(struct device *dev, struct device_driver *drv)
+-{
+-	return 1;
+-}
+-
+-static int
+-mmc_bus_uevent(struct device *dev, char **envp, int num_envp, char *buf,
+-		int buf_size)
+-{
+-	struct mmc_card *card = dev_to_mmc_card(dev);
+-	char ccc[13];
+-	int retval = 0, i = 0, length = 0;
+-
+-#define add_env(fmt,val) do {					\
+-	retval = add_uevent_var(envp, num_envp, &i,		\
+-				buf, buf_size, &length,		\
+-				fmt, val);			\
+-	if (retval)						\
+-		return retval;					\
+-} while (0);
+-
+-	for (i = 0; i < 12; i++)
+-		ccc[i] = card->csd.cmdclass & (1 << i) ? '1' : '0';
+-	ccc[12] = '\0';
+-
+-	add_env("MMC_CCC=%s", ccc);
+-	add_env("MMC_MANFID=%06x", card->cid.manfid);
+-	add_env("MMC_NAME=%s", mmc_card_name(card));
+-	add_env("MMC_OEMID=%04x", card->cid.oemid);
+-#undef add_env
+-	envp[i] = NULL;
+-
+-	return 0;
+-}
+-
+-static int mmc_bus_suspend(struct device *dev, pm_message_t state)
++int mmc_add_attrs(struct mmc_card *card, struct device_attribute *attrs)
+ {
+-	struct mmc_driver *drv = to_mmc_driver(dev->driver);
+-	struct mmc_card *card = dev_to_mmc_card(dev);
+-	int ret = 0;
+-
+-	if (dev->driver && drv->suspend)
+-		ret = drv->suspend(card, state);
+-	return ret;
+-}
++	int error = 0;
++	int i;
+ 
+-static int mmc_bus_resume(struct device *dev)
+-{
+-	struct mmc_driver *drv = to_mmc_driver(dev->driver);
+-	struct mmc_card *card = dev_to_mmc_card(dev);
+-	int ret = 0;
+-
+-	if (dev->driver && drv->resume)
+-		ret = drv->resume(card);
+-	return ret;
+-}
+-
+-static int mmc_bus_probe(struct device *dev)
+-{
+-	struct mmc_driver *drv = to_mmc_driver(dev->driver);
+-	struct mmc_card *card = dev_to_mmc_card(dev);
+-
+-	return drv->probe(card);
+-}
+-
+-static int mmc_bus_remove(struct device *dev)
+-{
+-	struct mmc_driver *drv = to_mmc_driver(dev->driver);
+-	struct mmc_card *card = dev_to_mmc_card(dev);
+-
+-	drv->remove(card);
+-
+-	return 0;
+-}
+-
+-static struct bus_type mmc_bus_type = {
+-	.name		= "mmc",
+-	.dev_attrs	= mmc_dev_attrs,
+-	.match		= mmc_bus_match,
+-	.uevent		= mmc_bus_uevent,
+-	.probe		= mmc_bus_probe,
+-	.remove		= mmc_bus_remove,
+-	.suspend	= mmc_bus_suspend,
+-	.resume		= mmc_bus_resume,
+-};
+-
+-/**
+- *	mmc_register_driver - register a media driver
+- *	@drv: MMC media driver
+- */
+-int mmc_register_driver(struct mmc_driver *drv)
+-{
+-	drv->drv.bus = &mmc_bus_type;
+-	return driver_register(&drv->drv);
+-}
+-
+-EXPORT_SYMBOL(mmc_register_driver);
+-
+-/**
+- *	mmc_unregister_driver - unregister a media driver
+- *	@drv: MMC media driver
+- */
+-void mmc_unregister_driver(struct mmc_driver *drv)
+-{
+-	drv->drv.bus = &mmc_bus_type;
+-	driver_unregister(&drv->drv);
+-}
+-
+-EXPORT_SYMBOL(mmc_unregister_driver);
+-
+-
+-/*
+- * Internal function.  Initialise a MMC card structure.
+- */
+-void mmc_init_card(struct mmc_card *card, struct mmc_host *host)
+-{
+-	memset(card, 0, sizeof(struct mmc_card));
+-	card->host = host;
+-	device_initialize(&card->dev);
+-	card->dev.parent = mmc_classdev(host);
+-	card->dev.bus = &mmc_bus_type;
+-	card->dev.release = mmc_release_card;
+-}
+-
+-/*
+- * Internal function.  Register a new MMC card with the driver model.
+- */
+-int mmc_register_card(struct mmc_card *card)
+-{
+-	int ret;
+-
+-	snprintf(card->dev.bus_id, sizeof(card->dev.bus_id),
+-		 "%s:%04x", mmc_hostname(card->host), card->rca);
+-
+-	ret = device_add(&card->dev);
+-	if (ret == 0) {
+-		if (mmc_card_sd(card)) {
+-			ret = device_create_file(&card->dev, &mmc_dev_attr_scr);
+-			if (ret)
+-				device_del(&card->dev);
+-		}
++	for (i = 0; attr_name(attrs[i]); i++) {
++		error = device_create_file(&card->dev, &attrs[i]);
++		if (error) {
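++			/* Roll back the attribute files created so far. */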
++			while (--i >= 0)
++				device_remove_file(&card->dev, &attrs[i]);
++			break;
+ 	}
+-	if (ret == 0)
+-		mmc_card_set_present(card);
+-	return ret;
+-}
+-
+-/*
+- * Internal function.  Unregister a new MMC card with the
+- * driver model, and (eventually) free it.
+- */
+-void mmc_remove_card(struct mmc_card *card)
+-{
+-	if (mmc_card_present(card)) {
+-		if (mmc_card_sd(card))
+-			device_remove_file(&card->dev, &mmc_dev_attr_scr);
+-
+-		device_del(&card->dev);
+ 	}
+ 
+-	put_device(&card->dev);
+-}
+-
+-
+-static void mmc_host_classdev_release(struct device *dev)
+-{
+-	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+-	kfree(host);
+-}
+-
+-static struct class mmc_host_class = {
+-	.name		= "mmc_host",
+-	.dev_release	= mmc_host_classdev_release,
+-};
+-
+-static DEFINE_IDR(mmc_host_idr);
+-static DEFINE_SPINLOCK(mmc_host_lock);
+-
+-/*
+- * Internal function. Allocate a new MMC host.
+- */
+-struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev)
+-{
+-	struct mmc_host *host;
+-
+-	host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL);
+-	if (host) {
+-		memset(host, 0, sizeof(struct mmc_host) + extra);
+-
+-		host->parent = dev;
+-		host->class_dev.parent = dev;
+-		host->class_dev.class = &mmc_host_class;
+-		device_initialize(&host->class_dev);
+-	}
+-
+-	return host;
+-}
+-
+-/*
+- * Internal function. Register a new MMC host with the MMC class.
+- */
+-int mmc_add_host_sysfs(struct mmc_host *host)
+-{
+-	int err;
+-
+-	if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL))
+-		return -ENOMEM;
+-
+-	spin_lock(&mmc_host_lock);
+-	err = idr_get_new(&mmc_host_idr, host, &host->index);
+-	spin_unlock(&mmc_host_lock);
+-	if (err)
+-		return err;
+-
+-	snprintf(host->class_dev.bus_id, BUS_ID_SIZE,
+-		 "mmc%d", host->index);
+-
+-	return device_add(&host->class_dev);
+-}
+-
+-/*
+- * Internal function. Unregister a MMC host with the MMC class.
+- */
+-void mmc_remove_host_sysfs(struct mmc_host *host)
+-{
+-	device_del(&host->class_dev);
+-
+-	spin_lock(&mmc_host_lock);
+-	idr_remove(&mmc_host_idr, host->index);
+-	spin_unlock(&mmc_host_lock);
++	return error;
+ }
+ 
+-/*
+- * Internal function. Free a MMC host.
+- */
+-void mmc_free_host_sysfs(struct mmc_host *host)
++void mmc_remove_attrs(struct mmc_card *card, struct device_attribute *attrs)
+ {
+-	put_device(&host->class_dev);
+-}
++	int i;
+ 
+-static struct workqueue_struct *workqueue;
+-
+-/*
+- * Internal function. Schedule delayed work in the MMC work queue.
+- */
+-int mmc_schedule_delayed_work(struct delayed_work *work, unsigned long delay)
+-{
+-	return queue_delayed_work(workqueue, work, delay);
+-}
+-
+-/*
+- * Internal function. Flush all scheduled work from the MMC work queue.
+- */
+-void mmc_flush_scheduled_work(void)
+-{
+-	flush_workqueue(workqueue);
+-}
+-
+-static int __init mmc_init(void)
+-{
+-	int ret;
+-
+-	workqueue = create_singlethread_workqueue("kmmcd");
+-	if (!workqueue)
+-		return -ENOMEM;
+-
+-	ret = bus_register(&mmc_bus_type);
+-	if (ret == 0) {
+-		ret = class_register(&mmc_host_class);
+-		if (ret)
+-			bus_unregister(&mmc_bus_type);
+-	}
+-	return ret;
+-}
+-
+-static void __exit mmc_exit(void)
+-{
+-	class_unregister(&mmc_host_class);
+-	bus_unregister(&mmc_bus_type);
+-	destroy_workqueue(workqueue);
++	for (i = 0; attr_name(attrs[i]); i++)
++		device_remove_file(&card->dev, &attrs[i]);
+ }
+ 
+-module_init(mmc_init);
+-module_exit(mmc_exit);
+diff -Nurb linux-2.6.22-570/drivers/mmc/core/sysfs.h linux-2.6.22-try2/drivers/mmc/core/sysfs.h
+--- linux-2.6.22-570/drivers/mmc/core/sysfs.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/core/sysfs.h	2007-12-19 15:29:23.000000000 -0500
+@@ -11,17 +11,16 @@
+ #ifndef _MMC_CORE_SYSFS_H
+ #define _MMC_CORE_SYSFS_H
+ 
+-void mmc_init_card(struct mmc_card *card, struct mmc_host *host);
+-int mmc_register_card(struct mmc_card *card);
+-void mmc_remove_card(struct mmc_card *card);
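++/*
++ * Generates a sysfs show() method for an mmc_card field, e.g.
++ * MMC_ATTR_FN(serial, "0x%08x\n", card->cid.serial) defines
++ * mmc_serial_show(); pair it with MMC_ATTR_RO(serial) below.
++ */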
++#define MMC_ATTR_FN(name, fmt, args...)					\
++static ssize_t mmc_##name##_show (struct device *dev, struct device_attribute *attr, char *buf)	\
++{									\
++	struct mmc_card *card = container_of(dev, struct mmc_card, dev);\
++	return sprintf(buf, fmt, args);					\
++}
+ 
+-struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev);
+-int mmc_add_host_sysfs(struct mmc_host *host);
+-void mmc_remove_host_sysfs(struct mmc_host *host);
+-void mmc_free_host_sysfs(struct mmc_host *host);
++#define MMC_ATTR_RO(name) __ATTR(name, S_IRUGO, mmc_##name##_show, NULL)
+ 
+-int mmc_schedule_work(struct work_struct *work);
+-int mmc_schedule_delayed_work(struct delayed_work *work, unsigned long delay);
+-void mmc_flush_scheduled_work(void);
++int mmc_add_attrs(struct mmc_card *card, struct device_attribute *attrs);
++void mmc_remove_attrs(struct mmc_card *card, struct device_attribute *attrs);
+ 
+ #endif
+diff -Nurb linux-2.6.22-570/drivers/mmc/host/sdhci.c linux-2.6.22-try2/drivers/mmc/host/sdhci.c
+--- linux-2.6.22-570/drivers/mmc/host/sdhci.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mmc/host/sdhci.c	2007-12-19 15:29:23.000000000 -0500
+@@ -70,6 +70,14 @@
+ 		.driver_data	= SDHCI_QUIRK_SINGLE_POWER_WRITE,
+ 	},
+ 
++	{
++		.vendor		= PCI_VENDOR_ID_ENE,
++		.device		= PCI_DEVICE_ID_ENE_CB712_SD_2,
++		.subvendor	= PCI_ANY_ID,
++		.subdevice	= PCI_ANY_ID,
++		.driver_data	= SDHCI_QUIRK_SINGLE_POWER_WRITE,
++	},
++
+ 	{	/* Generic SD host controller */
+ 		PCI_DEVICE_CLASS((PCI_CLASS_SYSTEM_SDHCI << 8), 0xFFFF00)
+ 	},
+diff -Nurb linux-2.6.22-570/drivers/mtd/mtd_blkdevs.c linux-2.6.22-try2/drivers/mtd/mtd_blkdevs.c
+--- linux-2.6.22-570/drivers/mtd/mtd_blkdevs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mtd/mtd_blkdevs.c	2007-12-19 15:29:24.000000000 -0500
+@@ -16,6 +16,7 @@
+ #include <linux/mtd/mtd.h>
+ #include <linux/blkdev.h>
+ #include <linux/blkpg.h>
++#include <linux/freezer.h>
+ #include <linux/spinlock.h>
+ #include <linux/hdreg.h>
+ #include <linux/init.h>
+@@ -80,7 +81,7 @@
+ 	struct request_queue *rq = tr->blkcore_priv->rq;
+ 
+ 	/* we might get involved when memory gets low, so use PF_MEMALLOC */
+-	current->flags |= PF_MEMALLOC | PF_NOFREEZE;
++	current->flags |= PF_MEMALLOC;
+ 
+ 	spin_lock_irq(rq->queue_lock);
+ 	while (!kthread_should_stop()) {
+diff -Nurb linux-2.6.22-570/drivers/mtd/ubi/wl.c linux-2.6.22-try2/drivers/mtd/ubi/wl.c
+--- linux-2.6.22-570/drivers/mtd/ubi/wl.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/mtd/ubi/wl.c	2007-12-19 15:29:24.000000000 -0500
+@@ -1346,6 +1346,7 @@
+ 	ubi_msg("background thread \"%s\" started, PID %d",
+ 		ubi->bgt_name, current->pid);
+ 
++	set_freezable();
+ 	for (;;) {
+ 		int err;
+ 
+diff -Nurb linux-2.6.22-570/drivers/net/3c523.c linux-2.6.22-try2/drivers/net/3c523.c
+--- linux-2.6.22-570/drivers/net/3c523.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/3c523.c	2007-12-19 15:29:23.000000000 -0500
+@@ -990,7 +990,7 @@
+ 				if (skb != NULL) {
+ 					skb_reserve(skb, 2);	/* 16 byte alignment */
+ 					skb_put(skb,totlen);
+-					eth_copy_and_sum(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen,0);
++					skb_copy_to_linear_data(skb, (char *) p->base+(unsigned long) rbd->buffer,totlen);
+ 					skb->protocol = eth_type_trans(skb, dev);
+ 					netif_rx(skb);
+ 					dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/7990.c linux-2.6.22-try2/drivers/net/7990.c
+--- linux-2.6.22-570/drivers/net/7990.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/7990.c	2007-12-19 15:29:23.000000000 -0500
+@@ -333,9 +333,9 @@
+ 
+                         skb_reserve (skb, 2);           /* 16 byte align */
+                         skb_put (skb, len);             /* make room */
+-                        eth_copy_and_sum(skb,
++                        skb_copy_to_linear_data(skb,
+                                          (unsigned char *)&(ib->rx_buf [lp->rx_new][0]),
+-                                         len, 0);
++                                         len);
+                         skb->protocol = eth_type_trans (skb, dev);
+ 			netif_rx (skb);
+ 			dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/8139too.c linux-2.6.22-try2/drivers/net/8139too.c
+--- linux-2.6.22-570/drivers/net/8139too.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/8139too.c	2007-12-19 15:29:23.000000000 -0500
+@@ -2017,7 +2017,7 @@
+ #if RX_BUF_IDX == 3
+ 			wrap_copy(skb, rx_ring, ring_offset+4, pkt_size);
+ #else
+-			eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
++			skb_copy_to_linear_data (skb, &rx_ring[ring_offset + 4], pkt_size);
+ #endif
+ 			skb_put (skb, pkt_size);
+ 
+diff -Nurb linux-2.6.22-570/drivers/net/Kconfig linux-2.6.22-try2/drivers/net/Kconfig
+--- linux-2.6.22-570/drivers/net/Kconfig	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/net/Kconfig	2007-12-19 15:29:23.000000000 -0500
+@@ -2555,6 +2555,18 @@
+ 
+ source "drivers/s390/net/Kconfig"
+ 
++config XEN_NETDEV_FRONTEND
++	tristate "Xen network device frontend driver"
++	depends on XEN
++	default y
++	help
++	  The network device frontend driver allows the kernel to
++	  access network devices exported by a virtual
++	  machine containing a physical network device driver. The
++	  frontend driver is intended for unprivileged guest domains;
++	  if you are compiling a kernel for a Xen guest, you almost
++	  certainly want to enable this.
++
+ config ISERIES_VETH
+ 	tristate "iSeries Virtual Ethernet driver support"
+ 	depends on PPC_ISERIES
+diff -Nurb linux-2.6.22-570/drivers/net/Makefile linux-2.6.22-try2/drivers/net/Makefile
+--- linux-2.6.22-570/drivers/net/Makefile	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/net/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -224,7 +224,10 @@
+ obj-$(CONFIG_ENP2611_MSF_NET) += ixp2000/
+ 
+ obj-$(CONFIG_NETCONSOLE) += netconsole.o
++obj-$(CONFIG_KGDBOE) += kgdboe.o
+ 
+ obj-$(CONFIG_FS_ENET) += fs_enet/
+ 
+ obj-$(CONFIG_NETXEN_NIC) += netxen/
++obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
++
+diff -Nurb linux-2.6.22-570/drivers/net/a2065.c linux-2.6.22-try2/drivers/net/a2065.c
+--- linux-2.6.22-570/drivers/net/a2065.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/a2065.c	2007-12-19 15:29:23.000000000 -0500
+@@ -322,9 +322,9 @@
+ 
+ 			skb_reserve (skb, 2);		/* 16 byte align */
+ 			skb_put (skb, len);		/* make room */
+-			eth_copy_and_sum(skb,
++			skb_copy_to_linear_data(skb,
+ 					 (unsigned char *)&(ib->rx_buf [lp->rx_new][0]),
+-					 len, 0);
++					 len);
+ 			skb->protocol = eth_type_trans (skb, dev);
+ 			netif_rx (skb);
+ 			dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/ariadne.c linux-2.6.22-try2/drivers/net/ariadne.c
+--- linux-2.6.22-570/drivers/net/ariadne.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/ariadne.c	2007-12-19 15:29:23.000000000 -0500
+@@ -746,7 +746,7 @@
+ 
+ 	    skb_reserve(skb,2);		/* 16 byte align */
+ 	    skb_put(skb,pkt_len);	/* Make room */
+-	    eth_copy_and_sum(skb, (char *)priv->rx_buff[entry], pkt_len,0);
++	    skb_copy_to_linear_data(skb, (char *)priv->rx_buff[entry], pkt_len);
+ 	    skb->protocol=eth_type_trans(skb,dev);
+ #if 0
+ 	    printk(KERN_DEBUG "RX pkt type 0x%04x from ",
+diff -Nurb linux-2.6.22-570/drivers/net/arm/ep93xx_eth.c linux-2.6.22-try2/drivers/net/arm/ep93xx_eth.c
+--- linux-2.6.22-570/drivers/net/arm/ep93xx_eth.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/arm/ep93xx_eth.c	2007-12-19 15:29:23.000000000 -0500
+@@ -258,7 +258,7 @@
+ 			skb_reserve(skb, 2);
+ 			dma_sync_single(NULL, ep->descs->rdesc[entry].buf_addr,
+ 						length, DMA_FROM_DEVICE);
+-			eth_copy_and_sum(skb, ep->rx_buf[entry], length, 0);
++			skb_copy_to_linear_data(skb, ep->rx_buf[entry], length);
+ 			skb_put(skb, length);
+ 			skb->protocol = eth_type_trans(skb, dev);
+ 
+diff -Nurb linux-2.6.22-570/drivers/net/au1000_eth.c linux-2.6.22-try2/drivers/net/au1000_eth.c
+--- linux-2.6.22-570/drivers/net/au1000_eth.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/au1000_eth.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1205,8 +1205,8 @@
+ 				continue;
+ 			}
+ 			skb_reserve(skb, 2);	/* 16 byte IP header align */
+-			eth_copy_and_sum(skb,
+-				(unsigned char *)pDB->vaddr, frmlen, 0);
++			skb_copy_to_linear_data(skb,
++				(unsigned char *)pDB->vaddr, frmlen);
+ 			skb_put(skb, frmlen);
+ 			skb->protocol = eth_type_trans(skb, dev);
+ 			netif_rx(skb);	/* pass the packet to upper layers */
+diff -Nurb linux-2.6.22-570/drivers/net/bnx2.c linux-2.6.22-try2/drivers/net/bnx2.c
+--- linux-2.6.22-570/drivers/net/bnx2.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/bnx2.c	2007-12-19 15:29:23.000000000 -0500
+@@ -6490,10 +6490,10 @@
+ 	memcpy(dev->perm_addr, bp->mac_addr, 6);
+ 	bp->name = board_info[ent->driver_data].name;
+ 
+-	if (CHIP_NUM(bp) == CHIP_NUM_5709)
+-		dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
+-	else
+ 		dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
++	if (CHIP_NUM(bp) == CHIP_NUM_5709)
++		dev->features |= NETIF_F_IPV6_CSUM;
++
+ #ifdef BCM_VLAN
+ 	dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
+ #endif
+diff -Nurb linux-2.6.22-570/drivers/net/dl2k.c linux-2.6.22-try2/drivers/net/dl2k.c
+--- linux-2.6.22-570/drivers/net/dl2k.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/dl2k.c	2007-12-19 15:29:23.000000000 -0500
+@@ -866,9 +866,9 @@
+ 							    PCI_DMA_FROMDEVICE);
+ 				/* 16 byte align the IP header */
+ 				skb_reserve (skb, 2);
+-				eth_copy_and_sum (skb,
++				skb_copy_to_linear_data (skb,
+ 						  np->rx_skbuff[entry]->data,
+-						  pkt_len, 0);
++						  pkt_len);
+ 				skb_put (skb, pkt_len);
+ 				pci_dma_sync_single_for_device(np->pdev,
+ 				  			       desc->fraginfo &
+diff -Nurb linux-2.6.22-570/drivers/net/dummy.c linux-2.6.22-try2/drivers/net/dummy.c
+--- linux-2.6.22-570/drivers/net/dummy.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/dummy.c	2007-12-19 15:29:23.000000000 -0500
+@@ -34,11 +34,17 @@
+ #include <linux/etherdevice.h>
+ #include <linux/init.h>
+ #include <linux/moduleparam.h>
++#include <linux/rtnetlink.h>
++#include <net/rtnetlink.h>
++
++struct dummy_priv {
++	struct net_device *dev;
++	struct list_head list;
++};
+ 
+ static int numdummies = 1;
+ 
+ static int dummy_xmit(struct sk_buff *skb, struct net_device *dev);
+-static struct net_device_stats *dummy_get_stats(struct net_device *dev);
+ 
+ static int dummy_set_address(struct net_device *dev, void *p)
+ {
+@@ -56,13 +62,13 @@
+ {
+ }
+ 
+-static void __init dummy_setup(struct net_device *dev)
++static void dummy_setup(struct net_device *dev)
+ {
+ 	/* Initialize the device structure. */
+-	dev->get_stats = dummy_get_stats;
+ 	dev->hard_start_xmit = dummy_xmit;
+ 	dev->set_multicast_list = set_multicast_list;
+ 	dev->set_mac_address = dummy_set_address;
++	dev->destructor = free_netdev;
+ 
+ 	/* Fill in device structure with ethernet-generic values. */
+ 	ether_setup(dev);
+@@ -76,77 +82,114 @@
+ 
+ static int dummy_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+-	struct net_device_stats *stats = netdev_priv(dev);
+-
+-	stats->tx_packets++;
+-	stats->tx_bytes+=skb->len;
++	dev->stats.tx_packets++;
++	dev->stats.tx_bytes += skb->len;
+ 
+ 	dev_kfree_skb(skb);
+ 	return 0;
+ }
+ 
+-static struct net_device_stats *dummy_get_stats(struct net_device *dev)
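++/* All dummy devices created by this module; walked under the RTNL lock. */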
++static LIST_HEAD(dummies);
++
++static int dummy_newlink(struct net_device *dev,
++			 struct nlattr *tb[], struct nlattr *data[])
+ {
+-	return netdev_priv(dev);
++	struct dummy_priv *priv = netdev_priv(dev);
++	int err;
++
++	err = register_netdevice(dev);
++	if (err < 0)
++		return err;
++
++	priv->dev = dev;
++	list_add_tail(&priv->list, &dummies);
++	return 0;
++}
++
++static void dummy_dellink(struct net_device *dev)
++{
++	struct dummy_priv *priv = netdev_priv(dev);
++
++	list_del(&priv->list);
++	unregister_netdevice(dev);
+ }
+ 
+-static struct net_device **dummies;
++static struct rtnl_link_ops dummy_link_ops __read_mostly = {
++	.kind		= "dummy",
++	.priv_size	= sizeof(struct dummy_priv),
++	.setup		= dummy_setup,
++	.newlink	= dummy_newlink,
++	.dellink	= dummy_dellink,
++};
+ 
+ /* Number of dummy devices to be set up by this module. */
+ module_param(numdummies, int, 0);
+ MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices");
+ 
+-static int __init dummy_init_one(int index)
++static int __init dummy_init_one(void)
+ {
+ 	struct net_device *dev_dummy;
++	struct dummy_priv *priv;
+ 	int err;
+ 
+-	dev_dummy = alloc_netdev(sizeof(struct net_device_stats),
+-				 "dummy%d", dummy_setup);
++	dev_dummy = alloc_netdev(sizeof(struct dummy_priv), "dummy%d",
++				 dummy_setup);
+ 
+ 	if (!dev_dummy)
+ 		return -ENOMEM;
+ 
+-	if ((err = register_netdev(dev_dummy))) {
+-		free_netdev(dev_dummy);
+-		dev_dummy = NULL;
+-	} else {
+-		dummies[index] = dev_dummy;
+-	}
++	err = dev_alloc_name(dev_dummy, dev_dummy->name);
++	if (err < 0)
++		goto err;
++
++	dev_dummy->rtnl_link_ops = &dummy_link_ops;
++	err = register_netdevice(dev_dummy);
++	if (err < 0)
++		goto err;
++
++	priv = netdev_priv(dev_dummy);
++	priv->dev = dev_dummy;
++	list_add_tail(&priv->list, &dummies);
++	return 0;
+ 
++err:
++	free_netdev(dev_dummy);
+ 	return err;
+ }
+ 
+-static void dummy_free_one(int index)
+-{
+-	unregister_netdev(dummies[index]);
+-	free_netdev(dummies[index]);
+-}
+-
+ static int __init dummy_init_module(void)
+ {
++	struct dummy_priv *priv, *next;
+ 	int i, err = 0;
+-	dummies = kmalloc(numdummies * sizeof(void *), GFP_KERNEL);
+-	if (!dummies)
+-		return -ENOMEM;
++
++	rtnl_lock();
++	err = __rtnl_link_register(&dummy_link_ops);
++
+ 	for (i = 0; i < numdummies && !err; i++)
+-		err = dummy_init_one(i);
+-	if (err) {
+-		i--;
+-		while (--i >= 0)
+-			dummy_free_one(i);
++		err = dummy_init_one();
++	if (err < 0) {
++		list_for_each_entry_safe(priv, next, &dummies, list)
++			dummy_dellink(priv->dev);
++		__rtnl_link_unregister(&dummy_link_ops);
+ 	}
++	rtnl_unlock();
++
+ 	return err;
+ }
+ 
+ static void __exit dummy_cleanup_module(void)
+ {
+-	int i;
+-	for (i = 0; i < numdummies; i++)
+-		dummy_free_one(i);
+-	kfree(dummies);
++	struct dummy_priv *priv, *next;
++
++	rtnl_lock();
++	list_for_each_entry_safe(priv, next, &dummies, list)
++		dummy_dellink(priv->dev);
++
++	__rtnl_link_unregister(&dummy_link_ops);
++	rtnl_unlock();
+ }
+ 
+ module_init(dummy_init_module);
+ module_exit(dummy_cleanup_module);
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_RTNL_LINK("dummy");
+diff -Nurb linux-2.6.22-570/drivers/net/eepro100.c linux-2.6.22-try2/drivers/net/eepro100.c
+--- linux-2.6.22-570/drivers/net/eepro100.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/eepro100.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1801,7 +1801,7 @@
+ 
+ #if 1 || USE_IP_CSUM
+ 				/* Packet is in one chunk -- we can copy + cksum. */
+-				eth_copy_and_sum(skb, sp->rx_skbuff[entry]->data, pkt_len, 0);
++				skb_copy_to_linear_data(skb, sp->rx_skbuff[entry]->data, pkt_len);
+ 				skb_put(skb, pkt_len);
+ #else
+ 				skb_copy_from_linear_data(sp->rx_skbuff[entry],
+diff -Nurb linux-2.6.22-570/drivers/net/epic100.c linux-2.6.22-try2/drivers/net/epic100.c
+--- linux-2.6.22-570/drivers/net/epic100.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/epic100.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1201,7 +1201,7 @@
+ 							    ep->rx_ring[entry].bufaddr,
+ 							    ep->rx_buf_sz,
+ 							    PCI_DMA_FROMDEVICE);
+-				eth_copy_and_sum(skb, ep->rx_skbuff[entry]->data, pkt_len, 0);
++				skb_copy_to_linear_data(skb, ep->rx_skbuff[entry]->data, pkt_len);
+ 				skb_put(skb, pkt_len);
+ 				pci_dma_sync_single_for_device(ep->pci_dev,
+ 							       ep->rx_ring[entry].bufaddr,
+diff -Nurb linux-2.6.22-570/drivers/net/fealnx.c linux-2.6.22-try2/drivers/net/fealnx.c
+--- linux-2.6.22-570/drivers/net/fealnx.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/fealnx.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1727,8 +1727,8 @@
+ 				/* Call copy + cksum if available. */
+ 
+ #if ! defined(__alpha__)
+-				eth_copy_and_sum(skb,
+-					np->cur_rx->skbuff->data, pkt_len, 0);
++				skb_copy_to_linear_data(skb,
++					np->cur_rx->skbuff->data, pkt_len);
+ 				skb_put(skb, pkt_len);
+ #else
+ 				memcpy(skb_put(skb, pkt_len),
+diff -Nurb linux-2.6.22-570/drivers/net/fec.c linux-2.6.22-try2/drivers/net/fec.c
+--- linux-2.6.22-570/drivers/net/fec.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/fec.c	2007-12-19 15:29:23.000000000 -0500
+@@ -648,7 +648,7 @@
+ 		fep->stats.rx_dropped++;
+ 	} else {
+ 		skb_put(skb,pkt_len-4);	/* Make room */
+-		eth_copy_and_sum(skb, data, pkt_len-4, 0);
++		skb_copy_to_linear_data(skb, data, pkt_len-4);
+ 		skb->protocol=eth_type_trans(skb,dev);
+ 		netif_rx(skb);
+ 	}
+diff -Nurb linux-2.6.22-570/drivers/net/hamachi.c linux-2.6.22-try2/drivers/net/hamachi.c
+--- linux-2.6.22-570/drivers/net/hamachi.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/hamachi.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1575,8 +1575,8 @@
+ 							    PCI_DMA_FROMDEVICE);
+ 				/* Call copy + cksum if available. */
+ #if 1 || USE_IP_COPYSUM
+-				eth_copy_and_sum(skb,
+-					hmp->rx_skbuff[entry]->data, pkt_len, 0);
++				skb_copy_to_linear_data(skb,
++					hmp->rx_skbuff[entry]->data, pkt_len);
+ 				skb_put(skb, pkt_len);
+ #else
+ 				memcpy(skb_put(skb, pkt_len), hmp->rx_ring_dma
+diff -Nurb linux-2.6.22-570/drivers/net/hamradio/baycom_epp.c linux-2.6.22-try2/drivers/net/hamradio/baycom_epp.c
+--- linux-2.6.22-570/drivers/net/hamradio/baycom_epp.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/hamradio/baycom_epp.c	2007-12-19 15:29:23.000000000 -0500
+@@ -320,7 +320,7 @@
+ 	sprintf(portarg, "%ld", bc->pdev->port->base);
+ 	printk(KERN_DEBUG "%s: %s -s -p %s -m %s\n", bc_drvname, eppconfig_path, portarg, modearg);
+ 
+-	return call_usermodehelper(eppconfig_path, argv, envp, 1);
++	return call_usermodehelper(eppconfig_path, argv, envp, UMH_WAIT_PROC);
+ }
+ 
+ /* ---------------------------------------------------------------------- */
+diff -Nurb linux-2.6.22-570/drivers/net/ibmveth.c linux-2.6.22-try2/drivers/net/ibmveth.c
+--- linux-2.6.22-570/drivers/net/ibmveth.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/ibmveth.c	2007-12-19 15:29:22.000000000 -0500
+@@ -1337,7 +1337,7 @@
+ 
+ #define ATTR(_name, _mode)      \
+         struct attribute veth_##_name##_attr = {               \
+-        .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE \
++        .name = __stringify(_name), .mode = _mode, \
+         };
+ 
+ static ATTR(active, 0644);
+diff -Nurb linux-2.6.22-570/drivers/net/ifb.c linux-2.6.22-try2/drivers/net/ifb.c
+--- linux-2.6.22-570/drivers/net/ifb.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/ifb.c	2007-12-19 15:29:23.000000000 -0500
+@@ -33,12 +33,15 @@
+ #include <linux/etherdevice.h>
+ #include <linux/init.h>
+ #include <linux/moduleparam.h>
++#include <linux/list.h>
+ #include <net/pkt_sched.h>
+ 
+ #define TX_TIMEOUT  (2*HZ)
+ 
+ #define TX_Q_LIMIT    32
+ struct ifb_private {
++	struct list_head	list;
++	struct net_device	*dev;
+ 	struct net_device_stats stats;
+ 	struct tasklet_struct   ifb_tasklet;
+ 	int     tasklet_pending;
+@@ -136,13 +139,14 @@
+ 
+ }
+ 
+-static void __init ifb_setup(struct net_device *dev)
++static void ifb_setup(struct net_device *dev)
+ {
+ 	/* Initialize the device structure. */
+ 	dev->get_stats = ifb_get_stats;
+ 	dev->hard_start_xmit = ifb_xmit;
+ 	dev->open = &ifb_open;
+ 	dev->stop = &ifb_close;
++	dev->destructor = free_netdev;
+ 
+ 	/* Fill in device structure with ethernet-generic values. */
+ 	ether_setup(dev);
+@@ -197,7 +201,7 @@
+ 	return stats;
+ }
+ 
+-static struct net_device **ifbs;
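++/* All ifb devices created by this module; walked under the RTNL lock. */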
++static LIST_HEAD(ifbs);
+ 
+ /* Number of ifb devices to be set up by this module. */
+ module_param(numifbs, int, 0);
+@@ -226,9 +230,41 @@
+ 	return 0;
+ }
+ 
++static int ifb_newlink(struct net_device *dev,
++		       struct nlattr *tb[], struct nlattr *data[])
++{
++	struct ifb_private *priv = netdev_priv(dev);
++	int err;
++
++	err = register_netdevice(dev);
++	if (err < 0)
++		return err;
++
++	priv->dev = dev;
++	list_add_tail(&priv->list, &ifbs);
++	return 0;
++}
++
++static void ifb_dellink(struct net_device *dev)
++{
++	struct ifb_private *priv = netdev_priv(dev);
++
++	list_del(&priv->list);
++	unregister_netdevice(dev);
++}
++
++static struct rtnl_link_ops ifb_link_ops __read_mostly = {
++	.kind		= "ifb",
++	.priv_size	= sizeof(struct ifb_private),
++	.setup		= ifb_setup,
++	.newlink	= ifb_newlink,
++	.dellink	= ifb_dellink,
++};
++
+ static int __init ifb_init_one(int index)
+ {
+ 	struct net_device *dev_ifb;
++	struct ifb_private *priv;
+ 	int err;
+ 
+ 	dev_ifb = alloc_netdev(sizeof(struct ifb_private),
+@@ -237,49 +273,59 @@
+ 	if (!dev_ifb)
+ 		return -ENOMEM;
+ 
+-	if ((err = register_netdev(dev_ifb))) {
+-		free_netdev(dev_ifb);
+-		dev_ifb = NULL;
+-	} else {
+-		ifbs[index] = dev_ifb;
+-	}
++	err = dev_alloc_name(dev_ifb, dev_ifb->name);
++	if (err < 0)
++		goto err;
++
++	dev_ifb->rtnl_link_ops = &ifb_link_ops;
++	err = register_netdevice(dev_ifb);
++	if (err < 0)
++		goto err;
++
++	priv = netdev_priv(dev_ifb);
++	priv->dev = dev_ifb;
++	list_add_tail(&priv->list, &ifbs);
++	return 0;
+ 
++err:
++	free_netdev(dev_ifb);
+ 	return err;
+ }
+ 
+-static void ifb_free_one(int index)
+-{
+-	unregister_netdev(ifbs[index]);
+-	free_netdev(ifbs[index]);
+-}
+-
+ static int __init ifb_init_module(void)
+ {
+-	int i, err = 0;
+-	ifbs = kmalloc(numifbs * sizeof(void *), GFP_KERNEL);
+-	if (!ifbs)
+-		return -ENOMEM;
++	struct ifb_private *priv, *next;
++	int i, err;
++
++	rtnl_lock();
++	err = __rtnl_link_register(&ifb_link_ops);
++
+ 	for (i = 0; i < numifbs && !err; i++)
+ 		err = ifb_init_one(i);
+ 	if (err) {
+-		i--;
+-		while (--i >= 0)
+-			ifb_free_one(i);
++		list_for_each_entry_safe(priv, next, &ifbs, list)
++			ifb_dellink(priv->dev);
++		__rtnl_link_unregister(&ifb_link_ops);
+ 	}
++	rtnl_unlock();
+ 
+ 	return err;
+ }
+ 
+ static void __exit ifb_cleanup_module(void)
+ {
+-	int i;
++	struct ifb_private *priv, *next;
++
++	rtnl_lock();
++	list_for_each_entry_safe(priv, next, &ifbs, list)
++		ifb_dellink(priv->dev);
+ 
+-	for (i = 0; i < numifbs; i++)
+-		ifb_free_one(i);
+-	kfree(ifbs);
++	__rtnl_link_unregister(&ifb_link_ops);
++	rtnl_unlock();
+ }
+ 
+ module_init(ifb_init_module);
+ module_exit(ifb_cleanup_module);
+ MODULE_LICENSE("GPL");
+ MODULE_AUTHOR("Jamal Hadi Salim");
++MODULE_ALIAS_RTNL_LINK("ifb");
+diff -Nurb linux-2.6.22-570/drivers/net/ixp2000/ixpdev.c linux-2.6.22-try2/drivers/net/ixp2000/ixpdev.c
+--- linux-2.6.22-570/drivers/net/ixp2000/ixpdev.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/ixp2000/ixpdev.c	2007-12-19 15:29:23.000000000 -0500
+@@ -111,7 +111,7 @@
+ 		skb = dev_alloc_skb(desc->pkt_length + 2);
+ 		if (likely(skb != NULL)) {
+ 			skb_reserve(skb, 2);
+-			eth_copy_and_sum(skb, buf, desc->pkt_length, 0);
++			skb_copy_to_linear_data(skb, buf, desc->pkt_length);
+ 			skb_put(skb, desc->pkt_length);
+ 			skb->protocol = eth_type_trans(skb, nds[desc->channel]);
+ 
+diff -Nurb linux-2.6.22-570/drivers/net/kgdboe.c linux-2.6.22-try2/drivers/net/kgdboe.c
+--- linux-2.6.22-570/drivers/net/kgdboe.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/net/kgdboe.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,294 @@
++/*
++ * drivers/net/kgdboe.c
++ *
++ * A network interface for GDB.
++ * Based upon 'gdbserial' by David Grothe <dave@gcom.com>
++ * and Scott Foehner <sfoehner@engr.sgi.com>
++ *
++ * Maintainers: Amit S. Kale <amitkale@linsyssoft.com> and
++ * 		Tom Rini <trini@kernel.crashing.org>
++ *
++ * 2004 (c) Amit S. Kale <amitkale@linsyssoft.com>
++ * 2004-2005 (c) MontaVista Software, Inc.
++ * 2005 (c) Wind River Systems, Inc.
++ *
++ * Contributors at various stages not listed above:
++ * San Mehat <nettwerk@biodome.org>, Robert Walsh <rjwalsh@durables.org>,
++ * wangdi <wangdi@clusterfs.com>, Matt Mackall <mpm@selenic.com>,
++ * Pavel Machek <pavel@suse.cz>, Jason Wessel <jason.wessel@windriver.com>
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <linux/kernel.h>
++#include <linux/interrupt.h>
++#include <linux/string.h>
++#include <linux/kgdb.h>
++#include <linux/netpoll.h>
++#include <linux/init.h>
++
++#include <asm/atomic.h>
++
++#define IN_BUF_SIZE 512		/* power of 2, please */
++#define NOT_CONFIGURED_STRING "not_configured"
++#define OUT_BUF_SIZE 30		/* We don't want to send too large a packet. */
++#define MAX_KGDBOE_CONFIG_STR 256
++
++static char in_buf[IN_BUF_SIZE], out_buf[OUT_BUF_SIZE];
++static int in_head, in_tail, out_count;
++static atomic_t in_count;
++/* 0 = unconfigured, 1 = netpoll options parsed, 2 = fully configured. */
++static int configured;
++static struct kgdb_io local_kgdb_io_ops;
++static int use_dynamic_mac;
++
++MODULE_DESCRIPTION("KGDB driver for network interfaces");
++MODULE_LICENSE("GPL");
++static char config[MAX_KGDBOE_CONFIG_STR] = NOT_CONFIGURED_STRING;
++static struct kparam_string kps = {
++	.string = config,
++	.maxlen = MAX_KGDBOE_CONFIG_STR,
++};
++
++static void rx_hook(struct netpoll *np, int port, char *msg, int len,
++		    struct sk_buff *skb)
++{
++	int i;
++
++	np->remote_port = port;
++
++	/* Copy the MAC address if we need to. */
++	if (use_dynamic_mac) {
++		memcpy(np->remote_mac, eth_hdr(skb)->h_source,
++				sizeof(np->remote_mac));
++		use_dynamic_mac = 0;
++	}
++
++	/*
++	 * This could be GDB trying to attach.  But it could also be GDB
++	 * finishing up a session, with kgdb_connected=0 but GDB sending
++	 * an ACK for the final packet.  To make sure we don't try and
++	 * make a breakpoint when GDB is leaving, make sure that if
++	 * !kgdb_connected the only len == 1 packet we allow is ^C.
++	 */
++	if (!kgdb_connected && (len != 1 || msg[0] == 3) &&
++	    !atomic_read(&kgdb_setting_breakpoint)) {
++		tasklet_schedule(&kgdb_tasklet_breakpoint);
++	}
++
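++	/* Queue the payload in the ring buffer; IN_BUF_SIZE is a power of
++	 * two, so masking the index makes it wrap. */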
++	for (i = 0; i < len; i++) {
++		if (msg[i] == 3)
++			tasklet_schedule(&kgdb_tasklet_breakpoint);
++
++		if (atomic_read(&in_count) >= IN_BUF_SIZE) {
++			/* buffer overflow, clear it */
++			in_head = in_tail = 0;
++			atomic_set(&in_count, 0);
++			break;
++		}
++		in_buf[in_head++] = msg[i];
++		in_head &= (IN_BUF_SIZE - 1);
++		atomic_inc(&in_count);
++	}
++}
++
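++/* Default netpoll parameters; netpoll_parse_options() overrides them. */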
++static struct netpoll np = {
++	.dev_name = "eth0",
++	.name = "kgdboe",
++	.rx_hook = rx_hook,
++	.local_port = 6443,
++	.remote_port = 6442,
++	.remote_mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
++};
++
++static void eth_pre_exception_handler(void)
++{
++	/* Increment the module count when the debugger is active */
++	if (!kgdb_connected)
++		try_module_get(THIS_MODULE);
++	netpoll_set_trap(1);
++}
++
++static void eth_post_exception_handler(void)
++{
++	/* decrement the module count when the debugger detaches */
++	if (!kgdb_connected)
++		module_put(THIS_MODULE);
++	netpoll_set_trap(0);
++}
++
++static int eth_get_char(void)
++{
++	int chr;
++
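++	/* Busy-poll the interface with netpoll until rx_hook queues a byte. */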
++	while (atomic_read(&in_count) == 0)
++		netpoll_poll(&np);
++
++	chr = in_buf[in_tail++];
++	in_tail &= (IN_BUF_SIZE - 1);
++	atomic_dec(&in_count);
++	return chr;
++}
++
++static void eth_flush_buf(void)
++{
++	if (out_count && np.dev) {
++		netpoll_send_udp(&np, out_buf, out_count);
++		memset(out_buf, 0, sizeof(out_buf));
++		out_count = 0;
++	}
++}
++
++static void eth_put_char(u8 chr)
++{
++	out_buf[out_count++] = chr;
++	if (out_count == OUT_BUF_SIZE)
++		eth_flush_buf();
++}
++
++static int option_setup(char *opt)
++{
++	char opt_scratch[MAX_KGDBOE_CONFIG_STR];
++
++	/* If we're being given a new configuration, copy it in. */
++	if (opt != config)
++		strcpy(config, opt);
++	/* But work on a copy as netpoll_parse_options will eat it. */
++	strcpy(opt_scratch, opt);
++	configured = !netpoll_parse_options(&np, opt_scratch);
++
++	use_dynamic_mac = 1;
++
++	return 0;
++}
++__setup("kgdboe=", option_setup);
++
++/* With our config string set by some means, configure kgdboe. */
++static int configure_kgdboe(void)
++{
++	/* Try out the string. */
++	option_setup(config);
++
++	if (!configured) {
++		printk(KERN_ERR "kgdboe: configuration incorrect - kgdboe not "
++		       "loaded.\n");
++		printk(KERN_ERR "  Usage: kgdboe=[src-port]@[src-ip]/[dev],"
++				"[tgt-port]@<tgt-ip>/<tgt-macaddr>\n");
++		return -EINVAL;
++	}
++
++	/* Bring it up. */
++	if (netpoll_setup(&np)) {
++		printk(KERN_ERR "kgdboe: netpoll_setup failed, kgdboe not loaded\n");
++		return -EINVAL;
++	}
++
++	if (kgdb_register_io_module(&local_kgdb_io_ops)) {
++		netpoll_cleanup(&np);
++		return -EINVAL;
++	}
++
++	configured = 2;
++
++	return 0;
++}
++
++static int init_kgdboe(void)
++{
++	int ret;
++
++	/* Already done? */
++	if (configured == 2)
++		return 0;
++
++	/* OK, go ahead and do it. */
++	ret = configure_kgdboe();
++
++	if (configured == 2)
++		printk(KERN_INFO "kgdboe: debugging over ethernet enabled\n");
++
++	return ret;
++}
++
++static void cleanup_kgdboe(void)
++{
++	netpoll_cleanup(&np);
++	configured = 0;
++	kgdb_unregister_io_module(&local_kgdb_io_ops);
++}
++
++static int param_set_kgdboe_var(const char *kmessage, struct kernel_param *kp)
++{
++	char kmessage_save[MAX_KGDBOE_CONFIG_STR];
++	int msg_len = strlen(kmessage);
++
++	if (msg_len + 1 > MAX_KGDBOE_CONFIG_STR) {
++		printk(KERN_ERR "%s: string doesn't fit in %u chars.\n",
++		       kp->name, MAX_KGDBOE_CONFIG_STR - 1);
++		return -ENOSPC;
++	}
++
++	if (kgdb_connected) {
++		printk(KERN_ERR "kgdboe: Cannot reconfigure while KGDB is "
++				"connected.\n");
++		return 0;
++	}
++
++	/* Start the reconfiguration process by saving the old string */
++	strncpy(kmessage_save, config, sizeof(kmessage_save));
++
++	/* Copy in the new param and strip trailing characters that can
++	 * never appear in a config string (such as a newline from sysfs),
++	 * which still allows a MAC address to be specified.
++	 */
++	strncpy(config, kmessage, sizeof(config));
++	msg_len--;
++	while (msg_len > 0 &&
++			(config[msg_len] < ',' || config[msg_len] > 'f')) {
++		config[msg_len] = '\0';
++		msg_len--;
++	}
++
++	/* Check to see if we are unconfiguring the io module and that it
++	 * was in a fully configured state, as this is the only time that
++	 * netpoll_cleanup should get called.
++	 */
++	if (configured == 2 && strcmp(config, NOT_CONFIGURED_STRING) == 0) {
++		printk(KERN_INFO "kgdboe: reverting to unconfigured state\n");
++		cleanup_kgdboe();
++		return 0;
++	} else
++		/* Go and configure with the new params. */
++		configure_kgdboe();
++
++	if (configured == 2)
++		return 0;
++
++	/* If the new string was invalid, revert to the previous state, which
++	 * is at a minimum not_configured. */
++	strncpy(config, kmessage_save, sizeof(config));
++	if (strcmp(kmessage_save, NOT_CONFIGURED_STRING) != 0) {
++		printk(KERN_INFO "kgdboe: reverting to prior configuration\n");
++		configure_kgdboe();
++	}
++	return 0;
++}
++
++static struct kgdb_io local_kgdb_io_ops = {
++	.read_char = eth_get_char,
++	.write_char = eth_put_char,
++	.init = init_kgdboe,
++	.flush = eth_flush_buf,
++	.pre_exception = eth_pre_exception_handler,
++	.post_exception = eth_post_exception_handler
++};
++
++module_init(init_kgdboe);
++module_exit(cleanup_kgdboe);
++module_param_call(kgdboe, param_set_kgdboe_var, param_get_string, &kps, 0644);
++MODULE_PARM_DESC(kgdboe, " kgdboe=[src-port]@[src-ip]/[dev],"
++		 "[tgt-port]@<tgt-ip>/<tgt-macaddr>\n");
+diff -Nurb linux-2.6.22-570/drivers/net/lance.c linux-2.6.22-try2/drivers/net/lance.c
+--- linux-2.6.22-570/drivers/net/lance.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/lance.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1186,9 +1186,9 @@
+ 				}
+ 				skb_reserve(skb,2);	/* 16 byte align */
+ 				skb_put(skb,pkt_len);	/* Make room */
+-				eth_copy_and_sum(skb,
++				skb_copy_to_linear_data(skb,
+ 					(unsigned char *)isa_bus_to_virt((lp->rx_ring[entry].base & 0x00ffffff)),
+-					pkt_len,0);
++					pkt_len);
+ 				skb->protocol=eth_type_trans(skb,dev);
+ 				netif_rx(skb);
+ 				dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/natsemi.c linux-2.6.22-try2/drivers/net/natsemi.c
+--- linux-2.6.22-570/drivers/net/natsemi.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/net/natsemi.c	2007-12-19 15:29:23.000000000 -0500
+@@ -2357,8 +2357,8 @@
+ 					np->rx_dma[entry],
+ 					buflen,
+ 					PCI_DMA_FROMDEVICE);
+-				eth_copy_and_sum(skb,
+-					np->rx_skbuff[entry]->data, pkt_len, 0);
++				skb_copy_to_linear_data(skb,
++					np->rx_skbuff[entry]->data, pkt_len);
+ 				skb_put(skb, pkt_len);
+ 				pci_dma_sync_single_for_device(np->pci_dev,
+ 					np->rx_dma[entry],
+diff -Nurb linux-2.6.22-570/drivers/net/ni52.c linux-2.6.22-try2/drivers/net/ni52.c
+--- linux-2.6.22-570/drivers/net/ni52.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/ni52.c	2007-12-19 15:29:23.000000000 -0500
+@@ -936,7 +936,7 @@
+ 					{
+ 						skb_reserve(skb,2);
+ 						skb_put(skb,totlen);
+-						eth_copy_and_sum(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen,0);
++						skb_copy_to_linear_data(skb,(char *) p->base+(unsigned long) rbd->buffer,totlen);
+ 						skb->protocol=eth_type_trans(skb,dev);
+ 						netif_rx(skb);
+ 						dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/ni65.c linux-2.6.22-try2/drivers/net/ni65.c
+--- linux-2.6.22-570/drivers/net/ni65.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/ni65.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1096,7 +1096,7 @@
+ #ifdef RCV_VIA_SKB
+ 				if( (unsigned long) (skb->data + R_BUF_SIZE) > 0x1000000) {
+ 					skb_put(skb,len);
+-					eth_copy_and_sum(skb, (unsigned char *)(p->recv_skb[p->rmdnum]->data),len,0);
++					skb_copy_to_linear_data(skb, (unsigned char *)(p->recv_skb[p->rmdnum]->data),len);
+ 				}
+ 				else {
+ 					struct sk_buff *skb1 = p->recv_skb[p->rmdnum];
+@@ -1108,7 +1108,7 @@
+ 				}
+ #else
+ 				skb_put(skb,len);
+-				eth_copy_and_sum(skb, (unsigned char *) p->recvbounce[p->rmdnum],len,0);
++				skb_copy_to_linear_data(skb, (unsigned char *) p->recvbounce[p->rmdnum],len);
+ #endif
+ 				p->stats.rx_packets++;
+ 				p->stats.rx_bytes += len;
+diff -Nurb linux-2.6.22-570/drivers/net/pci-skeleton.c linux-2.6.22-try2/drivers/net/pci-skeleton.c
+--- linux-2.6.22-570/drivers/net/pci-skeleton.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/pci-skeleton.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1567,7 +1567,7 @@
+ 		if (skb) {
+ 			skb_reserve (skb, 2);	/* 16 byte align the IP fields. */
+ 
+-			eth_copy_and_sum (skb, &rx_ring[ring_offset + 4], pkt_size, 0);
++			skb_copy_to_linear_data (skb, &rx_ring[ring_offset + 4], pkt_size);
+ 			skb_put (skb, pkt_size);
+ 
+ 			skb->protocol = eth_type_trans (skb, dev);
+diff -Nurb linux-2.6.22-570/drivers/net/pcnet32.c linux-2.6.22-try2/drivers/net/pcnet32.c
+--- linux-2.6.22-570/drivers/net/pcnet32.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/pcnet32.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1235,9 +1235,9 @@
+ 					    lp->rx_dma_addr[entry],
+ 					    pkt_len,
+ 					    PCI_DMA_FROMDEVICE);
+-		eth_copy_and_sum(skb,
++		skb_copy_to_linear_data(skb,
+ 				 (unsigned char *)(lp->rx_skbuff[entry]->data),
+-				 pkt_len, 0);
++				 pkt_len);
+ 		pci_dma_sync_single_for_device(lp->pci_dev,
+ 					       lp->rx_dma_addr[entry],
+ 					       pkt_len,
+diff -Nurb linux-2.6.22-570/drivers/net/r8169.c linux-2.6.22-try2/drivers/net/r8169.c
+--- linux-2.6.22-570/drivers/net/r8169.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/net/r8169.c	2007-12-19 15:29:23.000000000 -0500
+@@ -2492,7 +2492,7 @@
+ 		skb = dev_alloc_skb(pkt_size + align);
+ 		if (skb) {
+ 			skb_reserve(skb, (align - 1) & (unsigned long)skb->data);
+-			eth_copy_and_sum(skb, sk_buff[0]->data, pkt_size, 0);
++			skb_copy_to_linear_data(skb, sk_buff[0]->data, pkt_size);
+ 			*sk_buff = skb;
+ 			rtl8169_mark_to_asic(desc, rx_buf_sz);
+ 			ret = 0;
+diff -Nurb linux-2.6.22-570/drivers/net/saa9730.c linux-2.6.22-try2/drivers/net/saa9730.c
+--- linux-2.6.22-570/drivers/net/saa9730.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/saa9730.c	2007-12-19 15:29:23.000000000 -0500
+@@ -690,9 +690,9 @@
+ 				lp->stats.rx_packets++;
+ 				skb_reserve(skb, 2);	/* 16 byte align */
+ 				skb_put(skb, len);	/* make room */
+-				eth_copy_and_sum(skb,
++				skb_copy_to_linear_data(skb,
+ 						 (unsigned char *) pData,
+-						 len, 0);
++						 len);
+ 				skb->protocol = eth_type_trans(skb, dev);
+ 				netif_rx(skb);
+ 				dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/sgiseeq.c linux-2.6.22-try2/drivers/net/sgiseeq.c
+--- linux-2.6.22-570/drivers/net/sgiseeq.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/sgiseeq.c	2007-12-19 15:29:23.000000000 -0500
+@@ -320,7 +320,7 @@
+ 				skb_put(skb, len);
+ 
+ 				/* Copy out of kseg1 to avoid silly cache flush. */
+-				eth_copy_and_sum(skb, pkt_pointer + 2, len, 0);
++				skb_copy_to_linear_data(skb, pkt_pointer + 2, len);
+ 				skb->protocol = eth_type_trans(skb, dev);
+ 
+ 				/* We don't want to receive our own packets */
+diff -Nurb linux-2.6.22-570/drivers/net/sis190.c linux-2.6.22-try2/drivers/net/sis190.c
+--- linux-2.6.22-570/drivers/net/sis190.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/sis190.c	2007-12-19 15:29:23.000000000 -0500
+@@ -548,7 +548,7 @@
+ 		skb = dev_alloc_skb(pkt_size + NET_IP_ALIGN);
+ 		if (skb) {
+ 			skb_reserve(skb, NET_IP_ALIGN);
+-			eth_copy_and_sum(skb, sk_buff[0]->data, pkt_size, 0);
++			skb_copy_to_linear_data(skb, sk_buff[0]->data, pkt_size);
+ 			*sk_buff = skb;
+ 			sis190_give_to_asic(desc, rx_buf_sz);
+ 			ret = 0;
+diff -Nurb linux-2.6.22-570/drivers/net/starfire.c linux-2.6.22-try2/drivers/net/starfire.c
+--- linux-2.6.22-570/drivers/net/starfire.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/starfire.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1456,7 +1456,7 @@
+ 			pci_dma_sync_single_for_cpu(np->pci_dev,
+ 						    np->rx_info[entry].mapping,
+ 						    pkt_len, PCI_DMA_FROMDEVICE);
+-			eth_copy_and_sum(skb, np->rx_info[entry].skb->data, pkt_len, 0);
++			skb_copy_to_linear_data(skb, np->rx_info[entry].skb->data, pkt_len);
+ 			pci_dma_sync_single_for_device(np->pci_dev,
+ 						       np->rx_info[entry].mapping,
+ 						       pkt_len, PCI_DMA_FROMDEVICE);
+diff -Nurb linux-2.6.22-570/drivers/net/sun3_82586.c linux-2.6.22-try2/drivers/net/sun3_82586.c
+--- linux-2.6.22-570/drivers/net/sun3_82586.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/sun3_82586.c	2007-12-19 15:29:23.000000000 -0500
+@@ -777,7 +777,7 @@
+ 					{
+ 						skb_reserve(skb,2);
+ 						skb_put(skb,totlen);
+-						eth_copy_and_sum(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen,0);
++						skb_copy_to_linear_data(skb,(char *) p->base+swab32((unsigned long) rbd->buffer),totlen);
+ 						skb->protocol=eth_type_trans(skb,dev);
+ 						netif_rx(skb);
+ 						p->stats.rx_packets++;
+diff -Nurb linux-2.6.22-570/drivers/net/sun3lance.c linux-2.6.22-try2/drivers/net/sun3lance.c
+--- linux-2.6.22-570/drivers/net/sun3lance.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/sun3lance.c	2007-12-19 15:29:23.000000000 -0500
+@@ -853,10 +853,9 @@
+ 
+ 				skb_reserve( skb, 2 );	/* 16 byte align */
+ 				skb_put( skb, pkt_len );	/* Make room */
+-//			        skb_copy_to_linear_data(skb, PKTBUF_ADDR(head), pkt_len);
+-				eth_copy_and_sum(skb,
++				skb_copy_to_linear_data(skb,
+ 						 PKTBUF_ADDR(head),
+-						 pkt_len, 0);
++						 pkt_len);
+ 
+ 				skb->protocol = eth_type_trans( skb, dev );
+ 				netif_rx( skb );
+diff -Nurb linux-2.6.22-570/drivers/net/sunbmac.c linux-2.6.22-try2/drivers/net/sunbmac.c
+--- linux-2.6.22-570/drivers/net/sunbmac.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/sunbmac.c	2007-12-19 15:29:23.000000000 -0500
+@@ -860,7 +860,7 @@
+ 			sbus_dma_sync_single_for_cpu(bp->bigmac_sdev,
+ 						     this->rx_addr, len,
+ 						     SBUS_DMA_FROMDEVICE);
+-			eth_copy_and_sum(copy_skb, (unsigned char *)skb->data, len, 0);
++			skb_copy_to_linear_data(copy_skb, (unsigned char *)skb->data, len);
+ 			sbus_dma_sync_single_for_device(bp->bigmac_sdev,
+ 							this->rx_addr, len,
+ 							SBUS_DMA_FROMDEVICE);
+diff -Nurb linux-2.6.22-570/drivers/net/sundance.c linux-2.6.22-try2/drivers/net/sundance.c
+--- linux-2.6.22-570/drivers/net/sundance.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/sundance.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1313,7 +1313,7 @@
+ 							    np->rx_buf_sz,
+ 							    PCI_DMA_FROMDEVICE);
+ 
+-				eth_copy_and_sum(skb, np->rx_skbuff[entry]->data, pkt_len, 0);
++				skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len);
+ 				pci_dma_sync_single_for_device(np->pci_dev,
+ 							       desc->frag[0].addr,
+ 							       np->rx_buf_sz,
+diff -Nurb linux-2.6.22-570/drivers/net/sunlance.c linux-2.6.22-try2/drivers/net/sunlance.c
+--- linux-2.6.22-570/drivers/net/sunlance.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/sunlance.c	2007-12-19 15:29:23.000000000 -0500
+@@ -549,9 +549,9 @@
+ 
+ 			skb_reserve(skb, 2);		/* 16 byte align */
+ 			skb_put(skb, len);		/* make room */
+-			eth_copy_and_sum(skb,
++			skb_copy_to_linear_data(skb,
+ 					 (unsigned char *)&(ib->rx_buf [entry][0]),
+-					 len, 0);
++					 len);
+ 			skb->protocol = eth_type_trans(skb, dev);
+ 			netif_rx(skb);
+ 			dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/sunqe.c linux-2.6.22-try2/drivers/net/sunqe.c
+--- linux-2.6.22-570/drivers/net/sunqe.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/sunqe.c	2007-12-19 15:29:23.000000000 -0500
+@@ -439,8 +439,8 @@
+ 			} else {
+ 				skb_reserve(skb, 2);
+ 				skb_put(skb, len);
+-				eth_copy_and_sum(skb, (unsigned char *) this_qbuf,
+-						 len, 0);
++				skb_copy_to_linear_data(skb, (unsigned char *) this_qbuf,
++						 len);
+ 				skb->protocol = eth_type_trans(skb, qep->dev);
+ 				netif_rx(skb);
+ 				qep->dev->last_rx = jiffies;
+diff -Nurb linux-2.6.22-570/drivers/net/tg3.c linux-2.6.22-try2/drivers/net/tg3.c
+--- linux-2.6.22-570/drivers/net/tg3.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/tg3.c	2007-12-19 15:29:23.000000000 -0500
+@@ -11944,12 +11944,11 @@
+ 	 * checksumming.
+ 	 */
+ 	if ((tp->tg3_flags & TG3_FLAG_BROKEN_CHECKSUMS) == 0) {
++		dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
+ 		if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 ||
+ 		    GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787)
+-			dev->features |= NETIF_F_HW_CSUM;
+-		else
+-			dev->features |= NETIF_F_IP_CSUM;
+-		dev->features |= NETIF_F_SG;
++			dev->features |= NETIF_F_IPV6_CSUM;
++
+ 		tp->tg3_flags |= TG3_FLAG_RX_CHECKSUMS;
+ 	} else
+ 		tp->tg3_flags &= ~TG3_FLAG_RX_CHECKSUMS;
+diff -Nurb linux-2.6.22-570/drivers/net/tulip/interrupt.c linux-2.6.22-try2/drivers/net/tulip/interrupt.c
+--- linux-2.6.22-570/drivers/net/tulip/interrupt.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/tulip/interrupt.c	2007-12-19 15:29:23.000000000 -0500
+@@ -197,8 +197,8 @@
+ 								   tp->rx_buffers[entry].mapping,
+ 								   pkt_len, PCI_DMA_FROMDEVICE);
+ #if ! defined(__alpha__)
+-                                       eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->data,
+-                                                        pkt_len, 0);
++                                       skb_copy_to_linear_data(skb, tp->rx_buffers[entry].skb->data,
++                                                        pkt_len);
+                                        skb_put(skb, pkt_len);
+ #else
+                                        memcpy(skb_put(skb, pkt_len),
+@@ -420,8 +420,8 @@
+ 							    tp->rx_buffers[entry].mapping,
+ 							    pkt_len, PCI_DMA_FROMDEVICE);
+ #if ! defined(__alpha__)
+-				eth_copy_and_sum(skb, tp->rx_buffers[entry].skb->data,
+-						 pkt_len, 0);
++				skb_copy_to_linear_data(skb, tp->rx_buffers[entry].skb->data,
++						 pkt_len);
+ 				skb_put(skb, pkt_len);
+ #else
+ 				memcpy(skb_put(skb, pkt_len),
+diff -Nurb linux-2.6.22-570/drivers/net/tulip/winbond-840.c linux-2.6.22-try2/drivers/net/tulip/winbond-840.c
+--- linux-2.6.22-570/drivers/net/tulip/winbond-840.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/tulip/winbond-840.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1232,7 +1232,7 @@
+ 				pci_dma_sync_single_for_cpu(np->pci_dev,np->rx_addr[entry],
+ 							    np->rx_skbuff[entry]->len,
+ 							    PCI_DMA_FROMDEVICE);
+-				eth_copy_and_sum(skb, np->rx_skbuff[entry]->data, pkt_len, 0);
++				skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len);
+ 				skb_put(skb, pkt_len);
+ 				pci_dma_sync_single_for_device(np->pci_dev,np->rx_addr[entry],
+ 							       np->rx_skbuff[entry]->len,
+diff -Nurb linux-2.6.22-570/drivers/net/tulip/xircom_cb.c linux-2.6.22-try2/drivers/net/tulip/xircom_cb.c
+--- linux-2.6.22-570/drivers/net/tulip/xircom_cb.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/tulip/xircom_cb.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1208,7 +1208,7 @@
+ 				goto out;
+ 			}
+ 			skb_reserve(skb, 2);
+-			eth_copy_and_sum(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len, 0);
++			skb_copy_to_linear_data(skb, (unsigned char*)&card->rx_buffer[bufferoffset / 4], pkt_len);
+ 			skb_put(skb, pkt_len);
+ 			skb->protocol = eth_type_trans(skb, dev);
+ 			netif_rx(skb);
+diff -Nurb linux-2.6.22-570/drivers/net/tulip/xircom_tulip_cb.c linux-2.6.22-try2/drivers/net/tulip/xircom_tulip_cb.c
+--- linux-2.6.22-570/drivers/net/tulip/xircom_tulip_cb.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/tulip/xircom_tulip_cb.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1242,8 +1242,8 @@
+ 				&& (skb = dev_alloc_skb(pkt_len + 2)) != NULL) {
+ 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
+ #if ! defined(__alpha__)
+-				eth_copy_and_sum(skb, bus_to_virt(tp->rx_ring[entry].buffer1),
+-								 pkt_len, 0);
++				skb_copy_to_linear_data(skb, bus_to_virt(tp->rx_ring[entry].buffer1),
++								 pkt_len);
+ 				skb_put(skb, pkt_len);
+ #else
+ 				memcpy(skb_put(skb, pkt_len),
+diff -Nurb linux-2.6.22-570/drivers/net/tun.c linux-2.6.22-try2/drivers/net/tun.c
+--- linux-2.6.22-570/drivers/net/tun.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/tun.c	2007-12-19 15:29:23.000000000 -0500
+@@ -432,6 +432,7 @@
+ 	init_waitqueue_head(&tun->read_wait);
+ 
+ 	tun->owner = -1;
++	tun->group = -1;
+ 
+ 	SET_MODULE_OWNER(dev);
+ 	dev->open = tun_net_open;
+@@ -467,8 +468,11 @@
+ 			return -EBUSY;
+ 
+ 		/* Check permissions */
+-		if (tun->owner != -1 &&
+-		    current->euid != tun->owner && !capable(CAP_NET_ADMIN))
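++		/* Reject when a configured owner or group does not match
++		 * the caller's euid/egid, unless the caller has
++		 * CAP_NET_ADMIN. */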
++		if (((tun->owner != -1 &&
++		      current->euid != tun->owner) ||
++		     (tun->group != -1 &&
++		      current->egid != tun->group)) &&
++		     !capable(CAP_NET_ADMIN))
+ 			return -EPERM;
+ 	}
+ 	else if (__dev_get_by_name(ifr->ifr_name))
+@@ -610,6 +614,13 @@
+ 		DBG(KERN_INFO "%s: owner set to %d\n", tun->dev->name, tun->owner);
+ 		break;
+ 
++	case TUNSETGROUP:
++		/* Set group of the device */
++		tun->group = (gid_t) arg;
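++		/* Illustrative use from userspace: a CAP_NET_ADMIN process
++		 * calls ioctl(fd, TUNSETGROUP, gid) so that members of that
++		 * group can later attach to the persistent device without
++		 * needing CAP_NET_ADMIN themselves. */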
++
++		DBG(KERN_INFO "%s: group set to %d\n", tun->dev->name, tun->group);
++		break;
++
+ 	case TUNSETLINK:
+ 		/* Only allow setting the type when the interface is down */
+ 		if (tun->dev->flags & IFF_UP) {
+diff -Nurb linux-2.6.22-570/drivers/net/typhoon.c linux-2.6.22-try2/drivers/net/typhoon.c
+--- linux-2.6.22-570/drivers/net/typhoon.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/typhoon.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1703,7 +1703,7 @@
+ 			pci_dma_sync_single_for_cpu(tp->pdev, dma_addr,
+ 						    PKT_BUF_SZ,
+ 						    PCI_DMA_FROMDEVICE);
+-			eth_copy_and_sum(new_skb, skb->data, pkt_len, 0);
++			skb_copy_to_linear_data(new_skb, skb->data, pkt_len);
+ 			pci_dma_sync_single_for_device(tp->pdev, dma_addr,
+ 						       PKT_BUF_SZ,
+ 						       PCI_DMA_FROMDEVICE);
+diff -Nurb linux-2.6.22-570/drivers/net/usb/catc.c linux-2.6.22-try2/drivers/net/usb/catc.c
+--- linux-2.6.22-570/drivers/net/usb/catc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/usb/catc.c	2007-12-19 15:29:23.000000000 -0500
+@@ -255,7 +255,7 @@
+ 		if (!(skb = dev_alloc_skb(pkt_len)))
+ 			return;
+ 
+-		eth_copy_and_sum(skb, pkt_start + pkt_offset, pkt_len, 0);
++		skb_copy_to_linear_data(skb, pkt_start + pkt_offset, pkt_len);
+ 		skb_put(skb, pkt_len);
+ 
+ 		skb->protocol = eth_type_trans(skb, catc->netdev);
+diff -Nurb linux-2.6.22-570/drivers/net/usb/kaweth.c linux-2.6.22-try2/drivers/net/usb/kaweth.c
+--- linux-2.6.22-570/drivers/net/usb/kaweth.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/usb/kaweth.c	2007-12-19 15:29:23.000000000 -0500
+@@ -635,7 +635,7 @@
+ 
+ 		skb_reserve(skb, 2);    /* Align IP on 16 byte boundaries */
+ 
+-		eth_copy_and_sum(skb, kaweth->rx_buf + 2, pkt_len, 0);
++		skb_copy_to_linear_data(skb, kaweth->rx_buf + 2, pkt_len);
+ 
+ 		skb_put(skb, pkt_len);
+ 
+diff -Nurb linux-2.6.22-570/drivers/net/via-rhine.c linux-2.6.22-try2/drivers/net/via-rhine.c
+--- linux-2.6.22-570/drivers/net/via-rhine.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/via-rhine.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1492,9 +1492,9 @@
+ 							    rp->rx_buf_sz,
+ 							    PCI_DMA_FROMDEVICE);
+ 
+-				eth_copy_and_sum(skb,
++				skb_copy_to_linear_data(skb,
+ 						 rp->rx_skbuff[entry]->data,
+-						 pkt_len, 0);
++						 pkt_len);
+ 				skb_put(skb, pkt_len);
+ 				pci_dma_sync_single_for_device(rp->pdev,
+ 							       rp->rx_skbuff_dma[entry],
+diff -Nurb linux-2.6.22-570/drivers/net/wireless/airo.c linux-2.6.22-try2/drivers/net/wireless/airo.c
+--- linux-2.6.22-570/drivers/net/wireless/airo.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/wireless/airo.c	2007-12-19 15:29:24.000000000 -0500
+@@ -3079,6 +3079,7 @@
+ 	struct airo_info *ai = dev->priv;
+ 	int locked;
+ 	
++	set_freezable();
+ 	while(1) {
+ 		/* make swsusp happy with our thread */
+ 		try_to_freeze();
+diff -Nurb linux-2.6.22-570/drivers/net/wireless/libertas/main.c linux-2.6.22-try2/drivers/net/wireless/libertas/main.c
+--- linux-2.6.22-570/drivers/net/wireless/libertas/main.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/wireless/libertas/main.c	2007-12-19 15:29:24.000000000 -0500
+@@ -613,6 +613,7 @@
+ 
+ 	init_waitqueue_entry(&wait, current);
+ 
++	set_freezable();
+ 	for (;;) {
+ 		lbs_deb_thread( "main-thread 111: intcounter=%d "
+ 		       "currenttxskb=%p dnld_sent=%d\n",
+diff -Nurb linux-2.6.22-570/drivers/net/wireless/wl3501_cs.c linux-2.6.22-try2/drivers/net/wireless/wl3501_cs.c
+--- linux-2.6.22-570/drivers/net/wireless/wl3501_cs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/wireless/wl3501_cs.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1011,7 +1011,7 @@
+ 	} else {
+ 		skb->dev = dev;
+ 		skb_reserve(skb, 2); /* IP headers on 16 bytes boundaries */
+-		eth_copy_and_sum(skb, (unsigned char *)&sig.daddr, 12, 0);
++		skb_copy_to_linear_data(skb, (unsigned char *)&sig.daddr, 12);
+ 		wl3501_receive(this, skb->data, pkt_len);
+ 		skb_put(skb, pkt_len);
+ 		skb->protocol	= eth_type_trans(skb, dev);
+diff -Nurb linux-2.6.22-570/drivers/net/xen-netfront.c linux-2.6.22-try2/drivers/net/xen-netfront.c
+--- linux-2.6.22-570/drivers/net/xen-netfront.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/net/xen-netfront.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,1995 @@
++/*
++ * Virtual network driver for conversing with remote driver backends.
++ *
++ * Copyright (c) 2002-2005, K A Fraser
++ * Copyright (c) 2005, XenSource Ltd
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License version 2
++ * as published by the Free Software Foundation; or, when distributed
++ * separately from the Linux kernel or incorporated into other
++ * software packages, subject to the following license:
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this source file (the "Software"), to deal in the Software without
++ * restriction, including without limitation the rights to use, copy, modify,
++ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
++ * and to permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
++ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
++ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
++ * IN THE SOFTWARE.
++ */
++
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/skbuff.h>
++#include <linux/ethtool.h>
++#include <linux/if_ether.h>
++#include <linux/tcp.h>
++#include <linux/udp.h>
++#include <linux/moduleparam.h>
++#include <linux/mm.h>
++#include <net/ip.h>
++
++#include <xen/xenbus.h>
++#include <xen/events.h>
++#include <xen/page.h>
++#include <xen/grant_table.h>
++
++#include <xen/interface/io/netif.h>
++#include <xen/interface/memory.h>
++#include <xen/interface/grant_table.h>
++
++static struct ethtool_ops xennet_ethtool_ops;
++
++struct netfront_cb {
++	struct page *page;
++	unsigned offset;
++};
++
++#define NETFRONT_SKB_CB(skb)	((struct netfront_cb *)((skb)->cb))
++
++/*
++ * Mutually-exclusive module options to select receive data path:
++ *  copy : Packets are copied by network backend into local memory
++ *  flip : Page containing packet data is transferred to our ownership
++ * For fully-virtualised guests there is no option - copying must be used.
++ * For paravirtualised guests, flipping is the default.
++ */
++typedef enum rx_mode {
++	RX_COPY = 0,
++	RX_FLIP = 1,
++} rx_mode_t;
++
++static enum rx_mode rx_mode = RX_FLIP;
++
++#define param_check_rx_mode_t(name, p) __param_check(name, p, rx_mode_t)
++
++static int param_set_rx_mode_t(const char *val, struct kernel_param *kp)
++{
++	enum rx_mode *rxmp = kp->arg;
++	int ret = 0;
++
++	if (strcmp(val, "copy") == 0)
++		*rxmp = RX_COPY;
++	else if (strcmp(val, "flip") == 0)
++		*rxmp = RX_FLIP;
++	else
++		ret = -EINVAL;
++
++	return ret;
++}
++
++static int param_get_rx_mode_t(char *buffer, struct kernel_param *kp)
++{
++	enum rx_mode *rxmp = kp->arg;
++
++	return sprintf(buffer, "%s", *rxmp == RX_COPY ? "copy" : "flip");
++}
++
++MODULE_PARM_DESC(rx_mode, "How to get packets from card: \"copy\" or \"flip\"");
++module_param(rx_mode, rx_mode_t, 0400);
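++
++/*
++ * Illustrative usage (assuming the module is named xen-netfront): loading
++ * with "modprobe xen-netfront rx_mode=copy" selects the copying receive
++ * path; the 0400 mode makes the choice visible read-only under
++ * /sys/module/xen_netfront/parameters/rx_mode via param_get_rx_mode_t().
++ */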
++
++#define RX_COPY_THRESHOLD 256
++
++#define GRANT_INVALID_REF	0
++
++#define NET_TX_RING_SIZE __RING_SIZE((struct xen_netif_tx_sring *)0, PAGE_SIZE)
++#define NET_RX_RING_SIZE __RING_SIZE((struct xen_netif_rx_sring *)0, PAGE_SIZE)
++#define TX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
++
++struct netfront_info {
++	struct list_head list;
++	struct net_device *netdev;
++
++	struct net_device_stats stats;
++
++	struct xen_netif_tx_front_ring tx;
++	struct xen_netif_rx_front_ring rx;
++
++	spinlock_t   tx_lock;
++	spinlock_t   rx_lock;
++
++	unsigned int evtchn;
++	unsigned int copying_receiver;
++
++	/* Receive-ring batched refills. */
++#define RX_MIN_TARGET 8
++#define RX_DFL_MIN_TARGET 64
++#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256)
++	unsigned rx_min_target, rx_max_target, rx_target;
++	struct sk_buff_head rx_batch;
++
++	struct timer_list rx_refill_timer;
++
++	/*
++	 * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries
++	 * are linked from tx_skb_freelist through skb_entry.link.
++	 *
++	 *  NB. Freelist index entries are always going to be less than
++	 *  PAGE_OFFSET, whereas pointers to skbs will always be equal or
++	 *  greater than PAGE_OFFSET: we use this property to distinguish
++	 *  them.
++	 */
++	union skb_entry {
++		struct sk_buff *skb;
++		unsigned link;
++	} tx_skbs[NET_TX_RING_SIZE];
++	grant_ref_t gref_tx_head;
++	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
++	unsigned tx_skb_freelist;
++
++	struct sk_buff *rx_skbs[NET_RX_RING_SIZE];
++	grant_ref_t gref_rx_head;
++	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
++
++	struct xenbus_device *xbdev;
++	int tx_ring_ref;
++	int rx_ring_ref;
++
++	unsigned long rx_pfn_array[NET_RX_RING_SIZE];
++	struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
++	struct mmu_update rx_mmu[NET_RX_RING_SIZE];
++};
++
++struct netfront_rx_info {
++	struct xen_netif_rx_response rx;
++	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
++};
++
++/*
++ * Access macros for acquiring and freeing slots in tx_skbs[].
++ */
++
++static void add_id_to_freelist(unsigned *head, union skb_entry *list, unsigned short id)
++{
++	list[id].link = *head;
++	*head = id;
++}
++
++static unsigned short get_id_from_freelist(unsigned *head, union skb_entry *list)
++{
++	unsigned int id = *head;
++	*head = list[id].link;
++	return id;
++}
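++
++/*
++ * Worked example: with tx_skb_freelist == 0, tx_skbs[0].link == 3 and
++ * tx_skbs[3].link == 7, get_id_from_freelist() pops id 0 and leaves the
++ * list as 3 -> 7; add_id_to_freelist() pushes an id back on the front.
++ * Free entries hold small indices (< PAGE_OFFSET) while busy entries hold
++ * skb pointers (>= PAGE_OFFSET), so the two states can be told apart by a
++ * single comparison, as done in xennet_release_tx_bufs().
++ */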
++
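++/* The ring sizes are powers of two, so masking with (size - 1) is an
++ * inexpensive idx % NET_RX_RING_SIZE. */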
++static int xennet_rxidx(RING_IDX idx)
++{
++	return idx & (NET_RX_RING_SIZE - 1);
++}
++
++static struct sk_buff *xennet_get_rx_skb(struct netfront_info *np,
++					 RING_IDX ri)
++{
++	int i = xennet_rxidx(ri);
++	struct sk_buff *skb = np->rx_skbs[i];
++	np->rx_skbs[i] = NULL;
++	return skb;
++}
++
++static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
++					    RING_IDX ri)
++{
++	int i = xennet_rxidx(ri);
++	grant_ref_t ref = np->grant_rx_ref[i];
++	np->grant_rx_ref[i] = GRANT_INVALID_REF;
++	return ref;
++}
++
++#ifdef CONFIG_SYSFS
++static int xennet_sysfs_addif(struct net_device *netdev);
++static void xennet_sysfs_delif(struct net_device *netdev);
++#else /* !CONFIG_SYSFS */
++#define xennet_sysfs_addif(dev) (0)
++#define xennet_sysfs_delif(dev) do { } while(0)
++#endif
++
++static int xennet_can_sg(struct net_device *dev)
++{
++	return dev->features & NETIF_F_SG;
++}
++
++
++static void rx_refill_timeout(unsigned long data)
++{
++	struct net_device *dev = (struct net_device *)data;
++	netif_rx_schedule(dev);
++}
++
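++/* Leave headroom for a worst-case packet: MAX_SKB_FRAGS fragment slots
++ * plus two further slots for a page-crossing linear area and an
++ * extra-info (e.g. GSO) request. */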
++static int netfront_tx_slot_available(struct netfront_info *np)
++{
++	return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
++		(TX_MAX_TARGET - MAX_SKB_FRAGS - 2));
++}
++
++static void xennet_maybe_wake_tx(struct net_device *dev)
++{
++	struct netfront_info *np = netdev_priv(dev);
++
++	if (unlikely(netif_queue_stopped(dev)) &&
++	    netfront_tx_slot_available(np) &&
++	    likely(netif_running(dev)))
++		netif_wake_queue(dev);
++}
++
++static void xennet_alloc_rx_buffers(struct net_device *dev)
++{
++	unsigned short id;
++	struct netfront_info *np = netdev_priv(dev);
++	struct sk_buff *skb;
++	struct page *page;
++	int i, batch_target, notify;
++	RING_IDX req_prod = np->rx.req_prod_pvt;
++	struct xen_memory_reservation reservation;
++	grant_ref_t ref;
++	unsigned long pfn;
++	void *vaddr;
++	int nr_flips;
++	struct xen_netif_rx_request *req;
++
++	if (unlikely(!netif_carrier_ok(dev)))
++		return;
++
++	/*
++	 * Allocate skbuffs greedily, even though we batch updates to the
++	 * receive ring. This creates a less bursty demand on the memory
++	 * allocator, so should reduce the chance of failed allocation requests
++	 * both for ourselves and for other kernel subsystems.
++	 */
++	batch_target = np->rx_target - (req_prod - np->rx.rsp_cons);
++	for (i = skb_queue_len(&np->rx_batch); i < batch_target; i++) {
++		skb = __netdev_alloc_skb(dev, RX_COPY_THRESHOLD,
++					 GFP_ATOMIC | __GFP_NOWARN);
++		if (unlikely(!skb))
++			goto no_skb;
++
++		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
++		if (!page) {
++			kfree_skb(skb);
++no_skb:
++			/* Any skbuffs queued for refill? Force them out. */
++			if (i != 0)
++				goto refill;
++			/* Could not allocate any skbuffs. Try again later. */
++			mod_timer(&np->rx_refill_timer,
++				  jiffies + (HZ/10));
++			break;
++		}
++
++		skb_shinfo(skb)->frags[0].page = page;
++		skb_shinfo(skb)->nr_frags = 1;
++		__skb_queue_tail(&np->rx_batch, skb);
++	}
++
++	/* Is the batch large enough to be worthwhile? */
++	if (i < (np->rx_target/2)) {
++		if (req_prod > np->rx.sring->req_prod)
++			goto push;
++		return;
++	}
++
++	/* Adjust our fill target if we risked running out of buffers. */
++	if (((req_prod - np->rx.sring->rsp_prod) < (np->rx_target / 4)) &&
++	    ((np->rx_target *= 2) > np->rx_max_target))
++		np->rx_target = np->rx_max_target;
++
++ refill:
++	for (nr_flips = i = 0; ; i++) {
++		if ((skb = __skb_dequeue(&np->rx_batch)) == NULL)
++			break;
++
++		skb->dev = dev;
++
++		id = xennet_rxidx(req_prod + i);
++
++		BUG_ON(np->rx_skbs[id]);
++		np->rx_skbs[id] = skb;
++
++		ref = gnttab_claim_grant_reference(&np->gref_rx_head);
++		BUG_ON((signed short)ref < 0);
++		np->grant_rx_ref[id] = ref;
++
++		pfn = page_to_pfn(skb_shinfo(skb)->frags[0].page);
++		vaddr = page_address(skb_shinfo(skb)->frags[0].page);
++
++		req = RING_GET_REQUEST(&np->rx, req_prod + i);
++		if (!np->copying_receiver) {
++			gnttab_grant_foreign_transfer_ref(ref,
++							  np->xbdev->otherend_id,
++							  pfn);
++			np->rx_pfn_array[nr_flips] = pfn_to_mfn(pfn);
++			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++				/* Remove this page before passing
++				 * back to Xen. */
++				set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
++				MULTI_update_va_mapping(np->rx_mcl+i,
++							(unsigned long)vaddr,
++							__pte(0), 0);
++			}
++			nr_flips++;
++		} else {
++			gnttab_grant_foreign_access_ref(ref,
++							np->xbdev->otherend_id,
++							pfn_to_mfn(pfn),
++							0);
++		}
++
++		req->id = id;
++		req->gref = ref;
++	}
++
++	if (nr_flips != 0) {
++		reservation.extent_start = np->rx_pfn_array;
++		reservation.nr_extents   = nr_flips;
++		reservation.extent_order = 0;
++		reservation.address_bits = 0;
++		reservation.domid        = DOMID_SELF;
++
++		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++			/* After all PTEs have been zapped, flush the TLB. */
++			np->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
++				UVMF_TLB_FLUSH|UVMF_ALL;
++
++			/* Give away a batch of pages. */
++			np->rx_mcl[i].op = __HYPERVISOR_memory_op;
++			np->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
++			np->rx_mcl[i].args[1] = (unsigned long)&reservation;
++
++			/* Zap PTEs and give away pages in one big
++			 * multicall. */
++			(void)HYPERVISOR_multicall(np->rx_mcl, i+1);
++
++			/* Check return status of HYPERVISOR_memory_op(). */
++			if (unlikely(np->rx_mcl[i].result != i))
++				panic("Unable to reduce memory reservation\n");
++		} else {
++			if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
++						 &reservation) != i)
++				panic("Unable to reduce memory reservation\n");
++		}
++	} else {
++		wmb();
++	}
++
++	/* Above is a suitable barrier to ensure backend will see requests. */
++	np->rx.req_prod_pvt = req_prod + i;
++ push:
++	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->rx, notify);
++	if (notify)
++		notify_remote_via_irq(np->netdev->irq);
++}
++
++static int xennet_open(struct net_device *dev)
++{
++	struct netfront_info *np = netdev_priv(dev);
++
++	memset(&np->stats, 0, sizeof(np->stats));
++
++	spin_lock_bh(&np->rx_lock);
++	if (netif_carrier_ok(dev)) {
++		xennet_alloc_rx_buffers(dev);
++		np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
++		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
++			netif_rx_schedule(dev);
++	}
++	spin_unlock_bh(&np->rx_lock);
++
++	xennet_maybe_wake_tx(dev);
++
++	return 0;
++}
++
++static void xennet_tx_buf_gc(struct net_device *dev)
++{
++	RING_IDX cons, prod;
++	unsigned short id;
++	struct netfront_info *np = netdev_priv(dev);
++	struct sk_buff *skb;
++
++	BUG_ON(!netif_carrier_ok(dev));
++
++	do {
++		prod = np->tx.sring->rsp_prod;
++		rmb(); /* Ensure we see responses up to 'rp'. */
++
++		for (cons = np->tx.rsp_cons; cons != prod; cons++) {
++			struct xen_netif_tx_response *txrsp;
++
++			txrsp = RING_GET_RESPONSE(&np->tx, cons);
++			if (txrsp->status == NETIF_RSP_NULL)
++				continue;
++
++			id  = txrsp->id;
++			skb = np->tx_skbs[id].skb;
++			if (unlikely(gnttab_query_foreign_access(
++				np->grant_tx_ref[id]) != 0)) {
++				printk(KERN_ALERT "xennet_tx_buf_gc: warning "
++				       "-- grant still in use by backend "
++				       "domain.\n");
++				BUG();
++			}
++			gnttab_end_foreign_access_ref(
++				np->grant_tx_ref[id], GNTMAP_readonly);
++			gnttab_release_grant_reference(
++				&np->gref_tx_head, np->grant_tx_ref[id]);
++			np->grant_tx_ref[id] = GRANT_INVALID_REF;
++			add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id);
++			dev_kfree_skb_irq(skb);
++		}
++
++		np->tx.rsp_cons = prod;
++
++		/*
++		 * Set a new event, then check for race with update of tx_cons.
++		 * Note that it is essential to schedule a callback, no matter
++		 * how few buffers are pending. Even if there is space in the
++		 * transmit ring, higher layers may be blocked because too much
++		 * data is outstanding: in such cases notification from Xen is
++		 * likely to be the only kick that we'll get.
++		 */
++		np->tx.sring->rsp_event =
++			prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
++		mb();
++	} while ((cons == prod) && (prod != np->tx.sring->rsp_prod));
++
++	xennet_maybe_wake_tx(dev);
++}
++
++static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev,
++			      struct xen_netif_tx_request *tx)
++{
++	struct netfront_info *np = netdev_priv(dev);
++	char *data = skb->data;
++	unsigned long mfn;
++	RING_IDX prod = np->tx.req_prod_pvt;
++	int frags = skb_shinfo(skb)->nr_frags;
++	unsigned int offset = offset_in_page(data);
++	unsigned int len = skb_headlen(skb);
++	unsigned int id;
++	grant_ref_t ref;
++	int i;
++
++	/* While the header overlaps a page boundary (including being
++	   larger than a page), split it into page-sized chunks. */
++	while (len > PAGE_SIZE - offset) {
++		tx->size = PAGE_SIZE - offset;
++		tx->flags |= NETTXF_more_data;
++		len -= tx->size;
++		data += tx->size;
++		offset = 0;
++
++		id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
++		np->tx_skbs[id].skb = skb_get(skb);
++		tx = RING_GET_REQUEST(&np->tx, prod++);
++		tx->id = id;
++		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
++		BUG_ON((signed short)ref < 0);
++
++		mfn = virt_to_mfn(data);
++		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
++						mfn, GNTMAP_readonly);
++
++		tx->gref = np->grant_tx_ref[id] = ref;
++		tx->offset = offset;
++		tx->size = len;
++		tx->flags = 0;
++	}
++
++	/* Grant backend access to each skb fragment page. */
++	for (i = 0; i < frags; i++) {
++		skb_frag_t *frag = skb_shinfo(skb)->frags + i;
++
++		tx->flags |= NETTXF_more_data;
++
++		id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
++		np->tx_skbs[id].skb = skb_get(skb);
++		tx = RING_GET_REQUEST(&np->tx, prod++);
++		tx->id = id;
++		ref = gnttab_claim_grant_reference(&np->gref_tx_head);
++		BUG_ON((signed short)ref < 0);
++
++		mfn = pfn_to_mfn(page_to_pfn(frag->page));
++		gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
++						mfn, GNTMAP_readonly);
++
++		tx->gref = np->grant_tx_ref[id] = ref;
++		tx->offset = frag->page_offset;
++		tx->size = frag->size;
++		tx->flags = 0;
++	}
++
++	np->tx.req_prod_pvt = prod;
++}
++
++static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++	unsigned short id;
++	struct netfront_info *np = netdev_priv(dev);
++	struct xen_netif_tx_request *tx;
++	struct xen_netif_extra_info *extra;
++	char *data = skb->data;
++	RING_IDX i;
++	grant_ref_t ref;
++	unsigned long mfn;
++	int notify;
++	int frags = skb_shinfo(skb)->nr_frags;
++	unsigned int offset = offset_in_page(data);
++	unsigned int len = skb_headlen(skb);
++
++	frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
++	if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
++		printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n",
++		       frags);
++		dump_stack();
++		goto drop;
++	}
++
++	spin_lock_irq(&np->tx_lock);
++
++	if (unlikely(!netif_carrier_ok(dev) ||
++		     (frags > 1 && !xennet_can_sg(dev)) ||
++		     netif_needs_gso(dev, skb))) {
++		spin_unlock_irq(&np->tx_lock);
++		goto drop;
++	}
++
++	i = np->tx.req_prod_pvt;
++
++	id = get_id_from_freelist(&np->tx_skb_freelist, np->tx_skbs);
++	np->tx_skbs[id].skb = skb;
++
++	tx = RING_GET_REQUEST(&np->tx, i);
++
++	tx->id   = id;
++	ref = gnttab_claim_grant_reference(&np->gref_tx_head);
++	BUG_ON((signed short)ref < 0);
++	mfn = virt_to_mfn(data);
++	gnttab_grant_foreign_access_ref(
++		ref, np->xbdev->otherend_id, mfn, GNTMAP_readonly);
++	tx->gref = np->grant_tx_ref[id] = ref;
++	tx->offset = offset;
++	tx->size = len;
++	extra = NULL;
++
++	tx->flags = 0;
++	if (skb->ip_summed == CHECKSUM_PARTIAL)
++		/* local packet? */
++		tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
++	else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
++		/* remote but checksummed. */
++		tx->flags |= NETTXF_data_validated;
++
++	if (skb_shinfo(skb)->gso_size) {
++		struct xen_netif_extra_info *gso;
++
++		gso = (struct xen_netif_extra_info *)
++			RING_GET_REQUEST(&np->tx, ++i);
++
++		if (extra)
++			extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
++		else
++			tx->flags |= NETTXF_extra_info;
++
++		gso->u.gso.size = skb_shinfo(skb)->gso_size;
++		gso->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
++		gso->u.gso.pad = 0;
++		gso->u.gso.features = 0;
++
++		gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
++		gso->flags = 0;
++		extra = gso;
++	}
++
++	np->tx.req_prod_pvt = i + 1;
++
++	xennet_make_frags(skb, dev, tx);
++	tx->size = skb->len;
++
++	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&np->tx, notify);
++	if (notify)
++		notify_remote_via_irq(np->netdev->irq);
++
++	xennet_tx_buf_gc(dev);
++
++	if (!netfront_tx_slot_available(np))
++		netif_stop_queue(dev);
++
++	spin_unlock_irq(&np->tx_lock);
++
++	np->stats.tx_bytes += skb->len;
++	np->stats.tx_packets++;
++
++	return 0;
++
++ drop:
++	np->stats.tx_dropped++;
++	dev_kfree_skb(skb);
++	return 0;
++}
++
++static int xennet_close(struct net_device *dev)
++{
++	struct netfront_info *np = netdev_priv(dev);
++	netif_stop_queue(np->netdev);
++	return 0;
++}
++
++static struct net_device_stats *xennet_get_stats(struct net_device *dev)
++{
++	struct netfront_info *np = netdev_priv(dev);
++	return &np->stats;
++}
++
++static void xennet_move_rx_slot(struct netfront_info *np, struct sk_buff *skb,
++				grant_ref_t ref)
++{
++	int new = xennet_rxidx(np->rx.req_prod_pvt);
++
++	BUG_ON(np->rx_skbs[new]);
++	np->rx_skbs[new] = skb;
++	np->grant_rx_ref[new] = ref;
++	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new;
++	RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref;
++	np->rx.req_prod_pvt++;
++}
++
++static int xennet_get_extras(struct netfront_info *np,
++			     struct xen_netif_extra_info *extras,
++			     RING_IDX rp)
++{
++	struct xen_netif_extra_info *extra;
++	struct device *dev = &np->netdev->dev;
++	RING_IDX cons = np->rx.rsp_cons;
++	int err = 0;
++
++	do {
++		struct sk_buff *skb;
++		grant_ref_t ref;
++
++		if (unlikely(cons + 1 == rp)) {
++			if (net_ratelimit())
++				dev_warn(dev, "Missing extra info\n");
++			err = -EBADR;
++			break;
++		}
++
++		extra = (struct xen_netif_extra_info *)
++			RING_GET_RESPONSE(&np->rx, ++cons);
++
++		if (unlikely(!extra->type ||
++			     extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
++			if (net_ratelimit())
++				dev_warn(dev, "Invalid extra type: %d\n",
++					extra->type);
++			err = -EINVAL;
++		} else {
++			memcpy(&extras[extra->type - 1], extra,
++			       sizeof(*extra));
++		}
++
++		skb = xennet_get_rx_skb(np, cons);
++		ref = xennet_get_rx_ref(np, cons);
++		xennet_move_rx_slot(np, skb, ref);
++	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
++
++	np->rx.rsp_cons = cons;
++	return err;
++}
++
++static int xennet_get_responses(struct netfront_info *np,
++				struct netfront_rx_info *rinfo, RING_IDX rp,
++				struct sk_buff_head *list,
++				int *pages_flipped_p)
++{
++	int pages_flipped = *pages_flipped_p;
++	struct mmu_update *mmu;
++	struct multicall_entry *mcl;
++	struct xen_netif_rx_response *rx = &rinfo->rx;
++	struct xen_netif_extra_info *extras = rinfo->extras;
++	struct device *dev = &np->netdev->dev;
++	RING_IDX cons = np->rx.rsp_cons;
++	struct sk_buff *skb = xennet_get_rx_skb(np, cons);
++	grant_ref_t ref = xennet_get_rx_ref(np, cons);
++	int max = MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD);
++	int frags = 1;
++	int err = 0;
++	unsigned long ret;
++
++	if (rx->flags & NETRXF_extra_info) {
++		err = xennet_get_extras(np, extras, rp);
++		cons = np->rx.rsp_cons;
++	}
++
++	for (;;) {
++		unsigned long mfn;
++
++		if (unlikely(rx->status < 0 ||
++			     rx->offset + rx->status > PAGE_SIZE)) {
++			if (net_ratelimit())
++				dev_warn(dev, "rx->offset: %x, size: %u\n",
++					 rx->offset, rx->status);
++			xennet_move_rx_slot(np, skb, ref);
++			err = -EINVAL;
++			goto next;
++		}
++
++		/*
++		 * This definitely indicates a bug, either in this driver or in
++		 * the backend driver. In the future this should flag the bad
++		 * situation to the system controller so it can reboot the backend.
++		 */
++		if (ref == GRANT_INVALID_REF) {
++			if (net_ratelimit())
++				dev_warn(dev, "Bad rx response id %d.\n",
++					 rx->id);
++			err = -EINVAL;
++			goto next;
++		}
++
++		if (!np->copying_receiver) {
++			/* Memory pressure, insufficient buffer
++			 * headroom, ... */
++			mfn = gnttab_end_foreign_transfer_ref(ref);
++			if (!mfn) {
++				if (net_ratelimit())
++					dev_warn(dev, "Unfulfilled rx req "
++						 "(id=%d, st=%d).\n",
++						 rx->id, rx->status);
++				xennet_move_rx_slot(np, skb, ref);
++				err = -ENOMEM;
++				goto next;
++			}
++
++			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++				/* Remap the page. */
++				struct page *page =
++					skb_shinfo(skb)->frags[0].page;
++				unsigned long pfn = page_to_pfn(page);
++				void *vaddr = page_address(page);
++
++				mcl = np->rx_mcl + pages_flipped;
++				mmu = np->rx_mmu + pages_flipped;
++
++				MULTI_update_va_mapping(mcl,
++							(unsigned long)vaddr,
++							mfn_pte(mfn, PAGE_KERNEL),
++							0);
++				mmu->ptr = ((u64)mfn << PAGE_SHIFT)
++					| MMU_MACHPHYS_UPDATE;
++				mmu->val = pfn;
++
++				set_phys_to_machine(pfn, mfn);
++			}
++			pages_flipped++;
++		} else {
++			ret = gnttab_end_foreign_access_ref(ref, 0);
++			BUG_ON(!ret);
++		}
++
++		gnttab_release_grant_reference(&np->gref_rx_head, ref);
++
++		__skb_queue_tail(list, skb);
++
++next:
++		if (!(rx->flags & NETRXF_more_data))
++			break;
++
++		if (cons + frags == rp) {
++			if (net_ratelimit())
++				dev_warn(dev, "Need more frags\n");
++			err = -ENOENT;
++			break;
++		}
++
++		rx = RING_GET_RESPONSE(&np->rx, cons + frags);
++		skb = xennet_get_rx_skb(np, cons + frags);
++		ref = xennet_get_rx_ref(np, cons + frags);
++		frags++;
++	}
++
++	if (unlikely(frags > max)) {
++		if (net_ratelimit())
++			dev_warn(dev, "Too many frags\n");
++		err = -E2BIG;
++	}
++
++	if (unlikely(err))
++		np->rx.rsp_cons = cons + frags;
++
++	*pages_flipped_p = pages_flipped;
++
++	return err;
++}
++
++static int xennet_set_skb_gso(struct sk_buff *skb,
++			      struct xen_netif_extra_info *gso)
++{
++	if (!gso->u.gso.size) {
++		if (net_ratelimit())
++			printk(KERN_WARNING "GSO size must not be zero.\n");
++		return -EINVAL;
++	}
++
++	/* Currently only TCPv4 S.O. is supported. */
++	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
++		if (net_ratelimit())
++			printk(KERN_WARNING "Bad GSO type %d.\n", gso->u.gso.type);
++		return -EINVAL;
++	}
++
++	skb_shinfo(skb)->gso_size = gso->u.gso.size;
++	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
++
++	/* Header must be checked, and gso_segs computed. */
++	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
++	skb_shinfo(skb)->gso_segs = 0;
++
++	return 0;
++}
++
++static RING_IDX xennet_fill_frags(struct netfront_info *np,
++				  struct sk_buff *skb,
++				  struct sk_buff_head *list)
++{
++	struct skb_shared_info *shinfo = skb_shinfo(skb);
++	int nr_frags = shinfo->nr_frags;
++	RING_IDX cons = np->rx.rsp_cons;
++	skb_frag_t *frag = shinfo->frags + nr_frags;
++	struct sk_buff *nskb;
++
++	while ((nskb = __skb_dequeue(list))) {
++		struct xen_netif_rx_response *rx =
++			RING_GET_RESPONSE(&np->rx, ++cons);
++
++		frag->page = skb_shinfo(nskb)->frags[0].page;
++		frag->page_offset = rx->offset;
++		frag->size = rx->status;
++
++		skb->data_len += rx->status;
++
++		skb_shinfo(nskb)->nr_frags = 0;
++		kfree_skb(nskb);
++
++		frag++;
++		nr_frags++;
++	}
++
++	shinfo->nr_frags = nr_frags;
++	return cons;
++}
++
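++/*
++ * Packets flagged NETRXF_csum_blank arrive with no checksum filled in, so
++ * point skb->csum_start/csum_offset at the right header field before the
++ * stack sees the packet; only TCP and UDP over IPv4 are handled here.
++ */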
++static int skb_checksum_setup(struct sk_buff *skb)
++{
++	struct iphdr *iph;
++	unsigned char *th;
++	int err = -EPROTO;
++
++	if (skb->protocol != htons(ETH_P_IP))
++		goto out;
++
++	iph = (void *)skb->data;
++	th = skb->data + 4 * iph->ihl;
++	if (th >= skb_tail_pointer(skb))
++		goto out;
++
++	skb->csum_start = th - skb->head;
++	switch (iph->protocol) {
++	case IPPROTO_TCP:
++		skb->csum_offset = offsetof(struct tcphdr, check);
++		break;
++	case IPPROTO_UDP:
++		skb->csum_offset = offsetof(struct udphdr, check);
++		break;
++	default:
++		if (net_ratelimit())
++			printk(KERN_ERR "Attempting to checksum a non-"
++			       "TCP/UDP packet, dropping a protocol"
++			       " %d packet", iph->protocol);
++		goto out;
++	}
++
++	if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb))
++		goto out;
++
++	err = 0;
++
++out:
++	return err;
++}
++
++static int handle_incoming_queue(struct net_device *dev,
++				  struct sk_buff_head *rxq)
++{
++	struct netfront_info *np = netdev_priv(dev);
++	int packets_dropped = 0;
++	struct sk_buff *skb;
++
++	while ((skb = __skb_dequeue(rxq)) != NULL) {
++		struct page *page = NETFRONT_SKB_CB(skb)->page;
++		void *vaddr = page_address(page);
++		unsigned offset = NETFRONT_SKB_CB(skb)->offset;
++
++		memcpy(skb->data, vaddr + offset,
++		       skb_headlen(skb));
++
++		if (page != skb_shinfo(skb)->frags[0].page)
++			__free_page(page);
++
++		/* Ethernet work: Delayed to here as it peeks the header. */
++		skb->protocol = eth_type_trans(skb, dev);
++
++		if (skb->ip_summed == CHECKSUM_PARTIAL) {
++			if (skb_checksum_setup(skb)) {
++				kfree_skb(skb);
++				packets_dropped++;
++				np->stats.rx_errors++;
++				continue;
++			}
++		}
++
++		np->stats.rx_packets++;
++		np->stats.rx_bytes += skb->len;
++
++		/* Pass it up. */
++		netif_receive_skb(skb);
++		dev->last_rx = jiffies;
++	}
++
++	return packets_dropped;
++}
++
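++/*
++ * NAPI poll (pre-2.6.24 interface): *pbudget and dev->quota bound how many
++ * responses may be consumed in one pass; a nonzero return value asks the
++ * core to poll this device again.
++ */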
++static int xennet_poll(struct net_device *dev, int *pbudget)
++{
++	struct netfront_info *np = netdev_priv(dev);
++	struct sk_buff *skb;
++	struct netfront_rx_info rinfo;
++	struct xen_netif_rx_response *rx = &rinfo.rx;
++	struct xen_netif_extra_info *extras = rinfo.extras;
++	RING_IDX i, rp;
++	struct multicall_entry *mcl;
++	int work_done, budget, more_to_do = 1;
++	struct sk_buff_head rxq;
++	struct sk_buff_head errq;
++	struct sk_buff_head tmpq;
++	unsigned long flags;
++	unsigned int len;
++	int pages_flipped = 0;
++	int err;
++
++	spin_lock(&np->rx_lock);
++
++	if (unlikely(!netif_carrier_ok(dev))) {
++		spin_unlock(&np->rx_lock);
++		return 0;
++	}
++
++	skb_queue_head_init(&rxq);
++	skb_queue_head_init(&errq);
++	skb_queue_head_init(&tmpq);
++
++	if ((budget = *pbudget) > dev->quota)
++		budget = dev->quota;
++	rp = np->rx.sring->rsp_prod;
++	rmb(); /* Ensure we see queued responses up to 'rp'. */
++
++	i = np->rx.rsp_cons;
++	work_done = 0;
++	while ((i != rp) && (work_done < budget)) {
++		memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
++		memset(extras, 0, sizeof(rinfo.extras));
++
++		err = xennet_get_responses(np, &rinfo, rp, &tmpq,
++					   &pages_flipped);
++
++		if (unlikely(err)) {
++err:
++			while ((skb = __skb_dequeue(&tmpq)))
++				__skb_queue_tail(&errq, skb);
++			np->stats.rx_errors++;
++			i = np->rx.rsp_cons;
++			continue;
++		}
++
++		skb = __skb_dequeue(&tmpq);
++
++		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
++			struct xen_netif_extra_info *gso;
++			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
++
++			if (unlikely(xennet_set_skb_gso(skb, gso))) {
++				__skb_queue_head(&tmpq, skb);
++				np->rx.rsp_cons += skb_queue_len(&tmpq);
++				goto err;
++			}
++		}
++
++		NETFRONT_SKB_CB(skb)->page = skb_shinfo(skb)->frags[0].page;
++		NETFRONT_SKB_CB(skb)->offset = rx->offset;
++
++		len = rx->status;
++		if (len > RX_COPY_THRESHOLD)
++			len = RX_COPY_THRESHOLD;
++		skb_put(skb, len);
++
++		if (rx->status > len) {
++			skb_shinfo(skb)->frags[0].page_offset =
++				rx->offset + len;
++			skb_shinfo(skb)->frags[0].size = rx->status - len;
++			skb->data_len = rx->status - len;
++		} else {
++			skb_shinfo(skb)->frags[0].page = NULL;
++			skb_shinfo(skb)->nr_frags = 0;
++		}
++
++		i = xennet_fill_frags(np, skb, &tmpq);
++
++		/*
++		 * Truesize approximates the size of true data plus
++		 * any supervisor overheads. Adding hypervisor
++		 * overheads has been shown to significantly reduce
++		 * achievable bandwidth with the default receive
++		 * buffer size. It is therefore not wise to account
++		 * for it here.
++		 *
++		 * After alloc_skb(RX_COPY_THRESHOLD), truesize is set
++		 * to RX_COPY_THRESHOLD + the supervisor
++		 * overheads. Here, we add the size of the data pulled
++		 * in xennet_fill_frags().
++		 *
++		 * We also adjust for any unused space in the main
++		 * data area by subtracting (RX_COPY_THRESHOLD -
++		 * len). This is especially important with drivers
++		 * which split incoming packets into header and data,
++		 * using only 66 bytes of the main data area (see the
++		 * e1000 driver for example.)  On such systems,
++		 * without this last adjustement, our achievable
++		 * without this last adjustment, our achievable
++		 * receive throughput using the standard receive
++		 */
++		skb->truesize += skb->data_len - (RX_COPY_THRESHOLD - len);
++		skb->len += skb->data_len;
++
++		if (rx->flags & NETRXF_csum_blank)
++			skb->ip_summed = CHECKSUM_PARTIAL;
++		else if (rx->flags & NETRXF_data_validated)
++			skb->ip_summed = CHECKSUM_UNNECESSARY;
++
++		__skb_queue_tail(&rxq, skb);
++
++		np->rx.rsp_cons = ++i;
++		work_done++;
++	}
++
++	if (pages_flipped) {
++		/* Do all the remapping work, and M2P updates. */
++		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++			mcl = np->rx_mcl + pages_flipped;
++			MULTI_mmu_update(mcl, np->rx_mmu,
++					 pages_flipped, 0, DOMID_SELF);
++			(void)HYPERVISOR_multicall(np->rx_mcl,
++						   pages_flipped + 1);
++		}
++	}
++
++	while ((skb = __skb_dequeue(&errq)))
++		kfree_skb(skb);
++
++	work_done -= handle_incoming_queue(dev, &rxq);
++
++	/* If we get a callback with very few responses, reduce fill target. */
++	/* NB. Note exponential increase, linear decrease. */
++	if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) >
++	     ((3*np->rx_target) / 4)) &&
++	    (--np->rx_target < np->rx_min_target))
++		np->rx_target = np->rx_min_target;
++
++	xennet_alloc_rx_buffers(dev);
++
++	*pbudget   -= work_done;
++	dev->quota -= work_done;
++
++	if (work_done < budget) {
++		local_irq_save(flags);
++
++		RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
++		if (!more_to_do)
++			__netif_rx_complete(dev);
++
++		local_irq_restore(flags);
++	}
++
++	spin_unlock(&np->rx_lock);
++
++	return more_to_do;
++}
++
++static int xennet_change_mtu(struct net_device *dev, int mtu)
++{
++	int max = xennet_can_sg(dev) ? 65535 - ETH_HLEN : ETH_DATA_LEN;
++
++	if (mtu > max)
++		return -EINVAL;
++	dev->mtu = mtu;
++	return 0;
++}
++
++static void xennet_release_tx_bufs(struct netfront_info *np)
++{
++	struct sk_buff *skb;
++	int i;
++
++	for (i = 0; i < NET_TX_RING_SIZE; i++) {
++		/* Skip over entries which are actually freelist references */
++		if ((unsigned long)np->tx_skbs[i].skb < PAGE_OFFSET)
++			continue;
++
++		skb = np->tx_skbs[i].skb;
++		gnttab_end_foreign_access_ref(np->grant_tx_ref[i],
++					      GNTMAP_readonly);
++		gnttab_release_grant_reference(&np->gref_tx_head,
++					       np->grant_tx_ref[i]);
++		np->grant_tx_ref[i] = GRANT_INVALID_REF;
++		add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, i);
++		dev_kfree_skb_irq(skb);
++	}
++}
++
++static void xennet_release_rx_bufs(struct netfront_info *np)
++{
++	struct mmu_update      *mmu = np->rx_mmu;
++	struct multicall_entry *mcl = np->rx_mcl;
++	struct sk_buff_head free_list;
++	struct sk_buff *skb;
++	unsigned long mfn;
++	int xfer = 0, noxfer = 0, unused = 0;
++	int id, ref;
++
++	if (np->copying_receiver) {
++		dev_warn(&np->netdev->dev, "%s: fix me for copying receiver.\n",
++			 __func__);
++		return;
++	}
++
++	skb_queue_head_init(&free_list);
++
++	spin_lock_bh(&np->rx_lock);
++
++	for (id = 0; id < NET_RX_RING_SIZE; id++) {
++		if ((ref = np->grant_rx_ref[id]) == GRANT_INVALID_REF) {
++			unused++;
++			continue;
++		}
++
++		skb = np->rx_skbs[id];
++		mfn = gnttab_end_foreign_transfer_ref(ref);
++		gnttab_release_grant_reference(&np->gref_rx_head, ref);
++		np->grant_rx_ref[id] = GRANT_INVALID_REF;
++
++		if (mfn == 0) {
++			skb_shinfo(skb)->nr_frags = 0;
++			dev_kfree_skb(skb);
++			noxfer++;
++			continue;
++		}
++
++		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++			/* Remap the page. */
++			struct page *page = skb_shinfo(skb)->frags[0].page;
++			unsigned long pfn = page_to_pfn(page);
++			void *vaddr = page_address(page);
++
++			MULTI_update_va_mapping(mcl, (unsigned long)vaddr,
++						mfn_pte(mfn, PAGE_KERNEL),
++						0);
++			mcl++;
++			mmu->ptr = ((u64)mfn << PAGE_SHIFT)
++				| MMU_MACHPHYS_UPDATE;
++			mmu->val = pfn;
++			mmu++;
++
++			set_phys_to_machine(pfn, mfn);
++		}
++		__skb_queue_tail(&free_list, skb);
++		xfer++;
++	}
++
++	dev_info(&np->netdev->dev, "%s: %d xfer, %d noxfer, %d unused\n",
++		 __func__, xfer, noxfer, unused);
++
++	if (xfer) {
++		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
++			/* Do all the remapping work and M2P updates. */
++			MULTI_mmu_update(mcl, np->rx_mmu, mmu - np->rx_mmu,
++					 0, DOMID_SELF);
++			mcl++;
++			HYPERVISOR_multicall(np->rx_mcl, mcl - np->rx_mcl);
++		}
++	}
++
++	while ((skb = __skb_dequeue(&free_list)) != NULL)
++		dev_kfree_skb(skb);
++
++	spin_unlock_bh(&np->rx_lock);
++}
++
++static void xennet_uninit(struct net_device *dev)
++{
++	struct netfront_info *np = netdev_priv(dev);
++	xennet_release_tx_bufs(np);
++	xennet_release_rx_bufs(np);
++	gnttab_free_grant_references(np->gref_tx_head);
++	gnttab_free_grant_references(np->gref_rx_head);
++}
++
++static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev)
++{
++	int i, err;
++	struct net_device *netdev;
++	struct netfront_info *np;
++
++	netdev = alloc_etherdev(sizeof(struct netfront_info));
++	if (!netdev) {
++		printk(KERN_WARNING "%s> alloc_etherdev failed.\n",
++		       __func__);
++		return ERR_PTR(-ENOMEM);
++	}
++
++	np                   = netdev_priv(netdev);
++	np->xbdev            = dev;
++
++	spin_lock_init(&np->tx_lock);
++	spin_lock_init(&np->rx_lock);
++
++	skb_queue_head_init(&np->rx_batch);
++	np->rx_target     = RX_DFL_MIN_TARGET;
++	np->rx_min_target = RX_DFL_MIN_TARGET;
++	np->rx_max_target = RX_MAX_TARGET;
++
++	init_timer(&np->rx_refill_timer);
++	np->rx_refill_timer.data = (unsigned long)netdev;
++	np->rx_refill_timer.function = rx_refill_timeout;
++
++	/* Initialise tx_skbs as a free chain containing every entry. */
++	np->tx_skb_freelist = 0;
++	for (i = 0; i < NET_TX_RING_SIZE; i++) {
++		np->tx_skbs[i].link = i+1;
++		np->grant_tx_ref[i] = GRANT_INVALID_REF;
++	}
++
++	/* Clear out rx_skbs */
++	for (i = 0; i < NET_RX_RING_SIZE; i++) {
++		np->rx_skbs[i] = NULL;
++		np->grant_rx_ref[i] = GRANT_INVALID_REF;
++	}
++
++	/* A grant for every tx ring slot */
++	if (gnttab_alloc_grant_references(TX_MAX_TARGET,
++					  &np->gref_tx_head) < 0) {
++		printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n");
++		err = -ENOMEM;
++		goto exit;
++	}
++	/* A grant for every rx ring slot */
++	if (gnttab_alloc_grant_references(RX_MAX_TARGET,
++					  &np->gref_rx_head) < 0) {
++		printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n");
++		err = -ENOMEM;
++		goto exit_free_tx;
++	}
++
++	netdev->open            = xennet_open;
++	netdev->hard_start_xmit = xennet_start_xmit;
++	netdev->stop            = xennet_close;
++	netdev->get_stats       = xennet_get_stats;
++	netdev->poll            = xennet_poll;
++	netdev->uninit          = xennet_uninit;
++	netdev->change_mtu	= xennet_change_mtu;
++	netdev->weight          = 64;
++	netdev->features        = NETIF_F_IP_CSUM;
++
++	SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
++	SET_MODULE_OWNER(netdev);
++	SET_NETDEV_DEV(netdev, &dev->dev);
++
++	np->netdev = netdev;
++
++	netif_carrier_off(netdev);
++
++	return netdev;
++
++ exit_free_tx:
++	gnttab_free_grant_references(np->gref_tx_head);
++ exit:
++	free_netdev(netdev);
++	return ERR_PTR(err);
++}
++
++/**
++ * Entry point to this code when a new device is created.  Allocate the basic
++ * structures and the ring buffers for communication with the backend, and
++ * inform the backend of the appropriate details for those.
++ */
++static int __devinit netfront_probe(struct xenbus_device *dev,
++				    const struct xenbus_device_id *id)
++{
++	int err;
++	struct net_device *netdev;
++	struct netfront_info *info;
++
++	netdev = xennet_create_dev(dev);
++	if (IS_ERR(netdev)) {
++		err = PTR_ERR(netdev);
++		xenbus_dev_fatal(dev, err, "creating netdev");
++		return err;
++	}
++
++	info = netdev_priv(netdev);
++	dev->dev.driver_data = info;
++
++	err = register_netdev(info->netdev);
++	if (err) {
++		printk(KERN_WARNING "%s: register_netdev err=%d\n",
++		       __func__, err);
++		goto fail;
++	}
++
++	err = xennet_sysfs_addif(info->netdev);
++	if (err) {
++		unregister_netdev(info->netdev);
++		printk(KERN_WARNING "%s: add sysfs failed err=%d\n",
++		       __func__, err);
++		goto fail;
++	}
++
++	return 0;
++
++ fail:
++	free_netdev(netdev);
++	dev->dev.driver_data = NULL;
++	return err;
++}
++
++static void xennet_end_access(int ref, void *page)
++{
++	/* This frees the page as a side-effect */
++	if (ref != GRANT_INVALID_REF)
++		gnttab_end_foreign_access(ref, 0, (unsigned long)page);
++}
++
++static void xennet_disconnect_backend(struct netfront_info *info)
++{
++	/* Stop old i/f to prevent errors whilst we rebuild the state. */
++	spin_lock_bh(&info->rx_lock);
++	spin_lock_irq(&info->tx_lock);
++	netif_carrier_off(info->netdev);
++	spin_unlock_irq(&info->tx_lock);
++	spin_unlock_bh(&info->rx_lock);
++
++	if (info->netdev->irq)
++		unbind_from_irqhandler(info->netdev->irq, info->netdev);
++	info->evtchn = info->netdev->irq = 0;
++
++	/* End access and free the pages */
++	xennet_end_access(info->tx_ring_ref, info->tx.sring);
++	xennet_end_access(info->rx_ring_ref, info->rx.sring);
++
++	info->tx_ring_ref = GRANT_INVALID_REF;
++	info->rx_ring_ref = GRANT_INVALID_REF;
++	info->tx.sring = NULL;
++	info->rx.sring = NULL;
++}
++
++/**
++ * We are reconnecting to the backend, due to a suspend/resume, or a backend
++ * driver restart.  We tear down our netif structure and recreate it, but
++ * leave the device-layer structures intact so that this is transparent to the
++ * rest of the kernel.
++ */
++static int netfront_resume(struct xenbus_device *dev)
++{
++	struct netfront_info *info = dev->dev.driver_data;
++
++	dev_dbg(&dev->dev, "%s\n", dev->nodename);
++
++	xennet_disconnect_backend(info);
++	return 0;
++}
++
++static int xen_net_read_mac(struct xenbus_device *dev, u8 mac[])
++{
++	char *s, *e, *macstr;
++	int i;
++
++	macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL);
++	if (IS_ERR(macstr))
++		return PTR_ERR(macstr);
++
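++	/* The xenstore "mac" node is a colon-separated string, e.g.
++	 * "00:16:3e:12:34:56" (00:16:3e is the Xen OUI; the address shown
++	 * is purely illustrative). */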
++	for (i = 0; i < ETH_ALEN; i++) {
++		mac[i] = simple_strtoul(s, &e, 16);
++		if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) {
++			kfree(macstr);
++			return -ENOENT;
++		}
++		s = e+1;
++	}
++
++	kfree(macstr);
++	return 0;
++}
++
++static irqreturn_t xennet_interrupt(int irq, void *dev_id)
++{
++	struct net_device *dev = dev_id;
++	struct netfront_info *np = netdev_priv(dev);
++	unsigned long flags;
++
++	spin_lock_irqsave(&np->tx_lock, flags);
++
++	if (likely(netif_carrier_ok(dev))) {
++		xennet_tx_buf_gc(dev);
++		/* Under tx_lock: protects access to rx shared-ring indexes. */
++		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
++			netif_rx_schedule(dev);
++	}
++
++	spin_unlock_irqrestore(&np->tx_lock, flags);
++
++	return IRQ_HANDLED;
++}
++
++static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info)
++{
++	struct xen_netif_tx_sring *txs;
++	struct xen_netif_rx_sring *rxs;
++	int err;
++	struct net_device *netdev = info->netdev;
++
++	info->tx_ring_ref = GRANT_INVALID_REF;
++	info->rx_ring_ref = GRANT_INVALID_REF;
++	info->rx.sring = NULL;
++	info->tx.sring = NULL;
++	netdev->irq = 0;
++
++	err = xen_net_read_mac(dev, netdev->dev_addr);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "parsing %s/mac", dev->nodename);
++		goto fail;
++	}
++
++	txs = (struct xen_netif_tx_sring *)get_zeroed_page(GFP_KERNEL);
++	if (!txs) {
++		err = -ENOMEM;
++		xenbus_dev_fatal(dev, err, "allocating tx ring page");
++		goto fail;
++	}
++	SHARED_RING_INIT(txs);
++	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
++
++	err = xenbus_grant_ring(dev, virt_to_mfn(txs));
++	if (err < 0) {
++		free_page((unsigned long)txs);
++		goto fail;
++	}
++
++	info->tx_ring_ref = err;
++	rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_KERNEL);
++	if (!rxs) {
++		err = -ENOMEM;
++		xenbus_dev_fatal(dev, err, "allocating rx ring page");
++		goto fail;
++	}
++	SHARED_RING_INIT(rxs);
++	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
++
++	err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
++	if (err < 0) {
++		free_page((unsigned long)rxs);
++		goto fail;
++	}
++	info->rx_ring_ref = err;
++
++	err = xenbus_alloc_evtchn(dev, &info->evtchn);
++	if (err)
++		goto fail;
++
++	err = bind_evtchn_to_irqhandler(info->evtchn, xennet_interrupt,
++					IRQF_SAMPLE_RANDOM, netdev->name,
++					netdev);
++	if (err < 0)
++		goto fail;
++	netdev->irq = err;
++	return 0;
++
++ fail:
++	return err;
++}
++
++/* Common code used when first setting up, and when resuming. */
++static int talk_to_backend(struct xenbus_device *dev,
++			   struct netfront_info *info)
++{
++	const char *message;
++	struct xenbus_transaction xbt;
++	int err;
++
++	/* Create shared ring, alloc event channel. */
++	err = setup_netfront(dev, info);
++	if (err)
++		goto out;
++
++again:
++	err = xenbus_transaction_start(&xbt);
++	if (err) {
++		xenbus_dev_fatal(dev, err, "starting transaction");
++		goto destroy_ring;
++	}
++
++	err = xenbus_printf(xbt, dev->nodename, "tx-ring-ref","%u",
++			    info->tx_ring_ref);
++	if (err) {
++		message = "writing tx ring-ref";
++		goto abort_transaction;
++	}
++	err = xenbus_printf(xbt, dev->nodename, "rx-ring-ref","%u",
++			    info->rx_ring_ref);
++	if (err) {
++		message = "writing rx ring-ref";
++		goto abort_transaction;
++	}
++	err = xenbus_printf(xbt, dev->nodename,
++			    "event-channel", "%u", info->evtchn);
++	if (err) {
++		message = "writing event-channel";
++		goto abort_transaction;
++	}
++
++	err = xenbus_printf(xbt, dev->nodename, "request-rx-copy", "%u",
++			    info->copying_receiver);
++	if (err) {
++		message = "writing request-rx-copy";
++		goto abort_transaction;
++	}
++
++	err = xenbus_printf(xbt, dev->nodename, "feature-rx-notify", "%d", 1);
++	if (err) {
++		message = "writing feature-rx-notify";
++		goto abort_transaction;
++	}
++
++	err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", 1);
++	if (err) {
++		message = "writing feature-sg";
++		goto abort_transaction;
++	}
++
++	err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4", "%d", 1);
++	if (err) {
++		message = "writing feature-gso-tcpv4";
++		goto abort_transaction;
++	}
++
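++	/*
++	 * Xenbus transactions are optimistic: if another writer raced with
++	 * this one, xenbus_transaction_end() fails with -EAGAIN and the
++	 * whole transaction is retried from the top.
++	 */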
++	err = xenbus_transaction_end(xbt, 0);
++	if (err) {
++		if (err == -EAGAIN)
++			goto again;
++		xenbus_dev_fatal(dev, err, "completing transaction");
++		goto destroy_ring;
++	}
++
++	return 0;
++
++ abort_transaction:
++	xenbus_transaction_end(xbt, 1);
++	xenbus_dev_fatal(dev, err, "%s", message);
++ destroy_ring:
++	xennet_disconnect_backend(info);
++ out:
++	return err;
++}
++
++static int xennet_set_sg(struct net_device *dev, u32 data)
++{
++	if (data) {
++		struct netfront_info *np = netdev_priv(dev);
++		int val;
++
++		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend, "feature-sg",
++				 "%d", &val) < 0)
++			val = 0;
++		if (!val)
++			return -ENOSYS;
++	} else if (dev->mtu > ETH_DATA_LEN)
++		dev->mtu = ETH_DATA_LEN;
++
++	return ethtool_op_set_sg(dev, data);
++}
++
++static int xennet_set_tso(struct net_device *dev, u32 data)
++{
++	if (data) {
++		struct netfront_info *np = netdev_priv(dev);
++		int val;
++
++		if (xenbus_scanf(XBT_NIL, np->xbdev->otherend,
++				 "feature-gso-tcpv4", "%d", &val) < 0)
++			val = 0;
++		if (!val)
++			return -ENOSYS;
++	}
++
++	return ethtool_op_set_tso(dev, data);
++}
++
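++/*
++ * Enable offloads in dependency order: SG requires IP checksum offload,
++ * and TSO in turn requires SG, so each feature is negotiated with the
++ * backend only after its prerequisite has succeeded.
++ */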
++static void xennet_set_features(struct net_device *dev)
++{
++	/* Turn off all GSO bits except ROBUST. */
++	dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
++	dev->features |= NETIF_F_GSO_ROBUST;
++	xennet_set_sg(dev, 0);
++
++	/* We need checksum offload to enable scatter/gather and TSO. */
++	if (!(dev->features & NETIF_F_IP_CSUM))
++		return;
++
++	if (!xennet_set_sg(dev, 1))
++		xennet_set_tso(dev, 1);
++}
++
++static int xennet_connect(struct net_device *dev)
++{
++	struct netfront_info *np = netdev_priv(dev);
++	int i, requeue_idx, err;
++	struct sk_buff *skb;
++	grant_ref_t ref;
++	struct xen_netif_rx_request *req;
++	unsigned int feature_rx_copy, feature_rx_flip;
++
++	err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
++			   "feature-rx-copy", "%u", &feature_rx_copy);
++	if (err != 1)
++		feature_rx_copy = 0;
++
++	err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
++			   "feature-rx-flip", "%u", &feature_rx_flip);
++	/* Flip is the default, since it was once the only mode of
++	   operation. */
++	if (err != 1)
++		feature_rx_flip = 1;
++
++	/*
++	 * Copy packets on receive path if:
++	 *  (a) This was requested by user, and the backend supports it; or
++	 *  (b) Flipping was requested, but this is unsupported by the backend.
++	 */
++	np->copying_receiver = (((rx_mode == RX_COPY) && feature_rx_copy) ||
++				((rx_mode == RX_FLIP) && !feature_rx_flip));
++
++	err = talk_to_backend(np->xbdev, np);
++	if (err)
++		return err;
++
++	xennet_set_features(dev);
++
++	dev_info(&dev->dev, "has %s receive path.\n",
++		 np->copying_receiver ? "copying" : "flipping");
++
++	spin_lock_bh(&np->rx_lock);
++	spin_lock_irq(&np->tx_lock);
++
++	/* Step 1: Discard all pending TX packet fragments. */
++	xennet_release_tx_bufs(np);
++
++	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
++	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
++		if (!np->rx_skbs[i])
++			continue;
++
++		skb = np->rx_skbs[requeue_idx] = xennet_get_rx_skb(np, i);
++		ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
++		req = RING_GET_REQUEST(&np->rx, requeue_idx);
++
++		if (!np->copying_receiver) {
++			gnttab_grant_foreign_transfer_ref(
++				ref, np->xbdev->otherend_id,
++				page_to_pfn(skb_shinfo(skb)->frags->page));
++		} else {
++			gnttab_grant_foreign_access_ref(
++				ref, np->xbdev->otherend_id,
++				pfn_to_mfn(page_to_pfn(skb_shinfo(skb)->
++						       frags->page)),
++				0);
++		}
++		req->gref = ref;
++		req->id   = requeue_idx;
++
++		requeue_idx++;
++	}
++
++	np->rx.req_prod_pvt = requeue_idx;
++
++	/*
++	 * Step 3: All public and private state should now be sane.  Get
++	 * ready to start sending and receiving packets and give the driver
++	 * domain a kick because we've probably just requeued some
++	 * packets.
++	 */
++	netif_carrier_on(np->netdev);
++	notify_remote_via_irq(np->netdev->irq);
++	xennet_tx_buf_gc(dev);
++	xennet_alloc_rx_buffers(dev);
++
++	spin_unlock_irq(&np->tx_lock);
++	spin_unlock_bh(&np->rx_lock);
++
++	return 0;
++}
++
++/**
++ * Callback received when the backend's state changes.
++ */
++static void backend_changed(struct xenbus_device *dev,
++			    enum xenbus_state backend_state)
++{
++	struct netfront_info *np = dev->dev.driver_data;
++	struct net_device *netdev = np->netdev;
++
++	dev_dbg(&dev->dev, "%s\n", xenbus_strstate(backend_state));
++
++	switch (backend_state) {
++	case XenbusStateInitialising:
++	case XenbusStateInitialised:
++	case XenbusStateConnected:
++	case XenbusStateUnknown:
++	case XenbusStateClosed:
++		break;
++
++	case XenbusStateInitWait:
++		if (dev->state != XenbusStateInitialising)
++			break;
++		if (xennet_connect(netdev) != 0)
++			break;
++		xenbus_switch_state(dev, XenbusStateConnected);
++		break;
++
++	case XenbusStateClosing:
++		xenbus_frontend_closed(dev);
++		break;
++	}
++}
++
++static struct ethtool_ops xennet_ethtool_ops =
++{
++	.get_tx_csum = ethtool_op_get_tx_csum,
++	.set_tx_csum = ethtool_op_set_tx_csum,
++	.get_sg = ethtool_op_get_sg,
++	.set_sg = xennet_set_sg,
++	.get_tso = ethtool_op_get_tso,
++	.set_tso = xennet_set_tso,
++	.get_link = ethtool_op_get_link,
++};
++
++#ifdef CONFIG_SYSFS
++static ssize_t show_rxbuf_min(struct device *dev,
++			      struct device_attribute *attr, char *buf)
++{
++	struct net_device *netdev = to_net_dev(dev);
++	struct netfront_info *info = netdev_priv(netdev);
++
++	return sprintf(buf, "%u\n", info->rx_min_target);
++}
++
++static ssize_t store_rxbuf_min(struct device *dev,
++			       struct device_attribute *attr,
++			       const char *buf, size_t len)
++{
++	struct net_device *netdev = to_net_dev(dev);
++	struct netfront_info *np = netdev_priv(netdev);
++	char *endp;
++	unsigned long target;
++
++	if (!capable(CAP_NET_ADMIN))
++		return -EPERM;
++
++	target = simple_strtoul(buf, &endp, 0);
++	if (endp == buf)
++		return -EBADMSG;
++
++	if (target < RX_MIN_TARGET)
++		target = RX_MIN_TARGET;
++	if (target > RX_MAX_TARGET)
++		target = RX_MAX_TARGET;
++
++	spin_lock_bh(&np->rx_lock);
++	if (target > np->rx_max_target)
++		np->rx_max_target = target;
++	np->rx_min_target = target;
++	if (target > np->rx_target)
++		np->rx_target = target;
++
++	xennet_alloc_rx_buffers(netdev);
++
++	spin_unlock_bh(&np->rx_lock);
++	return len;
++}
++
++static ssize_t show_rxbuf_max(struct device *dev,
++			      struct device_attribute *attr, char *buf)
++{
++	struct net_device *netdev = to_net_dev(dev);
++	struct netfront_info *info = netdev_priv(netdev);
++
++	return sprintf(buf, "%u\n", info->rx_max_target);
++}
++
++static ssize_t store_rxbuf_max(struct device *dev,
++			       struct device_attribute *attr,
++			       const char *buf, size_t len)
++{
++	struct net_device *netdev = to_net_dev(dev);
++	struct netfront_info *np = netdev_priv(netdev);
++	char *endp;
++	unsigned long target;
++
++	if (!capable(CAP_NET_ADMIN))
++		return -EPERM;
++
++	target = simple_strtoul(buf, &endp, 0);
++	if (endp == buf)
++		return -EBADMSG;
++
++	if (target < RX_MIN_TARGET)
++		target = RX_MIN_TARGET;
++	if (target > RX_MAX_TARGET)
++		target = RX_MAX_TARGET;
++
++	spin_lock_bh(&np->rx_lock);
++	if (target < np->rx_min_target)
++		np->rx_min_target = target;
++	np->rx_max_target = target;
++	if (target < np->rx_target)
++		np->rx_target = target;
++
++	xennet_alloc_rx_buffers(netdev);
++
++	spin_unlock_bh(&np->rx_lock);
++	return len;
++}
++
++static ssize_t show_rxbuf_cur(struct device *dev,
++			      struct device_attribute *attr, char *buf)
++{
++	struct net_device *netdev = to_net_dev(dev);
++	struct netfront_info *info = netdev_priv(netdev);
++
++	return sprintf(buf, "%u\n", info->rx_target);
++}
++
++static struct device_attribute xennet_attrs[] = {
++	__ATTR(rxbuf_min, S_IRUGO|S_IWUSR, show_rxbuf_min, store_rxbuf_min),
++	__ATTR(rxbuf_max, S_IRUGO|S_IWUSR, show_rxbuf_max, store_rxbuf_max),
++	__ATTR(rxbuf_cur, S_IRUGO, show_rxbuf_cur, NULL),
++};
++
++static int xennet_sysfs_addif(struct net_device *netdev)
++{
++	int i;
++	int err;
++
++	for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++) {
++		err = device_create_file(&netdev->dev,
++					   &xennet_attrs[i]);
++		if (err)
++			goto fail;
++	}
++	return 0;
++
++ fail:
++	while (--i >= 0)
++		device_remove_file(&netdev->dev, &xennet_attrs[i]);
++	return err;
++}
++
++static void xennet_sysfs_delif(struct net_device *netdev)
++{
++	int i;
++
++	for (i = 0; i < ARRAY_SIZE(xennet_attrs); i++)
++		device_remove_file(&netdev->dev, &xennet_attrs[i]);
++}
++
++#endif /* CONFIG_SYSFS */
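The rxbuf_min/rxbuf_max/rxbuf_cur attributes registered by xennet_sysfs_addif() hang off the net device's sysfs directory, so the RX buffer targets can be inspected and tuned at runtime. A minimal userspace sketch, assuming the frontend shows up as eth0 (so the files live under /sys/class/net/eth0/):

	#include <stdio.h>

	int main(void)
	{
		unsigned int cur;
		FILE *f = fopen("/sys/class/net/eth0/rxbuf_cur", "r");

		if (!f)
			return 1;
		if (fscanf(f, "%u", &cur) != 1) {
			fclose(f);
			return 1;
		}
		fclose(f);
		printf("current rx buffer target: %u\n", cur);

		/* Writes need CAP_NET_ADMIN and are clamped to
		 * [RX_MIN_TARGET, RX_MAX_TARGET] by store_rxbuf_min(). */
		f = fopen("/sys/class/net/eth0/rxbuf_min", "w");
		if (!f)
			return 1;
		fprintf(f, "%u\n", cur);
		return fclose(f) ? 1 : 0;
	}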
++
++static struct xenbus_device_id netfront_ids[] = {
++	{ "vif" },
++	{ "" }
++};
++
++
++static int __devexit xennet_remove(struct xenbus_device *dev)
++{
++	struct netfront_info *info = dev->dev.driver_data;
++
++	dev_dbg(&dev->dev, "%s\n", dev->nodename);
++
++	unregister_netdev(info->netdev);
++
++	xennet_disconnect_backend(info);
++
++	del_timer_sync(&info->rx_refill_timer);
++
++	xennet_sysfs_delif(info->netdev);
++
++	free_netdev(info->netdev);
++
++	return 0;
++}
++
++static struct xenbus_driver netfront = {
++	.name = "vif",
++	.owner = THIS_MODULE,
++	.ids = netfront_ids,
++	.probe = netfront_probe,
++	.remove = __devexit_p(xennet_remove),
++	.resume = netfront_resume,
++	.otherend_changed = backend_changed,
++};
++
++static int __init netif_init(void)
++{
++	if (!is_running_on_xen())
++		return -ENODEV;
++
++	if (is_initial_xendomain())
++		return 0;
++
++	printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");
++
++	return xenbus_register_frontend(&netfront);
++}
++module_init(netif_init);
++
++
++static void __exit netif_exit(void)
++{
++	if (is_initial_xendomain())
++		return;
++
++	return xenbus_unregister_driver(&netfront);
++}
++module_exit(netif_exit);
++
++MODULE_DESCRIPTION("Xen virtual network device frontend");
++MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/drivers/net/yellowfin.c linux-2.6.22-try2/drivers/net/yellowfin.c
+--- linux-2.6.22-570/drivers/net/yellowfin.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/net/yellowfin.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1137,7 +1137,7 @@
+ 				if (skb == NULL)
+ 					break;
+ 				skb_reserve(skb, 2);	/* 16 byte align the IP header */
+-				eth_copy_and_sum(skb, rx_skb->data, pkt_len, 0);
++				skb_copy_to_linear_data(skb, rx_skb->data, pkt_len);
+ 				skb_put(skb, pkt_len);
+ 				pci_dma_sync_single_for_device(yp->pci_dev, desc->addr,
+ 											   yp->rx_buf_sz,
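The yellowfin hunk is part of a tree-wide rename: eth_copy_and_sum() had long since stopped folding a checksum (its final argument was ignored), so callers move to the equivalent skb_copy_to_linear_data() from <linux/skbuff.h>, which is essentially:

	static inline void skb_copy_to_linear_data(struct sk_buff *skb,
						   const void *from,
						   const unsigned int len)
	{
		/* copy into the linear (non-paged) area of the skb */
		memcpy(skb->data, from, len);
	}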
+diff -Nurb linux-2.6.22-570/drivers/parisc/pdc_stable.c linux-2.6.22-try2/drivers/parisc/pdc_stable.c
+--- linux-2.6.22-570/drivers/parisc/pdc_stable.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/parisc/pdc_stable.c	2007-12-19 15:29:22.000000000 -0500
+@@ -121,14 +121,14 @@
+ 
+ #define PDCS_ATTR(_name, _mode, _show, _store) \
+ struct subsys_attribute pdcs_attr_##_name = { \
+-	.attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \
++	.attr = {.name = __stringify(_name), .mode = _mode}, \
+ 	.show = _show, \
+ 	.store = _store, \
+ };
+ 
+ #define PATHS_ATTR(_name, _mode, _show, _store) \
+ struct pdcspath_attribute paths_attr_##_name = { \
+-	.attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE}, \
++	.attr = {.name = __stringify(_name), .mode = _mode}, \
+ 	.show = _show, \
+ 	.store = _store, \
+ };
+diff -Nurb linux-2.6.22-570/drivers/pci/hotplug/acpiphp_ibm.c linux-2.6.22-try2/drivers/pci/hotplug/acpiphp_ibm.c
+--- linux-2.6.22-570/drivers/pci/hotplug/acpiphp_ibm.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/pci/hotplug/acpiphp_ibm.c	2007-12-19 15:29:22.000000000 -0500
+@@ -106,6 +106,7 @@
+ static void ibm_handle_events(acpi_handle handle, u32 event, void *context);
+ static int ibm_get_table_from_acpi(char **bufp);
+ static ssize_t ibm_read_apci_table(struct kobject *kobj,
++				   struct bin_attribute *bin_attr,
+ 		char *buffer, loff_t pos, size_t size);
+ static acpi_status __init ibm_find_acpi_device(acpi_handle handle,
+ 		u32 lvl, void *context, void **rv);
+@@ -117,7 +118,6 @@
+ static struct bin_attribute ibm_apci_table_attr = {
+ 	    .attr = {
+ 		    .name = "apci_table",
+-		    .owner = THIS_MODULE,
+ 		    .mode = S_IRUGO,
+ 	    },
+ 	    .read = ibm_read_apci_table,
+@@ -358,6 +358,7 @@
+  * our solution is to only allow reading the table in all at once
+  **/
+ static ssize_t ibm_read_apci_table(struct kobject *kobj,
++				   struct bin_attribute *bin_attr,
+ 		char *buffer, loff_t pos, size_t size)
+ {
+ 	int bytes_read = -EINVAL;
+diff -Nurb linux-2.6.22-570/drivers/pci/hotplug/rpadlpar_core.c linux-2.6.22-try2/drivers/pci/hotplug/rpadlpar_core.c
+--- linux-2.6.22-570/drivers/pci/hotplug/rpadlpar_core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/pci/hotplug/rpadlpar_core.c	2007-12-19 15:29:22.000000000 -0500
+@@ -159,8 +159,8 @@
+ 	/* Claim new bus resources */
+ 	pcibios_claim_one_bus(dev->bus);
+ 
+-	/* ioremap() for child bus, which may or may not succeed */
+-	remap_bus_range(dev->subordinate);
++	/* Map IO space for child bus, which may or may not succeed */
++	pcibios_map_io_space(dev->subordinate);
+ 
+ 	/* Add new devices to global lists.  Register in proc, sysfs. */
+ 	pci_bus_add_devices(phb->bus);
+@@ -390,7 +390,7 @@
+ 	} else
+ 		pcibios_remove_pci_devices(bus);
+ 
+-	if (unmap_bus_range(bus)) {
++	if (pcibios_unmap_io_space(bus)) {
+ 		printk(KERN_ERR "%s: failed to unmap bus range\n",
+ 			__FUNCTION__);
+ 		return -ERANGE;
+diff -Nurb linux-2.6.22-570/drivers/pci/pci-sysfs.c linux-2.6.22-try2/drivers/pci/pci-sysfs.c
+--- linux-2.6.22-570/drivers/pci/pci-sysfs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/pci/pci-sysfs.c	2007-12-19 15:29:22.000000000 -0500
+@@ -213,7 +213,8 @@
+ };
+ 
+ static ssize_t
+-pci_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count)
++pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
++		char *buf, loff_t off, size_t count)
+ {
+ 	struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj));
+ 	unsigned int size = 64;
+@@ -285,7 +286,8 @@
+ }
+ 
+ static ssize_t
+-pci_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count)
++pci_write_config(struct kobject *kobj, struct bin_attribute *bin_attr,
++		 char *buf, loff_t off, size_t count)
+ {
+ 	struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj));
+ 	unsigned int size = count;
+@@ -352,7 +354,8 @@
+  * callback routine (pci_legacy_read).
+  */
+ ssize_t
+-pci_read_legacy_io(struct kobject *kobj, char *buf, loff_t off, size_t count)
++pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
++		   char *buf, loff_t off, size_t count)
+ {
+         struct pci_bus *bus = to_pci_bus(container_of(kobj,
+                                                       struct class_device,
+@@ -376,7 +379,8 @@
+  * callback routine (pci_legacy_write).
+  */
+ ssize_t
+-pci_write_legacy_io(struct kobject *kobj, char *buf, loff_t off, size_t count)
++pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
++		    char *buf, loff_t off, size_t count)
+ {
+         struct pci_bus *bus = to_pci_bus(container_of(kobj,
+ 						      struct class_device,
+@@ -499,7 +503,6 @@
+ 			sprintf(res_attr_name, "resource%d", i);
+ 			res_attr->attr.name = res_attr_name;
+ 			res_attr->attr.mode = S_IRUSR | S_IWUSR;
+-			res_attr->attr.owner = THIS_MODULE;
+ 			res_attr->size = pci_resource_len(pdev, i);
+ 			res_attr->mmap = pci_mmap_resource;
+ 			res_attr->private = &pdev->resource[i];
+@@ -529,7 +532,8 @@
+  * writing anything except 0 enables it
+  */
+ static ssize_t
+-pci_write_rom(struct kobject *kobj, char *buf, loff_t off, size_t count)
++pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr,
++	      char *buf, loff_t off, size_t count)
+ {
+ 	struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj));
+ 
+@@ -552,7 +556,8 @@
+  * device corresponding to @kobj.
+  */
+ static ssize_t
+-pci_read_rom(struct kobject *kobj, char *buf, loff_t off, size_t count)
++pci_read_rom(struct kobject *kobj, struct bin_attribute *bin_attr,
++	     char *buf, loff_t off, size_t count)
+ {
+ 	struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj));
+ 	void __iomem *rom;
+@@ -582,7 +587,6 @@
+ 	.attr =	{
+ 		.name = "config",
+ 		.mode = S_IRUGO | S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = 256,
+ 	.read = pci_read_config,
+@@ -593,7 +597,6 @@
+ 	.attr =	{
+ 		.name = "config",
+ 		.mode = S_IRUGO | S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = 4096,
+ 	.read = pci_read_config,
+@@ -628,7 +631,6 @@
+ 			rom_attr->size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+ 			rom_attr->attr.name = "rom";
+ 			rom_attr->attr.mode = S_IRUSR;
+-			rom_attr->attr.owner = THIS_MODULE;
+ 			rom_attr->read = pci_read_rom;
+ 			rom_attr->write = pci_write_rom;
+ 			retval = sysfs_create_bin_file(&pdev->dev.kobj, rom_attr);
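These pci-sysfs hunks track the reworked sysfs binary-attribute API: read/write callbacks now receive the struct bin_attribute that fired, and the .attr.owner field disappears because module lifetime is handled by the sysfs core. The same mechanical conversion recurs in the pcmcia, rapidio, rtc and s390 hunks below. As a sketch, the new callback shape is:

	static ssize_t example_read(struct kobject *kobj,
				    struct bin_attribute *bin_attr,
				    char *buf, loff_t off, size_t count)
	{
		/* bin_attr identifies the attribute directly, so one
		 * handler can serve several attributes. */
		return 0;
	}

	static struct bin_attribute example_attr = {
		.attr = { .name = "example", .mode = S_IRUGO },	/* no .owner */
		.read = example_read,
	};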
+diff -Nurb linux-2.6.22-570/drivers/pci/probe.c linux-2.6.22-try2/drivers/pci/probe.c
+--- linux-2.6.22-570/drivers/pci/probe.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/pci/probe.c	2007-12-19 15:29:22.000000000 -0500
+@@ -39,7 +39,6 @@
+ 		b->legacy_io->attr.name = "legacy_io";
+ 		b->legacy_io->size = 0xffff;
+ 		b->legacy_io->attr.mode = S_IRUSR | S_IWUSR;
+-		b->legacy_io->attr.owner = THIS_MODULE;
+ 		b->legacy_io->read = pci_read_legacy_io;
+ 		b->legacy_io->write = pci_write_legacy_io;
+ 		class_device_create_bin_file(&b->class_dev, b->legacy_io);
+@@ -49,7 +48,6 @@
+ 		b->legacy_mem->attr.name = "legacy_mem";
+ 		b->legacy_mem->size = 1024*1024;
+ 		b->legacy_mem->attr.mode = S_IRUSR | S_IWUSR;
+-		b->legacy_mem->attr.owner = THIS_MODULE;
+ 		b->legacy_mem->mmap = pci_mmap_legacy_mem;
+ 		class_device_create_bin_file(&b->class_dev, b->legacy_mem);
+ 	}
+diff -Nurb linux-2.6.22-570/drivers/pcmcia/cs.c linux-2.6.22-try2/drivers/pcmcia/cs.c
+--- linux-2.6.22-570/drivers/pcmcia/cs.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/pcmcia/cs.c	2007-12-19 15:29:24.000000000 -0500
+@@ -654,6 +654,7 @@
+ 	add_wait_queue(&skt->thread_wait, &wait);
+ 	complete(&skt->thread_done);
+ 
++	set_freezable();
+ 	for (;;) {
+ 		unsigned long flags;
+ 		unsigned int events;
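The set_freezable() call here reflects the reworked freezer: kernel threads are now non-freezable by default, so a kthread that should park across suspend/resume must opt in explicitly; the pnpbios dock thread below gains the same call. The usual shape of such a thread, sketched with the <linux/freezer.h> and <linux/kthread.h> helpers:

	static int example_thread(void *data)
	{
		set_freezable();		/* opt in to the freezer */
		while (!kthread_should_stop()) {
			try_to_freeze();	/* park here during suspend */
			/* ... the thread's real work ... */
		}
		return 0;
	}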
+diff -Nurb linux-2.6.22-570/drivers/pcmcia/socket_sysfs.c linux-2.6.22-try2/drivers/pcmcia/socket_sysfs.c
+--- linux-2.6.22-570/drivers/pcmcia/socket_sysfs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/pcmcia/socket_sysfs.c	2007-12-19 15:29:22.000000000 -0500
+@@ -283,7 +283,9 @@
+ 	return (ret);
+ }
+ 
+-static ssize_t pccard_show_cis(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t pccard_show_cis(struct kobject *kobj,
++			       struct bin_attribute *bin_attr,
++			       char *buf, loff_t off, size_t count)
+ {
+ 	unsigned int size = 0x200;
+ 
+@@ -311,7 +313,9 @@
+ 	return (count);
+ }
+ 
+-static ssize_t pccard_store_cis(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t pccard_store_cis(struct kobject *kobj,
++				struct bin_attribute *bin_attr,
++				char *buf, loff_t off, size_t count)
+ {
+ 	struct pcmcia_socket *s = to_socket(container_of(kobj, struct device, kobj));
+ 	cisdump_t *cis;
+@@ -366,7 +370,7 @@
+ };
+ 
+ static struct bin_attribute pccard_cis_attr = {
+-	.attr = { .name = "cis", .mode = S_IRUGO | S_IWUSR, .owner = THIS_MODULE},
++	.attr = { .name = "cis", .mode = S_IRUGO | S_IWUSR },
+ 	.size = 0x200,
+ 	.read = pccard_show_cis,
+ 	.write = pccard_store_cis,
+diff -Nurb linux-2.6.22-570/drivers/pnp/driver.c linux-2.6.22-try2/drivers/pnp/driver.c
+--- linux-2.6.22-570/drivers/pnp/driver.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/pnp/driver.c	2007-12-19 15:29:22.000000000 -0500
+@@ -167,6 +167,8 @@
+ 	    		return error;
+ 	}
+ 
++	if (pnp_dev->protocol && pnp_dev->protocol->suspend)
++		pnp_dev->protocol->suspend(pnp_dev, state);
+ 	return 0;
+ }
+ 
+@@ -179,6 +181,9 @@
+ 	if (!pnp_drv)
+ 		return 0;
+ 
++	if (pnp_dev->protocol && pnp_dev->protocol->resume)
++		pnp_dev->protocol->resume(pnp_dev);
++
+ 	if (!(pnp_drv->flags & PNP_DRIVER_RES_DO_NOT_CHANGE)) {
+ 		error = pnp_start_dev(pnp_dev);
+ 		if (error)
+diff -Nurb linux-2.6.22-570/drivers/pnp/pnpacpi/core.c linux-2.6.22-try2/drivers/pnp/pnpacpi/core.c
+--- linux-2.6.22-570/drivers/pnp/pnpacpi/core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/pnp/pnpacpi/core.c	2007-12-19 15:29:22.000000000 -0500
+@@ -119,11 +119,23 @@
+ 	return ACPI_FAILURE(status) ? -ENODEV : 0;
+ }
+ 
++static int pnpacpi_suspend(struct pnp_dev *dev, pm_message_t state)
++{
++	return acpi_bus_set_power((acpi_handle)dev->data, 3);
++}
++
++static int pnpacpi_resume(struct pnp_dev *dev)
++{
++	return acpi_bus_set_power((acpi_handle)dev->data, 0);
++}
++
+ static struct pnp_protocol pnpacpi_protocol = {
+ 	.name	= "Plug and Play ACPI",
+ 	.get	= pnpacpi_get_resources,
+ 	.set	= pnpacpi_set_resources,
+ 	.disable = pnpacpi_disable_resources,
++	.suspend = pnpacpi_suspend,
++	.resume = pnpacpi_resume,
+ };
+ 
+ static int __init pnpacpi_add_device(struct acpi_device *device)
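The new protocol hooks let the PnP bus suspend/resume paths (see the driver.c hunk above) drive ACPI power management per device. The bare 3 and 0 passed to acpi_bus_set_power() are ACPI D-states; with the symbolic constants from the ACPI headers of this kernel generation the hooks read as follows (a sketch; the _sym names are illustrative):

	static int pnpacpi_suspend_sym(struct pnp_dev *dev, pm_message_t state)
	{
		/* ACPI_STATE_D3 == 3 here: lowest-power device state */
		return acpi_bus_set_power((acpi_handle)dev->data, ACPI_STATE_D3);
	}

	static int pnpacpi_resume_sym(struct pnp_dev *dev)
	{
		/* ACPI_STATE_D0 == 0: device fully on */
		return acpi_bus_set_power((acpi_handle)dev->data, ACPI_STATE_D0);
	}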
+diff -Nurb linux-2.6.22-570/drivers/pnp/pnpbios/core.c linux-2.6.22-try2/drivers/pnp/pnpbios/core.c
+--- linux-2.6.22-570/drivers/pnp/pnpbios/core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/pnp/pnpbios/core.c	2007-12-19 15:29:24.000000000 -0500
+@@ -147,7 +147,7 @@
+ 		info->location_id, info->serial, info->capabilities);
+ 	envp[i] = NULL;
+ 	
+-	value = call_usermodehelper (argv [0], argv, envp, 0);
++	value = call_usermodehelper (argv [0], argv, envp, UMH_WAIT_EXEC);
+ 	kfree (buf);
+ 	kfree (envp);
+ 	return 0;
+@@ -160,6 +160,7 @@
+ {
+ 	static struct pnp_docking_station_info now;
+ 	int docked = -1, d = 0;
++	set_freezable();
+ 	while (!unloading)
+ 	{
+ 		int status;
+diff -Nurb linux-2.6.22-570/drivers/rapidio/rio-sysfs.c linux-2.6.22-try2/drivers/rapidio/rio-sysfs.c
+--- linux-2.6.22-570/drivers/rapidio/rio-sysfs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/rapidio/rio-sysfs.c	2007-12-19 15:29:22.000000000 -0500
+@@ -67,7 +67,8 @@
+ };
+ 
+ static ssize_t
+-rio_read_config(struct kobject *kobj, char *buf, loff_t off, size_t count)
++rio_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
++		char *buf, loff_t off, size_t count)
+ {
+ 	struct rio_dev *dev =
+ 	    to_rio_dev(container_of(kobj, struct device, kobj));
+@@ -137,7 +138,8 @@
+ }
+ 
+ static ssize_t
+-rio_write_config(struct kobject *kobj, char *buf, loff_t off, size_t count)
++rio_write_config(struct kobject *kobj, struct bin_attribute *bin_attr,
++		 char *buf, loff_t off, size_t count)
+ {
+ 	struct rio_dev *dev =
+ 	    to_rio_dev(container_of(kobj, struct device, kobj));
+@@ -197,7 +199,6 @@
+ 	.attr = {
+ 		 .name = "config",
+ 		 .mode = S_IRUGO | S_IWUSR,
+-		 .owner = THIS_MODULE,
+ 		 },
+ 	.size = 0x200000,
+ 	.read = rio_read_config,
+diff -Nurb linux-2.6.22-570/drivers/rtc/rtc-ds1553.c linux-2.6.22-try2/drivers/rtc/rtc-ds1553.c
+--- linux-2.6.22-570/drivers/rtc/rtc-ds1553.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/rtc/rtc-ds1553.c	2007-12-19 15:29:22.000000000 -0500
+@@ -258,8 +258,9 @@
+ 	.ioctl		= ds1553_rtc_ioctl,
+ };
+ 
+-static ssize_t ds1553_nvram_read(struct kobject *kobj, char *buf,
+-				 loff_t pos, size_t size)
++static ssize_t ds1553_nvram_read(struct kobject *kobj,
++				 struct bin_attribute *bin_attr,
++				 char *buf, loff_t pos, size_t size)
+ {
+ 	struct platform_device *pdev =
+ 		to_platform_device(container_of(kobj, struct device, kobj));
+@@ -272,8 +273,9 @@
+ 	return count;
+ }
+ 
+-static ssize_t ds1553_nvram_write(struct kobject *kobj, char *buf,
+-				  loff_t pos, size_t size)
++static ssize_t ds1553_nvram_write(struct kobject *kobj,
++				  struct bin_attribute *bin_attr,
++				  char *buf, loff_t pos, size_t size)
+ {
+ 	struct platform_device *pdev =
+ 		to_platform_device(container_of(kobj, struct device, kobj));
+@@ -290,7 +292,6 @@
+ 	.attr = {
+ 		.name = "nvram",
+ 		.mode = S_IRUGO | S_IWUGO,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = RTC_OFFSET,
+ 	.read = ds1553_nvram_read,
+diff -Nurb linux-2.6.22-570/drivers/rtc/rtc-ds1742.c linux-2.6.22-try2/drivers/rtc/rtc-ds1742.c
+--- linux-2.6.22-570/drivers/rtc/rtc-ds1742.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/rtc/rtc-ds1742.c	2007-12-19 15:29:22.000000000 -0500
+@@ -127,8 +127,9 @@
+ 	.set_time	= ds1742_rtc_set_time,
+ };
+ 
+-static ssize_t ds1742_nvram_read(struct kobject *kobj, char *buf,
+-				 loff_t pos, size_t size)
++static ssize_t ds1742_nvram_read(struct kobject *kobj,
++				 struct bin_attribute *bin_attr,
++				 char *buf, loff_t pos, size_t size)
+ {
+ 	struct platform_device *pdev =
+ 		to_platform_device(container_of(kobj, struct device, kobj));
+@@ -141,8 +142,9 @@
+ 	return count;
+ }
+ 
+-static ssize_t ds1742_nvram_write(struct kobject *kobj, char *buf,
+-				  loff_t pos, size_t size)
++static ssize_t ds1742_nvram_write(struct kobject *kobj,
++				  struct bin_attribute *bin_attr,
++				  char *buf, loff_t pos, size_t size)
+ {
+ 	struct platform_device *pdev =
+ 		to_platform_device(container_of(kobj, struct device, kobj));
+@@ -159,7 +161,6 @@
+ 	.attr = {
+ 		.name = "nvram",
+ 		.mode = S_IRUGO | S_IWUGO,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.read = ds1742_nvram_read,
+ 	.write = ds1742_nvram_write,
+diff -Nurb linux-2.6.22-570/drivers/s390/cio/chp.c linux-2.6.22-try2/drivers/s390/cio/chp.c
+--- linux-2.6.22-570/drivers/s390/cio/chp.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/s390/cio/chp.c	2007-12-19 15:29:22.000000000 -0500
+@@ -141,8 +141,9 @@
+ /*
+  * Channel measurement related functions
+  */
+-static ssize_t chp_measurement_chars_read(struct kobject *kobj, char *buf,
+-					  loff_t off, size_t count)
++static ssize_t chp_measurement_chars_read(struct kobject *kobj,
++					  struct bin_attribute *bin_attr,
++					  char *buf, loff_t off, size_t count)
+ {
+ 	struct channel_path *chp;
+ 	unsigned int size;
+@@ -165,7 +166,6 @@
+ 	.attr = {
+ 		.name = "measurement_chars",
+ 		.mode = S_IRUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = sizeof(struct cmg_chars),
+ 	.read = chp_measurement_chars_read,
+@@ -193,8 +193,9 @@
+ 	} while (reference_buf.values[0] != buf->values[0]);
+ }
+ 
+-static ssize_t chp_measurement_read(struct kobject *kobj, char *buf,
+-				    loff_t off, size_t count)
++static ssize_t chp_measurement_read(struct kobject *kobj,
++				    struct bin_attribute *bin_attr,
++				    char *buf, loff_t off, size_t count)
+ {
+ 	struct channel_path *chp;
+ 	struct channel_subsystem *css;
+@@ -217,7 +218,6 @@
+ 	.attr = {
+ 		.name = "measurement",
+ 		.mode = S_IRUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = sizeof(struct cmg_entry),
+ 	.read = chp_measurement_read,
+diff -Nurb linux-2.6.22-570/drivers/s390/net/qeth_sys.c linux-2.6.22-try2/drivers/s390/net/qeth_sys.c
+--- linux-2.6.22-570/drivers/s390/net/qeth_sys.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/s390/net/qeth_sys.c	2007-12-19 15:29:22.000000000 -0500
+@@ -991,7 +991,7 @@
+ 
+ #define QETH_DEVICE_ATTR(_id,_name,_mode,_show,_store)			     \
+ struct device_attribute dev_attr_##_id = {				     \
+-	.attr = {.name=__stringify(_name), .mode=_mode, .owner=THIS_MODULE },\
++	.attr = {.name=__stringify(_name), .mode=_mode, },\
+ 	.show	= _show,						     \
+ 	.store	= _store,						     \
+ };
+diff -Nurb linux-2.6.22-570/drivers/s390/scsi/zfcp_aux.c linux-2.6.22-try2/drivers/s390/scsi/zfcp_aux.c
+--- linux-2.6.22-570/drivers/s390/scsi/zfcp_aux.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/s390/scsi/zfcp_aux.c	2007-12-19 15:29:23.000000000 -0500
+@@ -815,9 +815,7 @@
+ struct zfcp_unit *
+ zfcp_unit_enqueue(struct zfcp_port *port, fcp_lun_t fcp_lun)
+ {
+-	struct zfcp_unit *unit, *tmp_unit;
+-	unsigned int scsi_lun;
+-	int found;
++	struct zfcp_unit *unit;
+ 
+ 	/*
+ 	 * check that there is no unit with this FCP_LUN already in list
+@@ -863,21 +861,9 @@
+ 	}
+ 
+ 	zfcp_unit_get(unit);
++	unit->scsi_lun = scsilun_to_int((struct scsi_lun *)&unit->fcp_lun);
+ 
+-	scsi_lun = 0;
+-	found = 0;
+ 	write_lock_irq(&zfcp_data.config_lock);
+-	list_for_each_entry(tmp_unit, &port->unit_list_head, list) {
+-		if (tmp_unit->scsi_lun != scsi_lun) {
+-			found = 1;
+-			break;
+-		}
+-		scsi_lun++;
+-	}
+-	unit->scsi_lun = scsi_lun;
+-	if (found)
+-		list_add_tail(&unit->list, &tmp_unit->list);
+-	else
+ 		list_add_tail(&unit->list, &port->unit_list_head);
+ 	atomic_clear_mask(ZFCP_STATUS_COMMON_REMOVE, &unit->status);
+ 	atomic_set_mask(ZFCP_STATUS_COMMON_RUNNING, &unit->status);
+diff -Nurb linux-2.6.22-570/drivers/s390/scsi/zfcp_erp.c linux-2.6.22-try2/drivers/s390/scsi/zfcp_erp.c
+--- linux-2.6.22-570/drivers/s390/scsi/zfcp_erp.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/s390/scsi/zfcp_erp.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1986,6 +1986,10 @@
+  failed_openfcp:
+ 	zfcp_close_fsf(erp_action->adapter);
+  failed_qdio:
++	atomic_clear_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK |
++			  ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED |
++			  ZFCP_STATUS_ADAPTER_XPORT_OK,
++			  &erp_action->adapter->status);
+  out:
+ 	return retval;
+ }
+@@ -2167,6 +2171,9 @@
+ 		sleep *= 2;
+ 	}
+ 
++	atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT,
++			  &adapter->status);
++
+ 	if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK,
+ 			      &adapter->status)) {
+ 		ZFCP_LOG_INFO("error: exchange of configuration data for "
+diff -Nurb linux-2.6.22-570/drivers/sbus/char/bbc_envctrl.c linux-2.6.22-try2/drivers/sbus/char/bbc_envctrl.c
+--- linux-2.6.22-570/drivers/sbus/char/bbc_envctrl.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/sbus/char/bbc_envctrl.c	2007-12-19 15:29:23.000000000 -0500
+@@ -7,6 +7,7 @@
+ #include <linux/kthread.h>
+ #include <linux/delay.h>
+ #include <linux/kmod.h>
++#include <linux/reboot.h>
+ #include <asm/oplib.h>
+ #include <asm/ebus.h>
+ 
+@@ -170,8 +171,6 @@
+ static void do_envctrl_shutdown(struct bbc_cpu_temperature *tp)
+ {
+ 	static int shutting_down = 0;
+-	static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
+-	char *argv[] = { "/sbin/shutdown", "-h", "now", NULL };
+ 	char *type = "???";
+ 	s8 val = -1;
+ 
+@@ -195,7 +194,7 @@
+ 	printk(KERN_CRIT "kenvctrld: Shutting down the system now.\n");
+ 
+ 	shutting_down = 1;
+-	if (call_usermodehelper("/sbin/shutdown", argv, envp, 0) < 0)
++	if (orderly_poweroff(true) < 0)
+ 		printk(KERN_CRIT "envctrl: shutdown execution failed\n");
+ }
+ 
+diff -Nurb linux-2.6.22-570/drivers/sbus/char/envctrl.c linux-2.6.22-try2/drivers/sbus/char/envctrl.c
+--- linux-2.6.22-570/drivers/sbus/char/envctrl.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/sbus/char/envctrl.c	2007-12-19 15:29:23.000000000 -0500
+@@ -26,6 +26,7 @@
+ #include <linux/ioport.h>
+ #include <linux/miscdevice.h>
+ #include <linux/kmod.h>
++#include <linux/reboot.h>
+ 
+ #include <asm/ebus.h>
+ #include <asm/uaccess.h>
+@@ -966,10 +967,6 @@
+ static void envctrl_do_shutdown(void)
+ {
+ 	static int inprog = 0;
+-	static char *envp[] = {	
+-		"HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
+-	char *argv[] = { 
+-		"/sbin/shutdown", "-h", "now", NULL };	
+ 	int ret;
+ 
+ 	if (inprog != 0)
+@@ -977,7 +974,7 @@
+ 
+ 	inprog = 1;
+ 	printk(KERN_CRIT "kenvctrld: WARNING: Shutting down the system now.\n");
+-	ret = call_usermodehelper("/sbin/shutdown", argv, envp, 0);
++	ret = orderly_poweroff(true);
+ 	if (ret < 0) {
+ 		printk(KERN_CRIT "kenvctrld: WARNING: system shutdown failed!\n"); 
+ 		inprog = 0;  /* unlikely to succeed, but we could try again */
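Both sbus environmental-control drivers swap a hand-rolled call_usermodehelper() invocation of /sbin/shutdown for orderly_poweroff(true), which runs the system-configured poweroff command and, because the argument is true, forces the power-off if that command cannot be run. Roughly, and only as a sketch (run_poweroff_cmd() stands in for the user-helper invocation):

	int orderly_poweroff_sketch(bool force)
	{
		int ret = run_poweroff_cmd();	/* e.g. /sbin/poweroff */

		if (ret && force) {
			emergency_sync();	/* best-effort sync, then... */
			kernel_power_off();	/* ...hard power-off */
		}
		return ret;
	}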
+diff -Nurb linux-2.6.22-570/drivers/scsi/3w-9xxx.c linux-2.6.22-try2/drivers/scsi/3w-9xxx.c
+--- linux-2.6.22-570/drivers/scsi/3w-9xxx.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/scsi/3w-9xxx.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1307,22 +1307,26 @@
+ 					wake_up(&tw_dev->ioctl_wqueue);
+ 				}
+ 			} else {
++				struct scsi_cmnd *cmd;
++
++				cmd = tw_dev->srb[request_id];
++
+ 				twa_scsiop_execute_scsi_complete(tw_dev, request_id);
+ 				/* If no error command was a success */
+ 				if (error == 0) {
+-					tw_dev->srb[request_id]->result = (DID_OK << 16);
++					cmd->result = (DID_OK << 16);
+ 				}
+ 
+ 				/* If error, command failed */
+ 				if (error == 1) {
+ 					/* Ask for a host reset */
+-					tw_dev->srb[request_id]->result = (DID_OK << 16) | (CHECK_CONDITION << 1);
++					cmd->result = (DID_OK << 16) | (CHECK_CONDITION << 1);
+ 				}
+ 
+ 				/* Report residual bytes for single sgl */
+-				if ((tw_dev->srb[request_id]->use_sg <= 1) && (full_command_packet->command.newcommand.status == 0)) {
+-					if (full_command_packet->command.newcommand.sg_list[0].length < tw_dev->srb[request_id]->request_bufflen)
+-						tw_dev->srb[request_id]->resid = tw_dev->srb[request_id]->request_bufflen - full_command_packet->command.newcommand.sg_list[0].length;
++				if ((scsi_sg_count(cmd) <= 1) && (full_command_packet->command.newcommand.status == 0)) {
++					if (full_command_packet->command.newcommand.sg_list[0].length < scsi_bufflen(tw_dev->srb[request_id]))
++						scsi_set_resid(cmd, scsi_bufflen(cmd) - full_command_packet->command.newcommand.sg_list[0].length);
+ 				}
+ 
+ 				/* Now complete the io */
+@@ -1385,52 +1389,20 @@
+ {
+ 	int use_sg;
+ 	struct scsi_cmnd *cmd = tw_dev->srb[request_id];
+-	struct pci_dev *pdev = tw_dev->tw_pci_dev;
+-	int retval = 0;
+-
+-	if (cmd->use_sg == 0)
+-		goto out;
+-
+-	use_sg = pci_map_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL);
+ 
+-	if (use_sg == 0) {
++	use_sg = scsi_dma_map(cmd);
++	if (!use_sg)
++		return 0;
++	else if (use_sg < 0) {
+ 		TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1c, "Failed to map scatter gather list");
+-		goto out;
++		return 0;
+ 	}
+ 
+ 	cmd->SCp.phase = TW_PHASE_SGLIST;
+ 	cmd->SCp.have_data_in = use_sg;
+-	retval = use_sg;
+-out:
+-	return retval;
+-} /* End twa_map_scsi_sg_data() */
+-
+-/* This function will perform a pci-dma map for a single buffer */
+-static dma_addr_t twa_map_scsi_single_data(TW_Device_Extension *tw_dev, int request_id)
+-{
+-	dma_addr_t mapping;
+-	struct scsi_cmnd *cmd = tw_dev->srb[request_id];
+-	struct pci_dev *pdev = tw_dev->tw_pci_dev;
+-	dma_addr_t retval = 0;
+-
+-	if (cmd->request_bufflen == 0) {
+-		retval = 0;
+-		goto out;
+-	}
+-
+-	mapping = pci_map_single(pdev, cmd->request_buffer, cmd->request_bufflen, DMA_BIDIRECTIONAL);
+-
+-	if (mapping == 0) {
+-		TW_PRINTK(tw_dev->host, TW_DRIVER, 0x1d, "Failed to map page");
+-		goto out;
+-	}
+ 
+-	cmd->SCp.phase = TW_PHASE_SINGLE;
+-	cmd->SCp.have_data_in = mapping;
+-	retval = mapping;
+-out:
+-	return retval;
+-} /* End twa_map_scsi_single_data() */
++	return use_sg;
++} /* End twa_map_scsi_sg_data() */
+ 
+ /* This function will poll for a response interrupt of a request */
+ static int twa_poll_response(TW_Device_Extension *tw_dev, int request_id, int seconds)
+@@ -1816,15 +1788,13 @@
+ 	u32 num_sectors = 0x0;
+ 	int i, sg_count;
+ 	struct scsi_cmnd *srb = NULL;
+-	struct scatterlist *sglist = NULL;
+-	dma_addr_t buffaddr = 0x0;
++	struct scatterlist *sglist = NULL, *sg;
+ 	int retval = 1;
+ 
+ 	if (tw_dev->srb[request_id]) {
+-		if (tw_dev->srb[request_id]->request_buffer) {
+-			sglist = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer;
+-		}
+ 		srb = tw_dev->srb[request_id];
++		if (scsi_sglist(srb))
++			sglist = scsi_sglist(srb);
+ 	}
+ 
+ 	/* Initialize command packet */
+@@ -1857,32 +1827,12 @@
+ 
+ 	if (!sglistarg) {
+ 		/* Map sglist from scsi layer to cmd packet */
+-		if (tw_dev->srb[request_id]->use_sg == 0) {
+-			if (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH) {
+-				command_packet->sg_list[0].address = TW_CPU_TO_SGL(tw_dev->generic_buffer_phys[request_id]);
+-				command_packet->sg_list[0].length = cpu_to_le32(TW_MIN_SGL_LENGTH);
+-				if (tw_dev->srb[request_id]->sc_data_direction == DMA_TO_DEVICE || tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL)
+-					memcpy(tw_dev->generic_buffer_virt[request_id], tw_dev->srb[request_id]->request_buffer, tw_dev->srb[request_id]->request_bufflen);
+-			} else {
+-				buffaddr = twa_map_scsi_single_data(tw_dev, request_id);
+-				if (buffaddr == 0)
+-					goto out;
+-
+-				command_packet->sg_list[0].address = TW_CPU_TO_SGL(buffaddr);
+-				command_packet->sg_list[0].length = cpu_to_le32(tw_dev->srb[request_id]->request_bufflen);
+-			}
+-			command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), 1));
+ 
+-			if (command_packet->sg_list[0].address & TW_CPU_TO_SGL(TW_ALIGNMENT_9000_SGL)) {
+-				TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2d, "Found unaligned address during execute scsi");
+-				goto out;
+-			}
+-		}
+-
+-		if (tw_dev->srb[request_id]->use_sg > 0) {
+-			if ((tw_dev->srb[request_id]->use_sg == 1) && (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH)) {
+-				if (tw_dev->srb[request_id]->sc_data_direction == DMA_TO_DEVICE || tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL) {
+-					struct scatterlist *sg = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer;
++		if (scsi_sg_count(srb)) {
++			if ((scsi_sg_count(srb) == 1) &&
++			    (scsi_bufflen(srb) < TW_MIN_SGL_LENGTH)) {
++				if (srb->sc_data_direction == DMA_TO_DEVICE || srb->sc_data_direction == DMA_BIDIRECTIONAL) {
++					struct scatterlist *sg = scsi_sglist(srb);
+ 					char *buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ 					memcpy(tw_dev->generic_buffer_virt[request_id], buf, sg->length);
+ 					kunmap_atomic(buf - sg->offset, KM_IRQ0);
+@@ -1894,16 +1844,16 @@
+ 				if (sg_count == 0)
+ 					goto out;
+ 
+-				for (i = 0; i < sg_count; i++) {
+-					command_packet->sg_list[i].address = TW_CPU_TO_SGL(sg_dma_address(&sglist[i]));
+-					command_packet->sg_list[i].length = cpu_to_le32(sg_dma_len(&sglist[i]));
++				scsi_for_each_sg(srb, sg, sg_count, i) {
++					command_packet->sg_list[i].address = TW_CPU_TO_SGL(sg_dma_address(sg));
++					command_packet->sg_list[i].length = cpu_to_le32(sg_dma_len(sg));
+ 					if (command_packet->sg_list[i].address & TW_CPU_TO_SGL(TW_ALIGNMENT_9000_SGL)) {
+ 						TW_PRINTK(tw_dev->host, TW_DRIVER, 0x2e, "Found unaligned sgl address during execute scsi");
+ 						goto out;
+ 					}
+ 				}
+ 			}
+-			command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), tw_dev->srb[request_id]->use_sg));
++			command_packet->sgl_entries__lunh = cpu_to_le16(TW_REQ_LUN_IN((srb->device->lun >> 4), scsi_sg_count(tw_dev->srb[request_id])));
+ 		}
+ 	} else {
+ 		/* Internal cdb post */
+@@ -1933,7 +1883,7 @@
+ 
+ 	/* Update SG statistics */
+ 	if (srb) {
+-		tw_dev->sgl_entries = tw_dev->srb[request_id]->use_sg;
++		tw_dev->sgl_entries = scsi_sg_count(tw_dev->srb[request_id]);
+ 		if (tw_dev->sgl_entries > tw_dev->max_sgl_entries)
+ 			tw_dev->max_sgl_entries = tw_dev->sgl_entries;
+ 	}
+@@ -1952,16 +1902,13 @@
+ /* This function completes an execute scsi operation */
+ static void twa_scsiop_execute_scsi_complete(TW_Device_Extension *tw_dev, int request_id)
+ {
+-	if (tw_dev->srb[request_id]->request_bufflen < TW_MIN_SGL_LENGTH &&
+-	    (tw_dev->srb[request_id]->sc_data_direction == DMA_FROM_DEVICE ||
+-	     tw_dev->srb[request_id]->sc_data_direction == DMA_BIDIRECTIONAL)) {
+-		if (tw_dev->srb[request_id]->use_sg == 0) {
+-			memcpy(tw_dev->srb[request_id]->request_buffer,
+-			       tw_dev->generic_buffer_virt[request_id],
+-			       tw_dev->srb[request_id]->request_bufflen);
+-		}
+-		if (tw_dev->srb[request_id]->use_sg == 1) {
+-			struct scatterlist *sg = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer;
++	struct scsi_cmnd *cmd = tw_dev->srb[request_id];
++
++	if (scsi_bufflen(cmd) < TW_MIN_SGL_LENGTH &&
++	    (cmd->sc_data_direction == DMA_FROM_DEVICE ||
++	     cmd->sc_data_direction == DMA_BIDIRECTIONAL)) {
++		if (scsi_sg_count(cmd) == 1) {
++			struct scatterlist *sg = scsi_sglist(tw_dev->srb[request_id]);
+ 			char *buf;
+ 			unsigned long flags = 0;
+ 			local_irq_save(flags);
+@@ -2018,16 +1965,8 @@
+ static void twa_unmap_scsi_data(TW_Device_Extension *tw_dev, int request_id)
+ {
+ 	struct scsi_cmnd *cmd = tw_dev->srb[request_id];
+-	struct pci_dev *pdev = tw_dev->tw_pci_dev;
+ 
+-	switch(cmd->SCp.phase) {
+-	case TW_PHASE_SINGLE:
+-		pci_unmap_single(pdev, cmd->SCp.have_data_in, cmd->request_bufflen, DMA_BIDIRECTIONAL);
+-		break;
+-	case TW_PHASE_SGLIST:
+-		pci_unmap_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL);
+-		break;
+-	}
++	scsi_dma_unmap(cmd);
+ } /* End twa_unmap_scsi_data() */
+ 
+ /* scsi_host_template initializer */
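The 3w-9xxx conversion above is the template for the SCSI hunks that follow: the midlayer accessors scsi_sglist()/scsi_sg_count()/scsi_bufflen() replace direct pokes at request_buffer/use_sg/request_bufflen, the separate single-buffer mapping path disappears (every command now carries a scatterlist), and DMA mapping funnels through scsi_dma_map()/scsi_dma_unmap(). The resulting idiom, sketched (struct example_sge is a made-up controller descriptor):

	static int example_build_sgl(struct scsi_cmnd *cmd,
				     struct example_sge *sgl)
	{
		struct scatterlist *sg;
		int i, nseg;

		nseg = scsi_dma_map(cmd);	/* 0: no data, <0: mapping error */
		if (nseg <= 0)
			return nseg;

		scsi_for_each_sg(cmd, sg, nseg, i) {
			sgl[i].addr = sg_dma_address(sg);
			sgl[i].len  = sg_dma_len(sg);
		}
		return nseg;
	}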
+diff -Nurb linux-2.6.22-570/drivers/scsi/3w-xxxx.c linux-2.6.22-try2/drivers/scsi/3w-xxxx.c
+--- linux-2.6.22-570/drivers/scsi/3w-xxxx.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/3w-xxxx.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1274,12 +1274,8 @@
+ 
+ 	dprintk(KERN_WARNING "3w-xxxx: tw_map_scsi_sg_data()\n");
+ 	
+-	if (cmd->use_sg == 0)
+-		return 0;
+-
+-	use_sg = pci_map_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL);
+-	
+-	if (use_sg == 0) {
++	use_sg = scsi_dma_map(cmd);
++	if (use_sg < 0) {
+ 		printk(KERN_WARNING "3w-xxxx: tw_map_scsi_sg_data(): pci_map_sg() failed.\n");
+ 		return 0;
+ 	}
+@@ -1290,40 +1286,11 @@
+ 	return use_sg;
+ } /* End tw_map_scsi_sg_data() */
+ 
+-static u32 tw_map_scsi_single_data(struct pci_dev *pdev, struct scsi_cmnd *cmd)
+-{
+-	dma_addr_t mapping;
+-
+-	dprintk(KERN_WARNING "3w-xxxx: tw_map_scsi_single_data()\n");
+-
+-	if (cmd->request_bufflen == 0)
+-		return 0;
+-
+-	mapping = pci_map_page(pdev, virt_to_page(cmd->request_buffer), offset_in_page(cmd->request_buffer), cmd->request_bufflen, DMA_BIDIRECTIONAL);
+-
+-	if (mapping == 0) {
+-		printk(KERN_WARNING "3w-xxxx: tw_map_scsi_single_data(): pci_map_page() failed.\n");
+-		return 0;
+-	}
+-
+-	cmd->SCp.phase = TW_PHASE_SINGLE;
+-	cmd->SCp.have_data_in = mapping;
+-
+-	return mapping;
+-} /* End tw_map_scsi_single_data() */
+-
+ static void tw_unmap_scsi_data(struct pci_dev *pdev, struct scsi_cmnd *cmd)
+ {
+ 	dprintk(KERN_WARNING "3w-xxxx: tw_unmap_scsi_data()\n");
+ 
+-	switch(cmd->SCp.phase) {
+-		case TW_PHASE_SINGLE:
+-			pci_unmap_page(pdev, cmd->SCp.have_data_in, cmd->request_bufflen, DMA_BIDIRECTIONAL);
+-			break;
+-		case TW_PHASE_SGLIST:
+-			pci_unmap_sg(pdev, cmd->request_buffer, cmd->use_sg, DMA_BIDIRECTIONAL);
+-			break;
+-	}
++	scsi_dma_unmap(cmd);
+ } /* End tw_unmap_scsi_data() */
+ 
+ /* This function will reset a device extension */
+@@ -1499,27 +1466,16 @@
+ 	void *buf;
+ 	unsigned int transfer_len;
+ 	unsigned long flags = 0;
++	struct scatterlist *sg = scsi_sglist(cmd);
+ 
+-	if (cmd->use_sg) {
+-		struct scatterlist *sg =
+-			(struct scatterlist *)cmd->request_buffer;
+ 		local_irq_save(flags);
+ 		buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ 		transfer_len = min(sg->length, len);
+-	} else {
+-		buf = cmd->request_buffer;
+-		transfer_len = min(cmd->request_bufflen, len);
+-	}
+ 
+ 	memcpy(buf, data, transfer_len);
+ 	
+-	if (cmd->use_sg) {
+-		struct scatterlist *sg;
+-
+-		sg = (struct scatterlist *)cmd->request_buffer;
+ 		kunmap_atomic(buf - sg->offset, KM_IRQ0);
+ 		local_irq_restore(flags);
+-	}
+ }
+ 
+ /* This function is called by the isr to complete an inquiry command */
+@@ -1764,19 +1720,20 @@
+ {
+ 	TW_Command *command_packet;
+ 	unsigned long command_que_value;
+-	u32 lba = 0x0, num_sectors = 0x0, buffaddr = 0x0;
++	u32 lba = 0x0, num_sectors = 0x0;
+ 	int i, use_sg;
+ 	struct scsi_cmnd *srb;
+-	struct scatterlist *sglist;
++	struct scatterlist *sglist, *sg;
+ 
+ 	dprintk(KERN_NOTICE "3w-xxxx: tw_scsiop_read_write()\n");
+ 
+-	if (tw_dev->srb[request_id]->request_buffer == NULL) {
++	srb = tw_dev->srb[request_id];
++
++	sglist = scsi_sglist(srb);
++	if (!sglist) {
+ 		printk(KERN_WARNING "3w-xxxx: tw_scsiop_read_write(): Request buffer NULL.\n");
+ 		return 1;
+ 	}
+-	sglist = (struct scatterlist *)tw_dev->srb[request_id]->request_buffer;
+-	srb = tw_dev->srb[request_id];
+ 
+ 	/* Initialize command packet */
+ 	command_packet = (TW_Command *)tw_dev->command_packet_virtual_address[request_id];
+@@ -1819,33 +1776,18 @@
+ 	command_packet->byte8.io.lba = lba;
+ 	command_packet->byte6.block_count = num_sectors;
+ 
+-	/* Do this if there are no sg list entries */
+-	if (tw_dev->srb[request_id]->use_sg == 0) {    
+-		dprintk(KERN_NOTICE "3w-xxxx: tw_scsiop_read_write(): SG = 0\n");
+-		buffaddr = tw_map_scsi_single_data(tw_dev->tw_pci_dev, tw_dev->srb[request_id]);
+-		if (buffaddr == 0)
+-			return 1;
+-
+-		command_packet->byte8.io.sgl[0].address = buffaddr;
+-		command_packet->byte8.io.sgl[0].length = tw_dev->srb[request_id]->request_bufflen;
+-		command_packet->size+=2;
+-	}
+-
+-	/* Do this if we have multiple sg list entries */
+-	if (tw_dev->srb[request_id]->use_sg > 0) {
+ 		use_sg = tw_map_scsi_sg_data(tw_dev->tw_pci_dev, tw_dev->srb[request_id]);
+-		if (use_sg == 0)
++	if (!use_sg)
+ 			return 1;
+ 
+-		for (i=0;i<use_sg; i++) {
+-			command_packet->byte8.io.sgl[i].address = sg_dma_address(&sglist[i]);
+-			command_packet->byte8.io.sgl[i].length = sg_dma_len(&sglist[i]);
++	scsi_for_each_sg(tw_dev->srb[request_id], sg, use_sg, i) {
++		command_packet->byte8.io.sgl[i].address = sg_dma_address(sg);
++		command_packet->byte8.io.sgl[i].length = sg_dma_len(sg);
+ 			command_packet->size+=2;
+ 		}
+-	}
+ 
+ 	/* Update SG statistics */
+-	tw_dev->sgl_entries = tw_dev->srb[request_id]->use_sg;
++	tw_dev->sgl_entries = scsi_sg_count(tw_dev->srb[request_id]);
+ 	if (tw_dev->sgl_entries > tw_dev->max_sgl_entries)
+ 		tw_dev->max_sgl_entries = tw_dev->sgl_entries;
+ 
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c700.c linux-2.6.22-try2/drivers/scsi/53c700.c
+--- linux-2.6.22-570/drivers/scsi/53c700.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/53c700.c	2007-12-19 15:29:23.000000000 -0500
+@@ -585,16 +585,8 @@
+ 	      struct NCR_700_command_slot *slot)
+ {
+ 	if(SCp->sc_data_direction != DMA_NONE &&
+-	   SCp->sc_data_direction != DMA_BIDIRECTIONAL) {
+-		if(SCp->use_sg) {
+-			dma_unmap_sg(hostdata->dev, SCp->request_buffer,
+-				     SCp->use_sg, SCp->sc_data_direction);
+-		} else {
+-			dma_unmap_single(hostdata->dev, slot->dma_handle,
+-					 SCp->request_bufflen,
+-					 SCp->sc_data_direction);
+-		}
+-	}
++	   SCp->sc_data_direction != DMA_BIDIRECTIONAL)
++		scsi_dma_unmap(SCp);
+ }
+ 
+ STATIC inline void
+@@ -661,7 +653,6 @@
+ {
+ 	struct NCR_700_Host_Parameters *hostdata = 
+ 		(struct NCR_700_Host_Parameters *)host->hostdata[0];
+-	__u32 dcntl_extra = 0;
+ 	__u8 min_period;
+ 	__u8 min_xferp = (hostdata->chip710 ? NCR_710_MIN_XFERP : NCR_700_MIN_XFERP);
+ 
+@@ -686,13 +677,14 @@
+ 			        burst_disable = BURST_DISABLE;
+ 			        break;
+ 		}
+-		dcntl_extra = COMPAT_700_MODE;
++		hostdata->dcntl_extra |= COMPAT_700_MODE;
+ 
+-		NCR_700_writeb(dcntl_extra, host, DCNTL_REG);
++		NCR_700_writeb(hostdata->dcntl_extra, host, DCNTL_REG);
+ 		NCR_700_writeb(burst_length | hostdata->dmode_extra,
+ 			       host, DMODE_710_REG);
+-		NCR_700_writeb(burst_disable | (hostdata->differential ? 
+-						DIFF : 0), host, CTEST7_REG);
++		NCR_700_writeb(burst_disable | hostdata->ctest7_extra |
++			       (hostdata->differential ? DIFF : 0),
++			       host, CTEST7_REG);
+ 		NCR_700_writeb(BTB_TIMER_DISABLE, host, CTEST0_REG);
+ 		NCR_700_writeb(FULL_ARBITRATION | ENABLE_PARITY | PARITY
+ 			       | AUTO_ATN, host, SCNTL0_REG);
+@@ -727,13 +719,13 @@
+ 		 * of spec: sync divider 2, async divider 3 */
+ 		DEBUG(("53c700: sync 2 async 3\n"));
+ 		NCR_700_writeb(SYNC_DIV_2_0, host, SBCL_REG);
+-		NCR_700_writeb(ASYNC_DIV_3_0 | dcntl_extra, host, DCNTL_REG);
++		NCR_700_writeb(ASYNC_DIV_3_0 | hostdata->dcntl_extra, host, DCNTL_REG);
+ 		hostdata->sync_clock = hostdata->clock/2;
+ 	} else	if(hostdata->clock > 50  && hostdata->clock <= 75) {
+ 		/* sync divider 1.5, async divider 3 */
+ 		DEBUG(("53c700: sync 1.5 async 3\n"));
+ 		NCR_700_writeb(SYNC_DIV_1_5, host, SBCL_REG);
+-		NCR_700_writeb(ASYNC_DIV_3_0 | dcntl_extra, host, DCNTL_REG);
++		NCR_700_writeb(ASYNC_DIV_3_0 | hostdata->dcntl_extra, host, DCNTL_REG);
+ 		hostdata->sync_clock = hostdata->clock*2;
+ 		hostdata->sync_clock /= 3;
+ 		
+@@ -741,18 +733,18 @@
+ 		/* sync divider 1, async divider 2 */
+ 		DEBUG(("53c700: sync 1 async 2\n"));
+ 		NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG);
+-		NCR_700_writeb(ASYNC_DIV_2_0 | dcntl_extra, host, DCNTL_REG);
++		NCR_700_writeb(ASYNC_DIV_2_0 | hostdata->dcntl_extra, host, DCNTL_REG);
+ 		hostdata->sync_clock = hostdata->clock;
+ 	} else if(hostdata->clock > 25 && hostdata->clock <=37) {
+ 		/* sync divider 1, async divider 1.5 */
+ 		DEBUG(("53c700: sync 1 async 1.5\n"));
+ 		NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG);
+-		NCR_700_writeb(ASYNC_DIV_1_5 | dcntl_extra, host, DCNTL_REG);
++		NCR_700_writeb(ASYNC_DIV_1_5 | hostdata->dcntl_extra, host, DCNTL_REG);
+ 		hostdata->sync_clock = hostdata->clock;
+ 	} else {
+ 		DEBUG(("53c700: sync 1 async 1\n"));
+ 		NCR_700_writeb(SYNC_DIV_1_0, host, SBCL_REG);
+-		NCR_700_writeb(ASYNC_DIV_1_0 | dcntl_extra, host, DCNTL_REG);
++		NCR_700_writeb(ASYNC_DIV_1_0 | hostdata->dcntl_extra, host, DCNTL_REG);
+ 		/* sync divider 1, async divider 1 */
+ 		hostdata->sync_clock = hostdata->clock;
+ 	}
+@@ -1263,12 +1255,11 @@
+ 		       host->host_no, pun, lun, NCR_700_condition[i],
+ 		       NCR_700_phase[j], dsp - hostdata->pScript);
+ 		if(SCp != NULL) {
+-			scsi_print_command(SCp);
++			struct scatterlist *sg;
+ 
+-			if(SCp->use_sg) {
+-				for(i = 0; i < SCp->use_sg + 1; i++) {
+-					printk(KERN_INFO " SG[%d].length = %d, move_insn=%08x, addr %08x\n", i, ((struct scatterlist *)SCp->request_buffer)[i].length, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].ins, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].pAddr);
+-				}
++			scsi_print_command(SCp);
++			scsi_for_each_sg(SCp, sg, scsi_sg_count(SCp) + 1, i) {
++				printk(KERN_INFO " SG[%d].length = %d, move_insn=%08x, addr %08x\n", i, sg->length, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].ins, ((struct NCR_700_command_slot *)SCp->host_scribble)->SG[i].pAddr);
+ 			}
+ 		}	       
+ 		NCR_700_internal_bus_reset(host);
+@@ -1844,8 +1835,8 @@
+ 	}
+ 	/* sanity check: some of the commands generated by the mid-layer
+ 	 * have an eccentric idea of their sc_data_direction */
+-	if(!SCp->use_sg && !SCp->request_bufflen 
+-	   && SCp->sc_data_direction != DMA_NONE) {
++	if(!scsi_sg_count(SCp) && !scsi_bufflen(SCp) &&
++	   SCp->sc_data_direction != DMA_NONE) {
+ #ifdef NCR_700_DEBUG
+ 		printk("53c700: Command");
+ 		scsi_print_command(SCp);
+@@ -1887,31 +1878,15 @@
+ 		int i;
+ 		int sg_count;
+ 		dma_addr_t vPtr = 0;
++		struct scatterlist *sg;
+ 		__u32 count = 0;
+ 
+-		if(SCp->use_sg) {
+-			sg_count = dma_map_sg(hostdata->dev,
+-					      SCp->request_buffer, SCp->use_sg,
+-					      direction);
+-		} else {
+-			vPtr = dma_map_single(hostdata->dev,
+-					      SCp->request_buffer, 
+-					      SCp->request_bufflen,
+-					      direction);
+-			count = SCp->request_bufflen;
+-			slot->dma_handle = vPtr;
+-			sg_count = 1;
+-		}
+-			
+-
+-		for(i = 0; i < sg_count; i++) {
++		sg_count = scsi_dma_map(SCp);
++		BUG_ON(sg_count < 0);
+ 
+-			if(SCp->use_sg) {
+-				struct scatterlist *sg = SCp->request_buffer;
+-
+-				vPtr = sg_dma_address(&sg[i]);
+-				count = sg_dma_len(&sg[i]);
+-			}
++		scsi_for_each_sg(SCp, sg, sg_count, i) {
++			vPtr = sg_dma_address(sg);
++			count = sg_dma_len(sg);
+ 
+ 			slot->SG[i].ins = bS_to_host(move_ins | count);
+ 			DEBUG((" scatter block %d: move %d[%08x] from 0x%lx\n",
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c700.h linux-2.6.22-try2/drivers/scsi/53c700.h
+--- linux-2.6.22-570/drivers/scsi/53c700.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/53c700.h	2007-12-19 15:29:23.000000000 -0500
+@@ -177,6 +177,7 @@
+ 	__u8	state;
+ 	#define NCR_700_FLAG_AUTOSENSE	0x01
+ 	__u8	flags;
++	__u8	pad1[2];	/* Needed for m68k where min alignment is 2 bytes */
+ 	int	tag;
+ 	__u32	resume_offset;
+ 	struct scsi_cmnd *cmnd;
+@@ -196,6 +197,8 @@
+ 	void __iomem	*base;		/* the base for the port (copied to host) */
+ 	struct device	*dev;
+ 	__u32	dmode_extra;	/* adjustable bus settings */
++	__u32	dcntl_extra;	/* adjustable bus settings */
++	__u32	ctest7_extra;	/* adjustable bus settings */
+ 	__u32	differential:1;	/* if we are differential */
+ #ifdef CONFIG_53C700_LE_ON_BE
+ 	/* This option is for HP only.  Set it if your chip is wired for
+@@ -352,6 +355,7 @@
+ #define		SEL_TIMEOUT_DISABLE	0x10 /* 710 only */
+ #define         DFP                     0x08
+ #define         EVP                     0x04
++#define         CTEST7_TT1              0x02
+ #define		DIFF			0x01
+ #define CTEST6_REG                      0x1A
+ #define	TEMP_REG			0x1C
+@@ -385,6 +389,7 @@
+ #define		SOFTWARE_RESET		0x01
+ #define		COMPAT_700_MODE		0x01
+ #define 	SCRPTS_16BITS		0x20
++#define		EA_710			0x20
+ #define		ASYNC_DIV_2_0		0x00
+ #define		ASYNC_DIV_1_5		0x40
+ #define		ASYNC_DIV_1_0		0x80
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx.c linux-2.6.22-try2/drivers/scsi/53c7xx.c
+--- linux-2.6.22-570/drivers/scsi/53c7xx.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/53c7xx.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,6102 +0,0 @@
+-/*
+- * 53c710 driver.  Modified from Drew Eckhardts driver
+- * for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk]
+- * Check out PERM_OPTIONS and EXPECTED_CLOCK, which may be defined in the
+- * relevant machine specific file (eg. mvme16x.[ch], amiga7xx.[ch]).
+- * There are also currently some defines at the top of 53c7xx.scr.
+- * The chip type is #defined in script_asm.pl, as well as the Makefile.
+- * Host scsi ID expected to be 7 - see NCR53c7x0_init().
+- *
+- * I have removed the PCI code and some of the 53c8xx specific code - 
+- * simply to make this file smaller and easier to manage.
+- *
+- * MVME16x issues:
+- *   Problems trying to read any chip registers in NCR53c7x0_init(), as they
+- *   may never have been set by 16xBug (eg. If kernel has come in over tftp).
+- */
+-
+-/*
+- * Adapted for Linux/m68k Amiga platforms for the A4000T/A4091 and
+- * WarpEngine SCSI controllers.
+- * By Alan Hourihane <alanh@fairlite.demon.co.uk>
+- * Thanks to Richard Hirst for making it possible with the MVME additions
+- */
+-
+-/*
+- * 53c710 rev 0 doesn't support add with carry.  Rev 1 and 2 does.  To
+- * overcome this problem you can define FORCE_DSA_ALIGNMENT, which ensures
+- * that the DSA address is always xxxxxx00.  If disconnection is not allowed,
+- * then the script only ever tries to add small (< 256) positive offsets to
+- * DSA, so lack of carry isn't a problem.  FORCE_DSA_ALIGNMENT can, of course,
+- * be defined for all chip revisions at a small cost in memory usage.
+- */
+-
+-#define FORCE_DSA_ALIGNMENT
+-
+-/*
+- * Selection timer does not always work on the 53c710, depending on the
+- * timing at the last disconnect, if this is a problem for you, try
+- * using validids as detailed below.
+- *
+- * Options for the NCR7xx driver
+- *
+- * noasync:0		-	disables sync and asynchronous negotiation
+- * nosync:0		-	disables synchronous negotiation (does async)
+- * nodisconnect:0	-	disables disconnection
+- * validids:0x??	-	Bitmask field that disallows certain ID's.
+- *			-	e.g.	0x03	allows ID 0,1
+- *			-		0x1F	allows ID 0,1,2,3,4
+- * opthi:n		-	replace top word of options with 'n'
+- * optlo:n		-	replace bottom word of options with 'n'
+- *			-	ALWAYS SPECIFY opthi THEN optlo <<<<<<<<<<
+- */
+-
+-/*
+- * PERM_OPTIONS are driver options which will be enabled for all NCR boards
+- * in the system at driver initialization time.
+- *
+- * Don't THINK about touching these in PERM_OPTIONS : 
+- *   OPTION_MEMORY_MAPPED 
+- * 	680x0 doesn't have an IO map!
+- *
+- *   OPTION_DEBUG_TEST1
+- *	Test 1 does bus mastering and interrupt tests, which will help weed 
+- *	out brain damaged main boards.
+- *
+- * Other PERM_OPTIONS settings are listed below.  Note the actual options
+- * required are set in the relevant file (mvme16x.c, amiga7xx.c, etc):
+- *
+- *   OPTION_NO_ASYNC
+- *	Don't negotiate for asynchronous transfers on the first command 
+- *	when OPTION_ALWAYS_SYNCHRONOUS is set.  Useful for dain bramaged
+- *	devices which do something bad rather than sending a MESSAGE 
+- *	REJECT back to us like they should if they can't cope.
+- *
+- *   OPTION_SYNCHRONOUS
+- *	Enable support for synchronous transfers.  Target negotiated 
+- *	synchronous transfers will be responded to.  To initiate 
+- *	a synchronous transfer request,  call 
+- *
+- *	    request_synchronous (hostno, target) 
+- *
+- *	from within KGDB.
+- *
+- *   OPTION_ALWAYS_SYNCHRONOUS
+- *	Negotiate for synchronous transfers with every target after
+- *	driver initialization or a SCSI bus reset.  This is a bit dangerous, 
+- *	since there are some dain bramaged SCSI devices which will accept
+- *	SDTR messages but keep talking asynchronously.
+- *
+- *   OPTION_DISCONNECT
+- *	Enable support for disconnect/reconnect.  To change the 
+- *	default setting on a given host adapter, call
+- *
+- *	    request_disconnect (hostno, allow)
+- *
+- *	where allow is non-zero to allow, 0 to disallow.
+- * 
+- *  If you really want to run 10MHz FAST SCSI-II transfers, you should 
+- *  know that the NCR driver currently ignores parity information.  Most
+- *  systems do 5MHz SCSI fine.  I've seen a lot that have problems faster
+- *  than 8MHz.  To play it safe, we only request 5MHz transfers.
+- *
+- *  If you'd rather get 10MHz transfers, edit sdtr_message and change 
+- *  the fourth byte from 50 to 25.
+- */
+-
+-/*
+- * Sponsored by 
+- *	iX Multiuser Multitasking Magazine
+- *	Hannover, Germany
+- *	hm@ix.de
+- *
+- * Copyright 1993, 1994, 1995 Drew Eckhardt
+- *      Visionary Computing 
+- *      (Unix and Linux consulting and custom programming)
+- *      drew@PoohSticks.ORG
+- *	+1 (303) 786-7975
+- *
+- * TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation.
+- * 
+- * For more information, please consult 
+- *
+- * NCR53C810 
+- * SCSI I/O Processor
+- * Programmer's Guide
+- *
+- * NCR 53C810
+- * PCI-SCSI I/O Processor
+- * Data Manual
+- *
+- * NCR 53C810/53C820
+- * PCI-SCSI I/O Processor Design In Guide
+- *
+- * For literature on Symbios Logic Inc. formerly NCR, SCSI, 
+- * and Communication products please call (800) 334-5454 or
+- * (719) 536-3300. 
+- * 
+- * PCI BIOS Specification Revision
+- * PCI Local Bus Specification
+- * PCI System Design Guide
+- *
+- * PCI Special Interest Group
+- * M/S HF3-15A
+- * 5200 N.E. Elam Young Parkway
+- * Hillsboro, Oregon 97124-6497
+- * +1 (503) 696-2000 
+- * +1 (800) 433-5177
+- */
+-
+-/*
+- * Design issues : 
+- * The cumulative latency needed to propagate a read/write request 
+- * through the file system, buffer cache, driver stacks, SCSI host, and 
+- * SCSI device is ultimately the limiting factor in throughput once we 
+- * have a sufficiently fast host adapter.
+- *  
+- * So, to maximize performance we want to keep the ratio of latency to data 
+- * transfer time to a minimum by
+- * 1.  Minimizing the total number of commands sent (typical command latency
+- *	including drive and bus mastering host overhead is as high as 4.5ms)
+- *	to transfer a given amount of data.  
+- *
+- *      This is accomplished by placing no arbitrary limit on the number
+- *	of scatter/gather buffers supported, since we can transfer 1K
+- *	per scatter/gather buffer without Eric's cluster patches, 
+- *	4K with.  
+- *
+- * 2.  Minimizing the number of fatal interrupts serviced, since
+- * 	fatal interrupts halt the SCSI I/O processor.  Basically,
+- *	this means offloading the practical maximum amount of processing 
+- *	to the SCSI chip.
+- * 
+- *	On the NCR53c810/820/720,  this is accomplished by using 
+- *		interrupt-on-the-fly signals when commands complete, 
+- *		and only handling fatal errors and SDTR / WDTR 	messages 
+- *		in the host code.
+- *
+- *	On the NCR53c710, interrupts are generated as on the NCR53c8x0,
+- *		only the lack of an interrupt-on-the-fly facility complicates
+- *		things.   Also, SCSI ID registers and commands are 
+- *		bit fielded rather than binary encoded.
+- *		
+- * 	On the NCR53c700 and NCR53c700-66, operations that are done via 
+- *		indirect, table mode on the more advanced chips must be
+- *	        replaced by calls through a jump table which 
+- *		acts as a surrogate for the DSA.  Unfortunately, this 
+- * 		will mean that we must service an interrupt for each 
+- *		disconnect/reconnect.
+- * 
+- * 3.  Eliminating latency by pipelining operations at the different levels.
+- * 	
+- *	This driver allows a configurable number of commands to be enqueued
+- *	for each target/lun combination (experimentally, I have discovered
+- *	that two seems to work best) and will ultimately allow for 
+- *	SCSI-II tagged queuing.
+- * 	
+- *
+- * Architecture : 
+- * This driver is built around a Linux queue of commands waiting to 
+- * be executed, and a shared Linux/NCR array of commands to start.  Commands
+- * are transferred to the array  by the run_process_issue_queue() function 
+- * which is called whenever a command completes.
+- *
+- * As commands are completed, the interrupt routine is triggered,
+- * looks for commands in the linked list of completed commands with
+- * valid status, removes these commands from a list of running commands, 
+- * calls the done routine, and flags their target/luns as not busy.
+- *
+- * Due to limitations in the intelligence of the NCR chips, certain
+- * concessions are made.  In many cases, it is easier to dynamically 
+- * generate/fix-up code rather than calculate on the NCR at run time.  
+- * So, code is generated or fixed up for
+- *
+- * - Handling data transfers, using a variable number of MOVE instructions
+- *	interspersed with CALL MSG_IN, WHEN MSGIN instructions.
+- *
+- * 	The DATAIN and DATAOUT routines	are separate, so that an incorrect
+- *	direction can be trapped, and space isn't wasted. 
+- *
+- *	It may turn out that we're better off using some sort 
+- *	of table indirect instruction in a loop with a variable
+- *	sized table on the NCR53c710 and newer chips.
+- *
+- * - Checking for reselection (NCR53c710 and better)
+- *
+- * - Handling the details of SCSI context switches (NCR53c710 and better),
+- *	such as reprogramming appropriate synchronous parameters, 
+- *	removing the dsa structure from the NCR's queue of outstanding
+- *	commands, etc.
+- *
+- */
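+-
+-#if 0
+-/* Editor's sketch of the latency arithmetic above, with illustrative
+- * numbers only: per-command latency dominates once a transfer is split
+- * into many small commands.
+- */
+-static unsigned long cmd_overhead_us (unsigned long bytes,
+-    unsigned long bytes_per_cmd)
+-{
+-    unsigned long cmds = (bytes + bytes_per_cmd - 1) / bytes_per_cmd;
+-    return cmds * 4500;	/* ~4.5ms typical command latency, in us */
+-    /* cmd_overhead_us (65536, 1024) == 288000us of overhead for 64
+-       1K commands, vs. 4500us for one command using scatter/gather */
+-}
+-#endif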
+-
+-#include <linux/module.h>
+-
+-
+-#include <linux/types.h>
+-#include <asm/setup.h>
+-#include <asm/dma.h>
+-#include <asm/io.h>
+-#include <asm/system.h>
+-#include <linux/delay.h>
+-#include <linux/signal.h>
+-#include <linux/sched.h>
+-#include <linux/errno.h>
+-#include <linux/string.h>
+-#include <linux/slab.h>
+-#include <linux/vmalloc.h>
+-#include <linux/mm.h>
+-#include <linux/ioport.h>
+-#include <linux/time.h>
+-#include <linux/blkdev.h>
+-#include <linux/spinlock.h>
+-#include <linux/interrupt.h>
+-#include <asm/pgtable.h>
+-
+-#ifdef CONFIG_AMIGA
+-#include <asm/amigahw.h>
+-#include <asm/amigaints.h>
+-#include <asm/irq.h>
+-
+-#define BIG_ENDIAN
+-#define NO_IO_SPACE
+-#endif
+-
+-#ifdef CONFIG_MVME16x
+-#include <asm/mvme16xhw.h>
+-
+-#define BIG_ENDIAN
+-#define NO_IO_SPACE
+-#define VALID_IDS
+-#endif
+-
+-#ifdef CONFIG_BVME6000
+-#include <asm/bvme6000hw.h>
+-
+-#define BIG_ENDIAN
+-#define NO_IO_SPACE
+-#define VALID_IDS
+-#endif
+-
+-#include "scsi.h"
+-#include <scsi/scsi_dbg.h>
+-#include <scsi/scsi_host.h>
+-#include <scsi/scsi_transport_spi.h>
+-#include "53c7xx.h"
+-#include <linux/stat.h>
+-#include <linux/stddef.h>
+-
+-#ifdef NO_IO_SPACE
+-/*
+- * The following make the definitions in 53c7xx.h (write8, etc) smaller;
+- * we don't have separate I/O space anyway.
+- */
+-#undef inb
+-#undef outb
+-#undef inw
+-#undef outw
+-#undef inl
+-#undef outl
+-#define inb(x)          1
+-#define inw(x)          1
+-#define inl(x)          1
+-#define outb(x,y)       1
+-#define outw(x,y)       1
+-#define outl(x,y)       1
+-#endif
+-
+-static int check_address (unsigned long addr, int size);
+-static void dump_events (struct Scsi_Host *host, int count);
+-static Scsi_Cmnd * return_outstanding_commands (struct Scsi_Host *host, 
+-    int free, int issue);
+-static void hard_reset (struct Scsi_Host *host);
+-static void ncr_scsi_reset (struct Scsi_Host *host);
+-static void print_lots (struct Scsi_Host *host);
+-static void set_synchronous (struct Scsi_Host *host, int target, int sxfer, 
+-    int scntl3, int now_connected);
+-static int datapath_residual (struct Scsi_Host *host);
+-static const char * sbcl_to_phase (int sbcl);
+-static void print_progress (Scsi_Cmnd *cmd);
+-static void print_queues (struct Scsi_Host *host);
+-static void process_issue_queue (unsigned long flags);
+-static int shutdown (struct Scsi_Host *host);
+-static void abnormal_finished (struct NCR53c7x0_cmd *cmd, int result);
+-static int disable (struct Scsi_Host *host);
+-static int NCR53c7xx_run_tests (struct Scsi_Host *host);
+-static irqreturn_t NCR53c7x0_intr(int irq, void *dev_id);
+-static void NCR53c7x0_intfly (struct Scsi_Host *host);
+-static int ncr_halt (struct Scsi_Host *host);
+-static void intr_phase_mismatch (struct Scsi_Host *host, struct NCR53c7x0_cmd 
+-    *cmd);
+-static void intr_dma (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd);
+-static void print_dsa (struct Scsi_Host *host, u32 *dsa,
+-    const char *prefix);
+-static int print_insn (struct Scsi_Host *host, const u32 *insn,
+-    const char *prefix, int kernel);
+-
+-static void NCR53c7xx_dsa_fixup (struct NCR53c7x0_cmd *cmd);
+-static void NCR53c7x0_init_fixup (struct Scsi_Host *host);
+-static int NCR53c7x0_dstat_sir_intr (struct Scsi_Host *host, struct 
+-    NCR53c7x0_cmd *cmd);
+-static void NCR53c7x0_soft_reset (struct Scsi_Host *host);
+-
+-/* Size of event list (per host adapter) */
+-static int track_events = 0;
+-static struct Scsi_Host *first_host = NULL;	/* Head of list of NCR boards */
+-static struct scsi_host_template *the_template = NULL;
+-
+-/* NCR53c710 script handling code */
+-
+-#include "53c7xx_d.h"
+-#ifdef A_int_debug_sync
+-#define DEBUG_SYNC_INTR A_int_debug_sync
+-#endif
+-int NCR53c7xx_script_len = sizeof (SCRIPT);
+-int NCR53c7xx_dsa_len = A_dsa_end + Ent_dsa_zero - Ent_dsa_code_template;
+-#ifdef FORCE_DSA_ALIGNMENT
+-int CmdPageStart = (0 - Ent_dsa_zero - sizeof(struct NCR53c7x0_cmd)) & 0xff;
+-#endif
+-
+-static char *setup_strings[] =
+-	{"","","","","","","",""};
+-
+-#define MAX_SETUP_STRINGS ARRAY_SIZE(setup_strings)
+-#define SETUP_BUFFER_SIZE 200
+-static char setup_buffer[SETUP_BUFFER_SIZE];
+-static char setup_used[MAX_SETUP_STRINGS];
+-
+-void ncr53c7xx_setup (char *str, int *ints)
+-{
+-   int i;
+-   char *p1, *p2;
+-
+-   p1 = setup_buffer;
+-   *p1 = '\0';
+-   if (str)
+-      strncpy(p1, str, SETUP_BUFFER_SIZE - strlen(setup_buffer));
+-   setup_buffer[SETUP_BUFFER_SIZE - 1] = '\0';
+-   p1 = setup_buffer;
+-   i = 0;
+-   while (*p1 && (i < MAX_SETUP_STRINGS)) {
+-      p2 = strchr(p1, ',');
+-      if (p2) {
+-         *p2 = '\0';
+-         if (p1 != p2)
+-            setup_strings[i] = p1;
+-         p1 = p2 + 1;
+-         i++;
+-         }
+-      else {
+-         setup_strings[i] = p1;
+-         break;
+-         }
+-      }
+-   for (i=0; i<MAX_SETUP_STRINGS; i++)
+-      setup_used[i] = 0;
+-}
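+-
+-#if 0
+-/* Editor's sketch: how a hypothetical command line would be parsed by
+- * ncr53c7xx_setup() above.
+- */
+-static void example_setup (void)
+-{
+-    char line[] = "nosync,validids:0x3f";
+-    ncr53c7xx_setup (line, NULL);
+-    /* setup_strings[0] is now "nosync",
+-       setup_strings[1] is "validids:0x3f" */
+-}
+-#endif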
+-
+-
+-/* check_setup_strings() returns index if key found, 0 if not
+- */
+-
+-static int check_setup_strings(char *key, int *flags, int *val, char *buf)
+-{
+-int x;
+-char *cp;
+-
+-   for  (x=0; x<MAX_SETUP_STRINGS; x++) {
+-      if (setup_used[x])
+-         continue;
+-      if (!strncmp(setup_strings[x], key, strlen(key)))
+-         break;
+-      if (!strncmp(setup_strings[x], "next", strlen("next")))
+-         return 0;
+-      }
+-   if (x == MAX_SETUP_STRINGS)
+-      return 0;
+-   setup_used[x] = 1;
+-   cp = setup_strings[x] + strlen(key);
+-   *val = -1;
+-   if (*cp != ':')
+-      return ++x;
+-   cp++;
+-   if ((*cp >= '0') && (*cp <= '9')) {
+-      *val = simple_strtoul(cp,NULL,0);
+-      }
+-   return ++x;
+-}
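+-
+-#if 0
+-/* Editor's sketch, continuing the hypothetical command line above:
+- * looking up the "validids" key and its numeric argument.
+- */
+-static void example_check (void)
+-{
+-    int flags, val;
+-    char buf[32];
+-    if (check_setup_strings ("validids", &flags, &val, buf))
+-	/* found: val == 0x3f, parsed by simple_strtoul() after ':' */ ;
+-}
+-#endif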
+-
+-
+-
+-/*
+- * KNOWN BUGS :
+- * - There is some sort of conflict when the PPP driver is compiled with 
+- * 	support for 16 channels?
+- * 
+- * - On systems which predate the 1.3.x initialization order change,
+- *      the NCR driver will cause Cannot get free page messages to appear.  
+- *      These are harmless, but I don't know of an easy way to avoid them.
+- *
+- * - With OPTION_DISCONNECT, on two systems under unknown circumstances,
+- *	we get a PHASE MISMATCH with DSA set to zero (suggesting that 
+- *	the fault occurs somewhere in the reselection code) where 
+- *	DSP=some value DCMD|DBC=same value.  
+- * 	
+- *	Closer inspection suggests that we may be trying to execute
+- *	some portion of the DSA?
+- * scsi0 : handling residual transfer (+ 0 bytes from DMA FIFO)
+- * scsi0 : handling residual transfer (+ 0 bytes from DMA FIFO)
+- * scsi0 : no current command : unexpected phase MSGIN.
+- *         DSP=0x1c46cc, DCMD|DBC=0x1c46ac, DSA=0x0
+- *         DSPS=0x0, TEMP=0x1c3e70, DMODE=0x80
+- * scsi0 : DSP->
+- * 001c46cc : 0x001c46cc 0x00000000
+- * 001c46d4 : 0x001c5ea0 0x000011f8
+- *
+- *	Changed the print code in the phase_mismatch handler so
+- *	that we call print_lots to try to diagnose this.
+- *
+- */
+-
+-/* 
+- * Possible future direction of architecture for max performance :
+- *
+- * We're using a single start array for the NCR chip.  This is 
+- * sub-optimal, because we cannot add a command which would conflict with 
+- * an executing command to this start queue, and therefore must insert the 
+- * next command for a given I/T/L combination after the first has completed,
+- * incurring our interrupt latency between SCSI commands.
+- *
+- * To allow further pipelining of the NCR and host CPU operation, we want 
+- * to set things up so that immediately on termination of a command destined 
+- * for a given LUN, we get that LUN busy again.  
+- * 
+- * To do this, we need to add a 32 bit pointer which is jumped to 
+- * on completion of a command.  If no new command is available, this 
+- * would point to the usual DSA issue queue select routine.
+- *
+- * If one were available, it would point to a per-NCR53c7x0_cmd select routine 
+- * which starts execution immediately, inserting the command at the head 
+- * of the start queue if the NCR chip is selected or reselected.
+- *
+- * We would change so that we keep a list of outstanding commands 
+- * for each unit, rather than a single running_list.  We'd insert 
+- * a new command into the right running list; if the NCR didn't 
+- * have something running for that yet, we'd put it in the 
+- * start queue as well.  Some magic needs to happen to handle the 
+- * race condition between the first command terminating before the 
+- * new one is written.
+- *
+- * Potential for profiling : 
+- * Call do_gettimeofday(struct timeval *tv) to get 800ns resolution.
+- */
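+-
+-#if 0
+-/* Editor's sketch of the completion-jump idea above; the structure and
+- * field names are hypothetical, not part of this driver.
+- */
+-struct percmd_chain {
+-    u32 on_complete;	/* bus address jumped to when a command ends:
+-			   normally the DSA issue queue select routine,
+-			   or a per-NCR53c7x0_cmd select routine when a
+-			   command is already queued for the same LUN */
+-};
+-#endif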
+-
+-
+-/*
+- * TODO : 
+- * 1.  To support WIDE transfers, not much needs to happen.  We
+- *	should do CHMOVE instructions instead of MOVEs when
+- *	we have scatter/gather segments of uneven length.  When
+- * 	we do this, we need to handle the case where we disconnect
+- *	between segments.
+- * 
+- * 2.  Currently, when Icky things happen we do a FATAL().  Instead,
+- *     we want to do an integrity check on the parts of the NCR hostdata
+- *     structure which were initialized at boot time; FATAL() if that 
+- *     fails, and otherwise try to recover.  Keep track of how many
+- *     times this has happened within a single SCSI command; if it 
+- *     gets excessive, then FATAL().
+- *
+- * 3.  Parity checking is currently disabled, and a few things should 
+- *     happen here now that we support synchronous SCSI transfers :
+- *     1.  On soft-reset, we should set the EPC (Enable Parity Checking)
+- *	   and AAP (Assert SATN/ on parity error) bits in SCNTL0.
+- *	
+- *     2.  We should enable the parity interrupt in the SIEN0 register.
+- * 
+- *     3.  intr_phase_mismatch() needs to believe that message out is 
+- *	   always an "acceptable" phase to have a mismatch in.  If 
+- *	   the old phase was MSG_IN, we should send a MESSAGE PARITY 
+- *	   error.  If the old phase was something else, we should send
+- *	   an INITIATOR_DETECTED_ERROR message.  Note that this could
+- *	   cause a RESTORE POINTERS message; so we should handle that 
+- *	   correctly first.  Instead, we should probably do an 
+- *	   initiator_abort.
+- *
+- * 4.  MPEE bit of CTEST4 should be set so we get interrupted if 
+- *     we detect an error.
+- *
+- *  
+- * 5.  The initial code has been tested on the NCR53c810.  I don't 
+- *     have access to NCR53c700, 700-66 (Forex boards), NCR53c710
+- *     (NCR Pentium systems), NCR53c720, NCR53c820, or NCR53c825 boards to 
+- *     finish development on those platforms.
+- *
+- *     NCR53c820/825/720 - need to add wide transfer support, including WDTR 
+- *     		negotiation, programming of wide transfer capabilities
+- *		on reselection and table indirect selection.
+- *
+- *     NCR53c710 - need to add fatal interrupt or GEN code for 
+- *		command completion signaling.   Need to modify all 
+- *		SDID, SCID, etc. registers, and table indirect select code 
+- *		since these use bit fielded (ie 1<<target) instead of 
+- *		binary encoded target ids.  Need to accommodate
+- *		different register mappings, probably scan through
+- *		the SCRIPT code and change the non SFBR register operand
+- *		of all MOVE instructions.
+- *
+- *		It is rather worse than this actually: the 710 corrupts
+- *		both TEMP and DSA when you do a MOVE MEMORY.  This
+- *		screws you up all over the place.  MOVE MEMORY 4 with a
+- *		destination of DSA seems to work OK, which helps some.
+- *		Richard Hirst  richard@sleepie.demon.co.uk
+- * 
+- *     NCR53c700/700-66 - need to add code to refix addresses on 
+- *		every nexus change, eliminate all table indirect code,
+- *		very messy.
+- *
+- * 6.  The NCR53c7x0 series is very popular on other platforms that 
+- *     could be running Linux - ie, some high performance AMIGA SCSI 
+- *     boards use it.  
+- *	
+- *     So, I should include #ifdef'd code so that it is 
+- *     compatible with these systems.
+- *	
+- *     Specifically, the little Endian assumptions I made in my 
+- *     bit fields need to change, and if the NCR doesn't see memory
+- *     the right way, we need to provide options to reverse words
+- *     when the scripts are relocated.
+- *
+- * 7.  Use vremap() to access memory mapped boards.  
+- */
+-
+-/* 
+- * Allow for simultaneous existence of multiple SCSI scripts so we 
+- * can have a single driver binary for all of the family.
+- *
+- * - one for NCR53c700 and NCR53c700-66 chips	(not yet supported)
+- * - one for rest (only the NCR53c810, 815, 820, and 825 are currently 
+- *	supported)
+- * 
+- * So that we only need two SCSI scripts, we need to modify things so
+- * that we fixup register accesses in READ/WRITE instructions, and 
+- * we'll also have to accommodate the bit vs. binary encoding of IDs
+- * with the 7xx chips.
+- */
+-
+-#define ROUNDUP(adr,type)	\
+-  ((void *) (((long) (adr) + sizeof(type) - 1) & ~(sizeof(type) - 1)))
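+-
+-/*
+- * Editor's note: ROUNDUP aligns an address up to sizeof(type), which
+- * must be a power of two; e.g. ROUNDUP(0x1006, u32) yields 0x1008,
+- * and an already-aligned address is returned unchanged.
+- */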
+-
+-
+-/*
+- * Function: issue_to_cmd
+- *
+- * Purpose: convert jump instruction in issue array to NCR53c7x0_cmd
+- *	structure pointer.  
+- *
+- * Inputs : issue - pointer to start of NOP or JUMP instruction
+- *	in issue array.
+- *
+- * Returns : pointer to command on success; NULL if opcode is NOP.
+- */
+-
+-static inline struct NCR53c7x0_cmd *
+-issue_to_cmd (struct Scsi_Host *host, struct NCR53c7x0_hostdata *hostdata,
+-    u32 *issue)
+-{
+-    return (issue[0] != hostdata->NOP_insn) ? 
+-    /* 
+-     * If the IF TRUE bit is set, it's a JUMP instruction.  The
+-     * operand is a bus pointer to the dsa_begin routine for this DSA.  The
+-     * dsa field of the NCR53c7x0_cmd structure starts with the 
+-     * DSA code template.  By converting to a virtual address,
+-     * subtracting the code template size, and offset of the 
+-     * dsa field, we end up with a pointer to the start of the 
+-     * structure (alternatively, we could use the 
+-     * dsa_cmnd field, an anachronism from when we weren't
+-     * sure what the relationship between the NCR structures
+-     * and host structures was going to be).
+-     */
+-	(struct NCR53c7x0_cmd *) ((char *) bus_to_virt (issue[1]) - 
+-	    (hostdata->E_dsa_code_begin - hostdata->E_dsa_code_template) -
+-	    offsetof(struct NCR53c7x0_cmd, dsa)) 
+-    /* If the IF TRUE bit is not set, it's a NOP */
+-	: NULL;
+-}
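+-
+-#if 0
+-/* Editor's sketch: typical traversal of the issue array with
+- * issue_to_cmd(); entries are two-word JUMP or NOP instructions, as in
+- * NCR53c7x0_driver_init() and abnormal_finished() below.
+- */
+-static void example_scan (struct Scsi_Host *host,
+-    struct NCR53c7x0_hostdata *hostdata)
+-{
+-    u32 *issue;
+-    struct NCR53c7x0_cmd *cmd;
+-    int i;
+-    for (i = 0, issue = (u32 *) hostdata->schedule;
+-	    i < host->can_queue; ++i, issue += 2)
+-	if ((cmd = issue_to_cmd (host, hostdata, issue)) != NULL)
+-	    /* slot i holds a scheduled command */ ;
+-}
+-#endif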
+-
+-
+-/* 
+- * FIXME: we should junk these, in favor of synchronous_want and 
+- * wide_want in the NCR53c7x0_hostdata structure.
+- */
+-
+-/* Template for "preferred" synchronous transfer parameters. */
+-
+-static const unsigned char sdtr_message[] = {
+-#ifdef CONFIG_SCSI_NCR53C7xx_FAST
+-    EXTENDED_MESSAGE, 3 /* length */, EXTENDED_SDTR, 25 /* *4ns */, 8 /* off */
+-#else
+-    EXTENDED_MESSAGE, 3 /* length */, EXTENDED_SDTR, 50 /* *4ns */, 8 /* off */ 
+-#endif
+-};
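+-
+-/*
+- * Editor's note: the period byte above is in units of 4ns, so 50 gives
+- * a 200ns period (5MHz), while the CONFIG_SCSI_NCR53C7xx_FAST value of
+- * 25 gives a 100ns period (10MHz); the trailing 8 is the requested
+- * synchronous offset.
+- */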
+-
+-/* Template to request asynchronous transfers */
+-
+-static const unsigned char async_message[] = {
+-    EXTENDED_MESSAGE, 3 /* length */, EXTENDED_SDTR, 0, 0 /* asynchronous */
+-};
+-
+-/* Template for "preferred" WIDE transfer parameters */
+-
+-static const unsigned char wdtr_message[] = {
+-    EXTENDED_MESSAGE, 2 /* length */, EXTENDED_WDTR, 1 /* 2^1 bytes */
+-};
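+-
+-/*
+- * Editor's note: the WDTR width byte is an exponent, so the 1 above
+- * requests 2^1 = 2 byte (16 bit) wide transfers; 0 would request the
+- * 8 bit default.
+- */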
+-
+-#if 0
+-/*
+- * Function : struct Scsi_Host *find_host (int host)
+- * 
+- * Purpose : KGDB support function which translates a host number 
+- * 	to a host structure. 
+- *
+- * Inputs : host - number of SCSI host
+- *
+- * Returns : NULL on failure, pointer to host structure on success.
+- */
+-
+-static struct Scsi_Host *
+-find_host (int host) {
+-    struct Scsi_Host *h;
+-    for (h = first_host; h && h->host_no != host; h = h->next);
+-    if (!h) {
+-	printk (KERN_ALERT "scsi%d not found\n", host);
+-	return NULL;
+-    } else if (h->hostt != the_template) {
+-	printk (KERN_ALERT "scsi%d is not a NCR board\n", host);
+-	return NULL;
+-    }
+-    return h;
+-}
+-
+-#if 0
+-/*
+- * Function : request_synchronous (int host, int target)
+- * 
+- * Purpose : KGDB interface which will allow us to negotiate for 
+- * 	synchronous transfers.  This will be replaced with a more 
+- * 	integrated function; perhaps a new entry in the scsi_host 
+- *	structure, accessible via an ioctl() or perhaps /proc/scsi.
+- *
+- * Inputs : host - number of SCSI host; target - number of target.
+- *
+- * Returns : 0 when negotiation has been setup for next SCSI command,
+- *	-1 on failure.
+- */
+-
+-static int
+-request_synchronous (int host, int target) {
+-    struct Scsi_Host *h;
+-    struct NCR53c7x0_hostdata *hostdata;
+-    unsigned long flags;
+-    if (target < 0) {
+-	printk (KERN_ALERT "target %d is bogus\n", target);
+-	return -1;
+-    }
+-    if (!(h = find_host (host)))
+-	return -1;
+-    else if (h->this_id == target) {
+-	printk (KERN_ALERT "target %d is host ID\n", target);
+-	return -1;
+-    } 
+-    else if (target >= h->max_id) {
+-	printk (KERN_ALERT "target %d exceeds maximum of %d\n", target,
+-	    h->max_id);
+-	return -1;
+-    }
+-    hostdata = (struct NCR53c7x0_hostdata *)h->hostdata[0];
+-
+-    local_irq_save(flags);
+-    if (hostdata->initiate_sdtr & (1 << target)) {
+-	local_irq_restore(flags);
+-	printk (KERN_ALERT "target %d already doing SDTR\n", target);
+-	return -1;
+-    } 
+-    hostdata->initiate_sdtr |= (1 << target);
+-    local_irq_restore(flags);
+-    return 0;
+-}
+-#endif
+-
+-/*
+- * Function : request_disconnect (int host, int on_or_off)
+- * 
+- * Purpose : KGDB support function, tells us to allow or disallow 
+- *	disconnections.
+- *
+- * Inputs : host - number of SCSI host; on_or_off - non-zero to allow,
+- *	zero to disallow.
+- *
+- * Returns : 0 on success, -1 on failure.
+- */
+-
+-static int 
+-request_disconnect (int host, int on_or_off) {
+-    struct Scsi_Host *h;
+-    struct NCR53c7x0_hostdata *hostdata;
+-    if (!(h = find_host (host)))
+-	return -1;
+-    hostdata = (struct NCR53c7x0_hostdata *) h->hostdata[0];
+-    if (on_or_off) 
+-	hostdata->options |= OPTION_DISCONNECT;
+-    else
+-	hostdata->options &= ~OPTION_DISCONNECT;
+-    return 0;
+-}
+-#endif
+-
+-/*
+- * Function : static void NCR53c7x0_driver_init (struct Scsi_Host *host)
+- *
+- * Purpose : Initialize internal structures, as required on startup, or 
+- *	after a SCSI bus reset.
+- * 
+- * Inputs : host - pointer to this host adapter's structure
+- */
+-
+-static void 
+-NCR53c7x0_driver_init (struct Scsi_Host *host) {
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    int i, j;
+-    u32 *ncrcurrent;
+-
+-    for (i = 0; i < 16; ++i) {
+-	hostdata->request_sense[i] = 0;
+-    	for (j = 0; j < 8; ++j) 
+-	    hostdata->busy[i][j] = 0;
+-	set_synchronous (host, i, /* sxfer */ 0, hostdata->saved_scntl3, 0);
+-    }
+-    hostdata->issue_queue = NULL;
+-    hostdata->running_list = hostdata->finished_queue = 
+-	hostdata->ncrcurrent = NULL;
+-    for (i = 0, ncrcurrent = (u32 *) hostdata->schedule; 
+-	i < host->can_queue; ++i, ncrcurrent += 2) {
+-	ncrcurrent[0] = hostdata->NOP_insn;
+-	ncrcurrent[1] = 0xdeadbeef;
+-    }
+-    ncrcurrent[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) << 24) | DBC_TCI_TRUE;
+-    ncrcurrent[1] = (u32) virt_to_bus (hostdata->script) +
+-	hostdata->E_wait_reselect;
+-    hostdata->reconnect_dsa_head = 0;
+-    hostdata->addr_reconnect_dsa_head = (u32) 
+-	virt_to_bus((void *) &(hostdata->reconnect_dsa_head));
+-    hostdata->expecting_iid = 0;
+-    hostdata->expecting_sto = 0;
+-    if (hostdata->options & OPTION_ALWAYS_SYNCHRONOUS) 
+-	hostdata->initiate_sdtr = 0xffff; 
+-    else
+-    	hostdata->initiate_sdtr = 0;
+-    hostdata->talked_to = 0;
+-    hostdata->idle = 1;
+-}
+-
+-/* 
+- * Function : static int clock_to_ccf_710 (int clock)
+- *
+- * Purpose :  Return the clock conversion factor for a given SCSI clock.
+- *
+- * Inputs : clock - SCSI clock expressed in Hz.
+- *
+- * Returns : ccf on success, -1 on failure.
+- */
+-
+-static int 
+-clock_to_ccf_710 (int clock) {
+-    if (clock <= 16666666)
+-	return -1;
+-    if (clock <= 25000000)
+-	return 2; 	/* Divide by 1.0 */
+-    else if (clock <= 37500000)
+-	return 1; 	/* Divide by 1.5 */
+-    else if (clock <= 50000000)
+-	return 0;	/* Divide by 2.0 */
+-    else if (clock <= 66000000)
+-	return 3;	/* Divide by 3.0 */
+-    else 
+-	return -1;
+-}
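+-
+-/*
+- * Editor's note: e.g. clock_to_ccf_710(50000000) == 0 (divide by 2.0)
+- * and clock_to_ccf_710(66000000) == 3 (divide by 3.0), while anything
+- * at or below 16.67MHz is out of range and returns -1.
+- */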
+-    
+-/* 
+- * Function : static int NCR53c7x0_init (struct Scsi_Host *host)
+- *
+- * Purpose :  initialize the internal structures for a given SCSI host
+- *
+- * Inputs : host - pointer to this host adapter's structure
+- *
+- * Preconditions : when this function is called, the chip_type 
+- * 	field of the hostdata structure MUST have been set.
+- *
+- * Returns : 0 on success, -1 on failure.
+- */
+-
+-int 
+-NCR53c7x0_init (struct Scsi_Host *host) {
+-    NCR53c7x0_local_declare();
+-    int i, ccf;
+-    unsigned char revision;
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    /* 
+-     * There are some things which we need to know about in order to provide
+-     * a semblance of support.  Print 'em if they aren't what we expect, 
+-     * otherwise don't add to the noise.
+-     * 
+-     * -1 means we don't know what to expect.
+-     */
+-    int val, flags;
+-    char buf[32];
+-    int expected_id = -1;
+-    int expected_clock = -1;
+-    int uninitialized = 0;
+-#ifdef NO_IO_SPACE
+-    int expected_mapping = OPTION_MEMORY_MAPPED;
+-#else
+-    int expected_mapping = OPTION_IO_MAPPED;
+-#endif
+-    for (i=0;i<7;i++)
+-	hostdata->valid_ids[i] = 1;	/* Default all ID's to scan */
+-
+-    /* Parse commandline flags */
+-    if (check_setup_strings("noasync",&flags,&val,buf))
+-    {
+-	hostdata->options |= OPTION_NO_ASYNC;
+-	hostdata->options &= ~(OPTION_SYNCHRONOUS | OPTION_ALWAYS_SYNCHRONOUS);
+-    }
+-
+-    if (check_setup_strings("nosync",&flags,&val,buf))
+-    {
+-	hostdata->options &= ~(OPTION_SYNCHRONOUS | OPTION_ALWAYS_SYNCHRONOUS);
+-    }
+-
+-    if (check_setup_strings("nodisconnect",&flags,&val,buf))
+-	hostdata->options &= ~OPTION_DISCONNECT;
+-
+-    if (check_setup_strings("validids",&flags,&val,buf))
+-    {
+-	for (i=0;i<7;i++) 
+-		hostdata->valid_ids[i] = val & (1<<i);
+-    }
+- 
+-    if  ((i = check_setup_strings("next",&flags,&val,buf)))
+-    {
+-	while (i)
+-		setup_used[--i] = 1;
+-    }
+-
+-    if (check_setup_strings("opthi",&flags,&val,buf))
+-	hostdata->options = (long long)val << 32;
+-    if (check_setup_strings("optlo",&flags,&val,buf))
+-	hostdata->options |= val;
+-
+-    NCR53c7x0_local_setup(host);
+-    switch (hostdata->chip) {
+-    case 710:
+-    case 770:
+-    	hostdata->dstat_sir_intr = NCR53c7x0_dstat_sir_intr;
+-    	hostdata->init_save_regs = NULL;
+-    	hostdata->dsa_fixup = NCR53c7xx_dsa_fixup;
+-    	hostdata->init_fixup = NCR53c7x0_init_fixup;
+-    	hostdata->soft_reset = NCR53c7x0_soft_reset;
+-	hostdata->run_tests = NCR53c7xx_run_tests;
+-	expected_clock = hostdata->scsi_clock;
+-	expected_id = 7;
+-    	break;
+-    default:
+-	printk ("scsi%d : chip type of %d is not supported yet, detaching.\n",
+-	    host->host_no, hostdata->chip);
+-	scsi_unregister (host);
+-	return -1;
+-    }
+-
+-    /* Assign constants accessed by NCR */
+-    hostdata->NCR53c7xx_zero = 0;			
+-    hostdata->NCR53c7xx_msg_reject = MESSAGE_REJECT;
+-    hostdata->NCR53c7xx_msg_abort = ABORT;
+-    hostdata->NCR53c7xx_msg_nop = NOP;
+-    hostdata->NOP_insn = (DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) << 24;
+-    if (expected_mapping == -1 || 
+-	(hostdata->options & (OPTION_MEMORY_MAPPED)) != 
+-	(expected_mapping & OPTION_MEMORY_MAPPED))
+-	printk ("scsi%d : using %s mapped access\n", host->host_no, 
+-	    (hostdata->options & OPTION_MEMORY_MAPPED) ? "memory" : 
+-	    "io");
+-
+-    hostdata->dmode = (hostdata->chip == 700 || hostdata->chip == 70066) ? 
+-	DMODE_REG_00 : DMODE_REG_10;
+-    hostdata->istat = ((hostdata->chip / 100) == 8) ? 
+-    	ISTAT_REG_800 : ISTAT_REG_700;
+-
+-/* We have to assume that this may be the first access to the chip, so
+- * we must set EA in DCNTL. */
+-
+-    NCR53c7x0_write8 (DCNTL_REG, DCNTL_10_EA|DCNTL_10_COM);
+-
+-
+-/* Only the ISTAT register is readable when the NCR is running, so make 
+-   sure it's halted. */
+-    ncr_halt(host);
+-
+-/* 
+- * XXX - the NCR53c700 uses bitfielded registers for SCID, SDID, etc,
+- *	as does the 710 with one bit per SCSI ID.  Conversely, the
+- * 	NCR53c8x0 series uses a normal, 3 bit binary representation
+- *	of these values.
+- *
+- * Get the rest of the NCR documentation, and FIND OUT where the change
+- * was.
+- */
+-
+-#if 0
+-	/* May not be able to do this - chip may not have been set up yet */
+-	tmp = hostdata->this_id_mask = NCR53c7x0_read8(SCID_REG);
+-	for (host->this_id = 0; tmp != 1; tmp >>=1, ++host->this_id);
+-#else
+-	host->this_id = 7;
+-#endif
+-
+-/*
+- * Note : we should never encounter a board setup for ID0.  So,
+- * 	if we see ID0, assume that it was uninitialized and set it
+- * 	to the industry standard 7.
+- */
+-    if (!host->this_id) {
+-	printk("scsi%d : initiator ID was %d, changing to 7\n",
+-	    host->host_no, host->this_id);
+-	host->this_id = 7;
+-	hostdata->this_id_mask = 1 << 7;
+-	uninitialized = 1;
+-    };
+-
+-    if (expected_id == -1 || host->this_id != expected_id)
+-    	printk("scsi%d : using initiator ID %d\n", host->host_no,
+-    	    host->this_id);
+-
+-    /*
+-     * Save important registers to allow a soft reset.
+-     */
+-
+-    /*
+-     * CTEST7 controls cache snooping, burst mode, and support for 
+-     * external differential drivers.  This isn't currently used - the
+-     * default value may not be optimal anyway.
+-     * Even worse, it may never have been set up since reset.
+-     */
+-    hostdata->saved_ctest7 = NCR53c7x0_read8(CTEST7_REG) & CTEST7_SAVE;
+-    revision = (NCR53c7x0_read8(CTEST8_REG) & 0xF0) >> 4;
+-    switch (revision) {
+-	case 1: revision = 0;    break;
+-	case 2: revision = 1;    break;
+-	case 4: revision = 2;    break;
+-	case 8: revision = 3;    break;
+-	default: revision = 255; break;
+-    }
+-    printk("scsi%d: Revision 0x%x\n",host->host_no,revision);
+-
+-    if ((revision == 0 || revision == 255) && (hostdata->options & (OPTION_SYNCHRONOUS|OPTION_DISCONNECT|OPTION_ALWAYS_SYNCHRONOUS)))
+-    {
+-	printk ("scsi%d: Disabling synchronous transfers and disconnect/reselect\n",
+-							host->host_no);
+-	hostdata->options &= ~(OPTION_SYNCHRONOUS|OPTION_DISCONNECT|OPTION_ALWAYS_SYNCHRONOUS);
+-    }
+-
+-    /*
+-     * On NCR53c700 series chips, DCNTL controls the SCSI clock divisor,
+-     * on 800 series chips, it allows for a totem-pole IRQ driver.
+-     * NOTE saved_dcntl currently overwritten in init function.
+-     * The value read here may be garbage anyway, MVME16x board at least
+-     * does not initialise chip if kernel arrived via tftp.
+-     */
+-
+-    hostdata->saved_dcntl = NCR53c7x0_read8(DCNTL_REG);
+-
+-    /*
+-     * DMODE controls DMA burst length, and on 700 series chips,
+-     * 286 mode and bus width  
+-     * NOTE:  On MVME16x, chip may have been reset, so this could be a
+-     * power-on/reset default value.
+-     */
+-    hostdata->saved_dmode = NCR53c7x0_read8(hostdata->dmode);
+-
+-    /* 
+-     * Now that burst length and enabled/disabled status is known, 
+-     * clue the user in on it.  
+-     */
+-   
+-    ccf = clock_to_ccf_710 (expected_clock);
+-
+-    for (i = 0; i < 16; ++i) 
+-	hostdata->cmd_allocated[i] = 0;
+-
+-    if (hostdata->init_save_regs)
+-    	hostdata->init_save_regs (host);
+-    if (hostdata->init_fixup)
+-    	hostdata->init_fixup (host);
+-
+-    if (!the_template) {
+-	the_template = host->hostt;
+-	first_host = host;
+-    }
+-
+-    /* 
+-     * Linux SCSI drivers have always been plagued with initialization 
+-     * problems - some didn't work with the BIOS disabled since they expected
+-     * initialization from it, some didn't work when the networking code
+-     * was enabled and registers got scrambled, etc.
+-     *
+-     * To avoid problems like this, in the future, we will do a soft 
+-     * reset on the SCSI chip, taking it back to a sane state.
+-     */
+-
+-    hostdata->soft_reset (host);
+-
+-#if 1
+-    hostdata->debug_count_limit = -1;
+-#else
+-    hostdata->debug_count_limit = 1;
+-#endif
+-    hostdata->intrs = -1;
+-    hostdata->resets = -1;
+-    memcpy ((void *) hostdata->synchronous_want, (void *) sdtr_message, 
+-	sizeof (hostdata->synchronous_want));
+-
+-    NCR53c7x0_driver_init (host);
+-
+-    if (request_irq(host->irq, NCR53c7x0_intr, IRQF_SHARED, "53c7xx", host))
+-    {
+-	printk("scsi%d : IRQ%d not free, detaching\n",
+-		host->host_no, host->irq);
+-	goto err_unregister;
+-    } 
+-
+-    if ((hostdata->run_tests && hostdata->run_tests(host) == -1) ||
+-        (hostdata->options & OPTION_DEBUG_TESTS_ONLY)) {
+-    	/* XXX Should disable interrupts, etc. here */
+-	goto err_free_irq;
+-    } else {
+-	if (host->io_port)  {
+-	    host->n_io_port = 128;
+-	    if (!request_region (host->io_port, host->n_io_port, "ncr53c7xx"))
+-		goto err_free_irq;
+-	}
+-    }
+-    
+-    if (NCR53c7x0_read8 (SBCL_REG) & SBCL_BSY) {
+-	printk ("scsi%d : bus wedge, doing SCSI reset\n", host->host_no);
+-	hard_reset (host);
+-    }
+-    return 0;
+-
+- err_free_irq:
+-    free_irq(host->irq, host);	/* dev_id must match request_irq() above */
+- err_unregister:
+-    scsi_unregister(host);
+-    return -1;
+-}
+-
+-/* 
+- * Function : int ncr53c7xx_init(struct scsi_host_template *tpnt, int board, int chip,
+- *	unsigned long base, int io_port, int irq, int dma, long long options,
+- *	int clock);
+- *
+- * Purpose : initializes a NCR53c7,8x0 based on base addresses,
+- *	IRQ, and DMA channel.	
+- *	
+- * Inputs : tpnt - Template for this SCSI adapter, board - board level
+- *	product, chip - 710, plus the base address, I/O port, IRQ, DMA
+- *	channel, option flags, and SCSI clock for the board.
+- * 
+- * Returns : 0 on success, -1 on failure.
+- *
+- */
+-
+-int 
+-ncr53c7xx_init (struct scsi_host_template *tpnt, int board, int chip,
+-    unsigned long base, int io_port, int irq, int dma, 
+-    long long options, int clock)
+-{
+-    struct Scsi_Host *instance;
+-    struct NCR53c7x0_hostdata *hostdata;
+-    char chip_str[80];
+-    int script_len = 0, dsa_len = 0, size = 0, max_cmd_size = 0,
+-	schedule_size = 0, ok = 0;
+-    void *tmp;
+-    unsigned long page;
+-
+-    switch (chip) {
+-    case 710:
+-    case 770:
+-	schedule_size = (tpnt->can_queue + 1) * 8 /* JUMP instruction size */;
+-	script_len = NCR53c7xx_script_len;
+-    	dsa_len = NCR53c7xx_dsa_len;
+-    	options |= OPTION_INTFLY;
+-    	sprintf (chip_str, "NCR53c%d", chip);
+-    	break;
+-    default:
+-    	printk("scsi-ncr53c7xx : unsupported SCSI chip %d\n", chip);
+-    	return -1;
+-    }
+-
+-    printk("scsi-ncr53c7xx : %s at memory 0x%lx, io 0x%x, irq %d",
+-    	chip_str, base, io_port, irq);
+-    if (dma == DMA_NONE)
+-    	printk("\n");
+-    else 
+-    	printk(", dma %d\n", dma);
+-
+-    if (options & OPTION_DEBUG_PROBE_ONLY) {
+-    	printk ("scsi-ncr53c7xx : probe only enabled, aborting initialization\n");
+-    	return -1;
+-    }
+-
+-    max_cmd_size = sizeof(struct NCR53c7x0_cmd) + dsa_len +
+-    	/* Size of dynamic part of command structure : */
+-	2 * /* Worst case : we don't know if we need DATA IN or DATA out */
+-		( 2 * /* Current instructions per scatter/gather segment */ 
+-        	  tpnt->sg_tablesize + 
+-                  3 /* Current startup / termination required per phase */
+-		) *
+-	8 /* Each instruction is eight bytes */;
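+-
+-    /*
+-     * Editor's note, with an illustrative sg_tablesize of 16: the
+-     * dynamic part above works out to 2 * (2 * 16 + 3) * 8 = 560 bytes
+-     * on top of the fixed structure and DSA template.
+-     */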
+-
+-    /* Allocate fixed part of hostdata, dynamic part to hold appropriate
+-       SCSI SCRIPT(tm) plus a single, maximum-sized NCR53c7x0_cmd structure.
+-
+-       We need a NCR53c7x0_cmd structure for scan_scsis() when we are 
+-       not loaded as a module, and when we're loaded as a module, we 
+-       can't use a non-dynamically allocated structure because modules
+-       are vmalloc()'d, which can allow structures to cross page 
+-       boundaries and breaks our physical/virtual address assumptions
+-       for DMA.
+-
+-       So, we stick it past the end of our hostdata structure.
+-
+-       ASSUMPTION : 
+-       	 Regardless of how many simultaneous SCSI commands we allow,
+-	 the probe code only executes a _single_ instruction at a time,
+-	 so we only need one here, and don't need to allocate NCR53c7x0_cmd
+-	 structures for each target until we are no longer in scan_scsis
+-	 and kmalloc() has become functional (memory_init() happens 
+-	 after all device driver initialization).
+-    */
+-
+-    size = sizeof(struct NCR53c7x0_hostdata) + script_len + 
+-    /* Note that alignment will be guaranteed, since we put the command
+-       allocated at probe time after the fixed-up SCSI script, which 
+-       consists of 32 bit words, aligned on a 32 bit boundary.  But
+-       on a 64bit machine we need 8 byte alignment for hostdata->free, so
+-       we add in another 4 bytes to take care of potential misalignment
+-       */
+-	(sizeof(void *) - sizeof(u32)) + max_cmd_size + schedule_size;
+-
+-    page = __get_free_pages(GFP_ATOMIC,1);
+-    if(page==0)
+-    {
+-    	printk(KERN_ERR "53c7xx: out of memory.\n");
+-    	return -ENOMEM;
+-    }
+-#ifdef FORCE_DSA_ALIGNMENT
+-    /*
+-     * 53c710 rev.0 doesn't have an add-with-carry instruction.
+-     * Ensure we allocate enough memory to force DSA alignment.
+-    */
+-    size += 256;
+-#endif
+-    /* Size should be < 8K, so we can fit it in two pages. */
+-    if (size > 8192) {
+-      printk(KERN_ERR "53c7xx: hostdata > 8K\n");
+-      return -1;
+-    }
+-
+-    instance = scsi_register (tpnt, 4);
+-    if (!instance)
+-    {
+-        free_page(page);
+-	return -1;
+-    }
+-    instance->hostdata[0] = page;
+-    memset((void *)instance->hostdata[0], 0, 8192);
+-    cache_push(virt_to_phys((void *)(instance->hostdata[0])), 8192);
+-    cache_clear(virt_to_phys((void *)(instance->hostdata[0])), 8192);
+-    kernel_set_cachemode((void *)instance->hostdata[0], 8192, IOMAP_NOCACHE_SER);
+-
+-    /* FIXME : if we ever support an ISA NCR53c7xx based board, we
+-       need to check if the chip is running in a 16 bit mode, and if so 
+-       unregister it if it is past the 16M (0x1000000) mark */
+-
+-    hostdata = (struct NCR53c7x0_hostdata *)instance->hostdata[0];
+-    hostdata->size = size;
+-    hostdata->script_count = script_len / sizeof(u32);
+-    hostdata->board = board;
+-    hostdata->chip = chip;
+-
+-    /*
+-     * Being memory mapped is more desirable, since 
+-     *
+-     * - Memory accesses may be faster.
+-     *
+-     * - The destination and source address spaces are the same for 
+-     *	 all instructions, meaning we don't have to twiddle dmode or 
+-     *	 any other registers.
+-     *
+-     * So, we try for memory mapped, and if we don't get it,
+-     * we go for port mapped, and that failing we tell the user
+-     * it can't work.
+-     */
+-
+-    if (base) {
+-	instance->base = base;
+-	/* Check for forced I/O mapping */
+-    	if (!(options & OPTION_IO_MAPPED)) {
+-	    options |= OPTION_MEMORY_MAPPED;
+-	    ok = 1;
+-	}
+-    } else {
+-	options &= ~OPTION_MEMORY_MAPPED;
+-    }
+-
+-    if (io_port) {
+-	instance->io_port = io_port;
+-	options |= OPTION_IO_MAPPED;
+-	ok = 1;
+-    } else {
+-	options &= ~OPTION_IO_MAPPED;
+-    }
+-
+-    if (!ok) {
+-	printk ("scsi%d : not initializing, no I/O or memory mapping known\n",
+-	    instance->host_no);
+-	scsi_unregister (instance);
+-	return -1;
+-    }
+-    instance->irq = irq;
+-    instance->dma_channel = dma;
+-
+-    hostdata->options = options;
+-    hostdata->dsa_len = dsa_len;
+-    hostdata->max_cmd_size = max_cmd_size;
+-    hostdata->num_cmds = 1;
+-    hostdata->scsi_clock = clock;
+-    /* Initialize single command */
+-    tmp = (hostdata->script + hostdata->script_count);
+-#ifdef FORCE_DSA_ALIGNMENT
+-    {
+-	void *t = ROUNDUP(tmp, void *);
+-	if (((u32)t & 0xff) > CmdPageStart)
+-	    t = (void *)((u32)t + 255);
+-	t = (void *)(((u32)t & ~0xff) + CmdPageStart);
+-        hostdata->free = t;
+-#if 0
+-	printk ("scsi: Registered size increased by 256 to %d\n", size);
+-	printk ("scsi: CmdPageStart = 0x%02x\n", CmdPageStart);
+-	printk ("scsi: tmp = 0x%08x, hostdata->free set to 0x%08x\n",
+-			(u32)tmp, (u32)t);
+-#endif
+-    }
+-#else
+-    hostdata->free = ROUNDUP(tmp, void *);
+-#endif
+-    hostdata->free->real = tmp;
+-    hostdata->free->size = max_cmd_size;
+-    hostdata->free->free = NULL;
+-    hostdata->free->next = NULL;
+-    hostdata->extra_allocate = 0;
+-
+-    /* Allocate command start code space */
+-    hostdata->schedule = (chip == 700 || chip == 70066) ?
+-	NULL : (u32 *) ((char *)hostdata->free + max_cmd_size);
+-
+-/* 
+- * For diagnostic purposes, we don't really care how fast things blaze.
+- * For profiling, we want to access the 800ns resolution system clock,
+- * using a 'C' call on the host processor.
+- *
+- * Therefore, there's no need for the NCR chip to directly manipulate
+- * this data, and we should put it wherever is most convenient for 
+- * Linux.
+- */
+-    if (track_events) 
+-	hostdata->events = (struct NCR53c7x0_event *)
+-	    vmalloc (sizeof (struct NCR53c7x0_event) * track_events);
+-    else
+-	hostdata->events = NULL;
+-
+-    if (hostdata->events) {
+-	memset ((void *) hostdata->events, 0, sizeof(struct NCR53c7x0_event) *
+-	    track_events);	
+-	hostdata->event_size = track_events;
+-	hostdata->event_index = 0;
+-    } else 
+-	hostdata->event_size = 0;
+-
+-    return NCR53c7x0_init(instance);
+-}
+-
+-
+-/* 
+- * Function : static void NCR53c7x0_init_fixup (struct Scsi_Host *host)
+- *
+- * Purpose :  copy and fixup the SCSI SCRIPTS(tm) code for this device.
+- *
+- * Inputs : host - pointer to this host adapter's structure
+- *
+- */
+-
+-static void 
+-NCR53c7x0_init_fixup (struct Scsi_Host *host) {
+-    NCR53c7x0_local_declare();
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    unsigned char tmp;
+-    int i, ncr_to_memory, memory_to_ncr;
+-    u32 base;
+-    NCR53c7x0_local_setup(host);
+-
+-
+-    /* XXX - NOTE : this code MUST be made endian aware */
+-    /*  Copy code into buffer that was allocated at detection time.  */
+-    memcpy ((void *) hostdata->script, (void *) SCRIPT, 
+-	sizeof(SCRIPT));
+-    /* Fixup labels */
+-    for (i = 0; i < PATCHES; ++i) 
+-	hostdata->script[LABELPATCHES[i]] += 
+-    	    virt_to_bus(hostdata->script);
+-    /* Fixup addresses of constants that used to be EXTERNAL */
+-
+-    patch_abs_32 (hostdata->script, 0, NCR53c7xx_msg_abort, 
+-    	virt_to_bus(&(hostdata->NCR53c7xx_msg_abort)));
+-    patch_abs_32 (hostdata->script, 0, NCR53c7xx_msg_reject, 
+-    	virt_to_bus(&(hostdata->NCR53c7xx_msg_reject)));
+-    patch_abs_32 (hostdata->script, 0, NCR53c7xx_zero, 
+-    	virt_to_bus(&(hostdata->NCR53c7xx_zero)));
+-    patch_abs_32 (hostdata->script, 0, NCR53c7xx_sink, 
+-    	virt_to_bus(&(hostdata->NCR53c7xx_sink)));
+-    patch_abs_32 (hostdata->script, 0, NOP_insn,
+-	virt_to_bus(&(hostdata->NOP_insn)));
+-    patch_abs_32 (hostdata->script, 0, schedule,
+-	virt_to_bus((void *) hostdata->schedule));
+-
+-    /* Fixup references to external variables: */
+-    for (i = 0; i < EXTERNAL_PATCHES_LEN; ++i)
+-       hostdata->script[EXTERNAL_PATCHES[i].offset] +=
+-         virt_to_bus(EXTERNAL_PATCHES[i].address);
+-
+-    /* 
+-     * Fixup absolutes set at boot-time.
+-     * 
+-     * All non-code absolute variables suffixed with "dsa_" and "int_"
+-     * are constants, and need no fixup provided the assembler has done 
+-     * it for us (I don't know what the "real" NCR assembler does in 
+-     * this case, my assembler does the right magic).
+-     */
+-
+-    patch_abs_rwri_data (hostdata->script, 0, dsa_save_data_pointer, 
+-    	Ent_dsa_code_save_data_pointer - Ent_dsa_zero);
+-    patch_abs_rwri_data (hostdata->script, 0, dsa_restore_pointers,
+-    	Ent_dsa_code_restore_pointers - Ent_dsa_zero);
+-    patch_abs_rwri_data (hostdata->script, 0, dsa_check_reselect,
+-    	Ent_dsa_code_check_reselect - Ent_dsa_zero);
+-
+-    /*
+-     * Just for the hell of it, preserve the settings of 
+-     * Burst Length and Enable Read Line bits from the DMODE 
+-     * register.  Make sure SCRIPTS start automagically.
+-     */
+-
+-#if defined(CONFIG_MVME16x) || defined(CONFIG_BVME6000)
+-    /* We know better what we want than 16xBug does! */
+-    tmp = DMODE_10_BL_8 | DMODE_10_FC2;
+-#else
+-    tmp = NCR53c7x0_read8(DMODE_REG_10);
+-    tmp &= (DMODE_BL_MASK | DMODE_10_FC2 | DMODE_10_FC1 | DMODE_710_PD |
+-								DMODE_710_UO);
+-#endif
+-
+-    if (!(hostdata->options & OPTION_MEMORY_MAPPED)) {
+-    	base = (u32) host->io_port;
+-    	memory_to_ncr = tmp|DMODE_800_DIOM;
+-    	ncr_to_memory = tmp|DMODE_800_SIOM;
+-    } else {
+-    	base = virt_to_bus((void *)host->base);
+-	memory_to_ncr = ncr_to_memory = tmp;
+-    }
+-
+-    /* SCRATCHB_REG_10 == SCRATCHA_REG_800, as it happens */
+-    patch_abs_32 (hostdata->script, 0, addr_scratch, base + SCRATCHA_REG_800);
+-    patch_abs_32 (hostdata->script, 0, addr_temp, base + TEMP_REG);
+-    patch_abs_32 (hostdata->script, 0, addr_dsa, base + DSA_REG);
+-
+-    /*
+-     * I needed some variables in the script to be accessible to 
+-     * both the NCR chip and the host processor. For these variables,
+-     * I made the arbitrary decision to store them directly in the 
+-     * hostdata structure rather than in the RELATIVE area of the 
+-     * SCRIPTS.
+-     */
+-    
+-
+-    patch_abs_rwri_data (hostdata->script, 0, dmode_memory_to_memory, tmp);
+-    patch_abs_rwri_data (hostdata->script, 0, dmode_memory_to_ncr, memory_to_ncr);
+-    patch_abs_rwri_data (hostdata->script, 0, dmode_ncr_to_memory, ncr_to_memory);
+-
+-    patch_abs_32 (hostdata->script, 0, msg_buf, 
+-	virt_to_bus((void *)&(hostdata->msg_buf)));
+-    patch_abs_32 (hostdata->script, 0, reconnect_dsa_head, 
+-    	virt_to_bus((void *)&(hostdata->reconnect_dsa_head)));
+-    patch_abs_32 (hostdata->script, 0, addr_reconnect_dsa_head, 
+-	virt_to_bus((void *)&(hostdata->addr_reconnect_dsa_head)));
+-    patch_abs_32 (hostdata->script, 0, reselected_identify, 
+-    	virt_to_bus((void *)&(hostdata->reselected_identify)));
+-/* reselected_tag is currently unused */
+-#if 0
+-    patch_abs_32 (hostdata->script, 0, reselected_tag, 
+-    	virt_to_bus((void *)&(hostdata->reselected_tag)));
+-#endif
+-
+-    patch_abs_32 (hostdata->script, 0, test_dest, 
+-	virt_to_bus((void*)&hostdata->test_dest));
+-    patch_abs_32 (hostdata->script, 0, test_src, 
+-	virt_to_bus(&hostdata->test_source));
+-    patch_abs_32 (hostdata->script, 0, saved_dsa,
+-	virt_to_bus((void *)&hostdata->saved2_dsa));
+-    patch_abs_32 (hostdata->script, 0, emulfly,
+-	virt_to_bus((void *)&hostdata->emulated_intfly));
+-
+-    patch_abs_rwri_data (hostdata->script, 0, dsa_check_reselect, 
+-	(unsigned char)(Ent_dsa_code_check_reselect - Ent_dsa_zero));
+-
+-/* These are for event logging; the ncr_event enum contains the 
+-   actual interrupt numbers. */
+-#ifdef A_int_EVENT_SELECT
+-   patch_abs_32 (hostdata->script, 0, int_EVENT_SELECT, (u32) EVENT_SELECT);
+-#endif
+-#ifdef A_int_EVENT_DISCONNECT
+-   patch_abs_32 (hostdata->script, 0, int_EVENT_DISCONNECT, (u32) EVENT_DISCONNECT);
+-#endif
+-#ifdef A_int_EVENT_RESELECT
+-   patch_abs_32 (hostdata->script, 0, int_EVENT_RESELECT, (u32) EVENT_RESELECT);
+-#endif
+-#ifdef A_int_EVENT_COMPLETE
+-   patch_abs_32 (hostdata->script, 0, int_EVENT_COMPLETE, (u32) EVENT_COMPLETE);
+-#endif
+-#ifdef A_int_EVENT_IDLE
+-   patch_abs_32 (hostdata->script, 0, int_EVENT_IDLE, (u32) EVENT_IDLE);
+-#endif
+-#ifdef A_int_EVENT_SELECT_FAILED
+-   patch_abs_32 (hostdata->script, 0, int_EVENT_SELECT_FAILED, 
+-	(u32) EVENT_SELECT_FAILED);
+-#endif
+-#ifdef A_int_EVENT_BEFORE_SELECT
+-   patch_abs_32 (hostdata->script, 0, int_EVENT_BEFORE_SELECT,
+-	(u32) EVENT_BEFORE_SELECT);
+-#endif
+-#ifdef A_int_EVENT_RESELECT_FAILED
+-   patch_abs_32 (hostdata->script, 0, int_EVENT_RESELECT_FAILED, 
+-	(u32) EVENT_RESELECT_FAILED);
+-#endif
+-
+-    /*
+-     * Make sure the NCR and Linux code agree on the location of 
+-     * certain fields.
+-     */
+-
+-    hostdata->E_accept_message = Ent_accept_message;
+-    hostdata->E_command_complete = Ent_command_complete;		
+-    hostdata->E_cmdout_cmdout = Ent_cmdout_cmdout;
+-    hostdata->E_data_transfer = Ent_data_transfer;
+-    hostdata->E_debug_break = Ent_debug_break;	
+-    hostdata->E_dsa_code_template = Ent_dsa_code_template;
+-    hostdata->E_dsa_code_template_end = Ent_dsa_code_template_end;
+-    hostdata->E_end_data_transfer = Ent_end_data_transfer;
+-    hostdata->E_initiator_abort = Ent_initiator_abort;
+-    hostdata->E_msg_in = Ent_msg_in;
+-    hostdata->E_other_transfer = Ent_other_transfer;
+-    hostdata->E_other_in = Ent_other_in;
+-    hostdata->E_other_out = Ent_other_out;
+-    hostdata->E_reject_message = Ent_reject_message;
+-    hostdata->E_respond_message = Ent_respond_message;
+-    hostdata->E_select = Ent_select;
+-    hostdata->E_select_msgout = Ent_select_msgout;
+-    hostdata->E_target_abort = Ent_target_abort;
+-#ifdef Ent_test_0
+-    hostdata->E_test_0 = Ent_test_0;
+-#endif
+-    hostdata->E_test_1 = Ent_test_1;
+-    hostdata->E_test_2 = Ent_test_2;
+-#ifdef Ent_test_3
+-    hostdata->E_test_3 = Ent_test_3;
+-#endif
+-    hostdata->E_wait_reselect = Ent_wait_reselect;
+-    hostdata->E_dsa_code_begin = Ent_dsa_code_begin;
+-
+-    hostdata->dsa_cmdout = A_dsa_cmdout;
+-    hostdata->dsa_cmnd = A_dsa_cmnd;
+-    hostdata->dsa_datain = A_dsa_datain;
+-    hostdata->dsa_dataout = A_dsa_dataout;
+-    hostdata->dsa_end = A_dsa_end;			
+-    hostdata->dsa_msgin = A_dsa_msgin;
+-    hostdata->dsa_msgout = A_dsa_msgout;
+-    hostdata->dsa_msgout_other = A_dsa_msgout_other;
+-    hostdata->dsa_next = A_dsa_next;
+-    hostdata->dsa_select = A_dsa_select;
+-    hostdata->dsa_start = Ent_dsa_code_template - Ent_dsa_zero;
+-    hostdata->dsa_status = A_dsa_status;
+-    hostdata->dsa_jump_dest = Ent_dsa_code_fix_jump - Ent_dsa_zero + 
+-	8 /* destination operand */;
+-
+-    /* sanity check */
+-    if (A_dsa_fields_start != Ent_dsa_code_template_end - 
+-    	Ent_dsa_zero) 
+-    	printk("scsi%d : NCR dsa_fields start is %d not %d\n",
+-    	    host->host_no, A_dsa_fields_start, Ent_dsa_code_template_end - 
+-    	    Ent_dsa_zero);
+-
+-    printk("scsi%d : NCR code relocated to 0x%lx (virt 0x%p)\n", host->host_no,
+-	virt_to_bus(hostdata->script), hostdata->script);
+-}
+-
+-/*
+- * Function : static int NCR53c7xx_run_tests (struct Scsi_Host *host)
+- *
+- * Purpose : run various verification tests on the NCR chip, 
+- *	including interrupt generation, and proper bus mastering
+- * 	operation.
+- * 
+- * Inputs : host - a properly initialized Scsi_Host structure
+- *
+- * Preconditions : the NCR chip must be in a halted state.
+- *
+- * Returns : 0 if all tests were successful, -1 on error.
+- * 
+- */
+-
+-static int 
+-NCR53c7xx_run_tests (struct Scsi_Host *host) {
+-    NCR53c7x0_local_declare();
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    unsigned long timeout;
+-    u32 start;
+-    int failed, i;
+-    unsigned long flags;
+-    NCR53c7x0_local_setup(host);
+-
+-    /* The NCR chip _must_ be idle to run the test scripts */
+-
+-    local_irq_save(flags);
+-    if (!hostdata->idle) {
+-	printk ("scsi%d : chip not idle, aborting tests\n", host->host_no);
+-	local_irq_restore(flags);
+-	return -1;
+-    }
+-
+-    /* 
+-     * Check for functional interrupts, this could work as an
+-     * autoprobe routine.
+-     */
+-
+-    if ((hostdata->options & OPTION_DEBUG_TEST1) && 
+-	    hostdata->state != STATE_DISABLED) {
+-	hostdata->idle = 0;
+-	hostdata->test_running = 1;
+-	hostdata->test_completed = -1;
+-	hostdata->test_dest = 0;
+-	hostdata->test_source = 0xdeadbeef;
+-	start = virt_to_bus (hostdata->script) + hostdata->E_test_1;
+-    	hostdata->state = STATE_RUNNING;
+-	printk ("scsi%d : test 1", host->host_no);
+-	NCR53c7x0_write32 (DSP_REG, start);
+-	if (hostdata->options & OPTION_DEBUG_TRACE)
+-	    NCR53c7x0_write8 (DCNTL_REG, hostdata->saved_dcntl | DCNTL_SSM |
+-						DCNTL_STD);
+-	printk (" started\n");
+-	local_irq_restore(flags);
+-
+-	/* 
+-	 * This is currently a .5 second timeout, since (in theory) no slow 
+-	 * board will take that long.  In practice, we've seen one 
+-	 * board will take that long.  In practice, we've seen one 
+-	 * pentium which occasionally fails with this, but works with 
+-	 * a timeout 10 times as long.
+-
+-	timeout = jiffies + 5 * HZ / 10;
+-	while ((hostdata->test_completed == -1) && time_before(jiffies, timeout))
+-		barrier();
+-
+-	failed = 1;
+-	if (hostdata->test_completed == -1)
+-	    printk ("scsi%d : driver test 1 timed out%s\n",host->host_no ,
+-		(hostdata->test_dest == 0xdeadbeef) ? 
+-		    " due to lost interrupt.\n"
+-		    "         Please verify that the correct IRQ is being used for your board.\n"
+-		    : "");
+-	else if (hostdata->test_completed != 1) 
+-	    printk ("scsi%d : test 1 bad interrupt value (%d)\n", 
+-		host->host_no, hostdata->test_completed);
+-	else 
+-	    failed = (hostdata->test_dest != 0xdeadbeef);
+-
+-	if (hostdata->test_dest != 0xdeadbeef) {
+-	    printk ("scsi%d : driver test 1 read 0x%x instead of 0xdeadbeef indicating a\n"
+-                    "         probable cache invalidation problem.  Please configure caching\n"
+-		    "         as write-through or disabled\n",
+-		host->host_no, hostdata->test_dest);
+-	}
+-
+-	if (failed) {
+-	    printk ("scsi%d : DSP = 0x%p (script at 0x%p, start at 0x%x)\n",
+-		host->host_no, bus_to_virt(NCR53c7x0_read32(DSP_REG)),
+-		hostdata->script, start);
+-	    printk ("scsi%d : DSPS = 0x%x\n", host->host_no,
+-		NCR53c7x0_read32(DSPS_REG));
+-	    local_irq_restore(flags);
+-	    return -1;
+-	}
+-    	hostdata->test_running = 0;
+-    }
+-
+-    if ((hostdata->options & OPTION_DEBUG_TEST2) && 
+-	hostdata->state != STATE_DISABLED) {
+-	u32 dsa[48];
+-    	unsigned char identify = IDENTIFY(0, 0);
+-	unsigned char cmd[6];
+-	unsigned char data[36];
+-    	unsigned char status = 0xff;
+-    	unsigned char msg = 0xff;
+-
+-    	cmd[0] = INQUIRY;
+-    	cmd[1] = cmd[2] = cmd[3] = cmd[5] = 0;
+-    	cmd[4] = sizeof(data); 
+-
+-    	dsa[2] = 1;
+-    	dsa[3] = virt_to_bus(&identify);
+-    	dsa[4] = 6;
+-    	dsa[5] = virt_to_bus(&cmd);
+-    	dsa[6] = sizeof(data);
+-    	dsa[7] = virt_to_bus(&data);
+-    	dsa[8] = 1;
+-    	dsa[9] = virt_to_bus(&status);
+-    	dsa[10] = 1;
+-    	dsa[11] = virt_to_bus(&msg);
+-
+-	for (i = 0; i < 6; ++i) {
+-#ifdef VALID_IDS
+-	    if (!hostdata->valid_ids[i])
+-		continue;
+-#endif
+-	    local_irq_disable();
+-	    if (!hostdata->idle) {
+-		printk ("scsi%d : chip not idle, aborting tests\n", host->host_no);
+-		local_irq_restore(flags);
+-		return -1;
+-	    }
+-
+-	    /* 710: bit mapped scsi ID, async   */
+-            dsa[0] = (1 << i) << 16;
+-	    hostdata->idle = 0;
+-	    hostdata->test_running = 2;
+-	    hostdata->test_completed = -1;
+-	    start = virt_to_bus(hostdata->script) + hostdata->E_test_2;
+-	    hostdata->state = STATE_RUNNING;
+-	    NCR53c7x0_write32 (DSA_REG, virt_to_bus(dsa));
+-	    NCR53c7x0_write32 (DSP_REG, start);
+-	    if (hostdata->options & OPTION_DEBUG_TRACE)
+-	        NCR53c7x0_write8 (DCNTL_REG, hostdata->saved_dcntl |
+-				DCNTL_SSM | DCNTL_STD);
+-	    local_irq_restore(flags);
+-
+-	    timeout = jiffies + 5 * HZ;	/* arbitrary */
+-	    while ((hostdata->test_completed == -1) && time_before(jiffies, timeout))
+-	    	barrier();
+-
+-	    NCR53c7x0_write32 (DSA_REG, 0);
+-
+-	    if (hostdata->test_completed == 2) {
+-		data[35] = 0;
+-		printk ("scsi%d : test 2 INQUIRY to target %d, lun 0 : %s\n",
+-		    host->host_no, i, data + 8);
+-		printk ("scsi%d : status ", host->host_no);
+-		scsi_print_status (status);
+-		printk ("\nscsi%d : message ", host->host_no);
+-		spi_print_msg(&msg);
+-		printk ("\n");
+-	    } else if (hostdata->test_completed == 3) {
+-		printk("scsi%d : test 2 no connection with target %d\n",
+-		    host->host_no, i);
+-		if (!hostdata->idle) {
+-		    printk("scsi%d : not idle\n", host->host_no);
+-		    local_irq_restore(flags);
+-		    return -1;
+-		}
+-	    } else if (hostdata->test_completed == -1) {
+-		printk ("scsi%d : test 2 timed out\n", host->host_no);
+-		local_irq_restore(flags);
+-		return -1;
+-	    } 
+-	    hostdata->test_running = 0;
+-	}
+-    }
+-
+-    local_irq_restore(flags);
+-    return 0;
+-}
+-
+-/*
+- * Function : static void NCR53c7xx_dsa_fixup (struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose : copy the NCR53c8xx dsa structure into cmd's dsa buffer,
+- * 	performing all necessary relocation.
+- *
+- * Inputs : cmd, a NCR53c7x0_cmd structure with a dsa area large
+- *	enough to hold the NCR53c8xx dsa.
+- */
+-
+-static void 
+-NCR53c7xx_dsa_fixup (struct NCR53c7x0_cmd *cmd) {
+-    Scsi_Cmnd *c = cmd->cmd;
+-    struct Scsi_Host *host = c->device->host;
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-    	host->hostdata[0];
+-    int i;
+-
+-    memcpy (cmd->dsa, hostdata->script + (hostdata->E_dsa_code_template / 4),
+-    	hostdata->E_dsa_code_template_end - hostdata->E_dsa_code_template);
+-
+-    /* 
+-     * Note : within the NCR 'C' code, dsa points to the _start_
+-     * of the DSA structure, and _not_ the offset of dsa_zero within
+-     * that structure used to facilitate shorter signed offsets
+-     * for the 8 bit ALU.
+-     * 
+-     * The implications of this are that 
+-     * 
+-     * - 32 bit A_dsa_* absolute values require an additional 
+-     * 	 dsa_zero added to their value to be correct, since they are 
+-     *   relative to dsa_zero which is in essentially a separate
+-     *   space from the code symbols.
+-     *
+-     * - All other symbols require no special treatment.
+-     */
+-
+-    patch_abs_tci_data (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+-    	dsa_temp_lun, c->device->lun);
+-    patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+-	dsa_temp_addr_next, virt_to_bus(&cmd->dsa_next_addr));
+-    patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+-    	dsa_temp_next, virt_to_bus(cmd->dsa) + Ent_dsa_zero -
+-	Ent_dsa_code_template + A_dsa_next);
+-    patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), 
+-    	dsa_temp_sync, virt_to_bus((void *)hostdata->sync[c->device->id].script));
+-    patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32), 
+-    	dsa_sscf_710, virt_to_bus((void *)&hostdata->sync[c->device->id].sscf_710));
+-    patch_abs_tci_data (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+-    	    dsa_temp_target, 1 << c->device->id);
+-    /* XXX - new pointer stuff */
+-    patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+-    	dsa_temp_addr_saved_pointer, virt_to_bus(&cmd->saved_data_pointer));
+-    patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+-    	dsa_temp_addr_saved_residual, virt_to_bus(&cmd->saved_residual));
+-    patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+-    	dsa_temp_addr_residual, virt_to_bus(&cmd->residual));
+-
+-    /*  XXX - new start stuff */
+-
+-    patch_abs_32 (cmd->dsa, Ent_dsa_code_template / sizeof(u32),
+-	dsa_temp_addr_dsa_value, virt_to_bus(&cmd->dsa_addr));
+-}
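+-
+-/*
+- * Aside : each patch above boils down to "store a fixed-up absolute
+- * value at a known word offset in the copied template".  For the
+- * A_dsa_* case, the adjustment described in the note is, in isolation
+- * (helper name and parameter are illustrative) :
+- */
+-
+-static inline u32
+-dsa_abs_fixup (u32 *dsa, u32 a_dsa_offset) {
+-    /*
+-     * A_dsa_* values are relative to dsa_zero, which itself lies
+-     * Ent_dsa_zero - Ent_dsa_code_template bytes into the copied
+-     * template, so both must be added to the buffer's bus address.
+-     */
+-    return virt_to_bus(dsa) + Ent_dsa_zero - Ent_dsa_code_template +
+-	a_dsa_offset;
+-}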
+-
+-/* 
+- * Function : run_process_issue_queue (void)
+- * 
+- * Purpose : ensure that the coroutine is running and will process our 
+- * 	request.  process_issue_queue_running is checked/set here (in an 
+- *	inline function) rather than in process_issue_queue itself to reduce 
+- * 	the chances of stack overflow.
+- *
+- */
+-
+-static volatile int process_issue_queue_running = 0;
+-
+-static __inline__ void 
+-run_process_issue_queue(void) {
+-    unsigned long flags;
+-    local_irq_save(flags);
+-    if (!process_issue_queue_running) {
+-	process_issue_queue_running = 1;
+-        process_issue_queue(flags);
+-	/* 
+-         * process_issue_queue_running is cleared in process_issue_queue 
+-	 * once it can't do more work, and process_issue_queue exits with 
+-	 * interrupts disabled.
+-	 */
+-    }
+-    local_irq_restore(flags);
+-}
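+-
+-/*
+- * Aside : the guard idiom above, reduced to its essentials (names
+- * illustrative).  The flag is tested and set with interrupts off, so
+- * at most one caller ever enters the worker; requests arriving while
+- * it runs are handled by the worker's own loop instead of a nested
+- * call, which is what keeps the stack depth bounded :
+- */
+-
+-static volatile int worker_running = 0;
+-
+-static inline void
+-kick_worker (void (*worker)(unsigned long)) {
+-    unsigned long flags;
+-    local_irq_save(flags);
+-    if (!worker_running) {
+-	worker_running = 1;	/* cleared by the worker itself */
+-	worker(flags);		/* returns with interrupts disabled */
+-    }
+-    local_irq_restore(flags);
+-}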
+-
+-/*
+- * Function : static void abnormal_finished (struct NCR53c7x0_cmd *cmd, int
+- *	result)
+- *
+- * Purpose : mark SCSI command as finished, OR'ing the host portion 
+- *	of the result word into the result field of the corresponding
+- *	Scsi_Cmnd structure, and removing it from the internal queues.
+- *
+- * Inputs : cmd - command, result - entire result field
+- *
+- * Preconditions : the NCR chip should be in a halted state when 
+- *	abnormal_finished is run, since it modifies structures which
+- *	the NCR expects to have exclusive access to.
+- */
+-
+-static void 
+-abnormal_finished (struct NCR53c7x0_cmd *cmd, int result) {
+-    Scsi_Cmnd *c = cmd->cmd;
+-    struct Scsi_Host *host = c->device->host;
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-    	host->hostdata[0];
+-    unsigned long flags;
+-    int left, found;
+-    volatile struct NCR53c7x0_cmd * linux_search;
+-    volatile struct NCR53c7x0_cmd * volatile *linux_prev;
+-    volatile u32 *ncr_prev, *ncrcurrent, ncr_search;
+-
+-#if 0
+-    printk ("scsi%d: abnormal finished\n", host->host_no);
+-#endif
+-
+-    local_irq_save(flags);
+-    found = 0;
+-    /* 
+-     * Traverse the NCR issue array until we find a match or run out 
+-     * of instructions.  Instructions in the NCR issue array are 
+-     * either JUMP or NOP instructions, which are 2 words in length.
+-     */
+-
+-
+-    for (found = 0, left = host->can_queue, ncrcurrent = hostdata->schedule; 
+-	left > 0; --left, ncrcurrent += 2)
+-    {
+-	if (issue_to_cmd (host, hostdata, (u32 *) ncrcurrent) == cmd) 
+-	{
+-	    ncrcurrent[0] = hostdata->NOP_insn;
+-	    ncrcurrent[1] = 0xdeadbeef;
+-	    ++found;
+-	    break;
+-	}
+-    }
+-	
+-    /* 
+-     * Traverse the NCR reconnect list of DSA structures until we find 
+-     * a pointer to this dsa or have found too many command structures.  
+-     * We let prev point at the next field of the previous element or 
+-     * head of the list, so we don't do anything different for removing 
+-     * the head element.  
+-     */
+-
+-    for (left = host->can_queue,
+-	    ncr_search = hostdata->reconnect_dsa_head, 
+-	    ncr_prev = &hostdata->reconnect_dsa_head;
+-	left >= 0 && ncr_search && 
+-	    ((char*)bus_to_virt(ncr_search) + hostdata->dsa_start) 
+-		!= (char *) cmd->dsa;
+-	ncr_prev = (u32*) ((char*)bus_to_virt(ncr_search) + 
+-	    hostdata->dsa_next), ncr_search = *ncr_prev, --left);
+-
+-    if (left < 0) 
+-	printk("scsi%d: loop detected in ncr reconncect list\n",
+-	    host->host_no);
+-    else if (ncr_search) {
+-	if (found)
+-	    printk("scsi%d: scsi %ld in ncr issue array and reconnect lists\n",
+-		host->host_no, c->pid);
+-	else {
+-	    volatile u32 * next = (u32 *) 
+-	    	((char *)bus_to_virt(ncr_search) + hostdata->dsa_next);
+-	    *ncr_prev = *next;
+-/* If we're at the tail end of the issue queue, update that pointer too. */
+-	    found = 1;
+-	}
+-    }
+-
+-    /*
+-     * Traverse the host running list until we find this command or discover
+-     * we have too many elements, pointing linux_prev at the next field of the 
+-     * linux_previous element or head of the list, search at this element.
+-     */
+-
+-    for (left = host->can_queue, linux_search = hostdata->running_list, 
+-	    linux_prev = &hostdata->running_list;
+-	left >= 0 && linux_search && linux_search != cmd;
+-	linux_prev = &(linux_search->next), 
+-	    linux_search = linux_search->next, --left);
+-    
+-    if (left < 0) 
+-	printk ("scsi%d: loop detected in host running list for scsi pid %ld\n",
+-	    host->host_no, c->pid);
+-    else if (linux_search) {
+-	*linux_prev = linux_search->next;
+-	--hostdata->busy[c->device->id][c->device->lun];
+-    }
+-
+-    /* Return the NCR command structure to the free list */
+-    cmd->next = hostdata->free;
+-    hostdata->free = cmd;
+-    c->host_scribble = NULL;
+-
+-    /* And return */
+-    c->result = result;
+-    c->scsi_done(c);
+-
+-    local_irq_restore(flags);
+-    run_process_issue_queue();
+-}
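+-
+-/*
+- * Aside : all three list walks above use the "prev points at the next
+- * field" idiom, so removing the head needs no special casing.  The
+- * same idiom on an ordinary singly linked list (types illustrative) :
+- */
+-
+-struct node { struct node *next; };
+-
+-static void
+-unlink_node (struct node **head, struct node *victim) {
+-    struct node **prev;
+-    for (prev = head; *prev && *prev != victim; prev = &(*prev)->next)
+-	;
+-    if (*prev)
+-	*prev = victim->next;	/* head and interior cases are identical */
+-}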
+-
+-/* 
+- * Function : static void intr_break (struct Scsi_Host *host,
+- * 	struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose :  Handler for breakpoint interrupts from a SCSI script
+- *
+- * Inputs : host - pointer to this host adapter's structure,
+- * 	cmd - pointer to the command (if any) dsa was pointing 
+- * 	to.
+- *
+- */
+-
+-static void 
+-intr_break (struct Scsi_Host *host, struct 
+-    NCR53c7x0_cmd *cmd) {
+-    NCR53c7x0_local_declare();
+-    struct NCR53c7x0_break *bp;
+-#if 0
+-    Scsi_Cmnd *c = cmd ? cmd->cmd : NULL;
+-#endif
+-    u32 *dsp;
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];		
+-    unsigned long flags;
+-    NCR53c7x0_local_setup(host);
+-
+-    /*
+-     * Find the break point corresponding to this address, and 
+-     * dump the appropriate debugging information to standard 
+-     * output.  
+-     */
+-    local_irq_save(flags);
+-    dsp = (u32 *) bus_to_virt(NCR53c7x0_read32(DSP_REG));
+-    for (bp = hostdata->breakpoints; bp && bp->address != dsp; 
+-    	bp = bp->next);
+-    if (!bp) 
+-    	panic("scsi%d : break point interrupt from %p with no breakpoint!",
+-    	    host->host_no, dsp);
+-
+-    /*
+-     * Configure the NCR chip for manual start mode, so that we can 
+-     * point the DSP register at the instruction that follows the 
+-     * INT int_debug_break instruction.
+-     */
+-
+-    NCR53c7x0_write8 (hostdata->dmode, 
+-	NCR53c7x0_read8(hostdata->dmode)|DMODE_MAN);
+-
+-    /*
+-     * And update the DSP register, using the size of the old 
+-     * instruction in bytes.
+-     */
+-
+-    local_irq_restore(flags);
+-}
+-/*
+- * Function : static void print_synchronous (const char *prefix, 
+- *	const unsigned char *msg)
+- * 
+- * Purpose : print a pretty, user and machine parsable representation
+- *	of an SDTR message, including the "real" parameters and data
+- *	clock, so we can tell the transfer rate at a glance.
+- *
+- * Inputs : prefix - text to prepend, msg - SDTR message (5 bytes)
+- */
+-
+-static void
+-print_synchronous (const char *prefix, const unsigned char *msg) {
+-    if (msg[4]) {
+-	int Hz = 1000000000 / (msg[3] * 4);
+-	int integer = Hz / 1000000;
+-	int fraction = (Hz - (integer * 1000000)) / 10000;
+-	printk ("%speriod %dns offset %d %d.%02dMHz %s SCSI%s\n",
+-	    prefix, (int) msg[3] * 4, (int) msg[4], integer, fraction,
+-	    (((msg[3] * 4) < 200) ? "FAST" : "synchronous"),
+-	    (((msg[3] * 4) < 200) ? "-II" : ""));
+-    } else 
+-	printk ("%sasynchronous SCSI\n", prefix);
+-}
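+-
+-/*
+- * Worked example of the arithmetic above : an SDTR period factor of
+- * msg[3] = 25 means 25 * 4 = 100ns per cycle, so Hz = 1000000000 /
+- * 100 = 10000000 and the message prints as "10.00MHz FAST SCSI-II"
+- * (the period is under 200ns).  A factor of 50 (200ns) would print
+- * as "5.00MHz synchronous SCSI" instead.
+- */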
+-
+-/*
+- * Function : static void set_synchronous (struct Scsi_Host *host, 
+- *	 	int target, int sxfer, int scntl3, int now_connected)
+- *
+- * Purpose : reprogram transfers between the selected SCSI initiator and 
+- *	target with the given register values; in the indirect
+- *	select operand, reselection script, and chip registers.
+- *
+- * Inputs : host - NCR53c7,8xx SCSI host, target - numeric SCSI target id,
+- *	sxfer and scntl3 - NCR registers. now_connected - if non-zero, 
+- *	we should reprogram the registers now too.
+- *
+- *      NOTE:  For 53c710, scntl3 is actually used for SCF bits from
+- *	SBCL, as we don't have a SCNTL3.
+- */
+-
+-static void
+-set_synchronous (struct Scsi_Host *host, int target, int sxfer, int scntl3,
+-    int now_connected) {
+-    NCR53c7x0_local_declare();
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *) 
+-	host->hostdata[0];
+-    u32 *script;
+-    NCR53c7x0_local_setup(host);
+-
+-    /* These are eight bit registers */
+-    sxfer &= 0xff;
+-    scntl3 &= 0xff;
+-
+-    hostdata->sync[target].sxfer_sanity = sxfer;
+-    hostdata->sync[target].scntl3_sanity = scntl3;
+-
+-/* 
+- * HARD CODED : synchronous script is EIGHT words long.  This 
+- * must agree with 53c7.8xx.h
+- */
+-
+-    if ((hostdata->chip != 700) && (hostdata->chip != 70066)) {
+-	hostdata->sync[target].select_indirect = (1 << target) << 16 |
+-		(sxfer << 8);
+-	hostdata->sync[target].sscf_710 = scntl3;
+-
+-	script = (u32 *) hostdata->sync[target].script;
+-
+-	/* XXX - add NCR53c7x0 code to reprogram SCF bits if we want to */
+-	script[0] = ((DCMD_TYPE_RWRI | DCMD_RWRI_OPC_MODIFY |
+-		DCMD_RWRI_OP_MOVE) << 24) |
+-		(SBCL_REG << 16) | (scntl3 << 8);
+-	script[1] = 0;
+-	script += 2;
+-
+-	script[0] = ((DCMD_TYPE_RWRI | DCMD_RWRI_OPC_MODIFY |
+-	    DCMD_RWRI_OP_MOVE) << 24) |
+-		(SXFER_REG << 16) | (sxfer << 8);
+-	script[1] = 0;
+-	script += 2;
+-
+-#ifdef DEBUG_SYNC_INTR
+-	if (hostdata->options & OPTION_DEBUG_DISCONNECT) {
+-	    script[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_INT) << 24) | DBC_TCI_TRUE;
+-	    script[1] = DEBUG_SYNC_INTR;
+-	    script += 2;
+-	}
+-#endif
+-
+-	script[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_RETURN) << 24) | DBC_TCI_TRUE;
+-	script[1] = 0;
+-	script += 2;
+-    }
+-
+-    if (hostdata->options & OPTION_DEBUG_SYNCHRONOUS) 
+-	printk ("scsi%d : target %d sync parameters are sxfer=0x%x, scntl3=0x%x\n",
+-	host->host_no, target, sxfer, scntl3);
+-
+-    if (now_connected) {
+-	NCR53c7x0_write8(SBCL_REG, scntl3);
+-	NCR53c7x0_write8(SXFER_REG, sxfer);
+-    }
+-}
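+-
+-/*
+- * Aside : the script words stored above follow the chip's
+- * register/immediate encoding : command type and opcode in the top
+- * byte, target register in bits 16-23, immediate data in bits 8-15,
+- * and an unused second word.  As a hedged helper (name illustrative) :
+- */
+-
+-static inline void
+-emit_reg_move (u32 *script, unsigned int reg, unsigned int data) {
+-    script[0] = ((DCMD_TYPE_RWRI | DCMD_RWRI_OPC_MODIFY |
+-	    DCMD_RWRI_OP_MOVE) << 24) | (reg << 16) | (data << 8);
+-    script[1] = 0;
+-}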
+-
+-
+-/*
+- * Function : static int asynchronous (struct Scsi_Host *host, int target)
+- *
+- * Purpose : reprogram transfers between the selected SCSI host adapter and
+- *      target (assumed to be currently connected) to be asynchronous.
+- *
+- * Inputs : host - SCSI host structure, target - numeric target ID.
+- *
+- * Preconditions : the NCR chip should be in one of the halted states
+- */
+-    
+-static void
+-asynchronous (struct Scsi_Host *host, int target) {
+-    NCR53c7x0_local_declare();
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    NCR53c7x0_local_setup(host);
+-    set_synchronous (host, target, /* no offset */ 0, hostdata->saved_scntl3,
+-	1);
+-    printk ("scsi%d : setting target %d to asynchronous SCSI\n",
+-	host->host_no, target);
+-}
+-
+-/* 
+- * XXX - do we want to go out of our way (ie, add extra code to selection
+- * 	in the NCR53c710/NCR53c720 script) to reprogram the synchronous
+- * 	conversion bits, or can we be content in just setting the 
+- * 	sxfer bits?  I chose to do so [richard@sleepie.demon.co.uk]
+- */
+-
+-/* Table for NCR53c8xx synchronous values */
+-
+-/* This table is also correct for the 710, allowing that scf=4 is the
+- * equivalent of SSCF=0 (ie use DCNTL, divide by 3) for a 50.01-66.00MHz clock.
+- * For any other clock values, we cannot use entries with SCF values of
+- * 4.  I guess that for a 66MHz clock, the slowest it will set is 2MHz,
+- * and for a 50MHz clock, the slowest will be 2.27MHz.  Should check
+- * that a device doesn't try to negotiate sync below these limits!
+- */
+- 
+-static const struct {
+-    int div;		/* Total clock divisor * 10 */
+-    unsigned char scf;	/* */
+-    unsigned char tp;	/* 4 + tp = xferp divisor */
+-} syncs[] = {
+-/*	div	scf	tp	div	scf	tp	div	scf	tp */
+-    {	40,	1,	0}, {	50,	1,	1}, {	60,	1,	2}, 
+-    {	70,	1,	3}, {	75,	2,	1}, {	80,	1,	4},
+-    {	90,	1,	5}, {	100,	1,	6}, {	105,	2,	3},
+-    {	110,	1,	7}, {	120,	2,	4}, {	135,	2,	5},
+-    {	140,	3,	3}, {	150,	2,	6}, {	160,	3,	4},
+-    {	165,	2,	7}, {	180,	3,	5}, {	200,	3,	6},
+-    {	210,	4,	3}, {	220,	3,	7}, {	240,	4,	4},
+-    {	270,	4,	5}, {	300,	4,	6}, {	330,	4,	7}
+-};
+-
+-/*
+- * Function : static void synchronous (struct Scsi_Host *host, int target, 
+- *	char *msg)
+- *
+- * Purpose : reprogram transfers between the selected SCSI initiator and 
+- *	target for synchronous SCSI transfers such that the synchronous 
+- *	offset is less than that requested and period at least as long 
+- *	as that requested.  Also modify *msg such that it contains 
+- *	an appropriate response. 
+- *
+- * Inputs : host - NCR53c7,8xx SCSI host, target - numeric SCSI target id,
+- *	msg - synchronous transfer request.
+- */
+-
+-
+-static void
+-synchronous (struct Scsi_Host *host, int target, char *msg) {
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    int desire, divisor, i, limit;
+-    unsigned char scntl3, sxfer;
+-/* The diagnostic message fits on one line, even with max. width integers */
+-    char buf[80];
+-
+-/* Desired transfer clock in Hz */
+-    desire = 1000000000L / (msg[3] * 4);
+-/* Scale the available SCSI clock by 10 so we get tenths */
+-    divisor = (hostdata->scsi_clock * 10) / desire;
+-
+-/* NCR chips can handle at most an offset of 8 */
+-    if (msg[4] > 8)
+-	msg[4] = 8;
+-
+-    if (hostdata->options & OPTION_DEBUG_SDTR)
+-    	printk("scsi%d : optimal synchronous divisor of %d.%01d\n",
+-	    host->host_no, divisor / 10, divisor % 10);
+-
+-    limit = ARRAY_SIZE(syncs) - 1;
+-    for (i = 0; (i < limit) && (divisor > syncs[i].div); ++i);
+-
+-    if (hostdata->options & OPTION_DEBUG_SDTR)
+-    	printk("scsi%d : selected synchronous divisor of %d.%01d\n",
+-	    host->host_no, syncs[i].div / 10, syncs[i].div % 10);
+-
+-    msg[3] = ((1000000000L / hostdata->scsi_clock) * syncs[i].div / 10 / 4);
+-
+-    if (hostdata->options & OPTION_DEBUG_SDTR)
+-    	printk("scsi%d : selected synchronous period of %dns\n", host->host_no,
+-	    msg[3] * 4);
+-
+-    scntl3 = syncs[i].scf;
+-    sxfer = (msg[4] << SXFER_MO_SHIFT) | (syncs[i].tp << 4);
+-    if (hostdata->options & OPTION_DEBUG_SDTR)
+-    	printk ("scsi%d : sxfer=0x%x scntl3=0x%x\n", 
+-	    host->host_no, (int) sxfer, (int) scntl3);
+-    set_synchronous (host, target, sxfer, scntl3, 1);
+-    sprintf (buf, "scsi%d : setting target %d to ", host->host_no, target);
+-    print_synchronous (buf, msg);
+-}
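+-
+-/*
+- * Worked example : with a 50MHz SCSI clock and a requested period
+- * factor of 25 (100ns), desire = 10000000 and divisor = 50, so the
+- * { 50, 1, 1 } table entry is selected.  msg[3] is then recomputed
+- * as (1000000000 / 50000000) * 50 / 10 / 4 = 25, confirming the
+- * 100ns period, and sxfer packs the (possibly clamped) offset from
+- * msg[4] together with tp = 1.
+- */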
+-
+-/* 
+- * Function : static int NCR53c7x0_dstat_sir_intr (struct Scsi_Host *host,
+- * 	struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose :  Handler for INT generated instructions for the 
+- * 	NCR53c810/820 SCSI SCRIPT
+- *
+- * Inputs : host - pointer to this host adapter's structure,
+- * 	cmd - pointer to the command (if any) dsa was pointing 
+- * 	to.
+- *
+- */
+-
+-static int 
+-NCR53c7x0_dstat_sir_intr (struct Scsi_Host *host, struct 
+-    NCR53c7x0_cmd *cmd) {
+-    NCR53c7x0_local_declare();
+-    int print;
+-    Scsi_Cmnd *c = cmd ? cmd->cmd : NULL;
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];		
+-    u32 dsps,*dsp;	/* Argument of the INT instruction */
+-
+-    NCR53c7x0_local_setup(host);
+-    dsps = NCR53c7x0_read32(DSPS_REG);
+-    dsp = (u32 *) bus_to_virt(NCR53c7x0_read32(DSP_REG));
+-
+-    /* RGH 150597:  Frig.  Commands which fail with Check Condition are
+-     * flagged as successful - hack dsps to indicate check condition */
+-#if 0
+-    /* RGH 200597:  Need to disable for BVME6000, as it gets Check Conditions
+-     * and then dies.  Seems to handle Check Condition at startup, but
+-     * not mid kernel build. */
+-    if (dsps == A_int_norm_emulateintfly && cmd && cmd->result == 2)
+-        dsps = A_int_err_check_condition;
+-#endif
+-
+-    if (hostdata->options & OPTION_DEBUG_INTR) 
+-	printk ("scsi%d : DSPS = 0x%x\n", host->host_no, dsps);
+-
+-    switch (dsps) {
+-    case A_int_msg_1:
+-	print = 1;
+-	switch (hostdata->msg_buf[0]) {
+-	/* 
+-	 * Unless we've initiated synchronous negotiation, I don't
+-	 * think that this should happen.
+-	 */
+-	case MESSAGE_REJECT:
+-	    hostdata->dsp = hostdata->script + hostdata->E_accept_message /
+-		sizeof(u32);
+-	    hostdata->dsp_changed = 1;
+-	    if (cmd && (cmd->flags & CMD_FLAG_SDTR)) {
+-		printk ("scsi%d : target %d rejected SDTR\n", host->host_no, 
+-		    c->device->id);
+-		cmd->flags &= ~CMD_FLAG_SDTR;
+-		asynchronous (host, c->device->id);
+-		print = 0;
+-	    } 
+-	    break;
+-	case INITIATE_RECOVERY:
+-	    printk ("scsi%d : extended contingent allegiance not supported yet, rejecting\n",
+-		host->host_no);
+-	    /* Same handling as the default case below */
+-	    hostdata->dsp = hostdata->script + hostdata->E_reject_message /
+-		sizeof(u32);
+-	    hostdata->dsp_changed = 1;
+-	    break;
+-	default:
+-	    printk ("scsi%d : unsupported message, rejecting\n",
+-		host->host_no);
+-	    hostdata->dsp = hostdata->script + hostdata->E_reject_message /
+-		sizeof(u32);
+-	    hostdata->dsp_changed = 1;
+-	}
+-	if (print) {
+-	    printk ("scsi%d : received message", host->host_no);
+-	    if (c) 
+-	    	printk (" from target %d lun %d ", c->device->id, c->device->lun);
+-	    spi_print_msg((unsigned char *) hostdata->msg_buf);
+-	    printk("\n");
+-	}
+-	
+-	return SPECIFIC_INT_NOTHING;
+-
+-
+-    case A_int_msg_sdtr:
+-/*
+- * At this point, hostdata->msg_buf contains
+- * 0 EXTENDED MESSAGE
+- * 1 length 
+- * 2 SDTR
+- * 3 period * 4ns
+- * 4 offset
+- */
+-
+-	if (cmd) {
+-	    char buf[80];
+-	    sprintf (buf, "scsi%d : target %d %s ", host->host_no, c->device->id,
+-		(cmd->flags & CMD_FLAG_SDTR) ? "accepting" : "requesting");
+-	    print_synchronous (buf, (unsigned char *) hostdata->msg_buf);
+-
+-	/* 
+-	 * Initiator initiated, won't happen unless synchronous 
+-	 * 	transfers are enabled.  If we get a SDTR message in
+-	 * 	response to our SDTR, we should program our parameters
+-	 * 	such that 
+-	 *		offset <= requested offset
+-	 *		period >= requested period		 	
+-   	 */
+-	    if (cmd->flags & CMD_FLAG_SDTR) {
+-		cmd->flags &= ~CMD_FLAG_SDTR; 
+-		if (hostdata->msg_buf[4]) 
+-		    synchronous (host, c->device->id, (unsigned char *) 
+-		    	hostdata->msg_buf);
+-		else 
+-		    asynchronous (host, c->device->id);
+-		hostdata->dsp = hostdata->script + hostdata->E_accept_message /
+-		    sizeof(u32);
+-		hostdata->dsp_changed = 1;
+-		return SPECIFIC_INT_NOTHING;
+-	    } else {
+-		if (hostdata->options & OPTION_SYNCHRONOUS)  {
+-		    cmd->flags |= CMD_FLAG_DID_SDTR;
+-		    synchronous (host, c->device->id, (unsigned char *) 
+-			hostdata->msg_buf);
+-		} else {
+-		    hostdata->msg_buf[4] = 0;		/* 0 offset = async */
+-		    asynchronous (host, c->device->id);
+-		}
+-		patch_dsa_32 (cmd->dsa, dsa_msgout_other, 0, 5);
+-		patch_dsa_32 (cmd->dsa, dsa_msgout_other, 1, (u32) 
+-		    virt_to_bus ((void *)&hostdata->msg_buf));
+-		hostdata->dsp = hostdata->script + 
+-		    hostdata->E_respond_message / sizeof(u32);
+-		hostdata->dsp_changed = 1;
+-	    }
+-	    return SPECIFIC_INT_NOTHING;
+-	}
+-	/* Fall through to abort if we couldn't find a cmd, and 
+-	   therefore a dsa structure to twiddle */
+-    case A_int_msg_wdtr:
+-	hostdata->dsp = hostdata->script + hostdata->E_reject_message /
+-	    sizeof(u32);
+-	hostdata->dsp_changed = 1;
+-	return SPECIFIC_INT_NOTHING;
+-    case A_int_err_unexpected_phase:
+-	if (hostdata->options & OPTION_DEBUG_INTR) 
+-	    printk ("scsi%d : unexpected phase\n", host->host_no);
+-	return SPECIFIC_INT_ABORT;
+-    case A_int_err_selected:
+-	if ((hostdata->chip / 100) == 8)
+-	    printk ("scsi%d : selected by target %d\n", host->host_no,
+-	        (int) NCR53c7x0_read8(SDID_REG_800) &7);
+-	else
+-            printk ("scsi%d : selected by target LCRC=0x%02x\n", host->host_no,
+-                (int) NCR53c7x0_read8(LCRC_REG_10));
+-	hostdata->dsp = hostdata->script + hostdata->E_target_abort / 
+-    	    sizeof(u32);
+-	hostdata->dsp_changed = 1;
+-	return SPECIFIC_INT_NOTHING;
+-    case A_int_err_unexpected_reselect:
+-	if ((hostdata->chip / 100) == 8)
+-	    printk ("scsi%d : unexpected reselect by target %d lun %d\n", 
+-	        host->host_no, (int) NCR53c7x0_read8(SDID_REG_800) & 7,
+-	        hostdata->reselected_identify & 7);
+-	else
+-            printk ("scsi%d : unexpected reselect LCRC=0x%02x\n", host->host_no,
+-                (int) NCR53c7x0_read8(LCRC_REG_10));
+-	hostdata->dsp = hostdata->script + hostdata->E_initiator_abort /
+-    	    sizeof(u32);
+-	hostdata->dsp_changed = 1;
+-	return SPECIFIC_INT_NOTHING;
+-/*
+- * Since contingent allegiance conditions are cleared by the next 
+- * command issued to a target, we must issue a REQUEST SENSE 
+- * command after receiving a CHECK CONDITION status, before
+- * another command is issued.
+- * 
+- * Since this NCR53c7x0_cmd will be freed after use, we don't 
+- * care if we step on the various fields, so modify a few things.
+- */
+-    case A_int_err_check_condition: 
+-#if 0
+-	if (hostdata->options & OPTION_DEBUG_INTR) 
+-#endif
+-	    printk ("scsi%d : CHECK CONDITION\n", host->host_no);
+-	if (!c) {
+-	    printk("scsi%d : CHECK CONDITION with no SCSI command\n",
+-		host->host_no);
+-	    return SPECIFIC_INT_PANIC;
+-	}
+-
+-	/* 
+-	 * FIXME : this uses the normal one-byte selection message.
+-	 * 	We may want to renegotiate for synchronous & WIDE transfers
+-	 * 	since these could be the crux of our problem.
+-	 *
+-	 * FIXME : once SCSI-II tagged queuing is implemented, we'll
+-	 * 	have to set this up so that the rest of the DSA
+-	 *	agrees with this being an untagged queue'd command.
+-	 */
+-
+-    	patch_dsa_32 (cmd->dsa, dsa_msgout, 0, 1);
+-
+-    	/* 
+-    	 * Modify the table indirect for COMMAND OUT phase, since 
+-    	 * Request Sense is a six byte command.
+-    	 */
+-
+-    	patch_dsa_32 (cmd->dsa, dsa_cmdout, 0, 6);
+-
+-        /*
+-         * The CDB is now mirrored in our local non-cached
+-         * structure, but keep the old structure up to date as well,
+-         * just in case anyone looks at it.
+-         */
+-
+-	/*
+-	 * XXX Need to worry about data buffer alignment/cache state
+-	 * XXX here, but currently never get A_int_err_check_condition,
+-	 * XXX so ignore problem for now.
+-         */
+-	cmd->cmnd[0] = c->cmnd[0] = REQUEST_SENSE;
+-	cmd->cmnd[1] = c->cmnd[1] &= 0xe0;	/* Zero all but LUN */
+-	cmd->cmnd[2] = c->cmnd[2] = 0;
+-	cmd->cmnd[3] = c->cmnd[3] = 0;
+-	cmd->cmnd[4] = c->cmnd[4] = sizeof(c->sense_buffer);
+-	cmd->cmnd[5] = c->cmnd[5] = 0; 
+-
+-	/*
+-	 * Disable dataout phase, and program datain to transfer to the 
+-	 * sense buffer, and add a jump to other_transfer after the 
+-    	 * command so overflow/underrun conditions are detected.
+-	 */
+-
+-    	patch_dsa_32 (cmd->dsa, dsa_dataout, 0, 
+-	    virt_to_bus(hostdata->script) + hostdata->E_other_transfer);
+-    	patch_dsa_32 (cmd->dsa, dsa_datain, 0, 
+-	    virt_to_bus(cmd->data_transfer_start));
+-    	cmd->data_transfer_start[0] = (((DCMD_TYPE_BMI | DCMD_BMI_OP_MOVE_I | 
+-    	    DCMD_BMI_IO)) << 24) | sizeof(c->sense_buffer);
+-    	cmd->data_transfer_start[1] = (u32) virt_to_bus(c->sense_buffer);
+-
+-	cmd->data_transfer_start[2] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_JUMP) 
+-    	    << 24) | DBC_TCI_TRUE;
+-	cmd->data_transfer_start[3] = (u32) virt_to_bus(hostdata->script) + 
+-	    hostdata->E_other_transfer;
+-
+-    	/*
+-    	 * Currently, this command is flagged as completed, ie 
+-    	 * it has valid status and message data.  Reflag it as
+-    	 * incomplete.  Q - need to do something so that original
+-	 * status, etc are used.
+-    	 */
+-
+-	cmd->result = cmd->cmd->result = 0xffff;		
+-
+-	/* 
+-	 * Restart command as a REQUEST SENSE.
+-	 */
+-	hostdata->dsp = (u32 *) hostdata->script + hostdata->E_select /
+-	    sizeof(u32);
+-	hostdata->dsp_changed = 1;
+-	return SPECIFIC_INT_NOTHING;
+-    case A_int_debug_break:
+-	return SPECIFIC_INT_BREAK;
+-    case A_int_norm_aborted:
+-	hostdata->dsp = (u32 *) hostdata->schedule;
+-	hostdata->dsp_changed = 1;
+-	if (cmd)
+-	    abnormal_finished (cmd, DID_ERROR << 16);
+-	return SPECIFIC_INT_NOTHING;
+-    case A_int_norm_emulateintfly:
+-	NCR53c7x0_intfly(host);
+-	return SPECIFIC_INT_NOTHING;
+-    case A_int_test_1:
+-    case A_int_test_2:
+-	hostdata->idle = 1;
+-	hostdata->test_completed = (dsps - A_int_test_1) / 0x00010000 + 1;
+-	if (hostdata->options & OPTION_DEBUG_INTR)
+-	    printk("scsi%d : test%d complete\n", host->host_no,
+-		hostdata->test_completed);
+-	return SPECIFIC_INT_NOTHING;
+-#ifdef A_int_debug_reselected_ok
+-    case A_int_debug_reselected_ok:
+-	if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+-    	    	OPTION_DEBUG_DISCONNECT)) {
+-	    /* 
+-	     * Note - this dsa is not based on location relative to 
+-	     * the command structure, but to location relative to the 
+-	     * DSA register 
+-	     */	
+-	    u32 *dsa;
+-	    dsa = (u32 *) bus_to_virt (NCR53c7x0_read32(DSA_REG));
+-
+-	    printk("scsi%d : reselected_ok (DSA = 0x%x (virt 0x%p)\n", 
+-		host->host_no, NCR53c7x0_read32(DSA_REG), dsa);
+-	    printk("scsi%d : resume address is 0x%x (virt 0x%p)\n",
+-		    host->host_no, cmd->saved_data_pointer,
+-		    bus_to_virt(cmd->saved_data_pointer));
+-	    print_insn (host, hostdata->script + Ent_reselected_ok / 
+-    	    	    sizeof(u32), "", 1);
+-	    if ((hostdata->chip / 100) == 8)
+-    	        printk ("scsi%d : sxfer=0x%x, scntl3=0x%x\n",
+-		    host->host_no, NCR53c7x0_read8(SXFER_REG),
+-		    NCR53c7x0_read8(SCNTL3_REG_800));
+-	    else
+-    	        printk ("scsi%d : sxfer=0x%x, cannot read SBCL\n",
+-		    host->host_no, NCR53c7x0_read8(SXFER_REG));
+-	    if (c) {
+-		print_insn (host, (u32 *) 
+-		    hostdata->sync[c->device->id].script, "", 1);
+-		print_insn (host, (u32 *) 
+-		    hostdata->sync[c->device->id].script + 2, "", 1);
+-	    }
+-	}
+-    	return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_reselect_check
+-    case A_int_debug_reselect_check:
+-	if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+-	    u32 *dsa;
+-#if 0
+-	    u32 *code;
+-#endif
+-	    /* 
+-	     * Note - this dsa is not based on location relative to 
+-	     * the command structure, but to location relative to the 
+-	     * DSA register 
+-	     */	
+-	    dsa = bus_to_virt (NCR53c7x0_read32(DSA_REG));
+-	    printk("scsi%d : reselected_check_next (DSA = 0x%lx (virt 0x%p))\n",
+-		host->host_no, virt_to_bus(dsa), dsa);
+-	    if (dsa) {
+-		printk("scsi%d : resume address is 0x%x (virt 0x%p)\n",
+-		    host->host_no, cmd->saved_data_pointer,
+-		    bus_to_virt (cmd->saved_data_pointer));
+-#if 0
+-		printk("scsi%d : template code :\n", host->host_no);
+-		for (code = dsa + (Ent_dsa_code_check_reselect - Ent_dsa_zero) 
+-		    / sizeof(u32); code < (dsa + Ent_dsa_zero / sizeof(u32)); 
+-		    code += print_insn (host, code, "", 1));
+-#endif
+-	    }
+-	    print_insn (host, hostdata->script + Ent_reselected_ok / 
+-    	    	    sizeof(u32), "", 1);
+-	}
+-    	return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_dsa_schedule
+-    case A_int_debug_dsa_schedule:
+-	if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+-	    u32 *dsa;
+-	    /* 
+-	     * Note - this dsa is not based on location relative to 
+-	     * the command structure, but to location relative to the 
+-	     * DSA register 
+-	     */	
+-	    dsa = (u32 *) bus_to_virt (NCR53c7x0_read32(DSA_REG));
+-	    printk("scsi%d : dsa_schedule (old DSA = 0x%lx (virt 0x%p))\n", 
+-		host->host_no, virt_to_bus(dsa), dsa);
+-	    if (dsa) 
+-		printk("scsi%d : resume address is 0x%x (virt 0x%p)\n"
+-		       "         (temp was 0x%x (virt 0x%p))\n",
+-		    host->host_no, cmd->saved_data_pointer,
+-		    bus_to_virt (cmd->saved_data_pointer),
+-		    NCR53c7x0_read32 (TEMP_REG),
+-		    bus_to_virt (NCR53c7x0_read32(TEMP_REG)));
+-	}
+-    	return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_scheduled
+-    case A_int_debug_scheduled:
+-	if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+-	    printk("scsi%d : new I/O 0x%x (virt 0x%p) scheduled\n", 
+-		host->host_no, NCR53c7x0_read32(DSA_REG),
+-	    	bus_to_virt(NCR53c7x0_read32(DSA_REG)));
+-	}
+-	return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_idle
+-    case A_int_debug_idle:
+-	if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+-	    printk("scsi%d : idle\n", host->host_no);
+-	}
+-	return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_cmd
+-    case A_int_debug_cmd:
+-	if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+-	    printk("scsi%d : command sent\n");
+-	}
+-    	return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_dsa_loaded
+-    case A_int_debug_dsa_loaded:
+-	if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+-	    printk("scsi%d : DSA loaded with 0x%x (virt 0x%p)\n", host->host_no,
+-		NCR53c7x0_read32(DSA_REG), 
+-		bus_to_virt(NCR53c7x0_read32(DSA_REG)));
+-	}
+-	return SPECIFIC_INT_RESTART; 
+-#endif
+-#ifdef A_int_debug_reselected
+-    case A_int_debug_reselected:
+-	if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+-	    OPTION_DEBUG_DISCONNECT)) {
+-	    if ((hostdata->chip / 100) == 8)
+-		printk("scsi%d : reselected by target %d lun %d\n",
+-		    host->host_no, (int) NCR53c7x0_read8(SDID_REG_800) & ~0x80, 
+-		    (int) hostdata->reselected_identify & 7);
+-	    else
+-		printk("scsi%d : reselected by LCRC=0x%02x lun %d\n",
+-                    host->host_no, (int) NCR53c7x0_read8(LCRC_REG_10),
+-                    (int) hostdata->reselected_identify & 7);
+-	    print_queues(host);
+-	}
+-    	return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_disconnect_msg
+-    case A_int_debug_disconnect_msg:
+-	if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR)) {
+-	    if (c)
+-		printk("scsi%d : target %d lun %d disconnecting\n", 
+-		    host->host_no, c->device->id, c->device->lun);
+-	    else
+-		printk("scsi%d : unknown target disconnecting\n",
+-		    host->host_no);
+-	}
+-	return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_disconnected
+-    case A_int_debug_disconnected:
+-	if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+-		OPTION_DEBUG_DISCONNECT)) {
+-	    printk ("scsi%d : disconnected, new queues are\n", 
+-		host->host_no);
+-	    print_queues(host);
+-#if 0
+-	    /* Not valid on ncr53c710! */
+-    	    printk ("scsi%d : sxfer=0x%x, scntl3=0x%x\n",
+-		host->host_no, NCR53c7x0_read8(SXFER_REG),
+-		NCR53c7x0_read8(SCNTL3_REG_800));
+-#endif
+-	    if (c) {
+-		print_insn (host, (u32 *) 
+-		    hostdata->sync[c->device->id].script, "", 1);
+-		print_insn (host, (u32 *) 
+-		    hostdata->sync[c->device->id].script + 2, "", 1);
+-	    }
+-	}
+-	return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_panic
+-    case A_int_debug_panic:
+-	printk("scsi%d : int_debug_panic received\n", host->host_no);
+-	print_lots (host);
+-	return SPECIFIC_INT_PANIC;
+-#endif
+-#ifdef A_int_debug_saved
+-    case A_int_debug_saved:
+-    	if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+-    	    OPTION_DEBUG_DISCONNECT)) {
+-    	    printk ("scsi%d : saved data pointer 0x%x (virt 0x%p)\n",
+-    	    	host->host_no, cmd->saved_data_pointer,
+-		bus_to_virt (cmd->saved_data_pointer));
+-    	    print_progress (c);
+-    	}
+-    	return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_restored
+-    case A_int_debug_restored:
+-    	if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+-    	    OPTION_DEBUG_DISCONNECT)) {
+-    	    if (cmd) {
+-		int size;
+-    	    	printk ("scsi%d : restored data pointer 0x%x (virt 0x%p)\n",
+-    	    	    host->host_no, cmd->saved_data_pointer, bus_to_virt (
+-		    cmd->saved_data_pointer));
+-		size = print_insn (host, (u32 *) 
+-		    bus_to_virt(cmd->saved_data_pointer), "", 1);
+-		size = print_insn (host, (u32 *) 
+-		    bus_to_virt(cmd->saved_data_pointer) + size, "", 1);
+-    	    	print_progress (c);
+-	    }
+-#if 0
+-	    printk ("scsi%d : datapath residual %d\n",
+-		host->host_no, datapath_residual (host)) ;
+-#endif
+-    	}
+-    	return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_sync
+-    case A_int_debug_sync:
+-    	if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+-    	    OPTION_DEBUG_DISCONNECT|OPTION_DEBUG_SDTR)) {
+-	    unsigned char sxfer = NCR53c7x0_read8 (SXFER_REG), scntl3;
+-	    if ((hostdata->chip / 100) == 8) {
+-		scntl3 = NCR53c7x0_read8 (SCNTL3_REG_800);
+-		if (c) {
+-		  if (sxfer != hostdata->sync[c->device->id].sxfer_sanity ||
+-		    scntl3 != hostdata->sync[c->device->id].scntl3_sanity) {
+-		   	printk ("scsi%d :  sync sanity check failed sxfer=0x%x, scntl3=0x%x",
+-			    host->host_no, sxfer, scntl3);
+-			NCR53c7x0_write8 (SXFER_REG, sxfer);
+-			NCR53c7x0_write8 (SCNTL3_REG_800, scntl3);
+-		    }
+-		} else 
+-    	    	  printk ("scsi%d : unknown command sxfer=0x%x, scntl3=0x%x\n",
+-		    host->host_no, (int) sxfer, (int) scntl3);
+-	    } else {
+-		if (c) {
+-		  if (sxfer != hostdata->sync[c->device->id].sxfer_sanity) {
+-		   	printk ("scsi%d :  sync sanity check failed sxfer=0x%x",
+-			    host->host_no, sxfer);
+-			NCR53c7x0_write8 (SXFER_REG, sxfer);
+-			NCR53c7x0_write8 (SBCL_REG,
+-				hostdata->sync[c->device->id].sscf_710);
+-		    }
+-		} else 
+-    	    	  printk ("scsi%d : unknown command sxfer=0x%x\n",
+-		    host->host_no, (int) sxfer);
+-	    }
+-	}
+-    	return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_datain
+-	case A_int_debug_datain:
+-	    if (hostdata->options & (OPTION_DEBUG_SCRIPT|OPTION_DEBUG_INTR|
+-		OPTION_DEBUG_DISCONNECT|OPTION_DEBUG_SDTR)) {
+-		int size;
+-		if ((hostdata->chip / 100) == 8)
+-		  printk ("scsi%d : In do_datain (%s) sxfer=0x%x, scntl3=0x%x\n"
+-			"         datapath residual=%d\n",
+-		    host->host_no, sbcl_to_phase (NCR53c7x0_read8 (SBCL_REG)),
+-		    (int) NCR53c7x0_read8(SXFER_REG), 
+-		    (int) NCR53c7x0_read8(SCNTL3_REG_800),
+-		    datapath_residual (host)) ;
+-		else
+-		  printk ("scsi%d : In do_datain (%s) sxfer=0x%x\n"
+-			"         datapath residual=%d\n",
+-		    host->host_no, sbcl_to_phase (NCR53c7x0_read8 (SBCL_REG)),
+-		    (int) NCR53c7x0_read8(SXFER_REG), 
+-		    datapath_residual (host)) ;
+-		print_insn (host, dsp, "", 1);
+-		size = print_insn (host, (u32 *) bus_to_virt(dsp[1]), "", 1);
+-		print_insn (host, (u32 *) bus_to_virt(dsp[1]) + size, "", 1);
+-	   } 
+-	return SPECIFIC_INT_RESTART;
+-#endif
+-#ifdef A_int_debug_check_dsa
+-	case A_int_debug_check_dsa:
+-	    if (NCR53c7x0_read8 (SCNTL1_REG) & SCNTL1_CON) {
+-		int sdid;
+-		int tmp;
+-		char *where;
+-		if (hostdata->chip / 100 == 8)
+-		    sdid = NCR53c7x0_read8 (SDID_REG_800) & 15;
+-		else {
+-		    tmp = NCR53c7x0_read8 (SDID_REG_700);
+-		    if (!tmp)
+-			panic ("SDID_REG_700 = 0");
+-		    tmp >>= 1;
+-		    sdid = 0;
+-		    while (tmp) {
+-			tmp >>= 1;
+-			sdid++;
+-		    }
+-		}
+-		where = dsp - NCR53c7x0_insn_size(NCR53c7x0_read8 
+-			(DCMD_REG)) == hostdata->script + 
+-		    	Ent_select_check_dsa / sizeof(u32) ?
+-		    "selection" : "reselection";
+-		if (c && sdid != c->device->id) {
+-		    printk ("scsi%d : SDID target %d != DSA target %d at %s\n",
+-			host->host_no, sdid, c->device->id, where);
+-		    print_lots(host);
+-		    dump_events (host, 20);
+-		    return SPECIFIC_INT_PANIC;
+-		}
+-	    }
+-	    return SPECIFIC_INT_RESTART;
+-#endif
+-    default:
+-	if ((dsps & 0xff000000) == 0x03000000) {
+-	     printk ("scsi%d : misc debug interrupt 0x%x\n",
+-		host->host_no, dsps);
+-	    return SPECIFIC_INT_RESTART;
+-	} else if ((dsps & 0xff000000) == 0x05000000) {
+-	    if (hostdata->events) {
+-		struct NCR53c7x0_event *event;
+-		++hostdata->event_index;
+-		if (hostdata->event_index >= hostdata->event_size)
+-		    hostdata->event_index = 0;
+-		event = (struct NCR53c7x0_event *) hostdata->events + 
+-		    hostdata->event_index;
+-		event->event = (enum ncr_event) dsps;
+-		event->dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG));
+-		if (NCR53c7x0_read8 (SCNTL1_REG) & SCNTL1_CON) {
+-		    if (hostdata->chip / 100 == 8)
+-			event->target = NCR53c7x0_read8(SSID_REG_800);
+-		    else {
+-			unsigned char tmp, sdid;
+-		        tmp = NCR53c7x0_read8 (SDID_REG_700);
+-		        if (!tmp)
+-			    panic ("SDID_REG_700 = 0");
+-		        tmp >>= 1;
+-		        sdid = 0;
+-		        while (tmp) {
+-			    tmp >>= 1;
+-			    sdid++;
+-		        }
+-			event->target = sdid;
+-		    }
+-		}
+-		else 
+-			event->target = 255;
+-
+-		if (event->event == EVENT_RESELECT)
+-		    event->lun = hostdata->reselected_identify & 0xf;
+-		else if (c)
+-		    event->lun = c->device->lun;
+-		else
+-		    event->lun = 255;
+-		do_gettimeofday(&(event->time));
+-		if (c) {
+-		    event->pid = c->pid;
+-		    memcpy ((void *) event->cmnd, (void *) c->cmnd, 
+-			sizeof (event->cmnd));
+-		} else {
+-		    event->pid = -1;
+-		}
+-	    }
+-	    return SPECIFIC_INT_RESTART;
+-	}
+-
+-	printk ("scsi%d : unknown user interrupt 0x%x\n", 
+-	    host->host_no, (unsigned) dsps);
+-	return SPECIFIC_INT_PANIC;
+-    }
+-}
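+-
+-/*
+- * Aside : the 0x05xxxxxx arm of the default case above logs into a
+- * fixed size ring : advance the index, wrap at event_size, overwrite
+- * the oldest slot.  The same idiom in isolation (types illustrative) :
+- */
+-
+-struct ring { int index, size; u32 *slot; };
+-
+-static void
+-ring_log (struct ring *r, u32 value) {
+-    if (++r->index >= r->size)	/* wrap rather than grow */
+-	r->index = 0;
+-    r->slot[r->index] = value;	/* oldest entry is overwritten */
+-}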
+-
+-/* 
+- * XXX - the stock NCR assembler won't output the scriptu.h file,
+- * which #undef's all the #define'd CPP symbols from the script.h
+- * file; without it you will have problems if you use multiple
+- * scripts with the same symbol names.
+- *
+- * If you insist on using NCR's assembler, you could generate
+- * scriptu.h from script.h using something like 
+- *
+- * grep '#define' script.h | \
+- * sed 's/#define[ 	][ 	]*\([_a-zA-Z][_a-zA-Z0-9]*\).*$/#undef \1/' \
+- * > scriptu.h
+- */
+-
+-#include "53c7xx_u.h"
+-
+-/* XXX - add alternate script handling code here */
+-
+-
+-/* 
+- * Function : static void NCR53c7x0_soft_reset (struct Scsi_Host *host)
+- *
+- * Purpose :  perform a soft reset of the NCR53c7xx chip
+- *
+- * Inputs : host - pointer to this host adapter's structure
+- *
+- * Preconditions : NCR53c7x0_init must have been called for this 
+- *      host.
+- * 
+- */
+-
+-static void 
+-NCR53c7x0_soft_reset (struct Scsi_Host *host) {
+-    NCR53c7x0_local_declare();
+-    unsigned long flags;
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    NCR53c7x0_local_setup(host);
+-
+-    local_irq_save(flags);
+-
+-    /* Disable scsi chip and s/w level 7 ints */
+-
+-#ifdef CONFIG_MVME16x
+-    if (MACH_IS_MVME16x)
+-    {
+-        volatile unsigned long v;
+-
+-        v = *(volatile unsigned long *)0xfff4006c;
+-        v &= ~0x8000;
+-        *(volatile unsigned long *)0xfff4006c = v;
+-        v = *(volatile unsigned long *)0xfff4202c;
+-        v &= ~0x10;
+-        *(volatile unsigned long *)0xfff4202c = v;
+-    }
+-#endif
+-    /* Anything specific for your hardware? */
+-
+-    /*
+-     * Do a soft reset of the chip so that everything is 
+-     * reinitialized to the power-on state.
+-     *
+-     * Basically follow the procedure outlined in the NCR53c700
+-     * data manual under Chapter Six, How to Use, Steps Necessary to
+-     * Start SCRIPTS, with the exception of actually starting the 
+-     * script and setting up the synchronous transfer gunk.
+-     */
+-
+-    /* Should we reset the scsi bus here? */
+-
+-    NCR53c7x0_write8(ISTAT_REG_700, ISTAT_10_SRST);
+-    NCR53c7x0_write8(ISTAT_REG_700, 0);
+-
+-    /*
+-     * saved_dcntl is set up in NCR53c7x0_init() before it is overwritten
+-     * here.  We should have some better way of working out the CF bit
+-     * setting..
+-     */
+-
+-    hostdata->saved_dcntl = DCNTL_10_EA|DCNTL_10_COM;
+-    if (hostdata->scsi_clock > 50000000)
+-	hostdata->saved_dcntl |= DCNTL_700_CF_3;
+-    else
+-    if (hostdata->scsi_clock > 37500000)
+-        hostdata->saved_dcntl |= DCNTL_700_CF_2;
+-#if 0
+-    else
+-	/* Any clocks less than 37.5MHz? */
+-#endif
+-
+-    if (hostdata->options & OPTION_DEBUG_TRACE)
+-    	NCR53c7x0_write8(DCNTL_REG, hostdata->saved_dcntl | DCNTL_SSM);
+-    else
+-    	NCR53c7x0_write8(DCNTL_REG, hostdata->saved_dcntl);
+-    /* Following disables snooping - snooping is not required, as non-
+-     * cached pages are used for shared data, and appropriate use is
+-     * made of cache_push/cache_clear.  Indeed, for 68060
+-     * enabling snooping causes disk corruption of ext2fs free block
+-     * bitmaps and the like.  If you have a 68060 with snooping hardwared
+-     * on, then you need to enable CONFIG_060_WRITETHROUGH.
+-     */
+-    NCR53c7x0_write8(CTEST7_REG, CTEST7_10_TT1|CTEST7_STD);
+-    /* Actually burst of eight, according to my 53c710 databook */
+-    NCR53c7x0_write8(hostdata->dmode, DMODE_10_BL_8 | DMODE_10_FC2);
+-    NCR53c7x0_write8(SCID_REG, 1 << host->this_id);
+-    NCR53c7x0_write8(SBCL_REG, 0);
+-    NCR53c7x0_write8(SCNTL1_REG, SCNTL1_ESR_700);
+-    NCR53c7x0_write8(SCNTL0_REG, ((hostdata->options & OPTION_PARITY) ? 
+-            SCNTL0_EPC : 0) | SCNTL0_EPG_700 | SCNTL0_ARB1 | SCNTL0_ARB2);
+-
+-    /*
+-     * Enable all interrupts, except parity which we only want when
+-     * the user requests it.
+-     */
+-
+-    NCR53c7x0_write8(DIEN_REG, DIEN_700_BF |
+-		DIEN_ABRT | DIEN_SSI | DIEN_SIR | DIEN_700_OPC);
+-
+-    NCR53c7x0_write8(SIEN_REG_700, ((hostdata->options & OPTION_PARITY) ?
+-	    SIEN_PAR : 0) | SIEN_700_STO | SIEN_RST | SIEN_UDC |
+-		SIEN_SGE | SIEN_MA);
+-
+-#ifdef CONFIG_MVME16x
+-    if (MACH_IS_MVME16x)
+-    {
+-        volatile unsigned long v;
+-
+-        /* Enable scsi chip and s/w level 7 ints */
+-        v = *(volatile unsigned long *)0xfff40080;
+-        v = (v & ~(0xf << 28)) | (4 << 28);
+-        *(volatile unsigned long *)0xfff40080 = v;
+-        v = *(volatile unsigned long *)0xfff4006c;
+-        v |= 0x8000;
+-        *(volatile unsigned long *)0xfff4006c = v;
+-        v = *(volatile unsigned long *)0xfff4202c;
+-        v = (v & ~0xff) | 0x10 | 4;
+-        *(volatile unsigned long *)0xfff4202c = v;
+-    }
+-#endif
+-    /* Anything needed for your hardware? */
+-    local_irq_restore(flags);
+-}
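+-
+-/*
+- * Aside : the heart of the routine above is the pair of ISTAT writes
+- * -- set the software reset bit, then clear it :
+- *
+- *	NCR53c7x0_write8(ISTAT_REG_700, ISTAT_10_SRST);
+- *	NCR53c7x0_write8(ISTAT_REG_700, 0);
+- *
+- * after which every chip register is back at its power-on value and
+- * the DCNTL/DMODE/SCID/SCNTL* setup must be redone, as the code does.
+- */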
+-
+-
+-/*
+- * Function : static struct NCR53c7x0_cmd *allocate_cmd (Scsi_Cmnd *cmd)
+- * 
+- * Purpose : Return the first free NCR53c7x0_cmd structure (which are 
+- * 	reused in a LIFO manner to minimize cache thrashing).
+- *
+- * Side effects : If we haven't yet scheduled allocation of NCR53c7x0_cmd
+- *	structures for this device, do so.  Attempt to complete all scheduled
+- *	allocations using get_zeroed_page(), putting NCR53c7x0_cmd structures on
+- *	the free list.  Teach programmers not to drink and hack.
+- *
+- * Inputs : cmd - SCSI command
+- *
+- * Returns : NCR53c7x0_cmd structure allocated on behalf of cmd;
+- *	NULL on failure.
+- */
+-
+-static void
+-my_free_page (void *addr, int dummy)
+-{
+-    /* XXX This assumes default cache mode to be IOMAP_FULL_CACHING, which
+-     * XXX may be invalid (CONFIG_060_WRITETHROUGH)
+-     */
+-    kernel_set_cachemode((void *)addr, 4096, IOMAP_FULL_CACHING);
+-    free_page ((u32)addr);
+-}
+-
+-static struct NCR53c7x0_cmd *
+-allocate_cmd (Scsi_Cmnd *cmd) {
+-    struct Scsi_Host *host = cmd->device->host;
+-    struct NCR53c7x0_hostdata *hostdata = 
+-	(struct NCR53c7x0_hostdata *) host->hostdata[0];
+-    u32 real;			/* Real address */
+-    int size;			/* Size of *tmp */
+-    struct NCR53c7x0_cmd *tmp;
+-    unsigned long flags;
+-
+-    if (hostdata->options & OPTION_DEBUG_ALLOCATION)
+-	printk ("scsi%d : num_cmds = %d, can_queue = %d\n"
+-		"         target = %d, lun = %d, %s\n",
+-	    host->host_no, hostdata->num_cmds, host->can_queue,
+-	    cmd->device->id, cmd->device->lun, (hostdata->cmd_allocated[cmd->device->id] &
+-		(1 << cmd->device->lun)) ? "already allocated" : "not allocated");
+-
+-/*
+- * If we have not yet reserved commands for this I_T_L nexus, and
+- * the device exists (as indicated by permanent Scsi_Cmnd structures
+- * being allocated under 1.3.x, or being outside of scan_scsis in
+- * 1.2.x), do so now.
+- */
+-    if (!(hostdata->cmd_allocated[cmd->device->id] & (1 << cmd->device->lun)) &&
+-				cmd->device && cmd->device->has_cmdblocks) {
+-      if ((hostdata->extra_allocate + hostdata->num_cmds) < host->can_queue)
+-          hostdata->extra_allocate += host->cmd_per_lun;
+-      hostdata->cmd_allocated[cmd->device->id] |= (1 << cmd->device->lun);
+-    }
+-
+-    for (; hostdata->extra_allocate > 0 ; --hostdata->extra_allocate, 
+-    	++hostdata->num_cmds) {
+-    /* historically, kmalloc has returned unaligned addresses; pad so we
+-       have enough room to ROUNDUP */
+-	size = hostdata->max_cmd_size + sizeof (void *);
+-#ifdef FORCE_DSA_ALIGNMENT
+-	/*
+-	 * 53c710 rev.0 doesn't have an add-with-carry instruction.
+-	 * Ensure we allocate enough memory to force alignment.
+-	 */
+-	size += 256;
+-#endif
+-/* FIXME: for ISA bus '7xx chips, we need to or GFP_DMA in here */
+-
+-        if (size > 4096) {
+-            printk (KERN_ERR "53c7xx: allocate_cmd size > 4K\n");
+-	    return NULL;
+-	}
+-        real = get_zeroed_page(GFP_ATOMIC);
+-        if (real == 0)
+-        	return NULL;
+-        cache_push(virt_to_phys((void *)real), 4096);
+-        cache_clear(virt_to_phys((void *)real), 4096);
+-        kernel_set_cachemode((void *)real, 4096, IOMAP_NOCACHE_SER);
+-	tmp = ROUNDUP(real, void *);
+-#ifdef FORCE_DSA_ALIGNMENT
+-	{
+-	    if (((u32)tmp & 0xff) > CmdPageStart)
+-		tmp = (struct NCR53c7x0_cmd *)((u32)tmp + 255);
+-	    tmp = (struct NCR53c7x0_cmd *)(((u32)tmp & ~0xff) + CmdPageStart);
+-#if 0
+-	    printk ("scsi: size = %d, real = 0x%08x, tmp set to 0x%08x\n",
+-			size, real, (u32)tmp);
+-#endif
+-	}
+-#endif
+-	tmp->real = (void *)real;
+-	tmp->size = size;			
+-	tmp->free = ((void (*)(void *, int)) my_free_page);
+-	local_irq_save(flags);
+-	tmp->next = hostdata->free;
+-	hostdata->free = tmp;
+-	local_irq_restore(flags);
+-    }
+-    local_irq_save(flags);
+-    tmp = (struct NCR53c7x0_cmd *) hostdata->free;
+-    if (tmp) {
+-	hostdata->free = tmp->next;
+-    }
+-    local_irq_restore(flags);
+-    if (!tmp)
+-	printk ("scsi%d : can't allocate command for target %d lun %d\n",
+-	    host->host_no, cmd->device->id, cmd->device->lun);
+-    return tmp;
+-}
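+-
+-/*
+- * Aside : the free list is deliberately LIFO -- the most recently
+- * freed command, whose page is most likely still cache-resident, is
+- * handed out first.  The push/pop pair in isolation (interrupt
+- * protection as above; helper names illustrative) :
+- */
+-
+-static inline void
+-push_free (struct NCR53c7x0_cmd **list, struct NCR53c7x0_cmd *cmd) {
+-    cmd->next = *list;
+-    *list = cmd;
+-}
+-
+-static inline struct NCR53c7x0_cmd *
+-pop_free (struct NCR53c7x0_cmd **list) {
+-    struct NCR53c7x0_cmd *cmd = *list;
+-    if (cmd)
+-	*list = cmd->next;
+-    return cmd;
+-}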
+-
+-/*
+- * Function : static struct NCR53c7x0_cmd *create_cmd (Scsi_Cmnd *cmd)
+- *
+- * Purpose : allocate a NCR53c7x0_cmd structure, initialize it based on the 
+- * 	Scsi_Cmnd structure passed in cmd, including dsa and Linux field 
+- * 	initialization, and dsa code relocation.
+- *
+- * Inputs : cmd - SCSI command
+- *
+- * Returns : NCR53c7x0_cmd structure corresponding to cmd,
+- *	NULL on failure.
+- */
+-static struct NCR53c7x0_cmd *
+-create_cmd (Scsi_Cmnd *cmd) {
+-    NCR53c7x0_local_declare();
+-    struct Scsi_Host *host = cmd->device->host;
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-        host->hostdata[0];	
+-    struct NCR53c7x0_cmd *tmp; 	/* NCR53c7x0_cmd structure for this command */
+-    int datain,  		/* Number of instructions per phase */
+-	dataout;
+-    int data_transfer_instructions, /* Count of dynamic instructions */
+-    	i;			/* Counter */
+-    u32 *cmd_datain,		/* Address of datain/dataout code */
+-	*cmd_dataout;		/* Incremented as we assemble */
+-#ifdef notyet
+-    unsigned char *msgptr;	/* Current byte in select message */
+-    int msglen;			/* Length of whole select message */
+-#endif
+-    unsigned long flags;
+-    u32 exp_select_indirect;	/* Used in sanity check */
+-    NCR53c7x0_local_setup(cmd->device->host);
+-
+-    if (!(tmp = allocate_cmd (cmd)))
+-	return NULL;
+-
+-    /*
+-     * Copy CDB and initialised result fields from Scsi_Cmnd to NCR53c7x0_cmd.
+-     * We do this because NCR53c7x0_cmd may have a special cache mode
+-     * selected to cope with lack of bus snooping, etc.
+-     */
+-
+-    memcpy(tmp->cmnd, cmd->cmnd, 12);
+-    tmp->result = cmd->result;
+-
+-    /*
+-     * Decide whether we need to generate commands for DATA IN,
+-     * DATA OUT, neither, or both based on the SCSI command 
+-     */
+-
+-    switch (cmd->cmnd[0]) {
+-    /* These commands do DATA IN */
+-    case INQUIRY:
+-    case MODE_SENSE:
+-    case READ_6:
+-    case READ_10:
+-    case READ_CAPACITY:
+-    case REQUEST_SENSE:
+-    case READ_BLOCK_LIMITS:
+-    case READ_TOC:
+-	datain = 2 * (cmd->use_sg ? cmd->use_sg : 1) + 3;
+-    	dataout = 0;
+-	break;
+-    /* These commands do DATA OUT */
+-    case MODE_SELECT: 
+-    case WRITE_6:
+-    case WRITE_10:
+-#if 0
+-	printk("scsi%d : command is ", host->host_no);
+-	__scsi_print_command(cmd->cmnd);
+-#endif
+-#if 0
+-	printk ("scsi%d : %d scatter/gather segments\n", host->host_no,
+-	    cmd->use_sg);
+-#endif
+-    	datain = 0;
+-	dataout = 2 * (cmd->use_sg ? cmd->use_sg : 1) + 3;
+-#if 0
+-	hostdata->options |= OPTION_DEBUG_INTR;
+-#endif
+-	break;
+-    /* 
+-     * These commands do no data transfer, we should force an
+-     * interrupt if a data phase is attempted on them.
+-     */
+-    case TEST_UNIT_READY:
+-    case ALLOW_MEDIUM_REMOVAL:
+-    case START_STOP:
+-    	datain = dataout = 0;
+-	break;
+-    /*
+-     * We don't know about these commands, so generate code to handle
+-     * both DATA IN and DATA OUT phases.  More efficient to identify them
+-     * and add them to the above cases.
+-     */
+-    default:
+-	printk("scsi%d : datain+dataout for command ", host->host_no);
+-	__scsi_print_command(cmd->cmnd);
+-	datain = dataout = 2 * (cmd->use_sg ? cmd->use_sg : 1) + 3;
+-    }
+-
+-    /*
+-     * New code : so that active pointers work correctly regardless
+-     * 	of where the saved data pointer is, we want to immediately
+-     * 	enter the dynamic code after selection, and on a non-data
+-     * 	phase perform a CALL to the non-data phase handler, which
+-     * 	returns back to this address.
+-     *
+-     * 	If a phase mismatch is encountered in the middle of a 
+-     * 	Block MOVE instruction, we want to _leave_ that instruction
+-     *	unchanged (as is currently the case), modify a temporary buffer,
+-     *	and point the active pointer (TEMP) at it.
+-     *
+-     * 	Furthermore, we want to implement a saved data pointer, 
+-     * 	set by the SAVE_DATA_POINTERs message.
+-     *
+-     * 	So, the data transfer segments will change to 
+-     *		CALL data_transfer, WHEN NOT data phase
+-     *		MOVE x, x, WHEN data phase
+-     *		( repeat )
+-     *		JUMP other_transfer
+-     */
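+-
+-    /*
+-     * Aside : for a two segment read, the dynamic code assembled
+-     * below therefore ends up shaped like (pseudo-SCRIPTS, addresses
+-     * elided) :
+-     *
+-     *		CALL data_transfer, WHEN NOT DATA_IN
+-     *		MOVE len0, buf0, WHEN DATA_IN
+-     *		CALL data_transfer, WHEN NOT DATA_IN
+-     *		MOVE len1, buf1, WHEN DATA_IN
+-     *		JUMP other_transfer
+-     *
+-     * so a phase mismatch around any MOVE lands in the handler, and a
+-     * restart resumes at the instruction that follows it.
+-     */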
+-
+-    data_transfer_instructions = datain + dataout;
+-
+-    /*
+-     * When we perform a request sense, we overwrite various things,
+-     * including the data transfer code.  Make sure we have enough
+-     * space to do that.
+-     */
+-
+-    if (data_transfer_instructions < 2)
+-    	data_transfer_instructions = 2;
+-
+-
+-    /*
+-     * The saved data pointer is set up so that a RESTORE POINTERS message 
+-     * will start the data transfer over at the beginning.
+-     */
+-
+-    tmp->saved_data_pointer = virt_to_bus (hostdata->script) + 
+-	hostdata->E_data_transfer;
+-
+-    /*
+-     * Initialize Linux specific fields.
+-     */
+-
+-    tmp->cmd = cmd;
+-    tmp->next = NULL;
+-    tmp->flags = 0;
+-    tmp->dsa_next_addr = virt_to_bus(tmp->dsa) + hostdata->dsa_next - 
+-	hostdata->dsa_start;
+-    tmp->dsa_addr = virt_to_bus(tmp->dsa) - hostdata->dsa_start;
+-
+-    /* 
+-     * Calculate addresses of dynamic code to fill in DSA
+-     */
+-
+-    tmp->data_transfer_start = tmp->dsa + (hostdata->dsa_end - 
+-    	hostdata->dsa_start) / sizeof(u32);
+-    tmp->data_transfer_end = tmp->data_transfer_start + 
+-    	2 * data_transfer_instructions;
+-
+-    cmd_datain = datain ? tmp->data_transfer_start : NULL;
+-    cmd_dataout = dataout ? (datain ? cmd_datain + 2 * datain : tmp->
+-    	data_transfer_start) : NULL;
+-
+-    /*
+-     * Fill in the NCR53c7x0_cmd structure as follows
+-     * dsa, with fixed up DSA code
+-     * datain code
+-     * dataout code
+-     */
+-
+-    /* Copy template code into dsa and perform all necessary fixups */
+-    if (hostdata->dsa_fixup)
+-    	hostdata->dsa_fixup(tmp);
+-
+-    patch_dsa_32(tmp->dsa, dsa_next, 0, 0);
+-    /*
+-     * XXX is this giving 53c710 access to the Scsi_Cmnd in some way?
+-     * Do we need to change it for caching reasons?
+-     */
+-    patch_dsa_32(tmp->dsa, dsa_cmnd, 0, virt_to_bus(cmd));
+-
+-    if (hostdata->options & OPTION_DEBUG_SYNCHRONOUS) {
+-
+-	exp_select_indirect = ((1 << cmd->device->id) << 16) |
+-			(hostdata->sync[cmd->device->id].sxfer_sanity << 8);
+-
+-	if (hostdata->sync[cmd->device->id].select_indirect !=
+-				exp_select_indirect) {
+-	    printk ("scsi%d :  sanity check failed select_indirect=0x%x\n",
+-		host->host_no, hostdata->sync[cmd->device->id].select_indirect);
+-	    FATAL(host);
+-
+-	}
+-    }
+-
+-    patch_dsa_32(tmp->dsa, dsa_select, 0,
+-		hostdata->sync[cmd->device->id].select_indirect);
+-
+-    /*
+-     * Right now, we'll do the WIDE and SYNCHRONOUS negotiations on
+-     * different commands; although it should be trivial to do them
+-     * both at the same time.
+-     */
+-    if (hostdata->initiate_wdtr & (1 << cmd->device->id)) {
+-	memcpy ((void *) (tmp->select + 1), (void *) wdtr_message,
+-	    sizeof(wdtr_message));
+-    	patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1 + sizeof(wdtr_message));
+-	local_irq_save(flags);
+-	hostdata->initiate_wdtr &= ~(1 << cmd->device->id);
+-	local_irq_restore(flags);
+-    } else if (hostdata->initiate_sdtr & (1 << cmd->device->id)) {
+-	memcpy ((void *) (tmp->select + 1), (void *) sdtr_message, 
+-	    sizeof(sdtr_message));
+-    	patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1 + sizeof(sdtr_message));
+-	tmp->flags |= CMD_FLAG_SDTR;
+-	local_irq_save(flags);
+-	hostdata->initiate_sdtr &= ~(1 << cmd->device->id);
+-	local_irq_restore(flags);
+-    
+-    }
+-#if 1
+-    else if (!(hostdata->talked_to & (1 << cmd->device->id)) &&
+-		!(hostdata->options & OPTION_NO_ASYNC)) {
+-
+-	memcpy ((void *) (tmp->select + 1), (void *) async_message, 
+-	    sizeof(async_message));
+-    	patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1 + sizeof(async_message));
+-	tmp->flags |= CMD_FLAG_SDTR;
+-    } 
+-#endif
+-    else 
+-    	patch_dsa_32(tmp->dsa, dsa_msgout, 0, 1);
+-
+-    hostdata->talked_to |= (1 << cmd->device->id);
+-    tmp->select[0] = (hostdata->options & OPTION_DISCONNECT) ? 
+-	IDENTIFY (1, cmd->device->lun) : IDENTIFY (0, cmd->device->lun);
+-    patch_dsa_32(tmp->dsa, dsa_msgout, 1, virt_to_bus(tmp->select));
+-    patch_dsa_32(tmp->dsa, dsa_cmdout, 0, cmd->cmd_len);
+-    patch_dsa_32(tmp->dsa, dsa_cmdout, 1, virt_to_bus(tmp->cmnd));
+-    patch_dsa_32(tmp->dsa, dsa_dataout, 0, cmd_dataout ? 
+-    	    virt_to_bus (cmd_dataout)
+-	: virt_to_bus (hostdata->script) + hostdata->E_other_transfer);
+-    patch_dsa_32(tmp->dsa, dsa_datain, 0, cmd_datain ? 
+-    	    virt_to_bus (cmd_datain) 
+-	: virt_to_bus (hostdata->script) + hostdata->E_other_transfer);
+-    /* 
+-     * XXX - need to make endian aware, should use separate variables
+-     * for both status and message bytes.
+-     */
+-    patch_dsa_32(tmp->dsa, dsa_msgin, 0, 1);
+-/* 
+- * FIXME : these only work for little endian.  We probably want to 
+- * 	provide message and status fields in the NCR53c7x0_cmd 
+- *	structure, and assign them to cmd->result when we're done.
+- */
+-#ifdef BIG_ENDIAN
+-    patch_dsa_32(tmp->dsa, dsa_msgin, 1, virt_to_bus(&tmp->result) + 2);
+-    patch_dsa_32(tmp->dsa, dsa_status, 0, 1);
+-    patch_dsa_32(tmp->dsa, dsa_status, 1, virt_to_bus(&tmp->result) + 3);
+-#else
+-    patch_dsa_32(tmp->dsa, dsa_msgin, 1, virt_to_bus(&tmp->result) + 1);
+-    patch_dsa_32(tmp->dsa, dsa_status, 0, 1);
+-    patch_dsa_32(tmp->dsa, dsa_status, 1, virt_to_bus(&tmp->result));
+-#endif
+-    patch_dsa_32(tmp->dsa, dsa_msgout_other, 0, 1);
+-    patch_dsa_32(tmp->dsa, dsa_msgout_other, 1, 
+-	virt_to_bus(&(hostdata->NCR53c7xx_msg_nop)));
+-    
+-    /*
+-     * Generate code for zero or more of the DATA IN, DATA OUT phases 
+-     * in the format 
+-     *
+-     * CALL data_transfer, WHEN NOT phase
+-     * MOVE first buffer length, first buffer address, WHEN phase
+-     * ...
+-     * MOVE last buffer length, last buffer address, WHEN phase
+-     * JUMP other_transfer
+-     */
+-
+-/* 
+- * See if we're getting to data transfer by generating an unconditional 
+- * interrupt.
+- */
+-#if 0
+-    if (datain) {
+-	cmd_datain[0] = 0x98080000;
+-	cmd_datain[1] = 0x03ffd00d;
+-	cmd_datain += 2;
+-    }
+-#endif
+-
+-/* 
+- * XXX - I'm undecided whether all of this nonsense is faster
+- * in the long run, or whether I should just go and implement a loop
+- * on the NCR chip using table indirect mode.
+- *
+- * In any case, this is how it _must_ be done for 53c700/700-66 chips,
+- * so this stays even when we come up with something better.
+- *
+- * When we're limited to 1 simultaneous command, with no overlapping
+- * processing, we're seeing 630K/sec, with 7% CPU usage on a slow
+- * SyQuest 45M drive.
+- *
+- * Not bad, not good. We'll see.
+- */
+-
+-    tmp->bounce.len = 0;	/* Assume aligned buffer */
+-
+-    for (i = 0; cmd->use_sg ? (i < cmd->use_sg) : !i; cmd_datain += 4, 
+-	cmd_dataout += 4, ++i) {
+-	u32 vbuf = cmd->use_sg
+-	    ? (u32)page_address(((struct scatterlist *)cmd->request_buffer)[i].page)+
+-	      ((struct scatterlist *)cmd->request_buffer)[i].offset
+-	    : (u32)(cmd->request_buffer);
+-	u32 bbuf = virt_to_bus((void *)vbuf);
+-	u32 count = cmd->use_sg ?
+-	    ((struct scatterlist *)cmd->request_buffer)[i].length :
+-	    cmd->request_bufflen;
+-
+-	/*
+-	 * If we have buffers which are not aligned with 16 byte cache
+-	 * lines, then we just hope nothing accesses the other parts of
+-	 * those cache lines while the transfer is in progress.  That would
+-	 * fill the cache, and subsequent reads of the dma data would pick
+-	 * up the wrong thing.
+-	 * XXX We need a bounce buffer to handle that correctly.
+-	 */
+-
+-	if (((bbuf & 15) || (count & 15)) && (datain || dataout))
+-	{
+-	    /* Bounce buffer needed */
+-	    if (cmd->use_sg)
+-		printk ("53c7xx: Non-aligned buffer with use_sg\n");
+-	    else if (datain && dataout)
+-                printk ("53c7xx: Non-aligned buffer with datain && dataout\n");
+-            else if (count > 256)
+-		printk ("53c7xx: Non-aligned transfer > 256 bytes\n");
+-	    else
+-	    {
+-		    if (datain)
+-		    {
+-			tmp->bounce.len = count;
+-			tmp->bounce.addr = vbuf;
+-			bbuf = virt_to_bus(tmp->bounce.buf);
+-			tmp->bounce.buf[0] = 0xff;
+-			tmp->bounce.buf[1] = 0xfe;
+-			tmp->bounce.buf[2] = 0xfd;
+-			tmp->bounce.buf[3] = 0xfc;
+-	    	    }
+-	    	    if (dataout)
+-	    	    {
+-			memcpy ((void *)tmp->bounce.buf, (void *)vbuf, count);
+-			bbuf = virt_to_bus(tmp->bounce.buf);
+-		    }
+-	    }
+-	}
+-
+-	if (datain) {
+-            cache_clear(virt_to_phys((void *)vbuf), count);
+-	    /* CALL other_in, WHEN NOT DATA_IN */  
+-	    cmd_datain[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_CALL | 
+-		DCMD_TCI_IO) << 24) | 
+-		DBC_TCI_WAIT_FOR_VALID | DBC_TCI_COMPARE_PHASE;
+-	    cmd_datain[1] = virt_to_bus (hostdata->script) + 
+-		hostdata->E_other_in;
+-	    /* MOVE count, buf, WHEN DATA_IN */
+-	    cmd_datain[2] = ((DCMD_TYPE_BMI | DCMD_BMI_OP_MOVE_I | DCMD_BMI_IO) 
+-    	    	<< 24) | count;
+-	    cmd_datain[3] = bbuf;
+-#if 0
+-	    print_insn (host, cmd_datain, "dynamic ", 1);
+-	    print_insn (host, cmd_datain + 2, "dynamic ", 1);
+-#endif
+-	}
+-	if (dataout) {
+-            cache_push(virt_to_phys((void *)vbuf), count);
+-	    /* CALL other_out, WHEN NOT DATA_OUT */
+-	    cmd_dataout[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_CALL) << 24) | 
+-		DBC_TCI_WAIT_FOR_VALID | DBC_TCI_COMPARE_PHASE;
+-	    cmd_dataout[1] = virt_to_bus(hostdata->script) + 
+-    	    	hostdata->E_other_out;
+-	    /* MOVE count, buf, WHEN DATA_OUT */
+-	    cmd_dataout[2] = ((DCMD_TYPE_BMI | DCMD_BMI_OP_MOVE_I) << 24) 
+-		| count;
+-	    cmd_dataout[3] = bbuf;
+-#if 0
+-	    print_insn (host, cmd_dataout, "dynamic ", 1);
+-	    print_insn (host, cmd_dataout + 2, "dynamic ", 1);
+-#endif
+-	}
+-    }
+-
+-    /*
+-     * Install JUMP instructions after the data transfer routines to return
+-     * control to the do_other_transfer routines.
+-     */
+-  
+-    
+-    if (datain) {
+-	cmd_datain[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_JUMP) << 24) |
+-    	    DBC_TCI_TRUE;
+-	cmd_datain[1] = virt_to_bus(hostdata->script) + 
+-    	    hostdata->E_other_transfer;
+-#if 0
+-	print_insn (host, cmd_datain, "dynamic jump ", 1);
+-#endif
+-	cmd_datain += 2; 
+-    }
+-#if 0
+-    if (datain) {
+-	cmd_datain[0] = 0x98080000;
+-	cmd_datain[1] = 0x03ffdeed;
+-	cmd_datain += 2;
+-    }
+-#endif
+-    if (dataout) {
+-	cmd_dataout[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_JUMP) << 24) |
+-    	    DBC_TCI_TRUE;
+-	cmd_dataout[1] = virt_to_bus(hostdata->script) + 
+-    	    hostdata->E_other_transfer;
+-#if 0
+-	print_insn (host, cmd_dataout, "dynamic jump ", 1);
+-#endif
+-	cmd_dataout += 2;
+-    }
+-
+-    return tmp;
+-}
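/*
 * Editorial sketch, not driver code: each dynamically generated SCRIPTS
 * instruction built above occupies two 32-bit words -- the DCMD opcode in
 * the top byte of word 0, the 24-bit DBC field (the byte count, for block
 * moves) below it, and a bus address in word 1.  A hypothetical decoder
 * (the function name is invented; the macros are the driver's own):
 */
static void decode_move (const u32 insn[2])
{
	unsigned char dcmd = insn[0] >> 24;	/* e.g. DCMD_TYPE_BMI | DCMD_BMI_OP_MOVE_I */
	u32 count = insn[0] & 0x00ffffff;	/* bytes to transfer */
	u32 addr = insn[1];			/* bus address of the buffer */

	printk ("MOVE %u, 0x%08x (dcmd 0x%02x)\n", count, addr, dcmd);
}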
+-
+-/*
+- * Function : int NCR53c7xx_queue_command (Scsi_Cmnd *cmd,
+- *      void (*done)(Scsi_Cmnd *))
+- *
+- * Purpose :  enqueues a SCSI command
+- *
+- * Inputs : cmd - SCSI command, done - function called on completion, with
+- *      a pointer to the command descriptor.
+- *
+- * Returns : 0
+- *
+- * Side effects :
+- *      cmd is added to the per instance driver issue_queue, with major
+- *      twiddling done to the host specific fields of cmd.  If the
+- *      process_issue_queue coroutine isn't running, it is restarted.
+- * 
+- * NOTE : we use the host_scribble field of the Scsi_Cmnd structure to 
+- *	hold our own data, and pervert the ptr field of the SCp field
+- *	to create a linked list.
+- */
+-
+-int
+-NCR53c7xx_queue_command (Scsi_Cmnd *cmd, void (* done)(Scsi_Cmnd *)) {
+-    struct Scsi_Host *host = cmd->device->host;
+-    struct NCR53c7x0_hostdata *hostdata = 
+-	(struct NCR53c7x0_hostdata *) host->hostdata[0];
+-    unsigned long flags;
+-    Scsi_Cmnd *tmp;
+-
+-    cmd->scsi_done = done;
+-    cmd->host_scribble = NULL;
+-    cmd->SCp.ptr = NULL;
+-    cmd->SCp.buffer = NULL;
+-
+-#ifdef VALID_IDS
+-    /* Ignore commands on invalid IDs */
+-    if (!hostdata->valid_ids[cmd->device->id]) {
+-        printk("scsi%d : ignoring target %d lun %d\n", host->host_no,
+-            cmd->device->id, cmd->device->lun);
+-        cmd->result = (DID_BAD_TARGET << 16);
+-        done(cmd);
+-        return 0;
+-    }
+-#endif
+-
+-    local_irq_save(flags);
+-    if ((hostdata->options & (OPTION_DEBUG_INIT_ONLY|OPTION_DEBUG_PROBE_ONLY)) 
+-	|| ((hostdata->options & OPTION_DEBUG_TARGET_LIMIT) &&
+-	    !(hostdata->debug_lun_limit[cmd->device->id] & (1 << cmd->device->lun)))
+-#ifdef LINUX_1_2
+-	|| cmd->device->id > 7
+-#else
+-	|| cmd->device->id >= host->max_id
+-#endif
+-	|| cmd->device->id == host->this_id
+-	|| hostdata->state == STATE_DISABLED) {
+-	printk("scsi%d : disabled or bad target %d lun %d\n", host->host_no,
+-	    cmd->device->id, cmd->device->lun);
+-	cmd->result = (DID_BAD_TARGET << 16);
+-	done(cmd);
+-	local_irq_restore(flags);
+-	return 0;
+-    }
+-
+-    if ((hostdata->options & OPTION_DEBUG_NCOMMANDS_LIMIT) &&
+-	(hostdata->debug_count_limit == 0)) {
+-	printk("scsi%d : maximum commands exceeded\n", host->host_no);
+-	cmd->result = (DID_BAD_TARGET << 16);
+-	done(cmd);
+-	local_irq_restore(flags);
+-	return 0;
+-    }
+-
+-    if (hostdata->options & OPTION_DEBUG_READ_ONLY) {
+-	switch (cmd->cmnd[0]) {
+-	case WRITE_6:
+-	case WRITE_10:
+-	    printk("scsi%d : WRITE attempted with NO_WRITE debugging flag set\n",
+-		host->host_no);
+-	    cmd->result = (DID_BAD_TARGET << 16);
+-	    done(cmd);
+-	    local_irq_restore(flags);
+-	    return 0;
+-	}
+-    }
+-
+-    if ((hostdata->options & OPTION_DEBUG_TARGET_LIMIT) &&
+-	    hostdata->debug_count_limit != -1) 
+-	--hostdata->debug_count_limit;
+-
+-    cmd->result = 0xffff;	/* The NCR will overwrite message
+-				       and status with valid data */
+-    cmd->host_scribble = (unsigned char *) create_cmd (cmd);
+-
+-    /*
+-     * REQUEST SENSE commands are inserted at the head of the queue 
+-     * so that we do not clear the contingent allegiance condition
+-     * they may be looking at.
+-     */
+-
+-    if (!(hostdata->issue_queue) || (cmd->cmnd[0] == REQUEST_SENSE)) {
+-	cmd->SCp.ptr = (unsigned char *) hostdata->issue_queue;
+-	hostdata->issue_queue = cmd;
+-    } else {
+-	for (tmp = (Scsi_Cmnd *) hostdata->issue_queue; tmp->SCp.ptr; 
+-		tmp = (Scsi_Cmnd *) tmp->SCp.ptr);
+-	tmp->SCp.ptr = (unsigned char *) cmd;
+-    }
+-    local_irq_restore(flags);
+-    run_process_issue_queue();
+-    return 0;
+-}
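/*
 * Editorial sketch, not driver code: as the NOTE above says, the issue
 * queue is an intrusive singly linked list threaded through SCp.ptr.
 * A hypothetical helper (name invented) showing the traversal idiom the
 * enqueue loop above uses to find the tail:
 */
static Scsi_Cmnd *issue_queue_tail (Scsi_Cmnd *head)
{
	Scsi_Cmnd *tmp;

	for (tmp = head; tmp && tmp->SCp.ptr;
			tmp = (Scsi_Cmnd *) tmp->SCp.ptr)
		;	/* walk until SCp.ptr is NULL */
	return tmp;
}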
+-
+-/*
+- * Function : void to_schedule_list (struct Scsi_Host *host,
+- * 	struct NCR53c7x0_hostdata * hostdata, Scsi_Cmnd *cmd)
+- *
+- * Purpose : takes a SCSI command which was just removed from the 
+- *	issue queue, and deals with it by inserting it in the first
+- *	free slot in the schedule list or by terminating it immediately.
+- *
+- * Inputs : 
+- *	host - SCSI host adapter; hostdata - hostdata structure for 
+- *	this adapter; cmd - a pointer to the command; its Scsi_Cmnd should
+- *	have the host_scribble field initialized to point to a valid
+- *	NCR53c7x0_cmd structure.
+- * Side effects : 
+- *      cmd is added to the per instance schedule list, with minor 
+- *      twiddling done to the host specific fields of cmd.
+- *
+- */
+-
+-static __inline__ void
+-to_schedule_list (struct Scsi_Host *host, struct NCR53c7x0_hostdata *hostdata,
+-    struct NCR53c7x0_cmd *cmd) {
+-    NCR53c7x0_local_declare();
+-    Scsi_Cmnd *tmp = cmd->cmd;
+-    unsigned long flags;
+-    /* dsa start is negative, so subtraction is used */
+-    volatile u32 *ncrcurrent;
+-
+-    int i;
+-    NCR53c7x0_local_setup(host);
+-#if 0
+-    printk("scsi%d : new dsa is 0x%lx (virt 0x%p)\n", host->host_no, 
+-	virt_to_bus(hostdata->dsa), hostdata->dsa);
+-#endif
+-
+-    local_irq_save(flags);
+-    
+-    /* 
+-     * Work around race condition : if an interrupt fired and we 
+-     * got disabled, forget about this command.
+-     */
+-
+-    if (hostdata->state == STATE_DISABLED) {
+-	printk("scsi%d : driver disabled\n", host->host_no);
+-	tmp->result = (DID_BAD_TARGET << 16);
+-	cmd->next = (struct NCR53c7x0_cmd *) hostdata->free;
+-	hostdata->free = cmd;
+-	tmp->scsi_done(tmp);
+-	local_irq_restore(flags);
+-	return;
+-    }
+-
+-    for (i = host->can_queue, ncrcurrent = hostdata->schedule; 
+-	i > 0  && ncrcurrent[0] != hostdata->NOP_insn;
+-	--i, ncrcurrent += 2 /* JUMP instructions are two words */);
+-
+-    if (i > 0) {
+-	++hostdata->busy[tmp->device->id][tmp->device->lun];
+-	cmd->next = hostdata->running_list;
+-	hostdata->running_list = cmd;
+-
+-	/* Restore this instruction to a NOP once the command starts */
+-	cmd->dsa [(hostdata->dsa_jump_dest - hostdata->dsa_start) / 
+-	    sizeof(u32)] = (u32) virt_to_bus ((void *)ncrcurrent);
+-	/* Replace the current jump operand.  */
+-	ncrcurrent[1] =
+-	    virt_to_bus ((void *) cmd->dsa) + hostdata->E_dsa_code_begin -
+-	    hostdata->E_dsa_code_template;
+-	/* Replace the NOP instruction with a JUMP */
+-	ncrcurrent[0] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) << 24) |
+-	    DBC_TCI_TRUE;
+-    }  else {
+-	printk ("scsi%d: no free slot\n", host->host_no);
+-	disable(host);
+-	tmp->result = (DID_ERROR << 16);
+-	cmd->next = (struct NCR53c7x0_cmd *) hostdata->free;
+-	hostdata->free = cmd;
+-	tmp->scsi_done(tmp);
+-	local_irq_restore(flags);
+-	return;
+-    }
+-
+-    /* 
+-     * If the NCR chip is in an idle state, start it running the scheduler
+-     * immediately.  Otherwise, signal the chip to jump to schedule as 
+-     * soon as it is idle.
+-     */
+-
+-    if (hostdata->idle) {
+-	hostdata->idle = 0;
+-	hostdata->state = STATE_RUNNING;
+-	NCR53c7x0_write32 (DSP_REG,  virt_to_bus ((void *)hostdata->schedule));
+-	if (hostdata->options & OPTION_DEBUG_TRACE)
+-	    NCR53c7x0_write8 (DCNTL_REG, hostdata->saved_dcntl |
+-				DCNTL_SSM | DCNTL_STD);
+-    } else {
+-	NCR53c7x0_write8(hostdata->istat, ISTAT_10_SIGP);
+-    }
+-
+-    local_irq_restore(flags);
+-}
+-
+-/*
+- * Function : busyp (struct Scsi_Host *host, struct NCR53c7x0_hostdata 
+- *	*hostdata, Scsi_Cmnd *cmd)
+- *
+- * Purpose : decide if we can pass the given SCSI command on to the 
+- *	device in question or not.
+- *  
+- * Returns : non-zero when we're busy, 0 when we aren't.
+- */
+-
+-static __inline__ int
+-busyp (struct Scsi_Host *host, struct NCR53c7x0_hostdata *hostdata, 
+-    Scsi_Cmnd *cmd) {
+-    /* FIXME : in the future, this needs to accommodate SCSI-II tagged
+-       queuing, and we may be able to play with fairness here a bit.
+-     */
+-    return hostdata->busy[cmd->device->id][cmd->device->lun];
+-}
+-
+-/*
+- * Function : process_issue_queue (void)
+- *
+- * Purpose : transfer commands from the issue queue to the NCR start queue 
+- *	of each NCR53c7/8xx in the system, avoiding kernel stack 
+- *	overflows when the scsi_done() function is invoked recursively.
+- * 
+- * NOTE : process_issue_queue exits with interrupts *disabled*, so the 
+- *	caller must reenable them if it desires.
+- * 
+- * NOTE : process_issue_queue should be called from both 
+- *	NCR53c7x0_queue_command() and from the interrupt handler 
+- *	after command completion in case NCR53c7x0_queue_command()
+- * 	isn't invoked again but we've freed up resources that are
+- *	needed.
+- */
+-
+-static void 
+-process_issue_queue (unsigned long flags) {
+-    Scsi_Cmnd *tmp, *prev;
+-    struct Scsi_Host *host;
+-    struct NCR53c7x0_hostdata *hostdata;
+-    int done;
+-
+-    /*
+-     * We run (with interrupts disabled) until we're sure that none of 
+-     * the host adapters have anything that can be done, at which point 
+-     * we set process_issue_queue_running to 0 and exit.
+-     *
+-     * Interrupts are enabled before doing various other internal 
+-     * instructions, after we've decided that we need to run through
+-     * the loop again.
+-     *
+-     */
+-
+-    do {
+-	local_irq_disable(); /* Freeze request queues */
+-	done = 1;
+-	for (host = first_host; host && host->hostt == the_template;
+-	    host = host->next) {
+-	    hostdata = (struct NCR53c7x0_hostdata *) host->hostdata[0];
+-	    local_irq_disable();
+-	    if (hostdata->issue_queue) {
+-	    	if (hostdata->state == STATE_DISABLED) {
+-		    tmp = (Scsi_Cmnd *) hostdata->issue_queue;
+-		    hostdata->issue_queue = (Scsi_Cmnd *) tmp->SCp.ptr;
+-		    tmp->result = (DID_BAD_TARGET << 16);
+-		    if (tmp->host_scribble) {
+-			((struct NCR53c7x0_cmd *)tmp->host_scribble)->next = 
+-			    hostdata->free;
+-			hostdata->free = 
+-			    (struct NCR53c7x0_cmd *)tmp->host_scribble;
+-			tmp->host_scribble = NULL;
+-		    }
+-		    tmp->scsi_done (tmp);
+-		    done = 0;
+-		} else 
+-		    for (tmp = (Scsi_Cmnd *) hostdata->issue_queue, 
+-			prev = NULL; tmp; prev = tmp, tmp = (Scsi_Cmnd *) 
+-			tmp->SCp.ptr) 
+-			if (!tmp->host_scribble || 
+-			    !busyp (host, hostdata, tmp)) {
+-				if (prev)
+-				    prev->SCp.ptr = tmp->SCp.ptr;
+-				else
+-				    hostdata->issue_queue = (Scsi_Cmnd *) 
+-					tmp->SCp.ptr;
+-			    tmp->SCp.ptr = NULL;
+-			    if (tmp->host_scribble) {
+-				if (hostdata->options & OPTION_DEBUG_QUEUES) 
+-				    printk ("scsi%d : moving command for target %d lun %d to start list\n",
+-					host->host_no, tmp->device->id, tmp->device->lun);
+-		
+-
+-			    	to_schedule_list (host, hostdata, 
+-				    (struct NCR53c7x0_cmd *)
+-				    tmp->host_scribble);
+-			    } else {
+-				if (((tmp->result & 0xff) == 0xff) ||
+-			    	    ((tmp->result & 0xff00) == 0xff00)) {
+-				    printk ("scsi%d : danger Will Robinson!\n",
+-					host->host_no);
+-				    tmp->result = DID_ERROR << 16;
+-				    disable (host);
+-				}
+-				tmp->scsi_done(tmp);
+-			    }
+-			    done = 0;
+-			} /* if target/lun is not busy */
+-	    } /* if hostdata->issue_queue */
+-	    if (!done)
+-		local_irq_restore(flags);
+-    	} /* for host */
+-    } while (!done);
+-    process_issue_queue_running = 0;
+-}
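/*
 * Editorial sketch: the NOTEs above imply a wrapper that sets
 * process_issue_queue_running before entering the coroutine, so that
 * recursive scsi_done() callbacks cannot re-enter it.  Roughly (details
 * assumed, not taken from this patch):
 */
static void run_process_issue_queue (void)
{
	unsigned long flags;

	local_irq_save (flags);
	if (!process_issue_queue_running) {
		process_issue_queue_running = 1;
		/* clears the flag itself; exits with interrupts disabled */
		process_issue_queue (flags);
	}
	local_irq_restore (flags);
}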
+-
+-/*
+- * Function : static void intr_scsi (struct Scsi_Host *host, 
+- * 	struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose : handle all SCSI interrupts, indicated by the setting 
+- * 	of the SIP bit in the ISTAT register.
+- *
+- * Inputs : host, cmd - host and NCR command causing the interrupt, cmd
+- * 	may be NULL.
+- */
+-
+-static void 
+-intr_scsi (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) {
+-    NCR53c7x0_local_declare();
+-    struct NCR53c7x0_hostdata *hostdata = 
+-    	(struct NCR53c7x0_hostdata *) host->hostdata[0];
+-    unsigned char sstat0_sist0, sist1, 		/* Registers */
+-	    fatal; 				/* Did a fatal interrupt 
+-						   occur ? */
+-   
+-    NCR53c7x0_local_setup(host);
+-
+-    fatal = 0;
+-
+-    sstat0_sist0 = NCR53c7x0_read8(SSTAT0_REG);
+-    sist1 = 0;
+-
+-    if (hostdata->options & OPTION_DEBUG_INTR) 
+-	printk ("scsi%d : SIST0 0x%0x, SIST1 0x%0x\n", host->host_no,
+-	    sstat0_sist0, sist1);
+-
+-    /* 250ms selection timeout */
+-    if (sstat0_sist0 & SSTAT0_700_STO) {
+-	fatal = 1;
+-	if (hostdata->options & OPTION_DEBUG_INTR) {
+-	    printk ("scsi%d : Selection Timeout\n", host->host_no);
+-    	    if (cmd) {
+-    	    	printk("scsi%d : target %d, lun %d, command ",
+-		    host->host_no, cmd->cmd->device->id, cmd->cmd->device->lun);
+-    	    	__scsi_print_command (cmd->cmd->cmnd);
+-		printk("scsi%d : dsp = 0x%x (virt 0x%p)\n", host->host_no,
+-		    NCR53c7x0_read32(DSP_REG),
+-		    bus_to_virt(NCR53c7x0_read32(DSP_REG)));
+-    	    } else {
+-    	    	printk("scsi%d : no command\n", host->host_no);
+-    	    }
+-    	}
+-/*
+- * XXX - question : how do we want to handle the Illegal Instruction
+- * 	interrupt, which may occur before or after the Selection Timeout
+- * 	interrupt?
+- */
+-
+-	if (1) {
+-	    hostdata->idle = 1;
+-	    hostdata->expecting_sto = 0;
+-
+-	    if (hostdata->test_running) {
+-		hostdata->test_running = 0;
+-		hostdata->test_completed = 3;
+-	    } else if (cmd) {
+-		abnormal_finished(cmd, DID_BAD_TARGET << 16);
+-	    }
+-#if 0	    
+-	    hostdata->intrs = 0;
+-#endif
+-	}
+-    } 
+-
+-/*
+- * FIXME : in theory, we can also get a UDC when a STO occurs.
+- */
+-    if (sstat0_sist0 & SSTAT0_UDC) {
+-	fatal = 1;
+-	if (cmd) {
+-	    printk("scsi%d : target %d lun %d unexpected disconnect\n",
+-		host->host_no, cmd->cmd->device->id, cmd->cmd->device->lun);
+-	    print_lots (host);
+-	    abnormal_finished(cmd, DID_ERROR << 16);
+-	} else 
+-	     printk("scsi%d : unexpected disconnect (no command)\n",
+-		host->host_no);
+-
+-	hostdata->dsp = (u32 *) hostdata->schedule;
+-	hostdata->dsp_changed = 1;
+-    }
+-
+-    /* SCSI PARITY error */
+-    if (sstat0_sist0 & SSTAT0_PAR) {
+-	fatal = 1;
+-	if (cmd && cmd->cmd) {
+-	    printk("scsi%d : target %d lun %d parity error.\n",
+-		host->host_no, cmd->cmd->device->id, cmd->cmd->device->lun);
+-	    abnormal_finished (cmd, DID_PARITY << 16); 
+-	} else
+-	    printk("scsi%d : parity error\n", host->host_no);
+-	/* Should send message out, parity error */
+-
+-	/* XXX - Reduce synchronous transfer rate! */
+-	hostdata->dsp = hostdata->script + hostdata->E_initiator_abort /
+-    	    sizeof(u32);
+-	hostdata->dsp_changed = 1; 
+-    /* SCSI GROSS error */
+-    } 
+-
+-    if (sstat0_sist0 & SSTAT0_SGE) {
+-	fatal = 1;
+-	printk("scsi%d : gross error, saved2_dsa = 0x%x\n", host->host_no,
+-					(unsigned int)hostdata->saved2_dsa);
+-	print_lots (host);
+-	
+-	/* 
+-         * A SCSI gross error may occur when we have 
+-	 *
+-	 * - A synchronous offset which causes the SCSI FIFO to be overwritten.
+-	 *
+-	 * - A REQ which causes the maximum synchronous offset programmed in 
+-	 * 	the SXFER register to be exceeded.
+-	 *
+-	 * - A phase change with an outstanding synchronous offset.
+-	 *
+-	 * - Residual data left in the synchronous data FIFO when a transfer
+-	 *	other than a synchronous receive is started.
+-	 */
+-		
+-
+-	/* XXX - Should reduce synchronous transfer rate! */
+-	hostdata->dsp = hostdata->script + hostdata->E_initiator_abort /
+-    	    sizeof(u32);
+-	hostdata->dsp_changed = 1;
+-    /* Phase mismatch */
+-    } 
+-
+-    if (sstat0_sist0 & SSTAT0_MA) {
+-	fatal = 1;
+-	if (hostdata->options & OPTION_DEBUG_INTR)
+-	    printk ("scsi%d : SSTAT0_MA\n", host->host_no);
+-	intr_phase_mismatch (host, cmd);
+-    }
+-
+-#if 0
+-    if (sstat0_sist0 & SIST0_800_RSL) 
+-	printk ("scsi%d : Oh no Mr. Bill!\n", host->host_no);
+-#endif
+-    
+-/*
+- * If a fatal SCSI interrupt occurs, we must ensure that the DMA and
+- * SCSI FIFOs have been flushed.
+- */
+-
+-    if (fatal) {
+-	if (!hostdata->dstat_valid) {
+-	    hostdata->dstat = NCR53c7x0_read8(DSTAT_REG);
+-	    hostdata->dstat_valid = 1;
+-	}
+-
+-	if (!(hostdata->dstat & DSTAT_DFE)) {
+-	  printk ("scsi%d : DMA FIFO not empty\n", host->host_no);
+-	  /*
+-	   * Really need to check this code for the 710 - RGH.
+-	   * Haven't seen any problems, but maybe we should FLUSH before
+-	   * clearing sometimes.
+-	   */
+-          NCR53c7x0_write8 (CTEST8_REG, CTEST8_10_CLF);
+-          while (NCR53c7x0_read8 (CTEST8_REG) & CTEST8_10_CLF)
+-		;
+-	  hostdata->dstat |= DSTAT_DFE;
+-    	}
+-    }
+-}
+-
+-#ifdef CYCLIC_TRACE
+-
+-/*
+- * The following implements a cyclic log of instructions executed, if you turn
+- * TRACE on.  It will also print the log for you.  Very useful when debugging
+- * 53c710 support, possibly not really needed any more.
+- */
+-
+-u32 insn_log[4096];
+-u32 insn_log_index = 0;
+-
+-void log1 (u32 i)
+-{
+-	insn_log[insn_log_index++] = i;
+-	if (insn_log_index == 4096)
+-		insn_log_index = 0;
+-}
+-
+-void log_insn (u32 *ip)
+-{
+-	log1 ((u32)ip);
+-	log1 (*ip);
+-	log1 (*(ip+1));
+-	if (((*ip >> 24) & DCMD_TYPE_MASK) == DCMD_TYPE_MMI)
+-		log1 (*(ip+2));
+-}
+-
+-void dump_log(void)
+-{
+-	int cnt = 0;
+-	int i = insn_log_index;
+-	int size;
+-	struct Scsi_Host *host = first_host;
+-
+-	while (cnt < 4096) {
+-		printk ("%08x (+%6x): ", insn_log[i], (insn_log[i] - (u32)&(((struct NCR53c7x0_hostdata *)host->hostdata[0])->script))/4);
+-		if (++i == 4096)
+-			i = 0;
+-		cnt++;
+-		if (((insn_log[i]  >> 24) & DCMD_TYPE_MASK) == DCMD_TYPE_MMI) 
+-			size = 3;
+-		else
+-			size = 2;
+-		while (size--) {
+-			printk ("%08x ", insn_log[i]);
+-			if (++i == 4096)
+-				i = 0;
+-			cnt++;
+-		}
+-		printk ("\n");
+-	}
+-}
+-#endif
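/*
 * Editorial note: because the log is 4096 entries (a power of two), the
 * wrap in log1() above could equivalently use a mask instead of the
 * compare-and-reset -- a stylistic alternative with the same behavior:
 */
void log1_masked (u32 i)
{
	insn_log[insn_log_index] = i;
	insn_log_index = (insn_log_index + 1) & 4095;
}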
+-
+-
+-/*
+- * Function : static void NCR53c7x0_intfly (struct Scsi_Host *host)
+- *
+- * Purpose : Scan command queue for specified host, looking for completed
+- *           commands.
+- * 
+- * Inputs : Scsi_Host pointer.
+- *
+- * 	This is called from the interrupt handler, when a simulated INTFLY
+- * 	interrupt occurs.
+- */
+-
+-static void
+-NCR53c7x0_intfly (struct Scsi_Host *host)
+-{
+-    NCR53c7x0_local_declare();
+-    struct NCR53c7x0_hostdata *hostdata;	/* host->hostdata[0] */
+-    struct NCR53c7x0_cmd *cmd,			/* command which halted */
+-	**cmd_prev_ptr;
+-    unsigned long flags;				
+-    char search_found = 0;			/* Got at least one ? */
+-
+-    hostdata = (struct NCR53c7x0_hostdata *) host->hostdata[0];
+-    NCR53c7x0_local_setup(host);
+-
+-    if (hostdata->options & OPTION_DEBUG_INTR)
+-	printk ("scsi%d : INTFLY\n", host->host_no);
+-
+-    /*
+-     * Traverse our list of running commands, and look
+-     * for those with valid (non-0xff ff) status and message
+-     * bytes encoded in the result which signify command
+-     * completion.
+-     */
+-
+-    local_irq_save(flags);
+-restart:
+-    for (cmd_prev_ptr = (struct NCR53c7x0_cmd **)&(hostdata->running_list),
+-	cmd = (struct NCR53c7x0_cmd *) hostdata->running_list; cmd ;
+-	cmd_prev_ptr = (struct NCR53c7x0_cmd **) &(cmd->next), 
+-    	cmd = (struct NCR53c7x0_cmd *) cmd->next)
+-    {
+-	Scsi_Cmnd *tmp;
+-
+-	if (!cmd) {
+-	    printk("scsi%d : very weird.\n", host->host_no);
+-	    break;
+-	}
+-
+-	if (!(tmp = cmd->cmd)) {
+-	    printk("scsi%d : weird.  NCR53c7x0_cmd has no Scsi_Cmnd\n",
+-		    host->host_no);
+-	    continue;
+-	}
+-	/* Copy the result over now; may not be complete,
+-	 * but subsequent tests may as well be done on
+-	 * cached memory.
+-	 */
+-	tmp->result = cmd->result;
+-
+-	if (((tmp->result & 0xff) == 0xff) ||
+-			    ((tmp->result & 0xff00) == 0xff00))
+-	    continue;
+-
+-	search_found = 1;
+-
+-	if (cmd->bounce.len)
+-	    memcpy ((void *)cmd->bounce.addr,
+-				(void *)cmd->bounce.buf, cmd->bounce.len);
+-
+-	/* Important - remove from list _before_ done is called */
+-	if (cmd_prev_ptr)
+-	    *cmd_prev_ptr = (struct NCR53c7x0_cmd *) cmd->next;
+-
+-	--hostdata->busy[tmp->device->id][tmp->device->lun];
+-	cmd->next = hostdata->free;
+-	hostdata->free = cmd;
+-
+-	tmp->host_scribble = NULL;
+-
+-	if (hostdata->options & OPTION_DEBUG_INTR) {
+-	    printk ("scsi%d : command complete : pid %lu, id %d, lun %d result 0x%x ", 
+-		  host->host_no, tmp->pid, tmp->device->id, tmp->device->lun, tmp->result);
+-	    __scsi_print_command (tmp->cmnd);
+-	}
+-
+-	tmp->scsi_done(tmp);
+-	goto restart;
+-    }
+-    local_irq_restore(flags);
+-
+-    if (!search_found)  {
+-	printk ("scsi%d : WARNING : INTFLY with no completed commands.\n",
+-			    host->host_no);
+-    } else {
+-	run_process_issue_queue();
+-    }
+-    return;
+-}
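/*
 * Editorial sketch: queue_command seeds cmd->result with 0xffff, and the
 * completion scan above (and NCR53c7xx_abort below) treats 0xff in either
 * of the low two bytes as "the SCRIPTS have not written status/message
 * yet".  A hypothetical predicate (name invented) for that repeated test:
 */
static inline int ncr_result_complete (u32 result)
{
	return (result & 0xff) != 0xff && (result & 0xff00) != 0xff00;
}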
+-
+-/*
+- * Function : static irqreturn_t NCR53c7x0_intr (int irq, void *dev_id)
+- *
+- * Purpose : handle NCR53c7x0 interrupts for all NCR devices sharing
+- *	the same IRQ line.  
+- * 
+- * Inputs : Since we're using the IRQF_DISABLED interrupt handler
+- *	semantics, irq indicates the interrupt which invoked 
+- *	this handler.  
+- *
+- * On the 710 we simulate an INTFLY with a script interrupt, and the
+- * script interrupt handler will call back to this function.
+- */
+-
+-static irqreturn_t
+-NCR53c7x0_intr (int irq, void *dev_id)
+-{
+-    NCR53c7x0_local_declare();
+-    struct Scsi_Host *host;			/* Host we are looking at */
+-    unsigned char istat; 			/* Values of interrupt regs */
+-    struct NCR53c7x0_hostdata *hostdata;	/* host->hostdata[0] */
+-    struct NCR53c7x0_cmd *cmd;			/* command which halted */
+-    u32 *dsa;					/* DSA */
+-    int handled = 0;
+-
+-#ifdef NCR_DEBUG
+-    char buf[80];				/* Debugging sprintf buffer */
+-    size_t buflen;				/* Length of same */
+-#endif
+-
+-    host     = (struct Scsi_Host *)dev_id;
+-    hostdata = (struct NCR53c7x0_hostdata *) host->hostdata[0];
+-    NCR53c7x0_local_setup(host);
+-
+-    /*
+-     * Only read istat once per loop, since reading it again will unstack
+-     * interrupts
+-     */
+-
+-    while ((istat = NCR53c7x0_read8(hostdata->istat)) & (ISTAT_SIP|ISTAT_DIP)) {
+-	handled = 1;
+-	hostdata->dsp_changed = 0;
+-	hostdata->dstat_valid = 0;
+-    	hostdata->state = STATE_HALTED;
+-
+-	if (NCR53c7x0_read8 (SSTAT2_REG) & SSTAT2_FF_MASK) 
+-	    printk ("scsi%d : SCSI FIFO not empty\n", host->host_no);
+-
+-	/*
+-	 * NCR53c700 and NCR53c700-66 change the current SCSI
+-	 * process, hostdata->ncrcurrent, in the Linux driver so
+-	 * cmd = hostdata->ncrcurrent.
+-	 *
+-	 * With other chips, we must look through the commands
+-	 * executing and find the command structure which 
+-	 * corresponds to the DSA register.
+-	 */
+-
+-	if (hostdata->options & OPTION_700) {
+-	    cmd = (struct NCR53c7x0_cmd *) hostdata->ncrcurrent;
+-	} else {
+-	    dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG));
+-	    for (cmd = (struct NCR53c7x0_cmd *) hostdata->running_list;
+-		cmd && (dsa + (hostdata->dsa_start / sizeof(u32))) != cmd->dsa;
+-		    cmd = (struct NCR53c7x0_cmd *)(cmd->next))
+-		;
+-	}
+-	if (hostdata->options & OPTION_DEBUG_INTR) {
+-	    if (cmd) {
+-		printk("scsi%d : interrupt for pid %lu, id %d, lun %d ", 
+-		    host->host_no, cmd->cmd->pid, (int) cmd->cmd->device->id,
+-		    (int) cmd->cmd->device->lun);
+-		__scsi_print_command (cmd->cmd->cmnd);
+-	    } else {
+-		printk("scsi%d : no active command\n", host->host_no);
+-	    }
+-	}
+-	
+-	if (istat & ISTAT_SIP) {
+-	    if (hostdata->options & OPTION_DEBUG_INTR) 
+-		printk ("scsi%d : ISTAT_SIP\n", host->host_no);
+-	    intr_scsi (host, cmd);
+-	}
+-	
+-	if (istat & ISTAT_DIP) {
+-	    if (hostdata->options & OPTION_DEBUG_INTR) 
+-		printk ("scsi%d : ISTAT_DIP\n", host->host_no);
+-	    intr_dma (host, cmd);
+-	}
+-	
+-	if (!hostdata->dstat_valid) {
+-	    hostdata->dstat = NCR53c7x0_read8(DSTAT_REG);
+-	    hostdata->dstat_valid = 1;
+-	}
+-	
+-	if (!(hostdata->dstat & DSTAT_DFE)) {
+-	    printk ("scsi%d : DMA FIFO not empty\n", host->host_no);
+-	    /* Really need to check this out for 710 RGH */
+-	    NCR53c7x0_write8 (CTEST8_REG, CTEST8_10_CLF);
+-	    while (NCR53c7x0_read8 (CTEST8_REG) & CTEST8_10_CLF)
+-		;
+-	    hostdata->dstat |= DSTAT_DFE;
+-	}
+-
+-	if (!hostdata->idle && hostdata->state == STATE_HALTED) {
+-	    if (!hostdata->dsp_changed)
+-		hostdata->dsp = (u32 *)bus_to_virt(NCR53c7x0_read32(DSP_REG));
+-#if 0
+-	    printk("scsi%d : new dsp is 0x%lx (virt 0x%p)\n",
+-		host->host_no,  virt_to_bus(hostdata->dsp), hostdata->dsp);
+-#endif
+-		
+-	    hostdata->state = STATE_RUNNING;
+-	    NCR53c7x0_write32 (DSP_REG, virt_to_bus(hostdata->dsp));
+-	    if (hostdata->options & OPTION_DEBUG_TRACE) {
+-#ifdef CYCLIC_TRACE
+-		log_insn (hostdata->dsp);
+-#else
+-	    	print_insn (host, hostdata->dsp, "t ", 1);
+-#endif
+-		NCR53c7x0_write8 (DCNTL_REG,
+-			hostdata->saved_dcntl | DCNTL_SSM | DCNTL_STD);
+-	    }
+-	}
+-    }
+-    return IRQ_HANDLED;
+-}
+-
+-
+-/* 
+- * Function : static int abort_connected (struct Scsi_Host *host)
+- *
+- * Purpose : Assuming that the NCR SCSI processor is currently 
+- * 	halted, break the currently established nexus.  Clean
+- *	up of the NCR53c7x0_cmd and Scsi_Cmnd structures should
+- *	be done on receipt of the abort interrupt.
+- *
+- * Inputs : host - SCSI host
+- *
+- */
+-
+-static int 
+-abort_connected (struct Scsi_Host *host) {
+-#ifdef NEW_ABORT
+-    NCR53c7x0_local_declare();
+-#endif
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-/* FIXME : this probably should change for production kernels; at the 
+-   least, counter should move to a per-host structure. */
+-    static int counter = 5;
+-#ifdef NEW_ABORT
+-    int sstat, phase, offset;
+-    u32 *script;
+-    NCR53c7x0_local_setup(host);
+-#endif
+-
+-    if (--counter <= 0) {
+-	disable(host);
+-	return 0;
+-    }
+-
+-    printk ("scsi%d : DANGER : abort_connected() called \n",
+-	host->host_no);
+-
+-#ifdef NEW_ABORT
+-
+-/*
+- * New strategy : Rather than using a generic abort routine,
+- * we'll specifically try to source or sink the appropriate
+- * amount of data for the phase we're currently in (taking into 
+- * account the current synchronous offset) 
+- */
+-
+-    sstat = NCR53c8x0_read8 (SSTAT2_REG);
+-    offset = OFFSET (sstat & SSTAT2_FF_MASK) >> SSTAT2_FF_SHIFT;
+-    phase = sstat & SSTAT2_PHASE_MASK;
+-
+-/*
+- * SET ATN
+- * MOVE source_or_sink, WHEN CURRENT PHASE 
+- * < repeat for each outstanding byte >
+- * JUMP send_abort_message
+- */
+-
+-    script = hostdata->abort_script = kmalloc (
+-	8  /* instruction size */ * (
+-	    1 /* set ATN */ +
+-	    (!offset ? 1 : offset) /* One transfer per outstanding byte */ +
+-	    1 /* send abort message */),
+-	GFP_ATOMIC);
+-
+-
+-#else /* def NEW_ABORT */
+-    hostdata->dsp = hostdata->script + hostdata->E_initiator_abort /
+-	    sizeof(u32);
+-#endif /* def NEW_ABORT */
+-    hostdata->dsp_changed = 1;
+-
+-/* XXX - need to flag the command as aborted after the abort_connected
+- 	 code runs 
+- */
+-    return 0;
+-}
+-
+-/*
+- * Function : static int datapath_residual (Scsi_Host *host)
+- *
+- * Purpose : return residual data count of what's in the chip.
+- *
+- * Inputs : host - SCSI host
+- */
+-
+-static int
+-datapath_residual (struct Scsi_Host *host) {
+-    NCR53c7x0_local_declare();
+-    int count, synchronous, sstat;
+-    unsigned int ddir;
+-
+-    NCR53c7x0_local_setup(host);
+-    /* COMPAT : the 700 and 700-66 need to use DFIFO_00_BO_MASK */
+-    count = ((NCR53c7x0_read8 (DFIFO_REG) & DFIFO_10_BO_MASK) -
+-	(NCR53c7x0_read32 (DBC_REG) & DFIFO_10_BO_MASK)) & DFIFO_10_BO_MASK;
+-    synchronous = NCR53c7x0_read8 (SXFER_REG) & SXFER_MO_MASK;
+-    /* COMPAT : DDIR is elsewhere on non-'8xx chips. */
+-    ddir = NCR53c7x0_read8 (CTEST0_REG_700) & CTEST0_700_DDIR;
+-
+-    if (ddir) {
+-    /* Receive */
+-	if (synchronous) 
+-	    count += (NCR53c7x0_read8 (SSTAT2_REG) & SSTAT2_FF_MASK) >> SSTAT2_FF_SHIFT;
+-	else
+-	    if (NCR53c7x0_read8 (SSTAT1_REG) & SSTAT1_ILF)
+-		++count;
+-    } else {
+-    /* Send */
+-	sstat = NCR53c7x0_read8 (SSTAT1_REG);
+-	if (sstat & SSTAT1_OLF)
+-	    ++count;
+-	if (synchronous && (sstat & SSTAT1_ORF))
+-	    ++count;
+-    }
+-    return count;
+-}
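/*
 * Editorial worked example (register snapshots invented): the DFIFO/DBC
 * subtraction above is modular arithmetic in the width of the byte-offset
 * counter.  With a DFIFO byte offset of 0x12 and the low bits of DBC at
 * 0x0e, four bytes remain in the DMA FIFO:
 */
static int residual_example (void)
{
	u32 dfifo = 0x12, dbc = 0x0e;	/* invented snapshots */

	/* (0x12 - 0x0e) & mask == 4 */
	return (dfifo - dbc) & DFIFO_10_BO_MASK;
}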
+-
+-/* 
+- * Function : static const char * sbcl_to_phase (int sbcl)
+- *
+- * Purpose : Convert SBCL register to user-parsable phase representation
+- *
+- * Inputs : sbcl - value of sbcl register
+- */
+-
+-
+-static const char *
+-sbcl_to_phase (int sbcl) {
+-    switch (sbcl & SBCL_PHASE_MASK) {
+-    case SBCL_PHASE_DATAIN:
+-	return "DATAIN";
+-    case SBCL_PHASE_DATAOUT:
+-	return "DATAOUT";
+-    case SBCL_PHASE_MSGIN:
+-	return "MSGIN";
+-    case SBCL_PHASE_MSGOUT:
+-	return "MSGOUT";
+-    case SBCL_PHASE_CMDOUT:
+-	return "CMDOUT";
+-    case SBCL_PHASE_STATIN:
+-	return "STATUSIN";
+-    default:
+-	return "unknown";
+-    }
+-}
+-
+-/* 
+- * Function : static const char * sstat2_to_phase (int sstat)
+- *
+- * Purpose : Convert SSTAT2 register to user-parsable phase representation
+- *
+- * Inputs : sstat - value of sstat register
+- */
+-
+-
+-static const char *
+-sstat2_to_phase (int sstat) {
+-    switch (sstat & SSTAT2_PHASE_MASK) {
+-    case SSTAT2_PHASE_DATAIN:
+-	return "DATAIN";
+-    case SSTAT2_PHASE_DATAOUT:
+-	return "DATAOUT";
+-    case SSTAT2_PHASE_MSGIN:
+-	return "MSGIN";
+-    case SSTAT2_PHASE_MSGOUT:
+-	return "MSGOUT";
+-    case SSTAT2_PHASE_CMDOUT:
+-	return "CMDOUT";
+-    case SSTAT2_PHASE_STATIN:
+-	return "STATUSIN";
+-    default:
+-	return "unknown";
+-    }
+-}
+-
+-/* 
+- * Function : static void intr_phase_mismatch (struct Scsi_Host *host, 
+- *	struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose : Handle phase mismatch interrupts
+- *
+- * Inputs : host, cmd - host and NCR command causing the interrupt, cmd
+- * 	may be NULL.
+- *
+- * Side effects : The abort_connected() routine is called or the NCR chip 
+- *	is restarted, jumping to the command_complete entry point, or 
+- *	patching the address and transfer count of the current instruction 
+- *	and calling the msg_in entry point as appropriate.
+- */
+-
+-static void 
+-intr_phase_mismatch (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) {
+-    NCR53c7x0_local_declare();
+-    u32 dbc_dcmd, *dsp, *dsp_next;
+-    unsigned char dcmd, sbcl;
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-    	host->hostdata[0];
+-    int residual;
+-    enum {ACTION_ABORT, ACTION_ABORT_PRINT, ACTION_CONTINUE} action = 
+-	ACTION_ABORT_PRINT;
+-    const char *where = NULL;
+-
+-    NCR53c7x0_local_setup(host);
+-
+-    /*
+-     * Corrective action is based on where in the SCSI SCRIPT(tm) the error 
+-     * occurred, as well as which SCSI phase we are currently in.
+-     */
+-    dsp_next = bus_to_virt(NCR53c7x0_read32(DSP_REG));
+-
+-    /* 
+-     * Fetch the current instruction, and remove the operands for easier 
+-     * interpretation.
+-     */
+-    dbc_dcmd = NCR53c7x0_read32(DBC_REG);
+-    dcmd = (dbc_dcmd & 0xff000000) >> 24;
+-    /*
+-     * Like other processors, the NCR adjusts the instruction pointer before
+-     * instruction decode.  Set the DSP address back to what it should
+-     * be for this instruction based on its size (2 or 3 32 bit words).
+-     */
+-    dsp = dsp_next - NCR53c7x0_insn_size(dcmd);
+-
+-
+-    /*
+-     * Read new SCSI phase from the SBCL lines.  Since all of our code uses 
+-     * a WHEN conditional instead of an IF conditional, we don't need to 
+-     * wait for a new REQ.
+-     */
+-    sbcl = NCR53c7x0_read8(SBCL_REG) & SBCL_PHASE_MASK;
+-
+-    if (!cmd) {
+-	action = ACTION_ABORT_PRINT;
+-	where = "no current command";
+-    /*
+-     * The way my SCSI SCRIPTS(tm) are architected, recoverable phase
+-     * mismatches should only occur where we're doing a multi-byte  
+-     * BMI instruction.  Specifically, this means 
+-     *
+-     *  - select messages (a SCSI-I target may ignore additional messages
+-     * 		after the IDENTIFY; any target may reject an SDTR or WDTR)
+-     *
+-     *  - command out (targets may send a message to signal an error 
+-     * 		condition, or go into STATUSIN after they've decided 
+-     *		they don't like the command)
+-     *
+-     *	- reply_message (targets may reject a multi-byte message in the 
+-     *		middle)
+-     *
+-     * 	- data transfer routines (command completion with buffer space
+-     *		left, disconnect message, or error message)
+-     */
+-    } else if (((dsp >= cmd->data_transfer_start && 
+-	dsp < cmd->data_transfer_end)) || dsp == (cmd->residual + 2)) {
+-	if ((dcmd & (DCMD_TYPE_MASK|DCMD_BMI_OP_MASK|DCMD_BMI_INDIRECT|
+-		DCMD_BMI_MSG|DCMD_BMI_CD)) == (DCMD_TYPE_BMI|
+-		DCMD_BMI_OP_MOVE_I)) {
+-	    residual = datapath_residual (host);
+-	    if (hostdata->options & OPTION_DEBUG_DISCONNECT)
+-	    	printk ("scsi%d : handling residual transfer (+ %d bytes from DMA FIFO)\n", 
+-		    host->host_no, residual);
+-
+-	    /*
+-	     * The first instruction is a CALL to the alternate handler for 
+-	     * this data transfer phase, so we can do calls to 
+-	     * munge_msg_restart as we would if control were passed 
+-	     * from normal dynamic code.
+-	     */
+-	    if (dsp != cmd->residual + 2) {
+-		cmd->residual[0] = ((DCMD_TYPE_TCI | DCMD_TCI_OP_CALL |
+-			((dcmd & DCMD_BMI_IO) ? DCMD_TCI_IO : 0)) << 24) | 
+-		    DBC_TCI_WAIT_FOR_VALID | DBC_TCI_COMPARE_PHASE;
+-		cmd->residual[1] = virt_to_bus(hostdata->script)
+-		    + ((dcmd & DCMD_BMI_IO)
+-		       ? hostdata->E_other_in : hostdata->E_other_out);
+-	    }
+-
+-	    /*
+-	     * The second instruction is a data transfer block
+-	     * move instruction, reflecting the pointer and count at the 
+-	     * time of the phase mismatch.
+-	     */
+-	    cmd->residual[2] = dbc_dcmd + residual;
+-	    cmd->residual[3] = NCR53c7x0_read32(DNAD_REG) - residual;
+-
+-	    /*
+-	     * The third and final instruction is a jump to the instruction
+-	     * which follows the instruction which had to be 'split'.
+-	     */
+-	    if (dsp != cmd->residual + 2) {
+-		cmd->residual[4] = ((DCMD_TYPE_TCI|DCMD_TCI_OP_JUMP) 
+-		    << 24) | DBC_TCI_TRUE;
+-		cmd->residual[5] = virt_to_bus(dsp_next);
+-	    }
+-
+-	    /*
+-	     * For the sake of simplicity, transfer control to the 
+-	     * conditional CALL at the start of the residual buffer.
+-	     */
+-	    hostdata->dsp = cmd->residual;
+-	    hostdata->dsp_changed = 1;
+-	    action = ACTION_CONTINUE;
+-	} else {
+-	    where = "non-BMI dynamic DSA code";
+-	    action = ACTION_ABORT_PRINT;
+-	}
+-    } else if (dsp == (hostdata->script + hostdata->E_select_msgout / 4 + 2)) {
+-	/* RGH 290697:  Added +2 above, to compensate for the script
+-	 * instruction which disables the selection timer. */
+-	/* Release ATN */
+-	NCR53c7x0_write8 (SOCL_REG, 0);
+-	switch (sbcl) {
+-    /* 
+-     * Some devices (SQ555 come to mind) grab the IDENTIFY message
+-     * sent on selection, and decide to go into COMMAND OUT phase
+-     * rather than accepting the rest of the messages or rejecting
+-     * them.  Handle these devices gracefully.
+-     */
+-	case SBCL_PHASE_CMDOUT:
+-	    hostdata->dsp = dsp + 2 /* two _words_ */;
+-	    hostdata->dsp_changed = 1;
+-	    printk ("scsi%d : target %d ignored SDTR and went into COMMAND OUT\n", 
+-		host->host_no, cmd->cmd->device->id);
+-	    cmd->flags &= ~CMD_FLAG_SDTR;
+-	    action = ACTION_CONTINUE;
+-	    break;
+-	case SBCL_PHASE_MSGIN:
+-	    hostdata->dsp = hostdata->script + hostdata->E_msg_in / 
+-		sizeof(u32);
+-	    hostdata->dsp_changed = 1;
+-	    action = ACTION_CONTINUE;
+-	    break;
+-	default:
+-	    where="select message out";
+-	    action = ACTION_ABORT_PRINT;
+-	}
+-    /*
+-     * Some SCSI devices will interpret a command as they read the bytes
+-     * off the SCSI bus, and may decide that the command is Bogus before 
+-     * they've read the entire command off the bus.
+-     */
+-    } else if (dsp == hostdata->script + hostdata->E_cmdout_cmdout / sizeof 
+-	(u32)) {
+-	hostdata->dsp = hostdata->script + hostdata->E_data_transfer /
+-	    sizeof (u32);
+-	hostdata->dsp_changed = 1;
+-	action = ACTION_CONTINUE;
+-    /* FIXME : we need to handle message reject, etc. within msg_respond. */
+-#ifdef notyet
+-    } else if (dsp == hostdata->script + hostdata->E_reply_message) {
+-	switch (sbcl) {
+-    /* Any other phase mismatches abort the currently executing command.  */
+-#endif
+-    } else {
+-	where = "unknown location";
+-	action = ACTION_ABORT_PRINT;
+-    }
+-
+-    /* Flush DMA FIFO */
+-    if (!hostdata->dstat_valid) {
+-	hostdata->dstat = NCR53c7x0_read8(DSTAT_REG);
+-	hostdata->dstat_valid = 1;
+-    }
+-    if (!(hostdata->dstat & DSTAT_DFE)) {
+-      /* Really need to check this out for 710 RGH */
+-      NCR53c7x0_write8 (CTEST8_REG, CTEST8_10_CLF);
+-      while (NCR53c7x0_read8 (CTEST8_REG) & CTEST8_10_CLF);
+-      hostdata->dstat |= DSTAT_DFE;
+-    }
+-
+-    switch (action) {
+-    case ACTION_ABORT_PRINT:
+-	printk("scsi%d : %s : unexpected phase %s.\n",
+-	     host->host_no, where ? where : "unknown location", 
+-	     sbcl_to_phase(sbcl));
+-	print_lots (host);
+-    /* Fall through to ACTION_ABORT */
+-    case ACTION_ABORT:
+-	abort_connected (host);
+-	break;
+-    case ACTION_CONTINUE:
+-	break;
+-    }
+-
+-#if 0
+-    if (hostdata->dsp_changed) {
+-	printk("scsi%d: new dsp 0x%p\n", host->host_no, hostdata->dsp);
+-	print_insn (host, hostdata->dsp, "", 1);
+-    }
+-#endif
+-}
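/*
 * Editorial illustration of the six-word cmd->residual buffer rebuilt
 * above after a phase mismatch inside a dynamic block move (bus
 * addresses throughout; the count and address words come from DBC and
 * DNAD at interrupt time):
 *
 *	residual[0]	CALL other_in/other_out, WHEN NOT current phase
 *	residual[1]	bus address of that handler in the static script
 *	residual[2]	original DCMD | bytes not yet transferred
 *	residual[3]	DNAD - residual (first untransferred byte)
 *	residual[4]	JUMP, if TRUE
 *	residual[5]	bus address of the instruction after the split MOVE
 */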
+-
+-/*
+- * Function : static void intr_bf (struct Scsi_Host *host, 
+- * 	struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose : handle BUS FAULT interrupts 
+- *
+- * Inputs : host, cmd - host and NCR command causing the interrupt, cmd
+- * 	may be NULL.
+- */
+-
+-static void
+-intr_bf (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) {
+-    NCR53c7x0_local_declare();
+-    u32 *dsp,
+-	*next_dsp,		/* Current dsp */
+-    	*dsa,
+-	dbc_dcmd;		/* DCMD (high eight bits) + DBC */
+-    char *reason = NULL;
+-    /* Default behavior is for a silent error, with a retry until we've
+-       exhausted retries. */
+-    enum {MAYBE, ALWAYS, NEVER} retry = MAYBE;
+-    int report = 0;
+-    NCR53c7x0_local_setup(host);
+-
+-    dbc_dcmd = NCR53c7x0_read32 (DBC_REG);
+-    next_dsp = bus_to_virt (NCR53c7x0_read32(DSP_REG));
+-    dsp = next_dsp - NCR53c7x0_insn_size ((dbc_dcmd >> 24) & 0xff);
+-/* FIXME - check chip type  */
+-    dsa = bus_to_virt (NCR53c7x0_read32(DSA_REG));
+-
+-    /*
+-     * Bus faults can be caused by either a Bad Address or 
+-     * Target Abort. We should check the Received Target Abort
+-     * bit of the PCI status register and Master Abort Bit.
+-     *
+-     * 	- Master Abort bit indicates that no device claimed
+-     *		the address with DEVSEL within five clocks
+-     *
+-     *	- Target Abort bit indicates that a target claimed it,
+-     *		but changed its mind once it saw the byte enables.
+-     *
+-     */
+-
+-    /* 53c710, not PCI system */
+-    report = 1;
+-    reason = "Unknown";
+-
+-#ifndef notyet
+-    report = 1;
+-#endif
+-    if (report && reason)
+-    {
+-	printk(KERN_ALERT "scsi%d : BUS FAULT reason = %s\n",
+-	     host->host_no, reason ? reason : "unknown");
+-	print_lots (host);
+-    }
+-
+-#ifndef notyet
+-    retry = NEVER;
+-#endif
+-
+-    /* 
+-     * TODO : we should attempt to recover from any spurious bus 
+-     * faults.  After X retries, we should figure that things are 
+-     * sufficiently wedged, and call NCR53c7xx_reset.
+-     *
+-     * This code should only get executed once we've decided that we 
+-     * cannot retry.
+-     */
+-
+-    if (retry == NEVER) {
+-    	printk(KERN_ALERT "          mail richard@sleepie.demon.co.uk\n");
+-    	FATAL (host);
+-    }
+-}
+-
+-/*
+- * Function : static void intr_dma (struct Scsi_Host *host, 
+- * 	struct NCR53c7x0_cmd *cmd)
+- *
+- * Purpose : handle all DMA interrupts, indicated by the setting 
+- * 	of the DIP bit in the ISTAT register.
+- *
+- * Inputs : host, cmd - host and NCR command causing the interrupt, cmd
+- * 	may be NULL.
+- */
+-
+-static void 
+-intr_dma (struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd) {
+-    NCR53c7x0_local_declare();
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    unsigned char dstat;	/* DSTAT */	
+-    u32 *dsp,
+-	*next_dsp,		/* Current dsp */
+-    	*dsa,
+-	dbc_dcmd;		/* DCMD (high eight bits) + DBC */
+-    int tmp;
+-    unsigned long flags;
+-    NCR53c7x0_local_setup(host);
+-
+-    if (!hostdata->dstat_valid) {
+-	hostdata->dstat = NCR53c7x0_read8(DSTAT_REG);
+-	hostdata->dstat_valid = 1;
+-    }
+-    
+-    dstat = hostdata->dstat;
+-    
+-    if (hostdata->options & OPTION_DEBUG_INTR)
+-	printk("scsi%d : DSTAT=0x%x\n", host->host_no, (int) dstat);
+-
+-    dbc_dcmd = NCR53c7x0_read32 (DBC_REG);
+-    next_dsp = bus_to_virt(NCR53c7x0_read32(DSP_REG));
+-    dsp = next_dsp - NCR53c7x0_insn_size ((dbc_dcmd >> 24) & 0xff);
+-/* XXX - check chip type */
+-    dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG));
+-
+-    /*
+-     * DSTAT_ABRT is the aborted interrupt.  This is set whenever the 
+-     * SCSI chip is aborted.  
+-     * 
+-     * With NCR53c700 and NCR53c700-66 style chips, we should only 
+-     * get this when the chip is currently running the accept 
+-     * reselect/select code and we have set the abort bit in the 
+-     * ISTAT register.
+-     *
+-     */
+-    
+-    if (dstat & DSTAT_ABRT) {
+-#if 0
+-	/* XXX - add code here to deal with normal abort */
+-	if ((hostdata->options & OPTION_700) && (hostdata->state ==
+-	    STATE_ABORTING)) {
+-	} else 
+-#endif
+-	{
+-	    printk(KERN_ALERT "scsi%d : unexpected abort interrupt at\n" 
+-		   "         ", host->host_no);
+-	    print_insn (host, dsp, KERN_ALERT "s ", 1);
+-	    FATAL (host);
+-	}
+-    }
+-
+-    /*
+-     * DSTAT_SSI is the single step interrupt.  Should be generated 
+-     * whenever we have single stepped or are tracing.
+-     */
+-
+-    if (dstat & DSTAT_SSI) {
+-	if (hostdata->options & OPTION_DEBUG_TRACE) {
+-	    /* Don't print instr. until we write DSP at end of intr function */
+-	} else if (hostdata->options & OPTION_DEBUG_SINGLE) {
+-	    print_insn (host, dsp, "s ", 0);
+-	    local_irq_save(flags);
+-/* XXX - should we do this, or can we get away with writing dsp? */
+-
+-	    NCR53c7x0_write8 (DCNTL_REG, (NCR53c7x0_read8(DCNTL_REG) & 
+-    	    	~DCNTL_SSM) | DCNTL_STD);
+-	    local_irq_restore(flags);
+-	} else {
+-	    printk(KERN_ALERT "scsi%d : unexpected single step interrupt at\n"
+-		   "         ", host->host_no);
+-	    print_insn (host, dsp, KERN_ALERT "", 1);
+-	    printk(KERN_ALERT "         mail drew@PoohSticks.ORG\n");
+-    	    FATAL (host);
+-    	}
+-    }
+-
+-    /*
+-     * DSTAT_IID / DSTAT_OPC (same bit, same meaning, only the name 
+-     * is different) is generated whenever an illegal instruction is 
+-     * encountered.  
+-     * 
+-     * XXX - we may want to emulate INTFLY here, so we can use 
+-     *    the same SCSI SCRIPT (tm) for NCR53c710 through NCR53c810  
+-     *	  chips.
+-     */
+-
+-    if (dstat & DSTAT_OPC) {
+-    /* 
+-     * Ascertain whether this IID interrupt occurred before or after a STO 
+-     * interrupt.  Since the interrupt handling code now leaves 
+-     * DSP unmodified until _after_ all stacked interrupts have been
+-     * processed, reading the DSP returns the original DSP register.
+-     * This means that if dsp lies between the select code and the 
+-     * message out following the selection code (where the IID interrupt
+-     * would have had to occur due to the implicit wait for REQ),
+-     * we have an IID interrupt resulting from a STO condition and 
+-     * can ignore it.
+-     */
+-
+-	if (((dsp >= (hostdata->script + hostdata->E_select / sizeof(u32))) &&
+-	    (dsp <= (hostdata->script + hostdata->E_select_msgout / 
+-    	    sizeof(u32) + 8))) || (hostdata->test_running == 2)) {
+-	    if (hostdata->options & OPTION_DEBUG_INTR) 
+-		printk ("scsi%d : ignoring DSTAT_IID for SSTAT_STO\n",
+-		    host->host_no);
+-	    if (hostdata->expecting_iid) {
+-		hostdata->expecting_iid = 0;
+-		hostdata->idle = 1;
+-		if (hostdata->test_running == 2) {
+-		    hostdata->test_running = 0;
+-		    hostdata->test_completed = 3;
+-		} else if (cmd) 
+-			abnormal_finished (cmd, DID_BAD_TARGET << 16);
+-	    } else {
+-		hostdata->expecting_sto = 1;
+-	    }
+-    /*
+-     * We can't guarantee we'll be able to execute the WAIT DISCONNECT
+-     * instruction within the 3.4us of bus free and arbitration delay
+-     * that a target can RESELECT in and assert REQ after we've dropped
+-     * ACK.  If this happens, we'll get an illegal instruction interrupt.
+-     * Doing away with the WAIT DISCONNECT instructions broke everything,
+-     * so instead I'll settle for moving one WAIT DISCONNECT a few 
+-     * instructions closer to the CLEAR ACK before it to minimize the
+-     * chances of this happening, and handle it if it occurs anyway.
+-     *
+-     * Simply continue with what we were doing, and control should
+-     * be transferred to the schedule routine which will ultimately
+-     * pass control onto the reselection or selection (not yet)
+-     * code.
+-     */
+-	} else if (dbc_dcmd == 0x48000000 && (NCR53c7x0_read8 (SBCL_REG) &
+-	    SBCL_REQ)) {
+-	    if (!(hostdata->options & OPTION_NO_PRINT_RACE))
+-	    {
+-		printk("scsi%d: REQ before WAIT DISCONNECT IID\n", 
+-		    host->host_no);
+-		hostdata->options |= OPTION_NO_PRINT_RACE;
+-	    }
+-	} else {
+-	    printk(KERN_ALERT "scsi%d : invalid instruction\n", host->host_no);
+-	    print_lots (host);
+-	    printk(KERN_ALERT "         mail Richard@sleepie.demon.co.uk with ALL\n"
+-		              "         boot messages and diagnostic output\n");
+-    	    FATAL (host);
+-	}
+-    }
+-
+-    /* 
+-     * DSTAT_BF are bus fault errors.  DSTAT_800_BF is valid for 710 also.
+-     */
+-    
+-    if (dstat & DSTAT_800_BF) {
+-	intr_bf (host, cmd);
+-    }
+-	
+-
+-    /* 
+-     * DSTAT_SIR interrupts are generated by the execution of 
+-     * the INT instruction.  Since the exact values available 
+-     * are determined entirely by the SCSI script running, 
+-     * and are local to a particular script, a unique handler
+-     * is called for each script.
+-     */
+-
+-    if (dstat & DSTAT_SIR) {
+-	if (hostdata->options & OPTION_DEBUG_INTR)
+-	    printk ("scsi%d : DSTAT_SIR\n", host->host_no);
+-	switch ((tmp = hostdata->dstat_sir_intr (host, cmd))) {
+-	case SPECIFIC_INT_NOTHING:
+-	case SPECIFIC_INT_RESTART:
+-	    break;
+-	case SPECIFIC_INT_ABORT:
+-	    abort_connected(host);
+-	    break;
+-	case SPECIFIC_INT_PANIC:
+-	    printk(KERN_ALERT "scsi%d : failure at ", host->host_no);
+-	    print_insn (host, dsp, KERN_ALERT "", 1);
+-	    printk(KERN_ALERT "          dstat_sir_intr() returned SPECIFIC_INT_PANIC\n");
+-    	    FATAL (host);
+-	    break;
+-	case SPECIFIC_INT_BREAK:
+-	    intr_break (host, cmd);
+-	    break;
+-	default:
+-	    printk(KERN_ALERT "scsi%d : failure at ", host->host_no);
+-	    print_insn (host, dsp, KERN_ALERT "", 1);
+-	    printk(KERN_ALERT"          dstat_sir_intr() returned unknown value %d\n", 
+-		tmp);
+-    	    FATAL (host);
+-	}
+-    } 
+-}
+-
+-/*
+- * Function : static int print_insn (struct Scsi_Host *host, 
+- * 	const u32 *insn, const char *prefix, int kernel)
+- *
+- * Purpose : print numeric representation of the instruction pointed
+- * 	to by insn to the debugging or kernel message buffer
+- *	as appropriate.  
+- *
+- * 	If desired, a user level program can interpret this 
+- * 	information.
+- *
+- * Inputs : host, insn - host, pointer to instruction, prefix - 
+- *	string to prepend, kernel - use printk instead of debugging buffer.
+- *
+- * Returns : size, in u32s, of instruction printed.
+- */
+-
+-/*
+- * FIXME: should change kernel parameter so that it takes an ENUM
+- * 	specifying severity - either KERN_ALERT or KERN_PANIC so
+- *	all panic messages are output with the same severity.
+- */
+-
+-static int 
+-print_insn (struct Scsi_Host *host, const u32 *insn, 
+-    const char *prefix, int kernel) {
+-    char buf[160],		/* Temporary buffer and pointer.  ICKY
+-				   arbitrary length.  */
+-	*tmp;
+-    unsigned char dcmd;		/* dcmd register for *insn */
+-    int size;
+-
+-    /* 
+-     * Check that the instruction pointer is not bogus before 
+-     * indirecting through it, avoiding the red zone at the start of 
+-     * memory.
+-     *
+-     * FIXME: icky magic needs to happen here on non-intel boxes which
+-     * don't have kernel memory mapped in like this.  Might be reasonable
+-     * to use vverify()?
+-     */
+-
+-    if (virt_to_phys((void *)insn) < PAGE_SIZE || 
+-	virt_to_phys((void *)(insn + 8)) > virt_to_phys(high_memory) ||
+-	((((dcmd = (insn[0] >> 24) & 0xff) & DCMD_TYPE_MMI) == DCMD_TYPE_MMI) &&
+-	virt_to_phys((void *)(insn + 12)) > virt_to_phys(high_memory))) {
+-	size = 0;
+-	sprintf (buf, "%s%p: address out of range\n",
+-	    prefix, insn);
+-    } else {
+-/* 
+- * FIXME : (void *) cast in virt_to_bus should be unnecessary, because
+- * 	it should take const void * as argument.
+- */
+-#if !defined(CONFIG_MVME16x) && !defined(CONFIG_BVME6000)
+-	sprintf(buf, "%s0x%lx (virt 0x%p) : 0x%08x 0x%08x (virt 0x%p)", 
+-	    (prefix ? prefix : ""), virt_to_bus((void *) insn), insn,  
+-	    insn[0], insn[1], bus_to_virt (insn[1]));
+-#else
+-	/* Remove virtual addresses to reduce output, as they are the same */
+-	sprintf(buf, "%s0x%x (+%x) : 0x%08x 0x%08x", 
+-	    (prefix ? prefix : ""), (u32)insn, ((u32)insn -
+-		(u32)&(((struct NCR53c7x0_hostdata *)host->hostdata[0])->script))/4, 
+-	    insn[0], insn[1]);
+-#endif
+-	tmp = buf + strlen(buf);
+-	if ((dcmd & DCMD_TYPE_MASK) == DCMD_TYPE_MMI)  {
+-#if !defined(CONFIG_MVME16x) && !defined(CONFIG_BVME6000)
+-	    sprintf (tmp, " 0x%08x (virt 0x%p)\n", insn[2], 
+-		bus_to_virt(insn[2]));
+-#else
+-	    /* Remove virtual addr to reduce output, as it is the same */
+-	    sprintf (tmp, " 0x%08x\n", insn[2]);
+-#endif
+-	    size = 3;
+-	} else {
+-	    sprintf (tmp, "\n");
+-	    size = 2;
+-	}
+-    }
+-
+-    if (kernel) 
+-	printk ("%s", buf);
+-#ifdef NCR_DEBUG
+-    else {
+-	size_t len = strlen(buf);
+-	debugger_kernel_write(host, buf, len);
+-    }
+-#endif
+-    return size;
+-}
+-
+-/*
+- * Function : int NCR53c7xx_abort (Scsi_Cmnd *cmd)
+- * 
+- * Purpose : Abort an errant SCSI command, doing all necessary
+- *	cleanup of the issue_queue, running_list, shared Linux/NCR
+- *	dsa issue and reconnect queues.
+- *
+- * Inputs : cmd - command to abort, code - entire result field
+- *
+- * Returns : 0 on success, -1 on failure.
+- */
+-
+-int 
+-NCR53c7xx_abort (Scsi_Cmnd *cmd) {
+-    NCR53c7x0_local_declare();
+-    struct Scsi_Host *host = cmd->device->host;
+-    struct NCR53c7x0_hostdata *hostdata = host ? (struct NCR53c7x0_hostdata *) 
+-	host->hostdata[0] : NULL;
+-    unsigned long flags;
+-    struct NCR53c7x0_cmd *curr, **prev;
+-    Scsi_Cmnd *me, **last;
+-#if 0
+-    static long cache_pid = -1;
+-#endif
+-
+-
+-    if (!host) {
+-	printk ("Bogus SCSI command pid %ld; no host structure\n",
+-	    cmd->pid);
+-	return SCSI_ABORT_ERROR;
+-    } else if (!hostdata) {
+-	printk ("Bogus SCSI host %d; no hostdata\n", host->host_no);
+-	return SCSI_ABORT_ERROR;
+-    }
+-    NCR53c7x0_local_setup(host);
+-
+-/*
+- * CHECK : I don't think that reading ISTAT will unstack any interrupts,
+- *	since we need to write the INTF bit to clear it, and SCSI/DMA
+- * 	interrupts don't clear until we read SSTAT/SIST and DSTAT registers.
+- *	
+- *	See that this is the case.  Appears to be correct on the 710, at least.
+- *
+- * I suspect that several of our failures may be coming from a new fatal
+- * interrupt (possibly due to a phase mismatch) happening after we've left
+- * the interrupt handler, but before the PIC has had the interrupt condition
+- * cleared.
+- */
+-
+-    if (NCR53c7x0_read8(hostdata->istat) & (ISTAT_DIP|ISTAT_SIP)) {
+-	printk ("scsi%d : dropped interrupt for command %ld\n", host->host_no,
+-	    cmd->pid);
+-	NCR53c7x0_intr (host->irq, NULL, NULL);
+-	return SCSI_ABORT_BUSY;
+-    }
+-	
+-    local_irq_save(flags);
+-#if 0
+-    if (cache_pid == cmd->pid) 
+-	panic ("scsi%d : bloody fetus %ld\n", host->host_no, cmd->pid);
+-    else
+-	cache_pid = cmd->pid;
+-#endif
+-	
+-
+-/*
+- * The command could be hiding in the issue_queue.  This would be very
+- * nice, as commands can't be moved from the high level driver's issue queue 
+- * into the shared queue until an interrupt routine is serviced, and this
+- * moving is atomic.  
+- *
+- * If this is the case, we don't have to worry about anything - we simply
+- * pull the command out of the old queue, and call it aborted.
+- */
+-
+-    for (me = (Scsi_Cmnd *) hostdata->issue_queue, 
+-         last = (Scsi_Cmnd **) &(hostdata->issue_queue);
+-	 me && me != cmd;  last = (Scsi_Cmnd **)&(me->SCp.ptr), 
+-	 me = (Scsi_Cmnd *)me->SCp.ptr);
+-
+-    if (me) {
+-	*last = (Scsi_Cmnd *) me->SCp.ptr;
+-	if (me->host_scribble) {
+-	    ((struct NCR53c7x0_cmd *)me->host_scribble)->next = hostdata->free;
+-	    hostdata->free = (struct NCR53c7x0_cmd *) me->host_scribble;
+-	    me->host_scribble = NULL;
+-	}
+-	cmd->result = DID_ABORT << 16;
+-	cmd->scsi_done(cmd);
+-	printk ("scsi%d : found command %ld in Linux issue queue\n", 
+-	    host->host_no, me->pid);
+-	local_irq_restore(flags);
+-    	run_process_issue_queue();
+-	return SCSI_ABORT_SUCCESS;
+-    }
+-
+-/* 
+- * That failing, the command could be in our list of already executing 
+- * commands.  If this is the case, drastic measures are called for.  
+- */ 
+-
+-    for (curr = (struct NCR53c7x0_cmd *) hostdata->running_list, 
+-    	 prev = (struct NCR53c7x0_cmd **) &(hostdata->running_list);
+-	 curr && curr->cmd != cmd; prev = (struct NCR53c7x0_cmd **) 
+-         &(curr->next), curr = (struct NCR53c7x0_cmd *) curr->next);
+-
+-    if (curr) {
+-	if ((curr->result & 0xff) != 0xff && (curr->result & 0xff00) != 0xff00) {
+-	    cmd->result = curr->result;
+-	    if (prev)
+-		*prev = (struct NCR53c7x0_cmd *) curr->next;
+-	    curr->next = (struct NCR53c7x0_cmd *) hostdata->free;
+-	    cmd->host_scribble = NULL;
+-	    hostdata->free = curr;
+-	    cmd->scsi_done(cmd);
+-	    printk ("scsi%d : found finished command %ld in running list\n", 
+-		host->host_no, cmd->pid);
+-	    local_irq_restore(flags);
+-	    return SCSI_ABORT_NOT_RUNNING;
+-	} else {
+-	    printk ("scsi%d : DANGER : command running, can not abort.\n",
+-		cmd->device->host->host_no);
+-	    local_irq_restore(flags);
+-	    return SCSI_ABORT_BUSY;
+-	}
+-    }
+-
+-/* 
+- * And if we couldn't find it in any of our queues, it must have been 
+- * a dropped interrupt.
+- */
+-
+-    curr = (struct NCR53c7x0_cmd *) cmd->host_scribble;
+-    if (curr) {
+-	curr->next = hostdata->free;
+-	hostdata->free = curr;
+-	cmd->host_scribble = NULL;
+-    }
+-
+-    if (curr == NULL || ((curr->result & 0xff00) == 0xff00) ||
+-		((curr->result & 0xff) == 0xff)) {
+-	printk ("scsi%d : did this command ever run?\n", host->host_no);
+-	cmd->result = DID_ABORT << 16;
+-    } else {
+-	printk ("scsi%d : probably lost INTFLY, normal completion\n", 
+-	    host->host_no);
+-        cmd->result = curr->result;
+-/* 
+- * FIXME : We need to add an additional flag which indicates if a 
+- * command was ever counted as BUSY, so if we end up here we can
+- * decrement the busy count if and only if it is necessary.
+- */
+-        --hostdata->busy[cmd->device->id][cmd->device->lun];
+-    }
+-    local_irq_restore(flags);
+-    cmd->scsi_done(cmd);
+-
+-/* 
+- * We need to run process_issue_queue since termination of this command 
+- * may allow another queued command to execute first? 
+- */
+-    return SCSI_ABORT_NOT_RUNNING;
+-}
+-
+-/*
+- * Function : int NCR53c7xx_reset (Scsi_Cmnd *cmd, unsigned int reset_flags) 
+- * 
+- * Purpose : perform a hard reset of the SCSI bus and NCR
+- * 	chip.
+- *
+- * Inputs : cmd - command which caused the SCSI RESET
+- *
+- * Returns : a SCSI_RESET_* status code.
+- */
+- 
+-int 
+-NCR53c7xx_reset (Scsi_Cmnd *cmd, unsigned int reset_flags) {
+-    NCR53c7x0_local_declare();
+-    unsigned long flags;
+-    int found = 0;
+-    struct NCR53c7x0_cmd * c;
+-    Scsi_Cmnd *tmp;
+-    /*
+-     * When we call scsi_done(), it's going to wake up anything sleeping on the
+-     * resources which were in use by the aborted commands, and we'll start to 
+-     * get new commands.
+-     *
+-     * We can't let this happen until after we've re-initialized the driver
+-     * structures, and can't reinitialize those structures until after we've 
+-     * dealt with their contents.
+-     *
+-     * So, we need to find all of the commands which were running, stick
+-     * them on a linked list of completed commands (we'll use the SCp.buffer
+-     * pointer, as return_outstanding_commands() does), do our
+-     * reinitialization, and then call the done function for each command.  
+-     */
+-    Scsi_Cmnd *nuke_list = NULL;
+-    struct Scsi_Host *host = cmd->device->host;
+-    struct NCR53c7x0_hostdata *hostdata = 
+-    	(struct NCR53c7x0_hostdata *) host->hostdata[0];
+-
+-    NCR53c7x0_local_setup(host);
+-    local_irq_save(flags);
+-    ncr_halt (host);
+-    print_lots (host);
+-    dump_events (host, 30);
+-    ncr_scsi_reset (host);
+-    for (tmp = nuke_list = return_outstanding_commands (host, 1 /* free */,
+-	0 /* issue */ ); tmp; tmp = (Scsi_Cmnd *) tmp->SCp.buffer)
+-	if (tmp == cmd) {
+-	    found = 1;
+-	    break;
+-	}
+-	    
+-    /* 
+-     * If we didn't find the command which caused this reset in our running
+-     * list, then we've lost it.  See that it terminates normally anyway.
+-     */
+-    if (!found) {
+-    	c = (struct NCR53c7x0_cmd *) cmd->host_scribble;
+-    	if (c) {
+-	    cmd->host_scribble = NULL;
+-    	    c->next = hostdata->free;
+-    	    hostdata->free = c;
+-    	} else
+-	    printk ("scsi%d: lost command %ld\n", host->host_no, cmd->pid);
+-	cmd->SCp.buffer = (struct scatterlist *) nuke_list;
+-	nuke_list = cmd;
+-    }
+-
+-    NCR53c7x0_driver_init (host);
+-    hostdata->soft_reset (host);
+-    if (hostdata->resets == 0) 
+-	disable(host);
+-    else if (hostdata->resets != -1)
+-	--hostdata->resets;
+-    local_irq_restore(flags);
+-    for (; nuke_list; nuke_list = tmp) {
+-	tmp = (Scsi_Cmnd *) nuke_list->SCp.buffer;
+-    	nuke_list->result = DID_RESET << 16;
+-	nuke_list->scsi_done (nuke_list);
+-    }
+-    return SCSI_RESET_SUCCESS;
+-}
+-
+-/*
+- * The NCR SDMS bios follows Annex A of the SCSI-CAM draft, and 
+- * therefore shares the scsicam_bios_param function.
+- */
+-
+-/*
+- * Function : int insn_to_offset (Scsi_Cmnd *cmd, u32 *insn)
+- *
+- * Purpose : convert instructions stored at NCR pointer into data 
+- *	pointer offset.
+- * 
+- * Inputs : cmd - SCSI command; insn - pointer to instruction.  Either current
+- *	DSP, or saved data pointer.
+- *
+- * Returns : offset on success, -1 on failure.
+- */
+-
+-
+-static int 
+-insn_to_offset (Scsi_Cmnd *cmd, u32 *insn) {
+-    struct NCR53c7x0_hostdata *hostdata = 
+-	(struct NCR53c7x0_hostdata *) cmd->device->host->hostdata[0];
+-    struct NCR53c7x0_cmd *ncmd = 
+-	(struct NCR53c7x0_cmd *) cmd->host_scribble;
+-    int offset = 0, buffers;
+-    struct scatterlist *segment;
+-    char *ptr;
+-    int found = 0;
+-
+-/*
+- * With the current code implementation, if the insn is inside dynamically 
+- * generated code, the data pointer will be the instruction preceding 
+- * the next transfer segment.
+- */
+-
+-    if (!check_address ((unsigned long) ncmd, sizeof (struct NCR53c7x0_cmd)) &&
+-	((insn >= ncmd->data_transfer_start &&  
+-    	    insn < ncmd->data_transfer_end) ||
+-    	(insn >= ncmd->residual &&
+-    	    insn < (ncmd->residual + 
+-    	    	sizeof(ncmd->residual))))) {
+-	    ptr = bus_to_virt(insn[3]);
+-
+-	    if ((buffers = cmd->use_sg)) {
+-    	    	for (offset = 0, 
+-		     	segment = (struct scatterlist *) cmd->request_buffer;
+-    	    	     buffers && !((found = ((ptr >= (char *)page_address(segment->page)+segment->offset) && 
+-    	    	    	    (ptr < ((char *)page_address(segment->page)+segment->offset+segment->length)))));
+-    	    	     --buffers, offset += segment->length, ++segment)
+-#if 0
+-		    printk("scsi%d: comparing 0x%p to 0x%p\n", 
+-			cmd->device->host->host_no, saved, page_address(segment->page+segment->offset));
+-#else
+-		    ;
+-#endif
+-    	    	    offset += ptr - ((char *)page_address(segment->page)+segment->offset);
+-    	    } else {
+-		found = 1;
+-    	    	offset = ptr - (char *) (cmd->request_buffer);
+-    	    }
+-    } else if ((insn >= hostdata->script + 
+-		hostdata->E_data_transfer / sizeof(u32)) &&
+-	       (insn <= hostdata->script +
+-		hostdata->E_end_data_transfer / sizeof(u32))) {
+-    	found = 1;
+-	offset = 0;
+-    }
+-    return found ? offset : -1;
+-}
+-
+-
+-
+-/*
+- * Function : void print_progress (Scsi_Cmnd *cmd) 
+- * 
+- * Purpose : print the current location of the saved data pointer
+- *
+- * Inputs : cmd - command we are interested in
+- *
+- */
+-
+-static void 
+-print_progress (Scsi_Cmnd *cmd) {
+-    NCR53c7x0_local_declare();
+-    struct NCR53c7x0_cmd *ncmd = 
+-	(struct NCR53c7x0_cmd *) cmd->host_scribble;
+-    int offset, i;
+-    char *where;
+-    u32 *ptr;
+-    NCR53c7x0_local_setup (cmd->device->host);
+-
+-    if (check_address ((unsigned long) ncmd,sizeof (struct NCR53c7x0_cmd)) == 0)
+-    {
+-	printk("\nNCR53c7x0_cmd fields:\n");
+-	printk("  bounce.len=0x%x, addr=0x%x, buf[]=0x%02x %02x %02x %02x\n",
+-	    ncmd->bounce.len, ncmd->bounce.addr, ncmd->bounce.buf[0],
+-	    ncmd->bounce.buf[1], ncmd->bounce.buf[2], ncmd->bounce.buf[3]);
+-	printk("  result=%04x, cdb[0]=0x%02x\n", ncmd->result, ncmd->cmnd[0]);
+-    }
+-
+-    for (i = 0; i < 2; ++i) {
+-	if (check_address ((unsigned long) ncmd, 
+-	    sizeof (struct NCR53c7x0_cmd)) == -1) 
+-	    continue;
+-	if (!i) {
+-	    where = "saved";
+-	    ptr = bus_to_virt(ncmd->saved_data_pointer);
+-	} else {
+-	    where = "active";
+-	    ptr = bus_to_virt (NCR53c7x0_read32 (DSP_REG) -
+-		NCR53c7x0_insn_size (NCR53c7x0_read8 (DCMD_REG)) *
+-		sizeof(u32));
+-	} 
+-	offset = insn_to_offset (cmd, ptr);
+-
+-	if (offset != -1) 
+-	    printk ("scsi%d : %s data pointer at offset %d\n",
+-		cmd->device->host->host_no, where, offset);
+-	else {
+-	    int size;
+-	    printk ("scsi%d : can't determine %s data pointer offset\n",
+-		cmd->device->host->host_no, where);
+-	    if (ncmd) {
+-		size = print_insn (cmd->device->host,
+-		    bus_to_virt(ncmd->saved_data_pointer), "", 1);
+-		print_insn (cmd->device->host,
+-		    bus_to_virt(ncmd->saved_data_pointer) + size * sizeof(u32),
+-		    "", 1);
+-	    }
+-	}
+-    }
+-}
+-
+-
+-static void 
+-print_dsa (struct Scsi_Host *host, u32 *dsa, const char *prefix) {
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    int i, len;
+-    char *ptr;
+-    Scsi_Cmnd *cmd;
+-
+-    if (check_address ((unsigned long) dsa, hostdata->dsa_end - 
+-	hostdata->dsa_start) == -1) {
+-	printk("scsi%d : bad dsa virt 0x%p\n", host->host_no, dsa);
+-	return;
+-    }
+-    printk("%sscsi%d : dsa at phys 0x%lx (virt 0x%p)\n"
+-	    "        + %d : dsa_msgout length = %u, data = 0x%x (virt 0x%p)\n" ,
+-    	    prefix ? prefix : "",
+-    	    host->host_no,  virt_to_bus (dsa), dsa, hostdata->dsa_msgout,
+-    	    dsa[hostdata->dsa_msgout / sizeof(u32)],
+-	    dsa[hostdata->dsa_msgout / sizeof(u32) + 1],
+-	    bus_to_virt (dsa[hostdata->dsa_msgout / sizeof(u32) + 1]));
+-
+-    /* 
+-     * Only print messages if they're sane in length so we don't
+-     * blow the kernel printk buffer on something which won't buy us
+-     * anything.
+-     */
+-
+-    if (dsa[hostdata->dsa_msgout / sizeof(u32)] < 
+-	    sizeof (hostdata->free->select)) 
+-	for (i = dsa[hostdata->dsa_msgout / sizeof(u32)],
+-	    ptr = bus_to_virt (dsa[hostdata->dsa_msgout / sizeof(u32) + 1]); 
+-	    i > 0 && !check_address ((unsigned long) ptr, 1);
+-	    ptr += len, i -= len) {
+-	    printk("               ");
+-	    len = spi_print_msg(ptr);
+-	    printk("\n");
+-	    if (!len)
+-		break;
+-	}
+-
+-    printk("        + %d : select_indirect = 0x%x\n",
+-	hostdata->dsa_select, dsa[hostdata->dsa_select / sizeof(u32)]);
+-    cmd = (Scsi_Cmnd *) bus_to_virt(dsa[hostdata->dsa_cmnd / sizeof(u32)]);
+-    printk("        + %d : dsa_cmnd = 0x%x ", hostdata->dsa_cmnd,
+-	   (u32) virt_to_bus(cmd));
+-    /* XXX Maybe we should access cmd->host_scribble->result here. RGH */
+-    if (cmd) {
+-	printk("               result = 0x%x, target = %d, lun = %d, cmd = ",
+-	    cmd->result, cmd->device->id, cmd->device->lun);
+-	__scsi_print_command(cmd->cmnd);
+-    } else
+-	printk("\n");
+-    printk("        + %d : dsa_next = 0x%x\n", hostdata->dsa_next,
+-	dsa[hostdata->dsa_next / sizeof(u32)]);
+-    if (cmd) { 
+-	printk("scsi%d target %d : sxfer_sanity = 0x%x, scntl3_sanity = 0x%x\n"
+-	       "                   script : ",
+-	    host->host_no, cmd->device->id,
+-	    hostdata->sync[cmd->device->id].sxfer_sanity,
+-	    hostdata->sync[cmd->device->id].scntl3_sanity);
+-	for (i = 0; i < (sizeof(hostdata->sync[cmd->device->id].script) / 4); ++i)
+-	    printk ("0x%x ", hostdata->sync[cmd->device->id].script[i]);
+-	printk ("\n");
+-    	print_progress (cmd);
+-    }
+-}
+-/*
+- * Function : void print_queues (Scsi_Host *host) 
+- * 
+- * Purpose : print the contents of the NCR issue and reconnect queues
+- *
+- * Inputs : host - SCSI host we are interested in
+- *
+- */
+-
+-static void 
+-print_queues (struct Scsi_Host *host) {
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    u32 *dsa, *next_dsa;
+-    volatile u32 *ncrcurrent;
+-    int left;
+-    Scsi_Cmnd *cmd, *next_cmd;
+-    unsigned long flags;
+-
+-    printk ("scsi%d : issue queue\n", host->host_no);
+-
+-    for (left = host->can_queue, cmd = (Scsi_Cmnd *) hostdata->issue_queue; 
+-	    left >= 0 && cmd; 
+-	    cmd = next_cmd, --left) {
+-	next_cmd = (Scsi_Cmnd *) cmd->SCp.ptr;
+-	local_irq_save(flags);
+-	if (cmd->host_scribble) {
+-	    if (check_address ((unsigned long) (cmd->host_scribble), 
+-		sizeof (cmd->host_scribble)) == -1)
+-		printk ("scsi%d: scsi pid %ld bad pointer to NCR53c7x0_cmd\n",
+-		    host->host_no, cmd->pid);
+-	    /* print_dsa does sanity check on address, no need to check */
+-	    else
+-	    	print_dsa (host, ((struct NCR53c7x0_cmd *) cmd->host_scribble)
+-		    -> dsa, "");
+-	} else 
+-	    printk ("scsi%d : scsi pid %ld for target %d lun %d has no NCR53c7x0_cmd\n",
+-		host->host_no, cmd->pid, cmd->device->id, cmd->device->lun);
+-	local_irq_restore(flags);
+-    }
+-
+-    if (left <= 0) {
+-	printk ("scsi%d : loop detected in issue queue\n",
+-	    host->host_no);
+-    }
+-
+-    /*
+-     * Traverse the NCR reconnect and start DSA structures, printing out 
+-     * each element until we hit the end or detect a loop.  Currently,
+-     * the reconnect structure is a linked list; and the start structure
+-     * is an array.  Eventually, the reconnect structure will become a 
+-     * list as well, since this simplifies the code.
+-     */
+-
+-    printk ("scsi%d : schedule dsa array :\n", host->host_no);
+-    for (left = host->can_queue, ncrcurrent = hostdata->schedule;
+-	    left > 0; ncrcurrent += 2, --left)
+-	if (ncrcurrent[0] != hostdata->NOP_insn) 
+-/* FIXME : convert pointer to dsa_begin to pointer to dsa. */
+-	    print_dsa (host, bus_to_virt (ncrcurrent[1] - 
+-		(hostdata->E_dsa_code_begin - 
+-		hostdata->E_dsa_code_template)), "");
+-    printk ("scsi%d : end schedule dsa array\n", host->host_no);
+-    
+-    printk ("scsi%d : reconnect_dsa_head :\n", host->host_no);
+-	    
+-    for (left = host->can_queue, 
+-	dsa = bus_to_virt (hostdata->reconnect_dsa_head);
+-	left >= 0 && dsa; 
+-	dsa = next_dsa, --left) {
+-	local_irq_save(flags);
+-	if (check_address ((unsigned long) dsa, sizeof(dsa)) == -1) {
+-	    printk ("scsi%d: bad DSA pointer 0x%p\n", host->host_no,
+-		dsa);
+-	    next_dsa = NULL;
+-	}
+-	else 
+-	{
+-	    next_dsa = bus_to_virt(dsa[hostdata->dsa_next / sizeof(u32)]);
+-	    print_dsa (host, dsa, "");
+-	}
+-	local_irq_restore(flags);
+-    }
+-    printk ("scsi%d : end reconnect_dsa_head\n", host->host_no);
+-    if (left < 0)
+-	printk("scsi%d: possible loop in ncr reconnect list\n",
+-	    host->host_no);
+-}
+-
+-static void
+-print_lots (struct Scsi_Host *host) {
+-    NCR53c7x0_local_declare();
+-    struct NCR53c7x0_hostdata *hostdata = 
+-	(struct NCR53c7x0_hostdata *) host->hostdata[0];
+-    u32 *dsp_next, *dsp, *dsa, dbc_dcmd;
+-    unsigned char dcmd, sbcl;
+-    int i, size;
+-    NCR53c7x0_local_setup(host);
+-
+-    if ((dsp_next = bus_to_virt(NCR53c7x0_read32 (DSP_REG)))) {
+-    	dbc_dcmd = NCR53c7x0_read32(DBC_REG);
+-    	dcmd = (dbc_dcmd & 0xff000000) >> 24;
+-    	dsp = dsp_next - NCR53c7x0_insn_size(dcmd);
+-	dsa = bus_to_virt(NCR53c7x0_read32(DSA_REG));
+-	sbcl = NCR53c7x0_read8 (SBCL_REG);
+-	    
+-	/*
+-	 * For the 53c710, the following will report value 0 for SCNTL3
+-	 * and STEST0 - we don't have these registers.
+-	 */
+-    	printk ("scsi%d : DCMD|DBC=0x%x, DNAD=0x%x (virt 0x%p)\n"
+-		"         DSA=0x%lx (virt 0x%p)\n"
+-	        "         DSPS=0x%x, TEMP=0x%x (virt 0x%p), DMODE=0x%x\n"
+-		"         SXFER=0x%x, SCNTL3=0x%x\n"
+-		"         %s%s%sphase=%s, %d bytes in SCSI FIFO\n"
+-		"         SCRATCH=0x%x, saved2_dsa=0x%0lx\n",
+-	    host->host_no, dbc_dcmd, NCR53c7x0_read32(DNAD_REG),
+-		bus_to_virt(NCR53c7x0_read32(DNAD_REG)),
+-	    virt_to_bus(dsa), dsa,
+-	    NCR53c7x0_read32(DSPS_REG), NCR53c7x0_read32(TEMP_REG), 
+-	    bus_to_virt (NCR53c7x0_read32(TEMP_REG)),
+-	    (int) NCR53c7x0_read8(hostdata->dmode),
+-	    (int) NCR53c7x0_read8(SXFER_REG), 
+-	    ((hostdata->chip / 100) == 8) ?
+-		(int) NCR53c7x0_read8(SCNTL3_REG_800) : 0,
+-	    (sbcl & SBCL_BSY) ? "BSY " : "",
+-	    (sbcl & SBCL_SEL) ? "SEL " : "",
+-	    (sbcl & SBCL_REQ) ? "REQ " : "",
+-	    sstat2_to_phase(NCR53c7x0_read8 (((hostdata->chip / 100) == 8) ?
+-	    	SSTAT1_REG : SSTAT2_REG)),
+-	    (NCR53c7x0_read8 ((hostdata->chip / 100) == 8 ? 
+-		SSTAT1_REG : SSTAT2_REG) & SSTAT2_FF_MASK) >> SSTAT2_FF_SHIFT,
+-	    ((hostdata->chip / 100) == 8) ? NCR53c7x0_read8 (STEST0_REG_800) :
+-		NCR53c7x0_read32(SCRATCHA_REG_800),
+-	    hostdata->saved2_dsa);
+-	printk ("scsi%d : DSP 0x%lx (virt 0x%p) ->\n", host->host_no, 
+-	    virt_to_bus(dsp), dsp);
+-    	for (i = 6; i > 0; --i, dsp += size)
+-	    size = print_insn (host, dsp, "", 1);
+-	if (NCR53c7x0_read8 (SCNTL1_REG) & SCNTL1_CON)  {
+-	    if ((hostdata->chip / 100) == 8)
+-	        printk ("scsi%d : connected (SDID=0x%x, SSID=0x%x)\n",
+-		    host->host_no, NCR53c7x0_read8 (SDID_REG_800),
+-		    NCR53c7x0_read8 (SSID_REG_800));
+-	    else
+-		printk ("scsi%d : connected (SDID=0x%x)\n",
+-		    host->host_no, NCR53c7x0_read8 (SDID_REG_700));
+-	    print_dsa (host, dsa, "");
+-	}
+-
+-#if 1
+-	print_queues (host);
+-#endif
+-    }
+-}
+-
+-/*
+- * Function : static int shutdown (struct Scsi_Host *host)
+- * 
+- * Purpose : does a clean (we hope) shutdown of the NCR SCSI 
+- *	chip.  Use prior to dumping core, unloading the NCR driver,
+- * 
+- * Returns : 0 on success
+- */
+-static int 
+-shutdown (struct Scsi_Host *host) {
+-    NCR53c7x0_local_declare();
+-    unsigned long flags;
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    NCR53c7x0_local_setup(host);
+-    local_irq_save(flags);
+-/* Get in a state where we can reset the SCSI bus */
+-    ncr_halt (host);
+-    ncr_scsi_reset (host);
+-    hostdata->soft_reset(host);
+-
+-    disable (host);
+-    local_irq_restore(flags);
+-    return 0;
+-}
+-
+-/*
+- * Function : void ncr_scsi_reset (struct Scsi_Host *host)
+- *
+- * Purpose : reset the SCSI bus.
+- */
+-
+-static void 
+-ncr_scsi_reset (struct Scsi_Host *host) {
+-    NCR53c7x0_local_declare();
+-    unsigned long flags;
+-    NCR53c7x0_local_setup(host);
+-    local_irq_save(flags);
+-    NCR53c7x0_write8(SCNTL1_REG, SCNTL1_RST);
+-    udelay(25);	/* Minimum amount of time to assert RST */
+-    NCR53c7x0_write8(SCNTL1_REG, 0);
+-    local_irq_restore(flags);
+-}
+-
+-/* 
+- * Function : void hard_reset (struct Scsi_Host *host)
+- *
+- * Purpose : reset the SCSI bus and reinitialize the NCR chip.
+- */
+-
+-static void 
+-hard_reset (struct Scsi_Host *host) {
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    unsigned long flags;
+-    local_irq_save(flags);
+-    ncr_scsi_reset(host);
+-    NCR53c7x0_driver_init (host);
+-    if (hostdata->soft_reset)
+-	hostdata->soft_reset (host);
+-    local_irq_restore(flags);
+-}
+-
+-
+-/*
+- * Function : Scsi_Cmnd *return_outstanding_commands (struct Scsi_Host *host,
+- *	int free, int issue)
+- *
+- * Purpose : return a linked list of Scsi_Cmnd structures that had
+- *	propagated below the Linux issue queue level (the SCp.buffer field
+- *	is used as next so we don't perturb hostdata; we can't use a field
+- *	of the NCR53c7x0_cmd structure since one may not have been
+- *	allocated for the command causing the reset).  If free is set, 
+- *	free the NCR53c7x0_cmd structures which are associated with 
+- *	the Scsi_Cmnd structures, and clean up any internal 
+- *	NCR lists that the commands were on.  If issue is set,
+- *	also return commands in the issue queue.
+- *
+- * Returns : linked list of commands
+- *
+- * NOTE : the caller should ensure that the NCR chip is halted
+- *	if the free flag is set. 
+- */
+-
+-static Scsi_Cmnd *
+-return_outstanding_commands (struct Scsi_Host *host, int free, int issue) {
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    struct NCR53c7x0_cmd *c, *next;
+-    int i;
+-    u32 *ncrcurrent;
+-    Scsi_Cmnd *list = NULL, *tmp;
+-    for (c = (struct NCR53c7x0_cmd *) hostdata->running_list; c; c = next) {
+-	/* Save the link now; the free-list splicing below rewrites c->next. */
+-	next = (struct NCR53c7x0_cmd *) c->next;
+-	if (c->cmd->SCp.buffer) {
+-	    printk ("scsi%d : loop detected in running list!\n", host->host_no);
+-	    break;
+-	}
+-
+-	c->cmd->SCp.buffer = (struct scatterlist *) list;
+-	list = c->cmd;
+-	if (free) {
+-    	    c->next = hostdata->free;
+-    	    hostdata->free = c;
+-	}
+-    }
+-
+-    if (free) { 
+-	for (i = 0, ncrcurrent = (u32 *) hostdata->schedule; 
+-	    i < host->can_queue; ++i, ncrcurrent += 2) {
+-	    ncrcurrent[0] = hostdata->NOP_insn;
+-	    ncrcurrent[1] = 0xdeadbeef;
+-	}
+-	hostdata->ncrcurrent = NULL;
+-    }
+-
+-    if (issue) {
+-	for (tmp = (Scsi_Cmnd *) hostdata->issue_queue; tmp;
+-		tmp = (Scsi_Cmnd *) tmp->SCp.ptr) {
+-	    if (tmp->SCp.buffer) {
+-		printk ("scsi%d : loop detected in issue queue!\n", 
+-			host->host_no);
+-		break;
+-	    }
+-	    tmp->SCp.buffer = (struct scatterlist *) list;
+-	    list = tmp;
+-	}
+-	if (free)
+-	    hostdata->issue_queue = NULL;
+-		
+-    }
+-    return list;
+-}
+-
+-/* 
+- * Function : static int disable (struct Scsi_Host *host)
+- *
+- * Purpose : disables the given NCR host, causing all commands
+- * 	to return a driver error.  Call this so we can unload the
+- * 	module during development and try again.  Eventually, 
+- * 	we should be able to find clean workarounds for these
+- * 	problems.
+- *
+- * Inputs : host - hostadapter to twiddle
+- *
+- * Returns : 0 on success.
+- */
+-
+-static int 
+-disable (struct Scsi_Host *host) {
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    unsigned long flags;
+-    Scsi_Cmnd *nuke_list, *tmp;
+-    local_irq_save(flags);
+-    if (hostdata->state != STATE_HALTED)
+-	ncr_halt (host);
+-    nuke_list = return_outstanding_commands (host, 1 /* free */, 1 /* issue */);
+-    hard_reset (host);
+-    hostdata->state = STATE_DISABLED;
+-    local_irq_restore(flags);
+-    printk ("scsi%d : nuking commands\n", host->host_no);
+-    for (; nuke_list; nuke_list = tmp) {
+-	    tmp = (Scsi_Cmnd *) nuke_list->SCp.buffer;
+-	    nuke_list->result = DID_ERROR << 16;
+-	    nuke_list->scsi_done(nuke_list);
+-    }
+-    printk ("scsi%d : done. \n", host->host_no);
+-    printk (KERN_ALERT "scsi%d : disabled.  Unload and reload\n",
+-    	host->host_no);
+-    return 0;
+-}
+-
+-/*
+- * Function : static int ncr_halt (struct Scsi_Host *host)
+- * 
+- * Purpose : halts the SCSI SCRIPTS(tm) processor on the NCR chip
+- *
+- * Inputs : host - SCSI chip to halt
+- *
+- * Returns : 0 on success
+- */
+-
+-static int 
+-ncr_halt (struct Scsi_Host *host) {
+-    NCR53c7x0_local_declare();
+-    unsigned long flags;
+-    unsigned char istat, tmp;
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    int stage;
+-    NCR53c7x0_local_setup(host);
+-
+-    local_irq_save(flags);
+-    /* Stage 0 : eat all interrupts
+-       Stage 1 : set ABORT
+-       Stage 2 : eat all but abort interrupts
+-       Stage 3 : eat all interrupts
+-     */
+-    for (stage = 0;;) {
+-	if (stage == 1) {
+-	    NCR53c7x0_write8(hostdata->istat, ISTAT_ABRT);
+-	    ++stage;
+-	}
+-	istat = NCR53c7x0_read8 (hostdata->istat);
+-	if (istat & ISTAT_SIP) {
+-	    tmp = NCR53c7x0_read8(SSTAT0_REG);
+-	} else if (istat & ISTAT_DIP) {
+-	    tmp = NCR53c7x0_read8(DSTAT_REG);
+-	    if (stage == 2) {
+-		if (tmp & DSTAT_ABRT) {
+-		    NCR53c7x0_write8(hostdata->istat, 0);
+-		    ++stage;
+-		} else {
+-		    printk(KERN_ALERT "scsi%d : could not halt NCR chip\n", 
+-			host->host_no);
+-		    disable (host);
+-	    	}
+-    	    }
+-	}
+-	if (!(istat & (ISTAT_SIP|ISTAT_DIP))) {
+-	    if (stage == 0)
+-	    	++stage;
+-	    else if (stage == 3)
+-		break;
+-	}
+-    }
+-    hostdata->state = STATE_HALTED;
+-    local_irq_restore(flags);
+-#if 0
+-    print_lots (host);
+-#endif
+-    return 0;
+-}
+-
+-/* 
+- * Function: event_name (int event)
+- * 
+- * Purpose: map event enum into user-readable strings.
+- */
+-
+-static const char *
+-event_name (int event) {
+-    switch (event) {
+-    case EVENT_NONE:		return "none";
+-    case EVENT_ISSUE_QUEUE:	return "to issue queue";
+-    case EVENT_START_QUEUE:	return "to start queue";
+-    case EVENT_SELECT:		return "selected";
+-    case EVENT_DISCONNECT:	return "disconnected";
+-    case EVENT_RESELECT:	return "reselected";
+-    case EVENT_COMPLETE:	return "completed";
+-    case EVENT_IDLE:		return "idle";
+-    case EVENT_SELECT_FAILED:	return "select failed";
+-    case EVENT_BEFORE_SELECT:	return "before select";
+-    case EVENT_RESELECT_FAILED:	return "reselect failed";
+-    default:			return "unknown";
+-    }
+-}
+-
+-/*
+- * Function : void dump_events (struct Scsi_Host *host, count)
+- *
+- * Purpose : print last count events which have occurred.
+- */ 
+-static void
+-dump_events (struct Scsi_Host *host, int count) {
+-    struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-	host->hostdata[0];
+-    struct NCR53c7x0_event event;
+-    int i;
+-    unsigned long flags;
+-    if (hostdata->events) {
+-	if (count > hostdata->event_size)
+-	    count = hostdata->event_size;
+-	for (i = hostdata->event_index; count > 0; 
+-	    i = (i ? i - 1 : hostdata->event_size -1), --count) {
+-/*
+- * By copying the event we're currently examining with interrupts
+- * disabled, we can do multiple printk(), etc. operations and 
+- * still be guaranteed that they're happening on the same 
+- * event structure.
+- */
+-	    local_irq_save(flags);
+-#if 0
+-	    event = hostdata->events[i];
+-#else
+-	    memcpy ((void *) &event, (void *) &(hostdata->events[i]),
+-		sizeof(event));
+-#endif
+-
+-	    local_irq_restore(flags);
+-	    printk ("scsi%d : %s event %d at %ld secs %ld usecs target %d lun %d\n",
+-		host->host_no, event_name (event.event), count,
+-		(long) event.time.tv_sec, (long) event.time.tv_usec,
+-		event.target, event.lun);
+-	    if (event.dsa) 
+-		printk ("         event for dsa 0x%lx (virt 0x%p)\n", 
+-		    virt_to_bus(event.dsa), event.dsa);
+-	    if (event.pid != -1) {
+-		printk ("         event for pid %ld ", event.pid);
+-		__scsi_print_command (event.cmnd);
+-	    }
+-	}
+-    }
+-}
+-
+-/*
+- * Function: check_address
+- *
+- * Purpose: Check to see if a possibly corrupt pointer will fault the 
+- *	kernel.
+- *
+- * Inputs: addr - address; size - size of area
+- *
+- * Returns: 0 if area is OK, -1 on error.
+- *
+- * NOTES: should be implemented in terms of vverify on kernels 
+- *	that have it.
+- */
+-
+-static int 
+-check_address (unsigned long addr, int size) {
+-    return (virt_to_phys((void *)addr) < PAGE_SIZE ||
+-	    virt_to_phys((void *)(addr + size)) > virt_to_phys(high_memory))
+-	? -1 : 0;
+-}
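+-
+-/*
+- * Typical use in this file (this mirrors print_progress() above) :
+- *
+- *	if (check_address ((unsigned long) ncmd,
+- *		sizeof (struct NCR53c7x0_cmd)) == -1)
+- *	    continue;
+- */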
+-
+-#ifdef MODULE
+-int 
+-NCR53c7x0_release(struct Scsi_Host *host) {
+-    struct NCR53c7x0_hostdata *hostdata = 
+-	(struct NCR53c7x0_hostdata *) host->hostdata[0];
+-    struct NCR53c7x0_cmd *cmd, *tmp;
+-    shutdown (host);
+-    if (host->irq != SCSI_IRQ_NONE)
+-	{
+-	    int irq_count;
+-	    struct Scsi_Host *tmp;
+-	    for (irq_count = 0, tmp = first_host; tmp; tmp = tmp->next)
+-		if (tmp->hostt == the_template && tmp->irq == host->irq)
+-		    ++irq_count;
+-	    if (irq_count == 1)
+-		free_irq(host->irq, NULL);
+-	}
+-    if (host->dma_channel != DMA_NONE)
+-	free_dma(host->dma_channel);
+-    if (host->io_port)
+-	release_region(host->io_port, host->n_io_port);
+-    
+-    for (cmd = (struct NCR53c7x0_cmd *) hostdata->free; cmd; cmd = tmp, 
+-	--hostdata->num_cmds) {
+-	tmp = (struct NCR53c7x0_cmd *) cmd->next;
+-    /* 
+-     * If we're going to loop, try to stop it to get a more accurate
+-     * count of the leaked commands.
+-     */
+-	cmd->next = NULL;
+-	if (cmd->free)
+-	    cmd->free ((void *) cmd->real, cmd->size);
+-    }
+-    if (hostdata->num_cmds)
+-	printk ("scsi%d : leaked %d NCR53c7x0_cmd structures\n",
+-	    host->host_no, hostdata->num_cmds);
+-
+-    vfree(hostdata->events);
+-
+-    /* XXX This assumes default cache mode to be IOMAP_FULL_CACHING, which
+-     * XXX may be invalid (CONFIG_060_WRITETHROUGH)
+-     */
+-    kernel_set_cachemode((void *)hostdata, 8192, IOMAP_FULL_CACHING);
+-    free_pages ((u32)hostdata, 1);
+-    return 1;
+-}
+-#endif /* def MODULE */
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx.h linux-2.6.22-try2/drivers/scsi/53c7xx.h
+--- linux-2.6.22-570/drivers/scsi/53c7xx.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/53c7xx.h	1969-12-31 19:00:00.000000000 -0500
+@@ -1,1608 +0,0 @@
+-/*
+- * 53c710 driver.  Modified from Drew Eckhardt's driver
+- * for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk]
+- *
+- * I have left the code for the 53c8xx family in here, because it didn't
+- * seem worth removing it.  The possibility of IO_MAPPED chips rather
+- * than MEMORY_MAPPED remains, in case someone wants to add support for
+- * 53c710 chips on Intel PCs (some older machines have them on the
+- * motherboard).
+- *
+- * NOTE THERE MAY BE PROBLEMS WITH CASTS IN read8 AND Co.
+- */
+-
+-/*
+- * NCR 53c{7,8}0x0 driver, header file
+- *
+- * Sponsored by
+- *      iX Multiuser Multitasking Magazine
+- *	Hannover, Germany
+- *	hm@ix.de	
+- *
+- * Copyright 1993, 1994, 1995 Drew Eckhardt
+- *      Visionary Computing 
+- *      (Unix and Linux consulting and custom programming)
+- *      drew@PoohSticks.ORG
+- *	+1 (303) 786-7975
+- *
+- * TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation.
+- * 
+- * PRE-ALPHA
+- *
+- * For more information, please consult 
+- *
+- * NCR 53C700/53C700-66
+- * SCSI I/O Processor
+- * Data Manual
+- *
+- * NCR 53C810
+- * PCI-SCSI I/O Processor 
+- * Data Manual
+- *
+- * NCR Microelectronics
+- * 1635 Aeroplaza Drive
+- * Colorado Springs, CO 80916
+- * +1 (719) 578-3400
+- *
+- * Toll free literature number
+- * +1 (800) 334-5454
+- *
+- */
+-
+-#ifndef NCR53c710_H
+-#define NCR53c710_H
+-
+-#ifndef HOSTS_C
+-
+-/* SCSI control 0 rw, default = 0xc0 */ 
+-#define SCNTL0_REG 		0x00	
+-#define SCNTL0_ARB1		0x80	/* 0 0 = simple arbitration */
+-#define SCNTL0_ARB2		0x40	/* 1 1 = full arbitration */
+-#define SCNTL0_STRT		0x20	/* Start Sequence */
+-#define SCNTL0_WATN		0x10	/* Select with ATN */
+-#define SCNTL0_EPC		0x08	/* Enable parity checking */
+-/* Bit 2 is reserved on 800 series chips */
+-#define SCNTL0_EPG_700		0x04	/* Enable parity generation */
+-#define SCNTL0_AAP		0x02	/*  ATN/ on parity error */
+-#define SCNTL0_TRG		0x01	/* Target mode */
+-
+-/* SCSI control 1 rw, default = 0x00 */
+-
+-#define SCNTL1_REG 		0x01	
+-#define SCNTL1_EXC		0x80	/* Extra Clock Cycle of Data setup */
+-#define SCNTL1_ADB		0x40	/*  contents of SODL on bus */
+-#define SCNTL1_ESR_700		0x20	/* Enable SIOP response to selection 
+-					   and reselection */
+-#define SCNTL1_DHP_800		0x20	/* Disable halt on parity error or ATN
+-					   target mode only */
+-#define SCNTL1_CON		0x10	/* Connected */
+-#define SCNTL1_RST		0x08	/* SCSI RST/ */
+-#define SCNTL1_AESP		0x04	/* Force bad parity */
+-#define SCNTL1_SND_700		0x02	/* Start SCSI send */
+-#define SCNTL1_IARB_800		0x02	/* Immediate Arbitration, start
+-					   arbitration immediately after
+-					   busfree is detected */
+-#define SCNTL1_RCV_700		0x01	/* Start SCSI receive */
+-#define SCNTL1_SST_800		0x01	/* Start SCSI transfer */
+-
+-/* SCSI control 2 rw, */
+-
+-#define SCNTL2_REG_800		0x02	
+-#define SCNTL2_800_SDU		0x80	/* SCSI disconnect unexpected */
+-
+-/* SCSI control 3 rw */
+-
+-#define SCNTL3_REG_800 		0x03	
+-#define SCNTL3_800_SCF_SHIFT	4
+-#define SCNTL3_800_SCF_MASK	0x70
+-#define SCNTL3_800_SCF2		0x40	/* Synchronous divisor */
+-#define SCNTL3_800_SCF1		0x20	/* 0x00 = SCLK/3 */
+-#define SCNTL3_800_SCF0		0x10	/* 0x10 = SCLK/1 */
+-					/* 0x20 = SCLK/1.5 
+-					   0x30 = SCLK/2 
+-					   0x40 = SCLK/3 */
+-	    
+-#define SCNTL3_800_CCF_SHIFT	0
+-#define SCNTL3_800_CCF_MASK	0x07
+-#define SCNTL3_800_CCF2		0x04	/* 0x00 50.01 to 66 */
+-#define SCNTL3_800_CCF1		0x02	/* 0x01 16.67 to 25 */
+-#define SCNTL3_800_CCF0		0x01	/* 0x02	25.01 - 37.5 
+-					   0x03	37.51 - 50 
+-					   0x04 50.01 - 66 */
+-
+-/*  
+- * SCSI destination ID rw - the appropriate bit is set for the selected
+- * target ID.  This is written by the SCSI SCRIPTS processor.
+- * default = 0x00
+- */
+-#define SDID_REG_700  		0x02	
+-#define SDID_REG_800		0x06
+-
+-#define GP_REG_800		0x07	/* General purpose IO */
+-#define GP_800_IO1		0x02
+-#define GP_800_IO2		0x01
+-
+-/* SCSI interrupt enable rw, default = 0x00 */
+-#define SIEN_REG_700		0x03	
+-#define SIEN0_REG_800		0x40
+-#define SIEN_MA			0x80	/* Phase mismatch (ini) or ATN (tgt) */
+-#define SIEN_FC			0x40	/* Function complete */
+-#define SIEN_700_STO		0x20	/* Selection or reselection timeout */
+-#define SIEN_800_SEL		0x20	/* Selected */
+-#define SIEN_700_SEL		0x10	/* Selected or reselected */
+-#define SIEN_800_RESEL		0x10	/* Reselected */
+-#define SIEN_SGE		0x08	/* SCSI gross error */
+-#define SIEN_UDC		0x04	/* Unexpected disconnect */
+-#define SIEN_RST		0x02	/* SCSI RST/ received */
+-#define SIEN_PAR		0x01	/* Parity error */
+-
+-/* 
+- * SCSI chip ID rw
+- * NCR53c700 : 
+- * 	When arbitrating, the highest bit is used, when reselection or selection
+- * 	occurs, the chip responds to all IDs for which a bit is set.
+- * 	default = 0x00 
+- * NCR53c810 : 
+- *	Uses bit mapping
+- */
+-#define SCID_REG		0x04	
+-/* Bit 7 is reserved on 800 series chips */
+-#define SCID_800_RRE		0x40	/* Enable response to reselection */
+-#define SCID_800_SRE		0x20	/* Enable response to selection */
+-/* Bits four and three are reserved on 800 series chips */
+-#define SCID_800_ENC_MASK	0x07	/* Encoded SCSI ID */
+-
+-/* SCSI transfer rw, default = 0x00 */
+-#define SXFER_REG		0x05
+-#define SXFER_DHP		0x80	/* Disable halt on parity */
+-
+-#define SXFER_TP2		0x40	/* Transfer period msb */
+-#define SXFER_TP1		0x20
+-#define SXFER_TP0		0x10	/* lsb */
+-#define SXFER_TP_MASK		0x70
+-/* FIXME : SXFER_TP_SHIFT == 5 is right for '8xx chips */
+-#define SXFER_TP_SHIFT		5
+-#define SXFER_TP_4		0x00	/* Divisors */
+-#define SXFER_TP_5		0x10<<1
+-#define SXFER_TP_6		0x20<<1
+-#define SXFER_TP_7		0x30<<1
+-#define SXFER_TP_8		0x40<<1
+-#define SXFER_TP_9		0x50<<1
+-#define SXFER_TP_10		0x60<<1
+-#define SXFER_TP_11		0x70<<1
+-
+-#define SXFER_MO3		0x08	/* Max offset msb */
+-#define SXFER_MO2		0x04
+-#define SXFER_MO1		0x02
+-#define SXFER_MO0		0x01	/* lsb */
+-#define SXFER_MO_MASK		0x0f
+-#define SXFER_MO_SHIFT		0
+-
+-/* 
+- * SCSI output data latch rw
+- * The contents of this register are driven onto the SCSI bus when 
+- * the Assert Data Bus bit of the SCNTL1 register is set and 
+- * the CD, IO, and MSG bits of the SOCL register match the SCSI phase
+- */
+-#define SODL_REG_700		0x06	
+-#define SODL_REG_800		0x54
+-
+-
+-/* 
+- * SCSI output control latch rw, default = 0 
+- * Note that when the chip is being manually programmed as an initiator,
+- * the MSG, CD, and IO bits must be set correctly for the phase the target
+- * is driving the bus in.  Otherwise no data transfer will occur due to 
+- * phase mismatch.
+- */
+-
+-#define SOCL_REG		0x07
+-#define SOCL_REQ		0x80	/*  REQ */
+-#define SOCL_ACK		0x40	/*  ACK */
+-#define SOCL_BSY		0x20	/*  BSY */
+-#define SOCL_SEL		0x10	/*  SEL */
+-#define SOCL_ATN		0x08	/*  ATN */
+-#define SOCL_MSG		0x04	/*  MSG */
+-#define SOCL_CD			0x02	/*  C/D */
+-#define SOCL_IO			0x01	/*  I/O */
+-
+-/* 
+- * SCSI first byte received latch ro 
+- * This register contains the first byte received during a block MOVE 
+- * SCSI SCRIPTS instruction, including
+- * 
+- * Initiator mode	Target mode
+- * Message in		Command
+- * Status		Message out
+- * Data in		Data out
+- *
+- * It also contains the selecting or reselecting device's ID and our 
+- * ID.
+- *
+- * Note that this is the register the various IF conditionals can 
+- * operate on.
+- */
+-#define SFBR_REG		0x08	
+-
+-/* 
+- * SCSI input data latch ro
+- * In initiator mode, data is latched into this register on the rising
+- * edge of REQ/. In target mode, data is latched on the rising edge of 
+- * ACK/
+- */
+-#define SIDL_REG_700		0x09
+-#define SIDL_REG_800		0x50
+-
+-/* 
+- * SCSI bus data lines ro 
+- * This register reflects the instantaneous status of the SCSI data 
+- * lines.  Note that SCNTL0 must be set to disable parity checking, 
+- * otherwise reading this register will latch new parity.
+- */
+-#define SBDL_REG_700		0x0a
+-#define SBDL_REG_800		0x58
+-
+-#define SSID_REG_800		0x0a
+-#define SSID_800_VAL		0x80	/* Exactly two bits asserted at sel */
+-#define SSID_800_ENCID_MASK	0x07	/* Device which performed operation */
+-
+-
+-/* 
+- * SCSI bus control lines rw, 
+- * instantaneous readout of control lines 
+- */
+-#define SBCL_REG		0x0b 	
+-#define SBCL_REQ		0x80	/*  REQ ro */
+-#define SBCL_ACK		0x40	/*  ACK ro */
+-#define SBCL_BSY		0x20	/*  BSY ro */
+-#define SBCL_SEL		0x10	/*  SEL ro */
+-#define SBCL_ATN		0x08	/*  ATN ro */
+-#define SBCL_MSG		0x04	/*  MSG ro */
+-#define SBCL_CD			0x02	/*  C/D ro */
+-#define SBCL_IO			0x01	/*  I/O ro */
+-#define SBCL_PHASE_CMDOUT	SBCL_CD
+-#define SBCL_PHASE_DATAIN	SBCL_IO
+-#define SBCL_PHASE_DATAOUT	0
+-#define SBCL_PHASE_MSGIN	(SBCL_CD|SBCL_IO|SBCL_MSG)
+-#define SBCL_PHASE_MSGOUT	(SBCL_CD|SBCL_MSG)
+-#define SBCL_PHASE_STATIN	(SBCL_CD|SBCL_IO)
+-#define SBCL_PHASE_MASK		(SBCL_CD|SBCL_IO|SBCL_MSG)
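+-/*
+- * A minimal sketch of a phase test built on these encodings (hypothetical
+- * code, assuming the NCR53c7x0_read8() accessor used throughout this
+- * driver; handle_msgin() is a made-up handler name) :
+- *
+- *	if ((NCR53c7x0_read8 (SBCL_REG) & SBCL_PHASE_MASK) ==
+- *		SBCL_PHASE_MSGIN)
+- *	    handle_msgin ();
+- */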
+-/* 
+- * Synchronous SCSI Clock Control bits 
+- * 0 - set by DCNTL 
+- * 1 - SCLK / 1.0
+- * 2 - SCLK / 1.5
+- * 3 - SCLK / 2.0 
+- */
+-#define SBCL_SSCF1		0x02	/* wo, -66 only */
+-#define SBCL_SSCF0		0x01	/* wo, -66 only */
+-#define SBCL_SSCF_MASK		0x03
+-
+-/* 
+- * XXX note : when reading the DSTAT and SSTAT registers to clear interrupts,
+- * ensure that 10 clocks elapse between the two reads.
+- */
+-/* DMA status ro */
+-#define DSTAT_REG		0x0c	
+-#define DSTAT_DFE		0x80	/* DMA FIFO empty */
+-#define DSTAT_800_MDPE		0x40	/* Master Data Parity Error */
+-#define DSTAT_800_BF		0x20	/* Bus Fault */
+-#define DSTAT_ABRT		0x10	/* Aborted - set on error */
+-#define DSTAT_SSI		0x08	/* SCRIPTS single step interrupt */
+-#define DSTAT_SIR		0x04	/* SCRIPTS interrupt received - 
+-					   set when INT instruction is 
+-					   executed */
+-#define DSTAT_WTD		0x02	/* Watchdog timeout detected */
+-#define DSTAT_OPC		0x01	/* Illegal instruction */
+-#define DSTAT_800_IID		0x01	/* Same thing, different name */
+-
+-
+-/* NCR53c800 moves this stuff into SIST0 */
+-#define SSTAT0_REG		0x0d	/* SCSI status 0 ro */
+-#define SIST0_REG_800		0x42	
+-#define SSTAT0_MA		0x80	/* ini : phase mismatch,
+-					 * tgt : ATN/ asserted 
+-					 */
+-#define SSTAT0_CMP		0x40	/* function complete */
+-#define SSTAT0_700_STO		0x20	/* Selection or reselection timeout */
+-#define SIST0_800_SEL		0x20	/* Selected */
+-#define SSTAT0_700_SEL		0x10	/* Selected or reselected */
+-#define SIST0_800_RSL		0x10	/* Reselected */
+-#define SSTAT0_SGE		0x08	/* SCSI gross error */
+-#define SSTAT0_UDC		0x04	/* Unexpected disconnect */
+-#define SSTAT0_RST		0x02	/* SCSI RST/ received */
+-#define SSTAT0_PAR		0x01	/* Parity error */
+-
+-/* And uses SSTAT0 for what was SSTAT1 */
+-
+-#define SSTAT1_REG		0x0e	/* SCSI status 1 ro */
+-#define SSTAT1_ILF		0x80	/* SIDL full */
+-#define SSTAT1_ORF		0x40	/* SODR full */
+-#define SSTAT1_OLF		0x20	/* SODL full */
+-#define SSTAT1_AIP		0x10	/* Arbitration in progress */
+-#define SSTAT1_LOA		0x08	/* Lost arbitration */
+-#define SSTAT1_WOA		0x04	/* Won arbitration */
+-#define SSTAT1_RST		0x02	/* Instant readout of RST/ */
+-#define SSTAT1_SDP		0x01	/* Instant readout of SDP/ */
+-
+-#define SSTAT2_REG		0x0f	/* SCSI status 2 ro */
+-#define SSTAT2_FF3		0x80 	/* number of bytes in synchronous */
+-#define SSTAT2_FF2		0x40	/* data FIFO */
+-#define SSTAT2_FF1		0x20	
+-#define SSTAT2_FF0		0x10
+-#define SSTAT2_FF_MASK		0xf0
+-#define SSTAT2_FF_SHIFT		4
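+-/*
+- * Sketch : print_lots() in 53c7xx.c extracts the synchronous FIFO byte
+- * count with this mask and shift on '700/'710 parts (the '8xx series
+- * moved the field to what it calls SSTAT1) :
+- *
+- *	count = (NCR53c7x0_read8 (SSTAT2_REG) & SSTAT2_FF_MASK)
+- *			>> SSTAT2_FF_SHIFT;
+- */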
+-
+-/* 
+- * Latched signals, latched on the leading edge of REQ/ for initiators,
+- * ACK/ for targets.
+- */
+-#define SSTAT2_SDP		0x08	/* SDP */
+-#define SSTAT2_MSG		0x04	/* MSG */
+-#define SSTAT2_CD		0x02	/* C/D */
+-#define SSTAT2_IO		0x01	/* I/O */
+-#define SSTAT2_PHASE_CMDOUT	SSTAT2_CD
+-#define SSTAT2_PHASE_DATAIN	SSTAT2_IO
+-#define SSTAT2_PHASE_DATAOUT	0
+-#define SSTAT2_PHASE_MSGIN	(SSTAT2_CD|SSTAT2_IO|SSTAT2_MSG)
+-#define SSTAT2_PHASE_MSGOUT	(SSTAT2_CD|SSTAT2_MSG)
+-#define SSTAT2_PHASE_STATIN	(SSTAT2_CD|SSTAT2_IO)
+-#define SSTAT2_PHASE_MASK	(SSTAT2_CD|SSTAT2_IO|SSTAT2_MSG)
+-
+-
+-/* NCR53c700-66 only */
+-#define SCRATCHA_REG_00		0x10    /* through  0x13 Scratch A rw */
+-/* NCR53c710 and higher */
+-#define DSA_REG			0x10	/* DATA structure address */
+-
+-#define CTEST0_REG_700		0x14	/* Chip test 0 ro */
+-#define CTEST0_REG_800		0x18	/* Chip test 0 rw, general purpose */
+-/* 0x80 - 0x04 are reserved */
+-#define CTEST0_700_RTRG		0x02	/* Real target mode */
+-#define CTEST0_700_DDIR		0x01	/* Data direction, 1 = 
+-					 * SCSI bus to host, 0  =
+-					 * host to SCSI.
+-					 */
+-
+-#define CTEST1_REG_700		0x15	/* Chip test 1 ro */
+-#define CTEST1_REG_800		0x19	/* Chip test 1 ro */
+-#define CTEST1_FMT3		0x80	/* Identify which byte lanes are empty */
+-#define CTEST1_FMT2		0x40 	/* in the DMA FIFO */
+-#define CTEST1_FMT1		0x20
+-#define CTEST1_FMT0		0x10
+-
+-#define CTEST1_FFL3		0x08	/* Identify which bytes lanes are full */
+-#define CTEST1_FFL2		0x04	/* in the DMA FIFO */
+-#define CTEST1_FFL1		0x02
+-#define CTEST1_FFL0		0x01
+-
+-#define CTEST2_REG_700		0x16	/* Chip test 2 ro */
+-#define CTEST2_REG_800		0x1a	/* Chip test 2 ro */
+-
+-#define CTEST2_800_DDIR		0x80	/* 1 = SCSI->host */
+-#define CTEST2_800_SIGP		0x40	/* A copy of SIGP in ISTAT.
+-					   Reading this register clears */
+-#define CTEST2_800_CIO		0x20	/* Configured as IO */
+-#define CTEST2_800_CM		0x10	/* Configured as memory */
+-
+-/* 0x80 - 0x40 are reserved on 700 series chips */
+-#define CTEST2_700_SOFF		0x20	/* SCSI Offset Compare,
+-					 * As an initiator, this bit is 
+-					 * one when the synchronous offset
+-					 * is zero, as a target this bit 
+-					 * is one when the synchronous 
+-					 * offset is at the maximum
+-					 * defined in SXFER
+-					 */
+-#define CTEST2_700_SFP		0x10	/* SCSI FIFO parity bit,
+-					 * reading CTEST3 unloads a byte
+-					 * from the FIFO and sets this
+-					 */
+-#define CTEST2_700_DFP		0x08	/* DMA FIFO parity bit,
+-					 * reading CTEST6 unloads a byte
+-					 * from the FIFO and sets this
+-					 */
+-#define CTEST2_TEOP		0x04	/* SCSI true end of process,
+-					 * indicates a totally finished
+-					 * transfer
+-					 */
+-#define CTEST2_DREQ		0x02	/* Data request signal */
+-/* 0x01 is reserved on 700 series chips */
+-#define CTEST2_800_DACK		0x01	
+-
+-/* 
+- * Chip test 3 ro 
+- * Unloads the bottom byte of the eight deep SCSI synchronous FIFO,
+- * check SSTAT2 FIFO full bits to determine size.  Note that a GROSS
+- * error results if a read is attempted on this register.  Also note 
+- * that 16 and 32 bit reads of this register will cause corruption.
+- */
+-#define CTEST3_REG_700		0x17	
+-/*  Chip test 3 rw */
+-#define CTEST3_REG_800		0x1b
+-#define CTEST3_800_V3		0x80	/* Chip revision */
+-#define CTEST3_800_V2		0x40
+-#define CTEST3_800_V1		0x20
+-#define CTEST3_800_V0		0x10
+-#define CTEST3_800_FLF		0x08	/* Flush DMA FIFO */
+-#define CTEST3_800_CLF		0x04	/* Clear DMA FIFO */
+-#define CTEST3_800_FM		0x02	/* Fetch mode pin */
+-/* bit 0 is reserved on 800 series chips */
+-
+-#define CTEST4_REG_700		0x18	/* Chip test 4 rw */
+-#define CTEST4_REG_800		0x21	/* Chip test 4 rw */
+-/* 0x80 is reserved on 700 series chips */
+-#define CTEST4_800_BDIS		0x80	/* Burst mode disable */
+-#define CTEST4_ZMOD		0x40	/* High impedance mode */
+-#define CTEST4_SZM		0x20	/* SCSI bus high impedance */
+-#define CTEST4_700_SLBE		0x10	/* SCSI loopback enabled */
+-#define CTEST4_800_SRTM		0x10	/* Shadow Register Test Mode */
+-#define CTEST4_700_SFWR		0x08	/* SCSI FIFO write enable, 
+-					 * redirects writes from SODL
+-					 * to the SCSI FIFO.
+-					 */
+-#define CTEST4_800_MPEE		0x08	/* Enable parity checking
+-					   during master cycles on PCI
+-					   bus */
+-
+-/* 
+- * These bits send the contents of the CTEST6 register to the appropriate
+- * byte lane of the 32 bit DMA FIFO.  Normal operation is zero, otherwise 
+- * the high bit means the low two bits select the byte lane.
+- */
+-#define CTEST4_FBL2		0x04	
+-#define CTEST4_FBL1		0x02
+-#define CTEST4_FBL0		0x01	
+-#define CTEST4_FBL_MASK		0x07
+-#define CTEST4_FBL_0		0x04	/* Select DMA FIFO byte lane 0 */
+-#define CTEST4_FBL_1		0x05	/* Select DMA FIFO byte lane 1 */
+-#define CTEST4_FBL_2		0x06	/* Select DMA FIFO byte lane 2 */
+-#define CTEST4_FBL_3		0x07	/* Select DMA FIFO byte lane 3 */
+-#define CTEST4_800_SAVE		(CTEST4_800_BDIS)
+-
+-
+-#define CTEST5_REG_700		0x19	/* Chip test 5 rw */
+-#define CTEST5_REG_800		0x22	/* Chip test 5 rw */
+-/* 
+- * Clock Address Incrementor.  When set, it increments the 
+- * DNAD register to the next bus size boundary.  It automatically 
+- * resets itself when the operation is complete.
+- */
+-#define CTEST5_ADCK		0x80
+-/*
+- * Clock Byte Counter.  When set, it decrements the DBC register to
+- * the next bus size boundary.
+- */
+-#define CTEST5_BBCK		0x40
+-/*
+- * Reset SCSI Offset.  Setting this bit to 1 clears the current offset
+- * pointer in the SCSI synchronous offset counter (SSTAT).  This bit
+- * is set to 1 if a SCSI Gross Error Condition occurs.  The offset should
+- * be cleared when a synchronous transfer fails.  When written, it is 
+- * automatically cleared after the SCSI synchronous offset counter is 
+- * reset.
+- */
+-/* Bit 5 is reserved on 800 series chips */
+-#define CTEST5_700_ROFF		0x20
+-/* 
+- * Master Control for Set or Reset pulses. When 1, causes the low 
+- * four bits of register to set when set, 0 causes the low bits to
+- * clear when set.
+- */
+-#define CTEST5_MASR 		0x10	
+-#define CTEST5_DDIR		0x08	/* DMA direction */
+-/*
+- * Bits 2-0 are reserved on 800 series chips
+- */
+-#define CTEST5_700_EOP		0x04	/* End of process */
+-#define CTEST5_700_DREQ		0x02	/* Data request */
+-#define CTEST5_700_DACK		0x01	/* Data acknowledge */
+-
+-/* 
+- * Chip test 6 rw - writing to this register writes to the byte 
+- * lane in the DMA FIFO as determined by the FBL bits in the CTEST4
+- * register.
+- */
+-#define CTEST6_REG_700		0x1a
+-#define CTEST6_REG_800		0x23
+-
+-#define CTEST7_REG		0x1b	/* Chip test 7 rw */
+-/* 0x80 - 0x40 are reserved on NCR53c700 and NCR53c700-66 chips */
+-#define CTEST7_10_CDIS		0x80	/* Cache burst disable */
+-#define CTEST7_10_SC1		0x40	/* Snoop control bits */
+-#define CTEST7_10_SC0		0x20	
+-#define CTEST7_10_SC_MASK	0x60
+-/* 0x20 is reserved on the NCR53c700 */
+-#define CTEST7_0060_FM		0x20	/* Fetch mode */
+-#define CTEST7_STD		0x10	/* Selection timeout disable */
+-#define CTEST7_DFP		0x08	/* DMA FIFO parity bit for CTEST6 */
+-#define CTEST7_EVP		0x04	/* 1 = host bus even parity, 0 = odd */
+-#define CTEST7_10_TT1		0x02	/* Transfer type */
+-#define CTEST7_00_DC		0x02	/* Set to drive DC low during instruction 
+-					   fetch */
+-#define CTEST7_DIFF		0x01	/* Differential mode */
+-
+-#define CTEST7_SAVE ( CTEST7_EVP | CTEST7_DIFF )
+-
+-
+-#define TEMP_REG		0x1c	/* through 0x1f Temporary stack rw */
+-
+-#define DFIFO_REG		0x20	/* DMA FIFO rw */
+-/* 
+- * 0x80 is reserved on the NCR53c710, the CLF and FLF bits have been
+- * moved into the CTEST8 register.
+- */
+-#define DFIFO_00_FLF		0x80	/* Flush DMA FIFO to memory */
+-#define DFIFO_00_CLF		0x40	/* Clear DMA and SCSI FIFOs */
+-#define DFIFO_BO6		0x40
+-#define DFIFO_BO5		0x20
+-#define DFIFO_BO4		0x10
+-#define DFIFO_BO3		0x08
+-#define DFIFO_BO2		0x04 
+-#define DFIFO_BO1		0x02
+-#define DFIFO_BO0		0x01
+-#define DFIFO_10_BO_MASK	0x7f	/* 7 bit counter */
+-#define DFIFO_00_BO_MASK	0x3f	/* 6 bit counter */
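+-/*
+- * The BO bits form a byte-offset counter.  NCR-family drivers commonly
+- * compute the DMA FIFO residue by subtracting the low DBC bits from it,
+- * roughly like the sketch below (an assumption, not code from this
+- * driver; mask to the counter width of the chip at hand) :
+- *
+- *	residue = (NCR53c7x0_read8 (DFIFO_REG) - (dbc & 0x7f))
+- *			& DFIFO_10_BO_MASK;
+- */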
+-
+-/* 
+- * Interrupt status rw 
+- * Note that this is the only register which can be read while SCSI
+- * SCRIPTS are being executed.
+- */
+-#define ISTAT_REG_700		0x21
+-#define ISTAT_REG_800		0x14
+-#define ISTAT_ABRT		0x80	/* Software abort, write 
+-					 * 1 to abort, wait for interrupt. */
+-/* 0x40 and 0x20 are reserved on NCR53c700 and NCR53c700-66 chips */
+-#define ISTAT_10_SRST		0x40	/* software reset */
+-#define ISTAT_10_SIGP		0x20	/* signal script */
+-/* 0x10 is reserved on NCR53c700 series chips */
+-#define ISTAT_800_SEM		0x10	/* semaphore */
+-#define ISTAT_CON		0x08	/* 1 when connected */
+-#define ISTAT_800_INTF		0x04	/* Interrupt on the fly */
+-#define ISTAT_700_PRE		0x04	/* Pointer register empty.
+-					 * Set to 1 when DSPS and DSP
+-					 * registers are empty in pipeline
+-					 * mode, always set otherwise.
+-					 */
+-#define ISTAT_SIP		0x02	/* SCSI interrupt pending from
+-					 * SCSI portion of SIOP see
+-					 * SSTAT0
+-					 */
+-#define ISTAT_DIP		0x01	/* DMA interrupt pending 
+-					 * see DSTAT
+-					 */
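+-/*
+- * Sketch of the software abort handshake built on these bits, following
+- * the stage machine in ncr_halt() in 53c7xx.c :
+- *
+- *	NCR53c7x0_write8 (hostdata->istat, ISTAT_ABRT);
+- *	... wait for ISTAT_DIP, read DSTAT, check DSTAT_ABRT ...
+- *	NCR53c7x0_write8 (hostdata->istat, 0);
+- */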
+-
+-/* NCR53c700-66 and NCR53c710 only */
+-#define CTEST8_REG		0x22	/* Chip test 8 rw */
+-#define CTEST8_0066_EAS		0x80	/* Enable alternate SCSI clock,
+-					 * ie read from SCLK/ rather than CLK/
+-					 */
+-#define CTEST8_0066_EFM		0x40	/* Enable fetch and master outputs */
+-#define CTEST8_0066_GRP		0x20	/* Generate Receive Parity for 
+-					 * pass through.  This ensures that 
+-					 * bad parity won't reach the host 
+-					 * bus.
+-					 */
+-#define CTEST8_0066_TE		0x10	/* TolerANT enable.  Enable 
+-					 * active negation, should only
+-					 * be used for slow SCSI 
+-					 * non-differential.
+-					 */
+-#define CTEST8_0066_HSC		0x08	/* Halt SCSI clock */
+-#define CTEST8_0066_SRA		0x04	/* Shorten REQ/ACK filtering,
+-					 * must be set for fast SCSI-II
+-					 * speeds.
+-					 */
+-#define CTEST8_0066_DAS		0x02	/* Disable automatic target/initiator
+-					 * switching.
+-					 */
+-#define CTEST8_0066_LDE		0x01	/* Last disconnect enable.
+-					 * The status of pending 
+-					 * disconnect is maintained by
+-					 * the core, eliminating
+-					 * the possibility of missing a 
+-					 * selection or reselection
+-					 * while waiting to fetch a 
+-					 * WAIT DISCONNECT opcode.
+-					 */
+-
+-#define CTEST8_10_V3		0x80	/* Chip revision */
+-#define CTEST8_10_V2		0x40
+-#define CTEST8_10_V1		0x20	
+-#define CTEST8_10_V0		0x10
+-#define CTEST8_10_V_MASK	0xf0	
+-#define CTEST8_10_FLF		0x08	/* Flush FIFOs */
+-#define CTEST8_10_CLF		0x04	/* Clear FIFOs */
+-#define CTEST8_10_FM		0x02	/* Fetch pin mode */
+-#define CTEST8_10_SM		0x01	/* Snoop pin mode */
+-
+-
+-/* 
+- * The CTEST9 register may be used to differentiate between a
+- * NCR53c700 and a NCR53c710.  
+- *
+- * Write 0xff to this register.
+- * Read it.
+- * If the contents are 0xff, it is a NCR53c700
+- * If the contents are 0x00, it is a NCR53c700-66 first revision
+- * If the contents are some other value, it is some other NCR53c700-66
+- */
+-#define CTEST9_REG_00		0x23	/* Chip test 9 ro */
+-#define LCRC_REG_10		0x23	
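+-/*
+- * A sketch of that probe sequence (hypothetical; the exact accessors
+- * depend on how the chip is mapped on a given board) :
+- *
+- *	NCR53c7x0_write8 (CTEST9_REG_00, 0xff);
+- *	switch (NCR53c7x0_read8 (CTEST9_REG_00)) {
+- *	case 0xff :	plain NCR53c700
+- *	case 0x00 :	first revision NCR53c700-66
+- *	default :	some other NCR53c700-66
+- *	}
+- */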
+-
+-/*
+- * 0x24 through 0x27 are the DMA byte counter register.  Instructions
+- * write their high 8 bits into the DCMD register, the low 24 bits into
+- * the DBC register.
+- *
+- * Function is dependent on the command type being executed.
+- */
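+-/*
+- * For example, print_insn() in 53c7xx.c splits the first word of a
+- * fetched instruction along these lines (the dbc half is a sketch;
+- * print_insn itself only extracts dcmd) :
+- *
+- *	dcmd = (insn[0] >> 24) & 0xff;		high byte -> DCMD
+- *	dbc  =  insn[0] & 0x00ffffff;		low 24 bits -> DBC
+- */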
+-
+- 
+-#define DBC_REG			0x24
+-/* 
+- * For Block Move Instructions, DBC is a 24 bit quantity representing 
+- *     the number of bytes to transfer.
+- * For Transfer Control Instructions, DBC is bit fielded as follows : 
+- */
+-/* Bits 20 - 23 should be clear */
+-#define DBC_TCI_TRUE		(1 << 19) 	/* Jump when true */
+-#define DBC_TCI_COMPARE_DATA	(1 << 18)	/* Compare data */
+-#define DBC_TCI_COMPARE_PHASE	(1 << 17)	/* Compare phase with DCMD field */
+-#define DBC_TCI_WAIT_FOR_VALID	(1 << 16)	/* Wait for REQ */
+-/* Bits 8 - 15 are reserved on some implementations ? */
+-#define DBC_TCI_MASK_MASK	0xff00 		/* Mask for data compare */
+-#define DBC_TCI_MASK_SHIFT	8
+-#define DBC_TCI_DATA_MASK	0xff		/* Data to be compared */ 
+-#define DBC_TCI_DATA_SHIFT	0
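+-/*
+- * Sketch of a Transfer Control operand built from these fields, e.g.
+- * "jump when the phase comparison is true, once REQ/ is valid" (the
+- * phase itself is encoded in the DCMD byte) :
+- *
+- *	dbc = DBC_TCI_TRUE | DBC_TCI_COMPARE_PHASE |
+- *		DBC_TCI_WAIT_FOR_VALID;
+- */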
+-
+-#define DBC_RWRI_IMMEDIATE_MASK	0xff00		/* Immediate data */
+-#define DBC_RWRI_IMMEDIATE_SHIFT 8		/* Amount to shift */
+-#define DBC_RWRI_ADDRESS_MASK	0x3f0000	/* Register address */
+-#define DBC_RWRI_ADDRESS_SHIFT 	16
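+-/*
+- * Sketch of a Read/Write operand : a "move immediate to register"
+- * targeting the register at address 0x01 (SCNTL1) with immediate data
+- * 0x08 might encode as below; this is an illustration of the field
+- * packing only, consult the chip data manual for the exact semantics.
+- *
+- *	dbc = (0x01 << DBC_RWRI_ADDRESS_SHIFT) |
+- *		(0x08 << DBC_RWRI_IMMEDIATE_SHIFT);
+- */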
+-
+-
+-/*
+- * DMA command r/w
+- */
+-#define DCMD_REG		0x27	
+-#define DCMD_TYPE_MASK		0xc0	/* Masks off type */
+-#define DCMD_TYPE_BMI		0x00	/* Indicates a Block Move instruction */
+-#define DCMD_BMI_IO		0x01	/* I/O, CD, and MSG bits selecting   */
+-#define DCMD_BMI_CD		0x02	/* the phase for the block MOVE      */
+-#define DCMD_BMI_MSG		0x04	/* instruction 			     */
+-
+-#define DCMD_BMI_OP_MASK	0x18	/* mask for opcode */
+-#define DCMD_BMI_OP_MOVE_T	0x00	/* MOVE */
+-#define DCMD_BMI_OP_MOVE_I	0x08	/* MOVE Initiator */
+-
+-#define DCMD_BMI_INDIRECT	0x20	/*  Indirect addressing */
+-
+-#define DCMD_TYPE_TCI		0x80	/* Indicates a Transfer Control 
+-					   instruction */
+-#define DCMD_TCI_IO		0x01	/* I/O, CD, and MSG bits selecting   */
+-#define DCMD_TCI_CD		0x02	/* the phase for the block MOVE      */
+-#define DCMD_TCI_MSG		0x04	/* instruction 			     */
+-#define DCMD_TCI_OP_MASK	0x38	/* mask for opcode */
+-#define DCMD_TCI_OP_JUMP	0x00	/* JUMP */
+-#define DCMD_TCI_OP_CALL	0x08	/* CALL */
+-#define DCMD_TCI_OP_RETURN	0x10	/* RETURN */
+-#define DCMD_TCI_OP_INT		0x18	/* INT */
+-
+-#define DCMD_TYPE_RWRI		0x40	/* Indicates I/O or register Read/Write
+-					   instruction */
+-#define DCMD_RWRI_OPC_MASK	0x38	/* Opcode mask */
+-#define DCMD_RWRI_OPC_WRITE	0x28	/* Write SFBR to register */
+-#define DCMD_RWRI_OPC_READ	0x30	/* Read register to SFBR */
+-#define DCMD_RWRI_OPC_MODIFY	0x38	/* Modify in place */
+-
+-#define DCMD_RWRI_OP_MASK	0x07
+-#define DCMD_RWRI_OP_MOVE	0x00
+-#define DCMD_RWRI_OP_SHL	0x01
+-#define DCMD_RWRI_OP_OR		0x02
+-#define DCMD_RWRI_OP_XOR	0x03
+-#define DCMD_RWRI_OP_AND	0x04
+-#define DCMD_RWRI_OP_SHR	0x05
+-#define DCMD_RWRI_OP_ADD	0x06
+-#define DCMD_RWRI_OP_ADDC	0x07
+-
+-#define DCMD_TYPE_MMI		0xc0	/* Indicates a Memory Move instruction 
+-					   (three words) */
+-
+-
+-#define DNAD_REG		0x28	/* through 0x2b DMA next address for 
+-					   data */
+-#define DSP_REG			0x2c	/* through 0x2f DMA SCRIPTS pointer rw */
+-#define DSPS_REG		0x30	/* through 0x33 DMA SCRIPTS pointer 
+-					   save rw */
+-#define DMODE_REG_00		0x34 	/* DMA mode rw */
+-#define DMODE_00_BL1	0x80	/* Burst length bits */
+-#define DMODE_00_BL0	0x40
+-#define DMODE_BL_MASK	0xc0
+-/* Burst lengths (800) */
+-#define DMODE_BL_2	0x00	/* 2 transfers */
+-#define DMODE_BL_4	0x40	/* 4 transfers */
+-#define DMODE_BL_8	0x80	/* 8 transfers */
+-#define DMODE_BL_16	0xc0	/* 16 transfers */
+-
+-#define DMODE_10_BL_1	0x00	/* 1 transfer */
+-#define DMODE_10_BL_2	0x40	/* 2 transfers */
+-#define DMODE_10_BL_4	0x80	/* 4 transfers */
+-#define DMODE_10_BL_8	0xc0	/* 8 transfers */
+-#define DMODE_10_FC2	0x20	/* Driven to FC2 pin */
+-#define DMODE_10_FC1	0x10	/* Driven to FC1 pin */
+-#define DMODE_710_PD	0x08	/* Program/data on FC0 pin */
+-#define DMODE_710_UO	0x02	/* User prog. output */
+-
+-#define DMODE_700_BW16	0x20	/* Host buswidth = 16 */
+-#define DMODE_700_286	0x10	/* 286 mode */
+-#define DMODE_700_IOM	0x08	/* Transfer to IO port */
+-#define DMODE_700_FAM	0x04	/* Fixed address mode */
+-#define DMODE_700_PIPE	0x02	/* Pipeline mode disables 
+-					 * automatic fetch / exec 
+-					 */
+-#define DMODE_MAN	0x01		/* Manual start mode, 
+-					 * requires a 1 to be written
+-					 * to the start DMA bit in the DCNTL
+-					 * register to run scripts 
+-					 */
+-
+-#define DMODE_700_SAVE ( DMODE_BL_MASK | DMODE_700_BW16 | DMODE_700_286 )
+-
+-/* NCR53c800 series only */
+-#define SCRATCHA_REG_800	0x34	/* through 0x37 Scratch A rw */
+-/* NCR53c710 only */
+-#define SCRATCHB_REG_10		0x34	/* through 0x37 scratch B rw */
+-
+-#define DMODE_REG_10    	0x38	/* DMA mode rw, NCR53c710 and newer */
+-#define DMODE_800_SIOM		0x20	/* Source IO = 1 */
+-#define DMODE_800_DIOM		0x10	/* Destination IO = 1 */
+-#define DMODE_800_ERL		0x08	/* Enable Read Line */
+-
+-/* 35-38 are reserved on 700 and 700-66 series chips */
+-#define DIEN_REG		0x39	/* DMA interrupt enable rw */
+-/* 0x80, 0x40, and 0x20 are reserved on 700-series chips */
+-#define DIEN_800_MDPE		0x40	/* Master data parity error */
+-#define DIEN_800_BF		0x20	/* BUS fault */
+-#define DIEN_700_BF		0x20	/* BUS fault */
+-#define DIEN_ABRT		0x10	/* Enable aborted interrupt */
+-#define DIEN_SSI		0x08	/* Enable single step interrupt */
+-#define DIEN_SIR		0x04	/* Enable SCRIPTS INT command 
+-					 * interrupt
+-					 */
+-/* 0x02 is reserved on 800 series chips */
+-#define DIEN_700_WTD		0x02	/* Enable watchdog timeout interrupt */
+-#define DIEN_700_OPC		0x01	/* Enable illegal instruction 
+-					 * interrupt 
+-					 */
+-#define DIEN_800_IID		0x01	/*  Same meaning, different name */ 
+-
+-/*
+- * DMA watchdog timer rw
+- * set in 16 CLK input periods.
+- */
+-#define DWT_REG			0x3a
+-
+-/* DMA control rw */
+-#define DCNTL_REG		0x3b
+-#define DCNTL_700_CF1		0x80	/* Clock divisor bits */
+-#define DCNTL_700_CF0		0x40
+-#define DCNTL_700_CF_MASK	0xc0
+-/* Clock divisors 			   Divisor SCLK range (MHZ) */
+-#define DCNTL_700_CF_2		0x00    /* 2.0	   37.51-50.00 */
+-#define DCNTL_700_CF_1_5	0x40	/* 1.5	   25.01-37.50 */
+-#define DCNTL_700_CF_1		0x80	/* 1.0     16.67-25.00 */
+-#define DCNTL_700_CF_3		0xc0	/* 3.0	   50.01-66.67 (53c700-66) */
+-
+-#define DCNTL_700_S16		0x20	/* Load scripts 16 bits at a time */
+-#define DCNTL_SSM		0x10	/* Single step mode */
+-#define DCNTL_700_LLM		0x08	/* Low level mode, can only be set 
+-					 * after selection */
+-#define DCNTL_800_IRQM		0x08	/* Totem pole IRQ pin */
+-#define DCNTL_STD		0x04	/* Start DMA / SCRIPTS */
+-/* 0x02 is reserved */
+-#define DCNTL_00_RST		0x01	/* Software reset, resets everything
+-					 * but 286 mode bit  in DMODE. On the
+-					 * NCR53c710, this bit moved to CTEST8
+-					 */
+-#define DCNTL_10_COM		0x01	/* 700 software compatibility mode */
+-#define DCNTL_10_EA		0x20	/* Enable Ack - needed for MVME16x */
+-
+-#define DCNTL_700_SAVE ( DCNTL_700_CF_MASK | DCNTL_700_S16 )
+-
+-
+-/* NCR53c700-66 only */
+-#define SCRATCHB_REG_00		0x3c	/* through 0x3f scratch b rw */
+-#define SCRATCHB_REG_800	0x5c	/* through 0x5f scratch b rw */
+-/* NCR53c710 only */
+-#define ADDER_REG_10		0x3c	/* Adder, NCR53c710 only */
+-
+-#define SIEN1_REG_800		0x41
+-#define SIEN1_800_STO		0x04	/* selection/reselection timeout */
+-#define SIEN1_800_GEN		0x02	/* general purpose timer */
+-#define SIEN1_800_HTH		0x01	/* handshake to handshake */
+-
+-#define SIST1_REG_800		0x43
+-#define SIST1_800_STO		0x04	/* selection/reselection timeout */
+-#define SIST1_800_GEN		0x02	/* general purpose timer */
+-#define SIST1_800_HTH		0x01	/* handshake to handshake */
+-
+-#define SLPAR_REG_800		0x44	/* Parity */
+-
+-#define MACNTL_REG_800		0x46	/* Memory access control */
+-#define MACNTL_800_TYP3		0x80
+-#define MACNTL_800_TYP2		0x40
+-#define MACNTL_800_TYP1		0x20
+-#define MACNTL_800_TYP0		0x10
+-#define MACNTL_800_DWR		0x08
+-#define MACNTL_800_DRD		0x04
+-#define MACNTL_800_PSCPT	0x02
+-#define MACNTL_800_SCPTS	0x01
+-
+-#define GPCNTL_REG_800		0x47	/* General Purpose Pin Control */
+-
+-/* Timeouts are expressed such that 0=off, 1=100us, doubling after that */
+-#define STIME0_REG_800		0x48	/* SCSI Timer Register 0 */
+-#define STIME0_800_HTH_MASK	0xf0	/* Handshake to Handshake timeout */
+-#define STIME0_800_HTH_SHIFT	4
+-#define STIME0_800_SEL_MASK	0x0f	/* Selection timeout */
+-#define STIME0_800_SEL_SHIFT	0
+-
+-#define STIME1_REG_800		0x49
+-#define STIME1_800_GEN_MASK	0x0f	/* General purpose timer */
+-
+-#define RESPID_REG_800		0x4a	/* Response ID, bit fielded.  8
+-					   bits on narrow chips, 16 on WIDE */
+-
+-#define STEST0_REG_800		0x4c	
+-#define STEST0_800_SLT		0x08	/* Selection response logic test */
+-#define STEST0_800_ART		0x04	/* Arbitration priority encoder test */
+-#define STEST0_800_SOZ		0x02	/* Synchronous offset zero */
+-#define STEST0_800_SOM		0x01	/* Synchronous offset maximum */
+-
+-#define STEST1_REG_800		0x4d
+-#define STEST1_800_SCLK		0x80	/* Disable SCSI clock */
+-
+-#define STEST2_REG_800		0x4e	
+-#define STEST2_800_SCE		0x80	/* Enable SOCL/SODL */
+-#define STEST2_800_ROF		0x40	/* Reset SCSI sync offset */
+-#define STEST2_800_SLB		0x10	/* Enable SCSI loopback mode */
+-#define STEST2_800_SZM		0x08	/* SCSI high impedance mode */
+-#define STEST2_800_EXT		0x02	/* Extend REQ/ACK filter 30 to 60ns */
+-#define STEST2_800_LOW		0x01	/* SCSI low level mode */
+-
+-#define STEST3_REG_800		0x4f	 
+-#define STEST3_800_TE		0x80	/* Enable active negation */
+-#define STEST3_800_STR		0x40	/* SCSI FIFO test read */
+-#define STEST3_800_HSC		0x20	/* Halt SCSI clock */
+-#define STEST3_800_DSI		0x10	/* Disable single initiator response */
+-#define STEST3_800_TTM		0x04	/* Time test mode */
+-#define STEST3_800_CSF		0x02	/* Clear SCSI FIFO */
+-#define STEST3_800_STW		0x01	/* SCSI FIFO test write */
+-
+-#define OPTION_PARITY 		0x1	/* Enable parity checking */
+-#define OPTION_TAGGED_QUEUE	0x2	/* Enable SCSI-II tagged queuing */
+-#define OPTION_700		0x8	/* Always run NCR53c700 scripts */
+-#define OPTION_INTFLY		0x10	/* Use INTFLY interrupts */
+-#define OPTION_DEBUG_INTR	0x20	/* Debug interrupts */
+-#define OPTION_DEBUG_INIT_ONLY	0x40	/* Run initialization code and 
+-					   simple test code, return
+-					   DID_NO_CONNECT if any SCSI
+-					   commands are attempted. */
+-#define OPTION_DEBUG_READ_ONLY	0x80	/* Return DID_ERROR if any 
+-					   SCSI write is attempted */
+-#define OPTION_DEBUG_TRACE	0x100	/* Animated trace mode, print 
+-					   each address and instruction 
+-					   executed to debug buffer. */
+-#define OPTION_DEBUG_SINGLE	0x200	/* stop after executing one 
+-					   instruction */
+-#define OPTION_SYNCHRONOUS	0x400	/* Enable sync SCSI.  */
+-#define OPTION_MEMORY_MAPPED	0x800	/* NCR registers have valid 
+-					   memory mapping */
+-#define OPTION_IO_MAPPED	0x1000  /* NCR registers have valid
+-					     I/O mapping */
+-#define OPTION_DEBUG_PROBE_ONLY	0x2000  /* Probe only, don't even init */
+-#define OPTION_DEBUG_TESTS_ONLY	0x4000  /* Probe, init, run selected tests */
+-#define OPTION_DEBUG_TEST0	0x08000 /* Run test 0 */
+-#define OPTION_DEBUG_TEST1	0x10000 /* Run test 1 */
+-#define OPTION_DEBUG_TEST2	0x20000 /* Run test 2 */
+-#define OPTION_DEBUG_DUMP	0x40000 /* Dump commands */
+-#define OPTION_DEBUG_TARGET_LIMIT 0x80000 /* Only talk to target+luns specified */
+-#define OPTION_DEBUG_NCOMMANDS_LIMIT 0x100000 /* Limit the number of commands */
+-#define OPTION_DEBUG_SCRIPT 0x200000 /* Print when checkpoints are passed */
+-#define OPTION_DEBUG_FIXUP 0x400000 /* print fixup values */
+-#define OPTION_DEBUG_DSA 0x800000
+-#define OPTION_DEBUG_CORRUPTION	0x1000000	/* Detect script corruption */
+-#define OPTION_DEBUG_SDTR       0x2000000	/* Debug SDTR problem */
+-#define OPTION_DEBUG_MISMATCH 	0x4000000 	/* Debug phase mismatches */
+-#define OPTION_DISCONNECT	0x8000000	/* Allow disconnect */
+-#define OPTION_DEBUG_DISCONNECT 0x10000000	
+-#define OPTION_ALWAYS_SYNCHRONOUS 0x20000000	/* Negotiate sync. transfers
+-						   on power up */
+-#define OPTION_DEBUG_QUEUES	0x80000000	
+-#define OPTION_DEBUG_ALLOCATION 0x100000000LL
+-#define OPTION_DEBUG_SYNCHRONOUS 0x200000000LL	/* Sanity check SXFER and 
+-						   SCNTL3 registers */
+-#define OPTION_NO_ASYNC	0x400000000LL		/* Don't automagically send
+-						   SDTR for async transfers when
+-						   we haven't been told to do
+-						   a synchronous transfer. */
+-#define OPTION_NO_PRINT_RACE 0x800000000LL	/* Don't print message when
+-						   the reselect/WAIT DISCONNECT
+-						   race condition hits */
+-#if !defined(PERM_OPTIONS)
+-#define PERM_OPTIONS 0
+-#endif
+-				
+-/*
+- * Some data which is accessed by the NCR chip must be 4-byte aligned.
+- * For some hosts the default is less than that (eg. 68K uses 2-byte).
+- * Alignment has only been forced where it is important; also if one
+- * 32 bit structure field is aligned then it is assumed that following
+- * 32 bit fields are also aligned.  Take care when adding fields
+- * which are other than 32 bit.
+- */
+-
+-struct NCR53c7x0_synchronous {
+-    u32 select_indirect			/* Value used for indirect selection */
+-	__attribute__ ((aligned (4)));
+-    u32 sscf_710;			/* Used to set SSCF bits for 710 */
+-    u32 script[8];			/* Size ?? Script used when target is 
+-						reselected */
+-    unsigned char synchronous_want[5];	/* Per target desired SDTR */
+-/* 
+- * Set_synchronous programs these; select_indirect and current settings after
+- * int_debug_sync should show a match.
+- */
+-    unsigned char sxfer_sanity, scntl3_sanity;
+-};
+-
+-#define CMD_FLAG_SDTR 		1	/* Initiating synchronous 
+-					   transfer negotiation */
+-#define CMD_FLAG_WDTR		2	/* Initiating wide transfer
+-					   negotiation */
+-#define CMD_FLAG_DID_SDTR	4	/* did SDTR */
+-#define CMD_FLAG_DID_WDTR	8	/* did WDTR */
+-
+-struct NCR53c7x0_table_indirect {
+-    u32 count;
+-    void *address;
+-};
+-
+-enum ncr_event { 
+-    EVENT_NONE = 0,
+-/* 
+- * Order is IMPORTANT, since these must correspond to the event interrupts
+- * in 53c7,8xx.scr 
+- */
+-
+-    EVENT_ISSUE_QUEUE = 0x5000000,	/* 0 Command was added to issue queue */
+-    EVENT_START_QUEUE,			/* 1 Command moved to start queue */
+-    EVENT_SELECT,			/* 2 Command completed selection */
+-    EVENT_DISCONNECT,			/* 3 Command disconnected */
+-    EVENT_RESELECT,			/* 4 Command reselected */
+-    EVENT_COMPLETE,		        /* 5 Command completed */
+-    EVENT_IDLE,				/* 6 */
+-    EVENT_SELECT_FAILED,		/* 7 */
+-    EVENT_BEFORE_SELECT,		/* 8 */
+-    EVENT_RESELECT_FAILED		/* 9 */
+-};
+-
+-struct NCR53c7x0_event {
+-    enum ncr_event event;	/* What type of event */
+-    unsigned char target;
+-    unsigned char lun;
+-    struct timeval time;	
+-    u32 *dsa;			/* What's in the DSA register now (virt) */
+-/* 
+- * A few things from that SCSI pid so we know what happened after 
+- * the Scsi_Cmnd structure in question may have disappeared.
+- */
+-    unsigned long pid;		/* The SCSI PID which caused this 
+-				   event */
+-    unsigned char cmnd[12];
+-};
+-
+-/*
+- * Things in the NCR53c7x0_cmd structure are split into two parts :
+- *
+- * 1.  A fixed portion, for things which are not accessed directly by static NCR
+- *	code (i.e., are referenced only by the Linux side of the driver,
+- *	or only by dynamically generated code).  
+- *
+- * 2.  The DSA portion, for things which are accessed directly by static NCR
+- *	code.
+- *
+- * This is a little ugly, but it 
+- * 1.  Avoids conflicts between the NCR code's picture of the structure, and 
+- * 	Linux code's idea of what it looks like.
+- *
+- * 2.  Minimizes the pain in the Linux side of the code needed 
+- * 	to calculate real dsa locations for things, etc.
+- * 
+- */
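+-
+-/*
+- * Minimal sketch (not from the driver) of point 2 above: given one of the
+- * hostdata->dsa_* byte offsets, the Linux side can locate the matching
+- * word in a command's DSA array, much as patch_dsa_32 below does.
+- */
+-#if 0
+-static inline u32 *dsa_field(struct NCR53c7x0_hostdata *hostdata,
+-			     struct NCR53c7x0_cmd *cmd, s32 field)
+-{
+-	/* field is a byte offset relative to hostdata->dsa_start */
+-	return cmd->dsa + (field - hostdata->dsa_start) / sizeof(u32);
+-}
+-#endif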
+-
+-struct NCR53c7x0_cmd {
+-    void *real;				/* Real, unaligned address for
+-					   free function */
+-    void (* free)(void *, int);		/* Command to deallocate; NULL
+-					   for structures allocated with
+-					   scsi_register, etc. */
+-    Scsi_Cmnd *cmd;			/* Associated Scsi_Cmnd 
+-					   structure, Scsi_Cmnd points
+-					   at NCR53c7x0_cmd using 
+-					   host_scribble structure */
+-
+-    int size;				/* scsi_malloc'd size of this 
+-					   structure */
+-
+-    int flags;				/* CMD_* flags */
+-
+-    unsigned char      cmnd[12];	/* CDB, copied from Scsi_Cmnd */
+-    int                result;		/* Copy to Scsi_Cmnd when done */
+-
+-    struct {				/* Private non-cached bounce buffer */
+-        unsigned char buf[256];
+-	u32	      addr;
+-        u32           len;
+-    } bounce;
+-
+-/*
+- * SDTR and WIDE messages are an either/or affair
+- * in this message, since we will go into message out and send
+- * _the whole mess_ without dropping out of message out to 
+- * let the target go into message in after sending the first 
+- * message.
+- */
+-
+-    unsigned char select[11];		/* Select message, includes
+-					   IDENTIFY
+-					   (optional) QUEUE TAG
+- 				 	   (optional) SDTR or WDTR
+-					 */
+-
+-
+-    volatile struct NCR53c7x0_cmd *next; /* Linux-maintained lists (free,
+-					    running, eventually finished) */
+-    					 
+-
+-    u32 *data_transfer_start;		/* Start of data transfer routines */
+-    u32 *data_transfer_end;		/* Address after end of data transfer
+-    	    	    	    	    	   routines */
+-/* 
+- * The following three fields were moved from the DSA proper to here
+- * since only dynamically generated NCR code refers to them, meaning
+- * we don't need dsa_* absolutes, and it is simpler to let the 
+- * host code refer to them directly.
+- */
+-
+-/* 
+- * HARD CODED : residual and saved_residual need to agree with the sizes
+- * used in NCR53c7,8xx.scr.  
+- * 
+- * FIXME: we want to consider the case where we have odd-length 
+- *	scatter/gather buffers and a WIDE transfer, in which case 
+- *	we'll need to use the CHAIN MOVE instruction.  Ick.
+- */
+-    u32 residual[6] __attribute__ ((aligned (4)));
+-					/* Residual data transfer which
+-					   allows pointer code to work
+-					   right.
+-
+-    	    	    	    	    	    [0-1] : Conditional call to 
+-    	    	    	    	    	    	appropriate other transfer 
+-    	    	    	    	    	    	routine.
+-    	    	    	    	    	    [2-3] : Residual block transfer
+-    	    	    	    	    	    	instruction.
+-    	    	    	    	    	    [4-5] : Jump to instruction
+-    	    	    	    	    	    	after splice.
+-					 */
+-    u32 saved_residual[6]; 		/* Copy of old residual, so we 
+-					   can get another partial 
+-					   transfer and still recover 
+-    	    	    	    	    	 */
+-    	    	
+-    u32 saved_data_pointer;		/* Saved data pointer */
+-
+-    u32 dsa_next_addr;		        /* _Address_ of dsa_next field  
+-					   in this dsa for RISCy 
+-					   style constant. */
+-
+-    u32 dsa_addr;			/* Address of dsa; RISCy style
+-					   constant */
+-
+-    u32 dsa[0];				/* Variable length (depending
+-					   on host type, number of scatter /
+-					   gather buffers, etc).  */
+-};
+-
+-struct NCR53c7x0_break {
+-    u32 *address, old_instruction[2];
+-    struct NCR53c7x0_break *next;
+-    unsigned char old_size;		/* Size of old instruction */
+-};
+-
+-/* Indicates that the NCR is not executing code */
+-#define STATE_HALTED	0		
+-/* 
+- * Indicates that the NCR is executing the wait for select / reselect 
+- * script.  Only used when running NCR53c700 compatible scripts, only 
+- * state during which an ABORT is _not_ considered an error condition.
+- */
+-#define STATE_WAITING	1		
+-/* Indicates that the NCR is executing other code. */
+-#define STATE_RUNNING	2		
+-/* 
+- * Indicates that the NCR was being aborted.
+- */
+-#define STATE_ABORTING	3
+-/* Indicates that the NCR was successfully aborted. */
+-#define STATE_ABORTED 4
+-/* Indicates that the NCR has been disabled due to a fatal error */
+-#define STATE_DISABLED 5
+-
+-/* 
+- * Where knowledge of SCSI SCRIPT(tm) specified values are needed 
+- * in an interrupt handler, an interrupt handler exists for each 
+- * different SCSI script so we don't have name space problems.
+- * 
+- * Return values of these handlers are as follows : 
+- */
+-#define SPECIFIC_INT_NOTHING 	0	/* don't even restart */
+-#define SPECIFIC_INT_RESTART	1	/* restart at the next instruction */
+-#define SPECIFIC_INT_ABORT	2	/* recoverable error, abort cmd */
+-#define SPECIFIC_INT_PANIC	3	/* unrecoverable error, panic */
+-#define SPECIFIC_INT_DONE	4	/* normal command completion */
+-#define SPECIFIC_INT_BREAK	5	/* break point encountered */
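+-
+-/*
+- * Hypothetical sketch of how the driver consumes these return values after
+- * running a script-specific handler (the real dispatch lives in the .c
+- * file; this only illustrates the contract described above).
+- */
+-#if 0
+-switch (hostdata->dstat_sir_intr(host, cmd)) {
+-case SPECIFIC_INT_NOTHING:	break;		/* don't even restart */
+-case SPECIFIC_INT_RESTART:	/* resume at the next instruction */ break;
+-case SPECIFIC_INT_ABORT:	/* recoverable error; abort the command */ break;
+-case SPECIFIC_INT_PANIC:	panic("53c7xx: unrecoverable error");
+-case SPECIFIC_INT_DONE:		/* normal command completion */ break;
+-case SPECIFIC_INT_BREAK:	/* breakpoint encountered */ break;
+-}
+-#endif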
+-
+-struct NCR53c7x0_hostdata {
+-    int size;				/* Size of entire Scsi_Host
+-					   structure */
+-    int board;				/* set to board type, useful if 
+-					   we have host specific things,
+-					   i.e., a general purpose I/O
+-					   bit is being used to enable
+-					   termination, etc. */
+-
+-    int chip;				/* set to chip type; 700-66 is
+-					   700-66, rest are last three
+-					   digits of part number */
+-
+-    char valid_ids[8];			/* Valid SCSI ID's for adapter */
+-
+-    u32 *dsp;				/* dsp to restart with after
+-					   all stacked interrupts are
+-					   handled. */
+-
+-    unsigned dsp_changed:1;		/* Has dsp changed within this
+-					   set of stacked interrupts ? */
+-
+-    unsigned char dstat;		/* Most recent value of dstat */
+-    unsigned dstat_valid:1;
+-
+-    unsigned expecting_iid:1;		/* Expect IID interrupt */
+-    unsigned expecting_sto:1;		/* Expect STO interrupt */
+-    
+-    /* 
+-     * The code stays cleaner if we use variables with function
+-     * pointers and offsets that are unique for the different
+-     * scripts rather than having a slew of switch(hostdata->chip) 
+-     * statements.
+-     * 
+-     * It also means that the #defines from the SCSI SCRIPTS(tm)
+-     * don't have to be visible outside of the script-specific
+-     * instructions, preventing name space pollution.
+-     */
+-
+-    void (* init_fixup)(struct Scsi_Host *host);
+-    void (* init_save_regs)(struct Scsi_Host *host);
+-    void (* dsa_fixup)(struct NCR53c7x0_cmd *cmd);
+-    void (* soft_reset)(struct Scsi_Host *host);
+-    int (* run_tests)(struct Scsi_Host *host);
+-
+-    /*
+-     * Called when DSTAT_SIR is set, indicating an interrupt generated
+-     * by the INT instruction, where values are unique for each SCSI
+-     * script.  Should return one of the SPECIFIC_INT_* values.
+-     */
+-
+-    int (* dstat_sir_intr)(struct Scsi_Host *host, struct NCR53c7x0_cmd *cmd);
+-
+-    int dsa_len; /* Size of DSA structure */
+-
+-    /*
+-     * Location of DSA fields for the SCSI SCRIPT corresponding to this 
+-     * chip.  
+-     */
+-
+-    s32 dsa_start;			
+-    s32 dsa_end;			
+-    s32 dsa_next;
+-    s32 dsa_prev;
+-    s32 dsa_cmnd;
+-    s32 dsa_select;
+-    s32 dsa_msgout;
+-    s32 dsa_cmdout;
+-    s32 dsa_dataout;
+-    s32 dsa_datain;
+-    s32 dsa_msgin;
+-    s32 dsa_msgout_other;
+-    s32 dsa_write_sync;
+-    s32 dsa_write_resume;
+-    s32 dsa_check_reselect;
+-    s32 dsa_status;
+-    s32 dsa_saved_pointer;
+-    s32 dsa_jump_dest;
+-
+-    /* 
+-     * Important entry points that generic fixup code needs
+-     * to know about, fixed up.
+-     */
+-
+-    s32 E_accept_message;
+-    s32 E_command_complete;		
+-    s32 E_data_transfer;
+-    s32 E_dsa_code_template;
+-    s32 E_dsa_code_template_end;
+-    s32 E_end_data_transfer;
+-    s32 E_msg_in;
+-    s32 E_initiator_abort;
+-    s32 E_other_transfer;
+-    s32 E_other_in;
+-    s32 E_other_out;
+-    s32 E_target_abort;
+-    s32 E_debug_break;	
+-    s32 E_reject_message;
+-    s32 E_respond_message;
+-    s32 E_select;
+-    s32 E_select_msgout;
+-    s32 E_test_0;
+-    s32 E_test_1;
+-    s32 E_test_2;
+-    s32 E_test_3;
+-    s32 E_dsa_zero;
+-    s32 E_cmdout_cmdout;
+-    s32 E_wait_reselect;
+-    s32 E_dsa_code_begin;
+-
+-    long long options;			/* Bitfielded set of options enabled */
+-    volatile u32 test_completed;	/* Test completed */
+-    int test_running;			/* Test currently running */
+-    s32 test_source
+-	__attribute__ ((aligned (4)));
+-    volatile s32 test_dest;
+-
+-    volatile int state;			/* state of driver, only used for 
+-					   OPTION_700 */
+-
+-    unsigned char  dmode;		/* 
+-					 * set to the address of the DMODE 
+-					 * register for this chip.
+-					 */
+-    unsigned char istat;		/* 
+-    	    	    	    	    	 * set to the address of the ISTAT 
+-    	    	    	    	    	 * register for this chip.
+-    	    	    	    	    	 */
+-  
+-    int scsi_clock;			/* 
+-					 * SCSI clock in HZ. 0 may be used 
+-					 * for unknown, although this will
+-					 * disable synchronous negotiation.
+-					 */
+-
+-    volatile int intrs;			/* Number of interrupts */
+-    volatile int resets;		/* Number of SCSI resets */
+-    unsigned char saved_dmode;	
+-    unsigned char saved_ctest4;
+-    unsigned char saved_ctest7;
+-    unsigned char saved_dcntl;
+-    unsigned char saved_scntl3;
+-
+-    unsigned char this_id_mask;
+-
+-    /* Debugger information */
+-    struct NCR53c7x0_break *breakpoints, /* Linked list of all break points */
+-	*breakpoint_current;		/* Current breakpoint being stepped 
+-					   through, NULL if we are running 
+-					   normally. */
+-#ifdef NCR_DEBUG
+-    int debug_size;			/* Size of debug buffer */
+-    volatile int debug_count;		/* Current data count */
+-    volatile char *debug_buf;		/* Output ring buffer */
+-    volatile char *debug_write;		/* Current write pointer */
+-    volatile char *debug_read;		/* Current read pointer */
+-#endif /* def NCR_DEBUG */
+-
+-    /* XXX - primitive debugging junk, remove when working ? */
+-    int debug_print_limit;		/* Number of commands to print
+-					   out exhaustive debugging
+-					   information for if 
+-					   OPTION_DEBUG_DUMP is set */ 
+-
+-    unsigned char debug_lun_limit[16];	/* If OPTION_DEBUG_TARGET_LIMIT
+-					   set, puke if commands are sent
+-					   to other target/lun combinations */
+-
+-    int debug_count_limit;		/* Number of commands to execute
+-					   before puking to limit debugging 
+-					   output */
+-				    
+-
+-    volatile unsigned idle:1;			/* set to 1 if idle */
+-
+-    /* 
+-     * Table of synchronous+wide transfer parameters set on a per-target
+-     * basis.
+-     */
+-    
+-    volatile struct NCR53c7x0_synchronous sync[16]
+-	__attribute__ ((aligned (4)));
+-
+-    volatile Scsi_Cmnd *issue_queue
+-	__attribute__ ((aligned (4)));
+-						/* waiting to be issued by
+-						   Linux driver */
+-    volatile struct NCR53c7x0_cmd *running_list;	
+-						/* commands running, maintained
+-						   by Linux driver */
+-
+-    volatile struct NCR53c7x0_cmd *ncrcurrent;	/* currently connected 
+-						   nexus, ONLY valid for
+-						   NCR53c700/NCR53c700-66
+-						 */
+-
+-    volatile struct NCR53c7x0_cmd *spare;	/* pointer to spare,
+-    	    	    	    	    	    	   allocated at probe time,
+-    	    	    	    	    	    	   which we can use for 
+-						   initialization */
+-    volatile struct NCR53c7x0_cmd *free;
+-    int max_cmd_size;				/* Maximum size of NCR53c7x0_cmd
+-					    	   based on number of 
+-						   scatter/gather segments, etc.
+-						   */
+-    volatile int num_cmds;			/* Number of commands 
+-						   allocated */
+-    volatile int extra_allocate;
+-    volatile unsigned char cmd_allocated[16];	/* Have we allocated commands
+-						   for this target yet?  If not,
+-						   do so ASAP */
+-    volatile unsigned char busy[16][8];     	/* number of commands 
+-						   executing on each target
+-    	    	    	    	    	    	 */
+-    /* 
+-     * Eventually, I'll switch to a coroutine for calling 
+-     * cmd->done(cmd), etc. so that we can overlap interrupt
+-     * processing with this code for maximum performance.
+-     */
+-    
+-    volatile struct NCR53c7x0_cmd *finished_queue;	
+-						
+-    /* Shared variables between SCRIPT and host driver */
+-    volatile u32 *schedule
+-	__attribute__ ((aligned (4)));		/* Array of JUMPs to dsa_begin
+-						   routines of various DSAs.  
+-						   When not in use, replace
+-						   with jump to next slot */
+-
+-
+-    volatile unsigned char msg_buf[16];		/* buffer for messages
+-						   other than the command
+-						   complete message */
+-
+-    /* Per-target default synchronous and WIDE messages */
+-    volatile unsigned char synchronous_want[16][5];
+-    volatile unsigned char wide_want[16][4];
+-
+-    /* Bit fielded set of targets we want to speak synchronously with */ 
+-    volatile u16 initiate_sdtr;	
+-    /* Bit fielded set of targets we want to speak wide with */
+-    volatile u16 initiate_wdtr;
+-    /* Bit fielded list of targets we've talked to. */
+-    volatile u16 talked_to;
+-
+-    /* Array of bit-fielded lun lists that we need to request_sense */
+-    volatile unsigned char request_sense[16];
+-
+-    u32 addr_reconnect_dsa_head
+-	__attribute__ ((aligned (4)));		/* RISCy style constant,
+-						   address of following */
+-    volatile u32 reconnect_dsa_head;	
+-    /* Data identifying nexus we are trying to match during reselection */
+-    volatile unsigned char reselected_identify; /* IDENTIFY message */
+-    volatile unsigned char reselected_tag;	/* second byte of queue tag 
+-						   message or 0 */
+-
+-    /* These were static variables before we moved them */
+-
+-    s32 NCR53c7xx_zero
+-	__attribute__ ((aligned (4)));
+-    s32 NCR53c7xx_sink;
+-    u32 NOP_insn;
+-    char NCR53c7xx_msg_reject;
+-    char NCR53c7xx_msg_abort;
+-    char NCR53c7xx_msg_nop;
+-
+-    /*
+-     * Following item introduced by RGH to support the NCR53c710, which is
+-     * VERY brain-dead when it comes to memory moves
+-     */
+-
+-			  /* DSA save area used only by the NCR chip */
+-    volatile unsigned long saved2_dsa
+-	__attribute__ ((aligned (4)));
+-
+-    volatile unsigned long emulated_intfly
+-	__attribute__ ((aligned (4)));
+-
+-    volatile int event_size, event_index;
+-    volatile struct NCR53c7x0_event *events;
+-
+-    /* If we need to generate code to kill off the currently connected 
+-       command, this is where we do it. Should have a BMI instruction
+-       to source or sink the current data, followed by a JUMP
+-       to abort_connected */
+-
+-    u32 *abort_script;
+-
+-    int script_count;				/* Size of script in words */
+-    u32 script[0];				/* Relocated SCSI script */
+-
+-};
+-
+-#define SCSI_IRQ_NONE	255
+-#define DMA_NONE	255
+-#define IRQ_AUTO	254
+-#define DMA_AUTO	254
+-
+-#define BOARD_GENERIC	0
+-
+-#define NCR53c7x0_insn_size(insn)					\
+-    (((insn) & DCMD_TYPE_MASK) == DCMD_TYPE_MMI ? 3 : 2)
+-    
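+-/*
+- * Illustrative sketch (not from the driver): stepping through a relocated
+- * script with the size macro above.  The DCMD byte sits in the top byte
+- * of each instruction's first word, so it is extracted before use.
+- */
+-#if 0
+-u32 *insn;
+-for (insn = hostdata->script;
+-     insn < hostdata->script + hostdata->script_count;
+-     insn += NCR53c7x0_insn_size((unsigned char) (insn[0] >> 24)))
+-	/* inspect *insn here */ ;
+-#endif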
+-
+-#define NCR53c7x0_local_declare()					\
+-    volatile unsigned char *NCR53c7x0_address_memory;			\
+-    unsigned int NCR53c7x0_address_io;					\
+-    int NCR53c7x0_memory_mapped
+-
+-#define NCR53c7x0_local_setup(host)					\
+-    NCR53c7x0_address_memory = (void *) (host)->base;			\
+-    NCR53c7x0_address_io = (unsigned int) (host)->io_port;		\
+-    NCR53c7x0_memory_mapped = ((struct NCR53c7x0_hostdata *) 		\
+-	host->hostdata[0])-> options & OPTION_MEMORY_MAPPED 
+-
+-#ifdef BIG_ENDIAN
+-/* These could be more efficient, given that we are always memory mapped,
+- * but they don't give the same problems as the write macros, so leave
+- * them. */
+-#ifdef __mc68000__
+-#define NCR53c7x0_read8(address) 					\
+-    ((unsigned int)raw_inb((u32)NCR53c7x0_address_memory + ((u32)(address)^3)) )
+-
+-#define NCR53c7x0_read16(address) 					\
+-    ((unsigned int)raw_inw((u32)NCR53c7x0_address_memory + ((u32)(address)^2)))
+-#else
+-#define NCR53c7x0_read8(address) 					\
+-    (NCR53c7x0_memory_mapped ? 						\
+-	(unsigned int)readb((u32)NCR53c7x0_address_memory + ((u32)(address)^3)) :	\
+-	inb(NCR53c7x0_address_io + (address)))
+-
+-#define NCR53c7x0_read16(address) 					\
+-    (NCR53c7x0_memory_mapped ? 						\
+-	(unsigned int)readw((u32)NCR53c7x0_address_memory + ((u32)(address)^2)) :	\
+-	inw(NCR53c7x0_address_io + (address)))
+-#endif /* mc68000 */
+-#else
+-#define NCR53c7x0_read8(address) 					\
+-    (NCR53c7x0_memory_mapped ? 						\
+-	(unsigned int)readb((u32)NCR53c7x0_address_memory + (u32)(address)) :	\
+-	inb(NCR53c7x0_address_io + (address)))
+-
+-#define NCR53c7x0_read16(address) 					\
+-    (NCR53c7x0_memory_mapped ? 						\
+-	(unsigned int)readw((u32)NCR53c7x0_address_memory + (u32)(address)) :	\
+-	inw(NCR53c7x0_address_io + (address)))
+-#endif
+-
+-#ifdef __mc68000__
+-#define NCR53c7x0_read32(address) 					\
+-    ((unsigned int) raw_inl((u32)NCR53c7x0_address_memory + (u32)(address)))
+-#else
+-#define NCR53c7x0_read32(address) 					\
+-    (NCR53c7x0_memory_mapped ? 						\
+-	(unsigned int) readl((u32)NCR53c7x0_address_memory + (u32)(address)) : 	\
+-	inl(NCR53c7x0_address_io + (address)))
+-#endif /* mc68000*/
+-
+-#ifdef BIG_ENDIAN
+-/* If we are big-endian, then we are not Intel, so probably don't have
+- * an i/o map as well as a memory map.  So, let's assume memory mapped.
+- * Also, I am having terrible problems trying to persuade the compiler
+- * not to lay down code which does a read after write for these macros.
+- * If you remove 'volatile' from writeb() and friends it is ok....
+- */
+-
+-#define NCR53c7x0_write8(address,value) 				\
+-	*(volatile unsigned char *)					\
+-		((u32)NCR53c7x0_address_memory + ((u32)(address)^3)) = (value)
+-
+-#define NCR53c7x0_write16(address,value) 				\
+-	*(volatile unsigned short *)					\
+-		((u32)NCR53c7x0_address_memory + ((u32)(address)^2)) = (value)
+-
+-#define NCR53c7x0_write32(address,value) 				\
+-	*(volatile unsigned long *)					\
+-		((u32)NCR53c7x0_address_memory + ((u32)(address))) = (value)
+-
+-#else
+-
+-#define NCR53c7x0_write8(address,value) 				\
+-    (NCR53c7x0_memory_mapped ? 						\
+-     ({writeb((value), (u32)NCR53c7x0_address_memory + (u32)(address)); mb();}) :	\
+-	outb((value), NCR53c7x0_address_io + (address)))
+-
+-#define NCR53c7x0_write16(address,value) 				\
+-    (NCR53c7x0_memory_mapped ? 						\
+-     ({writew((value), (u32)NCR53c7x0_address_memory + (u32)(address)); mb();}) :	\
+-	outw((value), NCR53c7x0_address_io + (address)))
+-
+-#define NCR53c7x0_write32(address,value) 				\
+-    (NCR53c7x0_memory_mapped ? 						\
+-     ({writel((value), (u32)NCR53c7x0_address_memory + (u32)(address)); mb();}) :	\
+-	outl((value), NCR53c7x0_address_io + (address)))
+-
+-#endif
+-
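+-/*
+- * Sketched (hypothetical) use of the access macros above: declare the
+- * local variables, bind them to a host, then touch chip registers.
+- * hostdata->istat holds the per-chip ISTAT offset, as noted earlier.
+- */
+-#if 0
+-static void example_poke_chip(struct Scsi_Host *host)
+-{
+-	struct NCR53c7x0_hostdata *hostdata = (struct NCR53c7x0_hostdata *)
+-		host->hostdata[0];
+-	NCR53c7x0_local_declare();
+-	NCR53c7x0_local_setup(host);
+-	(void) NCR53c7x0_read8(hostdata->istat);	/* sample chip status */
+-	NCR53c7x0_write8(DCNTL_REG,
+-		NCR53c7x0_read8(DCNTL_REG) | DCNTL_STD);	/* start SCRIPTS */
+-}
+-#endif
+-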
+-/* Patch arbitrary 32 bit words in the script */
+-#define patch_abs_32(script, offset, symbol, value)			\
+-    	for (i = 0; i < (sizeof (A_##symbol##_used) / sizeof 		\
+-    	    (u32)); ++i) {					\
+-	    (script)[A_##symbol##_used[i] - (offset)] += (value);	\
+-	    if (hostdata->options & OPTION_DEBUG_FIXUP) 		\
+-	      printk("scsi%d : %s reference %d at 0x%x in %s is now 0x%x\n",\
+-		host->host_no, #symbol, i, A_##symbol##_used[i] - 	\
+-		(int)(offset), #script, (script)[A_##symbol##_used[i] -	\
+-		(offset)]);						\
+-    	}
+-
+-/* Patch read/write instruction immediate field */
+-#define patch_abs_rwri_data(script, offset, symbol, value)		\
+-    	for (i = 0; i < (sizeof (A_##symbol##_used) / sizeof 		\
+-    	    (u32)); ++i)					\
+-    	    (script)[A_##symbol##_used[i] - (offset)] =			\
+-	    	((script)[A_##symbol##_used[i] - (offset)] & 		\
+-	    	~DBC_RWRI_IMMEDIATE_MASK) | 				\
+-    	    	(((value) << DBC_RWRI_IMMEDIATE_SHIFT) &		\
+-		 DBC_RWRI_IMMEDIATE_MASK)
+-
+-/* Patch transfer control instruction data field */
+-#define patch_abs_tci_data(script, offset, symbol, value)	        \
+-    	for (i = 0; i < (sizeof (A_##symbol##_used) / sizeof 		\
+-    	    (u32)); ++i)					\
+-    	    (script)[A_##symbol##_used[i] - (offset)] =			\
+-	    	((script)[A_##symbol##_used[i] - (offset)] & 		\
+-	    	~DBC_TCI_DATA_MASK) | 					\
+-    	    	(((value) << DBC_TCI_DATA_SHIFT) &			\
+-		 DBC_TCI_DATA_MASK)
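+-
+-/*
+- * Sketched use of the patch macros (symbol choice illustrative): the
+- * script assembler emits A_<symbol>_used[] arrays listing every script
+- * offset that references <symbol>, and the macros walk those arrays to
+- * rewrite the words in the relocated copy.  "i", "host" and "hostdata"
+- * must be in scope, as in the real fixup code.
+- */
+-#if 0
+-int i;
+-patch_abs_32(hostdata->script, 0, reconnect_dsa_head,
+-	     virt_to_bus((void *) &hostdata->reconnect_dsa_head));
+-#endif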
+-
+-/* Patch field in dsa structure (assignment should be +=?) */
+-#define patch_dsa_32(dsa, symbol, word, value)				\
+-	{								\
+-	(dsa)[(hostdata->symbol - hostdata->dsa_start) / sizeof(u32)	\
+-	    + (word)] = (value);					\
+-	if (hostdata->options & OPTION_DEBUG_DSA)			\
+-	    printk("scsi : dsa %s symbol %s(%d) word %d now 0x%x\n",	\
+-		#dsa, #symbol, hostdata->symbol, 			\
+-		(word), (u32) (value));					\
+-	}
+-
+-/* Paranoid people could use panic() here. */
+-#define FATAL(host) shutdown((host));
+-
+-extern int ncr53c7xx_init(struct scsi_host_template *tpnt, int board, int chip,
+-			  unsigned long base, int io_port, int irq, int dma,
+-			  long long options, int clock);
+-
+-#endif /* NCR53c710_C */
+-#endif /* NCR53c710_H */
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx.scr linux-2.6.22-try2/drivers/scsi/53c7xx.scr
+--- linux-2.6.22-570/drivers/scsi/53c7xx.scr	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/53c7xx.scr	1969-12-31 19:00:00.000000000 -0500
+@@ -1,1591 +0,0 @@
+-#undef DEBUG
+-#undef EVENTS
+-#undef NO_SELECTION_TIMEOUT
+-#define BIG_ENDIAN
+-
+-; 53c710 driver.  Modified from Drew Eckhardt's driver
+-; for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk]
+-;
+-; I have left the script for the 53c8xx family in here, as it is likely
+-; to be useful to see what I changed when bug hunting.
+-
+-; NCR 53c810 driver, main script
+-; Sponsored by 
+-;	iX Multiuser Multitasking Magazine
+-;	hm@ix.de
+-;
+-; Copyright 1993, 1994, 1995 Drew Eckhardt
+-;      Visionary Computing 
+-;      (Unix and Linux consulting and custom programming)
+-;      drew@PoohSticks.ORG
+-;      +1 (303) 786-7975
+-;
+-; TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation.
+-;
+-; PRE-ALPHA
+-;
+-; For more information, please consult 
+-;
+-; NCR 53C810
+-; PCI-SCSI I/O Processor
+-; Data Manual
+-;
+-; NCR 53C710 
+-; SCSI I/O Processor
+-; Programmers Guide
+-;
+-; NCR Microelectronics
+-; 1635 Aeroplaza Drive
+-; Colorado Springs, CO 80916
+-; 1+ (719) 578-3400
+-;
+-; Toll free literature number
+-; +1 (800) 334-5454
+-;
+-; IMPORTANT : This code is self modifying due to the limitations of 
+-;	the NCR53c7,8xx series chips.  Persons debugging this code with
+-;	the remote debugger should take this into account, and NOT set
+-;	breakpoints in modified instructions.
+-;
+-; Design:
+-; The NCR53c7,8xx family of SCSI chips are busmasters with an onboard 
+-; microcontroller using a simple instruction set.   
+-;
+-; So, to minimize the effects of interrupt latency, and to maximize 
+-; throughput, this driver offloads the practical maximum amount 
+-; of processing to the SCSI chip while still maintaining a common
+-; structure.
+-;
+-; Where tradeoffs were needed between efficiency on the older
+-; chips and the newer NCR53c800 series, the NCR53c800 series 
+-; was chosen.
+-;
+-; While the NCR53c700 and NCR53c700-66 lacked the facilities to fully
+-; automate SCSI transfers without host processor intervention, this 
+-; isn't the case with the NCR53c710 and newer chips which allow 
+-;
+-; - reads and writes to the internal registers from within the SCSI
+-; 	scripts, allowing the SCSI SCRIPTS(tm) code to save processor
+-; 	state so that multiple threads of execution are possible, and also
+-; 	provide an ALU for loop control, etc.
+-; 
+-; - table indirect addressing for some instructions. This allows
+-;	pointers to be located relative to the DSA (Data Structure
+-;	Address) register.
+-;
+-; These features make it possible to implement a mailbox style interface,
+-; where the same piece of code is run to handle I/O for multiple threads
+-; at once minimizing our need to relocate code.  Since the NCR53c700/
+-; NCR53c800 series have a unique combination of features, making a
+-; standard ingoing/outgoing mailbox system costly, I've modified it.
+-;
+-; - Mailboxes are a mixture of code and data.  This lets us greatly
+-; 	simplify the NCR53c810 code and do things that would otherwise
+-;	not be possible.
+-;
+-; The saved data pointer is now implemented as follows :
+-;
+-; 	Control flow has been architected such that if control reaches
+-;	munge_save_data_pointer, on a restore pointers message or 
+-;	reconnection, a jump to the address formerly in the TEMP register
+-;	will allow the SCSI command to resume execution.
+-;
+-
+-;
+-; Note : the DSA structures must be aligned on 32 bit boundaries,
+-; since the source and destination of MOVE MEMORY instructions 
+-; must share the same alignment and this is the alignment of the
+-; NCR registers.
+-;
+-
+-; For some systems (MVME166, for example) dmode is always the same, so don't
+-; waste time writing it
+-
+-#if 1
+-#define DMODE_MEMORY_TO_NCR
+-#define DMODE_MEMORY_TO_MEMORY
+-#define DMODE_NCR_TO_MEMORY
+-#else
+-#define DMODE_MEMORY_TO_NCR    MOVE dmode_memory_to_ncr TO DMODE
+-#define DMODE_MEMORY_TO_MEMORY MOVE dmode_memory_to_memory TO DMODE
+-#define DMODE_NCR_TO_MEMORY    MOVE dmode_ncr_to_memory TO DMODE
+-#endif
+-
+-ABSOLUTE dsa_temp_lun = 0		; Patch to lun for current dsa
+-ABSOLUTE dsa_temp_next = 0		; Patch to dsa next for current dsa
+-ABSOLUTE dsa_temp_addr_next = 0		; Patch to address of dsa next address 
+-					; 	for current dsa
+-ABSOLUTE dsa_temp_sync = 0		; Patch to address of per-target
+-					;	sync routine
+-ABSOLUTE dsa_sscf_710 = 0		; Patch to address of per-target
+-					;	sscf value (53c710)
+-ABSOLUTE dsa_temp_target = 0		; Patch to id for current dsa
+-ABSOLUTE dsa_temp_addr_saved_pointer = 0; Patch to address of per-command
+-					; 	saved data pointer
+-ABSOLUTE dsa_temp_addr_residual = 0	; Patch to address of per-command
+-					;	current residual code
+-ABSOLUTE dsa_temp_addr_saved_residual = 0; Patch to address of per-command
+-					; saved residual code
+-ABSOLUTE dsa_temp_addr_new_value = 0	; Address of value for JUMP operand
+-ABSOLUTE dsa_temp_addr_array_value = 0 	; Address to copy to
+-ABSOLUTE dsa_temp_addr_dsa_value = 0	; Address of this DSA value
+-
+-;
+-; Once a device has initiated reselection, we need to compare it 
+-; against the singly linked list of commands which have disconnected
+-; and are pending reselection.  These commands are maintained in 
+-; an unordered singly linked list of DSA structures, through the
+-; DSA pointers at their 'centers' headed by the reconnect_dsa_head
+-; pointer.
+-; 
+-; To avoid complications in removing commands from the list,
+-; I minimize the amount of expensive (at eight operations per
+-; addition @ 500-600ns each) pointer operations which must
+-; be done in the NCR driver by precomputing them on the 
+-; host processor during dsa structure generation.
+-;
+-; The fixed-up per DSA code knows how to recognize the nexus
+-; associated with the corresponding SCSI command, and modifies
+-; the source and destination pointers for the MOVE MEMORY 
+-; instruction which is executed when reselected_ok is called
+-; to remove the command from the list.  Similarly, DSA is 
+-; loaded with the address of the next DSA structure and
+-; reselected_check_next is called if a failure occurs.
+-;
+-; Perhaps more concisely, the net effect of the mess is 
+-;
+-; for (dsa = reconnect_dsa_head, dest = &reconnect_dsa_head, 
+-;     src = NULL; dsa; dest = &dsa->next, dsa = dsa->next) {
+-; 	src = &dsa->next;
+-; 	if (target_id == dsa->id && target_lun == dsa->lun) {
+-; 		*dest = *src;
+-; 		break;
+-;         }	
+-; }
+-;
+-; if (!dsa)
+-;           error (int_err_unexpected_reselect);
+-; else  
+-;     longjmp (dsa->jump_resume, 0);
+-;
+-; 	
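+-;
+-; A self-contained C rendering of the loop above (illustrative only; the
+-; field names are hypothetical stand-ins for the DSA layout):
+-;
+-;	struct dsa { struct dsa *next; int id, lun; };
+-;
+-;	static struct dsa *reselect_unlink(struct dsa **head, int id, int lun)
+-;	{
+-;	    struct dsa **prev, *dsa;
+-;	    for (prev = head, dsa = *head; dsa;
+-;		 prev = &dsa->next, dsa = dsa->next)
+-;		if (dsa->id == id && dsa->lun == lun) {
+-;		    *prev = dsa->next;	/* unlink from disconnected list */
+-;		    return dsa;
+-;		}
+-;	    return NULL;		/* -> int_err_unexpected_reselect */
+-;	}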
+-
+-#if (CHIP != 700) && (CHIP != 70066)
+-; Define DSA structure used for mailboxes
+-ENTRY dsa_code_template
+-dsa_code_template:
+-ENTRY dsa_code_begin
+-dsa_code_begin:
+-; RGH: Don't care about TEMP and DSA here
+-	DMODE_MEMORY_TO_NCR
+-	MOVE MEMORY 4, dsa_temp_addr_dsa_value, addr_scratch
+-	DMODE_MEMORY_TO_MEMORY
+-#if (CHIP == 710)
+-	MOVE MEMORY 4, addr_scratch, saved_dsa
+-	; We are about to go and select the device, so must set SSCF bits
+-	MOVE MEMORY 4, dsa_sscf_710, addr_scratch
+-#ifdef BIG_ENDIAN
+-	MOVE SCRATCH3 TO SFBR
+-#else
+-	MOVE SCRATCH0 TO SFBR
+-#endif
+-	MOVE SFBR TO SBCL
+-	MOVE MEMORY 4, saved_dsa, addr_dsa
+-#else
+-	CALL scratch_to_dsa
+-#endif
+-	CALL select
+-; Handle the phase mismatch which may have resulted from the 
+-; MOVE FROM dsa_msgout if we returned here.  The CLEAR ATN 
+-; may or may not be necessary, and we should update script_asm.pl
+-; to handle multiple pieces.
+-    CLEAR ATN
+-    CLEAR ACK
+-
+-; Replace second operand with address of JUMP instruction dest operand
+-; in schedule table for this DSA.  Becomes dsa_jump_dest in 53c7,8xx.c.
+-ENTRY dsa_code_fix_jump
+-dsa_code_fix_jump:
+-	MOVE MEMORY 4, NOP_insn, 0
+-	JUMP select_done
+-
+-; wrong_dsa loads the DSA register with the value of the dsa_next
+-; field.
+-;
+-wrong_dsa:
+-#if (CHIP == 710)
+-;                NOTE DSA is corrupt when we arrive here!
+-#endif
+-;		Patch the MOVE MEMORY INSTRUCTION such that 
+-;		the destination address is the address of the OLD 
+-;		next pointer.
+-;
+-	MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 8
+-	DMODE_MEMORY_TO_NCR
+-;
+-; 	Move the _contents_ of the next pointer into the DSA register as 
+-;	the next I_T_L or I_T_L_Q tuple to check against the established
+-;	nexus.
+-;
+-	MOVE MEMORY 4, dsa_temp_next, addr_scratch
+-	DMODE_MEMORY_TO_MEMORY
+-#if (CHIP == 710)
+-	MOVE MEMORY 4, addr_scratch, saved_dsa
+-	MOVE MEMORY 4, saved_dsa, addr_dsa
+-#else
+-	CALL scratch_to_dsa
+-#endif
+-	JUMP reselected_check_next
+-
+-ABSOLUTE dsa_save_data_pointer = 0
+-ENTRY dsa_code_save_data_pointer
+-dsa_code_save_data_pointer:
+-#if (CHIP == 710)
+-	; When we get here, TEMP has been saved in jump_temp+4, DSA is corrupt
+-	; We MUST return with DSA correct
+-    	MOVE MEMORY 4, jump_temp+4, dsa_temp_addr_saved_pointer
+-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h
+-    	MOVE MEMORY 24, dsa_temp_addr_residual, dsa_temp_addr_saved_residual
+-        CLEAR ACK
+-#ifdef DEBUG
+-        INT int_debug_saved
+-#endif
+-	MOVE MEMORY 4, saved_dsa, addr_dsa
+-	JUMP jump_temp
+-#else
+-    	DMODE_NCR_TO_MEMORY
+-    	MOVE MEMORY 4, addr_temp, dsa_temp_addr_saved_pointer
+-    	DMODE_MEMORY_TO_MEMORY
+-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h
+-    	MOVE MEMORY 24, dsa_temp_addr_residual, dsa_temp_addr_saved_residual
+-        CLEAR ACK
+-#ifdef DEBUG
+-        INT int_debug_saved
+-#endif
+-    	RETURN
+-#endif
+-ABSOLUTE dsa_restore_pointers = 0
+-ENTRY dsa_code_restore_pointers
+-dsa_code_restore_pointers:
+-#if (CHIP == 710)
+-	; TEMP and DSA are corrupt when we get here, but who cares!
+-    	MOVE MEMORY 4, dsa_temp_addr_saved_pointer, jump_temp + 4
+-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h
+-    	MOVE MEMORY 24, dsa_temp_addr_saved_residual, dsa_temp_addr_residual
+-        CLEAR ACK
+-	; Restore DSA, note we don't care about TEMP
+-	MOVE MEMORY 4, saved_dsa, addr_dsa
+-#ifdef DEBUG
+-        INT int_debug_restored
+-#endif
+-	JUMP jump_temp
+-#else
+-    	DMODE_MEMORY_TO_NCR
+-    	MOVE MEMORY 4, dsa_temp_addr_saved_pointer, addr_temp
+-    	DMODE_MEMORY_TO_MEMORY
+-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h
+-    	MOVE MEMORY 24, dsa_temp_addr_saved_residual, dsa_temp_addr_residual
+-        CLEAR ACK
+-#ifdef DEBUG
+-        INT int_debug_restored
+-#endif
+-    	RETURN
+-#endif
+-
+-ABSOLUTE dsa_check_reselect = 0
+-; dsa_check_reselect determines whether or not the current target and
+-; lun match the current DSA
+-ENTRY dsa_code_check_reselect
+-dsa_code_check_reselect:
+-#if (CHIP == 710)
+-	/* Arrives here with DSA correct */
+-	/* Assumes we are always ID 7 */
+-	MOVE LCRC TO SFBR		; LCRC has our ID and his ID bits set
+-	JUMP REL (wrong_dsa), IF NOT dsa_temp_target, AND MASK 0x80
+-#else
+-	MOVE SSID TO SFBR		; SSID contains 3 bit target ID
+-; FIXME : we need to accommodate bit fielded and binary here for '7xx/'8xx chips
+-	JUMP REL (wrong_dsa), IF NOT dsa_temp_target, AND MASK 0xf8
+-#endif
+-;
+-; Hack - move to scratch first, since SFBR is not writeable
+-; 	via the CPU and hence a MOVE MEMORY instruction.
+-;
+-	DMODE_MEMORY_TO_NCR
+-	MOVE MEMORY 1, reselected_identify, addr_scratch
+-	DMODE_MEMORY_TO_MEMORY
+-#ifdef BIG_ENDIAN
+-	; BIG ENDIAN ON MVME16x
+-	MOVE SCRATCH3 TO SFBR
+-#else
+-	MOVE SCRATCH0 TO SFBR
+-#endif
+-; FIXME : we need to accommodate bit fielded and binary here for '7xx/'8xx chips
+-; Are you sure about that?  richard@sleepie.demon.co.uk
+-	JUMP REL (wrong_dsa), IF NOT dsa_temp_lun, AND MASK 0xf8
+-;		Patch the MOVE MEMORY INSTRUCTION such that
+-;		the source address is the address of this dsa's
+-;		next pointer.
+-	MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 4
+-	CALL reselected_ok
+-#if (CHIP == 710)
+-;	Restore DSA following memory moves in reselected_ok
+-;	dsa_temp_sync doesn't really care about DSA, but it has an
+-;	optional debug INT so a valid DSA is a good idea.
+-	MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-	CALL dsa_temp_sync	
+-; Release ACK on the IDENTIFY message _after_ we've set the synchronous 
+-; transfer parameters! 
+-	CLEAR ACK
+-; Implicitly restore pointers on reselection, so a RETURN
+-; will transfer control back to the right spot.
+-    	CALL REL (dsa_code_restore_pointers)
+-    	RETURN
+-ENTRY dsa_zero
+-dsa_zero:
+-ENTRY dsa_code_template_end
+-dsa_code_template_end:
+-
+-; Sanity check: dsa_fields_start should equal dsa_code_template_end -
+-; dsa_zero; if it doesn't, puke.
+-
+-ABSOLUTE dsa_fields_start =  0	; Sanity marker
+-				; 	pad 48 bytes (fix this RSN)
+-ABSOLUTE dsa_next = 48		; len 4 Next DSA
+- 				; len 4 Previous DSA address
+-ABSOLUTE dsa_cmnd = 56		; len 4 Scsi_Cmnd * for this thread.
+-ABSOLUTE dsa_select = 60	; len 4 Device ID, Period, Offset for 
+-			 	;	table indirect select
+-ABSOLUTE dsa_msgout = 64	; len 8 table indirect move parameter for 
+-				;       select message
+-ABSOLUTE dsa_cmdout = 72	; len 8 table indirect move parameter for 
+-				;	command
+-ABSOLUTE dsa_dataout = 80	; len 4 code pointer for dataout
+-ABSOLUTE dsa_datain = 84	; len 4 code pointer for datain
+-ABSOLUTE dsa_msgin = 88		; len 8 table indirect move for msgin
+-ABSOLUTE dsa_status = 96 	; len 8 table indirect move for status byte
+-ABSOLUTE dsa_msgout_other = 104	; len 8 table indirect for normal message out
+-				; (Synchronous transfer negotiation, etc).
+-ABSOLUTE dsa_end = 112
+-
+-ABSOLUTE schedule = 0 		; Array of JUMP dsa_begin or JUMP (next),
+-				; terminated by a call to JUMP wait_reselect
+-
+-; Linked lists of DSA structures
+-ABSOLUTE reconnect_dsa_head = 0	; Link list of DSAs which can reconnect
+-ABSOLUTE addr_reconnect_dsa_head = 0 ; Address of variable containing
+-				; address of reconnect_dsa_head
+-
+-; These select the source and destination of a MOVE MEMORY instruction
+-ABSOLUTE dmode_memory_to_memory = 0x0
+-ABSOLUTE dmode_memory_to_ncr = 0x0
+-ABSOLUTE dmode_ncr_to_memory = 0x0
+-
+-ABSOLUTE addr_scratch = 0x0
+-ABSOLUTE addr_temp = 0x0
+-#if (CHIP == 710)
+-ABSOLUTE saved_dsa = 0x0
+-ABSOLUTE emulfly = 0x0
+-ABSOLUTE addr_dsa = 0x0
+-#endif
+-#endif /* CHIP != 700 && CHIP != 70066 */
+-
+-; Interrupts - 
+-; MSB indicates type
+-; 0	handle error condition
+-; 1 	handle message 
+-; 2 	handle normal condition
+-; 3	debugging interrupt
+-; 4 	testing interrupt 
+-; Next byte indicates specific error
+-
+-; XXX not yet implemented, I'm not sure if I want to - 
+-; Next byte indicates the routine the error occurred in
+-; The LSB indicates the specific place the error occurred
+- 
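+-; Decoding sketch (C, illustrative): the host reads the 32-bit value of
+-; the INT instruction from the DSPS register and switches on the type
+-; byte:
+-;
+-;	u32 dsps = NCR53c7x0_read32(DSPS_REG);
+-;	switch (dsps >> 24) {
+-;	case 0: /* error */	case 1: /* message */
+-;	case 2: /* normal */	case 3: /* debug */
+-;	case 4: /* test */	break;
+-;	}
+-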
+-ABSOLUTE int_err_unexpected_phase = 0x00000000	; Unexpected phase encountered
+-ABSOLUTE int_err_selected = 0x00010000		; SELECTED (nee RESELECTED)
+-ABSOLUTE int_err_unexpected_reselect = 0x00020000 
+-ABSOLUTE int_err_check_condition = 0x00030000	
+-ABSOLUTE int_err_no_phase = 0x00040000
+-ABSOLUTE int_msg_wdtr = 0x01000000		; WDTR message received
+-ABSOLUTE int_msg_sdtr = 0x01010000		; SDTR received
+-ABSOLUTE int_msg_1 = 0x01020000			; single byte special message
+-						; received
+-
+-ABSOLUTE int_norm_select_complete = 0x02000000	; Select complete, reprogram
+-						; registers.
+-ABSOLUTE int_norm_reselect_complete = 0x02010000	; Nexus established
+-ABSOLUTE int_norm_command_complete = 0x02020000 ; Command complete
+-ABSOLUTE int_norm_disconnected = 0x02030000	; Disconnected 
+-ABSOLUTE int_norm_aborted = 0x02040000		; Aborted *dsa
+-ABSOLUTE int_norm_reset = 0x02050000		; Generated BUS reset.
+-ABSOLUTE int_norm_emulateintfly = 0x02060000	; 53C710 Emulated intfly
+-ABSOLUTE int_debug_break = 0x03000000		; Break point
+-#ifdef DEBUG
+-ABSOLUTE int_debug_scheduled = 0x03010000	; new I/O scheduled 
+-ABSOLUTE int_debug_idle = 0x03020000		; scheduler is idle
+-ABSOLUTE int_debug_dsa_loaded = 0x03030000	; dsa reloaded
+-ABSOLUTE int_debug_reselected = 0x03040000	; NCR reselected
+-ABSOLUTE int_debug_head = 0x03050000		; issue head overwritten
+-ABSOLUTE int_debug_disconnected = 0x03060000	; disconnected
+-ABSOLUTE int_debug_disconnect_msg = 0x03070000	; got message to disconnect
+-ABSOLUTE int_debug_dsa_schedule = 0x03080000	; in dsa_schedule
+-ABSOLUTE int_debug_reselect_check = 0x03090000  ; Check for reselection of DSA
+-ABSOLUTE int_debug_reselected_ok = 0x030a0000 	; Reselection accepted
+-#endif
+-ABSOLUTE int_debug_panic = 0x030b0000		; Panic driver
+-#ifdef DEBUG
+-ABSOLUTE int_debug_saved = 0x030c0000 		; save/restore pointers
+-ABSOLUTE int_debug_restored = 0x030d0000
+-ABSOLUTE int_debug_sync = 0x030e0000		; Sanity check synchronous 
+-						; parameters. 
+-ABSOLUTE int_debug_datain = 0x030f0000		; going into data in phase 
+-						; now.
+-ABSOLUTE int_debug_check_dsa = 0x03100000	; Sanity check DSA against
+-						; SDID.
+-#endif
+-
+-ABSOLUTE int_test_1 = 0x04000000		; Test 1 complete
+-ABSOLUTE int_test_2 = 0x04010000		; Test 2 complete
+-ABSOLUTE int_test_3 = 0x04020000		; Test 3 complete
+-
+-
+-; These should start with 0x05000000, with low bits incrementing for 
+-; each one.
+-
+-#ifdef EVENTS
+-ABSOLUTE int_EVENT_SELECT = 0
+-ABSOLUTE int_EVENT_DISCONNECT = 0
+-ABSOLUTE int_EVENT_RESELECT = 0
+-ABSOLUTE int_EVENT_COMPLETE = 0
+-ABSOLUTE int_EVENT_IDLE = 0
+-ABSOLUTE int_EVENT_SELECT_FAILED = 0
+-ABSOLUTE int_EVENT_BEFORE_SELECT = 0
+-ABSOLUTE int_EVENT_RESELECT_FAILED = 0
+-#endif
+-						
+-ABSOLUTE NCR53c7xx_msg_abort = 0	; Pointer to abort message
+-ABSOLUTE NCR53c7xx_msg_reject = 0       ; Pointer to reject message
+-ABSOLUTE NCR53c7xx_zero	= 0		; long with zero in it, use for source
+-ABSOLUTE NCR53c7xx_sink = 0		; long to dump worthless data in
+-ABSOLUTE NOP_insn = 0			; NOP instruction
+-
+-; Pointer to message, potentially multi-byte
+-ABSOLUTE msg_buf = 0
+-
+-; Pointer to holding area for reselection information
+-ABSOLUTE reselected_identify = 0
+-ABSOLUTE reselected_tag = 0
+-
+-; Request sense command pointer, it's a 6 byte command, should
+-; be constant for all commands since we always want 16 bytes of 
+-; sense and we don't need to change any fields as we did under 
+-; SCSI-I when we actually cared about the LUN field.
+-;EXTERNAL NCR53c7xx_sense		; Request sense command
+-
+-#if (CHIP != 700) && (CHIP != 70066)
+-; dsa_schedule  
+-; PURPOSE : after a DISCONNECT message has been received, and pointers
+-;	saved, insert the current DSA structure at the head of the 
+-; 	disconnected queue and fall through to the scheduler.
+-;
+-; CALLS : OK
+-;
+-; INPUTS : dsa - current DSA structure, reconnect_dsa_head - list
+-;	of disconnected commands
+-;
+-; MODIFIES : SCRATCH, reconnect_dsa_head
+-; 
+-; EXITS : always passes control to schedule
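+-;
+-; In C terms (illustrative), the insertion performed below is simply:
+-;
+-;	dsa->next = reconnect_dsa_head;
+-;	reconnect_dsa_head = dsa;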
+-
+-ENTRY dsa_schedule
+-dsa_schedule:
+-#ifdef DEBUG
+-    INT int_debug_dsa_schedule
+-#endif
+-
+-;
+-; Calculate the address of the next pointer within the DSA 
+-; structure of the command that is currently disconnecting
+-;
+-#if (CHIP == 710)
+-    ; Read what should be the current DSA from memory - actual DSA
+-    ; register is probably corrupt
+-    MOVE MEMORY 4, saved_dsa, addr_scratch
+-#else
+-    CALL dsa_to_scratch
+-#endif
+-    MOVE SCRATCH0 + dsa_next TO SCRATCH0
+-    MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY
+-    MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY
+-    MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY
+-
+-; Point the next field of this DSA structure at the current disconnected 
+-; list
+-    DMODE_NCR_TO_MEMORY
+-    MOVE MEMORY 4, addr_scratch, dsa_schedule_insert + 8
+-    DMODE_MEMORY_TO_MEMORY
+-dsa_schedule_insert:
+-    MOVE MEMORY 4, reconnect_dsa_head, 0 
+-
+-; And update the head pointer.
+-#if (CHIP == 710)
+-    ; Read what should be the current DSA from memory - actual DSA
+-    ; register is probably corrupt
+-    MOVE MEMORY 4, saved_dsa, addr_scratch
+-#else
+-    CALL dsa_to_scratch
+-#endif
+-    DMODE_NCR_TO_MEMORY
+-    MOVE MEMORY 4, addr_scratch, reconnect_dsa_head
+-    DMODE_MEMORY_TO_MEMORY
+-/* Temporarily, see what happens. */
+-#ifndef ORIGINAL
+-#if (CHIP != 710)
+-    MOVE SCNTL2 & 0x7f TO SCNTL2
+-#endif
+-    CLEAR ACK
+-#endif
+-#if (CHIP == 710)
+-    ; Time to correct DSA following memory move
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-    WAIT DISCONNECT
+-#ifdef EVENTS
+-    INT int_EVENT_DISCONNECT;
+-#endif
+-#ifdef DEBUG
+-    INT int_debug_disconnected
+-#endif
+-    JUMP schedule
+-#endif 
+-
+-;
+-; select
+-;
+-; PURPOSE : establish a nexus for the SCSI command referenced by DSA.
+-;	On success, the current DSA structure is removed from the issue 
+-;	queue.  Usually, this is entered as a fall-through from schedule,
+-;	although the contingent allegiance handling code will write
+-;	the select entry address to the DSP to restart a command as a 
+-;	REQUEST SENSE.  A message is sent (usually IDENTIFY, although
+-;	additional SDTR or WDTR messages may be sent).  COMMAND OUT
+-;	is handled.
+-;
+-; INPUTS : DSA - SCSI command, issue_dsa_head
+-;
+-; CALLS : NOT OK
+-;
+-; MODIFIES : SCRATCH, issue_dsa_head
+-;
+-; EXITS : on reselection or selection, go to select_failed
+-;	otherwise, RETURN so control is passed back to 
+-;	dsa_begin.
+-;
+-
+-ENTRY select
+-select:
+-
+-#ifdef EVENTS
+-    INT int_EVENT_BEFORE_SELECT
+-#endif
+-
+-#ifdef DEBUG
+-    INT int_debug_scheduled
+-#endif
+-    CLEAR TARGET
+-
+-; XXX
+-;
+-; In effect, SELECTION operations are backgrounded, with execution
+-; continuing until code which waits for REQ or a fatal interrupt is 
+-; encountered.
+-;
+-; So, for more performance, we could overlap the code which removes 
+-; the command from the NCRs issue queue with the selection, but 
+-; at this point I don't want to deal with the error recovery.
+-;
+-
+-#if (CHIP != 700) && (CHIP != 70066)
+-#if (CHIP == 710)
+-    ; Enable selection timer
+-#ifdef NO_SELECTION_TIMEOUT
+-    MOVE CTEST7 & 0xff TO CTEST7
+-#else
+-    MOVE CTEST7 & 0xef TO CTEST7
+-#endif
+-#endif
+-    SELECT ATN FROM dsa_select, select_failed
+-    JUMP select_msgout, WHEN MSG_OUT
+-ENTRY select_msgout
+-select_msgout:
+-#if (CHIP == 710)
+-    ; Disable selection timer
+-    MOVE CTEST7 | 0x10 TO CTEST7
+-#endif
+-    MOVE FROM dsa_msgout, WHEN MSG_OUT
+-#else
+-ENTRY select_msgout
+-    SELECT ATN 0, select_failed
+-select_msgout:
+-    MOVE 0, 0, WHEN MSGOUT
+-#endif
+-
+-#ifdef EVENTS
+-   INT int_EVENT_SELECT
+-#endif
+-   RETURN
+-
+-; 
+-; select_done
+-; 
+-; PURPOSE: continue on to normal data transfer; called as the exit 
+-;	point from dsa_begin.
+-;
+-; INPUTS: dsa
+-;
+-; CALLS: OK
+-;
+-;
+-
+-select_done:
+-#if (CHIP == 710)
+-; NOTE DSA is corrupt when we arrive here!
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-
+-#ifdef DEBUG
+-ENTRY select_check_dsa
+-select_check_dsa:
+-    INT int_debug_check_dsa
+-#endif
+-
+-; After a successful selection, we should get either a CMD phase or 
+-; some transfer request negotiation message.
+-
+-    JUMP cmdout, WHEN CMD
+-    INT int_err_unexpected_phase, WHEN NOT MSG_IN 
+-
+-select_msg_in:
+-    CALL msg_in, WHEN MSG_IN
+-    JUMP select_msg_in, WHEN MSG_IN
+-
+-cmdout:
+-    INT int_err_unexpected_phase, WHEN NOT CMD
+-#if (CHIP == 700)
+-    INT int_norm_selected
+-#endif
+-ENTRY cmdout_cmdout
+-cmdout_cmdout:
+-#if (CHIP != 700) && (CHIP != 70066)
+-    MOVE FROM dsa_cmdout, WHEN CMD
+-#else
+-    MOVE 0, 0, WHEN CMD
+-#endif /* (CHIP != 700) && (CHIP != 70066) */
+-
+-;
+-; data_transfer  
+-; other_out
+-; other_in
+-; other_transfer
+-;
+-; PURPOSE : handle the main data transfer for a SCSI command in 
+-;	several parts.  In the first part, data_transfer, DATA_IN
+-;	and DATA_OUT phases are allowed, with the user provided
+-;	code (usually dynamically generated based on the scatter/gather
+-;	list associated with a SCSI command) called to handle these 
+-;	phases.
+-;
+-;	After control has passed to one of the user provided 
+-;	DATA_IN or DATA_OUT routines, back calls are made to 
+-;	other_transfer_in or other_transfer_out to handle non-DATA_IN
+-;	and non-DATA_OUT phases respectively, with the state of the active
+-;	data pointer being preserved in TEMP.
+-;
+-;	On completion, the user code passes control to other_transfer
+-;	which causes DATA_IN and DATA_OUT to result in unexpected_phase
+-;	interrupts so that data overruns may be trapped.
+-;
+-; INPUTS : DSA - SCSI command
+-;
+-; CALLS : OK in data_transfer_start, not ok in other_out and other_in, ok in
+-;	other_transfer
+-;
+-; MODIFIES : SCRATCH
+-;
+-; EXITS : if STATUS IN is detected, signifying command completion,
+-;	the NCR jumps to command_complete.  If MSG IN occurs, a 
+-;	CALL is made to msg_in.  Otherwise, other_transfer runs in 
+-;	an infinite loop.
+-;	
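+-; Loosely, in pseudo-C (phase names as above; jump() stands in for
+-; the NCR's patched JUMP instructions):
+-;
+-;	for (;;) {
+-;		if (phase == CMD)      jump(cmdout_cmdout);
+-;		if (phase == MSG_IN)   msg_in();
+-;		if (phase == MSG_OUT)  int_err_unexpected_phase();
+-;		if (phase == DATA_OUT) jump(dsa->dataout);
+-;		if (phase == DATA_IN)  jump(dsa->datain);
+-;		if (phase == STATUS)   jump(command_complete);
+-;	}
+-;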
+-
+-ENTRY data_transfer
+-data_transfer:
+-    JUMP cmdout_cmdout, WHEN CMD
+-    CALL msg_in, WHEN MSG_IN
+-    INT int_err_unexpected_phase, WHEN MSG_OUT
+-    JUMP do_dataout, WHEN DATA_OUT
+-    JUMP do_datain, WHEN DATA_IN
+-    JUMP command_complete, WHEN STATUS
+-    JUMP data_transfer
+-ENTRY end_data_transfer
+-end_data_transfer:
+-
+-;
+-; FIXME: On NCR53c700 and NCR53c700-66 chips, do_dataout/do_datain 
+-; should be fixed up whenever the nexus changes so it can point to the 
+-; correct routine for that command.
+-;
+-
+-#if (CHIP != 700) && (CHIP != 70066)
+-; Nasty jump to dsa->dataout
+-do_dataout:
+-#if (CHIP == 710)
+-    MOVE MEMORY 4, saved_dsa, addr_scratch
+-#else
+-    CALL dsa_to_scratch
+-#endif
+-    MOVE SCRATCH0 + dsa_dataout TO SCRATCH0	
+-    MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY 
+-    MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY 
+-    MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY 
+-    DMODE_NCR_TO_MEMORY
+-    MOVE MEMORY 4, addr_scratch, dataout_to_jump + 4
+-    DMODE_MEMORY_TO_MEMORY
+-dataout_to_jump:
+-    MOVE MEMORY 4, 0, dataout_jump + 4 
+-#if (CHIP == 710)
+-    ; Time to correct DSA following memory move
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-dataout_jump:
+-    JUMP 0
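+-
+-; (The sequence above amounts to a computed goto: the byte-wise adds
+-; form the 32-bit sum scratch = dsa + dsa_dataout, the MOVE MEMORY
+-; patches that sum into the JUMP operand, and the JUMP 0 then lands
+-; in the per-command dataout code.  do_datain below plays the same
+-; trick with dsa_datain.)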
+-
+-; Nasty jump to dsa->dsain
+-do_datain:
+-#if (CHIP == 710)
+-    MOVE MEMORY 4, saved_dsa, addr_scratch
+-#else
+-    CALL dsa_to_scratch
+-#endif
+-    MOVE SCRATCH0 + dsa_datain TO SCRATCH0	
+-    MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY 
+-    MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY 
+-    MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY 
+-    DMODE_NCR_TO_MEMORY
+-    MOVE MEMORY 4, addr_scratch, datain_to_jump + 4
+-    DMODE_MEMORY_TO_MEMORY
+-ENTRY datain_to_jump
+-datain_to_jump:
+-    MOVE MEMORY 4, 0, datain_jump + 4
+-#if (CHIP == 710)
+-    ; Time to correct DSA following memory move
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-#ifdef DEBUG
+-    INT int_debug_datain
+-#endif
+-datain_jump:
+-    JUMP 0
+-#endif /* (CHIP != 700) && (CHIP != 70066) */
+-
+-
+-; Note that other_out and other_in loop until a non-data phase
+-; is discovered, so we only execute return statements when we
+-; can go on to the next data phase block move statement.
+-
+-ENTRY other_out
+-other_out:
+-#if 0
+-    INT 0x03ffdead
+-#endif
+-    INT int_err_unexpected_phase, WHEN CMD
+-    JUMP msg_in_restart, WHEN MSG_IN 
+-    INT int_err_unexpected_phase, WHEN MSG_OUT
+-    INT int_err_unexpected_phase, WHEN DATA_IN
+-    JUMP command_complete, WHEN STATUS
+-    JUMP other_out, WHEN NOT DATA_OUT
+-#if (CHIP == 710)
+-; TEMP should be OK, as we got here from a call in the user dataout code.
+-#endif
+-    RETURN
+-
+-ENTRY other_in
+-other_in:
+-#if 0
+-    INT 0x03ffdead
+-#endif
+-    INT int_err_unexpected_phase, WHEN CMD
+-    JUMP msg_in_restart, WHEN MSG_IN 
+-    INT int_err_unexpected_phase, WHEN MSG_OUT
+-    INT int_err_unexpected_phase, WHEN DATA_OUT
+-    JUMP command_complete, WHEN STATUS
+-    JUMP other_in, WHEN NOT DATA_IN
+-#if (CHIP == 710)
+-; TEMP should be OK, as we got here from a call in the user datain code.
+-#endif
+-    RETURN
+-
+-
+-ENTRY other_transfer
+-other_transfer:
+-    INT int_err_unexpected_phase, WHEN CMD
+-    CALL msg_in, WHEN MSG_IN
+-    INT int_err_unexpected_phase, WHEN MSG_OUT
+-    INT int_err_unexpected_phase, WHEN DATA_OUT
+-    INT int_err_unexpected_phase, WHEN DATA_IN
+-    JUMP command_complete, WHEN STATUS
+-    JUMP other_transfer
+-
+-;
+-; msg_in_restart
+-; msg_in
+-; munge_msg
+-;
+-; PURPOSE : process messages from a target.  msg_in is called when the 
+-;	caller hasn't read the first byte of the message.  munge_msg
+-;	is called when the caller has read the first byte of the message,
+-;	and left it in SFBR.  msg_in_restart is called when the caller 
+-;	hasn't read the first byte of the message, and wishes RETURN
+-;	to transfer control back to the address of the conditional
+-;	CALL instruction rather than to the instruction after it.
+-;
+-;	Various int_* interrupts are generated when the host system
+-;	needs to intervene, as is the case with SDTR, WDTR, and
+-;	INITIATE RECOVERY messages.
+-;
+-;	When the host system handles one of these interrupts,
+-;	it can respond by reentering at reject_message, 
+-;	which rejects the message and returns control to
+-;	the caller of msg_in or munge_msg, accept_message
+-;	which clears ACK and returns control, or respond_message,
+-;	which sends the message pointed to by the DSA 
+-;	msgout_other table indirect field.
+-;
+-;	DISCONNECT messages are handled by moving the command
+-;	to the reconnect_dsa_queue.
+-#if (CHIP == 710)
+-; NOTE: DSA should be valid when we get here - we cannot save both it
+-;	and TEMP in this routine.
+-#endif
+-;
+-; INPUTS : DSA - SCSI COMMAND, SFBR - first byte of message (munge_msg
+-;	only)
+-;
+-; CALLS : NO.  The TEMP register isn't backed up to allow nested calls.
+-;
+-; MODIFIES : SCRATCH, DSA on DISCONNECT
+-;
+-; EXITS : On receipt of SAVE DATA POINTER, RESTORE POINTERS,
+-;	and normal return from message handlers running under
+-;	Linux, control is returned to the caller.  Receipt
+-;	of DISCONNECT messages passes control to dsa_schedule.
+-;
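+-; As a pseudo-C sketch of the munge_msg dispatch below (msg is the
+-; byte read into msg_buf):
+-;
+-;	if (msg == 0x01) goto munge_extended;
+-;	if ((msg & 0xdf) == 0x20) goto munge_2;      /* two byte msg */
+-;	if (msg == 0x02) goto munge_save_data_pointer;
+-;	if (msg == 0x03) goto munge_restore_pointers;
+-;	if (msg == 0x04) goto munge_disconnect;
+-;	if (msg == 0x07 || msg == 0x0f) int_msg_1(); /* host handles */
+-;	goto reject_message;
+-;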
+-ENTRY msg_in_restart
+-msg_in_restart:
+-; XXX - hackish
+-;
+-; Since it's easier to debug changes to the statically 
+-; compiled code than the dynamically generated 
+-; stuff, such as
+-;
+-; 	MOVE x, y, WHEN data_phase
+-; 	CALL other_z, WHEN NOT data_phase
+-; 	MOVE x, y, WHEN data_phase
+-;
+-; I'd like to have certain routines (notably the message handler)
+-; restart on the conditional call rather than the next instruction.
+-;
+-; So, subtract 8 from the return address
+-
+-    MOVE TEMP0 + 0xf8 TO TEMP0
+-    MOVE TEMP1 + 0xff TO TEMP1 WITH CARRY
+-    MOVE TEMP2 + 0xff TO TEMP2 WITH CARRY
+-    MOVE TEMP3 + 0xff TO TEMP3 WITH CARRY
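+-
+-; (In effect TEMP -= 8: a 32-bit add of 0xfffffff8 performed one byte
+-; at a time - 0xf8 into the low byte, then 0xff plus carry into each
+-; higher byte - since the ALU is only eight bits wide.)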
+-
+-ENTRY msg_in
+-msg_in:
+-    MOVE 1, msg_buf, WHEN MSG_IN
+-
+-munge_msg:
+-    JUMP munge_extended, IF 0x01		; EXTENDED MESSAGE
+-    JUMP munge_2, IF 0x20, AND MASK 0xdf	; two byte message
+-;
+-; XXX - I've seen a handful of broken SCSI devices which fail to issue
+-; 	a SAVE POINTERS message before disconnecting in the middle of 
+-; 	a transfer, assuming that the DATA POINTER will be implicitly 
+-; 	restored.  
+-;
+-; Historically, I've often done an implicit save when the DISCONNECT
+-; message is processed.  We may want to consider having the option of 
+-; doing that here. 
+-;
+-    JUMP munge_save_data_pointer, IF 0x02	; SAVE DATA POINTER
+-    JUMP munge_restore_pointers, IF 0x03	; RESTORE POINTERS 
+-    JUMP munge_disconnect, IF 0x04		; DISCONNECT
+-    INT int_msg_1, IF 0x07			; MESSAGE REJECT
+-    INT int_msg_1, IF 0x0f			; INITIATE RECOVERY
+-#ifdef EVENTS 
+-    INT int_EVENT_SELECT_FAILED 
+-#endif
+-    JUMP reject_message
+-
+-munge_2:
+-    JUMP reject_message
+-;
+-; The SCSI standard allows targets to recover from transient 
+-; error conditions by backing up the data pointer with a 
+-; RESTORE POINTERS message.  
+-;	
+-; So, we must save and restore the _residual_ code as well as 
+-; the current instruction pointer.  Because of this messiness,
+-; it is simpler to put dynamic code in the dsa for this and to
+-; just do a simple jump down there. 
+-;
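+-; (i.e., a sketch: jump to dsa + dsa_save_data_pointer, where the
+-; host-generated per-command code saves TEMP and the residual.)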
+-
+-munge_save_data_pointer:
+-#if (CHIP == 710)
+-    ; We have something in TEMP here, so first we must save that
+-    MOVE TEMP0 TO SFBR
+-    MOVE SFBR TO SCRATCH0
+-    MOVE TEMP1 TO SFBR
+-    MOVE SFBR TO SCRATCH1
+-    MOVE TEMP2 TO SFBR
+-    MOVE SFBR TO SCRATCH2
+-    MOVE TEMP3 TO SFBR
+-    MOVE SFBR TO SCRATCH3
+-    MOVE MEMORY 4, addr_scratch, jump_temp + 4
+-    ; Now restore DSA
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-    MOVE DSA0 + dsa_save_data_pointer TO SFBR
+-    MOVE SFBR TO SCRATCH0
+-    MOVE DSA1 + 0xff TO SFBR WITH CARRY
+-    MOVE SFBR TO SCRATCH1
+-    MOVE DSA2 + 0xff TO SFBR WITH CARRY 
+-    MOVE SFBR TO SCRATCH2
+-    MOVE DSA3 + 0xff TO SFBR WITH CARRY
+-    MOVE SFBR TO SCRATCH3
+-
+-    DMODE_NCR_TO_MEMORY
+-    MOVE MEMORY 4, addr_scratch, jump_dsa_save + 4
+-    DMODE_MEMORY_TO_MEMORY
+-jump_dsa_save:
+-    JUMP 0
+-
+-munge_restore_pointers:
+-#if (CHIP == 710)
+-    ; The code at dsa_restore_pointers will RETURN, but we don't care
+-    ; about TEMP here, as it will overwrite it anyway.
+-#endif
+-    MOVE DSA0 + dsa_restore_pointers TO SFBR
+-    MOVE SFBR TO SCRATCH0
+-    MOVE DSA1 + 0xff TO SFBR WITH CARRY
+-    MOVE SFBR TO SCRATCH1
+-    MOVE DSA2 + 0xff TO SFBR WITH CARRY
+-    MOVE SFBR TO SCRATCH2
+-    MOVE DSA3 + 0xff TO SFBR WITH CARRY
+-    MOVE SFBR TO SCRATCH3
+-
+-    DMODE_NCR_TO_MEMORY
+-    MOVE MEMORY 4, addr_scratch, jump_dsa_restore + 4
+-    DMODE_MEMORY_TO_MEMORY
+-jump_dsa_restore:
+-    JUMP 0
+-
+-
+-munge_disconnect:
+-#ifdef DEBUG
+-    INT int_debug_disconnect_msg
+-#endif
+-
+-/* 
+- * Before, we overlapped processing with waiting for disconnect, but
+- * debugging was beginning to appear messy.  Temporarily move things
+- * to just before the WAIT DISCONNECT.
+- */
+- 
+-#ifdef ORIGINAL
+-#if (CHIP == 710)
+-; The following clears the Unexpected Disconnect bit.  What do we do?
+-#else
+-    MOVE SCNTL2 & 0x7f TO SCNTL2
+-#endif
+-    CLEAR ACK
+-#endif
+-
+-#if (CHIP != 700) && (CHIP != 70066)
+-    JUMP dsa_schedule
+-#else
+-    WAIT DISCONNECT
+-    INT int_norm_disconnected
+-#endif
+-
+-munge_extended:
+-    CLEAR ACK
+-    INT int_err_unexpected_phase, WHEN NOT MSG_IN
+-    MOVE 1, msg_buf + 1, WHEN MSG_IN
+-    JUMP munge_extended_2, IF 0x02
+-    JUMP munge_extended_3, IF 0x03 
+-    JUMP reject_message
+-
+-munge_extended_2:
+-    CLEAR ACK
+-    MOVE 1, msg_buf + 2, WHEN MSG_IN
+-    JUMP reject_message, IF NOT 0x02	; Must be WDTR
+-    CLEAR ACK
+-    MOVE 1, msg_buf + 3, WHEN MSG_IN
+-    INT int_msg_wdtr
+-
+-munge_extended_3:
+-    CLEAR ACK
+-    MOVE 1, msg_buf + 2, WHEN MSG_IN
+-    JUMP reject_message, IF NOT 0x01	; Must be SDTR
+-    CLEAR ACK
+-    MOVE 2, msg_buf + 3, WHEN MSG_IN
+-    INT int_msg_sdtr
+-
+-ENTRY reject_message
+-reject_message:
+-    SET ATN
+-    CLEAR ACK
+-    MOVE 1, NCR53c7xx_msg_reject, WHEN MSG_OUT
+-    RETURN
+-
+-ENTRY accept_message
+-accept_message:
+-    CLEAR ATN
+-    CLEAR ACK
+-    RETURN
+-
+-ENTRY respond_message
+-respond_message:
+-    SET ATN
+-    CLEAR ACK
+-    MOVE FROM dsa_msgout_other, WHEN MSG_OUT
+-    RETURN
+-
+-;
+-; command_complete
+-;
+-; PURPOSE : handle command termination when STATUS IN is detected by reading
+-;	a status byte followed by a command termination message. 
+-;
+-;	Normal termination results in an INTFLY instruction, and 
+-;	the host system can pick out which command terminated by 
+-;	examining the MESSAGE and STATUS buffers of all currently 
+-;	executing commands;
+-;
+-;	Abnormal (CHECK_CONDITION) termination results in an
+-;	int_err_check_condition interrupt so that a REQUEST SENSE
+-;	command can be issued out-of-order, ensuring that no other
+-;	command clears the contingent allegiance condition.
+-;	
+-;
+-; INPUTS : DSA - command	
+-;
+-; CALLS : OK
+-;
+-; EXITS : On successful termination, control is passed to schedule.
+-;	On abnormal termination, the user will usually modify the 
+-;	DSA fields and corresponding buffers and return control
+-;	to select.
+-;
+-
+-ENTRY command_complete
+-command_complete:
+-    MOVE FROM dsa_status, WHEN STATUS
+-#if (CHIP != 700) && (CHIP != 70066)
+-    MOVE SFBR TO SCRATCH0		; Save status
+-#endif /* (CHIP != 700) && (CHIP != 70066) */
+-ENTRY command_complete_msgin
+-command_complete_msgin:
+-    MOVE FROM dsa_msgin, WHEN MSG_IN
+-; Indicate that we should be expecting a disconnect
+-#if (CHIP != 710)
+-    MOVE SCNTL2 & 0x7f TO SCNTL2
+-#else
+-    ; Above code cleared the Unexpected Disconnect bit; what do we do?
+-#endif
+-    CLEAR ACK
+-#if (CHIP != 700) && (CHIP != 70066)
+-    WAIT DISCONNECT
+-
+-;
+-; The SCSI specification states that when a UNIT ATTENTION condition
+-; is pending, as indicated by a CHECK CONDITION status message,
+-; the target shall revert to asynchronous transfers.  Since
+-; synchronous transfer parameters are maintained on a per-INITIATOR/TARGET 
+-; basis, and returning control to our scheduler could resume a command
+-; running on another lun on that target using the old parameters, we must
+-; interrupt the host processor to get them changed, or change them ourselves.
+-;
+-; Once SCSI-II tagged queueing is implemented, things will be even more
+-; hairy, since contingent allegiance conditions exist on a per-target/lun
+-; basis, and issuing a new command with a different tag would clear it.
+-; In these cases, we must interrupt the host processor to get a request 
+-; added to the HEAD of the queue with the request sense command, or we
+-; must automatically issue the request sense command.
+-
+-#if 0
+-    MOVE SCRATCH0 TO SFBR			
+-    JUMP command_failed, IF 0x02
+-#endif
+-#if (CHIP == 710)
+-#if defined(MVME16x_INTFLY)
+-; For MVME16x (i.e. CHIP=710) we will force an INTFLY by triggering a software
+-; interrupt (SW7).  We can use SCRATCH, as we are about to jump to
+-; schedule, which corrupts it anyway.  Will probably remove this later,
+-; but want to check performance effects first.
+-
+-#define INTFLY_ADDR     0xfff40070
+-
+-    MOVE 0 TO SCRATCH0
+-    MOVE 0x80 TO SCRATCH1
+-    MOVE 0 TO SCRATCH2
+-    MOVE 0 TO SCRATCH3
+-    MOVE MEMORY 4, addr_scratch, INTFLY_ADDR
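+-
+-; (Sketch: the MOVE MEMORY above performs a single 32-bit store of the
+-; four SCRATCH bytes to INTFLY_ADDR, with 0x80 in the second byte -
+-; which, per the note above, is what latches SW7.)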
+-#else
+-    INT int_norm_emulateintfly
+-#endif
+-#else
+-    INTFLY
+-#endif
+-#endif /* (CHIP != 700) && (CHIP != 70066) */
+-#if (CHIP == 710)
+-    ; Time to correct DSA following memory move
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-#ifdef EVENTS
+-    INT int_EVENT_COMPLETE
+-#endif
+-#if (CHIP != 700) && (CHIP != 70066)
+-    JUMP schedule
+-command_failed:
+-    INT int_err_check_condition
+-#else
+-    INT int_norm_command_complete
+-#endif
+-
+-;
+-; wait_reselect
+-;
+-; PURPOSE : This is essentially the idle routine, where control lands
+-;	when there are no new processes to schedule.  wait_reselect
+-;	waits for reselection, selection, and new commands.
+-;
+-;	When a successful reselection occurs, with the aid 
+-;	of fixed up code in each DSA, wait_reselect walks the 
+-;	reconnect_dsa_queue, asking each dsa if the target ID
+-;	and LUN match its own.
+-;
+-;	If a match is found, a call is made back to reselected_ok,
+-;	which, through the miracles of self-modifying code, extracts
+-;	the found DSA from the reconnect_dsa_queue and then 
+-;	returns control to the DSA's thread of execution.
+-;
+-; INPUTS : NONE
+-;
+-; CALLS : OK
+-;
+-; MODIFIES : DSA,
+-;
+-; EXITS : On successful reselection, control is returned to the 
+-;	DSA which called reselected_ok.  If the WAIT RESELECT
+-;	was interrupted by a new command's arrival signaled by 
+-;	SIG_P, control is passed to schedule.  If the NCR is 
+-;	selected, the host system is interrupted with an 
+-;	int_err_selected which is usually responded to by
+-;	setting DSP to the target_abort address.
+-
+-ENTRY wait_reselect
+-wait_reselect:
+-#ifdef EVENTS
+-    int int_EVENT_IDLE
+-#endif
+-#ifdef DEBUG
+-    int int_debug_idle
+-#endif
+-    WAIT RESELECT wait_reselect_failed
+-
+-reselected:
+-#ifdef EVENTS
+-    int int_EVENT_RESELECT
+-#endif
+-    CLEAR TARGET
+-    DMODE_MEMORY_TO_MEMORY
+-    ; Read all data needed to reestablish the nexus - 
+-    MOVE 1, reselected_identify, WHEN MSG_IN
+-    ; We used to CLEAR ACK here.
+-#if (CHIP != 700) && (CHIP != 70066)
+-#ifdef DEBUG
+-    int int_debug_reselected
+-#endif
+-
+-    ; Point DSA at the current head of the disconnected queue.
+-    DMODE_MEMORY_TO_NCR
+-    MOVE MEMORY 4, reconnect_dsa_head, addr_scratch
+-    DMODE_MEMORY_TO_MEMORY
+-#if (CHIP == 710)
+-    MOVE MEMORY 4, addr_scratch, saved_dsa
+-#else
+-    CALL scratch_to_dsa
+-#endif
+-
+-    ; Fix the update-next pointer so that the reconnect_dsa_head
+-    ; pointer is the one that will be updated if this DSA is a hit 
+-    ; and we remove it from the queue.
+-
+-    MOVE MEMORY 4, addr_reconnect_dsa_head, reselected_ok_patch + 8
+-#if (CHIP == 710)
+-    ; Time to correct DSA following memory move
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-
+-ENTRY reselected_check_next
+-reselected_check_next:
+-#ifdef DEBUG
+-    INT int_debug_reselect_check
+-#endif
+-    ; Check for a NULL pointer.
+-    MOVE DSA0 TO SFBR
+-    JUMP reselected_not_end, IF NOT 0
+-    MOVE DSA1 TO SFBR
+-    JUMP reselected_not_end, IF NOT 0
+-    MOVE DSA2 TO SFBR
+-    JUMP reselected_not_end, IF NOT 0
+-    MOVE DSA3 TO SFBR
+-    JUMP reselected_not_end, IF NOT 0
+-    INT int_err_unexpected_reselect
+-
+-reselected_not_end:
+-    ;
+-    ; XXX the ALU is only eight bits wide, and the assembler
+-    ; won't do the dirty work for us.  As long as dsa_check_reselect
+-    ; is negative, we need to sign-extend with 1 bits to the full
+-    ; 32 bit width of the address.
+-    ;
+-    ; A potential workaround would be to have a known alignment 
+-    ; of the DSA structure such that the base address plus 
+-    ; dsa_check_reselect doesn't require carrying from bytes 
+-    ; higher than the LSB.
+-    ;
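+-    ; In C terms, the adds below compute (a sketch)
+-    ;     scratch = dsa + (s32)(s8)dsa_check_reselect;
+-    ; with the 0xff-plus-carry moves propagating the sign through the
+-    ; three upper bytes.
+-    ;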
+-
+-    MOVE DSA0 TO SFBR
+-    MOVE SFBR + dsa_check_reselect TO SCRATCH0
+-    MOVE DSA1 TO SFBR
+-    MOVE SFBR + 0xff TO SCRATCH1 WITH CARRY
+-    MOVE DSA2 TO SFBR
+-    MOVE SFBR + 0xff TO SCRATCH2 WITH CARRY
+-    MOVE DSA3 TO SFBR
+-    MOVE SFBR + 0xff TO SCRATCH3 WITH CARRY
+-
+-    DMODE_NCR_TO_MEMORY
+-    MOVE MEMORY 4, addr_scratch, reselected_check + 4
+-    DMODE_MEMORY_TO_MEMORY
+-#if (CHIP == 710)
+-    ; Time to correct DSA following memory move
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-#endif
+-reselected_check:
+-    JUMP 0
+-
+-
+-;
+-;
+-#if (CHIP == 710)
+-; We have problems here - the memory move corrupts TEMP and DSA.  This
+-; routine is called from DSA code, and patched from many places.  Scratch
+-; is probably free when it is called.
+-; We have to:
+-;   copy temp to scratch, one byte at a time
+-;   write scratch to patch a jump in place of the return
+-;   do the move memory
+-;   jump to the patched in return address
+-; DSA is corrupt when we get here, and can be left corrupt
+-
+-ENTRY reselected_ok
+-reselected_ok:
+-    MOVE TEMP0 TO SFBR
+-    MOVE SFBR TO SCRATCH0
+-    MOVE TEMP1 TO SFBR
+-    MOVE SFBR TO SCRATCH1
+-    MOVE TEMP2 TO SFBR
+-    MOVE SFBR TO SCRATCH2
+-    MOVE TEMP3 TO SFBR
+-    MOVE SFBR TO SCRATCH3
+-    MOVE MEMORY 4, addr_scratch, reselected_ok_jump + 4
+-reselected_ok_patch:
+-    MOVE MEMORY 4, 0, 0
+-reselected_ok_jump:
+-    JUMP 0
+-#else
+-ENTRY reselected_ok
+-reselected_ok:
+-reselected_ok_patch:
+-    MOVE MEMORY 4, 0, 0				; Patched : first word
+-						; 	is address of 
+-						;       successful dsa_next
+-						; Second word is last 
+-						;	unsuccessful dsa_next,
+-						;	starting with 
+-						;       dsa_reconnect_head
+-    ; We used to CLEAR ACK here.
+-#ifdef DEBUG
+-    INT int_debug_reselected_ok
+-#endif
+-#ifdef DEBUG
+-    INT int_debug_check_dsa
+-#endif
+-    RETURN					; Return control to where
+-#endif
+-#else
+-    INT int_norm_reselected
+-#endif /* (CHIP != 700) && (CHIP != 70066) */
+-
+-selected:
+-    INT int_err_selected;
+-
+-;
+-; A select or reselect failure can be caused by one of three conditions : 
+-; 1.  SIG_P was set.  This will be the case if the user has written
+-;	a new value to a previously NULL head of the issue queue.
+-;
+-; 2.  The NCR53c810 was selected or reselected by another device.
+-;
+-; 3.  The bus was already busy since we were selected or reselected
+-;	before starting the command.
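+-;
+-; Roughly, the checks below do (pseudo-C, non-710 path):
+-;
+-;	if (SIST0 & 0x20)  goto selected;    /* condition 2 */
+-;	if (CTEST2 & 0x40) goto schedule;    /* condition 1, SIG_P */
+-;	if (ISTAT & 0x08)  goto reselected;  /* condition 3 */
+-;	int_debug_panic();                   /* none of the above */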
+-
+-wait_reselect_failed:
+-#ifdef EVENTS 
+-	INT int_EVENT_RESELECT_FAILED
+-#endif
+-; Check selected bit.  
+-#if (CHIP == 710)
+-    ; Must work out how to tell if we are selected....
+-#else
+-    MOVE SIST0 & 0x20 TO SFBR
+-    JUMP selected, IF 0x20
+-#endif
+-; Reading CTEST2 clears the SIG_P bit in the ISTAT register.
+-    MOVE CTEST2 & 0x40 TO SFBR	
+-    JUMP schedule, IF 0x40
+-; Check connected bit.  
+-; FIXME: this needs to change if we support target mode
+-    MOVE ISTAT & 0x08 TO SFBR
+-    JUMP reselected, IF 0x08
+-; FIXME : Something bogus happened, and we shouldn't fail silently.
+-#if 0
+-    JUMP schedule
+-#else
+-    INT int_debug_panic
+-#endif
+-
+-
+-select_failed:
+-#if (CHIP == 710)
+-    ; Disable selection timer
+-    MOVE CTEST7 | 0x10 TO CTEST7
+-#endif
+-#ifdef EVENTS
+-  int int_EVENT_SELECT_FAILED
+-#endif
+-; Otherwise, mask the selected and reselected bits off SIST0
+-#if (CHIP == 710)
+-    ; Let's assume we don't get selected for now
+-    MOVE SSTAT0 & 0x10 TO SFBR
+-#else
+-    MOVE SIST0 & 0x30 TO SFBR
+-    JUMP selected, IF 0x20
+-#endif
+-    JUMP reselected, IF 0x10 
+-; If SIGP is set, the user just gave us another command, and
+-; we should restart or return to the scheduler.
+-; Reading CTEST2 clears the SIG_P bit in the ISTAT register.
+-    MOVE CTEST2 & 0x40 TO SFBR	
+-    JUMP select, IF 0x40
+-; Check connected bit.  
+-; FIXME: this needs to change if we support target mode
+-; FIXME: is this really necessary? 
+-    MOVE ISTAT & 0x08 TO SFBR
+-    JUMP reselected, IF 0x08
+-; FIXME : Something bogus happened, and we shouldn't fail silently.
+-#if 0
+-    JUMP schedule
+-#else
+-    INT int_debug_panic
+-#endif
+-
+-;
+-; test_1
+-; test_2
+-;
+-; PURPOSE : run some verification tests on the NCR.  test_1
+-;	copies test_src to test_dest and interrupts the host
+-;	processor, testing for cache coherency and interrupt
+-; 	problems in the process.
+-;
+-;	test_2 runs a command with offsets relative to the 
+-;	DSA on entry, and is useful for miscellaneous experimentation.
+-;
+-
+-; Verify that interrupts are working correctly and that we don't 
+-; have a cache invalidation problem.
+-
+-ABSOLUTE test_src = 0, test_dest = 0
+-ENTRY test_1
+-test_1:
+-    MOVE MEMORY 4, test_src, test_dest
+-    INT int_test_1
+-
+-;
+-; Run arbitrary commands, with test code establishing a DSA
+-;
+- 
+-ENTRY test_2
+-test_2:
+-    CLEAR TARGET
+-#if (CHIP == 710)
+-    ; Enable selection timer
+-#ifdef NO_SELECTION_TIMEOUT
+-    MOVE CTEST7 & 0xff TO CTEST7
+-#else
+-    MOVE CTEST7 & 0xef TO CTEST7
+-#endif
+-#endif
+-    SELECT ATN FROM 0, test_2_fail
+-    JUMP test_2_msgout, WHEN MSG_OUT
+-ENTRY test_2_msgout
+-test_2_msgout:
+-#if (CHIP == 710)
+-    ; Disable selection timer
+-    MOVE CTEST7 | 0x10 TO CTEST7
+-#endif
+-    MOVE FROM 8, WHEN MSG_OUT
+-    MOVE FROM 16, WHEN CMD 
+-    MOVE FROM 24, WHEN DATA_IN
+-    MOVE FROM 32, WHEN STATUS
+-    MOVE FROM 40, WHEN MSG_IN
+-#if (CHIP != 710)
+-    MOVE SCNTL2 & 0x7f TO SCNTL2
+-#endif
+-    CLEAR ACK
+-    WAIT DISCONNECT
+-test_2_fail:
+-#if (CHIP == 710)
+-    ; Disable selection timer
+-    MOVE CTEST7 | 0x10 TO CTEST7
+-#endif
+-    INT int_test_2
+-
+-ENTRY debug_break
+-debug_break:
+-    INT int_debug_break
+-
+-;
+-; initiator_abort
+-; target_abort
+-;
+-; PURPOSE : Abort the currently established nexus from within initiator
+-;	or target mode.
+-;
+-;  
+-
+-ENTRY target_abort
+-target_abort:
+-    SET TARGET
+-    DISCONNECT
+-    CLEAR TARGET
+-    JUMP schedule
+-    
+-ENTRY initiator_abort
+-initiator_abort:
+-    SET ATN
+-;
+-; The SCSI-I specification says that targets may go into MSG out at 
+-; their leisure upon receipt of the ATN signal.  On all versions of the 
+-; specification, we can't change phases until REQ transitions true->false, 
+-; so we need to sink/source one byte of data to allow the transition.
+-;
+-; For the sake of safety, we'll only source one byte of data in all 
+-; cases, but to accommodate the SCSI-I dain bramage, we'll sink an  
+-; arbitrary number of bytes.
+-    JUMP spew_cmd, WHEN CMD
+-    JUMP eat_msgin, WHEN MSG_IN
+-    JUMP eat_datain, WHEN DATA_IN
+-    JUMP eat_status, WHEN STATUS
+-    JUMP spew_dataout, WHEN DATA_OUT
+-    JUMP sated
+-spew_cmd:
+-    MOVE 1, NCR53c7xx_zero, WHEN CMD
+-    JUMP sated
+-eat_msgin:
+-    MOVE 1, NCR53c7xx_sink, WHEN MSG_IN
+-    JUMP eat_msgin, WHEN MSG_IN
+-    JUMP sated
+-eat_status:
+-    MOVE 1, NCR53c7xx_sink, WHEN STATUS
+-    JUMP eat_status, WHEN STATUS
+-    JUMP sated
+-eat_datain:
+-    MOVE 1, NCR53c7xx_sink, WHEN DATA_IN
+-    JUMP eat_datain, WHEN DATA_IN
+-    JUMP sated
+-spew_dataout:
+-    MOVE 1, NCR53c7xx_zero, WHEN DATA_OUT
+-sated:
+-#if (CHIP != 710)
+-    MOVE SCNTL2 & 0x7f TO SCNTL2
+-#endif
+-    MOVE 1, NCR53c7xx_msg_abort, WHEN MSG_OUT
+-    WAIT DISCONNECT
+-    INT int_norm_aborted
+-
+-#if (CHIP != 710)
+-;
+-; dsa_to_scratch
+-; scratch_to_dsa
+-;
+-; PURPOSE :
+-; 	The NCR chips cannot do a move memory instruction with the DSA register 
+-; 	as the source or destination.  So, we provide a couple of subroutines
+-; 	that let us switch between the DSA register and scratch register.
+-;
+-; 	Memory moves to/from the DSPS  register also don't work, but we 
+-; 	don't use them.
+-;
+-;
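+-; (Each byte below is shuttled DSA -> SFBR -> SCRATCH and back,
+-; presumably because register-to-register moves on these chips must
+-; pass through SFBR.)
+-;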
+-
+- 
+-dsa_to_scratch:
+-    MOVE DSA0 TO SFBR
+-    MOVE SFBR TO SCRATCH0
+-    MOVE DSA1 TO SFBR
+-    MOVE SFBR TO SCRATCH1
+-    MOVE DSA2 TO SFBR
+-    MOVE SFBR TO SCRATCH2
+-    MOVE DSA3 TO SFBR
+-    MOVE SFBR TO SCRATCH3
+-    RETURN
+-
+-scratch_to_dsa:
+-    MOVE SCRATCH0 TO SFBR
+-    MOVE SFBR TO DSA0
+-    MOVE SCRATCH1 TO SFBR
+-    MOVE SFBR TO DSA1
+-    MOVE SCRATCH2 TO SFBR
+-    MOVE SFBR TO DSA2
+-    MOVE SCRATCH3 TO SFBR
+-    MOVE SFBR TO DSA3
+-    RETURN
+-#endif
+- 
+-#if (CHIP == 710)
+-; Little patched jump, used to overcome problems with TEMP getting
+-; corrupted on memory moves.
+-
+-jump_temp:
+-    JUMP 0
+-#endif
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx_d.h_shipped linux-2.6.22-try2/drivers/scsi/53c7xx_d.h_shipped
+--- linux-2.6.22-570/drivers/scsi/53c7xx_d.h_shipped	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/53c7xx_d.h_shipped	1969-12-31 19:00:00.000000000 -0500
+@@ -1,2874 +0,0 @@
+-/* DO NOT EDIT - Generated automatically by script_asm.pl */
+-static u32 SCRIPT[] = {
+-/*
+-
+-
+-
+-
+-
+-; 53c710 driver.  Modified from Drew Eckhardt's driver
+-; for 53c810 by Richard Hirst [richard@sleepie.demon.co.uk]
+-;
+-; I have left the script for the 53c8xx family in here, as it is likely
+-; to be useful to see what I changed when bug hunting.
+-
+-; NCR 53c810 driver, main script
+-; Sponsored by 
+-;	iX Multiuser Multitasking Magazine
+-;	hm@ix.de
+-;
+-; Copyright 1993, 1994, 1995 Drew Eckhardt
+-;      Visionary Computing 
+-;      (Unix and Linux consulting and custom programming)
+-;      drew@PoohSticks.ORG
+-;      +1 (303) 786-7975
+-;
+-; TolerANT and SCSI SCRIPTS are registered trademarks of NCR Corporation.
+-;
+-; PRE-ALPHA
+-;
+-; For more information, please consult 
+-;
+-; NCR 53C810
+-; PCI-SCSI I/O Processor
+-; Data Manual
+-;
+-; NCR 53C710 
+-; SCSI I/O Processor
+-; Programmers Guide
+-;
+-; NCR Microelectronics
+-; 1635 Aeroplaza Drive
+-; Colorado Springs, CO 80916
+-; 1+ (719) 578-3400
+-;
+-; Toll free literature number
+-; +1 (800) 334-5454
+-;
+-; IMPORTANT : This code is self-modifying due to the limitations of 
+-;	the NCR53c7,8xx series chips.  Persons debugging this code with
+-;	the remote debugger should take this into account, and NOT set
+-;	breakpoints in modified instructions.
+-;
+-; Design:
+-; The NCR53c7,8xx family of SCSI chips are busmasters with an onboard 
+-; microcontroller using a simple instruction set.   
+-;
+-; So, to minimize the effects of interrupt latency, and to maximize 
+-; throughput, this driver offloads the practical maximum amount 
+-; of processing to the SCSI chip while still maintaining a common
+-; structure.
+-;
+-; Where tradeoffs were needed between efficiency on the older
+-; chips and the newer NCR53c800 series, the NCR53c800 series 
+-; was chosen.
+-;
+-; While the NCR53c700 and NCR53c700-66 lacked the facilities to fully
+-; automate SCSI transfers without host processor intervention, this 
+-; isn't the case with the NCR53c710 and newer chips which allow 
+-;
+-; - reads and writes to the internal registers from within the SCSI
+-; 	scripts, allowing the SCSI SCRIPTS(tm) code to save processor
+-; 	state so that multiple threads of execution are possible, and also
+-; 	provide an ALU for loop control, etc.
+-; 
+-; - table indirect addressing for some instructions. This allows 
+-;	pointers to be located relative to the DSA (Data Structure
+-;	Address) register.
+-;
+-; These features make it possible to implement a mailbox style interface,
+-; where the same piece of code is run to handle I/O for multiple threads
+-; at once, minimizing our need to relocate code.  Since the NCR53c700/
+-; NCR53c800 series have a unique combination of features that makes
+-; a standard ingoing/outgoing mailbox system costly, I've modified it.
+-;
+-; - Mailboxes are a mixture of code and data.  This lets us greatly
+-; 	simplify the NCR53c810 code and do things that would otherwise
+-;	not be possible.
+-;
+-; The saved data pointer is now implemented as follows :
+-;
+-; 	Control flow has been architected such that if control reaches
+-;	munge_save_data_pointer, on a restore pointers message or 
+-;	reconnection, a jump to the address formerly in the TEMP register
+-;	will allow the SCSI command to resume execution.
+-;
+-
+-;
+-; Note : the DSA structures must be aligned on 32 bit boundaries,
+-; since the source and destination of MOVE MEMORY instructions 
+-; must share the same alignment and this is the alignment of the
+-; NCR registers.
+-;
+-
+-; For some systems (MVME166, for example) dmode is always the same, so don't
+-; waste time writing it
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-ABSOLUTE dsa_temp_lun = 0		; Patch to lun for current dsa
+-ABSOLUTE dsa_temp_next = 0		; Patch to dsa next for current dsa
+-ABSOLUTE dsa_temp_addr_next = 0		; Patch to address of dsa next address 
+-					; 	for current dsa
+-ABSOLUTE dsa_temp_sync = 0		; Patch to address of per-target
+-					;	sync routine
+-ABSOLUTE dsa_sscf_710 = 0		; Patch to address of per-target
+-					;	sscf value (53c710)
+-ABSOLUTE dsa_temp_target = 0		; Patch to id for current dsa
+-ABSOLUTE dsa_temp_addr_saved_pointer = 0; Patch to address of per-command
+-					; 	saved data pointer
+-ABSOLUTE dsa_temp_addr_residual = 0	; Patch to address of per-command
+-					;	current residual code
+-ABSOLUTE dsa_temp_addr_saved_residual = 0; Patch to address of per-command
+-					; saved residual code
+-ABSOLUTE dsa_temp_addr_new_value = 0	; Address of value for JUMP operand
+-ABSOLUTE dsa_temp_addr_array_value = 0 	; Address to copy to
+-ABSOLUTE dsa_temp_addr_dsa_value = 0	; Address of this DSA value
+-
+-;
+-; Once a device has initiated reselection, we need to compare it 
+-; against the singly linked list of commands which have disconnected
+-; and are pending reselection.  These commands are maintained in 
+-; an unordered singly linked list of DSA structures, through the
+-; DSA pointers at their 'centers' headed by the reconnect_dsa_head
+-; pointer.
+-; 
+-; To avoid complications in removing commands from the list,
+-; I minimize the amount of expensive (at eight operations per
+-; addition @ 500-600ns each) pointer operations which must
+-; be done in the NCR driver by precomputing them on the 
+-; host processor during dsa structure generation.
+-;
+-; The fixed-up per DSA code knows how to recognize the nexus
+-; associated with the corresponding SCSI command, and modifies
+-; the source and destination pointers for the MOVE MEMORY 
+-; instruction which is executed when reselected_ok is called
+-; to remove the command from the list.  Similarly, DSA is 
+-; loaded with the address of the next DSA structure and
+-; reselected_check_next is called if a failure occurs.
+-;
+-; Perhaps more concisely, the net effect of the mess is 
+-;
+-; for (dsa = reconnect_dsa_head, dest = &reconnect_dsa_head, 
+-;     src = NULL; dsa; dest = &dsa->next, dsa = dsa->next) {
+-; 	src = &dsa->next;
+-; 	if (target_id == dsa->id && target_lun == dsa->lun) {
+-; 		*dest = *src;
+-; 		break;
+-;         }	
+-; }
+-;
+-; if (!dsa)
+-;           error (int_err_unexpected_reselect);
+-; else  
+-;     longjmp (dsa->jump_resume, 0);
+-;
+-; 	
+-
+-
+-; Define DSA structure used for mailboxes
+-ENTRY dsa_code_template
+-dsa_code_template:
+-ENTRY dsa_code_begin
+-dsa_code_begin:
+-; RGH: Don't care about TEMP and DSA here
+-	
+-	MOVE MEMORY 4, dsa_temp_addr_dsa_value, addr_scratch
+-
+-at 0x00000000 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-	
+-
+-	MOVE MEMORY 4, addr_scratch, saved_dsa
+-
+-at 0x00000003 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-	; We are about to go and select the device, so must set SSCF bits
+-	MOVE MEMORY 4, dsa_sscf_710, addr_scratch
+-
+-at 0x00000006 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-	MOVE SCRATCH3 TO SFBR
+-
+-at 0x00000009 : */	0x72370000,0x00000000,
+-/*
+-
+-
+-
+-	MOVE SFBR TO SBCL
+-
+-at 0x0000000b : */	0x6a0b0000,0x00000000,
+-/*
+-	MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x0000000d : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-	CALL select
+-
+-at 0x00000010 : */	0x88080000,0x000001f8,
+-/*
+-; Handle the phase mismatch which may have resulted from the 
+-; MOVE FROM dsa_msgout if we returned here.  The CLEAR ATN 
+-; may or may not be necessary, and we should update script_asm.pl
+-; to handle multiple pieces.
+-    CLEAR ATN
+-
+-at 0x00000012 : */	0x60000008,0x00000000,
+-/*
+-    CLEAR ACK
+-
+-at 0x00000014 : */	0x60000040,0x00000000,
+-/*
+-
+-; Replace second operand with address of JUMP instruction dest operand
+-; in schedule table for this DSA.  Becomes dsa_jump_dest in 53c7,8xx.c.
+-ENTRY dsa_code_fix_jump
+-dsa_code_fix_jump:
+-	MOVE MEMORY 4, NOP_insn, 0
+-
+-at 0x00000016 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-	JUMP select_done
+-
+-at 0x00000019 : */	0x80080000,0x00000230,
+-/*
+-
+-; wrong_dsa loads the DSA register with the value of the dsa_next
+-; field.
+-;
+-wrong_dsa:
+-
+-;                NOTE DSA is corrupt when we arrive here!
+-
+-;		Patch the MOVE MEMORY INSTRUCTION such that 
+-;		the destination address is the address of the OLD 
+-;		next pointer.
+-;
+-	MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 8
+-
+-at 0x0000001b : */	0xc0000004,0x00000000,0x000007ec,
+-/*
+-	
+-;
+-; 	Move the _contents_ of the next pointer into the DSA register as 
+-;	the next I_T_L or I_T_L_Q tuple to check against the established
+-;	nexus.
+-;
+-	MOVE MEMORY 4, dsa_temp_next, addr_scratch
+-
+-at 0x0000001e : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-	
+-
+-	MOVE MEMORY 4, addr_scratch, saved_dsa
+-
+-at 0x00000021 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-	MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x00000024 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-	JUMP reselected_check_next
+-
+-at 0x00000027 : */	0x80080000,0x000006f0,
+-/*
+-
+-ABSOLUTE dsa_save_data_pointer = 0
+-ENTRY dsa_code_save_data_pointer
+-dsa_code_save_data_pointer:
+-
+-	; When we get here, TEMP has been saved in jump_temp+4, DSA is corrupt
+-	; We MUST return with DSA correct
+-    	MOVE MEMORY 4, jump_temp+4, dsa_temp_addr_saved_pointer
+-
+-at 0x00000029 : */	0xc0000004,0x000009c8,0x00000000,
+-/*
+-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h
+-    	MOVE MEMORY 24, dsa_temp_addr_residual, dsa_temp_addr_saved_residual
+-
+-at 0x0000002c : */	0xc0000018,0x00000000,0x00000000,
+-/*
+-        CLEAR ACK
+-
+-at 0x0000002f : */	0x60000040,0x00000000,
+-/*
+-
+-
+-
+-	MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x00000031 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-	JUMP jump_temp
+-
+-at 0x00000034 : */	0x80080000,0x000009c4,
+-/*
+-
+-ABSOLUTE dsa_restore_pointers = 0
+-ENTRY dsa_code_restore_pointers
+-dsa_code_restore_pointers:
+-
+-	; TEMP and DSA are corrupt when we get here, but who cares!
+-    	MOVE MEMORY 4, dsa_temp_addr_saved_pointer, jump_temp + 4
+-
+-at 0x00000036 : */	0xc0000004,0x00000000,0x000009c8,
+-/*
+-; HARD CODED : 24 bytes needs to agree with 53c7,8xx.h
+-    	MOVE MEMORY 24, dsa_temp_addr_saved_residual, dsa_temp_addr_residual
+-
+-at 0x00000039 : */	0xc0000018,0x00000000,0x00000000,
+-/*
+-        CLEAR ACK
+-
+-at 0x0000003c : */	0x60000040,0x00000000,
+-/*
+-	; Restore DSA, note we don't care about TEMP
+-	MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x0000003e : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-	JUMP jump_temp
+-
+-at 0x00000041 : */	0x80080000,0x000009c4,
+-/*
+-
+-
+-ABSOLUTE dsa_check_reselect = 0
+-; dsa_check_reselect determines whether or not the current target and
+-; lun match the current DSA
+-ENTRY dsa_code_check_reselect
+-dsa_code_check_reselect:
+-
+-	
+-	
+-	MOVE LCRC TO SFBR		; LCRC has our ID and his ID bits set
+-
+-at 0x00000043 : */	0x72230000,0x00000000,
+-/*
+-	JUMP REL (wrong_dsa), IF NOT dsa_temp_target, AND MASK 0x80
+-
+-at 0x00000045 : */	0x80848000,0x00ffff50,
+-/*
+-
+-
+-
+-
+-
+-;
+-; Hack - move to scratch first, since SFBR is not writeable
+-; 	via the CPU and hence a MOVE MEMORY instruction.
+-;
+-	
+-	MOVE MEMORY 1, reselected_identify, addr_scratch
+-
+-at 0x00000047 : */	0xc0000001,0x00000000,0x00000000,
+-/*
+-	
+-
+-	; BIG ENDIAN ON MVME16x
+-	MOVE SCRATCH3 TO SFBR
+-
+-at 0x0000004a : */	0x72370000,0x00000000,
+-/*
+-
+-
+-
+-; FIXME : we need to accommodate bit fielded and binary here for '7xx/'8xx chips
+-; Are you sure about that?  richard@sleepie.demon.co.uk
+-	JUMP REL (wrong_dsa), IF NOT dsa_temp_lun, AND MASK 0xf8
+-
+-at 0x0000004c : */	0x8084f800,0x00ffff34,
+-/*
+-;		Patch the MOVE MEMORY INSTRUCTION such that
+-;		the source address is the address of this dsa's
+-;		next pointer.
+-	MOVE MEMORY 4, dsa_temp_addr_next, reselected_ok_patch + 4
+-
+-at 0x0000004e : */	0xc0000004,0x00000000,0x000007e8,
+-/*
+-	CALL reselected_ok
+-
+-at 0x00000051 : */	0x88080000,0x00000798,
+-/*
+-
+-;	Restore DSA following memory moves in reselected_ok
+-;	dsa_temp_sync doesn't really care about DSA, but it has an
+-;	optional debug INT so a valid DSA is a good idea.
+-	MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x00000053 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-	CALL dsa_temp_sync	
+-
+-at 0x00000056 : */	0x88080000,0x00000000,
+-/*
+-; Release ACK on the IDENTIFY message _after_ we've set the synchronous 
+-; transfer parameters! 
+-	CLEAR ACK
+-
+-at 0x00000058 : */	0x60000040,0x00000000,
+-/*
+-; Implicitly restore pointers on reselection, so a RETURN
+-; will transfer control back to the right spot.
+-    	CALL REL (dsa_code_restore_pointers)
+-
+-at 0x0000005a : */	0x88880000,0x00ffff68,
+-/*
+-    	RETURN
+-
+-at 0x0000005c : */	0x90080000,0x00000000,
+-/*
+-ENTRY dsa_zero
+-dsa_zero:
+-ENTRY dsa_code_template_end
+-dsa_code_template_end:
+-
+-; Perform a sanity check: if dsa_fields_start != dsa_code_template_end -
+-; dsa_zero, puke.
+-
+-ABSOLUTE dsa_fields_start =  0	; Sanity marker
+-				; 	pad 48 bytes (fix this RSN)
+-ABSOLUTE dsa_next = 48		; len 4 Next DSA
+- 				; del 4 Previous DSA address
+-ABSOLUTE dsa_cmnd = 56		; len 4 Scsi_Cmnd * for this thread.
+-ABSOLUTE dsa_select = 60	; len 4 Device ID, Period, Offset for 
+-			 	;	table indirect select
+-ABSOLUTE dsa_msgout = 64	; len 8 table indirect move parameter for 
+-				;       select message
+-ABSOLUTE dsa_cmdout = 72	; len 8 table indirect move parameter for 
+-				;	command
+-ABSOLUTE dsa_dataout = 80	; len 4 code pointer for dataout
+-ABSOLUTE dsa_datain = 84	; len 4 code pointer for datain
+-ABSOLUTE dsa_msgin = 88		; len 8 table indirect move for msgin
+-ABSOLUTE dsa_status = 96 	; len 8 table indirect move for status byte
+-ABSOLUTE dsa_msgout_other = 104	; len 8 table indirect for normal message out
+-				; (Synchronous transfer negotiation, etc).
+-ABSOLUTE dsa_end = 112
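+-
+-; As a C layout sketch of the above (field names illustrative, not
+-; from the driver; offsets in bytes):
+-;
+-;	struct dsa {
+-;		u8  code[48];        /* fixed-up code, see above */
+-;		u32 next;            /* 48 */
+-;		u32 prev;            /* 52: previous DSA address */
+-;		u32 cmnd;            /* 56: Scsi_Cmnd * */
+-;		u32 select;          /* 60: ID, period, offset */
+-;		u32 msgout[2];       /* 64 */
+-;		u32 cmdout[2];       /* 72 */
+-;		u32 dataout;         /* 80: code pointer */
+-;		u32 datain;          /* 84: code pointer */
+-;		u32 msgin[2];        /* 88 */
+-;		u32 status[2];       /* 96 */
+-;		u32 msgout_other[2]; /* 104 */
+-;	};                           /* dsa_end == 112 */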
+-
+-ABSOLUTE schedule = 0 		; Array of JUMP dsa_begin or JUMP (next),
+-				; terminated by a call to JUMP wait_reselect
+-
+-; Linked lists of DSA structures
+-ABSOLUTE reconnect_dsa_head = 0	; Link list of DSAs which can reconnect
+-ABSOLUTE addr_reconnect_dsa_head = 0 ; Address of variable containing
+-				; address of reconnect_dsa_head
+-
+-; These select the source and destination of a MOVE MEMORY instruction
+-ABSOLUTE dmode_memory_to_memory = 0x0
+-ABSOLUTE dmode_memory_to_ncr = 0x0
+-ABSOLUTE dmode_ncr_to_memory = 0x0
+-
+-ABSOLUTE addr_scratch = 0x0
+-ABSOLUTE addr_temp = 0x0
+-
+-ABSOLUTE saved_dsa = 0x0
+-ABSOLUTE emulfly = 0x0
+-ABSOLUTE addr_dsa = 0x0
+-
+-
+-
+-; Interrupts - 
+-; MSB indicates type
+-; 0	handle error condition
+-; 1 	handle message 
+-; 2 	handle normal condition
+-; 3	debugging interrupt
+-; 4 	testing interrupt 
+-; Next byte indicates specific error
+-
+-; XXX not yet implemented, I'm not sure if I want to - 
+-; Next byte indicates the routine the error occurred in
+-; The LSB indicates the specific place the error occurred
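+-;
+-; Decoding sketch: for a 32-bit interrupt value v,
+-;	type   = v >> 24;           /* 0 err, 1 msg, 2 norm, 3 dbg, 4 test */
+-;	detail = (v >> 16) & 0xff;  /* specific error, message, etc. */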
+- 
+-ABSOLUTE int_err_unexpected_phase = 0x00000000	; Unexpected phase encountered
+-ABSOLUTE int_err_selected = 0x00010000		; SELECTED (nee RESELECTED)
+-ABSOLUTE int_err_unexpected_reselect = 0x00020000 
+-ABSOLUTE int_err_check_condition = 0x00030000	
+-ABSOLUTE int_err_no_phase = 0x00040000
+-ABSOLUTE int_msg_wdtr = 0x01000000		; WDTR message received
+-ABSOLUTE int_msg_sdtr = 0x01010000		; SDTR received
+-ABSOLUTE int_msg_1 = 0x01020000			; single byte special message
+-						; received
+-
+-ABSOLUTE int_norm_select_complete = 0x02000000	; Select complete, reprogram
+-						; registers.
+-ABSOLUTE int_norm_reselect_complete = 0x02010000	; Nexus established
+-ABSOLUTE int_norm_command_complete = 0x02020000 ; Command complete
+-ABSOLUTE int_norm_disconnected = 0x02030000	; Disconnected 
+-ABSOLUTE int_norm_aborted = 0x02040000		; Aborted *dsa
+-ABSOLUTE int_norm_reset = 0x02050000		; Generated BUS reset.
+-ABSOLUTE int_norm_emulateintfly = 0x02060000	; 53C710 Emulated intfly
+-ABSOLUTE int_debug_break = 0x03000000		; Break point
+-
+-ABSOLUTE int_debug_panic = 0x030b0000		; Panic driver
+-
+-
+-ABSOLUTE int_test_1 = 0x04000000		; Test 1 complete
+-ABSOLUTE int_test_2 = 0x04010000		; Test 2 complete
+-ABSOLUTE int_test_3 = 0x04020000		; Test 3 complete
+-
+-
+-; These should start with 0x05000000, with low bits incrementing for 
+-; each one.
+-
+-
+-						
+-ABSOLUTE NCR53c7xx_msg_abort = 0	; Pointer to abort message
+-ABSOLUTE NCR53c7xx_msg_reject = 0       ; Pointer to reject message
+-ABSOLUTE NCR53c7xx_zero	= 0		; long with zero in it, use for source
+-ABSOLUTE NCR53c7xx_sink = 0		; long to dump worthless data in
+-ABSOLUTE NOP_insn = 0			; NOP instruction
+-
+-; Pointer to message, potentially multi-byte
+-ABSOLUTE msg_buf = 0
+-
+-; Pointer to holding area for reselection information
+-ABSOLUTE reselected_identify = 0
+-ABSOLUTE reselected_tag = 0
+-
+-; Request sense command pointer; it's a 6-byte command and should
+-; be constant for all commands, since we always want 16 bytes of 
+-; sense and we don't need to change any fields as we did under 
+-; SCSI-I when we actually cared about the LUN field.
+-;EXTERNAL NCR53c7xx_sense		; Request sense command
+-
+-
+-; dsa_schedule  
+-; PURPOSE : after a DISCONNECT message has been received, and pointers
+-;	saved, insert the current DSA structure at the head of the 
+-; 	disconnected queue and fall through to the scheduler.
+-;
+-; CALLS : OK
+-;
+-; INPUTS : dsa - current DSA structure, reconnect_dsa_head - list
+-;	of disconnected commands
+-;
+-; MODIFIES : SCRATCH, reconnect_dsa_head
+-; 
+-; EXITS : always passes control to schedule
+-
+-ENTRY dsa_schedule
+-dsa_schedule:
+-
+-
+-
+-
+-;
+-; Calculate the address of the next pointer within the DSA 
+-; structure of the command that is currently disconnecting
+-;
+-
+-    ; Read what should be the current DSA from memory - actual DSA
+-    ; register is probably corrupt
+-    MOVE MEMORY 4, saved_dsa, addr_scratch
+-
+-at 0x0000005e : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-    MOVE SCRATCH0 + dsa_next TO SCRATCH0
+-
+-at 0x00000061 : */	0x7e343000,0x00000000,
+-/*
+-    MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY
+-
+-at 0x00000063 : */	0x7f350000,0x00000000,
+-/*
+-    MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY
+-
+-at 0x00000065 : */	0x7f360000,0x00000000,
+-/*
+-    MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY
+-
+-at 0x00000067 : */	0x7f370000,0x00000000,
+-/*
+-
+-; Point the next field of this DSA structure at the current disconnected 
+-; list
+-    
+-    MOVE MEMORY 4, addr_scratch, dsa_schedule_insert + 8
+-
+-at 0x00000069 : */	0xc0000004,0x00000000,0x000001b8,
+-/*
+-    
+-dsa_schedule_insert:
+-    MOVE MEMORY 4, reconnect_dsa_head, 0 
+-
+-at 0x0000006c : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-; And update the head pointer.
+-
+-    ; Read what should be the current DSA from memory - actual DSA
+-    ; register is probably corrupt
+-    MOVE MEMORY 4, saved_dsa, addr_scratch
+-
+-at 0x0000006f : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-    
+-    MOVE MEMORY 4, addr_scratch, reconnect_dsa_head
+-
+-at 0x00000072 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-    
+-
+-
+-
+-
+-
+-    CLEAR ACK
+-
+-at 0x00000075 : */	0x60000040,0x00000000,
+-/*
+-
+-
+-    ; Time to correct DSA following memory move
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x00000077 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-    WAIT DISCONNECT
+-
+-at 0x0000007a : */	0x48000000,0x00000000,
+-/*
+-
+-
+-
+-
+-
+-
+-    JUMP schedule
+-
+-at 0x0000007c : */	0x80080000,0x00000000,
+-/*
+-
+-
+-;
+-; select
+-;
+-; PURPOSE : establish a nexus for the SCSI command referenced by DSA.
+-;	On success, the current DSA structure is removed from the issue 
+-;	queue.  Usually, this is entered as a fall-through from schedule,
+-;	although the contingent allegiance handling code will write
+-;	the select entry address to the DSP to restart a command as a 
+-;	REQUEST SENSE.  A message is sent (usually IDENTIFY, although
+-;	additional SDTR or WDTR messages may be sent).  COMMAND OUT
+-;	is handled.
+-;
+-; INPUTS : DSA - SCSI command, issue_dsa_head
+-;
+-; CALLS : NOT OK
+-;
+-; MODIFIES : SCRATCH, issue_dsa_head
+-;
+-; EXITS : on reselection or selection, go to select_failed
+-;	otherwise, RETURN so control is passed back to 
+-;	dsa_begin.
+-;
+-
+-ENTRY select
+-select:
+-
+-
+-
+-
+-
+-
+-
+-
+-    CLEAR TARGET
+-
+-at 0x0000007e : */	0x60000200,0x00000000,
+-/*
+-
+-; XXX
+-;
+-; In effect, SELECTION operations are backgrounded, with execution
+-; continuing until code which waits for REQ or a fatal interrupt is 
+-; encountered.
+-;
+-; So, for more performance, we could overlap the code which removes 
+-; the command from the NCR's issue queue with the selection, but 
+-; at this point I don't want to deal with the error recovery.
+-;
+-
+-
+-
+-    ; Enable selection timer
+-
+-
+-
+-    MOVE CTEST7 & 0xef TO CTEST7
+-
+-at 0x00000080 : */	0x7c1bef00,0x00000000,
+-/*
+-
+-
+-    SELECT ATN FROM dsa_select, select_failed
+-
+-at 0x00000082 : */	0x4300003c,0x00000828,
+-/*
+-    JUMP select_msgout, WHEN MSG_OUT
+-
+-at 0x00000084 : */	0x860b0000,0x00000218,
+-/*
+-ENTRY select_msgout
+-select_msgout:
+-
+-    ; Disable selection timer
+-    MOVE CTEST7 | 0x10 TO CTEST7
+-
+-at 0x00000086 : */	0x7a1b1000,0x00000000,
+-/*
+-
+-    MOVE FROM dsa_msgout, WHEN MSG_OUT
+-
+-at 0x00000088 : */	0x1e000000,0x00000040,
+-/*
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-   RETURN
+-
+-at 0x0000008a : */	0x90080000,0x00000000,
+-/*
+-
+-; 
+-; select_done
+-; 
+-; PURPOSE: continue on to normal data transfer; called as the exit 
+-;	point from dsa_begin.
+-;
+-; INPUTS: dsa
+-;
+-; CALLS: OK
+-;
+-;
+-
+-select_done:
+-
+-; NOTE DSA is corrupt when we arrive here!
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x0000008c : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-
+-
+-
+-
+-
+-; After a successful selection, we should get either a CMD phase or 
+-; some transfer request negotiation message.
+-
+-    JUMP cmdout, WHEN CMD
+-
+-at 0x0000008f : */	0x820b0000,0x0000025c,
+-/*
+-    INT int_err_unexpected_phase, WHEN NOT MSG_IN 
+-
+-at 0x00000091 : */	0x9f030000,0x00000000,
+-/*
+-
+-select_msg_in:
+-    CALL msg_in, WHEN MSG_IN
+-
+-at 0x00000093 : */	0x8f0b0000,0x0000041c,
+-/*
+-    JUMP select_msg_in, WHEN MSG_IN
+-
+-at 0x00000095 : */	0x870b0000,0x0000024c,
+-/*
+-
+-cmdout:
+-    INT int_err_unexpected_phase, WHEN NOT CMD
+-
+-at 0x00000097 : */	0x9a030000,0x00000000,
+-/*
+-
+-
+-
+-ENTRY cmdout_cmdout
+-cmdout_cmdout:
+-
+-    MOVE FROM dsa_cmdout, WHEN CMD
+-
+-at 0x00000099 : */	0x1a000000,0x00000048,
+-/*
+-
+-
+-
+-
+-;
+-; data_transfer  
+-; other_out
+-; other_in
+-; other_transfer
+-;
+-; PURPOSE : handle the main data transfer for a SCSI command in 
+-;	several parts.  In the first part, data_transfer, DATA_IN
+-;	and DATA_OUT phases are allowed, with the user provided
+-;	code (usually dynamically generated based on the scatter/gather
+-;	list associated with a SCSI command) called to handle these 
+-;	phases.
+-;
+-;	After control has passed to one of the user provided 
+-;	DATA_IN or DATA_OUT routines, back calls are made to 
+-;	other_transfer_in or other_transfer_out to handle non-DATA_IN
+-;	and non-DATA_OUT phases respectively, with the state of the active
+-;	data pointer being preserved in TEMP.
+-;
+-;	On completion, the user code passes control to other_transfer
+-;	which causes DATA_IN and DATA_OUT to result in unexpected_phase
+-;	interrupts so that data overruns may be trapped.
+-;
+-; INPUTS : DSA - SCSI command
+-;
+-; CALLS : OK in data_transfer_start, not ok in other_out and other_in, ok in
+-;	other_transfer
+-;
+-; MODIFIES : SCRATCH
+-;
+-; EXITS : if STATUS IN is detected, signifying command completion,
+-;	the NCR jumps to command_complete.  If MSG IN occurs, a 
+-;	CALL is made to msg_in.  Otherwise, other_transfer runs in 
+-;	an infinite loop.
+-;	
+-
+-ENTRY data_transfer
+-data_transfer:
+-    JUMP cmdout_cmdout, WHEN CMD
+-
+-at 0x0000009b : */	0x820b0000,0x00000264,
+-/*
+-    CALL msg_in, WHEN MSG_IN
+-
+-at 0x0000009d : */	0x8f0b0000,0x0000041c,
+-/*
+-    INT int_err_unexpected_phase, WHEN MSG_OUT
+-
+-at 0x0000009f : */	0x9e0b0000,0x00000000,
+-/*
+-    JUMP do_dataout, WHEN DATA_OUT
+-
+-at 0x000000a1 : */	0x800b0000,0x000002a4,
+-/*
+-    JUMP do_datain, WHEN DATA_IN
+-
+-at 0x000000a3 : */	0x810b0000,0x000002fc,
+-/*
+-    JUMP command_complete, WHEN STATUS
+-
+-at 0x000000a5 : */	0x830b0000,0x0000065c,
+-/*
+-    JUMP data_transfer
+-
+-at 0x000000a7 : */	0x80080000,0x0000026c,
+-/*
+-ENTRY end_data_transfer
+-end_data_transfer:
+-
+-;
+-; FIXME: On NCR53c700 and NCR53c700-66 chips, do_dataout/do_datain 
+-; should be fixed up whenever the nexus changes so it can point to the 
+-; correct routine for that command.
+-;
+-
+-
+-; Nasty jump to dsa->dataout
+-do_dataout:
+-
+-    MOVE MEMORY 4, saved_dsa, addr_scratch
+-
+-at 0x000000a9 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-    MOVE SCRATCH0 + dsa_dataout TO SCRATCH0	
+-
+-at 0x000000ac : */	0x7e345000,0x00000000,
+-/*
+-    MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY 
+-
+-at 0x000000ae : */	0x7f350000,0x00000000,
+-/*
+-    MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY 
+-
+-at 0x000000b0 : */	0x7f360000,0x00000000,
+-/*
+-    MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY 
+-
+-at 0x000000b2 : */	0x7f370000,0x00000000,
+-/*
+-    
+-    MOVE MEMORY 4, addr_scratch, dataout_to_jump + 4
+-
+-at 0x000000b4 : */	0xc0000004,0x00000000,0x000002e0,
+-/*
+-    
+-dataout_to_jump:
+-    MOVE MEMORY 4, 0, dataout_jump + 4 
+-
+-at 0x000000b7 : */	0xc0000004,0x00000000,0x000002f8,
+-/*
+-
+-    ; Time to correct DSA following memory move
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x000000ba : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-dataout_jump:
+-    JUMP 0
+-
+-at 0x000000bd : */	0x80080000,0x00000000,
+-/*
+-
+-; Nasty jump to dsa->dsain
+-do_datain:
+-
+-    MOVE MEMORY 4, saved_dsa, addr_scratch
+-
+-at 0x000000bf : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-    MOVE SCRATCH0 + dsa_datain TO SCRATCH0	
+-
+-at 0x000000c2 : */	0x7e345400,0x00000000,
+-/*
+-    MOVE SCRATCH1 + 0 TO SCRATCH1 WITH CARRY 
+-
+-at 0x000000c4 : */	0x7f350000,0x00000000,
+-/*
+-    MOVE SCRATCH2 + 0 TO SCRATCH2 WITH CARRY 
+-
+-at 0x000000c6 : */	0x7f360000,0x00000000,
+-/*
+-    MOVE SCRATCH3 + 0 TO SCRATCH3 WITH CARRY 
+-
+-at 0x000000c8 : */	0x7f370000,0x00000000,
+-/*
+-    
+-    MOVE MEMORY 4, addr_scratch, datain_to_jump + 4
+-
+-at 0x000000ca : */	0xc0000004,0x00000000,0x00000338,
+-/*
+-    
+-ENTRY datain_to_jump
+-datain_to_jump:
+-    MOVE MEMORY 4, 0, datain_jump + 4
+-
+-at 0x000000cd : */	0xc0000004,0x00000000,0x00000350,
+-/*
+-
+-    ; Time to correct DSA following memory move
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x000000d0 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-
+-datain_jump:
+-    JUMP 0
+-
+-at 0x000000d3 : */	0x80080000,0x00000000,
+-/*
+-
+-
+-
+-; Note that other_out and other_in loop until a non-data phase
+-; is discovered, so we only execute return statements when we
+-; can go on to the next data phase block move statement.
+-
+-ENTRY other_out
+-other_out:
+-
+-
+-
+-    INT int_err_unexpected_phase, WHEN CMD
+-
+-at 0x000000d5 : */	0x9a0b0000,0x00000000,
+-/*
+-    JUMP msg_in_restart, WHEN MSG_IN 
+-
+-at 0x000000d7 : */	0x870b0000,0x000003fc,
+-/*
+-    INT int_err_unexpected_phase, WHEN MSG_OUT
+-
+-at 0x000000d9 : */	0x9e0b0000,0x00000000,
+-/*
+-    INT int_err_unexpected_phase, WHEN DATA_IN
+-
+-at 0x000000db : */	0x990b0000,0x00000000,
+-/*
+-    JUMP command_complete, WHEN STATUS
+-
+-at 0x000000dd : */	0x830b0000,0x0000065c,
+-/*
+-    JUMP other_out, WHEN NOT DATA_OUT
+-
+-at 0x000000df : */	0x80030000,0x00000354,
+-/*
+-
+-; TEMP should be OK, as we got here from a call in the user dataout code.
+-
+-    RETURN
+-
+-at 0x000000e1 : */	0x90080000,0x00000000,
+-/*
+-
+-ENTRY other_in
+-other_in:
+-
+-
+-
+-    INT int_err_unexpected_phase, WHEN CMD
+-
+-at 0x000000e3 : */	0x9a0b0000,0x00000000,
+-/*
+-    JUMP msg_in_restart, WHEN MSG_IN 
+-
+-at 0x000000e5 : */	0x870b0000,0x000003fc,
+-/*
+-    INT int_err_unexpected_phase, WHEN MSG_OUT
+-
+-at 0x000000e7 : */	0x9e0b0000,0x00000000,
+-/*
+-    INT int_err_unexpected_phase, WHEN DATA_OUT
+-
+-at 0x000000e9 : */	0x980b0000,0x00000000,
+-/*
+-    JUMP command_complete, WHEN STATUS
+-
+-at 0x000000eb : */	0x830b0000,0x0000065c,
+-/*
+-    JUMP other_in, WHEN NOT DATA_IN
+-
+-at 0x000000ed : */	0x81030000,0x0000038c,
+-/*
+-
+-; TEMP should be OK, as we got here from a call in the user datain code.
+-
+-    RETURN
+-
+-at 0x000000ef : */	0x90080000,0x00000000,
+-/*
+-
+-
+-ENTRY other_transfer
+-other_transfer:
+-    INT int_err_unexpected_phase, WHEN CMD
+-
+-at 0x000000f1 : */	0x9a0b0000,0x00000000,
+-/*
+-    CALL msg_in, WHEN MSG_IN
+-
+-at 0x000000f3 : */	0x8f0b0000,0x0000041c,
+-/*
+-    INT int_err_unexpected_phase, WHEN MSG_OUT
+-
+-at 0x000000f5 : */	0x9e0b0000,0x00000000,
+-/*
+-    INT int_err_unexpected_phase, WHEN DATA_OUT
+-
+-at 0x000000f7 : */	0x980b0000,0x00000000,
+-/*
+-    INT int_err_unexpected_phase, WHEN DATA_IN
+-
+-at 0x000000f9 : */	0x990b0000,0x00000000,
+-/*
+-    JUMP command_complete, WHEN STATUS
+-
+-at 0x000000fb : */	0x830b0000,0x0000065c,
+-/*
+-    JUMP other_transfer
+-
+-at 0x000000fd : */	0x80080000,0x000003c4,
+-/*
+-
+-;
+-; msg_in_restart
+-; msg_in
+-; munge_msg
+-;
+-; PURPOSE : process messages from a target.  msg_in is called when the 
+-;	caller hasn't read the first byte of the message.  munge_msg
+-;	is called when the caller has read the first byte of the message,
+-;	and left it in SFBR.  msg_in_restart is called when the caller 
+-;	hasn't read the first byte of the message, and wishes RETURN
+-;	to transfer control back to the address of the conditional
+-;	CALL instruction rather than to the instruction after it.
+-;
+-;	Various int_* interrupts are generated when the host system
+-;	needs to intervene, as is the case with SDTR, WDTR, and
+-;	INITIATE RECOVERY messages.
+-;
+-;	When the host system handles one of these interrupts,
+-;	it can respond by reentering at reject_message, 
+-;	which rejects the message and returns control to
+-;	the caller of msg_in or munge_msg, accept_message
+-;	which clears ACK and returns control, or reply_message
+-;	which sends the message pointed to by the DSA 
+-;	msgout_other table indirect field.
+-;
+-;	DISCONNECT messages are handled by moving the command
+-;	to the reconnect_dsa_queue.
+-
+-; NOTE: DSA should be valid when we get here - we cannot save both it
+-;	and TEMP in this routine.
+-
+-;
+-; INPUTS : DSA - SCSI COMMAND, SFBR - first byte of message (munge_msg
+-;	only)
+-;
+-; CALLS : NO.  The TEMP register isn't backed up to allow nested calls.
+-;
+-; MODIFIES : SCRATCH, DSA on DISCONNECT
+-;
+-; EXITS : On receipt of SAVE DATA POINTER, RESTORE POINTERS,
+-;	and normal return from message handlers running under
+-;	Linux, control is returned to the caller.  Receipt
+-;	of DISCONNECT messages pass control to dsa_schedule.
+-;
+-ENTRY msg_in_restart
+-msg_in_restart:
+-; XXX - hackish
+-;
+-; Since it's easier to debug changes to the statically 
+-; compiled code, rather than the dynamically generated 
+-; stuff, such as
+-;
+-; 	MOVE x, y, WHEN data_phase
+-; 	CALL other_z, WHEN NOT data_phase
+-; 	MOVE x, y, WHEN data_phase
+-;
+-; I'd like to have certain routines (notably the message handler)
+-; restart on the conditional call rather than the next instruction.
+-;
+-; So, subtract 8 from the return address
+-
+-    MOVE TEMP0 + 0xf8 TO TEMP0
+-
+-at 0x000000ff : */	0x7e1cf800,0x00000000,
+-/*
+-    MOVE TEMP1 + 0xff TO TEMP1 WITH CARRY
+-
+-at 0x00000101 : */	0x7f1dff00,0x00000000,
+-/*
+-    MOVE TEMP2 + 0xff TO TEMP2 WITH CARRY
+-
+-at 0x00000103 : */	0x7f1eff00,0x00000000,
+-/*
+-    MOVE TEMP3 + 0xff TO TEMP3 WITH CARRY
+-
+-at 0x00000105 : */	0x7f1fff00,0x00000000,
+-/*
+-
+-ENTRY msg_in
+-msg_in:
+-    MOVE 1, msg_buf, WHEN MSG_IN
+-
+-at 0x00000107 : */	0x0f000001,0x00000000,
+-/*
+-
+-munge_msg:
+-    JUMP munge_extended, IF 0x01		; EXTENDED MESSAGE
+-
+-at 0x00000109 : */	0x800c0001,0x00000574,
+-/*
+-    JUMP munge_2, IF 0x20, AND MASK 0xdf	; two byte message
+-
+-at 0x0000010b : */	0x800cdf20,0x00000464,
+-/*
+-;
+-; XXX - I've seen a handful of broken SCSI devices which fail to issue
+-; 	a SAVE POINTERS message before disconnecting in the middle of 
+-; 	a transfer, assuming that the DATA POINTER will be implicitly 
+-; 	restored.  
+-;
+-; Historically, I've often done an implicit save when the DISCONNECT
+-; message is processed.  We may want to consider having the option of 
+-; doing that here. 
+-;
+-    JUMP munge_save_data_pointer, IF 0x02	; SAVE DATA POINTER
+-
+-at 0x0000010d : */	0x800c0002,0x0000046c,
+-/*
+-    JUMP munge_restore_pointers, IF 0x03	; RESTORE POINTERS 
+-
+-at 0x0000010f : */	0x800c0003,0x00000518,
+-/*
+-    JUMP munge_disconnect, IF 0x04		; DISCONNECT
+-
+-at 0x00000111 : */	0x800c0004,0x0000056c,
+-/*
+-    INT int_msg_1, IF 0x07			; MESSAGE REJECT
+-
+-at 0x00000113 : */	0x980c0007,0x01020000,
+-/*
+-    INT int_msg_1, IF 0x0f			; INITIATE RECOVERY
+-
+-at 0x00000115 : */	0x980c000f,0x01020000,
+-/*
+-
+-
+-
+-    JUMP reject_message
+-
+-at 0x00000117 : */	0x80080000,0x00000604,
+-/*
+-
+-munge_2:
+-    JUMP reject_message
+-
+-at 0x00000119 : */	0x80080000,0x00000604,
+-/*
+-;
+-; The SCSI standard allows targets to recover from transient 
+-; error conditions by backing up the data pointer with a 
+-; RESTORE POINTERS message.  
+-;	
+-; So, we must save and restore the _residual_ code as well as 
+-; the current instruction pointer.  Because of this messiness,
+-; it is simpler to put dynamic code in the dsa for this and to
+-; just do a simple jump down there. 
+-;
+-
+-munge_save_data_pointer:
+-
+-    ; We have something in TEMP here, so first we must save that
+-    MOVE TEMP0 TO SFBR
+-
+-at 0x0000011b : */	0x721c0000,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH0
+-
+-at 0x0000011d : */	0x6a340000,0x00000000,
+-/*
+-    MOVE TEMP1 TO SFBR
+-
+-at 0x0000011f : */	0x721d0000,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH1
+-
+-at 0x00000121 : */	0x6a350000,0x00000000,
+-/*
+-    MOVE TEMP2 TO SFBR
+-
+-at 0x00000123 : */	0x721e0000,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH2
+-
+-at 0x00000125 : */	0x6a360000,0x00000000,
+-/*
+-    MOVE TEMP3 TO SFBR
+-
+-at 0x00000127 : */	0x721f0000,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH3
+-
+-at 0x00000129 : */	0x6a370000,0x00000000,
+-/*
+-    MOVE MEMORY 4, addr_scratch, jump_temp + 4
+-
+-at 0x0000012b : */	0xc0000004,0x00000000,0x000009c8,
+-/*
+-    ; Now restore DSA
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x0000012e : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-    MOVE DSA0 + dsa_save_data_pointer TO SFBR
+-
+-at 0x00000131 : */	0x76100000,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH0
+-
+-at 0x00000133 : */	0x6a340000,0x00000000,
+-/*
+-    MOVE DSA1 + 0xff TO SFBR WITH CARRY
+-
+-at 0x00000135 : */	0x7711ff00,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH1
+-
+-at 0x00000137 : */	0x6a350000,0x00000000,
+-/*
+-    MOVE DSA2 + 0xff TO SFBR WITH CARRY 
+-
+-at 0x00000139 : */	0x7712ff00,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH2
+-
+-at 0x0000013b : */	0x6a360000,0x00000000,
+-/*
+-    MOVE DSA3 + 0xff TO SFBR WITH CARRY
+-
+-at 0x0000013d : */	0x7713ff00,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH3
+-
+-at 0x0000013f : */	0x6a370000,0x00000000,
+-/*
+-
+-    
+-    MOVE MEMORY 4, addr_scratch, jump_dsa_save + 4
+-
+-at 0x00000141 : */	0xc0000004,0x00000000,0x00000514,
+-/*
+-    
+-jump_dsa_save:
+-    JUMP 0
+-
+-at 0x00000144 : */	0x80080000,0x00000000,
+-/*
+-
+-munge_restore_pointers:
+-
+-    ; The code at dsa_restore_pointers will RETURN, but we don't care
+-    ; about TEMP here, as it will overwrite it anyway.
+-
+-    MOVE DSA0 + dsa_restore_pointers TO SFBR
+-
+-at 0x00000146 : */	0x76100000,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH0
+-
+-at 0x00000148 : */	0x6a340000,0x00000000,
+-/*
+-    MOVE DSA1 + 0xff TO SFBR WITH CARRY
+-
+-at 0x0000014a : */	0x7711ff00,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH1
+-
+-at 0x0000014c : */	0x6a350000,0x00000000,
+-/*
+-    MOVE DSA2 + 0xff TO SFBR WITH CARRY
+-
+-at 0x0000014e : */	0x7712ff00,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH2
+-
+-at 0x00000150 : */	0x6a360000,0x00000000,
+-/*
+-    MOVE DSA3 + 0xff TO SFBR WITH CARRY
+-
+-at 0x00000152 : */	0x7713ff00,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH3
+-
+-at 0x00000154 : */	0x6a370000,0x00000000,
+-/*
+-
+-    
+-    MOVE MEMORY 4, addr_scratch, jump_dsa_restore + 4
+-
+-at 0x00000156 : */	0xc0000004,0x00000000,0x00000568,
+-/*
+-    
+-jump_dsa_restore:
+-    JUMP 0
+-
+-at 0x00000159 : */	0x80080000,0x00000000,
+-/*
+-
+-
+-munge_disconnect:
+-
+-
+-
+-
+-
+-
+-
+-
+-
+- 
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-
+-    JUMP dsa_schedule
+-
+-at 0x0000015b : */	0x80080000,0x00000178,
+-/*
+-
+-
+-
+-
+-
+-munge_extended:
+-    CLEAR ACK
+-
+-at 0x0000015d : */	0x60000040,0x00000000,
+-/*
+-    INT int_err_unexpected_phase, WHEN NOT MSG_IN
+-
+-at 0x0000015f : */	0x9f030000,0x00000000,
+-/*
+-    MOVE 1, msg_buf + 1, WHEN MSG_IN
+-
+-at 0x00000161 : */	0x0f000001,0x00000001,
+-/*
+-    JUMP munge_extended_2, IF 0x02
+-
+-at 0x00000163 : */	0x800c0002,0x000005a4,
+-/*
+-    JUMP munge_extended_3, IF 0x03 
+-
+-at 0x00000165 : */	0x800c0003,0x000005d4,
+-/*
+-    JUMP reject_message
+-
+-at 0x00000167 : */	0x80080000,0x00000604,
+-/*
+-
+-munge_extended_2:
+-    CLEAR ACK
+-
+-at 0x00000169 : */	0x60000040,0x00000000,
+-/*
+-    MOVE 1, msg_buf + 2, WHEN MSG_IN
+-
+-at 0x0000016b : */	0x0f000001,0x00000002,
+-/*
+-    JUMP reject_message, IF NOT 0x02	; Must be WDTR
+-
+-at 0x0000016d : */	0x80040002,0x00000604,
+-/*
+-    CLEAR ACK
+-
+-at 0x0000016f : */	0x60000040,0x00000000,
+-/*
+-    MOVE 1, msg_buf + 3, WHEN MSG_IN
+-
+-at 0x00000171 : */	0x0f000001,0x00000003,
+-/*
+-    INT int_msg_wdtr
+-
+-at 0x00000173 : */	0x98080000,0x01000000,
+-/*
+-
+-munge_extended_3:
+-    CLEAR ACK
+-
+-at 0x00000175 : */	0x60000040,0x00000000,
+-/*
+-    MOVE 1, msg_buf + 2, WHEN MSG_IN
+-
+-at 0x00000177 : */	0x0f000001,0x00000002,
+-/*
+-    JUMP reject_message, IF NOT 0x01	; Must be SDTR
+-
+-at 0x00000179 : */	0x80040001,0x00000604,
+-/*
+-    CLEAR ACK
+-
+-at 0x0000017b : */	0x60000040,0x00000000,
+-/*
+-    MOVE 2, msg_buf + 3, WHEN MSG_IN
+-
+-at 0x0000017d : */	0x0f000002,0x00000003,
+-/*
+-    INT int_msg_sdtr
+-
+-at 0x0000017f : */	0x98080000,0x01010000,
+-/*
+-
+-ENTRY reject_message
+-reject_message:
+-    SET ATN
+-
+-at 0x00000181 : */	0x58000008,0x00000000,
+-/*
+-    CLEAR ACK
+-
+-at 0x00000183 : */	0x60000040,0x00000000,
+-/*
+-    MOVE 1, NCR53c7xx_msg_reject, WHEN MSG_OUT
+-
+-at 0x00000185 : */	0x0e000001,0x00000000,
+-/*
+-    RETURN
+-
+-at 0x00000187 : */	0x90080000,0x00000000,
+-/*
+-
+-ENTRY accept_message
+-accept_message:
+-    CLEAR ATN
+-
+-at 0x00000189 : */	0x60000008,0x00000000,
+-/*
+-    CLEAR ACK
+-
+-at 0x0000018b : */	0x60000040,0x00000000,
+-/*
+-    RETURN
+-
+-at 0x0000018d : */	0x90080000,0x00000000,
+-/*
+-
+-ENTRY respond_message
+-respond_message:
+-    SET ATN
+-
+-at 0x0000018f : */	0x58000008,0x00000000,
+-/*
+-    CLEAR ACK
+-
+-at 0x00000191 : */	0x60000040,0x00000000,
+-/*
+-    MOVE FROM dsa_msgout_other, WHEN MSG_OUT
+-
+-at 0x00000193 : */	0x1e000000,0x00000068,
+-/*
+-    RETURN
+-
+-at 0x00000195 : */	0x90080000,0x00000000,
+-/*
+-
+-;
+-; command_complete
+-;
+-; PURPOSE : handle command termination when STATUS IN is detected by reading
+-;	a status byte followed by a command termination message. 
+-;
+-;	Normal termination results in an INTFLY instruction, and 
+-;	the host system can pick out which command terminated by 
+-;	examining the MESSAGE and STATUS buffers of all currently 
+-;	executing commands;
+-;
+-;	Abnormal (CHECK_CONDITION) termination results in an
+-;	int_err_check_condition interrupt so that a REQUEST SENSE
+-;	command can be issued out-of-order so that no other command
+-;	clears the contingent allegiance condition.
+-;	
+-;
+-; INPUTS : DSA - command	
+-;
+-; CALLS : OK
+-;
+-; EXITS : On successful termination, control is passed to schedule.
+-;	On abnormal termination, the user will usually modify the 
+-;	DSA fields and corresponding buffers and return control
+-;	to select.
+-;
+-
+-ENTRY command_complete
+-command_complete:
+-    MOVE FROM dsa_status, WHEN STATUS
+-
+-at 0x00000197 : */	0x1b000000,0x00000060,
+-/*
+-
+-    MOVE SFBR TO SCRATCH0		; Save status
+-
+-at 0x00000199 : */	0x6a340000,0x00000000,
+-/*
+-
+-ENTRY command_complete_msgin
+-command_complete_msgin:
+-    MOVE FROM dsa_msgin, WHEN MSG_IN
+-
+-at 0x0000019b : */	0x1f000000,0x00000058,
+-/*
+-; Indicate that we should be expecting a disconnect
+-
+-
+-
+-    ; Above code cleared the Unexpected Disconnect bit, what do we do?
+-
+-    CLEAR ACK
+-
+-at 0x0000019d : */	0x60000040,0x00000000,
+-/*
+-
+-    WAIT DISCONNECT
+-
+-at 0x0000019f : */	0x48000000,0x00000000,
+-/*
+-
+-;
+-; The SCSI specification states that when a UNIT ATTENTION condition
+-; is pending, as indicated by a CHECK CONDITION status message,
+-; the target shall revert to asynchronous transfers.  Since
+-; synchronous transfer parameters are maintained on a per INITIATOR/TARGET 
+-; basis, and returning control to our scheduler could work on a command
+-; running on another lun on that target using the old parameters, we must
+-; interrupt the host processor to get them changed, or change them ourselves.
+-;
+-; Once SCSI-II tagged queueing is implemented, things will be even more
+-; hairy, since contingent allegiance conditions exist on a per-target/lun
+-; basis, and issuing a new command with a different tag would clear it.
+-; In these cases, we must interrupt the host processor to get a request 
+-; added to the HEAD of the queue with the request sense command, or we
+-; must automatically issue the request sense command.
+-
+-
+-
+-
+-
+-
+-
+-    INT int_norm_emulateintfly
+-
+-at 0x000001a1 : */	0x98080000,0x02060000,
+-/*
+-
+-
+-
+-
+-
+-
+-    ; Time to correct DSA following memory move
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x000001a3 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-
+-
+-    JUMP schedule
+-
+-at 0x000001a6 : */	0x80080000,0x00000000,
+-/*
+-command_failed:
+-    INT int_err_check_condition
+-
+-at 0x000001a8 : */	0x98080000,0x00030000,
+-/*
+-
+-
+-
+-
+-;
+-; wait_reselect
+-;
+-; PURPOSE : This is essentially the idle routine, where control lands
+-;	when there are no new processes to schedule.  wait_reselect
+-;	waits for reselection, selection, and new commands.
+-;
+-;	When a successful reselection occurs, with the aid 
+-;	of fixed up code in each DSA, wait_reselect walks the 
+-;	reconnect_dsa_queue, asking each dsa if the target ID
+-;	and LUN match its own.
+-;
+-;	If a match is found, a call is made back to reselected_ok,
+-;	which, through the miracles of self-modifying code, extracts
+-;	the found DSA from the reconnect_dsa_queue and then 
+-;	returns control to the DSA's thread of execution.
+-;
+-; INPUTS : NONE
+-;
+-; CALLS : OK
+-;
+-; MODIFIES : DSA,
+-;
+-; EXITS : On successful reselection, control is returned to the 
+-;	DSA which called reselected_ok.  If the WAIT RESELECT
+-;	was interrupted by a new command's arrival signaled by 
+-;	SIG_P, control is passed to schedule.  If the NCR is 
+-;	selected, the host system is interrupted with an 
+-;	int_err_selected which is usually responded to by
+-;	setting DSP to the target_abort address.
+-
+-ENTRY wait_reselect
+-wait_reselect:
+-
+-
+-
+-
+-
+-
+-    WAIT RESELECT wait_reselect_failed
+-
+-at 0x000001aa : */	0x50000000,0x00000800,
+-/*
+-
+-reselected:
+-
+-
+-
+-    CLEAR TARGET
+-
+-at 0x000001ac : */	0x60000200,0x00000000,
+-/*
+-    
+-    ; Read all data needed to reestablish the nexus - 
+-    MOVE 1, reselected_identify, WHEN MSG_IN
+-
+-at 0x000001ae : */	0x0f000001,0x00000000,
+-/*
+-    ; We used to CLEAR ACK here.
+-
+-
+-
+-
+-
+-    ; Point DSA at the current head of the disconnected queue.
+-    
+-    MOVE MEMORY 4, reconnect_dsa_head, addr_scratch
+-
+-at 0x000001b0 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-    
+-
+-    MOVE MEMORY 4, addr_scratch, saved_dsa
+-
+-at 0x000001b3 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-
+-
+-    ; Fix the update-next pointer so that the reconnect_dsa_head
+-    ; pointer is the one that will be updated if this DSA is a hit 
+-    ; and we remove it from the queue.
+-
+-    MOVE MEMORY 4, addr_reconnect_dsa_head, reselected_ok_patch + 8
+-
+-at 0x000001b6 : */	0xc0000004,0x00000000,0x000007ec,
+-/*
+-
+-    ; Time to correct DSA following memory move
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x000001b9 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-
+-ENTRY reselected_check_next
+-reselected_check_next:
+-
+-
+-
+-    ; Check for a NULL pointer.
+-    MOVE DSA0 TO SFBR
+-
+-at 0x000001bc : */	0x72100000,0x00000000,
+-/*
+-    JUMP reselected_not_end, IF NOT 0
+-
+-at 0x000001be : */	0x80040000,0x00000738,
+-/*
+-    MOVE DSA1 TO SFBR
+-
+-at 0x000001c0 : */	0x72110000,0x00000000,
+-/*
+-    JUMP reselected_not_end, IF NOT 0
+-
+-at 0x000001c2 : */	0x80040000,0x00000738,
+-/*
+-    MOVE DSA2 TO SFBR
+-
+-at 0x000001c4 : */	0x72120000,0x00000000,
+-/*
+-    JUMP reselected_not_end, IF NOT 0
+-
+-at 0x000001c6 : */	0x80040000,0x00000738,
+-/*
+-    MOVE DSA3 TO SFBR
+-
+-at 0x000001c8 : */	0x72130000,0x00000000,
+-/*
+-    JUMP reselected_not_end, IF NOT 0
+-
+-at 0x000001ca : */	0x80040000,0x00000738,
+-/*
+-    INT int_err_unexpected_reselect
+-
+-at 0x000001cc : */	0x98080000,0x00020000,
+-/*
+-
+-reselected_not_end:
+-    ;
+-    ; XXX the ALU is only eight bits wide, and the assembler
+-    ; won't do the dirty work for us.  As long as dsa_check_reselect
+-    ; is negative, we need to sign extend with 1 bits to the full
+-    ; 32 bit width of the address.
+-    ;
+-    ; A potential work around would be to have a known alignment 
+-    ; of the DSA structure such that the base address plus 
+-    ; dsa_check_reselect doesn't require carrying from bytes 
+-    ; higher than the LSB.
+-    ;
+-
+-    MOVE DSA0 TO SFBR
+-
+-at 0x000001ce : */	0x72100000,0x00000000,
+-/*
+-    MOVE SFBR + dsa_check_reselect TO SCRATCH0
+-
+-at 0x000001d0 : */	0x6e340000,0x00000000,
+-/*
+-    MOVE DSA1 TO SFBR
+-
+-at 0x000001d2 : */	0x72110000,0x00000000,
+-/*
+-    MOVE SFBR + 0xff TO SCRATCH1 WITH CARRY
+-
+-at 0x000001d4 : */	0x6f35ff00,0x00000000,
+-/*
+-    MOVE DSA2 TO SFBR
+-
+-at 0x000001d6 : */	0x72120000,0x00000000,
+-/*
+-    MOVE SFBR + 0xff TO SCRATCH2 WITH CARRY
+-
+-at 0x000001d8 : */	0x6f36ff00,0x00000000,
+-/*
+-    MOVE DSA3 TO SFBR
+-
+-at 0x000001da : */	0x72130000,0x00000000,
+-/*
+-    MOVE SFBR + 0xff TO SCRATCH3 WITH CARRY
+-
+-at 0x000001dc : */	0x6f37ff00,0x00000000,
+-/*
+-
+-    
+-    MOVE MEMORY 4, addr_scratch, reselected_check + 4
+-
+-at 0x000001de : */	0xc0000004,0x00000000,0x00000794,
+-/*
+-    
+-
+-    ; Time to correct DSA following memory move
+-    MOVE MEMORY 4, saved_dsa, addr_dsa
+-
+-at 0x000001e1 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-
+-reselected_check:
+-    JUMP 0
+-
+-at 0x000001e4 : */	0x80080000,0x00000000,
+-/*
+-
+-
+-;
+-;
+-
+-; We have problems here - the memory move corrupts TEMP and DSA.  This
+-; routine is called from DSA code, and patched from many places.  Scratch
+-; is probably free when it is called.
+-; We have to:
+-;   copy temp to scratch, one byte at a time
+-;   write scratch to patch a jump in place of the return
+-;   do the move memory
+-;   jump to the patched in return address
+-; DSA is corrupt when we get here, and can be left corrupt
+-
+-ENTRY reselected_ok
+-reselected_ok:
+-    MOVE TEMP0 TO SFBR
+-
+-at 0x000001e6 : */	0x721c0000,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH0
+-
+-at 0x000001e8 : */	0x6a340000,0x00000000,
+-/*
+-    MOVE TEMP1 TO SFBR
+-
+-at 0x000001ea : */	0x721d0000,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH1
+-
+-at 0x000001ec : */	0x6a350000,0x00000000,
+-/*
+-    MOVE TEMP2 TO SFBR
+-
+-at 0x000001ee : */	0x721e0000,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH2
+-
+-at 0x000001f0 : */	0x6a360000,0x00000000,
+-/*
+-    MOVE TEMP3 TO SFBR
+-
+-at 0x000001f2 : */	0x721f0000,0x00000000,
+-/*
+-    MOVE SFBR TO SCRATCH3
+-
+-at 0x000001f4 : */	0x6a370000,0x00000000,
+-/*
+-    MOVE MEMORY 4, addr_scratch, reselected_ok_jump + 4
+-
+-at 0x000001f6 : */	0xc0000004,0x00000000,0x000007f4,
+-/*
+-reselected_ok_patch:
+-    MOVE MEMORY 4, 0, 0
+-
+-at 0x000001f9 : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-reselected_ok_jump:
+-    JUMP 0
+-
+-at 0x000001fc : */	0x80080000,0x00000000,
+-/*
+-
+-
+-
+-
+-
+-selected:
+-    INT int_err_selected;
+-
+-at 0x000001fe : */	0x98080000,0x00010000,
+-/*
+-
+-;
+-; A select or reselect failure can be caused by one of three conditions : 
+-; 1.  SIG_P was set.  This will be the case if the user has written
+-;	a new value to a previously NULL head of the issue queue.
+-;
+-; 2.  The NCR53c810 was selected or reselected by another device.
+-;
+-; 3.  The bus was already busy since we were selected or reselected
+-;	before starting the command.
+-
+-wait_reselect_failed:
+-
+-
+-
+-; Check selected bit.  
+-
+-    ; Must work out how to tell if we are selected....
+-
+-
+-
+-
+-; Reading CTEST2 clears the SIG_P bit in the ISTAT register.
+-    MOVE CTEST2 & 0x40 TO SFBR	
+-
+-at 0x00000200 : */	0x74164000,0x00000000,
+-/*
+-    JUMP schedule, IF 0x40
+-
+-at 0x00000202 : */	0x800c0040,0x00000000,
+-/*
+-; Check connected bit.  
+-; FIXME: this needs to change if we support target mode
+-    MOVE ISTAT & 0x08 TO SFBR
+-
+-at 0x00000204 : */	0x74210800,0x00000000,
+-/*
+-    JUMP reselected, IF 0x08
+-
+-at 0x00000206 : */	0x800c0008,0x000006b0,
+-/*
+-; FIXME : Something bogus happened, and we shouldn't fail silently.
+-
+-
+-
+-    INT int_debug_panic
+-
+-at 0x00000208 : */	0x98080000,0x030b0000,
+-/*
+-
+-
+-
+-select_failed:
+-
+-    ; Disable selection timer
+-    MOVE CTEST7 | 0x10 TO CTEST7
+-
+-at 0x0000020a : */	0x7a1b1000,0x00000000,
+-/*
+-
+-
+-
+-
+-; Otherwise, mask the selected and reselected bits off SIST0
+-
+-    ; Let's assume we don't get selected for now
+-    MOVE SSTAT0 & 0x10 TO SFBR
+-
+-at 0x0000020c : */	0x740d1000,0x00000000,
+-/*
+-
+-
+-
+-
+-    JUMP reselected, IF 0x10 
+-
+-at 0x0000020e : */	0x800c0010,0x000006b0,
+-/*
+-; If SIGP is set, the user just gave us another command, and
+-; we should restart or return to the scheduler.
+-; Reading CTEST2 clears the SIG_P bit in the ISTAT register.
+-    MOVE CTEST2 & 0x40 TO SFBR	
+-
+-at 0x00000210 : */	0x74164000,0x00000000,
+-/*
+-    JUMP select, IF 0x40
+-
+-at 0x00000212 : */	0x800c0040,0x000001f8,
+-/*
+-; Check connected bit.  
+-; FIXME: this needs to change if we support target mode
+-; FIXME: is this really necessary? 
+-    MOVE ISTAT & 0x08 TO SFBR
+-
+-at 0x00000214 : */	0x74210800,0x00000000,
+-/*
+-    JUMP reselected, IF 0x08
+-
+-at 0x00000216 : */	0x800c0008,0x000006b0,
+-/*
+-; FIXME : Something bogus happened, and we shouldn't fail silently.
+-
+-
+-
+-    INT int_debug_panic
+-
+-at 0x00000218 : */	0x98080000,0x030b0000,
+-/*
+-
+-
+-;
+-; test_1
+-; test_2
+-;
+-; PURPOSE : run some verification tests on the NCR.  test_1
+-;	copies test_src to test_dest and interrupts the host
+-;	processor, testing for cache coherency and interrupt
+-; 	problems in the process.
+-;
+-;	test_2 runs a command with offsets relative to the 
+-;	DSA on entry, and is useful for miscellaneous experimentation.
+-;
+-
+-; Verify that interrupts are working correctly and that we don't 
+-; have a cache invalidation problem.
+-
+-ABSOLUTE test_src = 0, test_dest = 0
+-ENTRY test_1
+-test_1:
+-    MOVE MEMORY 4, test_src, test_dest
+-
+-at 0x0000021a : */	0xc0000004,0x00000000,0x00000000,
+-/*
+-    INT int_test_1
+-
+-at 0x0000021d : */	0x98080000,0x04000000,
+-/*
+-
+-;
+-; Run arbitrary commands, with test code establishing a DSA
+-;
+- 
+-ENTRY test_2
+-test_2:
+-    CLEAR TARGET
+-
+-at 0x0000021f : */	0x60000200,0x00000000,
+-/*
+-
+-    ; Enable selection timer
+-
+-
+-
+-    MOVE CTEST7 & 0xef TO CTEST7
+-
+-at 0x00000221 : */	0x7c1bef00,0x00000000,
+-/*
+-
+-
+-    SELECT ATN FROM 0, test_2_fail
+-
+-at 0x00000223 : */	0x43000000,0x000008dc,
+-/*
+-    JUMP test_2_msgout, WHEN MSG_OUT
+-
+-at 0x00000225 : */	0x860b0000,0x0000089c,
+-/*
+-ENTRY test_2_msgout
+-test_2_msgout:
+-
+-    ; Disable selection timer
+-    MOVE CTEST7 | 0x10 TO CTEST7
+-
+-at 0x00000227 : */	0x7a1b1000,0x00000000,
+-/*
+-
+-    MOVE FROM 8, WHEN MSG_OUT
+-
+-at 0x00000229 : */	0x1e000000,0x00000008,
+-/*
+-    MOVE FROM 16, WHEN CMD 
+-
+-at 0x0000022b : */	0x1a000000,0x00000010,
+-/*
+-    MOVE FROM 24, WHEN DATA_IN
+-
+-at 0x0000022d : */	0x19000000,0x00000018,
+-/*
+-    MOVE FROM 32, WHEN STATUS
+-
+-at 0x0000022f : */	0x1b000000,0x00000020,
+-/*
+-    MOVE FROM 40, WHEN MSG_IN
+-
+-at 0x00000231 : */	0x1f000000,0x00000028,
+-/*
+-
+-
+-
+-    CLEAR ACK
+-
+-at 0x00000233 : */	0x60000040,0x00000000,
+-/*
+-    WAIT DISCONNECT
+-
+-at 0x00000235 : */	0x48000000,0x00000000,
+-/*
+-test_2_fail:
+-
+-    ; Disable selection timer
+-    MOVE CTEST7 | 0x10 TO CTEST7
+-
+-at 0x00000237 : */	0x7a1b1000,0x00000000,
+-/*
+-
+-    INT int_test_2
+-
+-at 0x00000239 : */	0x98080000,0x04010000,
+-/*
+-
+-ENTRY debug_break
+-debug_break:
+-    INT int_debug_break
+-
+-at 0x0000023b : */	0x98080000,0x03000000,
+-/*
+-
+-;
+-; initiator_abort
+-; target_abort
+-;
+-; PURPOSE : Abort the currently established nexus from within initiator
+-;	or target mode.
+-;
+-;  
+-
+-ENTRY target_abort
+-target_abort:
+-    SET TARGET
+-
+-at 0x0000023d : */	0x58000200,0x00000000,
+-/*
+-    DISCONNECT
+-
+-at 0x0000023f : */	0x48000000,0x00000000,
+-/*
+-    CLEAR TARGET
+-
+-at 0x00000241 : */	0x60000200,0x00000000,
+-/*
+-    JUMP schedule
+-
+-at 0x00000243 : */	0x80080000,0x00000000,
+-/*
+-    
+-ENTRY initiator_abort
+-initiator_abort:
+-    SET ATN
+-
+-at 0x00000245 : */	0x58000008,0x00000000,
+-/*
+-;
+-; The SCSI-I specification says that targets may go into MSG out at 
+-; their leisure upon receipt of the ATN signal.  On all versions of the 
+-; specification, we can't change phases until REQ transitions true->false, 
+-; so we need to sink/source one byte of data to allow the transition.
+-;
+-; For the sake of safety, we'll only source one byte of data in all 
+-; cases, but to accommodate the SCSI-I dain bramage, we'll sink an  
+-; arbitrary number of bytes.
+-    JUMP spew_cmd, WHEN CMD
+-
+-at 0x00000247 : */	0x820b0000,0x0000094c,
+-/*
+-    JUMP eat_msgin, WHEN MSG_IN
+-
+-at 0x00000249 : */	0x870b0000,0x0000095c,
+-/*
+-    JUMP eat_datain, WHEN DATA_IN
+-
+-at 0x0000024b : */	0x810b0000,0x0000098c,
+-/*
+-    JUMP eat_status, WHEN STATUS
+-
+-at 0x0000024d : */	0x830b0000,0x00000974,
+-/*
+-    JUMP spew_dataout, WHEN DATA_OUT
+-
+-at 0x0000024f : */	0x800b0000,0x000009a4,
+-/*
+-    JUMP sated
+-
+-at 0x00000251 : */	0x80080000,0x000009ac,
+-/*
+-spew_cmd:
+-    MOVE 1, NCR53c7xx_zero, WHEN CMD
+-
+-at 0x00000253 : */	0x0a000001,0x00000000,
+-/*
+-    JUMP sated
+-
+-at 0x00000255 : */	0x80080000,0x000009ac,
+-/*
+-eat_msgin:
+-    MOVE 1, NCR53c7xx_sink, WHEN MSG_IN
+-
+-at 0x00000257 : */	0x0f000001,0x00000000,
+-/*
+-    JUMP eat_msgin, WHEN MSG_IN
+-
+-at 0x00000259 : */	0x870b0000,0x0000095c,
+-/*
+-    JUMP sated
+-
+-at 0x0000025b : */	0x80080000,0x000009ac,
+-/*
+-eat_status:
+-    MOVE 1, NCR53c7xx_sink, WHEN STATUS
+-
+-at 0x0000025d : */	0x0b000001,0x00000000,
+-/*
+-    JUMP eat_status, WHEN STATUS
+-
+-at 0x0000025f : */	0x830b0000,0x00000974,
+-/*
+-    JUMP sated
+-
+-at 0x00000261 : */	0x80080000,0x000009ac,
+-/*
+-eat_datain:
+-    MOVE 1, NCR53c7xx_sink, WHEN DATA_IN
+-
+-at 0x00000263 : */	0x09000001,0x00000000,
+-/*
+-    JUMP eat_datain, WHEN DATA_IN
+-
+-at 0x00000265 : */	0x810b0000,0x0000098c,
+-/*
+-    JUMP sated
+-
+-at 0x00000267 : */	0x80080000,0x000009ac,
+-/*
+-spew_dataout:
+-    MOVE 1, NCR53c7xx_zero, WHEN DATA_OUT
+-
+-at 0x00000269 : */	0x08000001,0x00000000,
+-/*
+-sated:
+-
+-
+-
+-    MOVE 1, NCR53c7xx_msg_abort, WHEN MSG_OUT
+-
+-at 0x0000026b : */	0x0e000001,0x00000000,
+-/*
+-    WAIT DISCONNECT
+-
+-at 0x0000026d : */	0x48000000,0x00000000,
+-/*
+-    INT int_norm_aborted
+-
+-at 0x0000026f : */	0x98080000,0x02040000,
+-/*
+-
+-
+- 
+-
+-; Little patched jump, used to overcome problems with TEMP getting
+-; corrupted on memory moves.
+-
+-jump_temp:
+-    JUMP 0
+-
+-at 0x00000271 : */	0x80080000,0x00000000,
+-};
+-
+-#define A_NCR53c7xx_msg_abort	0x00000000
+-static u32 A_NCR53c7xx_msg_abort_used[] __attribute((unused)) = {
+-	0x0000026c,
+-};
+-
+-#define A_NCR53c7xx_msg_reject	0x00000000
+-static u32 A_NCR53c7xx_msg_reject_used[] __attribute((unused)) = {
+-	0x00000186,
+-};
+-
+-#define A_NCR53c7xx_sink	0x00000000
+-static u32 A_NCR53c7xx_sink_used[] __attribute((unused)) = {
+-	0x00000258,
+-	0x0000025e,
+-	0x00000264,
+-};
+-
+-#define A_NCR53c7xx_zero	0x00000000
+-static u32 A_NCR53c7xx_zero_used[] __attribute((unused)) = {
+-	0x00000254,
+-	0x0000026a,
+-};
+-
+-#define A_NOP_insn	0x00000000
+-static u32 A_NOP_insn_used[] __attribute((unused)) = {
+-	0x00000017,
+-};
+-
+-#define A_addr_dsa	0x00000000
+-static u32 A_addr_dsa_used[] __attribute((unused)) = {
+-	0x0000000f,
+-	0x00000026,
+-	0x00000033,
+-	0x00000040,
+-	0x00000055,
+-	0x00000079,
+-	0x0000008e,
+-	0x000000bc,
+-	0x000000d2,
+-	0x00000130,
+-	0x000001a5,
+-	0x000001bb,
+-	0x000001e3,
+-};
+-
+-#define A_addr_reconnect_dsa_head	0x00000000
+-static u32 A_addr_reconnect_dsa_head_used[] __attribute((unused)) = {
+-	0x000001b7,
+-};
+-
+-#define A_addr_scratch	0x00000000
+-static u32 A_addr_scratch_used[] __attribute((unused)) = {
+-	0x00000002,
+-	0x00000004,
+-	0x00000008,
+-	0x00000020,
+-	0x00000022,
+-	0x00000049,
+-	0x00000060,
+-	0x0000006a,
+-	0x00000071,
+-	0x00000073,
+-	0x000000ab,
+-	0x000000b5,
+-	0x000000c1,
+-	0x000000cb,
+-	0x0000012c,
+-	0x00000142,
+-	0x00000157,
+-	0x000001b2,
+-	0x000001b4,
+-	0x000001df,
+-	0x000001f7,
+-};
+-
+-#define A_addr_temp	0x00000000
+-static u32 A_addr_temp_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dmode_memory_to_memory	0x00000000
+-static u32 A_dmode_memory_to_memory_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dmode_memory_to_ncr	0x00000000
+-static u32 A_dmode_memory_to_ncr_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dmode_ncr_to_memory	0x00000000
+-static u32 A_dmode_ncr_to_memory_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dsa_check_reselect	0x00000000
+-static u32 A_dsa_check_reselect_used[] __attribute((unused)) = {
+-	0x000001d0,
+-};
+-
+-#define A_dsa_cmdout	0x00000048
+-static u32 A_dsa_cmdout_used[] __attribute((unused)) = {
+-	0x0000009a,
+-};
+-
+-#define A_dsa_cmnd	0x00000038
+-static u32 A_dsa_cmnd_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dsa_datain	0x00000054
+-static u32 A_dsa_datain_used[] __attribute((unused)) = {
+-	0x000000c2,
+-};
+-
+-#define A_dsa_dataout	0x00000050
+-static u32 A_dsa_dataout_used[] __attribute((unused)) = {
+-	0x000000ac,
+-};
+-
+-#define A_dsa_end	0x00000070
+-static u32 A_dsa_end_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dsa_fields_start	0x00000000
+-static u32 A_dsa_fields_start_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dsa_msgin	0x00000058
+-static u32 A_dsa_msgin_used[] __attribute((unused)) = {
+-	0x0000019c,
+-};
+-
+-#define A_dsa_msgout	0x00000040
+-static u32 A_dsa_msgout_used[] __attribute((unused)) = {
+-	0x00000089,
+-};
+-
+-#define A_dsa_msgout_other	0x00000068
+-static u32 A_dsa_msgout_other_used[] __attribute((unused)) = {
+-	0x00000194,
+-};
+-
+-#define A_dsa_next	0x00000030
+-static u32 A_dsa_next_used[] __attribute((unused)) = {
+-	0x00000061,
+-};
+-
+-#define A_dsa_restore_pointers	0x00000000
+-static u32 A_dsa_restore_pointers_used[] __attribute((unused)) = {
+-	0x00000146,
+-};
+-
+-#define A_dsa_save_data_pointer	0x00000000
+-static u32 A_dsa_save_data_pointer_used[] __attribute((unused)) = {
+-	0x00000131,
+-};
+-
+-#define A_dsa_select	0x0000003c
+-static u32 A_dsa_select_used[] __attribute((unused)) = {
+-	0x00000082,
+-};
+-
+-#define A_dsa_sscf_710	0x00000000
+-static u32 A_dsa_sscf_710_used[] __attribute((unused)) = {
+-	0x00000007,
+-};
+-
+-#define A_dsa_status	0x00000060
+-static u32 A_dsa_status_used[] __attribute((unused)) = {
+-	0x00000198,
+-};
+-
+-#define A_dsa_temp_addr_array_value	0x00000000
+-static u32 A_dsa_temp_addr_array_value_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dsa_temp_addr_dsa_value	0x00000000
+-static u32 A_dsa_temp_addr_dsa_value_used[] __attribute((unused)) = {
+-	0x00000001,
+-};
+-
+-#define A_dsa_temp_addr_new_value	0x00000000
+-static u32 A_dsa_temp_addr_new_value_used[] __attribute((unused)) = {
+-};
+-
+-#define A_dsa_temp_addr_next	0x00000000
+-static u32 A_dsa_temp_addr_next_used[] __attribute((unused)) = {
+-	0x0000001c,
+-	0x0000004f,
+-};
+-
+-#define A_dsa_temp_addr_residual	0x00000000
+-static u32 A_dsa_temp_addr_residual_used[] __attribute((unused)) = {
+-	0x0000002d,
+-	0x0000003b,
+-};
+-
+-#define A_dsa_temp_addr_saved_pointer	0x00000000
+-static u32 A_dsa_temp_addr_saved_pointer_used[] __attribute((unused)) = {
+-	0x0000002b,
+-	0x00000037,
+-};
+-
+-#define A_dsa_temp_addr_saved_residual	0x00000000
+-static u32 A_dsa_temp_addr_saved_residual_used[] __attribute((unused)) = {
+-	0x0000002e,
+-	0x0000003a,
+-};
+-
+-#define A_dsa_temp_lun	0x00000000
+-static u32 A_dsa_temp_lun_used[] __attribute((unused)) = {
+-	0x0000004c,
+-};
+-
+-#define A_dsa_temp_next	0x00000000
+-static u32 A_dsa_temp_next_used[] __attribute((unused)) = {
+-	0x0000001f,
+-};
+-
+-#define A_dsa_temp_sync	0x00000000
+-static u32 A_dsa_temp_sync_used[] __attribute((unused)) = {
+-	0x00000057,
+-};
+-
+-#define A_dsa_temp_target	0x00000000
+-static u32 A_dsa_temp_target_used[] __attribute((unused)) = {
+-	0x00000045,
+-};
+-
+-#define A_emulfly	0x00000000
+-static u32 A_emulfly_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_debug_break	0x03000000
+-static u32 A_int_debug_break_used[] __attribute((unused)) = {
+-	0x0000023c,
+-};
+-
+-#define A_int_debug_panic	0x030b0000
+-static u32 A_int_debug_panic_used[] __attribute((unused)) = {
+-	0x00000209,
+-	0x00000219,
+-};
+-
+-#define A_int_err_check_condition	0x00030000
+-static u32 A_int_err_check_condition_used[] __attribute((unused)) = {
+-	0x000001a9,
+-};
+-
+-#define A_int_err_no_phase	0x00040000
+-static u32 A_int_err_no_phase_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_err_selected	0x00010000
+-static u32 A_int_err_selected_used[] __attribute((unused)) = {
+-	0x000001ff,
+-};
+-
+-#define A_int_err_unexpected_phase	0x00000000
+-static u32 A_int_err_unexpected_phase_used[] __attribute((unused)) = {
+-	0x00000092,
+-	0x00000098,
+-	0x000000a0,
+-	0x000000d6,
+-	0x000000da,
+-	0x000000dc,
+-	0x000000e4,
+-	0x000000e8,
+-	0x000000ea,
+-	0x000000f2,
+-	0x000000f6,
+-	0x000000f8,
+-	0x000000fa,
+-	0x00000160,
+-};
+-
+-#define A_int_err_unexpected_reselect	0x00020000
+-static u32 A_int_err_unexpected_reselect_used[] __attribute((unused)) = {
+-	0x000001cd,
+-};
+-
+-#define A_int_msg_1	0x01020000
+-static u32 A_int_msg_1_used[] __attribute((unused)) = {
+-	0x00000114,
+-	0x00000116,
+-};
+-
+-#define A_int_msg_sdtr	0x01010000
+-static u32 A_int_msg_sdtr_used[] __attribute((unused)) = {
+-	0x00000180,
+-};
+-
+-#define A_int_msg_wdtr	0x01000000
+-static u32 A_int_msg_wdtr_used[] __attribute((unused)) = {
+-	0x00000174,
+-};
+-
+-#define A_int_norm_aborted	0x02040000
+-static u32 A_int_norm_aborted_used[] __attribute((unused)) = {
+-	0x00000270,
+-};
+-
+-#define A_int_norm_command_complete	0x02020000
+-static u32 A_int_norm_command_complete_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_norm_disconnected	0x02030000
+-static u32 A_int_norm_disconnected_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_norm_emulateintfly	0x02060000
+-static u32 A_int_norm_emulateintfly_used[] __attribute((unused)) = {
+-	0x000001a2,
+-};
+-
+-#define A_int_norm_reselect_complete	0x02010000
+-static u32 A_int_norm_reselect_complete_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_norm_reset	0x02050000
+-static u32 A_int_norm_reset_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_norm_select_complete	0x02000000
+-static u32 A_int_norm_select_complete_used[] __attribute((unused)) = {
+-};
+-
+-#define A_int_test_1	0x04000000
+-static u32 A_int_test_1_used[] __attribute((unused)) = {
+-	0x0000021e,
+-};
+-
+-#define A_int_test_2	0x04010000
+-static u32 A_int_test_2_used[] __attribute((unused)) = {
+-	0x0000023a,
+-};
+-
+-#define A_int_test_3	0x04020000
+-static u32 A_int_test_3_used[] __attribute((unused)) = {
+-};
+-
+-#define A_msg_buf	0x00000000
+-static u32 A_msg_buf_used[] __attribute((unused)) = {
+-	0x00000108,
+-	0x00000162,
+-	0x0000016c,
+-	0x00000172,
+-	0x00000178,
+-	0x0000017e,
+-};
+-
+-#define A_reconnect_dsa_head	0x00000000
+-static u32 A_reconnect_dsa_head_used[] __attribute((unused)) = {
+-	0x0000006d,
+-	0x00000074,
+-	0x000001b1,
+-};
+-
+-#define A_reselected_identify	0x00000000
+-static u32 A_reselected_identify_used[] __attribute((unused)) = {
+-	0x00000048,
+-	0x000001af,
+-};
+-
+-#define A_reselected_tag	0x00000000
+-static u32 A_reselected_tag_used[] __attribute((unused)) = {
+-};
+-
+-#define A_saved_dsa	0x00000000
+-static u32 A_saved_dsa_used[] __attribute((unused)) = {
+-	0x00000005,
+-	0x0000000e,
+-	0x00000023,
+-	0x00000025,
+-	0x00000032,
+-	0x0000003f,
+-	0x00000054,
+-	0x0000005f,
+-	0x00000070,
+-	0x00000078,
+-	0x0000008d,
+-	0x000000aa,
+-	0x000000bb,
+-	0x000000c0,
+-	0x000000d1,
+-	0x0000012f,
+-	0x000001a4,
+-	0x000001b5,
+-	0x000001ba,
+-	0x000001e2,
+-};
+-
+-#define A_schedule	0x00000000
+-static u32 A_schedule_used[] __attribute((unused)) = {
+-	0x0000007d,
+-	0x000001a7,
+-	0x00000203,
+-	0x00000244,
+-};
+-
+-#define A_test_dest	0x00000000
+-static u32 A_test_dest_used[] __attribute((unused)) = {
+-	0x0000021c,
+-};
+-
+-#define A_test_src	0x00000000
+-static u32 A_test_src_used[] __attribute((unused)) = {
+-	0x0000021b,
+-};
+-
+-#define Ent_accept_message	0x00000624
+-#define Ent_cmdout_cmdout	0x00000264
+-#define Ent_command_complete	0x0000065c
+-#define Ent_command_complete_msgin	0x0000066c
+-#define Ent_data_transfer	0x0000026c
+-#define Ent_datain_to_jump	0x00000334
+-#define Ent_debug_break	0x000008ec
+-#define Ent_dsa_code_begin	0x00000000
+-#define Ent_dsa_code_check_reselect	0x0000010c
+-#define Ent_dsa_code_fix_jump	0x00000058
+-#define Ent_dsa_code_restore_pointers	0x000000d8
+-#define Ent_dsa_code_save_data_pointer	0x000000a4
+-#define Ent_dsa_code_template	0x00000000
+-#define Ent_dsa_code_template_end	0x00000178
+-#define Ent_dsa_schedule	0x00000178
+-#define Ent_dsa_zero	0x00000178
+-#define Ent_end_data_transfer	0x000002a4
+-#define Ent_initiator_abort	0x00000914
+-#define Ent_msg_in	0x0000041c
+-#define Ent_msg_in_restart	0x000003fc
+-#define Ent_other_in	0x0000038c
+-#define Ent_other_out	0x00000354
+-#define Ent_other_transfer	0x000003c4
+-#define Ent_reject_message	0x00000604
+-#define Ent_reselected_check_next	0x000006f0
+-#define Ent_reselected_ok	0x00000798
+-#define Ent_respond_message	0x0000063c
+-#define Ent_select	0x000001f8
+-#define Ent_select_msgout	0x00000218
+-#define Ent_target_abort	0x000008f4
+-#define Ent_test_1	0x00000868
+-#define Ent_test_2	0x0000087c
+-#define Ent_test_2_msgout	0x0000089c
+-#define Ent_wait_reselect	0x000006a8
+-static u32 LABELPATCHES[] __attribute((unused)) = {
+-	0x00000011,
+-	0x0000001a,
+-	0x0000001d,
+-	0x00000028,
+-	0x0000002a,
+-	0x00000035,
+-	0x00000038,
+-	0x00000042,
+-	0x00000050,
+-	0x00000052,
+-	0x0000006b,
+-	0x00000083,
+-	0x00000085,
+-	0x00000090,
+-	0x00000094,
+-	0x00000096,
+-	0x0000009c,
+-	0x0000009e,
+-	0x000000a2,
+-	0x000000a4,
+-	0x000000a6,
+-	0x000000a8,
+-	0x000000b6,
+-	0x000000b9,
+-	0x000000cc,
+-	0x000000cf,
+-	0x000000d8,
+-	0x000000de,
+-	0x000000e0,
+-	0x000000e6,
+-	0x000000ec,
+-	0x000000ee,
+-	0x000000f4,
+-	0x000000fc,
+-	0x000000fe,
+-	0x0000010a,
+-	0x0000010c,
+-	0x0000010e,
+-	0x00000110,
+-	0x00000112,
+-	0x00000118,
+-	0x0000011a,
+-	0x0000012d,
+-	0x00000143,
+-	0x00000158,
+-	0x0000015c,
+-	0x00000164,
+-	0x00000166,
+-	0x00000168,
+-	0x0000016e,
+-	0x0000017a,
+-	0x000001ab,
+-	0x000001b8,
+-	0x000001bf,
+-	0x000001c3,
+-	0x000001c7,
+-	0x000001cb,
+-	0x000001e0,
+-	0x000001f8,
+-	0x00000207,
+-	0x0000020f,
+-	0x00000213,
+-	0x00000217,
+-	0x00000224,
+-	0x00000226,
+-	0x00000248,
+-	0x0000024a,
+-	0x0000024c,
+-	0x0000024e,
+-	0x00000250,
+-	0x00000252,
+-	0x00000256,
+-	0x0000025a,
+-	0x0000025c,
+-	0x00000260,
+-	0x00000262,
+-	0x00000266,
+-	0x00000268,
+-};
+-
+-static struct {
+-	u32	offset;
+-	void		*address;
+-} EXTERNAL_PATCHES[] __attribute((unused)) = {
+-};
+-
+-static u32 INSTRUCTIONS __attribute((unused))	= 290;
+-static u32 PATCHES __attribute((unused))	= 78;
+-static u32 EXTERNAL_PATCHES_LEN __attribute((unused))	= 0;
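
The A_*_used[], LABELPATCHES[] and EXTERNAL_PATCHES[] tables in the
listing above are the interesting part of the shipped firmware: they
record the 32-bit word offsets inside the SCRIPT[] image that the
driver rewrites at load time.  A minimal sketch of such a relocation
pass in plain C follows; the names (relocate_script, bus_base,
sym_value) are illustrative assumptions, not the actual 53c7xx.c
loader code.

/*
 * Hedged sketch, not the real 53c7xx.c loader: apply label and
 * absolute-symbol patches to a shipped SCRIPTS image before handing
 * it to the chip.  All offsets are 32-bit word indices into script[].
 */
#include <stdint.h>
#include <stddef.h>

static void relocate_script(uint32_t *script, uint32_t bus_base,
			    const uint32_t *label_patches, size_t n_labels,
			    uint32_t sym_value, const uint32_t *sym_used,
			    size_t n_used)
{
	size_t i;

	/* LABELPATCHES[]: words holding script-relative addresses;
	 * add the image's bus address so JUMP/CALL operands resolve. */
	for (i = 0; i < n_labels; i++)
		script[label_patches[i]] += bus_base;

	/* A_<sym>_used[]: words assembled with a placeholder ABSOLUTE
	 * value; store the run-time value of the symbol there. */
	for (i = 0; i < n_used; i++)
		script[sym_used[i]] = sym_value;
}
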
+diff -Nurb linux-2.6.22-570/drivers/scsi/53c7xx_u.h_shipped linux-2.6.22-try2/drivers/scsi/53c7xx_u.h_shipped
+--- linux-2.6.22-570/drivers/scsi/53c7xx_u.h_shipped	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/53c7xx_u.h_shipped	1969-12-31 19:00:00.000000000 -0500
+@@ -1,102 +0,0 @@
+-#undef A_NCR53c7xx_msg_abort
+-#undef A_NCR53c7xx_msg_reject
+-#undef A_NCR53c7xx_sink
+-#undef A_NCR53c7xx_zero
+-#undef A_NOP_insn
+-#undef A_addr_dsa
+-#undef A_addr_reconnect_dsa_head
+-#undef A_addr_scratch
+-#undef A_addr_temp
+-#undef A_dmode_memory_to_memory
+-#undef A_dmode_memory_to_ncr
+-#undef A_dmode_ncr_to_memory
+-#undef A_dsa_check_reselect
+-#undef A_dsa_cmdout
+-#undef A_dsa_cmnd
+-#undef A_dsa_datain
+-#undef A_dsa_dataout
+-#undef A_dsa_end
+-#undef A_dsa_fields_start
+-#undef A_dsa_msgin
+-#undef A_dsa_msgout
+-#undef A_dsa_msgout_other
+-#undef A_dsa_next
+-#undef A_dsa_restore_pointers
+-#undef A_dsa_save_data_pointer
+-#undef A_dsa_select
+-#undef A_dsa_sscf_710
+-#undef A_dsa_status
+-#undef A_dsa_temp_addr_array_value
+-#undef A_dsa_temp_addr_dsa_value
+-#undef A_dsa_temp_addr_new_value
+-#undef A_dsa_temp_addr_next
+-#undef A_dsa_temp_addr_residual
+-#undef A_dsa_temp_addr_saved_pointer
+-#undef A_dsa_temp_addr_saved_residual
+-#undef A_dsa_temp_lun
+-#undef A_dsa_temp_next
+-#undef A_dsa_temp_sync
+-#undef A_dsa_temp_target
+-#undef A_emulfly
+-#undef A_int_debug_break
+-#undef A_int_debug_panic
+-#undef A_int_err_check_condition
+-#undef A_int_err_no_phase
+-#undef A_int_err_selected
+-#undef A_int_err_unexpected_phase
+-#undef A_int_err_unexpected_reselect
+-#undef A_int_msg_1
+-#undef A_int_msg_sdtr
+-#undef A_int_msg_wdtr
+-#undef A_int_norm_aborted
+-#undef A_int_norm_command_complete
+-#undef A_int_norm_disconnected
+-#undef A_int_norm_emulateintfly
+-#undef A_int_norm_reselect_complete
+-#undef A_int_norm_reset
+-#undef A_int_norm_select_complete
+-#undef A_int_test_1
+-#undef A_int_test_2
+-#undef A_int_test_3
+-#undef A_msg_buf
+-#undef A_reconnect_dsa_head
+-#undef A_reselected_identify
+-#undef A_reselected_tag
+-#undef A_saved_dsa
+-#undef A_schedule
+-#undef A_test_dest
+-#undef A_test_src
+-#undef Ent_accept_message
+-#undef Ent_cmdout_cmdout
+-#undef Ent_command_complete
+-#undef Ent_command_complete_msgin
+-#undef Ent_data_transfer
+-#undef Ent_datain_to_jump
+-#undef Ent_debug_break
+-#undef Ent_dsa_code_begin
+-#undef Ent_dsa_code_check_reselect
+-#undef Ent_dsa_code_fix_jump
+-#undef Ent_dsa_code_restore_pointers
+-#undef Ent_dsa_code_save_data_pointer
+-#undef Ent_dsa_code_template
+-#undef Ent_dsa_code_template_end
+-#undef Ent_dsa_schedule
+-#undef Ent_dsa_zero
+-#undef Ent_end_data_transfer
+-#undef Ent_initiator_abort
+-#undef Ent_msg_in
+-#undef Ent_msg_in_restart
+-#undef Ent_other_in
+-#undef Ent_other_out
+-#undef Ent_other_transfer
+-#undef Ent_reject_message
+-#undef Ent_reselected_check_next
+-#undef Ent_reselected_ok
+-#undef Ent_respond_message
+-#undef Ent_select
+-#undef Ent_select_msgout
+-#undef Ent_target_abort
+-#undef Ent_test_1
+-#undef Ent_test_2
+-#undef Ent_test_2_msgout
+-#undef Ent_wait_reselect
+diff -Nurb linux-2.6.22-570/drivers/scsi/BusLogic.c linux-2.6.22-try2/drivers/scsi/BusLogic.c
+--- linux-2.6.22-570/drivers/scsi/BusLogic.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/BusLogic.c	2007-12-19 15:29:23.000000000 -0500
+@@ -304,16 +304,8 @@
+ static void BusLogic_DeallocateCCB(struct BusLogic_CCB *CCB)
+ {
+ 	struct BusLogic_HostAdapter *HostAdapter = CCB->HostAdapter;
+-	struct scsi_cmnd *cmd = CCB->Command;
+ 
+-	if (cmd->use_sg != 0) {
+-		pci_unmap_sg(HostAdapter->PCI_Device,
+-				(struct scatterlist *)cmd->request_buffer,
+-				cmd->use_sg, cmd->sc_data_direction);
+-	} else if (cmd->request_bufflen != 0) {
+-		pci_unmap_single(HostAdapter->PCI_Device, CCB->DataPointer,
+-				CCB->DataLength, cmd->sc_data_direction);
+-	}
++	scsi_dma_unmap(CCB->Command);
+ 	pci_unmap_single(HostAdapter->PCI_Device, CCB->SenseDataPointer,
+ 			CCB->SenseDataLength, PCI_DMA_FROMDEVICE);
+ 
+@@ -2648,7 +2640,8 @@
+ 			 */
+ 			if (CCB->CDB[0] == INQUIRY && CCB->CDB[1] == 0 && CCB->HostAdapterStatus == BusLogic_CommandCompletedNormally) {
+ 				struct BusLogic_TargetFlags *TargetFlags = &HostAdapter->TargetFlags[CCB->TargetID];
+-				struct SCSI_Inquiry *InquiryResult = (struct SCSI_Inquiry *) Command->request_buffer;
++				struct SCSI_Inquiry *InquiryResult =
++					(struct SCSI_Inquiry *) scsi_sglist(Command);
+ 				TargetFlags->TargetExists = true;
+ 				TargetFlags->TaggedQueuingSupported = InquiryResult->CmdQue;
+ 				TargetFlags->WideTransfersSupported = InquiryResult->WBus16;
+@@ -2819,9 +2812,8 @@
+ 	int CDB_Length = Command->cmd_len;
+ 	int TargetID = Command->device->id;
+ 	int LogicalUnit = Command->device->lun;
+-	void *BufferPointer = Command->request_buffer;
+-	int BufferLength = Command->request_bufflen;
+-	int SegmentCount = Command->use_sg;
++	int BufferLength = scsi_bufflen(Command);
++	int Count;
+ 	struct BusLogic_CCB *CCB;
+ 	/*
+ 	   SCSI REQUEST_SENSE commands will be executed automatically by the Host
+@@ -2851,36 +2843,35 @@
+ 			return 0;
+ 		}
+ 	}
++
+ 	/*
+ 	   Initialize the fields in the BusLogic Command Control Block (CCB).
+ 	 */
+-	if (SegmentCount == 0 && BufferLength != 0) {
+-		CCB->Opcode = BusLogic_InitiatorCCB;
+-		CCB->DataLength = BufferLength;
+-		CCB->DataPointer = pci_map_single(HostAdapter->PCI_Device,
+-				BufferPointer, BufferLength,
+-				Command->sc_data_direction);
+-	} else if (SegmentCount != 0) {
+-		struct scatterlist *ScatterList = (struct scatterlist *) BufferPointer;
+-		int Segment, Count;
++	Count = scsi_dma_map(Command);
++	BUG_ON(Count < 0);
++	if (Count) {
++		struct scatterlist *sg;
++		int i;
+ 
+-		Count = pci_map_sg(HostAdapter->PCI_Device, ScatterList, SegmentCount,
+-				Command->sc_data_direction);
+ 		CCB->Opcode = BusLogic_InitiatorCCB_ScatterGather;
+ 		CCB->DataLength = Count * sizeof(struct BusLogic_ScatterGatherSegment);
+ 		if (BusLogic_MultiMasterHostAdapterP(HostAdapter))
+ 			CCB->DataPointer = (unsigned int) CCB->DMA_Handle + ((unsigned long) &CCB->ScatterGatherList - (unsigned long) CCB);
+ 		else
+ 			CCB->DataPointer = Virtual_to_32Bit_Virtual(CCB->ScatterGatherList);
+-		for (Segment = 0; Segment < Count; Segment++) {
+-			CCB->ScatterGatherList[Segment].SegmentByteCount = sg_dma_len(ScatterList + Segment);
+-			CCB->ScatterGatherList[Segment].SegmentDataPointer = sg_dma_address(ScatterList + Segment);
++
++		scsi_for_each_sg(Command, sg, Count, i) {
++			CCB->ScatterGatherList[i].SegmentByteCount =
++				sg_dma_len(sg);
++			CCB->ScatterGatherList[i].SegmentDataPointer =
++				sg_dma_address(sg);
+ 		}
+-	} else {
++	} else if (!Count) {
+ 		CCB->Opcode = BusLogic_InitiatorCCB;
+ 		CCB->DataLength = BufferLength;
+ 		CCB->DataPointer = 0;
+ 	}
++
+ 	switch (CDB[0]) {
+ 	case READ_6:
+ 	case READ_10:
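
The BusLogic conversion above is the general pattern of this series:
open-coded use_sg/request_buffer handling and pci_map_sg()/
pci_unmap_sg() calls are replaced by the scsi_dma_map()/
scsi_dma_unmap() and scsi_for_each_sg() accessors.  A condensed
sketch of the pattern for a hypothetical driver follows; example_seg
and example_build_sg are invented names, while the accessors
themselves are the real <scsi/scsi_cmnd.h> API.

#include <linux/types.h>
#include <linux/scatterlist.h>
#include <scsi/scsi_cmnd.h>

struct example_seg {		/* stand-in for a controller SG entry */
	u32 addr;		/* assumes a 32-bit DMA controller */
	u32 len;
};

static int example_build_sg(struct scsi_cmnd *cmd, struct example_seg *tbl)
{
	struct scatterlist *sg;
	int i, count;

	count = scsi_dma_map(cmd);	/* 0..N segments, < 0 on error */
	if (count < 0)
		return count;

	scsi_for_each_sg(cmd, sg, count, i) {
		tbl[i].addr = sg_dma_address(sg);
		tbl[i].len  = sg_dma_len(sg);
	}
	return count;	/* scsi_dma_unmap(cmd) runs at completion time */
}
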
+diff -Nurb linux-2.6.22-570/drivers/scsi/Kconfig linux-2.6.22-try2/drivers/scsi/Kconfig
+--- linux-2.6.22-570/drivers/scsi/Kconfig	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/Kconfig	2007-12-19 15:29:23.000000000 -0500
+@@ -739,7 +739,7 @@
+ 
+ config SCSI_IBMMCA
+ 	tristate "IBMMCA SCSI support"
+-	depends on MCA_LEGACY && SCSI
++	depends on MCA && SCSI
+ 	---help---
+ 	  This is support for the IBM SCSI adapter found in many of the PS/2
+ 	  series computers.  These machines have an MCA bus, so you need to
+@@ -1007,6 +1007,11 @@
+ 	  To compile this driver as a module, choose M here: the
+ 	  module will be called stex.
+ 
++config 53C700_BE_BUS
++	bool
++	depends on SCSI_A4000T || SCSI_ZORRO7XX || MVME16x_SCSI || BVME6000_SCSI
++	default y
++
+ config SCSI_SYM53C8XX_2
+ 	tristate "SYM53C8XX Version 2 SCSI support"
+ 	depends on PCI && SCSI
+@@ -1611,13 +1616,25 @@
+ 	  If you have the Phase5 Fastlane Z3 SCSI controller, or plan to use
+ 	  one in the near future, say Y to this question. Otherwise, say N.
+ 
+-config SCSI_AMIGA7XX
+-	bool "Amiga NCR53c710 SCSI support (EXPERIMENTAL)"
+-	depends on AMIGA && SCSI && EXPERIMENTAL && BROKEN
++config SCSI_A4000T
++	tristate "A4000T NCR53c710 SCSI support (EXPERIMENTAL)"
++	depends on AMIGA && SCSI && EXPERIMENTAL
++	select SCSI_SPI_ATTRS
+ 	help
+-	  Support for various NCR53c710-based SCSI controllers on the Amiga.
++	  If you have an Amiga 4000T and have SCSI devices connected to the
++	  built-in SCSI controller, say Y. Otherwise, say N.
++
++	  To compile this driver as a module, choose M here: the
++	  module will be called a4000t.
++
++config SCSI_ZORRO7XX
++	tristate "Zorro NCR53c710 SCSI support (EXPERIMENTAL)"
++	depends on ZORRO && SCSI && EXPERIMENTAL
++	select SCSI_SPI_ATTRS
++	help
++	  Support for various NCR53c710-based SCSI controllers on Zorro
++	  expansion boards for the Amiga.
+ 	  This includes:
+-	    - the builtin SCSI controller on the Amiga 4000T,
+ 	    - the Amiga 4091 Zorro III SCSI-2 controller,
+ 	    - the MacroSystem Development's WarpEngine Amiga SCSI-2 controller
+ 	      (info at
+@@ -1625,10 +1642,6 @@
+ 	    - the SCSI controller on the Phase5 Blizzard PowerUP 603e+
+ 	      accelerator card for the Amiga 1200,
+ 	    - the SCSI controller on the GVP Turbo 040/060 accelerator.
+-	  Note that all of the above SCSI controllers, except for the builtin
+-	  SCSI controller on the Amiga 4000T, reside on the Zorro expansion
+-	  bus, so you also have to enable Zorro bus support if you want to use
+-	  them.
+ 
+ config OKTAGON_SCSI
+ 	tristate "BSC Oktagon SCSI support (EXPERIMENTAL)"
+@@ -1712,8 +1725,8 @@
+ 	  single-board computer.
+ 
+ config MVME16x_SCSI
+-	bool "NCR53C710 SCSI driver for MVME16x"
+-	depends on MVME16x && SCSI && BROKEN
++	tristate "NCR53C710 SCSI driver for MVME16x"
++	depends on MVME16x && SCSI
+ 	select SCSI_SPI_ATTRS
+ 	help
+ 	  The Motorola MVME162, 166, 167, 172 and 177 boards use the NCR53C710
+@@ -1721,22 +1734,14 @@
+ 	  will want to say Y to this question.
+ 
+ config BVME6000_SCSI
+-	bool "NCR53C710 SCSI driver for BVME6000"
+-	depends on BVME6000 && SCSI && BROKEN
++	tristate "NCR53C710 SCSI driver for BVME6000"
++	depends on BVME6000 && SCSI
+ 	select SCSI_SPI_ATTRS
+ 	help
+ 	  The BVME4000 and BVME6000 boards from BVM Ltd use the NCR53C710
+ 	  SCSI controller chip.  Almost everyone using one of these boards
+ 	  will want to say Y to this question.
+ 
+-config SCSI_NCR53C7xx_FAST
+-	bool "allow FAST-SCSI [10MHz]"
+-	depends on SCSI_AMIGA7XX || MVME16x_SCSI || BVME6000_SCSI
+-	help
+-	  This will enable 10MHz FAST-SCSI transfers with your host
+-	  adapter. Some systems have problems with that speed, so it's safest
+-	  to say N here.
+-
+ config SUN3_SCSI
+ 	tristate "Sun3 NCR5380 SCSI"
+ 	depends on SUN3 && SCSI
+diff -Nurb linux-2.6.22-570/drivers/scsi/Makefile linux-2.6.22-try2/drivers/scsi/Makefile
+--- linux-2.6.22-570/drivers/scsi/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -37,7 +37,8 @@
+ 
+ obj-$(CONFIG_ISCSI_TCP) 	+= libiscsi.o	iscsi_tcp.o
+ obj-$(CONFIG_INFINIBAND_ISER) 	+= libiscsi.o
+-obj-$(CONFIG_SCSI_AMIGA7XX)	+= amiga7xx.o	53c7xx.o
++obj-$(CONFIG_SCSI_A4000T)	+= 53c700.o	a4000t.o
++obj-$(CONFIG_SCSI_ZORRO7XX)	+= 53c700.o	zorro7xx.o
+ obj-$(CONFIG_A3000_SCSI)	+= a3000.o	wd33c93.o
+ obj-$(CONFIG_A2091_SCSI)	+= a2091.o	wd33c93.o
+ obj-$(CONFIG_GVP11_SCSI)	+= gvp11.o	wd33c93.o
+@@ -53,8 +54,8 @@
+ obj-$(CONFIG_MAC_SCSI)		+= mac_scsi.o
+ obj-$(CONFIG_SCSI_MAC_ESP)	+= mac_esp.o	NCR53C9x.o
+ obj-$(CONFIG_SUN3_SCSI)		+= sun3_scsi.o  sun3_scsi_vme.o
+-obj-$(CONFIG_MVME16x_SCSI)	+= mvme16x.o	53c7xx.o
+-obj-$(CONFIG_BVME6000_SCSI)	+= bvme6000.o	53c7xx.o
++obj-$(CONFIG_MVME16x_SCSI)	+= 53c700.o	mvme16x_scsi.o
++obj-$(CONFIG_BVME6000_SCSI)	+= 53c700.o	bvme6000_scsi.o
+ obj-$(CONFIG_SCSI_SIM710)	+= 53c700.o	sim710.o
+ obj-$(CONFIG_SCSI_ADVANSYS)	+= advansys.o
+ obj-$(CONFIG_SCSI_PSI240I)	+= psi240i.o
+@@ -168,10 +169,8 @@
+ oktagon_esp_mod-objs	:= oktagon_esp.o oktagon_io.o
+ 
+ # Files generated that shall be removed upon make clean
+-clean-files :=	53c7xx_d.h 53c700_d.h	\
+-		53c7xx_u.h 53c700_u.h
++clean-files :=	53c700_d.h 53c700_u.h
+ 
+-$(obj)/53c7xx.o:   $(obj)/53c7xx_d.h $(obj)/53c7xx_u.h
+ $(obj)/53c700.o $(MODVERDIR)/$(obj)/53c700.ver: $(obj)/53c700_d.h
+ 
+ # If you want to play with the firmware, uncomment
+@@ -179,11 +178,6 @@
+ 
+ ifdef GENERATE_FIRMWARE
+ 
+-$(obj)/53c7xx_d.h: $(src)/53c7xx.scr $(src)/script_asm.pl
+-	$(CPP) -traditional -DCHIP=710 - < $< | grep -v '^#' | $(PERL) -s $(src)/script_asm.pl -ncr7x0_family $@ $(@:_d.h=_u.h)
+-
+-$(obj)/53c7xx_u.h: $(obj)/53c7xx_d.h
+-
+ $(obj)/53c700_d.h: $(src)/53c700.scr $(src)/script_asm.pl
+ 	$(PERL) -s $(src)/script_asm.pl -ncr7x0_family $@ $(@:_d.h=_u.h) < $<
+ 
+diff -Nurb linux-2.6.22-570/drivers/scsi/NCR5380.c linux-2.6.22-try2/drivers/scsi/NCR5380.c
+--- linux-2.6.22-570/drivers/scsi/NCR5380.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/NCR5380.c	2007-12-19 15:29:23.000000000 -0500
+@@ -347,7 +347,7 @@
+ 		if((r & bit) == val)
+ 			return 0;
+ 		if(!in_interrupt())
+-			yield();
++			cond_resched();
+ 		else
+ 			cpu_relax();
+ 	}
+@@ -357,7 +357,7 @@
+ static struct {
+ 	unsigned char value;
+ 	const char *name;
+-} phases[] = {
++} phases[] __maybe_unused = {
+ 	{PHASE_DATAOUT, "DATAOUT"}, 
+ 	{PHASE_DATAIN, "DATAIN"}, 
+ 	{PHASE_CMDOUT, "CMDOUT"}, 
+@@ -575,7 +575,8 @@
+  *	Locks: none, irqs must be enabled on entry
+  */
+ 
+-static int __init NCR5380_probe_irq(struct Scsi_Host *instance, int possible)
++static int __init __maybe_unused NCR5380_probe_irq(struct Scsi_Host *instance,
++						int possible)
+ {
+ 	NCR5380_local_declare();
+ 	struct NCR5380_hostdata *hostdata = (struct NCR5380_hostdata *) instance->hostdata;
+@@ -629,7 +630,8 @@
+  *	Locks: none
+  */
+ 
+-static void __init NCR5380_print_options(struct Scsi_Host *instance)
++static void __init __maybe_unused
++NCR5380_print_options(struct Scsi_Host *instance)
+ {
+ 	printk(" generic options"
+ #ifdef AUTOPROBE_IRQ
+@@ -703,8 +705,8 @@
+ static
+ char *lprint_opcode(int opcode, char *pos, char *buffer, int length);
+ 
+-static
+-int NCR5380_proc_info(struct Scsi_Host *instance, char *buffer, char **start, off_t offset, int length, int inout)
++static int __maybe_unused NCR5380_proc_info(struct Scsi_Host *instance,
++	char *buffer, char **start, off_t offset, int length, int inout)
+ {
+ 	char *pos = buffer;
+ 	struct NCR5380_hostdata *hostdata;
+diff -Nurb linux-2.6.22-570/drivers/scsi/NCR5380.h linux-2.6.22-try2/drivers/scsi/NCR5380.h
+--- linux-2.6.22-570/drivers/scsi/NCR5380.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/NCR5380.h	2007-12-19 15:29:23.000000000 -0500
+@@ -299,7 +299,7 @@
+ static irqreturn_t NCR5380_intr(int irq, void *dev_id);
+ #endif
+ static void NCR5380_main(struct work_struct *work);
+-static void NCR5380_print_options(struct Scsi_Host *instance);
++static void __maybe_unused NCR5380_print_options(struct Scsi_Host *instance);
+ #ifdef NDEBUG
+ static void NCR5380_print_phase(struct Scsi_Host *instance);
+ static void NCR5380_print(struct Scsi_Host *instance);
+@@ -307,8 +307,8 @@
+ static int NCR5380_abort(Scsi_Cmnd * cmd);
+ static int NCR5380_bus_reset(Scsi_Cmnd * cmd);
+ static int NCR5380_queue_command(Scsi_Cmnd * cmd, void (*done) (Scsi_Cmnd *));
+-static int NCR5380_proc_info(struct Scsi_Host *instance, char *buffer, char **start,
+-off_t offset, int length, int inout);
++static int __maybe_unused NCR5380_proc_info(struct Scsi_Host *instance,
++	char *buffer, char **start, off_t offset, int length, int inout);
+ 
+ static void NCR5380_reselect(struct Scsi_Host *instance);
+ static int NCR5380_select(struct Scsi_Host *instance, Scsi_Cmnd * cmd, int tag);
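
The NCR5380 hunks above tag functions whose only callers sit inside
optional #ifdef blocks with __maybe_unused, so gcc stops warning when
a particular board configuration compiles the callers out.  A
self-contained userspace illustration of the idiom; the kernel's
macro, from <linux/compiler.h>, expands to the same attribute.

#define __maybe_unused __attribute__((unused))	/* as in <linux/compiler.h> */

static int __maybe_unused probe_irq(void)
{
	return 9;			/* pretend we probed IRQ 9 */
}

int main(void)
{
#ifdef AUTOPROBE_IRQ
	return probe_irq();		/* only some configs call this */
#else
	return 0;	/* without the attribute, gcc -Wunused-function warns */
#endif
}
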
+diff -Nurb linux-2.6.22-570/drivers/scsi/NCR53c406a.c linux-2.6.22-try2/drivers/scsi/NCR53c406a.c
+--- linux-2.6.22-570/drivers/scsi/NCR53c406a.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/NCR53c406a.c	2007-12-19 15:29:23.000000000 -0500
+@@ -698,7 +698,7 @@
+ 	int i;
+ 
+ 	VDEB(printk("NCR53c406a_queue called\n"));
+-	DEB(printk("cmd=%02x, cmd_len=%02x, target=%02x, lun=%02x, bufflen=%d\n", SCpnt->cmnd[0], SCpnt->cmd_len, SCpnt->target, SCpnt->lun, SCpnt->request_bufflen));
++	DEB(printk("cmd=%02x, cmd_len=%02x, target=%02x, lun=%02x, bufflen=%d\n", SCpnt->cmnd[0], SCpnt->cmd_len, SCpnt->target, SCpnt->lun, scsi_bufflen(SCpnt)));
+ 
+ #if 0
+ 	VDEB(for (i = 0; i < SCpnt->cmd_len; i++)
+@@ -785,8 +785,8 @@
+ 	unsigned char status, int_reg;
+ #if USE_PIO
+ 	unsigned char pio_status;
+-	struct scatterlist *sglist;
+-	unsigned int sgcount;
++	struct scatterlist *sg;
++        int i;
+ #endif
+ 
+ 	VDEB(printk("NCR53c406a_intr called\n"));
+@@ -866,21 +866,17 @@
+ 			current_SC->SCp.phase = data_out;
+ 			VDEB(printk("NCR53c406a: Data-Out phase\n"));
+ 			outb(FLUSH_FIFO, CMD_REG);
+-			LOAD_DMA_COUNT(current_SC->request_bufflen);	/* Max transfer size */
++			LOAD_DMA_COUNT(scsi_bufflen(current_SC));	/* Max transfer size */
+ #if USE_DMA			/* No s/g support for DMA */
+-			NCR53c406a_dma_write(current_SC->request_buffer, current_SC->request_bufflen);
++			NCR53c406a_dma_write(scsi_sglist(current_SC),
++                                             scsi_bufflen(current_SC));
++
+ #endif				/* USE_DMA */
+ 			outb(TRANSFER_INFO | DMA_OP, CMD_REG);
+ #if USE_PIO
+-			if (!current_SC->use_sg)	/* Don't use scatter-gather */
+-				NCR53c406a_pio_write(current_SC->request_buffer, current_SC->request_bufflen);
+-			else {	/* use scatter-gather */
+-				sgcount = current_SC->use_sg;
+-				sglist = current_SC->request_buffer;
+-				while (sgcount--) {
+-					NCR53c406a_pio_write(page_address(sglist->page) + sglist->offset, sglist->length);
+-					sglist++;
+-				}
++			scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) {
++				NCR53c406a_pio_write(page_address(sg->page) + sg->offset,
++						     sg->length);
+ 			}
+ 			REG0;
+ #endif				/* USE_PIO */
+@@ -893,21 +889,16 @@
+ 			current_SC->SCp.phase = data_in;
+ 			VDEB(printk("NCR53c406a: Data-In phase\n"));
+ 			outb(FLUSH_FIFO, CMD_REG);
+-			LOAD_DMA_COUNT(current_SC->request_bufflen);	/* Max transfer size */
++			LOAD_DMA_COUNT(scsi_bufflen(current_SC));	/* Max transfer size */
+ #if USE_DMA			/* No s/g support for DMA */
+-			NCR53c406a_dma_read(current_SC->request_buffer, current_SC->request_bufflen);
++			NCR53c406a_dma_read(scsi_sglist(current_SC),
++					    scsi_bufflen(current_SC));
+ #endif				/* USE_DMA */
+ 			outb(TRANSFER_INFO | DMA_OP, CMD_REG);
+ #if USE_PIO
+-			if (!current_SC->use_sg)	/* Don't use scatter-gather */
+-				NCR53c406a_pio_read(current_SC->request_buffer, current_SC->request_bufflen);
+-			else {	/* Use scatter-gather */
+-				sgcount = current_SC->use_sg;
+-				sglist = current_SC->request_buffer;
+-				while (sgcount--) {
+-					NCR53c406a_pio_read(page_address(sglist->page) + sglist->offset, sglist->length);
+-					sglist++;
+-				}
++			scsi_for_each_sg(current_SC, sg, scsi_sg_count(current_SC), i) {
++				NCR53c406a_pio_read(page_address(sg->page) + sg->offset,
++						    sg->length);
+ 			}
+ 			REG0;
+ #endif				/* USE_PIO */
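
The two hunks above are instances of the same mechanical conversion applied
throughout this patch: the old Scsi_Cmnd fields (use_sg, request_buffer,
request_bufflen) are replaced by the accessor API (scsi_sglist(),
scsi_bufflen(), scsi_sg_count(), scsi_for_each_sg()), under which every
request is a scatterlist and the non-s/g special case disappears. A minimal
before/after sketch, with a hypothetical pio_write() standing in for the
driver's PIO routine:

	/* Old style: branch on use_sg and walk the list by hand. */
	if (!cmd->use_sg)
		pio_write(cmd->request_buffer, cmd->request_bufflen);
	else {
		struct scatterlist *sgl = cmd->request_buffer;
		int n = cmd->use_sg;

		while (n--) {
			pio_write(page_address(sgl->page) + sgl->offset,
				  sgl->length);
			sgl++;
		}
	}

	/* New style: iterate uniformly over the mapped scatterlist. */
	struct scatterlist *sg;
	int i;

	scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i)
		pio_write(page_address(sg->page) + sg->offset, sg->length);
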
+diff -Nurb linux-2.6.22-570/drivers/scsi/a100u2w.c linux-2.6.22-try2/drivers/scsi/a100u2w.c
+--- linux-2.6.22-570/drivers/scsi/a100u2w.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/a100u2w.c	2007-12-19 15:29:23.000000000 -0500
+@@ -19,27 +19,6 @@
+  * along with this program; see the file COPYING.  If not, write to
+  * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+  *
+- * --------------------------------------------------------------------------
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- * 1. Redistributions of source code must retain the above copyright
+- *    notice, this list of conditions, and the following disclaimer,
+- *    without modification, immediately at the beginning of the file.
+- * 2. Redistributions in binary form must reproduce the above copyright
+- *    notice, this list of conditions and the following disclaimer in the
+- *    documentation and/or other materials provided with the distribution.
+- * 3. The name of the author may not be used to endorse or promote products
+- *    derived from this software without specific prior written permission.
+- *
+- * Where this Software is combined with software released under the terms of 
+- * the GNU General Public License ("GPL") and the terms of the GPL would require the 
+- * combined work to also be released under the terms of the GPL, the terms
+- * and conditions of this License will apply in addition to those of the
+- * GPL with the exception of any terms or conditions of this License that
+- * conflict with, or are expressly prohibited by, the GPL.
+- *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+@@ -75,6 +54,8 @@
+  * 9/28/04 Christoph Hellwig <hch@lst.de>
+  *	    - merge the two source files
+  *	    - remove internal queueing code
++ * 14/06/07 Alan Cox <alan@redhat.com>
++ *	 - Grand cleanup and Linuxisation
+  */
+ 
+ #include <linux/module.h>
+@@ -102,14 +83,12 @@
+ #include "a100u2w.h"
+ 
+ 
+-#define JIFFIES_TO_MS(t) ((t) * 1000 / HZ)
+-#define MS_TO_JIFFIES(j) ((j * HZ) / 1000)
++static struct orc_scb *__orc_alloc_scb(struct orc_host * host);
++static void inia100_scb_handler(struct orc_host *host, struct orc_scb *scb);
+ 
+-static ORC_SCB *orc_alloc_scb(ORC_HCS * hcsp);
+-static void inia100SCBPost(BYTE * pHcb, BYTE * pScb);
++static struct orc_nvram nvram, *nvramp = &nvram;
+ 
+-static NVRAM nvram, *nvramp = &nvram;
+-static UCHAR dftNvRam[64] =
++static u8 default_nvram[64] =
+ {
+ /*----------header -------------*/
+ 	0x01,			/* 0x00: Sub System Vendor ID 0 */
+@@ -158,823 +137,882 @@
+ };
+ 
+ 
+-/***************************************************************************/
+-static void waitForPause(unsigned amount)
+-{
+-	ULONG the_time = jiffies + MS_TO_JIFFIES(amount);
+-	while (time_before_eq(jiffies, the_time))
+-		cpu_relax();
+-}
+-
+-/***************************************************************************/
+-static UCHAR waitChipReady(ORC_HCS * hcsp)
++static u8 wait_chip_ready(struct orc_host * host)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < 10; i++) {	/* Wait 1 second for report timeout     */
+-		if (ORC_RD(hcsp->HCS_Base, ORC_HCTRL) & HOSTSTOP)	/* Wait HOSTSTOP set */
++		if (inb(host->base + ORC_HCTRL) & HOSTSTOP)	/* Wait HOSTSTOP set */
+ 			return 1;
+-		waitForPause(100);	/* wait 100ms before try again  */
++		mdelay(100);
+ 	}
+ 	return 0;
+ }
+ 
+-/***************************************************************************/
+-static UCHAR waitFWReady(ORC_HCS * hcsp)
++static u8 wait_firmware_ready(struct orc_host * host)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < 10; i++) {	/* Wait 1 second for report timeout     */
+-		if (ORC_RD(hcsp->HCS_Base, ORC_HSTUS) & RREADY)		/* Wait READY set */
++		if (inb(host->base + ORC_HSTUS) & RREADY)		/* Wait READY set */
+ 			return 1;
+-		waitForPause(100);	/* wait 100ms before try again  */
++		mdelay(100);	/* wait 100ms before try again  */
+ 	}
+ 	return 0;
+ }
+ 
+ /***************************************************************************/
+-static UCHAR waitSCSIRSTdone(ORC_HCS * hcsp)
++static u8 wait_scsi_reset_done(struct orc_host * host)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < 10; i++) {	/* Wait 1 second for report timeout     */
+-		if (!(ORC_RD(hcsp->HCS_Base, ORC_HCTRL) & SCSIRST))	/* Wait SCSIRST done */
++		if (!(inb(host->base + ORC_HCTRL) & SCSIRST))	/* Wait SCSIRST done */
+ 			return 1;
+-		waitForPause(100);	/* wait 100ms before try again  */
++		mdelay(100);	/* wait 100ms before try again  */
+ 	}
+ 	return 0;
+ }
+ 
+ /***************************************************************************/
+-static UCHAR waitHDOoff(ORC_HCS * hcsp)
++static u8 wait_HDO_off(struct orc_host * host)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < 10; i++) {	/* Wait 1 second for report timeout     */
+-		if (!(ORC_RD(hcsp->HCS_Base, ORC_HCTRL) & HDO))		/* Wait HDO off */
++		if (!(inb(host->base + ORC_HCTRL) & HDO))		/* Wait HDO off */
+ 			return 1;
+-		waitForPause(100);	/* wait 100ms before try again  */
++		mdelay(100);	/* wait 100ms before try again  */
+ 	}
+ 	return 0;
+ }
+ 
+ /***************************************************************************/
+-static UCHAR waitHDIset(ORC_HCS * hcsp, UCHAR * pData)
++static u8 wait_hdi_set(struct orc_host * host, u8 * data)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < 10; i++) {	/* Wait 1 second for report timeout     */
+-		if ((*pData = ORC_RD(hcsp->HCS_Base, ORC_HSTUS)) & HDI)
++		if ((*data = inb(host->base + ORC_HSTUS)) & HDI)
+ 			return 1;	/* Wait HDI set */
+-		waitForPause(100);	/* wait 100ms before try again  */
++		mdelay(100);	/* wait 100ms before try again  */
+ 	}
+ 	return 0;
+ }
+ 
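
wait_chip_ready(), wait_firmware_ready(), wait_scsi_reset_done(),
wait_HDO_off() and wait_hdi_set() are all the same poll-with-timeout idiom:
test a status bit up to ten times with a 100ms busy-wait between attempts,
giving up after roughly one second. A sketch of the common shape (the helper
name and mask/want parameters are illustrative, not part of the patch;
wait_hdi_set() additionally returns the status byte it read):

	/* Poll @reg until (value & mask) == want; ~1 second timeout. */
	static u8 orc_wait_bits(struct orc_host *host, int reg, u8 mask, u8 want)
	{
		int i;

		for (i = 0; i < 10; i++) {
			if ((inb(host->base + reg) & mask) == want)
				return 1;
			mdelay(100);	/* busy-wait, as the driver does */
		}
		return 0;		/* timed out */
	}
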
+ /***************************************************************************/
+-static unsigned short get_FW_version(ORC_HCS * hcsp)
++static unsigned short orc_read_fwrev(struct orc_host * host)
+ {
+-	UCHAR bData;
+-	union {
+-		unsigned short sVersion;
+-		unsigned char cVersion[2];
+-	} Version;
++	u16 version;
++	u8 data;
+ 
+-	ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_VERSION);
+-	ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+-	if (waitHDOoff(hcsp) == 0)	/* Wait HDO off   */
++	outb(ORC_CMD_VERSION, host->base + ORC_HDATA);
++	outb(HDO, host->base + ORC_HCTRL);
++	if (wait_HDO_off(host) == 0)	/* Wait HDO off   */
+ 		return 0;
+ 
+-	if (waitHDIset(hcsp, &bData) == 0)	/* Wait HDI set   */
++	if (wait_hdi_set(host, &data) == 0)	/* Wait HDI set   */
+ 		return 0;
+-	Version.cVersion[0] = ORC_RD(hcsp->HCS_Base, ORC_HDATA);
+-	ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData);	/* Clear HDI            */
++	version = inb(host->base + ORC_HDATA);
++	outb(data, host->base + ORC_HSTUS);	/* Clear HDI            */
+ 
+-	if (waitHDIset(hcsp, &bData) == 0)	/* Wait HDI set   */
++	if (wait_hdi_set(host, &data) == 0)	/* Wait HDI set   */
+ 		return 0;
+-	Version.cVersion[1] = ORC_RD(hcsp->HCS_Base, ORC_HDATA);
+-	ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData);	/* Clear HDI            */
++	version |= inb(host->base + ORC_HDATA) << 8;
++	outb(data, host->base + ORC_HSTUS);	/* Clear HDI            */
+ 
+-	return (Version.sVersion);
++	return version;
+ }
+ 
+ /***************************************************************************/
+-static UCHAR set_NVRAM(ORC_HCS * hcsp, unsigned char address, unsigned char value)
++static u8 orc_nv_write(struct orc_host * host, unsigned char address, unsigned char value)
+ {
+-	ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_SET_NVM);	/* Write command */
+-	ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+-	if (waitHDOoff(hcsp) == 0)	/* Wait HDO off   */
++	outb(ORC_CMD_SET_NVM, host->base + ORC_HDATA);	/* Write command */
++	outb(HDO, host->base + ORC_HCTRL);
++	if (wait_HDO_off(host) == 0)	/* Wait HDO off   */
+ 		return 0;
+ 
+-	ORC_WR(hcsp->HCS_Base + ORC_HDATA, address);	/* Write address */
+-	ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+-	if (waitHDOoff(hcsp) == 0)	/* Wait HDO off   */
++	outb(address, host->base + ORC_HDATA);	/* Write address */
++	outb(HDO, host->base + ORC_HCTRL);
++	if (wait_HDO_off(host) == 0)	/* Wait HDO off   */
+ 		return 0;
+ 
+-	ORC_WR(hcsp->HCS_Base + ORC_HDATA, value);	/* Write value  */
+-	ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+-	if (waitHDOoff(hcsp) == 0)	/* Wait HDO off   */
++	outb(value, host->base + ORC_HDATA);	/* Write value  */
++	outb(HDO, host->base + ORC_HCTRL);
++	if (wait_HDO_off(host) == 0)	/* Wait HDO off   */
+ 		return 0;
+ 
+ 	return 1;
+ }
+ 
+ /***************************************************************************/
+-static UCHAR get_NVRAM(ORC_HCS * hcsp, unsigned char address, unsigned char *pDataIn)
++static u8 orc_nv_read(struct orc_host * host, u8 address, u8 *ptr)
+ {
+-	unsigned char bData;
++	unsigned char data;
+ 
+-	ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_GET_NVM);	/* Write command */
+-	ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+-	if (waitHDOoff(hcsp) == 0)	/* Wait HDO off   */
++	outb(ORC_CMD_GET_NVM, host->base + ORC_HDATA);	/* Write command */
++	outb(HDO, host->base + ORC_HCTRL);
++	if (wait_HDO_off(host) == 0)	/* Wait HDO off   */
+ 		return 0;
+ 
+-	ORC_WR(hcsp->HCS_Base + ORC_HDATA, address);	/* Write address */
+-	ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+-	if (waitHDOoff(hcsp) == 0)	/* Wait HDO off   */
++	outb(address, host->base + ORC_HDATA);	/* Write address */
++	outb(HDO, host->base + ORC_HCTRL);
++	if (wait_HDO_off(host) == 0)	/* Wait HDO off   */
+ 		return 0;
+ 
+-	if (waitHDIset(hcsp, &bData) == 0)	/* Wait HDI set   */
++	if (wait_hdi_set(host, &data) == 0)	/* Wait HDI set   */
+ 		return 0;
+-	*pDataIn = ORC_RD(hcsp->HCS_Base, ORC_HDATA);
+-	ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData);	/* Clear HDI    */
++	*ptr = inb(host->base + ORC_HDATA);
++	outb(data, host->base + ORC_HSTUS);	/* Clear HDI    */
+ 
+ 	return 1;
++
+ }
+ 
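
orc_read_fwrev(), orc_nv_write() and orc_nv_read() all drive the same
byte-wide mailbox handshake: write a byte to ORC_HDATA, raise HDO in
ORC_HCTRL and wait for the controller to clear it (command consumed); to
read a reply, wait for HDI in ORC_HSTUS, fetch the byte from ORC_HDATA and
acknowledge by writing the status back to ORC_HSTUS. Factored into two
primitives (a sketch only; these helpers do not exist in the patch):

	/* Push one byte into the controller mailbox. 0 on timeout. */
	static int orc_mbox_put(struct orc_host *host, u8 byte)
	{
		outb(byte, host->base + ORC_HDATA);
		outb(HDO, host->base + ORC_HCTRL);
		return wait_HDO_off(host);
	}

	/* Pull one reply byte from the mailbox. 0 on timeout. */
	static int orc_mbox_get(struct orc_host *host, u8 *byte)
	{
		u8 status;

		if (wait_hdi_set(host, &status) == 0)
			return 0;
		*byte = inb(host->base + ORC_HDATA);
		outb(status, host->base + ORC_HSTUS);	/* clear HDI */
		return 1;
	}
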
+-/***************************************************************************/
+-static void orc_exec_scb(ORC_HCS * hcsp, ORC_SCB * scbp)
++/**
++ *	orc_exec_scb		-	Queue an SCB with the HA
++ *	@host: host adapter the SCB belongs to
++ *	@scb: SCB to queue for execution
++ */
++
++static void orc_exec_scb(struct orc_host * host, struct orc_scb * scb)
+ {
+-	scbp->SCB_Status = ORCSCB_POST;
+-	ORC_WR(hcsp->HCS_Base + ORC_PQUEUE, scbp->SCB_ScbIdx);
+-	return;
++	scb->status = ORCSCB_POST;
++	outb(scb->scbidx, host->base + ORC_PQUEUE);
+ }
+ 
+ 
+-/***********************************************************************
+- Read SCSI H/A configuration parameters from serial EEPROM
+-************************************************************************/
+-static int se2_rd_all(ORC_HCS * hcsp)
++/**
++ *	se2_rd_all	-	read SCSI parameters from EEPROM
++ *	@host: Host whose EEPROM is being loaded
++ *
++ *	Read SCSI H/A configuration parameters from serial EEPROM
++ */
++
++static int se2_rd_all(struct orc_host * host)
+ {
+ 	int i;
+-	UCHAR *np, chksum = 0;
++	u8 *np, chksum = 0;
+ 
+-	np = (UCHAR *) nvramp;
++	np = (u8 *) nvramp;
+ 	for (i = 0; i < 64; i++, np++) {	/* <01> */
+-		if (get_NVRAM(hcsp, (unsigned char) i, np) == 0)
++		if (orc_nv_read(host, (u8) i, np) == 0)
+ 			return -1;
+-//      *np++ = get_NVRAM(hcsp, (unsigned char ) i);
+ 	}
+ 
+-/*------ Is ckecksum ok ? ------*/
+-	np = (UCHAR *) nvramp;
++	/*------ Is checksum ok? ------*/
++	np = (u8 *) nvramp;
+ 	for (i = 0; i < 63; i++)
+ 		chksum += *np++;
+ 
+-	if (nvramp->CheckSum != (UCHAR) chksum)
++	if (nvramp->CheckSum != (u8) chksum)
+ 		return -1;
+ 	return 1;
+ }
+ 
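
The NVRAM image is validated by summing its first 63 bytes and comparing the
result with the 64th; se2_update_all() below computes the same sum when it
writes the defaults back. Stated on its own (an illustrative standalone check
over any 64-byte copy of the image; not part of the patch):

	/* Return 1 if the 64-byte NVRAM image has a valid checksum. */
	static int nvram_checksum_ok(const u8 image[64])
	{
		u8 sum = 0;
		int i;

		for (i = 0; i < 63; i++)
			sum += image[i];
		return sum == image[63];
	}
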
+-/************************************************************************
+- Update SCSI H/A configuration parameters from serial EEPROM
+-*************************************************************************/
+-static void se2_update_all(ORC_HCS * hcsp)
++/**
++ *	se2_update_all		-	update the EEPROM
++ *	@host: Host whose EEPROM is being updated
++ *
++ *	Update changed bytes in the EEPROM image.
++ */
++
++static void se2_update_all(struct orc_host * host)
+ {				/* setup default pattern  */
+ 	int i;
+-	UCHAR *np, *np1, chksum = 0;
++	u8 *np, *np1, chksum = 0;
+ 
+ 	/* Calculate checksum first   */
+-	np = (UCHAR *) dftNvRam;
++	np = (u8 *) default_nvram;
+ 	for (i = 0; i < 63; i++)
+ 		chksum += *np++;
+ 	*np = chksum;
+ 
+-	np = (UCHAR *) dftNvRam;
+-	np1 = (UCHAR *) nvramp;
++	np = (u8 *) default_nvram;
++	np1 = (u8 *) nvramp;
+ 	for (i = 0; i < 64; i++, np++, np1++) {
+-		if (*np != *np1) {
+-			set_NVRAM(hcsp, (unsigned char) i, *np);
+-		}
++		if (*np != *np1)
++			orc_nv_write(host, (u8) i, *np);
+ 	}
+-	return;
+ }
+ 
+-/*************************************************************************
+- Function name  : read_eeprom
+-**************************************************************************/
+-static void read_eeprom(ORC_HCS * hcsp)
+-{
+-	if (se2_rd_all(hcsp) != 1) {
+-		se2_update_all(hcsp);	/* setup default pattern        */
+-		se2_rd_all(hcsp);	/* load again                   */
++/**
++ *	read_eeprom		-	load EEPROM
++ *	@host: Host EEPROM to read
++ *
++ *	Read the EEPROM for a given host. If it is invalid or the read
++ *	fails, restore the defaults and use them.
++ */
++
++static void read_eeprom(struct orc_host * host)
++{
++	if (se2_rd_all(host) != 1) {
++		se2_update_all(host);	/* setup default pattern        */
++		se2_rd_all(host);	/* load again                   */
+ 	}
+ }
+ 
+ 
+-/***************************************************************************/
+-static UCHAR load_FW(ORC_HCS * hcsp)
++/**
++ *	orc_load_firmware	-	initialise firmware
++ *	@host: Host to set up
++ *
++ *	Load the firmware from the EEPROM into controller SRAM. This
++ *	is basically a 4K block copy and then a 4K block read to check
++ *	correctness. The rest is convoluted by the indirect interfaces
++ *	in the hardware.
++ */
++
++static u8 orc_load_firmware(struct orc_host * host)
+ {
+-	U32 dData;
+-	USHORT wBIOSAddress;
+-	USHORT i;
+-	UCHAR *pData, bData;
+-
+-
+-	bData = ORC_RD(hcsp->HCS_Base, ORC_GCFG);
+-	ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData | EEPRG);	/* Enable EEPROM programming */
+-	ORC_WR(hcsp->HCS_Base + ORC_EBIOSADR2, 0x00);
+-	ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x00);
+-	if (ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA) != 0x55) {
+-		ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData);	/* Disable EEPROM programming */
+-		return 0;
+-	}
+-	ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x01);
+-	if (ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA) != 0xAA) {
+-		ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData);	/* Disable EEPROM programming */
+-		return 0;
+-	}
+-	ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST | DOWNLOAD);	/* Enable SRAM programming */
+-	pData = (UCHAR *) & dData;
+-	dData = 0;		/* Initial FW address to 0 */
+-	ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x10);
+-	*pData = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA);		/* Read from BIOS */
+-	ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x11);
+-	*(pData + 1) = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA);	/* Read from BIOS */
+-	ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, 0x12);
+-	*(pData + 2) = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA);	/* Read from BIOS */
+-	ORC_WR(hcsp->HCS_Base + ORC_EBIOSADR2, *(pData + 2));
+-	ORC_WRLONG(hcsp->HCS_Base + ORC_FWBASEADR, dData);	/* Write FW address */
++	u32 data32;
++	u16 bios_addr;
++	u16 i;
++	u8 *data32_ptr, data;
++
++
++	/* Set up the EEPROM for access */
+ 
+-	wBIOSAddress = (USHORT) dData;	/* FW code locate at BIOS address + ? */
+-	for (i = 0, pData = (UCHAR *) & dData;	/* Download the code    */
++	data = inb(host->base + ORC_GCFG);
++	outb(data | EEPRG, host->base + ORC_GCFG);	/* Enable EEPROM programming */
++	outb(0x00, host->base + ORC_EBIOSADR2);
++	outw(0x0000, host->base + ORC_EBIOSADR0);
++	if (inb(host->base + ORC_EBIOSDATA) != 0x55) {
++		outb(data, host->base + ORC_GCFG);	/* Disable EEPROM programming */
++		return 0;
++	}
++	outw(0x0001, host->base + ORC_EBIOSADR0);
++	if (inb(host->base + ORC_EBIOSDATA) != 0xAA) {
++		outb(data, host->base + ORC_GCFG);	/* Disable EEPROM programming */
++		return 0;
++	}
++
++	outb(PRGMRST | DOWNLOAD, host->base + ORC_RISCCTL);	/* Enable SRAM programming */
++	data32_ptr = (u8 *) & data32;
++	data32 = 0;		/* Initial FW address to 0 */
++	outw(0x0010, host->base + ORC_EBIOSADR0);
++	*data32_ptr = inb(host->base + ORC_EBIOSDATA);		/* Read from BIOS */
++	outw(0x0011, host->base + ORC_EBIOSADR0);
++	*(data32_ptr + 1) = inb(host->base + ORC_EBIOSDATA);	/* Read from BIOS */
++	outw(0x0012, host->base + ORC_EBIOSADR0);
++	*(data32_ptr + 2) = inb(host->base + ORC_EBIOSDATA);	/* Read from BIOS */
++	outw(*(data32_ptr + 2), host->base + ORC_EBIOSADR2);
++	outl(data32, host->base + ORC_FWBASEADR);		/* Write FW address */
++
++	/* Copy the code from the BIOS to the SRAM */
++
++	bios_addr = (u16) data32;	/* FW code locate at BIOS address + ? */
++	for (i = 0, data32_ptr = (u8 *) & data32;	/* Download the code    */
+ 	     i < 0x1000;	/* Firmware code size = 4K      */
+-	     i++, wBIOSAddress++) {
+-		ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, wBIOSAddress);
+-		*pData++ = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA);	/* Read from BIOS */
++	     i++, bios_addr++) {
++		outw(bios_addr, host->base + ORC_EBIOSADR0);
++		*data32_ptr++ = inb(host->base + ORC_EBIOSDATA);	/* Read from BIOS */
+ 		if ((i % 4) == 3) {
+-			ORC_WRLONG(hcsp->HCS_Base + ORC_RISCRAM, dData);	/* Write every 4 bytes */
+-			pData = (UCHAR *) & dData;
++			outl(data32, host->base + ORC_RISCRAM);	/* Write every 4 bytes */
++			data32_ptr = (u8 *) & data32;
+ 		}
+ 	}
+ 
+-	ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST | DOWNLOAD);	/* Reset program count 0 */
+-	wBIOSAddress -= 0x1000;	/* Reset the BIOS adddress      */
+-	for (i = 0, pData = (UCHAR *) & dData;	/* Check the code       */
++	/* Go back and check they match */
++
++	outb(PRGMRST | DOWNLOAD, host->base + ORC_RISCCTL);	/* Reset program count 0 */
++	bios_addr -= 0x1000;	/* Reset the BIOS address       */
++	for (i = 0, data32_ptr = (u8 *) & data32;	/* Check the code       */
+ 	     i < 0x1000;	/* Firmware code size = 4K      */
+-	     i++, wBIOSAddress++) {
+-		ORC_WRSHORT(hcsp->HCS_Base + ORC_EBIOSADR0, wBIOSAddress);
+-		*pData++ = ORC_RD(hcsp->HCS_Base, ORC_EBIOSDATA);	/* Read from BIOS */
++	     i++, bios_addr++) {
++		outw(bios_addr, host->base + ORC_EBIOSADR0);
++		*data32_ptr++ = inb(host->base + ORC_EBIOSDATA);	/* Read from BIOS */
+ 		if ((i % 4) == 3) {
+-			if (ORC_RDLONG(hcsp->HCS_Base, ORC_RISCRAM) != dData) {
+-				ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST);	/* Reset program to 0 */
+-				ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData);	/*Disable EEPROM programming */
++			if (inl(host->base + ORC_RISCRAM) != data32) {
++				outb(PRGMRST, host->base + ORC_RISCCTL);	/* Reset program to 0 */
++				outb(data, host->base + ORC_GCFG);	/*Disable EEPROM programming */
+ 				return 0;
+ 			}
+-			pData = (UCHAR *) & dData;
++			data32_ptr = (u8 *) & data32;
+ 		}
+ 	}
+-	ORC_WR(hcsp->HCS_Base + ORC_RISCCTL, PRGMRST);	/* Reset program to 0   */
+-	ORC_WR(hcsp->HCS_Base + ORC_GCFG, bData);	/* Disable EEPROM programming */
++
++	/* Success */
++	outb(PRGMRST, host->base + ORC_RISCCTL);	/* Reset program to 0   */
++	outb(data, host->base + ORC_GCFG);	/* Disable EEPROM programming */
+ 	return 1;
+ }
+ 
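
The download loop above moves 4KB of firmware through a byte-wide window
(ORC_EBIOSDATA) into a 32-bit-wide one (ORC_RISCRAM) by accumulating four
bytes in a u32 and flushing it on every fourth byte; the verify pass then
replays the same walk and compares each readback word. The accumulation
trick in isolation (illustrative; the two function pointers stand in for
the port I/O above):

	static void copy_4k_to_words(u16 src_addr,
				     u8 (*read_byte)(u16),
				     void (*write_word)(u32))
	{
		u32 word = 0;
		u8 *p = (u8 *)&word;
		int i;

		for (i = 0; i < 0x1000; i++, src_addr++) {
			*p++ = read_byte(src_addr);
			if ((i % 4) == 3) {	/* word full: flush it */
				write_word(word);
				p = (u8 *)&word;
			}
		}
	}
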
+ /***************************************************************************/
+-static void setup_SCBs(ORC_HCS * hcsp)
++static void setup_SCBs(struct orc_host * host)
+ {
+-	ORC_SCB *pVirScb;
++	struct orc_scb *scb;
+ 	int i;
+-	ESCB *pVirEscb;
+-	dma_addr_t pPhysEscb;
++	struct orc_extended_scb *escb;
++	dma_addr_t escb_phys;
+ 
+-	/* Setup SCB HCS_Base and SCB Size registers */
+-	ORC_WR(hcsp->HCS_Base + ORC_SCBSIZE, ORC_MAXQUEUE);	/* Total number of SCBs */
+-	/* SCB HCS_Base address 0      */
+-	ORC_WRLONG(hcsp->HCS_Base + ORC_SCBBASE0, hcsp->HCS_physScbArray);
+-	/* SCB HCS_Base address 1      */
+-	ORC_WRLONG(hcsp->HCS_Base + ORC_SCBBASE1, hcsp->HCS_physScbArray);
++	/* Setup SCB base and SCB Size registers */
++	outb(ORC_MAXQUEUE, host->base + ORC_SCBSIZE);	/* Total number of SCBs */
++	/* SCB base address 0      */
++	outl(host->scb_phys, host->base + ORC_SCBBASE0);
++	/* SCB base address 1      */
++	outl(host->scb_phys, host->base + ORC_SCBBASE1);
+ 
+ 	/* setup scatter list address with one buffer */
+-	pVirScb = hcsp->HCS_virScbArray;
+-	pVirEscb = hcsp->HCS_virEscbArray;
++	scb = host->scb_virt;
++	escb = host->escb_virt;
+ 
+ 	for (i = 0; i < ORC_MAXQUEUE; i++) {
+-		pPhysEscb = (hcsp->HCS_physEscbArray + (sizeof(ESCB) * i));
+-		pVirScb->SCB_SGPAddr = (U32) pPhysEscb;
+-		pVirScb->SCB_SensePAddr = (U32) pPhysEscb;
+-		pVirScb->SCB_EScb = pVirEscb;
+-		pVirScb->SCB_ScbIdx = i;
+-		pVirScb++;
+-		pVirEscb++;
++		escb_phys = (host->escb_phys + (sizeof(struct orc_extended_scb) * i));
++		scb->sg_addr = (u32) escb_phys;
++		scb->sense_addr = (u32) escb_phys;
++		scb->escb = escb;
++		scb->scbidx = i;
++		scb++;
++		escb++;
+ 	}
+-
+-	return;
+ }
+ 
+-/***************************************************************************/
+-static void initAFlag(ORC_HCS * hcsp)
++/**
++ *	init_alloc_map		-	initialise allocation map
++ *	@host: host map to configure
++ *
++ *	Initialise the allocation maps for this device. If the device
++ *	is not quiescent, the caller must hold the allocation lock.
++ */
++
++static void init_alloc_map(struct orc_host * host)
+ {
+-	UCHAR i, j;
++	u8 i, j;
+ 
+ 	for (i = 0; i < MAX_CHANNELS; i++) {
+ 		for (j = 0; j < 8; j++) {
+-			hcsp->BitAllocFlag[i][j] = 0xffffffff;
++			host->allocation_map[i][j] = 0xffffffff;
+ 		}
+ 	}
+ }
+ 
+-/***************************************************************************/
+-static int init_orchid(ORC_HCS * hcsp)
++/**
++ *	init_orchid		-	initialise the host adapter
++ *	@host: host adapter to initialise
++ *
++ *	Initialise the controller and, if necessary, load the firmware.
++ *
++ *	Returns -1 if the initialisation fails.
++ */
++
++static int init_orchid(struct orc_host * host)
+ {
+-	UBYTE *readBytep;
+-	USHORT revision;
+-	UCHAR i;
+-
+-	initAFlag(hcsp);
+-	ORC_WR(hcsp->HCS_Base + ORC_GIMSK, 0xFF);	/* Disable all interrupt        */
+-	if (ORC_RD(hcsp->HCS_Base, ORC_HSTUS) & RREADY) {	/* Orchid is ready              */
+-		revision = get_FW_version(hcsp);
++	u8 *ptr;
++	u16 revision;
++	u8 i;
++
++	init_alloc_map(host);
++	outb(0xFF, host->base + ORC_GIMSK);	/* Disable all interrupts */
++
++	if (inb(host->base + ORC_HSTUS) & RREADY) {	/* Orchid is ready */
++		revision = orc_read_fwrev(host);
+ 		if (revision == 0xFFFF) {
+-			ORC_WR(hcsp->HCS_Base + ORC_HCTRL, DEVRST);	/* Reset Host Adapter   */
+-			if (waitChipReady(hcsp) == 0)
+-				return (-1);
+-			load_FW(hcsp);	/* Download FW                  */
+-			setup_SCBs(hcsp);	/* Setup SCB HCS_Base and SCB Size registers */
+-			ORC_WR(hcsp->HCS_Base + ORC_HCTRL, 0);	/* clear HOSTSTOP       */
+-			if (waitFWReady(hcsp) == 0)
+-				return (-1);
++			outb(DEVRST, host->base + ORC_HCTRL);	/* Reset Host Adapter   */
++			if (wait_chip_ready(host) == 0)
++				return -1;
++			orc_load_firmware(host);	/* Download FW                  */
++			setup_SCBs(host);	/* Setup SCB base and SCB Size registers */
++			outb(0x00, host->base + ORC_HCTRL);	/* clear HOSTSTOP       */
++			if (wait_firmware_ready(host) == 0)
++				return -1;
+ 			/* Wait for firmware ready     */
+ 		} else {
+-			setup_SCBs(hcsp);	/* Setup SCB HCS_Base and SCB Size registers */
++			setup_SCBs(host);	/* Setup SCB base and SCB Size registers */
+ 		}
+ 	} else {		/* Orchid is not Ready          */
+-		ORC_WR(hcsp->HCS_Base + ORC_HCTRL, DEVRST);	/* Reset Host Adapter   */
+-		if (waitChipReady(hcsp) == 0)
+-			return (-1);
+-		load_FW(hcsp);	/* Download FW                  */
+-		setup_SCBs(hcsp);	/* Setup SCB HCS_Base and SCB Size registers */
+-		ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);	/* Do Hardware Reset &  */
++		outb(DEVRST, host->base + ORC_HCTRL);	/* Reset Host Adapter   */
++		if (wait_chip_ready(host) == 0)
++			return -1;
++		orc_load_firmware(host);	/* Download FW                  */
++		setup_SCBs(host);	/* Setup SCB base and SCB Size registers */
++		outb(HDO, host->base + ORC_HCTRL);	/* Do Hardware Reset &  */
+ 
+ 		/*     clear HOSTSTOP  */
+-		if (waitFWReady(hcsp) == 0)		/* Wait for firmware ready      */
+-			return (-1);
++		if (wait_firmware_ready(host) == 0)		/* Wait for firmware ready      */
++			return -1;
+ 	}
+ 
+-/*------------- get serial EEProm settting -------*/
++	/* Load an EEPROM copy into RAM */
++	/* Assumes single threaded at this point */
++	read_eeprom(host);
+ 
+-	read_eeprom(hcsp);
+-
+-	if (nvramp->Revision != 1)
+-		return (-1);
+-
+-	hcsp->HCS_SCSI_ID = nvramp->SCSI0Id;
+-	hcsp->HCS_BIOS = nvramp->BIOSConfig1;
+-	hcsp->HCS_MaxTar = MAX_TARGETS;
+-	readBytep = (UCHAR *) & (nvramp->Target00Config);
+-	for (i = 0; i < 16; readBytep++, i++) {
+-		hcsp->TargetFlag[i] = *readBytep;
+-		hcsp->MaximumTags[i] = ORC_MAXTAGS;
+-	}			/* for                          */
++	if (nvramp->revision != 1)
++		return -1;
+ 
+-	if (nvramp->SCSI0Config & NCC_BUSRESET) {	/* Reset SCSI bus               */
+-		hcsp->HCS_Flags |= HCF_SCSI_RESET;
++	host->scsi_id = nvramp->scsi_id;
++	host->BIOScfg = nvramp->BIOSConfig1;
++	host->max_targets = MAX_TARGETS;
++	ptr = (u8 *) & (nvramp->Target00Config);
++	for (i = 0; i < 16; ptr++, i++) {
++		host->target_flag[i] = *ptr;
++		host->max_tags[i] = ORC_MAXTAGS;
+ 	}
+-	ORC_WR(hcsp->HCS_Base + ORC_GIMSK, 0xFB);	/* enable RP FIFO interrupt     */
+-	return (0);
++
++	if (nvramp->SCSI0Config & NCC_BUSRESET)
++		host->flags |= HCF_SCSI_RESET;
++	outb(0xFB, host->base + ORC_GIMSK);	/* enable RP FIFO interrupt     */
++	return 0;
+ }
+ 
+-/*****************************************************************************
+- Function name  : orc_reset_scsi_bus
+- Description    : Reset registers, reset a hanging bus and
+-                  kill active and disconnected commands for target w/o soft reset
+- Input          : pHCB  -       Pointer to host adapter structure
+- Output         : None.
+- Return         : pSRB  -       Pointer to SCSI request block.
+-*****************************************************************************/
+-static int orc_reset_scsi_bus(ORC_HCS * pHCB)
++/**
++ *	orc_reset_scsi_bus		-	perform bus reset
++ *	@host: host being reset
++ *
++ *	Perform a full bus reset on the adapter.
++ */
++
++static int orc_reset_scsi_bus(struct orc_host * host)
+ {				/* I need Host Control Block Information */
+-	ULONG flags;
++	unsigned long flags;
+ 
+-	spin_lock_irqsave(&(pHCB->BitAllocFlagLock), flags);
++	spin_lock_irqsave(&host->allocation_lock, flags);
+ 
+-	initAFlag(pHCB);
++	init_alloc_map(host);
+ 	/* reset scsi bus */
+-	ORC_WR(pHCB->HCS_Base + ORC_HCTRL, SCSIRST);
+-	if (waitSCSIRSTdone(pHCB) == 0) {
+-		spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags);
++	outb(SCSIRST, host->base + ORC_HCTRL);
++	/* FIXME: We can spend up to a second with the lock held and
++	   interrupts off here */
++	if (wait_scsi_reset_done(host) == 0) {
++		spin_unlock_irqrestore(&host->allocation_lock, flags);
+ 		return FAILED;
+ 	} else {
+-		spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags);
++		spin_unlock_irqrestore(&host->allocation_lock, flags);
+ 		return SUCCESS;
+ 	}
+ }
+ 
+-/*****************************************************************************
+- Function name  : orc_device_reset
+- Description    : Reset registers, reset a hanging bus and
+-                  kill active and disconnected commands for target w/o soft reset
+- Input          : pHCB  -       Pointer to host adapter structure
+- Output         : None.
+- Return         : pSRB  -       Pointer to SCSI request block.
+-*****************************************************************************/
+-static int orc_device_reset(ORC_HCS * pHCB, struct scsi_cmnd *SCpnt, unsigned int target)
++/**
++ *	orc_device_reset	-	device reset handler
++ *	@host: host to reset
++ *	@cmd: command causing the reset
++ *	@target: target device
++ *
++ *	Reset registers, reset a hanging bus and kill active and disconnected
++ *	commands for target w/o soft reset
++ */
++
++static int orc_device_reset(struct orc_host * host, struct scsi_cmnd *cmd, unsigned int target)
+ {				/* I need Host Control Block Information */
+-	ORC_SCB *pScb;
+-	ESCB *pVirEscb;
+-	ORC_SCB *pVirScb;
+-	UCHAR i;
+-	ULONG flags;
+-
+-	spin_lock_irqsave(&(pHCB->BitAllocFlagLock), flags);
+-	pScb = (ORC_SCB *) NULL;
+-	pVirEscb = (ESCB *) NULL;
++	struct orc_scb *scb;
++	struct orc_extended_scb *escb;
++	struct orc_scb *host_scb;
++	u8 i;
++	unsigned long flags;
++
++	spin_lock_irqsave(&(host->allocation_lock), flags);
++	scb = (struct orc_scb *) NULL;
++	escb = (struct orc_extended_scb *) NULL;
+ 
+ 	/* setup scatter list address with one buffer */
+-	pVirScb = pHCB->HCS_virScbArray;
++	host_scb = host->scb_virt;
+ 
+-	initAFlag(pHCB);
+-	/* device reset */
++	/* FIXME: is this safe if we then fail to issue the reset or race
++	   a completion? */
++	init_alloc_map(host);
++
++	/* Find the scb corresponding to the command */
+ 	for (i = 0; i < ORC_MAXQUEUE; i++) {
+-		pVirEscb = pVirScb->SCB_EScb;
+-		if ((pVirScb->SCB_Status) && (pVirEscb->SCB_Srb == SCpnt))
++		escb = host_scb->escb;
++		if (host_scb->status && escb->srb == cmd)
+ 			break;
+-		pVirScb++;
++		host_scb++;
+ 	}
+ 
+ 	if (i == ORC_MAXQUEUE) {
+-		printk("Unable to Reset - No SCB Found\n");
+-		spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags);
++		printk(KERN_ERR "Unable to Reset - No SCB Found\n");
++		spin_unlock_irqrestore(&(host->allocation_lock), flags);
+ 		return FAILED;
+ 	}
+-	if ((pScb = orc_alloc_scb(pHCB)) == NULL) {
+-		spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags);
++
++	/* Allocate a new SCB for the reset command to the firmware */
++	if ((scb = __orc_alloc_scb(host)) == NULL) {
++		/* Can't happen.. */
++		spin_unlock_irqrestore(&(host->allocation_lock), flags);
+ 		return FAILED;
+ 	}
+-	pScb->SCB_Opcode = ORC_BUSDEVRST;
+-	pScb->SCB_Target = target;
+-	pScb->SCB_HaStat = 0;
+-	pScb->SCB_TaStat = 0;
+-	pScb->SCB_Status = 0x0;
+-	pScb->SCB_Link = 0xFF;
+-	pScb->SCB_Reserved0 = 0;
+-	pScb->SCB_Reserved1 = 0;
+-	pScb->SCB_XferLen = 0;
+-	pScb->SCB_SGLen = 0;
+-
+-	pVirEscb->SCB_Srb = NULL;
+-	pVirEscb->SCB_Srb = SCpnt;
+-	orc_exec_scb(pHCB, pScb);	/* Start execute SCB            */
+-	spin_unlock_irqrestore(&(pHCB->BitAllocFlagLock), flags);
++
++	/* Device reset is handled by the firmware; we fill in an SCB and
++	   fire it at the controller, which does the rest */
++	scb->opcode = ORC_BUSDEVRST;
++	scb->target = target;
++	scb->hastat = 0;
++	scb->tastat = 0;
++	scb->status = 0x0;
++	scb->link = 0xFF;
++	scb->reserved0 = 0;
++	scb->reserved1 = 0;
++	scb->xferlen = 0;
++	scb->sg_len = 0;
++
++	escb->srb = NULL;
++	escb->srb = cmd;
++	orc_exec_scb(host, scb);	/* Start execute SCB            */
++	spin_unlock_irqrestore(&host->allocation_lock, flags);
+ 	return SUCCESS;
+ }
+ 
++/**
++ *	__orc_alloc_scb		-		allocate an SCB
++ *	@host: host to allocate from
++ *
++ *	Allocate an SCB and return a pointer to the SCB object. NULL
++ *	is returned if no SCB is free. The caller must already hold
++ *	the allocator lock at this point.
++ */
+ 
+-/***************************************************************************/
+-static ORC_SCB *__orc_alloc_scb(ORC_HCS * hcsp)
++
++static struct orc_scb *__orc_alloc_scb(struct orc_host * host)
+ {
+-	ORC_SCB *pTmpScb;
+-	UCHAR Ch;
+-	ULONG idx;
+-	UCHAR index;
+-	UCHAR i;
++	u8 channel;
++	unsigned long idx;
++	u8 index;
++	u8 i;
+ 
+-	Ch = hcsp->HCS_Index;
++	channel = host->index;
+ 	for (i = 0; i < 8; i++) {
+ 		for (index = 0; index < 32; index++) {
+-			if ((hcsp->BitAllocFlag[Ch][i] >> index) & 0x01) {
+-				hcsp->BitAllocFlag[Ch][i] &= ~(1 << index);
++			if ((host->allocation_map[channel][i] >> index) & 0x01) {
++				host->allocation_map[channel][i] &= ~(1 << index);
+ 				break;
+ 			}
+ 		}
+ 		idx = index + 32 * i;
+-		pTmpScb = (ORC_SCB *) ((ULONG) hcsp->HCS_virScbArray + (idx * sizeof(ORC_SCB)));
+-		return (pTmpScb);
++		/* Translate the index to a structure instance */
++		return (struct orc_scb *) ((unsigned long) host->scb_virt + (idx * sizeof(struct orc_scb)));
+ 	}
+-	return (NULL);
++	return NULL;
+ }
+ 
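
The SCB pool is managed as a per-channel bitmap (8 words of 32 bits, one bit
per possible SCB): allocation clears the first set bit and converts its
position into an index into the scb_virt array, and orc_release_scb() below
sets the bit again on free. The index arithmetic, assuming the caller holds
allocation_lock and has already found a set bit (a sketch; scb_from_bit() is
not part of the patch):

	static struct orc_scb *scb_from_bit(struct orc_host *host,
					    int word, int bit)
	{
		unsigned long idx = bit + 32 * word;

		host->allocation_map[host->index][word] &= ~(1UL << bit);
		return &host->scb_virt[idx];	/* same as the cast above */
	}
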
+-static ORC_SCB *orc_alloc_scb(ORC_HCS * hcsp)
++/**
++ *	orc_alloc_scb		-		allocate an SCB
++ *	@host: host to allocate from
++ *
++ *	Allocate an SCB and return a pointer to the SCB object. NULL
++ *	is returned if no SCB is free.
++ */
++
++static struct orc_scb *orc_alloc_scb(struct orc_host * host)
+ {
+-	ORC_SCB *pTmpScb;
+-	ULONG flags;
++	struct orc_scb *scb;
++	unsigned long flags;
+ 
+-	spin_lock_irqsave(&(hcsp->BitAllocFlagLock), flags);
+-	pTmpScb = __orc_alloc_scb(hcsp);
+-	spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags);
+-	return (pTmpScb);
++	spin_lock_irqsave(&host->allocation_lock, flags);
++	scb = __orc_alloc_scb(host);
++	spin_unlock_irqrestore(&host->allocation_lock, flags);
++	return scb;
+ }
+ 
++/**
++ *	orc_release_scb			-	release an SCB
++ *	@host: host owning the SCB
++ *	@scb: SCB that is now free
++ *
++ *	Called to return a completed SCB to the allocation pool. Before
++ *	calling, the SCB must be out of use on both the host and the HA.
++ */
+ 
+-/***************************************************************************/
+-static void orc_release_scb(ORC_HCS * hcsp, ORC_SCB * scbp)
++static void orc_release_scb(struct orc_host *host, struct orc_scb *scb)
+ {
+-	ULONG flags;
+-	UCHAR Index;
+-	UCHAR i;
+-	UCHAR Ch;
+-
+-	spin_lock_irqsave(&(hcsp->BitAllocFlagLock), flags);
+-	Ch = hcsp->HCS_Index;
+-	Index = scbp->SCB_ScbIdx;
+-	i = Index / 32;
+-	Index %= 32;
+-	hcsp->BitAllocFlag[Ch][i] |= (1 << Index);
+-	spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags);
++	unsigned long flags;
++	u8 index, i, channel;
++
++	spin_lock_irqsave(&(host->allocation_lock), flags);
++	channel = host->index;	/* Channel */
++	index = scb->scbidx;
++	i = index / 32;
++	index %= 32;
++	host->allocation_map[channel][i] |= (1 << index);
++	spin_unlock_irqrestore(&(host->allocation_lock), flags);
+ }
+ 
+-/*****************************************************************************
+- Function name  : abort_SCB
+- Description    : Abort a queued command.
+-	                 (commands that are on the bus can't be aborted easily)
+- Input          : pHCB  -       Pointer to host adapter structure
+- Output         : None.
+- Return         : pSRB  -       Pointer to SCSI request block.
+-*****************************************************************************/
+-static int abort_SCB(ORC_HCS * hcsp, ORC_SCB * pScb)
++/**
++ *	orchid_abort_scb	-	abort a command
++ *	@host: host owning the SCB
++ *	@scb: SCB to abort
++ *
++ *	Abort a queued command that has been passed to the firmware layer
++ *	if possible. This is all handled by the firmware. We ask the firmware
++ *	and it either aborts the command or fails.
++ */
++
++static int orchid_abort_scb(struct orc_host * host, struct orc_scb * scb)
+ {
+-	unsigned char bData, bStatus;
++	unsigned char data, status;
+ 
+-	ORC_WR(hcsp->HCS_Base + ORC_HDATA, ORC_CMD_ABORT_SCB);	/* Write command */
+-	ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+-	if (waitHDOoff(hcsp) == 0)	/* Wait HDO off   */
++	outb(ORC_CMD_ABORT_SCB, host->base + ORC_HDATA);	/* Write command */
++	outb(HDO, host->base + ORC_HCTRL);
++	if (wait_HDO_off(host) == 0)	/* Wait HDO off   */
+ 		return 0;
+ 
+-	ORC_WR(hcsp->HCS_Base + ORC_HDATA, pScb->SCB_ScbIdx);	/* Write address */
+-	ORC_WR(hcsp->HCS_Base + ORC_HCTRL, HDO);
+-	if (waitHDOoff(hcsp) == 0)	/* Wait HDO off   */
++	outb(scb->scbidx, host->base + ORC_HDATA);	/* Write address */
++	outb(HDO, host->base + ORC_HCTRL);
++	if (wait_HDO_off(host) == 0)	/* Wait HDO off   */
+ 		return 0;
+ 
+-	if (waitHDIset(hcsp, &bData) == 0)	/* Wait HDI set   */
++	if (wait_hdi_set(host, &data) == 0)	/* Wait HDI set   */
+ 		return 0;
+-	bStatus = ORC_RD(hcsp->HCS_Base, ORC_HDATA);
+-	ORC_WR(hcsp->HCS_Base + ORC_HSTUS, bData);	/* Clear HDI    */
++	status = inb(host->base + ORC_HDATA);
++	outb(data, host->base + ORC_HSTUS);	/* Clear HDI    */
+ 
+-	if (bStatus == 1)	/* 0 - Successfully               */
++	if (status == 1)	/* 0 - Successfully               */
+ 		return 0;	/* 1 - Fail                     */
+ 	return 1;
+ }
+ 
+-/*****************************************************************************
+- Function name  : inia100_abort
+- Description    : Abort a queued command.
+-	                 (commands that are on the bus can't be aborted easily)
+- Input          : pHCB  -       Pointer to host adapter structure
+- Output         : None.
+- Return         : pSRB  -       Pointer to SCSI request block.
+-*****************************************************************************/
+-static int orc_abort_srb(ORC_HCS * hcsp, struct scsi_cmnd *SCpnt)
++static int inia100_abort_cmd(struct orc_host * host, struct scsi_cmnd *cmd)
+ {
+-	ESCB *pVirEscb;
+-	ORC_SCB *pVirScb;
+-	UCHAR i;
+-	ULONG flags;
+-
+-	spin_lock_irqsave(&(hcsp->BitAllocFlagLock), flags);
+-
+-	pVirScb = hcsp->HCS_virScbArray;
+-
+-	for (i = 0; i < ORC_MAXQUEUE; i++, pVirScb++) {
+-		pVirEscb = pVirScb->SCB_EScb;
+-		if ((pVirScb->SCB_Status) && (pVirEscb->SCB_Srb == SCpnt)) {
+-			if (pVirScb->SCB_TagMsg == 0) {
+-				spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags);
+-				return FAILED;
++	struct orc_extended_scb *escb;
++	struct orc_scb *scb;
++	u8 i;
++	unsigned long flags;
++
++	spin_lock_irqsave(&(host->allocation_lock), flags);
++
++	scb = host->scb_virt;
++
++	/* Walk the queue until we find the SCB that belongs to the command
++	   block. This isn't a performance critical path so a walk in the park
++	   here does no harm */
++
++	for (i = 0; i < ORC_MAXQUEUE; i++, scb++) {
++		escb = scb->escb;
++		if (scb->status && escb->srb == cmd) {
++			if (scb->tag_msg == 0) {
++				goto out;
+ 			} else {
+-				if (abort_SCB(hcsp, pVirScb)) {
+-					pVirEscb->SCB_Srb = NULL;
+-					spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags);
++				/* Issue an ABORT to the firmware */
++				if (orchid_abort_scb(host, scb)) {
++					escb->srb = NULL;
++					spin_unlock_irqrestore(&host->allocation_lock, flags);
+ 					return SUCCESS;
+-				} else {
+-					spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags);
+-					return FAILED;
+-				}
++				} else
++					goto out;
+ 			}
+ 		}
+ 	}
+-	spin_unlock_irqrestore(&(hcsp->BitAllocFlagLock), flags);
++out:
++	spin_unlock_irqrestore(&host->allocation_lock, flags);
+ 	return FAILED;
+ }
+ 
+-/***********************************************************************
+- Routine Description:
+-	  This is the interrupt service routine for the Orchid SCSI adapter.
+-	  It reads the interrupt register to determine if the adapter is indeed
+-	  the source of the interrupt and clears the interrupt at the device.
+- Arguments:
+-	  HwDeviceExtension - HBA miniport driver's adapter data storage
+- Return Value:
+-***********************************************************************/
+-static void orc_interrupt(
+-			  ORC_HCS * hcsp
+-)
++/**
++ *	orc_interrupt		-	IRQ processing
++ *	@host: Host causing the interrupt
++ *
++ *	This function is called from the IRQ handler and protected
++ *	by the host lock. While the controller reports that there are
++ *	SCBs for processing we pull them off the controller, turn the
++ *	index into a host address pointer to the SCB and call the SCB
++ *	handler.
++ *
++ *	Returns IRQ_HANDLED if any SCBs were processed, IRQ_NONE otherwise
++ */
++
++static irqreturn_t orc_interrupt(struct orc_host * host)
+ {
+-	BYTE bScbIdx;
+-	ORC_SCB *pScb;
++	u8 scb_index;
++	struct orc_scb *scb;
+ 
+-	if (ORC_RD(hcsp->HCS_Base, ORC_RQUEUECNT) == 0) {
+-		return;		// 0;
++	/* Check if we have an SCB queued for servicing */
++	if (inb(host->base + ORC_RQUEUECNT) == 0)
++		return IRQ_NONE;
+ 
+-	}
+ 	do {
+-		bScbIdx = ORC_RD(hcsp->HCS_Base, ORC_RQUEUE);
+-
+-		pScb = (ORC_SCB *) ((ULONG) hcsp->HCS_virScbArray + (ULONG) (sizeof(ORC_SCB) * bScbIdx));
+-		pScb->SCB_Status = 0x0;
+-
+-		inia100SCBPost((BYTE *) hcsp, (BYTE *) pScb);
+-	} while (ORC_RD(hcsp->HCS_Base, ORC_RQUEUECNT));
+-	return;			//1;
++		/* Get the SCB index of the SCB to service */
++		scb_index = inb(host->base + ORC_RQUEUE);
+ 
++		/* Translate it back to a host pointer */
++		scb = (struct orc_scb *) ((unsigned long) host->scb_virt + (unsigned long) (sizeof(struct orc_scb) * scb_index));
++		scb->status = 0x0;
++		/* Process the SCB */
++		inia100_scb_handler(host, scb);
++	} while (inb(host->base + ORC_RQUEUECNT));
++	return IRQ_HANDLED;
+ }				/* End of I1060Interrupt() */
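
The cast-heavy pointer construction in orc_interrupt() is ordinary array
indexing over the DMA-coherent SCB pool; returning IRQ_NONE when
ORC_RQUEUECNT is zero also lets the kernel's spurious-interrupt detection
work on shared lines. The translation, sketched plainly (scb_by_index() is
illustrative only):

	static struct orc_scb *scb_by_index(struct orc_host *host, u8 index)
	{
		return &host->scb_virt[index];
	}
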
+ 
+-/*****************************************************************************
+- Function name  : inia100BuildSCB
+- Description    : 
+- Input          : pHCB  -       Pointer to host adapter structure
+- Output         : None.
+- Return         : pSRB  -       Pointer to SCSI request block.
+-*****************************************************************************/
+-static void inia100BuildSCB(ORC_HCS * pHCB, ORC_SCB * pSCB, struct scsi_cmnd * SCpnt)
++/**
++ *	inia100_build_scb	-	build SCB
++ *	@host: host owning the control block
++ *	@scb: control block to use
++ *	@cmd: Mid layer command
++ *
++ *	Build a host adapter control block from the SCSI mid layer command
++ */
++
++static void inia100_build_scb(struct orc_host * host, struct orc_scb * scb, struct scsi_cmnd * cmd)
+ {				/* Create corresponding SCB     */
+-	struct scatterlist *pSrbSG;
+-	ORC_SG *pSG;		/* Pointer to SG list           */
++	struct scatterlist *sg;
++	struct orc_sgent *sgent;		/* Pointer to SG list           */
+ 	int i, count_sg;
+-	ESCB *pEScb;
++	struct orc_extended_scb *escb;
+ 
+-	pEScb = pSCB->SCB_EScb;
+-	pEScb->SCB_Srb = SCpnt;
+-	pSG = NULL;
+-
+-	pSCB->SCB_Opcode = ORC_EXECSCSI;
+-	pSCB->SCB_Flags = SCF_NO_DCHK;	/* Clear done bit               */
+-	pSCB->SCB_Target = SCpnt->device->id;
+-	pSCB->SCB_Lun = SCpnt->device->lun;
+-	pSCB->SCB_Reserved0 = 0;
+-	pSCB->SCB_Reserved1 = 0;
+-	pSCB->SCB_SGLen = 0;
+-
+-	if ((pSCB->SCB_XferLen = (U32) SCpnt->request_bufflen)) {
+-		pSG = (ORC_SG *) & pEScb->ESCB_SGList[0];
+-		if (SCpnt->use_sg) {
+-			pSrbSG = (struct scatterlist *) SCpnt->request_buffer;
+-			count_sg = pci_map_sg(pHCB->pdev, pSrbSG, SCpnt->use_sg,
+-					SCpnt->sc_data_direction);
+-			pSCB->SCB_SGLen = (U32) (count_sg * 8);
+-			for (i = 0; i < count_sg; i++, pSG++, pSrbSG++) {
+-				pSG->SG_Ptr = (U32) sg_dma_address(pSrbSG);
+-				pSG->SG_Len = (U32) sg_dma_len(pSrbSG);
+-			}
+-		} else if (SCpnt->request_bufflen != 0) {/* Non SG */
+-			pSCB->SCB_SGLen = 0x8;
+-			SCpnt->SCp.dma_handle = pci_map_single(pHCB->pdev,
+-					SCpnt->request_buffer,
+-					SCpnt->request_bufflen,
+-					SCpnt->sc_data_direction);
+-			pSG->SG_Ptr = (U32) SCpnt->SCp.dma_handle;
+-			pSG->SG_Len = (U32) SCpnt->request_bufflen;
++	/* Links between the escb, scb and Linux scsi midlayer cmd */
++	escb = scb->escb;
++	escb->srb = cmd;
++	sgent = NULL;
++
++	/* Set up the SCB to do a SCSI command block */
++	scb->opcode = ORC_EXECSCSI;
++	scb->flags = SCF_NO_DCHK;	/* Clear done bit               */
++	scb->target = cmd->device->id;
++	scb->lun = cmd->device->lun;
++	scb->reserved0 = 0;
++	scb->reserved1 = 0;
++	scb->sg_len = 0;
++
++	scb->xferlen = (u32) scsi_bufflen(cmd);
++	sgent = (struct orc_sgent *) & escb->sglist[0];
++
++	count_sg = scsi_dma_map(cmd);
++	BUG_ON(count_sg < 0);
++
++	/* Build the scatter gather lists */
++	if (count_sg) {
++		scb->sg_len = (u32) (count_sg * 8);
++		scsi_for_each_sg(cmd, sg, count_sg, i) {
++			sgent->base = (u32) sg_dma_address(sg);
++			sgent->length = (u32) sg_dma_len(sg);
++			sgent++;
++		}
+ 		} else {
+-			pSCB->SCB_SGLen = 0;
+-			pSG->SG_Ptr = 0;
+-			pSG->SG_Len = 0;
+-		}
+-	}
+-	pSCB->SCB_SGPAddr = (U32) pSCB->SCB_SensePAddr;
+-	pSCB->SCB_HaStat = 0;
+-	pSCB->SCB_TaStat = 0;
+-	pSCB->SCB_Link = 0xFF;
+-	pSCB->SCB_SenseLen = SENSE_SIZE;
+-	pSCB->SCB_CDBLen = SCpnt->cmd_len;
+-	if (pSCB->SCB_CDBLen >= IMAX_CDB) {
+-		printk("max cdb length= %x\b", SCpnt->cmd_len);
+-		pSCB->SCB_CDBLen = IMAX_CDB;
+-	}
+-	pSCB->SCB_Ident = SCpnt->device->lun | DISC_ALLOW;
+-	if (SCpnt->device->tagged_supported) {	/* Tag Support                  */
+-		pSCB->SCB_TagMsg = SIMPLE_QUEUE_TAG;	/* Do simple tag only   */
++		scb->sg_len = 0;
++		sgent->base = 0;
++		sgent->length = 0;
++	}
++	scb->sg_addr = (u32) scb->sense_addr;
++	scb->hastat = 0;
++	scb->tastat = 0;
++	scb->link = 0xFF;
++	scb->sense_len = SENSE_SIZE;
++	scb->cdb_len = cmd->cmd_len;
++	if (scb->cdb_len >= IMAX_CDB) {
++		printk(KERN_ERR "max cdb length= %x\n", cmd->cmd_len);
++		scb->cdb_len = IMAX_CDB;
++	}
++	scb->ident = cmd->device->lun | DISC_ALLOW;
++	if (cmd->device->tagged_supported) {	/* Tag Support                  */
++		scb->tag_msg = SIMPLE_QUEUE_TAG;	/* Do simple tag only   */
+ 	} else {
+-		pSCB->SCB_TagMsg = 0;	/* No tag support               */
++		scb->tag_msg = 0;	/* No tag support               */
+ 	}
+-	memcpy(&pSCB->SCB_CDB[0], &SCpnt->cmnd, pSCB->SCB_CDBLen);
+-	return;
++	memcpy(&scb->cdb[0], &cmd->cmnd, scb->cdb_len);
+ }
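
inia100_build_scb() now leans on scsi_dma_map(), which maps the command's
scatterlist for DMA and returns the number of entries used: 0 for a command
with no data transfer, negative on mapping failure. The canonical consumer
shape, reduced to the s/g table fill (a sketch; fill_sg_table() is not in
the patch, and a production caller would propagate the error instead of the
BUG_ON() used above):

	static int fill_sg_table(struct scsi_cmnd *cmd, struct orc_sgent *sgent)
	{
		struct scatterlist *sg;
		int i, count_sg = scsi_dma_map(cmd);

		if (count_sg < 0)	/* mapping failed */
			return count_sg;
		scsi_for_each_sg(cmd, sg, count_sg, i) {
			sgent->base = (u32) sg_dma_address(sg);
			sgent->length = (u32) sg_dma_len(sg);
			sgent++;
		}
		return count_sg;	/* 0 means no data phase */
	}

The matching teardown is the single scsi_dma_unmap() call in the completion
handler, which replaces the old use_sg/dma_handle branching.
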
+ 
+-/*****************************************************************************
+- Function name  : inia100_queue
+- Description    : Queue a command and setup interrupts for a free bus.
+- Input          : pHCB  -       Pointer to host adapter structure
+- Output         : None.
+- Return         : pSRB  -       Pointer to SCSI request block.
+-*****************************************************************************/
+-static int inia100_queue(struct scsi_cmnd * SCpnt, void (*done) (struct scsi_cmnd *))
++/**
++ *	inia100_queue		-	queue command with host
++ *	@cmd: Command block
++ *	@done: Completion function
++ *
++ *	Called by the mid layer to queue a command. Process the command
++ *	block, build the host-specific SCB structures and, if there is room,
++ *	queue the command down to the controller.
++ */
++
++static int inia100_queue(struct scsi_cmnd * cmd, void (*done) (struct scsi_cmnd *))
+ {
+-	register ORC_SCB *pSCB;
+-	ORC_HCS *pHCB;		/* Point to Host adapter control block */
++	struct orc_scb *scb;
++	struct orc_host *host;		/* Point to Host adapter control block */
+ 
+-	pHCB = (ORC_HCS *) SCpnt->device->host->hostdata;
+-	SCpnt->scsi_done = done;
++	host = (struct orc_host *) cmd->device->host->hostdata;
++	cmd->scsi_done = done;
+ 	/* Get free SCSI control block  */
+-	if ((pSCB = orc_alloc_scb(pHCB)) == NULL)
++	if ((scb = orc_alloc_scb(host)) == NULL)
+ 		return SCSI_MLQUEUE_HOST_BUSY;
+ 
+-	inia100BuildSCB(pHCB, pSCB, SCpnt);
+-	orc_exec_scb(pHCB, pSCB);	/* Start execute SCB            */
+-
+-	return (0);
++	inia100_build_scb(host, scb, cmd);
++	orc_exec_scb(host, scb);	/* Start execute SCB            */
++	return 0;
+ }
+ 
+ /*****************************************************************************
+  Function name  : inia100_abort
+  Description    : Abort a queued command.
+ 	                 (commands that are on the bus can't be aborted easily)
+- Input          : pHCB  -       Pointer to host adapter structure
++ Input          : host  -       Pointer to host adapter structure
+  Output         : None.
+  Return         : pSRB  -       Pointer to SCSI request block.
+ *****************************************************************************/
+-static int inia100_abort(struct scsi_cmnd * SCpnt)
++static int inia100_abort(struct scsi_cmnd * cmd)
+ {
+-	ORC_HCS *hcsp;
++	struct orc_host *host;
+ 
+-	hcsp = (ORC_HCS *) SCpnt->device->host->hostdata;
+-	return orc_abort_srb(hcsp, SCpnt);
++	host = (struct orc_host *) cmd->device->host->hostdata;
++	return inia100_abort_cmd(host, cmd);
+ }
+ 
+ /*****************************************************************************
+  Function name  : inia100_reset
+  Description    : Reset registers, reset a hanging bus and
+                   kill active and disconnected commands for target w/o soft reset
+- Input          : pHCB  -       Pointer to host adapter structure
++ Input          : host  -       Pointer to host adapter structure
+  Output         : None.
+  Return         : pSRB  -       Pointer to SCSI request block.
+ *****************************************************************************/
+-static int inia100_bus_reset(struct scsi_cmnd * SCpnt)
++static int inia100_bus_reset(struct scsi_cmnd * cmd)
+ {				/* I need Host Control Block Information */
+-	ORC_HCS *pHCB;
+-	pHCB = (ORC_HCS *) SCpnt->device->host->hostdata;
+-	return orc_reset_scsi_bus(pHCB);
++	struct orc_host *host;
++	host = (struct orc_host *) cmd->device->host->hostdata;
++	return orc_reset_scsi_bus(host);
+ }
+ 
+ /*****************************************************************************
+  Function name  : inia100_device_reset
+  Description    : Reset the device
+- Input          : pHCB  -       Pointer to host adapter structure
++ Input          : host  -       Pointer to host adapter structure
+  Output         : None.
+  Return         : pSRB  -       Pointer to SCSI request block.
+ *****************************************************************************/
+-static int inia100_device_reset(struct scsi_cmnd * SCpnt)
++static int inia100_device_reset(struct scsi_cmnd * cmd)
+ {				/* I need Host Control Block Information */
+-	ORC_HCS *pHCB;
+-	pHCB = (ORC_HCS *) SCpnt->device->host->hostdata;
+-	return orc_device_reset(pHCB, SCpnt, scmd_id(SCpnt));
++	struct orc_host *host;
++	host = (struct orc_host *) cmd->device->host->hostdata;
++	return orc_device_reset(host, cmd, scmd_id(cmd));
+ 
+ }
+ 
+-/*****************************************************************************
+- Function name  : inia100SCBPost
+- Description    : This is callback routine be called when orc finish one
+-			SCSI command.
+- Input          : pHCB  -       Pointer to host adapter control block.
+-		  pSCB  -       Pointer to SCSI control block.
+- Output         : None.
+- Return         : None.
+-*****************************************************************************/
+-static void inia100SCBPost(BYTE * pHcb, BYTE * pScb)
++/**
++ *	inia100_scb_handler	-	interrupt callback
++ *	@host: Host causing the interrupt
++ *	@scb: SCB the controller returned as needing processing
++ *
++ *	Perform completion processing on a control block. Do the conversions
++ *	from host to SCSI midlayer error coding, save any sense data and
++ *	then complete with the midlayer and recycle the SCB.
++ */
++
++static void inia100_scb_handler(struct orc_host *host, struct orc_scb *scb)
+ {
+-	struct scsi_cmnd *pSRB;	/* Pointer to SCSI request block */
+-	ORC_HCS *pHCB;
+-	ORC_SCB *pSCB;
+-	ESCB *pEScb;
+-
+-	pHCB = (ORC_HCS *) pHcb;
+-	pSCB = (ORC_SCB *) pScb;
+-	pEScb = pSCB->SCB_EScb;
+-	if ((pSRB = (struct scsi_cmnd *) pEScb->SCB_Srb) == 0) {
+-		printk("inia100SCBPost: SRB pointer is empty\n");
+-		orc_release_scb(pHCB, pSCB);	/* Release SCB for current channel */
++	struct scsi_cmnd *cmd;	/* Pointer to SCSI request block */
++	struct orc_extended_scb *escb;
++
++	escb = scb->escb;
++	if ((cmd = (struct scsi_cmnd *) escb->srb) == NULL) {
++		printk(KERN_ERR "inia100_scb_handler: SRB pointer is empty\n");
++		orc_release_scb(host, scb);	/* Release SCB for current channel */
+ 		return;
+ 	}
+-	pEScb->SCB_Srb = NULL;
++	escb->srb = NULL;
+ 
+-	switch (pSCB->SCB_HaStat) {
++	switch (scb->hastat) {
+ 	case 0x0:
+ 	case 0xa:		/* Linked command complete without error and linked normally */
+ 	case 0xb:		/* Linked command complete without error interrupt generated */
+-		pSCB->SCB_HaStat = 0;
++		scb->hastat = 0;
+ 		break;
+ 
+ 	case 0x11:		/* Selection time out-The initiator selection or target
+ 				   reselection was not complete within the SCSI Time out period */
+-		pSCB->SCB_HaStat = DID_TIME_OUT;
++		scb->hastat = DID_TIME_OUT;
+ 		break;
+ 
+ 	case 0x14:		/* Target bus phase sequence failure-An invalid bus phase or bus
+ 				   phase sequence was requested by the target. The host adapter
+ 				   will generate a SCSI Reset Condition, notifying the host with
+ 				   a SCRD interrupt */
+-		pSCB->SCB_HaStat = DID_RESET;
++		scb->hastat = DID_RESET;
+ 		break;
+ 
+ 	case 0x1a:		/* SCB Aborted. 07/21/98 */
+-		pSCB->SCB_HaStat = DID_ABORT;
++		scb->hastat = DID_ABORT;
+ 		break;
+ 
+ 	case 0x12:		/* Data overrun/underrun-The target attempted to transfer more data
+@@ -984,46 +1022,41 @@
+ 	case 0x16:		/* Invalid CCB Operation Code-The first byte of the CCB was invalid. */
+ 
+ 	default:
+-		printk("inia100: %x %x\n", pSCB->SCB_HaStat, pSCB->SCB_TaStat);
+-		pSCB->SCB_HaStat = DID_ERROR;	/* Couldn't find any better */
++		printk(KERN_DEBUG "inia100: %x %x\n", scb->hastat, scb->tastat);
++		scb->hastat = DID_ERROR;	/* Couldn't find any better */
+ 		break;
+ 	}
+ 
+-	if (pSCB->SCB_TaStat == 2) {	/* Check condition              */
+-		memcpy((unsigned char *) &pSRB->sense_buffer[0],
+-		   (unsigned char *) &pEScb->ESCB_SGList[0], SENSE_SIZE);
+-	}
+-	pSRB->result = pSCB->SCB_TaStat | (pSCB->SCB_HaStat << 16);
+-
+-	if (pSRB->use_sg) {
+-		pci_unmap_sg(pHCB->pdev,
+-			     (struct scatterlist *)pSRB->request_buffer,
+-			     pSRB->use_sg, pSRB->sc_data_direction);
+-	} else if (pSRB->request_bufflen != 0) {
+-		pci_unmap_single(pHCB->pdev, pSRB->SCp.dma_handle,
+-				 pSRB->request_bufflen,
+-				 pSRB->sc_data_direction);
+-	}
+-
+-	pSRB->scsi_done(pSRB);	/* Notify system DONE           */
+-
+-	orc_release_scb(pHCB, pSCB);	/* Release SCB for current channel */
++	if (scb->tastat == 2) {	/* Check condition              */
++		memcpy((unsigned char *) &cmd->sense_buffer[0],
++		   (unsigned char *) &escb->sglist[0], SENSE_SIZE);
++	}
++	cmd->result = scb->tastat | (scb->hastat << 16);
++	scsi_dma_unmap(cmd);
++	cmd->scsi_done(cmd);	/* Notify system DONE           */
++	orc_release_scb(host, scb);	/* Release SCB for current channel */
+ }
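
The result word handed back to the midlayer packs the raw target status
(e.g. 2 for CHECK CONDITION) into the low byte and the translated DID_*
host status into bits 16-23, which is what the switch above feeds into
cmd->result. The decomposition, for reference (illustrative macros, not
part of the patch):

	#define RES_TARGET(res)	((res) & 0xff)		/* SCSI status byte */
	#define RES_HOST(res)	(((res) >> 16) & 0xff)	/* DID_* host code  */
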
+ 
+-/*
+- * Interrupt handler (main routine of the driver)
++/**
++ *	inia100_intr		-	interrupt handler
++ *	@irqno: Interrupt value
++ *	@devid: Host adapter
++ *
++ *	Entry point for IRQ handling. All the real work is performed
++ *	by orc_interrupt.
+  */
+ static irqreturn_t inia100_intr(int irqno, void *devid)
+ {
+-	struct Scsi_Host *host = (struct Scsi_Host *)devid;
+-	ORC_HCS *pHcb = (ORC_HCS *)host->hostdata;
++	struct Scsi_Host *shost = (struct Scsi_Host *)devid;
++	struct orc_host *host = (struct orc_host *)shost->hostdata;
+ 	unsigned long flags;
++	irqreturn_t res;
+ 
+-	spin_lock_irqsave(host->host_lock, flags);
+-	orc_interrupt(pHcb);
+-	spin_unlock_irqrestore(host->host_lock, flags);
++	spin_lock_irqsave(shost->host_lock, flags);
++	res = orc_interrupt(host);
++	spin_unlock_irqrestore(shost->host_lock, flags);
+ 
+-	return IRQ_HANDLED;
++	return res;
+ }
+ 
+ static struct scsi_host_template inia100_template = {
+@@ -1044,12 +1077,12 @@
+ 		const struct pci_device_id *id)
+ {
+ 	struct Scsi_Host *shost;
+-	ORC_HCS *pHCB;
++	struct orc_host *host;
+ 	unsigned long port, bios;
+ 	int error = -ENODEV;
+ 	u32 sz;
+-	unsigned long dBiosAdr;
+-	char *pbBiosAdr;
++	unsigned long biosaddr;
++	char *bios_phys;
+ 
+ 	if (pci_enable_device(pdev))
+ 		goto out;
+@@ -1068,55 +1101,55 @@
+ 	}
+ 
+ 	/* <02> read from base address + 0x50 offset to get the bios value. */
+-	bios = ORC_RDWORD(port, 0x50);
++	bios = inw(port + 0x50);
+ 
+ 
+-	shost = scsi_host_alloc(&inia100_template, sizeof(ORC_HCS));
++	shost = scsi_host_alloc(&inia100_template, sizeof(struct orc_host));
+ 	if (!shost)
+ 		goto out_release_region;
+ 
+-	pHCB = (ORC_HCS *)shost->hostdata;
+-	pHCB->pdev = pdev;
+-	pHCB->HCS_Base = port;
+-	pHCB->HCS_BIOS = bios;
+-	spin_lock_init(&pHCB->BitAllocFlagLock);
++	host = (struct orc_host *)shost->hostdata;
++	host->pdev = pdev;
++	host->base = port;
++	host->BIOScfg = bios;
++	spin_lock_init(&host->allocation_lock);
+ 
+ 	/* Get total memory needed for SCB */
+-	sz = ORC_MAXQUEUE * sizeof(ORC_SCB);
+-	pHCB->HCS_virScbArray = pci_alloc_consistent(pdev, sz,
+-			&pHCB->HCS_physScbArray);
+-	if (!pHCB->HCS_virScbArray) {
++	sz = ORC_MAXQUEUE * sizeof(struct orc_scb);
++	host->scb_virt = pci_alloc_consistent(pdev, sz,
++			&host->scb_phys);
++	if (!host->scb_virt) {
+ 		printk("inia100: SCB memory allocation error\n");
+ 		goto out_host_put;
+ 	}
+-	memset(pHCB->HCS_virScbArray, 0, sz);
++	memset(host->scb_virt, 0, sz);
+ 
+ 	/* Get total memory needed for ESCB */
+-	sz = ORC_MAXQUEUE * sizeof(ESCB);
+-	pHCB->HCS_virEscbArray = pci_alloc_consistent(pdev, sz,
+-			&pHCB->HCS_physEscbArray);
+-	if (!pHCB->HCS_virEscbArray) {
++	sz = ORC_MAXQUEUE * sizeof(struct orc_extended_scb);
++	host->escb_virt = pci_alloc_consistent(pdev, sz,
++			&host->escb_phys);
++	if (!host->escb_virt) {
+ 		printk("inia100: ESCB memory allocation error\n");
+ 		goto out_free_scb_array;
+ 	}
+-	memset(pHCB->HCS_virEscbArray, 0, sz);
++	memset(host->escb_virt, 0, sz);
+ 
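Both rings live in coherent (consistent) DMA memory so the CPU and the controller see the same bytes without explicit sync calls; pci_alloc_consistent() hands back the kernel virtual address and writes the bus address the hardware will use. A hedged sketch of the idiom as it stood before the dma_alloc_coherent() conversions; struct my_scb and the queue depth are placeholders:

#include <linux/pci.h>
#include <linux/string.h>

struct my_scb { u32 words[16]; };	/* placeholder control block */
#define MY_DEPTH 245			/* mirrors ORC_MAXQUEUE */

static struct my_scb *alloc_scb_ring(struct pci_dev *pdev, dma_addr_t *bus)
{
	size_t sz = MY_DEPTH * sizeof(struct my_scb);
	struct my_scb *virt = pci_alloc_consistent(pdev, sz, bus);

	if (virt)
		memset(virt, 0, sz);	/* pci_alloc_consistent does not zero */
	return virt;
}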
+-	dBiosAdr = pHCB->HCS_BIOS;
+-	dBiosAdr = (dBiosAdr << 4);
+-	pbBiosAdr = phys_to_virt(dBiosAdr);
+-	if (init_orchid(pHCB)) {	/* Initialize orchid chip */
++	biosaddr = host->BIOScfg;
++	biosaddr = (biosaddr << 4);
++	bios_phys = phys_to_virt(biosaddr);
++	if (init_orchid(host)) {	/* Initialize orchid chip */
+ 		printk("inia100: initial orchid fail!!\n");
+ 		goto out_free_escb_array;
+ 	}
+ 
+-	shost->io_port = pHCB->HCS_Base;
++	shost->io_port = host->base;
+ 	shost->n_io_port = 0xff;
+ 	shost->can_queue = ORC_MAXQUEUE;
+ 	shost->unique_id = shost->io_port;
+-	shost->max_id = pHCB->HCS_MaxTar;
++	shost->max_id = host->max_targets;
+ 	shost->max_lun = 16;
+-	shost->irq = pHCB->HCS_Intr = pdev->irq;
+-	shost->this_id = pHCB->HCS_SCSI_ID;	/* Assign HCS index */
++	shost->irq = pdev->irq;
++	shost->this_id = host->scsi_id;	/* Assign HCS index */
+ 	shost->sg_tablesize = TOTAL_SG_ENTRY;
+ 
+ 	/* Initial orc chip           */
+@@ -1137,36 +1170,36 @@
+ 	scsi_scan_host(shost);
+ 	return 0;
+ 
+- out_free_irq:
++out_free_irq:
+         free_irq(shost->irq, shost);
+- out_free_escb_array:
+-	pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ESCB),
+-			pHCB->HCS_virEscbArray, pHCB->HCS_physEscbArray);
+- out_free_scb_array:
+-	pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ORC_SCB),
+-			pHCB->HCS_virScbArray, pHCB->HCS_physScbArray);
+- out_host_put:
++out_free_escb_array:
++	pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_extended_scb),
++			host->escb_virt, host->escb_phys);
++out_free_scb_array:
++	pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_scb),
++			host->scb_virt, host->scb_phys);
++out_host_put:
+ 	scsi_host_put(shost);
+- out_release_region:
++out_release_region:
+         release_region(port, 256);
+- out_disable_device:
++out_disable_device:
+ 	pci_disable_device(pdev);
+- out:
++out:
+ 	return error;
+ }
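The probe path keeps the kernel's goto-unwind convention: each resource gets a label, and a failure jumps to the label that releases everything acquired so far, in reverse order; the hunk itself only moves the labels to column 0. A self-contained user-space rendering of the same shape:

#include <stdio.h>
#include <stdlib.h>

static int probe_like(const char *path)
{
	int err = -1;
	char *buf;
	FILE *f;

	buf = malloc(256);
	if (!buf)
		goto out;
	f = fopen(path, "r");
	if (!f)
		goto out_free_buf;

	/* ... use f and buf ... */
	err = 0;

	fclose(f);
out_free_buf:
	free(buf);
out:
	return err;
}

int main(void)
{
	return probe_like("/etc/hostname") ? 1 : 0;
}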
+ 
+ static void __devexit inia100_remove_one(struct pci_dev *pdev)
+ {
+ 	struct Scsi_Host *shost = pci_get_drvdata(pdev);
+-	ORC_HCS *pHCB = (ORC_HCS *)shost->hostdata;
++	struct orc_host *host = (struct orc_host *)shost->hostdata;
+ 
+ 	scsi_remove_host(shost);
+ 
+         free_irq(shost->irq, shost);
+-	pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ESCB),
+-			pHCB->HCS_virEscbArray, pHCB->HCS_physEscbArray);
+-	pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(ORC_SCB),
+-			pHCB->HCS_virScbArray, pHCB->HCS_physScbArray);
++	pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_extended_scb),
++			host->escb_virt, host->escb_phys);
++	pci_free_consistent(pdev, ORC_MAXQUEUE * sizeof(struct orc_scb),
++			host->scb_virt, host->scb_phys);
+         release_region(shost->io_port, 256);
+ 
+ 	scsi_host_put(shost);
+diff -Nurb linux-2.6.22-570/drivers/scsi/a100u2w.h linux-2.6.22-try2/drivers/scsi/a100u2w.h
+--- linux-2.6.22-570/drivers/scsi/a100u2w.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/a100u2w.h	2007-12-19 15:29:23.000000000 -0500
+@@ -18,27 +18,6 @@
+  * along with this program; see the file COPYING.  If not, write to
+  * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+  *
+- * --------------------------------------------------------------------------
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- * 1. Redistributions of source code must retain the above copyright
+- *    notice, this list of conditions, and the following disclaimer,
+- *    without modification, immediately at the beginning of the file.
+- * 2. Redistributions in binary form must reproduce the above copyright
+- *    notice, this list of conditions and the following disclaimer in the
+- *    documentation and/or other materials provided with the distribution.
+- * 3. The name of the author may not be used to endorse or promote products
+- *    derived from this software without specific prior written permission.
+- *
+- * Where this Software is combined with software released under the terms of 
+- * the GNU General Public License ("GPL") and the terms of the GPL would require the 
+- * combined work to also be released under the terms of the GPL, the terms
+- * and conditions of this License will apply in addition to those of the
+- * GPL with the exception of any terms or conditions of this License that
+- * conflict with, or are expressly prohibited by, the GPL.
+- *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+@@ -50,30 +29,19 @@
+  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+- */
+-
+-/*
++ *
+  * Revision History:
+  * 06/18/98 HL, Initial production Version 1.02
+  * 12/19/98 bv, Use spinlocks for 2.1.95 and up
+  * 06/25/02 Doug Ledford <dledford@redhat.com>
+  *	 - This and the i60uscsi.h file are almost identical,
+  *	   merged them into a single header used by both .c files.
++ * 14/06/07 Alan Cox <alan@redhat.com>
++ *	 - Grand cleanup and Linuxisation
+  */
+ 
+ #define inia100_REVID "Initio INI-A100U2W SCSI device driver; Revision: 1.02d"
+ 
+-#define ULONG   unsigned long
+-#define USHORT  unsigned short
+-#define UCHAR   unsigned char
+-#define BYTE    unsigned char
+-#define WORD    unsigned short
+-#define DWORD   unsigned long
+-#define UBYTE   unsigned char
+-#define UWORD   unsigned short
+-#define UDWORD  unsigned long
+-#define U32     u32
+-
+ #if 1
+ #define ORC_MAXQUEUE		245
+ #define ORC_MAXTAGS		64
+@@ -90,10 +58,10 @@
+ /************************************************************************/
+ /*              Scatter-Gather Element Structure                        */
+ /************************************************************************/
+-typedef struct ORC_SG_Struc {
+-	U32 SG_Ptr;		/* Data Pointer */
+-	U32 SG_Len;		/* Data Length */
+-} ORC_SG;
++struct orc_sgent {
++	u32 base;		/* Data Pointer */
++	u32 length;		/* Data Length */
++};
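Dropping the ULONG/UCHAR-style aliases for u8/u16/u32 is not just cosmetic: these structures are handed to adapter firmware, so their layout must not drift with the host ABI, and several of the old aliases (ULONG, DWORD) mapped to unsigned long, which is 8 bytes on 64-bit. A user-space analogue with a compile-time size check, using stdint types in place of the kernel's:

#include <stdint.h>
#include <stdio.h>

struct sgent {			/* mirrors struct orc_sgent */
	uint32_t base;		/* bus address of the data */
	uint32_t length;	/* data length */
};

/* The firmware expects exactly 8 bytes per SG element. */
typedef char sgent_size_check[sizeof(struct sgent) == 8 ? 1 : -1];

int main(void)
{
	printf("sizeof(struct sgent) = %zu\n", sizeof(struct sgent));
	return 0;
}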
+ 
+ /* SCSI related definition                                              */
+ #define DISC_NOT_ALLOW          0x80	/* Disconnect is not allowed    */
+@@ -165,42 +133,45 @@
+ #define	ORC_PRGMCTR1	0xE3	/* RISC program counter           */
+ #define	ORC_RISCRAM	0xEC	/* RISC RAM data port 4 bytes     */
+ 
+-typedef struct orc_extended_scb {	/* Extended SCB                 */
+-	ORC_SG ESCB_SGList[TOTAL_SG_ENTRY];	/*0 Start of SG list              */
+-	struct scsi_cmnd *SCB_Srb;	/*50 SRB Pointer */
+-} ESCB;
++struct orc_extended_scb {	/* Extended SCB                 */
++	struct orc_sgent sglist[TOTAL_SG_ENTRY];	/*0 Start of SG list              */
++	struct scsi_cmnd *srb;	/*50 SRB Pointer */
++};
+ 
+ /***********************************************************************
+ 		SCSI Control Block
++
++		0x40 bytes long, the last 8 are user bytes
+ ************************************************************************/
+-typedef struct orc_scb {	/* Scsi_Ctrl_Blk                */
+-	UBYTE SCB_Opcode;	/*00 SCB command code&residual  */
+-	UBYTE SCB_Flags;	/*01 SCB Flags                  */
+-	UBYTE SCB_Target;	/*02 Target Id                  */
+-	UBYTE SCB_Lun;		/*03 Lun                        */
+-	U32 SCB_Reserved0;	/*04 Reserved for ORCHID must 0 */
+-	U32 SCB_XferLen;	/*08 Data Transfer Length       */
+-	U32 SCB_Reserved1;	/*0C Reserved for ORCHID must 0 */
+-	U32 SCB_SGLen;		/*10 SG list # * 8              */
+-	U32 SCB_SGPAddr;	/*14 SG List Buf physical Addr  */
+-	U32 SCB_SGPAddrHigh;	/*18 SG Buffer high physical Addr */
+-	UBYTE SCB_HaStat;	/*1C Host Status                */
+-	UBYTE SCB_TaStat;	/*1D Target Status              */
+-	UBYTE SCB_Status;	/*1E SCB status                 */
+-	UBYTE SCB_Link;		/*1F Link pointer, default 0xFF */
+-	UBYTE SCB_SenseLen;	/*20 Sense Allocation Length    */
+-	UBYTE SCB_CDBLen;	/*21 CDB Length                 */
+-	UBYTE SCB_Ident;	/*22 Identify                   */
+-	UBYTE SCB_TagMsg;	/*23 Tag Message                */
+-	UBYTE SCB_CDB[IMAX_CDB];	/*24 SCSI CDBs                  */
+-	UBYTE SCB_ScbIdx;	/*3C Index for this ORCSCB      */
+-	U32 SCB_SensePAddr;	/*34 Sense Buffer physical Addr */
+-
+-	ESCB *SCB_EScb;		/*38 Extended SCB Pointer       */
+-#ifndef ALPHA
+-	UBYTE SCB_Reserved2[4];	/*3E Reserved for Driver use    */
++struct orc_scb {	/* Scsi_Ctrl_Blk                */
++	u8 opcode;	/*00 SCB command code&residual  */
++	u8 flags;	/*01 SCB Flags                  */
++	u8 target;	/*02 Target Id                  */
++	u8 lun;		/*03 Lun                        */
++	u32 reserved0;	/*04 Reserved for ORCHID must 0 */
++	u32 xferlen;	/*08 Data Transfer Length       */
++	u32 reserved1;	/*0C Reserved for ORCHID must 0 */
++	u32 sg_len;		/*10 SG list # * 8              */
++	u32 sg_addr;	/*14 SG List Buf physical Addr  */
++	u32 sg_addrhigh;	/*18 SG Buffer high physical Addr */
++	u8 hastat;	/*1C Host Status                */
++	u8 tastat;	/*1D Target Status              */
++	u8 status;	/*1E SCB status                 */
++	u8 link;		/*1F Link pointer, default 0xFF */
++	u8 sense_len;	/*20 Sense Allocation Length    */
++	u8 cdb_len;	/*21 CDB Length                 */
++	u8 ident;	/*22 Identify                   */
++	u8 tag_msg;	/*23 Tag Message                */
++	u8 cdb[IMAX_CDB];	/*24 SCSI CDBs                  */
++	u8 scbidx;	/*3C Index for this ORCSCB      */
++	u32 sense_addr;	/*34 Sense Buffer physical Addr */
++
++	struct orc_extended_scb *escb; /*38 Extended SCB Pointer       */
++        /* 64bit pointer or 32bit pointer + reserved ? */
++#ifndef CONFIG_64BIT
++	u8 reserved2[4];	/*3E Reserved for Driver use    */
+ #endif
+-} ORC_SCB;
++};
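The reserved tail keeps struct orc_scb at the 0x40 bytes noted in the comment block: on a 32-bit build the escb pointer takes 4 bytes and the pad supplies the other 4, while on 64-bit the pointer consumes both, hence `#ifndef CONFIG_64BIT` in place of the old ad-hoc ALPHA test. A small user-space sketch of the trick, assuming a 0x38-byte fixed prefix:

#include <stdint.h>
#include <stdio.h>

struct blk {
	uint32_t fixed[14];	/* stands in for the 0x38 firmware bytes */
	void *escb;		/* 4 or 8 bytes depending on the build */
#if UINTPTR_MAX == 0xffffffffu	/* 32-bit: pad the pointer up to 8 */
	uint8_t reserved[4];
#endif
};

int main(void)
{
	/* Prints 64 (0x40) on both 32-bit and 64-bit builds. */
	printf("sizeof(struct blk) = %zu\n", sizeof(struct blk));
	return 0;
}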
+ 
+ /* Opcodes of ORCSCB_Opcode */
+ #define ORC_EXECSCSI	0x00	/* SCSI initiator command with residual */
+@@ -239,13 +210,13 @@
+ 		Target Device Control Structure
+ **********************************************************************/
+ 
+-typedef struct ORC_Tar_Ctrl_Struc {
+-	UBYTE TCS_DrvDASD;	/* 6 */
+-	UBYTE TCS_DrvSCSI;	/* 7 */
+-	UBYTE TCS_DrvHead;	/* 8 */
+-	UWORD TCS_DrvFlags;	/* 4 */
+-	UBYTE TCS_DrvSector;	/* 7 */
+-} ORC_TCS;
++struct orc_target {
++	u8 TCS_DrvDASD;	/* 6 */
++	u8 TCS_DrvSCSI;	/* 7 */
++	u8 TCS_DrvHead;	/* 8 */
++	u16 TCS_DrvFlags;	/* 4 */
++	u8 TCS_DrvSector;	/* 7 */
++};
+ 
+ /* Bit Definition for TCF_DrvFlags */
+ #define	TCS_DF_NODASD_SUPT	0x20	/* Suppress OS/2 DASD Mgr support */
+@@ -255,32 +226,23 @@
+ /***********************************************************************
+               Host Adapter Control Structure
+ ************************************************************************/
+-typedef struct ORC_Ha_Ctrl_Struc {
+-	USHORT HCS_Base;	/* 00 */
+-	UBYTE HCS_Index;	/* 02 */
+-	UBYTE HCS_Intr;		/* 04 */
+-	UBYTE HCS_SCSI_ID;	/* 06    H/A SCSI ID */
+-	UBYTE HCS_BIOS;		/* 07    BIOS configuration */
+-
+-	UBYTE HCS_Flags;	/* 0B */
+-	UBYTE HCS_HAConfig1;	/* 1B    SCSI0MAXTags */
+-	UBYTE HCS_MaxTar;	/* 1B    SCSI0MAXTags */
+-
+-	USHORT HCS_Units;	/* Number of units this adapter  */
+-	USHORT HCS_AFlags;	/* Adapter info. defined flags   */
+-	ULONG HCS_Timeout;	/* Adapter timeout value   */
+-	ORC_SCB *HCS_virScbArray;	/* 28 Virtual Pointer to SCB array */
+-	dma_addr_t HCS_physScbArray;	/* Scb Physical address */
+-	ESCB *HCS_virEscbArray;	/* Virtual pointer to ESCB Scatter list */
+-	dma_addr_t HCS_physEscbArray;	/* scatter list Physical address */
+-	UBYTE TargetFlag[16];	/* 30  target configuration, TCF_EN_TAG */
+-	UBYTE MaximumTags[16];	/* 40  ORC_MAX_SCBS */
+-	UBYTE ActiveTags[16][16];	/* 50 */
+-	ORC_TCS HCS_Tcs[16];	/* 28 */
+-	U32 BitAllocFlag[MAX_CHANNELS][8];	/* Max STB is 256, So 256/32 */
+-	spinlock_t BitAllocFlagLock;
++struct orc_host {
++	unsigned long base;	/* Base address */
++	u8 index;		/* Index (Channel)*/
++	u8 scsi_id;		/* H/A SCSI ID */
++	u8 BIOScfg;		/*BIOS configuration */
++	u8 flags;
++	u8 max_targets;		/* SCSI0MAXTags */
++	struct orc_scb *scb_virt;	/* Virtual Pointer to SCB array */
++	dma_addr_t scb_phys;	/* Scb Physical address */
++	struct orc_extended_scb *escb_virt; /* Virtual pointer to ESCB Scatter list */
++	dma_addr_t escb_phys;	/* scatter list Physical address */
++	u8 target_flag[16];	/* target configuration, TCF_EN_TAG */
++	u8 max_tags[16];	/* ORC_MAX_SCBS */
++	u32 allocation_map[MAX_CHANNELS][8];	/* Max STB is 256, So 256/32 */
++	spinlock_t allocation_lock;
+ 	struct pci_dev *pdev;
+-} ORC_HCS;
++};
+ 
+ /* Bit Definition for HCS_Flags */
+ 
+@@ -301,79 +263,79 @@
+ #define	HCS_AF_DISABLE_RESET	0x10	/* Adapter disable reset  */
+ #define	HCS_AF_DISABLE_ADPT	0x80	/* Adapter disable                */
+ 
+-typedef struct _NVRAM {
++struct orc_nvram {
+ /*----------header ---------------*/
+-        UCHAR SubVendorID0;     /* 00 - Sub Vendor ID           */
+-        UCHAR SubVendorID1;     /* 00 - Sub Vendor ID           */
+-        UCHAR SubSysID0;        /* 02 - Sub System ID           */
+-        UCHAR SubSysID1;        /* 02 - Sub System ID           */
+-        UCHAR SubClass;         /* 04 - Sub Class               */
+-        UCHAR VendorID0;        /* 05 - Vendor ID               */
+-        UCHAR VendorID1;        /* 05 - Vendor ID               */
+-        UCHAR DeviceID0;        /* 07 - Device ID               */
+-        UCHAR DeviceID1;        /* 07 - Device ID               */
+-        UCHAR Reserved0[2];     /* 09 - Reserved                */
+-        UCHAR Revision;         /* 0B - Revision of data structure */
++        u8 SubVendorID0;     /* 00 - Sub Vendor ID           */
++        u8 SubVendorID1;     /* 00 - Sub Vendor ID           */
++        u8 SubSysID0;        /* 02 - Sub System ID           */
++        u8 SubSysID1;        /* 02 - Sub System ID           */
++        u8 SubClass;         /* 04 - Sub Class               */
++        u8 VendorID0;        /* 05 - Vendor ID               */
++        u8 VendorID1;        /* 05 - Vendor ID               */
++        u8 DeviceID0;        /* 07 - Device ID               */
++        u8 DeviceID1;        /* 07 - Device ID               */
++        u8 Reserved0[2];     /* 09 - Reserved                */
++        u8 revision;         /* 0B - revision of data structure */
+         /* ----Host Adapter Structure ---- */
+-        UCHAR NumOfCh;          /* 0C - Number of SCSI channel  */
+-        UCHAR BIOSConfig1;      /* 0D - BIOS configuration 1    */
+-        UCHAR BIOSConfig2;      /* 0E - BIOS boot channel&target ID */
+-        UCHAR BIOSConfig3;      /* 0F - BIOS configuration 3    */
++        u8 NumOfCh;          /* 0C - Number of SCSI channel  */
++        u8 BIOSConfig1;      /* 0D - BIOS configuration 1    */
++        u8 BIOSConfig2;      /* 0E - BIOS boot channel&target ID */
++        u8 BIOSConfig3;      /* 0F - BIOS configuration 3    */
+         /* ----SCSI channel Structure ---- */
+         /* from "CTRL-I SCSI Host Adapter SetUp menu "  */
+-        UCHAR SCSI0Id;          /* 10 - Channel 0 SCSI ID       */
+-        UCHAR SCSI0Config;      /* 11 - Channel 0 SCSI configuration */
+-        UCHAR SCSI0MaxTags;     /* 12 - Channel 0 Maximum tags  */
+-        UCHAR SCSI0ResetTime;   /* 13 - Channel 0 Reset recovering time */
+-        UCHAR ReservedforChannel0[2];   /* 14 - Reserved                */
++        u8 scsi_id;          /* 10 - Channel 0 SCSI ID       */
++        u8 SCSI0Config;      /* 11 - Channel 0 SCSI configuration */
++        u8 SCSI0MaxTags;     /* 12 - Channel 0 Maximum tags  */
++        u8 SCSI0ResetTime;   /* 13 - Channel 0 Reset recovering time */
++        u8 ReservedforChannel0[2];   /* 14 - Reserved                */
+ 
+         /* ----SCSI target Structure ----  */
+         /* from "CTRL-I SCSI device SetUp menu "                        */
+-        UCHAR Target00Config;   /* 16 - Channel 0 Target 0 config */
+-        UCHAR Target01Config;   /* 17 - Channel 0 Target 1 config */
+-        UCHAR Target02Config;   /* 18 - Channel 0 Target 2 config */
+-        UCHAR Target03Config;   /* 19 - Channel 0 Target 3 config */
+-        UCHAR Target04Config;   /* 1A - Channel 0 Target 4 config */
+-        UCHAR Target05Config;   /* 1B - Channel 0 Target 5 config */
+-        UCHAR Target06Config;   /* 1C - Channel 0 Target 6 config */
+-        UCHAR Target07Config;   /* 1D - Channel 0 Target 7 config */
+-        UCHAR Target08Config;   /* 1E - Channel 0 Target 8 config */
+-        UCHAR Target09Config;   /* 1F - Channel 0 Target 9 config */
+-        UCHAR Target0AConfig;   /* 20 - Channel 0 Target A config */
+-        UCHAR Target0BConfig;   /* 21 - Channel 0 Target B config */
+-        UCHAR Target0CConfig;   /* 22 - Channel 0 Target C config */
+-        UCHAR Target0DConfig;   /* 23 - Channel 0 Target D config */
+-        UCHAR Target0EConfig;   /* 24 - Channel 0 Target E config */
+-        UCHAR Target0FConfig;   /* 25 - Channel 0 Target F config */
+-
+-        UCHAR SCSI1Id;          /* 26 - Channel 1 SCSI ID       */
+-        UCHAR SCSI1Config;      /* 27 - Channel 1 SCSI configuration */
+-        UCHAR SCSI1MaxTags;     /* 28 - Channel 1 Maximum tags  */
+-        UCHAR SCSI1ResetTime;   /* 29 - Channel 1 Reset recovering time */
+-        UCHAR ReservedforChannel1[2];   /* 2A - Reserved                */
++        u8 Target00Config;   /* 16 - Channel 0 Target 0 config */
++        u8 Target01Config;   /* 17 - Channel 0 Target 1 config */
++        u8 Target02Config;   /* 18 - Channel 0 Target 2 config */
++        u8 Target03Config;   /* 19 - Channel 0 Target 3 config */
++        u8 Target04Config;   /* 1A - Channel 0 Target 4 config */
++        u8 Target05Config;   /* 1B - Channel 0 Target 5 config */
++        u8 Target06Config;   /* 1C - Channel 0 Target 6 config */
++        u8 Target07Config;   /* 1D - Channel 0 Target 7 config */
++        u8 Target08Config;   /* 1E - Channel 0 Target 8 config */
++        u8 Target09Config;   /* 1F - Channel 0 Target 9 config */
++        u8 Target0AConfig;   /* 20 - Channel 0 Target A config */
++        u8 Target0BConfig;   /* 21 - Channel 0 Target B config */
++        u8 Target0CConfig;   /* 22 - Channel 0 Target C config */
++        u8 Target0DConfig;   /* 23 - Channel 0 Target D config */
++        u8 Target0EConfig;   /* 24 - Channel 0 Target E config */
++        u8 Target0FConfig;   /* 25 - Channel 0 Target F config */
++
++        u8 SCSI1Id;          /* 26 - Channel 1 SCSI ID       */
++        u8 SCSI1Config;      /* 27 - Channel 1 SCSI configuration */
++        u8 SCSI1MaxTags;     /* 28 - Channel 1 Maximum tags  */
++        u8 SCSI1ResetTime;   /* 29 - Channel 1 Reset recovering time */
++        u8 ReservedforChannel1[2];   /* 2A - Reserved                */
+ 
+         /* ----SCSI target Structure ----  */
+         /* from "CTRL-I SCSI device SetUp menu "                                          */
+-        UCHAR Target10Config;   /* 2C - Channel 1 Target 0 config */
+-        UCHAR Target11Config;   /* 2D - Channel 1 Target 1 config */
+-        UCHAR Target12Config;   /* 2E - Channel 1 Target 2 config */
+-        UCHAR Target13Config;   /* 2F - Channel 1 Target 3 config */
+-        UCHAR Target14Config;   /* 30 - Channel 1 Target 4 config */
+-        UCHAR Target15Config;   /* 31 - Channel 1 Target 5 config */
+-        UCHAR Target16Config;   /* 32 - Channel 1 Target 6 config */
+-        UCHAR Target17Config;   /* 33 - Channel 1 Target 7 config */
+-        UCHAR Target18Config;   /* 34 - Channel 1 Target 8 config */
+-        UCHAR Target19Config;   /* 35 - Channel 1 Target 9 config */
+-        UCHAR Target1AConfig;   /* 36 - Channel 1 Target A config */
+-        UCHAR Target1BConfig;   /* 37 - Channel 1 Target B config */
+-        UCHAR Target1CConfig;   /* 38 - Channel 1 Target C config */
+-        UCHAR Target1DConfig;   /* 39 - Channel 1 Target D config */
+-        UCHAR Target1EConfig;   /* 3A - Channel 1 Target E config */
+-        UCHAR Target1FConfig;   /* 3B - Channel 1 Target F config */
+-        UCHAR reserved[3];      /* 3C - Reserved                */
++        u8 Target10Config;   /* 2C - Channel 1 Target 0 config */
++        u8 Target11Config;   /* 2D - Channel 1 Target 1 config */
++        u8 Target12Config;   /* 2E - Channel 1 Target 2 config */
++        u8 Target13Config;   /* 2F - Channel 1 Target 3 config */
++        u8 Target14Config;   /* 30 - Channel 1 Target 4 config */
++        u8 Target15Config;   /* 31 - Channel 1 Target 5 config */
++        u8 Target16Config;   /* 32 - Channel 1 Target 6 config */
++        u8 Target17Config;   /* 33 - Channel 1 Target 7 config */
++        u8 Target18Config;   /* 34 - Channel 1 Target 8 config */
++        u8 Target19Config;   /* 35 - Channel 1 Target 9 config */
++        u8 Target1AConfig;   /* 36 - Channel 1 Target A config */
++        u8 Target1BConfig;   /* 37 - Channel 1 Target B config */
++        u8 Target1CConfig;   /* 38 - Channel 1 Target C config */
++        u8 Target1DConfig;   /* 39 - Channel 1 Target D config */
++        u8 Target1EConfig;   /* 3A - Channel 1 Target E config */
++        u8 Target1FConfig;   /* 3B - Channel 1 Target F config */
++        u8 reserved[3];      /* 3C - Reserved                */
+         /* ---------- CheckSum ----------       */
+-        UCHAR CheckSum;         /* 3F - Checksum of NVRam       */
+-} NVRAM, *PNVRAM;
++        u8 CheckSum;         /* 3F - Checksum of NVRam       */
++};
+ 
+ /* Bios Configuration for nvram->BIOSConfig1                            */
+ #define NBC_BIOSENABLE  0x01    /* BIOS enable                    */
+@@ -407,10 +369,3 @@
+ #define NCC_RESET_TIME  0x0A    /* SCSI RESET recovering time     */
+ #define NTC_DEFAULT     (NTC_1GIGA | NTC_NO_WIDESYNC | NTC_DISC_ENABLE)
+ 
+-#define ORC_RD(x,y)             (UCHAR)(inb(  (int)((ULONG)((ULONG)x+(UCHAR)y)) ))
+-#define ORC_RDWORD(x,y)         (short)(inl((int)((ULONG)((ULONG)x+(UCHAR)y)) ))
+-#define ORC_RDLONG(x,y)         (long)(inl((int)((ULONG)((ULONG)x+(UCHAR)y)) ))
+-
+-#define ORC_WR(     adr,data)   outb( (UCHAR)(data), (int)(adr))
+-#define ORC_WRSHORT(adr,data)   outw( (UWORD)(data), (int)(adr))
+-#define ORC_WRLONG( adr,data)   outl( (ULONG)(data), (int)(adr))
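With the ORC_RD/ORC_WR family gone, the .c file calls the port accessors directly, as in the `bios = inw(port + 0x50)` change earlier in this patch, losing the macros' tangle of long/int casts in the process. A hedged sketch of the direct style, assuming x86-style port I/O; the register offsets are invented:

#include <linux/io.h>

#define MY_REG_STATUS	0x00	/* illustrative offsets only */
#define MY_REG_DOORBELL	0x08

static u8 my_read_status(unsigned long base)
{
	return inb(base + MY_REG_STATUS);
}

static void my_ring_doorbell(unsigned long base, u8 val)
{
	outb(val, base + MY_REG_DOORBELL);
}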
+diff -Nurb linux-2.6.22-570/drivers/scsi/a4000t.c linux-2.6.22-try2/drivers/scsi/a4000t.c
+--- linux-2.6.22-570/drivers/scsi/a4000t.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/scsi/a4000t.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,143 @@
++/*
++ * Detection routine for the NCR53c710 based Amiga SCSI Controllers for Linux.
++ *		Amiga Technologies A4000T SCSI controller.
++ *
++ * Written 1997 by Alan Hourihane <alanh@fairlite.demon.co.uk>
++ * plus modifications of the 53c7xx.c driver to support the Amiga.
++ *
++ * Rewritten to use 53c700.c by Kars de Jong <jongk@linux-m68k.org>
++ */
++
++#include <linux/module.h>
++#include <linux/platform_device.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <asm/amigahw.h>
++#include <asm/amigaints.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_transport_spi.h>
++
++#include "53c700.h"
++
++MODULE_AUTHOR("Alan Hourihane <alanh@fairlite.demon.co.uk> / Kars de Jong <jongk@linux-m68k.org>");
++MODULE_DESCRIPTION("Amiga A4000T NCR53C710 driver");
++MODULE_LICENSE("GPL");
++
++
++static struct scsi_host_template a4000t_scsi_driver_template = {
++	.name		= "A4000T builtin SCSI",
++	.proc_name	= "A4000t",
++	.this_id	= 7,
++	.module		= THIS_MODULE,
++};
++
++static struct platform_device *a4000t_scsi_device;
++
++#define A4000T_SCSI_ADDR 0xdd0040
++
++static int __devinit a4000t_probe(struct device *dev)
++{
++	struct Scsi_Host * host = NULL;
++	struct NCR_700_Host_Parameters *hostdata;
++
++	if (!(MACH_IS_AMIGA && AMIGAHW_PRESENT(A4000_SCSI)))
++		goto out;
++
++	if (!request_mem_region(A4000T_SCSI_ADDR, 0x1000,
++				"A4000T builtin SCSI"))
++		goto out;
++
++	hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL);
++	if (hostdata == NULL) {
++		printk(KERN_ERR "a4000t-scsi: Failed to allocate host data\n");
++		goto out_release;
++	}
++	memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters));
++
++	/* Fill in the required pieces of hostdata */
++	hostdata->base = (void __iomem *)ZTWO_VADDR(A4000T_SCSI_ADDR);
++	hostdata->clock = 50;
++	hostdata->chip710 = 1;
++	hostdata->dmode_extra = DMODE_FC2;
++	hostdata->dcntl_extra = EA_710;
++
++	/* and register the chip */
++	host = NCR_700_detect(&a4000t_scsi_driver_template, hostdata, dev);
++	if (!host) {
++		printk(KERN_ERR "a4000t-scsi: No host detected; "
++				"board configuration problem?\n");
++		goto out_free;
++	}
++
++	host->this_id = 7;
++	host->base = A4000T_SCSI_ADDR;
++	host->irq = IRQ_AMIGA_PORTS;
++
++	if (request_irq(host->irq, NCR_700_intr, IRQF_SHARED, "a4000t-scsi",
++			host)) {
++		printk(KERN_ERR "a4000t-scsi: request_irq failed\n");
++		goto out_put_host;
++	}
++
++	scsi_scan_host(host);
++
++	return 0;
++
++ out_put_host:
++	scsi_host_put(host);
++ out_free:
++	kfree(hostdata);
++ out_release:
++	release_mem_region(A4000T_SCSI_ADDR, 0x1000);
++ out:
++	return -ENODEV;
++}
++
++static __devexit int a4000t_device_remove(struct device *dev)
++{
++	struct Scsi_Host *host = dev_to_shost(dev);
++	struct NCR_700_Host_Parameters *hostdata = shost_priv(host);
++
++	scsi_remove_host(host);
++
++	NCR_700_release(host);
++	kfree(hostdata);
++	free_irq(host->irq, host);
++	release_mem_region(A4000T_SCSI_ADDR, 0x1000);
++
++	return 0;
++}
++
++static struct device_driver a4000t_scsi_driver = {
++	.name	= "a4000t-scsi",
++	.bus	= &platform_bus_type,
++	.probe	= a4000t_probe,
++	.remove	= __devexit_p(a4000t_device_remove),
++};
++
++static int __init a4000t_scsi_init(void)
++{
++	int err;
++
++	err = driver_register(&a4000t_scsi_driver);
++	if (err)
++		return err;
++
++	a4000t_scsi_device = platform_device_register_simple("a4000t-scsi",
++			-1, NULL, 0);
++	if (IS_ERR(a4000t_scsi_device)) {
++		driver_unregister(&a4000t_scsi_driver);
++		return PTR_ERR(a4000t_scsi_device);
++	}
++
++	return err;
++}
++
++static void __exit a4000t_scsi_exit(void)
++{
++	platform_device_unregister(a4000t_scsi_device);
++	driver_unregister(&a4000t_scsi_driver);
++}
++
++module_init(a4000t_scsi_init);
++module_exit(a4000t_scsi_exit);
+diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/aachba.c linux-2.6.22-try2/drivers/scsi/aacraid/aachba.c
+--- linux-2.6.22-570/drivers/scsi/aacraid/aachba.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/aacraid/aachba.c	2007-12-19 15:29:23.000000000 -0500
+@@ -169,6 +169,18 @@
+ module_param(acbsize, int, S_IRUGO|S_IWUSR);
+ MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size. Valid values are 512, 2048, 4096 and 8192. Default is to use suggestion from Firmware.");
+ 
++int update_interval = 30 * 60;
++module_param(update_interval, int, S_IRUGO|S_IWUSR);
++MODULE_PARM_DESC(update_interval, "Interval in seconds between time sync updates issued to adapter.");
++
++int check_interval = 24 * 60 * 60;
++module_param(check_interval, int, S_IRUGO|S_IWUSR);
++MODULE_PARM_DESC(check_interval, "Interval in seconds between adapter health checks.");
++
++int check_reset = 1;
++module_param(check_reset, int, S_IRUGO|S_IWUSR);
++MODULE_PARM_DESC(check_reset, "If adapter fails health check, reset the adapter.");
++
+ int expose_physicals = -1;
+ module_param(expose_physicals, int, S_IRUGO|S_IWUSR);
+ MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. -1=protect 0=off, 1=on");
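Each of these knobs becomes both a load-time option and a runtime-writable file under /sys/module/aacraid/parameters/ thanks to the S_IRUGO|S_IWUSR mode. A minimal out-of-tree sketch of the same plumbing; the module name and default are invented:

#include <linux/module.h>
#include <linux/moduleparam.h>

static int check_interval = 24 * 60 * 60;	/* seconds */
module_param(check_interval, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(check_interval, "Seconds between health checks.");

static int __init demo_init(void)
{
	printk(KERN_INFO "demo: check_interval=%d\n", check_interval);
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Loading with `insmod demo.ko check_interval=60` overrides the default; echoing into the sysfs file changes it afterwards.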
+@@ -312,11 +324,10 @@
+ 
+ 	if (maximum_num_containers < MAXIMUM_NUM_CONTAINERS)
+ 		maximum_num_containers = MAXIMUM_NUM_CONTAINERS;
+-	fsa_dev_ptr =  kmalloc(sizeof(*fsa_dev_ptr) * maximum_num_containers,
++	fsa_dev_ptr = kzalloc(sizeof(*fsa_dev_ptr) * maximum_num_containers,
+ 			GFP_KERNEL);
+ 	if (!fsa_dev_ptr)
+ 		return -ENOMEM;
+-	memset(fsa_dev_ptr, 0, sizeof(*fsa_dev_ptr) * maximum_num_containers);
+ 
+ 	dev->fsa_dev = fsa_dev_ptr;
+ 	dev->maximum_num_containers = maximum_num_containers;
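This hunk is the mechanical kmalloc()-plus-memset(0) to kzalloc() conversion: same size, same GFP flags, but the allocator returns zeroed memory, so one easily-forgotten step disappears. Side by side, sketched:

#include <linux/slab.h>
#include <linux/string.h>

static void *zalloc_old(size_t n)	/* the pre-patch idiom */
{
	void *p = kmalloc(n, GFP_KERNEL);

	if (p)
		memset(p, 0, n);
	return p;
}

static void *zalloc_new(size_t n)	/* what the hunk switches to */
{
	return kzalloc(n, GFP_KERNEL);
}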
+@@ -344,20 +355,15 @@
+ {
+ 	void *buf;
+ 	int transfer_len;
+-	struct scatterlist *sg = scsicmd->request_buffer;
++	struct scatterlist *sg = scsi_sglist(scsicmd);
+ 
+-	if (scsicmd->use_sg) {
+ 		buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ 		transfer_len = min(sg->length, len + offset);
+-	} else {
+-		buf = scsicmd->request_buffer;
+-		transfer_len = min(scsicmd->request_bufflen, len + offset);
+-	}
++
+ 	transfer_len -= offset;
+ 	if (buf && transfer_len > 0)
+ 		memcpy(buf + offset, data, transfer_len);
+ 
+-	if (scsicmd->use_sg) 
+ 		kunmap_atomic(buf - sg->offset, KM_IRQ0);
+ 
+ }
+@@ -451,7 +457,7 @@
+ {
+ 	struct fsa_dev_info *fsa_dev_ptr = ((struct aac_dev *)(scsicmd->device->host->hostdata))->fsa_dev;
+ 
+-	if (fsa_dev_ptr[scmd_id(scsicmd)].valid)
++	if ((fsa_dev_ptr[scmd_id(scsicmd)].valid & 1))
+ 		return aac_scsi_cmd(scsicmd);
+ 
+ 	scsicmd->result = DID_NO_CONNECT << 16;
+@@ -459,18 +465,18 @@
+ 	return 0;
+ }
+ 
+-static int _aac_probe_container2(void * context, struct fib * fibptr)
++static void _aac_probe_container2(void * context, struct fib * fibptr)
+ {
+ 	struct fsa_dev_info *fsa_dev_ptr;
+ 	int (*callback)(struct scsi_cmnd *);
+ 	struct scsi_cmnd * scsicmd = (struct scsi_cmnd *)context;
+ 
+-	if (!aac_valid_context(scsicmd, fibptr))
+-		return 0;
+ 
+-	fsa_dev_ptr = ((struct aac_dev *)(scsicmd->device->host->hostdata))->fsa_dev;
++	if (!aac_valid_context(scsicmd, fibptr))
++		return;
+ 
+ 	scsicmd->SCp.Status = 0;
++	fsa_dev_ptr = fibptr->dev->fsa_dev;
+ 	if (fsa_dev_ptr) {
+ 		struct aac_mount * dresp = (struct aac_mount *) fib_data(fibptr);
+ 		fsa_dev_ptr += scmd_id(scsicmd);
+@@ -493,10 +499,11 @@
+ 	aac_fib_free(fibptr);
+ 	callback = (int (*)(struct scsi_cmnd *))(scsicmd->SCp.ptr);
+ 	scsicmd->SCp.ptr = NULL;
+-	return (*callback)(scsicmd);
++	(*callback)(scsicmd);
++	return;
+ }
+ 
+-static int _aac_probe_container1(void * context, struct fib * fibptr)
++static void _aac_probe_container1(void * context, struct fib * fibptr)
+ {
+ 	struct scsi_cmnd * scsicmd;
+ 	struct aac_mount * dresp;
+@@ -506,13 +513,14 @@
+ 	dresp = (struct aac_mount *) fib_data(fibptr);
+ 	dresp->mnt[0].capacityhigh = 0;
+ 	if ((le32_to_cpu(dresp->status) != ST_OK) ||
+-	    (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE))
+-		return _aac_probe_container2(context, fibptr);
++	    (le32_to_cpu(dresp->mnt[0].vol) != CT_NONE)) {
++		_aac_probe_container2(context, fibptr);
++		return;
++	}
+ 	scsicmd = (struct scsi_cmnd *) context;
+-	scsicmd->SCp.phase = AAC_OWNER_MIDLEVEL;
+ 
+ 	if (!aac_valid_context(scsicmd, fibptr))
+-		return 0;
++		return;
+ 
+ 	aac_fib_init(fibptr);
+ 
+@@ -527,21 +535,18 @@
+ 			  sizeof(struct aac_query_mount),
+ 			  FsaNormal,
+ 			  0, 1,
+-			  (fib_callback) _aac_probe_container2,
++			  _aac_probe_container2,
+ 			  (void *) scsicmd);
+ 	/*
+ 	 *	Check that the command queued to the controller
+ 	 */
+-	if (status == -EINPROGRESS) {
++	if (status == -EINPROGRESS)
+ 		scsicmd->SCp.phase = AAC_OWNER_FIRMWARE;
+-		return 0;
+-	}
+-	if (status < 0) {
++	else if (status < 0) {
+ 		/* Inherit results from VM_NameServe, if any */
+ 		dresp->status = cpu_to_le32(ST_OK);
+-		return _aac_probe_container2(context, fibptr);
++		_aac_probe_container2(context, fibptr);
+ 	}
+-	return 0;
+ }
+ 
+ static int _aac_probe_container(struct scsi_cmnd * scsicmd, int (*callback)(struct scsi_cmnd *))
+@@ -566,7 +571,7 @@
+ 			  sizeof(struct aac_query_mount),
+ 			  FsaNormal,
+ 			  0, 1,
+-			  (fib_callback) _aac_probe_container1,
++			  _aac_probe_container1,
+ 			  (void *) scsicmd);
+ 		/*
+ 		 *	Check that the command queued to the controller
+@@ -620,7 +625,7 @@
+ 		return -ENOMEM;
+ 	}
+ 	scsicmd->list.next = NULL;
+-	scsicmd->scsi_done = (void (*)(struct scsi_cmnd*))_aac_probe_container1;
++	scsicmd->scsi_done = (void (*)(struct scsi_cmnd*))aac_probe_container_callback1;
+ 
+ 	scsicmd->device = scsidev;
+ 	scsidev->sdev_state = 0;
+@@ -825,7 +830,7 @@
+ 	readcmd->block[1] = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32));
+ 	readcmd->count = cpu_to_le32(count<<9);
+ 	readcmd->cid = cpu_to_le16(scmd_id(cmd));
+-	readcmd->flags = cpu_to_le16(1);
++	readcmd->flags = cpu_to_le16(IO_TYPE_READ);
+ 	readcmd->bpTotal = 0;
+ 	readcmd->bpComplete = 0;
+ 
+@@ -904,7 +909,7 @@
+ 			  (void *) cmd);
+ }
+ 
+-static int aac_write_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count)
++static int aac_write_raw_io(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua)
+ {
+ 	u16 fibsize;
+ 	struct aac_raw_io *writecmd;
+@@ -914,7 +919,9 @@
+ 	writecmd->block[1] = cpu_to_le32((u32)((lba&0xffffffff00000000LL)>>32));
+ 	writecmd->count = cpu_to_le32(count<<9);
+ 	writecmd->cid = cpu_to_le16(scmd_id(cmd));
+-	writecmd->flags = 0;
++	writecmd->flags = fua ?
++		cpu_to_le16(IO_TYPE_WRITE|IO_SUREWRITE) :
++		cpu_to_le16(IO_TYPE_WRITE);
+ 	writecmd->bpTotal = 0;
+ 	writecmd->bpComplete = 0;
+ 
+@@ -933,7 +940,7 @@
+ 			  (void *) cmd);
+ }
+ 
+-static int aac_write_block64(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count)
++static int aac_write_block64(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua)
+ {
+ 	u16 fibsize;
+ 	struct aac_write64 *writecmd;
+@@ -964,7 +971,7 @@
+ 			  (void *) cmd);
+ }
+ 
+-static int aac_write_block(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count)
++static int aac_write_block(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua)
+ {
+ 	u16 fibsize;
+ 	struct aac_write *writecmd;
+@@ -1041,7 +1048,7 @@
+ 	struct aac_srb * srbcmd = aac_scsi_common(fib, cmd);
+ 
+ 	aac_build_sg64(cmd, (struct sgmap64*) &srbcmd->sg);
+-	srbcmd->count = cpu_to_le32(cmd->request_bufflen);
++	srbcmd->count = cpu_to_le32(scsi_bufflen(cmd));
+ 
+ 	memset(srbcmd->cdb, 0, sizeof(srbcmd->cdb));
+ 	memcpy(srbcmd->cdb, cmd->cmnd, cmd->cmd_len);
+@@ -1069,7 +1076,7 @@
+ 	struct aac_srb * srbcmd = aac_scsi_common(fib, cmd);
+ 
+ 	aac_build_sg(cmd, (struct sgmap*)&srbcmd->sg);
+-	srbcmd->count = cpu_to_le32(cmd->request_bufflen);
++	srbcmd->count = cpu_to_le32(scsi_bufflen(cmd));
+ 
+ 	memset(srbcmd->cdb, 0, sizeof(srbcmd->cdb));
+ 	memcpy(srbcmd->cdb, cmd->cmnd, cmd->cmd_len);
+@@ -1172,6 +1179,7 @@
+ 	}
+ 
+ 	if (!dev->in_reset) {
++		char buffer[16];
+ 		tmp = le32_to_cpu(dev->adapter_info.kernelrev);
+ 		printk(KERN_INFO "%s%d: kernel %d.%d-%d[%d] %.*s\n",
+ 			dev->name, 
+@@ -1192,16 +1200,23 @@
+ 			dev->name, dev->id,
+ 			tmp>>24,(tmp>>16)&0xff,tmp&0xff,
+ 			le32_to_cpu(dev->adapter_info.biosbuild));
+-		if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0)
+-			printk(KERN_INFO "%s%d: serial %x\n",
+-				dev->name, dev->id,
+-				le32_to_cpu(dev->adapter_info.serial[0]));
++		buffer[0] = '\0';
++		if (aac_show_serial_number(
++		  shost_to_class(dev->scsi_host_ptr), buffer))
++			printk(KERN_INFO "%s%d: serial %s",
++			  dev->name, dev->id, buffer);
+ 		if (dev->supplement_adapter_info.VpdInfo.Tsid[0]) {
+ 			printk(KERN_INFO "%s%d: TSID %.*s\n",
+ 			  dev->name, dev->id,
+ 			  (int)sizeof(dev->supplement_adapter_info.VpdInfo.Tsid),
+ 			  dev->supplement_adapter_info.VpdInfo.Tsid);
+ 		}
++		if (!check_reset ||
++		  (dev->supplement_adapter_info.SupportedOptions2 &
++		  le32_to_cpu(AAC_OPTION_IGNORE_RESET))) {
++			printk(KERN_INFO "%s%d: Reset Adapter Ignored\n",
++			  dev->name, dev->id);
++		}
+ 	}
+ 
+ 	dev->nondasd_support = 0;
+@@ -1332,7 +1347,7 @@
+ 	if (!aac_valid_context(scsicmd, fibptr))
+ 		return;
+ 
+-	dev = (struct aac_dev *)scsicmd->device->host->hostdata;
++	dev = fibptr->dev;
+ 	cid = scmd_id(scsicmd);
+ 
+ 	if (nblank(dprintk(x))) {
+@@ -1372,15 +1387,8 @@
+ 
+ 	BUG_ON(fibptr == NULL);
+ 		
+-	if(scsicmd->use_sg)
+-		pci_unmap_sg(dev->pdev, 
+-			(struct scatterlist *)scsicmd->request_buffer,
+-			scsicmd->use_sg,
+-			scsicmd->sc_data_direction);
+-	else if(scsicmd->request_bufflen)
+-		pci_unmap_single(dev->pdev, scsicmd->SCp.dma_handle,
+-				 scsicmd->request_bufflen,
+-				 scsicmd->sc_data_direction);
++	scsi_dma_unmap(scsicmd);
++
+ 	readreply = (struct aac_read_reply *)fib_data(fibptr);
+ 	if (le32_to_cpu(readreply->status) == ST_OK)
+ 		scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
+@@ -1498,6 +1506,7 @@
+ {
+ 	u64 lba;
+ 	u32 count;
++	int fua;
+ 	int status;
+ 	struct aac_dev *dev;
+ 	struct fib * cmd_fibcontext;
+@@ -1512,6 +1521,7 @@
+ 		count = scsicmd->cmnd[4];
+ 		if (count == 0)
+ 			count = 256;
++		fua = 0;
+ 	} else if (scsicmd->cmnd[0] == WRITE_16) { /* 16 byte command */
+ 		dprintk((KERN_DEBUG "aachba: received a write(16) command on id %d.\n", scmd_id(scsicmd)));
+ 
+@@ -1524,6 +1534,7 @@
+ 			(scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9];
+ 		count = (scsicmd->cmnd[10] << 24) | (scsicmd->cmnd[11] << 16) |
+ 			(scsicmd->cmnd[12] << 8) | scsicmd->cmnd[13];
++		fua = scsicmd->cmnd[1] & 0x8;
+ 	} else if (scsicmd->cmnd[0] == WRITE_12) { /* 12 byte command */
+ 		dprintk((KERN_DEBUG "aachba: received a write(12) command on id %d.\n", scmd_id(scsicmd)));
+ 
+@@ -1531,10 +1542,12 @@
+ 		    | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
+ 		count = (scsicmd->cmnd[6] << 24) | (scsicmd->cmnd[7] << 16)
+ 		      | (scsicmd->cmnd[8] << 8) | scsicmd->cmnd[9];
++		fua = scsicmd->cmnd[1] & 0x8;
+ 	} else {
+ 		dprintk((KERN_DEBUG "aachba: received a write(10) command on id %d.\n", scmd_id(scsicmd)));
+ 		lba = ((u64)scsicmd->cmnd[2] << 24) | (scsicmd->cmnd[3] << 16) | (scsicmd->cmnd[4] << 8) | scsicmd->cmnd[5];
+ 		count = (scsicmd->cmnd[7] << 8) | scsicmd->cmnd[8];
++		fua = scsicmd->cmnd[1] & 0x8;
+ 	}
+ 	dprintk((KERN_DEBUG "aac_write[cpu %d]: lba = %llu, t = %ld.\n",
+ 	  smp_processor_id(), (unsigned long long)lba, jiffies));
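Each WRITE variant packs the LBA, block count and (for the 10/12/16-byte forms) the FUA bit into different CDB fields; the hunk pulls `fua` out of bit 3 of byte 1 so it can be threaded through to aac_adapter_write(). A self-contained decoder for the WRITE(10) layout, fed a made-up CDB:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical WRITE(10): opcode 0x2A, FUA set, LBA 0x1000, 8 blocks */
	uint8_t cdb[10] = { 0x2A, 0x08, 0x00, 0x00, 0x10, 0x00,
			    0x00, 0x00, 0x08, 0x00 };

	uint64_t lba = ((uint64_t)cdb[2] << 24) | (cdb[3] << 16) |
		       (cdb[4] << 8) | cdb[5];
	uint32_t count = (cdb[7] << 8) | cdb[8];
	int fua = cdb[1] & 0x8;

	printf("lba=%llu count=%u fua=%d\n",
	       (unsigned long long)lba, count, fua ? 1 : 0);
	return 0;
}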
+@@ -1549,7 +1562,7 @@
+ 		return 0;
+ 	}
+ 
+-	status = aac_adapter_write(cmd_fibcontext, scsicmd, lba, count);
++	status = aac_adapter_write(cmd_fibcontext, scsicmd, lba, count, fua);
+ 
+ 	/*
+ 	 *	Check that the command queued to the controller
+@@ -1592,7 +1605,7 @@
+ 			COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
+ 	else {
+ 		struct scsi_device *sdev = cmd->device;
+-		struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata;
++		struct aac_dev *dev = fibptr->dev;
+ 		u32 cid = sdev_id(sdev);
+ 		printk(KERN_WARNING 
+ 		     "synchronize_callback: synchronize failed, status = %d\n",
+@@ -1699,7 +1712,7 @@
+  
+ int aac_scsi_cmd(struct scsi_cmnd * scsicmd)
+ {
+-	u32 cid = 0;
++	u32 cid;
+ 	struct Scsi_Host *host = scsicmd->device->host;
+ 	struct aac_dev *dev = (struct aac_dev *)host->hostdata;
+ 	struct fsa_dev_info *fsa_dev_ptr = dev->fsa_dev;
+@@ -1711,15 +1724,15 @@
+ 	 *	Test does not apply to ID 16, the pseudo id for the controller
+ 	 *	itself.
+ 	 */
+-	if (scmd_id(scsicmd) != host->this_id) {
+-		if ((scmd_channel(scsicmd) == CONTAINER_CHANNEL)) {
+-			if((scmd_id(scsicmd) >= dev->maximum_num_containers) ||
++	cid = scmd_id(scsicmd);
++	if (cid != host->this_id) {
++		if (scmd_channel(scsicmd) == CONTAINER_CHANNEL) {
++			if((cid >= dev->maximum_num_containers) ||
+ 					(scsicmd->device->lun != 0)) {
+ 				scsicmd->result = DID_NO_CONNECT << 16;
+ 				scsicmd->scsi_done(scsicmd);
+ 				return 0;
+ 			}
+-			cid = scmd_id(scsicmd);
+ 
+ 			/*
+ 			 *	If the target container doesn't exist, it may have
+@@ -1782,7 +1795,7 @@
+ 	{
+ 		struct inquiry_data inq_data;
+ 
+-		dprintk((KERN_DEBUG "INQUIRY command, ID: %d.\n", scmd_id(scsicmd)));
++		dprintk((KERN_DEBUG "INQUIRY command, ID: %d.\n", cid));
+ 		memset(&inq_data, 0, sizeof (struct inquiry_data));
+ 
+ 		inq_data.inqd_ver = 2;	/* claim compliance to SCSI-2 */
+@@ -1794,7 +1807,7 @@
+ 		 *	Set the Vendor, Product, and Revision Level
+ 		 *	see: <vendor>.c i.e. aac.c
+ 		 */
+-		if (scmd_id(scsicmd) == host->this_id) {
++		if (cid == host->this_id) {
+ 			setinqstr(dev, (void *) (inq_data.inqd_vid), ARRAY_SIZE(container_types));
+ 			inq_data.inqd_pdt = INQD_PDT_PROC;	/* Processor device */
+ 			aac_internal_transfer(scsicmd, &inq_data, 0, sizeof(inq_data));
+@@ -1886,15 +1899,29 @@
+ 
+ 	case MODE_SENSE:
+ 	{
+-		char mode_buf[4];
++		char mode_buf[7];
++		int mode_buf_length = 4;
+ 
+ 		dprintk((KERN_DEBUG "MODE SENSE command.\n"));
+ 		mode_buf[0] = 3;	/* Mode data length */
+ 		mode_buf[1] = 0;	/* Medium type - default */
+-		mode_buf[2] = 0;	/* Device-specific param, bit 8: 0/1 = write enabled/protected */
++		mode_buf[2] = 0;	/* Device-specific param,
++					   bit 8: 0/1 = write enabled/protected
++					   bit 4: 0/1 = FUA enabled */
++		if (dev->raw_io_interface)
++			mode_buf[2] = 0x10;
+ 		mode_buf[3] = 0;	/* Block descriptor length */
+-
+-		aac_internal_transfer(scsicmd, mode_buf, 0, sizeof(mode_buf));
++		if (((scsicmd->cmnd[2] & 0x3f) == 8) ||
++		  ((scsicmd->cmnd[2] & 0x3f) == 0x3f)) {
++			mode_buf[0] = 6;
++			mode_buf[4] = 8;
++			mode_buf[5] = 1;
++			mode_buf[6] = 0x04; /* WCE */
++			mode_buf_length = 7;
++			if (mode_buf_length > scsicmd->cmnd[4])
++				mode_buf_length = scsicmd->cmnd[4];
++		}
++		aac_internal_transfer(scsicmd, mode_buf, 0, mode_buf_length);
+ 		scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
+ 		scsicmd->scsi_done(scsicmd);
+ 
+@@ -1902,18 +1929,33 @@
+ 	}
+ 	case MODE_SENSE_10:
+ 	{
+-		char mode_buf[8];
++		char mode_buf[11];
++		int mode_buf_length = 8;
+ 
+ 		dprintk((KERN_DEBUG "MODE SENSE 10 byte command.\n"));
+ 		mode_buf[0] = 0;	/* Mode data length (MSB) */
+ 		mode_buf[1] = 6;	/* Mode data length (LSB) */
+ 		mode_buf[2] = 0;	/* Medium type - default */
+-		mode_buf[3] = 0;	/* Device-specific param, bit 8: 0/1 = write enabled/protected */
++		mode_buf[3] = 0;	/* Device-specific param,
++					   bit 8: 0/1 = write enabled/protected
++					   bit 4: 0/1 = FUA enabled */
++		if (dev->raw_io_interface)
++			mode_buf[3] = 0x10;
+ 		mode_buf[4] = 0;	/* reserved */
+ 		mode_buf[5] = 0;	/* reserved */
+ 		mode_buf[6] = 0;	/* Block descriptor length (MSB) */
+ 		mode_buf[7] = 0;	/* Block descriptor length (LSB) */
+-		aac_internal_transfer(scsicmd, mode_buf, 0, sizeof(mode_buf));
++		if (((scsicmd->cmnd[2] & 0x3f) == 8) ||
++		  ((scsicmd->cmnd[2] & 0x3f) == 0x3f)) {
++			mode_buf[1] = 9;
++			mode_buf[8] = 8;
++			mode_buf[9] = 1;
++			mode_buf[10] = 0x04; /* WCE */
++			mode_buf_length = 11;
++			if (mode_buf_length > scsicmd->cmnd[8])
++				mode_buf_length = scsicmd->cmnd[8];
++		}
++		aac_internal_transfer(scsicmd, mode_buf, 0, mode_buf_length);
+ 
+ 		scsicmd->result = DID_OK << 16 | COMMAND_COMPLETE << 8 | SAM_STAT_GOOD;
+ 		scsicmd->scsi_done(scsicmd);
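Both MODE SENSE cases now append mode page 8 (the caching page) when the request asks for page 8 or for all pages (0x3F): page code 8, page length 1, and the WCE bit (0x04) advertising a write-back cache, with the reply clipped to the CDB's allocation length. A user-space construction of the 6-byte-command reply following the layout above:

#include <stdio.h>

int main(void)
{
	unsigned char page = 0x08;	/* requested page code */
	unsigned char alloc_len = 255;	/* CDB byte 4, allocation length */
	unsigned char buf[7];
	int len = 4;

	buf[0] = 3;	/* mode data length */
	buf[1] = 0;	/* medium type */
	buf[2] = 0x10;	/* dev-specific: FUA supported (raw_io case) */
	buf[3] = 0;	/* block descriptor length */
	if (page == 0x08 || page == 0x3f) {
		buf[0] = 6;
		buf[4] = 8;	/* page code 8: caching */
		buf[5] = 1;	/* page length */
		buf[6] = 0x04;	/* WCE: write cache enabled */
		len = 7;
		if (len > alloc_len)
			len = alloc_len;
	}
	printf("reply: %d bytes, mode data length %d\n", len, buf[0]);
	return 0;
}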
+@@ -2136,10 +2178,10 @@
+ 	if (!aac_valid_context(scsicmd, fibptr))
+ 		return;
+ 
+-	dev = (struct aac_dev *)scsicmd->device->host->hostdata;
+-
+ 	BUG_ON(fibptr == NULL);
+ 
++	dev = fibptr->dev;
++
+ 	srbreply = (struct aac_srb_reply *) fib_data(fibptr);
+ 
+ 	scsicmd->sense_buffer[0] = '\0';  /* Initialize sense valid flag to false */
+@@ -2147,17 +2189,10 @@
+ 	 *	Calculate resid for sg 
+ 	 */
+ 	 
+-	scsicmd->resid = scsicmd->request_bufflen - 
+-		le32_to_cpu(srbreply->data_xfer_length);
++	scsi_set_resid(scsicmd, scsi_bufflen(scsicmd)
++		       - le32_to_cpu(srbreply->data_xfer_length));
+ 
+-	if(scsicmd->use_sg)
+-		pci_unmap_sg(dev->pdev, 
+-			(struct scatterlist *)scsicmd->request_buffer,
+-			scsicmd->use_sg,
+-			scsicmd->sc_data_direction);
+-	else if(scsicmd->request_bufflen)
+-		pci_unmap_single(dev->pdev, scsicmd->SCp.dma_handle, scsicmd->request_bufflen,
+-			scsicmd->sc_data_direction);
++	scsi_dma_unmap(scsicmd);
+ 
+ 	/*
+ 	 * First check the fib status
+@@ -2233,7 +2268,7 @@
+ 		break;
+ 
+ 	case SRB_STATUS_BUSY:
+-		scsicmd->result = DID_NO_CONNECT << 16 | COMMAND_COMPLETE << 8;
++		scsicmd->result = DID_BUS_BUSY << 16 | COMMAND_COMPLETE << 8;
+ 		break;
+ 
+ 	case SRB_STATUS_BUS_RESET:
+@@ -2343,34 +2378,33 @@
+ {
+ 	struct aac_dev *dev;
+ 	unsigned long byte_count = 0;
++	int nseg;
+ 
+ 	dev = (struct aac_dev *)scsicmd->device->host->hostdata;
+ 	// Get rid of old data
+ 	psg->count = 0;
+ 	psg->sg[0].addr = 0;
+ 	psg->sg[0].count = 0;  
+-	if (scsicmd->use_sg) {
++
++	nseg = scsi_dma_map(scsicmd);
++	BUG_ON(nseg < 0);
++	if (nseg) {
+ 		struct scatterlist *sg;
+ 		int i;
+-		int sg_count;
+-		sg = (struct scatterlist *) scsicmd->request_buffer;
+ 
+-		sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg,
+-			scsicmd->sc_data_direction);
+-		psg->count = cpu_to_le32(sg_count);
++		psg->count = cpu_to_le32(nseg);
+ 
+-		for (i = 0; i < sg_count; i++) {
++		scsi_for_each_sg(scsicmd, sg, nseg, i) {
+ 			psg->sg[i].addr = cpu_to_le32(sg_dma_address(sg));
+ 			psg->sg[i].count = cpu_to_le32(sg_dma_len(sg));
+ 			byte_count += sg_dma_len(sg);
+-			sg++;
+ 		}
+ 		/* hba wants the size to be exact */
+-		if(byte_count > scsicmd->request_bufflen){
++		if (byte_count > scsi_bufflen(scsicmd)) {
+ 			u32 temp = le32_to_cpu(psg->sg[i-1].count) - 
+-				(byte_count - scsicmd->request_bufflen);
++				(byte_count - scsi_bufflen(scsicmd));
+ 			psg->sg[i-1].count = cpu_to_le32(temp);
+-			byte_count = scsicmd->request_bufflen;
++			byte_count = scsi_bufflen(scsicmd);
+ 		}
+ 		/* Check for command underflow */
+ 		if(scsicmd->underflow && (byte_count < scsicmd->underflow)){
+@@ -2378,18 +2412,6 @@
+ 					byte_count, scsicmd->underflow);
+ 		}
+ 	}
+-	else if(scsicmd->request_bufflen) {
+-		u32 addr;
+-		scsicmd->SCp.dma_handle = pci_map_single(dev->pdev,
+-				scsicmd->request_buffer,
+-				scsicmd->request_bufflen,
+-				scsicmd->sc_data_direction);
+-		addr = scsicmd->SCp.dma_handle;
+-		psg->count = cpu_to_le32(1);
+-		psg->sg[0].addr = cpu_to_le32(addr);
+-		psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen);  
+-		byte_count = scsicmd->request_bufflen;
+-	}
+ 	return byte_count;
+ }
+ 
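All three sg builders now go through scsi_dma_map()/scsi_for_each_sg(), which hide the old use_sg-versus-single-buffer split: scsi_dma_map() returns the number of mapped segments (possibly zero), so the pci_map_single() fallback branches vanish. A hedged sketch of the consuming side, assuming those accessors and 32-bit bus addresses for brevity:

#include <linux/kernel.h>
#include <linux/scatterlist.h>
#include <scsi/scsi_cmnd.h>

struct my_sge { u32 addr; u32 len; };	/* placeholder firmware entry */

static int build_sg(struct scsi_cmnd *cmd, struct my_sge *tbl)
{
	struct scatterlist *sg;
	int i, nseg = scsi_dma_map(cmd);

	BUG_ON(nseg < 0);	/* mapping failure treated as fatal here */
	scsi_for_each_sg(cmd, sg, nseg, i) {
		tbl[i].addr = sg_dma_address(sg);	/* truncation assumed OK */
		tbl[i].len  = sg_dma_len(sg);
	}
	return nseg;		/* 0 means no data phase */
}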
+@@ -2399,6 +2421,7 @@
+ 	struct aac_dev *dev;
+ 	unsigned long byte_count = 0;
+ 	u64 addr;
++	int nseg;
+ 
+ 	dev = (struct aac_dev *)scsicmd->device->host->hostdata;
+ 	// Get rid of old data
+@@ -2406,31 +2429,28 @@
+ 	psg->sg[0].addr[0] = 0;
+ 	psg->sg[0].addr[1] = 0;
+ 	psg->sg[0].count = 0;
+-	if (scsicmd->use_sg) {
++
++	nseg = scsi_dma_map(scsicmd);
++	BUG_ON(nseg < 0);
++	if (nseg) {
+ 		struct scatterlist *sg;
+ 		int i;
+-		int sg_count;
+-		sg = (struct scatterlist *) scsicmd->request_buffer;
+-
+-		sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg,
+-			scsicmd->sc_data_direction);
+ 
+-		for (i = 0; i < sg_count; i++) {
++		scsi_for_each_sg(scsicmd, sg, nseg, i) {
+ 			int count = sg_dma_len(sg);
+ 			addr = sg_dma_address(sg);
+ 			psg->sg[i].addr[0] = cpu_to_le32(addr & 0xffffffff);
+ 			psg->sg[i].addr[1] = cpu_to_le32(addr>>32);
+ 			psg->sg[i].count = cpu_to_le32(count);
+ 			byte_count += count;
+-			sg++;
+ 		}
+-		psg->count = cpu_to_le32(sg_count);
++		psg->count = cpu_to_le32(nseg);
+ 		/* hba wants the size to be exact */
+-		if(byte_count > scsicmd->request_bufflen){
++		if (byte_count > scsi_bufflen(scsicmd)) {
+ 			u32 temp = le32_to_cpu(psg->sg[i-1].count) - 
+-				(byte_count - scsicmd->request_bufflen);
++				(byte_count - scsi_bufflen(scsicmd));
+ 			psg->sg[i-1].count = cpu_to_le32(temp);
+-			byte_count = scsicmd->request_bufflen;
++			byte_count = scsi_bufflen(scsicmd);
+ 		}
+ 		/* Check for command underflow */
+ 		if(scsicmd->underflow && (byte_count < scsicmd->underflow)){
+@@ -2438,26 +2458,13 @@
+ 					byte_count, scsicmd->underflow);
+ 		}
+ 	}
+-	else if(scsicmd->request_bufflen) {
+-		scsicmd->SCp.dma_handle = pci_map_single(dev->pdev,
+-				scsicmd->request_buffer,
+-				scsicmd->request_bufflen,
+-				scsicmd->sc_data_direction);
+-		addr = scsicmd->SCp.dma_handle;
+-		psg->count = cpu_to_le32(1);
+-		psg->sg[0].addr[0] = cpu_to_le32(addr & 0xffffffff);
+-		psg->sg[0].addr[1] = cpu_to_le32(addr >> 32);
+-		psg->sg[0].count = cpu_to_le32(scsicmd->request_bufflen);  
+-		byte_count = scsicmd->request_bufflen;
+-	}
+ 	return byte_count;
+ }
+ 
+ static unsigned long aac_build_sgraw(struct scsi_cmnd* scsicmd, struct sgmapraw* psg)
+ {
+-	struct Scsi_Host *host = scsicmd->device->host;
+-	struct aac_dev *dev = (struct aac_dev *)host->hostdata;
+ 	unsigned long byte_count = 0;
++	int nseg;
+ 
+ 	// Get rid of old data
+ 	psg->count = 0;
+@@ -2467,16 +2474,14 @@
+ 	psg->sg[0].addr[1] = 0;
+ 	psg->sg[0].count = 0;
+ 	psg->sg[0].flags = 0;
+-	if (scsicmd->use_sg) {
++
++	nseg = scsi_dma_map(scsicmd);
++	BUG_ON(nseg < 0);
++	if (nseg) {
+ 		struct scatterlist *sg;
+ 		int i;
+-		int sg_count;
+-		sg = (struct scatterlist *) scsicmd->request_buffer;
+ 
+-		sg_count = pci_map_sg(dev->pdev, sg, scsicmd->use_sg,
+-			scsicmd->sc_data_direction);
+-
+-		for (i = 0; i < sg_count; i++) {
++		scsi_for_each_sg(scsicmd, sg, nseg, i) {
+ 			int count = sg_dma_len(sg);
+ 			u64 addr = sg_dma_address(sg);
+ 			psg->sg[i].next = 0;
+@@ -2486,15 +2491,14 @@
+ 			psg->sg[i].count = cpu_to_le32(count);
+ 			psg->sg[i].flags = 0;
+ 			byte_count += count;
+-			sg++;
+ 		}
+-		psg->count = cpu_to_le32(sg_count);
++		psg->count = cpu_to_le32(nseg);
+ 		/* hba wants the size to be exact */
+-		if(byte_count > scsicmd->request_bufflen){
++		if (byte_count > scsi_bufflen(scsicmd)) {
+ 			u32 temp = le32_to_cpu(psg->sg[i-1].count) - 
+-				(byte_count - scsicmd->request_bufflen);
++				(byte_count - scsi_bufflen(scsicmd));
+ 			psg->sg[i-1].count = cpu_to_le32(temp);
+-			byte_count = scsicmd->request_bufflen;
++			byte_count = scsi_bufflen(scsicmd);
+ 		}
+ 		/* Check for command underflow */
+ 		if(scsicmd->underflow && (byte_count < scsicmd->underflow)){
+@@ -2502,24 +2506,6 @@
+ 					byte_count, scsicmd->underflow);
+ 		}
+ 	}
+-	else if(scsicmd->request_bufflen) {
+-		int count;
+-		u64 addr;
+-		scsicmd->SCp.dma_handle = pci_map_single(dev->pdev,
+-				scsicmd->request_buffer,
+-				scsicmd->request_bufflen,
+-				scsicmd->sc_data_direction);
+-		addr = scsicmd->SCp.dma_handle;
+-		count = scsicmd->request_bufflen;
+-		psg->count = cpu_to_le32(1);
+-		psg->sg[0].next = 0;
+-		psg->sg[0].prev = 0;
+-		psg->sg[0].addr[1] = cpu_to_le32((u32)(addr>>32));
+-		psg->sg[0].addr[0] = cpu_to_le32((u32)(addr & 0xffffffff));
+-		psg->sg[0].count = cpu_to_le32(count);
+-		psg->sg[0].flags = 0;
+-		byte_count = scsicmd->request_bufflen;
+-	}
+ 	return byte_count;
+ }
+ 
+diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/aacraid.h linux-2.6.22-try2/drivers/scsi/aacraid/aacraid.h
+--- linux-2.6.22-570/drivers/scsi/aacraid/aacraid.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/aacraid/aacraid.h	2007-12-19 15:29:23.000000000 -0500
+@@ -12,8 +12,8 @@
+  *----------------------------------------------------------------------------*/
+ 
+ #ifndef AAC_DRIVER_BUILD
+-# define AAC_DRIVER_BUILD 2437
+-# define AAC_DRIVER_BRANCH "-mh4"
++# define AAC_DRIVER_BUILD 2447
++# define AAC_DRIVER_BRANCH "-ms"
+ #endif
+ #define MAXIMUM_NUM_CONTAINERS	32
+ 
+@@ -464,12 +464,12 @@
+ 	int  (*adapter_restart)(struct aac_dev *dev, int bled);
+ 	/* Transport operations */
+ 	int  (*adapter_ioremap)(struct aac_dev * dev, u32 size);
+-	irqreturn_t (*adapter_intr)(int irq, void *dev_id);
++	irq_handler_t adapter_intr;
+ 	/* Packet operations */
+ 	int  (*adapter_deliver)(struct fib * fib);
+ 	int  (*adapter_bounds)(struct aac_dev * dev, struct scsi_cmnd * cmd, u64 lba);
+ 	int  (*adapter_read)(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count);
+-	int  (*adapter_write)(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count);
++	int  (*adapter_write)(struct fib * fib, struct scsi_cmnd * cmd, u64 lba, u32 count, int fua);
+ 	int  (*adapter_scsi)(struct fib * fib, struct scsi_cmnd * cmd);
+ 	/* Administrative operations */
+ 	int  (*adapter_comm)(struct aac_dev * dev, int comm);
+@@ -860,10 +860,12 @@
+ 	__le32	FlashFirmwareBootBuild;
+ 	u8	MfgPcbaSerialNo[12];
+ 	u8	MfgWWNName[8];
+-	__le32	MoreFeatureBits;
++	__le32	SupportedOptions2;
+ 	__le32	ReservedGrowth[1];
+ };
+ #define AAC_FEATURE_FALCON	0x00000010
++#define AAC_OPTION_MU_RESET	0x00000001
++#define AAC_OPTION_IGNORE_RESET	0x00000002
+ #define AAC_SIS_VERSION_V3	3
+ #define AAC_SIS_SLOT_UNKNOWN	0xFF
+ 
+@@ -1054,8 +1056,8 @@
+ #define aac_adapter_read(fib,cmd,lba,count) \
+ 	((fib)->dev)->a_ops.adapter_read(fib,cmd,lba,count)
+ 
+-#define aac_adapter_write(fib,cmd,lba,count) \
+-	((fib)->dev)->a_ops.adapter_write(fib,cmd,lba,count)
++#define aac_adapter_write(fib,cmd,lba,count,fua) \
++	((fib)->dev)->a_ops.adapter_write(fib,cmd,lba,count,fua)
+ 
+ #define aac_adapter_scsi(fib,cmd) \
+ 	((fib)->dev)->a_ops.adapter_scsi(fib,cmd)
+@@ -1213,6 +1215,9 @@
+ 	__le32 		block;
+ 	__le16		pad;
+ 	__le16		flags;
++#define	IO_TYPE_WRITE 0x00000000
++#define	IO_TYPE_READ  0x00000001
++#define	IO_SUREWRITE  0x00000008
+ 	struct sgmap64	sg;	// Must be last in struct because it is variable
+ };
+ struct aac_write_reply
+@@ -1257,6 +1262,19 @@
+ 	u8		data[16];
+ };
+ 
++#define CT_PAUSE_IO    65
++#define CT_RELEASE_IO  66
++struct aac_pause {
++	__le32		command;	/* VM_ContainerConfig */
++	__le32		type;		/* CT_PAUSE_IO */
++	__le32		timeout;	/* 10ms ticks */
++	__le32		min;
++	__le32		noRescan;
++	__le32		parm3;
++	__le32		parm4;
++	__le32		count;	/* sizeof(((struct aac_pause_reply *)NULL)->data) */
++};
++
+ struct aac_srb
+ {
+ 	__le32		function;
+@@ -1804,6 +1822,10 @@
+ int aac_get_containers(struct aac_dev *dev);
+ int aac_scsi_cmd(struct scsi_cmnd *cmd);
+ int aac_dev_ioctl(struct aac_dev *dev, int cmd, void __user *arg);
++#ifndef shost_to_class
++#define shost_to_class(shost) &shost->shost_classdev
++#endif
++ssize_t aac_show_serial_number(struct class_device *class_dev, char *buf);
+ int aac_do_ioctl(struct aac_dev * dev, int cmd, void __user *arg);
+ int aac_rx_init(struct aac_dev *dev);
+ int aac_rkt_init(struct aac_dev *dev);
+@@ -1813,6 +1835,7 @@
+ unsigned int aac_response_normal(struct aac_queue * q);
+ unsigned int aac_command_normal(struct aac_queue * q);
+ unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index);
++int aac_reset_adapter(struct aac_dev * dev, int forced);
+ int aac_check_health(struct aac_dev * dev);
+ int aac_command_thread(void *data);
+ int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context *fibctx);
+@@ -1832,3 +1855,6 @@
+ extern int expose_physicals;
+ extern int aac_reset_devices;
+ extern int aac_commit;
++extern int update_interval;
++extern int check_interval;
++extern int check_reset;
+diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/commsup.c linux-2.6.22-try2/drivers/scsi/aacraid/commsup.c
+--- linux-2.6.22-570/drivers/scsi/aacraid/commsup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/aacraid/commsup.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1021,7 +1021,7 @@
+ 
+ }
+ 
+-static int _aac_reset_adapter(struct aac_dev *aac)
++static int _aac_reset_adapter(struct aac_dev *aac, int forced)
+ {
+ 	int index, quirks;
+ 	int retval;
+@@ -1029,25 +1029,32 @@
+ 	struct scsi_device *dev;
+ 	struct scsi_cmnd *command;
+ 	struct scsi_cmnd *command_list;
++	int jafo = 0;
+ 
+ 	/*
+ 	 * Assumptions:
+-	 *	- host is locked.
++	 *	- host is locked, unless called by the aacraid thread
++	 *	  (a matter of convenience, due to legacy issues surrounding
++	 *	  eh_host_adapter_reset).
+ 	 *	- in_reset is asserted, so no new i/o is getting to the
+ 	 *	  card.
+-	 *	- The card is dead.
++	 *	- The card is dead, or will be very shortly ;-/ so no new
++	 *	  commands are completing in the interrupt service.
+ 	 */
+ 	host = aac->scsi_host_ptr;
+ 	scsi_block_requests(host);
+ 	aac_adapter_disable_int(aac);
++	if (aac->thread->pid != current->pid) {
+ 	spin_unlock_irq(host->host_lock);
+ 	kthread_stop(aac->thread);
++		jafo = 1;
++	}
+ 
+ 	/*
+ 	 *	If a positive health, means in a known DEAD PANIC
+ 	 * state and the adapter could be reset to `try again'.
+ 	 */
+-	retval = aac_adapter_restart(aac, aac_adapter_check_health(aac));
++	retval = aac_adapter_restart(aac, forced ? 0 : aac_adapter_check_health(aac));
+ 
+ 	if (retval)
+ 		goto out;
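The jafo flag above exists because _aac_reset_adapter() can now be reached from the aacraid worker thread itself (via the periodic health check), and a thread must not kthread_stop() itself or drop a lock it never took. The guard, reduced to its essentials:

/* Sketch: only tear down the worker thread when we are not running on
 * it -- kthread_stop() waits for the thread to exit, so calling it
 * from that same thread would deadlock. */
static int aac_stop_worker(struct aac_dev *aac, struct Scsi_Host *host)
{
	if (aac->thread->pid == current->pid)
		return 0;		/* we are the worker: leave it be */
	spin_unlock_irq(host->host_lock);	/* EH caller held the lock */
	kthread_stop(aac->thread);
	return 1;			/* caller must restart the thread */
}
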
+@@ -1104,11 +1111,13 @@
+ 	if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT)
+ 		if ((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK)))
+ 			goto out;
++	if (jafo) {
+ 	aac->thread = kthread_run(aac_command_thread, aac, aac->name);
+ 	if (IS_ERR(aac->thread)) {
+ 		retval = PTR_ERR(aac->thread);
+ 		goto out;
+ 	}
++	}
+ 	(void)aac_get_adapter_info(aac);
+ 	quirks = aac_get_driver_ident(index)->quirks;
+ 	if ((quirks & AAC_QUIRK_34SG) && (host->sg_tablesize > 34)) {
+@@ -1150,7 +1159,98 @@
+ out:
+ 	aac->in_reset = 0;
+ 	scsi_unblock_requests(host);
++	if (jafo) {
+ 	spin_lock_irq(host->host_lock);
++	}
++	return retval;
++}
++
++int aac_reset_adapter(struct aac_dev * aac, int forced)
++{
++	unsigned long flagv = 0;
++	int retval;
++	struct Scsi_Host * host;
++
++	if (spin_trylock_irqsave(&aac->fib_lock, flagv) == 0)
++		return -EBUSY;
++
++	if (aac->in_reset) {
++		spin_unlock_irqrestore(&aac->fib_lock, flagv);
++		return -EBUSY;
++	}
++	aac->in_reset = 1;
++	spin_unlock_irqrestore(&aac->fib_lock, flagv);
++
++	/*
++	 * Wait for all commands to complete to this specific
++	 * target (block maximum 60 seconds). Although not necessary,
++	 * it does make us a good storage citizen.
++	 */
++	host = aac->scsi_host_ptr;
++	scsi_block_requests(host);
++	if (forced < 2) for (retval = 60; retval; --retval) {
++		struct scsi_device * dev;
++		struct scsi_cmnd * command;
++		int active = 0;
++
++		__shost_for_each_device(dev, host) {
++			spin_lock_irqsave(&dev->list_lock, flagv);
++			list_for_each_entry(command, &dev->cmd_list, list) {
++				if (command->SCp.phase == AAC_OWNER_FIRMWARE) {
++					active++;
++					break;
++				}
++			}
++			spin_unlock_irqrestore(&dev->list_lock, flagv);
++			if (active)
++				break;
++
++		}
++		/*
++		 * We can exit if all the commands are complete
++		 */
++		if (active == 0)
++			break;
++		ssleep(1);
++	}
++
++	/* Quiesce build, flush cache, write through mode */
++	aac_send_shutdown(aac);
++	spin_lock_irqsave(host->host_lock, flagv);
++	retval = _aac_reset_adapter(aac, forced);
++	spin_unlock_irqrestore(host->host_lock, flagv);
++
++	if (retval == -ENODEV) {
++		/* Unwind aac_send_shutdown() IOP_RESET unsupported/disabled */
++		struct fib * fibctx = aac_fib_alloc(aac);
++		if (fibctx) {
++			struct aac_pause *cmd;
++			int status;
++
++			aac_fib_init(fibctx);
++
++			cmd = (struct aac_pause *) fib_data(fibctx);
++
++			cmd->command = cpu_to_le32(VM_ContainerConfig);
++			cmd->type = cpu_to_le32(CT_PAUSE_IO);
++			cmd->timeout = cpu_to_le32(1);
++			cmd->min = cpu_to_le32(1);
++			cmd->noRescan = cpu_to_le32(1);
++			cmd->count = cpu_to_le32(0);
++
++			status = aac_fib_send(ContainerCommand,
++			  fibctx,
++			  sizeof(struct aac_pause),
++			  FsaNormal,
++			  -2 /* Timeout silently */, 1,
++			  NULL, NULL);
++
++			if (status >= 0)
++				aac_fib_complete(fibctx);
++			aac_fib_free(fibctx);
++		}
++	}
++
+ 	return retval;
+ }
+ 
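aac_reset_adapter() above waits up to 60 seconds for commands the firmware still owns before pulling the trigger (skipped when forced >= 2, the blind-reset case). The shape of that bounded quiesce loop, abstracted (the ownership predicate here is hypothetical):

/* Sketch: bounded one-second polling until outstanding firmware-owned
 * commands drain, so a deliberate reset disturbs as little in-flight
 * I/O as possible.  Gives up silently after 60 ticks. */
static void wait_for_quiesce(struct Scsi_Host *host)
{
	int ticks;

	for (ticks = 60; ticks; --ticks) {
		if (!any_cmd_owned_by_firmware(host))	/* hypothetical */
			return;
		ssleep(1);
	}
}
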
+@@ -1270,9 +1370,14 @@
+ 
+ 	printk(KERN_ERR "%s: Host adapter BLINK LED 0x%x\n", aac->name, BlinkLED);
+ 
++	if (!check_reset || (aac->supplement_adapter_info.SupportedOptions2 &
++	  le32_to_cpu(AAC_OPTION_IGNORE_RESET)))
++		goto out;
+ 	host = aac->scsi_host_ptr;
++	if (aac->thread->pid != current->pid)
+ 	spin_lock_irqsave(host->host_lock, flagv);
+-	BlinkLED = _aac_reset_adapter(aac);
++	BlinkLED = _aac_reset_adapter(aac, 0);
++	if (aac->thread->pid != current->pid)
+ 	spin_unlock_irqrestore(host->host_lock, flagv);
+ 	return BlinkLED;
+ 
+@@ -1300,6 +1405,9 @@
+ 	struct aac_fib_context *fibctx;
+ 	unsigned long flags;
+ 	DECLARE_WAITQUEUE(wait, current);
++	unsigned long next_jiffies = jiffies + HZ;
++	unsigned long next_check_jiffies = next_jiffies;
++	long difference = HZ;
+ 
+ 	/*
+ 	 *	We can only have one thread per adapter for AIF's.
+@@ -1507,11 +1615,79 @@
+ 		 *	There are no more AIF's
+ 		 */
+ 		spin_unlock_irqrestore(dev->queues->queue[HostNormCmdQueue].lock, flags);
+-		schedule();
+ 
+-		if (kthread_should_stop())
++		/*
++		 *	Background activity
++		 */
++		if ((time_before(next_check_jiffies,next_jiffies))
++		 && ((difference = next_check_jiffies - jiffies) <= 0)) {
++			next_check_jiffies = next_jiffies;
++			if (aac_check_health(dev) == 0) {
++				difference = ((long)(unsigned)check_interval)
++					   * HZ;
++				next_check_jiffies = jiffies + difference;
++			} else if (!dev->queues)
+ 			break;
++		}
++		if (!time_before(next_check_jiffies,next_jiffies)
++		 && ((difference = next_jiffies - jiffies) <= 0)) {
++			struct timeval now;
++			int ret;
++
++			/* Don't even try to talk to adapter if it's sick */
++			ret = aac_check_health(dev);
++			if (!ret && !dev->queues)
++				break;
++			next_check_jiffies = jiffies
++					   + ((long)(unsigned)check_interval)
++					   * HZ;
++			do_gettimeofday(&now);
++
++			/* Synchronize our watches */
++			if (((1000000 - (1000000 / HZ)) > now.tv_usec)
++			 && (now.tv_usec > (1000000 / HZ)))
++				difference = (((1000000 - now.tv_usec) * HZ)
++				  + 500000) / 1000000;
++			else if (ret == 0) {
++				struct fib *fibptr;
++
++				if ((fibptr = aac_fib_alloc(dev))) {
++					u32 * info;
++
++					aac_fib_init(fibptr);
++
++					info = (u32 *) fib_data(fibptr);
++					if (now.tv_usec > 500000)
++						++now.tv_sec;
++
++					*info = cpu_to_le32(now.tv_sec);
++
++					(void)aac_fib_send(SendHostTime,
++						fibptr,
++						sizeof(*info),
++						FsaNormal,
++						1, 1,
++						NULL,
++						NULL);
++					aac_fib_complete(fibptr);
++					aac_fib_free(fibptr);
++				}
++				difference = (long)(unsigned)update_interval*HZ;
++			} else {
++				/* retry shortly */
++				difference = 10 * HZ;
++			}
++			next_jiffies = jiffies + difference;
++			if (time_before(next_check_jiffies,next_jiffies))
++				difference = next_check_jiffies - jiffies;
++		}
++		if (difference <= 0)
++			difference = 1;
+ 		set_current_state(TASK_INTERRUPTIBLE);
++		schedule_timeout(difference);
++
++		if (kthread_should_stop())
++			break;
+ 	}
+ 	if (dev->queues)
+ 		remove_wait_queue(&dev->queues->queue[HostNormCmdQueue].cmdready, &wait);
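The scheduling arithmetic in the thread loop above aims the next wakeup just past a second boundary, so the SendHostTime update lands on a whole second. Worked through with concrete numbers (HZ = 250 is an assumption; any HZ works the same way):

/* With now.tv_usec = 300000 and HZ = 250:
 *   difference = (((1000000 - 300000) * 250) + 500000) / 1000000
 *              = 175500000 / 1000000 = 175 jiffies = 0.700 s,
 * i.e. sleep out the remainder of the current second, rounded to the
 * nearest jiffy, before sending the host time to the adapter. */
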
+diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/linit.c linux-2.6.22-try2/drivers/scsi/aacraid/linit.c
+--- linux-2.6.22-570/drivers/scsi/aacraid/linit.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/scsi/aacraid/linit.c	2007-12-19 15:29:23.000000000 -0500
+@@ -39,10 +39,8 @@
+ #include <linux/pci.h>
+ #include <linux/slab.h>
+ #include <linux/spinlock.h>
+-#include <linux/dma-mapping.h>
+ #include <linux/syscalls.h>
+ #include <linux/delay.h>
+-#include <linux/smp_lock.h>
+ #include <linux/kthread.h>
+ #include <asm/semaphore.h>
+ 
+@@ -223,12 +221,12 @@
+ 	{ aac_rx_init, "percraid", "DELL    ", "PERC 320/DC     ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Perc 320/DC*/
+ 	{ aac_sa_init, "aacraid",  "ADAPTEC ", "Adaptec 5400S   ", 4, AAC_QUIRK_34SG }, /* Adaptec 5400S (Mustang)*/
+ 	{ aac_sa_init, "aacraid",  "ADAPTEC ", "AAC-364         ", 4, AAC_QUIRK_34SG }, /* Adaptec 5400S (Mustang)*/
+-	{ aac_sa_init, "percraid", "DELL    ", "PERCRAID        ", 4, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Dell PERC2/QC */
++	{ aac_sa_init, "percraid", "DELL    ", "PERCRAID        ", 4, AAC_QUIRK_34SG }, /* Dell PERC2/QC */
+ 	{ aac_sa_init, "hpnraid",  "HP      ", "NetRAID         ", 4, AAC_QUIRK_34SG }, /* HP NetRAID-4M */
+ 
+ 	{ aac_rx_init, "aacraid",  "DELL    ", "RAID            ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Dell Catchall */
+ 	{ aac_rx_init, "aacraid",  "Legend  ", "RAID            ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Legend Catchall */
+-	{ aac_rx_init, "aacraid",  "ADAPTEC ", "RAID            ", 2, AAC_QUIRK_31BIT | AAC_QUIRK_34SG }, /* Adaptec Catch All */
++	{ aac_rx_init, "aacraid",  "ADAPTEC ", "RAID            ", 2 }, /* Adaptec Catch All */
+ 	{ aac_rkt_init, "aacraid", "ADAPTEC ", "RAID            ", 2 }, /* Adaptec Rocket Catch All */
+ 	{ aac_nark_init, "aacraid", "ADAPTEC ", "RAID            ", 2 } /* Adaptec NEMER/ARK Catch All */
+ };
+@@ -403,10 +401,6 @@
+ 
+ static int aac_slave_configure(struct scsi_device *sdev)
+ {
+-	if (sdev_channel(sdev) == CONTAINER_CHANNEL) {
+-		sdev->skip_ms_page_8 = 1;
+-		sdev->skip_ms_page_3f = 1;
+-	}
+ 	if ((sdev->type == TYPE_DISK) &&
+ 			(sdev_channel(sdev) != CONTAINER_CHANNEL)) {
+ 		if (expose_physicals == 0)
+@@ -450,6 +444,43 @@
+ 	return 0;
+ }
+ 
++/**
++ *	aac_change_queue_depth		-	alter queue depths
++ *	@sdev:	SCSI device we are considering
++ *	@depth:	desired queue depth
++ *
++ *	Alters queue depths for target device based on the host adapter's
++ *	total capacity and the queue depth supported by the target device.
++ */
++
++static int aac_change_queue_depth(struct scsi_device *sdev, int depth)
++{
++	if (sdev->tagged_supported && (sdev->type == TYPE_DISK) &&
++	    (sdev_channel(sdev) == CONTAINER_CHANNEL)) {
++		struct scsi_device * dev;
++		struct Scsi_Host *host = sdev->host;
++		unsigned num = 0;
++
++		__shost_for_each_device(dev, host) {
++			if (dev->tagged_supported && (dev->type == TYPE_DISK) &&
++			    (sdev_channel(dev) == CONTAINER_CHANNEL))
++				++num;
++			++num;
++		}
++		if (num >= host->can_queue)
++			num = host->can_queue - 1;
++		if (depth > (host->can_queue - num))
++			depth = host->can_queue - num;
++		if (depth > 256)
++			depth = 256;
++		else if (depth < 2)
++			depth = 2;
++		scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG, depth);
++	} else
++		scsi_adjust_queue_depth(sdev, 0, 1);
++	return sdev->queue_depth;
++}
++
+ static int aac_ioctl(struct scsi_device *sdev, int cmd, void __user * arg)
+ {
+ 	struct aac_dev *dev = (struct aac_dev *)sdev->host->hostdata;
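To make the clamping in aac_change_queue_depth() above concrete, a worked example (the can_queue value is illustrative):

/* Example: host->can_queue = 512, four container disks plus two
 * pass-through devices.  The loop counts every device once and the
 * container disks a second time, so num = 6 + 4 = 10; a requested
 * depth is first capped at 512 - 10 = 502 and then clamped into the
 * [2, 256] window -- here, 256. */
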
+@@ -548,6 +579,14 @@
+ 		ssleep(1);
+ 	}
+ 	printk(KERN_ERR "%s: SCSI bus appears hung\n", AAC_DRIVERNAME);
++	/*
++	 * This adapter needs a blind reset; only do so for adapters that
++	 * support a register reset instead of a commanded reset.
++	 */
++	if ((aac->supplement_adapter_info.SupportedOptions2 &
++	  le32_to_cpu(AAC_OPTION_MU_RESET|AAC_OPTION_IGNORE_RESET)) ==
++	  le32_to_cpu(AAC_OPTION_MU_RESET))
++		aac_reset_adapter(aac, 2); /* Bypass wait for command quiesce */
+ 	return SUCCESS; /* Cause an immediate retry of the command with a ten second delay after successful tur */
+ }
+ 
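The two-flag test above encodes "MU_RESET advertised and IGNORE_RESET not set" in a single mask-and-compare. Isolated as a predicate:

/* (x & (A | B)) == A  <=>  A is set and B is clear. */
static inline int aac_blind_reset_ok(struct aac_dev *aac)
{
	__le32 opts = aac->supplement_adapter_info.SupportedOptions2;

	return (opts & cpu_to_le32(AAC_OPTION_MU_RESET |
				   AAC_OPTION_IGNORE_RESET)) ==
	       cpu_to_le32(AAC_OPTION_MU_RESET);
}
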
+@@ -735,15 +774,21 @@
+ 	return len;
+ }
+ 
+-static ssize_t aac_show_serial_number(struct class_device *class_dev,
+-		char *buf)
++ssize_t aac_show_serial_number(struct class_device *class_dev, char *buf)
+ {
+ 	struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata;
+ 	int len = 0;
+ 
+ 	if (le32_to_cpu(dev->adapter_info.serial[0]) != 0xBAD0)
+-		len = snprintf(buf, PAGE_SIZE, "%x\n",
++		len = snprintf(buf, PAGE_SIZE, "%06X\n",
+ 		  le32_to_cpu(dev->adapter_info.serial[0]));
++	if (len &&
++	  !memcmp(&dev->supplement_adapter_info.MfgPcbaSerialNo[
++	    sizeof(dev->supplement_adapter_info.MfgPcbaSerialNo)+2-len],
++	  buf, len))
++		len = snprintf(buf, PAGE_SIZE, "%.*s\n",
++		  (int)sizeof(dev->supplement_adapter_info.MfgPcbaSerialNo),
++		  dev->supplement_adapter_info.MfgPcbaSerialNo);
+ 	return len;
+ }
+ 
+@@ -759,6 +804,31 @@
+ 	  class_to_shost(class_dev)->max_id);
+ }
+ 
++static ssize_t aac_store_reset_adapter(struct class_device *class_dev,
++		const char *buf, size_t count)
++{
++	int retval = -EACCES;
++
++	if (!capable(CAP_SYS_ADMIN))
++		return retval;
++	retval = aac_reset_adapter((struct aac_dev*)class_to_shost(class_dev)->hostdata, buf[0] == '!');
++	if (retval >= 0)
++		retval = count;
++	return retval;
++}
++
++static ssize_t aac_show_reset_adapter(struct class_device *class_dev,
++		char *buf)
++{
++	struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata;
++	int len, tmp;
++
++	tmp = aac_adapter_check_health(dev);
++	if ((tmp == 0) && dev->in_reset)
++		tmp = -EBUSY;
++	len = snprintf(buf, PAGE_SIZE, "0x%x", tmp);
++	return len;
++}
+ 
+ static struct class_device_attribute aac_model = {
+ 	.attr = {
+@@ -816,6 +886,14 @@
+ 	},
+ 	.show = aac_show_max_id,
+ };
++static struct class_device_attribute aac_reset = {
++	.attr = {
++		.name = "reset_host",
++		.mode = S_IWUSR|S_IRUGO,
++	},
++	.store = aac_store_reset_adapter,
++	.show = aac_show_reset_adapter,
++};
+ 
+ static struct class_device_attribute *aac_attrs[] = {
+ 	&aac_model,
+@@ -826,6 +904,7 @@
+ 	&aac_serial_number,
+ 	&aac_max_channel,
+ 	&aac_max_id,
++	&aac_reset,
+ 	NULL
+ };
+ 
+@@ -852,6 +931,7 @@
+ 	.bios_param     		= aac_biosparm,	
+ 	.shost_attrs			= aac_attrs,
+ 	.slave_configure		= aac_slave_configure,
++	.change_queue_depth		= aac_change_queue_depth,
+ 	.eh_abort_handler		= aac_eh_abort,
+ 	.eh_host_reset_handler		= aac_eh_reset,
+ 	.can_queue      		= AAC_NUM_IO_FIB,	
+@@ -1090,7 +1170,7 @@
+ {
+ 	int error;
+ 	
+-	printk(KERN_INFO "Adaptec %s driver (%s)\n",
++	printk(KERN_INFO "Adaptec %s driver %s\n",
+ 	  AAC_DRIVERNAME, aac_driver_version);
+ 
+ 	error = pci_register_driver(&aac_pci_driver);
+diff -Nurb linux-2.6.22-570/drivers/scsi/aacraid/rx.c linux-2.6.22-try2/drivers/scsi/aacraid/rx.c
+--- linux-2.6.22-570/drivers/scsi/aacraid/rx.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/aacraid/rx.c	2007-12-19 15:29:23.000000000 -0500
+@@ -464,6 +464,8 @@
+ {
+ 	u32 var;
+ 
++	if (!(dev->supplement_adapter_info.SupportedOptions2 &
++	  le32_to_cpu(AAC_OPTION_MU_RESET)) || (bled >= 0) || (bled == -2)) {
+ 	if (bled)
+ 		printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n",
+ 			dev->name, dev->id, bled);
+@@ -479,6 +481,7 @@
+ 
+ 	if (bled && (bled != -ETIMEDOUT))
+ 		return -EINVAL;
++	}
+ 	if (bled || (var == 0x3803000F)) { /* USE_OTHER_METHOD */
+ 		rx_writel(dev, MUnit.reserved2, 3);
+ 		msleep(5000); /* Delay 5 seconds */
+@@ -596,7 +599,7 @@
+ 		}
+ 		msleep(1);
+ 	}
+-	if (restart)
++	if (restart && aac_commit)
+ 		aac_commit = 1;
+ 	/*
+ 	 *	Fill in the common function dispatch table.
+diff -Nurb linux-2.6.22-570/drivers/scsi/advansys.c linux-2.6.22-try2/drivers/scsi/advansys.c
+--- linux-2.6.22-570/drivers/scsi/advansys.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/advansys.c	2007-12-19 15:29:23.000000000 -0500
+@@ -798,7 +798,6 @@
+ #include <scsi/scsi_tcq.h>
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_host.h>
+-#include "advansys.h"
+ #ifdef CONFIG_PCI
+ #include <linux/pci.h>
+ #endif /* CONFIG_PCI */
+@@ -2014,7 +2013,7 @@
+ STATIC void      AscEnableIsaDma(uchar);
+ #endif /* CONFIG_ISA */
+ STATIC ASC_DCNT  AscGetMaxDmaCount(ushort);
+-
++static const char *advansys_info(struct Scsi_Host *shp);
+ 
+ /*
+  * --- Adv Library Constants and Macros
+@@ -3970,10 +3969,6 @@
+     ASC_IS_PCI,
+ };
+ 
+-/*
+- * Used with the LILO 'advansys' option to eliminate or
+- * limit I/O port probing at boot time, cf. advansys_setup().
+- */
+ STATIC int asc_iopflag = ASC_FALSE;
+ STATIC int asc_ioport[ASC_NUM_IOPORT_PROBE] = { 0, 0, 0, 0 };
+ 
+@@ -4055,10 +4050,6 @@
+ #endif /* ADVANSYS_DEBUG */
+ 
+ 
+-/*
+- * --- Linux 'struct scsi_host_template' and advansys_setup() Functions
+- */
+-
+ #ifdef CONFIG_PROC_FS
+ /*
+  * advansys_proc_info() - /proc/scsi/advansys/[0-(ASC_NUM_BOARD_SUPPORTED-1)]
+@@ -4080,7 +4071,7 @@
+  * if 'prtbuf' is too small it will not be overwritten. Instead the
+  * user just won't get all the available statistics.
+  */
+-int
++static int
+ advansys_proc_info(struct Scsi_Host *shost, char *buffer, char **start,
+ 		off_t offset, int length, int inout)
+ {
+@@ -4296,7 +4287,7 @@
+  * it must not call SCSI mid-level functions including scsi_malloc()
+  * and scsi_free().
+  */
+-int __init
++static int __init
+ advansys_detect(struct scsi_host_template *tpnt)
+ {
+     static int          detect_called = ASC_FALSE;
+@@ -5428,7 +5419,7 @@
+  *
+  * Release resources allocated for a single AdvanSys adapter.
+  */
+-int
++static int
+ advansys_release(struct Scsi_Host *shp)
+ {
+     asc_board_t    *boardp;
+@@ -5475,7 +5466,7 @@
+  * Note: The information line should not exceed ASC_INFO_SIZE bytes,
+  * otherwise the static 'info' array will be overrun.
+  */
+-const char *
++static const char *
+ advansys_info(struct Scsi_Host *shp)
+ {
+     static char     info[ASC_INFO_SIZE];
+@@ -5568,7 +5559,7 @@
+  * This function always returns 0. Command return status is saved
+  * in the 'scp' result field.
+  */
+-int
++static int
+ advansys_queuecommand(struct scsi_cmnd *scp, void (*done)(struct scsi_cmnd *))
+ {
+     struct Scsi_Host    *shp;
+@@ -5656,7 +5647,7 @@
+  * sleeping is allowed and no locking other than for host structures is
+  * required. Returns SUCCESS or FAILED.
+  */
+-int
++static int
+ advansys_reset(struct scsi_cmnd *scp)
+ {
+     struct Scsi_Host     *shp;
+@@ -5841,7 +5832,7 @@
+  * ip[1]: sectors
+  * ip[2]: cylinders
+  */
+-int
++static int
+ advansys_biosparam(struct scsi_device *sdev, struct block_device *bdev,
+ 		sector_t capacity, int ip[])
+ {
+@@ -5875,82 +5866,6 @@
+ }
+ 
+ /*
+- * advansys_setup()
+- *
+- * This function is called from init/main.c at boot time.
+- * It it passed LILO parameters that can be set from the
+- * LILO command line or in /etc/lilo.conf.
+- *
+- * It is used by the AdvanSys driver to either disable I/O
+- * port scanning or to limit scanning to 1 - 4 I/O ports.
+- * Regardless of the option setting EISA and PCI boards
+- * will still be searched for and detected. This option
+- * only affects searching for ISA and VL boards.
+- *
+- * If ADVANSYS_DEBUG is defined the driver debug level may
+- * be set using the 5th (ASC_NUM_IOPORT_PROBE + 1) I/O Port.
+- *
+- * Examples:
+- * 1. Eliminate I/O port scanning:
+- *         boot: linux advansys=
+- *       or
+- *         boot: linux advansys=0x0
+- * 2. Limit I/O port scanning to one I/O port:
+- *        boot: linux advansys=0x110
+- * 3. Limit I/O port scanning to four I/O ports:
+- *        boot: linux advansys=0x110,0x210,0x230,0x330
+- * 4. If ADVANSYS_DEBUG, limit I/O port scanning to four I/O ports and
+- *    set the driver debug level to 2.
+- *        boot: linux advansys=0x110,0x210,0x230,0x330,0xdeb2
+- *
+- * ints[0] - number of arguments
+- * ints[1] - first argument
+- * ints[2] - second argument
+- * ...
+- */
+-void __init
+-advansys_setup(char *str, int *ints)
+-{
+-    int    i;
+-
+-    if (asc_iopflag == ASC_TRUE) {
+-        printk("AdvanSys SCSI: 'advansys' LILO option may appear only once\n");
+-        return;
+-    }
+-
+-    asc_iopflag = ASC_TRUE;
+-
+-    if (ints[0] > ASC_NUM_IOPORT_PROBE) {
+-#ifdef ADVANSYS_DEBUG
+-        if ((ints[0] == ASC_NUM_IOPORT_PROBE + 1) &&
+-            (ints[ASC_NUM_IOPORT_PROBE + 1] >> 4 == 0xdeb)) {
+-            asc_dbglvl = ints[ASC_NUM_IOPORT_PROBE + 1] & 0xf;
+-        } else {
+-#endif /* ADVANSYS_DEBUG */
+-            printk("AdvanSys SCSI: only %d I/O ports accepted\n",
+-                ASC_NUM_IOPORT_PROBE);
+-#ifdef ADVANSYS_DEBUG
+-        }
+-#endif /* ADVANSYS_DEBUG */
+-    }
+-
+-#ifdef ADVANSYS_DEBUG
+-    ASC_DBG1(1, "advansys_setup: ints[0] %d\n", ints[0]);
+-    for (i = 1; i < ints[0]; i++) {
+-        ASC_DBG2(1, " ints[%d] 0x%x", i, ints[i]);
+-    }
+-    ASC_DBG(1, "\n");
+-#endif /* ADVANSYS_DEBUG */
+-
+-    for (i = 1; i <= ints[0] && i <= ASC_NUM_IOPORT_PROBE; i++) {
+-        asc_ioport[i-1] = ints[i];
+-        ASC_DBG2(1, "advansys_setup: asc_ioport[%d] 0x%x\n",
+-            i - 1, asc_ioport[i-1]);
+-    }
+-}
+-
+-
+-/*
+  * --- Loadable Driver Support
+  */
+ 
+diff -Nurb linux-2.6.22-570/drivers/scsi/advansys.h linux-2.6.22-try2/drivers/scsi/advansys.h
+--- linux-2.6.22-570/drivers/scsi/advansys.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/advansys.h	1969-12-31 19:00:00.000000000 -0500
+@@ -1,36 +0,0 @@
+-/*
+- * advansys.h - Linux Host Driver for AdvanSys SCSI Adapters
+- * 
+- * Copyright (c) 1995-2000 Advanced System Products, Inc.
+- * Copyright (c) 2000-2001 ConnectCom Solutions, Inc.
+- * All Rights Reserved.
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that redistributions of source
+- * code retain the above copyright notice and this comment without
+- * modification.
+- *
+- * As of March 8, 2000 Advanced System Products, Inc. (AdvanSys)
+- * changed its name to ConnectCom Solutions, Inc.
+- *
+- */
+-
+-#ifndef _ADVANSYS_H
+-#define _ADVANSYS_H
+-
+-/*
+- * struct scsi_host_template function prototypes.
+- */
+-int advansys_detect(struct scsi_host_template *);
+-int advansys_release(struct Scsi_Host *);
+-const char *advansys_info(struct Scsi_Host *);
+-int advansys_queuecommand(struct scsi_cmnd *, void (* done)(struct scsi_cmnd *));
+-int advansys_reset(struct scsi_cmnd *);
+-int advansys_biosparam(struct scsi_device *, struct block_device *,
+-		sector_t, int[]);
+-static int advansys_slave_configure(struct scsi_device *);
+-
+-/* init/main.c setup function */
+-void advansys_setup(char *, int *);
+-
+-#endif /* _ADVANSYS_H */
+diff -Nurb linux-2.6.22-570/drivers/scsi/aha152x.c linux-2.6.22-try2/drivers/scsi/aha152x.c
+--- linux-2.6.22-570/drivers/scsi/aha152x.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/aha152x.c	2007-12-19 15:29:23.000000000 -0500
+@@ -240,6 +240,7 @@
+ #include <linux/io.h>
+ #include <linux/blkdev.h>
+ #include <asm/system.h>
++#include <linux/completion.h>
+ #include <linux/errno.h>
+ #include <linux/string.h>
+ #include <linux/wait.h>
+@@ -253,7 +254,6 @@
+ #include <linux/spinlock.h>
+ #include <linux/workqueue.h>
+ #include <linux/list.h>
+-#include <asm/semaphore.h>
+ #include <scsi/scsicam.h>
+ 
+ #include "scsi.h"
+@@ -551,7 +551,7 @@
+  */
+ struct aha152x_scdata {
+ 	Scsi_Cmnd *next;	/* next sc in queue */
+-	struct semaphore *sem;	/* semaphore to block on */
++	struct completion *done;/* semaphore to block on */
+ 	unsigned char cmd_len;
+ 	unsigned char cmnd[MAX_COMMAND_SIZE];
+ 	unsigned short use_sg;
+@@ -608,7 +608,7 @@
+ 
+ #define SCDATA(SCpnt)		((struct aha152x_scdata *) (SCpnt)->host_scribble)
+ #define SCNEXT(SCpnt)		SCDATA(SCpnt)->next
+-#define SCSEM(SCpnt)		SCDATA(SCpnt)->sem
++#define SCSEM(SCpnt)		SCDATA(SCpnt)->done
+ 
+ #define SG_ADDRESS(buffer)	((char *) (page_address((buffer)->page)+(buffer)->offset))
+ 
+@@ -969,7 +969,8 @@
+ /* 
+  *  Queue a command and setup interrupts for a free bus.
+  */
+-static int aha152x_internal_queue(Scsi_Cmnd *SCpnt, struct semaphore *sem, int phase, void (*done)(Scsi_Cmnd *))
++static int aha152x_internal_queue(Scsi_Cmnd *SCpnt, struct completion *complete,
++		int phase, void (*done)(Scsi_Cmnd *))
+ {
+ 	struct Scsi_Host *shpnt = SCpnt->device->host;
+ 	unsigned long flags;
+@@ -1013,7 +1014,7 @@
+ 	}
+ 
+ 	SCNEXT(SCpnt)		= NULL;
+-	SCSEM(SCpnt)		= sem;
++	SCSEM(SCpnt)		= complete;
+ 
+ 	/* setup scratch area
+ 	   SCp.ptr              : buffer pointer
+@@ -1084,9 +1085,9 @@
+ 	DPRINTK(debug_eh, INFO_LEAD "reset_done called\n", CMDINFO(SCpnt));
+ #endif
+ 	if(SCSEM(SCpnt)) {
+-		up(SCSEM(SCpnt));
++		complete(SCSEM(SCpnt));
+ 	} else {
+-		printk(KERN_ERR "aha152x: reset_done w/o semaphore\n");
++		printk(KERN_ERR "aha152x: reset_done w/o completion\n");
+ 	}
+ }
+ 
+@@ -1139,21 +1140,6 @@
+ 	return FAILED;
+ }
+ 
+-static void timer_expired(unsigned long p)
+-{
+-	Scsi_Cmnd	 *SCp   = (Scsi_Cmnd *)p;
+-	struct semaphore *sem   = SCSEM(SCp);
+-	struct Scsi_Host *shpnt = SCp->device->host;
+-	unsigned long flags;
+-
+-	/* remove command from issue queue */
+-	DO_LOCK(flags);
+-	remove_SC(&ISSUE_SC, SCp);
+-	DO_UNLOCK(flags);
+-
+-	up(sem);
+-}
+-
+ /*
+  * Reset a device
+  *
+@@ -1161,14 +1147,14 @@
+ static int aha152x_device_reset(Scsi_Cmnd * SCpnt)
+ {
+ 	struct Scsi_Host *shpnt = SCpnt->device->host;
+-	DECLARE_MUTEX_LOCKED(sem);
+-	struct timer_list timer;
++	DECLARE_COMPLETION(done);
+ 	int ret, issued, disconnected;
+ 	unsigned char old_cmd_len = SCpnt->cmd_len;
+ 	unsigned short old_use_sg = SCpnt->use_sg;
+ 	void *old_buffer = SCpnt->request_buffer;
+ 	unsigned old_bufflen = SCpnt->request_bufflen;
+ 	unsigned long flags;
++	unsigned long timeleft;
+ 
+ #if defined(AHA152X_DEBUG)
+ 	if(HOSTDATA(shpnt)->debug & debug_eh) {
+@@ -1192,15 +1178,15 @@
+ 	SCpnt->request_buffer  = NULL;
+ 	SCpnt->request_bufflen = 0;
+ 
+-	init_timer(&timer);
+-	timer.data     = (unsigned long) SCpnt;
+-	timer.expires  = jiffies + 100*HZ;   /* 10s */
+-	timer.function = (void (*)(unsigned long)) timer_expired;
+-
+-	aha152x_internal_queue(SCpnt, &sem, resetting, reset_done);
+-	add_timer(&timer);
+-	down(&sem);
+-	del_timer(&timer);
++	aha152x_internal_queue(SCpnt, &done, resetting, reset_done);
++
++	timeleft = wait_for_completion_timeout(&done, 100*HZ);
++	if (!timeleft) {
++		/* remove command from issue queue */
++		DO_LOCK(flags);
++		remove_SC(&ISSUE_SC, SCpnt);
++		DO_UNLOCK(flags);
++	}
+ 
+ 	SCpnt->cmd_len         = old_cmd_len;
+ 	SCpnt->use_sg          = old_use_sg;
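The aha152x hunks above replace a semaphore plus a hand-rolled timer with wait_for_completion_timeout(), which blocks and times out in one primitive and returns the jiffies remaining (0 on timeout), so the timeout cleanup moves to the waiter and the timer-vs-wakeup race disappears. Condensed from the conversion above:

/* Queue, wait at most 100 s, and reclaim the command ourselves if it
 * never completed.  DO_LOCK/remove_SC are aha152x internals. */
DECLARE_COMPLETION(done);
unsigned long timeleft;

aha152x_internal_queue(SCpnt, &done, resetting, reset_done);
timeleft = wait_for_completion_timeout(&done, 100 * HZ);
if (!timeleft) {
	DO_LOCK(flags);
	remove_SC(&ISSUE_SC, SCpnt);	/* still queued: pull it back */
	DO_UNLOCK(flags);
}
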
+diff -Nurb linux-2.6.22-570/drivers/scsi/aha1740.c linux-2.6.22-try2/drivers/scsi/aha1740.c
+--- linux-2.6.22-570/drivers/scsi/aha1740.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/aha1740.c	2007-12-19 15:29:23.000000000 -0500
+@@ -271,19 +271,7 @@
+ 				continue;
+ 			}
+ 			sgptr = (struct aha1740_sg *) SCtmp->host_scribble;
+-			if (SCtmp->use_sg) {
+-				/* We used scatter-gather.
+-				   Do the unmapping dance. */
+-				dma_unmap_sg (&edev->dev,
+-					      (struct scatterlist *) SCtmp->request_buffer,
+-					      SCtmp->use_sg,
+-					      SCtmp->sc_data_direction);
+-			} else {
+-				dma_unmap_single (&edev->dev,
+-						  sgptr->buf_dma_addr,
+-						  SCtmp->request_bufflen,
+-						  DMA_BIDIRECTIONAL);
+-			}
++			scsi_dma_unmap(SCtmp);
+ 	    
+ 			/* Free the sg block */
+ 			dma_free_coherent (&edev->dev,
+@@ -349,11 +337,9 @@
+ 	unchar target = scmd_id(SCpnt);
+ 	struct aha1740_hostdata *host = HOSTDATA(SCpnt->device->host);
+ 	unsigned long flags;
+-	void *buff = SCpnt->request_buffer;
+-	int bufflen = SCpnt->request_bufflen;
+ 	dma_addr_t sg_dma;
+ 	struct aha1740_sg *sgptr;
+-	int ecbno;
++	int ecbno, nseg;
+ 	DEB(int i);
+ 
+ 	if(*cmd == REQUEST_SENSE) {
+@@ -424,23 +410,22 @@
+ 	sgptr = (struct aha1740_sg *) SCpnt->host_scribble;
+ 	sgptr->sg_dma_addr = sg_dma;
+     
+-	if (SCpnt->use_sg) {
+-		struct scatterlist * sgpnt;
++	nseg = scsi_dma_map(SCpnt);
++	BUG_ON(nseg < 0);
++	if (nseg) {
++		struct scatterlist *sg;
+ 		struct aha1740_chain * cptr;
+-		int i, count;
++		int i;
+ 		DEB(unsigned char * ptr);
+ 
+ 		host->ecb[ecbno].sg = 1;  /* SCSI Initiator Command
+ 					   * w/scatter-gather*/
+-		sgpnt = (struct scatterlist *) SCpnt->request_buffer;
+ 		cptr = sgptr->sg_chain;
+-		count = dma_map_sg (&host->edev->dev, sgpnt, SCpnt->use_sg,
+-				    SCpnt->sc_data_direction);
+-		for(i=0; i < count; i++) {
+-			cptr[i].datalen = sg_dma_len (sgpnt + i);
+-			cptr[i].dataptr = sg_dma_address (sgpnt + i);
++		scsi_for_each_sg(SCpnt, sg, nseg, i) {
++			cptr[i].datalen = sg_dma_len (sg);
++			cptr[i].dataptr = sg_dma_address (sg);
+ 		}
+-		host->ecb[ecbno].datalen = count*sizeof(struct aha1740_chain);
++		host->ecb[ecbno].datalen = nseg * sizeof(struct aha1740_chain);
+ 		host->ecb[ecbno].dataptr = sg_dma;
+ #ifdef DEBUG
+ 		printk("cptr %x: ",cptr);
+@@ -448,11 +433,8 @@
+ 		for(i=0;i<24;i++) printk("%02x ", ptr[i]);
+ #endif
+ 	} else {
+-		host->ecb[ecbno].datalen = bufflen;
+-		sgptr->buf_dma_addr =  dma_map_single (&host->edev->dev,
+-						       buff, bufflen,
+-						       DMA_BIDIRECTIONAL);
+-		host->ecb[ecbno].dataptr = sgptr->buf_dma_addr;
++		host->ecb[ecbno].datalen = 0;
++		host->ecb[ecbno].dataptr = 0;
+ 	}
+ 	host->ecb[ecbno].lun = SCpnt->device->lun;
+ 	host->ecb[ecbno].ses = 1; /* Suppress underrun errors */
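aha1740, like most drivers in this series, drops its dual scatterlist/single-buffer mapping code for the scsi_dma_map() accessors; since the midlayer now always hands the driver a scatterlist, one loop covers both cases. The recurring shape of the conversion (the hardware descriptor type is hypothetical):

struct hw_sg {			/* hypothetical controller descriptor */
	dma_addr_t addr;
	unsigned int len;
};

static int build_sglist(struct scsi_cmnd *cmd, struct hw_sg *hw)
{
	struct scatterlist *sg;
	int i, nseg;

	nseg = scsi_dma_map(cmd);	/* 0 = no data, < 0 = map error */
	if (nseg <= 0)
		return nseg;
	scsi_for_each_sg(cmd, sg, nseg, i) {
		hw[i].addr = sg_dma_address(sg);
		hw[i].len  = sg_dma_len(sg);
	}
	return nseg;	/* pair with scsi_dma_unmap(cmd) on completion */
}
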
+diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.c linux-2.6.22-try2/drivers/scsi/aic7xxx/aic79xx_osm.c
+--- linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/aic7xxx/aic79xx_osm.c	2007-12-19 15:29:23.000000000 -0500
+@@ -376,21 +376,10 @@
+ ahd_linux_unmap_scb(struct ahd_softc *ahd, struct scb *scb)
+ {
+ 	struct scsi_cmnd *cmd;
+-	int direction;
+ 
+ 	cmd = scb->io_ctx;
+-	direction = cmd->sc_data_direction;
+ 	ahd_sync_sglist(ahd, scb, BUS_DMASYNC_POSTWRITE);
+-	if (cmd->use_sg != 0) {
+-		struct scatterlist *sg;
+-
+-		sg = (struct scatterlist *)cmd->request_buffer;
+-		pci_unmap_sg(ahd->dev_softc, sg, cmd->use_sg, direction);
+-	} else if (cmd->request_bufflen != 0) {
+-		pci_unmap_single(ahd->dev_softc,
+-				 scb->platform_data->buf_busaddr,
+-				 cmd->request_bufflen, direction);
+-	}
++	scsi_dma_unmap(cmd);
+ }
+ 
+ /******************************** Macros **************************************/
+@@ -1422,6 +1411,7 @@
+ 	u_int	 col_idx;
+ 	uint16_t mask;
+ 	unsigned long flags;
++	int nseg;
+ 
+ 	ahd_lock(ahd, &flags);
+ 
+@@ -1494,18 +1484,17 @@
+ 	ahd_set_residual(scb, 0);
+ 	ahd_set_sense_residual(scb, 0);
+ 	scb->sg_count = 0;
+-	if (cmd->use_sg != 0) {
+-		void	*sg;
++
++	nseg = scsi_dma_map(cmd);
++	BUG_ON(nseg < 0);
++	if (nseg > 0) {
++		void *sg = scb->sg_list;
+ 		struct	 scatterlist *cur_seg;
+-		u_int	 nseg;
+-		int	 dir;
++		int i;
+ 
+-		cur_seg = (struct scatterlist *)cmd->request_buffer;
+-		dir = cmd->sc_data_direction;
+-		nseg = pci_map_sg(ahd->dev_softc, cur_seg,
+-				  cmd->use_sg, dir);
+ 		scb->platform_data->xfer_len = 0;
+-		for (sg = scb->sg_list; nseg > 0; nseg--, cur_seg++) {
++
++		scsi_for_each_sg(cmd, cur_seg, nseg, i) {
+ 			dma_addr_t addr;
+ 			bus_size_t len;
+ 
+@@ -1513,22 +1502,8 @@
+ 			len = sg_dma_len(cur_seg);
+ 			scb->platform_data->xfer_len += len;
+ 			sg = ahd_sg_setup(ahd, scb, sg, addr, len,
+-					  /*last*/nseg == 1);
++					  i == (nseg - 1));
+ 		}
+-	} else if (cmd->request_bufflen != 0) {
+-		void *sg;
+-		dma_addr_t addr;
+-		int dir;
+-
+-		sg = scb->sg_list;
+-		dir = cmd->sc_data_direction;
+-		addr = pci_map_single(ahd->dev_softc,
+-				      cmd->request_buffer,
+-				      cmd->request_bufflen, dir);
+-		scb->platform_data->xfer_len = cmd->request_bufflen;
+-		scb->platform_data->buf_busaddr = addr;
+-		sg = ahd_sg_setup(ahd, scb, sg, addr,
+-				  cmd->request_bufflen, /*last*/TRUE);
+ 	}
+ 
+ 	LIST_INSERT_HEAD(&ahd->pending_scbs, scb, pending_links);
+diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.h linux-2.6.22-try2/drivers/scsi/aic7xxx/aic79xx_osm.h
+--- linux-2.6.22-570/drivers/scsi/aic7xxx/aic79xx_osm.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/aic7xxx/aic79xx_osm.h	2007-12-19 15:29:23.000000000 -0500
+@@ -781,7 +781,7 @@
+ static __inline
+ void ahd_set_residual(struct scb *scb, u_long resid)
+ {
+-	scb->io_ctx->resid = resid;
++	scsi_set_resid(scb->io_ctx, resid);
+ }
+ 
+ static __inline
+@@ -793,7 +793,7 @@
+ static __inline
+ u_long ahd_get_residual(struct scb *scb)
+ {
+-	return (scb->io_ctx->resid);
++	return scsi_get_resid(scb->io_ctx);
+ }
+ 
+ static __inline
+diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.c linux-2.6.22-try2/drivers/scsi/aic7xxx/aic7xxx_osm.c
+--- linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/aic7xxx/aic7xxx_osm.c	2007-12-19 15:29:23.000000000 -0500
+@@ -402,18 +402,8 @@
+ 
+ 	cmd = scb->io_ctx;
+ 	ahc_sync_sglist(ahc, scb, BUS_DMASYNC_POSTWRITE);
+-	if (cmd->use_sg != 0) {
+-		struct scatterlist *sg;
+ 
+-		sg = (struct scatterlist *)cmd->request_buffer;
+-		pci_unmap_sg(ahc->dev_softc, sg, cmd->use_sg,
+-			     cmd->sc_data_direction);
+-	} else if (cmd->request_bufflen != 0) {
+-		pci_unmap_single(ahc->dev_softc,
+-				 scb->platform_data->buf_busaddr,
+-				 cmd->request_bufflen,
+-				 cmd->sc_data_direction);
+-	}
++	scsi_dma_unmap(cmd);
+ }
+ 
+ static __inline int
+@@ -1381,6 +1371,7 @@
+ 	struct	 ahc_tmode_tstate *tstate;
+ 	uint16_t mask;
+ 	struct scb_tailq *untagged_q = NULL;
++	int nseg;
+ 
+ 	/*
+ 	 * Schedule us to run later.  The only reason we are not
+@@ -1472,23 +1463,21 @@
+ 	ahc_set_residual(scb, 0);
+ 	ahc_set_sense_residual(scb, 0);
+ 	scb->sg_count = 0;
+-	if (cmd->use_sg != 0) {
++
++	nseg = scsi_dma_map(cmd);
++	BUG_ON(nseg < 0);
++	if (nseg > 0) {
+ 		struct	ahc_dma_seg *sg;
+ 		struct	scatterlist *cur_seg;
+-		struct	scatterlist *end_seg;
+-		int	nseg;
++		int i;
+ 
+-		cur_seg = (struct scatterlist *)cmd->request_buffer;
+-		nseg = pci_map_sg(ahc->dev_softc, cur_seg, cmd->use_sg,
+-				  cmd->sc_data_direction);
+-		end_seg = cur_seg + nseg;
+ 		/* Copy the segments into the SG list. */
+ 		sg = scb->sg_list;
+ 		/*
+ 		 * The sg_count may be larger than nseg if
+ 		 * a transfer crosses a 32bit page.
+ 		 */ 
+-		while (cur_seg < end_seg) {
++		scsi_for_each_sg(cmd, cur_seg, nseg, i) {
+ 			dma_addr_t addr;
+ 			bus_size_t len;
+ 			int consumed;
+@@ -1499,7 +1488,6 @@
+ 						     sg, addr, len);
+ 			sg += consumed;
+ 			scb->sg_count += consumed;
+-			cur_seg++;
+ 		}
+ 		sg--;
+ 		sg->len |= ahc_htole32(AHC_DMA_LAST_SEG);
+@@ -1516,33 +1504,6 @@
+ 		 */
+ 		scb->hscb->dataptr = scb->sg_list->addr;
+ 		scb->hscb->datacnt = scb->sg_list->len;
+-	} else if (cmd->request_bufflen != 0) {
+-		struct	 ahc_dma_seg *sg;
+-		dma_addr_t addr;
+-
+-		sg = scb->sg_list;
+-		addr = pci_map_single(ahc->dev_softc,
+-				      cmd->request_buffer,
+-				      cmd->request_bufflen,
+-				      cmd->sc_data_direction);
+-		scb->platform_data->buf_busaddr = addr;
+-		scb->sg_count = ahc_linux_map_seg(ahc, scb,
+-						  sg, addr,
+-						  cmd->request_bufflen);
+-		sg->len |= ahc_htole32(AHC_DMA_LAST_SEG);
+-
+-		/*
+-		 * Reset the sg list pointer.
+-		 */
+-		scb->hscb->sgptr =
+-			ahc_htole32(scb->sg_list_phys | SG_FULL_RESID);
+-
+-		/*
+-		 * Copy the first SG into the "current"
+-		 * data pointer area.
+-		 */
+-		scb->hscb->dataptr = sg->addr;
+-		scb->hscb->datacnt = sg->len;
+ 	} else {
+ 		scb->hscb->sgptr = ahc_htole32(SG_LIST_NULL);
+ 		scb->hscb->dataptr = 0;
+diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.h linux-2.6.22-try2/drivers/scsi/aic7xxx/aic7xxx_osm.h
+--- linux-2.6.22-570/drivers/scsi/aic7xxx/aic7xxx_osm.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/aic7xxx/aic7xxx_osm.h	2007-12-19 15:29:23.000000000 -0500
+@@ -751,7 +751,7 @@
+ static __inline
+ void ahc_set_residual(struct scb *scb, u_long resid)
+ {
+-	scb->io_ctx->resid = resid;
++	scsi_set_resid(scb->io_ctx, resid);
+ }
+ 
+ static __inline
+@@ -763,7 +763,7 @@
+ static __inline
+ u_long ahc_get_residual(struct scb *scb)
+ {
+-	return (scb->io_ctx->resid);
++	return scsi_get_resid(scb->io_ctx);
+ }
+ 
+ static __inline
+diff -Nurb linux-2.6.22-570/drivers/scsi/aic7xxx_old.c linux-2.6.22-try2/drivers/scsi/aic7xxx_old.c
+--- linux-2.6.22-570/drivers/scsi/aic7xxx_old.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/aic7xxx_old.c	2007-12-19 15:29:23.000000000 -0500
+@@ -2690,17 +2690,8 @@
+ 	struct aic7xxx_scb *scbp;
+ 	unsigned char queue_depth;
+ 
+-  if (cmd->use_sg > 1)
+-  {
+-    struct scatterlist *sg;
++        scsi_dma_unmap(cmd);
+ 
+-    sg = (struct scatterlist *)cmd->request_buffer;
+-    pci_unmap_sg(p->pdev, sg, cmd->use_sg, cmd->sc_data_direction);
+-  }
+-  else if (cmd->request_bufflen)
+-    pci_unmap_single(p->pdev, aic7xxx_mapping(cmd),
+-		     cmd->request_bufflen,
+-                     cmd->sc_data_direction);
+   if (scb->flags & SCB_SENSE)
+   {
+     pci_unmap_single(p->pdev,
+@@ -3869,7 +3860,7 @@
+        * the mid layer didn't check residual data counts to see if the
+        * command needs retried.
+        */
+-      cmd->resid = scb->sg_length - actual;
++      scsi_set_resid(cmd, scb->sg_length - actual);
+       aic7xxx_status(cmd) = hscb->target_status;
+     }
+   }
+@@ -10137,6 +10128,7 @@
+   struct scsi_device *sdptr = cmd->device;
+   unsigned char tindex = TARGET_INDEX(cmd);
+   struct request *req = cmd->request;
++  int use_sg;
+ 
+   mask = (0x01 << tindex);
+   hscb = scb->hscb;
+@@ -10209,8 +10201,10 @@
+   memcpy(scb->cmnd, cmd->cmnd, cmd->cmd_len);
+   hscb->SCSI_cmd_pointer = cpu_to_le32(SCB_DMA_ADDR(scb, scb->cmnd));
+ 
+-  if (cmd->use_sg)
+-  {
++  use_sg = scsi_dma_map(cmd);
++  BUG_ON(use_sg < 0);
++
++  if (use_sg) {
+     struct scatterlist *sg;  /* Must be mid-level SCSI code scatterlist */
+ 
+     /*
+@@ -10219,11 +10213,11 @@
+      * differences and the kernel SG list uses virtual addresses where
+      * we need physical addresses.
+      */
+-    int i, use_sg;
++    int i;
+ 
+-    sg = (struct scatterlist *)cmd->request_buffer;
+     scb->sg_length = 0;
+-    use_sg = pci_map_sg(p->pdev, sg, cmd->use_sg, cmd->sc_data_direction);
++
++
+     /*
+      * Copy the segments into the SG array.  NOTE!!! - We used to
+      * have the first entry both in the data_pointer area and the first
+@@ -10231,10 +10225,9 @@
+      * entry in both places, but now we download the address of
+      * scb->sg_list[1] instead of 0 to the sg pointer in the hscb.
+      */
+-    for (i = 0; i < use_sg; i++)
+-    {
+-      unsigned int len = sg_dma_len(sg+i);
+-      scb->sg_list[i].address = cpu_to_le32(sg_dma_address(sg+i));
++    scsi_for_each_sg(cmd, sg, use_sg, i) {
++      unsigned int len = sg_dma_len(sg);
++      scb->sg_list[i].address = cpu_to_le32(sg_dma_address(sg));
+       scb->sg_list[i].length = cpu_to_le32(len);
+       scb->sg_length += len;
+     }
+@@ -10244,26 +10237,7 @@
+     scb->sg_count = i;
+     hscb->SG_segment_count = i;
+     hscb->SG_list_pointer = cpu_to_le32(SCB_DMA_ADDR(scb, &scb->sg_list[1]));
+-  }
+-  else
+-  {
+-    if (cmd->request_bufflen)
+-    {
+-      unsigned int address = pci_map_single(p->pdev, cmd->request_buffer,
+-					    cmd->request_bufflen,
+-                                            cmd->sc_data_direction);
+-      aic7xxx_mapping(cmd) = address;
+-      scb->sg_list[0].address = cpu_to_le32(address);
+-      scb->sg_list[0].length = cpu_to_le32(cmd->request_bufflen);
+-      scb->sg_count = 1;
+-      scb->sg_length = cmd->request_bufflen;
+-      hscb->SG_segment_count = 1;
+-      hscb->SG_list_pointer = cpu_to_le32(SCB_DMA_ADDR(scb, &scb->sg_list[0]));
+-      hscb->data_count = scb->sg_list[0].length;
+-      hscb->data_pointer = scb->sg_list[0].address;
+-    }
+-    else
+-    {
++  } else {
+       scb->sg_count = 0;
+       scb->sg_length = 0;
+       hscb->SG_segment_count = 0;
+@@ -10271,7 +10245,6 @@
+       hscb->data_count = 0;
+       hscb->data_pointer = 0;
+     }
+-  }
+ }
+ 
+ /*+F*************************************************************************
+diff -Nurb linux-2.6.22-570/drivers/scsi/amiga7xx.c linux-2.6.22-try2/drivers/scsi/amiga7xx.c
+--- linux-2.6.22-570/drivers/scsi/amiga7xx.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/amiga7xx.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,138 +0,0 @@
+-/*
+- * Detection routine for the NCR53c710 based Amiga SCSI Controllers for Linux.
+- *		Amiga MacroSystemUS WarpEngine SCSI controller.
+- *		Amiga Technologies A4000T SCSI controller.
+- *		Amiga Technologies/DKB A4091 SCSI controller.
+- *
+- * Written 1997 by Alan Hourihane <alanh@fairlite.demon.co.uk>
+- * plus modifications of the 53c7xx.c driver to support the Amiga.
+- */
+-#include <linux/types.h>
+-#include <linux/mm.h>
+-#include <linux/blkdev.h>
+-#include <linux/zorro.h>
+-#include <linux/stat.h>
+-
+-#include <asm/setup.h>
+-#include <asm/page.h>
+-#include <asm/pgtable.h>
+-#include <asm/amigaints.h>
+-#include <asm/amigahw.h>
+-#include <asm/dma.h>
+-#include <asm/irq.h>
+-
+-#include "scsi.h"
+-#include <scsi/scsi_host.h>
+-#include "53c7xx.h"
+-#include "amiga7xx.h"
+-
+-
+-static int amiga7xx_register_one(struct scsi_host_template *tpnt,
+-				 unsigned long address)
+-{
+-    long long options;
+-    int clock;
+-
+-    if (!request_mem_region(address, 0x1000, "ncr53c710"))
+-	return 0;
+-
+-    address = (unsigned long)z_ioremap(address, 0x1000);
+-    options = OPTION_MEMORY_MAPPED | OPTION_DEBUG_TEST1 | OPTION_INTFLY |
+-	      OPTION_SYNCHRONOUS | OPTION_ALWAYS_SYNCHRONOUS |
+-	      OPTION_DISCONNECT;
+-    clock = 50000000;	/* 50 MHz SCSI Clock */
+-    ncr53c7xx_init(tpnt, 0, 710, address, 0, IRQ_AMIGA_PORTS, DMA_NONE,
+-		   options, clock);
+-    return 1;
+-}
+-
+-
+-#ifdef CONFIG_ZORRO
+-
+-static struct {
+-    zorro_id id;
+-    unsigned long offset;
+-    int absolute;	/* offset is absolute address */
+-} amiga7xx_table[] = {
+-    { .id = ZORRO_PROD_PHASE5_BLIZZARD_603E_PLUS, .offset = 0xf40000,
+-      .absolute = 1 },
+-    { .id = ZORRO_PROD_MACROSYSTEMS_WARP_ENGINE_40xx, .offset = 0x40000 },
+-    { .id = ZORRO_PROD_CBM_A4091_1, .offset = 0x800000 },
+-    { .id = ZORRO_PROD_CBM_A4091_2, .offset = 0x800000 },
+-    { .id = ZORRO_PROD_GVP_GFORCE_040_060, .offset = 0x40000 },
+-    { 0 }
+-};
+-
+-static int __init amiga7xx_zorro_detect(struct scsi_host_template *tpnt)
+-{
+-    int num = 0, i;
+-    struct zorro_dev *z = NULL;
+-    unsigned long address;
+-
+-    while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
+-	for (i = 0; amiga7xx_table[i].id; i++)
+-	    if (z->id == amiga7xx_table[i].id)
+-		break;
+-	if (!amiga7xx_table[i].id)
+-	    continue;
+-	if (amiga7xx_table[i].absolute)
+-	    address = amiga7xx_table[i].offset;
+-	else
+-	    address = z->resource.start + amiga7xx_table[i].offset;
+-	num += amiga7xx_register_one(tpnt, address);
+-    }
+-    return num;
+-}
+-
+-#endif /* CONFIG_ZORRO */
+-
+-
+-int __init amiga7xx_detect(struct scsi_host_template *tpnt)
+-{
+-    static unsigned char called = 0;
+-    int num = 0;
+-
+-    if (called || !MACH_IS_AMIGA)
+-	return 0;
+-
+-    tpnt->proc_name = "Amiga7xx";
+-
+-    if (AMIGAHW_PRESENT(A4000_SCSI))
+-	num += amiga7xx_register_one(tpnt, 0xdd0040);
+-
+-#ifdef CONFIG_ZORRO
+-    num += amiga7xx_zorro_detect(tpnt);
+-#endif
+-
+-    called = 1;
+-    return num;
+-}
+-
+-static int amiga7xx_release(struct Scsi_Host *shost)
+-{
+-	if (shost->irq)
+-		free_irq(shost->irq, NULL);
+-	if (shost->dma_channel != 0xff)
+-		free_dma(shost->dma_channel);
+-	if (shost->io_port && shost->n_io_port)
+-		release_region(shost->io_port, shost->n_io_port);
+-	scsi_unregister(shost);
+-	return 0;
+-}
+-
+-static struct scsi_host_template driver_template = {
+-	.name			= "Amiga NCR53c710 SCSI",
+-	.detect			= amiga7xx_detect,
+-	.release		= amiga7xx_release,
+-	.queuecommand		= NCR53c7xx_queue_command,
+-	.abort			= NCR53c7xx_abort,
+-	.reset			= NCR53c7xx_reset,
+-	.can_queue		= 24,
+-	.this_id		= 7,
+-	.sg_tablesize		= 63,
+-	.cmd_per_lun		= 3,
+-	.use_clustering		= DISABLE_CLUSTERING
+-};
+-
+-
+-#include "scsi_module.c"
+diff -Nurb linux-2.6.22-570/drivers/scsi/amiga7xx.h linux-2.6.22-try2/drivers/scsi/amiga7xx.h
+--- linux-2.6.22-570/drivers/scsi/amiga7xx.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/amiga7xx.h	1969-12-31 19:00:00.000000000 -0500
+@@ -1,23 +0,0 @@
+-#ifndef AMIGA7XX_H
+-
+-#include <linux/types.h>
+-
+-int amiga7xx_detect(struct scsi_host_template *);
+-const char *NCR53c7x0_info(void);
+-int NCR53c7xx_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+-int NCR53c7xx_abort(Scsi_Cmnd *);
+-int NCR53c7x0_release (struct Scsi_Host *);
+-int NCR53c7xx_reset(Scsi_Cmnd *, unsigned int);
+-void NCR53c7x0_intr(int irq, void *dev_id);
+-
+-#ifndef CMD_PER_LUN
+-#define CMD_PER_LUN 3
+-#endif
+-
+-#ifndef CAN_QUEUE
+-#define CAN_QUEUE 24
+-#endif
+-
+-#include <scsi/scsicam.h>
+-
+-#endif /* AMIGA7XX_H */
+diff -Nurb linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr.h linux-2.6.22-try2/drivers/scsi/arcmsr/arcmsr.h
+--- linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/arcmsr/arcmsr.h	2007-12-19 15:29:23.000000000 -0500
+@@ -48,9 +48,10 @@
+ 
+ #define ARCMSR_MAX_OUTSTANDING_CMD 						256
+ #define ARCMSR_MAX_FREECCB_NUM							288
+-#define ARCMSR_DRIVER_VERSION				"Driver Version 1.20.00.13"
++#define ARCMSR_DRIVER_VERSION				"Driver Version 1.20.00.14"
+ #define ARCMSR_SCSI_INITIATOR_ID						255
+ #define ARCMSR_MAX_XFER_SECTORS							512
++#define ARCMSR_MAX_XFER_SECTORS_B						4096
+ #define ARCMSR_MAX_TARGETID							 17
+ #define ARCMSR_MAX_TARGETLUN							  8
+ #define ARCMSR_MAX_CMD_PERLUN				 ARCMSR_MAX_OUTSTANDING_CMD
+@@ -469,4 +470,3 @@
+ extern struct class_device_attribute *arcmsr_host_attrs[];
+ extern int arcmsr_alloc_sysfs_attr(struct AdapterControlBlock *acb);
+ void arcmsr_free_sysfs_attr(struct AdapterControlBlock *acb);
+-
+diff -Nurb linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_attr.c linux-2.6.22-try2/drivers/scsi/arcmsr/arcmsr_attr.c
+--- linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_attr.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/arcmsr/arcmsr_attr.c	2007-12-19 15:29:22.000000000 -0500
+@@ -59,8 +59,9 @@
+ struct class_device_attribute *arcmsr_host_attrs[];
+ 
+ static ssize_t
+-arcmsr_sysfs_iop_message_read(struct kobject *kobj, char *buf, loff_t off,
+-    size_t count)
++arcmsr_sysfs_iop_message_read(struct kobject *kobj,
++			      struct bin_attribute *bin_attr,
++			      char *buf, loff_t off, size_t count)
+ {
+ 	struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+ 	struct Scsi_Host *host = class_to_shost(cdev);
+@@ -105,8 +106,9 @@
+ }
+ 
+ static ssize_t
+-arcmsr_sysfs_iop_message_write(struct kobject *kobj, char *buf, loff_t off,
+-    size_t count)
++arcmsr_sysfs_iop_message_write(struct kobject *kobj,
++			       struct bin_attribute *bin_attr,
++			       char *buf, loff_t off, size_t count)
+ {
+ 	struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+ 	struct Scsi_Host *host = class_to_shost(cdev);
+@@ -152,8 +154,9 @@
+ }
+ 
+ static ssize_t
+-arcmsr_sysfs_iop_message_clear(struct kobject *kobj, char *buf, loff_t off,
+-    size_t count)
++arcmsr_sysfs_iop_message_clear(struct kobject *kobj,
++			       struct bin_attribute *bin_attr,
++			       char *buf, loff_t off, size_t count)
+ {
+ 	struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+ 	struct Scsi_Host *host = class_to_shost(cdev);
+@@ -188,7 +191,6 @@
+ 	.attr = {
+ 		.name = "mu_read",
+ 		.mode = S_IRUSR ,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = 1032,
+ 	.read = arcmsr_sysfs_iop_message_read,
+@@ -198,7 +200,6 @@
+ 	.attr = {
+ 		.name = "mu_write",
+ 		.mode = S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = 1032,
+ 	.write = arcmsr_sysfs_iop_message_write,
+@@ -208,7 +209,6 @@
+ 	.attr = {
+ 		.name = "mu_clear",
+ 		.mode = S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = 1,
+ 	.write = arcmsr_sysfs_iop_message_clear,
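The arcmsr_attr.c hunks above track the 2.6.23-era sysfs change in which binary-attribute callbacks gained a struct bin_attribute * argument and struct attribute lost its .owner field. The updated handler signature, in a minimal sketch:

/* Sketch of the new-style sysfs binary read callback: the attribute
 * itself is passed in, so its declared size can bound the copy. */
static ssize_t example_bin_read(struct kobject *kobj,
				struct bin_attribute *bin_attr,
				char *buf, loff_t off, size_t count)
{
	struct class_device *cdev =
		container_of(kobj, struct class_device, kobj);
	struct Scsi_Host *host = class_to_shost(cdev);

	if (off >= bin_attr->size)
		return 0;
	if (count > bin_attr->size - off)
		count = bin_attr->size - off;
	return snprintf(buf, count, "host%d\n", host->host_no);
}
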
+diff -Nurb linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_hba.c linux-2.6.22-try2/drivers/scsi/arcmsr/arcmsr_hba.c
+--- linux-2.6.22-570/drivers/scsi/arcmsr/arcmsr_hba.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/arcmsr/arcmsr_hba.c	2007-12-19 15:29:23.000000000 -0500
+@@ -57,6 +57,7 @@
+ #include <linux/dma-mapping.h>
+ #include <linux/timer.h>
+ #include <linux/pci.h>
++#include <linux/aer.h>
+ #include <asm/dma.h>
+ #include <asm/io.h>
+ #include <asm/system.h>
+@@ -71,7 +72,7 @@
+ #include "arcmsr.h"
+ 
+ MODULE_AUTHOR("Erich Chen <erich@areca.com.tw>");
+-MODULE_DESCRIPTION("ARECA (ARC11xx/12xx) SATA RAID HOST Adapter");
++MODULE_DESCRIPTION("ARECA (ARC11xx/12xx/13xx/16xx) SATA/SAS RAID HOST Adapter");
+ MODULE_LICENSE("Dual BSD/GPL");
+ MODULE_VERSION(ARCMSR_DRIVER_VERSION);
+ 
+@@ -93,7 +94,9 @@
+ static uint8_t arcmsr_wait_msgint_ready(struct AdapterControlBlock *acb);
+ static const char *arcmsr_info(struct Scsi_Host *);
+ static irqreturn_t arcmsr_interrupt(struct AdapterControlBlock *acb);
+-
++static pci_ers_result_t arcmsr_pci_error_detected(struct pci_dev *pdev,
++						pci_channel_state_t state);
++static pci_ers_result_t arcmsr_pci_slot_reset(struct pci_dev *pdev);
+ static int arcmsr_adjust_disk_queue_depth(struct scsi_device *sdev, int queue_depth)
+ {
+ 	if (queue_depth > ARCMSR_MAX_CMD_PERLUN)
+@@ -104,7 +107,8 @@
+ 
+ static struct scsi_host_template arcmsr_scsi_host_template = {
+ 	.module			= THIS_MODULE,
+-	.name			= "ARCMSR ARECA SATA RAID HOST Adapter" ARCMSR_DRIVER_VERSION,
++	.name			= "ARCMSR ARECA SATA/SAS RAID HOST Adapter"
++							ARCMSR_DRIVER_VERSION,
+ 	.info			= arcmsr_info,
+ 	.queuecommand		= arcmsr_queue_command,
+ 	.eh_abort_handler	= arcmsr_abort,
+@@ -119,6 +123,10 @@
+ 	.use_clustering		= ENABLE_CLUSTERING,
+ 	.shost_attrs		= arcmsr_host_attrs,
+ };
++static struct pci_error_handlers arcmsr_pci_error_handlers = {
++	.error_detected		= arcmsr_pci_error_detected,
++	.slot_reset		= arcmsr_pci_slot_reset,
++};
+ 
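With the error_detected/slot_reset pair above, arcmsr opts into PCI error recovery; probe() below also enables PCIe error reporting. The minimal wiring, sketched with placeholder bodies reduced to the result codes the PCI core acts on:

static pci_ers_result_t example_error_detected(struct pci_dev *pdev,
					       pci_channel_state_t state)
{
	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;
	return PCI_ERS_RESULT_NEED_RESET;	/* request slot_reset */
}

static pci_ers_result_t example_slot_reset(struct pci_dev *pdev)
{
	pci_restore_state(pdev);
	if (pci_enable_device(pdev))
		return PCI_ERS_RESULT_DISCONNECT;
	pci_set_master(pdev);
	return PCI_ERS_RESULT_RECOVERED;
}

static struct pci_error_handlers example_err_handler = {
	.error_detected	= example_error_detected,
	.slot_reset	= example_slot_reset,
};
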
+ static struct pci_device_id arcmsr_device_id_table[] = {
+ 	{PCI_DEVICE(PCI_VENDOR_ID_ARECA, PCI_DEVICE_ID_ARECA_1110)},
+@@ -144,7 +152,8 @@
+ 	.id_table		= arcmsr_device_id_table,
+ 	.probe			= arcmsr_probe,
+ 	.remove			= arcmsr_remove,
+-	.shutdown		= arcmsr_shutdown
++	.shutdown		= arcmsr_shutdown,
++	.err_handler		= &arcmsr_pci_error_handlers,
+ };
+ 
+ static irqreturn_t arcmsr_do_interrupt(int irq, void *dev_id)
+@@ -328,6 +337,8 @@
+ 
+ 	arcmsr_iop_init(acb);
+ 	pci_set_drvdata(pdev, host);
++	if (strncmp(acb->firm_version, "V1.42", 5) >= 0)
++		host->max_sectors = ARCMSR_MAX_XFER_SECTORS_B;
+ 
+ 	error = scsi_add_host(host, &pdev->dev);
+ 	if (error)
+@@ -338,6 +349,7 @@
+ 		goto out_free_sysfs;
+ 
+ 	scsi_scan_host(host);
++	pci_enable_pcie_error_reporting(pdev);
+ 	return 0;
+  out_free_sysfs:
+  out_free_irq:
+@@ -369,19 +381,9 @@
+ 
+ static void arcmsr_pci_unmap_dma(struct CommandControlBlock *ccb)
+ {
+-	struct AdapterControlBlock *acb = ccb->acb;
+ 	struct scsi_cmnd *pcmd = ccb->pcmd;
+ 
+-	if (pcmd->use_sg != 0) {
+-		struct scatterlist *sl;
+-
+-		sl = (struct scatterlist *)pcmd->request_buffer;
+-		pci_unmap_sg(acb->pdev, sl, pcmd->use_sg, pcmd->sc_data_direction);
+-	}
+-	else if (pcmd->request_bufflen != 0)
+-		pci_unmap_single(acb->pdev,
+-			pcmd->SCp.dma_handle,
+-			pcmd->request_bufflen, pcmd->sc_data_direction);
++	scsi_dma_unmap(pcmd);
+ }
+ 
+ static void arcmsr_ccb_complete(struct CommandControlBlock *ccb, int stand_flag)
+@@ -498,7 +500,7 @@
+ 
+ static void arcmsr_flush_adapter_cache(struct AdapterControlBlock *acb)
+ {
+-	struct MessageUnit __iomem *reg=acb->pmu;
++	struct MessageUnit __iomem *reg = acb->pmu;
+ 
+ 	writel(ARCMSR_INBOUND_MESG0_FLUSH_CACHE, &reg->inbound_msgaddr0);
+ 	if (arcmsr_wait_msgint_ready(acb))
+@@ -551,6 +553,7 @@
+ 	int8_t *psge = (int8_t *)&arcmsr_cdb->u;
+ 	uint32_t address_lo, address_hi;
+ 	int arccdbsize = 0x30;
++	int nseg;
+ 
+ 	ccb->pcmd = pcmd;
+ 	memset(arcmsr_cdb, 0, sizeof (struct ARCMSR_CDB));
+@@ -561,20 +564,20 @@
+ 	arcmsr_cdb->CdbLength = (uint8_t)pcmd->cmd_len;
+ 	arcmsr_cdb->Context = (unsigned long)arcmsr_cdb;
+ 	memcpy(arcmsr_cdb->Cdb, pcmd->cmnd, pcmd->cmd_len);
+-	if (pcmd->use_sg) {
+-		int length, sgcount, i, cdb_sgcount = 0;
+-		struct scatterlist *sl;
+-
+-		/* Get Scatter Gather List from scsiport. */
+-		sl = (struct scatterlist *) pcmd->request_buffer;
+-		sgcount = pci_map_sg(acb->pdev, sl, pcmd->use_sg,
+-				pcmd->sc_data_direction);
++
++	nseg = scsi_dma_map(pcmd);
++	BUG_ON(nseg < 0);
++
++	if (nseg) {
++		int length, i, cdb_sgcount = 0;
++		struct scatterlist *sg;
++
+ 		/* map stor port SG list to our iop SG List. */
+-		for (i = 0; i < sgcount; i++) {
++		scsi_for_each_sg(pcmd, sg, nseg, i) {
+ 			/* Get the physical address of the current data pointer */
+-			length = cpu_to_le32(sg_dma_len(sl));
+-			address_lo = cpu_to_le32(dma_addr_lo32(sg_dma_address(sl)));
+-			address_hi = cpu_to_le32(dma_addr_hi32(sg_dma_address(sl)));
++			length = cpu_to_le32(sg_dma_len(sg));
++			address_lo = cpu_to_le32(dma_addr_lo32(sg_dma_address(sg)));
++			address_hi = cpu_to_le32(dma_addr_hi32(sg_dma_address(sg)));
+ 			if (address_hi == 0) {
+ 				struct SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge;
+ 
+@@ -591,32 +594,12 @@
+ 				psge += sizeof (struct SG64ENTRY);
+ 				arccdbsize += sizeof (struct SG64ENTRY);
+ 			}
+-			sl++;
+ 			cdb_sgcount++;
+ 		}
+ 		arcmsr_cdb->sgcount = (uint8_t)cdb_sgcount;
+-		arcmsr_cdb->DataLength = pcmd->request_bufflen;
++		arcmsr_cdb->DataLength = scsi_bufflen(pcmd);
+ 		if ( arccdbsize > 256)
+ 			arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_SGL_BSIZE;
+-	} else if (pcmd->request_bufflen) {
+-		dma_addr_t dma_addr;
+-		dma_addr = pci_map_single(acb->pdev, pcmd->request_buffer,
+-				pcmd->request_bufflen, pcmd->sc_data_direction);
+-		pcmd->SCp.dma_handle = dma_addr;
+-		address_lo = cpu_to_le32(dma_addr_lo32(dma_addr));
+-		address_hi = cpu_to_le32(dma_addr_hi32(dma_addr));
+-		if (address_hi == 0) {
+-			struct  SG32ENTRY *pdma_sg = (struct SG32ENTRY *)psge;
+-			pdma_sg->address = address_lo;
+-			pdma_sg->length = pcmd->request_bufflen;
+-		} else {
+-			struct SG64ENTRY *pdma_sg = (struct SG64ENTRY *)psge;
+-			pdma_sg->addresshigh = address_hi;
+-			pdma_sg->address = address_lo;
+-			pdma_sg->length = pcmd->request_bufflen|IS_SG64_ADDR;
+-		}
+-		arcmsr_cdb->sgcount = 1;
+-		arcmsr_cdb->DataLength = pcmd->request_bufflen;
+ 	}
+ 	if (pcmd->sc_data_direction == DMA_TO_DEVICE ) {
+ 		arcmsr_cdb->Flags |= ARCMSR_CDB_FLAG_WRITE;
+@@ -758,20 +741,20 @@
+ 				(flag_ccb << 5));
+ 			if ((ccb->acb != acb) || (ccb->startdone != ARCMSR_CCB_START)) {
+ 				if (ccb->startdone == ARCMSR_CCB_ABORTED) {
+-					struct scsi_cmnd *abortcmd=ccb->pcmd;
++					struct scsi_cmnd *abortcmd = ccb->pcmd;
+ 					if (abortcmd) {
+ 					abortcmd->result |= DID_ABORT >> 16;
+ 					arcmsr_ccb_complete(ccb, 1);
+ 					printk(KERN_NOTICE
+-						"arcmsr%d: ccb='0x%p' isr got aborted command \n"
++						"arcmsr%d: ccb ='0x%p' isr got aborted command \n"
+ 						, acb->host->host_no, ccb);
+ 					}
+ 					continue;
+ 				}
+ 				printk(KERN_NOTICE
+-					"arcmsr%d: isr get an illegal ccb command done acb='0x%p'"
+-					"ccb='0x%p' ccbacb='0x%p' startdone = 0x%x"
+-					" ccboutstandingcount=%d \n"
++					"arcmsr%d: isr get an illegal ccb command done acb = '0x%p'"
++					"ccb = '0x%p' ccbacb = '0x%p' startdone = 0x%x"
++					" ccboutstandingcount = %d \n"
+ 					, acb->host->host_no
+ 					, acb
+ 					, ccb
+@@ -791,7 +774,7 @@
+ 				switch(ccb->arcmsr_cdb.DeviceStatus) {
+ 				case ARCMSR_DEV_SELECT_TIMEOUT: {
+ 						acb->devstate[id][lun] = ARECA_RAID_GONE;
+-						ccb->pcmd->result = DID_TIME_OUT << 16;
++						ccb->pcmd->result = DID_NO_CONNECT << 16;
+ 						arcmsr_ccb_complete(ccb, 1);
+ 					}
+ 					break;
+@@ -810,8 +793,8 @@
+ 					break;
+ 				default:
+ 					printk(KERN_NOTICE
+-						"arcmsr%d: scsi id=%d lun=%d"
+-						" isr get command error done,"
++						"arcmsr%d: scsi id = %d lun = %d"
++						" isr get command error done, "
+ 						"but got unknown DeviceStatus = 0x%x \n"
+ 						, acb->host->host_no
+ 						, id
+@@ -848,24 +831,21 @@
+ 	struct CMD_MESSAGE_FIELD *pcmdmessagefld;
+ 	int retvalue = 0, transfer_len = 0;
+ 	char *buffer;
++	struct scatterlist *sg;
+ 	uint32_t controlcode = (uint32_t ) cmd->cmnd[5] << 24 |
+ 						(uint32_t ) cmd->cmnd[6] << 16 |
+ 						(uint32_t ) cmd->cmnd[7] << 8  |
+ 						(uint32_t ) cmd->cmnd[8];
+ 					/* 4 bytes: Areca io control code */
+-	if (cmd->use_sg) {
+-		struct scatterlist *sg = (struct scatterlist *)cmd->request_buffer;
+ 
++	sg = scsi_sglist(cmd);
+ 		buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+-		if (cmd->use_sg > 1) {
++	if (scsi_sg_count(cmd) > 1) {
+ 			retvalue = ARCMSR_MESSAGE_FAIL;
+ 			goto message_out;
+ 		}
+ 		transfer_len += sg->length;
+-	} else {
+-		buffer = cmd->request_buffer;
+-		transfer_len = cmd->request_bufflen;
+-	}
++
+ 	if (transfer_len > sizeof(struct CMD_MESSAGE_FIELD)) {
+ 		retvalue = ARCMSR_MESSAGE_FAIL;
+ 		goto message_out;
+@@ -1057,12 +1037,9 @@
+ 		retvalue = ARCMSR_MESSAGE_FAIL;
+ 	}
+  message_out:
+-	if (cmd->use_sg) {
+-		struct scatterlist *sg;
+-
+-		sg = (struct scatterlist *) cmd->request_buffer;
++	sg = scsi_sglist(cmd);
+ 		kunmap_atomic(buffer - sg->offset, KM_IRQ0);
+-	}
++
+ 	return retvalue;
+ }
+ 
+@@ -1085,6 +1062,7 @@
+ 	case INQUIRY: {
+ 		unsigned char inqdata[36];
+ 		char *buffer;
++		struct scatterlist *sg;
+ 
+ 		if (cmd->device->lun) {
+ 			cmd->result = (DID_TIME_OUT << 16);
+@@ -1096,7 +1074,7 @@
+ 		inqdata[1] = 0;
+ 		/* rem media bit & Dev Type Modifier */
+ 		inqdata[2] = 0;
+-		/* ISO,ECMA,& ANSI versions */
++		/* ISO, ECMA, & ANSI versions */
+ 		inqdata[4] = 31;
+ 		/* length of additional data */
+ 		strncpy(&inqdata[8], "Areca   ", 8);
+@@ -1104,21 +1082,14 @@
+ 		strncpy(&inqdata[16], "RAID controller ", 16);
+ 		/* Product Identification */
+ 		strncpy(&inqdata[32], "R001", 4); /* Product Revision */
+-		if (cmd->use_sg) {
+-			struct scatterlist *sg;
+ 
+-			sg = (struct scatterlist *) cmd->request_buffer;
++		sg = scsi_sglist(cmd);
+ 			buffer = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+-		} else {
+-			buffer = cmd->request_buffer;
+-		}
+-		memcpy(buffer, inqdata, sizeof(inqdata));
+-		if (cmd->use_sg) {
+-			struct scatterlist *sg;
+ 
+-			sg = (struct scatterlist *) cmd->request_buffer;
++		memcpy(buffer, inqdata, sizeof(inqdata));
++		sg = scsi_sglist(cmd);
+ 			kunmap_atomic(buffer - sg->offset, KM_IRQ0);
+-		}
++
+ 		cmd->scsi_done(cmd);
+ 	}
+ 	break;
+@@ -1153,7 +1124,7 @@
+ 			, acb->host->host_no);
+ 		return SCSI_MLQUEUE_HOST_BUSY;
+ 	}
+-	if(target == 16) {
++	if (target == 16) {
+ 		/* virtual device for iop message transfer */
+ 		arcmsr_handle_virtual_command(acb, cmd);
+ 		return 0;
+@@ -1166,7 +1137,7 @@
+ 			printk(KERN_NOTICE
+ 				"arcmsr%d: block 'read/write'"
+ 				"command with gone raid volume"
+-				" Cmd=%2x, TargetId=%d, Lun=%d \n"
++				" Cmd = %2x, TargetId = %d, Lun = %d \n"
+ 				, acb->host->host_no
+ 				, cmd->cmnd[0]
+ 				, target, lun);
+@@ -1257,7 +1228,7 @@
+ 			if ((ccb->startdone == ARCMSR_CCB_ABORTED) ||
+ 				(ccb == poll_ccb)) {
+ 				printk(KERN_NOTICE
+-					"arcmsr%d: scsi id=%d lun=%d ccb='0x%p'"
++					"arcmsr%d: scsi id = %d lun = %d ccb = '0x%p'"
+ 					" poll command abort successfully \n"
+ 					, acb->host->host_no
+ 					, ccb->pcmd->device->id
+@@ -1270,8 +1241,8 @@
+ 			}
+ 			printk(KERN_NOTICE
+ 				"arcmsr%d: polling get an illegal ccb"
+-				" command done ccb='0x%p'"
+-				"ccboutstandingcount=%d \n"
++				" command done ccb = '0x%p'"
++				"ccboutstandingcount = %d \n"
+ 				, acb->host->host_no
+ 				, ccb
+ 				, atomic_read(&acb->ccboutstandingcount));
+@@ -1288,7 +1259,7 @@
+ 			switch(ccb->arcmsr_cdb.DeviceStatus) {
+ 			case ARCMSR_DEV_SELECT_TIMEOUT: {
+ 					acb->devstate[id][lun] = ARECA_RAID_GONE;
+-					ccb->pcmd->result = DID_TIME_OUT << 16;
++					ccb->pcmd->result = DID_NO_CONNECT << 16;
+ 					arcmsr_ccb_complete(ccb, 1);
+ 				}
+ 				break;
+@@ -1307,7 +1278,7 @@
+ 				break;
+ 			default:
+ 				printk(KERN_NOTICE
+-					"arcmsr%d: scsi id=%d lun=%d"
++					"arcmsr%d: scsi id = %d lun = %d"
+ 					" polling and getting command error done"
+ 					"but got unknown DeviceStatus = 0x%x \n"
+ 					, acb->host->host_no
+@@ -1322,6 +1293,94 @@
+ 		}
+ 	}
+ }
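++/*
++ * Drain the adapter's outbound post queue and complete every CCB found
++ * there, so commands the IOP has already finished are returned to the
++ * midlayer rather than dropped.  Called from arcmsr_iop_reset() after
++ * the outbound interrupts have been masked.
++ */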
++static void arcmsr_done4_abort_postqueue(struct AdapterControlBlock *acb)
++{
++	int i = 0, found = 0;
++	int id, lun;
++	uint32_t flag_ccb, outbound_intstatus;
++	struct MessageUnit __iomem *reg = acb->pmu;
++	struct CommandControlBlock *ccb;
++
++	/* clear and abort all outbound posted Q */
++	while (((flag_ccb = readl(&reg->outbound_queueport)) != 0xFFFFFFFF) &&
++		(i++ < 256)) {
++		ccb = (struct CommandControlBlock *)(acb->vir2phy_offset +
++			(flag_ccb << 5));
++		if (!ccb)
++			continue;
++		if ((ccb->acb != acb) ||
++		    (ccb->startdone != ARCMSR_CCB_START)) {
++			printk(KERN_NOTICE
++				"arcmsr%d: polling get an illegal ccb"
++				" command done ccb = '0x%p'"
++				" ccboutstandingcount = %d \n",
++				acb->host->host_no, ccb,
++				atomic_read(&acb->ccboutstandingcount));
++			continue;
++		}
++
++		id = ccb->pcmd->device->id;
++		lun = ccb->pcmd->device->lun;
++		if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) {
++			if (acb->devstate[id][lun] == ARECA_RAID_GONE)
++				acb->devstate[id][lun] = ARECA_RAID_GOOD;
++			ccb->pcmd->result = DID_OK << 16;
++			arcmsr_ccb_complete(ccb, 1);
++		} else {
++			switch (ccb->arcmsr_cdb.DeviceStatus) {
++			case ARCMSR_DEV_SELECT_TIMEOUT:
++				acb->devstate[id][lun] = ARECA_RAID_GONE;
++				ccb->pcmd->result = DID_NO_CONNECT << 16;
++				arcmsr_ccb_complete(ccb, 1);
++				break;
++			case ARCMSR_DEV_ABORTED:
++			case ARCMSR_DEV_INIT_FAIL:
++				acb->devstate[id][lun] = ARECA_RAID_GONE;
++				ccb->pcmd->result = DID_BAD_TARGET << 16;
++				arcmsr_ccb_complete(ccb, 1);
++				break;
++			case ARCMSR_DEV_CHECK_CONDITION:
++				acb->devstate[id][lun] = ARECA_RAID_GOOD;
++				arcmsr_report_sense_info(ccb);
++				arcmsr_ccb_complete(ccb, 1);
++				break;
++			default:
++				printk(KERN_NOTICE
++					"arcmsr%d: scsi id = %d lun = %d"
++					" polling and getting command error done, "
++					"but got unknown DeviceStatus = 0x%x \n",
++					acb->host->host_no, id, lun,
++					ccb->arcmsr_cdb.DeviceStatus);
++				acb->devstate[id][lun] = ARECA_RAID_GONE;
++				ccb->pcmd->result = DID_BAD_TARGET << 16;
++				arcmsr_ccb_complete(ccb, 1);
++				break;
++			}
++		}
++		found = 1;
++	}
++	if (found) {
++		outbound_intstatus = readl(&reg->outbound_intstatus) &
++			acb->outbound_int_enable;
++		writel(outbound_intstatus, &reg->outbound_intstatus);
++		/* clear interrupt */
++	}
++}
++
+ 
+ static void arcmsr_iop_init(struct AdapterControlBlock *acb)
+ {
+@@ -1355,7 +1414,6 @@
+ 
+ static void arcmsr_iop_reset(struct AdapterControlBlock *acb)
+ {
+-	struct MessageUnit __iomem *reg = acb->pmu;
+ 	struct CommandControlBlock *ccb;
+ 	uint32_t intmask_org;
+ 	int i = 0;
+@@ -1368,21 +1426,17 @@
+ 		/* disable all outbound interrupt */
+ 		intmask_org = arcmsr_disable_outbound_ints(acb);
+ 		/* clear all outbound posted Q */
+-		for (i = 0; i < ARCMSR_MAX_OUTSTANDING_CMD; i++)
+-			readl(&reg->outbound_queueport);
++		arcmsr_done4_abort_postqueue(acb);
+ 		for (i = 0; i < ARCMSR_MAX_FREECCB_NUM; i++) {
+ 			ccb = acb->pccb_pool[i];
+-			if ((ccb->startdone == ARCMSR_CCB_START) ||
+-				(ccb->startdone == ARCMSR_CCB_ABORTED)) {
++			if (ccb->startdone == ARCMSR_CCB_START) {
+ 				ccb->startdone = ARCMSR_CCB_ABORTED;
+-				ccb->pcmd->result = DID_ABORT << 16;
+-				arcmsr_ccb_complete(ccb, 1);
+ 			}
+ 		}
+ 		/* enable all outbound interrupt */
+ 		arcmsr_enable_outbound_ints(acb, intmask_org);
+ 	}
+-	atomic_set(&acb->ccboutstandingcount, 0);
++
+ }
+ 
+ static int arcmsr_bus_reset(struct scsi_cmnd *cmd)
+@@ -1428,10 +1482,9 @@
+ 	int i = 0;
+ 
+ 	printk(KERN_NOTICE
+-		"arcmsr%d: abort device command of scsi id=%d lun=%d \n",
++		"arcmsr%d: abort device command of scsi id = %d lun = %d \n",
+ 		acb->host->host_no, cmd->device->id, cmd->device->lun);
+ 	acb->num_aborts++;
+-
+ 	/*
+ 	************************************************
+ 	** the all interrupt service routine is locked
+@@ -1492,4 +1545,300 @@
+ 	return buf;
+ }
+ 
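++/*
++ * PCI error recovery slot_reset() callback: invoked by the PCI core
++ * once the slot has been reset.  The handler re-enables the device and
++ * rebuilds the host from scratch (DMA mask, register mapping, CCB pool,
++ * IRQ), returning PCI_ERS_RESULT_RECOVERED on success and
++ * PCI_ERS_RESULT_DISCONNECT on any failure.
++ */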
++static pci_ers_result_t arcmsr_pci_slot_reset(struct pci_dev *pdev)
++{
++	struct Scsi_Host *host;
++	struct AdapterControlBlock *acb;
++	uint8_t bus, dev_fun;
++	int error;
++
++	error = pci_enable_device(pdev);
++	if (error)
++		return PCI_ERS_RESULT_DISCONNECT;
++	pci_set_master(pdev);
++
++	host = scsi_host_alloc(&arcmsr_scsi_host_template,
++			sizeof(struct AdapterControlBlock));
++	if (!host)
++		return PCI_ERS_RESULT_DISCONNECT;
++	acb = (struct AdapterControlBlock *)host->hostdata;
++	memset(acb, 0, sizeof (struct AdapterControlBlock));
++
++	error = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
++	if (error) {
++		error = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
++		if (error) {
++			printk(KERN_WARNING
++			       "scsi%d: No suitable DMA mask available\n",
++			       host->host_no);
++			return PCI_ERS_RESULT_DISCONNECT;
++		}
++	}
++	bus = pdev->bus->number;
++	dev_fun = pdev->devfn;
++	acb->pdev = pdev;
++	acb->host = host;
++	host->max_sectors = ARCMSR_MAX_XFER_SECTORS;
++	host->max_lun = ARCMSR_MAX_TARGETLUN;
++	host->max_id = ARCMSR_MAX_TARGETID;/*16:8*/
++	host->max_cmd_len = 16;    /*this is issue of 64bit LBA, over 2T byte*/
++	host->sg_tablesize = ARCMSR_MAX_SG_ENTRIES;
++	host->can_queue = ARCMSR_MAX_FREECCB_NUM; /* max simultaneous cmds */
++	host->cmd_per_lun = ARCMSR_MAX_CMD_PERLUN;
++	host->this_id = ARCMSR_SCSI_INITIATOR_ID;
++	host->unique_id = (bus << 8) | dev_fun;
++	host->irq = pdev->irq;
++	error = pci_request_regions(pdev, "arcmsr");
++	if (error)
++		return PCI_ERS_RESULT_DISCONNECT;
+ 
++	acb->pmu = ioremap(pci_resource_start(pdev, 0),
++			   pci_resource_len(pdev, 0));
++	if (!acb->pmu) {
++		printk(KERN_NOTICE "arcmsr%d: memory"
++			" mapping region fail \n", acb->host->host_no);
++		return PCI_ERS_RESULT_DISCONNECT;
++	}
++	acb->acb_flags |= (ACB_F_MESSAGE_WQBUFFER_CLEARED |
++			   ACB_F_MESSAGE_RQBUFFER_CLEARED |
++			   ACB_F_MESSAGE_WQBUFFER_READED);
++	acb->acb_flags &= ~ACB_F_SCSISTOPADAPTER;
++	INIT_LIST_HEAD(&acb->ccb_free_list);
++
++	error = arcmsr_alloc_ccb_pool(acb);
++	if (error)
++		return PCI_ERS_RESULT_DISCONNECT;
++
++	error = request_irq(pdev->irq, arcmsr_do_interrupt,
++			IRQF_DISABLED | IRQF_SHARED, "arcmsr", acb);
++	if (error)
++		return PCI_ERS_RESULT_DISCONNECT;
++
++	arcmsr_iop_init(acb);
++	if (strncmp(acb->firm_version, "V1.42", 5) >= 0)
++		host->max_sectors = ARCMSR_MAX_XFER_SECTORS_B;
++
++	pci_set_drvdata(pdev, host);
++
++	error = scsi_add_host(host, &pdev->dev);
++	if (error)
++		return PCI_ERS_RESULT_DISCONNECT;
++
++	error = arcmsr_alloc_sysfs_attr(acb);
++	if (error)
++		return PCI_ERS_RESULT_DISCONNECT;
++
++	scsi_scan_host(host);
++	return PCI_ERS_RESULT_RECOVERED;
++}
++
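++/*
++ * Forepart of the error_detected() handling: before the channel is
++ * reset (or permanently disconnected), flush whatever the IOP has
++ * already posted to the outbound queue so no completed command is lost.
++ */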
++static void arcmsr_pci_ers_need_reset_forepart(struct pci_dev *pdev)
++{
++	struct Scsi_Host *host = pci_get_drvdata(pdev);
++	struct AdapterControlBlock *acb =
++		(struct AdapterControlBlock *) host->hostdata;
++	struct MessageUnit __iomem *reg = acb->pmu;
++	struct CommandControlBlock *ccb;
++	int i = 0, found = 0;
++	int id, lun;
++	uint32_t flag_ccb, outbound_intstatus;
++
++	/* clear and abort all outbound posted Q */
++	while (((flag_ccb = readl(&reg->outbound_queueport)) != 0xFFFFFFFF) &&
++		(i++ < 256)) {
++		ccb = (struct CommandControlBlock *)(acb->vir2phy_offset +
++			(flag_ccb << 5));
++		if (!ccb)
++			continue;
++		if ((ccb->acb != acb) ||
++		    (ccb->startdone != ARCMSR_CCB_START)) {
++			printk(KERN_NOTICE
++				"arcmsr%d: polling get an illegal ccb"
++				" command done ccb = '0x%p'"
++				" ccboutstandingcount = %d \n",
++				acb->host->host_no, ccb,
++				atomic_read(&acb->ccboutstandingcount));
++			continue;
++		}
++
++		id = ccb->pcmd->device->id;
++		lun = ccb->pcmd->device->lun;
++		if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) {
++			if (acb->devstate[id][lun] == ARECA_RAID_GONE)
++				acb->devstate[id][lun] = ARECA_RAID_GOOD;
++			ccb->pcmd->result = DID_OK << 16;
++			arcmsr_ccb_complete(ccb, 1);
++		} else {
++			switch (ccb->arcmsr_cdb.DeviceStatus) {
++			case ARCMSR_DEV_SELECT_TIMEOUT:
++				acb->devstate[id][lun] = ARECA_RAID_GONE;
++				ccb->pcmd->result = DID_NO_CONNECT << 16;
++				arcmsr_ccb_complete(ccb, 1);
++				break;
++			case ARCMSR_DEV_ABORTED:
++			case ARCMSR_DEV_INIT_FAIL:
++				acb->devstate[id][lun] = ARECA_RAID_GONE;
++				ccb->pcmd->result = DID_BAD_TARGET << 16;
++				arcmsr_ccb_complete(ccb, 1);
++				break;
++			case ARCMSR_DEV_CHECK_CONDITION:
++				acb->devstate[id][lun] = ARECA_RAID_GOOD;
++				arcmsr_report_sense_info(ccb);
++				arcmsr_ccb_complete(ccb, 1);
++				break;
++			default:
++				printk(KERN_NOTICE
++					"arcmsr%d: scsi id = %d lun = %d"
++					" polling and getting command error done, "
++					"but got unknown DeviceStatus = 0x%x \n",
++					acb->host->host_no, id, lun,
++					ccb->arcmsr_cdb.DeviceStatus);
++				acb->devstate[id][lun] = ARECA_RAID_GONE;
++				ccb->pcmd->result = DID_BAD_TARGET << 16;
++				arcmsr_ccb_complete(ccb, 1);
++				break;
++			}
++		}
++		found = 1;
++	}
++	if (found) {
++		outbound_intstatus = readl(&reg->outbound_intstatus) &
++			acb->outbound_int_enable;
++		writel(outbound_intstatus, &reg->outbound_intstatus);
++		/* clear interrupt */
++	}
++}
++
++
++static void arcmsr_pci_ers_disconnect_forepart(struct pci_dev *pdev)
++{
++	struct Scsi_Host *host = pci_get_drvdata(pdev);
++	struct AdapterControlBlock *acb =
++		(struct AdapterControlBlock *) host->hostdata;
++	struct MessageUnit __iomem *reg = acb->pmu;
++	struct CommandControlBlock *ccb;
++	int i = 0, found = 0;
++	int id, lun;
++	uint32_t flag_ccb, outbound_intstatus;
++
++	/* clear and abort all outbound posted Q */
++	while (((flag_ccb = readl(&reg->outbound_queueport)) != 0xFFFFFFFF) &&
++		(i++ < 256)) {
++		ccb = (struct CommandControlBlock *)(acb->vir2phy_offset +
++			(flag_ccb << 5));
++		if (!ccb)
++			continue;
++		if ((ccb->acb != acb) ||
++		    (ccb->startdone != ARCMSR_CCB_START)) {
++			printk(KERN_NOTICE
++				"arcmsr%d: polling get an illegal ccb"
++				" command done ccb = '0x%p'"
++				" ccboutstandingcount = %d \n",
++				acb->host->host_no, ccb,
++				atomic_read(&acb->ccboutstandingcount));
++			continue;
++		}
++
++		id = ccb->pcmd->device->id;
++		lun = ccb->pcmd->device->lun;
++		if (!(flag_ccb & ARCMSR_CCBREPLY_FLAG_ERROR)) {
++			if (acb->devstate[id][lun] == ARECA_RAID_GONE)
++				acb->devstate[id][lun] = ARECA_RAID_GOOD;
++			ccb->pcmd->result = DID_OK << 16;
++			arcmsr_ccb_complete(ccb, 1);
++		} else {
++			switch (ccb->arcmsr_cdb.DeviceStatus) {
++			case ARCMSR_DEV_SELECT_TIMEOUT:
++				acb->devstate[id][lun] = ARECA_RAID_GONE;
++				ccb->pcmd->result = DID_NO_CONNECT << 16;
++				arcmsr_ccb_complete(ccb, 1);
++				break;
++			case ARCMSR_DEV_ABORTED:
++			case ARCMSR_DEV_INIT_FAIL:
++				acb->devstate[id][lun] = ARECA_RAID_GONE;
++				ccb->pcmd->result = DID_BAD_TARGET << 16;
++				arcmsr_ccb_complete(ccb, 1);
++				break;
++			case ARCMSR_DEV_CHECK_CONDITION:
++				acb->devstate[id][lun] = ARECA_RAID_GOOD;
++				arcmsr_report_sense_info(ccb);
++				arcmsr_ccb_complete(ccb, 1);
++				break;
++			default:
++				printk(KERN_NOTICE
++					"arcmsr%d: scsi id = %d lun = %d"
++					" polling and getting command error done, "
++					"but got unknown DeviceStatus = 0x%x \n",
++					acb->host->host_no, id, lun,
++					ccb->arcmsr_cdb.DeviceStatus);
++				acb->devstate[id][lun] = ARECA_RAID_GONE;
++				ccb->pcmd->result = DID_BAD_TARGET << 16;
++				arcmsr_ccb_complete(ccb, 1);
++				break;
++			}
++		}
++		found = 1;
++	}
++	if (found) {
++		outbound_intstatus = readl(&reg->outbound_intstatus) &
++			acb->outbound_int_enable;
++		writel(outbound_intstatus, &reg->outbound_intstatus);
++		/* clear interrupt */
++	}
++}
++
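++/*
++ * error_detected() callback: map the channel state reported by the PCI
++ * core onto a recovery action; a frozen channel requests a slot reset,
++ * a permanent failure disconnects the device.
++ */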
++static pci_ers_result_t arcmsr_pci_error_detected(struct pci_dev *pdev,
++						pci_channel_state_t state)
++{
++	switch (state) {
++	case pci_channel_io_frozen:
++		arcmsr_pci_ers_need_reset_forepart(pdev);
++		return PCI_ERS_RESULT_NEED_RESET;
++	case pci_channel_io_perm_failure:
++		arcmsr_pci_ers_disconnect_forepart(pdev);
++		return PCI_ERS_RESULT_DISCONNECT;
++	default:
++		return PCI_ERS_RESULT_NEED_RESET;
++	}
++}
+diff -Nurb linux-2.6.22-570/drivers/scsi/bvme6000.c linux-2.6.22-try2/drivers/scsi/bvme6000.c
+--- linux-2.6.22-570/drivers/scsi/bvme6000.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/bvme6000.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,76 +0,0 @@
+-/*
+- * Detection routine for the NCR53c710 based BVME6000 SCSI Controllers for Linux.
+- *
+- * Based on work by Alan Hourihane
+- */
+-#include <linux/types.h>
+-#include <linux/mm.h>
+-#include <linux/blkdev.h>
+-#include <linux/zorro.h>
+-
+-#include <asm/setup.h>
+-#include <asm/page.h>
+-#include <asm/pgtable.h>
+-#include <asm/bvme6000hw.h>
+-#include <asm/irq.h>
+-
+-#include "scsi.h"
+-#include <scsi/scsi_host.h>
+-#include "53c7xx.h"
+-#include "bvme6000.h"
+-
+-#include<linux/stat.h>
+-
+-
+-int bvme6000_scsi_detect(struct scsi_host_template *tpnt)
+-{
+-    static unsigned char called = 0;
+-    int clock;
+-    long long options;
+-
+-    if (called)
+-	return 0;
+-    if (!MACH_IS_BVME6000)
+-	return 0;
+-
+-    tpnt->proc_name = "BVME6000";
+-
+-    options = OPTION_MEMORY_MAPPED|OPTION_DEBUG_TEST1|OPTION_INTFLY|OPTION_SYNCHRONOUS|OPTION_ALWAYS_SYNCHRONOUS|OPTION_DISCONNECT;
+-
+-    clock = 40000000;	/* 66MHz SCSI Clock */
+-
+-    ncr53c7xx_init(tpnt, 0, 710, (unsigned long)BVME_NCR53C710_BASE,
+-			0, BVME_IRQ_SCSI, DMA_NONE,
+-			options, clock);
+-    called = 1;
+-    return 1;
+-}
+-
+-static int bvme6000_scsi_release(struct Scsi_Host *shost)
+-{
+-	if (shost->irq)
+-		free_irq(shost->irq, NULL);
+-	if (shost->dma_channel != 0xff)
+-		free_dma(shost->dma_channel);
+-	if (shost->io_port && shost->n_io_port)
+-		release_region(shost->io_port, shost->n_io_port);
+-	scsi_unregister(shost);
+-	return 0;
+-}
+-
+-static struct scsi_host_template driver_template = {
+-	.name			= "BVME6000 NCR53c710 SCSI",
+-	.detect			= bvme6000_scsi_detect,
+-	.release		= bvme6000_scsi_release,
+-	.queuecommand		= NCR53c7xx_queue_command,
+-	.abort			= NCR53c7xx_abort,
+-	.reset			= NCR53c7xx_reset,
+-	.can_queue		= 24,
+-	.this_id		= 7,
+-	.sg_tablesize		= 63,
+-	.cmd_per_lun		= 3,
+-	.use_clustering		= DISABLE_CLUSTERING
+-};
+-
+-
+-#include "scsi_module.c"
+diff -Nurb linux-2.6.22-570/drivers/scsi/bvme6000.h linux-2.6.22-try2/drivers/scsi/bvme6000.h
+--- linux-2.6.22-570/drivers/scsi/bvme6000.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/bvme6000.h	1969-12-31 19:00:00.000000000 -0500
+@@ -1,24 +0,0 @@
+-#ifndef BVME6000_SCSI_H
+-#define BVME6000_SCSI_H
+-
+-#include <linux/types.h>
+-
+-int bvme6000_scsi_detect(struct scsi_host_template *);
+-const char *NCR53c7x0_info(void);
+-int NCR53c7xx_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+-int NCR53c7xx_abort(Scsi_Cmnd *);
+-int NCR53c7x0_release (struct Scsi_Host *);
+-int NCR53c7xx_reset(Scsi_Cmnd *, unsigned int);
+-void NCR53c7x0_intr(int irq, void *dev_id);
+-
+-#ifndef CMD_PER_LUN
+-#define CMD_PER_LUN 3
+-#endif
+-
+-#ifndef CAN_QUEUE
+-#define CAN_QUEUE 24
+-#endif
+-
+-#include <scsi/scsicam.h>
+-
+-#endif /* BVME6000_SCSI_H */
+diff -Nurb linux-2.6.22-570/drivers/scsi/bvme6000_scsi.c linux-2.6.22-try2/drivers/scsi/bvme6000_scsi.c
+--- linux-2.6.22-570/drivers/scsi/bvme6000_scsi.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/scsi/bvme6000_scsi.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,135 @@
++/*
++ * Detection routine for the NCR53c710 based BVME6000 SCSI Controllers for Linux.
++ *
++ * Based on work by Alan Hourihane and Kars de Jong
++ *
++ * Rewritten to use 53c700.c by Richard Hirst <richard@sleepie.demon.co.uk>
++ */
++
++#include <linux/module.h>
++#include <linux/blkdev.h>
++#include <linux/device.h>
++#include <linux/platform_device.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <asm/bvme6000hw.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_transport.h>
++#include <scsi/scsi_transport_spi.h>
++
++#include "53c700.h"
++
++MODULE_AUTHOR("Richard Hirst <richard@sleepie.demon.co.uk>");
++MODULE_DESCRIPTION("BVME6000 NCR53C710 driver");
++MODULE_LICENSE("GPL");
++
++static struct scsi_host_template bvme6000_scsi_driver_template = {
++	.name			= "BVME6000 NCR53c710 SCSI",
++	.proc_name		= "BVME6000",
++	.this_id		= 7,
++	.module			= THIS_MODULE,
++};
++
++static struct platform_device *bvme6000_scsi_device;
++
++static __devinit int
++bvme6000_probe(struct device *dev)
++{
++	struct Scsi_Host * host = NULL;
++	struct NCR_700_Host_Parameters *hostdata;
++
++	if (!MACH_IS_BVME6000)
++		goto out;
++
++	hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL);
++	if (hostdata == NULL) {
++		printk(KERN_ERR "bvme6000-scsi: "
++				"Failed to allocate host data\n");
++		goto out;
++	}
++	memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters));
++
++	/* Fill in the required pieces of hostdata */
++	hostdata->base = (void __iomem *)BVME_NCR53C710_BASE;
++	hostdata->clock = 40;	/* XXX - depends on the CPU clock! */
++	hostdata->chip710 = 1;
++	hostdata->dmode_extra = DMODE_FC2;
++	hostdata->dcntl_extra = EA_710;
++	hostdata->ctest7_extra = CTEST7_TT1;
++
++	/* and register the chip */
++	host = NCR_700_detect(&bvme6000_scsi_driver_template, hostdata, dev);
++	if (!host) {
++		printk(KERN_ERR "bvme6000-scsi: No host detected; "
++				"board configuration problem?\n");
++		goto out_free;
++	}
++	host->base = BVME_NCR53C710_BASE;
++	host->this_id = 7;
++	host->irq = BVME_IRQ_SCSI;
++	if (request_irq(BVME_IRQ_SCSI, NCR_700_intr, 0, "bvme6000-scsi",
++			host)) {
++		printk(KERN_ERR "bvme6000-scsi: request_irq failed\n");
++		goto out_put_host;
++	}
++
++	scsi_scan_host(host);
++
++	return 0;
++
++ out_put_host:
++	scsi_host_put(host);
++ out_free:
++	kfree(hostdata);
++ out:
++	return -ENODEV;
++}
++
++static __devexit int
++bvme6000_device_remove(struct device *dev)
++{
++	struct Scsi_Host *host = dev_to_shost(dev);
++	struct NCR_700_Host_Parameters *hostdata = shost_priv(host);
++
++	scsi_remove_host(host);
++	NCR_700_release(host);
++	kfree(hostdata);
++	free_irq(host->irq, host);
++
++	return 0;
++}
++
++static struct device_driver bvme6000_scsi_driver = {
++	.name	= "bvme6000-scsi",
++	.bus	= &platform_bus_type,
++	.probe	= bvme6000_probe,
++	.remove	= __devexit_p(bvme6000_device_remove),
++};
++
++static int __init bvme6000_scsi_init(void)
++{
++	int err;
++
++	err = driver_register(&bvme6000_scsi_driver);
++	if (err)
++		return err;
++
++	bvme6000_scsi_device = platform_device_register_simple("bvme6000-scsi",
++							       -1, NULL, 0);
++	if (IS_ERR(bvme6000_scsi_device)) {
++		driver_unregister(&bvme6000_scsi_driver);
++		return PTR_ERR(bvme6000_scsi_device);
++	}
++
++	return 0;
++}
++
++static void __exit bvme6000_scsi_exit(void)
++{
++	platform_device_unregister(bvme6000_scsi_device);
++	driver_unregister(&bvme6000_scsi_driver);
++}
++
++module_init(bvme6000_scsi_init);
++module_exit(bvme6000_scsi_exit);
+diff -Nurb linux-2.6.22-570/drivers/scsi/dpt_i2o.c linux-2.6.22-try2/drivers/scsi/dpt_i2o.c
+--- linux-2.6.22-570/drivers/scsi/dpt_i2o.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/dpt_i2o.c	2007-12-19 15:29:23.000000000 -0500
+@@ -2078,12 +2078,13 @@
+ 	u32 *lenptr;
+ 	int direction;
+ 	int scsidir;
++	int nseg;
+ 	u32 len;
+ 	u32 reqlen;
+ 	s32 rcode;
+ 
+ 	memset(msg, 0 , sizeof(msg));
+-	len = cmd->request_bufflen;
++	len = scsi_bufflen(cmd);
+ 	direction = 0x00000000;	
+ 	
+ 	scsidir = 0x00000000;			// DATA NO XFER
+@@ -2140,21 +2141,21 @@
+ 	lenptr=mptr++;		/* Remember me - fill in when we know */
+ 	reqlen = 14;		// SINGLE SGE
+ 	/* Now fill in the SGList and command */
+-	if(cmd->use_sg) {
+-		struct scatterlist *sg = (struct scatterlist *)cmd->request_buffer;
+-		int sg_count = pci_map_sg(pHba->pDev, sg, cmd->use_sg,
+-				cmd->sc_data_direction);
+ 
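++	/*
++	 * scsi_dma_map() maps the command's scatterlist for DMA and returns
++	 * the number of entries mapped, 0 for a command carrying no data,
++	 * or a negative value if the mapping failed.
++	 */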
++	nseg = scsi_dma_map(cmd);
++	BUG_ON(nseg < 0);
++	if (nseg) {
++		struct scatterlist *sg;
+ 
+ 		len = 0;
+-		for(i = 0 ; i < sg_count; i++) {
++		scsi_for_each_sg(cmd, sg, nseg, i) {
+ 			*mptr++ = direction|0x10000000|sg_dma_len(sg);
+ 			len+=sg_dma_len(sg);
+ 			*mptr++ = sg_dma_address(sg);
+-			sg++;
+-		}
+ 		/* Make this an end of list */
+-		mptr[-2] = direction|0xD0000000|sg_dma_len(sg-1);
++			if (i == nseg - 1)
++				mptr[-2] = direction|0xD0000000|sg_dma_len(sg);
++		}
+ 		reqlen = mptr - msg;
+ 		*lenptr = len;
+ 		
+@@ -2163,16 +2164,8 @@
+ 				len, cmd->underflow);
+ 		}
+ 	} else {
+-		*lenptr = len = cmd->request_bufflen;
+-		if(len == 0) {
++		*lenptr = len = 0;
+ 			reqlen = 12;
+-		} else {
+-			*mptr++ = 0xD0000000|direction|cmd->request_bufflen;
+-			*mptr++ = pci_map_single(pHba->pDev,
+-				cmd->request_buffer,
+-				cmd->request_bufflen,
+-				cmd->sc_data_direction);
+-		}
+ 	}
+ 	
+ 	/* Stick the headers on */
+@@ -2232,7 +2225,7 @@
+ 	hba_status = detailed_status >> 8;
+ 
+ 	// calculate resid for sg 
+-	cmd->resid = cmd->request_bufflen - readl(reply+5);
++	scsi_set_resid(cmd, scsi_bufflen(cmd) - readl(reply+5));
+ 
+ 	pHba = (adpt_hba*) cmd->device->host->hostdata[0];
+ 
+diff -Nurb linux-2.6.22-570/drivers/scsi/eata.c linux-2.6.22-try2/drivers/scsi/eata.c
+--- linux-2.6.22-570/drivers/scsi/eata.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/eata.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1609,8 +1609,9 @@
+ 
+ static void map_dma(unsigned int i, struct hostdata *ha)
+ {
+-	unsigned int k, count, pci_dir;
+-	struct scatterlist *sgpnt;
++	unsigned int k, pci_dir;
++	int count;
++	struct scatterlist *sg;
+ 	struct mscp *cpp;
+ 	struct scsi_cmnd *SCpnt;
+ 
+@@ -1625,38 +1626,19 @@
+ 
+ 	cpp->sense_len = sizeof SCpnt->sense_buffer;
+ 
+-	if (!SCpnt->use_sg) {
+-
+-		/* If we get here with PCI_DMA_NONE, pci_map_single triggers a BUG() */
+-		if (!SCpnt->request_bufflen)
+-			pci_dir = PCI_DMA_BIDIRECTIONAL;
+-
+-		if (SCpnt->request_buffer)
+-			cpp->data_address = H2DEV(pci_map_single(ha->pdev,
+-								 SCpnt->
+-								 request_buffer,
+-								 SCpnt->
+-								 request_bufflen,
+-								 pci_dir));
+-
+-		cpp->data_len = H2DEV(SCpnt->request_bufflen);
+-		return;
+-	}
+-
+-	sgpnt = (struct scatterlist *)SCpnt->request_buffer;
+-	count = pci_map_sg(ha->pdev, sgpnt, SCpnt->use_sg, pci_dir);
+-
+-	for (k = 0; k < count; k++) {
+-		cpp->sglist[k].address = H2DEV(sg_dma_address(&sgpnt[k]));
+-		cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(&sgpnt[k]));
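++	/* Map the midlayer-provided scatterlist; a negative return from
++	 * scsi_dma_map() signals a mapping failure we cannot recover from
++	 * here. */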
++	count = scsi_dma_map(SCpnt);
++	BUG_ON(count < 0);
++	scsi_for_each_sg(SCpnt, sg, count, k) {
++		cpp->sglist[k].address = H2DEV(sg_dma_address(sg));
++		cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(sg));
+ 	}
+ 
+ 	cpp->sg = 1;
+ 	cpp->data_address = H2DEV(pci_map_single(ha->pdev, cpp->sglist,
+-						 SCpnt->use_sg *
++						 scsi_sg_count(SCpnt) *
+ 						 sizeof(struct sg_list),
+ 						 pci_dir));
+-	cpp->data_len = H2DEV((SCpnt->use_sg * sizeof(struct sg_list)));
++	cpp->data_len = H2DEV((scsi_sg_count(SCpnt) * sizeof(struct sg_list)));
+ }
+ 
+ static void unmap_dma(unsigned int i, struct hostdata *ha)
+@@ -1673,9 +1655,7 @@
+ 		pci_unmap_single(ha->pdev, DEV2H(cpp->sense_addr),
+ 				 DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE);
+ 
+-	if (SCpnt->use_sg)
+-		pci_unmap_sg(ha->pdev, SCpnt->request_buffer, SCpnt->use_sg,
+-			     pci_dir);
++	scsi_dma_unmap(SCpnt);
+ 
+ 	if (!DEV2H(cpp->data_len))
+ 		pci_dir = PCI_DMA_BIDIRECTIONAL;
+@@ -1700,9 +1680,9 @@
+ 					    DEV2H(cpp->sense_len),
+ 					    PCI_DMA_FROMDEVICE);
+ 
+-	if (SCpnt->use_sg)
+-		pci_dma_sync_sg_for_cpu(ha->pdev, SCpnt->request_buffer,
+-					SCpnt->use_sg, pci_dir);
++	if (scsi_sg_count(SCpnt))
++		pci_dma_sync_sg_for_cpu(ha->pdev, scsi_sglist(SCpnt),
++					scsi_sg_count(SCpnt), pci_dir);
+ 
+ 	if (!DEV2H(cpp->data_len))
+ 		pci_dir = PCI_DMA_BIDIRECTIONAL;
+diff -Nurb linux-2.6.22-570/drivers/scsi/esp_scsi.c linux-2.6.22-try2/drivers/scsi/esp_scsi.c
+--- linux-2.6.22-570/drivers/scsi/esp_scsi.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/scsi/esp_scsi.c	2007-12-19 15:29:23.000000000 -0500
+@@ -324,17 +324,14 @@
+ static void esp_map_dma(struct esp *esp, struct scsi_cmnd *cmd)
+ {
+ 	struct esp_cmd_priv *spriv = ESP_CMD_PRIV(cmd);
+-	struct scatterlist *sg = cmd->request_buffer;
++	struct scatterlist *sg = scsi_sglist(cmd);
+ 	int dir = cmd->sc_data_direction;
+ 	int total, i;
+ 
+ 	if (dir == DMA_NONE)
+ 		return;
+ 
+-	BUG_ON(cmd->use_sg == 0);
+-
+-	spriv->u.num_sg = esp->ops->map_sg(esp, sg,
+-					   cmd->use_sg, dir);
++	spriv->u.num_sg = esp->ops->map_sg(esp, sg, scsi_sg_count(cmd), dir);
+ 	spriv->cur_residue = sg_dma_len(sg);
+ 	spriv->cur_sg = sg;
+ 
+@@ -407,8 +404,7 @@
+ 	if (dir == DMA_NONE)
+ 		return;
+ 
+-	esp->ops->unmap_sg(esp, cmd->request_buffer,
+-			   spriv->u.num_sg, dir);
++	esp->ops->unmap_sg(esp, scsi_sglist(cmd), spriv->u.num_sg, dir);
+ }
+ 
+ static void esp_save_pointers(struct esp *esp, struct esp_cmd_entry *ent)
+@@ -921,7 +917,7 @@
+ static int esp_queuecommand(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
+ {
+ 	struct scsi_device *dev = cmd->device;
+-	struct esp *esp = host_to_esp(dev->host);
++	struct esp *esp = shost_priv(dev->host);
+ 	struct esp_cmd_priv *spriv;
+ 	struct esp_cmd_entry *ent;
+ 
+@@ -2358,7 +2354,7 @@
+ 
+ static int esp_slave_alloc(struct scsi_device *dev)
+ {
+-	struct esp *esp = host_to_esp(dev->host);
++	struct esp *esp = shost_priv(dev->host);
+ 	struct esp_target_data *tp = &esp->target[dev->id];
+ 	struct esp_lun_data *lp;
+ 
+@@ -2382,7 +2378,7 @@
+ 
+ static int esp_slave_configure(struct scsi_device *dev)
+ {
+-	struct esp *esp = host_to_esp(dev->host);
++	struct esp *esp = shost_priv(dev->host);
+ 	struct esp_target_data *tp = &esp->target[dev->id];
+ 	int goal_tags, queue_depth;
+ 
+@@ -2424,7 +2420,7 @@
+ 
+ static int esp_eh_abort_handler(struct scsi_cmnd *cmd)
+ {
+-	struct esp *esp = host_to_esp(cmd->device->host);
++	struct esp *esp = shost_priv(cmd->device->host);
+ 	struct esp_cmd_entry *ent, *tmp;
+ 	struct completion eh_done;
+ 	unsigned long flags;
+@@ -2540,7 +2536,7 @@
+ 
+ static int esp_eh_bus_reset_handler(struct scsi_cmnd *cmd)
+ {
+-	struct esp *esp = host_to_esp(cmd->device->host);
++	struct esp *esp = shost_priv(cmd->device->host);
+ 	struct completion eh_reset;
+ 	unsigned long flags;
+ 
+@@ -2576,7 +2572,7 @@
+ /* All bets are off, reset the entire device.  */
+ static int esp_eh_host_reset_handler(struct scsi_cmnd *cmd)
+ {
+-	struct esp *esp = host_to_esp(cmd->device->host);
++	struct esp *esp = shost_priv(cmd->device->host);
+ 	unsigned long flags;
+ 
+ 	spin_lock_irqsave(esp->host->host_lock, flags);
+@@ -2616,7 +2612,7 @@
+ 
+ static void esp_get_signalling(struct Scsi_Host *host)
+ {
+-	struct esp *esp = host_to_esp(host);
++	struct esp *esp = shost_priv(host);
+ 	enum spi_signal_type type;
+ 
+ 	if (esp->flags & ESP_FLAG_DIFFERENTIAL)
+@@ -2630,7 +2626,7 @@
+ static void esp_set_offset(struct scsi_target *target, int offset)
+ {
+ 	struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+-	struct esp *esp = host_to_esp(host);
++	struct esp *esp = shost_priv(host);
+ 	struct esp_target_data *tp = &esp->target[target->id];
+ 
+ 	tp->nego_goal_offset = offset;
+@@ -2640,7 +2636,7 @@
+ static void esp_set_period(struct scsi_target *target, int period)
+ {
+ 	struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+-	struct esp *esp = host_to_esp(host);
++	struct esp *esp = shost_priv(host);
+ 	struct esp_target_data *tp = &esp->target[target->id];
+ 
+ 	tp->nego_goal_period = period;
+@@ -2650,7 +2646,7 @@
+ static void esp_set_width(struct scsi_target *target, int width)
+ {
+ 	struct Scsi_Host *host = dev_to_shost(target->dev.parent);
+-	struct esp *esp = host_to_esp(host);
++	struct esp *esp = shost_priv(host);
+ 	struct esp_target_data *tp = &esp->target[target->id];
+ 
+ 	tp->nego_goal_width = (width ? 1 : 0);
+diff -Nurb linux-2.6.22-570/drivers/scsi/esp_scsi.h linux-2.6.22-try2/drivers/scsi/esp_scsi.h
+--- linux-2.6.22-570/drivers/scsi/esp_scsi.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/esp_scsi.h	2007-12-19 15:29:23.000000000 -0500
+@@ -517,8 +517,6 @@
+ 	struct sbus_dma		*dma;
+ };
+ 
+-#define host_to_esp(host)	((struct esp *)(host)->hostdata)
+-
+ /* A front-end driver for the ESP chip should do the following in
+  * it's device probe routine:
+  * 1) Allocate the host and private area using scsi_host_alloc()
+diff -Nurb linux-2.6.22-570/drivers/scsi/fdomain.c linux-2.6.22-try2/drivers/scsi/fdomain.c
+--- linux-2.6.22-570/drivers/scsi/fdomain.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/fdomain.c	2007-12-19 15:29:23.000000000 -0500
+@@ -410,6 +410,8 @@
+ static char * fdomain = NULL;
+ module_param(fdomain, charp, 0);
+ 
++#ifndef PCMCIA
++
+ static unsigned long addresses[] = {
+    0xc8000,
+    0xca000,
+@@ -426,6 +428,8 @@
+ 
+ static unsigned short ints[] = { 3, 5, 10, 11, 12, 14, 15, 0 };
+ 
++#endif /* !PCMCIA */
++
+ /*
+ 
+   READ THIS BEFORE YOU ADD A SIGNATURE!
+@@ -458,6 +462,8 @@
+ 
+ */
+ 
++#ifndef PCMCIA
++
+ static struct signature {
+    const char *signature;
+    int  sig_offset;
+@@ -503,6 +509,8 @@
+ 
+ #define SIGNATURE_COUNT ARRAY_SIZE(signatures)
+ 
++#endif /* !PCMCIA */
++
+ static void print_banner( struct Scsi_Host *shpnt )
+ {
+    if (!shpnt) return;		/* This won't ever happen */
+@@ -633,6 +641,8 @@
+    return 0;
+ }
+ 
++#ifndef PCMCIA
++
+ /* fdomain_get_irq assumes that we have a valid MCA ID for a
+    TMC-1660/TMC-1680 Future Domain board.  Now, check to be sure the
+    bios_base matches these ports.  If someone was unlucky enough to have
+@@ -667,7 +677,6 @@
+ 
+ static int fdomain_isa_detect( int *irq, int *iobase )
+ {
+-#ifndef PCMCIA
+    int i, j;
+    int base = 0xdeadbeef;
+    int flag = 0;
+@@ -786,11 +795,22 @@
+    *iobase = base;
+ 
+    return 1;			/* success */
+-#else
++}
++
++#else /* PCMCIA */
++
++static int fdomain_isa_detect( int *irq, int *iobase )
++{
++	if (irq)
++		*irq = 0;
++	if (iobase)
++		*iobase = 0;
+    return 0;
+-#endif
+ }
+ 
++#endif /* !PCMCIA */
++
++
+ /* PCI detection function: int fdomain_pci_bios_detect(int* irq, int*
+    iobase) This function gets the Interrupt Level and I/O base address from
+    the PCI configuration registers. */
+@@ -1345,16 +1365,15 @@
+ 
+ #if ERRORS_ONLY
+       if (current_SC->cmnd[0] == REQUEST_SENSE && !current_SC->SCp.Status) {
+-	 if ((unsigned char)(*((char *)current_SC->request_buffer+2)) & 0x0f) {
++	      char *buf = scsi_sglist(current_SC);
++	 if ((unsigned char)(*(buf + 2)) & 0x0f) {
+ 	    unsigned char key;
+ 	    unsigned char code;
+ 	    unsigned char qualifier;
+ 
+-	    key = (unsigned char)(*((char *)current_SC->request_buffer + 2))
+-		  & 0x0f;
+-	    code = (unsigned char)(*((char *)current_SC->request_buffer + 12));
+-	    qualifier = (unsigned char)(*((char *)current_SC->request_buffer
+-					  + 13));
++	    key = (unsigned char)(*(buf + 2)) & 0x0f;
++	    code = (unsigned char)(*(buf + 12));
++	    qualifier = (unsigned char)(*(buf + 13));
+ 
+ 	    if (key != UNIT_ATTENTION
+ 		&& !(key == NOT_READY
+@@ -1405,8 +1424,8 @@
+    printk( "queue: target = %d cmnd = 0x%02x pieces = %d size = %u\n",
+ 	   SCpnt->target,
+ 	   *(unsigned char *)SCpnt->cmnd,
+-	   SCpnt->use_sg,
+-	   SCpnt->request_bufflen );
++	   scsi_sg_count(SCpnt),
++	   scsi_bufflen(SCpnt));
+ #endif
+ 
+    fdomain_make_bus_idle();
+@@ -1416,20 +1435,19 @@
+ 
+    /* Initialize static data */
+ 
+-   if (current_SC->use_sg) {
+-      current_SC->SCp.buffer =
+-	    (struct scatterlist *)current_SC->request_buffer;
+-      current_SC->SCp.ptr              = page_address(current_SC->SCp.buffer->page) + current_SC->SCp.buffer->offset;
++   if (scsi_sg_count(current_SC)) {
++	   current_SC->SCp.buffer = scsi_sglist(current_SC);
++	   current_SC->SCp.ptr = page_address(current_SC->SCp.buffer->page)
++		   + current_SC->SCp.buffer->offset;
+       current_SC->SCp.this_residual    = current_SC->SCp.buffer->length;
+-      current_SC->SCp.buffers_residual = current_SC->use_sg - 1;
++	   current_SC->SCp.buffers_residual = scsi_sg_count(current_SC) - 1;
+    } else {
+-      current_SC->SCp.ptr              = (char *)current_SC->request_buffer;
+-      current_SC->SCp.this_residual    = current_SC->request_bufflen;
++	   current_SC->SCp.ptr              = NULL;
++	   current_SC->SCp.this_residual    = 0;
+       current_SC->SCp.buffer           = NULL;
+       current_SC->SCp.buffers_residual = 0;
+    }
+ 	 
+-   
+    current_SC->SCp.Status              = 0;
+    current_SC->SCp.Message             = 0;
+    current_SC->SCp.have_data_in        = 0;
+@@ -1472,8 +1490,8 @@
+ 	   SCpnt->SCp.phase,
+ 	   SCpnt->device->id,
+ 	   *(unsigned char *)SCpnt->cmnd,
+-	   SCpnt->use_sg,
+-	   SCpnt->request_bufflen );
++	   scsi_sg_count(SCpnt),
++	   scsi_bufflen(SCpnt));
+    printk( "sent_command = %d, have_data_in = %d, timeout = %d\n",
+ 	   SCpnt->SCp.sent_command,
+ 	   SCpnt->SCp.have_data_in,
+diff -Nurb linux-2.6.22-570/drivers/scsi/gdth.c linux-2.6.22-try2/drivers/scsi/gdth.c
+--- linux-2.6.22-570/drivers/scsi/gdth.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/gdth.c	2007-12-19 15:29:23.000000000 -0500
+@@ -876,7 +876,7 @@
+ /* Vortex only makes RAID controllers.
+  * We do not really want to specify all 550 ids here, so wildcard match.
+  */
+-static struct pci_device_id gdthtable[] __attribute_used__ = {
++static struct pci_device_id gdthtable[] __maybe_unused = {
+     {PCI_VENDOR_ID_VORTEX,PCI_ANY_ID,PCI_ANY_ID, PCI_ANY_ID},
+     {PCI_VENDOR_ID_INTEL,PCI_DEVICE_ID_INTEL_SRC,PCI_ANY_ID,PCI_ANY_ID}, 
+     {PCI_VENDOR_ID_INTEL,PCI_DEVICE_ID_INTEL_SRC_XSCALE,PCI_ANY_ID,PCI_ANY_ID}, 
+@@ -1955,7 +1955,7 @@
+         for (j = 0; j < 12; ++j) 
+             rtc[j] = CMOS_READ(j);
+     } while (rtc[0] != CMOS_READ(0));
+-    spin_lock_irqrestore(&rtc_lock, flags);
++    spin_unlock_irqrestore(&rtc_lock, flags);
+     TRACE2(("gdth_search_drives(): RTC: %x/%x/%x\n",*(ulong32 *)&rtc[0],
+             *(ulong32 *)&rtc[4], *(ulong32 *)&rtc[8]));
+     /* 3. send to controller firmware */
+diff -Nurb linux-2.6.22-570/drivers/scsi/hptiop.c linux-2.6.22-try2/drivers/scsi/hptiop.c
+--- linux-2.6.22-570/drivers/scsi/hptiop.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/scsi/hptiop.c	2007-12-19 15:29:23.000000000 -0500
+@@ -339,20 +339,8 @@
+ 
+ 	scp = hba->reqs[tag].scp;
+ 
+-	if (HPT_SCP(scp)->mapped) {
+-		if (scp->use_sg)
+-			pci_unmap_sg(hba->pcidev,
+-				(struct scatterlist *)scp->request_buffer,
+-				scp->use_sg,
+-				scp->sc_data_direction
+-			);
+-		else
+-			pci_unmap_single(hba->pcidev,
+-				HPT_SCP(scp)->dma_handle,
+-				scp->request_bufflen,
+-				scp->sc_data_direction
+-			);
+-	}
++	if (HPT_SCP(scp)->mapped)
++		scsi_dma_unmap(scp);
+ 
+ 	switch (le32_to_cpu(req->header.result)) {
+ 	case IOP_RESULT_SUCCESS:
+@@ -449,43 +437,26 @@
+ {
+ 	struct Scsi_Host *host = scp->device->host;
+ 	struct hptiop_hba *hba = (struct hptiop_hba *)host->hostdata;
+-	struct scatterlist *sglist = (struct scatterlist *)scp->request_buffer;
++	struct scatterlist *sg;
++	int idx, nseg;
+ 
+-	/*
+-	 * though we'll not get non-use_sg fields anymore,
+-	 * keep use_sg checking anyway
+-	 */
+-	if (scp->use_sg) {
+-		int idx;
+-
+-		HPT_SCP(scp)->sgcnt = pci_map_sg(hba->pcidev,
+-				sglist, scp->use_sg,
+-				scp->sc_data_direction);
++	nseg = scsi_dma_map(scp);
++	BUG_ON(nseg < 0);
++	if (!nseg)
++		return 0;
++
++	HPT_SCP(scp)->sgcnt = nseg;
+ 		HPT_SCP(scp)->mapped = 1;
++
+ 		BUG_ON(HPT_SCP(scp)->sgcnt > hba->max_sg_descriptors);
+ 
+-		for (idx = 0; idx < HPT_SCP(scp)->sgcnt; idx++) {
+-			psg[idx].pci_address =
+-				cpu_to_le64(sg_dma_address(&sglist[idx]));
+-			psg[idx].size = cpu_to_le32(sg_dma_len(&sglist[idx]));
++	scsi_for_each_sg(scp, sg, HPT_SCP(scp)->sgcnt, idx) {
++		psg[idx].pci_address = cpu_to_le64(sg_dma_address(sg));
++		psg[idx].size = cpu_to_le32(sg_dma_len(sg));
+ 			psg[idx].eot = (idx == HPT_SCP(scp)->sgcnt - 1) ?
+ 				cpu_to_le32(1) : 0;
+ 		}
+-
+ 		return HPT_SCP(scp)->sgcnt;
+-	} else {
+-		HPT_SCP(scp)->dma_handle = pci_map_single(
+-				hba->pcidev,
+-				scp->request_buffer,
+-				scp->request_bufflen,
+-				scp->sc_data_direction
+-			);
+-		HPT_SCP(scp)->mapped = 1;
+-		psg->pci_address = cpu_to_le64(HPT_SCP(scp)->dma_handle);
+-		psg->size = cpu_to_le32(scp->request_bufflen);
+-		psg->eot = cpu_to_le32(1);
+-		return 1;
+-	}
+ }
+ 
+ static int hptiop_queuecommand(struct scsi_cmnd *scp,
+@@ -530,9 +501,8 @@
+ 	req = (struct hpt_iop_request_scsi_command *)_req->req_virt;
+ 
+ 	/* build S/G table */
+-	if (scp->request_bufflen)
+ 		sg_count = hptiop_buildsgl(scp, req->sg_list);
+-	else
++	if (!sg_count)
+ 		HPT_SCP(scp)->mapped = 0;
+ 
+ 	req->header.flags = cpu_to_le32(IOP_REQUEST_FLAG_OUTPUT_CONTEXT);
+@@ -541,7 +511,7 @@
+ 	req->header.context = cpu_to_le32(IOPMU_QUEUE_ADDR_HOST_BIT |
+ 							(u32)_req->index);
+ 	req->header.context_hi32 = 0;
+-	req->dataxfer_length = cpu_to_le32(scp->request_bufflen);
++	req->dataxfer_length = cpu_to_le32(scsi_bufflen(scp));
+ 	req->channel = scp->device->channel;
+ 	req->target = scp->device->id;
+ 	req->lun = scp->device->lun;
+diff -Nurb linux-2.6.22-570/drivers/scsi/ibmmca.c linux-2.6.22-try2/drivers/scsi/ibmmca.c
+--- linux-2.6.22-570/drivers/scsi/ibmmca.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/ibmmca.c	2007-12-19 15:29:23.000000000 -0500
+@@ -31,14 +31,21 @@
+ #include <linux/mca.h>
+ #include <linux/spinlock.h>
+ #include <linux/init.h>
+-#include <linux/mca-legacy.h>
+ 
+ #include <asm/system.h>
+ #include <asm/io.h>
+ 
+ #include "scsi.h"
+ #include <scsi/scsi_host.h>
+-#include "ibmmca.h"
++
++/* Common forward declarations for all Linux-versions: */
++static int ibmmca_queuecommand (Scsi_Cmnd *, void (*done) (Scsi_Cmnd *));
++static int ibmmca_abort (Scsi_Cmnd *);
++static int ibmmca_host_reset (Scsi_Cmnd *);
++static int ibmmca_biosparam (struct scsi_device *, struct block_device *, sector_t, int *);
++static int ibmmca_proc_info(struct Scsi_Host *shpnt, char *buffer, char **start, off_t offset, int length, int inout);
++
++
+ 
+ /* current version of this driver-source: */
+ #define IBMMCA_SCSI_DRIVER_VERSION "4.0b-ac"
+@@ -65,11 +72,11 @@
+ #define IM_DEBUG_CMD_DEVICE	TYPE_TAPE
+ 
+ /* relative addresses of hardware registers on a subsystem */
+-#define IM_CMD_REG(hi)	(hosts[(hi)]->io_port)	/*Command Interface, (4 bytes long) */
+-#define IM_ATTN_REG(hi)	(hosts[(hi)]->io_port+4)	/*Attention (1 byte) */
+-#define IM_CTR_REG(hi)	(hosts[(hi)]->io_port+5)	/*Basic Control (1 byte) */
+-#define IM_INTR_REG(hi)	(hosts[(hi)]->io_port+6)	/*Interrupt Status (1 byte, r/o) */
+-#define IM_STAT_REG(hi)	(hosts[(hi)]->io_port+7)	/*Basic Status (1 byte, read only) */
++#define IM_CMD_REG(h)	((h)->io_port)	/*Command Interface, (4 bytes long) */
++#define IM_ATTN_REG(h)	((h)->io_port+4)	/*Attention (1 byte) */
++#define IM_CTR_REG(h)	((h)->io_port+5)	/*Basic Control (1 byte) */
++#define IM_INTR_REG(h)	((h)->io_port+6)	/*Interrupt Status (1 byte, r/o) */
++#define IM_STAT_REG(h)	((h)->io_port+7)	/*Basic Status (1 byte, read only) */
+ 
+ /* basic I/O-port of first adapter */
+ #define IM_IO_PORT	0x3540
+@@ -266,30 +273,36 @@
+     if ((display_mode & LED_ACTIVITY)||(!display_mode)) \
+     outb(inb(PS2_SYS_CTR) & 0x3f, PS2_SYS_CTR); }
+ 
+-/*list of supported subsystems */
+-struct subsys_list_struct {
+-	unsigned short mca_id;
+-	char *description;
+-};
+-
+ /* types of different supported hardware that goes to hostdata special */
+ #define IBM_SCSI2_FW     0
+ #define IBM_7568_WCACHE  1
+ #define IBM_EXP_UNIT     2
+ #define IBM_SCSI_WCACHE  3
+ #define IBM_SCSI         4
++#define IBM_INTEGSCSI	 5
+ 
+ /* other special flags for hostdata structure */
+ #define FORCED_DETECTION         100
+ #define INTEGRATED_SCSI          101
+ 
+ /* List of possible IBM-SCSI-adapters */
+-static struct subsys_list_struct subsys_list[] = {
+-	{0x8efc, "IBM SCSI-2 F/W Adapter"},	/* special = 0 */
+-	{0x8efd, "IBM 7568 Industrial Computer SCSI Adapter w/Cache"},	/* special = 1 */
+-	{0x8ef8, "IBM Expansion Unit SCSI Controller"},	/* special = 2 */
+-	{0x8eff, "IBM SCSI Adapter w/Cache"},	/* special = 3 */
+-	{0x8efe, "IBM SCSI Adapter"},	/* special = 4 */
++static short ibmmca_id_table[] = {
++	0x8efc,
++	0x8efd,
++	0x8ef8,
++	0x8eff,
++	0x8efe,
++	/* No entry for integrated SCSI, that's part of the register */
++	0
++};
++
++static const char *ibmmca_description[] = {
++	"IBM SCSI-2 F/W Adapter",	/* special = 0 */
++	"IBM 7568 Industrial Computer SCSI Adapter w/Cache",	/* special = 1 */
++	"IBM Expansion Unit SCSI Controller",	/* special = 2 */
++	"IBM SCSI Adapter w/Cache",	/* special = 3 */
++	"IBM SCSI Adapter",	/* special = 4 */
++	"IBM Integrated SCSI Controller", /* special = 5 */
+ };
+ 
+ /* Max number of logical devices (can be up from 0 to 14).  15 is the address
+@@ -375,30 +388,30 @@
+ };
+ 
+ /* macros to access host data structure */
+-#define subsystem_pun(hi) (hosts[(hi)]->this_id)
+-#define subsystem_maxid(hi) (hosts[(hi)]->max_id)
+-#define ld(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_ld)
+-#define get_ldn(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_get_ldn)
+-#define get_scsi(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_get_scsi)
+-#define local_checking_phase_flag(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_local_checking_phase_flag)
+-#define got_interrupt(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_got_interrupt)
+-#define stat_result(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_stat_result)
+-#define reset_status(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_reset_status)
+-#define last_scsi_command(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_command)
+-#define last_scsi_type(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_type)
+-#define last_scsi_blockcount(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_blockcount)
+-#define last_scsi_logical_block(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_logical_block)
+-#define last_scsi_type(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_last_scsi_type)
+-#define next_ldn(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_next_ldn)
+-#define IBM_DS(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_IBM_DS)
+-#define special(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_special)
+-#define subsystem_connector_size(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_connector_size)
+-#define adapter_speed(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_adapter_speed)
+-#define pos2(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[2])
+-#define pos3(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[3])
+-#define pos4(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[4])
+-#define pos5(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[5])
+-#define pos6(hi) (((struct ibmmca_hostdata *) hosts[(hi)]->hostdata)->_pos[6])
++#define subsystem_pun(h) ((h)->this_id)
++#define subsystem_maxid(h) ((h)->max_id)
++#define ld(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_ld)
++#define get_ldn(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_get_ldn)
++#define get_scsi(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_get_scsi)
++#define local_checking_phase_flag(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_local_checking_phase_flag)
++#define got_interrupt(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_got_interrupt)
++#define stat_result(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_stat_result)
++#define reset_status(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_reset_status)
++#define last_scsi_command(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_command)
++#define last_scsi_type(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_type)
++#define last_scsi_blockcount(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_blockcount)
++#define last_scsi_logical_block(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_logical_block)
++#define last_scsi_type(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_last_scsi_type)
++#define next_ldn(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_next_ldn)
++#define IBM_DS(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_IBM_DS)
++#define special(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_special)
++#define subsystem_connector_size(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_connector_size)
++#define adapter_speed(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_adapter_speed)
++#define pos2(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[2])
++#define pos3(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[3])
++#define pos4(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[4])
++#define pos5(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[5])
++#define pos6(h) (((struct ibmmca_hostdata *) (h)->hostdata)->_pos[6])
+ 
+ /* Define a arbitrary number as subsystem-marker-type. This number is, as
+    described in the ANSI-SCSI-standard, not occupied by other device-types. */
+@@ -459,11 +472,6 @@
+ /*counter of concurrent disk read/writes, to turn on/off disk led */
+ static int disk_rw_in_progress = 0;
+ 
+-/* host information */
+-static int found = 0;
+-static struct Scsi_Host *hosts[IM_MAX_HOSTS + 1] = {
+-	NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL
+-};
+ static unsigned int pos[8];	/* whole pos register-line for diagnosis */
+ /* Taking into account the additions, made by ZP Gu.
+  * This selects now the preset value from the configfile and
+@@ -474,70 +482,68 @@
+ static char ibm_ansi_order = 0;
+ #endif
+ 
+-static void issue_cmd(int, unsigned long, unsigned char);
++static void issue_cmd(struct Scsi_Host *, unsigned long, unsigned char);
+ static void internal_done(Scsi_Cmnd * cmd);
+-static void check_devices(int, int);
+-static int immediate_assign(int, unsigned int, unsigned int, unsigned int, unsigned int);
+-static int immediate_feature(int, unsigned int, unsigned int);
++static void check_devices(struct Scsi_Host *, int);
++static int immediate_assign(struct Scsi_Host *, unsigned int, unsigned int, unsigned int, unsigned int);
++static int immediate_feature(struct Scsi_Host *, unsigned int, unsigned int);
+ #ifdef CONFIG_IBMMCA_SCSI_DEV_RESET
+-static int immediate_reset(int, unsigned int);
++static int immediate_reset(struct Scsi_Host *, unsigned int);
+ #endif
+-static int device_inquiry(int, int);
+-static int read_capacity(int, int);
+-static int get_pos_info(int);
++static int device_inquiry(struct Scsi_Host *, int);
++static int read_capacity(struct Scsi_Host *, int);
++static int get_pos_info(struct Scsi_Host *);
+ static char *ti_p(int);
+ static char *ti_l(int);
+ static char *ibmrate(unsigned int, int);
+ static int probe_display(int);
+-static int probe_bus_mode(int);
+-static int device_exists(int, int, int *, int *);
+-static struct Scsi_Host *ibmmca_register(struct scsi_host_template *, int, int, int, char *);
++static int probe_bus_mode(struct Scsi_Host *);
++static int device_exists(struct Scsi_Host *, int, int *, int *);
+ static int option_setup(char *);
+ /* local functions needed for proc_info */
+-static int ldn_access_load(int, int);
+-static int ldn_access_total_read_write(int);
++static int ldn_access_load(struct Scsi_Host *, int);
++static int ldn_access_total_read_write(struct Scsi_Host *);
+ 
+ static irqreturn_t interrupt_handler(int irq, void *dev_id)
+ {
+-	int host_index, ihost_index;
+ 	unsigned int intr_reg;
+ 	unsigned int cmd_result;
+ 	unsigned int ldn;
++	unsigned long flags;
+ 	Scsi_Cmnd *cmd;
+ 	int lastSCSI;
+-	struct Scsi_Host *dev = dev_id;
++	struct device *dev = dev_id;
++	struct Scsi_Host *shpnt = dev_get_drvdata(dev);
++
++	spin_lock_irqsave(shpnt->host_lock, flags);
+ 
+-	spin_lock(dev->host_lock);
+-	    /* search for one adapter-response on shared interrupt */
+-	    for (host_index = 0; hosts[host_index] && !(inb(IM_STAT_REG(host_index)) & IM_INTR_REQUEST); host_index++);
+-	/* return if some other device on this IRQ caused the interrupt */
+-	if (!hosts[host_index]) {
+-		spin_unlock(dev->host_lock);
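++	/* One handler registration per host now, so instead of scanning a
++	 * global hosts[] array for the source of a shared interrupt, just
++	 * check whether this host is raising an interrupt request at all. */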
++	if (!(inb(IM_STAT_REG(shpnt)) & IM_INTR_REQUEST)) {
++		spin_unlock_irqrestore(shpnt->host_lock, flags);
+ 		return IRQ_NONE;
+ 	}
+ 
+ 	/* the reset-function already did all the job, even ints got
+ 	   renabled on the subsystem, so just return */
+-	if ((reset_status(host_index) == IM_RESET_NOT_IN_PROGRESS_NO_INT) || (reset_status(host_index) == IM_RESET_FINISHED_OK_NO_INT)) {
+-		reset_status(host_index) = IM_RESET_NOT_IN_PROGRESS;
+-		spin_unlock(dev->host_lock);
++	if ((reset_status(shpnt) == IM_RESET_NOT_IN_PROGRESS_NO_INT) || (reset_status(shpnt) == IM_RESET_FINISHED_OK_NO_INT)) {
++		reset_status(shpnt) = IM_RESET_NOT_IN_PROGRESS;
++		spin_unlock_irqrestore(shpnt->host_lock, flags);
+ 		return IRQ_HANDLED;
+ 	}
+ 
+ 	/*must wait for attention reg not busy, then send EOI to subsystem */
+ 	while (1) {
+-		if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY))
++		if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY))
+ 			break;
+ 		cpu_relax();
+ 	}
+-	ihost_index = host_index;
++
+ 	/*get command result and logical device */
+-	intr_reg = (unsigned char) (inb(IM_INTR_REG(ihost_index)));
++	intr_reg = (unsigned char) (inb(IM_INTR_REG(shpnt)));
+ 	cmd_result = intr_reg & 0xf0;
+ 	ldn = intr_reg & 0x0f;
+ 	/* get the last_scsi_command here */
+-	lastSCSI = last_scsi_command(ihost_index)[ldn];
+-	outb(IM_EOI | ldn, IM_ATTN_REG(ihost_index));
++	lastSCSI = last_scsi_command(shpnt)[ldn];
++	outb(IM_EOI | ldn, IM_ATTN_REG(shpnt));
+ 	
+ 	/*these should never happen (hw fails, or a local programming bug) */
+ 	if (!global_command_error_excuse) {
+@@ -547,38 +553,38 @@
+ 		case IM_SOFTWARE_SEQUENCING_ERROR:
+ 		case IM_CMD_ERROR:
+ 			printk(KERN_ERR "IBM MCA SCSI: Fatal Subsystem ERROR!\n");
+-			printk(KERN_ERR "              Last cmd=0x%x, ena=%x, len=", lastSCSI, ld(ihost_index)[ldn].scb.enable);
+-			if (ld(ihost_index)[ldn].cmd)
+-				printk("%ld/%ld,", (long) (ld(ihost_index)[ldn].cmd->request_bufflen), (long) (ld(ihost_index)[ldn].scb.sys_buf_length));
++			printk(KERN_ERR "              Last cmd=0x%x, ena=%x, len=", lastSCSI, ld(shpnt)[ldn].scb.enable);
++			if (ld(shpnt)[ldn].cmd)
++				printk("%ld/%ld,", (long) (scsi_bufflen(ld(shpnt)[ldn].cmd)), (long) (ld(shpnt)[ldn].scb.sys_buf_length));
+ 			else
+ 				printk("none,");
+-			if (ld(ihost_index)[ldn].cmd)
+-				printk("Blocksize=%d", ld(ihost_index)[ldn].scb.u2.blk.length);
++			if (ld(shpnt)[ldn].cmd)
++				printk("Blocksize=%d", ld(shpnt)[ldn].scb.u2.blk.length);
+ 			else
+ 				printk("Blocksize=none");
+-			printk(", host=0x%x, ldn=0x%x\n", ihost_index, ldn);
+-			if (ld(ihost_index)[ldn].cmd) {
+-				printk(KERN_ERR "Blockcount=%d/%d\n", last_scsi_blockcount(ihost_index)[ldn], ld(ihost_index)[ldn].scb.u2.blk.count);
+-				printk(KERN_ERR "Logical block=%lx/%lx\n", last_scsi_logical_block(ihost_index)[ldn], ld(ihost_index)[ldn].scb.u1.log_blk_adr);
++			printk(", host=%p, ldn=0x%x\n", shpnt, ldn);
++			if (ld(shpnt)[ldn].cmd) {
++				printk(KERN_ERR "Blockcount=%d/%d\n", last_scsi_blockcount(shpnt)[ldn], ld(shpnt)[ldn].scb.u2.blk.count);
++				printk(KERN_ERR "Logical block=%lx/%lx\n", last_scsi_logical_block(shpnt)[ldn], ld(shpnt)[ldn].scb.u1.log_blk_adr);
+ 			}
+ 			printk(KERN_ERR "Reason given: %s\n", (cmd_result == IM_ADAPTER_HW_FAILURE) ? "HARDWARE FAILURE" : (cmd_result == IM_SOFTWARE_SEQUENCING_ERROR) ? "SOFTWARE SEQUENCING ERROR" : (cmd_result == IM_CMD_ERROR) ? "COMMAND ERROR" : "UNKNOWN");
+ 			/* if errors appear, enter this section to give detailed info */
+ 			printk(KERN_ERR "IBM MCA SCSI: Subsystem Error-Status follows:\n");
+-			printk(KERN_ERR "              Command Type................: %x\n", last_scsi_type(ihost_index)[ldn]);
+-			printk(KERN_ERR "              Attention Register..........: %x\n", inb(IM_ATTN_REG(ihost_index)));
+-			printk(KERN_ERR "              Basic Control Register......: %x\n", inb(IM_CTR_REG(ihost_index)));
++			printk(KERN_ERR "              Command Type................: %x\n", last_scsi_type(shpnt)[ldn]);
++			printk(KERN_ERR "              Attention Register..........: %x\n", inb(IM_ATTN_REG(shpnt)));
++			printk(KERN_ERR "              Basic Control Register......: %x\n", inb(IM_CTR_REG(shpnt)));
+ 			printk(KERN_ERR "              Interrupt Status Register...: %x\n", intr_reg);
+-			printk(KERN_ERR "              Basic Status Register.......: %x\n", inb(IM_STAT_REG(ihost_index)));
+-			if ((last_scsi_type(ihost_index)[ldn] == IM_SCB) || (last_scsi_type(ihost_index)[ldn] == IM_LONG_SCB)) {
+-				printk(KERN_ERR "              SCB-Command.................: %x\n", ld(ihost_index)[ldn].scb.command);
+-				printk(KERN_ERR "              SCB-Enable..................: %x\n", ld(ihost_index)[ldn].scb.enable);
+-				printk(KERN_ERR "              SCB-logical block address...: %lx\n", ld(ihost_index)[ldn].scb.u1.log_blk_adr);
+-				printk(KERN_ERR "              SCB-system buffer address...: %lx\n", ld(ihost_index)[ldn].scb.sys_buf_adr);
+-				printk(KERN_ERR "              SCB-system buffer length....: %lx\n", ld(ihost_index)[ldn].scb.sys_buf_length);
+-				printk(KERN_ERR "              SCB-tsb address.............: %lx\n", ld(ihost_index)[ldn].scb.tsb_adr);
+-				printk(KERN_ERR "              SCB-Chain address...........: %lx\n", ld(ihost_index)[ldn].scb.scb_chain_adr);
+-				printk(KERN_ERR "              SCB-block count.............: %x\n", ld(ihost_index)[ldn].scb.u2.blk.count);
+-				printk(KERN_ERR "              SCB-block length............: %x\n", ld(ihost_index)[ldn].scb.u2.blk.length);
++			printk(KERN_ERR "              Basic Status Register.......: %x\n", inb(IM_STAT_REG(shpnt)));
++			if ((last_scsi_type(shpnt)[ldn] == IM_SCB) || (last_scsi_type(shpnt)[ldn] == IM_LONG_SCB)) {
++				printk(KERN_ERR "              SCB-Command.................: %x\n", ld(shpnt)[ldn].scb.command);
++				printk(KERN_ERR "              SCB-Enable..................: %x\n", ld(shpnt)[ldn].scb.enable);
++				printk(KERN_ERR "              SCB-logical block address...: %lx\n", ld(shpnt)[ldn].scb.u1.log_blk_adr);
++				printk(KERN_ERR "              SCB-system buffer address...: %lx\n", ld(shpnt)[ldn].scb.sys_buf_adr);
++				printk(KERN_ERR "              SCB-system buffer length....: %lx\n", ld(shpnt)[ldn].scb.sys_buf_length);
++				printk(KERN_ERR "              SCB-tsb address.............: %lx\n", ld(shpnt)[ldn].scb.tsb_adr);
++				printk(KERN_ERR "              SCB-Chain address...........: %lx\n", ld(shpnt)[ldn].scb.scb_chain_adr);
++				printk(KERN_ERR "              SCB-block count.............: %x\n", ld(shpnt)[ldn].scb.u2.blk.count);
++				printk(KERN_ERR "              SCB-block length............: %x\n", ld(shpnt)[ldn].scb.u2.blk.length);
+ 			}
+ 			printk(KERN_ERR "              Send this report to the maintainer.\n");
+ 			panic("IBM MCA SCSI: Fatal error message from the subsystem (0x%X,0x%X)!\n", lastSCSI, cmd_result);
+@@ -600,72 +606,73 @@
+ 		}
+ 	}
+ 	/* if no panic appeared, increase the interrupt-counter */
+-	IBM_DS(ihost_index).total_interrupts++;
++	IBM_DS(shpnt).total_interrupts++;
+ 	/*only for local checking phase */
+-	if (local_checking_phase_flag(ihost_index)) {
+-		stat_result(ihost_index) = cmd_result;
+-		got_interrupt(ihost_index) = 1;
+-		reset_status(ihost_index) = IM_RESET_FINISHED_OK;
+-		last_scsi_command(ihost_index)[ldn] = NO_SCSI;
+-		spin_unlock(dev->host_lock);
++	if (local_checking_phase_flag(shpnt)) {
++		stat_result(shpnt) = cmd_result;
++		got_interrupt(shpnt) = 1;
++		reset_status(shpnt) = IM_RESET_FINISHED_OK;
++		last_scsi_command(shpnt)[ldn] = NO_SCSI;
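++		/* restore the interrupt flags, presumably saved by a matching spin_lock_irqsave at handler entry */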
++		spin_unlock_irqrestore(shpnt->host_lock, flags);
+ 		return IRQ_HANDLED;
+ 	}
+ 	/* handling of commands coming from upper level of scsi driver */
+-	if (last_scsi_type(ihost_index)[ldn] == IM_IMM_CMD) {
++	if (last_scsi_type(shpnt)[ldn] == IM_IMM_CMD) {
+ 		/* verify ldn, and may handle rare reset immediate command */
+-		if ((reset_status(ihost_index) == IM_RESET_IN_PROGRESS) && (last_scsi_command(ihost_index)[ldn] == IM_RESET_IMM_CMD)) {
++		if ((reset_status(shpnt) == IM_RESET_IN_PROGRESS) && (last_scsi_command(shpnt)[ldn] == IM_RESET_IMM_CMD)) {
+ 			if (cmd_result == IM_CMD_COMPLETED_WITH_FAILURE) {
+ 				disk_rw_in_progress = 0;
+ 				PS2_DISK_LED_OFF();
+-				reset_status(ihost_index) = IM_RESET_FINISHED_FAIL;
++				reset_status(shpnt) = IM_RESET_FINISHED_FAIL;
+ 			} else {
+ 				/*reset disk led counter, turn off disk led */
+ 				disk_rw_in_progress = 0;
+ 				PS2_DISK_LED_OFF();
+-				reset_status(ihost_index) = IM_RESET_FINISHED_OK;
++				reset_status(shpnt) = IM_RESET_FINISHED_OK;
+ 			}
+-			stat_result(ihost_index) = cmd_result;
+-			last_scsi_command(ihost_index)[ldn] = NO_SCSI;
+-			last_scsi_type(ihost_index)[ldn] = 0;
+-			spin_unlock(dev->host_lock);
++			stat_result(shpnt) = cmd_result;
++			last_scsi_command(shpnt)[ldn] = NO_SCSI;
++			last_scsi_type(shpnt)[ldn] = 0;
++			spin_unlock_irqrestore(shpnt->host_lock, flags);
+ 			return IRQ_HANDLED;
+-		} else if (last_scsi_command(ihost_index)[ldn] == IM_ABORT_IMM_CMD) {
++		} else if (last_scsi_command(shpnt)[ldn] == IM_ABORT_IMM_CMD) {
+ 			/* react on SCSI abort command */
+ #ifdef IM_DEBUG_PROBE
+ 			printk("IBM MCA SCSI: Interrupt from SCSI-abort.\n");
+ #endif
+ 			disk_rw_in_progress = 0;
+ 			PS2_DISK_LED_OFF();
+-			cmd = ld(ihost_index)[ldn].cmd;
+-			ld(ihost_index)[ldn].cmd = NULL;
++			cmd = ld(shpnt)[ldn].cmd;
++			ld(shpnt)[ldn].cmd = NULL;
+ 			if (cmd_result == IM_CMD_COMPLETED_WITH_FAILURE)
+ 				cmd->result = DID_NO_CONNECT << 16;
+ 			else
+ 				cmd->result = DID_ABORT << 16;
+-			stat_result(ihost_index) = cmd_result;
+-			last_scsi_command(ihost_index)[ldn] = NO_SCSI;
+-			last_scsi_type(ihost_index)[ldn] = 0;
++			stat_result(shpnt) = cmd_result;
++			last_scsi_command(shpnt)[ldn] = NO_SCSI;
++			last_scsi_type(shpnt)[ldn] = 0;
+ 			if (cmd->scsi_done)
+ 				(cmd->scsi_done) (cmd);	/* should be the internal_done */
+-			spin_unlock(dev->host_lock);
++			spin_unlock_irqrestore(shpnt->host_lock, flags);
+ 			return IRQ_HANDLED;
+ 		} else {
+ 			disk_rw_in_progress = 0;
+ 			PS2_DISK_LED_OFF();
+-			reset_status(ihost_index) = IM_RESET_FINISHED_OK;
+-			stat_result(ihost_index) = cmd_result;
+-			last_scsi_command(ihost_index)[ldn] = NO_SCSI;
+-			spin_unlock(dev->host_lock);
++			reset_status(shpnt) = IM_RESET_FINISHED_OK;
++			stat_result(shpnt) = cmd_result;
++			last_scsi_command(shpnt)[ldn] = NO_SCSI;
++			spin_unlock_irqrestore(shpnt->host_lock, flags);
+ 			return IRQ_HANDLED;
+ 		}
+ 	}
+-	last_scsi_command(ihost_index)[ldn] = NO_SCSI;
+-	last_scsi_type(ihost_index)[ldn] = 0;
+-	cmd = ld(ihost_index)[ldn].cmd;
+-	ld(ihost_index)[ldn].cmd = NULL;
++	last_scsi_command(shpnt)[ldn] = NO_SCSI;
++	last_scsi_type(shpnt)[ldn] = 0;
++	cmd = ld(shpnt)[ldn].cmd;
++	ld(shpnt)[ldn].cmd = NULL;
+ #ifdef IM_DEBUG_TIMEOUT
+ 	if (cmd) {
+ 		if ((cmd->target == TIMEOUT_PUN) && (cmd->device->lun == TIMEOUT_LUN)) {
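++			/* early exit: drop the lock and restore interrupts before returning */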
++			spin_unlock_irqrestore(shpnt->host_lock, flags);
+ 			printk("IBM MCA SCSI: Ignoring interrupt from pun=%x, lun=%x.\n", cmd->target, cmd->device->lun);
+ 			return IRQ_HANDLED;
+ 		}
+@@ -674,15 +681,15 @@
+ 	/*if no command structure, just return, else clear cmd */
+ 	if (!cmd)
+ 	{
+-		spin_unlock(dev->host_lock);
++		spin_unlock_irqrestore(shpnt->host_lock, flags);
+ 		return IRQ_HANDLED;
+ 	}
+ 
+ #ifdef IM_DEBUG_INT
+-	printk("cmd=%02x ireg=%02x ds=%02x cs=%02x de=%02x ce=%02x\n", cmd->cmnd[0], intr_reg, ld(ihost_index)[ldn].tsb.dev_status, ld(ihost_index)[ldn].tsb.cmd_status, ld(ihost_index)[ldn].tsb.dev_error, ld(ihost_index)[ldn].tsb.cmd_error);
++	printk("cmd=%02x ireg=%02x ds=%02x cs=%02x de=%02x ce=%02x\n", cmd->cmnd[0], intr_reg, ld(shpnt)[ldn].tsb.dev_status, ld(shpnt)[ldn].tsb.cmd_status, ld(shpnt)[ldn].tsb.dev_error, ld(shpnt)[ldn].tsb.cmd_error);
+ #endif
+ 	/*if this is end of media read/write, may turn off PS/2 disk led */
+-	if ((ld(ihost_index)[ldn].device_type != TYPE_NO_LUN) && (ld(ihost_index)[ldn].device_type != TYPE_NO_DEVICE)) {
++	if ((ld(shpnt)[ldn].device_type != TYPE_NO_LUN) && (ld(shpnt)[ldn].device_type != TYPE_NO_DEVICE)) {
+ 		/* only access this, if there was a valid device addressed */
+ 		if (--disk_rw_in_progress == 0)
+ 			PS2_DISK_LED_OFF();
+@@ -693,8 +700,8 @@
+ 	 * adapters do not support CMD_TERMINATED, TASK_SET_FULL and
+ 	 * ACA_ACTIVE as returning statusbyte information. (ML) */
+ 	if (cmd_result == IM_CMD_COMPLETED_WITH_FAILURE) {
+-		cmd->result = (unsigned char) (ld(ihost_index)[ldn].tsb.dev_status & 0x1e);
+-		IBM_DS(ihost_index).total_errors++;
++		cmd->result = (unsigned char) (ld(shpnt)[ldn].tsb.dev_status & 0x1e);
++		IBM_DS(shpnt).total_errors++;
+ 	} else
+ 		cmd->result = 0;
+ 	/* write device status into cmd->result, and call done function */
+@@ -705,24 +712,25 @@
+ 		cmd->result |= DID_OK << 16;
+ 	if (cmd->scsi_done)
+ 		(cmd->scsi_done) (cmd);
+-	spin_unlock(dev->host_lock);
++	spin_unlock_irqrestore(shpnt->host_lock, flags);
+ 	return IRQ_HANDLED;
+ }
+ 
+-static void issue_cmd(int host_index, unsigned long cmd_reg, unsigned char attn_reg)
++static void issue_cmd(struct Scsi_Host *shpnt, unsigned long cmd_reg,
++		      unsigned char attn_reg)
+ {
+ 	unsigned long flags;
+ 	/* must wait for attention reg not busy */
+ 	while (1) {
+-		spin_lock_irqsave(hosts[host_index]->host_lock, flags);
+-		if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY))
++		spin_lock_irqsave(shpnt->host_lock, flags);
++		if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY))
+ 			break;
+-		spin_unlock_irqrestore(hosts[host_index]->host_lock, flags);
++		spin_unlock_irqrestore(shpnt->host_lock, flags);
+ 	}
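++	/* the break above leaves host_lock held; it is released after the register writes below */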
+ 	/* write registers and enable system interrupts */
+-	outl(cmd_reg, IM_CMD_REG(host_index));
+-	outb(attn_reg, IM_ATTN_REG(host_index));
+-	spin_unlock_irqrestore(hosts[host_index]->host_lock, flags);
++	outl(cmd_reg, IM_CMD_REG(shpnt));
++	outb(attn_reg, IM_ATTN_REG(shpnt));
++	spin_unlock_irqrestore(shpnt->host_lock, flags);
+ }
+ 
+ static void internal_done(Scsi_Cmnd * cmd)
+@@ -732,34 +740,34 @@
+ }
+ 
+ /* SCSI-SCB-command for device_inquiry */
+-static int device_inquiry(int host_index, int ldn)
++static int device_inquiry(struct Scsi_Host *shpnt, int ldn)
+ {
+ 	int retr;
+ 	struct im_scb *scb;
+ 	struct im_tsb *tsb;
+ 	unsigned char *buf;
+ 
+-	scb = &(ld(host_index)[ldn].scb);
+-	tsb = &(ld(host_index)[ldn].tsb);
+-	buf = (unsigned char *) (&(ld(host_index)[ldn].buf));
+-	ld(host_index)[ldn].tsb.dev_status = 0;	/* prepare statusblock */
++	scb = &(ld(shpnt)[ldn].scb);
++	tsb = &(ld(shpnt)[ldn].tsb);
++	buf = (unsigned char *) (&(ld(shpnt)[ldn].buf));
++	ld(shpnt)[ldn].tsb.dev_status = 0;	/* prepare statusblock */
+ 	for (retr = 0; retr < 3; retr++) {
+ 		/* fill scb with inquiry command */
+ 		scb->command = IM_DEVICE_INQUIRY_CMD | IM_NO_DISCONNECT;
+ 		scb->enable = IM_REPORT_TSB_ONLY_ON_ERROR | IM_READ_CONTROL | IM_SUPRESS_EXCEPTION_SHORT | IM_RETRY_ENABLE | IM_BYPASS_BUFFER;
+-		last_scsi_command(host_index)[ldn] = IM_DEVICE_INQUIRY_CMD;
+-		last_scsi_type(host_index)[ldn] = IM_SCB;
++		last_scsi_command(shpnt)[ldn] = IM_DEVICE_INQUIRY_CMD;
++		last_scsi_type(shpnt)[ldn] = IM_SCB;
+ 		scb->sys_buf_adr = isa_virt_to_bus(buf);
+ 		scb->sys_buf_length = 255;	/* maximum bufferlength gives max info */
+ 		scb->tsb_adr = isa_virt_to_bus(tsb);
+ 		/* issue scb to passed ldn, and busy wait for interrupt */
+-		got_interrupt(host_index) = 0;
+-		issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | ldn);
+-		while (!got_interrupt(host_index))
++		got_interrupt(shpnt) = 0;
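++		/* issue the SCB, then busy-wait for the interrupt handler to flag completion (no timeout) */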
++		issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | ldn);
++		while (!got_interrupt(shpnt))
+ 			barrier();
+ 
+ 		/*if command successful, break */
+-		if ((stat_result(host_index) == IM_SCB_CMD_COMPLETED) || (stat_result(host_index) == IM_SCB_CMD_COMPLETED_WITH_RETRIES))
++		if ((stat_result(shpnt) == IM_SCB_CMD_COMPLETED) || (stat_result(shpnt) == IM_SCB_CMD_COMPLETED_WITH_RETRIES))
+ 			return 1;
+ 	}
+ 	/*if all three retries failed, return "no device at this ldn" */
+@@ -769,34 +777,34 @@
+ 		return 1;
+ }
+ 
+-static int read_capacity(int host_index, int ldn)
++static int read_capacity(struct Scsi_Host *shpnt, int ldn)
+ {
+ 	int retr;
+ 	struct im_scb *scb;
+ 	struct im_tsb *tsb;
+ 	unsigned char *buf;
+ 
+-	scb = &(ld(host_index)[ldn].scb);
+-	tsb = &(ld(host_index)[ldn].tsb);
+-	buf = (unsigned char *) (&(ld(host_index)[ldn].buf));
+-	ld(host_index)[ldn].tsb.dev_status = 0;
++	scb = &(ld(shpnt)[ldn].scb);
++	tsb = &(ld(shpnt)[ldn].tsb);
++	buf = (unsigned char *) (&(ld(shpnt)[ldn].buf));
++	ld(shpnt)[ldn].tsb.dev_status = 0;
+ 	for (retr = 0; retr < 3; retr++) {
+ 		/*fill scb with read capacity command */
+ 		scb->command = IM_READ_CAPACITY_CMD;
+ 		scb->enable = IM_REPORT_TSB_ONLY_ON_ERROR | IM_READ_CONTROL | IM_RETRY_ENABLE | IM_BYPASS_BUFFER;
+-		last_scsi_command(host_index)[ldn] = IM_READ_CAPACITY_CMD;
+-		last_scsi_type(host_index)[ldn] = IM_SCB;
++		last_scsi_command(shpnt)[ldn] = IM_READ_CAPACITY_CMD;
++		last_scsi_type(shpnt)[ldn] = IM_SCB;
+ 		scb->sys_buf_adr = isa_virt_to_bus(buf);
+ 		scb->sys_buf_length = 8;
+ 		scb->tsb_adr = isa_virt_to_bus(tsb);
+ 		/*issue scb to passed ldn, and busy wait for interrupt */
+-		got_interrupt(host_index) = 0;
+-		issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | ldn);
+-		while (!got_interrupt(host_index))
++		got_interrupt(shpnt) = 0;
++		issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | ldn);
++		while (!got_interrupt(shpnt))
+ 			barrier();
+ 
+ 		/*if got capacity, get block length and return one device found */
+-		if ((stat_result(host_index) == IM_SCB_CMD_COMPLETED) || (stat_result(host_index) == IM_SCB_CMD_COMPLETED_WITH_RETRIES))
++		if ((stat_result(shpnt) == IM_SCB_CMD_COMPLETED) || (stat_result(shpnt) == IM_SCB_CMD_COMPLETED_WITH_RETRIES))
+ 			return 1;
+ 	}
+ 	/*if all three retries failed, return "no device at this ldn" */
+@@ -806,39 +814,39 @@
+ 		return 1;
+ }
+ 
+-static int get_pos_info(int host_index)
++static int get_pos_info(struct Scsi_Host *shpnt)
+ {
+ 	int retr;
+ 	struct im_scb *scb;
+ 	struct im_tsb *tsb;
+ 	unsigned char *buf;
+ 
+-	scb = &(ld(host_index)[MAX_LOG_DEV].scb);
+-	tsb = &(ld(host_index)[MAX_LOG_DEV].tsb);
+-	buf = (unsigned char *) (&(ld(host_index)[MAX_LOG_DEV].buf));
+-	ld(host_index)[MAX_LOG_DEV].tsb.dev_status = 0;
++	scb = &(ld(shpnt)[MAX_LOG_DEV].scb);
++	tsb = &(ld(shpnt)[MAX_LOG_DEV].tsb);
++	buf = (unsigned char *) (&(ld(shpnt)[MAX_LOG_DEV].buf));
++	ld(shpnt)[MAX_LOG_DEV].tsb.dev_status = 0;
+ 	for (retr = 0; retr < 3; retr++) {
+ 		/*fill scb with get_pos_info command */
+ 		scb->command = IM_GET_POS_INFO_CMD;
+ 		scb->enable = IM_READ_CONTROL | IM_REPORT_TSB_ONLY_ON_ERROR | IM_RETRY_ENABLE | IM_BYPASS_BUFFER;
+-		last_scsi_command(host_index)[MAX_LOG_DEV] = IM_GET_POS_INFO_CMD;
+-		last_scsi_type(host_index)[MAX_LOG_DEV] = IM_SCB;
++		last_scsi_command(shpnt)[MAX_LOG_DEV] = IM_GET_POS_INFO_CMD;
++		last_scsi_type(shpnt)[MAX_LOG_DEV] = IM_SCB;
+ 		scb->sys_buf_adr = isa_virt_to_bus(buf);
+-		if (special(host_index) == IBM_SCSI2_FW)
++		if (special(shpnt) == IBM_SCSI2_FW)
+ 			scb->sys_buf_length = 256;	/* get all info from F/W adapter */
+ 		else
+ 			scb->sys_buf_length = 18;	/* get exactly 18 bytes for other SCSI */
+ 		scb->tsb_adr = isa_virt_to_bus(tsb);
+ 		/*issue scb to ldn=15, and busy wait for interrupt */
+-		got_interrupt(host_index) = 0;
+-		issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | MAX_LOG_DEV);
++		got_interrupt(shpnt) = 0;
++		issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | MAX_LOG_DEV);
+ 		
+ 		/* FIXME: timeout */
+-		while (!got_interrupt(host_index))
++		while (!got_interrupt(shpnt))
+ 			barrier();
+ 
+ 		/*if got POS-stuff, get block length and return one device found */
+-		if ((stat_result(host_index) == IM_SCB_CMD_COMPLETED) || (stat_result(host_index) == IM_SCB_CMD_COMPLETED_WITH_RETRIES))
++		if ((stat_result(shpnt) == IM_SCB_CMD_COMPLETED) || (stat_result(shpnt) == IM_SCB_CMD_COMPLETED_WITH_RETRIES))
+ 			return 1;
+ 	}
+ 	/* if all three retries failed, return "no device at this ldn" */
+@@ -851,14 +859,16 @@
+ /* SCSI-immediate-command for assign. This function maps/unmaps specific
+  ldn-numbers on SCSI (PUN,LUN). It is needed for presetting of the
+  subsystem and for dynamical remapping of ldns. */
+-static int immediate_assign(int host_index, unsigned int pun, unsigned int lun, unsigned int ldn, unsigned int operation)
++static int immediate_assign(struct Scsi_Host *shpnt, unsigned int pun,
++			    unsigned int lun, unsigned int ldn,
++			    unsigned int operation)
+ {
+ 	int retr;
+ 	unsigned long imm_cmd;
+ 
+ 	for (retr = 0; retr < 3; retr++) {
+ 		/* select mutation level of the SCSI-adapter */
+-		switch (special(host_index)) {
++		switch (special(shpnt)) {
+ 		case IBM_SCSI2_FW:
+ 			imm_cmd = (unsigned long) (IM_ASSIGN_IMM_CMD);
+ 			imm_cmd |= (unsigned long) ((lun & 7) << 24);
+@@ -867,7 +877,7 @@
+ 			imm_cmd |= (unsigned long) ((ldn & 15) << 16);
+ 			break;
+ 		default:
+-			imm_cmd = inl(IM_CMD_REG(host_index));
++			imm_cmd = inl(IM_CMD_REG(shpnt));
+ 			imm_cmd &= (unsigned long) (0xF8000000);	/* keep reserved bits */
+ 			imm_cmd |= (unsigned long) (IM_ASSIGN_IMM_CMD);
+ 			imm_cmd |= (unsigned long) ((lun & 7) << 24);
+@@ -876,15 +886,15 @@
+ 			imm_cmd |= (unsigned long) ((ldn & 15) << 16);
+ 			break;
+ 		}
+-		last_scsi_command(host_index)[MAX_LOG_DEV] = IM_ASSIGN_IMM_CMD;
+-		last_scsi_type(host_index)[MAX_LOG_DEV] = IM_IMM_CMD;
+-		got_interrupt(host_index) = 0;
+-		issue_cmd(host_index, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV);
+-		while (!got_interrupt(host_index))
++		last_scsi_command(shpnt)[MAX_LOG_DEV] = IM_ASSIGN_IMM_CMD;
++		last_scsi_type(shpnt)[MAX_LOG_DEV] = IM_IMM_CMD;
++		got_interrupt(shpnt) = 0;
++		issue_cmd(shpnt, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV);
++		while (!got_interrupt(shpnt))
+ 			barrier();
+ 
+ 		/*if command successful, break */
+-		if (stat_result(host_index) == IM_IMMEDIATE_CMD_COMPLETED)
++		if (stat_result(shpnt) == IM_IMMEDIATE_CMD_COMPLETED)
+ 			return 1;
+ 	}
+ 	if (retr >= 3)
+@@ -893,7 +903,7 @@
+ 		return 1;
+ }
+ 
+-static int immediate_feature(int host_index, unsigned int speed, unsigned int timeout)
++static int immediate_feature(struct Scsi_Host *shpnt, unsigned int speed, unsigned int timeout)
+ {
+ 	int retr;
+ 	unsigned long imm_cmd;
+@@ -903,16 +913,16 @@
+ 		imm_cmd = IM_FEATURE_CTR_IMM_CMD;
+ 		imm_cmd |= (unsigned long) ((speed & 0x7) << 29);
+ 		imm_cmd |= (unsigned long) ((timeout & 0x1fff) << 16);
+-		last_scsi_command(host_index)[MAX_LOG_DEV] = IM_FEATURE_CTR_IMM_CMD;
+-		last_scsi_type(host_index)[MAX_LOG_DEV] = IM_IMM_CMD;
+-		got_interrupt(host_index) = 0;
++		last_scsi_command(shpnt)[MAX_LOG_DEV] = IM_FEATURE_CTR_IMM_CMD;
++		last_scsi_type(shpnt)[MAX_LOG_DEV] = IM_IMM_CMD;
++		got_interrupt(shpnt) = 0;
+ 		/* we need to run into command errors in order to probe for the
+ 		 * right speed! */
+ 		global_command_error_excuse = 1;
+-		issue_cmd(host_index, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV);
++		issue_cmd(shpnt, (unsigned long) (imm_cmd), IM_IMM_CMD | MAX_LOG_DEV);
+ 		
+ 		/* FIXME: timeout */
+-		while (!got_interrupt(host_index))
++		while (!got_interrupt(shpnt))
+ 			barrier();
+ 		if (global_command_error_excuse == CMD_FAIL) {
+ 			global_command_error_excuse = 0;
+@@ -920,7 +930,7 @@
+ 		} else
+ 			global_command_error_excuse = 0;
+ 		/*if command successful, break */
+-		if (stat_result(host_index) == IM_IMMEDIATE_CMD_COMPLETED)
++		if (stat_result(shpnt) == IM_IMMEDIATE_CMD_COMPLETED)
+ 			return 1;
+ 	}
+ 	if (retr >= 3)
+@@ -930,35 +940,35 @@
+ }
+ 
+ #ifdef CONFIG_IBMMCA_SCSI_DEV_RESET
+-static int immediate_reset(int host_index, unsigned int ldn)
++static int immediate_reset(struct Scsi_Host *shpnt, unsigned int ldn)
+ {
+ 	int retries;
+ 	int ticks;
+ 	unsigned long imm_command;
+ 
+ 	for (retries = 0; retries < 3; retries++) {
+-		imm_command = inl(IM_CMD_REG(host_index));
++		imm_command = inl(IM_CMD_REG(shpnt));
+ 		imm_command &= (unsigned long) (0xFFFF0000);	/* keep reserved bits */
+ 		imm_command |= (unsigned long) (IM_RESET_IMM_CMD);
+-		last_scsi_command(host_index)[ldn] = IM_RESET_IMM_CMD;
+-		last_scsi_type(host_index)[ldn] = IM_IMM_CMD;
+-		got_interrupt(host_index) = 0;
+-		reset_status(host_index) = IM_RESET_IN_PROGRESS;
+-		issue_cmd(host_index, (unsigned long) (imm_command), IM_IMM_CMD | ldn);
++		last_scsi_command(shpnt)[ldn] = IM_RESET_IMM_CMD;
++		last_scsi_type(shpnt)[ldn] = IM_IMM_CMD;
++		got_interrupt(shpnt) = 0;
++		reset_status(shpnt) = IM_RESET_IN_PROGRESS;
++		issue_cmd(shpnt, (unsigned long) (imm_command), IM_IMM_CMD | ldn);
+ 		ticks = IM_RESET_DELAY * HZ;
+-		while (reset_status(host_index) == IM_RESET_IN_PROGRESS && --ticks) {
++		while (reset_status(shpnt) == IM_RESET_IN_PROGRESS && --ticks) {
+ 			udelay((1 + 999 / HZ) * 1000);
+ 			barrier();
+ 		}
+ 		/* if reset did not complete, just complain */
+ 		if (!ticks) {
+ 			printk(KERN_ERR "IBM MCA SCSI: reset did not complete within %d seconds.\n", IM_RESET_DELAY);
+-			reset_status(host_index) = IM_RESET_FINISHED_OK;
++			reset_status(shpnt) = IM_RESET_FINISHED_OK;
+ 			/* did not work, finish */
+ 			return 1;
+ 		}
+ 		/*if command successful, break */
+-		if (stat_result(host_index) == IM_IMMEDIATE_CMD_COMPLETED)
++		if (stat_result(shpnt) == IM_IMMEDIATE_CMD_COMPLETED)
+ 			return 1;
+ 	}
+ 	if (retries >= 3)
+@@ -1060,35 +1070,35 @@
+ 	return 0;
+ }
+ 
+-static int probe_bus_mode(int host_index)
++static int probe_bus_mode(struct Scsi_Host *shpnt)
+ {
+ 	struct im_pos_info *info;
+ 	int num_bus = 0;
+ 	int ldn;
+ 
+-	info = (struct im_pos_info *) (&(ld(host_index)[MAX_LOG_DEV].buf));
+-	if (get_pos_info(host_index)) {
++	info = (struct im_pos_info *) (&(ld(shpnt)[MAX_LOG_DEV].buf));
++	if (get_pos_info(shpnt)) {
+ 		if (info->connector_size & 0xf000)
+-			subsystem_connector_size(host_index) = 16;
++			subsystem_connector_size(shpnt) = 16;
+ 		else
+-			subsystem_connector_size(host_index) = 32;
++			subsystem_connector_size(shpnt) = 32;
+ 		num_bus |= (info->pos_4b & 8) >> 3;
+ 		for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) {
+-			if ((special(host_index) == IBM_SCSI_WCACHE) || (special(host_index) == IBM_7568_WCACHE)) {
++			if ((special(shpnt) == IBM_SCSI_WCACHE) || (special(shpnt) == IBM_7568_WCACHE)) {
+ 				if (!((info->cache_stat >> ldn) & 1))
+-					ld(host_index)[ldn].cache_flag = 0;
++					ld(shpnt)[ldn].cache_flag = 0;
+ 			}
+ 			if (!((info->retry_stat >> ldn) & 1))
+-				ld(host_index)[ldn].retry_flag = 0;
++				ld(shpnt)[ldn].retry_flag = 0;
+ 		}
+ #ifdef IM_DEBUG_PROBE
+ 		printk("IBM MCA SCSI: SCSI-Cache bits: ");
+ 		for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) {
+-			printk("%d", ld(host_index)[ldn].cache_flag);
++			printk("%d", ld(shpnt)[ldn].cache_flag);
+ 		}
+ 		printk("\nIBM MCA SCSI: SCSI-Retry bits: ");
+ 		for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) {
+-			printk("%d", ld(host_index)[ldn].retry_flag);
++			printk("%d", ld(shpnt)[ldn].retry_flag);
+ 		}
+ 		printk("\n");
+ #endif
+@@ -1097,7 +1107,7 @@
+ }
+ 
+ /* probing scsi devices */
+-static void check_devices(int host_index, int adaptertype)
++static void check_devices(struct Scsi_Host *shpnt, int adaptertype)
+ {
+ 	int id, lun, ldn, ticks;
+ 	int count_devices;	/* local counter for connected device */
+@@ -1108,24 +1118,24 @@
+ 	/* assign default values to certain variables */
+ 	ticks = 0;
+ 	count_devices = 0;
+-	IBM_DS(host_index).dyn_flag = 0;	/* normally no need for dynamical ldn management */
+-	IBM_DS(host_index).total_errors = 0;	/* set errorcounter to 0 */
+-	next_ldn(host_index) = 7;	/* next ldn to be assigned is 7, because 0-6 is 'hardwired' */
++	IBM_DS(shpnt).dyn_flag = 0;	/* normally no need for dynamical ldn management */
++	IBM_DS(shpnt).total_errors = 0;	/* set errorcounter to 0 */
++	next_ldn(shpnt) = 7;	/* next ldn to be assigned is 7, because 0-6 is 'hardwired' */
+ 
+ 	/* initialize the very important driver-informational arrays/structs */
+-	memset(ld(host_index), 0, sizeof(ld(host_index)));
++	memset(ld(shpnt), 0, sizeof(ld(shpnt)));
+ 	for (ldn = 0; ldn <= MAX_LOG_DEV; ldn++) {
+-		last_scsi_command(host_index)[ldn] = NO_SCSI;	/* emptify last SCSI-command storage */
+-		last_scsi_type(host_index)[ldn] = 0;
+-		ld(host_index)[ldn].cache_flag = 1;
+-		ld(host_index)[ldn].retry_flag = 1;
++		last_scsi_command(shpnt)[ldn] = NO_SCSI;	/* clear last SCSI-command storage */
++		last_scsi_type(shpnt)[ldn] = 0;
++		ld(shpnt)[ldn].cache_flag = 1;
++		ld(shpnt)[ldn].retry_flag = 1;
+ 	}
+-	memset(get_ldn(host_index), TYPE_NO_DEVICE, sizeof(get_ldn(host_index)));	/* this is essential ! */
+-	memset(get_scsi(host_index), TYPE_NO_DEVICE, sizeof(get_scsi(host_index)));	/* this is essential ! */
++	memset(get_ldn(shpnt), TYPE_NO_DEVICE, sizeof(get_ldn(shpnt)));	/* this is essential ! */
++	memset(get_scsi(shpnt), TYPE_NO_DEVICE, sizeof(get_scsi(shpnt)));	/* this is essential ! */
+ 	for (lun = 0; lun < 8; lun++) {
+ 		/* mark the adapter at its pun on all luns */
+-		get_scsi(host_index)[subsystem_pun(host_index)][lun] = TYPE_IBM_SCSI_ADAPTER;
+-		get_ldn(host_index)[subsystem_pun(host_index)][lun] = MAX_LOG_DEV;	/* make sure, the subsystem
++		get_scsi(shpnt)[subsystem_pun(shpnt)][lun] = TYPE_IBM_SCSI_ADAPTER;
++		get_ldn(shpnt)[subsystem_pun(shpnt)][lun] = MAX_LOG_DEV;	/* make sure, the subsystem
+ 											   ldn is active for all
+ 											   luns. */
+ 	}
+@@ -1134,9 +1144,9 @@
+ 	/* monitor connected on model XX95. */
+ 
+ 	/* STEP 1: */
+-	adapter_speed(host_index) = global_adapter_speed;
+-	speedrun = adapter_speed(host_index);
+-	while (immediate_feature(host_index, speedrun, adapter_timeout) == 2) {
++	adapter_speed(shpnt) = global_adapter_speed;
++	speedrun = adapter_speed(shpnt);
++	while (immediate_feature(shpnt, speedrun, adapter_timeout) == 2) {
+ 		probe_display(1);
+ 		if (speedrun == 7)
+ 			panic("IBM MCA SCSI: Cannot set Synchronous-Transfer-Rate!\n");
+@@ -1144,30 +1154,30 @@
+ 		if (speedrun > 7)
+ 			speedrun = 7;
+ 	}
+-	adapter_speed(host_index) = speedrun;
++	adapter_speed(shpnt) = speedrun;
+ 	/* Get detailed information about the current adapter, necessary for
+ 	 * device operations: */
+-	num_bus = probe_bus_mode(host_index);
++	num_bus = probe_bus_mode(shpnt);
+ 
+ 	/* num_bus contains only valid data for the F/W adapter! */
+ 	if (adaptertype == IBM_SCSI2_FW) {	/* F/W SCSI adapter: */
+ 		/* F/W adapter PUN-space extension evaluation: */
+ 		if (num_bus) {
+ 			printk(KERN_INFO "IBM MCA SCSI: Separate bus mode (wide-addressing enabled)\n");
+-			subsystem_maxid(host_index) = 16;
++			subsystem_maxid(shpnt) = 16;
+ 		} else {
+ 			printk(KERN_INFO "IBM MCA SCSI: Combined bus mode (wide-addressing disabled)\n");
+-			subsystem_maxid(host_index) = 8;
++			subsystem_maxid(shpnt) = 8;
+ 		}
+ 		printk(KERN_INFO "IBM MCA SCSI: Sync.-Rate (F/W: 20, Int.: 10, Ext.: %s) MBytes/s\n", ibmrate(speedrun, adaptertype));
+ 	} else			/* all other IBM SCSI adapters: */
+ 		printk(KERN_INFO "IBM MCA SCSI: Synchronous-SCSI-Transfer-Rate: %s MBytes/s\n", ibmrate(speedrun, adaptertype));
+ 
+ 	/* assign correct PUN device space */
+-	max_pun = subsystem_maxid(host_index);
++	max_pun = subsystem_maxid(shpnt);
+ 
+ #ifdef IM_DEBUG_PROBE
+-	printk("IBM MCA SCSI: Current SCSI-host index: %d\n", host_index);
++	printk("IBM MCA SCSI: Current SCSI-host: %p\n", shpnt);
+ 	printk("IBM MCA SCSI: Removing default logical SCSI-device mapping.");
+ #else
+ 	printk(KERN_INFO "IBM MCA SCSI: Dev. Order: %s, Mapping (takes <2min): ", (ibm_ansi_order) ? "ANSI" : "New");
+@@ -1177,7 +1187,7 @@
+ #ifdef IM_DEBUG_PROBE
+ 		printk(".");
+ #endif
+-		immediate_assign(host_index, 0, 0, ldn, REMOVE_LDN);	/* remove ldn (wherever) */
++		immediate_assign(shpnt, 0, 0, ldn, REMOVE_LDN);	/* remove ldn (wherever) */
+ 	}
+ 	lun = 0;		/* default lun is 0 */
+ #ifndef IM_DEBUG_PROBE
+@@ -1196,18 +1206,18 @@
+ #ifdef IM_DEBUG_PROBE
+ 			printk(".");
+ #endif
+-			if (id != subsystem_pun(host_index)) {
++			if (id != subsystem_pun(shpnt)) {
+ 				/* if pun is not the adapter: */
+ 				/* set ldn=0 to pun,lun */
+-				immediate_assign(host_index, id, lun, PROBE_LDN, SET_LDN);
+-				if (device_inquiry(host_index, PROBE_LDN)) {	/* probe device */
+-					get_scsi(host_index)[id][lun] = (unsigned char) (ld(host_index)[PROBE_LDN].buf[0]);
++				immediate_assign(shpnt, id, lun, PROBE_LDN, SET_LDN);
++				if (device_inquiry(shpnt, PROBE_LDN)) {	/* probe device */
++					get_scsi(shpnt)[id][lun] = (unsigned char) (ld(shpnt)[PROBE_LDN].buf[0]);
+ 					/* entry, even for NO_LUN */
+-					if (ld(host_index)[PROBE_LDN].buf[0] != TYPE_NO_LUN)
++					if (ld(shpnt)[PROBE_LDN].buf[0] != TYPE_NO_LUN)
+ 						count_devices++;	/* an existing device is found */
+ 				}
+ 				/* remove ldn */
+-				immediate_assign(host_index, id, lun, PROBE_LDN, REMOVE_LDN);
++				immediate_assign(shpnt, id, lun, PROBE_LDN, REMOVE_LDN);
+ 			}
+ 		}
+ #ifndef IM_DEBUG_PROBE
+@@ -1227,16 +1237,16 @@
+ #ifdef IM_DEBUG_PROBE
+ 			printk(".");
+ #endif
+-			if (id != subsystem_pun(host_index)) {
+-				if (get_scsi(host_index)[id][lun] != TYPE_NO_LUN && get_scsi(host_index)[id][lun] != TYPE_NO_DEVICE) {
++			if (id != subsystem_pun(shpnt)) {
++				if (get_scsi(shpnt)[id][lun] != TYPE_NO_LUN && get_scsi(shpnt)[id][lun] != TYPE_NO_DEVICE) {
+ 					/* Only map if accepted type. Always enter for
+ 					   lun == 0 to get no gaps into ldn-mapping for ldn<7. */
+-					immediate_assign(host_index, id, lun, ldn, SET_LDN);
+-					get_ldn(host_index)[id][lun] = ldn;	/* map ldn */
+-					if (device_exists(host_index, ldn, &ld(host_index)[ldn].block_length, &ld(host_index)[ldn].device_type)) {
++					immediate_assign(shpnt, id, lun, ldn, SET_LDN);
++					get_ldn(shpnt)[id][lun] = ldn;	/* map ldn */
++					if (device_exists(shpnt, ldn, &ld(shpnt)[ldn].block_length, &ld(shpnt)[ldn].device_type)) {
+ #ifdef CONFIG_IBMMCA_SCSI_DEV_RESET
+ 						printk("resetting device at ldn=%x ... ", ldn);
+-						immediate_reset(host_index, ldn);
++						immediate_reset(shpnt, ldn);
+ #endif
+ 						ldn++;
+ 					} else {
+@@ -1244,15 +1254,15 @@
+ 						 * handle it or because it has problems */
+ 						if (lun > 0) {
+ 							/* remove mapping */
+-							get_ldn(host_index)[id][lun] = TYPE_NO_DEVICE;
+-							immediate_assign(host_index, 0, 0, ldn, REMOVE_LDN);
++							get_ldn(shpnt)[id][lun] = TYPE_NO_DEVICE;
++							immediate_assign(shpnt, 0, 0, ldn, REMOVE_LDN);
+ 						} else
+ 							ldn++;
+ 					}
+ 				} else if (lun == 0) {
+ 					/* map lun == 0, even if no device exists */
+-					immediate_assign(host_index, id, lun, ldn, SET_LDN);
+-					get_ldn(host_index)[id][lun] = ldn;	/* map ldn */
++					immediate_assign(shpnt, id, lun, ldn, SET_LDN);
++					get_ldn(shpnt)[id][lun] = ldn;	/* map ldn */
+ 					ldn++;
+ 				}
+ 			}
+@@ -1262,14 +1272,14 @@
+ 	/* map remaining ldns to non-existing devices */
+ 	for (lun = 1; lun < 8 && ldn < MAX_LOG_DEV; lun++)
+ 		for (id = 0; id < max_pun && ldn < MAX_LOG_DEV; id++) {
+-			if (get_scsi(host_index)[id][lun] == TYPE_NO_LUN || get_scsi(host_index)[id][lun] == TYPE_NO_DEVICE) {
++			if (get_scsi(shpnt)[id][lun] == TYPE_NO_LUN || get_scsi(shpnt)[id][lun] == TYPE_NO_DEVICE) {
+ 				probe_display(1);
+ 				/* Map remaining ldns only to NON-existing pun,lun
+ 				   combinations to make sure an inquiry will fail.
+ 				   For MULTI_LUN, it is needed to avoid adapter-autonomous
+ 				   SCSI-remapping. */
+-				immediate_assign(host_index, id, lun, ldn, SET_LDN);
+-				get_ldn(host_index)[id][lun] = ldn;
++				immediate_assign(shpnt, id, lun, ldn, SET_LDN);
++				get_ldn(shpnt)[id][lun] = ldn;
+ 				ldn++;
+ 			}
+ 		}
+@@ -1292,51 +1302,51 @@
+ 	for (id = 0; id < max_pun; id++) {
+ 		printk("%2d     ", id);
+ 		for (lun = 0; lun < 8; lun++)
+-			printk("%2s ", ti_p(get_scsi(host_index)[id][lun]));
++			printk("%2s ", ti_p(get_scsi(shpnt)[id][lun]));
+ 		printk("      %2d     ", id);
+ 		for (lun = 0; lun < 8; lun++)
+-			printk("%2s ", ti_l(get_ldn(host_index)[id][lun]));
++			printk("%2s ", ti_l(get_ldn(shpnt)[id][lun]));
+ 		printk("\n");
+ 	}
+ #endif
+ 
+ 	/* assign total number of found SCSI-devices to the statistics struct */
+-	IBM_DS(host_index).total_scsi_devices = count_devices;
++	IBM_DS(shpnt).total_scsi_devices = count_devices;
+ 
+ 	/* decide for output in /proc-filesystem, if the configuration of
+ 	   SCSI-devices makes dynamical reassignment of devices necessary */
+ 	if (count_devices >= MAX_LOG_DEV)
+-		IBM_DS(host_index).dyn_flag = 1;	/* dynamical assignment is necessary */
++		IBM_DS(shpnt).dyn_flag = 1;	/* dynamical assignment is necessary */
+ 	else
+-		IBM_DS(host_index).dyn_flag = 0;	/* dynamical assignment is not necessary */
++		IBM_DS(shpnt).dyn_flag = 0;	/* dynamical assignment is not necessary */
+ 
+ 	/* If no SCSI-devices are assigned, warn the user. */
+ 	if (ldn == 0)
+ 		printk("IBM MCA SCSI: Warning: No SCSI-devices found/assigned!\n");
+ 
+ 	/* reset the counters for statistics on the current adapter */
+-	IBM_DS(host_index).scbs = 0;
+-	IBM_DS(host_index).long_scbs = 0;
+-	IBM_DS(host_index).total_accesses = 0;
+-	IBM_DS(host_index).total_interrupts = 0;
+-	IBM_DS(host_index).dynamical_assignments = 0;
+-	memset(IBM_DS(host_index).ldn_access, 0x0, sizeof(IBM_DS(host_index).ldn_access));
+-	memset(IBM_DS(host_index).ldn_read_access, 0x0, sizeof(IBM_DS(host_index).ldn_read_access));
+-	memset(IBM_DS(host_index).ldn_write_access, 0x0, sizeof(IBM_DS(host_index).ldn_write_access));
+-	memset(IBM_DS(host_index).ldn_inquiry_access, 0x0, sizeof(IBM_DS(host_index).ldn_inquiry_access));
+-	memset(IBM_DS(host_index).ldn_modeselect_access, 0x0, sizeof(IBM_DS(host_index).ldn_modeselect_access));
+-	memset(IBM_DS(host_index).ldn_assignments, 0x0, sizeof(IBM_DS(host_index).ldn_assignments));
++	IBM_DS(shpnt).scbs = 0;
++	IBM_DS(shpnt).long_scbs = 0;
++	IBM_DS(shpnt).total_accesses = 0;
++	IBM_DS(shpnt).total_interrupts = 0;
++	IBM_DS(shpnt).dynamical_assignments = 0;
++	memset(IBM_DS(shpnt).ldn_access, 0x0, sizeof(IBM_DS(shpnt).ldn_access));
++	memset(IBM_DS(shpnt).ldn_read_access, 0x0, sizeof(IBM_DS(shpnt).ldn_read_access));
++	memset(IBM_DS(shpnt).ldn_write_access, 0x0, sizeof(IBM_DS(shpnt).ldn_write_access));
++	memset(IBM_DS(shpnt).ldn_inquiry_access, 0x0, sizeof(IBM_DS(shpnt).ldn_inquiry_access));
++	memset(IBM_DS(shpnt).ldn_modeselect_access, 0x0, sizeof(IBM_DS(shpnt).ldn_modeselect_access));
++	memset(IBM_DS(shpnt).ldn_assignments, 0x0, sizeof(IBM_DS(shpnt).ldn_assignments));
+ 	probe_display(0);
+ 	return;
+ }
+ 
+-static int device_exists(int host_index, int ldn, int *block_length, int *device_type)
++static int device_exists(struct Scsi_Host *shpnt, int ldn, int *block_length, int *device_type)
+ {
+ 	unsigned char *buf;
+ 	/* if no valid device found, return immediately with 0 */
+-	if (!(device_inquiry(host_index, ldn)))
++	if (!(device_inquiry(shpnt, ldn)))
+ 		return 0;
+-	buf = (unsigned char *) (&(ld(host_index)[ldn].buf));
++	buf = (unsigned char *) (&(ld(shpnt)[ldn].buf));
+ 	if (*buf == TYPE_ROM) {
+ 		*device_type = TYPE_ROM;
+ 		*block_length = 2048;	/* (standard blocksize for yellow-/red-book) */
+@@ -1349,7 +1359,7 @@
+ 	}
+ 	if (*buf == TYPE_DISK) {
+ 		*device_type = TYPE_DISK;
+-		if (read_capacity(host_index, ldn)) {
++		if (read_capacity(shpnt, ldn)) {
+ 			*block_length = *(buf + 7) + (*(buf + 6) << 8) + (*(buf + 5) << 16) + (*(buf + 4) << 24);
+ 			return 1;
+ 		} else
+@@ -1357,7 +1367,7 @@
+ 	}
+ 	if (*buf == TYPE_MOD) {
+ 		*device_type = TYPE_MOD;
+-		if (read_capacity(host_index, ldn)) {
++		if (read_capacity(shpnt, ldn)) {
+ 			*block_length = *(buf + 7) + (*(buf + 6) << 8) + (*(buf + 5) << 16) + (*(buf + 4) << 24);
+ 			return 1;
+ 		} else
+@@ -1430,6 +1440,9 @@
+ 	return;
+ }
+ 
++#if 0
++ FIXME NEED TO MOVE TO SYSFS
++
+ static int ibmmca_getinfo(char *buf, int slot, void *dev_id)
+ {
+ 	struct Scsi_Host *shpnt;
+@@ -1480,58 +1493,34 @@
+ 
+ 	return len;
+ }
++#endif
+ 
+-int ibmmca_detect(struct scsi_host_template * scsi_template)
++static struct scsi_host_template ibmmca_driver_template = {
++	.proc_name		= "ibmmca",
++	.proc_info		= ibmmca_proc_info,
++	.name			= "IBM SCSI-Subsystem",
++	.queuecommand		= ibmmca_queuecommand,
++	.eh_abort_handler	= ibmmca_abort,
++	.eh_host_reset_handler	= ibmmca_host_reset,
++	.bios_param		= ibmmca_biosparam,
++	.can_queue		= 16,
++	.this_id		= 7,
++	.sg_tablesize		= 16,
++	.cmd_per_lun		= 1,
++	.use_clustering		= ENABLE_CLUSTERING,
++};
++
++static int ibmmca_probe(struct device *dev)
+ {
+ 	struct Scsi_Host *shpnt;
+-	int port, id, i, j, k, slot;
+-	int devices_on_irq_11 = 0;
+-	int devices_on_irq_14 = 0;
+-	int IRQ14_registered = 0;
+-	int IRQ11_registered = 0;
+-
+-	found = 0;		/* make absolutely sure, that found is set to 0 */
++	int port, id, i, j, k, irq, enabled, ret = -EINVAL;
++	struct mca_device *mca_dev = to_mca_device(dev);
++	const char *description = ibmmca_description[mca_dev->index];
+ 
+ 	/* First of all, print the version number of the driver. This is
+ 	 * important to allow better user bugreports in case of already
+ 	 * having problems with the MCA_bus probing. */
+ 	printk(KERN_INFO "IBM MCA SCSI: Version %s\n", IBMMCA_SCSI_DRIVER_VERSION);
+-	/* if this is not MCA machine, return "nothing found" */
+-	if (!MCA_bus) {
+-		printk(KERN_INFO "IBM MCA SCSI:  No Microchannel-bus present --> Aborting.\n" "      	     This machine does not have any IBM MCA-bus\n" "    	     or the MCA-Kernel-support is not enabled!\n");
+-		return 0;
+-	}
+-
+-#ifdef MODULE
+-	/* If the driver is run as module, read from conf.modules or cmd-line */
+-	if (boot_options)
+-		option_setup(boot_options);
+-#endif
+-
+-	/* get interrupt request level */
+-	if (request_irq(IM_IRQ, interrupt_handler, IRQF_SHARED, "ibmmcascsi", hosts)) {
+-		printk(KERN_ERR "IBM MCA SCSI: Unable to get shared IRQ %d.\n", IM_IRQ);
+-		return 0;
+-	} else
+-		IRQ14_registered++;
+-
+-	/* if ibmmcascsi setup option was passed to kernel, return "found" */
+-	for (i = 0; i < IM_MAX_HOSTS; i++)
+-		if (io_port[i] > 0 && scsi_id[i] >= 0 && scsi_id[i] < 8) {
+-			printk("IBM MCA SCSI: forced detected SCSI Adapter, io=0x%x, scsi id=%d.\n", io_port[i], scsi_id[i]);
+-			if ((shpnt = ibmmca_register(scsi_template, io_port[i], scsi_id[i], FORCED_DETECTION, "forced detected SCSI Adapter"))) {
+-				for (k = 2; k < 7; k++)
+-					((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = 0;
+-				((struct ibmmca_hostdata *) shpnt->hostdata)->_special = FORCED_DETECTION;
+-				mca_set_adapter_name(MCA_INTEGSCSI, "forced detected SCSI Adapter");
+-				mca_set_adapter_procfn(MCA_INTEGSCSI, (MCA_ProcFn) ibmmca_getinfo, shpnt);
+-				mca_mark_as_used(MCA_INTEGSCSI);
+-				devices_on_irq_14++;
+-			}
+-		}
+-	if (found)
+-		return found;
+-
+ 	/* The POS2-register of all PS/2 model SCSI-subsystems has the following
+ 	 * interpretation of bits:
+ 	 *                             Bit 7 - 4 : Chip Revision ID (Release)
+@@ -1558,7 +1547,14 @@
+ 
+ 	/* first look for the IBM SCSI integrated subsystem on the motherboard */
+ 	for (j = 0; j < 8; j++)	/* read the pos-information */
+-		pos[j] = mca_read_stored_pos(MCA_INTEGSCSI, j);
++		pos[j] = mca_device_read_pos(mca_dev, j);
++	id = (pos[3] & 0xe0) >> 5; /* this is correct and represents the PUN */
++	enabled = (pos[2] & 0x01);
++	if (!enabled) {
++		printk(KERN_WARNING "IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n");
++		printk(KERN_WARNING "              SCSI-operations may not work.\n");
++	}
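++	/* note: probing continues even when the subsystem is disabled; the chip still answers POS reads */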
++
+ 	/* pos2 = pos3 = 0xff if there is no integrated SCSI-subsystem present, but
+ 	 * if we ignore the settings of all surrounding pos registers, it is not
+ 	 * completely sufficient to only check pos2 and pos3. */
+@@ -1566,232 +1562,137 @@
+ 	 * make sure, we see a real integrated onboard SCSI-interface and no
+ 	 * internal system information, which gets mapped to some pos registers
+ 	 * on models 95xx. */
+-	if ((!pos[0] && !pos[1] && pos[2] > 0 && pos[3] > 0 && !pos[4] && !pos[5] && !pos[6] && !pos[7]) || (pos[0] == 0xff && pos[1] == 0xff && pos[2] < 0xff && pos[3] < 0xff && pos[4] == 0xff && pos[5] == 0xff && pos[6] == 0xff && pos[7] == 0xff)) {
+-		if ((pos[2] & 1) == 1)	/* is the subsystem chip enabled ? */
++	if (mca_dev->slot == MCA_INTEGSCSI &&
++	    ((!pos[0] && !pos[1] && pos[2] > 0 &&
++	      pos[3] > 0 && !pos[4] && !pos[5] &&
++	      !pos[6] && !pos[7]) ||
++	     (pos[0] == 0xff && pos[1] == 0xff &&
++	      pos[2] < 0xff && pos[3] < 0xff &&
++	      pos[4] == 0xff && pos[5] == 0xff &&
++	      pos[6] == 0xff && pos[7] == 0xff))) {
++		irq = IM_IRQ;
+ 			port = IM_IO_PORT;
+-		else {		/* if disabled, no IRQs will be generated, as the chip won't
+-				 * listen to the incoming commands and will do really nothing,
+-				 * except for listening to the pos-register settings. If this
+-				 * happens, I need to hugely think about it, as one has to
+-				 * write something to the MCA-Bus pos register in order to
+-				 * enable the chip. Normally, IBM-SCSI won't pass the POST,
+-				 * when the chip is disabled (see IBM tech. ref.). */
+-			port = IM_IO_PORT;	/* anyway, set the portnumber and warn */
+-			printk("IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n" "              SCSI-operations may not work.\n");
+-		}
+-		id = (pos[3] & 0xe0) >> 5;	/* this is correct and represents the PUN */
+-		/* give detailed information on the subsystem. This helps me
+-		 * additionally during debugging and analyzing bug-reports. */
+-		printk(KERN_INFO "IBM MCA SCSI: IBM Integrated SCSI Controller ffound, io=0x%x, scsi id=%d,\n", port, id);
+-		printk(KERN_INFO "              chip rev.=%d, 8K NVRAM=%s, subsystem=%s\n", ((pos[2] & 0xf0) >> 4), (pos[2] & 2) ? "locked" : "accessible", (pos[2] & 1) ? "enabled." : "disabled.");
+-
+-		/* register the found integrated SCSI-subsystem */
+-		if ((shpnt = ibmmca_register(scsi_template, port, id, INTEGRATED_SCSI, "IBM Integrated SCSI Controller"))) 
+-		{
+-			for (k = 2; k < 7; k++)
+-				((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k];
+-			((struct ibmmca_hostdata *) shpnt->hostdata)->_special = INTEGRATED_SCSI;
+-			mca_set_adapter_name(MCA_INTEGSCSI, "IBM Integrated SCSI Controller");
+-			mca_set_adapter_procfn(MCA_INTEGSCSI, (MCA_ProcFn) ibmmca_getinfo, shpnt);
+-			mca_mark_as_used(MCA_INTEGSCSI);
+-			devices_on_irq_14++;
+-		}
+-	}
+-
+-	/* now look for other adapters in MCA slots, */
+-	/* determine the number of known IBM-SCSI-subsystem types */
+-	/* see the pos[2] dependence to get the adapter port-offset. */
+-	for (i = 0; i < ARRAY_SIZE(subsys_list); i++) {
+-		/* scan each slot for a fitting adapter id */
+-		slot = 0;	/* start at slot 0 */
+-		while ((slot = mca_find_adapter(subsys_list[i].mca_id, slot))
+-		       != MCA_NOTFOUND) {	/* scan through all slots */
+-			for (j = 0; j < 8; j++)	/* read the pos-information */
+-				pos[j] = mca_read_stored_pos(slot, j);
+-			if ((pos[2] & 1) == 1)
+-				/* is the subsystem chip enabled ? */
+-				/* (explanations see above) */
+-				port = IM_IO_PORT + ((pos[2] & 0x0e) << 2);
+-			else {
+-				/* anyway, set the portnumber and warn */
+-				port = IM_IO_PORT + ((pos[2] & 0x0e) << 2);
+-				printk(KERN_WARNING "IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n");
+-				printk(KERN_WARNING "              SCSI-operations may not work.\n");
+-			}
+-			if ((i == IBM_SCSI2_FW) && (pos[6] != 0)) {
++	} else {
++		irq = IM_IRQ;
++		port = IM_IO_PORT + ((pos[2] & 0x0e) << 2);
++		if ((mca_dev->index == IBM_SCSI2_FW) && (pos[6] != 0)) {
+ 				printk(KERN_ERR "IBM MCA SCSI: ERROR - Wrong POS(6)-register setting!\n");
+ 				printk(KERN_ERR "              Impossible to determine adapter PUN!\n");
+ 				printk(KERN_ERR "              Guessing adapter PUN = 7.\n");
+ 				id = 7;
+ 			} else {
+ 				id = (pos[3] & 0xe0) >> 5;	/* get subsystem PUN */
+-				if (i == IBM_SCSI2_FW) {
++			if (mca_dev->index == IBM_SCSI2_FW) {
+ 					id |= (pos[3] & 0x10) >> 1;	/* get subsystem PUN high-bit
+ 									 * for F/W adapters */
+ 				}
+ 			}
+-			if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0)) {
++		if ((mca_dev->index == IBM_SCSI2_FW) &&
++		    (pos[4] & 0x01) && (pos[6] == 0)) {
+ 				/* IRQ11 is used by SCSI-2 F/W Adapter/A */
+ 				printk(KERN_DEBUG "IBM MCA SCSI: SCSI-2 F/W adapter needs IRQ 11.\n");
+-				/* get interrupt request level */
+-				if (request_irq(IM_IRQ_FW, interrupt_handler, IRQF_SHARED, "ibmmcascsi", hosts)) {
+-					printk(KERN_ERR "IBM MCA SCSI: Unable to get shared IRQ %d.\n", IM_IRQ_FW);
+-				} else
+-					IRQ11_registered++;
++			irq = IM_IRQ_FW;
+ 			}
+-			printk(KERN_INFO "IBM MCA SCSI: %s found in slot %d, io=0x%x, scsi id=%d,\n", subsys_list[i].description, slot + 1, port, id);
+-			if ((pos[2] & 0xf0) == 0xf0)
+-				printk(KERN_DEBUG"              ROM Addr.=off,");
+-			else
+-				printk(KERN_DEBUG "              ROM Addr.=0x%x,", ((pos[2] & 0xf0) << 13) + 0xc0000);
+-			printk(KERN_DEBUG " port-offset=0x%x, subsystem=%s\n", ((pos[2] & 0x0e) << 2), (pos[2] & 1) ? "enabled." : "disabled.");
+-
+-			/* register the hostadapter */
+-			if ((shpnt = ibmmca_register(scsi_template, port, id, i, subsys_list[i].description))) {
+-				for (k = 2; k < 8; k++)
+-					((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k];
+-				((struct ibmmca_hostdata *) shpnt->hostdata)->_special = i;
+-				mca_set_adapter_name(slot, subsys_list[i].description);
+-				mca_set_adapter_procfn(slot, (MCA_ProcFn) ibmmca_getinfo, shpnt);
+-				mca_mark_as_used(slot);
+-				if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0))
+-					devices_on_irq_11++;
+-				else
+-					devices_on_irq_14++;
+-			}
+-			slot++;	/* advance to next slot */
+-		}		/* advance to next adapter id in the list of IBM-SCSI-subsystems */
+ 	}
+ 
+-	/* now check for SCSI-adapters, mapped to the integrated SCSI
+-	 * area. E.g. a W/Cache in MCA-slot 9(!). Do the check correct here,
+-	 * as this is a known effect on some models 95xx. */
+-	for (i = 0; i < ARRAY_SIZE(subsys_list); i++) {
+-		/* scan each slot for a fitting adapter id */
+-		slot = mca_find_adapter(subsys_list[i].mca_id, MCA_INTEGSCSI);
+-		if (slot != MCA_NOTFOUND) {	/* scan through all slots */
+-			for (j = 0; j < 8; j++)	/* read the pos-information */
+-				pos[j] = mca_read_stored_pos(slot, j);
+-			if ((pos[2] & 1) == 1) {	/* is the subsystem chip enabled ? */
+-				/* (explanations see above) */
+-				port = IM_IO_PORT + ((pos[2] & 0x0e) << 2);
+-			} else {	/* anyway, set the portnumber and warn */
+-				port = IM_IO_PORT + ((pos[2] & 0x0e) << 2);
+-				printk(KERN_WARNING "IBM MCA SCSI: WARNING - Your SCSI-subsystem is disabled!\n");
+-				printk(KERN_WARNING "              SCSI-operations may not work.\n");
+-			}
+-			if ((i == IBM_SCSI2_FW) && (pos[6] != 0)) {
+-				printk(KERN_ERR "IBM MCA SCSI: ERROR - Wrong POS(6)-register setting!\n");
+-				printk(KERN_ERR  "              Impossible to determine adapter PUN!\n");
+-				printk(KERN_ERR "              Guessing adapter PUN = 7.\n");
+-				id = 7;
+-			} else {
+-				id = (pos[3] & 0xe0) >> 5;	/* get subsystem PUN */
+-				if (i == IBM_SCSI2_FW)
+-					id |= (pos[3] & 0x10) >> 1;	/* get subsystem PUN high-bit
+-									 * for F/W adapters */
+-			}
+-			if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0)) {
+-				/* IRQ11 is used by SCSI-2 F/W Adapter/A */
+-				printk(KERN_DEBUG  "IBM MCA SCSI: SCSI-2 F/W adapter needs IRQ 11.\n");
+-				/* get interrupt request level */
+-				if (request_irq(IM_IRQ_FW, interrupt_handler, IRQF_SHARED, "ibmmcascsi", hosts))
+-					printk(KERN_ERR "IBM MCA SCSI: Unable to get shared IRQ %d.\n", IM_IRQ_FW);
+-				else
+-					IRQ11_registered++;
+-			}
+-			printk(KERN_INFO "IBM MCA SCSI: %s found in slot %d, io=0x%x, scsi id=%d,\n", subsys_list[i].description, slot + 1, port, id);
++
++
++	/* give detailed information on the subsystem. This helps me
++	 * additionally during debugging and analyzing bug-reports. */
++	printk(KERN_INFO "IBM MCA SCSI: %s found, io=0x%x, scsi id=%d,\n",
++	       description, port, id);
++	if (mca_dev->slot == MCA_INTEGSCSI)
++		printk(KERN_INFO "              chip rev.=%d, 8K NVRAM=%s, subsystem=%s\n", ((pos[2] & 0xf0) >> 4), (pos[2] & 2) ? "locked" : "accessible", (pos[2] & 1) ? "enabled." : "disabled.");
++	else {
+ 			if ((pos[2] & 0xf0) == 0xf0)
+ 				printk(KERN_DEBUG "              ROM Addr.=off,");
+ 			else
+ 				printk(KERN_DEBUG "              ROM Addr.=0x%x,", ((pos[2] & 0xf0) << 13) + 0xc0000);
+-			printk(KERN_DEBUG " port-offset=0x%x, subsystem=%s\n", ((pos[2] & 0x0e) << 2), (pos[2] & 1) ? "enabled." : "disabled.");
+ 
+-			/* register the hostadapter */
+-			if ((shpnt = ibmmca_register(scsi_template, port, id, i, subsys_list[i].description))) {
+-				for (k = 2; k < 7; k++)
+-					((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k];
+-				((struct ibmmca_hostdata *) shpnt->hostdata)->_special = i;
+-				mca_set_adapter_name(slot, subsys_list[i].description);
+-				mca_set_adapter_procfn(slot, (MCA_ProcFn) ibmmca_getinfo, shpnt);
+-				mca_mark_as_used(slot);
+-				if ((i == IBM_SCSI2_FW) && (pos[4] & 0x01) && (pos[6] == 0))
+-					devices_on_irq_11++;
+-				else
+-					devices_on_irq_14++;
+-			}
+-			slot++;	/* advance to next slot */
+-		}		/* advance to next adapter id in the list of IBM-SCSI-subsystems */
++		printk(KERN_DEBUG " port-offset=0x%x, subsystem=%s\n", ((pos[2] & 0x0e) << 2), (pos[2] & 1) ? "enabled." : "disabled.");
+ 	}
+-	if (IRQ11_registered && !devices_on_irq_11)
+-		free_irq(IM_IRQ_FW, hosts);	/* no devices on IRQ 11 */
+-	if (IRQ14_registered && !devices_on_irq_14)
+-		free_irq(IM_IRQ, hosts);	/* no devices on IRQ 14 */
+-	if (!devices_on_irq_11 && !devices_on_irq_14)
+-		printk(KERN_WARNING "IBM MCA SCSI: No IBM SCSI-subsystem adapter attached.\n");
+-	return found;		/* return the number of found SCSI hosts. Should be 1 or 0. */
+-}
+-
+-static struct Scsi_Host *ibmmca_register(struct scsi_host_template * scsi_template, int port, int id, int adaptertype, char *hostname)
+-{
+-	struct Scsi_Host *shpnt;
+-	int i, j;
+-	unsigned int ctrl;
+ 
+ 	/* check I/O region */
+-	if (!request_region(port, IM_N_IO_PORT, hostname)) {
++	if (!request_region(port, IM_N_IO_PORT, description)) {
+ 		printk(KERN_ERR "IBM MCA SCSI: Unable to get I/O region 0x%x-0x%x (%d ports).\n", port, port + IM_N_IO_PORT - 1, IM_N_IO_PORT);
+-		return NULL;
++		goto out_fail;
+ 	}
+ 
+ 	/* register host */
+-	shpnt = scsi_register(scsi_template, sizeof(struct ibmmca_hostdata));
++	shpnt = scsi_host_alloc(&ibmmca_driver_template,
++				sizeof(struct ibmmca_hostdata));
+ 	if (!shpnt) {
+ 		printk(KERN_ERR "IBM MCA SCSI: Unable to register host.\n");
+-		release_region(port, IM_N_IO_PORT);
+-		return NULL;
++		goto out_release;
++	}
++
++	dev_set_drvdata(dev, shpnt);
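++	/* drvdata lets the shared interrupt handler map its dev_id (dev) back to this host */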
++	if (request_irq(irq, interrupt_handler, IRQF_SHARED, description, dev)) {
++		printk(KERN_ERR "IBM MCA SCSI: failed to request interrupt %d\n", irq);
++		goto out_free_host;
+ 	}
+ 
+ 	/* request I/O region */
+-	hosts[found] = shpnt;	/* add new found hostadapter to the list */
+-	special(found) = adaptertype;	/* important assignment or else crash! */
+-	subsystem_connector_size(found) = 0;	/* preset slot-size */
+-	shpnt->irq = IM_IRQ;	/* assign necessary stuff for the adapter */
++	special(shpnt) = mca_dev->index;	/* important assignment or else crash! */
++	subsystem_connector_size(shpnt) = 0;	/* preset slot-size */
++	shpnt->irq = irq;	/* assign necessary stuff for the adapter */
+ 	shpnt->io_port = port;
+ 	shpnt->n_io_port = IM_N_IO_PORT;
+ 	shpnt->this_id = id;
+ 	shpnt->max_id = 8;	/* 8 PUNs are default */
+ 	/* now, the SCSI-subsystem is connected to Linux */
+ 
+-	ctrl = (unsigned int) (inb(IM_CTR_REG(found)));	/* get control-register status */
+ #ifdef IM_DEBUG_PROBE
++	ctrl = (unsigned int) (inb(IM_CTR_REG(shpnt)));	/* get control-register status */
+ 	printk("IBM MCA SCSI: Control Register contents: %x, status: %x\n", ctrl, inb(IM_STAT_REG(found)));
+ 	printk("IBM MCA SCSI: This adapters' POS-registers: ");
+ 	for (i = 0; i < 8; i++)
+ 		printk("%x ", pos[i]);
+ 	printk("\n");
+ #endif
+-	reset_status(found) = IM_RESET_NOT_IN_PROGRESS;
++	reset_status(shpnt) = IM_RESET_NOT_IN_PROGRESS;
+ 
+ 	for (i = 0; i < 16; i++)	/* reset the tables */
+ 		for (j = 0; j < 8; j++)
+-			get_ldn(found)[i][j] = MAX_LOG_DEV;
++			get_ldn(shpnt)[i][j] = MAX_LOG_DEV;
+ 
+ 	/* check which logical devices exist */
+ 	/* after this line, local interrupting is possible: */
+-	local_checking_phase_flag(found) = 1;
+-	check_devices(found, adaptertype);	/* call by value, using the global variable hosts */
+-	local_checking_phase_flag(found) = 0;
+-	found++;		/* now increase index to be prepared for next found subsystem */
++	local_checking_phase_flag(shpnt) = 1;
++	check_devices(shpnt, mca_dev->index);	/* probe the devices attached to this adapter */
++	local_checking_phase_flag(shpnt) = 0;
++
+ 	/* an ibm mca subsystem has been detected */
+-	return shpnt;
++
++	for (k = 2; k < 7; k++)
++		((struct ibmmca_hostdata *) shpnt->hostdata)->_pos[k] = pos[k];
++	((struct ibmmca_hostdata *) shpnt->hostdata)->_special = INTEGRATED_SCSI;
++	mca_device_set_name(mca_dev, description);
++	/* FIXME: NEED TO REPLUMB TO SYSFS
++	   mca_set_adapter_procfn(MCA_INTEGSCSI, (MCA_ProcFn) ibmmca_getinfo, shpnt);
++	*/
++	mca_device_set_claim(mca_dev, 1);
++	if (scsi_add_host(shpnt, dev)) {
++		dev_printk(KERN_ERR, dev, "IBM MCA SCSI: scsi_add_host failed\n");
++		goto out_free_host;
++	}
++	scsi_scan_host(shpnt);
++
++	return 0;
++ out_free_host:
++	scsi_host_put(shpnt);
++ out_release:
++	release_region(port, IM_N_IO_PORT);
++ out_fail:
++	return ret;
+ }
+ 
+-static int ibmmca_release(struct Scsi_Host *shpnt)
++static int __devexit ibmmca_remove(struct device *dev)
+ {
++	struct Scsi_Host *shpnt = dev_get_drvdata(dev);
++	scsi_remove_host(shpnt);
+ 	release_region(shpnt->io_port, shpnt->n_io_port);
+-	if (!(--found))
+-		free_irq(shpnt->irq, hosts);
++	free_irq(shpnt->irq, dev);
+ 	return 0;
+ }
+ 
+@@ -1805,33 +1706,24 @@
+ 	int current_ldn;
+ 	int id, lun;
+ 	int target;
+-	int host_index;
+ 	int max_pun;
+ 	int i;
+-	struct scatterlist *sl;
++	struct scatterlist *sg;
+ 
+ 	shpnt = cmd->device->host;
+-	/* search for the right hostadapter */
+-	for (host_index = 0; hosts[host_index] && hosts[host_index]->host_no != shpnt->host_no; host_index++);
+ 
+-	if (!hosts[host_index]) {	/* invalid hostadapter descriptor address */
+-		cmd->result = DID_NO_CONNECT << 16;
+-		if (done)
+-			done(cmd);
+-		return 0;
+-	}
+-	max_pun = subsystem_maxid(host_index);
++	max_pun = subsystem_maxid(shpnt);
+ 	if (ibm_ansi_order) {
+ 		target = max_pun - 1 - cmd->device->id;
+-		if ((target <= subsystem_pun(host_index)) && (cmd->device->id <= subsystem_pun(host_index)))
++		if ((target <= subsystem_pun(shpnt)) && (cmd->device->id <= subsystem_pun(shpnt)))
+ 			target--;
+-		else if ((target >= subsystem_pun(host_index)) && (cmd->device->id >= subsystem_pun(host_index)))
++		else if ((target >= subsystem_pun(shpnt)) && (cmd->device->id >= subsystem_pun(shpnt)))
+ 			target++;
+ 	} else
+ 		target = cmd->device->id;
+ 
+ 	/* if (target,lun) is NO LUN or not existing at all, return error */
+-	if ((get_scsi(host_index)[target][cmd->device->lun] == TYPE_NO_LUN) || (get_scsi(host_index)[target][cmd->device->lun] == TYPE_NO_DEVICE)) {
++	if ((get_scsi(shpnt)[target][cmd->device->lun] == TYPE_NO_LUN) || (get_scsi(shpnt)[target][cmd->device->lun] == TYPE_NO_DEVICE)) {
+ 		cmd->result = DID_NO_CONNECT << 16;
+ 		if (done)
+ 			done(cmd);
+@@ -1839,16 +1731,16 @@
+ 	}
+ 
+ 	/*if (target,lun) unassigned, do further checks... */
+-	ldn = get_ldn(host_index)[target][cmd->device->lun];
++	ldn = get_ldn(shpnt)[target][cmd->device->lun];
+ 	if (ldn >= MAX_LOG_DEV) {	/* on invalid ldn do special stuff */
+ 		if (ldn > MAX_LOG_DEV) {	/* dynamical remapping if ldn unassigned */
+-			current_ldn = next_ldn(host_index);	/* stop-value for one circle */
+-			while (ld(host_index)[next_ldn(host_index)].cmd) {	/* search for a occupied, but not in */
++			current_ldn = next_ldn(shpnt);	/* stop-value for one circle */
++			while (ld(shpnt)[next_ldn(shpnt)].cmd) {	/* search for an occupied, but not in */
+ 				/* command-processing ldn. */
+-				next_ldn(host_index)++;
+-				if (next_ldn(host_index) >= MAX_LOG_DEV)
+-					next_ldn(host_index) = 7;
+-				if (current_ldn == next_ldn(host_index)) {	/* One circle done ? */
++				next_ldn(shpnt)++;
++				if (next_ldn(shpnt) >= MAX_LOG_DEV)
++					next_ldn(shpnt) = 7;
++				if (current_ldn == next_ldn(shpnt)) {	/* One circle done ? */
+ 					/* no non-processing ldn found */
+ 					scmd_printk(KERN_WARNING, cmd,
+ 	"IBM MCA SCSI: Cannot assign SCSI-device dynamically!\n"
+@@ -1864,56 +1756,56 @@
+ 			/* unmap non-processing ldn */
+ 			for (id = 0; id < max_pun; id++)
+ 				for (lun = 0; lun < 8; lun++) {
+-					if (get_ldn(host_index)[id][lun] == next_ldn(host_index)) {
+-						get_ldn(host_index)[id][lun] = TYPE_NO_DEVICE;
+-						get_scsi(host_index)[id][lun] = TYPE_NO_DEVICE;
++					if (get_ldn(shpnt)[id][lun] == next_ldn(shpnt)) {
++						get_ldn(shpnt)[id][lun] = TYPE_NO_DEVICE;
++						get_scsi(shpnt)[id][lun] = TYPE_NO_DEVICE;
+ 						/* unmap entry */
+ 					}
+ 				}
+ 			/* set reduced interrupt_handler-mode for checking */
+-			local_checking_phase_flag(host_index) = 1;
++			local_checking_phase_flag(shpnt) = 1;
+ 			/* map found ldn to pun,lun */
+-			get_ldn(host_index)[target][cmd->device->lun] = next_ldn(host_index);
++			get_ldn(shpnt)[target][cmd->device->lun] = next_ldn(shpnt);
+ 			/* change ldn to the right value, that is now next_ldn */
+-			ldn = next_ldn(host_index);
++			ldn = next_ldn(shpnt);
+ 			/* unassign all ldns (pun,lun,ldn does not matter for remove) */
+-			immediate_assign(host_index, 0, 0, 0, REMOVE_LDN);
++			immediate_assign(shpnt, 0, 0, 0, REMOVE_LDN);
+ 			/* set only LDN for remapped device */
+-			immediate_assign(host_index, target, cmd->device->lun, ldn, SET_LDN);
++			immediate_assign(shpnt, target, cmd->device->lun, ldn, SET_LDN);
+ 			/* get device information for ld[ldn] */
+-			if (device_exists(host_index, ldn, &ld(host_index)[ldn].block_length, &ld(host_index)[ldn].device_type)) {
+-				ld(host_index)[ldn].cmd = NULL;	/* To prevent panic set 0, because
++			if (device_exists(shpnt, ldn, &ld(shpnt)[ldn].block_length, &ld(shpnt)[ldn].device_type)) {
++				ld(shpnt)[ldn].cmd = NULL;	/* To prevent panic set 0, because
+ 								   devices that were not assigned,
+ 								   should have nothing in progress. */
+-				get_scsi(host_index)[target][cmd->device->lun] = ld(host_index)[ldn].device_type;
++				get_scsi(shpnt)[target][cmd->device->lun] = ld(shpnt)[ldn].device_type;
+ 				/* increase assignment counters for statistics in /proc */
+-				IBM_DS(host_index).dynamical_assignments++;
+-				IBM_DS(host_index).ldn_assignments[ldn]++;
++				IBM_DS(shpnt).dynamical_assignments++;
++				IBM_DS(shpnt).ldn_assignments[ldn]++;
+ 			} else
+ 				/* panic here, because a device, found at boottime has
+ 				   vanished */
+ 				panic("IBM MCA SCSI: ldn=0x%x, SCSI-device on (%d,%d) vanished!\n", ldn, target, cmd->device->lun);
+ 			/* unassign again all ldns (pun,lun,ldn does not matter for remove) */
+-			immediate_assign(host_index, 0, 0, 0, REMOVE_LDN);
++			immediate_assign(shpnt, 0, 0, 0, REMOVE_LDN);
+ 			/* remap all ldns, as written in the pun/lun table */
+ 			lun = 0;
+ #ifdef CONFIG_SCSI_MULTI_LUN
+ 			for (lun = 0; lun < 8; lun++)
+ #endif
+ 				for (id = 0; id < max_pun; id++) {
+-					if (get_ldn(host_index)[id][lun] <= MAX_LOG_DEV)
+-						immediate_assign(host_index, id, lun, get_ldn(host_index)[id][lun], SET_LDN);
++					if (get_ldn(shpnt)[id][lun] <= MAX_LOG_DEV)
++						immediate_assign(shpnt, id, lun, get_ldn(shpnt)[id][lun], SET_LDN);
+ 				}
+ 			/* set back to normal interrupt_handling */
+-			local_checking_phase_flag(host_index) = 0;
++			local_checking_phase_flag(shpnt) = 0;
+ #ifdef IM_DEBUG_PROBE
+ 			/* Information on syslog terminal */
+ 			printk("IBM MCA SCSI: ldn=0x%x dynamically reassigned to (%d,%d).\n", ldn, target, cmd->device->lun);
+ #endif
+ 			/* increase next_ldn for next dynamical assignment */
+-			next_ldn(host_index)++;
+-			if (next_ldn(host_index) >= MAX_LOG_DEV)
+-				next_ldn(host_index) = 7;
++			next_ldn(shpnt)++;
++			if (next_ldn(shpnt) >= MAX_LOG_DEV)
++				next_ldn(shpnt) = 7;
+ 		} else {	/* wall against Linux accesses to the subsystem adapter */
+ 			cmd->result = DID_BAD_TARGET << 16;
+ 			if (done)
+@@ -1923,34 +1815,32 @@
+ 	}
+ 
+ 	/*verify there is no command already in progress for this log dev */
+-	if (ld(host_index)[ldn].cmd)
++	if (ld(shpnt)[ldn].cmd)
+ 		panic("IBM MCA SCSI: cmd already in progress for this ldn.\n");
+ 
+ 	/*save done in cmd, and save cmd for the interrupt handler */
+ 	cmd->scsi_done = done;
+-	ld(host_index)[ldn].cmd = cmd;
++	ld(shpnt)[ldn].cmd = cmd;
+ 
+ 	/*fill scb information independent of the scsi command */
+-	scb = &(ld(host_index)[ldn].scb);
+-	ld(host_index)[ldn].tsb.dev_status = 0;
++	scb = &(ld(shpnt)[ldn].scb);
++	ld(shpnt)[ldn].tsb.dev_status = 0;
+ 	scb->enable = IM_REPORT_TSB_ONLY_ON_ERROR | IM_RETRY_ENABLE;
+-	scb->tsb_adr = isa_virt_to_bus(&(ld(host_index)[ldn].tsb));
++	scb->tsb_adr = isa_virt_to_bus(&(ld(shpnt)[ldn].tsb));
+ 	scsi_cmd = cmd->cmnd[0];
+ 
+-	if (cmd->use_sg) {
+-		i = cmd->use_sg;
+-		sl = (struct scatterlist *) (cmd->request_buffer);
+-		if (i > 16)
+-			panic("IBM MCA SCSI: scatter-gather list too long.\n");
+-		while (--i >= 0) {
+-			ld(host_index)[ldn].sge[i].address = (void *) (isa_page_to_bus(sl[i].page) + sl[i].offset);
+-			ld(host_index)[ldn].sge[i].byte_length = sl[i].length;
++	if (scsi_sg_count(cmd)) {
++		BUG_ON(scsi_sg_count(cmd) > 16);
++
++		scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) {
++			ld(shpnt)[ldn].sge[i].address = (void *) (isa_page_to_bus(sg->page) + sg->offset);
++			ld(shpnt)[ldn].sge[i].byte_length = sg->length;
+ 		}
+ 		scb->enable |= IM_POINTER_TO_LIST;
+-		scb->sys_buf_adr = isa_virt_to_bus(&(ld(host_index)[ldn].sge[0]));
+-		scb->sys_buf_length = cmd->use_sg * sizeof(struct im_sge);
++		scb->sys_buf_adr = isa_virt_to_bus(&(ld(shpnt)[ldn].sge[0]));
++		scb->sys_buf_length = scsi_sg_count(cmd) * sizeof(struct im_sge);
+ 	} else {
+-		scb->sys_buf_adr = isa_virt_to_bus(cmd->request_buffer);
++		scb->sys_buf_adr = isa_virt_to_bus(scsi_sglist(cmd));
+ 		/* recent Linux midlevel SCSI places 1024 byte for inquiry
+ 		 * command. Far too much for old PS/2 hardware. */
+ 		switch (scsi_cmd) {
+@@ -1961,16 +1851,16 @@
+ 		case REQUEST_SENSE:
+ 		case MODE_SENSE:
+ 		case MODE_SELECT:
+-			if (cmd->request_bufflen > 255)
++			if (scsi_bufflen(cmd) > 255)
+ 				scb->sys_buf_length = 255;
+ 			else
+-				scb->sys_buf_length = cmd->request_bufflen;
++				scb->sys_buf_length = scsi_bufflen(cmd);
+ 			break;
+ 		case TEST_UNIT_READY:
+ 			scb->sys_buf_length = 0;
+ 			break;
+ 		default:
+-			scb->sys_buf_length = cmd->request_bufflen;
++			scb->sys_buf_length = scsi_bufflen(cmd);
+ 			break;
+ 		}
+ 	}
+@@ -1982,16 +1872,16 @@
+ 
+ 	/* for specific device-type debugging: */
+ #ifdef IM_DEBUG_CMD_SPEC_DEV
+-	if (ld(host_index)[ldn].device_type == IM_DEBUG_CMD_DEVICE)
+-		printk("(SCSI-device-type=0x%x) issue scsi cmd=%02x to ldn=%d\n", ld(host_index)[ldn].device_type, scsi_cmd, ldn);
++	if (ld(shpnt)[ldn].device_type == IM_DEBUG_CMD_DEVICE)
++		printk("(SCSI-device-type=0x%x) issue scsi cmd=%02x to ldn=%d\n", ld(shpnt)[ldn].device_type, scsi_cmd, ldn);
+ #endif
+ 
+ 	/* for possible panics store current command */
+-	last_scsi_command(host_index)[ldn] = scsi_cmd;
+-	last_scsi_type(host_index)[ldn] = IM_SCB;
++	last_scsi_command(shpnt)[ldn] = scsi_cmd;
++	last_scsi_type(shpnt)[ldn] = IM_SCB;
+ 	/* update statistical info */
+-	IBM_DS(host_index).total_accesses++;
+-	IBM_DS(host_index).ldn_access[ldn]++;
++	IBM_DS(shpnt).total_accesses++;
++	IBM_DS(shpnt).ldn_access[ldn]++;
+ 
+ 	switch (scsi_cmd) {
+ 	case READ_6:
+@@ -2003,17 +1893,17 @@
+ 		/* Distinguish between disk and other devices. Only disks (that are the
+ 		   most frequently accessed devices) should be supported by the
+ 		   IBM-SCSI-Subsystem commands. */
+-		switch (ld(host_index)[ldn].device_type) {
++		switch (ld(shpnt)[ldn].device_type) {
+ 		case TYPE_DISK:	/* for harddisks enter here ... */
+ 		case TYPE_MOD:	/* ... try it also for MO-drives (send flames as */
+ 			/*     you like, if this won't work.) */
+ 			if (scsi_cmd == READ_6 || scsi_cmd == READ_10 || scsi_cmd == READ_12) {
+ 				/* read command preparations */
+ 				scb->enable |= IM_READ_CONTROL;
+-				IBM_DS(host_index).ldn_read_access[ldn]++;	/* increase READ-access on ldn stat. */
++				IBM_DS(shpnt).ldn_read_access[ldn]++;	/* increase READ-access on ldn stat. */
+ 				scb->command = IM_READ_DATA_CMD | IM_NO_DISCONNECT;
+ 			} else {	/* write command preparations */
+-				IBM_DS(host_index).ldn_write_access[ldn]++;	/* increase write-count on ldn stat. */
++				IBM_DS(shpnt).ldn_write_access[ldn]++;	/* increase write-count on ldn stat. */
+ 				scb->command = IM_WRITE_DATA_CMD | IM_NO_DISCONNECT;
+ 			}
+ 			if (scsi_cmd == READ_6 || scsi_cmd == WRITE_6) {
+@@ -2023,9 +1913,9 @@
+ 				scb->u1.log_blk_adr = (((unsigned) cmd->cmnd[5]) << 0) | (((unsigned) cmd->cmnd[4]) << 8) | (((unsigned) cmd->cmnd[3]) << 16) | (((unsigned) cmd->cmnd[2]) << 24);
+ 				scb->u2.blk.count = (((unsigned) cmd->cmnd[8]) << 0) | (((unsigned) cmd->cmnd[7]) << 8);
+ 			}
+-			last_scsi_logical_block(host_index)[ldn] = scb->u1.log_blk_adr;
+-			last_scsi_blockcount(host_index)[ldn] = scb->u2.blk.count;
+-			scb->u2.blk.length = ld(host_index)[ldn].block_length;
++			last_scsi_logical_block(shpnt)[ldn] = scb->u1.log_blk_adr;
++			last_scsi_blockcount(shpnt)[ldn] = scb->u2.blk.count;
++			scb->u2.blk.length = ld(shpnt)[ldn].block_length;
+ 			break;
+ 			/* for other devices, enter here. Other types are not known by
+ 			   Linux! TYPE_NO_LUN is forbidden as valid device. */
+@@ -2046,14 +1936,14 @@
+ 			scb->enable |= IM_BYPASS_BUFFER;
+ 			scb->u1.scsi_cmd_length = cmd->cmd_len;
+ 			memcpy(scb->u2.scsi_command, cmd->cmnd, cmd->cmd_len);
+-			last_scsi_type(host_index)[ldn] = IM_LONG_SCB;
++			last_scsi_type(shpnt)[ldn] = IM_LONG_SCB;
+ 			/* Read/write on this non-disk devices is also displayworthy,
+ 			   so flash-up the LED/display. */
+ 			break;
+ 		}
+ 		break;
+ 	case INQUIRY:
+-		IBM_DS(host_index).ldn_inquiry_access[ldn]++;
++		IBM_DS(shpnt).ldn_inquiry_access[ldn]++;
+ 		scb->command = IM_DEVICE_INQUIRY_CMD;
+ 		scb->enable |= IM_READ_CONTROL | IM_SUPRESS_EXCEPTION_SHORT | IM_BYPASS_BUFFER;
+ 		scb->u1.log_blk_adr = 0;
+@@ -2064,7 +1954,7 @@
+ 		scb->u1.log_blk_adr = 0;
+ 		scb->u1.scsi_cmd_length = 6;
+ 		memcpy(scb->u2.scsi_command, cmd->cmnd, 6);
+-		last_scsi_type(host_index)[ldn] = IM_LONG_SCB;
++		last_scsi_type(shpnt)[ldn] = IM_LONG_SCB;
+ 		break;
+ 	case READ_CAPACITY:
+ 		/* the length of system memory buffer must be exactly 8 bytes */
+@@ -2081,12 +1971,12 @@
+ 		/* Commands that need write-only-mode (system -> device): */
+ 	case MODE_SELECT:
+ 	case MODE_SELECT_10:
+-		IBM_DS(host_index).ldn_modeselect_access[ldn]++;
++		IBM_DS(shpnt).ldn_modeselect_access[ldn]++;
+ 		scb->command = IM_OTHER_SCSI_CMD_CMD;
+ 		scb->enable |= IM_SUPRESS_EXCEPTION_SHORT | IM_BYPASS_BUFFER;	/*Select needs WRITE-enabled */
+ 		scb->u1.scsi_cmd_length = cmd->cmd_len;
+ 		memcpy(scb->u2.scsi_command, cmd->cmnd, cmd->cmd_len);
+-		last_scsi_type(host_index)[ldn] = IM_LONG_SCB;
++		last_scsi_type(shpnt)[ldn] = IM_LONG_SCB;
+ 		break;
+ 		/* For other commands, read-only is useful. Most other commands are
+ 		   running without an input-data-block. */
+@@ -2095,19 +1985,19 @@
+ 		scb->enable |= IM_READ_CONTROL | IM_SUPRESS_EXCEPTION_SHORT | IM_BYPASS_BUFFER;
+ 		scb->u1.scsi_cmd_length = cmd->cmd_len;
+ 		memcpy(scb->u2.scsi_command, cmd->cmnd, cmd->cmd_len);
+-		last_scsi_type(host_index)[ldn] = IM_LONG_SCB;
++		last_scsi_type(shpnt)[ldn] = IM_LONG_SCB;
+ 		break;
+ 	}
+ 	/*issue scb command, and return */
+ 	if (++disk_rw_in_progress == 1)
+ 		PS2_DISK_LED_ON(shpnt->host_no, target);
+ 
+-	if (last_scsi_type(host_index)[ldn] == IM_LONG_SCB) {
+-		issue_cmd(host_index, isa_virt_to_bus(scb), IM_LONG_SCB | ldn);
+-		IBM_DS(host_index).long_scbs++;
++	if (last_scsi_type(shpnt)[ldn] == IM_LONG_SCB) {
++		issue_cmd(shpnt, isa_virt_to_bus(scb), IM_LONG_SCB | ldn);
++		IBM_DS(shpnt).long_scbs++;
+ 	} else {
+-		issue_cmd(host_index, isa_virt_to_bus(scb), IM_SCB | ldn);
+-		IBM_DS(host_index).scbs++;
++		issue_cmd(shpnt, isa_virt_to_bus(scb), IM_SCB | ldn);
++		IBM_DS(shpnt).scbs++;
+ 	}
+ 	return 0;
+ }
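
Across queuecommand the patch replaces direct use of cmd->use_sg,
cmd->request_buffer and cmd->request_bufflen with the scatterlist accessors
scsi_sg_count(), scsi_for_each_sg(), scsi_sglist() and scsi_bufflen(). A
small self-contained sketch of the idiom (hypothetical helper, not part of
this driver):

	/* assumes <scsi/scsi_cmnd.h>; total bytes the command will move */
	static unsigned int example_xfer_len(struct scsi_cmnd *cmd)
	{
		struct scatterlist *sg;
		unsigned int total = 0;
		int i;

		if (scsi_sg_count(cmd)) {
			/* scatter/gather: sum every segment */
			scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i)
				total += sg->length;
		} else {
			/* linear buffer, or no data phase at all */
			total = scsi_bufflen(cmd);
		}
		return total;
	}
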
+@@ -2122,7 +2012,6 @@
+ 	unsigned int ldn;
+ 	void (*saved_done) (Scsi_Cmnd *);
+ 	int target;
+-	int host_index;
+ 	int max_pun;
+ 	unsigned long imm_command;
+ 
+@@ -2131,35 +2020,23 @@
+ #endif
+ 
+ 	shpnt = cmd->device->host;
+-	/* search for the right hostadapter */
+-	for (host_index = 0; hosts[host_index] && hosts[host_index]->host_no != shpnt->host_no; host_index++);
+ 
+-	if (!hosts[host_index]) {	/* invalid hostadapter descriptor address */
+-		cmd->result = DID_NO_CONNECT << 16;
+-		if (cmd->scsi_done)
+-			(cmd->scsi_done) (cmd);
+-		shpnt = cmd->device->host;
+-#ifdef IM_DEBUG_PROBE
+-		printk(KERN_DEBUG "IBM MCA SCSI: Abort adapter selection failed!\n");
+-#endif
+-		return SUCCESS;
+-	}
+-	max_pun = subsystem_maxid(host_index);
++	max_pun = subsystem_maxid(shpnt);
+ 	if (ibm_ansi_order) {
+ 		target = max_pun - 1 - cmd->device->id;
+-		if ((target <= subsystem_pun(host_index)) && (cmd->device->id <= subsystem_pun(host_index)))
++		if ((target <= subsystem_pun(shpnt)) && (cmd->device->id <= subsystem_pun(shpnt)))
+ 			target--;
+-		else if ((target >= subsystem_pun(host_index)) && (cmd->device->id >= subsystem_pun(host_index)))
++		else if ((target >= subsystem_pun(shpnt)) && (cmd->device->id >= subsystem_pun(shpnt)))
+ 			target++;
+ 	} else
+ 		target = cmd->device->id;
+ 
+ 	/* get logical device number, and disable system interrupts */
+ 	printk(KERN_WARNING "IBM MCA SCSI: Sending abort to device pun=%d, lun=%d.\n", target, cmd->device->lun);
+-	ldn = get_ldn(host_index)[target][cmd->device->lun];
++	ldn = get_ldn(shpnt)[target][cmd->device->lun];
+ 
+ 	/*if cmd for this ldn has already finished, no need to abort */
+-	if (!ld(host_index)[ldn].cmd) {
++	if (!ld(shpnt)[ldn].cmd) {
+ 		    return SUCCESS;
+ 	}
+ 
+@@ -2170,20 +2047,20 @@
+ 	saved_done = cmd->scsi_done;
+ 	cmd->scsi_done = internal_done;
+ 	cmd->SCp.Status = 0;
+-	last_scsi_command(host_index)[ldn] = IM_ABORT_IMM_CMD;
+-	last_scsi_type(host_index)[ldn] = IM_IMM_CMD;
+-	imm_command = inl(IM_CMD_REG(host_index));
++	last_scsi_command(shpnt)[ldn] = IM_ABORT_IMM_CMD;
++	last_scsi_type(shpnt)[ldn] = IM_IMM_CMD;
++	imm_command = inl(IM_CMD_REG(shpnt));
+ 	imm_command &= (unsigned long) (0xffff0000);	/* mask reserved stuff */
+ 	imm_command |= (unsigned long) (IM_ABORT_IMM_CMD);
+ 	/* must wait for attention reg not busy */
+ 	/* FIXME - timeout, politeness */
+ 	while (1) {
+-		if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY))
++		if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY))
+ 			break;
+ 	}
+ 	/* write registers and enable system interrupts */
+-	outl(imm_command, IM_CMD_REG(host_index));
+-	outb(IM_IMM_CMD | ldn, IM_ATTN_REG(host_index));
++	outl(imm_command, IM_CMD_REG(shpnt));
++	outb(IM_IMM_CMD | ldn, IM_ATTN_REG(shpnt));
+ #ifdef IM_DEBUG_PROBE
+ 	printk("IBM MCA SCSI: Abort queued to adapter...\n");
+ #endif
+@@ -2202,7 +2079,7 @@
+ 		cmd->result |= DID_ABORT << 16;
+ 		if (cmd->scsi_done)
+ 			(cmd->scsi_done) (cmd);
+-		ld(host_index)[ldn].cmd = NULL;
++		ld(shpnt)[ldn].cmd = NULL;
+ #ifdef IM_DEBUG_PROBE
+ 		printk("IBM MCA SCSI: Abort finished with success.\n");
+ #endif
+@@ -2211,7 +2088,7 @@
+ 		cmd->result |= DID_NO_CONNECT << 16;
+ 		if (cmd->scsi_done)
+ 			(cmd->scsi_done) (cmd);
+-		ld(host_index)[ldn].cmd = NULL;
++		ld(shpnt)[ldn].cmd = NULL;
+ #ifdef IM_DEBUG_PROBE
+ 		printk("IBM MCA SCSI: Abort failed.\n");
+ #endif
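
Note that the abort path above still spins on IM_BUSY without bound, as its
own FIXME admits. A hedged sketch of the bounded, polite wait the FIXME asks
for, reusing the driver's register macros (illustrative only, not in the
patch):

	unsigned long deadline = jiffies + msecs_to_jiffies(100);

	while (inb(IM_STAT_REG(shpnt)) & IM_BUSY) {
		if (time_after(jiffies, deadline))
			return FAILED;	/* adapter never went idle */
		cpu_relax();		/* ease off the bus while polling */
	}
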
+@@ -2236,71 +2113,65 @@
+ 	struct Scsi_Host *shpnt;
+ 	Scsi_Cmnd *cmd_aid;
+ 	int ticks, i;
+-	int host_index;
+ 	unsigned long imm_command;
+ 
+ 	BUG_ON(cmd == NULL);
+ 
+ 	ticks = IM_RESET_DELAY * HZ;
+ 	shpnt = cmd->device->host;
+-	/* search for the right hostadapter */
+-	for (host_index = 0; hosts[host_index] && hosts[host_index]->host_no != shpnt->host_no; host_index++);
+ 
+-	if (!hosts[host_index])	/* invalid hostadapter descriptor address */
+-		return FAILED;
+-
+-	if (local_checking_phase_flag(host_index)) {
++	if (local_checking_phase_flag(shpnt)) {
+ 		printk(KERN_WARNING "IBM MCA SCSI: unable to reset while checking devices.\n");
+ 		return FAILED;
+ 	}
+ 
+ 	/* issue reset immediate command to subsystem, and wait for interrupt */
+ 	printk("IBM MCA SCSI: resetting all devices.\n");
+-	reset_status(host_index) = IM_RESET_IN_PROGRESS;
+-	last_scsi_command(host_index)[0xf] = IM_RESET_IMM_CMD;
+-	last_scsi_type(host_index)[0xf] = IM_IMM_CMD;
+-	imm_command = inl(IM_CMD_REG(host_index));
++	reset_status(shpnt) = IM_RESET_IN_PROGRESS;
++	last_scsi_command(shpnt)[0xf] = IM_RESET_IMM_CMD;
++	last_scsi_type(shpnt)[0xf] = IM_IMM_CMD;
++	imm_command = inl(IM_CMD_REG(shpnt));
+ 	imm_command &= (unsigned long) (0xffff0000);	/* mask reserved stuff */
+ 	imm_command |= (unsigned long) (IM_RESET_IMM_CMD);
+ 	/* must wait for attention reg not busy */
+ 	while (1) {
+-		if (!(inb(IM_STAT_REG(host_index)) & IM_BUSY))
++		if (!(inb(IM_STAT_REG(shpnt)) & IM_BUSY))
+ 			break;
+ 		spin_unlock_irq(shpnt->host_lock);
+ 		yield();
+ 		spin_lock_irq(shpnt->host_lock);
+ 	}
+ 	/*write registers and enable system interrupts */
+-	outl(imm_command, IM_CMD_REG(host_index));
+-	outb(IM_IMM_CMD | 0xf, IM_ATTN_REG(host_index));
++	outl(imm_command, IM_CMD_REG(shpnt));
++	outb(IM_IMM_CMD | 0xf, IM_ATTN_REG(shpnt));
+ 	/* wait for interrupt finished or intr_stat register to be set, as the
+ 	 * interrupt will not be executed, while we are in here! */
+ 	 
+ 	/* FIXME: This is really really icky we so want a sleeping version of this ! */
+-	while (reset_status(host_index) == IM_RESET_IN_PROGRESS && --ticks && ((inb(IM_INTR_REG(host_index)) & 0x8f) != 0x8f)) {
++	while (reset_status(shpnt) == IM_RESET_IN_PROGRESS && --ticks && ((inb(IM_INTR_REG(shpnt)) & 0x8f) != 0x8f)) {
+ 		udelay((1 + 999 / HZ) * 1000);
+ 		barrier();
+ 	}
+ 	/* if reset did not complete, just return an error */
+ 	if (!ticks) {
+ 		printk(KERN_ERR "IBM MCA SCSI: reset did not complete within %d seconds.\n", IM_RESET_DELAY);
+-		reset_status(host_index) = IM_RESET_FINISHED_FAIL;
++		reset_status(shpnt) = IM_RESET_FINISHED_FAIL;
+ 		return FAILED;
+ 	}
+ 
+-	if ((inb(IM_INTR_REG(host_index)) & 0x8f) == 0x8f) {
++	if ((inb(IM_INTR_REG(shpnt)) & 0x8f) == 0x8f) {
+ 		/* analysis done by this routine and not by the intr-routine */
+-		if (inb(IM_INTR_REG(host_index)) == 0xaf)
+-			reset_status(host_index) = IM_RESET_FINISHED_OK_NO_INT;
+-		else if (inb(IM_INTR_REG(host_index)) == 0xcf)
+-			reset_status(host_index) = IM_RESET_FINISHED_FAIL;
++		if (inb(IM_INTR_REG(shpnt)) == 0xaf)
++			reset_status(shpnt) = IM_RESET_FINISHED_OK_NO_INT;
++		else if (inb(IM_INTR_REG(shpnt)) == 0xcf)
++			reset_status(shpnt) = IM_RESET_FINISHED_FAIL;
+ 		else		/* failed, 4get it */
+-			reset_status(host_index) = IM_RESET_NOT_IN_PROGRESS_NO_INT;
+-		outb(IM_EOI | 0xf, IM_ATTN_REG(host_index));
++			reset_status(shpnt) = IM_RESET_NOT_IN_PROGRESS_NO_INT;
++		outb(IM_EOI | 0xf, IM_ATTN_REG(shpnt));
+ 	}
+ 
+ 	/* if reset failed, just return an error */
+-	if (reset_status(host_index) == IM_RESET_FINISHED_FAIL) {
++	if (reset_status(shpnt) == IM_RESET_FINISHED_FAIL) {
+ 		printk(KERN_ERR "IBM MCA SCSI: reset failed.\n");
+ 		return FAILED;
+ 	}
+@@ -2308,9 +2179,9 @@
+ 	/* so reset finished ok - call outstanding done's, and return success */
+ 	printk(KERN_INFO "IBM MCA SCSI: Reset successfully completed.\n");
+ 	for (i = 0; i < MAX_LOG_DEV; i++) {
+-		cmd_aid = ld(host_index)[i].cmd;
++		cmd_aid = ld(shpnt)[i].cmd;
+ 		if (cmd_aid && cmd_aid->scsi_done) {
+-			ld(host_index)[i].cmd = NULL;
++			ld(shpnt)[i].cmd = NULL;
+ 			cmd_aid->result = DID_RESET << 16;
+ 		}
+ 	}
+@@ -2351,46 +2222,46 @@
+ }
+ 
+ /* calculate percentage of total accesses on a ldn */
+-static int ldn_access_load(int host_index, int ldn)
++static int ldn_access_load(struct Scsi_Host *shpnt, int ldn)
+ {
+-	if (IBM_DS(host_index).total_accesses == 0)
++	if (IBM_DS(shpnt).total_accesses == 0)
+ 		return (0);
+-	if (IBM_DS(host_index).ldn_access[ldn] == 0)
++	if (IBM_DS(shpnt).ldn_access[ldn] == 0)
+ 		return (0);
+-	return (IBM_DS(host_index).ldn_access[ldn] * 100) / IBM_DS(host_index).total_accesses;
++	return (IBM_DS(shpnt).ldn_access[ldn] * 100) / IBM_DS(shpnt).total_accesses;
+ }
+ 
+ /* calculate total amount of r/w-accesses */
+-static int ldn_access_total_read_write(int host_index)
++static int ldn_access_total_read_write(struct Scsi_Host *shpnt)
+ {
+ 	int a;
+ 	int i;
+ 
+ 	a = 0;
+ 	for (i = 0; i <= MAX_LOG_DEV; i++)
+-		a += IBM_DS(host_index).ldn_read_access[i] + IBM_DS(host_index).ldn_write_access[i];
++		a += IBM_DS(shpnt).ldn_read_access[i] + IBM_DS(shpnt).ldn_write_access[i];
+ 	return (a);
+ }
+ 
+-static int ldn_access_total_inquiry(int host_index)
++static int ldn_access_total_inquiry(struct Scsi_Host *shpnt)
+ {
+ 	int a;
+ 	int i;
+ 
+ 	a = 0;
+ 	for (i = 0; i <= MAX_LOG_DEV; i++)
+-		a += IBM_DS(host_index).ldn_inquiry_access[i];
++		a += IBM_DS(shpnt).ldn_inquiry_access[i];
+ 	return (a);
+ }
+ 
+-static int ldn_access_total_modeselect(int host_index)
++static int ldn_access_total_modeselect(struct Scsi_Host *shpnt)
+ {
+ 	int a;
+ 	int i;
+ 
+ 	a = 0;
+ 	for (i = 0; i <= MAX_LOG_DEV; i++)
+-		a += IBM_DS(host_index).ldn_modeselect_access[i];
++		a += IBM_DS(shpnt).ldn_modeselect_access[i];
+ 	return (a);
+ }
+ 
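
The statistics helpers above switch from an int index into the old hosts[]
array to the struct Scsi_Host itself; macros such as IBM_DS() then
presumably resolve to fields of the per-host private area. A generic sketch
of that accessor idiom (names hypothetical):

	struct example_hostdata {
		int total_accesses;		/* per adapter, not global */
	};

	#define EX_HD(shpnt)	((struct example_hostdata *)(shpnt)->hostdata)

	static int example_access_load(struct Scsi_Host *shpnt, int n)
	{
		if (EX_HD(shpnt)->total_accesses == 0)
			return 0;
		return (n * 100) / EX_HD(shpnt)->total_accesses;
	}
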
+@@ -2398,19 +2269,14 @@
+ static int ibmmca_proc_info(struct Scsi_Host *shpnt, char *buffer, char **start, off_t offset, int length, int inout)
+ {
+ 	int len = 0;
+-	int i, id, lun, host_index;
++	int i, id, lun;
+ 	unsigned long flags;
+ 	int max_pun;
+ 
+-	for (i = 0; hosts[i] && hosts[i] != shpnt; i++);
+ 	
+-	spin_lock_irqsave(hosts[i]->host_lock, flags);	/* Check it */
+-	host_index = i;
+-	if (!shpnt) {
+-		len += sprintf(buffer + len, "\nIBM MCA SCSI: Can't find adapter");
+-		return len;
+-	}
+-	max_pun = subsystem_maxid(host_index);
++	spin_lock_irqsave(shpnt->host_lock, flags);	/* Check it */
++
++	max_pun = subsystem_maxid(shpnt);
+ 
+ 	len += sprintf(buffer + len, "\n             IBM-SCSI-Subsystem-Linux-Driver, Version %s\n\n\n", IBMMCA_SCSI_DRIVER_VERSION);
+ 	len += sprintf(buffer + len, " SCSI Access-Statistics:\n");
+@@ -2421,40 +2287,40 @@
+ 	len += sprintf(buffer + len, "               Multiple LUN probing.....: No\n");
+ #endif
+ 	len += sprintf(buffer + len, "               This Hostnumber..........: %d\n", shpnt->host_no);
+-	len += sprintf(buffer + len, "               Base I/O-Port............: 0x%x\n", (unsigned int) (IM_CMD_REG(host_index)));
++	len += sprintf(buffer + len, "               Base I/O-Port............: 0x%x\n", (unsigned int) (IM_CMD_REG(shpnt)));
+ 	len += sprintf(buffer + len, "               (Shared) IRQ.............: %d\n", IM_IRQ);
+-	len += sprintf(buffer + len, "               Total Interrupts.........: %d\n", IBM_DS(host_index).total_interrupts);
+-	len += sprintf(buffer + len, "               Total SCSI Accesses......: %d\n", IBM_DS(host_index).total_accesses);
+-	len += sprintf(buffer + len, "               Total short SCBs.........: %d\n", IBM_DS(host_index).scbs);
+-	len += sprintf(buffer + len, "               Total long SCBs..........: %d\n", IBM_DS(host_index).long_scbs);
+-	len += sprintf(buffer + len, "                 Total SCSI READ/WRITE..: %d\n", ldn_access_total_read_write(host_index));
+-	len += sprintf(buffer + len, "                 Total SCSI Inquiries...: %d\n", ldn_access_total_inquiry(host_index));
+-	len += sprintf(buffer + len, "                 Total SCSI Modeselects.: %d\n", ldn_access_total_modeselect(host_index));
+-	len += sprintf(buffer + len, "                 Total SCSI other cmds..: %d\n", IBM_DS(host_index).total_accesses - ldn_access_total_read_write(host_index)
+-		       - ldn_access_total_modeselect(host_index)
+-		       - ldn_access_total_inquiry(host_index));
+-	len += sprintf(buffer + len, "               Total SCSI command fails.: %d\n\n", IBM_DS(host_index).total_errors);
++	len += sprintf(buffer + len, "               Total Interrupts.........: %d\n", IBM_DS(shpnt).total_interrupts);
++	len += sprintf(buffer + len, "               Total SCSI Accesses......: %d\n", IBM_DS(shpnt).total_accesses);
++	len += sprintf(buffer + len, "               Total short SCBs.........: %d\n", IBM_DS(shpnt).scbs);
++	len += sprintf(buffer + len, "               Total long SCBs..........: %d\n", IBM_DS(shpnt).long_scbs);
++	len += sprintf(buffer + len, "                 Total SCSI READ/WRITE..: %d\n", ldn_access_total_read_write(shpnt));
++	len += sprintf(buffer + len, "                 Total SCSI Inquiries...: %d\n", ldn_access_total_inquiry(shpnt));
++	len += sprintf(buffer + len, "                 Total SCSI Modeselects.: %d\n", ldn_access_total_modeselect(shpnt));
++	len += sprintf(buffer + len, "                 Total SCSI other cmds..: %d\n", IBM_DS(shpnt).total_accesses - ldn_access_total_read_write(shpnt)
++		       - ldn_access_total_modeselect(shpnt)
++		       - ldn_access_total_inquiry(shpnt));
++	len += sprintf(buffer + len, "               Total SCSI command fails.: %d\n\n", IBM_DS(shpnt).total_errors);
+ 	len += sprintf(buffer + len, " Logical-Device-Number (LDN) Access-Statistics:\n");
+ 	len += sprintf(buffer + len, "         LDN | Accesses [%%] |   READ    |   WRITE   | ASSIGNMENTS\n");
+ 	len += sprintf(buffer + len, "        -----|--------------|-----------|-----------|--------------\n");
+ 	for (i = 0; i <= MAX_LOG_DEV; i++)
+-		len += sprintf(buffer + len, "         %2X  |    %3d       |  %8d |  %8d | %8d\n", i, ldn_access_load(host_index, i), IBM_DS(host_index).ldn_read_access[i], IBM_DS(host_index).ldn_write_access[i], IBM_DS(host_index).ldn_assignments[i]);
++		len += sprintf(buffer + len, "         %2X  |    %3d       |  %8d |  %8d | %8d\n", i, ldn_access_load(shpnt, i), IBM_DS(shpnt).ldn_read_access[i], IBM_DS(shpnt).ldn_write_access[i], IBM_DS(shpnt).ldn_assignments[i]);
+ 	len += sprintf(buffer + len, "        -----------------------------------------------------------\n\n");
+ 	len += sprintf(buffer + len, " Dynamical-LDN-Assignment-Statistics:\n");
+-	len += sprintf(buffer + len, "               Number of physical SCSI-devices..: %d (+ Adapter)\n", IBM_DS(host_index).total_scsi_devices);
+-	len += sprintf(buffer + len, "               Dynamical Assignment necessary...: %s\n", IBM_DS(host_index).dyn_flag ? "Yes" : "No ");
+-	len += sprintf(buffer + len, "               Next LDN to be assigned..........: 0x%x\n", next_ldn(host_index));
+-	len += sprintf(buffer + len, "               Dynamical assignments done yet...: %d\n", IBM_DS(host_index).dynamical_assignments);
++	len += sprintf(buffer + len, "               Number of physical SCSI-devices..: %d (+ Adapter)\n", IBM_DS(shpnt).total_scsi_devices);
++	len += sprintf(buffer + len, "               Dynamical Assignment necessary...: %s\n", IBM_DS(shpnt).dyn_flag ? "Yes" : "No ");
++	len += sprintf(buffer + len, "               Next LDN to be assigned..........: 0x%x\n", next_ldn(shpnt));
++	len += sprintf(buffer + len, "               Dynamical assignments done yet...: %d\n", IBM_DS(shpnt).dynamical_assignments);
+ 	len += sprintf(buffer + len, "\n Current SCSI-Device-Mapping:\n");
+ 	len += sprintf(buffer + len, "        Physical SCSI-Device Map               Logical SCSI-Device Map\n");
+ 	len += sprintf(buffer + len, "    ID\\LUN  0  1  2  3  4  5  6  7       ID\\LUN  0  1  2  3  4  5  6  7\n");
+ 	for (id = 0; id < max_pun; id++) {
+ 		len += sprintf(buffer + len, "    %2d     ", id);
+ 		for (lun = 0; lun < 8; lun++)
+-			len += sprintf(buffer + len, "%2s ", ti_p(get_scsi(host_index)[id][lun]));
++			len += sprintf(buffer + len, "%2s ", ti_p(get_scsi(shpnt)[id][lun]));
+ 		len += sprintf(buffer + len, "      %2d     ", id);
+ 		for (lun = 0; lun < 8; lun++)
+-			len += sprintf(buffer + len, "%2s ", ti_l(get_ldn(host_index)[id][lun]));
++			len += sprintf(buffer + len, "%2s ", ti_l(get_ldn(shpnt)[id][lun]));
+ 		len += sprintf(buffer + len, "\n");
+ 	}
+ 
+@@ -2488,20 +2354,31 @@
+ 
+ __setup("ibmmcascsi=", option_setup);
+ 
+-static struct scsi_host_template driver_template = {
+-          .proc_name      = "ibmmca",
+-	  .proc_info	  = ibmmca_proc_info,
+-          .name           = "IBM SCSI-Subsystem",
+-          .detect         = ibmmca_detect,
+-          .release        = ibmmca_release,
+-          .queuecommand   = ibmmca_queuecommand,
+-	  .eh_abort_handler = ibmmca_abort,
+-	  .eh_host_reset_handler = ibmmca_host_reset,
+-          .bios_param     = ibmmca_biosparam,
+-          .can_queue      = 16,
+-          .this_id        = 7,
+-          .sg_tablesize   = 16,
+-          .cmd_per_lun    = 1,
+-          .use_clustering = ENABLE_CLUSTERING,
++static struct mca_driver ibmmca_driver = {
++	.id_table = ibmmca_id_table,
++	.driver = {
++		.name	= "ibmmca",
++		.bus	= &mca_bus_type,
++		.probe	= ibmmca_probe,
++		.remove	= __devexit_p(ibmmca_remove),
++	},
+ };
+-#include "scsi_module.c"
++
++static int __init ibmmca_init(void)
++{
++#ifdef MODULE
++	/* If the driver is run as module, read from conf.modules or cmd-line */
++	if (boot_options)
++		option_setup(boot_options);
++#endif
++
++	return mca_register_driver_integrated(&ibmmca_driver, MCA_INTEGSCSI);
++}
++
++static void __exit ibmmca_exit(void)
++{
++	mca_unregister_driver(&ibmmca_driver);
++}
++
++module_init(ibmmca_init);
++module_exit(ibmmca_exit);
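
With this hunk, ibmmca drops the legacy scsi_module.c template (the old
detect/release entry points) and registers as an ordinary MCA bus driver
with its own module_init/module_exit. One detail worth spelling out:
__devexit_p() passes the remove function through when removal support is
compiled in, and substitutes NULL when __devexit code is discarded --
approximately this, per 2.6-era <linux/init.h>:

	#if defined(MODULE) || defined(CONFIG_HOTPLUG)
	#define __devexit_p(x)	x
	#else
	#define __devexit_p(x)	NULL
	#endif
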
+diff -Nurb linux-2.6.22-570/drivers/scsi/ibmmca.h linux-2.6.22-try2/drivers/scsi/ibmmca.h
+--- linux-2.6.22-570/drivers/scsi/ibmmca.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/ibmmca.h	1969-12-31 19:00:00.000000000 -0500
+@@ -1,21 +0,0 @@
+-/*
+- * Low Level Driver for the IBM Microchannel SCSI Subsystem
+- * (Headerfile, see Documentation/scsi/ibmmca.txt for description of the
+- * IBM MCA SCSI-driver.
+- * For use under the GNU General Public License within the Linux-kernel project.
+- * This include file works only correctly with kernel 2.4.0 or higher!!! */
+-
+-#ifndef _IBMMCA_H
+-#define _IBMMCA_H
+-
+-/* Common forward declarations for all Linux-versions: */
+-
+-/* Interfaces to the midlevel Linux SCSI driver */
+-static int ibmmca_detect (struct scsi_host_template *);
+-static int ibmmca_release (struct Scsi_Host *);
+-static int ibmmca_queuecommand (Scsi_Cmnd *, void (*done) (Scsi_Cmnd *));
+-static int ibmmca_abort (Scsi_Cmnd *);
+-static int ibmmca_host_reset (Scsi_Cmnd *);
+-static int ibmmca_biosparam (struct scsi_device *, struct block_device *, sector_t, int *);
+-
+-#endif /* _IBMMCA_H */
+diff -Nurb linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.c linux-2.6.22-try2/drivers/scsi/ibmvscsi/ibmvscsi.c
+--- linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/ibmvscsi/ibmvscsi.c	2007-12-19 15:29:23.000000000 -0500
+@@ -173,8 +173,7 @@
+ 		}
+ 	}
+ 	if (in_use)
+-		printk(KERN_WARNING
+-		       "ibmvscsi: releasing event pool with %d "
++		dev_warn(hostdata->dev, "releasing event pool with %d "
+ 		       "events still in use?\n", in_use);
+ 	kfree(pool->events);
+ 	dma_free_coherent(hostdata->dev,
+@@ -210,14 +209,12 @@
+ 				       struct srp_event_struct *evt)
+ {
+ 	if (!valid_event_struct(pool, evt)) {
+-		printk(KERN_ERR
+-		       "ibmvscsi: Freeing invalid event_struct %p "
++		dev_err(evt->hostdata->dev, "Freeing invalid event_struct %p "
+ 		       "(not in pool %p)\n", evt, pool->events);
+ 		return;
+ 	}
+ 	if (atomic_inc_return(&evt->free) != 1) {
+-		printk(KERN_ERR
+-		       "ibmvscsi: Freeing event_struct %p "
++		dev_err(evt->hostdata->dev, "Freeing event_struct %p "
+ 		       "which is not in use!\n", evt);
+ 		return;
+ 	}
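
From here on, the ibmvscsi changes are dominated by one mechanical
conversion: bare printk(KERN_* "ibmvscsi: ...") calls become
dev_err()/dev_warn()/dev_info() (or sdev_printk() where a SCSI device is at
hand), so each message is prefixed with the emitting device automatically.
Side by side (the output shown is a plausible example; the exact prefix
depends on bus and device name):

	/* old style: hand-written, fixed tag */
	printk(KERN_ERR "ibmvscsi: send error %d\n", rc);

	/* new style: driver name and bus id supplied by the core,
	 * e.g. "ibmvscsi 30000003: send error 2" */
	dev_err(hostdata->dev, "send error %d\n", rc);
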
+@@ -408,13 +405,6 @@
+ 		return 1;
+ 	}
+ 
+-	if (sg_mapped > SG_ALL) {
+-		printk(KERN_ERR
+-		       "ibmvscsi: More than %d mapped sg entries, got %d\n",
+-		       SG_ALL, sg_mapped);
+-		return 0;
+-	}
+-
+ 	indirect->table_desc.va = 0;
+ 	indirect->table_desc.len = sg_mapped * sizeof(struct srp_direct_buf);
+ 	indirect->table_desc.key = 0;
+@@ -433,10 +423,9 @@
+ 					   SG_ALL * sizeof(struct srp_direct_buf),
+ 					   &evt_struct->ext_list_token, 0);
+ 		if (!evt_struct->ext_list) {
+-			printk(KERN_ERR
+-			       "ibmvscsi: Can't allocate memory for indirect table\n");
++			sdev_printk(KERN_ERR, cmd->device,
++				    "Can't allocate memory for indirect table\n");
+ 			return 0;
+-			
+ 		}
+ 	}
+ 
+@@ -471,8 +460,8 @@
+ 			       cmd->request_bufflen,
+ 			       DMA_BIDIRECTIONAL);
+ 	if (dma_mapping_error(data->va)) {
+-		printk(KERN_ERR
+-		       "ibmvscsi: Unable to map request_buffer for command!\n");
++		sdev_printk(KERN_ERR, cmd->device,
++			    "Unable to map request_buffer for command!\n");
+ 		return 0;
+ 	}
+ 	data->len = cmd->request_bufflen;
+@@ -503,12 +492,12 @@
+ 	case DMA_NONE:
+ 		return 1;
+ 	case DMA_BIDIRECTIONAL:
+-		printk(KERN_ERR
+-		       "ibmvscsi: Can't map DMA_BIDIRECTIONAL to read/write\n");
++		sdev_printk(KERN_ERR, cmd->device,
++			    "Can't map DMA_BIDIRECTIONAL to read/write\n");
+ 		return 0;
+ 	default:
+-		printk(KERN_ERR
+-		       "ibmvscsi: Unknown data direction 0x%02x; can't map!\n",
++		sdev_printk(KERN_ERR, cmd->device,
++			    "Unknown data direction 0x%02x; can't map!\n",
+ 		       cmd->sc_data_direction);
+ 		return 0;
+ 	}
+@@ -520,6 +509,70 @@
+ 	return map_single_data(cmd, srp_cmd, dev);
+ }
+ 
++/**
++ * purge_requests: Our virtual adapter just shut down. Purge any sent requests
++ * @hostdata:    the adapter
++ */
++static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
++{
++	struct srp_event_struct *tmp_evt, *pos;
++	unsigned long flags;
++
++	spin_lock_irqsave(hostdata->host->host_lock, flags);
++	list_for_each_entry_safe(tmp_evt, pos, &hostdata->sent, list) {
++		list_del(&tmp_evt->list);
++		del_timer(&tmp_evt->timer);
++		if (tmp_evt->cmnd) {
++			tmp_evt->cmnd->result = (error_code << 16);
++			unmap_cmd_data(&tmp_evt->iu.srp.cmd,
++				       tmp_evt,
++				       tmp_evt->hostdata->dev);
++			if (tmp_evt->cmnd_done)
++				tmp_evt->cmnd_done(tmp_evt->cmnd);
++		} else if (tmp_evt->done)
++			tmp_evt->done(tmp_evt);
++		free_event_struct(&tmp_evt->hostdata->pool, tmp_evt);
++	}
++	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
++}
++
++/**
++ * ibmvscsi_reset_host - Reset the connection to the server
++ * @hostdata:	struct ibmvscsi_host_data to reset
++*/
++static void ibmvscsi_reset_host(struct ibmvscsi_host_data *hostdata)
++{
++	scsi_block_requests(hostdata->host);
++	atomic_set(&hostdata->request_limit, 0);
++
++	purge_requests(hostdata, DID_ERROR);
++	if ((ibmvscsi_reset_crq_queue(&hostdata->queue, hostdata)) ||
++	    (ibmvscsi_send_crq(hostdata, 0xC001000000000000LL, 0)) ||
++	    (vio_enable_interrupts(to_vio_dev(hostdata->dev)))) {
++		atomic_set(&hostdata->request_limit, -1);
++		dev_err(hostdata->dev, "error after reset\n");
++	}
++
++	scsi_unblock_requests(hostdata->host);
++}
++
++/**
++ * ibmvscsi_timeout - Internal command timeout handler
++ * @evt_struct:	struct srp_event_struct that timed out
++ *
++ * Called when an internally generated command times out
++*/
++static void ibmvscsi_timeout(struct srp_event_struct *evt_struct)
++{
++	struct ibmvscsi_host_data *hostdata = evt_struct->hostdata;
++
++	dev_err(hostdata->dev, "Command timed out (%x). Resetting connection\n",
++		evt_struct->iu.srp.cmd.opcode);
++
++	ibmvscsi_reset_host(hostdata);
++}
++
++
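
purge_requests() moves up ahead of its new callers (the relocated copy also
del_timer()s each purged event), while ibmvscsi_reset_host() and
ibmvscsi_timeout() are new: internally generated commands now carry a
timer, and an expiry resets the whole connection instead of hanging
forever. One convention worth noting in the purge loop: the host byte of a
SCSI result lives in bits 16-23, hence the shift:

	/* midlayer convention for composing scsi_cmnd.result */
	tmp_evt->cmnd->result = (error_code << 16);	/* host byte */
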
+ /* ------------------------------------------------------------
+  * Routines for sending and receiving SRPs
+  */
+@@ -527,12 +580,14 @@
+  * ibmvscsi_send_srp_event: - Transforms event to u64 array and calls send_crq()
+  * @evt_struct:	evt_struct to be sent
+  * @hostdata:	ibmvscsi_host_data of host
++ * @timeout:	timeout in seconds - 0 means do not time the command
+  *
+  * Returns the value returned from ibmvscsi_send_crq(). (Zero for success)
+  * Note that this routine assumes that host_lock is held for synchronization
+ */
+ static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct,
+-				   struct ibmvscsi_host_data *hostdata)
++				   struct ibmvscsi_host_data *hostdata,
++				   unsigned long timeout)
+ {
+ 	u64 *crq_as_u64 = (u64 *) &evt_struct->crq;
+ 	int request_status;
+@@ -588,12 +643,20 @@
+ 	 */
+ 	list_add_tail(&evt_struct->list, &hostdata->sent);
+ 
++	init_timer(&evt_struct->timer);
++	if (timeout) {
++		evt_struct->timer.data = (unsigned long) evt_struct;
++		evt_struct->timer.expires = jiffies + (timeout * HZ);
++		evt_struct->timer.function = (void (*)(unsigned long))ibmvscsi_timeout;
++		add_timer(&evt_struct->timer);
++	}
++
+ 	if ((rc =
+ 	     ibmvscsi_send_crq(hostdata, crq_as_u64[0], crq_as_u64[1])) != 0) {
+ 		list_del(&evt_struct->list);
++		del_timer(&evt_struct->timer);
+ 
+-		printk(KERN_ERR "ibmvscsi: send error %d\n",
+-		       rc);
++		dev_err(hostdata->dev, "send error %d\n", rc);
+ 		atomic_inc(&hostdata->request_limit);
+ 		goto send_error;
+ 	}
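
The hunk above is where the per-event timer is armed and disarmed. For
reference, the 2.6-era timer_list pattern it follows, sketched generically
(names illustrative):

	struct timer_list t;

	init_timer(&t);				/* prepare the timer */
	t.data     = (unsigned long)state;	/* handler argument */
	t.function = my_timeout;	/* void my_timeout(unsigned long) */
	t.expires  = jiffies + timeout * HZ;	/* absolute expiry */
	add_timer(&t);
	/* ... on normal completion: */
	del_timer(&t);				/* cancel before freeing */
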
+@@ -634,9 +697,8 @@
+ 
+ 	if (unlikely(rsp->opcode != SRP_RSP)) {
+ 		if (printk_ratelimit())
+-			printk(KERN_WARNING 
+-			       "ibmvscsi: bad SRP RSP type %d\n",
+-			       rsp->opcode);
++			dev_warn(evt_struct->hostdata->dev,
++				 "bad SRP RSP type %d\n", rsp->opcode);
+ 	}
+ 	
+ 	if (cmnd) {
+@@ -697,7 +759,7 @@
+ 	srp_cmd->lun = ((u64) lun) << 48;
+ 
+ 	if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) {
+-		printk(KERN_ERR "ibmvscsi: couldn't convert cmd to srp_cmd\n");
++		sdev_printk(KERN_ERR, cmnd->device, "couldn't convert cmd to srp_cmd\n");
+ 		free_event_struct(&hostdata->pool, evt_struct);
+ 		return SCSI_MLQUEUE_HOST_BUSY;
+ 	}
+@@ -722,7 +784,7 @@
+ 			offsetof(struct srp_indirect_buf, desc_list);
+ 	}
+ 
+-	return ibmvscsi_send_srp_event(evt_struct, hostdata);
++	return ibmvscsi_send_srp_event(evt_struct, hostdata, 0);
+ }
+ 
+ /* ------------------------------------------------------------
+@@ -744,10 +806,10 @@
+ 			 DMA_BIDIRECTIONAL);
+ 
+ 	if (evt_struct->xfer_iu->mad.adapter_info.common.status) {
+-		printk("ibmvscsi: error %d getting adapter info\n",
++		dev_err(hostdata->dev, "error %d getting adapter info\n",
+ 		       evt_struct->xfer_iu->mad.adapter_info.common.status);
+ 	} else {
+-		printk("ibmvscsi: host srp version: %s, "
++		dev_info(hostdata->dev, "host srp version: %s, "
+ 		       "host partition %s (%d), OS %d, max io %u\n",
+ 		       hostdata->madapter_info.srp_version,
+ 		       hostdata->madapter_info.partition_name,
+@@ -761,10 +823,9 @@
+ 		
+ 		if (hostdata->madapter_info.os_type == 3 &&
+ 		    strcmp(hostdata->madapter_info.srp_version, "1.6a") <= 0) {
+-			printk("ibmvscsi: host (Ver. %s) doesn't support large"
+-			       "transfers\n",
++			dev_err(hostdata->dev, "host (Ver. %s) doesn't support large transfers\n",
+ 			       hostdata->madapter_info.srp_version);
+-			printk("ibmvscsi: limiting scatterlists to %d\n",
++			dev_err(hostdata->dev, "limiting scatterlists to %d\n",
+ 			       MAX_INDIRECT_BUFS);
+ 			hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS;
+ 		}
+@@ -784,12 +845,13 @@
+ {
+ 	struct viosrp_adapter_info *req;
+ 	struct srp_event_struct *evt_struct;
++	unsigned long flags;
+ 	dma_addr_t addr;
+ 
+ 	evt_struct = get_event_struct(&hostdata->pool);
+ 	if (!evt_struct) {
+-		printk(KERN_ERR "ibmvscsi: couldn't allocate an event "
+-		       "for ADAPTER_INFO_REQ!\n");
++		dev_err(hostdata->dev,
++			"couldn't allocate an event for ADAPTER_INFO_REQ!\n");
+ 		return;
+ 	}
+ 
+@@ -809,20 +871,20 @@
+ 					    DMA_BIDIRECTIONAL);
+ 
+ 	if (dma_mapping_error(req->buffer)) {
+-		printk(KERN_ERR
+-		       "ibmvscsi: Unable to map request_buffer "
+-		       "for adapter_info!\n");
++		dev_err(hostdata->dev, "Unable to map request_buffer for adapter_info!\n");
+ 		free_event_struct(&hostdata->pool, evt_struct);
+ 		return;
+ 	}
+ 	
+-	if (ibmvscsi_send_srp_event(evt_struct, hostdata)) {
+-		printk(KERN_ERR "ibmvscsi: couldn't send ADAPTER_INFO_REQ!\n");
++	spin_lock_irqsave(hostdata->host->host_lock, flags);
++	if (ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2)) {
++		dev_err(hostdata->dev, "couldn't send ADAPTER_INFO_REQ!\n");
+ 		dma_unmap_single(hostdata->dev,
+ 				 addr,
+ 				 sizeof(hostdata->madapter_info),
+ 				 DMA_BIDIRECTIONAL);
+ 	}
++	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+ };
+ 
+ /**
+@@ -839,24 +901,23 @@
+ 	case SRP_LOGIN_RSP:	/* it worked! */
+ 		break;
+ 	case SRP_LOGIN_REJ:	/* refused! */
+-		printk(KERN_INFO "ibmvscsi: SRP_LOGIN_REJ reason %u\n",
++		dev_info(hostdata->dev, "SRP_LOGIN_REJ reason %u\n",
+ 		       evt_struct->xfer_iu->srp.login_rej.reason);
+ 		/* Login failed.  */
+ 		atomic_set(&hostdata->request_limit, -1);
+ 		return;
+ 	default:
+-		printk(KERN_ERR
+-		       "ibmvscsi: Invalid login response typecode 0x%02x!\n",
++		dev_err(hostdata->dev, "Invalid login response typecode 0x%02x!\n",
+ 		       evt_struct->xfer_iu->srp.login_rsp.opcode);
+ 		/* Login failed.  */
+ 		atomic_set(&hostdata->request_limit, -1);
+ 		return;
+ 	}
+ 
+-	printk(KERN_INFO "ibmvscsi: SRP_LOGIN succeeded\n");
++	dev_info(hostdata->dev, "SRP_LOGIN succeeded\n");
+ 
+ 	if (evt_struct->xfer_iu->srp.login_rsp.req_lim_delta < 0)
+-		printk(KERN_ERR "ibmvscsi: Invalid request_limit.\n");
++		dev_err(hostdata->dev, "Invalid request_limit.\n");
+ 
+ 	/* Now we know what the real request-limit is.
+ 	 * This value is set rather than added to request_limit because
+@@ -885,8 +946,7 @@
+ 	struct srp_login_req *login;
+ 	struct srp_event_struct *evt_struct = get_event_struct(&hostdata->pool);
+ 	if (!evt_struct) {
+-		printk(KERN_ERR
+-		       "ibmvscsi: couldn't allocate an event for login req!\n");
++		dev_err(hostdata->dev, "couldn't allocate an event for login req!\n");
+ 		return FAILED;
+ 	}
+ 
+@@ -907,9 +967,9 @@
+ 	 */
+ 	atomic_set(&hostdata->request_limit, 1);
+ 
+-	rc = ibmvscsi_send_srp_event(evt_struct, hostdata);
++	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2);
+ 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+-	printk("ibmvscsic: sent SRP login\n");
++	dev_info(hostdata->dev, "sent SRP login\n");
+ 	return rc;
+ };
+ 
+@@ -958,13 +1018,13 @@
+ 
+ 	if (!found_evt) {
+ 		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+-		return FAILED;
++		return SUCCESS;
+ 	}
+ 
+ 	evt = get_event_struct(&hostdata->pool);
+ 	if (evt == NULL) {
+ 		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+-		printk(KERN_ERR "ibmvscsi: failed to allocate abort event\n");
++		sdev_printk(KERN_ERR, cmd->device, "failed to allocate abort event\n");
+ 		return FAILED;
+ 	}
+ 	
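
A small but meaningful behaviour change above: when the command to be
aborted is no longer on the sent list, the old code returned FAILED and
escalated recovery for a command that had in fact already completed;
returning SUCCESS lets error handling stop there:

	if (!found_evt) {	/* already completed -- nothing to abort */
		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
		return SUCCESS;
	}
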
+@@ -982,15 +1042,16 @@
+ 	tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK;
+ 	tsk_mgmt->task_tag = (u64) found_evt;
+ 
+-	printk(KERN_INFO "ibmvscsi: aborting command. lun 0x%lx, tag 0x%lx\n",
++	sdev_printk(KERN_INFO, cmd->device, "aborting command. lun 0x%lx, tag 0x%lx\n",
+ 	       tsk_mgmt->lun, tsk_mgmt->task_tag);
+ 
+ 	evt->sync_srp = &srp_rsp;
+ 	init_completion(&evt->comp);
+-	rsp_rc = ibmvscsi_send_srp_event(evt, hostdata);
++	rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+ 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+ 	if (rsp_rc != 0) {
+-		printk(KERN_ERR "ibmvscsi: failed to send abort() event\n");
++		sdev_printk(KERN_ERR, cmd->device,
++			    "failed to send abort() event. rc=%d\n", rsp_rc);
+ 		return FAILED;
+ 	}
+ 
+@@ -999,8 +1060,7 @@
+ 	/* make sure we got a good response */
+ 	if (unlikely(srp_rsp.srp.rsp.opcode != SRP_RSP)) {
+ 		if (printk_ratelimit())
+-			printk(KERN_WARNING 
+-			       "ibmvscsi: abort bad SRP RSP type %d\n",
++			sdev_printk(KERN_WARNING, cmd->device, "abort bad SRP RSP type %d\n",
+ 			       srp_rsp.srp.rsp.opcode);
+ 		return FAILED;
+ 	}
+@@ -1012,10 +1072,9 @@
+ 
+ 	if (rsp_rc) {
+ 		if (printk_ratelimit())
+-			printk(KERN_WARNING 
+-			       "ibmvscsi: abort code %d for task tag 0x%lx\n",
+-			       rsp_rc,
+-			       tsk_mgmt->task_tag);
++			sdev_printk(KERN_WARNING, cmd->device,
++				    "abort code %d for task tag 0x%lx\n",
++				    rsp_rc, tsk_mgmt->task_tag);
+ 		return FAILED;
+ 	}
+ 
+@@ -1034,14 +1093,12 @@
+ 
+ 	if (found_evt == NULL) {
+ 		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+-		printk(KERN_INFO
+-		       "ibmvscsi: aborted task tag 0x%lx completed\n",
++		sdev_printk(KERN_INFO, cmd->device, "aborted task tag 0x%lx completed\n",
+ 		       tsk_mgmt->task_tag);
+ 		return SUCCESS;
+ 	}
+ 
+-	printk(KERN_INFO
+-	       "ibmvscsi: successfully aborted task tag 0x%lx\n",
++	sdev_printk(KERN_INFO, cmd->device, "successfully aborted task tag 0x%lx\n",
+ 	       tsk_mgmt->task_tag);
+ 
+ 	cmd->result = (DID_ABORT << 16);
+@@ -1076,7 +1133,7 @@
+ 	evt = get_event_struct(&hostdata->pool);
+ 	if (evt == NULL) {
+ 		spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+-		printk(KERN_ERR "ibmvscsi: failed to allocate reset event\n");
++		sdev_printk(KERN_ERR, cmd->device, "failed to allocate reset event\n");
+ 		return FAILED;
+ 	}
+ 	
+@@ -1093,15 +1150,16 @@
+ 	tsk_mgmt->lun = ((u64) lun) << 48;
+ 	tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET;
+ 
+-	printk(KERN_INFO "ibmvscsi: resetting device. lun 0x%lx\n",
++	sdev_printk(KERN_INFO, cmd->device, "resetting device. lun 0x%lx\n",
+ 	       tsk_mgmt->lun);
+ 
+ 	evt->sync_srp = &srp_rsp;
+ 	init_completion(&evt->comp);
+-	rsp_rc = ibmvscsi_send_srp_event(evt, hostdata);
++	rsp_rc = ibmvscsi_send_srp_event(evt, hostdata, init_timeout * 2);
+ 	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+ 	if (rsp_rc != 0) {
+-		printk(KERN_ERR "ibmvscsi: failed to send reset event\n");
++		sdev_printk(KERN_ERR, cmd->device,
++			    "failed to send reset event. rc=%d\n", rsp_rc);
+ 		return FAILED;
+ 	}
+ 
+@@ -1110,8 +1168,7 @@
+ 	/* make sure we got a good response */
+ 	if (unlikely(srp_rsp.srp.rsp.opcode != SRP_RSP)) {
+ 		if (printk_ratelimit())
+-			printk(KERN_WARNING 
+-			       "ibmvscsi: reset bad SRP RSP type %d\n",
++			sdev_printk(KERN_WARNING, cmd->device, "reset bad SRP RSP type %d\n",
+ 			       srp_rsp.srp.rsp.opcode);
+ 		return FAILED;
+ 	}
+@@ -1123,8 +1180,8 @@
+ 
+ 	if (rsp_rc) {
+ 		if (printk_ratelimit())
+-			printk(KERN_WARNING 
+-			       "ibmvscsi: reset code %d for task tag 0x%lx\n",
++			sdev_printk(KERN_WARNING, cmd->device,
++				    "reset code %d for task tag 0x%lx\n",
+ 			       rsp_rc, tsk_mgmt->task_tag);
+ 		return FAILED;
+ 	}
+@@ -1154,32 +1211,30 @@
+ }
+ 
+ /**
+- * purge_requests: Our virtual adapter just shut down.  purge any sent requests
+- * @hostdata:    the adapter
+- */
+-static void purge_requests(struct ibmvscsi_host_data *hostdata, int error_code)
++ * ibmvscsi_eh_host_reset_handler - Reset the connection to the server
++ * @cmd:	struct scsi_cmnd having problems
++*/
++static int ibmvscsi_eh_host_reset_handler(struct scsi_cmnd *cmd)
+ {
+-	struct srp_event_struct *tmp_evt, *pos;
+-	unsigned long flags;
++	unsigned long wait_switch = 0;
++	struct ibmvscsi_host_data *hostdata =
++		(struct ibmvscsi_host_data *)cmd->device->host->hostdata;
+ 
+-	spin_lock_irqsave(hostdata->host->host_lock, flags);
+-	list_for_each_entry_safe(tmp_evt, pos, &hostdata->sent, list) {
+-		list_del(&tmp_evt->list);
+-		if (tmp_evt->cmnd) {
+-			tmp_evt->cmnd->result = (error_code << 16);
+-			unmap_cmd_data(&tmp_evt->iu.srp.cmd, 
+-				       tmp_evt,	
+-				       tmp_evt->hostdata->dev);
+-			if (tmp_evt->cmnd_done)
+-				tmp_evt->cmnd_done(tmp_evt->cmnd);
+-		} else {
+-			if (tmp_evt->done) {
+-				tmp_evt->done(tmp_evt);
+-			}
+-		}
+-		free_event_struct(&tmp_evt->hostdata->pool, tmp_evt);
++	dev_err(hostdata->dev, "Resetting connection due to error recovery\n");
++
++	ibmvscsi_reset_host(hostdata);
++
++	for (wait_switch = jiffies + (init_timeout * HZ);
++	     time_before(jiffies, wait_switch) &&
++		     atomic_read(&hostdata->request_limit) < 2;) {
++
++		msleep(10);
+ 	}
+-	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
++
++	if (atomic_read(&hostdata->request_limit) <= 0)
++		return FAILED;
++
++	return SUCCESS;
+ }
+ 
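
The new eh_host_reset_handler resets the connection, then polls
request_limit: the SRP login that follows a reset restores the credit
count, so a value of 2 or more serves as the "link is back" signal, bounded
by init_timeout. The same logic, sketched on its own (assumes
<linux/delay.h> and <linux/jiffies.h>):

	unsigned long deadline = jiffies + init_timeout * HZ;

	while (time_before(jiffies, deadline) &&
	       atomic_read(&hostdata->request_limit) < 2)
		msleep(10);		/* sleep, don't spin */

	return atomic_read(&hostdata->request_limit) > 0 ? SUCCESS : FAILED;
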
+ /**
+@@ -1191,6 +1246,7 @@
+ void ibmvscsi_handle_crq(struct viosrp_crq *crq,
+ 			 struct ibmvscsi_host_data *hostdata)
+ {
++	long rc;
+ 	unsigned long flags;
+ 	struct srp_event_struct *evt_struct =
+ 	    (struct srp_event_struct *)crq->IU_data_ptr;
+@@ -1198,27 +1254,25 @@
+ 	case 0xC0:		/* initialization */
+ 		switch (crq->format) {
+ 		case 0x01:	/* Initialization message */
+-			printk(KERN_INFO "ibmvscsi: partner initialized\n");
++			dev_info(hostdata->dev, "partner initialized\n");
+ 			/* Send back a response */
+-			if (ibmvscsi_send_crq(hostdata,
+-					      0xC002000000000000LL, 0) == 0) {
++			if ((rc = ibmvscsi_send_crq(hostdata,
++						    0xC002000000000000LL, 0)) == 0) {
+ 				/* Now login */
+ 				send_srp_login(hostdata);
+ 			} else {
+-				printk(KERN_ERR
+-				       "ibmvscsi: Unable to send init rsp\n");
++				dev_err(hostdata->dev, "Unable to send init rsp. rc=%ld\n", rc);
+ 			}
+ 
+ 			break;
+ 		case 0x02:	/* Initialization response */
+-			printk(KERN_INFO
+-			       "ibmvscsi: partner initialization complete\n");
++			dev_info(hostdata->dev, "partner initialization complete\n");
+ 
+ 			/* Now login */
+ 			send_srp_login(hostdata);
+ 			break;
+ 		default:
+-			printk(KERN_ERR "ibmvscsi: unknown crq message type\n");
++			dev_err(hostdata->dev, "unknown crq message type: %d\n", crq->format);
+ 		}
+ 		return;
+ 	case 0xFF:	/* Hypervisor telling us the connection is closed */
+@@ -1226,8 +1280,7 @@
+ 		atomic_set(&hostdata->request_limit, 0);
+ 		if (crq->format == 0x06) {
+ 			/* We need to re-setup the interpartition connection */
+-			printk(KERN_INFO
+-			       "ibmvscsi: Re-enabling adapter!\n");
++			dev_info(hostdata->dev, "Re-enabling adapter!\n");
+ 			purge_requests(hostdata, DID_REQUEUE);
+ 			if ((ibmvscsi_reenable_crq_queue(&hostdata->queue,
+ 							hostdata)) ||
+@@ -1235,13 +1288,10 @@
+ 					       0xC001000000000000LL, 0))) {
+ 					atomic_set(&hostdata->request_limit,
+ 						   -1);
+-					printk(KERN_ERR
+-					       "ibmvscsi: error after"
+-					       " enable\n");
++					dev_err(hostdata->dev, "error after enable\n");
+ 			}
+ 		} else {
+-			printk(KERN_INFO
+-			       "ibmvscsi: Virtual adapter failed rc %d!\n",
++			dev_err(hostdata->dev, "Virtual adapter failed rc %d!\n",
+ 			       crq->format);
+ 
+ 			purge_requests(hostdata, DID_ERROR);
+@@ -1251,8 +1301,7 @@
+ 					       0xC001000000000000LL, 0))) {
+ 					atomic_set(&hostdata->request_limit,
+ 						   -1);
+-					printk(KERN_ERR
+-					       "ibmvscsi: error after reset\n");
++					dev_err(hostdata->dev, "error after reset\n");
+ 			}
+ 		}
+ 		scsi_unblock_requests(hostdata->host);
+@@ -1260,8 +1309,7 @@
+ 	case 0x80:		/* real payload */
+ 		break;
+ 	default:
+-		printk(KERN_ERR
+-		       "ibmvscsi: got an invalid message type 0x%02x\n",
++		dev_err(hostdata->dev, "got an invalid message type 0x%02x\n",
+ 		       crq->valid);
+ 		return;
+ 	}
+@@ -1271,15 +1319,13 @@
+ 	 * actually sent
+ 	 */
+ 	if (!valid_event_struct(&hostdata->pool, evt_struct)) {
+-		printk(KERN_ERR
+-		       "ibmvscsi: returned correlation_token 0x%p is invalid!\n",
++		dev_err(hostdata->dev, "returned correlation_token 0x%p is invalid!\n",
+ 		       (void *)crq->IU_data_ptr);
+ 		return;
+ 	}
+ 
+ 	if (atomic_read(&evt_struct->free)) {
+-		printk(KERN_ERR
+-		       "ibmvscsi: received duplicate  correlation_token 0x%p!\n",
++		dev_err(hostdata->dev, "received duplicate correlation_token 0x%p!\n",
+ 		       (void *)crq->IU_data_ptr);
+ 		return;
+ 	}
+@@ -1288,11 +1334,12 @@
+ 		atomic_add(evt_struct->xfer_iu->srp.rsp.req_lim_delta,
+ 			   &hostdata->request_limit);
+ 
++	del_timer(&evt_struct->timer);
++
+ 	if (evt_struct->done)
+ 		evt_struct->done(evt_struct);
+ 	else
+-		printk(KERN_ERR
+-		       "ibmvscsi: returned done() is NULL; not running it!\n");
++		dev_err(hostdata->dev, "returned done() is NULL; not running it!\n");
+ 
+ 	/*
+ 	 * Lock the host_lock before messing with these structures, since we
+@@ -1313,13 +1360,13 @@
+ {
+ 	struct viosrp_host_config *host_config;
+ 	struct srp_event_struct *evt_struct;
++	unsigned long flags;
+ 	dma_addr_t addr;
+ 	int rc;
+ 
+ 	evt_struct = get_event_struct(&hostdata->pool);
+ 	if (!evt_struct) {
+-		printk(KERN_ERR
+-		       "ibmvscsi: could't allocate event for HOST_CONFIG!\n");
++		dev_err(hostdata->dev, "couldn't allocate event for HOST_CONFIG!\n");
+ 		return -1;
+ 	}
+ 
+@@ -1339,14 +1386,15 @@
+ 						    DMA_BIDIRECTIONAL);
+ 
+ 	if (dma_mapping_error(host_config->buffer)) {
+-		printk(KERN_ERR
+-		       "ibmvscsi: dma_mapping error " "getting host config\n");
++		dev_err(hostdata->dev, "dma_mapping error getting host config\n");
+ 		free_event_struct(&hostdata->pool, evt_struct);
+ 		return -1;
+ 	}
+ 
+ 	init_completion(&evt_struct->comp);
+-	rc = ibmvscsi_send_srp_event(evt_struct, hostdata);
++	spin_lock_irqsave(hostdata->host->host_lock, flags);
++	rc = ibmvscsi_send_srp_event(evt_struct, hostdata, init_timeout * 2);
++	spin_unlock_irqrestore(hostdata->host->host_lock, flags);
+ 	if (rc == 0)
+ 		wait_for_completion(&evt_struct->comp);
+ 	dma_unmap_single(hostdata->dev, addr, length, DMA_BIDIRECTIONAL);
+@@ -1375,6 +1423,23 @@
+ 	return 0;
+ }
+ 
++/**
++ * ibmvscsi_change_queue_depth - Change the device's queue depth
++ * @sdev:	scsi device struct
++ * @qdepth:	depth to set
++ *
++ * Return value:
++ * 	actual depth set
++ **/
++static int ibmvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth)
++{
++	if (qdepth > IBMVSCSI_MAX_CMDS_PER_LUN)
++		qdepth = IBMVSCSI_MAX_CMDS_PER_LUN;
++
++	scsi_adjust_queue_depth(sdev, 0, qdepth);
++	return sdev->queue_depth;
++}
++
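
ibmvscsi_change_queue_depth() gives the midlayer a hook for runtime
queue-depth changes, clamped to the new IBMVSCSI_MAX_CMDS_PER_LUN (64, see
the header hunk further down). With the hook in place the depth can be
tuned from userspace through the device's sysfs attribute, for example
(device name hypothetical):

	echo 32 > /sys/block/sdc/device/queue_depth
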
+ /* ------------------------------------------------------------
+  * sysfs attributes
+  */
+@@ -1520,7 +1585,9 @@
+ 	.queuecommand = ibmvscsi_queuecommand,
+ 	.eh_abort_handler = ibmvscsi_eh_abort_handler,
+ 	.eh_device_reset_handler = ibmvscsi_eh_device_reset_handler,
++	.eh_host_reset_handler = ibmvscsi_eh_host_reset_handler,
+ 	.slave_configure = ibmvscsi_slave_configure,
++	.change_queue_depth = ibmvscsi_change_queue_depth,
+ 	.cmd_per_lun = 16,
+ 	.can_queue = IBMVSCSI_MAX_REQUESTS_DEFAULT,
+ 	.this_id = -1,
+@@ -1545,7 +1612,7 @@
+ 	driver_template.can_queue = max_requests;
+ 	host = scsi_host_alloc(&driver_template, sizeof(*hostdata));
+ 	if (!host) {
+-		printk(KERN_ERR "ibmvscsi: couldn't allocate host data\n");
++		dev_err(&vdev->dev, "couldn't allocate host data\n");
+ 		goto scsi_host_alloc_failed;
+ 	}
+ 
+@@ -1559,11 +1626,11 @@
+ 
+ 	rc = ibmvscsi_init_crq_queue(&hostdata->queue, hostdata, max_requests);
+ 	if (rc != 0 && rc != H_RESOURCE) {
+-		printk(KERN_ERR "ibmvscsi: couldn't initialize crq\n");
++		dev_err(&vdev->dev, "couldn't initialize crq. rc=%d\n", rc);
+ 		goto init_crq_failed;
+ 	}
+ 	if (initialize_event_pool(&hostdata->pool, max_requests, hostdata) != 0) {
+-		printk(KERN_ERR "ibmvscsi: couldn't initialize event pool\n");
++		dev_err(&vdev->dev, "couldn't initialize event pool\n");
+ 		goto init_pool_failed;
+ 	}
+ 
+diff -Nurb linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.h linux-2.6.22-try2/drivers/scsi/ibmvscsi/ibmvscsi.h
+--- linux-2.6.22-570/drivers/scsi/ibmvscsi/ibmvscsi.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/ibmvscsi/ibmvscsi.h	2007-12-19 15:29:23.000000000 -0500
+@@ -45,6 +45,7 @@
+ #define MAX_INDIRECT_BUFS 10
+ 
+ #define IBMVSCSI_MAX_REQUESTS_DEFAULT 100
++#define IBMVSCSI_MAX_CMDS_PER_LUN 64
+ 
+ /* ------------------------------------------------------------
+  * Data Structures
+@@ -69,6 +70,7 @@
+ 	union viosrp_iu iu;
+ 	void (*cmnd_done) (struct scsi_cmnd *);
+ 	struct completion comp;
++	struct timer_list timer;
+ 	union viosrp_iu *sync_srp;
+ 	struct srp_direct_buf *ext_list;
+ 	dma_addr_t ext_list_token;
+diff -Nurb linux-2.6.22-570/drivers/scsi/ibmvscsi/rpa_vscsi.c linux-2.6.22-try2/drivers/scsi/ibmvscsi/rpa_vscsi.c
+--- linux-2.6.22-570/drivers/scsi/ibmvscsi/rpa_vscsi.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/ibmvscsi/rpa_vscsi.c	2007-12-19 15:29:23.000000000 -0500
+@@ -177,7 +177,7 @@
+ 	memset(&hostdata->madapter_info, 0x00,
+ 			sizeof(hostdata->madapter_info));
+ 
+-	printk(KERN_INFO "rpa_vscsi: SPR_VERSION: %s\n", SRP_VERSION);
++	dev_info(hostdata->dev, "SRP_VERSION: %s\n", SRP_VERSION);
+ 	strcpy(hostdata->madapter_info.srp_version, SRP_VERSION);
+ 
+ 	strncpy(hostdata->madapter_info.partition_name, partition_name,
+@@ -232,25 +232,24 @@
+ 
+ 	if (rc == 2) {
+ 		/* Adapter is good, but other end is not ready */
+-		printk(KERN_WARNING "ibmvscsi: Partner adapter not ready\n");
++		dev_warn(hostdata->dev, "Partner adapter not ready\n");
+ 		retrc = 0;
+ 	} else if (rc != 0) {
+-		printk(KERN_WARNING "ibmvscsi: Error %d opening adapter\n", rc);
++		dev_warn(hostdata->dev, "Error %d opening adapter\n", rc);
+ 		goto reg_crq_failed;
+ 	}
+ 
+ 	if (request_irq(vdev->irq,
+ 			ibmvscsi_handle_event,
+ 			0, "ibmvscsi", (void *)hostdata) != 0) {
+-		printk(KERN_ERR "ibmvscsi: couldn't register irq 0x%x\n",
++		dev_err(hostdata->dev, "couldn't register irq 0x%x\n",
+ 		       vdev->irq);
+ 		goto req_irq_failed;
+ 	}
+ 
+ 	rc = vio_enable_interrupts(vdev);
+ 	if (rc != 0) {
+-		printk(KERN_ERR "ibmvscsi:  Error %d enabling interrupts!!!\n",
+-		       rc);
++		dev_err(hostdata->dev, "Error %d enabling interrupts!!!\n", rc);
+ 		goto req_irq_failed;
+ 	}
+ 
+@@ -294,7 +293,7 @@
+ 	} while ((rc == H_IN_PROGRESS) || (rc == H_BUSY) || (H_IS_LONG_BUSY(rc)));
+ 
+ 	if (rc)
+-		printk(KERN_ERR "ibmvscsi: Error %d enabling adapter\n", rc);
++		dev_err(hostdata->dev, "Error %d enabling adapter\n", rc);
+ 	return rc;
+ }
+ 
+@@ -327,10 +326,9 @@
+ 				queue->msg_token, PAGE_SIZE);
+ 	if (rc == 2) {
+ 		/* Adapter is good, but other end is not ready */
+-		printk(KERN_WARNING "ibmvscsi: Partner adapter not ready\n");
++		dev_warn(hostdata->dev, "Partner adapter not ready\n");
+ 	} else if (rc != 0) {
+-		printk(KERN_WARNING
+-		       "ibmvscsi: couldn't register crq--rc 0x%x\n", rc);
++		dev_warn(hostdata->dev, "couldn't register crq--rc 0x%x\n", rc);
+ 	}
+ 	return rc;
+ }
+diff -Nurb linux-2.6.22-570/drivers/scsi/initio.c linux-2.6.22-try2/drivers/scsi/initio.c
+--- linux-2.6.22-570/drivers/scsi/initio.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/initio.c	2007-12-19 15:29:23.000000000 -0500
+@@ -3,7 +3,8 @@
+  *
+  * Copyright (c) 1994-1998 Initio Corporation
+  * Copyright (c) 1998 Bas Vermeulen <bvermeul@blackstar.xs4all.nl>
+- * All rights reserved.
++ * Copyright (c) 2004 Christoph Hellwig <hch@lst.de>
++ * Copyright (c) 2007 Red Hat <alan@redhat.com>
+  *
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License as published by
+@@ -19,38 +20,6 @@
+  * along with this program; see the file COPYING.  If not, write to
+  * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+  *
+- * --------------------------------------------------------------------------
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- * 1. Redistributions of source code must retain the above copyright
+- *    notice, this list of conditions, and the following disclaimer,
+- *    without modification, immediately at the beginning of the file.
+- * 2. Redistributions in binary form must reproduce the above copyright
+- *    notice, this list of conditions and the following disclaimer in the
+- *    documentation and/or other materials provided with the distribution.
+- * 3. The name of the author may not be used to endorse or promote products
+- *    derived from this software without specific prior written permission.
+- *
+- * Where this Software is combined with software released under the terms of 
+- * the GNU General Public License ("GPL") and the terms of the GPL would require the 
+- * combined work to also be released under the terms of the GPL, the terms
+- * and conditions of this License will apply in addition to those of the
+- * GPL with the exception of any terms or conditions of this License that
+- * conflict with, or are expressly prohibited by, the GPL.
+- *
+- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+- * SUCH DAMAGE.
+  *
+  *************************************************************************
+  *
+@@ -70,14 +39,14 @@
+  *		- Fix memory allocation problem
+  * 03/04/98 hc	- v1.01l
+  *		- Fix tape rewind which will hang the system problem
+- *		- Set can_queue to tul_num_scb
++ *		- Set can_queue to initio_num_scb
+  * 06/25/98 hc	- v1.01m
+  *		- Get it work for kernel version >= 2.1.75
+- *		- Dynamic assign SCSI bus reset holding time in init_tulip()
++ *		- Dynamic assign SCSI bus reset holding time in initio_init()
+  * 07/02/98 hc	- v1.01n
+  *		- Support 0002134A
+  * 08/07/98 hc  - v1.01o
+- *		- Change the tul_abort_srb routine to use scsi_done. <01>
++ *		- Change the initio_abort_srb routine to use scsi_done. <01>
+  * 09/07/98 hl  - v1.02
+  *              - Change the INI9100U define and proc_dir_entry to
+  *                reflect the newer Kernel 2.1.118, but the v1.o1o
+@@ -150,23 +119,13 @@
+ static unsigned int i91u_debug = DEBUG_DEFAULT;
+ #endif
+ 
+-#define TUL_RDWORD(x,y)         (short)(inl((int)((ULONG)((ULONG)x+(UCHAR)y)) ))
+-
+-typedef struct PCI_ID_Struc {
+-	unsigned short vendor_id;
+-	unsigned short device_id;
+-} PCI_ID;
+-
+-static int tul_num_ch = 4;	/* Maximum 4 adapters           */
+-static int tul_num_scb;
+-static int tul_tag_enable = 1;
+-static SCB *tul_scb;
++static int initio_tag_enable = 1;
+ 
+ #ifdef DEBUG_i91u
+ static int setup_debug = 0;
+ #endif
+ 
+-static void i91uSCBPost(BYTE * pHcb, BYTE * pScb);
++static void i91uSCBPost(u8 * pHcb, u8 * pScb);
+ 
+ /* PCI Devices supported by this driver */
+ static struct pci_device_id i91u_pci_devices[] = {
+@@ -184,74 +143,66 @@
+ #define DEBUG_STATE     0
+ #define INT_DISC	0
+ 
+-/*--- external functions --*/
+-static void tul_se2_wait(void);
++/*--- forward references ---*/
++static struct scsi_ctrl_blk *initio_find_busy_scb(struct initio_host * host, u16 tarlun);
++static struct scsi_ctrl_blk *initio_find_done_scb(struct initio_host * host);
++
++static int tulip_main(struct initio_host * host);
++
++static int initio_next_state(struct initio_host * host);
++static int initio_state_1(struct initio_host * host);
++static int initio_state_2(struct initio_host * host);
++static int initio_state_3(struct initio_host * host);
++static int initio_state_4(struct initio_host * host);
++static int initio_state_5(struct initio_host * host);
++static int initio_state_6(struct initio_host * host);
++static int initio_state_7(struct initio_host * host);
++static int initio_xfer_data_in(struct initio_host * host);
++static int initio_xfer_data_out(struct initio_host * host);
++static int initio_xpad_in(struct initio_host * host);
++static int initio_xpad_out(struct initio_host * host);
++static int initio_status_msg(struct initio_host * host);
++
++static int initio_msgin(struct initio_host * host);
++static int initio_msgin_sync(struct initio_host * host);
++static int initio_msgin_accept(struct initio_host * host);
++static int initio_msgout_reject(struct initio_host * host);
++static int initio_msgin_extend(struct initio_host * host);
++
++static int initio_msgout_ide(struct initio_host * host);
++static int initio_msgout_abort_targ(struct initio_host * host);
++static int initio_msgout_abort_tag(struct initio_host * host);
++
++static int initio_bus_device_reset(struct initio_host * host);
++static void initio_select_atn(struct initio_host * host, struct scsi_ctrl_blk * scb);
++static void initio_select_atn3(struct initio_host * host, struct scsi_ctrl_blk * scb);
++static void initio_select_atn_stop(struct initio_host * host, struct scsi_ctrl_blk * scb);
++static int int_initio_busfree(struct initio_host * host);
++static int int_initio_scsi_rst(struct initio_host * host);
++static int int_initio_bad_seq(struct initio_host * host);
++static int int_initio_resel(struct initio_host * host);
++static int initio_sync_done(struct initio_host * host);
++static int wdtr_done(struct initio_host * host);
++static int wait_tulip(struct initio_host * host);
++static int initio_wait_done_disc(struct initio_host * host);
++static int initio_wait_disc(struct initio_host * host);
++static void tulip_scsi(struct initio_host * host);
++static int initio_post_scsi_rst(struct initio_host * host);
++
++static void initio_se2_ew_en(unsigned long base);
++static void initio_se2_ew_ds(unsigned long base);
++static int initio_se2_rd_all(unsigned long base);
++static void initio_se2_update_all(unsigned long base);	/* setup default pattern */
++static void initio_read_eeprom(unsigned long base);
+ 
+-/*--- forward refrence ---*/
+-static SCB *tul_find_busy_scb(HCS * pCurHcb, WORD tarlun);
+-static SCB *tul_find_done_scb(HCS * pCurHcb);
+-
+-static int tulip_main(HCS * pCurHcb);
+-
+-static int tul_next_state(HCS * pCurHcb);
+-static int tul_state_1(HCS * pCurHcb);
+-static int tul_state_2(HCS * pCurHcb);
+-static int tul_state_3(HCS * pCurHcb);
+-static int tul_state_4(HCS * pCurHcb);
+-static int tul_state_5(HCS * pCurHcb);
+-static int tul_state_6(HCS * pCurHcb);
+-static int tul_state_7(HCS * pCurHcb);
+-static int tul_xfer_data_in(HCS * pCurHcb);
+-static int tul_xfer_data_out(HCS * pCurHcb);
+-static int tul_xpad_in(HCS * pCurHcb);
+-static int tul_xpad_out(HCS * pCurHcb);
+-static int tul_status_msg(HCS * pCurHcb);
+-
+-static int tul_msgin(HCS * pCurHcb);
+-static int tul_msgin_sync(HCS * pCurHcb);
+-static int tul_msgin_accept(HCS * pCurHcb);
+-static int tul_msgout_reject(HCS * pCurHcb);
+-static int tul_msgin_extend(HCS * pCurHcb);
+-
+-static int tul_msgout_ide(HCS * pCurHcb);
+-static int tul_msgout_abort_targ(HCS * pCurHcb);
+-static int tul_msgout_abort_tag(HCS * pCurHcb);
+-
+-static int tul_bus_device_reset(HCS * pCurHcb);
+-static void tul_select_atn(HCS * pCurHcb, SCB * pCurScb);
+-static void tul_select_atn3(HCS * pCurHcb, SCB * pCurScb);
+-static void tul_select_atn_stop(HCS * pCurHcb, SCB * pCurScb);
+-static int int_tul_busfree(HCS * pCurHcb);
+-static int int_tul_scsi_rst(HCS * pCurHcb);
+-static int int_tul_bad_seq(HCS * pCurHcb);
+-static int int_tul_resel(HCS * pCurHcb);
+-static int tul_sync_done(HCS * pCurHcb);
+-static int wdtr_done(HCS * pCurHcb);
+-static int wait_tulip(HCS * pCurHcb);
+-static int tul_wait_done_disc(HCS * pCurHcb);
+-static int tul_wait_disc(HCS * pCurHcb);
+-static void tulip_scsi(HCS * pCurHcb);
+-static int tul_post_scsi_rst(HCS * pCurHcb);
+-
+-static void tul_se2_ew_en(WORD CurBase);
+-static void tul_se2_ew_ds(WORD CurBase);
+-static int tul_se2_rd_all(WORD CurBase);
+-static void tul_se2_update_all(WORD CurBase);	/* setup default pattern */
+-static void tul_read_eeprom(WORD CurBase);
+-
+-				/* ---- INTERNAL VARIABLES ---- */
+-static HCS tul_hcs[MAX_SUPPORTED_ADAPTERS];
+-static INI_ADPT_STRUCT i91u_adpt[MAX_SUPPORTED_ADAPTERS];
++/* ---- INTERNAL VARIABLES ---- */
+ 
+-/*NVRAM nvram, *nvramp = &nvram; */
+ static NVRAM i91unvram;
+ static NVRAM *i91unvramp;
+ 
+-
+-
+-static UCHAR i91udftNvRam[64] =
++static u8 i91udftNvRam[64] =
+ {
+-/*----------- header -----------*/
++	/*----------- header -----------*/
+ 	0x25, 0xc9,		/* Signature    */
+ 	0x40,			/* Size         */
+ 	0x01,			/* Revision     */
+@@ -289,7 +240,7 @@
+ 	0, 0};			/*      - CheckSum -            */
+ 
+ 
+-static UCHAR tul_rate_tbl[8] =	/* fast 20      */
++static u8 initio_rate_tbl[8] =	/* fast 20      */
+ {
+ 				/* nanosecond devide by 4 */
+ 	12,			/* 50ns,  20M   */
+@@ -302,53 +253,17 @@
+ 	62			/* 250ns, 4M    */
+ };
+ 
+-static void tul_do_pause(unsigned amount)
+-{				/* Pause for amount jiffies */
++static void initio_do_pause(unsigned amount)
++{
++	/* Pause for amount jiffies */
+ 	unsigned long the_time = jiffies + amount;
+ 
+-	while (time_before_eq(jiffies, the_time));
++	while (time_before_eq(jiffies, the_time))
++		cpu_relax();
+ }
+ 
+ /*-- forward reference --*/
+ 
+-/*******************************************************************
+-	Use memeory refresh time        ~ 15us * 2
+-********************************************************************/
+-void tul_se2_wait(void)
+-{
+-#if 1
+-	udelay(30);
+-#else
+-	UCHAR readByte;
+-
+-	readByte = TUL_RD(0, 0x61);
+-	if ((readByte & 0x10) == 0x10) {
+-		for (;;) {
+-			readByte = TUL_RD(0, 0x61);
+-			if ((readByte & 0x10) == 0x10)
+-				break;
+-		}
+-		for (;;) {
+-			readByte = TUL_RD(0, 0x61);
+-			if ((readByte & 0x10) != 0x10)
+-				break;
+-		}
+-	} else {
+-		for (;;) {
+-			readByte = TUL_RD(0, 0x61);
+-			if ((readByte & 0x10) == 0x10)
+-				break;
+-		}
+-		for (;;) {
+-			readByte = TUL_RD(0, 0x61);
+-			if ((readByte & 0x10) != 0x10)
+-				break;
+-		}
+-	}
+-#endif
+-}
+-
+-
+ /******************************************************************
+  Input: instruction for  Serial E2PROM
+ 
+@@ -379,1174 +294,1019 @@
+ 
+ 
+ ******************************************************************/
+-static void tul_se2_instr(WORD CurBase, UCHAR instr)
++
++/**
++ *	initio_se2_instr	-	bitbang an instruction
++ *	@base: Base of InitIO controller
++ *	@instr: Instruction for serial E2PROM
++ *
++ *	Bitbang an instruction out to the serial E2Prom
++ */
++
++static void initio_se2_instr(unsigned long base, u8 instr)
+ {
+ 	int i;
+-	UCHAR b;
++	u8 b;
+ 
+-	TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2DO);	/* cs+start bit */
+-	tul_se2_wait();
+-	TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK | SE2DO);	/* +CLK */
+-	tul_se2_wait();
++	outb(SE2CS | SE2DO, base + TUL_NVRAM);		/* cs+start bit */
++	udelay(30);
++	outb(SE2CS | SE2CLK | SE2DO, base + TUL_NVRAM);	/* +CLK */
++	udelay(30);
+ 
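++	/* Shift the 8-bit opcode out MSB first: each bit is driven on SE2DO with the clock low, then latched by raising SE2CLK */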
+ 	for (i = 0; i < 8; i++) {
+ 		if (instr & 0x80)
+ 			b = SE2CS | SE2DO;	/* -CLK+dataBit */
+ 		else
+ 			b = SE2CS;	/* -CLK */
+-		TUL_WR(CurBase + TUL_NVRAM, b);
+-		tul_se2_wait();
+-		TUL_WR(CurBase + TUL_NVRAM, b | SE2CLK);	/* +CLK */
+-		tul_se2_wait();
++		outb(b, base + TUL_NVRAM);
++		udelay(30);
++		outb(b | SE2CLK, base + TUL_NVRAM);	/* +CLK */
++		udelay(30);
+ 		instr <<= 1;
+ 	}
+-	TUL_WR(CurBase + TUL_NVRAM, SE2CS);	/* -CLK */
+-	tul_se2_wait();
+-	return;
++	outb(SE2CS, base + TUL_NVRAM);			/* -CLK */
++	udelay(30);
+ }
+ 
+ 
+-/******************************************************************
+- Function name  : tul_se2_ew_en
+- Description    : Enable erase/write state of serial EEPROM
+-******************************************************************/
+-void tul_se2_ew_en(WORD CurBase)
++/**
++ *	initio_se2_ew_en	-	Enable erase/write
++ *	@base: Base address of InitIO controller
++ *
++ *	Enable erase/write state of serial EEPROM
++ */
++void initio_se2_ew_en(unsigned long base)
+ {
+-	tul_se2_instr(CurBase, 0x30);	/* EWEN */
+-	TUL_WR(CurBase + TUL_NVRAM, 0);		/* -CS  */
+-	tul_se2_wait();
+-	return;
++	initio_se2_instr(base, 0x30);	/* EWEN */
++	outb(0, base + TUL_NVRAM);	/* -CS  */
++	udelay(30);
+ }
+ 
+ 
+-/************************************************************************
+- Disable erase/write state of serial EEPROM
+-*************************************************************************/
+-void tul_se2_ew_ds(WORD CurBase)
+-{
+-	tul_se2_instr(CurBase, 0);	/* EWDS */
+-	TUL_WR(CurBase + TUL_NVRAM, 0);		/* -CS  */
+-	tul_se2_wait();
+-	return;
++/**
++ *	initio_se2_ew_ds	-	Disable erase/write
++ *	@base: Base address of InitIO controller
++ *
++ *	Disable erase/write state of serial EEPROM
++ */
++void initio_se2_ew_ds(unsigned long base)
++{
++	initio_se2_instr(base, 0);	/* EWDS */
++	outb(0, base + TUL_NVRAM);	/* -CS  */
++	udelay(30);
+ }
+ 
+ 
+-/******************************************************************
+-	Input  :address of Serial E2PROM
+-	Output :value stored in  Serial E2PROM
+-*******************************************************************/
+-static USHORT tul_se2_rd(WORD CurBase, ULONG adr)
++/**
++ *	initio_se2_rd		-	read E2PROM word
++ *	@base: Base of InitIO controller
++ *	@addr: Address of word in E2PROM
++ *
++ *	Read a word from the NV E2PROM device
++ */
++static u16 initio_se2_rd(unsigned long base, u8 addr)
+ {
+-	UCHAR instr, readByte;
+-	USHORT readWord;
++	u8 instr, rb;
++	u16 val = 0;
+ 	int i;
+ 
+-	instr = (UCHAR) (adr | 0x80);
+-	tul_se2_instr(CurBase, instr);	/* READ INSTR */
+-	readWord = 0;
++	instr = (u8) (addr | 0x80);
++	initio_se2_instr(base, instr);	/* READ INSTR */
+ 
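++	/* Clock in 16 data bits MSB first; the EEPROM shifts each bit onto SE2DI after the falling edge of SE2CLK */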
+ 	for (i = 15; i >= 0; i--) {
+-		TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK);	/* +CLK */
+-		tul_se2_wait();
+-		TUL_WR(CurBase + TUL_NVRAM, SE2CS);	/* -CLK */
++		outb(SE2CS | SE2CLK, base + TUL_NVRAM);	/* +CLK */
++		udelay(30);
++		outb(SE2CS, base + TUL_NVRAM);		/* -CLK */
+ 
+ 		/* sample data after the following edge of clock  */
+-		readByte = TUL_RD(CurBase, TUL_NVRAM);
+-		readByte &= SE2DI;
+-		readWord += (readByte << i);
+-		tul_se2_wait();	/* 6/20/95 */
++		rb = inb(base + TUL_NVRAM);
++		rb &= SE2DI;
++		val += (rb << i);
++		udelay(30);	/* 6/20/95 */
+ 	}
+ 
+-	TUL_WR(CurBase + TUL_NVRAM, 0);		/* no chip select */
+-	tul_se2_wait();
+-	return readWord;
++	outb(0, base + TUL_NVRAM);		/* no chip select */
++	udelay(30);
++	return val;
+ }
+ 
+-
+-/******************************************************************
+- Input: new value in  Serial E2PROM, address of Serial E2PROM
+-*******************************************************************/
+-static void tul_se2_wr(WORD CurBase, UCHAR adr, USHORT writeWord)
++/**
++ *	initio_se2_wr		-	write E2PROM word
++ *	@base: Base of InitIO controller
++ *	@addr: Address of word in E2PROM
++ *	@val: Value to write
++ *
++ *	Write a word to the NV E2PROM device. Used when recovering from
++ *	a problem with the NV.
++ */
++static void initio_se2_wr(unsigned long base, u8 addr, u16 val)
+ {
+-	UCHAR readByte;
+-	UCHAR instr;
++	u8 rb;
++	u8 instr;
+ 	int i;
+ 
+-	instr = (UCHAR) (adr | 0x40);
+-	tul_se2_instr(CurBase, instr);	/* WRITE INSTR */
++	instr = (u8) (addr | 0x40);
++	initio_se2_instr(base, instr);	/* WRITE INSTR */
+ 	for (i = 15; i >= 0; i--) {
+-		if (writeWord & 0x8000)
+-			TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2DO);	/* -CLK+dataBit 1 */
++		if (val & 0x8000)
++			outb(SE2CS | SE2DO, base + TUL_NVRAM);	/* -CLK+dataBit 1 */
+ 		else
+-			TUL_WR(CurBase + TUL_NVRAM, SE2CS);	/* -CLK+dataBit 0 */
+-		tul_se2_wait();
+-		TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK);	/* +CLK */
+-		tul_se2_wait();
+-		writeWord <<= 1;
+-	}
+-	TUL_WR(CurBase + TUL_NVRAM, SE2CS);	/* -CLK */
+-	tul_se2_wait();
+-	TUL_WR(CurBase + TUL_NVRAM, 0);		/* -CS  */
+-	tul_se2_wait();
++			outb(SE2CS, base + TUL_NVRAM);		/* -CLK+dataBit 0 */
++		udelay(30);
++		outb(SE2CS | SE2CLK, base + TUL_NVRAM);		/* +CLK */
++		udelay(30);
++		val <<= 1;
++	}
++	outb(SE2CS, base + TUL_NVRAM);				/* -CLK */
++	udelay(30);
++	outb(0, base + TUL_NVRAM);				/* -CS  */
++	udelay(30);
+ 
+-	TUL_WR(CurBase + TUL_NVRAM, SE2CS);	/* +CS  */
+-	tul_se2_wait();
++	outb(SE2CS, base + TUL_NVRAM);				/* +CS  */
++	udelay(30);
+ 
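++	/* With CS held high, poll the data-out line: the EEPROM drives SE2DI high once its internal write cycle completes */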
+ 	for (;;) {
+-		TUL_WR(CurBase + TUL_NVRAM, SE2CS | SE2CLK);	/* +CLK */
+-		tul_se2_wait();
+-		TUL_WR(CurBase + TUL_NVRAM, SE2CS);	/* -CLK */
+-		tul_se2_wait();
+-		if ((readByte = TUL_RD(CurBase, TUL_NVRAM)) & SE2DI)
++		outb(SE2CS | SE2CLK, base + TUL_NVRAM);		/* +CLK */
++		udelay(30);
++		outb(SE2CS, base + TUL_NVRAM);			/* -CLK */
++		udelay(30);
++		if ((rb = inb(base + TUL_NVRAM)) & SE2DI)
+ 			break;	/* write complete */
+ 	}
+-	TUL_WR(CurBase + TUL_NVRAM, 0);		/* -CS */
+-	return;
++	outb(0, base + TUL_NVRAM);				/* -CS */
+ }
+ 
++/**
++ *	initio_se2_rd_all	-	read hostadapter NV configuration
++ *	@base: Base address of InitIO controller
++ *
++ *	Reads the E2PROM data into main memory. Ensures that the checksum
++ *	and header marker are valid. Returns 1 on success, -1 on error.
++ */
+ 
+-/***********************************************************************
+- Read SCSI H/A configuration parameters from serial EEPROM
+-************************************************************************/
+-int tul_se2_rd_all(WORD CurBase)
++static int initio_se2_rd_all(unsigned long base)
+ {
+ 	int i;
+-	ULONG chksum = 0;
+-	USHORT *np;
++	u16 chksum = 0;
++	u16 *np;
+ 
+ 	i91unvramp = &i91unvram;
+-	np = (USHORT *) i91unvramp;
+-	for (i = 0; i < 32; i++) {
+-		*np++ = tul_se2_rd(CurBase, i);
+-	}
++	np = (u16 *) i91unvramp;
++	for (i = 0; i < 32; i++)
++		*np++ = initio_se2_rd(base, i);
+ 
+-/*--------------------Is signature "ini" ok ? ----------------*/
++	/* Is signature "ini" ok ? */
+ 	if (i91unvramp->NVM_Signature != INI_SIGNATURE)
+ 		return -1;
+-/*---------------------- Is ckecksum ok ? ----------------------*/
+-	np = (USHORT *) i91unvramp;
++	/* Is checksum ok ? */
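++	/* The checksum word at offset 31 must equal the 16-bit sum of the first 31 words */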
++	np = (u16 *) i91unvramp;
+ 	for (i = 0; i < 31; i++)
+ 		chksum += *np++;
+-	if (i91unvramp->NVM_CheckSum != (USHORT) chksum)
++	if (i91unvramp->NVM_CheckSum != chksum)
+ 		return -1;
+ 	return 1;
+ }
+ 
+-
+-/***********************************************************************
+- Update SCSI H/A configuration parameters from serial EEPROM
+-************************************************************************/
+-void tul_se2_update_all(WORD CurBase)
++/**
++ *	initio_se2_update_all		-	Update E2PROM
++ *	@base: Base of InitIO controller
++ *
++ *	Update the E2PROM by writing any changes into the E2PROM
++ *	chip, rewriting the checksum.
++ */
++static void initio_se2_update_all(unsigned long base)
+ {				/* setup default pattern */
+ 	int i;
+-	ULONG chksum = 0;
+-	USHORT *np, *np1;
++	u16 chksum = 0;
++	u16 *np, *np1;
+ 
+ 	i91unvramp = &i91unvram;
+ 	/* Calculate checksum first */
+-	np = (USHORT *) i91udftNvRam;
++	np = (u16 *) i91udftNvRam;
+ 	for (i = 0; i < 31; i++)
+ 		chksum += *np++;
+-	*np = (USHORT) chksum;
+-	tul_se2_ew_en(CurBase);	/* Enable write  */
++	*np = chksum;
++	initio_se2_ew_en(base);	/* Enable write  */
+ 
+-	np = (USHORT *) i91udftNvRam;
+-	np1 = (USHORT *) i91unvramp;
++	np = (u16 *) i91udftNvRam;
++	np1 = (u16 *) i91unvramp;
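++	/* Rewrite only the words that differ from the default image, sparing needless E2PROM write cycles */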
+ 	for (i = 0; i < 32; i++, np++, np1++) {
+-		if (*np != *np1) {
+-			tul_se2_wr(CurBase, i, *np);
+-		}
++		if (*np != *np1)
++			initio_se2_wr(base, i, *np);
+ 	}
+-
+-	tul_se2_ew_ds(CurBase);	/* Disable write   */
+-	return;
++	initio_se2_ew_ds(base);	/* Disable write   */
+ }
+ 
+-/*************************************************************************
+- Function name  : read_eeprom
+-**************************************************************************/
+-void tul_read_eeprom(WORD CurBase)
+-{
+-	UCHAR gctrl;
+-
+-	i91unvramp = &i91unvram;
+-/*------Enable EEProm programming ---*/
+-	gctrl = TUL_RD(CurBase, TUL_GCTRL);
+-	TUL_WR(CurBase + TUL_GCTRL, gctrl | TUL_GCTRL_EEPROM_BIT);
+-	if (tul_se2_rd_all(CurBase) != 1) {
+-		tul_se2_update_all(CurBase);	/* setup default pattern */
+-		tul_se2_rd_all(CurBase);	/* load again  */
+-	}
+-/*------ Disable EEProm programming ---*/
+-	gctrl = TUL_RD(CurBase, TUL_GCTRL);
+-	TUL_WR(CurBase + TUL_GCTRL, gctrl & ~TUL_GCTRL_EEPROM_BIT);
+-}				/* read_eeprom */
++/**
++ *	initio_read_eeprom		-	Retrieve configuration
++ *	@base: Base of InitIO Host Adapter
++ *
++ *	Retrieve the host adapter configuration data from E2Prom. If the
++ *	data is invalid then the defaults are used and are also restored
++ *	into the E2PROM. This forms the access point for the SCSI driver
++ *	into the E2PROM layer, the other functions for the E2PROM are all
++ *	internal use.
++ *
++ *	Must be called single threaded, uses a shared global area.
++ */
+ 
+-static int Addi91u_into_Adapter_table(WORD wBIOS, WORD wBASE, BYTE bInterrupt,
+-				      BYTE bBus, BYTE bDevice)
++static void initio_read_eeprom(unsigned long base)
+ {
+-	int i, j;
++	u8 gctrl;
+ 
+-	for (i = 0; i < MAX_SUPPORTED_ADAPTERS; i++) {
+-		if (i91u_adpt[i].ADPT_BIOS < wBIOS)
+-			continue;
+-		if (i91u_adpt[i].ADPT_BIOS == wBIOS) {
+-			if (i91u_adpt[i].ADPT_BASE == wBASE) {
+-				if (i91u_adpt[i].ADPT_Bus != 0xFF)
+-					return 1;
+-			} else if (i91u_adpt[i].ADPT_BASE < wBASE)
+-					continue;
+-		}
+-		for (j = MAX_SUPPORTED_ADAPTERS - 1; j > i; j--) {
+-			i91u_adpt[j].ADPT_BASE = i91u_adpt[j - 1].ADPT_BASE;
+-			i91u_adpt[j].ADPT_INTR = i91u_adpt[j - 1].ADPT_INTR;
+-			i91u_adpt[j].ADPT_BIOS = i91u_adpt[j - 1].ADPT_BIOS;
+-			i91u_adpt[j].ADPT_Bus = i91u_adpt[j - 1].ADPT_Bus;
+-			i91u_adpt[j].ADPT_Device = i91u_adpt[j - 1].ADPT_Device;
+-		}
+-		i91u_adpt[i].ADPT_BASE = wBASE;
+-		i91u_adpt[i].ADPT_INTR = bInterrupt;
+-		i91u_adpt[i].ADPT_BIOS = wBIOS;
+-		i91u_adpt[i].ADPT_Bus = bBus;
+-		i91u_adpt[i].ADPT_Device = bDevice;
+-		return 0;
+-	}
+-	return 1;
++	i91unvramp = &i91unvram;
++	/* Enable EEProm programming */
++	gctrl = inb(base + TUL_GCTRL);
++	outb(gctrl | TUL_GCTRL_EEPROM_BIT, base + TUL_GCTRL);
++	if (initio_se2_rd_all(base) != 1) {
++		initio_se2_update_all(base);	/* setup default pattern */
++		initio_se2_rd_all(base);	/* load again  */
++	}
++	/* Disable EEProm programming */
++	gctrl = inb(base + TUL_GCTRL);
++	outb(gctrl & ~TUL_GCTRL_EEPROM_BIT, base + TUL_GCTRL);
+ }
+ 
+-static void init_i91uAdapter_table(void)
+-{
+-	int i;
+-
+-	for (i = 0; i < MAX_SUPPORTED_ADAPTERS; i++) {	/* Initialize adapter structure */
+-		i91u_adpt[i].ADPT_BIOS = 0xffff;
+-		i91u_adpt[i].ADPT_BASE = 0xffff;
+-		i91u_adpt[i].ADPT_INTR = 0xff;
+-		i91u_adpt[i].ADPT_Bus = 0xff;
+-		i91u_adpt[i].ADPT_Device = 0xff;
+-	}
+-	return;
+-}
++/**
++ *	initio_stop_bm		-	stop bus master
++ *	@host: InitIO we are stopping
++ *
++ *	Stop any pending DMA operation, aborting the DMA if necessary
++ */
+ 
+-static void tul_stop_bm(HCS * pCurHcb)
++static void initio_stop_bm(struct initio_host * host)
+ {
+ 
+-	if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND) {	/* if DMA xfer is pending, abort DMA xfer */
+-		TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_ABT | TAX_X_CLR_FIFO);
++	if (inb(host->addr + TUL_XStatus) & XPEND) {	/* if DMA xfer is pending, abort DMA xfer */
++		outb(TAX_X_ABT | TAX_X_CLR_FIFO, host->addr + TUL_XCmd);
+ 		/* wait Abort DMA xfer done */
+-		while ((TUL_RD(pCurHcb->HCS_Base, TUL_Int) & XABT) == 0);
++		while ((inb(host->addr + TUL_Int) & XABT) == 0)
++			cpu_relax();
+ 	}
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
++	outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
+ }
+ 
+-/***************************************************************************/
+-static void get_tulipPCIConfig(HCS * pCurHcb, int ch_idx)
+-{
+-	pCurHcb->HCS_Base = i91u_adpt[ch_idx].ADPT_BASE;	/* Supply base address  */
+-	pCurHcb->HCS_BIOS = i91u_adpt[ch_idx].ADPT_BIOS;	/* Supply BIOS address  */
+-	pCurHcb->HCS_Intr = i91u_adpt[ch_idx].ADPT_INTR;	/* Supply interrupt line */
+-	return;
+-}
++/**
++ *	initio_reset_scsi		-	Reset SCSI host controller
++ *	@host: InitIO host to reset
++ *	@seconds: Recovery time
++ *
++ *	Perform a full reset of the SCSI subsystem.
++ */
+ 
+-/***************************************************************************/
+-static int tul_reset_scsi(HCS * pCurHcb, int seconds)
++static int initio_reset_scsi(struct initio_host * host, int seconds)
+ {
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_RST_BUS);
++	outb(TSC_RST_BUS, host->addr + TUL_SCtrl0);
+ 
+-	while (!((pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt)) & TSS_SCSIRST_INT));
+-	/* reset tulip chip */
++	while (!((host->jsint = inb(host->addr + TUL_SInt)) & TSS_SCSIRST_INT))
++		cpu_relax();
+ 
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, 0);
++	/* reset tulip chip */
++	outb(0, host->addr + TUL_SSignal);
+ 
+ 	/* Stall for a while, wait for target's firmware ready,make it 2 sec ! */
+ 	/* SONY 5200 tape drive won't work if only stall for 1 sec */
+-	tul_do_pause(seconds * HZ);
+-
+-	TUL_RD(pCurHcb->HCS_Base, TUL_SInt);
++	/* FIXME: this is a very long busy wait right now */
++	initio_do_pause(seconds * HZ);
+ 
+-	return (SCSI_RESET_SUCCESS);
++	inb(host->addr + TUL_SInt);
++	return SCSI_RESET_SUCCESS;
+ }
+ 
+-/***************************************************************************/
+-static int init_tulip(HCS * pCurHcb, SCB * scbp, int tul_num_scb,
+-		      BYTE * pbBiosAdr, int seconds)
++/**
++ *	initio_init		-	set up an InitIO host adapter
++ *	@host: InitIO host adapter
++ *	@bios_addr: BIOS address
++ *
++ *	Set up the host adapter and devices according to the configuration
++ *	retrieved from the E2PROM.
++ *
++ *	Locking: Calls E2PROM layer code which is not re-entrant so must
++ *	run single threaded for now.
++ */
++
++static void initio_init(struct initio_host * host, u8 *bios_addr)
+ {
+ 	int i;
+-	BYTE *pwFlags;
+-	BYTE *pbHeads;
+-	SCB *pTmpScb, *pPrevScb = NULL;
+-
+-	pCurHcb->HCS_NumScbs = tul_num_scb;
+-	pCurHcb->HCS_Semaph = 1;
+-	spin_lock_init(&pCurHcb->HCS_SemaphLock);
+-	pCurHcb->HCS_JSStatus0 = 0;
+-	pCurHcb->HCS_Scb = scbp;
+-	pCurHcb->HCS_NxtPend = scbp;
+-	pCurHcb->HCS_NxtAvail = scbp;
+-	for (i = 0, pTmpScb = scbp; i < tul_num_scb; i++, pTmpScb++) {
+-		pTmpScb->SCB_TagId = i;
+-		if (i != 0)
+-			pPrevScb->SCB_NxtScb = pTmpScb;
+-		pPrevScb = pTmpScb;
+-	}
+-	pPrevScb->SCB_NxtScb = NULL;
+-	pCurHcb->HCS_ScbEnd = pTmpScb;
+-	pCurHcb->HCS_FirstAvail = scbp;
+-	pCurHcb->HCS_LastAvail = pPrevScb;
+-	spin_lock_init(&pCurHcb->HCS_AvailLock);
+-	pCurHcb->HCS_FirstPend = NULL;
+-	pCurHcb->HCS_LastPend = NULL;
+-	pCurHcb->HCS_FirstBusy = NULL;
+-	pCurHcb->HCS_LastBusy = NULL;
+-	pCurHcb->HCS_FirstDone = NULL;
+-	pCurHcb->HCS_LastDone = NULL;
+-	pCurHcb->HCS_ActScb = NULL;
+-	pCurHcb->HCS_ActTcs = NULL;
++	u8 *flags;
++	u8 *heads;
+ 
+-	tul_read_eeprom(pCurHcb->HCS_Base);
+-/*---------- get H/A configuration -------------*/
++	/* Get E2Prom configuration */
++	initio_read_eeprom(host->addr);
+ 	if (i91unvramp->NVM_SCSIInfo[0].NVM_NumOfTarg == 8)
+-		pCurHcb->HCS_MaxTar = 8;
++		host->max_tar = 8;
+ 	else
+-		pCurHcb->HCS_MaxTar = 16;
++		host->max_tar = 16;
+ 
+-	pCurHcb->HCS_Config = i91unvramp->NVM_SCSIInfo[0].NVM_ChConfig1;
++	host->config = i91unvramp->NVM_SCSIInfo[0].NVM_ChConfig1;
+ 
+-	pCurHcb->HCS_SCSI_ID = i91unvramp->NVM_SCSIInfo[0].NVM_ChSCSIID;
+-	pCurHcb->HCS_IdMask = ~(1 << pCurHcb->HCS_SCSI_ID);
++	host->scsi_id = i91unvramp->NVM_SCSIInfo[0].NVM_ChSCSIID;
++	host->idmask = ~(1 << host->scsi_id);
+ 
+ #ifdef CHK_PARITY
+ 	/* Enable parity error response */
+-	TUL_WR(pCurHcb->HCS_Base + TUL_PCMD, TUL_RD(pCurHcb->HCS_Base, TUL_PCMD) | 0x40);
++	outb(inb(host->addr + TUL_PCMD) | 0x40, host->addr + TUL_PCMD);
+ #endif
+ 
+ 	/* Mask all the interrupt       */
+-	TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
++	outb(0x1F, host->addr + TUL_Mask);
+ 
+-	tul_stop_bm(pCurHcb);
++	initio_stop_bm(host);
+ 	/* --- Initialize the tulip --- */
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_RST_CHIP);
++	outb(TSC_RST_CHIP, host->addr + TUL_SCtrl0);
+ 
+ 	/* program HBA's SCSI ID        */
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SScsiId, pCurHcb->HCS_SCSI_ID << 4);
++	outb(host->scsi_id << 4, host->addr + TUL_SScsiId);
+ 
+ 	/* Enable Initiator Mode ,phase latch,alternate sync period mode,
+ 	   disable SCSI reset */
+-	if (pCurHcb->HCS_Config & HCC_EN_PAR)
+-		pCurHcb->HCS_SConf1 = (TSC_INITDEFAULT | TSC_EN_SCSI_PAR);
++	if (host->config & HCC_EN_PAR)
++		host->sconf1 = (TSC_INITDEFAULT | TSC_EN_SCSI_PAR);
+ 	else
+-		pCurHcb->HCS_SConf1 = (TSC_INITDEFAULT);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurHcb->HCS_SConf1);
++		host->sconf1 = (TSC_INITDEFAULT);
++	outb(host->sconf1, host->addr + TUL_SConfig);
+ 
+ 	/* Enable HW reselect           */
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT);
++	outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1);
+ 
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, 0);
++	outb(0, host->addr + TUL_SPeriod);
+ 
+ 	/* selection time out = 250 ms */
+-	TUL_WR(pCurHcb->HCS_Base + TUL_STimeOut, 153);
++	outb(153, host->addr + TUL_STimeOut);
+ 
+-/*--------- Enable SCSI terminator -----*/
+-	TUL_WR(pCurHcb->HCS_Base + TUL_XCtrl, (pCurHcb->HCS_Config & (HCC_ACT_TERM1 | HCC_ACT_TERM2)));
+-	TUL_WR(pCurHcb->HCS_Base + TUL_GCTRL1,
+-	       ((pCurHcb->HCS_Config & HCC_AUTO_TERM) >> 4) | (TUL_RD(pCurHcb->HCS_Base, TUL_GCTRL1) & 0xFE));
++	/* Enable SCSI terminator */
++	outb((host->config & (HCC_ACT_TERM1 | HCC_ACT_TERM2)),
++		host->addr + TUL_XCtrl);
++	outb(((host->config & HCC_AUTO_TERM) >> 4) |
++		(inb(host->addr + TUL_GCTRL1) & 0xFE),
++		host->addr + TUL_GCTRL1);
+ 
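++	/* Per-target setup: config flags come from the E2PROM image, drive geometry (heads/sectors) from the BIOS data area at offset 0x180 */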
+ 	for (i = 0,
+-	     pwFlags = & (i91unvramp->NVM_SCSIInfo[0].NVM_Targ0Config),
+-	     pbHeads = pbBiosAdr + 0x180;
+-	     i < pCurHcb->HCS_MaxTar;
+-	     i++, pwFlags++) {
+-		pCurHcb->HCS_Tcs[i].TCS_Flags = *pwFlags & ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
+-		if (pCurHcb->HCS_Tcs[i].TCS_Flags & TCF_EN_255)
+-			pCurHcb->HCS_Tcs[i].TCS_DrvFlags = TCF_DRV_255_63;
++	     flags = & (i91unvramp->NVM_SCSIInfo[0].NVM_Targ0Config),
++	     heads = bios_addr + 0x180;
++	     i < host->max_tar;
++	     i++, flags++) {
++		host->targets[i].flags = *flags & ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
++		if (host->targets[i].flags & TCF_EN_255)
++			host->targets[i].drv_flags = TCF_DRV_255_63;
+ 		else
+-			pCurHcb->HCS_Tcs[i].TCS_DrvFlags = 0;
+-		pCurHcb->HCS_Tcs[i].TCS_JS_Period = 0;
+-		pCurHcb->HCS_Tcs[i].TCS_SConfig0 = pCurHcb->HCS_SConf1;
+-		pCurHcb->HCS_Tcs[i].TCS_DrvHead = *pbHeads++;
+-		if (pCurHcb->HCS_Tcs[i].TCS_DrvHead == 255)
+-			pCurHcb->HCS_Tcs[i].TCS_DrvFlags = TCF_DRV_255_63;
++			host->targets[i].drv_flags = 0;
++		host->targets[i].js_period = 0;
++		host->targets[i].sconfig0 = host->sconf1;
++		host->targets[i].heads = *heads++;
++		if (host->targets[i].heads == 255)
++			host->targets[i].drv_flags = TCF_DRV_255_63;
+ 		else
+-			pCurHcb->HCS_Tcs[i].TCS_DrvFlags = 0;
+-		pCurHcb->HCS_Tcs[i].TCS_DrvSector = *pbHeads++;
+-		pCurHcb->HCS_Tcs[i].TCS_Flags &= ~TCF_BUSY;
+-		pCurHcb->HCS_ActTags[i] = 0;
+-		pCurHcb->HCS_MaxTags[i] = 0xFF;
++			host->targets[i].drv_flags = 0;
++		host->targets[i].sectors = *heads++;
++		host->targets[i].flags &= ~TCF_BUSY;
++		host->act_tags[i] = 0;
++		host->max_tags[i] = 0xFF;
+ 	}			/* for                          */
+ 	printk("i91u: PCI Base=0x%04X, IRQ=%d, BIOS=0x%04X0, SCSI ID=%d\n",
+-	       pCurHcb->HCS_Base, pCurHcb->HCS_Intr,
+-	       pCurHcb->HCS_BIOS, pCurHcb->HCS_SCSI_ID);
+-/*------------------- reset SCSI Bus ---------------------------*/
+-	if (pCurHcb->HCS_Config & HCC_SCSI_RESET) {
+-		printk("i91u: Reset SCSI Bus ... \n");
+-		tul_reset_scsi(pCurHcb, seconds);
+-	}
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCFG1, 0x17);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SIntEnable, 0xE9);
+-	return (0);
++	       host->addr, host->irq,
++	       host->bios_addr, host->scsi_id);
++	/* Reset SCSI Bus */
++	if (host->config & HCC_SCSI_RESET) {
++		printk(KERN_INFO "i91u: Reset SCSI Bus ... \n");
++		initio_reset_scsi(host, 10);
++	}
++	outb(0x17, host->addr + TUL_SCFG1);
++	outb(0xE9, host->addr + TUL_SIntEnable);
+ }
+ 
+-/***************************************************************************/
+-static SCB *tul_alloc_scb(HCS * hcsp)
++/**
++ *	initio_alloc_scb		-	Allocate an SCB
++ *	@host: InitIO host we are allocating for
++ *
++ *	Walk the SCB list for the controller and allocate a free SCB if
++ *	one exists.
++ */
++static struct scsi_ctrl_blk *initio_alloc_scb(struct initio_host *host)
+ {
+-	SCB *pTmpScb;
+-	ULONG flags;
+-	spin_lock_irqsave(&(hcsp->HCS_AvailLock), flags);
+-	if ((pTmpScb = hcsp->HCS_FirstAvail) != NULL) {
++	struct scsi_ctrl_blk *scb;
++	unsigned long flags;
++
++	spin_lock_irqsave(&host->avail_lock, flags);
++	if ((scb = host->first_avail) != NULL) {
+ #if DEBUG_QUEUE
+-		printk("find scb at %08lx\n", (ULONG) pTmpScb);
++		printk("find scb at %p\n", scb);
+ #endif
+-		if ((hcsp->HCS_FirstAvail = pTmpScb->SCB_NxtScb) == NULL)
+-			hcsp->HCS_LastAvail = NULL;
+-		pTmpScb->SCB_NxtScb = NULL;
+-		pTmpScb->SCB_Status = SCB_RENT;
++		if ((host->first_avail = scb->next) == NULL)
++			host->last_avail = NULL;
++		scb->next = NULL;
++		scb->status = SCB_RENT;
+ 	}
+-	spin_unlock_irqrestore(&(hcsp->HCS_AvailLock), flags);
+-	return (pTmpScb);
++	spin_unlock_irqrestore(&host->avail_lock, flags);
++	return scb;
+ }
+ 
+-/***************************************************************************/
+-static void tul_release_scb(HCS * hcsp, SCB * scbp)
++/**
++ *	initio_release_scb		-	Release an SCB
++ *	@host: InitIO host that owns the SCB
++ *	@cmnd: SCB command block being returned
++ *
++ *	Return an allocated SCB to the host free list
++ */
++
++static void initio_release_scb(struct initio_host * host, struct scsi_ctrl_blk * cmnd)
+ {
+-	ULONG flags;
++	unsigned long flags;
+ 
+ #if DEBUG_QUEUE
+-	printk("Release SCB %lx; ", (ULONG) scbp);
++	printk("Release SCB %p; ", cmnd);
+ #endif
+-	spin_lock_irqsave(&(hcsp->HCS_AvailLock), flags);
+-	scbp->SCB_Srb = NULL;
+-	scbp->SCB_Status = 0;
+-	scbp->SCB_NxtScb = NULL;
+-	if (hcsp->HCS_LastAvail != NULL) {
+-		hcsp->HCS_LastAvail->SCB_NxtScb = scbp;
+-		hcsp->HCS_LastAvail = scbp;
++	spin_lock_irqsave(&(host->avail_lock), flags);
++	cmnd->srb = NULL;
++	cmnd->status = 0;
++	cmnd->next = NULL;
++	if (host->last_avail != NULL) {
++		host->last_avail->next = cmnd;
++		host->last_avail = cmnd;
+ 	} else {
+-		hcsp->HCS_FirstAvail = scbp;
+-		hcsp->HCS_LastAvail = scbp;
++		host->first_avail = cmnd;
++		host->last_avail = cmnd;
+ 	}
+-	spin_unlock_irqrestore(&(hcsp->HCS_AvailLock), flags);
++	spin_unlock_irqrestore(&(host->avail_lock), flags);
+ }
+ 
+ /***************************************************************************/
+-static void tul_append_pend_scb(HCS * pCurHcb, SCB * scbp)
++static void initio_append_pend_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp)
+ {
+ 
+ #if DEBUG_QUEUE
+-	printk("Append pend SCB %lx; ", (ULONG) scbp);
++	printk("Append pend SCB %p; ", scbp);
+ #endif
+-	scbp->SCB_Status = SCB_PEND;
+-	scbp->SCB_NxtScb = NULL;
+-	if (pCurHcb->HCS_LastPend != NULL) {
+-		pCurHcb->HCS_LastPend->SCB_NxtScb = scbp;
+-		pCurHcb->HCS_LastPend = scbp;
++	scbp->status = SCB_PEND;
++	scbp->next = NULL;
++	if (host->last_pending != NULL) {
++		host->last_pending->next = scbp;
++		host->last_pending = scbp;
+ 	} else {
+-		pCurHcb->HCS_FirstPend = scbp;
+-		pCurHcb->HCS_LastPend = scbp;
++		host->first_pending = scbp;
++		host->last_pending = scbp;
+ 	}
+ }
+ 
+ /***************************************************************************/
+-static void tul_push_pend_scb(HCS * pCurHcb, SCB * scbp)
++static void initio_push_pend_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp)
+ {
+ 
+ #if DEBUG_QUEUE
+-	printk("Push pend SCB %lx; ", (ULONG) scbp);
++	printk("Push pend SCB %p; ", scbp);
+ #endif
+-	scbp->SCB_Status = SCB_PEND;
+-	if ((scbp->SCB_NxtScb = pCurHcb->HCS_FirstPend) != NULL) {
+-		pCurHcb->HCS_FirstPend = scbp;
++	scbp->status = SCB_PEND;
++	if ((scbp->next = host->first_pending) != NULL) {
++		host->first_pending = scbp;
+ 	} else {
+-		pCurHcb->HCS_FirstPend = scbp;
+-		pCurHcb->HCS_LastPend = scbp;
++		host->first_pending = scbp;
++		host->last_pending = scbp;
+ 	}
+ }
+ 
+-/***************************************************************************/
+-static SCB *tul_find_first_pend_scb(HCS * pCurHcb)
++static struct scsi_ctrl_blk *initio_find_first_pend_scb(struct initio_host * host)
+ {
+-	SCB *pFirstPend;
++	struct scsi_ctrl_blk *first;
+ 
+ 
+-	pFirstPend = pCurHcb->HCS_FirstPend;
+-	while (pFirstPend != NULL) {
+-		if (pFirstPend->SCB_Opcode != ExecSCSI) {
+-			return (pFirstPend);
+-		}
+-		if (pFirstPend->SCB_TagMsg == 0) {
+-			if ((pCurHcb->HCS_ActTags[pFirstPend->SCB_Target] == 0) &&
+-			    !(pCurHcb->HCS_Tcs[pFirstPend->SCB_Target].TCS_Flags & TCF_BUSY)) {
+-				return (pFirstPend);
+-			}
++	first = host->first_pending;
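++	/* Scan the pending queue: untagged commands need the target idle with no active tags, tagged commands need a free tag slot on a non-busy target */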
++	while (first != NULL) {
++		if (first->opcode != ExecSCSI)
++			return first;
++		if (first->tagmsg == 0) {
++			if ((host->act_tags[first->target] == 0) &&
++			    !(host->targets[first->target].flags & TCF_BUSY))
++				return first;
+ 		} else {
+-			if ((pCurHcb->HCS_ActTags[pFirstPend->SCB_Target] >=
+-			  pCurHcb->HCS_MaxTags[pFirstPend->SCB_Target]) |
+-			    (pCurHcb->HCS_Tcs[pFirstPend->SCB_Target].TCS_Flags & TCF_BUSY)) {
+-				pFirstPend = pFirstPend->SCB_NxtScb;
++			if ((host->act_tags[first->target] >=
++			  host->max_tags[first->target]) |
++			    (host->targets[first->target].flags & TCF_BUSY)) {
++				first = first->next;
+ 				continue;
+ 			}
+-			return (pFirstPend);
++			return first;
+ 		}
+-		pFirstPend = pFirstPend->SCB_NxtScb;
++		first = first->next;
+ 	}
+-
+-
+-	return (pFirstPend);
++	return first;
+ }
+-/***************************************************************************/
+-static void tul_unlink_pend_scb(HCS * pCurHcb, SCB * pCurScb)
++
++static void initio_unlink_pend_scb(struct initio_host * host, struct scsi_ctrl_blk * scb)
+ {
+-	SCB *pTmpScb, *pPrevScb;
++	struct scsi_ctrl_blk *tmp, *prev;
+ 
+ #if DEBUG_QUEUE
+-	printk("unlink pend SCB %lx; ", (ULONG) pCurScb);
++	printk("unlink pend SCB %p; ", scb);
+ #endif
+ 
+-	pPrevScb = pTmpScb = pCurHcb->HCS_FirstPend;
+-	while (pTmpScb != NULL) {
+-		if (pCurScb == pTmpScb) {	/* Unlink this SCB              */
+-			if (pTmpScb == pCurHcb->HCS_FirstPend) {
+-				if ((pCurHcb->HCS_FirstPend = pTmpScb->SCB_NxtScb) == NULL)
+-					pCurHcb->HCS_LastPend = NULL;
++	prev = tmp = host->first_pending;
++	while (tmp != NULL) {
++		if (scb == tmp) {	/* Unlink this SCB              */
++			if (tmp == host->first_pending) {
++				if ((host->first_pending = tmp->next) == NULL)
++					host->last_pending = NULL;
+ 			} else {
+-				pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb;
+-				if (pTmpScb == pCurHcb->HCS_LastPend)
+-					pCurHcb->HCS_LastPend = pPrevScb;
++				prev->next = tmp->next;
++				if (tmp == host->last_pending)
++					host->last_pending = prev;
+ 			}
+-			pTmpScb->SCB_NxtScb = NULL;
++			tmp->next = NULL;
+ 			break;
+ 		}
+-		pPrevScb = pTmpScb;
+-		pTmpScb = pTmpScb->SCB_NxtScb;
++		prev = tmp;
++		tmp = tmp->next;
+ 	}
+-	return;
+ }
+-/***************************************************************************/
+-static void tul_append_busy_scb(HCS * pCurHcb, SCB * scbp)
++
++static void initio_append_busy_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp)
+ {
+ 
+ #if DEBUG_QUEUE
+-	printk("append busy SCB %lx; ", (ULONG) scbp);
++	printk("append busy SCB %o; ", scbp);
+ #endif
+-	if (scbp->SCB_TagMsg)
+-		pCurHcb->HCS_ActTags[scbp->SCB_Target]++;
++	if (scbp->tagmsg)
++		host->act_tags[scbp->target]++;
+ 	else
+-		pCurHcb->HCS_Tcs[scbp->SCB_Target].TCS_Flags |= TCF_BUSY;
+-	scbp->SCB_Status = SCB_BUSY;
+-	scbp->SCB_NxtScb = NULL;
+-	if (pCurHcb->HCS_LastBusy != NULL) {
+-		pCurHcb->HCS_LastBusy->SCB_NxtScb = scbp;
+-		pCurHcb->HCS_LastBusy = scbp;
++		host->targets[scbp->target].flags |= TCF_BUSY;
++	scbp->status = SCB_BUSY;
++	scbp->next = NULL;
++	if (host->last_busy != NULL) {
++		host->last_busy->next = scbp;
++		host->last_busy = scbp;
+ 	} else {
+-		pCurHcb->HCS_FirstBusy = scbp;
+-		pCurHcb->HCS_LastBusy = scbp;
++		host->first_busy = scbp;
++		host->last_busy = scbp;
+ 	}
+ }
+ 
+ /***************************************************************************/
+-static SCB *tul_pop_busy_scb(HCS * pCurHcb)
++static struct scsi_ctrl_blk *initio_pop_busy_scb(struct initio_host * host)
+ {
+-	SCB *pTmpScb;
++	struct scsi_ctrl_blk *tmp;
+ 
+ 
+-	if ((pTmpScb = pCurHcb->HCS_FirstBusy) != NULL) {
+-		if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL)
+-			pCurHcb->HCS_LastBusy = NULL;
+-		pTmpScb->SCB_NxtScb = NULL;
+-		if (pTmpScb->SCB_TagMsg)
+-			pCurHcb->HCS_ActTags[pTmpScb->SCB_Target]--;
++	if ((tmp = host->first_busy) != NULL) {
++		if ((host->first_busy = tmp->next) == NULL)
++			host->last_busy = NULL;
++		tmp->next = NULL;
++		if (tmp->tagmsg)
++			host->act_tags[tmp->target]--;
+ 		else
+-			pCurHcb->HCS_Tcs[pTmpScb->SCB_Target].TCS_Flags &= ~TCF_BUSY;
++			host->targets[tmp->target].flags &= ~TCF_BUSY;
+ 	}
+ #if DEBUG_QUEUE
+-	printk("Pop busy SCB %lx; ", (ULONG) pTmpScb);
++	printk("Pop busy SCB %p; ", tmp);
+ #endif
+-	return (pTmpScb);
++	return tmp;
+ }
+ 
+ /***************************************************************************/
+-static void tul_unlink_busy_scb(HCS * pCurHcb, SCB * pCurScb)
++static void initio_unlink_busy_scb(struct initio_host * host, struct scsi_ctrl_blk * scb)
+ {
+-	SCB *pTmpScb, *pPrevScb;
++	struct scsi_ctrl_blk *tmp, *prev;
+ 
+ #if DEBUG_QUEUE
+-	printk("unlink busy SCB %lx; ", (ULONG) pCurScb);
++	printk("unlink busy SCB %p; ", scb);
+ #endif
+ 
+-	pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy;
+-	while (pTmpScb != NULL) {
+-		if (pCurScb == pTmpScb) {	/* Unlink this SCB              */
+-			if (pTmpScb == pCurHcb->HCS_FirstBusy) {
+-				if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL)
+-					pCurHcb->HCS_LastBusy = NULL;
+-			} else {
+-				pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb;
+-				if (pTmpScb == pCurHcb->HCS_LastBusy)
+-					pCurHcb->HCS_LastBusy = pPrevScb;
+-			}
+-			pTmpScb->SCB_NxtScb = NULL;
+-			if (pTmpScb->SCB_TagMsg)
+-				pCurHcb->HCS_ActTags[pTmpScb->SCB_Target]--;
++	prev = tmp = host->first_busy;
++	while (tmp != NULL) {
++		if (scb == tmp) {	/* Unlink this SCB              */
++			if (tmp == host->first_busy) {
++				if ((host->first_busy = tmp->next) == NULL)
++					host->last_busy = NULL;
++			} else {
++				prev->next = tmp->next;
++				if (tmp == host->last_busy)
++					host->last_busy = prev;
++			}
++			tmp->next = NULL;
++			if (tmp->tagmsg)
++				host->act_tags[tmp->target]--;
+ 			else
+-				pCurHcb->HCS_Tcs[pTmpScb->SCB_Target].TCS_Flags &= ~TCF_BUSY;
++				host->targets[tmp->target].flags &= ~TCF_BUSY;
+ 			break;
+ 		}
+-		pPrevScb = pTmpScb;
+-		pTmpScb = pTmpScb->SCB_NxtScb;
++		prev = tmp;
++		tmp = tmp->next;
+ 	}
+ 	return;
+ }
+ 
+-/***************************************************************************/
+-SCB *tul_find_busy_scb(HCS * pCurHcb, WORD tarlun)
++struct scsi_ctrl_blk *initio_find_busy_scb(struct initio_host * host, u16 tarlun)
+ {
+-	SCB *pTmpScb, *pPrevScb;
+-	WORD scbp_tarlun;
++	struct scsi_ctrl_blk *tmp, *prev;
++	u16 scbp_tarlun;
+ 
+ 
+-	pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy;
+-	while (pTmpScb != NULL) {
+-		scbp_tarlun = (pTmpScb->SCB_Lun << 8) | (pTmpScb->SCB_Target);
++	prev = tmp = host->first_busy;
++	while (tmp != NULL) {
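++		/* Busy queue entries are keyed by the packed (lun << 8) | target value */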
++		scbp_tarlun = (tmp->lun << 8) | (tmp->target);
+ 		if (scbp_tarlun == tarlun) {	/* Unlink this SCB              */
+ 			break;
+ 		}
+-		pPrevScb = pTmpScb;
+-		pTmpScb = pTmpScb->SCB_NxtScb;
++		prev = tmp;
++		tmp = tmp->next;
+ 	}
+ #if DEBUG_QUEUE
+-	printk("find busy SCB %lx; ", (ULONG) pTmpScb);
++	printk("find busy SCB %p; ", tmp);
+ #endif
+-	return (pTmpScb);
++	return tmp;
+ }
+ 
+-/***************************************************************************/
+-static void tul_append_done_scb(HCS * pCurHcb, SCB * scbp)
++static void initio_append_done_scb(struct initio_host * host, struct scsi_ctrl_blk * scbp)
+ {
+-
+ #if DEBUG_QUEUE
+-	printk("append done SCB %lx; ", (ULONG) scbp);
++	printk("append done SCB %p; ", scbp);
+ #endif
+ 
+-	scbp->SCB_Status = SCB_DONE;
+-	scbp->SCB_NxtScb = NULL;
+-	if (pCurHcb->HCS_LastDone != NULL) {
+-		pCurHcb->HCS_LastDone->SCB_NxtScb = scbp;
+-		pCurHcb->HCS_LastDone = scbp;
++	scbp->status = SCB_DONE;
++	scbp->next = NULL;
++	if (host->last_done != NULL) {
++		host->last_done->next = scbp;
++		host->last_done = scbp;
+ 	} else {
+-		pCurHcb->HCS_FirstDone = scbp;
+-		pCurHcb->HCS_LastDone = scbp;
++		host->first_done = scbp;
++		host->last_done = scbp;
+ 	}
+ }
+ 
+-/***************************************************************************/
+-SCB *tul_find_done_scb(HCS * pCurHcb)
++struct scsi_ctrl_blk *initio_find_done_scb(struct initio_host * host)
+ {
+-	SCB *pTmpScb;
+-
++	struct scsi_ctrl_blk *tmp;
+ 
+-	if ((pTmpScb = pCurHcb->HCS_FirstDone) != NULL) {
+-		if ((pCurHcb->HCS_FirstDone = pTmpScb->SCB_NxtScb) == NULL)
+-			pCurHcb->HCS_LastDone = NULL;
+-		pTmpScb->SCB_NxtScb = NULL;
++	if ((tmp = host->first_done) != NULL) {
++		if ((host->first_done = tmp->next) == NULL)
++			host->last_done = NULL;
++		tmp->next = NULL;
+ 	}
+ #if DEBUG_QUEUE
+-	printk("find done SCB %lx; ", (ULONG) pTmpScb);
++	printk("find done SCB %p; ",tmp);
+ #endif
+-	return (pTmpScb);
++	return tmp;
+ }
+ 
+-/***************************************************************************/
+-static int tul_abort_srb(HCS * pCurHcb, struct scsi_cmnd *srbp)
++static int initio_abort_srb(struct initio_host * host, struct scsi_cmnd *srbp)
+ {
+-	ULONG flags;
+-	SCB *pTmpScb, *pPrevScb;
++	unsigned long flags;
++	struct scsi_ctrl_blk *tmp, *prev;
+ 
+-	spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
++	spin_lock_irqsave(&host->semaph_lock, flags);
+ 
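++	/* Semaphore down but no active SCB: mask the chip interrupt, run the tulip state machine once, unmask, and let the midlayer retry */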
+-	if ((pCurHcb->HCS_Semaph == 0) && (pCurHcb->HCS_ActScb == NULL)) {
+-		TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
++	if ((host->semaph == 0) && (host->active == NULL)) {
+ 		/* disable Jasmin SCSI Int        */
+-
+-                spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+-		tulip_main(pCurHcb);
+-
+-        	spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
+-
+-		pCurHcb->HCS_Semaph = 1;
+-		TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F);
+-
+-		spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
++		outb(0x1F, host->addr + TUL_Mask);
++		spin_unlock_irqrestore(&host->semaph_lock, flags);
++		/* FIXME: synchronize_irq needed ? */
++		tulip_main(host);
++		spin_lock_irqsave(&host->semaph_lock, flags);
++		host->semaph = 1;
++		outb(0x0F, host->addr + TUL_Mask);
++		spin_unlock_irqrestore(&host->semaph_lock, flags);
+ 		return SCSI_ABORT_SNOOZE;
+ 	}
+-	pPrevScb = pTmpScb = pCurHcb->HCS_FirstPend;	/* Check Pend queue */
+-	while (pTmpScb != NULL) {
++	prev = tmp = host->first_pending;	/* Check Pend queue */
++	while (tmp != NULL) {
+ 		/* 07/27/98 */
+-		if (pTmpScb->SCB_Srb == srbp) {
+-			if (pTmpScb == pCurHcb->HCS_ActScb) {
+-				spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
++		if (tmp->srb == srbp) {
++			if (tmp == host->active) {
++				spin_unlock_irqrestore(&host->semaph_lock, flags);
+ 				return SCSI_ABORT_BUSY;
+-			} else if (pTmpScb == pCurHcb->HCS_FirstPend) {
+-				if ((pCurHcb->HCS_FirstPend = pTmpScb->SCB_NxtScb) == NULL)
+-					pCurHcb->HCS_LastPend = NULL;
+-			} else {
+-				pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb;
+-				if (pTmpScb == pCurHcb->HCS_LastPend)
+-					pCurHcb->HCS_LastPend = pPrevScb;
+-			}
+-			pTmpScb->SCB_HaStat = HOST_ABORTED;
+-			pTmpScb->SCB_Flags |= SCF_DONE;
+-			if (pTmpScb->SCB_Flags & SCF_POST)
+-				(*pTmpScb->SCB_Post) ((BYTE *) pCurHcb, (BYTE *) pTmpScb);
+-			spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
++			} else if (tmp == host->first_pending) {
++				if ((host->first_pending = tmp->next) == NULL)
++					host->last_pending = NULL;
++			} else {
++				prev->next = tmp->next;
++				if (tmp == host->last_pending)
++					host->last_pending = prev;
++			}
++			tmp->hastat = HOST_ABORTED;
++			tmp->flags |= SCF_DONE;
++			if (tmp->flags & SCF_POST)
++				(*tmp->post) ((u8 *) host, (u8 *) tmp);
++			spin_unlock_irqrestore(&host->semaph_lock, flags);
+ 			return SCSI_ABORT_SUCCESS;
+ 		}
+-		pPrevScb = pTmpScb;
+-		pTmpScb = pTmpScb->SCB_NxtScb;
++		prev = tmp;
++		tmp = tmp->next;
+ 	}
+ 
+-	pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy;	/* Check Busy queue */
+-	while (pTmpScb != NULL) {
+-
+-		if (pTmpScb->SCB_Srb == srbp) {
+-
+-			if (pTmpScb == pCurHcb->HCS_ActScb) {
+-				spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
++	prev = tmp = host->first_busy;	/* Check Busy queue */
++	while (tmp != NULL) {
++		if (tmp->srb == srbp) {
++			if (tmp == host->active) {
++				spin_unlock_irqrestore(&host->semaph_lock, flags);
+ 				return SCSI_ABORT_BUSY;
+-			} else if (pTmpScb->SCB_TagMsg == 0) {
+-				spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
++			} else if (tmp->tagmsg == 0) {
++				spin_unlock_irqrestore(&host->semaph_lock, flags);
+ 				return SCSI_ABORT_BUSY;
+ 			} else {
+-				pCurHcb->HCS_ActTags[pTmpScb->SCB_Target]--;
+-				if (pTmpScb == pCurHcb->HCS_FirstBusy) {
+-					if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL)
+-						pCurHcb->HCS_LastBusy = NULL;
+-				} else {
+-					pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb;
+-					if (pTmpScb == pCurHcb->HCS_LastBusy)
+-						pCurHcb->HCS_LastBusy = pPrevScb;
+-				}
+-				pTmpScb->SCB_NxtScb = NULL;
+-
+-
+-				pTmpScb->SCB_HaStat = HOST_ABORTED;
+-				pTmpScb->SCB_Flags |= SCF_DONE;
+-				if (pTmpScb->SCB_Flags & SCF_POST)
+-					(*pTmpScb->SCB_Post) ((BYTE *) pCurHcb, (BYTE *) pTmpScb);
+-				spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
++				host->act_tags[tmp->target]--;
++				if (tmp == host->first_busy) {
++					if ((host->first_busy = tmp->next) == NULL)
++						host->last_busy = NULL;
++				} else {
++					prev->next = tmp->next;
++					if (tmp == host->last_busy)
++						host->last_busy = prev;
++				}
++				tmp->next = NULL;
++
++
++				tmp->hastat = HOST_ABORTED;
++				tmp->flags |= SCF_DONE;
++				if (tmp->flags & SCF_POST)
++					(*tmp->post) ((u8 *) host, (u8 *) tmp);
++				spin_unlock_irqrestore(&host->semaph_lock, flags);
+ 				return SCSI_ABORT_SUCCESS;
+ 			}
+ 		}
+-		pPrevScb = pTmpScb;
+-		pTmpScb = pTmpScb->SCB_NxtScb;
++		prev = tmp;
++		tmp = tmp->next;
+ 	}
+-	spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-	return (SCSI_ABORT_NOT_RUNNING);
++	spin_unlock_irqrestore(&host->semaph_lock, flags);
++	return SCSI_ABORT_NOT_RUNNING;
+ }
+ 
+ /***************************************************************************/
+-static int tul_bad_seq(HCS * pCurHcb)
+-{
+-	SCB *pCurScb;
+-
+-	printk("tul_bad_seg c=%d\n", pCurHcb->HCS_Index);
+-
+-	if ((pCurScb = pCurHcb->HCS_ActScb) != NULL) {
+-		tul_unlink_busy_scb(pCurHcb, pCurScb);
+-		pCurScb->SCB_HaStat = HOST_BAD_PHAS;
+-		pCurScb->SCB_TaStat = 0;
+-		tul_append_done_scb(pCurHcb, pCurScb);
+-	}
+-	tul_stop_bm(pCurHcb);
+-
+-	tul_reset_scsi(pCurHcb, 8);	/* 7/29/98 */
+-
+-	return (tul_post_scsi_rst(pCurHcb));
+-}
+-
+-#if 0
+-
+-/************************************************************************/
+-static int tul_device_reset(HCS * pCurHcb, struct scsi_cmnd *pSrb,
+-			    unsigned int target, unsigned int ResetFlags)
++static int initio_bad_seq(struct initio_host * host)
+ {
+-	ULONG flags;
+-	SCB *pScb;
+-	spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
+-
+-	if (ResetFlags & SCSI_RESET_ASYNCHRONOUS) {
+-
+-		if ((pCurHcb->HCS_Semaph == 0) && (pCurHcb->HCS_ActScb == NULL)) {
+-			TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
+-			/* disable Jasmin SCSI Int        */
+-
+-        		spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+-			tulip_main(pCurHcb);
+-
+-        		spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
+-
+-			pCurHcb->HCS_Semaph = 1;
+-			TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F);
+-
+-			spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+-			return SCSI_RESET_SNOOZE;
+-		}
+-		pScb = pCurHcb->HCS_FirstBusy;	/* Check Busy queue */
+-		while (pScb != NULL) {
+-			if (pScb->SCB_Srb == pSrb)
+-				break;
+-			pScb = pScb->SCB_NxtScb;
+-		}
+-		if (pScb == NULL) {
+-			printk("Unable to Reset - No SCB Found\n");
+-
+-			spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-			return SCSI_RESET_NOT_RUNNING;
+-		}
+-	}
+-	if ((pScb = tul_alloc_scb(pCurHcb)) == NULL) {
+-		spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-		return SCSI_RESET_NOT_RUNNING;
+-	}
+-	pScb->SCB_Opcode = BusDevRst;
+-	pScb->SCB_Flags = SCF_POST;
+-	pScb->SCB_Target = target;
+-	pScb->SCB_Mode = 0;
+-
+-	pScb->SCB_Srb = NULL;
+-	if (ResetFlags & SCSI_RESET_SYNCHRONOUS) {
+-		pScb->SCB_Srb = pSrb;
+-	}
+-	tul_push_pend_scb(pCurHcb, pScb);	/* push this SCB to Pending queue */
++	struct scsi_ctrl_blk *scb;
+ 
+-	if (pCurHcb->HCS_Semaph == 1) {
+-		TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
+-		/* disable Jasmin SCSI Int        */
+-		pCurHcb->HCS_Semaph = 0;
+-
+-        	spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+-		tulip_main(pCurHcb);
+-
+-                spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
++	printk("initio_bad_seg c=%d\n", host->index);
+ 
+-		pCurHcb->HCS_Semaph = 1;
+-		TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F);
++	if ((scb = host->active) != NULL) {
++		initio_unlink_busy_scb(host, scb);
++		scb->hastat = HOST_BAD_PHAS;
++		scb->tastat = 0;
++		initio_append_done_scb(host, scb);
+ 	}
+-	spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-	return SCSI_RESET_PENDING;
+-}
+-
+-static int tul_reset_scsi_bus(HCS * pCurHcb)
+-{
+-	ULONG flags;
+-
+-	spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
+-	pCurHcb->HCS_Semaph = 0;
+-
+-	spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+-	tul_stop_bm(pCurHcb);
+-
+-	tul_reset_scsi(pCurHcb, 2);	/* 7/29/98 */
+-
+-	spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
+-	tul_post_scsi_rst(pCurHcb);
+-
+-        spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+-	tulip_main(pCurHcb);
+-
+-        spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
+-
+-	pCurHcb->HCS_Semaph = 1;
+-	TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F);
+-	spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-	return (SCSI_RESET_SUCCESS | SCSI_RESET_HOST_RESET);
++	initio_stop_bm(host);
++	initio_reset_scsi(host, 8);	/* 7/29/98 */
++	return initio_post_scsi_rst(host);
+ }
+ 
+-#endif  /*  0  */
+ 
+ /************************************************************************/
+-static void tul_exec_scb(HCS * pCurHcb, SCB * pCurScb)
++static void initio_exec_scb(struct initio_host * host, struct scsi_ctrl_blk * scb)
+ {
+-	ULONG flags;
++	unsigned long flags;
+ 
+-	pCurScb->SCB_Mode = 0;
++	scb->mode = 0;
+ 
+-	pCurScb->SCB_SGIdx = 0;
+-	pCurScb->SCB_SGMax = pCurScb->SCB_SGLen;
++	scb->sgidx = 0;
++	scb->sgmax = scb->sglen;
+ 
+-	spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
++	spin_lock_irqsave(&host->semaph_lock, flags);
+ 
+-	tul_append_pend_scb(pCurHcb, pCurScb);	/* Append this SCB to Pending queue */
++	initio_append_pend_scb(host, scb);	/* Append this SCB to Pending queue */
+ 
+ /* VVVVV 07/21/98 */
+-	if (pCurHcb->HCS_Semaph == 1) {
+-		TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
+-		/* disable Jasmin SCSI Int        */
+-		pCurHcb->HCS_Semaph = 0;
+-
+-        	spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
+-
+-		tulip_main(pCurHcb);
++	if (host->semaph == 1) {
++		/* Disable Jasmin SCSI Int */
++		outb(0x1F, host->addr + TUL_Mask);
++		host->semaph = 0;
++		spin_unlock_irqrestore(&host->semaph_lock, flags);
+ 
+-        	spin_lock_irqsave(&(pCurHcb->HCS_SemaphLock), flags);
++		tulip_main(host);
+ 
+-		pCurHcb->HCS_Semaph = 1;
+-		TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F);
++		spin_lock_irqsave(&host->semaph_lock, flags);
++		host->semaph = 1;
++		outb(0x0F, host->addr + TUL_Mask);
+ 	}
+-	spin_unlock_irqrestore(&(pCurHcb->HCS_SemaphLock), flags);
++	spin_unlock_irqrestore(&host->semaph_lock, flags);
+ 	return;
+ }
+ 
+ /***************************************************************************/
+-static int tul_isr(HCS * pCurHcb)
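++/**
++ *	initio_isr		-	Interrupt handling core
++ *	@host: InitIO host taking the interrupt
++ *
++ *	If an interrupt is pending and no other path holds the semaphore,
++ *	mask the chip interrupt, run the state machine and unmask again.
++ *	Returns 1 if any work was done, otherwise 0.
++ */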
++static int initio_isr(struct initio_host * host)
+ {
+-	/* Enter critical section       */
+-
+-	if (TUL_RD(pCurHcb->HCS_Base, TUL_Int) & TSS_INT_PENDING) {
+-		if (pCurHcb->HCS_Semaph == 1) {
+-			TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x1F);
++	if (inb(host->addr + TUL_Int) & TSS_INT_PENDING) {
++		if (host->semaph == 1) {
++			outb(0x1F, host->addr + TUL_Mask);
+ 			/* Disable Tulip SCSI Int */
+-			pCurHcb->HCS_Semaph = 0;
++			host->semaph = 0;
+ 
+-			tulip_main(pCurHcb);
++			tulip_main(host);
+ 
+-			pCurHcb->HCS_Semaph = 1;
+-			TUL_WR(pCurHcb->HCS_Base + TUL_Mask, 0x0F);
+-			return (1);
++			host->semaph = 1;
++			outb(0x0F, host->addr + TUL_Mask);
++			return 1;
+ 		}
+ 	}
+-	return (0);
++	return 0;
+ }
+ 
+-/***************************************************************************/
+-int tulip_main(HCS * pCurHcb)
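++/**
++ *	tulip_main		-	Run the command state machine
++ *	@host: InitIO host to process
++ *
++ *	Run the state machine and reap completed SCBs: requeue commands
++ *	that hit a full target queue, issue an automatic REQUEST SENSE
++ *	for check conditions and post the results. Returns 1 when there
++ *	is nothing further to do until the next interrupt.
++ */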
++static int tulip_main(struct initio_host * host)
+ {
+-	SCB *pCurScb;
++	struct scsi_ctrl_blk *scb;
+ 
+ 	for (;;) {
++		tulip_scsi(host);	/* Call tulip_scsi              */
+ 
+-		tulip_scsi(pCurHcb);	/* Call tulip_scsi              */
+-
+-		while ((pCurScb = tul_find_done_scb(pCurHcb)) != NULL) {	/* find done entry */
+-			if (pCurScb->SCB_TaStat == INI_QUEUE_FULL) {
+-				pCurHcb->HCS_MaxTags[pCurScb->SCB_Target] =
+-				    pCurHcb->HCS_ActTags[pCurScb->SCB_Target] - 1;
+-				pCurScb->SCB_TaStat = 0;
+-				tul_append_pend_scb(pCurHcb, pCurScb);
++		/* Walk the list of completed SCBs */
++		while ((scb = initio_find_done_scb(host)) != NULL) {	/* find done entry */
++			if (scb->tastat == INI_QUEUE_FULL) {
++				host->max_tags[scb->target] =
++				    host->act_tags[scb->target] - 1;
++				scb->tastat = 0;
++				initio_append_pend_scb(host, scb);
+ 				continue;
+ 			}
+-			if (!(pCurScb->SCB_Mode & SCM_RSENS)) {		/* not in auto req. sense mode */
+-				if (pCurScb->SCB_TaStat == 2) {
++			if (!(scb->mode & SCM_RSENS)) {		/* not in auto req. sense mode */
++				if (scb->tastat == 2) {
+ 
+ 					/* clr sync. nego flag */
+ 
+-					if (pCurScb->SCB_Flags & SCF_SENSE) {
+-						BYTE len;
+-						len = pCurScb->SCB_SenseLen;
++					if (scb->flags & SCF_SENSE) {
++						u8 len;
++						len = scb->senselen;
+ 						if (len == 0)
+ 							len = 1;
+-						pCurScb->SCB_BufLen = pCurScb->SCB_SenseLen;
+-						pCurScb->SCB_BufPtr = pCurScb->SCB_SensePtr;
+-						pCurScb->SCB_Flags &= ~(SCF_SG | SCF_DIR);	/* for xfer_data_in */
+-/*                      pCurScb->SCB_Flags |= SCF_NO_DCHK;      */
+-						/* so, we won't report worng direction in xfer_data_in,
++						scb->buflen = scb->senselen;
++						scb->bufptr = scb->senseptr;
++						scb->flags &= ~(SCF_SG | SCF_DIR);	/* for xfer_data_in */
++						/* so, we won't report wrong direction in xfer_data_in,
+ 						   and won't report HOST_DO_DU in state_6 */
+-						pCurScb->SCB_Mode = SCM_RSENS;
+-						pCurScb->SCB_Ident &= 0xBF;	/* Disable Disconnect */
+-						pCurScb->SCB_TagMsg = 0;
+-						pCurScb->SCB_TaStat = 0;
+-						pCurScb->SCB_CDBLen = 6;
+-						pCurScb->SCB_CDB[0] = SCSICMD_RequestSense;
+-						pCurScb->SCB_CDB[1] = 0;
+-						pCurScb->SCB_CDB[2] = 0;
+-						pCurScb->SCB_CDB[3] = 0;
+-						pCurScb->SCB_CDB[4] = len;
+-						pCurScb->SCB_CDB[5] = 0;
+-						tul_push_pend_scb(pCurHcb, pCurScb);
++						scb->mode = SCM_RSENS;
++						scb->ident &= 0xBF;	/* Disable Disconnect */
++						scb->tagmsg = 0;
++						scb->tastat = 0;
++						scb->cdblen = 6;
++						scb->cdb[0] = SCSICMD_RequestSense;
++						scb->cdb[1] = 0;
++						scb->cdb[2] = 0;
++						scb->cdb[3] = 0;
++						scb->cdb[4] = len;
++						scb->cdb[5] = 0;
++						initio_push_pend_scb(host, scb);
+ 						break;
+ 					}
+ 				}
+ 			} else {	/* in request sense mode */
+ 
+-				if (pCurScb->SCB_TaStat == 2) {		/* check contition status again after sending
++				if (scb->tastat == 2) {		/* check condition status again after sending
+ 									   request sense cmd 0x3 */
+-					pCurScb->SCB_HaStat = HOST_BAD_PHAS;
++					scb->hastat = HOST_BAD_PHAS;
+ 				}
+-				pCurScb->SCB_TaStat = 2;
++				scb->tastat = 2;
+ 			}
+-			pCurScb->SCB_Flags |= SCF_DONE;
+-			if (pCurScb->SCB_Flags & SCF_POST) {
+-				(*pCurScb->SCB_Post) ((BYTE *) pCurHcb, (BYTE *) pCurScb);
++			scb->flags |= SCF_DONE;
++			if (scb->flags & SCF_POST) {
++				/* FIXME: only one post method and lose casts */
++				(*scb->post) ((u8 *) host, (u8 *) scb);
+ 			}
+ 		}		/* while */
+-
+ 		/* find_active: */
+-		if (TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0) & TSS_INT_PENDING)
++		if (inb(host->addr + TUL_SStatus0) & TSS_INT_PENDING)
+ 			continue;
+-
+-		if (pCurHcb->HCS_ActScb) {	/* return to OS and wait for xfer_done_ISR/Selected_ISR */
++		if (host->active)	/* return to OS and wait for xfer_done_ISR/Selected_ISR */
+ 			return 1;	/* return to OS, enable interrupt */
+-		}
+ 		/* Check pending SCB            */
+-		if (tul_find_first_pend_scb(pCurHcb) == NULL) {
++		if (initio_find_first_pend_scb(host) == NULL)
+ 			return 1;	/* return to OS, enable interrupt */
+-		}
+ 	}			/* End of for loop */
+ 	/* statement won't reach here */
+ }
+ 
+-
+-
+-
+-/*+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
+-/***************************************************************************/
+-/***************************************************************************/
+-/***************************************************************************/
+-/***************************************************************************/
+-
+-/***************************************************************************/
+-void tulip_scsi(HCS * pCurHcb)
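++/**
++ *	tulip_scsi		-	SCSI dispatch
++ *	@host: InitIO host to process
++ *
++ *	Service any pending SCSI interrupt (reset, reselection, timeout
++ *	or bus service) first. If the bus is then idle, take the first
++ *	pending SCB and start selection, a bus device reset or an abort
++ *	as its opcode requests.
++ */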
++static void tulip_scsi(struct initio_host * host)
+ {
+-	SCB *pCurScb;
+-	TCS *pCurTcb;
++	struct scsi_ctrl_blk *scb;
++	struct target_control *active_tc;
+ 
+ 	/* make sure to service interrupt asap */
+-
+-	if ((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0)) & TSS_INT_PENDING) {
+-
+-		pCurHcb->HCS_Phase = pCurHcb->HCS_JSStatus0 & TSS_PH_MASK;
+-		pCurHcb->HCS_JSStatus1 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1);
+-		pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt);
+-		if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) {	/* SCSI bus reset detected      */
+-			int_tul_scsi_rst(pCurHcb);
++	if ((host->jsstatus0 = inb(host->addr + TUL_SStatus0)) & TSS_INT_PENDING) {
++		host->phase = host->jsstatus0 & TSS_PH_MASK;
++		host->jsstatus1 = inb(host->addr + TUL_SStatus1);
++		host->jsint = inb(host->addr + TUL_SInt);
++		if (host->jsint & TSS_SCSIRST_INT) {	/* SCSI bus reset detected      */
++			int_initio_scsi_rst(host);
+ 			return;
+ 		}
+-		if (pCurHcb->HCS_JSInt & TSS_RESEL_INT) {	/* if selected/reselected interrupt */
+-			if (int_tul_resel(pCurHcb) == 0)
+-				tul_next_state(pCurHcb);
++		if (host->jsint & TSS_RESEL_INT) {	/* if selected/reselected interrupt */
++			if (int_initio_resel(host) == 0)
++				initio_next_state(host);
+ 			return;
+ 		}
+-		if (pCurHcb->HCS_JSInt & TSS_SEL_TIMEOUT) {
+-			int_tul_busfree(pCurHcb);
++		if (host->jsint & TSS_SEL_TIMEOUT) {
++			int_initio_busfree(host);
+ 			return;
+ 		}
+-		if (pCurHcb->HCS_JSInt & TSS_DISC_INT) {	/* BUS disconnection            */
+-			int_tul_busfree(pCurHcb);	/* unexpected bus free or sel timeout */
++		if (host->jsint & TSS_DISC_INT) {	/* BUS disconnection            */
++			int_initio_busfree(host);	/* unexpected bus free or sel timeout */
+ 			return;
+ 		}
+-		if (pCurHcb->HCS_JSInt & (TSS_FUNC_COMP | TSS_BUS_SERV)) {	/* func complete or Bus service */
+-			if ((pCurScb = pCurHcb->HCS_ActScb) != NULL)
+-				tul_next_state(pCurHcb);
++		if (host->jsint & (TSS_FUNC_COMP | TSS_BUS_SERV)) {	/* func complete or Bus service */
++			if ((scb = host->active) != NULL)
++				initio_next_state(host);
+ 			return;
+ 		}
+ 	}
+-	if (pCurHcb->HCS_ActScb != NULL)
++	if (host->active != NULL)
+ 		return;
+ 
+-	if ((pCurScb = tul_find_first_pend_scb(pCurHcb)) == NULL)
++	if ((scb = initio_find_first_pend_scb(host)) == NULL)
+ 		return;
+ 
+ 	/* program HBA's SCSI ID & target SCSI ID */
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SScsiId,
+-	     (pCurHcb->HCS_SCSI_ID << 4) | (pCurScb->SCB_Target & 0x0F));
+-	if (pCurScb->SCB_Opcode == ExecSCSI) {
+-		pCurTcb = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target];
++	outb((host->scsi_id << 4) | (scb->target & 0x0F),
++		host->addr + TUL_SScsiId);
++	if (scb->opcode == ExecSCSI) {
++		active_tc = &host->targets[scb->target];
+ 
+-		if (pCurScb->SCB_TagMsg)
+-			pCurTcb->TCS_DrvFlags |= TCF_DRV_EN_TAG;
++		if (scb->tagmsg)
++			active_tc->drv_flags |= TCF_DRV_EN_TAG;
+ 		else
+-			pCurTcb->TCS_DrvFlags &= ~TCF_DRV_EN_TAG;
++			active_tc->drv_flags &= ~TCF_DRV_EN_TAG;
+ 
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurTcb->TCS_JS_Period);
+-		if ((pCurTcb->TCS_Flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) {	/* do wdtr negotiation          */
+-			tul_select_atn_stop(pCurHcb, pCurScb);
++		outb(active_tc->js_period, host->addr + TUL_SPeriod);
++		if ((active_tc->flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) {	/* do wdtr negotiation          */
++			initio_select_atn_stop(host, scb);
+ 		} else {
+-			if ((pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) {	/* do sync negotiation          */
+-				tul_select_atn_stop(pCurHcb, pCurScb);
++			if ((active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) {	/* do sync negotiation          */
++				initio_select_atn_stop(host, scb);
+ 			} else {
+-				if (pCurScb->SCB_TagMsg)
+-					tul_select_atn3(pCurHcb, pCurScb);
++				if (scb->tagmsg)
++					initio_select_atn3(host, scb);
+ 				else
+-					tul_select_atn(pCurHcb, pCurScb);
++					initio_select_atn(host, scb);
+ 			}
+ 		}
+-		if (pCurScb->SCB_Flags & SCF_POLL) {
+-			while (wait_tulip(pCurHcb) != -1) {
+-				if (tul_next_state(pCurHcb) == -1)
++		if (scb->flags & SCF_POLL) {
++			while (wait_tulip(host) != -1) {
++				if (initio_next_state(host) == -1)
+ 					break;
+ 			}
+ 		}
+-	} else if (pCurScb->SCB_Opcode == BusDevRst) {
+-		tul_select_atn_stop(pCurHcb, pCurScb);
+-		pCurScb->SCB_NxtStat = 8;
+-		if (pCurScb->SCB_Flags & SCF_POLL) {
+-			while (wait_tulip(pCurHcb) != -1) {
+-				if (tul_next_state(pCurHcb) == -1)
++	} else if (scb->opcode == BusDevRst) {
++		initio_select_atn_stop(host, scb);
++		scb->next_state = 8;
++		if (scb->flags & SCF_POLL) {
++			while (wait_tulip(host) != -1) {
++				if (initio_next_state(host) == -1)
+ 					break;
+ 			}
+ 		}
+-	} else if (pCurScb->SCB_Opcode == AbortCmd) {
+-		if (tul_abort_srb(pCurHcb, pCurScb->SCB_Srb) != 0) {
+-
+-
+-			tul_unlink_pend_scb(pCurHcb, pCurScb);
+-
+-			tul_release_scb(pCurHcb, pCurScb);
++	} else if (scb->opcode == AbortCmd) {
++		if (initio_abort_srb(host, scb->srb) != 0) {
++			initio_unlink_pend_scb(host, scb);
++			initio_release_scb(host, scb);
+ 		} else {
+-			pCurScb->SCB_Opcode = BusDevRst;
+-			tul_select_atn_stop(pCurHcb, pCurScb);
+-			pCurScb->SCB_NxtStat = 8;
++			scb->opcode = BusDevRst;
++			initio_select_atn_stop(host, scb);
++			scb->next_state = 8;
+ 		}
+-
+-/* 08/03/98 */
+ 	} else {
+-		tul_unlink_pend_scb(pCurHcb, pCurScb);
+-		pCurScb->SCB_HaStat = 0x16;	/* bad command */
+-		tul_append_done_scb(pCurHcb, pCurScb);
++		initio_unlink_pend_scb(host, scb);
++		scb->hastat = 0x16;	/* bad command */
++		initio_append_done_scb(host, scb);
+ 	}
+ 	return;
+ }
+ 
++/**
++ *	initio_next_state		-	Next SCSI state
++ *	@host: InitIO host we are processing
++ *
++ *	Progress the active command block along the state machine
++ *	until we hit a state where we must wait for activity to occur.
++ *
++ *	Returns zero or a negative code.
++ */
+ 
+-/***************************************************************************/
+-int tul_next_state(HCS * pCurHcb)
++static int initio_next_state(struct initio_host * host)
+ {
+ 	int next;
+ 
+-	next = pCurHcb->HCS_ActScb->SCB_NxtStat;
++	next = host->active->next_state;
+ 	for (;;) {
+ 		switch (next) {
+ 		case 1:
+-			next = tul_state_1(pCurHcb);
++			next = initio_state_1(host);
+ 			break;
+ 		case 2:
+-			next = tul_state_2(pCurHcb);
++			next = initio_state_2(host);
+ 			break;
+ 		case 3:
+-			next = tul_state_3(pCurHcb);
++			next = initio_state_3(host);
+ 			break;
+ 		case 4:
+-			next = tul_state_4(pCurHcb);
++			next = initio_state_4(host);
+ 			break;
+ 		case 5:
+-			next = tul_state_5(pCurHcb);
++			next = initio_state_5(host);
+ 			break;
+ 		case 6:
+-			next = tul_state_6(pCurHcb);
++			next = initio_state_6(host);
+ 			break;
+ 		case 7:
+-			next = tul_state_7(pCurHcb);
++			next = initio_state_7(host);
+ 			break;
+ 		case 8:
+-			return (tul_bus_device_reset(pCurHcb));
++			return initio_bus_device_reset(host);
+ 		default:
+-			return (tul_bad_seq(pCurHcb));
++			return initio_bad_seq(host);
+ 		}
+ 		if (next <= 0)
+ 			return next;
+@@ -1554,338 +1314,363 @@
+ }
+ 
+ 
+-/***************************************************************************/
+-/* sTate after selection with attention & stop */
+-int tul_state_1(HCS * pCurHcb)
++/**
++ *	initio_state_1		-	SCSI state machine
++ *	@host: InitIO host we are controlling
++ *
++ *	Perform SCSI state processing for Select/Attention/Stop
++ */
++
++static int initio_state_1(struct initio_host * host)
+ {
+-	SCB *pCurScb = pCurHcb->HCS_ActScb;
+-	TCS *pCurTcb = pCurHcb->HCS_ActTcs;
++	struct scsi_ctrl_blk *scb = host->active;
++	struct target_control *active_tc = host->active_tc;
+ #if DEBUG_STATE
+ 	printk("-s1-");
+ #endif
+ 
+-	tul_unlink_pend_scb(pCurHcb, pCurScb);
+-	tul_append_busy_scb(pCurHcb, pCurScb);
++	/* Move the SCB from pending to busy */
++	initio_unlink_pend_scb(host, scb);
++	initio_append_busy_scb(host, scb);
+ 
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurTcb->TCS_SConfig0);
++	outb(active_tc->sconfig0, host->addr + TUL_SConfig );
+ 	/* ATN on */
+-	if (pCurHcb->HCS_Phase == MSG_OUT) {
+-
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, (TSC_EN_BUS_IN | TSC_HW_RESELECT));
+-
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_Ident);
+-
+-		if (pCurScb->SCB_TagMsg) {
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagMsg);
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagId);
+-		}
+-		if ((pCurTcb->TCS_Flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) {
+-
+-			pCurTcb->TCS_Flags |= TCF_WDTR_DONE;
+-
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND);
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 2);	/* Extended msg length */
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3);	/* Sync request */
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1);	/* Start from 16 bits */
+-		} else if ((pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) {
+-
+-			pCurTcb->TCS_Flags |= TCF_SYNC_DONE;
+-
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND);
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3);	/* extended msg length */
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1);	/* sync request */
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, tul_rate_tbl[pCurTcb->TCS_Flags & TCF_SCSI_RATE]);
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MAX_OFFSET);	/* REQ/ACK offset */
++	if (host->phase == MSG_OUT) {
++		outb(TSC_EN_BUS_IN | TSC_HW_RESELECT, host->addr + TUL_SCtrl1);
++		outb(scb->ident, host->addr + TUL_SFifo);
++
++		if (scb->tagmsg) {
++			outb(scb->tagmsg, host->addr + TUL_SFifo);
++			outb(scb->tagid, host->addr + TUL_SFifo);
++		}
++		if ((active_tc->flags & (TCF_WDTR_DONE | TCF_NO_WDTR)) == 0) {
++			active_tc->flags |= TCF_WDTR_DONE;
++			outb(MSG_EXTEND, host->addr + TUL_SFifo);
++			outb(2, host->addr + TUL_SFifo);	/* Extended msg length */
++			outb(3, host->addr + TUL_SFifo);	/* Sync request */
++			outb(1, host->addr + TUL_SFifo);	/* Start from 16 bits */
++		} else if ((active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) {
++			active_tc->flags |= TCF_SYNC_DONE;
++			outb(MSG_EXTEND, host->addr + TUL_SFifo);
++			outb(3, host->addr + TUL_SFifo);	/* extended msg length */
++			outb(1, host->addr + TUL_SFifo);	/* sync request */
++			outb(initio_rate_tbl[active_tc->flags & TCF_SCSI_RATE], host->addr + TUL_SFifo);
++			outb(MAX_OFFSET, host->addr + TUL_SFifo);	/* REQ/ACK offset */
+ 		}
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-		if (wait_tulip(pCurHcb) == -1)
+-			return (-1);
++		outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++		if (wait_tulip(host) == -1)
++			return -1;
+ 	}
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, (TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)));
+-	return (3);
++	outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++	outb((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)), host->addr + TUL_SSignal);
++	/* Into state 3: before CDB xfer */
++	return 3;
+ }
+ 
+ 
+-/***************************************************************************/
+-/* state after selection with attention */
+-/* state after selection with attention3 */
+-int tul_state_2(HCS * pCurHcb)
++/**
++ *	initio_state_2		-	SCSI state machine
++ *	@host: InitIO host we are controlling
++ *
++ * state after selection with attention
++ * state after selection with attention3
++ */
++
++static int initio_state_2(struct initio_host * host)
+ {
+-	SCB *pCurScb = pCurHcb->HCS_ActScb;
+-	TCS *pCurTcb = pCurHcb->HCS_ActTcs;
++	struct scsi_ctrl_blk *scb = host->active;
++	struct target_control *active_tc = host->active_tc;
+ #if DEBUG_STATE
+ 	printk("-s2-");
+ #endif
+ 
+-	tul_unlink_pend_scb(pCurHcb, pCurScb);
+-	tul_append_busy_scb(pCurHcb, pCurScb);
++	initio_unlink_pend_scb(host, scb);
++	initio_append_busy_scb(host, scb);
+ 
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurTcb->TCS_SConfig0);
++	outb(active_tc->sconfig0, host->addr + TUL_SConfig);
+ 
+-	if (pCurHcb->HCS_JSStatus1 & TSS_CMD_PH_CMP) {
+-		return (4);
+-	}
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, (TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)));
+-	return (3);
++	if (host->jsstatus1 & TSS_CMD_PH_CMP)
++		return 4;
++
++	outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++	outb((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)), host->addr + TUL_SSignal);
++	/* Into state 3: before CDB xfer */
++	return 3;
+ }
+ 
+-/***************************************************************************/
+-/* state before CDB xfer is done */
+-int tul_state_3(HCS * pCurHcb)
++/**
++ *	initio_state_3		-	SCSI state machine
++ *	@host: InitIO host we are controlling
++ *
++ * state before CDB xfer is done
++ */
++
++static int initio_state_3(struct initio_host * host)
+ {
+-	SCB *pCurScb = pCurHcb->HCS_ActScb;
+-	TCS *pCurTcb = pCurHcb->HCS_ActTcs;
++	struct scsi_ctrl_blk *scb = host->active;
++	struct target_control *active_tc = host->active_tc;
+ 	int i;
+ 
+ #if DEBUG_STATE
+ 	printk("-s3-");
+ #endif
+ 	for (;;) {
+-		switch (pCurHcb->HCS_Phase) {
++		switch (host->phase) {
+ 		case CMD_OUT:	/* Command out phase            */
+-			for (i = 0; i < (int) pCurScb->SCB_CDBLen; i++)
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_CDB[i]);
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-			if (wait_tulip(pCurHcb) == -1)
+-				return (-1);
+-			if (pCurHcb->HCS_Phase == CMD_OUT) {
+-				return (tul_bad_seq(pCurHcb));
+-			}
+-			return (4);
++			for (i = 0; i < (int) scb->cdblen; i++)
++				outb(scb->cdb[i], host->addr + TUL_SFifo);
++			outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++			if (wait_tulip(host) == -1)
++				return -1;
++			if (host->phase == CMD_OUT)
++				return initio_bad_seq(host);
++			return 4;
+ 
+ 		case MSG_IN:	/* Message in phase             */
+-			pCurScb->SCB_NxtStat = 3;
+-			if (tul_msgin(pCurHcb) == -1)
+-				return (-1);
++			scb->next_state = 3;
++			if (initio_msgin(host) == -1)
++				return -1;
+ 			break;
+ 
+ 		case STATUS_IN:	/* Status phase                 */
+-			if (tul_status_msg(pCurHcb) == -1)
+-				return (-1);
++			if (initio_status_msg(host) == -1)
++				return -1;
+ 			break;
+ 
+ 		case MSG_OUT:	/* Message out phase            */
+-			if (pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) {
++			if (active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) {
++				outb(MSG_NOP, host->addr + TUL_SFifo);		/* msg nop */
++				outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++				if (wait_tulip(host) == -1)
++					return -1;
++			} else {
++				active_tc->flags |= TCF_SYNC_DONE;
+ 
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP);		/* msg nop */
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-				if (wait_tulip(pCurHcb) == -1)
+-					return (-1);
+-
+-			} else {
+-				pCurTcb->TCS_Flags |= TCF_SYNC_DONE;
+-
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND);
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3);	/* ext. msg len */
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1);	/* sync request */
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, tul_rate_tbl[pCurTcb->TCS_Flags & TCF_SCSI_RATE]);
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MAX_OFFSET);	/* REQ/ACK offset */
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-				if (wait_tulip(pCurHcb) == -1)
+-					return (-1);
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7));
++				outb(MSG_EXTEND, host->addr + TUL_SFifo);
++				outb(3, host->addr + TUL_SFifo);	/* ext. msg len */
++				outb(1, host->addr + TUL_SFifo);	/* sync request */
++				outb(initio_rate_tbl[active_tc->flags & TCF_SCSI_RATE], host->addr + TUL_SFifo);
++				outb(MAX_OFFSET, host->addr + TUL_SFifo);	/* REQ/ACK offset */
++				outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++				if (wait_tulip(host) == -1)
++					return -1;
++				outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++				outb(inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7), host->addr + TUL_SSignal);
+ 
+ 			}
+ 			break;
+-
+ 		default:
+-			return (tul_bad_seq(pCurHcb));
++			return initio_bad_seq(host);
+ 		}
+ 	}
+ }
+ 
+-
+-/***************************************************************************/
+-int tul_state_4(HCS * pCurHcb)
++/**
++ *	initio_state_4		-	SCSI state machine
++ *	@host: InitIO host we are controlling
++ *
++ *	State 4: process the status and message phases for the active
++ *	command until a data transfer is started, or move to state 6
++ *	when no transfer is required.
++ */
++
++static int initio_state_4(struct initio_host * host)
+ {
+-	SCB *pCurScb = pCurHcb->HCS_ActScb;
++	struct scsi_ctrl_blk *scb = host->active;
+ 
+ #if DEBUG_STATE
+ 	printk("-s4-");
+ #endif
+-	if ((pCurScb->SCB_Flags & SCF_DIR) == SCF_NO_XF) {
+-		return (6);	/* Go to state 6                */
++	if ((scb->flags & SCF_DIR) == SCF_NO_XF) {
++		return 6;	/* Go to state 6 (After data) */
+ 	}
+ 	for (;;) {
+-		if (pCurScb->SCB_BufLen == 0)
+-			return (6);	/* Go to state 6                */
++		if (scb->buflen == 0)
++			return 6;
+ 
+-		switch (pCurHcb->HCS_Phase) {
++		switch (host->phase) {
+ 
+ 		case STATUS_IN:	/* Status phase                 */
+-			if ((pCurScb->SCB_Flags & SCF_DIR) != 0) {	/* if direction bit set then report data underrun */
+-				pCurScb->SCB_HaStat = HOST_DO_DU;
+-			}
+-			if ((tul_status_msg(pCurHcb)) == -1)
+-				return (-1);
++			if ((scb->flags & SCF_DIR) != 0)	/* if direction bit set then report data underrun */
++				scb->hastat = HOST_DO_DU;
++			if ((initio_status_msg(host)) == -1)
++				return -1;
+ 			break;
+ 
+ 		case MSG_IN:	/* Message in phase             */
+-			pCurScb->SCB_NxtStat = 0x4;
+-			if (tul_msgin(pCurHcb) == -1)
+-				return (-1);
++			scb->next_state = 0x4;
++			if (initio_msgin(host) == -1)
++				return -1;
+ 			break;
+ 
+ 		case MSG_OUT:	/* Message out phase            */
+-			if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) {
+-				pCurScb->SCB_BufLen = 0;
+-				pCurScb->SCB_HaStat = HOST_DO_DU;
+-				if (tul_msgout_ide(pCurHcb) == -1)
+-					return (-1);
+-				return (6);	/* Go to state 6                */
+-			} else {
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP);		/* msg nop */
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-				if (wait_tulip(pCurHcb) == -1)
+-					return (-1);
++			if (host->jsstatus0 & TSS_PAR_ERROR) {
++				scb->buflen = 0;
++				scb->hastat = HOST_DO_DU;
++				if (initio_msgout_ide(host) == -1)
++					return -1;
++				return 6;
++			} else {
++				outb(MSG_NOP, host->addr + TUL_SFifo);		/* msg nop */
++				outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++				if (wait_tulip(host) == -1)
++					return -1;
+ 			}
+ 			break;
+ 
+ 		case DATA_IN:	/* Data in phase                */
+-			return (tul_xfer_data_in(pCurHcb));
++			return initio_xfer_data_in(host);
+ 
+ 		case DATA_OUT:	/* Data out phase               */
+-			return (tul_xfer_data_out(pCurHcb));
++			return initio_xfer_data_out(host);
+ 
+ 		default:
+-			return (tul_bad_seq(pCurHcb));
++			return initio_bad_seq(host);
+ 		}
+ 	}
+ }
+ 
+ 
+-/***************************************************************************/
+-/* state after dma xfer done or phase change before xfer done */
+-int tul_state_5(HCS * pCurHcb)
++/**
++ *	initio_state_5		-	SCSI state machine
++ *	@host: InitIO host we are controlling
++ *
++ *	State after dma xfer done or phase change before xfer done
++ */
++
++static int initio_state_5(struct initio_host * host)
+ {
+-	SCB *pCurScb = pCurHcb->HCS_ActScb;
++	struct scsi_ctrl_blk *scb = host->active;
+ 	long cnt, xcnt;		/* cannot use unsigned !! code: if (xcnt < 0) */
+ 
+ #if DEBUG_STATE
+ 	printk("-s5-");
+ #endif
+-/*------ get remaining count -------*/
++	/*------ get remaining count -------*/
++	cnt = inl(host->addr + TUL_SCnt0) & 0x0FFFFFF;
+ 
+-	cnt = TUL_RDLONG(pCurHcb->HCS_Base, TUL_SCnt0) & 0x0FFFFFF;
+-
+-	if (TUL_RD(pCurHcb->HCS_Base, TUL_XCmd) & 0x20) {
++	if (inb(host->addr + TUL_XCmd) & 0x20) {
+ 		/* ----------------------- DATA_IN ----------------------------- */
+ 		/* check scsi parity error */
+-		if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) {
+-			pCurScb->SCB_HaStat = HOST_DO_DU;
+-		}
+-		if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND) {	/* DMA xfer pending, Send STOP  */
++		if (host->jsstatus0 & TSS_PAR_ERROR)
++			scb->hastat = HOST_DO_DU;
++		if (inb(host->addr + TUL_XStatus) & XPEND) {	/* DMA xfer pending, Send STOP  */
+ 			/* tell Hardware  scsi xfer has been terminated */
+-			TUL_WR(pCurHcb->HCS_Base + TUL_XCtrl, TUL_RD(pCurHcb->HCS_Base, TUL_XCtrl) | 0x80);
++			outb(inb(host->addr + TUL_XCtrl) | 0x80, host->addr + TUL_XCtrl);
+ 			/* wait until DMA xfer not pending */
+-			while (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND);
++			while (inb(host->addr + TUL_XStatus) & XPEND)
++				cpu_relax();
+ 		}
+ 	} else {
+-/*-------- DATA OUT -----------*/
+-		if ((TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1) & TSS_XFER_CMP) == 0) {
+-			if (pCurHcb->HCS_ActTcs->TCS_JS_Period & TSC_WIDE_SCSI)
+-				cnt += (TUL_RD(pCurHcb->HCS_Base, TUL_SFifoCnt) & 0x1F) << 1;
++		/*-------- DATA OUT -----------*/
++		if ((inb(host->addr + TUL_SStatus1) & TSS_XFER_CMP) == 0) {
++			if (host->active_tc->js_period & TSC_WIDE_SCSI)
++				cnt += (inb(host->addr + TUL_SFifoCnt) & 0x1F) << 1;
+ 			else
+-				cnt += (TUL_RD(pCurHcb->HCS_Base, TUL_SFifoCnt) & 0x1F);
++				cnt += (inb(host->addr + TUL_SFifoCnt) & 0x1F);
+ 		}
+-		if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & XPEND) {	/* if DMA xfer is pending, abort DMA xfer */
+-			TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_ABT);
++		if (inb(host->addr + TUL_XStatus) & XPEND) {	/* if DMA xfer is pending, abort DMA xfer */
++			outb(TAX_X_ABT, host->addr + TUL_XCmd);
+ 			/* wait Abort DMA xfer done */
+-			while ((TUL_RD(pCurHcb->HCS_Base, TUL_Int) & XABT) == 0);
+-		}
+-		if ((cnt == 1) && (pCurHcb->HCS_Phase == DATA_OUT)) {
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-			if (wait_tulip(pCurHcb) == -1) {
+-				return (-1);
++			while ((inb(host->addr + TUL_Int) & XABT) == 0)
++				cpu_relax();
+ 			}
++		if ((cnt == 1) && (host->phase == DATA_OUT)) {
++			outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++			if (wait_tulip(host) == -1)
++				return -1;
+ 			cnt = 0;
+ 		} else {
+-			if ((TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1) & TSS_XFER_CMP) == 0)
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
++			if ((inb(host->addr + TUL_SStatus1) & TSS_XFER_CMP) == 0)
++				outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
+ 		}
+ 	}
+-
+ 	if (cnt == 0) {
+-		pCurScb->SCB_BufLen = 0;
+-		return (6);	/* Go to state 6                */
++		scb->buflen = 0;
++		return 6;	/* After Data */
+ 	}
+ 	/* Update active data pointer */
+-	xcnt = (long) pCurScb->SCB_BufLen - cnt;	/* xcnt== bytes already xferred */
+-	pCurScb->SCB_BufLen = (U32) cnt;	/* cnt == bytes left to be xferred */
+-	if (pCurScb->SCB_Flags & SCF_SG) {
+-		register SG *sgp;
+-		ULONG i;
+-
+-		sgp = &pCurScb->SCB_SGList[pCurScb->SCB_SGIdx];
+-		for (i = pCurScb->SCB_SGIdx; i < pCurScb->SCB_SGMax; sgp++, i++) {
+-			xcnt -= (long) sgp->SG_Len;
++	xcnt = (long) scb->buflen - cnt;	/* xcnt== bytes already xferred */
++	scb->buflen = (u32) cnt;		/* cnt == bytes left to be xferred */
++	if (scb->flags & SCF_SG) {
++		struct sg_entry *sgp;
++		unsigned long i;
++
++		sgp = &scb->sglist[scb->sgidx];
++		for (i = scb->sgidx; i < scb->sgmax; sgp++, i++) {
++			xcnt -= (long) sgp->len;
+ 			if (xcnt < 0) {		/* this sgp xfer half done */
+-				xcnt += (long) sgp->SG_Len;	/* xcnt == bytes xferred in this sgp */
+-				sgp->SG_Ptr += (U32) xcnt;	/* new ptr to be xfer */
+-				sgp->SG_Len -= (U32) xcnt;	/* new len to be xfer */
+-				pCurScb->SCB_BufPtr += ((U32) (i - pCurScb->SCB_SGIdx) << 3);
++				xcnt += (long) sgp->len;	/* xcnt == bytes xferred in this sgp */
++				sgp->data += (u32) xcnt;	/* new ptr to be xfer */
++				sgp->len -= (u32) xcnt;	/* new len to be xfer */
++				scb->bufptr += ((u32) (i - scb->sgidx) << 3);
+ 				/* new SG table ptr */
+-				pCurScb->SCB_SGLen = (BYTE) (pCurScb->SCB_SGMax - i);
++				scb->sglen = (u8) (scb->sgmax - i);
+ 				/* new SG table len */
+-				pCurScb->SCB_SGIdx = (WORD) i;
++				scb->sgidx = (u16) i;
+ 				/* for next disc and come in this loop */
+-				return (4);	/* Go to state 4                */
++				return 4;	/* Go to state 4                */
+ 			}
+ 			/* else (xcnt >= 0 , i.e. this sgp already xferred */
+ 		}		/* for */
+-		return (6);	/* Go to state 6                */
++		return 6;	/* Go to state 6                */
+ 	} else {
+-		pCurScb->SCB_BufPtr += (U32) xcnt;
++		scb->bufptr += (u32) xcnt;
+ 	}
+-	return (4);		/* Go to state 4                */
++	return 4;		/* Go to state 4                */
+ }
+ 
+-/***************************************************************************/
+-/* state after Data phase */
+-int tul_state_6(HCS * pCurHcb)
++/**
++ *	initio_state_6		-	SCSI state machine
++ *	@host: InitIO host we are controlling
++ *
++ *	State after Data phase
++ */
++
++static int initio_state_6(struct initio_host * host)
+ {
+-	SCB *pCurScb = pCurHcb->HCS_ActScb;
++	struct scsi_ctrl_blk *scb = host->active;
+ 
+ #if DEBUG_STATE
+ 	printk("-s6-");
+ #endif
+ 	for (;;) {
+-		switch (pCurHcb->HCS_Phase) {
++		switch (host->phase) {
+ 		case STATUS_IN:	/* Status phase                 */
+-			if ((tul_status_msg(pCurHcb)) == -1)
+-				return (-1);
++			if ((initio_status_msg(host)) == -1)
++				return -1;
+ 			break;
+ 
+ 		case MSG_IN:	/* Message in phase             */
+-			pCurScb->SCB_NxtStat = 6;
+-			if ((tul_msgin(pCurHcb)) == -1)
+-				return (-1);
++			scb->next_state = 6;
++			if ((initio_msgin(host)) == -1)
++				return -1;
+ 			break;
+ 
+ 		case MSG_OUT:	/* Message out phase            */
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP);		/* msg nop */
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-			if (wait_tulip(pCurHcb) == -1)
+-				return (-1);
++			outb(MSG_NOP, host->addr + TUL_SFifo);		/* msg nop */
++			outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++			if (wait_tulip(host) == -1)
++				return -1;
+ 			break;
+ 
+ 		case DATA_IN:	/* Data in phase                */
+-			return (tul_xpad_in(pCurHcb));
++			return initio_xpad_in(host);
+ 
+ 		case DATA_OUT:	/* Data out phase               */
+-			return (tul_xpad_out(pCurHcb));
++			return initio_xpad_out(host);
+ 
+ 		default:
+-			return (tul_bad_seq(pCurHcb));
++			return initio_bad_seq(host);
+ 		}
+ 	}
+ }
+ 
+-/***************************************************************************/
+-int tul_state_7(HCS * pCurHcb)
++/**
++ *	initio_state_7		-	SCSI state machine
++ *	@host: InitIO host we are controlling
++ *
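++ *	Flush any residual bytes in the SCSI FIFO. A data phase at this
++ *	point is a sequence error; any other phase continues at state 6.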
++ */
++
++int initio_state_7(struct initio_host * host)
+ {
+ 	int cnt, i;
+ 
+@@ -1893,1139 +1678,1029 @@
+ 	printk("-s7-");
+ #endif
+ 	/* flush SCSI FIFO */
+-	cnt = TUL_RD(pCurHcb->HCS_Base, TUL_SFifoCnt) & 0x1F;
++	cnt = inb(host->addr + TUL_SFifoCnt) & 0x1F;
+ 	if (cnt) {
+ 		for (i = 0; i < cnt; i++)
+-			TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);
++			inb(host->addr + TUL_SFifo);
+ 	}
+-	switch (pCurHcb->HCS_Phase) {
++	switch (host->phase) {
+ 	case DATA_IN:		/* Data in phase                */
+ 	case DATA_OUT:		/* Data out phase               */
+-		return (tul_bad_seq(pCurHcb));
++		return initio_bad_seq(host);
+ 	default:
+-		return (6);	/* Go to state 6                */
++		return 6;	/* Go to state 6                */
+ 	}
+ }
+ 
+-/***************************************************************************/
+-int tul_xfer_data_in(HCS * pCurHcb)
++/**
++ *	initio_xfer_data_in	-	Commence data input
++ *	@host: InitIO host in use
++ *
++ *	Commence a block of data transfer. The transfer itself will
++ *	be managed by the controller and we will get a completion (or
++ *	failure) interrupt.
++ */
++static int initio_xfer_data_in(struct initio_host * host)
+ {
+-	SCB *pCurScb = pCurHcb->HCS_ActScb;
++	struct scsi_ctrl_blk *scb = host->active;
+ 
+-	if ((pCurScb->SCB_Flags & SCF_DIR) == SCF_DOUT) {
+-		return (6);	/* wrong direction */
+-	}
+-	TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, pCurScb->SCB_BufLen);
++	if ((scb->flags & SCF_DIR) == SCF_DOUT)
++		return 6;	/* wrong direction */
+ 
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_DMA_IN);	/* 7/25/95 */
++	outl(scb->buflen, host->addr + TUL_SCnt0);
++	outb(TSC_XF_DMA_IN, host->addr + TUL_SCmd);	/* 7/25/95 */
+ 
+-	if (pCurScb->SCB_Flags & SCF_SG) {	/* S/G xfer */
+-		TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, ((ULONG) pCurScb->SCB_SGLen) << 3);
+-		TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_SG_IN);
++	if (scb->flags & SCF_SG) {	/* S/G xfer */
++		outl(((u32) scb->sglen) << 3, host->addr + TUL_XCntH);
++		outl(scb->bufptr, host->addr + TUL_XAddH);
++		outb(TAX_SG_IN, host->addr + TUL_XCmd);
+ 	} else {
+-		TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, pCurScb->SCB_BufLen);
+-		TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_IN);
++		outl(scb->buflen, host->addr + TUL_XCntH);
++		outl(scb->bufptr, host->addr + TUL_XAddH);
++		outb(TAX_X_IN, host->addr + TUL_XCmd);
+ 	}
+-	pCurScb->SCB_NxtStat = 0x5;
+-	return (0);		/* return to OS, wait xfer done , let jas_isr come in */
++	scb->next_state = 0x5;
++	return 0;		/* return to OS, wait xfer done, let jas_isr come in */
+ }
+ 
++/**
++ *	initio_xfer_data_out	-	Commence data output
++ *	@host: InitIO host in use
++ *
++ *	Commence a block of data transfer. The transfer itself will
++ *	be managed by the controller and we will get a completion (or
++ *	failure) interrupt.
++ */
+ 
+-/***************************************************************************/
+-int tul_xfer_data_out(HCS * pCurHcb)
++static int initio_xfer_data_out(struct initio_host * host)
+ {
+-	SCB *pCurScb = pCurHcb->HCS_ActScb;
++	struct scsi_ctrl_blk *scb = host->active;
+ 
+-	if ((pCurScb->SCB_Flags & SCF_DIR) == SCF_DIN) {
+-		return (6);	/* wrong direction */
+-	}
+-	TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, pCurScb->SCB_BufLen);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_DMA_OUT);
++	if ((scb->flags & SCF_DIR) == SCF_DIN)
++		return 6;	/* wrong direction */
+ 
+-	if (pCurScb->SCB_Flags & SCF_SG) {	/* S/G xfer */
+-		TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, ((ULONG) pCurScb->SCB_SGLen) << 3);
+-		TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_SG_OUT);
++	outl(scb->buflen, host->addr + TUL_SCnt0);
++	outb(TSC_XF_DMA_OUT, host->addr + TUL_SCmd);
++
++	if (scb->flags & SCF_SG) {	/* S/G xfer */
++		outl(((u32) scb->sglen) << 3, host->addr + TUL_XCntH);
++		outl(scb->bufptr, host->addr + TUL_XAddH);
++		outb(TAX_SG_OUT, host->addr + TUL_XCmd);
+ 	} else {
+-		TUL_WRLONG(pCurHcb->HCS_Base + TUL_XCntH, pCurScb->SCB_BufLen);
+-		TUL_WRLONG(pCurHcb->HCS_Base + TUL_XAddH, pCurScb->SCB_BufPtr);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_OUT);
++		outl(scb->buflen, host->addr + TUL_XCntH);
++		outl(scb->bufptr, host->addr + TUL_XAddH);
++		outb(TAX_X_OUT, host->addr + TUL_XCmd);
+ 	}
+ 
+-	pCurScb->SCB_NxtStat = 0x5;
+-	return (0);		/* return to OS, wait xfer done , let jas_isr come in */
++	scb->next_state = 0x5;
++	return 0;		/* return to OS, wait xfer done, let jas_isr come in */
+ }
+ 
+-
+-/***************************************************************************/
+-int tul_xpad_in(HCS * pCurHcb)
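++/**
++ *	initio_xpad_in		-	Pad receive
++ *	@host: InitIO host
++ *
++ *	Record a data overrun unless data checking is disabled, then
++ *	read and discard pad bytes (one or two at a time according to
++ *	the bus width) until the target leaves the DATA_IN phase.
++ */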
++int initio_xpad_in(struct initio_host * host)
+ {
+-	SCB *pCurScb = pCurHcb->HCS_ActScb;
+-	TCS *pCurTcb = pCurHcb->HCS_ActTcs;
++	struct scsi_ctrl_blk *scb = host->active;
++	struct target_control *active_tc = host->active_tc;
+ 
+-	if ((pCurScb->SCB_Flags & SCF_DIR) != SCF_NO_DCHK) {
+-		pCurScb->SCB_HaStat = HOST_DO_DU;	/* over run             */
+-	}
++	if ((scb->flags & SCF_DIR) != SCF_NO_DCHK)
++		scb->hastat = HOST_DO_DU;	/* over run             */
+ 	for (;;) {
+-		if (pCurTcb->TCS_JS_Period & TSC_WIDE_SCSI)
+-			TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 2);
++		if (active_tc->js_period & TSC_WIDE_SCSI)
++			outl(2, host->addr + TUL_SCnt0);
+ 		else
+-			TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
++			outl(1, host->addr + TUL_SCnt0);
+ 
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+-		if ((wait_tulip(pCurHcb)) == -1) {
+-			return (-1);
+-		}
+-		if (pCurHcb->HCS_Phase != DATA_IN) {
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+-			return (6);
++		outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd);
++		if (wait_tulip(host) == -1)
++			return -1;
++		if (host->phase != DATA_IN) {
++			outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++			return 6;
+ 		}
+-		TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);
++		inb(host->addr + TUL_SFifo);
+ 	}
+ }
+ 
+-int tul_xpad_out(HCS * pCurHcb)
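++/**
++ *	initio_xpad_out		-	Pad transmit
++ *	@host: InitIO host
++ *
++ *	Record a data overrun unless data checking is disabled, then
++ *	feed zero pad bytes to the target until it leaves the DATA_OUT
++ *	phase.
++ */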
++int initio_xpad_out(struct initio_host * host)
+ {
+-	SCB *pCurScb = pCurHcb->HCS_ActScb;
+-	TCS *pCurTcb = pCurHcb->HCS_ActTcs;
++	struct scsi_ctrl_blk *scb = host->active;
++	struct target_control *active_tc = host->active_tc;
+ 
+-	if ((pCurScb->SCB_Flags & SCF_DIR) != SCF_NO_DCHK) {
+-		pCurScb->SCB_HaStat = HOST_DO_DU;	/* over run             */
+-	}
++	if ((scb->flags & SCF_DIR) != SCF_NO_DCHK)
++		scb->hastat = HOST_DO_DU;	/* over run             */
+ 	for (;;) {
+-		if (pCurTcb->TCS_JS_Period & TSC_WIDE_SCSI)
+-			TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 2);
++		if (active_tc->js_period & TSC_WIDE_SCSI)
++			outl(2, host->addr + TUL_SCnt0);
+ 		else
+-			TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
++			outl(1, host->addr + TUL_SCnt0);
+ 
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 0);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-		if ((wait_tulip(pCurHcb)) == -1) {
+-			return (-1);
+-		}
+-		if (pCurHcb->HCS_Phase != DATA_OUT) {	/* Disable wide CPU to allow read 16 bits */
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT);
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+-			return (6);
++		outb(0, host->addr + TUL_SFifo);
++		outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++		if ((wait_tulip(host)) == -1)
++			return -1;
++		if (host->phase != DATA_OUT) {	/* Disable wide CPU to allow read 16 bits */
++			outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1);
++			outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++			return 6;
+ 		}
+ 	}
+ }
+ 
+-
+-/***************************************************************************/
+-int tul_status_msg(HCS * pCurHcb)
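++/**
++ *	initio_status_msg	-	Status and message phase
++ *	@host: InitIO host
++ *
++ *	Complete the command and collect the status byte, then process
++ *	the message that follows: recover from parity errors, accept
++ *	command complete and treat unsupported linked commands as a bad
++ *	sequence.
++ */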
++int initio_status_msg(struct initio_host * host)
+ {				/* status & MSG_IN */
+-	SCB *pCurScb = pCurHcb->HCS_ActScb;
+-	BYTE msg;
++	struct scsi_ctrl_blk *scb = host->active;
++	u8 msg;
++
++	outb(TSC_CMD_COMP, host->addr + TUL_SCmd);
++	if (wait_tulip(host) == -1)
++		return -1;
+ 
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_CMD_COMP);
+-	if ((wait_tulip(pCurHcb)) == -1) {
+-		return (-1);
+-	}
+ 	/* get status */
+-	pCurScb->SCB_TaStat = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);
++	scb->tastat = inb(host->addr + TUL_SFifo);
+ 
+-	if (pCurHcb->HCS_Phase == MSG_OUT) {
+-		if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) {
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_PARITY);
+-		} else {
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_NOP);
+-		}
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-		return (wait_tulip(pCurHcb));
+-	}
+-	if (pCurHcb->HCS_Phase == MSG_IN) {
+-		msg = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);
+-		if (pCurHcb->HCS_JSStatus0 & TSS_PAR_ERROR) {	/* Parity error                 */
+-			if ((tul_msgin_accept(pCurHcb)) == -1)
+-				return (-1);
+-			if (pCurHcb->HCS_Phase != MSG_OUT)
+-				return (tul_bad_seq(pCurHcb));
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_PARITY);
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-			return (wait_tulip(pCurHcb));
++	if (host->phase == MSG_OUT) {
++		if (host->jsstatus0 & TSS_PAR_ERROR)
++			outb(MSG_PARITY, host->addr + TUL_SFifo);
++		else
++			outb(MSG_NOP, host->addr + TUL_SFifo);
++		outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++		return wait_tulip(host);
++	}
++	if (host->phase == MSG_IN) {
++		msg = inb(host->addr + TUL_SFifo);
++		if (host->jsstatus0 & TSS_PAR_ERROR) {	/* Parity error                 */
++			if ((initio_msgin_accept(host)) == -1)
++				return -1;
++			if (host->phase != MSG_OUT)
++				return initio_bad_seq(host);
++			outb(MSG_PARITY, host->addr + TUL_SFifo);
++			outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++			return wait_tulip(host);
+ 		}
+ 		if (msg == 0) {	/* Command complete             */
+ 
+-			if ((pCurScb->SCB_TaStat & 0x18) == 0x10) {	/* No link support              */
+-				return (tul_bad_seq(pCurHcb));
+-			}
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT);
+-			return tul_wait_done_disc(pCurHcb);
++			if ((scb->tastat & 0x18) == 0x10)	/* No link support              */
++				return initio_bad_seq(host);
++			outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++			outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd);
++			return initio_wait_done_disc(host);
+ 
+ 		}
+-		if ((msg == MSG_LINK_COMP) || (msg == MSG_LINK_FLAG)) {
+-			if ((pCurScb->SCB_TaStat & 0x18) == 0x10)
+-				return (tul_msgin_accept(pCurHcb));
++		if (msg == MSG_LINK_COMP || msg == MSG_LINK_FLAG) {
++			if ((scb->tastat & 0x18) == 0x10)
++				return initio_msgin_accept(host);
+ 		}
+ 	}
+-	return (tul_bad_seq(pCurHcb));
++	return initio_bad_seq(host);
+ }
+ 
+ 
+-/***************************************************************************/
+ /* scsi bus free */
+-int int_tul_busfree(HCS * pCurHcb)
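++/**
++ *	int_initio_busfree	-	Bus free interrupt
++ *	@host: InitIO host seeing the event
++ *
++ *	Fail the active SCB with a selection timeout or unexpected bus
++ *	free status as appropriate, then flush the FIFO and re-enable
++ *	hardware reselection.
++ */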
++int int_initio_busfree(struct initio_host * host)
+ {
+-	SCB *pCurScb = pCurHcb->HCS_ActScb;
++	struct scsi_ctrl_blk *scb = host->active;
+ 
+-	if (pCurScb != NULL) {
+-		if (pCurScb->SCB_Status & SCB_SELECT) {		/* selection timeout */
+-			tul_unlink_pend_scb(pCurHcb, pCurScb);
+-			pCurScb->SCB_HaStat = HOST_SEL_TOUT;
+-			tul_append_done_scb(pCurHcb, pCurScb);
++	if (scb != NULL) {
++		if (scb->status & SCB_SELECT) {		/* selection timeout */
++			initio_unlink_pend_scb(host, scb);
++			scb->hastat = HOST_SEL_TOUT;
++			initio_append_done_scb(host, scb);
+ 		} else {	/* Unexpected bus free          */
+-			tul_unlink_busy_scb(pCurHcb, pCurScb);
+-			pCurScb->SCB_HaStat = HOST_BUS_FREE;
+-			tul_append_done_scb(pCurHcb, pCurScb);
+-		}
+-		pCurHcb->HCS_ActScb = NULL;
+-		pCurHcb->HCS_ActTcs = NULL;
+-	}
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);		/* Flush SCSI FIFO  */
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT);	/* Enable HW reselect       */
+-	return (-1);
++			initio_unlink_busy_scb(host, scb);
++			scb->hastat = HOST_BUS_FREE;
++			initio_append_done_scb(host, scb);
++		}
++		host->active = NULL;
++		host->active_tc = NULL;
++	}
++	outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);		/* Flush SCSI FIFO  */
++	outb(TSC_INITDEFAULT, host->addr + TUL_SConfig);
++	outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1);	/* Enable HW reselect       */
++	return -1;
+ }
+ 
+ 
+-/***************************************************************************/
+-/* scsi bus reset */
+-static int int_tul_scsi_rst(HCS * pCurHcb)
++/**
++ *	int_initio_scsi_rst	-	SCSI reset occurred
++ *	@host: Host seeing the reset
++ *
++ *	A SCSI bus reset has occurred. Clean up any pending transfer
++ *	the hardware is doing by DMA and then abort all active and
++ *	disconnected commands. The mid layer should sort the rest out
++ *	for us
++ */
++
++static int int_initio_scsi_rst(struct initio_host * host)
+ {
+-	SCB *pCurScb;
++	struct scsi_ctrl_blk *scb;
+ 	int i;
+ 
+ 	/* if DMA xfer is pending, abort DMA xfer */
+-	if (TUL_RD(pCurHcb->HCS_Base, TUL_XStatus) & 0x01) {
+-		TUL_WR(pCurHcb->HCS_Base + TUL_XCmd, TAX_X_ABT | TAX_X_CLR_FIFO);
++	if (inb(host->addr + TUL_XStatus) & 0x01) {
++		outb(TAX_X_ABT | TAX_X_CLR_FIFO, host->addr + TUL_XCmd);
+ 		/* wait Abort DMA xfer done */
+-		while ((TUL_RD(pCurHcb->HCS_Base, TUL_Int) & 0x04) == 0);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
++		while ((inb(host->addr + TUL_Int) & 0x04) == 0)
++			cpu_relax();
++		outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
+ 	}
+ 	/* Abort all active & disconnected scb */
+-	while ((pCurScb = tul_pop_busy_scb(pCurHcb)) != NULL) {
+-		pCurScb->SCB_HaStat = HOST_BAD_PHAS;
+-		tul_append_done_scb(pCurHcb, pCurScb);
++	while ((scb = initio_pop_busy_scb(host)) != NULL) {
++		scb->hastat = HOST_BAD_PHAS;
++		initio_append_done_scb(host, scb);
+ 	}
+-	pCurHcb->HCS_ActScb = NULL;
+-	pCurHcb->HCS_ActTcs = NULL;
++	host->active = NULL;
++	host->active_tc = NULL;
+ 
+ 	/* clr sync nego. done flag */
+-	for (i = 0; i < pCurHcb->HCS_MaxTar; i++) {
+-		pCurHcb->HCS_Tcs[i].TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
+-	}
+-	return (-1);
++	for (i = 0; i < host->max_tar; i++)
++		host->targets[i].flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
++	return -1;
+ }
+ 
++/**
++ *	int_initio_resel	-	Reselection occurred
++ *	@host: InitIO host adapter
++ *
++ *	A SCSI reselection event has been signalled and the interrupt
++ *	is now being processed. Work out which command block needs attention
++ *	and continue processing that command.
++ */
+ 
+-/***************************************************************************/
+-/* scsi reselection */
+-int int_tul_resel(HCS * pCurHcb)
++int int_initio_resel(struct initio_host * host)
+ {
+-	SCB *pCurScb;
+-	TCS *pCurTcb;
+-	BYTE tag, msg = 0;
+-	BYTE tar, lun;
+-
+-	if ((pCurScb = pCurHcb->HCS_ActScb) != NULL) {
+-		if (pCurScb->SCB_Status & SCB_SELECT) {		/* if waiting for selection complete */
+-			pCurScb->SCB_Status &= ~SCB_SELECT;
+-		}
+-		pCurHcb->HCS_ActScb = NULL;
++	struct scsi_ctrl_blk *scb;
++	struct target_control *active_tc;
++	u8 tag, msg = 0;
++	u8 tar, lun;
++
++	if ((scb = host->active) != NULL) {
++		/* FIXME: Why check and not just clear? */
++		if (scb->status & SCB_SELECT)		/* if waiting for selection complete */
++			scb->status &= ~SCB_SELECT;
++		host->active = NULL;
+ 	}
+ 	/* --------- get target id---------------------- */
+-	tar = TUL_RD(pCurHcb->HCS_Base, TUL_SBusId);
++	tar = inb(host->addr + TUL_SBusId);
+ 	/* ------ get LUN from Identify message----------- */
+-	lun = TUL_RD(pCurHcb->HCS_Base, TUL_SIdent) & 0x0F;
++	lun = inb(host->addr + TUL_SIdent) & 0x0F;
+ 	/* 07/22/98 from 0x1F -> 0x0F */
+-	pCurTcb = &pCurHcb->HCS_Tcs[tar];
+-	pCurHcb->HCS_ActTcs = pCurTcb;
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurTcb->TCS_SConfig0);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurTcb->TCS_JS_Period);
+-
++	active_tc = &host->targets[tar];
++	host->active_tc = active_tc;
++	outb(active_tc->sconfig0, host->addr + TUL_SConfig);
++	outb(active_tc->js_period, host->addr + TUL_SPeriod);
+ 
+ 	/* ------------- tag queueing ? ------------------- */
+-	if (pCurTcb->TCS_DrvFlags & TCF_DRV_EN_TAG) {
+-		if ((tul_msgin_accept(pCurHcb)) == -1)
+-			return (-1);
+-		if (pCurHcb->HCS_Phase != MSG_IN)
++	if (active_tc->drv_flags & TCF_DRV_EN_TAG) {
++		if ((initio_msgin_accept(host)) == -1)
++			return -1;
++		if (host->phase != MSG_IN)
+ 			goto no_tag;
+-		TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+-		if ((wait_tulip(pCurHcb)) == -1)
+-			return (-1);
+-		msg = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);	/* Read Tag Message    */
++		outl(1, host->addr + TUL_SCnt0);
++		outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd);
++		if (wait_tulip(host) == -1)
++			return -1;
++		msg = inb(host->addr + TUL_SFifo);	/* Read Tag Message    */
+ 
+-		if ((msg < MSG_STAG) || (msg > MSG_OTAG))	/* Is simple Tag      */
++		if (msg < MSG_STAG || msg > MSG_OTAG)		/* Is simple Tag      */
+ 			goto no_tag;
+ 
+-		if ((tul_msgin_accept(pCurHcb)) == -1)
+-			return (-1);
++		if (initio_msgin_accept(host) == -1)
++			return -1;
+ 
+-		if (pCurHcb->HCS_Phase != MSG_IN)
++		if (host->phase != MSG_IN)
+ 			goto no_tag;
+ 
+-		TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+-		if ((wait_tulip(pCurHcb)) == -1)
+-			return (-1);
+-		tag = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);	/* Read Tag ID       */
+-		pCurScb = pCurHcb->HCS_Scb + tag;
+-		if ((pCurScb->SCB_Target != tar) || (pCurScb->SCB_Lun != lun)) {
+-			return tul_msgout_abort_tag(pCurHcb);
+-		}
+-		if (pCurScb->SCB_Status != SCB_BUSY) {	/* 03/24/95             */
+-			return tul_msgout_abort_tag(pCurHcb);
+-		}
+-		pCurHcb->HCS_ActScb = pCurScb;
+-		if ((tul_msgin_accept(pCurHcb)) == -1)
+-			return (-1);
++		outl(1, host->addr + TUL_SCnt0);
++		outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd);
++		if (wait_tulip(host) == -1)
++			return -1;
++		tag = inb(host->addr + TUL_SFifo);	/* Read Tag ID       */
++		scb = host->scb + tag;
++		if (scb->target != tar || scb->lun != lun) {
++			return initio_msgout_abort_tag(host);
++		}
++		if (scb->status != SCB_BUSY) {	/* 03/24/95             */
++			return initio_msgout_abort_tag(host);
++		}
++		host->active = scb;
++		if ((initio_msgin_accept(host)) == -1)
++			return -1;
+ 	} else {		/* No tag               */
+ 	      no_tag:
+-		if ((pCurScb = tul_find_busy_scb(pCurHcb, tar | (lun << 8))) == NULL) {
+-			return tul_msgout_abort_targ(pCurHcb);
++		if ((scb = initio_find_busy_scb(host, tar | (lun << 8))) == NULL) {
++			return initio_msgout_abort_targ(host);
+ 		}
+-		pCurHcb->HCS_ActScb = pCurScb;
+-		if (!(pCurTcb->TCS_DrvFlags & TCF_DRV_EN_TAG)) {
+-			if ((tul_msgin_accept(pCurHcb)) == -1)
+-				return (-1);
++		host->active = scb;
++		if (!(active_tc->drv_flags & TCF_DRV_EN_TAG)) {
++			if ((initio_msgin_accept(host)) == -1)
++				return -1;
+ 		}
+ 	}
+ 	return 0;
+ }
+ 
++/**
++ *	int_initio_bad_seq		-	out of phase
++ *	@host: InitIO host flagging event
++ *
++ *	We have ended up out of phase somehow. Reset the host controller
++ *	and throw all our toys out of the pram. Let the midlayer clean up.
++ */
+ 
+-/***************************************************************************/
+-static int int_tul_bad_seq(HCS * pCurHcb)
++static int int_initio_bad_seq(struct initio_host * host)
+ {				/* target wrong phase           */
+-	SCB *pCurScb;
++	struct scsi_ctrl_blk *scb;
+ 	int i;
+ 
+-	tul_reset_scsi(pCurHcb, 10);
++	initio_reset_scsi(host, 10);
+ 
+-	while ((pCurScb = tul_pop_busy_scb(pCurHcb)) != NULL) {
+-		pCurScb->SCB_HaStat = HOST_BAD_PHAS;
+-		tul_append_done_scb(pCurHcb, pCurScb);
++	while ((scb = initio_pop_busy_scb(host)) != NULL) {
++		scb->hastat = HOST_BAD_PHAS;
++		initio_append_done_scb(host, scb);
+ 	}
+-	for (i = 0; i < pCurHcb->HCS_MaxTar; i++) {
+-		pCurHcb->HCS_Tcs[i].TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
+-	}
+-	return (-1);
++	for (i = 0; i < host->max_tar; i++)
++		host->targets[i].flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
++	return -1;
+ }
+ 
+ 
+-/***************************************************************************/
+-int tul_msgout_abort_targ(HCS * pCurHcb)
++/**
++ *	initio_msgout_abort_targ	-	abort a target
++ *	@host: InitIO host
++ *
++ *	Abort when the target/lun does not match or when our SCB is not
++ *	busy. Used by untagged commands.
++ */
++
++static int initio_msgout_abort_targ(struct initio_host * host)
+ {
+ 
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
+-	if (tul_msgin_accept(pCurHcb) == -1)
+-		return (-1);
+-	if (pCurHcb->HCS_Phase != MSG_OUT)
+-		return (tul_bad_seq(pCurHcb));
++	outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal);
++	if (initio_msgin_accept(host) == -1)
++		return -1;
++	if (host->phase != MSG_OUT)
++		return initio_bad_seq(host);
+ 
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_ABORT);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
++	outb(MSG_ABORT, host->addr + TUL_SFifo);
++	outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
+ 
+-	return tul_wait_disc(pCurHcb);
++	return initio_wait_disc(host);
+ }
+ 
+-/***************************************************************************/
+-int tul_msgout_abort_tag(HCS * pCurHcb)
++/**
++ *	initio_msgout_abort_tag		-	abort a tag
++ *	@host: InitIO host
++ *
++ *	Abort when the target/lun does not match or when our SCB is not
++ *	busy. Used for tagged commands.
++ */
++
++static int initio_msgout_abort_tag(struct initio_host * host)
+ {
+ 
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
+-	if (tul_msgin_accept(pCurHcb) == -1)
+-		return (-1);
+-	if (pCurHcb->HCS_Phase != MSG_OUT)
+-		return (tul_bad_seq(pCurHcb));
++	outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal);
++	if (initio_msgin_accept(host) == -1)
++		return -1;
++	if (host->phase != MSG_OUT)
++		return initio_bad_seq(host);
+ 
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_ABORT_TAG);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
++	outb(MSG_ABORT_TAG, host->addr + TUL_SFifo);
++	outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
+ 
+-	return tul_wait_disc(pCurHcb);
++	return initio_wait_disc(host);
+ 
+ }
+ 
+-/***************************************************************************/
+-int tul_msgin(HCS * pCurHcb)
++/**
++ *	initio_msgin		-	Message in
++ *	@host: InitIO Host
++ *
++ *	Process incoming message
++ */
++static int initio_msgin(struct initio_host * host)
+ {
+-	TCS *pCurTcb;
++	struct target_control *active_tc;
+ 
+ 	for (;;) {
++		outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
+ 
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+-
+-		TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+-		if ((wait_tulip(pCurHcb)) == -1)
+-			return (-1);
++		outl(1, host->addr + TUL_SCnt0);
++		outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd);
++		if (wait_tulip(host) == -1)
++			return -1;
+ 
+-		switch (TUL_RD(pCurHcb->HCS_Base, TUL_SFifo)) {
++		switch (inb(host->addr + TUL_SFifo)) {
+ 		case MSG_DISC:	/* Disconnect msg */
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT);
+-
+-			return tul_wait_disc(pCurHcb);
+-
++			outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd);
++			return initio_wait_disc(host);
+ 		case MSG_SDP:
+ 		case MSG_RESTORE:
+ 		case MSG_NOP:
+-			tul_msgin_accept(pCurHcb);
++			initio_msgin_accept(host);
+ 			break;
+-
+ 		case MSG_REJ:	/* Clear ATN first              */
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SSignal,
+-			       (TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)));
+-			pCurTcb = pCurHcb->HCS_ActTcs;
+-			if ((pCurTcb->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0) {	/* do sync nego */
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
+-			}
+-			tul_msgin_accept(pCurHcb);
++			outb((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)),
++				host->addr + TUL_SSignal);
++			active_tc = host->active_tc;
++			if ((active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0)	/* do sync nego */
++				outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN),
++					host->addr + TUL_SSignal);
++			initio_msgin_accept(host);
+ 			break;
+-
+ 		case MSG_EXTEND:	/* extended msg */
+-			tul_msgin_extend(pCurHcb);
++			initio_msgin_extend(host);
+ 			break;
+-
+ 		case MSG_IGNOREWIDE:
+-			tul_msgin_accept(pCurHcb);
+-			break;
+-
+-			/* get */
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+-			if (wait_tulip(pCurHcb) == -1)
+-				return -1;
+-
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 0);	/* put pad  */
+-			TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);	/* get IGNORE field */
+-			TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);	/* get pad */
+-
+-			tul_msgin_accept(pCurHcb);
++			initio_msgin_accept(host);
+ 			break;
+-
+ 		case MSG_COMP:
+-			{
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+-				TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT);
+-				return tul_wait_done_disc(pCurHcb);
+-			}
++			outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
++			outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd);
++			return initio_wait_done_disc(host);
+ 		default:
+-			tul_msgout_reject(pCurHcb);
++			initio_msgout_reject(host);
+ 			break;
+ 		}
+-		if (pCurHcb->HCS_Phase != MSG_IN)
+-			return (pCurHcb->HCS_Phase);
++		if (host->phase != MSG_IN)
++			return host->phase;
+ 	}
+ 	/* statement won't reach here */
+ }
+ 
+-
+-
+-
+-/***************************************************************************/
+-int tul_msgout_reject(HCS * pCurHcb)
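++/**
++ *	initio_msgout_reject	-	Reject a message
++ *	@host: InitIO host
++ *
++ *	Raise ATN and, once the target enters message-out phase, send
++ *	a MESSAGE REJECT byte to it.
++ */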
++static int initio_msgout_reject(struct initio_host * host)
+ {
++	outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal);
+ 
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
+-
+-	if ((tul_msgin_accept(pCurHcb)) == -1)
+-		return (-1);
++	if (initio_msgin_accept(host) == -1)
++		return -1;
+ 
+-	if (pCurHcb->HCS_Phase == MSG_OUT) {
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_REJ);		/* Msg reject           */
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-		return (wait_tulip(pCurHcb));
++	if (host->phase == MSG_OUT) {
++		outb(MSG_REJ, host->addr + TUL_SFifo);		/* Msg reject           */
++		outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++		return wait_tulip(host);
+ 	}
+-	return (pCurHcb->HCS_Phase);
++	return host->phase;
+ }
+ 
+-
+-
+-/***************************************************************************/
+-int tul_msgout_ide(HCS * pCurHcb)
++static int initio_msgout_ide(struct initio_host * host)
+ {
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_IDE);		/* Initiator Detected Error */
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-	return (wait_tulip(pCurHcb));
++	outb(MSG_IDE, host->addr + TUL_SFifo);		/* Initiator Detected Error */
++	outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++	return wait_tulip(host);
+ }
+ 
+-
+-/***************************************************************************/
+-int tul_msgin_extend(HCS * pCurHcb)
++static int initio_msgin_extend(struct initio_host * host)
+ {
+-	BYTE len, idx;
++	u8 len, idx;
+ 
+-	if (tul_msgin_accept(pCurHcb) != MSG_IN)
+-		return (pCurHcb->HCS_Phase);
++	if (initio_msgin_accept(host) != MSG_IN)
++		return host->phase;
+ 
+ 	/* Get extended msg length      */
+-	TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+-	if (wait_tulip(pCurHcb) == -1)
+-		return (-1);
++	outl(1, host->addr + TUL_SCnt0);
++	outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd);
++	if (wait_tulip(host) == -1)
++		return -1;
+ 
+-	len = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);
+-	pCurHcb->HCS_Msg[0] = len;
++	len = inb(host->addr + TUL_SFifo);
++	host->msg[0] = len;
+ 	for (idx = 1; len != 0; len--) {
+ 
+-		if ((tul_msgin_accept(pCurHcb)) != MSG_IN)
+-			return (pCurHcb->HCS_Phase);
+-		TUL_WRLONG(pCurHcb->HCS_Base + TUL_SCnt0, 1);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_IN);
+-		if (wait_tulip(pCurHcb) == -1)
+-			return (-1);
+-		pCurHcb->HCS_Msg[idx++] = TUL_RD(pCurHcb->HCS_Base, TUL_SFifo);
+-	}
+-	if (pCurHcb->HCS_Msg[1] == 1) {		/* if it's synchronous data transfer request */
+-		if (pCurHcb->HCS_Msg[0] != 3)	/* if length is not right */
+-			return (tul_msgout_reject(pCurHcb));
+-		if (pCurHcb->HCS_ActTcs->TCS_Flags & TCF_NO_SYNC_NEGO) {	/* Set OFFSET=0 to do async, nego back */
+-			pCurHcb->HCS_Msg[3] = 0;
+-		} else {
+-			if ((tul_msgin_sync(pCurHcb) == 0) &&
+-			    (pCurHcb->HCS_ActTcs->TCS_Flags & TCF_SYNC_DONE)) {
+-				tul_sync_done(pCurHcb);
+-				return (tul_msgin_accept(pCurHcb));
++		if ((initio_msgin_accept(host)) != MSG_IN)
++			return host->phase;
++		outl(1, host->addr + TUL_SCnt0);
++		outb(TSC_XF_FIFO_IN, host->addr + TUL_SCmd);
++		if (wait_tulip(host) == -1)
++			return -1;
++		host->msg[idx++] = inb(host->addr + TUL_SFifo);
++	}
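++	/*
++	 *	msg[0] holds the extended message length and msg[1] the code:
++	 *	1 is SDTR (msg[2] period, msg[3] offset) and 3 is WDTR
++	 *	(msg[2] width exponent: 0 = 8bit, 1 = 16bit, 2 = 32bit).
++	 */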
++	if (host->msg[1] == 1) {		/* if it's synchronous data transfer request */
++		u8 r;
++		if (host->msg[0] != 3)	/* if length is not right */
++			return initio_msgout_reject(host);
++		if (host->active_tc->flags & TCF_NO_SYNC_NEGO) {	/* Set OFFSET=0 to do async, nego back */
++			host->msg[3] = 0;
++		} else {
++			if (initio_msgin_sync(host) == 0 &&
++			    (host->active_tc->flags & TCF_SYNC_DONE)) {
++				initio_sync_done(host);
++				return initio_msgin_accept(host);
+ 			}
+ 		}
+ 
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
+-		if ((tul_msgin_accept(pCurHcb)) != MSG_OUT)
+-			return (pCurHcb->HCS_Phase);
++		r = inb(host->addr + TUL_SSignal);
++		outb((r & (TSC_SET_ACK | 7)) | TSC_SET_ATN,
++			host->addr + TUL_SSignal);
++		if (initio_msgin_accept(host) != MSG_OUT)
++			return host->phase;
+ 		/* sync msg out */
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);
+-
+-		tul_sync_done(pCurHcb);
++		outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);
+ 
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 1);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurHcb->HCS_Msg[2]);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurHcb->HCS_Msg[3]);
++		initio_sync_done(host);
+ 
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-		return (wait_tulip(pCurHcb));
++		outb(MSG_EXTEND, host->addr + TUL_SFifo);
++		outb(3, host->addr + TUL_SFifo);
++		outb(1, host->addr + TUL_SFifo);
++		outb(host->msg[2], host->addr + TUL_SFifo);
++		outb(host->msg[3], host->addr + TUL_SFifo);
++		outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++		return wait_tulip(host);
+ 	}
+-	if ((pCurHcb->HCS_Msg[0] != 2) || (pCurHcb->HCS_Msg[1] != 3))
+-		return (tul_msgout_reject(pCurHcb));
++	if (host->msg[0] != 2 || host->msg[1] != 3)
++		return initio_msgout_reject(host);
+ 	/* if it's WIDE DATA XFER REQ   */
+-	if (pCurHcb->HCS_ActTcs->TCS_Flags & TCF_NO_WDTR) {
+-		pCurHcb->HCS_Msg[2] = 0;
++	if (host->active_tc->flags & TCF_NO_WDTR) {
++		host->msg[2] = 0;
+ 	} else {
+-		if (pCurHcb->HCS_Msg[2] > 2)	/* > 32 bits            */
+-			return (tul_msgout_reject(pCurHcb));
+-		if (pCurHcb->HCS_Msg[2] == 2) {		/* == 32                */
+-			pCurHcb->HCS_Msg[2] = 1;
++		if (host->msg[2] > 2)	/* > 32 bits            */
++			return initio_msgout_reject(host);
++		if (host->msg[2] == 2) {		/* == 32                */
++			host->msg[2] = 1;
+ 		} else {
+-			if ((pCurHcb->HCS_ActTcs->TCS_Flags & TCF_NO_WDTR) == 0) {
+-				wdtr_done(pCurHcb);
+-				if ((pCurHcb->HCS_ActTcs->TCS_Flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0)
+-					TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
+-				return (tul_msgin_accept(pCurHcb));
++			if ((host->active_tc->flags & TCF_NO_WDTR) == 0) {
++				wdtr_done(host);
++				if ((host->active_tc->flags & (TCF_SYNC_DONE | TCF_NO_SYNC_NEGO)) == 0)
++					outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal);
++				return initio_msgin_accept(host);
+ 			}
+ 		}
+ 	}
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SSignal, ((TUL_RD(pCurHcb->HCS_Base, TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN));
++	outb(((inb(host->addr + TUL_SSignal) & (TSC_SET_ACK | 7)) | TSC_SET_ATN), host->addr + TUL_SSignal);
+ 
+-	if (tul_msgin_accept(pCurHcb) != MSG_OUT)
+-		return (pCurHcb->HCS_Phase);
++	if (initio_msgin_accept(host) != MSG_OUT)
++		return host->phase;
+ 	/* WDTR msg out                 */
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_EXTEND);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 2);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, 3);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurHcb->HCS_Msg[2]);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-	return (wait_tulip(pCurHcb));
++	outb(MSG_EXTEND, host->addr + TUL_SFifo);
++	outb(2, host->addr + TUL_SFifo);
++	outb(3, host->addr + TUL_SFifo);
++	outb(host->msg[2], host->addr + TUL_SFifo);
++	outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++	return wait_tulip(host);
+ }
+ 
+-/***************************************************************************/
+-int tul_msgin_sync(HCS * pCurHcb)
++static int initio_msgin_sync(struct initio_host * host)
+ {
+ 	char default_period;
+ 
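++	/*
++	 *	Validate the target's SDTR reply (msg[2] period, msg[3] offset)
++	 *	against our limits. Returns 1 if the values had to be adjusted
++	 *	and must be negotiated back, 0 if they can be accepted as-is.
++	 */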
+-	default_period = tul_rate_tbl[pCurHcb->HCS_ActTcs->TCS_Flags & TCF_SCSI_RATE];
+-	if (pCurHcb->HCS_Msg[3] > MAX_OFFSET) {
+-		pCurHcb->HCS_Msg[3] = MAX_OFFSET;
+-		if (pCurHcb->HCS_Msg[2] < default_period) {
+-			pCurHcb->HCS_Msg[2] = default_period;
++	default_period = initio_rate_tbl[host->active_tc->flags & TCF_SCSI_RATE];
++	if (host->msg[3] > MAX_OFFSET) {
++		host->msg[3] = MAX_OFFSET;
++		if (host->msg[2] < default_period) {
++			host->msg[2] = default_period;
+ 			return 1;
+ 		}
+-		if (pCurHcb->HCS_Msg[2] >= 59) {	/* Change to async              */
+-			pCurHcb->HCS_Msg[3] = 0;
+-		}
++		if (host->msg[2] >= 59)	/* Change to async              */
++			host->msg[3] = 0;
+ 		return 1;
+ 	}
+ 	/* offset requests asynchronous transfers ? */
+-	if (pCurHcb->HCS_Msg[3] == 0) {
++	if (host->msg[3] == 0) {
+ 		return 0;
+ 	}
+-	if (pCurHcb->HCS_Msg[2] < default_period) {
+-		pCurHcb->HCS_Msg[2] = default_period;
++	if (host->msg[2] < default_period) {
++		host->msg[2] = default_period;
+ 		return 1;
+ 	}
+-	if (pCurHcb->HCS_Msg[2] >= 59) {
+-		pCurHcb->HCS_Msg[3] = 0;
++	if (host->msg[2] >= 59) {
++		host->msg[3] = 0;
+ 		return 1;
+ 	}
+ 	return 0;
+ }
+ 
+-
+-/***************************************************************************/
+-int wdtr_done(HCS * pCurHcb)
++static int wdtr_done(struct initio_host * host)
+ {
+-	pCurHcb->HCS_ActTcs->TCS_Flags &= ~TCF_SYNC_DONE;
+-	pCurHcb->HCS_ActTcs->TCS_Flags |= TCF_WDTR_DONE;
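++	/* Wide negotiation done: clear the sync-done flag too, since WDTR invalidates any earlier sync agreement */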
++	host->active_tc->flags &= ~TCF_SYNC_DONE;
++	host->active_tc->flags |= TCF_WDTR_DONE;
+ 
+-	pCurHcb->HCS_ActTcs->TCS_JS_Period = 0;
+-	if (pCurHcb->HCS_Msg[2]) {	/* if 16 bit */
+-		pCurHcb->HCS_ActTcs->TCS_JS_Period |= TSC_WIDE_SCSI;
+-	}
+-	pCurHcb->HCS_ActTcs->TCS_SConfig0 &= ~TSC_ALT_PERIOD;
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurHcb->HCS_ActTcs->TCS_SConfig0);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurHcb->HCS_ActTcs->TCS_JS_Period);
++	host->active_tc->js_period = 0;
++	if (host->msg[2])	/* if 16 bit */
++		host->active_tc->js_period |= TSC_WIDE_SCSI;
++	host->active_tc->sconfig0 &= ~TSC_ALT_PERIOD;
++	outb(host->active_tc->sconfig0, host->addr + TUL_SConfig);
++	outb(host->active_tc->js_period, host->addr + TUL_SPeriod);
+ 
+ 	return 1;
+ }
+ 
+-/***************************************************************************/
+-int tul_sync_done(HCS * pCurHcb)
++static int initio_sync_done(struct initio_host * host)
+ {
+ 	int i;
+ 
+-	pCurHcb->HCS_ActTcs->TCS_Flags |= TCF_SYNC_DONE;
++	host->active_tc->flags |= TCF_SYNC_DONE;
+ 
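++	/* Fold the negotiated offset into js_period and encode the period as an initio_rate_tbl index in bits 4-7 */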
+-	if (pCurHcb->HCS_Msg[3]) {
+-		pCurHcb->HCS_ActTcs->TCS_JS_Period |= pCurHcb->HCS_Msg[3];
++	if (host->msg[3]) {
++		host->active_tc->js_period |= host->msg[3];
+ 		for (i = 0; i < 8; i++) {
+-			if (tul_rate_tbl[i] >= pCurHcb->HCS_Msg[2])	/* pick the big one */
++			if (initio_rate_tbl[i] >= host->msg[2])	/* pick the big one */
+ 				break;
+ 		}
+-		pCurHcb->HCS_ActTcs->TCS_JS_Period |= (i << 4);
+-		pCurHcb->HCS_ActTcs->TCS_SConfig0 |= TSC_ALT_PERIOD;
++		host->active_tc->js_period |= (i << 4);
++		host->active_tc->sconfig0 |= TSC_ALT_PERIOD;
+ 	}
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, pCurHcb->HCS_ActTcs->TCS_SConfig0);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SPeriod, pCurHcb->HCS_ActTcs->TCS_JS_Period);
++	outb(host->active_tc->sconfig0, host->addr + TUL_SConfig);
++	outb(host->active_tc->js_period, host->addr + TUL_SPeriod);
+ 
+-	return (-1);
++	return -1;
+ }
+ 
+ 
+-int tul_post_scsi_rst(HCS * pCurHcb)
++static int initio_post_scsi_rst(struct initio_host * host)
+ {
+-	SCB *pCurScb;
+-	TCS *pCurTcb;
++	struct scsi_ctrl_blk *scb;
++	struct target_control *active_tc;
+ 	int i;
+ 
+-	pCurHcb->HCS_ActScb = NULL;
+-	pCurHcb->HCS_ActTcs = NULL;
+-	pCurHcb->HCS_Flags = 0;
+-
+-	while ((pCurScb = tul_pop_busy_scb(pCurHcb)) != NULL) {
+-		pCurScb->SCB_HaStat = HOST_BAD_PHAS;
+-		tul_append_done_scb(pCurHcb, pCurScb);
++	host->active = NULL;
++	host->active_tc = NULL;
++	host->flags = 0;
++
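++	/* Fail every SCB that was on the busy queue back to the midlayer */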
++	while ((scb = initio_pop_busy_scb(host)) != NULL) {
++		scb->hastat = HOST_BAD_PHAS;
++		initio_append_done_scb(host, scb);
+ 	}
+ 	/* clear sync done flag         */
+-	pCurTcb = &pCurHcb->HCS_Tcs[0];
+-	for (i = 0; i < pCurHcb->HCS_MaxTar; pCurTcb++, i++) {
+-		pCurTcb->TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
++	active_tc = &host->targets[0];
++	for (i = 0; i < host->max_tar; active_tc++, i++) {
++		active_tc->flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE);
+ 		/* Initialize the sync. xfer register values to an asyn xfer */
+-		pCurTcb->TCS_JS_Period = 0;
+-		pCurTcb->TCS_SConfig0 = pCurHcb->HCS_SConf1;
+-		pCurHcb->HCS_ActTags[0] = 0;	/* 07/22/98 */
+-		pCurHcb->HCS_Tcs[i].TCS_Flags &= ~TCF_BUSY;	/* 07/22/98 */
++		active_tc->js_period = 0;
++		active_tc->sconfig0 = host->sconf1;
++		host->act_tags[0] = 0;	/* 07/22/98 */
++		host->targets[i].flags &= ~TCF_BUSY;	/* 07/22/98 */
+ 	}			/* for */
+ 
+-	return (-1);
++	return -1;
+ }
+ 
+-/***************************************************************************/
+-void tul_select_atn_stop(HCS * pCurHcb, SCB * pCurScb)
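++/**
++ *	initio_select_atn_stop	-	selection with ATN, stopping early
++ *	@host: InitIO host
++ *	@scb: SCB to start
++ *
++ *	Begin selection with ATN but stop before message-out so the
++ *	caller can feed the message bytes itself (typically when a
++ *	negotiation message has to be sent).
++ */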
++static void initio_select_atn_stop(struct initio_host * host, struct scsi_ctrl_blk * scb)
+ {
+-	pCurScb->SCB_Status |= SCB_SELECT;
+-	pCurScb->SCB_NxtStat = 0x1;
+-	pCurHcb->HCS_ActScb = pCurScb;
+-	pCurHcb->HCS_ActTcs = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target];
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_SELATNSTOP);
+-	return;
++	scb->status |= SCB_SELECT;
++	scb->next_state = 0x1;
++	host->active = scb;
++	host->active_tc = &host->targets[scb->target];
++	outb(TSC_SELATNSTOP, host->addr + TUL_SCmd);
+ }
+ 
+ 
+-/***************************************************************************/
+-void tul_select_atn(HCS * pCurHcb, SCB * pCurScb)
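++/**
++ *	initio_select_atn	-	selection with ATN
++ *	@host: InitIO host
++ *	@scb: SCB to start
++ *
++ *	Select the target with ATN raised. The identify message and CDB
++ *	are preloaded into the FIFO so the chip can sequence the selection
++ *	and the following message/command phases itself.
++ */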
++static void initio_select_atn(struct initio_host * host, struct scsi_ctrl_blk * scb)
+ {
+ 	int i;
+ 
+-	pCurScb->SCB_Status |= SCB_SELECT;
+-	pCurScb->SCB_NxtStat = 0x2;
++	scb->status |= SCB_SELECT;
++	scb->next_state = 0x2;
+ 
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_Ident);
+-	for (i = 0; i < (int) pCurScb->SCB_CDBLen; i++)
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_CDB[i]);
+-	pCurHcb->HCS_ActTcs = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target];
+-	pCurHcb->HCS_ActScb = pCurScb;
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_SEL_ATN);
+-	return;
++	outb(scb->ident, host->addr + TUL_SFifo);
++	for (i = 0; i < (int) scb->cdblen; i++)
++		outb(scb->cdb[i], host->addr + TUL_SFifo);
++	host->active_tc = &host->targets[scb->target];
++	host->active = scb;
++	outb(TSC_SEL_ATN, host->addr + TUL_SCmd);
+ }
+ 
+-/***************************************************************************/
+-void tul_select_atn3(HCS * pCurHcb, SCB * pCurScb)
++static void initio_select_atn3(struct initio_host * host, struct scsi_ctrl_blk * scb)
+ {
+ 	int i;
+ 
+-	pCurScb->SCB_Status |= SCB_SELECT;
+-	pCurScb->SCB_NxtStat = 0x2;
+-
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_Ident);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagMsg);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_TagId);
+-	for (i = 0; i < (int) pCurScb->SCB_CDBLen; i++)
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, pCurScb->SCB_CDB[i]);
+-	pCurHcb->HCS_ActTcs = &pCurHcb->HCS_Tcs[pCurScb->SCB_Target];
+-	pCurHcb->HCS_ActScb = pCurScb;
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_SEL_ATN3);
+-	return;
+-}
++	scb->status |= SCB_SELECT;
++	scb->next_state = 0x2;
+ 
+-/***************************************************************************/
+-/* SCSI Bus Device Reset */
+-int tul_bus_device_reset(HCS * pCurHcb)
++	outb(scb->ident, host->addr + TUL_SFifo);
++	outb(scb->tagmsg, host->addr + TUL_SFifo);
++	outb(scb->tagid, host->addr + TUL_SFifo);
++	for (i = 0; i < scb->cdblen; i++)
++		outb(scb->cdb[i], host->addr + TUL_SFifo);
++	host->active_tc = &host->targets[scb->target];
++	host->active = scb;
++	outb(TSC_SEL_ATN3, host->addr + TUL_SCmd);
++}
++
++/**
++ *	initio_bus_device_reset	-	 SCSI Bus Device Reset
++ *	@host: InitIO host to reset
++ *
++ *	Perform a device reset and abort all pending SCBs for the
++ *	victim device
++ */
++int initio_bus_device_reset(struct initio_host * host)
+ {
+-	SCB *pCurScb = pCurHcb->HCS_ActScb;
+-	TCS *pCurTcb = pCurHcb->HCS_ActTcs;
+-	SCB *pTmpScb, *pPrevScb;
+-	BYTE tar;
++	struct scsi_ctrl_blk *scb = host->active;
++	struct target_control *active_tc = host->active_tc;
++	struct scsi_ctrl_blk *tmp, *prev;
++	u8 tar;
+ 
+-	if (pCurHcb->HCS_Phase != MSG_OUT) {
+-		return (int_tul_bad_seq(pCurHcb));	/* Unexpected phase             */
+-	}
+-	tul_unlink_pend_scb(pCurHcb, pCurScb);
+-	tul_release_scb(pCurHcb, pCurScb);
++	if (host->phase != MSG_OUT)
++		return int_initio_bad_seq(host);	/* Unexpected phase */
+ 
++	initio_unlink_pend_scb(host, scb);
++	initio_release_scb(host, scb);
+ 
+-	tar = pCurScb->SCB_Target;	/* target                       */
+-	pCurTcb->TCS_Flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE | TCF_BUSY);
++
++	tar = scb->target;	/* target                       */
++	active_tc->flags &= ~(TCF_SYNC_DONE | TCF_WDTR_DONE | TCF_BUSY);
+ 	/* clr sync. nego & WDTR flags  07/22/98 */
+ 
+ 	/* abort all SCB with same target */
+-	pPrevScb = pTmpScb = pCurHcb->HCS_FirstBusy;	/* Check Busy queue */
+-	while (pTmpScb != NULL) {
+-
+-		if (pTmpScb->SCB_Target == tar) {
++	prev = tmp = host->first_busy;	/* Check Busy queue */
++	while (tmp != NULL) {
++		if (tmp->target == tar) {
+ 			/* unlink it */
+-			if (pTmpScb == pCurHcb->HCS_FirstBusy) {
+-				if ((pCurHcb->HCS_FirstBusy = pTmpScb->SCB_NxtScb) == NULL)
+-					pCurHcb->HCS_LastBusy = NULL;
+-			} else {
+-				pPrevScb->SCB_NxtScb = pTmpScb->SCB_NxtScb;
+-				if (pTmpScb == pCurHcb->HCS_LastBusy)
+-					pCurHcb->HCS_LastBusy = pPrevScb;
++			if (tmp == host->first_busy) {
++				if ((host->first_busy = tmp->next) == NULL)
++					host->last_busy = NULL;
++			} else {
++				prev->next = tmp->next;
++				if (tmp == host->last_busy)
++					host->last_busy = prev;
+ 			}
+-			pTmpScb->SCB_HaStat = HOST_ABORTED;
+-			tul_append_done_scb(pCurHcb, pTmpScb);
++			tmp->hastat = HOST_ABORTED;
++			initio_append_done_scb(host, tmp);
+ 		}
+ 		/* Previous haven't change      */
+ 		else {
+-			pPrevScb = pTmpScb;
++			prev = tmp;
+ 		}
+-		pTmpScb = pTmpScb->SCB_NxtScb;
++		tmp = tmp->next;
+ 	}
+-
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SFifo, MSG_DEVRST);
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_XF_FIFO_OUT);
+-
+-	return tul_wait_disc(pCurHcb);
++	outb(MSG_DEVRST, host->addr + TUL_SFifo);
++	outb(TSC_XF_FIFO_OUT, host->addr + TUL_SCmd);
++	return initio_wait_disc(host);
+ 
+ }
+ 
+-/***************************************************************************/
+-int tul_msgin_accept(HCS * pCurHcb)
++static int initio_msgin_accept(struct initio_host * host)
+ {
+-	TUL_WR(pCurHcb->HCS_Base + TUL_SCmd, TSC_MSG_ACCEPT);
+-	return (wait_tulip(pCurHcb));
++	outb(TSC_MSG_ACCEPT, host->addr + TUL_SCmd);
++	return wait_tulip(host);
+ }
+ 
+-/***************************************************************************/
+-int wait_tulip(HCS * pCurHcb)
++static int wait_tulip(struct initio_host * host)
+ {
+ 
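++	/* Busy-wait until the chip flags an interrupt condition, then decode it into a phase or error return */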
+-	while (!((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0))
+-		 & TSS_INT_PENDING));
++	while (!((host->jsstatus0 = inb(host->addr + TUL_SStatus0))
++		 & TSS_INT_PENDING))
++			cpu_relax();
++
++	host->jsint = inb(host->addr + TUL_SInt);
++	host->phase = host->jsstatus0 & TSS_PH_MASK;
++	host->jsstatus1 = inb(host->addr + TUL_SStatus1);
+ 
+-	pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt);
+-	pCurHcb->HCS_Phase = pCurHcb->HCS_JSStatus0 & TSS_PH_MASK;
+-	pCurHcb->HCS_JSStatus1 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus1);
+-
+-	if (pCurHcb->HCS_JSInt & TSS_RESEL_INT) {	/* if SCSI bus reset detected   */
+-		return (int_tul_resel(pCurHcb));
+-	}
+-	if (pCurHcb->HCS_JSInt & TSS_SEL_TIMEOUT) {	/* if selected/reselected timeout interrupt */
+-		return (int_tul_busfree(pCurHcb));
+-	}
+-	if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) {	/* if SCSI bus reset detected   */
+-		return (int_tul_scsi_rst(pCurHcb));
+-	}
+-	if (pCurHcb->HCS_JSInt & TSS_DISC_INT) {	/* BUS disconnection            */
+-		if (pCurHcb->HCS_Flags & HCF_EXPECT_DONE_DISC) {
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);		/* Flush SCSI FIFO  */
+-			tul_unlink_busy_scb(pCurHcb, pCurHcb->HCS_ActScb);
+-			pCurHcb->HCS_ActScb->SCB_HaStat = 0;
+-			tul_append_done_scb(pCurHcb, pCurHcb->HCS_ActScb);
+-			pCurHcb->HCS_ActScb = NULL;
+-			pCurHcb->HCS_ActTcs = NULL;
+-			pCurHcb->HCS_Flags &= ~HCF_EXPECT_DONE_DISC;
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT);
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT);	/* Enable HW reselect       */
+-			return (-1);
+-		}
+-		if (pCurHcb->HCS_Flags & HCF_EXPECT_DISC) {
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);		/* Flush SCSI FIFO  */
+-			pCurHcb->HCS_ActScb = NULL;
+-			pCurHcb->HCS_ActTcs = NULL;
+-			pCurHcb->HCS_Flags &= ~HCF_EXPECT_DISC;
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT);
+-			TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT);	/* Enable HW reselect       */
+-			return (-1);
++	if (host->jsint & TSS_RESEL_INT)	/* if reselection detected */
++		return int_initio_resel(host);
++	if (host->jsint & TSS_SEL_TIMEOUT)	/* if selected/reselected timeout interrupt */
++		return int_initio_busfree(host);
++	if (host->jsint & TSS_SCSIRST_INT)	/* if SCSI bus reset detected   */
++		return int_initio_scsi_rst(host);
++
++	if (host->jsint & TSS_DISC_INT) {	/* BUS disconnection            */
++		if (host->flags & HCF_EXPECT_DONE_DISC) {
++			outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */
++			initio_unlink_busy_scb(host, host->active);
++			host->active->hastat = 0;
++			initio_append_done_scb(host, host->active);
++			host->active = NULL;
++			host->active_tc = NULL;
++			host->flags &= ~HCF_EXPECT_DONE_DISC;
++			outb(TSC_INITDEFAULT, host->addr + TUL_SConfig);
++			outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1);	/* Enable HW reselect */
++			return -1;
+ 		}
+-		return (int_tul_busfree(pCurHcb));
++		if (host->flags & HCF_EXPECT_DISC) {
++			outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */
++			host->active = NULL;
++			host->active_tc = NULL;
++			host->flags &= ~HCF_EXPECT_DISC;
++			outb(TSC_INITDEFAULT, host->addr + TUL_SConfig);
++			outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1);	/* Enable HW reselect */
++			return -1;
+-	}
++		}
+-	if (pCurHcb->HCS_JSInt & (TSS_FUNC_COMP | TSS_BUS_SERV)) {
+-		return (pCurHcb->HCS_Phase);
++		return int_initio_busfree(host);
+ 	}
+-	return (pCurHcb->HCS_Phase);
++	/* The old code really does the below. Can probably be removed */
++	if (host->jsint & (TSS_FUNC_COMP | TSS_BUS_SERV))
++		return host->phase;
++	return host->phase;
+ }
+-/***************************************************************************/
+-int tul_wait_disc(HCS * pCurHcb)
+-{
+-
+-	while (!((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0))
+-		 & TSS_INT_PENDING));
+ 
++static int initio_wait_disc(struct initio_host * host)
++{
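++	/* Wait for the expected bus-free: only a disconnect or a SCSI reset is legal at this point */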
++	while (!((host->jsstatus0 = inb(host->addr + TUL_SStatus0)) & TSS_INT_PENDING))
++		cpu_relax();
+ 
+-	pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt);
++	host->jsint = inb(host->addr + TUL_SInt);
+ 
+-	if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) {	/* if SCSI bus reset detected   */
+-		return (int_tul_scsi_rst(pCurHcb));
+-	}
+-	if (pCurHcb->HCS_JSInt & TSS_DISC_INT) {	/* BUS disconnection            */
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);		/* Flush SCSI FIFO  */
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT);	/* Enable HW reselect       */
+-		pCurHcb->HCS_ActScb = NULL;
+-		return (-1);
++	if (host->jsint & TSS_SCSIRST_INT)	/* if SCSI bus reset detected */
++		return int_initio_scsi_rst(host);
++	if (host->jsint & TSS_DISC_INT) {	/* BUS disconnection */
++		outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0); /* Flush SCSI FIFO */
++		outb(TSC_INITDEFAULT, host->addr + TUL_SConfig);
++		outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1);	/* Enable HW reselect */
++		host->active = NULL;
++		return -1;
+ 	}
+-	return (tul_bad_seq(pCurHcb));
++	return initio_bad_seq(host);
+ }
+ 
+-/***************************************************************************/
+-int tul_wait_done_disc(HCS * pCurHcb)
++static int initio_wait_done_disc(struct initio_host * host)
+ {
++	while (!((host->jsstatus0 = inb(host->addr + TUL_SStatus0))
++		 & TSS_INT_PENDING))
++		 cpu_relax();
+ 
++	host->jsint = inb(host->addr + TUL_SInt);
+ 
+-	while (!((pCurHcb->HCS_JSStatus0 = TUL_RD(pCurHcb->HCS_Base, TUL_SStatus0))
+-		 & TSS_INT_PENDING));
+-
+-	pCurHcb->HCS_JSInt = TUL_RD(pCurHcb->HCS_Base, TUL_SInt);
++	if (host->jsint & TSS_SCSIRST_INT)	/* if SCSI bus reset detected */
++		return int_initio_scsi_rst(host);
++	if (host->jsint & TSS_DISC_INT) {	/* BUS disconnection */
++		outb(TSC_FLUSH_FIFO, host->addr + TUL_SCtrl0);		/* Flush SCSI FIFO */
++		outb(TSC_INITDEFAULT, host->addr + TUL_SConfig);
++		outb(TSC_HW_RESELECT, host->addr + TUL_SCtrl1);		/* Enable HW reselect */
++		initio_unlink_busy_scb(host, host->active);
+ 
+-
+-	if (pCurHcb->HCS_JSInt & TSS_SCSIRST_INT) {	/* if SCSI bus reset detected   */
+-		return (int_tul_scsi_rst(pCurHcb));
+-	}
+-	if (pCurHcb->HCS_JSInt & TSS_DISC_INT) {	/* BUS disconnection            */
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl0, TSC_FLUSH_FIFO);		/* Flush SCSI FIFO  */
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SConfig, TSC_INITDEFAULT);
+-		TUL_WR(pCurHcb->HCS_Base + TUL_SCtrl1, TSC_HW_RESELECT);	/* Enable HW reselect       */
+-		tul_unlink_busy_scb(pCurHcb, pCurHcb->HCS_ActScb);
+-
+-		tul_append_done_scb(pCurHcb, pCurHcb->HCS_ActScb);
+-		pCurHcb->HCS_ActScb = NULL;
+-		return (-1);
++		initio_append_done_scb(host, host->active);
++		host->active = NULL;
++		return -1;
+ 	}
+-	return (tul_bad_seq(pCurHcb));
++	return initio_bad_seq(host);
+ }
+ 
++/**
++ *	i91u_intr		-	IRQ handler
++ *	@irqno: IRQ number
++ *	@dev_id: IRQ identifier
++ *
++ *	Take the host lock and then invoke the actual ISR processing
++ *	under it.
++ */
++
+ static irqreturn_t i91u_intr(int irqno, void *dev_id)
+ {
+ 	struct Scsi_Host *dev = dev_id;
+ 	unsigned long flags;
++	int r;
+ 	
+ 	spin_lock_irqsave(dev->host_lock, flags);
+-	tul_isr((HCS *)dev->base);
++	r = initio_isr((struct initio_host *)dev->hostdata);
+ 	spin_unlock_irqrestore(dev->host_lock, flags);
+-	return IRQ_HANDLED;
++	if (r)
++		return IRQ_HANDLED;
++	else
++		return IRQ_NONE;
+ }
+ 
+-static int tul_NewReturnNumberOfAdapters(void)
+-{
+-	struct pci_dev *pDev = NULL;	/* Start from none              */
+-	int iAdapters = 0;
+-	long dRegValue;
+-	WORD wBIOS;
+-	int i = 0;
+-
+-	init_i91uAdapter_table();
+-
+-	for (i = 0; i < ARRAY_SIZE(i91u_pci_devices); i++)
+-	{
+-		while ((pDev = pci_find_device(i91u_pci_devices[i].vendor, i91u_pci_devices[i].device, pDev)) != NULL) {
+-			if (pci_enable_device(pDev))
+-				continue;
+-			pci_read_config_dword(pDev, 0x44, (u32 *) & dRegValue);
+-			wBIOS = (UWORD) (dRegValue & 0xFF);
+-			if (((dRegValue & 0xFF00) >> 8) == 0xFF)
+-				dRegValue = 0;
+-			wBIOS = (wBIOS << 8) + ((UWORD) ((dRegValue & 0xFF00) >> 8));
+-			if (pci_set_dma_mask(pDev, DMA_32BIT_MASK)) {
+-				printk(KERN_WARNING 
+-				       "i91u: Could not set 32 bit DMA mask\n");
+-				continue;
+-			}
+-
+-			if (Addi91u_into_Adapter_table(wBIOS,
+-							(pDev->resource[0].start),
+-						       	pDev->irq,
+-						       	pDev->bus->number,
+-					       		(pDev->devfn >> 3)
+-		    		) == 0)
+-				iAdapters++;
+-		}
+-	}
+-
+-	return (iAdapters);
+-}
+-
+-static int i91u_detect(struct scsi_host_template * tpnt)
+-{
+-	HCS *pHCB;
+-	struct Scsi_Host *hreg;
+-	unsigned long i;	/* 01/14/98                     */
+-	int ok = 0, iAdapters;
+-	ULONG dBiosAdr;
+-	BYTE *pbBiosAdr;
+-
+-	/* Get total number of adapters in the motherboard */
+-	iAdapters = tul_NewReturnNumberOfAdapters();
+-	if (iAdapters == 0)	/* If no tulip founded, return */
+-		return (0);
+-
+-	tul_num_ch = (iAdapters > tul_num_ch) ? tul_num_ch : iAdapters;
+-	/* Update actually channel number */
+-	if (tul_tag_enable) {	/* 1.01i                  */
+-		tul_num_scb = MAX_TARGETS * i91u_MAXQUEUE;
+-	} else {
+-		tul_num_scb = MAX_TARGETS + 3;	/* 1-tape, 1-CD_ROM, 1- extra */
+-	}			/* Update actually SCBs per adapter */
+-
+-	/* Get total memory needed for HCS */
+-	i = tul_num_ch * sizeof(HCS);
+-	memset((unsigned char *) &tul_hcs[0], 0, i);	/* Initialize tul_hcs 0 */
+-	/* Get total memory needed for SCB */
+-
+-	for (; tul_num_scb >= MAX_TARGETS + 3; tul_num_scb--) {
+-		i = tul_num_ch * tul_num_scb * sizeof(SCB);
+-		if ((tul_scb = kmalloc(i, GFP_ATOMIC | GFP_DMA)) != NULL)
+-			break;
+-	}
+-	if (tul_scb == NULL) {
+-		printk("i91u: SCB memory allocation error\n");
+-		return (0);
+-	}
+-	memset((unsigned char *) tul_scb, 0, i);
+-
+-	for (i = 0, pHCB = &tul_hcs[0];		/* Get pointer for control block */
+-	     i < tul_num_ch;
+-	     i++, pHCB++) {
+-		get_tulipPCIConfig(pHCB, i);
+-
+-		dBiosAdr = pHCB->HCS_BIOS;
+-		dBiosAdr = (dBiosAdr << 4);
+ 
+-		pbBiosAdr = phys_to_virt(dBiosAdr);
+-
+-		init_tulip(pHCB, tul_scb + (i * tul_num_scb), tul_num_scb, pbBiosAdr, 10);
+-		request_region(pHCB->HCS_Base, 256, "i91u"); /* Register */ 
+-
+-		pHCB->HCS_Index = i;	/* 7/29/98 */
+-		hreg = scsi_register(tpnt, sizeof(HCS));
+-		if(hreg == NULL) {
+-			release_region(pHCB->HCS_Base, 256);
+-			return 0;
+-		}
+-		hreg->io_port = pHCB->HCS_Base;
+-		hreg->n_io_port = 0xff;
+-		hreg->can_queue = tul_num_scb;	/* 03/05/98                      */
+-		hreg->unique_id = pHCB->HCS_Base;
+-		hreg->max_id = pHCB->HCS_MaxTar;
+-		hreg->max_lun = 32;	/* 10/21/97                     */
+-		hreg->irq = pHCB->HCS_Intr;
+-		hreg->this_id = pHCB->HCS_SCSI_ID;	/* Assign HCS index           */
+-		hreg->base = (unsigned long)pHCB;
+-		hreg->sg_tablesize = TOTAL_SG_ENTRY;	/* Maximun support is 32 */
+-
+-		/* Initial tulip chip           */
+-		ok = request_irq(pHCB->HCS_Intr, i91u_intr, IRQF_DISABLED | IRQF_SHARED, "i91u", hreg);
+-		if (ok < 0) {
+-			printk(KERN_WARNING "i91u: unable to request IRQ %d\n\n", pHCB->HCS_Intr);
+-			return 0;
+-		}
+-	}
+-
+-	tpnt->this_id = -1;
+-	tpnt->can_queue = 1;
+-
+-	return 1;
+-}
++/**
++ *	initio_build_scb		-	Build the mappings and SCB
++ *	@host: InitIO host taking the command
++ *	@cblk: Firmware command block
++ *	@cmnd: SCSI midlayer command block
++ *
++ *	Translate the abstract SCSI command into a firmware command block
++ *	suitable for feeding to the InitIO host controller. This also requires
++ *	that we build the scatter-gather lists and ensure they are mapped properly.
++ */
+ 
+-static void i91uBuildSCB(HCS * pHCB, SCB * pSCB, struct scsi_cmnd * SCpnt)
++static void initio_build_scb(struct initio_host * host, struct scsi_ctrl_blk * cblk, struct scsi_cmnd * cmnd)
+ {				/* Create corresponding SCB     */
+-	struct scatterlist *pSrbSG;
+-	SG *pSG;		/* Pointer to SG list           */
+-	int i;
+-	long TotalLen;
++	struct scatterlist *sglist;
++	struct sg_entry *sg;		/* Pointer to SG list           */
++	int i, nseg;
++	long total_len;
+ 	dma_addr_t dma_addr;
+ 
+-	pSCB->SCB_Post = i91uSCBPost;	/* i91u's callback routine      */
+-	pSCB->SCB_Srb = SCpnt;
+-	pSCB->SCB_Opcode = ExecSCSI;
+-	pSCB->SCB_Flags = SCF_POST;	/* After SCSI done, call post routine */
+-	pSCB->SCB_Target = SCpnt->device->id;
+-	pSCB->SCB_Lun = SCpnt->device->lun;
+-	pSCB->SCB_Ident = SCpnt->device->lun | DISC_ALLOW;
++	/* Fill in the command headers */
++	cblk->post = i91uSCBPost;	/* i91u's callback routine      */
++	cblk->srb = cmnd;
++	cblk->opcode = ExecSCSI;
++	cblk->flags = SCF_POST;	/* After SCSI done, call post routine */
++	cblk->target = cmnd->device->id;
++	cblk->lun = cmnd->device->lun;
++	cblk->ident = cmnd->device->lun | DISC_ALLOW;
+ 
+-	pSCB->SCB_Flags |= SCF_SENSE;	/* Turn on auto request sense   */
+-	dma_addr = dma_map_single(&pHCB->pci_dev->dev, SCpnt->sense_buffer,
+-				  SENSE_SIZE, DMA_FROM_DEVICE);
+-	pSCB->SCB_SensePtr = cpu_to_le32((u32)dma_addr);
+-	pSCB->SCB_SenseLen = cpu_to_le32(SENSE_SIZE);
+-	SCpnt->SCp.ptr = (char *)(unsigned long)dma_addr;
+-
+-	pSCB->SCB_CDBLen = SCpnt->cmd_len;
+-	pSCB->SCB_HaStat = 0;
+-	pSCB->SCB_TaStat = 0;
+-	memcpy(&pSCB->SCB_CDB[0], &SCpnt->cmnd, SCpnt->cmd_len);
++	cblk->flags |= SCF_SENSE;	/* Turn on auto request sense   */
+ 
+-	if (SCpnt->device->tagged_supported) {	/* Tag Support                  */
+-		pSCB->SCB_TagMsg = SIMPLE_QUEUE_TAG;	/* Do simple tag only   */
++	/* Map the sense buffer into bus memory */
++	dma_addr = dma_map_single(&host->pci_dev->dev, cmnd->sense_buffer,
++				  SENSE_SIZE, DMA_FROM_DEVICE);
++	cblk->senseptr = cpu_to_le32((u32)dma_addr);
++	cblk->senselen = cpu_to_le32(SENSE_SIZE);
++	cmnd->SCp.ptr = (char *)(unsigned long)dma_addr;
++	cblk->cdblen = cmnd->cmd_len;
++
++	/* Clear the returned status */
++	cblk->hastat = 0;
++	cblk->tastat = 0;
++	/* Copy in the SCSI CDB itself */
++	memcpy(&cblk->cdb[0], &cmnd->cmnd, cmnd->cmd_len);
++
++	/* Set up tags */
++	if (cmnd->device->tagged_supported) {	/* Tag Support                  */
++		cblk->tagmsg = SIMPLE_QUEUE_TAG;	/* Do simple tag only   */
+ 	} else {
+-		pSCB->SCB_TagMsg = 0;	/* No tag support               */
++		cblk->tagmsg = 0;	/* No tag support               */
+ 	}
++
+ 	/* todo handle map_sg error */
+-	if (SCpnt->use_sg) {
+-		dma_addr = dma_map_single(&pHCB->pci_dev->dev, &pSCB->SCB_SGList[0],
+-					  sizeof(struct SG_Struc) * TOTAL_SG_ENTRY,
++	nseg = scsi_dma_map(cmnd);
++	BUG_ON(nseg < 0);
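++	/* scsi_dma_map() maps the data segments; the SG table itself must also be DMA visible to the controller, hence the dma_map_single() below */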
++	if (nseg) {
++		dma_addr = dma_map_single(&host->pci_dev->dev, &cblk->sglist[0],
++					  sizeof(struct sg_entry) * TOTAL_SG_ENTRY,
+ 					  DMA_BIDIRECTIONAL);
+-		pSCB->SCB_BufPtr = cpu_to_le32((u32)dma_addr);
+-		SCpnt->SCp.dma_handle = dma_addr;
++		cblk->bufptr = cpu_to_le32((u32)dma_addr);
++		cmnd->SCp.dma_handle = dma_addr;
+ 
+-		pSrbSG = (struct scatterlist *) SCpnt->request_buffer;
+-		pSCB->SCB_SGLen = dma_map_sg(&pHCB->pci_dev->dev, pSrbSG,
+-					     SCpnt->use_sg, SCpnt->sc_data_direction);
+-
+-		pSCB->SCB_Flags |= SCF_SG;	/* Turn on SG list flag       */
+-		for (i = 0, TotalLen = 0, pSG = &pSCB->SCB_SGList[0];	/* 1.01g */
+-		     i < pSCB->SCB_SGLen; i++, pSG++, pSrbSG++) {
+-			pSG->SG_Ptr = cpu_to_le32((u32)sg_dma_address(pSrbSG));
+-			TotalLen += pSG->SG_Len = cpu_to_le32((u32)sg_dma_len(pSrbSG));
+-		}
+-
+-		pSCB->SCB_BufLen = (SCpnt->request_bufflen > TotalLen) ?
+-		    TotalLen : SCpnt->request_bufflen;
+-	} else if (SCpnt->request_bufflen) {		/* Non SG */
+-		dma_addr = dma_map_single(&pHCB->pci_dev->dev, SCpnt->request_buffer,
+-					  SCpnt->request_bufflen,
+-					  SCpnt->sc_data_direction);
+-		SCpnt->SCp.dma_handle = dma_addr;
+-		pSCB->SCB_BufPtr = cpu_to_le32((u32)dma_addr);
+-		pSCB->SCB_BufLen = cpu_to_le32((u32)SCpnt->request_bufflen);
+-		pSCB->SCB_SGLen = 0;
+-	} else {
+-		pSCB->SCB_BufLen = 0;
+-		pSCB->SCB_SGLen = 0;
++
++		cblk->flags |= SCF_SG;	/* Turn on SG list flag       */
++		total_len = 0;
++		sg = &cblk->sglist[0];
++		scsi_for_each_sg(cmnd, sglist, cblk->sglen, i) {
++			sg->data = cpu_to_le32((u32)sg_dma_address(sglist));
++			total_len += sg->len = cpu_to_le32((u32)sg_dma_len(sglist));
++		}
++
++		cblk->buflen = (scsi_bufflen(cmnd) > total_len) ?
++			total_len : scsi_bufflen(cmnd);
++	} else {	/* No data transfer required */
++		cblk->buflen = 0;
++		cblk->sglen = 0;
+ 	}
+ }
+ 
++/**
++ *	i91u_queuecommand	-	Queue a new command if possible
++ *	@cmd: SCSI command block from the mid layer
++ *	@done: Completion handler
++ *
++ *	Attempts to queue a new command with the host adapter. Will return
++ *	zero if successful or indicate a host busy condition if not (which
++ *	will cause the mid layer to call us again later with the command)
++ */
++
+ static int i91u_queuecommand(struct scsi_cmnd *cmd,
+ 		void (*done)(struct scsi_cmnd *))
+ {
+-	HCS *pHCB = (HCS *) cmd->device->host->base;
+-	register SCB *pSCB;
++	struct initio_host *host = (struct initio_host *) cmd->device->host->hostdata;
++	struct scsi_ctrl_blk *cmnd;
+ 
+ 	cmd->scsi_done = done;
+ 
+-	pSCB = tul_alloc_scb(pHCB);
+-	if (!pSCB)
++	cmnd = initio_alloc_scb(host);
++	if (!cmnd)
+ 		return SCSI_MLQUEUE_HOST_BUSY;
+ 
+-	i91uBuildSCB(pHCB, pSCB, cmd);
+-	tul_exec_scb(pHCB, pSCB);
++	initio_build_scb(host, cmnd, cmd);
++	initio_exec_scb(host, cmnd);
+ 	return 0;
+ }
+ 
+-#if 0 /* no new EH yet */
+-/*
+- *  Abort a queued command
+- *  (commands that are on the bus can't be aborted easily)
+- */
+-static int i91u_abort(struct scsi_cmnd * SCpnt)
+-{
+-	HCS *pHCB;
+-
+-	pHCB = (HCS *) SCpnt->device->host->base;
+-	return tul_abort_srb(pHCB, SCpnt);
+-}
+-
+-/*
+- *  Reset registers, reset a hanging bus and
+- *  kill active and disconnected commands for target w/o soft reset
++/**
++ *	i91u_bus_reset		-	reset the SCSI bus
++ *	@cmnd: Command block we want to trigger the reset for
++ *
++ *	Initiate a SCSI bus reset sequence
+  */
+-static int i91u_reset(struct scsi_cmnd * SCpnt, unsigned int reset_flags)
+-{				/* I need Host Control Block Information */
+-	HCS *pHCB;
+-
+-	pHCB = (HCS *) SCpnt->device->host->base;
+-
+-	if (reset_flags & (SCSI_RESET_SUGGEST_BUS_RESET | SCSI_RESET_SUGGEST_HOST_RESET))
+-		return tul_reset_scsi_bus(pHCB);
+-	else
+-		return tul_device_reset(pHCB, SCpnt, SCpnt->device->id, reset_flags);
+-}
+-#endif
+ 
+-static int i91u_bus_reset(struct scsi_cmnd * SCpnt)
++static int i91u_bus_reset(struct scsi_cmnd * cmnd)
+ {
+-	HCS *pHCB;
++	struct initio_host *host;
+ 
+-	pHCB = (HCS *) SCpnt->device->host->base;
++	host = (struct initio_host *) cmnd->device->host->hostdata;
+ 
+-	spin_lock_irq(SCpnt->device->host->host_lock);
+-	tul_reset_scsi(pHCB, 0);
+-	spin_unlock_irq(SCpnt->device->host->host_lock);
++	spin_lock_irq(cmnd->device->host->host_lock);
++	initio_reset_scsi(host, 0);
++	spin_unlock_irq(cmnd->device->host->host_lock);
+ 
+ 	return SUCCESS;
+ }
+ 
+-/*
+- * Return the "logical geometry"
++/**
++ *	i91u_biosparam			-	return the "logical geometry"
++ *	@sdev: SCSI device
++ *	@dev: Matching block device
++ *	@capacity: Size of the drive in sectors
++ *	@info_array: Return space for BIOS geometry
++ *
++ *	Map the device geometry in a manner compatible with the host
++ *	controller BIOS behaviour.
++ *
++ *	FIXME: limited to 2^32 sector devices.
+  */
++
+ static int i91u_biosparam(struct scsi_device *sdev, struct block_device *dev,
+ 		sector_t capacity, int *info_array)
+ {
+-	HCS *pHcb;		/* Point to Host adapter control block */
+-	TCS *pTcb;
++	struct initio_host *host;		/* Point to Host adapter control block */
++	struct target_control *tc;
+ 
+-	pHcb = (HCS *) sdev->host->base;
+-	pTcb = &pHcb->HCS_Tcs[sdev->id];
++	host = (struct initio_host *) sdev->host->hostdata;
++	tc = &host->targets[sdev->id];
+ 
+-	if (pTcb->TCS_DrvHead) {
+-		info_array[0] = pTcb->TCS_DrvHead;
+-		info_array[1] = pTcb->TCS_DrvSector;
+-		info_array[2] = (unsigned long)capacity / pTcb->TCS_DrvHead / pTcb->TCS_DrvSector;
++	if (tc->heads) {
++		info_array[0] = tc->heads;
++		info_array[1] = tc->sectors;
++		info_array[2] = (unsigned long)capacity / tc->heads / tc->sectors;
+ 	} else {
+-		if (pTcb->TCS_DrvFlags & TCF_DRV_255_63) {
++		if (tc->drv_flags & TCF_DRV_255_63) {
+ 			info_array[0] = 255;
+ 			info_array[1] = 63;
+ 			info_array[2] = (unsigned long)capacity / 255 / 63;
+@@ -3047,7 +2722,16 @@
+ 	return 0;
+ }
+ 
+-static void i91u_unmap_cmnd(struct pci_dev *pci_dev, struct scsi_cmnd *cmnd)
++/**
++ *	i91u_unmap_scb		-	Unmap a command
++ *	@pci_dev: PCI device the command is for
++ *	@cmnd: The command itself
++ *
++ *	Unmap any PCI mapping/IOMMU resources allocated when the command
++ *	was mapped originally as part of initio_build_scb
++ */
++
++static void i91u_unmap_scb(struct pci_dev *pci_dev, struct scsi_cmnd *cmnd)
+ {
+ 	/* auto sense buffer */
+ 	if (cmnd->SCp.ptr) {
+@@ -3058,65 +2742,63 @@
+ 	}
+ 
+ 	/* request buffer */
+-	if (cmnd->use_sg) {
++	if (scsi_sg_count(cmnd)) {
+ 		dma_unmap_single(&pci_dev->dev, cmnd->SCp.dma_handle,
+-				 sizeof(struct SG_Struc) * TOTAL_SG_ENTRY,
++				 sizeof(struct sg_entry) * TOTAL_SG_ENTRY,
+ 				 DMA_BIDIRECTIONAL);
+ 
+-		dma_unmap_sg(&pci_dev->dev, cmnd->request_buffer,
+-			     cmnd->use_sg,
+-			     cmnd->sc_data_direction);
+-	} else if (cmnd->request_bufflen) {
+-		dma_unmap_single(&pci_dev->dev, cmnd->SCp.dma_handle,
+-				 cmnd->request_bufflen,
+-				 cmnd->sc_data_direction);
++		scsi_dma_unmap(cmnd);
+ 	}
+ }
+ 
+-/*****************************************************************************
+- Function name  : i91uSCBPost
+- Description    : This is callback routine be called when tulip finish one
+-			SCSI command.
+- Input          : pHCB  -       Pointer to host adapter control block.
+-		  pSCB  -       Pointer to SCSI control block.
+- Output         : None.
+- Return         : None.
+-*****************************************************************************/
+-static void i91uSCBPost(BYTE * pHcb, BYTE * pScb)
+-{
+-	struct scsi_cmnd *pSRB;	/* Pointer to SCSI request block */
+-	HCS *pHCB;
+-	SCB *pSCB;
+-
+-	pHCB = (HCS *) pHcb;
+-	pSCB = (SCB *) pScb;
+-	if ((pSRB = pSCB->SCB_Srb) == 0) {
+-		printk("i91uSCBPost: SRB pointer is empty\n");
++/**
++ *	i91uSCBPost		-	SCSI callback
++ *	@host_mem: Pointer to host adapter control block.
++ *	@cblk_mem: Pointer to SCSI control block.
++ *
++ *	Callback routine invoked when the tulip has finished one
++ *	SCSI command.
++ */
++
++static void i91uSCBPost(u8 * host_mem, u8 * cblk_mem)
++{
++	struct scsi_cmnd *cmnd;	/* Pointer to SCSI request block */
++	struct initio_host *host;
++	struct scsi_ctrl_blk *cblk;
+ 
+-		tul_release_scb(pHCB, pSCB);	/* Release SCB for current channel */
++	host = (struct initio_host *) host_mem;
++	cblk = (struct scsi_ctrl_blk *) cblk_mem;
++	if ((cmnd = cblk->srb) == NULL) {
++		printk(KERN_ERR "i91uSCBPost: SRB pointer is empty\n");
++		WARN_ON(1);
++		initio_release_scb(host, cblk);	/* Release SCB for current channel */
+ 		return;
+ 	}
+-	switch (pSCB->SCB_HaStat) {
++
++	/*
++	 *	Remap the firmware error status into a mid layer one
++	 */
++	switch (cblk->hastat) {
+ 	case 0x0:
+ 	case 0xa:		/* Linked command complete without error and linked normally */
+ 	case 0xb:		/* Linked command complete without error interrupt generated */
+-		pSCB->SCB_HaStat = 0;
++		cblk->hastat = 0;
+ 		break;
+ 
+ 	case 0x11:		/* Selection time out-The initiator selection or target
+ 				   reselection was not complete within the SCSI Time out period */
+-		pSCB->SCB_HaStat = DID_TIME_OUT;
++		cblk->hastat = DID_TIME_OUT;
+ 		break;
+ 
+ 	case 0x14:		/* Target bus phase sequence failure-An invalid bus phase or bus
+ 				   phase sequence was requested by the target. The host adapter
+ 				   will generate a SCSI Reset Condition, notifying the host with
+ 				   a SCRD interrupt */
+-		pSCB->SCB_HaStat = DID_RESET;
++		cblk->hastat = DID_RESET;
+ 		break;
+ 
+ 	case 0x1a:		/* SCB Aborted. 07/21/98 */
+-		pSCB->SCB_HaStat = DID_ABORT;
++		cblk->hastat = DID_ABORT;
+ 		break;
+ 
+ 	case 0x12:		/* Data overrun/underrun-The target attempted to transfer more data
+@@ -3126,49 +2808,196 @@
+ 	case 0x16:		/* Invalid SCB Operation Code. */
+ 
+ 	default:
+-		printk("ini9100u: %x %x\n", pSCB->SCB_HaStat, pSCB->SCB_TaStat);
+-		pSCB->SCB_HaStat = DID_ERROR;	/* Couldn't find any better */
++		printk("ini9100u: %x %x\n", cblk->hastat, cblk->tastat);
++		cblk->hastat = DID_ERROR;	/* Couldn't find any better */
+ 		break;
+ 	}
+ 
+-	pSRB->result = pSCB->SCB_TaStat | (pSCB->SCB_HaStat << 16);
+-
+-	if (pSRB == NULL) {
+-		printk("pSRB is NULL\n");
+-	}
+-
+-	i91u_unmap_cmnd(pHCB->pci_dev, pSRB);
+-	pSRB->scsi_done(pSRB);	/* Notify system DONE           */
+-
+-	tul_release_scb(pHCB, pSCB);	/* Release SCB for current channel */
++	cmnd->result = cblk->tastat | (cblk->hastat << 16);
++	WARN_ON(cmnd == NULL);
++	i91u_unmap_scb(host->pci_dev, cmnd);
++	cmnd->scsi_done(cmnd);	/* Notify system DONE           */
++	initio_release_scb(host, cblk);	/* Release SCB for current channel */
+ }
+ 
+-/*
+- * Release ressources
+- */
+-static int i91u_release(struct Scsi_Host *hreg)
+-{
+-	free_irq(hreg->irq, hreg);
+-	release_region(hreg->io_port, 256);
+-	return 0;
+-}
+-MODULE_LICENSE("Dual BSD/GPL");
+-
+-static struct scsi_host_template driver_template = {
++static struct scsi_host_template initio_template = {
+ 	.proc_name	= "INI9100U",
+-	.name		= i91u_REVID,
+-	.detect		= i91u_detect,
+-	.release	= i91u_release,
++	.name			= "Initio INI-9X00U/UW SCSI device driver",
+ 	.queuecommand	= i91u_queuecommand,
+-//	.abort		= i91u_abort,
+-//	.reset		= i91u_reset,
+ 	.eh_bus_reset_handler = i91u_bus_reset,
+ 	.bios_param	= i91u_biosparam,
+-	.can_queue	= 1,
++	.can_queue		= MAX_TARGETS * i91u_MAXQUEUE,
+ 	.this_id	= 1,
+ 	.sg_tablesize	= SG_ALL,
+ 	.cmd_per_lun 	= 1,
+ 	.use_clustering	= ENABLE_CLUSTERING,
+ };
+-#include "scsi_module.c"
+ 
++static int initio_probe_one(struct pci_dev *pdev,
++	const struct pci_device_id *id)
++{
++	struct Scsi_Host *shost;
++	struct initio_host *host;
++	u32 reg;
++	u16 bios_seg;
++	struct scsi_ctrl_blk *scb, *tmp, *prev = NULL /* silence gcc */;
++	int num_scb, i, error;
++
++	error = pci_enable_device(pdev);
++	if (error)
++		return error;
++
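++	/* Recover the BIOS segment stashed in PCI config register 0x44; a second byte of 0xFF appears to mean no BIOS is mapped */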
++	pci_read_config_dword(pdev, 0x44, &reg);
++	bios_seg = (u16) (reg & 0xFF);
++	if (((reg & 0xFF00) >> 8) == 0xFF)
++		reg = 0;
++	bios_seg = (bios_seg << 8) + ((u16) ((reg & 0xFF00) >> 8));
++
++	if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
++		printk(KERN_WARNING  "i91u: Could not set 32 bit DMA mask\n");
++		error = -ENODEV;
++		goto out_disable_device;
++	}
++	shost = scsi_host_alloc(&initio_template, sizeof(struct initio_host));
++	if (!shost) {
++		printk(KERN_WARNING "initio: Could not allocate host structure.\n");
++		error = -ENOMEM;
++		goto out_disable_device;
++	}
++	host = (struct initio_host *)shost->hostdata;
++	memset(host, 0, sizeof(struct initio_host));
++	host->addr = pci_resource_start(pdev, 0);	/* must be set before it is used below */
++
++	if (!request_region(host->addr, 256, "i91u")) {
++		printk(KERN_WARNING "initio: I/O port range 0x%x is busy.\n", host->addr);
++		error = -ENODEV;
++		goto out_host_put;
++	}
++
++	if (initio_tag_enable)	/* 1.01i */
++		num_scb = MAX_TARGETS * i91u_MAXQUEUE;
++	else
++		num_scb = MAX_TARGETS + 3;	/* 1-tape, 1-CD_ROM, 1- extra */
++
++	for (; num_scb >= MAX_TARGETS + 3; num_scb--) {
++		i = num_scb * sizeof(struct scsi_ctrl_blk);
++		if ((scb = kzalloc(i, GFP_KERNEL | GFP_DMA)) != NULL)
++			break;
++	}
++
++	if (!scb) {
++		printk(KERN_WARNING "initio: Cannot allocate SCB array.\n");
++		error = -ENOMEM;
++		goto out_release_region;
++	}
++
++	host->num_scbs = num_scb;
++	host->scb = scb;
++	host->next_pending = scb;
++	host->next_avail = scb;
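++	/* Thread the SCB array into a singly linked free list, tagging each SCB with its index */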
++	for (i = 0, tmp = scb; i < num_scb; i++, tmp++) {
++		tmp->tagid = i;
++		if (i != 0)
++			prev->next = tmp;
++		prev = tmp;
++	}
++	prev->next = NULL;
++	host->scb_end = tmp;
++	host->first_avail = scb;
++	host->last_avail = prev;
++
++	initio_init(host, phys_to_virt(bios_seg << 4));
++
++	host->jsstatus0 = 0;
++
++	shost->io_port = host->addr;
++	shost->n_io_port = 0xff;
++	shost->can_queue = num_scb;		/* 03/05/98                      */
++	shost->unique_id = host->addr;
++	shost->max_id = host->max_tar;
++	shost->max_lun = 32;	/* 10/21/97                     */
++	shost->irq = pdev->irq;
++	shost->this_id = host->scsi_id;	/* Assign HCS index           */
++	shost->base = host->addr;
++	shost->sg_tablesize = TOTAL_SG_ENTRY;
++
++	error = request_irq(pdev->irq, i91u_intr, IRQF_DISABLED|IRQF_SHARED, "i91u", shost);
++	if (error < 0) {
++		printk(KERN_WARNING "initio: Unable to request IRQ %d\n", pdev->irq);
++		goto out_free_scbs;
++	}
++
++	pci_set_drvdata(pdev, shost);
++	host->pci_dev = pdev;
++
++	error = scsi_add_host(shost, &pdev->dev);
++	if (error)
++		goto out_free_irq;
++	scsi_scan_host(shost);
++	return 0;
++out_free_irq:
++	free_irq(pdev->irq, shost);
++out_free_scbs:
++	kfree(host->scb);
++out_release_region:
++	release_region(host->addr, 256);
++out_host_put:
++	scsi_host_put(shost);
++out_disable_device:
++	pci_disable_device(pdev);
++	return error;
++}
++
++/**
++ *	initio_remove_one	-	control shutdown
++ *	@pdev:	PCI device being released
++ *
++ *	Release the resources assigned to this adapter after it has
++ *	finished being used.
++ */
++
++static void initio_remove_one(struct pci_dev *pdev)
++{
++	struct Scsi_Host *host = pci_get_drvdata(pdev);
++	struct initio_host *s = (struct initio_host *)host->hostdata;
++	scsi_remove_host(host);
++	free_irq(pdev->irq, host);
++	release_region(s->addr, 256);
++	scsi_host_put(host);
++	pci_disable_device(pdev);
++}
++
++static struct pci_device_id initio_pci_tbl[] = {
++	{PCI_VENDOR_ID_INIT, 0x9500, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++	{PCI_VENDOR_ID_INIT, 0x9400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++	{PCI_VENDOR_ID_INIT, 0x9401, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++	{PCI_VENDOR_ID_INIT, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++	{PCI_VENDOR_ID_DOMEX, 0x0002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
++	{0,}
++};
++MODULE_DEVICE_TABLE(pci, initio_pci_tbl);
++
++static struct pci_driver initio_pci_driver = {
++	.name		= "initio",
++	.id_table	= initio_pci_tbl,
++	.probe		= initio_probe_one,
++	.remove		= __devexit_p(initio_remove_one),
++};
++
++static int __init initio_init_driver(void)
++{
++	return pci_register_driver(&initio_pci_driver);
++}
++
++static void __exit initio_exit_driver(void)
++{
++	pci_unregister_driver(&initio_pci_driver);
++}
++
++MODULE_DESCRIPTION("Initio INI-9X00U/UW SCSI device driver");
++MODULE_AUTHOR("Initio Corporation");
++MODULE_LICENSE("GPL");
++
++module_init(initio_init_driver);
++module_exit(initio_exit_driver);
+diff -Nurb linux-2.6.22-570/drivers/scsi/initio.h linux-2.6.22-try2/drivers/scsi/initio.h
+--- linux-2.6.22-570/drivers/scsi/initio.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/initio.h	2007-12-19 15:29:23.000000000 -0500
+@@ -4,6 +4,8 @@
+  * Copyright (c) 1994-1998 Initio Corporation
+  * All rights reserved.
+  *
++ * Cleanups (c) Copyright 2007 Red Hat <alan@redhat.com>
++ *
+  * This program is free software; you can redistribute it and/or modify
+  * it under the terms of the GNU General Public License as published by
+  * the Free Software Foundation; either version 2, or (at your option)
+@@ -18,27 +20,6 @@
+  * along with this program; see the file COPYING.  If not, write to
+  * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+  *
+- * --------------------------------------------------------------------------
+- *
+- * Redistribution and use in source and binary forms, with or without
+- * modification, are permitted provided that the following conditions
+- * are met:
+- * 1. Redistributions of source code must retain the above copyright
+- *    notice, this list of conditions, and the following disclaimer,
+- *    without modification, immediately at the beginning of the file.
+- * 2. Redistributions in binary form must reproduce the above copyright
+- *    notice, this list of conditions and the following disclaimer in the
+- *    documentation and/or other materials provided with the distribution.
+- * 3. The name of the author may not be used to endorse or promote products
+- *    derived from this software without specific prior written permission.
+- *
+- * Where this Software is combined with software released under the terms of 
+- * the GNU General Public License ("GPL") and the terms of the GPL would require the 
+- * combined work to also be released under the terms of the GPL, the terms
+- * and conditions of this License will apply in addition to those of the
+- * GPL with the exception of any terms or conditions of this License that
+- * conflict with, or are expressly prohibited by, the GPL.
+- *
+  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+@@ -56,17 +37,6 @@
+ 
+ #include <linux/types.h>
+ 
+-#define ULONG   unsigned long
+-#define USHORT  unsigned short
+-#define UCHAR   unsigned char
+-#define BYTE    unsigned char
+-#define WORD    unsigned short
+-#define DWORD   unsigned long
+-#define UBYTE   unsigned char
+-#define UWORD   unsigned short
+-#define UDWORD  unsigned long
+-#define U32     u32
+-
+ #define TOTAL_SG_ENTRY		32
+ #define MAX_SUPPORTED_ADAPTERS  8
+ #define MAX_OFFSET		15
+@@ -368,55 +338,55 @@
+ /************************************************************************/
+ /*              Scatter-Gather Element Structure                        */
+ /************************************************************************/
+-typedef struct SG_Struc {
+-	U32 SG_Ptr;		/* Data Pointer */
+-	U32 SG_Len;		/* Data Length */
+-} SG;
++struct sg_entry {
++	u32 data;		/* Data Pointer */
++	u32 len;		/* Data Length */
++};
+ 
+ /***********************************************************************
+ 		SCSI Control Block
+ ************************************************************************/
+-typedef struct Scsi_Ctrl_Blk {
+-	struct Scsi_Ctrl_Blk *SCB_NxtScb;
+-	UBYTE SCB_Status;	/*4 */
+-	UBYTE SCB_NxtStat;	/*5 */
+-	UBYTE SCB_Mode;		/*6 */
+-	UBYTE SCB_Msgin;	/*7 SCB_Res0 */
+-	UWORD SCB_SGIdx;	/*8 */
+-	UWORD SCB_SGMax;	/*A */
++struct scsi_ctrl_blk {
++	struct scsi_ctrl_blk *next;
++	u8 status;	/*4 */
++	u8 next_state;	/*5 */
++	u8 mode;		/*6 */
++	u8 msgin;	/*7 SCB_Res0 */
++	u16 sgidx;	/*8 */
++	u16 sgmax;	/*A */
+ #ifdef ALPHA
+-	U32 SCB_Reserved[2];	/*C */
++	u32 reserved[2];	/*C */
+ #else
+-	U32 SCB_Reserved[3];	/*C */
++	u32 reserved[3];	/*C */
+ #endif
+ 
+-	U32 SCB_XferLen;	/*18 Current xfer len           */
+-	U32 SCB_TotXLen;	/*1C Total xfer len             */
+-	U32 SCB_PAddr;		/*20 SCB phy. Addr. */
+-
+-	UBYTE SCB_Opcode;	/*24 SCB command code */
+-	UBYTE SCB_Flags;	/*25 SCB Flags */
+-	UBYTE SCB_Target;	/*26 Target Id */
+-	UBYTE SCB_Lun;		/*27 Lun */
+-	U32 SCB_BufPtr;		/*28 Data Buffer Pointer */
+-	U32 SCB_BufLen;		/*2C Data Allocation Length */
+-	UBYTE SCB_SGLen;	/*30 SG list # */
+-	UBYTE SCB_SenseLen;	/*31 Sense Allocation Length */
+-	UBYTE SCB_HaStat;	/*32 */
+-	UBYTE SCB_TaStat;	/*33 */
+-	UBYTE SCB_CDBLen;	/*34 CDB Length */
+-	UBYTE SCB_Ident;	/*35 Identify */
+-	UBYTE SCB_TagMsg;	/*36 Tag Message */
+-	UBYTE SCB_TagId;	/*37 Queue Tag */
+-	UBYTE SCB_CDB[12];	/*38 */
+-	U32 SCB_SGPAddr;	/*44 SG List/Sense Buf phy. Addr. */
+-	U32 SCB_SensePtr;	/*48 Sense data pointer */
+-	void (*SCB_Post) (BYTE *, BYTE *);	/*4C POST routine */
+-	struct scsi_cmnd *SCB_Srb;	/*50 SRB Pointer */
+-	SG SCB_SGList[TOTAL_SG_ENTRY];	/*54 Start of SG list */
+-} SCB;
++	u32 xferlen;	/*18 Current xfer len           */
++	u32 totxlen;	/*1C Total xfer len             */
++	u32 paddr;		/*20 SCB phy. Addr. */
++
++	u8 opcode;	/*24 SCB command code */
++	u8 flags;	/*25 SCB Flags */
++	u8 target;	/*26 Target Id */
++	u8 lun;		/*27 Lun */
++	u32 bufptr;		/*28 Data Buffer Pointer */
++	u32 buflen;		/*2C Data Allocation Length */
++	u8 sglen;	/*30 SG list # */
++	u8 senselen;	/*31 Sense Allocation Length */
++	u8 hastat;	/*32 */
++	u8 tastat;	/*33 */
++	u8 cdblen;	/*34 CDB Length */
++	u8 ident;	/*35 Identify */
++	u8 tagmsg;	/*36 Tag Message */
++	u8 tagid;	/*37 Queue Tag */
++	u8 cdb[12];	/*38 */
++	u32 sgpaddr;	/*44 SG List/Sense Buf phy. Addr. */
++	u32 senseptr;	/*48 Sense data pointer */
++	void (*post) (u8 *, u8 *);	/*4C POST routine */
++	struct scsi_cmnd *srb;	/*50 SRB Pointer */
++	struct sg_entry sglist[TOTAL_SG_ENTRY];	/*54 Start of SG list */
++};
+ 
+-/* Bit Definition for SCB_Status */
++/* Bit Definition for status */
+ #define SCB_RENT        0x01
+ #define SCB_PEND        0x02
+ #define SCB_CONTIG      0x04	/* Contigent Allegiance */
+@@ -425,17 +395,17 @@
+ #define SCB_DONE        0x20
+ 
+ 
+-/* Opcodes of SCB_Opcode */
++/* Opcodes for opcode */
+ #define ExecSCSI        0x1
+ #define BusDevRst       0x2
+ #define AbortCmd        0x3
+ 
+ 
+-/* Bit Definition for SCB_Mode */
++/* Bit Definition for mode */
+ #define SCM_RSENS       0x01	/* request sense mode */
+ 
+ 
+-/* Bit Definition for SCB_Flags */
++/* Bit Definition for flags */
+ #define SCF_DONE        0x01
+ #define SCF_POST        0x02
+ #define SCF_SENSE       0x04
+@@ -492,15 +462,14 @@
+ 		Target Device Control Structure
+ **********************************************************************/
+ 
+-typedef struct Tar_Ctrl_Struc {
+-	UWORD TCS_Flags;	/* 0 */
+-	UBYTE TCS_JS_Period;	/* 2 */
+-	UBYTE TCS_SConfig0;	/* 3 */
+-
+-	UWORD TCS_DrvFlags;	/* 4 */
+-	UBYTE TCS_DrvHead;	/* 6 */
+-	UBYTE TCS_DrvSector;	/* 7 */
+-} TCS;
++struct target_control {
++	u16 flags;
++	u8 js_period;
++	u8 sconfig0;
++	u16 drv_flags;
++	u8 heads;
++	u8 sectors;
++};
+ 
+ /***********************************************************************
+ 		Target Device Control Structure
+@@ -523,62 +492,53 @@
+ #define TCF_DRV_EN_TAG          0x0800
+ #define TCF_DRV_255_63          0x0400
+ 
+-typedef struct I91u_Adpt_Struc {
+-	UWORD ADPT_BIOS;	/* 0 */
+-	UWORD ADPT_BASE;	/* 1 */
+-	UBYTE ADPT_Bus;		/* 2 */
+-	UBYTE ADPT_Device;	/* 3 */
+-	UBYTE ADPT_INTR;	/* 4 */
+-} INI_ADPT_STRUCT;
+-
+-
+ /***********************************************************************
+ 	      Host Adapter Control Structure
+ ************************************************************************/
+-typedef struct Ha_Ctrl_Struc {
+-	UWORD HCS_Base;		/* 00 */
+-	UWORD HCS_BIOS;		/* 02 */
+-	UBYTE HCS_Intr;		/* 04 */
+-	UBYTE HCS_SCSI_ID;	/* 05 */
+-	UBYTE HCS_MaxTar;	/* 06 */
+-	UBYTE HCS_NumScbs;	/* 07 */
+-
+-	UBYTE HCS_Flags;	/* 08 */
+-	UBYTE HCS_Index;	/* 09 */
+-	UBYTE HCS_HaId;		/* 0A */
+-	UBYTE HCS_Config;	/* 0B */
+-	UWORD HCS_IdMask;	/* 0C */
+-	UBYTE HCS_Semaph;	/* 0E */
+-	UBYTE HCS_Phase;	/* 0F */
+-	UBYTE HCS_JSStatus0;	/* 10 */
+-	UBYTE HCS_JSInt;	/* 11 */
+-	UBYTE HCS_JSStatus1;	/* 12 */
+-	UBYTE HCS_SConf1;	/* 13 */
+-
+-	UBYTE HCS_Msg[8];	/* 14 */
+-	SCB *HCS_NxtAvail;	/* 1C */
+-	SCB *HCS_Scb;		/* 20 */
+-	SCB *HCS_ScbEnd;	/* 24 */
+-	SCB *HCS_NxtPend;	/* 28 */
+-	SCB *HCS_NxtContig;	/* 2C */
+-	SCB *HCS_ActScb;	/* 30 */
+-	TCS *HCS_ActTcs;	/* 34 */
+-
+-	SCB *HCS_FirstAvail;	/* 38 */
+-	SCB *HCS_LastAvail;	/* 3C */
+-	SCB *HCS_FirstPend;	/* 40 */
+-	SCB *HCS_LastPend;	/* 44 */
+-	SCB *HCS_FirstBusy;	/* 48 */
+-	SCB *HCS_LastBusy;	/* 4C */
+-	SCB *HCS_FirstDone;	/* 50 */
+-	SCB *HCS_LastDone;	/* 54 */
+-	UBYTE HCS_MaxTags[16];	/* 58 */
+-	UBYTE HCS_ActTags[16];	/* 68 */
+-	TCS HCS_Tcs[MAX_TARGETS];	/* 78 */
+-	spinlock_t HCS_AvailLock;
+-	spinlock_t HCS_SemaphLock;
++struct initio_host {
++	u16 addr;		/* 00 */
++	u16 bios_addr;		/* 02 */
++	u8 irq;			/* 04 */
++	u8 scsi_id;		/* 05 */
++	u8 max_tar;		/* 06 */
++	u8 num_scbs;		/* 07 */
++
++	u8 flags;		/* 08 */
++	u8 index;		/* 09 */
++	u8 ha_id;		/* 0A */
++	u8 config;		/* 0B */
++	u16 idmask;		/* 0C */
++	u8 semaph;		/* 0E */
++	u8 phase;		/* 0F */
++	u8 jsstatus0;		/* 10 */
++	u8 jsint;		/* 11 */
++	u8 jsstatus1;		/* 12 */
++	u8 sconf1;		/* 13 */
++
++	u8 msg[8];		/* 14 */
++	struct scsi_ctrl_blk *next_avail;	/* 1C */
++	struct scsi_ctrl_blk *scb;		/* 20 */
++	struct scsi_ctrl_blk *scb_end;		/* 24 */ /*UNUSED*/
++	struct scsi_ctrl_blk *next_pending;	/* 28 */
++	struct scsi_ctrl_blk *next_contig;	/* 2C */ /*UNUSED*/
++	struct scsi_ctrl_blk *active;		/* 30 */
++	struct target_control *active_tc;	/* 34 */
++
++	struct scsi_ctrl_blk *first_avail;	/* 38 */
++	struct scsi_ctrl_blk *last_avail;	/* 3C */
++	struct scsi_ctrl_blk *first_pending;	/* 40 */
++	struct scsi_ctrl_blk *last_pending;	/* 44 */
++	struct scsi_ctrl_blk *first_busy;	/* 48 */
++	struct scsi_ctrl_blk *last_busy;	/* 4C */
++	struct scsi_ctrl_blk *first_done;	/* 50 */
++	struct scsi_ctrl_blk *last_done;	/* 54 */
++	u8 max_tags[16];	/* 58 */
++	u8 act_tags[16];	/* 68 */
++	struct target_control targets[MAX_TARGETS];	/* 78 */
++	spinlock_t avail_lock;
++	spinlock_t semaph_lock;
+ 	struct pci_dev *pci_dev;
+-} HCS;
++};
+ 
+ /* Bit Definition for HCB_Config */
+ #define HCC_SCSI_RESET          0x01
+@@ -599,47 +559,47 @@
+ *******************************************************************/
+ 
+ typedef struct _NVRAM_SCSI {	/* SCSI channel configuration   */
+-	UCHAR NVM_ChSCSIID;	/* 0Ch -> Channel SCSI ID       */
+-	UCHAR NVM_ChConfig1;	/* 0Dh -> Channel config 1      */
+-	UCHAR NVM_ChConfig2;	/* 0Eh -> Channel config 2      */
+-	UCHAR NVM_NumOfTarg;	/* 0Fh -> Number of SCSI target */
++	u8 NVM_ChSCSIID;	/* 0Ch -> Channel SCSI ID       */
++	u8 NVM_ChConfig1;	/* 0Dh -> Channel config 1      */
++	u8 NVM_ChConfig2;	/* 0Eh -> Channel config 2      */
++	u8 NVM_NumOfTarg;	/* 0Fh -> Number of SCSI target */
+ 	/* SCSI target configuration    */
+-	UCHAR NVM_Targ0Config;	/* 10h -> Target 0 configuration */
+-	UCHAR NVM_Targ1Config;	/* 11h -> Target 1 configuration */
+-	UCHAR NVM_Targ2Config;	/* 12h -> Target 2 configuration */
+-	UCHAR NVM_Targ3Config;	/* 13h -> Target 3 configuration */
+-	UCHAR NVM_Targ4Config;	/* 14h -> Target 4 configuration */
+-	UCHAR NVM_Targ5Config;	/* 15h -> Target 5 configuration */
+-	UCHAR NVM_Targ6Config;	/* 16h -> Target 6 configuration */
+-	UCHAR NVM_Targ7Config;	/* 17h -> Target 7 configuration */
+-	UCHAR NVM_Targ8Config;	/* 18h -> Target 8 configuration */
+-	UCHAR NVM_Targ9Config;	/* 19h -> Target 9 configuration */
+-	UCHAR NVM_TargAConfig;	/* 1Ah -> Target A configuration */
+-	UCHAR NVM_TargBConfig;	/* 1Bh -> Target B configuration */
+-	UCHAR NVM_TargCConfig;	/* 1Ch -> Target C configuration */
+-	UCHAR NVM_TargDConfig;	/* 1Dh -> Target D configuration */
+-	UCHAR NVM_TargEConfig;	/* 1Eh -> Target E configuration */
+-	UCHAR NVM_TargFConfig;	/* 1Fh -> Target F configuration */
++	u8 NVM_Targ0Config;	/* 10h -> Target 0 configuration */
++	u8 NVM_Targ1Config;	/* 11h -> Target 1 configuration */
++	u8 NVM_Targ2Config;	/* 12h -> Target 2 configuration */
++	u8 NVM_Targ3Config;	/* 13h -> Target 3 configuration */
++	u8 NVM_Targ4Config;	/* 14h -> Target 4 configuration */
++	u8 NVM_Targ5Config;	/* 15h -> Target 5 configuration */
++	u8 NVM_Targ6Config;	/* 16h -> Target 6 configuration */
++	u8 NVM_Targ7Config;	/* 17h -> Target 7 configuration */
++	u8 NVM_Targ8Config;	/* 18h -> Target 8 configuration */
++	u8 NVM_Targ9Config;	/* 19h -> Target 9 configuration */
++	u8 NVM_TargAConfig;	/* 1Ah -> Target A configuration */
++	u8 NVM_TargBConfig;	/* 1Bh -> Target B configuration */
++	u8 NVM_TargCConfig;	/* 1Ch -> Target C configuration */
++	u8 NVM_TargDConfig;	/* 1Dh -> Target D configuration */
++	u8 NVM_TargEConfig;	/* 1Eh -> Target E configuration */
++	u8 NVM_TargFConfig;	/* 1Fh -> Target F configuration */
+ } NVRAM_SCSI;
+ 
+ typedef struct _NVRAM {
+ /*----------header ---------------*/
+-	USHORT NVM_Signature;	/* 0,1: Signature */
+-	UCHAR NVM_Size;		/* 2:   Size of data structure */
+-	UCHAR NVM_Revision;	/* 3:   Revision of data structure */
++	u16 NVM_Signature;	/* 0,1: Signature */
++	u8 NVM_Size;		/* 2:   Size of data structure */
++	u8 NVM_Revision;	/* 3:   Revision of data structure */
+ 	/* ----Host Adapter Structure ---- */
+-	UCHAR NVM_ModelByte0;	/* 4:   Model number (byte 0) */
+-	UCHAR NVM_ModelByte1;	/* 5:   Model number (byte 1) */
+-	UCHAR NVM_ModelInfo;	/* 6:   Model information         */
+-	UCHAR NVM_NumOfCh;	/* 7:   Number of SCSI channel */
+-	UCHAR NVM_BIOSConfig1;	/* 8:   BIOS configuration 1  */
+-	UCHAR NVM_BIOSConfig2;	/* 9:   BIOS configuration 2  */
+-	UCHAR NVM_HAConfig1;	/* A:   Hoat adapter configuration 1 */
+-	UCHAR NVM_HAConfig2;	/* B:   Hoat adapter configuration 2 */
++	u8 NVM_ModelByte0;	/* 4:   Model number (byte 0) */
++	u8 NVM_ModelByte1;	/* 5:   Model number (byte 1) */
++	u8 NVM_ModelInfo;	/* 6:   Model information         */
++	u8 NVM_NumOfCh;	/* 7:   Number of SCSI channels */
++	u8 NVM_BIOSConfig1;	/* 8:   BIOS configuration 1  */
++	u8 NVM_BIOSConfig2;	/* 9:   BIOS configuration 2  */
++	u8 NVM_HAConfig1;	/* A:   Host adapter configuration 1 */
++	u8 NVM_HAConfig2;	/* B:   Host adapter configuration 2 */
+ 	NVRAM_SCSI NVM_SCSIInfo[2];
+-	UCHAR NVM_reserved[10];
++	u8 NVM_reserved[10];
+ 	/* ---------- CheckSum ----------       */
+-	USHORT NVM_CheckSum;	/* 0x3E, 0x3F: Checksum of NVRam        */
++	u16 NVM_CheckSum;	/* 0x3E, 0x3F: Checksum of NVRam        */
+ } NVRAM, *PNVRAM;
+ 
+ /* Bios Configuration for nvram->BIOSConfig1                            */
+@@ -681,19 +641,6 @@
+ #define DISC_ALLOW              0xC0	/* Disconnect is allowed        */
+ #define SCSICMD_RequestSense    0x03
+ 
+-typedef struct _HCSinfo {
+-	ULONG base;
+-	UCHAR vec;
+-	UCHAR bios;		/* High byte of BIOS address */
+-	USHORT BaseAndBios;	/* high byte: pHcsInfo->bios,low byte:pHcsInfo->base */
+-} HCSINFO;
+-
+-#define TUL_RD(x,y)             (UCHAR)(inb(  (int)((ULONG)(x+y)) ))
+-#define TUL_RDLONG(x,y)         (ULONG)(inl((int)((ULONG)(x+y)) ))
+-#define TUL_WR(     adr,data)   outb( (UCHAR)(data), (int)(adr))
+-#define TUL_WRSHORT(adr,data)   outw( (UWORD)(data), (int)(adr))
+-#define TUL_WRLONG( adr,data)   outl( (ULONG)(data), (int)(adr))
+-
+ #define SCSI_ABORT_SNOOZE 0
+ #define SCSI_ABORT_SUCCESS 1
+ #define SCSI_ABORT_PENDING 2
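[The initio.h hunks above drop the driver-private UCHAR/UWORD/ULONG/U32 aliases in favour of the fixed-width u8/u16/u32 types from <linux/types.h>, which make structure layouts explicit. A minimal sketch of the pattern, using a hypothetical structure rather than one from the driver:

#include <linux/types.h>

/* Before: field width is only implied by convention. */
struct old_style {
	unsigned char  status;		/* "UCHAR" */
	unsigned short sg_index;	/* "UWORD" */
	unsigned long  phys_addr;	/* "ULONG": 64 bits on 64-bit kernels! */
};

/* After: every field states its exact width. */
struct new_style {
	u8  status;
	u16 sg_index;
	u32 phys_addr;	/* 32 bits everywhere, matching the hardware */
};

The ULONG case is the practical motivation: unsigned long changes size between 32-bit and 64-bit builds, while u32 does not.]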
+diff -Nurb linux-2.6.22-570/drivers/scsi/ipr.c linux-2.6.22-try2/drivers/scsi/ipr.c
+--- linux-2.6.22-570/drivers/scsi/ipr.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/ipr.c	2007-12-19 15:29:23.000000000 -0500
+@@ -540,32 +540,6 @@
+ }
+ 
+ /**
+- * ipr_unmap_sglist - Unmap scatterlist if mapped
+- * @ioa_cfg:	ioa config struct
+- * @ipr_cmd:	ipr command struct
+- *
+- * Return value:
+- * 	nothing
+- **/
+-static void ipr_unmap_sglist(struct ipr_ioa_cfg *ioa_cfg,
+-			     struct ipr_cmnd *ipr_cmd)
+-{
+-	struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
+-
+-	if (ipr_cmd->dma_use_sg) {
+-		if (scsi_cmd->use_sg > 0) {
+-			pci_unmap_sg(ioa_cfg->pdev, scsi_cmd->request_buffer,
+-				     scsi_cmd->use_sg,
+-				     scsi_cmd->sc_data_direction);
+-		} else {
+-			pci_unmap_single(ioa_cfg->pdev, ipr_cmd->dma_handle,
+-					 scsi_cmd->request_bufflen,
+-					 scsi_cmd->sc_data_direction);
+-		}
+-	}
+-}
+-
+-/**
+  * ipr_mask_and_clear_interrupts - Mask all and clear specified interrupts
+  * @ioa_cfg:	ioa config struct
+  * @clr_ints:     interrupts to clear
+@@ -677,7 +651,7 @@
+ 
+ 	scsi_cmd->result |= (DID_ERROR << 16);
+ 
+-	ipr_unmap_sglist(ioa_cfg, ipr_cmd);
++	scsi_dma_unmap(ipr_cmd->scsi_cmd);
+ 	scsi_cmd->scsi_done(scsi_cmd);
+ 	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ }
+@@ -2465,6 +2439,7 @@
+ /**
+  * ipr_read_trace - Dump the adapter trace
+  * @kobj:		kobject struct
++ * @bin_attr:		bin_attribute struct
+  * @buf:		buffer
+  * @off:		offset
+  * @count:		buffer size
+@@ -2472,8 +2447,9 @@
+  * Return value:
+  *	number of bytes printed to buffer
+  **/
+-static ssize_t ipr_read_trace(struct kobject *kobj, char *buf,
+-			      loff_t off, size_t count)
++static ssize_t ipr_read_trace(struct kobject *kobj,
++			      struct bin_attribute *bin_attr,
++			      char *buf, loff_t off, size_t count)
+ {
+ 	struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+ 	struct Scsi_Host *shost = class_to_shost(cdev);
+@@ -3166,6 +3142,7 @@
+ /**
+  * ipr_read_dump - Dump the adapter
+  * @kobj:		kobject struct
++ * @bin_attr:		bin_attribute struct
+  * @buf:		buffer
+  * @off:		offset
+  * @count:		buffer size
+@@ -3173,8 +3150,9 @@
+  * Return value:
+  *	number of bytes printed to buffer
+  **/
+-static ssize_t ipr_read_dump(struct kobject *kobj, char *buf,
+-			      loff_t off, size_t count)
++static ssize_t ipr_read_dump(struct kobject *kobj,
++			     struct bin_attribute *bin_attr,
++			     char *buf, loff_t off, size_t count)
+ {
+ 	struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+ 	struct Scsi_Host *shost = class_to_shost(cdev);
+@@ -3327,6 +3305,7 @@
+ /**
+  * ipr_write_dump - Setup dump state of adapter
+  * @kobj:		kobject struct
++ * @bin_attr:		bin_attribute struct
+  * @buf:		buffer
+  * @off:		offset
+  * @count:		buffer size
+@@ -3334,8 +3313,9 @@
+  * Return value:
+  *	number of bytes printed to buffer
+  **/
+-static ssize_t ipr_write_dump(struct kobject *kobj, char *buf,
+-			      loff_t off, size_t count)
++static ssize_t ipr_write_dump(struct kobject *kobj,
++			      struct bin_attribute *bin_attr,
++			      char *buf, loff_t off, size_t count)
+ {
+ 	struct class_device *cdev = container_of(kobj,struct class_device,kobj);
+ 	struct Scsi_Host *shost = class_to_shost(cdev);
+@@ -4292,24 +4272,25 @@
+ static int ipr_build_ioadl(struct ipr_ioa_cfg *ioa_cfg,
+ 			   struct ipr_cmnd *ipr_cmd)
+ {
+-	int i;
+-	struct scatterlist *sglist;
++	int i, nseg;
++	struct scatterlist *sg;
+ 	u32 length;
+ 	u32 ioadl_flags = 0;
+ 	struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
+ 	struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb;
+ 	struct ipr_ioadl_desc *ioadl = ipr_cmd->ioadl;
+ 
+-	length = scsi_cmd->request_bufflen;
+-
+-	if (length == 0)
++	length = scsi_bufflen(scsi_cmd);
++	if (!length)
+ 		return 0;
+ 
+-	if (scsi_cmd->use_sg) {
+-		ipr_cmd->dma_use_sg = pci_map_sg(ioa_cfg->pdev,
+-						 scsi_cmd->request_buffer,
+-						 scsi_cmd->use_sg,
+-						 scsi_cmd->sc_data_direction);
++	nseg = scsi_dma_map(scsi_cmd);
++	if (nseg < 0) {
++		dev_err(&ioa_cfg->pdev->dev, "scsi_dma_map failed!\n");
++		return -1;
++	}
++
++	ipr_cmd->dma_use_sg = nseg;
+ 
+ 		if (scsi_cmd->sc_data_direction == DMA_TO_DEVICE) {
+ 			ioadl_flags = IPR_IOADL_FLAGS_WRITE;
+@@ -4324,8 +4305,6 @@
+ 				cpu_to_be32(sizeof(struct ipr_ioadl_desc) * ipr_cmd->dma_use_sg);
+ 		}
+ 
+-		sglist = scsi_cmd->request_buffer;
+-
+ 		if (ipr_cmd->dma_use_sg <= ARRAY_SIZE(ioarcb->add_data.u.ioadl)) {
+ 			ioadl = ioarcb->add_data.u.ioadl;
+ 			ioarcb->write_ioadl_addr =
+@@ -4334,51 +4313,14 @@
+ 			ioarcb->read_ioadl_addr = ioarcb->write_ioadl_addr;
+ 		}
+ 
+-		for (i = 0; i < ipr_cmd->dma_use_sg; i++) {
++	scsi_for_each_sg(scsi_cmd, sg, ipr_cmd->dma_use_sg, i) {
+ 			ioadl[i].flags_and_data_len =
+-				cpu_to_be32(ioadl_flags | sg_dma_len(&sglist[i]));
+-			ioadl[i].address =
+-				cpu_to_be32(sg_dma_address(&sglist[i]));
++			cpu_to_be32(ioadl_flags | sg_dma_len(sg));
++		ioadl[i].address = cpu_to_be32(sg_dma_address(sg));
+ 		}
+ 
+-		if (likely(ipr_cmd->dma_use_sg)) {
+-			ioadl[i-1].flags_and_data_len |=
+-				cpu_to_be32(IPR_IOADL_FLAGS_LAST);
+-			return 0;
+-		} else
+-			dev_err(&ioa_cfg->pdev->dev, "pci_map_sg failed!\n");
+-	} else {
+-		if (scsi_cmd->sc_data_direction == DMA_TO_DEVICE) {
+-			ioadl_flags = IPR_IOADL_FLAGS_WRITE;
+-			ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_WRITE_NOT_READ;
+-			ioarcb->write_data_transfer_length = cpu_to_be32(length);
+-			ioarcb->write_ioadl_len = cpu_to_be32(sizeof(struct ipr_ioadl_desc));
+-		} else if (scsi_cmd->sc_data_direction == DMA_FROM_DEVICE) {
+-			ioadl_flags = IPR_IOADL_FLAGS_READ;
+-			ioarcb->read_data_transfer_length = cpu_to_be32(length);
+-			ioarcb->read_ioadl_len = cpu_to_be32(sizeof(struct ipr_ioadl_desc));
+-		}
+-
+-		ipr_cmd->dma_handle = pci_map_single(ioa_cfg->pdev,
+-						     scsi_cmd->request_buffer, length,
+-						     scsi_cmd->sc_data_direction);
+-
+-		if (likely(!pci_dma_mapping_error(ipr_cmd->dma_handle))) {
+-			ioadl = ioarcb->add_data.u.ioadl;
+-			ioarcb->write_ioadl_addr =
+-				cpu_to_be32(be32_to_cpu(ioarcb->ioarcb_host_pci_addr) +
+-					    offsetof(struct ipr_ioarcb, add_data));
+-			ioarcb->read_ioadl_addr = ioarcb->write_ioadl_addr;
+-			ipr_cmd->dma_use_sg = 1;
+-			ioadl[0].flags_and_data_len =
+-				cpu_to_be32(ioadl_flags | length | IPR_IOADL_FLAGS_LAST);
+-			ioadl[0].address = cpu_to_be32(ipr_cmd->dma_handle);
++	ioadl[i-1].flags_and_data_len |= cpu_to_be32(IPR_IOADL_FLAGS_LAST);
+ 			return 0;
+-		} else
+-			dev_err(&ioa_cfg->pdev->dev, "pci_map_single failed!\n");
+-	}
+-
+-	return -1;
+ }
+ 
+ /**
+@@ -4441,7 +4383,7 @@
+ 			res->needs_sync_complete = 1;
+ 		res->in_erp = 0;
+ 	}
+-	ipr_unmap_sglist(ioa_cfg, ipr_cmd);
++	scsi_dma_unmap(ipr_cmd->scsi_cmd);
+ 	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ 	scsi_cmd->scsi_done(scsi_cmd);
+ }
+@@ -4819,7 +4761,7 @@
+ 		break;
+ 	}
+ 
+-	ipr_unmap_sglist(ioa_cfg, ipr_cmd);
++	scsi_dma_unmap(ipr_cmd->scsi_cmd);
+ 	list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ 	scsi_cmd->scsi_done(scsi_cmd);
+ }
+@@ -4840,10 +4782,10 @@
+ 	struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd;
+ 	u32 ioasc = be32_to_cpu(ipr_cmd->ioasa.ioasc);
+ 
+-	scsi_cmd->resid = be32_to_cpu(ipr_cmd->ioasa.residual_data_len);
++	scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->ioasa.residual_data_len));
+ 
+ 	if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) {
+-		ipr_unmap_sglist(ioa_cfg, ipr_cmd);
++		scsi_dma_unmap(ipr_cmd->scsi_cmd);
+ 		list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q);
+ 		scsi_cmd->scsi_done(scsi_cmd);
+ 	} else
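[The ipr.c conversion above is the standard scsi_dma_map() accessor pattern: one mapping call replaces the separate pci_map_sg()/pci_map_single() branches, and scsi_dma_unmap() replaces the matching unmap helper. A hedged sketch of the resulting shape; example_build_ioadl() and program_descriptor() are hypothetical stand-ins, not driver code:

#include <scsi/scsi_cmnd.h>

static int example_build_ioadl(struct scsi_cmnd *sc)
{
	struct scatterlist *sg;
	int i, nseg;

	if (!scsi_bufflen(sc))		/* no data phase */
		return 0;

	nseg = scsi_dma_map(sc);	/* maps the command's sg list */
	if (nseg < 0)			/* mapping failure */
		return -1;

	/* program one DMA descriptor per mapped segment */
	scsi_for_each_sg(sc, sg, nseg, i)
		program_descriptor(i, sg_dma_address(sg), sg_dma_len(sg));

	return 0;
}

/* Completion path: a single call, whatever the original buffer type. */
static void example_complete(struct scsi_cmnd *sc)
{
	scsi_dma_unmap(sc);
}

Because the midlayer now always presents a scatterlist, the single-buffer branch (request_buffer/request_bufflen) disappears entirely, which is why the hunks above delete so much code.]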
+diff -Nurb linux-2.6.22-570/drivers/scsi/ips.c linux-2.6.22-try2/drivers/scsi/ips.c
+--- linux-2.6.22-570/drivers/scsi/ips.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/ips.c	2007-12-19 15:29:23.000000000 -0500
+@@ -211,19 +211,6 @@
+ #warning "This driver has only been tested on the x86/ia64/x86_64 platforms"
+ #endif
+ 
+-#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,0)
+-#include <linux/blk.h>
+-#include "sd.h"
+-#define IPS_LOCK_SAVE(lock,flags) spin_lock_irqsave(&io_request_lock,flags)
+-#define IPS_UNLOCK_RESTORE(lock,flags) spin_unlock_irqrestore(&io_request_lock,flags)
+-#ifndef __devexit_p
+-#define __devexit_p(x) x
+-#endif
+-#else
+-#define IPS_LOCK_SAVE(lock,flags) do{spin_lock(lock);(void)flags;}while(0)
+-#define IPS_UNLOCK_RESTORE(lock,flags) do{spin_unlock(lock);(void)flags;}while(0)
+-#endif
+-
+ #define IPS_DMA_DIR(scb) ((!scb->scsi_cmd || ips_is_passthru(scb->scsi_cmd) || \
+                          DMA_NONE == scb->scsi_cmd->sc_data_direction) ? \
+                          PCI_DMA_BIDIRECTIONAL : \
+@@ -381,24 +368,13 @@
+ 	.eh_abort_handler	= ips_eh_abort,
+ 	.eh_host_reset_handler	= ips_eh_reset,
+ 	.proc_name		= "ips",
+-#if LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)
+ 	.proc_info		= ips_proc_info,
+ 	.slave_configure	= ips_slave_configure,
+-#else
+-	.proc_info		= ips_proc24_info,
+-	.select_queue_depths	= ips_select_queue_depth,
+-#endif
+ 	.bios_param		= ips_biosparam,
+ 	.this_id		= -1,
+ 	.sg_tablesize		= IPS_MAX_SG,
+ 	.cmd_per_lun		= 3,
+ 	.use_clustering		= ENABLE_CLUSTERING,
+-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+-	.use_new_eh_code	= 1,
+-#endif
+-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20)  &&  LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+-    .highmem_io          = 1,   
+-#endif
+ };
+ 
+ 
+@@ -731,7 +707,7 @@
+ 	/* free IRQ */
+ 	free_irq(ha->irq, ha);
+ 
+-	IPS_REMOVE_HOST(sh);
++	scsi_remove_host(sh);
+ 	scsi_host_put(sh);
+ 
+ 	ips_released_controllers++;
+@@ -813,7 +789,6 @@
+ 	ips_ha_t *ha;
+ 	ips_copp_wait_item_t *item;
+ 	int ret;
+-	unsigned long cpu_flags;
+ 	struct Scsi_Host *host;
+ 
+ 	METHOD_TRACE("ips_eh_abort", 1);
+@@ -830,7 +805,7 @@
+ 	if (!ha->active)
+ 		return (FAILED);
+ 
+-	IPS_LOCK_SAVE(host->host_lock, cpu_flags);
++	spin_lock(host->host_lock);
+ 
+ 	/* See if the command is on the copp queue */
+ 	item = ha->copp_waitlist.head;
+@@ -851,7 +826,7 @@
+ 		ret = (FAILED);
+ 	}
+ 
+-	IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags);
++	spin_unlock(host->host_lock);
+ 	return ret;
+ }
+ 
+@@ -1129,7 +1104,7 @@
+ 		/* A Reset IOCTL is only sent by the boot CD in extreme cases.           */
+ 		/* There can never be any system activity ( network or disk ), but check */
+ 		/* anyway just as a good practice.                                       */
+-		pt = (ips_passthru_t *) SC->request_buffer;
++		pt = (ips_passthru_t *) scsi_sglist(SC);
+ 		if ((pt->CoppCP.cmd.reset.op_code == IPS_CMD_RESET_CHANNEL) &&
+ 		    (pt->CoppCP.cmd.reset.adapter_flag == 1)) {
+ 			if (ha->scb_activelist.count != 0) {
+@@ -1176,18 +1151,10 @@
+ /*   Set bios geometry for the controller                                   */
+ /*                                                                          */
+ /****************************************************************************/
+-static int
+-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+-ips_biosparam(Disk * disk, kdev_t dev, int geom[])
+-{
+-	ips_ha_t *ha = (ips_ha_t *) disk->device->host->hostdata;
+-	unsigned long capacity = disk->capacity;
+-#else
+-ips_biosparam(struct scsi_device *sdev, struct block_device *bdev,
++static int ips_biosparam(struct scsi_device *sdev, struct block_device *bdev,
+ 	      sector_t capacity, int geom[])
+ {
+ 	ips_ha_t *ha = (ips_ha_t *) sdev->host->hostdata;
+-#endif
+ 	int heads;
+ 	int sectors;
+ 	int cylinders;
+@@ -1225,70 +1192,6 @@
+ 	return (0);
+ }
+ 
+-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+-
+-/* ips_proc24_info is a wrapper around ips_proc_info *
+- * for compatibility with the 2.4 scsi parameters    */
+-static int
+-ips_proc24_info(char *buffer, char **start, off_t offset, int length,
+-		              int hostno, int func)
+-{
+-	int i;
+-
+-	for (i = 0; i < ips_next_controller; i++) {
+-		if (ips_sh[i] && ips_sh[i]->host_no == hostno) {
+-			return ips_proc_info(ips_sh[i], buffer, start,
+-					     offset, length, func);
+-		}
+-	}
+-	return -EINVAL;	
+-}
+-
+-/****************************************************************************/
+-/*                                                                          */
+-/* Routine Name: ips_select_queue_depth                                     */
+-/*                                                                          */
+-/* Routine Description:                                                     */
+-/*                                                                          */
+-/*   Select queue depths for the devices on the contoller                   */
+-/*                                                                          */
+-/****************************************************************************/
+-static void
+-ips_select_queue_depth(struct Scsi_Host *host, struct scsi_device * scsi_devs)
+-{
+-	struct scsi_device *device;
+-	ips_ha_t *ha;
+-	int count = 0;
+-	int min;
+-
+-	ha = IPS_HA(host);
+-	min = ha->max_cmds / 4;
+-
+-	for (device = scsi_devs; device; device = device->next) {
+-		if (device->host == host) {
+-			if ((device->channel == 0) && (device->type == 0))
+-				count++;
+-		}
+-	}
+-
+-	for (device = scsi_devs; device; device = device->next) {
+-		if (device->host == host) {
+-			if ((device->channel == 0) && (device->type == 0)) {
+-				device->queue_depth =
+-				    (ha->max_cmds - 1) / count;
+-				if (device->queue_depth < min)
+-					device->queue_depth = min;
+-			} else {
+-				device->queue_depth = 2;
+-			}
+-
+-			if (device->queue_depth < 2)
+-				device->queue_depth = 2;
+-		}
+-	}
+-}
+-
+-#else
+ /****************************************************************************/
+ /*                                                                          */
+ /* Routine Name: ips_slave_configure                                        */
+@@ -1316,7 +1219,6 @@
+ 	SDptr->skip_ms_page_3f = 1;
+ 	return 0;
+ }
+-#endif
+ 
+ /****************************************************************************/
+ /*                                                                          */
+@@ -1331,7 +1233,6 @@
+ do_ipsintr(int irq, void *dev_id)
+ {
+ 	ips_ha_t *ha;
+-	unsigned long cpu_flags;
+ 	struct Scsi_Host *host;
+ 	int irqstatus;
+ 
+@@ -1347,16 +1248,16 @@
+ 		return IRQ_HANDLED;
+ 	}
+ 
+-	IPS_LOCK_SAVE(host->host_lock, cpu_flags);
++	spin_lock(host->host_lock);
+ 
+ 	if (!ha->active) {
+-		IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags);
++		spin_unlock(host->host_lock);
+ 		return IRQ_HANDLED;
+ 	}
+ 
+ 	irqstatus = (*ha->func.intr) (ha);
+ 
+-	IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags);
++	spin_unlock(host->host_lock);
+ 
+ 	/* start the next command */
+ 	ips_next(ha, IPS_INTR_ON);
+@@ -1606,15 +1507,8 @@
+ 	if ((SC->cmnd[0] == IPS_IOCTL_COMMAND) &&
+ 	    (SC->device->channel == 0) &&
+ 	    (SC->device->id == IPS_ADAPTER_ID) &&
+-	    (SC->device->lun == 0) && SC->request_buffer) {
+-		if ((!SC->use_sg) && SC->request_bufflen &&
+-		    (((char *) SC->request_buffer)[0] == 'C') &&
+-		    (((char *) SC->request_buffer)[1] == 'O') &&
+-		    (((char *) SC->request_buffer)[2] == 'P') &&
+-		    (((char *) SC->request_buffer)[3] == 'P'))
+-			return 1;
+-		else if (SC->use_sg) {
+-			struct scatterlist *sg = SC->request_buffer;
++	    (SC->device->lun == 0) && scsi_sglist(SC)) {
++                struct scatterlist *sg = scsi_sglist(SC);
+ 			char  *buffer; 
+ 
+ 			/* kmap_atomic() ensures addressability of the user buffer.*/
+@@ -1630,7 +1524,6 @@
+ 			kunmap_atomic(buffer - sg->offset, KM_IRQ0);
+ 			local_irq_restore(flags);
+ 		}
+-	}
+ 	return 0;
+ }
+ 
+@@ -1680,18 +1573,14 @@
+ {
+ 	ips_passthru_t *pt;
+ 	int length = 0;
+-	int ret;
++	int i, ret;
++        struct scatterlist *sg = scsi_sglist(SC);
+ 
+ 	METHOD_TRACE("ips_make_passthru", 1);
+ 
+-	if (!SC->use_sg) {
+-		length = SC->request_bufflen;
+-	} else {
+-		struct scatterlist *sg = SC->request_buffer;
+-		int i;
+-		for (i = 0; i < SC->use_sg; i++)
++        scsi_for_each_sg(SC, sg, scsi_sg_count(SC), i)
+ 			length += sg[i].length;
+-	}
++
+ 	if (length < sizeof (ips_passthru_t)) {
+ 		/* wrong size */
+ 		DEBUG_VAR(1, "(%s%d) Passthru structure wrong size",
+@@ -2115,7 +2004,7 @@
+ 
+ 	METHOD_TRACE("ips_cleanup_passthru", 1);
+ 
+-	if ((!scb) || (!scb->scsi_cmd) || (!scb->scsi_cmd->request_buffer)) {
++	if ((!scb) || (!scb->scsi_cmd) || (!scsi_sglist(scb->scsi_cmd))) {
+ 		DEBUG_VAR(1, "(%s%d) couldn't cleanup after passthru",
+ 			  ips_name, ha->host_num);
+ 
+@@ -2730,7 +2619,6 @@
+ 	struct scsi_cmnd *q;
+ 	ips_copp_wait_item_t *item;
+ 	int ret;
+-	unsigned long cpu_flags = 0;
+ 	struct Scsi_Host *host;
+ 	METHOD_TRACE("ips_next", 1);
+ 
+@@ -2742,7 +2630,7 @@
+ 	 * this command won't time out
+ 	 */
+ 	if (intr == IPS_INTR_ON)
+-		IPS_LOCK_SAVE(host->host_lock, cpu_flags);
++		spin_lock(host->host_lock);
+ 
+ 	if ((ha->subsys->param[3] & 0x300000)
+ 	    && (ha->scb_activelist.count == 0)) {
+@@ -2769,14 +2657,14 @@
+ 		item = ips_removeq_copp_head(&ha->copp_waitlist);
+ 		ha->num_ioctl++;
+ 		if (intr == IPS_INTR_ON)
+-			IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags);
++			spin_unlock(host->host_lock);
+ 		scb->scsi_cmd = item->scsi_cmd;
+ 		kfree(item);
+ 
+ 		ret = ips_make_passthru(ha, scb->scsi_cmd, scb, intr);
+ 
+ 		if (intr == IPS_INTR_ON)
+-			IPS_LOCK_SAVE(host->host_lock, cpu_flags);
++			spin_lock(host->host_lock);
+ 		switch (ret) {
+ 		case IPS_FAILURE:
+ 			if (scb->scsi_cmd) {
+@@ -2846,7 +2734,7 @@
+ 		SC = ips_removeq_wait(&ha->scb_waitlist, q);
+ 
+ 		if (intr == IPS_INTR_ON)
+-			IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags);	/* Unlock HA after command is taken off queue */
++			spin_unlock(host->host_lock);	/* Unlock HA after command is taken off queue */
+ 
+ 		SC->result = DID_OK;
+ 		SC->host_scribble = NULL;
+@@ -2866,43 +2754,28 @@
+ 		/* copy in the CDB */
+ 		memcpy(scb->cdb, SC->cmnd, SC->cmd_len);
+ 
+-		/* Now handle the data buffer */
+-		if (SC->use_sg) {
++                scb->sg_count = scsi_dma_map(SC);
++                BUG_ON(scb->sg_count < 0);
++		if (scb->sg_count) {
+ 			struct scatterlist *sg;
+ 			int i;
+ 
+-			sg = SC->request_buffer;
+-			scb->sg_count = pci_map_sg(ha->pcidev, sg, SC->use_sg,
+-						   SC->sc_data_direction);
+ 			scb->flags |= IPS_SCB_MAP_SG;
+-			for (i = 0; i < scb->sg_count; i++) {
++
++                        scsi_for_each_sg(SC, sg, scb->sg_count, i) {
+ 				if (ips_fill_scb_sg_single
+-				    (ha, sg_dma_address(&sg[i]), scb, i,
+-				     sg_dma_len(&sg[i])) < 0)
++				    (ha, sg_dma_address(sg), scb, i,
++				     sg_dma_len(sg)) < 0)
+ 					break;
+ 			}
+ 			scb->dcdb.transfer_length = scb->data_len;
+ 		} else {
+-			if (SC->request_bufflen) {
+-				scb->data_busaddr =
+-				    pci_map_single(ha->pcidev,
+-						   SC->request_buffer,
+-						   SC->request_bufflen,
+-						   SC->sc_data_direction);
+-				scb->flags |= IPS_SCB_MAP_SINGLE;
+-				ips_fill_scb_sg_single(ha, scb->data_busaddr,
+-						       scb, 0,
+-						       SC->request_bufflen);
+-				scb->dcdb.transfer_length = scb->data_len;
+-			} else {
+ 				scb->data_busaddr = 0L;
+ 				scb->sg_len = 0;
+ 				scb->data_len = 0;
+ 				scb->dcdb.transfer_length = 0;
+ 			}
+ 
+-		}
+-
+ 		scb->dcdb.cmd_attribute =
+ 		    ips_command_direction[scb->scsi_cmd->cmnd[0]];
+ 
+@@ -2919,7 +2792,7 @@
+ 			scb->dcdb.transfer_length = 0;
+ 		}
+ 		if (intr == IPS_INTR_ON)
+-			IPS_LOCK_SAVE(host->host_lock, cpu_flags);
++			spin_lock(host->host_lock);
+ 
+ 		ret = ips_send_cmd(ha, scb);
+ 
+@@ -2958,7 +2831,7 @@
+ 	}			/* end while */
+ 
+ 	if (intr == IPS_INTR_ON)
+-		IPS_UNLOCK_RESTORE(host->host_lock, cpu_flags);
++		spin_unlock(host->host_lock);
+ }
+ 
+ /****************************************************************************/
+@@ -3377,29 +3250,24 @@
+ 		 * the rest of the data and continue.
+ 		 */
+ 		if ((scb->breakup) || (scb->sg_break)) {
++                        struct scatterlist *sg;
++                        int sg_dma_index, ips_sg_index = 0;
++
+ 			/* we had a data breakup */
+ 			scb->data_len = 0;
+ 
+-			if (scb->sg_count) {
+-				/* S/G request */
+-				struct scatterlist *sg;
+-				int ips_sg_index = 0;
+-				int sg_dma_index;
+-
+-				sg = scb->scsi_cmd->request_buffer;
++                        sg = scsi_sglist(scb->scsi_cmd);
+ 
+ 				/* Spin forward to last dma chunk */
+ 				sg_dma_index = scb->breakup;
+ 
+ 				/* Take care of possible partial on last chunk */
+ 				ips_fill_scb_sg_single(ha,
+-						       sg_dma_address(&sg
+-								      [sg_dma_index]),
++                                               sg_dma_address(&sg[sg_dma_index]),
+ 						       scb, ips_sg_index++,
+-						       sg_dma_len(&sg
+-								  [sg_dma_index]));
++                                               sg_dma_len(&sg[sg_dma_index]));
+ 
+-				for (; sg_dma_index < scb->sg_count;
++                        for (; sg_dma_index < scsi_sg_count(scb->scsi_cmd);
+ 				     sg_dma_index++) {
+ 					if (ips_fill_scb_sg_single
+ 					    (ha,
+@@ -3407,21 +3275,6 @@
+ 					     scb, ips_sg_index++,
+ 					     sg_dma_len(&sg[sg_dma_index])) < 0)
+ 						break;
+-
+-				}
+-
+-			} else {
+-				/* Non S/G Request */
+-				(void) ips_fill_scb_sg_single(ha,
+-							      scb->
+-							      data_busaddr +
+-							      (scb->sg_break *
+-							       ha->max_xfer),
+-							      scb, 0,
+-							      scb->scsi_cmd->
+-							      request_bufflen -
+-							      (scb->sg_break *
+-							       ha->max_xfer));
+ 			}
+ 
+ 			scb->dcdb.transfer_length = scb->data_len;
+@@ -3653,15 +3506,15 @@
+ static void
+ ips_scmd_buf_write(struct scsi_cmnd *scmd, void *data, unsigned int count)
+ {
+-	if (scmd->use_sg) {
+ 		int i;
+ 		unsigned int min_cnt, xfer_cnt;
+ 		char *cdata = (char *) data;
+ 		unsigned char *buffer;
+ 		unsigned long flags;
+-		struct scatterlist *sg = scmd->request_buffer;
++        struct scatterlist *sg = scsi_sglist(scmd);
++
+ 		for (i = 0, xfer_cnt = 0;
+-		     (i < scmd->use_sg) && (xfer_cnt < count); i++) {
++             (i < scsi_sg_count(scmd)) && (xfer_cnt < count); i++) {
+ 			min_cnt = min(count - xfer_cnt, sg[i].length);
+ 
+ 			/* kmap_atomic() ensures addressability of the data buffer.*/
+@@ -3674,11 +3527,6 @@
+ 
+ 			xfer_cnt += min_cnt;
+ 		}
+-
+-	} else {
+-		unsigned int min_cnt = min(count, scmd->request_bufflen);
+-		memcpy(scmd->request_buffer, data, min_cnt);
+-	}
+ }
+ 
+ /****************************************************************************/
+@@ -3691,15 +3539,15 @@
+ static void
+ ips_scmd_buf_read(struct scsi_cmnd *scmd, void *data, unsigned int count)
+ {
+-	if (scmd->use_sg) {
+ 		int i;
+ 		unsigned int min_cnt, xfer_cnt;
+ 		char *cdata = (char *) data;
+ 		unsigned char *buffer;
+ 		unsigned long flags;
+-		struct scatterlist *sg = scmd->request_buffer;
++        struct scatterlist *sg = scsi_sglist(scmd);
++
+ 		for (i = 0, xfer_cnt = 0;
+-		     (i < scmd->use_sg) && (xfer_cnt < count); i++) {
++             (i < scsi_sg_count(scmd)) && (xfer_cnt < count); i++) {
+ 			min_cnt = min(count - xfer_cnt, sg[i].length);
+ 
+ 			/* kmap_atomic() ensures addressability of the data buffer.*/
+@@ -3712,11 +3560,6 @@
+ 
+ 			xfer_cnt += min_cnt;
+ 		}
+-
+-	} else {
+-		unsigned int min_cnt = min(count, scmd->request_bufflen);
+-		memcpy(data, scmd->request_buffer, min_cnt);
+-	}
+ }
+ 
+ /****************************************************************************/
+@@ -4350,7 +4193,7 @@
+ 
+ 	METHOD_TRACE("ips_rdcap", 1);
+ 
+-	if (scb->scsi_cmd->request_bufflen < 8)
++	if (scsi_bufflen(scb->scsi_cmd) < 8)
+ 		return (0);
+ 
+ 	cap.lba =
+@@ -4735,8 +4578,7 @@
+ 
+ 	METHOD_TRACE("ips_freescb", 1);
+ 	if (scb->flags & IPS_SCB_MAP_SG)
+-		pci_unmap_sg(ha->pcidev, scb->scsi_cmd->request_buffer,
+-			     scb->scsi_cmd->use_sg, IPS_DMA_DIR(scb));
++                scsi_dma_unmap(scb->scsi_cmd);
+ 	else if (scb->flags & IPS_SCB_MAP_SINGLE)
+ 		pci_unmap_single(ha->pcidev, scb->data_busaddr, scb->data_len,
+ 				 IPS_DMA_DIR(scb));
+@@ -7004,7 +6846,6 @@
+ 	kfree(oldha);
+ 	ips_sh[index] = sh;
+ 	ips_ha[index] = ha;
+-	IPS_SCSI_SET_DEVICE(sh, ha);
+ 
+ 	/* Store away needed values for later use */
+ 	sh->io_port = ha->io_addr;
+@@ -7016,17 +6857,16 @@
+ 	sh->cmd_per_lun = sh->hostt->cmd_per_lun;
+ 	sh->unchecked_isa_dma = sh->hostt->unchecked_isa_dma;
+ 	sh->use_clustering = sh->hostt->use_clustering;
+-
+-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,7)
+ 	sh->max_sectors = 128;
+-#endif
+ 
+ 	sh->max_id = ha->ntargets;
+ 	sh->max_lun = ha->nlun;
+ 	sh->max_channel = ha->nbus - 1;
+ 	sh->can_queue = ha->max_cmds - 1;
+ 
+-	IPS_ADD_HOST(sh, NULL);
++	scsi_add_host(sh, NULL);
++	scsi_scan_host(sh);
++
+ 	return 0;
+ }
+ 
+@@ -7069,7 +6909,7 @@
+ 		return -ENODEV;
+ 	ips_driver_template.module = THIS_MODULE;
+ 	ips_order_controllers();
+-	if (IPS_REGISTER_HOSTS(&ips_driver_template)) {
++	if (!ips_detect(&ips_driver_template)) {
+ 		pci_unregister_driver(&ips_pci_driver);
+ 		return -ENODEV;
+ 	}
+@@ -7087,7 +6927,6 @@
+ static void __exit
+ ips_module_exit(void)
+ {
+-	IPS_UNREGISTER_HOSTS(&ips_driver_template);
+ 	pci_unregister_driver(&ips_pci_driver);
+ 	unregister_reboot_notifier(&ips_notifier);
+ }
+@@ -7443,15 +7282,9 @@
+ 	return SUCCESS;
+ }
+ 
+-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,9)
+ MODULE_LICENSE("GPL");
+-#endif
+-
+ MODULE_DESCRIPTION("IBM ServeRAID Adapter Driver " IPS_VER_STRING);
+-
+-#ifdef MODULE_VERSION
+ MODULE_VERSION(IPS_VER_STRING);
+-#endif
+ 
+ 
+ /*
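[The ips.c hunks above also retire the IPS_ADD_HOST()/IPS_REMOVE_HOST() compatibility wrappers in favour of the scsi_add_host()/scsi_scan_host()/scsi_remove_host() calls used on 2.6 kernels. A short sketch of that registration sequence; example_attach()/example_detach() are hypothetical wrappers, with error handling reduced to the minimum:

#include <scsi/scsi_host.h>

static int example_attach(struct Scsi_Host *sh, struct device *parent)
{
	int err;

	err = scsi_add_host(sh, parent);	/* register with the midlayer */
	if (err)
		return err;

	scsi_scan_host(sh);			/* probe for attached devices */
	return 0;
}

/* Teardown mirrors it: */
static void example_detach(struct Scsi_Host *sh)
{
	scsi_remove_host(sh);
	scsi_host_put(sh);
}]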
+diff -Nurb linux-2.6.22-570/drivers/scsi/ips.h linux-2.6.22-try2/drivers/scsi/ips.h
+--- linux-2.6.22-570/drivers/scsi/ips.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/ips.h	2007-12-19 15:29:23.000000000 -0500
+@@ -58,10 +58,6 @@
+    /*
+     * Some handy macros
+     */
+-   #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,20) || defined CONFIG_HIGHIO
+-      #define IPS_HIGHIO
+-   #endif
+-
+    #define IPS_HA(x)                   ((ips_ha_t *) x->hostdata)
+    #define IPS_COMMAND_ID(ha, scb)     (int) (scb - ha->scbs)
+    #define IPS_IS_TROMBONE(ha)         (((ha->device_id == IPS_DEVICEID_COPPERHEAD) && \
+@@ -84,38 +80,8 @@
+     #define IPS_SGLIST_SIZE(ha)       (IPS_USE_ENH_SGLIST(ha) ? \
+                                          sizeof(IPS_ENH_SG_LIST) : sizeof(IPS_STD_SG_LIST))
+ 
+-   #if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,4)
+-      #define pci_set_dma_mask(dev,mask) ( mask > 0xffffffff ? 1:0 )
+-      #define scsi_set_pci_device(sh,dev) (0)
+-   #endif
+-
+-   #ifndef IRQ_NONE
+-      typedef void irqreturn_t;
+-      #define IRQ_NONE
+-      #define IRQ_HANDLED
+-      #define IRQ_RETVAL(x)
+-   #endif
+-   
+-   #if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+-      #define IPS_REGISTER_HOSTS(SHT)      scsi_register_module(MODULE_SCSI_HA,SHT)
+-      #define IPS_UNREGISTER_HOSTS(SHT)    scsi_unregister_module(MODULE_SCSI_HA,SHT)
+-      #define IPS_ADD_HOST(shost,device)
+-      #define IPS_REMOVE_HOST(shost)
+-      #define IPS_SCSI_SET_DEVICE(sh,ha)   scsi_set_pci_device(sh, (ha)->pcidev)
+-      #define IPS_PRINTK(level, pcidev, format, arg...)                 \
+-            printk(level "%s %s:" format , "ips" ,     \
+-            (pcidev)->slot_name , ## arg)
+-      #define scsi_host_alloc(sh,size)         scsi_register(sh,size)
+-      #define scsi_host_put(sh)             scsi_unregister(sh)
+-   #else
+-      #define IPS_REGISTER_HOSTS(SHT)      (!ips_detect(SHT))
+-      #define IPS_UNREGISTER_HOSTS(SHT)
+-      #define IPS_ADD_HOST(shost,device)   do { scsi_add_host(shost,device); scsi_scan_host(shost); } while (0)
+-      #define IPS_REMOVE_HOST(shost)       scsi_remove_host(shost)
+-      #define IPS_SCSI_SET_DEVICE(sh,ha)   do { } while (0)
+       #define IPS_PRINTK(level, pcidev, format, arg...)                 \
+             dev_printk(level , &((pcidev)->dev) , format , ## arg)
+-   #endif
+ 
+    #define MDELAY(n)			\
+ 	do {				\
+@@ -134,7 +100,7 @@
+    #define pci_dma_hi32(a)         ((a >> 16) >> 16)
+    #define pci_dma_lo32(a)         (a & 0xffffffff)
+ 
+-   #if (BITS_PER_LONG > 32) || (defined CONFIG_HIGHMEM64G && defined IPS_HIGHIO)
++   #if (BITS_PER_LONG > 32) || defined(CONFIG_HIGHMEM64G)
+       #define IPS_ENABLE_DMA64        (1)
+    #else
+       #define IPS_ENABLE_DMA64        (0)
+@@ -451,16 +417,10 @@
+    /*
+     * Scsi_Host Template
+     */
+-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)
+-   static int ips_proc24_info(char *, char **, off_t, int, int, int);
+-   static void ips_select_queue_depth(struct Scsi_Host *, struct scsi_device *);
+-   static int ips_biosparam(Disk *disk, kdev_t dev, int geom[]);
+-#else
+    static int ips_proc_info(struct Scsi_Host *, char *, char **, off_t, int, int);
+    static int ips_biosparam(struct scsi_device *sdev, struct block_device *bdev,
+ 		sector_t capacity, int geom[]);
+    static int ips_slave_configure(struct scsi_device *SDptr);
+-#endif
+ 
+ /*
+  * Raid Command Formats
+diff -Nurb linux-2.6.22-570/drivers/scsi/iscsi_tcp.c linux-2.6.22-try2/drivers/scsi/iscsi_tcp.c
+--- linux-2.6.22-570/drivers/scsi/iscsi_tcp.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/iscsi_tcp.c	2007-12-19 15:29:23.000000000 -0500
+@@ -29,14 +29,15 @@
+ #include <linux/types.h>
+ #include <linux/list.h>
+ #include <linux/inet.h>
++#include <linux/file.h>
+ #include <linux/blkdev.h>
+ #include <linux/crypto.h>
+ #include <linux/delay.h>
+ #include <linux/kfifo.h>
+ #include <linux/scatterlist.h>
+-#include <linux/mutex.h>
+ #include <net/tcp.h>
+ #include <scsi/scsi_cmnd.h>
++#include <scsi/scsi_device.h>
+ #include <scsi/scsi_host.h>
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_transport_iscsi.h>
+@@ -109,7 +110,7 @@
+ 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+ 
+ 	crypto_hash_digest(&tcp_conn->tx_hash, &buf->sg, buf->sg.length, crc);
+-	buf->sg.length = tcp_conn->hdr_size;
++	buf->sg.length += sizeof(u32);
+ }
+ 
+ static inline int
+@@ -211,16 +212,14 @@
+ static int
+ iscsi_data_rsp(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+ {
+-	int rc;
+ 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+ 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+ 	struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)tcp_conn->in.hdr;
+ 	struct iscsi_session *session = conn->session;
++	struct scsi_cmnd *sc = ctask->sc;
+ 	int datasn = be32_to_cpu(rhdr->datasn);
+ 
+-	rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr);
+-	if (rc)
+-		return rc;
++	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
+ 	/*
+ 	 * setup Data-In byte counter (gets decremented..)
+ 	 */
+@@ -229,31 +228,36 @@
+ 	if (tcp_conn->in.datalen == 0)
+ 		return 0;
+ 
+-	if (ctask->datasn != datasn)
++	if (tcp_ctask->exp_datasn != datasn) {
++		debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->datasn(%d)\n",
++		          __FUNCTION__, tcp_ctask->exp_datasn, datasn);
+ 		return ISCSI_ERR_DATASN;
++	}
+ 
+-	ctask->datasn++;
++	tcp_ctask->exp_datasn++;
+ 
+ 	tcp_ctask->data_offset = be32_to_cpu(rhdr->offset);
+-	if (tcp_ctask->data_offset + tcp_conn->in.datalen > ctask->total_length)
++	if (tcp_ctask->data_offset + tcp_conn->in.datalen > scsi_bufflen(sc)) {
++		debug_tcp("%s: data_offset(%d) + data_len(%d) > total_length_in(%d)\n",
++		          __FUNCTION__, tcp_ctask->data_offset,
++		          tcp_conn->in.datalen, scsi_bufflen(sc));
+ 		return ISCSI_ERR_DATA_OFFSET;
++	}
+ 
+ 	if (rhdr->flags & ISCSI_FLAG_DATA_STATUS) {
+-		struct scsi_cmnd *sc = ctask->sc;
+-
+ 		conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
+ 		if (rhdr->flags & ISCSI_FLAG_DATA_UNDERFLOW) {
+ 			int res_count = be32_to_cpu(rhdr->residual_count);
+ 
+ 			if (res_count > 0 &&
+-			    res_count <= sc->request_bufflen) {
+-				sc->resid = res_count;
++			    res_count <= scsi_bufflen(sc)) {
++				scsi_set_resid(sc, res_count);
+ 				sc->result = (DID_OK << 16) | rhdr->cmd_status;
+ 			} else
+ 				sc->result = (DID_BAD_TARGET << 16) |
+ 					rhdr->cmd_status;
+ 		} else if (rhdr->flags & ISCSI_FLAG_DATA_OVERFLOW) {
+-			sc->resid = be32_to_cpu(rhdr->residual_count);
++			scsi_set_resid(sc, be32_to_cpu(rhdr->residual_count));
+ 			sc->result = (DID_OK << 16) | rhdr->cmd_status;
+ 		} else
+ 			sc->result = (DID_OK << 16) | rhdr->cmd_status;
+@@ -281,6 +285,8 @@
+ {
+ 	struct iscsi_data *hdr;
+ 	struct scsi_cmnd *sc = ctask->sc;
++	int i, sg_count = 0;
++	struct scatterlist *sg;
+ 
+ 	hdr = &r2t->dtask.hdr;
+ 	memset(hdr, 0, sizeof(struct iscsi_data));
+@@ -308,12 +314,9 @@
+ 	iscsi_buf_init_iov(&r2t->headbuf, (char*)hdr,
+ 			   sizeof(struct iscsi_hdr));
+ 
+-	if (sc->use_sg) {
+-		int i, sg_count = 0;
+-		struct scatterlist *sg = sc->request_buffer;
+-
++	sg = scsi_sglist(sc);
+ 		r2t->sg = NULL;
+-		for (i = 0; i < sc->use_sg; i++, sg += 1) {
++	for (i = 0; i < scsi_sg_count(sc); i++, sg += 1) {
+ 			/* FIXME: prefetch ? */
+ 			if (sg_count + sg->length > r2t->data_offset) {
+ 				int page_offset;
+@@ -335,12 +338,6 @@
+ 			sg_count += sg->length;
+ 		}
+ 		BUG_ON(r2t->sg == NULL);
+-	} else {
+-		iscsi_buf_init_iov(&r2t->sendbuf,
+-			    (char*)sc->request_buffer + r2t->data_offset,
+-			    r2t->data_count);
+-		r2t->sg = NULL;
+-	}
+ }
+ 
+ /**
+@@ -365,17 +362,16 @@
+ 		return ISCSI_ERR_DATALEN;
+ 	}
+ 
+-	if (tcp_ctask->exp_r2tsn && tcp_ctask->exp_r2tsn != r2tsn)
++	if (tcp_ctask->exp_datasn != r2tsn) {
++		debug_tcp("%s: ctask->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
++		          __FUNCTION__, tcp_ctask->exp_datasn, r2tsn);
+ 		return ISCSI_ERR_R2TSN;
+-
+-	rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr);
+-	if (rc)
+-		return rc;
+-
+-	/* FIXME: use R2TSN to detect missing R2T */
++	}
+ 
+ 	/* fill-in new R2T associated with the task */
+ 	spin_lock(&session->lock);
++	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
++
+ 	if (!ctask->sc || ctask->mtask ||
+ 	     session->state != ISCSI_STATE_LOGGED_IN) {
+ 		printk(KERN_INFO "iscsi_tcp: dropping R2T itt %d in "
+@@ -401,11 +397,11 @@
+ 			    r2t->data_length, session->max_burst);
+ 
+ 	r2t->data_offset = be32_to_cpu(rhdr->data_offset);
+-	if (r2t->data_offset + r2t->data_length > ctask->total_length) {
++	if (r2t->data_offset + r2t->data_length > scsi_bufflen(ctask->sc)) {
+ 		spin_unlock(&session->lock);
+ 		printk(KERN_ERR "iscsi_tcp: invalid R2T with data len %u at "
+ 		       "offset %u and total length %d\n", r2t->data_length,
+-		       r2t->data_offset, ctask->total_length);
++		       r2t->data_offset, scsi_bufflen(ctask->sc));
+ 		return ISCSI_ERR_DATALEN;
+ 	}
+ 
+@@ -414,9 +410,9 @@
+ 
+ 	iscsi_solicit_data_init(conn, ctask, r2t);
+ 
+-	tcp_ctask->exp_r2tsn = r2tsn + 1;
++	tcp_ctask->exp_datasn = r2tsn + 1;
+ 	__kfifo_put(tcp_ctask->r2tqueue, (void*)&r2t, sizeof(void*));
+-	tcp_ctask->xmstate |= XMSTATE_SOL_HDR;
++	tcp_ctask->xmstate |= XMSTATE_SOL_HDR_INIT;
+ 	list_move_tail(&ctask->running, &conn->xmitqueue);
+ 
+ 	scsi_queue_work(session->host, &conn->xmitwork);
+@@ -600,7 +596,7 @@
+ {
+ 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+ 	int buf_left = buf_size - (tcp_conn->data_copied + offset);
+-	int size = min(tcp_conn->in.copy, buf_left);
++	unsigned size = min(tcp_conn->in.copy, buf_left);
+ 	int rc;
+ 
+ 	size = min(size, ctask->data_count);
+@@ -609,7 +605,7 @@
+ 	       size, tcp_conn->in.offset, tcp_conn->in.copied);
+ 
+ 	BUG_ON(size <= 0);
+-	BUG_ON(tcp_ctask->sent + size > ctask->total_length);
++	BUG_ON(tcp_ctask->sent + size > scsi_bufflen(ctask->sc));
+ 
+ 	rc = skb_copy_bits(tcp_conn->in.skb, tcp_conn->in.offset,
+ 			   (char*)buf + (offset + tcp_conn->data_copied), size);
+@@ -707,25 +703,8 @@
+ 
+ 	BUG_ON((void*)ctask != sc->SCp.ptr);
+ 
+-	/*
+-	 * copying Data-In into the Scsi_Cmnd
+-	 */
+-	if (!sc->use_sg) {
+-		i = ctask->data_count;
+-		rc = iscsi_ctask_copy(tcp_conn, ctask, sc->request_buffer,
+-				      sc->request_bufflen,
+-				      tcp_ctask->data_offset);
+-		if (rc == -EAGAIN)
+-			return rc;
+-		if (conn->datadgst_en)
+-			iscsi_recv_digest_update(tcp_conn, sc->request_buffer,
+-						 i);
+-		rc = 0;
+-		goto done;
+-	}
+-
+ 	offset = tcp_ctask->data_offset;
+-	sg = sc->request_buffer;
++	sg = scsi_sglist(sc);
+ 
+ 	if (tcp_ctask->data_offset)
+ 		for (i = 0; i < tcp_ctask->sg_count; i++)
+@@ -734,7 +713,7 @@
+ 	if (offset < 0)
+ 		offset = 0;
+ 
+-	for (i = tcp_ctask->sg_count; i < sc->use_sg; i++) {
++	for (i = tcp_ctask->sg_count; i < scsi_sg_count(sc); i++) {
+ 		char *dest;
+ 
+ 		dest = kmap_atomic(sg[i].page, KM_SOFTIRQ0);
+@@ -779,7 +758,6 @@
+ 	}
+ 	BUG_ON(ctask->data_count);
+ 
+-done:
+ 	/* check for non-exceptional status */
+ 	if (tcp_conn->in.hdr->flags & ISCSI_FLAG_DATA_STATUS) {
+ 		debug_scsi("done [sc %lx res %d itt 0x%x flags 0x%x]\n",
+@@ -895,11 +873,27 @@
+ 		}
+ 	}
+ 
+-	if (tcp_conn->in_progress == IN_PROGRESS_DDIGEST_RECV) {
++	if (tcp_conn->in_progress == IN_PROGRESS_DDIGEST_RECV &&
++	    tcp_conn->in.copy) {
+ 		uint32_t recv_digest;
+ 
+ 		debug_tcp("extra data_recv offset %d copy %d\n",
+ 			  tcp_conn->in.offset, tcp_conn->in.copy);
++
++		if (!tcp_conn->data_copied) {
++			if (tcp_conn->in.padding) {
++				debug_tcp("padding -> %d\n",
++					  tcp_conn->in.padding);
++				memset(pad, 0, tcp_conn->in.padding);
++				sg_init_one(&sg, pad, tcp_conn->in.padding);
++				crypto_hash_update(&tcp_conn->rx_hash,
++						   &sg, sg.length);
++			}
++			crypto_hash_final(&tcp_conn->rx_hash,
++					  (u8 *) &tcp_conn->in.datadgst);
++			debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst);
++		}
++
+ 		rc = iscsi_tcp_copy(conn, sizeof(uint32_t));
+ 		if (rc) {
+ 			if (rc == -EAGAIN)
+@@ -925,7 +919,6 @@
+ 
+ 	if (tcp_conn->in_progress == IN_PROGRESS_DATA_RECV &&
+ 	   tcp_conn->in.copy) {
+-
+ 		debug_tcp("data_recv offset %d copy %d\n",
+ 		       tcp_conn->in.offset, tcp_conn->in.copy);
+ 
+@@ -936,24 +929,32 @@
+ 			iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+ 			return 0;
+ 		}
+-		tcp_conn->in.copy -= tcp_conn->in.padding;
+-		tcp_conn->in.offset += tcp_conn->in.padding;
+-		if (conn->datadgst_en) {
+-			if (tcp_conn->in.padding) {
+-				debug_tcp("padding -> %d\n",
+-					  tcp_conn->in.padding);
+-				memset(pad, 0, tcp_conn->in.padding);
+-				sg_init_one(&sg, pad, tcp_conn->in.padding);
+-				crypto_hash_update(&tcp_conn->rx_hash,
+-						   &sg, sg.length);
+-			}
+-			crypto_hash_final(&tcp_conn->rx_hash,
+-					  (u8 *) &tcp_conn->in.datadgst);
+-			debug_tcp("rx digest 0x%x\n", tcp_conn->in.datadgst);
++
++		if (tcp_conn->in.padding)
++			tcp_conn->in_progress = IN_PROGRESS_PAD_RECV;
++		else if (conn->datadgst_en)
+ 			tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV;
++		else
++			tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
+ 			tcp_conn->data_copied = 0;
+-		} else
++	}
++
++	if (tcp_conn->in_progress == IN_PROGRESS_PAD_RECV &&
++	    tcp_conn->in.copy) {
++		int copylen = min(tcp_conn->in.padding - tcp_conn->data_copied,
++				  tcp_conn->in.copy);
++
++		tcp_conn->in.copy -= copylen;
++		tcp_conn->in.offset += copylen;
++		tcp_conn->data_copied += copylen;
++
++		if (tcp_conn->data_copied != tcp_conn->in.padding)
++			tcp_conn->in_progress = IN_PROGRESS_PAD_RECV;
++		else if (conn->datadgst_en)
++			tcp_conn->in_progress = IN_PROGRESS_DDIGEST_RECV;
++		else
+ 			tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
++		tcp_conn->data_copied = 0;
+ 	}
+ 
+ 	debug_tcp("f, processed %d from out of %d padding %d\n",
+@@ -1215,7 +1216,6 @@
+ 			struct iscsi_r2t_info *r2t, int left)
+ {
+ 	struct iscsi_data *hdr;
+-	struct scsi_cmnd *sc = ctask->sc;
+ 	int new_offset;
+ 
+ 	hdr = &r2t->dtask.hdr;
+@@ -1245,15 +1245,8 @@
+ 	if (iscsi_buf_left(&r2t->sendbuf))
+ 		return;
+ 
+-	if (sc->use_sg) {
+ 		iscsi_buf_init_sg(&r2t->sendbuf, r2t->sg);
+ 		r2t->sg += 1;
+-	} else {
+-		iscsi_buf_init_iov(&r2t->sendbuf,
+-			    (char*)sc->request_buffer + new_offset,
+-			    r2t->data_count);
+-		r2t->sg = NULL;
+-	}
+ }
+ 
+ static void iscsi_set_padding(struct iscsi_tcp_cmd_task *tcp_ctask,
+@@ -1277,41 +1270,10 @@
+ static void
+ iscsi_tcp_cmd_init(struct iscsi_cmd_task *ctask)
+ {
+-	struct scsi_cmnd *sc = ctask->sc;
+ 	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+ 
+ 	BUG_ON(__kfifo_len(tcp_ctask->r2tqueue));
+-
+-	tcp_ctask->sent = 0;
+-	tcp_ctask->sg_count = 0;
+-
+-	if (sc->sc_data_direction == DMA_TO_DEVICE) {
+-		tcp_ctask->xmstate = XMSTATE_W_HDR;
+-		tcp_ctask->exp_r2tsn = 0;
+-		BUG_ON(ctask->total_length == 0);
+-
+-		if (sc->use_sg) {
+-			struct scatterlist *sg = sc->request_buffer;
+-
+-			iscsi_buf_init_sg(&tcp_ctask->sendbuf, sg);
+-			tcp_ctask->sg = sg + 1;
+-			tcp_ctask->bad_sg = sg + sc->use_sg;
+-		} else {
+-			iscsi_buf_init_iov(&tcp_ctask->sendbuf,
+-					   sc->request_buffer,
+-					   sc->request_bufflen);
+-			tcp_ctask->sg = NULL;
+-			tcp_ctask->bad_sg = NULL;
+-		}
+-		debug_scsi("cmd [itt 0x%x total %d imm_data %d "
+-			   "unsol count %d, unsol offset %d]\n",
+-			   ctask->itt, ctask->total_length, ctask->imm_count,
+-			   ctask->unsol_count, ctask->unsol_offset);
+-	} else
+-		tcp_ctask->xmstate = XMSTATE_R_HDR;
+-
+-	iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)ctask->hdr,
+-			    sizeof(struct iscsi_hdr));
++	tcp_ctask->xmstate = XMSTATE_CMD_HDR_INIT;
+ }
+ 
+ /**
+@@ -1324,9 +1286,11 @@
+  *	call it again later, or recover. '0' return code means successful
+  *	xmit.
+  *
+- *	Management xmit state machine consists of two states:
+- *		IN_PROGRESS_IMM_HEAD - PDU Header xmit in progress
+- *		IN_PROGRESS_IMM_DATA - PDU Data xmit in progress
++ *	Management xmit state machine consists of these states:
++ *		XMSTATE_IMM_HDR_INIT	- calculate digest of PDU Header
++ *		XMSTATE_IMM_HDR 	- PDU Header xmit in progress
++ *		XMSTATE_IMM_DATA 	- PDU Data xmit in progress
++ *		XMSTATE_IDLE		- management PDU is done
+  **/
+ static int
+ iscsi_tcp_mtask_xmit(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
+@@ -1337,23 +1301,34 @@
+ 	debug_scsi("mtask deq [cid %d state %x itt 0x%x]\n",
+ 		conn->id, tcp_mtask->xmstate, mtask->itt);
+ 
+-	if (tcp_mtask->xmstate & XMSTATE_IMM_HDR) {
+-		tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR;
+-		if (mtask->data_count)
++	if (tcp_mtask->xmstate & XMSTATE_IMM_HDR_INIT) {
++		iscsi_buf_init_iov(&tcp_mtask->headbuf, (char*)mtask->hdr,
++				   sizeof(struct iscsi_hdr));
++
++		if (mtask->data_count) {
+ 			tcp_mtask->xmstate |= XMSTATE_IMM_DATA;
++			iscsi_buf_init_iov(&tcp_mtask->sendbuf,
++					   (char*)mtask->data,
++					   mtask->data_count);
++		}
++
+ 		if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE &&
+ 		    conn->stop_stage != STOP_CONN_RECOVER &&
+ 		    conn->hdrdgst_en)
+ 			iscsi_hdr_digest(conn, &tcp_mtask->headbuf,
+ 					(u8*)tcp_mtask->hdrext);
++
++		tcp_mtask->sent = 0;
++		tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR_INIT;
++		tcp_mtask->xmstate |= XMSTATE_IMM_HDR;
++	}
++
++	if (tcp_mtask->xmstate & XMSTATE_IMM_HDR) {
+ 		rc = iscsi_sendhdr(conn, &tcp_mtask->headbuf,
+ 				   mtask->data_count);
+-		if (rc) {
+-			tcp_mtask->xmstate |= XMSTATE_IMM_HDR;
+-			if (mtask->data_count)
+-				tcp_mtask->xmstate &= ~XMSTATE_IMM_DATA;
++		if (rc)
+ 			return rc;
+-		}
++		tcp_mtask->xmstate &= ~XMSTATE_IMM_HDR;
+ 	}
+ 
+ 	if (tcp_mtask->xmstate & XMSTATE_IMM_DATA) {
+@@ -1387,55 +1362,67 @@
+ 	return 0;
+ }
+ 
+-static inline int
+-iscsi_send_read_hdr(struct iscsi_conn *conn,
+-		    struct iscsi_tcp_cmd_task *tcp_ctask)
++static int
++iscsi_send_cmd_hdr(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+ {
+-	int rc;
++	struct scsi_cmnd *sc = ctask->sc;
++	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
++	int rc = 0;
+ 
+-	tcp_ctask->xmstate &= ~XMSTATE_R_HDR;
+-	if (conn->hdrdgst_en)
+-		iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
+-				 (u8*)tcp_ctask->hdrext);
+-	rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, 0);
+-	if (!rc) {
+-		BUG_ON(tcp_ctask->xmstate != XMSTATE_IDLE);
+-		return 0; /* wait for Data-In */
++	if (tcp_ctask->xmstate & XMSTATE_CMD_HDR_INIT) {
++		tcp_ctask->sent = 0;
++		tcp_ctask->sg_count = 0;
++		tcp_ctask->exp_datasn = 0;
++
++		if (sc->sc_data_direction == DMA_TO_DEVICE) {
++			struct scatterlist *sg = scsi_sglist(sc);
++
++			iscsi_buf_init_sg(&tcp_ctask->sendbuf, sg);
++			tcp_ctask->sg = sg + 1;
++			tcp_ctask->bad_sg = sg + scsi_sg_count(sc);
++
++			debug_scsi("cmd [itt 0x%x total %d imm_data %d "
++				   "unsol count %d, unsol offset %d]\n",
++				   ctask->itt, scsi_bufflen(sc),
++				   ctask->imm_count, ctask->unsol_count,
++				   ctask->unsol_offset);
+ 	}
+-	tcp_ctask->xmstate |= XMSTATE_R_HDR;
+-	return rc;
+-}
+ 
+-static inline int
+-iscsi_send_write_hdr(struct iscsi_conn *conn,
+-		     struct iscsi_cmd_task *ctask)
+-{
+-	struct iscsi_tcp_cmd_task *tcp_ctask = ctask->dd_data;
+-	int rc;
++		iscsi_buf_init_iov(&tcp_ctask->headbuf, (char*)ctask->hdr,
++				  sizeof(struct iscsi_hdr));
+ 
+-	tcp_ctask->xmstate &= ~XMSTATE_W_HDR;
+ 	if (conn->hdrdgst_en)
+ 		iscsi_hdr_digest(conn, &tcp_ctask->headbuf,
+ 				 (u8*)tcp_ctask->hdrext);
++		tcp_ctask->xmstate &= ~XMSTATE_CMD_HDR_INIT;
++		tcp_ctask->xmstate |= XMSTATE_CMD_HDR_XMIT;
++	}
++
++	if (tcp_ctask->xmstate & XMSTATE_CMD_HDR_XMIT) {
+ 	rc = iscsi_sendhdr(conn, &tcp_ctask->headbuf, ctask->imm_count);
+-	if (rc) {
+-		tcp_ctask->xmstate |= XMSTATE_W_HDR;
++		if (rc)
+ 		return rc;
+-	}
++		tcp_ctask->xmstate &= ~XMSTATE_CMD_HDR_XMIT;
++
++		if (sc->sc_data_direction != DMA_TO_DEVICE)
++			return 0;
+ 
+ 	if (ctask->imm_count) {
+ 		tcp_ctask->xmstate |= XMSTATE_IMM_DATA;
+ 		iscsi_set_padding(tcp_ctask, ctask->imm_count);
+ 
+ 		if (ctask->conn->datadgst_en) {
+-			iscsi_data_digest_init(ctask->conn->dd_data, tcp_ctask);
++				iscsi_data_digest_init(ctask->conn->dd_data,
++						       tcp_ctask);
+ 			tcp_ctask->immdigest = 0;
+ 		}
+ 	}
+ 
+ 	if (ctask->unsol_count)
+-		tcp_ctask->xmstate |= XMSTATE_UNS_HDR | XMSTATE_UNS_INIT;
+-	return 0;
++			tcp_ctask->xmstate |=
++					XMSTATE_UNS_HDR | XMSTATE_UNS_INIT;
++	}
++	return rc;
+ }
+ 
+ static int
+@@ -1624,9 +1611,7 @@
+ 	struct iscsi_data_task *dtask;
+ 	int left, rc;
+ 
+-	if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) {
+-		tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
+-		tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
++	if (tcp_ctask->xmstate & XMSTATE_SOL_HDR_INIT) {
+ 		if (!tcp_ctask->r2t) {
+ 			spin_lock_bh(&session->lock);
+ 			__kfifo_get(tcp_ctask->r2tqueue, (void*)&tcp_ctask->r2t,
+@@ -1640,13 +1625,20 @@
+ 		if (conn->hdrdgst_en)
+ 			iscsi_hdr_digest(conn, &r2t->headbuf,
+ 					(u8*)dtask->hdrext);
+-		rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count);
+-		if (rc) {
+-			tcp_ctask->xmstate &= ~XMSTATE_SOL_DATA;
++		tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR_INIT;
+ 			tcp_ctask->xmstate |= XMSTATE_SOL_HDR;
+-			return rc;
+ 		}
+ 
++	if (tcp_ctask->xmstate & XMSTATE_SOL_HDR) {
++		r2t = tcp_ctask->r2t;
++		dtask = &r2t->dtask;
++
++		rc = iscsi_sendhdr(conn, &r2t->headbuf, r2t->data_count);
++		if (rc)
++			return rc;
++		tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
++		tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
++
+ 		if (conn->datadgst_en) {
+ 			iscsi_data_digest_init(conn->dd_data, tcp_ctask);
+ 			dtask->digest = 0;
+@@ -1677,8 +1669,6 @@
+ 		left = r2t->data_length - r2t->sent;
+ 		if (left) {
+ 			iscsi_solicit_data_cont(conn, ctask, r2t, left);
+-			tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
+-			tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
+ 			goto send_hdr;
+ 		}
+ 
+@@ -1693,8 +1683,6 @@
+ 		if (__kfifo_get(tcp_ctask->r2tqueue, (void*)&r2t,
+ 				sizeof(void*))) {
+ 			tcp_ctask->r2t = r2t;
+-			tcp_ctask->xmstate |= XMSTATE_SOL_DATA;
+-			tcp_ctask->xmstate &= ~XMSTATE_SOL_HDR;
+ 			spin_unlock_bh(&session->lock);
+ 			goto send_hdr;
+ 		}
+@@ -1703,6 +1691,46 @@
+ 	return 0;
+ }
+ 
++/**
++ * iscsi_tcp_ctask_xmit - xmit normal PDU task
++ * @conn: iscsi connection
++ * @ctask: iscsi command task
++ *
++ * Notes:
++ *	The function can return -EAGAIN, in which case the caller must
++ *	call it again later, or recover. A '0' return code means a
++ *	successful xmit.
++ *	The function is divided into logical helpers (above) for the
++ *	different xmit stages.
++ *
++ * iscsi_send_cmd_hdr()
++ *	XMSTATE_CMD_HDR_INIT - prepare header and data buffers, calculate
++ *	                       the header digest
++ *	XMSTATE_CMD_HDR_XMIT - header transmit in progress
++ *
++ * iscsi_send_padding
++ *	XMSTATE_W_PAD        - prepare and send padding
++ *	XMSTATE_W_RESEND_PAD - retry sending padding
++ *
++ * iscsi_send_digest
++ *	XMSTATE_W_RESEND_DATA_DIGEST - finalize and send (or resend) the
++ *	                               data digest
++ *
++ * iscsi_send_unsol_hdr
++ *	XMSTATE_UNS_INIT     - prepare unsolicited data header and digest
++ *	XMSTATE_UNS_HDR      - send unsolicited header
++ *
++ * iscsi_send_unsol_pdu
++ *	XMSTATE_UNS_DATA     - unsolicited data send in progress
++ *
++ * iscsi_send_sol_pdu
++ *	XMSTATE_SOL_HDR_INIT - initialize solicited data header and digest
++ *	XMSTATE_SOL_HDR      - send solicited header
++ *	XMSTATE_SOL_DATA     - send solicited data
++ *
++ * iscsi_tcp_ctask_xmit
++ *	XMSTATE_IMM_DATA     - xmit immediate data (??)
++ **/
+ static int
+ iscsi_tcp_ctask_xmit(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask)
+ {
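The state-machine comment above is the key to reading this rework: each stage sets its XMSTATE bit before it can block, clears it only once the stage completes, and bubbles -EAGAIN up so the xmit worker can simply re-enter later and resume exactly where transmission stopped. A minimal user-space sketch of that resumable pattern (the flag names and the budget-based would-block stand-in are illustrative, not from the driver):

#include <errno.h>
#include <stdio.h>

#define ST_HDR	0x1
#define ST_DATA	0x2
#define ST_PAD	0x4

struct xmit { unsigned int state; int budget; };

/* pretend the socket accepts only 'budget' sends before blocking */
static int send_stage(struct xmit *x, const char *name)
{
	if (x->budget-- <= 0)
		return -EAGAIN;
	printf("sent %s\n", name);
	return 0;
}

/* each stage clears its flag only after it fully completes, so a
 * failed send leaves the flag set and the next call resumes there */
static int do_xmit(struct xmit *x)
{
	int rc;

	if (x->state & ST_HDR) {
		rc = send_stage(x, "header");
		if (rc)
			return rc;
		x->state &= ~ST_HDR;
	}
	if (x->state & ST_DATA) {
		rc = send_stage(x, "data");
		if (rc)
			return rc;
		x->state &= ~ST_DATA;
	}
	if (x->state & ST_PAD) {
		rc = send_stage(x, "padding");
		if (rc)
			return rc;
		x->state &= ~ST_PAD;
	}
	return 0;
}

int main(void)
{
	struct xmit x = { ST_HDR | ST_DATA | ST_PAD, 2 };

	while (do_xmit(&x) == -EAGAIN) {
		printf("would block, re-entering\n");
		x.budget = 2;	/* socket writable again */
	}
	return 0;
}

In the driver, the retry loop lives in iscsi_xmitworker(), which keeps calling iscsi_data_xmit() while it returns -EAGAIN.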
+@@ -1712,20 +1740,11 @@
+ 	debug_scsi("ctask deq [cid %d xmstate %x itt 0x%x]\n",
+ 		conn->id, tcp_ctask->xmstate, ctask->itt);
+ 
+-	/*
+-	 * serialize with TMF AbortTask
+-	 */
+-	if (ctask->mtask)
+-		return rc;
+-
+-	if (tcp_ctask->xmstate & XMSTATE_R_HDR)
+-		return iscsi_send_read_hdr(conn, tcp_ctask);
+-
+-	if (tcp_ctask->xmstate & XMSTATE_W_HDR) {
+-		rc = iscsi_send_write_hdr(conn, ctask);
++	rc = iscsi_send_cmd_hdr(conn, ctask);
+ 		if (rc)
+ 			return rc;
+-	}
++	if (ctask->sc->sc_data_direction != DMA_TO_DEVICE)
++		return 0;
+ 
+ 	if (tcp_ctask->xmstate & XMSTATE_IMM_DATA) {
+ 		rc = iscsi_send_data(ctask, &tcp_ctask->sendbuf, &tcp_ctask->sg,
+@@ -1810,18 +1829,22 @@
+ static void
+ iscsi_tcp_release_conn(struct iscsi_conn *conn)
+ {
++	struct iscsi_session *session = conn->session;
+ 	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
++	struct socket *sock = tcp_conn->sock;
+ 
+-	if (!tcp_conn->sock)
++	if (!sock)
+ 		return;
+ 
+-	sock_hold(tcp_conn->sock->sk);
++	sock_hold(sock->sk);
+ 	iscsi_conn_restore_callbacks(tcp_conn);
+-	sock_put(tcp_conn->sock->sk);
++	sock_put(sock->sk);
+ 
+-	sock_release(tcp_conn->sock);
++	spin_lock_bh(&session->lock);
+ 	tcp_conn->sock = NULL;
+ 	conn->recv_lock = NULL;
++	spin_unlock_bh(&session->lock);
++	sockfd_put(sock);
+ }
+ 
+ static void
+@@ -1852,6 +1875,46 @@
+ 	tcp_conn->hdr_size = sizeof(struct iscsi_hdr);
+ }
+ 
++static int iscsi_tcp_get_addr(struct iscsi_conn *conn, struct socket *sock,
++			      char *buf, int *port,
++			      int (*getname)(struct socket *, struct sockaddr *,
++					int *addrlen))
++{
++	struct sockaddr_storage *addr;
++	struct sockaddr_in6 *sin6;
++	struct sockaddr_in *sin;
++	int rc = 0, len;
++
++	addr = kmalloc(sizeof(*addr), GFP_KERNEL);
++	if (!addr)
++		return -ENOMEM;
++
++	if (getname(sock, (struct sockaddr *) addr, &len)) {
++		rc = -ENODEV;
++		goto free_addr;
++	}
++
++	switch (addr->ss_family) {
++	case AF_INET:
++		sin = (struct sockaddr_in *)addr;
++		spin_lock_bh(&conn->session->lock);
++		sprintf(buf, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
++		*port = be16_to_cpu(sin->sin_port);
++		spin_unlock_bh(&conn->session->lock);
++		break;
++	case AF_INET6:
++		sin6 = (struct sockaddr_in6 *)addr;
++		spin_lock_bh(&conn->session->lock);
++		sprintf(buf, NIP6_FMT, NIP6(sin6->sin6_addr));
++		*port = be16_to_cpu(sin6->sin6_port);
++		spin_unlock_bh(&conn->session->lock);
++		break;
++	}
++free_addr:
++	kfree(addr);
++	return rc;
++}
++
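iscsi_tcp_get_addr() above snapshots the connection's endpoint into a string, dispatching on ss_family and taking the session lock only around the sprintf. For readers more used to the user-space API, here is the same dispatch shape using getpeername() and inet_ntop() in place of kernel_getpeername() and the NIPQUAD/NIP6 macros — a sketch, not driver code (the UDP connect is only there to give getpeername a peer):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

/* format the peer of a connected socket, IPv4 or IPv6 */
static int get_peer(int fd, char *buf, size_t len, int *port)
{
	struct sockaddr_storage addr;
	socklen_t alen = sizeof(addr);

	if (getpeername(fd, (struct sockaddr *)&addr, &alen))
		return -1;

	switch (addr.ss_family) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)&addr;

		inet_ntop(AF_INET, &sin->sin_addr, buf, len);
		*port = ntohs(sin->sin_port);
		return 0;
	}
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&addr;

		inet_ntop(AF_INET6, &sin6->sin6_addr, buf, len);
		*port = ntohs(sin6->sin6_port);
		return 0;
	}
	default:
		return -1;
	}
}

int main(void)
{
	struct sockaddr_in dst = { .sin_family = AF_INET };
	char buf[INET6_ADDRSTRLEN];
	int port, fd = socket(AF_INET, SOCK_DGRAM, 0);

	dst.sin_port = htons(3260);	/* iSCSI well-known port */
	inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
	connect(fd, (struct sockaddr *)&dst, sizeof(dst));
	if (!get_peer(fd, buf, sizeof(buf), &port))
		printf("peer %s:%d\n", buf, port);
	return 0;
}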
+ static int
+ iscsi_tcp_conn_bind(struct iscsi_cls_session *cls_session,
+ 		    struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
+@@ -1869,10 +1932,24 @@
+ 		printk(KERN_ERR "iscsi_tcp: sockfd_lookup failed %d\n", err);
+ 		return -EEXIST;
+ 	}
++	/*
++	 * copy these values now because, if we drop the session,
++	 * userspace may still want to query them, since we will
++	 * be using them for the reconnect
++	 */
++	err = iscsi_tcp_get_addr(conn, sock, conn->portal_address,
++				 &conn->portal_port, kernel_getpeername);
++	if (err)
++		goto free_socket;
++
++	err = iscsi_tcp_get_addr(conn, sock, conn->local_address,
++				&conn->local_port, kernel_getsockname);
++	if (err)
++		goto free_socket;
+ 
+ 	err = iscsi_conn_bind(cls_session, cls_conn, is_leading);
+ 	if (err)
+-		return err;
++		goto free_socket;
+ 
+ 	/* bind iSCSI connection and socket */
+ 	tcp_conn->sock = sock;
+@@ -1896,25 +1973,19 @@
+ 	 * set receive state machine into initial state
+ 	 */
+ 	tcp_conn->in_progress = IN_PROGRESS_WAIT_HEADER;
+-
+ 	return 0;
++
++free_socket:
++	sockfd_put(sock);
++	return err;
+ }
+ 
+ /* called with host lock */
+ static void
+-iscsi_tcp_mgmt_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask,
+-		    char *data, uint32_t data_size)
++iscsi_tcp_mgmt_init(struct iscsi_conn *conn, struct iscsi_mgmt_task *mtask)
+ {
+ 	struct iscsi_tcp_mgmt_task *tcp_mtask = mtask->dd_data;
+-
+-	iscsi_buf_init_iov(&tcp_mtask->headbuf, (char*)mtask->hdr,
+-			   sizeof(struct iscsi_hdr));
+-	tcp_mtask->xmstate = XMSTATE_IMM_HDR;
+-	tcp_mtask->sent = 0;
+-
+-	if (mtask->data_count)
+-		iscsi_buf_init_iov(&tcp_mtask->sendbuf, (char*)mtask->data,
+-				    mtask->data_count);
++	tcp_mtask->xmstate = XMSTATE_IMM_HDR_INIT;
+ }
+ 
+ static int
+@@ -2026,41 +2097,18 @@
+ 			 enum iscsi_param param, char *buf)
+ {
+ 	struct iscsi_conn *conn = cls_conn->dd_data;
+-	struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
+-	struct inet_sock *inet;
+-	struct ipv6_pinfo *np;
+-	struct sock *sk;
+ 	int len;
+ 
+ 	switch(param) {
+ 	case ISCSI_PARAM_CONN_PORT:
+-		mutex_lock(&conn->xmitmutex);
+-		if (!tcp_conn->sock) {
+-			mutex_unlock(&conn->xmitmutex);
+-			return -EINVAL;
+-		}
+-
+-		inet = inet_sk(tcp_conn->sock->sk);
+-		len = sprintf(buf, "%hu\n", be16_to_cpu(inet->dport));
+-		mutex_unlock(&conn->xmitmutex);
++		spin_lock_bh(&conn->session->lock);
++		len = sprintf(buf, "%hu\n", conn->portal_port);
++		spin_unlock_bh(&conn->session->lock);
+ 		break;
+ 	case ISCSI_PARAM_CONN_ADDRESS:
+-		mutex_lock(&conn->xmitmutex);
+-		if (!tcp_conn->sock) {
+-			mutex_unlock(&conn->xmitmutex);
+-			return -EINVAL;
+-		}
+-
+-		sk = tcp_conn->sock->sk;
+-		if (sk->sk_family == PF_INET) {
+-			inet = inet_sk(sk);
+-			len = sprintf(buf, NIPQUAD_FMT "\n",
+-				      NIPQUAD(inet->daddr));
+-		} else {
+-			np = inet6_sk(sk);
+-			len = sprintf(buf, NIP6_FMT "\n", NIP6(np->daddr));
+-		}
+-		mutex_unlock(&conn->xmitmutex);
++		spin_lock_bh(&conn->session->lock);
++		len = sprintf(buf, "%s\n", conn->portal_address);
++		spin_unlock_bh(&conn->session->lock);
+ 		break;
+ 	default:
+ 		return iscsi_conn_get_param(cls_conn, param, buf);
+@@ -2069,6 +2117,29 @@
+ 	return len;
+ }
+ 
++static int
++iscsi_tcp_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param,
++			 char *buf)
++{
++	struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
++	int len;
++
++	switch (param) {
++	case ISCSI_HOST_PARAM_IPADDRESS:
++		spin_lock_bh(&session->lock);
++		if (!session->leadconn)
++			len = -ENODEV;
++		else
++			len = sprintf(buf, "%s\n",
++				     session->leadconn->local_address);
++		spin_unlock_bh(&session->lock);
++		break;
++	default:
++		return iscsi_host_get_param(shost, param, buf);
++	}
++	return len;
++}
++
+ static void
+ iscsi_conn_get_stats(struct iscsi_cls_conn *cls_conn, struct iscsi_stats *stats)
+ {
+@@ -2096,6 +2167,7 @@
+ static struct iscsi_cls_session *
+ iscsi_tcp_session_create(struct iscsi_transport *iscsit,
+ 			 struct scsi_transport_template *scsit,
++			 uint16_t cmds_max, uint16_t qdepth,
+ 			 uint32_t initial_cmdsn, uint32_t *hostno)
+ {
+ 	struct iscsi_cls_session *cls_session;
+@@ -2103,7 +2175,7 @@
+ 	uint32_t hn;
+ 	int cmd_i;
+ 
+-	cls_session = iscsi_session_setup(iscsit, scsit,
++	cls_session = iscsi_session_setup(iscsit, scsit, cmds_max, qdepth,
+ 					 sizeof(struct iscsi_tcp_cmd_task),
+ 					 sizeof(struct iscsi_tcp_mgmt_task),
+ 					 initial_cmdsn, &hn);
+@@ -2142,17 +2214,24 @@
+ 	iscsi_session_teardown(cls_session);
+ }
+ 
++static int iscsi_tcp_slave_configure(struct scsi_device *sdev)
++{
++	blk_queue_dma_alignment(sdev->request_queue, 0);
++	return 0;
++}
++
+ static struct scsi_host_template iscsi_sht = {
+ 	.name			= "iSCSI Initiator over TCP/IP",
+ 	.queuecommand           = iscsi_queuecommand,
+ 	.change_queue_depth	= iscsi_change_queue_depth,
+-	.can_queue		= ISCSI_XMIT_CMDS_MAX - 1,
++	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,
+ 	.sg_tablesize		= ISCSI_SG_TABLESIZE,
+ 	.max_sectors		= 0xFFFF,
+ 	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,
+ 	.eh_abort_handler       = iscsi_eh_abort,
+ 	.eh_host_reset_handler	= iscsi_eh_host_reset,
+ 	.use_clustering         = DISABLE_CLUSTERING,
++	.slave_configure        = iscsi_tcp_slave_configure,
+ 	.proc_name		= "iscsi_tcp",
+ 	.this_id		= -1,
+ };
+@@ -2179,8 +2258,12 @@
+ 				  ISCSI_EXP_STATSN |
+ 				  ISCSI_PERSISTENT_PORT |
+ 				  ISCSI_PERSISTENT_ADDRESS |
+-				  ISCSI_TARGET_NAME |
+-				  ISCSI_TPGT,
++				  ISCSI_TARGET_NAME | ISCSI_TPGT |
++				  ISCSI_USERNAME | ISCSI_PASSWORD |
++				  ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN,
++	.host_param_mask	= ISCSI_HOST_HWADDRESS | ISCSI_HOST_IPADDRESS |
++				  ISCSI_HOST_INITIATOR_NAME |
++				  ISCSI_HOST_NETDEV_NAME,
+ 	.host_template		= &iscsi_sht,
+ 	.conndata_size		= sizeof(struct iscsi_conn),
+ 	.max_conn		= 1,
+@@ -2197,6 +2280,9 @@
+ 	.get_session_param	= iscsi_session_get_param,
+ 	.start_conn		= iscsi_conn_start,
+ 	.stop_conn		= iscsi_tcp_conn_stop,
++	/* iscsi host params */
++	.get_host_param		= iscsi_tcp_host_get_param,
++	.set_host_param		= iscsi_host_set_param,
+ 	/* IO */
+ 	.send_pdu		= iscsi_conn_send_pdu,
+ 	.get_stats		= iscsi_conn_get_stats,
+diff -Nurb linux-2.6.22-570/drivers/scsi/iscsi_tcp.h linux-2.6.22-try2/drivers/scsi/iscsi_tcp.h
+--- linux-2.6.22-570/drivers/scsi/iscsi_tcp.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/iscsi_tcp.h	2007-12-19 15:29:23.000000000 -0500
+@@ -29,11 +29,12 @@
+ #define IN_PROGRESS_HEADER_GATHER	0x1
+ #define IN_PROGRESS_DATA_RECV		0x2
+ #define IN_PROGRESS_DDIGEST_RECV	0x3
++#define IN_PROGRESS_PAD_RECV		0x4
+ 
+ /* xmit state machine */
+ #define XMSTATE_IDLE			0x0
+-#define XMSTATE_R_HDR			0x1
+-#define XMSTATE_W_HDR			0x2
++#define XMSTATE_CMD_HDR_INIT		0x1
++#define XMSTATE_CMD_HDR_XMIT		0x2
+ #define XMSTATE_IMM_HDR			0x4
+ #define XMSTATE_IMM_DATA		0x8
+ #define XMSTATE_UNS_INIT		0x10
+@@ -44,6 +45,8 @@
+ #define XMSTATE_W_PAD			0x200
+ #define XMSTATE_W_RESEND_PAD		0x400
+ #define XMSTATE_W_RESEND_DATA_DIGEST	0x800
++#define XMSTATE_IMM_HDR_INIT		0x1000
++#define XMSTATE_SOL_HDR_INIT		0x2000
+ 
+ #define ISCSI_PAD_LEN			4
+ #define ISCSI_SG_TABLESIZE		SG_ALL
+@@ -152,7 +155,7 @@
+ 	struct scatterlist	*sg;			/* per-cmd SG list  */
+ 	struct scatterlist	*bad_sg;		/* assert statement */
+ 	int			sg_count;		/* SG's to process  */
+-	uint32_t		exp_r2tsn;
++	uint32_t		exp_datasn;		/* expected target's R2TSN/DataSN */
+ 	int			data_offset;
+ 	struct iscsi_r2t_info	*r2t;			/* in progress R2T    */
+ 	struct iscsi_queue	r2tpool;
+diff -Nurb linux-2.6.22-570/drivers/scsi/jazz_esp.c linux-2.6.22-try2/drivers/scsi/jazz_esp.c
+--- linux-2.6.22-570/drivers/scsi/jazz_esp.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/jazz_esp.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1,6 +1,6 @@
+ /* jazz_esp.c: ESP front-end for MIPS JAZZ systems.
+  *
+- * Copyright (C) 2007 Thomas Bogendörfer (tsbogend@alpha.frankende)
++ * Copyright (C) 2007 Thomas Bogendörfer (tsbogend@alpha.frankende)
+  */
+ 
+ #include <linux/kernel.h>
+@@ -143,7 +143,7 @@
+ 		goto fail;
+ 
+ 	host->max_id = 8;
+-	esp = host_to_esp(host);
++	esp = shost_priv(host);
+ 
+ 	esp->host = host;
+ 	esp->dev = dev;
+diff -Nurb linux-2.6.22-570/drivers/scsi/libiscsi.c linux-2.6.22-try2/drivers/scsi/libiscsi.c
+--- linux-2.6.22-570/drivers/scsi/libiscsi.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/libiscsi.c	2007-12-19 15:29:23.000000000 -0500
+@@ -22,7 +22,6 @@
+  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+  */
+ #include <linux/types.h>
+-#include <linux/mutex.h>
+ #include <linux/kfifo.h>
+ #include <linux/delay.h>
+ #include <asm/unaligned.h>
+@@ -46,27 +45,53 @@
+ }
+ EXPORT_SYMBOL_GPL(class_to_transport_session);
+ 
+-#define INVALID_SN_DELTA	0xffff
++/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */
++#define SNA32_CHECK 2147483648UL
+ 
+-int
+-iscsi_check_assign_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
++static int iscsi_sna_lt(u32 n1, u32 n2)
++{
++	return n1 != n2 && ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
++			    (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
++}
++
++/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */
++static int iscsi_sna_lte(u32 n1, u32 n2)
++{
++	return n1 == n2 || ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
++			    (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
++}
++
++void
++iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
+ {
+ 	uint32_t max_cmdsn = be32_to_cpu(hdr->max_cmdsn);
+ 	uint32_t exp_cmdsn = be32_to_cpu(hdr->exp_cmdsn);
+ 
+-	if (max_cmdsn < exp_cmdsn -1 &&
+-	    max_cmdsn > exp_cmdsn - INVALID_SN_DELTA)
+-		return ISCSI_ERR_MAX_CMDSN;
+-	if (max_cmdsn > session->max_cmdsn ||
+-	    max_cmdsn < session->max_cmdsn - INVALID_SN_DELTA)
+-		session->max_cmdsn = max_cmdsn;
+-	if (exp_cmdsn > session->exp_cmdsn ||
+-	    exp_cmdsn < session->exp_cmdsn - INVALID_SN_DELTA)
++	/*
++	 * the standard specifies this check for deciding when to update
++	 * the expected and max sequence numbers
++	 */
++	if (iscsi_sna_lt(max_cmdsn, exp_cmdsn - 1))
++		return;
++
++	if (exp_cmdsn != session->exp_cmdsn &&
++	    !iscsi_sna_lt(exp_cmdsn, session->exp_cmdsn))
+ 		session->exp_cmdsn = exp_cmdsn;
+ 
+-	return 0;
++	if (max_cmdsn != session->max_cmdsn &&
++	    !iscsi_sna_lt(max_cmdsn, session->max_cmdsn)) {
++		session->max_cmdsn = max_cmdsn;
++		/*
++		 * if the window closed with IO queued, then kick the
++		 * xmit thread
++		 */
++		if (!list_empty(&session->leadconn->xmitqueue) ||
++		    __kfifo_len(session->leadconn->mgmtqueue))
++			scsi_queue_work(session->host,
++					&session->leadconn->xmitwork);
++	}
+ }
+-EXPORT_SYMBOL_GPL(iscsi_check_assign_cmdsn);
++EXPORT_SYMBOL_GPL(iscsi_update_cmdsn);
+ 
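The iscsi_sna_lt()/iscsi_sna_lte() helpers above implement RFC 1982 serial number arithmetic so the CmdSN comparisons stay correct when the unsigned 32-bit counters wrap. A quick stand-alone check of the wrap case (values chosen for illustration):

#include <assert.h>
#include <stdint.h>

#define SNA32_CHECK 2147483648UL

static int sna_lt(uint32_t n1, uint32_t n2)
{
	return n1 != n2 && ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
			    (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
}

int main(void)
{
	/* ordinary case */
	assert(sna_lt(5, 6));
	/* wrap case: 0xfffffffe precedes 2, because
	 * (2 - 0xfffffffe) mod 2^32 == 4, which is < 2^31 */
	assert(sna_lt(0xfffffffeU, 2));
	/* a plain unsigned compare gets the wrap case backwards */
	assert(!(0xfffffffeU < 2));
	return 0;
}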
+ void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *ctask,
+ 				   struct iscsi_data *hdr)
+@@ -115,14 +140,17 @@
+         hdr->flags = ISCSI_ATTR_SIMPLE;
+         int_to_scsilun(sc->device->lun, (struct scsi_lun *)hdr->lun);
+         hdr->itt = build_itt(ctask->itt, conn->id, session->age);
+-        hdr->data_length = cpu_to_be32(sc->request_bufflen);
++        hdr->data_length = cpu_to_be32(scsi_bufflen(sc));
+         hdr->cmdsn = cpu_to_be32(session->cmdsn);
+         session->cmdsn++;
+         hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
+         memcpy(hdr->cdb, sc->cmnd, sc->cmd_len);
+-        memset(&hdr->cdb[sc->cmd_len], 0, MAX_COMMAND_SIZE - sc->cmd_len);
++	if (sc->cmd_len < MAX_COMMAND_SIZE)
++		memset(&hdr->cdb[sc->cmd_len], 0,
++			MAX_COMMAND_SIZE - sc->cmd_len);
+ 
+ 	ctask->data_count = 0;
++	ctask->imm_count = 0;
+ 	if (sc->sc_data_direction == DMA_TO_DEVICE) {
+ 		hdr->flags |= ISCSI_FLAG_CMD_WRITE;
+ 		/*
+@@ -139,25 +167,24 @@
+ 		 *
+ 		 *      pad_count       bytes to be sent as zero-padding
+ 		 */
+-		ctask->imm_count = 0;
+ 		ctask->unsol_count = 0;
+ 		ctask->unsol_offset = 0;
+ 		ctask->unsol_datasn = 0;
+ 
+ 		if (session->imm_data_en) {
+-			if (ctask->total_length >= session->first_burst)
++			if (scsi_bufflen(sc) >= session->first_burst)
+ 				ctask->imm_count = min(session->first_burst,
+ 							conn->max_xmit_dlength);
+ 			else
+-				ctask->imm_count = min(ctask->total_length,
++				ctask->imm_count = min(scsi_bufflen(sc),
+ 							conn->max_xmit_dlength);
+ 			hton24(ctask->hdr->dlength, ctask->imm_count);
+ 		} else
+ 			zero_data(ctask->hdr->dlength);
+ 
+ 		if (!session->initial_r2t_en) {
+-			ctask->unsol_count = min(session->first_burst,
+-				ctask->total_length) - ctask->imm_count;
++			ctask->unsol_count = min(session->first_burst,
++				scsi_bufflen(sc)) - ctask->imm_count;
+ 			ctask->unsol_offset = ctask->imm_count;
+ 		}
+ 
+@@ -165,7 +192,6 @@
+ 			/* No unsolicit Data-Out's */
+ 			ctask->hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+ 	} else {
+-		ctask->datasn = 0;
+ 		hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+ 		zero_data(hdr->dlength);
+ 
+@@ -174,8 +200,13 @@
+ 	}
+ 
+ 	conn->scsicmd_pdus_cnt++;
++
++        debug_scsi("iscsi prep [%s cid %d sc %p cdb 0x%x itt 0x%x len %d "
++		"cmdsn %d win %d]\n",
++                sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read",
++		conn->id, sc, sc->cmnd[0], ctask->itt, scsi_bufflen(sc),
++                session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1);
+ }
+-EXPORT_SYMBOL_GPL(iscsi_prep_scsi_cmd_pdu);
+ 
+ /**
+  * iscsi_complete_command - return command back to scsi-ml
+@@ -204,26 +235,12 @@
+ 	atomic_inc(&ctask->refcount);
+ }
+ 
+-static void iscsi_get_ctask(struct iscsi_cmd_task *ctask)
+-{
+-	spin_lock_bh(&ctask->conn->session->lock);
+-	__iscsi_get_ctask(ctask);
+-	spin_unlock_bh(&ctask->conn->session->lock);
+-}
+-
+ static void __iscsi_put_ctask(struct iscsi_cmd_task *ctask)
+ {
+ 	if (atomic_dec_and_test(&ctask->refcount))
+ 		iscsi_complete_command(ctask);
+ }
+ 
+-static void iscsi_put_ctask(struct iscsi_cmd_task *ctask)
+-{
+-	spin_lock_bh(&ctask->conn->session->lock);
+-	__iscsi_put_ctask(ctask);
+-	spin_unlock_bh(&ctask->conn->session->lock);
+-}
+-
+ /**
+  * iscsi_cmd_rsp - SCSI Command Response processing
+  * @conn: iscsi connection
+@@ -235,21 +252,15 @@
+  * iscsi_cmd_rsp sets up the scsi_cmnd fields based on the PDU and
+  * then completes the command and task.
+  **/
+-static int iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
++static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+ 			      struct iscsi_cmd_task *ctask, char *data,
+ 			      int datalen)
+ {
+-	int rc;
+ 	struct iscsi_cmd_rsp *rhdr = (struct iscsi_cmd_rsp *)hdr;
+ 	struct iscsi_session *session = conn->session;
+ 	struct scsi_cmnd *sc = ctask->sc;
+ 
+-	rc = iscsi_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr);
+-	if (rc) {
+-		sc->result = DID_ERROR << 16;
+-		goto out;
+-	}
+-
++	iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
+ 	conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
+ 
+ 	sc->result = (DID_OK << 16) | rhdr->cmd_status;
+@@ -286,14 +297,14 @@
+ 	if (rhdr->flags & ISCSI_FLAG_CMD_UNDERFLOW) {
+ 		int res_count = be32_to_cpu(rhdr->residual_count);
+ 
+-		if (res_count > 0 && res_count <= sc->request_bufflen)
+-			sc->resid = res_count;
++		if (res_count > 0 && res_count <= scsi_bufflen(sc))
++			scsi_set_resid(sc, res_count);
+ 		else
+ 			sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
+ 	} else if (rhdr->flags & ISCSI_FLAG_CMD_BIDI_UNDERFLOW)
+ 		sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
+ 	else if (rhdr->flags & ISCSI_FLAG_CMD_OVERFLOW)
+-		sc->resid = be32_to_cpu(rhdr->residual_count);
++		scsi_set_resid(sc, be32_to_cpu(rhdr->residual_count));
+ 
+ out:
+ 	debug_scsi("done [sc %lx res %d itt 0x%x]\n",
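The underflow branch above now reports the residual through the scsi_set_resid() accessor, sanity-checking that the target's residual_count does not exceed the original transfer length. With illustrative numbers:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t bufflen = 4096;	/* scsi_bufflen(sc) */
	uint32_t res_count = 512;	/* target's residual_count */

	if (res_count > 0 && res_count <= bufflen)
		printf("resid %u: %u of %u bytes transferred\n",
		       res_count, bufflen - res_count, bufflen);
	else
		printf("bogus residual -> DID_BAD_TARGET\n");
	return 0;
}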
+@@ -301,7 +312,6 @@
+ 	conn->scsirsp_pdus_cnt++;
+ 
+ 	__iscsi_put_ctask(ctask);
+-	return rc;
+ }
+ 
+ static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
+@@ -381,7 +391,7 @@
+ 		switch(opcode) {
+ 		case ISCSI_OP_SCSI_CMD_RSP:
+ 			BUG_ON((void*)ctask != ctask->sc->SCp.ptr);
+-			rc = iscsi_scsi_cmd_rsp(conn, hdr, ctask, data,
++			iscsi_scsi_cmd_rsp(conn, hdr, ctask, data,
+ 						datalen);
+ 			break;
+ 		case ISCSI_OP_SCSI_DATA_IN:
+@@ -405,11 +415,7 @@
+ 		debug_scsi("immrsp [op 0x%x cid %d itt 0x%x len %d]\n",
+ 			   opcode, conn->id, mtask->itt, datalen);
+ 
+-		rc = iscsi_check_assign_cmdsn(session,
+-					      (struct iscsi_nopin*)hdr);
+-		if (rc)
+-			goto done;
+-
++		iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr);
+ 		switch(opcode) {
+ 		case ISCSI_OP_LOGOUT_RSP:
+ 			if (datalen) {
+@@ -458,10 +464,7 @@
+ 			break;
+ 		}
+ 	} else if (itt == ~0U) {
+-		rc = iscsi_check_assign_cmdsn(session,
+-					     (struct iscsi_nopin*)hdr);
+-		if (rc)
+-			goto done;
++		iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr);
+ 
+ 		switch(opcode) {
+ 		case ISCSI_OP_NOOP_IN:
+@@ -491,7 +494,6 @@
+ 	} else
+ 		rc = ISCSI_ERR_BAD_ITT;
+ 
+-done:
+ 	return rc;
+ }
+ EXPORT_SYMBOL_GPL(__iscsi_complete_pdu);
+@@ -578,17 +580,47 @@
+ }
+ EXPORT_SYMBOL_GPL(iscsi_conn_failure);
+ 
++static void iscsi_prep_mtask(struct iscsi_conn *conn,
++			     struct iscsi_mgmt_task *mtask)
++{
++	struct iscsi_session *session = conn->session;
++	struct iscsi_hdr *hdr = mtask->hdr;
++	struct iscsi_nopout *nop = (struct iscsi_nopout *)hdr;
++
++	if (hdr->opcode != (ISCSI_OP_LOGIN | ISCSI_OP_IMMEDIATE) &&
++	    hdr->opcode != (ISCSI_OP_TEXT | ISCSI_OP_IMMEDIATE))
++		nop->exp_statsn = cpu_to_be32(conn->exp_statsn);
++	/*
++	 * pre-format CmdSN for outgoing PDU.
++	 */
++	nop->cmdsn = cpu_to_be32(session->cmdsn);
++	if (hdr->itt != RESERVED_ITT) {
++		hdr->itt = build_itt(mtask->itt, conn->id, session->age);
++		if (conn->c_stage == ISCSI_CONN_STARTED &&
++		    !(hdr->opcode & ISCSI_OP_IMMEDIATE))
++			session->cmdsn++;
++	}
++
++	if (session->tt->init_mgmt_task)
++		session->tt->init_mgmt_task(conn, mtask);
++
++	debug_scsi("mgmtpdu [op 0x%x hdr->itt 0x%x datalen %d]\n",
++		   hdr->opcode, hdr->itt, mtask->data_count);
++}
++
+ static int iscsi_xmit_mtask(struct iscsi_conn *conn)
+ {
+ 	struct iscsi_hdr *hdr = conn->mtask->hdr;
+ 	int rc, was_logout = 0;
+ 
++	spin_unlock_bh(&conn->session->lock);
+ 	if ((hdr->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_LOGOUT) {
+ 		conn->session->state = ISCSI_STATE_IN_RECOVERY;
+ 		iscsi_block_session(session_to_cls(conn->session));
+ 		was_logout = 1;
+ 	}
+ 	rc = conn->session->tt->xmit_mgmt_task(conn, conn->mtask);
++	spin_lock_bh(&conn->session->lock);
+ 	if (rc)
+ 		return rc;
+ 
+@@ -602,6 +634,45 @@
+ 	return 0;
+ }
+ 
++static int iscsi_check_cmdsn_window_closed(struct iscsi_conn *conn)
++{
++	struct iscsi_session *session = conn->session;
++
++	/*
++	 * Check for iSCSI window and take care of CmdSN wrap-around
++	 */
++	if (!iscsi_sna_lte(session->cmdsn, session->max_cmdsn)) {
++		debug_scsi("iSCSI CmdSN closed. MaxCmdSN %u CmdSN %u\n",
++			   session->max_cmdsn, session->cmdsn);
++		return -ENOSPC;
++	}
++	return 0;
++}
++
++static int iscsi_xmit_ctask(struct iscsi_conn *conn)
++{
++	struct iscsi_cmd_task *ctask = conn->ctask;
++	int rc = 0;
++
++	/*
++	 * serialize with TMF AbortTask
++	 */
++	if (ctask->state == ISCSI_TASK_ABORTING)
++		goto done;
++
++	__iscsi_get_ctask(ctask);
++	spin_unlock_bh(&conn->session->lock);
++	rc = conn->session->tt->xmit_cmd_task(conn, ctask);
++	spin_lock_bh(&conn->session->lock);
++	__iscsi_put_ctask(ctask);
++
++done:
++	if (!rc)
++		/* done with this ctask */
++		conn->ctask = NULL;
++	return rc;
++}
++
+ /**
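iscsi_xmit_ctask() above relies on the get/unlock/xmit/lock/put pattern: the extra reference taken under the session lock keeps the ctask alive while the lock is dropped around the potentially sleeping LLD xmit call, even if a completion or abort races in meanwhile. A compact user-space sketch of the pattern (pthreads and C11 atomics standing in for the kernel primitives; all names are illustrative):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct task {
	atomic_int refcount;
	int id;
};

static pthread_mutex_t session_lock = PTHREAD_MUTEX_INITIALIZER;

/* free the task when the last reference is dropped */
static void put_task(struct task *t)
{
	if (atomic_fetch_sub(&t->refcount, 1) == 1) {
		printf("task %d freed\n", t->id);
		free(t);
	}
}

static int xmit(struct task *t)	/* may sleep; must not hold the lock */
{
	printf("xmit task %d\n", t->id);
	return 0;
}

static int xmit_task(struct task *t)
{
	int rc;

	pthread_mutex_lock(&session_lock);
	atomic_fetch_add(&t->refcount, 1);	/* pin across the unlock */
	pthread_mutex_unlock(&session_lock);

	rc = xmit(t);		/* a completion may race in here safely */

	pthread_mutex_lock(&session_lock);
	put_task(t);		/* drop the pin */
	pthread_mutex_unlock(&session_lock);
	return rc;
}

int main(void)
{
	struct task *t = malloc(sizeof(*t));

	atomic_init(&t->refcount, 1);	/* queuecommand's reference */
	t->id = 7;
	xmit_task(t);
	put_task(t);			/* completion path drops the last ref */
	return 0;
}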
+  * iscsi_data_xmit - xmit any command into the scheduled connection
+  * @conn: iscsi connection
+@@ -613,106 +684,79 @@
+  **/
+ static int iscsi_data_xmit(struct iscsi_conn *conn)
+ {
+-	struct iscsi_transport *tt;
+ 	int rc = 0;
+ 
++	spin_lock_bh(&conn->session->lock);
+ 	if (unlikely(conn->suspend_tx)) {
+ 		debug_scsi("conn %d Tx suspended!\n", conn->id);
++		spin_unlock_bh(&conn->session->lock);
+ 		return -ENODATA;
+ 	}
+-	tt = conn->session->tt;
+-
+-	/*
+-	 * Transmit in the following order:
+-	 *
+-	 * 1) un-finished xmit (ctask or mtask)
+-	 * 2) immediate control PDUs
+-	 * 3) write data
+-	 * 4) SCSI commands
+-	 * 5) non-immediate control PDUs
+-	 *
+-	 * No need to lock around __kfifo_get as long as
+-	 * there's one producer and one consumer.
+-	 */
+-
+-	BUG_ON(conn->ctask && conn->mtask);
+ 
+ 	if (conn->ctask) {
+-		iscsi_get_ctask(conn->ctask);
+-		rc = tt->xmit_cmd_task(conn, conn->ctask);
+-		iscsi_put_ctask(conn->ctask);
++		rc = iscsi_xmit_ctask(conn);
+ 		if (rc)
+ 			goto again;
+-		/* done with this in-progress ctask */
+-		conn->ctask = NULL;
+ 	}
++
+ 	if (conn->mtask) {
+ 		rc = iscsi_xmit_mtask(conn);
+ 	        if (rc)
+ 		        goto again;
+ 	}
+ 
+-	/* process immediate first */
+-        if (unlikely(__kfifo_len(conn->immqueue))) {
+-	        while (__kfifo_get(conn->immqueue, (void*)&conn->mtask,
++	/*
++	 * process mgmt pdus like nops before commands, since we should
++	 * only have one nop-out as a ping from us, and targets should not
++	 * overflow us with nop-ins
++	 */
++check_mgmt:
++	while (__kfifo_get(conn->mgmtqueue, (void*)&conn->mtask,
+ 			           sizeof(void*))) {
+-			spin_lock_bh(&conn->session->lock);
+-			list_add_tail(&conn->mtask->running,
+-				      &conn->mgmt_run_list);
+-			spin_unlock_bh(&conn->session->lock);
++		iscsi_prep_mtask(conn, conn->mtask);
++		list_add_tail(&conn->mtask->running, &conn->mgmt_run_list);
+ 			rc = iscsi_xmit_mtask(conn);
+ 		        if (rc)
+ 			        goto again;
+ 	        }
+-	}
+ 
+ 	/* process command queue */
+-	spin_lock_bh(&conn->session->lock);
+ 	while (!list_empty(&conn->xmitqueue)) {
++		rc = iscsi_check_cmdsn_window_closed(conn);
++		if (rc) {
++			spin_unlock_bh(&conn->session->lock);
++			return rc;
++		}
+ 		/*
+ 		 * iscsi tcp may readd the task to the xmitqueue to send
+ 		 * write data
+ 		 */
+ 		conn->ctask = list_entry(conn->xmitqueue.next,
+ 					 struct iscsi_cmd_task, running);
++		if (conn->ctask->state == ISCSI_TASK_PENDING) {
++			iscsi_prep_scsi_cmd_pdu(conn->ctask);
++			conn->session->tt->init_cmd_task(conn->ctask);
++		}
+ 		conn->ctask->state = ISCSI_TASK_RUNNING;
+ 		list_move_tail(conn->xmitqueue.next, &conn->run_list);
+-		__iscsi_get_ctask(conn->ctask);
+-		spin_unlock_bh(&conn->session->lock);
+-
+-		rc = tt->xmit_cmd_task(conn, conn->ctask);
+-
+-		spin_lock_bh(&conn->session->lock);
+-		__iscsi_put_ctask(conn->ctask);
+-		if (rc) {
+-			spin_unlock_bh(&conn->session->lock);
+-			goto again;
+-		}
+-	}
+-	spin_unlock_bh(&conn->session->lock);
+-	/* done with this ctask */
+-	conn->ctask = NULL;
+-
+-	/* process the rest control plane PDUs, if any */
+-        if (unlikely(__kfifo_len(conn->mgmtqueue))) {
+-	        while (__kfifo_get(conn->mgmtqueue, (void*)&conn->mtask,
+-			           sizeof(void*))) {
+-			spin_lock_bh(&conn->session->lock);
+-			list_add_tail(&conn->mtask->running,
+-				      &conn->mgmt_run_list);
+-			spin_unlock_bh(&conn->session->lock);
+-			rc = iscsi_xmit_mtask(conn);
++		rc = iscsi_xmit_ctask(conn);
+ 		        if (rc)
+ 			        goto again;
++		/*
++		 * we could continuously get new ctask requests so
++		 * we need to check the mgmt queue for nops that need to
++		 * be sent to avoid starvation
++		 */
++		if (__kfifo_len(conn->mgmtqueue))
++			goto check_mgmt;
+ 	        }
+-	}
+-
++	spin_unlock_bh(&conn->session->lock);
+ 	return -ENODATA;
+ 
+ again:
+ 	if (unlikely(conn->suspend_tx))
+-		return -ENODATA;
+-
++		rc = -ENODATA;
++	spin_unlock_bh(&conn->session->lock);
+ 	return rc;
+ }
+ 
+@@ -724,11 +768,9 @@
+ 	/*
+ 	 * serialize Xmit worker on a per-connection basis.
+ 	 */
+-	mutex_lock(&conn->xmitmutex);
+ 	do {
+ 		rc = iscsi_data_xmit(conn);
+ 	} while (rc >= 0 || rc == -EAGAIN);
+-	mutex_unlock(&conn->xmitmutex);
+ }
+ 
+ enum {
+@@ -786,20 +828,23 @@
+ 		goto fault;
+ 	}
+ 
+-	/*
+-	 * Check for iSCSI window and take care of CmdSN wrap-around
+-	 */
+-	if ((int)(session->max_cmdsn - session->cmdsn) < 0) {
+-		reason = FAILURE_WINDOW_CLOSED;
+-		goto reject;
+-	}
+-
+ 	conn = session->leadconn;
+ 	if (!conn) {
+ 		reason = FAILURE_SESSION_FREED;
+ 		goto fault;
+ 	}
+ 
++	/*
++	 * We check this here and in the data xmit path because, if we get
++	 * to the point where this check hits the closed window, we have
++	 * enough IO in flight and enough IO waiting to be transmitted that
++	 * it is better to let the scsi/block layer queue up.
++	 */
++	if (iscsi_check_cmdsn_window_closed(conn)) {
++		reason = FAILURE_WINDOW_CLOSED;
++		goto reject;
++	}
++
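The window test rejects a command when CmdSN is no longer <= MaxCmdSN in the RFC 1982 sense, which also yields the correct number of remaining slots across wrap-around. A sketch with made-up session values:

#include <stdint.h>
#include <stdio.h>

#define SNA32_CHECK 2147483648UL

static int sna_lte(uint32_t n1, uint32_t n2)
{
	return n1 == n2 || ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) ||
			    (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
}

int main(void)
{
	/* CmdSN about to wrap, MaxCmdSN already wrapped */
	uint32_t cmdsn = 0xffffffffU, max_cmdsn = 3;

	if (!sna_lte(cmdsn, max_cmdsn))
		printf("window closed -> -ENOSPC\n");
	else	/* unsigned wrap makes the subtraction come out right: 5 */
		printf("window open, %u slot(s) left\n",
		       max_cmdsn - cmdsn + 1);
	return 0;
}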
+ 	if (!__kfifo_get(session->cmdpool.queue, (void*)&ctask,
+ 			 sizeof(void*))) {
+ 		reason = FAILURE_OOM;
+@@ -814,18 +859,8 @@
+ 	ctask->conn = conn;
+ 	ctask->sc = sc;
+ 	INIT_LIST_HEAD(&ctask->running);
+-	ctask->total_length = sc->request_bufflen;
+-	iscsi_prep_scsi_cmd_pdu(ctask);
+-
+-	session->tt->init_cmd_task(ctask);
+ 
+ 	list_add_tail(&ctask->running, &conn->xmitqueue);
+-	debug_scsi(
+-	       "ctask enq [%s cid %d sc %p cdb 0x%x itt 0x%x len %d cmdsn %d "
+-		"win %d]\n",
+-		sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read",
+-		conn->id, sc, sc->cmnd[0], ctask->itt, sc->request_bufflen,
+-		session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1);
+ 	spin_unlock(&session->lock);
+ 
+ 	scsi_queue_work(host, &conn->xmitwork);
+@@ -841,7 +876,7 @@
+ 	printk(KERN_ERR "iscsi: cmd 0x%x is not queued (%d)\n",
+ 	       sc->cmnd[0], reason);
+ 	sc->result = (DID_NO_CONNECT << 16);
+-	sc->resid = sc->request_bufflen;
++	scsi_set_resid(sc, scsi_bufflen(sc));
+ 	sc->scsi_done(sc);
+ 	return 0;
+ }
+@@ -856,19 +891,16 @@
+ }
+ EXPORT_SYMBOL_GPL(iscsi_change_queue_depth);
+ 
+-static int
+-iscsi_conn_send_generic(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
++static struct iscsi_mgmt_task *
++__iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+ 			char *data, uint32_t data_size)
+ {
+ 	struct iscsi_session *session = conn->session;
+-	struct iscsi_nopout *nop = (struct iscsi_nopout *)hdr;
+ 	struct iscsi_mgmt_task *mtask;
+ 
+-	spin_lock_bh(&session->lock);
+-	if (session->state == ISCSI_STATE_TERMINATE) {
+-		spin_unlock_bh(&session->lock);
+-		return -EPERM;
+-	}
++	if (session->state == ISCSI_STATE_TERMINATE)
++		return NULL;
++
+ 	if (hdr->opcode == (ISCSI_OP_LOGIN | ISCSI_OP_IMMEDIATE) ||
+ 	    hdr->opcode == (ISCSI_OP_TEXT | ISCSI_OP_IMMEDIATE))
+ 		/*
+@@ -882,27 +914,11 @@
+ 		BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
+ 		BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
+ 
+-		nop->exp_statsn = cpu_to_be32(conn->exp_statsn);
+ 		if (!__kfifo_get(session->mgmtpool.queue,
+-				 (void*)&mtask, sizeof(void*))) {
+-			spin_unlock_bh(&session->lock);
+-			return -ENOSPC;
+-		}
++				 (void*)&mtask, sizeof(void*)))
++			return NULL;
+ 	}
+ 
+-	/*
+-	 * pre-format CmdSN for outgoing PDU.
+-	 */
+-	if (hdr->itt != RESERVED_ITT) {
+-		hdr->itt = build_itt(mtask->itt, conn->id, session->age);
+-		nop->cmdsn = cpu_to_be32(session->cmdsn);
+-		if (conn->c_stage == ISCSI_CONN_STARTED &&
+-		    !(hdr->opcode & ISCSI_OP_IMMEDIATE))
+-			session->cmdsn++;
+-	} else
+-		/* do not advance CmdSN */
+-		nop->cmdsn = cpu_to_be32(session->cmdsn);
+-
+ 	if (data_size) {
+ 		memcpy(mtask->data, data, data_size);
+ 		mtask->data_count = data_size;
+@@ -911,38 +927,23 @@
+ 
+ 	INIT_LIST_HEAD(&mtask->running);
+ 	memcpy(mtask->hdr, hdr, sizeof(struct iscsi_hdr));
+-	if (session->tt->init_mgmt_task)
+-		session->tt->init_mgmt_task(conn, mtask, data, data_size);
+-	spin_unlock_bh(&session->lock);
+-
+-	debug_scsi("mgmtpdu [op 0x%x hdr->itt 0x%x datalen %d]\n",
+-		   hdr->opcode, hdr->itt, data_size);
+-
+-	/*
+-	 * since send_pdu() could be called at least from two contexts,
+-	 * we need to serialize __kfifo_put, so we don't have to take
+-	 * additional lock on fast data-path
+-	 */
+-        if (hdr->opcode & ISCSI_OP_IMMEDIATE)
+-	        __kfifo_put(conn->immqueue, (void*)&mtask, sizeof(void*));
+-	else
+ 	        __kfifo_put(conn->mgmtqueue, (void*)&mtask, sizeof(void*));
+-
+-	scsi_queue_work(session->host, &conn->xmitwork);
+-	return 0;
++	return mtask;
+ }
+ 
+ int iscsi_conn_send_pdu(struct iscsi_cls_conn *cls_conn, struct iscsi_hdr *hdr,
+ 			char *data, uint32_t data_size)
+ {
+ 	struct iscsi_conn *conn = cls_conn->dd_data;
+-	int rc;
+-
+-	mutex_lock(&conn->xmitmutex);
+-	rc = iscsi_conn_send_generic(conn, hdr, data, data_size);
+-	mutex_unlock(&conn->xmitmutex);
++	struct iscsi_session *session = conn->session;
++	int err = 0;
+ 
+-	return rc;
++	spin_lock_bh(&session->lock);
++	if (!__iscsi_conn_send_pdu(conn, hdr, data, data_size))
++		err = -EPERM;
++	spin_unlock_bh(&session->lock);
++	scsi_queue_work(session->host, &conn->xmitwork);
++	return err;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_conn_send_pdu);
+ 
+@@ -1027,14 +1028,12 @@
+ 	spin_unlock(&session->lock);
+ }
+ 
+-/* must be called with the mutex lock */
+ static int iscsi_exec_abort_task(struct scsi_cmnd *sc,
+ 				 struct iscsi_cmd_task *ctask)
+ {
+ 	struct iscsi_conn *conn = ctask->conn;
+ 	struct iscsi_session *session = conn->session;
+ 	struct iscsi_tm *hdr = &conn->tmhdr;
+-	int rc;
+ 
+ 	/*
+ 	 * ctask timed out but session is OK requests must be serialized.
+@@ -1047,32 +1046,27 @@
+ 	hdr->rtt = ctask->hdr->itt;
+ 	hdr->refcmdsn = ctask->hdr->cmdsn;
+ 
+-	rc = iscsi_conn_send_generic(conn, (struct iscsi_hdr *)hdr,
++	ctask->mtask = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)hdr,
+ 				     NULL, 0);
+-	if (rc) {
++	if (!ctask->mtask) {
+ 		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+-		debug_scsi("abort sent failure [itt 0x%x] %d\n", ctask->itt,
+-		           rc);
+-		return rc;
++		debug_scsi("abort sent failure [itt 0x%x]\n", ctask->itt);
++		return -EPERM;
+ 	}
++	ctask->state = ISCSI_TASK_ABORTING;
+ 
+ 	debug_scsi("abort sent [itt 0x%x]\n", ctask->itt);
+ 
+-	spin_lock_bh(&session->lock);
+-	ctask->mtask = (struct iscsi_mgmt_task *)
+-			session->mgmt_cmds[get_itt(hdr->itt) -
+-					ISCSI_MGMT_ITT_OFFSET];
+-
+ 	if (conn->tmabort_state == TMABORT_INITIAL) {
+ 		conn->tmfcmd_pdus_cnt++;
+-		conn->tmabort_timer.expires = 10*HZ + jiffies;
++		conn->tmabort_timer.expires = 20*HZ + jiffies;
+ 		conn->tmabort_timer.function = iscsi_tmabort_timedout;
+ 		conn->tmabort_timer.data = (unsigned long)ctask;
+ 		add_timer(&conn->tmabort_timer);
+ 		debug_scsi("abort set timeout [itt 0x%x]\n", ctask->itt);
+ 	}
+ 	spin_unlock_bh(&session->lock);
+-	mutex_unlock(&conn->xmitmutex);
++	scsi_queue_work(session->host, &conn->xmitwork);
+ 
+ 	/*
+ 	 * block eh thread until:
+@@ -1089,13 +1083,12 @@
+ 	if (signal_pending(current))
+ 		flush_signals(current);
+ 	del_timer_sync(&conn->tmabort_timer);
+-
+-	mutex_lock(&conn->xmitmutex);
++	spin_lock_bh(&session->lock);
+ 	return 0;
+ }
+ 
+ /*
+- * xmit mutex and session lock must be held
++ * session lock must be held
+  */
+ static struct iscsi_mgmt_task *
+ iscsi_remove_mgmt_task(struct kfifo *fifo, uint32_t itt)
+@@ -1127,7 +1120,7 @@
+ 	if (!ctask->mtask)
+ 		return -EINVAL;
+ 
+-	if (!iscsi_remove_mgmt_task(conn->immqueue, ctask->mtask->itt))
++	if (!iscsi_remove_mgmt_task(conn->mgmtqueue, ctask->mtask->itt))
+ 		list_del(&ctask->mtask->running);
+ 	__kfifo_put(session->mgmtpool.queue, (void*)&ctask->mtask,
+ 		    sizeof(void*));
+@@ -1136,7 +1129,7 @@
+ }
+ 
+ /*
+- * session lock and xmitmutex must be held
++ * session lock must be held
+  */
+ static void fail_command(struct iscsi_conn *conn, struct iscsi_cmd_task *ctask,
+ 			 int err)
+@@ -1147,11 +1140,14 @@
+ 	if (!sc)
+ 		return;
+ 
++	if (ctask->state != ISCSI_TASK_PENDING)
+ 	conn->session->tt->cleanup_cmd_task(conn, ctask);
+ 	iscsi_ctask_mtask_cleanup(ctask);
+ 
+ 	sc->result = err;
+-	sc->resid = sc->request_bufflen;
++	scsi_set_resid(sc, scsi_bufflen(sc));
++	if (conn->ctask == ctask)
++		conn->ctask = NULL;
+ 	/* release ref from queuecommand */
+ 	__iscsi_put_ctask(ctask);
+ }
+@@ -1179,7 +1175,6 @@
+ 	conn->eh_abort_cnt++;
+ 	debug_scsi("aborting [sc %p itt 0x%x]\n", sc, ctask->itt);
+ 
+-	mutex_lock(&conn->xmitmutex);
+ 	spin_lock_bh(&session->lock);
+ 
+ 	/*
+@@ -1192,9 +1187,8 @@
+ 
+ 	/* ctask completed before time out */
+ 	if (!ctask->sc) {
+-		spin_unlock_bh(&session->lock);
+ 		debug_scsi("sc completed while abort in progress\n");
+-		goto success_rel_mutex;
++		goto success;
+ 	}
+ 
+ 	/* what should we do here ? */
+@@ -1204,15 +1198,13 @@
+ 		goto failed;
+ 	}
+ 
+-	if (ctask->state == ISCSI_TASK_PENDING)
+-		goto success_cleanup;
++	if (ctask->state == ISCSI_TASK_PENDING) {
++		fail_command(conn, ctask, DID_ABORT << 16);
++		goto success;
++	}
+ 
+ 	conn->tmabort_state = TMABORT_INITIAL;
+-
+-	spin_unlock_bh(&session->lock);
+ 	rc = iscsi_exec_abort_task(sc, ctask);
+-	spin_lock_bh(&session->lock);
+-
+ 	if (rc || sc->SCp.phase != session->age ||
+ 	    session->state != ISCSI_STATE_LOGGED_IN)
+ 		goto failed;
+@@ -1220,45 +1212,44 @@
+ 
+ 	switch (conn->tmabort_state) {
+ 	case TMABORT_SUCCESS:
+-		goto success_cleanup;
++		spin_unlock_bh(&session->lock);
++		/*
++		 * clean up task if aborted. grab the recv lock as a writer
++		 */
++		write_lock_bh(conn->recv_lock);
++		spin_lock(&session->lock);
++		fail_command(conn, ctask, DID_ABORT << 16);
++		spin_unlock(&session->lock);
++		write_unlock_bh(conn->recv_lock);
++		/*
++		 * make sure xmit thread is not still touching the
++		 * ctask/scsi_cmnd
++		 */
++		scsi_flush_work(session->host);
++		goto success_unlocked;
+ 	case TMABORT_NOT_FOUND:
+ 		if (!ctask->sc) {
+ 			/* ctask completed before tmf abort response */
+-			spin_unlock_bh(&session->lock);
+ 			debug_scsi("sc completed while abort in progress\n");
+-			goto success_rel_mutex;
++			goto success;
+ 		}
+ 		/* fall through */
+ 	default:
+ 		/* timedout or failed */
+ 		spin_unlock_bh(&session->lock);
+ 		iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+-		spin_lock_bh(&session->lock);
+-		goto failed;
++		goto failed_unlocked;
+ 	}
+ 
+-success_cleanup:
+-	debug_scsi("abort success [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
++success:
+ 	spin_unlock_bh(&session->lock);
+-
+-	/*
+-	 * clean up task if aborted. we have the xmitmutex so grab
+-	 * the recv lock as a writer
+-	 */
+-	write_lock_bh(conn->recv_lock);
+-	spin_lock(&session->lock);
+-	fail_command(conn, ctask, DID_ABORT << 16);
+-	spin_unlock(&session->lock);
+-	write_unlock_bh(conn->recv_lock);
+-
+-success_rel_mutex:
+-	mutex_unlock(&conn->xmitmutex);
++success_unlocked:
++	debug_scsi("abort success [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
+ 	return SUCCESS;
+ 
+ failed:
+ 	spin_unlock_bh(&session->lock);
+-	mutex_unlock(&conn->xmitmutex);
+-
++failed_unlocked:
+ 	debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
+ 	return FAILED;
+ }
+@@ -1339,6 +1330,10 @@
+  * iscsi_session_setup - create iscsi cls session and host and session
+  * @scsit: scsi transport template
+  * @iscsit: iscsi transport template
++ * @cmds_max: scsi host can queue
++ * @qdepth: scsi host cmds per lun
++ * @cmd_task_size: LLD ctask private data size
++ * @mgmt_task_size: LLD mtask private data size
+  * @initial_cmdsn: initial CmdSN
+  * @hostno: host no allocated
+  *
+@@ -1348,6 +1343,7 @@
+ struct iscsi_cls_session *
+ iscsi_session_setup(struct iscsi_transport *iscsit,
+ 		    struct scsi_transport_template *scsit,
++		    uint16_t cmds_max, uint16_t qdepth,
+ 		    int cmd_task_size, int mgmt_task_size,
+ 		    uint32_t initial_cmdsn, uint32_t *hostno)
+ {
+@@ -1356,11 +1352,32 @@
+ 	struct iscsi_cls_session *cls_session;
+ 	int cmd_i;
+ 
++	if (qdepth > ISCSI_MAX_CMD_PER_LUN || qdepth < 1) {
++		if (qdepth != 0)
++			printk(KERN_ERR "iscsi: invalid queue depth of %d. "
++			      "Queue depth must be between 1 and %d.\n",
++			      qdepth, ISCSI_MAX_CMD_PER_LUN);
++		qdepth = ISCSI_DEF_CMD_PER_LUN;
++	}
++
++	if (cmds_max < 2 || (cmds_max & (cmds_max - 1)) ||
++	    cmds_max >= ISCSI_MGMT_ITT_OFFSET) {
++		if (cmds_max != 0)
++			printk(KERN_ERR "iscsi: invalid can_queue of %d. "
++			       "can_queue must be a power of 2 and between "
++			       "2 and %d - setting to %d.\n", cmds_max,
++			       ISCSI_MGMT_ITT_OFFSET, ISCSI_DEF_XMIT_CMDS_MAX);
++		cmds_max = ISCSI_DEF_XMIT_CMDS_MAX;
++	}
++
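The cmds_max validation uses the usual power-of-two idiom: a power of two has exactly one bit set, so x & (x - 1) is zero precisely for powers of two (and for zero, which the cmds_max < 2 test already rules out). For instance:

#include <assert.h>

static int is_power_of_2(unsigned int x)
{
	return x != 0 && (x & (x - 1)) == 0;
}

int main(void)
{
	assert(is_power_of_2(128));	/* 0x80 & 0x7f == 0 */
	assert(!is_power_of_2(96));	/* 0x60 & 0x5f == 0x40 */
	return 0;
}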
+ 	shost = scsi_host_alloc(iscsit->host_template,
+ 				hostdata_privsize(sizeof(*session)));
+ 	if (!shost)
+ 		return NULL;
+ 
++	/* the iscsi layer keeps one task in reserve */
++	shost->can_queue = cmds_max - 1;
++	shost->cmd_per_lun = qdepth;
+ 	shost->max_id = 1;
+ 	shost->max_channel = 0;
+ 	shost->max_lun = iscsit->max_lun;
+@@ -1374,7 +1391,7 @@
+ 	session->host = shost;
+ 	session->state = ISCSI_STATE_FREE;
+ 	session->mgmtpool_max = ISCSI_MGMT_CMDS_MAX;
+-	session->cmds_max = ISCSI_XMIT_CMDS_MAX;
++	session->cmds_max = cmds_max;
+ 	session->cmdsn = initial_cmdsn;
+ 	session->exp_cmdsn = initial_cmdsn + 1;
+ 	session->max_cmdsn = initial_cmdsn + 1;
+@@ -1461,7 +1478,14 @@
+ 	iscsi_pool_free(&session->mgmtpool, (void**)session->mgmt_cmds);
+ 	iscsi_pool_free(&session->cmdpool, (void**)session->cmds);
+ 
++	kfree(session->password);
++	kfree(session->password_in);
++	kfree(session->username);
++	kfree(session->username_in);
+ 	kfree(session->targetname);
++	kfree(session->netdev);
++	kfree(session->hwaddress);
++	kfree(session->initiatorname);
+ 
+ 	iscsi_destroy_session(cls_session);
+ 	scsi_host_put(shost);
+@@ -1499,11 +1523,6 @@
+ 	INIT_LIST_HEAD(&conn->xmitqueue);
+ 
+ 	/* initialize general immediate & non-immediate PDU commands queue */
+-	conn->immqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*),
+-			                GFP_KERNEL, NULL);
+-	if (conn->immqueue == ERR_PTR(-ENOMEM))
+-		goto immqueue_alloc_fail;
+-
+ 	conn->mgmtqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*),
+ 			                GFP_KERNEL, NULL);
+ 	if (conn->mgmtqueue == ERR_PTR(-ENOMEM))
+@@ -1527,7 +1546,6 @@
+ 	conn->login_mtask->data = conn->data = data;
+ 
+ 	init_timer(&conn->tmabort_timer);
+-	mutex_init(&conn->xmitmutex);
+ 	init_waitqueue_head(&conn->ehwait);
+ 
+ 	return cls_conn;
+@@ -1538,8 +1556,6 @@
+ login_mtask_alloc_fail:
+ 	kfifo_free(conn->mgmtqueue);
+ mgmtqueue_alloc_fail:
+-	kfifo_free(conn->immqueue);
+-immqueue_alloc_fail:
+ 	iscsi_destroy_conn(cls_conn);
+ 	return NULL;
+ }
+@@ -1558,10 +1574,8 @@
+ 	struct iscsi_session *session = conn->session;
+ 	unsigned long flags;
+ 
+-	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+-	mutex_lock(&conn->xmitmutex);
+-
+ 	spin_lock_bh(&session->lock);
++	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+ 	conn->c_stage = ISCSI_CONN_CLEANUP_WAIT;
+ 	if (session->leadconn == conn) {
+ 		/*
+@@ -1572,8 +1586,6 @@
+ 	}
+ 	spin_unlock_bh(&session->lock);
+ 
+-	mutex_unlock(&conn->xmitmutex);
+-
+ 	/*
+ 	 * Block until all in-progress commands for this connection
+ 	 * time out or fail.
+@@ -1610,7 +1622,6 @@
+ 	}
+ 	spin_unlock_bh(&session->lock);
+ 
+-	kfifo_free(conn->immqueue);
+ 	kfifo_free(conn->mgmtqueue);
+ 
+ 	iscsi_destroy_conn(cls_conn);
+@@ -1671,8 +1682,7 @@
+ 	struct iscsi_mgmt_task *mtask, *tmp;
+ 
+ 	/* handle pending */
+-	while (__kfifo_get(conn->immqueue, (void*)&mtask, sizeof(void*)) ||
+-	       __kfifo_get(conn->mgmtqueue, (void*)&mtask, sizeof(void*))) {
++	while (__kfifo_get(conn->mgmtqueue, (void*)&mtask, sizeof(void*))) {
+ 		if (mtask == conn->login_mtask)
+ 			continue;
+ 		debug_scsi("flushing pending mgmt task itt 0x%x\n", mtask->itt);
+@@ -1742,12 +1752,12 @@
+ 	conn->c_stage = ISCSI_CONN_STOPPED;
+ 	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+ 	spin_unlock_bh(&session->lock);
++	scsi_flush_work(session->host);
+ 
+ 	write_lock_bh(conn->recv_lock);
+ 	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
+ 	write_unlock_bh(conn->recv_lock);
+ 
+-	mutex_lock(&conn->xmitmutex);
+ 	/*
+ 	 * for connection level recovery we should not calculate
+ 	 * header digest. conn->hdr_size used for optimization
+@@ -1771,8 +1781,6 @@
+ 	fail_all_commands(conn);
+ 	flush_control_queues(session, conn);
+ 	spin_unlock_bh(&session->lock);
+-
+-	mutex_unlock(&conn->xmitmutex);
+ }
+ 
+ void iscsi_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
+@@ -1867,6 +1875,30 @@
+ 	case ISCSI_PARAM_EXP_STATSN:
+ 		sscanf(buf, "%u", &conn->exp_statsn);
+ 		break;
++	case ISCSI_PARAM_USERNAME:
++		kfree(session->username);
++		session->username = kstrdup(buf, GFP_KERNEL);
++		if (!session->username)
++			return -ENOMEM;
++		break;
++	case ISCSI_PARAM_USERNAME_IN:
++		kfree(session->username_in);
++		session->username_in = kstrdup(buf, GFP_KERNEL);
++		if (!session->username_in)
++			return -ENOMEM;
++		break;
++	case ISCSI_PARAM_PASSWORD:
++		kfree(session->password);
++		session->password = kstrdup(buf, GFP_KERNEL);
++		if (!session->password)
++			return -ENOMEM;
++		break;
++	case ISCSI_PARAM_PASSWORD_IN:
++		kfree(session->password_in);
++		session->password_in = kstrdup(buf, GFP_KERNEL);
++		if (!session->password_in)
++			return -ENOMEM;
++		break;
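The four new credential setters share one replace-string idiom: free the old copy (kfree(NULL) is a no-op, so the first call needs no special case), kstrdup() the new value, and fail with -ENOMEM if the copy cannot be made. A user-space rendering of the same idiom (strdup standing in for kstrdup; the values are made up):

#include <errno.h>
#include <stdlib.h>
#include <string.h>

/* replace *slot with a private copy of buf; free(NULL) is a no-op,
 * so the very first assignment needs no special case */
static int replace_string(char **slot, const char *buf)
{
	free(*slot);
	*slot = strdup(buf);
	if (!*slot)
		return -ENOMEM;
	return 0;
}

int main(void)
{
	char *username = NULL;

	if (replace_string(&username, "iqn.initiator.one"))
		return 1;
	if (replace_string(&username, "iqn.initiator.two"))
		return 1;
	free(username);
	return 0;
}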
+ 	case ISCSI_PARAM_TARGET_NAME:
+ 		/* this should not change between logins */
+ 		if (session->targetname)
+@@ -1940,6 +1972,18 @@
+ 	case ISCSI_PARAM_TPGT:
+ 		len = sprintf(buf, "%d\n", session->tpgt);
+ 		break;
++	case ISCSI_PARAM_USERNAME:
++		len = sprintf(buf, "%s\n", session->username);
++		break;
++	case ISCSI_PARAM_USERNAME_IN:
++		len = sprintf(buf, "%s\n", session->username_in);
++		break;
++	case ISCSI_PARAM_PASSWORD:
++		len = sprintf(buf, "%s\n", session->password);
++		break;
++	case ISCSI_PARAM_PASSWORD_IN:
++		len = sprintf(buf, "%s\n", session->password_in);
++		break;
+ 	default:
+ 		return -ENOSYS;
+ 	}
+@@ -1990,6 +2034,66 @@
+ }
+ EXPORT_SYMBOL_GPL(iscsi_conn_get_param);
+ 
++int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param,
++			 char *buf)
++{
++	struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
++	int len;
++
++	switch (param) {
++	case ISCSI_HOST_PARAM_NETDEV_NAME:
++		if (!session->netdev)
++			len = sprintf(buf, "%s\n", "default");
++		else
++			len = sprintf(buf, "%s\n", session->netdev);
++		break;
++	case ISCSI_HOST_PARAM_HWADDRESS:
++		if (!session->hwaddress)
++			len = sprintf(buf, "%s\n", "default");
++		else
++			len = sprintf(buf, "%s\n", session->hwaddress);
++		break;
++	case ISCSI_HOST_PARAM_INITIATOR_NAME:
++		if (!session->initiatorname)
++			len = sprintf(buf, "%s\n", "unknown");
++		else
++			len = sprintf(buf, "%s\n", session->initiatorname);
++		break;
++
++	default:
++		return -ENOSYS;
++	}
++
++	return len;
++}
++EXPORT_SYMBOL_GPL(iscsi_host_get_param);
++
++int iscsi_host_set_param(struct Scsi_Host *shost, enum iscsi_host_param param,
++			 char *buf, int buflen)
++{
++	struct iscsi_session *session = iscsi_hostdata(shost->hostdata);
++
++	switch (param) {
++	case ISCSI_HOST_PARAM_NETDEV_NAME:
++		if (!session->netdev)
++			session->netdev = kstrdup(buf, GFP_KERNEL);
++		break;
++	case ISCSI_HOST_PARAM_HWADDRESS:
++		if (!session->hwaddress)
++			session->hwaddress = kstrdup(buf, GFP_KERNEL);
++		break;
++	case ISCSI_HOST_PARAM_INITIATOR_NAME:
++		if (!session->initiatorname)
++			session->initiatorname = kstrdup(buf, GFP_KERNEL);
++		break;
++	default:
++		return -ENOSYS;
++	}
++
++	return 0;
++}
++EXPORT_SYMBOL_GPL(iscsi_host_set_param);
++
+ MODULE_AUTHOR("Mike Christie");
+ MODULE_DESCRIPTION("iSCSI library functions");
+ MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/drivers/scsi/libsas/sas_expander.c linux-2.6.22-try2/drivers/scsi/libsas/sas_expander.c
+--- linux-2.6.22-570/drivers/scsi/libsas/sas_expander.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/libsas/sas_expander.c	2007-12-19 15:29:22.000000000 -0500
+@@ -38,8 +38,10 @@
+ 
+ #if 0
+ /* FIXME: smp needs to migrate into the sas class */
+-static ssize_t smp_portal_read(struct kobject *, char *, loff_t, size_t);
+-static ssize_t smp_portal_write(struct kobject *, char *, loff_t, size_t);
++static ssize_t smp_portal_read(struct kobject *, struct bin_attribute *,
++			       char *, loff_t, size_t);
++static ssize_t smp_portal_write(struct kobject *, struct bin_attribute *,
++				char *, loff_t, size_t);
+ #endif
+ 
+ /* ---------- SMP task management ---------- */
+@@ -1368,7 +1370,6 @@
+ 	memset(bin_attr, 0, sizeof(*bin_attr));
+ 
+ 	bin_attr->attr.name = SMP_BIN_ATTR_NAME;
+-	bin_attr->attr.owner = THIS_MODULE;
+ 	bin_attr->attr.mode = 0600;
+ 
+ 	bin_attr->size = 0;
+@@ -1846,8 +1847,9 @@
+ #if 0
+ /* ---------- SMP portal ---------- */
+ 
+-static ssize_t smp_portal_write(struct kobject *kobj, char *buf, loff_t offs,
+-				size_t size)
++static ssize_t smp_portal_write(struct kobject *kobj,
++				struct bin_attribute *bin_attr,
++				char *buf, loff_t offs, size_t size)
+ {
+ 	struct domain_device *dev = to_dom_device(kobj);
+ 	struct expander_device *ex = &dev->ex_dev;
+@@ -1873,8 +1875,9 @@
+ 	return size;
+ }
+ 
+-static ssize_t smp_portal_read(struct kobject *kobj, char *buf, loff_t offs,
+-			       size_t size)
++static ssize_t smp_portal_read(struct kobject *kobj,
++			       struct bin_attribute *bin_attr,
++			       char *buf, loff_t offs, size_t size)
+ {
+ 	struct domain_device *dev = to_dom_device(kobj);
+ 	struct expander_device *ex = &dev->ex_dev;
+diff -Nurb linux-2.6.22-570/drivers/scsi/libsas/sas_scsi_host.c linux-2.6.22-try2/drivers/scsi/libsas/sas_scsi_host.c
+--- linux-2.6.22-570/drivers/scsi/libsas/sas_scsi_host.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/libsas/sas_scsi_host.c	2007-12-19 15:29:24.000000000 -0500
+@@ -40,6 +40,7 @@
+ 
+ #include <linux/err.h>
+ #include <linux/blkdev.h>
++#include <linux/freezer.h>
+ #include <linux/scatterlist.h>
+ 
+ /* ---------- SCSI Host glue ---------- */
+@@ -76,8 +77,8 @@
+ 			hs = DID_NO_CONNECT;
+ 			break;
+ 		case SAS_DATA_UNDERRUN:
+-			sc->resid = ts->residual;
+-			if (sc->request_bufflen - sc->resid < sc->underflow)
++			scsi_set_resid(sc, ts->residual);
++			if (scsi_bufflen(sc) - scsi_get_resid(sc) < sc->underflow)
+ 				hs = DID_ERROR;
+ 			break;
+ 		case SAS_DATA_OVERRUN:
+@@ -161,9 +162,9 @@
+ 	task->ssp_task.task_attr = sas_scsi_get_task_attr(cmd);
+ 	memcpy(task->ssp_task.cdb, cmd->cmnd, 16);
+ 
+-	task->scatter = cmd->request_buffer;
+-	task->num_scatter = cmd->use_sg;
+-	task->total_xfer_len = cmd->request_bufflen;
++	task->scatter = scsi_sglist(cmd);
++	task->num_scatter = scsi_sg_count(cmd);
++	task->total_xfer_len = scsi_bufflen(cmd);
+ 	task->data_dir = cmd->sc_data_direction;
+ 
+ 	task->task_done = sas_scsi_task_done;
+@@ -868,8 +869,6 @@
+ {
+ 	struct sas_ha_struct *sas_ha = _sas_ha;
+ 
+-	current->flags |= PF_NOFREEZE;
+-
+ 	while (1) {
+ 		set_current_state(TASK_INTERRUPTIBLE);
+ 		schedule();
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/Makefile linux-2.6.22-try2/drivers/scsi/lpfc/Makefile
+--- linux-2.6.22-570/drivers/scsi/lpfc/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -1,7 +1,7 @@
+ #/*******************************************************************
+ # * This file is part of the Emulex Linux Device Driver for         *
+ # * Fibre Channel Host Bus Adapters.                                *
+-# * Copyright (C) 2004-2005 Emulex.  All rights reserved.           *
++# * Copyright (C) 2004-2006 Emulex.  All rights reserved.           *
+ # * EMULEX and SLI are trademarks of Emulex.                        *
+ # * www.emulex.com                                                  *
+ # *                                                                 *
+@@ -27,4 +27,5 @@
+ obj-$(CONFIG_SCSI_LPFC) := lpfc.o
+ 
+ lpfc-objs := lpfc_mem.o lpfc_sli.o lpfc_ct.o lpfc_els.o lpfc_hbadisc.o	\
+-	lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o lpfc_scsi.o lpfc_attr.o
++	lpfc_init.o lpfc_mbox.o lpfc_nportdisc.o lpfc_scsi.o lpfc_attr.o \
++	lpfc_vport.o lpfc_debugfs.o
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc.h linux-2.6.22-try2/drivers/scsi/lpfc/lpfc.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc.h	2007-12-19 15:29:23.000000000 -0500
+@@ -19,8 +19,9 @@
+  * included with this package.                                     *
+  *******************************************************************/
+ 
+-struct lpfc_sli2_slim;
++#include <scsi/scsi_host.h>
+ 
++struct lpfc_sli2_slim;
+ 
+ #define LPFC_MAX_TARGET		256	/* max number of targets supported */
+ #define LPFC_MAX_DISC_THREADS	64	/* max outstanding discovery els
+@@ -32,6 +33,20 @@
+ #define LPFC_IOCB_LIST_CNT	2250	/* list of IOCBs for fast-path usage. */
+ #define LPFC_Q_RAMP_UP_INTERVAL 120     /* lun q_depth ramp up interval */
+ 
++/*
++ * The following time intervals are used for adjusting SCSI device
++ * queue depths when there is a driver resource error or a firmware
++ * resource error.
++ */
++#define QUEUE_RAMP_DOWN_INTERVAL	(1 * HZ)   /* 1 Second */
++#define QUEUE_RAMP_UP_INTERVAL		(300 * HZ) /* 5 minutes */
++
++/* Number of exchanges reserved for discovery to complete */
++#define LPFC_DISC_IOCB_BUFF_COUNT 20
++
++#define LPFC_HB_MBOX_INTERVAL   5	/* Heart beat interval in seconds. */
++#define LPFC_HB_MBOX_TIMEOUT    30 	/* Heart beat timeout  in seconds. */
++
+ /* Define macros for 64 bit support */
+ #define putPaddrLow(addr)    ((uint32_t) (0xffffffff & (u64)(addr)))
+ #define putPaddrHigh(addr)   ((uint32_t) (0xffffffff & (((u64)(addr))>>32)))
+@@ -61,6 +76,11 @@
+ 	uint32_t    current_count;
+ };
+ 
++struct hbq_dmabuf {
++	struct lpfc_dmabuf dbuf;
++	uint32_t tag;
++};
++
+ /* Priority bit.  Set value to exceed low water mark in lpfc_mem. */
+ #define MEM_PRI		0x100
+ 
+@@ -90,6 +110,29 @@
+ 		uint32_t sli2FwRev;
+ 		uint8_t sli2FwName[16];
+ 	} rev;
++	struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++		uint32_t rsvd2  :24;  /* Reserved                             */
++		uint32_t cmv	: 1;  /* Configure Max VPIs                   */
++		uint32_t ccrp   : 1;  /* Config Command Ring Polling          */
++		uint32_t csah   : 1;  /* Configure Synchronous Abort Handling */
++		uint32_t chbs   : 1;  /* Configure Host Backing store         */
++		uint32_t cinb   : 1;  /* Enable Interrupt Notification Block  */
++		uint32_t cerbm	: 1;  /* Configure Enhanced Receive Buf Mgmt  */
++		uint32_t cmx	: 1;  /* Configure Max XRIs                   */
++		uint32_t cmr	: 1;  /* Configure Max RPIs                   */
++#else	/*  __LITTLE_ENDIAN */
++		uint32_t cmr	: 1;  /* Configure Max RPIs                   */
++		uint32_t cmx	: 1;  /* Configure Max XRIs                   */
++		uint32_t cerbm	: 1;  /* Configure Enhanced Receive Buf Mgmt  */
++		uint32_t cinb   : 1;  /* Enable Interrupt Notification Block  */
++		uint32_t chbs   : 1;  /* Configure Host Backing store         */
++		uint32_t csah   : 1;  /* Configure Synchronous Abort Handling */
++		uint32_t ccrp   : 1;  /* Config Command Ring Polling          */
++		uint32_t cmv	: 1;  /* Configure Max VPIs                   */
++		uint32_t rsvd2  :24;  /* Reserved                             */
++#endif
++	} sli3Feat;
+ } lpfc_vpd_t;
+ 
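The sli3Feat word above uses the standard kernel trick of declaring the bitfields twice, once per byte order, because C leaves bitfield allocation to the ABI: big-endian ABIs hand the first-declared field the most significant bits, little-endian ones the least significant. Mirroring the declaration order keeps each flag on the same bit of the 32-bit word either way. A stripped-down illustration using the compiler's byte-order macros instead of the kernel's __BIG_ENDIAN_BITFIELD (the flag names are made up):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct feat {
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	uint32_t rsvd  : 30;
	uint32_t beta  : 1;
	uint32_t alpha : 1;	/* declared last -> least significant bit */
#else
	uint32_t alpha : 1;	/* declared first -> least significant bit */
	uint32_t beta  : 1;
	uint32_t rsvd  : 30;
#endif
};

int main(void)
{
	struct feat f;
	uint32_t word;

	memset(&f, 0, sizeof(f));
	f.alpha = 1;
	memcpy(&word, &f, sizeof(word));
	/* with the mirrored declarations, alpha is bit 0 on both ABIs */
	printf("word = 0x%08x\n", (unsigned int)word);
	return 0;
}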
+ struct lpfc_scsi_buf;
+@@ -122,6 +165,7 @@
+ 	uint32_t elsRcvRPS;
+ 	uint32_t elsRcvRPL;
+ 	uint32_t elsXmitFLOGI;
++	uint32_t elsXmitFDISC;
+ 	uint32_t elsXmitPLOGI;
+ 	uint32_t elsXmitPRLI;
+ 	uint32_t elsXmitADISC;
+@@ -165,70 +209,53 @@
+ 	struct lpfcMboxq *    mbox;
+ };
+ 
+-struct lpfc_hba {
+-	struct lpfc_sli sli;
+-	struct lpfc_sli2_slim *slim2p;
+-	dma_addr_t slim2p_mapping;
+-	uint16_t pci_cfg_value;
++struct lpfc_hba;
+ 
+-	int32_t hba_state;
+ 
+-#define LPFC_STATE_UNKNOWN        0    /* HBA state is unknown */
+-#define LPFC_WARM_START           1    /* HBA state after selective reset */
+-#define LPFC_INIT_START           2    /* Initial state after board reset */
+-#define LPFC_INIT_MBX_CMDS        3    /* Initialize HBA with mbox commands */
+-#define LPFC_LINK_DOWN            4    /* HBA initialized, link is down */
+-#define LPFC_LINK_UP              5    /* Link is up  - issue READ_LA */
+-#define LPFC_LOCAL_CFG_LINK       6    /* local NPORT Id configured */
+-#define LPFC_FLOGI                7    /* FLOGI sent to Fabric */
+-#define LPFC_FABRIC_CFG_LINK      8    /* Fabric assigned NPORT Id
+-					   configured */
+-#define LPFC_NS_REG               9	/* Register with NameServer */
+-#define LPFC_NS_QRY               10	/* Query NameServer for NPort ID list */
+-#define LPFC_BUILD_DISC_LIST      11	/* Build ADISC and PLOGI lists for
++enum discovery_state {
++	LPFC_VPORT_UNKNOWN     =  0,    /* vport state is unknown */
++	LPFC_VPORT_FAILED      =  1,    /* vport has failed */
++	LPFC_LOCAL_CFG_LINK    =  6,    /* local NPORT Id configured */
++	LPFC_FLOGI             =  7,    /* FLOGI sent to Fabric */
++	LPFC_FDISC             =  8,    /* FDISC sent for vport */
++	LPFC_FABRIC_CFG_LINK   =  9,    /* Fabric assigned NPORT Id
++				         * configured */
++	LPFC_NS_REG            =  10,   /* Register with NameServer */
++	LPFC_NS_QRY            =  11,   /* Query NameServer for NPort ID list */
++	LPFC_BUILD_DISC_LIST   =  12,   /* Build ADISC and PLOGI lists for
+ 					 * device authentication / discovery */
+-#define LPFC_DISC_AUTH            12	/* Processing ADISC list */
+-#define LPFC_CLEAR_LA             13	/* authentication cmplt - issue
+-					   CLEAR_LA */
+-#define LPFC_HBA_READY            32
+-#define LPFC_HBA_ERROR            -1
++	LPFC_DISC_AUTH         =  13,   /* Processing ADISC list */
++	LPFC_VPORT_READY       =  32,
++};
+ 
+-	int32_t stopped;   /* HBA has not been restarted since last ERATT */
+-	uint8_t fc_linkspeed;	/* Link speed after last READ_LA */
++enum hba_state {
++	LPFC_LINK_UNKNOWN    =   0,   /* HBA state is unknown */
++	LPFC_WARM_START      =   1,   /* HBA state after selective reset */
++	LPFC_INIT_START      =   2,   /* Initial state after board reset */
++	LPFC_INIT_MBX_CMDS   =   3,   /* Initialize HBA with mbox commands */
++	LPFC_LINK_DOWN       =   4,   /* HBA initialized, link is down */
++	LPFC_LINK_UP         =   5,   /* Link is up  - issue READ_LA */
++	LPFC_CLEAR_LA        =   6,   /* authentication cmplt - issue
++				       * CLEAR_LA */
++	LPFC_HBA_READY       =  32,
++	LPFC_HBA_ERROR       =  -1
++};
+ 
+-	uint32_t fc_eventTag;	/* event tag for link attention */
+-	uint32_t fc_prli_sent;	/* cntr for outstanding PRLIs */
++struct lpfc_vport {
++	struct list_head listentry;
++	struct lpfc_hba *phba;
++	uint8_t port_type;
++#define LPFC_PHYSICAL_PORT 1
++#define LPFC_NPIV_PORT  2
++#define LPFC_FABRIC_PORT 3
++	enum discovery_state port_state;
+ 
+-	uint32_t num_disc_nodes;	/*in addition to hba_state */
++	uint16_t vpi;
+ 
+-	struct timer_list fc_estabtmo;	/* link establishment timer */
+-	struct timer_list fc_disctmo;	/* Discovery rescue timer */
+-	struct timer_list fc_fdmitmo;	/* fdmi timer */
+-	/* These fields used to be binfo */
+-	struct lpfc_name fc_nodename;	/* fc nodename */
+-	struct lpfc_name fc_portname;	/* fc portname */
+-	uint32_t fc_pref_DID;	/* preferred D_ID */
+-	uint8_t fc_pref_ALPA;	/* preferred AL_PA */
+-	uint32_t fc_edtov;	/* E_D_TOV timer value */
+-	uint32_t fc_arbtov;	/* ARB_TOV timer value */
+-	uint32_t fc_ratov;	/* R_A_TOV timer value */
+-	uint32_t fc_rttov;	/* R_T_TOV timer value */
+-	uint32_t fc_altov;	/* AL_TOV timer value */
+-	uint32_t fc_crtov;	/* C_R_TOV timer value */
+-	uint32_t fc_citov;	/* C_I_TOV timer value */
+-	uint32_t fc_myDID;	/* fibre channel S_ID */
+-	uint32_t fc_prevDID;	/* previous fibre channel S_ID */
+-
+-	struct serv_parm fc_sparam;	/* buffer for our service parameters */
+-	struct serv_parm fc_fabparam;	/* fabric service parameters buffer */
+-	uint8_t alpa_map[128];	/* AL_PA map from READ_LA */
+-
+-	uint8_t fc_ns_retry;	/* retries for fabric nameserver */
+-	uint32_t fc_nlp_cnt;	/* outstanding NODELIST requests */
+-	uint32_t fc_rscn_id_cnt;	/* count of RSCNs payloads in list */
+-	struct lpfc_dmabuf *fc_rscn_id_list[FC_MAX_HOLD_RSCN];
+-	uint32_t lmt;
+ 	uint32_t fc_flag;	/* FC flags */
++/* Several of these flags are HBA centric and should be moved to
++ * phba->link_flag (e.g. FC_PTP, FC_PUBLIC_LOOP)
++ */
+ #define FC_PT2PT                0x1	/* pt2pt with no fabric */
+ #define FC_PT2PT_PLOGI          0x2	/* pt2pt initiate PLOGI */
+ #define FC_DISC_TMO             0x4	/* Discovery timer running */
+@@ -239,22 +266,14 @@
+ #define FC_OFFLINE_MODE         0x80	/* Interface is offline for diag */
+ #define FC_FABRIC               0x100	/* We are fabric attached */
+ #define FC_ESTABLISH_LINK       0x200	/* Reestablish Link */
+-#define FC_RSCN_DISCOVERY       0x400	/* Authenticate all devices after RSCN*/
+-#define FC_BLOCK_MGMT_IO        0x800   /* Don't allow mgmt mbx or iocb cmds */
+-#define FC_LOADING		0x1000	/* HBA in process of loading drvr */
+-#define FC_UNLOADING		0x2000	/* HBA in process of unloading drvr */
++#define FC_RSCN_DISCOVERY       0x400	 /* Auth all devices after RSCN */
+ #define FC_SCSI_SCAN_TMO        0x4000	/* scsi scan timer running */
+ #define FC_ABORT_DISCOVERY      0x8000	/* we want to abort discovery */
+ #define FC_NDISC_ACTIVE         0x10000	/* NPort discovery active */
+ #define FC_BYPASSED_MODE        0x20000	/* NPort is in bypassed mode */
+-#define FC_LOOPBACK_MODE        0x40000	/* NPort is in Loopback mode */
+-					/* This flag is set while issuing */
+-					/* INIT_LINK mailbox command */
+-#define FC_IGNORE_ERATT         0x80000	/* intr handler should ignore ERATT */
+-
+-	uint32_t fc_topology;	/* link topology, from LINK INIT */
+-
+-	struct lpfc_stats fc_stat;
++#define FC_RFF_NOT_SUPPORTED    0x40000	 /* RFF_ID was rejected by switch */
++#define FC_VPORT_NEEDS_REG_VPI	0x80000  /* Needs to have its vpi registered */
++#define FC_RSCN_DEFERRED	0x100000 /* A deferred RSCN being processed */
+ 
+ 	struct list_head fc_nodes;
+ 
+@@ -267,10 +286,131 @@
+ 	uint16_t fc_map_cnt;
+ 	uint16_t fc_npr_cnt;
+ 	uint16_t fc_unused_cnt;
++	struct serv_parm fc_sparam;	/* buffer for our service parameters */
++
++	uint32_t fc_myDID;	/* fibre channel S_ID */
++	uint32_t fc_prevDID;	/* previous fibre channel S_ID */
++
++	int32_t stopped;   /* HBA has not been restarted since last ERATT */
++	uint8_t fc_linkspeed;	/* Link speed after last READ_LA */
++
++	uint32_t num_disc_nodes;	/* in addition to hba_state */
++
++	uint32_t fc_nlp_cnt;	/* outstanding NODELIST requests */
++	uint32_t fc_rscn_id_cnt;	/* count of RSCNs payloads in list */
++	struct lpfc_dmabuf *fc_rscn_id_list[FC_MAX_HOLD_RSCN];
++	struct lpfc_name fc_nodename;	/* fc nodename */
++	struct lpfc_name fc_portname;	/* fc portname */
++
++	struct lpfc_work_evt disc_timeout_evt;
++
++	struct timer_list fc_disctmo;	/* Discovery rescue timer */
++	uint8_t fc_ns_retry;	/* retries for fabric nameserver */
++	uint32_t fc_prli_sent;	/* cntr for outstanding PRLIs */
++
++	spinlock_t work_port_lock;
++	uint32_t work_port_events; /* Timeout to be handled  */
++#define WORKER_DISC_TMO                0x1	/* vport: Discovery timeout */
++#define WORKER_ELS_TMO                 0x2	/* vport: ELS timeout */
++#define WORKER_FDMI_TMO                0x4	/* vport: FDMI timeout */
++
++#define WORKER_MBOX_TMO                0x100	/* hba: MBOX timeout */
++#define WORKER_HB_TMO                  0x200	/* hba: Heart beat timeout */
++#define WORKER_FABRIC_BLOCK_TMO        0x400	/* hba: fabric block timeout */
++#define WORKER_RAMP_DOWN_QUEUE         0x800	/* hba: Decrease Q depth */
++#define WORKER_RAMP_UP_QUEUE           0x1000	/* hba: Increase Q depth */
++
++	struct timer_list fc_fdmitmo;
++	struct timer_list els_tmofunc;
++
++	int unreg_vpi_cmpl;
++
++	uint8_t load_flag;
++#define FC_LOADING		0x1	/* HBA in process of loading drvr */
++#define FC_UNLOADING		0x2	/* HBA in process of unloading drvr */
++	char  *vname;		        /* Application assigned name */
++	struct fc_vport *fc_vport;
++
++#ifdef CONFIG_LPFC_DEBUG_FS
++	struct dentry *debug_disc_trc;
++	struct dentry *debug_nodelist;
++	struct dentry *vport_debugfs_root;
++	struct lpfc_disc_trc *disc_trc;
++	atomic_t disc_trc_cnt;
++#endif
++};
++
++struct hbq_s {
++	uint16_t entry_count;	  /* Current number of HBQ slots */
++	uint32_t next_hbqPutIdx;  /* Index to next HBQ slot to use */
++	uint32_t hbqPutIdx;	  /* HBQ slot to use */
++	uint32_t local_hbqGetIdx; /* Local copy of Get index from Port */
++};
++
++#define LPFC_MAX_HBQS  16
++/* this matches the position in the lpfc_hbq_defs array */
++#define LPFC_ELS_HBQ	0
++
++struct lpfc_hba {
++	struct lpfc_sli sli;
++	uint32_t sli_rev;		/* SLI2 or SLI3 */
++	uint32_t sli3_options;		/* Mask of enabled SLI3 options */
++#define LPFC_SLI3_ENABLED 	 0x01
++#define LPFC_SLI3_HBQ_ENABLED	 0x02
++#define LPFC_SLI3_NPIV_ENABLED	 0x04
++#define LPFC_SLI3_VPORT_TEARDOWN 0x08
++	uint32_t iocb_cmd_size;
++	uint32_t iocb_rsp_size;
++
++	enum hba_state link_state;
++	uint32_t link_flag;	/* link state flags */
++#define LS_LOOPBACK_MODE      0x1 	/* NPort is in Loopback mode */
++					/* This flag is set while issuing */
++					/* INIT_LINK mailbox command */
++#define LS_NPIV_FAB_SUPPORTED 0x2	/* Fabric supports NPIV */
++#define LS_IGNORE_ERATT       0x4	/* intr handler should ignore ERATT */
++
++	struct lpfc_sli2_slim *slim2p;
++	struct lpfc_dmabuf hbqslimp;
++
++	dma_addr_t slim2p_mapping;
++
++	uint16_t pci_cfg_value;
++
++	uint8_t work_found;
++#define LPFC_MAX_WORKER_ITERATION  4
++
++	uint8_t fc_linkspeed;	/* Link speed after last READ_LA */
++
++	uint32_t fc_eventTag;	/* event tag for link attention */
++
++	struct timer_list fc_estabtmo;	/* link establishment timer */
++	/* These fields used to be binfo */
++	uint32_t fc_pref_DID;	/* preferred D_ID */
++	uint8_t  fc_pref_ALPA;	/* preferred AL_PA */
++	uint32_t fc_edtov;	/* E_D_TOV timer value */
++	uint32_t fc_arbtov;	/* ARB_TOV timer value */
++	uint32_t fc_ratov;	/* R_A_TOV timer value */
++	uint32_t fc_rttov;	/* R_T_TOV timer value */
++	uint32_t fc_altov;	/* AL_TOV timer value */
++	uint32_t fc_crtov;	/* C_R_TOV timer value */
++	uint32_t fc_citov;	/* C_I_TOV timer value */
++
++	struct serv_parm fc_fabparam;	/* fabric service parameters buffer */
++	uint8_t alpa_map[128];	/* AL_PA map from READ_LA */
++
++	uint32_t lmt;
++
++	uint32_t fc_topology;	/* link topology, from LINK INIT */
++
++	struct lpfc_stats fc_stat;
++
+ 	struct lpfc_nodelist fc_fcpnodev; /* nodelist entry for no device */
+ 	uint32_t nport_event_cnt;	/* timestamp for nlplist entry */
+ 
+-	uint32_t wwnn[2];
++	uint8_t  wwnn[8];
++	uint8_t  wwpn[8];
+ 	uint32_t RandomData[7];
+ 
+ 	uint32_t cfg_log_verbose;
+@@ -278,6 +418,9 @@
+ 	uint32_t cfg_nodev_tmo;
+ 	uint32_t cfg_devloss_tmo;
+ 	uint32_t cfg_hba_queue_depth;
++	uint32_t cfg_peer_port_login;
++	uint32_t cfg_vport_restrict_login;
++	uint32_t cfg_npiv_enable;
+ 	uint32_t cfg_fcp_class;
+ 	uint32_t cfg_use_adisc;
+ 	uint32_t cfg_ack0;
+@@ -304,22 +447,20 @@
+ 
+ 	lpfc_vpd_t vpd;		/* vital product data */
+ 
+-	struct Scsi_Host *host;
+ 	struct pci_dev *pcidev;
+ 	struct list_head      work_list;
+ 	uint32_t              work_ha;      /* Host Attention Bits for WT */
+ 	uint32_t              work_ha_mask; /* HA Bits owned by WT        */
+ 	uint32_t              work_hs;      /* HS stored in case of ERRAT */
+ 	uint32_t              work_status[2]; /* Extra status from SLIM */
+-	uint32_t              work_hba_events; /* Timeout to be handled  */
+-#define WORKER_DISC_TMO                0x1	/* Discovery timeout */
+-#define WORKER_ELS_TMO                 0x2	/* ELS timeout */
+-#define WORKER_MBOX_TMO                0x4	/* MBOX timeout */
+-#define WORKER_FDMI_TMO                0x8	/* FDMI timeout */
+ 
+ 	wait_queue_head_t    *work_wait;
+ 	struct task_struct   *worker_thread;
+ 
++	struct list_head hbq_buffer_list;
++	uint32_t hbq_count;	        /* Count of configured HBQs */
++	struct hbq_s hbqs[LPFC_MAX_HBQS]; /* local copy of hbq indices  */
++
+ 	unsigned long pci_bar0_map;     /* Physical address for PCI BAR0 */
+ 	unsigned long pci_bar2_map;     /* Physical address for PCI BAR2 */
+ 	void __iomem *slim_memmap_p;	/* Kernel memory mapped address for
+@@ -334,6 +475,10 @@
+ 					   reg */
+ 	void __iomem *HCregaddr;	/* virtual address for host ctl reg */
+ 
++	struct lpfc_hgp __iomem *host_gp; /* Host side get/put pointers */
++	uint32_t __iomem  *hbq_put;     /* Address in SLIM to HBQ put ptrs */
++	uint32_t          *hbq_get;     /* Host mem address of HBQ get ptrs */
++
+ 	int brd_no;			/* FC board number */
+ 
+ 	char SerialNumber[32];		/* adapter Serial Number */
+@@ -353,7 +498,6 @@
+ 	uint8_t soft_wwn_enable;
+ 
+ 	struct timer_list fcp_poll_timer;
+-	struct timer_list els_tmofunc;
+ 
+ 	/*
+ 	 * stat  counters
+@@ -370,31 +514,69 @@
+ 	uint32_t total_scsi_bufs;
+ 	struct list_head lpfc_iocb_list;
+ 	uint32_t total_iocbq_bufs;
++	spinlock_t hbalock;
+ 
+ 	/* pci_mem_pools */
+ 	struct pci_pool *lpfc_scsi_dma_buf_pool;
+ 	struct pci_pool *lpfc_mbuf_pool;
++	struct pci_pool *lpfc_hbq_pool;
+ 	struct lpfc_dma_pool lpfc_mbuf_safety_pool;
+ 
+ 	mempool_t *mbox_mem_pool;
+ 	mempool_t *nlp_mem_pool;
+ 
+ 	struct fc_host_statistics link_stats;
++
++	struct list_head port_list;
++	struct lpfc_vport *pport; /* physical lpfc_vport pointer */
++	uint16_t max_vpi;	/* Maximum virtual nports */
++#define LPFC_MAX_VPI 100  /* Max number of VPorts supported */
++	unsigned long *vpi_bmask; /* vpi allocation table */
++
++	/* Data structure used by fabric iocb scheduler */
++	struct list_head fabric_iocb_list;
++	atomic_t fabric_iocb_count;
++	struct timer_list fabric_block_timer;
++	unsigned long bit_flags;
++#define	FABRIC_COMANDS_BLOCKED	0
++	atomic_t num_rsrc_err;
++	atomic_t num_cmd_success;
++	unsigned long last_rsrc_error_time;
++	unsigned long last_ramp_down_time;
++	unsigned long last_ramp_up_time;
++#ifdef CONFIG_LPFC_DEBUG_FS
++	struct dentry *hba_debugfs_root;
++	atomic_t debugfs_vport_count;
++#endif
++
++	/* Fields used for heart beat. */
++	unsigned long last_completion_time;
++	struct timer_list hb_tmofunc;
++	uint8_t hb_outstanding;
+ };
+ 
++static inline struct Scsi_Host *
++lpfc_shost_from_vport(struct lpfc_vport *vport)
++{
++	return container_of((void *) vport, struct Scsi_Host, hostdata[0]);
++}
++
+ static inline void
+-lpfc_set_loopback_flag(struct lpfc_hba *phba) {
++lpfc_set_loopback_flag(struct lpfc_hba *phba)
++{
+ 	if (phba->cfg_topology == FLAGS_LOCAL_LB)
+-		phba->fc_flag |= FC_LOOPBACK_MODE;
++		phba->link_flag |= LS_LOOPBACK_MODE;
+ 	else
+-		phba->fc_flag &= ~FC_LOOPBACK_MODE;
++		phba->link_flag &= ~LS_LOOPBACK_MODE;
+ }
+ 
+-struct rnidrsp {
+-	void *buf;
+-	uint32_t uniqueid;
+-	struct list_head list;
+-	uint32_t data;
+-};
++static inline int
++lpfc_is_link_up(struct lpfc_hba *phba)
++{
++	return  phba->link_state == LPFC_LINK_UP ||
++		phba->link_state == LPFC_CLEAR_LA ||
++		phba->link_state == LPFC_HBA_READY;
++}
+ 
+ #define FC_REG_DUMP_EVENT	0x10	/* Register for Dump events */
++
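/*
 * Editorial sketch (not part of the patch): lpfc_shost_from_vport()
 * above works because each lpfc_vport is allocated inside its
 * Scsi_Host's hostdata[] private area, so container_of() can walk from
 * the vport pointer back to the enclosing host.  A self-contained
 * illustration of the pattern, with made-up struct names:
 */
#include <stddef.h>

#define demo_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct demo_shost {
	int host_no;
	unsigned long hostdata[1];	/* driver-private area; vport lives here */
};

struct demo_vport {
	int vpi;
};

static struct demo_shost *demo_shost_from_vport(struct demo_vport *vport)
{
	/* vport == (void *)&shost->hostdata[0], so this recovers the host */
	return demo_container_of((void *)vport, struct demo_shost, hostdata[0]);
}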
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_attr.c linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_attr.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_attr.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_attr.c	2007-12-19 15:29:23.000000000 -0500
+@@ -39,6 +39,7 @@
+ #include "lpfc_version.h"
+ #include "lpfc_compat.h"
+ #include "lpfc_crtn.h"
++#include "lpfc_vport.h"
+ 
+ #define LPFC_DEF_DEVLOSS_TMO 30
+ #define LPFC_MIN_DEVLOSS_TMO 1
+@@ -76,116 +77,156 @@
+ lpfc_info_show(struct class_device *cdev, char *buf)
+ {
+ 	struct Scsi_Host *host = class_to_shost(cdev);
++
+ 	return snprintf(buf, PAGE_SIZE, "%s\n",lpfc_info(host));
+ }
+ 
+ static ssize_t
+ lpfc_serialnum_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++
+ 	return snprintf(buf, PAGE_SIZE, "%s\n",phba->SerialNumber);
+ }
+ 
+ static ssize_t
+ lpfc_modeldesc_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++
+ 	return snprintf(buf, PAGE_SIZE, "%s\n",phba->ModelDesc);
+ }
+ 
+ static ssize_t
+ lpfc_modelname_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++
+ 	return snprintf(buf, PAGE_SIZE, "%s\n",phba->ModelName);
+ }
+ 
+ static ssize_t
+ lpfc_programtype_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++
+ 	return snprintf(buf, PAGE_SIZE, "%s\n",phba->ProgramType);
+ }
+ 
+ static ssize_t
+-lpfc_portnum_show(struct class_device *cdev, char *buf)
++lpfc_vportnum_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++
+ 	return snprintf(buf, PAGE_SIZE, "%s\n",phba->Port);
+ }
+ 
+ static ssize_t
+ lpfc_fwrev_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	char fwrev[32];
++
+ 	lpfc_decode_firmware_rev(phba, fwrev, 1);
+-	return snprintf(buf, PAGE_SIZE, "%s\n",fwrev);
++	return snprintf(buf, PAGE_SIZE, "%s, sli-%d\n", fwrev, phba->sli_rev);
+ }
+ 
+ static ssize_t
+ lpfc_hdw_show(struct class_device *cdev, char *buf)
+ {
+ 	char hdw[9];
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	lpfc_vpd_t *vp = &phba->vpd;
++
+ 	lpfc_jedec_to_ascii(vp->rev.biuRev, hdw);
+ 	return snprintf(buf, PAGE_SIZE, "%s\n", hdw);
+ }
+ static ssize_t
+ lpfc_option_rom_version_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++
+ 	return snprintf(buf, PAGE_SIZE, "%s\n", phba->OptionROMVersion);
+ }
+ static ssize_t
+ lpfc_state_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	int len = 0;
+-	switch (phba->hba_state) {
+-	case LPFC_STATE_UNKNOWN:
++
++	switch (phba->link_state) {
++	case LPFC_LINK_UNKNOWN:
+ 	case LPFC_WARM_START:
+ 	case LPFC_INIT_START:
+ 	case LPFC_INIT_MBX_CMDS:
+ 	case LPFC_LINK_DOWN:
++	case LPFC_HBA_ERROR:
+ 		len += snprintf(buf + len, PAGE_SIZE-len, "Link Down\n");
+ 		break;
+ 	case LPFC_LINK_UP:
++	case LPFC_CLEAR_LA:
++	case LPFC_HBA_READY:
++		len += snprintf(buf + len, PAGE_SIZE-len, "Link Up - \n");
++
++		switch (vport->port_state) {
++		default:
++			len += snprintf(buf + len, PAGE_SIZE-len,
++					"initializing\n");
++			break;
+ 	case LPFC_LOCAL_CFG_LINK:
+-		len += snprintf(buf + len, PAGE_SIZE-len, "Link Up\n");
++			len += snprintf(buf + len, PAGE_SIZE-len,
++					"Configuring Link\n");
+ 		break;
++		case LPFC_FDISC:
+ 	case LPFC_FLOGI:
+ 	case LPFC_FABRIC_CFG_LINK:
+ 	case LPFC_NS_REG:
+ 	case LPFC_NS_QRY:
+ 	case LPFC_BUILD_DISC_LIST:
+ 	case LPFC_DISC_AUTH:
+-	case LPFC_CLEAR_LA:
+-		len += snprintf(buf + len, PAGE_SIZE-len,
+-				"Link Up - Discovery\n");
++			len += snprintf(buf + len, PAGE_SIZE - len,
++					"Discovery\n");
+ 		break;
+-	case LPFC_HBA_READY:
+-		len += snprintf(buf + len, PAGE_SIZE-len,
+-				"Link Up - Ready:\n");
++		case LPFC_VPORT_READY:
++			len += snprintf(buf + len, PAGE_SIZE - len, "Ready\n");
++			break;
++
++		case LPFC_VPORT_FAILED:
++			len += snprintf(buf + len, PAGE_SIZE - len, "Failed\n");
++			break;
++
++		case LPFC_VPORT_UNKNOWN:
++			len += snprintf(buf + len, PAGE_SIZE - len,
++					"Unknown\n");
++			break;
++		}
++
+ 		if (phba->fc_topology == TOPOLOGY_LOOP) {
+-			if (phba->fc_flag & FC_PUBLIC_LOOP)
++			if (vport->fc_flag & FC_PUBLIC_LOOP)
+ 				len += snprintf(buf + len, PAGE_SIZE-len,
+ 						"   Public Loop\n");
+ 			else
+ 				len += snprintf(buf + len, PAGE_SIZE-len,
+ 						"   Private Loop\n");
+ 		} else {
+-			if (phba->fc_flag & FC_FABRIC)
++			if (vport->fc_flag & FC_FABRIC)
+ 				len += snprintf(buf + len, PAGE_SIZE-len,
+ 						"   Fabric\n");
+ 			else
+@@ -193,29 +234,32 @@
+ 						"   Point-2-Point\n");
+ 		}
+ 	}
++
+ 	return len;
+ }
+ 
+ static ssize_t
+ lpfc_num_discovered_ports_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
+-	return snprintf(buf, PAGE_SIZE, "%d\n", phba->fc_map_cnt +
+-							phba->fc_unmap_cnt);
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++
++	return snprintf(buf, PAGE_SIZE, "%d\n",
++			vport->fc_map_cnt + vport->fc_unmap_cnt);
+ }
+ 
+ 
+ static int
+-lpfc_issue_lip(struct Scsi_Host *host)
++lpfc_issue_lip(struct Scsi_Host *shost)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba *) host->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	LPFC_MBOXQ_t *pmboxq;
+ 	int mbxstatus = MBXERR_ERROR;
+ 
+-	if ((phba->fc_flag & FC_OFFLINE_MODE) ||
+-	    (phba->fc_flag & FC_BLOCK_MGMT_IO) ||
+-	    (phba->hba_state != LPFC_HBA_READY))
++	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
++	    (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) ||
++	    (vport->port_state != LPFC_VPORT_READY))
+ 		return -EPERM;
+ 
+ 	pmboxq = mempool_alloc(phba->mbox_mem_pool,GFP_KERNEL);
+@@ -238,9 +282,7 @@
+ 	}
+ 
+ 	lpfc_set_loopback_flag(phba);
+-	if (mbxstatus == MBX_TIMEOUT)
+-		pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+-	else
++	if (mbxstatus != MBX_TIMEOUT)
+ 		mempool_free(pmboxq, phba->mbox_mem_pool);
+ 
+ 	if (mbxstatus == MBXERR_ERROR)
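/*
 * Editorial note (sketch, not patch content): the hunk above, and the
 * matching hunks in lpfc_get_stats()/lpfc_reset_stats() later on, all
 * converge on one ownership rule for mailbox buffers:
 */
static void demo_finish_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq,
			     int mbxstatus)
{
	/*
	 * A command that timed out may still be completed by the adapter
	 * later, so only non-timeout paths may free the buffer; on
	 * MBX_TIMEOUT the completion handler releases it instead.
	 */
	if (mbxstatus != MBX_TIMEOUT)
		mempool_free(pmboxq, phba->mbox_mem_pool);
}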
+@@ -320,8 +362,10 @@
+ static ssize_t
+ lpfc_issue_reset(struct class_device *cdev, const char *buf, size_t count)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++
+ 	int status = -EINVAL;
+ 
+ 	if (strncmp(buf, "selective", sizeof("selective") - 1) == 0)
+@@ -336,23 +380,26 @@
+ static ssize_t
+ lpfc_nport_evt_cnt_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++
+ 	return snprintf(buf, PAGE_SIZE, "%d\n", phba->nport_event_cnt);
+ }
+ 
+ static ssize_t
+ lpfc_board_mode_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	char  * state;
+ 
+-	if (phba->hba_state == LPFC_HBA_ERROR)
++	if (phba->link_state == LPFC_HBA_ERROR)
+ 		state = "error";
+-	else if (phba->hba_state == LPFC_WARM_START)
++	else if (phba->link_state == LPFC_WARM_START)
+ 		state = "warm start";
+-	else if (phba->hba_state == LPFC_INIT_START)
++	else if (phba->link_state == LPFC_INIT_START)
+ 		state = "offline";
+ 	else
+ 		state = "online";
+@@ -363,8 +410,9 @@
+ static ssize_t
+ lpfc_board_mode_store(struct class_device *cdev, const char *buf, size_t count)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct completion online_compl;
+ 	int status=0;
+ 
+@@ -389,11 +437,166 @@
+ 		return -EIO;
+ }
+ 
++int
++lpfc_get_hba_info(struct lpfc_hba *phba,
++		  uint32_t *mxri, uint32_t *axri,
++		  uint32_t *mrpi, uint32_t *arpi,
++		  uint32_t *mvpi, uint32_t *avpi)
++{
++	struct lpfc_sli   *psli = &phba->sli;
++	LPFC_MBOXQ_t *pmboxq;
++	MAILBOX_t *pmb;
++	int rc = 0;
++
++	/*
++	 * prevent udev from issuing mailbox commands until the port is
++	 * configured.
++	 */
++	if (phba->link_state < LPFC_LINK_DOWN ||
++	    !phba->mbox_mem_pool ||
++	    (phba->sli.sli_flag & LPFC_SLI2_ACTIVE) == 0)
++		return 0;
++
++	if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO)
++		return 0;
++
++	pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++	if (!pmboxq)
++		return 0;
++	memset(pmboxq, 0, sizeof (LPFC_MBOXQ_t));
++
++	pmb = &pmboxq->mb;
++	pmb->mbxCommand = MBX_READ_CONFIG;
++	pmb->mbxOwner = OWN_HOST;
++	pmboxq->context1 = NULL;
++
++	if ((phba->pport->fc_flag & FC_OFFLINE_MODE) ||
++		(!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
++		rc = MBX_NOT_FINISHED;
++	else
++		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
++
++	if (rc != MBX_SUCCESS) {
++		if (rc != MBX_TIMEOUT)
++			mempool_free(pmboxq, phba->mbox_mem_pool);
++		return 0;
++	}
++
++	if (mrpi)
++		*mrpi = pmb->un.varRdConfig.max_rpi;
++	if (arpi)
++		*arpi = pmb->un.varRdConfig.avail_rpi;
++	if (mxri)
++		*mxri = pmb->un.varRdConfig.max_xri;
++	if (axri)
++		*axri = pmb->un.varRdConfig.avail_xri;
++	if (mvpi)
++		*mvpi = pmb->un.varRdConfig.max_vpi;
++	if (avpi)
++		*avpi = pmb->un.varRdConfig.avail_vpi;
++
++	mempool_free(pmboxq, phba->mbox_mem_pool);
++	return 1;
++}
++
++static ssize_t
++lpfc_max_rpi_show(struct class_device *cdev, char *buf)
++{
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++	uint32_t cnt;
++
++	if (lpfc_get_hba_info(phba, NULL, NULL, &cnt, NULL, NULL, NULL))
++		return snprintf(buf, PAGE_SIZE, "%d\n", cnt);
++	return snprintf(buf, PAGE_SIZE, "Unknown\n");
++}
++
++static ssize_t
++lpfc_used_rpi_show(struct class_device *cdev, char *buf)
++{
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++	uint32_t cnt, acnt;
++
++	if (lpfc_get_hba_info(phba, NULL, NULL, &cnt, &acnt, NULL, NULL))
++		return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt));
++	return snprintf(buf, PAGE_SIZE, "Unknown\n");
++}
++
++static ssize_t
++lpfc_max_xri_show(struct class_device *cdev, char *buf)
++{
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++	uint32_t cnt;
++
++	if (lpfc_get_hba_info(phba, &cnt, NULL, NULL, NULL, NULL, NULL))
++		return snprintf(buf, PAGE_SIZE, "%d\n", cnt);
++	return snprintf(buf, PAGE_SIZE, "Unknown\n");
++}
++
++static ssize_t
++lpfc_used_xri_show(struct class_device *cdev, char *buf)
++{
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++	uint32_t cnt, acnt;
++
++	if (lpfc_get_hba_info(phba, &cnt, &acnt, NULL, NULL, NULL, NULL))
++		return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt));
++	return snprintf(buf, PAGE_SIZE, "Unknown\n");
++}
++
++static ssize_t
++lpfc_max_vpi_show(struct class_device *cdev, char *buf)
++{
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++	uint32_t cnt;
++
++	if (lpfc_get_hba_info(phba, NULL, NULL, NULL, NULL, &cnt, NULL))
++		return snprintf(buf, PAGE_SIZE, "%d\n", cnt);
++	return snprintf(buf, PAGE_SIZE, "Unknown\n");
++}
++
++static ssize_t
++lpfc_used_vpi_show(struct class_device *cdev, char *buf)
++{
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++	uint32_t cnt, acnt;
++
++	if (lpfc_get_hba_info(phba, NULL, NULL, NULL, NULL, &cnt, &acnt))
++		return snprintf(buf, PAGE_SIZE, "%d\n", (cnt - acnt));
++	return snprintf(buf, PAGE_SIZE, "Unknown\n");
++}
++
++static ssize_t
++lpfc_npiv_info_show(struct class_device *cdev, char *buf)
++{
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++
++	if (!(phba->max_vpi))
++		return snprintf(buf, PAGE_SIZE, "NPIV Not Supported\n");
++	if (vport->port_type == LPFC_PHYSICAL_PORT)
++		return snprintf(buf, PAGE_SIZE, "NPIV Physical\n");
++	return snprintf(buf, PAGE_SIZE, "NPIV Virtual (VPI %d)\n", vport->vpi);
++}
++
+ static ssize_t
+ lpfc_poll_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 
+ 	return snprintf(buf, PAGE_SIZE, "%#x\n", phba->cfg_poll);
+ }
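/*
 * Editorial sketch (demo_used_rpi is a made-up name): how the used_*
 * attributes above derive their values.  lpfc_get_hba_info() fills only
 * the non-NULL out-pointers and returns 1 on success, 0 when the
 * READ_CONFIG mailbox command cannot be issued.
 */
static ssize_t demo_used_rpi(struct lpfc_hba *phba, char *buf)
{
	uint32_t max_rpi, avail_rpi;

	if (!lpfc_get_hba_info(phba, NULL, NULL, &max_rpi, &avail_rpi,
			       NULL, NULL))
		return snprintf(buf, PAGE_SIZE, "Unknown\n");

	/* "used" is always reported as the maximum minus what is free */
	return snprintf(buf, PAGE_SIZE, "%d\n", max_rpi - avail_rpi);
}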
+@@ -402,8 +605,9 @@
+ lpfc_poll_store(struct class_device *cdev, const char *buf,
+ 		size_t count)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	uint32_t creg_val;
+ 	uint32_t old_val;
+ 	int val=0;
+@@ -417,7 +621,7 @@
+ 	if ((val & 0x3) != val)
+ 		return -EINVAL;
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 
+ 	old_val = phba->cfg_poll;
+ 
+@@ -432,16 +636,16 @@
+ 			lpfc_poll_start_timer(phba);
+ 		}
+ 	} else if (val != 0x0) {
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(&phba->hbalock);
+ 		return -EINVAL;
+ 	}
+ 
+ 	if (!(val & DISABLE_FCP_RING_INT) &&
+ 	    (old_val & DISABLE_FCP_RING_INT))
+ 	{
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(&phba->hbalock);
+ 		del_timer(&phba->fcp_poll_timer);
+-		spin_lock_irq(phba->host->host_lock);
++		spin_lock_irq(&phba->hbalock);
+ 		creg_val = readl(phba->HCregaddr);
+ 		creg_val |= (HC_R0INT_ENA << LPFC_FCP_RING);
+ 		writel(creg_val, phba->HCregaddr);
+@@ -450,7 +654,7 @@
+ 
+ 	phba->cfg_poll = val;
+ 
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	return strlen(buf);
+ }
+@@ -459,8 +663,9 @@
+ static ssize_t \
+ lpfc_##attr##_show(struct class_device *cdev, char *buf) \
+ { \
+-	struct Scsi_Host *host = class_to_shost(cdev);\
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;\
++	struct Scsi_Host  *shost = class_to_shost(cdev);\
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\
++	struct lpfc_hba   *phba = vport->phba;\
+ 	int val = 0;\
+ 	val = phba->cfg_##attr;\
+ 	return snprintf(buf, PAGE_SIZE, "%d\n",\
+@@ -471,8 +676,9 @@
+ static ssize_t \
+ lpfc_##attr##_show(struct class_device *cdev, char *buf) \
+ { \
+-	struct Scsi_Host *host = class_to_shost(cdev);\
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;\
++	struct Scsi_Host  *shost = class_to_shost(cdev);\
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\
++	struct lpfc_hba   *phba = vport->phba;\
+ 	int val = 0;\
+ 	val = phba->cfg_##attr;\
+ 	return snprintf(buf, PAGE_SIZE, "%#x\n",\
+@@ -514,8 +720,9 @@
+ static ssize_t \
+ lpfc_##attr##_store(struct class_device *cdev, const char *buf, size_t count) \
+ { \
+-	struct Scsi_Host *host = class_to_shost(cdev);\
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;\
++	struct Scsi_Host  *shost = class_to_shost(cdev);\
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;\
++	struct lpfc_hba   *phba = vport->phba;\
+ 	int val=0;\
+ 	if (!isdigit(buf[0]))\
+ 		return -EINVAL;\
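/*
 * Editorial note: for a hypothetical attribute "foo" (made-up name),
 * the show-side macro above expands mechanically to:
 */
static ssize_t
lpfc_foo_show(struct class_device *cdev, char *buf)
{
	struct Scsi_Host  *shost = class_to_shost(cdev);
	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
	struct lpfc_hba   *phba = vport->phba;
	int val = 0;

	/* every generated handler digs vport/phba out of hostdata */
	val = phba->cfg_foo;
	return snprintf(buf, PAGE_SIZE, "%d\n", val);
}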
+@@ -576,7 +783,7 @@
+ static CLASS_DEVICE_ATTR(modeldesc, S_IRUGO, lpfc_modeldesc_show, NULL);
+ static CLASS_DEVICE_ATTR(modelname, S_IRUGO, lpfc_modelname_show, NULL);
+ static CLASS_DEVICE_ATTR(programtype, S_IRUGO, lpfc_programtype_show, NULL);
+-static CLASS_DEVICE_ATTR(portnum, S_IRUGO, lpfc_portnum_show, NULL);
++static CLASS_DEVICE_ATTR(portnum, S_IRUGO, lpfc_vportnum_show, NULL);
+ static CLASS_DEVICE_ATTR(fwrev, S_IRUGO, lpfc_fwrev_show, NULL);
+ static CLASS_DEVICE_ATTR(hdw, S_IRUGO, lpfc_hdw_show, NULL);
+ static CLASS_DEVICE_ATTR(state, S_IRUGO, lpfc_state_show, NULL);
+@@ -592,6 +799,13 @@
+ static CLASS_DEVICE_ATTR(board_mode, S_IRUGO | S_IWUSR,
+ 			 lpfc_board_mode_show, lpfc_board_mode_store);
+ static CLASS_DEVICE_ATTR(issue_reset, S_IWUSR, NULL, lpfc_issue_reset);
++static CLASS_DEVICE_ATTR(max_vpi, S_IRUGO, lpfc_max_vpi_show, NULL);
++static CLASS_DEVICE_ATTR(used_vpi, S_IRUGO, lpfc_used_vpi_show, NULL);
++static CLASS_DEVICE_ATTR(max_rpi, S_IRUGO, lpfc_max_rpi_show, NULL);
++static CLASS_DEVICE_ATTR(used_rpi, S_IRUGO, lpfc_used_rpi_show, NULL);
++static CLASS_DEVICE_ATTR(max_xri, S_IRUGO, lpfc_max_xri_show, NULL);
++static CLASS_DEVICE_ATTR(used_xri, S_IRUGO, lpfc_used_xri_show, NULL);
++static CLASS_DEVICE_ATTR(npiv_info, S_IRUGO, lpfc_npiv_info_show, NULL);
+ 
+ 
+ static char *lpfc_soft_wwn_key = "C99G71SL8032A";
+@@ -600,8 +814,9 @@
+ lpfc_soft_wwn_enable_store(struct class_device *cdev, const char *buf,
+ 				size_t count)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	unsigned int cnt = count;
+ 
+ 	/*
+@@ -634,8 +849,10 @@
+ static ssize_t
+ lpfc_soft_wwpn_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++
+ 	return snprintf(buf, PAGE_SIZE, "0x%llx\n",
+ 			(unsigned long long)phba->cfg_soft_wwpn);
+ }
+@@ -644,8 +861,9 @@
+ static ssize_t
+ lpfc_soft_wwpn_store(struct class_device *cdev, const char *buf, size_t count)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct completion online_compl;
+ 	int stat1=0, stat2=0;
+ 	unsigned int i, j, cnt=count;
+@@ -680,9 +898,9 @@
+ 		}
+ 	}
+ 	phba->cfg_soft_wwpn = wwn_to_u64(wwpn);
+-	fc_host_port_name(host) = phba->cfg_soft_wwpn;
++	fc_host_port_name(shost) = phba->cfg_soft_wwpn;
+ 	if (phba->cfg_soft_wwnn)
+-		fc_host_node_name(host) = phba->cfg_soft_wwnn;
++		fc_host_node_name(shost) = phba->cfg_soft_wwnn;
+ 
+ 	dev_printk(KERN_NOTICE, &phba->pcidev->dev,
+ 		   "lpfc%d: Reinitializing to use soft_wwpn\n", phba->brd_no);
+@@ -777,6 +995,15 @@
+ static CLASS_DEVICE_ATTR(lpfc_poll, S_IRUGO | S_IWUSR,
+ 			 lpfc_poll_show, lpfc_poll_store);
+ 
++int  lpfc_sli_mode = 0;
++module_param(lpfc_sli_mode, int, 0);
++MODULE_PARM_DESC(lpfc_sli_mode, "SLI mode selector:"
++		 " 0 - auto (SLI-3 if supported),"
++		 " 2 - select SLI-2 even on SLI-3 capable HBAs,"
++		 " 3 - select SLI-3");
++
++LPFC_ATTR_R(npiv_enable, 0, 0, 1, "Enable NPIV functionality");
++
+ /*
+ # lpfc_nodev_tmo: If set, it will hold all I/O errors on devices that disappear
+ # until the timer expires. Value range is [0,255]. Default value is 30.
+@@ -790,8 +1017,9 @@
+ static ssize_t
+ lpfc_nodev_tmo_show(struct class_device *cdev, char *buf)
+ {
+-	struct Scsi_Host *host = class_to_shost(cdev);
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	int val = 0;
+ 	val = phba->cfg_devloss_tmo;
+ 	return snprintf(buf, PAGE_SIZE, "%d\n",
+@@ -832,13 +1060,19 @@
+ static void
+ lpfc_update_rport_devloss_tmo(struct lpfc_hba *phba)
+ {
++	struct lpfc_vport *vport;
++	struct Scsi_Host  *shost;
+ 	struct lpfc_nodelist  *ndlp;
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp)
++	list_for_each_entry(vport, &phba->port_list, listentry) {
++		shost = lpfc_shost_from_vport(vport);
++		spin_lock_irq(shost->host_lock);
++		list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp)
+ 		if (ndlp->rport)
+-			ndlp->rport->dev_loss_tmo = phba->cfg_devloss_tmo;
+-	spin_unlock_irq(phba->host->host_lock);
++				ndlp->rport->dev_loss_tmo =
++					phba->cfg_devloss_tmo;
++		spin_unlock_irq(shost->host_lock);
++	}
+ }
+ 
+ static int
+@@ -946,6 +1180,33 @@
+ 	    "Max number of FCP commands we can queue to a lpfc HBA");
+ 
+ /*
++# peer_port_login:  This parameter allows/prevents logins
++# between peer ports hosted on the same physical port.
++# When this parameter is set to 0, peer ports on the same physical
++# port are not allowed to log in to each other.
++# When this parameter is set to 1, peer ports on the same physical
++# port are allowed to log in to each other.
++# The default value of this parameter is 0.
++*/
++LPFC_ATTR_R(peer_port_login, 0, 0, 1,
++	    "Allow peer ports on the same physical port to login to each "
++	    "other.");
++
++/*
++# vport_restrict_login:  This parameter allows/prevents logins
++# between Virtual Ports and remote initiators.
++# When this parameter is not set (0), Virtual Ports will accept PLOGIs from
++# other initiators and will attempt to PLOGI to all remote ports.
++# When this parameter is set (1), Virtual Ports will reject PLOGIs from
++# remote ports and will not attempt to PLOGI to other initiators.
++# This parameter does not apply to the physical port.
++# This parameter does not restrict logins to Fabric-resident remote ports.
++# The default value of this parameter is 1.
++*/
++LPFC_ATTR_RW(vport_restrict_login, 1, 0, 1,
++	    "Restrict virtual ports login to remote initiators.");
++
++/*
+ # Some disk devices have a "select ID" or "select Target" capability.
+ # From a protocol standpoint "select ID" usually means select the
+ # Fibre channel "ALPA".  In the FC-AL Profile there is an "informative
+@@ -1088,7 +1349,8 @@
+ LPFC_ATTR_R(use_msi, 0, 0, 1, "Use Message Signaled Interrupts, if possible");
+ 
+ 
+-struct class_device_attribute *lpfc_host_attrs[] = {
++
++struct class_device_attribute *lpfc_hba_attrs[] = {
+ 	&class_device_attr_info,
+ 	&class_device_attr_serialnum,
+ 	&class_device_attr_modeldesc,
+@@ -1104,6 +1366,8 @@
+ 	&class_device_attr_lpfc_log_verbose,
+ 	&class_device_attr_lpfc_lun_queue_depth,
+ 	&class_device_attr_lpfc_hba_queue_depth,
++	&class_device_attr_lpfc_peer_port_login,
++	&class_device_attr_lpfc_vport_restrict_login,
+ 	&class_device_attr_lpfc_nodev_tmo,
+ 	&class_device_attr_lpfc_devloss_tmo,
+ 	&class_device_attr_lpfc_fcp_class,
+@@ -1119,9 +1383,17 @@
+ 	&class_device_attr_lpfc_multi_ring_type,
+ 	&class_device_attr_lpfc_fdmi_on,
+ 	&class_device_attr_lpfc_max_luns,
++	&class_device_attr_lpfc_npiv_enable,
+ 	&class_device_attr_nport_evt_cnt,
+ 	&class_device_attr_management_version,
+ 	&class_device_attr_board_mode,
++	&class_device_attr_max_vpi,
++	&class_device_attr_used_vpi,
++	&class_device_attr_max_rpi,
++	&class_device_attr_used_rpi,
++	&class_device_attr_max_xri,
++	&class_device_attr_used_xri,
++	&class_device_attr_npiv_info,
+ 	&class_device_attr_issue_reset,
+ 	&class_device_attr_lpfc_poll,
+ 	&class_device_attr_lpfc_poll_tmo,
+@@ -1136,9 +1408,11 @@
+ sysfs_ctlreg_write(struct kobject *kobj, char *buf, loff_t off, size_t count)
+ {
+ 	size_t buf_off;
+-	struct Scsi_Host *host = class_to_shost(container_of(kobj,
+-					     struct class_device, kobj));
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct class_device *cdev = container_of(kobj, struct class_device,
++						 kobj);
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 
+ 	if ((off + count) > FF_REG_AREA_SIZE)
+ 		return -ERANGE;
+@@ -1148,18 +1422,16 @@
+ 	if (off % 4 || count % 4 || (unsigned long)buf % 4)
+ 		return -EINVAL;
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-
+-	if (!(phba->fc_flag & FC_OFFLINE_MODE)) {
+-		spin_unlock_irq(phba->host->host_lock);
++	if (!(vport->fc_flag & FC_OFFLINE_MODE)) {
+ 		return -EPERM;
+ 	}
+ 
++	spin_lock_irq(&phba->hbalock);
+ 	for (buf_off = 0; buf_off < count; buf_off += sizeof(uint32_t))
+ 		writel(*((uint32_t *)(buf + buf_off)),
+ 		       phba->ctrl_regs_memmap_p + off + buf_off);
+ 
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	return count;
+ }
+@@ -1169,9 +1441,11 @@
+ {
+ 	size_t buf_off;
+ 	uint32_t * tmp_ptr;
+-	struct Scsi_Host *host = class_to_shost(container_of(kobj,
+-					     struct class_device, kobj));
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct class_device *cdev = container_of(kobj, struct class_device,
++						 kobj);
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 
+ 	if (off > FF_REG_AREA_SIZE)
+ 		return -ERANGE;
+@@ -1184,14 +1458,14 @@
+ 	if (off % 4 || count % 4 || (unsigned long)buf % 4)
+ 		return -EINVAL;
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 
+ 	for (buf_off = 0; buf_off < count; buf_off += sizeof(uint32_t)) {
+ 		tmp_ptr = (uint32_t *)(buf + buf_off);
+ 		*tmp_ptr = readl(phba->ctrl_regs_memmap_p + off + buf_off);
+ 	}
+ 
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	return count;
+ }
+@@ -1200,7 +1474,6 @@
+ 	.attr = {
+ 		.name = "ctlreg",
+ 		.mode = S_IRUSR | S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = 256,
+ 	.read = sysfs_ctlreg_read,
+@@ -1209,7 +1482,7 @@
+ 
+ 
+ static void
+-sysfs_mbox_idle (struct lpfc_hba * phba)
++sysfs_mbox_idle(struct lpfc_hba *phba)
+ {
+ 	phba->sysfs_mbox.state = SMBOX_IDLE;
+ 	phba->sysfs_mbox.offset = 0;
+@@ -1224,10 +1497,12 @@
+ static ssize_t
+ sysfs_mbox_write(struct kobject *kobj, char *buf, loff_t off, size_t count)
+ {
+-	struct Scsi_Host * host =
+-		class_to_shost(container_of(kobj, struct class_device, kobj));
+-	struct lpfc_hba * phba = (struct lpfc_hba*)host->hostdata;
+-	struct lpfcMboxq * mbox = NULL;
++	struct class_device *cdev = container_of(kobj, struct class_device,
++						 kobj);
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++	struct lpfcMboxq  *mbox = NULL;
+ 
+ 	if ((count + off) > MAILBOX_CMD_SIZE)
+ 		return -ERANGE;
+@@ -1245,7 +1520,7 @@
+ 		memset(mbox, 0, sizeof (LPFC_MBOXQ_t));
+ 	}
+ 
+-	spin_lock_irq(host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 
+ 	if (off == 0) {
+ 		if (phba->sysfs_mbox.mbox)
+@@ -1256,9 +1531,9 @@
+ 	} else {
+ 		if (phba->sysfs_mbox.state  != SMBOX_WRITING ||
+ 		    phba->sysfs_mbox.offset != off           ||
+-		    phba->sysfs_mbox.mbox   == NULL ) {
++		    phba->sysfs_mbox.mbox   == NULL) {
+ 			sysfs_mbox_idle(phba);
+-			spin_unlock_irq(host->host_lock);
++			spin_unlock_irq(&phba->hbalock);
+ 			return -EAGAIN;
+ 		}
+ 	}
+@@ -1268,7 +1543,7 @@
+ 
+ 	phba->sysfs_mbox.offset = off + count;
+ 
+-	spin_unlock_irq(host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	return count;
+ }
+@@ -1276,10 +1551,11 @@
+ static ssize_t
+ sysfs_mbox_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
+ {
+-	struct Scsi_Host *host =
+-		class_to_shost(container_of(kobj, struct class_device,
+-					    kobj));
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct class_device *cdev = container_of(kobj, struct class_device,
++						 kobj);
++	struct Scsi_Host  *shost = class_to_shost(cdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	int rc;
+ 
+ 	if (off > MAILBOX_CMD_SIZE)
+@@ -1294,7 +1570,7 @@
+ 	if (off && count == 0)
+ 		return 0;
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 
+ 	if (off == 0 &&
+ 	    phba->sysfs_mbox.state  == SMBOX_WRITING &&
+@@ -1317,12 +1593,12 @@
+ 		case MBX_SET_MASK:
+ 		case MBX_SET_SLIM:
+ 		case MBX_SET_DEBUG:
+-			if (!(phba->fc_flag & FC_OFFLINE_MODE)) {
++			if (!(vport->fc_flag & FC_OFFLINE_MODE)) {
+ 				printk(KERN_WARNING "mbox_read:Command 0x%x "
+ 				       "is illegal in on-line state\n",
+ 				       phba->sysfs_mbox.mbox->mb.mbxCommand);
+ 				sysfs_mbox_idle(phba);
+-				spin_unlock_irq(phba->host->host_lock);
++				spin_unlock_irq(&phba->hbalock);
+ 				return -EPERM;
+ 			}
+ 		case MBX_LOAD_SM:
+@@ -1352,48 +1628,48 @@
+ 			printk(KERN_WARNING "mbox_read: Illegal Command 0x%x\n",
+ 			       phba->sysfs_mbox.mbox->mb.mbxCommand);
+ 			sysfs_mbox_idle(phba);
+-			spin_unlock_irq(phba->host->host_lock);
++			spin_unlock_irq(&phba->hbalock);
+ 			return -EPERM;
+ 		default:
+ 			printk(KERN_WARNING "mbox_read: Unknown Command 0x%x\n",
+ 			       phba->sysfs_mbox.mbox->mb.mbxCommand);
+ 			sysfs_mbox_idle(phba);
+-			spin_unlock_irq(phba->host->host_lock);
++			spin_unlock_irq(&phba->hbalock);
+ 			return -EPERM;
+ 		}
+ 
+-		if (phba->fc_flag & FC_BLOCK_MGMT_IO) {
++		phba->sysfs_mbox.mbox->vport = vport;
++
++		if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO) {
+ 			sysfs_mbox_idle(phba);
+-			spin_unlock_irq(host->host_lock);
++			spin_unlock_irq(&phba->hbalock);
+ 			return  -EAGAIN;
+ 		}
+ 
+-		if ((phba->fc_flag & FC_OFFLINE_MODE) ||
++		if ((vport->fc_flag & FC_OFFLINE_MODE) ||
+ 		    (!(phba->sli.sli_flag & LPFC_SLI2_ACTIVE))){
+ 
+-			spin_unlock_irq(phba->host->host_lock);
++			spin_unlock_irq(&phba->hbalock);
+ 			rc = lpfc_sli_issue_mbox (phba,
+ 						  phba->sysfs_mbox.mbox,
+ 						  MBX_POLL);
+-			spin_lock_irq(phba->host->host_lock);
++			spin_lock_irq(&phba->hbalock);
+ 
+ 		} else {
+-			spin_unlock_irq(phba->host->host_lock);
++			spin_unlock_irq(&phba->hbalock);
+ 			rc = lpfc_sli_issue_mbox_wait (phba,
+ 						       phba->sysfs_mbox.mbox,
+ 				lpfc_mbox_tmo_val(phba,
+ 				    phba->sysfs_mbox.mbox->mb.mbxCommand) * HZ);
+-			spin_lock_irq(phba->host->host_lock);
++			spin_lock_irq(&phba->hbalock);
+ 		}
+ 
+ 		if (rc != MBX_SUCCESS) {
+ 			if (rc == MBX_TIMEOUT) {
+-				phba->sysfs_mbox.mbox->mbox_cmpl =
+-					lpfc_sli_def_mbox_cmpl;
+ 				phba->sysfs_mbox.mbox = NULL;
+ 			}
+ 			sysfs_mbox_idle(phba);
+-			spin_unlock_irq(host->host_lock);
++			spin_unlock_irq(&phba->hbalock);
+ 			return  (rc == MBX_TIMEOUT) ? -ETIME : -ENODEV;
+ 		}
+ 		phba->sysfs_mbox.state = SMBOX_READING;
+@@ -1402,7 +1678,7 @@
+ 		 phba->sysfs_mbox.state  != SMBOX_READING) {
+ 		printk(KERN_WARNING  "mbox_read: Bad State\n");
+ 		sysfs_mbox_idle(phba);
+-		spin_unlock_irq(host->host_lock);
++		spin_unlock_irq(&phba->hbalock);
+ 		return -EAGAIN;
+ 	}
+ 
+@@ -1413,7 +1689,7 @@
+ 	if (phba->sysfs_mbox.offset == MAILBOX_CMD_SIZE)
+ 		sysfs_mbox_idle(phba);
+ 
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	return count;
+ }
+@@ -1422,7 +1698,6 @@
+ 	.attr = {
+ 		.name = "mbox",
+ 		.mode = S_IRUSR | S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = MAILBOX_CMD_SIZE,
+ 	.read = sysfs_mbox_read,
+@@ -1430,35 +1705,35 @@
+ };
+ 
+ int
+-lpfc_alloc_sysfs_attr(struct lpfc_hba *phba)
++lpfc_alloc_sysfs_attr(struct lpfc_vport *vport)
+ {
+-	struct Scsi_Host *host = phba->host;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ 	int error;
+ 
+-	error = sysfs_create_bin_file(&host->shost_classdev.kobj,
++	error = sysfs_create_bin_file(&shost->shost_classdev.kobj,
+ 							&sysfs_ctlreg_attr);
+ 	if (error)
+ 		goto out;
+ 
+-	error = sysfs_create_bin_file(&host->shost_classdev.kobj,
++	error = sysfs_create_bin_file(&shost->shost_classdev.kobj,
+ 							&sysfs_mbox_attr);
+ 	if (error)
+ 		goto out_remove_ctlreg_attr;
+ 
+ 	return 0;
+ out_remove_ctlreg_attr:
+-	sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_ctlreg_attr);
++	sysfs_remove_bin_file(&shost->shost_classdev.kobj, &sysfs_ctlreg_attr);
+ out:
+ 	return error;
+ }
+ 
+ void
+-lpfc_free_sysfs_attr(struct lpfc_hba *phba)
++lpfc_free_sysfs_attr(struct lpfc_vport *vport)
+ {
+-	struct Scsi_Host *host = phba->host;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ 
+-	sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_mbox_attr);
+-	sysfs_remove_bin_file(&host->shost_classdev.kobj, &sysfs_ctlreg_attr);
++	sysfs_remove_bin_file(&shost->shost_classdev.kobj, &sysfs_mbox_attr);
++	sysfs_remove_bin_file(&shost->shost_classdev.kobj, &sysfs_ctlreg_attr);
+ }
+ 
+ 
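/*
 * Editorial sketch of the goto-unwind error handling used by
 * lpfc_alloc_sysfs_attr() above (generic names, not patch content):
 * acquire resources in order and, on failure, release everything
 * acquired so far before propagating the error.
 */
static int demo_register_two(struct kobject *kobj,
			     struct bin_attribute *a,
			     struct bin_attribute *b)
{
	int error;

	error = sysfs_create_bin_file(kobj, a);
	if (error)
		goto out;

	error = sysfs_create_bin_file(kobj, b);
	if (error)
		goto out_remove_a;

	return 0;

out_remove_a:
	sysfs_remove_bin_file(kobj, a);	/* undo the first registration */
out:
	return error;
}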
+@@ -1469,26 +1744,30 @@
+ static void
+ lpfc_get_host_port_id(struct Scsi_Host *shost)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++
+ 	/* note: fc_myDID already in cpu endianness */
+-	fc_host_port_id(shost) = phba->fc_myDID;
++	fc_host_port_id(shost) = vport->fc_myDID;
+ }
+ 
+ static void
+ lpfc_get_host_port_type(struct Scsi_Host *shost)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 
+ 	spin_lock_irq(shost->host_lock);
+ 
+-	if (phba->hba_state == LPFC_HBA_READY) {
++	if (vport->port_type == LPFC_NPIV_PORT) {
++		fc_host_port_type(shost) = FC_PORTTYPE_NPIV;
++	} else if (lpfc_is_link_up(phba)) {
+ 		if (phba->fc_topology == TOPOLOGY_LOOP) {
+-			if (phba->fc_flag & FC_PUBLIC_LOOP)
++			if (vport->fc_flag & FC_PUBLIC_LOOP)
+ 				fc_host_port_type(shost) = FC_PORTTYPE_NLPORT;
+ 			else
+ 				fc_host_port_type(shost) = FC_PORTTYPE_LPORT;
+ 		} else {
+-			if (phba->fc_flag & FC_FABRIC)
++			if (vport->fc_flag & FC_FABRIC)
+ 				fc_host_port_type(shost) = FC_PORTTYPE_NPORT;
+ 			else
+ 				fc_host_port_type(shost) = FC_PORTTYPE_PTP;
+@@ -1502,29 +1781,20 @@
+ static void
+ lpfc_get_host_port_state(struct Scsi_Host *shost)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 
+ 	spin_lock_irq(shost->host_lock);
+ 
+-	if (phba->fc_flag & FC_OFFLINE_MODE)
++	if (vport->fc_flag & FC_OFFLINE_MODE)
+ 		fc_host_port_state(shost) = FC_PORTSTATE_OFFLINE;
+ 	else {
+-		switch (phba->hba_state) {
+-		case LPFC_STATE_UNKNOWN:
+-		case LPFC_WARM_START:
+-		case LPFC_INIT_START:
+-		case LPFC_INIT_MBX_CMDS:
++		switch (phba->link_state) {
++		case LPFC_LINK_UNKNOWN:
+ 		case LPFC_LINK_DOWN:
+ 			fc_host_port_state(shost) = FC_PORTSTATE_LINKDOWN;
+ 			break;
+ 		case LPFC_LINK_UP:
+-		case LPFC_LOCAL_CFG_LINK:
+-		case LPFC_FLOGI:
+-		case LPFC_FABRIC_CFG_LINK:
+-		case LPFC_NS_REG:
+-		case LPFC_NS_QRY:
+-		case LPFC_BUILD_DISC_LIST:
+-		case LPFC_DISC_AUTH:
+ 		case LPFC_CLEAR_LA:
+ 		case LPFC_HBA_READY:
+ 			/* Links up, beyond this port_type reports state */
+@@ -1545,11 +1815,12 @@
+ static void
+ lpfc_get_host_speed(struct Scsi_Host *shost)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 
+ 	spin_lock_irq(shost->host_lock);
+ 
+-	if (phba->hba_state == LPFC_HBA_READY) {
++	if (lpfc_is_link_up(phba)) {
+ 		switch(phba->fc_linkspeed) {
+ 			case LA_1GHZ_LINK:
+ 				fc_host_speed(shost) = FC_PORTSPEED_1GBIT;
+@@ -1575,38 +1846,30 @@
+ static void
+ lpfc_get_host_fabric_name (struct Scsi_Host *shost)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	u64 node_name;
+ 
+ 	spin_lock_irq(shost->host_lock);
+ 
+-	if ((phba->fc_flag & FC_FABRIC) ||
++	if ((vport->fc_flag & FC_FABRIC) ||
+ 	    ((phba->fc_topology == TOPOLOGY_LOOP) &&
+-	     (phba->fc_flag & FC_PUBLIC_LOOP)))
++	     (vport->fc_flag & FC_PUBLIC_LOOP)))
+ 		node_name = wwn_to_u64(phba->fc_fabparam.nodeName.u.wwn);
+ 	else
+ 		/* fabric is local port if there is no F/FL_Port */
+-		node_name = wwn_to_u64(phba->fc_nodename.u.wwn);
++		node_name = wwn_to_u64(vport->fc_nodename.u.wwn);
+ 
+ 	spin_unlock_irq(shost->host_lock);
+ 
+ 	fc_host_fabric_name(shost) = node_name;
+ }
+ 
+-static void
+-lpfc_get_host_symbolic_name (struct Scsi_Host *shost)
+-{
+-	struct lpfc_hba *phba = (struct lpfc_hba*)shost->hostdata;
+-
+-	spin_lock_irq(shost->host_lock);
+-	lpfc_get_hba_sym_node_name(phba, fc_host_symbolic_name(shost));
+-	spin_unlock_irq(shost->host_lock);
+-}
+-
+ static struct fc_host_statistics *
+ lpfc_get_stats(struct Scsi_Host *shost)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct lpfc_sli *psli = &phba->sli;
+ 	struct fc_host_statistics *hs = &phba->link_stats;
+ 	struct lpfc_lnk_stat * lso = &psli->lnk_stat_offsets;
+@@ -1615,7 +1878,16 @@
+ 	unsigned long seconds;
+ 	int rc = 0;
+ 
+-	if (phba->fc_flag & FC_BLOCK_MGMT_IO)
++	/*
++	 * prevent udev from issuing mailbox commands until the port is
++	 * configured.
++	 */
++	if (phba->link_state < LPFC_LINK_DOWN ||
++	    !phba->mbox_mem_pool ||
++	    (phba->sli.sli_flag & LPFC_SLI2_ACTIVE) == 0)
++		return NULL;
++
++	if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO)
+ 		return NULL;
+ 
+ 	pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+@@ -1627,17 +1899,16 @@
+ 	pmb->mbxCommand = MBX_READ_STATUS;
+ 	pmb->mbxOwner = OWN_HOST;
+ 	pmboxq->context1 = NULL;
++	pmboxq->vport = vport;
+ 
+-	if ((phba->fc_flag & FC_OFFLINE_MODE) ||
++	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
+ 		(!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+ 		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
+ 	else
+ 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
+ 
+ 	if (rc != MBX_SUCCESS) {
+-		if (rc == MBX_TIMEOUT)
+-			pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+-		else
++		if (rc != MBX_TIMEOUT)
+ 			mempool_free(pmboxq, phba->mbox_mem_pool);
+ 		return NULL;
+ 	}
+@@ -1653,18 +1924,17 @@
+ 	pmb->mbxCommand = MBX_READ_LNK_STAT;
+ 	pmb->mbxOwner = OWN_HOST;
+ 	pmboxq->context1 = NULL;
++	pmboxq->vport = vport;
+ 
+-	if ((phba->fc_flag & FC_OFFLINE_MODE) ||
++	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
+ 	    (!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+ 		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
+ 	else
+ 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
+ 
+ 	if (rc != MBX_SUCCESS) {
+-		if (rc == MBX_TIMEOUT)
+-			pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+-		else
+-			mempool_free( pmboxq, phba->mbox_mem_pool);
++		if (rc != MBX_TIMEOUT)
++			mempool_free(pmboxq, phba->mbox_mem_pool);
+ 		return NULL;
+ 	}
+ 
+@@ -1711,14 +1981,15 @@
+ static void
+ lpfc_reset_stats(struct Scsi_Host *shost)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct lpfc_sli *psli = &phba->sli;
+-	struct lpfc_lnk_stat * lso = &psli->lnk_stat_offsets;
++	struct lpfc_lnk_stat *lso = &psli->lnk_stat_offsets;
+ 	LPFC_MBOXQ_t *pmboxq;
+ 	MAILBOX_t *pmb;
+ 	int rc = 0;
+ 
+-	if (phba->fc_flag & FC_BLOCK_MGMT_IO)
++	if (phba->sli.sli_flag & LPFC_BLOCK_MGMT_IO)
+ 		return;
+ 
+ 	pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+@@ -1731,17 +2002,16 @@
+ 	pmb->mbxOwner = OWN_HOST;
+ 	pmb->un.varWords[0] = 0x1; /* reset request */
+ 	pmboxq->context1 = NULL;
++	pmboxq->vport = vport;
+ 
+-	if ((phba->fc_flag & FC_OFFLINE_MODE) ||
++	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
+ 		(!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+ 		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
+ 	else
+ 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
+ 
+ 	if (rc != MBX_SUCCESS) {
+-		if (rc == MBX_TIMEOUT)
+-			pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+-		else
++		if (rc != MBX_TIMEOUT)
+ 			mempool_free(pmboxq, phba->mbox_mem_pool);
+ 		return;
+ 	}
+@@ -1750,17 +2020,16 @@
+ 	pmb->mbxCommand = MBX_READ_LNK_STAT;
+ 	pmb->mbxOwner = OWN_HOST;
+ 	pmboxq->context1 = NULL;
++	pmboxq->vport = vport;
+ 
+-	if ((phba->fc_flag & FC_OFFLINE_MODE) ||
++	if ((vport->fc_flag & FC_OFFLINE_MODE) ||
+ 	    (!(psli->sli_flag & LPFC_SLI2_ACTIVE)))
+ 		rc = lpfc_sli_issue_mbox(phba, pmboxq, MBX_POLL);
+ 	else
+ 		rc = lpfc_sli_issue_mbox_wait(phba, pmboxq, phba->fc_ratov * 2);
+ 
+ 	if (rc != MBX_SUCCESS) {
+-		if (rc == MBX_TIMEOUT)
+-			pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+-		else
++		if (rc != MBX_TIMEOUT)
+ 			mempool_free( pmboxq, phba->mbox_mem_pool);
+ 		return;
+ 	}
+@@ -1790,12 +2059,12 @@
+ lpfc_get_node_by_target(struct scsi_target *starget)
+ {
+ 	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
+-	struct lpfc_hba *phba = (struct lpfc_hba *) shost->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
+ 	struct lpfc_nodelist *ndlp;
+ 
+ 	spin_lock_irq(shost->host_lock);
+ 	/* Search for this, mapped, target ID */
+-	list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) {
++	list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+ 		if (ndlp->nlp_state == NLP_STE_MAPPED_NODE &&
+ 		    starget->id == ndlp->nlp_sid) {
+ 			spin_unlock_irq(shost->host_lock);
+@@ -1885,8 +2154,66 @@
+ 	.get_host_fabric_name = lpfc_get_host_fabric_name,
+ 	.show_host_fabric_name = 1,
+ 
+-	.get_host_symbolic_name = lpfc_get_host_symbolic_name,
+-	.show_host_symbolic_name = 1,
++	/*
++	 * The LPFC driver treats linkdown handling as target loss events
++	 * so there are no sysfs handlers for link_down_tmo.
++	 */
++
++	.get_fc_host_stats = lpfc_get_stats,
++	.reset_fc_host_stats = lpfc_reset_stats,
++
++	.dd_fcrport_size = sizeof(struct lpfc_rport_data),
++	.show_rport_maxframe_size = 1,
++	.show_rport_supported_classes = 1,
++
++	.set_rport_dev_loss_tmo = lpfc_set_rport_loss_tmo,
++	.show_rport_dev_loss_tmo = 1,
++
++	.get_starget_port_id  = lpfc_get_starget_port_id,
++	.show_starget_port_id = 1,
++
++	.get_starget_node_name = lpfc_get_starget_node_name,
++	.show_starget_node_name = 1,
++
++	.get_starget_port_name = lpfc_get_starget_port_name,
++	.show_starget_port_name = 1,
++
++	.issue_fc_host_lip = lpfc_issue_lip,
++	.dev_loss_tmo_callbk = lpfc_dev_loss_tmo_callbk,
++	.terminate_rport_io = lpfc_terminate_rport_io,
++
++	.vport_create = lpfc_vport_create,
++	.vport_delete = lpfc_vport_delete,
++	.dd_fcvport_size = sizeof(struct lpfc_vport *),
++};
++
++struct fc_function_template lpfc_vport_transport_functions = {
++	/* fixed attributes the driver supports */
++	.show_host_node_name = 1,
++	.show_host_port_name = 1,
++	.show_host_supported_classes = 1,
++	.show_host_supported_fc4s = 1,
++	.show_host_supported_speeds = 1,
++	.show_host_maxframe_size = 1,
++
++	/* dynamic attributes the driver supports */
++	.get_host_port_id = lpfc_get_host_port_id,
++	.show_host_port_id = 1,
++
++	.get_host_port_type = lpfc_get_host_port_type,
++	.show_host_port_type = 1,
++
++	.get_host_port_state = lpfc_get_host_port_state,
++	.show_host_port_state = 1,
++
++	/* active_fc4s is shown but doesn't change (thus no get function) */
++	.show_host_active_fc4s = 1,
++
++	.get_host_speed = lpfc_get_host_speed,
++	.show_host_speed = 1,
++
++	.get_host_fabric_name = lpfc_get_host_fabric_name,
++	.show_host_fabric_name = 1,
+ 
+ 	/*
+ 	 * The LPFC driver treats linkdown handling as target loss events
+@@ -1915,6 +2242,8 @@
+ 	.issue_fc_host_lip = lpfc_issue_lip,
+ 	.dev_loss_tmo_callbk = lpfc_dev_loss_tmo_callbk,
+ 	.terminate_rport_io = lpfc_terminate_rport_io,
++
++	.vport_disable = lpfc_vport_disable,
+ };
+ 
+ void
+@@ -1937,6 +2266,9 @@
+ 	lpfc_discovery_threads_init(phba, lpfc_discovery_threads);
+ 	lpfc_max_luns_init(phba, lpfc_max_luns);
+ 	lpfc_poll_tmo_init(phba, lpfc_poll_tmo);
++	lpfc_peer_port_login_init(phba, lpfc_peer_port_login);
++	lpfc_npiv_enable_init(phba, lpfc_npiv_enable);
++	lpfc_vport_restrict_login_init(phba, lpfc_vport_restrict_login);
+ 	lpfc_use_msi_init(phba, lpfc_use_msi);
+ 	lpfc_devloss_tmo_init(phba, lpfc_devloss_tmo);
+ 	lpfc_nodev_tmo_init(phba, lpfc_nodev_tmo);
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_crtn.h linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_crtn.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_crtn.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_crtn.h	2007-12-19 15:29:23.000000000 -0500
+@@ -23,92 +23,114 @@
+ struct fc_rport;
+ void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t);
+ void lpfc_read_nv(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_heart_beat(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ int lpfc_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb,
+ 		 struct lpfc_dmabuf *mp);
+ void lpfc_clear_la(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_issue_clear_la(struct lpfc_hba *phba, struct lpfc_vport *vport);
+ void lpfc_config_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
+-int lpfc_read_sparam(struct lpfc_hba *, LPFC_MBOXQ_t *);
++int lpfc_read_sparam(struct lpfc_hba *, LPFC_MBOXQ_t *, int);
+ void lpfc_read_config(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ void lpfc_read_lnk_stat(struct lpfc_hba *, LPFC_MBOXQ_t *);
+-int lpfc_reg_login(struct lpfc_hba *, uint32_t, uint8_t *, LPFC_MBOXQ_t *,
+-		   uint32_t);
+-void lpfc_unreg_login(struct lpfc_hba *, uint32_t, LPFC_MBOXQ_t *);
+-void lpfc_unreg_did(struct lpfc_hba *, uint32_t, LPFC_MBOXQ_t *);
++int lpfc_reg_login(struct lpfc_hba *, uint16_t, uint32_t, uint8_t *,
++		   LPFC_MBOXQ_t *, uint32_t);
++void lpfc_unreg_login(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *);
++void lpfc_unreg_did(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *);
++void lpfc_reg_vpi(struct lpfc_hba *, uint16_t, uint32_t, LPFC_MBOXQ_t *);
++void lpfc_unreg_vpi(struct lpfc_hba *, uint16_t, LPFC_MBOXQ_t *);
+ void lpfc_init_link(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t, uint32_t);
+ 
+-
++void lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove);
+ int lpfc_linkdown(struct lpfc_hba *);
+ void lpfc_mbx_cmpl_read_la(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ 
+ void lpfc_mbx_cmpl_clear_la(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ void lpfc_mbx_cmpl_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
++void lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ void lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ void lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ void lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *, LPFC_MBOXQ_t *);
+-void lpfc_dequeue_node(struct lpfc_hba *, struct lpfc_nodelist *);
+-void lpfc_nlp_set_state(struct lpfc_hba *, struct lpfc_nodelist *, int);
+-void lpfc_drop_node(struct lpfc_hba *, struct lpfc_nodelist *);
+-void lpfc_set_disctmo(struct lpfc_hba *);
+-int lpfc_can_disctmo(struct lpfc_hba *);
+-int lpfc_unreg_rpi(struct lpfc_hba *, struct lpfc_nodelist *);
++void lpfc_dequeue_node(struct lpfc_vport *, struct lpfc_nodelist *);
++void lpfc_nlp_set_state(struct lpfc_vport *, struct lpfc_nodelist *, int);
++void lpfc_drop_node(struct lpfc_vport *, struct lpfc_nodelist *);
++void lpfc_set_disctmo(struct lpfc_vport *);
++int  lpfc_can_disctmo(struct lpfc_vport *);
++int  lpfc_unreg_rpi(struct lpfc_vport *, struct lpfc_nodelist *);
++void lpfc_unreg_all_rpis(struct lpfc_vport *);
++void lpfc_unreg_default_rpis(struct lpfc_vport *);
++void lpfc_issue_reg_vpi(struct lpfc_hba *, struct lpfc_vport *);
++
+ int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *,
+ 		    struct lpfc_iocbq *, struct lpfc_nodelist *);
+-void lpfc_nlp_init(struct lpfc_hba *, struct lpfc_nodelist *, uint32_t);
++void lpfc_nlp_init(struct lpfc_vport *, struct lpfc_nodelist *, uint32_t);
+ struct lpfc_nodelist *lpfc_nlp_get(struct lpfc_nodelist *);
+ int  lpfc_nlp_put(struct lpfc_nodelist *);
+-struct lpfc_nodelist *lpfc_setup_disc_node(struct lpfc_hba *, uint32_t);
+-void lpfc_disc_list_loopmap(struct lpfc_hba *);
+-void lpfc_disc_start(struct lpfc_hba *);
+-void lpfc_disc_flush_list(struct lpfc_hba *);
++struct lpfc_nodelist *lpfc_setup_disc_node(struct lpfc_vport *, uint32_t);
++void lpfc_disc_list_loopmap(struct lpfc_vport *);
++void lpfc_disc_start(struct lpfc_vport *);
++void lpfc_disc_flush_list(struct lpfc_vport *);
++void lpfc_cleanup_discovery_resources(struct lpfc_vport *);
+ void lpfc_disc_timeout(unsigned long);
+ 
+-struct lpfc_nodelist *__lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi);
+-struct lpfc_nodelist *lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi);
++struct lpfc_nodelist *__lpfc_findnode_rpi(struct lpfc_vport *, uint16_t);
++struct lpfc_nodelist *lpfc_findnode_rpi(struct lpfc_vport *, uint16_t);
+ 
++void lpfc_worker_wake_up(struct lpfc_hba *);
+ int lpfc_workq_post_event(struct lpfc_hba *, void *, void *, uint32_t);
+ int lpfc_do_work(void *);
+-int lpfc_disc_state_machine(struct lpfc_hba *, struct lpfc_nodelist *, void *,
++int lpfc_disc_state_machine(struct lpfc_vport *, struct lpfc_nodelist *, void *,
+ 			    uint32_t);
+ 
+-int lpfc_check_sparm(struct lpfc_hba *, struct lpfc_nodelist *,
++void lpfc_register_new_vport(struct lpfc_hba *, struct lpfc_vport *,
++			struct lpfc_nodelist *);
++void lpfc_do_scr_ns_plogi(struct lpfc_hba *, struct lpfc_vport *);
++int lpfc_check_sparm(struct lpfc_vport *, struct lpfc_nodelist *,
+ 		     struct serv_parm *, uint32_t);
+-int lpfc_els_abort(struct lpfc_hba *, struct lpfc_nodelist * ndlp);
++int lpfc_els_abort(struct lpfc_hba *, struct lpfc_nodelist *);
++int lpfc_els_chk_latt(struct lpfc_vport *);
+ int lpfc_els_abort_flogi(struct lpfc_hba *);
+-int lpfc_initial_flogi(struct lpfc_hba *);
+-int lpfc_issue_els_plogi(struct lpfc_hba *, uint32_t, uint8_t);
+-int lpfc_issue_els_prli(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t);
+-int lpfc_issue_els_adisc(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t);
+-int lpfc_issue_els_logo(struct lpfc_hba *, struct lpfc_nodelist *, uint8_t);
+-int lpfc_issue_els_scr(struct lpfc_hba *, uint32_t, uint8_t);
++int lpfc_initial_flogi(struct lpfc_vport *);
++int lpfc_initial_fdisc(struct lpfc_vport *);
++int lpfc_issue_els_fdisc(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t);
++int lpfc_issue_els_plogi(struct lpfc_vport *, uint32_t, uint8_t);
++int lpfc_issue_els_prli(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t);
++int lpfc_issue_els_adisc(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t);
++int lpfc_issue_els_logo(struct lpfc_vport *, struct lpfc_nodelist *, uint8_t);
++int lpfc_issue_els_npiv_logo(struct lpfc_vport *, struct lpfc_nodelist *);
++int lpfc_issue_els_scr(struct lpfc_vport *, uint32_t, uint8_t);
+ int lpfc_els_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *);
+-int lpfc_els_rsp_acc(struct lpfc_hba *, uint32_t, struct lpfc_iocbq *,
++int lpfc_ct_free_iocb(struct lpfc_hba *, struct lpfc_iocbq *);
++int lpfc_els_rsp_acc(struct lpfc_vport *, uint32_t, struct lpfc_iocbq *,
+ 		     struct lpfc_nodelist *, LPFC_MBOXQ_t *, uint8_t);
+-int lpfc_els_rsp_reject(struct lpfc_hba *, uint32_t, struct lpfc_iocbq *,
++int lpfc_els_rsp_reject(struct lpfc_vport *, uint32_t, struct lpfc_iocbq *,
++			struct lpfc_nodelist *, LPFC_MBOXQ_t *);
++int lpfc_els_rsp_adisc_acc(struct lpfc_vport *, struct lpfc_iocbq *,
+ 			struct lpfc_nodelist *);
+-int lpfc_els_rsp_adisc_acc(struct lpfc_hba *, struct lpfc_iocbq *,
++int lpfc_els_rsp_prli_acc(struct lpfc_vport *, struct lpfc_iocbq *,
+ 			   struct lpfc_nodelist *);
+-int lpfc_els_rsp_prli_acc(struct lpfc_hba *, struct lpfc_iocbq *,
+-			  struct lpfc_nodelist *);
+-void lpfc_cancel_retry_delay_tmo(struct lpfc_hba *, struct lpfc_nodelist *);
++void lpfc_cancel_retry_delay_tmo(struct lpfc_vport *, struct lpfc_nodelist *);
+ void lpfc_els_retry_delay(unsigned long);
+ void lpfc_els_retry_delay_handler(struct lpfc_nodelist *);
++void lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *);
+ void lpfc_els_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *,
+ 			  struct lpfc_iocbq *);
+-int lpfc_els_handle_rscn(struct lpfc_hba *);
+-int lpfc_els_flush_rscn(struct lpfc_hba *);
+-int lpfc_rscn_payload_check(struct lpfc_hba *, uint32_t);
+-void lpfc_els_flush_cmd(struct lpfc_hba *);
+-int lpfc_els_disc_adisc(struct lpfc_hba *);
+-int lpfc_els_disc_plogi(struct lpfc_hba *);
++int lpfc_els_handle_rscn(struct lpfc_vport *);
++void lpfc_els_flush_rscn(struct lpfc_vport *);
++int lpfc_rscn_payload_check(struct lpfc_vport *, uint32_t);
++void lpfc_els_flush_cmd(struct lpfc_vport *);
++int lpfc_els_disc_adisc(struct lpfc_vport *);
++int lpfc_els_disc_plogi(struct lpfc_vport *);
+ void lpfc_els_timeout(unsigned long);
+-void lpfc_els_timeout_handler(struct lpfc_hba *);
++void lpfc_els_timeout_handler(struct lpfc_vport *);
++void lpfc_hb_timeout(unsigned long);
++void lpfc_hb_timeout_handler(struct lpfc_hba *);
+ 
+ void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *,
+ 			 struct lpfc_iocbq *);
+-int lpfc_ns_cmd(struct lpfc_hba *, struct lpfc_nodelist *, int);
+-int lpfc_fdmi_cmd(struct lpfc_hba *, struct lpfc_nodelist *, int);
++int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t);
++int lpfc_fdmi_cmd(struct lpfc_vport *, struct lpfc_nodelist *, int);
+ void lpfc_fdmi_tmo(unsigned long);
+-void lpfc_fdmi_tmo_handler(struct lpfc_hba *);
++void lpfc_fdmi_timeout_handler(struct lpfc_vport *vport);
+ 
+ int lpfc_config_port_prep(struct lpfc_hba *);
+ int lpfc_config_port_post(struct lpfc_hba *);
+@@ -136,16 +158,23 @@
+ void lpfc_kill_board(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ void lpfc_mbox_put(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ LPFC_MBOXQ_t *lpfc_mbox_get(struct lpfc_hba *);
++void lpfc_mbox_cmpl_put(struct lpfc_hba *, LPFC_MBOXQ_t *);
+ int lpfc_mbox_tmo_val(struct lpfc_hba *, int);
+ 
++void lpfc_config_hbq(struct lpfc_hba *, struct lpfc_hbq_init *, uint32_t ,
++		     LPFC_MBOXQ_t *);
++struct lpfc_hbq_entry * lpfc_sli_next_hbq_slot(struct lpfc_hba *, uint32_t);
++
+ int lpfc_mem_alloc(struct lpfc_hba *);
+ void lpfc_mem_free(struct lpfc_hba *);
++void lpfc_stop_vport_timers(struct lpfc_vport *);
+ 
+ void lpfc_poll_timeout(unsigned long ptr);
+ void lpfc_poll_start_timer(struct lpfc_hba * phba);
+ void lpfc_sli_poll_fcp_ring(struct lpfc_hba * hba);
+ struct lpfc_iocbq * lpfc_sli_get_iocbq(struct lpfc_hba *);
+ void lpfc_sli_release_iocbq(struct lpfc_hba * phba, struct lpfc_iocbq * iocb);
++void __lpfc_sli_release_iocbq(struct lpfc_hba * phba, struct lpfc_iocbq * iocb);
+ uint16_t lpfc_sli_next_iotag(struct lpfc_hba * phba, struct lpfc_iocbq * iocb);
+ 
+ void lpfc_reset_barrier(struct lpfc_hba * phba);
+@@ -154,6 +183,7 @@
+ int lpfc_sli_brdreset(struct lpfc_hba *);
+ int lpfc_sli_brdrestart(struct lpfc_hba *);
+ int lpfc_sli_hba_setup(struct lpfc_hba *);
++int lpfc_sli_host_down(struct lpfc_vport *);
+ int lpfc_sli_hba_down(struct lpfc_hba *);
+ int lpfc_sli_issue_mbox(struct lpfc_hba *, LPFC_MBOXQ_t *, uint32_t);
+ int lpfc_sli_handle_mb_event(struct lpfc_hba *);
+@@ -164,12 +194,17 @@
+ int lpfc_sli_issue_iocb(struct lpfc_hba *, struct lpfc_sli_ring *,
+ 			struct lpfc_iocbq *, uint32_t);
+ void lpfc_sli_pcimem_bcopy(void *, void *, uint32_t);
+-int lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *);
++void lpfc_sli_abort_iocb_ring(struct lpfc_hba *, struct lpfc_sli_ring *);
+ int lpfc_sli_ringpostbuf_put(struct lpfc_hba *, struct lpfc_sli_ring *,
+ 			     struct lpfc_dmabuf *);
+ struct lpfc_dmabuf *lpfc_sli_ringpostbuf_get(struct lpfc_hba *,
+ 					     struct lpfc_sli_ring *,
+ 					     dma_addr_t);
++int lpfc_sli_hbqbuf_init_hbqs(struct lpfc_hba *, uint32_t);
++int lpfc_sli_hbqbuf_add_hbqs(struct lpfc_hba *, uint32_t);
++void lpfc_sli_hbqbuf_free_all(struct lpfc_hba *);
++struct hbq_dmabuf *lpfc_sli_hbqbuf_find(struct lpfc_hba *, uint32_t);
++int lpfc_sli_hbq_size(void);
+ int lpfc_sli_issue_abort_iotag(struct lpfc_hba *, struct lpfc_sli_ring *,
+ 			       struct lpfc_iocbq *);
+ int lpfc_sli_sum_iocb(struct lpfc_hba *, struct lpfc_sli_ring *, uint16_t,
+@@ -180,8 +215,12 @@
+ void lpfc_mbox_timeout(unsigned long);
+ void lpfc_mbox_timeout_handler(struct lpfc_hba *);
+ 
+-struct lpfc_nodelist *lpfc_findnode_did(struct lpfc_hba *, uint32_t);
+-struct lpfc_nodelist *lpfc_findnode_wwpn(struct lpfc_hba *, struct lpfc_name *);
++struct lpfc_nodelist *__lpfc_find_node(struct lpfc_vport *, node_filter,
++				       void *);
++struct lpfc_nodelist *lpfc_find_node(struct lpfc_vport *, node_filter, void *);
++struct lpfc_nodelist *lpfc_findnode_did(struct lpfc_vport *, uint32_t);
++struct lpfc_nodelist *lpfc_findnode_wwpn(struct lpfc_vport *,
++					 struct lpfc_name *);
+ 
+ int lpfc_sli_issue_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq,
+ 			 uint32_t timeout);
+@@ -195,25 +234,56 @@
+ 			     struct lpfc_iocbq * cmdiocb,
+ 			     struct lpfc_iocbq * rspiocb);
+ 
++void *lpfc_hbq_alloc(struct lpfc_hba *, int, dma_addr_t *);
++void lpfc_hbq_free(struct lpfc_hba *, void *, dma_addr_t);
++void lpfc_sli_free_hbq(struct lpfc_hba *, struct hbq_dmabuf *);
++
+ void *lpfc_mbuf_alloc(struct lpfc_hba *, int, dma_addr_t *);
++void __lpfc_mbuf_free(struct lpfc_hba *, void *, dma_addr_t);
+ void lpfc_mbuf_free(struct lpfc_hba *, void *, dma_addr_t);
+ 
++void lpfc_in_buf_free(struct lpfc_hba *, struct lpfc_dmabuf *);
+ /* Function prototypes. */
+ const char* lpfc_info(struct Scsi_Host *);
+-void lpfc_scan_start(struct Scsi_Host *);
+ int lpfc_scan_finished(struct Scsi_Host *, unsigned long);
+ 
+ void lpfc_get_cfgparam(struct lpfc_hba *);
+-int lpfc_alloc_sysfs_attr(struct lpfc_hba *);
+-void lpfc_free_sysfs_attr(struct lpfc_hba *);
+-extern struct class_device_attribute *lpfc_host_attrs[];
++int lpfc_alloc_sysfs_attr(struct lpfc_vport *);
++void lpfc_free_sysfs_attr(struct lpfc_vport *);
++extern struct class_device_attribute *lpfc_hba_attrs[];
+ extern struct scsi_host_template lpfc_template;
+ extern struct fc_function_template lpfc_transport_functions;
++extern struct fc_function_template lpfc_vport_transport_functions;
++extern int lpfc_sli_mode;
+ 
+-void lpfc_get_hba_sym_node_name(struct lpfc_hba * phba, uint8_t * symbp);
++int  lpfc_vport_symbolic_node_name(struct lpfc_vport *, char *, size_t);
+ void lpfc_terminate_rport_io(struct fc_rport *);
+ void lpfc_dev_loss_tmo_callbk(struct fc_rport *rport);
+ 
++struct lpfc_vport *lpfc_create_port(struct lpfc_hba *, int, struct fc_vport *);
++int  lpfc_vport_disable(struct fc_vport *fc_vport, bool disable);
++void lpfc_mbx_unreg_vpi(struct lpfc_vport *);
++void destroy_port(struct lpfc_vport *);
++int lpfc_get_instance(void);
++void lpfc_host_attrib_init(struct Scsi_Host *);
++
++extern void lpfc_debugfs_initialize(struct lpfc_vport *);
++extern void lpfc_debugfs_terminate(struct lpfc_vport *);
++extern void lpfc_debugfs_disc_trc(struct lpfc_vport *, int, char *, uint32_t,
++	uint32_t, uint32_t);
++
++/* Interface exported by fabric iocb scheduler */
++int lpfc_issue_fabric_iocb(struct lpfc_hba *, struct lpfc_iocbq *);
++void lpfc_fabric_abort_vport(struct lpfc_vport *);
++void lpfc_fabric_abort_nport(struct lpfc_nodelist *);
++void lpfc_fabric_abort_hba(struct lpfc_hba *);
++void lpfc_fabric_abort_flogi(struct lpfc_hba *);
++void lpfc_fabric_block_timeout(unsigned long);
++void lpfc_unblock_fabric_iocbs(struct lpfc_hba *);
++void lpfc_adjust_queue_depth(struct lpfc_hba *);
++void lpfc_ramp_down_queue_handler(struct lpfc_hba *);
++void lpfc_ramp_up_queue_handler(struct lpfc_hba *);
++
+ #define ScsiResult(host_code, scsi_code) (((host_code) << 16) | scsi_code)
+ #define HBA_EVENT_RSCN                   5
+ #define HBA_EVENT_LINK_UP                2
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_ct.c linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_ct.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_ct.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_ct.c	2007-12-19 15:29:23.000000000 -0500
+@@ -40,6 +40,8 @@
+ #include "lpfc_logmsg.h"
+ #include "lpfc_crtn.h"
+ #include "lpfc_version.h"
++#include "lpfc_vport.h"
++#include "lpfc_debugfs.h"
+ 
+ #define HBA_PORTSPEED_UNKNOWN               0	/* Unknown - transceiver
+ 						 * incapable of reporting */
+@@ -58,24 +60,68 @@
+ /*
+  * lpfc_ct_unsol_event
+  */
++static void
++lpfc_ct_unsol_buffer(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq,
++		     struct lpfc_dmabuf *mp, uint32_t size)
++{
++	if (!mp) {
++		printk(KERN_ERR "%s (%d): Unsolicited CT, no buffer, "
++		       "piocbq = %p, status = x%x, mp = %p, size = %d\n",
++		       __FUNCTION__, __LINE__,
++		       piocbq, piocbq->iocb.ulpStatus, mp, size);
++	}
++
++	printk(KERN_ERR "%s (%d): Ignoring unsolicited CT piocbq = %p, "
++	       "buffer = %p, size = %d, status = x%x\n",
++	       __FUNCTION__, __LINE__,
++	       piocbq, mp, size,
++	       piocbq->iocb.ulpStatus);
++
++}
++
++static void
++lpfc_ct_ignore_hbq_buffer(struct lpfc_hba *phba, struct lpfc_iocbq *piocbq,
++			  struct lpfc_dmabuf *mp, uint32_t size)
++{
++	if (!mp) {
++		printk(KERN_ERR "%s (%d): Unsolicited CT, no "
++		       "HBQ buffer, piocbq = %p, status = x%x\n",
++		       __FUNCTION__, __LINE__,
++		       piocbq, piocbq->iocb.ulpStatus);
++	} else {
++		lpfc_ct_unsol_buffer(phba, piocbq, mp, size);
++		printk(KERN_ERR "%s (%d): Ignoring unsolicited CT "
++		       "piocbq = %p, buffer = %p, size = %d, "
++		       "status = x%x\n",
++		       __FUNCTION__, __LINE__,
++		       piocbq, mp, size, piocbq->iocb.ulpStatus);
++	}
++}
++
+ void
+-lpfc_ct_unsol_event(struct lpfc_hba * phba,
+-		    struct lpfc_sli_ring * pring, struct lpfc_iocbq * piocbq)
++lpfc_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++		    struct lpfc_iocbq *piocbq)
+ {
+ 
+-	struct lpfc_iocbq *next_piocbq;
+-	struct lpfc_dmabuf *pmbuf = NULL;
+-	struct lpfc_dmabuf *matp, *next_matp;
+-	uint32_t ctx = 0, size = 0, cnt = 0;
++	struct lpfc_dmabuf *mp = NULL;
+ 	IOCB_t *icmd = &piocbq->iocb;
+-	IOCB_t *save_icmd = icmd;
+-	int i, go_exit = 0;
+-	struct list_head head;
++	int i;
++	struct lpfc_iocbq *iocbq;
++	dma_addr_t paddr;
++	uint32_t size;
++	struct lpfc_dmabuf *bdeBuf1 = piocbq->context2;
++	struct lpfc_dmabuf *bdeBuf2 = piocbq->context3;
++
++	piocbq->context2 = NULL;
++	piocbq->context3 = NULL;
+ 
+-	if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++	if (unlikely(icmd->ulpStatus == IOSTAT_NEED_BUFFER)) {
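++		/* Firmware needs receive buffers; post more to the ELS HBQ */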
++		lpfc_sli_hbqbuf_add_hbqs(phba, LPFC_ELS_HBQ);
++	} else if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+ 		((icmd->un.ulpWord[4] & 0xff) == IOERR_RCV_BUFFER_WAITING)) {
+ 		/* Not enough posted buffers; Try posting more buffers */
+ 		phba->fc_stat.NoRcvBuf++;
++		if (!(phba->sli3_options & LPFC_SLI3_HBQ_ENABLED))
+ 		lpfc_post_buffer(phba, pring, 0, 1);
+ 		return;
+ 	}
+@@ -86,66 +132,56 @@
+ 	if (icmd->ulpBdeCount == 0)
+ 		return;
+ 
+-	INIT_LIST_HEAD(&head);
+-	list_add_tail(&head, &piocbq->list);
+-
+-	list_for_each_entry_safe(piocbq, next_piocbq, &head, list) {
+-		icmd = &piocbq->iocb;
+-		if (ctx == 0)
+-			ctx = (uint32_t) (icmd->ulpContext);
+-		if (icmd->ulpBdeCount == 0)
++	if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) {
++		list_for_each_entry(iocbq, &piocbq->list, list) {
++			icmd = &iocbq->iocb;
++			if (icmd->ulpBdeCount == 0) {
++				printk(KERN_ERR "%s (%d): Unsolicited CT, no "
++				       "BDE, iocbq = %p, status = x%x\n",
++				       __FUNCTION__, __LINE__,
++				       iocbq, iocbq->iocb.ulpStatus);
+ 			continue;
+-
+-		for (i = 0; i < icmd->ulpBdeCount; i++) {
+-			matp = lpfc_sli_ringpostbuf_get(phba, pring,
+-							getPaddr(icmd->un.
+-								 cont64[i].
+-								 addrHigh,
+-								 icmd->un.
+-								 cont64[i].
+-								 addrLow));
+-			if (!matp) {
+-				/* Insert lpfc log message here */
+-				lpfc_post_buffer(phba, pring, cnt, 1);
+-				go_exit = 1;
+-				goto ct_unsol_event_exit_piocbq;
+-			}
+-
+-			/* Typically for Unsolicited CT requests */
+-			if (!pmbuf) {
+-				pmbuf = matp;
+-				INIT_LIST_HEAD(&pmbuf->list);
+-			} else
+-				list_add_tail(&matp->list, &pmbuf->list);
+-
+-			size += icmd->un.cont64[i].tus.f.bdeSize;
+-			cnt++;
+ 		}
+ 
+-		icmd->ulpBdeCount = 0;
++			size  = icmd->un.cont64[0].tus.f.bdeSize;
++			lpfc_ct_ignore_hbq_buffer(phba, piocbq, bdeBuf1, size);
++			lpfc_in_buf_free(phba, bdeBuf1);
++			if (icmd->ulpBdeCount == 2) {
++				lpfc_ct_ignore_hbq_buffer(phba, piocbq, bdeBuf2,
++							  size);
++				lpfc_in_buf_free(phba, bdeBuf2);
++			}
+ 	}
++	} else {
++		struct lpfc_iocbq  *next;
+ 
+-	lpfc_post_buffer(phba, pring, cnt, 1);
+-	if (save_icmd->ulpStatus) {
+-		go_exit = 1;
++		list_for_each_entry_safe(iocbq, next, &piocbq->list, list) {
++			icmd = &iocbq->iocb;
++			if (icmd->ulpBdeCount == 0) {
++				printk(KERN_ERR "%s (%d): Unsolicited CT, no "
++				       "BDE, iocbq = %p, status = x%x\n",
++				       __FUNCTION__, __LINE__,
++				       iocbq, iocbq->iocb.ulpStatus);
++				continue;
+ 	}
+ 
+-ct_unsol_event_exit_piocbq:
+-	list_del(&head);
+-	if (pmbuf) {
+-		list_for_each_entry_safe(matp, next_matp, &pmbuf->list, list) {
+-			lpfc_mbuf_free(phba, matp->virt, matp->phys);
+-			list_del(&matp->list);
+-			kfree(matp);
++			for (i = 0; i < icmd->ulpBdeCount; i++) {
++				paddr = getPaddr(icmd->un.cont64[i].addrHigh,
++						 icmd->un.cont64[i].addrLow);
++				mp = lpfc_sli_ringpostbuf_get(phba, pring,
++							      paddr);
++				size = icmd->un.cont64[i].tus.f.bdeSize;
++				lpfc_ct_unsol_buffer(phba, piocbq, mp, size);
++				lpfc_in_buf_free(phba, mp);
++			}
++			list_del(&iocbq->list);
++			lpfc_sli_release_iocbq(phba, iocbq);
+ 		}
+-		lpfc_mbuf_free(phba, pmbuf->virt, pmbuf->phys);
+-		kfree(pmbuf);
+ 	}
+-	return;
+ }
+ 
+ static void
+-lpfc_free_ct_rsp(struct lpfc_hba * phba, struct lpfc_dmabuf * mlist)
++lpfc_free_ct_rsp(struct lpfc_hba *phba, struct lpfc_dmabuf *mlist)
+ {
+ 	struct lpfc_dmabuf *mlast, *next_mlast;
+ 
+@@ -160,7 +196,7 @@
+ }
+ 
+ static struct lpfc_dmabuf *
+-lpfc_alloc_ct_rsp(struct lpfc_hba * phba, int cmdcode, struct ulp_bde64 * bpl,
++lpfc_alloc_ct_rsp(struct lpfc_hba *phba, int cmdcode, struct ulp_bde64 *bpl,
+ 		  uint32_t size, int *entries)
+ {
+ 	struct lpfc_dmabuf *mlist = NULL;
+@@ -181,7 +217,8 @@
+ 
+ 		INIT_LIST_HEAD(&mp->list);
+ 
+-		if (cmdcode == be16_to_cpu(SLI_CTNS_GID_FT))
++		if (cmdcode == be16_to_cpu(SLI_CTNS_GID_FT) ||
++		    cmdcode == be16_to_cpu(SLI_CTNS_GFF_ID))
+ 			mp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(mp->phys));
+ 		else
+ 			mp->virt = lpfc_mbuf_alloc(phba, 0, &(mp->phys));
+@@ -201,8 +238,8 @@
+ 
+ 		bpl->tus.f.bdeFlags = BUFF_USE_RCV;
+ 		/* build buffer ptr list for IOCB */
+-		bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) );
+-		bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) );
++		bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys));
++		bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys));
+ 		bpl->tus.f.bdeSize = (uint16_t) cnt;
+ 		bpl->tus.w = le32_to_cpu(bpl->tus.w);
+ 		bpl++;
+@@ -215,24 +252,49 @@
+ 	return mlist;
+ }
+ 
++int
++lpfc_ct_free_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *ctiocb)
++{
++	struct lpfc_dmabuf *buf_ptr;
++
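++	/* context1 = CT command payload, context2 = response buffer chain,
++	 * context3 = buffer pointer list; free whichever are still attached.
++	 */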
++	if (ctiocb->context1) {
++		buf_ptr = (struct lpfc_dmabuf *) ctiocb->context1;
++		lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys);
++		kfree(buf_ptr);
++		ctiocb->context1 = NULL;
++	}
++	if (ctiocb->context2) {
++		lpfc_free_ct_rsp(phba, (struct lpfc_dmabuf *) ctiocb->context2);
++		ctiocb->context2 = NULL;
++	}
++
++	if (ctiocb->context3) {
++		buf_ptr = (struct lpfc_dmabuf *) ctiocb->context3;
++		lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys);
++		kfree(buf_ptr);
++		ctiocb->context3 = NULL;
++	}
++	lpfc_sli_release_iocbq(phba, ctiocb);
++	return 0;
++}
++
+ static int
+-lpfc_gen_req(struct lpfc_hba *phba, struct lpfc_dmabuf *bmp,
++lpfc_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
+ 	     struct lpfc_dmabuf *inp, struct lpfc_dmabuf *outp,
+ 	     void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
+ 		     struct lpfc_iocbq *),
+ 	     struct lpfc_nodelist *ndlp, uint32_t usr_flg, uint32_t num_entry,
+-	     uint32_t tmo)
++	     uint32_t tmo, uint8_t retry)
+ {
+-
++	struct lpfc_hba  *phba = vport->phba;
+ 	struct lpfc_sli *psli = &phba->sli;
+ 	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
+ 	IOCB_t *icmd;
+ 	struct lpfc_iocbq *geniocb;
++	int rc;
+ 
+ 	/* Allocate buffer for  command iocb */
+-	spin_lock_irq(phba->host->host_lock);
+ 	geniocb = lpfc_sli_get_iocbq(phba);
+-	spin_unlock_irq(phba->host->host_lock);
+ 
+ 	if (geniocb == NULL)
+ 		return 1;
+@@ -272,31 +334,40 @@
+ 	icmd->ulpClass = CLASS3;
+ 	icmd->ulpContext = ndlp->nlp_rpi;
+ 
++	if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
++		/* For GEN_REQUEST64_CR, use the RPI */
++		icmd->ulpCt_h = 0;
++		icmd->ulpCt_l = 0;
++	}
++
+ 	/* Issue GEN REQ IOCB for NPORT <did> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0119 Issue GEN REQ IOCB for NPORT x%x "
+-			"Data: x%x x%x\n", phba->brd_no, icmd->un.ulpWord[5],
+-			icmd->ulpIoTag, phba->hba_state);
++			"%d (%d):0119 Issue GEN REQ IOCB to NPORT x%x "
++			"Data: x%x x%x\n", phba->brd_no, vport->vpi,
++			ndlp->nlp_DID, icmd->ulpIoTag,
++			vport->port_state);
+ 	geniocb->iocb_cmpl = cmpl;
+ 	geniocb->drvrTimeout = icmd->ulpTimeout + LPFC_DRVR_TIMEOUT;
+-	spin_lock_irq(phba->host->host_lock);
+-	if (lpfc_sli_issue_iocb(phba, pring, geniocb, 0) == IOCB_ERROR) {
++	geniocb->vport = vport;
++	geniocb->retry = retry;
++	rc = lpfc_sli_issue_iocb(phba, pring, geniocb, 0);
++
++	if (rc == IOCB_ERROR) {
+ 		lpfc_sli_release_iocbq(phba, geniocb);
+-		spin_unlock_irq(phba->host->host_lock);
+ 		return 1;
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
+ 
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_ct_cmd(struct lpfc_hba *phba, struct lpfc_dmabuf *inmp,
++lpfc_ct_cmd(struct lpfc_vport *vport, struct lpfc_dmabuf *inmp,
+ 	    struct lpfc_dmabuf *bmp, struct lpfc_nodelist *ndlp,
+ 	    void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
+ 			  struct lpfc_iocbq *),
+-	    uint32_t rsp_size)
++	    uint32_t rsp_size, uint8_t retry)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	struct ulp_bde64 *bpl = (struct ulp_bde64 *) bmp->virt;
+ 	struct lpfc_dmabuf *outmp;
+ 	int cnt = 0, status;
+@@ -310,8 +381,8 @@
+ 	if (!outmp)
+ 		return -ENOMEM;
+ 
+-	status = lpfc_gen_req(phba, bmp, inmp, outmp, cmpl, ndlp, 0,
+-			      cnt+1, 0);
++	status = lpfc_gen_req(vport, bmp, inmp, outmp, cmpl, ndlp, 0,
++			      cnt+1, 0, retry);
+ 	if (status) {
+ 		lpfc_free_ct_rsp(phba, outmp);
+ 		return -ENOMEM;
+@@ -319,20 +390,35 @@
+ 	return 0;
+ }
+ 
++static struct lpfc_vport *
++lpfc_find_vport_by_did(struct lpfc_hba *phba, uint32_t did) {
++
++	struct lpfc_vport *vport_curr;
++
++	list_for_each_entry(vport_curr, &phba->port_list, listentry) {
++		if ((vport_curr->fc_myDID) &&
++			(vport_curr->fc_myDID == did))
++			return vport_curr;
++	}
++
++	return NULL;
++}
++
+ static int
+-lpfc_ns_rsp(struct lpfc_hba * phba, struct lpfc_dmabuf * mp, uint32_t Size)
++lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint32_t Size)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	struct lpfc_sli_ct_request *Response =
+ 		(struct lpfc_sli_ct_request *) mp->virt;
+ 	struct lpfc_nodelist *ndlp = NULL;
+ 	struct lpfc_dmabuf *mlast, *next_mp;
+ 	uint32_t *ctptr = (uint32_t *) & Response->un.gid.PortType;
+-	uint32_t Did;
+-	uint32_t CTentry;
++	uint32_t Did, CTentry;
+ 	int Cnt;
+ 	struct list_head head;
+ 
+-	lpfc_set_disctmo(phba);
++	lpfc_set_disctmo(vport);
++	vport->num_disc_nodes = 0;
+ 
+ 
+ 	list_add_tail(&head, &mp->list);
+@@ -350,39 +436,96 @@
+ 
+ 		/* Loop through entire NameServer list of DIDs */
+ 		while (Cnt >= sizeof (uint32_t)) {
+-
+ 			/* Get next DID from NameServer List */
+ 			CTentry = *ctptr++;
+ 			Did = ((be32_to_cpu(CTentry)) & Mask_DID);
+ 
+ 			ndlp = NULL;
+-			if (Did != phba->fc_myDID) {
+-				/* Check for rscn processing or not */
+-				ndlp = lpfc_setup_disc_node(phba, Did);
+-			}
+-			/* Mark all node table entries that are in the
+-			   Nameserver */
++
++			/*
++			 * Check for rscn processing or not
++			 * To conserve rpi's, filter out addresses for other
++			 * vports on the same physical HBAs.
++			 */
++			if ((Did != vport->fc_myDID) &&
++			    ((lpfc_find_vport_by_did(phba, Did) == NULL) ||
++			     phba->cfg_peer_port_login)) {
++				if ((vport->port_type != LPFC_NPIV_PORT) ||
++				    (vport->fc_flag & FC_RFF_NOT_SUPPORTED) ||
++				    (!phba->cfg_vport_restrict_login)) {
++					ndlp = lpfc_setup_disc_node(vport, Did);
+ 			if (ndlp) {
+-				/* NameServer Rsp */
+-				lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-						"%d:0238 Process x%x NameServer"
+-						" Rsp Data: x%x x%x x%x\n",
+-						phba->brd_no,
++						lpfc_debugfs_disc_trc(vport,
++						LPFC_DISC_TRC_CT,
++						"Parse GID_FTrsp: "
++						"did:x%x flg:x%x x%x",
+ 						Did, ndlp->nlp_flag,
+-						phba->fc_flag,
+-						phba->fc_rscn_id_cnt);
++						vport->fc_flag);
++
++						lpfc_printf_log(phba, KERN_INFO,
++							LOG_DISCOVERY,
++							"%d (%d):0238 Process "
++							"x%x NameServer Rsp "
++							"Data: x%x x%x x%x\n",
++							phba->brd_no,
++							vport->vpi, Did,
++							ndlp->nlp_flag,
++							vport->fc_flag,
++							vport->fc_rscn_id_cnt);
+ 			} else {
+-				/* NameServer Rsp */
+-				lpfc_printf_log(phba,
+-						KERN_INFO,
++						lpfc_debugfs_disc_trc(vport,
++						LPFC_DISC_TRC_CT,
++						"Skip1 GID_FTrsp: "
++						"did:x%x flg:x%x cnt:%d",
++						Did, vport->fc_flag,
++						vport->fc_rscn_id_cnt);
++
++						lpfc_printf_log(phba, KERN_INFO,
+ 						LOG_DISCOVERY,
+-						"%d:0239 Skip x%x NameServer "
+-						"Rsp Data: x%x x%x x%x\n",
++							"%d (%d):0239 Skip x%x "
++							"NameServer Rsp Data: "
++							"x%x x%x\n",
+ 						phba->brd_no,
+-						Did, Size, phba->fc_flag,
+-						phba->fc_rscn_id_cnt);
++							vport->vpi, Did,
++							vport->fc_flag,
++							vport->fc_rscn_id_cnt);
+ 			}
+ 
++				} else {
++					if (!(vport->fc_flag & FC_RSCN_MODE) ||
++					(lpfc_rscn_payload_check(vport, Did))) {
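++						/* Learn the port's FC-4
++						 * features (GFF_ID) before
++						 * setting up the node.
++						 */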
++						lpfc_debugfs_disc_trc(vport,
++						LPFC_DISC_TRC_CT,
++						"Query GID_FTrsp: "
++						"did:x%x flg:x%x cnt:%d",
++						Did, vport->fc_flag,
++						vport->fc_rscn_id_cnt);
++
++						if (lpfc_ns_cmd(vport,
++							SLI_CTNS_GFF_ID,
++							0, Did) == 0)
++							vport->num_disc_nodes++;
++					} else {
++						lpfc_debugfs_disc_trc(vport,
++						LPFC_DISC_TRC_CT,
++						"Skip2 GID_FTrsp: "
++						"did:x%x flg:x%x cnt:%d",
++						Did, vport->fc_flag,
++						vport->fc_rscn_id_cnt);
++
++						lpfc_printf_log(phba, KERN_INFO,
++							LOG_DISCOVERY,
++							"%d (%d):0245 Skip x%x "
++							"NameServer Rsp Data: "
++							"x%x x%x\n",
++							phba->brd_no,
++							vport->vpi, Did,
++							vport->fc_flag,
++							vport->fc_rscn_id_cnt);
++					}
++				}
++			}
+ 			if (CTentry & (be32_to_cpu(SLI_CT_LAST_ENTRY)))
+ 				goto nsout1;
+ 			Cnt -= sizeof (uint32_t);
+@@ -393,190 +536,369 @@
+ 
+ nsout1:
+ 	list_del(&head);
+-
+-	/*
+- 	 * The driver has cycled through all Nports in the RSCN payload.
+- 	 * Complete the handling by cleaning up and marking the
+- 	 * current driver state.
+- 	 */
+-	if (phba->hba_state == LPFC_HBA_READY) {
+-		lpfc_els_flush_rscn(phba);
+-		spin_lock_irq(phba->host->host_lock);
+-		phba->fc_flag |= FC_RSCN_MODE; /* we are still in RSCN mode */
+-		spin_unlock_irq(phba->host->host_lock);
+-	}
+ 	return 0;
+ }
+ 
+-
+-
+-
+ static void
+-lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-			struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_ct_cmd_gid_ft(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++			struct lpfc_iocbq *rspiocb)
+ {
++	struct lpfc_vport *vport = cmdiocb->vport;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ 	IOCB_t *irsp;
+-	struct lpfc_sli *psli;
+ 	struct lpfc_dmabuf *bmp;
+-	struct lpfc_dmabuf *inp;
+ 	struct lpfc_dmabuf *outp;
+-	struct lpfc_nodelist *ndlp;
+ 	struct lpfc_sli_ct_request *CTrsp;
++	int rc;
+ 
+-	psli = &phba->sli;
+ 	/* we pass cmdiocb to state machine which needs rspiocb as well */
+ 	cmdiocb->context_un.rsp_iocb = rspiocb;
+ 
+-	inp = (struct lpfc_dmabuf *) cmdiocb->context1;
+ 	outp = (struct lpfc_dmabuf *) cmdiocb->context2;
+ 	bmp = (struct lpfc_dmabuf *) cmdiocb->context3;
+-
+ 	irsp = &rspiocb->iocb;
+-	if (irsp->ulpStatus) {
+-		if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+-			((irsp->un.ulpWord[4] == IOERR_SLI_DOWN) ||
+-			 (irsp->un.ulpWord[4] == IOERR_SLI_ABORTED))) {
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++		 "GID_FT cmpl:     status:x%x/x%x rtry:%d",
++		irsp->ulpStatus, irsp->un.ulpWord[4], vport->fc_ns_retry);
++
++	/* Don't bother processing response if vport is being torn down. */
++	if (vport->load_flag & FC_UNLOADING)
++		goto out;
++
++
++	if (lpfc_els_chk_latt(vport) || lpfc_error_lost_link(irsp)) {
++		lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++				"%d (%d):0216 Link event during NS query\n",
++				phba->brd_no, vport->vpi);
++		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
+ 			goto out;
+ 		}
+ 
++	if (irsp->ulpStatus) {
+ 		/* Check for retry */
+-		if (phba->fc_ns_retry < LPFC_MAX_NS_RETRY) {
+-			phba->fc_ns_retry++;
++		if (vport->fc_ns_retry < LPFC_MAX_NS_RETRY) {
++			if ((irsp->ulpStatus != IOSTAT_LOCAL_REJECT) ||
++				(irsp->un.ulpWord[4] != IOERR_NO_RESOURCES))
++				vport->fc_ns_retry++;
+ 			/* CT command is being retried */
+-			ndlp = lpfc_findnode_did(phba, NameServer_DID);
+-			if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
+-				if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT) ==
+-				    0) {
++			rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT,
++					 vport->fc_ns_retry, 0);
++			if (rc == 0)
+ 					goto out;
+ 				}
+-			}
+-		}
++		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++			"%d (%d):0257 GID_FT Query error: 0x%x 0x%x\n",
++			phba->brd_no, vport->vpi, irsp->ulpStatus,
++			vport->fc_ns_retry);
+ 	} else {
+ 		/* Good status, continue checking */
+ 		CTrsp = (struct lpfc_sli_ct_request *) outp->virt;
+ 		if (CTrsp->CommandResponse.bits.CmdRsp ==
+ 		    be16_to_cpu(SLI_CT_RESPONSE_FS_ACC)) {
+ 			lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-					"%d:0208 NameServer Rsp "
++					"%d (%d):0208 NameServer Rsp "
+ 					"Data: x%x\n",
+-					phba->brd_no,
+-					phba->fc_flag);
+-			lpfc_ns_rsp(phba, outp,
++					phba->brd_no, vport->vpi,
++					vport->fc_flag);
++			lpfc_ns_rsp(vport, outp,
+ 				    (uint32_t) (irsp->un.genreq64.bdl.bdeSize));
+ 		} else if (CTrsp->CommandResponse.bits.CmdRsp ==
+ 			   be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) {
+ 			/* NameServer Rsp Error */
+ 			lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-					"%d:0240 NameServer Rsp Error "
++					"%d (%d):0240 NameServer Rsp Error "
+ 					"Data: x%x x%x x%x x%x\n",
+-					phba->brd_no,
++					phba->brd_no, vport->vpi,
+ 					CTrsp->CommandResponse.bits.CmdRsp,
+ 					(uint32_t) CTrsp->ReasonCode,
+ 					(uint32_t) CTrsp->Explanation,
+-					phba->fc_flag);
++					vport->fc_flag);
++
++			lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++				"GID_FT rsp err1  cmd:x%x rsn:x%x exp:x%x",
++				(uint32_t)CTrsp->CommandResponse.bits.CmdRsp,
++				(uint32_t) CTrsp->ReasonCode,
++				(uint32_t) CTrsp->Explanation);
++
+ 		} else {
+ 			/* NameServer Rsp Error */
+-			lpfc_printf_log(phba,
+-					KERN_INFO,
+-					LOG_DISCOVERY,
+-					"%d:0241 NameServer Rsp Error "
++			lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++					"%d (%d):0241 NameServer Rsp Error "
+ 					"Data: x%x x%x x%x x%x\n",
+-					phba->brd_no,
++					phba->brd_no, vport->vpi,
+ 					CTrsp->CommandResponse.bits.CmdRsp,
+ 					(uint32_t) CTrsp->ReasonCode,
+ 					(uint32_t) CTrsp->Explanation,
+-					phba->fc_flag);
++					vport->fc_flag);
++
++			lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++				"GID_FT rsp err2  cmd:x%x rsn:x%x exp:x%x",
++				(uint32_t)CTrsp->CommandResponse.bits.CmdRsp,
++				(uint32_t) CTrsp->ReasonCode,
++				(uint32_t) CTrsp->Explanation);
+ 		}
+ 	}
+ 	/* Link up / RSCN discovery */
+-	lpfc_disc_start(phba);
++	if (vport->num_disc_nodes == 0) {
++		/*
++		 * The driver has cycled through all Nports in the RSCN payload.
++		 * Complete the handling by cleaning up and marking the
++		 * current driver state.
++		 */
++		if (vport->port_state >= LPFC_DISC_AUTH) {
++			if (vport->fc_flag & FC_RSCN_MODE) {
++				lpfc_els_flush_rscn(vport);
++				spin_lock_irq(shost->host_lock);
++				vport->fc_flag |= FC_RSCN_MODE; /* RSCN still */
++				spin_unlock_irq(shost->host_lock);
++			}
++			} else
++		}
++
++		lpfc_disc_start(vport);
++	}
+ out:
+-	lpfc_free_ct_rsp(phba, outp);
+-	lpfc_mbuf_free(phba, inp->virt, inp->phys);
+-	lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
+-	kfree(inp);
+-	kfree(bmp);
+-	spin_lock_irq(phba->host->host_lock);
+-	lpfc_sli_release_iocbq(phba, cmdiocb);
+-	spin_unlock_irq(phba->host->host_lock);
++	lpfc_ct_free_iocb(phba, cmdiocb);
++	return;
++}
++
++void
++lpfc_cmpl_ct_cmd_gff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++			struct lpfc_iocbq *rspiocb)
++{
++	struct lpfc_vport *vport = cmdiocb->vport;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	IOCB_t *irsp = &rspiocb->iocb;
++	struct lpfc_dmabuf *inp = (struct lpfc_dmabuf *) cmdiocb->context1;
++	struct lpfc_dmabuf *outp = (struct lpfc_dmabuf *) cmdiocb->context2;
++	struct lpfc_sli_ct_request *CTrsp;
++	int did;
++	uint8_t fbits;
++	struct lpfc_nodelist *ndlp;
++
++	did = ((struct lpfc_sli_ct_request *) inp->virt)->un.gff.PortId;
++	did = be32_to_cpu(did);
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++		"GFF_ID cmpl:     status:x%x/x%x did:x%x",
++		irsp->ulpStatus, irsp->un.ulpWord[4], did);
++
++	if (irsp->ulpStatus == IOSTAT_SUCCESS) {
++		/* Good status, continue checking */
++		CTrsp = (struct lpfc_sli_ct_request *) outp->virt;
++		fbits = CTrsp->un.gff_acc.fbits[FCP_TYPE_FEATURE_OFFSET];
++
++		if (CTrsp->CommandResponse.bits.CmdRsp ==
++		    be16_to_cpu(SLI_CT_RESPONSE_FS_ACC)) {
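++			/* Skip ports that are FCP initiators only, not targets */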
++			if ((fbits & FC4_FEATURE_INIT) &&
++			    !(fbits & FC4_FEATURE_TARGET)) {
++				lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++						"%d (%d):0245 Skip x%x GFF "
++						"NameServer Rsp Data: (init) "
++						"x%x x%x\n", phba->brd_no,
++						vport->vpi, did, fbits,
++						vport->fc_rscn_id_cnt);
++				goto out;
++			}
++		}
++	}
++	else {
++		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++				"%d (%d):0267 NameServer GFF Rsp"
++				" x%x Error (%d %d) Data: x%x x%x\n",
++				phba->brd_no, vport->vpi, did,
++				irsp->ulpStatus, irsp->un.ulpWord[4],
++				vport->fc_flag, vport->fc_rscn_id_cnt);
++	}
++
++	/* This is a target port, unregistered port, or the GFF_ID failed */
++	ndlp = lpfc_setup_disc_node(vport, did);
++	if (ndlp) {
++		lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++				"%d (%d):0242 Process x%x GFF "
++				"NameServer Rsp Data: x%x x%x x%x\n",
++				phba->brd_no, vport->vpi,
++				did, ndlp->nlp_flag, vport->fc_flag,
++				vport->fc_rscn_id_cnt);
++	} else {
++		lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++				"%d (%d):0243 Skip x%x GFF "
++				"NameServer Rsp Data: x%x x%x\n",
++				phba->brd_no, vport->vpi, did,
++				vport->fc_flag,	vport->fc_rscn_id_cnt);
++	}
++out:
++	/* Link up / RSCN discovery */
++	if (vport->num_disc_nodes)
++		vport->num_disc_nodes--;
++	if (vport->num_disc_nodes == 0) {
++		/*
++		 * The driver has cycled through all Nports in the RSCN payload.
++		 * Complete the handling by cleaning up and marking the
++		 * current driver state.
++		 */
++		if (vport->port_state >= LPFC_DISC_AUTH) {
++			if (vport->fc_flag & FC_RSCN_MODE) {
++				lpfc_els_flush_rscn(vport);
++				spin_lock_irq(shost->host_lock);
++				vport->fc_flag |= FC_RSCN_MODE; /* RSCN still */
++				spin_unlock_irq(shost->host_lock);
++			} else
++				lpfc_els_flush_rscn(vport);
++		}
++		lpfc_disc_start(vport);
++	}
++	lpfc_ct_free_iocb(phba, cmdiocb);
+ 	return;
+ }
+ 
++
+ static void
+-lpfc_cmpl_ct_cmd_rft_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-			struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_ct_cmd_rft_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++			struct lpfc_iocbq *rspiocb)
+ {
+-	struct lpfc_sli *psli;
+-	struct lpfc_dmabuf *bmp;
++	struct lpfc_vport *vport = cmdiocb->vport;
+ 	struct lpfc_dmabuf *inp;
+ 	struct lpfc_dmabuf *outp;
+ 	IOCB_t *irsp;
+ 	struct lpfc_sli_ct_request *CTrsp;
++	int cmdcode, rc;
++	uint8_t retry;
++	uint32_t latt;
+ 
+-	psli = &phba->sli;
+ 	/* we pass cmdiocb to state machine which needs rspiocb as well */
+ 	cmdiocb->context_un.rsp_iocb = rspiocb;
+ 
+ 	inp = (struct lpfc_dmabuf *) cmdiocb->context1;
+ 	outp = (struct lpfc_dmabuf *) cmdiocb->context2;
+-	bmp = (struct lpfc_dmabuf *) cmdiocb->context3;
+ 	irsp = &rspiocb->iocb;
+ 
++	cmdcode = be16_to_cpu(((struct lpfc_sli_ct_request *) inp->virt)->
++					CommandResponse.bits.CmdRsp);
+ 	CTrsp = (struct lpfc_sli_ct_request *) outp->virt;
+ 
++	latt = lpfc_els_chk_latt(vport);
++
+ 	/* RFT request completes status <ulpStatus> CmdRsp <CmdRsp> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-			"%d:0209 RFT request completes ulpStatus x%x "
+-			"CmdRsp x%x\n", phba->brd_no, irsp->ulpStatus,
+-			CTrsp->CommandResponse.bits.CmdRsp);
++			"%d (%d):0209 RFT request completes, latt %d, "
++			"ulpStatus x%x CmdRsp x%x, Context x%x, Tag x%x\n",
++			phba->brd_no, vport->vpi, latt, irsp->ulpStatus,
++			CTrsp->CommandResponse.bits.CmdRsp,
++			cmdiocb->iocb.ulpContext, cmdiocb->iocb.ulpIoTag);
+ 
+-	lpfc_free_ct_rsp(phba, outp);
+-	lpfc_mbuf_free(phba, inp->virt, inp->phys);
+-	lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
+-	kfree(inp);
+-	kfree(bmp);
+-	spin_lock_irq(phba->host->host_lock);
+-	lpfc_sli_release_iocbq(phba, cmdiocb);
+-	spin_unlock_irq(phba->host->host_lock);
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++		"CT cmd cmpl:     status:x%x/x%x cmd:x%x",
++		irsp->ulpStatus, irsp->un.ulpWord[4], cmdcode);
++
++	if (irsp->ulpStatus) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++			"%d (%d):0268 NS cmd %x Error (%d %d)\n",
++			phba->brd_no, vport->vpi, cmdcode,
++			irsp->ulpStatus, irsp->un.ulpWord[4]);
++
++		if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++			((irsp->un.ulpWord[4] == IOERR_SLI_DOWN) ||
++			 (irsp->un.ulpWord[4] == IOERR_SLI_ABORTED)))
++			goto out;
++
++		retry = cmdiocb->retry;
++		if (retry >= LPFC_MAX_NS_RETRY)
++			goto out;
++
++		retry++;
++		lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++				"%d (%d):0216 Retrying NS cmd %x\n",
++				phba->brd_no, vport->vpi, cmdcode);
++		rc = lpfc_ns_cmd(vport, cmdcode, retry, 0);
++		if (rc == 0)
++			goto out;
++	}
++
++out:
++	lpfc_ct_free_iocb(phba, cmdiocb);
+ 	return;
+ }
+ 
+ static void
+-lpfc_cmpl_ct_cmd_rnn_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-			struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_ct_cmd_rnn_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++			struct lpfc_iocbq *rspiocb)
+ {
+ 	lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb);
+ 	return;
+ }
+ 
+ static void
+-lpfc_cmpl_ct_cmd_rsnn_nn(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-			 struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_ct_cmd_rspn_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++			 struct lpfc_iocbq *rspiocb)
+ {
+ 	lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb);
+ 	return;
+ }
+ 
+ static void
+-lpfc_cmpl_ct_cmd_rff_id(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-			 struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_ct_cmd_rsnn_nn(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++			 struct lpfc_iocbq *rspiocb)
+ {
+ 	lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb);
+ 	return;
+ }
+ 
+-void
+-lpfc_get_hba_sym_node_name(struct lpfc_hba * phba, uint8_t * symbp)
++static void
++lpfc_cmpl_ct_cmd_rff_id(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++			struct lpfc_iocbq *rspiocb)
+ {
+-	char fwrev[16];
++	IOCB_t *irsp = &rspiocb->iocb;
++	struct lpfc_vport *vport = cmdiocb->vport;
+ 
+-	lpfc_decode_firmware_rev(phba, fwrev, 0);
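++	/* Fabric rejected RFF_ID; lpfc_ns_rsp() will skip GFF_ID filtering */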
++	if (irsp->ulpStatus != IOSTAT_SUCCESS)
++		vport->fc_flag |= FC_RFF_NOT_SUPPORTED;
+ 
+-	sprintf(symbp, "Emulex %s FV%s DV%s", phba->ModelName,
+-		fwrev, lpfc_release_version);
++	lpfc_cmpl_ct_cmd_rft_id(phba, cmdiocb, rspiocb);
+ 	return;
+ }
+ 
++int
++lpfc_vport_symbolic_port_name(struct lpfc_vport *vport, char *symbol,
++	size_t size)
++{
++	int n;
++	uint8_t *wwn = vport->phba->wwpn;
++
++	n = snprintf(symbol, size,
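++	/* Physical ports report just "Emulex PPN-..."; NPIV ports append
++	 * the VPort index and, when set, the vport name.
++	 */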
++		     "Emulex PPN-%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x",
++		     wwn[0], wwn[1], wwn[2], wwn[3],
++		     wwn[4], wwn[5], wwn[6], wwn[7]);
++
++	if (vport->port_type == LPFC_PHYSICAL_PORT)
++		return n;
++
++	if (n < size)
++		n += snprintf(symbol + n, size - n, " VPort-%d", vport->vpi);
++
++	if (n < size && vport->vname)
++		n += snprintf(symbol + n, size - n, " VName-%s", vport->vname);
++	return n;
++}
++
++int
++lpfc_vport_symbolic_node_name(struct lpfc_vport *vport, char *symbol,
++	size_t size)
++{
++	char fwrev[16];
++	int n;
++
++	lpfc_decode_firmware_rev(vport->phba, fwrev, 0);
++
++	n = snprintf(symbol, size, "Emulex %s FV%s DV%s",
++		vport->phba->ModelName, fwrev, lpfc_release_version);
++	return n;
++}
++
+ /*
+  * lpfc_ns_cmd
+  * Description:
+@@ -585,55 +907,76 @@
+  *       LI_CTNS_RFT_ID
+  */
+ int
+-lpfc_ns_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode)
++lpfc_ns_cmd(struct lpfc_vport *vport, int cmdcode,
++	    uint8_t retry, uint32_t context)
+ {
++	struct lpfc_nodelist * ndlp;
++	struct lpfc_hba *phba = vport->phba;
+ 	struct lpfc_dmabuf *mp, *bmp;
+ 	struct lpfc_sli_ct_request *CtReq;
+ 	struct ulp_bde64 *bpl;
+ 	void (*cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
+ 		      struct lpfc_iocbq *) = NULL;
+ 	uint32_t rsp_size = 1024;
++	size_t   size;
++	int rc = 0;
++
++	ndlp = lpfc_findnode_did(vport, NameServer_DID);
++	if (ndlp == NULL || ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) {
++		rc = 1;
++		goto ns_cmd_exit;
++	}
+ 
+ 	/* fill in BDEs for command */
+ 	/* Allocate buffer for command payload */
+ 	mp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL);
+-	if (!mp)
++	if (!mp) {
++		rc = 2;
+ 		goto ns_cmd_exit;
++	}
+ 
+ 	INIT_LIST_HEAD(&mp->list);
+ 	mp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(mp->phys));
+-	if (!mp->virt)
++	if (!mp->virt) {
++		rc = 3;
+ 		goto ns_cmd_free_mp;
++	}
+ 
+ 	/* Allocate buffer for Buffer ptr list */
+ 	bmp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL);
+-	if (!bmp)
++	if (!bmp) {
++		rc = 4;
+ 		goto ns_cmd_free_mpvirt;
++	}
+ 
+ 	INIT_LIST_HEAD(&bmp->list);
+ 	bmp->virt = lpfc_mbuf_alloc(phba, MEM_PRI, &(bmp->phys));
+-	if (!bmp->virt)
++	if (!bmp->virt) {
++		rc = 5;
+ 		goto ns_cmd_free_bmp;
++	}
+ 
+ 	/* NameServer Req */
+-	lpfc_printf_log(phba,
+-			KERN_INFO,
+-			LOG_DISCOVERY,
+-			"%d:0236 NameServer Req Data: x%x x%x x%x\n",
+-			phba->brd_no, cmdcode, phba->fc_flag,
+-			phba->fc_rscn_id_cnt);
++	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++			"%d (%d):0236 NameServer Req Data: x%x x%x x%x\n",
++			phba->brd_no, vport->vpi, cmdcode, vport->fc_flag,
++			vport->fc_rscn_id_cnt);
+ 
+ 	bpl = (struct ulp_bde64 *) bmp->virt;
+ 	memset(bpl, 0, sizeof(struct ulp_bde64));
+-	bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) );
+-	bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) );
++	bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys));
++	bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys));
+ 	bpl->tus.f.bdeFlags = 0;
+ 	if (cmdcode == SLI_CTNS_GID_FT)
+ 		bpl->tus.f.bdeSize = GID_REQUEST_SZ;
++	else if (cmdcode == SLI_CTNS_GFF_ID)
++		bpl->tus.f.bdeSize = GFF_REQUEST_SZ;
+ 	else if (cmdcode == SLI_CTNS_RFT_ID)
+ 		bpl->tus.f.bdeSize = RFT_REQUEST_SZ;
+ 	else if (cmdcode == SLI_CTNS_RNN_ID)
+ 		bpl->tus.f.bdeSize = RNN_REQUEST_SZ;
++	else if (cmdcode == SLI_CTNS_RSPN_ID)
++		bpl->tus.f.bdeSize = RSPN_REQUEST_SZ;
+ 	else if (cmdcode == SLI_CTNS_RSNN_NN)
+ 		bpl->tus.f.bdeSize = RSNN_REQUEST_SZ;
+ 	else if (cmdcode == SLI_CTNS_RFF_ID)
+@@ -654,56 +997,78 @@
+ 		CtReq->CommandResponse.bits.CmdRsp =
+ 		    be16_to_cpu(SLI_CTNS_GID_FT);
+ 		CtReq->un.gid.Fc4Type = SLI_CTPT_FCP;
+-		if (phba->hba_state < LPFC_HBA_READY)
+-			phba->hba_state = LPFC_NS_QRY;
+-		lpfc_set_disctmo(phba);
++		if (vport->port_state < LPFC_NS_QRY)
++			vport->port_state = LPFC_NS_QRY;
++		lpfc_set_disctmo(vport);
+ 		cmpl = lpfc_cmpl_ct_cmd_gid_ft;
+ 		rsp_size = FC_MAX_NS_RSP;
+ 		break;
+ 
++	case SLI_CTNS_GFF_ID:
++		CtReq->CommandResponse.bits.CmdRsp =
++			be16_to_cpu(SLI_CTNS_GFF_ID);
++		CtReq->un.gff.PortId = be32_to_cpu(context);
++		cmpl = lpfc_cmpl_ct_cmd_gff_id;
++		break;
++
+ 	case SLI_CTNS_RFT_ID:
+ 		CtReq->CommandResponse.bits.CmdRsp =
+ 		    be16_to_cpu(SLI_CTNS_RFT_ID);
+-		CtReq->un.rft.PortId = be32_to_cpu(phba->fc_myDID);
++		CtReq->un.rft.PortId = be32_to_cpu(vport->fc_myDID);
+ 		CtReq->un.rft.fcpReg = 1;
+ 		cmpl = lpfc_cmpl_ct_cmd_rft_id;
+ 		break;
+ 
+-	case SLI_CTNS_RFF_ID:
+-		CtReq->CommandResponse.bits.CmdRsp =
+-			be16_to_cpu(SLI_CTNS_RFF_ID);
+-		CtReq->un.rff.PortId = be32_to_cpu(phba->fc_myDID);
+-		CtReq->un.rff.feature_res = 0;
+-		CtReq->un.rff.feature_tgt = 0;
+-		CtReq->un.rff.type_code = FC_FCP_DATA;
+-		CtReq->un.rff.feature_init = 1;
+-		cmpl = lpfc_cmpl_ct_cmd_rff_id;
+-		break;
+-
+ 	case SLI_CTNS_RNN_ID:
+ 		CtReq->CommandResponse.bits.CmdRsp =
+ 		    be16_to_cpu(SLI_CTNS_RNN_ID);
+-		CtReq->un.rnn.PortId = be32_to_cpu(phba->fc_myDID);
+-		memcpy(CtReq->un.rnn.wwnn,  &phba->fc_nodename,
++		CtReq->un.rnn.PortId = be32_to_cpu(vport->fc_myDID);
++		memcpy(CtReq->un.rnn.wwnn,  &vport->fc_nodename,
+ 		       sizeof (struct lpfc_name));
+ 		cmpl = lpfc_cmpl_ct_cmd_rnn_id;
+ 		break;
+ 
++	case SLI_CTNS_RSPN_ID:
++		CtReq->CommandResponse.bits.CmdRsp =
++		    be16_to_cpu(SLI_CTNS_RSPN_ID);
++		CtReq->un.rspn.PortId = be32_to_cpu(vport->fc_myDID);
++		size = sizeof(CtReq->un.rspn.symbname);
++		CtReq->un.rspn.len =
++			lpfc_vport_symbolic_port_name(vport,
++			CtReq->un.rspn.symbname, size);
++		cmpl = lpfc_cmpl_ct_cmd_rspn_id;
++		break;
+ 	case SLI_CTNS_RSNN_NN:
+ 		CtReq->CommandResponse.bits.CmdRsp =
+ 		    be16_to_cpu(SLI_CTNS_RSNN_NN);
+-		memcpy(CtReq->un.rsnn.wwnn, &phba->fc_nodename,
++		memcpy(CtReq->un.rsnn.wwnn, &vport->fc_nodename,
+ 		       sizeof (struct lpfc_name));
+-		lpfc_get_hba_sym_node_name(phba, CtReq->un.rsnn.symbname);
+-		CtReq->un.rsnn.len = strlen(CtReq->un.rsnn.symbname);
++		size = sizeof(CtReq->un.rsnn.symbname);
++		CtReq->un.rsnn.len =
++			lpfc_vport_symbolic_node_name(vport,
++			CtReq->un.rsnn.symbname, size);
+ 		cmpl = lpfc_cmpl_ct_cmd_rsnn_nn;
+ 		break;
++	case SLI_CTNS_RFF_ID:
++		vport->fc_flag &= ~FC_RFF_NOT_SUPPORTED;
++		CtReq->CommandResponse.bits.CmdRsp =
++		    be16_to_cpu(SLI_CTNS_RFF_ID);
++		CtReq->un.rff.PortId = be32_to_cpu(vport->fc_myDID);
++		CtReq->un.rff.fbits = FC4_FEATURE_INIT;
++		CtReq->un.rff.type_code = FC_FCP_DATA;
++		cmpl = lpfc_cmpl_ct_cmd_rff_id;
++		break;
+ 	}
+ 
+-	if (!lpfc_ct_cmd(phba, mp, bmp, ndlp, cmpl, rsp_size))
++	if (!lpfc_ct_cmd(vport, mp, bmp, ndlp, cmpl, rsp_size, retry)) {
+ 		/* On success, The cmpl function will free the buffers */
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++			"Issue CT cmd:    cmd:x%x did:x%x",
++			cmdcode, ndlp->nlp_DID, 0);
+ 		return 0;
++	}
+ 
++	rc = 6;
+ 	lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
+ ns_cmd_free_bmp:
+ 	kfree(bmp);
+@@ -712,14 +1077,17 @@
+ ns_cmd_free_mp:
+ 	kfree(mp);
+ ns_cmd_exit:
++	lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++		"%d (%d):0266 Issue NameServer Req x%x err %d Data: x%x x%x\n",
++			phba->brd_no, vport->vpi, cmdcode, rc, vport->fc_flag,
++			vport->fc_rscn_id_cnt);
+ 	return 1;
+ }
+ 
+ static void
+-lpfc_cmpl_ct_cmd_fdmi(struct lpfc_hba * phba,
+-		      struct lpfc_iocbq * cmdiocb, struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_ct_cmd_fdmi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++		      struct lpfc_iocbq * rspiocb)
+ {
+-	struct lpfc_dmabuf *bmp = cmdiocb->context3;
+ 	struct lpfc_dmabuf *inp = cmdiocb->context1;
+ 	struct lpfc_dmabuf *outp = cmdiocb->context2;
+ 	struct lpfc_sli_ct_request *CTrsp = outp->virt;
+@@ -727,48 +1095,60 @@
+ 	struct lpfc_nodelist *ndlp;
+ 	uint16_t fdmi_cmd = CTcmd->CommandResponse.bits.CmdRsp;
+ 	uint16_t fdmi_rsp = CTrsp->CommandResponse.bits.CmdRsp;
++	struct lpfc_vport *vport = cmdiocb->vport;
++	IOCB_t *irsp = &rspiocb->iocb;
++	uint32_t latt;
++
++	latt = lpfc_els_chk_latt(vport);
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_CT,
++		"FDMI cmpl:       status:x%x/x%x latt:%d",
++		irsp->ulpStatus, irsp->un.ulpWord[4], latt);
+ 
+-	ndlp = lpfc_findnode_did(phba, FDMI_DID);
++	if (latt || irsp->ulpStatus) {
++		lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++			        "%d (%d):0229 FDMI cmd %04x failed, latt = %d "
++				"ulpStatus: x%x, rid x%x\n",
++			        phba->brd_no, vport->vpi,
++				be16_to_cpu(fdmi_cmd), latt, irsp->ulpStatus,
++				irsp->un.ulpWord[4]);
++		lpfc_ct_free_iocb(phba, cmdiocb);
++		return;
++	}
++
++	ndlp = lpfc_findnode_did(vport, FDMI_DID);
+ 	if (fdmi_rsp == be16_to_cpu(SLI_CT_RESPONSE_FS_RJT)) {
+ 		/* FDMI rsp failed */
+-		lpfc_printf_log(phba,
+-			        KERN_INFO,
+-			        LOG_DISCOVERY,
+-			        "%d:0220 FDMI rsp failed Data: x%x\n",
+-			        phba->brd_no,
++		lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++			        "%d (%d):0220 FDMI rsp failed Data: x%x\n",
++			        phba->brd_no, vport->vpi,
+ 			       be16_to_cpu(fdmi_cmd));
+ 	}
+ 
+ 	switch (be16_to_cpu(fdmi_cmd)) {
+ 	case SLI_MGMT_RHBA:
+-		lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_RPA);
++		lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_RPA);
+ 		break;
+ 
+ 	case SLI_MGMT_RPA:
+ 		break;
+ 
+ 	case SLI_MGMT_DHBA:
+-		lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DPRT);
++		lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DPRT);
+ 		break;
+ 
+ 	case SLI_MGMT_DPRT:
+-		lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_RHBA);
++		lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_RHBA);
+ 		break;
+ 	}
+-
+-	lpfc_free_ct_rsp(phba, outp);
+-	lpfc_mbuf_free(phba, inp->virt, inp->phys);
+-	lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
+-	kfree(inp);
+-	kfree(bmp);
+-	spin_lock_irq(phba->host->host_lock);
+-	lpfc_sli_release_iocbq(phba, cmdiocb);
+-	spin_unlock_irq(phba->host->host_lock);
++	lpfc_ct_free_iocb(phba, cmdiocb);
+ 	return;
+ }
++
+ int
+-lpfc_fdmi_cmd(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp, int cmdcode)
++lpfc_fdmi_cmd(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int cmdcode)
+ {
++	struct lpfc_hba *phba = vport->phba;
+ 	struct lpfc_dmabuf *mp, *bmp;
+ 	struct lpfc_sli_ct_request *CtReq;
+ 	struct ulp_bde64 *bpl;
+@@ -805,12 +1185,10 @@
+ 	INIT_LIST_HEAD(&bmp->list);
+ 
+ 	/* FDMI request */
+-	lpfc_printf_log(phba,
+-		        KERN_INFO,
+-		        LOG_DISCOVERY,
+-		        "%d:0218 FDMI Request Data: x%x x%x x%x\n",
+-		        phba->brd_no,
+-		       phba->fc_flag, phba->hba_state, cmdcode);
++	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++			"%d (%d):0218 FDMI Request Data: x%x x%x x%x\n",
++			phba->brd_no, vport->vpi, vport->fc_flag,
++			vport->port_state, cmdcode);
+ 
+ 	CtReq = (struct lpfc_sli_ct_request *) mp->virt;
+ 
+@@ -833,11 +1211,11 @@
+ 			    be16_to_cpu(SLI_MGMT_RHBA);
+ 			CtReq->CommandResponse.bits.Size = 0;
+ 			rh = (REG_HBA *) & CtReq->un.PortID;
+-			memcpy(&rh->hi.PortName, &phba->fc_sparam.portName,
++			memcpy(&rh->hi.PortName, &vport->fc_sparam.portName,
+ 			       sizeof (struct lpfc_name));
+ 			/* One entry (port) per adapter */
+ 			rh->rpl.EntryCnt = be32_to_cpu(1);
+-			memcpy(&rh->rpl.pe, &phba->fc_sparam.portName,
++			memcpy(&rh->rpl.pe, &vport->fc_sparam.portName,
+ 			       sizeof (struct lpfc_name));
+ 
+ 			/* point to the HBA attribute block */
+@@ -853,7 +1231,7 @@
+ 			ae->ad.bits.AttrType = be16_to_cpu(NODE_NAME);
+ 			ae->ad.bits.AttrLen =  be16_to_cpu(FOURBYTES
+ 						+ sizeof (struct lpfc_name));
+-			memcpy(&ae->un.NodeName, &phba->fc_sparam.nodeName,
++			memcpy(&ae->un.NodeName, &vport->fc_sparam.nodeName,
+ 			       sizeof (struct lpfc_name));
+ 			ab->EntryCnt++;
+ 			size += FOURBYTES + sizeof (struct lpfc_name);
+@@ -991,7 +1369,7 @@
+ 			pab = (REG_PORT_ATTRIBUTE *) & CtReq->un.PortID;
+ 			size = sizeof (struct lpfc_name) + FOURBYTES;
+ 			memcpy((uint8_t *) & pab->PortName,
+-			       (uint8_t *) & phba->fc_sparam.portName,
++			       (uint8_t *) & vport->fc_sparam.portName,
+ 			       sizeof (struct lpfc_name));
+ 			pab->ab.EntryCnt = 0;
+ 
+@@ -1053,7 +1431,7 @@
+ 			ae = (ATTRIBUTE_ENTRY *) ((uint8_t *) pab + size);
+ 			ae->ad.bits.AttrType = be16_to_cpu(MAX_FRAME_SIZE);
+ 			ae->ad.bits.AttrLen = be16_to_cpu(FOURBYTES + 4);
+-			hsp = (struct serv_parm *) & phba->fc_sparam;
++			hsp = (struct serv_parm *) & vport->fc_sparam;
+ 			ae->un.MaxFrameSize =
+ 			    (((uint32_t) hsp->cmn.
+ 			      bbRcvSizeMsb) << 8) | (uint32_t) hsp->cmn.
+@@ -1097,7 +1475,7 @@
+ 		CtReq->CommandResponse.bits.Size = 0;
+ 		pe = (PORT_ENTRY *) & CtReq->un.PortID;
+ 		memcpy((uint8_t *) & pe->PortName,
+-		       (uint8_t *) & phba->fc_sparam.portName,
++		       (uint8_t *) & vport->fc_sparam.portName,
+ 		       sizeof (struct lpfc_name));
+ 		size = GID_REQUEST_SZ - 4 + sizeof (struct lpfc_name);
+ 		break;
+@@ -1107,22 +1485,22 @@
+ 		CtReq->CommandResponse.bits.Size = 0;
+ 		pe = (PORT_ENTRY *) & CtReq->un.PortID;
+ 		memcpy((uint8_t *) & pe->PortName,
+-		       (uint8_t *) & phba->fc_sparam.portName,
++		       (uint8_t *) & vport->fc_sparam.portName,
+ 		       sizeof (struct lpfc_name));
+ 		size = GID_REQUEST_SZ - 4 + sizeof (struct lpfc_name);
+ 		break;
+ 	}
+ 
+ 	bpl = (struct ulp_bde64 *) bmp->virt;
+-	bpl->addrHigh = le32_to_cpu( putPaddrHigh(mp->phys) );
+-	bpl->addrLow = le32_to_cpu( putPaddrLow(mp->phys) );
++	bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys));
++	bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys));
+ 	bpl->tus.f.bdeFlags = 0;
+ 	bpl->tus.f.bdeSize = size;
+ 	bpl->tus.w = le32_to_cpu(bpl->tus.w);
+ 
+ 	cmpl = lpfc_cmpl_ct_cmd_fdmi;
+ 
+-	if (!lpfc_ct_cmd(phba, mp, bmp, ndlp, cmpl, FC_MAX_NS_RSP))
++	if (!lpfc_ct_cmd(vport, mp, bmp, ndlp, cmpl, FC_MAX_NS_RSP, 0))
+ 		return 0;
+ 
+ 	lpfc_mbuf_free(phba, bmp->virt, bmp->phys);
+@@ -1134,49 +1512,50 @@
+ 	kfree(mp);
+ fdmi_cmd_exit:
+ 	/* Issue FDMI request failed */
+-	lpfc_printf_log(phba,
+-		        KERN_INFO,
+-		        LOG_DISCOVERY,
+-		        "%d:0244 Issue FDMI request failed Data: x%x\n",
+-		        phba->brd_no,
+-			cmdcode);
++	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++		        "%d (%d):0244 Issue FDMI request failed Data: x%x\n",
++		        phba->brd_no, vport->vpi, cmdcode);
+ 	return 1;
+ }
+ 
+ void
+ lpfc_fdmi_tmo(unsigned long ptr)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba *)ptr;
++	struct lpfc_vport *vport = (struct lpfc_vport *)ptr;
++	struct lpfc_hba   *phba = vport->phba;
+ 	unsigned long iflag;
+ 
+-	spin_lock_irqsave(phba->host->host_lock, iflag);
+-	if (!(phba->work_hba_events & WORKER_FDMI_TMO)) {
+-		phba->work_hba_events |= WORKER_FDMI_TMO;
++	spin_lock_irqsave(&vport->work_port_lock, iflag);
++	if (!(vport->work_port_events & WORKER_FDMI_TMO)) {
++		vport->work_port_events |= WORKER_FDMI_TMO;
++		spin_unlock_irqrestore(&vport->work_port_lock, iflag);
++
++		spin_lock_irqsave(&phba->hbalock, iflag);
+ 		if (phba->work_wait)
+-			wake_up(phba->work_wait);
++			lpfc_worker_wake_up(phba);
++		spin_unlock_irqrestore(&phba->hbalock, iflag);
+ 	}
+-	spin_unlock_irqrestore(phba->host->host_lock,iflag);
++	else
++		spin_unlock_irqrestore(&vport->work_port_lock, iflag);
+ }
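++
++/* Editor's note (not part of the original patch): lpfc_fdmi_tmo() runs in
++ * timer (softirq) context, so it only records WORKER_FDMI_TMO under the
++ * vport lock and wakes the worker thread; the FDMI command itself is sent
++ * later, in process context, by lpfc_fdmi_timeout_handler() below. */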
+ 
+ void
+-lpfc_fdmi_tmo_handler(struct lpfc_hba *phba)
++lpfc_fdmi_timeout_handler(struct lpfc_vport *vport)
+ {
+ 	struct lpfc_nodelist *ndlp;
+ 
+-	ndlp = lpfc_findnode_did(phba, FDMI_DID);
++	ndlp = lpfc_findnode_did(vport, FDMI_DID);
+ 	if (ndlp) {
+-		if (init_utsname()->nodename[0] != '\0') {
+-			lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DHBA);
+-		} else {
+-			mod_timer(&phba->fc_fdmitmo, jiffies + HZ * 60);
+-		}
++		if (init_utsname()->nodename[0] != '\0')
++			lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DHBA);
++		else
++			mod_timer(&vport->fc_fdmitmo, jiffies + HZ * 60);
+ 	}
+ 	return;
+ }
+ 
+-
+ void
+-lpfc_decode_firmware_rev(struct lpfc_hba * phba, char *fwrevision, int flag)
++lpfc_decode_firmware_rev(struct lpfc_hba *phba, char *fwrevision, int flag)
+ {
+ 	struct lpfc_sli *psli = &phba->sli;
+ 	lpfc_vpd_t *vp = &phba->vpd;
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.c linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_debugfs.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_debugfs.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,508 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for         *
++ * Fibre Channel Host Bus Adapters.                                *
++ * Copyright (C) 2007 Emulex.  All rights reserved.                *
++ * EMULEX and SLI are trademarks of Emulex.                        *
++ * www.emulex.com                                                  *
++ *                                                                 *
++ * This program is free software; you can redistribute it and/or   *
++ * modify it under the terms of version 2 of the GNU General       *
++ * Public License as published by the Free Software Foundation.    *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
++ * more details, a copy of which can be found in the file COPYING  *
++ * included with this package.                                     *
++ *******************************************************************/
++
++#include <linux/blkdev.h>
++#include <linux/delay.h>
++#include <linux/dma-mapping.h>
++#include <linux/idr.h>
++#include <linux/interrupt.h>
++#include <linux/kthread.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++#include <linux/ctype.h>
++#include <linux/version.h>
++
++#include <scsi/scsi.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_transport_fc.h>
++
++#include "lpfc_hw.h"
++#include "lpfc_sli.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_crtn.h"
++#include "lpfc_vport.h"
++#include "lpfc_version.h"
++#include "lpfc_vport.h"
++#include "lpfc_debugfs.h"
++
++#ifdef CONFIG_LPFC_DEBUG_FS
++/* debugfs interface
++ *
++ * To access this interface the user should:
++ * # mkdir /debug
++ * # mount -t debugfs none /debug
++ *
++ * The lpfc debugfs directory hierarchy is:
++ * lpfc/lpfcX/vportY
++ * where X is the lpfc hba unique_id
++ * where Y is the vport VPI on that hba
++ *
++ * Debugging services available per vport:
++ * discovery_trace
++ * This is an ASCII readable file that contains a trace of the last
++ * lpfc_debugfs_max_disc_trc events that happened on a specific vport.
++ * See lpfc_debugfs.h for different categories of
++ * discovery events. To enable the discovery trace, the following
++ * module parameters must be set:
++ * lpfc_debugfs_enable=1         Turns on lpfc debugfs filesystem support
++ * lpfc_debugfs_max_disc_trc=X   Where X is the event trace depth for
++ *                               EACH vport. X MUST also be a power of 2.
++ * lpfc_debugfs_mask_disc_trc=Y  Where Y is an event mask as defined in
++ *                               lpfc_debugfs.h .
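++ *
++ * Example (editor's illustrative note, not from the original patch):
++ *   modprobe lpfc lpfc_debugfs_enable=1 lpfc_debugfs_max_disc_trc=64 \
++ *           lpfc_debugfs_mask_disc_trc=0x7
++ * captures the last 64 ELS command/response/unsolicited events per vport
++ * (0x7 is LPFC_DISC_TRC_ELS_ALL in lpfc_debugfs.h).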
++ */
++static int lpfc_debugfs_enable = 0;
++module_param(lpfc_debugfs_enable, int, 0);
++MODULE_PARM_DESC(lpfc_debugfs_enable, "Enable debugfs services");
++
++static int lpfc_debugfs_max_disc_trc = 0;  /* This MUST be a power of 2 */
++module_param(lpfc_debugfs_max_disc_trc, int, 0);
++MODULE_PARM_DESC(lpfc_debugfs_max_disc_trc,
++	"Set debugfs discovery trace depth");
++
++static int lpfc_debugfs_mask_disc_trc = 0;
++module_param(lpfc_debugfs_mask_disc_trc, int, 0);
++MODULE_PARM_DESC(lpfc_debugfs_mask_disc_trc,
++	"Set debugfs discovery trace mask");
++
++#include <linux/debugfs.h>
++
++/* size of discovery_trace output line */
++#define LPFC_DISC_TRC_ENTRY_SIZE 80
++
++/* nodelist output buffer size */
++#define LPFC_NODELIST_SIZE 8192
++#define LPFC_NODELIST_ENTRY_SIZE 120
++
++struct lpfc_debug {
++	char *buffer;
++	int  len;
++};
++
++atomic_t lpfc_debugfs_disc_trc_cnt = ATOMIC_INIT(0);
++unsigned long lpfc_debugfs_start_time = 0L;
++
++static int
++lpfc_debugfs_disc_trc_data(struct lpfc_vport *vport, char *buf, int size)
++{
++	int i, index, len, enable;
++	uint32_t ms;
++	struct lpfc_disc_trc *dtp;
++	char buffer[80];
++
++
++	enable = lpfc_debugfs_enable;
++	lpfc_debugfs_enable = 0;
++
++	len = 0;
++	index = (atomic_read(&vport->disc_trc_cnt) + 1) &
++		(lpfc_debugfs_max_disc_trc - 1);
++	for (i = index; i < lpfc_debugfs_max_disc_trc; i++) {
++		dtp = vport->disc_trc + i;
++		if (!dtp->fmt)
++			continue;
++		ms = jiffies_to_msecs(dtp->jif - lpfc_debugfs_start_time);
++		snprintf(buffer, 80, "%010d:%010d ms:%s\n",
++			dtp->seq_cnt, ms, dtp->fmt);
++		len +=  snprintf(buf+len, size-len, buffer,
++			dtp->data1, dtp->data2, dtp->data3);
++	}
++	for (i = 0; i < index; i++) {
++		dtp = vport->disc_trc + i;
++		if (!dtp->fmt)
++			continue;
++		ms = jiffies_to_msecs(dtp->jif - lpfc_debugfs_start_time);
++		snprintf(buffer, 80, "%010d:%010d ms:%s\n",
++			dtp->seq_cnt, ms, dtp->fmt);
++		len +=  snprintf(buf+len, size-len, buffer,
++			dtp->data1, dtp->data2, dtp->data3);
++	}
++
++	lpfc_debugfs_enable = enable;
++	return len;
++}
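++/* Editor's note: the two loops above walk the ring from the slot just past
++ * the most recent entry to the end of the array, then from slot 0 back up
++ * to it, so the dump reads oldest-to-newest regardless of wrap. */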
++
++static int
++lpfc_debugfs_nodelist_data(struct lpfc_vport *vport, char *buf, int size)
++{
++	int len = 0;
++	int cnt;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_nodelist *ndlp;
++	unsigned char *statep, *name;
++
++	cnt = (LPFC_NODELIST_SIZE / LPFC_NODELIST_ENTRY_SIZE);
++
++	spin_lock_irq(shost->host_lock);
++	list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
++		if (!cnt) {
++			len +=  snprintf(buf+len, size-len,
++				"Missing Nodelist Entries\n");
++			break;
++		}
++		cnt--;
++		switch (ndlp->nlp_state) {
++		case NLP_STE_UNUSED_NODE:
++			statep = "UNUSED";
++			break;
++		case NLP_STE_PLOGI_ISSUE:
++			statep = "PLOGI ";
++			break;
++		case NLP_STE_ADISC_ISSUE:
++			statep = "ADISC ";
++			break;
++		case NLP_STE_REG_LOGIN_ISSUE:
++			statep = "REGLOG";
++			break;
++		case NLP_STE_PRLI_ISSUE:
++			statep = "PRLI  ";
++			break;
++		case NLP_STE_UNMAPPED_NODE:
++			statep = "UNMAP ";
++			break;
++		case NLP_STE_MAPPED_NODE:
++			statep = "MAPPED";
++			break;
++		case NLP_STE_NPR_NODE:
++			statep = "NPR   ";
++			break;
++		default:
++			statep = "UNKNOWN";
++		}
++		len +=  snprintf(buf+len, size-len, "%s DID:x%06x ",
++			statep, ndlp->nlp_DID);
++		name = (unsigned char *)&ndlp->nlp_portname;
++		len +=  snprintf(buf+len, size-len,
++			"WWPN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x ",
++			*name, *(name+1), *(name+2), *(name+3),
++			*(name+4), *(name+5), *(name+6), *(name+7));
++		name = (unsigned char *)&ndlp->nlp_nodename;
++		len +=  snprintf(buf+len, size-len,
++			"WWNN %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x ",
++			*name, *(name+1), *(name+2), *(name+3),
++			*(name+4), *(name+5), *(name+6), *(name+7));
++		len +=  snprintf(buf+len, size-len, "RPI:%03d flag:x%08x ",
++			ndlp->nlp_rpi, ndlp->nlp_flag);
++		if (!ndlp->nlp_type)
++			len +=  snprintf(buf+len, size-len, "UNKNOWN_TYPE");
++		if (ndlp->nlp_type & NLP_FC_NODE)
++			len +=  snprintf(buf+len, size-len, "FC_NODE ");
++		if (ndlp->nlp_type & NLP_FABRIC)
++			len +=  snprintf(buf+len, size-len, "FABRIC ");
++		if (ndlp->nlp_type & NLP_FCP_TARGET)
++			len +=  snprintf(buf+len, size-len, "FCP_TGT sid:%d ",
++				ndlp->nlp_sid);
++		if (ndlp->nlp_type & NLP_FCP_INITIATOR)
++			len +=  snprintf(buf+len, size-len, "FCP_INITIATOR");
++		len +=  snprintf(buf+len, size-len, "\n");
++	}
++	spin_unlock_irq(shost->host_lock);
++	return len;
++}
++#endif
++
++
++inline void
++lpfc_debugfs_disc_trc(struct lpfc_vport *vport, int mask, char *fmt,
++	uint32_t data1, uint32_t data2, uint32_t data3)
++{
++#ifdef CONFIG_LPFC_DEBUG_FS
++	struct lpfc_disc_trc *dtp;
++	int index;
++
++	if (!(lpfc_debugfs_mask_disc_trc & mask))
++		return;
++
++	if (!lpfc_debugfs_enable || !lpfc_debugfs_max_disc_trc ||
++		!vport || !vport->disc_trc)
++		return;
++
++	index = atomic_inc_return(&vport->disc_trc_cnt) &
++		(lpfc_debugfs_max_disc_trc - 1);
++	dtp = vport->disc_trc + index;
++	dtp->fmt = fmt;
++	dtp->data1 = data1;
++	dtp->data2 = data2;
++	dtp->data3 = data3;
++	dtp->seq_cnt = atomic_inc_return(&lpfc_debugfs_disc_trc_cnt);
++	dtp->jif = jiffies;
++#endif
++	return;
++}
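++
++/* Editor's note (illustrative sketch, not in the original patch): the
++ * recorder above relies on lpfc_debugfs_max_disc_trc being a power of two,
++ * so the atomic counter reduces to a slot index with a single AND instead
++ * of a modulo.  The same idiom in isolation, with hypothetical names:
++ */
++#if 0
++#define RING_SZ 64				/* must be a power of two */
++static struct lpfc_disc_trc ring[RING_SZ];
++static atomic_t ring_cnt = ATOMIC_INIT(0);
++
++static void
++ring_record(char *fmt, uint32_t data1)
++{
++	int idx = atomic_inc_return(&ring_cnt) & (RING_SZ - 1);
++
++	ring[idx].fmt   = fmt;		/* oldest slot silently overwritten */
++	ring[idx].data1 = data1;
++	ring[idx].jif   = jiffies;
++}
++#endif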
++
++#ifdef CONFIG_LPFC_DEBUG_FS
++static int
++lpfc_debugfs_disc_trc_open(struct inode *inode, struct file *file)
++{
++	struct lpfc_vport *vport = inode->i_private;
++	struct lpfc_debug *debug;
++	int size;
++	int rc = -ENOMEM;
++
++	if (!lpfc_debugfs_max_disc_trc) {
++		rc = -ENOSPC;
++		goto out;
++	}
++
++	debug = kmalloc(sizeof(*debug), GFP_KERNEL);
++	if (!debug)
++		goto out;
++
++	/* Round to page boundary */
++	size =  (lpfc_debugfs_max_disc_trc * LPFC_DISC_TRC_ENTRY_SIZE);
++	size = PAGE_ALIGN(size);
++
++	debug->buffer = kmalloc(size, GFP_KERNEL);
++	if (!debug->buffer) {
++		kfree(debug);
++		goto out;
++	}
++
++	debug->len = lpfc_debugfs_disc_trc_data(vport, debug->buffer, size);
++	file->private_data = debug;
++
++	rc = 0;
++out:
++	return rc;
++}
++
++static int
++lpfc_debugfs_nodelist_open(struct inode *inode, struct file *file)
++{
++	struct lpfc_vport *vport = inode->i_private;
++	struct lpfc_debug *debug;
++	int rc = -ENOMEM;
++
++	debug = kmalloc(sizeof(*debug), GFP_KERNEL);
++	if (!debug)
++		goto out;
++
++	/* Snapshot buffer for the nodelist report (fixed size) */
++	debug->buffer = kmalloc(LPFC_NODELIST_SIZE, GFP_KERNEL);
++	if (!debug->buffer) {
++		kfree(debug);
++		goto out;
++	}
++
++	debug->len = lpfc_debugfs_nodelist_data(vport, debug->buffer,
++		LPFC_NODELIST_SIZE);
++	file->private_data = debug;
++
++	rc = 0;
++out:
++	return rc;
++}
++
++static loff_t
++lpfc_debugfs_lseek(struct file *file, loff_t off, int whence)
++{
++	struct lpfc_debug *debug;
++	loff_t pos = -1;
++
++	debug = file->private_data;
++
++	switch (whence) {
++	case 0:
++		pos = off;
++		break;
++	case 1:
++		pos = file->f_pos + off;
++		break;
++	case 2:
++		pos = debug->len - off;
++	}
++	return (pos < 0 || pos > debug->len) ? -EINVAL : (file->f_pos = pos);
++}
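++/* Editor's note: whence values 0/1/2 map to SEEK_SET/SEEK_CUR/SEEK_END.
++ * The SEEK_END case resolves to (len - off), so a positive offset seeks
++ * backwards from the end of the snapshot -- e.g. from userspace
++ * (illustrative only, path assumes debugfs mounted on /debug):
++ */
++#if 0
++	int fd = open("/debug/lpfc/lpfc0/vport0/discovery_trace", O_RDONLY);
++	char tail[512];
++
++	lseek(fd, sizeof(tail), SEEK_END);	/* positions at len - 512 */
++	read(fd, tail, sizeof(tail));		/* last 512 bytes of trace */
++#endif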
++
++static ssize_t
++lpfc_debugfs_read(struct file *file, char __user *buf,
++		  size_t nbytes, loff_t *ppos)
++{
++	struct lpfc_debug *debug = file->private_data;
++	return simple_read_from_buffer(buf, nbytes, ppos, debug->buffer,
++				       debug->len);
++}
++
++static int
++lpfc_debugfs_release(struct inode *inode, struct file *file)
++{
++	struct lpfc_debug *debug = file->private_data;
++
++	kfree(debug->buffer);
++	kfree(debug);
++
++	return 0;
++}
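++/* Editor's note: together these handlers form the common "snapshot on
++ * open" debugfs idiom -- open() renders the whole report into a private
++ * buffer, read() is then a bounded simple_read_from_buffer(), and
++ * release() frees the snapshot, so each reader sees one consistent copy. */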
++
++#undef lpfc_debugfs_op_disc_trc
++static struct file_operations lpfc_debugfs_op_disc_trc = {
++	.owner =        THIS_MODULE,
++	.open =         lpfc_debugfs_disc_trc_open,
++	.llseek =       lpfc_debugfs_lseek,
++	.read =         lpfc_debugfs_read,
++	.release =      lpfc_debugfs_release,
++};
++
++#undef lpfc_debugfs_op_nodelist
++static struct file_operations lpfc_debugfs_op_nodelist = {
++	.owner =        THIS_MODULE,
++	.open =         lpfc_debugfs_nodelist_open,
++	.llseek =       lpfc_debugfs_lseek,
++	.read =         lpfc_debugfs_read,
++	.release =      lpfc_debugfs_release,
++};
++
++static struct dentry *lpfc_debugfs_root = NULL;
++static atomic_t lpfc_debugfs_hba_count;
++#endif
++
++inline void
++lpfc_debugfs_initialize(struct lpfc_vport *vport)
++{
++#ifdef CONFIG_LPFC_DEBUG_FS
++	struct lpfc_hba   *phba = vport->phba;
++	char name[64];
++	uint32_t num, i;
++
++	if (!lpfc_debugfs_enable)
++		return;
++
++	if (lpfc_debugfs_max_disc_trc) {
++		num = lpfc_debugfs_max_disc_trc - 1;
++		if (num & lpfc_debugfs_max_disc_trc) {
++			/* Change to be a power of 2 */
++			num = lpfc_debugfs_max_disc_trc;
++			i = 0;
++			while (num > 1) {
++				num = num >> 1;
++				i++;
++			}
++			lpfc_debugfs_max_disc_trc = (1 << i);
++			printk(KERN_ERR
++				"lpfc_debugfs_max_disc_trc changed to %d\n",
++				lpfc_debugfs_max_disc_trc);
++		}
++	}
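++	/* Editor's note: the loop above rounds a depth that is not already
++	 * a power of two DOWN to the nearest one (e.g. 100 becomes 64),
++	 * keeping the (depth - 1) index masks in the trace paths valid. */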
++
++	if (!lpfc_debugfs_root) {
++		lpfc_debugfs_root = debugfs_create_dir("lpfc", NULL);
++		atomic_set(&lpfc_debugfs_hba_count, 0);
++		if (!lpfc_debugfs_root)
++			goto debug_failed;
++	}
++
++	snprintf(name, sizeof(name), "lpfc%d", phba->brd_no);
++	if (!phba->hba_debugfs_root) {
++		phba->hba_debugfs_root =
++			debugfs_create_dir(name, lpfc_debugfs_root);
++		if (!phba->hba_debugfs_root)
++			goto debug_failed;
++		atomic_inc(&lpfc_debugfs_hba_count);
++		atomic_set(&phba->debugfs_vport_count, 0);
++	}
++
++	snprintf(name, sizeof(name), "vport%d", vport->vpi);
++	if (!vport->vport_debugfs_root) {
++		vport->vport_debugfs_root =
++			debugfs_create_dir(name, phba->hba_debugfs_root);
++		if (!vport->vport_debugfs_root)
++			goto debug_failed;
++		atomic_inc(&phba->debugfs_vport_count);
++	}
++
++	if (!lpfc_debugfs_start_time)
++		lpfc_debugfs_start_time = jiffies;
++
++	vport->disc_trc = kmalloc(
++		(sizeof(struct lpfc_disc_trc) * lpfc_debugfs_max_disc_trc),
++		GFP_KERNEL);
++
++	if (!vport->disc_trc)
++		goto debug_failed;
++	memset(vport->disc_trc, 0,
++		(sizeof(struct lpfc_disc_trc) * lpfc_debugfs_max_disc_trc));
++
++	snprintf(name, sizeof(name), "discovery_trace");
++	vport->debug_disc_trc =
++		debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
++				 vport->vport_debugfs_root,
++				 vport, &lpfc_debugfs_op_disc_trc);
++	if (!vport->debug_disc_trc) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
++				"%d:0409 Cannot create debugfs",
++				phba->brd_no);
++		goto debug_failed;
++	}
++	snprintf(name, sizeof(name), "nodelist");
++	vport->debug_nodelist =
++		debugfs_create_file(name, S_IFREG|S_IRUGO|S_IWUSR,
++				 vport->vport_debugfs_root,
++				 vport, &lpfc_debugfs_op_nodelist);
++	if (!vport->debug_nodelist) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
++				"%d:0409 Cannot create debugfs",
++				phba->brd_no);
++		goto debug_failed;
++	}
++debug_failed:
++	return;
++#endif
++}
++
++
++inline void
++lpfc_debugfs_terminate(struct lpfc_vport *vport)
++{
++#ifdef CONFIG_LPFC_DEBUG_FS
++	struct lpfc_hba   *phba = vport->phba;
++
++	if (vport->disc_trc) {
++		kfree(vport->disc_trc);
++		vport->disc_trc = NULL;
++	}
++	if (vport->debug_disc_trc) {
++		debugfs_remove(vport->debug_disc_trc); /* discovery_trace */
++		vport->debug_disc_trc = NULL;
++	}
++	if (vport->debug_nodelist) {
++		debugfs_remove(vport->debug_nodelist); /* nodelist */
++		vport->debug_nodelist = NULL;
++	}
++	if (vport->vport_debugfs_root) {
++		debugfs_remove(vport->vport_debugfs_root); /* vportX */
++		vport->vport_debugfs_root = NULL;
++		atomic_dec(&phba->debugfs_vport_count);
++	}
++	if (atomic_read(&phba->debugfs_vport_count) == 0) {
++		debugfs_remove(vport->phba->hba_debugfs_root); /* lpfcX */
++		vport->phba->hba_debugfs_root = NULL;
++		atomic_dec(&lpfc_debugfs_hba_count);
++		if (atomic_read(&lpfc_debugfs_hba_count) == 0) {
++			debugfs_remove(lpfc_debugfs_root); /* lpfc */
++			lpfc_debugfs_root = NULL;
++		}
++	}
++#endif
++}
++
++
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.h linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_debugfs.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_debugfs.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_debugfs.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,50 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for         *
++ * Fibre Channel Host Bus Adapters.                                *
++ * Copyright (C) 2007 Emulex.  All rights reserved.                *
++ * EMULEX and SLI are trademarks of Emulex.                        *
++ * www.emulex.com                                                  *
++ *                                                                 *
++ * This program is free software; you can redistribute it and/or   *
++ * modify it under the terms of version 2 of the GNU General       *
++ * Public License as published by the Free Software Foundation.    *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
++ * more details, a copy of which can be found in the file COPYING  *
++ * included with this package.                                     *
++ *******************************************************************/
++
++#ifndef _H_LPFC_DEBUG_FS
++#define _H_LPFC_DEBUG_FS
++
++#ifdef CONFIG_LPFC_DEBUG_FS
++struct lpfc_disc_trc {
++	char *fmt;
++	uint32_t data1;
++	uint32_t data2;
++	uint32_t data3;
++	uint32_t seq_cnt;
++	unsigned long jif;
++};
++#endif
++
++/* Mask for discovery_trace */
++#define LPFC_DISC_TRC_ELS_CMD		0x1	/* Trace ELS commands */
++#define LPFC_DISC_TRC_ELS_RSP		0x2	/* Trace ELS response */
++#define LPFC_DISC_TRC_ELS_UNSOL		0x4	/* Trace ELS rcv'ed   */
++#define LPFC_DISC_TRC_ELS_ALL		0x7	/* Trace ELS */
++#define LPFC_DISC_TRC_MBOX_VPORT	0x8	/* Trace vport MBOXs */
++#define LPFC_DISC_TRC_MBOX		0x10	/* Trace other MBOXs */
++#define LPFC_DISC_TRC_MBOX_ALL		0x18	/* Trace all MBOXs */
++#define LPFC_DISC_TRC_CT		0x20	/* Trace disc CT requests */
++#define LPFC_DISC_TRC_DSM		0x40    /* Trace DSM events */
++#define LPFC_DISC_TRC_RPORT		0x80    /* Trace rport events */
++#define LPFC_DISC_TRC_NODE		0x100   /* Trace ndlp state changes */
++
++#define LPFC_DISC_TRC_DISCOVERY		0xef    /* common mask for general
++						 * discovery */
++#endif /* H_LPFC_DEBUG_FS */
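++
++/* Editor's note (illustrative, not in the original patch): the mask bits
++ * above OR together; e.g. tracing ELS traffic plus ndlp state changes:
++ */
++#if 0
++#define MY_DISC_MASK	(LPFC_DISC_TRC_ELS_ALL | LPFC_DISC_TRC_NODE)	/* 0x107 */
++#endif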
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_disc.h linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_disc.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_disc.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_disc.h	2007-12-19 15:29:23.000000000 -0500
+@@ -36,21 +36,23 @@
+ 	LPFC_EVT_WARM_START,
+ 	LPFC_EVT_KILL,
+ 	LPFC_EVT_ELS_RETRY,
++	LPFC_EVT_DEV_LOSS_DELAY,
++	LPFC_EVT_DEV_LOSS,
+ };
+ 
+ /* structure used to queue event to the discovery tasklet */
+ struct lpfc_work_evt {
+ 	struct list_head      evt_listp;
+-	void                * evt_arg1;
+-	void                * evt_arg2;
++	void                 *evt_arg1;
++	void                 *evt_arg2;
+ 	enum lpfc_work_type   evt;
+ };
+ 
+ 
+ struct lpfc_nodelist {
+ 	struct list_head nlp_listp;
+-	struct lpfc_name nlp_portname;		/* port name */
+-	struct lpfc_name nlp_nodename;		/* node name */
++	struct lpfc_name nlp_portname;
++	struct lpfc_name nlp_nodename;
+ 	uint32_t         nlp_flag;		/* entry  flags */
+ 	uint32_t         nlp_DID;		/* FC D_ID of entry */
+ 	uint32_t         nlp_last_elscmd;	/* Last ELS cmd sent */
+@@ -75,8 +77,9 @@
+ 	struct timer_list   nlp_delayfunc;	/* Used for delayed ELS cmds */
+ 	struct fc_rport *rport;			/* Corresponding FC transport
+ 						   port structure */
+-	struct lpfc_hba      *nlp_phba;
++	struct lpfc_vport *vport;
+ 	struct lpfc_work_evt els_retry_evt;
++	struct lpfc_work_evt dev_loss_evt;
+ 	unsigned long last_ramp_up_time;        /* jiffy of last ramp up */
+ 	unsigned long last_q_full_time;		/* jiffy of last queue full */
+ 	struct kref     kref;
+@@ -98,7 +101,9 @@
+ 					   ACC */
+ #define NLP_NPR_ADISC      0x2000000	/* Issue ADISC when dq'ed from
+ 					   NPR list */
++#define NLP_RM_DFLT_RPI    0x4000000	/* need to remove leftover dflt RPI */
+ #define NLP_NODEV_REMOVE   0x8000000	/* Defer removal till discovery ends */
++#define NLP_TARGET_REMOVE  0x10000000   /* Target remove in process */
+ 
+ /* There are 4 different double linked lists nodelist entries can reside on.
+  * The Port Login (PLOGI) list and Address Discovery (ADISC) list are used
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_els.c linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_els.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_els.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_els.c	2007-12-19 15:29:23.000000000 -0500
+@@ -35,38 +35,38 @@
+ #include "lpfc.h"
+ #include "lpfc_logmsg.h"
+ #include "lpfc_crtn.h"
++#include "lpfc_vport.h"
++#include "lpfc_debugfs.h"
+ 
+ static int lpfc_els_retry(struct lpfc_hba *, struct lpfc_iocbq *,
+ 			  struct lpfc_iocbq *);
++static void lpfc_cmpl_fabric_iocb(struct lpfc_hba *, struct lpfc_iocbq *,
++			struct lpfc_iocbq *);
++
+ static int lpfc_max_els_tries = 3;
+ 
+-static int
+-lpfc_els_chk_latt(struct lpfc_hba * phba)
++int
++lpfc_els_chk_latt(struct lpfc_vport *vport)
+ {
+-	struct lpfc_sli *psli;
+-	LPFC_MBOXQ_t *mbox;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
+ 	uint32_t ha_copy;
+-	int rc;
+-
+-	psli = &phba->sli;
+ 
+-	if ((phba->hba_state >= LPFC_HBA_READY) ||
+-	    (phba->hba_state == LPFC_LINK_DOWN))
++	if (vport->port_state >= LPFC_VPORT_READY ||
++	    phba->link_state == LPFC_LINK_DOWN)
+ 		return 0;
+ 
+ 	/* Read the HBA Host Attention Register */
+-	spin_lock_irq(phba->host->host_lock);
+ 	ha_copy = readl(phba->HAregaddr);
+-	spin_unlock_irq(phba->host->host_lock);
+ 
+ 	if (!(ha_copy & HA_LATT))
+ 		return 0;
+ 
+ 	/* Pending Link Event during Discovery */
+-	lpfc_printf_log(phba, KERN_WARNING, LOG_DISCOVERY,
+-			"%d:0237 Pending Link Event during "
++	lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++			"%d (%d):0237 Pending Link Event during "
+ 			"Discovery: State x%x\n",
+-			phba->brd_no, phba->hba_state);
++			phba->brd_no, vport->vpi,  phba->pport->port_state);
+ 
+ 	/* CLEAR_LA should re-enable link attention events and
+ 	 * we should then imediately take a LATT event. The
+@@ -74,48 +74,34 @@
+ 	 * will cleanup any left over in-progress discovery
+ 	 * events.
+ 	 */
+-	spin_lock_irq(phba->host->host_lock);
+-	phba->fc_flag |= FC_ABORT_DISCOVERY;
+-	spin_unlock_irq(phba->host->host_lock);
+-
+-	if (phba->hba_state != LPFC_CLEAR_LA) {
+-		if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) {
+-			phba->hba_state = LPFC_CLEAR_LA;
+-			lpfc_clear_la(phba, mbox);
+-			mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
+-			rc = lpfc_sli_issue_mbox (phba, mbox,
+-						  (MBX_NOWAIT | MBX_STOP_IOCB));
+-			if (rc == MBX_NOT_FINISHED) {
+-				mempool_free(mbox, phba->mbox_mem_pool);
+-				phba->hba_state = LPFC_HBA_ERROR;
+-			}
+-		}
+-	}
++	spin_lock_irq(shost->host_lock);
++	vport->fc_flag |= FC_ABORT_DISCOVERY;
++	spin_unlock_irq(shost->host_lock);
+ 
+-	return 1;
++	if (phba->link_state != LPFC_CLEAR_LA)
++		lpfc_issue_clear_la(phba, vport);
+ 
++	return 1;
+ }
+ 
+ static struct lpfc_iocbq *
+-lpfc_prep_els_iocb(struct lpfc_hba * phba, uint8_t expectRsp,
+-		   uint16_t cmdSize, uint8_t retry, struct lpfc_nodelist * ndlp,
+-		   uint32_t did, uint32_t elscmd)
++lpfc_prep_els_iocb(struct lpfc_vport *vport, uint8_t expectRsp,
++		   uint16_t cmdSize, uint8_t retry,
++		   struct lpfc_nodelist *ndlp, uint32_t did,
++		   uint32_t elscmd)
+ {
+-	struct lpfc_sli_ring *pring;
++	struct lpfc_hba  *phba = vport->phba;
+ 	struct lpfc_iocbq *elsiocb;
+ 	struct lpfc_dmabuf *pcmd, *prsp, *pbuflist;
+ 	struct ulp_bde64 *bpl;
+ 	IOCB_t *icmd;
+ 
+-	pring = &phba->sli.ring[LPFC_ELS_RING];
+ 
+-	if (phba->hba_state < LPFC_LINK_UP)
++	if (!lpfc_is_link_up(phba))
+ 		return  NULL;
+ 
+ 	/* Allocate buffer for  command iocb */
+-	spin_lock_irq(phba->host->host_lock);
+ 	elsiocb = lpfc_sli_get_iocbq(phba);
+-	spin_unlock_irq(phba->host->host_lock);
+ 
+ 	if (elsiocb == NULL)
+ 		return NULL;
+@@ -123,14 +109,12 @@
+ 
+ 	/* fill in BDEs for command */
+ 	/* Allocate buffer for command payload */
+-	if (((pcmd = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL)) == 0) ||
++	if (((pcmd = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL)) == 0) ||
+ 	    ((pcmd->virt = lpfc_mbuf_alloc(phba,
+ 					   MEM_PRI, &(pcmd->phys))) == 0)) {
+ 		kfree(pcmd);
+ 
+-		spin_lock_irq(phba->host->host_lock);
+ 		lpfc_sli_release_iocbq(phba, elsiocb);
+-		spin_unlock_irq(phba->host->host_lock);
+ 		return NULL;
+ 	}
+ 
+@@ -138,7 +122,7 @@
+ 
+ 	/* Allocate buffer for response payload */
+ 	if (expectRsp) {
+-		prsp = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL);
++		prsp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+ 		if (prsp)
+ 			prsp->virt = lpfc_mbuf_alloc(phba, MEM_PRI,
+ 						     &prsp->phys);
+@@ -146,9 +130,7 @@
+ 			kfree(prsp);
+ 			lpfc_mbuf_free(phba, pcmd->virt, pcmd->phys);
+ 			kfree(pcmd);
+-			spin_lock_irq(phba->host->host_lock);
+ 			lpfc_sli_release_iocbq(phba, elsiocb);
+-			spin_unlock_irq(phba->host->host_lock);
+ 			return NULL;
+ 		}
+ 		INIT_LIST_HEAD(&prsp->list);
+@@ -157,14 +139,12 @@
+ 	}
+ 
+ 	/* Allocate buffer for Buffer ptr list */
+-	pbuflist = kmalloc(sizeof (struct lpfc_dmabuf), GFP_KERNEL);
++	pbuflist = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+ 	if (pbuflist)
+ 	    pbuflist->virt = lpfc_mbuf_alloc(phba, MEM_PRI,
+ 					     &pbuflist->phys);
+ 	if (pbuflist == 0 || pbuflist->virt == 0) {
+-		spin_lock_irq(phba->host->host_lock);
+ 		lpfc_sli_release_iocbq(phba, elsiocb);
+-		spin_unlock_irq(phba->host->host_lock);
+ 		lpfc_mbuf_free(phba, pcmd->virt, pcmd->phys);
+ 		lpfc_mbuf_free(phba, prsp->virt, prsp->phys);
+ 		kfree(pcmd);
+@@ -178,20 +158,28 @@
+ 	icmd->un.elsreq64.bdl.addrHigh = putPaddrHigh(pbuflist->phys);
+ 	icmd->un.elsreq64.bdl.addrLow = putPaddrLow(pbuflist->phys);
+ 	icmd->un.elsreq64.bdl.bdeFlags = BUFF_TYPE_BDL;
+-	if (expectRsp) {
+-		icmd->un.elsreq64.bdl.bdeSize = (2 * sizeof (struct ulp_bde64));
+ 		icmd->un.elsreq64.remoteID = did;	/* DID */
++	if (expectRsp) {
++		icmd->un.elsreq64.bdl.bdeSize = (2 * sizeof(struct ulp_bde64));
+ 		icmd->ulpCommand = CMD_ELS_REQUEST64_CR;
+ 		icmd->ulpTimeout = phba->fc_ratov * 2;
+ 	} else {
+-		icmd->un.elsreq64.bdl.bdeSize = sizeof (struct ulp_bde64);
++		icmd->un.elsreq64.bdl.bdeSize = sizeof(struct ulp_bde64);
+ 		icmd->ulpCommand = CMD_XMIT_ELS_RSP64_CX;
+ 	}
+-
+ 	icmd->ulpBdeCount = 1;
+ 	icmd->ulpLe = 1;
+ 	icmd->ulpClass = CLASS3;
+ 
++	if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
++		icmd->un.elsreq64.myID = vport->fc_myDID;
++
++		/* For ELS_REQUEST64_CR, use the VPI by default */
++		icmd->ulpContext = vport->vpi;
++		icmd->ulpCt_h = 0;
++		icmd->ulpCt_l = 1;
++	}
++
+ 	bpl = (struct ulp_bde64 *) pbuflist->virt;
+ 	bpl->addrLow = le32_to_cpu(putPaddrLow(pcmd->phys));
+ 	bpl->addrHigh = le32_to_cpu(putPaddrHigh(pcmd->phys));
+@@ -209,10 +197,12 @@
+ 	}
+ 
+ 	/* Save for completion so we can release these resources */
++	if (elscmd != ELS_CMD_LS_RJT)
+ 	elsiocb->context1 = lpfc_nlp_get(ndlp);
+ 	elsiocb->context2 = pcmd;
+ 	elsiocb->context3 = pbuflist;
+ 	elsiocb->retry = retry;
++	elsiocb->vport = vport;
+ 	elsiocb->drvrTimeout = (phba->fc_ratov << 1) + LPFC_DRVR_TIMEOUT;
+ 
+ 	if (prsp) {
+@@ -222,16 +212,16 @@
+ 	if (expectRsp) {
+ 		/* Xmit ELS command <elsCmd> to remote NPORT <did> */
+ 		lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-				"%d:0116 Xmit ELS command x%x to remote "
+-				"NPORT x%x I/O tag: x%x, HBA state: x%x\n",
+-				phba->brd_no, elscmd,
+-				did, elsiocb->iotag, phba->hba_state);
++				"%d (%d):0116 Xmit ELS command x%x to remote "
++				"NPORT x%x I/O tag: x%x, port state: x%x\n",
++				phba->brd_no, vport->vpi,  elscmd, did,
++				elsiocb->iotag, vport->port_state);
+ 	} else {
+ 		/* Xmit ELS response <elsCmd> to remote NPORT <did> */
+ 		lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-				"%d:0117 Xmit ELS response x%x to remote "
++				"%d (%d):0117 Xmit ELS response x%x to remote "
+ 				"NPORT x%x I/O tag: x%x, size: x%x\n",
+-				phba->brd_no, elscmd,
++				phba->brd_no, vport->vpi, elscmd,
+ 				ndlp->nlp_DID, elsiocb->iotag, cmdSize);
+ 	}
+ 
+@@ -240,16 +230,79 @@
+ 
+ 
+ static int
+-lpfc_cmpl_els_flogi_fabric(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
+-		struct serv_parm *sp, IOCB_t *irsp)
++lpfc_issue_fabric_reglogin(struct lpfc_vport *vport)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	LPFC_MBOXQ_t *mbox;
+ 	struct lpfc_dmabuf *mp;
++	struct lpfc_nodelist *ndlp;
++	struct serv_parm *sp;
+ 	int rc;
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	phba->fc_flag |= FC_FABRIC;
+-	spin_unlock_irq(phba->host->host_lock);
++	sp = &phba->fc_fabparam;
++	ndlp = lpfc_findnode_did(vport, Fabric_DID);
++	if (!ndlp)
++		goto fail;
++
++	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++	if (!mbox)
++		goto fail;
++
++	vport->port_state = LPFC_FABRIC_CFG_LINK;
++	lpfc_config_link(phba, mbox);
++	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++	mbox->vport = vport;
++
++	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB);
++	if (rc == MBX_NOT_FINISHED)
++		goto fail_free_mbox;
++
++	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++	if (!mbox)
++		goto fail;
++	rc = lpfc_reg_login(phba, vport->vpi, Fabric_DID, (uint8_t *)sp, mbox,
++			    0);
++	if (rc)
++		goto fail_free_mbox;
++
++	mbox->mbox_cmpl = lpfc_mbx_cmpl_fabric_reg_login;
++	mbox->vport = vport;
++	mbox->context2 = lpfc_nlp_get(ndlp);
++
++	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB);
++	if (rc == MBX_NOT_FINISHED)
++		goto fail_issue_reg_login;
++
++	return 0;
++
++fail_issue_reg_login:
++	lpfc_nlp_put(ndlp);
++	mp = (struct lpfc_dmabuf *) mbox->context1;
++	lpfc_mbuf_free(phba, mp->virt, mp->phys);
++	kfree(mp);
++fail_free_mbox:
++	mempool_free(mbox, phba->mbox_mem_pool);
++
++fail:
++	lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++	lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++		"%d (%d):0249 Cannot issue Register Fabric login\n",
++		phba->brd_no, vport->vpi);
++	return -ENXIO;
++}
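++/* Editor's note: the error path above is the usual kernel goto ladder --
++ * each label undoes only what succeeded before the failing step:
++ * fail_issue_reg_login drops the ndlp reference and frees the service-
++ * parameter DMA buffer, then falls into fail_free_mbox to release the
++ * mailbox itself. */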
++
++static int
++lpfc_cmpl_els_flogi_fabric(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			   struct serv_parm *sp, IOCB_t *irsp)
++{
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
++	struct lpfc_nodelist *np;
++	struct lpfc_nodelist *next_np;
++
++	spin_lock_irq(shost->host_lock);
++	vport->fc_flag |= FC_FABRIC;
++	spin_unlock_irq(shost->host_lock);
+ 
+ 	phba->fc_edtov = be32_to_cpu(sp->cmn.e_d_tov);
+ 	if (sp->cmn.edtovResolution)	/* E_D_TOV ticks are in nanoseconds */
+@@ -258,20 +311,20 @@
+ 	phba->fc_ratov = (be32_to_cpu(sp->cmn.w2.r_a_tov) + 999) / 1000;
+ 
+ 	if (phba->fc_topology == TOPOLOGY_LOOP) {
+-		spin_lock_irq(phba->host->host_lock);
+-		phba->fc_flag |= FC_PUBLIC_LOOP;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
++		vport->fc_flag |= FC_PUBLIC_LOOP;
++		spin_unlock_irq(shost->host_lock);
+ 	} else {
+ 		/*
+ 		 * If we are a N-port connected to a Fabric, fixup sparam's so
+ 		 * logins to devices on remote loops work.
+ 		 */
+-		phba->fc_sparam.cmn.altBbCredit = 1;
++		vport->fc_sparam.cmn.altBbCredit = 1;
+ 	}
+ 
+-	phba->fc_myDID = irsp->un.ulpWord[4] & Mask_DID;
++	vport->fc_myDID = irsp->un.ulpWord[4] & Mask_DID;
+ 	memcpy(&ndlp->nlp_portname, &sp->portName, sizeof(struct lpfc_name));
+-	memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof (struct lpfc_name));
++	memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof(struct lpfc_name));
+ 	ndlp->nlp_class_sup = 0;
+ 	if (sp->cls1.classValid)
+ 		ndlp->nlp_class_sup |= FC_COS_CLASS1;
+@@ -285,68 +338,85 @@
+ 				sp->cmn.bbRcvSizeLsb;
+ 	memcpy(&phba->fc_fabparam, sp, sizeof(struct serv_parm));
+ 
+-	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+-	if (!mbox)
+-		goto fail;
+-
+-	phba->hba_state = LPFC_FABRIC_CFG_LINK;
+-	lpfc_config_link(phba, mbox);
+-	mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+-
+-	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB);
+-	if (rc == MBX_NOT_FINISHED)
+-		goto fail_free_mbox;
++	if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
++		if (sp->cmn.response_multiple_NPort) {
++			lpfc_printf_log(phba, KERN_WARNING, LOG_ELS | LOG_VPORT,
++					"%d:1816 FLOGI NPIV supported, "
++					"response data 0x%x\n",
++					phba->brd_no,
++					sp->cmn.response_multiple_NPort);
++			phba->link_flag |= LS_NPIV_FAB_SUPPORTED;
+ 
+-	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+-	if (!mbox)
+-		goto fail;
++		} else {
++			/* Because we asked f/w for NPIV it still expects us
++			   to call reg_vnpid at least for the physical host */
++			lpfc_printf_log(phba, KERN_WARNING, LOG_ELS | LOG_VPORT,
++					"%d:1817 Fabric does not support NPIV "
++					"- configuring single port mode.\n",
++					phba->brd_no);
++			phba->link_flag &= ~LS_NPIV_FAB_SUPPORTED;
++		}
++	}
+ 
+-	if (lpfc_reg_login(phba, Fabric_DID, (uint8_t *) sp, mbox, 0))
+-		goto fail_free_mbox;
++	if ((vport->fc_prevDID != vport->fc_myDID) &&
++		!(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)) {
+ 
+-	mbox->mbox_cmpl = lpfc_mbx_cmpl_fabric_reg_login;
+-	mbox->context2 = lpfc_nlp_get(ndlp);
++		/* If our NportID changed, we need to ensure all
++		 * remaining NPORTs get unreg_login'ed.
++		 */
++		list_for_each_entry_safe(np, next_np,
++					&vport->fc_nodes, nlp_listp) {
++			if ((np->nlp_state != NLP_STE_NPR_NODE) ||
++				   !(np->nlp_flag & NLP_NPR_ADISC))
++				continue;
++			spin_lock_irq(shost->host_lock);
++			np->nlp_flag &= ~NLP_NPR_ADISC;
++			spin_unlock_irq(shost->host_lock);
++			lpfc_unreg_rpi(vport, np);
++		}
++		if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
++			lpfc_mbx_unreg_vpi(vport);
++			vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
++		}
++	}
+ 
+-	rc = lpfc_sli_issue_mbox(phba, mbox, MBX_NOWAIT | MBX_STOP_IOCB);
+-	if (rc == MBX_NOT_FINISHED)
+-		goto fail_issue_reg_login;
++	ndlp->nlp_sid = irsp->un.ulpWord[4] & Mask_DID;
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_REG_LOGIN_ISSUE);
+ 
++	if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED &&
++	    vport->fc_flag & FC_VPORT_NEEDS_REG_VPI) {
++		lpfc_register_new_vport(phba, vport, ndlp);
++		return 0;
++	}
++	lpfc_issue_fabric_reglogin(vport);
+ 	return 0;
+-
+- fail_issue_reg_login:
+-	lpfc_nlp_put(ndlp);
+-	mp = (struct lpfc_dmabuf *) mbox->context1;
+-	lpfc_mbuf_free(phba, mp->virt, mp->phys);
+-	kfree(mp);
+- fail_free_mbox:
+-	mempool_free(mbox, phba->mbox_mem_pool);
+- fail:
+-	return -ENXIO;
+ }
+ 
+ /*
+  * We FLOGIed into an NPort, initiate pt2pt protocol
+  */
+ static int
+-lpfc_cmpl_els_flogi_nport(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
++lpfc_cmpl_els_flogi_nport(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ 		struct serv_parm *sp)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
+ 	LPFC_MBOXQ_t *mbox;
+ 	int rc;
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
++	vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++	spin_unlock_irq(shost->host_lock);
+ 
+ 	phba->fc_edtov = FF_DEF_EDTOV;
+ 	phba->fc_ratov = FF_DEF_RATOV;
+-	rc = memcmp(&phba->fc_portname, &sp->portName,
+-			sizeof(struct lpfc_name));
++	rc = memcmp(&vport->fc_portname, &sp->portName,
++		    sizeof(vport->fc_portname));
+ 	if (rc >= 0) {
+ 		/* This side will initiate the PLOGI */
+-		spin_lock_irq(phba->host->host_lock);
+-		phba->fc_flag |= FC_PT2PT_PLOGI;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
++		vport->fc_flag |= FC_PT2PT_PLOGI;
++		spin_unlock_irq(shost->host_lock);
+ 
+ 		/*
+ 		 * N_Port ID cannot be 0, set our to LocalID the other
+@@ -355,7 +425,7 @@
+ 
+ 		/* not equal */
+ 		if (rc)
+-			phba->fc_myDID = PT2PT_LocalID;
++			vport->fc_myDID = PT2PT_LocalID;
+ 
+ 		mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ 		if (!mbox)
+@@ -364,6 +434,7 @@
+ 		lpfc_config_link(phba, mbox);
+ 
+ 		mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++		mbox->vport = vport;
+ 		rc = lpfc_sli_issue_mbox(phba, mbox,
+ 				MBX_NOWAIT | MBX_STOP_IOCB);
+ 		if (rc == MBX_NOT_FINISHED) {
+@@ -372,7 +443,7 @@
+ 		}
+ 		lpfc_nlp_put(ndlp);
+ 
+-		ndlp = lpfc_findnode_did(phba, PT2PT_RemoteID);
++		ndlp = lpfc_findnode_did(vport, PT2PT_RemoteID);
+ 		if (!ndlp) {
+ 			/*
+ 			 * Cannot find existing Fabric ndlp, so allocate a
+@@ -382,28 +453,30 @@
+ 			if (!ndlp)
+ 				goto fail;
+ 
+-			lpfc_nlp_init(phba, ndlp, PT2PT_RemoteID);
++			lpfc_nlp_init(vport, ndlp, PT2PT_RemoteID);
+ 		}
+ 
+ 		memcpy(&ndlp->nlp_portname, &sp->portName,
+ 				sizeof(struct lpfc_name));
+ 		memcpy(&ndlp->nlp_nodename, &sp->nodeName,
+ 				sizeof(struct lpfc_name));
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++		spin_unlock_irq(shost->host_lock);
+ 	} else {
+ 		/* This side will wait for the PLOGI */
+ 		lpfc_nlp_put(ndlp);
+ 	}
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	phba->fc_flag |= FC_PT2PT;
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
++	vport->fc_flag |= FC_PT2PT;
++	spin_unlock_irq(shost->host_lock);
+ 
+ 	/* Start discovery - this should just do CLEAR_LA */
+-	lpfc_disc_start(phba);
++	lpfc_disc_start(vport);
+ 	return 0;
+- fail:
++fail:
+ 	return -ENXIO;
+ }
+ 
+@@ -411,6 +484,8 @@
+ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ 		    struct lpfc_iocbq *rspiocb)
+ {
++	struct lpfc_vport *vport = cmdiocb->vport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+ 	IOCB_t *irsp = &rspiocb->iocb;
+ 	struct lpfc_nodelist *ndlp = cmdiocb->context1;
+ 	struct lpfc_dmabuf *pcmd = cmdiocb->context2, *prsp;
+@@ -418,21 +493,25 @@
+ 	int rc;
+ 
+ 	/* Check to see if link went down during discovery */
+-	if (lpfc_els_chk_latt(phba)) {
++	if (lpfc_els_chk_latt(vport)) {
+ 		lpfc_nlp_put(ndlp);
+ 		goto out;
+ 	}
+ 
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"FLOGI cmpl:      status:x%x/x%x state:x%x",
++		irsp->ulpStatus, irsp->un.ulpWord[4],
++		vport->port_state);
++
+ 	if (irsp->ulpStatus) {
+ 		/* Check for retry */
+-		if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
+-			/* ELS command is being retried */
++		if (lpfc_els_retry(phba, cmdiocb, rspiocb))
+ 			goto out;
+-		}
++
+ 		/* FLOGI failed, so there is no fabric */
+-		spin_lock_irq(phba->host->host_lock);
+-		phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
++		vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++		spin_unlock_irq(shost->host_lock);
+ 
+ 		/* If private loop, then allow max outstanding els to be
+ 		 * LPFC_MAX_DISC_THREADS (32). Scanning in the case of no
+@@ -443,11 +522,10 @@
+ 		}
+ 
+ 		/* FLOGI failure */
+-		lpfc_printf_log(phba,
+-				KERN_INFO,
+-				LOG_ELS,
+-				"%d:0100 FLOGI failure Data: x%x x%x x%x\n",
+-				phba->brd_no,
++		lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++				"%d (%d):0100 FLOGI failure Data: x%x x%x "
++				"x%x\n",
++				phba->brd_no, vport->vpi,
+ 				irsp->ulpStatus, irsp->un.ulpWord[4],
+ 				irsp->ulpTimeout);
+ 		goto flogifail;
+@@ -463,21 +541,21 @@
+ 
+ 	/* FLOGI completes successfully */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0101 FLOGI completes sucessfully "
++			"%d (%d):0101 FLOGI completes sucessfully "
+ 			"Data: x%x x%x x%x x%x\n",
+-			phba->brd_no,
++			phba->brd_no, vport->vpi,
+ 			irsp->un.ulpWord[4], sp->cmn.e_d_tov,
+ 			sp->cmn.w2.r_a_tov, sp->cmn.edtovResolution);
+ 
+-	if (phba->hba_state == LPFC_FLOGI) {
++	if (vport->port_state == LPFC_FLOGI) {
+ 		/*
+ 		 * If Common Service Parameters indicate Nport
+ 		 * we are point to point, if Fport we are Fabric.
+ 		 */
+ 		if (sp->cmn.fPort)
+-			rc = lpfc_cmpl_els_flogi_fabric(phba, ndlp, sp, irsp);
++			rc = lpfc_cmpl_els_flogi_fabric(vport, ndlp, sp, irsp);
+ 		else
+-			rc = lpfc_cmpl_els_flogi_nport(phba, ndlp, sp);
++			rc = lpfc_cmpl_els_flogi_nport(vport, ndlp, sp);
+ 
+ 		if (!rc)
+ 			goto out;
+@@ -486,14 +564,12 @@
+ flogifail:
+ 	lpfc_nlp_put(ndlp);
+ 
+-	if (irsp->ulpStatus != IOSTAT_LOCAL_REJECT ||
+-	    (irsp->un.ulpWord[4] != IOERR_SLI_ABORTED &&
+-	     irsp->un.ulpWord[4] != IOERR_SLI_DOWN)) {
++	if (!lpfc_error_lost_link(irsp)) {
+ 		/* FLOGI failed, so just use loop map to make discovery list */
+-		lpfc_disc_list_loopmap(phba);
++		lpfc_disc_list_loopmap(vport);
+ 
+ 		/* Start discovery */
+-		lpfc_disc_start(phba);
++		lpfc_disc_start(vport);
+ 	}
+ 
+ out:
+@@ -501,9 +577,10 @@
+ }
+ 
+ static int
+-lpfc_issue_els_flogi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ 		     uint8_t retry)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	struct serv_parm *sp;
+ 	IOCB_t *icmd;
+ 	struct lpfc_iocbq *elsiocb;
+@@ -515,9 +592,10 @@
+ 
+ 	pring = &phba->sli.ring[LPFC_ELS_RING];
+ 
+-	cmdsize = (sizeof (uint32_t) + sizeof (struct serv_parm));
+-	elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp,
++	cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm));
++	elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
+ 						 ndlp->nlp_DID, ELS_CMD_FLOGI);
++
+ 	if (!elsiocb)
+ 		return 1;
+ 
+@@ -526,8 +604,8 @@
+ 
+ 	/* For FLOGI request, remainder of payload is service parameters */
+ 	*((uint32_t *) (pcmd)) = ELS_CMD_FLOGI;
+-	pcmd += sizeof (uint32_t);
+-	memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm));
++	pcmd += sizeof(uint32_t);
++	memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm));
+ 	sp = (struct serv_parm *) pcmd;
+ 
+ 	/* Setup CSPs accordingly for Fabric */
+@@ -541,16 +619,32 @@
+ 	if (sp->cmn.fcphHigh < FC_PH3)
+ 		sp->cmn.fcphHigh = FC_PH3;
+ 
++	if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
++		sp->cmn.request_multiple_Nport = 1;
++
++		/* For FLOGI, Let FLOGI rsp set the NPortID for VPI 0 */
++		icmd->ulpCt_h = 1;
++		icmd->ulpCt_l = 0;
++	}
++
++	if (phba->fc_topology != TOPOLOGY_LOOP) {
++		icmd->un.elsreq64.myID = 0;
++		icmd->un.elsreq64.fl = 1;
++	}
++
+ 	tmo = phba->fc_ratov;
+ 	phba->fc_ratov = LPFC_DISC_FLOGI_TMO;
+-	lpfc_set_disctmo(phba);
++	lpfc_set_disctmo(vport);
+ 	phba->fc_ratov = tmo;
+ 
+ 	phba->fc_stat.elsXmitFLOGI++;
+ 	elsiocb->iocb_cmpl = lpfc_cmpl_els_flogi;
+-	spin_lock_irq(phba->host->host_lock);
+-	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+-	spin_unlock_irq(phba->host->host_lock);
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"Issue FLOGI:     opt:x%x",
++		phba->sli3_options, 0, 0);
++
++	rc = lpfc_issue_fabric_iocb(phba, elsiocb);
+ 	if (rc == IOCB_ERROR) {
+ 		lpfc_els_free_iocb(phba, elsiocb);
+ 		return 1;
+@@ -559,7 +653,7 @@
+ }
+ 
+ int
+-lpfc_els_abort_flogi(struct lpfc_hba * phba)
++lpfc_els_abort_flogi(struct lpfc_hba *phba)
+ {
+ 	struct lpfc_sli_ring *pring;
+ 	struct lpfc_iocbq *iocb, *next_iocb;
+@@ -577,73 +671,99 @@
+ 	 * Check the txcmplq for an iocb that matches the nport the driver is
+ 	 * searching for.
+ 	 */
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) {
+ 		icmd = &iocb->iocb;
+-		if (icmd->ulpCommand == CMD_ELS_REQUEST64_CR) {
++		if (icmd->ulpCommand == CMD_ELS_REQUEST64_CR &&
++		    icmd->un.elsreq64.bdl.ulpIoTag32) {
+ 			ndlp = (struct lpfc_nodelist *)(iocb->context1);
+-			if (ndlp && (ndlp->nlp_DID == Fabric_DID))
++			if (ndlp && (ndlp->nlp_DID == Fabric_DID)) {
+ 				lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+ 		}
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
++	}
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	return 0;
+ }
+ 
+ int
+-lpfc_initial_flogi(struct lpfc_hba *phba)
++lpfc_initial_flogi(struct lpfc_vport *vport)
+ {
++	struct lpfc_hba *phba = vport->phba;
+ 	struct lpfc_nodelist *ndlp;
+ 
+ 	/* First look for the Fabric ndlp */
+-	ndlp = lpfc_findnode_did(phba, Fabric_DID);
++	ndlp = lpfc_findnode_did(vport, Fabric_DID);
+ 	if (!ndlp) {
+ 		/* Cannot find existing Fabric ndlp, so allocate a new one */
+ 		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+ 		if (!ndlp)
+ 			return 0;
+-		lpfc_nlp_init(phba, ndlp, Fabric_DID);
++		lpfc_nlp_init(vport, ndlp, Fabric_DID);
+ 	} else {
+-		lpfc_dequeue_node(phba, ndlp);
++		lpfc_dequeue_node(vport, ndlp);
+ 	}
+-	if (lpfc_issue_els_flogi(phba, ndlp, 0)) {
++	if (lpfc_issue_els_flogi(vport, ndlp, 0)) {
+ 		lpfc_nlp_put(ndlp);
+ 	}
+ 	return 1;
+ }
+ 
++int
++lpfc_initial_fdisc(struct lpfc_vport *vport)
++{
++	struct lpfc_hba *phba = vport->phba;
++	struct lpfc_nodelist *ndlp;
++
++	/* First look for the Fabric ndlp */
++	ndlp = lpfc_findnode_did(vport, Fabric_DID);
++	if (!ndlp) {
++		/* Cannot find existing Fabric ndlp, so allocate a new one */
++		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
++		if (!ndlp)
++			return 0;
++		lpfc_nlp_init(vport, ndlp, Fabric_DID);
++	} else {
++		lpfc_dequeue_node(vport, ndlp);
++	}
++	if (lpfc_issue_els_fdisc(vport, ndlp, 0)) {
++		lpfc_nlp_put(ndlp);
++	}
++	return 1;
++}
+ static void
+-lpfc_more_plogi(struct lpfc_hba * phba)
++lpfc_more_plogi(struct lpfc_vport *vport)
+ {
+ 	int sentplogi;
++	struct lpfc_hba *phba = vport->phba;
+ 
+-	if (phba->num_disc_nodes)
+-		phba->num_disc_nodes--;
++	if (vport->num_disc_nodes)
++		vport->num_disc_nodes--;
+ 
+ 	/* Continue discovery with <num_disc_nodes> PLOGIs to go */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-			"%d:0232 Continue discovery with %d PLOGIs to go "
++			"%d (%d):0232 Continue discovery with %d PLOGIs to go "
+ 			"Data: x%x x%x x%x\n",
+-			phba->brd_no, phba->num_disc_nodes, phba->fc_plogi_cnt,
+-			phba->fc_flag, phba->hba_state);
++			phba->brd_no, vport->vpi, vport->num_disc_nodes,
++			vport->fc_plogi_cnt, vport->fc_flag, vport->port_state);
+ 
+ 	/* Check to see if there are more PLOGIs to be sent */
+-	if (phba->fc_flag & FC_NLP_MORE) {
+-		/* go thru NPR list and issue any remaining ELS PLOGIs */
+-		sentplogi = lpfc_els_disc_plogi(phba);
+-	}
++	if (vport->fc_flag & FC_NLP_MORE)
++		/* go thru NPR nodes and issue any remaining ELS PLOGIs */
++		sentplogi = lpfc_els_disc_plogi(vport);
++
+ 	return;
+ }
+ 
+ static struct lpfc_nodelist *
+-lpfc_plogi_confirm_nport(struct lpfc_hba *phba, struct lpfc_dmabuf *prsp,
++lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp,
+ 			 struct lpfc_nodelist *ndlp)
+ {
++	struct lpfc_vport    *vport = ndlp->vport;
+ 	struct lpfc_nodelist *new_ndlp;
+-	uint32_t *lp;
+ 	struct serv_parm *sp;
+-	uint8_t name[sizeof (struct lpfc_name)];
++	uint8_t  name[sizeof(struct lpfc_name)];
+ 	uint32_t rc;
+ 
+ 	/* Fabric nodes can have the same WWPN so we don't bother searching
+@@ -652,50 +772,51 @@
+ 	if (ndlp->nlp_type & NLP_FABRIC)
+ 		return ndlp;
+ 
+-	lp = (uint32_t *) prsp->virt;
+-	sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t));
++	sp = (struct serv_parm *) ((uint8_t *) prsp + sizeof(uint32_t));
+ 	memset(name, 0, sizeof(struct lpfc_name));
+ 
+ 	/* Now we find out if the NPort we are logging into, matches the WWPN
+ 	 * we have for that ndlp. If not, we have some work to do.
+ 	 */
+-	new_ndlp = lpfc_findnode_wwpn(phba, &sp->portName);
++	new_ndlp = lpfc_findnode_wwpn(vport, &sp->portName);
+ 
+ 	if (new_ndlp == ndlp)
+ 		return ndlp;
+ 
+ 	if (!new_ndlp) {
+-		rc =
+-		   memcmp(&ndlp->nlp_portname, name, sizeof(struct lpfc_name));
++		rc = memcmp(&ndlp->nlp_portname, name,
++			    sizeof(struct lpfc_name));
+ 		if (!rc)
+ 			return ndlp;
+ 		new_ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC);
+ 		if (!new_ndlp)
+ 			return ndlp;
+ 
+-		lpfc_nlp_init(phba, new_ndlp, ndlp->nlp_DID);
++		lpfc_nlp_init(vport, new_ndlp, ndlp->nlp_DID);
+ 	}
+ 
+-	lpfc_unreg_rpi(phba, new_ndlp);
++	lpfc_unreg_rpi(vport, new_ndlp);
+ 	new_ndlp->nlp_DID = ndlp->nlp_DID;
+ 	new_ndlp->nlp_prev_state = ndlp->nlp_prev_state;
+-	lpfc_nlp_set_state(phba, new_ndlp, ndlp->nlp_state);
++	lpfc_nlp_set_state(vport, new_ndlp, ndlp->nlp_state);
+ 
+-	/* Move this back to NPR list */
++	/* Move this back to NPR state */
+ 	if (memcmp(&ndlp->nlp_portname, name, sizeof(struct lpfc_name)) == 0)
+-		lpfc_drop_node(phba, ndlp);
++		lpfc_drop_node(vport, ndlp);
+ 	else {
+-		lpfc_unreg_rpi(phba, ndlp);
++		lpfc_unreg_rpi(vport, ndlp);
+ 		ndlp->nlp_DID = 0; /* Two ndlps cannot have the same did */
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ 	}
+ 	return new_ndlp;
+ }
+ 
+ static void
+-lpfc_cmpl_els_plogi(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-		    struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_els_plogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++		    struct lpfc_iocbq *rspiocb)
+ {
++	struct lpfc_vport *vport = cmdiocb->vport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+ 	IOCB_t *irsp;
+ 	struct lpfc_nodelist *ndlp;
+ 	struct lpfc_dmabuf *prsp;
+@@ -705,32 +826,43 @@
+ 	cmdiocb->context_un.rsp_iocb = rspiocb;
+ 
+ 	irsp = &rspiocb->iocb;
+-	ndlp = lpfc_findnode_did(phba, irsp->un.elsreq64.remoteID);
+-	if (!ndlp)
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"PLOGI cmpl:      status:x%x/x%x did:x%x",
++		irsp->ulpStatus, irsp->un.ulpWord[4],
++		irsp->un.elsreq64.remoteID);
++
++	ndlp = lpfc_findnode_did(vport, irsp->un.elsreq64.remoteID);
++	if (!ndlp) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++			"%d (%d):0136 PLOGI completes to NPort x%x "
++			"with no ndlp. Data: x%x x%x x%x\n",
++			phba->brd_no, vport->vpi, irsp->un.elsreq64.remoteID,
++			irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpIoTag);
+ 		goto out;
++	}
+ 
+ 	/* Since ndlp can be freed in the disc state machine, note if this node
+ 	 * is being used during discovery.
+ 	 */
++	spin_lock_irq(shost->host_lock);
+ 	disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC);
+-	spin_lock_irq(phba->host->host_lock);
+ 	ndlp->nlp_flag &= ~NLP_NPR_2B_DISC;
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
+ 	rc   = 0;
+ 
+ 	/* PLOGI completes to NPort <nlp_DID> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0102 PLOGI completes to NPort x%x "
++			"%d (%d):0102 PLOGI completes to NPort x%x "
+ 			"Data: x%x x%x x%x x%x x%x\n",
+-			phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus,
+-			irsp->un.ulpWord[4], irsp->ulpTimeout, disc,
+-			phba->num_disc_nodes);
++			phba->brd_no, vport->vpi, ndlp->nlp_DID,
++			irsp->ulpStatus, irsp->un.ulpWord[4],
++			irsp->ulpTimeout, disc, vport->num_disc_nodes);
+ 
+ 	/* Check to see if link went down during discovery */
+-	if (lpfc_els_chk_latt(phba)) {
+-		spin_lock_irq(phba->host->host_lock);
++	if (lpfc_els_chk_latt(vport)) {
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_NPR_2B_DISC;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+ 		goto out;
+ 	}
+ 
+@@ -743,22 +875,28 @@
+ 		if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
+ 			/* ELS command is being retried */
+ 			if (disc) {
+-				spin_lock_irq(phba->host->host_lock);
++				spin_lock_irq(shost->host_lock);
+ 				ndlp->nlp_flag |= NLP_NPR_2B_DISC;
+-				spin_unlock_irq(phba->host->host_lock);
++				spin_unlock_irq(shost->host_lock);
+ 			}
+ 			goto out;
+ 		}
+ 
+ 		/* PLOGI failed */
++		if (ndlp->nlp_DID == NameServer_DID) {
++			lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++			lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++				"%d (%d):0250 Nameserver login error: "
++				"0x%x / 0x%x\n",
++				phba->brd_no, vport->vpi,
++				irsp->ulpStatus, irsp->un.ulpWord[4]);
++		}
++
+ 		/* Do not call DSM for lpfc_els_abort'ed ELS cmds */
+-		if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+-		   ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) ||
+-		   (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) ||
+-		   (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) {
++		if (lpfc_error_lost_link(irsp)) {
+ 			rc = NLP_STE_FREED_NODE;
+ 		} else {
+-			rc = lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++			rc = lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ 					NLP_EVT_CMPL_PLOGI);
+ 		}
+ 	} else {
+@@ -766,33 +904,33 @@
+ 		prsp = list_entry(((struct lpfc_dmabuf *)
+ 			cmdiocb->context2)->list.next,
+ 			struct lpfc_dmabuf, list);
+-		ndlp = lpfc_plogi_confirm_nport(phba, prsp, ndlp);
+-		rc = lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++		ndlp = lpfc_plogi_confirm_nport(phba, prsp->virt, ndlp);
++		rc = lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ 					NLP_EVT_CMPL_PLOGI);
+ 	}
+ 
+-	if (disc && phba->num_disc_nodes) {
++	if (disc && vport->num_disc_nodes) {
+ 		/* Check to see if there are more PLOGIs to be sent */
+-		lpfc_more_plogi(phba);
++		lpfc_more_plogi(vport);
+ 
+-		if (phba->num_disc_nodes == 0) {
+-			spin_lock_irq(phba->host->host_lock);
+-			phba->fc_flag &= ~FC_NDISC_ACTIVE;
+-			spin_unlock_irq(phba->host->host_lock);
++		if (vport->num_disc_nodes == 0) {
++			spin_lock_irq(shost->host_lock);
++			vport->fc_flag &= ~FC_NDISC_ACTIVE;
++			spin_unlock_irq(shost->host_lock);
+ 
+-			lpfc_can_disctmo(phba);
+-			if (phba->fc_flag & FC_RSCN_MODE) {
++			lpfc_can_disctmo(vport);
++			if (vport->fc_flag & FC_RSCN_MODE) {
+ 				/*
+ 				 * Check to see if more RSCNs came in while
+ 				 * we were processing this one.
+ 				 */
+-				if ((phba->fc_rscn_id_cnt == 0) &&
+-			    	(!(phba->fc_flag & FC_RSCN_DISCOVERY))) {
+-					spin_lock_irq(phba->host->host_lock);
+-					phba->fc_flag &= ~FC_RSCN_MODE;
+-					spin_unlock_irq(phba->host->host_lock);
++				if ((vport->fc_rscn_id_cnt == 0) &&
++				    (!(vport->fc_flag & FC_RSCN_DISCOVERY))) {
++					spin_lock_irq(shost->host_lock);
++					vport->fc_flag &= ~FC_RSCN_MODE;
++					spin_unlock_irq(shost->host_lock);
+ 				} else {
+-					lpfc_els_handle_rscn(phba);
++					lpfc_els_handle_rscn(vport);
+ 				}
+ 			}
+ 		}
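
lpfc_error_lost_link() replaces the same four-line status test in every completion handler in this file (the removed lines above show it verbatim for PLOGI; PRLI, ADISC and LOGO follow). Its body is not part of these hunks, but it can be reconstructed from the condition it deletes; a sketch, assuming it takes the response IOCB:

/* Reconstructed from the open-coded test this patch removes; the real
 * helper lives outside these hunks and may differ in detail. */
static inline int
lpfc_error_lost_link(IOCB_t *irsp)
{
	return (irsp->ulpStatus == IOSTAT_LOCAL_REJECT &&
		(irsp->un.ulpWord[4] == IOERR_SLI_ABORTED ||
		 irsp->un.ulpWord[4] == IOERR_LINK_DOWN ||
		 irsp->un.ulpWord[4] == IOERR_SLI_DOWN));
}
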
+@@ -804,8 +942,9 @@
+ }
+ 
+ int
+-lpfc_issue_els_plogi(struct lpfc_hba * phba, uint32_t did, uint8_t retry)
++lpfc_issue_els_plogi(struct lpfc_vport *vport, uint32_t did, uint8_t retry)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	struct serv_parm *sp;
+ 	IOCB_t *icmd;
+ 	struct lpfc_iocbq *elsiocb;
+@@ -813,12 +952,13 @@
+ 	struct lpfc_sli *psli;
+ 	uint8_t *pcmd;
+ 	uint16_t cmdsize;
++	int ret;
+ 
+ 	psli = &phba->sli;
+ 	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
+ 
+-	cmdsize = (sizeof (uint32_t) + sizeof (struct serv_parm));
+-	elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, NULL, did,
++	cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm));
++	elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, NULL, did,
+ 								ELS_CMD_PLOGI);
+ 	if (!elsiocb)
+ 		return 1;
+@@ -828,8 +968,8 @@
+ 
+ 	/* For PLOGI request, remainder of payload is service parameters */
+ 	*((uint32_t *) (pcmd)) = ELS_CMD_PLOGI;
+-	pcmd += sizeof (uint32_t);
+-	memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm));
++	pcmd += sizeof(uint32_t);
++	memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm));
+ 	sp = (struct serv_parm *) pcmd;
+ 
+ 	if (sp->cmn.fcphLow < FC_PH_4_3)
+@@ -838,22 +978,27 @@
+ 	if (sp->cmn.fcphHigh < FC_PH3)
+ 		sp->cmn.fcphHigh = FC_PH3;
+ 
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"Issue PLOGI:     did:x%x",
++		did, 0, 0);
++
+ 	phba->fc_stat.elsXmitPLOGI++;
+ 	elsiocb->iocb_cmpl = lpfc_cmpl_els_plogi;
+-	spin_lock_irq(phba->host->host_lock);
+-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+-		spin_unlock_irq(phba->host->host_lock);
++	ret = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
++
++	if (ret == IOCB_ERROR) {
+ 		lpfc_els_free_iocb(phba, elsiocb);
+ 		return 1;
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
+ 	return 0;
+ }
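
Note the locking change in the issue path just above: the Scsi_Host lock is no longer held around lpfc_sli_issue_iocb(). The shape after this patch is issue-then-clean-up; serialization is presumably handled inside the SLI layer now (elsewhere the patch moves the driver from host->host_lock toward phba->hbalock), though that code is outside these hunks:

ret = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
if (ret == IOCB_ERROR) {
	/* no lock to drop on the error path any more */
	lpfc_els_free_iocb(phba, elsiocb);
	return 1;
}
return 0;
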
+ 
+ static void
+-lpfc_cmpl_els_prli(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-		   struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_els_prli(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++		   struct lpfc_iocbq *rspiocb)
+ {
++	struct lpfc_vport *vport = cmdiocb->vport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+ 	IOCB_t *irsp;
+ 	struct lpfc_sli *psli;
+ 	struct lpfc_nodelist *ndlp;
+@@ -864,21 +1009,26 @@
+ 
+ 	irsp = &(rspiocb->iocb);
+ 	ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag &= ~NLP_PRLI_SND;
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"PRLI cmpl:       status:x%x/x%x did:x%x",
++		irsp->ulpStatus, irsp->un.ulpWord[4],
++		ndlp->nlp_DID);
+ 
+ 	/* PRLI completes to NPort <nlp_DID> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0103 PRLI completes to NPort x%x "
++			"%d (%d):0103 PRLI completes to NPort x%x "
+ 			"Data: x%x x%x x%x x%x\n",
+-			phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus,
+-			irsp->un.ulpWord[4], irsp->ulpTimeout,
+-			phba->num_disc_nodes);
++			phba->brd_no, vport->vpi, ndlp->nlp_DID,
++			irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout,
++			vport->num_disc_nodes);
+ 
+-	phba->fc_prli_sent--;
++	vport->fc_prli_sent--;
+ 	/* Check to see if link went down during discovery */
+-	if (lpfc_els_chk_latt(phba))
++	if (lpfc_els_chk_latt(vport))
+ 		goto out;
+ 
+ 	if (irsp->ulpStatus) {
+@@ -889,18 +1039,16 @@
+ 		}
+ 		/* PRLI failed */
+ 		/* Do not call DSM for lpfc_els_abort'ed ELS cmds */
+-		if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+-		   ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) ||
+-		   (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) ||
+-		   (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) {
++		if (lpfc_error_lost_link(irsp)) {
+ 			goto out;
+ 		} else {
+-			lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++			lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ 					NLP_EVT_CMPL_PRLI);
+ 		}
+ 	} else {
+ 		/* Good status, call state machine */
+-		lpfc_disc_state_machine(phba, ndlp, cmdiocb, NLP_EVT_CMPL_PRLI);
++		lpfc_disc_state_machine(vport, ndlp, cmdiocb,
++					NLP_EVT_CMPL_PRLI);
+ 	}
+ 
+ out:
+@@ -909,9 +1057,11 @@
+ }
+ 
+ int
+-lpfc_issue_els_prli(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ 		    uint8_t retry)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba *phba = vport->phba;
+ 	PRLI *npr;
+ 	IOCB_t *icmd;
+ 	struct lpfc_iocbq *elsiocb;
+@@ -923,8 +1073,8 @@
+ 	psli = &phba->sli;
+ 	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
+ 
+-	cmdsize = (sizeof (uint32_t) + sizeof (PRLI));
+-	elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp,
++	cmdsize = (sizeof(uint32_t) + sizeof(PRLI));
++	elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
+ 					ndlp->nlp_DID, ELS_CMD_PRLI);
+ 	if (!elsiocb)
+ 		return 1;
+@@ -933,9 +1083,9 @@
+ 	pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ 
+ 	/* For PRLI request, remainder of payload is service parameters */
+-	memset(pcmd, 0, (sizeof (PRLI) + sizeof (uint32_t)));
++	memset(pcmd, 0, (sizeof(PRLI) + sizeof(uint32_t)));
+ 	*((uint32_t *) (pcmd)) = ELS_CMD_PRLI;
+-	pcmd += sizeof (uint32_t);
++	pcmd += sizeof(uint32_t);
+ 
+ 	/* For PRLI, remainder of payload is PRLI parameter page */
+ 	npr = (PRLI *) pcmd;
+@@ -955,81 +1105,88 @@
+ 	npr->prliType = PRLI_FCP_TYPE;
+ 	npr->initiatorFunc = 1;
+ 
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"Issue PRLI:      did:x%x",
++		ndlp->nlp_DID, 0, 0);
++
+ 	phba->fc_stat.elsXmitPRLI++;
+ 	elsiocb->iocb_cmpl = lpfc_cmpl_els_prli;
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag |= NLP_PRLI_SND;
++	spin_unlock_irq(shost->host_lock);
+ 	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag &= ~NLP_PRLI_SND;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+ 		lpfc_els_free_iocb(phba, elsiocb);
+ 		return 1;
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
+-	phba->fc_prli_sent++;
++	vport->fc_prli_sent++;
+ 	return 0;
+ }
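
Where a node flag marks a send in progress (NLP_PRLI_SND here, NLP_ADISC_SND and NLP_LOGO_SND below), the new code narrows the lock to just the flag updates and rolls the flag back if the ring rejects the IOCB. The generic shape, using the PRLI flag as the example:

spin_lock_irq(shost->host_lock);
ndlp->nlp_flag |= NLP_PRLI_SND;			/* mark send in progress */
spin_unlock_irq(shost->host_lock);

if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
	spin_lock_irq(shost->host_lock);
	ndlp->nlp_flag &= ~NLP_PRLI_SND;	/* roll back on failure */
	spin_unlock_irq(shost->host_lock);
	lpfc_els_free_iocb(phba, elsiocb);
	return 1;
}
vport->fc_prli_sent++;				/* counter is per-vport now */
return 0;
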
+ 
+ static void
+-lpfc_more_adisc(struct lpfc_hba * phba)
++lpfc_more_adisc(struct lpfc_vport *vport)
+ {
+ 	int sentadisc;
++	struct lpfc_hba *phba = vport->phba;
+ 
+-	if (phba->num_disc_nodes)
+-		phba->num_disc_nodes--;
++	if (vport->num_disc_nodes)
++		vport->num_disc_nodes--;
+ 
+ 	/* Continue discovery with <num_disc_nodes> ADISCs to go */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-			"%d:0210 Continue discovery with %d ADISCs to go "
++			"%d (%d):0210 Continue discovery with %d ADISCs to go "
+ 			"Data: x%x x%x x%x\n",
+-			phba->brd_no, phba->num_disc_nodes, phba->fc_adisc_cnt,
+-			phba->fc_flag, phba->hba_state);
++			phba->brd_no, vport->vpi, vport->num_disc_nodes,
++			vport->fc_adisc_cnt, vport->fc_flag, vport->port_state);
+ 
+ 	/* Check to see if there are more ADISCs to be sent */
+-	if (phba->fc_flag & FC_NLP_MORE) {
+-		lpfc_set_disctmo(phba);
+-
+-		/* go thru NPR list and issue any remaining ELS ADISCs */
+-		sentadisc = lpfc_els_disc_adisc(phba);
++	if (vport->fc_flag & FC_NLP_MORE) {
++		lpfc_set_disctmo(vport);
++		/* go thru NPR nodes and issue any remaining ELS ADISCs */
++		sentadisc = lpfc_els_disc_adisc(vport);
+ 	}
+ 	return;
+ }
+ 
+ static void
+-lpfc_rscn_disc(struct lpfc_hba * phba)
++lpfc_rscn_disc(struct lpfc_vport *vport)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++	lpfc_can_disctmo(vport);
++
+ 	/* RSCN discovery */
+-	/* go thru NPR list and issue ELS PLOGIs */
+-	if (phba->fc_npr_cnt) {
+-		if (lpfc_els_disc_plogi(phba))
++	/* go thru NPR nodes and issue ELS PLOGIs */
++	if (vport->fc_npr_cnt)
++		if (lpfc_els_disc_plogi(vport))
+ 			return;
+-	}
+-	if (phba->fc_flag & FC_RSCN_MODE) {
++
++	if (vport->fc_flag & FC_RSCN_MODE) {
+ 		/* Check to see if more RSCNs came in while we were
+ 		 * processing this one.
+ 		 */
+-		if ((phba->fc_rscn_id_cnt == 0) &&
+-		    (!(phba->fc_flag & FC_RSCN_DISCOVERY))) {
+-			spin_lock_irq(phba->host->host_lock);
+-			phba->fc_flag &= ~FC_RSCN_MODE;
+-			spin_unlock_irq(phba->host->host_lock);
++		if ((vport->fc_rscn_id_cnt == 0) &&
++		    (!(vport->fc_flag & FC_RSCN_DISCOVERY))) {
++			spin_lock_irq(shost->host_lock);
++			vport->fc_flag &= ~FC_RSCN_MODE;
++			spin_unlock_irq(shost->host_lock);
+ 		} else {
+-			lpfc_els_handle_rscn(phba);
++			lpfc_els_handle_rscn(vport);
+ 		}
+ 	}
+ }
+ 
+ static void
+-lpfc_cmpl_els_adisc(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-		    struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_els_adisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++		    struct lpfc_iocbq *rspiocb)
+ {
++	struct lpfc_vport *vport = cmdiocb->vport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+ 	IOCB_t *irsp;
+-	struct lpfc_sli *psli;
+ 	struct lpfc_nodelist *ndlp;
+-	LPFC_MBOXQ_t *mbox;
+-	int disc, rc;
+-
+-	psli = &phba->sli;
++	int  disc;
+ 
+ 	/* we pass cmdiocb to state machine which needs rspiocb as well */
+ 	cmdiocb->context_un.rsp_iocb = rspiocb;
+@@ -1037,27 +1194,32 @@
+ 	irsp = &(rspiocb->iocb);
+ 	ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
+ 
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"ADISC cmpl:      status:x%x/x%x did:x%x",
++		irsp->ulpStatus, irsp->un.ulpWord[4],
++		ndlp->nlp_DID);
++
+ 	/* Since ndlp can be freed in the disc state machine, note if this node
+ 	 * is being used during discovery.
+ 	 */
++	spin_lock_irq(shost->host_lock);
+ 	disc = (ndlp->nlp_flag & NLP_NPR_2B_DISC);
+-	spin_lock_irq(phba->host->host_lock);
+ 	ndlp->nlp_flag &= ~(NLP_ADISC_SND | NLP_NPR_2B_DISC);
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
+ 
+ 	/* ADISC completes to NPort <nlp_DID> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0104 ADISC completes to NPort x%x "
++			"%d (%d):0104 ADISC completes to NPort x%x "
+ 			"Data: x%x x%x x%x x%x x%x\n",
+-			phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus,
+-			irsp->un.ulpWord[4], irsp->ulpTimeout, disc,
+-			phba->num_disc_nodes);
++			phba->brd_no, vport->vpi, ndlp->nlp_DID,
++			irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout,
++			disc, vport->num_disc_nodes);
+ 
+ 	/* Check to see if link went down during discovery */
+-	if (lpfc_els_chk_latt(phba)) {
+-		spin_lock_irq(phba->host->host_lock);
++	if (lpfc_els_chk_latt(vport)) {
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_NPR_2B_DISC;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+ 		goto out;
+ 	}
+ 
+@@ -1066,67 +1228,68 @@
+ 		if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
+ 			/* ELS command is being retried */
+ 			if (disc) {
+-				spin_lock_irq(phba->host->host_lock);
++				spin_lock_irq(shost->host_lock);
+ 				ndlp->nlp_flag |= NLP_NPR_2B_DISC;
+-				spin_unlock_irq(phba->host->host_lock);
+-				lpfc_set_disctmo(phba);
++				spin_unlock_irq(shost->host_lock);
++				lpfc_set_disctmo(vport);
+ 			}
+ 			goto out;
+ 		}
+ 		/* ADISC failed */
+ 		/* Do not call DSM for lpfc_els_abort'ed ELS cmds */
+-		if ((irsp->ulpStatus != IOSTAT_LOCAL_REJECT) ||
+-		   ((irsp->un.ulpWord[4] != IOERR_SLI_ABORTED) &&
+-		   (irsp->un.ulpWord[4] != IOERR_LINK_DOWN) &&
+-		   (irsp->un.ulpWord[4] != IOERR_SLI_DOWN))) {
+-			lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++		if (!lpfc_error_lost_link(irsp)) {
++			lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ 					NLP_EVT_CMPL_ADISC);
+ 		}
+ 	} else {
+ 		/* Good status, call state machine */
+-		lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++		lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ 					NLP_EVT_CMPL_ADISC);
+ 	}
+ 
+-	if (disc && phba->num_disc_nodes) {
++	if (disc && vport->num_disc_nodes) {
+ 		/* Check to see if there are more ADISCs to be sent */
+-		lpfc_more_adisc(phba);
++		lpfc_more_adisc(vport);
+ 
+ 		/* Check to see if we are done with ADISC authentication */
+-		if (phba->num_disc_nodes == 0) {
+-			lpfc_can_disctmo(phba);
+-			/* If we get here, there is nothing left to wait for */
+-			if ((phba->hba_state < LPFC_HBA_READY) &&
+-			    (phba->hba_state != LPFC_CLEAR_LA)) {
+-				/* Link up discovery */
+-				if ((mbox = mempool_alloc(phba->mbox_mem_pool,
+-							  GFP_KERNEL))) {
+-					phba->hba_state = LPFC_CLEAR_LA;
+-					lpfc_clear_la(phba, mbox);
+-					mbox->mbox_cmpl =
+-					    lpfc_mbx_cmpl_clear_la;
+-					rc = lpfc_sli_issue_mbox
+-						(phba, mbox,
+-						 (MBX_NOWAIT | MBX_STOP_IOCB));
+-					if (rc == MBX_NOT_FINISHED) {
+-						mempool_free(mbox,
+-						     phba->mbox_mem_pool);
+-						lpfc_disc_flush_list(phba);
+-						psli->ring[(psli->extra_ring)].
+-						    flag &=
+-						    ~LPFC_STOP_IOCB_EVENT;
+-						psli->ring[(psli->fcp_ring)].
+-						    flag &=
+-						    ~LPFC_STOP_IOCB_EVENT;
+-						psli->ring[(psli->next_ring)].
+-						    flag &=
+-						    ~LPFC_STOP_IOCB_EVENT;
+-						phba->hba_state =
+-						    LPFC_HBA_READY;
++		if (vport->num_disc_nodes == 0) {
++			/* If we get here, there is nothing left to ADISC */
++			/*
++			 * For NPIV, cmpl_reg_vpi will set port_state to READY,
++			 * and continue discovery.
++			 */
++			if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++			   !(vport->fc_flag & FC_RSCN_MODE)) {
++				lpfc_issue_reg_vpi(phba, vport);
++				goto out;
++			}
++			/*
++			 * For SLI2, we need to set port_state to READY
++			 * and continue discovery.
++			 */
++			if (vport->port_state < LPFC_VPORT_READY) {
++				/* If we get here, there is nothing to ADISC */
++				if (vport->port_type == LPFC_PHYSICAL_PORT)
++					lpfc_issue_clear_la(phba, vport);
++
++				if (!(vport->fc_flag & FC_ABORT_DISCOVERY)) {
++					vport->num_disc_nodes = 0;
++					/* go thru NPR list, issue ELS PLOGIs */
++					if (vport->fc_npr_cnt)
++						lpfc_els_disc_plogi(vport);
++
++					if (!vport->num_disc_nodes) {
++						spin_lock_irq(shost->host_lock);
++						vport->fc_flag &=
++							~FC_NDISC_ACTIVE;
++						spin_unlock_irq(
++							shost->host_lock);
++						lpfc_can_disctmo(vport);
+ 					}
+ 				}
++				vport->port_state = LPFC_VPORT_READY;
+ 			} else {
+-				lpfc_rscn_disc(phba);
++				lpfc_rscn_disc(vport);
+ 			}
+ 		}
+ 	}
+@@ -1136,22 +1299,21 @@
+ }
+ 
+ int
+-lpfc_issue_els_adisc(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++lpfc_issue_els_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ 		     uint8_t retry)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
+ 	ADISC *ap;
+ 	IOCB_t *icmd;
+ 	struct lpfc_iocbq *elsiocb;
+-	struct lpfc_sli_ring *pring;
+-	struct lpfc_sli *psli;
++	struct lpfc_sli *psli = &phba->sli;
++	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
+ 	uint8_t *pcmd;
+ 	uint16_t cmdsize;
+ 
+-	psli = &phba->sli;
+-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
+-
+-	cmdsize = (sizeof (uint32_t) + sizeof (ADISC));
+-	elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp,
++	cmdsize = (sizeof(uint32_t) + sizeof(ADISC));
++	elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
+ 						ndlp->nlp_DID, ELS_CMD_ADISC);
+ 	if (!elsiocb)
+ 		return 1;
+@@ -1161,81 +1323,97 @@
+ 
+ 	/* For ADISC request, remainder of payload is service parameters */
+ 	*((uint32_t *) (pcmd)) = ELS_CMD_ADISC;
+-	pcmd += sizeof (uint32_t);
++	pcmd += sizeof(uint32_t);
+ 
+ 	/* Fill in ADISC payload */
+ 	ap = (ADISC *) pcmd;
+ 	ap->hardAL_PA = phba->fc_pref_ALPA;
+-	memcpy(&ap->portName, &phba->fc_portname, sizeof (struct lpfc_name));
+-	memcpy(&ap->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name));
+-	ap->DID = be32_to_cpu(phba->fc_myDID);
++	memcpy(&ap->portName, &vport->fc_portname, sizeof(struct lpfc_name));
++	memcpy(&ap->nodeName, &vport->fc_nodename, sizeof(struct lpfc_name));
++	ap->DID = be32_to_cpu(vport->fc_myDID);
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"Issue ADISC:     did:x%x",
++		ndlp->nlp_DID, 0, 0);
+ 
+ 	phba->fc_stat.elsXmitADISC++;
+ 	elsiocb->iocb_cmpl = lpfc_cmpl_els_adisc;
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag |= NLP_ADISC_SND;
++	spin_unlock_irq(shost->host_lock);
+ 	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag &= ~NLP_ADISC_SND;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+ 		lpfc_els_free_iocb(phba, elsiocb);
+ 		return 1;
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
+ 	return 0;
+ }
+ 
+ static void
+-lpfc_cmpl_els_logo(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-		   struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_els_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++		   struct lpfc_iocbq *rspiocb)
+ {
++	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++	struct lpfc_vport *vport = ndlp->vport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+ 	IOCB_t *irsp;
+ 	struct lpfc_sli *psli;
+-	struct lpfc_nodelist *ndlp;
+ 
+ 	psli = &phba->sli;
+ 	/* we pass cmdiocb to state machine which needs rspiocb as well */
+ 	cmdiocb->context_un.rsp_iocb = rspiocb;
+ 
+ 	irsp = &(rspiocb->iocb);
+-	ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag &= ~NLP_LOGO_SND;
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"LOGO cmpl:       status:x%x/x%x did:x%x",
++		irsp->ulpStatus, irsp->un.ulpWord[4],
++		ndlp->nlp_DID);
+ 
+ 	/* LOGO completes to NPort <nlp_DID> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0105 LOGO completes to NPort x%x "
++			"%d (%d):0105 LOGO completes to NPort x%x "
+ 			"Data: x%x x%x x%x x%x\n",
+-			phba->brd_no, ndlp->nlp_DID, irsp->ulpStatus,
+-			irsp->un.ulpWord[4], irsp->ulpTimeout,
+-			phba->num_disc_nodes);
++			phba->brd_no, vport->vpi, ndlp->nlp_DID,
++			irsp->ulpStatus, irsp->un.ulpWord[4], irsp->ulpTimeout,
++			vport->num_disc_nodes);
+ 
+ 	/* Check to see if link went down during discovery */
+-	if (lpfc_els_chk_latt(phba))
++	if (lpfc_els_chk_latt(vport))
++		goto out;
++
++	if (ndlp->nlp_flag & NLP_TARGET_REMOVE) {
++	        /* NLP_EVT_DEVICE_RM should unregister the RPI
++		 * which should abort all outstanding IOs.
++		 */
++		lpfc_disc_state_machine(vport, ndlp, cmdiocb,
++					NLP_EVT_DEVICE_RM);
+ 		goto out;
++	}
+ 
+ 	if (irsp->ulpStatus) {
+ 		/* Check for retry */
+-		if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
++		if (lpfc_els_retry(phba, cmdiocb, rspiocb))
+ 			/* ELS command is being retried */
+ 			goto out;
+-		}
+ 		/* LOGO failed */
+ 		/* Do not call DSM for lpfc_els_abort'ed ELS cmds */
+-		if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+-		   ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) ||
+-		   (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) ||
+-		   (irsp->un.ulpWord[4] == IOERR_SLI_DOWN))) {
++		if (lpfc_error_lost_link(irsp))
+ 			goto out;
+-		} else {
+-			lpfc_disc_state_machine(phba, ndlp, cmdiocb,
++		else
++			lpfc_disc_state_machine(vport, ndlp, cmdiocb,
+ 					NLP_EVT_CMPL_LOGO);
+-		}
+ 	} else {
+ 		/* Good status, call state machine.
+ 		 * This will unregister the rpi if needed.
+ 		 */
+-		lpfc_disc_state_machine(phba, ndlp, cmdiocb, NLP_EVT_CMPL_LOGO);
++		lpfc_disc_state_machine(vport, ndlp, cmdiocb,
++					NLP_EVT_CMPL_LOGO);
+ 	}
+ 
+ out:
+@@ -1244,21 +1422,24 @@
+ }
+ 
+ int
+-lpfc_issue_els_logo(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++lpfc_issue_els_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ 		    uint8_t retry)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
+ 	IOCB_t *icmd;
+ 	struct lpfc_iocbq *elsiocb;
+ 	struct lpfc_sli_ring *pring;
+ 	struct lpfc_sli *psli;
+ 	uint8_t *pcmd;
+ 	uint16_t cmdsize;
++	int rc;
+ 
+ 	psli = &phba->sli;
+ 	pring = &psli->ring[LPFC_ELS_RING];
+ 
+-	cmdsize = (2 * sizeof (uint32_t)) + sizeof (struct lpfc_name);
+-	elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp,
++	cmdsize = (2 * sizeof(uint32_t)) + sizeof(struct lpfc_name);
++	elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
+ 						ndlp->nlp_DID, ELS_CMD_LOGO);
+ 	if (!elsiocb)
+ 		return 1;
+@@ -1266,53 +1447,66 @@
+ 	icmd = &elsiocb->iocb;
+ 	pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ 	*((uint32_t *) (pcmd)) = ELS_CMD_LOGO;
+-	pcmd += sizeof (uint32_t);
++	pcmd += sizeof(uint32_t);
+ 
+ 	/* Fill in LOGO payload */
+-	*((uint32_t *) (pcmd)) = be32_to_cpu(phba->fc_myDID);
+-	pcmd += sizeof (uint32_t);
+-	memcpy(pcmd, &phba->fc_portname, sizeof (struct lpfc_name));
++	*((uint32_t *) (pcmd)) = be32_to_cpu(vport->fc_myDID);
++	pcmd += sizeof(uint32_t);
++	memcpy(pcmd, &vport->fc_portname, sizeof(struct lpfc_name));
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"Issue LOGO:      did:x%x",
++		ndlp->nlp_DID, 0, 0);
+ 
+ 	phba->fc_stat.elsXmitLOGO++;
+ 	elsiocb->iocb_cmpl = lpfc_cmpl_els_logo;
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag |= NLP_LOGO_SND;
+-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++	spin_unlock_irq(shost->host_lock);
++	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
++
++	if (rc == IOCB_ERROR) {
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag &= ~NLP_LOGO_SND;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+ 		lpfc_els_free_iocb(phba, elsiocb);
+ 		return 1;
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
+ 	return 0;
+ }
+ 
+ static void
+-lpfc_cmpl_els_cmd(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-		  struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_els_cmd(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++		  struct lpfc_iocbq *rspiocb)
+ {
++	struct lpfc_vport *vport = cmdiocb->vport;
+ 	IOCB_t *irsp;
+ 
+ 	irsp = &rspiocb->iocb;
+ 
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"ELS cmd cmpl:    status:x%x/x%x did:x%x",
++		irsp->ulpStatus, irsp->un.ulpWord[4],
++		irsp->un.elsreq64.remoteID);
++
+ 	/* ELS cmd tag <ulpIoTag> completes */
+-	lpfc_printf_log(phba,
+-			KERN_INFO,
+-			LOG_ELS,
+-			"%d:0106 ELS cmd tag x%x completes Data: x%x x%x x%x\n",
+-			phba->brd_no,
++	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++			"%d (%d):0106 ELS cmd tag x%x completes Data: x%x x%x "
++			"x%x\n",
++			phba->brd_no, vport->vpi,
+ 			irsp->ulpIoTag, irsp->ulpStatus,
+ 			irsp->un.ulpWord[4], irsp->ulpTimeout);
+ 
+ 	/* Check to see if link went down during discovery */
+-	lpfc_els_chk_latt(phba);
++	lpfc_els_chk_latt(vport);
+ 	lpfc_els_free_iocb(phba, cmdiocb);
+ 	return;
+ }
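
Every send and completion in this file now emits an lpfc_debugfs_disc_trc() record. The prototype below is inferred from the call sites in these hunks, not taken from the header that declares it:

/* Inferred prototype -- every call site passes a vport, a trace-class
 * mask (LPFC_DISC_TRC_ELS_CMD or _ELS_RSP here), a format string, and
 * exactly three 32-bit values, zero-padded when a site has fewer. */
void lpfc_debugfs_disc_trc(struct lpfc_vport *vport, int mask,
			   char *fmt, uint32_t data1, uint32_t data2,
			   uint32_t data3);
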
+ 
+ int
+-lpfc_issue_els_scr(struct lpfc_hba * phba, uint32_t nportid, uint8_t retry)
++lpfc_issue_els_scr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	IOCB_t *icmd;
+ 	struct lpfc_iocbq *elsiocb;
+ 	struct lpfc_sli_ring *pring;
+@@ -1323,15 +1517,16 @@
+ 
+ 	psli = &phba->sli;
+ 	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
+-	cmdsize = (sizeof (uint32_t) + sizeof (SCR));
++	cmdsize = (sizeof(uint32_t) + sizeof(SCR));
+ 	ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+ 	if (!ndlp)
+ 		return 1;
+ 
+-	lpfc_nlp_init(phba, ndlp, nportid);
++	lpfc_nlp_init(vport, ndlp, nportid);
+ 
+-	elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp,
++	elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
+ 						ndlp->nlp_DID, ELS_CMD_SCR);
++
+ 	if (!elsiocb) {
+ 		lpfc_nlp_put(ndlp);
+ 		return 1;
+@@ -1341,29 +1536,31 @@
+ 	pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ 
+ 	*((uint32_t *) (pcmd)) = ELS_CMD_SCR;
+-	pcmd += sizeof (uint32_t);
++	pcmd += sizeof(uint32_t);
+ 
+ 	/* For SCR, remainder of payload is SCR parameter page */
+-	memset(pcmd, 0, sizeof (SCR));
++	memset(pcmd, 0, sizeof(SCR));
+ 	((SCR *) pcmd)->Function = SCR_FUNC_FULL;
+ 
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"Issue SCR:       did:x%x",
++		ndlp->nlp_DID, 0, 0);
++
+ 	phba->fc_stat.elsXmitSCR++;
+ 	elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd;
+-	spin_lock_irq(phba->host->host_lock);
+ 	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+-		spin_unlock_irq(phba->host->host_lock);
+ 		lpfc_nlp_put(ndlp);
+ 		lpfc_els_free_iocb(phba, elsiocb);
+ 		return 1;
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
+ 	lpfc_nlp_put(ndlp);
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_issue_els_farpr(struct lpfc_hba * phba, uint32_t nportid, uint8_t retry)
++lpfc_issue_els_farpr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	IOCB_t *icmd;
+ 	struct lpfc_iocbq *elsiocb;
+ 	struct lpfc_sli_ring *pring;
+@@ -1377,13 +1574,14 @@
+ 
+ 	psli = &phba->sli;
+ 	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
+-	cmdsize = (sizeof (uint32_t) + sizeof (FARP));
++	cmdsize = (sizeof(uint32_t) + sizeof(FARP));
+ 	ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+ 	if (!ndlp)
+ 		return 1;
+-	lpfc_nlp_init(phba, ndlp, nportid);
+ 
+-	elsiocb = lpfc_prep_els_iocb(phba, 1, cmdsize, retry, ndlp,
++	lpfc_nlp_init(vport, ndlp, nportid);
++
++	elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp,
+ 						ndlp->nlp_DID, ELS_CMD_RNID);
+ 	if (!elsiocb) {
+ 		lpfc_nlp_put(ndlp);
+@@ -1394,44 +1592,71 @@
+ 	pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ 
+ 	*((uint32_t *) (pcmd)) = ELS_CMD_FARPR;
+-	pcmd += sizeof (uint32_t);
++	pcmd += sizeof(uint32_t);
+ 
+ 	/* Fill in FARPR payload */
+ 	fp = (FARP *) (pcmd);
+-	memset(fp, 0, sizeof (FARP));
++	memset(fp, 0, sizeof(FARP));
+ 	lp = (uint32_t *) pcmd;
+ 	*lp++ = be32_to_cpu(nportid);
+-	*lp++ = be32_to_cpu(phba->fc_myDID);
++	*lp++ = be32_to_cpu(vport->fc_myDID);
+ 	fp->Rflags = 0;
+ 	fp->Mflags = (FARP_MATCH_PORT | FARP_MATCH_NODE);
+ 
+-	memcpy(&fp->RportName, &phba->fc_portname, sizeof (struct lpfc_name));
+-	memcpy(&fp->RnodeName, &phba->fc_nodename, sizeof (struct lpfc_name));
+-	if ((ondlp = lpfc_findnode_did(phba, nportid))) {
++	memcpy(&fp->RportName, &vport->fc_portname, sizeof(struct lpfc_name));
++	memcpy(&fp->RnodeName, &vport->fc_nodename, sizeof(struct lpfc_name));
++	ondlp = lpfc_findnode_did(vport, nportid);
++	if (ondlp) {
+ 		memcpy(&fp->OportName, &ondlp->nlp_portname,
+-		       sizeof (struct lpfc_name));
++		       sizeof(struct lpfc_name));
+ 		memcpy(&fp->OnodeName, &ondlp->nlp_nodename,
+-		       sizeof (struct lpfc_name));
++		       sizeof(struct lpfc_name));
+ 	}
+ 
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"Issue FARPR:     did:x%x",
++		ndlp->nlp_DID, 0, 0);
++
+ 	phba->fc_stat.elsXmitFARPR++;
+ 	elsiocb->iocb_cmpl = lpfc_cmpl_els_cmd;
+-	spin_lock_irq(phba->host->host_lock);
+ 	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+-		spin_unlock_irq(phba->host->host_lock);
+ 		lpfc_nlp_put(ndlp);
+ 		lpfc_els_free_iocb(phba, elsiocb);
+ 		return 1;
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
+ 	lpfc_nlp_put(ndlp);
+ 	return 0;
+ }
+ 
++static void
++lpfc_end_rscn(struct lpfc_vport *vport)
++{
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++	if (vport->fc_flag & FC_RSCN_MODE) {
++		/*
++		 * Check to see if more RSCNs came in while we were
++		 * processing this one.
++		 */
++		if (vport->fc_rscn_id_cnt ||
++		    (vport->fc_flag & FC_RSCN_DISCOVERY) != 0)
++			lpfc_els_handle_rscn(vport);
++		else {
++			spin_lock_irq(shost->host_lock);
++			vport->fc_flag &= ~FC_RSCN_MODE;
++			spin_unlock_irq(shost->host_lock);
++		}
++	}
++}
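
lpfc_end_rscn() folds together the check-for-queued-RSCNs block that this file previously open-coded in at least three places (see the PLOGI completion and lpfc_rscn_disc() above). With the helper in place, the exit sequence in lpfc_cancel_retry_delay_tmo() a few hunks below reduces to:

if (vport->num_disc_nodes == 0) {
	spin_lock_irq(shost->host_lock);
	vport->fc_flag &= ~FC_NDISC_ACTIVE;
	spin_unlock_irq(shost->host_lock);
	lpfc_can_disctmo(vport);
	lpfc_end_rscn(vport);	/* FC_RSCN_MODE handled in one place */
}
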
++
+ void
+-lpfc_cancel_retry_delay_tmo(struct lpfc_hba *phba, struct lpfc_nodelist * nlp)
++lpfc_cancel_retry_delay_tmo(struct lpfc_vport *vport, struct lpfc_nodelist *nlp)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++	spin_lock_irq(shost->host_lock);
+ 	nlp->nlp_flag &= ~NLP_DELAY_TMO;
++	spin_unlock_irq(shost->host_lock);
+ 	del_timer_sync(&nlp->nlp_delayfunc);
+ 	nlp->nlp_last_elscmd = 0;
+ 
+@@ -1439,30 +1664,21 @@
+ 		list_del_init(&nlp->els_retry_evt.evt_listp);
+ 
+ 	if (nlp->nlp_flag & NLP_NPR_2B_DISC) {
++		spin_lock_irq(shost->host_lock);
+ 		nlp->nlp_flag &= ~NLP_NPR_2B_DISC;
+-		if (phba->num_disc_nodes) {
++		spin_unlock_irq(shost->host_lock);
++		if (vport->num_disc_nodes) {
+ 			/* Check to see if there are more
+ 			 * PLOGIs to be sent
+ 			 */
+-			lpfc_more_plogi(phba);
++			lpfc_more_plogi(vport);
+ 
+-			if (phba->num_disc_nodes == 0) {
+-				phba->fc_flag &= ~FC_NDISC_ACTIVE;
+-				lpfc_can_disctmo(phba);
+-				if (phba->fc_flag & FC_RSCN_MODE) {
+-					/*
+-					 * Check to see if more RSCNs
+-					 * came in while we were
+-					 * processing this one.
+-					 */
+-					if((phba->fc_rscn_id_cnt==0) &&
+-					 !(phba->fc_flag & FC_RSCN_DISCOVERY)) {
+-						phba->fc_flag &= ~FC_RSCN_MODE;
+-					}
+-					else {
+-						lpfc_els_handle_rscn(phba);
+-					}
+-				}
++			if (vport->num_disc_nodes == 0) {
++				spin_lock_irq(shost->host_lock);
++				vport->fc_flag &= ~FC_NDISC_ACTIVE;
++				spin_unlock_irq(shost->host_lock);
++				lpfc_can_disctmo(vport);
++				lpfc_end_rscn(vport);
+ 			}
+ 		}
+ 	}
+@@ -1472,18 +1688,19 @@
+ void
+ lpfc_els_retry_delay(unsigned long ptr)
+ {
+-	struct lpfc_nodelist *ndlp;
+-	struct lpfc_hba *phba;
+-	unsigned long iflag;
+-	struct lpfc_work_evt  *evtp;
++	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) ptr;
++	struct lpfc_vport *vport = ndlp->vport;
++	struct lpfc_hba   *phba = vport->phba;
++	unsigned long flags;
++	struct lpfc_work_evt  *evtp = &ndlp->els_retry_evt;
+ 
+-	ndlp = (struct lpfc_nodelist *)ptr;
+-	phba = ndlp->nlp_phba;
++	ndlp = (struct lpfc_nodelist *) ptr;
++	phba = ndlp->vport->phba;
+ 	evtp = &ndlp->els_retry_evt;
+ 
+-	spin_lock_irqsave(phba->host->host_lock, iflag);
++	spin_lock_irqsave(&phba->hbalock, flags);
+ 	if (!list_empty(&evtp->evt_listp)) {
+-		spin_unlock_irqrestore(phba->host->host_lock, iflag);
++		spin_unlock_irqrestore(&phba->hbalock, flags);
+ 		return;
+ 	}
+ 
+@@ -1491,33 +1708,31 @@
+ 	evtp->evt       = LPFC_EVT_ELS_RETRY;
+ 	list_add_tail(&evtp->evt_listp, &phba->work_list);
+ 	if (phba->work_wait)
+-		wake_up(phba->work_wait);
++		lpfc_worker_wake_up(phba);
+ 
+-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
++	spin_unlock_irqrestore(&phba->hbalock, flags);
+ 	return;
+ }
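
Two things change in the timer callback above: the lock protecting the work list is now phba->hbalock rather than the Scsi_Host lock, and the bare wake_up() becomes lpfc_worker_wake_up(). The wrapper's body is not in these hunks; given the wake_up(phba->work_wait) it replaces, a minimal sketch would be:

/* Assumed wrapper; the real helper may do more (e.g. set a work-pending
 * flag) before waking the worker thread. */
static void
lpfc_worker_wake_up(struct lpfc_hba *phba)
{
	if (phba->work_wait)
		wake_up(phba->work_wait);
}
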
+ 
+ void
+ lpfc_els_retry_delay_handler(struct lpfc_nodelist *ndlp)
+ {
+-	struct lpfc_hba *phba;
+-	uint32_t cmd;
+-	uint32_t did;
+-	uint8_t retry;
++	struct lpfc_vport *vport = ndlp->vport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++	uint32_t cmd, did, retry;
+ 
+-	phba = ndlp->nlp_phba;
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
+ 	did = ndlp->nlp_DID;
+ 	cmd = ndlp->nlp_last_elscmd;
+ 	ndlp->nlp_last_elscmd = 0;
+ 
+ 	if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+ 		return;
+ 	}
+ 
+ 	ndlp->nlp_flag &= ~NLP_DELAY_TMO;
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
+ 	/*
+ 	 * If a discovery event readded nlp_delayfunc after timer
+ 	 * firing and before processing the timer, cancel the
+@@ -1528,57 +1743,54 @@
+ 
+ 	switch (cmd) {
+ 	case ELS_CMD_FLOGI:
+-		lpfc_issue_els_flogi(phba, ndlp, retry);
++		lpfc_issue_els_flogi(vport, ndlp, retry);
+ 		break;
+ 	case ELS_CMD_PLOGI:
+-		if(!lpfc_issue_els_plogi(phba, ndlp->nlp_DID, retry)) {
++		if (!lpfc_issue_els_plogi(vport, ndlp->nlp_DID, retry)) {
+ 			ndlp->nlp_prev_state = ndlp->nlp_state;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
+ 		}
+ 		break;
+ 	case ELS_CMD_ADISC:
+-		if (!lpfc_issue_els_adisc(phba, ndlp, retry)) {
++		if (!lpfc_issue_els_adisc(vport, ndlp, retry)) {
+ 			ndlp->nlp_prev_state = ndlp->nlp_state;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
+ 		}
+ 		break;
+ 	case ELS_CMD_PRLI:
+-		if (!lpfc_issue_els_prli(phba, ndlp, retry)) {
++		if (!lpfc_issue_els_prli(vport, ndlp, retry)) {
+ 			ndlp->nlp_prev_state = ndlp->nlp_state;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_PRLI_ISSUE);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
+ 		}
+ 		break;
+ 	case ELS_CMD_LOGO:
+-		if (!lpfc_issue_els_logo(phba, ndlp, retry)) {
++		if (!lpfc_issue_els_logo(vport, ndlp, retry)) {
+ 			ndlp->nlp_prev_state = ndlp->nlp_state;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ 		}
+ 		break;
++	case ELS_CMD_FDISC:
++		lpfc_issue_els_fdisc(vport, ndlp, retry);
++		break;
+ 	}
+ 	return;
+ }
+ 
+ static int
+-lpfc_els_retry(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-	       struct lpfc_iocbq * rspiocb)
++lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++	       struct lpfc_iocbq *rspiocb)
+ {
+-	IOCB_t *irsp;
+-	struct lpfc_dmabuf *pcmd;
+-	struct lpfc_nodelist *ndlp;
++	struct lpfc_vport *vport = cmdiocb->vport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++	IOCB_t *irsp = &rspiocb->iocb;
++	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++	struct lpfc_dmabuf *pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+ 	uint32_t *elscmd;
+ 	struct ls_rjt stat;
+-	int retry, maxretry;
+-	int delay;
+-	uint32_t cmd;
++	int retry = 0, maxretry = lpfc_max_els_tries, delay = 0;
++	uint32_t cmd = 0;
+ 	uint32_t did;
+ 
+-	retry = 0;
+-	delay = 0;
+-	maxretry = lpfc_max_els_tries;
+-	irsp = &rspiocb->iocb;
+-	ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
+-	pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+-	cmd = 0;
+ 
+ 	/* Note: context2 may be 0 for internal driver abort
+ 	 * of delays ELS command.
+@@ -1594,11 +1806,15 @@
+ 	else {
+ 		/* We should only hit this case for retrying PLOGI */
+ 		did = irsp->un.elsreq64.remoteID;
+-		ndlp = lpfc_findnode_did(phba, did);
++		ndlp = lpfc_findnode_did(vport, did);
+ 		if (!ndlp && (cmd != ELS_CMD_PLOGI))
+ 			return 1;
+ 	}
+ 
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"Retry ELS:       wd7:x%x wd4:x%x did:x%x",
++		*(((uint32_t *) irsp) + 7), irsp->un.ulpWord[4], ndlp->nlp_DID);
++
+ 	switch (irsp->ulpStatus) {
+ 	case IOSTAT_FCP_RSP_ERROR:
+ 	case IOSTAT_REMOTE_STOP:
+@@ -1607,25 +1823,37 @@
+ 	case IOSTAT_LOCAL_REJECT:
+ 		switch ((irsp->un.ulpWord[4] & 0xff)) {
+ 		case IOERR_LOOP_OPEN_FAILURE:
+-			if (cmd == ELS_CMD_PLOGI) {
+-				if (cmdiocb->retry == 0) {
+-					delay = 1;
+-				}
+-			}
++			if (cmd == ELS_CMD_PLOGI && cmdiocb->retry == 0)
++				delay = 1000;
+ 			retry = 1;
+ 			break;
+ 
+-		case IOERR_SEQUENCE_TIMEOUT:
++		case IOERR_ILLEGAL_COMMAND:
++			if ((phba->sli3_options & LPFC_SLI3_VPORT_TEARDOWN) &&
++			    (cmd == ELS_CMD_FDISC)) {
++				lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++				"%d (%d):0124 FDISC failed (3/6) retrying...\n",
++					phba->brd_no, vport->vpi);
++				lpfc_mbx_unreg_vpi(vport);
+ 			retry = 1;
++				/* Always retry for this case */
++				cmdiocb->retry = 0;
++			}
+ 			break;
+ 
+ 		case IOERR_NO_RESOURCES:
+-			if (cmd == ELS_CMD_PLOGI) {
+-				delay = 1;
+-			}
++			retry = 1;
++			if (cmdiocb->retry > 100)
++				delay = 100;
++			maxretry = 250;
++			break;
++
++		case IOERR_ILLEGAL_FRAME:
++			delay = 100;
+ 			retry = 1;
+ 			break;
+ 
++		case IOERR_SEQUENCE_TIMEOUT:
+ 		case IOERR_INVALID_RPI:
+ 			retry = 1;
+ 			break;
+@@ -1655,27 +1883,57 @@
+ 			if (stat.un.b.lsRjtRsnCodeExp ==
+ 			    LSEXP_CMD_IN_PROGRESS) {
+ 				if (cmd == ELS_CMD_PLOGI) {
+-					delay = 1;
++					delay = 1000;
+ 					maxretry = 48;
+ 				}
+ 				retry = 1;
+ 				break;
+ 			}
+ 			if (cmd == ELS_CMD_PLOGI) {
+-				delay = 1;
++				delay = 1000;
+ 				maxretry = lpfc_max_els_tries + 1;
+ 				retry = 1;
+ 				break;
+ 			}
++			if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++			  (cmd == ELS_CMD_FDISC) &&
++			  (stat.un.b.lsRjtRsnCodeExp == LSEXP_OUT_OF_RESOURCE)){
++				lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++				"%d (%d):0125 FDISC Failed (x%x)."
++				" Fabric out of resources\n",
++				phba->brd_no, vport->vpi, stat.un.lsRjtError);
++				lpfc_vport_set_state(vport,
++						     FC_VPORT_NO_FABRIC_RSCS);
++			}
+ 			break;
+ 
+ 		case LSRJT_LOGICAL_BSY:
+-			if (cmd == ELS_CMD_PLOGI) {
+-				delay = 1;
++			if ((cmd == ELS_CMD_PLOGI) ||
++			    (cmd == ELS_CMD_PRLI)) {
++				delay = 1000;
+ 				maxretry = 48;
++			} else if (cmd == ELS_CMD_FDISC) {
++				/* Always retry for this case */
++				cmdiocb->retry = 0;
+ 			}
+ 			retry = 1;
+ 			break;
++
++		case LSRJT_LOGICAL_ERR:
++		case LSRJT_PROTOCOL_ERR:
++			if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++			  (cmd == ELS_CMD_FDISC) &&
++			  ((stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_PNAME) ||
++			  (stat.un.b.lsRjtRsnCodeExp == LSEXP_INVALID_NPORT_ID))
++			  ) {
++				lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++				"%d (%d):0123 FDISC Failed (x%x)."
++				" Fabric Detected Bad WWN\n",
++				phba->brd_no, vport->vpi, stat.un.lsRjtError);
++				lpfc_vport_set_state(vport,
++						     FC_VPORT_FABRIC_REJ_WWN);
++			}
++			break;
+ 		}
+ 		break;
+ 
+@@ -1695,21 +1953,27 @@
+ 		retry = 0;
+ 	}
+ 
++	if ((vport->load_flag & FC_UNLOADING) != 0)
++		retry = 0;
++
+ 	if (retry) {
+ 
+ 		/* Retry ELS command <elsCmd> to remote NPORT <did> */
+ 		lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-				"%d:0107 Retry ELS command x%x to remote "
++				"%d (%d):0107 Retry ELS command x%x to remote "
+ 				"NPORT x%x Data: x%x x%x\n",
+-				phba->brd_no,
++				phba->brd_no, vport->vpi,
+ 				cmd, did, cmdiocb->retry, delay);
+ 
+-		if ((cmd == ELS_CMD_PLOGI) || (cmd == ELS_CMD_ADISC)) {
++		if (((cmd == ELS_CMD_PLOGI) || (cmd == ELS_CMD_ADISC)) &&
++			((irsp->ulpStatus != IOSTAT_LOCAL_REJECT) ||
++			((irsp->un.ulpWord[4] & 0xff) != IOERR_NO_RESOURCES))) {
++			/* Don't reset timer for no resources */
++
+ 			/* If discovery / RSCN timer is running, reset it */
+-			if (timer_pending(&phba->fc_disctmo) ||
+-			      (phba->fc_flag & FC_RSCN_MODE)) {
+-				lpfc_set_disctmo(phba);
+-			}
++			if (timer_pending(&vport->fc_disctmo) ||
++			    (vport->fc_flag & FC_RSCN_MODE))
++				lpfc_set_disctmo(vport);
+ 		}
+ 
+ 		phba->fc_stat.elsXmitRetry++;
+@@ -1717,50 +1981,62 @@
+ 			phba->fc_stat.elsDelayRetry++;
+ 			ndlp->nlp_retry = cmdiocb->retry;
+ 
+-			mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ);
++			/* delay is specified in milliseconds */
++			mod_timer(&ndlp->nlp_delayfunc,
++				jiffies + msecs_to_jiffies(delay));
++			spin_lock_irq(shost->host_lock);
+ 			ndlp->nlp_flag |= NLP_DELAY_TMO;
++			spin_unlock_irq(shost->host_lock);
+ 
+ 			ndlp->nlp_prev_state = ndlp->nlp_state;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++			if (cmd == ELS_CMD_PRLI)
++				lpfc_nlp_set_state(vport, ndlp,
++					NLP_STE_REG_LOGIN_ISSUE);
++			else
++				lpfc_nlp_set_state(vport, ndlp,
++					NLP_STE_NPR_NODE);
+ 			ndlp->nlp_last_elscmd = cmd;
+ 
+ 			return 1;
+ 		}
+ 		switch (cmd) {
+ 		case ELS_CMD_FLOGI:
+-			lpfc_issue_els_flogi(phba, ndlp, cmdiocb->retry);
++			lpfc_issue_els_flogi(vport, ndlp, cmdiocb->retry);
++			return 1;
++		case ELS_CMD_FDISC:
++			lpfc_issue_els_fdisc(vport, ndlp, cmdiocb->retry);
+ 			return 1;
+ 		case ELS_CMD_PLOGI:
+ 			if (ndlp) {
+ 				ndlp->nlp_prev_state = ndlp->nlp_state;
+-				lpfc_nlp_set_state(phba, ndlp,
++				lpfc_nlp_set_state(vport, ndlp,
+ 						   NLP_STE_PLOGI_ISSUE);
+ 			}
+-			lpfc_issue_els_plogi(phba, did, cmdiocb->retry);
++			lpfc_issue_els_plogi(vport, did, cmdiocb->retry);
+ 			return 1;
+ 		case ELS_CMD_ADISC:
+ 			ndlp->nlp_prev_state = ndlp->nlp_state;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE);
+-			lpfc_issue_els_adisc(phba, ndlp, cmdiocb->retry);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
++			lpfc_issue_els_adisc(vport, ndlp, cmdiocb->retry);
+ 			return 1;
+ 		case ELS_CMD_PRLI:
+ 			ndlp->nlp_prev_state = ndlp->nlp_state;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_PRLI_ISSUE);
+-			lpfc_issue_els_prli(phba, ndlp, cmdiocb->retry);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
++			lpfc_issue_els_prli(vport, ndlp, cmdiocb->retry);
+ 			return 1;
+ 		case ELS_CMD_LOGO:
+ 			ndlp->nlp_prev_state = ndlp->nlp_state;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+-			lpfc_issue_els_logo(phba, ndlp, cmdiocb->retry);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++			lpfc_issue_els_logo(vport, ndlp, cmdiocb->retry);
+ 			return 1;
+ 		}
+ 	}
+ 
+ 	/* No retry ELS command <elsCmd> to remote NPORT <did> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0108 No retry ELS command x%x to remote NPORT x%x "
+-			"Data: x%x\n",
+-			phba->brd_no,
++			"%d (%d):0108 No retry ELS command x%x to remote "
++			"NPORT x%x Data: x%x\n",
++			phba->brd_no, vport->vpi,
+ 			cmd, did, cmdiocb->retry);
+ 
+ 	return 0;
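
The retry bookkeeping above also changes units: the old code treated delay as a boolean and always armed the node timer one second out (jiffies + HZ), while the new code carries milliseconds (1000 for the PLOGI/PRLI backoffs, 100 for IOERR_ILLEGAL_FRAME and the deep IOERR_NO_RESOURCES retries) and converts at the mod_timer() call:

/* delay now arrives in milliseconds; with HZ = 250, a common 2.6.22
 * setting, msecs_to_jiffies(1000) is 250 jiffies and
 * msecs_to_jiffies(100) is 25. */
mod_timer(&ndlp->nlp_delayfunc, jiffies + msecs_to_jiffies(delay));
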
+@@ -1795,33 +2071,36 @@
+ 		lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys);
+ 		kfree(buf_ptr);
+ 	}
+-	spin_lock_irq(phba->host->host_lock);
+ 	lpfc_sli_release_iocbq(phba, elsiocb);
+-	spin_unlock_irq(phba->host->host_lock);
+ 	return 0;
+ }
+ 
+ static void
+-lpfc_cmpl_els_logo_acc(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-		       struct lpfc_iocbq * rspiocb)
++lpfc_cmpl_els_logo_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++		       struct lpfc_iocbq *rspiocb)
+ {
+-	struct lpfc_nodelist *ndlp;
++	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++	struct lpfc_vport *vport = cmdiocb->vport;
++	IOCB_t *irsp;
+ 
+-	ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++	irsp = &rspiocb->iocb;
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++		"ACC LOGO cmpl:   status:x%x/x%x did:x%x",
++		irsp->ulpStatus, irsp->un.ulpWord[4], ndlp->nlp_DID);
+ 
+ 	/* ACC to LOGO completes to NPort <nlp_DID> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0109 ACC to LOGO completes to NPort x%x "
++			"%d (%d):0109 ACC to LOGO completes to NPort x%x "
+ 			"Data: x%x x%x x%x\n",
+-			phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag,
+-			ndlp->nlp_state, ndlp->nlp_rpi);
++			phba->brd_no, vport->vpi, ndlp->nlp_DID,
++			ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+ 
+ 	switch (ndlp->nlp_state) {
+ 	case NLP_STE_UNUSED_NODE:	/* node is just allocated */
+-		lpfc_drop_node(phba, ndlp);
++		lpfc_drop_node(vport, ndlp);
+ 		break;
+ 	case NLP_STE_NPR_NODE:		/* NPort Recovery mode */
+-		lpfc_unreg_rpi(phba, ndlp);
++		lpfc_unreg_rpi(vport, ndlp);
+ 		break;
+ 	default:
+ 		break;
+@@ -1830,24 +2109,38 @@
+ 	return;
+ }
+ 
++void
++lpfc_mbx_cmpl_dflt_rpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
++{
++	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
++	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
++
++	pmb->context1 = NULL;
++	lpfc_mbuf_free(phba, mp->virt, mp->phys);
++	kfree(mp);
++	mempool_free(pmb, phba->mbox_mem_pool);
++	lpfc_nlp_put(ndlp);
++	return;
++}
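
lpfc_mbx_cmpl_dflt_rpi() is the new immediate-unreg mailbox completion: it frees the mailbox's DMA buffer and drops the node reference the sender took. The pairing, as wired up in the PLOGI ACC path a few hunks below:

mbox->context2 = lpfc_nlp_get(ndlp);	/* hold the node across the mailbox */
mbox->vport = vport;
if (ndlp->nlp_flag & NLP_RM_DFLT_RPI) {
	mbox->mbox_flag |= LPFC_MBX_IMED_UNREG;
	mbox->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi;	/* lpfc_nlp_put() here */
}
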
++
+ static void
+-lpfc_cmpl_els_acc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++lpfc_cmpl_els_rsp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ 		  struct lpfc_iocbq *rspiocb)
+ {
++	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++	struct lpfc_vport *vport = ndlp ? ndlp->vport : NULL;
++	struct Scsi_Host  *shost = vport ? lpfc_shost_from_vport(vport) : NULL;
+ 	IOCB_t *irsp;
+-	struct lpfc_nodelist *ndlp;
+ 	LPFC_MBOXQ_t *mbox = NULL;
+-	struct lpfc_dmabuf *mp;
++	struct lpfc_dmabuf *mp = NULL;
+ 
+ 	irsp = &rspiocb->iocb;
+ 
+-	ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
+ 	if (cmdiocb->context_un.mbox)
+ 		mbox = cmdiocb->context_un.mbox;
+ 
+-
+ 	/* Check to see if link went down during discovery */
+-	if (lpfc_els_chk_latt(phba) || !ndlp) {
++	if (!ndlp || lpfc_els_chk_latt(vport)) {
+ 		if (mbox) {
+ 			mp = (struct lpfc_dmabuf *) mbox->context1;
+ 			if (mp) {
+@@ -1859,11 +2152,16 @@
+ 		goto out;
+ 	}
+ 
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++		"ACC cmpl:        status:x%x/x%x did:x%x",
++		irsp->ulpStatus, irsp->un.ulpWord[4],
++		irsp->un.rcvels.remoteID);
++
+ 	/* ELS response tag <ulpIoTag> completes */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0110 ELS response tag x%x completes "
++			"%d (%d):0110 ELS response tag x%x completes "
+ 			"Data: x%x x%x x%x x%x x%x x%x x%x\n",
+-			phba->brd_no,
++			phba->brd_no, vport->vpi,
+ 			cmdiocb->iocb.ulpIoTag, rspiocb->iocb.ulpStatus,
+ 			rspiocb->iocb.un.ulpWord[4], rspiocb->iocb.ulpTimeout,
+  			ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_state,
+@@ -1872,11 +2170,19 @@
+ 	if (mbox) {
+ 		if ((rspiocb->iocb.ulpStatus == 0)
+ 		    && (ndlp->nlp_flag & NLP_ACC_REGLOGIN)) {
+-			lpfc_unreg_rpi(phba, ndlp);
+-			mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login;
++			lpfc_unreg_rpi(vport, ndlp);
+ 			mbox->context2 = lpfc_nlp_get(ndlp);
++			mbox->vport = vport;
++			if (ndlp->nlp_flag & NLP_RM_DFLT_RPI) {
++				mbox->mbox_flag |= LPFC_MBX_IMED_UNREG;
++				mbox->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi;
++			}
++			else {
++				mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login;
+ 			ndlp->nlp_prev_state = ndlp->nlp_state;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_REG_LOGIN_ISSUE);
++				lpfc_nlp_set_state(vport, ndlp,
++					   NLP_STE_REG_LOGIN_ISSUE);
++			}
+ 			if (lpfc_sli_issue_mbox(phba, mbox,
+ 						(MBX_NOWAIT | MBX_STOP_IOCB))
+ 			    != MBX_NOT_FINISHED) {
+@@ -1886,17 +2192,13 @@
+ 			/* NOTE: we should have messages for unsuccessful
+ 			   reglogin */
+ 		} else {
+-			/* Do not call NO_LIST for lpfc_els_abort'ed ELS cmds */
+-			if (!((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+-			      ((irsp->un.ulpWord[4] == IOERR_SLI_ABORTED) ||
+-			       (irsp->un.ulpWord[4] == IOERR_LINK_DOWN) ||
+-			       (irsp->un.ulpWord[4] == IOERR_SLI_DOWN)))) {
+-				if (ndlp->nlp_flag & NLP_ACC_REGLOGIN) {
+-					lpfc_drop_node(phba, ndlp);
++			/* Do not drop node for lpfc_els_abort'ed ELS cmds */
++			if (!lpfc_error_lost_link(irsp) &&
++			    ndlp->nlp_flag & NLP_ACC_REGLOGIN) {
++				lpfc_drop_node(vport, ndlp);
+ 					ndlp = NULL;
+ 				}
+ 			}
+-		}
+ 		mp = (struct lpfc_dmabuf *) mbox->context1;
+ 		if (mp) {
+ 			lpfc_mbuf_free(phba, mp->virt, mp->phys);
+@@ -1906,19 +2208,21 @@
+ 	}
+ out:
+ 	if (ndlp) {
+-		spin_lock_irq(phba->host->host_lock);
+-		ndlp->nlp_flag &= ~NLP_ACC_REGLOGIN;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
++		ndlp->nlp_flag &= ~(NLP_ACC_REGLOGIN | NLP_RM_DFLT_RPI);
++		spin_unlock_irq(shost->host_lock);
+ 	}
+ 	lpfc_els_free_iocb(phba, cmdiocb);
+ 	return;
+ }
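
The rename from lpfc_cmpl_els_acc() to lpfc_cmpl_els_rsp() matches a real behavior change: the handler now completes rejects as well, because lpfc_els_rsp_reject() grows a mailbox parameter and attaches it the same way the ACC path does (from the hunk below):

if (mbox) {
	elsiocb->context_un.mbox = mbox;	/* rejects carry a mailbox too */
	elsiocb->context1 = lpfc_nlp_get(ndlp);
}
phba->fc_stat.elsXmitLSRJT++;
elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;		/* was lpfc_cmpl_els_acc */
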
+ 
+ int
+-lpfc_els_rsp_acc(struct lpfc_hba * phba, uint32_t flag,
+-		 struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp,
+-		 LPFC_MBOXQ_t * mbox, uint8_t newnode)
++lpfc_els_rsp_acc(struct lpfc_vport *vport, uint32_t flag,
++		 struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp,
++		 LPFC_MBOXQ_t *mbox, uint8_t newnode)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
+ 	IOCB_t *icmd;
+ 	IOCB_t *oldcmd;
+ 	struct lpfc_iocbq *elsiocb;
+@@ -1935,22 +2239,29 @@
+ 
+ 	switch (flag) {
+ 	case ELS_CMD_ACC:
+-		cmdsize = sizeof (uint32_t);
+-		elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
++		cmdsize = sizeof(uint32_t);
++		elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry,
+ 					ndlp, ndlp->nlp_DID, ELS_CMD_ACC);
+ 		if (!elsiocb) {
++			spin_lock_irq(shost->host_lock);
+ 			ndlp->nlp_flag &= ~NLP_LOGO_ACC;
++			spin_unlock_irq(shost->host_lock);
+ 			return 1;
+ 		}
++
+ 		icmd = &elsiocb->iocb;
+ 		icmd->ulpContext = oldcmd->ulpContext;	/* Xri */
+ 		pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ 		*((uint32_t *) (pcmd)) = ELS_CMD_ACC;
+-		pcmd += sizeof (uint32_t);
++		pcmd += sizeof(uint32_t);
++
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++			"Issue ACC:       did:x%x flg:x%x",
++			ndlp->nlp_DID, ndlp->nlp_flag, 0);
+ 		break;
+ 	case ELS_CMD_PLOGI:
+-		cmdsize = (sizeof (struct serv_parm) + sizeof (uint32_t));
+-		elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
++		cmdsize = (sizeof(struct serv_parm) + sizeof(uint32_t));
++		elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry,
+ 					ndlp, ndlp->nlp_DID, ELS_CMD_ACC);
+ 		if (!elsiocb)
+ 			return 1;
+@@ -1963,12 +2274,16 @@
+ 			elsiocb->context_un.mbox = mbox;
+ 
+ 		*((uint32_t *) (pcmd)) = ELS_CMD_ACC;
+-		pcmd += sizeof (uint32_t);
+-		memcpy(pcmd, &phba->fc_sparam, sizeof (struct serv_parm));
++		pcmd += sizeof(uint32_t);
++		memcpy(pcmd, &vport->fc_sparam, sizeof(struct serv_parm));
++
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++			"Issue ACC PLOGI: did:x%x flg:x%x",
++			ndlp->nlp_DID, ndlp->nlp_flag, 0);
+ 		break;
+ 	case ELS_CMD_PRLO:
+-		cmdsize = sizeof (uint32_t) + sizeof (PRLO);
+-		elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
++		cmdsize = sizeof(uint32_t) + sizeof(PRLO);
++		elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry,
+ 					     ndlp, ndlp->nlp_DID, ELS_CMD_PRLO);
+ 		if (!elsiocb)
+ 			return 1;
+@@ -1978,10 +2293,14 @@
+ 		pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ 
+ 		memcpy(pcmd, ((struct lpfc_dmabuf *) oldiocb->context2)->virt,
+-		       sizeof (uint32_t) + sizeof (PRLO));
++		       sizeof(uint32_t) + sizeof(PRLO));
+ 		*((uint32_t *) (pcmd)) = ELS_CMD_PRLO_ACC;
+ 		els_pkt_ptr = (ELS_PKT *) pcmd;
+ 		els_pkt_ptr->un.prlo.acceptRspCode = PRLO_REQ_EXECUTED;
++
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++			"Issue ACC PRLO:  did:x%x flg:x%x",
++			ndlp->nlp_DID, ndlp->nlp_flag, 0);
+ 		break;
+ 	default:
+ 		return 1;
+@@ -1994,25 +2313,23 @@
+ 
+ 	/* Xmit ELS ACC response tag <ulpIoTag> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0128 Xmit ELS ACC response tag x%x, XRI: x%x, "
++			"%d (%d):0128 Xmit ELS ACC response tag x%x, XRI: x%x, "
+ 			"DID: x%x, nlp_flag: x%x nlp_state: x%x RPI: x%x\n",
+-			phba->brd_no, elsiocb->iotag,
++			phba->brd_no, vport->vpi, elsiocb->iotag,
+ 			elsiocb->iocb.ulpContext, ndlp->nlp_DID,
+ 			ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+ 
+ 	if (ndlp->nlp_flag & NLP_LOGO_ACC) {
+-		spin_lock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag &= ~NLP_LOGO_ACC;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+ 		elsiocb->iocb_cmpl = lpfc_cmpl_els_logo_acc;
+ 	} else {
+-		elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++		elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+ 	}
+ 
+ 	phba->fc_stat.elsXmitACC++;
+-	spin_lock_irq(phba->host->host_lock);
+ 	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+-	spin_unlock_irq(phba->host->host_lock);
+ 	if (rc == IOCB_ERROR) {
+ 		lpfc_els_free_iocb(phba, elsiocb);
+ 		return 1;
+@@ -2021,9 +2338,11 @@
+ }
+ 
+ int
+-lpfc_els_rsp_reject(struct lpfc_hba * phba, uint32_t rejectError,
+-		    struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp)
++lpfc_els_rsp_reject(struct lpfc_vport *vport, uint32_t rejectError,
++		    struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp,
++		    LPFC_MBOXQ_t *mbox)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	IOCB_t *icmd;
+ 	IOCB_t *oldcmd;
+ 	struct lpfc_iocbq *elsiocb;
+@@ -2036,9 +2355,9 @@
+ 	psli = &phba->sli;
+ 	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
+ 
+-	cmdsize = 2 * sizeof (uint32_t);
+-	elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
+-					ndlp, ndlp->nlp_DID, ELS_CMD_LS_RJT);
++	cmdsize = 2 * sizeof(uint32_t);
++	elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
++				     ndlp->nlp_DID, ELS_CMD_LS_RJT);
+ 	if (!elsiocb)
+ 		return 1;
+ 
+@@ -2048,22 +2367,30 @@
+ 	pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ 
+ 	*((uint32_t *) (pcmd)) = ELS_CMD_LS_RJT;
+-	pcmd += sizeof (uint32_t);
++	pcmd += sizeof(uint32_t);
+ 	*((uint32_t *) (pcmd)) = rejectError;
+ 
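++	/* Save the mbox on the iocb so the rsp completion can dispose of it */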
++	if (mbox) {
++		elsiocb->context_un.mbox = mbox;
++		elsiocb->context1 = lpfc_nlp_get(ndlp);
++	}
++
+ 	/* Xmit ELS RJT <err> response tag <ulpIoTag> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0129 Xmit ELS RJT x%x response tag x%x xri x%x, "
+-			"did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n",
+-			phba->brd_no, rejectError, elsiocb->iotag,
++			"%d (%d):0129 Xmit ELS RJT x%x response tag x%x "
++			"xri x%x, did x%x, nlp_flag x%x, nlp_state x%x, "
++			"rpi x%x\n",
++			phba->brd_no, vport->vpi, rejectError, elsiocb->iotag,
+ 			elsiocb->iocb.ulpContext, ndlp->nlp_DID,
+ 			ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+ 
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++		"Issue LS_RJT:    did:x%x flg:x%x err:x%x",
++		ndlp->nlp_DID, ndlp->nlp_flag, rejectError);
++
+ 	phba->fc_stat.elsXmitLSRJT++;
+-	elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
+-	spin_lock_irq(phba->host->host_lock);
++	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+ 	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+-	spin_unlock_irq(phba->host->host_lock);
+ 	if (rc == IOCB_ERROR) {
+ 		lpfc_els_free_iocb(phba, elsiocb);
+ 		return 1;
+@@ -2072,25 +2399,22 @@
+ }
+ 
+ int
+-lpfc_els_rsp_adisc_acc(struct lpfc_hba * phba,
+-		       struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp)
++lpfc_els_rsp_adisc_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb,
++		       struct lpfc_nodelist *ndlp)
+ {
++	struct lpfc_hba  *phba = vport->phba;
++	struct lpfc_sli  *psli = &phba->sli;
++	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
+ 	ADISC *ap;
+-	IOCB_t *icmd;
+-	IOCB_t *oldcmd;
++	IOCB_t *icmd, *oldcmd;
+ 	struct lpfc_iocbq *elsiocb;
+-	struct lpfc_sli_ring *pring;
+-	struct lpfc_sli *psli;
+ 	uint8_t *pcmd;
+ 	uint16_t cmdsize;
+ 	int rc;
+ 
+-	psli = &phba->sli;
+-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
+-
+-	cmdsize = sizeof (uint32_t) + sizeof (ADISC);
+-	elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
+-					ndlp, ndlp->nlp_DID, ELS_CMD_ACC);
++	cmdsize = sizeof(uint32_t) + sizeof(ADISC);
++	elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
++				     ndlp->nlp_DID, ELS_CMD_ACC);
+ 	if (!elsiocb)
+ 		return 1;
+ 
+@@ -2100,28 +2424,30 @@
+ 
+ 	/* Xmit ADISC ACC response tag <ulpIoTag> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0130 Xmit ADISC ACC response iotag x%x xri: "
++			"%d (%d):0130 Xmit ADISC ACC response iotag x%x xri: "
+ 			"x%x, did x%x, nlp_flag x%x, nlp_state x%x rpi x%x\n",
+-			phba->brd_no, elsiocb->iotag,
++			phba->brd_no, vport->vpi, elsiocb->iotag,
+ 			elsiocb->iocb.ulpContext, ndlp->nlp_DID,
+ 			ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+ 
+ 	pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ 
+ 	*((uint32_t *) (pcmd)) = ELS_CMD_ACC;
+-	pcmd += sizeof (uint32_t);
++	pcmd += sizeof(uint32_t);
+ 
+ 	ap = (ADISC *) (pcmd);
+ 	ap->hardAL_PA = phba->fc_pref_ALPA;
+-	memcpy(&ap->portName, &phba->fc_portname, sizeof (struct lpfc_name));
+-	memcpy(&ap->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name));
+-	ap->DID = be32_to_cpu(phba->fc_myDID);
++	memcpy(&ap->portName, &vport->fc_portname, sizeof(struct lpfc_name));
++	memcpy(&ap->nodeName, &vport->fc_nodename, sizeof(struct lpfc_name));
++	ap->DID = be32_to_cpu(vport->fc_myDID);
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++		"Issue ACC ADISC: did:x%x flg:x%x",
++		ndlp->nlp_DID, ndlp->nlp_flag, 0);
+ 
+ 	phba->fc_stat.elsXmitACC++;
+-	elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
+-	spin_lock_irq(phba->host->host_lock);
++	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+ 	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+-	spin_unlock_irq(phba->host->host_lock);
+ 	if (rc == IOCB_ERROR) {
+ 		lpfc_els_free_iocb(phba, elsiocb);
+ 		return 1;
+@@ -2130,9 +2456,10 @@
+ }
+ 
+ int
+-lpfc_els_rsp_prli_acc(struct lpfc_hba *phba, struct lpfc_iocbq *oldiocb,
++lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb,
+ 		      struct lpfc_nodelist *ndlp)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	PRLI *npr;
+ 	lpfc_vpd_t *vpd;
+ 	IOCB_t *icmd;
+@@ -2147,8 +2474,8 @@
+ 	psli = &phba->sli;
+ 	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
+ 
+-	cmdsize = sizeof (uint32_t) + sizeof (PRLI);
+-	elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry, ndlp,
++	cmdsize = sizeof(uint32_t) + sizeof(PRLI);
++	elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
+ 		ndlp->nlp_DID, (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK)));
+ 	if (!elsiocb)
+ 		return 1;
+@@ -2159,19 +2486,19 @@
+ 
+ 	/* Xmit PRLI ACC response tag <ulpIoTag> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0131 Xmit PRLI ACC response tag x%x xri x%x, "
++			"%d (%d):0131 Xmit PRLI ACC response tag x%x xri x%x, "
+ 			"did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n",
+-			phba->brd_no, elsiocb->iotag,
++			phba->brd_no, vport->vpi, elsiocb->iotag,
+ 			elsiocb->iocb.ulpContext, ndlp->nlp_DID,
+ 			ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+ 
+ 	pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ 
+ 	*((uint32_t *) (pcmd)) = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK));
+-	pcmd += sizeof (uint32_t);
++	pcmd += sizeof(uint32_t);
+ 
+ 	/* For PRLI, remainder of payload is PRLI parameter page */
+-	memset(pcmd, 0, sizeof (PRLI));
++	memset(pcmd, 0, sizeof(PRLI));
+ 
+ 	npr = (PRLI *) pcmd;
+ 	vpd = &phba->vpd;
+@@ -2193,12 +2520,14 @@
+ 	npr->prliType = PRLI_FCP_TYPE;
+ 	npr->initiatorFunc = 1;
+ 
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++		"Issue ACC PRLI:  did:x%x flg:x%x",
++		ndlp->nlp_DID, ndlp->nlp_flag, 0);
++
+ 	phba->fc_stat.elsXmitACC++;
+-	elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+ 
+-	spin_lock_irq(phba->host->host_lock);
+ 	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+-	spin_unlock_irq(phba->host->host_lock);
+ 	if (rc == IOCB_ERROR) {
+ 		lpfc_els_free_iocb(phba, elsiocb);
+ 		return 1;
+@@ -2207,12 +2536,12 @@
+ }
+ 
+ static int
+-lpfc_els_rsp_rnid_acc(struct lpfc_hba *phba, uint8_t format,
++lpfc_els_rsp_rnid_acc(struct lpfc_vport *vport, uint8_t format,
+ 		      struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	RNID *rn;
+-	IOCB_t *icmd;
+-	IOCB_t *oldcmd;
++	IOCB_t *icmd, *oldcmd;
+ 	struct lpfc_iocbq *elsiocb;
+ 	struct lpfc_sli_ring *pring;
+ 	struct lpfc_sli *psli;
+@@ -2223,13 +2552,13 @@
+ 	psli = &phba->sli;
+ 	pring = &psli->ring[LPFC_ELS_RING];
+ 
+-	cmdsize = sizeof (uint32_t) + sizeof (uint32_t)
+-		+ (2 * sizeof (struct lpfc_name));
++	cmdsize = sizeof(uint32_t) + sizeof(uint32_t)
++					+ (2 * sizeof(struct lpfc_name));
+ 	if (format)
+-		cmdsize += sizeof (RNID_TOP_DISC);
++		cmdsize += sizeof(RNID_TOP_DISC);
+ 
+-	elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
+-					ndlp, ndlp->nlp_DID, ELS_CMD_ACC);
++	elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
++				     ndlp->nlp_DID, ELS_CMD_ACC);
+ 	if (!elsiocb)
+ 		return 1;
+ 
+@@ -2239,30 +2568,30 @@
+ 
+ 	/* Xmit RNID ACC response tag <ulpIoTag> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0132 Xmit RNID ACC response tag x%x "
++			"%d (%d):0132 Xmit RNID ACC response tag x%x "
+ 			"xri x%x\n",
+-			phba->brd_no, elsiocb->iotag,
++			phba->brd_no, vport->vpi, elsiocb->iotag,
+ 			elsiocb->iocb.ulpContext);
+ 
+ 	pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ 
+ 	*((uint32_t *) (pcmd)) = ELS_CMD_ACC;
+-	pcmd += sizeof (uint32_t);
++	pcmd += sizeof(uint32_t);
+ 
+-	memset(pcmd, 0, sizeof (RNID));
++	memset(pcmd, 0, sizeof(RNID));
+ 	rn = (RNID *) (pcmd);
+ 	rn->Format = format;
+-	rn->CommonLen = (2 * sizeof (struct lpfc_name));
+-	memcpy(&rn->portName, &phba->fc_portname, sizeof (struct lpfc_name));
+-	memcpy(&rn->nodeName, &phba->fc_nodename, sizeof (struct lpfc_name));
++	rn->CommonLen = (2 * sizeof(struct lpfc_name));
++	memcpy(&rn->portName, &vport->fc_portname, sizeof(struct lpfc_name));
++	memcpy(&rn->nodeName, &vport->fc_nodename, sizeof(struct lpfc_name));
+ 	switch (format) {
+ 	case 0:
+ 		rn->SpecificLen = 0;
+ 		break;
+ 	case RNID_TOPOLOGY_DISC:
+-		rn->SpecificLen = sizeof (RNID_TOP_DISC);
++		rn->SpecificLen = sizeof(RNID_TOP_DISC);
+ 		memcpy(&rn->un.topologyDisc.portName,
+-		       &phba->fc_portname, sizeof (struct lpfc_name));
++		       &vport->fc_portname, sizeof(struct lpfc_name));
+ 		rn->un.topologyDisc.unitType = RNID_HBA;
+ 		rn->un.topologyDisc.physPort = 0;
+ 		rn->un.topologyDisc.attachedNodes = 0;
+@@ -2273,15 +2602,17 @@
+ 		break;
+ 	}
+ 
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_RSP,
++		"Issue ACC RNID:  did:x%x flg:x%x",
++		ndlp->nlp_DID, ndlp->nlp_flag, 0);
++
+ 	phba->fc_stat.elsXmitACC++;
+-	elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+ 	lpfc_nlp_put(ndlp);
+ 	elsiocb->context1 = NULL;  /* Don't need ndlp for cmpl,
+ 				    * it could be freed */
+ 
+-	spin_lock_irq(phba->host->host_lock);
+ 	rc = lpfc_sli_issue_iocb(phba, pring, elsiocb, 0);
+-	spin_unlock_irq(phba->host->host_lock);
+ 	if (rc == IOCB_ERROR) {
+ 		lpfc_els_free_iocb(phba, elsiocb);
+ 		return 1;
+@@ -2290,168 +2621,153 @@
+ }
+ 
+ int
+-lpfc_els_disc_adisc(struct lpfc_hba *phba)
++lpfc_els_disc_adisc(struct lpfc_vport *vport)
+ {
+-	int sentadisc;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ 	struct lpfc_nodelist *ndlp, *next_ndlp;
++	int sentadisc = 0;
+ 
+-	sentadisc = 0;
+ 	/* go thru NPR nodes and issue any remaining ELS ADISCs */
+-	list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) {
++	list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
+ 		if (ndlp->nlp_state == NLP_STE_NPR_NODE &&
+ 		    (ndlp->nlp_flag & NLP_NPR_2B_DISC) != 0 &&
+ 		    (ndlp->nlp_flag & NLP_NPR_ADISC) != 0) {
+-			spin_lock_irq(phba->host->host_lock);
++			spin_lock_irq(shost->host_lock);
+ 			ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+-			spin_unlock_irq(phba->host->host_lock);
++			spin_unlock_irq(shost->host_lock);
+ 			ndlp->nlp_prev_state = ndlp->nlp_state;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE);
+-			lpfc_issue_els_adisc(phba, ndlp, 0);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
++			lpfc_issue_els_adisc(vport, ndlp, 0);
+ 			sentadisc++;
+-			phba->num_disc_nodes++;
+-			if (phba->num_disc_nodes >=
+-			    phba->cfg_discovery_threads) {
+-				spin_lock_irq(phba->host->host_lock);
+-				phba->fc_flag |= FC_NLP_MORE;
+-				spin_unlock_irq(phba->host->host_lock);
++			vport->num_disc_nodes++;
++			if (vport->num_disc_nodes >=
++			    vport->phba->cfg_discovery_threads) {
++				spin_lock_irq(shost->host_lock);
++				vport->fc_flag |= FC_NLP_MORE;
++				spin_unlock_irq(shost->host_lock);
+ 				break;
+ 			}
+ 		}
+ 	}
+ 	if (sentadisc == 0) {
+-		spin_lock_irq(phba->host->host_lock);
+-		phba->fc_flag &= ~FC_NLP_MORE;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
++		vport->fc_flag &= ~FC_NLP_MORE;
++		spin_unlock_irq(shost->host_lock);
+ 	}
+ 	return sentadisc;
+ }
+ 
+ int
+-lpfc_els_disc_plogi(struct lpfc_hba * phba)
++lpfc_els_disc_plogi(struct lpfc_vport *vport)
+ {
+-	int sentplogi;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ 	struct lpfc_nodelist *ndlp, *next_ndlp;
++	int sentplogi = 0;
+ 
+-	sentplogi = 0;
+-	/* go thru NPR list and issue any remaining ELS PLOGIs */
+-	list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) {
++	/* go thru NPR nodes and issue any remaining ELS PLOGIs */
++	list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
+ 		if (ndlp->nlp_state == NLP_STE_NPR_NODE &&
+ 		    (ndlp->nlp_flag & NLP_NPR_2B_DISC) != 0 &&
+ 		    (ndlp->nlp_flag & NLP_DELAY_TMO) == 0 &&
+ 		    (ndlp->nlp_flag & NLP_NPR_ADISC) == 0) {
+ 			ndlp->nlp_prev_state = ndlp->nlp_state;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+-			lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
++			lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
+ 			sentplogi++;
+-			phba->num_disc_nodes++;
+-			if (phba->num_disc_nodes >=
+-			    phba->cfg_discovery_threads) {
+-				spin_lock_irq(phba->host->host_lock);
+-				phba->fc_flag |= FC_NLP_MORE;
+-				spin_unlock_irq(phba->host->host_lock);
++			vport->num_disc_nodes++;
++			if (vport->num_disc_nodes >=
++			    vport->phba->cfg_discovery_threads) {
++				spin_lock_irq(shost->host_lock);
++				vport->fc_flag |= FC_NLP_MORE;
++				spin_unlock_irq(shost->host_lock);
+ 				break;
+ 			}
+ 		}
+ 	}
+ 	if (sentplogi == 0) {
+-		spin_lock_irq(phba->host->host_lock);
+-		phba->fc_flag &= ~FC_NLP_MORE;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
++		vport->fc_flag &= ~FC_NLP_MORE;
++		spin_unlock_irq(shost->host_lock);
+ 	}
+ 	return sentplogi;
+ }
+ 
+-int
+-lpfc_els_flush_rscn(struct lpfc_hba * phba)
++void
++lpfc_els_flush_rscn(struct lpfc_vport *vport)
+ {
+-	struct lpfc_dmabuf *mp;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
+ 	int i;
+ 
+-	for (i = 0; i < phba->fc_rscn_id_cnt; i++) {
+-		mp = phba->fc_rscn_id_list[i];
+-		lpfc_mbuf_free(phba, mp->virt, mp->phys);
+-		kfree(mp);
+-		phba->fc_rscn_id_list[i] = NULL;
+-	}
+-	phba->fc_rscn_id_cnt = 0;
+-	spin_lock_irq(phba->host->host_lock);
+-	phba->fc_flag &= ~(FC_RSCN_MODE | FC_RSCN_DISCOVERY);
+-	spin_unlock_irq(phba->host->host_lock);
+-	lpfc_can_disctmo(phba);
+-	return 0;
++	for (i = 0; i < vport->fc_rscn_id_cnt; i++) {
++		lpfc_in_buf_free(phba, vport->fc_rscn_id_list[i]);
++		vport->fc_rscn_id_list[i] = NULL;
++	}
++	spin_lock_irq(shost->host_lock);
++	vport->fc_rscn_id_cnt = 0;
++	vport->fc_flag &= ~(FC_RSCN_MODE | FC_RSCN_DISCOVERY);
++	spin_unlock_irq(shost->host_lock);
++	lpfc_can_disctmo(vport);
+ }
+ 
+ int
+-lpfc_rscn_payload_check(struct lpfc_hba * phba, uint32_t did)
++lpfc_rscn_payload_check(struct lpfc_vport *vport, uint32_t did)
+ {
+ 	D_ID ns_did;
+ 	D_ID rscn_did;
+-	struct lpfc_dmabuf *mp;
+ 	uint32_t *lp;
+-	uint32_t payload_len, cmd, i, match;
++	uint32_t payload_len, i;
++	struct lpfc_hba *phba = vport->phba;
+ 
+ 	ns_did.un.word = did;
+-	match = 0;
+ 
+ 	/* Never match fabric nodes for RSCNs */
+ 	if ((did & Fabric_DID_MASK) == Fabric_DID_MASK)
+-		return(0);
++		return 0;
+ 
+ 	/* If we are doing a FULL RSCN rediscovery, match everything */
+-	if (phba->fc_flag & FC_RSCN_DISCOVERY) {
++	if (vport->fc_flag & FC_RSCN_DISCOVERY)
+ 		return did;
+-	}
+ 
+-	for (i = 0; i < phba->fc_rscn_id_cnt; i++) {
+-		mp = phba->fc_rscn_id_list[i];
+-		lp = (uint32_t *) mp->virt;
+-		cmd = *lp++;
+-		payload_len = be32_to_cpu(cmd) & 0xffff; /* payload length */
+-		payload_len -= sizeof (uint32_t);	/* take off word 0 */
++	for (i = 0; i < vport->fc_rscn_id_cnt; i++) {
++		lp = vport->fc_rscn_id_list[i]->virt;
++		payload_len = be32_to_cpu(*lp++ & ~ELS_CMD_MASK);
++		payload_len -= sizeof(uint32_t);	/* take off word 0 */
+ 		while (payload_len) {
+-			rscn_did.un.word = *lp++;
+-			rscn_did.un.word = be32_to_cpu(rscn_did.un.word);
+-			payload_len -= sizeof (uint32_t);
++			rscn_did.un.word = be32_to_cpu(*lp++);
++			payload_len -= sizeof(uint32_t);
+ 			switch (rscn_did.un.b.resv) {
+ 			case 0:	/* Single N_Port ID effected */
+-				if (ns_did.un.word == rscn_did.un.word) {
+-					match = did;
+-				}
++				if (ns_did.un.word == rscn_did.un.word)
++					return did;
+ 				break;
+ 			case 1:	/* Whole N_Port Area effected */
+ 				if ((ns_did.un.b.domain == rscn_did.un.b.domain)
+ 				    && (ns_did.un.b.area == rscn_did.un.b.area))
+-					{
+-						match = did;
+-					}
++					return did;
+ 				break;
+ 			case 2:	/* Whole N_Port Domain effected */
+ 				if (ns_did.un.b.domain == rscn_did.un.b.domain)
+-					{
+-						match = did;
+-					}
+-				break;
+-			case 3:	/* Whole Fabric effected */
+-				match = did;
++					return did;
+ 				break;
+ 			default:
+-				/* Unknown Identifier in RSCN list */
++				/* Unknown Identifier in RSCN node */
+ 				lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+-						"%d:0217 Unknown Identifier in "
+-						"RSCN payload Data: x%x\n",
+-						phba->brd_no, rscn_did.un.word);
+-				break;
+-			}
+-			if (match) {
+-				break;
++						"%d (%d):0217 Unknown "
++						"Identifier in RSCN payload "
++						"Data: x%x\n",
++						phba->brd_no, vport->vpi,
++						rscn_did.un.word);
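++				/* Fall through: treat an unknown format as a
++				 * full-fabric RSCN */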
++			case 3:	/* Whole Fabric affected */
++				return did;
+ 			}
+ 		}
+ 	}
+-	return match;
++	return 0;
+ }
+ 
+ static int
+-lpfc_rscn_recovery_check(struct lpfc_hba *phba)
++lpfc_rscn_recovery_check(struct lpfc_vport *vport)
+ {
+ 	struct lpfc_nodelist *ndlp = NULL;
+ 
+@@ -2459,12 +2775,12 @@
+ 	 * them to NPR state.
+ 	 */
+ 
+-	list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) {
++	list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+ 		if (ndlp->nlp_state == NLP_STE_UNUSED_NODE ||
+-		    lpfc_rscn_payload_check(phba, ndlp->nlp_DID) == 0)
++		    lpfc_rscn_payload_check(vport, ndlp->nlp_DID) == 0)
+ 			continue;
+ 
+-		lpfc_disc_state_machine(phba, ndlp, NULL,
++		lpfc_disc_state_machine(vport, ndlp, NULL,
+ 					NLP_EVT_DEVICE_RECOVERY);
+ 
+ 		/*
+@@ -2472,175 +2788,248 @@
+ 		 * recovery event.
+ 		 */
+ 		if (ndlp->nlp_flag & NLP_DELAY_TMO)
+-			lpfc_cancel_retry_delay_tmo(phba, ndlp);
++			lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ 	}
+ 
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_els_rcv_rscn(struct lpfc_hba * phba,
+-		  struct lpfc_iocbq * cmdiocb,
+-		  struct lpfc_nodelist * ndlp, uint8_t newnode)
++lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++		  struct lpfc_nodelist *ndlp, uint8_t newnode)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
+ 	struct lpfc_dmabuf *pcmd;
+-	uint32_t *lp;
++	struct lpfc_vport *next_vport;
++	uint32_t *lp, *datap;
+ 	IOCB_t *icmd;
+-	uint32_t payload_len, cmd;
++	uint32_t payload_len, length, nportid, *cmd;
++	int rscn_cnt = vport->fc_rscn_id_cnt;
++	int rscn_id = 0, hba_id = 0;
+ 	int i;
+ 
+ 	icmd = &cmdiocb->iocb;
+ 	pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+ 	lp = (uint32_t *) pcmd->virt;
+ 
+-	cmd = *lp++;
+-	payload_len = be32_to_cpu(cmd) & 0xffff;	/* payload length */
+-	payload_len -= sizeof (uint32_t);	/* take off word 0 */
+-	cmd &= ELS_CMD_MASK;
++	payload_len = be32_to_cpu(*lp++ & ~ELS_CMD_MASK);
++	payload_len -= sizeof(uint32_t);	/* take off word 0 */
+ 
+ 	/* RSCN received */
+-	lpfc_printf_log(phba,
+-			KERN_INFO,
+-			LOG_DISCOVERY,
+-			"%d:0214 RSCN received Data: x%x x%x x%x x%x\n",
+-			phba->brd_no,
+-			phba->fc_flag, payload_len, *lp, phba->fc_rscn_id_cnt);
++	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++			"%d (%d):0214 RSCN received Data: x%x x%x x%x x%x\n",
++			phba->brd_no, vport->vpi, vport->fc_flag, payload_len,
++			*lp, rscn_cnt);
+ 
+ 	for (i = 0; i < payload_len/sizeof(uint32_t); i++)
+-		fc_host_post_event(phba->host, fc_get_event_number(),
++		fc_host_post_event(shost, fc_get_event_number(),
+ 			FCH_EVT_RSCN, lp[i]);
+ 
+ 	/* If we are about to begin discovery, just ACC the RSCN.
+ 	 * Discovery processing will satisfy it.
+ 	 */
+-	if (phba->hba_state <= LPFC_NS_QRY) {
+-		lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL,
++	if (vport->port_state <= LPFC_NS_QRY) {
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV RSCN ignore: did:x%x/ste:x%x flg:x%x",
++			ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag);
++
++		lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL,
+ 								newnode);
+ 		return 0;
+ 	}
+ 
++	/* If this RSCN just contains NPortIDs for other vports on this HBA,
++	 * just ACC and ignore it.
++	 */
++	if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++		!(phba->cfg_peer_port_login)) {
++		i = payload_len;
++		datap = lp;
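++		/* Count entries (rscn_id) and those owned by a local vport
++		 * (hba_id) */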
++		while (i > 0) {
++			nportid = *datap++;
++			nportid = ((be32_to_cpu(nportid)) & Mask_DID);
++			i -= sizeof(uint32_t);
++			rscn_id++;
++			list_for_each_entry(next_vport, &phba->port_list,
++				listentry) {
++				if (nportid == next_vport->fc_myDID) {
++					hba_id++;
++					break;
++				}
++			}
++		}
++		if (rscn_id == hba_id) {
++			/* ALL NPortIDs in RSCN are on HBA */
++			lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++			  "%d (%d):0214 Ignore RSCN Data: x%x x%x x%x x%x\n",
++			  phba->brd_no, vport->vpi, vport->fc_flag, payload_len,
++			  *lp, rscn_cnt);
++
++			lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++				"RCV RSCN vport:  did:x%x/ste:x%x flg:x%x",
++				ndlp->nlp_DID, vport->port_state,
++				ndlp->nlp_flag);
++
++			lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb,
++				ndlp, NULL, newnode);
++			return 0;
++		}
++	}
++
+ 	/* If we are already processing an RSCN, save the received
+ 	 * RSCN payload buffer, cmdiocb->context2 to process later.
+ 	 */
+-	if (phba->fc_flag & (FC_RSCN_MODE | FC_NDISC_ACTIVE)) {
+-		if ((phba->fc_rscn_id_cnt < FC_MAX_HOLD_RSCN) &&
+-		    !(phba->fc_flag & FC_RSCN_DISCOVERY)) {
+-			spin_lock_irq(phba->host->host_lock);
+-			phba->fc_flag |= FC_RSCN_MODE;
+-			spin_unlock_irq(phba->host->host_lock);
+-			phba->fc_rscn_id_list[phba->fc_rscn_id_cnt++] = pcmd;
+-
++	if (vport->fc_flag & (FC_RSCN_MODE | FC_NDISC_ACTIVE)) {
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV RSCN defer:  did:x%x/ste:x%x flg:x%x",
++			ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag);
++
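++		/* Flag that this RSCN arrived while another is still being
++		 * processed */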
++		vport->fc_flag |= FC_RSCN_DEFERRED;
++		if ((rscn_cnt < FC_MAX_HOLD_RSCN) &&
++		    !(vport->fc_flag & FC_RSCN_DISCOVERY)) {
++			spin_lock_irq(shost->host_lock);
++			vport->fc_flag |= FC_RSCN_MODE;
++			spin_unlock_irq(shost->host_lock);
++			if (rscn_cnt) {
++				cmd = vport->fc_rscn_id_list[rscn_cnt-1]->virt;
++				length = be32_to_cpu(*cmd & ~ELS_CMD_MASK);
++			}
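++			/* Coalesce this RSCN into the last saved buffer when
++			 * it fits */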
++			if ((rscn_cnt) &&
++			    (payload_len + length <= LPFC_BPL_SIZE)) {
++				*cmd &= ELS_CMD_MASK;
++				*cmd |= be32_to_cpu(payload_len + length);
++				memcpy(((uint8_t *)cmd) + length, lp,
++				       payload_len);
++			} else {
++				vport->fc_rscn_id_list[rscn_cnt] = pcmd;
++				vport->fc_rscn_id_cnt++;
+ 			/* If we zero cmdiocb->context2, the calling
+ 			 * routine will not try to free it.
+ 			 */
+ 			cmdiocb->context2 = NULL;
++			}
+ 
+ 			/* Deferred RSCN */
+ 			lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-					"%d:0235 Deferred RSCN "
++					"%d (%d):0235 Deferred RSCN "
+ 					"Data: x%x x%x x%x\n",
+-					phba->brd_no, phba->fc_rscn_id_cnt,
+-					phba->fc_flag, phba->hba_state);
++					phba->brd_no, vport->vpi,
++					vport->fc_rscn_id_cnt, vport->fc_flag,
++					vport->port_state);
+ 		} else {
+-			spin_lock_irq(phba->host->host_lock);
+-			phba->fc_flag |= FC_RSCN_DISCOVERY;
+-			spin_unlock_irq(phba->host->host_lock);
++			spin_lock_irq(shost->host_lock);
++			vport->fc_flag |= FC_RSCN_DISCOVERY;
++			spin_unlock_irq(shost->host_lock);
+ 			/* ReDiscovery RSCN */
+ 			lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-					"%d:0234 ReDiscovery RSCN "
++					"%d (%d):0234 ReDiscovery RSCN "
+ 					"Data: x%x x%x x%x\n",
+-					phba->brd_no, phba->fc_rscn_id_cnt,
+-					phba->fc_flag, phba->hba_state);
++					phba->brd_no, vport->vpi,
++					vport->fc_rscn_id_cnt, vport->fc_flag,
++					vport->port_state);
+ 		}
+ 		/* Send back ACC */
+-		lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL,
++		lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL,
+ 								newnode);
+ 
+ 		/* send RECOVERY event for ALL nodes that match RSCN payload */
+-		lpfc_rscn_recovery_check(phba);
++		lpfc_rscn_recovery_check(vport);
++		vport->fc_flag &= ~FC_RSCN_DEFERRED;
+ 		return 0;
+ 	}
+ 
+-	phba->fc_flag |= FC_RSCN_MODE;
+-	phba->fc_rscn_id_list[phba->fc_rscn_id_cnt++] = pcmd;
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++		"RCV RSCN:        did:x%x/ste:x%x flg:x%x",
++		ndlp->nlp_DID, vport->port_state, ndlp->nlp_flag);
++
++	spin_lock_irq(shost->host_lock);
++	vport->fc_flag |= FC_RSCN_MODE;
++	spin_unlock_irq(shost->host_lock);
++	vport->fc_rscn_id_list[vport->fc_rscn_id_cnt++] = pcmd;
+ 	/*
+ 	 * If we zero cmdiocb->context2, the calling routine will
+ 	 * not try to free it.
+ 	 */
+ 	cmdiocb->context2 = NULL;
+ 
+-	lpfc_set_disctmo(phba);
++	lpfc_set_disctmo(vport);
+ 
+ 	/* Send back ACC */
+-	lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, newnode);
++	lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, newnode);
+ 
+ 	/* send RECOVERY event for ALL nodes that match RSCN payload */
+-	lpfc_rscn_recovery_check(phba);
++	lpfc_rscn_recovery_check(vport);
+ 
+-	return lpfc_els_handle_rscn(phba);
++	return lpfc_els_handle_rscn(vport);
+ }
+ 
+ int
+-lpfc_els_handle_rscn(struct lpfc_hba * phba)
++lpfc_els_handle_rscn(struct lpfc_vport *vport)
+ {
+ 	struct lpfc_nodelist *ndlp;
++	struct lpfc_hba *phba = vport->phba;
++
++	/* Ignore RSCN if the port is being torn down. */
++	if (vport->load_flag & FC_UNLOADING) {
++		lpfc_els_flush_rscn(vport);
++		return 0;
++	}
+ 
+ 	/* Start timer for RSCN processing */
+-	lpfc_set_disctmo(phba);
++	lpfc_set_disctmo(vport);
+ 
+ 	/* RSCN processed */
+-	lpfc_printf_log(phba,
+-			KERN_INFO,
+-			LOG_DISCOVERY,
+-			"%d:0215 RSCN processed Data: x%x x%x x%x x%x\n",
+-			phba->brd_no,
+-			phba->fc_flag, 0, phba->fc_rscn_id_cnt,
+-			phba->hba_state);
++	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++			"%d (%d):0215 RSCN processed Data: x%x x%x x%x x%x\n",
++			phba->brd_no, vport->vpi,
++			vport->fc_flag, 0, vport->fc_rscn_id_cnt,
++			vport->port_state);
+ 
+ 	/* To process RSCN, first compare RSCN data with NameServer */
+-	phba->fc_ns_retry = 0;
+-	ndlp = lpfc_findnode_did(phba, NameServer_DID);
++	vport->fc_ns_retry = 0;
++	ndlp = lpfc_findnode_did(vport, NameServer_DID);
+ 	if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
+ 		/* Good ndlp, issue CT Request to NameServer */
+-		if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT) == 0) {
++		if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, 0) == 0)
+ 			/* Wait for NameServer query cmpl before we can
+ 			   continue */
+ 			return 1;
+-		}
+ 	} else {
+ 		/* If login to NameServer does not exist, issue one */
+ 		/* Good status, issue PLOGI to NameServer */
+-		ndlp = lpfc_findnode_did(phba, NameServer_DID);
+-		if (ndlp) {
++		ndlp = lpfc_findnode_did(vport, NameServer_DID);
++		if (ndlp)
+ 			/* Wait for NameServer login cmpl before we can
+ 			   continue */
+ 			return 1;
+-		}
++
+ 		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+ 		if (!ndlp) {
+-			lpfc_els_flush_rscn(phba);
++			lpfc_els_flush_rscn(vport);
+ 			return 0;
+ 		} else {
+-			lpfc_nlp_init(phba, ndlp, NameServer_DID);
++			lpfc_nlp_init(vport, ndlp, NameServer_DID);
+ 			ndlp->nlp_type |= NLP_FABRIC;
+ 			ndlp->nlp_prev_state = ndlp->nlp_state;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+-			lpfc_issue_els_plogi(phba, NameServer_DID, 0);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
++			lpfc_issue_els_plogi(vport, NameServer_DID, 0);
+ 			/* Wait for NameServer login cmpl before we can
+ 			   continue */
+ 			return 1;
+ 		}
+ 	}
+ 
+-	lpfc_els_flush_rscn(phba);
++	lpfc_els_flush_rscn(vport);
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_els_rcv_flogi(struct lpfc_hba * phba,
+-		   struct lpfc_iocbq * cmdiocb,
+-		   struct lpfc_nodelist * ndlp, uint8_t newnode)
++lpfc_els_rcv_flogi(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++		   struct lpfc_nodelist *ndlp, uint8_t newnode)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
+ 	struct lpfc_dmabuf *pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+ 	uint32_t *lp = (uint32_t *) pcmd->virt;
+ 	IOCB_t *icmd = &cmdiocb->iocb;
+@@ -2655,7 +3044,7 @@
+ 
+ 	/* FLOGI received */
+ 
+-	lpfc_set_disctmo(phba);
++	lpfc_set_disctmo(vport);
+ 
+ 	if (phba->fc_topology == TOPOLOGY_LOOP) {
+ 		/* We should never receive a FLOGI in loop mode, ignore it */
+@@ -2664,33 +3053,34 @@
+ 		/* An FLOGI ELS command <elsCmd> was received from DID <did> in
+ 		   Loop Mode */
+ 		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
+-				"%d:0113 An FLOGI ELS command x%x was received "
+-				"from DID x%x in Loop Mode\n",
+-				phba->brd_no, cmd, did);
++				"%d (%d):0113 An FLOGI ELS command x%x was "
++				"received from DID x%x in Loop Mode\n",
++				phba->brd_no, vport->vpi, cmd, did);
+ 		return 1;
+ 	}
+ 
+ 	did = Fabric_DID;
+ 
+-	if ((lpfc_check_sparm(phba, ndlp, sp, CLASS3))) {
++	if ((lpfc_check_sparm(vport, ndlp, sp, CLASS3))) {
+ 		/* For a FLOGI we accept, then if our portname is greater
+ 		 * then the remote portname we initiate Nport login.
+ 		 */
+ 
+-		rc = memcmp(&phba->fc_portname, &sp->portName,
+-			    sizeof (struct lpfc_name));
++		rc = memcmp(&vport->fc_portname, &sp->portName,
++			    sizeof(struct lpfc_name));
+ 
+ 		if (!rc) {
+-			if ((mbox = mempool_alloc(phba->mbox_mem_pool,
+-						  GFP_KERNEL)) == 0) {
++			mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++			if (!mbox)
+ 				return 1;
+-			}
++
+ 			lpfc_linkdown(phba);
+ 			lpfc_init_link(phba, mbox,
+ 				       phba->cfg_topology,
+ 				       phba->cfg_link_speed);
+ 			mbox->mb.un.varInitLnk.lipsr_AL_PA = 0;
+ 			mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++			mbox->vport = vport;
+ 			rc = lpfc_sli_issue_mbox
+ 				(phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB));
+ 			lpfc_set_loopback_flag(phba);
+@@ -2699,31 +3089,34 @@
+ 			}
+ 			return 1;
+ 		} else if (rc > 0) {	/* greater than */
+-			spin_lock_irq(phba->host->host_lock);
+-			phba->fc_flag |= FC_PT2PT_PLOGI;
+-			spin_unlock_irq(phba->host->host_lock);
+-		}
+-		phba->fc_flag |= FC_PT2PT;
+-		phba->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++			spin_lock_irq(shost->host_lock);
++			vport->fc_flag |= FC_PT2PT_PLOGI;
++			spin_unlock_irq(shost->host_lock);
++		}
++		spin_lock_irq(shost->host_lock);
++		vport->fc_flag |= FC_PT2PT;
++		vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++		spin_unlock_irq(shost->host_lock);
+ 	} else {
+ 		/* Reject this request because invalid parameters */
+ 		stat.un.b.lsRjtRsvd0 = 0;
+ 		stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ 		stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS;
+ 		stat.un.b.vendorUnique = 0;
+-		lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++		lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp,
++			NULL);
+ 		return 1;
+ 	}
+ 
+ 	/* Send back ACC */
+-	lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, newnode);
++	lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, newnode);
+ 
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_els_rcv_rnid(struct lpfc_hba * phba,
+-		  struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp)
++lpfc_els_rcv_rnid(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++		  struct lpfc_nodelist *ndlp)
+ {
+ 	struct lpfc_dmabuf *pcmd;
+ 	uint32_t *lp;
+@@ -2746,7 +3139,7 @@
+ 	case 0:
+ 	case RNID_TOPOLOGY_DISC:
+ 		/* Send back ACC */
+-		lpfc_els_rsp_rnid_acc(phba, rn->Format, cmdiocb, ndlp);
++		lpfc_els_rsp_rnid_acc(vport, rn->Format, cmdiocb, ndlp);
+ 		break;
+ 	default:
+ 		/* Reject this request because format not supported */
+@@ -2754,13 +3147,14 @@
+ 		stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ 		stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA;
+ 		stat.un.b.vendorUnique = 0;
+-		lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++		lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp,
++			NULL);
+ 	}
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_els_rcv_lirr(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++lpfc_els_rcv_lirr(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
+ 		 struct lpfc_nodelist *ndlp)
+ {
+ 	struct ls_rjt stat;
+@@ -2770,15 +3164,15 @@
+ 	stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ 	stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA;
+ 	stat.un.b.vendorUnique = 0;
+-	lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++	lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL);
+ 	return 0;
+ }
+ 
+ static void
+ lpfc_els_rsp_rps_acc(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+-	struct lpfc_sli *psli;
+-	struct lpfc_sli_ring *pring;
++	struct lpfc_sli *psli = &phba->sli;
++	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
+ 	MAILBOX_t *mb;
+ 	IOCB_t *icmd;
+ 	RPS_RSP *rps_rsp;
+@@ -2788,8 +3182,6 @@
+ 	uint16_t xri, status;
+ 	uint32_t cmdsize;
+ 
+-	psli = &phba->sli;
+-	pring = &psli->ring[LPFC_ELS_RING];
+ 	mb = &pmb->mb;
+ 
+ 	ndlp = (struct lpfc_nodelist *) pmb->context2;
+@@ -2804,7 +3196,8 @@
+ 
+ 	cmdsize = sizeof(RPS_RSP) + sizeof(uint32_t);
+ 	mempool_free(pmb, phba->mbox_mem_pool);
+-	elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, lpfc_max_els_tries, ndlp,
++	elsiocb = lpfc_prep_els_iocb(phba->pport, 0, cmdsize,
++				     lpfc_max_els_tries, ndlp,
+ 						ndlp->nlp_DID, ELS_CMD_ACC);
+ 	lpfc_nlp_put(ndlp);
+ 	if (!elsiocb)
+@@ -2815,14 +3208,14 @@
+ 
+ 	pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ 	*((uint32_t *) (pcmd)) = ELS_CMD_ACC;
+-	pcmd += sizeof (uint32_t); /* Skip past command */
++	pcmd += sizeof(uint32_t); /* Skip past command */
+ 	rps_rsp = (RPS_RSP *)pcmd;
+ 
+ 	if (phba->fc_topology != TOPOLOGY_LOOP)
+ 		status = 0x10;
+ 	else
+ 		status = 0x8;
+-	if (phba->fc_flag & FC_FABRIC)
++	if (phba->pport->fc_flag & FC_FABRIC)
+ 		status |= 0x4;
+ 
+ 	rps_rsp->rsvd1 = 0;
+@@ -2836,25 +3229,25 @@
+ 
+ 	/* Xmit ELS RPS ACC response tag <ulpIoTag> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0118 Xmit ELS RPS ACC response tag x%x xri x%x, "
+-			"did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n",
+-			phba->brd_no, elsiocb->iotag,
++			"%d (%d):0118 Xmit ELS RPS ACC response tag x%x "
++			"xri x%x, did x%x, nlp_flag x%x, nlp_state x%x, "
++			"rpi x%x\n",
++			phba->brd_no, ndlp->vport->vpi, elsiocb->iotag,
+ 			elsiocb->iocb.ulpContext, ndlp->nlp_DID,
+ 			ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+ 
+-	elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+ 	phba->fc_stat.elsXmitACC++;
+-
+-	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR)
+ 		lpfc_els_free_iocb(phba, elsiocb);
+-	}
+ 	return;
+ }
+ 
+ static int
+-lpfc_els_rcv_rps(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-		 struct lpfc_nodelist * ndlp)
++lpfc_els_rcv_rps(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++		 struct lpfc_nodelist *ndlp)
+ {
++	struct lpfc_hba *phba = vport->phba;
+ 	uint32_t *lp;
+ 	uint8_t flag;
+ 	LPFC_MBOXQ_t *mbox;
+@@ -2868,7 +3261,8 @@
+ 		stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ 		stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA;
+ 		stat.un.b.vendorUnique = 0;
+-		lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++		lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp,
++			NULL);
+ 	}
+ 
+ 	pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+@@ -2878,19 +3272,24 @@
+ 
+ 	if ((flag == 0) ||
+ 	    ((flag == 1) && (be32_to_cpu(rps->un.portNum) == 0)) ||
+-	    ((flag == 2) && (memcmp(&rps->un.portName, &phba->fc_portname,
+-			   sizeof (struct lpfc_name)) == 0))) {
+-		if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC))) {
++	    ((flag == 2) && (memcmp(&rps->un.portName, &vport->fc_portname,
++				    sizeof(struct lpfc_name)) == 0))) {
++
++		printk("Fix me....\n");
++		dump_stack();
++		mbox = mempool_alloc(phba->mbox_mem_pool, GFP_ATOMIC);
++		if (mbox) {
+ 			lpfc_read_lnk_stat(phba, mbox);
+ 			mbox->context1 =
+-			    (void *)((unsigned long)cmdiocb->iocb.ulpContext);
++			    (void *)((unsigned long) cmdiocb->iocb.ulpContext);
+ 			mbox->context2 = lpfc_nlp_get(ndlp);
++			mbox->vport = vport;
+ 			mbox->mbox_cmpl = lpfc_els_rsp_rps_acc;
+ 			if (lpfc_sli_issue_mbox (phba, mbox,
+-			    (MBX_NOWAIT | MBX_STOP_IOCB)) != MBX_NOT_FINISHED) {
++			    (MBX_NOWAIT | MBX_STOP_IOCB)) != MBX_NOT_FINISHED)
+ 				/* Mbox completion will send ELS Response */
+ 				return 0;
+-			}
++
+ 			lpfc_nlp_put(ndlp);
+ 			mempool_free(mbox, phba->mbox_mem_pool);
+ 		}
+@@ -2899,27 +3298,25 @@
+ 	stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ 	stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA;
+ 	stat.un.b.vendorUnique = 0;
+-	lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++	lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL);
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_els_rsp_rpl_acc(struct lpfc_hba * phba, uint16_t cmdsize,
+-		 struct lpfc_iocbq * oldiocb, struct lpfc_nodelist * ndlp)
++lpfc_els_rsp_rpl_acc(struct lpfc_vport *vport, uint16_t cmdsize,
++		     struct lpfc_iocbq *oldiocb, struct lpfc_nodelist *ndlp)
+ {
+-	IOCB_t *icmd;
+-	IOCB_t *oldcmd;
++	struct lpfc_hba *phba = vport->phba;
++	IOCB_t *icmd, *oldcmd;
+ 	RPL_RSP rpl_rsp;
+ 	struct lpfc_iocbq *elsiocb;
+-	struct lpfc_sli_ring *pring;
+-	struct lpfc_sli *psli;
++	struct lpfc_sli *psli = &phba->sli;
++	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
+ 	uint8_t *pcmd;
+ 
+-	psli = &phba->sli;
+-	pring = &psli->ring[LPFC_ELS_RING];	/* ELS ring */
++	elsiocb = lpfc_prep_els_iocb(vport, 0, cmdsize, oldiocb->retry, ndlp,
++				     ndlp->nlp_DID, ELS_CMD_ACC);
+ 
+-	elsiocb = lpfc_prep_els_iocb(phba, 0, cmdsize, oldiocb->retry,
+-					ndlp, ndlp->nlp_DID, ELS_CMD_ACC);
+ 	if (!elsiocb)
+ 		return 1;
+ 
+@@ -2929,7 +3326,7 @@
+ 
+ 	pcmd = (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
+ 	*((uint32_t *) (pcmd)) = ELS_CMD_ACC;
+-	pcmd += sizeof (uint16_t);
++	pcmd += sizeof(uint16_t);
+ 	*((uint16_t *)(pcmd)) = be16_to_cpu(cmdsize);
+ 	pcmd += sizeof(uint16_t);
+ 
+@@ -2937,8 +3334,8 @@
+ 	rpl_rsp.listLen = be32_to_cpu(1);
+ 	rpl_rsp.index = 0;
+ 	rpl_rsp.port_num_blk.portNum = 0;
+-	rpl_rsp.port_num_blk.portID = be32_to_cpu(phba->fc_myDID);
+-	memcpy(&rpl_rsp.port_num_blk.portName, &phba->fc_portname,
++	rpl_rsp.port_num_blk.portID = be32_to_cpu(vport->fc_myDID);
++	memcpy(&rpl_rsp.port_num_blk.portName, &vport->fc_portname,
+ 	    sizeof(struct lpfc_name));
+ 
+ 	memcpy(pcmd, &rpl_rsp, cmdsize - sizeof(uint32_t));
+@@ -2946,13 +3343,14 @@
+ 
+ 	/* Xmit ELS RPL ACC response tag <ulpIoTag> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0120 Xmit ELS RPL ACC response tag x%x xri x%x, "
+-			"did x%x, nlp_flag x%x, nlp_state x%x, rpi x%x\n",
+-			phba->brd_no, elsiocb->iotag,
++			"%d (%d):0120 Xmit ELS RPL ACC response tag x%x "
++			"xri x%x, did x%x, nlp_flag x%x, nlp_state x%x, "
++			"rpi x%x\n",
++			phba->brd_no, vport->vpi, elsiocb->iotag,
+ 			elsiocb->iocb.ulpContext, ndlp->nlp_DID,
+ 			ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+ 
+-	elsiocb->iocb_cmpl = lpfc_cmpl_els_acc;
++	elsiocb->iocb_cmpl = lpfc_cmpl_els_rsp;
+ 
+ 	phba->fc_stat.elsXmitACC++;
+ 	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
+@@ -2963,8 +3361,8 @@
+ }
+ 
+ static int
+-lpfc_els_rcv_rpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-		 struct lpfc_nodelist * ndlp)
++lpfc_els_rcv_rpl(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++		 struct lpfc_nodelist *ndlp)
+ {
+ 	struct lpfc_dmabuf *pcmd;
+ 	uint32_t *lp;
+@@ -2979,7 +3377,8 @@
+ 		stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ 		stat.un.b.lsRjtRsnCodeExp = LSEXP_CANT_GIVE_DATA;
+ 		stat.un.b.vendorUnique = 0;
+-		lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++		lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp,
++			NULL);
+ 	}
+ 
+ 	pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+@@ -2996,15 +3395,16 @@
+ 	} else {
+ 		cmdsize = sizeof(uint32_t) + maxsize * sizeof(uint32_t);
+ 	}
+-	lpfc_els_rsp_rpl_acc(phba, cmdsize, cmdiocb, ndlp);
++	lpfc_els_rsp_rpl_acc(vport, cmdsize, cmdiocb, ndlp);
+ 
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_els_rcv_farp(struct lpfc_hba * phba,
+-		  struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp)
++lpfc_els_rcv_farp(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++		  struct lpfc_nodelist *ndlp)
+ {
++	struct lpfc_hba *phba = vport->phba;
+ 	struct lpfc_dmabuf *pcmd;
+ 	uint32_t *lp;
+ 	IOCB_t *icmd;
+@@ -3020,11 +3420,9 @@
+ 	fp = (FARP *) lp;
+ 
+ 	/* FARP-REQ received from DID <did> */
+-	lpfc_printf_log(phba,
+-			 KERN_INFO,
+-			 LOG_ELS,
+-			 "%d:0601 FARP-REQ received from DID x%x\n",
+-			 phba->brd_no, did);
++	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++			"%d (%d):0601 FARP-REQ received from DID x%x\n",
++			phba->brd_no, vport->vpi, did);
+ 
+ 	/* We will only support match on WWPN or WWNN */
+ 	if (fp->Mflags & ~(FARP_MATCH_NODE | FARP_MATCH_PORT)) {
+@@ -3034,15 +3432,15 @@
+ 	cnt = 0;
+ 	/* If this FARP command is searching for my portname */
+ 	if (fp->Mflags & FARP_MATCH_PORT) {
+-		if (memcmp(&fp->RportName, &phba->fc_portname,
+-			   sizeof (struct lpfc_name)) == 0)
++		if (memcmp(&fp->RportName, &vport->fc_portname,
++			   sizeof(struct lpfc_name)) == 0)
+ 			cnt = 1;
+ 	}
+ 
+ 	/* If this FARP command is searching for my nodename */
+ 	if (fp->Mflags & FARP_MATCH_NODE) {
+-		if (memcmp(&fp->RnodeName, &phba->fc_nodename,
+-			   sizeof (struct lpfc_name)) == 0)
++		if (memcmp(&fp->RnodeName, &vport->fc_nodename,
++			   sizeof(struct lpfc_name)) == 0)
+ 			cnt = 1;
+ 	}
+ 
+@@ -3052,28 +3450,28 @@
+ 			/* Log back into the node before sending the FARP. */
+ 			if (fp->Rflags & FARP_REQUEST_PLOGI) {
+ 				ndlp->nlp_prev_state = ndlp->nlp_state;
+-				lpfc_nlp_set_state(phba, ndlp,
++				lpfc_nlp_set_state(vport, ndlp,
+ 						   NLP_STE_PLOGI_ISSUE);
+-				lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0);
++				lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
+ 			}
+ 
+ 			/* Send a FARP response to that node */
+-			if (fp->Rflags & FARP_REQUEST_FARPR) {
+-				lpfc_issue_els_farpr(phba, did, 0);
+-			}
++			if (fp->Rflags & FARP_REQUEST_FARPR)
++				lpfc_issue_els_farpr(vport, did, 0);
+ 		}
+ 	}
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_els_rcv_farpr(struct lpfc_hba * phba,
+-		   struct lpfc_iocbq * cmdiocb, struct lpfc_nodelist * ndlp)
++lpfc_els_rcv_farpr(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++		   struct lpfc_nodelist  *ndlp)
+ {
+ 	struct lpfc_dmabuf *pcmd;
+ 	uint32_t *lp;
+ 	IOCB_t *icmd;
+ 	uint32_t cmd, did;
++	struct lpfc_hba *phba = vport->phba;
+ 
+ 	icmd = &cmdiocb->iocb;
+ 	did = icmd->un.elsreq64.remoteID;
+@@ -3082,21 +3480,18 @@
+ 
+ 	cmd = *lp++;
+ 	/* FARP-RSP received from DID <did> */
+-	lpfc_printf_log(phba,
+-			 KERN_INFO,
+-			 LOG_ELS,
+-			 "%d:0600 FARP-RSP received from DID x%x\n",
+-			 phba->brd_no, did);
+-
++	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++			"%d (%d):0600 FARP-RSP received from DID x%x\n",
++			phba->brd_no, vport->vpi, did);
+ 	/* ACCEPT the Farp resp request */
+-	lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++	lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
+ 
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_els_rcv_fan(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-		 struct lpfc_nodelist * fan_ndlp)
++lpfc_els_rcv_fan(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
++		 struct lpfc_nodelist *fan_ndlp)
+ {
+ 	struct lpfc_dmabuf *pcmd;
+ 	uint32_t *lp;
+@@ -3104,10 +3499,12 @@
+ 	uint32_t cmd, did;
+ 	FAN *fp;
+ 	struct lpfc_nodelist *ndlp, *next_ndlp;
++	struct lpfc_hba *phba = vport->phba;
+ 
+ 	/* FAN received */
+-	lpfc_printf_log(phba, KERN_INFO, LOG_ELS, "%d:0265 FAN received\n",
+-								phba->brd_no);
++	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++			"%d (%d):0265 FAN received\n",
++			phba->brd_no, vport->vpi);
+ 
+ 	icmd = &cmdiocb->iocb;
+ 	did = icmd->un.elsreq64.remoteID;
+@@ -3115,11 +3512,11 @@
+ 	lp = (uint32_t *)pcmd->virt;
+ 
+ 	cmd = *lp++;
+-	fp = (FAN *)lp;
++	fp = (FAN *) lp;
+ 
+ 	/* FAN received; Fan does not have a reply sequence */
+ 
+-	if (phba->hba_state == LPFC_LOCAL_CFG_LINK) {
++	if (phba->pport->port_state == LPFC_LOCAL_CFG_LINK) {
+ 		if ((memcmp(&phba->fc_fabparam.nodeName, &fp->FnodeName,
+ 			sizeof(struct lpfc_name)) != 0) ||
+ 		    (memcmp(&phba->fc_fabparam.portName, &fp->FportName,
+@@ -3130,7 +3527,7 @@
+ 			 */
+ 
+ 			list_for_each_entry_safe(ndlp, next_ndlp,
+-						 &phba->fc_nodes, nlp_listp) {
++						 &vport->fc_nodes, nlp_listp) {
+ 				if (ndlp->nlp_state != NLP_STE_NPR_NODE)
+ 					continue;
+ 				if (ndlp->nlp_type & NLP_FABRIC) {
+@@ -3138,24 +3535,24 @@
+ 					 * Clean up old Fabric, Nameserver and
+ 					 * other NLP_FABRIC logins
+ 					 */
+-					lpfc_drop_node(phba, ndlp);
++					lpfc_drop_node(vport, ndlp);
+ 				} else if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) {
+ 					/* Fail outstanding I/O now since this
+ 					 * device is marked for PLOGI
+ 					 */
+-					lpfc_unreg_rpi(phba, ndlp);
++					lpfc_unreg_rpi(vport, ndlp);
+ 				}
+ 			}
+ 
+-			phba->hba_state = LPFC_FLOGI;
+-			lpfc_set_disctmo(phba);
+-			lpfc_initial_flogi(phba);
++			vport->port_state = LPFC_FLOGI;
++			lpfc_set_disctmo(vport);
++			lpfc_initial_flogi(vport);
+ 			return 0;
+ 		}
+ 		/* Discovery not needed,
+ 		 * move the nodes to their original state.
+ 		 */
+-		list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes,
++		list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes,
+ 					 nlp_listp) {
+ 			if (ndlp->nlp_state != NLP_STE_NPR_NODE)
+ 				continue;
+@@ -3163,13 +3560,13 @@
+ 			switch (ndlp->nlp_prev_state) {
+ 			case NLP_STE_UNMAPPED_NODE:
+ 				ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+-				lpfc_nlp_set_state(phba, ndlp,
++				lpfc_nlp_set_state(vport, ndlp,
+ 						   NLP_STE_UNMAPPED_NODE);
+ 				break;
+ 
+ 			case NLP_STE_MAPPED_NODE:
+ 				ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+-				lpfc_nlp_set_state(phba, ndlp,
++				lpfc_nlp_set_state(vport, ndlp,
+ 						   NLP_STE_MAPPED_NODE);
+ 				break;
+ 
+@@ -3179,7 +3576,7 @@
+ 		}
+ 
+ 		/* Start discovery - this should just do CLEAR_LA */
+-		lpfc_disc_start(phba);
++		lpfc_disc_start(vport);
+ 	}
+ 	return 0;
+ }
+@@ -3187,42 +3584,42 @@
+ void
+ lpfc_els_timeout(unsigned long ptr)
+ {
+-	struct lpfc_hba *phba;
++	struct lpfc_vport *vport = (struct lpfc_vport *) ptr;
++	struct lpfc_hba   *phba = vport->phba;
+ 	unsigned long iflag;
+ 
+-	phba = (struct lpfc_hba *)ptr;
+-	if (phba == 0)
+-		return;
+-	spin_lock_irqsave(phba->host->host_lock, iflag);
+-	if (!(phba->work_hba_events & WORKER_ELS_TMO)) {
+-		phba->work_hba_events |= WORKER_ELS_TMO;
++	spin_lock_irqsave(&vport->work_port_lock, iflag);
++	if ((vport->work_port_events & WORKER_ELS_TMO) == 0) {
++		vport->work_port_events |= WORKER_ELS_TMO;
++		spin_unlock_irqrestore(&vport->work_port_lock, iflag);
++
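++		/* work_wait is protected by hbalock, not the per-vport lock */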
++		spin_lock_irqsave(&phba->hbalock, iflag);
+ 		if (phba->work_wait)
+-			wake_up(phba->work_wait);
++			lpfc_worker_wake_up(phba);
++		spin_unlock_irqrestore(&phba->hbalock, iflag);
+ 	}
+-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
++	else
++		spin_unlock_irqrestore(&vport->work_port_lock, iflag);
+ 	return;
+ }
+ 
+ void
+-lpfc_els_timeout_handler(struct lpfc_hba *phba)
++lpfc_els_timeout_handler(struct lpfc_vport *vport)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	struct lpfc_sli_ring *pring;
+ 	struct lpfc_iocbq *tmp_iocb, *piocb;
+ 	IOCB_t *cmd = NULL;
+ 	struct lpfc_dmabuf *pcmd;
+-	uint32_t *elscmd;
+-	uint32_t els_command=0;
++	uint32_t els_command = 0;
+ 	uint32_t timeout;
+-	uint32_t remote_ID;
++	uint32_t remote_ID = 0xffffffff;
+ 
+-	if (phba == 0)
+-		return;
+-	spin_lock_irq(phba->host->host_lock);
+ 	/* If the timer is already canceled do nothing */
+-	if (!(phba->work_hba_events & WORKER_ELS_TMO)) {
+-		spin_unlock_irq(phba->host->host_lock);
++	if ((vport->work_port_events & WORKER_ELS_TMO) == 0) {
+ 		return;
+ 	}
++	spin_lock_irq(&phba->hbalock);
+ 	timeout = (uint32_t)(phba->fc_ratov << 1);
+ 
+ 	pring = &phba->sli.ring[LPFC_ELS_RING];
+@@ -3230,63 +3627,70 @@
+ 	list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) {
+ 		cmd = &piocb->iocb;
+ 
+-		if ((piocb->iocb_flag & LPFC_IO_LIBDFC) ||
+-			(piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN) ||
+-			(piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN)) {
++		if ((piocb->iocb_flag & LPFC_IO_LIBDFC) != 0 ||
++		    piocb->iocb.ulpCommand == CMD_ABORT_XRI_CN ||
++		    piocb->iocb.ulpCommand == CMD_CLOSE_XRI_CN)
+ 			continue;
+-		}
++
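++		/* Timeouts are handled per vport; skip iocbs owned by other
++		 * vports */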
++		if (piocb->vport != vport)
++			continue;
++
+ 		pcmd = (struct lpfc_dmabuf *) piocb->context2;
+-		if (pcmd) {
+-			elscmd = (uint32_t *) (pcmd->virt);
+-			els_command = *elscmd;
+-		}
++		if (pcmd)
++			els_command = *(uint32_t *) (pcmd->virt);
+ 
+-		if ((els_command == ELS_CMD_FARP)
+-		    || (els_command == ELS_CMD_FARPR)) {
++		if (els_command == ELS_CMD_FARP ||
++		    els_command == ELS_CMD_FARPR ||
++		    els_command == ELS_CMD_FDISC)
++			continue;
++
++		if (vport != piocb->vport)
+ 			continue;
+-		}
+ 
+ 		if (piocb->drvrTimeout > 0) {
+-			if (piocb->drvrTimeout >= timeout) {
++			if (piocb->drvrTimeout >= timeout)
+ 				piocb->drvrTimeout -= timeout;
+-			} else {
++			else
+ 				piocb->drvrTimeout = 0;
+-			}
+ 			continue;
+ 		}
+ 
+-		if (cmd->ulpCommand == CMD_GEN_REQUEST64_CR) {
++		remote_ID = 0xffffffff;
++		if (cmd->ulpCommand != CMD_GEN_REQUEST64_CR)
++			remote_ID = cmd->un.elsreq64.remoteID;
++		else {
+ 			struct lpfc_nodelist *ndlp;
+-			ndlp = __lpfc_findnode_rpi(phba, cmd->ulpContext);
++			ndlp = __lpfc_findnode_rpi(vport, cmd->ulpContext);
++			if (ndlp)
+ 			remote_ID = ndlp->nlp_DID;
+-		} else {
+-			remote_ID = cmd->un.elsreq64.remoteID;
+ 		}
+ 
+-		lpfc_printf_log(phba,
+-				KERN_ERR,
+-				LOG_ELS,
+-				"%d:0127 ELS timeout Data: x%x x%x x%x x%x\n",
+-				phba->brd_no, els_command,
++		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++				"%d (%d):0127 ELS timeout Data: x%x x%x x%x "
++				"x%x\n",
++				phba->brd_no, vport->vpi, els_command,
+ 				remote_ID, cmd->ulpCommand, cmd->ulpIoTag);
+ 
+ 		lpfc_sli_issue_abort_iotag(phba, pring, piocb);
+ 	}
+-	if (phba->sli.ring[LPFC_ELS_RING].txcmplq_cnt)
+-		mod_timer(&phba->els_tmofunc, jiffies + HZ * timeout);
++	spin_unlock_irq(&phba->hbalock);
+ 
+-	spin_unlock_irq(phba->host->host_lock);
++	if (phba->sli.ring[LPFC_ELS_RING].txcmplq_cnt)
++		mod_timer(&vport->els_tmofunc, jiffies + HZ * timeout);
+ }
+ 
+ void
+-lpfc_els_flush_cmd(struct lpfc_hba *phba)
++lpfc_els_flush_cmd(struct lpfc_vport *vport)
+ {
+ 	LIST_HEAD(completions);
++	struct lpfc_hba  *phba = vport->phba;
+ 	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
+ 	struct lpfc_iocbq *tmp_iocb, *piocb;
+ 	IOCB_t *cmd = NULL;
+ 
+-	spin_lock_irq(phba->host->host_lock);
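++	/* First abort any fabric ELS traffic still queued for this vport */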
++	lpfc_fabric_abort_vport(vport);
++
++	spin_lock_irq(&phba->hbalock);
+ 	list_for_each_entry_safe(piocb, tmp_iocb, &pring->txq, list) {
+ 		cmd = &piocb->iocb;
+ 
+@@ -3301,271 +3705,1042 @@
+ 		    cmd->ulpCommand == CMD_ABORT_XRI_CN)
+ 			continue;
+ 
++		if (piocb->vport != vport)
++			continue;
++
+ 		list_move_tail(&piocb->list, &completions);
+ 		pring->txq_cnt--;
+-
+ 	}
+ 
+ 	list_for_each_entry_safe(piocb, tmp_iocb, &pring->txcmplq, list) {
+-		cmd = &piocb->iocb;
+-
+ 		if (piocb->iocb_flag & LPFC_IO_LIBDFC) {
+ 			continue;
+ 		}
+ 
++		if (piocb->vport != vport)
++			continue;
++
+ 		lpfc_sli_issue_abort_iotag(phba, pring, piocb);
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+-	while(!list_empty(&completions)) {
++	while (!list_empty(&completions)) {
+ 		piocb = list_get_first(&completions, struct lpfc_iocbq, list);
+ 		cmd = &piocb->iocb;
+-		list_del(&piocb->list);
++		list_del_init(&piocb->list);
+ 
+-		if (piocb->iocb_cmpl) {
++		if (!piocb->iocb_cmpl)
++			lpfc_sli_release_iocbq(phba, piocb);
++		else {
+ 			cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+ 			cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
+ 			(piocb->iocb_cmpl) (phba, piocb, piocb);
+-		} else
+-			lpfc_sli_release_iocbq(phba, piocb);
++		}
+ 	}
+ 
+ 	return;
+ }
+ 
+-void
+-lpfc_els_unsol_event(struct lpfc_hba * phba,
+-		     struct lpfc_sli_ring * pring, struct lpfc_iocbq * elsiocb)
++static void
++lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++		      struct lpfc_vport *vport, struct lpfc_iocbq *elsiocb)
+ {
+-	struct lpfc_sli *psli;
+ 	struct lpfc_nodelist *ndlp;
+-	struct lpfc_dmabuf *mp;
+-	uint32_t *lp;
+-	IOCB_t *icmd;
+ 	struct ls_rjt stat;
+-	uint32_t cmd;
+-	uint32_t did;
+-	uint32_t newnode;
+-	uint32_t drop_cmd = 0;	/* by default do NOT drop received cmd */
+-	uint32_t rjt_err = 0;
+-
+-	psli = &phba->sli;
+-	icmd = &elsiocb->iocb;
+-
+-	if ((icmd->ulpStatus == IOSTAT_LOCAL_REJECT) &&
+-		((icmd->un.ulpWord[4] & 0xff) == IOERR_RCV_BUFFER_WAITING)) {
+-		/* Not enough posted buffers; Try posting more buffers */
+-		phba->fc_stat.NoRcvBuf++;
+-		lpfc_post_buffer(phba, pring, 0, 1);
+-		return;
+-	}
+-
+-	/* If there are no BDEs associated with this IOCB,
+-	 * there is nothing to do.
+-	 */
+-	if (icmd->ulpBdeCount == 0)
+-		return;
++	uint32_t *payload;
++	uint32_t cmd, did, newnode, rjt_err = 0;
++	IOCB_t *icmd = &elsiocb->iocb;
+ 
+-	/* type of ELS cmd is first 32bit word in packet */
+-	mp = lpfc_sli_ringpostbuf_get(phba, pring, getPaddr(icmd->un.
+-							    cont64[0].
+-							    addrHigh,
+-							    icmd->un.
+-							    cont64[0].addrLow));
+-	if (mp == 0) {
+-		drop_cmd = 1;
++	if (vport == NULL || elsiocb->context2 == NULL)
+ 		goto dropit;
+-	}
+ 
+ 	newnode = 0;
+-	lp = (uint32_t *) mp->virt;
+-	cmd = *lp++;
+-	lpfc_post_buffer(phba, &psli->ring[LPFC_ELS_RING], 1, 1);
++	payload = ((struct lpfc_dmabuf *)elsiocb->context2)->virt;
++	cmd = *payload;
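++	/* With HBQs the adapter replenishes buffers itself; otherwise repost
++	 * one here */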
++	if ((phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) == 0)
++		lpfc_post_buffer(phba, pring, 1, 1);
+ 
++	did = icmd->un.rcvels.remoteID;
+ 	if (icmd->ulpStatus) {
+-		lpfc_mbuf_free(phba, mp->virt, mp->phys);
+-		kfree(mp);
+-		drop_cmd = 1;
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV Unsol ELS:  status:x%x/x%x did:x%x",
++			icmd->ulpStatus, icmd->un.ulpWord[4], did);
+ 		goto dropit;
+ 	}
+ 
+ 	/* Check to see if link went down during discovery */
+-	if (lpfc_els_chk_latt(phba)) {
+-		lpfc_mbuf_free(phba, mp->virt, mp->phys);
+-		kfree(mp);
+-		drop_cmd = 1;
++	if (lpfc_els_chk_latt(vport))
+ 		goto dropit;
+-	}
+ 
+-	did = icmd->un.rcvels.remoteID;
+-	ndlp = lpfc_findnode_did(phba, did);
++	/* Ignore traffic received during vport shutdown. */
++	if (vport->load_flag & FC_UNLOADING)
++		goto dropit;
++
++	ndlp = lpfc_findnode_did(vport, did);
+ 	if (!ndlp) {
+ 		/* Cannot find existing Fabric ndlp, so allocate a new one */
+ 		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
+-		if (!ndlp) {
+-			lpfc_mbuf_free(phba, mp->virt, mp->phys);
+-			kfree(mp);
+-			drop_cmd = 1;
++		if (!ndlp)
+ 			goto dropit;
+-		}
+ 
+-		lpfc_nlp_init(phba, ndlp, did);
++		lpfc_nlp_init(vport, ndlp, did);
+ 		newnode = 1;
+ 		if ((did & Fabric_DID_MASK) == Fabric_DID_MASK) {
+ 			ndlp->nlp_type |= NLP_FABRIC;
+ 		}
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
+ 	}
+ 
+ 	phba->fc_stat.elsRcvFrame++;
+ 	if (elsiocb->context1)
+ 		lpfc_nlp_put(elsiocb->context1);
+ 	elsiocb->context1 = lpfc_nlp_get(ndlp);
+-	elsiocb->context2 = mp;
++	elsiocb->vport = vport;
+ 
+ 	if ((cmd & ELS_CMD_MASK) == ELS_CMD_RSCN) {
+ 		cmd &= ELS_CMD_MASK;
+ 	}
+ 	/* ELS command <elsCmd> received from NPORT <did> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
+-			"%d:0112 ELS command x%x received from NPORT x%x "
+-			"Data: x%x\n", phba->brd_no, cmd, did, phba->hba_state);
++			"%d (%d):0112 ELS command x%x received from NPORT x%x "
++			"Data: x%x\n", phba->brd_no, vport->vpi, cmd, did,
++			vport->port_state);
+ 
+ 	switch (cmd) {
+ 	case ELS_CMD_PLOGI:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV PLOGI:       did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvPLOGI++;
+-		if (phba->hba_state < LPFC_DISC_AUTH) {
+-			rjt_err = 1;
++		ndlp = lpfc_plogi_confirm_nport(phba, payload, ndlp);
++
++		if (vport->port_state < LPFC_DISC_AUTH) {
++			rjt_err = LSRJT_UNABLE_TPC;
+ 			break;
+ 		}
+-		ndlp = lpfc_plogi_confirm_nport(phba, mp, ndlp);
+-		lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PLOGI);
++		lpfc_disc_state_machine(vport, ndlp, elsiocb,
++					NLP_EVT_RCV_PLOGI);
++
+ 		break;
+ 	case ELS_CMD_FLOGI:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV FLOGI:       did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvFLOGI++;
+-		lpfc_els_rcv_flogi(phba, elsiocb, ndlp, newnode);
++		lpfc_els_rcv_flogi(vport, elsiocb, ndlp, newnode);
+ 		if (newnode)
+-			lpfc_drop_node(phba, ndlp);
++			lpfc_drop_node(vport, ndlp);
+ 		break;
+ 	case ELS_CMD_LOGO:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV LOGO:        did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvLOGO++;
+-		if (phba->hba_state < LPFC_DISC_AUTH) {
+-			rjt_err = 1;
++		if (vport->port_state < LPFC_DISC_AUTH) {
++			rjt_err = LSRJT_UNABLE_TPC;
+ 			break;
+ 		}
+-		lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_LOGO);
++		lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_LOGO);
+ 		break;
+ 	case ELS_CMD_PRLO:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV PRLO:        did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvPRLO++;
+-		if (phba->hba_state < LPFC_DISC_AUTH) {
+-			rjt_err = 1;
++		if (vport->port_state < LPFC_DISC_AUTH) {
++			rjt_err = LSRJT_UNABLE_TPC;
+ 			break;
+ 		}
+-		lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PRLO);
++		lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLO);
+ 		break;
+ 	case ELS_CMD_RSCN:
+ 		phba->fc_stat.elsRcvRSCN++;
+-		lpfc_els_rcv_rscn(phba, elsiocb, ndlp, newnode);
++		lpfc_els_rcv_rscn(vport, elsiocb, ndlp, newnode);
+ 		if (newnode)
+-			lpfc_drop_node(phba, ndlp);
++			lpfc_drop_node(vport, ndlp);
+ 		break;
+ 	case ELS_CMD_ADISC:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV ADISC:       did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvADISC++;
+-		if (phba->hba_state < LPFC_DISC_AUTH) {
+-			rjt_err = 1;
++		if (vport->port_state < LPFC_DISC_AUTH) {
++			rjt_err = LSRJT_UNABLE_TPC;
+ 			break;
+ 		}
+-		lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_ADISC);
++		lpfc_disc_state_machine(vport, ndlp, elsiocb,
++					NLP_EVT_RCV_ADISC);
+ 		break;
+ 	case ELS_CMD_PDISC:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV PDISC:       did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvPDISC++;
+-		if (phba->hba_state < LPFC_DISC_AUTH) {
+-			rjt_err = 1;
++		if (vport->port_state < LPFC_DISC_AUTH) {
++			rjt_err = LSRJT_UNABLE_TPC;
+ 			break;
+ 		}
+-		lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PDISC);
++		lpfc_disc_state_machine(vport, ndlp, elsiocb,
++					NLP_EVT_RCV_PDISC);
+ 		break;
+ 	case ELS_CMD_FARPR:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV FARPR:       did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvFARPR++;
+-		lpfc_els_rcv_farpr(phba, elsiocb, ndlp);
++		lpfc_els_rcv_farpr(vport, elsiocb, ndlp);
+ 		break;
+ 	case ELS_CMD_FARP:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV FARP:        did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvFARP++;
+-		lpfc_els_rcv_farp(phba, elsiocb, ndlp);
++		lpfc_els_rcv_farp(vport, elsiocb, ndlp);
+ 		break;
+ 	case ELS_CMD_FAN:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV FAN:         did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvFAN++;
+-		lpfc_els_rcv_fan(phba, elsiocb, ndlp);
++		lpfc_els_rcv_fan(vport, elsiocb, ndlp);
+ 		break;
+ 	case ELS_CMD_PRLI:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV PRLI:        did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvPRLI++;
+-		if (phba->hba_state < LPFC_DISC_AUTH) {
+-			rjt_err = 1;
++		if (vport->port_state < LPFC_DISC_AUTH) {
++			rjt_err = LSRJT_UNABLE_TPC;
+ 			break;
+ 		}
+-		lpfc_disc_state_machine(phba, ndlp, elsiocb, NLP_EVT_RCV_PRLI);
++		lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLI);
+ 		break;
+ 	case ELS_CMD_LIRR:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV LIRR:        did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvLIRR++;
+-		lpfc_els_rcv_lirr(phba, elsiocb, ndlp);
++		lpfc_els_rcv_lirr(vport, elsiocb, ndlp);
+ 		if (newnode)
+-			lpfc_drop_node(phba, ndlp);
++			lpfc_drop_node(vport, ndlp);
+ 		break;
+ 	case ELS_CMD_RPS:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV RPS:         did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvRPS++;
+-		lpfc_els_rcv_rps(phba, elsiocb, ndlp);
++		lpfc_els_rcv_rps(vport, elsiocb, ndlp);
+ 		if (newnode)
+-			lpfc_drop_node(phba, ndlp);
++			lpfc_drop_node(vport, ndlp);
+ 		break;
+ 	case ELS_CMD_RPL:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV RPL:         did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvRPL++;
+-		lpfc_els_rcv_rpl(phba, elsiocb, ndlp);
++		lpfc_els_rcv_rpl(vport, elsiocb, ndlp);
+ 		if (newnode)
+-			lpfc_drop_node(phba, ndlp);
++			lpfc_drop_node(vport, ndlp);
+ 		break;
+ 	case ELS_CMD_RNID:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV RNID:        did:x%x/ste:x%x flg:x%x",
++			did, vport->port_state, ndlp->nlp_flag);
++
+ 		phba->fc_stat.elsRcvRNID++;
+-		lpfc_els_rcv_rnid(phba, elsiocb, ndlp);
++		lpfc_els_rcv_rnid(vport, elsiocb, ndlp);
+ 		if (newnode)
+-			lpfc_drop_node(phba, ndlp);
++			lpfc_drop_node(vport, ndlp);
+ 		break;
+ 	default:
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL,
++			"RCV ELS cmd:     cmd:x%x did:x%x/ste:x%x",
++			cmd, did, vport->port_state);
++
+ 		/* Unsupported ELS command, reject */
+-		rjt_err = 1;
++		rjt_err = LSRJT_INVALID_CMD;
+ 
+ 		/* Unknown ELS command <elsCmd> received from NPORT <did> */
+ 		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
+-				"%d:0115 Unknown ELS command x%x received from "
+-				"NPORT x%x\n", phba->brd_no, cmd, did);
++				"%d (%d):0115 Unknown ELS command x%x "
++				"received from NPORT x%x\n",
++				phba->brd_no, vport->vpi, cmd, did);
+ 		if (newnode)
+-			lpfc_drop_node(phba, ndlp);
++			lpfc_drop_node(vport, ndlp);
+ 		break;
+ 	}
+ 
+ 	/* check if need to LS_RJT received ELS cmd */
+ 	if (rjt_err) {
+-		stat.un.b.lsRjtRsvd0 = 0;
+-		stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
++		memset(&stat, 0, sizeof(stat));
++		stat.un.b.lsRjtRsnCode = rjt_err;
+ 		stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE;
+-		stat.un.b.vendorUnique = 0;
+-		lpfc_els_rsp_reject(phba, stat.un.lsRjtError, elsiocb, ndlp);
++		lpfc_els_rsp_reject(vport, stat.un.lsRjtError, elsiocb, ndlp,
++			NULL);
++		if (newnode)
++			lpfc_drop_node(vport, ndlp);
+ 	}
+ 
+-	lpfc_nlp_put(elsiocb->context1);
+-	elsiocb->context1 = NULL;
+-	if (elsiocb->context2) {
+-		lpfc_mbuf_free(phba, mp->virt, mp->phys);
+-		kfree(mp);
+-	}
++	return;
++
+ dropit:
+-	/* check if need to drop received ELS cmd */
+-	if (drop_cmd == 1) {
+ 		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
+-				"%d:0111 Dropping received ELS cmd "
+-				"Data: x%x x%x x%x\n", phba->brd_no,
++			"%d (%d):0111 Dropping received ELS cmd "
++			"Data: x%x x%x x%x\n",
++			phba->brd_no, vport ? vport->vpi : 0xffff,
+ 				icmd->ulpStatus, icmd->un.ulpWord[4],
+ 				icmd->ulpTimeout);
+ 		phba->fc_stat.elsRcvDrop++;
++}
++
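++/* Walk the HBA's port list and return the vport that owns the given
++ * VPI, or NULL if no vport matches.
++ */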
++static struct lpfc_vport *
++lpfc_find_vport_by_vpid(struct lpfc_hba *phba, uint16_t vpi)
++{
++	struct lpfc_vport *vport;
++
++	list_for_each_entry(vport, &phba->port_list, listentry) {
++		if (vport->vpi == vpi)
++			return vport;
++	}
++	return NULL;
++}
++
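++/* Entry point for unsolicited ELS receive events.  Replenishes receive
++ * buffers if the ring ran short, resolves the destination vport from the
++ * VPI carried in the IOCB (NPIV), and hands each received BDE to
++ * lpfc_els_unsol_buffer() for command-specific processing.
++ */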
++void
++lpfc_els_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++		     struct lpfc_iocbq *elsiocb)
++{
++	struct lpfc_vport *vport = phba->pport;
++	IOCB_t *icmd = &elsiocb->iocb;
++	dma_addr_t paddr;
++	struct lpfc_dmabuf *bdeBuf1 = elsiocb->context2;
++	struct lpfc_dmabuf *bdeBuf2 = elsiocb->context3;
++
++	elsiocb->context2 = NULL;
++	elsiocb->context3 = NULL;
++
++	if (icmd->ulpStatus == IOSTAT_NEED_BUFFER) {
++		lpfc_sli_hbqbuf_add_hbqs(phba, LPFC_ELS_HBQ);
++	} else if (icmd->ulpStatus == IOSTAT_LOCAL_REJECT &&
++	    (icmd->un.ulpWord[4] & 0xff) == IOERR_RCV_BUFFER_WAITING) {
++		phba->fc_stat.NoRcvBuf++;
++		/* Not enough posted buffers; try posting more buffers */
++		if (!(phba->sli3_options & LPFC_SLI3_HBQ_ENABLED))
++			lpfc_post_buffer(phba, pring, 0, 1);
++		return;
++	}
++
++	if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++	    (icmd->ulpCommand == CMD_IOCB_RCV_ELS64_CX ||
++	     icmd->ulpCommand == CMD_IOCB_RCV_SEQ64_CX)) {
++		if (icmd->unsli3.rcvsli3.vpi == 0xffff)
++			vport = phba->pport;
++		else {
++			uint16_t vpi = icmd->unsli3.rcvsli3.vpi;
++			vport = lpfc_find_vport_by_vpid(phba, vpi);
++		}
++	}
++	/* If there are no BDEs associated with this IOCB,
++	 * there is nothing to do.
++	 */
++	if (icmd->ulpBdeCount == 0)
++		return;
++
++	/* The type of the ELS command is the first 32-bit word
++	 * in the packet.
++	 */
++	if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) {
++		elsiocb->context2 = bdeBuf1;
++	} else {
++		paddr = getPaddr(icmd->un.cont64[0].addrHigh,
++				 icmd->un.cont64[0].addrLow);
++		elsiocb->context2 = lpfc_sli_ringpostbuf_get(phba, pring,
++							     paddr);
++	}
++
++	lpfc_els_unsol_buffer(phba, pring, vport, elsiocb);
++	/*
++	 * The individual unsolicited event handlers tell us whether they
++	 * are done with "mp" by setting context2 to NULL.
++	 */
++	lpfc_nlp_put(elsiocb->context1);
++	elsiocb->context1 = NULL;
++	if (elsiocb->context2) {
++		lpfc_in_buf_free(phba, (struct lpfc_dmabuf *)elsiocb->context2);
++		elsiocb->context2 = NULL;
++	}
++
++	/* RCV_ELS64_CX provide for 2 BDEs - process 2nd if included */
++	if ((phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) &&
++	    icmd->ulpBdeCount == 2) {
++		elsiocb->context2 = bdeBuf2;
++		lpfc_els_unsol_buffer(phba, pring, vport, elsiocb);
++		/* free mp if we are done with it */
++		if (elsiocb->context2) {
++			lpfc_in_buf_free(phba, elsiocb->context2);
++			elsiocb->context2 = NULL;
++		}
++	}
++}
++
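++/* Log the vport into the fabric name server (and, if FDMI is enabled,
++ * the FDMI management server) by issuing PLOGIs; allocation or PLOGI
++ * failures generally leave the vport in FC_VPORT_FAILED state.
++ */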
++void
++lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport)
++{
++	struct lpfc_nodelist *ndlp, *ndlp_fdmi;
++
++	ndlp = lpfc_findnode_did(vport, NameServer_DID);
++	if (!ndlp) {
++		ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
++		if (!ndlp) {
++			if (phba->fc_topology == TOPOLOGY_LOOP) {
++				lpfc_disc_start(vport);
++				return;
++			}
++			lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++			lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++				"%d (%d):0251 NameServer login: no memory\n",
++				phba->brd_no, vport->vpi);
++			return;
++		}
++		lpfc_nlp_init(vport, ndlp, NameServer_DID);
++		ndlp->nlp_type |= NLP_FABRIC;
++	}
++
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
++
++	if (lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0)) {
++		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++			"%d (%d):0252 Cannot issue NameServer login\n",
++			phba->brd_no, vport->vpi);
++		return;
++	}
++
++	if (phba->cfg_fdmi_on) {
++		ndlp_fdmi = mempool_alloc(phba->nlp_mem_pool,
++					  GFP_KERNEL);
++		if (ndlp_fdmi) {
++			lpfc_nlp_init(vport, ndlp_fdmi, FDMI_DID);
++			ndlp_fdmi->nlp_type |= NLP_FABRIC;
++			ndlp_fdmi->nlp_state =
++				NLP_STE_PLOGI_ISSUE;
++			lpfc_issue_els_plogi(vport, ndlp_fdmi->nlp_DID,
++					     0);
++		}
++	}
++	return;
++}
++
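++/* Mailbox completion handler for REG_VPI.  Unrecoverable mailbox errors
++ * put the vport into FC_VPORT_FAILED; other errors trigger an unreg_vpi
++ * and a fresh FDISC.  On success the physical port continues with the
++ * fabric reglogin while virtual ports proceed to name server PLOGI.
++ */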
++static void
++lpfc_cmpl_reg_new_vport(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
++{
++	struct lpfc_vport *vport = pmb->vport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
++	MAILBOX_t *mb = &pmb->mb;
++
++	vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI;
++	lpfc_nlp_put(ndlp);
++
++	if (mb->mbxStatus) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
++				"%d (%d):0915 Register VPI failed: 0x%x\n",
++				phba->brd_no, vport->vpi, mb->mbxStatus);
++
++		switch (mb->mbxStatus) {
++		case 0x11:	/* unsupported feature */
++		case 0x9603:	/* max_vpi exceeded */
++			/* giving up on vport registration */
++			lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++			spin_lock_irq(shost->host_lock);
++			vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++			spin_unlock_irq(shost->host_lock);
++			lpfc_can_disctmo(vport);
++			break;
++		default:
++			/* Try to recover from this error */
++			lpfc_mbx_unreg_vpi(vport);
++			vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
++			lpfc_initial_fdisc(vport);
++			break;
++		}
++
++	} else {
++		if (vport == phba->pport)
++			lpfc_issue_fabric_reglogin(vport);
++		else
++			lpfc_do_scr_ns_plogi(phba, vport);
+ 	}
++	mempool_free(pmb, phba->mbox_mem_pool);
+ 	return;
+ }
++
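++/* Issue a REG_VPI mailbox command to register this vport's VPI and
++ * NPort ID with the firmware; completion is handled by
++ * lpfc_cmpl_reg_new_vport().
++ */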
++void
++lpfc_register_new_vport(struct lpfc_hba *phba, struct lpfc_vport *vport,
++			struct lpfc_nodelist *ndlp)
++{
++	LPFC_MBOXQ_t *mbox;
++
++	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++	if (mbox) {
++		lpfc_reg_vpi(phba, vport->vpi, vport->fc_myDID, mbox);
++		mbox->vport = vport;
++		mbox->context2 = lpfc_nlp_get(ndlp);
++		mbox->mbox_cmpl = lpfc_cmpl_reg_new_vport;
++		if (lpfc_sli_issue_mbox(phba, mbox,
++					MBX_NOWAIT | MBX_STOP_IOCB)
++		    == MBX_NOT_FINISHED) {
++			mempool_free(mbox, phba->mbox_mem_pool);
++			vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI;
++
++			lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++
++			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
++				"%d (%d):0253 Register VPI: Cannot send mbox\n",
++				phba->brd_no, vport->vpi);
++		}
++	} else {
++		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++
++		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
++			"%d (%d):0254 Register VPI: no memory\n",
++			phba->brd_no, vport->vpi);
++
++		vport->fc_flag &= ~FC_VPORT_NEEDS_REG_VPI;
++		lpfc_nlp_put(ndlp);
++	}
++}
++
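++/* FDISC completion handler.  FDISCs are single threaded through the
++ * fabric iocb list, so the discovery timer is reset for every waiting
++ * vport.  On success the new NPort ID is recorded and, if it changed,
++ * remaining RPIs are unregistered before the VPI is re-registered.
++ */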
++static void
++lpfc_cmpl_els_fdisc(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++		    struct lpfc_iocbq *rspiocb)
++{
++	struct lpfc_vport *vport = cmdiocb->vport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) cmdiocb->context1;
++	struct lpfc_nodelist *np;
++	struct lpfc_nodelist *next_np;
++	IOCB_t *irsp = &rspiocb->iocb;
++	struct lpfc_iocbq *piocb;
++
++	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++		"%d (%d):0123 FDISC completes. x%x/x%x prevDID: x%x\n",
++		phba->brd_no, vport->vpi,
++		irsp->ulpStatus, irsp->un.ulpWord[4], vport->fc_prevDID);
++
++	/* Since all FDISCs are being single threaded, we
++	 * must reset the discovery timer for ALL vports
++	 * waiting to send FDISC when one completes.
++	 */
++	list_for_each_entry(piocb, &phba->fabric_iocb_list, list) {
++		lpfc_set_disctmo(piocb->vport);
++	}
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"FDISC cmpl:      status:x%x/x%x prevdid:x%x",
++		irsp->ulpStatus, irsp->un.ulpWord[4], vport->fc_prevDID);
++
++	if (irsp->ulpStatus) {
++		/* Check for retry */
++		if (lpfc_els_retry(phba, cmdiocb, rspiocb))
++			goto out;
++
++		/* FDISC failed */
++		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++			"%d (%d):0124 FDISC failed. (%d/%d)\n",
++			phba->brd_no, vport->vpi,
++			irsp->ulpStatus, irsp->un.ulpWord[4]);
++
++		if (vport->fc_vport->vport_state == FC_VPORT_INITIALIZING)
++			lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++
++		lpfc_nlp_put(ndlp);
++		/* giving up on FDISC. Cancel discovery timer */
++		lpfc_can_disctmo(vport);
++	} else {
++		spin_lock_irq(shost->host_lock);
++		vport->fc_flag |= FC_FABRIC;
++		if (vport->phba->fc_topology == TOPOLOGY_LOOP)
++			vport->fc_flag |=  FC_PUBLIC_LOOP;
++		spin_unlock_irq(shost->host_lock);
++
++		vport->fc_myDID = irsp->un.ulpWord[4] & Mask_DID;
++		lpfc_vport_set_state(vport, FC_VPORT_ACTIVE);
++		if ((vport->fc_prevDID != vport->fc_myDID) &&
++			!(vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)) {
++			/* If our NportID changed, we need to ensure all
++			 * remaining NPORTs get unreg_login'ed so we can
++			 * issue unreg_vpi.
++			 */
++			list_for_each_entry_safe(np, next_np,
++				&vport->fc_nodes, nlp_listp) {
++				if (np->nlp_state != NLP_STE_NPR_NODE
++				   || !(np->nlp_flag & NLP_NPR_ADISC))
++					continue;
++				spin_lock_irq(shost->host_lock);
++				np->nlp_flag &= ~NLP_NPR_ADISC;
++				spin_unlock_irq(shost->host_lock);
++				lpfc_unreg_rpi(vport, np);
++			}
++			lpfc_mbx_unreg_vpi(vport);
++			vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
++		}
++
++		if (vport->fc_flag & FC_VPORT_NEEDS_REG_VPI)
++			lpfc_register_new_vport(phba, vport, ndlp);
++		else
++			lpfc_do_scr_ns_plogi(phba, vport);
++
++		lpfc_nlp_put(ndlp); /* Free Fabric ndlp for vports */
++	}
++
++out:
++	lpfc_els_free_iocb(phba, cmdiocb);
++}
++
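++/* Build and transmit an FDISC for a virtual port.  The payload reuses
++ * the physical port's service parameters with fabric-appropriate CSP
++ * overrides; the firmware fills in the NPort ID from the FDISC response.
++ */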
++int
++lpfc_issue_els_fdisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++		     uint8_t retry)
++{
++	struct lpfc_hba *phba = vport->phba;
++	IOCB_t *icmd;
++	struct lpfc_iocbq *elsiocb;
++	struct serv_parm *sp;
++	uint8_t *pcmd;
++	uint16_t cmdsize;
++	int did = ndlp->nlp_DID;
++	int rc;
++
++	cmdsize = (sizeof(uint32_t) + sizeof(struct serv_parm));
++	elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, retry, ndlp, did,
++				     ELS_CMD_FDISC);
++	if (!elsiocb) {
++		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++
++		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++			"%d (%d):0255 Issue FDISC: no IOCB\n",
++			phba->brd_no, vport->vpi);
++		return 1;
++	}
++
++	icmd = &elsiocb->iocb;
++	icmd->un.elsreq64.myID = 0;
++	icmd->un.elsreq64.fl = 1;
++
++	/* For FDISC, Let FDISC rsp set the NPortID for this VPI */
++	icmd->ulpCt_h = 1;
++	icmd->ulpCt_l = 0;
++
++	pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++	*((uint32_t *) (pcmd)) = ELS_CMD_FDISC;
++	pcmd += sizeof(uint32_t); /* CSP Word 1 */
++	memcpy(pcmd, &vport->phba->pport->fc_sparam, sizeof(struct serv_parm));
++	sp = (struct serv_parm *) pcmd;
++	/* Setup CSPs accordingly for Fabric */
++	sp->cmn.e_d_tov = 0;
++	sp->cmn.w2.r_a_tov = 0;
++	sp->cls1.classValid = 0;
++	sp->cls2.seqDelivery = 1;
++	sp->cls3.seqDelivery = 1;
++
++	pcmd += sizeof(uint32_t); /* CSP Word 2 */
++	pcmd += sizeof(uint32_t); /* CSP Word 3 */
++	pcmd += sizeof(uint32_t); /* CSP Word 4 */
++	pcmd += sizeof(uint32_t); /* Port Name */
++	memcpy(pcmd, &vport->fc_portname, 8);
++	pcmd += sizeof(uint32_t); /* Node Name */
++	pcmd += sizeof(uint32_t); /* Node Name */
++	memcpy(pcmd, &vport->fc_nodename, 8);
++
++	lpfc_set_disctmo(vport);
++
++	phba->fc_stat.elsXmitFDISC++;
++	elsiocb->iocb_cmpl = lpfc_cmpl_els_fdisc;
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"Issue FDISC:     did:x%x",
++		did, 0, 0);
++
++	rc = lpfc_issue_fabric_iocb(phba, elsiocb);
++	if (rc == IOCB_ERROR) {
++		lpfc_els_free_iocb(phba, elsiocb);
++		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++
++		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++			"%d (%d):0256 Issue FDISC: Cannot send IOCB\n",
++			phba->brd_no, vport->vpi);
++
++		return 1;
++	}
++	lpfc_vport_set_state(vport, FC_VPORT_INITIALIZING);
++	vport->port_state = LPFC_FDISC;
++	return 0;
++}
++
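++/* Completion handler for an NPIV fabric LOGO; frees the iocb and
++ * records completion of the exchange in unreg_vpi_cmpl.
++ */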
++static void
++lpfc_cmpl_els_npiv_logo(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++			struct lpfc_iocbq *rspiocb)
++{
++	struct lpfc_vport *vport = cmdiocb->vport;
++	IOCB_t *irsp;
++
++	irsp = &rspiocb->iocb;
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"LOGO npiv cmpl:  status:x%x/x%x did:x%x",
++		irsp->ulpStatus, irsp->un.ulpWord[4], irsp->un.rcvels.remoteID);
++
++	lpfc_els_free_iocb(phba, cmdiocb);
++	vport->unreg_vpi_cmpl = VPORT_ERROR;
++}
++
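++/* Send a fabric LOGO on behalf of a virtual port, carrying the vport's
++ * NPort ID and port name in the payload.
++ */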
++int
++lpfc_issue_els_npiv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
++{
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
++	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
++	IOCB_t *icmd;
++	struct lpfc_iocbq *elsiocb;
++	uint8_t *pcmd;
++	uint16_t cmdsize;
++
++	cmdsize = 2 * sizeof(uint32_t) + sizeof(struct lpfc_name);
++	elsiocb = lpfc_prep_els_iocb(vport, 1, cmdsize, 0, ndlp, ndlp->nlp_DID,
++				     ELS_CMD_LOGO);
++	if (!elsiocb)
++		return 1;
++
++	icmd = &elsiocb->iocb;
++	pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt);
++	*((uint32_t *) (pcmd)) = ELS_CMD_LOGO;
++	pcmd += sizeof(uint32_t);
++
++	/* Fill in LOGO payload */
++	*((uint32_t *) (pcmd)) = be32_to_cpu(vport->fc_myDID);
++	pcmd += sizeof(uint32_t);
++	memcpy(pcmd, &vport->fc_portname, sizeof(struct lpfc_name));
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"Issue LOGO npiv  did:x%x flg:x%x",
++		ndlp->nlp_DID, ndlp->nlp_flag, 0);
++
++	elsiocb->iocb_cmpl = lpfc_cmpl_els_npiv_logo;
++	spin_lock_irq(shost->host_lock);
++	ndlp->nlp_flag |= NLP_LOGO_SND;
++	spin_unlock_irq(shost->host_lock);
++	if (lpfc_sli_issue_iocb(phba, pring, elsiocb, 0) == IOCB_ERROR) {
++		spin_lock_irq(shost->host_lock);
++		ndlp->nlp_flag &= ~NLP_LOGO_SND;
++		spin_unlock_irq(shost->host_lock);
++		lpfc_els_free_iocb(phba, elsiocb);
++		return 1;
++	}
++	return 0;
++}
++
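++/* Timer handler armed by lpfc_block_fabric_iocbs(); posts a
++ * WORKER_FABRIC_BLOCK_TMO event and wakes the worker thread so blocked
++ * fabric iocbs can be resumed.
++ */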
++void
++lpfc_fabric_block_timeout(unsigned long ptr)
++{
++	struct lpfc_hba  *phba = (struct lpfc_hba *) ptr;
++	unsigned long iflags;
++	uint32_t tmo_posted;
++	spin_lock_irqsave(&phba->pport->work_port_lock, iflags);
++	tmo_posted = phba->pport->work_port_events & WORKER_FABRIC_BLOCK_TMO;
++	if (!tmo_posted)
++		phba->pport->work_port_events |= WORKER_FABRIC_BLOCK_TMO;
++	spin_unlock_irqrestore(&phba->pport->work_port_lock, iflags);
++
++	if (!tmo_posted) {
++		spin_lock_irqsave(&phba->hbalock, iflags);
++		if (phba->work_wait)
++			lpfc_worker_wake_up(phba);
++		spin_unlock_irqrestore(&phba->hbalock, iflags);
++	}
++}
++
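++/* Issue the next queued fabric iocb, if any.  Only one fabric iocb may
++ * be outstanding at a time; an iocb that fails to issue is completed
++ * with IOSTAT_LOCAL_REJECT/IOERR_SLI_ABORTED and the next is attempted.
++ */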
++static void
++lpfc_resume_fabric_iocbs(struct lpfc_hba *phba)
++{
++	struct lpfc_iocbq *iocb;
++	unsigned long iflags;
++	int ret;
++	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
++	IOCB_t *cmd;
++
++repeat:
++	iocb = NULL;
++	spin_lock_irqsave(&phba->hbalock, iflags);
++	/* Post any pending iocb to the SLI layer */
++	if (atomic_read(&phba->fabric_iocb_count) == 0) {
++		list_remove_head(&phba->fabric_iocb_list, iocb, typeof(*iocb),
++				 list);
++		if (iocb)
++			atomic_inc(&phba->fabric_iocb_count);
++	}
++	spin_unlock_irqrestore(&phba->hbalock, iflags);
++	if (iocb) {
++		iocb->fabric_iocb_cmpl = iocb->iocb_cmpl;
++		iocb->iocb_cmpl = lpfc_cmpl_fabric_iocb;
++		iocb->iocb_flag |= LPFC_IO_FABRIC;
++
++		lpfc_debugfs_disc_trc(iocb->vport, LPFC_DISC_TRC_ELS_CMD,
++			"Fabric sched1:   ste:x%x",
++			iocb->vport->port_state, 0, 0);
++
++		ret = lpfc_sli_issue_iocb(phba, pring, iocb, 0);
++
++		if (ret == IOCB_ERROR) {
++			iocb->iocb_cmpl = iocb->fabric_iocb_cmpl;
++			iocb->fabric_iocb_cmpl = NULL;
++			iocb->iocb_flag &= ~LPFC_IO_FABRIC;
++			cmd = &iocb->iocb;
++			cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++			cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++			iocb->iocb_cmpl(phba, iocb, iocb);
++
++			atomic_dec(&phba->fabric_iocb_count);
++			goto repeat;
++		}
++	}
++
++	return;
++}
++
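++/* Clear the fabric-blocked flag and kick any queued fabric iocbs. */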
++void
++lpfc_unblock_fabric_iocbs(struct lpfc_hba *phba)
++{
++	clear_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags);
++
++	lpfc_resume_fabric_iocbs(phba);
++	return;
++}
++
++static void
++lpfc_block_fabric_iocbs(struct lpfc_hba *phba)
++{
++	int blocked;
++
++	blocked = test_and_set_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags);
++	/* Start a timer to unblock fabric
++	 * iocbs after 100ms.
++	 */
++	if (!blocked)
++		mod_timer(&phba->fabric_block_timer, jiffies + HZ/10 );
++
++	return;
++}
++
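++/* Intercepting completion for fabric iocbs: busy or temporary-reject
++ * statuses from the fabric block further fabric iocbs for a while
++ * before the original completion handler is restored and invoked.
++ */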
++static void
++lpfc_cmpl_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++	struct lpfc_iocbq *rspiocb)
++{
++	struct ls_rjt stat;
++
++	if ((cmdiocb->iocb_flag & LPFC_IO_FABRIC) != LPFC_IO_FABRIC)
++		BUG();
++
++	switch (rspiocb->iocb.ulpStatus) {
++		case IOSTAT_NPORT_RJT:
++		case IOSTAT_FABRIC_RJT:
++			if (rspiocb->iocb.un.ulpWord[4] & RJT_UNAVAIL_TEMP) {
++				lpfc_block_fabric_iocbs(phba);
++			}
++			break;
++
++		case IOSTAT_NPORT_BSY:
++		case IOSTAT_FABRIC_BSY:
++			lpfc_block_fabric_iocbs(phba);
++			break;
++
++		case IOSTAT_LS_RJT:
++			stat.un.lsRjtError =
++				be32_to_cpu(rspiocb->iocb.un.ulpWord[4]);
++			if ((stat.un.b.lsRjtRsnCode == LSRJT_UNABLE_TPC) ||
++				(stat.un.b.lsRjtRsnCode == LSRJT_LOGICAL_BSY))
++				lpfc_block_fabric_iocbs(phba);
++			break;
++	}
++
++	if (atomic_read(&phba->fabric_iocb_count) == 0)
++		BUG();
++
++	cmdiocb->iocb_cmpl = cmdiocb->fabric_iocb_cmpl;
++	cmdiocb->fabric_iocb_cmpl = NULL;
++	cmdiocb->iocb_flag &= ~LPFC_IO_FABRIC;
++	cmdiocb->iocb_cmpl(phba, cmdiocb, rspiocb);
++
++	atomic_dec(&phba->fabric_iocb_count);
++	if (!test_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags)) {
++		/* Post any pending iocbs to the HBA */
++		lpfc_resume_fabric_iocbs(phba);
++	}
++}
++
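++/* Single-thread fabric iocbs: issue immediately when none is pending
++ * and fabric commands are not blocked, otherwise queue the iocb on
++ * fabric_iocb_list for lpfc_resume_fabric_iocbs() to send later.
++ */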
++int
++lpfc_issue_fabric_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *iocb)
++{
++	unsigned long iflags;
++	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
++	int ready;
++	int ret;
++
++	if (atomic_read(&phba->fabric_iocb_count) > 1)
++		BUG();
++
++	spin_lock_irqsave(&phba->hbalock, iflags);
++	ready = atomic_read(&phba->fabric_iocb_count) == 0 &&
++		!test_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags);
++
++	spin_unlock_irqrestore(&phba->hbalock, iflags);
++	if (ready) {
++		iocb->fabric_iocb_cmpl = iocb->iocb_cmpl;
++		iocb->iocb_cmpl = lpfc_cmpl_fabric_iocb;
++		iocb->iocb_flag |= LPFC_IO_FABRIC;
++
++		lpfc_debugfs_disc_trc(iocb->vport, LPFC_DISC_TRC_ELS_CMD,
++			"Fabric sched2:   ste:x%x",
++			iocb->vport->port_state, 0, 0);
++
++		atomic_inc(&phba->fabric_iocb_count);
++		ret = lpfc_sli_issue_iocb(phba, pring, iocb, 0);
++
++		if (ret == IOCB_ERROR) {
++			iocb->iocb_cmpl = iocb->fabric_iocb_cmpl;
++			iocb->fabric_iocb_cmpl = NULL;
++			iocb->iocb_flag &= ~LPFC_IO_FABRIC;
++			atomic_dec(&phba->fabric_iocb_count);
++		}
++	} else {
++		spin_lock_irqsave(&phba->hbalock, iflags);
++		list_add_tail(&iocb->list, &phba->fabric_iocb_list);
++		spin_unlock_irqrestore(&phba->hbalock, iflags);
++		ret = IOCB_SUCCESS;
++	}
++	return ret;
++}
++
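++/* The lpfc_fabric_abort_*() helpers below cancel queued fabric iocbs --
++ * by vport, by node, for the whole HBA, or for an outstanding FLOGI --
++ * completing each one with IOSTAT_LOCAL_REJECT/IOERR_SLI_ABORTED.
++ */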
++void lpfc_fabric_abort_vport(struct lpfc_vport *vport)
++{
++	LIST_HEAD(completions);
++	struct lpfc_hba  *phba = vport->phba;
++	struct lpfc_iocbq *tmp_iocb, *piocb;
++	IOCB_t *cmd;
++
++	spin_lock_irq(&phba->hbalock);
++	list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list,
++				 list) {
++
++		if (piocb->vport != vport)
++			continue;
++
++		list_move_tail(&piocb->list, &completions);
++	}
++	spin_unlock_irq(&phba->hbalock);
++
++	while (!list_empty(&completions)) {
++		piocb = list_get_first(&completions, struct lpfc_iocbq, list);
++		list_del_init(&piocb->list);
++
++		cmd = &piocb->iocb;
++		cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++		cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++		(piocb->iocb_cmpl) (phba, piocb, piocb);
++	}
++}
++
++void lpfc_fabric_abort_nport(struct lpfc_nodelist *ndlp)
++{
++	LIST_HEAD(completions);
++	struct lpfc_hba  *phba = ndlp->vport->phba;
++	struct lpfc_iocbq *tmp_iocb, *piocb;
++	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
++	IOCB_t *cmd;
++
++	spin_lock_irq(&phba->hbalock);
++	list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list,
++				 list) {
++		if ((lpfc_check_sli_ndlp(phba, pring, piocb, ndlp))) {
++
++			list_move_tail(&piocb->list, &completions);
++		}
++	}
++	spin_unlock_irq(&phba->hbalock);
++
++	while (!list_empty(&completions)) {
++		piocb = list_get_first(&completions, struct lpfc_iocbq, list);
++		list_del_init(&piocb->list);
++
++		cmd = &piocb->iocb;
++		cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++		cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++		(piocb->iocb_cmpl) (phba, piocb, piocb);
++	}
++}
++
++void lpfc_fabric_abort_hba(struct lpfc_hba *phba)
++{
++	LIST_HEAD(completions);
++	struct lpfc_iocbq *piocb;
++	IOCB_t *cmd;
++
++	spin_lock_irq(&phba->hbalock);
++	list_splice_init(&phba->fabric_iocb_list, &completions);
++	spin_unlock_irq(&phba->hbalock);
++
++	while (!list_empty(&completions)) {
++		piocb = list_get_first(&completions, struct lpfc_iocbq, list);
++		list_del_init(&piocb->list);
++
++		cmd = &piocb->iocb;
++		cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++		cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++		(piocb->iocb_cmpl) (phba, piocb, piocb);
++	}
++}
++
++void lpfc_fabric_abort_flogi(struct lpfc_hba *phba)
++{
++	LIST_HEAD(completions);
++	struct lpfc_iocbq *tmp_iocb, *piocb;
++	IOCB_t *cmd;
++	struct lpfc_nodelist *ndlp;
++
++	spin_lock_irq(&phba->hbalock);
++	list_for_each_entry_safe(piocb, tmp_iocb, &phba->fabric_iocb_list,
++				 list) {
++
++		cmd = &piocb->iocb;
++		ndlp = (struct lpfc_nodelist *) piocb->context1;
++		if (cmd->ulpCommand == CMD_ELS_REQUEST64_CR &&
++		    ndlp != NULL &&
++		    ndlp->nlp_DID == Fabric_DID)
++			list_move_tail(&piocb->list, &completions);
++	}
++	spin_unlock_irq(&phba->hbalock);
++
++	while (!list_empty(&completions)) {
++		piocb = list_get_first(&completions, struct lpfc_iocbq, list);
++		list_del_init(&piocb->list);
++
++		cmd = &piocb->iocb;
++		cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
++		cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
++		(piocb->iocb_cmpl) (phba, piocb, piocb);
++	}
++}
++
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hbadisc.c linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_hbadisc.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hbadisc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_hbadisc.c	2007-12-19 15:29:23.000000000 -0500
+@@ -36,6 +36,8 @@
+ #include "lpfc.h"
+ #include "lpfc_logmsg.h"
+ #include "lpfc_crtn.h"
++#include "lpfc_vport.h"
++#include "lpfc_debugfs.h"
+ 
+ /* AlpaArray for assignment of scsid for scan-down and bind_method */
+ static uint8_t lpfcAlpaArray[] = {
+@@ -54,7 +56,7 @@
+ 	0x10, 0x0F, 0x08, 0x04, 0x02, 0x01
+ };
+ 
+-static void lpfc_disc_timeout_handler(struct lpfc_hba *);
++static void lpfc_disc_timeout_handler(struct lpfc_vport *);
+ 
+ void
+ lpfc_terminate_rport_io(struct fc_rport *rport)
+@@ -74,14 +76,16 @@
+ 		return;
+ 	}
+ 
+-	phba = ndlp->nlp_phba;
++	phba  = ndlp->vport->phba;
++
++	lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_RPORT,
++		"rport terminate: sid:x%x did:x%x flg:x%x",
++		ndlp->nlp_sid, ndlp->nlp_DID, ndlp->nlp_flag);
+ 
+-	spin_lock_irq(phba->host->host_lock);
+ 	if (ndlp->nlp_sid != NLP_NO_SID) {
+ 		lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring],
+ 			ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT);
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
+ 
+ 	return;
+ }
+@@ -94,28 +98,98 @@
+ {
+ 	struct lpfc_rport_data *rdata;
+ 	struct lpfc_nodelist * ndlp;
+-	uint8_t *name;
+-	int warn_on = 0;
++	struct lpfc_vport *vport;
+ 	struct lpfc_hba *phba;
++	struct completion devloss_compl;
++	struct lpfc_work_evt *evtp;
+ 
+ 	rdata = rport->dd_data;
+ 	ndlp = rdata->pnode;
+ 
+ 	if (!ndlp) {
+-		if (rport->roles & FC_RPORT_ROLE_FCP_TARGET)
++		if (rport->scsi_target_id != -1) {
+ 			printk(KERN_ERR "Cannot find remote node"
+ 			" for rport in dev_loss_tmo_callbk x%x\n",
+ 			rport->port_id);
++		}
+ 		return;
+ 	}
+ 
+-	if (ndlp->nlp_state == NLP_STE_MAPPED_NODE)
++	vport = ndlp->vport;
++	phba  = vport->phba;
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT,
++		"rport devlosscb: sid:x%x did:x%x flg:x%x",
++		ndlp->nlp_sid, ndlp->nlp_DID, ndlp->nlp_flag);
++
++	init_completion(&devloss_compl);
++	evtp = &ndlp->dev_loss_evt;
++
++	if (!list_empty(&evtp->evt_listp))
++		return;
++
++	spin_lock_irq(&phba->hbalock);
++	evtp->evt_arg1  = ndlp;
++	evtp->evt_arg2  = &devloss_compl;
++	evtp->evt       = LPFC_EVT_DEV_LOSS;
++	list_add_tail(&evtp->evt_listp, &phba->work_list);
++	if (phba->work_wait)
++		wake_up(phba->work_wait);
++
++	spin_unlock_irq(&phba->hbalock);
++
++	wait_for_completion(&devloss_compl);
++
++	return;
++}
++
++/*
++ * This function is called from the worker thread when dev_loss_tmo
++ * expires.
++ */
++void
++lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
++{
++	struct lpfc_rport_data *rdata;
++	struct fc_rport   *rport;
++	struct lpfc_vport *vport;
++	struct lpfc_hba   *phba;
++	uint8_t *name;
++	int warn_on = 0;
++
++	rport = ndlp->rport;
++
++	if (!rport)
+ 		return;
+ 
+-	name = (uint8_t *)&ndlp->nlp_portname;
+-	phba = ndlp->nlp_phba;
++	rdata = rport->dd_data;
++	name = (uint8_t *) &ndlp->nlp_portname;
++	vport = ndlp->vport;
++	phba  = vport->phba;
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT,
++		"rport devlosstmo:did:x%x type:x%x id:x%x",
++		ndlp->nlp_DID, ndlp->nlp_type, rport->scsi_target_id);
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	if (!(vport->load_flag & FC_UNLOADING) &&
++	    ndlp->nlp_state == NLP_STE_MAPPED_NODE)
++		return;
++
++	if (ndlp->nlp_type & NLP_FABRIC) {
++		int  put_node;
++		int  put_rport;
++
++		/* We will clean up these Nodes in linkup */
++		put_node = rdata->pnode != NULL;
++		put_rport = ndlp->rport != NULL;
++		rdata->pnode = NULL;
++		ndlp->rport = NULL;
++		if (put_node)
++			lpfc_nlp_put(ndlp);
++		if (put_rport)
++			put_device(&rport->dev);
++		return;
++	}
+ 
+ 	if (ndlp->nlp_sid != NLP_NO_SID) {
+ 		warn_on = 1;
+@@ -123,76 +197,114 @@
+ 		lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring],
+ 			ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT);
+ 	}
+-	if (phba->fc_flag & FC_UNLOADING)
++	if (vport->load_flag & FC_UNLOADING)
+ 		warn_on = 0;
+ 
+-	spin_unlock_irq(phba->host->host_lock);
+-
+ 	if (warn_on) {
+ 		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+-				"%d:0203 Devloss timeout on "
++				"%d (%d):0203 Devloss timeout on "
+ 				"WWPN %x:%x:%x:%x:%x:%x:%x:%x "
+ 				"NPort x%x Data: x%x x%x x%x\n",
+-				phba->brd_no,
++				phba->brd_no, vport->vpi,
+ 				*name, *(name+1), *(name+2), *(name+3),
+ 				*(name+4), *(name+5), *(name+6), *(name+7),
+ 				ndlp->nlp_DID, ndlp->nlp_flag,
+ 				ndlp->nlp_state, ndlp->nlp_rpi);
+ 	} else {
+ 		lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-				"%d:0204 Devloss timeout on "
++				"%d (%d):0204 Devloss timeout on "
+ 				"WWPN %x:%x:%x:%x:%x:%x:%x:%x "
+ 				"NPort x%x Data: x%x x%x x%x\n",
+-				phba->brd_no,
++				phba->brd_no, vport->vpi,
+ 				*name, *(name+1), *(name+2), *(name+3),
+ 				*(name+4), *(name+5), *(name+6), *(name+7),
+ 				ndlp->nlp_DID, ndlp->nlp_flag,
+ 				ndlp->nlp_state, ndlp->nlp_rpi);
+ 	}
+ 
+-	if (!(phba->fc_flag & FC_UNLOADING) &&
++	if (!(vport->load_flag & FC_UNLOADING) &&
+ 	    !(ndlp->nlp_flag & NLP_DELAY_TMO) &&
+ 	    !(ndlp->nlp_flag & NLP_NPR_2B_DISC) &&
+ 	    (ndlp->nlp_state != NLP_STE_UNMAPPED_NODE))
+-		lpfc_disc_state_machine(phba, ndlp, NULL, NLP_EVT_DEVICE_RM);
++		lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM);
+ 	else {
++		int  put_node;
++		int  put_rport;
++
++		put_node = rdata->pnode != NULL;
++		put_rport = ndlp->rport != NULL;
+ 		rdata->pnode = NULL;
+ 		ndlp->rport = NULL;
++		if (put_node)
+ 		lpfc_nlp_put(ndlp);
++		if (put_rport)
+ 		put_device(&rport->dev);
+ 	}
++}
++
+ 
++void
++lpfc_worker_wake_up(struct lpfc_hba *phba)
++{
++	wake_up(phba->work_wait);
+ 	return;
+ }
+ 
+ static void
+-lpfc_work_list_done(struct lpfc_hba * phba)
++lpfc_work_list_done(struct lpfc_hba *phba)
+ {
+ 	struct lpfc_work_evt  *evtp = NULL;
+ 	struct lpfc_nodelist  *ndlp;
++	struct lpfc_vport     *vport;
+ 	int free_evt;
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	while(!list_empty(&phba->work_list)) {
++	spin_lock_irq(&phba->hbalock);
++	while (!list_empty(&phba->work_list)) {
+ 		list_remove_head((&phba->work_list), evtp, typeof(*evtp),
+ 				 evt_listp);
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(&phba->hbalock);
+ 		free_evt = 1;
+ 		switch (evtp->evt) {
++		case LPFC_EVT_DEV_LOSS_DELAY:
++			free_evt = 0; /* evt is part of ndlp */
++			ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1);
++			vport = ndlp->vport;
++			if (!vport)
++				break;
++
++			lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT,
++				"rport devlossdly:did:x%x flg:x%x",
++				ndlp->nlp_DID, ndlp->nlp_flag, 0);
++
++			if (!(vport->load_flag & FC_UNLOADING) &&
++			    !(ndlp->nlp_flag & NLP_DELAY_TMO) &&
++			    !(ndlp->nlp_flag & NLP_NPR_2B_DISC)) {
++				lpfc_disc_state_machine(vport, ndlp, NULL,
++					NLP_EVT_DEVICE_RM);
++			}
++			break;
+ 		case LPFC_EVT_ELS_RETRY:
+-			ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
++			ndlp = (struct lpfc_nodelist *) (evtp->evt_arg1);
+ 			lpfc_els_retry_delay_handler(ndlp);
++			free_evt = 0; /* evt is part of ndlp */
++			break;
++		case LPFC_EVT_DEV_LOSS:
++			ndlp = (struct lpfc_nodelist *)(evtp->evt_arg1);
++			lpfc_nlp_get(ndlp);
++			lpfc_dev_loss_tmo_handler(ndlp);
+ 			free_evt = 0;
++			complete((struct completion *)(evtp->evt_arg2));
++			lpfc_nlp_put(ndlp);
+ 			break;
+ 		case LPFC_EVT_ONLINE:
+-			if (phba->hba_state < LPFC_LINK_DOWN)
+-				*(int *)(evtp->evt_arg1)  = lpfc_online(phba);
++			if (phba->link_state < LPFC_LINK_DOWN)
++				*(int *) (evtp->evt_arg1) = lpfc_online(phba);
+ 			else
+-				*(int *)(evtp->evt_arg1)  = 0;
++				*(int *) (evtp->evt_arg1) = 0;
+ 			complete((struct completion *)(evtp->evt_arg2));
+ 			break;
+ 		case LPFC_EVT_OFFLINE_PREP:
+-			if (phba->hba_state >= LPFC_LINK_DOWN)
++			if (phba->link_state >= LPFC_LINK_DOWN)
+ 				lpfc_offline_prep(phba);
+ 			*(int *)(evtp->evt_arg1) = 0;
+ 			complete((struct completion *)(evtp->evt_arg2));
+@@ -218,33 +330,31 @@
+ 		case LPFC_EVT_KILL:
+ 			lpfc_offline(phba);
+ 			*(int *)(evtp->evt_arg1)
+-				= (phba->stopped) ? 0 : lpfc_sli_brdkill(phba);
++				= (phba->pport->stopped)
++				        ? 0 : lpfc_sli_brdkill(phba);
+ 			lpfc_unblock_mgmt_io(phba);
+ 			complete((struct completion *)(evtp->evt_arg2));
+ 			break;
+ 		}
+ 		if (free_evt)
+ 			kfree(evtp);
+-		spin_lock_irq(phba->host->host_lock);
++		spin_lock_irq(&phba->hbalock);
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ }
+ 
+-static void
+-lpfc_work_done(struct lpfc_hba * phba)
++void
++lpfc_work_done(struct lpfc_hba *phba)
+ {
+ 	struct lpfc_sli_ring *pring;
+-	int i;
+-	uint32_t ha_copy;
+-	uint32_t control;
+-	uint32_t work_hba_events;
++	uint32_t ha_copy, status, control, work_port_events;
++	struct lpfc_vport *vport;
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	ha_copy = phba->work_ha;
+ 	phba->work_ha = 0;
+-	work_hba_events=phba->work_hba_events;
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	if (ha_copy & HA_ERATT)
+ 		lpfc_handle_eratt(phba);
+@@ -255,66 +365,111 @@
+ 	if (ha_copy & HA_LATT)
+ 		lpfc_handle_latt(phba);
+ 
+-	if (work_hba_events & WORKER_DISC_TMO)
+-		lpfc_disc_timeout_handler(phba);
++	spin_lock_irq(&phba->hbalock);
++	list_for_each_entry(vport, &phba->port_list, listentry) {
++		struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++
++		if (!scsi_host_get(shost)) {
++			continue;
++		}
++		spin_unlock_irq(&phba->hbalock);
++		work_port_events = vport->work_port_events;
++
++		if (work_port_events & WORKER_DISC_TMO)
++			lpfc_disc_timeout_handler(vport);
+ 
+-	if (work_hba_events & WORKER_ELS_TMO)
+-		lpfc_els_timeout_handler(phba);
++		if (work_port_events & WORKER_ELS_TMO)
++			lpfc_els_timeout_handler(vport);
+ 
+-	if (work_hba_events & WORKER_MBOX_TMO)
++		if (work_port_events & WORKER_HB_TMO)
++			lpfc_hb_timeout_handler(phba);
++
++		if (work_port_events & WORKER_MBOX_TMO)
+ 		lpfc_mbox_timeout_handler(phba);
+ 
+-	if (work_hba_events & WORKER_FDMI_TMO)
+-		lpfc_fdmi_tmo_handler(phba);
++		if (work_port_events & WORKER_FABRIC_BLOCK_TMO)
++			lpfc_unblock_fabric_iocbs(phba);
++
++		if (work_port_events & WORKER_FDMI_TMO)
++			lpfc_fdmi_timeout_handler(vport);
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	phba->work_hba_events &= ~work_hba_events;
+-	spin_unlock_irq(phba->host->host_lock);
+-
+-	for (i = 0; i < phba->sli.num_rings; i++, ha_copy >>= 4) {
+-		pring = &phba->sli.ring[i];
+-		if ((ha_copy & HA_RXATT)
++		if (work_port_events & WORKER_RAMP_DOWN_QUEUE)
++			lpfc_ramp_down_queue_handler(phba);
++
++		if (work_port_events & WORKER_RAMP_UP_QUEUE)
++			lpfc_ramp_up_queue_handler(phba);
++
++		spin_lock_irq(&vport->work_port_lock);
++		vport->work_port_events &= ~work_port_events;
++		spin_unlock_irq(&vport->work_port_lock);
++		scsi_host_put(shost);
++		spin_lock_irq(&phba->hbalock);
++	}
++	spin_unlock_irq(&phba->hbalock);
++
++	pring = &phba->sli.ring[LPFC_ELS_RING];
++	status = (ha_copy & (HA_RXMASK  << (4*LPFC_ELS_RING)));
++	status >>= (4*LPFC_ELS_RING);
++	if ((status & HA_RXMASK)
+ 		    || (pring->flag & LPFC_DEFERRED_RING_EVENT)) {
+ 			if (pring->flag & LPFC_STOP_IOCB_MASK) {
+ 				pring->flag |= LPFC_DEFERRED_RING_EVENT;
+ 			} else {
+ 				lpfc_sli_handle_slow_ring_event(phba, pring,
+-								(ha_copy &
++							(status &
+ 								 HA_RXMASK));
+ 				pring->flag &= ~LPFC_DEFERRED_RING_EVENT;
+ 			}
+ 			/*
+ 			 * Turn on Ring interrupts
+ 			 */
+-			spin_lock_irq(phba->host->host_lock);
++		spin_lock_irq(&phba->hbalock);
+ 			control = readl(phba->HCregaddr);
+-			control |= (HC_R0INT_ENA << i);
++		if (!(control & (HC_R0INT_ENA << LPFC_ELS_RING))) {
++			control |= (HC_R0INT_ENA << LPFC_ELS_RING);
+ 			writel(control, phba->HCregaddr);
+ 			readl(phba->HCregaddr); /* flush */
+-			spin_unlock_irq(phba->host->host_lock);
+ 		}
++		spin_unlock_irq(&phba->hbalock);
+ 	}
+-
+-	lpfc_work_list_done (phba);
+-
++	lpfc_work_list_done(phba);
+ }
+ 
+ static int
+-check_work_wait_done(struct lpfc_hba *phba) {
++check_work_wait_done(struct lpfc_hba *phba)
++{
++	struct lpfc_vport *vport;
++	struct lpfc_sli_ring *pring;
++	int rc = 0;
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	if (phba->work_ha ||
+-	    phba->work_hba_events ||
+-	    (!list_empty(&phba->work_list)) ||
++	spin_lock_irq(&phba->hbalock);
++	list_for_each_entry(vport, &phba->port_list, listentry) {
++		if (vport->work_port_events) {
++			rc = 1;
++			goto exit;
++		}
++	}
++
++	if (phba->work_ha || (!list_empty(&phba->work_list)) ||
+ 	    kthread_should_stop()) {
+-		spin_unlock_irq(phba->host->host_lock);
+-		return 1;
+-	} else {
+-		spin_unlock_irq(phba->host->host_lock);
+-		return 0;
++		rc = 1;
++		goto exit;
+ 	}
++
++	pring = &phba->sli.ring[LPFC_ELS_RING];
++	if (pring->flag & LPFC_DEFERRED_RING_EVENT)
++		rc = 1;
++exit:
++	if (rc)
++		phba->work_found++;
++	else
++		phba->work_found = 0;
++
++	spin_unlock_irq(&phba->hbalock);
++	return rc;
+ }
+ 
++
+ int
+ lpfc_do_work(void *p)
+ {
+@@ -324,11 +479,13 @@
+ 
+ 	set_user_nice(current, -20);
+ 	phba->work_wait = &work_waitq;
++	phba->work_found = 0;
+ 
+ 	while (1) {
+ 
+ 		rc = wait_event_interruptible(work_waitq,
+ 						check_work_wait_done(phba));
++
+ 		BUG_ON(rc);
+ 
+ 		if (kthread_should_stop())
+@@ -336,6 +493,17 @@
+ 
+ 		lpfc_work_done(phba);
+ 
++		/* If there is a lot of slow ring work, like during link up,
++		 * check_work_wait_done() may cause this thread to not give
++		 * up the CPU for very long periods of time. This may cause
++		 * soft lockups or other problems. To avoid these situations,
++		 * give up the CPU here after LPFC_MAX_WORKER_ITERATION
++		 * consecutive iterations.
++		 */
++		if (phba->work_found >= LPFC_MAX_WORKER_ITERATION) {
++			phba->work_found = 0;
++			schedule();
++		}
+ 	}
+ 	phba->work_wait = NULL;
+ 	return 0;
+@@ -347,16 +515,17 @@
+  * embedding it in the IOCB.
+  */
+ int
+-lpfc_workq_post_event(struct lpfc_hba * phba, void *arg1, void *arg2,
++lpfc_workq_post_event(struct lpfc_hba *phba, void *arg1, void *arg2,
+ 		      uint32_t evt)
+ {
+ 	struct lpfc_work_evt  *evtp;
++	unsigned long flags;
+ 
+ 	/*
+ 	 * All Mailbox completions and LPFC_ELS_RING rcv ring IOCB events will
+ 	 * be queued to worker thread for processing
+ 	 */
+-	evtp = kmalloc(sizeof(struct lpfc_work_evt), GFP_KERNEL);
++	evtp = kmalloc(sizeof(struct lpfc_work_evt), GFP_ATOMIC);
+ 	if (!evtp)
+ 		return 0;
+ 
+@@ -364,136 +533,210 @@
+ 	evtp->evt_arg2  = arg2;
+ 	evtp->evt       = evt;
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irqsave(&phba->hbalock, flags);
+ 	list_add_tail(&evtp->evt_listp, &phba->work_list);
+ 	if (phba->work_wait)
+-		wake_up(phba->work_wait);
+-	spin_unlock_irq(phba->host->host_lock);
++		lpfc_worker_wake_up(phba);
++	spin_unlock_irqrestore(&phba->hbalock, flags);
+ 
+ 	return 1;
+ }
+ 
+-int
+-lpfc_linkdown(struct lpfc_hba *phba)
++void
++lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove)
+ {
+-	struct lpfc_sli       *psli;
++	struct lpfc_hba  *phba = vport->phba;
+ 	struct lpfc_nodelist  *ndlp, *next_ndlp;
+-	LPFC_MBOXQ_t          *mb;
+ 	int                   rc;
+ 
+-	psli = &phba->sli;
+-	/* sysfs or selective reset may call this routine to clean up */
+-	if (phba->hba_state >= LPFC_LINK_DOWN) {
+-		if (phba->hba_state == LPFC_LINK_DOWN)
+-			return 0;
++	list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
++		if (ndlp->nlp_state == NLP_STE_UNUSED_NODE)
++			continue;
++
++		if (phba->sli3_options & LPFC_SLI3_VPORT_TEARDOWN)
++			lpfc_unreg_rpi(vport, ndlp);
+ 
+-		spin_lock_irq(phba->host->host_lock);
+-		phba->hba_state = LPFC_LINK_DOWN;
+-		spin_unlock_irq(phba->host->host_lock);
++		/* Leave Fabric nodes alone on link down */
++		if (!remove && ndlp->nlp_type & NLP_FABRIC)
++			continue;
++		rc = lpfc_disc_state_machine(vport, ndlp, NULL,
++					     remove
++					     ? NLP_EVT_DEVICE_RM
++					     : NLP_EVT_DEVICE_RECOVERY);
++	}
++	if (phba->sli3_options & LPFC_SLI3_VPORT_TEARDOWN) {
++		lpfc_mbx_unreg_vpi(vport);
++		vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
+ 	}
++}
++
++static void
++lpfc_linkdown_port(struct lpfc_vport *vport)
++{
++	struct lpfc_nodelist *ndlp, *next_ndlp;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+ 
+-	fc_host_post_event(phba->host, fc_get_event_number(),
+-			FCH_EVT_LINKDOWN, 0);
++	fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_LINKDOWN, 0);
+ 
+-	/* Clean up any firmware default rpi's */
+-	if ((mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) {
+-		lpfc_unreg_did(phba, 0xffffffff, mb);
+-		mb->mbox_cmpl=lpfc_sli_def_mbox_cmpl;
+-		if (lpfc_sli_issue_mbox(phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB))
+-		    == MBX_NOT_FINISHED) {
+-			mempool_free( mb, phba->mbox_mem_pool);
+-		}
+-	}
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"Link Down:       state:x%x rtry:x%x flg:x%x",
++		vport->port_state, vport->fc_ns_retry, vport->fc_flag);
+ 
+ 	/* Cleanup any outstanding RSCN activity */
+-	lpfc_els_flush_rscn(phba);
++	lpfc_els_flush_rscn(vport);
+ 
+ 	/* Cleanup any outstanding ELS commands */
+-	lpfc_els_flush_cmd(phba);
++	lpfc_els_flush_cmd(vport);
++
++	lpfc_cleanup_rpis(vport, 0);
+ 
+-	/*
+-	 * Issue a LINK DOWN event to all nodes.
+-	 */
+-	list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) {
+ 				/* free any ndlp's on unused list */
++	list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp)
+ 		if (ndlp->nlp_state == NLP_STE_UNUSED_NODE)
+-			lpfc_drop_node(phba, ndlp);
+-		else		/* otherwise, force node recovery. */
+-			rc = lpfc_disc_state_machine(phba, ndlp, NULL,
+-						     NLP_EVT_DEVICE_RECOVERY);
++			lpfc_drop_node(vport, ndlp);
++
++	/* Turn off discovery timer if its running */
++	lpfc_can_disctmo(vport);
++}
++
++int
++lpfc_linkdown(struct lpfc_hba *phba)
++{
++	struct lpfc_vport *vport = phba->pport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_vport *port_iterator;
++	LPFC_MBOXQ_t          *mb;
++
++	if (phba->link_state == LPFC_LINK_DOWN) {
++		return 0;
++	}
++	spin_lock_irq(&phba->hbalock);
++	if (phba->link_state > LPFC_LINK_DOWN) {
++		phba->link_state = LPFC_LINK_DOWN;
++		phba->pport->fc_flag &= ~FC_LBIT;
++	}
++	spin_unlock_irq(&phba->hbalock);
++
++	list_for_each_entry(port_iterator, &phba->port_list, listentry) {
++		/* Issue a LINK DOWN event to all nodes */
++		lpfc_linkdown_port(port_iterator);
++	}
++
++	/* Clean up any firmware default rpi's */
++	mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++	if (mb) {
++		lpfc_unreg_did(phba, 0xffff, 0xffffffff, mb);
++		mb->vport = vport;
++		mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++		if (lpfc_sli_issue_mbox(phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB))
++		    == MBX_NOT_FINISHED) {
++			mempool_free(mb, phba->mbox_mem_pool);
++		}
+ 	}
+ 
+ 	/* Setup myDID for link up if we are in pt2pt mode */
+-	if (phba->fc_flag & FC_PT2PT) {
+-		phba->fc_myDID = 0;
+-		if ((mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) {
++	if (phba->pport->fc_flag & FC_PT2PT) {
++		phba->pport->fc_myDID = 0;
++		mb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++		if (mb) {
+ 			lpfc_config_link(phba, mb);
+-			mb->mbox_cmpl=lpfc_sli_def_mbox_cmpl;
+-			if (lpfc_sli_issue_mbox
+-			    (phba, mb, (MBX_NOWAIT | MBX_STOP_IOCB))
++			mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++			mb->vport = vport;
++			if (lpfc_sli_issue_mbox(phba, mb,
++						(MBX_NOWAIT | MBX_STOP_IOCB))
+ 			    == MBX_NOT_FINISHED) {
+-				mempool_free( mb, phba->mbox_mem_pool);
++				mempool_free(mb, phba->mbox_mem_pool);
+ 			}
+ 		}
+-		spin_lock_irq(phba->host->host_lock);
+-		phba->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI);
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
++		phba->pport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI);
++		spin_unlock_irq(shost->host_lock);
+ 	}
+-	spin_lock_irq(phba->host->host_lock);
+-	phba->fc_flag &= ~FC_LBIT;
+-	spin_unlock_irq(phba->host->host_lock);
+-
+-	/* Turn off discovery timer if its running */
+-	lpfc_can_disctmo(phba);
+ 
+-	/* Must process IOCBs on all rings to handle ABORTed I/Os */
+ 	return 0;
+ }
+ 
+-static int
+-lpfc_linkup(struct lpfc_hba *phba)
++static void
++lpfc_linkup_cleanup_nodes(struct lpfc_vport *vport)
+ {
+-	struct lpfc_nodelist *ndlp, *next_ndlp;
+-
+-	fc_host_post_event(phba->host, fc_get_event_number(),
+-			FCH_EVT_LINKUP, 0);
+-
+-	spin_lock_irq(phba->host->host_lock);
+-	phba->hba_state = LPFC_LINK_UP;
+-	phba->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY |
+-			   FC_RSCN_MODE | FC_NLP_MORE | FC_RSCN_DISCOVERY);
+-	phba->fc_flag |= FC_NDISC_ACTIVE;
+-	phba->fc_ns_retry = 0;
+-	spin_unlock_irq(phba->host->host_lock);
++	struct lpfc_nodelist *ndlp;
+ 
++	list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
++		if (ndlp->nlp_state == NLP_STE_UNUSED_NODE)
++			continue;
+ 
+-	if (phba->fc_flag & FC_LBIT) {
+-		list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) {
+-			if (ndlp->nlp_state != NLP_STE_UNUSED_NODE) {
+ 				if (ndlp->nlp_type & NLP_FABRIC) {
+-					/*
+-					 * On Linkup its safe to clean up the
+-					 * ndlp from Fabric connections.
++				/* On Linkup it's safe to clean up the ndlp
++				 * from Fabric connections.
+ 					 */
+-					lpfc_nlp_set_state(phba, ndlp,
+-							   NLP_STE_UNUSED_NODE);
++			if (ndlp->nlp_DID != Fabric_DID)
++				lpfc_unreg_rpi(vport, ndlp);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ 				} else if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) {
+-					/*
+-					 * Fail outstanding IO now since
+-					 * device is marked for PLOGI.
++				/* Fail outstanding IO now since device is
++				 * marked for PLOGI.
+ 					 */
+-					lpfc_unreg_rpi(phba, ndlp);
+-				}
+-			}
++			lpfc_unreg_rpi(vport, ndlp);
+ 		}
+ 	}
++}
+ 
+-	/* free any ndlp's on unused list */
+-	list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes,
+-				 nlp_listp) {
++static void
++lpfc_linkup_port(struct lpfc_vport *vport)
++{
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_nodelist *ndlp, *next_ndlp;
++	struct lpfc_hba  *phba = vport->phba;
++
++	if ((vport->load_flag & FC_UNLOADING) != 0)
++		return;
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"Link Up:         top:x%x speed:x%x flg:x%x",
++		phba->fc_topology, phba->fc_linkspeed, phba->link_flag);
++
++	/* If NPIV is not enabled, only bring the physical port up */
++	if (!(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++		(vport != phba->pport))
++		return;
++
++	fc_host_post_event(shost, fc_get_event_number(), FCH_EVT_LINKUP, 0);
++
++	spin_lock_irq(shost->host_lock);
++	vport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY |
++			    FC_RSCN_MODE | FC_NLP_MORE | FC_RSCN_DISCOVERY);
++	vport->fc_flag |= FC_NDISC_ACTIVE;
++	vport->fc_ns_retry = 0;
++	spin_unlock_irq(shost->host_lock);
++
++	if (vport->fc_flag & FC_LBIT)
++		lpfc_linkup_cleanup_nodes(vport);
++
++	/* free any ndlp's in unused state */
++	list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes,
++				 nlp_listp)
+ 		if (ndlp->nlp_state == NLP_STE_UNUSED_NODE)
+-			lpfc_drop_node(phba, ndlp);
++			lpfc_drop_node(vport, ndlp);
++}
++
++static int
++lpfc_linkup(struct lpfc_hba *phba)
++{
++	struct lpfc_vport *vport;
++
++	phba->link_state = LPFC_LINK_UP;
++
++	/* Unblock fabric iocbs if they are blocked */
++	clear_bit(FABRIC_COMANDS_BLOCKED, &phba->bit_flags);
++	del_timer_sync(&phba->fabric_block_timer);
++
++	list_for_each_entry(vport, &phba->port_list, listentry) {
++		lpfc_linkup_port(vport);
+ 	}
++	if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
++		lpfc_issue_clear_la(phba, phba->pport);
+ 
+ 	return 0;
+ }
+@@ -505,14 +748,14 @@
+  * handed off to the SLI layer.
+  */
+ void
+-lpfc_mbx_cmpl_clear_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_clear_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+-	struct lpfc_sli *psli;
+-	MAILBOX_t *mb;
++	struct lpfc_vport *vport = pmb->vport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_sli   *psli = &phba->sli;
++	MAILBOX_t *mb = &pmb->mb;
+ 	uint32_t control;
+ 
+-	psli = &phba->sli;
+-	mb = &pmb->mb;
+ 	/* Since we don't do discovery right now, turn these off here */
+ 	psli->ring[psli->extra_ring].flag &= ~LPFC_STOP_IOCB_EVENT;
+ 	psli->ring[psli->fcp_ring].flag &= ~LPFC_STOP_IOCB_EVENT;
+@@ -522,69 +765,74 @@
+ 	if ((mb->mbxStatus) && (mb->mbxStatus != 0x1601)) {
+ 		/* CLEAR_LA mbox error <mbxStatus> state <hba_state> */
+ 		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+-				"%d:0320 CLEAR_LA mbxStatus error x%x hba "
++				"%d (%d):0320 CLEAR_LA mbxStatus error x%x hba "
+ 				"state x%x\n",
+-				phba->brd_no, mb->mbxStatus, phba->hba_state);
++				phba->brd_no, vport->vpi, mb->mbxStatus,
++				vport->port_state);
+ 
+-		phba->hba_state = LPFC_HBA_ERROR;
++		phba->link_state = LPFC_HBA_ERROR;
+ 		goto out;
+ 	}
+ 
+-	if (phba->fc_flag & FC_ABORT_DISCOVERY)
+-		goto out;
++	if (vport->port_type == LPFC_PHYSICAL_PORT)
++		phba->link_state = LPFC_HBA_READY;
+ 
+-	phba->num_disc_nodes = 0;
+-	/* go thru NPR list and issue ELS PLOGIs */
+-	if (phba->fc_npr_cnt) {
+-		lpfc_els_disc_plogi(phba);
+-	}
++	spin_lock_irq(&phba->hbalock);
++	psli->sli_flag |= LPFC_PROCESS_LA;
++	control = readl(phba->HCregaddr);
++	control |= HC_LAINT_ENA;
++	writel(control, phba->HCregaddr);
++	readl(phba->HCregaddr); /* flush */
++	spin_unlock_irq(&phba->hbalock);
++	return;
++
++	vport->num_disc_nodes = 0;
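++	/* Note: the early return above leaves the discovery-completion
++	 * code below unreachable in this version of the handler.
++	 */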
++	/* go thru NPR nodes and issue ELS PLOGIs */
++	if (vport->fc_npr_cnt)
++		lpfc_els_disc_plogi(vport);
+ 
+-	if (!phba->num_disc_nodes) {
+-		spin_lock_irq(phba->host->host_lock);
+-		phba->fc_flag &= ~FC_NDISC_ACTIVE;
+-		spin_unlock_irq(phba->host->host_lock);
++	if (!vport->num_disc_nodes) {
++		spin_lock_irq(shost->host_lock);
++		vport->fc_flag &= ~FC_NDISC_ACTIVE;
++		spin_unlock_irq(shost->host_lock);
+ 	}
+ 
+-	phba->hba_state = LPFC_HBA_READY;
++	vport->port_state = LPFC_VPORT_READY;
+ 
+ out:
+ 	/* Device Discovery completes */
+-	lpfc_printf_log(phba,
+-			 KERN_INFO,
+-			 LOG_DISCOVERY,
+-			 "%d:0225 Device Discovery completes\n",
+-			 phba->brd_no);
+-
+-	mempool_free( pmb, phba->mbox_mem_pool);
+-
+-	spin_lock_irq(phba->host->host_lock);
+-	phba->fc_flag &= ~FC_ABORT_DISCOVERY;
+-	if (phba->fc_flag & FC_ESTABLISH_LINK) {
+-		phba->fc_flag &= ~FC_ESTABLISH_LINK;
+-	}
+-	spin_unlock_irq(phba->host->host_lock);
++	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++			"%d (%d):0225 Device Discovery completes\n",
++			phba->brd_no, vport->vpi);
++
++	mempool_free(pmb, phba->mbox_mem_pool);
++
++	spin_lock_irq(shost->host_lock);
++	vport->fc_flag &= ~(FC_ABORT_DISCOVERY | FC_ESTABLISH_LINK);
++	spin_unlock_irq(shost->host_lock);
+ 
+ 	del_timer_sync(&phba->fc_estabtmo);
+ 
+-	lpfc_can_disctmo(phba);
++	lpfc_can_disctmo(vport);
+ 
+ 	/* turn on Link Attention interrupts */
+-	spin_lock_irq(phba->host->host_lock);
++
++	spin_lock_irq(&phba->hbalock);
+ 	psli->sli_flag |= LPFC_PROCESS_LA;
+ 	control = readl(phba->HCregaddr);
+ 	control |= HC_LAINT_ENA;
+ 	writel(control, phba->HCregaddr);
+ 	readl(phba->HCregaddr); /* flush */
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	return;
+ }
+ 
++
+ static void
+ lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+-	struct lpfc_sli *psli = &phba->sli;
+-	int rc;
++	struct lpfc_vport *vport = pmb->vport;
+ 
+ 	if (pmb->mb.mbxStatus)
+ 		goto out;
+@@ -592,127 +840,110 @@
+ 	mempool_free(pmb, phba->mbox_mem_pool);
+ 
+ 	if (phba->fc_topology == TOPOLOGY_LOOP &&
+-		phba->fc_flag & FC_PUBLIC_LOOP &&
+-		 !(phba->fc_flag & FC_LBIT)) {
++	    vport->fc_flag & FC_PUBLIC_LOOP &&
++	    !(vport->fc_flag & FC_LBIT)) {
+ 			/* Need to wait for FAN - use discovery timer
+-			 * for timeout.  hba_state is identically
++			 * for timeout.  port_state is identically
+ 			 * LPFC_LOCAL_CFG_LINK while waiting for FAN
+ 			 */
+-			lpfc_set_disctmo(phba);
++			lpfc_set_disctmo(vport);
+ 			return;
+ 		}
+ 
+-	/* Start discovery by sending a FLOGI. hba_state is identically
++	/* Start discovery by sending a FLOGI. port_state is identically
+ 	 * LPFC_FLOGI while waiting for FLOGI cmpl
+ 	 */
+-	phba->hba_state = LPFC_FLOGI;
+-	lpfc_set_disctmo(phba);
+-	lpfc_initial_flogi(phba);
++	if (vport->port_state != LPFC_FLOGI) {
++		vport->port_state = LPFC_FLOGI;
++		lpfc_set_disctmo(vport);
++		lpfc_initial_flogi(vport);
++	}
+ 	return;
+ 
+ out:
+ 	lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+-			"%d:0306 CONFIG_LINK mbxStatus error x%x "
++			"%d (%d):0306 CONFIG_LINK mbxStatus error x%x "
+ 			"HBA state x%x\n",
+-			phba->brd_no, pmb->mb.mbxStatus, phba->hba_state);
++			phba->brd_no, vport->vpi, pmb->mb.mbxStatus,
++			vport->port_state);
+ 
+-	lpfc_linkdown(phba);
++	mempool_free(pmb, phba->mbox_mem_pool);
+ 
+-	phba->hba_state = LPFC_HBA_ERROR;
++	lpfc_linkdown(phba);
+ 
+ 	lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+-			"%d:0200 CONFIG_LINK bad hba state x%x\n",
+-			phba->brd_no, phba->hba_state);
++			"%d (%d):0200 CONFIG_LINK bad hba state x%x\n",
++			phba->brd_no, vport->vpi, vport->port_state);
+ 
+-	lpfc_clear_la(phba, pmb);
+-	pmb->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
+-	rc = lpfc_sli_issue_mbox(phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB));
+-	if (rc == MBX_NOT_FINISHED) {
+-		mempool_free(pmb, phba->mbox_mem_pool);
+-		lpfc_disc_flush_list(phba);
+-		psli->ring[(psli->extra_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+-		psli->ring[(psli->fcp_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+-		psli->ring[(psli->next_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+-		phba->hba_state = LPFC_HBA_READY;
+-	}
++	lpfc_issue_clear_la(phba, vport);
+ 	return;
+ }
+ 
+ static void
+-lpfc_mbx_cmpl_read_sparam(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+-	struct lpfc_sli *psli = &phba->sli;
+ 	MAILBOX_t *mb = &pmb->mb;
+ 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1;
++	struct lpfc_vport  *vport = pmb->vport;
+ 
+ 
+ 	/* Check for error */
+ 	if (mb->mbxStatus) {
+ 		/* READ_SPARAM mbox error <mbxStatus> state <hba_state> */
+ 		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+-				"%d:0319 READ_SPARAM mbxStatus error x%x "
++				"%d (%d):0319 READ_SPARAM mbxStatus error x%x "
+ 				"hba state x%x>\n",
+-				phba->brd_no, mb->mbxStatus, phba->hba_state);
++				phba->brd_no, vport->vpi, mb->mbxStatus,
++				vport->port_state);
+ 
+ 		lpfc_linkdown(phba);
+-		phba->hba_state = LPFC_HBA_ERROR;
+ 		goto out;
+ 	}
+ 
+-	memcpy((uint8_t *) & phba->fc_sparam, (uint8_t *) mp->virt,
++	memcpy((uint8_t *) &vport->fc_sparam, (uint8_t *) mp->virt,
+ 	       sizeof (struct serv_parm));
+ 	if (phba->cfg_soft_wwnn)
+-		u64_to_wwn(phba->cfg_soft_wwnn, phba->fc_sparam.nodeName.u.wwn);
++		u64_to_wwn(phba->cfg_soft_wwnn,
++			   vport->fc_sparam.nodeName.u.wwn);
+ 	if (phba->cfg_soft_wwpn)
+-		u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn);
+-	memcpy((uint8_t *) & phba->fc_nodename,
+-	       (uint8_t *) & phba->fc_sparam.nodeName,
+-	       sizeof (struct lpfc_name));
+-	memcpy((uint8_t *) & phba->fc_portname,
+-	       (uint8_t *) & phba->fc_sparam.portName,
+-	       sizeof (struct lpfc_name));
++		u64_to_wwn(phba->cfg_soft_wwpn,
++			   vport->fc_sparam.portName.u.wwn);
++	memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName,
++	       sizeof(vport->fc_nodename));
++	memcpy(&vport->fc_portname, &vport->fc_sparam.portName,
++	       sizeof(vport->fc_portname));
++	if (vport->port_type == LPFC_PHYSICAL_PORT) {
++		memcpy(&phba->wwnn, &vport->fc_nodename, sizeof(phba->wwnn));
++		memcpy(&phba->wwpn, &vport->fc_portname, sizeof(phba->wwpn));
++	}
++
+ 	lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ 	kfree(mp);
+-	mempool_free( pmb, phba->mbox_mem_pool);
++	mempool_free(pmb, phba->mbox_mem_pool);
+ 	return;
+ 
+ out:
+ 	pmb->context1 = NULL;
+ 	lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ 	kfree(mp);
+-	if (phba->hba_state != LPFC_CLEAR_LA) {
+-		lpfc_clear_la(phba, pmb);
+-		pmb->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
+-		if (lpfc_sli_issue_mbox(phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB))
+-		    == MBX_NOT_FINISHED) {
+-			mempool_free( pmb, phba->mbox_mem_pool);
+-			lpfc_disc_flush_list(phba);
+-			psli->ring[(psli->extra_ring)].flag &=
+-			    ~LPFC_STOP_IOCB_EVENT;
+-			psli->ring[(psli->fcp_ring)].flag &=
+-			    ~LPFC_STOP_IOCB_EVENT;
+-			psli->ring[(psli->next_ring)].flag &=
+-			    ~LPFC_STOP_IOCB_EVENT;
+-			phba->hba_state = LPFC_HBA_READY;
+-		}
+-	} else {
+-		mempool_free( pmb, phba->mbox_mem_pool);
+-	}
++	lpfc_issue_clear_la(phba, vport);
++	mempool_free(pmb, phba->mbox_mem_pool);
+ 	return;
+ }
+ 
+ static void
+ lpfc_mbx_process_link_up(struct lpfc_hba *phba, READ_LA_VAR *la)
+ {
+-	int i;
++	struct lpfc_vport *vport = phba->pport;
+ 	LPFC_MBOXQ_t *sparam_mbox, *cfglink_mbox;
++	int i;
+ 	struct lpfc_dmabuf *mp;
+ 	int rc;
+ 
+ 	sparam_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ 	cfglink_mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	switch (la->UlnkSpeed) {
+ 		case LA_1GHZ_LINK:
+ 			phba->fc_linkspeed = LA_1GHZ_LINK;
+@@ -732,14 +963,16 @@
+ 	}
+ 
+ 	phba->fc_topology = la->topology;
++	phba->link_flag &= ~LS_NPIV_FAB_SUPPORTED;
+ 
+ 	if (phba->fc_topology == TOPOLOGY_LOOP) {
+-	/* Get Loop Map information */
++		phba->sli3_options &= ~LPFC_SLI3_NPIV_ENABLED;
+ 
++		/* Get Loop Map information */
+ 		if (la->il)
+-			phba->fc_flag |= FC_LBIT;
++			vport->fc_flag |= FC_LBIT;
+ 
+-		phba->fc_myDID = la->granted_AL_PA;
++		vport->fc_myDID = la->granted_AL_PA;
+ 		i = la->un.lilpBde64.tus.f.bdeSize;
+ 
+ 		if (i == 0) {
+@@ -781,14 +1014,20 @@
+ 			}
+ 		}
+ 	} else {
+-		phba->fc_myDID = phba->fc_pref_DID;
+-		phba->fc_flag |= FC_LBIT;
++		if (!(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)) {
++			if (phba->max_vpi && phba->cfg_npiv_enable &&
++			   (phba->sli_rev == 3))
++				phba->sli3_options |= LPFC_SLI3_NPIV_ENABLED;
++		}
++		vport->fc_myDID = phba->fc_pref_DID;
++		vport->fc_flag |= FC_LBIT;
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	lpfc_linkup(phba);
+ 	if (sparam_mbox) {
+-		lpfc_read_sparam(phba, sparam_mbox);
++		lpfc_read_sparam(phba, sparam_mbox, 0);
++		sparam_mbox->vport = vport;
+ 		sparam_mbox->mbox_cmpl = lpfc_mbx_cmpl_read_sparam;
+ 		rc = lpfc_sli_issue_mbox(phba, sparam_mbox,
+ 						(MBX_NOWAIT | MBX_STOP_IOCB));
+@@ -799,36 +1038,48 @@
+ 			mempool_free(sparam_mbox, phba->mbox_mem_pool);
+ 			if (cfglink_mbox)
+ 				mempool_free(cfglink_mbox, phba->mbox_mem_pool);
+-			return;
++			goto out;
+ 		}
+ 	}
+ 
+ 	if (cfglink_mbox) {
+-		phba->hba_state = LPFC_LOCAL_CFG_LINK;
++		vport->port_state = LPFC_LOCAL_CFG_LINK;
+ 		lpfc_config_link(phba, cfglink_mbox);
++		cfglink_mbox->vport = vport;
+ 		cfglink_mbox->mbox_cmpl = lpfc_mbx_cmpl_local_config_link;
+ 		rc = lpfc_sli_issue_mbox(phba, cfglink_mbox,
+ 						(MBX_NOWAIT | MBX_STOP_IOCB));
+-		if (rc == MBX_NOT_FINISHED)
+-			mempool_free(cfglink_mbox, phba->mbox_mem_pool);
++		if (rc != MBX_NOT_FINISHED)
++			return;
++		mempool_free(cfglink_mbox, phba->mbox_mem_pool);
+ 	}
++out:
++	lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++	lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
++		"%d (%d):0263 Discovery Mailbox error: state: 0x%x : %p %p\n",
++		phba->brd_no, vport->vpi,
++		vport->port_state, sparam_mbox, cfglink_mbox);
++
++	lpfc_issue_clear_la(phba, vport);
++	return;
+ }
+ 
+ static void
+-lpfc_mbx_issue_link_down(struct lpfc_hba *phba) {
++lpfc_mbx_issue_link_down(struct lpfc_hba *phba)
++{
+ 	uint32_t control;
+ 	struct lpfc_sli *psli = &phba->sli;
+ 
+ 	lpfc_linkdown(phba);
+ 
+ 	/* turn on Link Attention interrupts - no CLEAR_LA needed */
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	psli->sli_flag |= LPFC_PROCESS_LA;
+ 	control = readl(phba->HCregaddr);
+ 	control |= HC_LAINT_ENA;
+ 	writel(control, phba->HCregaddr);
+ 	readl(phba->HCregaddr); /* flush */
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ }
+ 
+ /*
+@@ -838,22 +1089,21 @@
+  * handed off to the SLI layer.
+  */
+ void
+-lpfc_mbx_cmpl_read_la(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_read_la(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
++	struct lpfc_vport *vport = pmb->vport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
+ 	READ_LA_VAR *la;
+ 	MAILBOX_t *mb = &pmb->mb;
+ 	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
+ 
+ 	/* Check for error */
+ 	if (mb->mbxStatus) {
+-		lpfc_printf_log(phba,
+-				KERN_INFO,
+-				LOG_LINK_EVENT,
++		lpfc_printf_log(phba, KERN_INFO, LOG_LINK_EVENT,
+ 				"%d:1307 READ_LA mbox error x%x state x%x\n",
+-				phba->brd_no,
+-				mb->mbxStatus, phba->hba_state);
++				phba->brd_no, mb->mbxStatus, vport->port_state);
+ 		lpfc_mbx_issue_link_down(phba);
+-		phba->hba_state = LPFC_HBA_ERROR;
++		phba->link_state = LPFC_HBA_ERROR;
+ 		goto lpfc_mbx_cmpl_read_la_free_mbuf;
+ 	}
+ 
+@@ -861,27 +1111,26 @@
+ 
+ 	memcpy(&phba->alpa_map[0], mp->virt, 128);
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
+ 	if (la->pb)
+-		phba->fc_flag |= FC_BYPASSED_MODE;
++		vport->fc_flag |= FC_BYPASSED_MODE;
+ 	else
+-		phba->fc_flag &= ~FC_BYPASSED_MODE;
+-	spin_unlock_irq(phba->host->host_lock);
++		vport->fc_flag &= ~FC_BYPASSED_MODE;
++	spin_unlock_irq(shost->host_lock);
+ 
+ 	if (((phba->fc_eventTag + 1) < la->eventTag) ||
+ 	     (phba->fc_eventTag == la->eventTag)) {
+ 		phba->fc_stat.LinkMultiEvent++;
+-		if (la->attType == AT_LINK_UP) {
++		if (la->attType == AT_LINK_UP)
+ 			if (phba->fc_eventTag != 0)
+ 				lpfc_linkdown(phba);
+-		}
+-	}
++	}
+ 
+ 	phba->fc_eventTag = la->eventTag;
+ 
+ 	if (la->attType == AT_LINK_UP) {
+ 		phba->fc_stat.LinkUp++;
+-		if (phba->fc_flag & FC_LOOPBACK_MODE) {
++		if (phba->link_flag & LS_LOOPBACK_MODE) {
+ 			lpfc_printf_log(phba, KERN_INFO, LOG_LINK_EVENT,
+ 				"%d:1306 Link Up Event in loop back mode "
+ 				"x%x received Data: x%x x%x x%x x%x\n",
+@@ -903,7 +1152,7 @@
+ 				"%d:1305 Link Down Event x%x received "
+ 				"Data: x%x x%x x%x\n",
+ 				phba->brd_no, la->eventTag, phba->fc_eventTag,
+-				phba->hba_state, phba->fc_flag);
++				phba->pport->port_state, vport->fc_flag);
+ 		lpfc_mbx_issue_link_down(phba);
+ 	}
+ 
+@@ -921,31 +1170,115 @@
+  * handed off to the SLI layer.
+  */
+ void
+-lpfc_mbx_cmpl_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+-	struct lpfc_sli *psli;
+-	MAILBOX_t *mb;
+-	struct lpfc_dmabuf *mp;
+-	struct lpfc_nodelist *ndlp;
+-
+-	psli = &phba->sli;
+-	mb = &pmb->mb;
+-
+-	ndlp = (struct lpfc_nodelist *) pmb->context2;
+-	mp = (struct lpfc_dmabuf *) (pmb->context1);
++	struct lpfc_vport  *vport = pmb->vport;
++	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
++	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
+ 
+ 	pmb->context1 = NULL;
+ 
+ 	/* Good status, call state machine */
+-	lpfc_disc_state_machine(phba, ndlp, pmb, NLP_EVT_CMPL_REG_LOGIN);
++	lpfc_disc_state_machine(vport, ndlp, pmb, NLP_EVT_CMPL_REG_LOGIN);
+ 	lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ 	kfree(mp);
+-	mempool_free( pmb, phba->mbox_mem_pool);
++	mempool_free(pmb, phba->mbox_mem_pool);
+ 	lpfc_nlp_put(ndlp);
+ 
+ 	return;
+ }
+ 
++static void
++lpfc_mbx_cmpl_unreg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
++{
++	MAILBOX_t *mb = &pmb->mb;
++	struct lpfc_vport *vport = pmb->vport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++
++	switch (mb->mbxStatus) {
++	case 0x0011:
++	case 0x0020:
++	case 0x9700:
++		lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++				"%d (%d):0911 cmpl_unreg_vpi, "
++				"mb status = 0x%x\n",
++				phba->brd_no, vport->vpi, mb->mbxStatus);
++		break;
++	}
++	vport->unreg_vpi_cmpl = VPORT_OK;
++	mempool_free(pmb, phba->mbox_mem_pool);
++	/*
++	 * This shost reference might have been taken at the beginning of
++	 * lpfc_vport_delete()
++	 */
++	if (vport->load_flag & FC_UNLOADING)
++		scsi_host_put(shost);
++}
++
++void
++lpfc_mbx_unreg_vpi(struct lpfc_vport *vport)
++{
++	struct lpfc_hba  *phba = vport->phba;
++	LPFC_MBOXQ_t *mbox;
++	int rc;
++
++	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++	if (!mbox)
++		return;
++
++	lpfc_unreg_vpi(phba, vport->vpi, mbox);
++	mbox->vport = vport;
++	mbox->mbox_cmpl = lpfc_mbx_cmpl_unreg_vpi;
++	rc = lpfc_sli_issue_mbox(phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB));
++	if (rc == MBX_NOT_FINISHED) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_VPORT,
++				"%d (%d):1800 Could not issue unreg_vpi\n",
++				phba->brd_no, vport->vpi);
++		mempool_free(mbox, phba->mbox_mem_pool);
++		vport->unreg_vpi_cmpl = VPORT_ERROR;
++	}
++}
++
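lpfc_mbx_unreg_vpi() above shows the driver's standard asynchronous mailbox pattern: allocate from the mailbox mempool, build the command, attach the owning vport and a completion handler, then issue with MBX_NOWAIT. Ownership passes to the SLI layer on success; only on MBX_NOT_FINISHED does the caller still own, and must free, the mailbox. The skeleton, with my_cmpl as a placeholder completion handler:

	LPFC_MBOXQ_t *mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
	if (!mbox)
		return;
	lpfc_unreg_vpi(phba, vport->vpi, mbox);	/* build the command */
	mbox->vport = vport;			/* completion needs the vport */
	mbox->mbox_cmpl = my_cmpl;		/* placeholder handler */
	if (lpfc_sli_issue_mbox(phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB))
	    == MBX_NOT_FINISHED)
		mempool_free(mbox, phba->mbox_mem_pool); /* still ours on failure */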
++static void
++lpfc_mbx_cmpl_reg_vpi(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
++{
++	struct lpfc_vport *vport = pmb->vport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++	MAILBOX_t *mb = &pmb->mb;
++
++	switch (mb->mbxStatus) {
++	case 0x0011:
++	case 0x9601:
++	case 0x9602:
++		lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
++				"%d (%d):0912 cmpl_reg_vpi, mb status = 0x%x\n",
++				phba->brd_no, vport->vpi, mb->mbxStatus);
++		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++		spin_lock_irq(shost->host_lock);
++		vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP);
++		spin_unlock_irq(shost->host_lock);
++		vport->fc_myDID = 0;
++		goto out;
++	}
++
++	vport->num_disc_nodes = 0;
++	/* go thru NPR list and issue ELS PLOGIs */
++	if (vport->fc_npr_cnt)
++		lpfc_els_disc_plogi(vport);
++
++	if (!vport->num_disc_nodes) {
++		spin_lock_irq(shost->host_lock);
++		vport->fc_flag &= ~FC_NDISC_ACTIVE;
++		spin_unlock_irq(shost->host_lock);
++		lpfc_can_disctmo(vport);
++	}
++	vport->port_state = LPFC_VPORT_READY;
++
++out:
++	mempool_free(pmb, phba->mbox_mem_pool);
++	return;
++}
++
+ /*
+  * This routine handles processing a Fabric REG_LOGIN mailbox
+  * command upon completion. It is setup in the LPFC_MBOXQ
+@@ -953,20 +1286,14 @@
+  * handed off to the SLI layer.
+  */
+ void
+-lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_fabric_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+-	struct lpfc_sli *psli;
+-	MAILBOX_t *mb;
+-	struct lpfc_dmabuf *mp;
++	struct lpfc_vport *vport = pmb->vport;
++	struct lpfc_vport *next_vport;
++	MAILBOX_t *mb = &pmb->mb;
++	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
+ 	struct lpfc_nodelist *ndlp;
+-	struct lpfc_nodelist *ndlp_fdmi;
+-
+-
+-	psli = &phba->sli;
+-	mb = &pmb->mb;
+-
+ 	ndlp = (struct lpfc_nodelist *) pmb->context2;
+-	mp = (struct lpfc_dmabuf *) (pmb->context1);
+ 
+ 	pmb->context1 = NULL;
+ 	pmb->context2 = NULL;
+@@ -977,60 +1304,46 @@
+ 		mempool_free(pmb, phba->mbox_mem_pool);
+ 		lpfc_nlp_put(ndlp);
+ 
+-		/* FLOGI failed, so just use loop map to make discovery list */
+-		lpfc_disc_list_loopmap(phba);
++		if (phba->fc_topology == TOPOLOGY_LOOP) {
++			/* FLOGI failed, use loop map to make discovery list */
++			lpfc_disc_list_loopmap(vport);
+ 
+-		/* Start discovery */
+-		lpfc_disc_start(phba);
++			/* Start discovery */
++			lpfc_disc_start(vport);
++			return;
++		}
++
++		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++		lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
++			"%d (%d):0258 Register Fabric login error: 0x%x\n",
++			phba->brd_no, vport->vpi, mb->mbxStatus);
++
+ 		return;
+ 	}
+ 
+ 	ndlp->nlp_rpi = mb->un.varWords[0];
+ 	ndlp->nlp_type |= NLP_FABRIC;
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+ 
+ 	lpfc_nlp_put(ndlp);	/* Drop the reference from the mbox */
+ 
+-	if (phba->hba_state == LPFC_FABRIC_CFG_LINK) {
+-		/* This NPort has been assigned an NPort_ID by the fabric as a
+-		 * result of the completed fabric login.  Issue a State Change
+-		 * Registration (SCR) ELS request to the fabric controller
+-		 * (SCR_DID) so that this NPort gets RSCN events from the
+-		 * fabric.
+-		 */
+-		lpfc_issue_els_scr(phba, SCR_DID, 0);
++	if (vport->port_state == LPFC_FABRIC_CFG_LINK) {
++		list_for_each_entry(next_vport, &phba->port_list, listentry) {
++			if (next_vport->port_type == LPFC_PHYSICAL_PORT)
++				continue;
+ 
+-		ndlp = lpfc_findnode_did(phba, NameServer_DID);
+-		if (!ndlp) {
+-			/* Allocate a new node instance. If the pool is empty,
+-			 * start the discovery process and skip the Nameserver
+-			 * login process.  This is attempted again later on.
+-			 * Otherwise, issue a Port Login (PLOGI) to NameServer.
+-			 */
+-			ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC);
+-			if (!ndlp) {
+-				lpfc_disc_start(phba);
+-				lpfc_mbuf_free(phba, mp->virt, mp->phys);
+-				kfree(mp);
+-				mempool_free(pmb, phba->mbox_mem_pool);
+-				return;
+-			} else {
+-				lpfc_nlp_init(phba, ndlp, NameServer_DID);
+-				ndlp->nlp_type |= NLP_FABRIC;
+-			}
+-		}
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+-		lpfc_issue_els_plogi(phba, NameServer_DID, 0);
+-		if (phba->cfg_fdmi_on) {
+-			ndlp_fdmi = mempool_alloc(phba->nlp_mem_pool,
+-								GFP_KERNEL);
+-			if (ndlp_fdmi) {
+-				lpfc_nlp_init(phba, ndlp_fdmi, FDMI_DID);
+-				ndlp_fdmi->nlp_type |= NLP_FABRIC;
+-				ndlp_fdmi->nlp_state = NLP_STE_PLOGI_ISSUE;
+-				lpfc_issue_els_plogi(phba, FDMI_DID, 0);
++			if (phba->link_flag & LS_NPIV_FAB_SUPPORTED)
++				lpfc_initial_fdisc(next_vport);
++			else if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) {
++				lpfc_vport_set_state(vport,
++						     FC_VPORT_NO_FABRIC_SUPP);
++				lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++						"%d (%d):0259 No NPIV Fabric "
++						"support\n",
++						phba->brd_no, vport->vpi);
+ 			}
+ 		}
++		lpfc_do_scr_ns_plogi(phba, vport);
+ 	}
+ 
+ 	lpfc_mbuf_free(phba, mp->virt, mp->phys);
+@@ -1046,32 +1359,36 @@
+  * handed off to the SLI layer.
+  */
+ void
+-lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_ns_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+-	struct lpfc_sli *psli;
+-	MAILBOX_t *mb;
+-	struct lpfc_dmabuf *mp;
+-	struct lpfc_nodelist *ndlp;
+-
+-	psli = &phba->sli;
+-	mb = &pmb->mb;
+-
+-	ndlp = (struct lpfc_nodelist *) pmb->context2;
+-	mp = (struct lpfc_dmabuf *) (pmb->context1);
++	MAILBOX_t *mb = &pmb->mb;
++	struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1);
++	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
++	struct lpfc_vport *vport = pmb->vport;
+ 
+ 	if (mb->mbxStatus) {
++out:
+ 		lpfc_nlp_put(ndlp);
+ 		lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ 		kfree(mp);
+ 		mempool_free(pmb, phba->mbox_mem_pool);
+-		lpfc_drop_node(phba, ndlp);
++		lpfc_drop_node(vport, ndlp);
+ 
+-		/* RegLogin failed, so just use loop map to make discovery
+-		   list */
+-		lpfc_disc_list_loopmap(phba);
++		if (phba->fc_topology == TOPOLOGY_LOOP) {
++			/*
++			 * RegLogin failed, use loop map to make discovery
++			 * list
++			 */
++			lpfc_disc_list_loopmap(vport);
+ 
+-		/* Start discovery */
+-		lpfc_disc_start(phba);
++			/* Start discovery */
++			lpfc_disc_start(vport);
++			return;
++		}
++		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++			"%d (%d):0260 Register NameServer error: 0x%x\n",
++			phba->brd_no, vport->vpi, mb->mbxStatus);
+ 		return;
+ 	}
+ 
+@@ -1079,37 +1396,43 @@
+ 
+ 	ndlp->nlp_rpi = mb->un.varWords[0];
+ 	ndlp->nlp_type |= NLP_FABRIC;
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+ 
+-	if (phba->hba_state < LPFC_HBA_READY) {
+-		/* Link up discovery requires Fabrib registration. */
+-		lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RNN_ID);
+-		lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RSNN_NN);
+-		lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RFT_ID);
+-		lpfc_ns_cmd(phba, ndlp, SLI_CTNS_RFF_ID);
++	if (vport->port_state < LPFC_VPORT_READY) {
++		/* Link up discovery requires Fabric registration. */
++		lpfc_ns_cmd(vport, SLI_CTNS_RFF_ID, 0, 0); /* Do this first! */
++		lpfc_ns_cmd(vport, SLI_CTNS_RNN_ID, 0, 0);
++		lpfc_ns_cmd(vport, SLI_CTNS_RSNN_NN, 0, 0);
++		lpfc_ns_cmd(vport, SLI_CTNS_RSPN_ID, 0, 0);
++		lpfc_ns_cmd(vport, SLI_CTNS_RFT_ID, 0, 0);
++
++		/* Issue SCR just before NameServer GID_FT Query */
++		lpfc_issue_els_scr(vport, SCR_DID, 0);
+ 	}
+ 
+-	phba->fc_ns_retry = 0;
++	vport->fc_ns_retry = 0;
+ 	/* Good status, issue CT Request to NameServer */
+-	if (lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT)) {
++	if (lpfc_ns_cmd(vport, SLI_CTNS_GID_FT, 0, 0)) {
+ 		/* Cannot issue NameServer Query, so finish up discovery */
+-		lpfc_disc_start(phba);
++		goto out;
+ 	}
+ 
+ 	lpfc_nlp_put(ndlp);
+ 	lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ 	kfree(mp);
+-	mempool_free( pmb, phba->mbox_mem_pool);
++	mempool_free(pmb, phba->mbox_mem_pool);
+ 
+ 	return;
+ }
+ 
+ static void
+-lpfc_register_remote_port(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
++lpfc_register_remote_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ 	struct fc_rport *rport;
+ 	struct lpfc_rport_data *rdata;
+ 	struct fc_rport_identifiers rport_ids;
++	struct lpfc_hba  *phba = vport->phba;
+ 
+ 	/* Remote port has reappeared. Re-register w/ FC transport */
+ 	rport_ids.node_name = wwn_to_u64(ndlp->nlp_nodename.u.wwn);
+@@ -1125,10 +1448,15 @@
+ 	 * registered the port.
+ 	 */
+ 	if (ndlp->rport && ndlp->rport->dd_data &&
+-	    *(struct lpfc_rport_data **) ndlp->rport->dd_data) {
++	    ((struct lpfc_rport_data *) ndlp->rport->dd_data)->pnode == ndlp) {
+ 		lpfc_nlp_put(ndlp);
+ 	}
+-	ndlp->rport = rport = fc_remote_port_add(phba->host, 0, &rport_ids);
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT,
++		"rport add:       did:x%x flg:x%x type x%x",
++		ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type);
++
++	ndlp->rport = rport = fc_remote_port_add(shost, 0, &rport_ids);
+ 	if (!rport || !get_device(&rport->dev)) {
+ 		dev_printk(KERN_WARNING, &phba->pcidev->dev,
+ 			   "Warning: fc_remote_port_add failed\n");
+@@ -1154,22 +1482,17 @@
+ 		(rport->scsi_target_id < LPFC_MAX_TARGET)) {
+ 		ndlp->nlp_sid = rport->scsi_target_id;
+ 	}
+-
+ 	return;
+ }
+ 
+ static void
+-lpfc_unregister_remote_port(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
++lpfc_unregister_remote_port(struct lpfc_nodelist *ndlp)
+ {
+ 	struct fc_rport *rport = ndlp->rport;
+-	struct lpfc_rport_data *rdata = rport->dd_data;
+ 
+-	if (rport->scsi_target_id == -1) {
+-		ndlp->rport = NULL;
+-		rdata->pnode = NULL;
+-		lpfc_nlp_put(ndlp);
+-		put_device(&rport->dev);
+-	}
++	lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_RPORT,
++		"rport delete:    did:x%x flg:x%x type x%x",
++		ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type);
+ 
+ 	fc_remote_port_delete(rport);
+ 
+@@ -1177,42 +1500,46 @@
+ }
+ 
+ static void
+-lpfc_nlp_counters(struct lpfc_hba *phba, int state, int count)
++lpfc_nlp_counters(struct lpfc_vport *vport, int state, int count)
+ {
+-	spin_lock_irq(phba->host->host_lock);
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++	spin_lock_irq(shost->host_lock);
+ 	switch (state) {
+ 	case NLP_STE_UNUSED_NODE:
+-		phba->fc_unused_cnt += count;
++		vport->fc_unused_cnt += count;
+ 		break;
+ 	case NLP_STE_PLOGI_ISSUE:
+-		phba->fc_plogi_cnt += count;
++		vport->fc_plogi_cnt += count;
+ 		break;
+ 	case NLP_STE_ADISC_ISSUE:
+-		phba->fc_adisc_cnt += count;
++		vport->fc_adisc_cnt += count;
+ 		break;
+ 	case NLP_STE_REG_LOGIN_ISSUE:
+-		phba->fc_reglogin_cnt += count;
++		vport->fc_reglogin_cnt += count;
+ 		break;
+ 	case NLP_STE_PRLI_ISSUE:
+-		phba->fc_prli_cnt += count;
++		vport->fc_prli_cnt += count;
+ 		break;
+ 	case NLP_STE_UNMAPPED_NODE:
+-		phba->fc_unmap_cnt += count;
++		vport->fc_unmap_cnt += count;
+ 		break;
+ 	case NLP_STE_MAPPED_NODE:
+-		phba->fc_map_cnt += count;
++		vport->fc_map_cnt += count;
+ 		break;
+ 	case NLP_STE_NPR_NODE:
+-		phba->fc_npr_cnt += count;
++		vport->fc_npr_cnt += count;
+ 		break;
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
+ }
+ 
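lpfc_nlp_counters() keeps one per-vport count for each node state, updated under the SCSI host lock. The counts stay balanced because every state change decrements the old state and increments the new one, which is exactly what lpfc_nlp_set_state() below does:

	lpfc_nlp_counters(vport, old_state, -1);	/* leave the old state */
	ndlp->nlp_state = state;
	lpfc_nlp_counters(vport, state, 1);		/* enter the new state */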
+ static void
+-lpfc_nlp_state_cleanup(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp,
++lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ 		       int old_state, int new_state)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
+ 	if (new_state == NLP_STE_UNMAPPED_NODE) {
+ 		ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR);
+ 		ndlp->nlp_flag &= ~NLP_NODEV_REMOVE;
+@@ -1226,21 +1553,20 @@
+ 	/* Transport interface */
+ 	if (ndlp->rport && (old_state == NLP_STE_MAPPED_NODE ||
+ 			    old_state == NLP_STE_UNMAPPED_NODE)) {
+-		phba->nport_event_cnt++;
+-		lpfc_unregister_remote_port(phba, ndlp);
++		vport->phba->nport_event_cnt++;
++		lpfc_unregister_remote_port(ndlp);
+ 	}
+ 
+ 	if (new_state ==  NLP_STE_MAPPED_NODE ||
+ 	    new_state == NLP_STE_UNMAPPED_NODE) {
+-		phba->nport_event_cnt++;
++		vport->phba->nport_event_cnt++;
+-			/*
+-			 * Tell the fc transport about the port, if we haven't
+-			 * already. If we have, and it's a scsi entity, be
+-			 * sure to unblock any attached scsi devices
+-			 */
++		/*
++		 * Tell the fc transport about the port, if we haven't
++		 * already. If we have, and it's a scsi entity, be
++		 * sure to unblock any attached scsi devices
++		 */
+-			lpfc_register_remote_port(phba, ndlp);
++		lpfc_register_remote_port(vport, ndlp);
+ 	}
+-
+ 			/*
+ 			 * if we added to Mapped list, but the remote port
+ 			 * registration failed or assigned a target id outside
+@@ -1251,10 +1577,10 @@
+ 	    (!ndlp->rport ||
+ 	     ndlp->rport->scsi_target_id == -1 ||
+ 	     ndlp->rport->scsi_target_id >= LPFC_MAX_TARGET)) {
+-		spin_lock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_TGT_NO_SCSIID;
+-		spin_unlock_irq(phba->host->host_lock);
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++		spin_unlock_irq(shost->host_lock);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+ 	}
+ }
+ 
+@@ -1280,61 +1606,74 @@
+ }
+ 
+ void
+-lpfc_nlp_set_state(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, int state)
++lpfc_nlp_set_state(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++		   int state)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ 	int  old_state = ndlp->nlp_state;
+ 	char name1[16], name2[16];
+ 
+-	lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
+-			"%d:0904 NPort state transition x%06x, %s -> %s\n",
+-			phba->brd_no,
++	lpfc_printf_log(vport->phba, KERN_INFO, LOG_NODE,
++			"%d (%d):0904 NPort state transition x%06x, %s -> %s\n",
++			vport->phba->brd_no, vport->vpi,
+ 			ndlp->nlp_DID,
+ 			lpfc_nlp_state_name(name1, sizeof(name1), old_state),
+ 			lpfc_nlp_state_name(name2, sizeof(name2), state));
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_NODE,
++		"node statechg    did:x%x old:%d ste:%d",
++		ndlp->nlp_DID, old_state, state);
++
+ 	if (old_state == NLP_STE_NPR_NODE &&
+ 	    (ndlp->nlp_flag & NLP_DELAY_TMO) != 0 &&
+ 	    state != NLP_STE_NPR_NODE)
+-		lpfc_cancel_retry_delay_tmo(phba, ndlp);
++		lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ 	if (old_state == NLP_STE_UNMAPPED_NODE) {
+ 		ndlp->nlp_flag &= ~NLP_TGT_NO_SCSIID;
+ 		ndlp->nlp_type &= ~NLP_FC_NODE;
+ 	}
+ 
+ 	if (list_empty(&ndlp->nlp_listp)) {
+-		spin_lock_irq(phba->host->host_lock);
+-		list_add_tail(&ndlp->nlp_listp, &phba->fc_nodes);
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
++		list_add_tail(&ndlp->nlp_listp, &vport->fc_nodes);
++		spin_unlock_irq(shost->host_lock);
+ 	} else if (old_state)
+-		lpfc_nlp_counters(phba, old_state, -1);
++		lpfc_nlp_counters(vport, old_state, -1);
+ 
+ 	ndlp->nlp_state = state;
+-	lpfc_nlp_counters(phba, state, 1);
+-	lpfc_nlp_state_cleanup(phba, ndlp, old_state, state);
++	lpfc_nlp_counters(vport, state, 1);
++	lpfc_nlp_state_cleanup(vport, ndlp, old_state, state);
+ }
+ 
+ void
+-lpfc_dequeue_node(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
++lpfc_dequeue_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
+ 	if ((ndlp->nlp_flag & NLP_DELAY_TMO) != 0)
+-		lpfc_cancel_retry_delay_tmo(phba, ndlp);
++		lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ 	if (ndlp->nlp_state && !list_empty(&ndlp->nlp_listp))
+-		lpfc_nlp_counters(phba, ndlp->nlp_state, -1);
+-	spin_lock_irq(phba->host->host_lock);
++		lpfc_nlp_counters(vport, ndlp->nlp_state, -1);
++	spin_lock_irq(shost->host_lock);
+ 	list_del_init(&ndlp->nlp_listp);
+-	spin_unlock_irq(phba->host->host_lock);
+-	lpfc_nlp_state_cleanup(phba, ndlp, ndlp->nlp_state, 0);
++	spin_unlock_irq(shost->host_lock);
++	lpfc_nlp_state_cleanup(vport, ndlp, ndlp->nlp_state,
++			       NLP_STE_UNUSED_NODE);
+ }
+ 
+ void
+-lpfc_drop_node(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
++lpfc_drop_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
+ 	if ((ndlp->nlp_flag & NLP_DELAY_TMO) != 0)
+-		lpfc_cancel_retry_delay_tmo(phba, ndlp);
++		lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ 	if (ndlp->nlp_state && !list_empty(&ndlp->nlp_listp))
+-		lpfc_nlp_counters(phba, ndlp->nlp_state, -1);
+-	spin_lock_irq(phba->host->host_lock);
++		lpfc_nlp_counters(vport, ndlp->nlp_state, -1);
++	spin_lock_irq(shost->host_lock);
+ 	list_del_init(&ndlp->nlp_listp);
+-	spin_unlock_irq(phba->host->host_lock);
++	ndlp->nlp_flag &= ~NLP_TARGET_REMOVE;
++	spin_unlock_irq(shost->host_lock);
+ 	lpfc_nlp_put(ndlp);
+ }
+ 
+@@ -1342,11 +1681,13 @@
+  * Start / ReStart rescue timer for Discovery / RSCN handling
+  */
+ void
+-lpfc_set_disctmo(struct lpfc_hba * phba)
++lpfc_set_disctmo(struct lpfc_vport *vport)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
+ 	uint32_t tmo;
+ 
+-	if (phba->hba_state == LPFC_LOCAL_CFG_LINK) {
++	if (vport->port_state == LPFC_LOCAL_CFG_LINK) {
+ 		/* For FAN, timeout should be greater then edtov */
+ 		tmo = (((phba->fc_edtov + 999) / 1000) + 1);
+ 	} else {
+@@ -1356,18 +1697,25 @@
+ 		tmo = ((phba->fc_ratov * 3) + 3);
+ 	}
+ 
+-	mod_timer(&phba->fc_disctmo, jiffies + HZ * tmo);
+-	spin_lock_irq(phba->host->host_lock);
+-	phba->fc_flag |= FC_DISC_TMO;
+-	spin_unlock_irq(phba->host->host_lock);
++	if (!timer_pending(&vport->fc_disctmo)) {
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++			"set disc timer:  tmo:x%x state:x%x flg:x%x",
++			tmo, vport->port_state, vport->fc_flag);
++	}
++
++	mod_timer(&vport->fc_disctmo, jiffies + HZ * tmo);
++	spin_lock_irq(shost->host_lock);
++	vport->fc_flag |= FC_DISC_TMO;
++	spin_unlock_irq(shost->host_lock);
+ 
+ 	/* Start Discovery Timer state <hba_state> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-			"%d:0247 Start Discovery Timer state x%x "
++			"%d (%d):0247 Start Discovery Timer state x%x "
+ 			"Data: x%x x%lx x%x x%x\n",
+-			phba->brd_no,
+-			phba->hba_state, tmo, (unsigned long)&phba->fc_disctmo,
+-			phba->fc_plogi_cnt, phba->fc_adisc_cnt);
++			phba->brd_no, vport->vpi, vport->port_state, tmo,
++			(unsigned long)&vport->fc_disctmo, vport->fc_plogi_cnt,
++			vport->fc_adisc_cnt);
+ 
+ 	return;
+ }
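Worked through with assumed values, the timeout arithmetic above gives: while waiting for FAN with fc_edtov = 2000 ms, tmo = ((2000 + 999) / 1000) + 1 = 3 seconds; in the normal case with fc_ratov = 10 s, tmo = (10 * 3) + 3 = 33 seconds, a little over three times R_A_TOV. The timer itself is armed in jiffies:

	mod_timer(&vport->fc_disctmo, jiffies + HZ * tmo);	/* tmo in seconds */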
+@@ -1376,23 +1724,34 @@
+  * Cancel rescue timer for Discovery / RSCN handling
+  */
+ int
+-lpfc_can_disctmo(struct lpfc_hba * phba)
++lpfc_can_disctmo(struct lpfc_vport *vport)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
++	unsigned long iflags;
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"can disc timer:  state:x%x rtry:x%x flg:x%x",
++		vport->port_state, vport->fc_ns_retry, vport->fc_flag);
++
+ 	/* Turn off discovery timer if its running */
+-	if (phba->fc_flag & FC_DISC_TMO) {
+-		spin_lock_irq(phba->host->host_lock);
+-		phba->fc_flag &= ~FC_DISC_TMO;
+-		spin_unlock_irq(phba->host->host_lock);
+-		del_timer_sync(&phba->fc_disctmo);
+-		phba->work_hba_events &= ~WORKER_DISC_TMO;
++	if (vport->fc_flag & FC_DISC_TMO) {
++		spin_lock_irqsave(shost->host_lock, iflags);
++		vport->fc_flag &= ~FC_DISC_TMO;
++		spin_unlock_irqrestore(shost->host_lock, iflags);
++		del_timer_sync(&vport->fc_disctmo);
++		spin_lock_irqsave(&vport->work_port_lock, iflags);
++		vport->work_port_events &= ~WORKER_DISC_TMO;
++		spin_unlock_irqrestore(&vport->work_port_lock, iflags);
+ 	}
+ 
+ 	/* Cancel Discovery Timer state <hba_state> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-			"%d:0248 Cancel Discovery Timer state x%x "
++			"%d (%d):0248 Cancel Discovery Timer state x%x "
+ 			"Data: x%x x%x x%x\n",
+-			phba->brd_no, phba->hba_state, phba->fc_flag,
+-			phba->fc_plogi_cnt, phba->fc_adisc_cnt);
++			phba->brd_no, vport->vpi, vport->port_state,
++			vport->fc_flag, vport->fc_plogi_cnt,
++			vport->fc_adisc_cnt);
+ 
+ 	return 0;
+ }
+@@ -1402,15 +1761,18 @@
+  * Return true if iocb matches the specified nport
+  */
+ int
+-lpfc_check_sli_ndlp(struct lpfc_hba * phba,
+-		    struct lpfc_sli_ring * pring,
+-		    struct lpfc_iocbq * iocb, struct lpfc_nodelist * ndlp)
++lpfc_check_sli_ndlp(struct lpfc_hba *phba,
++		    struct lpfc_sli_ring *pring,
++		    struct lpfc_iocbq *iocb,
++		    struct lpfc_nodelist *ndlp)
+ {
+-	struct lpfc_sli *psli;
+-	IOCB_t *icmd;
++	struct lpfc_sli *psli = &phba->sli;
++	IOCB_t *icmd = &iocb->iocb;
++	struct lpfc_vport    *vport = ndlp->vport;
++
++	if (iocb->vport != vport)
++		return 0;
+ 
+-	psli = &phba->sli;
+-	icmd = &iocb->iocb;
+ 	if (pring->ringno == LPFC_ELS_RING) {
+ 		switch (icmd->ulpCommand) {
+ 		case CMD_GEN_REQUEST64_CR:
+@@ -1445,7 +1807,7 @@
+  * associated with nlp_rpi in the LPFC_NODELIST entry.
+  */
+ static int
+-lpfc_no_rpi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++lpfc_no_rpi(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+ {
+ 	LIST_HEAD(completions);
+ 	struct lpfc_sli *psli;
+@@ -1454,6 +1816,8 @@
+ 	IOCB_t *icmd;
+ 	uint32_t rpi, i;
+ 
++	lpfc_fabric_abort_nport(ndlp);
++
+ 	/*
+ 	 * Everything that matches on txcmplq will be returned
+ 	 * by firmware with a no rpi error.
+@@ -1465,15 +1829,15 @@
+ 		for (i = 0; i < psli->num_rings; i++) {
+ 			pring = &psli->ring[i];
+ 
+-			spin_lock_irq(phba->host->host_lock);
++			spin_lock_irq(&phba->hbalock);
+ 			list_for_each_entry_safe(iocb, next_iocb, &pring->txq,
+ 						list) {
+ 				/*
+ 				 * Check to see if iocb matches the nport we are
+ 				 * looking for
+ 				 */
+-				if ((lpfc_check_sli_ndlp
+-				     (phba, pring, iocb, ndlp))) {
++				if ((lpfc_check_sli_ndlp(phba, pring, iocb,
++							 ndlp))) {
+ 					/* It matches, so deque and call compl
+ 					   with an error */
+ 					list_move_tail(&iocb->list,
+@@ -1481,22 +1845,22 @@
+ 					pring->txq_cnt--;
+ 				}
+ 			}
+-			spin_unlock_irq(phba->host->host_lock);
+-
++			spin_unlock_irq(&phba->hbalock);
+ 		}
+ 	}
+ 
+ 	while (!list_empty(&completions)) {
+ 		iocb = list_get_first(&completions, struct lpfc_iocbq, list);
+-		list_del(&iocb->list);
++		list_del_init(&iocb->list);
+ 
+-		if (iocb->iocb_cmpl) {
++		if (!iocb->iocb_cmpl)
++			lpfc_sli_release_iocbq(phba, iocb);
++		else {
+ 			icmd = &iocb->iocb;
+ 			icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+ 			icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
+-			(iocb->iocb_cmpl) (phba, iocb, iocb);
+-		} else
+-			lpfc_sli_release_iocbq(phba, iocb);
++			(iocb->iocb_cmpl)(phba, iocb, iocb);
++		}
+ 	}
+ 
+ 	return 0;
+@@ -1512,19 +1876,22 @@
+  * we are waiting to PLOGI back to the remote NPort.
+  */
+ int
+-lpfc_unreg_rpi(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
++	struct lpfc_hba *phba = vport->phba;
+ 	LPFC_MBOXQ_t *mbox;
+ 	int rc;
+ 
+ 	if (ndlp->nlp_rpi) {
+-		if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) {
+-			lpfc_unreg_login(phba, ndlp->nlp_rpi, mbox);
+-			mbox->mbox_cmpl=lpfc_sli_def_mbox_cmpl;
+-			rc = lpfc_sli_issue_mbox
+-				    (phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB));
++		mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++		if (mbox) {
++			lpfc_unreg_login(phba, vport->vpi, ndlp->nlp_rpi, mbox);
++			mbox->vport = vport;
++			mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++			rc = lpfc_sli_issue_mbox(phba, mbox,
++						 (MBX_NOWAIT | MBX_STOP_IOCB));
+ 			if (rc == MBX_NOT_FINISHED)
+-				mempool_free( mbox, phba->mbox_mem_pool);
++				mempool_free(mbox, phba->mbox_mem_pool);
+ 		}
+ 		lpfc_no_rpi(phba, ndlp);
+ 		ndlp->nlp_rpi = 0;
+@@ -1533,25 +1900,70 @@
+ 	return 0;
+ }
+ 
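lpfc_unreg_rpi() pairs the UNREG_LOGIN mailbox with lpfc_no_rpi(), which fails any queued iocbs for that login with IOSTAT_LOCAL_REJECT / IOERR_SLI_ABORTED, since the firmware would bounce them anyway once the RPI is gone. The two helpers that follow reuse the same mailbox shape with wildcard arguments; a plausible teardown order (the pairing is an assumption, not taken from this patch):

	lpfc_unreg_all_rpis(vport);	/* 0xffff: every RPI on this vpi */
	lpfc_unreg_default_rpis(vport);	/* 0xffffffff: the default RPIs too */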
++void
++lpfc_unreg_all_rpis(struct lpfc_vport *vport)
++{
++	struct lpfc_hba  *phba  = vport->phba;
++	LPFC_MBOXQ_t     *mbox;
++	int rc;
++
++	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++	if (mbox) {
++		lpfc_unreg_login(phba, vport->vpi, 0xffff, mbox);
++		mbox->vport = vport;
++		mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++		rc = lpfc_sli_issue_mbox(phba, mbox,
++					 (MBX_NOWAIT | MBX_STOP_IOCB));
++		if (rc == MBX_NOT_FINISHED) {
++			mempool_free(mbox, phba->mbox_mem_pool);
++		}
++	}
++}
++
++void
++lpfc_unreg_default_rpis(struct lpfc_vport *vport)
++{
++	struct lpfc_hba  *phba  = vport->phba;
++	LPFC_MBOXQ_t     *mbox;
++	int rc;
++
++	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++	if (mbox) {
++		lpfc_unreg_did(phba, vport->vpi, 0xffffffff, mbox);
++		mbox->vport = vport;
++		mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++		rc = lpfc_sli_issue_mbox(phba, mbox,
++					 (MBX_NOWAIT | MBX_STOP_IOCB));
++		if (rc == MBX_NOT_FINISHED) {
++			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_VPORT,
++					"%d (%d):1815 Could not issue "
++					"unreg_did (default rpis)\n",
++					phba->brd_no, vport->vpi);
++			mempool_free(mbox, phba->mbox_mem_pool);
++		}
++	}
++}
++
+ /*
+  * Free resources associated with LPFC_NODELIST entry
+  * so it can be freed.
+  */
+ static int
+-lpfc_cleanup_node(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++lpfc_cleanup_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
+-	LPFC_MBOXQ_t       *mb;
+-	LPFC_MBOXQ_t       *nextmb;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
++	LPFC_MBOXQ_t *mb, *nextmb;
+ 	struct lpfc_dmabuf *mp;
+ 
+ 	/* Cleanup node for NPort <nlp_DID> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
+-			"%d:0900 Cleanup node for NPort x%x "
++			"%d (%d):0900 Cleanup node for NPort x%x "
+ 			"Data: x%x x%x x%x\n",
+-			phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag,
++			phba->brd_no, vport->vpi, ndlp->nlp_DID, ndlp->nlp_flag,
+ 			ndlp->nlp_state, ndlp->nlp_rpi);
+ 
+-	lpfc_dequeue_node(phba, ndlp);
++	lpfc_dequeue_node(vport, ndlp);
+ 
+ 	/* cleanup any ndlp on mbox q waiting for reglogin cmpl */
+ 	if ((mb = phba->sli.mbox_active)) {
+@@ -1562,13 +1974,13 @@
+ 		}
+ 	}
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
+ 		if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+ 		   (ndlp == (struct lpfc_nodelist *) mb->context2)) {
+ 			mp = (struct lpfc_dmabuf *) (mb->context1);
+ 			if (mp) {
+-				lpfc_mbuf_free(phba, mp->virt, mp->phys);
++				__lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ 				kfree(mp);
+ 			}
+ 			list_del(&mb->list);
+@@ -1576,20 +1988,27 @@
+ 			lpfc_nlp_put(ndlp);
+ 		}
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	lpfc_els_abort(phba,ndlp);
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag &= ~NLP_DELAY_TMO;
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
+ 
+ 	ndlp->nlp_last_elscmd = 0;
+ 	del_timer_sync(&ndlp->nlp_delayfunc);
+ 
+ 	if (!list_empty(&ndlp->els_retry_evt.evt_listp))
+ 		list_del_init(&ndlp->els_retry_evt.evt_listp);
++	if (!list_empty(&ndlp->dev_loss_evt.evt_listp))
++		list_del_init(&ndlp->dev_loss_evt.evt_listp);
+ 
+-	lpfc_unreg_rpi(phba, ndlp);
++	lpfc_unreg_rpi(vport, ndlp);
+ 
+ 	return 0;
+ }
+@@ -1600,18 +2019,22 @@
+  * machine, defer the free till we reach the end of the state machine.
+  */
+ static void
+-lpfc_nlp_remove(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
++lpfc_nlp_remove(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
+ 	struct lpfc_rport_data *rdata;
+ 
+ 	if (ndlp->nlp_flag & NLP_DELAY_TMO) {
+-		lpfc_cancel_retry_delay_tmo(phba, ndlp);
++		lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ 	}
+ 
+-	lpfc_cleanup_node(phba, ndlp);
++	lpfc_cleanup_node(vport, ndlp);
+ 
+-	if ((ndlp->rport) && !(phba->fc_flag & FC_UNLOADING)) {
+-		put_device(&ndlp->rport->dev);
++	/*
++	 * We can get here with a non-NULL ndlp->rport because when we
++	 * unregister a rport we don't break the rport/node linkage.  So if we
++	 * do, make sure we don't leave any dangling pointers behind.
++	 */
++	if (ndlp->rport) {
+ 		rdata = ndlp->rport->dd_data;
+ 		rdata->pnode = NULL;
+ 		ndlp->rport = NULL;
+@@ -1619,11 +2042,10 @@
+ }
+ 
+ static int
+-lpfc_matchdid(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, uint32_t did)
++lpfc_matchdid(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++	      uint32_t did)
+ {
+-	D_ID mydid;
+-	D_ID ndlpdid;
+-	D_ID matchdid;
++	D_ID mydid, ndlpdid, matchdid;
+ 
+ 	if (did == Bcast_DID)
+ 		return 0;
+@@ -1637,7 +2059,7 @@
+ 		return 1;
+ 
+ 	/* Next check for area/domain identically equals 0 match */
+-	mydid.un.word = phba->fc_myDID;
++	mydid.un.word = vport->fc_myDID;
+ 	if ((mydid.un.b.domain == 0) && (mydid.un.b.area == 0)) {
+ 		return 0;
+ 	}
+@@ -1669,101 +2091,116 @@
+ }
+ 
+ /* Search for a nodelist entry */
+-struct lpfc_nodelist *
+-lpfc_findnode_did(struct lpfc_hba *phba, uint32_t did)
++static struct lpfc_nodelist *
++__lpfc_findnode_did(struct lpfc_vport *vport, uint32_t did)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	struct lpfc_nodelist *ndlp;
+ 	uint32_t data1;
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) {
+-		if (lpfc_matchdid(phba, ndlp, did)) {
++	list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
++		if (lpfc_matchdid(vport, ndlp, did)) {
+ 			data1 = (((uint32_t) ndlp->nlp_state << 24) |
+ 				 ((uint32_t) ndlp->nlp_xri << 16) |
+ 				 ((uint32_t) ndlp->nlp_type << 8) |
+ 				 ((uint32_t) ndlp->nlp_rpi & 0xff));
+ 			lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
+-					"%d:0929 FIND node DID "
++					"%d (%d):0929 FIND node DID "
+ 					" Data: x%p x%x x%x x%x\n",
+-					phba->brd_no,
++					phba->brd_no, vport->vpi,
+ 					ndlp, ndlp->nlp_DID,
+ 					ndlp->nlp_flag, data1);
+-			spin_unlock_irq(phba->host->host_lock);
+ 			return ndlp;
+ 		}
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
+ 
+ 	/* FIND node did <did> NOT FOUND */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_NODE,
+-			"%d:0932 FIND node did x%x NOT FOUND.\n",
+-			phba->brd_no, did);
++			"%d (%d):0932 FIND node did x%x NOT FOUND.\n",
++			phba->brd_no, vport->vpi, did);
+ 	return NULL;
+ }
+ 
+ struct lpfc_nodelist *
+-lpfc_setup_disc_node(struct lpfc_hba * phba, uint32_t did)
++lpfc_findnode_did(struct lpfc_vport *vport, uint32_t did)
++{
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_nodelist *ndlp;
++
++	spin_lock_irq(shost->host_lock);
++	ndlp = __lpfc_findnode_did(vport, did);
++	spin_unlock_irq(shost->host_lock);
++	return ndlp;
++}
++
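The split into __lpfc_findnode_did() and a locking wrapper follows the usual kernel double-underscore convention: the bare variant assumes the caller already holds shost->host_lock, while the wrapper takes and drops it. Since the bare variant is static, callers outside this file only ever see the locked form. A usage sketch, with Fabric_DID standing in for any DID:

	/* no lock held: the wrapper locks around the list walk */
	ndlp = lpfc_findnode_did(vport, Fabric_DID);

	/* already under shost->host_lock: call the bare variant */
	spin_lock_irq(shost->host_lock);
	ndlp = __lpfc_findnode_did(vport, Fabric_DID);
	spin_unlock_irq(shost->host_lock);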
++struct lpfc_nodelist *
++lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ 	struct lpfc_nodelist *ndlp;
+ 
+-	ndlp = lpfc_findnode_did(phba, did);
++	ndlp = lpfc_findnode_did(vport, did);
+ 	if (!ndlp) {
+-		if ((phba->fc_flag & FC_RSCN_MODE) &&
+-		   ((lpfc_rscn_payload_check(phba, did) == 0)))
++		if ((vport->fc_flag & FC_RSCN_MODE) != 0 &&
++		    lpfc_rscn_payload_check(vport, did) == 0)
+ 			return NULL;
+ 		ndlp = (struct lpfc_nodelist *)
+-		     mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
++		     mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL);
+ 		if (!ndlp)
+ 			return NULL;
+-		lpfc_nlp_init(phba, ndlp, did);
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++		lpfc_nlp_init(vport, ndlp, did);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++		spin_unlock_irq(shost->host_lock);
+ 		return ndlp;
+ 	}
+-	if (phba->fc_flag & FC_RSCN_MODE) {
+-		if (lpfc_rscn_payload_check(phba, did)) {
++	if (vport->fc_flag & FC_RSCN_MODE) {
++		if (lpfc_rscn_payload_check(vport, did)) {
++			spin_lock_irq(shost->host_lock);
+ 			ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++			spin_unlock_irq(shost->host_lock);
+ 
+ 			/* Since this node is marked for discovery,
+ 			 * delay timeout is not needed.
+ 			 */
+ 			if (ndlp->nlp_flag & NLP_DELAY_TMO)
+-				lpfc_cancel_retry_delay_tmo(phba, ndlp);
++				lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ 		} else
+ 			ndlp = NULL;
+ 	} else {
+ 		if (ndlp->nlp_state == NLP_STE_ADISC_ISSUE ||
+ 		    ndlp->nlp_state == NLP_STE_PLOGI_ISSUE)
+ 			return NULL;
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_NPR_2B_DISC;
++		spin_unlock_irq(shost->host_lock);
+ 	}
+ 	return ndlp;
+ }
+ 
+ /* Build a list of nodes to discover based on the loopmap */
+ void
+-lpfc_disc_list_loopmap(struct lpfc_hba * phba)
++lpfc_disc_list_loopmap(struct lpfc_vport *vport)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	int j;
+ 	uint32_t alpa, index;
+ 
+-	if (phba->hba_state <= LPFC_LINK_DOWN) {
++	if (!lpfc_is_link_up(phba))
+ 		return;
+-	}
+-	if (phba->fc_topology != TOPOLOGY_LOOP) {
++
++	if (phba->fc_topology != TOPOLOGY_LOOP)
+ 		return;
+-	}
+ 
+ 	/* Check for loop map present or not */
+ 	if (phba->alpa_map[0]) {
+ 		for (j = 1; j <= phba->alpa_map[0]; j++) {
+ 			alpa = phba->alpa_map[j];
+-
+-			if (((phba->fc_myDID & 0xff) == alpa) || (alpa == 0)) {
++			if (((vport->fc_myDID & 0xff) == alpa) || (alpa == 0))
+ 				continue;
+-			}
+-			lpfc_setup_disc_node(phba, alpa);
++			lpfc_setup_disc_node(vport, alpa);
+ 		}
+ 	} else {
+ 		/* No alpamap, so try all alpa's */
+@@ -1776,113 +2213,167 @@
+ 			else
+ 				index = FC_MAXLOOP - j - 1;
+ 			alpa = lpfcAlpaArray[index];
+-			if ((phba->fc_myDID & 0xff) == alpa) {
++			if ((vport->fc_myDID & 0xff) == alpa)
+ 				continue;
+-			}
+-
+-			lpfc_setup_disc_node(phba, alpa);
++			lpfc_setup_disc_node(vport, alpa);
+ 		}
+ 	}
+ 	return;
+ }
+ 
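In the loop-map walk above, alpa_map[0] holds the number of entries the firmware reported and the AL_PA values follow in alpa_map[1..]; our own address and the reserved value 0 are skipped, and every survivor gets an NPR discovery node. A standalone sketch of that first branch, with the map contents assumed:

	uint8_t alpa_map[128] = { 3, 0x01, 0x02, 0xe8 };	/* count, then AL_PAs */
	uint32_t my_alpa = 0x02;
	int j;

	for (j = 1; j <= alpa_map[0]; j++) {
		uint32_t alpa = alpa_map[j];
		if (alpa == my_alpa || alpa == 0)
			continue;	/* skip self and the reserved 0 */
		/* lpfc_setup_disc_node(vport, alpa) runs here */
	}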
+-/* Start Link up / RSCN discovery on NPR list */
+ void
+-lpfc_disc_start(struct lpfc_hba * phba)
++lpfc_issue_clear_la(struct lpfc_hba *phba, struct lpfc_vport *vport)
+ {
+-	struct lpfc_sli *psli;
+ 	LPFC_MBOXQ_t *mbox;
+-	struct lpfc_nodelist *ndlp, *next_ndlp;
++	struct lpfc_sli *psli = &phba->sli;
++	struct lpfc_sli_ring *extra_ring = &psli->ring[psli->extra_ring];
++	struct lpfc_sli_ring *fcp_ring   = &psli->ring[psli->fcp_ring];
++	struct lpfc_sli_ring *next_ring  = &psli->ring[psli->next_ring];
++	int  rc;
++
++	/*
++	 * If this is not a physical port, or if we have already sent
++	 * CLEAR_LA, don't send it again.
++	 */
++	if ((phba->link_state >= LPFC_CLEAR_LA) ||
++	    (vport->port_type != LPFC_PHYSICAL_PORT))
++		return;
++
++	/* Link up discovery */
++	if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL)) != NULL) {
++		phba->link_state = LPFC_CLEAR_LA;
++		lpfc_clear_la(phba, mbox);
++		mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
++		mbox->vport = vport;
++		rc = lpfc_sli_issue_mbox(phba, mbox, (MBX_NOWAIT |
++						      MBX_STOP_IOCB));
++		if (rc == MBX_NOT_FINISHED) {
++			mempool_free(mbox, phba->mbox_mem_pool);
++			lpfc_disc_flush_list(vport);
++			extra_ring->flag &= ~LPFC_STOP_IOCB_EVENT;
++			fcp_ring->flag &= ~LPFC_STOP_IOCB_EVENT;
++			next_ring->flag &= ~LPFC_STOP_IOCB_EVENT;
++			phba->link_state = LPFC_HBA_ERROR;
++		}
++	}
++}
++
++/* Reg_vpi to tell firmware to resume normal operations */
++void
++lpfc_issue_reg_vpi(struct lpfc_hba *phba, struct lpfc_vport *vport)
++{
++	LPFC_MBOXQ_t *regvpimbox;
++
++	regvpimbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++	if (regvpimbox) {
++		lpfc_reg_vpi(phba, vport->vpi, vport->fc_myDID, regvpimbox);
++		regvpimbox->mbox_cmpl = lpfc_mbx_cmpl_reg_vpi;
++		regvpimbox->vport = vport;
++		if (lpfc_sli_issue_mbox(phba, regvpimbox,
++					(MBX_NOWAIT | MBX_STOP_IOCB))
++					== MBX_NOT_FINISHED) {
++			mempool_free(regvpimbox, phba->mbox_mem_pool);
++		}
++	}
++}
++
++/* Start Link up / RSCN discovery on NPR nodes */
++void
++lpfc_disc_start(struct lpfc_vport *vport)
++{
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
+ 	uint32_t num_sent;
+ 	uint32_t clear_la_pending;
+ 	int did_changed;
+-	int rc;
+-
+-	psli = &phba->sli;
+ 
+-	if (phba->hba_state <= LPFC_LINK_DOWN) {
++	if (!lpfc_is_link_up(phba))
+ 		return;
+-	}
+-	if (phba->hba_state == LPFC_CLEAR_LA)
++
++	if (phba->link_state == LPFC_CLEAR_LA)
+ 		clear_la_pending = 1;
+ 	else
+ 		clear_la_pending = 0;
+ 
+-	if (phba->hba_state < LPFC_HBA_READY) {
+-		phba->hba_state = LPFC_DISC_AUTH;
+-	}
+-	lpfc_set_disctmo(phba);
++	if (vport->port_state < LPFC_VPORT_READY)
++		vport->port_state = LPFC_DISC_AUTH;
++
++	lpfc_set_disctmo(vport);
+ 
+-	if (phba->fc_prevDID == phba->fc_myDID) {
++	if (vport->fc_prevDID == vport->fc_myDID)
+ 		did_changed = 0;
+-	} else {
++	else
+ 		did_changed = 1;
+-	}
+-	phba->fc_prevDID = phba->fc_myDID;
+-	phba->num_disc_nodes = 0;
++
++	vport->fc_prevDID = vport->fc_myDID;
++	vport->num_disc_nodes = 0;
+ 
+ 	/* Start Discovery state <hba_state> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-			"%d:0202 Start Discovery hba state x%x "
++			"%d (%d):0202 Start Discovery hba state x%x "
+ 			"Data: x%x x%x x%x\n",
+-			phba->brd_no, phba->hba_state, phba->fc_flag,
+-			phba->fc_plogi_cnt, phba->fc_adisc_cnt);
+-
+-	/* If our did changed, we MUST do PLOGI */
+-	list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp) {
+-		if (ndlp->nlp_state == NLP_STE_NPR_NODE &&
+-		    (ndlp->nlp_flag & NLP_NPR_2B_DISC) != 0 &&
+-		    did_changed) {
+-			spin_lock_irq(phba->host->host_lock);
+-			ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+-			spin_unlock_irq(phba->host->host_lock);
+-		}
+-	}
++			phba->brd_no, vport->vpi, vport->port_state,
++			vport->fc_flag, vport->fc_plogi_cnt,
++			vport->fc_adisc_cnt);
+ 
+ 	/* First do ADISCs - if any */
+-	num_sent = lpfc_els_disc_adisc(phba);
++	num_sent = lpfc_els_disc_adisc(vport);
+ 
+ 	if (num_sent)
+ 		return;
+ 
+-	if ((phba->hba_state < LPFC_HBA_READY) && (!clear_la_pending)) {
++	/*
++	 * For SLI3, cmpl_reg_vpi will set port_state to READY, and
++	 * continue discovery.
++	 */
++	if ((phba->sli3_options & LPFC_SLI3_NPIV_ENABLED) &&
++	    !(vport->fc_flag & FC_RSCN_MODE)) {
++		lpfc_issue_reg_vpi(phba, vport);
++		return;
++	}
++
++	/*
++	 * For SLI2, we need to set port_state to READY and continue
++	 * discovery.
++	 */
++	if (vport->port_state < LPFC_VPORT_READY && !clear_la_pending) {
+ 		/* If we get here, there is nothing to ADISC */
+-		if ((mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL))) {
+-			phba->hba_state = LPFC_CLEAR_LA;
+-			lpfc_clear_la(phba, mbox);
+-			mbox->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
+-			rc = lpfc_sli_issue_mbox(phba, mbox,
+-						 (MBX_NOWAIT | MBX_STOP_IOCB));
+-			if (rc == MBX_NOT_FINISHED) {
+-				mempool_free( mbox, phba->mbox_mem_pool);
+-				lpfc_disc_flush_list(phba);
+-				psli->ring[(psli->extra_ring)].flag &=
+-					~LPFC_STOP_IOCB_EVENT;
+-				psli->ring[(psli->fcp_ring)].flag &=
+-					~LPFC_STOP_IOCB_EVENT;
+-				psli->ring[(psli->next_ring)].flag &=
+-					~LPFC_STOP_IOCB_EVENT;
+-				phba->hba_state = LPFC_HBA_READY;
++		if (vport->port_type == LPFC_PHYSICAL_PORT)
++			lpfc_issue_clear_la(phba, vport);
++
++		if (!(vport->fc_flag & FC_ABORT_DISCOVERY)) {
++			vport->num_disc_nodes = 0;
++			/* go thru NPR nodes and issue ELS PLOGIs */
++			if (vport->fc_npr_cnt)
++				lpfc_els_disc_plogi(vport);
++
++			if (!vport->num_disc_nodes) {
++				spin_lock_irq(shost->host_lock);
++				vport->fc_flag &= ~FC_NDISC_ACTIVE;
++				spin_unlock_irq(shost->host_lock);
++				lpfc_can_disctmo(vport);
+ 			}
+ 		}
++		vport->port_state = LPFC_VPORT_READY;
+ 	} else {
+ 		/* Next do PLOGIs - if any */
+-		num_sent = lpfc_els_disc_plogi(phba);
++		num_sent = lpfc_els_disc_plogi(vport);
+ 
+ 		if (num_sent)
+ 			return;
+ 
+-		if (phba->fc_flag & FC_RSCN_MODE) {
++		if (vport->fc_flag & FC_RSCN_MODE) {
+ 			/* Check to see if more RSCNs came in while we
+ 			 * were processing this one.
+ 			 */
+-			if ((phba->fc_rscn_id_cnt == 0) &&
+-			    (!(phba->fc_flag & FC_RSCN_DISCOVERY))) {
+-				spin_lock_irq(phba->host->host_lock);
+-				phba->fc_flag &= ~FC_RSCN_MODE;
+-				spin_unlock_irq(phba->host->host_lock);
++			if ((vport->fc_rscn_id_cnt == 0) &&
++			    (!(vport->fc_flag & FC_RSCN_DISCOVERY))) {
++				spin_lock_irq(shost->host_lock);
++				vport->fc_flag &= ~FC_RSCN_MODE;
++				spin_unlock_irq(shost->host_lock);
++				lpfc_can_disctmo(vport);
+ 			} else
+-				lpfc_els_handle_rscn(phba);
++				lpfc_els_handle_rscn(vport);
+ 		}
+ 	}
+ 	return;
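Reading lpfc_disc_start() as a decision ladder, the ordering is:

	/*
	 * 1. ADISCs outstanding?            send them and return; discovery
	 *                                   resumes from their completions.
	 * 2. SLI-3 NPIV, not in RSCN mode?  issue REG_VPI; lpfc_mbx_cmpl_reg_vpi
	 *                                   moves the vport to LPFC_VPORT_READY.
	 * 3. SLI-2, port below READY?       CLEAR_LA on the physical port, then
	 *                                   NPR PLOGIs, then mark the vport READY.
	 * 4. otherwise                      PLOGIs; afterwards either leave RSCN
	 *                                   mode or handle the queued RSCNs.
	 */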
+@@ -1893,7 +2384,7 @@
+  *  ring that match the specified nodelist.
+  */
+ static void
+-lpfc_free_tx(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++lpfc_free_tx(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+ {
+ 	LIST_HEAD(completions);
+ 	struct lpfc_sli *psli;
+@@ -1907,7 +2398,7 @@
+ 	/* Error matching iocb on txq or txcmplq
+ 	 * First check the txq.
+ 	 */
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
+ 		if (iocb->context1 != ndlp) {
+ 			continue;
+@@ -1927,36 +2418,36 @@
+ 			continue;
+ 		}
+ 		icmd = &iocb->iocb;
+-		if ((icmd->ulpCommand == CMD_ELS_REQUEST64_CR) ||
+-		    (icmd->ulpCommand == CMD_XMIT_ELS_RSP64_CX)) {
++		if (icmd->ulpCommand == CMD_ELS_REQUEST64_CR ||
++		    icmd->ulpCommand == CMD_XMIT_ELS_RSP64_CX) {
+ 			lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+ 		}
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	while (!list_empty(&completions)) {
+ 		iocb = list_get_first(&completions, struct lpfc_iocbq, list);
+-		list_del(&iocb->list);
++		list_del_init(&iocb->list);
+ 
+-		if (iocb->iocb_cmpl) {
++		if (!iocb->iocb_cmpl)
++			lpfc_sli_release_iocbq(phba, iocb);
++		else {
+ 			icmd = &iocb->iocb;
+ 			icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+ 			icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
+ 			(iocb->iocb_cmpl) (phba, iocb, iocb);
+-		} else
+-			lpfc_sli_release_iocbq(phba, iocb);
+ 	}
+-
+-	return;
++	}
+ }
+ 
+ void
+-lpfc_disc_flush_list(struct lpfc_hba * phba)
++lpfc_disc_flush_list(struct lpfc_vport *vport)
+ {
+ 	struct lpfc_nodelist *ndlp, *next_ndlp;
++	struct lpfc_hba *phba = vport->phba;
+ 
+-	if (phba->fc_plogi_cnt || phba->fc_adisc_cnt) {
+-		list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes,
++	if (vport->fc_plogi_cnt || vport->fc_adisc_cnt) {
++		list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes,
+ 					 nlp_listp) {
+ 			if (ndlp->nlp_state == NLP_STE_PLOGI_ISSUE ||
+ 			    ndlp->nlp_state == NLP_STE_ADISC_ISSUE) {
+@@ -1967,6 +2458,14 @@
+ 	}
+ }
+ 
++void
++lpfc_cleanup_discovery_resources(struct lpfc_vport *vport)
++{
++	lpfc_els_flush_rscn(vport);
++	lpfc_els_flush_cmd(vport);
++	lpfc_disc_flush_list(vport);
++}
++
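lpfc_cleanup_discovery_resources() tears down in dependency order: pending RSCNs first, so nothing new gets queued, then outstanding ELS commands, then the discovery node list. A plausible caller-side sketch for vport teardown (the ordering here is an assumption, not taken from this patch):

	lpfc_cleanup_discovery_resources(vport);	/* quiesce discovery */
	lpfc_mbx_unreg_vpi(vport);			/* then drop the VPI */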
+ /*****************************************************************************/
+ /*
+  * NAME:     lpfc_disc_timeout
+@@ -1985,158 +2484,154 @@
+ void
+ lpfc_disc_timeout(unsigned long ptr)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba *)ptr;
++	struct lpfc_vport *vport = (struct lpfc_vport *) ptr;
++	struct lpfc_hba   *phba = vport->phba;
+ 	unsigned long flags = 0;
+ 
+ 	if (unlikely(!phba))
+ 		return;
+ 
+-	spin_lock_irqsave(phba->host->host_lock, flags);
+-	if (!(phba->work_hba_events & WORKER_DISC_TMO)) {
+-		phba->work_hba_events |= WORKER_DISC_TMO;
++	if ((vport->work_port_events & WORKER_DISC_TMO) == 0) {
++		spin_lock_irqsave(&vport->work_port_lock, flags);
++		vport->work_port_events |= WORKER_DISC_TMO;
++		spin_unlock_irqrestore(&vport->work_port_lock, flags);
++
++		spin_lock_irqsave(&phba->hbalock, flags);
+ 		if (phba->work_wait)
+-			wake_up(phba->work_wait);
++			lpfc_worker_wake_up(phba);
++		spin_unlock_irqrestore(&phba->hbalock, flags);
+ 	}
+-	spin_unlock_irqrestore(phba->host->host_lock, flags);
+ 	return;
+ }
+ 
+ static void
+-lpfc_disc_timeout_handler(struct lpfc_hba *phba)
++lpfc_disc_timeout_handler(struct lpfc_vport *vport)
+ {
+-	struct lpfc_sli *psli;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
++	struct lpfc_sli  *psli = &phba->sli;
+ 	struct lpfc_nodelist *ndlp, *next_ndlp;
+-	LPFC_MBOXQ_t *clearlambox, *initlinkmbox;
++	LPFC_MBOXQ_t *initlinkmbox;
+ 	int rc, clrlaerr = 0;
+ 
+-	if (unlikely(!phba))
+-		return;
+-
+-	if (!(phba->fc_flag & FC_DISC_TMO))
++	if (!(vport->fc_flag & FC_DISC_TMO))
+ 		return;
+ 
+-	psli = &phba->sli;
++	spin_lock_irq(shost->host_lock);
++	vport->fc_flag &= ~FC_DISC_TMO;
++	spin_unlock_irq(shost->host_lock);
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	phba->fc_flag &= ~FC_DISC_TMO;
+-	spin_unlock_irq(phba->host->host_lock);
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_CMD,
++		"disc timeout:    state:x%x rtry:x%x flg:x%x",
++		vport->port_state, vport->fc_ns_retry, vport->fc_flag);
+ 
+-	switch (phba->hba_state) {
++	switch (vport->port_state) {
+ 
+ 	case LPFC_LOCAL_CFG_LINK:
+-	/* hba_state is identically LPFC_LOCAL_CFG_LINK while waiting for FAN */
++	/* port_state is identically LPFC_LOCAL_CFG_LINK while waiting for
++	 * FAN
++	 */
+ 		/* FAN timeout */
+-		lpfc_printf_log(phba,
+-				 KERN_WARNING,
+-				 LOG_DISCOVERY,
+-				 "%d:0221 FAN timeout\n",
+-				 phba->brd_no);
++		lpfc_printf_log(phba, KERN_WARNING, LOG_DISCOVERY,
++				"%d (%d):0221 FAN timeout\n",
++				phba->brd_no, vport->vpi);
+ 
+ 		/* Start discovery by sending FLOGI, clean up old rpis */
+-		list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes,
++		list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes,
+ 					 nlp_listp) {
+ 			if (ndlp->nlp_state != NLP_STE_NPR_NODE)
+ 				continue;
+ 			if (ndlp->nlp_type & NLP_FABRIC) {
+ 				/* Clean up the ndlp on Fabric connections */
+-				lpfc_drop_node(phba, ndlp);
++				lpfc_drop_node(vport, ndlp);
+ 			} else if (!(ndlp->nlp_flag & NLP_NPR_ADISC)) {
+ 				/* Fail outstanding IO now since device
+ 				 * is marked for PLOGI.
+ 				 */
+-				lpfc_unreg_rpi(phba, ndlp);
++				lpfc_unreg_rpi(vport, ndlp);
+ 			}
+ 		}
+-		phba->hba_state = LPFC_FLOGI;
+-		lpfc_set_disctmo(phba);
+-		lpfc_initial_flogi(phba);
++		if (vport->port_state != LPFC_FLOGI) {
++			vport->port_state = LPFC_FLOGI;
++			lpfc_set_disctmo(vport);
++			lpfc_initial_flogi(vport);
++		}
+ 		break;
+ 
++	case LPFC_FDISC:
+ 	case LPFC_FLOGI:
+-	/* hba_state is identically LPFC_FLOGI while waiting for FLOGI cmpl */
++	/* port_state is identically LPFC_FLOGI while waiting for FLOGI cmpl */
+ 		/* Initial FLOGI timeout */
+-		lpfc_printf_log(phba,
+-				 KERN_ERR,
+-				 LOG_DISCOVERY,
+-				 "%d:0222 Initial FLOGI timeout\n",
+-				 phba->brd_no);
++		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++				"%d (%d):0222 Initial %s timeout\n",
++				phba->brd_no, vport->vpi,
++				vport->vpi ? "FDISC" : "FLOGI");
+ 
+ 		/* Assume no Fabric and go on with discovery.
+ 		 * Check for outstanding ELS FLOGI to abort.
+ 		 */
+ 
+ 		/* FLOGI failed, so just use loop map to make discovery list */
+-		lpfc_disc_list_loopmap(phba);
++		lpfc_disc_list_loopmap(vport);
+ 
+ 		/* Start discovery */
+-		lpfc_disc_start(phba);
++		lpfc_disc_start(vport);
+ 		break;
+ 
+ 	case LPFC_FABRIC_CFG_LINK:
+ 	/* hba_state is identically LPFC_FABRIC_CFG_LINK while waiting for
+ 	   NameServer login */
+ 		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+-				"%d:0223 Timeout while waiting for NameServer "
+-				"login\n", phba->brd_no);
++				"%d (%d):0223 Timeout while waiting for "
++				"NameServer login\n",
++				phba->brd_no, vport->vpi);
+ 
+ 		/* Next look for NameServer ndlp */
+-		ndlp = lpfc_findnode_did(phba, NameServer_DID);
++		ndlp = lpfc_findnode_did(vport, NameServer_DID);
+ 		if (ndlp)
+ 			lpfc_nlp_put(ndlp);
+ 		/* Start discovery */
+-		lpfc_disc_start(phba);
++		lpfc_disc_start(vport);
+ 		break;
+ 
+ 	case LPFC_NS_QRY:
+ 	/* Check for wait for NameServer Rsp timeout */
+ 		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+-				"%d:0224 NameServer Query timeout "
++				"%d (%d):0224 NameServer Query timeout "
+ 				"Data: x%x x%x\n",
+-				phba->brd_no,
+-				phba->fc_ns_retry, LPFC_MAX_NS_RETRY);
++				phba->brd_no, vport->vpi,
++				vport->fc_ns_retry, LPFC_MAX_NS_RETRY);
+ 
+-		ndlp = lpfc_findnode_did(phba, NameServer_DID);
+-		if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
+-			if (phba->fc_ns_retry < LPFC_MAX_NS_RETRY) {
++		if (vport->fc_ns_retry < LPFC_MAX_NS_RETRY) {
+ 				/* Try it one more time */
+-				rc = lpfc_ns_cmd(phba, ndlp, SLI_CTNS_GID_FT);
++			vport->fc_ns_retry++;
++			rc = lpfc_ns_cmd(vport, SLI_CTNS_GID_FT,
++					 vport->fc_ns_retry, 0);
+ 				if (rc == 0)
+ 					break;
+ 			}
+-			phba->fc_ns_retry = 0;
+-		}
+-
+-		/* Nothing to authenticate, so CLEAR_LA right now */
+-		clearlambox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+-		if (!clearlambox) {
+-			clrlaerr = 1;
+-			lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+-					"%d:0226 Device Discovery "
+-					"completion error\n",
+-					phba->brd_no);
+-			phba->hba_state = LPFC_HBA_ERROR;
+-			break;
+-		}
++		vport->fc_ns_retry = 0;
+ 
+-		phba->hba_state = LPFC_CLEAR_LA;
+-		lpfc_clear_la(phba, clearlambox);
+-		clearlambox->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
+-		rc = lpfc_sli_issue_mbox(phba, clearlambox,
+-					 (MBX_NOWAIT | MBX_STOP_IOCB));
+-		if (rc == MBX_NOT_FINISHED) {
+-			mempool_free(clearlambox, phba->mbox_mem_pool);
+-			clrlaerr = 1;
+-			break;
++		/*
++		 * Discovery is over.
++		 * set port_state to PORT_READY if SLI2.
++		 * cmpl_reg_vpi will set port_state to READY for SLI3.
++		 */
++		if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
++			lpfc_issue_reg_vpi(phba, vport);
++		else {	/* NPIV Not enabled */
++			lpfc_issue_clear_la(phba, vport);
++			vport->port_state = LPFC_VPORT_READY;
+ 		}
+ 
+ 		/* Setup and issue mailbox INITIALIZE LINK command */
+ 		initlinkmbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ 		if (!initlinkmbox) {
+ 			lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+-					"%d:0206 Device Discovery "
++					"%d (%d):0206 Device Discovery "
+ 					"completion error\n",
+-					phba->brd_no);
+-			phba->hba_state = LPFC_HBA_ERROR;
++					phba->brd_no, vport->vpi);
++			phba->link_state = LPFC_HBA_ERROR;
+ 			break;
+ 		}
+ 
+@@ -2144,6 +2639,8 @@
+ 		lpfc_init_link(phba, initlinkmbox, phba->cfg_topology,
+ 			       phba->cfg_link_speed);
+ 		initlinkmbox->mb.un.varInitLnk.lipsr_AL_PA = 0;
++		initlinkmbox->vport = vport;
++		initlinkmbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+ 		rc = lpfc_sli_issue_mbox(phba, initlinkmbox,
+ 					 (MBX_NOWAIT | MBX_STOP_IOCB));
+ 		lpfc_set_loopback_flag(phba);
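The INIT_LINK sequence above shows the mailbox lifecycle this patch repeats everywhere: allocate from the mailbox mempool, fill in the command, attach a vport and a completion handler, issue with MBX_NOWAIT, and return the buffer to the pool only if the issue itself fails; on success the completion handler owns it. A compressed sketch of that ownership rule, with hypothetical names standing in for the real API:

#include <stdio.h>
#include <stdlib.h>

struct mbox;
typedef void (*mbox_cmpl_fn)(struct mbox *);

struct mbox {
	int cmd;
	mbox_cmpl_fn cmpl;
};

enum { MBX_SUCCESS, MBX_BUSY, MBX_NOT_FINISHED };

static void init_link_cmpl(struct mbox *mb)
{
	printf("cmd 0x%x completed\n", mb->cmd);
	free(mb);			/* success path: handler frees */
}

static int issue_mbox_nowait(struct mbox *mb)
{
	/* A real HBA would queue this to hardware; pretend it worked. */
	mb->cmpl(mb);
	return MBX_SUCCESS;
}

int main(void)
{
	struct mbox *mb = malloc(sizeof(*mb));
	if (!mb)
		return 1;		/* cf. setting LPFC_HBA_ERROR */
	mb->cmd = 0x05;			/* stand-in opcode */
	mb->cmpl = init_link_cmpl;
	if (issue_mbox_nowait(mb) == MBX_NOT_FINISHED) {
		free(mb);		/* failure path: caller frees */
		return 1;
	}
	return 0;
}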
+@@ -2154,67 +2651,81 @@
+ 
+ 	case LPFC_DISC_AUTH:
+ 	/* Node Authentication timeout */
+-		lpfc_printf_log(phba,
+-				 KERN_ERR,
+-				 LOG_DISCOVERY,
+-				 "%d:0227 Node Authentication timeout\n",
+-				 phba->brd_no);
+-		lpfc_disc_flush_list(phba);
+-		clearlambox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+-		if (!clearlambox) {
+-			clrlaerr = 1;
+ 			lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
+-					"%d:0207 Device Discovery "
+-					"completion error\n",
+-					phba->brd_no);
+-			phba->hba_state = LPFC_HBA_ERROR;
+-			break;
++				"%d (%d):0227 Node Authentication timeout\n",
++				phba->brd_no, vport->vpi);
++		lpfc_disc_flush_list(vport);
++
++		/*
++		 * set port_state to PORT_READY if SLI2.
++		 * cmpl_reg_vpi will set port_state to READY for SLI3.
++		 */
++		if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
++			lpfc_issue_reg_vpi(phba, vport);
++		else {	/* NPIV Not enabled */
++			lpfc_issue_clear_la(phba, vport);
++			vport->port_state = LPFC_VPORT_READY;
+ 		}
+-		phba->hba_state = LPFC_CLEAR_LA;
+-		lpfc_clear_la(phba, clearlambox);
+-		clearlambox->mbox_cmpl = lpfc_mbx_cmpl_clear_la;
+-		rc = lpfc_sli_issue_mbox(phba, clearlambox,
+-					 (MBX_NOWAIT | MBX_STOP_IOCB));
+-		if (rc == MBX_NOT_FINISHED) {
+-			mempool_free(clearlambox, phba->mbox_mem_pool);
+-			clrlaerr = 1;
++		break;
++
++	case LPFC_VPORT_READY:
++		if (vport->fc_flag & FC_RSCN_MODE) {
++			lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++					"%d (%d):0231 RSCN timeout Data: x%x "
++					"x%x\n",
++					phba->brd_no, vport->vpi,
++					vport->fc_ns_retry, LPFC_MAX_NS_RETRY);
++
++			/* Cleanup any outstanding ELS commands */
++			lpfc_els_flush_cmd(vport);
++
++			lpfc_els_flush_rscn(vport);
++			lpfc_disc_flush_list(vport);
+ 		}
+ 		break;
+ 
++	default:
++		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++				"%d (%d):0229 Unexpected discovery timeout, "
++				"vport State x%x\n",
++				phba->brd_no, vport->vpi, vport->port_state);
++
++		break;
++	}
++
++	switch (phba->link_state) {
+ 	case LPFC_CLEAR_LA:
+ 	/* CLEAR LA timeout */
+-		lpfc_printf_log(phba,
+-				 KERN_ERR,
+-				 LOG_DISCOVERY,
+-				 "%d:0228 CLEAR LA timeout\n",
+-				 phba->brd_no);
++		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++				"%d (%d):0228 CLEAR LA timeout\n",
++				phba->brd_no, vport->vpi);
+ 		clrlaerr = 1;
+ 		break;
+ 
+-	case LPFC_HBA_READY:
+-		if (phba->fc_flag & FC_RSCN_MODE) {
+-			lpfc_printf_log(phba,
+-					KERN_ERR,
+-					LOG_DISCOVERY,
+-					"%d:0231 RSCN timeout Data: x%x x%x\n",
+-					phba->brd_no,
+-					phba->fc_ns_retry, LPFC_MAX_NS_RETRY);
+-
+-			/* Cleanup any outstanding ELS commands */
+-			lpfc_els_flush_cmd(phba);
++	case LPFC_LINK_UNKNOWN:
++	case LPFC_WARM_START:
++	case LPFC_INIT_START:
++	case LPFC_INIT_MBX_CMDS:
++	case LPFC_LINK_DOWN:
++	case LPFC_LINK_UP:
++	case LPFC_HBA_ERROR:
++		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++				"%d (%d):0230 Unexpected timeout, hba link "
++				"state x%x\n",
++				phba->brd_no, vport->vpi, phba->link_state);
++		clrlaerr = 1;
++		break;
+ 
+-			lpfc_els_flush_rscn(phba);
+-			lpfc_disc_flush_list(phba);
+-		}
++	case LPFC_HBA_READY:
+ 		break;
+ 	}
+ 
+ 	if (clrlaerr) {
+-		lpfc_disc_flush_list(phba);
++		lpfc_disc_flush_list(vport);
+ 		psli->ring[(psli->extra_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+ 		psli->ring[(psli->fcp_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+ 		psli->ring[(psli->next_ring)].flag &= ~LPFC_STOP_IOCB_EVENT;
+-		phba->hba_state = LPFC_HBA_READY;
++		vport->port_state = LPFC_VPORT_READY;
+ 	}
+ 
+ 	return;
+@@ -2227,37 +2738,29 @@
+  * handed off to the SLI layer.
+  */
+ void
+-lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_mbx_cmpl_fdmi_reg_login(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+-	struct lpfc_sli *psli;
+-	MAILBOX_t *mb;
+-	struct lpfc_dmabuf *mp;
+-	struct lpfc_nodelist *ndlp;
+-
+-	psli = &phba->sli;
+-	mb = &pmb->mb;
+-
+-	ndlp = (struct lpfc_nodelist *) pmb->context2;
+-	mp = (struct lpfc_dmabuf *) (pmb->context1);
++	MAILBOX_t *mb = &pmb->mb;
++	struct lpfc_dmabuf   *mp = (struct lpfc_dmabuf *) (pmb->context1);
++	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) pmb->context2;
++	struct lpfc_vport    *vport = pmb->vport;
+ 
+ 	pmb->context1 = NULL;
+ 
+ 	ndlp->nlp_rpi = mb->un.varWords[0];
+ 	ndlp->nlp_type |= NLP_FABRIC;
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+ 
+-	/* Start issuing Fabric-Device Management Interface (FDMI)
+-	 * command to 0xfffffa (FDMI well known port)
+-	 */
+-	if (phba->cfg_fdmi_on == 1) {
+-		lpfc_fdmi_cmd(phba, ndlp, SLI_MGMT_DHBA);
+-	} else {
+ 		/*
+-		 * Delay issuing FDMI command if fdmi-on=2
+-		 * (supporting RPA/hostnmae)
++	 * Start issuing Fabric-Device Management Interface (FDMI) command to
++	 * 0xfffffa (FDMI well known port) or delay issuing FDMI command if
++	 * fdmi-on=2 (supporting RPA/hostname)
+ 		 */
+-		mod_timer(&phba->fc_fdmitmo, jiffies + HZ * 60);
+-	}
++
++	if (phba->cfg_fdmi_on == 1)
++		lpfc_fdmi_cmd(vport, ndlp, SLI_MGMT_DHBA);
++	else
++		mod_timer(&vport->fc_fdmitmo, jiffies + HZ * 60);
+ 
+ 				/* Mailbox took a reference to the node */
+ 	lpfc_nlp_put(ndlp);
+@@ -2283,16 +2786,12 @@
+ 		      sizeof(ndlp->nlp_portname)) == 0;
+ }
+ 
+-/*
+- * Search node lists for a remote port matching filter criteria
+- * Caller needs to hold host_lock before calling this routine.
+- */
+ struct lpfc_nodelist *
+-__lpfc_find_node(struct lpfc_hba *phba, node_filter filter, void *param)
++__lpfc_find_node(struct lpfc_vport *vport, node_filter filter, void *param)
+ {
+ 	struct lpfc_nodelist *ndlp;
+ 
+-	list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) {
++	list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+ 		if (ndlp->nlp_state != NLP_STE_UNUSED_NODE &&
+ 		    filter(ndlp, param))
+ 			return ndlp;
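__lpfc_find_node() above collapses the driver's various node searches into one list walk parameterized by a node_filter predicate; lpfc_filter_by_rpi and lpfc_filter_by_wwpn are the two instances in this file. A standalone sketch of the same callback-driven search, with a toy node type in place of lpfc_nodelist:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct node {
	uint16_t rpi;
	struct node *next;
};

typedef bool (*node_filter)(struct node *, void *);

static bool filter_by_rpi(struct node *n, void *param)
{
	return n->rpi == *(uint16_t *)param;
}

static struct node *find_node(struct node *head, node_filter f, void *param)
{
	for (struct node *n = head; n; n = n->next)
		if (f(n, param))	/* unused-state check omitted here */
			return n;
	return NULL;
}

int main(void)
{
	struct node c = { .rpi = 7 };
	struct node b = { .rpi = 5, .next = &c };
	struct node a = { .rpi = 3, .next = &b };
	uint16_t want = 5;
	struct node *hit = find_node(&a, filter_by_rpi, &want);

	printf("found rpi %u\n", hit ? hit->rpi : 0);
	return 0;
}

Adding a new lookup key then costs one small predicate instead of another copy of the list walk and its locking.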
+@@ -2302,68 +2801,104 @@
+ 
+ /*
+  * Search node lists for a remote port matching filter criteria
+- * This routine is used when the caller does NOT have host_lock.
++ * This routine does NOT require the host_lock; it acquires it internally.
+  */
+ struct lpfc_nodelist *
+-lpfc_find_node(struct lpfc_hba *phba, node_filter filter, void *param)
++lpfc_find_node(struct lpfc_vport *vport, node_filter filter, void *param)
+ {
++	struct Scsi_Host     *shost = lpfc_shost_from_vport(vport);
+ 	struct lpfc_nodelist *ndlp;
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	ndlp = __lpfc_find_node(phba, filter, param);
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
++	ndlp = __lpfc_find_node(vport, filter, param);
++	spin_unlock_irq(shost->host_lock);
+ 	return ndlp;
+ }
+ 
+ /*
+  * This routine looks up the ndlp lists for the given RPI. If rpi found it
+- * returns the node list pointer else return NULL.
++ * returns the node list element pointer, otherwise it returns NULL.
+  */
+ struct lpfc_nodelist *
+-__lpfc_findnode_rpi(struct lpfc_hba *phba, uint16_t rpi)
++__lpfc_findnode_rpi(struct lpfc_vport *vport, uint16_t rpi)
+ {
+-	return __lpfc_find_node(phba, lpfc_filter_by_rpi, &rpi);
++	return __lpfc_find_node(vport, lpfc_filter_by_rpi, &rpi);
+ }
+ 
+ struct lpfc_nodelist *
+-lpfc_findnode_rpi(struct lpfc_hba * phba, uint16_t rpi)
++lpfc_findnode_rpi(struct lpfc_vport *vport, uint16_t rpi)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ 	struct lpfc_nodelist *ndlp;
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	ndlp = __lpfc_findnode_rpi(phba, rpi);
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
++	ndlp = __lpfc_findnode_rpi(vport, rpi);
++	spin_unlock_irq(shost->host_lock);
+ 	return ndlp;
+ }
+ 
+ /*
+  * This routine looks up the ndlp lists for the given WWPN. If WWPN found it
+- * returns the node list pointer else return NULL.
++ * returns the node list element pointer, otherwise it returns NULL.
+  */
+ struct lpfc_nodelist *
+-lpfc_findnode_wwpn(struct lpfc_hba *phba, struct lpfc_name *wwpn)
++lpfc_findnode_wwpn(struct lpfc_vport *vport, struct lpfc_name *wwpn)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ 	struct lpfc_nodelist *ndlp;
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	ndlp = __lpfc_find_node(phba, lpfc_filter_by_wwpn, wwpn);
+-	spin_unlock_irq(phba->host->host_lock);
+-	return NULL;
++	spin_lock_irq(shost->host_lock);
++	ndlp = __lpfc_find_node(vport, lpfc_filter_by_wwpn, wwpn);
++	spin_unlock_irq(shost->host_lock);
++	return ndlp;
+ }
+ 
+ void
+-lpfc_nlp_init(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp, uint32_t did)
++lpfc_dev_loss_delay(unsigned long ptr)
++{
++	struct lpfc_nodelist *ndlp = (struct lpfc_nodelist *) ptr;
++	struct lpfc_vport *vport = ndlp->vport;
++	struct lpfc_hba   *phba = vport->phba;
++	struct lpfc_work_evt  *evtp = &ndlp->dev_loss_evt;
++	struct lpfc_work_evt  *evtp;
++
++	evtp = &ndlp->dev_loss_evt;
++
++	spin_lock_irqsave(&phba->hbalock, flags);
++	if (!list_empty(&evtp->evt_listp)) {
++		spin_unlock_irqrestore(&phba->hbalock, flags);
++		return;
++	}
++
++	evtp->evt_arg1  = ndlp;
++	evtp->evt       = LPFC_EVT_DEV_LOSS_DELAY;
++	list_add_tail(&evtp->evt_listp, &phba->work_list);
++	if (phba->work_wait)
++		lpfc_worker_wake_up(phba);
++	spin_unlock_irqrestore(&phba->hbalock, flags);
++	return;
++}
++
++void
++lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++	      uint32_t did)
+ {
+ 	memset(ndlp, 0, sizeof (struct lpfc_nodelist));
+ 	INIT_LIST_HEAD(&ndlp->els_retry_evt.evt_listp);
++	INIT_LIST_HEAD(&ndlp->dev_loss_evt.evt_listp);
+ 	init_timer(&ndlp->nlp_delayfunc);
+ 	ndlp->nlp_delayfunc.function = lpfc_els_retry_delay;
+ 	ndlp->nlp_delayfunc.data = (unsigned long)ndlp;
+ 	ndlp->nlp_DID = did;
+-	ndlp->nlp_phba = phba;
++	ndlp->vport = vport;
+ 	ndlp->nlp_sid = NLP_NO_SID;
+ 	INIT_LIST_HEAD(&ndlp->nlp_listp);
+ 	kref_init(&ndlp->kref);
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_NODE,
++		"node init:       did:x%x",
++		ndlp->nlp_DID, 0, 0);
++
+ 	return;
+ }
+ 
+@@ -2372,8 +2907,13 @@
+ {
+ 	struct lpfc_nodelist *ndlp = container_of(kref, struct lpfc_nodelist,
+ 						  kref);
+-	lpfc_nlp_remove(ndlp->nlp_phba, ndlp);
+-	mempool_free(ndlp, ndlp->nlp_phba->nlp_mem_pool);
++
++	lpfc_debugfs_disc_trc(ndlp->vport, LPFC_DISC_TRC_NODE,
++		"node release:    did:x%x flg:x%x type:x%x",
++		ndlp->nlp_DID, ndlp->nlp_flag, ndlp->nlp_type);
++
++	lpfc_nlp_remove(ndlp->vport, ndlp);
++	mempool_free(ndlp, ndlp->vport->phba->nlp_mem_pool);
+ }
+ 
+ struct lpfc_nodelist *
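lpfc_nlp_release() above is the kref release callback: the last lpfc_nlp_put() triggers it, and container_of() recovers the node from the embedded kref before freeing it back to the mempool. A userspace analog of that lifetime scheme; the kref here is a simplified, non-atomic stand-in for the kernel's:

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct kref { int refcount; };

static void kref_init(struct kref *k) { k->refcount = 1; }
static void kref_get(struct kref *k)  { k->refcount++; }

static void kref_put(struct kref *k, void (*release)(struct kref *))
{
	if (--k->refcount == 0)
		release(k);		/* last reference is gone */
}

struct node {
	unsigned int did;
	struct kref kref;
};

static void node_release(struct kref *k)
{
	struct node *n = container_of(k, struct node, kref);

	printf("releasing node did:0x%x\n", n->did);
	free(n);
}

int main(void)
{
	struct node *n = malloc(sizeof(*n));

	n->did = 0xfffffa;
	kref_init(&n->kref);		/* refcount = 1 */
	kref_get(&n->kref);		/* e.g. a mailbox takes a reference */
	kref_put(&n->kref, node_release);
	kref_put(&n->kref, node_release);	/* hits 0, release runs */
	return 0;
}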
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hw.h linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_hw.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_hw.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_hw.h	2007-12-19 15:29:23.000000000 -0500
+@@ -59,6 +59,12 @@
+ #define SLI2_IOCB_CMD_R3XTRA_ENTRIES 24
+ #define SLI2_IOCB_RSP_R3XTRA_ENTRIES 32
+ 
++#define SLI2_IOCB_CMD_SIZE	32
++#define SLI2_IOCB_RSP_SIZE	32
++#define SLI3_IOCB_CMD_SIZE	128
++#define SLI3_IOCB_RSP_SIZE	64
++
++
+ /* Common Transport structures and definitions */
+ 
+ union CtRevisionId {
+@@ -79,6 +85,9 @@
+ 	uint32_t word;
+ };
+ 
++#define FC4_FEATURE_INIT 0x2
++#define FC4_FEATURE_TARGET 0x1
++
+ struct lpfc_sli_ct_request {
+ 	/* Structure is in Big Endian format */
+ 	union CtRevisionId RevisionId;
+@@ -121,20 +130,6 @@
+ 
+ 			uint32_t rsvd[7];
+ 		} rft;
+-		struct rff {
+-			uint32_t PortId;
+-			uint8_t reserved[2];
+-#ifdef __BIG_ENDIAN_BITFIELD
+-			uint8_t feature_res:6;
+-			uint8_t feature_init:1;
+-			uint8_t feature_tgt:1;
+-#else  /*  __LITTLE_ENDIAN_BITFIELD */
+-			uint8_t feature_tgt:1;
+-			uint8_t feature_init:1;
+-			uint8_t feature_res:6;
+-#endif
+-			uint8_t type_code;     /* type=8 for FCP */
+-		} rff;
+ 		struct rnn {
+ 			uint32_t PortId;	/* For RNN_ID requests */
+ 			uint8_t wwnn[8];
+@@ -144,15 +139,42 @@
+ 			uint8_t len;
+ 			uint8_t symbname[255];
+ 		} rsnn;
++		struct rspn {	/* For RSPN_ID requests */
++			uint32_t PortId;
++			uint8_t len;
++			uint8_t symbname[255];
++		} rspn;
++		struct gff {
++			uint32_t PortId;
++		} gff;
++		struct gff_acc {
++			uint8_t fbits[128];
++		} gff_acc;
++#define FCP_TYPE_FEATURE_OFFSET 4
++		struct rff {
++			uint32_t PortId;
++			uint8_t reserved[2];
++			uint8_t fbits;
++			uint8_t type_code;     /* type=8 for FCP */
++		} rff;
+ 	} un;
+ };
+ 
+ #define  SLI_CT_REVISION        1
+-#define  GID_REQUEST_SZ         (sizeof(struct lpfc_sli_ct_request) - 260)
+-#define  RFT_REQUEST_SZ         (sizeof(struct lpfc_sli_ct_request) - 228)
+-#define  RFF_REQUEST_SZ         (sizeof(struct lpfc_sli_ct_request) - 235)
+-#define  RNN_REQUEST_SZ         (sizeof(struct lpfc_sli_ct_request) - 252)
+-#define  RSNN_REQUEST_SZ        (sizeof(struct lpfc_sli_ct_request))
++#define  GID_REQUEST_SZ   (offsetof(struct lpfc_sli_ct_request, un) + \
++			   sizeof(struct gid))
++#define  GFF_REQUEST_SZ   (offsetof(struct lpfc_sli_ct_request, un) + \
++			   sizeof(struct gff))
++#define  RFT_REQUEST_SZ   (offsetof(struct lpfc_sli_ct_request, un) + \
++			   sizeof(struct rft))
++#define  RFF_REQUEST_SZ   (offsetof(struct lpfc_sli_ct_request, un) + \
++			   sizeof(struct rff))
++#define  RNN_REQUEST_SZ   (offsetof(struct lpfc_sli_ct_request, un) + \
++			   sizeof(struct rnn))
++#define  RSNN_REQUEST_SZ  (offsetof(struct lpfc_sli_ct_request, un) + \
++			   sizeof(struct rsnn))
++#define  RSPN_REQUEST_SZ  (offsetof(struct lpfc_sli_ct_request, un) + \
++			   sizeof(struct rspn))
+ 
+ /*
+  * FsType Definitions
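The request-size macros above replace "sizeof(struct lpfc_sli_ct_request) minus a magic constant" with offsetof(..., un) plus the size of the arm actually used, so adding a union arm (as this hunk does with rspn and gff) can no longer silently change every other request size. A small demonstration with an abbreviated stand-in struct:

#include <stdint.h>
#include <stdio.h>
#include <stddef.h>

struct gid { uint32_t port_id; };
struct gff { uint32_t port_id; uint8_t fbits[128]; };

struct ct_request {
	uint32_t revision;
	uint32_t cmd_rsp;
	union {
		struct gid gid;
		struct gff gff;
		/* new arms can be added freely: the sizes below hold */
	} un;
};

#define GID_REQUEST_SZ	(offsetof(struct ct_request, un) + \
			 sizeof(struct gid))
#define GFF_REQUEST_SZ	(offsetof(struct ct_request, un) + \
			 sizeof(struct gff))

int main(void)
{
	printf("GID: %zu bytes, GFF: %zu bytes, full struct: %zu bytes\n",
	       (size_t)GID_REQUEST_SZ, (size_t)GFF_REQUEST_SZ,
	       sizeof(struct ct_request));
	return 0;
}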
+@@ -227,6 +249,7 @@
+ #define  SLI_CTNS_GFT_ID      0x0117
+ #define  SLI_CTNS_GSPN_ID     0x0118
+ #define  SLI_CTNS_GPT_ID      0x011A
++#define  SLI_CTNS_GFF_ID      0x011F
+ #define  SLI_CTNS_GID_PN      0x0121
+ #define  SLI_CTNS_GID_NN      0x0131
+ #define  SLI_CTNS_GIP_NN      0x0135
+@@ -240,9 +263,9 @@
+ #define  SLI_CTNS_RNN_ID      0x0213
+ #define  SLI_CTNS_RCS_ID      0x0214
+ #define  SLI_CTNS_RFT_ID      0x0217
+-#define  SLI_CTNS_RFF_ID      0x021F
+ #define  SLI_CTNS_RSPN_ID     0x0218
+ #define  SLI_CTNS_RPT_ID      0x021A
++#define  SLI_CTNS_RFF_ID      0x021F
+ #define  SLI_CTNS_RIP_NN      0x0235
+ #define  SLI_CTNS_RIPA_NN     0x0236
+ #define  SLI_CTNS_RSNN_NN     0x0239
+@@ -311,9 +334,9 @@
+ 	uint8_t bbCreditlsb;	/* FC Word 0, byte 3 */
+ 
+ #ifdef __BIG_ENDIAN_BITFIELD
+-	uint16_t increasingOffset:1;	/* FC Word 1, bit 31 */
++	uint16_t request_multiple_Nport:1;	/* FC Word 1, bit 31 */
+ 	uint16_t randomOffset:1;	/* FC Word 1, bit 30 */
+-	uint16_t word1Reserved2:1;	/* FC Word 1, bit 29 */
++	uint16_t response_multiple_NPort:1;	/* FC Word 1, bit 29 */
+ 	uint16_t fPort:1;	/* FC Word 1, bit 28 */
+ 	uint16_t altBbCredit:1;	/* FC Word 1, bit 27 */
+ 	uint16_t edtovResolution:1;	/* FC Word 1, bit 26 */
+@@ -332,9 +355,9 @@
+ 	uint16_t edtovResolution:1;	/* FC Word 1, bit 26 */
+ 	uint16_t altBbCredit:1;	/* FC Word 1, bit 27 */
+ 	uint16_t fPort:1;	/* FC Word 1, bit 28 */
+-	uint16_t word1Reserved2:1;	/* FC Word 1, bit 29 */
++	uint16_t response_multiple_NPort:1;	/* FC Word 1, bit 29 */
+ 	uint16_t randomOffset:1;	/* FC Word 1, bit 30 */
+-	uint16_t increasingOffset:1;	/* FC Word 1, bit 31 */
++	uint16_t request_multiple_Nport:1;	/* FC Word 1, bit 31 */
+ 
+ 	uint16_t payloadlength:1;	/* FC Word 1, bit 16 */
+ 	uint16_t contIncSeqCnt:1;	/* FC Word 1, bit 17 */
+@@ -1255,7 +1278,9 @@
+ #define MBX_KILL_BOARD      0x24
+ #define MBX_CONFIG_FARP     0x25
+ #define MBX_BEACON          0x2A
++#define MBX_HEARTBEAT       0x31
+ 
++#define MBX_CONFIG_HBQ	    0x7C
+ #define MBX_LOAD_AREA       0x81
+ #define MBX_RUN_BIU_DIAG64  0x84
+ #define MBX_CONFIG_PORT     0x88
+@@ -1263,6 +1288,10 @@
+ #define MBX_READ_RPI64      0x8F
+ #define MBX_REG_LOGIN64     0x93
+ #define MBX_READ_LA64       0x95
++#define MBX_REG_VPI	    0x96
++#define MBX_UNREG_VPI	    0x97
++#define MBX_REG_VNPID	    0x96
++#define MBX_UNREG_VNPID	    0x97
+ 
+ #define MBX_FLASH_WR_ULA    0x98
+ #define MBX_SET_DEBUG       0x99
+@@ -1335,6 +1364,10 @@
+ #define CMD_FCP_TRECEIVE64_CX   0xA1
+ #define CMD_FCP_TRSP64_CX       0xA3
+ 
++#define CMD_IOCB_RCV_SEQ64_CX	0xB5
++#define CMD_IOCB_RCV_ELS64_CX	0xB7
++#define CMD_IOCB_RCV_CONT64_CX	0xBB
++
+ #define CMD_GEN_REQUEST64_CR    0xC2
+ #define CMD_GEN_REQUEST64_CX    0xC3
+ 
+@@ -1561,6 +1594,7 @@
+ #define FLAGS_TOPOLOGY_MODE_PT_PT    0x02 /* Attempt pt-pt only */
+ #define FLAGS_TOPOLOGY_MODE_LOOP     0x04 /* Attempt loop only */
+ #define FLAGS_TOPOLOGY_MODE_PT_LOOP  0x06 /* Attempt pt-pt then loop */
++#define	FLAGS_UNREG_LOGIN_ALL	     0x08 /* UNREG_LOGIN all on link down */
+ #define FLAGS_LIRP_LILP              0x80 /* LIRP / LILP is disabled */
+ 
+ #define FLAGS_TOPOLOGY_FAILOVER      0x0400	/* Bit 10 */
+@@ -1744,8 +1778,6 @@
+ #define LMT_4Gb       0x040
+ #define LMT_8Gb       0x080
+ #define LMT_10Gb      0x100
+-
+-
+ 	uint32_t rsvd2;
+ 	uint32_t rsvd3;
+ 	uint32_t max_xri;
+@@ -1754,7 +1786,10 @@
+ 	uint32_t avail_xri;
+ 	uint32_t avail_iocb;
+ 	uint32_t avail_rpi;
+-	uint32_t default_rpi;
++	uint32_t max_vpi;
++	uint32_t rsvd4;
++	uint32_t rsvd5;
++	uint32_t avail_vpi;
+ } READ_CONFIG_VAR;
+ 
+ /* Structure for MB Command READ_RCONFIG (12) */
+@@ -1818,6 +1853,13 @@
+ 				      structure */
+ 		struct ulp_bde64 sp64;
+ 	} un;
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint16_t rsvd3;
++	uint16_t vpi;
++#else	/*  __LITTLE_ENDIAN_BITFIELD */
++	uint16_t vpi;
++	uint16_t rsvd3;
++#endif
+ } READ_SPARM_VAR;
+ 
+ /* Structure for MB Command READ_STATUS (14) */
+@@ -1918,11 +1960,17 @@
+ #ifdef __BIG_ENDIAN_BITFIELD
+ 	uint32_t cv:1;
+ 	uint32_t rr:1;
+-	uint32_t rsvd1:29;
++	uint32_t rsvd2:2;
++	uint32_t v3req:1;
++	uint32_t v3rsp:1;
++	uint32_t rsvd1:25;
+ 	uint32_t rv:1;
+ #else	/*  __LITTLE_ENDIAN_BITFIELD */
+ 	uint32_t rv:1;
+-	uint32_t rsvd1:29;
++	uint32_t rsvd1:25;
++	uint32_t v3rsp:1;
++	uint32_t v3req:1;
++	uint32_t rsvd2:2;
+ 	uint32_t rr:1;
+ 	uint32_t cv:1;
+ #endif
+@@ -1972,8 +2020,8 @@
+ 	uint8_t sli1FwName[16];
+ 	uint32_t sli2FwRev;
+ 	uint8_t sli2FwName[16];
+-	uint32_t rsvd2;
+-	uint32_t RandomData[7];
++	uint32_t sli3Feat;
++	uint32_t RandomData[6];
+ } READ_REV_VAR;
+ 
+ /* Structure for MB Command READ_LINK_STAT (18) */
+@@ -2013,6 +2061,14 @@
+ 		struct ulp_bde64 sp64;
+ 	} un;
+ 
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint16_t rsvd6;
++	uint16_t vpi;
++#else /* __LITTLE_ENDIAN_BITFIELD */
++	uint16_t vpi;
++	uint16_t rsvd6;
++#endif
++
+ } REG_LOGIN_VAR;
+ 
+ /* Word 30 contents for REG_LOGIN */
+@@ -2037,16 +2093,78 @@
+ #ifdef __BIG_ENDIAN_BITFIELD
+ 	uint16_t rsvd1;
+ 	uint16_t rpi;
++	uint32_t rsvd2;
++	uint32_t rsvd3;
++	uint32_t rsvd4;
++	uint32_t rsvd5;
++	uint16_t rsvd6;
++	uint16_t vpi;
+ #else	/*  __LITTLE_ENDIAN_BITFIELD */
+ 	uint16_t rpi;
+ 	uint16_t rsvd1;
++	uint32_t rsvd2;
++	uint32_t rsvd3;
++	uint32_t rsvd4;
++	uint32_t rsvd5;
++	uint16_t vpi;
++	uint16_t rsvd6;
+ #endif
+ } UNREG_LOGIN_VAR;
+ 
++/* Structure for MB Command REG_VPI (0x96) */
++typedef struct {
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint32_t rsvd1;
++	uint32_t rsvd2:8;
++	uint32_t sid:24;
++	uint32_t rsvd3;
++	uint32_t rsvd4;
++	uint32_t rsvd5;
++	uint16_t rsvd6;
++	uint16_t vpi;
++#else	/*  __LITTLE_ENDIAN */
++	uint32_t rsvd1;
++	uint32_t sid:24;
++	uint32_t rsvd2:8;
++	uint32_t rsvd3;
++	uint32_t rsvd4;
++	uint32_t rsvd5;
++	uint16_t vpi;
++	uint16_t rsvd6;
++#endif
++} REG_VPI_VAR;
++
++/* Structure for MB Command UNREG_VPI (0x97) */
++typedef struct {
++	uint32_t rsvd1;
++	uint32_t rsvd2;
++	uint32_t rsvd3;
++	uint32_t rsvd4;
++	uint32_t rsvd5;
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint16_t rsvd6;
++	uint16_t vpi;
++#else	/*  __LITTLE_ENDIAN */
++	uint16_t vpi;
++	uint16_t rsvd6;
++#endif
++} UNREG_VPI_VAR;
++
+ /* Structure for MB Command UNREG_D_ID (0x23) */
+ 
+ typedef struct {
+ 	uint32_t did;
++	uint32_t rsvd2;
++	uint32_t rsvd3;
++	uint32_t rsvd4;
++	uint32_t rsvd5;
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint16_t rsvd6;
++	uint16_t vpi;
++#else
++	uint16_t vpi;
++	uint16_t rsvd6;
++#endif
+ } UNREG_D_ID_VAR;
+ 
+ /* Structure for MB Command READ_LA (21) */
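As in the REG_VPI and UNREG_D_ID layouts just above, every mailbox word is declared twice, once per byte order, so that a field such as vpi always lands in the same bits of the 32-bit word the hardware sees. The standalone probe below makes the point with the compiler's __BYTE_ORDER__ macros standing in for the kernel's __BIG_ENDIAN_BITFIELD; it prints 0x00000001 on either kind of host:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct vpi_word {
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	uint16_t rsvd;
	uint16_t vpi;		/* second half on big-endian */
#else
	uint16_t vpi;		/* first half on little-endian */
	uint16_t rsvd;
#endif
};

int main(void)
{
	struct vpi_word w = { 0 };
	uint32_t word;

	w.vpi = 0x0001;
	memcpy(&word, &w, sizeof(word));
	/* Either way the VPI occupies the word's low-order bits. */
	printf("mailbox word on this host: 0x%08x\n", word);
	return 0;
}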
+@@ -2178,13 +2296,240 @@
+ #define  DMP_RSP_OFFSET          0x14   /* word 5 contains first word of rsp */
+ #define  DMP_RSP_SIZE            0x6C   /* maximum of 27 words of rsp data */
+ 
+-/* Structure for MB Command CONFIG_PORT (0x88) */
++struct hbq_mask {
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint8_t tmatch;
++	uint8_t tmask;
++	uint8_t rctlmatch;
++	uint8_t rctlmask;
++#else	/*  __LITTLE_ENDIAN */
++	uint8_t rctlmask;
++	uint8_t rctlmatch;
++	uint8_t tmask;
++	uint8_t tmatch;
++#endif
++};
+ 
++
++/* Structure for MB Command CONFIG_HBQ (7c) */
++
++struct config_hbq_var {
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint32_t rsvd1      :7;
++	uint32_t recvNotify :1;     /* Receive Notification */
++	uint32_t numMask    :8;     /* # Mask Entries       */
++	uint32_t profile    :8;     /* Selection Profile    */
++	uint32_t rsvd2      :8;
++#else	/*  __LITTLE_ENDIAN */
++	uint32_t rsvd2      :8;
++	uint32_t profile    :8;     /* Selection Profile    */
++	uint32_t numMask    :8;     /* # Mask Entries       */
++	uint32_t recvNotify :1;     /* Receive Notification */
++	uint32_t rsvd1      :7;
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint32_t hbqId      :16;
++	uint32_t rsvd3      :12;
++	uint32_t ringMask   :4;
++#else	/*  __LITTLE_ENDIAN */
++	uint32_t ringMask   :4;
++	uint32_t rsvd3      :12;
++	uint32_t hbqId      :16;
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint32_t entry_count :16;
++	uint32_t rsvd4        :8;
++	uint32_t headerLen    :8;
++#else	/*  __LITTLE_ENDIAN */
++	uint32_t headerLen    :8;
++	uint32_t rsvd4        :8;
++	uint32_t entry_count :16;
++#endif
++
++	uint32_t hbqaddrLow;
++	uint32_t hbqaddrHigh;
++
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint32_t rsvd5      :31;
++	uint32_t logEntry   :1;
++#else	/*  __LITTLE_ENDIAN */
++	uint32_t logEntry   :1;
++	uint32_t rsvd5      :31;
++#endif
++
++	uint32_t rsvd6;    /* w7 */
++	uint32_t rsvd7;    /* w8 */
++	uint32_t rsvd8;    /* w9 */
++
++	struct hbq_mask hbqMasks[6];
++
++
++	union {
++		uint32_t allprofiles[12];
++
++		struct {
++			#ifdef __BIG_ENDIAN_BITFIELD
++				uint32_t	seqlenoff	:16;
++				uint32_t	maxlen		:16;
++			#else	/*  __LITTLE_ENDIAN */
++				uint32_t	maxlen		:16;
++				uint32_t	seqlenoff	:16;
++			#endif
++			#ifdef __BIG_ENDIAN_BITFIELD
++				uint32_t	rsvd1		:28;
++				uint32_t	seqlenbcnt	:4;
++			#else	/*  __LITTLE_ENDIAN */
++				uint32_t	seqlenbcnt	:4;
++				uint32_t	rsvd1		:28;
++			#endif
++			uint32_t rsvd[10];
++		} profile2;
++
++		struct {
++			#ifdef __BIG_ENDIAN_BITFIELD
++				uint32_t	seqlenoff	:16;
++				uint32_t	maxlen		:16;
++			#else	/*  __LITTLE_ENDIAN */
++				uint32_t	maxlen		:16;
++				uint32_t	seqlenoff	:16;
++			#endif
++			#ifdef __BIG_ENDIAN_BITFIELD
++				uint32_t	cmdcodeoff	:28;
++				uint32_t	rsvd1		:12;
++				uint32_t	seqlenbcnt	:4;
++			#else	/*  __LITTLE_ENDIAN */
++				uint32_t	seqlenbcnt	:4;
++				uint32_t	rsvd1		:12;
++				uint32_t	cmdcodeoff	:28;
++			#endif
++			uint32_t cmdmatch[8];
++
++			uint32_t rsvd[2];
++		} profile3;
++
++		struct {
++			#ifdef __BIG_ENDIAN_BITFIELD
++				uint32_t	seqlenoff	:16;
++				uint32_t	maxlen		:16;
++			#else	/*  __LITTLE_ENDIAN */
++				uint32_t	maxlen		:16;
++				uint32_t	seqlenoff	:16;
++			#endif
++			#ifdef __BIG_ENDIAN_BITFIELD
++				uint32_t	cmdcodeoff	:28;
++				uint32_t	rsvd1		:12;
++				uint32_t	seqlenbcnt	:4;
++			#else	/*  __LITTLE_ENDIAN */
++				uint32_t	seqlenbcnt	:4;
++				uint32_t	rsvd1		:12;
++				uint32_t	cmdcodeoff	:28;
++			#endif
++			uint32_t cmdmatch[8];
++
++			uint32_t rsvd[2];
++		} profile5;
++
++	} profiles;
++
++};
++
++
++
++/* Structure for MB Command CONFIG_PORT (0x88) */
+ typedef struct {
+-	uint32_t pcbLen;
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint32_t cBE       :  1;
++	uint32_t cET       :  1;
++	uint32_t cHpcb     :  1;
++	uint32_t cMA       :  1;
++	uint32_t sli_mode  :  4;
++	uint32_t pcbLen    : 24;       /* bit 23:0  of memory based port
++					* config block */
++#else	/*  __LITTLE_ENDIAN */
++	uint32_t pcbLen    : 24;       /* bit 23:0  of memory based port
++					* config block */
++	uint32_t sli_mode  :  4;
++	uint32_t cMA       :  1;
++	uint32_t cHpcb     :  1;
++	uint32_t cET       :  1;
++	uint32_t cBE       :  1;
++#endif
++
+ 	uint32_t pcbLow;       /* bit 31:0  of memory based port config block */
+ 	uint32_t pcbHigh;      /* bit 63:32 of memory based port config block */
+-	uint32_t hbainit[5];
++	uint32_t hbainit[6];
++
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint32_t rsvd      : 24;  /* Reserved                             */
++	uint32_t cmv	   :  1;  /* Configure Max VPIs                   */
++	uint32_t ccrp      :  1;  /* Config Command Ring Polling          */
++	uint32_t csah      :  1;  /* Configure Synchronous Abort Handling */
++	uint32_t chbs      :  1;  /* Configure Host Backing store         */
++	uint32_t cinb      :  1;  /* Enable Interrupt Notification Block  */
++	uint32_t cerbm	   :  1;  /* Configure Enhanced Receive Buf Mgmt  */
++	uint32_t cmx	   :  1;  /* Configure Max XRIs                   */
++	uint32_t cmr	   :  1;  /* Configure Max RPIs                   */
++#else	/*  __LITTLE_ENDIAN */
++	uint32_t cmr	   :  1;  /* Configure Max RPIs                   */
++	uint32_t cmx	   :  1;  /* Configure Max XRIs                   */
++	uint32_t cerbm	   :  1;  /* Configure Enhanced Receive Buf Mgmt  */
++	uint32_t cinb      :  1;  /* Enable Interrupt Notification Block  */
++	uint32_t chbs      :  1;  /* Configure Host Backing store         */
++	uint32_t csah      :  1;  /* Configure Synchronous Abort Handling */
++	uint32_t ccrp      :  1;  /* Config Command Ring Polling          */
++	uint32_t cmv	   :  1;  /* Configure Max VPIs                   */
++	uint32_t rsvd      : 24;  /* Reserved                             */
++#endif
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint32_t rsvd2     : 24;  /* Reserved                             */
++	uint32_t gmv	   :  1;  /* Grant Max VPIs                       */
++	uint32_t gcrp	   :  1;  /* Grant Command Ring Polling           */
++	uint32_t gsah	   :  1;  /* Grant Synchronous Abort Handling     */
++	uint32_t ghbs	   :  1;  /* Grant Host Backing Store             */
++	uint32_t ginb	   :  1;  /* Grant Interrupt Notification Block   */
++	uint32_t gerbm	   :  1;  /* Grant ERBM Request                   */
++	uint32_t gmx	   :  1;  /* Grant Max XRIs                       */
++	uint32_t gmr	   :  1;  /* Grant Max RPIs                       */
++#else	/*  __LITTLE_ENDIAN */
++	uint32_t gmr	   :  1;  /* Grant Max RPIs                       */
++	uint32_t gmx	   :  1;  /* Grant Max XRIs                       */
++	uint32_t gerbm	   :  1;  /* Grant ERBM Request                   */
++	uint32_t ginb	   :  1;  /* Grant Interrupt Notification Block   */
++	uint32_t ghbs	   :  1;  /* Grant Host Backing Store             */
++	uint32_t gsah	   :  1;  /* Grant Synchronous Abort Handling     */
++	uint32_t gcrp	   :  1;  /* Grant Command Ring Polling           */
++	uint32_t gmv	   :  1;  /* Grant Max VPIs                       */
++	uint32_t rsvd2     : 24;  /* Reserved                             */
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint32_t max_rpi   : 16;  /* Max RPIs Port should configure       */
++	uint32_t max_xri   : 16;  /* Max XRIs Port should configure       */
++#else	/*  __LITTLE_ENDIAN */
++	uint32_t max_xri   : 16;  /* Max XRIs Port should configure       */
++	uint32_t max_rpi   : 16;  /* Max RPIs Port should configure       */
++#endif
++
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint32_t max_hbq   : 16;  /* Max HBQs Host expect to configure    */
++	uint32_t rsvd3     : 16;  /* Reserved                             */
++#else	/*  __LITTLE_ENDIAN */
++	uint32_t rsvd3     : 16;  /* Reserved                             */
++	uint32_t max_hbq   : 16;  /* Max HBQs Host expect to configure    */
++#endif
++
++	uint32_t rsvd4;           /* Reserved                             */
++
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint32_t rsvd5      : 16;  /* Reserved                             */
++	uint32_t max_vpi    : 16;  /* Max number of virt N-Ports           */
++#else	/*  __LITTLE_ENDIAN */
++	uint32_t max_vpi    : 16;  /* Max number of virt N-Ports           */
++	uint32_t rsvd5      : 16;  /* Reserved                             */
++#endif
++
+ } CONFIG_PORT_VAR;
+ 
+ /* SLI-2 Port Control Block */
+@@ -2262,7 +2607,9 @@
+ #define MAILBOX_CMD_SIZE	(MAILBOX_CMD_WSIZE * sizeof(uint32_t))
+ 
+ typedef union {
+-	uint32_t varWords[MAILBOX_CMD_WSIZE - 1];
++	uint32_t varWords[MAILBOX_CMD_WSIZE - 1]; /* first word is type/
++						    * feature/max ring number
++						    */
+ 	LOAD_SM_VAR varLdSM;	/* cmd =  1 (LOAD_SM)        */
+ 	READ_NV_VAR varRDnvp;	/* cmd =  2 (READ_NVPARMS)   */
+ 	WRITE_NV_VAR varWTnvp;	/* cmd =  3 (WRITE_NVPARMS)  */
+@@ -2287,8 +2634,13 @@
+ 	CLEAR_LA_VAR varClearLA;	/* cmd = 22 (CLEAR_LA)       */
+ 	DUMP_VAR varDmp;	/* Warm Start DUMP mbx cmd   */
+ 	UNREG_D_ID_VAR varUnregDID; /* cmd = 0x23 (UNREG_D_ID)   */
+-	CONFIG_FARP_VAR varCfgFarp; /* cmd = 0x25 (CONFIG_FARP)  NEW_FEATURE */
++	CONFIG_FARP_VAR varCfgFarp;	/* cmd = 0x25 (CONFIG_FARP)
++					 * NEW_FEATURE
++					 */
++	struct config_hbq_var varCfgHbq;/* cmd = 0x7c (CONFIG_HBQ)  */
+ 	CONFIG_PORT_VAR varCfgPort; /* cmd = 0x88 (CONFIG_PORT)  */
++	REG_VPI_VAR varRegVpi;		/* cmd = 0x96 (REG_VPI) */
++	UNREG_VPI_VAR varUnregVpi;	/* cmd = 0x97 (UNREG_VPI) */
+ } MAILVARIANTS;
+ 
+ /*
+@@ -2305,14 +2657,27 @@
+ 	__le32 rspPutInx;
+ };
+ 
+-typedef struct _SLI2_DESC {
+-	struct lpfc_hgp host[MAX_RINGS];
++struct sli2_desc {
+ 	uint32_t unused1[16];
++	struct lpfc_hgp host[MAX_RINGS];
+ 	struct lpfc_pgp port[MAX_RINGS];
+-} SLI2_DESC;
++};
++
++struct sli3_desc {
++	struct lpfc_hgp host[MAX_RINGS];
++	uint32_t reserved[8];
++	uint32_t hbq_put[16];
++};
++
++struct sli3_pgp {
++	struct lpfc_pgp port[MAX_RINGS];
++	uint32_t hbq_get[16];
++};
+ 
+ typedef union {
+-	SLI2_DESC s2;
++	struct sli2_desc s2;
++	struct sli3_desc s3;
++	struct sli3_pgp  s3_pgp;
+ } SLI_VAR;
+ 
+ typedef struct {
+@@ -2618,6 +2983,25 @@
+ 	uint32_t fcpt_Length;	/* transfer ready for IWRITE */
+ } FCPT_FIELDS64;
+ 
++/* IOCB Command template for CMD_IOCB_RCV_ELS64_CX (0xB7)
++   or CMD_IOCB_RCV_SEQ64_CX (0xB5) */
++
++struct rcv_sli3 {
++	uint32_t word8Rsvd;
++#ifdef __BIG_ENDIAN_BITFIELD
++	uint16_t vpi;
++	uint16_t word9Rsvd;
++#else  /*  __LITTLE_ENDIAN */
++	uint16_t word9Rsvd;
++	uint16_t vpi;
++#endif
++	uint32_t word10Rsvd;
++	uint32_t acc_len;      /* accumulated length */
++	struct ulp_bde64 bde2;
++};
++
++
++
+ typedef struct _IOCB {	/* IOCB structure */
+ 	union {
+ 		GENERIC_RSP grsp;	/* Generic response */
+@@ -2633,7 +3017,7 @@
+ 		/* SLI-2 structures */
+ 
+ 		struct ulp_bde64 cont64[2];	/* up to 2 64 bit continuation
+-					   bde_64s */
++					      * bde_64s */
+ 		ELS_REQUEST64 elsreq64;	/* ELS_REQUEST template */
+ 		GEN_REQUEST64 genreq64;	/* GEN_REQUEST template */
+ 		RCV_ELS_REQ64 rcvels64;	/* RCV_ELS_REQ template */
+@@ -2695,9 +3079,20 @@
+ 	uint32_t ulpTimeout:8;
+ #endif
+ 
++	union {
++		struct rcv_sli3 rcvsli3; /* words 8 - 15 */
++		uint32_t sli3Words[24]; /* 96 extra bytes for SLI-3 */
++	} unsli3;
++
++#define ulpCt_h ulpXS
++#define ulpCt_l ulpFCP2Rcvy
++
++#define IOCB_FCP	   1	/* IOCB is used for FCP ELS cmds-ulpRsvByte */
++#define IOCB_IP		   2	/* IOCB is used for IP ELS cmds */
+ #define PARM_UNUSED        0	/* PU field (Word 4) not used */
+ #define PARM_REL_OFF       1	/* PU field (Word 4) = R. O. */
+ #define PARM_READ_CHECK    2	/* PU field (Word 4) = Data Transfer Length */
++#define PARM_NPIV_DID	   3
+ #define CLASS1             0	/* Class 1 */
+ #define CLASS2             1	/* Class 2 */
+ #define CLASS3             2	/* Class 3 */
+@@ -2718,39 +3113,51 @@
+ #define IOSTAT_RSVD2           0xC
+ #define IOSTAT_RSVD3           0xD
+ #define IOSTAT_RSVD4           0xE
+-#define IOSTAT_RSVD5           0xF
++#define IOSTAT_NEED_BUFFER     0xF
+ #define IOSTAT_DRIVER_REJECT   0x10   /* ulpStatus  - Driver defined */
+ #define IOSTAT_DEFAULT         0xF    /* Same as rsvd5 for now */
+ #define IOSTAT_CNT             0x11
+ 
+ } IOCB_t;
+ 
++/* Structure used for a single HBQ entry */
++struct lpfc_hbq_entry {
++	struct ulp_bde64 bde;
++	uint32_t buffer_tag;
++};
++
+ 
+ #define SLI1_SLIM_SIZE   (4 * 1024)
+ 
+ /* Up to 498 IOCBs will fit into 16k
+  * 256 (MAILBOX_t) + 140 (PCB_t) + ( 32 (IOCB_t) * 498 ) = < 16384
+  */
+-#define SLI2_SLIM_SIZE   (16 * 1024)
++#define SLI2_SLIM_SIZE   (64 * 1024)
+ 
+ /* Maximum IOCBs that will fit in SLI2 slim */
+ #define MAX_SLI2_IOCB    498
++#define MAX_SLIM_IOCB_SIZE (SLI2_SLIM_SIZE - \
++			    (sizeof(MAILBOX_t) + sizeof(PCB_t)))
++
++/* HBQ entries are 4 words each = 4k */
++#define LPFC_TOTAL_HBQ_SIZE (sizeof(struct lpfc_hbq_entry) *  \
++			     lpfc_sli_hbq_count())
+ 
+ struct lpfc_sli2_slim {
+ 	MAILBOX_t mbx;
+ 	PCB_t pcb;
+-	IOCB_t IOCBs[MAX_SLI2_IOCB];
++	IOCB_t IOCBs[MAX_SLIM_IOCB_SIZE];
+ };
+ 
+-/*******************************************************************
+-This macro check PCI device to allow special handling for LC HBAs.
+-
+-Parameters:
+-device : struct pci_dev 's device field
+-
+-return 1 => TRUE
+-       0 => FALSE
+- *******************************************************************/
++/*
++ * This function checks PCI device to allow special handling for LC HBAs.
++ *
++ * Parameters:
++ * device : struct pci_dev 's device field
++ *
++ * return 1 => TRUE
++ *        0 => FALSE
++ */
+ static inline int
+ lpfc_is_LC_HBA(unsigned short device)
+ {
+@@ -2766,3 +3173,16 @@
+ 	else
+ 		return 0;
+ }
++
++/*
++ * Determine if an IOCB failed because of a link event or firmware reset.
++ */
++
++static inline int
++lpfc_error_lost_link(IOCB_t *iocbp)
++{
++	return (iocbp->ulpStatus == IOSTAT_LOCAL_REJECT &&
++		(iocbp->un.ulpWord[4] == IOERR_SLI_ABORTED ||
++		 iocbp->un.ulpWord[4] == IOERR_LINK_DOWN ||
++		 iocbp->un.ulpWord[4] == IOERR_SLI_DOWN));
++}
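lpfc_error_lost_link() above folds the "did this fail because the link went away" test into a single inline, so completion paths can cheaply decide that a retry is pointless. A standalone sketch of how such a predicate is typically consumed; the handler and status values below are simplified stand-ins, not the driver's:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { IOSTAT_SUCCESS = 0x0, IOSTAT_LOCAL_REJECT = 0x3 };
enum { IOERR_SLI_ABORTED = 1, IOERR_LINK_DOWN = 2, IOERR_SLI_DOWN = 3 };

struct iocb { uint32_t ulpStatus, word4; };

static inline bool error_lost_link(const struct iocb *io)
{
	return io->ulpStatus == IOSTAT_LOCAL_REJECT &&
	       (io->word4 == IOERR_SLI_ABORTED ||
		io->word4 == IOERR_LINK_DOWN ||
		io->word4 == IOERR_SLI_DOWN);
}

static void cmd_cmpl(const struct iocb *rsp)
{
	if (rsp->ulpStatus == IOSTAT_SUCCESS)
		puts("command ok");
	else if (error_lost_link(rsp))
		puts("link gone: drop it, rediscovery will reissue");
	else
		puts("real failure: retry the command");
}

int main(void)
{
	struct iocb rsp = { IOSTAT_LOCAL_REJECT, IOERR_LINK_DOWN };

	cmd_cmpl(&rsp);
	return 0;
}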
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_init.c linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_init.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_init.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_init.c	2007-12-19 15:29:23.000000000 -0500
+@@ -27,6 +27,7 @@
+ #include <linux/kthread.h>
+ #include <linux/pci.h>
+ #include <linux/spinlock.h>
++#include <linux/ctype.h>
+ 
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_device.h>
+@@ -40,15 +41,19 @@
+ #include "lpfc.h"
+ #include "lpfc_logmsg.h"
+ #include "lpfc_crtn.h"
++#include "lpfc_vport.h"
+ #include "lpfc_version.h"
++#include "lpfc_vport.h"
+ 
+ static int lpfc_parse_vpd(struct lpfc_hba *, uint8_t *, int);
+ static void lpfc_get_hba_model_desc(struct lpfc_hba *, uint8_t *, uint8_t *);
+ static int lpfc_post_rcv_buf(struct lpfc_hba *);
+ 
+ static struct scsi_transport_template *lpfc_transport_template = NULL;
++static struct scsi_transport_template *lpfc_vport_transport_template = NULL;
+ static DEFINE_IDR(lpfc_hba_index);
+ 
++
++
+ /************************************************************************/
+ /*                                                                      */
+ /*    lpfc_config_port_prep                                             */
+@@ -61,7 +67,7 @@
+ /*                                                                      */
+ /************************************************************************/
+ int
+-lpfc_config_port_prep(struct lpfc_hba * phba)
++lpfc_config_port_prep(struct lpfc_hba *phba)
+ {
+ 	lpfc_vpd_t *vp = &phba->vpd;
+ 	int i = 0, rc;
+@@ -75,12 +81,12 @@
+ 
+ 	pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ 	if (!pmb) {
+-		phba->hba_state = LPFC_HBA_ERROR;
++		phba->link_state = LPFC_HBA_ERROR;
+ 		return -ENOMEM;
+ 	}
+ 
+ 	mb = &pmb->mb;
+-	phba->hba_state = LPFC_INIT_MBX_CMDS;
++	phba->link_state = LPFC_INIT_MBX_CMDS;
+ 
+ 	if (lpfc_is_LC_HBA(phba->pcidev->device)) {
+ 		if (init_key) {
+@@ -100,9 +106,7 @@
+ 		rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+ 
+ 		if (rc != MBX_SUCCESS) {
+-			lpfc_printf_log(phba,
+-					KERN_ERR,
+-					LOG_MBOX,
++			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX,
+ 					"%d:0324 Config Port initialization "
+ 					"error, mbxCmd x%x READ_NVPARM, "
+ 					"mbxStatus x%x\n",
+@@ -112,16 +116,18 @@
+ 			return -ERESTART;
+ 		}
+ 		memcpy(phba->wwnn, (char *)mb->un.varRDnvp.nodename,
+-		       sizeof (mb->un.varRDnvp.nodename));
++		       sizeof(phba->wwnn));
++		memcpy(phba->wwpn, (char *)mb->un.varRDnvp.portname,
++		       sizeof(phba->wwpn));
+ 	}
+ 
++	phba->sli3_options = 0x0;
++
+ 	/* Setup and issue mailbox READ REV command */
+ 	lpfc_read_rev(phba, pmb);
+ 	rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+ 	if (rc != MBX_SUCCESS) {
+-		lpfc_printf_log(phba,
+-				KERN_ERR,
+-				LOG_INIT,
++		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ 				"%d:0439 Adapter failed to init, mbxCmd x%x "
+ 				"READ_REV, mbxStatus x%x\n",
+ 				phba->brd_no,
+@@ -130,6 +136,7 @@
+ 		return -ERESTART;
+ 	}
+ 
++
+ 	/*
+ 	 * The value of rr must be 1 since the driver set the cv field to 1.
+ 	 * This setting requires the FW to set all revision fields.
+@@ -144,8 +151,14 @@
+ 		return -ERESTART;
+ 	}
+ 
++	if (phba->sli_rev == 3 && !mb->un.varRdRev.v3rsp) {
++		mempool_free(pmb, phba->mbox_mem_pool);
++		return -EINVAL;
++	}
++
+ 	/* Save information as VPD data */
+ 	vp->rev.rBit = 1;
++	memcpy(&vp->sli3Feat, &mb->un.varRdRev.sli3Feat, sizeof(uint32_t));
+ 	vp->rev.sli1FwRev = mb->un.varRdRev.sli1FwRev;
+ 	memcpy(vp->rev.sli1FwName, (char*) mb->un.varRdRev.sli1FwName, 16);
+ 	vp->rev.sli2FwRev = mb->un.varRdRev.sli2FwRev;
+@@ -161,6 +172,13 @@
+ 	vp->rev.postKernRev = mb->un.varRdRev.postKernRev;
+ 	vp->rev.opFwRev = mb->un.varRdRev.opFwRev;
+ 
++	/* If the sli feature level is less than 9, we must
++	 * tear down all RPIs and VPIs on link down if NPIV
++	 * is enabled.
++	 */
++	if (vp->rev.feaLevelHigh < 9)
++		phba->sli3_options |= LPFC_SLI3_VPORT_TEARDOWN;
++
+ 	if (lpfc_is_LC_HBA(phba->pcidev->device))
+ 		memcpy(phba->RandomData, (char *)&mb->un.varWords[24],
+ 						sizeof (phba->RandomData));
+@@ -212,48 +230,34 @@
+ /*                                                                      */
+ /************************************************************************/
+ int
+-lpfc_config_port_post(struct lpfc_hba * phba)
++lpfc_config_port_post(struct lpfc_hba *phba)
+ {
++	struct lpfc_vport *vport = phba->pport;
+ 	LPFC_MBOXQ_t *pmb;
+ 	MAILBOX_t *mb;
+ 	struct lpfc_dmabuf *mp;
+ 	struct lpfc_sli *psli = &phba->sli;
+ 	uint32_t status, timeout;
+-	int i, j, rc;
++	int i, j;
++	int rc;
+ 
+ 	pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ 	if (!pmb) {
+-		phba->hba_state = LPFC_HBA_ERROR;
++		phba->link_state = LPFC_HBA_ERROR;
+ 		return -ENOMEM;
+ 	}
+ 	mb = &pmb->mb;
+ 
+-	lpfc_config_link(phba, pmb);
+-	rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+-	if (rc != MBX_SUCCESS) {
+-		lpfc_printf_log(phba,
+-				KERN_ERR,
+-				LOG_INIT,
+-				"%d:0447 Adapter failed init, mbxCmd x%x "
+-				"CONFIG_LINK mbxStatus x%x\n",
+-				phba->brd_no,
+-				mb->mbxCommand, mb->mbxStatus);
+-		phba->hba_state = LPFC_HBA_ERROR;
+-		mempool_free( pmb, phba->mbox_mem_pool);
+-		return -EIO;
+-	}
+-
+ 	/* Get login parameters for NID.  */
+-	lpfc_read_sparam(phba, pmb);
++	lpfc_read_sparam(phba, pmb, 0);
++	pmb->vport = vport;
+ 	if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
+-		lpfc_printf_log(phba,
+-				KERN_ERR,
+-				LOG_INIT,
++		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ 				"%d:0448 Adapter failed init, mbxCmd x%x "
+ 				"READ_SPARM mbxStatus x%x\n",
+ 				phba->brd_no,
+ 				mb->mbxCommand, mb->mbxStatus);
+-		phba->hba_state = LPFC_HBA_ERROR;
++		phba->link_state = LPFC_HBA_ERROR;
+ 		mp = (struct lpfc_dmabuf *) pmb->context1;
+ 		mempool_free( pmb, phba->mbox_mem_pool);
+ 		lpfc_mbuf_free(phba, mp->virt, mp->phys);
+@@ -263,25 +267,27 @@
+ 
+ 	mp = (struct lpfc_dmabuf *) pmb->context1;
+ 
+-	memcpy(&phba->fc_sparam, mp->virt, sizeof (struct serv_parm));
++	memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm));
+ 	lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ 	kfree(mp);
+ 	pmb->context1 = NULL;
+ 
+ 	if (phba->cfg_soft_wwnn)
+-		u64_to_wwn(phba->cfg_soft_wwnn, phba->fc_sparam.nodeName.u.wwn);
++		u64_to_wwn(phba->cfg_soft_wwnn,
++			   vport->fc_sparam.nodeName.u.wwn);
+ 	if (phba->cfg_soft_wwpn)
+-		u64_to_wwn(phba->cfg_soft_wwpn, phba->fc_sparam.portName.u.wwn);
+-	memcpy(&phba->fc_nodename, &phba->fc_sparam.nodeName,
++		u64_to_wwn(phba->cfg_soft_wwpn,
++			   vport->fc_sparam.portName.u.wwn);
++	memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName,
+ 	       sizeof (struct lpfc_name));
+-	memcpy(&phba->fc_portname, &phba->fc_sparam.portName,
++	memcpy(&vport->fc_portname, &vport->fc_sparam.portName,
+ 	       sizeof (struct lpfc_name));
+ 	/* If no serial number in VPD data, use low 6 bytes of WWNN */
+ 	/* This should be consolidated into parse_vpd ? - mr */
+ 	if (phba->SerialNumber[0] == 0) {
+ 		uint8_t *outptr;
+ 
+-		outptr = &phba->fc_nodename.u.s.IEEE[0];
++		outptr = &vport->fc_nodename.u.s.IEEE[0];
+ 		for (i = 0; i < 12; i++) {
+ 			status = *outptr++;
+ 			j = ((status & 0xf0) >> 4);
+@@ -303,15 +309,14 @@
+ 	}
+ 
+ 	lpfc_read_config(phba, pmb);
++	pmb->vport = vport;
+ 	if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
+-		lpfc_printf_log(phba,
+-				KERN_ERR,
+-				LOG_INIT,
++		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ 				"%d:0453 Adapter failed to init, mbxCmd x%x "
+ 				"READ_CONFIG, mbxStatus x%x\n",
+ 				phba->brd_no,
+ 				mb->mbxCommand, mb->mbxStatus);
+-		phba->hba_state = LPFC_HBA_ERROR;
++		phba->link_state = LPFC_HBA_ERROR;
+ 		mempool_free( pmb, phba->mbox_mem_pool);
+ 		return -EIO;
+ 	}
+@@ -338,9 +343,7 @@
+ 	    || ((phba->cfg_link_speed == LINK_SPEED_10G)
+ 		&& !(phba->lmt & LMT_10Gb))) {
+ 		/* Reset link speed to auto */
+-		lpfc_printf_log(phba,
+-			KERN_WARNING,
+-			LOG_LINK_EVENT,
++		lpfc_printf_log(phba, KERN_WARNING, LOG_LINK_EVENT,
+ 			"%d:1302 Invalid speed for this board: "
+ 			"Reset link speed to auto: x%x\n",
+ 			phba->brd_no,
+@@ -348,7 +351,7 @@
+ 			phba->cfg_link_speed = LINK_SPEED_AUTO;
+ 	}
+ 
+-	phba->hba_state = LPFC_LINK_DOWN;
++	phba->link_state = LPFC_LINK_DOWN;
+ 
+ 	/* Only process IOCBs on ring 0 till hba_state is READY */
+ 	if (psli->ring[psli->extra_ring].cmdringaddr)
+@@ -359,10 +362,11 @@
+ 		psli->ring[psli->next_ring].flag |= LPFC_STOP_IOCB_EVENT;
+ 
+ 	/* Post receive buffers for desired rings */
++	if (phba->sli_rev != 3)
+ 	lpfc_post_rcv_buf(phba);
+ 
+ 	/* Enable appropriate host interrupts */
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	status = readl(phba->HCregaddr);
+ 	status |= HC_MBINT_ENA | HC_ERINT_ENA | HC_LAINT_ENA;
+ 	if (psli->num_rings > 0)
+@@ -380,22 +384,24 @@
+ 
+ 	writel(status, phba->HCregaddr);
+ 	readl(phba->HCregaddr); /* flush */
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	/*
+ 	 * Setup the ring 0 (els)  timeout handler
+ 	 */
+ 	timeout = phba->fc_ratov << 1;
+-	mod_timer(&phba->els_tmofunc, jiffies + HZ * timeout);
++	mod_timer(&vport->els_tmofunc, jiffies + HZ * timeout);
++	mod_timer(&phba->hb_tmofunc, jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
++	phba->hb_outstanding = 0;
++	phba->last_completion_time = jiffies;
+ 
+ 	lpfc_init_link(phba, pmb, phba->cfg_topology, phba->cfg_link_speed);
+ 	pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++	pmb->vport = vport;
+ 	rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
+ 	lpfc_set_loopback_flag(phba);
+ 	if (rc != MBX_SUCCESS) {
+-		lpfc_printf_log(phba,
+-				KERN_ERR,
+-				LOG_INIT,
++		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ 				"%d:0454 Adapter failed to init, mbxCmd x%x "
+ 				"INIT_LINK, mbxStatus x%x\n",
+ 				phba->brd_no,
+@@ -408,7 +414,7 @@
+ 		writel(0xffffffff, phba->HAregaddr);
+ 		readl(phba->HAregaddr); /* flush */
+ 
+-		phba->hba_state = LPFC_HBA_ERROR;
++		phba->link_state = LPFC_HBA_ERROR;
+ 		if (rc != MBX_BUSY)
+ 			mempool_free(pmb, phba->mbox_mem_pool);
+ 		return -EIO;
+@@ -429,18 +435,19 @@
+ /*                                                                      */
+ /************************************************************************/
+ int
+-lpfc_hba_down_prep(struct lpfc_hba * phba)
++lpfc_hba_down_prep(struct lpfc_hba *phba)
+ {
++	struct lpfc_vport *vport = phba->pport;
++
+ 	/* Disable interrupts */
+ 	writel(0, phba->HCregaddr);
+ 	readl(phba->HCregaddr); /* flush */
+ 
+-	/* Cleanup potential discovery resources */
+-	lpfc_els_flush_rscn(phba);
+-	lpfc_els_flush_cmd(phba);
+-	lpfc_disc_flush_list(phba);
++	list_for_each_entry(vport, &phba->port_list, listentry) {
++		lpfc_cleanup_discovery_resources(vport);
++	}
+ 
+-	return (0);
++	return 0;
+ }
+ 
+ /************************************************************************/
+@@ -453,13 +460,16 @@
+ /*                                                                      */
+ /************************************************************************/
+ int
+-lpfc_hba_down_post(struct lpfc_hba * phba)
++lpfc_hba_down_post(struct lpfc_hba *phba)
+ {
+ 	struct lpfc_sli *psli = &phba->sli;
+ 	struct lpfc_sli_ring *pring;
+ 	struct lpfc_dmabuf *mp, *next_mp;
+ 	int i;
+ 
++	if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED)
++		lpfc_sli_hbqbuf_free_all(phba);
++	else {
+ 	/* Cleanup preposted buffers on the ELS ring */
+ 	pring = &psli->ring[LPFC_ELS_RING];
+ 	list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) {
+@@ -468,6 +478,7 @@
+ 		lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ 		kfree(mp);
+ 	}
++	}
+ 
+ 	for (i = 0; i < psli->num_rings; i++) {
+ 		pring = &psli->ring[i];
+@@ -477,6 +488,119 @@
+ 	return 0;
+ }
+ 
++/* HBA heart beat timeout handler */
++void
++lpfc_hb_timeout(unsigned long ptr)
++{
++	struct lpfc_hba *phba;
++	unsigned long iflag;
++
++	phba = (struct lpfc_hba *)ptr;
++	spin_lock_irqsave(&phba->pport->work_port_lock, iflag);
++	if (!(phba->pport->work_port_events & WORKER_HB_TMO))
++		phba->pport->work_port_events |= WORKER_HB_TMO;
++	spin_unlock_irqrestore(&phba->pport->work_port_lock, iflag);
++
++	if (phba->work_wait)
++		wake_up(phba->work_wait);
++	return;
++}
++
++static void
++lpfc_hb_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
++{
++	unsigned long drvr_flag;
++
++	spin_lock_irqsave(&phba->hbalock, drvr_flag);
++	phba->hb_outstanding = 0;
++	spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
++
++	mempool_free(pmboxq, phba->mbox_mem_pool);
++	if (!(phba->pport->fc_flag & FC_OFFLINE_MODE) &&
++		!(phba->link_state == LPFC_HBA_ERROR) &&
++		!(phba->pport->fc_flag & FC_UNLOADING))
++		mod_timer(&phba->hb_tmofunc,
++			jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
++	return;
++}
++
++void
++lpfc_hb_timeout_handler(struct lpfc_hba *phba)
++{
++	LPFC_MBOXQ_t *pmboxq;
++	int retval;
++	struct lpfc_sli *psli = &phba->sli;
++
++	if ((phba->link_state == LPFC_HBA_ERROR) ||
++		(phba->pport->fc_flag & FC_UNLOADING) ||
++		(phba->pport->fc_flag & FC_OFFLINE_MODE))
++		return;
++
++	spin_lock_irq(&phba->pport->work_port_lock);
++	/* If the timer is already canceled do nothing */
++	if (!(phba->pport->work_port_events & WORKER_HB_TMO)) {
++		spin_unlock_irq(&phba->pport->work_port_lock);
++		return;
++	}
++
++	if (time_after(phba->last_completion_time + LPFC_HB_MBOX_INTERVAL * HZ,
++		jiffies)) {
++		spin_unlock_irq(&phba->pport->work_port_lock);
++		if (!phba->hb_outstanding)
++			mod_timer(&phba->hb_tmofunc,
++				jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
++		else
++			mod_timer(&phba->hb_tmofunc,
++				jiffies + HZ * LPFC_HB_MBOX_TIMEOUT);
++		return;
++	}
++	spin_unlock_irq(&phba->pport->work_port_lock);
++
++	/* If there is no heart beat outstanding, issue a heartbeat command */
++		pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++		pmboxq = mempool_alloc(phba->mbox_mem_pool,GFP_KERNEL);
++		if (!pmboxq) {
++			mod_timer(&phba->hb_tmofunc,
++				jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
++			return;
++		}
++
++		lpfc_heart_beat(phba, pmboxq);
++		pmboxq->mbox_cmpl = lpfc_hb_mbox_cmpl;
++		pmboxq->vport = phba->pport;
++		retval = lpfc_sli_issue_mbox(phba, pmboxq, MBX_NOWAIT);
++
++		if (retval != MBX_BUSY && retval != MBX_SUCCESS) {
++			mempool_free(pmboxq, phba->mbox_mem_pool);
++			mod_timer(&phba->hb_tmofunc,
++				jiffies + HZ * LPFC_HB_MBOX_INTERVAL);
++			return;
++		}
++		mod_timer(&phba->hb_tmofunc,
++			jiffies + HZ * LPFC_HB_MBOX_TIMEOUT);
++		phba->hb_outstanding = 1;
++		return;
++	} else {
++		/*
++		 * If heart beat timeout called with hb_outstanding set we
++		 * need to take the HBA offline.
++		 */
++		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
++			"%d:0459 Adapter heartbeat failure, taking "
++			"this port offline.\n", phba->brd_no);
++
++		spin_lock_irq(&phba->hbalock);
++		psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
++		spin_unlock_irq(&phba->hbalock);
++
++		lpfc_offline_prep(phba);
++		lpfc_offline(phba);
++		lpfc_unblock_mgmt_io(phba);
++		phba->link_state = LPFC_HBA_ERROR;
++		lpfc_hba_down_post(phba);
++	}
++}
++
+ /************************************************************************/
+ /*                                                                      */
+ /*    lpfc_handle_eratt                                                 */
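The heartbeat code above alternates two timer programs: re-arm at LPFC_HB_MBOX_INTERVAL while I/O is flowing or no probe is pending, switch to the shorter LPFC_HB_MBOX_TIMEOUT window once a heartbeat mailbox has been issued, and take the port offline if the timer fires with that probe still outstanding. The standalone model below condenses that escalation to pure logic; the intervals and names are illustrative, not the driver's constants:

#include <stdbool.h>
#include <stdio.h>

enum { HB_INTERVAL = 5, HB_TIMEOUT = 30 };	/* seconds, illustrative */

struct hba {
	bool hb_outstanding;	/* heartbeat mailbox in flight */
	long last_completion;	/* time of last I/O completion */
};

/* Returns the next timer delay, or -1 when the port must go offline. */
static int hb_timeout_handler(struct hba *h, long now)
{
	if (now < h->last_completion + HB_INTERVAL)
		/* Recent traffic already proves the HBA is alive. */
		return h->hb_outstanding ? HB_TIMEOUT : HB_INTERVAL;

	if (!h->hb_outstanding) {
		h->hb_outstanding = true;	/* issue the heartbeat */
		printf("t=%ld: heartbeat issued\n", now);
		return HB_TIMEOUT;
	}

	/* Timeout fired with the probe unanswered: the HBA is dead. */
	printf("t=%ld: heartbeat failure, taking port offline\n", now);
	return -1;
}

int main(void)
{
	struct hba h = { .hb_outstanding = false, .last_completion = 0 };
	long t = HB_INTERVAL;
	int next;

	while ((next = hb_timeout_handler(&h, t)) > 0)
		t += next;
	return 0;
}

In the real driver a completing heartbeat clears hb_outstanding (see lpfc_hb_mbox_cmpl above); the model never answers the probe, so it walks straight to the offline path.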
+@@ -486,11 +610,15 @@
+ /*                                                                      */
+ /************************************************************************/
+ void
+-lpfc_handle_eratt(struct lpfc_hba * phba)
++lpfc_handle_eratt(struct lpfc_hba *phba)
+ {
++	struct lpfc_vport *vport = phba->pport;
+ 	struct lpfc_sli *psli = &phba->sli;
+ 	struct lpfc_sli_ring  *pring;
++	struct lpfc_vport *port_iterator;
+ 	uint32_t event_data;
++	struct Scsi_Host  *shost;
++
+ 	/* If the pci channel is offline, ignore possible errors,
+ 	 * since we cannot communicate with the pci card anyway. */
+ 	if (pci_channel_offline(phba->pcidev))
+@@ -504,10 +632,17 @@
+ 				"Data: x%x x%x x%x\n",
+ 				phba->brd_no, phba->work_hs,
+ 				phba->work_status[0], phba->work_status[1]);
+-		spin_lock_irq(phba->host->host_lock);
+-		phba->fc_flag |= FC_ESTABLISH_LINK;
++		list_for_each_entry(port_iterator, &phba->port_list,
++				    listentry) {
++			shost = lpfc_shost_from_vport(port_iterator);
++
++			spin_lock_irq(shost->host_lock);
++			port_iterator->fc_flag |= FC_ESTABLISH_LINK;
++			spin_unlock_irq(shost->host_lock);
++		}
++		spin_lock_irq(&phba->hbalock);
+ 		psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(&phba->hbalock);
+ 
+ 		/*
+ 		* Firmware stops when it triggled erratt with HS_FFER6.
+@@ -544,15 +679,18 @@
+ 				phba->work_status[0], phba->work_status[1]);
+ 
+ 		event_data = FC_REG_DUMP_EVENT;
+-		fc_host_post_vendor_event(phba->host, fc_get_event_number(),
++		shost = lpfc_shost_from_vport(vport);
++		fc_host_post_vendor_event(shost, fc_get_event_number(),
+ 				sizeof(event_data), (char *) &event_data,
+ 				SCSI_NL_VID_TYPE_PCI | PCI_VENDOR_ID_EMULEX);
+ 
++		spin_lock_irq(&phba->hbalock);
+ 		psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
++		spin_unlock_irq(&phba->hbalock);
+ 		lpfc_offline_prep(phba);
+ 		lpfc_offline(phba);
+ 		lpfc_unblock_mgmt_io(phba);
+-		phba->hba_state = LPFC_HBA_ERROR;
++		phba->link_state = LPFC_HBA_ERROR;
+ 		lpfc_hba_down_post(phba);
+ 	}
+ }
+@@ -566,9 +704,11 @@
+ /*                                                                      */
+ /************************************************************************/
+ void
+-lpfc_handle_latt(struct lpfc_hba * phba)
++lpfc_handle_latt(struct lpfc_hba *phba)
+ {
++	struct lpfc_vport *vport = phba->pport;
+ 	struct lpfc_sli *psli = &phba->sli;
++	struct lpfc_vport *port_iterator;
+ 	LPFC_MBOXQ_t *pmb;
+ 	volatile uint32_t control;
+ 	struct lpfc_dmabuf *mp;
+@@ -589,20 +729,22 @@
+ 	rc = -EIO;
+ 
+ 	/* Cleanup any outstanding ELS commands */
+-	lpfc_els_flush_cmd(phba);
++	list_for_each_entry(port_iterator, &phba->port_list, listentry)
++		lpfc_els_flush_cmd(port_iterator);
+ 
+ 	psli->slistat.link_event++;
+ 	lpfc_read_la(phba, pmb, mp);
+ 	pmb->mbox_cmpl = lpfc_mbx_cmpl_read_la;
++	pmb->vport = vport;
+ 	rc = lpfc_sli_issue_mbox (phba, pmb, (MBX_NOWAIT | MBX_STOP_IOCB));
+ 	if (rc == MBX_NOT_FINISHED)
+ 		goto lpfc_handle_latt_free_mbuf;
+ 
+ 	/* Clear Link Attention in HA REG */
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	writel(HA_LATT, phba->HAregaddr);
+ 	readl(phba->HAregaddr); /* flush */
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	return;
+ 
+@@ -614,7 +756,7 @@
+ 	mempool_free(pmb, phba->mbox_mem_pool);
+ lpfc_handle_latt_err_exit:
+ 	/* Enable Link attention interrupts */
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	psli->sli_flag |= LPFC_PROCESS_LA;
+ 	control = readl(phba->HCregaddr);
+ 	control |= HC_LAINT_ENA;
+@@ -624,15 +766,13 @@
+ 	/* Clear Link Attention in HA REG */
+ 	writel(HA_LATT, phba->HAregaddr);
+ 	readl(phba->HAregaddr); /* flush */
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 	lpfc_linkdown(phba);
+-	phba->hba_state = LPFC_HBA_ERROR;
++	phba->link_state = LPFC_HBA_ERROR;
+ 
+ 	/* The other case is an error from issue_mbox */
+ 	if (rc == -ENOMEM)
+-		lpfc_printf_log(phba,
+-				KERN_WARNING,
+-				LOG_MBOX,
++		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
+ 			        "%d:0300 READ_LA: no buffers\n",
+ 				phba->brd_no);
+ 
+@@ -646,7 +786,7 @@
+ /*                                                                      */
+ /************************************************************************/
+ static int
+-lpfc_parse_vpd(struct lpfc_hba * phba, uint8_t * vpd, int len)
++lpfc_parse_vpd(struct lpfc_hba *phba, uint8_t *vpd, int len)
+ {
+ 	uint8_t lenlo, lenhi;
+ 	int Length;
+@@ -658,9 +798,7 @@
+ 		return 0;
+ 
+ 	/* Vital Product */
+-	lpfc_printf_log(phba,
+-			KERN_INFO,
+-			LOG_INIT,
++	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+ 			"%d:0455 Vital Product Data: x%x x%x x%x x%x\n",
+ 			phba->brd_no,
+ 			(uint32_t) vpd[0], (uint32_t) vpd[1], (uint32_t) vpd[2],
+@@ -785,7 +923,7 @@
+ }
+ 
+ static void
+-lpfc_get_hba_model_desc(struct lpfc_hba * phba, uint8_t * mdp, uint8_t * descp)
++lpfc_get_hba_model_desc(struct lpfc_hba *phba, uint8_t *mdp, uint8_t *descp)
+ {
+ 	lpfc_vpd_t *vp;
+ 	uint16_t dev_id = phba->pcidev->device;
+@@ -943,7 +1081,7 @@
+ /*   Returns the number of buffers NOT posted.    */
+ /**************************************************/
+ int
+-lpfc_post_buffer(struct lpfc_hba * phba, struct lpfc_sli_ring * pring, int cnt,
++lpfc_post_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, int cnt,
+ 		 int type)
+ {
+ 	IOCB_t *icmd;
+@@ -955,9 +1093,7 @@
+ 	/* While there are buffers to post */
+ 	while (cnt > 0) {
+ 		/* Allocate buffer for  command iocb */
+-		spin_lock_irq(phba->host->host_lock);
+ 		iocb = lpfc_sli_get_iocbq(phba);
+-		spin_unlock_irq(phba->host->host_lock);
+ 		if (iocb == NULL) {
+ 			pring->missbufcnt = cnt;
+ 			return cnt;
+@@ -972,9 +1108,7 @@
+ 						&mp1->phys);
+ 		if (mp1 == 0 || mp1->virt == 0) {
+ 			kfree(mp1);
+-			spin_lock_irq(phba->host->host_lock);
+ 			lpfc_sli_release_iocbq(phba, iocb);
+-			spin_unlock_irq(phba->host->host_lock);
+ 			pring->missbufcnt = cnt;
+ 			return cnt;
+ 		}
+@@ -990,9 +1124,7 @@
+ 				kfree(mp2);
+ 				lpfc_mbuf_free(phba, mp1->virt, mp1->phys);
+ 				kfree(mp1);
+-				spin_lock_irq(phba->host->host_lock);
+ 				lpfc_sli_release_iocbq(phba, iocb);
+-				spin_unlock_irq(phba->host->host_lock);
+ 				pring->missbufcnt = cnt;
+ 				return cnt;
+ 			}
+@@ -1018,7 +1150,6 @@
+ 		icmd->ulpCommand = CMD_QUE_RING_BUF64_CN;
+ 		icmd->ulpLe = 1;
+ 
+-		spin_lock_irq(phba->host->host_lock);
+ 		if (lpfc_sli_issue_iocb(phba, pring, iocb, 0) == IOCB_ERROR) {
+ 			lpfc_mbuf_free(phba, mp1->virt, mp1->phys);
+ 			kfree(mp1);
+@@ -1030,15 +1161,12 @@
+ 			}
+ 			lpfc_sli_release_iocbq(phba, iocb);
+ 			pring->missbufcnt = cnt;
+-			spin_unlock_irq(phba->host->host_lock);
+ 			return cnt;
+ 		}
+-		spin_unlock_irq(phba->host->host_lock);
+ 		lpfc_sli_ringpostbuf_put(phba, pring, mp1);
+-		if (mp2) {
++		if (mp2)
+ 			lpfc_sli_ringpostbuf_put(phba, pring, mp2);
+-		}
+ 	}
+ 	pring->missbufcnt = 0;
+ 	return 0;
+ }
+@@ -1050,7 +1178,7 @@
+ /*                                                                      */
+ /************************************************************************/
+ static int
+-lpfc_post_rcv_buf(struct lpfc_hba * phba)
++lpfc_post_rcv_buf(struct lpfc_hba *phba)
+ {
+ 	struct lpfc_sli *psli = &phba->sli;
+ 
+@@ -1151,7 +1279,7 @@
+ {
+ 	int t;
+ 	uint32_t *HashWorking;
+-	uint32_t *pwwnn = phba->wwnn;
++	uint32_t *pwwnn = (uint32_t *) phba->wwnn;
+ 
+ 	HashWorking = kmalloc(80 * sizeof(uint32_t), GFP_KERNEL);
+ 	if (!HashWorking)
+@@ -1170,64 +1298,76 @@
+ }
+ 
+ static void
+-lpfc_cleanup(struct lpfc_hba * phba)
++lpfc_cleanup(struct lpfc_vport *vport)
+ {
+ 	struct lpfc_nodelist *ndlp, *next_ndlp;
+ 
+ 	/* clean up phba - lpfc specific */
+-	lpfc_can_disctmo(phba);
+-	list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp)
++	lpfc_can_disctmo(vport);
++	list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp)
+ 		lpfc_nlp_put(ndlp);
+-
+-	INIT_LIST_HEAD(&phba->fc_nodes);
+-
+ 	return;
+ }
+ 
+ static void
+ lpfc_establish_link_tmo(unsigned long ptr)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba *)ptr;
++	struct lpfc_hba   *phba = (struct lpfc_hba *) ptr;
++	struct lpfc_vport *vport = phba->pport;
+ 	unsigned long iflag;
+ 
+-
+ 	/* Re-establishing Link, timer expired */
+ 	lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT,
+ 			"%d:1300 Re-establishing Link, timer expired "
+ 			"Data: x%x x%x\n",
+-			phba->brd_no, phba->fc_flag, phba->hba_state);
+-	spin_lock_irqsave(phba->host->host_lock, iflag);
+-	phba->fc_flag &= ~FC_ESTABLISH_LINK;
+-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
++			phba->brd_no, vport->fc_flag,
++			vport->port_state);
++	list_for_each_entry(vport, &phba->port_list, listentry) {
++		struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++		spin_lock_irqsave(shost->host_lock, iflag);
++		vport->fc_flag &= ~FC_ESTABLISH_LINK;
++		spin_unlock_irqrestore(shost->host_lock, iflag);
++	}
+ }
+ 
+-static int
+-lpfc_stop_timer(struct lpfc_hba * phba)
++void
++lpfc_stop_vport_timers(struct lpfc_vport *vport)
+ {
+-	struct lpfc_sli *psli = &phba->sli;
++	del_timer_sync(&vport->els_tmofunc);
++	del_timer_sync(&vport->fc_fdmitmo);
++	lpfc_can_disctmo(vport);
++	return;
++}
++
++static void
++lpfc_stop_phba_timers(struct lpfc_hba *phba)
++{
++	struct lpfc_vport *vport;
+ 
+ 	del_timer_sync(&phba->fcp_poll_timer);
+ 	del_timer_sync(&phba->fc_estabtmo);
+-	del_timer_sync(&phba->fc_disctmo);
+-	del_timer_sync(&phba->fc_fdmitmo);
+-	del_timer_sync(&phba->els_tmofunc);
+-	psli = &phba->sli;
+-	del_timer_sync(&psli->mbox_tmo);
+-	return(1);
++	list_for_each_entry(vport, &phba->port_list, listentry)
++		lpfc_stop_vport_timers(vport);
++	del_timer_sync(&phba->sli.mbox_tmo);
++	del_timer_sync(&phba->fabric_block_timer);
++	phba->hb_outstanding = 0;
++	del_timer_sync(&phba->hb_tmofunc);
++	return;
+ }
+ 
+ int
+-lpfc_online(struct lpfc_hba * phba)
++lpfc_online(struct lpfc_hba *phba)
+ {
++	struct lpfc_vport *vport = phba->pport;
++
+ 	if (!phba)
+ 		return 0;
+ 
+-	if (!(phba->fc_flag & FC_OFFLINE_MODE))
++	if (!(vport->fc_flag & FC_OFFLINE_MODE))
+ 		return 0;
+ 
+-	lpfc_printf_log(phba,
+-		       KERN_WARNING,
+-		       LOG_INIT,
++	lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+ 		       "%d:0458 Bring Adapter online\n",
+ 		       phba->brd_no);
+ 
+@@ -1243,9 +1383,14 @@
+ 		return 1;
+ 	}
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	phba->fc_flag &= ~FC_OFFLINE_MODE;
+-	spin_unlock_irq(phba->host->host_lock);
++	list_for_each_entry(vport, &phba->port_list, listentry) {
++		struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++		spin_lock_irq(shost->host_lock);
++		vport->fc_flag &= ~FC_OFFLINE_MODE;
++		if (phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)
++			vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
++		spin_unlock_irq(shost->host_lock);
++	}
+ 
+ 	lpfc_unblock_mgmt_io(phba);
+ 	return 0;
+@@ -1256,9 +1401,9 @@
+ {
+ 	unsigned long iflag;
+ 
+-	spin_lock_irqsave(phba->host->host_lock, iflag);
+-	phba->fc_flag |= FC_BLOCK_MGMT_IO;
+-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
++	spin_lock_irqsave(&phba->hbalock, iflag);
++	phba->sli.sli_flag |= LPFC_BLOCK_MGMT_IO;
++	spin_unlock_irqrestore(&phba->hbalock, iflag);
+ }
+ 
+ void
+@@ -1266,17 +1411,18 @@
+ {
+ 	unsigned long iflag;
+ 
+-	spin_lock_irqsave(phba->host->host_lock, iflag);
+-	phba->fc_flag &= ~FC_BLOCK_MGMT_IO;
+-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
++	spin_lock_irqsave(&phba->hbalock, iflag);
++	phba->sli.sli_flag &= ~LPFC_BLOCK_MGMT_IO;
++	spin_unlock_irqrestore(&phba->hbalock, iflag);
+ }
+ 
+ void
+ lpfc_offline_prep(struct lpfc_hba * phba)
+ {
++	struct lpfc_vport *vport = phba->pport;
+ 	struct lpfc_nodelist  *ndlp, *next_ndlp;
+ 
+-	if (phba->fc_flag & FC_OFFLINE_MODE)
++	if (vport->fc_flag & FC_OFFLINE_MODE)
+ 		return;
+ 
+ 	lpfc_block_mgmt_io(phba);
+@@ -1284,39 +1430,49 @@
+ 	lpfc_linkdown(phba);
+ 
+ 	/* Issue an unreg_login to all nodes */
+-	list_for_each_entry_safe(ndlp, next_ndlp, &phba->fc_nodes, nlp_listp)
++	list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp)
+ 		if (ndlp->nlp_state != NLP_STE_UNUSED_NODE)
+-			lpfc_unreg_rpi(phba, ndlp);
++			lpfc_unreg_rpi(vport, ndlp);
+ 
+ 	lpfc_sli_flush_mbox_queue(phba);
+ }
+ 
+ void
+-lpfc_offline(struct lpfc_hba * phba)
++lpfc_offline(struct lpfc_hba *phba)
+ {
+-	unsigned long iflag;
++	struct lpfc_vport *vport = phba->pport;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_vport *port_iterator;
+ 
+-	if (phba->fc_flag & FC_OFFLINE_MODE)
++	if (vport->fc_flag & FC_OFFLINE_MODE)
+ 		return;
+ 
+ 	/* stop all timers associated with this hba */
+-	lpfc_stop_timer(phba);
++	lpfc_stop_phba_timers(phba);
++	list_for_each_entry(port_iterator, &phba->port_list, listentry) {
++		port_iterator->work_port_events = 0;
++	}
+ 
+-	lpfc_printf_log(phba,
+-		       KERN_WARNING,
+-		       LOG_INIT,
++	lpfc_printf_log(phba, KERN_WARNING, LOG_INIT,
+ 		       "%d:0460 Bring Adapter offline\n",
+ 		       phba->brd_no);
+ 
+ 	/* Bring down the SLI Layer and cleanup.  The HBA is offline
+ 	   now.  */
+ 	lpfc_sli_hba_down(phba);
+-	lpfc_cleanup(phba);
+-	spin_lock_irqsave(phba->host->host_lock, iflag);
+-	phba->work_hba_events = 0;
++	spin_lock_irq(&phba->hbalock);
+ 	phba->work_ha = 0;
+-	phba->fc_flag |= FC_OFFLINE_MODE;
+-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
++	vport->fc_flag |= FC_OFFLINE_MODE;
++	spin_unlock_irq(&phba->hbalock);
++	list_for_each_entry(port_iterator, &phba->port_list, listentry) {
++		shost = lpfc_shost_from_vport(port_iterator);
++
++		lpfc_cleanup(port_iterator);
++		spin_lock_irq(shost->host_lock);
++		port_iterator->work_port_events = 0;
++		port_iterator->fc_flag |= FC_OFFLINE_MODE;
++		spin_unlock_irq(shost->host_lock);
++	}
+ }
+ 
+ /******************************************************************************
+@@ -1326,12 +1482,12 @@
+ *
+ ******************************************************************************/
+ static int
+-lpfc_scsi_free(struct lpfc_hba * phba)
++lpfc_scsi_free(struct lpfc_hba *phba)
+ {
+ 	struct lpfc_scsi_buf *sb, *sb_next;
+ 	struct lpfc_iocbq *io, *io_next;
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	/* Release all the lpfc_scsi_bufs maintained by this host. */
+ 	list_for_each_entry_safe(sb, sb_next, &phba->lpfc_scsi_buf_list, list) {
+ 		list_del(&sb->list);
+@@ -1348,126 +1504,177 @@
+ 		phba->total_iocbq_bufs--;
+ 	}
+ 
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	return 0;
+ }
+ 
+-void lpfc_remove_device(struct lpfc_hba *phba)
+-{
+-	unsigned long iflag;
+-
+-	lpfc_free_sysfs_attr(phba);
+ 
+-	spin_lock_irqsave(phba->host->host_lock, iflag);
+-	phba->fc_flag |= FC_UNLOADING;
++struct lpfc_vport *
++lpfc_create_port(struct lpfc_hba *phba, int instance, struct fc_vport *fc_vport)
++{
++	struct lpfc_vport *vport;
++	struct Scsi_Host  *shost;
++	int error = 0;
+ 
+-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
++	shost = scsi_host_alloc(&lpfc_template, sizeof(struct lpfc_vport));
++	if (!shost)
++		goto out;
+ 
+-	fc_remove_host(phba->host);
+-	scsi_remove_host(phba->host);
++	vport = (struct lpfc_vport *) shost->hostdata;
++	vport->phba = phba;
+ 
+-	kthread_stop(phba->worker_thread);
++	vport->load_flag |= FC_LOADING;
++	vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
+ 
++	shost->unique_id = instance;
++	shost->max_id = LPFC_MAX_TARGET;
++	shost->max_lun = phba->cfg_max_luns;
++	shost->this_id = -1;
++	shost->max_cmd_len = 16;
+ 	/*
+-	 * Bring down the SLI Layer. This step disable all interrupts,
+-	 * clears the rings, discards all mailbox commands, and resets
+-	 * the HBA.
++	 * Set initial can_queue value since 0 is no longer supported and
++	 * scsi_add_host will fail. This will be adjusted later based on the
++	 * max xri value determined in hba setup.
+ 	 */
+-	lpfc_sli_hba_down(phba);
+-	lpfc_sli_brdrestart(phba);
++	shost->can_queue = phba->cfg_hba_queue_depth - 10;
++	if (fc_vport != NULL) {
++		shost->transportt = lpfc_vport_transport_template;
++		vport->port_type = LPFC_NPIV_PORT;
++	} else {
++		shost->transportt = lpfc_transport_template;
++		vport->port_type = LPFC_PHYSICAL_PORT;
++	}
+ 
+-	/* Release the irq reservation */
+-	free_irq(phba->pcidev->irq, phba);
+-	pci_disable_msi(phba->pcidev);
++	/* Initialize all internally managed lists. */
++	INIT_LIST_HEAD(&vport->fc_nodes);
++	spin_lock_init(&vport->work_port_lock);
+ 
+-	lpfc_cleanup(phba);
+-	lpfc_stop_timer(phba);
+-	phba->work_hba_events = 0;
++	init_timer(&vport->fc_disctmo);
++	vport->fc_disctmo.function = lpfc_disc_timeout;
++	vport->fc_disctmo.data = (unsigned long)vport;
+ 
+-	/*
+-	 * Call scsi_free before mem_free since scsi bufs are released to their
+-	 * corresponding pools here.
+-	 */
+-	lpfc_scsi_free(phba);
+-	lpfc_mem_free(phba);
++	init_timer(&vport->fc_fdmitmo);
++	vport->fc_fdmitmo.function = lpfc_fdmi_tmo;
++	vport->fc_fdmitmo.data = (unsigned long)vport;
+ 
+-	/* Free resources associated with SLI2 interface */
+-	dma_free_coherent(&phba->pcidev->dev, SLI2_SLIM_SIZE,
+-			  phba->slim2p, phba->slim2p_mapping);
++	init_timer(&vport->els_tmofunc);
++	vport->els_tmofunc.function = lpfc_els_timeout;
++	vport->els_tmofunc.data = (unsigned long)vport;
+ 
+-	/* unmap adapter SLIM and Control Registers */
+-	iounmap(phba->ctrl_regs_memmap_p);
+-	iounmap(phba->slim_memmap_p);
++	if (fc_vport != NULL) {
++		error = scsi_add_host(shost, &fc_vport->dev);
++	} else {
++		error = scsi_add_host(shost, &phba->pcidev->dev);
++	}
++	if (error)
++		goto out_put_shost;
+ 
+-	pci_release_regions(phba->pcidev);
+-	pci_disable_device(phba->pcidev);
++	if (!shost->shost_classdev.kobj.dentry)
++		goto out_put_shost;
+ 
+-	idr_remove(&lpfc_hba_index, phba->brd_no);
+-	scsi_host_put(phba->host);
++	list_add_tail(&vport->listentry, &phba->port_list);
++	return vport;
++
++out_put_shost:
++	scsi_host_put(shost);
++out:
++	return NULL;
+ }
+ 
+-void lpfc_scan_start(struct Scsi_Host *host)
++void
++destroy_port(struct lpfc_vport *vport)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba*)host->hostdata;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
+ 
+-	if (lpfc_alloc_sysfs_attr(phba))
+-		goto error;
++	kfree(vport->vname);
+ 
+-	phba->MBslimaddr = phba->slim_memmap_p;
+-	phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET;
+-	phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET;
+-	phba->HSregaddr = phba->ctrl_regs_memmap_p + HS_REG_OFFSET;
+-	phba->HCregaddr = phba->ctrl_regs_memmap_p + HC_REG_OFFSET;
++	lpfc_debugfs_terminate(vport);
++	fc_remove_host(shost);
++	scsi_remove_host(shost);
+ 
+-	if (lpfc_sli_hba_setup(phba))
+-		goto error;
++	spin_lock_irq(&phba->hbalock);
++	list_del_init(&vport->listentry);
++	spin_unlock_irq(&phba->hbalock);
+ 
+-	/*
+-	 * hba setup may have changed the hba_queue_depth so we need to adjust
+-	 * the value of can_queue.
+-	 */
+-	host->can_queue = phba->cfg_hba_queue_depth - 10;
++	lpfc_cleanup(vport);
+ 	return;
++}
+ 
+-error:
+-	lpfc_remove_device(phba);
++int
++lpfc_get_instance(void)
++{
++	int instance = 0;
++
++	/* Assign an unused number */
++	if (!idr_pre_get(&lpfc_hba_index, GFP_KERNEL))
++		return -1;
++	if (idr_get_new(&lpfc_hba_index, NULL, &instance))
++		return -1;
++	return instance;
+ }
+ 
++/*
++ * Note: there is no scan_start function as adapter initialization
++ * will have asynchronously kicked off the link initialization.
++ */
++
+ int lpfc_scan_finished(struct Scsi_Host *shost, unsigned long time)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++	int stat = 0;
+ 
+-	if (!phba->host)
+-		return 1;
+-	if (time >= 30 * HZ)
++	spin_lock_irq(shost->host_lock);
++
++	if (vport->fc_flag & FC_UNLOADING) {
++		stat = 1;
+ 		goto finished;
++	}
++	if (time >= 30 * HZ) {
++		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
++				"%d:0461 Scanning longer than 30 "
++				"seconds.  Continuing initialization\n",
++				phba->brd_no);
++		stat = 1;
++		goto finished;
++	}
++	if (time >= 15 * HZ && phba->link_state <= LPFC_LINK_DOWN) {
++		lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
++				"%d:0465 Link down longer than 15 "
++				"seconds.  Continuing initialization\n",
++				phba->brd_no);
++		stat = 1;
++		goto finished;
++	}
+ 
+-	if (phba->hba_state != LPFC_HBA_READY)
+-		return 0;
+-	if (phba->num_disc_nodes || phba->fc_prli_sent)
+-		return 0;
+-	if ((phba->fc_map_cnt == 0) && (time < 2 * HZ))
+-		return 0;
+-	if (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE)
+-		return 0;
+-	if ((phba->hba_state > LPFC_LINK_DOWN) || (time < 15 * HZ))
+-		return 0;
++	if (vport->port_state != LPFC_VPORT_READY)
++		goto finished;
++	if (vport->num_disc_nodes || vport->fc_prli_sent)
++		goto finished;
++	if (vport->fc_map_cnt == 0 && time < 2 * HZ)
++		goto finished;
++	if ((phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE) != 0)
++		goto finished;
++
++	stat = 1;
+ 
+ finished:
+-	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
+-		spin_lock_irq(shost->host_lock);
+-		lpfc_poll_start_timer(phba);
+ 		spin_unlock_irq(shost->host_lock);
+-	}
++	return stat;
++}
+ 
++void lpfc_host_attrib_init(struct Scsi_Host *shost)
++{
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	/*
+-	 * set fixed host attributes
+-	 * Must done after lpfc_sli_hba_setup()
++	 * Set fixed host attributes.  Must done after lpfc_sli_hba_setup().
+ 	 */
+ 
+-	fc_host_node_name(shost) = wwn_to_u64(phba->fc_nodename.u.wwn);
+-	fc_host_port_name(shost) = wwn_to_u64(phba->fc_portname.u.wwn);
++	fc_host_node_name(shost) = wwn_to_u64(vport->fc_nodename.u.wwn);
++	fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn);
+ 	fc_host_supported_classes(shost) = FC_COS_CLASS3;
+ 
+ 	memset(fc_host_supported_fc4s(shost), 0,
+@@ -1475,7 +1682,8 @@
+ 	fc_host_supported_fc4s(shost)[2] = 1;
+ 	fc_host_supported_fc4s(shost)[7] = 1;
+ 
+-	lpfc_get_hba_sym_node_name(phba, fc_host_symbolic_name(shost));
++	lpfc_vport_symbolic_node_name(vport, fc_host_symbolic_name(shost),
++				 sizeof fc_host_symbolic_name(shost));
+ 
+ 	fc_host_supported_speeds(shost) = 0;
+ 	if (phba->lmt & LMT_10Gb)
+@@ -1488,8 +1696,8 @@
+ 		fc_host_supported_speeds(shost) |= FC_PORTSPEED_1GBIT;
+ 
+ 	fc_host_maxframe_size(shost) =
+-		((((uint32_t) phba->fc_sparam.cmn.bbRcvSizeMsb & 0x0F) << 8) |
+-		 (uint32_t) phba->fc_sparam.cmn.bbRcvSizeLsb);
++		(((uint32_t) vport->fc_sparam.cmn.bbRcvSizeMsb & 0x0F) << 8) |
++		(uint32_t) vport->fc_sparam.cmn.bbRcvSizeLsb;
+ 
+ 	/* This value is also unchanging */
+ 	memset(fc_host_active_fc4s(shost), 0,
+@@ -1497,20 +1705,20 @@
+ 	fc_host_active_fc4s(shost)[2] = 1;
+ 	fc_host_active_fc4s(shost)[7] = 1;
+ 
++	fc_host_max_npiv_vports(shost) = phba->max_vpi;
+ 	spin_lock_irq(shost->host_lock);
+-	phba->fc_flag &= ~FC_LOADING;
++	vport->fc_flag &= ~FC_LOADING;
+ 	spin_unlock_irq(shost->host_lock);
+-
+-	return 1;
+ }
+ 
+ static int __devinit
+ lpfc_pci_probe_one(struct pci_dev *pdev, const struct pci_device_id *pid)
+ {
+-	struct Scsi_Host *host;
++	struct lpfc_vport *vport = NULL;
+ 	struct lpfc_hba  *phba;
+ 	struct lpfc_sli  *psli;
+ 	struct lpfc_iocbq *iocbq_entry = NULL, *iocbq_next = NULL;
++	struct Scsi_Host  *shost = NULL;
+ 	unsigned long bar0map_len, bar2map_len;
+ 	int error = -ENODEV, retval;
+ 	int i;
+@@ -1521,61 +1729,46 @@
+ 	if (pci_request_regions(pdev, LPFC_DRIVER_NAME))
+ 		goto out_disable_device;
+ 
+-	host = scsi_host_alloc(&lpfc_template, sizeof (struct lpfc_hba));
+-	if (!host)
++	phba = kzalloc(sizeof (struct lpfc_hba), GFP_KERNEL);
++	if (!phba)
+ 		goto out_release_regions;
+ 
+-	phba = (struct lpfc_hba*)host->hostdata;
+-	memset(phba, 0, sizeof (struct lpfc_hba));
+-	phba->host = host;
++	spin_lock_init(&phba->hbalock);
+ 
+-	phba->fc_flag |= FC_LOADING;
+ 	phba->pcidev = pdev;
+ 
+ 	/* Assign an unused board number */
+-	if (!idr_pre_get(&lpfc_hba_index, GFP_KERNEL))
+-		goto out_put_host;
++	if ((phba->brd_no = lpfc_get_instance()) < 0)
++		goto out_free_phba;
+ 
+-	error = idr_get_new(&lpfc_hba_index, NULL, &phba->brd_no);
+-	if (error)
+-		goto out_put_host;
+-
+-	host->unique_id = phba->brd_no;
++	INIT_LIST_HEAD(&phba->port_list);
++	INIT_LIST_HEAD(&phba->hbq_buffer_list);
++	/*
++	 * Get all the module params for configuring this host and then
++	 * establish the host.
++	 */
++	lpfc_get_cfgparam(phba);
++	phba->max_vpi = LPFC_MAX_VPI;
+ 
+ 	/* Initialize timers used by driver */
+ 	init_timer(&phba->fc_estabtmo);
+ 	phba->fc_estabtmo.function = lpfc_establish_link_tmo;
+ 	phba->fc_estabtmo.data = (unsigned long)phba;
+-	init_timer(&phba->fc_disctmo);
+-	phba->fc_disctmo.function = lpfc_disc_timeout;
+-	phba->fc_disctmo.data = (unsigned long)phba;
+-
+-	init_timer(&phba->fc_fdmitmo);
+-	phba->fc_fdmitmo.function = lpfc_fdmi_tmo;
+-	phba->fc_fdmitmo.data = (unsigned long)phba;
+-	init_timer(&phba->els_tmofunc);
+-	phba->els_tmofunc.function = lpfc_els_timeout;
+-	phba->els_tmofunc.data = (unsigned long)phba;
++
++	init_timer(&phba->hb_tmofunc);
++	phba->hb_tmofunc.function = lpfc_hb_timeout;
++	phba->hb_tmofunc.data = (unsigned long)phba;
++
+ 	psli = &phba->sli;
+ 	init_timer(&psli->mbox_tmo);
+ 	psli->mbox_tmo.function = lpfc_mbox_timeout;
+-	psli->mbox_tmo.data = (unsigned long)phba;
+-
++	psli->mbox_tmo.data = (unsigned long) phba;
+ 	init_timer(&phba->fcp_poll_timer);
+ 	phba->fcp_poll_timer.function = lpfc_poll_timeout;
+-	phba->fcp_poll_timer.data = (unsigned long)phba;
+-
+-	/*
+-	 * Get all the module params for configuring this host and then
+-	 * establish the host parameters.
+-	 */
+-	lpfc_get_cfgparam(phba);
+-
+-	host->max_id = LPFC_MAX_TARGET;
+-	host->max_lun = phba->cfg_max_luns;
+-	host->this_id = -1;
+-
+-	INIT_LIST_HEAD(&phba->fc_nodes);
++	phba->fcp_poll_timer.data = (unsigned long) phba;
++	init_timer(&phba->fabric_block_timer);
++	phba->fabric_block_timer.function = lpfc_fabric_block_timeout;
++	phba->fabric_block_timer.data = (unsigned long) phba;
+ 
+ 	pci_set_master(pdev);
+ 	retval = pci_set_mwi(pdev);
+@@ -1623,13 +1816,22 @@
+ 
+ 	memset(phba->slim2p, 0, SLI2_SLIM_SIZE);
+ 
++	phba->hbqslimp.virt = dma_alloc_coherent(&phba->pcidev->dev,
++						 lpfc_sli_hbq_size(),
++						 &phba->hbqslimp.phys,
++						 GFP_KERNEL);
++	if (!phba->hbqslimp.virt)
++		goto out_free_slim;
++
++	memset(phba->hbqslimp.virt, 0, lpfc_sli_hbq_size());
++
+ 	/* Initialize the SLI Layer to run with lpfc HBAs. */
+ 	lpfc_sli_setup(phba);
+ 	lpfc_sli_queue_setup(phba);
+ 
+ 	error = lpfc_mem_alloc(phba);
+ 	if (error)
+-		goto out_free_slim;
++		goto out_free_hbqslimp;
+ 
+ 	/* Initialize and populate the iocb list per host.  */
+ 	INIT_LIST_HEAD(&phba->lpfc_iocb_list);
+@@ -1653,10 +1855,11 @@
+ 			error = -ENOMEM;
+ 			goto out_free_iocbq;
+ 		}
+-		spin_lock_irq(phba->host->host_lock);
++
++		spin_lock_irq(&phba->hbalock);
+ 		list_add(&iocbq_entry->list, &phba->lpfc_iocb_list);
+ 		phba->total_iocbq_bufs++;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(&phba->hbalock);
+ 	}
+ 
+ 	/* Initialize HBA structure */
+@@ -1677,22 +1880,22 @@
+ 		goto out_free_iocbq;
+ 	}
+ 
+-	/*
+-	 * Set initial can_queue value since 0 is no longer supported and
+-	 * scsi_add_host will fail. This will be adjusted later based on the
+-	 * max xri value determined in hba setup.
+-	 */
+-	host->can_queue = phba->cfg_hba_queue_depth - 10;
+-
+-	/* Tell the midlayer we support 16 byte commands */
+-	host->max_cmd_len = 16;
+-
+ 	/* Initialize the list of scsi buffers used by driver for scsi IO. */
+ 	spin_lock_init(&phba->scsi_buf_list_lock);
+ 	INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list);
+ 
+-	host->transportt = lpfc_transport_template;
+-	pci_set_drvdata(pdev, host);
++	/* Initialize list of fabric iocbs */
++	INIT_LIST_HEAD(&phba->fabric_iocb_list);
++
++	vport = lpfc_create_port(phba, phba->brd_no, NULL);
++	if (!vport)
++		goto out_kthread_stop;
++
++	shost = lpfc_shost_from_vport(vport);
++	phba->pport = vport;
++	lpfc_debugfs_initialize(vport);
++
++	pci_set_drvdata(pdev, shost);
+ 
+ 	if (phba->cfg_use_msi) {
+ 		error = pci_enable_msi(phba->pcidev);
+@@ -1708,33 +1911,63 @@
+ 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ 			"%d:0451 Enable interrupt handler failed\n",
+ 			phba->brd_no);
+-		goto out_kthread_stop;
++		goto out_disable_msi;
+ 	}
+ 
+-	error = scsi_add_host(host, &pdev->dev);
+-	if (error)
++	phba->MBslimaddr = phba->slim_memmap_p;
++	phba->HAregaddr = phba->ctrl_regs_memmap_p + HA_REG_OFFSET;
++	phba->CAregaddr = phba->ctrl_regs_memmap_p + CA_REG_OFFSET;
++	phba->HSregaddr = phba->ctrl_regs_memmap_p + HS_REG_OFFSET;
++	phba->HCregaddr = phba->ctrl_regs_memmap_p + HC_REG_OFFSET;
++
++	if (lpfc_alloc_sysfs_attr(vport))
+ 		goto out_free_irq;
+ 
+-	scsi_scan_host(host);
++	if (lpfc_sli_hba_setup(phba))
++		goto out_remove_device;
++
++	/*
++	 * hba setup may have changed the hba_queue_depth so we need to adjust
++	 * the value of can_queue.
++	 */
++	shost->can_queue = phba->cfg_hba_queue_depth - 10;
++
++	lpfc_host_attrib_init(shost);
++
++	if (phba->cfg_poll & DISABLE_FCP_RING_INT) {
++		spin_lock_irq(shost->host_lock);
++		lpfc_poll_start_timer(phba);
++		spin_unlock_irq(shost->host_lock);
++	}
++
++	scsi_scan_host(shost);
+ 
+ 	return 0;
+ 
++out_remove_device:
++	lpfc_free_sysfs_attr(vport);
++	spin_lock_irq(shost->host_lock);
++	vport->fc_flag |= FC_UNLOADING;
++	spin_unlock_irq(shost->host_lock);
+ out_free_irq:
+-	lpfc_stop_timer(phba);
+-	phba->work_hba_events = 0;
++	lpfc_stop_phba_timers(phba);
++	phba->pport->work_port_events = 0;
+ 	free_irq(phba->pcidev->irq, phba);
++out_disable_msi:
+ 	pci_disable_msi(phba->pcidev);
++	destroy_port(vport);
+ out_kthread_stop:
+ 	kthread_stop(phba->worker_thread);
+ out_free_iocbq:
+ 	list_for_each_entry_safe(iocbq_entry, iocbq_next,
+ 						&phba->lpfc_iocb_list, list) {
+-		spin_lock_irq(phba->host->host_lock);
+ 		kfree(iocbq_entry);
+ 		phba->total_iocbq_bufs--;
+-		spin_unlock_irq(phba->host->host_lock);
+ 	}
+ 	lpfc_mem_free(phba);
++out_free_hbqslimp:
++	dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(), phba->hbqslimp.virt,
++			  phba->hbqslimp.phys);
+ out_free_slim:
+ 	dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE, phba->slim2p,
+ 							phba->slim2p_mapping);
+@@ -1744,27 +1977,85 @@
+ 	iounmap(phba->slim_memmap_p);
+ out_idr_remove:
+ 	idr_remove(&lpfc_hba_index, phba->brd_no);
+-out_put_host:
+-	phba->host = NULL;
+-	scsi_host_put(host);
++out_free_phba:
++	kfree(phba);
+ out_release_regions:
+ 	pci_release_regions(pdev);
+ out_disable_device:
+ 	pci_disable_device(pdev);
+ out:
+ 	pci_set_drvdata(pdev, NULL);
++	if (shost)
++		scsi_host_put(shost);
+ 	return error;
+ }
+ 
+ static void __devexit
+ lpfc_pci_remove_one(struct pci_dev *pdev)
+ {
+-	struct Scsi_Host   *host = pci_get_drvdata(pdev);
+-	struct lpfc_hba    *phba = (struct lpfc_hba *)host->hostdata;
++	struct Scsi_Host  *shost = pci_get_drvdata(pdev);
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
++	struct lpfc_vport *port_iterator;
++	list_for_each_entry(port_iterator, &phba->port_list, listentry)
++		port_iterator->load_flag |= FC_UNLOADING;
++
++	kfree(vport->vname);
++	lpfc_free_sysfs_attr(vport);
++
++	fc_remove_host(shost);
++	scsi_remove_host(shost);
++
++	/*
++	 * Bring down the SLI Layer. This step disable all interrupts,
++	 * clears the rings, discards all mailbox commands, and resets
++	 * the HBA.
++	 */
++	lpfc_sli_hba_down(phba);
++	lpfc_sli_brdrestart(phba);
++
++	lpfc_stop_phba_timers(phba);
++	spin_lock_irq(&phba->hbalock);
++	list_del_init(&vport->listentry);
++	spin_unlock_irq(&phba->hbalock);
+ 
+-	lpfc_remove_device(phba);
++
++	lpfc_debugfs_terminate(vport);
++	lpfc_cleanup(vport);
++
++	kthread_stop(phba->worker_thread);
++
++	/* Release the irq reservation */
++	free_irq(phba->pcidev->irq, phba);
++	pci_disable_msi(phba->pcidev);
+ 
+ 	pci_set_drvdata(pdev, NULL);
++	scsi_host_put(shost);
++
++	/*
++	 * Call scsi_free before mem_free since scsi bufs are released to their
++	 * corresponding pools here.
++	 */
++	lpfc_scsi_free(phba);
++	lpfc_mem_free(phba);
++
++	dma_free_coherent(&pdev->dev, lpfc_sli_hbq_size(), phba->hbqslimp.virt,
++			  phba->hbqslimp.phys);
++
++	/* Free resources associated with SLI2 interface */
++	dma_free_coherent(&pdev->dev, SLI2_SLIM_SIZE,
++			  phba->slim2p, phba->slim2p_mapping);
++
++	/* unmap adapter SLIM and Control Registers */
++	iounmap(phba->ctrl_regs_memmap_p);
++	iounmap(phba->slim_memmap_p);
++
++	idr_remove(&lpfc_hba_index, phba->brd_no);
++
++	kfree(phba);
++
++	pci_release_regions(pdev);
++	pci_disable_device(pdev);
+ }
+ 
+ /**
+@@ -1822,10 +2113,13 @@
+ 	pci_set_master(pdev);
+ 
+ 	/* Re-establishing Link */
+-	spin_lock_irq(phba->host->host_lock);
+-	phba->fc_flag |= FC_ESTABLISH_LINK;
++	spin_lock_irq(host->host_lock);
++	phba->pport->fc_flag |= FC_ESTABLISH_LINK;
++	spin_unlock_irq(host->host_lock);
++
++	spin_lock_irq(&phba->hbalock);
+ 	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 
+ 	/* Take device offline; this will perform cleanup */
+@@ -1948,11 +2242,15 @@
+ 
+ 	lpfc_transport_template =
+ 				fc_attach_transport(&lpfc_transport_functions);
+-	if (!lpfc_transport_template)
++	lpfc_vport_transport_template =
++			fc_attach_transport(&lpfc_vport_transport_functions);
++	if (!lpfc_transport_template || !lpfc_vport_transport_template)
+ 		return -ENOMEM;
+ 	error = pci_register_driver(&lpfc_driver);
+-	if (error)
++	if (error) {
+ 		fc_release_transport(lpfc_transport_template);
++		fc_release_transport(lpfc_vport_transport_template);
++	}
+ 
+ 	return error;
+ }
+@@ -1962,6 +2260,7 @@
+ {
+ 	pci_unregister_driver(&lpfc_driver);
+ 	fc_release_transport(lpfc_transport_template);
++	fc_release_transport(lpfc_vport_transport_template);
+ }
+ 
+ module_init(lpfc_init);
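+
+Reviewer sketch (inter-hunk commentary, ignored by patch(1)): the heartbeat
+added to lpfc_init.c is a two-interval watchdog.  The same state machine in
+isolation, using the 2.6-era timer API; HB_INTERVAL/HB_TIMEOUT are
+illustrative values, and issue_heartbeat()/take_offline() are hypothetical
+stand-ins for the driver's mailbox-issue and offline paths:
+
+	#include <linux/timer.h>
+	#include <linux/jiffies.h>
+
+	#define HB_INTERVAL	(5 * HZ)	/* re-arm period while idle */
+	#define HB_TIMEOUT	(30 * HZ)	/* deadline for a completion */
+
+	struct hb_ctx {
+		struct timer_list tmo;
+		int outstanding;	/* 1 while a heartbeat is in flight */
+	};
+
+	static int issue_heartbeat(struct hb_ctx *ctx);	/* hypothetical */
+	static void take_offline(struct hb_ctx *ctx);	/* hypothetical */
+
+	/* Mailbox completion: clear the flag, re-arm the short interval. */
+	static void hb_cmpl(struct hb_ctx *ctx)
+	{
+		ctx->outstanding = 0;
+		mod_timer(&ctx->tmo, jiffies + HB_INTERVAL);
+	}
+
+	/* Timer callback, mirroring the two branches of the handler above. */
+	static void hb_timeout(unsigned long ptr)
+	{
+		struct hb_ctx *ctx = (struct hb_ctx *) ptr;
+
+		if (!ctx->outstanding) {
+			if (issue_heartbeat(ctx) == 0) {
+				ctx->outstanding = 1;
+				mod_timer(&ctx->tmo, jiffies + HB_TIMEOUT);
+			} else {
+				/* could not issue: try again later */
+				mod_timer(&ctx->tmo, jiffies + HB_INTERVAL);
+			}
+		} else {
+			/* deadline missed: the adapter stopped responding */
+			take_offline(ctx);
+		}
+	}
+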
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_logmsg.h linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_logmsg.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_logmsg.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_logmsg.h	2007-12-19 15:29:23.000000000 -0500
+@@ -30,6 +30,7 @@
+ #define LOG_SLI                       0x800	/* SLI events */
+ #define LOG_FCP_ERROR                 0x1000	/* log errors, not underruns */
+ #define LOG_LIBDFC                    0x2000	/* Libdfc events */
++#define LOG_VPORT                     0x4000	/* NPIV events */
+ #define LOG_ALL_MSG                   0xffff	/* LOG all messages */
+ 
+ #define lpfc_printf_log(phba, level, mask, fmt, arg...) \
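+
+Reviewer sketch (inter-hunk commentary, ignored by patch(1)): LOG_VPORT
+slots into the existing log-verbosity bitmask.  The lpfc_printf_log body is
+elided above; its gate is approximately the following, where cfg_log_verbose
+is the assumed per-HBA verbosity field and KERN_ERR-or-worse messages always
+print:
+
+	#define my_printf_log(phba, level, mask, fmt, arg...)		\
+	do {								\
+		if (((mask) & (phba)->cfg_log_verbose) ||		\
+		    (level[1] <= '3'))	/* "<3>" == KERN_ERR */		\
+			dev_printk(level, &((phba)->pcidev)->dev,	\
+				   fmt, ##arg);				\
+	} while (0)
+
+	/* e.g. enable only SLI and the new NPIV messages: */
+	phba->cfg_log_verbose = LOG_SLI | LOG_VPORT;	/* 0x800 | 0x4000 */
+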
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mbox.c linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_mbox.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mbox.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_mbox.c	2007-12-19 15:29:23.000000000 -0500
+@@ -82,6 +82,22 @@
+ }
+ 
+ /**********************************************/
++/*  lpfc_heart_beat  Issue a HEART_BEAT       */
++/*                mailbox command             */
++/**********************************************/
++void
++lpfc_heart_beat(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++{
++	MAILBOX_t *mb;
++
++	mb = &pmb->mb;
++	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++	mb->mbxCommand = MBX_HEARTBEAT;
++	mb->mbxOwner = OWN_HOST;
++	return;
++}
++
++/**********************************************/
+ /*  lpfc_read_la  Issue a READ LA             */
+ /*                mailbox command             */
+ /**********************************************/
+@@ -134,6 +150,7 @@
+ void
+ lpfc_config_link(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
+ {
++	struct lpfc_vport  *vport = phba->pport;
+ 	MAILBOX_t *mb = &pmb->mb;
+ 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
+ 
+@@ -147,7 +164,7 @@
+ 		mb->un.varCfgLnk.cr_count = phba->cfg_cr_count;
+ 	}
+ 
+-	mb->un.varCfgLnk.myId = phba->fc_myDID;
++	mb->un.varCfgLnk.myId = vport->fc_myDID;
+ 	mb->un.varCfgLnk.edtov = phba->fc_edtov;
+ 	mb->un.varCfgLnk.arbtov = phba->fc_arbtov;
+ 	mb->un.varCfgLnk.ratov = phba->fc_ratov;
+@@ -239,7 +256,7 @@
+ /*                    mailbox command         */
+ /**********************************************/
+ int
+-lpfc_read_sparam(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb, int vpi)
+ {
+ 	struct lpfc_dmabuf *mp;
+ 	MAILBOX_t *mb;
+@@ -270,6 +287,7 @@
+ 	mb->un.varRdSparm.un.sp64.tus.f.bdeSize = sizeof (struct serv_parm);
+ 	mb->un.varRdSparm.un.sp64.addrHigh = putPaddrHigh(mp->phys);
+ 	mb->un.varRdSparm.un.sp64.addrLow = putPaddrLow(mp->phys);
++	mb->un.varRdSparm.vpi = vpi;
+ 
+ 	/* save address for completion */
+ 	pmb->context1 = mp;
+@@ -282,7 +300,8 @@
+ /*                  mailbox command         */
+ /********************************************/
+ void
+-lpfc_unreg_did(struct lpfc_hba * phba, uint32_t did, LPFC_MBOXQ_t * pmb)
++lpfc_unreg_did(struct lpfc_hba * phba, uint16_t vpi, uint32_t did,
++	       LPFC_MBOXQ_t * pmb)
+ {
+ 	MAILBOX_t *mb;
+ 
+@@ -290,6 +309,7 @@
+ 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
+ 
+ 	mb->un.varUnregDID.did = did;
++	mb->un.varUnregDID.vpi = vpi;
+ 
+ 	mb->mbxCommand = MBX_UNREG_D_ID;
+ 	mb->mbxOwner = OWN_HOST;
+@@ -335,19 +355,17 @@
+ /*                  mailbox command         */
+ /********************************************/
+ int
+-lpfc_reg_login(struct lpfc_hba * phba,
+-	       uint32_t did, uint8_t * param, LPFC_MBOXQ_t * pmb, uint32_t flag)
++lpfc_reg_login(struct lpfc_hba *phba, uint16_t vpi, uint32_t did,
++	       uint8_t *param, LPFC_MBOXQ_t *pmb, uint32_t flag)
+ {
++	MAILBOX_t *mb = &pmb->mb;
+ 	uint8_t *sparam;
+ 	struct lpfc_dmabuf *mp;
+-	MAILBOX_t *mb;
+-	struct lpfc_sli *psli;
+ 
+-	psli = &phba->sli;
+-	mb = &pmb->mb;
+ 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
+ 
+ 	mb->un.varRegLogin.rpi = 0;
++	mb->un.varRegLogin.vpi = vpi;
+ 	mb->un.varRegLogin.did = did;
+ 	mb->un.varWords[30] = flag;	/* Set flag to issue action on cmpl */
+ 
+@@ -359,12 +377,10 @@
+ 		kfree(mp);
+ 		mb->mbxCommand = MBX_REG_LOGIN64;
+ 		/* REG_LOGIN: no buffers */
+-		lpfc_printf_log(phba,
+-			       KERN_WARNING,
+-			       LOG_MBOX,
+-			       "%d:0302 REG_LOGIN: no buffers Data x%x x%x\n",
+-			       phba->brd_no,
+-			       (uint32_t) did, (uint32_t) flag);
++		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX,
++				"%d (%d):0302 REG_LOGIN: no buffers, DID x%x, "
++				"flag x%x\n",
++				phba->brd_no, vpi, did, flag);
+ 		return (1);
+ 	}
+ 	INIT_LIST_HEAD(&mp->list);
+@@ -389,7 +405,8 @@
+ /*                    mailbox command         */
+ /**********************************************/
+ void
+-lpfc_unreg_login(struct lpfc_hba * phba, uint32_t rpi, LPFC_MBOXQ_t * pmb)
++lpfc_unreg_login(struct lpfc_hba *phba, uint16_t vpi, uint32_t rpi,
++		 LPFC_MBOXQ_t * pmb)
+ {
+ 	MAILBOX_t *mb;
+ 
+@@ -398,12 +415,52 @@
+ 
+ 	mb->un.varUnregLogin.rpi = (uint16_t) rpi;
+ 	mb->un.varUnregLogin.rsvd1 = 0;
++	mb->un.varUnregLogin.vpi = vpi;
+ 
+ 	mb->mbxCommand = MBX_UNREG_LOGIN;
+ 	mb->mbxOwner = OWN_HOST;
+ 	return;
+ }
+ 
++/**************************************************/
++/*  lpfc_reg_vpi   Issue a REG_VPI                */
++/*                    mailbox command             */
++/**************************************************/
++void
++lpfc_reg_vpi(struct lpfc_hba *phba, uint16_t vpi, uint32_t sid,
++	     LPFC_MBOXQ_t *pmb)
++{
++	MAILBOX_t *mb = &pmb->mb;
++
++	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++	mb->un.varRegVpi.vpi = vpi;
++	mb->un.varRegVpi.sid = sid;
++
++	mb->mbxCommand = MBX_REG_VPI;
++	mb->mbxOwner = OWN_HOST;
++	return;
++
++}
++
++/**************************************************/
++/*  lpfc_unreg_vpi   Issue a UNREG_VNPI           */
++/*                    mailbox command             */
++/**************************************************/
++void
++lpfc_unreg_vpi(struct lpfc_hba *phba, uint16_t vpi, LPFC_MBOXQ_t *pmb)
++{
++	MAILBOX_t *mb = &pmb->mb;
++	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++
++	mb->un.varUnregVpi.vpi = vpi;
++
++	mb->mbxCommand = MBX_UNREG_VPI;
++	mb->mbxOwner = OWN_HOST;
++	return;
++
++}
++
+ static void
+ lpfc_config_pcb_setup(struct lpfc_hba * phba)
+ {
+@@ -412,14 +469,18 @@
+ 	PCB_t *pcbp = &phba->slim2p->pcb;
+ 	dma_addr_t pdma_addr;
+ 	uint32_t offset;
+-	uint32_t iocbCnt;
++	uint32_t iocbCnt = 0;
+ 	int i;
+ 
+ 	pcbp->maxRing = (psli->num_rings - 1);
+ 
+-	iocbCnt = 0;
+ 	for (i = 0; i < psli->num_rings; i++) {
+ 		pring = &psli->ring[i];
++
++		pring->sizeCiocb = phba->sli_rev == 3 ? SLI3_IOCB_CMD_SIZE:
++							SLI2_IOCB_CMD_SIZE;
++		pring->sizeRiocb = phba->sli_rev == 3 ? SLI3_IOCB_RSP_SIZE:
++							SLI2_IOCB_RSP_SIZE;
+ 		/* A ring MUST have both cmd and rsp entries defined to be
+ 		   valid */
+ 		if ((pring->numCiocb == 0) || (pring->numRiocb == 0)) {
+@@ -434,20 +495,18 @@
+ 			continue;
+ 		}
+ 		/* Command ring setup for ring */
+-		pring->cmdringaddr =
+-		    (void *)&phba->slim2p->IOCBs[iocbCnt];
++		pring->cmdringaddr = (void *) &phba->slim2p->IOCBs[iocbCnt];
+ 		pcbp->rdsc[i].cmdEntries = pring->numCiocb;
+ 
+-		offset = (uint8_t *)&phba->slim2p->IOCBs[iocbCnt] -
+-			 (uint8_t *)phba->slim2p;
++		offset = (uint8_t *) &phba->slim2p->IOCBs[iocbCnt] -
++			 (uint8_t *) phba->slim2p;
+ 		pdma_addr = phba->slim2p_mapping + offset;
+ 		pcbp->rdsc[i].cmdAddrHigh = putPaddrHigh(pdma_addr);
+ 		pcbp->rdsc[i].cmdAddrLow = putPaddrLow(pdma_addr);
+ 		iocbCnt += pring->numCiocb;
+ 
+ 		/* Response ring setup for ring */
+-		pring->rspringaddr =
+-		    (void *)&phba->slim2p->IOCBs[iocbCnt];
++		pring->rspringaddr = (void *) &phba->slim2p->IOCBs[iocbCnt];
+ 
+ 		pcbp->rdsc[i].rspEntries = pring->numRiocb;
+ 		offset = (uint8_t *)&phba->slim2p->IOCBs[iocbCnt] -
+@@ -462,16 +521,108 @@
+ void
+ lpfc_read_rev(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
+ {
+-	MAILBOX_t *mb;
+-
+-	mb = &pmb->mb;
++	MAILBOX_t *mb = &pmb->mb;
+ 	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
+ 	mb->un.varRdRev.cv = 1;
++	mb->un.varRdRev.v3req = 1; /* Request SLI3 info */
+ 	mb->mbxCommand = MBX_READ_REV;
+ 	mb->mbxOwner = OWN_HOST;
+ 	return;
+ }
+ 
++static void
++lpfc_build_hbq_profile2(struct config_hbq_var *hbqmb,
++			struct lpfc_hbq_init  *hbq_desc)
++{
++	hbqmb->profiles.profile2.seqlenbcnt = hbq_desc->seqlenbcnt;
++	hbqmb->profiles.profile2.maxlen     = hbq_desc->maxlen;
++	hbqmb->profiles.profile2.seqlenoff  = hbq_desc->seqlenoff;
++}
++
++static void
++lpfc_build_hbq_profile3(struct config_hbq_var *hbqmb,
++			struct lpfc_hbq_init  *hbq_desc)
++{
++	hbqmb->profiles.profile3.seqlenbcnt = hbq_desc->seqlenbcnt;
++	hbqmb->profiles.profile3.maxlen     = hbq_desc->maxlen;
++	hbqmb->profiles.profile3.cmdcodeoff = hbq_desc->cmdcodeoff;
++	hbqmb->profiles.profile3.seqlenoff  = hbq_desc->seqlenoff;
++	memcpy(&hbqmb->profiles.profile3.cmdmatch, hbq_desc->cmdmatch,
++	       sizeof(hbqmb->profiles.profile3.cmdmatch));
++}
++
++static void
++lpfc_build_hbq_profile5(struct config_hbq_var *hbqmb,
++			struct lpfc_hbq_init  *hbq_desc)
++{
++	hbqmb->profiles.profile5.seqlenbcnt = hbq_desc->seqlenbcnt;
++	hbqmb->profiles.profile5.maxlen     = hbq_desc->maxlen;
++	hbqmb->profiles.profile5.cmdcodeoff = hbq_desc->cmdcodeoff;
++	hbqmb->profiles.profile5.seqlenoff  = hbq_desc->seqlenoff;
++	memcpy(&hbqmb->profiles.profile5.cmdmatch, hbq_desc->cmdmatch,
++	       sizeof(hbqmb->profiles.profile5.cmdmatch));
++}
++
++void
++lpfc_config_hbq(struct lpfc_hba *phba, struct lpfc_hbq_init *hbq_desc,
++		uint32_t hbq_entry_index, LPFC_MBOXQ_t *pmb)
++{
++	int i;
++	MAILBOX_t *mb = &pmb->mb;
++	struct config_hbq_var *hbqmb = &mb->un.varCfgHbq;
++
++	memset(pmb, 0, sizeof (LPFC_MBOXQ_t));
++	hbqmb->entry_count = hbq_desc->entry_count;   /* # entries in HBQ */
++	hbqmb->recvNotify = hbq_desc->rn;             /* Receive
++						       * Notification */
++	hbqmb->numMask    = hbq_desc->mask_count;     /* # R_CTL/TYPE masks
++						       * # in words 0-19 */
++	hbqmb->profile    = hbq_desc->profile;	      /* Selection profile:
++						       * 0 = all,
++						       * 7 = logentry */
++	hbqmb->ringMask   = hbq_desc->ring_mask;      /* Binds HBQ to a ring
++						       * e.g. Ring0=b0001,
++						       * ring2=b0100 */
++	hbqmb->headerLen  = hbq_desc->headerLen;      /* 0 if not profile 4
++						       * or 5 */
++	hbqmb->logEntry   = hbq_desc->logEntry;       /* Set to 1 if this
++						       * HBQ will be used
++						       * for LogEntry
++						       * buffers */
++	hbqmb->hbqaddrLow = putPaddrLow(phba->hbqslimp.phys) +
++		hbq_entry_index * sizeof(struct lpfc_hbq_entry);
++	hbqmb->hbqaddrHigh = putPaddrHigh(phba->hbqslimp.phys);
++
++	mb->mbxCommand = MBX_CONFIG_HBQ;
++	mb->mbxOwner = OWN_HOST;
++
++				/* Copy info for profiles 2,3,5. For other
++				 * profiles this area is reserved.
++				 */
++	if (hbq_desc->profile == 2)
++		lpfc_build_hbq_profile2(hbqmb, hbq_desc);
++	else if (hbq_desc->profile == 3)
++		lpfc_build_hbq_profile3(hbqmb, hbq_desc);
++	else if (hbq_desc->profile == 5)
++		lpfc_build_hbq_profile5(hbqmb, hbq_desc);
++
++	/* Return if no rctl / type masks for this HBQ */
++	if (!hbq_desc->mask_count)
++		return;
++
++	/* Otherwise we setup specific rctl / type masks for this HBQ */
++	for (i = 0; i < hbq_desc->mask_count; i++) {
++		hbqmb->hbqMasks[i].tmatch = hbq_desc->hbqMasks[i].tmatch;
++		hbqmb->hbqMasks[i].tmask  = hbq_desc->hbqMasks[i].tmask;
++		hbqmb->hbqMasks[i].rctlmatch = hbq_desc->hbqMasks[i].rctlmatch;
++		hbqmb->hbqMasks[i].rctlmask  = hbq_desc->hbqMasks[i].rctlmask;
++	}
++
++	return;
++}
++
++
++
+ void
+ lpfc_config_ring(struct lpfc_hba * phba, int ring, LPFC_MBOXQ_t * pmb)
+ {
+@@ -514,15 +665,16 @@
+ }
+ 
+ void
+-lpfc_config_port(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_config_port(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
++	MAILBOX_t __iomem *mb_slim = (MAILBOX_t __iomem *) phba->MBslimaddr;
+ 	MAILBOX_t *mb = &pmb->mb;
+ 	dma_addr_t pdma_addr;
+ 	uint32_t bar_low, bar_high;
+ 	size_t offset;
+ 	struct lpfc_hgp hgp;
+-	void __iomem *to_slim;
+ 	int i;
++	uint32_t pgp_offset;
+ 
+ 	memset(pmb, 0, sizeof(LPFC_MBOXQ_t));
+ 	mb->mbxCommand = MBX_CONFIG_PORT;
+@@ -535,12 +687,29 @@
+ 	mb->un.varCfgPort.pcbLow = putPaddrLow(pdma_addr);
+ 	mb->un.varCfgPort.pcbHigh = putPaddrHigh(pdma_addr);
+ 
++	/* If HBA supports SLI=3 ask for it */
++
++	if (phba->sli_rev == 3 && phba->vpd.sli3Feat.cerbm) {
++		mb->un.varCfgPort.cerbm = 1; /* Request HBQs */
++		mb->un.varCfgPort.max_hbq = 1; /* Requesting 2 HBQs */
++		if (phba->max_vpi && phba->cfg_npiv_enable &&
++		    phba->vpd.sli3Feat.cmv) {
++			mb->un.varCfgPort.max_vpi = phba->max_vpi;
++			mb->un.varCfgPort.cmv = 1;
++			phba->sli3_options |= LPFC_SLI3_NPIV_ENABLED;
++		} else
++			mb->un.varCfgPort.max_vpi = phba->max_vpi = 0;
++	} else
++		phba->sli_rev = 2;
++	mb->un.varCfgPort.sli_mode = phba->sli_rev;
++
+ 	/* Now setup pcb */
+ 	phba->slim2p->pcb.type = TYPE_NATIVE_SLI2;
+ 	phba->slim2p->pcb.feature = FEATURE_INITIAL_SLI2;
+ 
+ 	/* Setup Mailbox pointers */
+-	phba->slim2p->pcb.mailBoxSize = sizeof(MAILBOX_t);
++	phba->slim2p->pcb.mailBoxSize = offsetof(MAILBOX_t, us) +
++		sizeof(struct sli2_desc);
+ 	offset = (uint8_t *)&phba->slim2p->mbx - (uint8_t *)phba->slim2p;
+ 	pdma_addr = phba->slim2p_mapping + offset;
+ 	phba->slim2p->pcb.mbAddrHigh = putPaddrHigh(pdma_addr);
+@@ -568,29 +737,70 @@
+ 	pci_read_config_dword(phba->pcidev, PCI_BASE_ADDRESS_0, &bar_low);
+ 	pci_read_config_dword(phba->pcidev, PCI_BASE_ADDRESS_1, &bar_high);
+ 
++	/*
++	 * Set up HGP - Port Memory
++	 *
++	 * The port expects the host get/put pointers to reside in memory
++	 * following the "non-diagnostic" mode mailbox (32 words, 0x80 bytes)
++	 * area of SLIM.  In SLI-2 mode, there's an additional 16 reserved
++	 * words (0x40 bytes).  This area is not reserved if HBQs are
++	 * configured in SLI-3.
++	 *
++	 * CR0Put    - SLI2(no HBQs) = 0xc0, With HBQs = 0x80
++	 * RR0Get                      0xc4              0x84
++	 * CR1Put                      0xc8              0x88
++	 * RR1Get                      0xcc              0x8c
++	 * CR2Put                      0xd0              0x90
++	 * RR2Get                      0xd4              0x94
++	 * CR3Put                      0xd8              0x98
++	 * RR3Get                      0xdc              0x9c
++	 *
++	 * Reserved                    0xa0-0xbf
++	 *    If HBQs configured:
++	 *                         HBQ 0 Put ptr  0xc0
++	 *                         HBQ 1 Put ptr  0xc4
++	 *                         HBQ 2 Put ptr  0xc8
++	 *                         ......
++	 *                         HBQ(M-1)Put Pointer 0xc0+(M-1)*4
++	 *
++	 */
++
++	if (phba->sli_rev == 3) {
++		phba->host_gp = &mb_slim->us.s3.host[0];
++		phba->hbq_put = &mb_slim->us.s3.hbq_put[0];
++	} else {
++		phba->host_gp = &mb_slim->us.s2.host[0];
++		phba->hbq_put = NULL;
++	}
+ 
+ 	/* mask off BAR0's flag bits 0 - 3 */
+ 	phba->slim2p->pcb.hgpAddrLow = (bar_low & PCI_BASE_ADDRESS_MEM_MASK) +
+-					(SLIMOFF*sizeof(uint32_t));
++		(void __iomem *) phba->host_gp -
++		(void __iomem *)phba->MBslimaddr;
+ 	if (bar_low & PCI_BASE_ADDRESS_MEM_TYPE_64)
+ 		phba->slim2p->pcb.hgpAddrHigh = bar_high;
+ 	else
+ 		phba->slim2p->pcb.hgpAddrHigh = 0;
+ 	/* write HGP data to SLIM at the required longword offset */
+ 	memset(&hgp, 0, sizeof(struct lpfc_hgp));
+-	to_slim = phba->MBslimaddr + (SLIMOFF*sizeof (uint32_t));
+ 
+ 	for (i=0; i < phba->sli.num_rings; i++) {
+-		lpfc_memcpy_to_slim(to_slim, &hgp, sizeof(struct lpfc_hgp));
+-		to_slim += sizeof (struct lpfc_hgp);
++		lpfc_memcpy_to_slim(phba->host_gp + i, &hgp,
++				    sizeof(*phba->host_gp));
+ 	}
+ 
+ 	/* Setup Port Group ring pointer */
+-	offset = (uint8_t *)&phba->slim2p->mbx.us.s2.port -
++	if (phba->sli_rev == 3)
++		pgp_offset = (uint8_t *)&phba->slim2p->mbx.us.s3_pgp.port -
+ 		 (uint8_t *)phba->slim2p;
+-	pdma_addr = phba->slim2p_mapping + offset;
++	else
++		pgp_offset = (uint8_t *)&phba->slim2p->mbx.us.s2.port -
++			(uint8_t *)phba->slim2p;
++
++	pdma_addr = phba->slim2p_mapping + pgp_offset;
+ 	phba->slim2p->pcb.pgpAddrHigh = putPaddrHigh(pdma_addr);
+ 	phba->slim2p->pcb.pgpAddrLow = putPaddrLow(pdma_addr);
++	phba->hbq_get = &phba->slim2p->mbx.us.s3_pgp.hbq_get[0];
+ 
+ 	/* Use callback routine to setp rings in the pcb */
+ 	lpfc_config_pcb_setup(phba);
+@@ -606,11 +816,7 @@
+ 
+ 	/* Swap PCB if needed */
+ 	lpfc_sli_pcimem_bcopy(&phba->slim2p->pcb, &phba->slim2p->pcb,
+-								sizeof (PCB_t));
+-
+-	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
+-		        "%d:0405 Service Level Interface (SLI) 2 selected\n",
+-		        phba->brd_no);
++			      sizeof(PCB_t));
+ }
+ 
+ void
+@@ -644,15 +850,23 @@
+ 	LPFC_MBOXQ_t *mbq = NULL;
+ 	struct lpfc_sli *psli = &phba->sli;
+ 
+-	list_remove_head((&psli->mboxq), mbq, LPFC_MBOXQ_t,
+-			 list);
+-	if (mbq) {
++	list_remove_head((&psli->mboxq), mbq, LPFC_MBOXQ_t, list);
++	if (mbq)
+ 		psli->mboxq_cnt--;
+-	}
+ 
+ 	return mbq;
+ }
+ 
++void
++lpfc_mbox_cmpl_put(struct lpfc_hba * phba, LPFC_MBOXQ_t * mbq)
++{
++	/* This function expects to be called from interrupt context */
++	spin_lock(&phba->hbalock);
++	list_add_tail(&mbq->list, &phba->sli.mboxq_cmpl);
++	spin_unlock(&phba->hbalock);
++	return;
++}
++
+ int
+ lpfc_mbox_tmo_val(struct lpfc_hba *phba, int cmd)
+ {
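+
+Reviewer sketch (inter-hunk commentary, ignored by patch(1)): how the new
+lpfc_heart_beat() builder is consumed -- condensed from the timeout handler
+added to lpfc_init.c earlier in this patch, with the timer re-arming trimmed:
+
+	LPFC_MBOXQ_t *pmboxq;
+	int retval;
+
+	pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (pmboxq) {
+		lpfc_heart_beat(phba, pmboxq);	/* fill in MBX_HEARTBEAT */
+		pmboxq->mbox_cmpl = lpfc_hb_mbox_cmpl;
+		pmboxq->vport = phba->pport;
+		retval = lpfc_sli_issue_mbox(phba, pmboxq, MBX_NOWAIT);
+		if (retval != MBX_BUSY && retval != MBX_SUCCESS)
+			mempool_free(pmboxq, phba->mbox_mem_pool);
+	}
+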
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mem.c linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_mem.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_mem.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_mem.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1,7 +1,7 @@
+ /*******************************************************************
+  * This file is part of the Emulex Linux Device Driver for         *
+  * Fibre Channel Host Bus Adapters.                                *
+- * Copyright (C) 2004-2005 Emulex.  All rights reserved.           *
++ * Copyright (C) 2004-2006 Emulex.  All rights reserved.           *
+  * EMULEX and SLI are trademarks of Emulex.                        *
+  * www.emulex.com                                                  *
+  * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
+@@ -38,10 +38,13 @@
+ #define LPFC_MBUF_POOL_SIZE     64      /* max elements in MBUF safety pool */
+ #define LPFC_MEM_POOL_SIZE      64      /* max elem in non-DMA safety pool */
+ 
++
++
+ int
+ lpfc_mem_alloc(struct lpfc_hba * phba)
+ {
+ 	struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
++	int longs;
+ 	int i;
+ 
+ 	phba->lpfc_scsi_dma_buf_pool = pci_pool_create("lpfc_scsi_dma_buf_pool",
+@@ -80,10 +83,27 @@
+ 	if (!phba->nlp_mem_pool)
+ 		goto fail_free_mbox_pool;
+ 
++	phba->lpfc_hbq_pool = pci_pool_create("lpfc_hbq_pool",phba->pcidev,
++					      LPFC_BPL_SIZE, 8, 0);
++	if (!phba->lpfc_hbq_pool)
++		goto fail_free_nlp_mem_pool;
++
++	/* vpi zero is reserved for the physical port so add 1 to max */
++	longs = ((phba->max_vpi + 1) + BITS_PER_LONG - 1) / BITS_PER_LONG;
++	phba->vpi_bmask = kzalloc(longs * sizeof(unsigned long), GFP_KERNEL);
++	if (!phba->vpi_bmask)
++		goto fail_free_hbq_pool;
++
+ 	return 0;
+ 
++ fail_free_hbq_pool:
++	lpfc_sli_hbqbuf_free_all(phba);
++ fail_free_nlp_mem_pool:
++	mempool_destroy(phba->nlp_mem_pool);
++	phba->nlp_mem_pool = NULL;
+  fail_free_mbox_pool:
+ 	mempool_destroy(phba->mbox_mem_pool);
++	phba->mbox_mem_pool = NULL;
+  fail_free_mbuf_pool:
+ 	while (i--)
+ 		pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt,
+@@ -91,8 +111,10 @@
+ 	kfree(pool->elements);
+  fail_free_lpfc_mbuf_pool:
+ 	pci_pool_destroy(phba->lpfc_mbuf_pool);
++	phba->lpfc_mbuf_pool = NULL;
+  fail_free_dma_buf_pool:
+ 	pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool);
++	phba->lpfc_scsi_dma_buf_pool = NULL;
+  fail:
+ 	return -ENOMEM;
+ }
+@@ -106,6 +128,9 @@
+ 	struct lpfc_dmabuf   *mp;
+ 	int i;
+ 
++	kfree(phba->vpi_bmask);
++	lpfc_sli_hbqbuf_free_all(phba);
++
+ 	list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq, list) {
+ 		mp = (struct lpfc_dmabuf *) (mbox->context1);
+ 		if (mp) {
+@@ -115,6 +140,15 @@
+ 		list_del(&mbox->list);
+ 		mempool_free(mbox, phba->mbox_mem_pool);
+ 	}
++	list_for_each_entry_safe(mbox, next_mbox, &psli->mboxq_cmpl, list) {
++		mp = (struct lpfc_dmabuf *) (mbox->context1);
++		if (mp) {
++			lpfc_mbuf_free(phba, mp->virt, mp->phys);
++			kfree(mp);
++		}
++		list_del(&mbox->list);
++		mempool_free(mbox, phba->mbox_mem_pool);
++	}
+ 
+ 	psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+ 	if (psli->mbox_active) {
+@@ -132,12 +166,20 @@
+ 		pci_pool_free(phba->lpfc_mbuf_pool, pool->elements[i].virt,
+ 						 pool->elements[i].phys);
+ 	kfree(pool->elements);
++
++	pci_pool_destroy(phba->lpfc_hbq_pool);
+ 	mempool_destroy(phba->nlp_mem_pool);
+ 	mempool_destroy(phba->mbox_mem_pool);
+ 
+ 	pci_pool_destroy(phba->lpfc_scsi_dma_buf_pool);
+ 	pci_pool_destroy(phba->lpfc_mbuf_pool);
+ 
++	phba->lpfc_hbq_pool = NULL;
++	phba->nlp_mem_pool = NULL;
++	phba->mbox_mem_pool = NULL;
++	phba->lpfc_scsi_dma_buf_pool = NULL;
++	phba->lpfc_mbuf_pool = NULL;
++
+ 	/* Free the iocb lookup array */
+ 	kfree(psli->iocbq_lookup);
+ 	psli->iocbq_lookup = NULL;
+@@ -148,20 +190,23 @@
+ lpfc_mbuf_alloc(struct lpfc_hba *phba, int mem_flags, dma_addr_t *handle)
+ {
+ 	struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
++	unsigned long iflags;
+ 	void *ret;
+ 
+ 	ret = pci_pool_alloc(phba->lpfc_mbuf_pool, GFP_KERNEL, handle);
+ 
+-	if (!ret && ( mem_flags & MEM_PRI) && pool->current_count) {
++	spin_lock_irqsave(&phba->hbalock, iflags);
++	if (!ret && (mem_flags & MEM_PRI) && pool->current_count) {
+ 		pool->current_count--;
+ 		ret = pool->elements[pool->current_count].virt;
+ 		*handle = pool->elements[pool->current_count].phys;
+ 	}
++	spin_unlock_irqrestore(&phba->hbalock, iflags);
+ 	return ret;
+ }
+ 
+ void
+-lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma)
++__lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma)
+ {
+ 	struct lpfc_dma_pool *pool = &phba->lpfc_mbuf_safety_pool;
+ 
+@@ -174,3 +219,51 @@
+ 	}
+ 	return;
+ }
++
++void
++lpfc_mbuf_free(struct lpfc_hba * phba, void *virt, dma_addr_t dma)
++{
++	unsigned long iflags;
++
++	spin_lock_irqsave(&phba->hbalock, iflags);
++	__lpfc_mbuf_free(phba, virt, dma);
++	spin_unlock_irqrestore(&phba->hbalock, iflags);
++	return;
++}
++
++void *
++lpfc_hbq_alloc(struct lpfc_hba *phba, int mem_flags, dma_addr_t *handle)
++{
++	void *ret;
++	ret = pci_pool_alloc(phba->lpfc_hbq_pool, GFP_ATOMIC, handle);
++	return ret;
++}
++
++void
++lpfc_hbq_free(struct lpfc_hba *phba, void *virt, dma_addr_t dma)
++{
++	pci_pool_free(phba->lpfc_hbq_pool, virt, dma);
++	return;
++}
++
++void
++lpfc_in_buf_free(struct lpfc_hba *phba, struct lpfc_dmabuf *mp)
++{
++	struct hbq_dmabuf *hbq_entry;
++
++	if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) {
++		hbq_entry = container_of(mp, struct hbq_dmabuf, dbuf);
++		if (hbq_entry->tag == -1) {
++			lpfc_hbq_free(phba, hbq_entry->dbuf.virt,
++				      hbq_entry->dbuf.phys);
++			kfree(hbq_entry);
++		} else {
++			lpfc_sli_free_hbq(phba, hbq_entry);
++		}
++	} else {
++		lpfc_mbuf_free(phba, mp->virt, mp->phys);
++		kfree(mp);
++	}
++	return;
++}
++
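+
+Reviewer sketch (inter-hunk commentary, ignored by patch(1)): vpi_bmask,
+allocated in lpfc_mem_alloc() above, holds max_vpi + 1 bits with vpi 0
+reserved for the physical port.  The intended use is an ordinary
+find-and-set bitmap; hypothetical helper names, hbalock serialization
+omitted:
+
+	#include <linux/bitops.h>
+
+	static int example_alloc_vpi(struct lpfc_hba *phba)
+	{
+		int vpi;
+
+		/* start the search at bit 1: vpi 0 is the physical port */
+		vpi = find_next_zero_bit(phba->vpi_bmask,
+					 phba->max_vpi + 1, 1);
+		if (vpi > phba->max_vpi)
+			return -1;	/* no free virtual port index */
+		set_bit(vpi, phba->vpi_bmask);
+		return vpi;
+	}
+
+	static void example_free_vpi(struct lpfc_hba *phba, int vpi)
+	{
+		clear_bit(vpi, phba->vpi_bmask);
+	}
+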
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_nportdisc.c linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_nportdisc.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_nportdisc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_nportdisc.c	2007-12-19 15:29:23.000000000 -0500
+@@ -35,20 +35,22 @@
+ #include "lpfc.h"
+ #include "lpfc_logmsg.h"
+ #include "lpfc_crtn.h"
++#include "lpfc_vport.h"
++#include "lpfc_debugfs.h"
+ 
+ 
+ /* Called to verify a rcv'ed ADISC was intended for us. */
+ static int
+-lpfc_check_adisc(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
+-		 struct lpfc_name * nn, struct lpfc_name * pn)
++lpfc_check_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++		 struct lpfc_name *nn, struct lpfc_name *pn)
+ {
+ 	/* Compare the ADISC rsp WWNN / WWPN matches our internal node
+ 	 * table entry for that node.
+ 	 */
+-	if (memcmp(nn, &ndlp->nlp_nodename, sizeof (struct lpfc_name)) != 0)
++	if (memcmp(nn, &ndlp->nlp_nodename, sizeof (struct lpfc_name)))
+ 		return 0;
+ 
+-	if (memcmp(pn, &ndlp->nlp_portname, sizeof (struct lpfc_name)) != 0)
++	if (memcmp(pn, &ndlp->nlp_portname, sizeof (struct lpfc_name)))
+ 		return 0;
+ 
+ 	/* we match, return success */
+@@ -56,11 +58,10 @@
+ }
+ 
+ int
+-lpfc_check_sparm(struct lpfc_hba * phba,
+-		 struct lpfc_nodelist * ndlp, struct serv_parm * sp,
+-		 uint32_t class)
++lpfc_check_sparm(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++		 struct serv_parm * sp, uint32_t class)
+ {
+-	volatile struct serv_parm *hsp = &phba->fc_sparam;
++	volatile struct serv_parm *hsp = &vport->fc_sparam;
+ 	uint16_t hsp_value, ssp_value = 0;
+ 
+ 	/*
+@@ -75,12 +76,15 @@
+ 				hsp->cls1.rcvDataSizeLsb;
+ 		ssp_value = (sp->cls1.rcvDataSizeMsb << 8) |
+ 				sp->cls1.rcvDataSizeLsb;
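++		/* A zero receive data field size is treated as an invalid service parameter */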
++		if (!ssp_value)
++			goto bad_service_param;
+ 		if (ssp_value > hsp_value) {
+ 			sp->cls1.rcvDataSizeLsb = hsp->cls1.rcvDataSizeLsb;
+ 			sp->cls1.rcvDataSizeMsb = hsp->cls1.rcvDataSizeMsb;
+ 		}
+ 	} else if (class == CLASS1) {
+-		return 0;
++		goto bad_service_param;
+ 	}
+ 
+ 	if (sp->cls2.classValid) {
+@@ -88,12 +91,14 @@
+ 				hsp->cls2.rcvDataSizeLsb;
+ 		ssp_value = (sp->cls2.rcvDataSizeMsb << 8) |
+ 				sp->cls2.rcvDataSizeLsb;
++		if (!ssp_value)
++			goto bad_service_param;
+ 		if (ssp_value > hsp_value) {
+ 			sp->cls2.rcvDataSizeLsb = hsp->cls2.rcvDataSizeLsb;
+ 			sp->cls2.rcvDataSizeMsb = hsp->cls2.rcvDataSizeMsb;
+ 		}
+ 	} else if (class == CLASS2) {
+-		return 0;
++		goto bad_service_param;
+ 	}
+ 
+ 	if (sp->cls3.classValid) {
+@@ -101,12 +106,14 @@
+ 				hsp->cls3.rcvDataSizeLsb;
+ 		ssp_value = (sp->cls3.rcvDataSizeMsb << 8) |
+ 				sp->cls3.rcvDataSizeLsb;
++		if (!ssp_value)
++			goto bad_service_param;
+ 		if (ssp_value > hsp_value) {
+ 			sp->cls3.rcvDataSizeLsb = hsp->cls3.rcvDataSizeLsb;
+ 			sp->cls3.rcvDataSizeMsb = hsp->cls3.rcvDataSizeMsb;
+ 		}
+ 	} else if (class == CLASS3) {
+-		return 0;
++		goto bad_service_param;
+ 	}
+ 
+ 	/*
+@@ -125,11 +132,21 @@
+ 	memcpy(&ndlp->nlp_nodename, &sp->nodeName, sizeof (struct lpfc_name));
+ 	memcpy(&ndlp->nlp_portname, &sp->portName, sizeof (struct lpfc_name));
+ 	return 1;
++bad_service_param:
++	lpfc_printf_log(vport->phba, KERN_ERR, LOG_DISCOVERY,
++			"%d (%d):0207 Device %x "
++			"(%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x) sent "
++			"invalid service parameters.  Ignoring device.\n",
++			vport->phba->brd_no, ndlp->vport->vpi, ndlp->nlp_DID,
++			sp->nodeName.u.wwn[0], sp->nodeName.u.wwn[1],
++			sp->nodeName.u.wwn[2], sp->nodeName.u.wwn[3],
++			sp->nodeName.u.wwn[4], sp->nodeName.u.wwn[5],
++			sp->nodeName.u.wwn[6], sp->nodeName.u.wwn[7]);
++	return 0;
+ }
+ 
+ static void *
+-lpfc_check_elscmpl_iocb(struct lpfc_hba * phba,
+-		      struct lpfc_iocbq *cmdiocb,
++lpfc_check_elscmpl_iocb(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
+ 		      struct lpfc_iocbq *rspiocb)
+ {
+ 	struct lpfc_dmabuf *pcmd, *prsp;
+@@ -168,32 +185,29 @@
+  * routine effectively results in a "software abort".
+  */
+ int
+-lpfc_els_abort(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp)
++lpfc_els_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+ {
+ 	LIST_HEAD(completions);
+-	struct lpfc_sli *psli;
+-	struct lpfc_sli_ring *pring;
++	struct lpfc_sli  *psli = &phba->sli;
++	struct lpfc_sli_ring *pring = &psli->ring[LPFC_ELS_RING];
+ 	struct lpfc_iocbq *iocb, *next_iocb;
+ 	IOCB_t *cmd;
+ 
+ 	/* Abort outstanding I/O on NPort <nlp_DID> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
+-			"%d:0205 Abort outstanding I/O on NPort x%x "
++			"%d (%d):0205 Abort outstanding I/O on NPort x%x "
+ 			"Data: x%x x%x x%x\n",
+-			phba->brd_no, ndlp->nlp_DID, ndlp->nlp_flag,
+-			ndlp->nlp_state, ndlp->nlp_rpi);
++			phba->brd_no, ndlp->vport->vpi, ndlp->nlp_DID,
++			ndlp->nlp_flag, ndlp->nlp_state, ndlp->nlp_rpi);
+ 
+-	psli = &phba->sli;
+-	pring = &psli->ring[LPFC_ELS_RING];
++	lpfc_fabric_abort_nport(ndlp);
+ 
+ 	/* First check the txq */
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
+-		/* Check to see if iocb matches the nport we are looking
+-		   for */
++		/* Check to see if iocb matches the nport we are looking for */
+ 		if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp)) {
+-			/* It matches, so deque and call compl with an
+-			   error */
++			/* It matches, so dequeue and call compl with an error */
+ 			list_move_tail(&iocb->list, &completions);
+ 			pring->txq_cnt--;
+ 		}
+@@ -201,37 +215,40 @@
+ 
+ 	/* Next check the txcmplq */
+ 	list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list) {
+-		/* Check to see if iocb matches the nport we are looking
+-		   for */
+-		if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp))
++		/* Check to see if iocb matches the nport we are looking for */
++		if (lpfc_check_sli_ndlp(phba, pring, iocb, ndlp)) {
+ 			lpfc_sli_issue_abort_iotag(phba, pring, iocb);
++		}
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
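++	/* Complete each dequeued iocb as aborted, or release it if it has no handler */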
+ 	while (!list_empty(&completions)) {
+ 		iocb = list_get_first(&completions, struct lpfc_iocbq, list);
+ 		cmd = &iocb->iocb;
+-		list_del(&iocb->list);
++		list_del_init(&iocb->list);
+ 
+-		if (iocb->iocb_cmpl) {
++		if (!iocb->iocb_cmpl)
++			lpfc_sli_release_iocbq(phba, iocb);
++		else {
+ 			cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+ 			cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
+ 			(iocb->iocb_cmpl) (phba, iocb, iocb);
+-		} else
+-			lpfc_sli_release_iocbq(phba, iocb);
++		}
+ 	}
+ 
+ 	/* If we are delaying issuing an ELS command, cancel it */
+ 	if (ndlp->nlp_flag & NLP_DELAY_TMO)
+-		lpfc_cancel_retry_delay_tmo(phba, ndlp);
++		lpfc_cancel_retry_delay_tmo(phba->pport, ndlp);
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_rcv_plogi(struct lpfc_hba * phba,
+-		      struct lpfc_nodelist * ndlp,
++lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ 		      struct lpfc_iocbq *cmdiocb)
+ {
++	struct Scsi_Host   *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba    *phba = vport->phba;
+ 	struct lpfc_dmabuf *pcmd;
+ 	uint32_t *lp;
+ 	IOCB_t *icmd;
+@@ -241,14 +257,14 @@
+ 	int rc;
+ 
+ 	memset(&stat, 0, sizeof (struct ls_rjt));
+-	if (phba->hba_state <= LPFC_FLOGI) {
++	if (vport->port_state <= LPFC_FLOGI) {
+ 		/* Before responding to PLOGI, check for pt2pt mode.
+ 		 * If we are pt2pt, with an outstanding FLOGI, abort
+ 		 * the FLOGI and resend it first.
+ 		 */
+-		if (phba->fc_flag & FC_PT2PT) {
++		if (vport->fc_flag & FC_PT2PT) {
+ 			lpfc_els_abort_flogi(phba);
+-		        if (!(phba->fc_flag & FC_PT2PT_PLOGI)) {
++		        if (!(vport->fc_flag & FC_PT2PT_PLOGI)) {
+ 				/* If the other side is supposed to initiate
+ 				 * the PLOGI anyway, just ACC it now and
+ 				 * move on with discovery.
+@@ -257,45 +273,42 @@
+ 				phba->fc_ratov = FF_DEF_RATOV;
+ 				/* Start discovery - this should just do
+ 				   CLEAR_LA */
+-				lpfc_disc_start(phba);
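++			/* Drop the node reference held by the active mailbox */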
+-			} else {
+-				lpfc_initial_flogi(phba);
+-			}
++				lpfc_disc_start(vport);
++			} else
++				lpfc_initial_flogi(vport);
+ 		} else {
+ 			stat.un.b.lsRjtRsnCode = LSRJT_LOGICAL_BSY;
+ 			stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE;
+-			lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb,
+-					    ndlp);
++			lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb,
++					    ndlp, NULL);
+ 			return 0;
+ 		}
+ 	}
+ 	pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+ 	lp = (uint32_t *) pcmd->virt;
+ 	sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t));
+-	if ((lpfc_check_sparm(phba, ndlp, sp, CLASS3) == 0)) {
++	if ((lpfc_check_sparm(vport, ndlp, sp, CLASS3) == 0)) {
+ 		/* Reject this request because invalid parameters */
+ 		stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ 		stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS;
+-		lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++		lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp,
++			NULL);
+ 		return 0;
+ 	}
+ 	icmd = &cmdiocb->iocb;
+ 
+ 	/* PLOGI chkparm OK */
+-	lpfc_printf_log(phba,
+-			KERN_INFO,
+-			LOG_ELS,
+-			"%d:0114 PLOGI chkparm OK Data: x%x x%x x%x x%x\n",
+-			phba->brd_no,
++	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++			"%d (%d):0114 PLOGI chkparm OK Data: x%x x%x x%x x%x\n",
++			phba->brd_no, vport->vpi,
+ 			ndlp->nlp_DID, ndlp->nlp_state, ndlp->nlp_flag,
+ 			ndlp->nlp_rpi);
+ 
+-	if ((phba->cfg_fcp_class == 2) &&
+-	    (sp->cls2.classValid)) {
++	if (phba->cfg_fcp_class == 2 && sp->cls2.classValid)
+ 		ndlp->nlp_fcp_info |= CLASS2;
+-	} else {
++	else
+ 		ndlp->nlp_fcp_info |= CLASS3;
+-	}
++
+ 	ndlp->nlp_class_sup = 0;
+ 	if (sp->cls1.classValid)
+ 		ndlp->nlp_class_sup |= FC_COS_CLASS1;
+@@ -317,35 +330,37 @@
+ 	case  NLP_STE_PRLI_ISSUE:
+ 	case  NLP_STE_UNMAPPED_NODE:
+ 	case  NLP_STE_MAPPED_NODE:
+-		lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, 0);
++		lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL, 0);
+ 		return 1;
+ 	}
+ 
+-	if ((phba->fc_flag & FC_PT2PT)
+-	    && !(phba->fc_flag & FC_PT2PT_PLOGI)) {
++	if ((vport->fc_flag & FC_PT2PT) &&
++	    !(vport->fc_flag & FC_PT2PT_PLOGI)) {
+ 		/* rcv'ed PLOGI decides what our NPortId will be */
+-		phba->fc_myDID = icmd->un.rcvels.parmRo;
++		vport->fc_myDID = icmd->un.rcvels.parmRo;
+ 		mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+ 		if (mbox == NULL)
+ 			goto out;
+ 		lpfc_config_link(phba, mbox);
+ 		mbox->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++		mbox->vport = vport;
+ 		rc = lpfc_sli_issue_mbox
+ 			(phba, mbox, (MBX_NOWAIT | MBX_STOP_IOCB));
+ 		if (rc == MBX_NOT_FINISHED) {
+-			mempool_free( mbox, phba->mbox_mem_pool);
++			mempool_free(mbox, phba->mbox_mem_pool);
+ 			goto out;
+ 		}
+ 
+-		lpfc_can_disctmo(phba);
++		lpfc_can_disctmo(vport);
+ 	}
+ 	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+-	if (mbox == NULL)
++	if (!mbox)
+ 		goto out;
+ 
+-	if (lpfc_reg_login(phba, icmd->un.rcvels.remoteID,
+-			   (uint8_t *) sp, mbox, 0)) {
+-		mempool_free( mbox, phba->mbox_mem_pool);
++	rc = lpfc_reg_login(phba, vport->vpi, icmd->un.rcvels.remoteID,
++			    (uint8_t *) sp, mbox, 0);
++	if (rc) {
++		mempool_free(mbox, phba->mbox_mem_pool);
+ 		goto out;
+ 	}
+ 
+@@ -357,7 +372,10 @@
+ 	 * mbox->context2 = lpfc_nlp_get(ndlp) deferred until mailbox
+ 	 * command issued in lpfc_cmpl_els_acc().
+ 	 */
++	mbox->vport = vport;
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag |= (NLP_ACC_REGLOGIN | NLP_RCV_PLOGI);
++	spin_unlock_irq(shost->host_lock);
+ 
+ 	/*
+ 	 * If there is an outstanding PLOGI issued, abort it before
+@@ -373,21 +391,38 @@
+ 		lpfc_els_abort(phba, ndlp);
+ 	}
+ 
+-	lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp, mbox, 0);
++	if ((vport->port_type == LPFC_NPIV_PORT &&
++	      phba->cfg_vport_restrict_login)) {
++
++		/* In order to preserve RPIs, we want to clean up
++		 * the default RPI the firmware created to rcv
++		 * this ELS request. The only way to do this is
++		 * to register, then unregister the RPI.
++		 */
++		spin_lock_irq(shost->host_lock);
++		ndlp->nlp_flag |= NLP_RM_DFLT_RPI;
++		spin_unlock_irq(shost->host_lock);
++		stat.un.b.lsRjtRsnCode = LSRJT_INVALID_CMD;
++		stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE;
++		lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb,
++			ndlp, mbox);
++		return 1;
++	}
++	lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, mbox, 0);
+ 	return 1;
+ 
+ out:
+ 	stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ 	stat.un.b.lsRjtRsnCodeExp = LSEXP_OUT_OF_RESOURCE;
+-	lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++	lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL);
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_rcv_padisc(struct lpfc_hba * phba,
+-		struct lpfc_nodelist * ndlp,
++lpfc_rcv_padisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ 		struct lpfc_iocbq *cmdiocb)
+ {
++	struct Scsi_Host   *shost = lpfc_shost_from_vport(vport);
+ 	struct lpfc_dmabuf *pcmd;
+ 	struct serv_parm *sp;
+ 	struct lpfc_name *pnn, *ppn;
+@@ -412,12 +447,11 @@
+ 	}
+ 
+ 	icmd = &cmdiocb->iocb;
+-	if ((icmd->ulpStatus == 0) &&
+-	    (lpfc_check_adisc(phba, ndlp, pnn, ppn))) {
++	if (icmd->ulpStatus == 0 && lpfc_check_adisc(vport, ndlp, pnn, ppn)) {
+ 		if (cmd == ELS_CMD_ADISC) {
+-			lpfc_els_rsp_adisc_acc(phba, cmdiocb, ndlp);
++			lpfc_els_rsp_adisc_acc(vport, cmdiocb, ndlp);
+ 		} else {
+-			lpfc_els_rsp_acc(phba, ELS_CMD_PLOGI, cmdiocb, ndlp,
++			lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp,
+ 				NULL, 0);
+ 		}
+ 		return 1;
+@@ -427,55 +461,57 @@
+ 	stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ 	stat.un.b.lsRjtRsnCodeExp = LSEXP_SPARM_OPTIONS;
+ 	stat.un.b.vendorUnique = 0;
+-	lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++	lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL);
+ 
+ 	/* 1 sec timeout */
+ 	mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ);
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag |= NLP_DELAY_TMO;
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
+ 	ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
+ 	ndlp->nlp_prev_state = ndlp->nlp_state;
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ 	return 0;
+ }
+ 
+ static int
+-lpfc_rcv_logo(struct lpfc_hba * phba,
+-		      struct lpfc_nodelist * ndlp,
+-		      struct lpfc_iocbq *cmdiocb,
+-		      uint32_t els_cmd)
++lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++	      struct lpfc_iocbq *cmdiocb, uint32_t els_cmd)
+ {
+-	/* Put ndlp on NPR list with 1 sec timeout for plogi, ACC logo */
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++	/* Put ndlp in NPR state with 1 sec timeout for plogi, ACC logo */
+ 	/* Only call LOGO ACC for first LOGO, this avoids sending unnecessary
+ 	 * PLOGIs during LOGO storms from a device.
+ 	 */
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag |= NLP_LOGO_ACC;
++	spin_unlock_irq(shost->host_lock);
+ 	if (els_cmd == ELS_CMD_PRLO)
+-		lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
++		lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
+ 	else
+-		lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++		lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
+ 
+ 	if (!(ndlp->nlp_type & NLP_FABRIC) ||
+ 		(ndlp->nlp_state == NLP_STE_ADISC_ISSUE)) {
+ 		/* Only try to re-login if this is NOT a Fabric Node */
+ 		mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
+-		spin_lock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_DELAY_TMO;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+ 
+ 		ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
+ 		ndlp->nlp_prev_state = ndlp->nlp_state;
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ 	} else {
+ 		ndlp->nlp_prev_state = ndlp->nlp_state;
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
+ 	}
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
+ 	/* The driver has to wait until the ACC completes before it continues
+ 	 * processing the LOGO.  The action will resume in
+ 	 * lpfc_cmpl_els_logo_acc routine. Since part of processing includes an
+@@ -485,8 +521,7 @@
+ }
+ 
+ static void
+-lpfc_rcv_prli(struct lpfc_hba * phba,
+-		      struct lpfc_nodelist * ndlp,
++lpfc_rcv_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ 		      struct lpfc_iocbq *cmdiocb)
+ {
+ 	struct lpfc_dmabuf *pcmd;
+@@ -501,8 +536,7 @@
+ 
+ 	ndlp->nlp_type &= ~(NLP_FCP_TARGET | NLP_FCP_INITIATOR);
+ 	ndlp->nlp_fcp_info &= ~NLP_FCP_2_DEVICE;
+-	if ((npr->acceptRspCode == PRLI_REQ_EXECUTED) &&
+-	    (npr->prliType == PRLI_FCP_TYPE)) {
++	if (npr->prliType == PRLI_FCP_TYPE) {
+ 		if (npr->initiatorFunc)
+ 			ndlp->nlp_type |= NLP_FCP_INITIATOR;
+ 		if (npr->targetFunc)
+@@ -517,36 +551,42 @@
+ 			roles |= FC_RPORT_ROLE_FCP_INITIATOR;
+ 		if (ndlp->nlp_type & NLP_FCP_TARGET)
+ 			roles |= FC_RPORT_ROLE_FCP_TARGET;
++
++		lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_RPORT,
++			"rport rolechg:   role:x%x did:x%x flg:x%x",
++			roles, ndlp->nlp_DID, ndlp->nlp_flag);
++
+ 		fc_remote_port_rolechg(rport, roles);
+ 	}
+ }
+ 
+ static uint32_t
+-lpfc_disc_set_adisc(struct lpfc_hba * phba,
+-		      struct lpfc_nodelist * ndlp)
++lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
++
+ 	/* Check config parameter use-adisc or FCP-2 */
+-	if ((phba->cfg_use_adisc == 0) &&
+-		!(phba->fc_flag & FC_RSCN_MODE)) {
+-		if (!(ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE))
+-			return 0;
+-	}
+-	spin_lock_irq(phba->host->host_lock);
++	if ((phba->cfg_use_adisc && (vport->fc_flag & FC_RSCN_MODE)) ||
++	    ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) {
++		spin_lock_irq(shost->host_lock);
+-	ndlp->nlp_flag |= NLP_NPR_ADISC;
++		ndlp->nlp_flag |= NLP_NPR_ADISC;
+-	spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+-	return 1;
++		return 1;
++	}
++	ndlp->nlp_flag &= ~NLP_NPR_ADISC;
++	lpfc_unreg_rpi(vport, ndlp);
++	return 0;
+ }
+ 
+ static uint32_t
+-lpfc_disc_illegal(struct lpfc_hba * phba,
+-		   struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_disc_illegal(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++		  void *arg, uint32_t evt)
+ {
+-	lpfc_printf_log(phba,
+-			KERN_ERR,
+-			LOG_DISCOVERY,
+-			"%d:0253 Illegal State Transition: node x%x event x%x, "
+-			"state x%x Data: x%x x%x\n",
+-			phba->brd_no,
++	lpfc_printf_log(vport->phba, KERN_ERR, LOG_DISCOVERY,
++			"%d (%d):0253 Illegal State Transition: node x%x "
++			"event x%x, state x%x Data: x%x x%x\n",
++			vport->phba->brd_no, vport->vpi,
+ 			ndlp->nlp_DID, evt, ndlp->nlp_state, ndlp->nlp_rpi,
+ 			ndlp->nlp_flag);
+ 	return ndlp->nlp_state;
+@@ -555,150 +595,161 @@
+ /* Start of Discovery State Machine routines */
+ 
+ static uint32_t
+-lpfc_rcv_plogi_unused_node(struct lpfc_hba * phba,
+-			   struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_plogi_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			   void *arg, uint32_t evt)
+ {
+ 	struct lpfc_iocbq *cmdiocb;
+ 
+ 	cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	if (lpfc_rcv_plogi(phba, ndlp, cmdiocb)) {
++	if (lpfc_rcv_plogi(vport, ndlp, cmdiocb)) {
+ 		ndlp->nlp_prev_state = NLP_STE_UNUSED_NODE;
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
+ 		return ndlp->nlp_state;
+ 	}
+-	lpfc_drop_node(phba, ndlp);
++	lpfc_drop_node(vport, ndlp);
+ 	return NLP_STE_FREED_NODE;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_els_unused_node(struct lpfc_hba * phba,
+-			 struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_els_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			 void *arg, uint32_t evt)
+ {
+-	lpfc_issue_els_logo(phba, ndlp, 0);
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE);
++	lpfc_issue_els_logo(vport, ndlp, 0);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_logo_unused_node(struct lpfc_hba * phba,
+-			  struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_logo_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			  void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq     *cmdiocb;
+-
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag |= NLP_LOGO_ACC;
+-	spin_unlock_irq(phba->host->host_lock);
+-	lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE);
++	spin_unlock_irq(shost->host_lock);
++	lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
+ 
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_cmpl_logo_unused_node(struct lpfc_hba * phba,
+-			  struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_cmpl_logo_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			   void *arg, uint32_t evt)
+ {
+-	lpfc_drop_node(phba, ndlp);
++	lpfc_drop_node(vport, ndlp);
+ 	return NLP_STE_FREED_NODE;
+ }
+ 
+ static uint32_t
+-lpfc_device_rm_unused_node(struct lpfc_hba * phba,
+-			   struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_device_rm_unused_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			   void *arg, uint32_t evt)
+ {
+-	lpfc_drop_node(phba, ndlp);
++	lpfc_drop_node(vport, ndlp);
+ 	return NLP_STE_FREED_NODE;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_plogi_plogi_issue(struct lpfc_hba * phba, struct lpfc_nodelist * ndlp,
++lpfc_rcv_plogi_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ 			   void *arg, uint32_t evt)
+ {
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct lpfc_iocbq *cmdiocb = arg;
+-	struct lpfc_dmabuf *pcmd;
+-	struct serv_parm *sp;
+-	uint32_t *lp;
++	struct lpfc_dmabuf *pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
++	uint32_t *lp = (uint32_t *) pcmd->virt;
++	struct serv_parm *sp = (struct serv_parm *) (lp + 1);
+ 	struct ls_rjt stat;
+ 	int port_cmp;
+ 
+-	pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+-	lp = (uint32_t *) pcmd->virt;
+-	sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t));
+-
+ 	memset(&stat, 0, sizeof (struct ls_rjt));
+ 
+ 	/* For a PLOGI, we only accept if our portname is less
+ 	 * than the remote portname.
+ 	 */
+ 	phba->fc_stat.elsLogiCol++;
+-	port_cmp = memcmp(&phba->fc_portname, &sp->portName,
+-			  sizeof (struct lpfc_name));
++	port_cmp = memcmp(&vport->fc_portname, &sp->portName,
++			  sizeof(struct lpfc_name));
+ 
+ 	if (port_cmp >= 0) {
+ 		/* Reject this request because the remote node will accept
+ 		   ours */
+ 		stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ 		stat.un.b.lsRjtRsnCodeExp = LSEXP_CMD_IN_PROGRESS;
+-		lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++		lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp,
++			NULL);
+ 	} else {
+-		lpfc_rcv_plogi(phba, ndlp, cmdiocb);
+-	} /* if our portname was less */
++		lpfc_rcv_plogi(vport, ndlp, cmdiocb);
++	} /* If our portname was less */
+ 
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_logo_plogi_issue(struct lpfc_hba * phba,
+-			  struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prli_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			  void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq     *cmdiocb;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
++	struct ls_rjt     stat;
+ 
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	memset(&stat, 0, sizeof (struct ls_rjt));
++	stat.un.b.lsRjtRsnCode = LSRJT_LOGICAL_BSY;
++	stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE;
++	lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL);
++	return ndlp->nlp_state;
++}
++
++static uint32_t
++lpfc_rcv_logo_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			  void *arg, uint32_t evt)
++{
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+ 	/* software abort outstanding PLOGI */
+-	lpfc_els_abort(phba, ndlp);
++	lpfc_els_abort(vport->phba, ndlp);
+ 
+-	lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++	lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_els_plogi_issue(struct lpfc_hba * phba,
+-			  struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_els_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			 void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq     *cmdiocb;
+-
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba   *phba = vport->phba;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+ 	/* software abort outstanding PLOGI */
+ 	lpfc_els_abort(phba, ndlp);
+ 
+ 	if (evt == NLP_EVT_RCV_LOGO) {
+-		lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++		lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
+ 	} else {
+-		lpfc_issue_els_logo(phba, ndlp, 0);
++		lpfc_issue_els_logo(vport, ndlp, 0);
+ 	}
+ 
+-	/* Put ndlp in npr list set plogi timer for 1 sec */
++	/* Put ndlp in npr state, set plogi timer for 1 sec */
+ 	mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag |= NLP_DELAY_TMO;
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
+ 	ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
+ 	ndlp->nlp_prev_state = NLP_STE_PLOGI_ISSUE;
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ 
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_cmpl_plogi_plogi_issue(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
++lpfc_cmpl_plogi_plogi_issue(struct lpfc_vport *vport,
++			    struct lpfc_nodelist *ndlp,
++			    void *arg,
+ 			    uint32_t evt)
+ {
++	struct lpfc_hba    *phba = vport->phba;
+ 	struct lpfc_iocbq *cmdiocb, *rspiocb;
+ 	struct lpfc_dmabuf *pcmd, *prsp, *mp;
+ 	uint32_t *lp;
+@@ -721,31 +772,26 @@
+ 
+ 	pcmd = (struct lpfc_dmabuf *) cmdiocb->context2;
+ 
+-	prsp = list_get_first(&pcmd->list,
+-			      struct lpfc_dmabuf,
+-			      list);
+-	lp = (uint32_t *) prsp->virt;
++	prsp = list_get_first(&pcmd->list, struct lpfc_dmabuf, list);
+ 
++	lp = (uint32_t *) prsp->virt;
+ 	sp = (struct serv_parm *) ((uint8_t *) lp + sizeof (uint32_t));
+-	if (!lpfc_check_sparm(phba, ndlp, sp, CLASS3))
++	if (!lpfc_check_sparm(vport, ndlp, sp, CLASS3))
+ 		goto out;
+ 
+ 	/* PLOGI chkparm OK */
+-	lpfc_printf_log(phba,
+-			KERN_INFO,
+-			LOG_ELS,
+-			"%d:0121 PLOGI chkparm OK "
++	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++			"%d (%d):0121 PLOGI chkparm OK "
+ 			"Data: x%x x%x x%x x%x\n",
+-			phba->brd_no,
++			phba->brd_no, vport->vpi,
+ 			ndlp->nlp_DID, ndlp->nlp_state,
+ 			ndlp->nlp_flag, ndlp->nlp_rpi);
+ 
+-	if ((phba->cfg_fcp_class == 2) &&
+-	    (sp->cls2.classValid)) {
++	if (phba->cfg_fcp_class == 2 && (sp->cls2.classValid))
+ 		ndlp->nlp_fcp_info |= CLASS2;
+-	} else {
++	else
+ 		ndlp->nlp_fcp_info |= CLASS3;
+-	}
++
+ 	ndlp->nlp_class_sup = 0;
+ 	if (sp->cls1.classValid)
+ 		ndlp->nlp_class_sup |= FC_COS_CLASS1;
+@@ -756,16 +802,23 @@
+ 	if (sp->cls4.classValid)
+ 		ndlp->nlp_class_sup |= FC_COS_CLASS4;
+ 	ndlp->nlp_maxframe =
+-		((sp->cmn.bbRcvSizeMsb & 0x0F) << 8) |
+-		sp->cmn.bbRcvSizeLsb;
++		((sp->cmn.bbRcvSizeMsb & 0x0F) << 8) | sp->cmn.bbRcvSizeLsb;
+ 
+-	if (!(mbox = mempool_alloc(phba->mbox_mem_pool,
+-				   GFP_KERNEL)))
++	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++	if (!mbox) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++			"%d (%d):0133 PLOGI: no memory for reg_login "
++			"Data: x%x x%x x%x x%x\n",
++			phba->brd_no, vport->vpi,
++			ndlp->nlp_DID, ndlp->nlp_state,
++			ndlp->nlp_flag, ndlp->nlp_rpi);
+ 		goto out;
++	}
++
++	lpfc_unreg_rpi(vport, ndlp);
+ 
+-	lpfc_unreg_rpi(phba, ndlp);
+-	if (lpfc_reg_login(phba, irsp->un.elsreq64.remoteID, (uint8_t *) sp,
+-			   mbox, 0) == 0) {
++	if (lpfc_reg_login(phba, vport->vpi, irsp->un.elsreq64.remoteID,
++			   (uint8_t *) sp, mbox, 0) == 0) {
+ 		switch (ndlp->nlp_DID) {
+ 		case NameServer_DID:
+ 			mbox->mbox_cmpl = lpfc_mbx_cmpl_ns_reg_login;
+@@ -777,68 +830,104 @@
+ 			mbox->mbox_cmpl = lpfc_mbx_cmpl_reg_login;
+ 		}
+ 		mbox->context2 = lpfc_nlp_get(ndlp);
++		mbox->vport = vport;
+ 		if (lpfc_sli_issue_mbox(phba, mbox,
+ 					(MBX_NOWAIT | MBX_STOP_IOCB))
+ 		    != MBX_NOT_FINISHED) {
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_REG_LOGIN_ISSUE);
++			lpfc_nlp_set_state(vport, ndlp,
++					   NLP_STE_REG_LOGIN_ISSUE);
+ 			return ndlp->nlp_state;
+ 		}
+ 		lpfc_nlp_put(ndlp);
+-		mp = (struct lpfc_dmabuf *)mbox->context1;
++		mp = (struct lpfc_dmabuf *) mbox->context1;
+ 		lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ 		kfree(mp);
+ 		mempool_free(mbox, phba->mbox_mem_pool);
++
++		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++			"%d (%d):0134 PLOGI: cannot issue reg_login "
++			"Data: x%x x%x x%x x%x\n",
++			phba->brd_no, vport->vpi,
++			ndlp->nlp_DID, ndlp->nlp_state,
++			ndlp->nlp_flag, ndlp->nlp_rpi);
+ 	} else {
+ 		mempool_free(mbox, phba->mbox_mem_pool);
++
++		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++			"%d (%d):0135 PLOGI: cannot format reg_login "
++			"Data: x%x x%x x%x x%x\n",
++			phba->brd_no, vport->vpi,
++			ndlp->nlp_DID, ndlp->nlp_state,
++			ndlp->nlp_flag, ndlp->nlp_rpi);
+ 	}
+ 
+ 
+- out:
++out:
++	if (ndlp->nlp_DID == NameServer_DID) {
++		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++		lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++			"%d (%d):0261 Cannot Register NameServer login\n",
++			phba->brd_no, vport->vpi);
++	}
++
+ 	/* Free this node since the driver cannot login or has the wrong
+ 	   sparm */
+-	lpfc_drop_node(phba, ndlp);
++	lpfc_drop_node(vport, ndlp);
+ 	return NLP_STE_FREED_NODE;
+ }
+ 
+ static uint32_t
+-lpfc_device_rm_plogi_issue(struct lpfc_hba * phba,
+-			   struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_device_rm_plogi_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			   void *arg, uint32_t evt)
+ {
+-	if(ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++	if (ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_NODEV_REMOVE;
++		spin_unlock_irq(shost->host_lock);
+ 		return ndlp->nlp_state;
+-	}
+-	else {
++	} else {
+ 		/* software abort outstanding PLOGI */
+-		lpfc_els_abort(phba, ndlp);
++		lpfc_els_abort(vport->phba, ndlp);
+ 
+-		lpfc_drop_node(phba, ndlp);
++		lpfc_drop_node(vport, ndlp);
+ 		return NLP_STE_FREED_NODE;
+ 	}
+ }
+ 
+ static uint32_t
+-lpfc_device_recov_plogi_issue(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
++lpfc_device_recov_plogi_issue(struct lpfc_vport *vport,
++			      struct lpfc_nodelist *ndlp,
++			      void *arg,
+-			    uint32_t evt)
++			      uint32_t evt)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
++
++	/* Don't do anything that will mess up processing of the
++	 * previous RSCN.
++	 */
++	if (vport->fc_flag & FC_RSCN_DEFERRED)
++		return ndlp->nlp_state;
++
+ 	/* software abort outstanding PLOGI */
+ 	lpfc_els_abort(phba, ndlp);
+ 
+ 	ndlp->nlp_prev_state = NLP_STE_PLOGI_ISSUE;
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+-	spin_lock_irq(phba->host->host_lock);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
+ 
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_plogi_adisc_issue(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_rcv_plogi_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			   void *arg, uint32_t evt)
+ {
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct lpfc_iocbq *cmdiocb;
+ 
+ 	/* software abort outstanding ADISC */
+@@ -846,34 +935,31 @@
+ 
+ 	cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	if (lpfc_rcv_plogi(phba, ndlp, cmdiocb)) {
++	if (lpfc_rcv_plogi(vport, ndlp, cmdiocb))
+ 		return ndlp->nlp_state;
+-	}
++
+ 	ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+-	lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0);
++	lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
+ 
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_prli_adisc_issue(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_rcv_prli_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			  void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	cmdiocb = (struct lpfc_iocbq *) arg;
+-
+-	lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++	lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_logo_adisc_issue(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_rcv_logo_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			  void *arg, uint32_t evt)
+ {
++	struct lpfc_hba *phba = vport->phba;
+ 	struct lpfc_iocbq *cmdiocb;
+ 
+ 	cmdiocb = (struct lpfc_iocbq *) arg;
+@@ -881,42 +967,43 @@
+ 	/* software abort outstanding ADISC */
+ 	lpfc_els_abort(phba, ndlp);
+ 
+-	lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++	lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_padisc_adisc_issue(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_rcv_padisc_adisc_issue(struct lpfc_vport *vport,
++			    struct lpfc_nodelist *ndlp,
++			    void *arg, uint32_t evt)
+ {
+ 	struct lpfc_iocbq *cmdiocb;
+ 
+ 	cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++	lpfc_rcv_padisc(vport, ndlp, cmdiocb);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_prlo_adisc_issue(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_rcv_prlo_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			  void *arg, uint32_t evt)
+ {
+ 	struct lpfc_iocbq *cmdiocb;
+ 
+ 	cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+ 	/* Treat like rcv logo */
+-	lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_PRLO);
++	lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_PRLO);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_cmpl_adisc_adisc_issue(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_cmpl_adisc_adisc_issue(struct lpfc_vport *vport,
++			    struct lpfc_nodelist *ndlp,
++			    void *arg, uint32_t evt)
+ {
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct lpfc_iocbq *cmdiocb, *rspiocb;
+ 	IOCB_t *irsp;
+ 	ADISC *ap;
+@@ -928,101 +1015,112 @@
+ 	irsp = &rspiocb->iocb;
+ 
+ 	if ((irsp->ulpStatus) ||
+-		(!lpfc_check_adisc(phba, ndlp, &ap->nodeName, &ap->portName))) {
++	    (!lpfc_check_adisc(vport, ndlp, &ap->nodeName, &ap->portName))) {
+ 		/* 1 sec timeout */
+ 		mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ);
+-		spin_lock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_DELAY_TMO;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+ 		ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
+ 
+-		memset(&ndlp->nlp_nodename, 0, sizeof (struct lpfc_name));
+-		memset(&ndlp->nlp_portname, 0, sizeof (struct lpfc_name));
++		memset(&ndlp->nlp_nodename, 0, sizeof(struct lpfc_name));
++		memset(&ndlp->nlp_portname, 0, sizeof(struct lpfc_name));
+ 
+ 		ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+-		lpfc_unreg_rpi(phba, ndlp);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++		lpfc_unreg_rpi(vport, ndlp);
+ 		return ndlp->nlp_state;
+ 	}
+ 
+ 	if (ndlp->nlp_type & NLP_FCP_TARGET) {
+ 		ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_MAPPED_NODE);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE);
+ 	} else {
+ 		ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+ 	}
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_device_rm_adisc_issue(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_device_rm_adisc_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			   void *arg, uint32_t evt)
+ {
+-	if(ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++	if (ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_NODEV_REMOVE;
++		spin_unlock_irq(shost->host_lock);
+ 		return ndlp->nlp_state;
+-	}
+-	else {
++	} else {
+ 		/* software abort outstanding ADISC */
+-		lpfc_els_abort(phba, ndlp);
++		lpfc_els_abort(vport->phba, ndlp);
+ 
+-		lpfc_drop_node(phba, ndlp);
++		lpfc_drop_node(vport, ndlp);
+ 		return NLP_STE_FREED_NODE;
+ 	}
+ }
+ 
+ static uint32_t
+-lpfc_device_recov_adisc_issue(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
++lpfc_device_recov_adisc_issue(struct lpfc_vport *vport,
++			      struct lpfc_nodelist *ndlp,
++			      void *arg,
+-			    uint32_t evt)
++			      uint32_t evt)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
++
++	/* Don't do anything that will mess up processing of the
++	 * previous RSCN.
++	 */
++	if (vport->fc_flag & FC_RSCN_DEFERRED)
++		return ndlp->nlp_state;
++
+ 	/* software abort outstanding ADISC */
+ 	lpfc_els_abort(phba, ndlp);
+ 
+ 	ndlp->nlp_prev_state = NLP_STE_ADISC_ISSUE;
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+-	spin_lock_irq(phba->host->host_lock);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+-	ndlp->nlp_flag |= NLP_NPR_ADISC;
+-	spin_unlock_irq(phba->host->host_lock);
+-
++	spin_unlock_irq(shost->host_lock);
++	lpfc_disc_set_adisc(vport, ndlp);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_plogi_reglogin_issue(struct lpfc_hba * phba,
+-			      struct lpfc_nodelist * ndlp, void *arg,
++lpfc_rcv_plogi_reglogin_issue(struct lpfc_vport *vport,
++			      struct lpfc_nodelist *ndlp,
++			      void *arg,
+ 			      uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
+-
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	lpfc_rcv_plogi(phba, ndlp, cmdiocb);
++	lpfc_rcv_plogi(vport, ndlp, cmdiocb);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_prli_reglogin_issue(struct lpfc_hba * phba,
+-			     struct lpfc_nodelist * ndlp, void *arg,
++lpfc_rcv_prli_reglogin_issue(struct lpfc_vport *vport,
++			     struct lpfc_nodelist *ndlp,
++			     void *arg,
+ 			     uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	cmdiocb = (struct lpfc_iocbq *) arg;
+-
+-	lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++	lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_logo_reglogin_issue(struct lpfc_hba * phba,
+-			     struct lpfc_nodelist * ndlp, void *arg,
++lpfc_rcv_logo_reglogin_issue(struct lpfc_vport *vport,
++			     struct lpfc_nodelist *ndlp,
++			     void *arg,
+ 			     uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
++	struct lpfc_hba   *phba = vport->phba;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 	LPFC_MBOXQ_t	  *mb;
+ 	LPFC_MBOXQ_t	  *nextmb;
+ 	struct lpfc_dmabuf *mp;
+@@ -1033,12 +1131,13 @@
+ 	if ((mb = phba->sli.mbox_active)) {
+ 		if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+ 		   (ndlp == (struct lpfc_nodelist *) mb->context2)) {
++			lpfc_nlp_put(ndlp);
+ 			mb->context2 = NULL;
+ 			mb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+ 		}
+ 	}
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	list_for_each_entry_safe(mb, nextmb, &phba->sli.mboxq, list) {
+ 		if ((mb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+ 		   (ndlp == (struct lpfc_nodelist *) mb->context2)) {
+@@ -1047,61 +1146,61 @@
+ 				lpfc_mbuf_free(phba, mp->virt, mp->phys);
+ 				kfree(mp);
+ 			}
++			lpfc_nlp_put(ndlp);
+ 			list_del(&mb->list);
+ 			mempool_free(mb, phba->mbox_mem_pool);
+ 		}
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+-	lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++	lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_padisc_reglogin_issue(struct lpfc_hba * phba,
+-			       struct lpfc_nodelist * ndlp, void *arg,
++lpfc_rcv_padisc_reglogin_issue(struct lpfc_vport *vport,
++			       struct lpfc_nodelist *ndlp,
++			       void *arg,
+ 			       uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
+-
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++	lpfc_rcv_padisc(vport, ndlp, cmdiocb);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_prlo_reglogin_issue(struct lpfc_hba * phba,
+-			     struct lpfc_nodelist * ndlp, void *arg,
++lpfc_rcv_prlo_reglogin_issue(struct lpfc_vport *vport,
++			     struct lpfc_nodelist *ndlp,
++			     void *arg,
+ 			     uint32_t evt)
+ {
+ 	struct lpfc_iocbq *cmdiocb;
+ 
+ 	cmdiocb = (struct lpfc_iocbq *) arg;
+-	lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
++	lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_hba * phba,
+-				  struct lpfc_nodelist * ndlp,
+-				  void *arg, uint32_t evt)
++lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport,
++				  struct lpfc_nodelist *ndlp,
++				  void *arg,
++				  uint32_t evt)
+ {
+-	LPFC_MBOXQ_t *pmb;
+-	MAILBOX_t *mb;
+-	uint32_t did;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
++	LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg;
++	MAILBOX_t *mb = &pmb->mb;
++	uint32_t did  = mb->un.varWords[1];
+ 
+-	pmb = (LPFC_MBOXQ_t *) arg;
+-	mb = &pmb->mb;
+-	did = mb->un.varWords[1];
+ 	if (mb->mbxStatus) {
+ 		/* RegLogin failed */
+-		lpfc_printf_log(phba,
+-				KERN_ERR,
+-				LOG_DISCOVERY,
+-				"%d:0246 RegLogin failed Data: x%x x%x x%x\n",
+-				phba->brd_no,
+-				did, mb->mbxStatus, phba->hba_state);
++		lpfc_printf_log(phba, KERN_ERR, LOG_DISCOVERY,
++				"%d (%d):0246 RegLogin failed Data: x%x x%x "
++				"x%x\n",
++				phba->brd_no, vport->vpi,
++				did, mb->mbxStatus, vport->port_state);
+ 
+ 		/*
+ 		 * If RegLogin failed due to lack of HBA resources do not
+@@ -1109,20 +1208,20 @@
+ 		 */
+ 		if (mb->mbxStatus == MBXERR_RPI_FULL) {
+ 			ndlp->nlp_prev_state = NLP_STE_UNUSED_NODE;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNUSED_NODE);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
+ 			return ndlp->nlp_state;
+ 		}
+ 
+-		/* Put ndlp in npr list set plogi timer for 1 sec */
++		/* Put ndlp in npr state, set plogi timer for 1 sec */
+ 		mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
+-		spin_lock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_DELAY_TMO;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+ 		ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
+ 
+-		lpfc_issue_els_logo(phba, ndlp, 0);
++		lpfc_issue_els_logo(vport, ndlp, 0);
+ 		ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
+ 		return ndlp->nlp_state;
+ 	}
+ 
+@@ -1131,91 +1230,99 @@
+ 	/* Only if we are not a fabric nport do we issue PRLI */
+ 	if (!(ndlp->nlp_type & NLP_FABRIC)) {
+ 		ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_PRLI_ISSUE);
+-		lpfc_issue_els_prli(phba, ndlp, 0);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_PRLI_ISSUE);
++		lpfc_issue_els_prli(vport, ndlp, 0);
+ 	} else {
+ 		ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+ 	}
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_device_rm_reglogin_issue(struct lpfc_hba * phba,
+-			      struct lpfc_nodelist * ndlp, void *arg,
++lpfc_device_rm_reglogin_issue(struct lpfc_vport *vport,
++			      struct lpfc_nodelist *ndlp,
++			      void *arg,
+ 			      uint32_t evt)
+ {
+-	if(ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++	if (ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_NODEV_REMOVE;
++		spin_unlock_irq(shost->host_lock);
+ 		return ndlp->nlp_state;
+-	}
+-	else {
+-		lpfc_drop_node(phba, ndlp);
++	} else {
++		lpfc_drop_node(vport, ndlp);
+ 		return NLP_STE_FREED_NODE;
+ 	}
+ }
+ 
+ static uint32_t
+-lpfc_device_recov_reglogin_issue(struct lpfc_hba * phba,
+-			       struct lpfc_nodelist * ndlp, void *arg,
++lpfc_device_recov_reglogin_issue(struct lpfc_vport *vport,
++				 struct lpfc_nodelist *ndlp,
++				 void *arg,
+-			       uint32_t evt)
++				 uint32_t evt)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++	/* Don't do anything that will mess up processing of the
++	 * previous RSCN.
++	 */
++	if (vport->fc_flag & FC_RSCN_DEFERRED)
++		return ndlp->nlp_state;
++
+ 	ndlp->nlp_prev_state = NLP_STE_REG_LOGIN_ISSUE;
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+-	spin_lock_irq(phba->host->host_lock);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
++	lpfc_disc_set_adisc(vport, ndlp);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_plogi_prli_issue(struct lpfc_hba * phba,
+-			  struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_plogi_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			  void *arg, uint32_t evt)
+ {
+ 	struct lpfc_iocbq *cmdiocb;
+ 
+ 	cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	lpfc_rcv_plogi(phba, ndlp, cmdiocb);
++	lpfc_rcv_plogi(vport, ndlp, cmdiocb);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_prli_prli_issue(struct lpfc_hba * phba,
+-			 struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			 void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	cmdiocb = (struct lpfc_iocbq *) arg;
+-
+-	lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++	lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_logo_prli_issue(struct lpfc_hba * phba,
+-			 struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_logo_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			 void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
+-
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+ 	/* Software abort outstanding PRLI before sending acc */
+-	lpfc_els_abort(phba, ndlp);
++	lpfc_els_abort(vport->phba, ndlp);
+ 
+-	lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++	lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_padisc_prli_issue(struct lpfc_hba * phba,
+-			   struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_padisc_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			   void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	cmdiocb = (struct lpfc_iocbq *) arg;
+-
+-	lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++	lpfc_rcv_padisc(vport, ndlp, cmdiocb);
+ 	return ndlp->nlp_state;
+ }
+ 
+@@ -1225,21 +1332,22 @@
+  * NEXT STATE = PRLI_ISSUE
+  */
+ static uint32_t
+-lpfc_rcv_prlo_prli_issue(struct lpfc_hba * phba,
+-			 struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prlo_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			 void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	cmdiocb = (struct lpfc_iocbq *) arg;
+-	lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
++	lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_cmpl_prli_prli_issue(struct lpfc_hba * phba,
+-			  struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_cmpl_prli_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			  void *arg, uint32_t evt)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
+ 	struct lpfc_iocbq *cmdiocb, *rspiocb;
++	struct lpfc_hba   *phba = vport->phba;
+ 	IOCB_t *irsp;
+ 	PRLI *npr;
+ 
+@@ -1249,8 +1357,12 @@
+ 
+ 	irsp = &rspiocb->iocb;
+ 	if (irsp->ulpStatus) {
++		if ((vport->port_type == LPFC_NPIV_PORT) &&
++			phba->cfg_vport_restrict_login) {
++			goto out;
++		}
+ 		ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE;
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_UNMAPPED_NODE);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+ 		return ndlp->nlp_state;
+ 	}
+ 
+@@ -1266,9 +1378,26 @@
+ 		if (npr->Retry)
+ 			ndlp->nlp_fcp_info |= NLP_FCP_2_DEVICE;
+ 	}
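++	/* Restricted-login NPIV ports log out of nodes that are not FCP targets */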
++	if (!(ndlp->nlp_type & NLP_FCP_TARGET) &&
++	    (vport->port_type == LPFC_NPIV_PORT) &&
++	     phba->cfg_vport_restrict_login) {
++out:
++		spin_lock_irq(shost->host_lock);
++		ndlp->nlp_flag |= NLP_TARGET_REMOVE;
++		spin_unlock_irq(shost->host_lock);
++		lpfc_issue_els_logo(vport, ndlp, 0);
++
++		ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE;
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNUSED_NODE);
++		return ndlp->nlp_state;
++	}
+ 
+ 	ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE;
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_MAPPED_NODE);
++	if (ndlp->nlp_type & NLP_FCP_TARGET)
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_MAPPED_NODE);
++	else
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_UNMAPPED_NODE);
+ 	return ndlp->nlp_state;
+ }
+ 
+@@ -1289,19 +1417,23 @@
+   *    on plogi list so it can be freed when LOGO completes.
+   *
+   */
++
+ static uint32_t
+-lpfc_device_rm_prli_issue(struct lpfc_hba * phba,
+-			  struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_device_rm_prli_issue(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			  void *arg, uint32_t evt)
+ {
+-	if(ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++	if (ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_NODEV_REMOVE;
++		spin_unlock_irq(shost->host_lock);
+ 		return ndlp->nlp_state;
+-	}
+-	else {
++	} else {
+ 		/* software abort outstanding PLOGI */
+-		lpfc_els_abort(phba, ndlp);
++		lpfc_els_abort(vport->phba, ndlp);
+ 
+-		lpfc_drop_node(phba, ndlp);
++		lpfc_drop_node(vport, ndlp);
+ 		return NLP_STE_FREED_NODE;
+ 	}
+ }
+@@ -1324,261 +1456,251 @@
+   *    outstanding PRLI command, then free the node entry.
+   */
+ static uint32_t
+-lpfc_device_recov_prli_issue(struct lpfc_hba * phba,
+-			   struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_device_recov_prli_issue(struct lpfc_vport *vport,
++			     struct lpfc_nodelist *ndlp,
++			     void *arg,
++			     uint32_t evt)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_hba  *phba = vport->phba;
++
++	/* Don't do anything that will mess up processing of the
++	 * previous RSCN.
++	 */
++	if (vport->fc_flag & FC_RSCN_DEFERRED)
++		return ndlp->nlp_state;
++
+ 	/* software abort outstanding PRLI */
+ 	lpfc_els_abort(phba, ndlp);
+ 
+ 	ndlp->nlp_prev_state = NLP_STE_PRLI_ISSUE;
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+-	spin_lock_irq(phba->host->host_lock);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
++	lpfc_disc_set_adisc(vport, ndlp);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_plogi_unmap_node(struct lpfc_hba * phba,
+-			  struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_plogi_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			  void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
+-
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	lpfc_rcv_plogi(phba, ndlp, cmdiocb);
++	lpfc_rcv_plogi(vport, ndlp, cmdiocb);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_prli_unmap_node(struct lpfc_hba * phba,
+-			 struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prli_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			 void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	cmdiocb = (struct lpfc_iocbq *) arg;
+-
+-	lpfc_rcv_prli(phba, ndlp, cmdiocb);
+-	lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++	lpfc_rcv_prli(vport, ndlp, cmdiocb);
++	lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_logo_unmap_node(struct lpfc_hba * phba,
+-			 struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_logo_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			 void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
+-
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++	lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_padisc_unmap_node(struct lpfc_hba * phba,
+-			   struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_padisc_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			   void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
+-
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++	lpfc_rcv_padisc(vport, ndlp, cmdiocb);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_prlo_unmap_node(struct lpfc_hba * phba,
+-			 struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prlo_unmap_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			 void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	cmdiocb = (struct lpfc_iocbq *) arg;
+-
+-	lpfc_els_rsp_acc(phba, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
++	lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL, 0);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_device_recov_unmap_node(struct lpfc_hba * phba,
+-			   struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_device_recov_unmap_node(struct lpfc_vport *vport,
++			     struct lpfc_nodelist *ndlp,
++			     void *arg,
++			     uint32_t evt)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
+ 	ndlp->nlp_prev_state = NLP_STE_UNMAPPED_NODE;
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+-	lpfc_disc_set_adisc(phba, ndlp);
++	spin_unlock_irq(shost->host_lock);
++	lpfc_disc_set_adisc(vport, ndlp);
+ 
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_plogi_mapped_node(struct lpfc_hba * phba,
+-			   struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_plogi_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			   void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	cmdiocb = (struct lpfc_iocbq *) arg;
+-
+-	lpfc_rcv_plogi(phba, ndlp, cmdiocb);
++	lpfc_rcv_plogi(vport, ndlp, cmdiocb);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_prli_mapped_node(struct lpfc_hba * phba,
+-			  struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prli_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			  void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
+-
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	lpfc_els_rsp_prli_acc(phba, cmdiocb, ndlp);
++	lpfc_els_rsp_prli_acc(vport, cmdiocb, ndlp);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_logo_mapped_node(struct lpfc_hba * phba,
+-			  struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_logo_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			  void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	cmdiocb = (struct lpfc_iocbq *) arg;
+-
+-	lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++	lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_padisc_mapped_node(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_rcv_padisc_mapped_node(struct lpfc_vport *vport,
++			    struct lpfc_nodelist *ndlp,
++			    void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	cmdiocb = (struct lpfc_iocbq *) arg;
+-
+-	lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++	lpfc_rcv_padisc(vport, ndlp, cmdiocb);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_prlo_mapped_node(struct lpfc_hba * phba,
+-			  struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_rcv_prlo_mapped_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			  void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
+-
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	struct lpfc_hba  *phba = vport->phba;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+ 	/* flush the target */
+-	spin_lock_irq(phba->host->host_lock);
+ 	lpfc_sli_abort_iocb(phba, &phba->sli.ring[phba->sli.fcp_ring],
+ 			       ndlp->nlp_sid, 0, 0, LPFC_CTX_TGT);
+-	spin_unlock_irq(phba->host->host_lock);
+ 
+ 	/* Treat like rcv logo */
+-	lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_PRLO);
++	lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_PRLO);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_device_recov_mapped_node(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
++lpfc_device_recov_mapped_node(struct lpfc_vport *vport,
++			      struct lpfc_nodelist *ndlp,
++			      void *arg,
+ 			    uint32_t evt)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
+ 	ndlp->nlp_prev_state = NLP_STE_MAPPED_NODE;
+-	lpfc_nlp_set_state(phba, ndlp, NLP_STE_NPR_NODE);
+-	spin_lock_irq(phba->host->host_lock);
++	lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+-	spin_unlock_irq(phba->host->host_lock);
+-	lpfc_disc_set_adisc(phba, ndlp);
++	spin_unlock_irq(shost->host_lock);
++	lpfc_disc_set_adisc(vport, ndlp);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_plogi_npr_node(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_rcv_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq *cmdiocb;
+-
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_iocbq *cmdiocb  = (struct lpfc_iocbq *) arg;
+ 
+ 	/* Ignore PLOGI if we have an outstanding LOGO or LOGO ACC */
+-	if (ndlp->nlp_flag & NLP_LOGO_SND) {
++	if (ndlp->nlp_flag & (NLP_LOGO_SND | NLP_LOGO_ACC)) {
+ 		return ndlp->nlp_state;
+ 	}
+ 
+-	if (lpfc_rcv_plogi(phba, ndlp, cmdiocb)) {
+-		spin_lock_irq(phba->host->host_lock);
++	if (lpfc_rcv_plogi(vport, ndlp, cmdiocb)) {
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+ 		return ndlp->nlp_state;
+ 	}
+ 
+ 	/* send PLOGI immediately, move to PLOGI issue state */
+ 	if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
+ 		ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+-		lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+-		lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0);
++		lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
++		lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
+ 	}
+ 
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_prli_npr_node(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_rcv_prli_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++		       void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq     *cmdiocb;
++	struct Scsi_Host  *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 	struct ls_rjt          stat;
+ 
+-	cmdiocb = (struct lpfc_iocbq *) arg;
+-
+ 	memset(&stat, 0, sizeof (struct ls_rjt));
+ 	stat.un.b.lsRjtRsnCode = LSRJT_UNABLE_TPC;
+ 	stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE;
+-	lpfc_els_rsp_reject(phba, stat.un.lsRjtError, cmdiocb, ndlp);
++	lpfc_els_rsp_reject(vport, stat.un.lsRjtError, cmdiocb, ndlp, NULL);
+ 
+ 	if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
+ 		if (ndlp->nlp_flag & NLP_NPR_ADISC) {
+-			spin_lock_irq(phba->host->host_lock);
++			spin_lock_irq(shost->host_lock);
+ 			ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+-			spin_unlock_irq(phba->host->host_lock);
+ 			ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE);
+-			lpfc_issue_els_adisc(phba, ndlp, 0);
++			spin_unlock_irq(shost->host_lock);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
++			lpfc_issue_els_adisc(vport, ndlp, 0);
+ 		} else {
+ 			ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+-			lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
++			lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
+ 		}
+ 	}
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_logo_npr_node(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_rcv_logo_npr_node(struct lpfc_vport *vport,  struct lpfc_nodelist *ndlp,
++		       void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq     *cmdiocb;
+-
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	lpfc_rcv_logo(phba, ndlp, cmdiocb, ELS_CMD_LOGO);
++	lpfc_rcv_logo(vport, ndlp, cmdiocb, ELS_CMD_LOGO);
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_padisc_npr_node(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_rcv_padisc_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			 void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq     *cmdiocb;
+-
+-	cmdiocb = (struct lpfc_iocbq *) arg;
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	lpfc_rcv_padisc(phba, ndlp, cmdiocb);
++	lpfc_rcv_padisc(vport, ndlp, cmdiocb);
+ 
+ 	/*
+ 	 * Do not start discovery if discovery is about to start
+@@ -1586,53 +1708,52 @@
+ 	 * here will affect the counting of discovery threads.
+ 	 */
+ 	if (!(ndlp->nlp_flag & NLP_DELAY_TMO) &&
+-		!(ndlp->nlp_flag & NLP_NPR_2B_DISC)){
++	    !(ndlp->nlp_flag & NLP_NPR_2B_DISC)) {
+ 		if (ndlp->nlp_flag & NLP_NPR_ADISC) {
++			ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+ 			ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_ADISC_ISSUE);
+-			lpfc_issue_els_adisc(phba, ndlp, 0);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_ADISC_ISSUE);
++			lpfc_issue_els_adisc(vport, ndlp, 0);
+ 		} else {
+ 			ndlp->nlp_prev_state = NLP_STE_NPR_NODE;
+-			lpfc_nlp_set_state(phba, ndlp, NLP_STE_PLOGI_ISSUE);
+-			lpfc_issue_els_plogi(phba, ndlp->nlp_DID, 0);
++			lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE);
++			lpfc_issue_els_plogi(vport, ndlp->nlp_DID, 0);
+ 		}
+ 	}
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_rcv_prlo_npr_node(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_rcv_prlo_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++		       void *arg, uint32_t evt)
+ {
+-	struct lpfc_iocbq     *cmdiocb;
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++	struct lpfc_iocbq *cmdiocb = (struct lpfc_iocbq *) arg;
+ 
+-	cmdiocb = (struct lpfc_iocbq *) arg;
+-
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag |= NLP_LOGO_ACC;
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
+ 
+-	lpfc_els_rsp_acc(phba, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
++	lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL, 0);
+ 
+-	if (!(ndlp->nlp_flag & NLP_DELAY_TMO)) {
++	if ((ndlp->nlp_flag & NLP_DELAY_TMO) == 0) {
+ 		mod_timer(&ndlp->nlp_delayfunc, jiffies + HZ * 1);
+-		spin_lock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_DELAY_TMO;
+ 		ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+ 		ndlp->nlp_last_elscmd = ELS_CMD_PLOGI;
+ 	} else {
+-		spin_lock_irq(phba->host->host_lock);
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag &= ~NLP_NPR_ADISC;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(shost->host_lock);
+ 	}
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_cmpl_plogi_npr_node(struct lpfc_hba * phba,
+-			  struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_cmpl_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			 void *arg, uint32_t evt)
+ {
+ 	struct lpfc_iocbq *cmdiocb, *rspiocb;
+ 	IOCB_t *irsp;
+@@ -1642,15 +1763,15 @@
+ 
+ 	irsp = &rspiocb->iocb;
+ 	if (irsp->ulpStatus) {
+-		lpfc_drop_node(phba, ndlp);
++		lpfc_drop_node(vport, ndlp);
+ 		return NLP_STE_FREED_NODE;
+ 	}
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_cmpl_prli_npr_node(struct lpfc_hba * phba,
+-			  struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_cmpl_prli_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			void *arg, uint32_t evt)
+ {
+ 	struct lpfc_iocbq *cmdiocb, *rspiocb;
+ 	IOCB_t *irsp;
+@@ -1660,25 +1781,24 @@
+ 
+ 	irsp = &rspiocb->iocb;
+ 	if (irsp->ulpStatus && (ndlp->nlp_flag & NLP_NODEV_REMOVE)) {
+-		lpfc_drop_node(phba, ndlp);
++		lpfc_drop_node(vport, ndlp);
+ 		return NLP_STE_FREED_NODE;
+ 	}
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_cmpl_logo_npr_node(struct lpfc_hba * phba,
+-		struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_cmpl_logo_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			void *arg, uint32_t evt)
+ {
+-	lpfc_unreg_rpi(phba, ndlp);
++	lpfc_unreg_rpi(vport, ndlp);
+ 	/* This routine does nothing, just returns the current state */
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_cmpl_adisc_npr_node(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_cmpl_adisc_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			 void *arg, uint32_t evt)
+ {
+ 	struct lpfc_iocbq *cmdiocb, *rspiocb;
+ 	IOCB_t *irsp;
+@@ -1688,28 +1808,25 @@
+ 
+ 	irsp = &rspiocb->iocb;
+ 	if (irsp->ulpStatus && (ndlp->nlp_flag & NLP_NODEV_REMOVE)) {
+-		lpfc_drop_node(phba, ndlp);
++		lpfc_drop_node(vport, ndlp);
+ 		return NLP_STE_FREED_NODE;
+ 	}
+ 	return ndlp->nlp_state;
+ }
+ 
+ static uint32_t
+-lpfc_cmpl_reglogin_npr_node(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_cmpl_reglogin_npr_node(struct lpfc_vport *vport,
++			    struct lpfc_nodelist *ndlp,
++			    void *arg, uint32_t evt)
+ {
+-	LPFC_MBOXQ_t *pmb;
+-	MAILBOX_t *mb;
+-
+-	pmb = (LPFC_MBOXQ_t *) arg;
+-	mb = &pmb->mb;
++	LPFC_MBOXQ_t *pmb = (LPFC_MBOXQ_t *) arg;
++	MAILBOX_t    *mb = &pmb->mb;
+ 
+ 	if (!mb->mbxStatus)
+ 		ndlp->nlp_rpi = mb->un.varWords[0];
+ 	else {
+ 		if (ndlp->nlp_flag & NLP_NODEV_REMOVE) {
+-			lpfc_drop_node(phba, ndlp);
++			lpfc_drop_node(vport, ndlp);
+ 			return NLP_STE_FREED_NODE;
+ 		}
+ 	}
+@@ -1717,28 +1834,38 @@
+ }
+ 
+ static uint32_t
+-lpfc_device_rm_npr_node(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_device_rm_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			void *arg, uint32_t evt)
+ {
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
+ 	if (ndlp->nlp_flag & NLP_NPR_2B_DISC) {
++		spin_lock_irq(shost->host_lock);
+ 		ndlp->nlp_flag |= NLP_NODEV_REMOVE;
++		spin_unlock_irq(shost->host_lock);
+ 		return ndlp->nlp_state;
+ 	}
+-	lpfc_drop_node(phba, ndlp);
++	lpfc_drop_node(vport, ndlp);
+ 	return NLP_STE_FREED_NODE;
+ }
+ 
+ static uint32_t
+-lpfc_device_recov_npr_node(struct lpfc_hba * phba,
+-			    struct lpfc_nodelist * ndlp, void *arg,
+-			    uint32_t evt)
++lpfc_device_recov_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			   void *arg, uint32_t evt)
+ {
+-	spin_lock_irq(phba->host->host_lock);
++	struct Scsi_Host *shost = lpfc_shost_from_vport(vport);
++
++	/* Don't do anything that will mess up processing of the
++	 * previous RSCN.
++	 */
++	if (vport->fc_flag & FC_RSCN_DEFERRED)
++		return ndlp->nlp_state;
++
++	spin_lock_irq(shost->host_lock);
+ 	ndlp->nlp_flag &= ~(NLP_NODEV_REMOVE | NLP_NPR_2B_DISC);
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(shost->host_lock);
+ 	if (ndlp->nlp_flag & NLP_DELAY_TMO) {
+-		lpfc_cancel_retry_delay_tmo(phba, ndlp);
++		lpfc_cancel_retry_delay_tmo(vport, ndlp);
+ 	}
+ 	return ndlp->nlp_state;
+ }
+@@ -1801,7 +1928,7 @@
+  */
+ 
+ static uint32_t (*lpfc_disc_action[NLP_STE_MAX_STATE * NLP_EVT_MAX_EVENT])
+-     (struct lpfc_hba *, struct lpfc_nodelist *, void *, uint32_t) = {
++     (struct lpfc_vport *, struct lpfc_nodelist *, void *, uint32_t) = {
+ 	/* Action routine                  Event       Current State  */
+ 	lpfc_rcv_plogi_unused_node,	/* RCV_PLOGI   UNUSED_NODE    */
+ 	lpfc_rcv_els_unused_node,	/* RCV_PRLI        */
+@@ -1818,7 +1945,7 @@
+ 	lpfc_disc_illegal,		/* DEVICE_RECOVERY */
+ 
+ 	lpfc_rcv_plogi_plogi_issue,	/* RCV_PLOGI   PLOGI_ISSUE    */
+-	lpfc_rcv_els_plogi_issue,	/* RCV_PRLI        */
++	lpfc_rcv_prli_plogi_issue,	/* RCV_PRLI        */
+ 	lpfc_rcv_logo_plogi_issue,	/* RCV_LOGO        */
+ 	lpfc_rcv_els_plogi_issue,	/* RCV_ADISC       */
+ 	lpfc_rcv_els_plogi_issue,	/* RCV_PDISC       */
+@@ -1917,34 +2044,40 @@
+ };
+ 
+ int
+-lpfc_disc_state_machine(struct lpfc_hba * phba,
+-			struct lpfc_nodelist * ndlp, void *arg, uint32_t evt)
++lpfc_disc_state_machine(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
++			void *arg, uint32_t evt)
+ {
++	struct lpfc_hba  *phba = vport->phba;
+ 	uint32_t cur_state, rc;
+-	uint32_t(*func) (struct lpfc_hba *, struct lpfc_nodelist *, void *,
++	uint32_t(*func) (struct lpfc_vport *, struct lpfc_nodelist *, void *,
+ 			 uint32_t);
+ 
+ 	lpfc_nlp_get(ndlp);
+ 	cur_state = ndlp->nlp_state;
+ 
+ 	/* DSM in event <evt> on NPort <nlp_DID> in state <cur_state> */
+-	lpfc_printf_log(phba,
+-			KERN_INFO,
+-			LOG_DISCOVERY,
+-			"%d:0211 DSM in event x%x on NPort x%x in state %d "
+-			"Data: x%x\n",
+-			phba->brd_no,
++	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++			"%d (%d):0211 DSM in event x%x on NPort x%x in "
++			"state %d Data: x%x\n",
++			phba->brd_no, vport->vpi,
+ 			evt, ndlp->nlp_DID, cur_state, ndlp->nlp_flag);
+ 
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM,
++		 "DSM in:          evt:%d ste:%d did:x%x",
++		evt, cur_state, ndlp->nlp_DID);
++
+ 	func = lpfc_disc_action[(cur_state * NLP_EVT_MAX_EVENT) + evt];
+-	rc = (func) (phba, ndlp, arg, evt);
++	rc = (func) (vport, ndlp, arg, evt);
+ 
+ 	/* DSM out state <rc> on NPort <nlp_DID> */
+-	lpfc_printf_log(phba,
+-		       KERN_INFO,
+-		       LOG_DISCOVERY,
+-		       "%d:0212 DSM out state %d on NPort x%x Data: x%x\n",
+-		       phba->brd_no,
++	lpfc_printf_log(phba, KERN_INFO, LOG_DISCOVERY,
++			"%d (%d):0212 DSM out state %d on NPort x%x "
++			"Data: x%x\n",
++			phba->brd_no, vport->vpi,
++			rc, ndlp->nlp_DID, ndlp->nlp_flag);
++
++	lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_DSM,
++		 "DSM out:         ste:%d did:x%x flg:x%x",
+ 		       rc, ndlp->nlp_DID, ndlp->nlp_flag);
+ 
+ 	lpfc_nlp_put(ndlp);
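
A minimal stand-alone sketch of the dispatch pattern used by
lpfc_disc_state_machine() above, where a flat array of function
pointers is indexed as a two-dimensional state/event table via
(cur_state * NLP_EVT_MAX_EVENT) + evt.  Everything below is
illustrative, not lpfc code.

#include <stdio.h>

#define MAX_STATE 2
#define MAX_EVENT 2

typedef unsigned int (*action_fn)(void *ctx, unsigned int evt);

static unsigned int ignore(void *ctx, unsigned int evt) { return 0; }
static unsigned int accept(void *ctx, unsigned int evt) { return 1; }

/* row = state, column = event, flattened into one array */
static action_fn actions[MAX_STATE * MAX_EVENT] = {
	ignore, accept,		/* state 0: event 0, event 1 */
	accept, ignore,		/* state 1: event 0, event 1 */
};

static unsigned int dispatch(unsigned int state, unsigned int evt, void *ctx)
{
	/* same indexing scheme as lpfc_disc_action[] above */
	return actions[state * MAX_EVENT + evt](ctx, evt);
}

int main(void)
{
	printf("%u\n", dispatch(1, 0, NULL));	/* runs accept(), prints 1 */
	return 0;
}
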
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.c linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_scsi.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_scsi.c	2007-12-19 15:29:23.000000000 -0500
+@@ -37,10 +37,158 @@
+ #include "lpfc.h"
+ #include "lpfc_logmsg.h"
+ #include "lpfc_crtn.h"
++#include "lpfc_vport.h"
+ 
+ #define LPFC_RESET_WAIT  2
+ #define LPFC_ABORT_WAIT  2
+ 
++/*
++ * This function is called with no lock held when there is a resource
++ * error in the driver or in firmware.
++ */
++void
++lpfc_adjust_queue_depth(struct lpfc_hba *phba)
++{
++	unsigned long flags;
++
++	spin_lock_irqsave(&phba->hbalock, flags);
++	atomic_inc(&phba->num_rsrc_err);
++	phba->last_rsrc_error_time = jiffies;
++
++	if ((phba->last_ramp_down_time + QUEUE_RAMP_DOWN_INTERVAL) > jiffies) {
++		spin_unlock_irqrestore(&phba->hbalock, flags);
++		return;
++	}
++
++	phba->last_ramp_down_time = jiffies;
++
++	spin_unlock_irqrestore(&phba->hbalock, flags);
++
++	spin_lock_irqsave(&phba->pport->work_port_lock, flags);
++	if ((phba->pport->work_port_events &
++		WORKER_RAMP_DOWN_QUEUE) == 0) {
++		phba->pport->work_port_events |= WORKER_RAMP_DOWN_QUEUE;
++	}
++	spin_unlock_irqrestore(&phba->pport->work_port_lock, flags);
++
++	spin_lock_irqsave(&phba->hbalock, flags);
++	if (phba->work_wait)
++		wake_up(phba->work_wait);
++	spin_unlock_irqrestore(&phba->hbalock, flags);
++
++	return;
++}
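
lpfc_adjust_queue_depth() rate-limits itself with jiffies arithmetic:
a ramp-down is only queued for the worker thread if the previous one
is older than QUEUE_RAMP_DOWN_INTERVAL.  A self-contained sketch of
that throttle, with a fake clock standing in for jiffies (illustrative,
not kernel code):

#include <assert.h>
#include <stdbool.h>

static unsigned long fake_jiffies;	/* stand-in for jiffies */
static unsigned long last_ramp_down;
#define RAMP_DOWN_INTERVAL 100		/* assumed tick interval */

static bool should_ramp_down(void)
{
	/* too soon since the last adjustment: record the error only */
	if (last_ramp_down + RAMP_DOWN_INTERVAL > fake_jiffies)
		return false;
	last_ramp_down = fake_jiffies;	/* start a new interval */
	return true;
}

int main(void)
{
	fake_jiffies = 150;
	assert(should_ramp_down());	/* first time: allowed */
	fake_jiffies = 200;
	assert(!should_ramp_down());	/* within the interval */
	fake_jiffies = 251;
	assert(should_ramp_down());	/* interval elapsed */
	return 0;
}
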
++
++/*
++ * This function is called with no lock held when there is a successful
++ * SCSI command completion.
++ */
++static inline void
++lpfc_rampup_queue_depth(struct lpfc_hba *phba,
++			struct scsi_device *sdev)
++{
++	unsigned long flags;
++	atomic_inc(&phba->num_cmd_success);
++
++	if (phba->cfg_lun_queue_depth <= sdev->queue_depth)
++		return;
++
++	spin_lock_irqsave(&phba->hbalock, flags);
++	if (((phba->last_ramp_up_time + QUEUE_RAMP_UP_INTERVAL) > jiffies) ||
++	    ((phba->last_rsrc_error_time + QUEUE_RAMP_UP_INTERVAL) > jiffies)) {
++		spin_unlock_irqrestore(&phba->hbalock, flags);
++		return;
++	}
++
++	phba->last_ramp_up_time = jiffies;
++	spin_unlock_irqrestore(&phba->hbalock, flags);
++
++	spin_lock_irqsave(&phba->pport->work_port_lock, flags);
++	if ((phba->pport->work_port_events &
++		WORKER_RAMP_UP_QUEUE) == 0) {
++		phba->pport->work_port_events |= WORKER_RAMP_UP_QUEUE;
++	}
++	spin_unlock_irqrestore(&phba->pport->work_port_lock, flags);
++
++	spin_lock_irqsave(&phba->hbalock, flags);
++	if (phba->work_wait)
++		wake_up(phba->work_wait);
++	spin_unlock_irqrestore(&phba->hbalock, flags);
++}
++
++void
++lpfc_ramp_down_queue_handler(struct lpfc_hba *phba)
++{
++	struct lpfc_vport *vport;
++	struct Scsi_Host  *host;
++	struct scsi_device *sdev;
++	unsigned long new_queue_depth;
++	unsigned long num_rsrc_err, num_cmd_success;
++
++	num_rsrc_err = atomic_read(&phba->num_rsrc_err);
++	num_cmd_success = atomic_read(&phba->num_cmd_success);
++
++	spin_lock_irq(&phba->hbalock);
++	list_for_each_entry(vport, &phba->port_list, listentry) {
++		host = lpfc_shost_from_vport(vport);
++		if (!scsi_host_get(host))
++			continue;
++
++		spin_unlock_irq(&phba->hbalock);
++
++		shost_for_each_device(sdev, host) {
++			new_queue_depth = sdev->queue_depth * num_rsrc_err /
++				(num_rsrc_err + num_cmd_success);
++			if (!new_queue_depth)
++				new_queue_depth = sdev->queue_depth - 1;
++			else
++				new_queue_depth =
++					sdev->queue_depth - new_queue_depth;
++
++			if (sdev->ordered_tags)
++				scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG,
++					new_queue_depth);
++			else
++				scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG,
++					new_queue_depth);
++		}
++		spin_lock_irq(&phba->hbalock);
++		scsi_host_put(host);
++	}
++	spin_unlock_irq(&phba->hbalock);
++	atomic_set(&phba->num_rsrc_err, 0);
++	atomic_set(&phba->num_cmd_success, 0);
++}
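
lpfc_ramp_down_queue_handler() shrinks each LUN queue in proportion to
the observed error rate: the cut is depth * errors / (errors +
successes), and if integer division rounds that to zero the depth
still drops by one.  The arithmetic, reduced to a worked sketch with
illustrative names:

#include <assert.h>

/* depth * nerr / (nerr + nok) is the proportional cut; if integer
 * division rounds it to zero, drop the depth by one anyway. */
static unsigned long scaled_depth(unsigned long depth,
				  unsigned long nerr, unsigned long nok)
{
	unsigned long cut = depth * nerr / (nerr + nok);

	return cut ? depth - cut : depth - 1;
}

int main(void)
{
	/* 8 errors against 24 successes cuts a depth of 32 by a quarter */
	assert(scaled_depth(32, 8, 24) == 24);
	/* a tiny error ratio still costs one slot */
	assert(scaled_depth(32, 1, 1000) == 31);
	return 0;
}
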
++
++void
++lpfc_ramp_up_queue_handler(struct lpfc_hba *phba)
++{
++	struct lpfc_vport *vport;
++	struct Scsi_Host  *host;
++	struct scsi_device *sdev;
++
++	spin_lock_irq(&phba->hbalock);
++	list_for_each_entry(vport, &phba->port_list, listentry) {
++		host = lpfc_shost_from_vport(vport);
++		if (!scsi_host_get(host))
++			continue;
++
++		spin_unlock_irq(&phba->hbalock);
++		shost_for_each_device(sdev, host) {
++			if (sdev->ordered_tags)
++				scsi_adjust_queue_depth(sdev, MSG_ORDERED_TAG,
++					sdev->queue_depth+1);
++			else
++				scsi_adjust_queue_depth(sdev, MSG_SIMPLE_TAG,
++					sdev->queue_depth+1);
++		}
++		spin_lock_irq(&phba->hbalock);
++		scsi_host_put(host);
++	}
++	spin_unlock_irq(&phba->hbalock);
++	atomic_set(&phba->num_rsrc_err, 0);
++	atomic_set(&phba->num_cmd_success, 0);
++}
+ 
+ /*
+  * This routine allocates a scsi buffer, which contains all the necessary
+@@ -51,8 +199,9 @@
+  * and the BPL BDE is setup in the IOCB.
+  */
+ static struct lpfc_scsi_buf *
+-lpfc_new_scsi_buf(struct lpfc_hba * phba)
++lpfc_new_scsi_buf(struct lpfc_vport *vport)
+ {
++	struct lpfc_hba *phba = vport->phba;
+ 	struct lpfc_scsi_buf *psb;
+ 	struct ulp_bde64 *bpl;
+ 	IOCB_t *iocb;
+@@ -63,7 +212,6 @@
+ 	if (!psb)
+ 		return NULL;
+ 	memset(psb, 0, sizeof (struct lpfc_scsi_buf));
+-	psb->scsi_hba = phba;
+ 
+ 	/*
+ 	 * Get memory from the pci pool to map the virt space to pci bus space
+@@ -155,7 +303,7 @@
+ }
+ 
+ static void
+-lpfc_release_scsi_buf(struct lpfc_hba * phba, struct lpfc_scsi_buf * psb)
++lpfc_release_scsi_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *psb)
+ {
+ 	unsigned long iflag = 0;
+ 
+@@ -166,7 +314,7 @@
+ }
+ 
+ static int
+-lpfc_scsi_prep_dma_buf(struct lpfc_hba * phba, struct lpfc_scsi_buf * lpfc_cmd)
++lpfc_scsi_prep_dma_buf(struct lpfc_hba *phba, struct lpfc_scsi_buf *lpfc_cmd)
+ {
+ 	struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd;
+ 	struct scatterlist *sgel = NULL;
+@@ -175,8 +323,7 @@
+ 	IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
+ 	dma_addr_t physaddr;
+ 	uint32_t i, num_bde = 0;
+-	int datadir = scsi_cmnd->sc_data_direction;
+-	int dma_error;
++	int nseg, datadir = scsi_cmnd->sc_data_direction;
+ 
+ 	/*
+ 	 * There are three possibilities here - use scatter-gather segment, use
+@@ -185,26 +332,22 @@
+ 	 * data bde entry.
+ 	 */
+ 	bpl += 2;
+-	if (scsi_cmnd->use_sg) {
++	nseg = scsi_dma_map(scsi_cmnd);
++	if (nseg > 0) {
+ 		/*
+ 		 * The driver stores the segment count returned from pci_map_sg
+ 		 * because this is a count of dma-mappings used to map the use_sg
+ 		 * pages.  They are not guaranteed to be the same for those
+ 		 * architectures that implement an IOMMU.
+ 		 */
+-		sgel = (struct scatterlist *)scsi_cmnd->request_buffer;
+-		lpfc_cmd->seg_cnt = dma_map_sg(&phba->pcidev->dev, sgel,
+-						scsi_cmnd->use_sg, datadir);
+-		if (lpfc_cmd->seg_cnt == 0)
+-			return 1;
+ 
++		lpfc_cmd->seg_cnt = nseg;
+ 		if (lpfc_cmd->seg_cnt > phba->cfg_sg_seg_cnt) {
+ 			printk(KERN_ERR "%s: Too many sg segments from "
+ 			       "dma_map_sg.  Config %d, seg_cnt %d",
+ 			       __FUNCTION__, phba->cfg_sg_seg_cnt,
+ 			       lpfc_cmd->seg_cnt);
+-			dma_unmap_sg(&phba->pcidev->dev, sgel,
+-				     lpfc_cmd->seg_cnt, datadir);
++			scsi_dma_unmap(scsi_cmnd);
+ 			return 1;
+ 		}
+ 
+@@ -214,7 +357,7 @@
+ 		 * single scsi command.  Just run through the seg_cnt and format
+ 		 * the bde's.
+ 		 */
+-		for (i = 0; i < lpfc_cmd->seg_cnt; i++) {
++		scsi_for_each_sg(scsi_cmnd, sgel, nseg, i) {
+ 			physaddr = sg_dma_address(sgel);
+ 			bpl->addrLow = le32_to_cpu(putPaddrLow(physaddr));
+ 			bpl->addrHigh = le32_to_cpu(putPaddrHigh(physaddr));
+@@ -225,35 +368,10 @@
+ 				bpl->tus.f.bdeFlags = BUFF_USE_RCV;
+ 			bpl->tus.w = le32_to_cpu(bpl->tus.w);
+ 			bpl++;
+-			sgel++;
+ 			num_bde++;
+ 		}
+-	} else if (scsi_cmnd->request_buffer && scsi_cmnd->request_bufflen) {
+-		physaddr = dma_map_single(&phba->pcidev->dev,
+-					  scsi_cmnd->request_buffer,
+-					  scsi_cmnd->request_bufflen,
+-					  datadir);
+-		dma_error = dma_mapping_error(physaddr);
+-		if (dma_error) {
+-			lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+-				"%d:0718 Unable to dma_map_single "
+-				"request_buffer: x%x\n",
+-				phba->brd_no, dma_error);
++	} else if (nseg < 0)
+ 			return 1;
+-		}
+-
+-		lpfc_cmd->nonsg_phys = physaddr;
+-		bpl->addrLow = le32_to_cpu(putPaddrLow(physaddr));
+-		bpl->addrHigh = le32_to_cpu(putPaddrHigh(physaddr));
+-		bpl->tus.f.bdeSize = scsi_cmnd->request_bufflen;
+-		if (datadir == DMA_TO_DEVICE)
+-			bpl->tus.f.bdeFlags = 0;
+-		else
+-			bpl->tus.f.bdeFlags = BUFF_USE_RCV;
+-		bpl->tus.w = le32_to_cpu(bpl->tus.w);
+-		num_bde = 1;
+-		bpl++;
+-	}
+ 
+ 	/*
+ 	 * Finish initializing those IOCB fields that are dependent on the
+@@ -266,7 +384,7 @@
+ 		(num_bde * sizeof (struct ulp_bde64));
+ 	iocb_cmd->ulpBdeCount = 1;
+ 	iocb_cmd->ulpLe = 1;
+-	fcp_cmnd->fcpDl = be32_to_cpu(scsi_cmnd->request_bufflen);
++	fcp_cmnd->fcpDl = be32_to_cpu(scsi_bufflen(scsi_cmnd));
+ 	return 0;
+ }
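
The hunks above replace the driver's hand-rolled use_sg/request_buffer
handling with the SCSI mid-layer DMA accessors.  The consumer pattern
looks roughly like the sketch below; scsi_dma_map(), scsi_for_each_sg()
and the sg_dma_* helpers are the real API, while the surrounding
function is invented for illustration.

#include <scsi/scsi_cmnd.h>

static int fill_sg_list(struct scsi_cmnd *cmd)
{
	struct scatterlist *sg;
	int i, nseg;

	nseg = scsi_dma_map(cmd);	/* < 0 on error, 0 if no data, else count */
	if (nseg < 0)
		return -1;

	scsi_for_each_sg(cmd, sg, nseg, i) {
		/* hand sg_dma_address(sg) / sg_dma_len(sg) to the HBA */
	}
	return 0;
}
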
+ 
+@@ -279,26 +397,20 @@
+ 	 * a request buffer, but did not request use_sg.  There is a third
+ 	 * case, but it does not require resource deallocation.
+ 	 */
+-	if ((psb->seg_cnt > 0) && (psb->pCmd->use_sg)) {
+-		dma_unmap_sg(&phba->pcidev->dev, psb->pCmd->request_buffer,
+-				psb->seg_cnt, psb->pCmd->sc_data_direction);
+-	} else {
+-		 if ((psb->nonsg_phys) && (psb->pCmd->request_bufflen)) {
+-			dma_unmap_single(&phba->pcidev->dev, psb->nonsg_phys,
+-						psb->pCmd->request_bufflen,
+-						psb->pCmd->sc_data_direction);
+-		 }
+-	}
++	if (psb->seg_cnt > 0)
++		scsi_dma_unmap(psb->pCmd);
+ }
+ 
+ static void
+-lpfc_handle_fcp_err(struct lpfc_scsi_buf *lpfc_cmd, struct lpfc_iocbq *rsp_iocb)
++lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
++		    struct lpfc_iocbq *rsp_iocb)
+ {
+ 	struct scsi_cmnd *cmnd = lpfc_cmd->pCmd;
+ 	struct fcp_cmnd *fcpcmd = lpfc_cmd->fcp_cmnd;
+ 	struct fcp_rsp *fcprsp = lpfc_cmd->fcp_rsp;
+-	struct lpfc_hba *phba = lpfc_cmd->scsi_hba;
++	struct lpfc_hba *phba = vport->phba;
+ 	uint32_t fcpi_parm = rsp_iocb->iocb.un.fcpi.fcpi_parm;
++	uint32_t vpi = vport->vpi;
+ 	uint32_t resp_info = fcprsp->rspStatus2;
+ 	uint32_t scsi_status = fcprsp->rspStatus3;
+ 	uint32_t *lp;
+@@ -331,9 +443,9 @@
+ 		logit = LOG_FCP;
+ 
+ 	lpfc_printf_log(phba, KERN_WARNING, logit,
+-			"%d:0730 FCP command x%x failed: x%x SNS x%x x%x "
++			"%d (%d):0730 FCP command x%x failed: x%x SNS x%x x%x "
+ 			"Data: x%x x%x x%x x%x x%x\n",
+-			phba->brd_no, cmnd->cmnd[0], scsi_status,
++			phba->brd_no, vpi, cmnd->cmnd[0], scsi_status,
+ 			be32_to_cpu(*lp), be32_to_cpu(*(lp + 3)), resp_info,
+ 			be32_to_cpu(fcprsp->rspResId),
+ 			be32_to_cpu(fcprsp->rspSnsLen),
+@@ -349,15 +461,16 @@
+ 		}
+ 	}
+ 
+-	cmnd->resid = 0;
++	scsi_set_resid(cmnd, 0);
+ 	if (resp_info & RESID_UNDER) {
+-		cmnd->resid = be32_to_cpu(fcprsp->rspResId);
++		scsi_set_resid(cmnd, be32_to_cpu(fcprsp->rspResId));
+ 
+ 		lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
+-				"%d:0716 FCP Read Underrun, expected %d, "
+-				"residual %d Data: x%x x%x x%x\n", phba->brd_no,
+-				be32_to_cpu(fcpcmd->fcpDl), cmnd->resid,
+-				fcpi_parm, cmnd->cmnd[0], cmnd->underflow);
++				"%d (%d):0716 FCP Read Underrun, expected %d, "
++				"residual %d Data: x%x x%x x%x\n",
++				phba->brd_no, vpi, be32_to_cpu(fcpcmd->fcpDl),
++				scsi_get_resid(cmnd), fcpi_parm, cmnd->cmnd[0],
++				cmnd->underflow);
+ 
+ 		/*
+ 		 * If there is an underrun, check if the underrun reported by
+@@ -366,15 +479,16 @@
+ 		 */
+ 		if ((cmnd->sc_data_direction == DMA_FROM_DEVICE) &&
+ 			fcpi_parm &&
+-			(cmnd->resid != fcpi_parm)) {
++			(scsi_get_resid(cmnd) != fcpi_parm)) {
+ 			lpfc_printf_log(phba, KERN_WARNING,
+ 				LOG_FCP | LOG_FCP_ERROR,
+-				"%d:0735 FCP Read Check Error and Underrun "
+-				"Data: x%x x%x x%x x%x\n", phba->brd_no,
++					"%d (%d):0735 FCP Read Check Error "
++					"and Underrun Data: x%x x%x x%x x%x\n",
++					phba->brd_no, vpi,
+ 				be32_to_cpu(fcpcmd->fcpDl),
+-				cmnd->resid,
+-				fcpi_parm, cmnd->cmnd[0]);
+-			cmnd->resid = cmnd->request_bufflen;
++					scsi_get_resid(cmnd), fcpi_parm,
++					cmnd->cmnd[0]);
++			scsi_set_resid(cmnd, scsi_bufflen(cmnd));
+ 			host_status = DID_ERROR;
+ 		}
+ 		/*
+@@ -385,22 +499,23 @@
+ 		 */
+ 		if (!(resp_info & SNS_LEN_VALID) &&
+ 		    (scsi_status == SAM_STAT_GOOD) &&
+-		    (cmnd->request_bufflen - cmnd->resid) < cmnd->underflow) {
++		    (scsi_bufflen(cmnd) - scsi_get_resid(cmnd)
++		     < cmnd->underflow)) {
+ 			lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
+-					"%d:0717 FCP command x%x residual "
++					"%d (%d):0717 FCP command x%x residual "
+ 					"underrun converted to error "
+-					"Data: x%x x%x x%x\n", phba->brd_no,
+-					cmnd->cmnd[0], cmnd->request_bufflen,
+-					cmnd->resid, cmnd->underflow);
+-
++					"Data: x%x x%x x%x\n",
++					phba->brd_no, vpi, cmnd->cmnd[0],
++					cmnd->request_bufflen,
++					scsi_get_resid(cmnd), cmnd->underflow);
+ 			host_status = DID_ERROR;
+ 		}
+ 	} else if (resp_info & RESID_OVER) {
+ 		lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
+-				"%d:0720 FCP command x%x residual "
++				"%d (%d):0720 FCP command x%x residual "
+ 				"overrun error. Data: x%x x%x \n",
+-				phba->brd_no, cmnd->cmnd[0],
+-				cmnd->request_bufflen, cmnd->resid);
++				phba->brd_no, vpi, cmnd->cmnd[0],
++				scsi_bufflen(cmnd), scsi_get_resid(cmnd));
+ 		host_status = DID_ERROR;
+ 
+ 	/*
+@@ -410,13 +525,14 @@
+ 	} else if ((scsi_status == SAM_STAT_GOOD) && fcpi_parm &&
+ 			(cmnd->sc_data_direction == DMA_FROM_DEVICE)) {
+ 		lpfc_printf_log(phba, KERN_WARNING, LOG_FCP | LOG_FCP_ERROR,
+-			"%d:0734 FCP Read Check Error Data: "
+-			"x%x x%x x%x x%x\n", phba->brd_no,
++				"%d (%d):0734 FCP Read Check Error Data: "
++				"x%x x%x x%x x%x\n",
++				phba->brd_no, vpi,
+ 			be32_to_cpu(fcpcmd->fcpDl),
+ 			be32_to_cpu(fcprsp->rspResId),
+ 			fcpi_parm, cmnd->cmnd[0]);
+ 		host_status = DID_ERROR;
+-		cmnd->resid = cmnd->request_bufflen;
++		scsi_set_resid(cmnd, scsi_bufflen(cmnd));
+ 	}
+ 
+  out:
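
The same conversion moves residual bookkeeping to scsi_set_resid() and
scsi_get_resid() instead of poking cmnd->resid directly.  A sketch of
the underrun check in those terms (the helper and its policy are
illustrative; the accessors are the real mid-layer API):

#include <scsi/scsi_cmnd.h>

/* Record an underrun reported by the target and decide whether the
 * shortfall still satisfies the mid-layer's minimum transfer. */
static int note_underrun(struct scsi_cmnd *cmd, unsigned int resid)
{
	scsi_set_resid(cmd, resid);	/* bytes NOT transferred */

	/* transferred = requested - residual */
	if (scsi_bufflen(cmd) - scsi_get_resid(cmd) < cmd->underflow)
		return -1;		/* treat as an error, as above */
	return 0;
}
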
+@@ -429,9 +545,13 @@
+ {
+ 	struct lpfc_scsi_buf *lpfc_cmd =
+ 		(struct lpfc_scsi_buf *) pIocbIn->context1;
++	struct lpfc_vport      *vport = pIocbIn->vport;
+ 	struct lpfc_rport_data *rdata = lpfc_cmd->rdata;
+ 	struct lpfc_nodelist *pnode = rdata->pnode;
+ 	struct scsi_cmnd *cmd = lpfc_cmd->pCmd;
++	uint32_t vpi = (lpfc_cmd->cur_iocbq.vport
++			? lpfc_cmd->cur_iocbq.vport->vpi
++			: 0);
+ 	int result;
+ 	struct scsi_device *sdev, *tmp_sdev;
+ 	int depth = 0;
+@@ -447,22 +567,31 @@
+ 			lpfc_cmd->status = IOSTAT_DEFAULT;
+ 
+ 		lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
+-				"%d:0729 FCP cmd x%x failed <%d/%d> status: "
+-				"x%x result: x%x Data: x%x x%x\n",
+-				phba->brd_no, cmd->cmnd[0], cmd->device->id,
+-				cmd->device->lun, lpfc_cmd->status,
+-				lpfc_cmd->result, pIocbOut->iocb.ulpContext,
++				"%d (%d):0729 FCP cmd x%x failed <%d/%d> "
++				"status: x%x result: x%x Data: x%x x%x\n",
++				phba->brd_no, vpi, cmd->cmnd[0],
++				cmd->device ? cmd->device->id : 0xffff,
++				cmd->device ? cmd->device->lun : 0xffff,
++				lpfc_cmd->status, lpfc_cmd->result,
++				pIocbOut->iocb.ulpContext,
+ 				lpfc_cmd->cur_iocbq.iocb.ulpIoTag);
+ 
+ 		switch (lpfc_cmd->status) {
+ 		case IOSTAT_FCP_RSP_ERROR:
+ 			/* Call FCP RSP handler to determine result */
+-			lpfc_handle_fcp_err(lpfc_cmd,pIocbOut);
++			lpfc_handle_fcp_err(vport, lpfc_cmd, pIocbOut);
+ 			break;
+ 		case IOSTAT_NPORT_BSY:
+ 		case IOSTAT_FABRIC_BSY:
+ 			cmd->result = ScsiResult(DID_BUS_BUSY, 0);
+ 			break;
++		case IOSTAT_LOCAL_REJECT:
++			if (lpfc_cmd->result == RJT_UNAVAIL_PERM ||
++			    lpfc_cmd->result == IOERR_NO_RESOURCES ||
++			    lpfc_cmd->result == RJT_LOGIN_REQUIRED) {
++				cmd->result = ScsiResult(DID_REQUEUE, 0);
++				break;
++			} /* else: fall through */
+ 		default:
+ 			cmd->result = ScsiResult(DID_ERROR, 0);
+ 			break;
+@@ -479,11 +608,12 @@
+ 		uint32_t *lp = (uint32_t *)cmd->sense_buffer;
+ 
+ 		lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
+-				"%d:0710 Iodone <%d/%d> cmd %p, error x%x "
+-				"SNS x%x x%x Data: x%x x%x\n",
+-				phba->brd_no, cmd->device->id,
++				"%d (%d):0710 Iodone <%d/%d> cmd %p, error "
++				"x%x SNS x%x x%x Data: x%x x%x\n",
++				phba->brd_no, vpi, cmd->device->id,
+ 				cmd->device->lun, cmd, cmd->result,
+-				*lp, *(lp + 3), cmd->retries, cmd->resid);
++				*lp, *(lp + 3), cmd->retries,
++				scsi_get_resid(cmd));
+ 	}
+ 
+ 	result = cmd->result;
+@@ -496,6 +626,10 @@
+ 		return;
+ 	}
+ 
++
++	if (!result)
++		lpfc_rampup_queue_depth(phba, sdev);
++
+ 	if (!result && pnode != NULL &&
+ 	   ((jiffies - pnode->last_ramp_up_time) >
+ 		LPFC_Q_RAMP_UP_INTERVAL * HZ) &&
+@@ -544,8 +678,9 @@
+ 
+ 		if (depth) {
+ 			lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
+-				"%d:0711 detected queue full - lun queue depth "
+-				" adjusted to %d.\n", phba->brd_no, depth);
++					"%d (%d):0711 detected queue full - "
++					"lun queue depth adjusted to %d.\n",
++					phba->brd_no, vpi, depth);
+ 		}
+ 	}
+ 
+@@ -553,9 +688,10 @@
+ }
+ 
+ static void
+-lpfc_scsi_prep_cmnd(struct lpfc_hba * phba, struct lpfc_scsi_buf * lpfc_cmd,
++lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
+ 			struct lpfc_nodelist *pnode)
+ {
++	struct lpfc_hba *phba = vport->phba;
+ 	struct scsi_cmnd *scsi_cmnd = lpfc_cmd->pCmd;
+ 	struct fcp_cmnd *fcp_cmnd = lpfc_cmd->fcp_cmnd;
+ 	IOCB_t *iocb_cmd = &lpfc_cmd->cur_iocbq.iocb;
+@@ -592,22 +728,7 @@
+ 	 * bumping the bpl beyond the fcp_cmnd and fcp_rsp regions to the first
+ 	 * data bde entry.
+ 	 */
+-	if (scsi_cmnd->use_sg) {
+-		if (datadir == DMA_TO_DEVICE) {
+-			iocb_cmd->ulpCommand = CMD_FCP_IWRITE64_CR;
+-			iocb_cmd->un.fcpi.fcpi_parm = 0;
+-			iocb_cmd->ulpPU = 0;
+-			fcp_cmnd->fcpCntl3 = WRITE_DATA;
+-			phba->fc4OutputRequests++;
+-		} else {
+-			iocb_cmd->ulpCommand = CMD_FCP_IREAD64_CR;
+-			iocb_cmd->ulpPU = PARM_READ_CHECK;
+-			iocb_cmd->un.fcpi.fcpi_parm =
+-				scsi_cmnd->request_bufflen;
+-			fcp_cmnd->fcpCntl3 = READ_DATA;
+-			phba->fc4InputRequests++;
+-		}
+-	} else if (scsi_cmnd->request_buffer && scsi_cmnd->request_bufflen) {
++	if (scsi_sg_count(scsi_cmnd)) {
+ 		if (datadir == DMA_TO_DEVICE) {
+ 			iocb_cmd->ulpCommand = CMD_FCP_IWRITE64_CR;
+ 			iocb_cmd->un.fcpi.fcpi_parm = 0;
+@@ -617,8 +738,7 @@
+ 		} else {
+ 			iocb_cmd->ulpCommand = CMD_FCP_IREAD64_CR;
+ 			iocb_cmd->ulpPU = PARM_READ_CHECK;
+-			iocb_cmd->un.fcpi.fcpi_parm =
+-				scsi_cmnd->request_bufflen;
++			iocb_cmd->un.fcpi.fcpi_parm = scsi_bufflen(scsi_cmnd);
+ 			fcp_cmnd->fcpCntl3 = READ_DATA;
+ 			phba->fc4InputRequests++;
+ 		}
+@@ -642,15 +762,15 @@
+ 	piocbq->context1  = lpfc_cmd;
+ 	piocbq->iocb_cmpl = lpfc_scsi_cmd_iocb_cmpl;
+ 	piocbq->iocb.ulpTimeout = lpfc_cmd->timeout;
++	piocbq->vport = vport;
+ }
+ 
+ static int
+-lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_hba *phba,
++lpfc_scsi_prep_task_mgmt_cmd(struct lpfc_vport *vport,
+ 			     struct lpfc_scsi_buf *lpfc_cmd,
+ 			     unsigned int lun,
+ 			     uint8_t task_mgmt_cmd)
+ {
+-	struct lpfc_sli *psli;
+ 	struct lpfc_iocbq *piocbq;
+ 	IOCB_t *piocb;
+ 	struct fcp_cmnd *fcp_cmnd;
+@@ -661,8 +781,9 @@
+ 		return 0;
+ 	}
+ 
+-	psli = &phba->sli;
+ 	piocbq = &(lpfc_cmd->cur_iocbq);
++	piocbq->vport = vport;
++
+ 	piocb = &piocbq->iocb;
+ 
+ 	fcp_cmnd = lpfc_cmd->fcp_cmnd;
+@@ -688,7 +809,7 @@
+ 		piocb->ulpTimeout = lpfc_cmd->timeout;
+ 	}
+ 
+-	return (1);
++	return 1;
+ }
+ 
+ static void
+@@ -704,10 +825,11 @@
+ }
+ 
+ static int
+-lpfc_scsi_tgt_reset(struct lpfc_scsi_buf * lpfc_cmd, struct lpfc_hba * phba,
++lpfc_scsi_tgt_reset(struct lpfc_scsi_buf *lpfc_cmd, struct lpfc_vport *vport,
+ 		    unsigned  tgt_id, unsigned int lun,
+ 		    struct lpfc_rport_data *rdata)
+ {
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct lpfc_iocbq *iocbq;
+ 	struct lpfc_iocbq *iocbqrsp;
+ 	int ret;
+@@ -716,12 +838,11 @@
+ 		return FAILED;
+ 
+ 	lpfc_cmd->rdata = rdata;
+-	ret = lpfc_scsi_prep_task_mgmt_cmd(phba, lpfc_cmd, lun,
++	ret = lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, lun,
+ 					   FCP_TARGET_RESET);
+ 	if (!ret)
+ 		return FAILED;
+ 
+-	lpfc_cmd->scsi_hba = phba;
+ 	iocbq = &lpfc_cmd->cur_iocbq;
+ 	iocbqrsp = lpfc_sli_get_iocbq(phba);
+ 
+@@ -730,10 +851,10 @@
+ 
+ 	/* Issue Target Reset to TGT <num> */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
+-			"%d:0702 Issue Target Reset to TGT %d "
++			"%d (%d):0702 Issue Target Reset to TGT %d "
+ 			"Data: x%x x%x\n",
+-			phba->brd_no, tgt_id, rdata->pnode->nlp_rpi,
+-			rdata->pnode->nlp_flag);
++			phba->brd_no, vport->vpi, tgt_id,
++			rdata->pnode->nlp_rpi, rdata->pnode->nlp_flag);
+ 
+ 	ret = lpfc_sli_issue_iocb_wait(phba,
+ 				       &phba->sli.ring[phba->sli.fcp_ring],
+@@ -758,7 +879,8 @@
+ const char *
+ lpfc_info(struct Scsi_Host *host)
+ {
+-	struct lpfc_hba    *phba = (struct lpfc_hba *) host->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) host->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	int len;
+ 	static char  lpfcinfobuf[384];
+ 
+@@ -800,25 +922,21 @@
+ 
+ void lpfc_poll_timeout(unsigned long ptr)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba *)ptr;
+-	unsigned long iflag;
+-
+-	spin_lock_irqsave(phba->host->host_lock, iflag);
++	struct lpfc_hba *phba = (struct lpfc_hba *) ptr;
+ 
+ 	if (phba->cfg_poll & ENABLE_FCP_RING_POLLING) {
+ 		lpfc_sli_poll_fcp_ring (phba);
+ 		if (phba->cfg_poll & DISABLE_FCP_RING_INT)
+ 			lpfc_poll_rearm_timer(phba);
+ 	}
+-
+-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
+ }
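
Every entry point above now reaches the HBA through the vport:
shost->hostdata holds the lpfc_vport and the lpfc_hba hangs off
vport->phba.  The two-step lookup amounts to the following
hypothetical helper (assumes the lpfc headers; not part of the patch):

static struct lpfc_hba *hba_from_shost(struct Scsi_Host *shost)
{
	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;

	return vport->phba;	/* the vport always carries its HBA */
}
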
+ 
+ static int
+ lpfc_queuecommand(struct scsi_cmnd *cmnd, void (*done) (struct scsi_cmnd *))
+ {
+-	struct lpfc_hba *phba =
+-		(struct lpfc_hba *) cmnd->device->host->hostdata;
++	struct Scsi_Host  *shost = cmnd->device->host;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct lpfc_sli *psli = &phba->sli;
+ 	struct lpfc_rport_data *rdata = cmnd->device->hostdata;
+ 	struct lpfc_nodelist *ndlp = rdata->pnode;
+@@ -840,11 +958,14 @@
+ 		cmnd->result = ScsiResult(DID_BUS_BUSY, 0);
+ 		goto out_fail_command;
+ 	}
+-	lpfc_cmd = lpfc_get_scsi_buf (phba);
++	lpfc_cmd = lpfc_get_scsi_buf(phba);
+ 	if (lpfc_cmd == NULL) {
++		lpfc_adjust_queue_depth(phba);
++
+ 		lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
+-				"%d:0707 driver's buffer pool is empty, "
+-				"IO busied\n", phba->brd_no);
++				"%d (%d):0707 driver's buffer pool is empty, "
++				"IO busied\n",
++				phba->brd_no, vport->vpi);
+ 		goto out_host_busy;
+ 	}
+ 
+@@ -862,7 +983,7 @@
+ 	if (err)
+ 		goto out_host_busy_free_buf;
+ 
+-	lpfc_scsi_prep_cmnd(phba, lpfc_cmd, ndlp);
++	lpfc_scsi_prep_cmnd(vport, lpfc_cmd, ndlp);
+ 
+ 	err = lpfc_sli_issue_iocb(phba, &phba->sli.ring[psli->fcp_ring],
+ 				&lpfc_cmd->cur_iocbq, SLI_IOCB_RET_IOCB);
+@@ -908,7 +1029,8 @@
+ lpfc_abort_handler(struct scsi_cmnd *cmnd)
+ {
+ 	struct Scsi_Host *shost = cmnd->device->host;
+-	struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct lpfc_sli_ring *pring = &phba->sli.ring[phba->sli.fcp_ring];
+ 	struct lpfc_iocbq *iocb;
+ 	struct lpfc_iocbq *abtsiocb;
+@@ -918,8 +1040,6 @@
+ 	int ret = SUCCESS;
+ 
+ 	lpfc_block_error_handler(cmnd);
+-	spin_lock_irq(shost->host_lock);
+-
+ 	lpfc_cmd = (struct lpfc_scsi_buf *)cmnd->host_scribble;
+ 	BUG_ON(!lpfc_cmd);
+ 
+@@ -956,12 +1076,13 @@
+ 
+ 	icmd->ulpLe = 1;
+ 	icmd->ulpClass = cmd->ulpClass;
+-	if (phba->hba_state >= LPFC_LINK_UP)
++	if (lpfc_is_link_up(phba))
+ 		icmd->ulpCommand = CMD_ABORT_XRI_CN;
+ 	else
+ 		icmd->ulpCommand = CMD_CLOSE_XRI_CN;
+ 
+ 	abtsiocb->iocb_cmpl = lpfc_sli_abort_fcp_cmpl;
++	abtsiocb->vport = vport;
+ 	if (lpfc_sli_issue_iocb(phba, pring, abtsiocb, 0) == IOCB_ERROR) {
+ 		lpfc_sli_release_iocbq(phba, abtsiocb);
+ 		ret = FAILED;
+@@ -977,9 +1098,7 @@
+ 		if (phba->cfg_poll & DISABLE_FCP_RING_INT)
+ 			lpfc_sli_poll_fcp_ring (phba);
+ 
+-		spin_unlock_irq(phba->host->host_lock);
+-			schedule_timeout_uninterruptible(LPFC_ABORT_WAIT*HZ);
+-		spin_lock_irq(phba->host->host_lock);
++		schedule_timeout_uninterruptible(LPFC_ABORT_WAIT * HZ);
+ 		if (++loop_count
+ 		    > (2 * phba->cfg_devloss_tmo)/LPFC_ABORT_WAIT)
+ 			break;
+@@ -988,22 +1107,21 @@
+ 	if (lpfc_cmd->pCmd == cmnd) {
+ 		ret = FAILED;
+ 		lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+-				"%d:0748 abort handler timed out waiting for "
+-				"abort to complete: ret %#x, ID %d, LUN %d, "
+-				"snum %#lx\n",
+-				phba->brd_no,  ret, cmnd->device->id,
+-				cmnd->device->lun, cmnd->serial_number);
++				"%d (%d):0748 abort handler timed out waiting "
++				"for abort to complete: ret %#x, ID %d, "
++				"LUN %d, snum %#lx\n",
++				phba->brd_no, vport->vpi, ret,
++				cmnd->device->id, cmnd->device->lun,
++				cmnd->serial_number);
+ 	}
+ 
+  out:
+ 	lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
+-			"%d:0749 SCSI Layer I/O Abort Request "
++			"%d (%d):0749 SCSI Layer I/O Abort Request "
+ 			"Status x%x ID %d LUN %d snum %#lx\n",
+-			phba->brd_no, ret, cmnd->device->id,
++			phba->brd_no, vport->vpi, ret, cmnd->device->id,
+ 			cmnd->device->lun, cmnd->serial_number);
+ 
+-	spin_unlock_irq(shost->host_lock);
+-
+ 	return ret;
+ }
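
With host_lock no longer held across the wait, the abort handler can
sleep between polls.  The loop bound, (2 * devloss_tmo) /
LPFC_ABORT_WAIT iterations of LPFC_ABORT_WAIT-second naps, caps the
total wait at roughly twice the devloss timeout.  A generic sketch of
that shape, using msleep() and invented names:

#include <linux/delay.h>

#define WAIT_SECS 2	/* stand-in for LPFC_ABORT_WAIT */

/* Poll 'done' once per WAIT_SECS; give up after ~2 * tmo_secs. */
static int wait_for_done(volatile int *done, int tmo_secs)
{
	int loops = 0;

	while (!*done) {
		msleep(WAIT_SECS * 1000);
		if (++loops > (2 * tmo_secs) / WAIT_SECS)
			return -1;	/* timed out */
	}
	return 0;
}
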
+ 
+@@ -1011,7 +1129,8 @@
+ lpfc_device_reset_handler(struct scsi_cmnd *cmnd)
+ {
+ 	struct Scsi_Host *shost = cmnd->device->host;
+-	struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct lpfc_scsi_buf *lpfc_cmd;
+ 	struct lpfc_iocbq *iocbq, *iocbqrsp;
+ 	struct lpfc_rport_data *rdata = cmnd->device->hostdata;
+@@ -1022,28 +1141,26 @@
+ 	int cnt, loopcnt;
+ 
+ 	lpfc_block_error_handler(cmnd);
+-	spin_lock_irq(shost->host_lock);
+ 	loopcnt = 0;
+ 	/*
+ 	 * If target is not in a MAPPED state, delay the reset until
+ 	 * target is rediscovered or devloss timeout expires.
+ 	 */
+-	while ( 1 ) {
++	while (1) {
+ 		if (!pnode)
+ 			goto out;
+ 
+ 		if (pnode->nlp_state != NLP_STE_MAPPED_NODE) {
+-			spin_unlock_irq(phba->host->host_lock);
+ 			schedule_timeout_uninterruptible(msecs_to_jiffies(500));
+-			spin_lock_irq(phba->host->host_lock);
+ 			loopcnt++;
+ 			rdata = cmnd->device->hostdata;
+ 			if (!rdata ||
+ 				(loopcnt > ((phba->cfg_devloss_tmo * 2) + 1))) {
+ 				lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+-		   			"%d:0721 LUN Reset rport failure:"
+-					" cnt x%x rdata x%p\n",
+-		   			phba->brd_no, loopcnt, rdata);
++						"%d (%d):0721 LUN Reset rport "
++						"failure: cnt x%x rdata x%p\n",
++						phba->brd_no, vport->vpi,
++						loopcnt, rdata);
+ 				goto out;
+ 			}
+ 			pnode = rdata->pnode;
+@@ -1054,15 +1171,14 @@
+ 			break;
+ 	}
+ 
+-	lpfc_cmd = lpfc_get_scsi_buf (phba);
++	lpfc_cmd = lpfc_get_scsi_buf(phba);
+ 	if (lpfc_cmd == NULL)
+ 		goto out;
+ 
+ 	lpfc_cmd->timeout = 60;
+-	lpfc_cmd->scsi_hba = phba;
+ 	lpfc_cmd->rdata = rdata;
+ 
+-	ret = lpfc_scsi_prep_task_mgmt_cmd(phba, lpfc_cmd, cmnd->device->lun,
++	ret = lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, cmnd->device->lun,
+ 					   FCP_TARGET_RESET);
+ 	if (!ret)
+ 		goto out_free_scsi_buf;
+@@ -1075,8 +1191,9 @@
+ 		goto out_free_scsi_buf;
+ 
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_FCP,
+-			"%d:0703 Issue target reset to TGT %d LUN %d rpi x%x "
+-			"nlp_flag x%x\n", phba->brd_no, cmnd->device->id,
++			"%d (%d):0703 Issue target reset to TGT %d LUN %d "
++			"rpi x%x nlp_flag x%x\n",
++			phba->brd_no, vport->vpi, cmnd->device->id,
+ 			cmnd->device->lun, pnode->nlp_rpi, pnode->nlp_flag);
+ 
+ 	iocb_status = lpfc_sli_issue_iocb_wait(phba,
+@@ -1111,9 +1228,7 @@
+ 				    0, LPFC_CTX_LUN);
+ 	loopcnt = 0;
+ 	while(cnt) {
+-		spin_unlock_irq(phba->host->host_lock);
+ 		schedule_timeout_uninterruptible(LPFC_RESET_WAIT*HZ);
+-		spin_lock_irq(phba->host->host_lock);
+ 
+ 		if (++loopcnt
+ 		    > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT)
+@@ -1127,8 +1242,9 @@
+ 
+ 	if (cnt) {
+ 		lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+-			"%d:0719 device reset I/O flush failure: cnt x%x\n",
+-			phba->brd_no, cnt);
++				"%d (%d):0719 device reset I/O flush failure: "
++				"cnt x%x\n",
++				phba->brd_no, vport->vpi, cnt);
+ 		ret = FAILED;
+ 	}
+ 
+@@ -1137,13 +1253,12 @@
+ 		lpfc_release_scsi_buf(phba, lpfc_cmd);
+ 	}
+ 	lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+-			"%d:0713 SCSI layer issued device reset (%d, %d) "
++			"%d (%d):0713 SCSI layer issued device reset (%d, %d) "
+ 			"return x%x status x%x result x%x\n",
+-			phba->brd_no, cmnd->device->id, cmnd->device->lun,
+-			ret, cmd_status, cmd_result);
++			phba->brd_no, vport->vpi, cmnd->device->id,
++			cmnd->device->lun, ret, cmd_status, cmd_result);
+ 
+ out:
+-	spin_unlock_irq(shost->host_lock);
+ 	return ret;
+ }
+ 
+@@ -1151,7 +1266,8 @@
+ lpfc_bus_reset_handler(struct scsi_cmnd *cmnd)
+ {
+ 	struct Scsi_Host *shost = cmnd->device->host;
+-	struct lpfc_hba *phba = (struct lpfc_hba *)shost->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct lpfc_nodelist *ndlp = NULL;
+ 	int match;
+ 	int ret = FAILED, i, err_count = 0;
+@@ -1159,7 +1275,6 @@
+ 	struct lpfc_scsi_buf * lpfc_cmd;
+ 
+ 	lpfc_block_error_handler(cmnd);
+-	spin_lock_irq(shost->host_lock);
+ 
+ 	lpfc_cmd = lpfc_get_scsi_buf(phba);
+ 	if (lpfc_cmd == NULL)
+@@ -1167,7 +1282,6 @@
+ 
+ 	/* The lpfc_cmd storage is reused.  Set all loop invariants. */
+ 	lpfc_cmd->timeout = 60;
+-	lpfc_cmd->scsi_hba = phba;
+ 
+ 	/*
+ 	 * Since the driver manages a single bus device, reset all
+@@ -1177,7 +1291,8 @@
+ 	for (i = 0; i < LPFC_MAX_TARGET; i++) {
+ 		/* Search for mapped node by target ID */
+ 		match = 0;
+-		list_for_each_entry(ndlp, &phba->fc_nodes, nlp_listp) {
++		spin_lock_irq(shost->host_lock);
++		list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) {
+ 			if (ndlp->nlp_state == NLP_STE_MAPPED_NODE &&
+ 			    i == ndlp->nlp_sid &&
+ 			    ndlp->rport) {
+@@ -1185,15 +1300,18 @@
+ 				break;
+ 			}
+ 		}
++		spin_unlock_irq(shost->host_lock);
+ 		if (!match)
+ 			continue;
+ 
+-		ret = lpfc_scsi_tgt_reset(lpfc_cmd, phba, i, cmnd->device->lun,
++		ret = lpfc_scsi_tgt_reset(lpfc_cmd, vport, i,
++					  cmnd->device->lun,
+ 					  ndlp->rport->dd_data);
+ 		if (ret != SUCCESS) {
+ 			lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+-				"%d:0700 Bus Reset on target %d failed\n",
+-				phba->brd_no, i);
++					"%d (%d):0700 Bus Reset on target %d "
++					"failed\n",
++					phba->brd_no, vport->vpi, i);
+ 			err_count++;
+ 			break;
+ 		}
+@@ -1219,9 +1337,7 @@
+ 				    0, 0, 0, LPFC_CTX_HOST);
+ 	loopcnt = 0;
+ 	while(cnt) {
+-		spin_unlock_irq(phba->host->host_lock);
+ 		schedule_timeout_uninterruptible(LPFC_RESET_WAIT*HZ);
+-		spin_lock_irq(phba->host->host_lock);
+ 
+ 		if (++loopcnt
+ 		    > (2 * phba->cfg_devloss_tmo)/LPFC_RESET_WAIT)
+@@ -1234,25 +1350,24 @@
+ 
+ 	if (cnt) {
+ 		lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+-		   "%d:0715 Bus Reset I/O flush failure: cnt x%x left x%x\n",
+-		   phba->brd_no, cnt, i);
++				"%d (%d):0715 Bus Reset I/O flush failure: "
++				"cnt x%x left x%x\n",
++				phba->brd_no, vport->vpi, cnt, i);
+ 		ret = FAILED;
+ 	}
+ 
+-	lpfc_printf_log(phba,
+-			KERN_ERR,
+-			LOG_FCP,
+-			"%d:0714 SCSI layer issued Bus Reset Data: x%x\n",
+-			phba->brd_no, ret);
++	lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
++			"%d (%d):0714 SCSI layer issued Bus Reset Data: x%x\n",
++			phba->brd_no, vport->vpi, ret);
+ out:
+-	spin_unlock_irq(shost->host_lock);
+ 	return ret;
+ }
+ 
+ static int
+ lpfc_slave_alloc(struct scsi_device *sdev)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba *)sdev->host->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) sdev->host->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct lpfc_scsi_buf *scsi_buf = NULL;
+ 	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
+ 	uint32_t total = 0, i;
+@@ -1273,27 +1388,35 @@
+ 	 */
+ 	total = phba->total_scsi_bufs;
+ 	num_to_alloc = phba->cfg_lun_queue_depth + 2;
+-	if (total >= phba->cfg_hba_queue_depth) {
++
++	/* Allow some exchanges to be available always to complete discovery */
++	if (total >= phba->cfg_hba_queue_depth - LPFC_DISC_IOCB_BUFF_COUNT) {
+ 		lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
+-				"%d:0704 At limitation of %d preallocated "
+-				"command buffers\n", phba->brd_no, total);
++				"%d (%d):0704 At limitation of %d "
++				"preallocated command buffers\n",
++				phba->brd_no, vport->vpi, total);
+ 		return 0;
+-	} else if (total + num_to_alloc > phba->cfg_hba_queue_depth) {
++
++	/* Allow some exchanges to be available always to complete discovery */
++	} else if (total + num_to_alloc >
++		phba->cfg_hba_queue_depth - LPFC_DISC_IOCB_BUFF_COUNT) {
+ 		lpfc_printf_log(phba, KERN_WARNING, LOG_FCP,
+-				"%d:0705 Allocation request of %d command "
+-				"buffers will exceed max of %d.  Reducing "
+-				"allocation request to %d.\n", phba->brd_no,
+-				num_to_alloc, phba->cfg_hba_queue_depth,
++				"%d (%d):0705 Allocation request of %d "
++				"command buffers will exceed max of %d.  "
++				"Reducing allocation request to %d.\n",
++				phba->brd_no, vport->vpi, num_to_alloc,
++				phba->cfg_hba_queue_depth,
+ 				(phba->cfg_hba_queue_depth - total));
+ 		num_to_alloc = phba->cfg_hba_queue_depth - total;
+ 	}
+ 
+ 	for (i = 0; i < num_to_alloc; i++) {
+-		scsi_buf = lpfc_new_scsi_buf(phba);
++		scsi_buf = lpfc_new_scsi_buf(vport);
+ 		if (!scsi_buf) {
+ 			lpfc_printf_log(phba, KERN_ERR, LOG_FCP,
+-					"%d:0706 Failed to allocate command "
+-					"buffer\n", phba->brd_no);
++					"%d (%d):0706 Failed to allocate "
++					"command buffer\n",
++					phba->brd_no, vport->vpi);
+ 			break;
+ 		}
+ 
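
The two branches above keep LPFC_DISC_IOCB_BUFF_COUNT exchanges in
reserve so discovery traffic can always make progress.  Reduced to
plain arithmetic the policy looks like the sketch below; the constants
are stand-ins, and the trim is simplified to the capped limit.

#include <assert.h>

#define HBA_QUEUE_DEPTH 512	/* assumed cfg_hba_queue_depth */
#define DISC_RESERVE     20	/* assumed discovery reserve */

static int bufs_to_alloc(int total_now, int wanted)
{
	int cap = HBA_QUEUE_DEPTH - DISC_RESERVE;

	if (total_now >= cap)
		return 0;			/* already at the limit */
	if (total_now + wanted > cap)
		return cap - total_now;		/* trim the request */
	return wanted;
}

int main(void)
{
	assert(bufs_to_alloc(500, 34) == 0);	/* at/over the cap already */
	assert(bufs_to_alloc(480, 34) == 12);	/* trimmed to the cap */
	assert(bufs_to_alloc(100, 34) == 34);	/* plenty of headroom */
	return 0;
}
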
+@@ -1308,7 +1431,8 @@
+ static int
+ lpfc_slave_configure(struct scsi_device *sdev)
+ {
+-	struct lpfc_hba *phba = (struct lpfc_hba *) sdev->host->hostdata;
++	struct lpfc_vport *vport = (struct lpfc_vport *) sdev->host->hostdata;
++	struct lpfc_hba   *phba = vport->phba;
+ 	struct fc_rport *rport = starget_to_rport(sdev->sdev_target);
+ 
+ 	if (sdev->tagged_supported)
+@@ -1340,6 +1464,7 @@
+ 	return;
+ }
+ 
++
+ struct scsi_host_template lpfc_template = {
+ 	.module			= THIS_MODULE,
+ 	.name			= LPFC_DRIVER_NAME,
+@@ -1352,11 +1477,10 @@
+ 	.slave_configure	= lpfc_slave_configure,
+ 	.slave_destroy		= lpfc_slave_destroy,
+ 	.scan_finished		= lpfc_scan_finished,
+-	.scan_start		= lpfc_scan_start,
+ 	.this_id		= -1,
+ 	.sg_tablesize		= LPFC_SG_SEG_CNT,
+ 	.cmd_per_lun		= LPFC_CMD_PER_LUN,
+ 	.use_clustering		= ENABLE_CLUSTERING,
+-	.shost_attrs		= lpfc_host_attrs,
++	.shost_attrs		= lpfc_hba_attrs,
+ 	.max_sectors		= 0xFFFF,
+ };
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.h linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_scsi.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_scsi.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_scsi.h	2007-12-19 15:29:23.000000000 -0500
+@@ -1,7 +1,7 @@
+ /*******************************************************************
+  * This file is part of the Emulex Linux Device Driver for         *
+  * Fibre Channel Host Bus Adapters.                                *
+- * Copyright (C) 2004-2005 Emulex.  All rights reserved.           *
++ * Copyright (C) 2004-2006 Emulex.  All rights reserved.           *
+  * EMULEX and SLI are trademarks of Emulex.                        *
+  * www.emulex.com                                                  *
+  *                                                                 *
+@@ -110,7 +110,6 @@
+ struct lpfc_scsi_buf {
+ 	struct list_head list;
+ 	struct scsi_cmnd *pCmd;
+-	struct lpfc_hba *scsi_hba;
+ 	struct lpfc_rport_data *rdata;
+ 
+ 	uint32_t timeout;
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.c linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_sli.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_sli.c	2007-12-19 15:29:23.000000000 -0500
+@@ -38,23 +38,25 @@
+ #include "lpfc_crtn.h"
+ #include "lpfc_logmsg.h"
+ #include "lpfc_compat.h"
++#include "lpfc_debugfs.h"
+ 
+ /*
+  * Define macro to log: Mailbox command x%x cannot issue Data
+  * This allows multiple uses of lpfc_msgBlk0311
+  * w/o perturbing log msg utility.
+  */
+-#define LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag) \
++#define LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag) \
+ 			lpfc_printf_log(phba, \
+ 				KERN_INFO, \
+ 				LOG_MBOX | LOG_SLI, \
+-				"%d:0311 Mailbox command x%x cannot issue " \
+-				"Data: x%x x%x x%x\n", \
++				"%d (%d):0311 Mailbox command x%x cannot " \
++				"issue Data: x%x x%x x%x\n", \
+ 				phba->brd_no, \
+-				mb->mbxCommand,		\
+-				phba->hba_state,	\
++				pmbox->vport ? pmbox->vport->vpi : 0, \
++				pmbox->mb.mbxCommand,		\
++				phba->pport->port_state,	\
+ 				psli->sli_flag,	\
+-				flag);
++				flag)
+ 
+ 
+ /* There are only four IOCB completion types. */
+@@ -65,8 +67,26 @@
+ 	LPFC_ABORT_IOCB
+ } lpfc_iocb_type;
+ 
+-struct lpfc_iocbq *
+-lpfc_sli_get_iocbq(struct lpfc_hba * phba)
++/* SLI-2/SLI-3 provide different sized iocbs.  Given a pointer
++ * to the start of the ring, and the slot number of the
++ * desired iocb entry, calc a pointer to that entry.
++ */
++static inline IOCB_t *
++lpfc_cmd_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
++{
++	return (IOCB_t *) (((char *) pring->cmdringaddr) +
++			   pring->cmdidx * phba->iocb_cmd_size);
++}
++
++static inline IOCB_t *
++lpfc_resp_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
++{
++	return (IOCB_t *) (((char *) pring->rspringaddr) +
++			   pring->rspidx * phba->iocb_rsp_size);
++}
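
lpfc_cmd_iocb()/lpfc_resp_iocb() replace the fixed-stride IOCB_ENTRY
macro with byte arithmetic on a per-HBA entry size, since SLI-2 and
SLI-3 rings use different iocb sizes.  The addressing is just
base + index * size; a stand-alone sketch (the 64-byte entry size is
an assumption for the example):

#include <assert.h>
#include <stddef.h>

/* With entries of a runtime-chosen size, slot n lives at
 * base + n * size bytes. */
static void *ring_entry(void *base, unsigned int idx, size_t entry_size)
{
	return (char *) base + idx * entry_size;
}

int main(void)
{
	char ring[4 * 64];	/* 4 slots of an assumed 64-byte iocb */

	assert(ring_entry(ring, 2, 64) == &ring[128]);
	return 0;
}
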
++
++static struct lpfc_iocbq *
++__lpfc_sli_get_iocbq(struct lpfc_hba *phba)
+ {
+ 	struct list_head *lpfc_iocb_list = &phba->lpfc_iocb_list;
+ 	struct lpfc_iocbq * iocbq = NULL;
+@@ -75,10 +95,22 @@
+ 	return iocbq;
+ }
+ 
++struct lpfc_iocbq *
++lpfc_sli_get_iocbq(struct lpfc_hba *phba)
++{
++	struct lpfc_iocbq * iocbq = NULL;
++	unsigned long iflags;
++
++	spin_lock_irqsave(&phba->hbalock, iflags);
++	iocbq = __lpfc_sli_get_iocbq(phba);
++	spin_unlock_irqrestore(&phba->hbalock, iflags);
++	return iocbq;
++}
++
+ void
+-lpfc_sli_release_iocbq(struct lpfc_hba * phba, struct lpfc_iocbq * iocbq)
++__lpfc_sli_release_iocbq(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
+ {
+-	size_t start_clean = (size_t)(&((struct lpfc_iocbq *)NULL)->iocb);
++	size_t start_clean = offsetof(struct lpfc_iocbq, iocb);
+ 
+ 	/*
+ 	 * Clean all volatile data fields, preserve iotag and node struct.
+@@ -87,6 +119,19 @@
+ 	list_add_tail(&iocbq->list, &phba->lpfc_iocb_list);
+ }
+ 
++void
++lpfc_sli_release_iocbq(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
++{
++	unsigned long iflags;
++
++	/*
++	 * Clean all volatile data fields, preserve iotag and node struct.
++	 */
++	spin_lock_irqsave(&phba->hbalock, iflags);
++	__lpfc_sli_release_iocbq(phba, iocbq);
++	spin_unlock_irqrestore(&phba->hbalock, iflags);
++}
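
The split above follows the usual kernel convention: a
double-underscore variant that assumes the caller already holds
hbalock, and a thin wrapper that takes and drops the lock around it.
A minimal illustration of the pattern; the pool type and both
functions are invented for the example.

#include <linux/spinlock.h>

struct pool {
	spinlock_t lock;
	int count;
};

static int __pool_take(struct pool *p)	/* caller holds p->lock */
{
	return p->count ? p->count-- : 0;
}

static int pool_take(struct pool *p)
{
	unsigned long flags;
	int got;

	spin_lock_irqsave(&p->lock, flags);
	got = __pool_take(p);
	spin_unlock_irqrestore(&p->lock, flags);
	return got;
}
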
++
+ /*
+  * Translate the iocb command to an iocb command type used to decide the final
+  * disposition of each completed IOCB.
+@@ -155,6 +200,9 @@
+ 	case CMD_RCV_ELS_REQ_CX:
+ 	case CMD_RCV_SEQUENCE64_CX:
+ 	case CMD_RCV_ELS_REQ64_CX:
++	case CMD_IOCB_RCV_SEQ64_CX:
++	case CMD_IOCB_RCV_ELS64_CX:
++	case CMD_IOCB_RCV_CONT64_CX:
+ 		type = LPFC_UNSOL_IOCB;
+ 		break;
+ 	default:
+@@ -166,73 +214,77 @@
+ }
+ 
+ static int
+-lpfc_sli_ring_map(struct lpfc_hba * phba, LPFC_MBOXQ_t *pmb)
++lpfc_sli_ring_map(struct lpfc_hba *phba)
+ {
+ 	struct lpfc_sli *psli = &phba->sli;
+-	MAILBOX_t *pmbox = &pmb->mb;
+-	int i, rc;
++	LPFC_MBOXQ_t *pmb;
++	MAILBOX_t *pmbox;
++	int i, rc, ret = 0;
+ 
++	pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++	if (!pmb)
++		return -ENOMEM;
++	pmbox = &pmb->mb;
++	phba->link_state = LPFC_INIT_MBX_CMDS;
+ 	for (i = 0; i < psli->num_rings; i++) {
+-		phba->hba_state = LPFC_INIT_MBX_CMDS;
+ 		lpfc_config_ring(phba, i, pmb);
+ 		rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+ 		if (rc != MBX_SUCCESS) {
+-			lpfc_printf_log(phba,
+-					KERN_ERR,
+-					LOG_INIT,
+-					"%d:0446 Adapter failed to init, "
++			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
++					"%d:0446 Adapter failed to init (%d), "
+ 					"mbxCmd x%x CFG_RING, mbxStatus x%x, "
+ 					"ring %d\n",
+-					phba->brd_no,
++					phba->brd_no, rc,
+ 					pmbox->mbxCommand,
+ 					pmbox->mbxStatus,
+ 					i);
+-			phba->hba_state = LPFC_HBA_ERROR;
+-			return -ENXIO;
++			phba->link_state = LPFC_HBA_ERROR;
++			ret = -ENXIO;
++			break;
+ 		}
+ 	}
+-	return 0;
++	mempool_free(pmb, phba->mbox_mem_pool);
++	return ret;
+ }
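
As reworked, lpfc_sli_ring_map() allocates its own mailbox from the
mempool, turns the early returns into a single exit that always frees
it, and reports the first failure.  The shape, reduced to a sketch
(the pool, issue_cmd() stub and error codes are illustrative):

#include <linux/errno.h>
#include <linux/mempool.h>

static int issue_cmd(void *buf, int ring) { return 0; }	/* stub */

static int run_all_rings(mempool_t *pool, int nrings)
{
	void *buf;
	int i, ret = 0;

	buf = mempool_alloc(pool, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	for (i = 0; i < nrings; i++) {
		if (issue_cmd(buf, i)) {
			ret = -ENXIO;
			break;		/* still reaches the free below */
		}
	}
	mempool_free(buf, pool);	/* single exit owns the buffer */
	return ret;
}
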
+ 
+ static int
+-lpfc_sli_ringtxcmpl_put(struct lpfc_hba * phba,
+-			struct lpfc_sli_ring * pring, struct lpfc_iocbq * piocb)
++lpfc_sli_ringtxcmpl_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++			struct lpfc_iocbq *piocb)
+ {
+ 	list_add_tail(&piocb->list, &pring->txcmplq);
+ 	pring->txcmplq_cnt++;
+-	if (unlikely(pring->ringno == LPFC_ELS_RING))
+-		mod_timer(&phba->els_tmofunc,
++	if ((unlikely(pring->ringno == LPFC_ELS_RING)) &&
++	   (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) &&
++	   (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN)) {
++		if (!piocb->vport)
++			BUG();
++		else
++			mod_timer(&piocb->vport->els_tmofunc,
+ 					jiffies + HZ * (phba->fc_ratov << 1));
++	}
+ 
+-	return (0);
++
++	return 0;
+ }
+ 
+ static struct lpfc_iocbq *
+-lpfc_sli_ringtx_get(struct lpfc_hba * phba, struct lpfc_sli_ring * pring)
++lpfc_sli_ringtx_get(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+-	struct list_head *dlp;
+ 	struct lpfc_iocbq *cmd_iocb;
+ 
+-	dlp = &pring->txq;
+-	cmd_iocb = NULL;
+-	list_remove_head((&pring->txq), cmd_iocb,
+-			 struct lpfc_iocbq,
+-			 list);
+-	if (cmd_iocb) {
+-		/* If the first ptr is not equal to the list header,
+-		 * deque the IOCBQ_t and return it.
+-		 */
++	list_remove_head((&pring->txq), cmd_iocb, struct lpfc_iocbq, list);
++	if (cmd_iocb != NULL)
+ 		pring->txq_cnt--;
+-	}
+-	return (cmd_iocb);
++	return cmd_iocb;
+ }
+ 
+ static IOCB_t *
+ lpfc_sli_next_iocb_slot (struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+-	struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno];
++	struct lpfc_pgp *pgp = (phba->sli_rev == 3) ?
++		&phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] :
++		&phba->slim2p->mbx.us.s2.port[pring->ringno];
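++	/* SLI-3 keeps the port get/put index block in a different slim2p
++	 * layout (s3_pgp) than SLI-2 (s2); the same selection recurs
++	 * wherever a ring's pgp pointer is fetched.
++	 */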
+ 	uint32_t  max_cmd_idx = pring->numCiocb;
+-	IOCB_t *iocb = NULL;
+ 
+ 	if ((pring->next_cmdidx == pring->cmdidx) &&
+ 	   (++pring->next_cmdidx >= max_cmd_idx))
+@@ -249,15 +301,17 @@
+ 					phba->brd_no, pring->ringno,
+ 					pring->local_getidx, max_cmd_idx);
+ 
+-			phba->hba_state = LPFC_HBA_ERROR;
++			phba->link_state = LPFC_HBA_ERROR;
+ 			/*
+ 			 * All error attention handlers are posted to
+ 			 * worker thread
+ 			 */
+ 			phba->work_ha |= HA_ERATT;
+ 			phba->work_hs = HS_FFER3;
++
++			/* hbalock should already be held */
+ 			if (phba->work_wait)
+-				wake_up(phba->work_wait);
++				lpfc_worker_wake_up(phba);
+ 
+ 			return NULL;
+ 		}
+@@ -266,39 +320,34 @@
+ 			return NULL;
+ 	}
+ 
+-	iocb = IOCB_ENTRY(pring->cmdringaddr, pring->cmdidx);
+-
+-	return iocb;
++	return lpfc_cmd_iocb(phba, pring);
+ }
+ 
+ uint16_t
+-lpfc_sli_next_iotag(struct lpfc_hba * phba, struct lpfc_iocbq * iocbq)
++lpfc_sli_next_iotag(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
+ {
+-	struct lpfc_iocbq ** new_arr;
+-	struct lpfc_iocbq ** old_arr;
++	struct lpfc_iocbq **new_arr;
++	struct lpfc_iocbq **old_arr;
+ 	size_t new_len;
+ 	struct lpfc_sli *psli = &phba->sli;
+ 	uint16_t iotag;
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	iotag = psli->last_iotag;
+ 	if(++iotag < psli->iocbq_lookup_len) {
+ 		psli->last_iotag = iotag;
+ 		psli->iocbq_lookup[iotag] = iocbq;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(&phba->hbalock);
+ 		iocbq->iotag = iotag;
+ 		return iotag;
+-	}
+-	else if (psli->iocbq_lookup_len < (0xffff
++	} else if (psli->iocbq_lookup_len < (0xffff
+ 					   - LPFC_IOCBQ_LOOKUP_INCREMENT)) {
+ 		new_len = psli->iocbq_lookup_len + LPFC_IOCBQ_LOOKUP_INCREMENT;
+-		spin_unlock_irq(phba->host->host_lock);
+-		new_arr = kmalloc(new_len * sizeof (struct lpfc_iocbq *),
++		spin_unlock_irq(&phba->hbalock);
++		new_arr = kzalloc(new_len * sizeof (struct lpfc_iocbq *),
+ 				  GFP_KERNEL);
+ 		if (new_arr) {
+-			memset((char *)new_arr, 0,
+-			       new_len * sizeof (struct lpfc_iocbq *));
+-			spin_lock_irq(phba->host->host_lock);
++			spin_lock_irq(&phba->hbalock);
+ 			old_arr = psli->iocbq_lookup;
+ 			if (new_len <= psli->iocbq_lookup_len) {
+ 				/* highly unprobable case */
+@@ -307,11 +356,11 @@
+ 				if(++iotag < psli->iocbq_lookup_len) {
+ 					psli->last_iotag = iotag;
+ 					psli->iocbq_lookup[iotag] = iocbq;
+-					spin_unlock_irq(phba->host->host_lock);
++					spin_unlock_irq(&phba->hbalock);
+ 					iocbq->iotag = iotag;
+ 					return iotag;
+ 				}
+-				spin_unlock_irq(phba->host->host_lock);
++				spin_unlock_irq(&phba->hbalock);
+ 				return 0;
+ 			}
+ 			if (psli->iocbq_lookup)
+@@ -322,13 +371,13 @@
+ 			psli->iocbq_lookup_len = new_len;
+ 			psli->last_iotag = iotag;
+ 			psli->iocbq_lookup[iotag] = iocbq;
+-			spin_unlock_irq(phba->host->host_lock);
++			spin_unlock_irq(&phba->hbalock);
+ 			iocbq->iotag = iotag;
+ 			kfree(old_arr);
+ 			return iotag;
+ 		}
+ 	} else
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(&phba->hbalock);
+ 
+ 	lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ 			"%d:0318 Failed to allocate IOTAG. Last IOTAG is %d\n",
+@@ -349,7 +398,7 @@
+ 	/*
+ 	 * Issue iocb command to adapter
+ 	 */
+-	lpfc_sli_pcimem_bcopy(&nextiocb->iocb, iocb, sizeof (IOCB_t));
++	lpfc_sli_pcimem_bcopy(&nextiocb->iocb, iocb, phba->iocb_cmd_size);
+ 	wmb();
+ 	pring->stats.iocb_cmd++;
+ 
+@@ -361,20 +410,18 @@
+ 	if (nextiocb->iocb_cmpl)
+ 		lpfc_sli_ringtxcmpl_put(phba, pring, nextiocb);
+ 	else
+-		lpfc_sli_release_iocbq(phba, nextiocb);
++		__lpfc_sli_release_iocbq(phba, nextiocb);
+ 
+ 	/*
+ 	 * Let the HBA know what IOCB slot will be the next one the
+ 	 * driver will put a command into.
+ 	 */
+ 	pring->cmdidx = pring->next_cmdidx;
+-	writel(pring->cmdidx, phba->MBslimaddr
+-	       + (SLIMOFF + (pring->ringno * 2)) * 4);
++	writel(pring->cmdidx, &phba->host_gp[pring->ringno].cmdPutInx);
+ }
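++
++/* Note that lpfc_sli_submit_iocb() now writes the put index through the
++ * memory-mapped host_gp array instead of computing a raw SLIM offset
++ * (the SLIMOFF arithmetic removed above).
++ */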
+ 
+ static void
+-lpfc_sli_update_full_ring(struct lpfc_hba * phba,
+-			  struct lpfc_sli_ring *pring)
++lpfc_sli_update_full_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+ 	int ringno = pring->ringno;
+ 
+@@ -393,8 +440,7 @@
+ }
+ 
+ static void
+-lpfc_sli_update_ring(struct lpfc_hba * phba,
+-		     struct lpfc_sli_ring *pring)
++lpfc_sli_update_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+ 	int ringno = pring->ringno;
+ 
+@@ -407,7 +453,7 @@
+ }
+ 
+ static void
+-lpfc_sli_resume_iocb(struct lpfc_hba * phba, struct lpfc_sli_ring * pring)
++lpfc_sli_resume_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+ 	IOCB_t *iocb;
+ 	struct lpfc_iocbq *nextiocb;
+@@ -420,7 +466,7 @@
+ 	 *  (d) IOCB processing is not blocked by the outstanding mbox command.
+ 	 */
+ 	if (pring->txq_cnt &&
+-	    (phba->hba_state > LPFC_LINK_DOWN) &&
++	    lpfc_is_link_up(phba) &&
+ 	    (pring->ringno != phba->sli.fcp_ring ||
+ 	     phba->sli.sli_flag & LPFC_PROCESS_LA) &&
+ 	    !(pring->flag & LPFC_STOP_IOCB_MBX)) {
+@@ -440,11 +486,15 @@
+ 
+ /* lpfc_sli_turn_on_ring is only called by lpfc_sli_handle_mb_event below */
+ static void
+-lpfc_sli_turn_on_ring(struct lpfc_hba * phba, int ringno)
++lpfc_sli_turn_on_ring(struct lpfc_hba *phba, int ringno)
+ {
+-	struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[ringno];
++	struct lpfc_pgp *pgp = (phba->sli_rev == 3) ?
++		&phba->slim2p->mbx.us.s3_pgp.port[ringno] :
++		&phba->slim2p->mbx.us.s2.port[ringno];
++	unsigned long iflags;
+ 
+ 	/* If the ring is active, flag it */
++	spin_lock_irqsave(&phba->hbalock, iflags);
+ 	if (phba->sli.ring[ringno].cmdringaddr) {
+ 		if (phba->sli.ring[ringno].flag & LPFC_STOP_IOCB_MBX) {
+ 			phba->sli.ring[ringno].flag &= ~LPFC_STOP_IOCB_MBX;
+@@ -453,11 +503,176 @@
+ 			 */
+ 			phba->sli.ring[ringno].local_getidx
+ 				= le32_to_cpu(pgp->cmdGetInx);
+-			spin_lock_irq(phba->host->host_lock);
+ 			lpfc_sli_resume_iocb(phba, &phba->sli.ring[ringno]);
+-			spin_unlock_irq(phba->host->host_lock);
+ 		}
+ 	}
++	spin_unlock_irqrestore(&phba->hbalock, iflags);
++}
++
++struct lpfc_hbq_entry *
++lpfc_sli_next_hbq_slot(struct lpfc_hba *phba, uint32_t hbqno)
++{
++	struct hbq_s *hbqp = &phba->hbqs[hbqno];
++
++	if (hbqp->next_hbqPutIdx == hbqp->hbqPutIdx &&
++	    ++hbqp->next_hbqPutIdx >= hbqp->entry_count)
++		hbqp->next_hbqPutIdx = 0;
++
++	if (unlikely(hbqp->local_hbqGetIdx == hbqp->next_hbqPutIdx)) {
++		uint32_t raw_index = phba->hbq_get[hbqno];
++		uint32_t getidx = le32_to_cpu(raw_index);
++
++		hbqp->local_hbqGetIdx = getidx;
++
++		if (unlikely(hbqp->local_hbqGetIdx >= hbqp->entry_count)) {
++			lpfc_printf_log(phba, KERN_ERR,
++					LOG_SLI | LOG_VPORT,
++					"%d:1802 HBQ %d: local_hbqGetIdx "
++					"%u is >= hbqp->entry_count %u\n",
++					phba->brd_no, hbqno,
++					hbqp->local_hbqGetIdx,
++					hbqp->entry_count);
++
++			phba->link_state = LPFC_HBA_ERROR;
++			return NULL;
++		}
++
++		if (hbqp->local_hbqGetIdx == hbqp->next_hbqPutIdx)
++			return NULL;
++	}
++
++	return (struct lpfc_hbq_entry *) phba->hbqslimp.virt + hbqp->hbqPutIdx;
++}
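++
++/* The slot search above mirrors lpfc_sli_next_iocb_slot(): advance
++ * next_hbqPutIdx with wrap-around and re-read the adapter's little-endian
++ * get index only when the queue appears full.
++ */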
++
++void
++lpfc_sli_hbqbuf_free_all(struct lpfc_hba *phba)
++{
++	struct lpfc_dmabuf *dmabuf, *next_dmabuf;
++	struct hbq_dmabuf *hbq_buf;
++
++	/* Return all memory used by all HBQs */
++	list_for_each_entry_safe(dmabuf, next_dmabuf,
++				 &phba->hbq_buffer_list, list) {
++		hbq_buf = container_of(dmabuf, struct hbq_dmabuf, dbuf);
++		list_del(&hbq_buf->dbuf.list);
++		lpfc_hbq_free(phba, hbq_buf->dbuf.virt, hbq_buf->dbuf.phys);
++		kfree(hbq_buf);
++	}
++}
++
++static void
++lpfc_sli_hbq_to_firmware(struct lpfc_hba *phba, uint32_t hbqno,
++			 struct hbq_dmabuf *hbq_buf)
++{
++	struct lpfc_hbq_entry *hbqe;
++	dma_addr_t physaddr = hbq_buf->dbuf.phys;
++
++	/* Get next HBQ entry slot to use */
++	hbqe = lpfc_sli_next_hbq_slot(phba, hbqno);
++	if (hbqe) {
++		struct hbq_s *hbqp = &phba->hbqs[hbqno];
++
++		hbqe->bde.addrHigh = le32_to_cpu(putPaddrHigh(physaddr));
++		hbqe->bde.addrLow  = le32_to_cpu(putPaddrLow(physaddr));
++		hbqe->bde.tus.f.bdeSize = FCELSSIZE;
++		hbqe->bde.tus.f.bdeFlags = 0;
++		hbqe->bde.tus.w = le32_to_cpu(hbqe->bde.tus.w);
++		hbqe->buffer_tag = le32_to_cpu(hbq_buf->tag);
++				/* Sync SLIM */
++		hbqp->hbqPutIdx = hbqp->next_hbqPutIdx;
++		writel(hbqp->hbqPutIdx, phba->hbq_put + hbqno);
++				/* flush */
++		readl(phba->hbq_put + hbqno);
++		list_add_tail(&hbq_buf->dbuf.list, &phba->hbq_buffer_list);
++	}
++}
++
++static struct lpfc_hbq_init lpfc_els_hbq = {
++	.rn = 1,
++	.entry_count = 200,
++	.mask_count = 0,
++	.profile = 0,
++	.ring_mask = 1 << LPFC_ELS_RING,
++	.buffer_count = 0,
++	.init_count = 20,
++	.add_count = 5,
++};
++
++static struct lpfc_hbq_init *lpfc_hbq_defs[] = {
++	&lpfc_els_hbq,
++};
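++
++/* Only one HBQ, for the ELS ring, is defined here.  init_count buffers are
++ * posted at setup time via lpfc_sli_hbqbuf_init_hbqs() and add_count more
++ * per replenish via lpfc_sli_hbqbuf_add_hbqs(); buffer_count tracks how
++ * many buffers have been handed to the firmware so far.
++ */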
++
++int
++lpfc_sli_hbqbuf_fill_hbqs(struct lpfc_hba *phba, uint32_t hbqno, uint32_t count)
++{
++	uint32_t i, start, end;
++	struct hbq_dmabuf *hbq_buffer;
++
++	start = lpfc_hbq_defs[hbqno]->buffer_count;
++	end = count + lpfc_hbq_defs[hbqno]->buffer_count;
++	if (end > lpfc_hbq_defs[hbqno]->entry_count) {
++		end = lpfc_hbq_defs[hbqno]->entry_count;
++	}
++
++	/* Populate HBQ entries */
++	for (i = start; i < end; i++) {
++		hbq_buffer = kmalloc(sizeof(struct hbq_dmabuf),
++				     GFP_KERNEL);
++		if (!hbq_buffer)
++			return 1;
++		hbq_buffer->dbuf.virt = lpfc_hbq_alloc(phba, MEM_PRI,
++							&hbq_buffer->dbuf.phys);
++		if (hbq_buffer->dbuf.virt == NULL)
++			return 1;
++		hbq_buffer->tag = (i | (hbqno << 16));
++		lpfc_sli_hbq_to_firmware(phba, hbqno, hbq_buffer);
++		lpfc_hbq_defs[hbqno]->buffer_count++;
++	}
++	return 0;
++}
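++
++/* Buffer tags encode the HBQ number in the upper 16 bits and the buffer
++ * index in the lower 16 bits; lpfc_sli_hbqbuf_find() and
++ * lpfc_sli_free_hbq() below decode them the same way.
++ */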
++
++int
++lpfc_sli_hbqbuf_add_hbqs(struct lpfc_hba *phba, uint32_t qno)
++{
++	return(lpfc_sli_hbqbuf_fill_hbqs(phba, qno,
++					 lpfc_hbq_defs[qno]->add_count));
++}
++
++int
++lpfc_sli_hbqbuf_init_hbqs(struct lpfc_hba *phba, uint32_t qno)
++{
++	return(lpfc_sli_hbqbuf_fill_hbqs(phba, qno,
++					 lpfc_hbq_defs[qno]->init_count));
++}
++
++struct hbq_dmabuf *
++lpfc_sli_hbqbuf_find(struct lpfc_hba *phba, uint32_t tag)
++{
++	struct lpfc_dmabuf *d_buf;
++	struct hbq_dmabuf *hbq_buf;
++
++	list_for_each_entry(d_buf, &phba->hbq_buffer_list, list) {
++		hbq_buf = container_of(d_buf, struct hbq_dmabuf, dbuf);
++		if ((hbq_buf->tag & 0xffff) == tag) {
++			return hbq_buf;
++		}
++	}
++	lpfc_printf_log(phba, KERN_ERR, LOG_SLI | LOG_VPORT,
++			"%d:1803 Bad hbq tag. Data: x%x x%x\n",
++			phba->brd_no, tag,
++			lpfc_hbq_defs[tag >> 16]->buffer_count);
++	return NULL;
++}
++
++void
++lpfc_sli_free_hbq(struct lpfc_hba *phba, struct hbq_dmabuf *sp)
++{
++	uint32_t hbqno;
++
++	if (sp) {
++		hbqno = sp->tag >> 16;
++		lpfc_sli_hbq_to_firmware(phba, hbqno, sp);
++	}
+ }
+ 
+ static int
+@@ -511,32 +726,38 @@
+ 	case MBX_FLASH_WR_ULA:
+ 	case MBX_SET_DEBUG:
+ 	case MBX_LOAD_EXP_ROM:
++	case MBX_REG_VPI:
++	case MBX_UNREG_VPI:
++	case MBX_HEARTBEAT:
+ 		ret = mbxCommand;
+ 		break;
+ 	default:
+ 		ret = MBX_SHUTDOWN;
+ 		break;
+ 	}
+-	return (ret);
++	return ret;
+ }
+ static void
+-lpfc_sli_wake_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq)
++lpfc_sli_wake_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq)
+ {
+ 	wait_queue_head_t *pdone_q;
++	unsigned long drvr_flag;
+ 
+ 	/*
+ 	 * If pdone_q is empty, the driver thread gave up waiting and
+ 	 * continued running.
+ 	 */
+ 	pmboxq->mbox_flag |= LPFC_MBX_WAKE;
++	spin_lock_irqsave(&phba->hbalock, drvr_flag);
+ 	pdone_q = (wait_queue_head_t *) pmboxq->context1;
+ 	if (pdone_q)
+ 		wake_up_interruptible(pdone_q);
++	spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+ 	return;
+ }
+ 
+ void
+-lpfc_sli_def_mbox_cmpl(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmb)
++lpfc_sli_def_mbox_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
+ {
+ 	struct lpfc_dmabuf *mp;
+ 	uint16_t rpi;
+@@ -553,78 +774,63 @@
+ 	 * If a REG_LOGIN succeeded after the node was destroyed or the
+ 	 * node is in re-discovery, the driver needs to clean up the RPI.
+ 	 */
+-	if (!(phba->fc_flag & FC_UNLOADING) &&
+-		(pmb->mb.mbxCommand == MBX_REG_LOGIN64) &&
+-		(!pmb->mb.mbxStatus)) {
++	if (!(phba->pport->load_flag & FC_UNLOADING) &&
++	    pmb->mb.mbxCommand == MBX_REG_LOGIN64 &&
++	    !pmb->mb.mbxStatus) {
+ 
+ 		rpi = pmb->mb.un.varWords[0];
+-		lpfc_unreg_login(phba, rpi, pmb);
+-		pmb->mbox_cmpl=lpfc_sli_def_mbox_cmpl;
++		lpfc_unreg_login(phba, pmb->mb.un.varRegLogin.vpi, rpi, pmb);
++		pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+ 		rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
+ 		if (rc != MBX_NOT_FINISHED)
+ 			return;
+ 	}
+ 
+-	mempool_free( pmb, phba->mbox_mem_pool);
++	mempool_free(pmb, phba->mbox_mem_pool);
+ 	return;
+ }
+ 
+ int
+-lpfc_sli_handle_mb_event(struct lpfc_hba * phba)
++lpfc_sli_handle_mb_event(struct lpfc_hba *phba)
+ {
+-	MAILBOX_t *mbox;
+ 	MAILBOX_t *pmbox;
+ 	LPFC_MBOXQ_t *pmb;
+-	struct lpfc_sli *psli;
+-	int i, rc;
+-	uint32_t process_next;
+-
+-	psli = &phba->sli;
+-	/* We should only get here if we are in SLI2 mode */
+-	if (!(phba->sli.sli_flag & LPFC_SLI2_ACTIVE)) {
+-		return (1);
+-	}
++	int rc;
++	LIST_HEAD(cmplq);
+ 
+ 	phba->sli.slistat.mbox_event++;
+ 
++	/* Get all completed mailbox buffers into the cmplq */
++	spin_lock_irq(&phba->hbalock);
++	list_splice_init(&phba->sli.mboxq_cmpl, &cmplq);
++	spin_unlock_irq(&phba->hbalock);
++
+ 	/* Get a Mailbox buffer to setup mailbox commands for callback */
+-	if ((pmb = phba->sli.mbox_active)) {
+-		pmbox = &pmb->mb;
+-		mbox = &phba->slim2p->mbx;
++	do {
++		list_remove_head(&cmplq, pmb, LPFC_MBOXQ_t, list);
++		if (pmb == NULL)
++			break;
+ 
+-		/* First check out the status word */
+-		lpfc_sli_pcimem_bcopy(mbox, pmbox, sizeof (uint32_t));
++		pmbox = &pmb->mb;
+ 
+-		/* Sanity check to ensure the host owns the mailbox */
+-		if (pmbox->mbxOwner != OWN_HOST) {
+-			/* Lets try for a while */
+-			for (i = 0; i < 10240; i++) {
+-				/* First copy command data */
+-				lpfc_sli_pcimem_bcopy(mbox, pmbox,
+-							sizeof (uint32_t));
+-				if (pmbox->mbxOwner == OWN_HOST)
+-					goto mbout;
++		if (pmbox->mbxCommand != MBX_HEARTBEAT) {
++			if (pmb->vport) {
++				lpfc_debugfs_disc_trc(pmb->vport,
++					LPFC_DISC_TRC_MBOX_VPORT,
++					"MBOX cmpl vport: cmd:x%x mb:x%x x%x",
++					(uint32_t)pmbox->mbxCommand,
++					pmbox->un.varWords[0],
++					pmbox->un.varWords[1]);
++			}
++			} else {
++					LPFC_DISC_TRC_MBOX,
++					"MBOX cmpl:       cmd:x%x mb:x%x x%x",
++					(uint32_t)pmbox->mbxCommand,
++					pmbox->un.varWords[0],
++					pmbox->un.varWords[1]);
+ 			}
+-			/* Stray Mailbox Interrupt, mbxCommand <cmd> mbxStatus
+-			   <status> */
+-			lpfc_printf_log(phba,
+-					KERN_WARNING,
+-					LOG_MBOX | LOG_SLI,
+-					"%d:0304 Stray Mailbox Interrupt "
+-					"mbxCommand x%x mbxStatus x%x\n",
+-					phba->brd_no,
+-					pmbox->mbxCommand,
+-					pmbox->mbxStatus);
+-
+-			spin_lock_irq(phba->host->host_lock);
+-			phba->sli.sli_flag |= LPFC_SLI_MBOX_ACTIVE;
+-			spin_unlock_irq(phba->host->host_lock);
+-			return (1);
+ 		}
+-
+-	      mbout:
+-		del_timer_sync(&phba->sli.mbox_tmo);
+-		phba->work_hba_events &= ~WORKER_MBOX_TMO;
+ 
+ 		/*
+ 		 * It is a fatal error if unknown mbox command completion.
+@@ -633,51 +839,50 @@
+ 		    MBX_SHUTDOWN) {
+ 
+ 			/* Unknown mailbox command completion */
+-			lpfc_printf_log(phba,
+-				KERN_ERR,
+-				LOG_MBOX | LOG_SLI,
+-				"%d:0323 Unknown Mailbox command %x Cmpl\n",
++			lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
++					"%d (%d):0323 Unknown Mailbox command "
++					"%x Cmpl\n",
+ 				phba->brd_no,
++					pmb->vport ? pmb->vport->vpi : 0,
+ 				pmbox->mbxCommand);
+-			phba->hba_state = LPFC_HBA_ERROR;
++			phba->link_state = LPFC_HBA_ERROR;
+ 			phba->work_hs = HS_FFER3;
+ 			lpfc_handle_eratt(phba);
+-			return (0);
++			continue;
+ 		}
+ 
+-		phba->sli.mbox_active = NULL;
+ 		if (pmbox->mbxStatus) {
+ 			phba->sli.slistat.mbox_stat_err++;
+ 			if (pmbox->mbxStatus == MBXERR_NO_RESOURCES) {
+ 				/* Mbox cmd cmpl error - RETRYing */
+-				lpfc_printf_log(phba,
+-					KERN_INFO,
++				lpfc_printf_log(phba, KERN_INFO,
+ 					LOG_MBOX | LOG_SLI,
+-					"%d:0305 Mbox cmd cmpl error - "
+-					"RETRYing Data: x%x x%x x%x x%x\n",
++						"%d (%d):0305 Mbox cmd cmpl "
++						"error - RETRYing Data: x%x "
++						"x%x x%x x%x\n",
+ 					phba->brd_no,
++						pmb->vport ? pmb->vport->vpi : 0,
+ 					pmbox->mbxCommand,
+ 					pmbox->mbxStatus,
+ 					pmbox->un.varWords[0],
+-					phba->hba_state);
++						pmb->vport ?
++						pmb->vport->port_state : 0);
+ 				pmbox->mbxStatus = 0;
+ 				pmbox->mbxOwner = OWN_HOST;
+-				spin_lock_irq(phba->host->host_lock);
++				spin_lock_irq(&phba->hbalock);
+ 				phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+-				spin_unlock_irq(phba->host->host_lock);
++				spin_unlock_irq(&phba->hbalock);
+ 				rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
+ 				if (rc == MBX_SUCCESS)
+-					return (0);
++					continue;
+ 			}
+ 		}
+ 
+ 		/* Mailbox cmd <cmd> Cmpl <cmpl> */
+-		lpfc_printf_log(phba,
+-				KERN_INFO,
+-				LOG_MBOX | LOG_SLI,
+-				"%d:0307 Mailbox cmd x%x Cmpl x%p "
++		lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
++				"%d (%d):0307 Mailbox cmd x%x Cmpl x%p "
+ 				"Data: x%x x%x x%x x%x x%x x%x x%x x%x x%x\n",
+ 				phba->brd_no,
++				pmb->vport ? pmb->vport->vpi : 0,
+ 				pmbox->mbxCommand,
+ 				pmb->mbox_cmpl,
+ 				*((uint32_t *) pmbox),
+@@ -690,39 +895,35 @@
+ 				pmbox->un.varWords[6],
+ 				pmbox->un.varWords[7]);
+ 
+-		if (pmb->mbox_cmpl) {
+-			lpfc_sli_pcimem_bcopy(mbox, pmbox, MAILBOX_CMD_SIZE);
++		if (pmb->mbox_cmpl)
+ 			pmb->mbox_cmpl(phba,pmb);
+-		}
+-	}
+-
++	} while (1);
++	return 0;
++}
+ 
+-	do {
+-		process_next = 0;	/* by default don't loop */
+-		spin_lock_irq(phba->host->host_lock);
+-		phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++static struct lpfc_dmabuf *
++lpfc_sli_replace_hbqbuff(struct lpfc_hba *phba, uint32_t tag)
++{
++	struct hbq_dmabuf *hbq_entry, *new_hbq_entry;
+ 
+-		/* Process next mailbox command if there is one */
+-		if ((pmb = lpfc_mbox_get(phba))) {
+-			spin_unlock_irq(phba->host->host_lock);
+-			rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
+-			if (rc == MBX_NOT_FINISHED) {
+-				pmb->mb.mbxStatus = MBX_NOT_FINISHED;
+-				pmb->mbox_cmpl(phba,pmb);
+-				process_next = 1;
+-				continue;	/* loop back */
+-			}
+-		} else {
+-			spin_unlock_irq(phba->host->host_lock);
+-			/* Turn on IOCB processing */
+-			for (i = 0; i < phba->sli.num_rings; i++)
+-				lpfc_sli_turn_on_ring(phba, i);
++	hbq_entry = lpfc_sli_hbqbuf_find(phba, tag);
++	if (hbq_entry == NULL)
++		return NULL;
++	list_del(&hbq_entry->dbuf.list);
++	new_hbq_entry = kmalloc(sizeof(struct hbq_dmabuf), GFP_ATOMIC);
++	if (new_hbq_entry == NULL)
++		return &hbq_entry->dbuf;
++	new_hbq_entry->dbuf = hbq_entry->dbuf;
++	new_hbq_entry->tag = -1;
++	hbq_entry->dbuf.virt = lpfc_hbq_alloc(phba, 0, &hbq_entry->dbuf.phys);
++	if (hbq_entry->dbuf.virt == NULL) {
++		kfree(new_hbq_entry);
++		return &hbq_entry->dbuf;
+ 		}
+-
+-	} while (process_next);
+-
+-	return (0);
++	lpfc_sli_free_hbq(phba, hbq_entry);
++	return &new_hbq_entry->dbuf;
+ }
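++
++/* lpfc_sli_replace_hbqbuff() hands the received DMA buffer up in a fresh
++ * wrapper and posts a newly allocated buffer back to the firmware.  If
++ * either allocation fails it returns the original entry instead, so the
++ * HBQ shrinks by one buffer rather than dropping the frame.
++ */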
++
+ static int
+ lpfc_sli_process_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+ 			    struct lpfc_iocbq *saveq)
+@@ -735,7 +936,9 @@
+ 	match = 0;
+ 	irsp = &(saveq->iocb);
+ 	if ((irsp->ulpCommand == CMD_RCV_ELS_REQ64_CX)
+-	    || (irsp->ulpCommand == CMD_RCV_ELS_REQ_CX)) {
++	    || (irsp->ulpCommand == CMD_RCV_ELS_REQ_CX)
++	    || (irsp->ulpCommand == CMD_IOCB_RCV_ELS64_CX)
++	    || (irsp->ulpCommand == CMD_IOCB_RCV_CONT64_CX)) {
+ 		Rctl = FC_ELS_REQ;
+ 		Type = FC_ELS_DATA;
+ 	} else {
+@@ -747,13 +950,24 @@
+ 
+ 		/* Firmware Workaround */
+ 		if ((Rctl == 0) && (pring->ringno == LPFC_ELS_RING) &&
+-			(irsp->ulpCommand == CMD_RCV_SEQUENCE64_CX)) {
++			(irsp->ulpCommand == CMD_RCV_SEQUENCE64_CX ||
++			 irsp->ulpCommand == CMD_IOCB_RCV_SEQ64_CX)) {
+ 			Rctl = FC_ELS_REQ;
+ 			Type = FC_ELS_DATA;
+ 			w5p->hcsw.Rctl = Rctl;
+ 			w5p->hcsw.Type = Type;
+ 		}
+ 	}
++
++	if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) {
++		if (irsp->ulpBdeCount != 0)
++			saveq->context2 = lpfc_sli_replace_hbqbuff(phba,
++						irsp->un.ulpWord[3]);
++		if (irsp->ulpBdeCount == 2)
++			saveq->context3 = lpfc_sli_replace_hbqbuff(phba,
++						irsp->un.ulpWord[15]);
++	}
++
+ 	/* Unsolicited responses */
+ 	if (pring->prt[0].profile) {
+ 		if (pring->prt[0].lpfc_sli_rcv_unsol_event)
+@@ -781,23 +995,21 @@
+ 		/* Unexpected Rctl / Type received */
+ 		/* Ring <ringno> handler: unexpected
+ 		   Rctl <Rctl> Type <Type> received */
+-		lpfc_printf_log(phba,
+-				KERN_WARNING,
+-				LOG_SLI,
++		lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+ 				"%d:0313 Ring %d handler: unexpected Rctl x%x "
+-				"Type x%x received \n",
++				"Type x%x received\n",
+ 				phba->brd_no,
+ 				pring->ringno,
+ 				Rctl,
+ 				Type);
+ 	}
+-	return(1);
++	return 1;
+ }
+ 
+ static struct lpfc_iocbq *
+-lpfc_sli_iocbq_lookup(struct lpfc_hba * phba,
+-		      struct lpfc_sli_ring * pring,
+-		      struct lpfc_iocbq * prspiocb)
++lpfc_sli_iocbq_lookup(struct lpfc_hba *phba,
++		      struct lpfc_sli_ring *pring,
++		      struct lpfc_iocbq *prspiocb)
+ {
+ 	struct lpfc_iocbq *cmd_iocb = NULL;
+ 	uint16_t iotag;
+@@ -806,7 +1018,7 @@
+ 
+ 	if (iotag != 0 && iotag <= phba->sli.last_iotag) {
+ 		cmd_iocb = phba->sli.iocbq_lookup[iotag];
+-		list_del(&cmd_iocb->list);
++		list_del_init(&cmd_iocb->list);
+ 		pring->txcmplq_cnt--;
+ 		return cmd_iocb;
+ 	}
+@@ -821,16 +1033,18 @@
+ }
+ 
+ static int
+-lpfc_sli_process_sol_iocb(struct lpfc_hba * phba, struct lpfc_sli_ring * pring,
++lpfc_sli_process_sol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+ 			  struct lpfc_iocbq *saveq)
+ {
+-	struct lpfc_iocbq * cmdiocbp;
++	struct lpfc_iocbq *cmdiocbp;
+ 	int rc = 1;
+ 	unsigned long iflag;
+ 
+ 	/* Based on the iotag field, get the cmd IOCB from the txcmplq */
+-	spin_lock_irqsave(phba->host->host_lock, iflag);
++	spin_lock_irqsave(&phba->hbalock, iflag);
+ 	cmdiocbp = lpfc_sli_iocbq_lookup(phba, pring, saveq);
++	spin_unlock_irqrestore(&phba->hbalock, iflag);
++
+ 	if (cmdiocbp) {
+ 		if (cmdiocbp->iocb_cmpl) {
+ 			/*
+@@ -846,17 +1060,8 @@
+ 					saveq->iocb.un.ulpWord[4] =
+ 						IOERR_SLI_ABORTED;
+ 				}
+-				spin_unlock_irqrestore(phba->host->host_lock,
+-						       iflag);
+-				(cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq);
+-				spin_lock_irqsave(phba->host->host_lock, iflag);
+ 			}
+-			else {
+-				spin_unlock_irqrestore(phba->host->host_lock,
+-				(cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq);
++			(cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq);
+ 				(cmdiocbp->iocb_cmpl) (phba, cmdiocbp, saveq);
+-				spin_lock_irqsave(phba->host->host_lock, iflag);
+-			}
+ 		} else
+ 			lpfc_sli_release_iocbq(phba, cmdiocbp);
+ 	} else {
+@@ -870,12 +1075,12 @@
+ 			 * Ring <ringno> handler: unexpected completion IoTag
+ 			 * <IoTag>
+ 			 */
+-			lpfc_printf_log(phba,
+-				KERN_WARNING,
+-				LOG_SLI,
+-				"%d:0322 Ring %d handler: unexpected "
+-				"completion IoTag x%x Data: x%x x%x x%x x%x\n",
++			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
++					"%d (%d):0322 Ring %d handler: "
++					"unexpected completion IoTag x%x "
++					"Data: x%x x%x x%x x%x\n",
+ 				phba->brd_no,
++					saveq->vport ? saveq->vport->vpi : 0,
+ 				pring->ringno,
+ 				saveq->iocb.ulpIoTag,
+ 				saveq->iocb.ulpStatus,
+@@ -885,14 +1090,15 @@
+ 		}
+ 	}
+ 
+-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
+ 	return rc;
+ }
+ 
+-static void lpfc_sli_rsp_pointers_error(struct lpfc_hba * phba,
+-					struct lpfc_sli_ring * pring)
++static void
++lpfc_sli_rsp_pointers_error(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+-	struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno];
++	struct lpfc_pgp *pgp = (phba->sli_rev == 3) ?
++		&phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] :
++		&phba->slim2p->mbx.us.s2.port[pring->ringno];
+ 	/*
+ 	 * Ring <ringno> handler: portRspPut <portRspPut> is bigger than
+ 	 * rsp ring <portRspMax>
+@@ -904,7 +1110,7 @@
+ 			le32_to_cpu(pgp->rspPutInx),
+ 			pring->numRiocb);
+ 
+-	phba->hba_state = LPFC_HBA_ERROR;
++	phba->link_state = LPFC_HBA_ERROR;
+ 
+ 	/*
+ 	 * All error attention handlers are posted to
+@@ -912,16 +1118,18 @@
+ 	 */
+ 	phba->work_ha |= HA_ERATT;
+ 	phba->work_hs = HS_FFER3;
++
++	/* hbalock should already be held */
+ 	if (phba->work_wait)
+-		wake_up(phba->work_wait);
++		lpfc_worker_wake_up(phba);
+ 
+ 	return;
+ }
+ 
+-void lpfc_sli_poll_fcp_ring(struct lpfc_hba * phba)
++void lpfc_sli_poll_fcp_ring(struct lpfc_hba *phba)
+ {
+-	struct lpfc_sli      * psli   = &phba->sli;
+-	struct lpfc_sli_ring * pring = &psli->ring[LPFC_FCP_RING];
++	struct lpfc_sli      *psli  = &phba->sli;
++	struct lpfc_sli_ring *pring = &psli->ring[LPFC_FCP_RING];
+ 	IOCB_t *irsp = NULL;
+ 	IOCB_t *entry = NULL;
+ 	struct lpfc_iocbq *cmdiocbq = NULL;
+@@ -931,13 +1139,15 @@
+ 	uint32_t portRspPut, portRspMax;
+ 	int type;
+ 	uint32_t rsp_cmpl = 0;
+-	void __iomem *to_slim;
+ 	uint32_t ha_copy;
++	unsigned long iflags;
+ 
+ 	pring->stats.iocb_event++;
+ 
+-	/* The driver assumes SLI-2 mode */
+-	pgp =  &phba->slim2p->mbx.us.s2.port[pring->ringno];
++	pgp = (phba->sli_rev == 3) ?
++		&phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] :
++		&phba->slim2p->mbx.us.s2.port[pring->ringno];
++
+ 
+ 	/*
+ 	 * The next available response entry should never exceed the maximum
+@@ -952,15 +1162,13 @@
+ 
+ 	rmb();
+ 	while (pring->rspidx != portRspPut) {
+-
+-		entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx);
+-
++		entry = lpfc_resp_iocb(phba, pring);
+ 		if (++pring->rspidx >= portRspMax)
+ 			pring->rspidx = 0;
+ 
+ 		lpfc_sli_pcimem_bcopy((uint32_t *) entry,
+ 				      (uint32_t *) &rspiocbq.iocb,
+-				      sizeof (IOCB_t));
++				      phba->iocb_rsp_size);
+ 		irsp = &rspiocbq.iocb;
+ 		type = lpfc_sli_iocb_cmd_type(irsp->ulpCommand & CMD_IOCB_MASK);
+ 		pring->stats.iocb_rsp++;
+@@ -998,8 +1206,10 @@
+ 				break;
+ 			}
+ 
++			spin_lock_irqsave(&phba->hbalock, iflags);
+ 			cmdiocbq = lpfc_sli_iocbq_lookup(phba, pring,
+ 							 &rspiocbq);
++			spin_unlock_irqrestore(&phba->hbalock, iflags);
+ 			if ((cmdiocbq) && (cmdiocbq->iocb_cmpl)) {
+ 				(cmdiocbq->iocb_cmpl)(phba, cmdiocbq,
+ 						      &rspiocbq);
+@@ -1033,9 +1243,7 @@
+ 		 * been updated, sync the pgp->rspPutInx and fetch the new port
+ 		 * response put pointer.
+ 		 */
+-		to_slim = phba->MBslimaddr +
+-			(SLIMOFF + (pring->ringno * 2) + 1) * 4;
+-		writeb(pring->rspidx, to_slim);
++		writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx);
+ 
+ 		if (pring->rspidx == portRspPut)
+ 			portRspPut = le32_to_cpu(pgp->rspPutInx);
+@@ -1045,13 +1253,16 @@
+ 	ha_copy >>= (LPFC_FCP_RING * 4);
+ 
+ 	if ((rsp_cmpl > 0) && (ha_copy & HA_R0RE_REQ)) {
++		spin_lock_irqsave(&phba->hbalock, iflags);
+ 		pring->stats.iocb_rsp_full++;
+ 		status = ((CA_R0ATT | CA_R0RE_RSP) << (LPFC_FCP_RING * 4));
+ 		writel(status, phba->CAregaddr);
+ 		readl(phba->CAregaddr);
++		spin_unlock_irqrestore(&phba->hbalock, iflags);
+ 	}
+ 	if ((ha_copy & HA_R0CE_RSP) &&
+ 	    (pring->flag & LPFC_CALL_RING_AVAILABLE)) {
++		spin_lock_irqsave(&phba->hbalock, iflags);
+ 		pring->flag &= ~LPFC_CALL_RING_AVAILABLE;
+ 		pring->stats.iocb_cmd_empty++;
+ 
+@@ -1062,6 +1273,7 @@
+ 		if ((pring->lpfc_sli_cmd_available))
+ 			(pring->lpfc_sli_cmd_available) (phba, pring);
+ 
++		spin_unlock_irqrestore(&phba->hbalock, iflags);
+ 	}
+ 
+ 	return;
+@@ -1072,10 +1284,12 @@
+  * to check it explicitly.
+  */
+ static int
+-lpfc_sli_handle_fast_ring_event(struct lpfc_hba * phba,
+-				struct lpfc_sli_ring * pring, uint32_t mask)
++lpfc_sli_handle_fast_ring_event(struct lpfc_hba *phba,
++				struct lpfc_sli_ring *pring, uint32_t mask)
+ {
+- 	struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno];
++	struct lpfc_pgp *pgp = (phba->sli_rev == 3) ?
++		&phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] :
++		&phba->slim2p->mbx.us.s2.port[pring->ringno];
+ 	IOCB_t *irsp = NULL;
+ 	IOCB_t *entry = NULL;
+ 	struct lpfc_iocbq *cmdiocbq = NULL;
+@@ -1086,9 +1300,8 @@
+ 	lpfc_iocb_type type;
+ 	unsigned long iflag;
+ 	uint32_t rsp_cmpl = 0;
+-	void __iomem  *to_slim;
+ 
+-	spin_lock_irqsave(phba->host->host_lock, iflag);
++	spin_lock_irqsave(&phba->hbalock, iflag);
+ 	pring->stats.iocb_event++;
+ 
+ 	/*
+@@ -1099,7 +1312,7 @@
+ 	portRspPut = le32_to_cpu(pgp->rspPutInx);
+ 	if (unlikely(portRspPut >= portRspMax)) {
+ 		lpfc_sli_rsp_pointers_error(phba, pring);
+-		spin_unlock_irqrestore(phba->host->host_lock, iflag);
++		spin_unlock_irqrestore(&phba->hbalock, iflag);
+ 		return 1;
+ 	}
+ 
+@@ -1110,14 +1323,15 @@
+ 		 * structure.  The copy involves a byte-swap since the
+ 		 * network byte order and pci byte orders are different.
+ 		 */
+-		entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx);
++		entry = lpfc_resp_iocb(phba, pring);
++		phba->last_completion_time = jiffies;
+ 
+ 		if (++pring->rspidx >= portRspMax)
+ 			pring->rspidx = 0;
+ 
+ 		lpfc_sli_pcimem_bcopy((uint32_t *) entry,
+ 				      (uint32_t *) &rspiocbq.iocb,
+-				      sizeof (IOCB_t));
++				      phba->iocb_rsp_size);
+ 		INIT_LIST_HEAD(&(rspiocbq.list));
+ 		irsp = &rspiocbq.iocb;
+ 
+@@ -1126,14 +1340,28 @@
+ 		rsp_cmpl++;
+ 
+ 		if (unlikely(irsp->ulpStatus)) {
++			/*
++			 * If resource errors are reported by the HBA, reduce
++			 * the queue depths of the SCSI devices.
++			 */
++			if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++				(irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) {
++				spin_unlock_irqrestore(&phba->hbalock, iflag);
++				lpfc_adjust_queue_depth(phba);
++				spin_lock_irqsave(&phba->hbalock, iflag);
++			}
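++			/* hbalock is dropped and re-taken around
++			 * lpfc_adjust_queue_depth(); the slow-ring
++			 * handler below does the same.
++			 */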
++
+ 			/* Rsp ring <ringno> error: IOCB */
+ 			lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
+ 				"%d:0336 Rsp Ring %d error: IOCB Data: "
+ 				"x%x x%x x%x x%x x%x x%x x%x x%x\n",
+ 				phba->brd_no, pring->ringno,
+-				irsp->un.ulpWord[0], irsp->un.ulpWord[1],
+-				irsp->un.ulpWord[2], irsp->un.ulpWord[3],
+-				irsp->un.ulpWord[4], irsp->un.ulpWord[5],
++					irsp->un.ulpWord[0],
++					irsp->un.ulpWord[1],
++					irsp->un.ulpWord[2],
++					irsp->un.ulpWord[3],
++					irsp->un.ulpWord[4],
++					irsp->un.ulpWord[5],
+ 				*(((uint32_t *) irsp) + 6),
+ 				*(((uint32_t *) irsp) + 7));
+ 		}
+@@ -1149,7 +1377,8 @@
+ 				lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+ 						"%d:0333 IOCB cmd 0x%x"
+ 						" processed. Skipping"
+-						" completion\n", phba->brd_no,
++						" completion\n",
++						phba->brd_no,
+ 						irsp->ulpCommand);
+ 				break;
+ 			}
+@@ -1161,19 +1390,19 @@
+ 					(cmdiocbq->iocb_cmpl)(phba, cmdiocbq,
+ 							      &rspiocbq);
+ 				} else {
+-					spin_unlock_irqrestore(
+-						phba->host->host_lock, iflag);
++					spin_unlock_irqrestore(&phba->hbalock,
++							       iflag);
+ 					(cmdiocbq->iocb_cmpl)(phba, cmdiocbq,
+ 							      &rspiocbq);
+-					spin_lock_irqsave(phba->host->host_lock,
++					spin_lock_irqsave(&phba->hbalock,
+ 							  iflag);
+ 				}
+ 			}
+ 			break;
+ 		case LPFC_UNSOL_IOCB:
+-			spin_unlock_irqrestore(phba->host->host_lock, iflag);
++			spin_unlock_irqrestore(&phba->hbalock, iflag);
+ 			lpfc_sli_process_unsol_iocb(phba, pring, &rspiocbq);
+-			spin_lock_irqsave(phba->host->host_lock, iflag);
++			spin_lock_irqsave(&phba->hbalock, iflag);
+ 			break;
+ 		default:
+ 			if (irsp->ulpCommand == CMD_ADAPTER_MSG) {
+@@ -1188,8 +1417,10 @@
+ 				lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ 					"%d:0334 Unknown IOCB command "
+ 					"Data: x%x, x%x x%x x%x x%x\n",
+-					phba->brd_no, type, irsp->ulpCommand,
+-					irsp->ulpStatus, irsp->ulpIoTag,
++						phba->brd_no, type,
++						irsp->ulpCommand,
++						irsp->ulpStatus,
++						irsp->ulpIoTag,
+ 					irsp->ulpContext);
+ 			}
+ 			break;
+@@ -1201,9 +1432,7 @@
+ 		 * been updated, sync the pgp->rspPutInx and fetch the new port
+ 		 * response put pointer.
+ 		 */
+-		to_slim = phba->MBslimaddr +
+-			(SLIMOFF + (pring->ringno * 2) + 1) * 4;
+-		writel(pring->rspidx, to_slim);
++		writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx);
+ 
+ 		if (pring->rspidx == portRspPut)
+ 			portRspPut = le32_to_cpu(pgp->rspPutInx);
+@@ -1228,31 +1457,31 @@
+ 
+ 	}
+ 
+-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
++	spin_unlock_irqrestore(&phba->hbalock, iflag);
+ 	return rc;
+ }
+ 
+-
+ int
+-lpfc_sli_handle_slow_ring_event(struct lpfc_hba * phba,
+-			   struct lpfc_sli_ring * pring, uint32_t mask)
++lpfc_sli_handle_slow_ring_event(struct lpfc_hba *phba,
++				struct lpfc_sli_ring *pring, uint32_t mask)
+ {
++	struct lpfc_pgp *pgp = (phba->sli_rev == 3) ?
++		&phba->slim2p->mbx.us.s3_pgp.port[pring->ringno] :
++		&phba->slim2p->mbx.us.s2.port[pring->ringno];
+ 	IOCB_t *entry;
+ 	IOCB_t *irsp = NULL;
+ 	struct lpfc_iocbq *rspiocbp = NULL;
+ 	struct lpfc_iocbq *next_iocb;
+ 	struct lpfc_iocbq *cmdiocbp;
+ 	struct lpfc_iocbq *saveq;
+-	struct lpfc_pgp *pgp = &phba->slim2p->mbx.us.s2.port[pring->ringno];
+ 	uint8_t iocb_cmd_type;
+ 	lpfc_iocb_type type;
+ 	uint32_t status, free_saveq;
+ 	uint32_t portRspPut, portRspMax;
+ 	int rc = 1;
+ 	unsigned long iflag;
+-	void __iomem  *to_slim;
+ 
+-	spin_lock_irqsave(phba->host->host_lock, iflag);
++	spin_lock_irqsave(&phba->hbalock, iflag);
+ 	pring->stats.iocb_event++;
+ 
+ 	/*
+@@ -1266,16 +1495,14 @@
+ 		 * Ring <ringno> handler: portRspPut <portRspPut> is bigger than
+ 		 * rsp ring <portRspMax>
+ 		 */
+-		lpfc_printf_log(phba,
+-				KERN_ERR,
+-				LOG_SLI,
++		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+ 				"%d:0303 Ring %d handler: portRspPut %d "
+ 				"is bigger than rsp ring %d\n",
+-				phba->brd_no,
+-				pring->ringno, portRspPut, portRspMax);
++				phba->brd_no, pring->ringno, portRspPut,
++				portRspMax);
+ 
+-		phba->hba_state = LPFC_HBA_ERROR;
+-		spin_unlock_irqrestore(phba->host->host_lock, iflag);
++		phba->link_state = LPFC_HBA_ERROR;
++		spin_unlock_irqrestore(&phba->hbalock, iflag);
+ 
+ 		phba->work_hs = HS_FFER3;
+ 		lpfc_handle_eratt(phba);
+@@ -1298,23 +1525,24 @@
+ 		 * the ulpLe field is set, the entire Command has been
+ 		 * received.
+ 		 */
+-		entry = IOCB_ENTRY(pring->rspringaddr, pring->rspidx);
+-		rspiocbp = lpfc_sli_get_iocbq(phba);
++		entry = lpfc_resp_iocb(phba, pring);
++
++		phba->last_completion_time = jiffies;
++		rspiocbp = __lpfc_sli_get_iocbq(phba);
+ 		if (rspiocbp == NULL) {
+ 			printk(KERN_ERR "%s: out of buffers! Failing "
+ 			       "completion.\n", __FUNCTION__);
+ 			break;
+ 		}
+ 
+-		lpfc_sli_pcimem_bcopy(entry, &rspiocbp->iocb, sizeof (IOCB_t));
++		lpfc_sli_pcimem_bcopy(entry, &rspiocbp->iocb,
++				      phba->iocb_rsp_size);
+ 		irsp = &rspiocbp->iocb;
+ 
+ 		if (++pring->rspidx >= portRspMax)
+ 			pring->rspidx = 0;
+ 
+-		to_slim = phba->MBslimaddr + (SLIMOFF + (pring->ringno * 2)
+-					      + 1) * 4;
+-		writel(pring->rspidx, to_slim);
++		writel(pring->rspidx, &phba->host_gp[pring->ringno].rspGetInx);
+ 
+ 		if (list_empty(&(pring->iocb_continueq))) {
+ 			list_add(&rspiocbp->list, &(pring->iocb_continueq));
+@@ -1338,13 +1566,26 @@
+ 
+ 			pring->stats.iocb_rsp++;
+ 
++			/*
++			 * If resource errors are reported by the HBA, reduce
++			 * the queue depths of the SCSI devices.
++			 */
++			if ((irsp->ulpStatus == IOSTAT_LOCAL_REJECT) &&
++			     (irsp->un.ulpWord[4] == IOERR_NO_RESOURCES)) {
++				spin_unlock_irqrestore(&phba->hbalock, iflag);
++				lpfc_adjust_queue_depth(phba);
++				spin_lock_irqsave(&phba->hbalock, iflag);
++			}
++
+ 			if (irsp->ulpStatus) {
+ 				/* Rsp ring <ringno> error: IOCB */
+-				lpfc_printf_log(phba,
+-					KERN_WARNING,
+-					LOG_SLI,
+-					"%d:0328 Rsp Ring %d error: IOCB Data: "
+-					"x%x x%x x%x x%x x%x x%x x%x x%x\n",
++				lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
++						"%d:0328 Rsp Ring %d error: "
++						"IOCB Data: "
++						"x%x x%x x%x x%x "
++						"x%x x%x x%x x%x "
++						"x%x x%x x%x x%x "
++						"x%x x%x x%x x%x\n",
+ 					phba->brd_no,
+ 					pring->ringno,
+ 					irsp->un.ulpWord[0],
+@@ -1354,7 +1595,15 @@
+ 					irsp->un.ulpWord[4],
+ 					irsp->un.ulpWord[5],
+ 					*(((uint32_t *) irsp) + 6),
+-					*(((uint32_t *) irsp) + 7));
++						*(((uint32_t *) irsp) + 7),
++						*(((uint32_t *) irsp) + 8),
++						*(((uint32_t *) irsp) + 9),
++						*(((uint32_t *) irsp) + 10),
++						*(((uint32_t *) irsp) + 11),
++						*(((uint32_t *) irsp) + 12),
++						*(((uint32_t *) irsp) + 13),
++						*(((uint32_t *) irsp) + 14),
++						*(((uint32_t *) irsp) + 15));
+ 			}
+ 
+ 			/*
+@@ -1366,17 +1615,17 @@
+ 			iocb_cmd_type = irsp->ulpCommand & CMD_IOCB_MASK;
+ 			type = lpfc_sli_iocb_cmd_type(iocb_cmd_type);
+ 			if (type == LPFC_SOL_IOCB) {
+-				spin_unlock_irqrestore(phba->host->host_lock,
++				spin_unlock_irqrestore(&phba->hbalock,
+ 						       iflag);
+ 				rc = lpfc_sli_process_sol_iocb(phba, pring,
+ 					saveq);
+-				spin_lock_irqsave(phba->host->host_lock, iflag);
++				spin_lock_irqsave(&phba->hbalock, iflag);
+ 			} else if (type == LPFC_UNSOL_IOCB) {
+-				spin_unlock_irqrestore(phba->host->host_lock,
++				spin_unlock_irqrestore(&phba->hbalock,
+ 						       iflag);
+ 				rc = lpfc_sli_process_unsol_iocb(phba, pring,
+ 					saveq);
+-				spin_lock_irqsave(phba->host->host_lock, iflag);
++				spin_lock_irqsave(&phba->hbalock, iflag);
+ 			} else if (type == LPFC_ABORT_IOCB) {
+ 				if ((irsp->ulpCommand != CMD_XRI_ABORTED_CX) &&
+ 				    ((cmdiocbp =
+@@ -1386,15 +1635,15 @@
+ 					   routine */
+ 					if (cmdiocbp->iocb_cmpl) {
+ 						spin_unlock_irqrestore(
+-						       phba->host->host_lock,
++						       &phba->hbalock,
+ 						       iflag);
+ 						(cmdiocbp->iocb_cmpl) (phba,
+ 							     cmdiocbp, saveq);
+ 						spin_lock_irqsave(
+-							  phba->host->host_lock,
++							  &phba->hbalock,
+ 							  iflag);
+ 					} else
+-						lpfc_sli_release_iocbq(phba,
++						__lpfc_sli_release_iocbq(phba,
+ 								      cmdiocbp);
+ 				}
+ 			} else if (type == LPFC_UNKNOWN_IOCB) {
+@@ -1411,11 +1660,10 @@
+ 						 phba->brd_no, adaptermsg);
+ 				} else {
+ 					/* Unknown IOCB command */
+-					lpfc_printf_log(phba,
+-						KERN_ERR,
+-						LOG_SLI,
+-						"%d:0335 Unknown IOCB command "
+-						"Data: x%x x%x x%x x%x\n",
++					lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
++							"%d:0335 Unknown IOCB "
++							"command Data: x%x "
++							"x%x x%x x%x\n",
+ 						phba->brd_no,
+ 						irsp->ulpCommand,
+ 						irsp->ulpStatus,
+@@ -1425,18 +1673,15 @@
+ 			}
+ 
+ 			if (free_saveq) {
+-				if (!list_empty(&saveq->list)) {
+-					list_for_each_entry_safe(rspiocbp,
+-								 next_iocb,
+-								 &saveq->list,
+-								 list) {
+-						list_del(&rspiocbp->list);
+-						lpfc_sli_release_iocbq(phba,
+-								     rspiocbp);
+-					}
+-				}
+-				lpfc_sli_release_iocbq(phba, saveq);
+-			}
++				list_for_each_entry_safe(rspiocbp, next_iocb,
++							 &saveq->list, list) {
++					list_del(&rspiocbp->list);
++					__lpfc_sli_release_iocbq(phba,
++								 rspiocbp);
++				}
++				__lpfc_sli_release_iocbq(phba, saveq);
++			}
++			rspiocbp = NULL;
+ 		}
+ 
+ 		/*
+@@ -1449,7 +1694,7 @@
+ 		}
+ 	} /* while (pring->rspidx != portRspPut) */
+ 
+-	if ((rspiocbp != 0) && (mask & HA_R0RE_REQ)) {
++	if ((rspiocbp != NULL) && (mask & HA_R0RE_REQ)) {
+ 		/* At least one response entry has been freed */
+ 		pring->stats.iocb_rsp_full++;
+ 		/* SET RxRE_RSP in Chip Att register */
+@@ -1470,24 +1715,25 @@
+ 
+ 	}
+ 
+-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
++	spin_unlock_irqrestore(&phba->hbalock, iflag);
+ 	return rc;
+ }
+ 
+-int
++void
+ lpfc_sli_abort_iocb_ring(struct lpfc_hba *phba, struct lpfc_sli_ring *pring)
+ {
+ 	LIST_HEAD(completions);
+ 	struct lpfc_iocbq *iocb, *next_iocb;
+ 	IOCB_t *cmd = NULL;
+-	int errcnt;
+ 
+-	errcnt = 0;
++	if (pring->ringno == LPFC_ELS_RING) {
++		lpfc_fabric_abort_hba(phba);
++	}
+ 
+ 	/* Error everything on txq and txcmplq
+ 	 * First do the txq.
+ 	 */
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	list_splice_init(&pring->txq, &completions);
+ 	pring->txq_cnt = 0;
+ 
+@@ -1495,26 +1741,25 @@
+ 	list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq, list)
+ 		lpfc_sli_issue_abort_iotag(phba, pring, iocb);
+ 
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	while (!list_empty(&completions)) {
+ 		iocb = list_get_first(&completions, struct lpfc_iocbq, list);
+ 		cmd = &iocb->iocb;
+-		list_del(&iocb->list);
++		list_del_init(&iocb->list);
+ 
+-		if (iocb->iocb_cmpl) {
++		if (!iocb->iocb_cmpl)
++			lpfc_sli_release_iocbq(phba, iocb);
++		else {
+ 			cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+ 			cmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
+ 			(iocb->iocb_cmpl) (phba, iocb, iocb);
+-		} else
+-			lpfc_sli_release_iocbq(phba, iocb);
+ 	}
+-
+-	return errcnt;
++	}
+ }
+ 
+ int
+-lpfc_sli_brdready(struct lpfc_hba * phba, uint32_t mask)
++lpfc_sli_brdready(struct lpfc_hba *phba, uint32_t mask)
+ {
+ 	uint32_t status;
+ 	int i = 0;
+@@ -1541,7 +1786,8 @@
+ 			msleep(2500);
+ 
+ 		if (i == 15) {
+-			phba->hba_state = LPFC_STATE_UNKNOWN; /* Do post */
++				/* Do post */
++			phba->pport->port_state = LPFC_VPORT_UNKNOWN;
+ 			lpfc_sli_brdrestart(phba);
+ 		}
+ 		/* Read the HBA Host Status Register */
+@@ -1550,7 +1796,7 @@
+ 
+ 	/* Check to see if any errors occurred during init */
+ 	if ((status & HS_FFERM) || (i >= 20)) {
+-		phba->hba_state = LPFC_HBA_ERROR;
++		phba->link_state = LPFC_HBA_ERROR;
+ 		retval = 1;
+ 	}
+ 
+@@ -1559,7 +1805,7 @@
+ 
+ #define BARRIER_TEST_PATTERN (0xdeadbeef)
+ 
+-void lpfc_reset_barrier(struct lpfc_hba * phba)
++void lpfc_reset_barrier(struct lpfc_hba *phba)
+ {
+ 	uint32_t __iomem *resp_buf;
+ 	uint32_t __iomem *mbox_buf;
+@@ -1584,12 +1830,12 @@
+ 	hc_copy = readl(phba->HCregaddr);
+ 	writel((hc_copy & ~HC_ERINT_ENA), phba->HCregaddr);
+ 	readl(phba->HCregaddr); /* flush */
+-	phba->fc_flag |= FC_IGNORE_ERATT;
++	phba->link_flag |= LS_IGNORE_ERATT;
+ 
+ 	if (readl(phba->HAregaddr) & HA_ERATT) {
+ 		/* Clear Chip error bit */
+ 		writel(HA_ERATT, phba->HAregaddr);
+-		phba->stopped = 1;
++		phba->pport->stopped = 1;
+ 	}
+ 
+ 	mbox = 0;
+@@ -1606,7 +1852,7 @@
+ 
+ 	if (readl(resp_buf + 1) != ~(BARRIER_TEST_PATTERN)) {
+ 		if (phba->sli.sli_flag & LPFC_SLI2_ACTIVE ||
+-		    phba->stopped)
++		    phba->pport->stopped)
+ 			goto restore_hc;
+ 		else
+ 			goto clear_errat;
+@@ -1623,17 +1869,17 @@
+ 
+ 	if (readl(phba->HAregaddr) & HA_ERATT) {
+ 		writel(HA_ERATT, phba->HAregaddr);
+-		phba->stopped = 1;
++		phba->pport->stopped = 1;
+ 	}
+ 
+ restore_hc:
+-	phba->fc_flag &= ~FC_IGNORE_ERATT;
++	phba->link_flag &= ~LS_IGNORE_ERATT;
+ 	writel(hc_copy, phba->HCregaddr);
+ 	readl(phba->HCregaddr); /* flush */
+ }
+ 
+ int
+-lpfc_sli_brdkill(struct lpfc_hba * phba)
++lpfc_sli_brdkill(struct lpfc_hba *phba)
+ {
+ 	struct lpfc_sli *psli;
+ 	LPFC_MBOXQ_t *pmb;
+@@ -1645,26 +1891,22 @@
+ 	psli = &phba->sli;
+ 
+ 	/* Kill HBA */
+-	lpfc_printf_log(phba,
+-		KERN_INFO,
+-		LOG_SLI,
++	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+ 		"%d:0329 Kill HBA Data: x%x x%x\n",
+-		phba->brd_no,
+-		phba->hba_state,
+-		psli->sli_flag);
++			phba->brd_no, phba->pport->port_state, psli->sli_flag);
+ 
+ 	if ((pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool,
+ 						  GFP_KERNEL)) == 0)
+ 		return 1;
+ 
+ 	/* Disable the error attention */
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	status = readl(phba->HCregaddr);
+ 	status &= ~HC_ERINT_ENA;
+ 	writel(status, phba->HCregaddr);
+ 	readl(phba->HCregaddr); /* flush */
+-	phba->fc_flag |= FC_IGNORE_ERATT;
+-	spin_unlock_irq(phba->host->host_lock);
++	phba->link_flag |= LS_IGNORE_ERATT;
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	lpfc_kill_board(phba, pmb);
+ 	pmb->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
+@@ -1673,9 +1915,9 @@
+ 	if (retval != MBX_SUCCESS) {
+ 		if (retval != MBX_BUSY)
+ 			mempool_free(pmb, phba->mbox_mem_pool);
+-		spin_lock_irq(phba->host->host_lock);
+-		phba->fc_flag &= ~FC_IGNORE_ERATT;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_lock_irq(&phba->hbalock);
++		phba->link_flag &= ~LS_IGNORE_ERATT;
++		spin_unlock_irq(&phba->hbalock);
+ 		return 1;
+ 	}
+ 
+@@ -1698,22 +1940,22 @@
+ 	del_timer_sync(&psli->mbox_tmo);
+ 	if (ha_copy & HA_ERATT) {
+ 		writel(HA_ERATT, phba->HAregaddr);
+-		phba->stopped = 1;
++		phba->pport->stopped = 1;
+ 	}
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+-	phba->fc_flag &= ~FC_IGNORE_ERATT;
+-	spin_unlock_irq(phba->host->host_lock);
++	phba->link_flag &= ~LS_IGNORE_ERATT;
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	psli->mbox_active = NULL;
+ 	lpfc_hba_down_post(phba);
+-	phba->hba_state = LPFC_HBA_ERROR;
++	phba->link_state = LPFC_HBA_ERROR;
+ 
+-	return (ha_copy & HA_ERATT ? 0 : 1);
++	return ha_copy & HA_ERATT ? 0 : 1;
+ }
+ 
+ int
+-lpfc_sli_brdreset(struct lpfc_hba * phba)
++lpfc_sli_brdreset(struct lpfc_hba *phba)
+ {
+ 	struct lpfc_sli *psli;
+ 	struct lpfc_sli_ring *pring;
+@@ -1725,12 +1967,12 @@
+ 	/* Reset HBA */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+ 			"%d:0325 Reset HBA Data: x%x x%x\n", phba->brd_no,
+-			phba->hba_state, psli->sli_flag);
++			phba->pport->port_state, psli->sli_flag);
+ 
+ 	/* perform board reset */
+ 	phba->fc_eventTag = 0;
+-	phba->fc_myDID = 0;
+-	phba->fc_prevDID = 0;
++	phba->pport->fc_myDID = 0;
++	phba->pport->fc_prevDID = 0;
+ 
+ 	/* Turn off parity checking and serr during the physical reset */
+ 	pci_read_config_word(phba->pcidev, PCI_COMMAND, &cfg_value);
+@@ -1760,12 +2002,12 @@
+ 		pring->missbufcnt = 0;
+ 	}
+ 
+-	phba->hba_state = LPFC_WARM_START;
++	phba->link_state = LPFC_WARM_START;
+ 	return 0;
+ }
+ 
+ int
+-lpfc_sli_brdrestart(struct lpfc_hba * phba)
++lpfc_sli_brdrestart(struct lpfc_hba *phba)
+ {
+ 	MAILBOX_t *mb;
+ 	struct lpfc_sli *psli;
+@@ -1773,14 +2015,14 @@
+ 	volatile uint32_t word0;
+ 	void __iomem *to_slim;
+ 
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 
+ 	psli = &phba->sli;
+ 
+ 	/* Restart HBA */
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+ 			"%d:0337 Restart HBA Data: x%x x%x\n", phba->brd_no,
+-			phba->hba_state, psli->sli_flag);
++			phba->pport->port_state, psli->sli_flag);
+ 
+ 	word0 = 0;
+ 	mb = (MAILBOX_t *) &word0;
+@@ -1794,7 +2036,7 @@
+ 	readl(to_slim); /* flush */
+ 
+ 	/* Only skip post after fc_ffinit is completed */
+-	if (phba->hba_state) {
++	if (phba->pport->port_state) {
+ 		skip_post = 1;
+ 		word0 = 1;	/* This is really setting up word1 */
+ 	} else {
+@@ -1806,10 +2048,10 @@
+ 	readl(to_slim); /* flush */
+ 
+ 	lpfc_sli_brdreset(phba);
+-	phba->stopped = 0;
+-	phba->hba_state = LPFC_INIT_START;
++	phba->pport->stopped = 0;
++	phba->link_state = LPFC_INIT_START;
+ 
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	memset(&psli->lnk_stat_offsets, 0, sizeof(psli->lnk_stat_offsets));
+ 	psli->stats_start = get_seconds();
+@@ -1843,14 +2085,11 @@
+ 		if (i++ >= 20) {
+ 			/* Adapter failed to init, timeout, status reg
+ 			   <status> */
+-			lpfc_printf_log(phba,
+-					KERN_ERR,
+-					LOG_INIT,
++			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ 					"%d:0436 Adapter failed to init, "
+ 					"timeout, status reg x%x\n",
+-					phba->brd_no,
+-					status);
+-			phba->hba_state = LPFC_HBA_ERROR;
++					phba->brd_no, status);
++			phba->link_state = LPFC_HBA_ERROR;
+ 			return -ETIMEDOUT;
+ 		}
+ 
+@@ -1859,14 +2098,12 @@
+ 			/* ERROR: During chipset initialization */
+ 			/* Adapter failed to init, chipset, status reg
+ 			   <status> */
+-			lpfc_printf_log(phba,
+-					KERN_ERR,
+-					LOG_INIT,
++			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ 					"%d:0437 Adapter failed to init, "
+ 					"chipset, status reg x%x\n",
+ 					phba->brd_no,
+ 					status);
+-			phba->hba_state = LPFC_HBA_ERROR;
++			phba->link_state = LPFC_HBA_ERROR;
+ 			return -EIO;
+ 		}
+ 
+@@ -1879,7 +2116,8 @@
+ 		}
+ 
+ 		if (i == 15) {
+-			phba->hba_state = LPFC_STATE_UNKNOWN; /* Do post */
++				/* Do post */
++			phba->pport->port_state = LPFC_VPORT_UNKNOWN;
+ 			lpfc_sli_brdrestart(phba);
+ 		}
+ 		/* Read the HBA Host Status Register */
+@@ -1890,14 +2128,12 @@
+ 	if (status & HS_FFERM) {
+ 		/* ERROR: During chipset initialization */
+ 		/* Adapter failed to init, chipset, status reg <status> */
+-		lpfc_printf_log(phba,
+-				KERN_ERR,
+-				LOG_INIT,
++		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ 				"%d:0438 Adapter failed to init, chipset, "
+ 				"status reg x%x\n",
+ 				phba->brd_no,
+ 				status);
+-		phba->hba_state = LPFC_HBA_ERROR;
++		phba->link_state = LPFC_HBA_ERROR;
+ 		return -EIO;
+ 	}
+ 
+@@ -1911,68 +2147,239 @@
+ 	return 0;
+ }
+ 
+-int
+-lpfc_sli_hba_setup(struct lpfc_hba * phba)
++static int
++lpfc_sli_hbq_count(void)
+ {
+-	LPFC_MBOXQ_t *pmb;
+-	uint32_t resetcount = 0, rc = 0, done = 0;
+-
+-	pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+-	if (!pmb) {
+-		phba->hba_state = LPFC_HBA_ERROR;
+-		return -ENOMEM;
+-	}
++	return ARRAY_SIZE(lpfc_hbq_defs);
++}
+ 
++static int
++lpfc_sli_hbq_entry_count(void)
++{
++	int  hbq_count = lpfc_sli_hbq_count();
++	int  count = 0;
++	int  i;
++
++	for (i = 0; i < hbq_count; ++i)
++		count += lpfc_hbq_defs[i]->entry_count;
++	return count;
++}
++
++int
++lpfc_sli_hbq_size(void)
++{
++	return lpfc_sli_hbq_entry_count() * sizeof(struct lpfc_hbq_entry);
++}
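++
++/* lpfc_sli_hbq_size() reports the total entry space needed by every HBQ in
++ * lpfc_hbq_defs[]; the entries themselves live in the phba->hbqslimp DMA
++ * region indexed by lpfc_sli_next_hbq_slot().
++ */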
++
++static int
++lpfc_sli_hbq_setup(struct lpfc_hba *phba)
++{
++	int  hbq_count = lpfc_sli_hbq_count();
++	LPFC_MBOXQ_t *pmb;
++	MAILBOX_t *pmbox;
++	uint32_t hbqno;
++	uint32_t hbq_entry_index;
++
++	/* Get a mailbox buffer to set up the mailbox
++	 * commands for HBA initialization.
++	 */
++	pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++
++	if (!pmb)
++		return -ENOMEM;
++
++	pmbox = &pmb->mb;
++
++	/* Initialize the struct lpfc_sli_hbq structure for each hbq */
++	phba->link_state = LPFC_INIT_MBX_CMDS;
++
++	hbq_entry_index = 0;
++	for (hbqno = 0; hbqno < hbq_count; ++hbqno) {
++		phba->hbqs[hbqno].next_hbqPutIdx = 0;
++		phba->hbqs[hbqno].hbqPutIdx      = 0;
++		phba->hbqs[hbqno].local_hbqGetIdx   = 0;
++		phba->hbqs[hbqno].entry_count =
++			lpfc_hbq_defs[hbqno]->entry_count;
++		lpfc_config_hbq(phba, lpfc_hbq_defs[hbqno], hbq_entry_index,
++				pmb);
++		hbq_entry_index += phba->hbqs[hbqno].entry_count;
++
++		if (lpfc_sli_issue_mbox(phba, pmb, MBX_POLL) != MBX_SUCCESS) {
++			/* Adapter failed to init, mbxCmd <cmd> CFG_RING,
++			   mbxStatus <status>, ring <num> */
++
++			lpfc_printf_log(phba, KERN_ERR,
++					LOG_SLI | LOG_VPORT,
++					"%d:1805 Adapter failed to init. "
++					"Data: x%x x%x x%x\n",
++					phba->brd_no, pmbox->mbxCommand,
++					pmbox->mbxStatus, hbqno);
++
++			phba->link_state = LPFC_HBA_ERROR;
++			mempool_free(pmb, phba->mbox_mem_pool);
++			return -ENXIO;
++		}
++	}
++	phba->hbq_count = hbq_count;
++
++	mempool_free(pmb, phba->mbox_mem_pool);
++
++	/* Initially populate or replenish the HBQs */
++	for (hbqno = 0; hbqno < hbq_count; ++hbqno) {
++		if (lpfc_sli_hbqbuf_init_hbqs(phba, hbqno))
++			return -ENOMEM;
++	}
++	return 0;
++}
++
++static int
++lpfc_do_config_port(struct lpfc_hba *phba, int sli_mode)
++{
++	LPFC_MBOXQ_t *pmb;
++	uint32_t resetcount = 0, rc = 0, done = 0;
++
++	pmb = (LPFC_MBOXQ_t *) mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++	if (!pmb) {
++		phba->link_state = LPFC_HBA_ERROR;
++		return -ENOMEM;
++	}
++
++	phba->sli_rev = sli_mode;
+ 	while (resetcount < 2 && !done) {
+-		spin_lock_irq(phba->host->host_lock);
++		spin_lock_irq(&phba->hbalock);
+ 		phba->sli.sli_flag |= LPFC_SLI_MBOX_ACTIVE;
+-		spin_unlock_irq(phba->host->host_lock);
+-		phba->hba_state = LPFC_STATE_UNKNOWN;
++		spin_unlock_irq(&phba->hbalock);
++		phba->pport->port_state = LPFC_VPORT_UNKNOWN;
+ 		lpfc_sli_brdrestart(phba);
+ 		msleep(2500);
+ 		rc = lpfc_sli_chipset_init(phba);
+ 		if (rc)
+ 			break;
+ 
+-		spin_lock_irq(phba->host->host_lock);
++		spin_lock_irq(&phba->hbalock);
+ 		phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+-		spin_unlock_irq(phba->host->host_lock);
++		spin_unlock_irq(&phba->hbalock);
+ 		resetcount++;
+ 
+-	/* Call pre CONFIG_PORT mailbox command initialization.  A value of 0
+-	 * means the call was successful.  Any other nonzero value is a failure,
+-	 * but if ERESTART is returned, the driver may reset the HBA and try
+-	 * again.
++		/* Call pre CONFIG_PORT mailbox command initialization.  A
++		 * value of 0 means the call was successful.  Any other
++		 * nonzero value is a failure, but if ERESTART is returned,
++		 * the driver may reset the HBA and try again.
+ 	 */
+ 		rc = lpfc_config_port_prep(phba);
+ 		if (rc == -ERESTART) {
+-			phba->hba_state = 0;
++			phba->link_state = LPFC_LINK_UNKNOWN;
+ 			continue;
+ 		} else if (rc) {
+ 			break;
+ 		}
+ 
+-		phba->hba_state = LPFC_INIT_MBX_CMDS;
++		phba->link_state = LPFC_INIT_MBX_CMDS;
+ 		lpfc_config_port(phba, pmb);
+ 		rc = lpfc_sli_issue_mbox(phba, pmb, MBX_POLL);
+-		if (rc == MBX_SUCCESS)
+-			done = 1;
+-		else {
++		if (rc != MBX_SUCCESS) {
+ 			lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ 				"%d:0442 Adapter failed to init, mbxCmd x%x "
+ 				"CONFIG_PORT, mbxStatus x%x Data: x%x\n",
+ 				phba->brd_no, pmb->mb.mbxCommand,
+ 				pmb->mb.mbxStatus, 0);
++			spin_lock_irq(&phba->hbalock);
+ 			phba->sli.sli_flag &= ~LPFC_SLI2_ACTIVE;
++			spin_unlock_irq(&phba->hbalock);
++			rc = -ENXIO;
++		} else {
++			done = 1;
++			phba->max_vpi = (phba->max_vpi &&
++					 pmb->mb.un.varCfgPort.gmv) != 0
++				? pmb->mb.un.varCfgPort.max_vpi
++				: 0;
+ 		}
+ 	}
+-	if (!done)
++
++	if (!done) {
++		rc = -EINVAL;
++		goto do_prep_failed;
++	}
++
++	if ((pmb->mb.un.varCfgPort.sli_mode == 3) &&
++		(!pmb->mb.un.varCfgPort.cMA)) {
++		rc = -ENXIO;
++		goto do_prep_failed;
++	}
++	return rc;
++
++do_prep_failed:
++	mempool_free(pmb, phba->mbox_mem_pool);
++	return rc;
++}
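++
++/* lpfc_sli_hba_setup() negotiates the SLI mode below: it first requests the
++ * mode implied by the lpfc_sli_mode parameter (auto selects SLI-3) and, if
++ * CONFIG_PORT cannot grant SLI-3, retries the port configuration in SLI-2.
++ */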
++
++int
++lpfc_sli_hba_setup(struct lpfc_hba *phba)
++{
++	uint32_t rc;
++	int  mode = 3;
++
++	switch (lpfc_sli_mode) {
++	case 2:
++		if (phba->cfg_npiv_enable) {
++			lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT,
++				"%d:1824 NPIV enabled: Override lpfc_sli_mode "
++				"parameter (%d) to auto (0).\n",
++				phba->brd_no, lpfc_sli_mode);
++			break;
++		}
++		mode = 2;
++		break;
++	case 0:
++	case 3:
++		break;
++	default:
++		lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT,
++				"%d:1819 Unrecognized lpfc_sli_mode "
++				"parameter: %d.\n",
++				phba->brd_no, lpfc_sli_mode);
++
++		break;
++	}
++
++	rc = lpfc_do_config_port(phba, mode);
++	if (rc && lpfc_sli_mode == 3)
++		lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT,
++				"%d:1820 Unable to select SLI-3.  "
++				"Not supported by adapter.\n",
++				phba->brd_no);
++	if (rc && mode != 2)
++		rc = lpfc_do_config_port(phba, 2);
++	if (rc)
++		goto lpfc_sli_hba_setup_error;
++
++	if (phba->sli_rev == 3) {
++		phba->iocb_cmd_size = SLI3_IOCB_CMD_SIZE;
++		phba->iocb_rsp_size = SLI3_IOCB_RSP_SIZE;
++		phba->sli3_options |= LPFC_SLI3_ENABLED;
++		phba->sli3_options |= LPFC_SLI3_HBQ_ENABLED;
++
++	} else {
++		phba->iocb_cmd_size = SLI2_IOCB_CMD_SIZE;
++		phba->iocb_rsp_size = SLI2_IOCB_RSP_SIZE;
++		phba->sli3_options = 0;
++	}
++
++	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
++			"%d:0444 Firmware in SLI %x mode. Max_vpi %d\n",
++			phba->brd_no, phba->sli_rev, phba->max_vpi);
++	rc = lpfc_sli_ring_map(phba);
++
++	if (rc)
+ 		goto lpfc_sli_hba_setup_error;
+ 
+-	rc = lpfc_sli_ring_map(phba, pmb);
++	/* Init HBQs */
+ 
++	if (phba->sli3_options & LPFC_SLI3_HBQ_ENABLED) {
++		rc = lpfc_sli_hbq_setup(phba);
+ 	if (rc)
+ 		goto lpfc_sli_hba_setup_error;
++	}
+ 
+ 	phba->sli.sli_flag |= LPFC_PROCESS_LA;
+ 
+@@ -1980,11 +2387,13 @@
+ 	if (rc)
+ 		goto lpfc_sli_hba_setup_error;
+ 
+-	goto lpfc_sli_hba_setup_exit;
++	return rc;
++
+ lpfc_sli_hba_setup_error:
+-	phba->hba_state = LPFC_HBA_ERROR;
+-lpfc_sli_hba_setup_exit:
+-	mempool_free(pmb, phba->mbox_mem_pool);
++	phba->link_state = LPFC_HBA_ERROR;
++	lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
++			"%d:0445 Firmware initialization failed\n",
++			phba->brd_no);
+ 	return rc;
+ }
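
lpfc_sli_hba_setup now negotiates the SLI mode: it asks for SLI-3 unless the
module parameter pins SLI-2, and on rejection retries CONFIG_PORT in SLI-2
before giving up. Condensed to its skeleton, with demo_config_port() as a
stand-in for lpfc_do_config_port():

	/* Preferred-then-fallback negotiation; a sketch, not driver code. */
	static int demo_hba_setup(struct demo_hba *hba, int requested_mode)
	{
		int mode = (requested_mode == 2) ? 2 : 3;	/* prefer SLI-3 */
		int rc;

		rc = demo_config_port(hba, mode);
		if (rc && mode != 2)			/* SLI-3 refused by adapter */
			rc = demo_config_port(hba, 2);	/* retry as SLI-2 */
		return rc;
	}
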
+ 
+@@ -2004,44 +2413,43 @@
+ void
+ lpfc_mbox_timeout(unsigned long ptr)
+ {
+-	struct lpfc_hba *phba;
++	struct lpfc_hba  *phba = (struct lpfc_hba *) ptr;
+ 	unsigned long iflag;
++	uint32_t tmo_posted;
++
++	spin_lock_irqsave(&phba->pport->work_port_lock, iflag);
++	tmo_posted = phba->pport->work_port_events & WORKER_MBOX_TMO;
++	if (!tmo_posted)
++		phba->pport->work_port_events |= WORKER_MBOX_TMO;
++	spin_unlock_irqrestore(&phba->pport->work_port_lock, iflag);
+ 
+-	phba = (struct lpfc_hba *)ptr;
+-	spin_lock_irqsave(phba->host->host_lock, iflag);
+-	if (!(phba->work_hba_events & WORKER_MBOX_TMO)) {
+-		phba->work_hba_events |= WORKER_MBOX_TMO;
++	if (!tmo_posted) {
++		spin_lock_irqsave(&phba->hbalock, iflag);
+ 		if (phba->work_wait)
+-			wake_up(phba->work_wait);
++			lpfc_worker_wake_up(phba);
++		spin_unlock_irqrestore(&phba->hbalock, iflag);
+ 	}
+-	spin_unlock_irqrestore(phba->host->host_lock, iflag);
+ }
+ 
+ void
+ lpfc_mbox_timeout_handler(struct lpfc_hba *phba)
+ {
+-	LPFC_MBOXQ_t *pmbox;
+-	MAILBOX_t *mb;
++	LPFC_MBOXQ_t *pmbox = phba->sli.mbox_active;
++	MAILBOX_t *mb = &pmbox->mb;
+ 	struct lpfc_sli *psli = &phba->sli;
+ 	struct lpfc_sli_ring *pring;
+ 
+-	spin_lock_irq(phba->host->host_lock);
+-	if (!(phba->work_hba_events & WORKER_MBOX_TMO)) {
+-		spin_unlock_irq(phba->host->host_lock);
++	if (!(phba->pport->work_port_events & WORKER_MBOX_TMO)) {
+ 		return;
+ 	}
+ 
+-	pmbox = phba->sli.mbox_active;
+-	mb = &pmbox->mb;
+-
+ 	/* Mbox cmd <mbxCommand> timeout */
+-	lpfc_printf_log(phba,
+-		KERN_ERR,
+-		LOG_MBOX | LOG_SLI,
+-		"%d:0310 Mailbox command x%x timeout Data: x%x x%x x%p\n",
++	lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
++			"%d:0310 Mailbox command x%x timeout Data: x%x x%x "
++			"x%p\n",
+ 		phba->brd_no,
+ 		mb->mbxCommand,
+-		phba->hba_state,
++			phba->pport->port_state,
+ 		phba->sli.sli_flag,
+ 		phba->sli.mbox_active);
+ 
+@@ -2049,11 +2457,14 @@
+ 	 * would get IOCB_ERROR from lpfc_sli_issue_iocb, allowing
+ 	 * it to fail all outstanding SCSI IO.
+ 	 */
+-	phba->hba_state = LPFC_STATE_UNKNOWN;
+-	phba->work_hba_events &= ~WORKER_MBOX_TMO;
+-	phba->fc_flag |= FC_ESTABLISH_LINK;
++	spin_lock_irq(&phba->pport->work_port_lock);
++	phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
++	spin_unlock_irq(&phba->pport->work_port_lock);
++	spin_lock_irq(&phba->hbalock);
++	phba->link_state = LPFC_LINK_UNKNOWN;
++	phba->pport->fc_flag |= FC_ESTABLISH_LINK;
+ 	psli->sli_flag &= ~LPFC_SLI2_ACTIVE;
+-	spin_unlock_irq(phba->host->host_lock);
++	spin_unlock_irq(&phba->hbalock);
+ 
+ 	pring = &psli->ring[psli->fcp_ring];
+ 	lpfc_sli_abort_iocb_ring(phba, pring);
+@@ -2075,10 +2486,10 @@
+ }
+ 
+ int
+-lpfc_sli_issue_mbox(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmbox, uint32_t flag)
++lpfc_sli_issue_mbox(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmbox, uint32_t flag)
+ {
+ 	MAILBOX_t *mb;
+-	struct lpfc_sli *psli;
++	struct lpfc_sli *psli = &phba->sli;
+ 	uint32_t status, evtctr;
+ 	uint32_t ha_copy;
+ 	int i;
+@@ -2086,31 +2497,44 @@
+ 	volatile uint32_t word0, ldata;
+ 	void __iomem *to_slim;
+ 
++	if (pmbox->mbox_cmpl && pmbox->mbox_cmpl != lpfc_sli_def_mbox_cmpl &&
++		pmbox->mbox_cmpl != lpfc_sli_wake_mbox_wait) {
++		if (!pmbox->vport) {
++			lpfc_printf_log(phba, KERN_ERR,
++					LOG_MBOX | LOG_VPORT,
++					"%d:1806 Mbox x%x failed. No vport\n",
++					phba->brd_no,
++					pmbox->mb.mbxCommand);
++			dump_stack();
++			return MBXERR_ERROR;
++		}
++	}
++
++
+ 	/* If the PCI channel is in offline state, do not post mbox. */
+ 	if (unlikely(pci_channel_offline(phba->pcidev)))
+ 		return MBX_NOT_FINISHED;
+ 
++	spin_lock_irqsave(&phba->hbalock, drvr_flag);
+ 	psli = &phba->sli;
+ 
+-	spin_lock_irqsave(phba->host->host_lock, drvr_flag);
+-
+ 
+ 	mb = &pmbox->mb;
+ 	status = MBX_SUCCESS;
+ 
+-	if (phba->hba_state == LPFC_HBA_ERROR) {
+-		spin_unlock_irqrestore(phba->host->host_lock, drvr_flag);
++	if (phba->link_state == LPFC_HBA_ERROR) {
++		spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+ 
+ 		/* Mbox command <mbxCommand> cannot issue */
+-		LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag)
+-		return (MBX_NOT_FINISHED);
++		LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag)
++		return MBX_NOT_FINISHED;
+ 	}
+ 
+ 	if (mb->mbxCommand != MBX_KILL_BOARD && flag & MBX_NOWAIT &&
+ 	    !(readl(phba->HCregaddr) & HC_MBINT_ENA)) {
+-		spin_unlock_irqrestore(phba->host->host_lock, drvr_flag);
+-		LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag)
+-		return (MBX_NOT_FINISHED);
++		spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
++		LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag)
++		return MBX_NOT_FINISHED;
+ 	}
+ 
+ 	if (psli->sli_flag & LPFC_SLI_MBOX_ACTIVE) {
+@@ -2120,20 +2544,18 @@
+ 		 */
+ 
+ 		if (flag & MBX_POLL) {
+-			spin_unlock_irqrestore(phba->host->host_lock,
+-					       drvr_flag);
++			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+ 
+ 			/* Mbox command <mbxCommand> cannot issue */
+-			LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag)
+-			return (MBX_NOT_FINISHED);
++			LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
++			return MBX_NOT_FINISHED;
+ 		}
+ 
+ 		if (!(psli->sli_flag & LPFC_SLI2_ACTIVE)) {
+-			spin_unlock_irqrestore(phba->host->host_lock,
+-					       drvr_flag);
++			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+ 			/* Mbox command <mbxCommand> cannot issue */
+-			LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag)
+-			return (MBX_NOT_FINISHED);
++			LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
++			return MBX_NOT_FINISHED;
+ 		}
+ 
+ 		/* Handle STOP IOCB processing flag. This is only meaningful
+@@ -2157,21 +2579,33 @@
+ 		lpfc_mbox_put(phba, pmbox);
+ 
+ 		/* Mbox cmd issue - BUSY */
+-		lpfc_printf_log(phba,
+-			KERN_INFO,
+-			LOG_MBOX | LOG_SLI,
+-			"%d:0308 Mbox cmd issue - BUSY Data: x%x x%x x%x x%x\n",
++		lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
++				"%d (%d):0308 Mbox cmd issue - BUSY Data: "
++				"x%x x%x x%x x%x\n",
+ 			phba->brd_no,
+-			mb->mbxCommand,
+-			phba->hba_state,
+-			psli->sli_flag,
+-			flag);
++				pmbox->vport ? pmbox->vport->vpi : 0xffffff,
++				mb->mbxCommand, phba->pport->port_state,
++				psli->sli_flag, flag);
+ 
+ 		psli->slistat.mbox_busy++;
+-		spin_unlock_irqrestore(phba->host->host_lock,
+-				       drvr_flag);
++		spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+ 
+-		return (MBX_BUSY);
++		if (pmbox->vport) {
++			lpfc_debugfs_disc_trc(pmbox->vport,
++				LPFC_DISC_TRC_MBOX_VPORT,
++				"MBOX Bsy vport:  cmd:x%x mb:x%x x%x",
++				(uint32_t)mb->mbxCommand,
++				mb->un.varWords[0], mb->un.varWords[1]);
++		}
++		else {
++			lpfc_debugfs_disc_trc(phba->pport,
++				LPFC_DISC_TRC_MBOX,
++				"MBOX Bsy:        cmd:x%x mb:x%x x%x",
++				(uint32_t)mb->mbxCommand,
++				mb->un.varWords[0], mb->un.varWords[1]);
++		}
++
++		return MBX_BUSY;
+ 	}
+ 
+ 	/* Handle STOP IOCB processing flag. This is only meaningful
+@@ -2198,11 +2632,10 @@
+ 		if (!(psli->sli_flag & LPFC_SLI2_ACTIVE) &&
+ 		    (mb->mbxCommand != MBX_KILL_BOARD)) {
+ 			psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+-			spin_unlock_irqrestore(phba->host->host_lock,
+-					       drvr_flag);
++			spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
+ 			/* Mbox command <mbxCommand> cannot issue */
+-			LOG_MBOX_CANNOT_ISSUE_DATA( phba, mb, psli, flag);
+-			return (MBX_NOT_FINISHED);
++			LOG_MBOX_CANNOT_ISSUE_DATA(phba, pmbox, psli, flag);
++			return MBX_NOT_FINISHED;
+ 		}
+ 		/* timeout active mbox command */
+ 		mod_timer(&psli->mbox_tmo, (jiffies +
+@@ -2210,15 +2643,29 @@
+ 	}
+ 
+ 	/* Mailbox cmd <cmd> issue */
+-	lpfc_printf_log(phba,
+-		KERN_INFO,
+-		LOG_MBOX | LOG_SLI,
+-		"%d:0309 Mailbox cmd x%x issue Data: x%x x%x x%x\n",
+-		phba->brd_no,
+-		mb->mbxCommand,
+-		phba->hba_state,
+-		psli->sli_flag,
+-		flag);
++	lpfc_printf_log(phba, KERN_INFO, LOG_MBOX | LOG_SLI,
++			"%d (%d):0309 Mailbox cmd x%x issue Data: x%x x%x "
++			"x%x\n",
++			phba->brd_no, pmbox->vport ? pmbox->vport->vpi : 0,
++			mb->mbxCommand, phba->pport->port_state,
++			psli->sli_flag, flag);
++
++	if (mb->mbxCommand != MBX_HEARTBEAT) {
++		if (pmbox->vport) {
++			lpfc_debugfs_disc_trc(pmbox->vport,
++				LPFC_DISC_TRC_MBOX_VPORT,
++				"MBOX Send vport: cmd:x%x mb:x%x x%x",
++				(uint32_t)mb->mbxCommand,
++				mb->un.varWords[0], mb->un.varWords[1]);
++		}
++		else {
++			lpfc_debugfs_disc_trc(phba->pport,
++				LPFC_DISC_TRC_MBOX,
++				"MBOX Send:       cmd:x%x mb:x%x x%x",
++				(uint32_t)mb->mbxCommand,
++				mb->un.varWords[0], mb->un.varWords[1]);
++		}
++	}
+ 
+ 	psli->slistat.mbox_cmd++;
+ 	evtctr = psli->slistat.mbox_event;
+@@ -2285,12 +2732,12 @@
+ 		/* Wait for command to complete */
+ 		while (((word0 & OWN_CHIP) == OWN_CHIP) ||
+ 		       (!(ha_copy & HA_MBATT) &&
+-			(phba->hba_state > LPFC_WARM_START))) {
++			(phba->link_state > LPFC_WARM_START))) {
+ 			if (i-- <= 0) {
+ 				psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
+-				spin_unlock_irqrestore(phba->host->host_lock,
++				spin_unlock_irqrestore(&phba->hbalock,
+ 						       drvr_flag);
+-				return (MBX_NOT_FINISHED);
++				return MBX_NOT_FINISHED;
+ 			}
+ 
+ 			/* Check if we took a mbox interrupt while we were
+@@ -2299,12 +2746,12 @@
+ 			    && (evtctr != psli->slistat.mbox_event))
+ 				break;
+ 
+-			spin_unlock_irqrestore(phba->host->host_lock,
++			spin_unlock_irqrestore(&phba->hbalock,
+ 					       drvr_flag);
+ 
+ 			msleep(1);
+ 
+-			spin_lock_irqsave(phba->host->host_lock, drvr_flag);
++			spin_lock_irqsave(&phba->hbalock, drvr_flag);
+ 
+ 			if (psli->sli_flag & LPFC_SLI2_ACTIVE) {
+ 				/* First copy command data */
+@@ -2355,23 +2802,25 @@
+ 		status = mb->mbxStatus;
+ 	}
+ 
+-	spin_unlock_irqrestore(phba->host->host_lock, drvr_flag);
+-	return (status);
++	spin_unlock_irqrestore(&phba->hbalock, drvr_flag);
++	return status;
+ }
+ 
+-static int
+-lpfc_sli_ringtx_put(struct lpfc_hba * phba, struct lpfc_sli_ring * pring,
+-		    struct lpfc_iocbq * piocb)
++/*
++ * Caller needs to hold the hbalock.
++ */
++static void
++__lpfc_sli_ringtx_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++		    struct lpfc_iocbq *piocb)
+ {
+ 	/* Insert the caller's iocb in the txq tail for later processing. */
+ 	list_add_tail(&piocb->list, &pring->txq);
+ 	pring->txq_cnt++;
+-	return (0);
+ }
+ 
+ static struct lpfc_iocbq *
+ lpfc_sli_next_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+-		   struct lpfc_iocbq ** piocb)
++		   struct lpfc_iocbq **piocb)
+ {
+ 	struct lpfc_iocbq * nextiocb;
+ 
+@@ -2384,13 +2833,29 @@
+ 	return nextiocb;
+ }
+ 
++/*
++ * Lockless version of lpfc_sli_issue_iocb.
++ */
+ int
+-lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++__lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
+ 		    struct lpfc_iocbq *piocb, uint32_t flag)
+ {
+ 	struct lpfc_iocbq *nextiocb;
+ 	IOCB_t *iocb;
+ 
++	if (piocb->iocb_cmpl && (!piocb->vport) &&
++	   (piocb->iocb.ulpCommand != CMD_ABORT_XRI_CN) &&
++	   (piocb->iocb.ulpCommand != CMD_CLOSE_XRI_CN)) {
++		lpfc_printf_log(phba, KERN_ERR,
++				LOG_SLI | LOG_VPORT,
++				"%d:1807 IOCB x%x failed. No vport\n",
++				phba->brd_no,
++				piocb->iocb.ulpCommand);
++		dump_stack();
++		return IOCB_ERROR;
++	}
++
++
+ 	/* If the PCI channel is in offline state, do not post iocbs. */
+ 	if (unlikely(pci_channel_offline(phba->pcidev)))
+ 		return IOCB_ERROR;
+@@ -2398,7 +2863,7 @@
+ 	/*
+ 	 * We should never get an IOCB if we are in a < LINK_DOWN state
+ 	 */
+-	if (unlikely(phba->hba_state < LPFC_LINK_DOWN))
++	if (unlikely(phba->link_state < LPFC_LINK_DOWN))
+ 		return IOCB_ERROR;
+ 
+ 	/*
+@@ -2408,7 +2873,7 @@
+ 	if (unlikely(pring->flag & LPFC_STOP_IOCB_MBX))
+ 		goto iocb_busy;
+ 
+-	if (unlikely(phba->hba_state == LPFC_LINK_DOWN)) {
++	if (unlikely(phba->link_state == LPFC_LINK_DOWN)) {
+ 		/*
+ 		 * Only CREATE_XRI, CLOSE_XRI, and QUE_RING_BUF
+ 		 * can be issued if the link is not up.
+@@ -2436,8 +2901,9 @@
+ 	 * attention events.
+ 	 */
+ 	} else if (unlikely(pring->ringno == phba->sli.fcp_ring &&
+-		   !(phba->sli.sli_flag & LPFC_PROCESS_LA)))
++			    !(phba->sli.sli_flag & LPFC_PROCESS_LA))) {
+ 		goto iocb_busy;
++	}
+ 
+ 	while ((iocb = lpfc_sli_next_iocb_slot(phba, pring)) &&
+ 	       (nextiocb = lpfc_sli_next_iocb(phba, pring, &piocb)))
+@@ -2459,13 +2925,28 @@
+  out_busy:
+ 
+ 	if (!(flag & SLI_IOCB_RET_IOCB)) {
+-		lpfc_sli_ringtx_put(phba, pring, piocb);
++		__lpfc_sli_ringtx_put(phba, pring, piocb);
+ 		return IOCB_SUCCESS;
+ 	}
+ 
+ 	return IOCB_BUSY;
+ }
+ 
++
++int
++lpfc_sli_issue_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++		    struct lpfc_iocbq *piocb, uint32_t flag)
++{
++	unsigned long iflags;
++	int rc;
++
++	spin_lock_irqsave(&phba->hbalock, iflags);
++	rc = __lpfc_sli_issue_iocb(phba, pring, piocb, flag);
++	spin_unlock_irqrestore(&phba->hbalock, iflags);
++
++	return rc;
++}
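
This is the pattern the patch applies throughout the SLI layer: a lockless
__-prefixed worker that documents its locking contract, plus a thin public
wrapper that takes hbalock. Callers already inside the lock (the interrupt
handler, ring teardown) use the __ variant; everyone else uses the wrapper.
The shape of it, with hypothetical demo_* names:

	/* Lockless/locked split, mirroring __lpfc_sli_issue_iocb above. */
	static int __demo_issue(struct demo_hba *hba, struct demo_req *req)
	{
		/* caller must hold hba->lock */
		list_add_tail(&req->list, &hba->txq);
		return 0;
	}

	int demo_issue(struct demo_hba *hba, struct demo_req *req)
	{
		unsigned long flags;
		int rc;

		spin_lock_irqsave(&hba->lock, flags);
		rc = __demo_issue(hba, req);
		spin_unlock_irqrestore(&hba->lock, flags);
		return rc;
	}
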
++
+ static int
+ lpfc_extra_ring_setup( struct lpfc_hba *phba)
+ {
+@@ -2504,7 +2985,7 @@
+ int
+ lpfc_sli_setup(struct lpfc_hba *phba)
+ {
+-	int i, totiocb = 0;
++	int i, totiocbsize = 0;
+ 	struct lpfc_sli *psli = &phba->sli;
+ 	struct lpfc_sli_ring *pring;
+ 
+@@ -2529,6 +3010,12 @@
+ 			pring->numRiocb += SLI2_IOCB_RSP_R1XTRA_ENTRIES;
+ 			pring->numCiocb += SLI2_IOCB_CMD_R3XTRA_ENTRIES;
+ 			pring->numRiocb += SLI2_IOCB_RSP_R3XTRA_ENTRIES;
++			pring->sizeCiocb = (phba->sli_rev == 3) ?
++							SLI3_IOCB_CMD_SIZE :
++							SLI2_IOCB_CMD_SIZE;
++			pring->sizeRiocb = (phba->sli_rev == 3) ?
++							SLI3_IOCB_RSP_SIZE :
++							SLI2_IOCB_RSP_SIZE;
+ 			pring->iotag_ctr = 0;
+ 			pring->iotag_max =
+ 			    (phba->cfg_hba_queue_depth * 2);
+@@ -2539,12 +3026,25 @@
+ 			/* numCiocb and numRiocb are used in config_port */
+ 			pring->numCiocb = SLI2_IOCB_CMD_R1_ENTRIES;
+ 			pring->numRiocb = SLI2_IOCB_RSP_R1_ENTRIES;
++			pring->sizeCiocb = (phba->sli_rev == 3) ?
++							SLI3_IOCB_CMD_SIZE :
++							SLI2_IOCB_CMD_SIZE;
++			pring->sizeRiocb = (phba->sli_rev == 3) ?
++							SLI3_IOCB_RSP_SIZE :
++							SLI2_IOCB_RSP_SIZE;
++			pring->iotag_max = phba->cfg_hba_queue_depth;
+ 			pring->num_mask = 0;
+ 			break;
+ 		case LPFC_ELS_RING:	/* ring 2 - ELS / CT */
+ 			/* numCiocb and numRiocb are used in config_port */
+ 			pring->numCiocb = SLI2_IOCB_CMD_R2_ENTRIES;
+ 			pring->numRiocb = SLI2_IOCB_RSP_R2_ENTRIES;
++			pring->sizeCiocb = (phba->sli_rev == 3) ?
++							SLI3_IOCB_CMD_SIZE :
++							SLI2_IOCB_CMD_SIZE;
++			pring->sizeRiocb = (phba->sli_rev == 3) ?
++							SLI3_IOCB_RSP_SIZE :
++							SLI2_IOCB_RSP_SIZE;
+ 			pring->fast_iotag = 0;
+ 			pring->iotag_ctr = 0;
+ 			pring->iotag_max = 4096;
+@@ -2575,14 +3075,16 @@
+ 			    lpfc_ct_unsol_event;
+ 			break;
+ 		}
+-		totiocb += (pring->numCiocb + pring->numRiocb);
++		totiocbsize += (pring->numCiocb * pring->sizeCiocb) +
++				(pring->numRiocb * pring->sizeRiocb);
+ 	}
+-	if (totiocb > MAX_SLI2_IOCB) {
++	if (totiocbsize > MAX_SLIM_IOCB_SIZE) {
+ 		/* Too many cmd / rsp ring entries in SLI2 SLIM */
+ 		lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ 				"%d:0462 Too many cmd / rsp ring entries in "
+-				"SLI2 SLIM Data: x%x x%x\n",
+-				phba->brd_no, totiocb, MAX_SLI2_IOCB);
++				"SLI2 SLIM Data: x%x x%lx\n",
++				phba->brd_no, totiocbsize,
++				(unsigned long) MAX_SLIM_IOCB_SIZE);
+ 	}
+ 	if (phba->cfg_multi_ring_support == 2)
+ 		lpfc_extra_ring_setup(phba);
+@@ -2591,15 +3093,16 @@
+ }
+ 
+ int
+-lpfc_sli_queue_setup(struct lpfc_hba * phba)
++lpfc_sli_queue_setup(struct lpfc_hba *phba)
+ {
+ 	struct lpfc_sli *psli;
+ 	struct lpfc_sli_ring *pring;
+ 	int i;
+ 
+ 	psli = &phba->sli;
+-	spin_lock_irq(phba->host->host_lock);
++	spin_lock_irq(&phba->hbalock);
+ 	INIT_LIST_HEAD(&psli->mboxq);
++	INIT_LIST_HEAD(&psli->mboxq_cmpl);
+ 	/* Initialize list headers for txq and txcmplq as double linked lists */
+ 	for (i = 0; i < psli->num_rings; i++) {
+ 		pring = &psli->ring[i];
+@@ -2612,15 +3115,73 @@
+ 		INIT_LIST_HEAD(&pring->iocb_continueq);
+ 		INIT_LIST_HEAD(&pring->postbufq);
+ 	}
+-	spin_unlock_irq(phba->host->host_lock);
+-	return (1);
++	spin_unlock_irq(&phba->hbalock);
++	return 1;
+ }
+ 
+ int
+-lpfc_sli_hba_down(struct lpfc_hba * phba)
++lpfc_sli_host_down(struct lpfc_vport *vport)
+ {
+ 	LIST_HEAD(completions);
+-	struct lpfc_sli *psli;
++	struct lpfc_hba *phba = vport->phba;
++	struct lpfc_sli *psli = &phba->sli;
++	struct lpfc_sli_ring *pring;
++	struct lpfc_iocbq *iocb, *next_iocb;
++	int i;
++	unsigned long flags = 0;
++	uint16_t prev_pring_flag;
++
++	lpfc_cleanup_discovery_resources(vport);
++
++	spin_lock_irqsave(&phba->hbalock, flags);
++	for (i = 0; i < psli->num_rings; i++) {
++		pring = &psli->ring[i];
++		prev_pring_flag = pring->flag;
++		if (pring->ringno == LPFC_ELS_RING) /* Only slow rings */
++			pring->flag |= LPFC_DEFERRED_RING_EVENT;
++		/*
++		 * Error everything on the txq since these iocbs have not been
++		 * given to the FW yet.
++		 */
++		list_for_each_entry_safe(iocb, next_iocb, &pring->txq, list) {
++			if (iocb->vport != vport)
++				continue;
++			list_move_tail(&iocb->list, &completions);
++			pring->txq_cnt--;
++		}
++
++		/* Next issue ABTS for everything on the txcmplq */
++		list_for_each_entry_safe(iocb, next_iocb, &pring->txcmplq,
++									list) {
++			if (iocb->vport != vport)
++				continue;
++			lpfc_sli_issue_abort_iotag(phba, pring, iocb);
++		}
++
++		pring->flag = prev_pring_flag;
++	}
++
++	spin_unlock_irqrestore(&phba->hbalock, flags);
++
++	while (!list_empty(&completions)) {
++		list_remove_head(&completions, iocb, struct lpfc_iocbq, list);
++
++		if (!iocb->iocb_cmpl)
++			lpfc_sli_release_iocbq(phba, iocb);
++		else {
++			iocb->iocb.ulpStatus = IOSTAT_LOCAL_REJECT;
++			iocb->iocb.un.ulpWord[4] = IOERR_SLI_DOWN;
++			(iocb->iocb_cmpl) (phba, iocb, iocb);
++		}
++	}
++	return 1;
++}
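
lpfc_sli_host_down is built on the collect-then-complete idiom: while
hbalock is held the per-vport iocbs are only moved onto a local completions
list, and their handlers run after the lock is dropped, since completion
callbacks may take other locks or recurse into the SLI layer. A minimal
sketch with hypothetical demo_* names:

	/* Collect under the lock, invoke callbacks outside it. */
	static void demo_drain(struct demo_hba *hba)
	{
		LIST_HEAD(completions);
		struct demo_req *req, *next;
		unsigned long flags;

		spin_lock_irqsave(&hba->lock, flags);
		list_for_each_entry_safe(req, next, &hba->txq, list)
			list_move_tail(&req->list, &completions); /* no callbacks yet */
		spin_unlock_irqrestore(&hba->lock, flags);

		list_for_each_entry_safe(req, next, &completions, list) {
			list_del(&req->list);
			req->done(hba, req);	/* safe: lock no longer held */
		}
	}
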
++
++int
++lpfc_sli_hba_down(struct lpfc_hba *phba)
++{
++	LIST_HEAD(completions);
++	struct lpfc_sli *psli = &phba->sli;
+ 	struct lpfc_sli_ring *pring;
+ 	LPFC_MBOXQ_t *pmb;
+ 	struct lpfc_iocbq *iocb;
+@@ -2628,12 +3189,14 @@
+ 	int i;
+ 	unsigned long flags = 0;
+ 
+-	psli = &phba->sli;
+ 	lpfc_hba_down_prep(phba);
+ 
+-	spin_lock_irqsave(phba->host->host_lock, flags);
++	lpfc_fabric_abort_hba(phba);
++
++	spin_lock_irqsave(&phba->hbalock, flags);
+ 	for (i = 0; i < psli->num_rings; i++) {
+ 		pring = &psli->ring[i];
++		if (pring->ringno == LPFC_ELS_RING) /* Only slow rings */
+ 		pring->flag |= LPFC_DEFERRED_RING_EVENT;
+ 
+ 		/*
+@@ -2644,51 +3207,50 @@
+ 		pring->txq_cnt = 0;
+ 
+ 	}
+-	spin_unlock_irqrestore(phba->host->host_lock, flags);
++	spin_unlock_irqrestore(&phba->hbalock, flags);
+ 
+ 	while (!list_empty(&completions)) {
+-		iocb = list_get_first(&completions, struct lpfc_iocbq, list);
++		list_remove_head(&completions, iocb, struct lpfc_iocbq, list);
+ 		cmd = &iocb->iocb;
+-		list_del(&iocb->list);
+ 
+-		if (iocb->iocb_cmpl) {
++		if (!iocb->iocb_cmpl)
++			lpfc_sli_release_iocbq(phba, iocb);
++		else {
+ 			cmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+ 			cmd->un.ulpWord[4] = IOERR_SLI_DOWN;
+ 			(iocb->iocb_cmpl) (phba, iocb, iocb);
+-		} else
+-			lpfc_sli_release_iocbq(phba, iocb);
++		}
+ 	}
+ 
+ 	/* Return any active mbox cmds */
+ 	del_timer_sync(&psli->mbox_tmo);
+-	spin_lock_irqsave(phba->host->host_lock, flags);
+-	phba->work_hba_events &= ~WORKER_MBOX_TMO;
++	spin_lock_irqsave(&phba->hbalock, flags);
++
++	spin_lock(&phba->pport->work_port_lock);
++	phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
++	spin_unlock(&phba->pport->work_port_lock);
++
+ 	if (psli->mbox_active) {
+-		pmb = psli->mbox_active;
+-		pmb->mb.mbxStatus = MBX_NOT_FINISHED;
+-		if (pmb->mbox_cmpl) {
+-			spin_unlock_irqrestore(phba->host->host_lock, flags);
+-			pmb->mbox_cmpl(phba,pmb);
+-			spin_lock_irqsave(phba->host->host_lock, flags);
+-		}
+-	}
+-	psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++		list_add_tail(&psli->mbox_active->list, &completions);
+ 	psli->mbox_active = NULL;
++		psli->sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++	}
+ 
+-	/* Return any pending mbox cmds */
+-	while ((pmb = lpfc_mbox_get(phba)) != NULL) {
++	/* Return any pending or completed mbox cmds */
++	list_splice_init(&phba->sli.mboxq, &completions);
++	list_splice_init(&phba->sli.mboxq_cmpl, &completions);
++	INIT_LIST_HEAD(&psli->mboxq);
++	INIT_LIST_HEAD(&psli->mboxq_cmpl);
++
++	spin_unlock_irqrestore(&phba->hbalock, flags);
++
++	while (!list_empty(&completions)) {
++		list_remove_head(&completions, pmb, LPFC_MBOXQ_t, list);
+ 		pmb->mb.mbxStatus = MBX_NOT_FINISHED;
+ 		if (pmb->mbox_cmpl) {
+-			spin_unlock_irqrestore(phba->host->host_lock, flags);
+ 			pmb->mbox_cmpl(phba,pmb);
+-			spin_lock_irqsave(phba->host->host_lock, flags);
+ 		}
+ 	}
+-
+-	INIT_LIST_HEAD(&psli->mboxq);
+-
+-	spin_unlock_irqrestore(phba->host->host_lock, flags);
+-
+ 	return 1;
+ }
+ 
+@@ -2710,14 +3272,15 @@
+ }
+ 
+ int
+-lpfc_sli_ringpostbuf_put(struct lpfc_hba * phba, struct lpfc_sli_ring * pring,
+-			 struct lpfc_dmabuf * mp)
++lpfc_sli_ringpostbuf_put(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++			 struct lpfc_dmabuf *mp)
+ {
+ 	/* Stick struct lpfc_dmabuf at end of postbufq so driver can look it up
+ 	   later */
++	spin_lock_irq(&phba->hbalock);
+ 	list_add_tail(&mp->list, &pring->postbufq);
+-
+ 	pring->postbufq_cnt++;
++	spin_unlock_irq(&phba->hbalock);
+ 	return 0;
+ }
+ 
+@@ -2730,14 +3293,17 @@
+ 	struct list_head *slp = &pring->postbufq;
+ 
+ 	/* Search postbufq, from the begining, looking for a match on phys */
++	spin_lock_irq(&phba->hbalock);
+ 	list_for_each_entry_safe(mp, next_mp, &pring->postbufq, list) {
+ 		if (mp->phys == phys) {
+ 			list_del_init(&mp->list);
+ 			pring->postbufq_cnt--;
++			spin_unlock_irq(&phba->hbalock);
+ 			return mp;
+ 		}
+ 	}
+ 
++	spin_unlock_irq(&phba->hbalock);
+ 	lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
+ 			"%d:0410 Cannot find virtual addr for mapped buf on "
+ 			"ring %d Data x%llx x%p x%p x%x\n",
+@@ -2747,92 +3313,110 @@
+ }
+ 
+ static void
+-lpfc_sli_abort_els_cmpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-			struct lpfc_iocbq * rspiocb)
++lpfc_sli_abort_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++			struct lpfc_iocbq *rspiocb)
+ {
+-	IOCB_t *irsp;
++	IOCB_t *irsp = &rspiocb->iocb;
+ 	uint16_t abort_iotag, abort_context;
+-	struct lpfc_iocbq *abort_iocb, *rsp_ab_iocb;
++	struct lpfc_iocbq *abort_iocb;
+ 	struct lpfc_sli_ring *pring = &phba->sli.ring[LPFC_ELS_RING];
+ 
+ 	abort_iocb = NULL;
+-	irsp = &rspiocb->iocb;
+-
+-	spin_lock_irq(phba->host->host_lock);
+ 
+ 	if (irsp->ulpStatus) {
+ 		abort_context = cmdiocb->iocb.un.acxri.abortContextTag;
+ 		abort_iotag = cmdiocb->iocb.un.acxri.abortIoTag;
+ 
++		spin_lock_irq(&phba->hbalock);
+ 		if (abort_iotag != 0 && abort_iotag <= phba->sli.last_iotag)
+ 			abort_iocb = phba->sli.iocbq_lookup[abort_iotag];
+ 
+-		lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
+-				"%d:0327 Cannot abort els iocb %p"
+-				" with tag %x context %x\n",
+-				phba->brd_no, abort_iocb,
+-				abort_iotag, abort_context);
++		lpfc_printf_log(phba, KERN_INFO, LOG_ELS | LOG_SLI,
++				"%d:0327 Cannot abort els iocb %p "
++				"with tag %x context %x, abort status %x, "
++				"abort code %x\n",
++				phba->brd_no, abort_iocb, abort_iotag,
++				abort_context, irsp->ulpStatus,
++				irsp->un.ulpWord[4]);
+ 
+ 		/*
+ 		 * make sure we have the right iocbq before taking it
+ 		 * off the txcmplq and try to call completion routine.
+ 		 */
+-		if (abort_iocb &&
+-		    abort_iocb->iocb.ulpContext == abort_context &&
+-		    abort_iocb->iocb_flag & LPFC_DRIVER_ABORTED) {
+-			list_del(&abort_iocb->list);
++		if (!abort_iocb ||
++		    abort_iocb->iocb.ulpContext != abort_context ||
++		    (abort_iocb->iocb_flag & LPFC_DRIVER_ABORTED) == 0)
++			spin_unlock_irq(&phba->hbalock);
++		else {
++			list_del_init(&abort_iocb->list);
+ 			pring->txcmplq_cnt--;
++			spin_unlock_irq(&phba->hbalock);
+ 
+-			rsp_ab_iocb = lpfc_sli_get_iocbq(phba);
+-			if (rsp_ab_iocb == NULL)
+-				lpfc_sli_release_iocbq(phba, abort_iocb);
+-			else {
+-				abort_iocb->iocb_flag &=
+-					~LPFC_DRIVER_ABORTED;
+-				rsp_ab_iocb->iocb.ulpStatus =
+-					IOSTAT_LOCAL_REJECT;
+-				rsp_ab_iocb->iocb.un.ulpWord[4] =
+-					IOERR_SLI_ABORTED;
+-				spin_unlock_irq(phba->host->host_lock);
+-				(abort_iocb->iocb_cmpl)
+-					(phba, abort_iocb, rsp_ab_iocb);
+-				spin_lock_irq(phba->host->host_lock);
+-				lpfc_sli_release_iocbq(phba, rsp_ab_iocb);
+-			}
++			abort_iocb->iocb_flag &= ~LPFC_DRIVER_ABORTED;
++			abort_iocb->iocb.ulpStatus = IOSTAT_LOCAL_REJECT;
++			abort_iocb->iocb.un.ulpWord[4] = IOERR_SLI_ABORTED;
++			(abort_iocb->iocb_cmpl)(phba, abort_iocb, abort_iocb);
+ 		}
+ 	}
+ 
+ 	lpfc_sli_release_iocbq(phba, cmdiocb);
+-	spin_unlock_irq(phba->host->host_lock);
++	return;
++}
++
++static void
++lpfc_ignore_els_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++		     struct lpfc_iocbq *rspiocb)
++{
++	IOCB_t *irsp = &rspiocb->iocb;
++
++	/* ELS cmd tag <ulpIoTag> completes */
++	lpfc_printf_log(phba, KERN_INFO, LOG_ELS,
++			"%d (X):0133 Ignoring ELS cmd tag x%x completion Data: "
++			"x%x x%x x%x\n",
++			phba->brd_no, irsp->ulpIoTag, irsp->ulpStatus,
++			irsp->un.ulpWord[4], irsp->ulpTimeout);
++	if (cmdiocb->iocb.ulpCommand == CMD_GEN_REQUEST64_CR)
++		lpfc_ct_free_iocb(phba, cmdiocb);
++	else
++		lpfc_els_free_iocb(phba, cmdiocb);
+ 	return;
+ }
+ 
+ int
+-lpfc_sli_issue_abort_iotag(struct lpfc_hba * phba,
+-			   struct lpfc_sli_ring * pring,
+-			   struct lpfc_iocbq * cmdiocb)
++lpfc_sli_issue_abort_iotag(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
++			   struct lpfc_iocbq *cmdiocb)
+ {
++	struct lpfc_vport *vport = cmdiocb->vport;
+ 	struct lpfc_iocbq *abtsiocbp;
+ 	IOCB_t *icmd = NULL;
+ 	IOCB_t *iabt = NULL;
+ 	int retval = IOCB_ERROR;
+ 
+-	/* There are certain command types we don't want
+-	 * to abort.
++	/*
++	 * There are certain command types we don't want to abort.  And we
++	 * don't want to abort commands that are already in the process of
++	 * being aborted.
+ 	 */
+ 	icmd = &cmdiocb->iocb;
+-	if ((icmd->ulpCommand == CMD_ABORT_XRI_CN) ||
+-	    (icmd->ulpCommand == CMD_CLOSE_XRI_CN))
++	if (icmd->ulpCommand == CMD_ABORT_XRI_CN ||
++	    icmd->ulpCommand == CMD_CLOSE_XRI_CN ||
++	    (cmdiocb->iocb_flag & LPFC_DRIVER_ABORTED) != 0)
+ 		return 0;
+ 
+-	/* If we're unloading, interrupts are disabled so we
+-	 * need to cleanup the iocb here.
++	/* If we're unloading, don't abort iocb on the ELS ring, but change the
++	 * callback so that nothing happens when it finishes.
+ 	 */
+-	if (phba->fc_flag & FC_UNLOADING)
++	if ((vport->load_flag & FC_UNLOADING) &&
++	    (pring->ringno == LPFC_ELS_RING)) {
++		if (cmdiocb->iocb_flag & LPFC_IO_FABRIC)
++			cmdiocb->fabric_iocb_cmpl = lpfc_ignore_els_cmpl;
++		else
++			cmdiocb->iocb_cmpl = lpfc_ignore_els_cmpl;
+ 		goto abort_iotag_exit;
++	}
+ 
+ 	/* issue ABTS for this IOCB based on iotag */
+-	abtsiocbp = lpfc_sli_get_iocbq(phba);
++	abtsiocbp = __lpfc_sli_get_iocbq(phba);
+ 	if (abtsiocbp == NULL)
+ 		return 0;
+ 
+@@ -2848,7 +3432,7 @@
+ 	iabt->ulpLe = 1;
+ 	iabt->ulpClass = icmd->ulpClass;
+ 
+-	if (phba->hba_state >= LPFC_LINK_UP)
++	if (phba->link_state >= LPFC_LINK_UP)
+ 		iabt->ulpCommand = CMD_ABORT_XRI_CN;
+ 	else
+ 		iabt->ulpCommand = CMD_CLOSE_XRI_CN;
+@@ -2856,32 +3440,20 @@
+ 	abtsiocbp->iocb_cmpl = lpfc_sli_abort_els_cmpl;
+ 
+ 	lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+-			"%d:0339 Abort xri x%x, original iotag x%x, abort "
+-			"cmd iotag x%x\n",
+-			phba->brd_no, iabt->un.acxri.abortContextTag,
++			"%d (%d):0339 Abort xri x%x, original iotag x%x, "
++			"abort cmd iotag x%x\n",
++			phba->brd_no, vport->vpi,
++			iabt->un.acxri.abortContextTag,
+ 			iabt->un.acxri.abortIoTag, abtsiocbp->iotag);
+-	retval = lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0);
++	retval = __lpfc_sli_issue_iocb(phba, pring, abtsiocbp, 0);
+ 
+ abort_iotag_exit:
+-
+-	/* If we could not issue an abort dequeue the iocb and handle
+-	 * the completion here.
++	/*
++	 * Caller to this routine should check for IOCB_ERROR
++	 * and handle it properly.  This routine no longer removes
++	 * iocb off txcmplq and call compl in case of IOCB_ERROR.
+ 	 */
+-	if (retval == IOCB_ERROR) {
+-		list_del(&cmdiocb->list);
+-		pring->txcmplq_cnt--;
+-
+-		if (cmdiocb->iocb_cmpl) {
+-			icmd->ulpStatus = IOSTAT_LOCAL_REJECT;
+-			icmd->un.ulpWord[4] = IOERR_SLI_ABORTED;
+-			spin_unlock_irq(phba->host->host_lock);
+-			(cmdiocb->iocb_cmpl) (phba, cmdiocb, cmdiocb);
+-			spin_lock_irq(phba->host->host_lock);
+-		} else
+-			lpfc_sli_release_iocbq(phba, cmdiocb);
+-	}
+-
+-	return 1;
++	return retval;
+ }
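
Note the unload handling above: rather than tearing down an active ELS iocb,
the driver repoints its completion callback at lpfc_ignore_els_cmpl so the
request frees itself whenever the firmware finally completes it. The general
shape of the callback-swap, sketched with hypothetical names:

	/* Let an in-flight request self-destruct instead of aborting it. */
	static void demo_ignore_cmpl(struct demo_hba *hba, struct demo_req *req)
	{
		demo_free_req(hba, req);	/* just release resources */
	}

	static int demo_abort(struct demo_hba *hba, struct demo_req *req)
	{
		if (hba->unloading) {
			req->done = demo_ignore_cmpl;	/* quiet cleanup later */
			return 0;
		}
		return demo_issue_abts(hba, req);	/* normal abort path */
	}
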
+ 
+ static int
+@@ -2947,14 +3519,10 @@
+ }
+ 
+ void
+-lpfc_sli_abort_fcp_cmpl(struct lpfc_hba * phba, struct lpfc_iocbq * cmdiocb,
+-			   struct lpfc_iocbq * rspiocb)
++lpfc_sli_abort_fcp_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
++			struct lpfc_iocbq *rspiocb)
+ {
+-	unsigned long iflags;
+-
+-	spin_lock_irqsave(phba->host->host_lock, iflags);
+ 	lpfc_sli_release_iocbq(phba, cmdiocb);
+-	spin_unlock_irqrestore(phba->host->host_lock, iflags);
+ 	return;
+ }
+ 
+@@ -2972,8 +3540,8 @@
+ 	for (i = 1; i <= phba->sli.last_iotag; i++) {
+ 		iocbq = phba->sli.iocbq_lookup[i];
+ 
+-		if (lpfc_sli_validate_fcp_iocb (iocbq, tgt_id, lun_id,
+-						0, abort_cmd) != 0)
++		if (lpfc_sli_validate_fcp_iocb(iocbq, tgt_id, lun_id, 0,
++					       abort_cmd) != 0)
+ 			continue;
+ 
+ 		/* issue ABTS for this IOCB based on iotag */
+@@ -2989,8 +3557,9 @@
+ 		abtsiocb->iocb.un.acxri.abortIoTag = cmd->ulpIoTag;
+ 		abtsiocb->iocb.ulpLe = 1;
+ 		abtsiocb->iocb.ulpClass = cmd->ulpClass;
++		abtsiocb->vport = phba->pport;
+ 
+-		if (phba->hba_state >= LPFC_LINK_UP)
++		if (lpfc_is_link_up(phba))
+ 			abtsiocb->iocb.ulpCommand = CMD_ABORT_XRI_CN;
+ 		else
+ 			abtsiocb->iocb.ulpCommand = CMD_CLOSE_XRI_CN;
+@@ -3016,16 +3585,16 @@
+ 	wait_queue_head_t *pdone_q;
+ 	unsigned long iflags;
+ 
+-	spin_lock_irqsave(phba->host->host_lock, iflags);
++	spin_lock_irqsave(&phba->hbalock, iflags);
+ 	cmdiocbq->iocb_flag |= LPFC_IO_WAKE;
+ 	if (cmdiocbq->context2 && rspiocbq)
+ 		memcpy(&((struct lpfc_iocbq *)cmdiocbq->context2)->iocb,
+ 		       &rspiocbq->iocb, sizeof(IOCB_t));
+ 
+ 	pdone_q = cmdiocbq->context_un.wait_queue;
+-	spin_unlock_irqrestore(phba->host->host_lock, iflags);
+ 	if (pdone_q)
+ 		wake_up(pdone_q);
++	spin_unlock_irqrestore(&phba->hbalock, iflags);
+ 	return;
+ }
+ 
+@@ -3035,11 +3604,12 @@
+  * lpfc_sli_issue_call since the wake routine sets a unique value and by
+  * definition this is a wait function.
+  */
++
+ int
+-lpfc_sli_issue_iocb_wait(struct lpfc_hba * phba,
+-			 struct lpfc_sli_ring * pring,
+-			 struct lpfc_iocbq * piocb,
+-			 struct lpfc_iocbq * prspiocbq,
++lpfc_sli_issue_iocb_wait(struct lpfc_hba *phba,
++			 struct lpfc_sli_ring *pring,
++			 struct lpfc_iocbq *piocb,
++			 struct lpfc_iocbq *prspiocbq,
+ 			 uint32_t timeout)
+ {
+ 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_q);
+@@ -3071,11 +3641,9 @@
+ 	retval = lpfc_sli_issue_iocb(phba, pring, piocb, 0);
+ 	if (retval == IOCB_SUCCESS) {
+ 		timeout_req = timeout * HZ;
+-		spin_unlock_irq(phba->host->host_lock);
+ 		timeleft = wait_event_timeout(done_q,
+ 				piocb->iocb_flag & LPFC_IO_WAKE,
+ 				timeout_req);
+-		spin_lock_irq(phba->host->host_lock);
+ 
+ 		if (piocb->iocb_flag & LPFC_IO_WAKE) {
+ 			lpfc_printf_log(phba, KERN_INFO, LOG_SLI,
+@@ -3117,16 +3685,16 @@
+ }
+ 
+ int
+-lpfc_sli_issue_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq,
++lpfc_sli_issue_mbox_wait(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq,
+ 			 uint32_t timeout)
+ {
+ 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_q);
+ 	int retval;
++	unsigned long flag;
+ 
+ 	/* The caller must leave context1 empty. */
+-	if (pmboxq->context1 != 0) {
+-		return (MBX_NOT_FINISHED);
+-	}
++	if (pmboxq->context1 != 0)
++		return MBX_NOT_FINISHED;
+ 
+ 	/* setup wake call as IOCB callback */
+ 	pmboxq->mbox_cmpl = lpfc_sli_wake_mbox_wait;
+@@ -3141,6 +3709,7 @@
+ 				pmboxq->mbox_flag & LPFC_MBX_WAKE,
+ 				timeout * HZ);
+ 
++		spin_lock_irqsave(&phba->hbalock, flag);
+ 		pmboxq->context1 = NULL;
+ 		/*
+ 		 * if LPFC_MBX_WAKE flag is set the mailbox is completed
+@@ -3148,8 +3717,11 @@
+ 		 */
+ 		if (pmboxq->mbox_flag & LPFC_MBX_WAKE)
+ 			retval = MBX_SUCCESS;
+-		else
++		else {
+ 			retval = MBX_TIMEOUT;
++			pmboxq->mbox_cmpl = lpfc_sli_def_mbox_cmpl;
++		}
++		spin_unlock_irqrestore(&phba->hbalock, flag);
+ 	}
+ 
+ 	return retval;
+@@ -3158,12 +3730,25 @@
+ int
+ lpfc_sli_flush_mbox_queue(struct lpfc_hba * phba)
+ {
++	struct lpfc_vport *vport = phba->pport;
+ 	int i = 0;
++	uint32_t ha_copy;
+ 
+-	while (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE && !phba->stopped) {
++	while (phba->sli.sli_flag & LPFC_SLI_MBOX_ACTIVE && !vport->stopped) {
+ 		if (i++ > LPFC_MBOX_TMO * 1000)
+ 			return 1;
+ 
++		/*
++		 * Call lpfc_sli_handle_mb_event only if a mailbox cmd
++		 * did finish. This way we won't get the misleading
++		 * "Stray Mailbox Interrupt" message.
++		 */
++		spin_lock_irq(&phba->hbalock);
++		ha_copy = phba->work_ha;
++		phba->work_ha &= ~HA_MBATT;
++		spin_unlock_irq(&phba->hbalock);
++
++		if (ha_copy & HA_MBATT)
+ 		if (lpfc_sli_handle_mb_event(phba) == 0)
+ 			i = 0;
+ 
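
The flush loop now samples and clears the mailbox-attention bit in one
locked step, so lpfc_sli_handle_mb_event runs only when HA_MBATT was
genuinely posted and polling can no longer provoke the stray-interrupt
warning. The snapshot-and-clear idiom in isolation (demo_* and DEMO_MBATT
are hypothetical):

	/* Atomically sample and consume one attention bit. */
	static void demo_poll_mbox(struct demo_hba *hba)
	{
		u32 ha_copy;

		spin_lock_irq(&hba->lock);
		ha_copy = hba->work_ha;		/* snapshot pending attentions */
		hba->work_ha &= ~DEMO_MBATT;	/* consume the mailbox bit */
		spin_unlock_irq(&hba->lock);

		if (ha_copy & DEMO_MBATT)
			demo_handle_mb_event(hba);	/* only if truly posted */
	}
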
+@@ -3183,6 +3768,13 @@
+ 	int i;
+ 	uint32_t control;
+ 
++	MAILBOX_t *mbox, *pmbox;
++	struct lpfc_vport *vport;
++	struct lpfc_nodelist *ndlp;
++	struct lpfc_dmabuf *mp;
++	LPFC_MBOXQ_t *pmb;
++	int rc;
++
+ 	/*
+ 	 * Get the driver's phba structure from the dev_id and
+ 	 * assume the HBA is not interrupting.
+@@ -3204,7 +3796,7 @@
+ 	 */
+ 
+ 	/* Ignore all interrupts during initialization. */
+-	if (unlikely(phba->hba_state < LPFC_LINK_DOWN))
++	if (unlikely(phba->link_state < LPFC_LINK_DOWN))
+ 		return IRQ_NONE;
+ 
+ 	/*
+@@ -3212,16 +3804,16 @@
+ 	 * Clear Attention Sources, except Error Attention (to
+ 	 * preserve status) and Link Attention
+ 	 */
+-	spin_lock(phba->host->host_lock);
++	spin_lock(&phba->hbalock);
+ 	ha_copy = readl(phba->HAregaddr);
+ 	/* If somebody is waiting to handle an eratt don't process it
+ 	 * here.  The brdkill function will do this.
+ 	 */
+-	if (phba->fc_flag & FC_IGNORE_ERATT)
++	if (phba->link_flag & LS_IGNORE_ERATT)
+ 		ha_copy &= ~HA_ERATT;
+ 	writel((ha_copy & ~(HA_LATT | HA_ERATT)), phba->HAregaddr);
+ 	readl(phba->HAregaddr); /* flush */
+-	spin_unlock(phba->host->host_lock);
++	spin_unlock(&phba->hbalock);
+ 
+ 	if (unlikely(!ha_copy))
+ 		return IRQ_NONE;
+@@ -3235,36 +3827,41 @@
+ 				 * Turn off Link Attention interrupts
+ 				 * until CLEAR_LA done
+ 				 */
+-				spin_lock(phba->host->host_lock);
++				spin_lock(&phba->hbalock);
+ 				phba->sli.sli_flag &= ~LPFC_PROCESS_LA;
+ 				control = readl(phba->HCregaddr);
+ 				control &= ~HC_LAINT_ENA;
+ 				writel(control, phba->HCregaddr);
+ 				readl(phba->HCregaddr); /* flush */
+-				spin_unlock(phba->host->host_lock);
++				spin_unlock(&phba->hbalock);
+ 			}
+ 			else
+ 				work_ha_copy &= ~HA_LATT;
+ 		}
+ 
+ 		if (work_ha_copy & ~(HA_ERATT|HA_MBATT|HA_LATT)) {
+-			for (i = 0; i < phba->sli.num_rings; i++) {
+-				if (work_ha_copy & (HA_RXATT << (4*i))) {
+ 					/*
+-					 * Turn off Slow Rings interrupts
++			 * Turn off Slow Rings interrupts, LPFC_ELS_RING is
++			 * the only slow ring.
+ 					 */
+-					spin_lock(phba->host->host_lock);
++			status = (work_ha_copy &
++				(HA_RXMASK  << (4*LPFC_ELS_RING)));
++			status >>= (4*LPFC_ELS_RING);
++			if (status & HA_RXMASK) {
++				spin_lock(&phba->hbalock);
+ 					control = readl(phba->HCregaddr);
+-					control &= ~(HC_R0INT_ENA << i);
++				if (control & (HC_R0INT_ENA << LPFC_ELS_RING)) {
++					control &=
++					    ~(HC_R0INT_ENA << LPFC_ELS_RING);
+ 					writel(control, phba->HCregaddr);
+ 					readl(phba->HCregaddr); /* flush */
+-					spin_unlock(phba->host->host_lock);
+ 				}
++				spin_unlock(&phba->hbalock);
+ 			}
+ 		}
+ 
+ 		if (work_ha_copy & HA_ERATT) {
+-			phba->hba_state = LPFC_HBA_ERROR;
++			phba->link_state = LPFC_HBA_ERROR;
+ 			/*
+ 			 * There was a link/board error.  Read the
+ 			 * status register to retrieve the error event
+@@ -3279,14 +3876,108 @@
+ 			/* Clear Chip error bit */
+ 			writel(HA_ERATT, phba->HAregaddr);
+ 			readl(phba->HAregaddr); /* flush */
+-			phba->stopped = 1;
++			phba->pport->stopped = 1;
++		}
++
++		if ((work_ha_copy & HA_MBATT) &&
++		    (phba->sli.mbox_active)) {
++			pmb = phba->sli.mbox_active;
++			pmbox = &pmb->mb;
++			mbox = &phba->slim2p->mbx;
++			vport = pmb->vport;
++
++			/* First check out the status word */
++			lpfc_sli_pcimem_bcopy(mbox, pmbox, sizeof(uint32_t));
++			if (pmbox->mbxOwner != OWN_HOST) {
++				/*
++				 * Stray Mailbox Interrupt, mbxCommand <cmd>
++				 * mbxStatus <status>
++				 */
++				lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX |
++						LOG_SLI,
++						"%d (%d):0304 Stray Mailbox "
++						"Interrupt mbxCommand x%x "
++						"mbxStatus x%x\n",
++						phba->brd_no,
++						(vport
++						 ? vport->vpi : 0),
++						pmbox->mbxCommand,
++						pmbox->mbxStatus);
++			}
++			phba->last_completion_time = jiffies;
++			del_timer_sync(&phba->sli.mbox_tmo);
++
++			phba->sli.mbox_active = NULL;
++			if (pmb->mbox_cmpl) {
++				lpfc_sli_pcimem_bcopy(mbox, pmbox,
++						      MAILBOX_CMD_SIZE);
++			}
++			if (pmb->mbox_flag & LPFC_MBX_IMED_UNREG) {
++				pmb->mbox_flag &= ~LPFC_MBX_IMED_UNREG;
++
++				lpfc_debugfs_disc_trc(vport,
++					LPFC_DISC_TRC_MBOX_VPORT,
++					"MBOX dflt rpi: : status:x%x rpi:x%x",
++					(uint32_t)pmbox->mbxStatus,
++					pmbox->un.varWords[0], 0);
++
++				if (!pmbox->mbxStatus) {
++					mp = (struct lpfc_dmabuf *)
++						(pmb->context1);
++					ndlp = (struct lpfc_nodelist *)
++						pmb->context2;
++
++					/* Reg_LOGIN of dflt RPI was successful.
++					 * Now let's get rid of the RPI using the
++					 * same mbox buffer.
++					 */
++					lpfc_unreg_login(phba, vport->vpi,
++						pmbox->un.varWords[0], pmb);
++					pmb->mbox_cmpl = lpfc_mbx_cmpl_dflt_rpi;
++					pmb->context1 = mp;
++					pmb->context2 = ndlp;
++					pmb->vport = vport;
++					spin_lock(&phba->hbalock);
++					phba->sli.sli_flag &=
++						~LPFC_SLI_MBOX_ACTIVE;
++					spin_unlock(&phba->hbalock);
++					goto send_current_mbox;
++				}
++			}
++			spin_lock(&phba->pport->work_port_lock);
++			phba->pport->work_port_events &= ~WORKER_MBOX_TMO;
++			spin_unlock(&phba->pport->work_port_lock);
++			lpfc_mbox_cmpl_put(phba, pmb);
++		}
++		if ((work_ha_copy & HA_MBATT) &&
++		    (phba->sli.mbox_active == NULL)) {
++send_next_mbox:
++			spin_lock(&phba->hbalock);
++			phba->sli.sli_flag &= ~LPFC_SLI_MBOX_ACTIVE;
++			pmb = lpfc_mbox_get(phba);
++			spin_unlock(&phba->hbalock);
++send_current_mbox:
++			/* Process next mailbox command if there is one */
++			if (pmb != NULL) {
++				rc = lpfc_sli_issue_mbox(phba, pmb, MBX_NOWAIT);
++				if (rc == MBX_NOT_FINISHED) {
++					pmb->mb.mbxStatus = MBX_NOT_FINISHED;
++					lpfc_mbox_cmpl_put(phba, pmb);
++					goto send_next_mbox;
++				}
++			} else {
++				/* Turn on IOCB processing */
++				for (i = 0; i < phba->sli.num_rings; i++)
++					lpfc_sli_turn_on_ring(phba, i);
++			}
++
+ 		}
+ 
+-		spin_lock(phba->host->host_lock);
++		spin_lock(&phba->hbalock);
+ 		phba->work_ha |= work_ha_copy;
+ 		if (phba->work_wait)
+-			wake_up(phba->work_wait);
+-		spin_unlock(phba->host->host_lock);
++			lpfc_worker_wake_up(phba);
++		spin_unlock(&phba->hbalock);
+ 	}
+ 
+ 	ha_copy &= ~(phba->work_ha_mask);
+@@ -3298,7 +3989,7 @@
+ 	 */
+ 	status = (ha_copy & (HA_RXMASK  << (4*LPFC_FCP_RING)));
+ 	status >>= (4*LPFC_FCP_RING);
+-	if (status & HA_RXATT)
++	if (status & HA_RXMASK)
+ 		lpfc_sli_handle_fast_ring_event(phba,
+ 						&phba->sli.ring[LPFC_FCP_RING],
+ 						status);
+@@ -3311,7 +4002,7 @@
+ 		 */
+ 		status = (ha_copy & (HA_RXMASK  << (4*LPFC_EXTRA_RING)));
+ 		status >>= (4*LPFC_EXTRA_RING);
+-		if (status & HA_RXATT) {
++		if (status & HA_RXMASK) {
+ 			lpfc_sli_handle_fast_ring_event(phba,
+ 					&phba->sli.ring[LPFC_EXTRA_RING],
+ 					status);
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.h linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_sli.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_sli.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_sli.h	2007-12-19 15:29:23.000000000 -0500
+@@ -20,6 +20,7 @@
+ 
+ /* forward declaration for LPFC_IOCB_t's use */
+ struct lpfc_hba;
++struct lpfc_vport;
+ 
+ /* Define the context types that SLI handles for abort and sums. */
+ typedef enum _lpfc_ctx_cmd {
+@@ -43,10 +44,12 @@
+ #define LPFC_IO_WAKE		2	/* High Priority Queue signal flag */
+ #define LPFC_IO_FCP		4	/* FCP command -- iocbq in scsi_buf */
+ #define LPFC_DRIVER_ABORTED	8	/* driver aborted this request */
++#define LPFC_IO_FABRIC		0x10	/* Iocb send using fabric scheduler */
+ 
+ 	uint8_t abort_count;
+ 	uint8_t rsvd2;
+ 	uint32_t drvrTimeout;	/* driver timeout in seconds */
++	struct lpfc_vport *vport;/* virtual port pointer */
+ 	void *context1;		/* caller context information */
+ 	void *context2;		/* caller context information */
+ 	void *context3;		/* caller context information */
+@@ -56,6 +59,8 @@
+ 		struct lpfcMboxq   *mbox;
+ 	} context_un;
+ 
++	void (*fabric_iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
++			   struct lpfc_iocbq *);
+ 	void (*iocb_cmpl) (struct lpfc_hba *, struct lpfc_iocbq *,
+ 			   struct lpfc_iocbq *);
+ 
+@@ -69,11 +74,13 @@
+ #define IOCB_TIMEDOUT       3
+ 
+ #define LPFC_MBX_WAKE	1
++#define LPFC_MBX_IMED_UNREG	2
+ 
+ typedef struct lpfcMboxq {
+ 	/* MBOXQs are used in single linked lists */
+ 	struct list_head list;	/* ptr to next mailbox command */
+ 	MAILBOX_t mb;		/* Mailbox cmd */
++	struct lpfc_vport *vport;/* virtual port pointer */
+ 	void *context1;		/* caller context information */
+ 	void *context2;		/* caller context information */
+ 
+@@ -135,6 +142,8 @@
+ 	uint8_t ringno;		/* ring number */
+ 	uint16_t numCiocb;	/* number of command iocb's per ring */
+ 	uint16_t numRiocb;	/* number of rsp iocb's per ring */
++	uint16_t sizeCiocb;	/* Size of command iocb's in this ring */
++	uint16_t sizeRiocb; 	/* Size of response iocb's in this ring */
+ 
+ 	uint32_t fast_iotag;	/* max fastlookup based iotag           */
+ 	uint32_t iotag_ctr;	/* keeps track of the next iotag to use */
+@@ -165,6 +174,34 @@
+ 					struct lpfc_sli_ring *);
+ };
+ 
++/* Structure used for configuring rings to a specific profile or rctl / type */
++struct lpfc_hbq_init {
++	uint32_t rn;		/* Receive buffer notification */
++	uint32_t entry_count;	/* max # of entries in HBQ */
++	uint32_t headerLen;	/* 0 if not profile 4 or 5 */
++	uint32_t logEntry;	/* Set to 1 if this HBQ used for LogEntry */
++	uint32_t profile;	/* Selection profile 0=all, 7=logentry */
++	uint32_t ring_mask;	/* Binds HBQ to a ring e.g. Ring0=b0001,
++				 * ring2=b0100 */
++	uint32_t hbq_index;	/* index of this hbq in ring .HBQs[] */
++
++	uint32_t seqlenoff;
++	uint32_t maxlen;
++	uint32_t seqlenbcnt;
++	uint32_t cmdcodeoff;
++	uint32_t cmdmatch[8];
++	uint32_t mask_count;	/* number of mask entries in prt array */
++	struct hbq_mask hbqMasks[6];
++
++	/* Non-config rings fields to keep track of buffer allocations */
++	uint32_t buffer_count;	/* number of buffers allocated */
++	uint32_t init_count;	/* number to allocate when initialized */
++	uint32_t add_count;	/* number to allocate when starved */
++} ;
++
++#define LPFC_MAX_HBQ 16
++
++
+ /* Structure used to hold SLI statistical counters and info */
+ struct lpfc_sli_stat {
+ 	uint64_t mbox_stat_err;  /* Mbox cmds completed status error */
+@@ -197,6 +234,7 @@
+ #define LPFC_SLI_MBOX_ACTIVE      0x100	/* HBA mailbox is currently active */
+ #define LPFC_SLI2_ACTIVE          0x200	/* SLI2 overlay in firmware is active */
+ #define LPFC_PROCESS_LA           0x400	/* Able to process link attention */
++#define LPFC_BLOCK_MGMT_IO        0x800	/* Don't allow mgmt mbx or iocb cmds */
+ 
+ 	struct lpfc_sli_ring ring[LPFC_MAX_RING];
+ 	int fcp_ring;		/* ring used for FCP initiator commands */
+@@ -209,6 +247,7 @@
+ 	uint16_t mboxq_cnt;	/* current length of queue */
+ 	uint16_t mboxq_max;	/* max length */
+ 	LPFC_MBOXQ_t *mbox_active;	/* active mboxq information */
++	struct list_head mboxq_cmpl;
+ 
+ 	struct timer_list mbox_tmo;	/* Hold clk to timeout active mbox
+ 					   cmd */
+@@ -221,12 +260,6 @@
+ 	struct lpfc_lnk_stat lnk_stat_offsets;
+ };
+ 
+-/* Given a pointer to the start of the ring, and the slot number of
+- * the desired iocb entry, calc a pointer to that entry.
+- * (assume iocb entry size is 32 bytes, or 8 words)
+- */
+-#define IOCB_ENTRY(ring,slot) ((IOCB_t *)(((char *)(ring)) + ((slot) * 32)))
+-
+ #define LPFC_MBOX_TMO           30	/* Sec tmo for outstanding mbox
+ 					   command */
+ #define LPFC_MBOX_TMO_FLASH_CMD 300     /* Sec tmo for outstanding FLASH write
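
The deleted IOCB_ENTRY() macro baked in a 32-byte entry size; under SLI-3
the command and response entries grow (SLI3_IOCB_CMD_SIZE /
SLI3_IOCB_RSP_SIZE), and each ring now records its own sizes in the new
sizeCiocb/sizeRiocb fields. A slot address therefore has to be computed per
ring; a sketch of the replacement arithmetic, not the driver's literal code:

	/* Variable-size successor to the fixed 32-byte IOCB_ENTRY(). */
	static inline IOCB_t *demo_iocb_entry(struct lpfc_sli_ring *pring,
					      void *ringaddr, unsigned int slot)
	{
		return (IOCB_t *)((char *)ringaddr + slot * pring->sizeCiocb);
	}
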
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_version.h linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_version.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_version.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_version.h	2007-12-19 15:29:23.000000000 -0500
+@@ -18,7 +18,7 @@
+  * included with this package.                                     *
+  *******************************************************************/
+ 
+-#define LPFC_DRIVER_VERSION "8.1.12"
++#define LPFC_DRIVER_VERSION "8.2.1"
+ 
+ #define LPFC_DRIVER_NAME "lpfc"
+ 
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.c linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_vport.c
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_vport.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,523 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for         *
++ * Fibre Channel Host Bus Adapters.                                *
++ * Copyright (C) 2004-2006 Emulex.  All rights reserved.           *
++ * EMULEX and SLI are trademarks of Emulex.                        *
++ * www.emulex.com                                                  *
++ * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
++ *                                                                 *
++ * This program is free software; you can redistribute it and/or   *
++ * modify it under the terms of version 2 of the GNU General       *
++ * Public License as published by the Free Software Foundation.    *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
++ * more details, a copy of which can be found in the file COPYING  *
++ * included with this package.                                     *
++ *******************************************************************/
++
++#include <linux/blkdev.h>
++#include <linux/delay.h>
++#include <linux/dma-mapping.h>
++#include <linux/idr.h>
++#include <linux/interrupt.h>
++#include <linux/kthread.h>
++#include <linux/pci.h>
++#include <linux/spinlock.h>
++
++#include <scsi/scsi.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_transport_fc.h>
++#include "lpfc_hw.h"
++#include "lpfc_sli.h"
++#include "lpfc_disc.h"
++#include "lpfc_scsi.h"
++#include "lpfc.h"
++#include "lpfc_logmsg.h"
++#include "lpfc_crtn.h"
++#include "lpfc_version.h"
++#include "lpfc_vport.h"
++
++inline void lpfc_vport_set_state(struct lpfc_vport *vport,
++				 enum fc_vport_state new_state)
++{
++	struct fc_vport *fc_vport = vport->fc_vport;
++
++	if (fc_vport) {
++		/*
++		 * When the transport defines fc_vport_set_state we will
++		 * replace this code with the following line:
++		 */
++		/* fc_vport_set_state(fc_vport, new_state); */
++		if (new_state != FC_VPORT_INITIALIZING)
++			fc_vport->vport_last_state = fc_vport->vport_state;
++		fc_vport->vport_state = new_state;
++	}
++
++	/* For all the error states we will set the internal state to FAILED */
++	switch (new_state) {
++	case FC_VPORT_NO_FABRIC_SUPP:
++	case FC_VPORT_NO_FABRIC_RSCS:
++	case FC_VPORT_FABRIC_LOGOUT:
++	case FC_VPORT_FABRIC_REJ_WWN:
++	case FC_VPORT_FAILED:
++		vport->port_state = LPFC_VPORT_FAILED;
++		break;
++	case FC_VPORT_LINKDOWN:
++		vport->port_state = LPFC_VPORT_UNKNOWN;
++		break;
++	default:
++		/* do nothing */
++		break;
++	}
++}
++
++static int
++lpfc_alloc_vpi(struct lpfc_hba *phba)
++{
++	int  vpi;
++
++	spin_lock_irq(&phba->hbalock);
++	/* Start at bit 1 because vpi zero is reserved for the physical port */
++	vpi = find_next_zero_bit(phba->vpi_bmask, (phba->max_vpi + 1), 1);
++	if (vpi > phba->max_vpi)
++		vpi = 0;
++	else
++		set_bit(vpi, phba->vpi_bmask);
++	spin_unlock_irq(&phba->hbalock);
++	return vpi;
++}
++
++static void
++lpfc_free_vpi(struct lpfc_hba *phba, int vpi)
++{
++	spin_lock_irq(&phba->hbalock);
++	clear_bit(vpi, phba->vpi_bmask);
++	spin_unlock_irq(&phba->hbalock);
++}
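
VPIs are managed as a spinlock-protected bitmap: bit 0 stays set aside for
the physical port, find_next_zero_bit() scans from bit 1, and a return of 0
doubles as the out-of-resources signal. The same allocator shape in
isolation, with hypothetical DEMO_* and demo_* names:

	#define DEMO_MAX_ID 63		/* hypothetical ceiling */

	static DECLARE_BITMAP(demo_ids, DEMO_MAX_ID + 1);
	static DEFINE_SPINLOCK(demo_ids_lock);

	static int demo_alloc_id(void)
	{
		int id;

		spin_lock_irq(&demo_ids_lock);
		/* id 0 is reserved, so scanning starts at bit 1 */
		id = find_next_zero_bit(demo_ids, DEMO_MAX_ID + 1, 1);
		if (id > DEMO_MAX_ID)
			id = 0;			/* 0 means "none free" */
		else
			set_bit(id, demo_ids);
		spin_unlock_irq(&demo_ids_lock);
		return id;
	}
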
++
++static int
++lpfc_vport_sparm(struct lpfc_hba *phba, struct lpfc_vport *vport)
++{
++	LPFC_MBOXQ_t *pmb;
++	MAILBOX_t *mb;
++	struct lpfc_dmabuf *mp;
++	int  rc;
++
++	pmb = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
++	if (!pmb) {
++		return -ENOMEM;
++	}
++	mb = &pmb->mb;
++
++	lpfc_read_sparam(phba, pmb, vport->vpi);
++	/*
++	 * Grab buffer pointer and clear context1 so we can use
++	 * lpfc_sli_issue_mbox_wait
++	 */
++	mp = (struct lpfc_dmabuf *) pmb->context1;
++	pmb->context1 = NULL;
++
++	pmb->vport = vport;
++	rc = lpfc_sli_issue_mbox_wait(phba, pmb, phba->fc_ratov * 2);
++	if (rc != MBX_SUCCESS) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_INIT | LOG_VPORT,
++				"%d (%d):1818 VPort failed init, mbxCmd x%x "
++				"READ_SPARM mbxStatus x%x, rc = x%x\n",
++				phba->brd_no, vport->vpi,
++				mb->mbxCommand, mb->mbxStatus, rc);
++		lpfc_mbuf_free(phba, mp->virt, mp->phys);
++		kfree(mp);
++		if (rc != MBX_TIMEOUT)
++			mempool_free(pmb, phba->mbox_mem_pool);
++		return -EIO;
++	}
++
++	memcpy(&vport->fc_sparam, mp->virt, sizeof (struct serv_parm));
++	memcpy(&vport->fc_nodename, &vport->fc_sparam.nodeName,
++	       sizeof (struct lpfc_name));
++	memcpy(&vport->fc_portname, &vport->fc_sparam.portName,
++	       sizeof (struct lpfc_name));
++
++	lpfc_mbuf_free(phba, mp->virt, mp->phys);
++	kfree(mp);
++	mempool_free(pmb, phba->mbox_mem_pool);
++
++	return 0;
++}
++
++static int
++lpfc_valid_wwn_format(struct lpfc_hba *phba, struct lpfc_name *wwn,
++		      const char *name_type)
++{
++	/* Ensure that IEEE format 1 addresses
++	 * contain zeros in bits 59-48.
++	 */
++	if (!((wwn->u.wwn[0] >> 4) == 1 &&
++	      ((wwn->u.wwn[0] & 0xf) != 0 || (wwn->u.wwn[1] & 0xf) != 0)))
++		return 1;
++
++	lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++			"%d:1822 Invalid %s: %02x:%02x:%02x:%02x:"
++			"%02x:%02x:%02x:%02x\n",
++			phba->brd_no, name_type,
++			wwn->u.wwn[0], wwn->u.wwn[1],
++			wwn->u.wwn[2], wwn->u.wwn[3],
++			wwn->u.wwn[4], wwn->u.wwn[5],
++			wwn->u.wwn[6], wwn->u.wwn[7]);
++	return 0;
++}
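
Concretely, the top nibble of byte 0 is the NAA name format, and only
format 1 must carry zeros in the nibbles tested above. Some illustrative
(made-up) WWNs run through the check:

	/* Worked examples for lpfc_valid_wwn_format(); the WWN values
	 * themselves are illustrative, not from the patch.
	 *
	 *   10:00:00:00:c9:3a:f7:de  NAA = 1, tested nibbles zero  -> valid
	 *   15:00:00:00:c9:3a:f7:de  NAA = 1, byte 0 low nibble set -> rejected
	 *   50:06:01:60:90:20:1e:7a  NAA = 5, format 1 rule skipped -> valid
	 */
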
++
++static int
++lpfc_unique_wwpn(struct lpfc_hba *phba, struct lpfc_vport *new_vport)
++{
++	struct lpfc_vport *vport;
++
++	list_for_each_entry(vport, &phba->port_list, listentry) {
++		if (vport == new_vport)
++			continue;
++		/* If they match, return not unique */
++		if (memcmp(&vport->fc_sparam.portName,
++			&new_vport->fc_sparam.portName,
++			sizeof(struct lpfc_name)) == 0)
++			return 0;
++	}
++	return 1;
++}
++
++int
++lpfc_vport_create(struct fc_vport *fc_vport, bool disable)
++{
++	struct lpfc_nodelist *ndlp;
++	struct lpfc_vport *pport =
++		(struct lpfc_vport *) fc_vport->shost->hostdata;
++	struct lpfc_hba   *phba = pport->phba;
++	struct lpfc_vport *vport = NULL;
++	int instance;
++	int vpi;
++	int rc = VPORT_ERROR;
++
++	if ((phba->sli_rev < 3) ||
++		!(phba->sli3_options & LPFC_SLI3_NPIV_ENABLED)) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++				"%d:1808 Create VPORT failed: "
++				"NPIV is not enabled: SLImode:%d\n",
++				phba->brd_no, phba->sli_rev);
++		rc = VPORT_INVAL;
++		goto error_out;
++	}
++
++	vpi = lpfc_alloc_vpi(phba);
++	if (vpi == 0) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++				"%d:1809 Create VPORT failed: "
++				"Max VPORTs (%d) exceeded\n",
++				phba->brd_no, phba->max_vpi);
++		rc = VPORT_NORESOURCES;
++		goto error_out;
++	}
++
++
++	/* Assign an unused board number */
++	if ((instance = lpfc_get_instance()) < 0) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++				"%d:1810 Create VPORT failed: Cannot get "
++				"instance number\n", phba->brd_no);
++		lpfc_free_vpi(phba, vpi);
++		rc = VPORT_NORESOURCES;
++		goto error_out;
++	}
++
++	vport = lpfc_create_port(phba, instance, fc_vport);
++	if (!vport) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++				"%d:1811 Create VPORT failed: vpi x%x\n",
++				phba->brd_no, vpi);
++		lpfc_free_vpi(phba, vpi);
++		rc = VPORT_NORESOURCES;
++		goto error_out;
++	}
++
++	vport->vpi = vpi;
++	lpfc_debugfs_initialize(vport);
++
++	if (lpfc_vport_sparm(phba, vport)) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++				"%d:1813 Create VPORT failed: vpi:%d "
++				"Cannot get sparam\n",
++				phba->brd_no, vpi);
++		lpfc_free_vpi(phba, vpi);
++		destroy_port(vport);
++		rc = VPORT_NORESOURCES;
++		goto error_out;
++	}
++
++	memcpy(vport->fc_portname.u.wwn, vport->fc_sparam.portName.u.wwn, 8);
++	memcpy(vport->fc_nodename.u.wwn, vport->fc_sparam.nodeName.u.wwn, 8);
++
++	if (fc_vport->node_name != 0)
++		u64_to_wwn(fc_vport->node_name, vport->fc_nodename.u.wwn);
++	if (fc_vport->port_name != 0)
++		u64_to_wwn(fc_vport->port_name, vport->fc_portname.u.wwn);
++
++	memcpy(&vport->fc_sparam.portName, vport->fc_portname.u.wwn, 8);
++	memcpy(&vport->fc_sparam.nodeName, vport->fc_nodename.u.wwn, 8);
++
++	if (!lpfc_valid_wwn_format(phba, &vport->fc_sparam.nodeName, "WWNN") ||
++	    !lpfc_valid_wwn_format(phba, &vport->fc_sparam.portName, "WWPN")) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++				"%d:1821 Create VPORT failed: vpi:%d "
++				"Invalid WWN format\n",
++				phba->brd_no, vpi);
++		lpfc_free_vpi(phba, vpi);
++		destroy_port(vport);
++		rc = VPORT_INVAL;
++		goto error_out;
++	}
++
++	if (!lpfc_unique_wwpn(phba, vport)) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++				"%d:1823 Create VPORT failed: vpi:%d "
++				"Duplicate WWN on HBA\n",
++				phba->brd_no, vpi);
++		lpfc_free_vpi(phba, vpi);
++		destroy_port(vport);
++		rc = VPORT_INVAL;
++		goto error_out;
++	}
++
++	*(struct lpfc_vport **)fc_vport->dd_data = vport;
++	vport->fc_vport = fc_vport;
++
++	if ((phba->link_state < LPFC_LINK_UP) ||
++	    (phba->fc_topology == TOPOLOGY_LOOP)) {
++		lpfc_vport_set_state(vport, FC_VPORT_LINKDOWN);
++		rc = VPORT_OK;
++		goto out;
++	}
++
++	if (disable) {
++		rc = VPORT_OK;
++		goto out;
++	}
++
++	/* Use the Physical nodes Fabric NDLP to determine if the link is
++	 * up and ready to FDISC.
++	 */
++	ndlp = lpfc_findnode_did(phba->pport, Fabric_DID);
++	if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
++		if (phba->link_flag & LS_NPIV_FAB_SUPPORTED) {
++			lpfc_set_disctmo(vport);
++			lpfc_initial_fdisc(vport);
++		} else {
++			lpfc_vport_set_state(vport, FC_VPORT_NO_FABRIC_SUPP);
++			lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++					"%d (%d):0262 No NPIV Fabric "
++					"support\n",
++					phba->brd_no, vport->vpi);
++		}
++	} else {
++		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++	}
++	rc = VPORT_OK;
++
++out:
++	lpfc_host_attrib_init(lpfc_shost_from_vport(vport));
++error_out:
++	return rc;
++}
++
++int
++disable_vport(struct fc_vport *fc_vport)
++{
++	struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data;
++	struct lpfc_hba   *phba = vport->phba;
++	struct lpfc_nodelist *ndlp = NULL, *next_ndlp = NULL;
++	long timeout;
++
++	ndlp = lpfc_findnode_did(vport, Fabric_DID);
++	if (ndlp && phba->link_state >= LPFC_LINK_UP) {
++		vport->unreg_vpi_cmpl = VPORT_INVAL;
++		timeout = msecs_to_jiffies(phba->fc_ratov * 2000);
++		if (!lpfc_issue_els_npiv_logo(vport, ndlp))
++			while (vport->unreg_vpi_cmpl == VPORT_INVAL && timeout)
++				timeout = schedule_timeout(timeout);
++	}
++
++	lpfc_sli_host_down(vport);
++
++	/* Mark all nodes for discovery so we can remove them by
++	 * calling lpfc_cleanup_rpis(vport, 1)
++	 */
++	list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
++		if (ndlp->nlp_state == NLP_STE_UNUSED_NODE)
++			continue;
++		lpfc_disc_state_machine(vport, ndlp, NULL,
++					NLP_EVT_DEVICE_RECOVERY);
++	}
++	lpfc_cleanup_rpis(vport, 1);
++
++	lpfc_stop_vport_timers(vport);
++	lpfc_unreg_all_rpis(vport);
++	lpfc_unreg_default_rpis(vport);
++	/*
++	 * Completion of unreg_vpi (lpfc_mbx_cmpl_unreg_vpi) does the
++	 * scsi_host_put() to release the vport.
++	 */
++	lpfc_mbx_unreg_vpi(vport);
++
++	lpfc_vport_set_state(vport, FC_VPORT_DISABLED);
++	return VPORT_OK;
++}
++
++int
++enable_vport(struct fc_vport *fc_vport)
++{
++	struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data;
++	struct lpfc_hba   *phba = vport->phba;
++	struct lpfc_nodelist *ndlp = NULL;
++
++	if ((phba->link_state < LPFC_LINK_UP) ||
++	    (phba->fc_topology == TOPOLOGY_LOOP)) {
++		lpfc_vport_set_state(vport, FC_VPORT_LINKDOWN);
++		return VPORT_OK;
++	}
++
++	vport->load_flag |= FC_LOADING;
++	vport->fc_flag |= FC_VPORT_NEEDS_REG_VPI;
++
++	/* Use the Physical node's Fabric NDLP to determine if the link is
++	 * up and ready to FDISC.
++	 */
++	ndlp = lpfc_findnode_did(phba->pport, Fabric_DID);
++	if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) {
++		if (phba->link_flag & LS_NPIV_FAB_SUPPORTED) {
++			lpfc_set_disctmo(vport);
++			lpfc_initial_fdisc(vport);
++		} else {
++			lpfc_vport_set_state(vport, FC_VPORT_NO_FABRIC_SUPP);
++			lpfc_printf_log(phba, KERN_ERR, LOG_ELS,
++					"%d (%d):0264 No NPIV Fabric "
++					"support\n",
++					phba->brd_no, vport->vpi);
++		}
++	} else {
++		lpfc_vport_set_state(vport, FC_VPORT_FAILED);
++	}
++
++	return VPORT_OK;
++}
++
++int
++lpfc_vport_disable(struct fc_vport *fc_vport, bool disable)
++{
++	if (disable)
++		return disable_vport(fc_vport);
++	else
++		return enable_vport(fc_vport);
++}
++
++
++int
++lpfc_vport_delete(struct fc_vport *fc_vport)
++{
++	struct lpfc_nodelist *ndlp = NULL;
++	struct lpfc_nodelist *next_ndlp;
++	struct Scsi_Host *shost = (struct Scsi_Host *) fc_vport->shost;
++	struct lpfc_vport *vport = *(struct lpfc_vport **)fc_vport->dd_data;
++	struct lpfc_hba   *phba = vport->phba;
++	long timeout;
++	int rc = VPORT_ERROR;
++
++	/*
++	 * This is a bit of a mess.  We want to ensure the shost doesn't get
++	 * torn down until we're done with the embedded lpfc_vport structure.
++	 *
++	 * Beyond holding a reference for this function, we also need a
++	 * reference for outstanding I/O requests we schedule during delete
++	 * processing.  But once we scsi_remove_host() we can no longer obtain
++	 * a reference through scsi_host_get().
++	 *
++	 * So we take two references here.  We release one reference at the
++	 * bottom of the function -- after delinking the vport.  And we
++	 * release the other at the completion of the unreg_vpi that gets
++	 * initiated after we've disposed of all other resources associated
++	 * with the port.
++	 */
++	if (!scsi_host_get(shost) || !scsi_host_get(shost))
++		return VPORT_INVAL;
++
++	if (vport->port_type == LPFC_PHYSICAL_PORT) {
++		lpfc_printf_log(phba, KERN_ERR, LOG_VPORT,
++				"%d:1812 vport_delete failed: Cannot delete "
++				"physical host\n", phba->brd_no);
++		goto out;
++	}
++
++	vport->load_flag |= FC_UNLOADING;
++
++	kfree(vport->vname);
++	lpfc_debugfs_terminate(vport);
++	fc_remove_host(lpfc_shost_from_vport(vport));
++	scsi_remove_host(lpfc_shost_from_vport(vport));
++
++	ndlp = lpfc_findnode_did(phba->pport, Fabric_DID);
++	if (ndlp && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE &&
++		phba->link_state >= LPFC_LINK_UP) {
++
++		/* First look for the Fabric ndlp */
++		ndlp = lpfc_findnode_did(vport, Fabric_DID);
++		if (!ndlp) {
++			/* Cannot find existing Fabric ndlp, allocate one */
++			ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL);
++			if (!ndlp)
++				goto skip_logo;
++			lpfc_nlp_init(vport, ndlp, Fabric_DID);
++		} else {
++			lpfc_dequeue_node(vport, ndlp);
++		}
++		vport->unreg_vpi_cmpl = VPORT_INVAL;
++		timeout = msecs_to_jiffies(phba->fc_ratov * 2000);
++		if (!lpfc_issue_els_npiv_logo(vport, ndlp))
++			while (vport->unreg_vpi_cmpl == VPORT_INVAL && timeout)
++				timeout = schedule_timeout(timeout);
++	}
++
++skip_logo:
++	lpfc_sli_host_down(vport);
++
++	list_for_each_entry_safe(ndlp, next_ndlp, &vport->fc_nodes, nlp_listp) {
++		lpfc_disc_state_machine(vport, ndlp, NULL,
++					     NLP_EVT_DEVICE_RECOVERY);
++		lpfc_disc_state_machine(vport, ndlp, NULL,
++					     NLP_EVT_DEVICE_RM);
++	}
++
++	lpfc_stop_vport_timers(vport);
++	lpfc_unreg_all_rpis(vport);
++	lpfc_unreg_default_rpis(vport);
++	/*
++	 * Completion of unreg_vpi (lpfc_mbx_cmpl_unreg_vpi) does the
++	 * scsi_host_put() to release the vport.
++	 */
++	lpfc_mbx_unreg_vpi(vport);
++
++	lpfc_free_vpi(phba, vport->vpi);
++	vport->work_port_events = 0;
++	spin_lock_irq(&phba->hbalock);
++	list_del_init(&vport->listentry);
++	spin_unlock_irq(&phba->hbalock);
++
++	rc = VPORT_OK;
++out:
++	scsi_host_put(shost);
++	return rc;
++}
++
++
++EXPORT_SYMBOL(lpfc_vport_create);
++EXPORT_SYMBOL(lpfc_vport_delete);
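+
+These entry points are consumed by the NPIV-aware FC transport class,
+which invokes them through its vport hooks. A minimal sketch of the
+wiring (the actual hookup lives in lpfc's transport template, which is
+not shown in this excerpt):
+
+	struct fc_function_template lpfc_transport_functions = {
+		/* ... existing attribute/function hooks ... */
+		.vport_create	= lpfc_vport_create,	/* fc_vport allocated by transport */
+		.vport_disable	= lpfc_vport_disable,	/* dispatches to enable/disable above */
+		.vport_delete	= lpfc_vport_delete,
+	};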
+diff -Nurb linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.h linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_vport.h
+--- linux-2.6.22-570/drivers/scsi/lpfc/lpfc_vport.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/scsi/lpfc/lpfc_vport.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,113 @@
++/*******************************************************************
++ * This file is part of the Emulex Linux Device Driver for         *
++ * Fibre Channel Host Bus Adapters.                                *
++ * Copyright (C) 2004-2006 Emulex.  All rights reserved.           *
++ * EMULEX and SLI are trademarks of Emulex.                        *
++ * www.emulex.com                                                  *
++ * Portions Copyright (C) 2004-2005 Christoph Hellwig              *
++ *                                                                 *
++ * This program is free software; you can redistribute it and/or   *
++ * modify it under the terms of version 2 of the GNU General       *
++ * Public License as published by the Free Software Foundation.    *
++ * This program is distributed in the hope that it will be useful. *
++ * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND          *
++ * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY,  *
++ * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE      *
++ * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
++ * TO BE LEGALLY INVALID.  See the GNU General Public License for  *
++ * more details, a copy of which can be found in the file COPYING  *
++ * included with this package.                                     *
++ *******************************************************************/
++
++#ifndef _H_LPFC_VPORT
++#define _H_LPFC_VPORT
++
++/* API version values (each will be an individual bit) */
++#define VPORT_API_VERSION_1	0x01
++
++/* Values returned via lpfc_vport_getinfo() */
++struct vport_info {
++
++	uint32_t api_versions;
++	uint8_t linktype;
++#define  VPORT_TYPE_PHYSICAL	0
++#define  VPORT_TYPE_VIRTUAL	1
++
++	uint8_t state;
++#define  VPORT_STATE_OFFLINE	0
++#define  VPORT_STATE_ACTIVE	1
++#define  VPORT_STATE_FAILED	2
++
++	uint8_t fail_reason;
++	uint8_t prev_fail_reason;
++#define  VPORT_FAIL_UNKNOWN	0
++#define  VPORT_FAIL_LINKDOWN	1
++#define  VPORT_FAIL_FAB_UNSUPPORTED	2
++#define  VPORT_FAIL_FAB_NORESOURCES	3
++#define  VPORT_FAIL_FAB_LOGOUT	4
++#define  VPORT_FAIL_ADAP_NORESOURCES	5
++
++	uint8_t node_name[8];	/* WWNN */
++	uint8_t port_name[8];	/* WWPN */
++
++	struct Scsi_Host *shost;
++
++/* Following values are valid only on physical links */
++	uint32_t vports_max;
++	uint32_t vports_inuse;
++	uint32_t rpi_max;
++	uint32_t rpi_inuse;
++#define  VPORT_CNT_INVALID	0xFFFFFFFF
++};
++
++/* data used in link creation */
++struct vport_data {
++	uint32_t api_version;
++
++	uint32_t options;
++#define  VPORT_OPT_AUTORETRY	0x01
++
++	uint8_t node_name[8];	/* WWNN */
++	uint8_t port_name[8];	/* WWPN */
++
++/*
++ *  Upon successful creation, vport_shost will point to the new Scsi_Host
++ *  structure for the new virtual link.
++ */
++	struct Scsi_Host *vport_shost;
++};
++
++/* API function return codes */
++#define VPORT_OK	0
++#define VPORT_ERROR	-1
++#define VPORT_INVAL	-2
++#define VPORT_NOMEM	-3
++#define VPORT_NORESOURCES	-4
++
++int lpfc_vport_create(struct fc_vport *, bool);
++int lpfc_vport_delete(struct fc_vport *);
++int lpfc_vport_getinfo(struct Scsi_Host *, struct vport_info *);
++int lpfc_vport_tgt_remove(struct Scsi_Host *, uint, uint);
++
++/*
++ *  queuecommand VPORT-specific return codes. Specified in the host byte code.
++ *  Returned when the virtual link has failed or is not active.
++ */
++#define  DID_VPORT_ERROR	0x0f
++
++#define VPORT_INFO	0x1
++#define VPORT_CREATE	0x2
++#define VPORT_DELETE	0x4
++
++struct vport_cmd_tag {
++	uint32_t cmd;
++	struct vport_data cdata;
++	struct vport_info cinfo;
++	void *vport;
++	int vport_num;
++};
++
++void lpfc_vport_set_state(struct lpfc_vport *vport,
++			  enum fc_vport_state new_state);
++
++#endif /* H_LPFC_VPORT */
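+
+Callers of this API get VPORT_* codes rather than errnos. A
+hypothetical helper (not part of the patch) that maps them for
+kernel-style callers:
+
+	static inline int lpfc_vport_rc_to_errno(int rc)
+	{
+		switch (rc) {
+		case VPORT_OK:
+			return 0;
+		case VPORT_NOMEM:
+			return -ENOMEM;
+		case VPORT_NORESOURCES:
+			return -ENOSPC;
+		case VPORT_INVAL:
+			return -EINVAL;
+		default:		/* VPORT_ERROR and anything else */
+			return -EIO;
+		}
+	}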
+diff -Nurb linux-2.6.22-570/drivers/scsi/mac53c94.c linux-2.6.22-try2/drivers/scsi/mac53c94.c
+--- linux-2.6.22-570/drivers/scsi/mac53c94.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/mac53c94.c	2007-12-19 15:29:23.000000000 -0500
+@@ -77,7 +77,7 @@
+ 		for (i = 0; i < cmd->cmd_len; ++i)
+ 			printk(" %.2x", cmd->cmnd[i]);
+ 		printk("\n" KERN_DEBUG "use_sg=%d request_bufflen=%d request_buffer=%p\n",
+-		       cmd->use_sg, cmd->request_bufflen, cmd->request_buffer);
++		       scsi_sg_count(cmd), scsi_bufflen(cmd), scsi_sglist(cmd));
+ 	}
+ #endif
+ 
+@@ -173,7 +173,6 @@
+ 	writeb(CMD_SELECT, &regs->command);
+ 	state->phase = selecting;
+ 
+-	if (cmd->use_sg > 0 || cmd->request_bufflen != 0)
+ 		set_dma_cmds(state, cmd);
+ }
+ 
+@@ -262,7 +261,7 @@
+ 		writeb(CMD_NOP, &regs->command);
+ 		/* set DMA controller going if any data to transfer */
+ 		if ((stat & (STAT_MSG|STAT_CD)) == 0
+-		    && (cmd->use_sg > 0 || cmd->request_bufflen != 0)) {
++		    && (scsi_sg_count(cmd) > 0 || scsi_bufflen(cmd))) {
+ 			nb = cmd->SCp.this_residual;
+ 			if (nb > 0xfff0)
+ 				nb = 0xfff0;
+@@ -310,14 +309,7 @@
+ 			printk(KERN_DEBUG "intr %x before data xfer complete\n", intr);
+ 		}
+ 		writel(RUN << 16, &dma->control);	/* stop dma */
+-		if (cmd->use_sg != 0) {
+-			pci_unmap_sg(state->pdev,
+-				(struct scatterlist *)cmd->request_buffer,
+-				cmd->use_sg, cmd->sc_data_direction);
+-		} else {
+-			pci_unmap_single(state->pdev, state->dma_addr,
+-				cmd->request_bufflen, cmd->sc_data_direction);
+-		}
++		scsi_dma_unmap(cmd);
+ 		/* should check dma status */
+ 		writeb(CMD_I_COMPLETE, &regs->command);
+ 		state->phase = completing;
+@@ -365,23 +357,23 @@
+  */
+ static void set_dma_cmds(struct fsc_state *state, struct scsi_cmnd *cmd)
+ {
+-	int i, dma_cmd, total;
++	int i, dma_cmd, total, nseg;
+ 	struct scatterlist *scl;
+ 	struct dbdma_cmd *dcmds;
+ 	dma_addr_t dma_addr;
+ 	u32 dma_len;
+ 
++	nseg = scsi_dma_map(cmd);
++	BUG_ON(nseg < 0);
++	if (!nseg)
++		return;
++
+ 	dma_cmd = cmd->sc_data_direction == DMA_TO_DEVICE ?
+ 			OUTPUT_MORE : INPUT_MORE;
+ 	dcmds = state->dma_cmds;
+-	if (cmd->use_sg > 0) {
+-		int nseg;
+-
+ 		total = 0;
+-		scl = (struct scatterlist *) cmd->request_buffer;
+-		nseg = pci_map_sg(state->pdev, scl, cmd->use_sg,
+-				cmd->sc_data_direction);
+-		for (i = 0; i < nseg; ++i) {
++
++	scsi_for_each_sg(cmd, scl, nseg, i) {
+ 			dma_addr = sg_dma_address(scl);
+ 			dma_len = sg_dma_len(scl);
+ 			if (dma_len > 0xffff)
+@@ -391,21 +383,9 @@
+ 			st_le16(&dcmds->command, dma_cmd);
+ 			st_le32(&dcmds->phy_addr, dma_addr);
+ 			dcmds->xfer_status = 0;
+-			++scl;
+-			++dcmds;
+-		}
+-	} else {
+-		total = cmd->request_bufflen;
+-		if (total > 0xffff)
+-			panic("mac53c94: transfer size >= 64k");
+-		dma_addr = pci_map_single(state->pdev, cmd->request_buffer,
+-					  total, cmd->sc_data_direction);
+-		state->dma_addr = dma_addr;
+-		st_le16(&dcmds->req_count, total);
+-		st_le32(&dcmds->phy_addr, dma_addr);
+-		dcmds->xfer_status = 0;
+ 		++dcmds;
+ 	}
++
+ 	dma_cmd += OUTPUT_LAST - OUTPUT_MORE;
+ 	st_le16(&dcmds[-1].command, dma_cmd);
+ 	st_le16(&dcmds->command, DBDMA_STOP);
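+
+The mac53c94 hunks above show the conversion this patch applies across
+all of these drivers: the use_sg/request_buffer special-casing collapses
+into scsi_dma_map() plus a scatterlist walk. A minimal sketch of the
+resulting idiom, with a hypothetical setup_descriptor() standing in for
+the hardware-specific part:
+
+	static int example_build_sg(struct scsi_cmnd *cmd)
+	{
+		struct scatterlist *sg;
+		int i, nseg;
+
+		nseg = scsi_dma_map(cmd);	/* 0 = no data, < 0 = mapping failed */
+		if (nseg <= 0)
+			return nseg;
+
+		scsi_for_each_sg(cmd, sg, nseg, i)
+			setup_descriptor(i, sg_dma_address(sg), sg_dma_len(sg));
+
+		return nseg;
+	}
+
+On completion, a single scsi_dma_unmap(cmd) replaces the old
+pci_unmap_sg()/pci_unmap_single() pair.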
+diff -Nurb linux-2.6.22-570/drivers/scsi/megaraid/megaraid_mbox.c linux-2.6.22-try2/drivers/scsi/megaraid/megaraid_mbox.c
+--- linux-2.6.22-570/drivers/scsi/megaraid/megaraid_mbox.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/megaraid/megaraid_mbox.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1378,8 +1378,6 @@
+ {
+ 	struct scatterlist	*sgl;
+ 	mbox_ccb_t		*ccb;
+-	struct page		*page;
+-	unsigned long		offset;
+ 	struct scsi_cmnd	*scp;
+ 	int			sgcnt;
+ 	int			i;
+@@ -1388,48 +1386,16 @@
+ 	scp	= scb->scp;
+ 	ccb	= (mbox_ccb_t *)scb->ccb;
+ 
++	sgcnt = scsi_dma_map(scp);
++	BUG_ON(sgcnt < 0 || sgcnt > adapter->sglen);
++
+ 	// no mapping required if no data to be transferred
+-	if (!scp->request_buffer || !scp->request_bufflen)
++	if (!sgcnt)
+ 		return 0;
+ 
+-	if (!scp->use_sg) {	/* scatter-gather list not used */
+-
+-		page = virt_to_page(scp->request_buffer);
+-
+-		offset = ((unsigned long)scp->request_buffer & ~PAGE_MASK);
+-
+-		ccb->buf_dma_h = pci_map_page(adapter->pdev, page, offset,
+-						  scp->request_bufflen,
+-						  scb->dma_direction);
+-		scb->dma_type = MRAID_DMA_WBUF;
+-
+-		/*
+-		 * We need to handle special 64-bit commands that need a
+-		 * minimum of 1 SG
+-		 */
+-		sgcnt = 1;
+-		ccb->sgl64[0].address	= ccb->buf_dma_h;
+-		ccb->sgl64[0].length	= scp->request_bufflen;
+-
+-		return sgcnt;
+-	}
+-
+-	sgl = (struct scatterlist *)scp->request_buffer;
+-
+-	// The number of sg elements returned must not exceed our limit
+-	sgcnt = pci_map_sg(adapter->pdev, sgl, scp->use_sg,
+-			scb->dma_direction);
+-
+-	if (sgcnt > adapter->sglen) {
+-		con_log(CL_ANN, (KERN_CRIT
+-			"megaraid critical: too many sg elements:%d\n",
+-			sgcnt));
+-		BUG();
+-	}
+-
+ 	scb->dma_type = MRAID_DMA_WSG;
+ 
+-	for (i = 0; i < sgcnt; i++, sgl++) {
++	scsi_for_each_sg(scp, sgl, sgcnt, i) {
+ 		ccb->sgl64[i].address	= sg_dma_address(sgl);
+ 		ccb->sgl64[i].length	= sg_dma_len(sgl);
+ 	}
+@@ -1489,19 +1455,11 @@
+ 
+ 	adapter->outstanding_cmds++;
+ 
+-	if (scb->dma_direction == PCI_DMA_TODEVICE) {
+-		if (!scb->scp->use_sg) {	// sg list not used
+-			pci_dma_sync_single_for_device(adapter->pdev,
+-					ccb->buf_dma_h,
+-					scb->scp->request_bufflen,
+-					PCI_DMA_TODEVICE);
+-		}
+-		else {
++	if (scb->dma_direction == PCI_DMA_TODEVICE)
+ 			pci_dma_sync_sg_for_device(adapter->pdev,
+-				scb->scp->request_buffer,
+-				scb->scp->use_sg, PCI_DMA_TODEVICE);
+-		}
+-	}
++					   scsi_sglist(scb->scp),
++					   scsi_sg_count(scb->scp),
++					   PCI_DMA_TODEVICE);
+ 
+ 	mbox->busy	= 1;	// Set busy
+ 	mbox->poll	= 0;
+@@ -1624,11 +1582,11 @@
+ 			return scb;
+ 
+ 		case MODE_SENSE:
+-			if (scp->use_sg) {
++		{
+ 				struct scatterlist	*sgl;
+ 				caddr_t			vaddr;
+ 
+-				sgl = (struct scatterlist *)scp->request_buffer;
++			sgl = scsi_sglist(scp);
+ 				if (sgl->page) {
+ 					vaddr = (caddr_t)
+ 						(page_address((&sgl[0])->page)
+@@ -1642,9 +1600,6 @@
+ 					__LINE__));
+ 				}
+ 			}
+-			else {
+-				memset(scp->request_buffer, 0, scp->cmnd[4]);
+-			}
+ 			scp->result = (DID_OK << 16);
+ 			return NULL;
+ 
+@@ -1716,7 +1671,7 @@
+ 			mbox->cmd		= MBOXCMD_PASSTHRU64;
+ 			scb->dma_direction	= scp->sc_data_direction;
+ 
+-			pthru->dataxferlen	= scp->request_bufflen;
++			pthru->dataxferlen	= scsi_bufflen(scp);
+ 			pthru->dataxferaddr	= ccb->sgl_dma_h;
+ 			pthru->numsge		= megaraid_mbox_mksgl(adapter,
+ 							scb);
+@@ -2050,8 +2005,8 @@
+ 
+ 	memcpy(pthru->cdb, scp->cmnd, scp->cmd_len);
+ 
+-	if (scp->request_bufflen) {
+-		pthru->dataxferlen	= scp->request_bufflen;
++	if (scsi_bufflen(scp)) {
++		pthru->dataxferlen	= scsi_bufflen(scp);
+ 		pthru->dataxferaddr	= ccb->sgl_dma_h;
+ 		pthru->numsge		= megaraid_mbox_mksgl(adapter, scb);
+ 	}
+@@ -2099,8 +2054,8 @@
+ 
+ 	memcpy(epthru->cdb, scp->cmnd, scp->cmd_len);
+ 
+-	if (scp->request_bufflen) {
+-		epthru->dataxferlen	= scp->request_bufflen;
++	if (scsi_bufflen(scp)) {
++		epthru->dataxferlen	= scsi_bufflen(scp);
+ 		epthru->dataxferaddr	= ccb->sgl_dma_h;
+ 		epthru->numsge		= megaraid_mbox_mksgl(adapter, scb);
+ 	}
+@@ -2266,37 +2221,13 @@
+ 
+ 	ccb	= (mbox_ccb_t *)scb->ccb;
+ 
+-	switch (scb->dma_type) {
+-
+-	case MRAID_DMA_WBUF:
+-		if (scb->dma_direction == PCI_DMA_FROMDEVICE) {
+-			pci_dma_sync_single_for_cpu(adapter->pdev,
+-					ccb->buf_dma_h,
+-					scb->scp->request_bufflen,
+-					PCI_DMA_FROMDEVICE);
+-		}
+-
+-		pci_unmap_page(adapter->pdev, ccb->buf_dma_h,
+-			scb->scp->request_bufflen, scb->dma_direction);
+-
+-		break;
+-
+-	case MRAID_DMA_WSG:
+-		if (scb->dma_direction == PCI_DMA_FROMDEVICE) {
++	if (scb->dma_direction == PCI_DMA_FROMDEVICE)
+ 			pci_dma_sync_sg_for_cpu(adapter->pdev,
+-					scb->scp->request_buffer,
+-					scb->scp->use_sg, PCI_DMA_FROMDEVICE);
+-		}
+-
+-		pci_unmap_sg(adapter->pdev, scb->scp->request_buffer,
+-			scb->scp->use_sg, scb->dma_direction);
+-
+-		break;
+-
+-	default:
+-		break;
+-	}
++					scsi_sglist(scb->scp),
++					scsi_sg_count(scb->scp),
++					PCI_DMA_FROMDEVICE);
+ 
++	scsi_dma_unmap(scb->scp);
+ 	return;
+ }
+ 
+@@ -2399,25 +2330,17 @@
+ 		if (scp->cmnd[0] == INQUIRY && status == 0 && islogical == 0
+ 				&& IS_RAID_CH(raid_dev, scb->dev_channel)) {
+ 
+-			if (scp->use_sg) {
+-				sgl = (struct scatterlist *)
+-					scp->request_buffer;
+-
++			sgl = scsi_sglist(scp);
+ 				if (sgl->page) {
+ 					c = *(unsigned char *)
+ 					(page_address((&sgl[0])->page) +
+ 						(&sgl[0])->offset);
+-				}
+-				else {
++			} else {
+ 					con_log(CL_ANN, (KERN_WARNING
+ 					"megaraid mailbox: invalid sg:%d\n",
+ 					__LINE__));
+ 					c = 0;
+ 				}
+-			}
+-			else {
+-				c = *(uint8_t *)scp->request_buffer;
+-			}
+ 
+ 			if ((c & 0x1F ) == TYPE_DISK) {
+ 				pdev_index = (scb->dev_channel * 16) +
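+
+The INQUIRY/MODE_SENSE snooping above now always goes through the
+scatterlist, since the accessor conversion guarantees one even for
+commands that used to carry a single linear buffer. Sketch of the peek
+idiom (2.6.22-era struct scatterlist, which still carries a ->page
+member; page_address() assumes a lowmem page):
+
+	static u8 example_peek_first_byte(struct scsi_cmnd *scp)
+	{
+		struct scatterlist *sg = scsi_sglist(scp);
+
+		if (!sg || !sg->page)
+			return 0;
+		return *((u8 *)page_address(sg->page) + sg->offset);
+	}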
+diff -Nurb linux-2.6.22-570/drivers/scsi/megaraid/megaraid_sas.c linux-2.6.22-try2/drivers/scsi/megaraid/megaraid_sas.c
+--- linux-2.6.22-570/drivers/scsi/megaraid/megaraid_sas.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/megaraid/megaraid_sas.c	2007-12-19 15:29:23.000000000 -0500
+@@ -433,34 +433,15 @@
+ 	int sge_count;
+ 	struct scatterlist *os_sgl;
+ 
+-	/*
+-	 * Return 0 if there is no data transfer
+-	 */
+-	if (!scp->request_buffer || !scp->request_bufflen)
+-		return 0;
++	sge_count = scsi_dma_map(scp);
++	BUG_ON(sge_count < 0);
+ 
+-	if (!scp->use_sg) {
+-		mfi_sgl->sge32[0].phys_addr = pci_map_single(instance->pdev,
+-							     scp->
+-							     request_buffer,
+-							     scp->
+-							     request_bufflen,
+-							     scp->
+-							     sc_data_direction);
+-		mfi_sgl->sge32[0].length = scp->request_bufflen;
+-
+-		return 1;
+-	}
+-
+-	os_sgl = (struct scatterlist *)scp->request_buffer;
+-	sge_count = pci_map_sg(instance->pdev, os_sgl, scp->use_sg,
+-			       scp->sc_data_direction);
+-
+-	for (i = 0; i < sge_count; i++, os_sgl++) {
++	if (sge_count) {
++		scsi_for_each_sg(scp, os_sgl, sge_count, i) {
+ 		mfi_sgl->sge32[i].length = sg_dma_len(os_sgl);
+ 		mfi_sgl->sge32[i].phys_addr = sg_dma_address(os_sgl);
+ 	}
+-
++	}
+ 	return sge_count;
+ }
+ 
+@@ -481,35 +462,15 @@
+ 	int sge_count;
+ 	struct scatterlist *os_sgl;
+ 
+-	/*
+-	 * Return 0 if there is no data transfer
+-	 */
+-	if (!scp->request_buffer || !scp->request_bufflen)
+-		return 0;
+-
+-	if (!scp->use_sg) {
+-		mfi_sgl->sge64[0].phys_addr = pci_map_single(instance->pdev,
+-							     scp->
+-							     request_buffer,
+-							     scp->
+-							     request_bufflen,
+-							     scp->
+-							     sc_data_direction);
++	sge_count = scsi_dma_map(scp);
++	BUG_ON(sge_count < 0);
+ 
+-		mfi_sgl->sge64[0].length = scp->request_bufflen;
+-
+-		return 1;
+-	}
+-
+-	os_sgl = (struct scatterlist *)scp->request_buffer;
+-	sge_count = pci_map_sg(instance->pdev, os_sgl, scp->use_sg,
+-			       scp->sc_data_direction);
+-
+-	for (i = 0; i < sge_count; i++, os_sgl++) {
++	if (sge_count) {
++		scsi_for_each_sg(scp, os_sgl, sge_count, i) {
+ 		mfi_sgl->sge64[i].length = sg_dma_len(os_sgl);
+ 		mfi_sgl->sge64[i].phys_addr = sg_dma_address(os_sgl);
+ 	}
+-
++	}
+ 	return sge_count;
+ }
+ 
+@@ -593,7 +554,7 @@
+ 	pthru->cdb_len = scp->cmd_len;
+ 	pthru->timeout = 0;
+ 	pthru->flags = flags;
+-	pthru->data_xfer_len = scp->request_bufflen;
++	pthru->data_xfer_len = scsi_bufflen(scp);
+ 
+ 	memcpy(pthru->cdb, scp->cmnd, scp->cmd_len);
+ 
+@@ -1195,45 +1156,6 @@
+ }
+ 
+ /**
+- * megasas_unmap_sgbuf -	Unmap SG buffers
+- * @instance:			Adapter soft state
+- * @cmd:			Completed command
+- */
+-static void
+-megasas_unmap_sgbuf(struct megasas_instance *instance, struct megasas_cmd *cmd)
+-{
+-	dma_addr_t buf_h;
+-	u8 opcode;
+-
+-	if (cmd->scmd->use_sg) {
+-		pci_unmap_sg(instance->pdev, cmd->scmd->request_buffer,
+-			     cmd->scmd->use_sg, cmd->scmd->sc_data_direction);
+-		return;
+-	}
+-
+-	if (!cmd->scmd->request_bufflen)
+-		return;
+-
+-	opcode = cmd->frame->hdr.cmd;
+-
+-	if ((opcode == MFI_CMD_LD_READ) || (opcode == MFI_CMD_LD_WRITE)) {
+-		if (IS_DMA64)
+-			buf_h = cmd->frame->io.sgl.sge64[0].phys_addr;
+-		else
+-			buf_h = cmd->frame->io.sgl.sge32[0].phys_addr;
+-	} else {
+-		if (IS_DMA64)
+-			buf_h = cmd->frame->pthru.sgl.sge64[0].phys_addr;
+-		else
+-			buf_h = cmd->frame->pthru.sgl.sge32[0].phys_addr;
+-	}
+-
+-	pci_unmap_single(instance->pdev, buf_h, cmd->scmd->request_bufflen,
+-			 cmd->scmd->sc_data_direction);
+-	return;
+-}
+-
+-/**
+  * megasas_complete_cmd -	Completes a command
+  * @instance:			Adapter soft state
+  * @cmd:			Command to be completed
+@@ -1281,7 +1203,7 @@
+ 
+ 			atomic_dec(&instance->fw_outstanding);
+ 
+-			megasas_unmap_sgbuf(instance, cmd);
++			scsi_dma_unmap(cmd->scmd);
+ 			cmd->scmd->scsi_done(cmd->scmd);
+ 			megasas_return_cmd(instance, cmd);
+ 
+@@ -1329,7 +1251,7 @@
+ 
+ 		atomic_dec(&instance->fw_outstanding);
+ 
+-		megasas_unmap_sgbuf(instance, cmd);
++		scsi_dma_unmap(cmd->scmd);
+ 		cmd->scmd->scsi_done(cmd->scmd);
+ 		megasas_return_cmd(instance, cmd);
+ 
+diff -Nurb linux-2.6.22-570/drivers/scsi/megaraid.c linux-2.6.22-try2/drivers/scsi/megaraid.c
+--- linux-2.6.22-570/drivers/scsi/megaraid.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/megaraid.c	2007-12-19 15:29:23.000000000 -0500
+@@ -523,10 +523,8 @@
+ 	/*
+ 	 * filter the internal and ioctl commands
+ 	 */
+-	if((cmd->cmnd[0] == MEGA_INTERNAL_CMD)) {
+-		return cmd->request_buffer;
+-	}
+-
++	if((cmd->cmnd[0] == MEGA_INTERNAL_CMD))
++		return (scb_t *)cmd->host_scribble;
+ 
+ 	/*
+ 	 * We know what channels our logical drives are on - mega_find_card()
+@@ -657,22 +655,14 @@
+ 
+ 		case MODE_SENSE: {
+ 			char *buf;
+-
+-			if (cmd->use_sg) {
+ 				struct scatterlist *sg;
+ 
+-				sg = (struct scatterlist *)cmd->request_buffer;
+-				buf = kmap_atomic(sg->page, KM_IRQ0) +
+-					sg->offset;
+-			} else
+-				buf = cmd->request_buffer;
+-			memset(buf, 0, cmd->cmnd[4]);
+-			if (cmd->use_sg) {
+-				struct scatterlist *sg;
++			sg = scsi_sglist(cmd);
++			buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+ 
+-				sg = (struct scatterlist *)cmd->request_buffer;
++			memset(buf, 0, cmd->cmnd[4]);
+ 				kunmap_atomic(buf - sg->offset, KM_IRQ0);
+-			}
++
+ 			cmd->result = (DID_OK << 16);
+ 			cmd->scsi_done(cmd);
+ 			return NULL;
+@@ -1551,24 +1541,16 @@
+ 		islogical = adapter->logdrv_chan[cmd->device->channel];
+ 		if( cmd->cmnd[0] == INQUIRY && !islogical ) {
+ 
+-			if( cmd->use_sg ) {
+-				sgl = (struct scatterlist *)
+-					cmd->request_buffer;
+-
++			sgl = scsi_sglist(cmd);
+ 				if( sgl->page ) {
+ 					c = *(unsigned char *)
+ 					page_address((&sgl[0])->page) +
+ 					(&sgl[0])->offset; 
+-				}
+-				else {
++			} else {
+ 					printk(KERN_WARNING
+ 						"megaraid: invalid sg.\n");
+ 					c = 0;
+ 				}
+-			}
+-			else {
+-				c = *(u8 *)cmd->request_buffer;
+-			}
+ 
+ 			if(IS_RAID_CH(adapter, cmd->device->channel) &&
+ 					((c & 0x1F ) == TYPE_DISK)) {
+@@ -1704,30 +1686,14 @@
+ static void
+ mega_free_scb(adapter_t *adapter, scb_t *scb)
+ {
+-	unsigned long length;
+-
+ 	switch( scb->dma_type ) {
+ 
+ 	case MEGA_DMA_TYPE_NONE:
+ 		break;
+ 
+-	case MEGA_BULK_DATA:
+-		if (scb->cmd->use_sg == 0)
+-			length = scb->cmd->request_bufflen;
+-		else {
+-			struct scatterlist *sgl =
+-				(struct scatterlist *)scb->cmd->request_buffer;
+-			length = sgl->length;
+-		}
+-		pci_unmap_page(adapter->dev, scb->dma_h_bulkdata,
+-			       length, scb->dma_direction);
+-		break;
+-
+ 	case MEGA_SGLIST:
+-		pci_unmap_sg(adapter->dev, scb->cmd->request_buffer,
+-			scb->cmd->use_sg, scb->dma_direction);
++		scsi_dma_unmap(scb->cmd);
+ 		break;
+-
+ 	default:
+ 		break;
+ 	}
+@@ -1767,80 +1733,33 @@
+ static int
+ mega_build_sglist(adapter_t *adapter, scb_t *scb, u32 *buf, u32 *len)
+ {
+-	struct scatterlist	*sgl;
+-	struct page	*page;
+-	unsigned long	offset;
+-	unsigned int	length;
++	struct scatterlist *sg;
+ 	Scsi_Cmnd	*cmd;
+ 	int	sgcnt;
+ 	int	idx;
+ 
+ 	cmd = scb->cmd;
+ 
+-	/* Scatter-gather not used */
+-	if( cmd->use_sg == 0 || (cmd->use_sg == 1 && 
+-				 !adapter->has_64bit_addr)) {
+-
+-		if (cmd->use_sg == 0) {
+-			page = virt_to_page(cmd->request_buffer);
+-			offset = offset_in_page(cmd->request_buffer);
+-			length = cmd->request_bufflen;
+-		} else {
+-			sgl = (struct scatterlist *)cmd->request_buffer;
+-			page = sgl->page;
+-			offset = sgl->offset;
+-			length = sgl->length;
+-		}
+-
+-		scb->dma_h_bulkdata = pci_map_page(adapter->dev,
+-						  page, offset,
+-						  length,
+-						  scb->dma_direction);
+-		scb->dma_type = MEGA_BULK_DATA;
+-
+-		/*
+-		 * We need to handle special 64-bit commands that need a
+-		 * minimum of 1 SG
+-		 */
+-		if( adapter->has_64bit_addr ) {
+-			scb->sgl64[0].address = scb->dma_h_bulkdata;
+-			scb->sgl64[0].length = length;
+-			*buf = (u32)scb->sgl_dma_addr;
+-			*len = (u32)length;
+-			return 1;
+-		}
+-		else {
+-			*buf = (u32)scb->dma_h_bulkdata;
+-			*len = (u32)length;
+-		}
+-		return 0;
+-	}
+-
+-	sgl = (struct scatterlist *)cmd->request_buffer;
+-
+ 	/*
+ 	 * Copy Scatter-Gather list info into controller structure.
+ 	 *
+ 	 * The number of sg elements returned must not exceed our limit
+ 	 */
+-	sgcnt = pci_map_sg(adapter->dev, sgl, cmd->use_sg,
+-			scb->dma_direction);
++	sgcnt = scsi_dma_map(cmd);
+ 
+ 	scb->dma_type = MEGA_SGLIST;
+ 
+-	BUG_ON(sgcnt > adapter->sglen);
++	BUG_ON(sgcnt > adapter->sglen || sgcnt < 0);
+ 
+ 	*len = 0;
+ 
+-	for( idx = 0; idx < sgcnt; idx++, sgl++ ) {
+-
+-		if( adapter->has_64bit_addr ) {
+-			scb->sgl64[idx].address = sg_dma_address(sgl);
+-			*len += scb->sgl64[idx].length = sg_dma_len(sgl);
+-		}
+-		else {
+-			scb->sgl[idx].address = sg_dma_address(sgl);
+-			*len += scb->sgl[idx].length = sg_dma_len(sgl);
++	scsi_for_each_sg(cmd, sg, sgcnt, idx) {
++		if (adapter->has_64bit_addr) {
++			scb->sgl64[idx].address = sg_dma_address(sg);
++			*len += scb->sgl64[idx].length = sg_dma_len(sg);
++		} else {
++			scb->sgl[idx].address = sg_dma_address(sg);
++			*len += scb->sgl[idx].length = sg_dma_len(sg);
+ 		}
+ 	}
+ 
+@@ -3571,7 +3490,7 @@
+ 			/*
+ 			 * The user passthru structure
+ 			 */
+-			upthru = (mega_passthru __user *)MBOX(uioc)->xferaddr;
++			upthru = (mega_passthru __user *)(unsigned long)MBOX(uioc)->xferaddr;
+ 
+ 			/*
+ 			 * Copy in the user passthru here.
+@@ -3623,7 +3542,7 @@
+ 				/*
+ 				 * Get the user data
+ 				 */
+-				if( copy_from_user(data, (char __user *)uxferaddr,
++				if( copy_from_user(data, (char __user *)(unsigned long) uxferaddr,
+ 							pthru->dataxferlen) ) {
+ 					rval = (-EFAULT);
+ 					goto freemem_and_return;
+@@ -3649,7 +3568,7 @@
+ 			 * Is data going up-stream
+ 			 */
+ 			if( pthru->dataxferlen && (uioc.flags & UIOC_RD) ) {
+-				if( copy_to_user((char __user *)uxferaddr, data,
++				if( copy_to_user((char __user *)(unsigned long) uxferaddr, data,
+ 							pthru->dataxferlen) ) {
+ 					rval = (-EFAULT);
+ 				}
+@@ -3702,7 +3621,7 @@
+ 				/*
+ 				 * Get the user data
+ 				 */
+-				if( copy_from_user(data, (char __user *)uxferaddr,
++				if( copy_from_user(data, (char __user *)(unsigned long) uxferaddr,
+ 							uioc.xferlen) ) {
+ 
+ 					pci_free_consistent(pdev,
+@@ -3742,7 +3661,7 @@
+ 			 * Is data going up-stream
+ 			 */
+ 			if( uioc.xferlen && (uioc.flags & UIOC_RD) ) {
+-				if( copy_to_user((char __user *)uxferaddr, data,
++				if( copy_to_user((char __user *)(unsigned long) uxferaddr, data,
+ 							uioc.xferlen) ) {
+ 
+ 					rval = (-EFAULT);
+@@ -4494,7 +4413,7 @@
+ 	scmd->device = sdev;
+ 
+ 	scmd->device->host = adapter->host;
+-	scmd->request_buffer = (void *)scb;
++	scmd->host_scribble = (void *)scb;
+ 	scmd->cmnd[0] = MEGA_INTERNAL_CMD;
+ 
+ 	scb->state |= SCB_ACTIVE;
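+
+The megaraid.c hunks also stop overloading request_buffer for the
+driver's internal commands and use host_scribble, the scsi_cmnd field
+reserved for low-level-driver bookkeeping. The pattern, in brief:
+
+	/* issue side: stash the scb on the internal command */
+	scmd->host_scribble = (void *)scb;
+
+	/* lookup side: recover it when the command comes back */
+	scb = (scb_t *)scmd->host_scribble;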
+diff -Nurb linux-2.6.22-570/drivers/scsi/mesh.c linux-2.6.22-try2/drivers/scsi/mesh.c
+--- linux-2.6.22-570/drivers/scsi/mesh.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/mesh.c	2007-12-19 15:29:23.000000000 -0500
+@@ -421,7 +421,7 @@
+ 		for (i = 0; i < cmd->cmd_len; ++i)
+ 			printk(" %x", cmd->cmnd[i]);
+ 		printk(" use_sg=%d buffer=%p bufflen=%u\n",
+-		       cmd->use_sg, cmd->request_buffer, cmd->request_bufflen);
++		       scsi_sg_count(cmd), scsi_sglist(cmd), scsi_bufflen(cmd));
+ 	}
+ #endif
+ 	if (ms->dma_started)
+@@ -602,13 +602,16 @@
+ 			cmd->result += (cmd->SCp.Message << 8);
+ 		if (DEBUG_TARGET(cmd)) {
+ 			printk(KERN_DEBUG "mesh_done: result = %x, data_ptr=%d, buflen=%d\n",
+-			       cmd->result, ms->data_ptr, cmd->request_bufflen);
++			       cmd->result, ms->data_ptr, scsi_bufflen(cmd));
++#if 0
++			/* needs to use sg? */
+ 			if ((cmd->cmnd[0] == 0 || cmd->cmnd[0] == 0x12 || cmd->cmnd[0] == 3)
+ 			    && cmd->request_buffer != 0) {
+ 				unsigned char *b = cmd->request_buffer;
+ 				printk(KERN_DEBUG "buffer = %x %x %x %x %x %x %x %x\n",
+ 				       b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]);
+ 			}
++#endif
+ 		}
+ 		cmd->SCp.this_residual -= ms->data_ptr;
+ 		mesh_completed(ms, cmd);
+@@ -1265,15 +1268,18 @@
+ 	dcmds = ms->dma_cmds;
+ 	dtot = 0;
+ 	if (cmd) {
+-		cmd->SCp.this_residual = cmd->request_bufflen;
+-		if (cmd->use_sg > 0) {
+ 			int nseg;
++
++		cmd->SCp.this_residual = scsi_bufflen(cmd);
++
++		nseg = scsi_dma_map(cmd);
++		BUG_ON(nseg < 0);
++
++		if (nseg) {
+ 			total = 0;
+-			scl = (struct scatterlist *) cmd->request_buffer;
+ 			off = ms->data_ptr;
+-			nseg = pci_map_sg(ms->pdev, scl, cmd->use_sg,
+-					  cmd->sc_data_direction);
+-			for (i = 0; i <nseg; ++i, ++scl) {
++
++			scsi_for_each_sg(cmd, scl, nseg, i) {
+ 				u32 dma_addr = sg_dma_address(scl);
+ 				u32 dma_len = sg_dma_len(scl);
+ 				
+@@ -1292,16 +1298,6 @@
+ 				dtot += dma_len - off;
+ 				off = 0;
+ 			}
+-		} else if (ms->data_ptr < cmd->request_bufflen) {
+-			dtot = cmd->request_bufflen - ms->data_ptr;
+-			if (dtot > 0xffff)
+-				panic("mesh: transfer size >= 64k");
+-			st_le16(&dcmds->req_count, dtot);
+-			/* XXX Use pci DMA API here ... */
+-			st_le32(&dcmds->phy_addr,
+-				virt_to_phys(cmd->request_buffer) + ms->data_ptr);
+-			dcmds->xfer_status = 0;
+-			++dcmds;
+ 		}
+ 	}
+ 	if (dtot == 0) {
+@@ -1356,18 +1352,14 @@
+ 		dumplog(ms, ms->conn_tgt);
+ 		dumpslog(ms);
+ #endif /* MESH_DBG */
+-	} else if (cmd && cmd->request_bufflen != 0 &&
+-		   ms->data_ptr > cmd->request_bufflen) {
++	} else if (cmd && scsi_bufflen(cmd) &&
++		   ms->data_ptr > scsi_bufflen(cmd)) {
+ 		printk(KERN_DEBUG "mesh: target %d overrun, "
+ 		       "data_ptr=%x total=%x goes_out=%d\n",
+-		       ms->conn_tgt, ms->data_ptr, cmd->request_bufflen,
++		       ms->conn_tgt, ms->data_ptr, scsi_bufflen(cmd),
+ 		       ms->tgts[ms->conn_tgt].data_goes_out);
+ 	}
+-	if (cmd->use_sg != 0) {
+-		struct scatterlist *sg;
+-		sg = (struct scatterlist *)cmd->request_buffer;
+-		pci_unmap_sg(ms->pdev, sg, cmd->use_sg, cmd->sc_data_direction);
+-	}
++	scsi_dma_unmap(cmd);
+ 	ms->dma_started = 0;
+ }
+ 
+diff -Nurb linux-2.6.22-570/drivers/scsi/mvme16x.c linux-2.6.22-try2/drivers/scsi/mvme16x.c
+--- linux-2.6.22-570/drivers/scsi/mvme16x.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/mvme16x.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,78 +0,0 @@
+-/*
+- * Detection routine for the NCR53c710 based MVME16x SCSI Controllers for Linux.
+- *
+- * Based on work by Alan Hourihane
+- */
+-#include <linux/types.h>
+-#include <linux/mm.h>
+-#include <linux/blkdev.h>
+-
+-#include <asm/page.h>
+-#include <asm/pgtable.h>
+-#include <asm/mvme16xhw.h>
+-#include <asm/irq.h>
+-
+-#include "scsi.h"
+-#include <scsi/scsi_host.h>
+-#include "53c7xx.h"
+-#include "mvme16x.h"
+-
+-#include<linux/stat.h>
+-
+-
+-int mvme16x_scsi_detect(struct scsi_host_template *tpnt)
+-{
+-    static unsigned char called = 0;
+-    int clock;
+-    long long options;
+-
+-    if (!MACH_IS_MVME16x)
+-		return 0;
+-    if (mvme16x_config & MVME16x_CONFIG_NO_SCSICHIP) {
+-	printk ("SCSI detection disabled, SCSI chip not present\n");
+-	return 0;
+-    }
+-    if (called)
+-	return 0;
+-
+-    tpnt->proc_name = "MVME16x";
+-
+-    options = OPTION_MEMORY_MAPPED|OPTION_DEBUG_TEST1|OPTION_INTFLY|OPTION_SYNCHRONOUS|OPTION_ALWAYS_SYNCHRONOUS|OPTION_DISCONNECT;
+-
+-    clock = 66000000;	/* 66MHz SCSI Clock */
+-
+-    ncr53c7xx_init(tpnt, 0, 710, (unsigned long)0xfff47000,
+-			0, MVME16x_IRQ_SCSI, DMA_NONE,
+-			options, clock);
+-    called = 1;
+-    return 1;
+-}
+-
+-static int mvme16x_scsi_release(struct Scsi_Host *shost)
+-{
+-	if (shost->irq)
+-		free_irq(shost->irq, NULL);
+-	if (shost->dma_channel != 0xff)
+-		free_dma(shost->dma_channel);
+-	if (shost->io_port && shost->n_io_port)
+-		release_region(shost->io_port, shost->n_io_port);
+-	scsi_unregister(shost);
+-	return 0;
+-}
+-
+-static struct scsi_host_template driver_template = {
+-	.name			= "MVME16x NCR53c710 SCSI",
+-	.detect			= mvme16x_scsi_detect,
+-	.release		= mvme16x_scsi_release,
+-	.queuecommand		= NCR53c7xx_queue_command,
+-	.abort			= NCR53c7xx_abort,
+-	.reset			= NCR53c7xx_reset,
+-	.can_queue		= 24,
+-	.this_id		= 7,
+-	.sg_tablesize		= 63,
+-	.cmd_per_lun		= 3,
+-	.use_clustering		= DISABLE_CLUSTERING
+-};
+-
+-
+-#include "scsi_module.c"
+diff -Nurb linux-2.6.22-570/drivers/scsi/mvme16x.h linux-2.6.22-try2/drivers/scsi/mvme16x.h
+--- linux-2.6.22-570/drivers/scsi/mvme16x.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/mvme16x.h	1969-12-31 19:00:00.000000000 -0500
+@@ -1,24 +0,0 @@
+-#ifndef MVME16x_SCSI_H
+-#define MVME16x_SCSI_H
+-
+-#include <linux/types.h>
+-
+-int mvme16x_scsi_detect(struct scsi_host_template *);
+-const char *NCR53c7x0_info(void);
+-int NCR53c7xx_queue_command(Scsi_Cmnd *, void (*done)(Scsi_Cmnd *));
+-int NCR53c7xx_abort(Scsi_Cmnd *);
+-int NCR53c7x0_release (struct Scsi_Host *);
+-int NCR53c7xx_reset(Scsi_Cmnd *, unsigned int);
+-void NCR53c7x0_intr(int irq, void *dev_id);
+-
+-#ifndef CMD_PER_LUN
+-#define CMD_PER_LUN 3
+-#endif
+-
+-#ifndef CAN_QUEUE
+-#define CAN_QUEUE 24
+-#endif
+-
+-#include <scsi/scsicam.h>
+-
+-#endif /* MVME16x_SCSI_H */
+diff -Nurb linux-2.6.22-570/drivers/scsi/mvme16x_scsi.c linux-2.6.22-try2/drivers/scsi/mvme16x_scsi.c
+--- linux-2.6.22-570/drivers/scsi/mvme16x_scsi.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/scsi/mvme16x_scsi.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,158 @@
++/*
++ * Detection routine for the NCR53c710 based MVME16x SCSI Controllers for Linux.
++ *
++ * Based on work by Alan Hourihane
++ *
++ * Rewritten to use 53c700.c by Kars de Jong <jongk@linux-m68k.org>
++ */
++
++#include <linux/module.h>
++#include <linux/blkdev.h>
++#include <linux/device.h>
++#include <linux/platform_device.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <asm/mvme16xhw.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_device.h>
++#include <scsi/scsi_transport.h>
++#include <scsi/scsi_transport_spi.h>
++
++#include "53c700.h"
++
++MODULE_AUTHOR("Kars de Jong <jongk@linux-m68k.org>");
++MODULE_DESCRIPTION("MVME16x NCR53C710 driver");
++MODULE_LICENSE("GPL");
++
++static struct scsi_host_template mvme16x_scsi_driver_template = {
++	.name			= "MVME16x NCR53c710 SCSI",
++	.proc_name		= "MVME16x",
++	.this_id		= 7,
++	.module			= THIS_MODULE,
++};
++
++static struct platform_device *mvme16x_scsi_device;
++
++static __devinit int
++mvme16x_probe(struct device *dev)
++{
++	struct Scsi_Host * host = NULL;
++	struct NCR_700_Host_Parameters *hostdata;
++
++	if (!MACH_IS_MVME16x)
++		goto out;
++
++	if (mvme16x_config & MVME16x_CONFIG_NO_SCSICHIP) {
++		printk(KERN_INFO "mvme16x-scsi: detection disabled, "
++				 "SCSI chip not present\n");
++		goto out;
++	}
++
++	hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL);
++	if (hostdata == NULL) {
++		printk(KERN_ERR "mvme16x-scsi: "
++				"Failed to allocate host data\n");
++		goto out;
++	}
++	memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters));
++
++	/* Fill in the required pieces of hostdata */
++	hostdata->base = (void __iomem *)0xfff47000UL;
++	hostdata->clock = 50;	/* XXX - depends on the CPU clock! */
++	hostdata->chip710 = 1;
++	hostdata->dmode_extra = DMODE_FC2;
++	hostdata->dcntl_extra = EA_710;
++	hostdata->ctest7_extra = CTEST7_TT1;
++
++	/* and register the chip */
++	host = NCR_700_detect(&mvme16x_scsi_driver_template, hostdata, dev);
++	if (!host) {
++		printk(KERN_ERR "mvme16x-scsi: No host detected; "
++				"board configuration problem?\n");
++		goto out_free;
++	}
++	host->this_id = 7;
++	host->base = 0xfff47000UL;
++	host->irq = MVME16x_IRQ_SCSI;
++	if (request_irq(host->irq, NCR_700_intr, 0, "mvme16x-scsi", host)) {
++		printk(KERN_ERR "mvme16x-scsi: request_irq failed\n");
++		goto out_put_host;
++	}
++
++	/* Enable scsi chip ints */
++	{
++		volatile unsigned long v;
++
++		/* Enable scsi interrupts at level 4 in PCCchip2 */
++		v = in_be32(0xfff4202c);
++		v = (v & ~0xff) | 0x10 | 4;
++		out_be32(0xfff4202c, v);
++	}
++
++	scsi_scan_host(host);
++
++	return 0;
++
++ out_put_host:
++	scsi_host_put(host);
++ out_free:
++	kfree(hostdata);
++ out:
++	return -ENODEV;
++}
++
++static __devexit int
++mvme16x_device_remove(struct device *dev)
++{
++	struct Scsi_Host *host = dev_to_shost(dev);
++	struct NCR_700_Host_Parameters *hostdata = shost_priv(host);
++
++	/* Disable scsi chip ints */
++	{
++		volatile unsigned long v;
++
++		v = in_be32(0xfff4202c);
++		v &= ~0x10;
++		out_be32(0xfff4202c, v);
++	}
++	scsi_remove_host(host);
++	NCR_700_release(host);
++	kfree(hostdata);
++	free_irq(host->irq, host);
++
++	return 0;
++}
++
++static struct device_driver mvme16x_scsi_driver = {
++	.name	= "mvme16x-scsi",
++	.bus	= &platform_bus_type,
++	.probe	= mvme16x_probe,
++	.remove	= __devexit_p(mvme16x_device_remove),
++};
++
++static int __init mvme16x_scsi_init(void)
++{
++	int err;
++
++	err = driver_register(&mvme16x_scsi_driver);
++	if (err)
++		return err;
++
++	mvme16x_scsi_device = platform_device_register_simple("mvme16x-scsi",
++							      -1, NULL, 0);
++	if (IS_ERR(mvme16x_scsi_device)) {
++		driver_unregister(&mvme16x_scsi_driver);
++		return PTR_ERR(mvme16x_scsi_device);
++	}
++
++	return 0;
++}
++
++static void __exit mvme16x_scsi_exit(void)
++{
++	platform_device_unregister(mvme16x_scsi_device);
++	driver_unregister(&mvme16x_scsi_driver);
++}
++
++module_init(mvme16x_scsi_init);
++module_exit(mvme16x_scsi_exit);
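+
+The rewrite registers through a raw struct device_driver on
+platform_bus_type. The platform_driver wrapper, also available in this
+era, would save the bus boilerplate; a sketch (an assumption, with
+hypothetical probe/remove wrappers that take a platform_device):
+
+	static struct platform_driver mvme16x_scsi_pdriver = {
+		.probe	= mvme16x_platform_probe,	/* wraps mvme16x_probe() */
+		.remove	= __devexit_p(mvme16x_platform_remove),
+		.driver	= {
+			.name = "mvme16x-scsi",
+		},
+	};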
+diff -Nurb linux-2.6.22-570/drivers/scsi/nsp32.c linux-2.6.22-try2/drivers/scsi/nsp32.c
+--- linux-2.6.22-570/drivers/scsi/nsp32.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/nsp32.c	2007-12-19 15:29:23.000000000 -0500
+@@ -49,10 +49,6 @@
+ #include <scsi/scsi_host.h>
+ #include <scsi/scsi_ioctl.h>
+ 
+-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
+-# include <linux/blk.h>
+-#endif
+-
+ #include "nsp32.h"
+ 
+ 
+@@ -199,17 +195,9 @@
+ static void __exit    exit_nsp32  (void);
+ 
+ /* struct struct scsi_host_template */
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+ static int         nsp32_proc_info   (struct Scsi_Host *, char *, char **, off_t, int, int);
+-#else
+-static int         nsp32_proc_info   (char *, char **, off_t, int, int, int);
+-#endif
+ 
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+ static int         nsp32_detect      (struct pci_dev *pdev);
+-#else
+-static int         nsp32_detect      (struct scsi_host_template *);
+-#endif
+ static int         nsp32_queuecommand(struct scsi_cmnd *,
+ 		void (*done)(struct scsi_cmnd *));
+ static const char *nsp32_info        (struct Scsi_Host *);
+@@ -296,15 +284,7 @@
+ 	.eh_abort_handler       	= nsp32_eh_abort,
+ 	.eh_bus_reset_handler		= nsp32_eh_bus_reset,
+ 	.eh_host_reset_handler		= nsp32_eh_host_reset,
+-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,74))
+-	.detect				= nsp32_detect,
+-	.release			= nsp32_release,
+-#endif
+-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,2))
+-	.use_new_eh_code        	= 1,
+-#else
+ /*	.highmem_io			= 1, */
+-#endif
+ };
+ 
+ #include "nsp32_io.h"
+@@ -739,7 +719,7 @@
+ 	command = 0;
+ 	command |= (TRANSFER_GO | ALL_COUNTER_CLR);
+ 	if (data->trans_method & NSP32_TRANSFER_BUSMASTER) {
+-		if (SCpnt->request_bufflen > 0) {
++		if (scsi_bufflen(SCpnt) > 0) {
+ 			command |= BM_START;
+ 		}
+ 	} else if (data->trans_method & NSP32_TRANSFER_MMIO) {
+@@ -888,31 +868,28 @@
+ static int nsp32_setup_sg_table(struct scsi_cmnd *SCpnt)
+ {
+ 	nsp32_hw_data *data = (nsp32_hw_data *)SCpnt->device->host->hostdata;
+-	struct scatterlist   *sgl;
++	struct scatterlist *sg;
+ 	nsp32_sgtable *sgt = data->cur_lunt->sglun->sgt;
+ 	int num, i;
+ 	u32_le l;
+ 
+-	if (SCpnt->request_bufflen == 0) {
+-		return TRUE;
+-	}
+-
+ 	if (sgt == NULL) {
+ 		nsp32_dbg(NSP32_DEBUG_SGLIST, "SGT == null");
+ 		return FALSE;
+ 	}
+ 
+-	if (SCpnt->use_sg) {
+-		sgl = (struct scatterlist *)SCpnt->request_buffer;
+-		num = pci_map_sg(data->Pci, sgl, SCpnt->use_sg,
+-				 SCpnt->sc_data_direction);
+-		for (i = 0; i < num; i++) {
++	num = scsi_dma_map(SCpnt);
++	if (!num)
++		return TRUE;
++	else if (num < 0)
++		return FALSE;
++	else {
++		scsi_for_each_sg(SCpnt, sg, num, i) {
+ 			/*
+ 			 * Build nsp32_sglist, substitute sg dma addresses.
+ 			 */
+-			sgt[i].addr = cpu_to_le32(sg_dma_address(sgl));
+-			sgt[i].len  = cpu_to_le32(sg_dma_len(sgl));
+-			sgl++;
++			sgt[i].addr = cpu_to_le32(sg_dma_address(sg));
++			sgt[i].len  = cpu_to_le32(sg_dma_len(sg));
+ 
+ 			if (le32_to_cpu(sgt[i].len) > 0x10000) {
+ 				nsp32_msg(KERN_ERR,
+@@ -929,23 +906,6 @@
+ 		/* set end mark */
+ 		l = le32_to_cpu(sgt[num-1].len);
+ 		sgt[num-1].len = cpu_to_le32(l | SGTEND);
+-
+-	} else {
+-		SCpnt->SCp.have_data_in	= pci_map_single(data->Pci,
+-			SCpnt->request_buffer, SCpnt->request_bufflen,
+-			SCpnt->sc_data_direction);
+-
+-		sgt[0].addr = cpu_to_le32(SCpnt->SCp.have_data_in);
+-		sgt[0].len  = cpu_to_le32(SCpnt->request_bufflen | SGTEND); /* set end mark */
+-
+-		if (SCpnt->request_bufflen > 0x10000) {
+-			nsp32_msg(KERN_ERR,
+-				  "can't transfer over 64KB at a time, size=0x%lx", SCpnt->request_bufflen);
+-			return FALSE;
+-		}
+-		nsp32_dbg(NSP32_DEBUG_SGLIST, "single : addr 0x%lx len=0x%lx",
+-			  le32_to_cpu(sgt[0].addr),
+-			  le32_to_cpu(sgt[0].len ));
+ 	}
+ 
+ 	return TRUE;
+@@ -962,7 +922,7 @@
+ 		  "enter. target: 0x%x LUN: 0x%x cmnd: 0x%x cmndlen: 0x%x "
+ 		  "use_sg: 0x%x reqbuf: 0x%lx reqlen: 0x%x",
+ 		  SCpnt->device->id, SCpnt->device->lun, SCpnt->cmnd[0], SCpnt->cmd_len,
+-		  SCpnt->use_sg, SCpnt->request_buffer, SCpnt->request_bufflen);
++		  scsi_sg_count(SCpnt), scsi_sglist(SCpnt), scsi_bufflen(SCpnt));
+ 
+ 	if (data->CurrentSC != NULL) {
+ 		nsp32_msg(KERN_ERR, "Currentsc != NULL. Cancel this command request");
+@@ -994,10 +954,10 @@
+ 	data->CurrentSC      = SCpnt;
+ 	SCpnt->SCp.Status    = CHECK_CONDITION;
+ 	SCpnt->SCp.Message   = 0;
+-	SCpnt->resid         = SCpnt->request_bufflen;
++	scsi_set_resid(SCpnt, scsi_bufflen(SCpnt));
+ 
+-	SCpnt->SCp.ptr		    = (char *) SCpnt->request_buffer;
+-	SCpnt->SCp.this_residual    = SCpnt->request_bufflen;
++	SCpnt->SCp.ptr		    = (char *)scsi_sglist(SCpnt);
++	SCpnt->SCp.this_residual    = scsi_bufflen(SCpnt);
+ 	SCpnt->SCp.buffer	    = NULL;
+ 	SCpnt->SCp.buffers_residual = 0;
+ 
+@@ -1210,13 +1170,9 @@
+ 	unsigned long flags;
+ 	int ret;
+ 	int handled = 0;
+-
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ 	struct Scsi_Host *host = data->Host;
++
+ 	spin_lock_irqsave(host->host_lock, flags);
+-#else
+-	spin_lock_irqsave(&io_request_lock, flags);
+-#endif
+ 
+ 	/*
+ 	 * IRQ check, then enable IRQ mask
+@@ -1312,7 +1268,7 @@
+ 		}
+ 
+ 		if ((auto_stat & DATA_IN_PHASE) &&
+-		    (SCpnt->resid > 0) &&
++		    (scsi_get_resid(SCpnt) > 0) &&
+ 		    ((nsp32_read2(base, FIFO_REST_CNT) & FIFO_REST_MASK) != 0)) {
+ 			printk( "auto+fifo\n");
+ 			//nsp32_pio_read(SCpnt);
+@@ -1333,7 +1289,7 @@
+ 			nsp32_dbg(NSP32_DEBUG_INTR, "SSACK=0x%lx", 
+ 				    nsp32_read4(base, SAVED_SACK_CNT));
+ 
+-			SCpnt->resid = 0; /* all data transfered! */
++			scsi_set_resid(SCpnt, 0); /* all data transfered! */
+ 		}
+ 
+ 		/*
+@@ -1480,11 +1436,7 @@
+ 	nsp32_write2(base, IRQ_CONTROL, 0);
+ 
+  out2:
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+ 	spin_unlock_irqrestore(host->host_lock, flags);
+-#else
+-	spin_unlock_irqrestore(&io_request_lock, flags);
+-#endif
+ 
+ 	nsp32_dbg(NSP32_DEBUG_INTR, "exit");
+ 
+@@ -1499,28 +1451,15 @@
+ 			nsp32_dbg(NSP32_DEBUG_PROC, "buffer=0x%p pos=0x%p length=%d %d\n", buffer, pos, length,  length - (pos - buffer));\
+ 		} \
+ 	} while(0)
+-static int nsp32_proc_info(
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) 
+-	struct Scsi_Host *host,
+-#endif
+-	char             *buffer,
+-	char            **start,
+-	off_t             offset,
+-	int               length,
+-#if !(LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) 
+-	int               hostno,
+-#endif
+-	int               inout)
++
++static int nsp32_proc_info(struct Scsi_Host *host, char *buffer, char **start,
++			   off_t offset, int length, int inout)
+ {
+ 	char             *pos = buffer;
+ 	int               thislength;
+ 	unsigned long     flags;
+ 	nsp32_hw_data    *data;
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) 
+ 	int               hostno;
+-#else
+-	struct Scsi_Host *host;
+-#endif
+ 	unsigned int      base;
+ 	unsigned char     mode_reg;
+ 	int               id, speed;
+@@ -1531,15 +1470,7 @@
+ 		return -EINVAL;
+ 	}
+ 
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73)) 
+ 	hostno = host->host_no;
+-#else
+-	/* search this HBA host */
+-	host = scsi_host_hn_get(hostno);
+-	if (host == NULL) {
+-		return -ESRCH;
+-	}
+-#endif
+ 	data = (nsp32_hw_data *)host->hostdata;
+ 	base = host->io_port;
+ 
+@@ -1626,25 +1557,8 @@
+ 	nsp32_hw_data *data = (nsp32_hw_data *)SCpnt->device->host->hostdata;
+ 	unsigned int   base = SCpnt->device->host->io_port;
+ 
+-	/*
+-	 * unmap pci
+-	 */
+-	if (SCpnt->request_bufflen == 0) {
+-		goto skip;
+-	}
+-
+-	if (SCpnt->use_sg) {
+-		pci_unmap_sg(data->Pci,
+-			     (struct scatterlist *)SCpnt->request_buffer,
+-			     SCpnt->use_sg, SCpnt->sc_data_direction);
+-	} else {
+-		pci_unmap_single(data->Pci,
+-				 (u32)SCpnt->SCp.have_data_in,
+-				 SCpnt->request_bufflen,
+-				 SCpnt->sc_data_direction);
+-	}
++	scsi_dma_unmap(SCpnt);
+ 
+- skip:
+ 	/*
+ 	 * clear TRANSFERCONTROL_BM_START
+ 	 */
+@@ -1800,7 +1714,7 @@
+ 		SCpnt->SCp.Message = 0;
+ 		nsp32_dbg(NSP32_DEBUG_BUSFREE, 
+ 			  "normal end stat=0x%x resid=0x%x\n",
+-			  SCpnt->SCp.Status, SCpnt->resid);
++			  SCpnt->SCp.Status, scsi_get_resid(SCpnt));
+ 		SCpnt->result = (DID_OK             << 16) |
+ 			        (SCpnt->SCp.Message <<  8) |
+ 			        (SCpnt->SCp.Status  <<  0);
+@@ -1844,7 +1758,7 @@
+ 	unsigned int          restlen, sentlen;
+ 	u32_le                len, addr;
+ 
+-	nsp32_dbg(NSP32_DEBUG_SGLIST, "old resid=0x%x", SCpnt->resid);
++	nsp32_dbg(NSP32_DEBUG_SGLIST, "old resid=0x%x", scsi_get_resid(SCpnt));
+ 
+ 	/* adjust saved SACK count with 4 byte start address boundary */
+ 	s_sacklen -= le32_to_cpu(sgt[old_entry].addr) & 3;
+@@ -1888,12 +1802,12 @@
+ 	return;
+ 
+  last:
+-	if (SCpnt->resid < sentlen) {
++	if (scsi_get_resid(SCpnt) < sentlen) {
+ 		nsp32_msg(KERN_ERR, "resid underflow");
+ 	}
+ 
+-	SCpnt->resid -= sentlen;
+-	nsp32_dbg(NSP32_DEBUG_SGLIST, "new resid=0x%x", SCpnt->resid);
++	scsi_set_resid(SCpnt, scsi_get_resid(SCpnt) - sentlen);
++	nsp32_dbg(NSP32_DEBUG_SGLIST, "new resid=0x%x", scsi_get_resid(SCpnt));
+ 
+ 	/* update hostdata and lun */
+ 
+@@ -2022,7 +1936,7 @@
+ 	transfer = 0;
+ 	transfer |= (TRANSFER_GO | ALL_COUNTER_CLR);
+ 	if (data->trans_method & NSP32_TRANSFER_BUSMASTER) {
+-		if (SCpnt->request_bufflen > 0) {
++		if (scsi_bufflen(SCpnt) > 0) {
+ 			transfer |= BM_START;
+ 		}
+ 	} else if (data->trans_method & NSP32_TRANSFER_MMIO) {
+@@ -2674,17 +2588,7 @@
+  *	0x900-0xbff: (map same 0x800-0x8ff I/O port image repeatedly)
+  *	0xc00-0xfff: CardBus status registers
+  */
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+-#define DETECT_OK 0
+-#define DETECT_NG 1
+-#define PCIDEV    pdev
+ static int nsp32_detect(struct pci_dev *pdev)
+-#else
+-#define DETECT_OK 1
+-#define DETECT_NG 0
+-#define PCIDEV    (data->Pci)
+-static int nsp32_detect(struct scsi_host_template *sht)
+-#endif
+ {
+ 	struct Scsi_Host *host;	/* registered host structure */
+ 	struct resource  *res;
+@@ -2697,11 +2601,7 @@
+ 	/*
+ 	 * register this HBA as SCSI device
+ 	 */
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+ 	host = scsi_host_alloc(&nsp32_template, sizeof(nsp32_hw_data));
+-#else
+-	host = scsi_register(sht, sizeof(nsp32_hw_data));
+-#endif
+ 	if (host == NULL) {
+ 		nsp32_msg (KERN_ERR, "failed to scsi register");
+ 		goto err;
+@@ -2719,9 +2619,6 @@
+ 	host->unique_id = data->BaseAddress;
+ 	host->n_io_port	= data->NumAddress;
+ 	host->base      = (unsigned long)data->MmioAddress;
+-#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,5,63))
+-	scsi_set_pci_device(host, PCIDEV);
+-#endif
+ 
+ 	data->Host      = host;
+ 	spin_lock_init(&(data->Lock));
+@@ -2776,7 +2673,7 @@
+ 	/*
+ 	 * setup DMA 
+ 	 */
+-	if (pci_set_dma_mask(PCIDEV, DMA_32BIT_MASK) != 0) {
++	if (pci_set_dma_mask(pdev, DMA_32BIT_MASK) != 0) {
+ 		nsp32_msg (KERN_ERR, "failed to set PCI DMA mask");
+ 		goto scsi_unregister;
+ 	}
+@@ -2784,7 +2681,7 @@
+ 	/*
+ 	 * allocate autoparam DMA resource.
+ 	 */
+-	data->autoparam = pci_alloc_consistent(PCIDEV, sizeof(nsp32_autoparam), &(data->auto_paddr));
++	data->autoparam = pci_alloc_consistent(pdev, sizeof(nsp32_autoparam), &(data->auto_paddr));
+ 	if (data->autoparam == NULL) {
+ 		nsp32_msg(KERN_ERR, "failed to allocate DMA memory");
+ 		goto scsi_unregister;
+@@ -2793,7 +2690,7 @@
+ 	/*
+ 	 * allocate scatter-gather DMA resource.
+ 	 */
+-	data->sg_list = pci_alloc_consistent(PCIDEV, NSP32_SG_TABLE_SIZE,
++	data->sg_list = pci_alloc_consistent(pdev, NSP32_SG_TABLE_SIZE,
+ 					     &(data->sg_paddr));
+ 	if (data->sg_list == NULL) {
+ 		nsp32_msg(KERN_ERR, "failed to allocate DMA memory");
+@@ -2883,16 +2780,14 @@
+ 		goto free_irq;
+         }
+ 
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+-	ret = scsi_add_host(host, &PCIDEV->dev);
++	ret = scsi_add_host(host, &pdev->dev);
+ 	if (ret) {
+ 		nsp32_msg(KERN_ERR, "failed to add scsi host");
+ 		goto free_region;
+ 	}
+ 	scsi_scan_host(host);
+-#endif
+-	pci_set_drvdata(PCIDEV, host);
+-	return DETECT_OK;
++	pci_set_drvdata(pdev, host);
++	return 0;
+ 
+  free_region:
+ 	release_region(host->io_port, host->n_io_port);
+@@ -2901,22 +2796,19 @@
+ 	free_irq(host->irq, data);
+ 
+  free_sg_list:
+-	pci_free_consistent(PCIDEV, NSP32_SG_TABLE_SIZE,
++	pci_free_consistent(pdev, NSP32_SG_TABLE_SIZE,
+ 			    data->sg_list, data->sg_paddr);
+ 
+  free_autoparam:
+-	pci_free_consistent(PCIDEV, sizeof(nsp32_autoparam),
++	pci_free_consistent(pdev, sizeof(nsp32_autoparam),
+ 			    data->autoparam, data->auto_paddr);
+ 	
+  scsi_unregister:
+ 	scsi_host_put(host);
+ 
+  err:
+-	return DETECT_NG;
++	return 1;
+ }
+-#undef DETECT_OK
+-#undef DETECT_NG
+-#undef PCIDEV
+ 
+ static int nsp32_release(struct Scsi_Host *host)
+ {
+@@ -3525,11 +3417,7 @@
+ 
+ 	pci_set_master(pdev);
+ 
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+ 	ret = nsp32_detect(pdev);
+-#else
+-	ret = scsi_register_host(&nsp32_template);
+-#endif
+ 
+ 	nsp32_msg(KERN_INFO, "irq: %i mmio: %p+0x%lx slot: %s model: %s",
+ 		  pdev->irq,
+@@ -3544,25 +3432,17 @@
+ 
+ static void __devexit nsp32_remove(struct pci_dev *pdev)
+ {
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+ 	struct Scsi_Host *host = pci_get_drvdata(pdev);
+-#endif
+ 
+ 	nsp32_dbg(NSP32_DEBUG_REGISTER, "enter");
+ 
+-#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,73))
+         scsi_remove_host(host);
+ 
+ 	nsp32_release(host);
+ 
+ 	scsi_host_put(host);
+-#else
+-	scsi_unregister_host(&nsp32_template);	
+-#endif
+ }
+ 
+-
+-
+ static struct pci_driver nsp32_driver = {
+ 	.name		= "nsp32",
+ 	.id_table	= nsp32_pci_table,
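+
+With the KERNEL_VERSION conditionals gone, nsp32 is a plain PCI hotplug
+driver. Skeleton of the probe flow the cleaned-up code follows
+(simplified sketch; the real nsp32_probe also logs the device and
+handles enable failures):
+
+	static int __devinit example_probe(struct pci_dev *pdev,
+					   const struct pci_device_id *id)
+	{
+		int ret;
+
+		ret = pci_enable_device(pdev);
+		if (ret)
+			return ret;
+		pci_set_master(pdev);
+		return nsp32_detect(pdev);	/* alloc host, map BARs, scsi_add_host() */
+	}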
+diff -Nurb linux-2.6.22-570/drivers/scsi/pcmcia/sym53c500_cs.c linux-2.6.22-try2/drivers/scsi/pcmcia/sym53c500_cs.c
+--- linux-2.6.22-570/drivers/scsi/pcmcia/sym53c500_cs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/pcmcia/sym53c500_cs.c	2007-12-19 15:29:23.000000000 -0500
+@@ -370,8 +370,6 @@
+ 	DEB(unsigned char seq_reg;)
+ 	unsigned char status, int_reg;
+ 	unsigned char pio_status;
+-	struct scatterlist *sglist;
+-	unsigned int sgcount;
+ 	int port_base = dev->io_port;
+ 	struct sym53c500_data *data =
+ 	    (struct sym53c500_data *)dev->hostdata;
+@@ -434,20 +432,19 @@
+ 	switch (status & 0x07) {	/* scsi phase */
+ 	case 0x00:			/* DATA-OUT */
+ 		if (int_reg & 0x10) {	/* Target requesting info transfer */
++			struct scatterlist *sg;
++			int i;
++
+ 			curSC->SCp.phase = data_out;
+ 			VDEB(printk("SYM53C500: Data-Out phase\n"));
+ 			outb(FLUSH_FIFO, port_base + CMD_REG);
+-			LOAD_DMA_COUNT(port_base, curSC->request_bufflen);	/* Max transfer size */
++			LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC));	/* Max transfer size */
+ 			outb(TRANSFER_INFO | DMA_OP, port_base + CMD_REG);
+-			if (!curSC->use_sg)	/* Don't use scatter-gather */
+-				SYM53C500_pio_write(fast_pio, port_base, curSC->request_buffer, curSC->request_bufflen);
+-			else {	/* use scatter-gather */
+-				sgcount = curSC->use_sg;
+-				sglist = curSC->request_buffer;
+-				while (sgcount--) {
+-					SYM53C500_pio_write(fast_pio, port_base, page_address(sglist->page) + sglist->offset, sglist->length);
+-					sglist++;
+-				}
++
++			scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) {
++				SYM53C500_pio_write(fast_pio, port_base,
++						    page_address(sg->page) + sg->offset,
++						    sg->length);
+ 			}
+ 			REG0(port_base);
+ 		}
+@@ -455,20 +452,19 @@
+ 
+ 	case 0x01:		/* DATA-IN */
+ 		if (int_reg & 0x10) {	/* Target requesting info transfer */
++			struct scatterlist *sg;
++			int i;
++
+ 			curSC->SCp.phase = data_in;
+ 			VDEB(printk("SYM53C500: Data-In phase\n"));
+ 			outb(FLUSH_FIFO, port_base + CMD_REG);
+-			LOAD_DMA_COUNT(port_base, curSC->request_bufflen);	/* Max transfer size */
++			LOAD_DMA_COUNT(port_base, scsi_bufflen(curSC));	/* Max transfer size */
+ 			outb(TRANSFER_INFO | DMA_OP, port_base + CMD_REG);
+-			if (!curSC->use_sg)	/* Don't use scatter-gather */
+-				SYM53C500_pio_read(fast_pio, port_base, curSC->request_buffer, curSC->request_bufflen);
+-			else {	/* Use scatter-gather */
+-				sgcount = curSC->use_sg;
+-				sglist = curSC->request_buffer;
+-				while (sgcount--) {
+-					SYM53C500_pio_read(fast_pio, port_base, page_address(sglist->page) + sglist->offset, sglist->length);
+-					sglist++;
+-				}
++
++			scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) {
++				SYM53C500_pio_read(fast_pio, port_base,
++						   page_address(sg->page) + sg->offset,
++						   sg->length);
+ 			}
+ 			REG0(port_base);
+ 		}
+@@ -578,7 +574,7 @@
+ 
+ 	DEB(printk("cmd=%02x, cmd_len=%02x, target=%02x, lun=%02x, bufflen=%d\n", 
+ 	    SCpnt->cmnd[0], SCpnt->cmd_len, SCpnt->device->id, 
+-	    SCpnt->device->lun,  SCpnt->request_bufflen));
++	    SCpnt->device->lun,  scsi_bufflen(SCpnt)));
+ 
+ 	VDEB(for (i = 0; i < SCpnt->cmd_len; i++)
+ 	    printk("cmd[%d]=%02x  ", i, SCpnt->cmnd[i]));
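+
+The sym53c500 PIO loops above use page_address(), which assumes the sg
+pages live in lowmem. A highmem-safe variant of the write loop,
+sketched with the 2.6.22-era two-argument kmap_atomic():
+
+	scsi_for_each_sg(curSC, sg, scsi_sg_count(curSC), i) {
+		char *buf = kmap_atomic(sg->page, KM_IRQ0) + sg->offset;
+
+		SYM53C500_pio_write(fast_pio, port_base, buf, sg->length);
+		kunmap_atomic(buf - sg->offset, KM_IRQ0);
+	}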
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_attr.c linux-2.6.22-try2/drivers/scsi/qla2xxx/qla_attr.c
+--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_attr.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla2xxx/qla_attr.c	2007-12-19 15:29:22.000000000 -0500
+@@ -11,8 +11,9 @@
+ /* SYSFS attributes --------------------------------------------------------- */
+ 
+ static ssize_t
+-qla2x00_sysfs_read_fw_dump(struct kobject *kobj, char *buf, loff_t off,
+-    size_t count)
++qla2x00_sysfs_read_fw_dump(struct kobject *kobj,
++			   struct bin_attribute *bin_attr,
++			   char *buf, loff_t off, size_t count)
+ {
+ 	struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ 	    struct device, kobj)));
+@@ -31,8 +32,9 @@
+ }
+ 
+ static ssize_t
+-qla2x00_sysfs_write_fw_dump(struct kobject *kobj, char *buf, loff_t off,
+-    size_t count)
++qla2x00_sysfs_write_fw_dump(struct kobject *kobj,
++			    struct bin_attribute *bin_attr,
++			    char *buf, loff_t off, size_t count)
+ {
+ 	struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ 	    struct device, kobj)));
+@@ -73,7 +75,6 @@
+ 	.attr = {
+ 		.name = "fw_dump",
+ 		.mode = S_IRUSR | S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = 0,
+ 	.read = qla2x00_sysfs_read_fw_dump,
+@@ -81,8 +82,9 @@
+ };
+ 
+ static ssize_t
+-qla2x00_sysfs_read_nvram(struct kobject *kobj, char *buf, loff_t off,
+-    size_t count)
++qla2x00_sysfs_read_nvram(struct kobject *kobj,
++			 struct bin_attribute *bin_attr,
++			 char *buf, loff_t off, size_t count)
+ {
+ 	struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ 	    struct device, kobj)));
+@@ -101,8 +103,9 @@
+ }
+ 
+ static ssize_t
+-qla2x00_sysfs_write_nvram(struct kobject *kobj, char *buf, loff_t off,
+-    size_t count)
++qla2x00_sysfs_write_nvram(struct kobject *kobj,
++			  struct bin_attribute *bin_attr,
++			  char *buf, loff_t off, size_t count)
+ {
+ 	struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ 	    struct device, kobj)));
+@@ -149,7 +152,6 @@
+ 	.attr = {
+ 		.name = "nvram",
+ 		.mode = S_IRUSR | S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = 512,
+ 	.read = qla2x00_sysfs_read_nvram,
+@@ -157,8 +159,9 @@
+ };
+ 
+ static ssize_t
+-qla2x00_sysfs_read_optrom(struct kobject *kobj, char *buf, loff_t off,
+-    size_t count)
++qla2x00_sysfs_read_optrom(struct kobject *kobj,
++			  struct bin_attribute *bin_attr,
++			  char *buf, loff_t off, size_t count)
+ {
+ 	struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ 	    struct device, kobj)));
+@@ -176,8 +179,9 @@
+ }
+ 
+ static ssize_t
+-qla2x00_sysfs_write_optrom(struct kobject *kobj, char *buf, loff_t off,
+-    size_t count)
++qla2x00_sysfs_write_optrom(struct kobject *kobj,
++			   struct bin_attribute *bin_attr,
++			   char *buf, loff_t off, size_t count)
+ {
+ 	struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ 	    struct device, kobj)));
+@@ -198,7 +202,6 @@
+ 	.attr = {
+ 		.name = "optrom",
+ 		.mode = S_IRUSR | S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = OPTROM_SIZE_24XX,
+ 	.read = qla2x00_sysfs_read_optrom,
+@@ -206,8 +209,9 @@
+ };
+ 
+ static ssize_t
+-qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj, char *buf, loff_t off,
+-    size_t count)
++qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj,
++			       struct bin_attribute *bin_attr,
++			       char *buf, loff_t off, size_t count)
+ {
+ 	struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ 	    struct device, kobj)));
+@@ -279,15 +283,15 @@
+ 	.attr = {
+ 		.name = "optrom_ctl",
+ 		.mode = S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = 0,
+ 	.write = qla2x00_sysfs_write_optrom_ctl,
+ };
+ 
+ static ssize_t
+-qla2x00_sysfs_read_vpd(struct kobject *kobj, char *buf, loff_t off,
+-    size_t count)
++qla2x00_sysfs_read_vpd(struct kobject *kobj,
++		       struct bin_attribute *bin_attr,
++		       char *buf, loff_t off, size_t count)
+ {
+ 	struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ 	    struct device, kobj)));
+@@ -305,8 +309,9 @@
+ }
+ 
+ static ssize_t
+-qla2x00_sysfs_write_vpd(struct kobject *kobj, char *buf, loff_t off,
+-    size_t count)
++qla2x00_sysfs_write_vpd(struct kobject *kobj,
++			struct bin_attribute *bin_attr,
++			char *buf, loff_t off, size_t count)
+ {
+ 	struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ 	    struct device, kobj)));
+@@ -327,7 +332,6 @@
+ 	.attr = {
+ 		.name = "vpd",
+ 		.mode = S_IRUSR | S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = 0,
+ 	.read = qla2x00_sysfs_read_vpd,
+@@ -335,8 +339,9 @@
+ };
+ 
+ static ssize_t
+-qla2x00_sysfs_read_sfp(struct kobject *kobj, char *buf, loff_t off,
+-    size_t count)
++qla2x00_sysfs_read_sfp(struct kobject *kobj,
++		       struct bin_attribute *bin_attr,
++		       char *buf, loff_t off, size_t count)
+ {
+ 	struct scsi_qla_host *ha = to_qla_host(dev_to_shost(container_of(kobj,
+ 	    struct device, kobj)));
+@@ -375,7 +380,6 @@
+ 	.attr = {
+ 		.name = "sfp",
+ 		.mode = S_IRUSR | S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = SFP_DEV_SIZE * 2,
+ 	.read = qla2x00_sysfs_read_sfp,
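
The qla_attr.c hunks above track two sysfs API changes: binary
attribute callbacks gain a struct bin_attribute * argument, and the
.owner field disappears from struct attribute.  A sketch of the new
callback shape under those assumptions ("example" names are
placeholders, not driver symbols):

#include <linux/stat.h>
#include <linux/sysfs.h>

static ssize_t example_read(struct kobject *kobj,
			    struct bin_attribute *bin_attr,
			    char *buf, loff_t off, size_t count)
{
	/* bin_attr now identifies which attribute fired the callback,
	 * so one handler can serve several attributes. */
	return 0;
}

static struct bin_attribute example_attr = {
	.attr = { .name = "example", .mode = S_IRUSR },
	.size = 0,		/* size unknown until read */
	.read = example_read,
};
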
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_dbg.c linux-2.6.22-try2/drivers/scsi/qla2xxx/qla_dbg.c
+--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_dbg.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla2xxx/qla_dbg.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1411,9 +1411,9 @@
+ 		printk("0x%02x ", cmd->cmnd[i]);
+ 	}
+ 	printk("\n  seg_cnt=%d, allowed=%d, retries=%d\n",
+-	    cmd->use_sg, cmd->allowed, cmd->retries);
++	       scsi_sg_count(cmd), cmd->allowed, cmd->retries);
+ 	printk("  request buffer=0x%p, request buffer len=0x%x\n",
+-	    cmd->request_buffer, cmd->request_bufflen);
++	       scsi_sglist(cmd), scsi_bufflen(cmd));
+ 	printk("  tag=%d, transfersize=0x%x\n",
+ 	    cmd->tag, cmd->transfersize);
+ 	printk("  serial_number=%lx, SP=%p\n", cmd->serial_number, sp);
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_iocb.c linux-2.6.22-try2/drivers/scsi/qla2xxx/qla_iocb.c
+--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_iocb.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla2xxx/qla_iocb.c	2007-12-19 15:29:23.000000000 -0500
+@@ -155,6 +155,8 @@
+ 	uint32_t	*cur_dsd;
+ 	scsi_qla_host_t	*ha;
+ 	struct scsi_cmnd *cmd;
++	struct scatterlist *sg;
++	int i;
+ 
+ 	cmd = sp->cmd;
+ 
+@@ -163,7 +165,7 @@
+ 	    __constant_cpu_to_le32(COMMAND_TYPE);
+ 
+ 	/* No data transfer */
+-	if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) {
++	if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) {
+ 		cmd_pkt->byte_count = __constant_cpu_to_le32(0);
+ 		return;
+ 	}
+@@ -177,13 +179,8 @@
+ 	cur_dsd = (uint32_t *)&cmd_pkt->dseg_0_address;
+ 
+ 	/* Load data segments */
+-	if (cmd->use_sg != 0) {
+-		struct	scatterlist *cur_seg;
+-		struct	scatterlist *end_seg;
+-
+-		cur_seg = (struct scatterlist *)cmd->request_buffer;
+-		end_seg = cur_seg + tot_dsds;
+-		while (cur_seg < end_seg) {
++
++	scsi_for_each_sg(cmd, sg, tot_dsds, i) {
+ 			cont_entry_t	*cont_pkt;
+ 
+ 			/* Allocate additional continuation packets? */
+@@ -197,15 +194,9 @@
+ 				avail_dsds = 7;
+ 			}
+ 
+-			*cur_dsd++ = cpu_to_le32(sg_dma_address(cur_seg));
+-			*cur_dsd++ = cpu_to_le32(sg_dma_len(cur_seg));
++		*cur_dsd++ = cpu_to_le32(sg_dma_address(sg));
++		*cur_dsd++ = cpu_to_le32(sg_dma_len(sg));
+ 			avail_dsds--;
+-
+-			cur_seg++;
+-		}
+-	} else {
+-		*cur_dsd++ = cpu_to_le32(sp->dma_handle);
+-		*cur_dsd++ = cpu_to_le32(cmd->request_bufflen);
+ 	}
+ }
+ 
+@@ -224,6 +215,8 @@
+ 	uint32_t	*cur_dsd;
+ 	scsi_qla_host_t	*ha;
+ 	struct scsi_cmnd *cmd;
++	struct scatterlist *sg;
++	int i;
+ 
+ 	cmd = sp->cmd;
+ 
+@@ -232,7 +225,7 @@
+ 	    __constant_cpu_to_le32(COMMAND_A64_TYPE);
+ 
+ 	/* No data transfer */
+-	if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) {
++	if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) {
+ 		cmd_pkt->byte_count = __constant_cpu_to_le32(0);
+ 		return;
+ 	}
+@@ -246,13 +239,7 @@
+ 	cur_dsd = (uint32_t *)&cmd_pkt->dseg_0_address;
+ 
+ 	/* Load data segments */
+-	if (cmd->use_sg != 0) {
+-		struct	scatterlist *cur_seg;
+-		struct	scatterlist *end_seg;
+-
+-		cur_seg = (struct scatterlist *)cmd->request_buffer;
+-		end_seg = cur_seg + tot_dsds;
+-		while (cur_seg < end_seg) {
++	scsi_for_each_sg(cmd, sg, tot_dsds, i) {
+ 			dma_addr_t	sle_dma;
+ 			cont_a64_entry_t *cont_pkt;
+ 
+@@ -267,18 +254,11 @@
+ 				avail_dsds = 5;
+ 			}
+ 
+-			sle_dma = sg_dma_address(cur_seg);
++		sle_dma = sg_dma_address(sg);
+ 			*cur_dsd++ = cpu_to_le32(LSD(sle_dma));
+ 			*cur_dsd++ = cpu_to_le32(MSD(sle_dma));
+-			*cur_dsd++ = cpu_to_le32(sg_dma_len(cur_seg));
++		*cur_dsd++ = cpu_to_le32(sg_dma_len(sg));
+ 			avail_dsds--;
+-
+-			cur_seg++;
+-		}
+-	} else {
+-		*cur_dsd++ = cpu_to_le32(LSD(sp->dma_handle));
+-		*cur_dsd++ = cpu_to_le32(MSD(sp->dma_handle));
+-		*cur_dsd++ = cpu_to_le32(cmd->request_bufflen);
+ 	}
+ }
+ 
+@@ -291,7 +271,7 @@
+ int
+ qla2x00_start_scsi(srb_t *sp)
+ {
+-	int		ret;
++	int		ret, nseg;
+ 	unsigned long   flags;
+ 	scsi_qla_host_t	*ha;
+ 	struct scsi_cmnd *cmd;
+@@ -299,7 +279,6 @@
+ 	uint32_t        index;
+ 	uint32_t	handle;
+ 	cmd_entry_t	*cmd_pkt;
+-	struct scatterlist *sg;
+ 	uint16_t	cnt;
+ 	uint16_t	req_cnt;
+ 	uint16_t	tot_dsds;
+@@ -337,23 +316,10 @@
+ 		goto queuing_error;
+ 
+ 	/* Map the sg table so we have an accurate count of sg entries needed */
+-	if (cmd->use_sg) {
+-		sg = (struct scatterlist *) cmd->request_buffer;
+-		tot_dsds = pci_map_sg(ha->pdev, sg, cmd->use_sg,
+-		    cmd->sc_data_direction);
+-		if (tot_dsds == 0)
++	nseg = scsi_dma_map(cmd);
++	if (nseg < 0)
+ 			goto queuing_error;
+-	} else if (cmd->request_bufflen) {
+-		dma_addr_t	req_dma;
+-
+-		req_dma = pci_map_single(ha->pdev, cmd->request_buffer,
+-		    cmd->request_bufflen, cmd->sc_data_direction);
+-		if (dma_mapping_error(req_dma))
+-			goto queuing_error;
+-
+-		sp->dma_handle = req_dma;
+-		tot_dsds = 1;
+-	}
++	tot_dsds = nseg;
+ 
+ 	/* Calculate the number of request entries needed. */
+ 	req_cnt = ha->isp_ops.calc_req_entries(tot_dsds);
+@@ -391,7 +357,7 @@
+ 
+ 	/* Load SCSI command packet. */
+ 	memcpy(cmd_pkt->scsi_cdb, cmd->cmnd, cmd->cmd_len);
+-	cmd_pkt->byte_count = cpu_to_le32((uint32_t)cmd->request_bufflen);
++	cmd_pkt->byte_count = cpu_to_le32((uint32_t)scsi_bufflen(cmd));
+ 
+ 	/* Build IOCB segments */
+ 	ha->isp_ops.build_iocbs(sp, cmd_pkt, tot_dsds);
+@@ -423,14 +389,9 @@
+ 	return (QLA_SUCCESS);
+ 
+ queuing_error:
+-	if (cmd->use_sg && tot_dsds) {
+-		sg = (struct scatterlist *) cmd->request_buffer;
+-		pci_unmap_sg(ha->pdev, sg, cmd->use_sg,
+-		    cmd->sc_data_direction);
+-	} else if (tot_dsds) {
+-		pci_unmap_single(ha->pdev, sp->dma_handle,
+-		    cmd->request_bufflen, cmd->sc_data_direction);
+-	}
++	if (tot_dsds)
++		scsi_dma_unmap(cmd);
++
+ 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+ 
+ 	return (QLA_FUNCTION_FAILED);
+@@ -642,6 +603,8 @@
+ 	uint32_t	*cur_dsd;
+ 	scsi_qla_host_t	*ha;
+ 	struct scsi_cmnd *cmd;
++	struct scatterlist *sg;
++	int i;
+ 
+ 	cmd = sp->cmd;
+ 
+@@ -650,7 +613,7 @@
+ 	    __constant_cpu_to_le32(COMMAND_TYPE_7);
+ 
+ 	/* No data transfer */
+-	if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) {
++	if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) {
+ 		cmd_pkt->byte_count = __constant_cpu_to_le32(0);
+ 		return;
+ 	}
+@@ -670,13 +633,8 @@
+ 	cur_dsd = (uint32_t *)&cmd_pkt->dseg_0_address;
+ 
+ 	/* Load data segments */
+-	if (cmd->use_sg != 0) {
+-		struct	scatterlist *cur_seg;
+-		struct	scatterlist *end_seg;
+-
+-		cur_seg = (struct scatterlist *)cmd->request_buffer;
+-		end_seg = cur_seg + tot_dsds;
+-		while (cur_seg < end_seg) {
++
++	scsi_for_each_sg(cmd, sg, tot_dsds, i) {
+ 			dma_addr_t	sle_dma;
+ 			cont_a64_entry_t *cont_pkt;
+ 
+@@ -691,18 +649,11 @@
+ 				avail_dsds = 5;
+ 			}
+ 
+-			sle_dma = sg_dma_address(cur_seg);
++		sle_dma = sg_dma_address(sg);
+ 			*cur_dsd++ = cpu_to_le32(LSD(sle_dma));
+ 			*cur_dsd++ = cpu_to_le32(MSD(sle_dma));
+-			*cur_dsd++ = cpu_to_le32(sg_dma_len(cur_seg));
++		*cur_dsd++ = cpu_to_le32(sg_dma_len(sg));
+ 			avail_dsds--;
+-
+-			cur_seg++;
+-		}
+-	} else {
+-		*cur_dsd++ = cpu_to_le32(LSD(sp->dma_handle));
+-		*cur_dsd++ = cpu_to_le32(MSD(sp->dma_handle));
+-		*cur_dsd++ = cpu_to_le32(cmd->request_bufflen);
+ 	}
+ }
+ 
+@@ -716,7 +667,7 @@
+ int
+ qla24xx_start_scsi(srb_t *sp)
+ {
+-	int		ret;
++	int		ret, nseg;
+ 	unsigned long   flags;
+ 	scsi_qla_host_t	*ha;
+ 	struct scsi_cmnd *cmd;
+@@ -724,7 +675,6 @@
+ 	uint32_t        index;
+ 	uint32_t	handle;
+ 	struct cmd_type_7 *cmd_pkt;
+-	struct scatterlist *sg;
+ 	uint16_t	cnt;
+ 	uint16_t	req_cnt;
+ 	uint16_t	tot_dsds;
+@@ -762,23 +712,10 @@
+ 		goto queuing_error;
+ 
+ 	/* Map the sg table so we have an accurate count of sg entries needed */
+-	if (cmd->use_sg) {
+-		sg = (struct scatterlist *) cmd->request_buffer;
+-		tot_dsds = pci_map_sg(ha->pdev, sg, cmd->use_sg,
+-		    cmd->sc_data_direction);
+-		if (tot_dsds == 0)
+-			goto queuing_error;
+-	} else if (cmd->request_bufflen) {
+-		dma_addr_t      req_dma;
+-
+-		req_dma = pci_map_single(ha->pdev, cmd->request_buffer,
+-		    cmd->request_bufflen, cmd->sc_data_direction);
+-		if (dma_mapping_error(req_dma))
++	nseg = scsi_dma_map(cmd);
++	if (nseg < 0)
+ 			goto queuing_error;
+-
+-		sp->dma_handle = req_dma;
+-		tot_dsds = 1;
+-	}
++	tot_dsds = nseg;
+ 
+ 	req_cnt = qla24xx_calc_iocbs(tot_dsds);
+ 	if (ha->req_q_cnt < (req_cnt + 2)) {
+@@ -821,7 +758,7 @@
+ 	memcpy(cmd_pkt->fcp_cdb, cmd->cmnd, cmd->cmd_len);
+ 	host_to_fcp_swap(cmd_pkt->fcp_cdb, sizeof(cmd_pkt->fcp_cdb));
+ 
+-	cmd_pkt->byte_count = cpu_to_le32((uint32_t)cmd->request_bufflen);
++	cmd_pkt->byte_count = cpu_to_le32((uint32_t)scsi_bufflen(cmd));
+ 
+ 	/* Build IOCB segments */
+ 	qla24xx_build_scsi_iocbs(sp, cmd_pkt, tot_dsds);
+@@ -853,14 +790,9 @@
+ 	return QLA_SUCCESS;
+ 
+ queuing_error:
+-	if (cmd->use_sg && tot_dsds) {
+-		sg = (struct scatterlist *) cmd->request_buffer;
+-		pci_unmap_sg(ha->pdev, sg, cmd->use_sg,
+-		    cmd->sc_data_direction);
+-	} else if (tot_dsds) {
+-		pci_unmap_single(ha->pdev, sp->dma_handle,
+-		    cmd->request_bufflen, cmd->sc_data_direction);
+-	}
++	if (tot_dsds)
++		scsi_dma_unmap(cmd);
++
+ 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+ 
+ 	return QLA_FUNCTION_FAILED;
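
Both qla2xxx queuing paths above converge on the same mapping idiom:
scsi_dma_map() maps the command's sg table (returning the segment
count, 0 for a dataless command, or a negative error) and
scsi_dma_unmap() is the single teardown call.  A sketch of that idiom
with hypothetical wrapper names:

#include <linux/errno.h>
#include <scsi/scsi_cmnd.h>

static int map_command(struct scsi_cmnd *cmd, uint16_t *tot_dsds)
{
	int nseg;

	nseg = scsi_dma_map(cmd);	/* replaces the use_sg branching */
	if (nseg < 0)
		return -EIO;
	*tot_dsds = nseg;		/* 0 means no data segments */
	return 0;
}

static void unmap_command(struct scsi_cmnd *cmd, uint16_t tot_dsds)
{
	if (tot_dsds)
		scsi_dma_unmap(cmd);	/* one call for sg and non-sg alike */
}
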
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_isr.c linux-2.6.22-try2/drivers/scsi/qla2xxx/qla_isr.c
+--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_isr.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla2xxx/qla_isr.c	2007-12-19 15:29:23.000000000 -0500
+@@ -889,11 +889,11 @@
+ 		}
+ 		if (scsi_status & (SS_RESIDUAL_UNDER | SS_RESIDUAL_OVER)) {
+ 			resid = resid_len;
+-			cp->resid = resid;
++			scsi_set_resid(cp, resid);
+ 			CMD_RESID_LEN(cp) = resid;
+ 
+ 			if (!lscsi_status &&
+-			    ((unsigned)(cp->request_bufflen - resid) <
++			    ((unsigned)(scsi_bufflen(cp) - resid) <
+ 			     cp->underflow)) {
+ 				qla_printk(KERN_INFO, ha,
+ 				    "scsi(%ld:%d:%d:%d): Mid-layer underflow "
+@@ -901,7 +901,7 @@
+ 				    "error status.\n", ha->host_no,
+ 				    cp->device->channel, cp->device->id,
+ 				    cp->device->lun, resid,
+-				    cp->request_bufflen);
++				    scsi_bufflen(cp));
+ 
+ 				cp->result = DID_ERROR << 16;
+ 				break;
+@@ -963,7 +963,7 @@
+ 			resid = fw_resid_len;
+ 
+ 		if (scsi_status & SS_RESIDUAL_UNDER) {
+-			cp->resid = resid;
++			scsi_set_resid(cp, resid);
+ 			CMD_RESID_LEN(cp) = resid;
+ 		} else {
+ 			DEBUG2(printk(KERN_INFO
+@@ -1046,14 +1046,14 @@
+ 				    "retrying command.\n", ha->host_no,
+ 				    cp->device->channel, cp->device->id,
+ 				    cp->device->lun, resid,
+-				    cp->request_bufflen));
++				    scsi_bufflen(cp)));
+ 
+ 				cp->result = DID_BUS_BUSY << 16;
+ 				break;
+ 			}
+ 
+ 			/* Handle mid-layer underflow */
+-			if ((unsigned)(cp->request_bufflen - resid) <
++			if ((unsigned)(scsi_bufflen(cp) - resid) <
+ 			    cp->underflow) {
+ 				qla_printk(KERN_INFO, ha,
+ 				    "scsi(%ld:%d:%d:%d): Mid-layer underflow "
+@@ -1061,7 +1061,7 @@
+ 				    "error status.\n", ha->host_no,
+ 				    cp->device->channel, cp->device->id,
+ 				    cp->device->lun, resid,
+-				    cp->request_bufflen);
++				    scsi_bufflen(cp));
+ 
+ 				cp->result = DID_ERROR << 16;
+ 				break;
+@@ -1084,7 +1084,7 @@
+ 		DEBUG2(printk(KERN_INFO
+ 		    "PID=0x%lx req=0x%x xtra=0x%x -- returning DID_ERROR "
+ 		    "status!\n",
+-		    cp->serial_number, cp->request_bufflen, resid_len));
++		    cp->serial_number, scsi_bufflen(cp), resid_len));
+ 
+ 		cp->result = DID_ERROR << 16;
+ 		break;
+@@ -1633,7 +1633,7 @@
+ 	uint16_t entry;
+ 	uint16_t index;
+ 	const char *name;
+-	irqreturn_t (*handler)(int, void *);
++	irq_handler_t handler;
+ };
+ 
+ static struct qla_init_msix_entry imsix_entries[QLA_MSIX_ENTRIES] = {
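
The qla_isr.c hunks above switch residual bookkeeping to the
scsi_set_resid()/scsi_bufflen() accessors.  A sketch of the underflow
test those hunks preserve (check_underflow is a hypothetical name):

#include <scsi/scsi.h>
#include <scsi/scsi_cmnd.h>

static int check_underflow(struct scsi_cmnd *cp, uint32_t resid)
{
	scsi_set_resid(cp, resid);	/* was: cp->resid = resid */

	/* Fewer bytes moved than the mid-layer's stated minimum? */
	if ((unsigned)(scsi_bufflen(cp) - resid) < cp->underflow) {
		cp->result = DID_ERROR << 16;
		return 1;
	}
	return 0;
}
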
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla2xxx/qla_os.c linux-2.6.22-try2/drivers/scsi/qla2xxx/qla_os.c
+--- linux-2.6.22-570/drivers/scsi/qla2xxx/qla_os.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla2xxx/qla_os.c	2007-12-19 15:29:23.000000000 -0500
+@@ -2426,13 +2426,7 @@
+ 	struct scsi_cmnd *cmd = sp->cmd;
+ 
+ 	if (sp->flags & SRB_DMA_VALID) {
+-		if (cmd->use_sg) {
+-			dma_unmap_sg(&ha->pdev->dev, cmd->request_buffer,
+-			    cmd->use_sg, cmd->sc_data_direction);
+-		} else if (cmd->request_bufflen) {
+-			dma_unmap_single(&ha->pdev->dev, sp->dma_handle,
+-			    cmd->request_bufflen, cmd->sc_data_direction);
+-		}
++		scsi_dma_unmap(cmd);
+ 		sp->flags &= ~SRB_DMA_VALID;
+ 	}
+ 	CMD_SP(cmd) = NULL;
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_dbg.c linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_dbg.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_dbg.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_dbg.c	2007-12-19 15:29:23.000000000 -0500
+@@ -6,176 +6,9 @@
+  */
+ 
+ #include "ql4_def.h"
+-#include <scsi/scsi_dbg.h>
+-
+-#if 0
+-
+-static void qla4xxx_print_srb_info(struct srb * srb)
+-{
+-	printk("%s: srb = 0x%p, flags=0x%02x\n", __func__, srb, srb->flags);
+-	printk("%s: cmd = 0x%p, saved_dma_handle = 0x%lx\n",
+-	       __func__, srb->cmd, (unsigned long) srb->dma_handle);
+-	printk("%s: fw_ddb_index = %d, lun = %d\n",
+-	       __func__, srb->fw_ddb_index, srb->cmd->device->lun);
+-	printk("%s: iocb_tov = %d\n",
+-	       __func__, srb->iocb_tov);
+-	printk("%s: cc_stat = 0x%x, r_start = 0x%lx, u_start = 0x%lx\n\n",
+-	       __func__, srb->cc_stat, srb->r_start, srb->u_start);
+-}
+-
+-void qla4xxx_print_scsi_cmd(struct scsi_cmnd *cmd)
+-{
+-	printk("SCSI Command = 0x%p, Handle=0x%p\n", cmd, cmd->host_scribble);
+-	printk("  b=%d, t=%02xh, l=%02xh, cmd_len = %02xh\n",
+-	       cmd->device->channel, cmd->device->id, cmd->device->lun,
+-	       cmd->cmd_len);
+-	scsi_print_command(cmd);
+-	printk("  seg_cnt = %d\n", cmd->use_sg);
+-	printk("  request buffer = 0x%p, request buffer len = 0x%x\n",
+-	       cmd->request_buffer, cmd->request_bufflen);
+-	if (cmd->use_sg) {
+-		struct scatterlist *sg;
+-		sg = (struct scatterlist *)cmd->request_buffer;
+-		printk("  SG buffer: \n");
+-		qla4xxx_dump_buffer((caddr_t) sg,
+-				    (cmd->use_sg * sizeof(*sg)));
+-	}
+-	printk("  tag = %d, transfersize = 0x%x \n", cmd->tag,
+-	       cmd->transfersize);
+-	printk("  Pid = %d, SP = 0x%p\n", (int)cmd->pid, cmd->SCp.ptr);
+-	printk("  underflow size = 0x%x, direction=0x%x\n", cmd->underflow,
+-	       cmd->sc_data_direction);
+-	printk("  Current time (jiffies) = 0x%lx, "
+-	       "timeout expires = 0x%lx\n", jiffies, cmd->eh_timeout.expires);
+-	qla4xxx_print_srb_info((struct srb *) cmd->SCp.ptr);
+-}
+-
+-void __dump_registers(struct scsi_qla_host *ha)
+-{
+-	uint8_t i;
+-	for (i = 0; i < MBOX_REG_COUNT; i++) {
+-		printk(KERN_INFO "0x%02X mailbox[%d]	  = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg, mailbox[i]), i,
+-		       readw(&ha->reg->mailbox[i]));
+-	}
+-	printk(KERN_INFO "0x%02X flash_address	 = 0x%08X\n",
+-	       (uint8_t) offsetof(struct isp_reg, flash_address),
+-	       readw(&ha->reg->flash_address));
+-	printk(KERN_INFO "0x%02X flash_data	 = 0x%08X\n",
+-	       (uint8_t) offsetof(struct isp_reg, flash_data),
+-	       readw(&ha->reg->flash_data));
+-	printk(KERN_INFO "0x%02X ctrl_status	 = 0x%08X\n",
+-	       (uint8_t) offsetof(struct isp_reg, ctrl_status),
+-	       readw(&ha->reg->ctrl_status));
+-	if (is_qla4010(ha)) {
+-		printk(KERN_INFO "0x%02X nvram		 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg, u1.isp4010.nvram),
+-		       readw(&ha->reg->u1.isp4010.nvram));
+-	}
+-
+-	else if (is_qla4022(ha) | is_qla4032(ha)) {
+-		printk(KERN_INFO "0x%02X intr_mask	 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg,
+-					  u1.isp4022.intr_mask),
+-		       readw(&ha->reg->u1.isp4022.intr_mask));
+-		printk(KERN_INFO "0x%02X nvram		 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg, u1.isp4022.nvram),
+-		       readw(&ha->reg->u1.isp4022.nvram));
+-		printk(KERN_INFO "0x%02X semaphore	 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg,
+-					  u1.isp4022.semaphore),
+-		       readw(&ha->reg->u1.isp4022.semaphore));
+-	}
+-	printk(KERN_INFO "0x%02X req_q_in	 = 0x%08X\n",
+-	       (uint8_t) offsetof(struct isp_reg, req_q_in),
+-	       readw(&ha->reg->req_q_in));
+-	printk(KERN_INFO "0x%02X rsp_q_out	 = 0x%08X\n",
+-	       (uint8_t) offsetof(struct isp_reg, rsp_q_out),
+-	       readw(&ha->reg->rsp_q_out));
+-	if (is_qla4010(ha)) {
+-		printk(KERN_INFO "0x%02X ext_hw_conf	 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg,
+-					  u2.isp4010.ext_hw_conf),
+-		       readw(&ha->reg->u2.isp4010.ext_hw_conf));
+-		printk(KERN_INFO "0x%02X port_ctrl	 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg,
+-					  u2.isp4010.port_ctrl),
+-		       readw(&ha->reg->u2.isp4010.port_ctrl));
+-		printk(KERN_INFO "0x%02X port_status	 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg,
+-					  u2.isp4010.port_status),
+-		       readw(&ha->reg->u2.isp4010.port_status));
+-		printk(KERN_INFO "0x%02X req_q_out	 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg,
+-					  u2.isp4010.req_q_out),
+-		       readw(&ha->reg->u2.isp4010.req_q_out));
+-		printk(KERN_INFO "0x%02X gp_out		 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg, u2.isp4010.gp_out),
+-		       readw(&ha->reg->u2.isp4010.gp_out));
+-		printk(KERN_INFO "0x%02X gp_in		 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg, u2.isp4010.gp_in),
+-		       readw(&ha->reg->u2.isp4010.gp_in));
+-		printk(KERN_INFO "0x%02X port_err_status = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg,
+-					  u2.isp4010.port_err_status),
+-		       readw(&ha->reg->u2.isp4010.port_err_status));
+-	}
+-
+-	else if (is_qla4022(ha) | is_qla4032(ha)) {
+-		printk(KERN_INFO "Page 0 Registers:\n");
+-		printk(KERN_INFO "0x%02X ext_hw_conf	 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg,
+-					  u2.isp4022.p0.ext_hw_conf),
+-		       readw(&ha->reg->u2.isp4022.p0.ext_hw_conf));
+-		printk(KERN_INFO "0x%02X port_ctrl	 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg,
+-					  u2.isp4022.p0.port_ctrl),
+-		       readw(&ha->reg->u2.isp4022.p0.port_ctrl));
+-		printk(KERN_INFO "0x%02X port_status	 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg,
+-					  u2.isp4022.p0.port_status),
+-		       readw(&ha->reg->u2.isp4022.p0.port_status));
+-		printk(KERN_INFO "0x%02X gp_out		 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg,
+-					  u2.isp4022.p0.gp_out),
+-		       readw(&ha->reg->u2.isp4022.p0.gp_out));
+-		printk(KERN_INFO "0x%02X gp_in		 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg, u2.isp4022.p0.gp_in),
+-		       readw(&ha->reg->u2.isp4022.p0.gp_in));
+-		printk(KERN_INFO "0x%02X port_err_status = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg,
+-					  u2.isp4022.p0.port_err_status),
+-		       readw(&ha->reg->u2.isp4022.p0.port_err_status));
+-		printk(KERN_INFO "Page 1 Registers:\n");
+-		writel(HOST_MEM_CFG_PAGE & set_rmask(CSR_SCSI_PAGE_SELECT),
+-		       &ha->reg->ctrl_status);
+-		printk(KERN_INFO "0x%02X req_q_out	 = 0x%08X\n",
+-		       (uint8_t) offsetof(struct isp_reg,
+-					  u2.isp4022.p1.req_q_out),
+-		       readw(&ha->reg->u2.isp4022.p1.req_q_out));
+-		writel(PORT_CTRL_STAT_PAGE & set_rmask(CSR_SCSI_PAGE_SELECT),
+-		       &ha->reg->ctrl_status);
+-	}
+-}
+-
+-void qla4xxx_dump_mbox_registers(struct scsi_qla_host *ha)
+-{
+-	unsigned long flags = 0;
+-	int i = 0;
+-	spin_lock_irqsave(&ha->hardware_lock, flags);
+-	for (i = 1; i < MBOX_REG_COUNT; i++)
+-		printk(KERN_INFO "  Mailbox[%d] = %08x\n", i,
+-		       readw(&ha->reg->mailbox[i]));
+-	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+-}
+-
+-void qla4xxx_dump_registers(struct scsi_qla_host *ha)
+-{
+-	unsigned long flags = 0;
+-	spin_lock_irqsave(&ha->hardware_lock, flags);
+-	__dump_registers(ha);
+-	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+-}
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
+ 
+ void qla4xxx_dump_buffer(void *b, uint32_t size)
+ {
+@@ -198,4 +31,3 @@
+ 		printk(KERN_DEBUG "\n");
+ }
+ 
+-#endif  /*  0  */
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_def.h linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_def.h
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_def.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_def.h	2007-12-19 15:29:23.000000000 -0500
+@@ -122,8 +122,7 @@
+ 
+ #define ISCSI_IPADDR_SIZE		4	/* IP address size */
+ #define ISCSI_ALIAS_SIZE		32	/* ISCSI Alias name size */
+-#define ISCSI_NAME_SIZE			255	/* ISCSI Name size -
+-						 * usually a string */
++#define ISCSI_NAME_SIZE			0xE0	/* ISCSI Name size */
+ 
+ #define LSDW(x) ((u32)((u64)(x)))
+ #define MSDW(x) ((u32)((((u64)(x)) >> 16) >> 16))
+@@ -187,7 +186,19 @@
+ 	u_long u_start;		/* Time when we handed the cmd to F/W */
+ };
+ 
+-	/*
++/*
++ * Asynchronous Event Queue structure
++ */
++struct aen {
++	uint32_t mbox_sts[MBOX_AEN_REG_COUNT];
++};
++
++struct ql4_aen_log {
++	int count;
++	struct aen entry[MAX_AEN_ENTRIES];
++};
++
++/*
+ 	 * Device Database (DDB) structure
+ 	 */
+ struct ddb_entry {
+@@ -254,13 +265,6 @@
+ #define DF_ISNS_DISCOVERED	2	/* Device was discovered via iSNS */
+ #define DF_FO_MASKED		3
+ 
+-/*
+- * Asynchronous Event Queue structure
+- */
+-struct aen {
+-	uint32_t mbox_sts[MBOX_AEN_REG_COUNT];
+-};
+-
+ 
+ #include "ql4_fw.h"
+ #include "ql4_nvram.h"
+@@ -270,20 +274,17 @@
+  */
+ struct scsi_qla_host {
+ 	/* Linux adapter configuration data */
+-	struct Scsi_Host *host; /* pointer to host data */
+-	uint32_t tot_ddbs;
+ 	unsigned long flags;
+ 
+ #define AF_ONLINE		      0 /* 0x00000001 */
+ #define AF_INIT_DONE		      1 /* 0x00000002 */
+ #define AF_MBOX_COMMAND		      2 /* 0x00000004 */
+ #define AF_MBOX_COMMAND_DONE	      3 /* 0x00000008 */
+-#define AF_INTERRUPTS_ON	      6 /* 0x00000040 Not Used */
++#define AF_INTERRUPTS_ON		6 /* 0x00000040 */
+ #define AF_GET_CRASH_RECORD	      7 /* 0x00000080 */
+ #define AF_LINK_UP		      8 /* 0x00000100 */
+ #define AF_IRQ_ATTACHED		     10 /* 0x00000400 */
+-#define AF_ISNS_CMD_IN_PROCESS	     12 /* 0x00001000 */
+-#define AF_ISNS_CMD_DONE	     13 /* 0x00002000 */
++#define AF_DISABLE_ACB_COMPLETE		11 /* 0x00000800 */
+ 
+ 	unsigned long dpc_flags;
+ 
+@@ -296,6 +297,9 @@
+ #define DPC_AEN			      9 /* 0x00000200 */
+ #define DPC_GET_DHCP_IP_ADDR	     15 /* 0x00008000 */
+ 
++	struct Scsi_Host *host; /* pointer to host data */
++	uint32_t tot_ddbs;
++
+ 	uint16_t	iocb_cnt;
+ 	uint16_t	iocb_hiwat;
+ 
+@@ -344,6 +348,7 @@
+ 	uint32_t firmware_version[2];
+ 	uint32_t patch_number;
+ 	uint32_t build_number;
++	uint32_t board_id;
+ 
+ 	/* --- From Init_FW --- */
+ 	/* init_cb_t *init_cb; */
+@@ -363,7 +368,6 @@
+ 
+ 	/* --- From GetFwState --- */
+ 	uint32_t firmware_state;
+-	uint32_t board_id;
+ 	uint32_t addl_fw_state;
+ 
+ 	/* Linux kernel thread */
+@@ -414,6 +418,8 @@
+ 	uint16_t aen_out;
+ 	struct aen aen_q[MAX_AEN_ENTRIES];
+ 
++	struct ql4_aen_log aen_log;	/* tracks all aens */
++
+ 	/* This mutex protects several threads to do mailbox commands
+ 	 * concurrently.
+ 	 */
+@@ -585,10 +591,4 @@
+ #define FLUSH_DDB_CHANGED_AENS	 1
+ #define RELOGIN_DDB_CHANGED_AENS 2
+ 
+-#include "ql4_version.h"
+-#include "ql4_glbl.h"
+-#include "ql4_dbg.h"
+-#include "ql4_inline.h"
+-
+-
+ #endif	/*_QLA4XXX_H */
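
ql4_def.h above gains a ql4_aen_log ring (ha->aen_log) alongside the
existing aen_q.  One plausible fill pattern, assuming the log simply
accumulates AEN mailbox snapshots until full (the patch itself does
not show the producer side here):

#include <linux/string.h>

static void aen_log_record(struct scsi_qla_host *ha,
			   const uint32_t *mbox_sts)
{
	struct ql4_aen_log *log = &ha->aen_log;

	if (log->count < MAX_AEN_ENTRIES)
		memcpy(&log->entry[log->count++].mbox_sts[0], mbox_sts,
		       MBOX_AEN_REG_COUNT * sizeof(uint32_t));
}
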
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_fw.h linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_fw.h
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_fw.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_fw.h	2007-12-19 15:29:23.000000000 -0500
+@@ -20,143 +20,23 @@
+  *************************************************************************/
+ 
+ struct port_ctrl_stat_regs {
+-	__le32 ext_hw_conf;	/*  80 x50  R/W */
+-	__le32 intChipConfiguration; /*	 84 x54 */
+-	__le32 port_ctrl;	/*  88 x58 */
+-	__le32 port_status;	/*  92 x5c */
+-	__le32 HostPrimMACHi;	/*  96 x60 */
+-	__le32 HostPrimMACLow;	/* 100 x64 */
+-	__le32 HostSecMACHi;	/* 104 x68 */
+-	__le32 HostSecMACLow;	/* 108 x6c */
+-	__le32 EPPrimMACHi;	/* 112 x70 */
+-	__le32 EPPrimMACLow;	/* 116 x74 */
+-	__le32 EPSecMACHi;	/* 120 x78 */
+-	__le32 EPSecMACLow;	/* 124 x7c */
+-	__le32 HostPrimIPHi;	/* 128 x80 */
+-	__le32 HostPrimIPMidHi; /* 132 x84 */
+-	__le32 HostPrimIPMidLow;	/* 136 x88 */
+-	__le32 HostPrimIPLow;	/* 140 x8c */
+-	__le32 HostSecIPHi;	/* 144 x90 */
+-	__le32 HostSecIPMidHi;	/* 148 x94 */
+-	__le32 HostSecIPMidLow; /* 152 x98 */
+-	__le32 HostSecIPLow;	/* 156 x9c */
+-	__le32 EPPrimIPHi;	/* 160 xa0 */
+-	__le32 EPPrimIPMidHi;	/* 164 xa4 */
+-	__le32 EPPrimIPMidLow;	/* 168 xa8 */
+-	__le32 EPPrimIPLow;	/* 172 xac */
+-	__le32 EPSecIPHi;	/* 176 xb0 */
+-	__le32 EPSecIPMidHi;	/* 180 xb4 */
+-	__le32 EPSecIPMidLow;	/* 184 xb8 */
+-	__le32 EPSecIPLow;	/* 188 xbc */
+-	__le32 IPReassemblyTimeout; /* 192 xc0 */
+-	__le32 EthMaxFramePayload; /* 196 xc4 */
+-	__le32 TCPMaxWindowSize; /* 200 xc8 */
+-	__le32 TCPCurrentTimestampHi; /* 204 xcc */
+-	__le32 TCPCurrentTimestampLow; /* 208 xd0 */
+-	__le32 LocalRAMAddress; /* 212 xd4 */
+-	__le32 LocalRAMData;	/* 216 xd8 */
+-	__le32 PCSReserved1;	/* 220 xdc */
+-	__le32 gp_out;		/* 224 xe0 */
+-	__le32 gp_in;		/* 228 xe4 */
+-	__le32 ProbeMuxAddr;	/* 232 xe8 */
+-	__le32 ProbeMuxData;	/* 236 xec */
+-	__le32 ERMQueueBaseAddr0; /* 240 xf0 */
+-	__le32 ERMQueueBaseAddr1; /* 244 xf4 */
+-	__le32 MACConfiguration; /* 248 xf8 */
+-	__le32 port_err_status; /* 252 xfc  COR */
++	__le32 ext_hw_conf;	/* 0x50  R/W */
++	__le32 rsrvd0;		/* 0x54 */
++	__le32 port_ctrl;	/* 0x58 */
++	__le32 port_status;	/* 0x5c */
++	__le32 rsrvd1[32];	/* 0x60-0xdf */
++	__le32 gp_out;		/* 0xe0 */
++	__le32 gp_in;		/* 0xe4 */
++	__le32 rsrvd2[5];	/* 0xe8-0xfb */
++	__le32 port_err_status; /* 0xfc */
+ };
+ 
+ struct host_mem_cfg_regs {
+-	__le32 NetRequestQueueOut; /*  80 x50 */
+-	__le32 NetRequestQueueOutAddrHi; /*  84 x54 */
+-	__le32 NetRequestQueueOutAddrLow; /*  88 x58 */
+-	__le32 NetRequestQueueBaseAddrHi; /*  92 x5c */
+-	__le32 NetRequestQueueBaseAddrLow; /*  96 x60 */
+-	__le32 NetRequestQueueLength; /* 100 x64 */
+-	__le32 NetResponseQueueIn; /* 104 x68 */
+-	__le32 NetResponseQueueInAddrHi; /* 108 x6c */
+-	__le32 NetResponseQueueInAddrLow; /* 112 x70 */
+-	__le32 NetResponseQueueBaseAddrHi; /* 116 x74 */
+-	__le32 NetResponseQueueBaseAddrLow; /* 120 x78 */
+-	__le32 NetResponseQueueLength; /* 124 x7c */
+-	__le32 req_q_out;	/* 128 x80 */
+-	__le32 RequestQueueOutAddrHi; /* 132 x84 */
+-	__le32 RequestQueueOutAddrLow; /* 136 x88 */
+-	__le32 RequestQueueBaseAddrHi; /* 140 x8c */
+-	__le32 RequestQueueBaseAddrLow; /* 144 x90 */
+-	__le32 RequestQueueLength; /* 148 x94 */
+-	__le32 ResponseQueueIn; /* 152 x98 */
+-	__le32 ResponseQueueInAddrHi; /* 156 x9c */
+-	__le32 ResponseQueueInAddrLow; /* 160 xa0 */
+-	__le32 ResponseQueueBaseAddrHi; /* 164 xa4 */
+-	__le32 ResponseQueueBaseAddrLow; /* 168 xa8 */
+-	__le32 ResponseQueueLength; /* 172 xac */
+-	__le32 NetRxLargeBufferQueueOut; /* 176 xb0 */
+-	__le32 NetRxLargeBufferQueueBaseAddrHi; /* 180 xb4 */
+-	__le32 NetRxLargeBufferQueueBaseAddrLow; /* 184 xb8 */
+-	__le32 NetRxLargeBufferQueueLength; /* 188 xbc */
+-	__le32 NetRxLargeBufferLength; /* 192 xc0 */
+-	__le32 NetRxSmallBufferQueueOut; /* 196 xc4 */
+-	__le32 NetRxSmallBufferQueueBaseAddrHi; /* 200 xc8 */
+-	__le32 NetRxSmallBufferQueueBaseAddrLow; /* 204 xcc */
+-	__le32 NetRxSmallBufferQueueLength; /* 208 xd0 */
+-	__le32 NetRxSmallBufferLength; /* 212 xd4 */
+-	__le32 HMCReserved0[10]; /* 216 xd8 */
+-};
+-
+-struct local_ram_cfg_regs {
+-	__le32 BufletSize;	/*  80 x50 */
+-	__le32 BufletMaxCount;	/*  84 x54 */
+-	__le32 BufletCurrCount; /*  88 x58 */
+-	__le32 BufletPauseThresholdCount; /*  92 x5c */
+-	__le32 BufletTCPWinThresholdHi; /*  96 x60 */
+-	__le32 BufletTCPWinThresholdLow; /* 100 x64 */
+-	__le32 IPHashTableBaseAddr; /* 104 x68 */
+-	__le32 IPHashTableSize; /* 108 x6c */
+-	__le32 TCPHashTableBaseAddr; /* 112 x70 */
+-	__le32 TCPHashTableSize; /* 116 x74 */
+-	__le32 NCBAreaBaseAddr; /* 120 x78 */
+-	__le32 NCBMaxCount;	/* 124 x7c */
+-	__le32 NCBCurrCount;	/* 128 x80 */
+-	__le32 DRBAreaBaseAddr; /* 132 x84 */
+-	__le32 DRBMaxCount;	/* 136 x88 */
+-	__le32 DRBCurrCount;	/* 140 x8c */
+-	__le32 LRCReserved[28]; /* 144 x90 */
+-};
+-
+-struct prot_stat_regs {
+-	__le32 MACTxFrameCount; /*  80 x50   R */
+-	__le32 MACTxByteCount;	/*  84 x54   R */
+-	__le32 MACRxFrameCount; /*  88 x58   R */
+-	__le32 MACRxByteCount;	/*  92 x5c   R */
+-	__le32 MACCRCErrCount;	/*  96 x60   R */
+-	__le32 MACEncErrCount;	/* 100 x64   R */
+-	__le32 MACRxLengthErrCount; /* 104 x68	 R */
+-	__le32 IPTxPacketCount; /* 108 x6c   R */
+-	__le32 IPTxByteCount;	/* 112 x70   R */
+-	__le32 IPTxFragmentCount; /* 116 x74   R */
+-	__le32 IPRxPacketCount; /* 120 x78   R */
+-	__le32 IPRxByteCount;	/* 124 x7c   R */
+-	__le32 IPRxFragmentCount; /* 128 x80   R */
+-	__le32 IPDatagramReassemblyCount; /* 132 x84   R */
+-	__le32 IPV6RxPacketCount; /* 136 x88   R */
+-	__le32 IPErrPacketCount; /* 140 x8c   R */
+-	__le32 IPReassemblyErrCount; /* 144 x90	  R */
+-	__le32 TCPTxSegmentCount; /* 148 x94   R */
+-	__le32 TCPTxByteCount;	/* 152 x98   R */
+-	__le32 TCPRxSegmentCount; /* 156 x9c   R */
+-	__le32 TCPRxByteCount;	/* 160 xa0   R */
+-	__le32 TCPTimerExpCount; /* 164 xa4   R */
+-	__le32 TCPRxAckCount;	/* 168 xa8   R */
+-	__le32 TCPTxAckCount;	/* 172 xac   R */
+-	__le32 TCPRxErrOOOCount; /* 176 xb0   R */
+-	__le32 PSReserved0;	/* 180 xb4 */
+-	__le32 TCPRxWindowProbeUpdateCount; /* 184 xb8	 R */
+-	__le32 ECCErrCorrectionCount; /* 188 xbc   R */
+-	__le32 PSReserved1[16]; /* 192 xc0 */
++	__le32 rsrvd0[12];	/* 0x50-0x79 */
++	__le32 req_q_out;	/* 0x80 */
++	__le32 rsrvd1[31];	/* 0x84-0xFF */
+ };
+ 
+-
+ /*  remote register set (access via PCI memory read/write) */
+ struct isp_reg {
+ #define MBOX_REG_COUNT 8
+@@ -207,11 +87,7 @@
+ 			union {
+ 				struct port_ctrl_stat_regs p0;
+ 				struct host_mem_cfg_regs p1;
+-				struct local_ram_cfg_regs p2;
+-				struct prot_stat_regs p3;
+-				__le32 r_union[44];
+ 			};
+-
+ 		} __attribute__ ((packed)) isp4022;
+ 	} u2;
+ };				/* 256 x100 */
+@@ -296,6 +172,7 @@
+ /*  ISP Semaphore definitions */
+ 
+ /*  ISP General Purpose Output definitions */
++#define GPOR_TOPCAT_RESET			0x00000004
+ 
+ /*  shadow registers (DMA'd from HA to system memory.  read only) */
+ struct shadow_regs {
+@@ -337,6 +214,7 @@
+ 
+ /*  Mailbox command definitions */
+ #define MBOX_CMD_ABOUT_FW			0x0009
++#define MBOX_CMD_PING				0x000B
+ #define MBOX_CMD_LUN_RESET			0x0016
+ #define MBOX_CMD_GET_MANAGEMENT_DATA		0x001E
+ #define MBOX_CMD_GET_FW_STATUS			0x001F
+@@ -364,6 +242,17 @@
+ #define MBOX_CMD_GET_FW_STATE			0x0069
+ #define MBOX_CMD_GET_INIT_FW_CTRL_BLOCK_DEFAULTS 0x006A
+ #define MBOX_CMD_RESTORE_FACTORY_DEFAULTS	0x0087
++#define MBOX_CMD_SET_ACB			0x0088
++#define MBOX_CMD_GET_ACB			0x0089
++#define MBOX_CMD_DISABLE_ACB			0x008A
++#define MBOX_CMD_GET_IPV6_NEIGHBOR_CACHE	0x008B
++#define MBOX_CMD_GET_IPV6_DEST_CACHE		0x008C
++#define MBOX_CMD_GET_IPV6_DEF_ROUTER_LIST	0x008D
++#define MBOX_CMD_GET_IPV6_LCL_PREFIX_LIST	0x008E
++#define MBOX_CMD_SET_IPV6_NEIGHBOR_CACHE	0x0090
++#define MBOX_CMD_GET_IP_ADDR_STATE		0x0091
++#define MBOX_CMD_SEND_IPV6_ROUTER_SOL		0x0092
++#define MBOX_CMD_GET_DB_ENTRY_CURRENT_IP_ADDR	0x0093
+ 
+ /* Mailbox 1 */
+ #define FW_STATE_READY				0x0000
+@@ -409,6 +298,16 @@
+ #define MBOX_ASTS_DHCP_LEASE_EXPIRED		0x801D
+ #define MBOX_ASTS_DHCP_LEASE_ACQUIRED		0x801F
+ #define MBOX_ASTS_ISNS_UNSOLICITED_PDU_RECEIVED 0x8021
++#define MBOX_ASTS_DUPLICATE_IP			0x8025
++#define MBOX_ASTS_ARP_COMPLETE			0x8026
++#define MBOX_ASTS_SUBNET_STATE_CHANGE		0x8027
++#define MBOX_ASTS_RESPONSE_QUEUE_FULL		0x8028
++#define MBOX_ASTS_IP_ADDR_STATE_CHANGED		0x8029
++#define MBOX_ASTS_IPV6_PREFIX_EXPIRED		0x802B
++#define MBOX_ASTS_IPV6_ND_PREFIX_IGNORED	0x802C
++#define MBOX_ASTS_IPV6_LCL_PREFIX_IGNORED	0x802D
++#define MBOX_ASTS_ICMPV6_ERROR_MSG_RCVD		0x802E
++
+ #define ISNS_EVENT_DATA_RECEIVED		0x0000
+ #define ISNS_EVENT_CONNECTION_OPENED		0x0001
+ #define ISNS_EVENT_CONNECTION_FAILED		0x0002
+@@ -418,137 +317,166 @@
+ /*************************************************************************/
+ 
+ /* Host Adapter Initialization Control Block (from host) */
+-struct init_fw_ctrl_blk {
+-	uint8_t Version;	/* 00 */
+-	uint8_t Control;	/* 01 */
++struct addr_ctrl_blk {
++	uint8_t version;	/* 00 */
++	uint8_t control;	/* 01 */
+ 
+-	uint16_t FwOptions;	/* 02-03 */
++	uint16_t fw_options;	/* 02-03 */
+ #define	 FWOPT_HEARTBEAT_ENABLE		  0x1000
+ #define	 FWOPT_SESSION_MODE		  0x0040
+ #define	 FWOPT_INITIATOR_MODE		  0x0020
+ #define	 FWOPT_TARGET_MODE		  0x0010
+ 
+-	uint16_t ExecThrottle;	/* 04-05 */
+-	uint8_t RetryCount;	/* 06 */
+-	uint8_t RetryDelay;	/* 07 */
+-	uint16_t MaxEthFrPayloadSize;	/* 08-09 */
+-	uint16_t AddFwOptions;	/* 0A-0B */
+-
+-	uint8_t HeartbeatInterval;	/* 0C */
+-	uint8_t InstanceNumber; /* 0D */
+-	uint16_t RES2;		/* 0E-0F */
+-	uint16_t ReqQConsumerIndex;	/* 10-11 */
+-	uint16_t ComplQProducerIndex;	/* 12-13 */
+-	uint16_t ReqQLen;	/* 14-15 */
+-	uint16_t ComplQLen;	/* 16-17 */
+-	uint32_t ReqQAddrLo;	/* 18-1B */
+-	uint32_t ReqQAddrHi;	/* 1C-1F */
+-	uint32_t ComplQAddrLo;	/* 20-23 */
+-	uint32_t ComplQAddrHi;	/* 24-27 */
+-	uint32_t ShadowRegBufAddrLo;	/* 28-2B */
+-	uint32_t ShadowRegBufAddrHi;	/* 2C-2F */
+-
+-	uint16_t iSCSIOptions;	/* 30-31 */
+-
+-	uint16_t TCPOptions;	/* 32-33 */
+-
+-	uint16_t IPOptions;	/* 34-35 */
+-
+-	uint16_t MaxPDUSize;	/* 36-37 */
+-	uint16_t RcvMarkerInt;	/* 38-39 */
+-	uint16_t SndMarkerInt;	/* 3A-3B */
+-	uint16_t InitMarkerlessInt;	/* 3C-3D */
+-	uint16_t FirstBurstSize;	/* 3E-3F */
+-	uint16_t DefaultTime2Wait;	/* 40-41 */
+-	uint16_t DefaultTime2Retain;	/* 42-43 */
+-	uint16_t MaxOutStndngR2T;	/* 44-45 */
+-	uint16_t KeepAliveTimeout;	/* 46-47 */
+-	uint16_t PortNumber;	/* 48-49 */
+-	uint16_t MaxBurstSize;	/* 4A-4B */
+-	uint32_t RES4;		/* 4C-4F */
+-	uint8_t IPAddr[4];	/* 50-53 */
+-	uint8_t RES5[12];	/* 54-5F */
+-	uint8_t SubnetMask[4];	/* 60-63 */
+-	uint8_t RES6[12];	/* 64-6F */
+-	uint8_t GatewayIPAddr[4];	/* 70-73 */
+-	uint8_t RES7[12];	/* 74-7F */
+-	uint8_t PriDNSIPAddr[4];	/* 80-83 */
+-	uint8_t SecDNSIPAddr[4];	/* 84-87 */
+-	uint8_t RES8[8];	/* 88-8F */
+-	uint8_t Alias[32];	/* 90-AF */
+-	uint8_t TargAddr[8];	/* B0-B7 *//* /FIXME: Remove?? */
+-	uint8_t CHAPNameSecretsTable[8];	/* B8-BF */
+-	uint8_t EthernetMACAddr[6];	/* C0-C5 */
+-	uint16_t TargetPortalGroup;	/* C6-C7 */
+-	uint8_t SendScale;	/* C8	 */
+-	uint8_t RecvScale;	/* C9	 */
+-	uint8_t TypeOfService;	/* CA	 */
+-	uint8_t Time2Live;	/* CB	 */
+-	uint16_t VLANPriority;	/* CC-CD */
+-	uint16_t Reserved8;	/* CE-CF */
+-	uint8_t SecIPAddr[4];	/* D0-D3 */
+-	uint8_t Reserved9[12];	/* D4-DF */
+-	uint8_t iSNSIPAddr[4];	/* E0-E3 */
+-	uint16_t iSNSServerPortNumber;	/* E4-E5 */
+-	uint8_t Reserved10[10]; /* E6-EF */
+-	uint8_t SLPDAIPAddr[4]; /* F0-F3 */
+-	uint8_t Reserved11[12]; /* F4-FF */
+-	uint8_t iSCSINameString[256];	/* 100-1FF */
++	uint16_t exec_throttle;	/* 04-05 */
++	uint8_t zio_count;	/* 06 */
++	uint8_t res0;	/* 07 */
++	uint16_t eth_mtu_size;	/* 08-09 */
++	uint16_t add_fw_options;	/* 0A-0B */
++
++	uint8_t hb_interval;	/* 0C */
++	uint8_t inst_num; /* 0D */
++	uint16_t res1;		/* 0E-0F */
++	uint16_t rqq_consumer_idx;	/* 10-11 */
++	uint16_t compq_producer_idx;	/* 12-13 */
++	uint16_t rqq_len;	/* 14-15 */
++	uint16_t compq_len;	/* 16-17 */
++	uint32_t rqq_addr_lo;	/* 18-1B */
++	uint32_t rqq_addr_hi;	/* 1C-1F */
++	uint32_t compq_addr_lo;	/* 20-23 */
++	uint32_t compq_addr_hi;	/* 24-27 */
++	uint32_t shdwreg_addr_lo;	/* 28-2B */
++	uint32_t shdwreg_addr_hi;	/* 2C-2F */
++
++	uint16_t iscsi_opts;	/* 30-31 */
++	uint16_t ipv4_tcp_opts;	/* 32-33 */
++	uint16_t ipv4_ip_opts;	/* 34-35 */
++
++	uint16_t iscsi_max_pdu_size;	/* 36-37 */
++	uint8_t ipv4_tos;	/* 38 */
++	uint8_t ipv4_ttl;	/* 39 */
++	uint8_t acb_version;	/* 3A */
++	uint8_t res2;	/* 3B */
++	uint16_t def_timeout;	/* 3C-3D */
++	uint16_t iscsi_fburst_len;	/* 3E-3F */
++	uint16_t iscsi_def_time2wait;	/* 40-41 */
++	uint16_t iscsi_def_time2retain;	/* 42-43 */
++	uint16_t iscsi_max_outstnd_r2t;	/* 44-45 */
++	uint16_t conn_ka_timeout;	/* 46-47 */
++	uint16_t ipv4_port;	/* 48-49 */
++	uint16_t iscsi_max_burst_len;	/* 4A-4B */
++	uint32_t res5;		/* 4C-4F */
++	uint8_t ipv4_addr[4];	/* 50-53 */
++	uint16_t ipv4_vlan_tag;	/* 54-55 */
++	uint8_t ipv4_addr_state;	/* 56 */
++	uint8_t ipv4_cacheid;	/* 57 */
++	uint8_t res6[8];	/* 58-5F */
++	uint8_t ipv4_subnet[4];	/* 60-63 */
++	uint8_t res7[12];	/* 64-6F */
++	uint8_t ipv4_gw_addr[4];	/* 70-73 */
++	uint8_t res8[0xc];	/* 74-7F */
++	uint8_t pri_dns_srvr_ip[4];/* 80-83 */
++	uint8_t sec_dns_srvr_ip[4];/* 84-87 */
++	uint16_t min_eph_port;	/* 88-89 */
++	uint16_t max_eph_port;	/* 8A-8B */
++	uint8_t res9[4];	/* 8C-8F */
++	uint8_t iscsi_alias[32];/* 90-AF */
++	uint8_t res9_1[0x16];	/* B0-C5 */
++	uint16_t tgt_portal_grp;/* C6-C7 */
++	uint8_t abort_timer;	/* C8	 */
++	uint8_t ipv4_tcp_wsf;	/* C9	 */
++	uint8_t res10[6];	/* CA-CF */
++	uint8_t ipv4_sec_ip_addr[4];	/* D0-D3 */
++	uint8_t ipv4_dhcp_vid_len;	/* D4 */
++	uint8_t ipv4_dhcp_vid[11];	/* D5-DF */
++	uint8_t res11[20];	/* E0-F3 */
++	uint8_t ipv4_dhcp_alt_cid_len;	/* F4 */
++	uint8_t ipv4_dhcp_alt_cid[11];	/* F5-FF */
++	uint8_t iscsi_name[224];	/* 100-1DF */
++	uint8_t res12[32];	/* 1E0-1FF */
++	uint32_t cookie;	/* 200-203 */
++	uint16_t ipv6_port;	/* 204-205 */
++	uint16_t ipv6_opts;	/* 206-207 */
++	uint16_t ipv6_addtl_opts;	/* 208-209 */
++	uint16_t ipv6_tcp_opts;	/* 20A-20B */
++	uint8_t ipv6_tcp_wsf;	/* 20C */
++	uint16_t ipv6_flow_lbl;	/* 20D-20F */
++	uint8_t ipv6_gw_addr[16];	/* 210-21F */
++	uint16_t ipv6_vlan_tag;	/* 220-221 */
++	uint8_t ipv6_lnk_lcl_addr_state;/* 222 */
++	uint8_t ipv6_addr0_state;	/* 223 */
++	uint8_t ipv6_addr1_state;	/* 224 */
++	uint8_t ipv6_gw_state;	/* 225 */
++	uint8_t ipv6_traffic_class;	/* 226 */
++	uint8_t ipv6_hop_limit;	/* 227 */
++	uint8_t ipv6_if_id[8];	/* 228-22F */
++	uint8_t ipv6_addr0[16];	/* 230-23F */
++	uint8_t ipv6_addr1[16];	/* 240-24F */
++	uint32_t ipv6_nd_reach_time;	/* 250-253 */
++	uint32_t ipv6_nd_rexmit_timer;	/* 254-257 */
++	uint32_t ipv6_nd_stale_timeout;	/* 258-25B */
++	uint8_t ipv6_dup_addr_detect_count;	/* 25C */
++	uint8_t ipv6_cache_id;	/* 25D */
++	uint8_t res13[18];	/* 25E-26F */
++	uint32_t ipv6_gw_advrt_mtu;	/* 270-273 */
++	uint8_t res14[140];	/* 274-2FF */
++};
++
++struct init_fw_ctrl_blk {
++	struct addr_ctrl_blk pri;
++	struct addr_ctrl_blk sec;
+ };
+ 
+ /*************************************************************************/
+ 
+ struct dev_db_entry {
+-	uint8_t options;	/* 00 */
++	uint16_t options;	/* 00-01 */
+ #define DDB_OPT_DISC_SESSION  0x10
+ #define DDB_OPT_TARGET	      0x02 /* device is a target */
+ 
+-	uint8_t control;	/* 01 */
+-
+-	uint16_t exeThrottle;	/* 02-03 */
+-	uint16_t exeCount;	/* 04-05 */
+-	uint8_t retryCount;	/* 06	 */
+-	uint8_t retryDelay;	/* 07	 */
+-	uint16_t iSCSIOptions;	/* 08-09 */
+-
+-	uint16_t TCPOptions;	/* 0A-0B */
+-
+-	uint16_t IPOptions;	/* 0C-0D */
+-
+-	uint16_t maxPDUSize;	/* 0E-0F */
+-	uint16_t rcvMarkerInt;	/* 10-11 */
+-	uint16_t sndMarkerInt;	/* 12-13 */
+-	uint16_t iSCSIMaxSndDataSegLen; /* 14-15 */
+-	uint16_t firstBurstSize;	/* 16-17 */
+-	uint16_t minTime2Wait;	/* 18-19 : RA :default_time2wait */
+-	uint16_t maxTime2Retain;	/* 1A-1B */
+-	uint16_t maxOutstndngR2T;	/* 1C-1D */
+-	uint16_t keepAliveTimeout;	/* 1E-1F */
+-	uint8_t ISID[6];	/* 20-25 big-endian, must be converted
++	uint16_t exec_throttle;	/* 02-03 */
++	uint16_t exec_count;	/* 04-05 */
++	uint16_t res0;	/* 06-07 */
++	uint16_t iscsi_options;	/* 08-09 */
++	uint16_t tcp_options;	/* 0A-0B */
++	uint16_t ip_options;	/* 0C-0D */
++	uint16_t iscsi_max_rcv_data_seg_len;	/* 0E-0F */
++	uint32_t res1;	/* 10-13 */
++	uint16_t iscsi_max_snd_data_seg_len;	/* 14-15 */
++	uint16_t iscsi_first_burst_len;	/* 16-17 */
++	uint16_t iscsi_def_time2wait;	/* 18-19 */
++	uint16_t iscsi_def_time2retain;	/* 1A-1B */
++	uint16_t iscsi_max_outsnd_r2t;	/* 1C-1D */
++	uint16_t ka_timeout;	/* 1E-1F */
++	uint8_t isid[6];	/* 20-25 big-endian, must be converted
+ 				 * to little-endian */
+-	uint16_t TSID;		/* 26-27 */
+-	uint16_t portNumber;	/* 28-29 */
+-	uint16_t maxBurstSize;	/* 2A-2B */
+-	uint16_t taskMngmntTimeout;	/* 2C-2D */
+-	uint16_t reserved1;	/* 2E-2F */
+-	uint8_t ipAddr[0x10];	/* 30-3F */
+-	uint8_t iSCSIAlias[0x20];	/* 40-5F */
+-	uint8_t targetAddr[0x20];	/* 60-7F */
+-	uint8_t userID[0x20];	/* 80-9F */
+-	uint8_t password[0x20]; /* A0-BF */
+-	uint8_t iscsiName[0x100];	/* C0-1BF : xxzzy Make this a
++	uint16_t tsid;		/* 26-27 */
++	uint16_t port;	/* 28-29 */
++	uint16_t iscsi_max_burst_len;	/* 2A-2B */
++	uint16_t def_timeout;	/* 2C-2D */
++	uint16_t res2;	/* 2E-2F */
++	uint8_t ip_addr[0x10];	/* 30-3F */
++	uint8_t iscsi_alias[0x20];	/* 40-5F */
++	uint8_t tgt_addr[0x20];	/* 60-7F */
++	uint16_t mss;	/* 80-81 */
++	uint16_t res3;	/* 82-83 */
++	uint16_t lcl_port;	/* 84-85 */
++	uint8_t ipv4_tos;	/* 86 */
++	uint16_t ipv6_flow_lbl;	/* 87-89 */
++	uint8_t res4[0x36];	/* 8A-BF */
++	uint8_t iscsi_name[0xE0];	/* C0-19F : xxzzy Make this a
+ 					 * pointer to a string so we
+ 					 * don't have to reserve soooo
+ 					 * much RAM */
+-	uint16_t ddbLink;	/* 1C0-1C1 */
+-	uint16_t CHAPTableIndex; /* 1C2-1C3 */
+-	uint16_t TargetPortalGroup; /* 1C4-1C5 */
+-	uint16_t reserved2[2];	/* 1C6-1C7 */
+-	uint32_t statSN;	/* 1C8-1CB */
+-	uint32_t expStatSN;	/* 1CC-1CF */
+-	uint16_t reserved3[0x2C]; /* 1D0-1FB */
+-	uint16_t ddbValidCookie; /* 1FC-1FD */
+-	uint16_t ddbValidSize;	/* 1FE-1FF */
++	uint8_t ipv6_addr[0x10];/* 1A0-1AF */
++	uint8_t res5[0x10];	/* 1B0-1BF */
++	uint16_t ddb_link;	/* 1C0-1C1 */
++	uint16_t chap_tbl_idx;	/* 1C2-1C3 */
++	uint16_t tgt_portal_grp; /* 1C4-1C5 */
++	uint8_t tcp_xmt_wsf;	/* 1C6 */
++	uint8_t tcp_rcv_wsf;	/* 1C7 */
++	uint32_t stat_sn;	/* 1C8-1CB */
++	uint32_t exp_stat_sn;	/* 1CC-1CF */
++	uint8_t res6[0x30];	/* 1D0-1FF */
+ };
+ 
+ /*************************************************************************/
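
The rewritten ql4_fw.h structures above carry byte-offset comments
describing a fixed firmware layout.  A sketch (not part of the patch)
of how a few of those comments could be pinned down at compile time
with BUILD_BUG_ON():

#include <linux/kernel.h>
#include <linux/stddef.h>

static inline void addr_ctrl_blk_layout_check(void)
{
	BUILD_BUG_ON(offsetof(struct addr_ctrl_blk, ipv4_addr)  != 0x50);
	BUILD_BUG_ON(offsetof(struct addr_ctrl_blk, iscsi_name) != 0x100);
	BUILD_BUG_ON(offsetof(struct addr_ctrl_blk, cookie)     != 0x200);
	BUILD_BUG_ON(sizeof(struct addr_ctrl_blk)               != 0x300);
}
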
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_glbl.h linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_glbl.h
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_glbl.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_glbl.h	2007-12-19 15:29:23.000000000 -0500
+@@ -8,6 +8,9 @@
+ #ifndef __QLA4x_GBL_H
+ #define	__QLA4x_GBL_H
+ 
++struct iscsi_cls_conn;
++
++void qla4xxx_hw_reset(struct scsi_qla_host *ha);
+ int ql4xxx_lock_drvr_wait(struct scsi_qla_host *a);
+ int qla4xxx_send_tgts(struct scsi_qla_host *ha, char *ip, uint16_t port);
+ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb);
+@@ -58,11 +61,13 @@
+ void qla4xxx_interrupt_service_routine(struct scsi_qla_host * ha,
+ 				       uint32_t intr_status);
+ int qla4xxx_init_rings(struct scsi_qla_host * ha);
+-struct srb * qla4xxx_del_from_active_array(struct scsi_qla_host *ha, uint32_t index);
++struct srb * qla4xxx_del_from_active_array(struct scsi_qla_host *ha,
++					uint32_t index);
+ void qla4xxx_srb_compl(struct scsi_qla_host *ha, struct srb *srb);
+ int qla4xxx_reinitialize_ddb_list(struct scsi_qla_host * ha);
+ int qla4xxx_process_ddb_changed(struct scsi_qla_host * ha,
+ 				uint32_t fw_ddb_index, uint32_t state);
++void qla4xxx_dump_buffer(void *b, uint32_t size);
+ 
+ extern int ql4xextended_error_logging;
+ extern int ql4xdiscoverywait;
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_init.c linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_init.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_init.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_init.c	2007-12-19 15:29:23.000000000 -0500
+@@ -6,6 +6,9 @@
+  */
+ 
+ #include "ql4_def.h"
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
+ 
+ static struct ddb_entry * qla4xxx_alloc_ddb(struct scsi_qla_host *ha,
+ 					    uint32_t fw_ddb_index);
+@@ -300,12 +303,12 @@
+ 	if (!qla4xxx_fw_ready(ha))
+ 		return status;
+ 
+-	set_bit(AF_ONLINE, &ha->flags);
+ 	return qla4xxx_get_firmware_status(ha);
+ }
+ 
+ static struct ddb_entry* qla4xxx_get_ddb_entry(struct scsi_qla_host *ha,
+-					       uint32_t fw_ddb_index)
++						uint32_t fw_ddb_index,
++						uint32_t *new_tgt)
+ {
+ 	struct dev_db_entry *fw_ddb_entry = NULL;
+ 	dma_addr_t fw_ddb_entry_dma;
+@@ -313,6 +316,7 @@
+ 	int found = 0;
+ 	uint32_t device_state;
+ 
++	*new_tgt = 0;
+ 	/* Make sure the dma buffer is valid */
+ 	fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev,
+ 					  sizeof(*fw_ddb_entry),
+@@ -337,7 +341,7 @@
+ 	DEBUG2(printk("scsi%ld: %s: Looking for ddb[%d]\n", ha->host_no,
+ 		      __func__, fw_ddb_index));
+ 	list_for_each_entry(ddb_entry, &ha->ddb_list, list) {
+-		if (memcmp(ddb_entry->iscsi_name, fw_ddb_entry->iscsiName,
++		if (memcmp(ddb_entry->iscsi_name, fw_ddb_entry->iscsi_name,
+ 			   ISCSI_NAME_SIZE) == 0) {
+ 			found++;
+ 			break;
+@@ -348,6 +352,7 @@
+ 		DEBUG2(printk("scsi%ld: %s: ddb[%d] not found - allocating "
+ 			      "new ddb\n", ha->host_no, __func__,
+ 			      fw_ddb_index));
++		*new_tgt = 1;
+ 		ddb_entry = qla4xxx_alloc_ddb(ha, fw_ddb_index);
+ 	}
+ 
+@@ -409,26 +414,26 @@
+ 	}
+ 
+ 	status = QLA_SUCCESS;
+-	ddb_entry->target_session_id = le16_to_cpu(fw_ddb_entry->TSID);
++	ddb_entry->target_session_id = le16_to_cpu(fw_ddb_entry->tsid);
+ 	ddb_entry->task_mgmt_timeout =
+-		le16_to_cpu(fw_ddb_entry->taskMngmntTimeout);
++		le16_to_cpu(fw_ddb_entry->def_timeout);
+ 	ddb_entry->CmdSn = 0;
+-	ddb_entry->exe_throttle = le16_to_cpu(fw_ddb_entry->exeThrottle);
++	ddb_entry->exe_throttle = le16_to_cpu(fw_ddb_entry->exec_throttle);
+ 	ddb_entry->default_relogin_timeout =
+-		le16_to_cpu(fw_ddb_entry->taskMngmntTimeout);
+-	ddb_entry->default_time2wait = le16_to_cpu(fw_ddb_entry->minTime2Wait);
++		le16_to_cpu(fw_ddb_entry->def_timeout);
++	ddb_entry->default_time2wait = le16_to_cpu(fw_ddb_entry->iscsi_def_time2wait);
+ 
+ 	/* Update index in case it changed */
+ 	ddb_entry->fw_ddb_index = fw_ddb_index;
+ 	ha->fw_ddb_index_map[fw_ddb_index] = ddb_entry;
+ 
+-	ddb_entry->port = le16_to_cpu(fw_ddb_entry->portNumber);
+-	ddb_entry->tpgt = le32_to_cpu(fw_ddb_entry->TargetPortalGroup);
+-	memcpy(&ddb_entry->iscsi_name[0], &fw_ddb_entry->iscsiName[0],
++	ddb_entry->port = le16_to_cpu(fw_ddb_entry->port);
++	ddb_entry->tpgt = le32_to_cpu(fw_ddb_entry->tgt_portal_grp);
++	memcpy(&ddb_entry->iscsi_name[0], &fw_ddb_entry->iscsi_name[0],
+ 	       min(sizeof(ddb_entry->iscsi_name),
+-		   sizeof(fw_ddb_entry->iscsiName)));
+-	memcpy(&ddb_entry->ip_addr[0], &fw_ddb_entry->ipAddr[0],
+-	       min(sizeof(ddb_entry->ip_addr), sizeof(fw_ddb_entry->ipAddr)));
++		   sizeof(fw_ddb_entry->iscsi_name)));
++	memcpy(&ddb_entry->ip_addr[0], &fw_ddb_entry->ip_addr[0],
++	       min(sizeof(ddb_entry->ip_addr), sizeof(fw_ddb_entry->ip_addr)));
+ 
+ 	DEBUG2(printk("scsi%ld: %s: ddb[%d] - State= %x status= %d.\n",
+ 		      ha->host_no, __func__, fw_ddb_index,
+@@ -495,6 +500,7 @@
+ 	uint32_t ddb_state;
+ 	uint32_t conn_err, err_code;
+ 	struct ddb_entry *ddb_entry;
++	uint32_t new_tgt;
+ 
+ 	dev_info(&ha->pdev->dev, "Initializing DDBs ...\n");
+ 	for (fw_ddb_index = 0; fw_ddb_index < MAX_DDB_ENTRIES;
+@@ -526,8 +532,19 @@
+ 					      "completed "
+ 					      "or access denied failure\n",
+ 					      ha->host_no, __func__));
+-			} else
++			} else {
+ 				qla4xxx_set_ddb_entry(ha, fw_ddb_index, 0);
++				if (qla4xxx_get_fwddb_entry(ha, fw_ddb_index,
++					NULL, 0, NULL, &next_fw_ddb_index,
++					&ddb_state, &conn_err, NULL, NULL)
++					== QLA_ERROR) {
++					DEBUG2(printk("scsi%ld: %s:"
++						"get_ddb_entry %d failed\n",
++						ha->host_no,
++						__func__, fw_ddb_index));
++					return QLA_ERROR;
++				}
++			}
+ 		}
+ 
+ 		if (ddb_state != DDB_DS_SESSION_ACTIVE)
+@@ -540,7 +557,7 @@
+ 			      ha->host_no, __func__, fw_ddb_index));
+ 
+ 		/* Add DDB to internal our ddb list. */
+-		ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index);
++		ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index, &new_tgt);
+ 		if (ddb_entry == NULL) {
+ 			DEBUG2(printk("scsi%ld: %s: Unable to allocate memory "
+ 				      "for device at fw_ddb_index %d\n",
+@@ -865,21 +882,20 @@
+ 
+ static void qla4x00_pci_config(struct scsi_qla_host *ha)
+ {
+-	uint16_t w, mwi;
++	uint16_t w;
++	int status;
+ 
+ 	dev_info(&ha->pdev->dev, "Configuring PCI space...\n");
+ 
+ 	pci_set_master(ha->pdev);
+-	mwi = 0;
+-	if (pci_set_mwi(ha->pdev))
+-		mwi = PCI_COMMAND_INVALIDATE;
++	status = pci_set_mwi(ha->pdev);
+ 	/*
+ 	 * We want to respect framework's setting of PCI configuration space
+ 	 * command register and also want to make sure that all bits of
+ 	 * interest to us are properly set in command register.
+ 	 */
+ 	pci_read_config_word(ha->pdev, PCI_COMMAND, &w);
+-	w |= mwi | (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
++	w |= PCI_COMMAND_PARITY | PCI_COMMAND_SERR;
+ 	w &= ~PCI_COMMAND_INTX_DISABLE;
+ 	pci_write_config_word(ha->pdev, PCI_COMMAND, w);
+ }
+@@ -911,6 +927,9 @@
+ 		writel(set_rmask(NVR_WRITE_ENABLE),
+ 		       &ha->reg->u1.isp4022.nvram);
+ 
++	writel(2, &ha->reg->mailbox[6]);
++	readl(&ha->reg->mailbox[6]);
++
+ 	writel(set_rmask(CSR_BOOT_ENABLE), &ha->reg->ctrl_status);
+ 	readl(&ha->reg->ctrl_status);
+ 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+@@ -958,25 +977,25 @@
+ 	return status;
+ }
+ 
+-int ql4xxx_lock_drvr_wait(struct scsi_qla_host *ha)
++int ql4xxx_lock_drvr_wait(struct scsi_qla_host *a)
+ {
+-#define QL4_LOCK_DRVR_WAIT	30
++#define QL4_LOCK_DRVR_WAIT	60
+ #define QL4_LOCK_DRVR_SLEEP	1
+ 
+ 	int drvr_wait = QL4_LOCK_DRVR_WAIT;
+ 	while (drvr_wait) {
+-		if (ql4xxx_lock_drvr(ha) == 0) {
++		if (ql4xxx_lock_drvr(a) == 0) {
+ 			ssleep(QL4_LOCK_DRVR_SLEEP);
+ 			if (drvr_wait) {
+ 				DEBUG2(printk("scsi%ld: %s: Waiting for "
+-					      "Global Init Semaphore(%d)...n",
+-					      ha->host_no,
++					      "Global Init Semaphore(%d)...\n",
++					      a->host_no,
+ 					      __func__, drvr_wait));
+ 			}
+ 			drvr_wait -= QL4_LOCK_DRVR_SLEEP;
+ 		} else {
+ 			DEBUG2(printk("scsi%ld: %s: Global Init Semaphore "
+-				      "acquired.n", ha->host_no, __func__));
++				      "acquired\n", a->host_no, __func__));
+ 			return QLA_SUCCESS;
+ 		}
+ 	}
+@@ -1125,17 +1144,17 @@
+ 
+ 	/* Initialize the Host adapter request/response queues and firmware */
+ 	if (qla4xxx_start_firmware(ha) == QLA_ERROR)
+-		return status;
++		goto exit_init_hba;
+ 
+ 	if (qla4xxx_validate_mac_address(ha) == QLA_ERROR)
+-		return status;
++		goto exit_init_hba;
+ 
+ 	if (qla4xxx_init_local_data(ha) == QLA_ERROR)
+-		return status;
++		goto exit_init_hba;
+ 
+ 	status = qla4xxx_init_firmware(ha);
+ 	if (status == QLA_ERROR)
+-		return status;
++		goto exit_init_hba;
+ 
+ 	/*
+ 	 * FW is waiting to get an IP address from DHCP server: Skip building
+@@ -1143,12 +1162,12 @@
+ 	 * followed by 0x8014 aen" to trigger the tgt discovery process.
+ 	 */
+ 	if (ha->firmware_state & FW_STATE_DHCP_IN_PROGRESS)
+-		return status;
++		goto exit_init_online;
+ 
+ 	/* Skip device discovery if ip and subnet is zero */
+ 	if (memcmp(ha->ip_address, ip_address, IP_ADDR_LEN) == 0 ||
+ 	    memcmp(ha->subnet_mask, ip_address, IP_ADDR_LEN) == 0)
+-		return status;
++		goto exit_init_online;
+ 
+ 	if (renew_ddb_list == PRESERVE_DDB_LIST) {
+ 		/*
+@@ -1177,9 +1196,10 @@
+ 			      ha->host_no));
+ 	}
+ 
+- exit_init_hba:
++exit_init_online:
++	set_bit(AF_ONLINE, &ha->flags);
++exit_init_hba:
+ 	return status;
+-
+ }
+ 
+ /**
+@@ -1193,9 +1213,10 @@
+ 					   uint32_t fw_ddb_index)
+ {
+ 	struct ddb_entry * ddb_entry;
++	uint32_t new_tgt;
+ 
+ 	/* First allocate a device structure */
+-	ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index);
++	ddb_entry = qla4xxx_get_ddb_entry(ha, fw_ddb_index, &new_tgt);
+ 	if (ddb_entry == NULL) {
+ 		DEBUG2(printk(KERN_WARNING
+ 			      "scsi%ld: Unable to allocate memory to add "
+@@ -1203,6 +1224,18 @@
+ 		return;
+ 	}
+ 
++	if (!new_tgt && (ddb_entry->fw_ddb_index != fw_ddb_index)) {
++		/* Target has been bound to a new fw_ddb_index */
++		qla4xxx_free_ddb(ha, ddb_entry);
++		ddb_entry = qla4xxx_alloc_ddb(ha, fw_ddb_index);
++		if (ddb_entry == NULL) {
++			DEBUG2(printk(KERN_WARNING
++				"scsi%ld: Unable to allocate memory"
++				" to add fw_ddb_index %d\n",
++				ha->host_no, fw_ddb_index));
++			return;
++		}
++	}
+ 	if (qla4xxx_update_ddb_entry(ha, ddb_entry, fw_ddb_index) ==
+ 				    QLA_ERROR) {
+ 		ha->fw_ddb_index_map[fw_ddb_index] =
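The qla4xxx_initialize_adapter() rework above replaces four scattered
"return status" exits with a goto ladder, so every path that leaves the
adapter usable falls through the point that sets AF_ONLINE. A minimal
userspace sketch of the same unwind idiom follows; the step names are
hypothetical stand-ins for start_firmware/validate_mac/device discovery:

/* Sketch of the goto-ladder error handling adopted above. */
#include <stdio.h>

enum { QLA_SUCCESS = 0, QLA_ERROR = 1 };

static int step(const char *name, int ok)
{
	printf("%s: %s\n", name, ok ? "ok" : "failed");
	return ok ? QLA_SUCCESS : QLA_ERROR;
}

static int init_adapter(int fw_ok, int dhcp_pending)
{
	int status = QLA_ERROR;
	int online = 0;

	if (step("start_firmware", fw_ok) == QLA_ERROR)
		goto exit_init_hba;		/* hard failure: stay offline */

	status = QLA_SUCCESS;
	if (dhcp_pending)
		goto exit_init_online;		/* usable, but skip discovery */

	step("build_ddb_list", 1);

exit_init_online:
	online = 1;	/* mirrors set_bit(AF_ONLINE, &ha->flags) */
exit_init_hba:
	printf("online=%d status=%d\n", online, status);
	return status;
}

int main(void)
{
	init_adapter(1, 1);	/* soft exit: online, success */
	init_adapter(0, 0);	/* hard failure: offline, error */
	return 0;
}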
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_iocb.c linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_iocb.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_iocb.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_iocb.c	2007-12-19 15:29:23.000000000 -0500
+@@ -6,6 +6,10 @@
+  */
+ 
+ #include "ql4_def.h"
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
++
+ 
+ #include <scsi/scsi_tcq.h>
+ 
+@@ -141,11 +145,13 @@
+ 	uint16_t avail_dsds;
+ 	struct data_seg_a64 *cur_dsd;
+ 	struct scsi_cmnd *cmd;
++	struct scatterlist *sg;
++	int i;
+ 
+ 	cmd = srb->cmd;
+ 	ha = srb->ha;
+ 
+-	if (cmd->request_bufflen == 0 || cmd->sc_data_direction == DMA_NONE) {
++	if (!scsi_bufflen(cmd) || cmd->sc_data_direction == DMA_NONE) {
+ 		/* No data being transferred */
+ 		cmd_entry->ttlByteCnt = __constant_cpu_to_le32(0);
+ 		return;
+@@ -154,14 +160,7 @@
+ 	avail_dsds = COMMAND_SEG;
+ 	cur_dsd = (struct data_seg_a64 *) & (cmd_entry->dataseg[0]);
+ 
+-	/* Load data segments */
+-	if (cmd->use_sg) {
+-		struct scatterlist *cur_seg;
+-		struct scatterlist *end_seg;
+-
+-		cur_seg = (struct scatterlist *)cmd->request_buffer;
+-		end_seg = cur_seg + tot_dsds;
+-		while (cur_seg < end_seg) {
++	scsi_for_each_sg(cmd, sg, tot_dsds, i) {
+ 			dma_addr_t sle_dma;
+ 
+ 			/* Allocate additional continuation packets? */
+@@ -175,19 +174,13 @@
+ 				avail_dsds = CONTINUE_SEG;
+ 			}
+ 
+-			sle_dma = sg_dma_address(cur_seg);
++		sle_dma = sg_dma_address(sg);
+ 			cur_dsd->base.addrLow = cpu_to_le32(LSDW(sle_dma));
+ 			cur_dsd->base.addrHigh = cpu_to_le32(MSDW(sle_dma));
+-			cur_dsd->count = cpu_to_le32(sg_dma_len(cur_seg));
++		cur_dsd->count = cpu_to_le32(sg_dma_len(sg));
+ 			avail_dsds--;
+ 
+ 			cur_dsd++;
+-			cur_seg++;
+-		}
+-	} else {
+-		cur_dsd->base.addrLow = cpu_to_le32(LSDW(srb->dma_handle));
+-		cur_dsd->base.addrHigh = cpu_to_le32(MSDW(srb->dma_handle));
+-		cur_dsd->count = cpu_to_le32(cmd->request_bufflen);
+ 	}
+ }
+ 
+@@ -204,8 +197,8 @@
+ 	struct scsi_cmnd *cmd = srb->cmd;
+ 	struct ddb_entry *ddb_entry;
+ 	struct command_t3_entry *cmd_entry;
+-	struct scatterlist *sg = NULL;
+ 
++	int nseg;
+ 	uint16_t tot_dsds;
+ 	uint16_t req_cnt;
+ 
+@@ -233,24 +226,11 @@
+ 	index = (uint32_t)cmd->request->tag;
+ 
+ 	/* Calculate the number of request entries needed. */
+-	if (cmd->use_sg) {
+-		sg = (struct scatterlist *)cmd->request_buffer;
+-		tot_dsds = pci_map_sg(ha->pdev, sg, cmd->use_sg,
+-				      cmd->sc_data_direction);
+-		if (tot_dsds == 0)
++	nseg = scsi_dma_map(cmd);
++	if (nseg < 0)
+ 			goto queuing_error;
+-	} else if (cmd->request_bufflen) {
+-		dma_addr_t	req_dma;
++	tot_dsds = nseg;
+ 
+-		req_dma = pci_map_single(ha->pdev, cmd->request_buffer,
+-					 cmd->request_bufflen,
+-					 cmd->sc_data_direction);
+-		if (dma_mapping_error(req_dma))
+-			goto queuing_error;
+-
+-		srb->dma_handle = req_dma;
+-		tot_dsds = 1;
+-	}
+ 	req_cnt = qla4xxx_calc_request_entries(tot_dsds);
+ 
+ 	if (ha->req_q_count < (req_cnt + 2)) {
+@@ -279,7 +259,7 @@
+ 
+ 	int_to_scsilun(cmd->device->lun, &cmd_entry->lun);
+ 	cmd_entry->cmdSeqNum = cpu_to_le32(ddb_entry->CmdSn);
+-	cmd_entry->ttlByteCnt = cpu_to_le32(cmd->request_bufflen);
++	cmd_entry->ttlByteCnt = cpu_to_le32(scsi_bufflen(cmd));
+ 	memcpy(cmd_entry->cdb, cmd->cmnd, cmd->cmd_len);
+ 	cmd_entry->dataSegCnt = cpu_to_le16(tot_dsds);
+ 	cmd_entry->hdr.entryCount = req_cnt;
+@@ -289,13 +269,13 @@
+ 	 *	 transferred, as the data direction bit is sometimes filled
+ 	 *	 in when there is no data to be transferred */
+ 	cmd_entry->control_flags = CF_NO_DATA;
+-	if (cmd->request_bufflen) {
++	if (scsi_bufflen(cmd)) {
+ 		if (cmd->sc_data_direction == DMA_TO_DEVICE)
+ 			cmd_entry->control_flags = CF_WRITE;
+ 		else if (cmd->sc_data_direction == DMA_FROM_DEVICE)
+ 			cmd_entry->control_flags = CF_READ;
+ 
+-		ha->bytes_xfered += cmd->request_bufflen;
++		ha->bytes_xfered += scsi_bufflen(cmd);
+ 		if (ha->bytes_xfered & ~0xFFFFF){
+ 			ha->total_mbytes_xferred += ha->bytes_xfered >> 20;
+ 			ha->bytes_xfered &= 0xFFFFF;
+@@ -359,14 +339,9 @@
+ 	return QLA_SUCCESS;
+ 
+ queuing_error:
++	if (tot_dsds)
++		scsi_dma_unmap(cmd);
+ 
+-	if (cmd->use_sg && tot_dsds) {
+-		sg = (struct scatterlist *) cmd->request_buffer;
+-		pci_unmap_sg(ha->pdev, sg, cmd->use_sg,
+-			     cmd->sc_data_direction);
+-	} else if (tot_dsds)
+-		pci_unmap_single(ha->pdev, srb->dma_handle,
+-				 cmd->request_bufflen, cmd->sc_data_direction);
+ 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+ 
+ 	return QLA_ERROR;
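The ql4_iocb.c conversion above is the pattern repeated across this
series: drop the use_sg/request_buffer/request_bufflen triple in favor
of scsi_dma_map() plus scsi_for_each_sg(). Reduced to its skeleton, it
looks like the following kernel-context sketch (2.6.22-era accessors;
the qla-specific DSD descriptor bookkeeping is elided):

#include <scsi/scsi_cmnd.h>

/* Sketch: one mapping call, one iterator, no sg/non-sg split. */
static int build_dsds_sketch(struct scsi_cmnd *cmd)
{
	struct scatterlist *sg;
	int i, nseg;

	nseg = scsi_dma_map(cmd);	/* replaces pci_map_sg()/pci_map_single() */
	if (nseg < 0)
		return nseg;

	scsi_for_each_sg(cmd, sg, nseg, i) {
		dma_addr_t dma = sg_dma_address(sg);
		u32 len = sg_dma_len(sg);
		/* ... program one data-segment descriptor from dma/len ... */
		(void)dma;
		(void)len;
	}
	/* the total transfer length is now scsi_bufflen(cmd) */
	return 0;
}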
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_isr.c linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_isr.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_isr.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_isr.c	2007-12-19 15:29:23.000000000 -0500
+@@ -6,6 +6,9 @@
+  */
+ 
+ #include "ql4_def.h"
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
+ 
+ /**
+  * qla2x00_process_completed_request() - Process a Fast Post response.
+@@ -92,7 +95,7 @@
+ 
+ 		if (sts_entry->iscsiFlags &
+ 		    (ISCSI_FLAG_RESIDUAL_OVER|ISCSI_FLAG_RESIDUAL_UNDER))
+-			cmd->resid = residual;
++			scsi_set_resid(cmd, residual);
+ 
+ 		cmd->result = DID_OK << 16 | scsi_status;
+ 
+@@ -176,14 +179,14 @@
+ 			 * Firmware detected a SCSI transport underrun
+ 			 * condition
+ 			 */
+-			cmd->resid = residual;
++			scsi_set_resid(cmd, residual);
+ 			DEBUG2(printk("scsi%ld:%d:%d:%d: %s: UNDERRUN status "
+ 				      "detected, xferlen = 0x%x, residual = "
+ 				      "0x%x\n",
+ 				      ha->host_no, cmd->device->channel,
+ 				      cmd->device->id,
+ 				      cmd->device->lun, __func__,
+-				      cmd->request_bufflen,
++				      scsi_bufflen(cmd),
+ 				      residual));
+ 		}
+ 
+@@ -227,7 +230,7 @@
+ 			if ((sts_entry->iscsiFlags &
+ 			     ISCSI_FLAG_RESIDUAL_UNDER) == 0) {
+ 				cmd->result = DID_BUS_BUSY << 16;
+-			} else if ((cmd->request_bufflen - residual) <
++			} else if ((scsi_bufflen(cmd) - residual) <
+ 				   cmd->underflow) {
+ 				/*
+ 				 * Handle mid-layer underflow???
+@@ -248,7 +251,7 @@
+ 					      cmd->device->channel,
+ 					      cmd->device->id,
+ 					      cmd->device->lun, __func__,
+-					      cmd->request_bufflen, residual));
++					      scsi_bufflen(cmd), residual));
+ 
+ 				cmd->result = DID_ERROR << 16;
+ 			} else {
+@@ -417,6 +420,7 @@
+ 				       uint32_t mbox_status)
+ {
+ 	int i;
++	uint32_t mbox_stat2, mbox_stat3;
+ 
+ 	if ((mbox_status == MBOX_STS_BUSY) ||
+ 	    (mbox_status == MBOX_STS_INTERMEDIATE_COMPLETION) ||
+@@ -437,6 +441,12 @@
+ 	} else if (mbox_status >> 12 == MBOX_ASYNC_EVENT_STATUS) {
+ 		/* Immediately process the AENs that don't require much work.
+ 		 * Only queue the database_changed AENs */
++		if (ha->aen_log.count < MAX_AEN_ENTRIES) {
++			for (i = 0; i < MBOX_AEN_REG_COUNT; i++)
++				ha->aen_log.entry[ha->aen_log.count].mbox_sts[i] =
++					readl(&ha->reg->mailbox[i]);
++			ha->aen_log.count++;
++		}
+ 		switch (mbox_status) {
+ 		case MBOX_ASTS_SYSTEM_ERROR:
+ 			/* Log Mailbox registers */
+@@ -493,6 +503,16 @@
+ 				      mbox_status));
+ 			break;
+ 
++		case MBOX_ASTS_IP_ADDR_STATE_CHANGED:
++			mbox_stat2 = readl(&ha->reg->mailbox[2]);
++			mbox_stat3 = readl(&ha->reg->mailbox[3]);
++
++			if ((mbox_stat3 == 5) && (mbox_stat2 == 3))
++				set_bit(DPC_GET_DHCP_IP_ADDR, &ha->dpc_flags);
++			else if ((mbox_stat3 == 2) && (mbox_stat2 == 5))
++				set_bit(DPC_RESET_HA, &ha->dpc_flags);
++			break;
++
+ 		case MBOX_ASTS_MAC_ADDRESS_CHANGED:
+ 		case MBOX_ASTS_DNS:
+ 			/* No action */
+@@ -518,11 +538,6 @@
+ 			/* Queue AEN information and process it in the DPC
+ 			 * routine */
+ 			if (ha->aen_q_count > 0) {
+-				/* advance pointer */
+-				if (ha->aen_in == (MAX_AEN_ENTRIES - 1))
+-					ha->aen_in = 0;
+-				else
+-					ha->aen_in++;
+ 
+ 				/* decrement available counter */
+ 				ha->aen_q_count--;
+@@ -542,6 +557,10 @@
+ 					      ha->aen_q[ha->aen_in].mbox_sts[2],
+ 					      ha->aen_q[ha->aen_in].mbox_sts[3],
+ 					      ha->aen_q[ha->aen_in].  mbox_sts[4]));
++				/* advance pointer */
++				ha->aen_in++;
++				if (ha->aen_in == MAX_AEN_ENTRIES)
++					ha->aen_in = 0;
+ 
+ 				/* The DPC routine will process the aen */
+ 				set_bit(DPC_AEN, &ha->dpc_flags);
+@@ -724,25 +743,24 @@
+ 
+ 	spin_lock_irqsave(&ha->hardware_lock, flags);
+ 	while (ha->aen_out != ha->aen_in) {
+-		/* Advance pointers for next entry */
+-		if (ha->aen_out == (MAX_AEN_ENTRIES - 1))
+-			ha->aen_out = 0;
+-		else
+-			ha->aen_out++;
+-
+-		ha->aen_q_count++;
+ 		aen = &ha->aen_q[ha->aen_out];
+-
+ 		/* copy aen information to local structure */
+ 		for (i = 0; i < MBOX_AEN_REG_COUNT; i++)
+ 			mbox_sts[i] = aen->mbox_sts[i];
+ 
++		ha->aen_q_count++;
++		ha->aen_out++;
++
++		if (ha->aen_out == MAX_AEN_ENTRIES)
++			ha->aen_out = 0;
++
+ 		spin_unlock_irqrestore(&ha->hardware_lock, flags);
+ 
+-		DEBUG(printk("scsi%ld: AEN[%d] %04x, index [%d] state=%04x "
+-			     "mod=%x conerr=%08x \n", ha->host_no, ha->aen_out,
+-			     mbox_sts[0], mbox_sts[2], mbox_sts[3],
+-			     mbox_sts[1], mbox_sts[4]));
++		DEBUG2(printk("qla4xxx(%ld): AEN[%d]=0x%08x, mbx1=0x%08x mbx2=0x%08x"
++			" mbx3=0x%08x mbx4=0x%08x\n", ha->host_no,
++			(ha->aen_out ? (ha->aen_out-1): (MAX_AEN_ENTRIES-1)),
++			mbox_sts[0], mbox_sts[1], mbox_sts[2],
++			mbox_sts[3], mbox_sts[4]));
+ 
+ 		switch (mbox_sts[0]) {
+ 		case MBOX_ASTS_DATABASE_CHANGED:
+@@ -792,6 +810,5 @@
+ 		spin_lock_irqsave(&ha->hardware_lock, flags);
+ 	}
+ 	spin_unlock_irqrestore(&ha->hardware_lock, flags);
+-
+ }
+ 
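The AEN-queue hunks above fix an ordering problem: the old code advanced
aen_in/aen_out before touching the slot, so the index no longer named
the entry being produced or consumed. The new code writes or copies the
slot first and only then advances with wrap-around. The same discipline
in a self-contained toy ring (the aen_q_count accounting is elided):

/* Copy-then-advance ring buffer, mirroring the reordered
 * aen_in/aen_out handling above. */
#include <stdio.h>

#define MAX_AEN_ENTRIES 4

static int ring[MAX_AEN_ENTRIES];
static unsigned in, out;

static void produce(int v)
{
	ring[in] = v;			/* fill the slot first */
	if (++in == MAX_AEN_ENTRIES)	/* then advance with wrap */
		in = 0;
}

static int consume(void)
{
	int v = ring[out];		/* copy out before moving the index */
	if (++out == MAX_AEN_ENTRIES)
		out = 0;
	return v;
}

int main(void)
{
	for (int v = 1; v <= 3; v++)
		produce(v);
	while (out != in)
		printf("aen %d\n", consume());
	return 0;
}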
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_mbx.c linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_mbx.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_mbx.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_mbx.c	2007-12-19 15:29:23.000000000 -0500
+@@ -6,6 +6,9 @@
+  */
+ 
+ #include "ql4_def.h"
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
+ 
+ 
+ /**
+@@ -169,84 +172,6 @@
+ 	return status;
+ }
+ 
+-
+-#if 0
+-
+-/**
+- * qla4xxx_issue_iocb - issue mailbox iocb command
+- * @ha: adapter state pointer.
+- * @buffer: buffer pointer.
+- * @phys_addr: physical address of buffer.
+- * @size: size of buffer.
+- *
+- * Issues iocbs via mailbox commands.
+- * TARGET_QUEUE_LOCK must be released.
+- * ADAPTER_STATE_LOCK must be released.
+- **/
+-int
+-qla4xxx_issue_iocb(struct scsi_qla_host * ha, void *buffer,
+-		   dma_addr_t phys_addr, size_t size)
+-{
+-	uint32_t mbox_cmd[MBOX_REG_COUNT];
+-	uint32_t mbox_sts[MBOX_REG_COUNT];
+-	int status;
+-
+-	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+-	memset(&mbox_sts, 0, sizeof(mbox_sts));
+-	mbox_cmd[0] = MBOX_CMD_EXECUTE_IOCB_A64;
+-	mbox_cmd[1] = 0;
+-	mbox_cmd[2] = LSDW(phys_addr);
+-	mbox_cmd[3] = MSDW(phys_addr);
+-	status = qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]);
+-	return status;
+-}
+-
+-int qla4xxx_conn_close_sess_logout(struct scsi_qla_host * ha,
+-				   uint16_t fw_ddb_index,
+-				   uint16_t connection_id,
+-				   uint16_t option)
+-{
+-	uint32_t mbox_cmd[MBOX_REG_COUNT];
+-	uint32_t mbox_sts[MBOX_REG_COUNT];
+-
+-	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+-	memset(&mbox_sts, 0, sizeof(mbox_sts));
+-	mbox_cmd[0] = MBOX_CMD_CONN_CLOSE_SESS_LOGOUT;
+-	mbox_cmd[1] = fw_ddb_index;
+-	mbox_cmd[2] = connection_id;
+-	mbox_cmd[3] = LOGOUT_OPTION_RELOGIN;
+-	if (qla4xxx_mailbox_command(ha, 4, 2, &mbox_cmd[0], &mbox_sts[0]) !=
+-	    QLA_SUCCESS) {
+-		DEBUG2(printk("scsi%ld: %s: MBOX_CMD_CONN_CLOSE_SESS_LOGOUT "
+-			      "option %04x failed sts %04X %04X",
+-			      ha->host_no, __func__,
+-			      option, mbox_sts[0], mbox_sts[1]));
+-		if (mbox_sts[0] == 0x4005)
+-			DEBUG2(printk("%s reason %04X\n", __func__,
+-				      mbox_sts[1]));
+-	}
+-	return QLA_SUCCESS;
+-}
+-
+-int qla4xxx_clear_database_entry(struct scsi_qla_host * ha,
+-				 uint16_t fw_ddb_index)
+-{
+-	uint32_t mbox_cmd[MBOX_REG_COUNT];
+-	uint32_t mbox_sts[MBOX_REG_COUNT];
+-
+-	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+-	memset(&mbox_sts, 0, sizeof(mbox_sts));
+-	mbox_cmd[0] = MBOX_CMD_CLEAR_DATABASE_ENTRY;
+-	mbox_cmd[1] = fw_ddb_index;
+-	if (qla4xxx_mailbox_command(ha, 2, 5, &mbox_cmd[0], &mbox_sts[0]) !=
+-	    QLA_SUCCESS)
+-		return QLA_ERROR;
+-
+-	return QLA_SUCCESS;
+-}
+-
+-#endif  /*  0  */
+-
+ /**
+  * qla4xxx_initialize_fw_cb - initializes firmware control block.
+  * @ha: Pointer to host adapter structure.
+@@ -272,10 +197,13 @@
+ 	/* Get Initialize Firmware Control Block. */
+ 	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ 	memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ 	mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK;
+ 	mbox_cmd[2] = LSDW(init_fw_cb_dma);
+ 	mbox_cmd[3] = MSDW(init_fw_cb_dma);
+-	if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) !=
++	mbox_cmd[4] = sizeof(struct init_fw_ctrl_blk);
++
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) !=
+ 	    QLA_SUCCESS) {
+ 		dma_free_coherent(&ha->pdev->dev,
+ 				  sizeof(struct init_fw_ctrl_blk),
+@@ -287,51 +215,56 @@
+ 	qla4xxx_init_rings(ha);
+ 
+ 	/* Fill in the request and response queue information. */
+-	init_fw_cb->ReqQConsumerIndex = cpu_to_le16(ha->request_out);
+-	init_fw_cb->ComplQProducerIndex = cpu_to_le16(ha->response_in);
+-	init_fw_cb->ReqQLen = __constant_cpu_to_le16(REQUEST_QUEUE_DEPTH);
+-	init_fw_cb->ComplQLen = __constant_cpu_to_le16(RESPONSE_QUEUE_DEPTH);
+-	init_fw_cb->ReqQAddrLo = cpu_to_le32(LSDW(ha->request_dma));
+-	init_fw_cb->ReqQAddrHi = cpu_to_le32(MSDW(ha->request_dma));
+-	init_fw_cb->ComplQAddrLo = cpu_to_le32(LSDW(ha->response_dma));
+-	init_fw_cb->ComplQAddrHi = cpu_to_le32(MSDW(ha->response_dma));
+-	init_fw_cb->ShadowRegBufAddrLo =
++	init_fw_cb->pri.rqq_consumer_idx = cpu_to_le16(ha->request_out);
++	init_fw_cb->pri.compq_producer_idx = cpu_to_le16(ha->response_in);
++	init_fw_cb->pri.rqq_len = __constant_cpu_to_le16(REQUEST_QUEUE_DEPTH);
++	init_fw_cb->pri.compq_len = __constant_cpu_to_le16(RESPONSE_QUEUE_DEPTH);
++	init_fw_cb->pri.rqq_addr_lo = cpu_to_le32(LSDW(ha->request_dma));
++	init_fw_cb->pri.rqq_addr_hi = cpu_to_le32(MSDW(ha->request_dma));
++	init_fw_cb->pri.compq_addr_lo = cpu_to_le32(LSDW(ha->response_dma));
++	init_fw_cb->pri.compq_addr_hi = cpu_to_le32(MSDW(ha->response_dma));
++	init_fw_cb->pri.shdwreg_addr_lo =
+ 		cpu_to_le32(LSDW(ha->shadow_regs_dma));
+-	init_fw_cb->ShadowRegBufAddrHi =
++	init_fw_cb->pri.shdwreg_addr_hi =
+ 		cpu_to_le32(MSDW(ha->shadow_regs_dma));
+ 
+ 	/* Set up required options. */
+-	init_fw_cb->FwOptions |=
++	init_fw_cb->pri.fw_options |=
+ 		__constant_cpu_to_le16(FWOPT_SESSION_MODE |
+ 				       FWOPT_INITIATOR_MODE);
+-	init_fw_cb->FwOptions &= __constant_cpu_to_le16(~FWOPT_TARGET_MODE);
++	init_fw_cb->pri.fw_options &= __constant_cpu_to_le16(~FWOPT_TARGET_MODE);
+ 
+ 	/* Save some info in adapter structure. */
+-	ha->firmware_options = le16_to_cpu(init_fw_cb->FwOptions);
+-	ha->tcp_options = le16_to_cpu(init_fw_cb->TCPOptions);
+-	ha->heartbeat_interval = init_fw_cb->HeartbeatInterval;
+-	memcpy(ha->ip_address, init_fw_cb->IPAddr,
+-	       min(sizeof(ha->ip_address), sizeof(init_fw_cb->IPAddr)));
+-	memcpy(ha->subnet_mask, init_fw_cb->SubnetMask,
+-	       min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->SubnetMask)));
+-	memcpy(ha->gateway, init_fw_cb->GatewayIPAddr,
+-	       min(sizeof(ha->gateway), sizeof(init_fw_cb->GatewayIPAddr)));
+-	memcpy(ha->name_string, init_fw_cb->iSCSINameString,
++	ha->firmware_options = le16_to_cpu(init_fw_cb->pri.fw_options);
++	ha->tcp_options = le16_to_cpu(init_fw_cb->pri.ipv4_tcp_opts);
++	ha->heartbeat_interval = init_fw_cb->pri.hb_interval;
++	memcpy(ha->ip_address, init_fw_cb->pri.ipv4_addr,
++	       min(sizeof(ha->ip_address), sizeof(init_fw_cb->pri.ipv4_addr)));
++	memcpy(ha->subnet_mask, init_fw_cb->pri.ipv4_subnet,
++	       min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->pri.ipv4_subnet)));
++	memcpy(ha->gateway, init_fw_cb->pri.ipv4_gw_addr,
++	       min(sizeof(ha->gateway), sizeof(init_fw_cb->pri.ipv4_gw_addr)));
++	memcpy(ha->name_string, init_fw_cb->pri.iscsi_name,
+ 	       min(sizeof(ha->name_string),
+-		   sizeof(init_fw_cb->iSCSINameString)));
+-	memcpy(ha->alias, init_fw_cb->Alias,
+-	       min(sizeof(ha->alias), sizeof(init_fw_cb->Alias)));
++		   sizeof(init_fw_cb->pri.iscsi_name)));
++	/*memcpy(ha->alias, init_fw_cb->Alias,
++	       min(sizeof(ha->alias), sizeof(init_fw_cb->Alias)));*/
+ 
+ 	/* Save Command Line Parameter info */
+-	ha->port_down_retry_count = le16_to_cpu(init_fw_cb->KeepAliveTimeout);
++	ha->port_down_retry_count = le16_to_cpu(init_fw_cb->pri.conn_ka_timeout);
+ 	ha->discovery_wait = ql4xdiscoverywait;
+ 
+ 	/* Send Initialize Firmware Control Block. */
++	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++	memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ 	mbox_cmd[0] = MBOX_CMD_INITIALIZE_FIRMWARE;
+ 	mbox_cmd[1] = 0;
+ 	mbox_cmd[2] = LSDW(init_fw_cb_dma);
+ 	mbox_cmd[3] = MSDW(init_fw_cb_dma);
+-	if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) ==
++	mbox_cmd[4] = sizeof(struct init_fw_ctrl_blk);
++
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) ==
+ 	    QLA_SUCCESS)
+ 		status = QLA_SUCCESS;
+ 	 else {
+@@ -368,12 +301,14 @@
+ 	/* Get Initialize Firmware Control Block. */
+ 	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ 	memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ 	memset(init_fw_cb, 0, sizeof(struct init_fw_ctrl_blk));
+ 	mbox_cmd[0] = MBOX_CMD_GET_INIT_FW_CTRL_BLOCK;
+ 	mbox_cmd[2] = LSDW(init_fw_cb_dma);
+ 	mbox_cmd[3] = MSDW(init_fw_cb_dma);
++	mbox_cmd[4] = sizeof(struct init_fw_ctrl_blk);
+ 
+-	if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) !=
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) !=
+ 	    QLA_SUCCESS) {
+ 		DEBUG2(printk("scsi%ld: %s: Failed to get init_fw_ctrl_blk\n",
+ 			      ha->host_no, __func__));
+@@ -384,12 +319,12 @@
+ 	}
+ 
+ 	/* Save IP Address. */
+-	memcpy(ha->ip_address, init_fw_cb->IPAddr,
+-	       min(sizeof(ha->ip_address), sizeof(init_fw_cb->IPAddr)));
+-	memcpy(ha->subnet_mask, init_fw_cb->SubnetMask,
+-	       min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->SubnetMask)));
+-	memcpy(ha->gateway, init_fw_cb->GatewayIPAddr,
+-	       min(sizeof(ha->gateway), sizeof(init_fw_cb->GatewayIPAddr)));
++	memcpy(ha->ip_address, init_fw_cb->pri.ipv4_addr,
++	       min(sizeof(ha->ip_address), sizeof(init_fw_cb->pri.ipv4_addr)));
++	memcpy(ha->subnet_mask, init_fw_cb->pri.ipv4_subnet,
++	       min(sizeof(ha->subnet_mask), sizeof(init_fw_cb->pri.ipv4_subnet)));
++	memcpy(ha->gateway, init_fw_cb->pri.ipv4_gw_addr,
++	       min(sizeof(ha->gateway), sizeof(init_fw_cb->pri.ipv4_gw_addr)));
+ 
+ 	dma_free_coherent(&ha->pdev->dev, sizeof(struct init_fw_ctrl_blk),
+ 			  init_fw_cb, init_fw_cb_dma);
+@@ -409,8 +344,10 @@
+ 	/* Get firmware version */
+ 	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ 	memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ 	mbox_cmd[0] = MBOX_CMD_GET_FW_STATE;
+-	if (qla4xxx_mailbox_command(ha, 1, 4, &mbox_cmd[0], &mbox_sts[0]) !=
++
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 4, &mbox_cmd[0], &mbox_sts[0]) !=
+ 	    QLA_SUCCESS) {
+ 		DEBUG2(printk("scsi%ld: %s: MBOX_CMD_GET_FW_STATE failed w/ "
+ 			      "status %04X\n", ha->host_no, __func__,
+@@ -438,8 +375,10 @@
+ 	/* Get firmware version */
+ 	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ 	memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ 	mbox_cmd[0] = MBOX_CMD_GET_FW_STATUS;
+-	if (qla4xxx_mailbox_command(ha, 1, 3, &mbox_cmd[0], &mbox_sts[0]) !=
++
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 3, &mbox_cmd[0], &mbox_sts[0]) !=
+ 	    QLA_SUCCESS) {
+ 		DEBUG2(printk("scsi%ld: %s: MBOX_CMD_GET_FW_STATUS failed w/ "
+ 			      "status %04X\n", ha->host_no, __func__,
+@@ -491,11 +430,14 @@
+ 	}
+ 	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ 	memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ 	mbox_cmd[0] = MBOX_CMD_GET_DATABASE_ENTRY;
+ 	mbox_cmd[1] = (uint32_t) fw_ddb_index;
+ 	mbox_cmd[2] = LSDW(fw_ddb_entry_dma);
+ 	mbox_cmd[3] = MSDW(fw_ddb_entry_dma);
+-	if (qla4xxx_mailbox_command(ha, 4, 7, &mbox_cmd[0], &mbox_sts[0]) ==
++	mbox_cmd[4] = sizeof(struct dev_db_entry);
++
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 7, &mbox_cmd[0], &mbox_sts[0]) ==
+ 	    QLA_ERROR) {
+ 		DEBUG2(printk("scsi%ld: %s: MBOX_CMD_GET_DATABASE_ENTRY failed"
+ 			      " with status 0x%04X\n", ha->host_no, __func__,
+@@ -512,11 +454,11 @@
+ 		dev_info(&ha->pdev->dev, "DDB[%d] MB0 %04x Tot %d Next %d "
+ 			   "State %04x ConnErr %08x %d.%d.%d.%d:%04d \"%s\"\n",
+ 			   fw_ddb_index, mbox_sts[0], mbox_sts[2], mbox_sts[3],
+-			   mbox_sts[4], mbox_sts[5], fw_ddb_entry->ipAddr[0],
+-			   fw_ddb_entry->ipAddr[1], fw_ddb_entry->ipAddr[2],
+-			   fw_ddb_entry->ipAddr[3],
+-			   le16_to_cpu(fw_ddb_entry->portNumber),
+-			   fw_ddb_entry->iscsiName);
++			   mbox_sts[4], mbox_sts[5], fw_ddb_entry->ip_addr[0],
++			   fw_ddb_entry->ip_addr[1], fw_ddb_entry->ip_addr[2],
++			   fw_ddb_entry->ip_addr[3],
++			   le16_to_cpu(fw_ddb_entry->port),
++			   fw_ddb_entry->iscsi_name);
+ 	}
+ 	if (num_valid_ddb_entries)
+ 		*num_valid_ddb_entries = mbox_sts[2];
+@@ -571,35 +513,10 @@
+ 	mbox_cmd[1] = (uint32_t) fw_ddb_index;
+ 	mbox_cmd[2] = LSDW(fw_ddb_entry_dma);
+ 	mbox_cmd[3] = MSDW(fw_ddb_entry_dma);
+-	return qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]);
+-}
+-
+-#if 0
+-int qla4xxx_conn_open_session_login(struct scsi_qla_host * ha,
+-				    uint16_t fw_ddb_index)
+-{
+-	int status = QLA_ERROR;
+-	uint32_t mbox_cmd[MBOX_REG_COUNT];
+-	uint32_t mbox_sts[MBOX_REG_COUNT];
+-
+-	/* Do not wait for completion. The firmware will send us an
+-	 * ASTS_DATABASE_CHANGED (0x8014) to notify us of the login status.
+-	 */
+-	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+-	memset(&mbox_sts, 0, sizeof(mbox_sts));
+-	mbox_cmd[0] = MBOX_CMD_CONN_OPEN_SESS_LOGIN;
+-	mbox_cmd[1] = (uint32_t) fw_ddb_index;
+-	mbox_cmd[2] = 0;
+-	mbox_cmd[3] = 0;
+-	mbox_cmd[4] = 0;
+-	status = qla4xxx_mailbox_command(ha, 4, 0, &mbox_cmd[0], &mbox_sts[0]);
+-	DEBUG2(printk("%s fw_ddb_index=%d status=%d mbx0_1=0x%x :0x%x\n",
+-		      __func__, fw_ddb_index, status, mbox_sts[0],
+-		      mbox_sts[1]);)
++	mbox_cmd[4] = sizeof(struct dev_db_entry);
+ 
+-		return status;
++	return qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]);
+ }
+-#endif  /*  0  */
+ 
+ /**
+  * qla4xxx_get_crash_record - retrieves crash record.
+@@ -614,12 +531,14 @@
+ 	struct crash_record *crash_record = NULL;
+ 	dma_addr_t crash_record_dma = 0;
+ 	uint32_t crash_record_size = 0;
++
+ 	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ 	memset(&mbox_sts, 0, sizeof(mbox_cmd));
+ 
+ 	/* Get size of crash record. */
+ 	mbox_cmd[0] = MBOX_CMD_GET_CRASH_RECORD;
+-	if (qla4xxx_mailbox_command(ha, 5, 5, &mbox_cmd[0], &mbox_sts[0]) !=
++
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) !=
+ 	    QLA_SUCCESS) {
+ 		DEBUG2(printk("scsi%ld: %s: ERROR: Unable to retrieve size!\n",
+ 			      ha->host_no, __func__));
+@@ -639,11 +558,15 @@
+ 		goto exit_get_crash_record;
+ 
+ 	/* Get Crash Record. */
++	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++	memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ 	mbox_cmd[0] = MBOX_CMD_GET_CRASH_RECORD;
+ 	mbox_cmd[2] = LSDW(crash_record_dma);
+ 	mbox_cmd[3] = MSDW(crash_record_dma);
+ 	mbox_cmd[4] = crash_record_size;
+-	if (qla4xxx_mailbox_command(ha, 5, 5, &mbox_cmd[0], &mbox_sts[0]) !=
++
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) !=
+ 	    QLA_SUCCESS)
+ 		goto exit_get_crash_record;
+ 
+@@ -655,7 +578,6 @@
+ 				  crash_record, crash_record_dma);
+ }
+ 
+-#if 0
+ /**
+  * qla4xxx_get_conn_event_log - retrieves connection event log
+  * @ha: Pointer to host adapter structure.
+@@ -678,7 +600,8 @@
+ 
+ 	/* Get size of crash record. */
+ 	mbox_cmd[0] = MBOX_CMD_GET_CONN_EVENT_LOG;
+-	if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) !=
++
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) !=
+ 	    QLA_SUCCESS)
+ 		goto exit_get_event_log;
+ 
+@@ -693,10 +616,14 @@
+ 		goto exit_get_event_log;
+ 
+ 	/* Get Crash Record. */
++	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
++	memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ 	mbox_cmd[0] = MBOX_CMD_GET_CONN_EVENT_LOG;
+ 	mbox_cmd[2] = LSDW(event_log_dma);
+ 	mbox_cmd[3] = MSDW(event_log_dma);
+-	if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) !=
++
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) !=
+ 	    QLA_SUCCESS) {
+ 		DEBUG2(printk("scsi%ld: %s: ERROR: Unable to retrieve event "
+ 			      "log!\n", ha->host_no, __func__));
+@@ -745,7 +672,6 @@
+ 		dma_free_coherent(&ha->pdev->dev, event_log_size, event_log,
+ 				  event_log_dma);
+ }
+-#endif  /*  0  */
+ 
+ /**
+  * qla4xxx_reset_lun - issues LUN Reset
+@@ -773,11 +699,13 @@
+ 	 */
+ 	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ 	memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ 	mbox_cmd[0] = MBOX_CMD_LUN_RESET;
+ 	mbox_cmd[1] = ddb_entry->fw_ddb_index;
+ 	mbox_cmd[2] = lun << 8;
+ 	mbox_cmd[5] = 0x01;	/* Immediate Command Enable */
+-	qla4xxx_mailbox_command(ha, 6, 1, &mbox_cmd[0], &mbox_sts[0]);
++
++	qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]);
+ 	if (mbox_sts[0] != MBOX_STS_COMMAND_COMPLETE &&
+ 	    mbox_sts[0] != MBOX_STS_COMMAND_ERROR)
+ 		status = QLA_ERROR;
+@@ -794,12 +722,14 @@
+ 
+ 	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ 	memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ 	mbox_cmd[0] = MBOX_CMD_READ_FLASH;
+ 	mbox_cmd[1] = LSDW(dma_addr);
+ 	mbox_cmd[2] = MSDW(dma_addr);
+ 	mbox_cmd[3] = offset;
+ 	mbox_cmd[4] = len;
+-	if (qla4xxx_mailbox_command(ha, 5, 2, &mbox_cmd[0], &mbox_sts[0]) !=
++
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 2, &mbox_cmd[0], &mbox_sts[0]) !=
+ 	    QLA_SUCCESS) {
+ 		DEBUG2(printk("scsi%ld: %s: MBOX_CMD_READ_FLASH, failed w/ "
+ 		    "status %04X %04X, offset %08x, len %08x\n", ha->host_no,
+@@ -825,8 +755,10 @@
+ 	/* Get firmware version. */
+ 	memset(&mbox_cmd, 0, sizeof(mbox_cmd));
+ 	memset(&mbox_sts, 0, sizeof(mbox_sts));
++
+ 	mbox_cmd[0] = MBOX_CMD_ABOUT_FW;
+-	if (qla4xxx_mailbox_command(ha, 4, 5, &mbox_cmd[0], &mbox_sts[0]) !=
++
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 5, &mbox_cmd[0], &mbox_sts[0]) !=
+ 	    QLA_SUCCESS) {
+ 		DEBUG2(printk("scsi%ld: %s: MBOX_CMD_ABOUT_FW failed w/ "
+ 		    "status %04X\n", ha->host_no, __func__, mbox_sts[0]));
+@@ -855,7 +787,7 @@
+ 	mbox_cmd[2] = LSDW(dma_addr);
+ 	mbox_cmd[3] = MSDW(dma_addr);
+ 
+-	if (qla4xxx_mailbox_command(ha, 4, 1, &mbox_cmd[0], &mbox_sts[0]) !=
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 1, &mbox_cmd[0], &mbox_sts[0]) !=
+ 	    QLA_SUCCESS) {
+ 		DEBUG2(printk("scsi%ld: %s: failed status %04X\n",
+ 		     ha->host_no, __func__, mbox_sts[0]));
+@@ -875,7 +807,7 @@
+ 	mbox_cmd[0] = MBOX_CMD_REQUEST_DATABASE_ENTRY;
+ 	mbox_cmd[1] = MAX_PRST_DEV_DB_ENTRIES;
+ 
+-	if (qla4xxx_mailbox_command(ha, 2, 3, &mbox_cmd[0], &mbox_sts[0]) !=
++	if (qla4xxx_mailbox_command(ha, MBOX_REG_COUNT, 3, &mbox_cmd[0], &mbox_sts[0]) !=
+ 	    QLA_SUCCESS) {
+ 		if (mbox_sts[0] == MBOX_STS_COMMAND_ERROR) {
+ 			*ddb_index = mbox_sts[2];
+@@ -918,23 +850,23 @@
+ 	if (ret_val != QLA_SUCCESS)
+ 		goto qla4xxx_send_tgts_exit;
+ 
+-	memset((void *)fw_ddb_entry->iSCSIAlias, 0,
+-	       sizeof(fw_ddb_entry->iSCSIAlias));
++	memset(fw_ddb_entry->iscsi_alias, 0,
++	       sizeof(fw_ddb_entry->iscsi_alias));
+ 
+-	memset((void *)fw_ddb_entry->iscsiName, 0,
+-	       sizeof(fw_ddb_entry->iscsiName));
++	memset(fw_ddb_entry->iscsi_name, 0,
++	       sizeof(fw_ddb_entry->iscsi_name));
+ 
+-	memset((void *)fw_ddb_entry->ipAddr, 0, sizeof(fw_ddb_entry->ipAddr));
+-	memset((void *)fw_ddb_entry->targetAddr, 0,
+-	       sizeof(fw_ddb_entry->targetAddr));
++	memset(fw_ddb_entry->ip_addr, 0, sizeof(fw_ddb_entry->ip_addr));
++	memset(fw_ddb_entry->tgt_addr, 0,
++	       sizeof(fw_ddb_entry->tgt_addr));
+ 
+ 	fw_ddb_entry->options = (DDB_OPT_DISC_SESSION | DDB_OPT_TARGET);
+-	fw_ddb_entry->portNumber = cpu_to_le16(ntohs(port));
++	fw_ddb_entry->port = cpu_to_le16(ntohs(port));
+ 
+-	fw_ddb_entry->ipAddr[0] = *ip;
+-	fw_ddb_entry->ipAddr[1] = *(ip + 1);
+-	fw_ddb_entry->ipAddr[2] = *(ip + 2);
+-	fw_ddb_entry->ipAddr[3] = *(ip + 3);
++	fw_ddb_entry->ip_addr[0] = *ip;
++	fw_ddb_entry->ip_addr[1] = *(ip + 1);
++	fw_ddb_entry->ip_addr[2] = *(ip + 2);
++	fw_ddb_entry->ip_addr[3] = *(ip + 3);
+ 
+ 	ret_val = qla4xxx_set_ddb_entry(ha, ddb_index, fw_ddb_entry_dma);
+ 
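Almost every ql4_mbx.c hunk applies the same three-part convention: zero
both register arrays before each command so stale values never leak
through, pass MBOX_REG_COUNT as the in-count so all registers get
written, and carry the DMA buffer size in mbox_cmd[4] as newer firmware
expects. A standalone sketch of that convention; issue_mbox() is a
hypothetical stand-in for qla4xxx_mailbox_command() and the opcode value
is illustrative:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MBOX_REG_COUNT 8

/* Hypothetical stand-in for qla4xxx_mailbox_command(): just prints. */
static int issue_mbox(const uint32_t *cmd, uint32_t *sts)
{
	for (int i = 0; i < MBOX_REG_COUNT; i++)
		printf("mbox_cmd[%d] = 0x%08x\n", i, (unsigned)cmd[i]);
	sts[0] = 0x4000;	/* pretend the command completed */
	return 0;
}

static int get_fw_block(uint64_t dma, uint32_t len, uint32_t opcode)
{
	uint32_t mbox_cmd[MBOX_REG_COUNT];
	uint32_t mbox_sts[MBOX_REG_COUNT];

	memset(mbox_cmd, 0, sizeof(mbox_cmd));	/* never reuse stale registers */
	memset(mbox_sts, 0, sizeof(mbox_sts));

	mbox_cmd[0] = opcode;
	mbox_cmd[2] = (uint32_t)(dma & 0xffffffffu);	/* LSDW */
	mbox_cmd[3] = (uint32_t)(dma >> 32);		/* MSDW */
	mbox_cmd[4] = len;	/* buffer size now travels in register 4 */

	return issue_mbox(mbox_cmd, mbox_sts);
}

int main(void)
{
	return get_fw_block(0x123456789abcull, 512, 0x61 /* illustrative opcode */);
}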
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_nvram.c linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_nvram.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_nvram.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_nvram.c	2007-12-19 15:29:23.000000000 -0500
+@@ -6,6 +6,9 @@
+  */
+ 
+ #include "ql4_def.h"
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
+ 
+ static inline void eeprom_cmd(uint32_t cmd, struct scsi_qla_host *ha)
+ {
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_os.c linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_os.c
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_os.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_os.c	2007-12-19 15:29:23.000000000 -0500
+@@ -10,6 +10,10 @@
+ #include <scsi/scsicam.h>
+ 
+ #include "ql4_def.h"
++#include "ql4_version.h"
++#include "ql4_glbl.h"
++#include "ql4_dbg.h"
++#include "ql4_inline.h"
+ 
+ /*
+  * Driver version
+@@ -50,12 +54,15 @@
+ /*
+  * iSCSI template entry points
+  */
+-static int qla4xxx_tgt_dscvr(enum iscsi_tgt_dscvr type, uint32_t host_no,
+-			     uint32_t enable, struct sockaddr *dst_addr);
++static int qla4xxx_tgt_dscvr(struct Scsi_Host *shost,
++			     enum iscsi_tgt_dscvr type, uint32_t enable,
++			     struct sockaddr *dst_addr);
+ static int qla4xxx_conn_get_param(struct iscsi_cls_conn *conn,
+ 				  enum iscsi_param param, char *buf);
+ static int qla4xxx_sess_get_param(struct iscsi_cls_session *sess,
+ 				  enum iscsi_param param, char *buf);
++static int qla4xxx_host_get_param(struct Scsi_Host *shost,
++				  enum iscsi_host_param param, char *buf);
+ static void qla4xxx_conn_stop(struct iscsi_cls_conn *conn, int flag);
+ static int qla4xxx_conn_start(struct iscsi_cls_conn *conn);
+ static void qla4xxx_recovery_timedout(struct iscsi_cls_session *session);
+@@ -95,16 +102,20 @@
+ static struct iscsi_transport qla4xxx_iscsi_transport = {
+ 	.owner			= THIS_MODULE,
+ 	.name			= DRIVER_NAME,
+-	.param_mask		= ISCSI_CONN_PORT |
+-				  ISCSI_CONN_ADDRESS |
+-				  ISCSI_TARGET_NAME |
+-				  ISCSI_TPGT,
++	.caps			= CAP_FW_DB | CAP_SENDTARGETS_OFFLOAD |
++				  CAP_DATA_PATH_OFFLOAD,
++	.param_mask		= ISCSI_CONN_PORT | ISCSI_CONN_ADDRESS |
++				  ISCSI_TARGET_NAME | ISCSI_TPGT,
++	.host_param_mask	= ISCSI_HOST_HWADDRESS |
++				  ISCSI_HOST_IPADDRESS |
++				  ISCSI_HOST_INITIATOR_NAME,
+ 	.sessiondata_size	= sizeof(struct ddb_entry),
+ 	.host_template		= &qla4xxx_driver_template,
+ 
+ 	.tgt_dscvr		= qla4xxx_tgt_dscvr,
+ 	.get_conn_param		= qla4xxx_conn_get_param,
+ 	.get_session_param	= qla4xxx_sess_get_param,
++	.get_host_param		= qla4xxx_host_get_param,
+ 	.start_conn		= qla4xxx_conn_start,
+ 	.stop_conn		= qla4xxx_conn_stop,
+ 	.session_recovery_timedout = qla4xxx_recovery_timedout,
+@@ -161,6 +172,43 @@
+ 		printk(KERN_ERR "iscsi: invalid stop flag %d\n", flag);
+ }
+ 
++static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
++{
++	int i;
++	char *cp = buf;
++
++	for (i = 0; i < len; i++)
++		cp += sprintf(cp, "%02x%c", addr[i],
++			      i == (len - 1) ? '\n' : ':');
++	return cp - buf;
++}
++
++
++static int qla4xxx_host_get_param(struct Scsi_Host *shost,
++				  enum iscsi_host_param param, char *buf)
++{
++	struct scsi_qla_host *ha = to_qla_host(shost);
++	int len;
++
++	switch (param) {
++	case ISCSI_HOST_PARAM_HWADDRESS:
++		len = format_addr(buf, ha->my_mac, MAC_ADDR_LEN);
++		break;
++	case ISCSI_HOST_PARAM_IPADDRESS:
++		len = sprintf(buf, "%d.%d.%d.%d\n", ha->ip_address[0],
++			      ha->ip_address[1], ha->ip_address[2],
++			      ha->ip_address[3]);
++		break;
++	case ISCSI_HOST_PARAM_INITIATOR_NAME:
++		len = sprintf(buf, "%s\n", ha->name_string);
++		break;
++	default:
++		return -ENOSYS;
++	}
++
++	return len;
++}
++
+ static int qla4xxx_sess_get_param(struct iscsi_cls_session *sess,
+ 				  enum iscsi_param param, char *buf)
+ {
+@@ -208,21 +256,15 @@
+ 	return len;
+ }
+ 
+-static int qla4xxx_tgt_dscvr(enum iscsi_tgt_dscvr type, uint32_t host_no,
+-			     uint32_t enable, struct sockaddr *dst_addr)
++static int qla4xxx_tgt_dscvr(struct Scsi_Host *shost,
++			     enum iscsi_tgt_dscvr type, uint32_t enable,
++			     struct sockaddr *dst_addr)
+ {
+ 	struct scsi_qla_host *ha;
+-	struct Scsi_Host *shost;
+ 	struct sockaddr_in *addr;
+ 	struct sockaddr_in6 *addr6;
+ 	int ret = 0;
+ 
+-	shost = scsi_host_lookup(host_no);
+-	if (IS_ERR(shost)) {
+-		printk(KERN_ERR "Could not find host no %u\n", host_no);
+-		return -ENODEV;
+-	}
+-
+ 	ha = (struct scsi_qla_host *) shost->hostdata;
+ 
+ 	switch (type) {
+@@ -246,8 +288,6 @@
+ 	default:
+ 		ret = -ENOSYS;
+ 	}
+-
+-	scsi_host_put(shost);
+ 	return ret;
+ }
+ 
+@@ -369,14 +409,7 @@
+ 	struct scsi_cmnd *cmd = srb->cmd;
+ 
+ 	if (srb->flags & SRB_DMA_VALID) {
+-		if (cmd->use_sg) {
+-			pci_unmap_sg(ha->pdev, cmd->request_buffer,
+-				     cmd->use_sg, cmd->sc_data_direction);
+-		} else if (cmd->request_bufflen) {
+-			pci_unmap_single(ha->pdev, srb->dma_handle,
+-					 cmd->request_bufflen,
+-					 cmd->sc_data_direction);
+-		}
++		scsi_dma_unmap(cmd);
+ 		srb->flags &= ~SRB_DMA_VALID;
+ 	}
+ 	cmd->SCp.ptr = NULL;
+@@ -711,7 +744,7 @@
+ 	return stat;
+ }
+ 
+-static void qla4xxx_hw_reset(struct scsi_qla_host *ha)
++void qla4xxx_hw_reset(struct scsi_qla_host *ha)
+ {
+ 	uint32_t ctrl_status;
+ 	unsigned long flags = 0;
+@@ -1081,13 +1114,13 @@
+ 	if (ha->timer_active)
+ 		qla4xxx_stop_timer(ha);
+ 
+-	/* free extra memory */
+-	qla4xxx_mem_free(ha);
+-
+ 	/* Detach interrupts */
+ 	if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags))
+ 		free_irq(ha->pdev->irq, ha);
+ 
++	/* free extra memory */
++	qla4xxx_mem_free(ha);
++
+ 	pci_disable_device(ha->pdev);
+ 
+ }
+@@ -1332,6 +1365,11 @@
+ 
+ 	ha = pci_get_drvdata(pdev);
+ 
++	qla4xxx_disable_intrs(ha);
++
++	while (test_bit(DPC_RESET_HA_INTR, &ha->dpc_flags))
++		ssleep(1);
++
+ 	/* remove devs from iscsi_sessions to scsi_devices */
+ 	qla4xxx_free_ddb_list(ha);
+ 
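The new qla4xxx_host_get_param() answers the transport's
HWADDRESS/IPADDRESS/INITIATOR_NAME queries from state the driver already
caches, and format_addr() renders the MAC bytes colon-separated with a
trailing newline, as sysfs consumers expect. A userspace copy of that
helper (return type narrowed to int for the standalone build):

#include <stdio.h>

/* Same logic as format_addr() above: "aa:bb:cc:dd:ee:ff\n". */
static int format_addr(char *buf, const unsigned char *addr, int len)
{
	char *cp = buf;

	for (int i = 0; i < len; i++)
		cp += sprintf(cp, "%02x%c", addr[i],
			      i == (len - 1) ? '\n' : ':');
	return cp - buf;
}

int main(void)
{
	const unsigned char mac[6] = { 0x00, 0x1b, 0x32, 0x01, 0x02, 0x03 };
	char buf[32];

	format_addr(buf, mac, sizeof(mac));
	fputs(buf, stdout);	/* prints 00:1b:32:01:02:03 */
	return 0;
}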
+diff -Nurb linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_version.h linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_version.h
+--- linux-2.6.22-570/drivers/scsi/qla4xxx/ql4_version.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qla4xxx/ql4_version.h	2007-12-19 15:29:23.000000000 -0500
+@@ -5,4 +5,5 @@
+  * See LICENSE.qla4xxx for copyright and licensing details.
+  */
+ 
+-#define QLA4XXX_DRIVER_VERSION	"5.00.07-k1"
++#define QLA4XXX_DRIVER_VERSION	"5.01.00-k7"
++
+diff -Nurb linux-2.6.22-570/drivers/scsi/qlogicfas408.c linux-2.6.22-try2/drivers/scsi/qlogicfas408.c
+--- linux-2.6.22-570/drivers/scsi/qlogicfas408.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/qlogicfas408.c	2007-12-19 15:29:23.000000000 -0500
+@@ -265,8 +265,6 @@
+ 	unsigned int message;	/* scsi returned message */
+ 	unsigned int phase;	/* recorded scsi phase */
+ 	unsigned int reqlen;	/* total length of transfer */
+-	struct scatterlist *sglist;	/* scatter-gather list pointer */
+-	unsigned int sgcount;	/* sg counter */
+ 	char *buf;
+ 	struct qlogicfas408_priv *priv = get_priv_by_cmd(cmd);
+ 	int qbase = priv->qbase;
+@@ -301,9 +299,10 @@
+ 	if (inb(qbase + 7) & 0x1f)	/* if some bytes in fifo */
+ 		outb(1, qbase + 3);	/* clear fifo */
+ 	/* note that request_bufflen is the total xfer size when sg is used */
+-	reqlen = cmd->request_bufflen;
++	reqlen = scsi_bufflen(cmd);
+ 	/* note that it won't work if transfers > 16M are requested */
+ 	if (reqlen && !((phase = inb(qbase + 4)) & 6)) {	/* data phase */
++		struct scatterlist *sg;
+ 		rtrc(2)
+ 		outb(reqlen, qbase);	/* low-mid xfer cnt */
+ 		outb(reqlen >> 8, qbase + 1);	/* low-mid xfer cnt */
+@@ -311,23 +310,16 @@
+ 		outb(0x90, qbase + 3);	/* command do xfer */
+ 		/* PIO pseudo DMA to buffer or sglist */
+ 		REG1;
+-		if (!cmd->use_sg)
+-			ql_pdma(priv, phase, cmd->request_buffer,
+-				cmd->request_bufflen);
+-		else {
+-			sgcount = cmd->use_sg;
+-			sglist = cmd->request_buffer;
+-			while (sgcount--) {
++
++		scsi_for_each_sg(cmd, sg, scsi_sg_count(cmd), i) {
+ 				if (priv->qabort) {
+ 					REG0;
+ 					return ((priv->qabort == 1 ?
+ 						DID_ABORT : DID_RESET) << 16);
+ 				}
+-				buf = page_address(sglist->page) + sglist->offset;
+-				if (ql_pdma(priv, phase, buf, sglist->length))
++			buf = page_address(sg->page) + sg->offset;
++			if (ql_pdma(priv, phase, buf, sg->length))
+ 					break;
+-				sglist++;
+-			}
+ 		}
+ 		REG0;
+ 		rtrc(2)
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_debug.c linux-2.6.22-try2/drivers/scsi/scsi_debug.c
+--- linux-2.6.22-570/drivers/scsi/scsi_debug.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/scsi_debug.c	2007-12-19 15:29:23.000000000 -0500
+@@ -2405,7 +2405,7 @@
+ MODULE_PARM_DESC(delay, "# of jiffies to delay response(def=1)");
+ MODULE_PARM_DESC(dev_size_mb, "size in MB of ram shared by devs(def=8)");
+ MODULE_PARM_DESC(dsense, "use descriptor sense format(def=0 -> fixed)");
+-MODULE_PARM_DESC(every_nth, "timeout every nth command(def=100)");
++MODULE_PARM_DESC(every_nth, "timeout every nth command(def=0)");
+ MODULE_PARM_DESC(fake_rw, "fake reads/writes instead of copying (def=0)");
+ MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)");
+ MODULE_PARM_DESC(no_lun_0, "no LU number 0 (def=0 -> have lun 0)");
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_error.c linux-2.6.22-try2/drivers/scsi/scsi_error.c
+--- linux-2.6.22-570/drivers/scsi/scsi_error.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/scsi_error.c	2007-12-19 15:29:24.000000000 -0500
+@@ -18,12 +18,13 @@
+ #include <linux/sched.h>
+ #include <linux/timer.h>
+ #include <linux/string.h>
+-#include <linux/slab.h>
+ #include <linux/kernel.h>
++#include <linux/freezer.h>
+ #include <linux/kthread.h>
+ #include <linux/interrupt.h>
+ #include <linux/blkdev.h>
+ #include <linux/delay.h>
++#include <linux/scatterlist.h>
+ 
+ #include <scsi/scsi.h>
+ #include <scsi/scsi_cmnd.h>
+@@ -640,16 +641,8 @@
+ 	memcpy(scmd->cmnd, cmnd, cmnd_size);
+ 
+ 	if (copy_sense) {
+-		gfp_t gfp_mask = GFP_ATOMIC;
+-
+-		if (shost->hostt->unchecked_isa_dma)
+-			gfp_mask |= __GFP_DMA;
+-
+-		sgl.page = alloc_page(gfp_mask);
+-		if (!sgl.page)
+-			return FAILED;
+-		sgl.offset = 0;
+-		sgl.length = 252;
++		sg_init_one(&sgl, scmd->sense_buffer,
++			    sizeof(scmd->sense_buffer));
+ 
+ 		scmd->sc_data_direction = DMA_FROM_DEVICE;
+ 		scmd->request_bufflen = sgl.length;
+@@ -720,18 +713,6 @@
+ 
+ 
+ 	/*
+-	 * Last chance to have valid sense data.
+-	 */
+-	if (copy_sense) {
+-		if (!SCSI_SENSE_VALID(scmd)) {
+-			memcpy(scmd->sense_buffer, page_address(sgl.page),
+-			       sizeof(scmd->sense_buffer));
+-		}
+-		__free_page(sgl.page);
+-	}
+-
+-
+-	/*
+ 	 * Restore original data
+ 	 */
+ 	scmd->request_buffer = old_buffer;
+@@ -1536,8 +1517,6 @@
+ {
+ 	struct Scsi_Host *shost = data;
+ 
+-	current->flags |= PF_NOFREEZE;
+-
+ 	/*
+ 	 * We use TASK_INTERRUPTIBLE so that the thread is not
+ 	 * counted against the load average as a running process.
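The send_eh_cmnd() change above is worth a second look: instead of
allocating a throwaway page, DMA-ing the REQUEST SENSE data into it, and
copying it back, the error handler now aims a one-entry scatterlist
directly at scmd->sense_buffer with sg_init_one(), removing both the
copy and the __free_page() path. The call pattern in isolation (kernel
context; the buffer argument here is illustrative):

#include <linux/scatterlist.h>

/* Wrap an existing kernel buffer in a single-entry scatterlist,
 * as the error handler now does for the sense buffer. */
static void map_one_buffer(void *buf, unsigned int len)
{
	struct scatterlist sgl;

	sg_init_one(&sgl, buf, len);
	/* sgl now describes buf for DMA: no temporary page,
	 * no memcpy back, no free on completion. */
}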
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_lib.c linux-2.6.22-try2/drivers/scsi/scsi_lib.c
+--- linux-2.6.22-570/drivers/scsi/scsi_lib.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/scsi_lib.c	2007-12-19 15:29:23.000000000 -0500
+@@ -2290,3 +2290,41 @@
+ 	kunmap_atomic(virt, KM_BIO_SRC_IRQ);
+ }
+ EXPORT_SYMBOL(scsi_kunmap_atomic_sg);
++
++/**
++ * scsi_dma_map - perform DMA mapping against command's sg lists
++ * @cmd:	scsi command
++ *
++ * Returns the number of sg entries actually mapped, zero if the sg
++ * list is NULL, or -ENOMEM if the mapping failed.
++ */
++int scsi_dma_map(struct scsi_cmnd *cmd)
++{
++	int nseg = 0;
++
++	if (scsi_sg_count(cmd)) {
++		struct device *dev = cmd->device->host->shost_gendev.parent;
++
++		nseg = dma_map_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
++				  cmd->sc_data_direction);
++		if (unlikely(!nseg))
++			return -ENOMEM;
++	}
++	return nseg;
++}
++EXPORT_SYMBOL(scsi_dma_map);
++
++/**
++ * scsi_dma_unmap - unmap command's sg lists mapped by scsi_dma_map
++ * @cmd:	scsi command
++ */
++void scsi_dma_unmap(struct scsi_cmnd *cmd)
++{
++	if (scsi_sg_count(cmd)) {
++		struct device *dev = cmd->device->host->shost_gendev.parent;
++
++		dma_unmap_sg(dev, scsi_sglist(cmd), scsi_sg_count(cmd),
++			     cmd->sc_data_direction);
++	}
++}
++EXPORT_SYMBOL(scsi_dma_unmap);
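These two exports are the heart of the series: they fold the per-driver
pci_map_sg()/pci_map_single() boilerplate into one pair of helpers keyed
off the host's parent device. Callers must honor the three-way return
from scsi_dma_map() and pair every successful map with an unmap, as in
this kernel-context sketch (the queue-room check is a hypothetical
placeholder):

#include <scsi/scsi_cmnd.h>

static int ring_has_room(void)	/* hypothetical hardware check */
{
	return 1;
}

static int submit_sketch(struct scsi_cmnd *cmd)
{
	int nseg = scsi_dma_map(cmd);

	if (nseg < 0)
		return nseg;		/* -ENOMEM: nothing to undo */

	if (nseg && !ring_has_room()) {
		scsi_dma_unmap(cmd);	/* pairs with the successful map */
		return -EBUSY;
	}
	/* ... build descriptors for nseg segments and queue ... */
	return 0;
}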
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_scan.c linux-2.6.22-try2/drivers/scsi/scsi_scan.c
+--- linux-2.6.22-570/drivers/scsi/scsi_scan.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/scsi_scan.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1213,7 +1213,7 @@
+  *     Given a struct scsi_lun of: 0a 04 0b 03 00 00 00 00, this function returns
+  *     the integer: 0x0b030a04
+  **/
+-static int scsilun_to_int(struct scsi_lun *scsilun)
++int scsilun_to_int(struct scsi_lun *scsilun)
+ {
+ 	int i;
+ 	unsigned int lun;
+@@ -1224,6 +1224,7 @@
+ 			      scsilun->scsi_lun[i + 1]) << (i * 8));
+ 	return lun;
+ }
++EXPORT_SYMBOL(scsilun_to_int);
+ 
+ /**
+  * int_to_scsilun: reverts an int into a scsi_lun
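scsilun_to_int() is un-staticed and exported here so qla4xxx and other
LLDs can reuse the canonical LUN folding instead of open-coding it. A
standalone copy with the kernel-doc's own example as a check (the
accumulator is widened to avoid a 32-bit shift on the upper bytes):

#include <stdio.h>

static unsigned int scsilun_to_int(const unsigned char *scsi_lun)
{
	unsigned long long lun = 0;

	for (int i = 0; i < 8; i += 2)
		lun |= (unsigned long long)((scsi_lun[i] << 8) |
					    scsi_lun[i + 1]) << (i * 8);
	return (unsigned int)lun;
}

int main(void)
{
	/* 0a 04 0b 03 00 00 00 00 should fold to 0x0b030a04 */
	const unsigned char lun[8] = { 0x0a, 0x04, 0x0b, 0x03, 0, 0, 0, 0 };

	printf("0x%08x\n", scsilun_to_int(lun));
	return 0;
}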
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_sysfs.c linux-2.6.22-try2/drivers/scsi/scsi_sysfs.c
+--- linux-2.6.22-570/drivers/scsi/scsi_sysfs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/scsi_sysfs.c	2007-12-19 15:29:23.000000000 -0500
+@@ -293,30 +293,18 @@
+ {
+ 	struct device_driver *drv = dev->driver;
+ 	struct scsi_device *sdev = to_scsi_device(dev);
+-	struct scsi_host_template *sht = sdev->host->hostt;
+ 	int err;
+ 
+ 	err = scsi_device_quiesce(sdev);
+ 	if (err)
+ 		return err;
+ 
+-	/* call HLD suspend first */
+ 	if (drv && drv->suspend) {
+ 		err = drv->suspend(dev, state);
+ 		if (err)
+ 			return err;
+ 	}
+ 
+-	/* then, call host suspend */
+-	if (sht->suspend) {
+-		err = sht->suspend(sdev, state);
+-		if (err) {
+-			if (drv && drv->resume)
+-				drv->resume(dev);
+-			return err;
+-		}
+-	}
+-
+ 	return 0;
+ }
+ 
+@@ -324,21 +312,14 @@
+ {
+ 	struct device_driver *drv = dev->driver;
+ 	struct scsi_device *sdev = to_scsi_device(dev);
+-	struct scsi_host_template *sht = sdev->host->hostt;
+-	int err = 0, err2 = 0;
++	int err = 0;
+ 
+-	/* call host resume first */
+-	if (sht->resume)
+-		err = sht->resume(sdev);
+-
+-	/* then, call HLD resume */
+ 	if (drv && drv->resume)
+-		err2 = drv->resume(dev);
++		err = drv->resume(dev);
+ 
+ 	scsi_device_resume(sdev);
+ 
+-	/* favor LLD failure */
+-	return err ? err : err2;;
++	return err;
+ }
+ 
+ struct bus_type scsi_bus_type = {
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_transport_fc.c linux-2.6.22-try2/drivers/scsi/scsi_transport_fc.c
+--- linux-2.6.22-570/drivers/scsi/scsi_transport_fc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/scsi_transport_fc.c	2007-12-19 15:29:23.000000000 -0500
+@@ -19,9 +19,10 @@
+  *
+  *  ========
+  *
+- *  Copyright (C) 2004-2005   James Smart, Emulex Corporation
++ *  Copyright (C) 2004-2007   James Smart, Emulex Corporation
+  *    Rewrite for host, target, device, and remote port attributes,
+  *    statistics, and service functions...
++ *    Add vports, etc
+  *
+  */
+ #include <linux/module.h>
+@@ -37,6 +38,34 @@
+ #include "scsi_priv.h"
+ 
+ static int fc_queue_work(struct Scsi_Host *, struct work_struct *);
++static void fc_vport_sched_delete(struct work_struct *work);
++
++/*
++ * This is a temporary carrier for creating a vport. It will eventually
++ * be replaced by a real message definition for sgio or netlink.
++ *
++ * fc_vport_identifiers: This set of data contains all elements
++ * to uniquely identify and instantiate a FC virtual port.
++ *
++ * Notes:
++ *   symbolic_name: The driver is to append the symbolic_name string data
++ *      to the symbolic_node_name data that it generates by default.
++ *      The resulting combination should then be registered with the switch.
++ *      It is expected that things like Xen may stuff a VM title into
++ *      this field.
++ */
++struct fc_vport_identifiers {
++	u64 node_name;
++	u64 port_name;
++	u32 roles;
++	bool disable;
++	enum fc_port_type vport_type;	/* only FC_PORTTYPE_NPIV allowed */
++	char symbolic_name[FC_VPORT_SYMBOLIC_NAMELEN];
++};
++
++static int fc_vport_create(struct Scsi_Host *shost, int channel,
++	struct device *pdev, struct fc_vport_identifiers  *ids,
++	struct fc_vport **vport);
+ 
+ /*
+  * Redefine so that we can have same named attributes in the
+@@ -90,10 +119,14 @@
+ 	{ FC_PORTTYPE_NLPORT,	"NLPort (fabric via loop)" },
+ 	{ FC_PORTTYPE_LPORT,	"LPort (private loop)" },
+ 	{ FC_PORTTYPE_PTP,	"Point-To-Point (direct nport connection)" },
++	{ FC_PORTTYPE_NPIV,		"NPIV VPORT" },
+ };
+ fc_enum_name_search(port_type, fc_port_type, fc_port_type_names)
+ #define FC_PORTTYPE_MAX_NAMELEN		50
+ 
++/* Reuse fc_port_type enum function for vport_type */
++#define get_fc_vport_type_name get_fc_port_type_name
++
+ 
+ /* Convert fc_host_event_code values to ascii string name */
+ static const struct {
+@@ -139,6 +172,29 @@
+ #define FC_PORTSTATE_MAX_NAMELEN	20
+ 
+ 
++/* Convert fc_vport_state values to ascii string name */
++static struct {
++	enum fc_vport_state	value;
++	char			*name;
++} fc_vport_state_names[] = {
++	{ FC_VPORT_UNKNOWN,		"Unknown" },
++	{ FC_VPORT_ACTIVE,		"Active" },
++	{ FC_VPORT_DISABLED,		"Disabled" },
++	{ FC_VPORT_LINKDOWN,		"Linkdown" },
++	{ FC_VPORT_INITIALIZING,	"Initializing" },
++	{ FC_VPORT_NO_FABRIC_SUPP,	"No Fabric Support" },
++	{ FC_VPORT_NO_FABRIC_RSCS,	"No Fabric Resources" },
++	{ FC_VPORT_FABRIC_LOGOUT,	"Fabric Logout" },
++	{ FC_VPORT_FABRIC_REJ_WWN,	"Fabric Rejected WWN" },
++	{ FC_VPORT_FAILED,		"VPort Failed" },
++};
++fc_enum_name_search(vport_state, fc_vport_state, fc_vport_state_names)
++#define FC_VPORTSTATE_MAX_NAMELEN	24
++
++/* Reuse fc_vport_state enum function for vport_last_state */
++#define get_fc_vport_last_state_name get_fc_vport_state_name
++
++
+ /* Convert fc_tgtid_binding_type values to ascii string name */
+ static const struct {
+ 	enum fc_tgtid_binding_type	value;
+@@ -219,16 +275,16 @@
+ }
+ 
+ 
+-/* Convert FC_RPORT_ROLE bit values to ascii string name */
++/* Convert FC_PORT_ROLE bit values to ascii string name */
+ static const struct {
+ 	u32 			value;
+ 	char			*name;
+-} fc_remote_port_role_names[] = {
+-	{ FC_RPORT_ROLE_FCP_TARGET,	"FCP Target" },
+-	{ FC_RPORT_ROLE_FCP_INITIATOR,	"FCP Initiator" },
+-	{ FC_RPORT_ROLE_IP_PORT,	"IP Port" },
++} fc_port_role_names[] = {
++	{ FC_PORT_ROLE_FCP_TARGET,	"FCP Target" },
++	{ FC_PORT_ROLE_FCP_INITIATOR,	"FCP Initiator" },
++	{ FC_PORT_ROLE_IP_PORT,		"IP Port" },
+ };
+-fc_bitfield_name_search(remote_port_roles, fc_remote_port_role_names)
++fc_bitfield_name_search(port_roles, fc_port_role_names)
+ 
+ /*
+  * Define roles that are specific to port_id. Values are relative to ROLE_MASK.
+@@ -252,7 +308,8 @@
+  */
+ #define FC_STARGET_NUM_ATTRS 	3
+ #define FC_RPORT_NUM_ATTRS	10
+-#define FC_HOST_NUM_ATTRS	17
++#define FC_VPORT_NUM_ATTRS	9
++#define FC_HOST_NUM_ATTRS	21
+ 
+ struct fc_internal {
+ 	struct scsi_transport_template t;
+@@ -278,6 +335,10 @@
+ 	struct transport_container rport_attr_cont;
+ 	struct class_device_attribute private_rport_attrs[FC_RPORT_NUM_ATTRS];
+ 	struct class_device_attribute *rport_attrs[FC_RPORT_NUM_ATTRS + 1];
++
++	struct transport_container vport_attr_cont;
++	struct class_device_attribute private_vport_attrs[FC_VPORT_NUM_ATTRS];
++	struct class_device_attribute *vport_attrs[FC_VPORT_NUM_ATTRS + 1];
+ };
+ 
+ #define to_fc_internal(tmpl)	container_of(tmpl, struct fc_internal, t)
+@@ -331,6 +392,7 @@
+ 		sizeof(fc_host->supported_fc4s));
+ 	fc_host->supported_speeds = FC_PORTSPEED_UNKNOWN;
+ 	fc_host->maxframe_size = -1;
++	fc_host->max_npiv_vports = 0;
+ 	memset(fc_host->serial_number, 0,
+ 		sizeof(fc_host->serial_number));
+ 
+@@ -348,8 +410,11 @@
+ 
+ 	INIT_LIST_HEAD(&fc_host->rports);
+ 	INIT_LIST_HEAD(&fc_host->rport_bindings);
++	INIT_LIST_HEAD(&fc_host->vports);
+ 	fc_host->next_rport_number = 0;
+ 	fc_host->next_target_id = 0;
++	fc_host->next_vport_number = 0;
++	fc_host->npiv_vports_inuse = 0;
+ 
+ 	snprintf(fc_host->work_q_name, KOBJ_NAME_LEN, "fc_wq_%d",
+ 		shost->host_no);
+@@ -388,6 +453,16 @@
+ 			       NULL);
+ 
+ /*
++ * Setup and Remove actions for virtual ports are handled
++ * in the service functions below.
++ */
++static DECLARE_TRANSPORT_CLASS(fc_vport_class,
++			       "fc_vports",
++			       NULL,
++			       NULL,
++			       NULL);
++
++/*
+  * Module Parameters
+  */
+ 
+@@ -585,6 +660,9 @@
+ 	error = transport_class_register(&fc_host_class);
+ 	if (error)
+ 		return error;
++	error = transport_class_register(&fc_vport_class);
++	if (error)
++		return error;
+ 	error = transport_class_register(&fc_rport_class);
+ 	if (error)
+ 		return error;
+@@ -596,6 +674,7 @@
+ 	transport_class_unregister(&fc_transport_class);
+ 	transport_class_unregister(&fc_rport_class);
+ 	transport_class_unregister(&fc_host_class);
++	transport_class_unregister(&fc_vport_class);
+ }
+ 
+ /*
+@@ -800,9 +879,9 @@
+ 			return snprintf(buf, 30, "Unknown Fabric Entity\n");
+ 		}
+ 	} else {
+-		if (rport->roles == FC_RPORT_ROLE_UNKNOWN)
++		if (rport->roles == FC_PORT_ROLE_UNKNOWN)
+ 			return snprintf(buf, 20, "unknown\n");
+-		return get_fc_remote_port_roles_names(rport->roles, buf);
++		return get_fc_port_roles_names(rport->roles, buf);
+ 	}
+ }
+ static FC_CLASS_DEVICE_ATTR(rport, roles, S_IRUGO,
+@@ -857,7 +936,7 @@
+ 
+ /*
+  * Note: in the target show function we recognize when the remote
+- *  port is in the hierarchy and do not allow the driver to get
++ *  port is in the hierarchy and do not allow the driver to get
+  *  involved in sysfs functions. The driver only gets involved if
+  *  it's the "old" style that doesn't use rports.
+  */
+@@ -912,6 +991,257 @@
+ 
+ 
+ /*
++ * FC Virtual Port Attribute Management
++ */
++
++#define fc_vport_show_function(field, format_string, sz, cast)		\
++static ssize_t								\
++show_fc_vport_##field (struct class_device *cdev, char *buf)		\
++{									\
++	struct fc_vport *vport = transport_class_to_vport(cdev);	\
++	struct Scsi_Host *shost = vport_to_shost(vport);		\
++	struct fc_internal *i = to_fc_internal(shost->transportt);	\
++	if ((i->f->get_vport_##field) &&				\
++	    !(vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING)))	\
++		i->f->get_vport_##field(vport);				\
++	return snprintf(buf, sz, format_string, cast vport->field); 	\
++}
++
++#define fc_vport_store_function(field)					\
++static ssize_t								\
++store_fc_vport_##field(struct class_device *cdev, const char *buf,	\
++			   size_t count)				\
++{									\
++	int val;							\
++	struct fc_vport *vport = transport_class_to_vport(cdev);	\
++	struct Scsi_Host *shost = vport_to_shost(vport);		\
++	struct fc_internal *i = to_fc_internal(shost->transportt);	\
++	char *cp;							\
++	if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING))	\
++		return -EBUSY;						\
++	val = simple_strtoul(buf, &cp, 0);				\
++	if (*cp && (*cp != '\n'))					\
++		return -EINVAL;						\
++	i->f->set_vport_##field(vport, val);				\
++	return count;							\
++}
++
++#define fc_vport_store_str_function(field, slen)			\
++static ssize_t								\
++store_fc_vport_##field(struct class_device *cdev, const char *buf,	\
++			   size_t count)				\
++{									\
++	struct fc_vport *vport = transport_class_to_vport(cdev);	\
++	struct Scsi_Host *shost = vport_to_shost(vport);		\
++	struct fc_internal *i = to_fc_internal(shost->transportt);	\
++	unsigned int cnt = count;					\
++									\
++	/* count may include a LF at end of string */			\
++	if (buf[cnt-1] == '\n')						\
++		cnt--;							\
++	if (cnt > ((slen) - 1))						\
++		return -EINVAL;						\
++	memcpy(vport->field, buf, cnt);					\
++	i->f->set_vport_##field(vport);					\
++	return count;							\
++}
++
++#define fc_vport_rd_attr(field, format_string, sz)			\
++	fc_vport_show_function(field, format_string, sz, )		\
++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO,			\
++			 show_fc_vport_##field, NULL)
++
++#define fc_vport_rd_attr_cast(field, format_string, sz, cast)		\
++	fc_vport_show_function(field, format_string, sz, (cast))	\
++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO,			\
++			  show_fc_vport_##field, NULL)
++
++#define fc_vport_rw_attr(field, format_string, sz)			\
++	fc_vport_show_function(field, format_string, sz, )		\
++	fc_vport_store_function(field)					\
++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO | S_IWUSR,		\
++			show_fc_vport_##field,				\
++			store_fc_vport_##field)
++
++#define fc_private_vport_show_function(field, format_string, sz, cast)	\
++static ssize_t								\
++show_fc_vport_##field (struct class_device *cdev, char *buf)		\
++{									\
++	struct fc_vport *vport = transport_class_to_vport(cdev);	\
++	return snprintf(buf, sz, format_string, cast vport->field); 	\
++}
++
++#define fc_private_vport_store_u32_function(field)			\
++static ssize_t								\
++store_fc_vport_##field(struct class_device *cdev, const char *buf,	\
++			   size_t count)				\
++{									\
++	u32 val;							\
++	struct fc_vport *vport = transport_class_to_vport(cdev);	\
++	char *cp;							\
++	if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING))		\
++		return -EBUSY;						\
++	val = simple_strtoul(buf, &cp, 0);				\
++	if (*cp && (*cp != '\n'))					\
++		return -EINVAL;						\
++	vport->field = val;						\
++	return count;							\
++}
++
++
++#define fc_private_vport_rd_attr(field, format_string, sz)		\
++	fc_private_vport_show_function(field, format_string, sz, )	\
++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO,			\
++			 show_fc_vport_##field, NULL)
++
++#define fc_private_vport_rd_attr_cast(field, format_string, sz, cast)	\
++	fc_private_vport_show_function(field, format_string, sz, (cast)) \
++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO,			\
++			  show_fc_vport_##field, NULL)
++
++#define fc_private_vport_rw_u32_attr(field, format_string, sz)		\
++	fc_private_vport_show_function(field, format_string, sz, )	\
++	fc_private_vport_store_u32_function(field)			\
++static FC_CLASS_DEVICE_ATTR(vport, field, S_IRUGO | S_IWUSR,		\
++			show_fc_vport_##field,				\
++			store_fc_vport_##field)
++
++
++#define fc_private_vport_rd_enum_attr(title, maxlen)			\
++static ssize_t								\
++show_fc_vport_##title (struct class_device *cdev, char *buf)		\
++{									\
++	struct fc_vport *vport = transport_class_to_vport(cdev);	\
++	const char *name;						\
++	name = get_fc_##title##_name(vport->title);			\
++	if (!name)							\
++		return -EINVAL;						\
++	return snprintf(buf, maxlen, "%s\n", name);			\
++}									\
++static FC_CLASS_DEVICE_ATTR(vport, title, S_IRUGO,			\
++			show_fc_vport_##title, NULL)
++
++
++#define SETUP_VPORT_ATTRIBUTE_RD(field)					\
++	i->private_vport_attrs[count] = class_device_attr_vport_##field; \
++	i->private_vport_attrs[count].attr.mode = S_IRUGO;		\
++	i->private_vport_attrs[count].store = NULL;			\
++	i->vport_attrs[count] = &i->private_vport_attrs[count];		\
++	if (i->f->get_##field)						\
++		count++
++	/* NOTE: Above MACRO differs: checks function not show bit */
++
++#define SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(field)				\
++	i->private_vport_attrs[count] = class_device_attr_vport_##field; \
++	i->private_vport_attrs[count].attr.mode = S_IRUGO;		\
++	i->private_vport_attrs[count].store = NULL;			\
++	i->vport_attrs[count] = &i->private_vport_attrs[count];		\
++	count++
++
++#define SETUP_VPORT_ATTRIBUTE_WR(field)					\
++	i->private_vport_attrs[count] = class_device_attr_vport_##field; \
++	i->vport_attrs[count] = &i->private_vport_attrs[count];		\
++	if (i->f->field)						\
++		count++
++	/* NOTE: Above MACRO differs: checks function */
++
++#define SETUP_VPORT_ATTRIBUTE_RW(field)					\
++	i->private_vport_attrs[count] = class_device_attr_vport_##field; \
++	if (!i->f->set_vport_##field) {					\
++		i->private_vport_attrs[count].attr.mode = S_IRUGO;	\
++		i->private_vport_attrs[count].store = NULL;		\
++	}								\
++	i->vport_attrs[count] = &i->private_vport_attrs[count];		\
++	count++
++	/* NOTE: Above MACRO differs: does not check show bit */
++
++#define SETUP_PRIVATE_VPORT_ATTRIBUTE_RW(field)				\
++{									\
++	i->private_vport_attrs[count] = class_device_attr_vport_##field; \
++	i->vport_attrs[count] = &i->private_vport_attrs[count];		\
++	count++;							\
++}
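
For readability, here is a hand-expanded sketch of what one of these generators produces; fc_private_vport_rd_attr_cast(node_name, "0x%llx\n", 20, unsigned long long), invoked further below, should expand to roughly the following (names follow the FC_CLASS_DEVICE_ATTR convention used throughout this file):

	static ssize_t
	show_fc_vport_node_name(struct class_device *cdev, char *buf)
	{
		struct fc_vport *vport = transport_class_to_vport(cdev);
		return snprintf(buf, 20, "0x%llx\n",
				(unsigned long long) vport->node_name);
	}
	static FC_CLASS_DEVICE_ATTR(vport, node_name, S_IRUGO,
				show_fc_vport_node_name, NULL);

The SETUP_* helpers then decide at fc_attach_transport() time whether each generated attribute is actually published, and with what mode.
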
++
++
++/* The FC Transport Virtual Port Attributes: */
++
++/* Fixed Virtual Port Attributes */
++
++/* Dynamic Virtual Port Attributes */
++
++/* Private Virtual Port Attributes */
++
++fc_private_vport_rd_enum_attr(vport_state, FC_VPORTSTATE_MAX_NAMELEN);
++fc_private_vport_rd_enum_attr(vport_last_state, FC_VPORTSTATE_MAX_NAMELEN);
++fc_private_vport_rd_attr_cast(node_name, "0x%llx\n", 20, unsigned long long);
++fc_private_vport_rd_attr_cast(port_name, "0x%llx\n", 20, unsigned long long);
++
++static ssize_t
++show_fc_vport_roles (struct class_device *cdev, char *buf)
++{
++	struct fc_vport *vport = transport_class_to_vport(cdev);
++
++	if (vport->roles == FC_PORT_ROLE_UNKNOWN)
++		return snprintf(buf, 20, "unknown\n");
++	return get_fc_port_roles_names(vport->roles, buf);
++}
++static FC_CLASS_DEVICE_ATTR(vport, roles, S_IRUGO, show_fc_vport_roles, NULL);
++
++fc_private_vport_rd_enum_attr(vport_type, FC_PORTTYPE_MAX_NAMELEN);
++
++fc_private_vport_show_function(symbolic_name, "%s\n",
++		FC_VPORT_SYMBOLIC_NAMELEN + 1, )
++fc_vport_store_str_function(symbolic_name, FC_VPORT_SYMBOLIC_NAMELEN)
++static FC_CLASS_DEVICE_ATTR(vport, symbolic_name, S_IRUGO | S_IWUSR,
++		show_fc_vport_symbolic_name, store_fc_vport_symbolic_name);
++
++static ssize_t
++store_fc_vport_delete(struct class_device *cdev, const char *buf,
++			   size_t count)
++{
++	struct fc_vport *vport = transport_class_to_vport(cdev);
++	struct Scsi_Host *shost = vport_to_shost(vport);
++
++	fc_queue_work(shost, &vport->vport_delete_work);
++	return count;
++}
++static FC_CLASS_DEVICE_ATTR(vport, vport_delete, S_IWUSR,
++			NULL, store_fc_vport_delete);
++
++
++/*
++ * Enable/Disable vport
++ *  Write "1" to disable, write "0" to enable
++ */
++static ssize_t
++store_fc_vport_disable(struct class_device *cdev, const char *buf,
++			   size_t count)
++{
++	struct fc_vport *vport = transport_class_to_vport(cdev);
++	struct Scsi_Host *shost = vport_to_shost(vport);
++	struct fc_internal *i = to_fc_internal(shost->transportt);
++	int stat;
++
++	if (vport->flags & (FC_VPORT_DEL | FC_VPORT_CREATING))
++		return -EBUSY;
++
++	if (*buf == '0') {
++		if (vport->vport_state != FC_VPORT_DISABLED)
++			return -EALREADY;
++	} else if (*buf == '1') {
++		if (vport->vport_state == FC_VPORT_DISABLED)
++			return -EALREADY;
++	} else
++		return -EINVAL;
++
++	stat = i->f->vport_disable(vport, ((*buf == '0') ? false : true));
++	return stat ? stat : count;
++}
++static FC_CLASS_DEVICE_ATTR(vport, vport_disable, S_IWUSR,
++			NULL, store_fc_vport_disable);
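
Note the inverted sense here: the attribute is named vport_disable, so writing "1" disables the vport and writing "0" re-enables it, with -EALREADY returned if the vport is already in the requested state. A minimal userspace sketch; the attribute path in the comment is an assumption based on the usual transport-class sysfs layout, not taken from this patch:

	/* Sketch only: the attribute path is illustrative. */
	#include <fcntl.h>
	#include <unistd.h>

	static int set_vport_disabled(const char *attr, int disable)
	{
		/* attr: e.g. .../fc_vport/vport-0:0-0/vport_disable */
		int fd = open(attr, O_WRONLY);
		if (fd < 0)
			return -1;
		/* "1" disables the vport, "0" enables it */
		if (write(fd, disable ? "1" : "0", 1) != 1) {
			close(fd);
			return -1;
		}
		return close(fd);
	}
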
++
++
++/*
+  * Host Attribute Management
+  */
+ 
+@@ -1003,6 +1333,13 @@
+ 	if (i->f->show_host_##field)					\
+ 		count++
+ 
++#define SETUP_HOST_ATTRIBUTE_RD_NS(field)				\
++	i->private_host_attrs[count] = class_device_attr_host_##field;	\
++	i->private_host_attrs[count].attr.mode = S_IRUGO;		\
++	i->private_host_attrs[count].store = NULL;			\
++	i->host_attrs[count] = &i->private_host_attrs[count];		\
++	count++
++
+ #define SETUP_HOST_ATTRIBUTE_RW(field)					\
+ 	i->private_host_attrs[count] = class_device_attr_host_##field;	\
+ 	if (!i->f->set_host_##field) {					\
+@@ -1090,6 +1427,7 @@
+ fc_private_host_rd_attr_cast(permanent_port_name, "0x%llx\n", 20,
+ 			     unsigned long long);
+ fc_private_host_rd_attr(maxframe_size, "%u bytes\n", 20);
++fc_private_host_rd_attr(max_npiv_vports, "%u\n", 20);
+ fc_private_host_rd_attr(serial_number, "%s\n", (FC_SERIAL_NUMBER_SIZE +1));
+ 
+ 
+@@ -1210,6 +1548,9 @@
+ static FC_CLASS_DEVICE_ATTR(host, issue_lip, S_IWUSR, NULL,
+ 			store_fc_private_host_issue_lip);
+ 
++fc_private_host_rd_attr(npiv_vports_inuse, "%u\n", 20);
++
++
+ /*
+  * Host Statistics Management
+  */
+@@ -1285,7 +1626,6 @@
+ static FC_CLASS_DEVICE_ATTR(host, reset_statistics, S_IWUSR, NULL,
+ 				fc_reset_statistics);
+ 
+-
+ static struct attribute *fc_statistics_attrs[] = {
+ 	&class_device_attr_host_seconds_since_last_reset.attr,
+ 	&class_device_attr_host_tx_frames.attr,
+@@ -1316,6 +1656,142 @@
+ 	.attrs = fc_statistics_attrs,
+ };
+ 
++
++/* Host Vport Attributes */
++
++static int
++fc_parse_wwn(const char *ns, u64 *nm)
++{
++	unsigned int i, j;
++	u8 wwn[8];
++
++	memset(wwn, 0, sizeof(wwn));
++
++	/* Validate and store the new name */
++	for (i=0, j=0; i < 16; i++) {
++		if ((*ns >= 'a') && (*ns <= 'f'))
++			j = ((j << 4) | ((*ns++ -'a') + 10));
++		else if ((*ns >= 'A') && (*ns <= 'F'))
++			j = ((j << 4) | ((*ns++ -'A') + 10));
++		else if ((*ns >= '0') && (*ns <= '9'))
++			j = ((j << 4) | (*ns++ -'0'));
++		else
++			return -EINVAL;
++		if (i % 2) {
++			wwn[i/2] = j & 0xff;
++			j = 0;
++		}
++	}
++
++	*nm = wwn_to_u64(wwn);
++
++	return 0;
++}
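
fc_parse_wwn() consumes exactly 16 hex digits with no prefix or separators, folding digit pairs into bytes before wwn_to_u64() assembles the 64-bit name, so "210000e08b05ade2" parses to 0x210000e08b05ade2. A standalone userspace re-implementation of the same rules, as a sketch for experimentation (not part of the patch); like the kernel version, it assumes the caller has already validated that at least 16 characters are present:

	#include <stdint.h>
	#include <stdio.h>

	static int parse_wwn(const char *ns, uint64_t *nm)
	{
		uint64_t v = 0;
		int i;

		for (i = 0; i < 16; i++, ns++) {
			if (*ns >= '0' && *ns <= '9')
				v = (v << 4) | (uint64_t)(*ns - '0');
			else if (*ns >= 'a' && *ns <= 'f')
				v = (v << 4) | (uint64_t)(*ns - 'a' + 10);
			else if (*ns >= 'A' && *ns <= 'F')
				v = (v << 4) | (uint64_t)(*ns - 'A' + 10);
			else
				return -1;	/* reject 0x, separators, etc. */
		}
		*nm = v;
		return 0;
	}

	int main(void)
	{
		uint64_t wwn;
		if (!parse_wwn("210000e08b05ade2", &wwn))
			printf("0x%016llx\n", (unsigned long long)wwn);
		return 0;
	}
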
++
++
++/*
++ * "Short-cut" sysfs variable to create a new vport on a FC Host.
++ * Input is a string of the form "<WWPN>:<WWNN>". Other attributes
++ * will default to a NPIV-based FCP_Initiator; The WWNs are specified
++ * as hex characters, and may *not* contain any prefixes (e.g. 0x, x, etc)
++ */
++static ssize_t
++store_fc_host_vport_create(struct class_device *cdev, const char *buf,
++			   size_t count)
++{
++	struct Scsi_Host *shost = transport_class_to_shost(cdev);
++	struct fc_vport_identifiers vid;
++	struct fc_vport *vport;
++	unsigned int cnt=count;
++	int stat;
++
++	memset(&vid, 0, sizeof(vid));
++
++	/* count may include a LF at end of string */
++	if (buf[cnt-1] == '\n')
++		cnt--;
++
++	/* validate we have enough characters for WWPN */
++	if ((cnt != (16+1+16)) || (buf[16] != ':'))
++		return -EINVAL;
++
++	stat = fc_parse_wwn(&buf[0], &vid.port_name);
++	if (stat)
++		return stat;
++
++	stat = fc_parse_wwn(&buf[17], &vid.node_name);
++	if (stat)
++		return stat;
++
++	vid.roles = FC_PORT_ROLE_FCP_INITIATOR;
++	vid.vport_type = FC_PORTTYPE_NPIV;
++	/* vid.symbolic_name is already zeroed */
++	vid.disable = false;		/* always enabled */
++
++	/* we only allow support on Channel 0 !!! */
++	stat = fc_vport_create(shost, 0, &shost->shost_gendev, &vid, &vport);
++	return stat ? stat : count;
++}
++static FC_CLASS_DEVICE_ATTR(host, vport_create, S_IWUSR, NULL,
++			store_fc_host_vport_create);
++
++
++/*
++ * "Short-cut" sysfs variable to delete a vport on a FC Host.
++ * Vport is identified by a string containing "<WWPN>:<WWNN>".
++ * The WWNs are specified as hex characters, and may *not* contain
++ * any prefixes (e.g. 0x, x, etc)
++ */
++static ssize_t
++store_fc_host_vport_delete(struct class_device *cdev, const char *buf,
++			   size_t count)
++{
++	struct Scsi_Host *shost = transport_class_to_shost(cdev);
++	struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
++	struct fc_vport *vport;
++	u64 wwpn, wwnn;
++	unsigned long flags;
++	unsigned int cnt=count;
++	int stat, match;
++
++	/* count may include a LF at end of string */
++	if (buf[cnt-1] == '\n')
++		cnt--;
++
++	/* validate we have enough characters for WWPN */
++	if ((cnt != (16+1+16)) || (buf[16] != ':'))
++		return -EINVAL;
++
++	stat = fc_parse_wwn(&buf[0], &wwpn);
++	if (stat)
++		return stat;
++
++	stat = fc_parse_wwn(&buf[17], &wwnn);
++	if (stat)
++		return stat;
++
++	spin_lock_irqsave(shost->host_lock, flags);
++	match = 0;
++	/* we only allow support on Channel 0 !!! */
++	list_for_each_entry(vport, &fc_host->vports, peers) {
++		if ((vport->channel == 0) &&
++		    (vport->port_name == wwpn) && (vport->node_name == wwnn)) {
++			match = 1;
++			break;
++		}
++	}
++	spin_unlock_irqrestore(shost->host_lock, flags);
++
++	if (!match)
++		return -ENODEV;
++
++	stat = fc_vport_terminate(vport);
++	return stat ? stat : count;
++}
++static FC_CLASS_DEVICE_ATTR(host, vport_delete, S_IWUSR, NULL,
++			store_fc_host_vport_delete);
++
++
+ static int fc_host_match(struct attribute_container *cont,
+ 			  struct device *dev)
+ {
+@@ -1387,6 +1863,40 @@
+ }
+ 
+ 
++static void fc_vport_dev_release(struct device *dev)
++{
++	struct fc_vport *vport = dev_to_vport(dev);
++	put_device(dev->parent);		/* release kobj parent */
++	kfree(vport);
++}
++
++int scsi_is_fc_vport(const struct device *dev)
++{
++	return dev->release == fc_vport_dev_release;
++}
++EXPORT_SYMBOL(scsi_is_fc_vport);
++
++static int fc_vport_match(struct attribute_container *cont,
++			    struct device *dev)
++{
++	struct fc_vport *vport;
++	struct Scsi_Host *shost;
++	struct fc_internal *i;
++
++	if (!scsi_is_fc_vport(dev))
++		return 0;
++	vport = dev_to_vport(dev);
++
++	shost = vport_to_shost(vport);
++	if (!shost->transportt  || shost->transportt->host_attrs.ac.class
++	    != &fc_host_class.class)
++		return 0;
++
++	i = to_fc_internal(shost->transportt);
++	return &i->vport_attr_cont.ac == cont;
++}
++
++
+ /**
+  * fc_timed_out - FC Transport I/O timeout intercept handler
+  *
+@@ -1433,6 +1943,9 @@
+ 		if (rport->scsi_target_id == -1)
+ 			continue;
+ 
++		if (rport->port_state != FC_PORTSTATE_ONLINE)
++			continue;
++
+ 		if ((channel == SCAN_WILD_CARD || channel == rport->channel) &&
+ 		    (id == SCAN_WILD_CARD || id == rport->scsi_target_id)) {
+ 			scsi_scan_target(&rport->dev, rport->channel,
+@@ -1472,6 +1985,11 @@
+ 	i->rport_attr_cont.ac.match = fc_rport_match;
+ 	transport_container_register(&i->rport_attr_cont);
+ 
++	i->vport_attr_cont.ac.attrs = &i->vport_attrs[0];
++	i->vport_attr_cont.ac.class = &fc_vport_class.class;
++	i->vport_attr_cont.ac.match = fc_vport_match;
++	transport_container_register(&i->vport_attr_cont);
++
+ 	i->f = ft;
+ 
+ 	/* Transport uses the shost workq for scsi scanning */
+@@ -1505,6 +2023,10 @@
+ 	SETUP_HOST_ATTRIBUTE_RD(supported_fc4s);
+ 	SETUP_HOST_ATTRIBUTE_RD(supported_speeds);
+ 	SETUP_HOST_ATTRIBUTE_RD(maxframe_size);
++	if (ft->vport_create) {
++		SETUP_HOST_ATTRIBUTE_RD_NS(max_npiv_vports);
++		SETUP_HOST_ATTRIBUTE_RD_NS(npiv_vports_inuse);
++	}
+ 	SETUP_HOST_ATTRIBUTE_RD(serial_number);
+ 
+ 	SETUP_HOST_ATTRIBUTE_RD(port_id);
+@@ -1520,6 +2042,10 @@
+ 	SETUP_PRIVATE_HOST_ATTRIBUTE_RW(tgtid_bind_type);
+ 	if (ft->issue_fc_host_lip)
+ 		SETUP_PRIVATE_HOST_ATTRIBUTE_RW(issue_lip);
++	if (ft->vport_create)
++		SETUP_PRIVATE_HOST_ATTRIBUTE_RW(vport_create);
++	if (ft->vport_delete)
++		SETUP_PRIVATE_HOST_ATTRIBUTE_RW(vport_delete);
+ 
+ 	BUG_ON(count > FC_HOST_NUM_ATTRS);
+ 
+@@ -1545,6 +2071,24 @@
+ 
+ 	i->rport_attrs[count] = NULL;
+ 
++	/*
++	 * Setup Virtual Port Attributes.
++	 */
++	count=0;
++	SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(vport_state);
++	SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(vport_last_state);
++	SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(node_name);
++	SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(port_name);
++	SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(roles);
++	SETUP_PRIVATE_VPORT_ATTRIBUTE_RD(vport_type);
++	SETUP_VPORT_ATTRIBUTE_RW(symbolic_name);
++	SETUP_VPORT_ATTRIBUTE_WR(vport_delete);
++	SETUP_VPORT_ATTRIBUTE_WR(vport_disable);
++
++	BUG_ON(count > FC_VPORT_NUM_ATTRS);
++
++	i->vport_attrs[count] = NULL;
++
+ 	return &i->t;
+ }
+ EXPORT_SYMBOL(fc_attach_transport);
+@@ -1556,6 +2100,7 @@
+ 	transport_container_unregister(&i->t.target_attrs);
+ 	transport_container_unregister(&i->t.host_attrs);
+ 	transport_container_unregister(&i->rport_attr_cont);
++	transport_container_unregister(&i->vport_attr_cont);
+ 
+ 	kfree(i);
+ }
+@@ -1667,9 +2212,17 @@
+ void
+ fc_remove_host(struct Scsi_Host *shost)
+ {
+-	struct fc_rport *rport, *next_rport;
++	struct fc_vport *vport = NULL, *next_vport = NULL;
++	struct fc_rport *rport = NULL, *next_rport = NULL;
+ 	struct workqueue_struct *work_q;
+ 	struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
++	unsigned long flags;
++
++	spin_lock_irqsave(shost->host_lock, flags);
++
++	/* Remove any vports */
++	list_for_each_entry_safe(vport, next_vport, &fc_host->vports, peers)
++		fc_queue_work(shost, &vport->vport_delete_work);
+ 
+ 	/* Remove any remote ports */
+ 	list_for_each_entry_safe(rport, next_rport,
+@@ -1686,6 +2239,8 @@
+ 		fc_queue_work(shost, &rport->rport_delete_work);
+ 	}
+ 
++	spin_unlock_irqrestore(shost->host_lock, flags);
++
+ 	/* flush all scan work items */
+ 	scsi_flush_work(shost);
+ 
+@@ -1844,7 +2399,7 @@
+ 	spin_lock_irqsave(shost->host_lock, flags);
+ 
+ 	rport->number = fc_host->next_rport_number++;
+-	if (rport->roles & FC_RPORT_ROLE_FCP_TARGET)
++	if (rport->roles & FC_PORT_ROLE_FCP_TARGET)
+ 		rport->scsi_target_id = fc_host->next_target_id++;
+ 	else
+ 		rport->scsi_target_id = -1;
+@@ -1869,7 +2424,7 @@
+ 	transport_add_device(dev);
+ 	transport_configure_device(dev);
+ 
+-	if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) {
++	if (rport->roles & FC_PORT_ROLE_FCP_TARGET) {
+ 		/* initiate a scan of the target */
+ 		rport->flags |= FC_RPORT_SCAN_PENDING;
+ 		scsi_queue_work(shost, &rport->scan_work);
+@@ -2003,7 +2558,7 @@
+ 
+ 				/* was a target, not in roles */
+ 				if ((rport->scsi_target_id != -1) &&
+-				    (!(ids->roles & FC_RPORT_ROLE_FCP_TARGET)))
++				    (!(ids->roles & FC_PORT_ROLE_FCP_TARGET)))
+ 					return rport;
+ 
+ 				/*
+@@ -2086,7 +2641,7 @@
+ 				memset(rport->dd_data, 0,
+ 						fci->f->dd_fcrport_size);
+ 
+-			if (rport->roles & FC_RPORT_ROLE_FCP_TARGET) {
++			if (rport->roles & FC_PORT_ROLE_FCP_TARGET) {
+ 				/* initiate a scan of the target */
+ 				rport->flags |= FC_RPORT_SCAN_PENDING;
+ 				scsi_queue_work(shost, &rport->scan_work);
+@@ -2243,11 +2798,11 @@
+ 	int create = 0;
+ 
+ 	spin_lock_irqsave(shost->host_lock, flags);
+-	if (roles & FC_RPORT_ROLE_FCP_TARGET) {
++	if (roles & FC_PORT_ROLE_FCP_TARGET) {
+ 		if (rport->scsi_target_id == -1) {
+ 			rport->scsi_target_id = fc_host->next_target_id++;
+ 			create = 1;
+-		} else if (!(rport->roles & FC_RPORT_ROLE_FCP_TARGET))
++		} else if (!(rport->roles & FC_PORT_ROLE_FCP_TARGET))
+ 			create = 1;
+ 	}
+ 
+@@ -2317,7 +2872,7 @@
+ 	 */
+ 	if ((rport->port_state == FC_PORTSTATE_ONLINE) &&
+ 	    (rport->scsi_target_id != -1) &&
+-	    !(rport->roles & FC_RPORT_ROLE_FCP_TARGET)) {
++	    !(rport->roles & FC_PORT_ROLE_FCP_TARGET)) {
+ 		dev_printk(KERN_ERR, &rport->dev,
+ 			"blocked FC remote port time out: no longer"
+ 			" a FCP target, removing starget\n");
+@@ -2367,7 +2922,7 @@
+ 	 */
+ 	rport->maxframe_size = -1;
+ 	rport->supported_classes = FC_COS_UNSPECIFIED;
+-	rport->roles = FC_RPORT_ROLE_UNKNOWN;
++	rport->roles = FC_PORT_ROLE_UNKNOWN;
+ 	rport->port_state = FC_PORTSTATE_NOTPRESENT;
+ 
+ 	/* remove the identifiers that aren't used in the consisting binding */
+@@ -2436,7 +2991,7 @@
+ 	unsigned long flags;
+ 
+ 	if ((rport->port_state == FC_PORTSTATE_ONLINE) &&
+-	    (rport->roles & FC_RPORT_ROLE_FCP_TARGET)) {
++	    (rport->roles & FC_PORT_ROLE_FCP_TARGET)) {
+ 		scsi_scan_target(&rport->dev, rport->channel,
+ 			rport->scsi_target_id, SCAN_WILD_CARD, 1);
+ 	}
+@@ -2447,7 +3002,227 @@
+ }
+ 
+ 
+-MODULE_AUTHOR("Martin Hicks");
++/**
++ * fc_vport_create - allocates and creates an FC virtual port.
++ * @shost:	scsi host the virtual port is connected to.
++ * @channel:	Channel on shost port connected to.
++ * @pdev:	parent device for vport
++ * @ids:	The world wide names, FC4 port roles, etc for
++ *              the virtual port.
++ * @ret_vport:	The pointer to the created vport.
++ *
++ * Allocates and creates the vport structure, calls the parent host
++ * to instantiate the vport, then completes with class and sysfs creation.
++ *
++ * Notes:
++ *	This routine assumes no locks are held on entry.
++ **/
++static int
++fc_vport_create(struct Scsi_Host *shost, int channel, struct device *pdev,
++	struct fc_vport_identifiers  *ids, struct fc_vport **ret_vport)
++{
++	struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
++	struct fc_internal *fci = to_fc_internal(shost->transportt);
++	struct fc_vport *vport;
++	struct device *dev;
++	unsigned long flags;
++	size_t size;
++	int error;
++
++	*ret_vport = NULL;
++
++	if (!fci->f->vport_create)
++		return -ENOENT;
++
++	size = (sizeof(struct fc_vport) + fci->f->dd_fcvport_size);
++	vport = kzalloc(size, GFP_KERNEL);
++	if (unlikely(!vport)) {
++		printk(KERN_ERR "%s: allocation failure\n", __FUNCTION__);
++		return -ENOMEM;
++	}
++
++	vport->vport_state = FC_VPORT_UNKNOWN;
++	vport->vport_last_state = FC_VPORT_UNKNOWN;
++	vport->node_name = ids->node_name;
++	vport->port_name = ids->port_name;
++	vport->roles = ids->roles;
++	vport->vport_type = ids->vport_type;
++	if (fci->f->dd_fcvport_size)
++		vport->dd_data = &vport[1];
++	vport->shost = shost;
++	vport->channel = channel;
++	vport->flags = FC_VPORT_CREATING;
++	INIT_WORK(&vport->vport_delete_work, fc_vport_sched_delete);
++
++	spin_lock_irqsave(shost->host_lock, flags);
++
++	if (fc_host->npiv_vports_inuse >= fc_host->max_npiv_vports) {
++		spin_unlock_irqrestore(shost->host_lock, flags);
++		kfree(vport);
++		return -ENOSPC;
++	}
++	fc_host->npiv_vports_inuse++;
++	vport->number = fc_host->next_vport_number++;
++	list_add_tail(&vport->peers, &fc_host->vports);
++	get_device(&shost->shost_gendev);	/* for fc_host->vport list */
++
++	spin_unlock_irqrestore(shost->host_lock, flags);
++
++	dev = &vport->dev;
++	device_initialize(dev);			/* takes self reference */
++	dev->parent = get_device(pdev);		/* takes parent reference */
++	dev->release = fc_vport_dev_release;
++	sprintf(dev->bus_id, "vport-%d:%d-%d",
++		shost->host_no, channel, vport->number);
++	transport_setup_device(dev);
++
++	error = device_add(dev);
++	if (error) {
++		printk(KERN_ERR "FC Virtual Port device_add failed\n");
++		goto delete_vport;
++	}
++	transport_add_device(dev);
++	transport_configure_device(dev);
++
++	error = fci->f->vport_create(vport, ids->disable);
++	if (error) {
++		printk(KERN_ERR "FC Virtual Port LLDD Create failed\n");
++		goto delete_vport_all;
++	}
++
++	/*
++	 * if the parent isn't the physical adapter's Scsi_Host, ensure
++	 * the Scsi_Host at least contains a symlink to the vport.
++	 */
++	if (pdev != &shost->shost_gendev) {
++		error = sysfs_create_link(&shost->shost_gendev.kobj,
++				 &dev->kobj, dev->bus_id);
++		if (error)
++			printk(KERN_ERR
++				"%s: Cannot create vport symlinks for "
++				"%s, err=%d\n",
++				__FUNCTION__, dev->bus_id, error);
++	}
++	spin_lock_irqsave(shost->host_lock, flags);
++	vport->flags &= ~FC_VPORT_CREATING;
++	spin_unlock_irqrestore(shost->host_lock, flags);
++
++	dev_printk(KERN_NOTICE, pdev,
++			"%s created via shost%d channel %d\n", dev->bus_id,
++			shost->host_no, channel);
++
++	*ret_vport = vport;
++
++	return 0;
++
++delete_vport_all:
++	transport_remove_device(dev);
++	device_del(dev);
++delete_vport:
++	transport_destroy_device(dev);
++	spin_lock_irqsave(shost->host_lock, flags);
++	list_del(&vport->peers);
++	put_device(&shost->shost_gendev);	/* for fc_host->vport list */
++	fc_host->npiv_vports_inuse--;
++	spin_unlock_irqrestore(shost->host_lock, flags);
++	put_device(dev->parent);
++	kfree(vport);
++
++	return error;
++}
++
++
++/**
++ * fc_vport_terminate - Admin App or LLDD requests termination of a vport
++ * @vport:	fc_vport to be terminated
++ *
++ * Calls the LLDD vport_delete() function, then deallocates and removes
++ * the vport from the shost and object tree.
++ *
++ * Notes:
++ *	This routine assumes no locks are held on entry.
++ **/
++int
++fc_vport_terminate(struct fc_vport *vport)
++{
++	struct Scsi_Host *shost = vport_to_shost(vport);
++	struct fc_host_attrs *fc_host = shost_to_fc_host(shost);
++	struct fc_internal *i = to_fc_internal(shost->transportt);
++	struct device *dev = &vport->dev;
++	unsigned long flags;
++	int stat;
++
++	spin_lock_irqsave(shost->host_lock, flags);
++	if (vport->flags & FC_VPORT_CREATING) {
++		spin_unlock_irqrestore(shost->host_lock, flags);
++		return -EBUSY;
++	}
++	if (vport->flags & (FC_VPORT_DEL)) {
++		spin_unlock_irqrestore(shost->host_lock, flags);
++		return -EALREADY;
++	}
++	vport->flags |= FC_VPORT_DELETING;
++	spin_unlock_irqrestore(shost->host_lock, flags);
++
++	if (i->f->vport_delete)
++		stat = i->f->vport_delete(vport);
++	else
++		stat = -ENOENT;
++
++	spin_lock_irqsave(shost->host_lock, flags);
++	vport->flags &= ~FC_VPORT_DELETING;
++	if (!stat) {
++		vport->flags |= FC_VPORT_DELETED;
++		list_del(&vport->peers);
++		fc_host->npiv_vports_inuse--;
++		put_device(&shost->shost_gendev);  /* for fc_host->vport list */
++	}
++	spin_unlock_irqrestore(shost->host_lock, flags);
++
++	if (stat)
++		return stat;
++
++	if (dev->parent != &shost->shost_gendev)
++		sysfs_remove_link(&shost->shost_gendev.kobj, dev->bus_id);
++	transport_remove_device(dev);
++	device_del(dev);
++	transport_destroy_device(dev);
++
++	/*
++	 * Removing our self-reference should mean our
++	 * release function gets called, which will drop the remaining
++	 * parent reference and free the data structure.
++	 */
++	put_device(dev);			/* for self-reference */
++
++	return 0; /* SUCCESS */
++}
++EXPORT_SYMBOL(fc_vport_terminate);
++
++/**
++ * fc_vport_sched_delete - workq-based delete request for a vport
++ *
++ * @work:	vport to be deleted.
++ **/
++static void
++fc_vport_sched_delete(struct work_struct *work)
++{
++	struct fc_vport *vport =
++		container_of(work, struct fc_vport, vport_delete_work);
++	int stat;
++
++	stat = fc_vport_terminate(vport);
++	if (stat)
++		dev_printk(KERN_ERR, vport->dev.parent,
++			"%s: %s (created via shost%d channel %d) could "
++			"not be deleted - error %d\n", __FUNCTION__,
++			vport->dev.bus_id, vport->shost->host_no,
++			vport->channel, stat);
++}
++
++
++/* Original Author:  Martin Hicks */
++MODULE_AUTHOR("James Smart");
+ MODULE_DESCRIPTION("FC Transport Attributes");
+ MODULE_LICENSE("GPL");
+ 
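
Taken together with the host-level vport_create/vport_delete attributes added above, the expected userspace workflow is to write "<WWPN>:<WWNN>" (exactly 16 hex digits, a colon, and 16 more hex digits, optionally newline-terminated) to create an NPIV vport, and the same string to delete it again. A hedged sketch; the /sys/class/fc_host path follows the usual convention but is an assumption here, not something this patch defines:

	#include <stdio.h>

	static int npiv_vport_create(int host_no,
				     const char *wwpn, const char *wwnn)
	{
		char path[64];
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/class/fc_host/host%d/vport_create", host_no);
		f = fopen(path, "w");
		if (!f)
			return -1;
		/* kernel checks: 16 hex chars, ':', 16 hex chars (LF allowed) */
		fprintf(f, "%s:%s\n", wwpn, wwnn);
		return fclose(f);
	}

For example, npiv_vport_create(5, "210000e08b05ade2", "200000e08b05ade2") creates the vport, after which writing the matching string to vport_delete removes it.
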
+diff -Nurb linux-2.6.22-570/drivers/scsi/scsi_transport_iscsi.c linux-2.6.22-try2/drivers/scsi/scsi_transport_iscsi.c
+--- linux-2.6.22-570/drivers/scsi/scsi_transport_iscsi.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/scsi_transport_iscsi.c	2007-12-19 15:29:23.000000000 -0500
+@@ -30,9 +30,9 @@
+ #include <scsi/scsi_transport_iscsi.h>
+ #include <scsi/iscsi_if.h>
+ 
+-#define ISCSI_SESSION_ATTRS 11
++#define ISCSI_SESSION_ATTRS 15
+ #define ISCSI_CONN_ATTRS 11
+-#define ISCSI_HOST_ATTRS 0
++#define ISCSI_HOST_ATTRS 4
+ #define ISCSI_TRANSPORT_VERSION "2.0-724"
+ 
+ struct iscsi_internal {
+@@ -609,12 +609,10 @@
+ 	int t = done ? NLMSG_DONE : type;
+ 
+ 	skb = alloc_skb(len, GFP_ATOMIC);
+-	/*
+-	 * FIXME:
+-	 * user is supposed to react on iferror == -ENOMEM;
+-	 * see iscsi_if_rx().
+-	 */
+-	BUG_ON(!skb);
++	if (!skb) {
++		printk(KERN_ERR "Could not allocate skb to send reply.\n");
++		return -ENOMEM;
++	}
+ 
+ 	nlh = __nlmsg_put(skb, pid, seq, t, (len - sizeof(*nlh)), 0);
+ 	nlh->nlmsg_flags = flags;
+@@ -816,6 +814,8 @@
+ 	uint32_t hostno;
+ 
+ 	session = transport->create_session(transport, &priv->t,
++					    ev->u.c_session.cmds_max,
++					    ev->u.c_session.queue_depth,
+ 					    ev->u.c_session.initial_cmdsn,
+ 					    &hostno);
+ 	if (!session)
+@@ -947,15 +947,50 @@
+ iscsi_tgt_dscvr(struct iscsi_transport *transport,
+ 		struct iscsi_uevent *ev)
+ {
++	struct Scsi_Host *shost;
+ 	struct sockaddr *dst_addr;
++	int err;
+ 
+ 	if (!transport->tgt_dscvr)
+ 		return -EINVAL;
+ 
++	shost = scsi_host_lookup(ev->u.tgt_dscvr.host_no);
++	if (IS_ERR(shost)) {
++		printk(KERN_ERR "target discovery could not find host no %u\n",
++		       ev->u.tgt_dscvr.host_no);
++		return -ENODEV;
++	}
++
++
+ 	dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev));
+-	return transport->tgt_dscvr(ev->u.tgt_dscvr.type,
+-				    ev->u.tgt_dscvr.host_no,
++	err = transport->tgt_dscvr(shost, ev->u.tgt_dscvr.type,
+ 				    ev->u.tgt_dscvr.enable, dst_addr);
++	scsi_host_put(shost);
++	return err;
++}
++
++static int
++iscsi_set_host_param(struct iscsi_transport *transport,
++		     struct iscsi_uevent *ev)
++{
++	char *data = (char*)ev + sizeof(*ev);
++	struct Scsi_Host *shost;
++	int err;
++
++	if (!transport->set_host_param)
++		return -ENOSYS;
++
++	shost = scsi_host_lookup(ev->u.set_host_param.host_no);
++	if (IS_ERR(shost)) {
++		printk(KERN_ERR "set_host_param could not find host no %u\n",
++		       ev->u.set_host_param.host_no);
++		return -ENODEV;
++	}
++
++	err = transport->set_host_param(shost, ev->u.set_host_param.param,
++					data, ev->u.set_host_param.len);
++	scsi_host_put(shost);
++	return err;
+ }
+ 
+ static int
+@@ -1049,8 +1084,11 @@
+ 	case ISCSI_UEVENT_TGT_DSCVR:
+ 		err = iscsi_tgt_dscvr(transport, ev);
+ 		break;
++	case ISCSI_UEVENT_SET_HOST_PARAM:
++		err = iscsi_set_host_param(transport, ev);
++		break;
+ 	default:
+-		err = -EINVAL;
++		err = -ENOSYS;
+ 		break;
+ 	}
+ 
+@@ -1160,30 +1198,37 @@
+ /*
+  * iSCSI session attrs
+  */
+-#define iscsi_session_attr_show(param)					\
++#define iscsi_session_attr_show(param, perm)				\
+ static ssize_t								\
+ show_session_param_##param(struct class_device *cdev, char *buf)	\
+ {									\
+ 	struct iscsi_cls_session *session = iscsi_cdev_to_session(cdev); \
+ 	struct iscsi_transport *t = session->transport;			\
++									\
++	if (perm && !capable(CAP_SYS_ADMIN))				\
++		return -EACCES;						\
+ 	return t->get_session_param(session, param, buf);		\
+ }
+ 
+-#define iscsi_session_attr(field, param)				\
+-	iscsi_session_attr_show(param)					\
++#define iscsi_session_attr(field, param, perm)				\
++	iscsi_session_attr_show(param, perm)				\
+ static ISCSI_CLASS_ATTR(sess, field, S_IRUGO, show_session_param_##param, \
+ 			NULL);
+ 
+-iscsi_session_attr(targetname, ISCSI_PARAM_TARGET_NAME);
+-iscsi_session_attr(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN);
+-iscsi_session_attr(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T);
+-iscsi_session_attr(immediate_data, ISCSI_PARAM_IMM_DATA_EN);
+-iscsi_session_attr(first_burst_len, ISCSI_PARAM_FIRST_BURST);
+-iscsi_session_attr(max_burst_len, ISCSI_PARAM_MAX_BURST);
+-iscsi_session_attr(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN);
+-iscsi_session_attr(data_seq_in_order, ISCSI_PARAM_DATASEQ_INORDER_EN);
+-iscsi_session_attr(erl, ISCSI_PARAM_ERL);
+-iscsi_session_attr(tpgt, ISCSI_PARAM_TPGT);
++iscsi_session_attr(targetname, ISCSI_PARAM_TARGET_NAME, 0);
++iscsi_session_attr(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN, 0);
++iscsi_session_attr(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T, 0);
++iscsi_session_attr(immediate_data, ISCSI_PARAM_IMM_DATA_EN, 0);
++iscsi_session_attr(first_burst_len, ISCSI_PARAM_FIRST_BURST, 0);
++iscsi_session_attr(max_burst_len, ISCSI_PARAM_MAX_BURST, 0);
++iscsi_session_attr(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN, 0);
++iscsi_session_attr(data_seq_in_order, ISCSI_PARAM_DATASEQ_INORDER_EN, 0);
++iscsi_session_attr(erl, ISCSI_PARAM_ERL, 0);
++iscsi_session_attr(tpgt, ISCSI_PARAM_TPGT, 0);
++iscsi_session_attr(username, ISCSI_PARAM_USERNAME, 1);
++iscsi_session_attr(username_in, ISCSI_PARAM_USERNAME_IN, 1);
++iscsi_session_attr(password, ISCSI_PARAM_PASSWORD, 1);
++iscsi_session_attr(password_in, ISCSI_PARAM_PASSWORD_IN, 1);
+ 
+ #define iscsi_priv_session_attr_show(field, format)			\
+ static ssize_t								\
+@@ -1199,6 +1244,28 @@
+ 			NULL)
+ iscsi_priv_session_attr(recovery_tmo, "%d");
+ 
++/*
++ * iSCSI host attrs
++ */
++#define iscsi_host_attr_show(param)					\
++static ssize_t								\
++show_host_param_##param(struct class_device *cdev, char *buf)		\
++{									\
++	struct Scsi_Host *shost = transport_class_to_shost(cdev);	\
++	struct iscsi_internal *priv = to_iscsi_internal(shost->transportt); \
++	return priv->iscsi_transport->get_host_param(shost, param, buf); \
++}
++
++#define iscsi_host_attr(field, param)					\
++	iscsi_host_attr_show(param)					\
++static ISCSI_CLASS_ATTR(host, field, S_IRUGO, show_host_param_##param,	\
++			NULL);
++
++iscsi_host_attr(netdev, ISCSI_HOST_PARAM_NETDEV_NAME);
++iscsi_host_attr(hwaddress, ISCSI_HOST_PARAM_HWADDRESS);
++iscsi_host_attr(ipaddress, ISCSI_HOST_PARAM_IPADDRESS);
++iscsi_host_attr(initiatorname, ISCSI_HOST_PARAM_INITIATOR_NAME);
++
+ #define SETUP_PRIV_SESSION_RD_ATTR(field)				\
+ do {									\
+ 	priv->session_attrs[count] = &class_device_attr_priv_sess_##field; \
+@@ -1222,6 +1289,14 @@
+ 	}								\
+ } while (0)
+ 
++#define SETUP_HOST_RD_ATTR(field, param_flag)				\
++do {									\
++	if (tt->host_param_mask & param_flag) {				\
++		priv->host_attrs[count] = &class_device_attr_host_##field; \
++		count++;						\
++	}								\
++} while (0)
++
+ static int iscsi_session_match(struct attribute_container *cont,
+ 			   struct device *dev)
+ {
+@@ -1323,9 +1398,16 @@
+ 	priv->t.host_attrs.ac.class = &iscsi_host_class.class;
+ 	priv->t.host_attrs.ac.match = iscsi_host_match;
+ 	priv->t.host_size = sizeof(struct iscsi_host);
+-	priv->host_attrs[0] = NULL;
+ 	transport_container_register(&priv->t.host_attrs);
+ 
++	SETUP_HOST_RD_ATTR(netdev, ISCSI_HOST_NETDEV_NAME);
++	SETUP_HOST_RD_ATTR(ipaddress, ISCSI_HOST_IPADDRESS);
++	SETUP_HOST_RD_ATTR(hwaddress, ISCSI_HOST_HWADDRESS);
++	SETUP_HOST_RD_ATTR(initiatorname, ISCSI_HOST_INITIATOR_NAME);
++	BUG_ON(count > ISCSI_HOST_ATTRS);
++	priv->host_attrs[count] = NULL;
++	count = 0;
++
+ 	/* connection parameters */
+ 	priv->conn_cont.ac.attrs = &priv->conn_attrs[0];
+ 	priv->conn_cont.ac.class = &iscsi_connection_class.class;
+@@ -1364,6 +1446,10 @@
+ 	SETUP_SESSION_RD_ATTR(erl, ISCSI_ERL);
+ 	SETUP_SESSION_RD_ATTR(targetname, ISCSI_TARGET_NAME);
+ 	SETUP_SESSION_RD_ATTR(tpgt, ISCSI_TPGT);
++	SETUP_SESSION_RD_ATTR(username, ISCSI_USERNAME);
++	SETUP_SESSION_RD_ATTR(username_in, ISCSI_USERNAME_IN);
++	SETUP_SESSION_RD_ATTR(password, ISCSI_PASSWORD);
++	SETUP_SESSION_RD_ATTR(password_in, ISCSI_PASSWORD_IN);
+ 	SETUP_PRIV_SESSION_RD_ATTR(recovery_tmo);
+ 
+ 	BUG_ON(count > ISCSI_SESSION_ATTRS);
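
The new host attributes are opt-in: a transport only gets the sysfs files whose flag bits it sets in host_param_mask, and it must then back them with get_host_param(). A hypothetical LLD fragment, sketched against the interfaces visible above (the transport and its values are invented for illustration):

	/* Hypothetical transport: only netdev and initiatorname
	 * will appear under the iSCSI host in sysfs. */
	static int my_get_host_param(struct Scsi_Host *shost,
				     enum iscsi_host_param param, char *buf)
	{
		switch (param) {
		case ISCSI_HOST_PARAM_NETDEV_NAME:
			return sprintf(buf, "eth0\n");	/* placeholder */
		case ISCSI_HOST_PARAM_INITIATOR_NAME:
			return sprintf(buf, "iqn.2007-12.example:init\n");
		default:
			return -ENOSYS;
		}
	}

	static struct iscsi_transport my_transport = {
		.host_param_mask = ISCSI_HOST_NETDEV_NAME |
				   ISCSI_HOST_INITIATOR_NAME,
		.get_host_param	 = my_get_host_param,
		/* ... remaining mandatory ops elided ... */
	};
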
+diff -Nurb linux-2.6.22-570/drivers/scsi/sd.c linux-2.6.22-try2/drivers/scsi/sd.c
+--- linux-2.6.22-570/drivers/scsi/sd.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/sd.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1515,7 +1515,7 @@
+ 	if (!scsi_device_online(sdp))
+ 		goto out;
+ 
+-	buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL | __GFP_DMA);
++	buffer = kmalloc(SD_BUF_SIZE, GFP_KERNEL);
+ 	if (!buffer) {
+ 		sd_printk(KERN_WARNING, sdkp, "sd_revalidate_disk: Memory "
+ 			  "allocation failure.\n");
+diff -Nurb linux-2.6.22-570/drivers/scsi/sg.c linux-2.6.22-try2/drivers/scsi/sg.c
+--- linux-2.6.22-570/drivers/scsi/sg.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/sg.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1842,7 +1842,7 @@
+ 	int blk_size = buff_size;
+ 	struct page *p = NULL;
+ 
+-	if ((blk_size < 0) || (!sfp))
++	if (blk_size < 0)
+ 		return -EFAULT;
+ 	if (0 == blk_size)
+ 		++blk_size;	/* don't know why */
+diff -Nurb linux-2.6.22-570/drivers/scsi/stex.c linux-2.6.22-try2/drivers/scsi/stex.c
+--- linux-2.6.22-570/drivers/scsi/stex.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/stex.c	2007-12-19 15:29:23.000000000 -0500
+@@ -395,53 +395,34 @@
+ static int stex_map_sg(struct st_hba *hba,
+ 	struct req_msg *req, struct st_ccb *ccb)
+ {
+-	struct pci_dev *pdev = hba->pdev;
+ 	struct scsi_cmnd *cmd;
+-	dma_addr_t dma_handle;
+-	struct scatterlist *src;
++	struct scatterlist *sg;
+ 	struct st_sgtable *dst;
+-	int i;
++	int i, nseg;
+ 
+ 	cmd = ccb->cmd;
+ 	dst = (struct st_sgtable *)req->variable;
+ 	dst->max_sg_count = cpu_to_le16(ST_MAX_SG);
+-	dst->sz_in_byte = cpu_to_le32(cmd->request_bufflen);
+-
+-	if (cmd->use_sg) {
+-		int n_elem;
++	dst->sz_in_byte = cpu_to_le32(scsi_bufflen(cmd));
+ 
+-		src = (struct scatterlist *) cmd->request_buffer;
+-		n_elem = pci_map_sg(pdev, src,
+-			cmd->use_sg, cmd->sc_data_direction);
+-		if (n_elem <= 0)
++	nseg = scsi_dma_map(cmd);
++	if (nseg < 0)
+ 			return -EIO;
++	if (nseg) {
++		ccb->sg_count = nseg;
++		dst->sg_count = cpu_to_le16((u16)nseg);
+ 
+-		ccb->sg_count = n_elem;
+-		dst->sg_count = cpu_to_le16((u16)n_elem);
+-
+-		for (i = 0; i < n_elem; i++, src++) {
+-			dst->table[i].count = cpu_to_le32((u32)sg_dma_len(src));
++		scsi_for_each_sg(cmd, sg, nseg, i) {
++			dst->table[i].count = cpu_to_le32((u32)sg_dma_len(sg));
+ 			dst->table[i].addr =
+-				cpu_to_le32(sg_dma_address(src) & 0xffffffff);
++				cpu_to_le32(sg_dma_address(sg) & 0xffffffff);
+ 			dst->table[i].addr_hi =
+-				cpu_to_le32((sg_dma_address(src) >> 16) >> 16);
++				cpu_to_le32((sg_dma_address(sg) >> 16) >> 16);
+ 			dst->table[i].ctrl = SG_CF_64B | SG_CF_HOST;
+ 		}
+ 		dst->table[--i].ctrl |= SG_CF_EOT;
+-		return 0;
+ 	}
+ 
+-	dma_handle = pci_map_single(pdev, cmd->request_buffer,
+-		cmd->request_bufflen, cmd->sc_data_direction);
+-	cmd->SCp.dma_handle = dma_handle;
+-
+-	ccb->sg_count = 1;
+-	dst->sg_count = cpu_to_le16(1);
+-	dst->table[0].addr = cpu_to_le32(dma_handle & 0xffffffff);
+-	dst->table[0].addr_hi = cpu_to_le32((dma_handle >> 16) >> 16);
+-	dst->table[0].count = cpu_to_le32((u32)cmd->request_bufflen);
+-	dst->table[0].ctrl = SG_CF_EOT | SG_CF_64B | SG_CF_HOST;
+-
+ 	return 0;
+ }
+ 
+@@ -451,24 +432,24 @@
+ 	size_t lcount;
+ 	size_t len;
+ 	void *s, *d, *base = NULL;
+-	if (*count > cmd->request_bufflen)
+-		*count = cmd->request_bufflen;
++	size_t offset;
++
++	if (*count > scsi_bufflen(cmd))
++		*count = scsi_bufflen(cmd);
+ 	lcount = *count;
+ 	while (lcount) {
+ 		len = lcount;
+ 		s = (void *)src;
+-		if (cmd->use_sg) {
+-			size_t offset = *count - lcount;
++
++		offset = *count - lcount;
+ 			s += offset;
+-			base = scsi_kmap_atomic_sg(cmd->request_buffer,
++		base = scsi_kmap_atomic_sg(scsi_sglist(cmd),
+ 				sg_count, &offset, &len);
+-			if (base == NULL) {
++		if (!base) {
+ 				*count -= lcount;
+ 				return;
+ 			}
+ 			d = base + offset;
+-		} else
+-			d = cmd->request_buffer;
+ 
+ 		if (direction == ST_TO_CMD)
+ 			memcpy(d, s, len);
+@@ -476,7 +457,6 @@
+ 			memcpy(s, d, len);
+ 
+ 		lcount -= len;
+-		if (cmd->use_sg)
+ 			scsi_kunmap_atomic_sg(base);
+ 	}
+ }
+@@ -484,22 +464,17 @@
+ static int stex_direct_copy(struct scsi_cmnd *cmd,
+ 	const void *src, size_t count)
+ {
+-	struct st_hba *hba = (struct st_hba *) &cmd->device->host->hostdata[0];
+ 	size_t cp_len = count;
+ 	int n_elem = 0;
+ 
+-	if (cmd->use_sg) {
+-		n_elem = pci_map_sg(hba->pdev, cmd->request_buffer,
+-			cmd->use_sg, cmd->sc_data_direction);
+-		if (n_elem <= 0)
++	n_elem = scsi_dma_map(cmd);
++	if (n_elem < 0)
+ 			return 0;
+-	}
+ 
+ 	stex_internal_copy(cmd, src, &cp_len, n_elem, ST_TO_CMD);
+ 
+-	if (cmd->use_sg)
+-		pci_unmap_sg(hba->pdev, cmd->request_buffer,
+-			cmd->use_sg, cmd->sc_data_direction);
++	scsi_dma_unmap(cmd);
++
+ 	return cp_len == count;
+ }
+ 
+@@ -678,18 +653,6 @@
+ 	return 0;
+ }
+ 
+-static void stex_unmap_sg(struct st_hba *hba, struct scsi_cmnd *cmd)
+-{
+-	if (cmd->sc_data_direction != DMA_NONE) {
+-		if (cmd->use_sg)
+-			pci_unmap_sg(hba->pdev, cmd->request_buffer,
+-				cmd->use_sg, cmd->sc_data_direction);
+-		else
+-			pci_unmap_single(hba->pdev, cmd->SCp.dma_handle,
+-				cmd->request_bufflen, cmd->sc_data_direction);
+-	}
+-}
+-
+ static void stex_scsi_done(struct st_ccb *ccb)
+ {
+ 	struct scsi_cmnd *cmd = ccb->cmd;
+@@ -756,7 +719,7 @@
+ 
+ 	if (ccb->cmd->cmnd[0] == MGT_CMD &&
+ 		resp->scsi_status != SAM_STAT_CHECK_CONDITION) {
+-		ccb->cmd->request_bufflen =
++		scsi_bufflen(ccb->cmd) =
+ 			le32_to_cpu(*(__le32 *)&resp->variable[0]);
+ 		return;
+ 	}
+@@ -855,7 +818,7 @@
+ 				ccb->cmd->cmnd[1] == PASSTHRU_GET_ADAPTER))
+ 				stex_controller_info(hba, ccb);
+ 
+-			stex_unmap_sg(hba, ccb->cmd);
++			scsi_dma_unmap(ccb->cmd);
+ 			stex_scsi_done(ccb);
+ 			hba->out_req_cnt--;
+ 		} else if (ccb->req_type & PASSTHRU_REQ_TYPE) {
+@@ -1028,7 +991,7 @@
+ 	}
+ 
+ fail_out:
+-	stex_unmap_sg(hba, cmd);
++	scsi_dma_unmap(cmd);
+ 	hba->wait_ccb->req = NULL; /* nullify the req's future return */
+ 	hba->wait_ccb = NULL;
+ 	result = FAILED;
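
The stex changes follow the standard conversion to the SCSI data accessors: scsi_dma_map() replaces the old use_sg/pci_map_sg vs. pci_map_single split (returning a negative value on error, 0 when there is no data, or the mapped segment count), scsi_for_each_sg() walks the mapped list, and scsi_dma_unmap() tears it down. The resulting canonical pattern, sketched; setup_hw_sg_entry() is a stand-in for driver-specific descriptor programming:

	#include <scsi/scsi_cmnd.h>

	static int my_map_data(struct scsi_cmnd *cmd)
	{
		struct scatterlist *sg;
		int i, nseg;

		nseg = scsi_dma_map(cmd);	/* <0 error, 0 no data, else count */
		if (nseg < 0)
			return -EIO;

		scsi_for_each_sg(cmd, sg, nseg, i)
			setup_hw_sg_entry(i, sg_dma_address(sg),
					  sg_dma_len(sg));

		return 0;
	}
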
+diff -Nurb linux-2.6.22-570/drivers/scsi/sun_esp.c linux-2.6.22-try2/drivers/scsi/sun_esp.c
+--- linux-2.6.22-570/drivers/scsi/sun_esp.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/sun_esp.c	2007-12-19 15:29:23.000000000 -0500
+@@ -493,7 +493,7 @@
+ 		goto fail;
+ 
+ 	host->max_id = (hme ? 16 : 8);
+-	esp = host_to_esp(host);
++	esp = shost_priv(host);
+ 
+ 	esp->host = host;
+ 	esp->dev = esp_dev;
+diff -Nurb linux-2.6.22-570/drivers/scsi/sym53c416.c linux-2.6.22-try2/drivers/scsi/sym53c416.c
+--- linux-2.6.22-570/drivers/scsi/sym53c416.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/sym53c416.c	2007-12-19 15:29:23.000000000 -0500
+@@ -332,8 +332,7 @@
+ 	int i;
+ 	unsigned long flags = 0;
+ 	unsigned char status_reg, pio_int_reg, int_reg;
+-	struct scatterlist *sglist;
+-	unsigned int sgcount;
++	struct scatterlist *sg;
+ 	unsigned int tot_trans = 0;
+ 
+ 	/* We search the base address of the host adapter which caused the interrupt */
+@@ -429,19 +428,15 @@
+ 			{
+ 				current_command->SCp.phase = data_out;
+ 				outb(FLUSH_FIFO, base + COMMAND_REG);
+-				sym53c416_set_transfer_counter(base, current_command->request_bufflen);
++				sym53c416_set_transfer_counter(base,
++							       scsi_bufflen(current_command));
+ 				outb(TRANSFER_INFORMATION | PIO_MODE, base + COMMAND_REG);
+-				if(!current_command->use_sg)
+-					tot_trans = sym53c416_write(base, current_command->request_buffer, current_command->request_bufflen);
+-				else
+-				{
+-					sgcount = current_command->use_sg;
+-					sglist = current_command->request_buffer;
+-					while(sgcount--)
+-					{
+-						tot_trans += sym53c416_write(base, SG_ADDRESS(sglist), sglist->length);
+-						sglist++;
+-					}
++
++				scsi_for_each_sg(current_command,
++						 sg, scsi_sg_count(current_command), i) {
++					tot_trans += sym53c416_write(base,
++								     SG_ADDRESS(sg),
++								     sg->length);
+ 				}
+ 				if(tot_trans < current_command->underflow)
+ 					printk(KERN_WARNING "sym53c416: Underflow, wrote %d bytes, request for %d bytes.\n", tot_trans, current_command->underflow);
+@@ -455,19 +450,16 @@
+ 			{
+ 				current_command->SCp.phase = data_in;
+ 				outb(FLUSH_FIFO, base + COMMAND_REG);
+-				sym53c416_set_transfer_counter(base, current_command->request_bufflen);
++				sym53c416_set_transfer_counter(base,
++							       scsi_bufflen(current_command));
++
+ 				outb(TRANSFER_INFORMATION | PIO_MODE, base + COMMAND_REG);
+-				if(!current_command->use_sg)
+-					tot_trans = sym53c416_read(base, current_command->request_buffer, current_command->request_bufflen);
+-				else
+-				{
+-					sgcount = current_command->use_sg;
+-					sglist = current_command->request_buffer;
+-					while(sgcount--)
+-					{
+-						tot_trans += sym53c416_read(base, SG_ADDRESS(sglist), sglist->length);
+-						sglist++;
+-					}
++
++				scsi_for_each_sg(current_command,
++						 sg, scsi_sg_count(current_command), i) {
++					tot_trans += sym53c416_read(base,
++								    SG_ADDRESS(sg),
++								    sg->length);
+ 				}
+ 				if(tot_trans < current_command->underflow)
+ 					printk(KERN_WARNING "sym53c416: Underflow, read %d bytes, request for %d bytes.\n", tot_trans, current_command->underflow);
+diff -Nurb linux-2.6.22-570/drivers/scsi/tmscsim.c linux-2.6.22-try2/drivers/scsi/tmscsim.c
+--- linux-2.6.22-570/drivers/scsi/tmscsim.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/tmscsim.c	2007-12-19 15:29:23.000000000 -0500
+@@ -457,27 +457,20 @@
+ 			error = 1;
+ 		DEBUG1(printk("%s(): Mapped sense buffer %p at %x\n", __FUNCTION__, pcmd->sense_buffer, cmdp->saved_dma_handle));
+ 	/* Map SG list */
+-	} else if (pcmd->use_sg) {
+-		pSRB->pSegmentList	= (struct scatterlist *) pcmd->request_buffer;
+-		pSRB->SGcount		= pci_map_sg(pdev, pSRB->pSegmentList, pcmd->use_sg,
+-						     pcmd->sc_data_direction);
++	} else if (scsi_sg_count(pcmd)) {
++		int nseg;
++
++		nseg = scsi_dma_map(pcmd);
++
++		pSRB->pSegmentList	= scsi_sglist(pcmd);
++		pSRB->SGcount		= nseg;
++
+ 		/* TODO: error handling */
+-		if (!pSRB->SGcount)
++		if (nseg < 0)
+ 			error = 1;
+ 		DEBUG1(printk("%s(): Mapped SG %p with %d (%d) elements\n",\
+-			      __FUNCTION__, pcmd->request_buffer, pSRB->SGcount, pcmd->use_sg));
++			      __FUNCTION__, scsi_sglist(pcmd), nseg, scsi_sg_count(pcmd)));
+ 	/* Map single segment */
+-	} else if (pcmd->request_buffer && pcmd->request_bufflen) {
+-		pSRB->pSegmentList	= dc390_sg_build_single(&pSRB->Segmentx, pcmd->request_buffer, pcmd->request_bufflen);
+-		pSRB->SGcount		= pci_map_sg(pdev, pSRB->pSegmentList, 1,
+-						     pcmd->sc_data_direction);
+-		cmdp->saved_dma_handle	= sg_dma_address(pSRB->pSegmentList);
+-
+-		/* TODO: error handling */
+-		if (pSRB->SGcount != 1)
+-			error = 1;
+-		DEBUG1(printk("%s(): Mapped request buffer %p at %x\n", __FUNCTION__, pcmd->request_buffer, cmdp->saved_dma_handle));
+-	/* No mapping !? */	
+     	} else
+ 		pSRB->SGcount = 0;
+ 
+@@ -494,12 +487,10 @@
+ 	if (pSRB->SRBFlag) {
+ 		pci_unmap_sg(pdev, &pSRB->Segmentx, 1, DMA_FROM_DEVICE);
+ 		DEBUG1(printk("%s(): Unmapped sense buffer at %x\n", __FUNCTION__, cmdp->saved_dma_handle));
+-	} else if (pcmd->use_sg) {
+-		pci_unmap_sg(pdev, pcmd->request_buffer, pcmd->use_sg, pcmd->sc_data_direction);
+-		DEBUG1(printk("%s(): Unmapped SG at %p with %d elements\n", __FUNCTION__, pcmd->request_buffer, pcmd->use_sg));
+-	} else if (pcmd->request_buffer && pcmd->request_bufflen) {
+-		pci_unmap_sg(pdev, &pSRB->Segmentx, 1, pcmd->sc_data_direction);
+-		DEBUG1(printk("%s(): Unmapped request buffer at %x\n", __FUNCTION__, cmdp->saved_dma_handle));
++	} else {
++		scsi_dma_unmap(pcmd);
++		DEBUG1(printk("%s(): Unmapped SG at %p with %d elements\n",
++			      __FUNCTION__, scsi_sglist(pcmd), scsi_sg_count(pcmd)));
+ 	}
+ }
+ 
+@@ -1153,9 +1144,9 @@
+     struct scatterlist *psgl;
+     pSRB->TotalXferredLen = 0;
+     pSRB->SGIndex = 0;
+-    if (pcmd->use_sg) {
++    if (scsi_sg_count(pcmd)) {
+ 	size_t saved;
+-	pSRB->pSegmentList = (struct scatterlist *)pcmd->request_buffer;
++	pSRB->pSegmentList = scsi_sglist(pcmd);
+ 	psgl = pSRB->pSegmentList;
+ 	//dc390_pci_sync(pSRB);
+ 
+@@ -1179,12 +1170,6 @@
+ 	printk (KERN_INFO "DC390: Pointer restored. Segment %i, Total %li, Bus %08lx\n",
+ 		pSRB->SGIndex, pSRB->Saved_Ptr, pSRB->SGBusAddr);
+ 
+-    } else if(pcmd->request_buffer) {
+-	//dc390_pci_sync(pSRB);
+-
+-	sg_dma_len(&pSRB->Segmentx) = pcmd->request_bufflen - pSRB->Saved_Ptr;
+-	pSRB->SGcount = 1;
+-	pSRB->pSegmentList = (struct scatterlist *) &pSRB->Segmentx;
+     } else {
+ 	 pSRB->SGcount = 0;
+ 	 printk (KERN_INFO "DC390: RESTORE_PTR message for Transfer without Scatter-Gather ??\n");
+@@ -1579,7 +1564,8 @@
+ 	if( (pSRB->SRBState & (SRB_START_+SRB_MSGOUT)) ||
+ 	   !(pSRB->SRBState & (SRB_DISCONNECT+SRB_COMPLETED)) )
+ 	{	/* Selection time out */
+-		pSRB->TargetStatus = SCSI_STAT_SEL_TIMEOUT;
++		pSRB->AdaptStatus = H_SEL_TIMEOUT;
++		pSRB->TargetStatus = 0;
+ 		goto  disc1;
+ 	}
+ 	else if (!(pSRB->SRBState & SRB_DISCONNECT) && (pSRB->SRBState & SRB_COMPLETED))
+@@ -1612,7 +1598,7 @@
+ 	if( !( pACB->scan_devices ) )
+ 	{
+ 	    struct scsi_cmnd *pcmd = pSRB->pcmd;
+-	    pcmd->resid = pcmd->request_bufflen;
++	    scsi_set_resid(pcmd, scsi_bufflen(pcmd));
+ 	    SET_RES_DID(pcmd->result, DID_SOFT_ERROR);
+ 	    dc390_Going_remove(pDCB, pSRB);
+ 	    dc390_Free_insert(pACB, pSRB);
+@@ -1695,7 +1681,7 @@
+ 			      pcmd->cmnd[0], pDCB->TargetID, pDCB->TargetLUN));
+ 
+ 	pSRB->SRBFlag |= AUTO_REQSENSE;
+-	pSRB->SavedSGCount = pcmd->use_sg;
++	pSRB->SavedSGCount = scsi_sg_count(pcmd);
+ 	pSRB->SavedTotXLen = pSRB->TotalXferredLen;
+ 	pSRB->AdaptStatus = 0;
+ 	pSRB->TargetStatus = 0; /* CHECK_CONDITION<<1; */
+@@ -1728,22 +1714,22 @@
+     {	/* Last command was a Request Sense */
+ 	pSRB->SRBFlag &= ~AUTO_REQSENSE;
+ 	pSRB->AdaptStatus = 0;
+-	pSRB->TargetStatus = CHECK_CONDITION << 1;
++	pSRB->TargetStatus = SAM_STAT_CHECK_CONDITION;
+ 
+ 	//pcmd->result = MK_RES(DRIVER_SENSE,DID_OK,0,status);
+-	if (status == (CHECK_CONDITION << 1))
++	if (status == SAM_STAT_CHECK_CONDITION)
+ 	    pcmd->result = MK_RES_LNX(0, DID_BAD_TARGET, 0, /*CHECK_CONDITION*/0);
+ 	else /* Retry */
+ 	{
+ 	    if( pSRB->pcmd->cmnd[0] == TEST_UNIT_READY /* || pSRB->pcmd->cmnd[0] == START_STOP */)
+ 	    {
+ 		/* Don't retry on TEST_UNIT_READY */
+-		pcmd->result = MK_RES_LNX(DRIVER_SENSE,DID_OK,0,CHECK_CONDITION);
++		pcmd->result = MK_RES_LNX(DRIVER_SENSE, DID_OK, 0, SAM_STAT_CHECK_CONDITION);
+ 		REMOVABLEDEBUG(printk(KERN_INFO "Cmd=%02x, Result=%08x, XferL=%08x\n",pSRB->pcmd->cmnd[0],\
+ 		       (u32) pcmd->result, (u32) pSRB->TotalXferredLen));
+ 	    } else {
+ 		SET_RES_DRV(pcmd->result, DRIVER_SENSE);
+-		pcmd->use_sg = pSRB->SavedSGCount;
++		scsi_sg_count(pcmd) = pSRB->SavedSGCount;
+ 		//pSRB->ScsiCmdLen	 = (u8) (pSRB->Segment1[0] >> 8);
+ 		DEBUG0 (printk ("DC390: RETRY pid %li (%02x), target %02i-%02i\n", pcmd->pid, pcmd->cmnd[0], pcmd->device->id, pcmd->device->lun));
+ 		pSRB->TotalXferredLen = 0;
+@@ -1754,7 +1740,7 @@
+     }
+     if( status )
+     {
+-	if( status_byte(status) == CHECK_CONDITION )
++	if (status == SAM_STAT_CHECK_CONDITION)
+ 	{
+ 	    if (dc390_RequestSense(pACB, pDCB, pSRB)) {
+ 		SET_RES_DID(pcmd->result, DID_ERROR);
+@@ -1762,22 +1748,15 @@
+ 	    }
+ 	    return;
+ 	}
+-	else if( status_byte(status) == QUEUE_FULL )
++	else if (status == SAM_STAT_TASK_SET_FULL)
+ 	{
+ 	    scsi_track_queue_full(pcmd->device, pDCB->GoingSRBCnt - 1);
+-	    pcmd->use_sg = pSRB->SavedSGCount;
++	    scsi_sg_count(pcmd) = pSRB->SavedSGCount;
+ 	    DEBUG0 (printk ("DC390: RETRY pid %li (%02x), target %02i-%02i\n", pcmd->pid, pcmd->cmnd[0], pcmd->device->id, pcmd->device->lun));
+ 	    pSRB->TotalXferredLen = 0;
+ 	    SET_RES_DID(pcmd->result, DID_SOFT_ERROR);
+ 	}
+-	else if(status == SCSI_STAT_SEL_TIMEOUT)
+-	{
+-	    pSRB->AdaptStatus = H_SEL_TIMEOUT;
+-	    pSRB->TargetStatus = 0;
+-	    pcmd->result = MK_RES(0,DID_NO_CONNECT,0,0);
+-	    /* Devices are removed below ... */
+-	}
+-	else if (status_byte(status) == BUSY && 
++	else if (status == SAM_STAT_BUSY &&
+ 		 (pcmd->cmnd[0] == TEST_UNIT_READY || pcmd->cmnd[0] == INQUIRY) &&
+ 		 pACB->scan_devices)
+ 	{
+@@ -1795,12 +1774,17 @@
+     else
+     {	/*  Target status == 0 */
+ 	status = pSRB->AdaptStatus;
+-	if(status & H_OVER_UNDER_RUN)
++	if (status == H_OVER_UNDER_RUN)
+ 	{
+ 	    pSRB->TargetStatus = 0;
+ 	    SET_RES_DID(pcmd->result,DID_OK);
+ 	    SET_RES_MSG(pcmd->result,pSRB->EndMessage);
+ 	}
++	else if (status == H_SEL_TIMEOUT)
++	{
++	    pcmd->result = MK_RES(0, DID_NO_CONNECT, 0, 0);
++	    /* Devices are removed below ... */
++	}
+ 	else if( pSRB->SRBStatus & PARITY_ERROR)
+ 	{
+ 	    //pcmd->result = MK_RES(0,DID_PARITY,pSRB->EndMessage,0);
+@@ -1816,7 +1800,7 @@
+     }
+ 
+ cmd_done:
+-    pcmd->resid = pcmd->request_bufflen - pSRB->TotalXferredLen;
++    scsi_set_resid(pcmd, scsi_bufflen(pcmd) - pSRB->TotalXferredLen);
+ 
+     dc390_Going_remove (pDCB, pSRB);
+     /* Add to free list */
+diff -Nurb linux-2.6.22-570/drivers/scsi/tmscsim.h linux-2.6.22-try2/drivers/scsi/tmscsim.h
+--- linux-2.6.22-570/drivers/scsi/tmscsim.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/tmscsim.h	2007-12-19 15:29:23.000000000 -0500
+@@ -258,13 +258,6 @@
+ #define H_BAD_CCB_OR_SG  0x1A
+ #define H_ABORT 	 0x0FF
+ 
+-/*; SCSI Status byte codes*/ 
+-/* The values defined in include/scsi/scsi.h, to be shifted << 1 */
+-
+-#define SCSI_STAT_UNEXP_BUS_F	0xFD	/*;  Unexpect Bus Free */
+-#define SCSI_STAT_BUS_RST_DETECT 0xFE	/*;  Scsi Bus Reset detected */
+-#define SCSI_STAT_SEL_TIMEOUT	0xFF	/*;  Selection Time out */
+-
+ /* cmd->result */
+ #define RES_TARGET		0x000000FF	/* Target State */
+ #define RES_TARGET_LNX		STATUS_MASK	/* Only official ... */
+@@ -273,7 +266,7 @@
+ #define RES_DRV			0xFF000000	/* DRIVER_ codes */
+ 
+ #define MK_RES(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt))
+-#define MK_RES_LNX(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt)<<1)
++#define MK_RES_LNX(drv,did,msg,tgt) ((int)(drv)<<24 | (int)(did)<<16 | (int)(msg)<<8 | (int)(tgt))
+ 
+ #define SET_RES_TARGET(who, tgt) do { who &= ~RES_TARGET; who |= (int)(tgt); } while (0)
+ #define SET_RES_TARGET_LNX(who, tgt) do { who &= ~RES_TARGET_LNX; who |= (int)(tgt) << 1; } while (0)
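
The tmscsim changes replace the driver's home-grown shifted status codes with the SAM_STAT_* values, which are the already-left-shifted raw SCSI status bytes; the legacy names are their >>1 counterparts. A small illustration with values I believe match <scsi/scsi.h> of this era:

	#define CHECK_CONDITION		 0x01	/* legacy, pre-shift name */
	#define SAM_STAT_CHECK_CONDITION 0x02	/* raw SCSI status byte */
	#define status_byte(result)	 (((result) >> 1) & 0x7f)

	/*
	 * For a raw status byte of 0x02:
	 *   old test: status_byte(0x02) == CHECK_CONDITION  -> 0x01 == 0x01
	 *   new test: 0x02 == SAM_STAT_CHECK_CONDITION      -> direct match
	 * Both select the same condition; the SAM_STAT form drops the shift
	 * and lets the driver-private SCSI_STAT_* duplicates above go away.
	 */
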
+diff -Nurb linux-2.6.22-570/drivers/scsi/u14-34f.c linux-2.6.22-try2/drivers/scsi/u14-34f.c
+--- linux-2.6.22-570/drivers/scsi/u14-34f.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/u14-34f.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1111,7 +1111,7 @@
+ static void map_dma(unsigned int i, unsigned int j) {
+    unsigned int data_len = 0;
+    unsigned int k, count, pci_dir;
+-   struct scatterlist *sgpnt;
++   struct scatterlist *sg;
+    struct mscp *cpp;
+    struct scsi_cmnd *SCpnt;
+ 
+@@ -1124,33 +1124,28 @@
+ 
+    cpp->sense_len = sizeof SCpnt->sense_buffer;
+ 
+-   if (!SCpnt->use_sg) {
+-
+-      /* If we get here with PCI_DMA_NONE, pci_map_single triggers a BUG() */
+-      if (!SCpnt->request_bufflen) pci_dir = PCI_DMA_BIDIRECTIONAL;
+-
+-      if (SCpnt->request_buffer)
+-         cpp->data_address = H2DEV(pci_map_single(HD(j)->pdev,
+-                  SCpnt->request_buffer, SCpnt->request_bufflen, pci_dir));
+-
+-      cpp->data_len = H2DEV(SCpnt->request_bufflen);
+-      return;
+-      }
+-
+-   sgpnt = (struct scatterlist *) SCpnt->request_buffer;
+-   count = pci_map_sg(HD(j)->pdev, sgpnt, SCpnt->use_sg, pci_dir);
+-
+-   for (k = 0; k < count; k++) {
+-      cpp->sglist[k].address = H2DEV(sg_dma_address(&sgpnt[k]));
+-      cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(&sgpnt[k]));
+-      data_len += sgpnt[k].length;
++   if (scsi_bufflen(SCpnt)) {
++	   count = scsi_dma_map(SCpnt);
++	   BUG_ON(count < 0);
++
++	   scsi_for_each_sg(SCpnt, sg, count, k) {
++		   cpp->sglist[k].address = H2DEV(sg_dma_address(sg));
++		   cpp->sglist[k].num_bytes = H2DEV(sg_dma_len(sg));
++		   data_len += sg->length;
+       }
+ 
+    cpp->sg = TRUE;
+-   cpp->use_sg = SCpnt->use_sg;
+-   cpp->data_address = H2DEV(pci_map_single(HD(j)->pdev, cpp->sglist,
+-                             SCpnt->use_sg * sizeof(struct sg_list), pci_dir));
++	   cpp->use_sg = scsi_sg_count(SCpnt);
++	   cpp->data_address =
++		   H2DEV(pci_map_single(HD(j)->pdev, cpp->sglist,
++					cpp->use_sg * sizeof(struct sg_list),
++					pci_dir));
+    cpp->data_len = H2DEV(data_len);
++
++   } else {
++	   pci_dir = PCI_DMA_BIDIRECTIONAL;
++	   cpp->data_len = H2DEV(scsi_bufflen(SCpnt));
++   }
+ }
+ 
+ static void unmap_dma(unsigned int i, unsigned int j) {
+@@ -1165,8 +1160,7 @@
+       pci_unmap_single(HD(j)->pdev, DEV2H(cpp->sense_addr),
+                        DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE);
+ 
+-   if (SCpnt->use_sg)
+-      pci_unmap_sg(HD(j)->pdev, SCpnt->request_buffer, SCpnt->use_sg, pci_dir);
++   scsi_dma_unmap(SCpnt);
+ 
+    if (!DEV2H(cpp->data_len)) pci_dir = PCI_DMA_BIDIRECTIONAL;
+ 
+@@ -1187,9 +1181,9 @@
+       pci_dma_sync_single_for_cpu(HD(j)->pdev, DEV2H(cpp->sense_addr),
+                           DEV2H(cpp->sense_len), PCI_DMA_FROMDEVICE);
+ 
+-   if (SCpnt->use_sg)
+-      pci_dma_sync_sg_for_cpu(HD(j)->pdev, SCpnt->request_buffer,
+-                         SCpnt->use_sg, pci_dir);
++   if (scsi_sg_count(SCpnt))
++	   pci_dma_sync_sg_for_cpu(HD(j)->pdev, scsi_sglist(SCpnt),
++				   scsi_sg_count(SCpnt), pci_dir);
+ 
+    if (!DEV2H(cpp->data_len)) pci_dir = PCI_DMA_BIDIRECTIONAL;
+ 
+diff -Nurb linux-2.6.22-570/drivers/scsi/ultrastor.c linux-2.6.22-try2/drivers/scsi/ultrastor.c
+--- linux-2.6.22-570/drivers/scsi/ultrastor.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/ultrastor.c	2007-12-19 15:29:23.000000000 -0500
+@@ -675,16 +675,15 @@
+ 
+ static inline void build_sg_list(struct mscp *mscp, struct scsi_cmnd *SCpnt)
+ {
+-	struct scatterlist *sl;
++	struct scatterlist *sg;
+ 	long transfer_length = 0;
+ 	int i, max;
+ 
+-	sl = (struct scatterlist *) SCpnt->request_buffer;
+-	max = SCpnt->use_sg;
+-	for (i = 0; i < max; i++) {
+-		mscp->sglist[i].address = isa_page_to_bus(sl[i].page) + sl[i].offset;
+-		mscp->sglist[i].num_bytes = sl[i].length;
+-		transfer_length += sl[i].length;
++	max = scsi_sg_count(SCpnt);
++	scsi_for_each_sg(SCpnt, sg, max, i) {
++		mscp->sglist[i].address = isa_page_to_bus(sg->page) + sg->offset;
++		mscp->sglist[i].num_bytes = sg->length;
++		transfer_length += sg->length;
+ 	}
+ 	mscp->number_of_sg_list = max;
+ 	mscp->transfer_data = isa_virt_to_bus(mscp->sglist);
+@@ -730,15 +729,15 @@
+     my_mscp->target_id = SCpnt->device->id;
+     my_mscp->ch_no = 0;
+     my_mscp->lun = SCpnt->device->lun;
+-    if (SCpnt->use_sg) {
++    if (scsi_sg_count(SCpnt)) {
+ 	/* Set scatter/gather flag in SCSI command packet */
+ 	my_mscp->sg = TRUE;
+ 	build_sg_list(my_mscp, SCpnt);
+     } else {
+ 	/* Unset scatter/gather flag in SCSI command packet */
+ 	my_mscp->sg = FALSE;
+-	my_mscp->transfer_data = isa_virt_to_bus(SCpnt->request_buffer);
+-	my_mscp->transfer_data_length = SCpnt->request_bufflen;
++	my_mscp->transfer_data = isa_virt_to_bus(scsi_sglist(SCpnt));
++	my_mscp->transfer_data_length = scsi_bufflen(SCpnt);
+     }
+     my_mscp->command_link = 0;		/*???*/
+     my_mscp->scsi_command_link_id = 0;	/*???*/
+diff -Nurb linux-2.6.22-570/drivers/scsi/wd7000.c linux-2.6.22-try2/drivers/scsi/wd7000.c
+--- linux-2.6.22-570/drivers/scsi/wd7000.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/scsi/wd7000.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1091,6 +1091,7 @@
+ 	unchar *cdb = (unchar *) SCpnt->cmnd;
+ 	unchar idlun;
+ 	short cdblen;
++	int nseg;
+ 	Adapter *host = (Adapter *) SCpnt->device->host->hostdata;
+ 
+ 	cdblen = SCpnt->cmd_len;
+@@ -1106,28 +1107,29 @@
+ 	SCpnt->host_scribble = (unchar *) scb;
+ 	scb->host = host;
+ 
+-	if (SCpnt->use_sg) {
+-		struct scatterlist *sg = (struct scatterlist *) SCpnt->request_buffer;
++	nseg = scsi_sg_count(SCpnt);
++	if (nseg) {
++		struct scatterlist *sg;
+ 		unsigned i;
+ 
+ 		if (SCpnt->device->host->sg_tablesize == SG_NONE) {
+ 			panic("wd7000_queuecommand: scatter/gather not supported.\n");
+ 		}
+-		dprintk("Using scatter/gather with %d elements.\n", SCpnt->use_sg);
++		dprintk("Using scatter/gather with %d elements.\n", nseg);
+ 
+ 		sgb = scb->sgb;
+ 		scb->op = 1;
+ 		any2scsi(scb->dataptr, (int) sgb);
+-		any2scsi(scb->maxlen, SCpnt->use_sg * sizeof(Sgb));
++		any2scsi(scb->maxlen, nseg * sizeof(Sgb));
+ 
+-		for (i = 0; i < SCpnt->use_sg; i++) {
+-			any2scsi(sgb[i].ptr, isa_page_to_bus(sg[i].page) + sg[i].offset);
+-			any2scsi(sgb[i].len, sg[i].length);
++		scsi_for_each_sg(SCpnt, sg, nseg, i) {
++			any2scsi(sgb[i].ptr, isa_page_to_bus(sg->page) + sg->offset);
++			any2scsi(sgb[i].len, sg->length);
+ 		}
+ 	} else {
+ 		scb->op = 0;
+-		any2scsi(scb->dataptr, isa_virt_to_bus(SCpnt->request_buffer));
+-		any2scsi(scb->maxlen, SCpnt->request_bufflen);
++		any2scsi(scb->dataptr, isa_virt_to_bus(scsi_sglist(SCpnt)));
++		any2scsi(scb->maxlen, scsi_bufflen(SCpnt));
+ 	}
+ 
+ 	/* FIXME: drop lock and yield here ? */
+diff -Nurb linux-2.6.22-570/drivers/scsi/zorro7xx.c linux-2.6.22-try2/drivers/scsi/zorro7xx.c
+--- linux-2.6.22-570/drivers/scsi/zorro7xx.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/scsi/zorro7xx.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,180 @@
++/*
++ * Detection routine for the NCR53c710 based Amiga SCSI Controllers for Linux.
++ *		Amiga MacroSystemUS WarpEngine SCSI controller.
++ *		Amiga Technologies/DKB A4091 SCSI controller.
++ *
++ * Written 1997 by Alan Hourihane <alanh@fairlite.demon.co.uk>
++ * plus modifications of the 53c7xx.c driver to support the Amiga.
++ *
++ * Rewritten to use 53c700.c by Kars de Jong <jongk@linux-m68k.org>
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <linux/zorro.h>
++#include <asm/amigaints.h>
++#include <scsi/scsi_host.h>
++#include <scsi/scsi_transport_spi.h>
++
++#include "53c700.h"
++
++MODULE_AUTHOR("Alan Hourihane <alanh@fairlite.demon.co.uk> / Kars de Jong <jongk@linux-m68k.org>");
++MODULE_DESCRIPTION("Amiga Zorro NCR53C710 driver");
++MODULE_LICENSE("GPL");
++
++
++static struct scsi_host_template zorro7xx_scsi_driver_template = {
++	.proc_name	= "zorro7xx",
++	.this_id	= 7,
++	.module		= THIS_MODULE,
++};
++
++static struct zorro_driver_data {
++	const char *name;
++	unsigned long offset;
++	int absolute;	/* offset is absolute address */
++} zorro7xx_driver_data[] __devinitdata = {
++	{ .name = "PowerUP 603e+", .offset = 0xf40000, .absolute = 1 },
++	{ .name = "WarpEngine 40xx", .offset = 0x40000 },
++	{ .name = "A4091", .offset = 0x800000 },
++	{ .name = "GForce 040/060", .offset = 0x40000 },
++	{ 0 }
++};
++
++static struct zorro_device_id zorro7xx_zorro_tbl[] __devinitdata = {
++	{
++		.id = ZORRO_PROD_PHASE5_BLIZZARD_603E_PLUS,
++		.driver_data = (unsigned long)&zorro7xx_driver_data[0],
++	},
++	{
++		.id = ZORRO_PROD_MACROSYSTEMS_WARP_ENGINE_40xx,
++		.driver_data = (unsigned long)&zorro7xx_driver_data[1],
++	},
++	{
++		.id = ZORRO_PROD_CBM_A4091_1,
++		.driver_data = (unsigned long)&zorro7xx_driver_data[2],
++	},
++	{
++		.id = ZORRO_PROD_CBM_A4091_2,
++		.driver_data = (unsigned long)&zorro7xx_driver_data[2],
++	},
++	{
++		.id = ZORRO_PROD_GVP_GFORCE_040_060,
++		.driver_data = (unsigned long)&zorro7xx_driver_data[3],
++	},
++	{ 0 }
++};
++
++static int __devinit zorro7xx_init_one(struct zorro_dev *z,
++				       const struct zorro_device_id *ent)
++{
++	struct Scsi_Host *host = NULL;
++	struct NCR_700_Host_Parameters *hostdata;
++	struct zorro_driver_data *zdd;
++	unsigned long board, ioaddr;
++
++	board = zorro_resource_start(z);
++	zdd = (struct zorro_driver_data *)ent->driver_data;
++
++	if (zdd->absolute) {
++		ioaddr = zdd->offset;
++	} else {
++		ioaddr = board + zdd->offset;
++	}
++
++	if (!zorro_request_device(z, zdd->name)) {
++		printk(KERN_ERR "zorro7xx: cannot reserve region 0x%lx, abort\n",
++		       board);
++		return -EBUSY;
++	}
++
++	hostdata = kmalloc(sizeof(struct NCR_700_Host_Parameters), GFP_KERNEL);
++	if (hostdata == NULL) {
++		printk(KERN_ERR "zorro7xx: Failed to allocate host data\n");
++		goto out_release;
++	}
++
++	memset(hostdata, 0, sizeof(struct NCR_700_Host_Parameters));
++
++	/* Fill in the required pieces of hostdata */
++	if (ioaddr > 0x01000000)
++		hostdata->base = ioremap(ioaddr, zorro_resource_len(z));
++	else
++		hostdata->base = (void __iomem *)ZTWO_VADDR(ioaddr);
++
++	hostdata->clock = 50;
++	hostdata->chip710 = 1;
++
++	/* Settings for at least WarpEngine 40xx */
++	hostdata->ctest7_extra = CTEST7_TT1;
++
++	zorro7xx_scsi_driver_template.name = zdd->name;
++
++	/* and register the chip */
++	host = NCR_700_detect(&zorro7xx_scsi_driver_template, hostdata,
++			      &z->dev);
++	if (!host) {
++		printk(KERN_ERR "zorro7xx: No host detected; "
++				"board configuration problem?\n");
++		goto out_free;
++	}
++
++	host->this_id = 7;
++	host->base = ioaddr;
++	host->irq = IRQ_AMIGA_PORTS;
++
++	if (request_irq(host->irq, NCR_700_intr, IRQF_SHARED, "zorro7xx-scsi",
++			host)) {
++		printk(KERN_ERR "zorro7xx: request_irq failed\n");
++		goto out_put_host;
++	}
++
++	scsi_scan_host(host);
++
++	return 0;
++
++ out_put_host:
++	scsi_host_put(host);
++ out_free:
++	if (ioaddr > 0x01000000)
++		iounmap(hostdata->base);
++	kfree(hostdata);
++ out_release:
++	zorro_release_device(z);
++
++	return -ENODEV;
++}
++
++static __devexit void zorro7xx_remove_one(struct zorro_dev *z)
++{
++	struct Scsi_Host *host = dev_to_shost(&z->dev);
++	struct NCR_700_Host_Parameters *hostdata = shost_priv(host);
++
++	scsi_remove_host(host);
++
++	NCR_700_release(host);
++	kfree(hostdata);
++	free_irq(host->irq, host);
++	zorro_release_device(z);
++}
++
++static struct zorro_driver zorro7xx_driver = {
++	.name	  = "zorro7xx-scsi",
++	.id_table = zorro7xx_zorro_tbl,
++	.probe	  = zorro7xx_init_one,
++	.remove	  = __devexit_p(zorro7xx_remove_one),
++};
++
++static int __init zorro7xx_scsi_init(void)
++{
++	return zorro_register_driver(&zorro7xx_driver);
++}
++
++static void __exit zorro7xx_scsi_exit(void)
++{
++	zorro_unregister_driver(&zorro7xx_driver);
++}
++
++module_init(zorro7xx_scsi_init);
++module_exit(zorro7xx_scsi_exit);
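+
+A note on the address check in the probe above: Zorro II expansion space
+lies inside the m68k's 24-bit (16 MB) range and is statically mapped, so
+ZTWO_VADDR() is enough; boards decoded above 16 MB are Zorro III and must
+be ioremap()ed (and iounmap()ed on the error path, as the probe does).  A
+minimal sketch of that decision:
+
+    #include <linux/zorro.h>
+    #include <asm/amigahw.h>
+    #include <asm/io.h>
+
+    static void __iomem *map_board(unsigned long ioaddr, unsigned long len)
+    {
+        if (ioaddr > 0x01000000)        /* beyond Zorro II space */
+            return ioremap(ioaddr, len);
+        return (void __iomem *)ZTWO_VADDR(ioaddr);  /* static mapping */
+    }
+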
+diff -Nurb linux-2.6.22-570/drivers/serial/8250.c linux-2.6.22-try2/drivers/serial/8250.c
+--- linux-2.6.22-570/drivers/serial/8250.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/serial/8250.c	2007-12-19 15:29:24.000000000 -0500
+@@ -2845,6 +2845,24 @@
+ }
+ EXPORT_SYMBOL(serial8250_unregister_port);
+ 
++/**
++ *	serial8250_unregister_by_port - remove a 16x50 serial port at runtime
++ *	@port: A &struct uart_port that describes the port to remove.
++ *
++ *	Remove one serial port.  This may not be called from interrupt
++ *	context.  We hand the port back to our control.
++ */
++void serial8250_unregister_by_port(struct uart_port *port)
++{
++	struct uart_8250_port *uart;
++
++	uart = serial8250_find_match_or_unused(port);
++
++	if (uart)
++		serial8250_unregister_port(uart->port.line);
++}
++EXPORT_SYMBOL(serial8250_unregister_by_port);
++
+ static int __init serial8250_init(void)
+ {
+ 	int ret, i;
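+
+The export above exists so a debugger stub can take a UART away from the
+8250 core and return it later, which is exactly what 8250_kgdb.c below
+does.  A hedged sketch of that life cycle (borrow_port()/return_port() are
+hypothetical callers; 'port' is a fully described struct uart_port):
+
+    #include <linux/serial_8250.h>
+    #include <linux/serial_core.h>
+
+    static void borrow_port(struct uart_port *port)
+    {
+        serial8250_unregister_by_port(port);  /* detach from the 8250 core */
+        /* ... reprogram the UART for exclusive polled use ... */
+    }
+
+    static void return_port(struct uart_port *port)
+    {
+        serial8250_register_port(port);       /* hand it back on cleanup */
+    }
+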
+diff -Nurb linux-2.6.22-570/drivers/serial/8250_kgdb.c linux-2.6.22-try2/drivers/serial/8250_kgdb.c
+--- linux-2.6.22-570/drivers/serial/8250_kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/serial/8250_kgdb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,515 @@
++/*
++ * 8250 interface for kgdb.
++ *
++ * This is a merging of many different drivers, and all of the people have
++ * had an impact in some form or another:
++ *
++ * 2004-2005 (c) MontaVista Software, Inc.
++ * 2005-2006 (c) Wind River Systems, Inc.
++ *
++ * Amit Kale <amitkale@emsyssoft.com>, David Grothe <dave@gcom.com>,
++ * Scott Foehner <sfoehner@engr.sgi.com>, George Anzinger <george@mvista.com>,
++ * Robert Walsh <rjwalsh@durables.org>, wangdi <wangdi@clusterfs.com>,
++ * San Mehat, Tom Rini <trini@mvista.com>,
++ * Jason Wessel <jason.wessel@windriver.com>
++ */
++
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++#include <linux/interrupt.h>
++#include <linux/tty.h>
++#include <linux/serial.h>
++#include <linux/serial_reg.h>
++#include <linux/serialP.h>
++#include <linux/ioport.h>
++
++#include <asm/io.h>
++#include <asm/serial.h>		/* For BASE_BAUD and SERIAL_PORT_DFNS */
++
++#include "8250.h"
++
++#define GDB_BUF_SIZE	512	/* power of 2, please */
++
++MODULE_DESCRIPTION("KGDB driver for the 8250");
++MODULE_LICENSE("GPL");
++/* These will conflict with early_param otherwise. */
++#ifdef CONFIG_KGDB_8250_MODULE
++static char config[256];
++module_param_string(kgdb8250, config, 256, 0);
++MODULE_PARM_DESC(kgdb8250,
++		 " kgdb8250=<io or mmio>,<address>,<baud rate>,<irq>\n");
++static struct kgdb_io local_kgdb_io_ops;
++#endif				/* CONFIG_KGDB_8250_MODULE */
++
++/* Speed of the UART. */
++static int kgdb8250_baud;
++
++/* Flag for if we need to call request_mem_region */
++static int kgdb8250_needs_request_mem_region;
++
++static char kgdb8250_buf[GDB_BUF_SIZE];
++static atomic_t kgdb8250_buf_in_cnt;
++static int kgdb8250_buf_out_inx;
++
++/* Old-style serial definitions, if present, and a counter. */
++#ifdef CONFIG_KGDB_SIMPLE_SERIAL
++static int __initdata should_copy_rs_table = 1;
++static struct serial_state old_rs_table[] __initdata = {
++#ifdef SERIAL_PORT_DFNS
++	SERIAL_PORT_DFNS
++#endif
++};
++#endif
++
++/* Our internal table of UARTS. */
++#define UART_NR	CONFIG_SERIAL_8250_NR_UARTS
++static struct uart_port kgdb8250_ports[UART_NR];
++
++static struct uart_port *current_port;
++
++/* Base of the UART. */
++static void *kgdb8250_addr;
++
++/* Forward declarations. */
++static int kgdb8250_uart_init(void);
++static int __init kgdb_init_io(void);
++static int __init kgdb8250_opt(char *str);
++
++/* These are much shorter calls to ioread8/iowrite8 that take into
++ * account our shifts, etc. */
++static inline unsigned int kgdb_ioread(u8 mask)
++{
++	return ioread8(kgdb8250_addr + (mask << current_port->regshift));
++}
++
++static inline void kgdb_iowrite(u8 val, u8 mask)
++{
++	iowrite8(val, kgdb8250_addr + (mask << current_port->regshift));
++}
++
++/*
++ * Wait until the interface can accept a char, then write it.
++ */
++static void kgdb_put_debug_char(u8 chr)
++{
++	while (!(kgdb_ioread(UART_LSR) & UART_LSR_THRE)) ;
++
++	kgdb_iowrite(chr, UART_TX);
++}
++
++/*
++ * Get a byte from the hardware data buffer and return it
++ */
++static int read_data_bfr(void)
++{
++	char it = kgdb_ioread(UART_LSR);
++
++	if (it & UART_LSR_DR)
++		return kgdb_ioread(UART_RX);
++
++	/*
++	 * If we have a framing error, assume somebody messed with
++	 * our uart.  Reprogram it and send '-' both ways...
++	 */
++	if (it & 0xc) {
++		kgdb8250_uart_init();
++		kgdb_put_debug_char('-');
++		return '-';
++	}
++
++	return -1;
++}
++
++/*
++ * Get a char if available, return -1 if nothing available.
++ * Empty the receive buffer first, then look at the interface hardware.
++ */
++static int kgdb_get_debug_char(void)
++{
++	int retchr;
++
++	/* intr routine has q'd chars */
++	if (atomic_read(&kgdb8250_buf_in_cnt) != 0) {
++		retchr = kgdb8250_buf[kgdb8250_buf_out_inx++];
++		kgdb8250_buf_out_inx &= (GDB_BUF_SIZE - 1);
++		atomic_dec(&kgdb8250_buf_in_cnt);
++		return retchr;
++	}
++
++	do {
++		retchr = read_data_bfr();
++	} while (retchr < 0);
++
++	return retchr;
++}
++
++/*
++ * This is the receiver interrupt routine for the GDB stub.
++ * All that we need to do is verify that the interrupt happened on the
++ * line we're in charge of.  If this is true, schedule a breakpoint and
++ * return.
++ */
++static irqreturn_t
++kgdb8250_interrupt(int irq, void *dev_id)
++{
++	if (kgdb_ioread(UART_IIR) & UART_IIR_RDI) {
++		/* Throw away the data if another I/O routine is active. */
++		if (kgdb_io_ops.read_char != kgdb_get_debug_char &&
++				(kgdb_ioread(UART_LSR) & UART_LSR_DR))
++			kgdb_ioread(UART_RX);
++		else
++			breakpoint();
++	}
++
++	return IRQ_HANDLED;
++}
++
++/*
++ *  Initializes the UART.
++ *  Returns:
++ *	0 on success, 1 on failure.
++ */
++static int
++kgdb8250_uart_init(void)
++{
++	unsigned int ier, base_baud = current_port->uartclk ?
++		current_port->uartclk / 16 : BASE_BAUD;
++
++	/* test uart existence */
++	if(kgdb_ioread(UART_LSR) == 0xff)
++		return -1;
++
++	/* disable interrupts */
++	kgdb_iowrite(0, UART_IER);
++
++#if defined(CONFIG_ARCH_OMAP1510)
++	/* Workaround to enable 115200 baud on OMAP1510 internal ports */
++	if (cpu_is_omap1510() && is_omap_port((void *)kgdb8250_addr)) {
++		if (kgdb8250_baud == 115200) {
++			base_baud = 1;
++			kgdb8250_baud = 1;
++			kgdb_iowrite(1, UART_OMAP_OSC_12M_SEL);
++		} else
++			kgdb_iowrite(0, UART_OMAP_OSC_12M_SEL);
++	}
++#endif
++	/* set DLAB */
++	kgdb_iowrite(UART_LCR_DLAB, UART_LCR);
++
++	/* set baud */
++	kgdb_iowrite((base_baud / kgdb8250_baud) & 0xff, UART_DLL);
++	kgdb_iowrite((base_baud / kgdb8250_baud) >> 8, UART_DLM);
++
++	/* reset DLAB, set LCR */
++	kgdb_iowrite(UART_LCR_WLEN8, UART_LCR);
++
++	/* set DTR and RTS */
++	kgdb_iowrite(UART_MCR_OUT2 | UART_MCR_DTR | UART_MCR_RTS, UART_MCR);
++
++	/* setup fifo */
++	kgdb_iowrite(UART_FCR_ENABLE_FIFO | UART_FCR_CLEAR_RCVR
++		| UART_FCR_CLEAR_XMIT | UART_FCR_TRIGGER_8,
++		UART_FCR);
++
++	/* clear pending interrupts */
++	kgdb_ioread(UART_IIR);
++	kgdb_ioread(UART_RX);
++	kgdb_ioread(UART_LSR);
++	kgdb_ioread(UART_MSR);
++
++	/* turn on RX interrupt only */
++	kgdb_iowrite(UART_IER_RDI, UART_IER);
++
++	/*
++	 * Borrowed from the main 8250 driver.
++	 * Try writing and reading the UART_IER_UUE bit (b6).
++	 * If it works, this is probably one of the Xscale platform's
++	 * internal UARTs.
++	 * We're going to explicitly set the UUE bit to 0 before
++	 * trying to write and read a 1 just to make sure it's not
++	 * already a 1 and maybe locked there before we even start.
++	 */
++	ier = kgdb_ioread(UART_IER);
++	kgdb_iowrite(ier & ~UART_IER_UUE, UART_IER);
++	if (!(kgdb_ioread(UART_IER) & UART_IER_UUE)) {
++		/*
++		 * OK it's in a known zero state, try writing and reading
++		 * without disturbing the current state of the other bits.
++		 */
++		kgdb_iowrite(ier | UART_IER_UUE, UART_IER);
++		if (kgdb_ioread(UART_IER) & UART_IER_UUE)
++			/*
++			 * It's an Xscale.
++			 */
++			ier |= UART_IER_UUE | UART_IER_RTOIE;
++	}
++	kgdb_iowrite(ier, UART_IER);
++	return 0;
++}
++
++/*
++ * Copy the old serial_state table to our uart_port table if we haven't
++ * had values specifically configured in.  We need to make sure this only
++ * happens once.
++ */
++static void __init kgdb8250_copy_rs_table(void)
++{
++#ifdef CONFIG_KGDB_SIMPLE_SERIAL
++	int i;
++
++	if (!should_copy_rs_table)
++		return;
++
++	for (i = 0; i < ARRAY_SIZE(old_rs_table); i++) {
++		kgdb8250_ports[i].iobase = old_rs_table[i].port;
++		kgdb8250_ports[i].irq = irq_canonicalize(old_rs_table[i].irq);
++		kgdb8250_ports[i].uartclk = old_rs_table[i].baud_base * 16;
++		kgdb8250_ports[i].membase = old_rs_table[i].iomem_base;
++		kgdb8250_ports[i].iotype = old_rs_table[i].io_type;
++		kgdb8250_ports[i].regshift = old_rs_table[i].iomem_reg_shift;
++		kgdb8250_ports[i].line = i;
++	}
++
++	should_copy_rs_table = 0;
++#endif
++}
++
++/*
++ * Hookup our IRQ line now that it is safe to do so, after we grab any
++ * memory regions we might need to.  If we haven't been initialized yet,
++ * go ahead and copy the old_rs_table in.
++ */
++static void __init kgdb8250_late_init(void)
++{
++	/* Try and copy the old_rs_table. */
++	kgdb8250_copy_rs_table();
++
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_SERIAL_8250_MODULE)
++	/* Take the port away from the main driver. */
++	serial8250_unregister_by_port(current_port);
++
++	/* Now reinit the port as the above has disabled things. */
++	kgdb8250_uart_init();
++#endif
++	/* We may need to call request_mem_region() first. */
++	if (kgdb8250_needs_request_mem_region)
++		request_mem_region(current_port->mapbase,
++				   8 << current_port->regshift, "kgdb");
++	if (request_irq(current_port->irq, kgdb8250_interrupt, IRQF_SHARED,
++			"GDB-stub", current_port) < 0)
++		printk(KERN_ERR "KGDB failed to request the serial IRQ (%d)\n",
++		       current_port->irq);
++}
++
++static __init int kgdb_init_io(void)
++{
++	/* Give us the basic table of uarts. */
++	kgdb8250_copy_rs_table();
++
++	/* We're either a module and parse a config string, or we have a
++	 * semi-static config. */
++#ifdef CONFIG_KGDB_8250_MODULE
++	if (strlen(config)) {
++		if (kgdb8250_opt(config))
++			return -EINVAL;
++	} else {
++		printk(KERN_ERR "kgdb8250: argument error, usage: "
++		       "kgdb8250=<io or mmio>,<address>,<baud rate>,<irq>\n");
++		return -EINVAL;
++	}
++#elif defined(CONFIG_KGDB_SIMPLE_SERIAL)
++	kgdb8250_baud = CONFIG_KGDB_BAUDRATE;
++
++	/* Setup our pointer to the serial port now. */
++	current_port = &kgdb8250_ports[CONFIG_KGDB_PORT_NUM];
++#else
++	if (kgdb8250_opt(CONFIG_KGDB_8250_CONF_STRING))
++		return -EINVAL;
++#endif
++
++
++	/* Internal driver setup. */
++	switch (current_port->iotype) {
++	case UPIO_MEM:
++		if (current_port->mapbase)
++			kgdb8250_needs_request_mem_region = 1;
++		if (current_port->flags & UPF_IOREMAP) {
++			current_port->membase = ioremap(current_port->mapbase,
++						8 << current_port->regshift);
++			if (!current_port->membase)
++				return -EIO;	/* Failed. */
++		}
++		kgdb8250_addr = current_port->membase;
++		break;
++	case UPIO_PORT:
++	default:
++		kgdb8250_addr = ioport_map(current_port->iobase,
++					   8 << current_port->regshift);
++		if (!kgdb8250_addr)
++			return -EIO;	/* Failed. */
++	}
++
++	if (kgdb8250_uart_init() == -1) {
++		printk(KERN_ERR "kgdb8250: init failed\n");
++		return -EIO;
++	}
++#ifdef CONFIG_KGDB_8250_MODULE
++	/* Attach the kgdb irq. When this is built into the kernel, it
++	 * is called as a part of late_init sequence.
++	 */
++	kgdb8250_late_init();
++	if (kgdb_register_io_module(&local_kgdb_io_ops))
++		return -EINVAL;
++
++	printk(KERN_INFO "kgdb8250: debugging enabled\n");
++#endif				/* CONFIG_KGDB_8250_MODULE */
++
++	return 0;
++}
++
++#ifdef CONFIG_KGDB_8250_MODULE
++/* If it is a module the kgdb_io_ops should be a static which
++ * is passed to the KGDB I/O initialization
++ */
++static struct kgdb_io local_kgdb_io_ops = {
++#else				/* ! CONFIG_KGDB_8250_MODULE */
++struct kgdb_io kgdb_io_ops = {
++#endif				/* ! CONFIG_KGDB_8250_MODULE */
++	.read_char = kgdb_get_debug_char,
++	.write_char = kgdb_put_debug_char,
++	.init = kgdb_init_io,
++	.late_init = kgdb8250_late_init,
++};
++
++/**
++ * 	kgdb8250_add_port - Define a serial port for use with KGDB
++ * 	@i: The index of the port being added
++ * 	@serial_req: The &struct uart_port describing the port
++ *
++ * 	On platforms where we must register the serial device
++ * 	dynamically, this is the best option if a platform also normally
++ * 	calls early_serial_setup().
++ */
++void __init kgdb8250_add_port(int i, struct uart_port *serial_req)
++{
++	/* Make sure we've got the built-in data before we override. */
++	kgdb8250_copy_rs_table();
++
++	/* Copy the whole thing over. */
++	if (current_port != &kgdb8250_ports[i])
++		memcpy(&kgdb8250_ports[i], serial_req, sizeof(struct uart_port));
++}
++
++/**
++ * 	kgdb8250_add_platform_port - Define a serial port for use with KGDB
++ * 	@i: The index of the port being added
++ * 	@p: The &struct plat_serial8250_port describing the port
++ *
++ * 	On platforms where we must register the serial device
++ * 	dynamically, this is the best option if a platform normally
++ * 	handles uart setup with an array of &struct plat_serial8250_port.
++ */
++void __init kgdb8250_add_platform_port(int i, struct plat_serial8250_port *p)
++{
++	/* Make sure we've got the built-in data before we override. */
++	kgdb8250_copy_rs_table();
++
++	kgdb8250_ports[i].iobase = p->iobase;
++	kgdb8250_ports[i].membase = p->membase;
++	kgdb8250_ports[i].irq = p->irq;
++	kgdb8250_ports[i].uartclk = p->uartclk;
++	kgdb8250_ports[i].regshift = p->regshift;
++	kgdb8250_ports[i].iotype = p->iotype;
++	kgdb8250_ports[i].flags = p->flags;
++	kgdb8250_ports[i].mapbase = p->mapbase;
++}
++
++/*
++ * Syntax for this cmdline option is:
++ * kgdb8250=<io or mmio>,<address>,<baud rate>,<irq>"
++ */
++static int __init kgdb8250_opt(char *str)
++{
++	/* We'll fill out and use the first slot. */
++	current_port = &kgdb8250_ports[0];
++
++	if (!strncmp(str, "io", 2)) {
++		current_port->iotype = UPIO_PORT;
++		str += 2;
++	} else if (!strncmp(str, "mmap", 4)) {
++		current_port->iotype = UPIO_MEM;
++		current_port->flags |= UPF_IOREMAP;
++		str += 4;
++	} else if (!strncmp(str, "mmio", 4)) {
++		current_port->iotype = UPIO_MEM;
++		current_port->flags &= ~UPF_IOREMAP;
++		str += 4;
++	} else
++		goto errout;
++
++	if (*str != ',')
++		goto errout;
++	str++;
++
++	if (current_port->iotype == UPIO_PORT)
++		current_port->iobase = simple_strtoul(str, &str, 16);
++	else {
++		if (current_port->flags & UPF_IOREMAP)
++			current_port->mapbase =
++				(unsigned long) simple_strtoul(str, &str, 16);
++		else
++			current_port->membase =
++				(void *) simple_strtoul(str, &str, 16);
++	}
++
++	if (*str != ',')
++		goto errout;
++	str++;
++
++	kgdb8250_baud = simple_strtoul(str, &str, 10);
++	if (!kgdb8250_baud)
++		goto errout;
++
++	if (*str != ',')
++		goto errout;
++	str++;
++
++	current_port->irq = simple_strtoul(str, &str, 10);
++
++#ifdef CONFIG_KGDB_SIMPLE_SERIAL
++	should_copy_rs_table = 0;
++#endif
++
++	return 0;
++
++ errout:
++	printk(KERN_ERR "Invalid syntax for option kgdb8250=\n");
++	return 1;
++}
++
++#ifdef CONFIG_KGDB_8250_MODULE
++static void cleanup_kgdb8250(void)
++{
++	kgdb_unregister_io_module(&local_kgdb_io_ops);
++
++	/* Clean up the irq and memory */
++	free_irq(current_port->irq, current_port);
++
++	if (kgdb8250_needs_request_mem_region)
++		release_mem_region(current_port->mapbase,
++				   8 << current_port->regshift);
++	/* Hook up the serial port back to what it was previously
++	 * hooked up to.
++	 */
++#if defined(CONFIG_SERIAL_8250) || defined(CONFIG_SERIAL_8250_MODULE)
++	/* Give the port back to the 8250 driver. */
++	serial8250_register_port(current_port);
++#endif
++}
++
++module_init(kgdb_init_io);
++module_exit(cleanup_kgdb8250);
++#else				/* ! CONFIG_KGDB_8250_MODULE */
++early_param("kgdb8250", kgdb8250_opt);
++#endif				/* ! CONFIG_KGDB_8250_MODULE */
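+
+To summarize the option grammar parsed by kgdb8250_opt() above, here are a
+few illustrative boot-line values (the addresses, baud rates and IRQ
+numbers are made up, not recommendations):
+
+    kgdb8250=io,0x3f8,115200,4           port I/O; 0x3f8 becomes iobase
+    kgdb8250=mmio,0xe0004500,115200,42   already-mapped virtual address (membase)
+    kgdb8250=mmap,0xff600300,115200,42   physical address; driver ioremap()s it
+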
+diff -Nurb linux-2.6.22-570/drivers/serial/Kconfig linux-2.6.22-try2/drivers/serial/Kconfig
+--- linux-2.6.22-570/drivers/serial/Kconfig	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/serial/Kconfig	2007-12-19 15:29:24.000000000 -0500
+@@ -107,7 +107,7 @@
+ 
+ config SERIAL_8250_NR_UARTS
+ 	int "Maximum number of 8250/16550 serial ports"
+-	depends on SERIAL_8250
++	depends on SERIAL_8250 || KGDB_8250
+ 	default "4"
+ 	help
+ 	  Set this to the number of serial ports you want the driver
+diff -Nurb linux-2.6.22-570/drivers/serial/Makefile linux-2.6.22-try2/drivers/serial/Makefile
+--- linux-2.6.22-570/drivers/serial/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/serial/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -23,6 +23,7 @@
+ obj-$(CONFIG_SERIAL_8250_AU1X00) += 8250_au1x00.o
+ obj-$(CONFIG_SERIAL_AMBA_PL010) += amba-pl010.o
+ obj-$(CONFIG_SERIAL_AMBA_PL011) += amba-pl011.o
++obj-$(CONFIG_KGDB_AMBA_PL011)	+= pl011_kgdb.o
+ obj-$(CONFIG_SERIAL_CLPS711X) += clps711x.o
+ obj-$(CONFIG_SERIAL_PXA) += pxa.o
+ obj-$(CONFIG_SERIAL_PNX8XXX) += pnx8xxx_uart.o
+@@ -50,10 +51,12 @@
+ obj-$(CONFIG_SERIAL_MPC52xx) += mpc52xx_uart.o
+ obj-$(CONFIG_SERIAL_ICOM) += icom.o
+ obj-$(CONFIG_SERIAL_M32R_SIO) += m32r_sio.o
++obj-$(CONFIG_KGDB_MPSC) += mpsc_kgdb.o
+ obj-$(CONFIG_SERIAL_MPSC) += mpsc.o
+ obj-$(CONFIG_ETRAX_SERIAL) += crisv10.o
+ obj-$(CONFIG_SERIAL_JSM) += jsm/
+ obj-$(CONFIG_SERIAL_TXX9) += serial_txx9.o
++obj-$(CONFIG_KGDB_TXX9) += serial_txx9_kgdb.o
+ obj-$(CONFIG_SERIAL_VR41XX) += vr41xx_siu.o
+ obj-$(CONFIG_SERIAL_SGI_IOC4) += ioc4_serial.o
+ obj-$(CONFIG_SERIAL_SGI_IOC3) += ioc3_serial.o
+@@ -62,3 +65,4 @@
+ obj-$(CONFIG_SERIAL_NETX) += netx-serial.o
+ obj-$(CONFIG_SERIAL_OF_PLATFORM) += of_serial.o
+ obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
++obj-$(CONFIG_KGDB_8250) += 8250_kgdb.o
+diff -Nurb linux-2.6.22-570/drivers/serial/amba-pl011.c linux-2.6.22-try2/drivers/serial/amba-pl011.c
+--- linux-2.6.22-570/drivers/serial/amba-pl011.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/serial/amba-pl011.c	2007-12-19 15:29:24.000000000 -0500
+@@ -332,7 +332,7 @@
+ 	/*
+ 	 * Allocate the IRQ
+ 	 */
+-	retval = request_irq(uap->port.irq, pl011_int, 0, "uart-pl011", uap);
++	retval = request_irq(uap->port.irq, pl011_int, IRQF_SHARED, "uart-pl011", uap);
+ 	if (retval)
+ 		goto clk_dis;
+ 
+diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/Makefile linux-2.6.22-try2/drivers/serial/cpm_uart/Makefile
+--- linux-2.6.22-570/drivers/serial/cpm_uart/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/serial/cpm_uart/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -7,5 +7,6 @@
+ # Select the correct platform objects.
+ cpm_uart-objs-$(CONFIG_CPM2)	+= cpm_uart_cpm2.o
+ cpm_uart-objs-$(CONFIG_8xx)	+= cpm_uart_cpm1.o
++cpm_uart-objs-$(CONFIG_KGDB_CPM_UART)	+= cpm_uart_kgdb.o
+ 
+ cpm_uart-objs	:= cpm_uart_core.o $(cpm_uart-objs-y)
+diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart.h linux-2.6.22-try2/drivers/serial/cpm_uart/cpm_uart.h
+--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/serial/cpm_uart/cpm_uart.h	2007-12-19 15:29:24.000000000 -0500
+@@ -50,6 +50,41 @@
+ 
+ #define SCC_WAIT_CLOSING 100
+ 
++#ifdef CONFIG_KGDB_CPM_UART
++
++/* Speed of the debug UART. */
++#if CONFIG_KGDB_BAUDRATE == 9600
++#define KGDB_BAUD B9600
++#elif CONFIG_KGDB_BAUDRATE == 19200
++#define KGDB_BAUD B19200
++#elif CONFIG_KGDB_BAUDRATE == 38400
++#define KGDB_BAUD B38400
++#elif CONFIG_KGDB_BAUDRATE == 57600
++#define KGDB_BAUD B57600
++#elif CONFIG_KGDB_BAUDRATE == 115200
++#define KGDB_BAUD B115200	/* Start with this if not given */
++#else
++#error Unsupported baud rate!
++#endif
++
++#if defined(CONFIG_KGDB_CPM_UART_SCC1)
++#define KGDB_PINFO_INDEX 	UART_SCC1
++#elif defined(CONFIG_KGDB_CPM_UART_SCC2)
++#define KGDB_PINFO_INDEX	UART_SCC2
++#elif defined(CONFIG_KGDB_CPM_UART_SCC3)
++#define KGDB_PINFO_INDEX	UART_SCC3
++#elif defined(CONFIG_KGDB_CPM_UART_SCC4)
++#define KGDB_PINFO_INDEX	UART_SCC4
++#elif defined(CONFIG_KGDB_CPM_UART_SMC1)
++#define KGDB_PINFO_INDEX	UART_SMC1
++#elif defined(CONFIG_KGDB_CPM_UART_SMC2)
++#define KGDB_PINFO_INDEX	UART_SMC2
++#else
++#error The port for KGDB is undefined!
++#endif
++
++#endif /* CONFIG_KGDB_CPM_UART */
++
+ struct uart_cpm_port {
+ 	struct uart_port	port;
+ 	u16			rx_nrfifos;
+@@ -86,6 +121,9 @@
+ extern int cpm_uart_nr;
+ extern struct uart_cpm_port cpm_uart_ports[UART_NR];
+ 
++void cpm_uart_early_write(int index, const char *s, u_int count);
++int cpm_uart_early_setup(int index, int early);
++
+ /* these are located in their respective files */
+ void cpm_line_cr_cmd(int line, int cmd);
+ int cpm_uart_init_portdesc(void);
+@@ -132,5 +170,4 @@
+ 	return 0;
+ }
+ 
+-
+ #endif /* CPM_UART_H */
+diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_core.c linux-2.6.22-try2/drivers/serial/cpm_uart/cpm_uart_core.c
+--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/serial/cpm_uart/cpm_uart_core.c	2007-12-19 15:29:24.000000000 -0500
+@@ -1073,22 +1073,17 @@
+ 	return 0;
+ }
+ 
+-#ifdef CONFIG_SERIAL_CPM_CONSOLE
+-/*
+- *	Print a string to the serial port trying not to disturb
+- *	any possible real use of the port...
+- *
+- *	Note that this is called with interrupts already disabled
+- */
+-static void cpm_uart_console_write(struct console *co, const char *s,
++void cpm_uart_early_write(int index, const char *s,
+ 				   u_int count)
+ {
+-	struct uart_cpm_port *pinfo =
+-	    &cpm_uart_ports[cpm_uart_port_map[co->index]];
++	struct uart_cpm_port *pinfo;
+ 	unsigned int i;
+ 	volatile cbd_t *bdp, *bdbase;
+ 	volatile unsigned char *cp;
+ 
++	BUG_ON(index >= UART_NR);
++	pinfo = &cpm_uart_ports[index];
++
+ 	/* Get the address of the host memory buffer.
+ 	 */
+ 	bdp = pinfo->tx_cur;
+@@ -1152,19 +1147,14 @@
+ 	pinfo->tx_cur = (volatile cbd_t *) bdp;
+ }
+ 
+-
+-static int __init cpm_uart_console_setup(struct console *co, char *options)
++int cpm_uart_early_setup(int index, int early)
+ {
++	int ret;
+ 	struct uart_port *port;
+ 	struct uart_cpm_port *pinfo;
+-	int baud = 38400;
+-	int bits = 8;
+-	int parity = 'n';
+-	int flow = 'n';
+-	int ret;
+ 
+ 	struct fs_uart_platform_info *pdata;
+-	struct platform_device* pdev = early_uart_get_pdev(co->index);
++	struct platform_device* pdev = early_uart_get_pdev(index);
+ 
+ 	if (!pdev) {
+ 		pr_info("cpm_uart: console: compat mode\n");
+@@ -1172,8 +1162,9 @@
+ 		cpm_uart_init_portdesc();
+ 	}
+ 
++	BUG_ON(index >= UART_NR);
+ 	port =
+-	    (struct uart_port *)&cpm_uart_ports[cpm_uart_port_map[co->index]];
++		(struct uart_port *)&cpm_uart_ports[index];
+ 	pinfo = (struct uart_cpm_port *)port;
+ 	if (!pdev) {
+ 		if (pinfo->set_lineif)
+@@ -1187,15 +1178,6 @@
+ 		cpm_uart_drv_get_platform_data(pdev, 1);
+ 	}
+ 
+-	pinfo->flags |= FLAG_CONSOLE;
+-
+-	if (options) {
+-		uart_parse_options(options, &baud, &parity, &bits, &flow);
+-	} else {
+-		if ((baud = uart_baudrate()) == -1)
+-			baud = 9600;
+-	}
+-
+ 	if (IS_SMC(pinfo)) {
+ 		pinfo->smcp->smc_smcm &= ~(SMCM_RX | SMCM_TX);
+ 		pinfo->smcp->smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN);
+@@ -1203,8 +1185,7 @@
+ 		pinfo->sccp->scc_sccm &= ~(UART_SCCM_TX | UART_SCCM_RX);
+ 		pinfo->sccp->scc_gsmrl &= ~(SCC_GSMRL_ENR | SCC_GSMRL_ENT);
+ 	}
+-
+-	ret = cpm_uart_allocbuf(pinfo, 1);
++	ret = cpm_uart_allocbuf(pinfo, early);
+ 
+ 	if (ret)
+ 		return ret;
+@@ -1216,6 +1197,62 @@
+ 	else
+ 		cpm_uart_init_scc(pinfo);
+ 
++	return 0;
++}
++
++#ifdef CONFIG_SERIAL_CPM_CONSOLE
++/*
++ *	Print a string to the serial port trying not to disturb
++ *	any possible real use of the port...
++ *
++ *	Note that this is called with interrupts already disabled
++ */
++
++static void cpm_uart_console_write(struct console *co, const char *s,
++				   u_int count)
++{
++	cpm_uart_early_write(cpm_uart_port_map[co->index], s, count);
++}
++
++/*
++ * Set up the console.  Be careful: this may be called early!
++ */
++static int __init cpm_uart_console_setup(struct console *co, char *options)
++{
++	struct uart_port *port;
++	struct uart_cpm_port *pinfo;
++	int baud = 115200;
++	int bits = 8;
++	int parity = 'n';
++	int flow = 'n';
++	int ret;
++
++#ifdef CONFIG_KGDB_CPM_UART
++	/* We are not interested in ports already claimed by kgdb */
++	if (co->index == KGDB_PINFO_INDEX)
++		return 0;
++#endif
++
++	port =
++	    (struct uart_port *)&cpm_uart_ports[cpm_uart_port_map[co->index]];
++	pinfo = (struct uart_cpm_port *)port;
++
++	pinfo->flags |= FLAG_CONSOLE;
++
++	if (options) {
++		uart_parse_options(options, &baud, &parity, &bits, &flow);
++	} else {
++		bd_t *bd = (bd_t *) __res;
++
++		if (bd->bi_baudrate)
++			baud = bd->bi_baudrate;
++		else
++			baud = 9600;
++	}
++
++	ret = cpm_uart_early_setup(cpm_uart_port_map[co->index], 1);
++	if (ret)
++		return ret;
+ 	uart_set_options(port, co, baud, parity, bits, flow);
+ 
+ 	return 0;
+@@ -1266,6 +1303,12 @@
+ 
+ 	pdata = pdev->dev.platform_data;
+ 
++#ifdef CONFIG_KGDB_CPM_UART
++	/* We are not interested in ports already claimed by kgdb */
++	if (cpm_uart_id2nr(fs_uart_get_id(pdata)) == KGDB_PINFO_INDEX)
++		return ret;
++#endif
++
+ 	if ((ret = cpm_uart_drv_get_platform_data(pdev, 0)))
+ 		return ret;
+ 
+@@ -1363,6 +1406,12 @@
+ 
+ 		for (i = 0; i < cpm_uart_nr; i++) {
+ 			int con = cpm_uart_port_map[i];
++
++#ifdef CONFIG_KGDB_CPM_UART
++			/* We are not interested in ports already claimed by kgdb */
++			if (con == KGDB_PINFO_INDEX)
++				continue;
++#endif
+ 			cpm_uart_ports[con].port.line = i;
+ 			cpm_uart_ports[con].port.flags = UPF_BOOT_AUTOCONF;
+ 			if (cpm_uart_ports[con].set_lineif)
+diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm1.c linux-2.6.22-try2/drivers/serial/cpm_uart/cpm_uart_cpm1.c
+--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm1.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/serial/cpm_uart/cpm_uart_cpm1.c	2007-12-19 15:29:24.000000000 -0500
+@@ -95,12 +95,36 @@
+ {
++	unsigned *bcsr_io;
+ 	/* XXX SCC1: insert port configuration here */
+ 	pinfo->brg = 1;
++
++#if defined (CONFIG_MPC885ADS) || defined (CONFIG_MPC86XADS)
++	bcsr_io = ioremap(BCSR1, sizeof(unsigned long));
++
++	if (bcsr_io == NULL) {
++		printk(KERN_CRIT "Could not remap BCSR\n");
++		return;
++	}
++	out_be32(bcsr_io, in_be32(bcsr_io) & ~BCSR1_RS232EN_1);
++	iounmap(bcsr_io);
++#endif
+ }
+ 
+ void scc2_lineif(struct uart_cpm_port *pinfo)
+ {
++	unsigned *bcsr_io;
++
+ 	/* XXX SCC2: insert port configuration here */
+ 	pinfo->brg = 2;
++#if defined (CONFIG_MPC885ADS) || defined (CONFIG_MPC86XADS)
++	bcsr_io = ioremap(BCSR1, sizeof(unsigned long));
++
++	if (bcsr_io == NULL) {
++		printk(KERN_CRIT "Could not remap BCSR\n");
++		return;
++	}
++	out_be32(bcsr_io, in_be32(bcsr_io) & ~BCSR1_RS232EN_2);
++	iounmap(bcsr_io);
++#endif
+ }
+ 
+ void scc3_lineif(struct uart_cpm_port *pinfo)
+@@ -189,6 +213,10 @@
+ {
+ 	pr_debug("CPM uart[-]:init portdesc\n");
+ 
++	/* Check if we have called this yet.  This may happen if an early
++	 * kgdb breakpoint is set. */
++	if (cpm_uart_nr)
++		return 0;
+ 	cpm_uart_nr = 0;
+ #ifdef CONFIG_SERIAL_CPM_SMC1
+ 	cpm_uart_ports[UART_SMC1].smcp = &cpmp->cp_smc[0];
+diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm2.c linux-2.6.22-try2/drivers/serial/cpm_uart/cpm_uart_cpm2.c
+--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_cpm2.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/serial/cpm_uart/cpm_uart_cpm2.c	2007-12-19 15:29:24.000000000 -0500
+@@ -289,6 +289,10 @@
+ #endif
+ 	pr_debug("CPM uart[-]:init portdesc\n");
+ 
++	/* Check if we have called this yet.  This may happen if an early
++	 * kgdb breakpoint is set. */
++	if (cpm_uart_nr)
++		return 0;
+ 	cpm_uart_nr = 0;
+ #ifdef CONFIG_SERIAL_CPM_SMC1
+ 	cpm_uart_ports[UART_SMC1].smcp = (smc_t *) cpm2_map(im_smc[0]);
+diff -Nurb linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_kgdb.c linux-2.6.22-try2/drivers/serial/cpm_uart/cpm_uart_kgdb.c
+--- linux-2.6.22-570/drivers/serial/cpm_uart/cpm_uart_kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/serial/cpm_uart/cpm_uart_kgdb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,185 @@
++/*
++ * drivers/serial/cpm_uart/cpm_uart_kgdb.c
++ *
++ * CPM UART interface for kgdb.
++ *
++ * Author: Vitaly Bordug <vbordug@ru.mvista.com>
++ *
++ * Used some bits from drivers/serial/kgdb_8250.c as a template
++ *
++ * 2005-2007 (c) MontaVista Software, Inc. This file is licensed under
++ * the terms of the GNU General Public License version 2. This program
++ * is licensed "as is" without any warranty of any kind, whether express
++ * or implied.
++ */
++
++#include <linux/kgdb.h>
++#include <linux/kernel.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <linux/tty.h>
++#include <linux/serial.h>
++#include <linux/serial_core.h>
++#include <linux/serial_reg.h>
++
++#include <asm/io.h>
++#include <asm/serial.h>		/* For BASE_BAUD and SERIAL_PORT_DFNS */
++
++#include "cpm_uart.h"
++
++#define GDB_BUF_SIZE	512	/* power of 2, please */
++
++
++static char kgdb_buf[GDB_BUF_SIZE], *kgdbp;
++static int kgdb_chars;
++
++/* Forward declarations. */
++
++/*
++ * Receive a character from the serial port.  This only works well
++ * before the port is initialized for real use.
++ */
++static int kgdb_wait_key(char *obuf)
++{
++	struct uart_cpm_port *pinfo;
++	u_char		c, *cp;
++	volatile cbd_t	*bdp;
++	int		i;
++
++	pinfo = &cpm_uart_ports[KGDB_PINFO_INDEX];
++
++	/* Get the address of the host memory buffer.
++	 */
++	bdp = pinfo->rx_cur;
++	while (bdp->cbd_sc & BD_SC_EMPTY);
++
++	/* If the buffer address is in the CPM DPRAM, don't
++	 * convert it.
++	 */
++	cp = cpm2cpu_addr(bdp->cbd_bufaddr, pinfo);
++
++	if (obuf) {
++		i = c = bdp->cbd_datlen;
++		while (i-- > 0)
++			*obuf++ = *cp++;
++	} else
++		c = *cp;
++	bdp->cbd_sc |= BD_SC_EMPTY;
++
++	if (bdp->cbd_sc & BD_SC_WRAP)
++		bdp = pinfo->rx_bd_base;
++	else
++		bdp++;
++	pinfo->rx_cur = (cbd_t *)bdp;
++
++	return (int)c;
++}
++
++
++/*
++ * Wait until the interface can accept a char, then write it.
++ */
++static void kgdb_put_debug_char(u8 chr)
++{
++	static char ch[2];
++
++	ch[0] = (char)chr;
++	cpm_uart_early_write(KGDB_PINFO_INDEX, ch, 1);
++}
++
++
++/*
++ * Get a char if available, return -1 if nothing available.
++ * Empty the receive buffer first, then look at the interface hardware.
++ */
++static int kgdb_get_debug_char(void)
++{
++	if (kgdb_chars <= 0) {
++		kgdb_chars = kgdb_wait_key(kgdb_buf);
++		kgdbp = kgdb_buf;
++	}
++	kgdb_chars--;
++
++	return (*kgdbp++);
++}
++
++static void termios_set_options(int index,
++		 int baud, int parity, int bits, int flow)
++{
++	struct ktermios termios;
++	struct uart_port *port;
++	struct uart_cpm_port *pinfo;
++
++	BUG_ON(index >= UART_NR);
++
++	port  = (struct uart_port *)&cpm_uart_ports[index];
++	pinfo = (struct uart_cpm_port *)port;
++
++	/*
++	 * Ensure that the serial console lock is initialised
++	 * early.
++	 */
++	spin_lock_init(&port->lock);
++
++	memset(&termios, 0, sizeof(struct ktermios));
++
++	termios.c_cflag = CREAD | HUPCL | CLOCAL;
++
++	termios.c_cflag |= baud;
++
++	if (bits == 7)
++		termios.c_cflag |= CS7;
++	else
++		termios.c_cflag |= CS8;
++
++	switch (parity) {
++	case 'o': case 'O':
++		termios.c_cflag |= PARODD;
++		/* fall through */
++	case 'e': case 'E':
++		termios.c_cflag |= PARENB;
++		break;
++	}
++
++	if (flow == 'r')
++		termios.c_cflag |= CRTSCTS;
++
++	port->ops->set_termios(port, &termios, NULL);
++}
++
++/*
++ *  Returns:
++ *	0 on success, 1 on failure.
++ */
++static int kgdb_init(void)
++{
++	struct uart_port *port;
++	struct uart_cpm_port *pinfo;
++	int use_bootmem = 0; /* use dma by default */
++
++	if (!cpm_uart_nr) {
++		use_bootmem = 1;
++		cpm_uart_init_portdesc();
++	}
++	port  = (struct uart_port *)&cpm_uart_ports[KGDB_PINFO_INDEX];
++	pinfo = (struct uart_cpm_port *)port;
++
++	if (cpm_uart_early_setup(KGDB_PINFO_INDEX, use_bootmem))
++		return 1;
++
++	termios_set_options(KGDB_PINFO_INDEX, KGDB_BAUD, 'n', 8, 'n');
++	if (IS_SMC(pinfo))
++		pinfo->smcp->smc_smcm |= SMCM_TX;
++	else
++		pinfo->sccp->scc_sccm |= UART_SCCM_TX;
++
++	return 0;
++}
++
++
++struct kgdb_io kgdb_io_ops = {
++	.read_char  = kgdb_get_debug_char,
++	.write_char = kgdb_put_debug_char,
++	.init = kgdb_init,
++};
++
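+
+All of the kgdb UART backends in this patch implement the same four-hook
+contract.  A minimal skeleton of what the core expects, with
+rx_ready()/rx_byte()/tx_ready()/tx_byte() as hypothetical register helpers:
+
+    #include <linux/kgdb.h>
+
+    static int my_read_char(void)
+    {
+        while (!rx_ready())      /* poll until a byte arrives */
+            cpu_relax();
+        return rx_byte();
+    }
+
+    static void my_write_char(u8 c)
+    {
+        while (!tx_ready())      /* busy-wait for transmitter room */
+            cpu_relax();
+        tx_byte(c);
+    }
+
+    static int my_init(void)
+    {
+        /* program baud/format early, before interrupts are available */
+        return 0;
+    }
+
+    static void my_late_init(void)
+    {
+        /* request_irq() and unmask RX so ^C from gdb can call breakpoint() */
+    }
+
+    struct kgdb_io kgdb_io_ops = {
+        .read_char  = my_read_char,
+        .write_char = my_write_char,
+        .init       = my_init,
+        .late_init  = my_late_init,
+    };
+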
+diff -Nurb linux-2.6.22-570/drivers/serial/mpsc_kgdb.c linux-2.6.22-try2/drivers/serial/mpsc_kgdb.c
+--- linux-2.6.22-570/drivers/serial/mpsc_kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/serial/mpsc_kgdb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,345 @@
++/*
++ * drivers/serial/mpsc_kgdb.c
++ *
++ * KGDB driver for the Marvell MultiProtocol Serial Controller (MPSC)
++ *
++ * Based on the polled boot loader driver by Ajit Prem (ajit.prem@motorola.com)
++ *
++ * Author: Randy Vinson <rvinson@mvista.com>
++ *
++ * Copyright (C) 2005-2006 MontaVista Software, Inc.
++ * This program is free software; you can redistribute  it and/or modify it
++ * under  the terms of  the GNU General  Public License as published by the
++ * Free Software Foundation;  either version 2 of the  License, or (at your
++ * option) any later version.
++ */
++
++#include <linux/kgdb.h>
++#include <linux/mv643xx.h>
++#include <linux/device.h>
++#include <asm/mv64x60.h>
++#include <asm/serial.h>
++#include <asm/io.h>
++#include <asm/delay.h>
++
++/* Main MPSC Configuration Register Offsets */
++#define MPSC_MMCRL		0x0000
++#define MPSC_MMCRH		0x0004
++#define MPSC_MPCR		0x0008
++#define MPSC_CHR_1		0x000c
++#define MPSC_CHR_2		0x0010
++#define MPSC_CHR_3		0x0014
++#define MPSC_CHR_4		0x0018
++#define MPSC_CHR_5		0x001c
++#define MPSC_CHR_6		0x0020
++#define MPSC_CHR_7		0x0024
++#define MPSC_CHR_8		0x0028
++#define MPSC_CHR_9		0x002c
++#define MPSC_CHR_10		0x0030
++#define MPSC_CHR_11		0x0034
++
++#define MPSC_MPCR_FRZ		(1 << 9)
++#define MPSC_MPCR_CL_5		0
++#define MPSC_MPCR_CL_6		1
++#define MPSC_MPCR_CL_7		2
++#define MPSC_MPCR_CL_8		3
++#define MPSC_MPCR_SBL_1 	0
++#define MPSC_MPCR_SBL_2 	1
++
++#define MPSC_CHR_2_TEV		(1<<1)
++#define MPSC_CHR_2_TA		(1<<7)
++#define MPSC_CHR_2_TTCS		(1<<9)
++#define MPSC_CHR_2_REV		(1<<17)
++#define MPSC_CHR_2_RA		(1<<23)
++#define MPSC_CHR_2_CRD		(1<<25)
++#define MPSC_CHR_2_EH		(1<<31)
++#define MPSC_CHR_2_PAR_ODD	0
++#define MPSC_CHR_2_PAR_SPACE	1
++#define MPSC_CHR_2_PAR_EVEN	2
++#define MPSC_CHR_2_PAR_MARK	3
++
++/* MPSC Signal Routing */
++#define MPSC_MRR		0x0000
++#define MPSC_RCRR		0x0004
++#define MPSC_TCRR		0x0008
++
++/* MPSC Interrupt registers (offset from MV64x60_SDMA_INTR_OFFSET) */
++#define MPSC_INTR_CAUSE 	0x0004
++#define MPSC_INTR_MASK		0x0084
++#define MPSC_INTR_CAUSE_RCC	(1<<6)
++
++/* Baud Rate Generator Interface Registers */
++#define BRG_BCR 		0x0000
++#define BRG_BTR 		0x0004
++
++/* Speed of the UART. */
++static int kgdbmpsc_baud = CONFIG_KGDB_BAUDRATE;
++
++/* Index of the UART, matches ttyMM naming. */
++static int kgdbmpsc_ttyMM = CONFIG_KGDB_PORT_NUM;
++
++#define MPSC_INTR_REG_SELECT(x)	((x) + (8 * kgdbmpsc_ttyMM))
++
++static int kgdbmpsc_init(void);
++
++static struct platform_device mpsc_dev, shared_dev;
++
++static void __iomem *mpsc_base;
++static void __iomem *brg_base;
++static void __iomem *routing_base;
++static void __iomem *sdma_base;
++
++static unsigned int mpsc_irq;
++
++static void kgdb_write_debug_char(u8 c)
++{
++	u32 data;
++
++	data = readl(mpsc_base + MPSC_MPCR);
++	writeb(c, mpsc_base + MPSC_CHR_1);
++	mb();
++	data = readl(mpsc_base + MPSC_CHR_2);
++	data |= MPSC_CHR_2_TTCS;
++	writel(data, mpsc_base + MPSC_CHR_2);
++	mb();
++
++	while (readl(mpsc_base + MPSC_CHR_2) & MPSC_CHR_2_TTCS) ;
++}
++
++static int kgdb_get_debug_char(void)
++{
++	unsigned char c;
++
++	while (!(readl(sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE)) &
++		 MPSC_INTR_CAUSE_RCC)) ;
++
++	c = readb(mpsc_base + MPSC_CHR_10 + (1 << 1));
++	mb();
++	writeb(c, mpsc_base + MPSC_CHR_10 + (1 << 1));
++	mb();
++	writel(~MPSC_INTR_CAUSE_RCC, sdma_base +
++	       MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE));
++	return (c);
++}
++
++/*
++ * This is the receiver interrupt routine for the GDB stub.
++ * All that we need to do is verify that the interrupt happened on the
++ * line we're in charge of.  If this is true, schedule a breakpoint and
++ * return.
++ */
++static irqreturn_t kgdbmpsc_interrupt(int irq, void *dev_id)
++{
++	if (irq != mpsc_irq)
++		return IRQ_NONE;
++	/*
++	 * If there is some other CPU in KGDB, this is a spurious
++	 * interrupt, so return without even checking a byte.
++	 */
++	if (atomic_read(&debugger_active))
++		return IRQ_NONE;
++
++	if (readl(sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE)) &
++	    MPSC_INTR_CAUSE_RCC)
++		breakpoint();
++
++	return IRQ_HANDLED;
++}
++
++static int __init kgdbmpsc_init(void)
++{
++	struct mpsc_pdata *pdata;
++	u32 cdv;
++
++	if (!brg_base || !mpsc_base || !routing_base || !sdma_base)
++		return -1;
++
++	/* Set MPSC Routing to enable both ports */
++	writel(0x0, routing_base + MPSC_MRR);
++
++	/* MPSC 0/1 Rx & Tx get clocks BRG0/1 */
++	writel(0x00000100, routing_base + MPSC_RCRR);
++	writel(0x00000100, routing_base + MPSC_TCRR);
++
++	/* Disable all MPSC interrupts and clear any pending interrupts */
++	writel(0, sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_MASK));
++	writel(0, sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_CAUSE));
++
++	pdata = (struct mpsc_pdata *)mpsc_dev.dev.platform_data;
++
++	/* cdv = (clock/(2*16*baud rate)) for 16X mode. */
++	cdv = ((pdata->brg_clk_freq / (32 * kgdbmpsc_baud)) - 1);
++	writel((pdata->brg_clk_src << 18) | (1 << 16) | cdv,
++	       brg_base + BRG_BCR);
++
++	/* Put MPSC into UART mode, no null modem, 16x clock mode */
++	writel(0x000004c4, mpsc_base + MPSC_MMCRL);
++	writel(0x04400400, mpsc_base + MPSC_MMCRH);
++
++	writel(0, mpsc_base + MPSC_CHR_1);
++	writel(0, mpsc_base + MPSC_CHR_9);
++	writel(0, mpsc_base + MPSC_CHR_10);
++	writel(4, mpsc_base + MPSC_CHR_3);
++	writel(0x20000000, mpsc_base + MPSC_CHR_4);
++	writel(0x9000, mpsc_base + MPSC_CHR_5);
++	writel(0, mpsc_base + MPSC_CHR_6);
++	writel(0, mpsc_base + MPSC_CHR_7);
++	writel(0, mpsc_base + MPSC_CHR_8);
++
++	/* 8 data bits, 1 stop bit */
++	writel((3 << 12), mpsc_base + MPSC_MPCR);
++
++	/* Enter "hunt" mode */
++	writel((1 << 31), mpsc_base + MPSC_CHR_2);
++
++	udelay(100);
++	return 0;
++}
++
++static void __iomem *__init
++kgdbmpsc_map_resource(struct platform_device *pd, int type, int num)
++{
++	void __iomem *base = NULL;
++	struct resource *r;
++
++	if ((r = platform_get_resource(pd, IORESOURCE_MEM, num)))
++		base = ioremap(r->start, r->end - r->start + 1);
++	return base;
++}
++
++static void __iomem *__init
++kgdbmpsc_unmap_resource(struct platform_device *pd, int type, int num,
++			void __iomem * base)
++{
++	if (base)
++		iounmap(base);
++	return NULL;
++}
++
++static void __init
++kgdbmpsc_reserve_resource(struct platform_device *pd, int type, int num)
++{
++	struct resource *r;
++
++	if ((r = platform_get_resource(pd, IORESOURCE_MEM, num)))
++		request_mem_region(r->start, r->end - r->start + 1, "kgdb");
++}
++
++static int __init kgdbmpsc_local_init(void)
++{
++	if (!mpsc_dev.num_resources || !shared_dev.num_resources)
++		return 1;	/* failure */
++
++	mpsc_base = kgdbmpsc_map_resource(&mpsc_dev, IORESOURCE_MEM,
++					  MPSC_BASE_ORDER);
++	brg_base = kgdbmpsc_map_resource(&mpsc_dev, IORESOURCE_MEM,
++					 MPSC_BRG_BASE_ORDER);
++
++	/* get the platform data for the shared registers and get them mapped */
++	routing_base = kgdbmpsc_map_resource(&shared_dev,
++					     IORESOURCE_MEM,
++					     MPSC_ROUTING_BASE_ORDER);
++	sdma_base =
++	    kgdbmpsc_map_resource(&shared_dev, IORESOURCE_MEM,
++				  MPSC_SDMA_INTR_BASE_ORDER);
++
++	mpsc_irq = platform_get_irq(&mpsc_dev, 1);
++
++	if (mpsc_base && brg_base && routing_base && sdma_base)
++		return 0;	/* success */
++
++	return 1;		/* failure */
++}
++
++static void __init kgdbmpsc_local_exit(void)
++{
++	if (sdma_base)
++		sdma_base = kgdbmpsc_unmap_resource(&shared_dev, IORESOURCE_MEM,
++						    MPSC_SDMA_INTR_BASE_ORDER,
++						    sdma_base);
++	if (routing_base)
++		routing_base = kgdbmpsc_unmap_resource(&shared_dev,
++						       IORESOURCE_MEM,
++						       MPSC_ROUTING_BASE_ORDER,
++						       routing_base);
++	if (brg_base)
++		brg_base = kgdbmpsc_unmap_resource(&mpsc_dev, IORESOURCE_MEM,
++						   MPSC_BRG_BASE_ORDER,
++						   brg_base);
++	if (mpsc_base)
++		mpsc_base = kgdbmpsc_unmap_resource(&mpsc_dev, IORESOURCE_MEM,
++						    MPSC_BASE_ORDER, mpsc_base);
++}
++
++static void __init kgdbmpsc_update_pdata(struct platform_device *pdev)
++{
++
++	snprintf(pdev->dev.bus_id, BUS_ID_SIZE, "%s.%u", pdev->name, pdev->id);
++}
++
++static int __init kgdbmpsc_pdev_init(void)
++{
++	struct platform_device *pdev;
++
++	/* get the platform data for the specified port. */
++	pdev = mv64x60_early_get_pdev_data(MPSC_CTLR_NAME, kgdbmpsc_ttyMM, 1);
++	if (pdev) {
++		memcpy(&mpsc_dev, pdev, sizeof(struct platform_device));
++		if (platform_notify) {
++			kgdbmpsc_update_pdata(&mpsc_dev);
++			platform_notify(&mpsc_dev.dev);
++		}
++
++		/* get the platform data for the shared registers. */
++		pdev = mv64x60_early_get_pdev_data(MPSC_SHARED_NAME, 0, 0);
++		if (pdev) {
++			memcpy(&shared_dev, pdev,
++			       sizeof(struct platform_device));
++			if (platform_notify) {
++				kgdbmpsc_update_pdata(&shared_dev);
++				platform_notify(&shared_dev.dev);
++			}
++		}
++	}
++	return 0;
++}
++
++postcore_initcall(kgdbmpsc_pdev_init);
++
++static int __init kgdbmpsc_init_io(void)
++{
++
++	kgdbmpsc_pdev_init();
++
++	if (kgdbmpsc_local_init()) {
++		kgdbmpsc_local_exit();
++		return -1;
++	}
++
++	if (kgdbmpsc_init() == -1)
++		return -1;
++	return 0;
++}
++
++static void __init kgdbmpsc_hookup_irq(void)
++{
++	unsigned int msk;
++	if (!request_irq(mpsc_irq, kgdbmpsc_interrupt, 0, "kgdb mpsc", NULL)) {
++		/* Enable interrupt */
++		msk = readl(sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_MASK));
++		msk |= MPSC_INTR_CAUSE_RCC;
++		writel(msk, sdma_base + MPSC_INTR_REG_SELECT(MPSC_INTR_MASK));
++
++		kgdbmpsc_reserve_resource(&mpsc_dev, IORESOURCE_MEM,
++					  MPSC_BASE_ORDER);
++		kgdbmpsc_reserve_resource(&mpsc_dev, IORESOURCE_MEM,
++					  MPSC_BRG_BASE_ORDER);
++	}
++}
++
++struct kgdb_io kgdb_io_ops = {
++	.read_char = kgdb_get_debug_char,
++	.write_char = kgdb_write_debug_char,
++	.init = kgdbmpsc_init_io,
++	.late_init = kgdbmpsc_hookup_irq,
++};
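+
+The BRG divisor computed in kgdbmpsc_init() follows from the 16x-sampling
+design: each bit consumes 16 BRG ticks and the counter divides by
+2 * (cdv + 1), giving cdv = clk / (32 * baud) - 1.  A standalone sketch of
+the arithmetic (the 25 MHz clock is an assumed example, not a documented
+mv64x60 figure):
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        unsigned int brg_clk_freq = 25000000;   /* assumed 25 MHz BRG clock */
+        unsigned int baud = 115200;
+        unsigned int cdv = brg_clk_freq / (32 * baud) - 1;
+
+        /* 25000000 / 3686400 truncates to 6, so cdv = 5 */
+        printf("cdv = %u\n", cdv);
+        return 0;
+    }
+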
+diff -Nurb linux-2.6.22-570/drivers/serial/pl011_kgdb.c linux-2.6.22-try2/drivers/serial/pl011_kgdb.c
+--- linux-2.6.22-570/drivers/serial/pl011_kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/serial/pl011_kgdb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,111 @@
++/*
++ * driver/serial/pl011_kgdb.c
++ *
++ * Support for KGDB on ARM AMBA PL011 UARTs
++ *
++ * Authors: Manish Lachwani <mlachwani@mvista.com>
++ *          Deepak Saxena <dsaxena@plexity.net>
++ *
++ * Copyright (c) 2005-2007 MontaVista Software, Inc.
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ *
++ */
++#include <linux/kgdb.h>
++#include <linux/amba/bus.h>
++#include <linux/amba/serial.h>
++
++#include <asm/io.h>
++#include <asm/processor.h>
++#include <asm/hardware.h>
++
++static int kgdb_irq = CONFIG_KGDB_AMBA_IRQ;
++
++#define UART_DIVISOR	(CONFIG_KGDB_AMBA_UARTCLK * 4 / CONFIG_KGDB_BAUDRATE)
++/*
++ * Todo: IO_ADDRESS is not very generic across ARM...
++ */
++static volatile unsigned char *kgdb_port =
++	(unsigned char *)IO_ADDRESS(CONFIG_KGDB_AMBA_BASE);
++
++/*
++ * Init code taken from amba-pl011.c.
++ */
++static int kgdb_serial_init(void)
++{
++	writew(0, kgdb_port + UART010_CR);
++
++	/* Set baud rate */
++	writew(UART_DIVISOR & 0x3f, kgdb_port + UART011_FBRD);
++	writew(UART_DIVISOR >> 6, kgdb_port + UART011_IBRD);
++
++	writew(UART01x_LCRH_WLEN_8 | UART01x_LCRH_FEN, kgdb_port + UART010_LCRH);
++	writew(UART01x_CR_UARTEN | UART011_CR_TXE | UART011_CR_RXE,
++	       kgdb_port + UART010_CR);
++
++	writew(UART011_RXIM, kgdb_port + UART011_IMSC);
++
++	return 0;
++}
++
++static void kgdb_serial_putchar(u8 ch)
++{
++	unsigned int status;
++
++	do {
++		status = readw(kgdb_port + UART01x_FR);
++	} while (status & UART01x_FR_TXFF);
++
++	writew(ch, kgdb_port + UART01x_DR);
++}
++
++static int kgdb_serial_getchar(void)
++{
++	unsigned int status;
++	int ch;
++
++#ifdef CONFIG_DEBUG_LL
++	printascii("Entering serial_getchar loop");
++#endif
++	do {
++		status = readw(kgdb_port + UART01x_FR);
++	} while (status & UART01x_FR_RXFE);
++	ch = readw(kgdb_port + UART01x_DR);
++#ifdef CONFIG_DEBUG_LL
++	printascii("Exited serial_getchar loop");
++	printascii("Read char: ");
++	printch(ch);
++	printascii("\n");
++#endif
++	return ch;
++}
++
++static irqreturn_t kgdb_interrupt(int irq, void *dev_id)
++{
++	int status = readw(kgdb_port + UART011_MIS);
++
++#ifdef CONFIG_DEBUG_LL
++	printascii("KGDB irq\n");
++#endif
++	if (irq != kgdb_irq)
++		return IRQ_NONE;
++
++	if (status & 0x40)
++		breakpoint();
++
++	return IRQ_HANDLED;
++}
++
++static void __init kgdb_hookup_irq(void)
++{
++	request_irq(kgdb_irq, kgdb_interrupt, IRQF_SHARED, "KGDB-serial", kgdb_port);
++}
++
++struct kgdb_io kgdb_io_ops = {
++	.init = kgdb_serial_init,
++	.write_char = kgdb_serial_putchar,
++	.read_char  = kgdb_serial_getchar,
++	.late_init  = kgdb_hookup_irq,
++};
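+
+UART_DIVISOR above is the PL011 baud divisor in units of 1/64: clk * 4 /
+baud equals UARTCLK / (16 * baud) scaled by 64, so the low six bits load
+UART011_FBRD and the rest loads UART011_IBRD.  A standalone sketch with an
+assumed 24 MHz UARTCLK (illustrative only):
+
+    #include <stdio.h>
+
+    int main(void)
+    {
+        unsigned int uartclk = 24000000;    /* assumed example clock */
+        unsigned int baud = 115200;
+        unsigned int divisor = uartclk * 4 / baud;  /* 833 here */
+
+        /* 24e6 / (16 * 115200) = 13.02; IBRD = 833 >> 6 = 13,
+         * FBRD = 833 & 0x3f = 1 (~0.02 * 64). */
+        printf("IBRD=%u FBRD=%u\n", divisor >> 6, divisor & 0x3f);
+        return 0;
+    }
+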
+diff -Nurb linux-2.6.22-570/drivers/serial/pxa.c linux-2.6.22-try2/drivers/serial/pxa.c
+--- linux-2.6.22-570/drivers/serial/pxa.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/serial/pxa.c	2007-12-19 15:29:24.000000000 -0500
+@@ -42,6 +42,9 @@
+ #include <linux/tty.h>
+ #include <linux/tty_flip.h>
+ #include <linux/serial_core.h>
++#ifdef CONFIG_KGDB_CONSOLE
++#include <linux/kgdb.h>
++#endif
+ 
+ #include <asm/io.h>
+ #include <asm/hardware.h>
+@@ -690,6 +693,8 @@
+ console_initcall(serial_pxa_console_init);
+ 
+ #define PXA_CONSOLE	&serial_pxa_console
++#elif defined(CONFIG_KGDB_CONSOLE)
++#define PXA_CONSOLE	&kgdbcons
+ #else
+ #define PXA_CONSOLE	NULL
+ #endif
+diff -Nurb linux-2.6.22-570/drivers/serial/serial_core.c linux-2.6.22-try2/drivers/serial/serial_core.c
+--- linux-2.6.22-570/drivers/serial/serial_core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/serial/serial_core.c	2007-12-19 15:29:24.000000000 -0500
+@@ -33,6 +33,7 @@
+ #include <linux/serial.h> /* for serial_state and serial_icounter_struct */
+ #include <linux/delay.h>
+ #include <linux/mutex.h>
++#include <linux/kgdb.h>
+ 
+ #include <asm/irq.h>
+ #include <asm/uaccess.h>
+@@ -58,6 +59,12 @@
+ #define uart_console(port)	(0)
+ #endif
+ 
++#ifdef CONFIG_KGDB_CONSOLE
++#define uart_kgdb(port)		((port)->cons && !strcmp((port)->cons->name, "kgdb"))
++#else
++#define uart_kgdb(port)		(0)
++#endif
++
+ static void uart_change_speed(struct uart_state *state, struct ktermios *old_termios);
+ static void uart_wait_until_sent(struct tty_struct *tty, int timeout);
+ static void uart_change_pm(struct uart_state *state, int pm_state);
+@@ -1671,6 +1678,9 @@
+ 			mmio ? "mmio:0x" : "port:",
+ 			mmio ? port->mapbase : (unsigned long) port->iobase,
+ 			port->irq);
++	if (port->iotype == UPIO_MEM)
++		ret += sprintf(buf+ret, " membase 0x%08lX",
++					   (unsigned long) port->membase);
+ 
+ 	if (port->type == PORT_UNKNOWN) {
+ 		strcat(buf, "\n");
+@@ -2063,7 +2073,8 @@
+ 	case UPIO_TSI:
+ 	case UPIO_DWAPB:
+ 		snprintf(address, sizeof(address),
+-			 "MMIO 0x%lx", port->mapbase);
++			"MMIO map 0x%lx mem 0x%lx", port->mapbase,
++			(unsigned long) port->membase);
+ 		break;
+ 	default:
+ 		strlcpy(address, "*unknown*", sizeof(address));
+@@ -2118,9 +2129,9 @@
+ 
+ 		/*
+ 		 * Power down all ports by default, except the
+-		 * console if we have one.
++		 * console (real or kgdb) if we have one.
+ 		 */
+-		if (!uart_console(port))
++		if (!uart_console(port) && !uart_kgdb(port))
+ 			uart_change_pm(state, 3);
+ 	}
+ }
+@@ -2311,6 +2322,12 @@
+ 	 */
+ 	port->flags &= ~UPF_DEAD;
+ 
++#if defined(CONFIG_KGDB_8250)
++	/* Add any 8250-like ports we find later. */
++	if (port->type <= PORT_MAX_8250)
++		kgdb8250_add_port(port->line, port);
++#endif
++
+  out:
+ 	mutex_unlock(&state->mutex);
+ 	mutex_unlock(&port_mutex);
+diff -Nurb linux-2.6.22-570/drivers/serial/serial_txx9.c linux-2.6.22-try2/drivers/serial/serial_txx9.c
+--- linux-2.6.22-570/drivers/serial/serial_txx9.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/serial/serial_txx9.c	2007-12-19 15:29:24.000000000 -0500
+@@ -40,6 +40,10 @@
+ static char *serial_version = "1.09";
+ static char *serial_name = "TX39/49 Serial driver";
+ 
++#ifndef CONFIG_KGDB_TXX9
++#define CONFIG_KGDB_PORT_NUM -1
++#endif
++
+ #define PASS_LIMIT	256
+ 
+ #if !defined(CONFIG_SERIAL_TXX9_STDSERIAL)
+@@ -471,6 +475,9 @@
+ 	unsigned long flags;
+ 	int retval;
+ 
++	if (up->port.line == CONFIG_KGDB_PORT_NUM)
++		return -EBUSY;
++
+ 	/*
+ 	 * Clear the FIFO buffers and disable them.
+ 	 * (they will be reenabled in set_termios())
+@@ -799,6 +806,9 @@
+ 	for (i = 0; i < UART_NR; i++) {
+ 		struct uart_txx9_port *up = &serial_txx9_ports[i];
+ 
++		if (up->port.line == CONFIG_KGDB_PORT_NUM)
++			continue;
++
+ 		up->port.line = i;
+ 		up->port.ops = &serial_txx9_pops;
+ 		up->port.dev = dev;
+@@ -967,6 +977,9 @@
+ 
+ 	mutex_lock(&serial_txx9_mutex);
+ 	for (i = 0; i < UART_NR; i++) {
++		if (i == CONFIG_KGDB_PORT_NUM)
++			continue;
++
+ 		uart = &serial_txx9_ports[i];
+ 		if (uart_match_port(&uart->port, port)) {
+ 			uart_remove_one_port(&serial_txx9_reg, &uart->port);
+diff -Nurb linux-2.6.22-570/drivers/serial/serial_txx9_kgdb.c linux-2.6.22-try2/drivers/serial/serial_txx9_kgdb.c
+--- linux-2.6.22-570/drivers/serial/serial_txx9_kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/drivers/serial/serial_txx9_kgdb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,150 @@
++/*
++ * drivers/serial/serial_txx9_kgdb.c
++ *
++ * kgdb interface for gdb
++ *
++ * Author: MontaVista Software, Inc.
++ *         source@mvista.com
++ *
++ * Copyright (C) 2005-2006 MontaVista Software Inc.
++ *
++ *  This program is free software; you can redistribute it and/or modify it
++ *  under the terms of the GNU General Public License as published by the
++ *  Free Software Foundation; either version 2 of the License, or (at your
++ *  option) any later version.
++ */
++
++#include <linux/delay.h>
++#include <linux/init.h>
++#include <linux/kgdb.h>
++#include <asm/io.h>
++
++/* Speed of the UART. */
++static unsigned int kgdb_txx9_baud = CONFIG_KGDB_BAUDRATE;
++
++#define TXX9_NPORT 4		/* TX4939 has 4 UARTs, others only have 2 */
++
++static struct uart_port  kgdb_txx9_ports[TXX9_NPORT];
++static struct uart_port *kgdb_port;
++
++/* TXX9 Serial Registers */
++#define TXX9_SILCR	0x00
++#define TXX9_SIDISR	0x08
++#define TXX9_SISCISR	0x0c
++#define TXX9_SIFCR	0x10
++#define TXX9_SIFLCR	0x14
++#define TXX9_SIBGR	0x18
++#define TXX9_SITFIFO	0x1c
++#define TXX9_SIRFIFO	0x20
++
++/* SILCR : Line Control */
++#define TXX9_SILCR_SCS_IMCLK_BG	0x00000020
++#define TXX9_SILCR_SCS_SCLK_BG	0x00000060
++#define TXX9_SILCR_USBL_1BIT	0x00000000
++#define TXX9_SILCR_UMODE_8BIT	0x00000000
++
++/* SIDISR : DMA/Int. Status */
++#define TXX9_SIDISR_RFDN_MASK	0x0000001f
++
++/* SISCISR : Status Change Int. Status */
++#define TXX9_SISCISR_TRDY	0x00000004
++
++/* SIFCR : FIFO Control */
++#define TXX9_SIFCR_SWRST	0x00008000
++
++/* SIBGR : Baud Rate Control */
++#define TXX9_SIBGR_BCLK_T0	0x00000000
++#define TXX9_SIBGR_BCLK_T2	0x00000100
++#define TXX9_SIBGR_BCLK_T4	0x00000200
++#define TXX9_SIBGR_BCLK_T6	0x00000300
++
++static inline unsigned int sio_in(struct uart_port *port, int offset)
++{
++	return *(volatile u32 *)(port->membase + offset);
++}
++
++static inline void sio_out(struct uart_port *port, int offset, unsigned int value)
++{
++	*(volatile u32 *)(port->membase + offset) = value;
++}
++
++void __init txx9_kgdb_add_port(int n, struct uart_port *port)
++{
++	memcpy(&kgdb_txx9_ports[n], port, sizeof(struct uart_port));
++}
++
++static int txx9_kgdb_init(void)
++{
++	unsigned int quot, sibgr;
++
++	kgdb_port = &kgdb_txx9_ports[CONFIG_KGDB_PORT_NUM];
++
++	if (kgdb_port->iotype != UPIO_MEM &&
++	    kgdb_port->iotype != UPIO_MEM32)
++		return -1;
++
++	/* Reset the UART. */
++	sio_out(kgdb_port, TXX9_SIFCR, TXX9_SIFCR_SWRST);
++#ifdef CONFIG_CPU_TX49XX
++	/*
++	 * TX4925 BUG WORKAROUND.  Accessing SIOC register
++	 * immediately after soft reset causes bus error.
++	 */
++	iob();
++	udelay(1);
++#endif
++	/* Wait until reset is complete. */
++	while (sio_in(kgdb_port, TXX9_SIFCR) & TXX9_SIFCR_SWRST);
++
++	/* Select the frame format and input clock. */
++	sio_out(kgdb_port, TXX9_SILCR,
++		TXX9_SILCR_UMODE_8BIT | TXX9_SILCR_USBL_1BIT |
++		((kgdb_port->flags & UPF_MAGIC_MULTIPLIER) ?
++		TXX9_SILCR_SCS_SCLK_BG : TXX9_SILCR_SCS_IMCLK_BG));
++
++	/* Select the input clock prescaler that fits the baud rate. */
++	quot = (kgdb_port->uartclk + 8 * kgdb_txx9_baud) / (16 * kgdb_txx9_baud);
++	if (quot < (256 << 1))
++		sibgr = (quot >> 1) | TXX9_SIBGR_BCLK_T0;
++	else if (quot < (256 << 3))
++		sibgr = (quot >> 3) | TXX9_SIBGR_BCLK_T2;
++	else if (quot < (256 << 5))
++		sibgr = (quot >> 5) | TXX9_SIBGR_BCLK_T4;
++	else if (quot < (256 << 7))
++		sibgr = (quot >> 7) | TXX9_SIBGR_BCLK_T6;
++	else
++		sibgr = 0xff | TXX9_SIBGR_BCLK_T6;
++
++	sio_out(kgdb_port, TXX9_SIBGR, sibgr);
++
++	/* Enable receiver and transmitter. */
++	sio_out(kgdb_port, TXX9_SIFLCR, 0);
++
++	return 0;
++}
++
++static void txx9_kgdb_late_init(void)
++{
++	request_mem_region(kgdb_port->mapbase, 0x40, "serial_txx9(debug)");
++}
++
++static int txx9_kgdb_read(void)
++{
++	while (!(sio_in(kgdb_port, TXX9_SIDISR) & TXX9_SIDISR_RFDN_MASK));
++
++	return sio_in(kgdb_port, TXX9_SIRFIFO);
++}
++
++static void txx9_kgdb_write(u8 ch)
++{
++	while (!(sio_in(kgdb_port, TXX9_SISCISR) & TXX9_SISCISR_TRDY));
++
++	sio_out(kgdb_port, TXX9_SITFIFO, ch);
++}
++
++struct kgdb_io kgdb_io_ops = {
++	.read_char	= txx9_kgdb_read,
++	.write_char	= txx9_kgdb_write,
++	.init		= txx9_kgdb_init,
++	.late_init	= txx9_kgdb_late_init
++};
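+
+The SIBGR setup in txx9_kgdb_init() above rounds the 16x clock divisor to
+the nearest integer, then walks the prescaler taps (BCLK_T0 through
+BCLK_T6) until the value fits in the 8-bit divisor field.  A standalone
+sketch of that selection, for checking which tap a given uartclk/baud
+pair lands on in userspace (illustration only, not part of this patch;
+the tap constants mirror the TXX9_SIBGR_BCLK_Tn values above):
+
+	#include <stdio.h>
+
+	static unsigned int pick_sibgr(unsigned int uartclk, unsigned int baud)
+	{
+		/* Round to the nearest 16x divisor, as txx9_kgdb_init() does. */
+		unsigned int quot = (uartclk + 8 * baud) / (16 * baud);
+
+		if (quot < (256 << 1))
+			return (quot >> 1) | 0x000;	/* TXX9_SIBGR_BCLK_T0 */
+		if (quot < (256 << 3))
+			return (quot >> 3) | 0x100;	/* TXX9_SIBGR_BCLK_T2 */
+		if (quot < (256 << 5))
+			return (quot >> 5) | 0x200;	/* TXX9_SIBGR_BCLK_T4 */
+		if (quot < (256 << 7))
+			return (quot >> 7) | 0x300;	/* TXX9_SIBGR_BCLK_T6 */
+		return 0xff | 0x300;		/* slowest rate the divider can do */
+	}
+
+	int main(void)
+	{
+		/* e.g. a 14.7456 MHz input clock at 115200 baud */
+		printf("SIBGR = 0x%x\n", pick_sibgr(14745600, 115200));
+		return 0;
+	}
+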
+diff -Nurb linux-2.6.22-570/drivers/serial/sh-sci.c linux-2.6.22-try2/drivers/serial/sh-sci.c
+--- linux-2.6.22-570/drivers/serial/sh-sci.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/serial/sh-sci.c	2007-12-19 15:29:24.000000000 -0500
+@@ -118,7 +118,8 @@
+ 	do {
+ 		status = sci_in(port, SCxSR);
+ 		if (status & SCxSR_ERRORS(port)) {
+-			handle_error(port);
++			/* Clear error flags. */
++			sci_out(port, SCxSR, SCxSR_ERROR_CLEAR(port));
+ 			continue;
+ 		}
+ 	} while (!(status & SCxSR_RDxF(port)));
+@@ -184,18 +185,18 @@
+ 			int h, l;
+ 
+ 			c = *p++;
+-			h = highhex(c);
+-			l = lowhex(c);
++			h = hexchars[c >> 4];
++			l = hexchars[c % 16];
+ 			put_char(port, h);
+ 			put_char(port, l);
+ 			checksum += h + l;
+ 		}
+ 		put_char(port, '#');
+-		put_char(port, highhex(checksum));
+-		put_char(port, lowhex(checksum));
++		put_char(port, hexchars[(checksum >> 4) & 15]);
++		put_char(port, hexchars[checksum & 15]);
+ 	    } while  (get_char(port) != '+');
+ 	} else
+-#endif /* CONFIG_SH_STANDARD_BIOS || CONFIG_SH_KGDB */
++#endif /* CONFIG_SH_STANDARD_BIOS */
+ 	for (i=0; i<count; i++) {
+ 		if (*p == 10)
+ 			put_char(port, '\r');
+@@ -547,6 +548,16 @@
+ 					continue;
+ 				}
+ 
++#ifdef CONFIG_KGDB_SH_SCI
++				/* We assume that a ^C on the port KGDB
++				 * is using means that KGDB wants to
++				 * interrupt the running system.
++				 */
++				if (port->line == KGDBPORT.port.line &&
++						c == 3)
++					breakpoint();
++#endif
++
+ 				/* Store data and status */
+ 				if (status&SCxSR_FER(port)) {
+ 					flag = TTY_FRAME;
+@@ -1279,6 +1290,7 @@
+ console_initcall(sci_console_init);
+ #endif /* CONFIG_SERIAL_SH_SCI_CONSOLE */
+ 
++#if 0
+ #ifdef CONFIG_SH_KGDB
+ /*
+  * FIXME: Most of this can go away.. at the moment, we rely on
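+
+The put_string() rework above open-codes the old highhex()/lowhex()
+helpers as direct hexchars[] lookups.  The framing it emits is the gdb
+remote protocol one: payload bytes go out hex-encoded between '$' and
+'#', and the trailing two hex digits are the mod-256 sum of the
+characters actually transmitted.  A minimal userspace sketch of that
+framing (illustration only, not part of this patch):
+
+	#include <stdio.h>
+
+	static const char hexchars[] = "0123456789abcdef";
+
+	static void put_packet(const unsigned char *p, int count)
+	{
+		unsigned char checksum = 0;	/* unsigned char keeps the sum mod 256 */
+		int i;
+
+		putchar('$');
+		for (i = 0; i < count; i++) {
+			char h = hexchars[p[i] >> 4];
+			char l = hexchars[p[i] & 15];
+
+			putchar(h);
+			putchar(l);
+			checksum += h + l;
+		}
+		putchar('#');
+		putchar(hexchars[checksum >> 4]);
+		putchar(hexchars[checksum & 15]);	/* low nibble: & 15 (0x0f) */
+	}
+
+	int main(void)
+	{
+		put_packet((const unsigned char *)"OK", 2);
+		putchar('\n');
+		return 0;
+	}
+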
+diff -Nurb linux-2.6.22-570/drivers/spi/at25.c linux-2.6.22-try2/drivers/spi/at25.c
+--- linux-2.6.22-570/drivers/spi/at25.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/spi/at25.c	2007-12-19 15:29:22.000000000 -0500
+@@ -111,7 +111,8 @@
+ }
+ 
+ static ssize_t
+-at25_bin_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
++at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr,
++	      char *buf, loff_t off, size_t count)
+ {
+ 	struct device		*dev;
+ 	struct at25_data	*at25;
+@@ -236,7 +237,8 @@
+ }
+ 
+ static ssize_t
+-at25_bin_write(struct kobject *kobj, char *buf, loff_t off, size_t count)
++at25_bin_write(struct kobject *kobj, struct bin_attribute *bin_attr,
++	       char *buf, loff_t off, size_t count)
+ {
+ 	struct device		*dev;
+ 	struct at25_data	*at25;
+@@ -314,7 +316,6 @@
+ 	 */
+ 	at25->bin.attr.name = "eeprom";
+ 	at25->bin.attr.mode = S_IRUSR;
+-	at25->bin.attr.owner = THIS_MODULE;
+ 	at25->bin.read = at25_bin_read;
+ 
+ 	at25->bin.size = at25->chip.byte_len;
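+
+The at25 change above is the first of several below (radeon, w1, zorro)
+that adapt to the new sysfs binary-attribute prototype: the callbacks now
+receive the struct bin_attribute itself, and the .attr.owner field is
+gone because module lifetime is handled by sysfs.  A minimal sketch of
+the updated shape, with hypothetical names (illustration only, not part
+of this patch):
+
+	static ssize_t example_bin_read(struct kobject *kobj,
+					struct bin_attribute *bin_attr,
+					char *buf, loff_t off, size_t count)
+	{
+		/* bin_attr is passed in directly, so per-attribute state
+		 * can be reached without container_of() tricks. */
+		return 0;
+	}
+
+	static struct bin_attribute example_attr = {
+		.attr = {
+			.name = "example",
+			.mode = S_IRUGO,
+			/* no .owner: dropped along with attribute ownership */
+		},
+		.size = 0,
+		.read = example_bin_read,
+	};
+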
+diff -Nurb linux-2.6.22-570/drivers/usb/atm/ueagle-atm.c linux-2.6.22-try2/drivers/usb/atm/ueagle-atm.c
+--- linux-2.6.22-570/drivers/usb/atm/ueagle-atm.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/usb/atm/ueagle-atm.c	2007-12-19 15:29:24.000000000 -0500
+@@ -1168,6 +1168,7 @@
+ 	struct uea_softc *sc = data;
+ 	int ret = -EAGAIN;
+ 
++	set_freezable();
+ 	uea_enters(INS_TO_USBDEV(sc));
+ 	while (!kthread_should_stop()) {
+ 		if (ret < 0 || sc->reset)
+diff -Nurb linux-2.6.22-570/drivers/usb/core/hub.c linux-2.6.22-try2/drivers/usb/core/hub.c
+--- linux-2.6.22-570/drivers/usb/core/hub.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/drivers/usb/core/hub.c	2007-12-19 15:29:24.000000000 -0500
+@@ -2831,6 +2831,7 @@
+ 
+ static int hub_thread(void *__unused)
+ {
++	set_freezable();
+ 	do {
+ 		hub_events();
+ 		wait_event_interruptible(khubd_wait,
+diff -Nurb linux-2.6.22-570/drivers/usb/gadget/file_storage.c linux-2.6.22-try2/drivers/usb/gadget/file_storage.c
+--- linux-2.6.22-570/drivers/usb/gadget/file_storage.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/usb/gadget/file_storage.c	2007-12-19 15:29:24.000000000 -0500
+@@ -3434,6 +3434,9 @@
+ 	allow_signal(SIGKILL);
+ 	allow_signal(SIGUSR1);
+ 
++	/* Allow the thread to be frozen */
++	set_freezable();
++
+ 	/* Arrange for userspace references to be interpreted as kernel
+ 	 * pointers.  That way we can pass a kernel pointer to a routine
+ 	 * that expects a __user pointer and it will work okay. */
+diff -Nurb linux-2.6.22-570/drivers/usb/storage/usb.c linux-2.6.22-try2/drivers/usb/storage/usb.c
+--- linux-2.6.22-570/drivers/usb/storage/usb.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/usb/storage/usb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -301,8 +301,6 @@
+ 	struct us_data *us = (struct us_data *)__us;
+ 	struct Scsi_Host *host = us_to_host(us);
+ 
+-	current->flags |= PF_NOFREEZE;
+-
+ 	for(;;) {
+ 		US_DEBUGP("*** thread sleeping.\n");
+ 		if(down_interruptible(&us->sema))
+@@ -909,6 +907,7 @@
+ 	printk(KERN_DEBUG
+ 		"usb-storage: device found at %d\n", us->pusb_dev->devnum);
+ 
++	set_freezable();
+ 	/* Wait for the timeout to expire or for a disconnect */
+ 	if (delay_use > 0) {
+ 		printk(KERN_DEBUG "usb-storage: waiting for device "
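+
+The set_freezable() calls added above (ueagle-atm, khubd, file_storage,
+usb-storage, and more below) follow the freezer model change carried in
+this series: kernel threads are no longer frozen by default, so the
+PF_NOFREEZE dance goes away and threads that should freeze must opt in.
+A minimal sketch of the resulting loop shape (illustration only, with a
+hypothetical thread name):
+
+	#include <linux/kthread.h>
+	#include <linux/freezer.h>
+	#include <linux/sched.h>
+
+	static int example_thread(void *unused)
+	{
+		set_freezable();	/* opt in; the default is now nonfreezable */
+		while (!kthread_should_stop()) {
+			try_to_freeze();	/* park here across suspend */
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule();
+		}
+		return 0;
+	}
+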
+diff -Nurb linux-2.6.22-570/drivers/video/Kconfig linux-2.6.22-try2/drivers/video/Kconfig
+--- linux-2.6.22-570/drivers/video/Kconfig	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/video/Kconfig	2007-12-19 15:29:22.000000000 -0500
+@@ -12,6 +12,13 @@
+        tristate
+        default n
+ 
++config VIDEO_OUTPUT_CONTROL
++	tristate "Lowlevel video output switch controls"
++	default m
++	help
++	  This framework adds support for low-level control of the video
++	  output switch.
++
+ config FB
+ 	tristate "Support for frame buffer devices"
+ 	---help---
+diff -Nurb linux-2.6.22-570/drivers/video/Makefile linux-2.6.22-try2/drivers/video/Makefile
+--- linux-2.6.22-570/drivers/video/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/video/Makefile	2007-12-19 15:29:22.000000000 -0500
+@@ -122,3 +122,6 @@
+ 
+ # the test framebuffer is last
+ obj-$(CONFIG_FB_VIRTUAL)          += vfb.o
++
++#video output switch sysfs driver
++obj-$(CONFIG_VIDEO_OUTPUT_CONTROL) += output.o
+diff -Nurb linux-2.6.22-570/drivers/video/aty/radeon_base.c linux-2.6.22-try2/drivers/video/aty/radeon_base.c
+--- linux-2.6.22-570/drivers/video/aty/radeon_base.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/video/aty/radeon_base.c	2007-12-19 15:29:22.000000000 -0500
+@@ -2102,7 +2102,9 @@
+ }
+ 
+ 
+-static ssize_t radeon_show_edid1(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t radeon_show_edid1(struct kobject *kobj,
++				 struct bin_attribute *bin_attr,
++				 char *buf, loff_t off, size_t count)
+ {
+ 	struct device *dev = container_of(kobj, struct device, kobj);
+ 	struct pci_dev *pdev = to_pci_dev(dev);
+@@ -2113,7 +2115,9 @@
+ }
+ 
+ 
+-static ssize_t radeon_show_edid2(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t radeon_show_edid2(struct kobject *kobj,
++				 struct bin_attribute *bin_attr,
++				 char *buf, loff_t off, size_t count)
+ {
+ 	struct device *dev = container_of(kobj, struct device, kobj);
+ 	struct pci_dev *pdev = to_pci_dev(dev);
+@@ -2126,7 +2130,6 @@
+ static struct bin_attribute edid1_attr = {
+ 	.attr   = {
+ 		.name	= "edid1",
+-		.owner	= THIS_MODULE,
+ 		.mode	= 0444,
+ 	},
+ 	.size	= EDID_LENGTH,
+@@ -2136,7 +2139,6 @@
+ static struct bin_attribute edid2_attr = {
+ 	.attr   = {
+ 		.name	= "edid2",
+-		.owner	= THIS_MODULE,
+ 		.mode	= 0444,
+ 	},
+ 	.size	= EDID_LENGTH,
+diff -Nurb linux-2.6.22-570/drivers/video/backlight/backlight.c linux-2.6.22-try2/drivers/video/backlight/backlight.c
+--- linux-2.6.22-570/drivers/video/backlight/backlight.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/video/backlight/backlight.c	2007-12-19 15:29:22.000000000 -0500
+@@ -172,7 +172,7 @@
+ 
+ #define DECLARE_ATTR(_name,_mode,_show,_store)			\
+ {							 	\
+-	.attr	= { .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE },	\
++	.attr	= { .name = __stringify(_name), .mode = _mode }, \
+ 	.show	= _show,					\
+ 	.store	= _store,					\
+ }
+diff -Nurb linux-2.6.22-570/drivers/video/backlight/lcd.c linux-2.6.22-try2/drivers/video/backlight/lcd.c
+--- linux-2.6.22-570/drivers/video/backlight/lcd.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/video/backlight/lcd.c	2007-12-19 15:29:22.000000000 -0500
+@@ -157,7 +157,7 @@
+ 
+ #define DECLARE_ATTR(_name,_mode,_show,_store)			\
+ {							 	\
+-	.attr	= { .name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE },	\
++	.attr	= { .name = __stringify(_name), .mode = _mode }, \
+ 	.show	= _show,					\
+ 	.store	= _store,					\
+ }
+diff -Nurb linux-2.6.22-570/drivers/video/ps3fb.c linux-2.6.22-try2/drivers/video/ps3fb.c
+--- linux-2.6.22-570/drivers/video/ps3fb.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/video/ps3fb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -812,6 +812,7 @@
+ 
+ static int ps3fbd(void *arg)
+ {
++	set_freezable();
+ 	while (!kthread_should_stop()) {
+ 		try_to_freeze();
+ 		set_current_state(TASK_INTERRUPTIBLE);
+diff -Nurb linux-2.6.22-570/drivers/w1/slaves/w1_ds2433.c linux-2.6.22-try2/drivers/w1/slaves/w1_ds2433.c
+--- linux-2.6.22-570/drivers/w1/slaves/w1_ds2433.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/w1/slaves/w1_ds2433.c	2007-12-19 15:29:22.000000000 -0500
+@@ -91,8 +91,9 @@
+ }
+ #endif	/* CONFIG_W1_SLAVE_DS2433_CRC */
+ 
+-static ssize_t w1_f23_read_bin(struct kobject *kobj, char *buf, loff_t off,
+-			       size_t count)
++static ssize_t w1_f23_read_bin(struct kobject *kobj,
++			       struct bin_attribute *bin_attr,
++			       char *buf, loff_t off, size_t count)
+ {
+ 	struct w1_slave *sl = kobj_to_w1_slave(kobj);
+ #ifdef CONFIG_W1_SLAVE_DS2433_CRC
+@@ -199,8 +200,9 @@
+ 	return 0;
+ }
+ 
+-static ssize_t w1_f23_write_bin(struct kobject *kobj, char *buf, loff_t off,
+-				size_t count)
++static ssize_t w1_f23_write_bin(struct kobject *kobj,
++				struct bin_attribute *bin_attr,
++				char *buf, loff_t off, size_t count)
+ {
+ 	struct w1_slave *sl = kobj_to_w1_slave(kobj);
+ 	int addr, len, idx;
+@@ -252,7 +254,6 @@
+ 	.attr = {
+ 		.name = "eeprom",
+ 		.mode = S_IRUGO | S_IWUSR,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = W1_EEPROM_SIZE,
+ 	.read = w1_f23_read_bin,
+diff -Nurb linux-2.6.22-570/drivers/w1/slaves/w1_therm.c linux-2.6.22-try2/drivers/w1/slaves/w1_therm.c
+--- linux-2.6.22-570/drivers/w1/slaves/w1_therm.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/w1/slaves/w1_therm.c	2007-12-19 15:29:22.000000000 -0500
+@@ -42,13 +42,13 @@
+ 				{}
+ 			};
+ 
+-static ssize_t w1_therm_read_bin(struct kobject *, char *, loff_t, size_t);
++static ssize_t w1_therm_read_bin(struct kobject *, struct bin_attribute *,
++				 char *, loff_t, size_t);
+ 
+ static struct bin_attribute w1_therm_bin_attr = {
+ 	.attr = {
+ 		.name = "w1_slave",
+ 		.mode = S_IRUGO,
+-		.owner = THIS_MODULE,
+ 	},
+ 	.size = W1_SLAVE_DATA_SIZE,
+ 	.read = w1_therm_read_bin,
+@@ -159,7 +159,9 @@
+ 	return 0;
+ }
+ 
+-static ssize_t w1_therm_read_bin(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t w1_therm_read_bin(struct kobject *kobj,
++				 struct bin_attribute *bin_attr,
++				 char *buf, loff_t off, size_t count)
+ {
+ 	struct w1_slave *sl = kobj_to_w1_slave(kobj);
+ 	struct w1_master *dev = sl->master;
+diff -Nurb linux-2.6.22-570/drivers/w1/w1.c linux-2.6.22-try2/drivers/w1/w1.c
+--- linux-2.6.22-570/drivers/w1/w1.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/w1/w1.c	2007-12-19 15:29:24.000000000 -0500
+@@ -105,7 +105,9 @@
+ 	return sprintf(buf, "%s\n", sl->name);
+ }
+ 
+-static ssize_t w1_slave_read_id(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t w1_slave_read_id(struct kobject *kobj,
++				struct bin_attribute *bin_attr,
++				char *buf, loff_t off, size_t count)
+ {
+ 	struct w1_slave *sl = kobj_to_w1_slave(kobj);
+ 
+@@ -128,7 +130,6 @@
+       .attr = {
+               .name = "id",
+               .mode = S_IRUGO,
+-              .owner = THIS_MODULE,
+       },
+       .size = 8,
+       .read = w1_slave_read_id,
+@@ -136,7 +137,9 @@
+ 
+ /* Default family */
+ 
+-static ssize_t w1_default_write(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t w1_default_write(struct kobject *kobj,
++				struct bin_attribute *bin_attr,
++				char *buf, loff_t off, size_t count)
+ {
+ 	struct w1_slave *sl = kobj_to_w1_slave(kobj);
+ 
+@@ -153,7 +156,9 @@
+ 	return count;
+ }
+ 
+-static ssize_t w1_default_read(struct kobject *kobj, char *buf, loff_t off, size_t count)
++static ssize_t w1_default_read(struct kobject *kobj,
++			       struct bin_attribute *bin_attr,
++			       char *buf, loff_t off, size_t count)
+ {
+ 	struct w1_slave *sl = kobj_to_w1_slave(kobj);
+ 
+@@ -167,7 +172,6 @@
+       .attr = {
+               .name = "rw",
+               .mode = S_IRUGO | S_IWUSR,
+-              .owner = THIS_MODULE,
+       },
+       .size = PAGE_SIZE,
+       .read = w1_default_read,
+@@ -801,6 +805,7 @@
+ 	struct w1_master *dev, *n;
+ 	int have_to_wait = 0;
+ 
++	set_freezable();
+ 	while (!kthread_should_stop() || have_to_wait) {
+ 		have_to_wait = 0;
+ 
+diff -Nurb linux-2.6.22-570/drivers/zorro/zorro-sysfs.c linux-2.6.22-try2/drivers/zorro/zorro-sysfs.c
+--- linux-2.6.22-570/drivers/zorro/zorro-sysfs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/drivers/zorro/zorro-sysfs.c	2007-12-19 15:29:22.000000000 -0500
+@@ -49,8 +49,9 @@
+ 
+ static DEVICE_ATTR(resource, S_IRUGO, zorro_show_resource, NULL);
+ 
+-static ssize_t zorro_read_config(struct kobject *kobj, char *buf, loff_t off,
+-				 size_t count)
++static ssize_t zorro_read_config(struct kobject *kobj,
++				 struct bin_attribute *bin_attr,
++				 char *buf, loff_t off, size_t count)
+ {
+ 	struct zorro_dev *z = to_zorro_dev(container_of(kobj, struct device,
+ 					   kobj));
+@@ -78,7 +79,6 @@
+ 	.attr =	{
+ 		.name = "config",
+ 		.mode = S_IRUGO | S_IWUSR,
+-		.owner = THIS_MODULE
+ 	},
+ 	.size = sizeof(struct ConfigDev),
+ 	.read = zorro_read_config,
+diff -Nurb linux-2.6.22-570/fs/Kconfig linux-2.6.22-try2/fs/Kconfig
+--- linux-2.6.22-570/fs/Kconfig	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/Kconfig	2007-12-19 15:29:23.000000000 -0500
+@@ -1030,6 +1030,41 @@
+ 
+ endmenu
+ 
++menu "Layered filesystems"
++
++config ECRYPT_FS
++	tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
++	depends on EXPERIMENTAL && KEYS && CRYPTO && NET
++	help
++	  Encrypted filesystem that operates on the VFS layer.  See
++	  <file:Documentation/ecryptfs.txt> to learn more about
++	  eCryptfs.  Userspace components are required and can be
++	  obtained from <http://ecryptfs.sf.net>.
++
++	  To compile this file system support as a module, choose M here: the
++	  module will be called ecryptfs.
++
++config UNION_FS
++	tristate "Union file system (EXPERIMENTAL)"
++	depends on EXPERIMENTAL
++	help
++	  Unionfs is a stackable unification file system, which appears to
++	  merge the contents of several directories (branches), while keeping
++	  their physical content separate.
++
++	  See <http://unionfs.filesystems.org> for details.
++
++config UNION_FS_XATTR
++	bool "Unionfs extended attributes"
++	depends on UNION_FS
++	help
++	  Extended attributes are name:value pairs associated with inodes by
++	  the kernel or by users (see the attr(5) manual page).
++
++	  If unsure, say N.
++
++endmenu
++
+ menu "Miscellaneous filesystems"
+ 
+ config ADFS_FS
+@@ -1082,18 +1117,6 @@
+ 	  To compile this file system support as a module, choose M here: the
+ 	  module will be called affs.  If unsure, say N.
+ 
+-config ECRYPT_FS
+-	tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
+-	depends on EXPERIMENTAL && KEYS && CRYPTO && NET
+-	help
+-	  Encrypted filesystem that operates on the VFS layer.  See
+-	  <file:Documentation/ecryptfs.txt> to learn more about
+-	  eCryptfs.  Userspace components are required and can be
+-	  obtained from <http://ecryptfs.sf.net>.
+-
+-	  To compile this file system support as a module, choose M here: the
+-	  module will be called ecryptfs.
+-
+ config HFS_FS
+ 	tristate "Apple Macintosh file system support (EXPERIMENTAL)"
+ 	depends on BLOCK && EXPERIMENTAL
+diff -Nurb linux-2.6.22-570/fs/Makefile linux-2.6.22-try2/fs/Makefile
+--- linux-2.6.22-570/fs/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -19,6 +19,7 @@
+ obj-y +=	no-block.o
+ endif
+ 
++obj-$(CONFIG_MMU)		+= revoke.o revoked_inode.o
+ obj-$(CONFIG_INOTIFY)		+= inotify.o
+ obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
+ obj-$(CONFIG_EPOLL)		+= eventpoll.o
+@@ -118,3 +119,4 @@
+ obj-$(CONFIG_DEBUG_FS)		+= debugfs/
+ obj-$(CONFIG_OCFS2_FS)		+= ocfs2/
+ obj-$(CONFIG_GFS2_FS)           += gfs2/
++obj-$(CONFIG_UNION_FS)		+= unionfs/
+diff -Nurb linux-2.6.22-570/fs/buffer.c linux-2.6.22-try2/fs/buffer.c
+--- linux-2.6.22-570/fs/buffer.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/buffer.c	2007-12-19 15:29:24.000000000 -0500
+@@ -982,7 +982,7 @@
+ 	struct buffer_head *bh;
+ 
+ 	page = find_or_create_page(inode->i_mapping, index,
+-		mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
++		(mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | __GFP_MOVABLE);
+ 	if (!page)
+ 		return NULL;
+ 
+@@ -2899,7 +2899,8 @@
+ 	
+ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags)
+ {
+-	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep, gfp_flags);
++	struct buffer_head *ret = kmem_cache_zalloc(bh_cachep,
++				set_migrateflags(gfp_flags, __GFP_RECLAIMABLE));
+ 	if (ret) {
+ 		INIT_LIST_HEAD(&ret->b_assoc_buffers);
+ 		get_cpu_var(bh_accounting).nr++;
+diff -Nurb linux-2.6.22-570/fs/cifs/cifsfs.c linux-2.6.22-try2/fs/cifs/cifsfs.c
+--- linux-2.6.22-570/fs/cifs/cifsfs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/cifs/cifsfs.c	2007-12-19 15:29:24.000000000 -0500
+@@ -849,6 +849,7 @@
+ 	__u16  netfid;
+ 	int rc;
+ 
++	set_freezable();
+ 	do {
+ 		if (try_to_freeze()) 
+ 			continue;
+diff -Nurb linux-2.6.22-570/fs/cifs/connect.c linux-2.6.22-try2/fs/cifs/connect.c
+--- linux-2.6.22-570/fs/cifs/connect.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/cifs/connect.c	2007-12-19 15:29:24.000000000 -0500
+@@ -363,6 +363,7 @@
+ 			GFP_KERNEL);
+ 	}
+ 
++	set_freezable();
+ 	while (!kthread_should_stop()) {
+ 		if (try_to_freeze())
+ 			continue;
+diff -Nurb linux-2.6.22-570/fs/configfs/configfs_internal.h linux-2.6.22-try2/fs/configfs/configfs_internal.h
+--- linux-2.6.22-570/fs/configfs/configfs_internal.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/configfs/configfs_internal.h	2007-12-19 15:29:23.000000000 -0500
+@@ -29,6 +29,7 @@
+ 
+ struct configfs_dirent {
+ 	atomic_t		s_count;
++	int			s_dependent_count;
+ 	struct list_head	s_sibling;
+ 	struct list_head	s_children;
+ 	struct list_head	s_links;
+diff -Nurb linux-2.6.22-570/fs/configfs/dir.c linux-2.6.22-try2/fs/configfs/dir.c
+--- linux-2.6.22-570/fs/configfs/dir.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/configfs/dir.c	2007-12-19 15:29:23.000000000 -0500
+@@ -355,6 +355,10 @@
+ 			/* Mark that we've taken i_mutex */
+ 			sd->s_type |= CONFIGFS_USET_DROPPING;
+ 
++			/*
++			 * Yup, recursive.  If there's a problem, blame
++			 * deep nesting of default_groups
++			 */
+ 			ret = configfs_detach_prep(sd->s_dentry);
+ 			if (!ret)
+ 				continue;
+@@ -714,6 +718,28 @@
+ }
+ 
+ /*
++ * After the item has been detached from the filesystem view, we are
++ * ready to tear it out of the hierarchy.  Notify the client before
++ * we do that so they can perform any cleanup that requires
++ * navigating the hierarchy.  A client does not need to provide this
++ * callback.  The subsystem semaphore MUST be held by the caller, and
++ * references must be valid for both items.  It also assumes the
++ * caller has validated ci_type.
++ */
++static void client_disconnect_notify(struct config_item *parent_item,
++				     struct config_item *item)
++{
++	struct config_item_type *type;
++
++	type = parent_item->ci_type;
++	BUG_ON(!type);
++
++	if (type->ct_group_ops && type->ct_group_ops->disconnect_notify)
++		type->ct_group_ops->disconnect_notify(to_config_group(parent_item),
++						      item);
++}
++
++/*
+  * Drop the initial reference from make_item()/make_group()
+  * This function assumes that reference is held on item
+  * and that item holds a valid reference to the parent.  Also, it
+@@ -738,6 +764,239 @@
+ 		config_item_put(item);
+ }
+ 
++#ifdef DEBUG
++static void configfs_dump_one(struct configfs_dirent *sd, int level)
++{
++	printk(KERN_INFO "%*s\"%s\":\n", level, " ", configfs_get_name(sd));
++
++#define type_print(_type) if (sd->s_type & _type) printk(KERN_INFO "%*s %s\n", level, " ", #_type);
++	type_print(CONFIGFS_ROOT);
++	type_print(CONFIGFS_DIR);
++	type_print(CONFIGFS_ITEM_ATTR);
++	type_print(CONFIGFS_ITEM_LINK);
++	type_print(CONFIGFS_USET_DIR);
++	type_print(CONFIGFS_USET_DEFAULT);
++	type_print(CONFIGFS_USET_DROPPING);
++#undef type_print
++}
++
++static int configfs_dump(struct configfs_dirent *sd, int level)
++{
++	struct configfs_dirent *child_sd;
++	int ret = 0;
++
++	configfs_dump_one(sd, level);
++
++	if (!(sd->s_type & (CONFIGFS_DIR|CONFIGFS_ROOT)))
++		return 0;
++
++	list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
++		ret = configfs_dump(child_sd, level + 2);
++		if (ret)
++			break;
++	}
++
++	return ret;
++}
++#endif
++
++
++/*
++ * configfs_depend_item() and configfs_undepend_item()
++ *
++ * WARNING: Do not call these from a configfs callback!
++ *
++ * This describes these functions and their helpers.
++ *
++ * Allow another kernel system to depend on a config_item.  If this
++ * happens, the item cannot go away until the dependent can live without
++ * it.  The idea is to give client modules as simple an interface as
++ * possible.  When a system asks them to depend on an item, they just
++ * call configfs_depend_item().  If the item is live and the client
++ * driver is in good shape, we'll happily do the work for them.
++ *
++ * Why is the locking complex?  Because configfs uses the VFS to handle
++ * all locking, but this function is called outside the normal
++ * VFS->configfs path.  So it must take VFS locks to prevent the
++ * VFS->configfs stuff (configfs_mkdir(), configfs_rmdir(), etc).  This is
++ * why you can't call these functions underneath configfs callbacks.
++ *
++ * Note, btw, that this can be called at *any* time, even when a configfs
++ * subsystem isn't registered, or when configfs is loading or unloading.
++ * Just like configfs_register_subsystem().  So we take the same
++ * precautions.  We pin the filesystem.  We lock each i_mutex _in_order_
++ * on our way down the tree.  If we can find the target item in the
++ * configfs tree, it must be part of the subsystem tree as well, so we
++ * do not need the subsystem semaphore.  Holding the i_mutex chain locks
++ * out mkdir() and rmdir(), who might be racing us.
++ */
++
++/*
++ * configfs_depend_prep()
++ *
++ * Only subdirectories count here.  Files (CONFIGFS_NOT_PINNED) are
++ * attributes.  This is similar to, but not the same as, configfs_detach_prep().
++ * Note that configfs_detach_prep() expects the parent to be locked when it
++ * is called, but we lock the parent *inside* configfs_depend_prep().  We
++ * do that so we can unlock it if we find nothing.
++ *
++ * Here we do a depth-first search of the dentry hierarchy looking for
++ * our object.  We take i_mutex on each step of the way down.  IT IS
++ * ESSENTIAL THAT i_mutex LOCKING IS ORDERED.  If we come back up a branch,
++ * we'll drop the i_mutex.
++ *
++ * If the target is not found, -ENOENT is bubbled up and we have released
++ * all locks.  If the target was found, the locks will be cleared by
++ * configfs_depend_rollback().
++ *
++ * This adds a requirement that all config_items be unique!
++ *
++ * This is recursive because the locking traversal is tricky.  There isn't
++ * much on the stack, though, so folks that need this function - be careful
++ * about your stack!  Patches will be accepted to make it iterative.
++ */
++static int configfs_depend_prep(struct dentry *origin,
++				struct config_item *target)
++{
++	struct configfs_dirent *child_sd, *sd = origin->d_fsdata;
++	int ret = 0;
++
++	BUG_ON(!origin || !sd);
++
++	/* Lock this guy on the way down */
++	mutex_lock(&sd->s_dentry->d_inode->i_mutex);
++	if (sd->s_element == target)  /* Boo-yah */
++		goto out;
++
++	list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
++		if (child_sd->s_type & CONFIGFS_DIR) {
++			ret = configfs_depend_prep(child_sd->s_dentry,
++						   target);
++			if (!ret)
++				goto out;  /* Child path boo-yah */
++		}
++	}
++
++	/* We looped all our children and didn't find target */
++	mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
++	ret = -ENOENT;
++
++out:
++	return ret;
++}
++
++/*
++ * This is ONLY called if configfs_depend_prep() did its job.  So we can
++ * trust the entire path from item back up to origin.
++ *
++ * We walk backwards from item, unlocking each i_mutex.  We finish by
++ * unlocking origin.
++ */
++static void configfs_depend_rollback(struct dentry *origin,
++				     struct config_item *item)
++{
++	struct dentry *dentry = item->ci_dentry;
++
++	while (dentry != origin) {
++		mutex_unlock(&dentry->d_inode->i_mutex);
++		dentry = dentry->d_parent;
++	}
++
++	mutex_unlock(&origin->d_inode->i_mutex);
++}
++
++int configfs_depend_item(struct configfs_subsystem *subsys,
++			 struct config_item *target)
++{
++	int ret;
++	struct configfs_dirent *p, *root_sd, *subsys_sd = NULL;
++	struct config_item *s_item = &subsys->su_group.cg_item;
++
++	/*
++	 * Pin the configfs filesystem.  This means we can safely access
++	 * the root of the configfs filesystem.
++	 */
++	ret = configfs_pin_fs();
++	if (ret)
++		return ret;
++
++	/*
++	 * Next, lock the root directory.  We're going to check that the
++	 * subsystem is really registered, and so we need to lock out
++	 * configfs_[un]register_subsystem().
++	 */
++	mutex_lock(&configfs_sb->s_root->d_inode->i_mutex);
++
++	root_sd = configfs_sb->s_root->d_fsdata;
++
++	list_for_each_entry(p, &root_sd->s_children, s_sibling) {
++		if (p->s_type & CONFIGFS_DIR) {
++			if (p->s_element == s_item) {
++				subsys_sd = p;
++				break;
++			}
++		}
++	}
++
++	if (!subsys_sd) {
++		ret = -ENOENT;
++		goto out_unlock_fs;
++	}
++
++	/* Ok, now we can trust subsys/s_item */
++
++	/* Scan the tree, locking i_mutex recursively, return 0 if found */
++	ret = configfs_depend_prep(subsys_sd->s_dentry, target);
++	if (ret)
++		goto out_unlock_fs;
++
++	/* We hold all i_mutexes from the subsystem down to the target */
++	p = target->ci_dentry->d_fsdata;
++	p->s_dependent_count += 1;
++
++	configfs_depend_rollback(subsys_sd->s_dentry, target);
++
++out_unlock_fs:
++	mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
++
++	/*
++	 * If we succeeded, the fs is pinned via other methods.  If not,
++	 * we're done with it anyway.  So release_fs() is always right.
++	 */
++	configfs_release_fs();
++
++	return ret;
++}
++EXPORT_SYMBOL(configfs_depend_item);
++
++/*
++ * Release the dependent linkage.  This is much simpler than
++ * configfs_depend_item() because we know that the client driver is
++ * pinned, thus the subsystem is pinned, and therefore configfs is pinned.
++ */
++void configfs_undepend_item(struct configfs_subsystem *subsys,
++			    struct config_item *target)
++{
++	struct configfs_dirent *sd;
++
++	/*
++	 * Since we can trust everything is pinned, we just need i_mutex
++	 * on the item.
++	 */
++	mutex_lock(&target->ci_dentry->d_inode->i_mutex);
++
++	sd = target->ci_dentry->d_fsdata;
++	BUG_ON(sd->s_dependent_count < 1);
++
++	sd->s_dependent_count -= 1;
++
++	/*
++	 * After this unlock, we cannot trust the item to stay alive!
++	 * DO NOT REFERENCE item after this unlock.
++	 */
++	mutex_unlock(&target->ci_dentry->d_inode->i_mutex);
++}
++EXPORT_SYMBOL(configfs_undepend_item);
+ 
+ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+ {
+@@ -842,11 +1101,14 @@
+ 	if (ret) {
+ 		/* Tear down everything we built up */
+ 		down(&subsys->su_sem);
++
++		client_disconnect_notify(parent_item, item);
+ 		if (group)
+ 			unlink_group(group);
+ 		else
+ 			unlink_obj(item);
+ 		client_drop_item(parent_item, item);
++
+ 		up(&subsys->su_sem);
+ 
+ 		if (module_got)
+@@ -881,6 +1143,13 @@
+ 	if (sd->s_type & CONFIGFS_USET_DEFAULT)
+ 		return -EPERM;
+ 
++	/*
++	 * Here's where we check for dependents.  We're protected by
++	 * i_mutex.
++	 */
++	if (sd->s_dependent_count)
++		return -EBUSY;
++
+ 	/* Get a working ref until we have the child */
+ 	parent_item = configfs_get_config_item(dentry->d_parent);
+ 	subsys = to_config_group(parent_item)->cg_subsys;
+@@ -911,11 +1180,13 @@
+ 		configfs_detach_group(item);
+ 
+ 		down(&subsys->su_sem);
++		client_disconnect_notify(parent_item, item);
+ 		unlink_group(to_config_group(item));
+ 	} else {
+ 		configfs_detach_item(item);
+ 
+ 		down(&subsys->su_sem);
++		client_disconnect_notify(parent_item, item);
+ 		unlink_obj(item);
+ 	}
+ 
+diff -Nurb linux-2.6.22-570/fs/configfs/file.c linux-2.6.22-try2/fs/configfs/file.c
+--- linux-2.6.22-570/fs/configfs/file.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/configfs/file.c	2007-12-19 15:29:23.000000000 -0500
+@@ -27,19 +27,26 @@
+ #include <linux/fs.h>
+ #include <linux/module.h>
+ #include <linux/slab.h>
++#include <linux/mutex.h>
+ #include <asm/uaccess.h>
+-#include <asm/semaphore.h>
+ 
+ #include <linux/configfs.h>
+ #include "configfs_internal.h"
+ 
++/*
++ * A simple attribute can only be 4096 characters.  Why 4k?  Because the
++ * original code limited it to PAGE_SIZE.  That's a bad idea, though,
++ * because an attribute of 16k on ia64 won't work on x86.  So we limit to
++ * 4k, our minimum common page size.
++ */
++#define SIMPLE_ATTR_SIZE 4096
+ 
+ struct configfs_buffer {
+ 	size_t			count;
+ 	loff_t			pos;
+ 	char			* page;
+ 	struct configfs_item_operations	* ops;
+-	struct semaphore	sem;
++	struct mutex		mutex;
+ 	int			needs_read_fill;
+ };
+ 
+@@ -69,7 +76,7 @@
+ 
+ 	count = ops->show_attribute(item,attr,buffer->page);
+ 	buffer->needs_read_fill = 0;
+-	BUG_ON(count > (ssize_t)PAGE_SIZE);
++	BUG_ON(count > (ssize_t)SIMPLE_ATTR_SIZE);
+ 	if (count >= 0)
+ 		buffer->count = count;
+ 	else
+@@ -102,7 +109,7 @@
+ 	struct configfs_buffer * buffer = file->private_data;
+ 	ssize_t retval = 0;
+ 
+-	down(&buffer->sem);
++	mutex_lock(&buffer->mutex);
+ 	if (buffer->needs_read_fill) {
+ 		if ((retval = fill_read_buffer(file->f_path.dentry,buffer)))
+ 			goto out;
+@@ -112,7 +119,7 @@
+ 	retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
+ 					 buffer->count);
+ out:
+-	up(&buffer->sem);
++	mutex_unlock(&buffer->mutex);
+ 	return retval;
+ }
+ 
+@@ -137,8 +144,8 @@
+ 	if (!buffer->page)
+ 		return -ENOMEM;
+ 
+-	if (count >= PAGE_SIZE)
+-		count = PAGE_SIZE - 1;
++	if (count >= SIMPLE_ATTR_SIZE)
++		count = SIMPLE_ATTR_SIZE - 1;
+ 	error = copy_from_user(buffer->page,buf,count);
+ 	buffer->needs_read_fill = 1;
+ 	/* if buf is assumed to contain a string, terminate it by \0,
+@@ -193,13 +200,13 @@
+ 	struct configfs_buffer * buffer = file->private_data;
+ 	ssize_t len;
+ 
+-	down(&buffer->sem);
++	mutex_lock(&buffer->mutex);
+ 	len = fill_write_buffer(buffer, buf, count);
+ 	if (len > 0)
+ 		len = flush_write_buffer(file->f_path.dentry, buffer, count);
+ 	if (len > 0)
+ 		*ppos += len;
+-	up(&buffer->sem);
++	mutex_unlock(&buffer->mutex);
+ 	return len;
+ }
+ 
+@@ -253,7 +260,7 @@
+ 		error = -ENOMEM;
+ 		goto Enomem;
+ 	}
+-	init_MUTEX(&buffer->sem);
++	mutex_init(&buffer->mutex);
+ 	buffer->needs_read_fill = 1;
+ 	buffer->ops = ops;
+ 	file->private_data = buffer;
+@@ -292,6 +299,7 @@
+ 	if (buffer) {
+ 		if (buffer->page)
+ 			free_page((unsigned long)buffer->page);
++		mutex_destroy(&buffer->mutex);
+ 		kfree(buffer);
+ 	}
+ 	return 0;
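+
+The configfs buffer conversion above is the stock semaphore-to-mutex
+translation; a condensed sketch of the mapping it applies (illustration
+only, with a hypothetical structure):
+
+	#include <linux/mutex.h>
+
+	struct example_buffer {
+		struct mutex mutex;	/* was: struct semaphore sem */
+		int needs_read_fill;
+	};
+
+	static void example_buffer_init(struct example_buffer *b)
+	{
+		mutex_init(&b->mutex);		/* was: init_MUTEX(&b->sem) */
+	}
+
+	static void example_buffer_touch(struct example_buffer *b)
+	{
+		mutex_lock(&b->mutex);		/* was: down(&b->sem) */
+		b->needs_read_fill = 1;
+		mutex_unlock(&b->mutex);	/* was: up(&b->sem) */
+	}
+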
+diff -Nurb linux-2.6.22-570/fs/configfs/item.c linux-2.6.22-try2/fs/configfs/item.c
+--- linux-2.6.22-570/fs/configfs/item.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/configfs/item.c	2007-12-19 15:29:23.000000000 -0500
+@@ -62,7 +62,6 @@
+  *	dynamically allocated string that @item->ci_name points to.
+  *	Otherwise, use the static @item->ci_namebuf array.
+  */
+-
+ int config_item_set_name(struct config_item * item, const char * fmt, ...)
+ {
+ 	int error = 0;
+@@ -139,12 +138,7 @@
+ 	return item;
+ }
+ 
+-/**
+- *	config_item_cleanup - free config_item resources.
+- *	@item:	item.
+- */
+-
+-void config_item_cleanup(struct config_item * item)
++static void config_item_cleanup(struct config_item * item)
+ {
+ 	struct config_item_type * t = item->ci_type;
+ 	struct config_group * s = item->ci_group;
+@@ -179,12 +173,10 @@
+ 		kref_put(&item->ci_kref, config_item_release);
+ }
+ 
+-
+ /**
+  *	config_group_init - initialize a group for use
+  *	@k:	group
+  */
+-
+ void config_group_init(struct config_group *group)
+ {
+ 	config_item_init(&group->cg_item);
+@@ -201,8 +193,8 @@
+  *	looking for a matching config_item. If matching item is found
+  *	take a reference and return the item.
+  */
+-
+-struct config_item * config_group_find_obj(struct config_group * group, const char * name)
++struct config_item *config_group_find_obj(struct config_group *group,
++					  const char * name)
+ {
+ 	struct list_head * entry;
+ 	struct config_item * ret = NULL;
+@@ -219,7 +211,6 @@
+ 	return ret;
+ }
+ 
+-
+ EXPORT_SYMBOL(config_item_init);
+ EXPORT_SYMBOL(config_group_init);
+ EXPORT_SYMBOL(config_item_get);
+diff -Nurb linux-2.6.22-570/fs/drop_caches.c linux-2.6.22-try2/fs/drop_caches.c
+--- linux-2.6.22-570/fs/drop_caches.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/drop_caches.c	2007-12-19 15:29:23.000000000 -0500
+@@ -3,6 +3,7 @@
+  */
+ 
+ #include <linux/kernel.h>
++#include <linux/module.h>
+ #include <linux/mm.h>
+ #include <linux/fs.h>
+ #include <linux/writeback.h>
+@@ -12,7 +13,7 @@
+ /* A global variable is a bit ugly, but it keeps the code simple */
+ int sysctl_drop_caches;
+ 
+-static void drop_pagecache_sb(struct super_block *sb)
++void drop_pagecache_sb(struct super_block *sb)
+ {
+ 	struct inode *inode;
+ 
+@@ -24,6 +25,7 @@
+ 	}
+ 	spin_unlock(&inode_lock);
+ }
++EXPORT_SYMBOL(drop_pagecache_sb);
+ 
+ void drop_pagecache(void)
+ {
+diff -Nurb linux-2.6.22-570/fs/ecryptfs/inode.c linux-2.6.22-try2/fs/ecryptfs/inode.c
+--- linux-2.6.22-570/fs/ecryptfs/inode.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/ecryptfs/inode.c	2007-12-19 15:29:23.000000000 -0500
+@@ -280,7 +280,9 @@
+ 	int rc = 0;
+ 	struct dentry *lower_dir_dentry;
+ 	struct dentry *lower_dentry;
++	struct dentry *dentry_save;
+ 	struct vfsmount *lower_mnt;
++	struct vfsmount *mnt_save;
+ 	char *encoded_name;
+ 	unsigned int encoded_namelen;
+ 	struct ecryptfs_crypt_stat *crypt_stat = NULL;
+@@ -308,9 +310,13 @@
+ 	}
+ 	ecryptfs_printk(KERN_DEBUG, "encoded_name = [%s]; encoded_namelen "
+ 			"= [%d]\n", encoded_name, encoded_namelen);
+-	lower_dentry = lookup_one_len(encoded_name, lower_dir_dentry,
+-				      encoded_namelen - 1);
++	dentry_save = nd->dentry;
++	mnt_save = nd->mnt;
++	lower_dentry = lookup_one_len_nd(encoded_name, lower_dir_dentry,
++					 (encoded_namelen - 1), nd);
+ 	kfree(encoded_name);
++	nd->mnt = mnt_save;
++	nd->dentry = dentry_save;
+ 	if (IS_ERR(lower_dentry)) {
+ 		ecryptfs_printk(KERN_ERR, "ERR from lower_dentry\n");
+ 		rc = PTR_ERR(lower_dentry);
+diff -Nurb linux-2.6.22-570/fs/ecryptfs/main.c linux-2.6.22-try2/fs/ecryptfs/main.c
+--- linux-2.6.22-570/fs/ecryptfs/main.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ecryptfs/main.c	2007-12-19 15:29:22.000000000 -0500
+@@ -840,8 +840,6 @@
+ 		goto out;
+ 	}
+ 	kobj_set_kset_s(&ecryptfs_subsys, fs_subsys);
+-	sysfs_attr_version.attr.owner = THIS_MODULE;
+-	sysfs_attr_version_str.attr.owner = THIS_MODULE;
+ 	rc = do_sysfs_registration();
+ 	if (rc) {
+ 		printk(KERN_ERR "sysfs registration failed\n");
+diff -Nurb linux-2.6.22-570/fs/exec.c linux-2.6.22-try2/fs/exec.c
+--- linux-2.6.22-570/fs/exec.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/exec.c	2007-12-19 15:29:24.000000000 -0500
+@@ -861,9 +861,9 @@
+ 	current->sas_ss_sp = current->sas_ss_size = 0;
+ 
+ 	if (current->euid == current->uid && current->egid == current->gid)
+-		current->mm->dumpable = 1;
++		set_dumpable(current->mm, 1);
+ 	else
+-		current->mm->dumpable = suid_dumpable;
++		set_dumpable(current->mm, suid_dumpable);
+ 
+ 	name = bprm->filename;
+ 
+@@ -894,7 +894,7 @@
+ 	} else if (file_permission(bprm->file, MAY_READ) ||
+ 			(bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)) {
+ 		suid_keys(current);
+-		current->mm->dumpable = suid_dumpable;
++		set_dumpable(current->mm, suid_dumpable);
+ 	}
+ 
+ 	/* An exec changes our domain. We are no longer part of the thread
+@@ -1486,6 +1486,55 @@
+ 	return core_waiters;
+ }
+ 
++/*
++ * set_dumpable converts traditional three-value dumpable to two flags and
++ * stores them into mm->flags.  It modifies lower two bits of mm->flags, but
++ * these bits are not changed atomically.  So get_dumpable can observe the
++ * intermediate state.  To avoid unexpected behavior, get_dumpable returns
++ * either the old dumpable value or the new one by paying attention to the
++ * order in which the bits are modified.
++ *
++ * dumpable |   mm->flags (binary)
++ * old  new | initial interim  final
++ * ---------+-----------------------
++ *  0    1  |   00      01      01
++ *  0    2  |   00      10(*)   11
++ *  1    0  |   01      00      00
++ *  1    2  |   01      11      11
++ *  2    0  |   11      10(*)   00
++ *  2    1  |   11      11      01
++ *
++ * (*) get_dumpable regards interim value of 10 as 11.
++ */
++void set_dumpable(struct mm_struct *mm, int value)
++{
++	switch (value) {
++	case 0:
++		clear_bit(MMF_DUMPABLE, &mm->flags);
++		smp_wmb();
++		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
++		break;
++	case 1:
++		set_bit(MMF_DUMPABLE, &mm->flags);
++		smp_wmb();
++		clear_bit(MMF_DUMP_SECURELY, &mm->flags);
++		break;
++	case 2:
++		set_bit(MMF_DUMP_SECURELY, &mm->flags);
++		smp_wmb();
++		set_bit(MMF_DUMPABLE, &mm->flags);
++		break;
++	}
++}
++
++int get_dumpable(struct mm_struct *mm)
++{
++	int ret;
++
++	ret = mm->flags & 0x3;
++	return (ret >= 2) ? 2 : ret;
++}
++
+ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
+ {
+ 	char corename[CORENAME_MAX_SIZE + 1];
+@@ -1504,7 +1553,7 @@
+ 	if (!binfmt || !binfmt->core_dump)
+ 		goto fail;
+ 	down_write(&mm->mmap_sem);
+-	if (!mm->dumpable) {
++	if (!get_dumpable(mm)) {
+ 		up_write(&mm->mmap_sem);
+ 		goto fail;
+ 	}
+@@ -1514,11 +1563,11 @@
+ 	 *	process nor do we know its entire history. We only know it
+ 	 *	was tainted so we dump it as root in mode 2.
+ 	 */
+-	if (mm->dumpable == 2) {	/* Setuid core dump mode */
++	if (get_dumpable(mm) == 2) {	/* Setuid core dump mode */
+ 		flag = O_EXCL;		/* Stop rewrite attacks */
+ 		current->fsuid = 0;	/* Dump root private */
+ 	}
+-	mm->dumpable = 0;
++	set_dumpable(mm, 0);
+ 
+ 	retval = coredump_wait(exit_code);
+ 	if (retval < 0)
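+
+The bit ordering rules in the table above can be checked in isolation:
+the two mm->flags bits encode dumpable so that any interim state reads
+back as one of the two endpoint values.  A standalone sketch
+(illustration only, not part of this patch):
+
+	#include <stdio.h>
+
+	/* Mirrors get_dumpable(): low two bits, interim 10 reads as 2. */
+	static int demo_get_dumpable(unsigned long flags)
+	{
+		int ret = flags & 0x3;
+
+		return (ret >= 2) ? 2 : ret;
+	}
+
+	int main(void)
+	{
+		unsigned long flags;
+
+		for (flags = 0; flags < 4; flags++)
+			printf("mm->flags = %lu%lu -> dumpable %d\n",
+			       (flags >> 1) & 1, flags & 1,
+			       demo_get_dumpable(flags));
+		return 0;
+	}
+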
+diff -Nurb linux-2.6.22-570/fs/gfs2/ops_address.c linux-2.6.22-try2/fs/gfs2/ops_address.c
+--- linux-2.6.22-570/fs/gfs2/ops_address.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/gfs2/ops_address.c	2007-12-19 15:29:24.000000000 -0500
+@@ -250,7 +250,7 @@
+ 		if (file) {
+ 			gf = file->private_data;
+ 			if (test_bit(GFF_EXLOCK, &gf->f_flags))
+-				/* gfs2_sharewrite_nopage has grabbed the ip->i_gl already */
++				/* gfs2_sharewrite_fault has grabbed the ip->i_gl already */
+ 				goto skip_lock;
+ 		}
+ 		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
+diff -Nurb linux-2.6.22-570/fs/gfs2/ops_file.c linux-2.6.22-try2/fs/gfs2/ops_file.c
+--- linux-2.6.22-570/fs/gfs2/ops_file.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/gfs2/ops_file.c	2007-12-19 15:29:24.000000000 -0500
+@@ -364,6 +364,8 @@
+ 	else
+ 		vma->vm_ops = &gfs2_vm_ops_private;
+ 
++	vma->vm_flags |= VM_CAN_INVALIDATE|VM_CAN_NONLINEAR;
++
+ 	gfs2_glock_dq_uninit(&i_gh);
+ 
+ 	return error;
+diff -Nurb linux-2.6.22-570/fs/gfs2/ops_vm.c linux-2.6.22-try2/fs/gfs2/ops_vm.c
+--- linux-2.6.22-570/fs/gfs2/ops_vm.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/gfs2/ops_vm.c	2007-12-19 15:29:24.000000000 -0500
+@@ -27,13 +27,13 @@
+ #include "trans.h"
+ #include "util.h"
+ 
+-static struct page *gfs2_private_nopage(struct vm_area_struct *area,
+-					unsigned long address, int *type)
++static struct page *gfs2_private_fault(struct vm_area_struct *vma,
++					struct fault_data *fdata)
+ {
+-	struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host);
++	struct gfs2_inode *ip = GFS2_I(vma->vm_file->f_mapping->host);
+ 
+ 	set_bit(GIF_PAGED, &ip->i_flags);
+-	return filemap_nopage(area, address, type);
++	return filemap_fault(vma, fdata);
+ }
+ 
+ static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
+@@ -104,16 +104,14 @@
+ 	return error;
+ }
+ 
+-static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
+-					   unsigned long address, int *type)
++static struct page *gfs2_sharewrite_fault(struct vm_area_struct *vma,
++						struct fault_data *fdata)
+ {
+-	struct file *file = area->vm_file;
++	struct file *file = vma->vm_file;
+ 	struct gfs2_file *gf = file->private_data;
+ 	struct gfs2_inode *ip = GFS2_I(file->f_mapping->host);
+ 	struct gfs2_holder i_gh;
+ 	struct page *result = NULL;
+-	unsigned long index = ((address - area->vm_start) >> PAGE_CACHE_SHIFT) +
+-			      area->vm_pgoff;
+ 	int alloc_required;
+ 	int error;
+ 
+@@ -124,21 +122,27 @@
+ 	set_bit(GIF_PAGED, &ip->i_flags);
+ 	set_bit(GIF_SW_PAGED, &ip->i_flags);
+ 
+-	error = gfs2_write_alloc_required(ip, (u64)index << PAGE_CACHE_SHIFT,
++	error = gfs2_write_alloc_required(ip,
++					(u64)fdata->pgoff << PAGE_CACHE_SHIFT,
+ 					  PAGE_CACHE_SIZE, &alloc_required);
+-	if (error)
++	if (error) {
++		fdata->type = VM_FAULT_OOM; /* XXX: are these right? */
+ 		goto out;
++	}
+ 
+ 	set_bit(GFF_EXLOCK, &gf->f_flags);
+-	result = filemap_nopage(area, address, type);
++	result = filemap_fault(vma, fdata);
+ 	clear_bit(GFF_EXLOCK, &gf->f_flags);
+-	if (!result || result == NOPAGE_OOM)
++	if (!result)
+ 		goto out;
+ 
+ 	if (alloc_required) {
+ 		error = alloc_page_backing(ip, result);
+ 		if (error) {
++			if (vma->vm_flags & VM_CAN_INVALIDATE)
++				unlock_page(result);
+ 			page_cache_release(result);
++			fdata->type = VM_FAULT_OOM;
+ 			result = NULL;
+ 			goto out;
+ 		}
+@@ -152,10 +156,10 @@
+ }
+ 
+ struct vm_operations_struct gfs2_vm_ops_private = {
+-	.nopage = gfs2_private_nopage,
++	.fault = gfs2_private_fault,
+ };
+ 
+ struct vm_operations_struct gfs2_vm_ops_sharewrite = {
+-	.nopage = gfs2_sharewrite_nopage,
++	.fault = gfs2_sharewrite_fault,
+ };
+ 
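+
+The gfs2 conversion above (and the ncpfs one below) moves from ->nopage
+to the ->fault API introduced earlier in this series: the handler gets a
+struct fault_data with the page offset precomputed and reports its status
+through fdata->type instead of an int *type out-parameter.  A minimal
+sketch of a converted handler against that interim API (illustration
+only, hypothetical names):
+
+	static struct page *example_fault(struct vm_area_struct *vma,
+					  struct fault_data *fdata)
+	{
+		struct page *page;
+
+		/* fdata->pgoff already accounts for vm_pgoff; no more
+		 * open-coded (address - vm_start) arithmetic. */
+		page = alloc_page(GFP_HIGHUSER);
+		if (!page) {
+			fdata->type = VM_FAULT_OOM;
+			return NULL;
+		}
+		clear_highpage(page);
+		fdata->type = VM_FAULT_MINOR;
+		return page;
+	}
+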
+diff -Nurb linux-2.6.22-570/fs/inode.c linux-2.6.22-try2/fs/inode.c
+--- linux-2.6.22-570/fs/inode.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/inode.c	2007-12-19 15:29:24.000000000 -0500
+@@ -149,7 +149,7 @@
+ 		mapping->a_ops = &empty_aops;
+  		mapping->host = inode;
+ 		mapping->flags = 0;
+-		mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
++		mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE);
+ 		mapping->assoc_mapping = NULL;
+ 		mapping->backing_dev_info = &default_backing_dev_info;
+ 
+@@ -525,7 +525,13 @@
+  *	new_inode 	- obtain an inode
+  *	@sb: superblock
+  *
+- *	Allocates a new inode for given superblock.
++ *	Allocates a new inode for given superblock. The default gfp_mask
++ *	for allocations related to inode->i_mapping is GFP_HIGHUSER_PAGECACHE.
++ *	If HIGHMEM pages are unsuitable or it is known that pages allocated
++ *	for the page cache are not reclaimable or migratable,
++ *	mapping_set_gfp_mask() must be called with suitable flags on the
++ *	newly created inode's mapping
++ *
+  */
+ struct inode *new_inode(struct super_block *sb)
+ {
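+
+With new_inode() defaulting the mapping to GFP_HIGHUSER_PAGECACHE as the
+comment above documents, a filesystem whose page cache pages are pinned
+(and so neither reclaimable nor movable) should override the mask right
+after allocation.  A minimal sketch (illustration only, hypothetical
+filesystem helper):
+
+	static struct inode *example_get_inode(struct super_block *sb)
+	{
+		struct inode *inode = new_inode(sb);
+
+		if (inode)
+			/* pages are pinned: opt out of the movable,
+			 * highmem default set by new_inode() */
+			mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
+		return inode;
+	}
+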
+diff -Nurb linux-2.6.22-570/fs/jbd/journal.c linux-2.6.22-try2/fs/jbd/journal.c
+--- linux-2.6.22-570/fs/jbd/journal.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/jbd/journal.c	2007-12-19 15:29:24.000000000 -0500
+@@ -1710,7 +1710,7 @@
+ 	journal_head_cache = kmem_cache_create("journal_head",
+ 				sizeof(struct journal_head),
+ 				0,		/* offset */
+-				0,		/* flags */
++				SLAB_TEMPORARY,	/* flags */
+ 				NULL,		/* ctor */
+ 				NULL);		/* dtor */
+ 	retval = 0;
+@@ -2007,7 +2007,7 @@
+ 	jbd_handle_cache = kmem_cache_create("journal_handle",
+ 				sizeof(handle_t),
+ 				0,		/* offset */
+-				0,		/* flags */
++				SLAB_TEMPORARY,	/* flags */
+ 				NULL,		/* ctor */
+ 				NULL);		/* dtor */
+ 	if (jbd_handle_cache == NULL) {
+diff -Nurb linux-2.6.22-570/fs/jbd/revoke.c linux-2.6.22-try2/fs/jbd/revoke.c
+--- linux-2.6.22-570/fs/jbd/revoke.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/jbd/revoke.c	2007-12-19 15:29:24.000000000 -0500
+@@ -169,13 +169,17 @@
+ {
+ 	revoke_record_cache = kmem_cache_create("revoke_record",
+ 					   sizeof(struct jbd_revoke_record_s),
+-					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
++					   0,
++					   SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
++					   NULL, NULL);
+ 	if (revoke_record_cache == 0)
+ 		return -ENOMEM;
+ 
+ 	revoke_table_cache = kmem_cache_create("revoke_table",
+ 					   sizeof(struct jbd_revoke_table_s),
+-					   0, 0, NULL, NULL);
++					   0,
++					   SLAB_TEMPORARY,
++					   NULL, NULL);
+ 	if (revoke_table_cache == 0) {
+ 		kmem_cache_destroy(revoke_record_cache);
+ 		revoke_record_cache = NULL;
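+
+SLAB_TEMPORARY, used in the two jbd hunks above, marks caches whose
+objects are expected to be short-lived, so the allocator can group them
+with other reclaimable allocations (part of the fragmentation-avoidance
+work this series carries).  A minimal sketch of creating such a cache
+(illustration only, hypothetical names):
+
+	struct example_record {
+		unsigned long blocknr;
+	};
+
+	static struct kmem_cache *example_cache;
+
+	static int __init example_cache_init(void)
+	{
+		example_cache = kmem_cache_create("example_record",
+					sizeof(struct example_record),
+					0,		/* offset */
+					SLAB_TEMPORARY,	/* short-lived objects */
+					NULL,		/* ctor */
+					NULL);		/* dtor */
+		return example_cache ? 0 : -ENOMEM;
+	}
+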
+diff -Nurb linux-2.6.22-570/fs/jffs2/background.c linux-2.6.22-try2/fs/jffs2/background.c
+--- linux-2.6.22-570/fs/jffs2/background.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/jffs2/background.c	2007-12-19 15:29:24.000000000 -0500
+@@ -81,6 +81,7 @@
+ 
+ 	set_user_nice(current, 10);
+ 
++	set_freezable();
+ 	for (;;) {
+ 		allow_signal(SIGHUP);
+ 
+diff -Nurb linux-2.6.22-570/fs/lockd/host.c linux-2.6.22-try2/fs/lockd/host.c
+--- linux-2.6.22-570/fs/lockd/host.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/lockd/host.c	2007-12-19 15:29:23.000000000 -0500
+@@ -161,15 +161,9 @@
+ 	 */
+ 	nsm_unmonitor(host);
+ 
+-	if ((clnt = host->h_rpcclnt) != NULL) {
+-		if (atomic_read(&clnt->cl_users)) {
+-			printk(KERN_WARNING
+-				"lockd: active RPC handle\n");
+-			clnt->cl_dead = 1;
+-		} else {
+-			rpc_destroy_client(host->h_rpcclnt);
+-		}
+-	}
++	clnt = host->h_rpcclnt;
++	if (clnt != NULL)
++		rpc_shutdown_client(clnt);
+ 	kfree(host);
+ }
+ 
+diff -Nurb linux-2.6.22-570/fs/lockd/mon.c linux-2.6.22-try2/fs/lockd/mon.c
+--- linux-2.6.22-570/fs/lockd/mon.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/lockd/mon.c	2007-12-19 15:29:23.000000000 -0500
+@@ -61,6 +61,7 @@
+ 			status);
+ 	else
+ 		status = 0;
++	rpc_shutdown_client(clnt);
+  out:
+ 	return status;
+ }
+@@ -138,7 +139,6 @@
+ 		.program	= &nsm_program,
+ 		.version	= SM_VERSION,
+ 		.authflavor	= RPC_AUTH_NULL,
+-		.flags		= (RPC_CLNT_CREATE_ONESHOT),
+ 	};
+ 
+ 	return rpc_create(&args);
+diff -Nurb linux-2.6.22-570/fs/lockd/svc.c linux-2.6.22-try2/fs/lockd/svc.c
+--- linux-2.6.22-570/fs/lockd/svc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/lockd/svc.c	2007-12-19 15:29:24.000000000 -0500
+@@ -25,6 +25,7 @@
+ #include <linux/smp.h>
+ #include <linux/smp_lock.h>
+ #include <linux/mutex.h>
++#include <linux/freezer.h>
+ 
+ #include <linux/sunrpc/types.h>
+ #include <linux/sunrpc/stats.h>
+@@ -119,13 +120,11 @@
+ 	complete(&lockd_start_done);
+ 
+ 	daemonize("lockd");
++	set_freezable();
+ 
+ 	/* Process request with signals blocked, but allow SIGKILL.  */
+ 	allow_signal(SIGKILL);
+ 
+-	/* kick rpciod */
+-	rpciod_up();
+-
+ 	dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
+ 
+ 	if (!nlm_timeout)
+@@ -202,9 +201,6 @@
+ 	/* Exit the RPC thread */
+ 	svc_exit_thread(rqstp);
+ 
+-	/* release rpciod */
+-	rpciod_down();
+-
+ 	/* Release module */
+ 	unlock_kernel();
+ 	module_put_and_exit(0);
+diff -Nurb linux-2.6.22-570/fs/namei.c linux-2.6.22-try2/fs/namei.c
+--- linux-2.6.22-570/fs/namei.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/namei.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1386,7 +1386,8 @@
+ 	return 0;
+ }
+ 
+-struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
++struct dentry *lookup_one_len_nd(const char *name, struct dentry *base, 
++				 int len, struct nameidata *nd)
+ {
+ 	int err;
+ 	struct qstr this;
+@@ -1394,7 +1395,7 @@
+ 	err = __lookup_one_len(name, &this, base, len);
+ 	if (err)
+ 		return ERR_PTR(err);
+-	return __lookup_hash(&this, base, NULL);
++	return __lookup_hash(&this, base, nd);
+ }
+ 
+ struct dentry *lookup_one_len_kern(const char *name, struct dentry *base, int len)
+@@ -3086,7 +3087,7 @@
+ EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
+ EXPORT_SYMBOL(getname);
+ EXPORT_SYMBOL(lock_rename);
+-EXPORT_SYMBOL(lookup_one_len);
++EXPORT_SYMBOL(lookup_one_len_nd);
+ EXPORT_SYMBOL(page_follow_link_light);
+ EXPORT_SYMBOL(page_put_link);
+ EXPORT_SYMBOL(page_readlink);
+diff -Nurb linux-2.6.22-570/fs/namespace.c linux-2.6.22-try2/fs/namespace.c
+--- linux-2.6.22-570/fs/namespace.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/namespace.c	2007-12-19 15:29:24.000000000 -0500
+@@ -1538,7 +1538,7 @@
+ 
+ 	new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
+ 	if (!new_ns)
+-		return NULL;
++		return ERR_PTR(-ENOMEM);
+ 
+ 	atomic_set(&new_ns->count, 1);
+ 	INIT_LIST_HEAD(&new_ns->list);
+@@ -1552,7 +1552,7 @@
+ 	if (!new_ns->root) {
+ 		up_write(&namespace_sem);
+ 		kfree(new_ns);
+-		return NULL;
++		return ERR_PTR(-ENOMEM);
+ 	}
+ 	spin_lock(&vfsmount_lock);
+ 	list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
+@@ -1597,7 +1597,7 @@
+ 	return new_ns;
+ }
+ 
+-struct mnt_namespace *copy_mnt_ns(int flags, struct mnt_namespace *ns,
++struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
+ 		struct fs_struct *new_fs)
+ {
+ 	struct mnt_namespace *new_ns;
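+
+Since dup_mnt_ns()/copy_mnt_ns() now return ERR_PTR(-ENOMEM) rather than
+NULL, callers have to switch from NULL checks to the IS_ERR()/PTR_ERR()
+convention.  A minimal caller sketch (illustration only, hypothetical
+function name):
+
+	static int example_clone_mnt_ns(unsigned long flags,
+					struct mnt_namespace *ns,
+					struct fs_struct *fs)
+	{
+		struct mnt_namespace *new_ns = copy_mnt_ns(flags, ns, fs);
+
+		if (IS_ERR(new_ns))		/* was: if (!new_ns) */
+			return PTR_ERR(new_ns);	/* propagate the errno */
+
+		/* ... install new_ns ... */
+		return 0;
+	}
+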
+diff -Nurb linux-2.6.22-570/fs/ncpfs/mmap.c linux-2.6.22-try2/fs/ncpfs/mmap.c
+--- linux-2.6.22-570/fs/ncpfs/mmap.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ncpfs/mmap.c	2007-12-19 15:29:24.000000000 -0500
+@@ -25,8 +25,8 @@
+ /*
+  * Fill in the supplied page for mmap
+  */
+-static struct page* ncp_file_mmap_nopage(struct vm_area_struct *area,
+-				     unsigned long address, int *type)
++static struct page* ncp_file_mmap_fault(struct vm_area_struct *area,
++						struct fault_data *fdata)
+ {
+ 	struct file *file = area->vm_file;
+ 	struct dentry *dentry = file->f_path.dentry;
+@@ -40,15 +40,17 @@
+ 
+ 	page = alloc_page(GFP_HIGHUSER); /* ncpfs has nothing against high pages
+ 	           as long as recvmsg and memset works on it */
+-	if (!page)
+-		return page;
++	if (!page) {
++		fdata->type = VM_FAULT_OOM;
++		return NULL;
++	}
+ 	pg_addr = kmap(page);
+-	address &= PAGE_MASK;
+-	pos = address - area->vm_start + (area->vm_pgoff << PAGE_SHIFT);
++	pos = fdata->pgoff << PAGE_SHIFT;
+ 
+ 	count = PAGE_SIZE;
+-	if (address + PAGE_SIZE > area->vm_end) {
+-		count = area->vm_end - address;
++	if (fdata->address + PAGE_SIZE > area->vm_end) {
++		WARN_ON(1); /* shouldn't happen? */
++		count = area->vm_end - fdata->address;
+ 	}
+ 	/* what we can read in one go */
+ 	bufsize = NCP_SERVER(inode)->buffer_size;
+@@ -91,15 +93,14 @@
+ 	 * fetches from the network, here the analogue of disk.
+ 	 * -- wli
+ 	 */
+-	if (type)
+-		*type = VM_FAULT_MAJOR;
++	fdata->type = VM_FAULT_MAJOR;
+ 	count_vm_event(PGMAJFAULT);
+ 	return page;
+ }
+ 
+ static struct vm_operations_struct ncp_file_mmap =
+ {
+-	.nopage	= ncp_file_mmap_nopage,
++	.fault = ncp_file_mmap_fault,
+ };
+ 
+ 
+@@ -123,6 +124,7 @@
+ 		return -EFBIG;
+ 
+ 	vma->vm_ops = &ncp_file_mmap;
++	vma->vm_flags |= VM_CAN_INVALIDATE;
+ 	file_accessed(file);
+ 	return 0;
+ }
+diff -Nurb linux-2.6.22-570/fs/nfs/callback.c linux-2.6.22-try2/fs/nfs/callback.c
+--- linux-2.6.22-570/fs/nfs/callback.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfs/callback.c	2007-12-19 15:29:24.000000000 -0500
+@@ -14,6 +14,7 @@
+ #include <linux/sunrpc/svcsock.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/mutex.h>
++#include <linux/freezer.h>
+ 
+ #include <net/inet_sock.h>
+ 
+@@ -67,6 +68,7 @@
+ 	daemonize("nfsv4-svc");
+ 	/* Process request with signals blocked, but allow SIGKILL.  */
+ 	allow_signal(SIGKILL);
++	set_freezable();
+ 
+ 	complete(&nfs_callback_info.started);
+ 
+diff -Nurb linux-2.6.22-570/fs/nfs/client.c linux-2.6.22-try2/fs/nfs/client.c
+--- linux-2.6.22-570/fs/nfs/client.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/nfs/client.c	2007-12-19 15:29:23.000000000 -0500
+@@ -102,19 +102,10 @@
+ 					   int nfsversion)
+ {
+ 	struct nfs_client *clp;
+-	int error;
+ 
+ 	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
+ 		goto error_0;
+ 
+-	error = rpciod_up();
+-	if (error < 0) {
+-		dprintk("%s: couldn't start rpciod! Error = %d\n",
+-				__FUNCTION__, error);
+-		goto error_1;
+-	}
+-	__set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+-
+ 	if (nfsversion == 4) {
+ 		if (nfs_callback_up() < 0)
+ 			goto error_2;
+@@ -154,9 +145,6 @@
+ 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+ 		nfs_callback_down();
+ error_2:
+-	rpciod_down();
+-	__clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
+-error_1:
+ 	kfree(clp);
+ error_0:
+ 	return NULL;
+@@ -198,9 +186,6 @@
+ 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+ 		nfs_callback_down();
+ 
+-	if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
+-		rpciod_down();
+-
+ 	kfree(clp->cl_hostname);
+ 	kfree(clp);
+ 
+diff -Nurb linux-2.6.22-570/fs/nfs/delegation.c linux-2.6.22-try2/fs/nfs/delegation.c
+--- linux-2.6.22-570/fs/nfs/delegation.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfs/delegation.c	2007-12-19 15:29:23.000000000 -0500
+@@ -74,7 +74,7 @@
+ 			continue;
+ 		get_nfs_open_context(ctx);
+ 		spin_unlock(&inode->i_lock);
+-		err = nfs4_open_delegation_recall(ctx->dentry, state);
++		err = nfs4_open_delegation_recall(ctx, state);
+ 		if (err >= 0)
+ 			err = nfs_delegation_claim_locks(ctx, state);
+ 		put_nfs_open_context(ctx);
+diff -Nurb linux-2.6.22-570/fs/nfs/delegation.h linux-2.6.22-try2/fs/nfs/delegation.h
+--- linux-2.6.22-570/fs/nfs/delegation.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfs/delegation.h	2007-12-19 15:29:23.000000000 -0500
+@@ -39,7 +39,7 @@
+ 
+ /* NFSv4 delegation-related procedures */
+ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
+-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
++int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state);
+ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
+ int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
+ 
+diff -Nurb linux-2.6.22-570/fs/nfs/dir.c linux-2.6.22-try2/fs/nfs/dir.c
+--- linux-2.6.22-570/fs/nfs/dir.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/nfs/dir.c	2007-12-19 15:29:23.000000000 -0500
+@@ -898,14 +898,13 @@
+ 	return (nd->intent.open.flags & O_EXCL) != 0;
+ }
+ 
+-static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
+-				 struct nfs_fh *fh, struct nfs_fattr *fattr)
++static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
+ {
+ 	struct nfs_server *server = NFS_SERVER(dir);
+ 
+ 	if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
+-		/* Revalidate fsid on root dir */
+-		return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
++		/* Revalidate fsid using the parent directory */
++		return __nfs_revalidate_inode(server, dir);
+ 	return 0;
+ }
+ 
+@@ -947,7 +946,7 @@
+ 		res = ERR_PTR(error);
+ 		goto out_unlock;
+ 	}
+-	error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
++	error = nfs_reval_fsid(dir, &fattr);
+ 	if (error < 0) {
+ 		res = ERR_PTR(error);
+ 		goto out_unlock;
+@@ -1247,7 +1246,7 @@
+ 	attr.ia_mode = mode;
+ 	attr.ia_valid = ATTR_MODE;
+ 
+-	if (nd && (nd->flags & LOOKUP_CREATE))
++	if ((nd->flags & LOOKUP_CREATE) != 0)
+ 		open_flags = nd->intent.open.flags;
+ 
+ 	lock_kernel();
+@@ -1747,8 +1746,8 @@
+ 	struct nfs_inode *nfsi;
+ 	struct nfs_access_entry *cache;
+ 
+-	spin_lock(&nfs_access_lru_lock);
+ restart:
++	spin_lock(&nfs_access_lru_lock);
+ 	list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+ 		struct inode *inode;
+ 
+@@ -1773,6 +1772,7 @@
+ 			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
+ 		}
+ 		spin_unlock(&inode->i_lock);
++		spin_unlock(&nfs_access_lru_lock);
+ 		iput(inode);
+ 		goto restart;
+ 	}
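
The relocking above exists because iput() can sleep: the shrinker now drops nfs_access_lru_lock before the final iput() and reacquires it at the restart label. A generic sketch of that shape, with illustrative type and helper names (cache_item, item_expired and detach_item are not real kernel symbols):

	#include <linux/spinlock.h>
	#include <linux/list.h>
	#include <linux/fs.h>

	struct cache_item {
		struct list_head lru;
		struct inode *inode;
	};

	extern int item_expired(struct cache_item *item);
	extern void detach_item(struct cache_item *item);	/* unlink from lru */

	static void prune_cache(spinlock_t *lock, struct list_head *lru)
	{
		struct cache_item *item;
	restart:
		spin_lock(lock);
		list_for_each_entry(item, lru, lru) {
			struct inode *inode = item->inode;

			if (!item_expired(item))
				continue;
			detach_item(item);	/* unlink while still under the lock */
			spin_unlock(lock);	/* drop before any sleeping call */
			iput(inode);		/* may sleep */
			goto restart;		/* the list may have changed meanwhile */
		}
		spin_unlock(lock);
	}
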
+diff -Nurb linux-2.6.22-570/fs/nfs/direct.c linux-2.6.22-try2/fs/nfs/direct.c
+--- linux-2.6.22-570/fs/nfs/direct.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfs/direct.c	2007-12-19 15:29:23.000000000 -0500
+@@ -266,7 +266,7 @@
+ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
+ {
+ 	struct nfs_open_context *ctx = dreq->ctx;
+-	struct inode *inode = ctx->dentry->d_inode;
++	struct inode *inode = ctx->path.dentry->d_inode;
+ 	size_t rsize = NFS_SERVER(inode)->rsize;
+ 	unsigned int pgbase;
+ 	int result;
+@@ -295,10 +295,15 @@
+ 			break;
+ 		}
+ 		if ((unsigned)result < data->npages) {
++			bytes = result * PAGE_SIZE;
++			if (bytes <= pgbase) {
+ 			nfs_direct_release_pages(data->pagevec, result);
+ 			nfs_readdata_release(data);
+ 			break;
+ 		}
++			bytes -= pgbase;
++			data->npages = result;
++		}
+ 
+ 		get_dreq(dreq);
+ 
+@@ -601,7 +606,7 @@
+ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
+ {
+ 	struct nfs_open_context *ctx = dreq->ctx;
+-	struct inode *inode = ctx->dentry->d_inode;
++	struct inode *inode = ctx->path.dentry->d_inode;
+ 	size_t wsize = NFS_SERVER(inode)->wsize;
+ 	unsigned int pgbase;
+ 	int result;
+@@ -630,10 +635,15 @@
+ 			break;
+ 		}
+ 		if ((unsigned)result < data->npages) {
++			bytes = result * PAGE_SIZE;
++			if (bytes <= pgbase) {
+ 			nfs_direct_release_pages(data->pagevec, result);
+ 			nfs_writedata_release(data);
+ 			break;
+ 		}
++			bytes -= pgbase;
++			data->npages = result;
++		}
+ 
+ 		get_dreq(dreq);
+ 
+@@ -763,10 +773,8 @@
+ 		(unsigned long) count, (long long) pos);
+ 
+ 	if (nr_segs != 1)
+-		return -EINVAL;
+-
+-	if (count < 0)
+ 		goto out;
++
+ 	retval = -EFAULT;
+ 	if (!access_ok(VERIFY_WRITE, buf, count))
+ 		goto out;
+@@ -814,7 +822,7 @@
+ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
+ 				unsigned long nr_segs, loff_t pos)
+ {
+-	ssize_t retval;
++	ssize_t retval = -EINVAL;
+ 	struct file *file = iocb->ki_filp;
+ 	struct address_space *mapping = file->f_mapping;
+ 	/* XXX: temporary */
+@@ -827,7 +835,7 @@
+ 		(unsigned long) count, (long long) pos);
+ 
+ 	if (nr_segs != 1)
+-		return -EINVAL;
++		goto out;
+ 
+ 	retval = generic_write_checks(file, &pos, &count, 0);
+ 	if (retval)
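
The new bytes/pgbase logic in both schedule routines handles a short return from get_user_pages(): pinning fewer pages than requested is only fatal when the pinned pages do not even reach past the payload's offset into the first page. A hedged sketch of that accounting (the helper name is illustrative, not part of the patch):

	#include <linux/mm.h>

	/* result: pages actually pinned; pgbase: payload offset in first page.
	 * Returns usable payload bytes, or -EFAULT when nothing is usable. */
	static ssize_t usable_bytes(unsigned int result, unsigned int pgbase)
	{
		size_t bytes = (size_t)result * PAGE_SIZE;

		if (bytes <= pgbase)
			return -EFAULT;	/* short result didn't reach the payload */
		return bytes - pgbase;
	}
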
+diff -Nurb linux-2.6.22-570/fs/nfs/inode.c linux-2.6.22-try2/fs/nfs/inode.c
+--- linux-2.6.22-570/fs/nfs/inode.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/nfs/inode.c	2007-12-19 15:29:23.000000000 -0500
+@@ -466,14 +466,14 @@
+ 
+ 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+ 	if (ctx != NULL) {
+-		atomic_set(&ctx->count, 1);
+-		ctx->dentry = dget(dentry);
+-		ctx->vfsmnt = mntget(mnt);
++		ctx->path.dentry = dget(dentry);
++		ctx->path.mnt = mntget(mnt);
+ 		ctx->cred = get_rpccred(cred);
+ 		ctx->state = NULL;
+ 		ctx->lockowner = current->files;
+ 		ctx->error = 0;
+ 		ctx->dir_cookie = 0;
++		kref_init(&ctx->kref);
+ 	}
+ 	return ctx;
+ }
+@@ -481,27 +481,33 @@
+ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
+ {
+ 	if (ctx != NULL)
+-		atomic_inc(&ctx->count);
++		kref_get(&ctx->kref);
+ 	return ctx;
+ }
+ 
+-void put_nfs_open_context(struct nfs_open_context *ctx)
++static void nfs_free_open_context(struct kref *kref)
+ {
+-	if (atomic_dec_and_test(&ctx->count)) {
++	struct nfs_open_context *ctx = container_of(kref,
++			struct nfs_open_context, kref);
++
+ 		if (!list_empty(&ctx->list)) {
+-			struct inode *inode = ctx->dentry->d_inode;
++		struct inode *inode = ctx->path.dentry->d_inode;
+ 			spin_lock(&inode->i_lock);
+ 			list_del(&ctx->list);
+ 			spin_unlock(&inode->i_lock);
+ 		}
+ 		if (ctx->state != NULL)
+-			nfs4_close_state(ctx->state, ctx->mode);
++		nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
+ 		if (ctx->cred != NULL)
+ 			put_rpccred(ctx->cred);
+-		dput(ctx->dentry);
+-		mntput(ctx->vfsmnt);
++	dput(ctx->path.dentry);
++	mntput(ctx->path.mnt);
+ 		kfree(ctx);
+-	}
++}
++
++void put_nfs_open_context(struct nfs_open_context *ctx)
++{
++	kref_put(&ctx->kref, nfs_free_open_context);
+ }
+ 
+ /*
+@@ -977,8 +983,8 @@
+ 		goto out_changed;
+ 
+ 	server = NFS_SERVER(inode);
+-	/* Update the fsid if and only if this is the root directory */
+-	if (inode == inode->i_sb->s_root->d_inode
++	/* Update the fsid: accept a new fsid reported by any directory */
++	if (S_ISDIR(inode->i_mode)
+ 			&& !nfs_fsid_equal(&server->fsid, &fattr->fsid))
+ 		server->fsid = fattr->fsid;
+ 
+@@ -1125,27 +1131,10 @@
+  */
+ void nfs4_clear_inode(struct inode *inode)
+ {
+-	struct nfs_inode *nfsi = NFS_I(inode);
+-
+ 	/* If we are holding a delegation, return it! */
+ 	nfs_inode_return_delegation(inode);
+ 	/* First call standard NFS clear_inode() code */
+ 	nfs_clear_inode(inode);
+-	/* Now clear out any remaining state */
+-	while (!list_empty(&nfsi->open_states)) {
+-		struct nfs4_state *state;
+-		
+-		state = list_entry(nfsi->open_states.next,
+-				struct nfs4_state,
+-				inode_states);
+-		dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
+-				__FUNCTION__,
+-				inode->i_sb->s_id,
+-				(long long)NFS_FILEID(inode),
+-				state);
+-		BUG_ON(atomic_read(&state->count) != 1);
+-		nfs4_close_state(state, state->state);
+-	}
+ }
+ #endif
+ 
+@@ -1188,14 +1177,11 @@
+ 
+ 	inode_init_once(&nfsi->vfs_inode);
+ 	spin_lock_init(&nfsi->req_lock);
+-	INIT_LIST_HEAD(&nfsi->dirty);
+-	INIT_LIST_HEAD(&nfsi->commit);
+ 	INIT_LIST_HEAD(&nfsi->open_files);
+ 	INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
+ 	INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
+ 	INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
+ 	atomic_set(&nfsi->data_updates, 0);
+-	nfsi->ndirty = 0;
+ 	nfsi->ncommit = 0;
+ 	nfsi->npages = 0;
+ 	nfs4_init_once(nfsi);
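
The open-context change above is a standard atomic_t-to-kref conversion: the count moves into a struct kref, and the free path becomes a release callback invoked exactly once by kref_put(). A minimal sketch of the pattern with a hypothetical structure:

	#include <linux/kernel.h>
	#include <linux/kref.h>
	#include <linux/slab.h>

	struct thing {
		struct kref kref;
		/* ... payload ... */
	};

	static void thing_release(struct kref *kref)
	{
		struct thing *t = container_of(kref, struct thing, kref);

		kfree(t);	/* runs once, when the last reference drops */
	}

	/* kref_init() at creation, kref_get() per extra user, then: */
	static void thing_put(struct thing *t)
	{
		kref_put(&t->kref, thing_release);
	}
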
+diff -Nurb linux-2.6.22-570/fs/nfs/mount_clnt.c linux-2.6.22-try2/fs/nfs/mount_clnt.c
+--- linux-2.6.22-570/fs/nfs/mount_clnt.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfs/mount_clnt.c	2007-12-19 15:29:23.000000000 -0500
+@@ -69,6 +69,7 @@
+ 		msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
+ 
+ 	status = rpc_call_sync(mnt_clnt, &msg, 0);
++	rpc_shutdown_client(mnt_clnt);
+ 	return status < 0? status : (result.status? -EACCES : 0);
+ }
+ 
+@@ -84,8 +85,7 @@
+ 		.program	= &mnt_program,
+ 		.version	= version,
+ 		.authflavor	= RPC_AUTH_UNIX,
+-		.flags		= (RPC_CLNT_CREATE_ONESHOT |
+-				   RPC_CLNT_CREATE_INTR),
++		.flags		= RPC_CLNT_CREATE_INTR,
+ 	};
+ 
+ 	return rpc_create(&args);
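
Dropping RPC_CLNT_CREATE_ONESHOT means the mount client no longer self-destructs after one call; instead the caller pairs rpc_create() with an explicit rpc_shutdown_client(), as the hunks above do. A sketch of the resulting lifetime (args and msg stand in for the real structures built by the caller):

	#include <linux/err.h>
	#include <linux/sunrpc/clnt.h>

	static int do_mount_call(struct rpc_create_args *args, struct rpc_message *msg)
	{
		struct rpc_clnt *clnt = rpc_create(args);
		int status;

		if (IS_ERR(clnt))
			return PTR_ERR(clnt);
		status = rpc_call_sync(clnt, msg, 0);
		rpc_shutdown_client(clnt);	/* explicit teardown replaces ONESHOT */
		return status;
	}
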
+diff -Nurb linux-2.6.22-570/fs/nfs/nfs3proc.c linux-2.6.22-try2/fs/nfs/nfs3proc.c
+--- linux-2.6.22-570/fs/nfs/nfs3proc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfs/nfs3proc.c	2007-12-19 15:29:23.000000000 -0500
+@@ -335,9 +335,7 @@
+ 		 * not sure this buys us anything (and I'd have
+ 		 * to revamp the NFSv3 XDR code) */
+ 		status = nfs3_proc_setattr(dentry, &fattr, sattr);
+-		if (status == 0)
+-			nfs_setattr_update_inode(dentry->d_inode, sattr);
+-		nfs_refresh_inode(dentry->d_inode, &fattr);
++		nfs_post_op_update_inode(dentry->d_inode, &fattr);
+ 		dprintk("NFS reply setattr (post-create): %d\n", status);
+ 	}
+ 	if (status != 0)
+diff -Nurb linux-2.6.22-570/fs/nfs/nfs4_fs.h linux-2.6.22-try2/fs/nfs/nfs4_fs.h
+--- linux-2.6.22-570/fs/nfs/nfs4_fs.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfs/nfs4_fs.h	2007-12-19 15:29:23.000000000 -0500
+@@ -165,7 +165,7 @@
+ extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
+ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
+-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
++extern int nfs4_do_close(struct path *path, struct nfs4_state *state);
+ extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
+ extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
+ extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
+@@ -196,7 +196,7 @@
+ extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
+ extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
+ extern void nfs4_put_open_state(struct nfs4_state *);
+-extern void nfs4_close_state(struct nfs4_state *, mode_t);
++extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
+ extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
+ extern void nfs4_schedule_state_recovery(struct nfs_client *);
+ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
+@@ -222,7 +222,7 @@
+ 
+ #else
+ 
+-#define nfs4_close_state(a, b) do { } while (0)
++#define nfs4_close_state(a, b, c) do { } while (0)
+ 
+ #endif /* CONFIG_NFS_V4 */
+ #endif /* __LINUX_FS_NFS_NFS4_FS.H */
+diff -Nurb linux-2.6.22-570/fs/nfs/nfs4proc.c linux-2.6.22-try2/fs/nfs/nfs4proc.c
+--- linux-2.6.22-570/fs/nfs/nfs4proc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfs/nfs4proc.c	2007-12-19 15:29:23.000000000 -0500
+@@ -214,14 +214,14 @@
+ }
+ 
+ struct nfs4_opendata {
+-	atomic_t count;
++	struct kref kref;
+ 	struct nfs_openargs o_arg;
+ 	struct nfs_openres o_res;
+ 	struct nfs_open_confirmargs c_arg;
+ 	struct nfs_open_confirmres c_res;
+ 	struct nfs_fattr f_attr;
+ 	struct nfs_fattr dir_attr;
+-	struct dentry *dentry;
++	struct path path;
+ 	struct dentry *dir;
+ 	struct nfs4_state_owner *owner;
+ 	struct iattr attrs;
+@@ -230,11 +230,11 @@
+ 	int cancelled;
+ };
+ 
+-static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
++static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
+ 		struct nfs4_state_owner *sp, int flags,
+ 		const struct iattr *attrs)
+ {
+-	struct dentry *parent = dget_parent(dentry);
++	struct dentry *parent = dget_parent(path->dentry);
+ 	struct inode *dir = parent->d_inode;
+ 	struct nfs_server *server = NFS_SERVER(dir);
+ 	struct nfs4_opendata *p;
+@@ -245,8 +245,8 @@
+ 	p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+ 	if (p->o_arg.seqid == NULL)
+ 		goto err_free;
+-	atomic_set(&p->count, 1);
+-	p->dentry = dget(dentry);
++	p->path.mnt = mntget(path->mnt);
++	p->path.dentry = dget(path->dentry);
+ 	p->dir = parent;
+ 	p->owner = sp;
+ 	atomic_inc(&sp->so_count);
+@@ -254,7 +254,7 @@
+ 	p->o_arg.open_flags = flags,
+ 	p->o_arg.clientid = server->nfs_client->cl_clientid;
+ 	p->o_arg.id = sp->so_id;
+-	p->o_arg.name = &dentry->d_name;
++	p->o_arg.name = &p->path.dentry->d_name;
+ 	p->o_arg.server = server;
+ 	p->o_arg.bitmask = server->attr_bitmask;
+ 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
+@@ -274,6 +274,7 @@
+ 	p->c_arg.fh = &p->o_res.fh;
+ 	p->c_arg.stateid = &p->o_res.stateid;
+ 	p->c_arg.seqid = p->o_arg.seqid;
++	kref_init(&p->kref);
+ 	return p;
+ err_free:
+ 	kfree(p);
+@@ -282,27 +283,23 @@
+ 	return NULL;
+ }
+ 
+-static void nfs4_opendata_free(struct nfs4_opendata *p)
++static void nfs4_opendata_free(struct kref *kref)
+ {
+-	if (p != NULL && atomic_dec_and_test(&p->count)) {
++	struct nfs4_opendata *p = container_of(kref,
++			struct nfs4_opendata, kref);
++
+ 		nfs_free_seqid(p->o_arg.seqid);
+ 		nfs4_put_state_owner(p->owner);
+ 		dput(p->dir);
+-		dput(p->dentry);
++	dput(p->path.dentry);
++	mntput(p->path.mnt);
+ 		kfree(p);
+-	}
+ }
+ 
+-/* Helper for asynchronous RPC calls */
+-static int nfs4_call_async(struct rpc_clnt *clnt,
+-		const struct rpc_call_ops *tk_ops, void *calldata)
++static void nfs4_opendata_put(struct nfs4_opendata *p)
+ {
+-	struct rpc_task *task;
+-
+-	if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
+-		return -ENOMEM;
+-	rpc_execute(task);
+-	return 0;
++	if (p != NULL)
++		kref_put(&p->kref, nfs4_opendata_free);
+ }
+ 
+ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
+@@ -451,7 +448,7 @@
+ 						opendata->owner->so_cred,
+ 						&opendata->o_res);
+ 		}
+-		nfs4_close_state(newstate, opendata->o_arg.open_flags);
++		nfs4_close_state(&opendata->path, newstate, opendata->o_arg.open_flags);
+ 	}
+ 	if (newstate != state)
+ 		return -ESTALE;
+@@ -462,7 +459,7 @@
+  * OPEN_RECLAIM:
+  * 	reclaim state on the server after a reboot.
+  */
+-static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
++static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+ 	struct nfs_delegation *delegation = NFS_I(state->inode)->delegation;
+ 	struct nfs4_opendata *opendata;
+@@ -478,7 +475,7 @@
+ 		}
+ 		delegation_type = delegation->type;
+ 	}
+-	opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
++	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
+ 	if (opendata == NULL)
+ 		return -ENOMEM;
+ 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
+@@ -486,17 +483,17 @@
+ 	nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh);
+ 	opendata->o_arg.u.delegation_type = delegation_type;
+ 	status = nfs4_open_recover(opendata, state);
+-	nfs4_opendata_free(opendata);
++	nfs4_opendata_put(opendata);
+ 	return status;
+ }
+ 
+-static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
++static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+ 	struct nfs_server *server = NFS_SERVER(state->inode);
+ 	struct nfs4_exception exception = { };
+ 	int err;
+ 	do {
+-		err = _nfs4_do_open_reclaim(sp, state, dentry);
++		err = _nfs4_do_open_reclaim(ctx, state);
+ 		if (err != -NFS4ERR_DELAY)
+ 			break;
+ 		nfs4_handle_exception(server, err, &exception);
+@@ -512,12 +509,12 @@
+ 	ctx = nfs4_state_find_open_context(state);
+ 	if (IS_ERR(ctx))
+ 		return PTR_ERR(ctx);
+-	ret = nfs4_do_open_reclaim(sp, state, ctx->dentry);
++	ret = nfs4_do_open_reclaim(ctx, state);
+ 	put_nfs_open_context(ctx);
+ 	return ret;
+ }
+ 
+-static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
++static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+ 	struct nfs4_state_owner  *sp  = state->owner;
+ 	struct nfs4_opendata *opendata;
+@@ -525,24 +522,24 @@
+ 
+ 	if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+ 		return 0;
+-	opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
++	opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL);
+ 	if (opendata == NULL)
+ 		return -ENOMEM;
+ 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
+ 	memcpy(opendata->o_arg.u.delegation.data, state->stateid.data,
+ 			sizeof(opendata->o_arg.u.delegation.data));
+ 	ret = nfs4_open_recover(opendata, state);
+-	nfs4_opendata_free(opendata);
++	nfs4_opendata_put(opendata);
+ 	return ret;
+ }
+ 
+-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
++int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+ 	struct nfs4_exception exception = { };
+-	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
++	struct nfs_server *server = NFS_SERVER(state->inode);
+ 	int err;
+ 	do {
+-		err = _nfs4_open_delegation_recall(dentry, state);
++		err = _nfs4_open_delegation_recall(ctx, state);
+ 		switch (err) {
+ 			case 0:
+ 				return err;
+@@ -601,9 +598,9 @@
+ 	nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ 	state = nfs4_opendata_to_nfs4_state(data);
+ 	if (state != NULL)
+-		nfs4_close_state(state, data->o_arg.open_flags);
++		nfs4_close_state(&data->path, state, data->o_arg.open_flags);
+ out_free:
+-	nfs4_opendata_free(data);
++	nfs4_opendata_put(data);
+ }
+ 
+ static const struct rpc_call_ops nfs4_open_confirm_ops = {
+@@ -621,7 +618,7 @@
+ 	struct rpc_task *task;
+ 	int status;
+ 
+-	atomic_inc(&data->count);
++	kref_get(&data->kref);
+ 	/*
+ 	 * If rpc_run_task() ends up calling ->rpc_release(), we
+ 	 * want to ensure that it takes the 'error' code path.
+@@ -704,9 +701,9 @@
+ 	nfs_confirm_seqid(&data->owner->so_seqid, 0);
+ 	state = nfs4_opendata_to_nfs4_state(data);
+ 	if (state != NULL)
+-		nfs4_close_state(state, data->o_arg.open_flags);
++		nfs4_close_state(&data->path, state, data->o_arg.open_flags);
+ out_free:
+-	nfs4_opendata_free(data);
++	nfs4_opendata_put(data);
+ }
+ 
+ static const struct rpc_call_ops nfs4_open_ops = {
+@@ -727,7 +724,7 @@
+ 	struct rpc_task *task;
+ 	int status;
+ 
+-	atomic_inc(&data->count);
++	kref_get(&data->kref);
+ 	/*
+ 	 * If rpc_run_task() ends up calling ->rpc_release(), we
+ 	 * want to ensure that it takes the 'error' code path.
+@@ -811,7 +808,7 @@
+  * 	reclaim state on the server after a network partition.
+  * 	Assumes caller holds the appropriate lock
+  */
+-static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
++static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+ 	struct inode *inode = state->inode;
+ 	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+@@ -820,34 +817,34 @@
+ 	int ret;
+ 
+ 	if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
+-		ret = _nfs4_do_access(inode, sp->so_cred, openflags);
++		ret = _nfs4_do_access(inode, ctx->cred, openflags);
+ 		if (ret < 0)
+ 			return ret;
+ 		memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
+ 		set_bit(NFS_DELEGATED_STATE, &state->flags);
+ 		return 0;
+ 	}
+-	opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL);
++	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, openflags, NULL);
+ 	if (opendata == NULL)
+ 		return -ENOMEM;
+ 	ret = nfs4_open_recover(opendata, state);
+ 	if (ret == -ESTALE) {
+ 		/* Invalidate the state owner so we don't ever use it again */
+-		nfs4_drop_state_owner(sp);
+-		d_drop(dentry);
++		nfs4_drop_state_owner(state->owner);
++		d_drop(ctx->path.dentry);
+ 	}
+-	nfs4_opendata_free(opendata);
++	nfs4_opendata_put(opendata);
+ 	return ret;
+ }
+ 
+-static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
++static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
+ {
+-	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
++	struct nfs_server *server = NFS_SERVER(state->inode);
+ 	struct nfs4_exception exception = { };
+ 	int err;
+ 
+ 	do {
+-		err = _nfs4_open_expired(sp, state, dentry);
++		err = _nfs4_open_expired(ctx, state);
+ 		if (err == -NFS4ERR_DELAY)
+ 			nfs4_handle_exception(server, err, &exception);
+ 	} while (exception.retry);
+@@ -862,7 +859,7 @@
+ 	ctx = nfs4_state_find_open_context(state);
+ 	if (IS_ERR(ctx))
+ 		return PTR_ERR(ctx);
+-	ret = nfs4_do_open_expired(sp, state, ctx->dentry);
++	ret = nfs4_do_open_expired(ctx, state);
+ 	put_nfs_open_context(ctx);
+ 	return ret;
+ }
+@@ -953,9 +950,25 @@
+ }
+ 
+ /*
++ * On an EXCLUSIVE create, the server should send back a bitmask with FATTR4_*
++ * fields corresponding to attributes that were used to store the verifier.
++ * Make sure we clobber those fields in the later setattr call.
++ */
++static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr)
++{
++	if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
++	    !(sattr->ia_valid & ATTR_ATIME_SET))
++		sattr->ia_valid |= ATTR_ATIME;
++
++	if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
++	    !(sattr->ia_valid & ATTR_MTIME_SET))
++		sattr->ia_valid |= ATTR_MTIME;
++}
++
++/*
+  * Returns a referenced nfs4_state
+  */
+-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
++static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+ {
+ 	struct nfs4_state_owner  *sp;
+ 	struct nfs4_state     *state = NULL;
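
Context for nfs4_exclusive_attrset(): an NFSv4 EXCLUSIVE create stores the client's verifier in file metadata (typically atime/mtime), and the reply's attrset bitmap says which attributes were consumed. Any such attribute must be rewritten by the follow-up SETATTR or verifier bytes stay visible. A condensed sketch of the word-1 test (the helper name is hypothetical; constants are the ones used above):

	#include <linux/fs.h>
	#include <linux/nfs4.h>
	#include <linux/types.h>

	/* Hypothetical helper condensing the checks above. */
	static void clobber_verifier_attrs(const u32 *attrset, struct iattr *sattr)
	{
		if ((attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
		    !(sattr->ia_valid & ATTR_ATIME_SET))
			sattr->ia_valid |= ATTR_ATIME;	/* reset atime to "now" */
		if ((attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
		    !(sattr->ia_valid & ATTR_MTIME_SET))
			sattr->ia_valid |= ATTR_MTIME;	/* likewise for mtime */
	}
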
+@@ -975,27 +988,30 @@
+ 		goto err_put_state_owner;
+ 	down_read(&clp->cl_sem);
+ 	status = -ENOMEM;
+-	opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
++	opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
+ 	if (opendata == NULL)
+ 		goto err_release_rwsem;
+ 
+ 	status = _nfs4_proc_open(opendata);
+ 	if (status != 0)
+-		goto err_opendata_free;
++		goto err_opendata_put;
++
++	if (opendata->o_arg.open_flags & O_EXCL)
++		nfs4_exclusive_attrset(opendata, sattr);
+ 
+ 	status = -ENOMEM;
+ 	state = nfs4_opendata_to_nfs4_state(opendata);
+ 	if (state == NULL)
+-		goto err_opendata_free;
++		goto err_opendata_put;
+ 	if (opendata->o_res.delegation_type != 0)
+ 		nfs_inode_set_delegation(state->inode, cred, &opendata->o_res);
+-	nfs4_opendata_free(opendata);
++	nfs4_opendata_put(opendata);
+ 	nfs4_put_state_owner(sp);
+ 	up_read(&clp->cl_sem);
+ 	*res = state;
+ 	return 0;
+-err_opendata_free:
+-	nfs4_opendata_free(opendata);
++err_opendata_put:
++	nfs4_opendata_put(opendata);
+ err_release_rwsem:
+ 	up_read(&clp->cl_sem);
+ err_put_state_owner:
+@@ -1006,14 +1022,14 @@
+ }
+ 
+ 
+-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
++static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred)
+ {
+ 	struct nfs4_exception exception = { };
+ 	struct nfs4_state *res;
+ 	int status;
+ 
+ 	do {
+-		status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
++		status = _nfs4_do_open(dir, path, flags, sattr, cred, &res);
+ 		if (status == 0)
+ 			break;
+ 		/* NOTE: BAD_SEQID means the server and client disagree about the
+@@ -1101,6 +1117,7 @@
+ }
+ 
+ struct nfs4_closedata {
++	struct path path;
+ 	struct inode *inode;
+ 	struct nfs4_state *state;
+ 	struct nfs_closeargs arg;
+@@ -1117,6 +1134,8 @@
+ 	nfs4_put_open_state(calldata->state);
+ 	nfs_free_seqid(calldata->arg.seqid);
+ 	nfs4_put_state_owner(sp);
++	dput(calldata->path.dentry);
++	mntput(calldata->path.mnt);
+ 	kfree(calldata);
+ }
+ 
+@@ -1209,18 +1228,20 @@
+  *
+  * NOTE: Caller must be holding the sp->so_owner semaphore!
+  */
+-int nfs4_do_close(struct inode *inode, struct nfs4_state *state) 
++int nfs4_do_close(struct path *path, struct nfs4_state *state) 
+ {
+-	struct nfs_server *server = NFS_SERVER(inode);
++	struct nfs_server *server = NFS_SERVER(state->inode);
+ 	struct nfs4_closedata *calldata;
++	struct nfs4_state_owner *sp = state->owner;
++	struct rpc_task *task;
+ 	int status = -ENOMEM;
+ 
+ 	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
+ 	if (calldata == NULL)
+ 		goto out;
+-	calldata->inode = inode;
++	calldata->inode = state->inode;
+ 	calldata->state = state;
+-	calldata->arg.fh = NFS_FH(inode);
++	calldata->arg.fh = NFS_FH(state->inode);
+ 	calldata->arg.stateid = &state->stateid;
+ 	/* Serialization for the sequence id */
+ 	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
+@@ -1229,36 +1250,44 @@
+ 	calldata->arg.bitmask = server->attr_bitmask;
+ 	calldata->res.fattr = &calldata->fattr;
+ 	calldata->res.server = server;
++	calldata->path.mnt = mntget(path->mnt);
++	calldata->path.dentry = dget(path->dentry);
+ 
+-	status = nfs4_call_async(server->client, &nfs4_close_ops, calldata);
+-	if (status == 0)
+-		goto out;
+-
+-	nfs_free_seqid(calldata->arg.seqid);
++	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
++	rpc_put_task(task);
++	return 0;
+ out_free_calldata:
+ 	kfree(calldata);
+ out:
++	nfs4_put_open_state(state);
++	nfs4_put_state_owner(sp);
+ 	return status;
+ }
+ 
+-static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
++static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state)
+ {
+ 	struct file *filp;
+ 
+-	filp = lookup_instantiate_filp(nd, dentry, NULL);
++	filp = lookup_instantiate_filp(nd, path->dentry, NULL);
+ 	if (!IS_ERR(filp)) {
+ 		struct nfs_open_context *ctx;
+ 		ctx = (struct nfs_open_context *)filp->private_data;
+ 		ctx->state = state;
+ 		return 0;
+ 	}
+-	nfs4_close_state(state, nd->intent.open.flags);
++	nfs4_close_state(path, state, nd->intent.open.flags);
+ 	return PTR_ERR(filp);
+ }
+ 
+ struct dentry *
+ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+ {
++	struct path path = {
++		.mnt = nd->mnt,
++		.dentry = dentry,
++	};
+ 	struct iattr attr;
+ 	struct rpc_cred *cred;
+ 	struct nfs4_state *state;
+@@ -1277,7 +1306,7 @@
+ 	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
+ 	if (IS_ERR(cred))
+ 		return (struct dentry *)cred;
+-	state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
++	state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
+ 	put_rpccred(cred);
+ 	if (IS_ERR(state)) {
+ 		if (PTR_ERR(state) == -ENOENT)
+@@ -1287,13 +1316,17 @@
+ 	res = d_add_unique(dentry, igrab(state->inode));
+ 	if (res != NULL)
+ 		dentry = res;
+-	nfs4_intent_set_file(nd, dentry, state);
++	nfs4_intent_set_file(nd, &path, state);
+ 	return res;
+ }
+ 
+ int
+ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
+ {
++	struct path path = {
++		.mnt = nd->mnt,
++		.dentry = dentry,
++	};
+ 	struct rpc_cred *cred;
+ 	struct nfs4_state *state;
+ 
+@@ -1302,7 +1335,7 @@
+ 		return PTR_ERR(cred);
+ 	state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
+ 	if (IS_ERR(state))
+-		state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
++		state = nfs4_do_open(dir, &path, openflags, NULL, cred);
+ 	put_rpccred(cred);
+ 	if (IS_ERR(state)) {
+ 		switch (PTR_ERR(state)) {
+@@ -1318,10 +1351,10 @@
+ 		}
+ 	}
+ 	if (state->inode == dentry->d_inode) {
+-		nfs4_intent_set_file(nd, dentry, state);
++		nfs4_intent_set_file(nd, &path, state);
+ 		return 1;
+ 	}
+-	nfs4_close_state(state, openflags);
++	nfs4_close_state(&path, state, openflags);
+ out_drop:
+ 	d_drop(dentry);
+ 	return 0;
+@@ -1752,6 +1785,10 @@
+ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
+                  int flags, struct nameidata *nd)
+ {
++	struct path path = {
++		.mnt = nd->mnt,
++		.dentry = dentry,
++	};
+ 	struct nfs4_state *state;
+ 	struct rpc_cred *cred;
+ 	int status = 0;
+@@ -1761,7 +1798,7 @@
+ 		status = PTR_ERR(cred);
+ 		goto out;
+ 	}
+-	state = nfs4_do_open(dir, dentry, flags, sattr, cred);
++	state = nfs4_do_open(dir, &path, flags, sattr, cred);
+ 	put_rpccred(cred);
+ 	if (IS_ERR(state)) {
+ 		status = PTR_ERR(state);
+@@ -1773,11 +1810,12 @@
+ 		status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
+ 		if (status == 0)
+ 			nfs_setattr_update_inode(state->inode, sattr);
++		nfs_post_op_update_inode(state->inode, &fattr);
+ 	}
+-	if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
+-		status = nfs4_intent_set_file(nd, dentry, state);
++	if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
++		status = nfs4_intent_set_file(nd, &path, state);
+ 	else
+-		nfs4_close_state(state, flags);
++		nfs4_close_state(&path, state, flags);
+ out:
+ 	return status;
+ }
+@@ -3285,7 +3323,7 @@
+ 		memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
+ 					sizeof(data->lsp->ls_stateid.data));
+ 		data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
+-		renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
++		renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
+ 	}
+ 	nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
+ out:
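
The recurring {mnt, dentry} churn throughout this file is the switch from bare dentries to struct path pairs (defined in linux/namei.h in this tree). The discipline is symmetric: take both references when copying a path, release them in reverse order. A minimal sketch with hypothetical helper names (path_get/path_put did not exist yet):

	#include <linux/namei.h>	/* struct path in this kernel */
	#include <linux/dcache.h>
	#include <linux/mount.h>

	static void example_path_copy(struct path *dst, const struct path *src)
	{
		dst->mnt = mntget(src->mnt);
		dst->dentry = dget(src->dentry);
	}

	static void example_path_release(struct path *p)
	{
		dput(p->dentry);
		mntput(p->mnt);	/* reverse order of acquisition */
	}
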
+diff -Nurb linux-2.6.22-570/fs/nfs/nfs4state.c linux-2.6.22-try2/fs/nfs/nfs4state.c
+--- linux-2.6.22-570/fs/nfs/nfs4state.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfs/nfs4state.c	2007-12-19 15:29:23.000000000 -0500
+@@ -341,7 +341,7 @@
+ /*
+  * Close the current file.
+  */
+-void nfs4_close_state(struct nfs4_state *state, mode_t mode)
++void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
+ {
+ 	struct inode *inode = state->inode;
+ 	struct nfs4_state_owner *owner = state->owner;
+@@ -375,10 +375,11 @@
+ 	spin_unlock(&inode->i_lock);
+ 	spin_unlock(&owner->so_lock);
+ 
+-	if (oldstate != newstate && nfs4_do_close(inode, state) == 0)
+-		return;
++	if (oldstate == newstate) {
+ 	nfs4_put_open_state(state);
+ 	nfs4_put_state_owner(owner);
++	} else
++		nfs4_do_close(path, state);
+ }
+ 
+ /*
+diff -Nurb linux-2.6.22-570/fs/nfs/nfs4xdr.c linux-2.6.22-try2/fs/nfs/nfs4xdr.c
+--- linux-2.6.22-570/fs/nfs/nfs4xdr.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfs/nfs4xdr.c	2007-12-19 15:29:23.000000000 -0500
+@@ -3269,7 +3269,7 @@
+ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
+ {
+         __be32 *p;
+-        uint32_t bmlen;
++        uint32_t savewords, bmlen, i;
+         int status;
+ 
+         status = decode_op_hdr(xdr, OP_OPEN);
+@@ -3287,7 +3287,12 @@
+                 goto xdr_error;
+ 
+         READ_BUF(bmlen << 2);
+-        p += bmlen;
++	savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
++	for (i = 0; i < savewords; ++i)
++		READ32(res->attrset[i]);
++
++	p += (bmlen - savewords);
++
+ 	return decode_delegation(xdr, res);
+ xdr_error:
+ 	dprintk("%s: Bitmap too large! Length = %u\n", __FUNCTION__, bmlen);
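
decode_open() previously skipped the returned attribute bitmap wholesale; it now keeps up to NFS4_BITMAP_SIZE words so callers can inspect res->attrset (used by nfs4_exclusive_attrset() above). The general shape, as a standalone C sketch with generic names (wire byte-swapping elided for brevity):

	#include <stdint.h>
	#include <stddef.h>

	/* Copy at most 'cap' bitmap words; return how many wire words the
	 * caller must still skip.  bmlen may legitimately exceed cap. */
	static size_t decode_bitmap_words(const uint32_t *wire, size_t bmlen,
					  uint32_t *out, size_t cap)
	{
		size_t save = bmlen < cap ? bmlen : cap;
		size_t i;

		for (i = 0; i < save; i++)
			out[i] = wire[i];
		return bmlen - save;
	}
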
+diff -Nurb linux-2.6.22-570/fs/nfs/pagelist.c linux-2.6.22-try2/fs/nfs/pagelist.c
+--- linux-2.6.22-570/fs/nfs/pagelist.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfs/pagelist.c	2007-12-19 15:29:23.000000000 -0500
+@@ -85,9 +85,8 @@
+ 	req->wb_offset  = offset;
+ 	req->wb_pgbase	= offset;
+ 	req->wb_bytes   = count;
+-	atomic_set(&req->wb_count, 1);
+ 	req->wb_context = get_nfs_open_context(ctx);
+-
++	kref_init(&req->wb_kref);
+ 	return req;
+ }
+ 
+@@ -109,29 +108,29 @@
+ }
+ 
+ /**
+- * nfs_set_page_writeback_locked - Lock a request for writeback
++ * nfs_set_page_tag_locked - Tag a request as locked
+  * @req:
+  */
+-int nfs_set_page_writeback_locked(struct nfs_page *req)
++static int nfs_set_page_tag_locked(struct nfs_page *req)
+ {
+-	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
++	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
+ 
+ 	if (!nfs_lock_request(req))
+ 		return 0;
+-	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
++	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+ 	return 1;
+ }
+ 
+ /**
+- * nfs_clear_page_writeback - Unlock request and wake up sleepers
++ * nfs_clear_page_tag_locked - Clear request tag and wake up sleepers
+  */
+-void nfs_clear_page_writeback(struct nfs_page *req)
++void nfs_clear_page_tag_locked(struct nfs_page *req)
+ {
+-	struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
++	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
+ 
+ 	if (req->wb_page != NULL) {
+ 		spin_lock(&nfsi->req_lock);
+-		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_WRITEBACK);
++		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+ 		spin_unlock(&nfsi->req_lock);
+ 	}
+ 	nfs_unlock_request(req);
+@@ -160,11 +159,9 @@
+  *
+  * Note: Should never be called with the spinlock held!
+  */
+-void
+-nfs_release_request(struct nfs_page *req)
++static void nfs_free_request(struct kref *kref)
+ {
+-	if (!atomic_dec_and_test(&req->wb_count))
+-		return;
++	struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+ 
+ 	/* Release struct file or cached credential */
+ 	nfs_clear_request(req);
+@@ -172,6 +169,11 @@
+ 	nfs_page_free(req);
+ }
+ 
++void nfs_release_request(struct nfs_page *req)
++{
++	kref_put(&req->wb_kref, nfs_free_request);
++}
++
+ static int nfs_wait_bit_interruptible(void *word)
+ {
+ 	int ret = 0;
+@@ -193,7 +195,7 @@
+ int
+ nfs_wait_on_request(struct nfs_page *req)
+ {
+-        struct rpc_clnt	*clnt = NFS_CLIENT(req->wb_context->dentry->d_inode);
++        struct rpc_clnt	*clnt = NFS_CLIENT(req->wb_context->path.dentry->d_inode);
+ 	sigset_t oldmask;
+ 	int ret = 0;
+ 
+@@ -379,10 +381,10 @@
+ /**
+  * nfs_scan_list - Scan a list for matching requests
+  * @nfsi: NFS inode
+- * @head: One of the NFS inode request lists
+  * @dst: Destination list
+  * @idx_start: lower bound of page->index to scan
+  * @npages: idx_start + npages sets the upper bound to scan.
++ * @tag: tag to scan for
+  *
+  * Moves elements from one of the inode request lists.
+  * If the number of requests is set to 0, the entire address_space
+@@ -390,9 +392,9 @@
+  * The requests are *not* checked to ensure that they form a contiguous set.
+  * You must be holding the inode's req_lock when calling this function
+  */
+-int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head,
++int nfs_scan_list(struct nfs_inode *nfsi,
+ 		struct list_head *dst, pgoff_t idx_start,
+-		unsigned int npages)
++		unsigned int npages, int tag)
+ {
+ 	struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
+ 	struct nfs_page *req;
+@@ -407,9 +409,9 @@
+ 		idx_end = idx_start + npages - 1;
+ 
+ 	for (;;) {
+-		found = radix_tree_gang_lookup(&nfsi->nfs_page_tree,
++		found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
+ 				(void **)&pgvec[0], idx_start,
+-				NFS_SCAN_MAXENTRIES);
++				NFS_SCAN_MAXENTRIES, tag);
+ 		if (found <= 0)
+ 			break;
+ 		for (i = 0; i < found; i++) {
+@@ -417,15 +419,18 @@
+ 			if (req->wb_index > idx_end)
+ 				goto out;
+ 			idx_start = req->wb_index + 1;
+-			if (req->wb_list_head != head)
+-				continue;
+-			if (nfs_set_page_writeback_locked(req)) {
++			if (nfs_set_page_tag_locked(req)) {
+ 				nfs_list_remove_request(req);
++				radix_tree_tag_clear(&nfsi->nfs_page_tree,
++						req->wb_index, tag);
+ 				nfs_list_add_request(req, dst);
+ 				res++;
++				if (res == INT_MAX)
++					goto out;
+ 			}
+ 		}
+-
++		/* for latency reduction */
++		cond_resched_lock(&nfsi->req_lock);
+ 	}
+ out:
+ 	return res;
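
This file's theme: per-request list membership (locked/commit) becomes a radix-tree tag on the request's existing page index, so scans use radix_tree_gang_lookup_tag() instead of walking private lists. A hedged sketch of one scan step (MY_TAG and struct tagged_item are illustrative, not patch symbols):

	#include <linux/radix-tree.h>

	#define MY_TAG 1	/* tags are small integers, typically 0-2 */

	struct tagged_item {		/* illustrative: items record their index */
		unsigned long index;
	};

	static unsigned int scan_tagged(struct radix_tree_root *root,
					unsigned long start)
	{
		struct tagged_item *items[16];
		unsigned int found, i;

		found = radix_tree_gang_lookup_tag(root, (void **)items, start,
						   16, MY_TAG);
		for (i = 0; i < found; i++) {
			/* try-lock the item here, as nfs_set_page_tag_locked()
			 * does, then drop it from the tagged set: */
			radix_tree_tag_clear(root, items[i]->index, MY_TAG);
		}
		return found;
	}
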
+diff -Nurb linux-2.6.22-570/fs/nfs/read.c linux-2.6.22-try2/fs/nfs/read.c
+--- linux-2.6.22-570/fs/nfs/read.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfs/read.c	2007-12-19 15:29:23.000000000 -0500
+@@ -145,8 +145,8 @@
+ 	unlock_page(req->wb_page);
+ 
+ 	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
+-			req->wb_context->dentry->d_inode->i_sb->s_id,
+-			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
++			req->wb_context->path.dentry->d_inode->i_sb->s_id,
++			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ 			req->wb_bytes,
+ 			(long long)req_offset(req));
+ 	nfs_clear_request(req);
+@@ -164,7 +164,7 @@
+ 	int flags;
+ 
+ 	data->req	  = req;
+-	data->inode	  = inode = req->wb_context->dentry->d_inode;
++	data->inode	  = inode = req->wb_context->path.dentry->d_inode;
+ 	data->cred	  = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+@@ -483,17 +483,19 @@
+ 	 */
+ 	error = nfs_wb_page(inode, page);
+ 	if (error)
+-		goto out_error;
++		goto out_unlock;
++	if (PageUptodate(page))
++		goto out_unlock;
+ 
+ 	error = -ESTALE;
+ 	if (NFS_STALE(inode))
+-		goto out_error;
++		goto out_unlock;
+ 
+ 	if (file == NULL) {
+ 		error = -EBADF;
+ 		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
+ 		if (ctx == NULL)
+-			goto out_error;
++			goto out_unlock;
+ 	} else
+ 		ctx = get_nfs_open_context((struct nfs_open_context *)
+ 				file->private_data);
+@@ -502,8 +504,7 @@
+ 
+ 	put_nfs_open_context(ctx);
+ 	return error;
+-
+-out_error:
++out_unlock:
+ 	unlock_page(page);
+ 	return error;
+ }
+@@ -520,21 +521,32 @@
+ 	struct inode *inode = page->mapping->host;
+ 	struct nfs_page *new;
+ 	unsigned int len;
++	int error;
++
++	error = nfs_wb_page(inode, page);
++	if (error)
++		goto out_unlock;
++	if (PageUptodate(page))
++		goto out_unlock;
+ 
+-	nfs_wb_page(inode, page);
+ 	len = nfs_page_length(page);
+ 	if (len == 0)
+ 		return nfs_return_empty_page(page);
++
+ 	new = nfs_create_request(desc->ctx, inode, page, 0, len);
+-	if (IS_ERR(new)) {
+-			SetPageError(page);
+-			unlock_page(page);
+-			return PTR_ERR(new);
+-	}
++	if (IS_ERR(new))
++		goto out_error;
++
+ 	if (len < PAGE_CACHE_SIZE)
+ 		zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
+ 	nfs_pageio_add_request(desc->pgio, new);
+ 	return 0;
++out_error:
++	error = PTR_ERR(new);
++	SetPageError(page);
++out_unlock:
++	unlock_page(page);
++	return error;
+ }
+ 
+ int nfs_readpages(struct file *filp, struct address_space *mapping,
+diff -Nurb linux-2.6.22-570/fs/nfs/super.c linux-2.6.22-try2/fs/nfs/super.c
+--- linux-2.6.22-570/fs/nfs/super.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/nfs/super.c	2007-12-19 15:39:54.000000000 -0500
+@@ -292,6 +292,7 @@
+ 		{ NFS_MOUNT_NONLM, ",nolock", "" },
+ 		{ NFS_MOUNT_NOACL, ",noacl", "" },
+ 		{ NFS_MOUNT_NORDIRPLUS, ",nordirplus", "" },
++ 		{ NFS_MOUNT_UNSHARED, ",nosharecache", ""},
+ 		{ NFS_MOUNT_TAGGED, ",tag", "" },
+ 		{ 0, NULL, NULL }
+ 	};
+@@ -432,7 +433,20 @@
+  */
+ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
+ {
++	struct nfs_server *server = NFS_SB(vfsmnt->mnt_sb);
++	struct rpc_clnt *rpc;
++
+ 	shrink_submounts(vfsmnt, &nfs_automount_list);
++
++	if (!(flags & MNT_FORCE))
++		return;
++	/* Force -EIO on all pending I/O by killing outstanding RPC tasks */
++	rpc = server->client_acl;
++	if (!IS_ERR(rpc))
++		rpc_killall_tasks(rpc);
++	rpc = server->client;
++	if (!IS_ERR(rpc))
++		rpc_killall_tasks(rpc);
+ }
+ 
+ /*
+@@ -602,13 +616,51 @@
+ {
+ 	struct nfs_server *server = data, *old = NFS_SB(sb);
+ 
+-	if (old->nfs_client != server->nfs_client)
++	if (memcmp(&old->nfs_client->cl_addr,
++				&server->nfs_client->cl_addr,
++				sizeof(old->nfs_client->cl_addr)) != 0)
++		return 0;
++	/* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
++	if (old->flags & NFS_MOUNT_UNSHARED)
+ 		return 0;
+ 	if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0)
+ 		return 0;
+ 	return 1;
+ }
+ 
++#define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS)
++
++static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
++{
++	const struct nfs_server *a = s->s_fs_info;
++	const struct rpc_clnt *clnt_a = a->client;
++	const struct rpc_clnt *clnt_b = b->client;
++
++	if ((s->s_flags & NFS_MS_MASK) != (flags & NFS_MS_MASK))
++		goto Ebusy;
++	if (a->nfs_client != b->nfs_client)
++		goto Ebusy;
++	if (a->flags != b->flags)
++		goto Ebusy;
++	if (a->wsize != b->wsize)
++		goto Ebusy;
++	if (a->rsize != b->rsize)
++		goto Ebusy;
++	if (a->acregmin != b->acregmin)
++		goto Ebusy;
++	if (a->acregmax != b->acregmax)
++		goto Ebusy;
++	if (a->acdirmin != b->acdirmin)
++		goto Ebusy;
++	if (a->acdirmax != b->acdirmax)
++		goto Ebusy;
++	if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor)
++		goto Ebusy;
++	return 0;
++Ebusy:
++	return -EBUSY;
++}
++
+ static int nfs_get_sb(struct file_system_type *fs_type,
+ 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+ {
+@@ -617,6 +669,7 @@
+ 	struct nfs_fh mntfh;
+ 	struct nfs_mount_data *data = raw_data;
+ 	struct dentry *mntroot;
++	int (*compare_super)(struct super_block *,void *) = nfs_compare_super;
+ 	int error;
+ 
+ 	/* Validate the mount data */
+@@ -631,16 +684,22 @@
+ 		goto out_err_noserver;
+ 	}
+ 
++	if (server->flags & NFS_MOUNT_UNSHARED)
++		compare_super = NULL;
++
+ 	/* Get a superblock - note that we may end up sharing one that already exists */
+-	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
++	s = sget(fs_type, compare_super, nfs_set_super, server);
+ 	if (IS_ERR(s)) {
+ 		error = PTR_ERR(s);
+ 		goto out_err_nosb;
+ 	}
+ 
+ 	if (s->s_fs_info != server) {
++		error = nfs_compare_mount_options(s, server, flags);
+ 		nfs_free_server(server);
+ 		server = NULL;
++		if (error < 0)
++			goto error_splat_super;
+ 	}
+ 
+ 	if (!s->s_root) {
+@@ -693,6 +752,7 @@
+ 	struct super_block *s;
+ 	struct nfs_server *server;
+ 	struct dentry *mntroot;
++	int (*compare_super)(struct super_block *,void *) = nfs_compare_super;
+ 	int error;
+ 
+ 	dprintk("--> nfs_xdev_get_sb()\n");
+@@ -704,8 +764,11 @@
+ 		goto out_err_noserver;
+ 	}
+ 
++	if (server->flags & NFS_MOUNT_UNSHARED)
++		compare_super = NULL;
++
+ 	/* Get a superblock - note that we may end up sharing one that already exists */
+-	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
++	s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
+ 	if (IS_ERR(s)) {
+ 		error = PTR_ERR(s);
+ 		goto out_err_nosb;
+@@ -810,6 +873,7 @@
+ 	struct dentry *mntroot;
+ 	char *mntpath = NULL, *hostname = NULL, ip_addr[16];
+ 	void *p;
++	int (*compare_super)(struct super_block *,void *) = nfs_compare_super;
+ 	int error;
+ 
+ 	if (data == NULL) {
+@@ -881,16 +945,22 @@
+ 		goto out_err_noserver;
+ 	}
+ 
++	if (server->flags & NFS4_MOUNT_UNSHARED)
++		compare_super = NULL;
++
+ 	/* Get a superblock - note that we may end up sharing one that already exists */
+-	s = sget(fs_type, nfs_compare_super, nfs_set_super, server);
++	s = sget(fs_type, compare_super, nfs_set_super, server);
+ 	if (IS_ERR(s)) {
+ 		error = PTR_ERR(s);
+ 		goto out_free;
+ 	}
+ 
+ 	if (s->s_fs_info != server) {
++		error = nfs_compare_mount_options(s, server, flags);
+ 		nfs_free_server(server);
+ 		server = NULL;
++		if (error < 0)
++			goto error_splat_super;
+ 	}
+ 
+ 	if (!s->s_root) {
+@@ -951,6 +1021,7 @@
+ 	struct super_block *s;
+ 	struct nfs_server *server;
+ 	struct dentry *mntroot;
++	int (*compare_super)(struct super_block *,void *) = nfs_compare_super;
+ 	int error;
+ 
+ 	dprintk("--> nfs4_xdev_get_sb()\n");
+@@ -962,8 +1033,11 @@
+ 		goto out_err_noserver;
+ 	}
+ 
++	if (server->flags & NFS4_MOUNT_UNSHARED)
++		compare_super = NULL;
++
+ 	/* Get a superblock - note that we may end up sharing one that already exists */
+-	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
++	s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
+ 	if (IS_ERR(s)) {
+ 		error = PTR_ERR(s);
+ 		goto out_err_nosb;
+@@ -1018,6 +1092,7 @@
+ 	struct nfs_server *server;
+ 	struct dentry *mntroot;
+ 	struct nfs_fh mntfh;
++	int (*compare_super)(struct super_block *,void *) = nfs_compare_super;
+ 	int error;
+ 
+ 	dprintk("--> nfs4_referral_get_sb()\n");
+@@ -1029,8 +1104,11 @@
+ 		goto out_err_noserver;
+ 	}
+ 
++	if (server->flags & NFS4_MOUNT_UNSHARED)
++		compare_super = NULL;
++
+ 	/* Get a superblock - note that we may end up sharing one that already exists */
+-	s = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server);
++	s = sget(&nfs_fs_type, compare_super, nfs_set_super, server);
+ 	if (IS_ERR(s)) {
+ 		error = PTR_ERR(s);
+ 		goto out_err_nosb;
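
The "nosharecache" plumbing works by choice of comparison callback: sget() matches existing superblocks via its 'test' function, and a NULL test can never match, so the mount always gets a private superblock. When sharing does occur, nfs_compare_mount_options() vetoes mismatched options with -EBUSY. A sketch of the switch (my_test_super/my_set_super are placeholders for the real callbacks):

	#include <linux/fs.h>

	extern int my_test_super(struct super_block *, void *);
	extern int my_set_super(struct super_block *, void *);

	static struct super_block *get_sb_maybe_shared(struct file_system_type *t,
						       int shared, void *data)
	{
		/* NULL test => no existing sb can match => always allocate */
		return sget(t, shared ? my_test_super : NULL, my_set_super, data);
	}
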
+diff -Nurb linux-2.6.22-570/fs/nfs/write.c linux-2.6.22-try2/fs/nfs/write.c
+--- linux-2.6.22-570/fs/nfs/write.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfs/write.c	2007-12-19 15:29:23.000000000 -0500
+@@ -117,7 +117,7 @@
+ 	if (PagePrivate(page)) {
+ 		req = (struct nfs_page *)page_private(page);
+ 		if (req != NULL)
+-			atomic_inc(&req->wb_count);
++			kref_get(&req->wb_kref);
+ 	}
+ 	return req;
+ }
+@@ -191,8 +191,6 @@
+ 	}
+ 	/* Update file length */
+ 	nfs_grow_file(page, offset, count);
+-	/* Set the PG_uptodate flag? */
+-	nfs_mark_uptodate(page, offset, count);
+ 	nfs_unlock_request(req);
+ 	return 0;
+ }
+@@ -291,7 +289,7 @@
+ 		BUG();
+ 	}
+ 	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
+-			NFS_PAGE_TAG_WRITEBACK);
++			NFS_PAGE_TAG_LOCKED);
+ 	ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
+ 	spin_unlock(req_lock);
+ 	nfs_pageio_add_request(pgio, req);
+@@ -400,7 +398,7 @@
+ 	if (PageDirty(req->wb_page))
+ 		set_bit(PG_NEED_FLUSH, &req->wb_flags);
+ 	nfsi->npages++;
+-	atomic_inc(&req->wb_count);
++	kref_get(&req->wb_kref);
+ 	return 0;
+ }
+ 
+@@ -409,7 +407,7 @@
+  */
+ static void nfs_inode_remove_request(struct nfs_page *req)
+ {
+-	struct inode *inode = req->wb_context->dentry->d_inode;
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 
+ 	BUG_ON (!NFS_WBACK_BUSY(req));
+@@ -457,13 +455,15 @@
+ static void
+ nfs_mark_request_commit(struct nfs_page *req)
+ {
+-	struct inode *inode = req->wb_context->dentry->d_inode;
++	struct inode *inode = req->wb_context->path.dentry->d_inode;
+ 	struct nfs_inode *nfsi = NFS_I(inode);
+ 
+ 	spin_lock(&nfsi->req_lock);
+-	nfs_list_add_request(req, &nfsi->commit);
+ 	nfsi->ncommit++;
+ 	set_bit(PG_NEED_COMMIT, &(req)->wb_flags);
++	radix_tree_tag_set(&nfsi->nfs_page_tree,
++			req->wb_index,
++			NFS_PAGE_TAG_COMMIT);
+ 	spin_unlock(&nfsi->req_lock);
+ 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+@@ -526,14 +526,14 @@
+ 		idx_end = idx_start + npages - 1;
+ 
+ 	next = idx_start;
+-	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
++	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
+ 		if (req->wb_index > idx_end)
+ 			break;
+ 
+ 		next = req->wb_index + 1;
+ 		BUG_ON(!NFS_WBACK_BUSY(req));
+ 
+-		atomic_inc(&req->wb_count);
++		kref_get(&req->wb_kref);
+ 		spin_unlock(&nfsi->req_lock);
+ 		error = nfs_wait_on_request(req);
+ 		nfs_release_request(req);
+@@ -577,10 +577,9 @@
+ 	int res = 0;
+ 
+ 	if (nfsi->ncommit != 0) {
+-		res = nfs_scan_list(nfsi, &nfsi->commit, dst, idx_start, npages);
++		res = nfs_scan_list(nfsi, dst, idx_start, npages,
++				NFS_PAGE_TAG_COMMIT);
+ 		nfsi->ncommit -= res;
+-		if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
+-			printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+ 	}
+ 	return res;
+ }
+@@ -751,12 +750,17 @@
+ static void nfs_writepage_release(struct nfs_page *req)
+ {
+ 
+-	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
++	if (PageError(req->wb_page)) {
++		nfs_end_page_writeback(req->wb_page);
++		nfs_inode_remove_request(req);
++	} else if (!nfs_reschedule_unstable_write(req)) {
++		/* Set the PG_uptodate flag */
++		nfs_mark_uptodate(req->wb_page, req->wb_pgbase, req->wb_bytes);
+ 		nfs_end_page_writeback(req->wb_page);
+ 		nfs_inode_remove_request(req);
+ 	} else
+ 		nfs_end_page_writeback(req->wb_page);
+-	nfs_clear_page_writeback(req);
++	nfs_clear_page_tag_locked(req);
+ }
+ 
+ static inline int flush_task_priority(int how)
+@@ -786,7 +790,7 @@
+ 	 * NB: take care not to mess about with data->commit et al. */
+ 
+ 	data->req = req;
+-	data->inode = inode = req->wb_context->dentry->d_inode;
++	data->inode = inode = req->wb_context->path.dentry->d_inode;
+ 	data->cred = req->wb_context->cred;
+ 
+ 	data->args.fh     = NFS_FH(inode);
+@@ -885,7 +889,7 @@
+ 	}
+ 	nfs_redirty_request(req);
+ 	nfs_end_page_writeback(req->wb_page);
+-	nfs_clear_page_writeback(req);
++	nfs_clear_page_tag_locked(req);
+ 	return -ENOMEM;
+ }
+ 
+@@ -928,7 +932,7 @@
+ 		nfs_list_remove_request(req);
+ 		nfs_redirty_request(req);
+ 		nfs_end_page_writeback(req->wb_page);
+-		nfs_clear_page_writeback(req);
++		nfs_clear_page_tag_locked(req);
+ 	}
+ 	return -ENOMEM;
+ }
+@@ -954,8 +958,8 @@
+ 	struct page		*page = req->wb_page;
+ 
+ 	dprintk("NFS: write (%s/%Ld %d@%Ld)",
+-		req->wb_context->dentry->d_inode->i_sb->s_id,
+-		(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
++		req->wb_context->path.dentry->d_inode->i_sb->s_id,
++		(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ 		req->wb_bytes,
+ 		(long long)req_offset(req));
+ 
+@@ -1020,8 +1024,8 @@
+ 		page = req->wb_page;
+ 
+ 		dprintk("NFS: write (%s/%Ld %d@%Ld)",
+-			req->wb_context->dentry->d_inode->i_sb->s_id,
+-			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
++			req->wb_context->path.dentry->d_inode->i_sb->s_id,
++			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ 			req->wb_bytes,
+ 			(long long)req_offset(req));
+ 
+@@ -1039,12 +1043,14 @@
+ 			dprintk(" marked for commit\n");
+ 			goto next;
+ 		}
++		/* Set the PG_uptodate flag? */
++		nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ 		dprintk(" OK\n");
+ remove_request:
+ 		nfs_end_page_writeback(page);
+ 		nfs_inode_remove_request(req);
+ 	next:
+-		nfs_clear_page_writeback(req);
++		nfs_clear_page_tag_locked(req);
+ 	}
+ }
+ 
+@@ -1157,7 +1163,7 @@
+ 
+ 	list_splice_init(head, &data->pages);
+ 	first = nfs_list_entry(data->pages.next);
+-	inode = first->wb_context->dentry->d_inode;
++	inode = first->wb_context->path.dentry->d_inode;
+ 
+ 	data->inode	  = inode;
+ 	data->cred	  = first->wb_context->cred;
+@@ -1207,7 +1213,7 @@
+ 		nfs_list_remove_request(req);
+ 		nfs_mark_request_commit(req);
+ 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+-		nfs_clear_page_writeback(req);
++		nfs_clear_page_tag_locked(req);
+ 	}
+ 	return -ENOMEM;
+ }
+@@ -1234,8 +1240,8 @@
+ 		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
+ 
+ 		dprintk("NFS: commit (%s/%Ld %d@%Ld)",
+-			req->wb_context->dentry->d_inode->i_sb->s_id,
+-			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
++			req->wb_context->path.dentry->d_inode->i_sb->s_id,
++			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
+ 			req->wb_bytes,
+ 			(long long)req_offset(req));
+ 		if (task->tk_status < 0) {
+@@ -1249,6 +1255,9 @@
+ 		 * returned by the server against all stored verfs. */
+ 		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
+ 			/* We have a match */
++			/* Set the PG_uptodate flag */
++			nfs_mark_uptodate(req->wb_page, req->wb_pgbase,
++					req->wb_bytes);
+ 			nfs_inode_remove_request(req);
+ 			dprintk(" OK\n");
+ 			goto next;
+@@ -1257,7 +1266,7 @@
+ 		dprintk(" mismatch\n");
+ 		nfs_redirty_request(req);
+ 	next:
+-		nfs_clear_page_writeback(req);
++		nfs_clear_page_tag_locked(req);
+ 	}
+ }
+ 
+diff -Nurb linux-2.6.22-570/fs/nfsd/nfs4callback.c linux-2.6.22-try2/fs/nfsd/nfs4callback.c
+--- linux-2.6.22-570/fs/nfsd/nfs4callback.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfsd/nfs4callback.c	2007-12-19 15:29:23.000000000 -0500
+@@ -429,29 +429,23 @@
+ 		goto out_err;
+ 	}
+ 
+-	/* Kick rpciod, put the call on the wire. */
+-	if (rpciod_up() != 0)
+-		goto out_clnt;
+-
+ 	/* the task holds a reference to the nfs4_client struct */
+ 	atomic_inc(&clp->cl_count);
+ 
+ 	msg.rpc_cred = nfsd4_lookupcred(clp,0);
+ 	if (IS_ERR(msg.rpc_cred))
+-		goto out_rpciod;
++		goto out_release_clp;
+ 	status = rpc_call_async(cb->cb_client, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
+ 	put_rpccred(msg.rpc_cred);
+ 
+ 	if (status != 0) {
+ 		dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
+-		goto out_rpciod;
++		goto out_release_clp;
+ 	}
+ 	return;
+ 
+-out_rpciod:
++out_release_clp:
+ 	atomic_dec(&clp->cl_count);
+-	rpciod_down();
+-out_clnt:
+ 	rpc_shutdown_client(cb->cb_client);
+ out_err:
+ 	cb->cb_client = NULL;
+diff -Nurb linux-2.6.22-570/fs/nfsd/nfs4state.c linux-2.6.22-try2/fs/nfsd/nfs4state.c
+--- linux-2.6.22-570/fs/nfsd/nfs4state.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfsd/nfs4state.c	2007-12-19 15:29:23.000000000 -0500
+@@ -378,7 +378,6 @@
+ 	if (clnt) {
+ 		clp->cl_callback.cb_client = NULL;
+ 		rpc_shutdown_client(clnt);
+-		rpciod_down();
+ 	}
+ }
+ 
+diff -Nurb linux-2.6.22-570/fs/nfsd/nfssvc.c linux-2.6.22-try2/fs/nfsd/nfssvc.c
+--- linux-2.6.22-570/fs/nfsd/nfssvc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/nfsd/nfssvc.c	2007-12-19 15:29:24.000000000 -0500
+@@ -19,6 +19,7 @@
+ #include <linux/slab.h>
+ #include <linux/smp.h>
+ #include <linux/smp_lock.h>
++#include <linux/freezer.h>
+ #include <linux/fs_struct.h>
+ 
+ #include <linux/sunrpc/types.h>
+@@ -432,6 +433,7 @@
+ 	 * dirty pages.
+ 	 */
+ 	current->flags |= PF_LESS_THROTTLE;
++	set_freezable();
+ 
+ 	/*
+ 	 * The main request loop
+diff -Nurb linux-2.6.22-570/fs/ocfs2/alloc.c linux-2.6.22-try2/fs/ocfs2/alloc.c
+--- linux-2.6.22-570/fs/ocfs2/alloc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/alloc.c	2007-12-19 15:29:23.000000000 -0500
+@@ -50,6 +50,8 @@
+ #include "buffer_head_io.h"
+ 
+ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
++static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
++					 struct ocfs2_extent_block *eb);
+ 
+ /*
+  * Structures which describe a path through a btree, and functions to
+@@ -117,6 +119,31 @@
+ }
+ 
+ /*
++ * Copy all the elements of src into dest. After this call, src could be freed
++ * without affecting dest.
++ *
++ * Both paths should have the same root. Any non-root elements of dest
++ * will be freed.
++ */
++static void ocfs2_cp_path(struct ocfs2_path *dest, struct ocfs2_path *src)
++{
++	int i;
++
++	BUG_ON(path_root_bh(dest) != path_root_bh(src));
++	BUG_ON(path_root_el(dest) != path_root_el(src));
++
++	ocfs2_reinit_path(dest, 1);
++
++	for(i = 1; i < OCFS2_MAX_PATH_DEPTH; i++) {
++		dest->p_node[i].bh = src->p_node[i].bh;
++		dest->p_node[i].el = src->p_node[i].el;
++
++		if (dest->p_node[i].bh)
++			get_bh(dest->p_node[i].bh);
++	}
++}
++
++/*
+  * Make the *dest path the same as src and re-initialize src path to
+  * have a root only.
+  */
+@@ -212,10 +239,41 @@
+ 	return ret;
+ }
+ 
++/*
++ * Return the index of the extent record which contains cluster #v_cluster.
++ * -1 is returned if it was not found.
++ *
++ * Should work fine on interior and exterior nodes.
++ */
++int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster)
++{
++	int ret = -1;
++	int i;
++	struct ocfs2_extent_rec *rec;
++	u32 rec_end, rec_start, clusters;
++
++	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
++		rec = &el->l_recs[i];
++
++		rec_start = le32_to_cpu(rec->e_cpos);
++		clusters = ocfs2_rec_clusters(el, rec);
++
++		rec_end = rec_start + clusters;
++
++		if (v_cluster >= rec_start && v_cluster < rec_end) {
++			ret = i;
++			break;
++		}
++	}
++
++	return ret;
++}
++
+ enum ocfs2_contig_type {
+ 	CONTIG_NONE = 0,
+ 	CONTIG_LEFT,
+-	CONTIG_RIGHT
++	CONTIG_RIGHT,
++	CONTIG_LEFTRIGHT,
+ };
+ 
+ 
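
A hypothetical caller of the new ocfs2_search_extent_list(), to make the -1 convention concrete (types reused from this file): the helper maps a virtual cluster to the index of the covering record, and -1 means no record covers it, i.e. a hole:

	static struct ocfs2_extent_rec *find_covering_rec(struct ocfs2_extent_list *el,
							  u32 v_cluster)
	{
		int idx = ocfs2_search_extent_list(el, v_cluster);

		if (idx == -1)
			return NULL;	/* hole: no extent covers v_cluster */
		return &el->l_recs[idx];
	}
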
+@@ -253,6 +311,14 @@
+ {
+ 	u64 blkno = le64_to_cpu(insert_rec->e_blkno);
+ 
++	/*
++	 * Refuse to coalesce extent records with different flag
++	 * fields - we don't want to mix unwritten extents with user
++	 * data.
++	 */
++	if (ext->e_flags != insert_rec->e_flags)
++		return CONTIG_NONE;
++
+ 	if (ocfs2_extents_adjacent(ext, insert_rec) &&
+ 	    ocfs2_block_extent_contig(inode->i_sb, ext, blkno))
+ 			return CONTIG_RIGHT;
+@@ -277,7 +343,14 @@
+ 	APPEND_TAIL,
+ };
+ 
++enum ocfs2_split_type {
++	SPLIT_NONE = 0,
++	SPLIT_LEFT,
++	SPLIT_RIGHT,
++};
++
+ struct ocfs2_insert_type {
++	enum ocfs2_split_type	ins_split;
+ 	enum ocfs2_append_type	ins_appending;
+ 	enum ocfs2_contig_type	ins_contig;
+ 	int			ins_contig_index;
+@@ -285,6 +358,13 @@
+ 	int			ins_tree_depth;
+ };
+ 
++struct ocfs2_merge_ctxt {
++	enum ocfs2_contig_type	c_contig_type;
++	int			c_has_empty_extent;
++	int			c_split_covers_rec;
++	int			c_used_tail_recs;
++};
++
+ /*
+  * How many free extents have we got before we need more meta data?
+  */
+@@ -384,13 +464,7 @@
+ 			strcpy(eb->h_signature, OCFS2_EXTENT_BLOCK_SIGNATURE);
+ 			eb->h_blkno = cpu_to_le64(first_blkno);
+ 			eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
+-
+-#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
+-			/* we always use slot zero's suballocator */
+-			eb->h_suballoc_slot = 0;
+-#else
+ 			eb->h_suballoc_slot = cpu_to_le16(osb->slot_num);
+-#endif
+ 			eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
+ 			eb->h_list.l_count =
+ 				cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
+@@ -461,7 +535,7 @@
+ 			    struct inode *inode,
+ 			    struct buffer_head *fe_bh,
+ 			    struct buffer_head *eb_bh,
+-			    struct buffer_head *last_eb_bh,
++			    struct buffer_head **last_eb_bh,
+ 			    struct ocfs2_alloc_context *meta_ac)
+ {
+ 	int status, new_blocks, i;
+@@ -476,7 +550,7 @@
+ 
+ 	mlog_entry_void();
+ 
+-	BUG_ON(!last_eb_bh);
++	BUG_ON(!last_eb_bh || !*last_eb_bh);
+ 
+ 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
+ 
+@@ -507,7 +581,7 @@
+ 		goto bail;
+ 	}
+ 
+-	eb = (struct ocfs2_extent_block *)last_eb_bh->b_data;
++	eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
+ 	new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list);
+ 
+ 	/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
+@@ -568,7 +642,7 @@
+ 	 * journal_dirty erroring as it won't unless we've aborted the
+ 	 * handle (in which case we would never be here) so reserving
+ 	 * the write with journal_access is all we need to do. */
+-	status = ocfs2_journal_access(handle, inode, last_eb_bh,
++	status = ocfs2_journal_access(handle, inode, *last_eb_bh,
+ 				      OCFS2_JOURNAL_ACCESS_WRITE);
+ 	if (status < 0) {
+ 		mlog_errno(status);
+@@ -601,10 +675,10 @@
+ 	 * next_leaf on the previously last-extent-block. */
+ 	fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk);
+ 
+-	eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
++	eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
+ 	eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
+ 
+-	status = ocfs2_journal_dirty(handle, last_eb_bh);
++	status = ocfs2_journal_dirty(handle, *last_eb_bh);
+ 	if (status < 0)
+ 		mlog_errno(status);
+ 	status = ocfs2_journal_dirty(handle, fe_bh);
+@@ -616,6 +690,14 @@
+ 			mlog_errno(status);
+ 	}
+ 
++	/*
++	 * Some callers want to track the rightmost leaf so pass it
++	 * back here.
++	 */
++	brelse(*last_eb_bh);
++	get_bh(new_eb_bhs[0]);
++	*last_eb_bh = new_eb_bhs[0];
++
+ 	status = 0;
+ bail:
+ 	if (new_eb_bhs) {
+@@ -829,6 +911,87 @@
+ }
+ 
+ /*
++ * Grow a b-tree so that it has more records.
++ *
++ * We might shift the tree depth in which case existing paths should
++ * be considered invalid.
++ *
++ * Tree depth after the grow is returned via *final_depth.
++ *
++ * *last_eb_bh will be updated by ocfs2_add_branch().
++ */
++static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
++			   struct buffer_head *di_bh, int *final_depth,
++			   struct buffer_head **last_eb_bh,
++			   struct ocfs2_alloc_context *meta_ac)
++{
++	int ret, shift;
++	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
++	int depth = le16_to_cpu(di->id2.i_list.l_tree_depth);
++	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
++	struct buffer_head *bh = NULL;
++
++	BUG_ON(meta_ac == NULL);
++
++	shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh);
++	if (shift < 0) {
++		ret = shift;
++		mlog_errno(ret);
++		goto out;
++	}
++
++	/* We traveled all the way to the bottom of the allocation tree
++	 * and didn't find room for any more extents - we need to add
++	 * another tree level */
++	if (shift) {
++		BUG_ON(bh);
++		mlog(0, "need to shift tree depth (current = %d)\n", depth);
++
++		/* ocfs2_shift_tree_depth will return us a buffer with
++		 * the new extent block (so we can pass that to
++		 * ocfs2_add_branch). */
++		ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh,
++					     meta_ac, &bh);
++		if (ret < 0) {
++			mlog_errno(ret);
++			goto out;
++		}
++		depth++;
++		if (depth == 1) {
++			/*
++			 * Special case: we have room now if we shifted from
++			 * tree_depth 0, so no more work needs to be done.
++			 *
++			 * We won't be calling add_branch, so pass
++			 * back *last_eb_bh as the new leaf. At depth
++			 * zero, it should always be null so there's
++			 * no reason to brelse.
++			 */
++			BUG_ON(*last_eb_bh);
++			get_bh(bh);
++			*last_eb_bh = bh;
++			goto out;
++		}
++	}
++
++	/* call ocfs2_add_branch to add the final part of the tree with
++	 * the new data. */
++	mlog(0, "add branch. bh = %p\n", bh);
++	ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh,
++			       meta_ac);
++	if (ret < 0) {
++		mlog_errno(ret);
++		goto out;
++	}
++
++out:
++	if (final_depth)
++		*final_depth = depth;
++	brelse(bh);
++	return ret;
++}
++
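Callers are expected to grow the tree only once the rightmost extent list has run out of free records; both ocfs2_insert_extent() and ocfs2_split_and_insert() below follow the same pattern, condensed here:

	if (insert.ins_contig == CONTIG_NONE && insert.ins_free_records == 0) {
		ret = ocfs2_grow_tree(inode, handle, di_bh,
				      &insert.ins_tree_depth, &last_eb_bh,
				      meta_ac);
		if (ret)
			goto out;
		/* paths computed before this point are now invalid, and
		 * *last_eb_bh may point at a brand new rightmost leaf */
	}
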
++/*
+  * This is only valid for leaf nodes, which are the only ones that can
+  * have empty extents anyway.
+  */
+@@ -934,6 +1097,22 @@
+ 
+ }
+ 
++static void ocfs2_remove_empty_extent(struct ocfs2_extent_list *el)
++{
++	int size, num_recs = le16_to_cpu(el->l_next_free_rec);
++
++	BUG_ON(num_recs == 0);
++
++	if (ocfs2_is_empty_extent(&el->l_recs[0])) {
++		num_recs--;
++		size = num_recs * sizeof(struct ocfs2_extent_rec);
++		memmove(&el->l_recs[0], &el->l_recs[1], size);
++		memset(&el->l_recs[num_recs], 0,
++		       sizeof(struct ocfs2_extent_rec));
++		el->l_next_free_rec = cpu_to_le16(num_recs);
++	}
++}
++
+ /*
+  * Create an empty extent record .
+  *
+@@ -1211,6 +1390,10 @@
+ 	 * immediately to their right.
+ 	 */
+ 	left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
++	if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) {
++		BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
++		left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
++	}
+ 	left_clusters -= le32_to_cpu(left_rec->e_cpos);
+ 	left_rec->e_int_clusters = cpu_to_le32(left_clusters);
+ 
+@@ -1531,10 +1714,16 @@
+ 	return ret;
+ }
+ 
++/*
++ * Extend the transaction by enough credits to complete the rotation,
++ * and still leave at least the original number of credits allocated
++ * to this transaction.
++ */
+ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
++					   int op_credits,
+ 					   struct ocfs2_path *path)
+ {
+-	int credits = (path->p_tree_depth - subtree_depth) * 2 + 1;
++	int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
+ 
+ 	if (handle->h_buffer_credits < credits)
+ 		return ocfs2_extend_trans(handle, credits);
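The estimate is two buffers per tree level between the subtree root and the leaves, one for the root, plus the caller's own reservation. A stand-alone check of the arithmetic (values are illustrative, not taken from the patch):

	#include <stdio.h>

	int main(void)
	{
		int tree_depth = 4, subtree_depth = 1, op_credits = 3;
		int credits = (tree_depth - subtree_depth) * 2 + 1 + op_credits;

		printf("%d\n", credits);	/* 3 levels * 2 + 1 + 3 = 10 */
		return 0;
	}
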
+@@ -1568,6 +1757,29 @@
+ 	return 0;
+ }
+ 
++static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
++{
++	int next_free = le16_to_cpu(el->l_next_free_rec);
++	unsigned int range;
++	struct ocfs2_extent_rec *rec;
++
++	if (next_free == 0)
++		return 0;
++
++	rec = &el->l_recs[0];
++	if (ocfs2_is_empty_extent(rec)) {
++		/* Empty list. */
++		if (next_free == 1)
++			return 0;
++		rec = &el->l_recs[1];
++	}
++
++	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
++	if (cpos >= le32_to_cpu(rec->e_cpos) && cpos < range)
++		return 1;
++	return 0;
++}
++
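A stand-alone mock of the boundary logic (plain structs rather than the on-disk format; an empty record has zero clusters and may only sit in slot 0):

	#include <stdio.h>

	struct rec { unsigned cpos, clusters; };

	static int leftmost_contains(struct rec *recs, int next_free,
				     unsigned cpos)
	{
		struct rec *r;

		if (next_free == 0)
			return 0;

		r = &recs[0];
		if (r->clusters == 0) {		/* skip the empty extent */
			if (next_free == 1)
				return 0;
			r = &recs[1];
		}
		return cpos >= r->cpos && cpos < r->cpos + r->clusters;
	}

	int main(void)
	{
		struct rec recs[2] = { {0, 0}, {8, 4} };

		/* prints 0 1 1 0 - only [8,12) hits */
		printf("%d %d %d %d\n",
		       leftmost_contains(recs, 2, 7),
		       leftmost_contains(recs, 2, 8),
		       leftmost_contains(recs, 2, 11),
		       leftmost_contains(recs, 2, 12));
		return 0;
	}
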
+ /*
+  * Rotate all the records in a btree right one record, starting at insert_cpos.
+  *
+@@ -1586,11 +1798,12 @@
+  */
+ static int ocfs2_rotate_tree_right(struct inode *inode,
+ 				   handle_t *handle,
++				   enum ocfs2_split_type split,
+ 				   u32 insert_cpos,
+ 				   struct ocfs2_path *right_path,
+ 				   struct ocfs2_path **ret_left_path)
+ {
+-	int ret, start;
++	int ret, start, orig_credits = handle->h_buffer_credits;
+ 	u32 cpos;
+ 	struct ocfs2_path *left_path = NULL;
+ 
+@@ -1657,9 +1870,9 @@
+ 				(unsigned long long)
+ 				path_leaf_bh(left_path)->b_blocknr);
+ 
+-		if (ocfs2_rotate_requires_path_adjustment(left_path,
++		if (split == SPLIT_NONE &&
++		    ocfs2_rotate_requires_path_adjustment(left_path,
+ 							  insert_cpos)) {
+-			mlog(0, "Path adjustment required\n");
+ 
+ 			/*
+ 			 * We've rotated the tree as much as we
+@@ -1687,7 +1900,7 @@
+ 		     right_path->p_tree_depth);
+ 
+ 		ret = ocfs2_extend_rotate_transaction(handle, start,
+-						      right_path);
++						      orig_credits, right_path);
+ 		if (ret) {
+ 			mlog_errno(ret);
+ 			goto out;
+@@ -1700,6 +1913,24 @@
+ 			goto out;
+ 		}
+ 
++		if (split != SPLIT_NONE &&
++		    ocfs2_leftmost_rec_contains(path_leaf_el(right_path),
++						insert_cpos)) {
++			/*
++			 * A rotate moves the rightmost left leaf
++			 * record over to the leftmost right leaf
++			 * slot. If we're doing an extent split
++			 * instead of a real insert, then we have to
++			 * check that the extent to be split wasn't
++			 * just moved over. If it was, then we can
++			 * exit here, passing left_path back -
++			 * ocfs2_split_extent() is smart enough to
++			 * search both leaves.
++			 */
++			*ret_left_path = left_path;
++			goto out_ret_path;
++		}
++
+ 		/*
+ 		 * There is no need to re-read the next right path
+ 		 * as we know that it'll be our current left
+@@ -1722,124 +1953,935 @@
+ 	return ret;
+ }
+ 
+-/*
+- * Do the final bits of extent record insertion at the target leaf
+- * list. If this leaf is part of an allocation tree, it is assumed
+- * that the tree above has been prepared.
+- */
+-static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
+-				 struct ocfs2_extent_list *el,
+-				 struct ocfs2_insert_type *insert,
+-				 struct inode *inode)
++static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
++				      struct ocfs2_path *path)
+ {
+-	int i = insert->ins_contig_index;
+-	unsigned int range;
++	int i, idx;
+ 	struct ocfs2_extent_rec *rec;
++	struct ocfs2_extent_list *el;
++	struct ocfs2_extent_block *eb;
++	u32 range;
+ 
+-	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
++	/* Path should always be rightmost. */
++	eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
++	BUG_ON(eb->h_next_leaf_blk != 0ULL);
+ 
+-	/*
+-	 * Contiguous insert - either left or right.
+-	 */
+-	if (insert->ins_contig != CONTIG_NONE) {
+-		rec = &el->l_recs[i];
+-		if (insert->ins_contig == CONTIG_LEFT) {
+-			rec->e_blkno = insert_rec->e_blkno;
+-			rec->e_cpos = insert_rec->e_cpos;
+-		}
+-		le16_add_cpu(&rec->e_leaf_clusters,
+-			     le16_to_cpu(insert_rec->e_leaf_clusters));
+-		return;
+-	}
++	el = &eb->h_list;
++	BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
++	idx = le16_to_cpu(el->l_next_free_rec) - 1;
++	rec = &el->l_recs[idx];
++	range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
+ 
+-	/*
+-	 * Handle insert into an empty leaf.
+-	 */
+-	if (le16_to_cpu(el->l_next_free_rec) == 0 ||
+-	    ((le16_to_cpu(el->l_next_free_rec) == 1) &&
+-	     ocfs2_is_empty_extent(&el->l_recs[0]))) {
+-		el->l_recs[0] = *insert_rec;
+-		el->l_next_free_rec = cpu_to_le16(1);
+-		return;
+-	}
++	for (i = 0; i < path->p_tree_depth; i++) {
++		el = path->p_node[i].el;
++		idx = le16_to_cpu(el->l_next_free_rec) - 1;
++		rec = &el->l_recs[idx];
+ 
+-	/*
+-	 * Appending insert.
+-	 */
+-	if (insert->ins_appending == APPEND_TAIL) {
+-		i = le16_to_cpu(el->l_next_free_rec) - 1;
+-		rec = &el->l_recs[i];
+-		range = le32_to_cpu(rec->e_cpos)
+-			+ le16_to_cpu(rec->e_leaf_clusters);
+-		BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range);
++		rec->e_int_clusters = cpu_to_le32(range);
++		le32_add_cpu(&rec->e_int_clusters, -le32_to_cpu(rec->e_cpos));
+ 
+-		mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >=
+-				le16_to_cpu(el->l_count),
+-				"inode %lu, depth %u, count %u, next free %u, "
+-				"rec.cpos %u, rec.clusters %u, "
+-				"insert.cpos %u, insert.clusters %u\n",
+-				inode->i_ino,
+-				le16_to_cpu(el->l_tree_depth),
+-				le16_to_cpu(el->l_count),
+-				le16_to_cpu(el->l_next_free_rec),
+-				le32_to_cpu(el->l_recs[i].e_cpos),
+-				le16_to_cpu(el->l_recs[i].e_leaf_clusters),
+-				le32_to_cpu(insert_rec->e_cpos),
+-				le16_to_cpu(insert_rec->e_leaf_clusters));
+-		i++;
+-		el->l_recs[i] = *insert_rec;
+-		le16_add_cpu(&el->l_next_free_rec, 1);
+-		return;
++		ocfs2_journal_dirty(handle, path->p_node[i].bh);
+ 	}
+-
+-	/*
+-	 * Ok, we have to rotate.
+-	 *
+-	 * At this point, it is safe to assume that inserting into an
+-	 * empty leaf and appending to a leaf have both been handled
+-	 * above.
+-	 *
+-	 * This leaf needs to have space, either by the empty 1st
+-	 * extent record, or by virtue of an l_next_rec < l_count.
+-	 */
+-	ocfs2_rotate_leaf(el, insert_rec);
+-}
+-
+-static inline void ocfs2_update_dinode_clusters(struct inode *inode,
+-						struct ocfs2_dinode *di,
+-						u32 clusters)
+-{
+-	le32_add_cpu(&di->i_clusters, clusters);
+-	spin_lock(&OCFS2_I(inode)->ip_lock);
+-	OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
+-	spin_unlock(&OCFS2_I(inode)->ip_lock);
+ }
+ 
+-static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
+-				    struct ocfs2_extent_rec *insert_rec,
++static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle,
++				 struct ocfs2_path *left_path,
+ 				    struct ocfs2_path *right_path,
+-				    struct ocfs2_path **ret_left_path)
++				 int subtree_index,
++				 struct ocfs2_cached_dealloc_ctxt *dealloc)
+ {
+-	int ret, i, next_free;
+-	struct buffer_head *bh;
++	int ret, i;
++	struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
++	struct ocfs2_extent_list *root_el = left_path->p_node[subtree_index].el;
+ 	struct ocfs2_extent_list *el;
+-	struct ocfs2_path *left_path = NULL;
++	struct ocfs2_extent_block *eb;
++	struct buffer_head *bh;
+ 
+-	*ret_left_path = NULL;
++	el = path_leaf_el(left_path);
+ 
+-	/*
+-	 * This shouldn't happen for non-trees. The extent rec cluster
+-	 * count manipulation below only works for interior nodes.
+-	 */
+-	BUG_ON(right_path->p_tree_depth == 0);
++	eb = (struct ocfs2_extent_block *)right_path->p_node[subtree_index + 1].bh->b_data;
+ 
+-	/*
+-	 * If our appending insert is at the leftmost edge of a leaf,
+-	 * then we might need to update the rightmost records of the
+-	 * neighboring path.
+-	 */
+-	el = path_leaf_el(right_path);
+-	next_free = le16_to_cpu(el->l_next_free_rec);
++	for(i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
++		if (root_el->l_recs[i].e_blkno == eb->h_blkno)
++			break;
++
++	BUG_ON(i >= le16_to_cpu(root_el->l_next_free_rec));
++
++	memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
++	le16_add_cpu(&root_el->l_next_free_rec, -1);
++
++	eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
++	eb->h_next_leaf_blk = 0;
++
++	ocfs2_journal_dirty(handle, root_bh);
++	ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
++
++	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
++		bh = right_path->p_node[i].bh;
++
++		eb = (struct ocfs2_extent_block *)bh->b_data;
++		/*
++		 * Not all nodes might have had their final count
++		 * decremented by the caller - handle this here.
++		 */
++		el = &eb->h_list;
++		if (le16_to_cpu(el->l_next_free_rec) > 1) {
++			mlog(ML_ERROR,
++			     "Inode %llu, attempted to remove extent block "
++			     "%llu with %u records\n",
++			     (unsigned long long)OCFS2_I(inode)->ip_blkno,
++			     (unsigned long long)le64_to_cpu(eb->h_blkno),
++			     le16_to_cpu(el->l_next_free_rec));
++
++			ocfs2_journal_dirty(handle, bh);
++			ocfs2_remove_from_cache(inode, bh);
++			continue;
++		}
++
++		el->l_next_free_rec = 0;
++		memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
++
++		ocfs2_journal_dirty(handle, bh);
++
++		ret = ocfs2_cache_extent_block_free(dealloc, eb);
++		if (ret)
++			mlog_errno(ret);
++
++		ocfs2_remove_from_cache(inode, bh);
++	}
++}
++
++static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
++				     struct ocfs2_path *left_path,
++				     struct ocfs2_path *right_path,
++				     int subtree_index,
++				     struct ocfs2_cached_dealloc_ctxt *dealloc,
++				     int *deleted)
++{
++	int ret, i, del_right_subtree = 0;
++	struct buffer_head *root_bh, *di_bh = path_root_bh(right_path);
++	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
++	struct ocfs2_extent_list *right_leaf_el, *left_leaf_el;
++	struct ocfs2_extent_block *eb;
++
++	*deleted = 0;
++
++	right_leaf_el = path_leaf_el(right_path);
++	left_leaf_el = path_leaf_el(left_path);
++	root_bh = left_path->p_node[subtree_index].bh;
++	BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
++
++	if (!ocfs2_is_empty_extent(&left_leaf_el->l_recs[0]))
++		return 0;
++
++	if (ocfs2_is_empty_extent(&right_leaf_el->l_recs[0]))
++		return -EAGAIN;
++
++	eb = (struct ocfs2_extent_block *)path_leaf_bh(right_path)->b_data;
++	if (eb->h_next_leaf_blk == 0ULL &&
++	    le16_to_cpu(right_leaf_el->l_next_free_rec) == 1) {
++		/*
++		 * We have to update i_last_eb_blk during the meta
++		 * data delete.
++		 */
++		ret = ocfs2_journal_access(handle, inode, di_bh,
++					   OCFS2_JOURNAL_ACCESS_WRITE);
++		if (ret) {
++			mlog_errno(ret);
++			goto out;
++		}
++
++		del_right_subtree = 1;
++	}
++
++	ret = ocfs2_journal_access(handle, inode, root_bh,
++				   OCFS2_JOURNAL_ACCESS_WRITE);
++	if (ret) {
++		mlog_errno(ret);
++		goto out;
++	}
++
++	for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
++		ret = ocfs2_journal_access(handle, inode,
++					   right_path->p_node[i].bh,
++					   OCFS2_JOURNAL_ACCESS_WRITE);
++		if (ret) {
++			mlog_errno(ret);
++			goto out;
++		}
++
++		ret = ocfs2_journal_access(handle, inode,
++					   left_path->p_node[i].bh,
++					   OCFS2_JOURNAL_ACCESS_WRITE);
++		if (ret) {
++			mlog_errno(ret);
++			goto out;
++		}
++	}
++
++	ocfs2_rotate_leaf(left_leaf_el, &right_leaf_el->l_recs[0]);
++	memset(&right_leaf_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
++	if (eb->h_next_leaf_blk == 0ULL) {
++		/*
++		 * XXX: move recs over to get rid of empty extent,
++		 * decrease next_free. how does this play with the
++		 * delete code below?
++		 */
++		ocfs2_remove_empty_extent(right_leaf_el);
++	}
++
++	ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
++	if (ret)
++		mlog_errno(ret);
++	ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
++	if (ret)
++		mlog_errno(ret);
++
++	if (del_right_subtree) {
++		ocfs2_unlink_subtree(inode, handle, left_path, right_path,
++				     subtree_index, dealloc);
++		ocfs2_update_edge_lengths(inode, handle, left_path);
++
++		eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
++		di->i_last_eb_blk = eb->h_blkno;
++		ret = ocfs2_journal_dirty(handle, di_bh);
++		if (ret)
++			mlog_errno(ret);
++
++		*deleted = 1;
++	} else
++		ocfs2_complete_edge_insert(inode, handle, left_path, right_path,
++					   subtree_index);
++
++out:
++	return ret;
++}
++
++/*
++ * Given a full path, determine what cpos value would return us a path
++ * containing the leaf immediately to the right of the current one.
++ *
++ * Will return zero if the path passed in is already the rightmost path.
++ *
++ * This looks similar, but is subtly different to
++ * ocfs2_find_cpos_for_left_leaf().
++ */
++static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
++					  struct ocfs2_path *path, u32 *cpos)
++{
++	int i, j, ret = 0;
++	u64 blkno;
++	struct ocfs2_extent_list *el;
++
++	*cpos = 0;
++
++	if (path->p_tree_depth == 0)
++		return 0;
++
++	blkno = path_leaf_bh(path)->b_blocknr;
++
++	/* Start at the tree node just above the leaf and work our way up. */
++	i = path->p_tree_depth - 1;
++	while (i >= 0) {
++		int next_free;
++
++		el = path->p_node[i].el;
++
++		/*
++		 * Find the extent record just after the one in our
++		 * path.
++		 */
++		next_free = le16_to_cpu(el->l_next_free_rec);
++		for(j = 0; j < le16_to_cpu(el->l_next_free_rec); j++) {
++			if (le64_to_cpu(el->l_recs[j].e_blkno) == blkno) {
++				if (j == (next_free - 1)) {
++					if (i == 0) {
++						/*
++						 * We've determined that the
++						 * path specified is already
++						 * the rightmost one - return a
++						 * cpos of zero.
++						 */
++						goto out;
++					}
++					/*
++					 * The rightmost record points to our
++					 * leaf - we need to travel up the
++					 * tree one level.
++					 */
++					goto next_node;
++				}
++
++				*cpos = le32_to_cpu(el->l_recs[j + 1].e_cpos);
++				goto out;
++			}
++		}
++
++		/*
++		 * If we got here, we never found a valid node where
++		 * the tree indicated one should be.
++		 */
++		ocfs2_error(sb,
++			    "Invalid extent tree at extent block %llu\n",
++			    (unsigned long long)blkno);
++		ret = -EROFS;
++		goto out;
++
++next_node:
++		blkno = path->p_node[i].bh->b_blocknr;
++		i--;
++	}
++
++out:
++	return ret;
++}
++
++static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
++					    handle_t *handle,
++					    struct buffer_head *bh,
++					    struct ocfs2_extent_list *el,
++					    int *rotated_any)
++{
++	int ret;
++
++	if (rotated_any)
++		*rotated_any = 0;
++
++	if (!ocfs2_is_empty_extent(&el->l_recs[0]))
++		return 0;
++
++	if (le16_to_cpu(el->l_next_free_rec) == 1)
++		return -EAGAIN;
++
++	ret = ocfs2_journal_access(handle, inode, bh,
++				   OCFS2_JOURNAL_ACCESS_WRITE);
++	if (ret) {
++		mlog_errno(ret);
++		goto out;
++	}
++
++	ocfs2_remove_empty_extent(el);
++
++	ret = ocfs2_journal_dirty(handle, bh);
++	if (ret)
++		mlog_errno(ret);
++
++	if (rotated_any)
++		*rotated_any = 1;
++out:
++	return ret;
++}
++
++/*
++ * Left rotation of btree records.
++ *
++ * In many ways, this is (unsurprisingly) the opposite of right
++ * rotation. We start at some non-rightmost path containing an empty
++ * extent in the leaf block. The code works its way to the rightmost
++ * path by rotating records to the left in every subtree.
++ *
++ * There are a few places where we might want to do this:
++ *   - merging extent records
++ *     - left/right contiguousness during insert
++ *     - merging two previously unwritten extents
++ *   - truncate
++ *     - via ocfs2_truncate, if we ever fix it up to use this code
++ *     - via ioctl at the request of user (reverse fallocate)
++ *   - "compressing" a tree with empty extents
++ *     - as a result of a user defrag request
++ *     - perhaps as a preventative measure if we notice a tree needs
++ *       this during any of the above operations.
++ *
++ * The major difference between those states above are the ability to
++ * lock one of the meta data allocators so that we can remove unused
++ * extent blocks. It might be unrealistic for us to assume that any
++ * merging cases will want to lock the meta data allocator. Luckily,
++ * the merges are an optimization.
++ *
++ * So, merging won't happen if it would result in an empty rightmost
++ * path (this is illegal).
++ *
++ * This function will move extents left until it runs out of leaves to
++ * rotate, or it hits a right leaf that already contains an empty
++ * extent, in which case it will exit early. This means that we might
++ * never rotate anything if the 1st right leaf contains an empty
++ * extent.
++ *
++ * Truncate cases will have to happen as a second step. I'm not
++ * completely sure how we want to handle those yet.
++ */
++static int ocfs2_rotate_tree_left(struct inode *inode,
++				  handle_t *handle,
++				  struct ocfs2_path *path,
++				  struct ocfs2_cached_dealloc_ctxt *dealloc,
++				  int *rotated_any)
++{
++	int ret, subtree_root, deleted, orig_credits = handle->h_buffer_credits;
++	u32 right_cpos;
++	struct ocfs2_path *left_path = NULL;
++	struct ocfs2_path *right_path = NULL;
++
++	BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0])));
++
++	if (rotated_any)
++		*rotated_any = 0;
++
++	ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path,
++					     &right_cpos);
++	if (ret) {
++		mlog_errno(ret);
++		goto out;
++	}
++
++	if (path->p_tree_depth == 0 || right_cpos == 0) {
++		/*
++		 * Two cases where rotation of adjacent leaves isn't
++		 * necessary:
++		 *  - in-inode extents (no btree)
++		 *  - path passed is already rightmost
++		 */
++		ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
++						       path_leaf_bh(path),
++						       path_leaf_el(path),
++						       rotated_any);
++		if (ret)
++			mlog_errno(ret);
++		goto out;
++	}
++
++	left_path = ocfs2_new_path(path_root_bh(path),
++				   path_root_el(path));
++	if (!left_path) {
++		ret = -ENOMEM;
++		mlog_errno(ret);
++		goto out;
++	}
++
++	ocfs2_cp_path(left_path, path);
++
++	right_path = ocfs2_new_path(path_root_bh(path),
++				    path_root_el(path));
++	if (!right_path) {
++		ret = -ENOMEM;
++		mlog_errno(ret);
++		goto out;
++	}
++
++	while (right_cpos) {
++		ret = ocfs2_find_path(inode, right_path, right_cpos);
++		if (ret) {
++			mlog_errno(ret);
++			goto out;
++		}
++
++		subtree_root = ocfs2_find_subtree_root(inode, left_path,
++						       right_path);
++
++		mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
++		     subtree_root,
++		     (unsigned long long)
++		     right_path->p_node[subtree_root].bh->b_blocknr,
++		     right_path->p_tree_depth);
++
++		ret = ocfs2_extend_rotate_transaction(handle, subtree_root,
++						      orig_credits, left_path);
++		if (ret) {
++			mlog_errno(ret);
++			goto out;
++		}
++
++		ret = ocfs2_rotate_subtree_left(inode, handle, left_path,
++						right_path, subtree_root,
++						dealloc, &deleted);
++		if (ret) {
++			if (ret != -EAGAIN)
++				mlog_errno(ret);
++			goto out;
++		}
++
++		if (rotated_any)
++			*rotated_any = 1;
++
++		/*
++		 * The subtree rotate might have removed records on
++		 * the rightmost edge. If so, then rotation is
++		 * complete.
++		 */
++		if (deleted)
++			break;
++
++		ocfs2_mv_path(left_path, right_path);
++
++		ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path,
++						     &right_cpos);
++		if (ret) {
++			mlog_errno(ret);
++			goto out;
++		}
++	}
++
++out:
++	ocfs2_free_path(right_path);
++	ocfs2_free_path(left_path);
++
++	return ret;
++}
++
++static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
++				int index)
++{
++	struct ocfs2_extent_rec *rec = &el->l_recs[index];
++	unsigned int size;
++
++	if (rec->e_leaf_clusters == 0) {
++		/*
++		 * We consumed all of the merged-from record. An empty
++		 * extent cannot exist anywhere but the 1st array
++		 * position, so move things over if the merged-from
++		 * record doesn't occupy that position.
++		 *
++		 * This creates a new empty extent so the caller
++		 * should be smart enough to have removed any existing
++		 * ones.
++		 */
++		if (index > 0) {
++			BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0]));
++			size = index * sizeof(struct ocfs2_extent_rec);
++			memmove(&el->l_recs[1], &el->l_recs[0], size);
++		}
++
++		/*
++		 * Always memset - the caller doesn't check whether it
++		 * created an empty extent, so there could be junk in
++		 * the other fields.
++		 */
++		memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
++	}
++}
++
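The slide-and-clear above is one memmove by a single slot; this stand-alone mock (plain structs, illustrative values) replays a leaf whose record at index 2 was fully consumed by a merge:

	#include <stdio.h>
	#include <string.h>

	struct rec { unsigned cpos, clusters; };

	int main(void)
	{
		struct rec recs[3] = { {0, 4}, {4, 8}, {12, 0} };
		int index = 2;			/* merged-from, now empty */

		if (recs[index].clusters == 0 && index > 0) {
			memmove(&recs[1], &recs[0],
				index * sizeof(struct rec));
			memset(&recs[0], 0, sizeof(struct rec));
		}

		/* prints {0,0} {0,4} {4,8} - one empty extent, in slot 0 */
		for (int i = 0; i < 3; i++)
			printf("{%u,%u} ", recs[i].cpos, recs[i].clusters);
		printf("\n");
		return 0;
	}
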
++/*
++ * Remove split_rec clusters from the record at index and merge them
++ * onto the beginning of the record at index + 1.
++ */
++static int ocfs2_merge_rec_right(struct inode *inode, struct buffer_head *bh,
++				handle_t *handle,
++				struct ocfs2_extent_rec *split_rec,
++				struct ocfs2_extent_list *el, int index)
++{
++	int ret;
++	unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
++	struct ocfs2_extent_rec *left_rec;
++	struct ocfs2_extent_rec *right_rec;
++
++	BUG_ON(index >= le16_to_cpu(el->l_next_free_rec));
++
++	left_rec = &el->l_recs[index];
++	right_rec = &el->l_recs[index + 1];
++
++	ret = ocfs2_journal_access(handle, inode, bh,
++				   OCFS2_JOURNAL_ACCESS_WRITE);
++	if (ret) {
++		mlog_errno(ret);
++		goto out;
++	}
++
++	le16_add_cpu(&left_rec->e_leaf_clusters, -split_clusters);
++
++	le32_add_cpu(&right_rec->e_cpos, -split_clusters);
++	le64_add_cpu(&right_rec->e_blkno,
++		     -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters));
++	le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters);
++
++	ocfs2_cleanup_merge(el, index);
++
++	ret = ocfs2_journal_dirty(handle, bh);
++	if (ret)
++		mlog_errno(ret);
++
++out:
++	return ret;
++}
++
++/*
++ * Remove split_rec clusters from the record at index and merge them
++ * onto the tail of the record at index - 1.
++ */
++static int ocfs2_merge_rec_left(struct inode *inode, struct buffer_head *bh,
++				handle_t *handle,
++				struct ocfs2_extent_rec *split_rec,
++				struct ocfs2_extent_list *el, int index)
++{
++	int ret, has_empty_extent = 0;
++	unsigned int split_clusters = le16_to_cpu(split_rec->e_leaf_clusters);
++	struct ocfs2_extent_rec *left_rec;
++	struct ocfs2_extent_rec *right_rec;
++
++	BUG_ON(index <= 0);
++
++	left_rec = &el->l_recs[index - 1];
++	right_rec = &el->l_recs[index];
++	if (ocfs2_is_empty_extent(&el->l_recs[0]))
++		has_empty_extent = 1;
++
++	ret = ocfs2_journal_access(handle, inode, bh,
++				   OCFS2_JOURNAL_ACCESS_WRITE);
++	if (ret) {
++		mlog_errno(ret);
++		goto out;
++	}
++
++	if (has_empty_extent && index == 1) {
++		/*
++		 * The easy case - we can just plop the record right in.
++		 */
++		*left_rec = *split_rec;
++
++		has_empty_extent = 0;
++	} else {
++		le16_add_cpu(&left_rec->e_leaf_clusters, split_clusters);
++	}
++
++	le32_add_cpu(&right_rec->e_cpos, split_clusters);
++	le64_add_cpu(&right_rec->e_blkno,
++		     ocfs2_clusters_to_blocks(inode->i_sb, split_clusters));
++	le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters);
++
++	ocfs2_cleanup_merge(el, index);
++
++	ret = ocfs2_journal_dirty(handle, bh);
++	if (ret)
++		mlog_errno(ret);
++
++out:
++	return ret;
++}
++
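Both merge directions are pure arithmetic on (cpos, blkno, clusters) triples; this stand-alone mock replays a left merge of 3 clusters at 8 blocks per cluster (all values illustrative):

	#include <stdio.h>

	int main(void)
	{
		/* left rec [10,+6), right rec [16,+8) at block 4096 */
		unsigned left_cpos = 10, left_clusters = 6;
		unsigned right_cpos = 16, right_clusters = 8;
		unsigned long long right_blkno = 4096;
		unsigned split_clusters = 3, blocks_per_cluster = 8;

		left_clusters  += split_clusters;
		right_cpos     += split_clusters;
		right_blkno    += (unsigned long long)split_clusters *
				  blocks_per_cluster;
		right_clusters -= split_clusters;

		/* prints: left [10,+9) right [19,+5) @4120 */
		printf("left [%u,+%u) right [%u,+%u) @%llu\n",
		       left_cpos, left_clusters,
		       right_cpos, right_clusters, right_blkno);
		return 0;
	}
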
++static int ocfs2_try_to_merge_extent(struct inode *inode,
++				     handle_t *handle,
++				     struct ocfs2_path *left_path,
++				     int split_index,
++				     struct ocfs2_extent_rec *split_rec,
++				     struct ocfs2_cached_dealloc_ctxt *dealloc,
++				     struct ocfs2_merge_ctxt *ctxt)
++{
++	int ret = 0, rotated, delete_tail_recs = 0;
++	struct ocfs2_extent_list *el = path_leaf_el(left_path);
++	struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
++
++	BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
++
++	if (ctxt->c_split_covers_rec) {
++		delete_tail_recs++;
++
++		if (ctxt->c_contig_type == CONTIG_LEFTRIGHT ||
++		    ctxt->c_has_empty_extent)
++			delete_tail_recs++;
++
++		if (ctxt->c_has_empty_extent) {
++			/*
++			 * The merge code will need to create an empty
++			 * extent to take the place of the newly
++			 * emptied slot. Remove any pre-existing empty
++			 * extents - having more than one in a leaf is
++			 * illegal.
++			 */
++			ret = ocfs2_rotate_tree_left(inode, handle, left_path,
++						     dealloc, &rotated);
++			if (rotated) {
++				split_index--;
++				rec = &el->l_recs[split_index];
++			}
++			if (ret) {
++				if (ret == -EAGAIN) {
++					ret = 0;
++					goto straight_insert;
++				}
++
++				mlog_errno(ret);
++				goto out;
++			}
++		}
++	}
++
++	if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) {
++		/*
++		 * Left-right contig implies this.
++		 */
++		BUG_ON(!ctxt->c_split_covers_rec);
++		BUG_ON(split_index == 0);
++
++		/*
++		 * Since the leftright insert always covers the entire
++		 * extent, this call will delete the insert record
++		 * entirely, resulting in an empty extent record added to
++		 * the extent block.
++		 *
++		 * Since the adding of an empty extent shifts
++		 * everything back to the right, there's no need to
++		 * update split_index here.
++		 */
++		ret = ocfs2_merge_rec_left(inode, path_leaf_bh(left_path),
++					   handle, split_rec, el, split_index);
++		if (ret) {
++			mlog_errno(ret);
++			goto out;
++		}
++
++		/*
++		 * We can only get this from logic error above.
++		 */
++		BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
++
++		/*
++		 * The left merge left us with an empty extent, remove
++		 * it.
++		 */
++		ret = ocfs2_rotate_tree_left(inode, handle, left_path,
++					     dealloc, &rotated);
++		if (rotated) {
++			split_index--;
++			rec = &el->l_recs[split_index];
++		}
++		if (ret) {
++			if (ret == -EAGAIN) {
++				ret = 0;
++				goto straight_insert;
++			}
++
++			mlog_errno(ret);
++			goto out;
++		}
++
++		/*
++		 * Note that we don't pass split_rec here on purpose -
++		 * we've merged it into the left side.
++		 */
++		ret = ocfs2_merge_rec_right(inode, path_leaf_bh(left_path),
++					    handle, rec, el, split_index);
++		if (ret) {
++			mlog_errno(ret);
++			goto out;
++		}
++
++		BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
++
++		ret = ocfs2_rotate_tree_left(inode, handle, left_path,
++					     dealloc, NULL);
++		/*
++		 * Error from this last rotate is not critical, so
++		 * print but don't bubble it up.
++		 */
++		if (ret && ret != -EAGAIN)
++			mlog_errno(ret);
++		ret = 0;
++	} else {
++		/*
++		 * Merge a record to the left or right.
++		 *
++		 * 'contig_type' is relative to the existing record,
++		 * so for example, if we're "right contig", it's to
++		 * the record on the left (hence the left merge).
++		 */
++		if (ctxt->c_contig_type == CONTIG_RIGHT) {
++			ret = ocfs2_merge_rec_left(inode,
++						   path_leaf_bh(left_path),
++						   handle, split_rec, el,
++						   split_index);
++			if (ret) {
++				mlog_errno(ret);
++				goto out;
++			}
++		} else {
++			ret = ocfs2_merge_rec_right(inode,
++						    path_leaf_bh(left_path),
++						    handle, split_rec, el,
++						    split_index);
++			if (ret) {
++				mlog_errno(ret);
++				goto out;
++			}
++		}
++
++		if (ctxt->c_split_covers_rec) {
++			/*
++			 * The merge may have left an empty extent in
++			 * our leaf. Try to rotate it away.
++			 */
++			ret = ocfs2_rotate_tree_left(inode, handle, left_path,
++						     dealloc, &rotated);
++			if (ret)
++				mlog_errno(ret);
++			ret = 0;
++		}
++	}
++
++out:
++	return ret;
++
++straight_insert:
++	el->l_recs[split_index] = *split_rec;
++	goto out;
++}
++
++static void ocfs2_subtract_from_rec(struct super_block *sb,
++				    enum ocfs2_split_type split,
++				    struct ocfs2_extent_rec *rec,
++				    struct ocfs2_extent_rec *split_rec)
++{
++	u64 len_blocks;
++
++	len_blocks = ocfs2_clusters_to_blocks(sb,
++				le16_to_cpu(split_rec->e_leaf_clusters));
++
++	if (split == SPLIT_LEFT) {
++		/*
++		 * Region is on the left edge of the existing
++		 * record.
++		 */
++		le32_add_cpu(&rec->e_cpos,
++			     le16_to_cpu(split_rec->e_leaf_clusters));
++		le64_add_cpu(&rec->e_blkno, len_blocks);
++		le16_add_cpu(&rec->e_leaf_clusters,
++			     -le16_to_cpu(split_rec->e_leaf_clusters));
++	} else {
++		/*
++		 * Region is on the right edge of the existing
++		 * record.
++		 */
++		le16_add_cpu(&rec->e_leaf_clusters,
++			     -le16_to_cpu(split_rec->e_leaf_clusters));
++	}
++}
++
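Again just arithmetic; a stand-alone replay of SPLIT_LEFT carving 4 clusters off a record [20,+10) at block 160, with 8 blocks per cluster (illustrative values):

	#include <stdio.h>

	int main(void)
	{
		unsigned cpos = 20, clusters = 10;
		unsigned long long blkno = 160;
		unsigned split = 4, bpc = 8;

		/* SPLIT_LEFT moves the record's start past the split */
		cpos += split;
		blkno += (unsigned long long)split * bpc;
		clusters -= split;

		/* prints [24,+6) @192; SPLIT_RIGHT would only have
		 * shrunk clusters to 6 */
		printf("[%u,+%u) @%llu\n", cpos, clusters, blkno);
		return 0;
	}
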
++/*
++ * Do the final bits of extent record insertion at the target leaf
++ * list. If this leaf is part of an allocation tree, it is assumed
++ * that the tree above has been prepared.
++ */
++static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
++				 struct ocfs2_extent_list *el,
++				 struct ocfs2_insert_type *insert,
++				 struct inode *inode)
++{
++	int i = insert->ins_contig_index;
++	unsigned int range;
++	struct ocfs2_extent_rec *rec;
++
++	BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
++
++	if (insert->ins_split != SPLIT_NONE) {
++		i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos));
++		BUG_ON(i == -1);
++		rec = &el->l_recs[i];
++		ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec,
++					insert_rec);
++		goto rotate;
++	}
++
++	/*
++	 * Contiguous insert - either left or right.
++	 */
++	if (insert->ins_contig != CONTIG_NONE) {
++		rec = &el->l_recs[i];
++		if (insert->ins_contig == CONTIG_LEFT) {
++			rec->e_blkno = insert_rec->e_blkno;
++			rec->e_cpos = insert_rec->e_cpos;
++		}
++		le16_add_cpu(&rec->e_leaf_clusters,
++			     le16_to_cpu(insert_rec->e_leaf_clusters));
++		return;
++	}
++
++	/*
++	 * Handle insert into an empty leaf.
++	 */
++	if (le16_to_cpu(el->l_next_free_rec) == 0 ||
++	    ((le16_to_cpu(el->l_next_free_rec) == 1) &&
++	     ocfs2_is_empty_extent(&el->l_recs[0]))) {
++		el->l_recs[0] = *insert_rec;
++		el->l_next_free_rec = cpu_to_le16(1);
++		return;
++	}
++
++	/*
++	 * Appending insert.
++	 */
++	if (insert->ins_appending == APPEND_TAIL) {
++		i = le16_to_cpu(el->l_next_free_rec) - 1;
++		rec = &el->l_recs[i];
++		range = le32_to_cpu(rec->e_cpos)
++			+ le16_to_cpu(rec->e_leaf_clusters);
++		BUG_ON(le32_to_cpu(insert_rec->e_cpos) < range);
++
++		mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >=
++				le16_to_cpu(el->l_count),
++				"inode %lu, depth %u, count %u, next free %u, "
++				"rec.cpos %u, rec.clusters %u, "
++				"insert.cpos %u, insert.clusters %u\n",
++				inode->i_ino,
++				le16_to_cpu(el->l_tree_depth),
++				le16_to_cpu(el->l_count),
++				le16_to_cpu(el->l_next_free_rec),
++				le32_to_cpu(el->l_recs[i].e_cpos),
++				le16_to_cpu(el->l_recs[i].e_leaf_clusters),
++				le32_to_cpu(insert_rec->e_cpos),
++				le16_to_cpu(insert_rec->e_leaf_clusters));
++		i++;
++		el->l_recs[i] = *insert_rec;
++		le16_add_cpu(&el->l_next_free_rec, 1);
++		return;
++	}
++
++rotate:
++	/*
++	 * Ok, we have to rotate.
++	 *
++	 * At this point, it is safe to assume that inserting into an
++	 * empty leaf and appending to a leaf have both been handled
++	 * above.
++	 *
++	 * This leaf needs to have space, either by the empty 1st
++	 * extent record, or by virtue of an l_next_rec < l_count.
++	 */
++	ocfs2_rotate_leaf(el, insert_rec);
++}
++
++static inline void ocfs2_update_dinode_clusters(struct inode *inode,
++						struct ocfs2_dinode *di,
++						u32 clusters)
++{
++	le32_add_cpu(&di->i_clusters, clusters);
++	spin_lock(&OCFS2_I(inode)->ip_lock);
++	OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
++	spin_unlock(&OCFS2_I(inode)->ip_lock);
++}
++
++static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
++				    struct ocfs2_extent_rec *insert_rec,
++				    struct ocfs2_path *right_path,
++				    struct ocfs2_path **ret_left_path)
++{
++	int ret, i, next_free;
++	struct buffer_head *bh;
++	struct ocfs2_extent_list *el;
++	struct ocfs2_path *left_path = NULL;
++
++	*ret_left_path = NULL;
++
++	/*
++	 * This shouldn't happen for non-trees. The extent rec cluster
++	 * count manipulation below only works for interior nodes.
++	 */
++	BUG_ON(right_path->p_tree_depth == 0);
++
++	/*
++	 * If our appending insert is at the leftmost edge of a leaf,
++	 * then we might need to update the rightmost records of the
++	 * neighboring path.
++	 */
++	el = path_leaf_el(right_path);
++	next_free = le16_to_cpu(el->l_next_free_rec);
+ 	if (next_free == 0 ||
+ 	    (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) {
+ 		u32 left_cpos;
+@@ -1931,6 +2973,83 @@
+ 	return ret;
+ }
+ 
++static void ocfs2_split_record(struct inode *inode,
++			       struct ocfs2_path *left_path,
++			       struct ocfs2_path *right_path,
++			       struct ocfs2_extent_rec *split_rec,
++			       enum ocfs2_split_type split)
++{
++	int index;
++	u32 cpos = le32_to_cpu(split_rec->e_cpos);
++	struct ocfs2_extent_list *left_el = NULL, *right_el, *insert_el, *el;
++	struct ocfs2_extent_rec *rec, *tmprec;
++
++	right_el = path_leaf_el(right_path);
++	if (left_path)
++		left_el = path_leaf_el(left_path);
++
++	el = right_el;
++	insert_el = right_el;
++	index = ocfs2_search_extent_list(el, cpos);
++	if (index != -1) {
++		if (index == 0 && left_path) {
++			BUG_ON(ocfs2_is_empty_extent(&el->l_recs[0]));
++
++			/*
++			 * This typically means that the record
++			 * started in the left path but moved to the
++			 * right as a result of rotation. We either
++			 * move the existing record to the left, or we
++			 * do the later insert there.
++			 *
++			 * In this case, the left path should always
++			 * exist as the rotate code will have passed
++			 * it back for a post-insert update.
++			 */
++
++			if (split == SPLIT_LEFT) {
++				/*
++				 * It's a left split. Since we know
++				 * that the rotate code gave us an
++				 * empty extent in the left path, we
++				 * can just do the insert there.
++				 */
++				insert_el = left_el;
++			} else {
++				/*
++				 * Right split - we have to move the
++				 * existing record over to the left
++				 * leaf. The insert will be into the
++				 * newly created empty extent in the
++				 * right leaf.
++				 */
++				tmprec = &right_el->l_recs[index];
++				ocfs2_rotate_leaf(left_el, tmprec);
++				el = left_el;
++
++				memset(tmprec, 0, sizeof(*tmprec));
++				index = ocfs2_search_extent_list(left_el, cpos);
++				BUG_ON(index == -1);
++			}
++		}
++	} else {
++		BUG_ON(!left_path);
++		BUG_ON(!ocfs2_is_empty_extent(&left_el->l_recs[0]));
++		/*
++		 * Left path is easy - we can just allow the insert to
++		 * happen.
++		 */
++		el = left_el;
++		insert_el = left_el;
++		index = ocfs2_search_extent_list(el, cpos);
++		BUG_ON(index == -1);
++	}
++
++	rec = &el->l_recs[index];
++	ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec);
++	ocfs2_rotate_leaf(insert_el, split_rec);
++}
++
+ /*
+  * This function only does inserts on an allocation b-tree. For dinode
+  * lists, ocfs2_insert_at_leaf() is called directly.
+@@ -1948,7 +3067,6 @@
+ {
+ 	int ret, subtree_index;
+ 	struct buffer_head *leaf_bh = path_leaf_bh(right_path);
+-	struct ocfs2_extent_list *el;
+ 
+ 	/*
+ 	 * Pass both paths to the journal. The majority of inserts
+@@ -1984,9 +3102,18 @@
+ 		}
+ 	}
+ 
+-	el = path_leaf_el(right_path);
++	if (insert->ins_split != SPLIT_NONE) {
++		/*
++		 * We could call ocfs2_insert_at_leaf() for some types
++		 * of splits, but it's easier to just let one separate
++		 * function sort it all out.
++		 */
++		ocfs2_split_record(inode, left_path, right_path,
++				   insert_rec, insert->ins_split);
++	} else
++		ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path),
++				     insert, inode);
+ 
+-	ocfs2_insert_at_leaf(insert_rec, el, insert, inode);
+ 	ret = ocfs2_journal_dirty(handle, leaf_bh);
+ 	if (ret)
+ 		mlog_errno(ret);
+@@ -2075,7 +3202,7 @@
+ 	 * can wind up skipping both of these two special cases...
+ 	 */
+ 	if (rotate) {
+-		ret = ocfs2_rotate_tree_right(inode, handle,
++		ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split,
+ 					      le32_to_cpu(insert_rec->e_cpos),
+ 					      right_path, &left_path);
+ 		if (ret) {
+@@ -2100,6 +3227,7 @@
+ 	}
+ 
+ out_update_clusters:
++	if (type->ins_split == SPLIT_NONE)
+ 	ocfs2_update_dinode_clusters(inode, di,
+ 				     le16_to_cpu(insert_rec->e_leaf_clusters));
+ 
+@@ -2114,6 +3242,44 @@
+ 	return ret;
+ }
+ 
++static enum ocfs2_contig_type
++ocfs2_figure_merge_contig_type(struct inode *inode,
++			       struct ocfs2_extent_list *el, int index,
++			       struct ocfs2_extent_rec *split_rec)
++{
++	struct ocfs2_extent_rec *rec;
++	enum ocfs2_contig_type ret = CONTIG_NONE;
++
++	/*
++	 * We're careful to check for an empty extent record here -
++	 * the merge code will know what to do if it sees one.
++	 */
++
++	if (index > 0) {
++		rec = &el->l_recs[index - 1];
++		if (index == 1 && ocfs2_is_empty_extent(rec)) {
++			if (split_rec->e_cpos == el->l_recs[index].e_cpos)
++				ret = CONTIG_RIGHT;
++		} else {
++			ret = ocfs2_extent_contig(inode, rec, split_rec);
++		}
++	}
++
++	if (index < (le16_to_cpu(el->l_next_free_rec) - 1)) {
++		enum ocfs2_contig_type contig_type;
++
++		rec = &el->l_recs[index + 1];
++		contig_type = ocfs2_extent_contig(inode, rec, split_rec);
++
++		if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
++			ret = CONTIG_LEFTRIGHT;
++		else if (ret == CONTIG_NONE)
++			ret = contig_type;
++	}
++
++	return ret;
++}
++
+ static void ocfs2_figure_contig_type(struct inode *inode,
+ 				     struct ocfs2_insert_type *insert,
+ 				     struct ocfs2_extent_list *el,
+@@ -2205,6 +3371,8 @@
+ 	struct ocfs2_path *path = NULL;
+ 	struct buffer_head *bh = NULL;
+ 
++	insert->ins_split = SPLIT_NONE;
++
+ 	el = &di->id2.i_list;
+ 	insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth);
+ 
+@@ -2305,130 +3473,425 @@
+ 		ocfs2_figure_appending_type(insert, el, insert_rec);
+ 	}
+ 
+-out:
+-	ocfs2_free_path(path);
++out:
++	ocfs2_free_path(path);
++
++	if (ret == 0)
++		*last_eb_bh = bh;
++	else
++		brelse(bh);
++	return ret;
++}
++
++/*
++ * Insert an extent into an inode btree.
++ *
++ * The caller needs to update fe->i_clusters
++ */
++int ocfs2_insert_extent(struct ocfs2_super *osb,
++			handle_t *handle,
++			struct inode *inode,
++			struct buffer_head *fe_bh,
++			u32 cpos,
++			u64 start_blk,
++			u32 new_clusters,
++			u8 flags,
++			struct ocfs2_alloc_context *meta_ac)
++{
++	int status;
++	struct buffer_head *last_eb_bh = NULL;
++	struct buffer_head *bh = NULL;
++	struct ocfs2_insert_type insert = {0, };
++	struct ocfs2_extent_rec rec;
++
++	mlog(0, "add %u clusters at position %u to inode %llu\n",
++	     new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
++
++	mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
++			(OCFS2_I(inode)->ip_clusters != cpos),
++			"Device %s, asking for sparse allocation: inode %llu, "
++			"cpos %u, clusters %u\n",
++			osb->dev_str,
++			(unsigned long long)OCFS2_I(inode)->ip_blkno, cpos,
++			OCFS2_I(inode)->ip_clusters);
++
++	memset(&rec, 0, sizeof(rec));
++	rec.e_cpos = cpu_to_le32(cpos);
++	rec.e_blkno = cpu_to_le64(start_blk);
++	rec.e_leaf_clusters = cpu_to_le16(new_clusters);
++	rec.e_flags = flags;
++
++	status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
++					  &insert);
++	if (status < 0) {
++		mlog_errno(status);
++		goto bail;
++	}
++
++	mlog(0, "Insert.appending: %u, Insert.Contig: %u, "
++	     "Insert.contig_index: %d, Insert.free_records: %d, "
++	     "Insert.tree_depth: %d\n",
++	     insert.ins_appending, insert.ins_contig, insert.ins_contig_index,
++	     insert.ins_free_records, insert.ins_tree_depth);
++
++	if (insert.ins_contig == CONTIG_NONE && insert.ins_free_records == 0) {
++		status = ocfs2_grow_tree(inode, handle, fe_bh,
++					 &insert.ins_tree_depth, &last_eb_bh,
++					 meta_ac);
++		if (status) {
++			mlog_errno(status);
++			goto bail;
++		}
++	}
++
++	/* Finally, we can add clusters. This might rotate the tree for us. */
++	status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert);
++	if (status < 0)
++		mlog_errno(status);
++	else
++		ocfs2_extent_map_insert_rec(inode, &rec);
++
++bail:
++	if (bh)
++		brelse(bh);
++
++	if (last_eb_bh)
++		brelse(last_eb_bh);
++
++	mlog_exit(status);
++	return status;
++}
++
++static int ocfs2_split_and_insert(struct inode *inode,
++				  handle_t *handle,
++				  struct ocfs2_path *path,
++				  struct buffer_head *di_bh,
++				  struct buffer_head **last_eb_bh,
++				  int split_index,
++				  struct ocfs2_extent_rec *orig_split_rec,
++				  struct ocfs2_alloc_context *meta_ac)
++{
++	int ret = 0, depth;
++	unsigned int insert_range, rec_range, do_leftright = 0;
++	struct ocfs2_extent_rec tmprec;
++	struct ocfs2_extent_list *rightmost_el;
++	struct ocfs2_extent_rec rec;
++	struct ocfs2_extent_rec split_rec = *orig_split_rec;
++	struct ocfs2_insert_type insert;
++	struct ocfs2_extent_block *eb;
++	struct ocfs2_dinode *di;
++
++leftright:
++	/*
++	 * Store a copy of the record on the stack - it might move
++	 * around as the tree is manipulated below.
++	 */
++	rec = path_leaf_el(path)->l_recs[split_index];
++
++	di = (struct ocfs2_dinode *)di_bh->b_data;
++	rightmost_el = &di->id2.i_list;
++
++	depth = le16_to_cpu(rightmost_el->l_tree_depth);
++	if (depth) {
++		BUG_ON(!(*last_eb_bh));
++		eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
++		rightmost_el = &eb->h_list;
++	}
++
++	if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
++	    le16_to_cpu(rightmost_el->l_count)) {
++		int old_depth = depth;
++
++		ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh,
++				      meta_ac);
++		if (ret) {
++			mlog_errno(ret);
++			goto out;
++		}
++
++		if (old_depth != depth) {
++			eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
++			rightmost_el = &eb->h_list;
++		}
++	}
++
++	memset(&insert, 0, sizeof(struct ocfs2_insert_type));
++	insert.ins_appending = APPEND_NONE;
++	insert.ins_contig = CONTIG_NONE;
++	insert.ins_free_records = le16_to_cpu(rightmost_el->l_count)
++		- le16_to_cpu(rightmost_el->l_next_free_rec);
++	insert.ins_tree_depth = depth;
++
++	insert_range = le32_to_cpu(split_rec.e_cpos) +
++		le16_to_cpu(split_rec.e_leaf_clusters);
++	rec_range = le32_to_cpu(rec.e_cpos) +
++		le16_to_cpu(rec.e_leaf_clusters);
++
++	if (split_rec.e_cpos == rec.e_cpos) {
++		insert.ins_split = SPLIT_LEFT;
++	} else if (insert_range == rec_range) {
++		insert.ins_split = SPLIT_RIGHT;
++	} else {
++		/*
++		 * Left/right split. We fake this as a right split
++		 * first and then make a second pass as a left split.
++		 */
++		insert.ins_split = SPLIT_RIGHT;
++
++		memset(&tmprec, 0, sizeof(tmprec));
++
++		tmprec.e_cpos = cpu_to_le32(insert_range);
++		tmprec.e_leaf_clusters = cpu_to_le16(rec_range - insert_range);
++		tmprec.e_flags = rec.e_flags;
++		tmprec.e_blkno = split_rec.e_blkno;
++		le64_add_cpu(&tmprec.e_blkno,
++			     ocfs2_clusters_to_blocks(inode->i_sb,
++				     le16_to_cpu(split_rec.e_leaf_clusters)));
++		split_rec = tmprec;
++
++		BUG_ON(do_leftright);
++		do_leftright = 1;
++	}
++
++	ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec,
++				     &insert);
++	if (ret) {
++		mlog_errno(ret);
++		goto out;
++	}
++
++	if (do_leftright == 1) {
++		u32 cpos;
++		struct ocfs2_extent_list *el;
++
++		do_leftright++;
++		split_rec = *orig_split_rec;
++
++		ocfs2_reinit_path(path, 1);
++
++		cpos = le32_to_cpu(split_rec.e_cpos);
++		ret = ocfs2_find_path(inode, path, cpos);
++		if (ret) {
++			mlog_errno(ret);
++			goto out;
++		}
++
++		el = path_leaf_el(path);
++		split_index = ocfs2_search_extent_list(el, cpos);
++		goto leftright;
++	}
++out:
++	return ret;
++}
++
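When the written region sits strictly inside the record, the leftright loop above runs twice: pass one fakes a right split of the tail, pass two redoes the original request as a left split. The tmprec arithmetic, checked stand-alone (illustrative values):

	#include <stdio.h>

	int main(void)
	{
		/* rec [0,+16), split_rec [4,+4) at block 800, 8 blk/cl */
		unsigned rec_cpos = 0, rec_clusters = 16;
		unsigned split_cpos = 4, split_clusters = 4, bpc = 8;
		unsigned long long split_blkno = 800;

		unsigned insert_range = split_cpos + split_clusters;	/* 8 */
		unsigned rec_range = rec_cpos + rec_clusters;		/* 16 */

		unsigned tmp_cpos = insert_range;
		unsigned tmp_clusters = rec_range - insert_range;
		unsigned long long tmp_blkno = split_blkno +
			(unsigned long long)split_clusters * bpc;

		/* pass 1 inserts the tail [8,+8) @832, pass 2 then
		 * splits [4,+4) off the left edge */
		printf("tail [%u,+%u) @%llu\n",
		       tmp_cpos, tmp_clusters, tmp_blkno);
		return 0;
	}
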
++/*
++ * Mark part or all of the extent record at split_index in the leaf
++ * pointed to by path as written. This removes the unwritten
++ * extent flag.
++ *
++ * Care is taken to handle contiguousness so as to not grow the tree.
++ *
++ * meta_ac is not strictly necessary - we only truly need it if growth
++ * of the tree is required. All other cases will degrade into a less
++ * optimal tree layout.
++ *
++ * last_eb_bh should be the rightmost leaf block for any inode with a
++ * btree. Since a split may grow the tree or a merge might shrink it,
++ * the caller cannot trust the contents of that buffer after this call.
++ *
++ * This code is optimized for readability - several passes might be
++ * made over certain portions of the tree. All of those blocks will
++ * have been brought into cache (and pinned via the journal), so the
++ * extra overhead is not expressed in terms of disk reads.
++ */
++static int __ocfs2_mark_extent_written(struct inode *inode,
++				       struct buffer_head *di_bh,
++				       handle_t *handle,
++				       struct ocfs2_path *path,
++				       int split_index,
++				       struct ocfs2_extent_rec *split_rec,
++				       struct ocfs2_alloc_context *meta_ac,
++				       struct ocfs2_cached_dealloc_ctxt *dealloc)
++{
++	int ret = 0;
++	struct ocfs2_extent_list *el = path_leaf_el(path);
++	struct buffer_head *eb_bh, *last_eb_bh = NULL;
++	struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
++	struct ocfs2_merge_ctxt ctxt;
++	struct ocfs2_extent_list *rightmost_el;
++
++	if (!(rec->e_flags & OCFS2_EXT_UNWRITTEN)) {
++		ret = -EIO;
++		mlog_errno(ret);
++		goto out;
++	}
++
++	if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) ||
++	    ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) <
++	     (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) {
++		ret = -EIO;
++		mlog_errno(ret);
++		goto out;
++	}
++
++	eb_bh = path_leaf_bh(path);
++	ret = ocfs2_journal_access(handle, inode, eb_bh,
++				   OCFS2_JOURNAL_ACCESS_WRITE);
++	if (ret) {
++		mlog_errno(ret);
++		goto out;
++	}
++
++	ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, el,
++							    split_index,
++							    split_rec);
++
++	/*
++	 * The core merge / split code wants to know how much room is
++	 * left in this inode's allocation tree, so we pass the
++	 * rightmost extent list.
++	 */
++	if (path->p_tree_depth) {
++		struct ocfs2_extent_block *eb;
++		struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
++
++		ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
++				       le64_to_cpu(di->i_last_eb_blk),
++				       &last_eb_bh, OCFS2_BH_CACHED, inode);
++		if (ret) {
++			mlog_errno(ret);
++			goto out;
++		}
++
++		eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
++		if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) {
++			OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb);
++			ret = -EROFS;
++			goto out;
++		}
+ 
+-	if (ret == 0)
+-		*last_eb_bh = bh;
++		rightmost_el = &eb->h_list;
++	} else
++		rightmost_el = path_root_el(path);
++
++	ctxt.c_used_tail_recs = le16_to_cpu(rightmost_el->l_next_free_rec);
++	if (ctxt.c_used_tail_recs > 0 &&
++	    ocfs2_is_empty_extent(&rightmost_el->l_recs[0]))
++		ctxt.c_used_tail_recs--;
++
++	if (rec->e_cpos == split_rec->e_cpos &&
++	    rec->e_leaf_clusters == split_rec->e_leaf_clusters)
++		ctxt.c_split_covers_rec = 1;
+ 	else
+-		brelse(bh);
++		ctxt.c_split_covers_rec = 0;
++
++	ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]);
++
++	mlog(0, "index: %d, contig: %u, used_tail_recs: %u, "
++	     "has_empty: %u, split_covers: %u\n", split_index,
++	     ctxt.c_contig_type, ctxt.c_used_tail_recs,
++	     ctxt.c_has_empty_extent, ctxt.c_split_covers_rec);
++
++	if (ctxt.c_contig_type == CONTIG_NONE) {
++		if (ctxt.c_split_covers_rec)
++			el->l_recs[split_index] = *split_rec;
++		else
++			ret = ocfs2_split_and_insert(inode, handle, path, di_bh,
++						     &last_eb_bh, split_index,
++						     split_rec, meta_ac);
++		if (ret)
++			mlog_errno(ret);
++	} else {
++		ret = ocfs2_try_to_merge_extent(inode, handle, path,
++						split_index, split_rec,
++						dealloc, &ctxt);
++		if (ret)
++			mlog_errno(ret);
++	}
++
++	ocfs2_journal_dirty(handle, eb_bh);
++
++out:
++	brelse(last_eb_bh);
+ 	return ret;
+ }
+ 
+ /*
+- * Insert an extent into an inode btree.
++ * Mark the already-existing extent at cpos as written for len clusters.
+  *
+- * The caller needs to update fe->i_clusters
++ * If the existing extent is larger than the request, initiate a
++ * split. An attempt will be made at merging with adjacent extents.
++ *
++ * The caller is responsible for passing down meta_ac if we'll need it.
+  */
+-int ocfs2_insert_extent(struct ocfs2_super *osb,
+-			handle_t *handle,
+-			struct inode *inode,
+-			struct buffer_head *fe_bh,
+-			u32 cpos,
+-			u64 start_blk,
+-			u32 new_clusters,
+-			struct ocfs2_alloc_context *meta_ac)
++int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
++			      handle_t *handle, u32 cpos, u32 len, u32 phys,
++			      struct ocfs2_alloc_context *meta_ac,
++			      struct ocfs2_cached_dealloc_ctxt *dealloc)
+ {
+-	int status, shift;
+-	struct buffer_head *last_eb_bh = NULL;
+-	struct buffer_head *bh = NULL;
+-	struct ocfs2_insert_type insert = {0, };
+-	struct ocfs2_extent_rec rec;
+-
+-	mlog(0, "add %u clusters at position %u to inode %llu\n",
+-	     new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
+-
+-	mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
+-			(OCFS2_I(inode)->ip_clusters != cpos),
+-			"Device %s, asking for sparse allocation: inode %llu, "
+-			"cpos %u, clusters %u\n",
+-			osb->dev_str,
+-			(unsigned long long)OCFS2_I(inode)->ip_blkno, cpos,
+-			OCFS2_I(inode)->ip_clusters);
++	int ret, index;
++	u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys);
++	struct ocfs2_extent_rec split_rec;
++	struct ocfs2_path *left_path = NULL;
++	struct ocfs2_extent_list *el;
+ 
+-	memset(&rec, 0, sizeof(rec));
+-	rec.e_cpos = cpu_to_le32(cpos);
+-	rec.e_blkno = cpu_to_le64(start_blk);
+-	rec.e_leaf_clusters = cpu_to_le16(new_clusters);
++	mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n",
++	     inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno);
+ 
+-	status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
+-					  &insert);
+-	if (status < 0) {
+-		mlog_errno(status);
+-		goto bail;
++	if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
++		ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
++			    "that are being written to, but the feature bit "
++			    "is not set in the super block.",
++			    (unsigned long long)OCFS2_I(inode)->ip_blkno);
++		ret = -EROFS;
++		goto out;
+ 	}
+ 
+-	mlog(0, "Insert.appending: %u, Insert.Contig: %u, "
+-	     "Insert.contig_index: %d, Insert.free_records: %d, "
+-	     "Insert.tree_depth: %d\n",
+-	     insert.ins_appending, insert.ins_contig, insert.ins_contig_index,
+-	     insert.ins_free_records, insert.ins_tree_depth);
+-
+ 	/*
+-	 * Avoid growing the tree unless we're out of records and the
+-	 * insert type requres one.
++	 * XXX: This should be fixed up so that we just re-insert the
++	 * next extent records.
+ 	 */
+-	if (insert.ins_contig != CONTIG_NONE || insert.ins_free_records)
+-		goto out_add;
++	ocfs2_extent_map_trunc(inode, 0);
+ 
+-	shift = ocfs2_find_branch_target(osb, inode, fe_bh, &bh);
+-	if (shift < 0) {
+-		status = shift;
+-		mlog_errno(status);
+-		goto bail;
++	left_path = ocfs2_new_inode_path(di_bh);
++	if (!left_path) {
++		ret = -ENOMEM;
++		mlog_errno(ret);
++		goto out;
+ 	}
+ 
+-	/* We traveled all the way to the bottom of the allocation tree
+-	 * and didn't find room for any more extents - we need to add
+-	 * another tree level */
+-	if (shift) {
+-		BUG_ON(bh);
+-		mlog(0, "need to shift tree depth "
+-		     "(current = %d)\n", insert.ins_tree_depth);
+-
+-		/* ocfs2_shift_tree_depth will return us a buffer with
+-		 * the new extent block (so we can pass that to
+-		 * ocfs2_add_branch). */
+-		status = ocfs2_shift_tree_depth(osb, handle, inode, fe_bh,
+-						meta_ac, &bh);
+-		if (status < 0) {
+-			mlog_errno(status);
+-			goto bail;
+-		}
+-		insert.ins_tree_depth++;
+-		/* Special case: we have room now if we shifted from
+-		 * tree_depth 0 */
+-		if (insert.ins_tree_depth == 1)
+-			goto out_add;
++	ret = ocfs2_find_path(inode, left_path, cpos);
++	if (ret) {
++		mlog_errno(ret);
++		goto out;
+ 	}
++	el = path_leaf_el(left_path);
+ 
+-	/* call ocfs2_add_branch to add the final part of the tree with
+-	 * the new data. */
+-	mlog(0, "add branch. bh = %p\n", bh);
+-	status = ocfs2_add_branch(osb, handle, inode, fe_bh, bh, last_eb_bh,
+-				  meta_ac);
+-	if (status < 0) {
+-		mlog_errno(status);
+-		goto bail;
++	index = ocfs2_search_extent_list(el, cpos);
++	if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
++		ocfs2_error(inode->i_sb,
++			    "Inode %llu has an extent at cpos %u which can no "
++			    "longer be found.\n",
++			    (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos);
++		ret = -EROFS;
++		goto out;
+ 	}
+ 
+-out_add:
+-	/* Finally, we can add clusters. This might rotate the tree for us. */
+-	status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert);
+-	if (status < 0)
+-		mlog_errno(status);
+-	else
+-		ocfs2_extent_map_insert_rec(inode, &rec);
+-
+-bail:
+-	if (bh)
+-		brelse(bh);
++	memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec));
++	split_rec.e_cpos = cpu_to_le32(cpos);
++	split_rec.e_leaf_clusters = cpu_to_le16(len);
++	split_rec.e_blkno = cpu_to_le64(start_blkno);
++	split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags;
++	split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN;
+ 
+-	if (last_eb_bh)
+-		brelse(last_eb_bh);
++	ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path,
++					  index, &split_rec, meta_ac, dealloc);
++	if (ret)
++		mlog_errno(ret);
+ 
+-	mlog_exit(status);
+-	return status;
++out:
++	ocfs2_free_path(left_path);
++	return ret;
+ }
+ 
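+To make the split semantics concrete: if an unwritten extent covers clusters
+[10, 18) and a write of len 2 lands at cpos 13, the record is conceptually
+split into [10, 13) unwritten, [13, 15) written and [15, 18) unwritten, after
+which the merge pass tries to fold the written piece into any written
+neighbour. The worst case therefore costs two extra extent records, which is
+why callers must be prepared to pass down meta_ac.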
+ static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb)
+@@ -2957,6 +4420,219 @@
+ 	return status;
+ }
+ 
++/*
++ * Delayed de-allocation of suballocator blocks.
++ *
++ * Some sets of block de-allocations might involve multiple suballocator inodes.
++ *
++ * The locking for this can get extremely complicated, especially when
++ * the suballocator inodes to delete from aren't known until deep
++ * within an unrelated codepath.
++ *
++ * ocfs2_extent_block structures are a good example of this - an inode
++ * btree could have been grown by any number of nodes each allocating
++ * out of their own suballoc inode.
++ *
++ * These structures allow the delay of block de-allocation until a
++ * later time, when locking of multiple cluster inodes won't cause
++ * deadlock.
++ */
++
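++A rough sketch of the intended call pattern (the locking calls shown
++are illustrative of a typical caller, not lifted from this patch):
++
++	struct ocfs2_cached_dealloc_ctxt dealloc;
++
++	ocfs2_init_dealloc_ctxt(&dealloc);
++
++	/* ... journalled tree manipulation; extent blocks that become
++	 * unreachable are cached via ocfs2_cache_block_dealloc()
++	 * instead of being freed inline ... */
++
++	ocfs2_commit_trans(osb, handle);	/* no handle left open */
++	ocfs2_meta_unlock(inode, 1);		/* drop cluster locks first */
++
++	ocfs2_run_deallocs(osb, &dealloc);	/* may now lock suballoc inodes */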
++/*
++ * Describes a single block free from a suballocator
++ */
++struct ocfs2_cached_block_free {
++	struct ocfs2_cached_block_free		*free_next;
++	u64					free_blk;
++	unsigned int				free_bit;
++};
++
++struct ocfs2_per_slot_free_list {
++	struct ocfs2_per_slot_free_list		*f_next_suballocator;
++	int					f_inode_type;
++	int					f_slot;
++	struct ocfs2_cached_block_free		*f_first;
++};
++
++static int ocfs2_free_cached_items(struct ocfs2_super *osb,
++				   int sysfile_type,
++				   int slot,
++				   struct ocfs2_cached_block_free *head)
++{
++	int ret;
++	u64 bg_blkno;
++	handle_t *handle;
++	struct inode *inode;
++	struct buffer_head *di_bh = NULL;
++	struct ocfs2_cached_block_free *tmp;
++
++	inode = ocfs2_get_system_file_inode(osb, sysfile_type, slot);
++	if (!inode) {
++		ret = -EINVAL;
++		mlog_errno(ret);
++		goto out;
++	}
++
++	mutex_lock(&inode->i_mutex);
++
++	ret = ocfs2_meta_lock(inode, &di_bh, 1);
++	if (ret) {
++		mlog_errno(ret);
++		goto out_mutex;
++	}
++
++	handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
++	if (IS_ERR(handle)) {
++		ret = PTR_ERR(handle);
++		mlog_errno(ret);
++		goto out_unlock;
++	}
++
++	while (head) {
++		bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
++						      head->free_bit);
++		mlog(0, "Free bit: (bit %u, blkno %llu)\n",
++		     head->free_bit, (unsigned long long)head->free_blk);
++
++		ret = ocfs2_free_suballoc_bits(handle, inode, di_bh,
++					       head->free_bit, bg_blkno, 1);
++		if (ret) {
++			mlog_errno(ret);
++			goto out_journal;
++		}
++
++		ret = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_FREE);
++		if (ret) {
++			mlog_errno(ret);
++			goto out_journal;
++		}
++
++		tmp = head;
++		head = head->free_next;
++		kfree(tmp);
++	}
++
++out_journal:
++	ocfs2_commit_trans(osb, handle);
++
++out_unlock:
++	ocfs2_meta_unlock(inode, 1);
++	brelse(di_bh);
++out_mutex:
++	mutex_unlock(&inode->i_mutex);
++	iput(inode);
++out:
++	while(head) {
++		/* Premature exit may have left some dangling items. */
++		tmp = head;
++		head = head->free_next;
++		kfree(tmp);
++	}
++
++	return ret;
++}
++
++int ocfs2_run_deallocs(struct ocfs2_super *osb,
++		       struct ocfs2_cached_dealloc_ctxt *ctxt)
++{
++	int ret = 0, ret2;
++	struct ocfs2_per_slot_free_list *fl;
++
++	if (!ctxt)
++		return 0;
++
++	while (ctxt->c_first_suballocator) {
++		fl = ctxt->c_first_suballocator;
++
++		if (fl->f_first) {
++			mlog(0, "Free items: (type %u, slot %d)\n",
++			     fl->f_inode_type, fl->f_slot);
++			ret2 = ocfs2_free_cached_items(osb, fl->f_inode_type,
++						       fl->f_slot, fl->f_first);
++			if (ret2)
++				mlog_errno(ret2);
++			if (!ret)
++				ret = ret2;
++		}
++
++		ctxt->c_first_suballocator = fl->f_next_suballocator;
++		kfree(fl);
++	}
++
++	return ret;
++}
++
++static struct ocfs2_per_slot_free_list *
++ocfs2_find_per_slot_free_list(int type,
++			      int slot,
++			      struct ocfs2_cached_dealloc_ctxt *ctxt)
++{
++	struct ocfs2_per_slot_free_list *fl = ctxt->c_first_suballocator;
++
++	while (fl) {
++		if (fl->f_inode_type == type && fl->f_slot == slot)
++			return fl;
++
++		fl = fl->f_next_suballocator;
++	}
++
++	fl = kmalloc(sizeof(*fl), GFP_NOFS);
++	if (fl) {
++		fl->f_inode_type = type;
++		fl->f_slot = slot;
++		fl->f_first = NULL;
++		fl->f_next_suballocator = ctxt->c_first_suballocator;
++
++		ctxt->c_first_suballocator = fl;
++	}
++	return fl;
++}
++
++static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
++				     int type, int slot, u64 blkno,
++				     unsigned int bit)
++{
++	int ret;
++	struct ocfs2_per_slot_free_list *fl;
++	struct ocfs2_cached_block_free *item;
++
++	fl = ocfs2_find_per_slot_free_list(type, slot, ctxt);
++	if (fl == NULL) {
++		ret = -ENOMEM;
++		mlog_errno(ret);
++		goto out;
++	}
++
++	item = kmalloc(sizeof(*item), GFP_NOFS);
++	if (item == NULL) {
++		ret = -ENOMEM;
++		mlog_errno(ret);
++		goto out;
++	}
++
++	mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n",
++	     type, slot, bit, (unsigned long long)blkno);
++
++	item->free_blk = blkno;
++	item->free_bit = bit;
++	item->free_next = fl->f_first;
++
++	fl->f_first = item;
++
++	ret = 0;
++out:
++	return ret;
++}
++
++static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
++					 struct ocfs2_extent_block *eb)
++{
++	return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE,
++					 le16_to_cpu(eb->h_suballoc_slot),
++					 le64_to_cpu(eb->h_blkno),
++					 le16_to_cpu(eb->h_suballoc_bit));
++}
++
+ /* This function will figure out whether the currently last extent
+  * block will be deleted, and if it will, what the new last extent
+  * block will be so we can update his h_next_leaf_blk field, as well
+@@ -3238,27 +4914,10 @@
+ 			BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
+ 			BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno));
+ 
+-			if (le16_to_cpu(eb->h_suballoc_slot) == 0) {
+-				/*
+-				 * This code only understands how to
+-				 * lock the suballocator in slot 0,
+-				 * which is fine because allocation is
+-				 * only ever done out of that
+-				 * suballocator too. A future version
+-				 * might change that however, so avoid
+-				 * a free if we don't know how to
+-				 * handle it. This way an fs incompat
+-				 * bit will not be necessary.
+-				 */
+-				ret = ocfs2_free_extent_block(handle,
+-							      tc->tc_ext_alloc_inode,
+-							      tc->tc_ext_alloc_bh,
+-							      eb);
+-
++			ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb);
+ 				/* An error here is not fatal. */
+ 				if (ret < 0)
+ 					mlog_errno(ret);
+-			}
+ 		} else {
+ 			deleted_eb = 0;
+ 		}
+@@ -3631,8 +5290,6 @@
+ 
+ 	mlog_entry_void();
+ 
+-	down_write(&OCFS2_I(inode)->ip_alloc_sem);
+-
+ 	new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
+ 						     i_size_read(inode));
+ 
+@@ -3754,7 +5411,6 @@
+ 	goto start;
+ 
+ bail:
+-	up_write(&OCFS2_I(inode)->ip_alloc_sem);
+ 
+ 	ocfs2_schedule_truncate_log_flush(osb, 1);
+ 
+@@ -3764,6 +5420,8 @@
+ 	if (handle)
+ 		ocfs2_commit_trans(osb, handle);
+ 
++	ocfs2_run_deallocs(osb, &tc->tc_dealloc);
++
+ 	ocfs2_free_path(path);
+ 
+ 	/* This will drop the ext_alloc cluster lock for us */
+@@ -3774,23 +5432,18 @@
+ }
+ 
+ /*
+- * Expects the inode to already be locked. This will figure out which
+- * inodes need to be locked and will put them on the returned truncate
+- * context.
++ * Expects the inode to already be locked.
+  */
+ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
+ 			   struct inode *inode,
+ 			   struct buffer_head *fe_bh,
+ 			   struct ocfs2_truncate_context **tc)
+ {
+-	int status, metadata_delete, i;
++	int status;
+ 	unsigned int new_i_clusters;
+ 	struct ocfs2_dinode *fe;
+ 	struct ocfs2_extent_block *eb;
+-	struct ocfs2_extent_list *el;
+ 	struct buffer_head *last_eb_bh = NULL;
+-	struct inode *ext_alloc_inode = NULL;
+-	struct buffer_head *ext_alloc_bh = NULL;
+ 
+ 	mlog_entry_void();
+ 
+@@ -3810,12 +5463,9 @@
+ 		mlog_errno(status);
+ 		goto bail;
+ 	}
++	ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
+ 
+-	metadata_delete = 0;
+ 	if (fe->id2.i_list.l_tree_depth) {
+-		/* If we have a tree, then the truncate may result in
+-		 * metadata deletes. Figure this out from the
+-		 * rightmost leaf block.*/
+ 		status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk),
+ 					  &last_eb_bh, OCFS2_BH_CACHED, inode);
+ 		if (status < 0) {
+@@ -3830,43 +5480,10 @@
+ 			status = -EIO;
+ 			goto bail;
+ 		}
+-		el = &(eb->h_list);
+-
+-		i = 0;
+-		if (ocfs2_is_empty_extent(&el->l_recs[0]))
+-			i = 1;
+-		/*
+-		 * XXX: Should we check that next_free_rec contains
+-		 * the extent?
+-		 */
+-		if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_i_clusters)
+-			metadata_delete = 1;
+ 	}
+ 
+ 	(*tc)->tc_last_eb_bh = last_eb_bh;
+ 
+-	if (metadata_delete) {
+-		mlog(0, "Will have to delete metadata for this trunc. "
+-		     "locking allocator.\n");
+-		ext_alloc_inode = ocfs2_get_system_file_inode(osb, EXTENT_ALLOC_SYSTEM_INODE, 0);
+-		if (!ext_alloc_inode) {
+-			status = -ENOMEM;
+-			mlog_errno(status);
+-			goto bail;
+-		}
+-
+-		mutex_lock(&ext_alloc_inode->i_mutex);
+-		(*tc)->tc_ext_alloc_inode = ext_alloc_inode;
+-
+-		status = ocfs2_meta_lock(ext_alloc_inode, &ext_alloc_bh, 1);
+-		if (status < 0) {
+-			mlog_errno(status);
+-			goto bail;
+-		}
+-		(*tc)->tc_ext_alloc_bh = ext_alloc_bh;
+-		(*tc)->tc_ext_alloc_locked = 1;
+-	}
+-
+ 	status = 0;
+ bail:
+ 	if (status < 0) {
+@@ -3880,16 +5497,13 @@
+ 
+ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
+ {
+-	if (tc->tc_ext_alloc_inode) {
+-		if (tc->tc_ext_alloc_locked)
+-			ocfs2_meta_unlock(tc->tc_ext_alloc_inode, 1);
+-
+-		mutex_unlock(&tc->tc_ext_alloc_inode->i_mutex);
+-		iput(tc->tc_ext_alloc_inode);
+-	}
+-
+-	if (tc->tc_ext_alloc_bh)
+-		brelse(tc->tc_ext_alloc_bh);
++	/*
++	 * The caller is responsible for completing deallocation
++	 * before freeing the context.
++	 */
++	if (tc->tc_dealloc.c_first_suballocator != NULL)
++		mlog(ML_NOTICE,
++		     "Truncate completion has non-empty dealloc context\n");
+ 
+ 	if (tc->tc_last_eb_bh)
+ 		brelse(tc->tc_last_eb_bh);
+diff -Nurb linux-2.6.22-570/fs/ocfs2/alloc.h linux-2.6.22-try2/fs/ocfs2/alloc.h
+--- linux-2.6.22-570/fs/ocfs2/alloc.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/alloc.h	2007-12-19 15:29:23.000000000 -0500
+@@ -34,7 +34,13 @@
+ 			u32 cpos,
+ 			u64 start_blk,
+ 			u32 new_clusters,
++			u8 flags,
+ 			struct ocfs2_alloc_context *meta_ac);
++struct ocfs2_cached_dealloc_ctxt;
++int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
++			      handle_t *handle, u32 cpos, u32 len, u32 phys,
++			      struct ocfs2_alloc_context *meta_ac,
++			      struct ocfs2_cached_dealloc_ctxt *dealloc);
+ int ocfs2_num_free_extents(struct ocfs2_super *osb,
+ 			   struct inode *inode,
+ 			   struct ocfs2_dinode *fe);
+@@ -63,9 +69,27 @@
+ int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb,
+ 					 struct ocfs2_dinode *tl_copy);
+ 
++/*
++ * Process local structure which describes the block unlinks done
++ * during an operation. This is populated via
++ * ocfs2_cache_block_dealloc().
++ *
++ * ocfs2_run_deallocs() should be called after the potentially
++ * de-allocating routines. No journal handles should be open, and most
++ * locks should have been dropped.
++ */
++struct ocfs2_cached_dealloc_ctxt {
++	struct ocfs2_per_slot_free_list		*c_first_suballocator;
++};
++static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
++{
++	c->c_first_suballocator = NULL;
++}
++int ocfs2_run_deallocs(struct ocfs2_super *osb,
++		       struct ocfs2_cached_dealloc_ctxt *ctxt);
++
+ struct ocfs2_truncate_context {
+-	struct inode *tc_ext_alloc_inode;
+-	struct buffer_head *tc_ext_alloc_bh;
++	struct ocfs2_cached_dealloc_ctxt tc_dealloc;
+ 	int tc_ext_alloc_locked; /* is it cluster locked? */
+ 	/* these get destroyed once it's passed to ocfs2_commit_truncate. */
+ 	struct buffer_head *tc_last_eb_bh;
+@@ -84,6 +108,7 @@
+ 
+ int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
+ 		    u32 cpos, struct buffer_head **leaf_bh);
++int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
+ 
+ /*
+  * Helper function to look at the # of clusters in an extent record.
+diff -Nurb linux-2.6.22-570/fs/ocfs2/aops.c linux-2.6.22-try2/fs/ocfs2/aops.c
+--- linux-2.6.22-570/fs/ocfs2/aops.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/fs/ocfs2/aops.c	2007-12-19 15:29:24.000000000 -0500
+@@ -232,7 +232,7 @@
+ 	 * might now be discovering a truncate that hit on another node.
+ 	 * block_read_full_page->get_block freaks out if it is asked to read
+ 	 * beyond the end of a file, so we check here.  Callers
+-	 * (generic_file_read, fault->nopage) are clever enough to check i_size
++	 * (generic_file_read, vm_ops->fault) are clever enough to check i_size
+ 	 * and notice that the page they just read isn't needed.
+ 	 *
+ 	 * XXX sys_readahead() seems to get that wrong?
+@@ -705,6 +705,8 @@
+ 	     bh = bh->b_this_page, block_start += bsize) {
+ 		block_end = block_start + bsize;
+ 
++		clear_buffer_new(bh);
++
+ 		/*
+ 		 * Ignore blocks outside of our i/o range -
+ 		 * they may belong to unallocated clusters.
+@@ -719,9 +721,8 @@
+ 		 * For an allocating write with cluster size >= page
+ 		 * size, we always write the entire page.
+ 		 */
+-
+-		if (buffer_new(bh))
+-			clear_buffer_new(bh);
++		if (new)
++			set_buffer_new(bh);
+ 
+ 		if (!buffer_mapped(bh)) {
+ 			map_bh(bh, inode->i_sb, *p_blkno);
+@@ -760,18 +761,13 @@
+ 	bh = head;
+ 	block_start = 0;
+ 	do {
+-		void *kaddr;
+-
+ 		block_end = block_start + bsize;
+ 		if (block_end <= from)
+ 			goto next_bh;
+ 		if (block_start >= to)
+ 			break;
+ 
+-		kaddr = kmap_atomic(page, KM_USER0);
+-		memset(kaddr+block_start, 0, bh->b_size);
+-		flush_dcache_page(page);
+-		kunmap_atomic(kaddr, KM_USER0);
++		zero_user_page(page, block_start, bh->b_size, KM_USER0);
+ 		set_buffer_uptodate(bh);
+ 		mark_buffer_dirty(bh);
+ 
+@@ -783,217 +779,240 @@
+ 	return ret;
+ }
+ 
++#if (PAGE_CACHE_SIZE >= OCFS2_MAX_CLUSTERSIZE)
++#define OCFS2_MAX_CTXT_PAGES	1
++#else
++#define OCFS2_MAX_CTXT_PAGES	(OCFS2_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE)
++#endif
++
++#define OCFS2_MAX_CLUSTERS_PER_PAGE	(PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE)
++
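+For a sense of scale, assuming the usual on-disk limits (OCFS2_MIN_CLUSTERSIZE
+of 4 KB, OCFS2_MAX_CLUSTERSIZE of 1 MB): on a 4 KB-page machine the second
+branch applies, giving OCFS2_MAX_CTXT_PAGES = 1 MB / 4 KB = 256, and
+OCFS2_MAX_CLUSTERS_PER_PAGE = 4 KB / 4 KB = 1. On a 64 KB-page machine the
+context still spans 1 MB / 64 KB = 16 pages, while a single page can cover up
+to 64 KB / 4 KB = 16 minimum-sized clusters.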
+ /*
+- * This will copy user data from the buffer page in the splice
+- * context.
+- *
+- * For now, we ignore SPLICE_F_MOVE as that would require some extra
+- * communication out all the way to ocfs2_write().
++ * Describe the state of a single cluster to be written to.
+  */
+-int ocfs2_map_and_write_splice_data(struct inode *inode,
+-				  struct ocfs2_write_ctxt *wc, u64 *p_blkno,
+-				  unsigned int *ret_from, unsigned int *ret_to)
++struct ocfs2_write_cluster_desc {
++	u32		c_cpos;
++	u32		c_phys;
++	/*
++	 * Give this a unique field because c_phys eventually gets
++	 * filled.
++	 */
++	unsigned	c_new;
++	unsigned	c_unwritten;
++};
++
++static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
+ {
+-	int ret;
+-	unsigned int to, from, cluster_start, cluster_end;
+-	char *src, *dst;
+-	struct ocfs2_splice_write_priv *sp = wc->w_private;
+-	struct pipe_buffer *buf = sp->s_buf;
+-	unsigned long bytes, src_from;
+-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
++	return d->c_new || d->c_unwritten;
++}
+ 
+-	ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start,
+-					&cluster_end);
++struct ocfs2_write_ctxt {
++	/* Logical cluster position / len of write */
++	u32				w_cpos;
++	u32				w_clen;
+ 
+-	from = sp->s_offset;
+-	src_from = sp->s_buf_offset;
+-	bytes = wc->w_count;
++	struct ocfs2_write_cluster_desc	w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
+ 
+-	if (wc->w_large_pages) {
+ 		/*
+-		 * For cluster size < page size, we have to
+-		 * calculate pos within the cluster and obey
+-		 * the rightmost boundary.
+-		 */
+-		bytes = min(bytes, (unsigned long)(osb->s_clustersize
+-				   - (wc->w_pos & (osb->s_clustersize - 1))));
+-	}
+-	to = from + bytes;
+-
+-	BUG_ON(from > PAGE_CACHE_SIZE);
+-	BUG_ON(to > PAGE_CACHE_SIZE);
+-	BUG_ON(from < cluster_start);
+-	BUG_ON(to > cluster_end);
+-
+-	if (wc->w_this_page_new)
+-		ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+-					    cluster_start, cluster_end, 1);
+-	else
+-		ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+-					    from, to, 0);
+-	if (ret) {
+-		mlog_errno(ret);
+-		goto out;
++	 * This is true if page_size > cluster_size.
++	 *
++	 * It triggers a set of special cases during write which might
++	 * have to deal with allocating writes to partial pages.
++	 */
++	unsigned int			w_large_pages;
++
++	/*
++	 * Pages involved in this write.
++	 *
++	 * w_target_page is the page being written to by the user.
++	 *
++	 * w_pages is an array of pages which always contains
++	 * w_target_page, and in the case of an allocating write with
++	 * page_size < cluster size, it will contain zero'd and mapped
++	 * pages adjacent to w_target_page which need to be written
++	 * out so that future reads from that region will get
++	 * zeros.
++	 */
++	struct page			*w_pages[OCFS2_MAX_CTXT_PAGES];
++	unsigned int			w_num_pages;
++	struct page			*w_target_page;
++
++	/*
++	 * ocfs2_write_end() uses this to know what the real range to
++	 * write in the target should be.
++	 */
++	unsigned int			w_target_from;
++	unsigned int			w_target_to;
++
++	/*
++	 * We could use journal_current_handle() but this is cleaner,
++	 * IMHO -Mark
++	 */
++	handle_t			*w_handle;
++
++	struct buffer_head		*w_di_bh;
++
++	struct ocfs2_cached_dealloc_ctxt w_dealloc;
++};
++
++static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
++{
++	int i;
++
++	for(i = 0; i < wc->w_num_pages; i++) {
++		if (wc->w_pages[i] == NULL)
++			continue;
++
++		unlock_page(wc->w_pages[i]);
++		mark_page_accessed(wc->w_pages[i]);
++		page_cache_release(wc->w_pages[i]);
+ 	}
+ 
+-	src = buf->ops->map(sp->s_pipe, buf, 1);
+-	dst = kmap_atomic(wc->w_this_page, KM_USER1);
+-	memcpy(dst + from, src + src_from, bytes);
+-	kunmap_atomic(wc->w_this_page, KM_USER1);
+-	buf->ops->unmap(sp->s_pipe, buf, src);
++	brelse(wc->w_di_bh);
++	kfree(wc);
++}
++
++static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
++				  struct ocfs2_super *osb, loff_t pos,
++				  unsigned len, struct buffer_head *di_bh)
++{
++	struct ocfs2_write_ctxt *wc;
++
++	wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS);
++	if (!wc)
++		return -ENOMEM;
++
++	wc->w_cpos = pos >> osb->s_clustersize_bits;
++	wc->w_clen = ocfs2_clusters_for_bytes(osb->sb, len);
++	get_bh(di_bh);
++	wc->w_di_bh = di_bh;
+ 
+-	wc->w_finished_copy = 1;
++	if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits))
++		wc->w_large_pages = 1;
++	else
++		wc->w_large_pages = 0;
+ 
+-	*ret_from = from;
+-	*ret_to = to;
+-out:
++	ocfs2_init_dealloc_ctxt(&wc->w_dealloc);
+ 
+-	return bytes ? (unsigned int)bytes : ret;
++	*wcp = wc;
++
++	return 0;
+ }
+ 
+ /*
+- * This will copy user data from the iovec in the buffered write
+- * context.
++ * If a page has any new buffers, zero them out here, and mark them uptodate
++ * and dirty so they'll be written out (in order to prevent uninitialised
++ * block data from leaking). And clear the new bit.
+  */
+-int ocfs2_map_and_write_user_data(struct inode *inode,
+-				  struct ocfs2_write_ctxt *wc, u64 *p_blkno,
+-				  unsigned int *ret_from, unsigned int *ret_to)
++static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to)
+ {
+-	int ret;
+-	unsigned int to, from, cluster_start, cluster_end;
+-	unsigned long bytes, src_from;
+-	char *dst;
+-	struct ocfs2_buffered_write_priv *bp = wc->w_private;
+-	const struct iovec *cur_iov = bp->b_cur_iov;
+-	char __user *buf;
+-	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+-
+-	ocfs2_figure_cluster_boundaries(osb, wc->w_cpos, &cluster_start,
+-					&cluster_end);
++	unsigned int block_start, block_end;
++	struct buffer_head *head, *bh;
+ 
+-	buf = cur_iov->iov_base + bp->b_cur_off;
+-	src_from = (unsigned long)buf & ~PAGE_CACHE_MASK;
++	BUG_ON(!PageLocked(page));
++	if (!page_has_buffers(page))
++		return;
+ 
+-	from = wc->w_pos & (PAGE_CACHE_SIZE - 1);
++	bh = head = page_buffers(page);
++	block_start = 0;
++	do {
++		block_end = block_start + bh->b_size;
+ 
+-	/*
+-	 * This is a lot of comparisons, but it reads quite
+-	 * easily, which is important here.
+-	 */
+-	/* Stay within the src page */
+-	bytes = PAGE_SIZE - src_from;
+-	/* Stay within the vector */
+-	bytes = min(bytes,
+-		    (unsigned long)(cur_iov->iov_len - bp->b_cur_off));
+-	/* Stay within count */
+-	bytes = min(bytes, (unsigned long)wc->w_count);
+-	/*
+-	 * For clustersize > page size, just stay within
+-	 * target page, otherwise we have to calculate pos
+-	 * within the cluster and obey the rightmost
+-	 * boundary.
+-	 */
+-	if (wc->w_large_pages) {
+-		/*
+-		 * For cluster size < page size, we have to
+-		 * calculate pos within the cluster and obey
+-		 * the rightmost boundary.
+-		 */
+-		bytes = min(bytes, (unsigned long)(osb->s_clustersize
+-				   - (wc->w_pos & (osb->s_clustersize - 1))));
+-	} else {
+-		/*
+-		 * cluster size > page size is the most common
+-		 * case - we just stay within the target page
+-		 * boundary.
+-		 */
+-		bytes = min(bytes, PAGE_CACHE_SIZE - from);
+-	}
++		if (buffer_new(bh)) {
++			if (block_end > from && block_start < to) {
++				if (!PageUptodate(page)) {
++					unsigned start, end;
+ 
+-	to = from + bytes;
++					start = max(from, block_start);
++					end = min(to, block_end);
+ 
+-	BUG_ON(from > PAGE_CACHE_SIZE);
+-	BUG_ON(to > PAGE_CACHE_SIZE);
+-	BUG_ON(from < cluster_start);
+-	BUG_ON(to > cluster_end);
++					zero_user_page(page, start, end - start, KM_USER0);
++					set_buffer_uptodate(bh);
++				}
+ 
+-	if (wc->w_this_page_new)
+-		ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+-					    cluster_start, cluster_end, 1);
+-	else
+-		ret = ocfs2_map_page_blocks(wc->w_this_page, p_blkno, inode,
+-					    from, to, 0);
+-	if (ret) {
+-		mlog_errno(ret);
+-		goto out;
++				clear_buffer_new(bh);
++				mark_buffer_dirty(bh);
++			}
+ 	}
+ 
+-	dst = kmap(wc->w_this_page);
+-	memcpy(dst + from, bp->b_src_buf + src_from, bytes);
+-	kunmap(wc->w_this_page);
++		block_start = block_end;
++		bh = bh->b_this_page;
++	} while (bh != head);
++}
+ 
+-	/*
+-	 * XXX: This is slow, but simple. The caller of
+-	 * ocfs2_buffered_write_cluster() is responsible for
+-	 * passing through the iovecs, so it's difficult to
+-	 * predict what our next step is in here after our
+-	 * initial write. A future version should be pushing
+-	 * that iovec manipulation further down.
+-	 *
+-	 * By setting this, we indicate that a copy from user
+-	 * data was done, and subsequent calls for this
+-	 * cluster will skip copying more data.
++/*
++ * Only called when we have a failure during an allocating write, to
++ * write zeros to the newly allocated region.
+ 	 */
+-	wc->w_finished_copy = 1;
++static void ocfs2_write_failure(struct inode *inode,
++				struct ocfs2_write_ctxt *wc,
++				loff_t user_pos, unsigned user_len)
++{
++	int i;
++	unsigned from, to;
++	struct page *tmppage;
+ 
+-	*ret_from = from;
+-	*ret_to = to;
+-out:
++	ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len);
++
++	if (wc->w_large_pages) {
++		from = wc->w_target_from;
++		to = wc->w_target_to;
++	} else {
++		from = 0;
++		to = PAGE_CACHE_SIZE;
++	}
+ 
+-	return bytes ? (unsigned int)bytes : ret;
++	for(i = 0; i < wc->w_num_pages; i++) {
++		tmppage = wc->w_pages[i];
++
++		if (ocfs2_should_order_data(inode))
++			walk_page_buffers(wc->w_handle, page_buffers(tmppage),
++					  from, to, NULL,
++					  ocfs2_journal_dirty_data);
++
++		block_commit_write(tmppage, from, to);
++	}
+ }
+ 
+-/*
+- * Map, fill and write a page to disk.
+- *
+- * The work of copying data is done via callback.  Newly allocated
+- * pages which don't take user data will be zero'd (set 'new' to
+- * indicate an allocating write)
+- *
+- * Returns a negative error code or the number of bytes copied into
+- * the page.
+- */
+-static int ocfs2_write_data_page(struct inode *inode, handle_t *handle,
+-				 u64 *p_blkno, struct page *page,
+-				 struct ocfs2_write_ctxt *wc, int new)
++static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
++					struct ocfs2_write_ctxt *wc,
++					struct page *page, u32 cpos,
++					loff_t user_pos, unsigned user_len,
++					int new)
+ {
+-	int ret, copied = 0;
+-	unsigned int from = 0, to = 0;
++	int ret;
++	unsigned int map_from = 0, map_to = 0;
+ 	unsigned int cluster_start, cluster_end;
+-	unsigned int zero_from = 0, zero_to = 0;
++	unsigned int user_data_from = 0, user_data_to = 0;
+ 
+-	ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), wc->w_cpos,
++	ocfs2_figure_cluster_boundaries(OCFS2_SB(inode->i_sb), cpos,
+ 					&cluster_start, &cluster_end);
+ 
+-	if ((wc->w_pos >> PAGE_CACHE_SHIFT) == page->index
+-	    && !wc->w_finished_copy) {
++	if (page == wc->w_target_page) {
++		map_from = user_pos & (PAGE_CACHE_SIZE - 1);
++		map_to = map_from + user_len;
+ 
+-		wc->w_this_page = page;
+-		wc->w_this_page_new = new;
+-		ret = wc->w_write_data_page(inode, wc, p_blkno, &from, &to);
+-		if (ret < 0) {
++		if (new)
++			ret = ocfs2_map_page_blocks(page, p_blkno, inode,
++						    cluster_start, cluster_end,
++						    new);
++		else
++			ret = ocfs2_map_page_blocks(page, p_blkno, inode,
++						    map_from, map_to, new);
++		if (ret) {
+ 			mlog_errno(ret);
+ 			goto out;
+ 		}
+ 
+-		copied = ret;
+-
+-		zero_from = from;
+-		zero_to = to;
++		user_data_from = map_from;
++		user_data_to = map_to;
+ 		if (new) {
+-			from = cluster_start;
+-			to = cluster_end;
++			map_from = cluster_start;
++			map_to = cluster_end;
+ 		}
++
++		wc->w_target_from = map_from;
++		wc->w_target_to = map_to;
+ 	} else {
+ 		/*
+ 		 * If we haven't allocated the new page yet, we
+@@ -1002,11 +1021,11 @@
+ 		 */
+ 		BUG_ON(!new);
+ 
+-		from = cluster_start;
+-		to = cluster_end;
++		map_from = cluster_start;
++		map_to = cluster_end;
+ 
+ 		ret = ocfs2_map_page_blocks(page, p_blkno, inode,
+-					    cluster_start, cluster_end, 1);
++					    cluster_start, cluster_end, new);
+ 		if (ret) {
+ 			mlog_errno(ret);
+ 			goto out;
+@@ -1025,108 +1044,113 @@
+ 	 */
+ 	if (new && !PageUptodate(page))
+ 		ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb),
+-					 wc->w_cpos, zero_from, zero_to);
++					 cpos, user_data_from, user_data_to);
+ 
+ 	flush_dcache_page(page);
+ 
+-	if (ocfs2_should_order_data(inode)) {
+-		ret = walk_page_buffers(handle,
+-					page_buffers(page),
+-					from, to, NULL,
+-					ocfs2_journal_dirty_data);
+-		if (ret < 0)
+-			mlog_errno(ret);
+-	}
+-
+-	/*
+-	 * We don't use generic_commit_write() because we need to
+-	 * handle our own i_size update.
+-	 */
+-	ret = block_commit_write(page, from, to);
+-	if (ret)
+-		mlog_errno(ret);
+ out:
+-
+-	return copied ? copied : ret;
++	return ret;
+ }
+ 
+ /*
+- * Do the actual write of some data into an inode. Optionally allocate
+- * in order to fulfill the write.
+- *
+- * cpos is the logical cluster offset within the file to write at
+- *
+- * 'phys' is the physical mapping of that offset. a 'phys' value of
+- * zero indicates that allocation is required. In this case, data_ac
+- * and meta_ac should be valid (meta_ac can be null if metadata
+- * allocation isn't required).
++ * This function will only grab one cluster's worth of pages.
+  */
+-static ssize_t ocfs2_write(struct file *file, u32 phys, handle_t *handle,
+-			   struct buffer_head *di_bh,
+-			   struct ocfs2_alloc_context *data_ac,
+-			   struct ocfs2_alloc_context *meta_ac,
+-			   struct ocfs2_write_ctxt *wc)
++static int ocfs2_grab_pages_for_write(struct address_space *mapping,
++				      struct ocfs2_write_ctxt *wc,
++				      u32 cpos, loff_t user_pos, int new,
++				      struct page *mmap_page)
+ {
+-	int ret, i, numpages = 1, new;
+-	unsigned int copied = 0;
+-	u32 tmp_pos;
+-	u64 v_blkno, p_blkno;
+-	struct address_space *mapping = file->f_mapping;
++	int ret = 0, i;
++	unsigned long start, target_index, index;
+ 	struct inode *inode = mapping->host;
+-	unsigned long index, start;
+-	struct page **cpages;
+ 
+-	new = phys == 0 ? 1 : 0;
++	target_index = user_pos >> PAGE_CACHE_SHIFT;
+ 
+ 	/*
+ 	 * Figure out how many pages we'll be manipulating here. For
+ 	 * a non-allocating write, we just change the one
+ 	 * page. Otherwise, we'll need a whole cluster's worth.
+ 	 */
+-	if (new)
+-		numpages = ocfs2_pages_per_cluster(inode->i_sb);
+-
+-	cpages = kzalloc(sizeof(*cpages) * numpages, GFP_NOFS);
+-	if (!cpages) {
+-		ret = -ENOMEM;
+-		mlog_errno(ret);
+-		return ret;
+-	}
+-
+-	/*
+-	 * Fill our page array first. That way we've grabbed enough so
+-	 * that we can zero and flush if we error after adding the
+-	 * extent.
+-	 */
+ 	if (new) {
+-		start = ocfs2_align_clusters_to_page_index(inode->i_sb,
+-							   wc->w_cpos);
+-		v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, wc->w_cpos);
++		wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb);
++		start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos);
+ 	} else {
+-		start = wc->w_pos >> PAGE_CACHE_SHIFT;
+-		v_blkno = wc->w_pos >> inode->i_sb->s_blocksize_bits;
++		wc->w_num_pages = 1;
++		start = target_index;
+ 	}
+ 
+-	for(i = 0; i < numpages; i++) {
++	for(i = 0; i < wc->w_num_pages; i++) {
+ 		index = start + i;
+ 
+-		cpages[i] = find_or_create_page(mapping, index, GFP_NOFS);
+-		if (!cpages[i]) {
++		if (index == target_index && mmap_page) {
++			/*
++			 * ocfs2_pagemkwrite() is a little different
++			 * and wants us to directly use the page
++			 * passed in.
++			 */
++			lock_page(mmap_page);
++
++			if (mmap_page->mapping != mapping) {
++				unlock_page(mmap_page);
++				/*
++				 * Sanity check - the locking in
++				 * ocfs2_pagemkwrite() should ensure
++				 * that this code doesn't trigger.
++				 */
++				ret = -EINVAL;
++				mlog_errno(ret);
++				goto out;
++			}
++
++			page_cache_get(mmap_page);
++			wc->w_pages[i] = mmap_page;
++		} else {
++			wc->w_pages[i] = find_or_create_page(mapping, index,
++							     GFP_NOFS);
++			if (!wc->w_pages[i]) {
+ 			ret = -ENOMEM;
+ 			mlog_errno(ret);
+ 			goto out;
+ 		}
+ 	}
+ 
++		if (index == target_index)
++			wc->w_target_page = wc->w_pages[i];
++	}
++out:
++	return ret;
++}
++
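++Continuing the 4 KB-page, 1 MB-cluster example: an allocating write grabs
++all 256 pages backing the cluster, starting at the cluster-aligned index
++returned by ocfs2_align_clusters_to_page_index(), while a non-allocating
++write locks only the single page at user_pos >> PAGE_CACHE_SHIFT.
++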
++/*
++ * Prepare a single cluster of the file for writing.
++ */
++static int ocfs2_write_cluster(struct address_space *mapping,
++			       u32 phys, unsigned int unwritten,
++			       struct ocfs2_alloc_context *data_ac,
++			       struct ocfs2_alloc_context *meta_ac,
++			       struct ocfs2_write_ctxt *wc, u32 cpos,
++			       loff_t user_pos, unsigned user_len)
++{
++	int ret, i, new, should_zero = 0;
++	u64 v_blkno, p_blkno;
++	struct inode *inode = mapping->host;
++
++	new = phys == 0 ? 1 : 0;
++	if (new || unwritten)
++		should_zero = 1;
++
+ 	if (new) {
++		u32 tmp_pos;
++
+ 		/*
+ 		 * This is safe to call with the page locks - it won't take
+ 		 * any additional semaphores or cluster locks.
+ 		 */
+-		tmp_pos = wc->w_cpos;
++		tmp_pos = cpos;
+ 		ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode,
+-						 &tmp_pos, 1, di_bh, handle,
+-						 data_ac, meta_ac, NULL);
++						 &tmp_pos, 1, 0, wc->w_di_bh,
++						 wc->w_handle, data_ac,
++						 meta_ac, NULL);
+ 		/*
+ 		 * This shouldn't happen because we must have already
+ 		 * calculated the correct meta data allocation required. The
+@@ -1143,159 +1167,433 @@
+ 			mlog_errno(ret);
+ 			goto out;
+ 		}
++	} else if (unwritten) {
++		ret = ocfs2_mark_extent_written(inode, wc->w_di_bh,
++						wc->w_handle, cpos, 1, phys,
++						meta_ac, &wc->w_dealloc);
++		if (ret < 0) {
++			mlog_errno(ret);
++			goto out;
++		}
+ 	}
+ 
++	if (should_zero)
++		v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos);
++	else
++		v_blkno = user_pos >> inode->i_sb->s_blocksize_bits;
++
++	/*
++	 * The only reason this should fail is due to an inability to
++	 * find the extent added.
++	 */
+ 	ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
+ 					  NULL);
+ 	if (ret < 0) {
++		ocfs2_error(inode->i_sb, "Corrupt extent for inode %llu, "
++			    "at logical block %llu",
++			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
++			    (unsigned long long)v_blkno);
++		goto out;
++	}
++
++	BUG_ON(p_blkno == 0);
++
++	for(i = 0; i < wc->w_num_pages; i++) {
++		int tmpret;
++
++		tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,
++						      wc->w_pages[i], cpos,
++						      user_pos, user_len,
++						      should_zero);
++		if (tmpret) {
++			mlog_errno(tmpret);
++			if (ret == 0)
++				ret = tmpret;
++		}
++	}
+ 
+ 		/*
+-		 * XXX: Should we go readonly here?
++	 * We only have cleanup to do in case of allocating write.
+ 		 */
++	if (ret && new)
++		ocfs2_write_failure(inode, wc, user_pos, user_len);
+ 
+-		mlog_errno(ret);
+-		goto out;
+-	}
++out:
+ 
+-	BUG_ON(p_blkno == 0);
++	return ret;
++}
+ 
+-	for(i = 0; i < numpages; i++) {
+-		ret = ocfs2_write_data_page(inode, handle, &p_blkno, cpages[i],
+-					    wc, new);
+-		if (ret < 0) {
++static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
++				       struct ocfs2_alloc_context *data_ac,
++				       struct ocfs2_alloc_context *meta_ac,
++				       struct ocfs2_write_ctxt *wc,
++				       loff_t pos, unsigned len)
++{
++	int ret, i;
++	struct ocfs2_write_cluster_desc *desc;
++
++	for (i = 0; i < wc->w_clen; i++) {
++		desc = &wc->w_desc[i];
++
++		ret = ocfs2_write_cluster(mapping, desc->c_phys,
++					  desc->c_unwritten, data_ac, meta_ac,
++					  wc, desc->c_cpos, pos, len);
++		if (ret) {
+ 			mlog_errno(ret);
+ 			goto out;
+ 		}
+-
+-		copied += ret;
+ 	}
+ 
++	ret = 0;
+ out:
+-	for(i = 0; i < numpages; i++) {
+-		unlock_page(cpages[i]);
+-		mark_page_accessed(cpages[i]);
+-		page_cache_release(cpages[i]);
+-	}
+-	kfree(cpages);
+-
+-	return copied ? copied : ret;
++	return ret;
+ }
+ 
+-static void ocfs2_write_ctxt_init(struct ocfs2_write_ctxt *wc,
+-				  struct ocfs2_super *osb, loff_t pos,
+-				  size_t count, ocfs2_page_writer *cb,
+-				  void *cb_priv)
++/*
++ * ocfs2_write_end() wants to know which parts of the target page it
++ * should complete the write on. It's easiest to compute them ahead of
++ * time when a more complete view of the write is available.
++ */
++static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
++					struct ocfs2_write_ctxt *wc,
++					loff_t pos, unsigned len, int alloc)
+ {
+-	wc->w_count = count;
+-	wc->w_pos = pos;
+-	wc->w_cpos = wc->w_pos >> osb->s_clustersize_bits;
+-	wc->w_finished_copy = 0;
++	struct ocfs2_write_cluster_desc *desc;
+ 
+-	if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits))
+-		wc->w_large_pages = 1;
+-	else
+-		wc->w_large_pages = 0;
++	wc->w_target_from = pos & (PAGE_CACHE_SIZE - 1);
++	wc->w_target_to = wc->w_target_from + len;
++
++	if (alloc == 0)
++		return;
++
++	/*
++	 * Allocating write - we may have different boundaries based
++	 * on page size and cluster size.
++	 *
++	 * NOTE: We can no longer compute one value from the other as
++	 * the actual write length and user provided length may be
++	 * different.
++	 */
+ 
+-	wc->w_write_data_page = cb;
+-	wc->w_private = cb_priv;
++	if (wc->w_large_pages) {
++		/*
++		 * We only care about the 1st and last cluster within
++		 * our range and whether they should be zero'd or not. Either
++		 * value may be extended out to the start/end of a
++		 * newly allocated cluster.
++		 */
++		desc = &wc->w_desc[0];
++		if (ocfs2_should_zero_cluster(desc))
++			ocfs2_figure_cluster_boundaries(osb,
++							desc->c_cpos,
++							&wc->w_target_from,
++							NULL);
++
++		desc = &wc->w_desc[wc->w_clen - 1];
++		if (ocfs2_should_zero_cluster(desc))
++			ocfs2_figure_cluster_boundaries(osb,
++							desc->c_cpos,
++							NULL,
++							&wc->w_target_to);
++	} else {
++		wc->w_target_from = 0;
++		wc->w_target_to = PAGE_CACHE_SIZE;
++	}
+ }
+ 
+ /*
+- * Write a cluster to an inode. The cluster may not be allocated yet,
+- * in which case it will be. This only exists for buffered writes -
+- * O_DIRECT takes a more "traditional" path through the kernel.
++ * Populate each single-cluster write descriptor in the write context
++ * with information about the i/o to be done.
+  *
+- * The caller is responsible for incrementing pos, written counts, etc
+- *
+- * For file systems that don't support sparse files, pre-allocation
+- * and page zeroing up until cpos should be done prior to this
+- * function call.
+- *
+- * Callers should be holding i_sem, and the rw cluster lock.
++ * Returns the number of clusters that will have to be allocated, as
++ * well as a worst case estimate of the number of extent records that
++ * would have to be created during a write to an unwritten region.
++ */
++static int ocfs2_populate_write_desc(struct inode *inode,
++				     struct ocfs2_write_ctxt *wc,
++				     unsigned int *clusters_to_alloc,
++				     unsigned int *extents_to_split)
++{
++	int ret;
++	struct ocfs2_write_cluster_desc *desc;
++	unsigned int num_clusters = 0;
++	unsigned int ext_flags = 0;
++	u32 phys = 0;
++	int i;
++
++	*clusters_to_alloc = 0;
++	*extents_to_split = 0;
++
++	for (i = 0; i < wc->w_clen; i++) {
++		desc = &wc->w_desc[i];
++		desc->c_cpos = wc->w_cpos + i;
++
++		if (num_clusters == 0) {
++			/*
++			 * Need to look up the next extent record.
++			 */
++			ret = ocfs2_get_clusters(inode, desc->c_cpos, &phys,
++						 &num_clusters, &ext_flags);
++			if (ret) {
++				mlog_errno(ret);
++				goto out;
++			}
++
++			/*
++			 * Assume worst case - that we're writing in
++			 * the middle of the extent.
+  *
+- * Returns the number of user bytes written, or less than zero for
+- * error.
++			 * We can assume that the write proceeds from
++			 * left to right, in which case the extent
++			 * insert code is smart enough to coalesce the
++			 * next splits into the previous records created.
+  */
+-ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
+-				     size_t count, ocfs2_page_writer *actor,
+-				     void *priv)
++			if (ext_flags & OCFS2_EXT_UNWRITTEN)
++				*extents_to_split = *extents_to_split + 2;
++		} else if (phys) {
++			/*
++			 * Only increment phys if it doesn't describe
++			 * a hole.
++			 */
++			phys++;
++		}
++
++		desc->c_phys = phys;
++		if (phys == 0) {
++			desc->c_new = 1;
++			*clusters_to_alloc = *clusters_to_alloc + 1;
++		}
++		if (ext_flags & OCFS2_EXT_UNWRITTEN)
++			desc->c_unwritten = 1;
++
++		num_clusters--;
++	}
++
++	ret = 0;
++out:
++	return ret;
++}
++
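++A worked example: for a three-cluster write where cluster 0 maps to an
++ordinary allocated extent, cluster 1 falls in a hole and cluster 2 sits in
++an unwritten extent, the walk leaves desc[0] as-is, marks desc[1] with
++c_new = 1 (raising *clusters_to_alloc to 1) and marks desc[2] with
++c_unwritten = 1 (raising *extents_to_split by 2 for the worst-case double
++split).
++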
++int ocfs2_write_begin_nolock(struct address_space *mapping,
++			     loff_t pos, unsigned len, unsigned flags,
++			     struct page **pagep, void **fsdata,
++			     struct buffer_head *di_bh, struct page *mmap_page)
+ {
+ 	int ret, credits = OCFS2_INODE_UPDATE_CREDITS;
+-	ssize_t written = 0;
+-	u32 phys;
+-	struct inode *inode = file->f_mapping->host;
++	unsigned int clusters_to_alloc, extents_to_split;
++	struct ocfs2_write_ctxt *wc;
++	struct inode *inode = mapping->host;
+ 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+-	struct buffer_head *di_bh = NULL;
+ 	struct ocfs2_dinode *di;
+ 	struct ocfs2_alloc_context *data_ac = NULL;
+ 	struct ocfs2_alloc_context *meta_ac = NULL;
+ 	handle_t *handle;
+-	struct ocfs2_write_ctxt wc;
+ 
+-	ocfs2_write_ctxt_init(&wc, osb, pos, count, actor, priv);
++	ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
++	if (ret) {
++		mlog_errno(ret);
++		return ret;
++	}
+ 
+-	ret = ocfs2_meta_lock(inode, &di_bh, 1);
++	ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
++					&extents_to_split);
+ 	if (ret) {
+ 		mlog_errno(ret);
+ 		goto out;
+ 	}
+-	di = (struct ocfs2_dinode *)di_bh->b_data;
++
++	di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
+ 
+ 	/*
+-	 * Take alloc sem here to prevent concurrent lookups. That way
+-	 * the mapping, zeroing and tree manipulation within
+-	 * ocfs2_write() will be safe against ->readpage(). This
+-	 * should also serve to lock out allocation from a shared
+-	 * writeable region.
++	 * We set w_target_from, w_target_to here so that
++	 * ocfs2_write_end() knows which range in the target page to
++	 * write out. An allocation requires that we write the entire
++	 * cluster range.
+ 	 */
+-	down_write(&OCFS2_I(inode)->ip_alloc_sem);
+-
+-	ret = ocfs2_get_clusters(inode, wc.w_cpos, &phys, NULL, NULL);
++	if (clusters_to_alloc || extents_to_split) {
++		/*
++		 * XXX: We are stretching the limits of
++		 * ocfs2_lock_allocators(). It greatly over-estimates
++		 * the work to be done.
++		 */
++		ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc,
++					    extents_to_split, &data_ac, &meta_ac);
+ 	if (ret) {
+ 		mlog_errno(ret);
+-		goto out_meta;
++			goto out;
+ 	}
+ 
+-	/* phys == 0 means that allocation is required. */
+-	if (phys == 0) {
+-		ret = ocfs2_lock_allocators(inode, di, 1, &data_ac, &meta_ac);
++		credits = ocfs2_calc_extend_credits(inode->i_sb, di,
++						    clusters_to_alloc);
++
++	}
++
++	ocfs2_set_target_boundaries(osb, wc, pos, len,
++				    clusters_to_alloc + extents_to_split);
++
++	handle = ocfs2_start_trans(osb, credits);
++	if (IS_ERR(handle)) {
++		ret = PTR_ERR(handle);
++		mlog_errno(ret);
++		goto out;
++	}
++
++	wc->w_handle = handle;
++
++	/*
++	 * We don't want this to fail in ocfs2_write_end(), so do it
++	 * here.
++	 */
++	ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
++				   OCFS2_JOURNAL_ACCESS_WRITE);
+ 		if (ret) {
+ 			mlog_errno(ret);
+-			goto out_meta;
++		goto out_commit;
+ 		}
+ 
+-		credits = ocfs2_calc_extend_credits(inode->i_sb, di, 1);
++	/*
++	 * Fill our page array first. That way we've grabbed enough so
++	 * that we can zero and flush if we error after adding the
++	 * extent.
++	 */
++	ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
++					 clusters_to_alloc + extents_to_split,
++					 mmap_page);
++	if (ret) {
++		mlog_errno(ret);
++		goto out_commit;
+ 	}
+ 
+-	ret = ocfs2_data_lock(inode, 1);
++	ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
++					  len);
+ 	if (ret) {
+ 		mlog_errno(ret);
+-		goto out_meta;
++		goto out_commit;
+ 	}
+ 
+-	handle = ocfs2_start_trans(osb, credits);
+-	if (IS_ERR(handle)) {
+-		ret = PTR_ERR(handle);
++	if (data_ac)
++		ocfs2_free_alloc_context(data_ac);
++	if (meta_ac)
++		ocfs2_free_alloc_context(meta_ac);
++
++	*pagep = wc->w_target_page;
++	*fsdata = wc;
++	return 0;
++out_commit:
++	ocfs2_commit_trans(osb, handle);
++
++out:
++	ocfs2_free_write_ctxt(wc);
++
++	if (data_ac)
++		ocfs2_free_alloc_context(data_ac);
++	if (meta_ac)
++		ocfs2_free_alloc_context(meta_ac);
++	return ret;
++}
++
++int ocfs2_write_begin(struct file *file, struct address_space *mapping,
++		      loff_t pos, unsigned len, unsigned flags,
++		      struct page **pagep, void **fsdata)
++{
++	int ret;
++	struct buffer_head *di_bh = NULL;
++	struct inode *inode = mapping->host;
++
++	ret = ocfs2_meta_lock(inode, &di_bh, 1);
++	if (ret) {
+ 		mlog_errno(ret);
+-		goto out_data;
++		return ret;
+ 	}
+ 
+-	written = ocfs2_write(file, phys, handle, di_bh, data_ac,
+-			      meta_ac, &wc);
+-	if (written < 0) {
+-		ret = written;
++	/*
++	 * Take alloc sem here to prevent concurrent lookups. That way
++	 * the mapping, zeroing and tree manipulation within the
++	 * write path will be safe against ->readpage(). This
++	 * should also serve to lock out allocation from a shared
++	 * writeable region.
++	 */
++	down_write(&OCFS2_I(inode)->ip_alloc_sem);
++
++	ret = ocfs2_data_lock(inode, 1);
++	if (ret) {
+ 		mlog_errno(ret);
+-		goto out_commit;
++		goto out_fail;
+ 	}
+ 
+-	ret = ocfs2_journal_access(handle, inode, di_bh,
+-				   OCFS2_JOURNAL_ACCESS_WRITE);
++	ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep,
++				       fsdata, di_bh, NULL);
+ 	if (ret) {
+ 		mlog_errno(ret);
+-		goto out_commit;
++		goto out_fail_data;
++	}
++
++	brelse(di_bh);
++
++	return 0;
++
++out_fail_data:
++	ocfs2_data_unlock(inode, 1);
++out_fail:
++	up_write(&OCFS2_I(inode)->ip_alloc_sem);
++
++	brelse(di_bh);
++	ocfs2_meta_unlock(inode, 1);
++
++	return ret;
++}
++
++int ocfs2_write_end_nolock(struct address_space *mapping,
++			   loff_t pos, unsigned len, unsigned copied,
++			   struct page *page, void *fsdata)
++{
++	int i;
++	unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1);
++	struct inode *inode = mapping->host;
++	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
++	struct ocfs2_write_ctxt *wc = fsdata;
++	struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
++	handle_t *handle = wc->w_handle;
++	struct page *tmppage;
++
++	if (unlikely(copied < len)) {
++		if (!PageUptodate(wc->w_target_page))
++			copied = 0;
++
++		ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
++				       start+len);
++	}
++	flush_dcache_page(wc->w_target_page);
++
++	for(i = 0; i < wc->w_num_pages; i++) {
++		tmppage = wc->w_pages[i];
++
++		if (tmppage == wc->w_target_page) {
++			from = wc->w_target_from;
++			to = wc->w_target_to;
++
++			BUG_ON(from > PAGE_CACHE_SIZE ||
++			       to > PAGE_CACHE_SIZE ||
++			       to < from);
++		} else {
++			/*
++			 * Pages adjacent to the target (if any) imply
++			 * a hole-filling write in which case we want
++			 * to flush their entire range.
++			 */
++			from = 0;
++			to = PAGE_CACHE_SIZE;
++		}
++
++		if (ocfs2_should_order_data(inode))
++			walk_page_buffers(wc->w_handle, page_buffers(tmppage),
++					  from, to, NULL,
++					  ocfs2_journal_dirty_data);
++
++		block_commit_write(tmppage, from, to);
+ 	}
+ 
+-	pos += written;
++	pos += copied;
+ 	if (pos > inode->i_size) {
+ 		i_size_write(inode, pos);
+ 		mark_inode_dirty(inode);
+@@ -1306,28 +1604,31 @@
+ 	di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+ 	di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ 
+-	ret = ocfs2_journal_dirty(handle, di_bh);
+-	if (ret)
+-		mlog_errno(ret);
++	ocfs2_journal_dirty(handle, wc->w_di_bh);
+ 
+-out_commit:
+ 	ocfs2_commit_trans(osb, handle);
+ 
+-out_data:
+-	ocfs2_data_unlock(inode, 1);
++	ocfs2_run_deallocs(osb, &wc->w_dealloc);
++
++	ocfs2_free_write_ctxt(wc);
++
++	return copied;
++}
++
++int ocfs2_write_end(struct file *file, struct address_space *mapping,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct page *page, void *fsdata)
++{
++	int ret;
++	struct inode *inode = mapping->host;
++
++	ret = ocfs2_write_end_nolock(mapping, pos, len, copied, page, fsdata);
+ 
+-out_meta:
++	ocfs2_data_unlock(inode, 1);
+ 	up_write(&OCFS2_I(inode)->ip_alloc_sem);
+ 	ocfs2_meta_unlock(inode, 1);
+ 
+-out:
+-	brelse(di_bh);
+-	if (data_ac)
+-		ocfs2_free_alloc_context(data_ac);
+-	if (meta_ac)
+-		ocfs2_free_alloc_context(meta_ac);
+-
+-	return written ? written : ret;
++	return ret;
+ }
+ 
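+A minimal sketch of how the begin/end pair is meant to be driven; the copy
+helper is a stand-in for the generic write path, not a real API:
+
+	struct page *page;
+	void *fsdata;
+	unsigned copied;
+	int ret;
+
+	ret = ocfs2_write_begin(file, mapping, pos, len, 0, &page, &fsdata);
+	if (ret)
+		return ret;
+
+	/* copy 'len' user bytes into the locked, mapped target page */
+	copied = my_copy_from_user(page, pos & (PAGE_CACHE_SIZE - 1),
+				   len, buf);
+
+	/* zeroes short copies, commits buffers, updates i_size, and
+	 * drops the transaction started in ocfs2_write_begin() */
+	ret = ocfs2_write_end(file, mapping, pos, len, copied, page, fsdata);
+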
+ const struct address_space_operations ocfs2_aops = {
+diff -Nurb linux-2.6.22-570/fs/ocfs2/aops.h linux-2.6.22-try2/fs/ocfs2/aops.h
+--- linux-2.6.22-570/fs/ocfs2/aops.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/aops.h	2007-12-19 15:29:23.000000000 -0500
+@@ -42,57 +42,22 @@
+ 			int (*fn)(	handle_t *handle,
+ 					struct buffer_head *bh));
+ 
+-struct ocfs2_write_ctxt;
+-typedef int (ocfs2_page_writer)(struct inode *, struct ocfs2_write_ctxt *,
+-				u64 *, unsigned int *, unsigned int *);
+-
+-ssize_t ocfs2_buffered_write_cluster(struct file *file, loff_t pos,
+-				     size_t count, ocfs2_page_writer *actor,
+-				     void *priv);
+-
+-struct ocfs2_write_ctxt {
+-	size_t				w_count;
+-	loff_t				w_pos;
+-	u32				w_cpos;
+-	unsigned int			w_finished_copy;
+-
+-	/* This is true if page_size > cluster_size */
+-	unsigned int			w_large_pages;
+-
+-	/* Filler callback and private data */
+-	ocfs2_page_writer		*w_write_data_page;
+-	void				*w_private;
+-
+-	/* Only valid for the filler callback */
+-	struct page			*w_this_page;
+-	unsigned int			w_this_page_new;
+-};
+-
+-struct ocfs2_buffered_write_priv {
+-	char				*b_src_buf;
+-	const struct iovec		*b_cur_iov; /* Current iovec */
+-	size_t				b_cur_off; /* Offset in the
+-						    * current iovec */
+-};
+-int ocfs2_map_and_write_user_data(struct inode *inode,
+-				  struct ocfs2_write_ctxt *wc,
+-				  u64 *p_blkno,
+-				  unsigned int *ret_from,
+-				  unsigned int *ret_to);
+-
+-struct ocfs2_splice_write_priv {
+-	struct splice_desc		*s_sd;
+-	struct pipe_buffer		*s_buf;
+-	struct pipe_inode_info		*s_pipe;
+-	/* Neither offset value is ever larger than one page */
+-	unsigned int			s_offset;
+-	unsigned int			s_buf_offset;
+-};
+-int ocfs2_map_and_write_splice_data(struct inode *inode,
+-				    struct ocfs2_write_ctxt *wc,
+-				    u64 *p_blkno,
+-				    unsigned int *ret_from,
+-				    unsigned int *ret_to);
++int ocfs2_write_begin(struct file *file, struct address_space *mapping,
++		      loff_t pos, unsigned len, unsigned flags,
++		      struct page **pagep, void **fsdata);
++
++int ocfs2_write_end(struct file *file, struct address_space *mapping,
++		    loff_t pos, unsigned len, unsigned copied,
++		    struct page *page, void *fsdata);
++
++int ocfs2_write_end_nolock(struct address_space *mapping,
++			   loff_t pos, unsigned len, unsigned copied,
++			   struct page *page, void *fsdata);
++
++int ocfs2_write_begin_nolock(struct address_space *mapping,
++			     loff_t pos, unsigned len, unsigned flags,
++			     struct page **pagep, void **fsdata,
++			     struct buffer_head *di_bh, struct page *mmap_page);
+ 
+ /* all ocfs2_dio_end_io()'s fault */
+ #define ocfs2_iocb_is_rw_locked(iocb) \
+diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.c linux-2.6.22-try2/fs/ocfs2/cluster/heartbeat.c
+--- linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/cluster/heartbeat.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1335,6 +1335,7 @@
+ 	ret = wait_event_interruptible(o2hb_steady_queue,
+ 				atomic_read(&reg->hr_steady_iterations) == 0);
+ 	if (ret) {
++		/* We got interrupted (hello ptrace!).  Clean up */
+ 		spin_lock(&o2hb_live_lock);
+ 		hb_task = reg->hr_task;
+ 		reg->hr_task = NULL;
+@@ -1345,7 +1346,16 @@
+ 		goto out;
+ 	}
+ 
++	/* Ok, we were woken.  Make sure it wasn't by drop_item() */
++	spin_lock(&o2hb_live_lock);
++	hb_task = reg->hr_task;
++	spin_unlock(&o2hb_live_lock);
++
++	if (hb_task)
+ 	ret = count;
++	else
++		ret = -EIO;
++
+ out:
+ 	if (filp)
+ 		fput(filp);
+@@ -1523,6 +1533,15 @@
+ 	if (hb_task)
+ 		kthread_stop(hb_task);
+ 
++	/*
++	 * If we're racing a dev_write(), we need to wake them.  They will
++	 * check reg->hr_task
++	 */
++	if (atomic_read(&reg->hr_steady_iterations) != 0) {
++		atomic_set(&reg->hr_steady_iterations, 0);
++		wake_up(&o2hb_steady_queue);
++	}
++
+ 	config_item_put(item);
+ }
+ 
+@@ -1665,7 +1684,67 @@
+ }
+ EXPORT_SYMBOL_GPL(o2hb_setup_callback);
+ 
+-int o2hb_register_callback(struct o2hb_callback_func *hc)
++static struct o2hb_region *o2hb_find_region(const char *region_uuid)
++{
++	struct o2hb_region *p, *reg = NULL;
++
++	assert_spin_locked(&o2hb_live_lock);
++
++	list_for_each_entry(p, &o2hb_all_regions, hr_all_item) {
++		if (!strcmp(region_uuid, config_item_name(&p->hr_item))) {
++			reg = p;
++			break;
++		}
++	}
++
++	return reg;
++}
++
++static int o2hb_region_get(const char *region_uuid)
++{
++	int ret = 0;
++	struct o2hb_region *reg;
++
++	spin_lock(&o2hb_live_lock);
++
++	reg = o2hb_find_region(region_uuid);
++	if (!reg)
++		ret = -ENOENT;
++	spin_unlock(&o2hb_live_lock);
++
++	if (ret)
++		goto out;
++
++	ret = o2nm_depend_this_node();
++	if (ret)
++		goto out;
++
++	ret = o2nm_depend_item(&reg->hr_item);
++	if (ret)
++		o2nm_undepend_this_node();
++
++out:
++	return ret;
++}
++
++static void o2hb_region_put(const char *region_uuid)
++{
++	struct o2hb_region *reg;
++
++	spin_lock(&o2hb_live_lock);
++
++	reg = o2hb_find_region(region_uuid);
++
++	spin_unlock(&o2hb_live_lock);
++
++	if (reg) {
++		o2nm_undepend_item(&reg->hr_item);
++		o2nm_undepend_this_node();
++	}
++}
++
++int o2hb_register_callback(const char *region_uuid,
++			   struct o2hb_callback_func *hc)
+ {
+ 	struct o2hb_callback_func *tmp;
+ 	struct list_head *iter;
+@@ -1681,6 +1760,12 @@
+ 		goto out;
+ 	}
+ 
++	if (region_uuid) {
++		ret = o2hb_region_get(region_uuid);
++		if (ret)
++			goto out;
++	}
++
+ 	down_write(&o2hb_callback_sem);
+ 
+ 	list_for_each(iter, &hbcall->list) {
+@@ -1702,16 +1787,21 @@
+ }
+ EXPORT_SYMBOL_GPL(o2hb_register_callback);
+ 
+-void o2hb_unregister_callback(struct o2hb_callback_func *hc)
++void o2hb_unregister_callback(const char *region_uuid,
++			      struct o2hb_callback_func *hc)
+ {
+ 	BUG_ON(hc->hc_magic != O2HB_CB_MAGIC);
+ 
+ 	mlog(ML_HEARTBEAT, "on behalf of %p for funcs %p\n",
+ 	     __builtin_return_address(0), hc);
+ 
++	/* XXX Can this happen _with_ a region reference? */
+ 	if (list_empty(&hc->hc_item))
+ 		return;
+ 
++	if (region_uuid)
++		o2hb_region_put(region_uuid);
++
+ 	down_write(&o2hb_callback_sem);
+ 
+ 	list_del_init(&hc->hc_item);
+diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.h linux-2.6.22-try2/fs/ocfs2/cluster/heartbeat.h
+--- linux-2.6.22-570/fs/ocfs2/cluster/heartbeat.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/cluster/heartbeat.h	2007-12-19 15:29:23.000000000 -0500
+@@ -69,8 +69,10 @@
+ 			 o2hb_cb_func *func,
+ 			 void *data,
+ 			 int priority);
+-int o2hb_register_callback(struct o2hb_callback_func *hc);
+-void o2hb_unregister_callback(struct o2hb_callback_func *hc);
++int o2hb_register_callback(const char *region_uuid,
++			   struct o2hb_callback_func *hc);
++void o2hb_unregister_callback(const char *region_uuid,
++			      struct o2hb_callback_func *hc);
+ void o2hb_fill_node_map(unsigned long *map,
+ 			unsigned bytes);
+ void o2hb_init(void);
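+ 
+ A sketch of how a consumer would use the region-aware variants (the osb
+ fields, callback and priority constant are illustrative):
+ 
+ 	o2hb_setup_callback(&osb->osb_hb_down, O2HB_NODE_DOWN_CB,
+ 			    my_node_down_cb, osb, MY_HB_PRI);
+ 
+ 	/* pins the named region and the local node via configfs */
+ 	ret = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down);
+ 	...
+ 	/* drops both configfs dependencies again */
+ 	o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);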
+diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/masklog.c linux-2.6.22-try2/fs/ocfs2/cluster/masklog.c
+--- linux-2.6.22-570/fs/ocfs2/cluster/masklog.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/cluster/masklog.c	2007-12-19 15:29:22.000000000 -0500
+@@ -74,7 +74,6 @@
+ #define define_mask(_name) {			\
+ 	.attr = {				\
+ 		.name = #_name,			\
+-		.owner = THIS_MODULE,		\
+ 		.mode = S_IRUGO | S_IWUSR,	\
+ 	},					\
+ 	.mask = ML_##_name,			\
+diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.c linux-2.6.22-try2/fs/ocfs2/cluster/nodemanager.c
+--- linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/cluster/nodemanager.c	2007-12-19 15:29:23.000000000 -0500
+@@ -900,6 +900,46 @@
+ 	},
+ };
+ 
++int o2nm_depend_item(struct config_item *item)
++{
++	return configfs_depend_item(&o2nm_cluster_group.cs_subsys, item);
++}
++
++void o2nm_undepend_item(struct config_item *item)
++{
++	configfs_undepend_item(&o2nm_cluster_group.cs_subsys, item);
++}
++
++int o2nm_depend_this_node(void)
++{
++	int ret = 0;
++	struct o2nm_node *local_node;
++
++	local_node = o2nm_get_node_by_num(o2nm_this_node());
++	if (!local_node) {
++		ret = -EINVAL;
++		goto out;
++	}
++
++	ret = o2nm_depend_item(&local_node->nd_item);
++	o2nm_node_put(local_node);
++
++out:
++	return ret;
++}
++
++void o2nm_undepend_this_node(void)
++{
++	struct o2nm_node *local_node;
++
++	local_node = o2nm_get_node_by_num(o2nm_this_node());
++	BUG_ON(!local_node);
++
++	o2nm_undepend_item(&local_node->nd_item);
++	o2nm_node_put(local_node);
++}
++
++
+ static void __exit exit_o2nm(void)
+ {
+ 	if (ocfs2_table_header)
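
o2nm_depend_item() above wraps configfs_depend_item(), which pins a configfs
object so userspace cannot rmdir it while the kernel holds a dependency on
it. A minimal sketch of the pairing discipline the heartbeat code relies on,
with error handling trimmed:

	/* Pin the local node's configfs item for as long as it is needed. */
	ret = o2nm_depend_this_node();
	if (ret)
		goto out;
	/* ... work that assumes the node configuration stays put ... */
	o2nm_undepend_this_node();	/* release the pin */
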
+diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.h linux-2.6.22-try2/fs/ocfs2/cluster/nodemanager.h
+--- linux-2.6.22-570/fs/ocfs2/cluster/nodemanager.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/cluster/nodemanager.h	2007-12-19 15:29:23.000000000 -0500
+@@ -77,4 +77,9 @@
+ void o2nm_node_get(struct o2nm_node *node);
+ void o2nm_node_put(struct o2nm_node *node);
+ 
++int o2nm_depend_item(struct config_item *item);
++void o2nm_undepend_item(struct config_item *item);
++int o2nm_depend_this_node(void);
++void o2nm_undepend_this_node(void);
++
+ #endif /* O2CLUSTER_NODEMANAGER_H */
+diff -Nurb linux-2.6.22-570/fs/ocfs2/cluster/tcp.c linux-2.6.22-try2/fs/ocfs2/cluster/tcp.c
+--- linux-2.6.22-570/fs/ocfs2/cluster/tcp.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/cluster/tcp.c	2007-12-19 15:29:23.000000000 -0500
+@@ -261,14 +261,12 @@
+ 
+ static void o2net_complete_nodes_nsw(struct o2net_node *nn)
+ {
+-	struct list_head *iter, *tmp;
++	struct o2net_status_wait *nsw, *tmp;
+ 	unsigned int num_kills = 0;
+-	struct o2net_status_wait *nsw;
+ 
+ 	assert_spin_locked(&nn->nn_lock);
+ 
+-	list_for_each_safe(iter, tmp, &nn->nn_status_list) {
+-		nsw = list_entry(iter, struct o2net_status_wait, ns_node_item);
++	list_for_each_entry_safe(nsw, tmp, &nn->nn_status_list, ns_node_item) {
+ 		o2net_complete_nsw_locked(nn, nsw, O2NET_ERR_DIED, 0);
+ 		num_kills++;
+ 	}
+@@ -764,13 +762,10 @@
+ 
+ void o2net_unregister_handler_list(struct list_head *list)
+ {
+-	struct list_head *pos, *n;
+-	struct o2net_msg_handler *nmh;
++	struct o2net_msg_handler *nmh, *n;
+ 
+ 	write_lock(&o2net_handler_lock);
+-	list_for_each_safe(pos, n, list) {
+-		nmh = list_entry(pos, struct o2net_msg_handler,
+-				 nh_unregister_item);
++	list_for_each_entry_safe(nmh, n, list, nh_unregister_item) {
+ 		mlog(ML_TCP, "unregistering handler func %p type %u key %08x\n",
+ 		     nmh->nh_func, nmh->nh_msg_type, nmh->nh_key);
+ 		rb_erase(&nmh->nh_node, &o2net_handler_tree);
+@@ -1638,8 +1633,8 @@
+ 
+ void o2net_unregister_hb_callbacks(void)
+ {
+-	o2hb_unregister_callback(&o2net_hb_up);
+-	o2hb_unregister_callback(&o2net_hb_down);
++	o2hb_unregister_callback(NULL, &o2net_hb_up);
++	o2hb_unregister_callback(NULL, &o2net_hb_down);
+ }
+ 
+ int o2net_register_hb_callbacks(void)
+@@ -1651,9 +1646,9 @@
+ 	o2hb_setup_callback(&o2net_hb_up, O2HB_NODE_UP_CB,
+ 			    o2net_hb_node_up_cb, NULL, O2NET_HB_PRI);
+ 
+-	ret = o2hb_register_callback(&o2net_hb_up);
++	ret = o2hb_register_callback(NULL, &o2net_hb_up);
+ 	if (ret == 0)
+-		ret = o2hb_register_callback(&o2net_hb_down);
++		ret = o2hb_register_callback(NULL, &o2net_hb_down);
+ 
+ 	if (ret)
+ 		o2net_unregister_hb_callbacks();
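
The iterator rewrites in tcp.c above (and in the dlm files below) are a
mechanical conversion from list_for_each_safe() plus list_entry() to the
typed list_for_each_entry_safe(), which drops the separate iterator variable
and the cast boilerplate while staying safe against deletion of the current
entry. A standalone userspace demo of the pattern; the macros are simplified
re-implementations of the ones in include/linux/list.h, not the kernel's own,
and GCC typeof is assumed:

	#include <stdio.h>
	#include <stddef.h>

	struct list_head { struct list_head *next, *prev; };

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	#define list_for_each_entry_safe(pos, n, head, member)			   \
		for (pos = container_of((head)->next, typeof(*pos), member),	   \
		     n = container_of(pos->member.next, typeof(*pos), member);	   \
		     &pos->member != (head);					   \
		     pos = n, n = container_of(n->member.next, typeof(*n), member))

	static void list_add_tail(struct list_head *new, struct list_head *head)
	{
		new->prev = head->prev; new->next = head;
		head->prev->next = new; head->prev = new;
	}

	static void list_del(struct list_head *e)
	{
		e->prev->next = e->next; e->next->prev = e->prev;
	}

	struct item { int v; struct list_head link; };

	int main(void)
	{
		struct list_head head = { &head, &head };
		struct item *it, *n, pool[3] = { { 1 }, { 2 }, { 3 } };
		int i;

		for (i = 0; i < 3; i++)
			list_add_tail(&pool[i].link, &head);

		/* Deleting the current entry is safe: 'n' was sampled first. */
		list_for_each_entry_safe(it, n, &head, link) {
			printf("removing %d\n", it->v);
			list_del(&it->link);
		}
		return 0;
	}
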
+diff -Nurb linux-2.6.22-570/fs/ocfs2/dir.c linux-2.6.22-try2/fs/ocfs2/dir.c
+--- linux-2.6.22-570/fs/ocfs2/dir.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/dir.c	2007-12-19 15:29:23.000000000 -0500
+@@ -368,7 +368,7 @@
+ 		u32 offset = OCFS2_I(dir)->ip_clusters;
+ 
+ 		status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset,
+-						    1, parent_fe_bh, handle,
++						    1, 0, parent_fe_bh, handle,
+ 						    data_ac, meta_ac, NULL);
+ 		BUG_ON(status == -EAGAIN);
+ 		if (status < 0) {
+diff -Nurb linux-2.6.22-570/fs/ocfs2/dlm/dlmdomain.c linux-2.6.22-try2/fs/ocfs2/dlm/dlmdomain.c
+--- linux-2.6.22-570/fs/ocfs2/dlm/dlmdomain.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/dlm/dlmdomain.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1128,8 +1128,8 @@
+ 
+ static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm)
+ {
+-	o2hb_unregister_callback(&dlm->dlm_hb_up);
+-	o2hb_unregister_callback(&dlm->dlm_hb_down);
++	o2hb_unregister_callback(NULL, &dlm->dlm_hb_up);
++	o2hb_unregister_callback(NULL, &dlm->dlm_hb_down);
+ 	o2net_unregister_handler_list(&dlm->dlm_domain_handlers);
+ }
+ 
+@@ -1141,13 +1141,13 @@
+ 
+ 	o2hb_setup_callback(&dlm->dlm_hb_down, O2HB_NODE_DOWN_CB,
+ 			    dlm_hb_node_down_cb, dlm, DLM_HB_NODE_DOWN_PRI);
+-	status = o2hb_register_callback(&dlm->dlm_hb_down);
++	status = o2hb_register_callback(NULL, &dlm->dlm_hb_down);
+ 	if (status)
+ 		goto bail;
+ 
+ 	o2hb_setup_callback(&dlm->dlm_hb_up, O2HB_NODE_UP_CB,
+ 			    dlm_hb_node_up_cb, dlm, DLM_HB_NODE_UP_PRI);
+-	status = o2hb_register_callback(&dlm->dlm_hb_up);
++	status = o2hb_register_callback(NULL, &dlm->dlm_hb_up);
+ 	if (status)
+ 		goto bail;
+ 
+diff -Nurb linux-2.6.22-570/fs/ocfs2/dlm/dlmmaster.c linux-2.6.22-try2/fs/ocfs2/dlm/dlmmaster.c
+--- linux-2.6.22-570/fs/ocfs2/dlm/dlmmaster.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/dlm/dlmmaster.c	2007-12-19 15:29:23.000000000 -0500
+@@ -192,25 +192,20 @@
+ static void dlm_dump_mles(struct dlm_ctxt *dlm)
+ {
+ 	struct dlm_master_list_entry *mle;
+-	struct list_head *iter;
+ 	
+ 	mlog(ML_NOTICE, "dumping all mles for domain %s:\n", dlm->name);
+ 	spin_lock(&dlm->master_lock);
+-	list_for_each(iter, &dlm->master_list) {
+-		mle = list_entry(iter, struct dlm_master_list_entry, list);
++	list_for_each_entry(mle, &dlm->master_list, list)
+ 		dlm_print_one_mle(mle);
+-	}
+ 	spin_unlock(&dlm->master_lock);
+ }
+ 
+ int dlm_dump_all_mles(const char __user *data, unsigned int len)
+ {
+-	struct list_head *iter;
+ 	struct dlm_ctxt *dlm;
+ 
+ 	spin_lock(&dlm_domain_lock);
+-	list_for_each(iter, &dlm_domains) {
+-		dlm = list_entry (iter, struct dlm_ctxt, list);
++	list_for_each_entry(dlm, &dlm_domains, list) {
+ 		mlog(ML_NOTICE, "found dlm: %p, name=%s\n", dlm, dlm->name);
+ 		dlm_dump_mles(dlm);
+ 	}
+@@ -454,12 +449,10 @@
+ 			char *name, unsigned int namelen)
+ {
+ 	struct dlm_master_list_entry *tmpmle;
+-	struct list_head *iter;
+ 
+ 	assert_spin_locked(&dlm->master_lock);
+ 
+-	list_for_each(iter, &dlm->master_list) {
+-		tmpmle = list_entry(iter, struct dlm_master_list_entry, list);
++	list_for_each_entry(tmpmle, &dlm->master_list, list) {
+ 		if (!dlm_mle_equal(dlm, tmpmle, name, namelen))
+ 			continue;
+ 		dlm_get_mle(tmpmle);
+@@ -472,13 +465,10 @@
+ void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
+ {
+ 	struct dlm_master_list_entry *mle;
+-	struct list_head *iter;
+ 
+ 	assert_spin_locked(&dlm->spinlock);
+ 	
+-	list_for_each(iter, &dlm->mle_hb_events) {
+-		mle = list_entry(iter, struct dlm_master_list_entry, 
+-				 hb_events);
++	list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
+ 		if (node_up)
+ 			dlm_mle_node_up(dlm, mle, NULL, idx);
+ 		else
+@@ -2434,7 +2424,7 @@
+ 	int ret;
+ 	int i;
+ 	int count = 0;
+-	struct list_head *queue, *iter;
++	struct list_head *queue;
+ 	struct dlm_lock *lock;
+ 
+ 	assert_spin_locked(&res->spinlock);
+@@ -2453,8 +2443,7 @@
+ 	ret = 0;
+ 	queue = &res->granted;
+ 	for (i = 0; i < 3; i++) {
+-		list_for_each(iter, queue) {
+-			lock = list_entry(iter, struct dlm_lock, list);
++		list_for_each_entry(lock, queue, list) {
+ 			++count;
+ 			if (lock->ml.node == dlm->node_num) {
+ 				mlog(0, "found a lock owned by this node still "
+@@ -2923,18 +2912,16 @@
+ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
+ 				      struct dlm_lock_resource *res)
+ {
+-	struct list_head *iter, *iter2;
+ 	struct list_head *queue = &res->granted;
+ 	int i, bit;
+-	struct dlm_lock *lock;
++	struct dlm_lock *lock, *next;
+ 
+ 	assert_spin_locked(&res->spinlock);
+ 
+ 	BUG_ON(res->owner == dlm->node_num);
+ 
+ 	for (i=0; i<3; i++) {
+-		list_for_each_safe(iter, iter2, queue) {
+-			lock = list_entry (iter, struct dlm_lock, list);
++		list_for_each_entry_safe(lock, next, queue, list) {
+ 			if (lock->ml.node != dlm->node_num) {
+ 				mlog(0, "putting lock for node %u\n",
+ 				     lock->ml.node);
+@@ -2976,7 +2963,6 @@
+ {
+ 	int i;
+ 	struct list_head *queue = &res->granted;
+-	struct list_head *iter;
+ 	struct dlm_lock *lock;
+ 	int nodenum;
+ 
+@@ -2984,10 +2970,9 @@
+ 
+ 	spin_lock(&res->spinlock);
+ 	for (i=0; i<3; i++) {
+-		list_for_each(iter, queue) {
++		list_for_each_entry(lock, queue, list) {
+ 			/* up to the caller to make sure this node
+ 			 * is alive */
+-			lock = list_entry (iter, struct dlm_lock, list);
+ 			if (lock->ml.node != dlm->node_num) {
+ 				spin_unlock(&res->spinlock);
+ 				return lock->ml.node;
+@@ -3234,8 +3219,7 @@
+ 
+ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
+ {
+-	struct list_head *iter, *iter2;
+-	struct dlm_master_list_entry *mle;
++	struct dlm_master_list_entry *mle, *next;
+ 	struct dlm_lock_resource *res;
+ 	unsigned int hash;
+ 
+@@ -3245,9 +3229,7 @@
+ 
+ 	/* clean the master list */
+ 	spin_lock(&dlm->master_lock);
+-	list_for_each_safe(iter, iter2, &dlm->master_list) {
+-		mle = list_entry(iter, struct dlm_master_list_entry, list);
+-
++	list_for_each_entry_safe(mle, next, &dlm->master_list, list) {
+ 		BUG_ON(mle->type != DLM_MLE_BLOCK &&
+ 		       mle->type != DLM_MLE_MASTER &&
+ 		       mle->type != DLM_MLE_MIGRATION);
+diff -Nurb linux-2.6.22-570/fs/ocfs2/dlm/dlmrecovery.c linux-2.6.22-try2/fs/ocfs2/dlm/dlmrecovery.c
+--- linux-2.6.22-570/fs/ocfs2/dlm/dlmrecovery.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/dlm/dlmrecovery.c	2007-12-19 15:29:23.000000000 -0500
+@@ -158,8 +158,7 @@
+ 	struct dlm_ctxt *dlm =
+ 		container_of(work, struct dlm_ctxt, dispatched_work);
+ 	LIST_HEAD(tmp_list);
+-	struct list_head *iter, *iter2;
+-	struct dlm_work_item *item;
++	struct dlm_work_item *item, *next;
+ 	dlm_workfunc_t *workfunc;
+ 	int tot=0;
+ 
+@@ -167,13 +166,12 @@
+ 	list_splice_init(&dlm->work_list, &tmp_list);
+ 	spin_unlock(&dlm->work_lock);
+ 
+-	list_for_each_safe(iter, iter2, &tmp_list) {
++	list_for_each_entry(item, &tmp_list, list) {
+ 		tot++;
+ 	}
+ 	mlog(0, "%s: work thread has %d work items\n", dlm->name, tot);
+ 
+-	list_for_each_safe(iter, iter2, &tmp_list) {
+-		item = list_entry(iter, struct dlm_work_item, list);
++	list_for_each_entry_safe(item, next, &tmp_list, list) {
+ 		workfunc = item->func;
+ 		list_del_init(&item->list);
+ 
+@@ -549,7 +547,6 @@
+ {
+ 	int status = 0;
+ 	struct dlm_reco_node_data *ndata;
+-	struct list_head *iter;
+ 	int all_nodes_done;
+ 	int destroy = 0;
+ 	int pass = 0;
+@@ -567,8 +564,7 @@
+ 
+ 	/* safe to access the node data list without a lock, since this
+ 	 * process is the only one to change the list */
+-	list_for_each(iter, &dlm->reco.node_data) {
+-		ndata = list_entry (iter, struct dlm_reco_node_data, list);
++	list_for_each_entry(ndata, &dlm->reco.node_data, list) {
+ 		BUG_ON(ndata->state != DLM_RECO_NODE_DATA_INIT);
+ 		ndata->state = DLM_RECO_NODE_DATA_REQUESTING;
+ 
+@@ -655,9 +651,7 @@
+ 		 * done, or if anyone died */
+ 		all_nodes_done = 1;
+ 		spin_lock(&dlm_reco_state_lock);
+-		list_for_each(iter, &dlm->reco.node_data) {
+-			ndata = list_entry (iter, struct dlm_reco_node_data, list);
+-
++		list_for_each_entry(ndata, &dlm->reco.node_data, list) {
+ 			mlog(0, "checking recovery state of node %u\n",
+ 			     ndata->node_num);
+ 			switch (ndata->state) {
+@@ -774,16 +768,14 @@
+ 
+ static void dlm_destroy_recovery_area(struct dlm_ctxt *dlm, u8 dead_node)
+ {
+-	struct list_head *iter, *iter2;
+-	struct dlm_reco_node_data *ndata;
++	struct dlm_reco_node_data *ndata, *next;
+ 	LIST_HEAD(tmplist);
+ 
+ 	spin_lock(&dlm_reco_state_lock);
+ 	list_splice_init(&dlm->reco.node_data, &tmplist);
+ 	spin_unlock(&dlm_reco_state_lock);
+ 
+-	list_for_each_safe(iter, iter2, &tmplist) {
+-		ndata = list_entry (iter, struct dlm_reco_node_data, list);
++	list_for_each_entry_safe(ndata, next, &tmplist, list) {
+ 		list_del_init(&ndata->list);
+ 		kfree(ndata);
+ 	}
+@@ -876,7 +868,6 @@
+ 	struct dlm_lock_resource *res;
+ 	struct dlm_ctxt *dlm;
+ 	LIST_HEAD(resources);
+-	struct list_head *iter;
+ 	int ret;
+ 	u8 dead_node, reco_master;
+ 	int skip_all_done = 0;
+@@ -920,8 +911,7 @@
+ 
+ 	/* any errors returned will be due to the new_master dying,
+ 	 * the dlm_reco_thread should detect this */
+-	list_for_each(iter, &resources) {
+-		res = list_entry (iter, struct dlm_lock_resource, recovering);
++	list_for_each_entry(res, &resources, recovering) {
+ 		ret = dlm_send_one_lockres(dlm, res, mres, reco_master,
+ 				   	DLM_MRES_RECOVERY);
+ 		if (ret < 0) {
+@@ -983,7 +973,6 @@
+ {
+ 	struct dlm_ctxt *dlm = data;
+ 	struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf;
+-	struct list_head *iter;
+ 	struct dlm_reco_node_data *ndata = NULL;
+ 	int ret = -EINVAL;
+ 
+@@ -1000,8 +989,7 @@
+ 			dlm->reco.dead_node, done->node_idx, dlm->node_num);
+ 
+ 	spin_lock(&dlm_reco_state_lock);
+-	list_for_each(iter, &dlm->reco.node_data) {
+-		ndata = list_entry (iter, struct dlm_reco_node_data, list);
++	list_for_each_entry(ndata, &dlm->reco.node_data, list) {
+ 		if (ndata->node_num != done->node_idx)
+ 			continue;
+ 
+@@ -1049,13 +1037,11 @@
+ 					struct list_head *list,
+ 				       	u8 dead_node)
+ {
+-	struct dlm_lock_resource *res;
+-	struct list_head *iter, *iter2;
++	struct dlm_lock_resource *res, *next;
+ 	struct dlm_lock *lock;
+ 
+ 	spin_lock(&dlm->spinlock);
+-	list_for_each_safe(iter, iter2, &dlm->reco.resources) {
+-		res = list_entry (iter, struct dlm_lock_resource, recovering);
++	list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
+ 		/* always prune any $RECOVERY entries for dead nodes,
+ 		 * otherwise hangs can occur during later recovery */
+ 		if (dlm_is_recovery_lock(res->lockname.name,
+@@ -1169,7 +1155,7 @@
+ 					u8 flags, u8 master)
+ {
+ 	/* mres here is one full page */
+-	memset(mres, 0, PAGE_SIZE);
++	clear_page(mres);
+ 	mres->lockname_len = namelen;
+ 	memcpy(mres->lockname, lockname, namelen);
+ 	mres->num_locks = 0;
+@@ -1252,7 +1238,7 @@
+ 			 struct dlm_migratable_lockres *mres,
+ 			 u8 send_to, u8 flags)
+ {
+-	struct list_head *queue, *iter;
++	struct list_head *queue;
+ 	int total_locks, i;
+ 	u64 mig_cookie = 0;
+ 	struct dlm_lock *lock;
+@@ -1278,9 +1264,7 @@
+ 	total_locks = 0;
+ 	for (i=DLM_GRANTED_LIST; i<=DLM_BLOCKED_LIST; i++) {
+ 		queue = dlm_list_idx_to_ptr(res, i);
+-		list_for_each(iter, queue) {
+-			lock = list_entry (iter, struct dlm_lock, list);
+-
++		list_for_each_entry(lock, queue, list) {
+ 			/* add another lock. */
+ 			total_locks++;
+ 			if (!dlm_add_lock_to_array(lock, mres, i))
+@@ -1717,7 +1701,6 @@
+ 	struct dlm_lockstatus *lksb = NULL;
+ 	int ret = 0;
+ 	int i, j, bad;
+-	struct list_head *iter;
+ 	struct dlm_lock *lock = NULL;
+ 	u8 from = O2NM_MAX_NODES;
+ 	unsigned int added = 0;
+@@ -1755,8 +1738,7 @@
+ 			spin_lock(&res->spinlock);
+ 			for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
+ 				tmpq = dlm_list_idx_to_ptr(res, j);
+-				list_for_each(iter, tmpq) {
+-					lock = list_entry (iter, struct dlm_lock, list);
++				list_for_each_entry(lock, tmpq, list) {
+ 					if (lock->ml.cookie != ml->cookie)
+ 						lock = NULL;
+ 					else
+@@ -1930,8 +1912,8 @@
+ 				       struct dlm_lock_resource *res)
+ {
+ 	int i;
+-	struct list_head *queue, *iter, *iter2;
+-	struct dlm_lock *lock;
++	struct list_head *queue;
++	struct dlm_lock *lock, *next;
+ 
+ 	res->state |= DLM_LOCK_RES_RECOVERING;
+ 	if (!list_empty(&res->recovering)) {
+@@ -1947,8 +1929,7 @@
+ 	/* find any pending locks and put them back on proper list */
+ 	for (i=DLM_BLOCKED_LIST; i>=DLM_GRANTED_LIST; i--) {
+ 		queue = dlm_list_idx_to_ptr(res, i);
+-		list_for_each_safe(iter, iter2, queue) {
+-			lock = list_entry (iter, struct dlm_lock, list);
++		list_for_each_entry_safe(lock, next, queue, list) {
+ 			dlm_lock_get(lock);
+ 			if (lock->convert_pending) {
+ 				/* move converting lock back to granted */
+@@ -2013,18 +1994,15 @@
+ 					      u8 dead_node, u8 new_master)
+ {
+ 	int i;
+-	struct list_head *iter, *iter2;
+ 	struct hlist_node *hash_iter;
+ 	struct hlist_head *bucket;
+-
+-	struct dlm_lock_resource *res;
++	struct dlm_lock_resource *res, *next;
+ 
+ 	mlog_entry_void();
+ 
+ 	assert_spin_locked(&dlm->spinlock);
+ 
+-	list_for_each_safe(iter, iter2, &dlm->reco.resources) {
+-		res = list_entry (iter, struct dlm_lock_resource, recovering);
++	list_for_each_entry_safe(res, next, &dlm->reco.resources, recovering) {
+ 		if (res->owner == dead_node) {
+ 			list_del_init(&res->recovering);
+ 			spin_lock(&res->spinlock);
+@@ -2099,7 +2077,7 @@
+ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
+ 			       struct dlm_lock_resource *res, u8 dead_node)
+ {
+-	struct list_head *iter, *queue;
++	struct list_head *queue;
+ 	struct dlm_lock *lock;
+ 	int blank_lvb = 0, local = 0;
+ 	int i;
+@@ -2121,8 +2099,7 @@
+ 
+ 	for (i=DLM_GRANTED_LIST; i<=DLM_CONVERTING_LIST; i++) {
+ 		queue = dlm_list_idx_to_ptr(res, i);
+-		list_for_each(iter, queue) {
+-			lock = list_entry (iter, struct dlm_lock, list);
++		list_for_each_entry(lock, queue, list) {
+ 			if (lock->ml.node == search_node) {
+ 				if (dlm_lvb_needs_invalidation(lock, local)) {
+ 					/* zero the lksb lvb and lockres lvb */
+@@ -2143,8 +2120,7 @@
+ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
+ 				struct dlm_lock_resource *res, u8 dead_node)
+ {
+-	struct list_head *iter, *tmpiter;
+-	struct dlm_lock *lock;
++	struct dlm_lock *lock, *next;
+ 	unsigned int freed = 0;
+ 
+ 	/* this node is the lockres master:
+@@ -2155,24 +2131,21 @@
+ 	assert_spin_locked(&res->spinlock);
+ 
+ 	/* TODO: check pending_asts, pending_basts here */
+-	list_for_each_safe(iter, tmpiter, &res->granted) {
+-		lock = list_entry (iter, struct dlm_lock, list);
++	list_for_each_entry_safe(lock, next, &res->granted, list) {
+ 		if (lock->ml.node == dead_node) {
+ 			list_del_init(&lock->list);
+ 			dlm_lock_put(lock);
+ 			freed++;
+ 		}
+ 	}
+-	list_for_each_safe(iter, tmpiter, &res->converting) {
+-		lock = list_entry (iter, struct dlm_lock, list);
++	list_for_each_entry_safe(lock, next, &res->converting, list) {
+ 		if (lock->ml.node == dead_node) {
+ 			list_del_init(&lock->list);
+ 			dlm_lock_put(lock);
+ 			freed++;
+ 		}
+ 	}
+-	list_for_each_safe(iter, tmpiter, &res->blocked) {
+-		lock = list_entry (iter, struct dlm_lock, list);
++	list_for_each_entry_safe(lock, next, &res->blocked, list) {
+ 		if (lock->ml.node == dead_node) {
+ 			list_del_init(&lock->list);
+ 			dlm_lock_put(lock);
+diff -Nurb linux-2.6.22-570/fs/ocfs2/dlmglue.c linux-2.6.22-try2/fs/ocfs2/dlmglue.c
+--- linux-2.6.22-570/fs/ocfs2/dlmglue.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/ocfs2/dlmglue.c	2007-12-19 15:29:23.000000000 -0500
+@@ -600,15 +600,13 @@
+ static void lockres_set_flags(struct ocfs2_lock_res *lockres,
+ 			      unsigned long newflags)
+ {
+-	struct list_head *pos, *tmp;
+-	struct ocfs2_mask_waiter *mw;
++	struct ocfs2_mask_waiter *mw, *tmp;
+ 
+  	assert_spin_locked(&lockres->l_lock);
+ 
+ 	lockres->l_flags = newflags;
+ 
+-	list_for_each_safe(pos, tmp, &lockres->l_mask_waiters) {
+-		mw = list_entry(pos, struct ocfs2_mask_waiter, mw_item);
++	list_for_each_entry_safe(mw, tmp, &lockres->l_mask_waiters, mw_item) {
+ 		if ((lockres->l_flags & mw->mw_mask) != mw->mw_goal)
+ 			continue;
+ 
+diff -Nurb linux-2.6.22-570/fs/ocfs2/endian.h linux-2.6.22-try2/fs/ocfs2/endian.h
+--- linux-2.6.22-570/fs/ocfs2/endian.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/endian.h	2007-12-19 15:29:23.000000000 -0500
+@@ -32,6 +32,11 @@
+ 	*var = cpu_to_le32(le32_to_cpu(*var) + val);
+ }
+ 
++static inline void le64_add_cpu(__le64 *var, u64 val)
++{
++	*var = cpu_to_le64(le64_to_cpu(*var) + val);
++}
++
+ static inline void le32_and_cpu(__le32 *var, u32 val)
+ {
+ 	*var = cpu_to_le32(le32_to_cpu(*var) & val);
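
The new le64_add_cpu() above follows the existing le32 helpers: decode the
little-endian field, add in CPU byte order, re-encode. A runnable userspace
sketch, with glibc's htole64()/le64toh() from <endian.h> standing in for the
kernel's cpu_to_le64()/le64_to_cpu():

	#include <stdint.h>
	#include <stdio.h>
	#include <endian.h>

	static void le64_add_cpu(uint64_t *var, uint64_t val)
	{
		*var = htole64(le64toh(*var) + val);
	}

	int main(void)
	{
		uint64_t disk_field = htole64(40);	/* on-disk LE field */

		le64_add_cpu(&disk_field, 2);
		printf("%llu\n", (unsigned long long)le64toh(disk_field)); /* 42 */
		return 0;
	}
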
+diff -Nurb linux-2.6.22-570/fs/ocfs2/extent_map.c linux-2.6.22-try2/fs/ocfs2/extent_map.c
+--- linux-2.6.22-570/fs/ocfs2/extent_map.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/extent_map.c	2007-12-19 15:29:23.000000000 -0500
+@@ -109,17 +109,14 @@
+  */
+ void ocfs2_extent_map_trunc(struct inode *inode, unsigned int cpos)
+ {
+-	struct list_head *p, *n;
+-	struct ocfs2_extent_map_item *emi;
++	struct ocfs2_extent_map_item *emi, *n;
+ 	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ 	struct ocfs2_extent_map *em = &oi->ip_extent_map;
+ 	LIST_HEAD(tmp_list);
+ 	unsigned int range;
+ 
+ 	spin_lock(&oi->ip_lock);
+-	list_for_each_safe(p, n, &em->em_list) {
+-		emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
+-
++	list_for_each_entry_safe(emi, n, &em->em_list, ei_list) {
+ 		if (emi->ei_cpos >= cpos) {
+ 			/* Full truncate of this record. */
+ 			list_move(&emi->ei_list, &tmp_list);
+@@ -136,8 +133,7 @@
+ 	}
+ 	spin_unlock(&oi->ip_lock);
+ 
+-	list_for_each_safe(p, n, &tmp_list) {
+-		emi = list_entry(p, struct ocfs2_extent_map_item, ei_list);
++	list_for_each_entry_safe(emi, n, &tmp_list, ei_list) {
+ 		list_del(&emi->ei_list);
+ 		kfree(emi);
+ 	}
+@@ -377,37 +373,6 @@
+ 	return ret;
+ }
+ 
+-/*
+- * Return the index of the extent record which contains cluster #v_cluster.
+- * -1 is returned if it was not found.
+- *
+- * Should work fine on interior and exterior nodes.
+- */
+-static int ocfs2_search_extent_list(struct ocfs2_extent_list *el,
+-				    u32 v_cluster)
+-{
+-	int ret = -1;
+-	int i;
+-	struct ocfs2_extent_rec *rec;
+-	u32 rec_end, rec_start, clusters;
+-
+-	for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
+-		rec = &el->l_recs[i];
+-
+-		rec_start = le32_to_cpu(rec->e_cpos);
+-		clusters = ocfs2_rec_clusters(el, rec);
+-
+-		rec_end = rec_start + clusters;
+-
+-		if (v_cluster >= rec_start && v_cluster < rec_end) {
+-			ret = i;
+-			break;
+-		}
+-	}
+-
+-	return ret;
+-}
+-
+ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
+ 		       u32 *p_cluster, u32 *num_clusters,
+ 		       unsigned int *extent_flags)
+diff -Nurb linux-2.6.22-570/fs/ocfs2/file.c linux-2.6.22-try2/fs/ocfs2/file.c
+--- linux-2.6.22-570/fs/ocfs2/file.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/ocfs2/file.c	2007-12-19 15:29:23.000000000 -0500
+@@ -326,9 +326,6 @@
+ 		   (unsigned long long)OCFS2_I(inode)->ip_blkno,
+ 		   (unsigned long long)new_i_size);
+ 
+-	unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
+-	truncate_inode_pages(inode->i_mapping, new_i_size);
+-
+ 	fe = (struct ocfs2_dinode *) di_bh->b_data;
+ 	if (!OCFS2_IS_VALID_DINODE(fe)) {
+ 		OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe);
+@@ -363,16 +360,23 @@
+ 	if (new_i_size == le64_to_cpu(fe->i_size))
+ 		goto bail;
+ 
++	down_write(&OCFS2_I(inode)->ip_alloc_sem);
++
+ 	/* This forces other nodes to sync and drop their pages. Do
+ 	 * this even if we have a truncate without allocation change -
+ 	 * ocfs2 cluster sizes can be much greater than page size, so
+ 	 * we have to truncate them anyway.  */
+ 	status = ocfs2_data_lock(inode, 1);
+ 	if (status < 0) {
++		up_write(&OCFS2_I(inode)->ip_alloc_sem);
++
+ 		mlog_errno(status);
+ 		goto bail;
+ 	}
+ 
++	unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
++	truncate_inode_pages(inode->i_mapping, new_i_size);
++
+ 	/* alright, we're going to need to do a full blown alloc size
+ 	 * change. Orphan the inode so that recovery can complete the
+ 	 * truncate if necessary. This does the task of marking
+@@ -399,6 +403,8 @@
+ bail_unlock_data:
+ 	ocfs2_data_unlock(inode, 1);
+ 
++	up_write(&OCFS2_I(inode)->ip_alloc_sem);
++
+ bail:
+ 
+ 	mlog_exit(status);
+@@ -419,6 +425,7 @@
+ 			       struct inode *inode,
+ 			       u32 *logical_offset,
+ 			       u32 clusters_to_add,
++			       int mark_unwritten,
+ 			       struct buffer_head *fe_bh,
+ 			       handle_t *handle,
+ 			       struct ocfs2_alloc_context *data_ac,
+@@ -431,9 +438,13 @@
+ 	enum ocfs2_alloc_restarted reason = RESTART_NONE;
+ 	u32 bit_off, num_bits;
+ 	u64 block;
++	u8 flags = 0;
+ 
+ 	BUG_ON(!clusters_to_add);
+ 
++	if (mark_unwritten)
++		flags = OCFS2_EXT_UNWRITTEN;
++
+ 	free_extents = ocfs2_num_free_extents(osb, inode, fe);
+ 	if (free_extents < 0) {
+ 		status = free_extents;
+@@ -483,7 +494,7 @@
+ 	     num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
+ 	status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
+ 				     *logical_offset, block, num_bits,
+-				     meta_ac);
++				     flags, meta_ac);
+ 	if (status < 0) {
+ 		mlog_errno(status);
+ 		goto leave;
+@@ -516,25 +527,28 @@
+  * For a given allocation, determine which allocators will need to be
+  * accessed, and lock them, reserving the appropriate number of bits.
+  *
+- * Called from ocfs2_extend_allocation() for file systems which don't
+- * support holes, and from ocfs2_write() for file systems which
+- * understand sparse inodes.
++ * Sparse file systems call this from ocfs2_write_begin_nolock()
++ * and ocfs2_allocate_unwritten_extents().
++ *
++ * File systems which don't support holes call this from
++ * ocfs2_extend_allocation().
+  */
+ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
+-			  u32 clusters_to_add,
++			  u32 clusters_to_add, u32 extents_to_split,
+ 			  struct ocfs2_alloc_context **data_ac,
+ 			  struct ocfs2_alloc_context **meta_ac)
+ {
+ 	int ret, num_free_extents;
++	unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
+ 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ 
+ 	*meta_ac = NULL;
+ 	*data_ac = NULL;
+ 
+ 	mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
+-	     "clusters_to_add = %u\n",
++	     "clusters_to_add = %u, extents_to_split = %u\n",
+ 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, i_size_read(inode),
+-	     le32_to_cpu(di->i_clusters), clusters_to_add);
++	     le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);
+ 
+ 	num_free_extents = ocfs2_num_free_extents(osb, inode, di);
+ 	if (num_free_extents < 0) {
+@@ -552,9 +566,12 @@
+ 	 *
+ 	 * Most of the time we'll only be seeing this 1 cluster at a time
+ 	 * anyway.
++	 *
++	 * Always lock for any unwritten extents - we might want to
++	 * remove blocks for a merge.
+ 	 */
+ 	if (!num_free_extents ||
+-	    (ocfs2_sparse_alloc(osb) && num_free_extents < clusters_to_add)) {
++	    (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
+ 		ret = ocfs2_reserve_new_metadata(osb, di, meta_ac);
+ 		if (ret < 0) {
+ 			if (ret != -ENOSPC)
+@@ -585,14 +602,13 @@
+ 	return ret;
+ }
+ 
+-static int ocfs2_extend_allocation(struct inode *inode,
+-				   u32 clusters_to_add)
++static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
++				     u32 clusters_to_add, int mark_unwritten)
+ {
+ 	int status = 0;
+ 	int restart_func = 0;
+-	int drop_alloc_sem = 0;
+ 	int credits;
+-	u32 prev_clusters, logical_start;
++	u32 prev_clusters;
+ 	struct buffer_head *bh = NULL;
+ 	struct ocfs2_dinode *fe = NULL;
+ 	handle_t *handle = NULL;
+@@ -607,7 +623,7 @@
+ 	 * This function only exists for file systems which don't
+ 	 * support holes.
+ 	 */
+-	BUG_ON(ocfs2_sparse_alloc(osb));
++	BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
+ 
+ 	status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh,
+ 				  OCFS2_BH_CACHED, inode);
+@@ -623,19 +639,10 @@
+ 		goto leave;
+ 	}
+ 
+-	logical_start = OCFS2_I(inode)->ip_clusters;
+-
+ restart_all:
+ 	BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
+ 
+-	/* blocks peope in read/write from reading our allocation
+-	 * until we're done changing it. We depend on i_mutex to block
+-	 * other extend/truncate calls while we're here. Ordering wrt
+-	 * start_trans is important here -- always do it before! */
+-	down_write(&OCFS2_I(inode)->ip_alloc_sem);
+-	drop_alloc_sem = 1;
+-
+-	status = ocfs2_lock_allocators(inode, fe, clusters_to_add, &data_ac,
++	status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac,
+ 				       &meta_ac);
+ 	if (status) {
+ 		mlog_errno(status);
+@@ -668,6 +675,7 @@
+ 					    inode,
+ 					    &logical_start,
+ 					    clusters_to_add,
++					    mark_unwritten,
+ 					    bh,
+ 					    handle,
+ 					    data_ac,
+@@ -720,10 +728,6 @@
+ 	     OCFS2_I(inode)->ip_clusters, i_size_read(inode));
+ 
+ leave:
+-	if (drop_alloc_sem) {
+-		up_write(&OCFS2_I(inode)->ip_alloc_sem);
+-		drop_alloc_sem = 0;
+-	}
+ 	if (handle) {
+ 		ocfs2_commit_trans(osb, handle);
+ 		handle = NULL;
+@@ -749,6 +753,25 @@
+ 	return status;
+ }
+ 
++static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
++				   u32 clusters_to_add, int mark_unwritten)
++{
++	int ret;
++
++	/*
++	 * The alloc sem blocks people in read/write from reading our
++	 * allocation until we're done changing it. We depend on
++	 * i_mutex to block other extend/truncate calls while we're
++	 * here.
++	 */
++	down_write(&OCFS2_I(inode)->ip_alloc_sem);
++	ret = __ocfs2_extend_allocation(inode, logical_start, clusters_to_add,
++					mark_unwritten);
++	up_write(&OCFS2_I(inode)->ip_alloc_sem);
++
++	return ret;
++}
++
+ /* Some parts of this taken from generic_cont_expand, which turned out
+  * to be too fragile to do exactly what we need without us having to
+  * worry about recursive locking in ->prepare_write() and
+@@ -890,7 +913,9 @@
+ 	}
+ 
+ 	if (clusters_to_add) {
+-		ret = ocfs2_extend_allocation(inode, clusters_to_add);
++		ret = ocfs2_extend_allocation(inode,
++					      OCFS2_I(inode)->ip_clusters,
++					      clusters_to_add, 0);
+ 		if (ret < 0) {
+ 			mlog_errno(ret);
+ 			goto out_unlock;
+@@ -997,6 +1022,13 @@
+ 		goto bail_unlock;
+ 	}
+ 
++	/*
++	 * This will intentionally not wind up calling vmtruncate(),
++	 * since all the work for a size change has been done above.
++	 * Otherwise, we could get into problems with truncate as
++	 * ip_alloc_sem is used there to protect against i_size
++	 * changes.
++	 */
+ 	status = inode_setattr(inode, attr);
+ 	if (status < 0) {
+ 		mlog_errno(status);
+@@ -1072,17 +1104,16 @@
+ 	return ret;
+ }
+ 
+-static int ocfs2_write_remove_suid(struct inode *inode)
++static int __ocfs2_write_remove_suid(struct inode *inode,
++				     struct buffer_head *bh)
+ {
+ 	int ret;
+-	struct buffer_head *bh = NULL;
+-	struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ 	handle_t *handle;
+ 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ 	struct ocfs2_dinode *di;
+ 
+ 	mlog_entry("(Inode %llu, mode 0%o)\n",
+-		   (unsigned long long)oi->ip_blkno, inode->i_mode);
++		   (unsigned long long)OCFS2_I(inode)->ip_blkno, inode->i_mode);
+ 
+ 	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+ 	if (handle == NULL) {
+@@ -1091,17 +1122,11 @@
+ 		goto out;
+ 	}
+ 
+-	ret = ocfs2_read_block(osb, oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
+-	if (ret < 0) {
+-		mlog_errno(ret);
+-		goto out_trans;
+-	}
+-
+ 	ret = ocfs2_journal_access(handle, inode, bh,
+ 				   OCFS2_JOURNAL_ACCESS_WRITE);
+ 	if (ret < 0) {
+ 		mlog_errno(ret);
+-		goto out_bh;
++		goto out_trans;
+ 	}
+ 
+ 	inode->i_mode &= ~S_ISUID;
+@@ -1114,8 +1139,7 @@
+ 	ret = ocfs2_journal_dirty(handle, bh);
+ 	if (ret < 0)
+ 		mlog_errno(ret);
+-out_bh:
+-	brelse(bh);
++
+ out_trans:
+ 	ocfs2_commit_trans(osb, handle);
+ out:
+@@ -1161,6 +1185,211 @@
+ 	return ret;
+ }
+ 
++static int ocfs2_write_remove_suid(struct inode *inode)
++{
++	int ret;
++	struct buffer_head *bh = NULL;
++	struct ocfs2_inode_info *oi = OCFS2_I(inode);
++
++	ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
++			       oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
++	if (ret < 0) {
++		mlog_errno(ret);
++		goto out;
++	}
++
++	ret =  __ocfs2_write_remove_suid(inode, bh);
++out:
++	brelse(bh);
++	return ret;
++}
++
++/*
++ * Allocate enough extents to cover the region starting at byte offset
++ * start for len bytes. Existing extents are skipped, any extents
++ * added are marked as "unwritten".
++ */
++static int ocfs2_allocate_unwritten_extents(struct inode *inode,
++					    u64 start, u64 len)
++{
++	int ret;
++	u32 cpos, phys_cpos, clusters, alloc_size;
++
++	/*
++	 * We consider both start and len to be inclusive.
++	 */
++	cpos = start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
++	clusters = ocfs2_clusters_for_bytes(inode->i_sb, start + len);
++	clusters -= cpos;
++
++	while (clusters) {
++		ret = ocfs2_get_clusters(inode, cpos, &phys_cpos,
++					 &alloc_size, NULL);
++		if (ret) {
++			mlog_errno(ret);
++			goto out;
++		}
++
++		/*
++		 * Hole or existing extent len can be arbitrary, so
++		 * cap it to our own allocation request.
++		 */
++		if (alloc_size > clusters)
++			alloc_size = clusters;
++
++		if (phys_cpos) {
++			/*
++			 * We already have an allocation at this
++			 * region so we can safely skip it.
++			 */
++			goto next;
++		}
++
++		ret = __ocfs2_extend_allocation(inode, cpos, alloc_size, 1);
++		if (ret) {
++			if (ret != -ENOSPC)
++				mlog_errno(ret);
++			goto out;
++		}
++
++next:
++		cpos += alloc_size;
++		clusters -= alloc_size;
++	}
++
++	ret = 0;
++out:
++	return ret;
++}
++
++/*
++ * Parts of this function taken from xfs_change_file_space()
++ */
++int ocfs2_change_file_space(struct file *file, unsigned int cmd,
++			    struct ocfs2_space_resv *sr)
++{
++	int ret;
++	s64 llen;
++	struct inode *inode = file->f_path.dentry->d_inode;
++	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
++	struct buffer_head *di_bh = NULL;
++	handle_t *handle;
++	unsigned long long max_off = ocfs2_max_file_offset(inode->i_sb->s_blocksize_bits);
++
++	if (!ocfs2_writes_unwritten_extents(osb))
++		return -ENOTTY;
++
++	if (!S_ISREG(inode->i_mode))
++		return -EINVAL;
++
++	if (!(file->f_mode & FMODE_WRITE))
++		return -EBADF;
++
++	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
++		return -EROFS;
++
++	mutex_lock(&inode->i_mutex);
++
++	/*
++	 * This prevents concurrent writes on other nodes
++	 */
++	ret = ocfs2_rw_lock(inode, 1);
++	if (ret) {
++		mlog_errno(ret);
++		goto out;
++	}
++
++	ret = ocfs2_meta_lock(inode, &di_bh, 1);
++	if (ret) {
++		mlog_errno(ret);
++		goto out_rw_unlock;
++	}
++
++	if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) {
++		ret = -EPERM;
++		goto out_meta_unlock;
++	}
++
++	switch (sr->l_whence) {
++	case 0: /*SEEK_SET*/
++		break;
++	case 1: /*SEEK_CUR*/
++		sr->l_start += file->f_pos;
++		break;
++	case 2: /*SEEK_END*/
++		sr->l_start += i_size_read(inode);
++		break;
++	default:
++		ret = -EINVAL;
++		goto out_meta_unlock;
++	}
++	sr->l_whence = 0;
++
++	llen = sr->l_len > 0 ? sr->l_len - 1 : sr->l_len;
++
++	if (sr->l_start < 0
++	    || sr->l_start > max_off
++	    || (sr->l_start + llen) < 0
++	    || (sr->l_start + llen) > max_off) {
++		ret = -EINVAL;
++		goto out_meta_unlock;
++	}
++
++	if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
++		if (sr->l_len <= 0) {
++			ret = -EINVAL;
++			goto out_meta_unlock;
++		}
++	}
++
++	if (should_remove_suid(file->f_path.dentry)) {
++		ret = __ocfs2_write_remove_suid(inode, di_bh);
++		if (ret) {
++			mlog_errno(ret);
++			goto out_meta_unlock;
++		}
++	}
++
++	down_write(&OCFS2_I(inode)->ip_alloc_sem);
++	/*
++	 * This takes unsigned offsets, but the signed ones we pass
++	 * have been checked against overflow above.
++	 */
++	ret = ocfs2_allocate_unwritten_extents(inode, sr->l_start, sr->l_len);
++	up_write(&OCFS2_I(inode)->ip_alloc_sem);
++	if (ret) {
++		mlog_errno(ret);
++		goto out_meta_unlock;
++	}
++
++	/*
++	 * We update c/mtime for these changes
++	 */
++	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
++	if (IS_ERR(handle)) {
++		ret = PTR_ERR(handle);
++		mlog_errno(ret);
++		goto out_meta_unlock;
++	}
++
++	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
++	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
++	if (ret < 0)
++		mlog_errno(ret);
++
++	ocfs2_commit_trans(osb, handle);
++
++out_meta_unlock:
++	brelse(di_bh);
++	ocfs2_meta_unlock(inode, 1);
++out_rw_unlock:
++	ocfs2_rw_unlock(inode, 1);
++
++	mutex_unlock(&inode->i_mutex);
++out:
++	return ret;
++}
++
+ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
+ 					 loff_t *ppos,
+ 					 size_t count,
+@@ -1331,15 +1560,16 @@
+ 	*basep = base;
+ }
+ 
+-static struct page * ocfs2_get_write_source(struct ocfs2_buffered_write_priv *bp,
++static struct page * ocfs2_get_write_source(char **ret_src_buf,
+ 					    const struct iovec *cur_iov,
+ 					    size_t iov_offset)
+ {
+ 	int ret;
+-	char *buf;
++	char *buf = cur_iov->iov_base + iov_offset;
+ 	struct page *src_page = NULL;
++	unsigned long off;
+ 
+-	buf = cur_iov->iov_base + iov_offset;
++	off = (unsigned long)(buf) & ~PAGE_CACHE_MASK;
+ 
+ 	if (!segment_eq(get_fs(), KERNEL_DS)) {
+ 		/*
+@@ -1378,10 +1608,12 @@
+ {
+ 	int ret = 0;
+ 	ssize_t copied, total = 0;
+-	size_t iov_offset = 0;
++	size_t iov_offset = 0, bytes;
++	loff_t pos;
+ 	const struct iovec *cur_iov = iov;
+-	struct ocfs2_buffered_write_priv bp;
+-	struct page *page;
++	struct page *user_page, *page;
++	char *buf, *dst;
++	void *fsdata;
+ 
+ 	/*
+ 	 * handle partial DIO write.  Adjust cur_iov if needed.
+@@ -1389,21 +1621,38 @@
+ 	ocfs2_set_next_iovec(&cur_iov, &iov_offset, o_direct_written);
+ 
+ 	do {
+-		bp.b_cur_off = iov_offset;
+-		bp.b_cur_iov = cur_iov;
++		pos = *ppos;
+ 
+-		page = ocfs2_get_write_source(&bp, cur_iov, iov_offset);
+-		if (IS_ERR(page)) {
+-			ret = PTR_ERR(page);
++		user_page = ocfs2_get_write_source(&buf, cur_iov, iov_offset);
++		if (IS_ERR(user_page)) {
++			ret = PTR_ERR(user_page);
+ 			goto out;
+ 		}
+ 
+-		copied = ocfs2_buffered_write_cluster(file, *ppos, count,
+-						      ocfs2_map_and_write_user_data,
+-						      &bp);
++		/* Stay within our page boundaries */
++		bytes = min((PAGE_CACHE_SIZE - ((unsigned long)pos & ~PAGE_CACHE_MASK)),
++			    (PAGE_CACHE_SIZE - ((unsigned long)buf & ~PAGE_CACHE_MASK)));
++		/* Stay within the vector boundary */
++		bytes = min_t(size_t, bytes, cur_iov->iov_len - iov_offset);
++		/* Stay within count */
++		bytes = min(bytes, count);
++
++		page = NULL;
++		ret = ocfs2_write_begin(file, file->f_mapping, pos, bytes, 0,
++					&page, &fsdata);
++		if (ret) {
++			mlog_errno(ret);
++			goto out;
++		}
+ 
+-		ocfs2_put_write_source(&bp, page);
++		dst = kmap_atomic(page, KM_USER0);
++		memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes);
++		kunmap_atomic(dst, KM_USER0);
++		flush_dcache_page(page);
++		ocfs2_put_write_source(user_page);
+ 
++		copied = ocfs2_write_end(file, file->f_mapping, pos, bytes,
++					 bytes, page, fsdata);
+ 		if (copied < 0) {
+ 			mlog_errno(copied);
+ 			ret = copied;
+@@ -1411,7 +1660,7 @@
+ 		}
+ 
+ 		total += copied;
+-		*ppos = *ppos + copied;
++		*ppos = pos + copied;
+ 		count -= copied;
+ 
+ 		ocfs2_set_next_iovec(&cur_iov, &iov_offset, copied);
+@@ -1581,52 +1830,46 @@
+ 				    struct pipe_buffer *buf,
+ 				    struct splice_desc *sd)
+ {
+-	int ret, count, total = 0;
++	int ret, count;
+ 	ssize_t copied = 0;
+-	struct ocfs2_splice_write_priv sp;
++	struct file *file = sd->file;
++	unsigned int offset;
++	struct page *page = NULL;
++	void *fsdata;
++	char *src, *dst;
+ 
+ 	ret = buf->ops->pin(pipe, buf);
+ 	if (ret)
+ 		goto out;
+ 
+-	sp.s_sd = sd;
+-	sp.s_buf = buf;
+-	sp.s_pipe = pipe;
+-	sp.s_offset = sd->pos & ~PAGE_CACHE_MASK;
+-	sp.s_buf_offset = buf->offset;
+-
++	offset = sd->pos & ~PAGE_CACHE_MASK;
+ 	count = sd->len;
+-	if (count + sp.s_offset > PAGE_CACHE_SIZE)
+-		count = PAGE_CACHE_SIZE - sp.s_offset;
++	if (count + offset > PAGE_CACHE_SIZE)
++		count = PAGE_CACHE_SIZE - offset;
+ 
+-	do {
+-		/*
+-		 * splice wants us to copy up to one page at a
+-		 * time. For pagesize > cluster size, this means we
+-		 * might enter ocfs2_buffered_write_cluster() more
+-		 * than once, so keep track of our progress here.
+-		 */
+-		copied = ocfs2_buffered_write_cluster(sd->file,
+-						      (loff_t)sd->pos + total,
+-						      count,
+-						      ocfs2_map_and_write_splice_data,
+-						      &sp);
++	ret = ocfs2_write_begin(file, file->f_mapping, sd->pos, count, 0,
++				&page, &fsdata);
++	if (ret) {
++		mlog_errno(ret);
++		goto out;
++	}
++
++	src = buf->ops->map(pipe, buf, 1);
++	dst = kmap_atomic(page, KM_USER1);
++	memcpy(dst + offset, src + buf->offset, count);
++	kunmap_atomic(dst, KM_USER1);
++	buf->ops->unmap(pipe, buf, src);
++
++	copied = ocfs2_write_end(file, file->f_mapping, sd->pos, count, count,
++				 page, fsdata);
+ 		if (copied < 0) {
+ 			mlog_errno(copied);
+ 			ret = copied;
+ 			goto out;
+ 		}
+-
+-		count -= copied;
+-		sp.s_offset += copied;
+-		sp.s_buf_offset += copied;
+-		total += copied;
+-	} while (count);
+-
+-	ret = 0;
+ out:
+ 
+-	return total ? total : ret;
++	return copied ? copied : ret;
+ }
+ 
+ static ssize_t __ocfs2_file_splice_write(struct pipe_inode_info *pipe,
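
Each pass of the rewritten buffered-write loop above clamps the copy length
three ways: to the end of the destination page, to the end of the source
page, and to the remaining iovec and request sizes. A standalone arithmetic
check of that clamp, with PAGE_CACHE_SIZE fixed at 4096 for the demo:

	#include <stdio.h>
	#include <stddef.h>

	#define PAGE_CACHE_SIZE	4096UL
	#define PAGE_CACHE_MASK	(~(PAGE_CACHE_SIZE - 1))

	static size_t min_sz(size_t a, size_t b) { return a < b ? a : b; }

	static size_t write_chunk(unsigned long pos, unsigned long buf,
				  size_t iov_left, size_t count)
	{
		/* Stay within our page boundaries... */
		size_t bytes = min_sz(PAGE_CACHE_SIZE - (pos & ~PAGE_CACHE_MASK),
				      PAGE_CACHE_SIZE - (buf & ~PAGE_CACHE_MASK));

		bytes = min_sz(bytes, iov_left);	/* ...the vector boundary... */
		return min_sz(bytes, count);		/* ...and the total request. */
	}

	int main(void)
	{
		/* Destination 100 bytes into its page, source 4000 bytes into
		 * its page: only 96 source bytes remain, so the chunk is 96. */
		printf("%zu\n", write_chunk(100, 4000, 8192, 65536));
		return 0;
	}
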
+diff -Nurb linux-2.6.22-570/fs/ocfs2/file.h linux-2.6.22-try2/fs/ocfs2/file.h
+--- linux-2.6.22-570/fs/ocfs2/file.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/file.h	2007-12-19 15:29:23.000000000 -0500
+@@ -39,15 +39,16 @@
+ };
+ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
+ 			       struct inode *inode,
+-			       u32 *cluster_start,
++			       u32 *logical_offset,
+ 			       u32 clusters_to_add,
++			       int mark_unwritten,
+ 			       struct buffer_head *fe_bh,
+ 			       handle_t *handle,
+ 			       struct ocfs2_alloc_context *data_ac,
+ 			       struct ocfs2_alloc_context *meta_ac,
+-			       enum ocfs2_alloc_restarted *reason);
++			       enum ocfs2_alloc_restarted *reason_ret);
+ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
+-			  u32 clusters_to_add,
++			  u32 clusters_to_add, u32 extents_to_split,
+ 			  struct ocfs2_alloc_context **data_ac,
+ 			  struct ocfs2_alloc_context **meta_ac);
+ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
+@@ -61,4 +62,7 @@
+ int ocfs2_update_inode_atime(struct inode *inode,
+ 			     struct buffer_head *bh);
+ 
++int ocfs2_change_file_space(struct file *file, unsigned int cmd,
++			    struct ocfs2_space_resv *sr);
++
+ #endif /* OCFS2_FILE_H */
+diff -Nurb linux-2.6.22-570/fs/ocfs2/heartbeat.c linux-2.6.22-try2/fs/ocfs2/heartbeat.c
+--- linux-2.6.22-570/fs/ocfs2/heartbeat.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/heartbeat.c	2007-12-19 15:29:23.000000000 -0500
+@@ -157,16 +157,16 @@
+ 	if (ocfs2_mount_local(osb))
+ 		return 0;
+ 
+-	status = o2hb_register_callback(&osb->osb_hb_down);
++	status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_down);
+ 	if (status < 0) {
+ 		mlog_errno(status);
+ 		goto bail;
+ 	}
+ 
+-	status = o2hb_register_callback(&osb->osb_hb_up);
++	status = o2hb_register_callback(osb->uuid_str, &osb->osb_hb_up);
+ 	if (status < 0) {
+ 		mlog_errno(status);
+-		o2hb_unregister_callback(&osb->osb_hb_down);
++		o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
+ 	}
+ 
+ bail:
+@@ -178,8 +178,8 @@
+ 	if (ocfs2_mount_local(osb))
+ 		return;
+ 
+-	o2hb_unregister_callback(&osb->osb_hb_down);
+-	o2hb_unregister_callback(&osb->osb_hb_up);
++	o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_down);
++	o2hb_unregister_callback(osb->uuid_str, &osb->osb_hb_up);
+ }
+ 
+ void ocfs2_stop_heartbeat(struct ocfs2_super *osb)
+@@ -209,7 +209,7 @@
+ 	envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+ 	envp[2] = NULL;
+ 
+-	ret = call_usermodehelper(argv[0], argv, envp, 1);
++	ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
+ 	if (ret < 0)
+ 		mlog_errno(ret);
+ }
+diff -Nurb linux-2.6.22-570/fs/ocfs2/ioctl.c linux-2.6.22-try2/fs/ocfs2/ioctl.c
+--- linux-2.6.22-570/fs/ocfs2/ioctl.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/ocfs2/ioctl.c	2007-12-19 15:29:23.000000000 -0500
+@@ -14,6 +14,7 @@
+ #include "ocfs2.h"
+ #include "alloc.h"
+ #include "dlmglue.h"
++#include "file.h"
+ #include "inode.h"
+ #include "journal.h"
+ 
+@@ -115,6 +116,7 @@
+ {
+ 	unsigned int flags;
+ 	int status;
++	struct ocfs2_space_resv sr;
+ 
+ 	switch (cmd) {
+ 	case OCFS2_IOC_GETFLAGS:
+@@ -130,6 +132,12 @@
+ 
+ 		return ocfs2_set_inode_attr(inode, flags,
+ 			OCFS2_FL_MODIFIABLE);
++	case OCFS2_IOC_RESVSP:
++	case OCFS2_IOC_RESVSP64:
++		if (copy_from_user(&sr, (int __user *) arg, sizeof(sr)))
++			return -EFAULT;
++
++		return ocfs2_change_file_space(filp, cmd, &sr);
+ 	default:
+ 		return -ENOTTY;
+ 	}
+@@ -148,6 +156,9 @@
+ 	case OCFS2_IOC32_SETFLAGS:
+ 		cmd = OCFS2_IOC_SETFLAGS;
+ 		break;
++	case OCFS2_IOC_RESVSP:
++	case OCFS2_IOC_RESVSP64:
++		break;
+ 	default:
+ 		return -ENOIOCTLCMD;
+ 	}
+diff -Nurb linux-2.6.22-570/fs/ocfs2/journal.c linux-2.6.22-try2/fs/ocfs2/journal.c
+--- linux-2.6.22-570/fs/ocfs2/journal.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/journal.c	2007-12-19 15:29:23.000000000 -0500
+@@ -722,8 +722,7 @@
+ 		container_of(work, struct ocfs2_journal, j_recovery_work);
+ 	struct ocfs2_super *osb = journal->j_osb;
+ 	struct ocfs2_dinode *la_dinode, *tl_dinode;
+-	struct ocfs2_la_recovery_item *item;
+-	struct list_head *p, *n;
++	struct ocfs2_la_recovery_item *item, *n;
+ 	LIST_HEAD(tmp_la_list);
+ 
+ 	mlog_entry_void();
+@@ -734,8 +733,7 @@
+ 	list_splice_init(&journal->j_la_cleanups, &tmp_la_list);
+ 	spin_unlock(&journal->j_lock);
+ 
+-	list_for_each_safe(p, n, &tmp_la_list) {
+-		item = list_entry(p, struct ocfs2_la_recovery_item, lri_list);
++	list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) {
+ 		list_del_init(&item->lri_list);
+ 
+ 		mlog(0, "Complete recovery for slot %d\n", item->lri_slot);
+diff -Nurb linux-2.6.22-570/fs/ocfs2/mmap.c linux-2.6.22-try2/fs/ocfs2/mmap.c
+--- linux-2.6.22-570/fs/ocfs2/mmap.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/mmap.c	2007-12-19 15:29:24.000000000 -0500
+@@ -37,38 +37,48 @@
+ 
+ #include "ocfs2.h"
+ 
++#include "aops.h"
+ #include "dlmglue.h"
+ #include "file.h"
+ #include "inode.h"
+ #include "mmap.h"
+ 
+-static struct page *ocfs2_nopage(struct vm_area_struct * area,
+-				 unsigned long address,
+-				 int *type)
++static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset)
+ {
+-	struct page *page = NOPAGE_SIGBUS;
+-	sigset_t blocked, oldset;
+-	int ret;
+-
+-	mlog_entry("(area=%p, address=%lu, type=%p)\n", area, address,
+-		   type);
+-
+-	/* The best way to deal with signals in this path is
++	/* The best way to deal with signals in the vm path is
+ 	 * to block them upfront, rather than allowing the
+ 	 * locking paths to return -ERESTARTSYS. */
+-	sigfillset(&blocked);
++	sigfillset(blocked);
+ 
+-	/* We should technically never get a bad ret return
++	/* We should technically never get a bad return value
+ 	 * from sigprocmask */
+-	ret = sigprocmask(SIG_BLOCK, &blocked, &oldset);
++	return sigprocmask(SIG_BLOCK, blocked, oldset);
++}
++
++static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
++{
++	return sigprocmask(SIG_SETMASK, oldset, NULL);
++}
++
++static struct page *ocfs2_fault(struct vm_area_struct *area,
++						struct fault_data *fdata)
++{
++	struct page *page = NULL;
++	sigset_t blocked, oldset;
++	int ret;
++
++	mlog_entry("(area=%p, page offset=%lu)\n", area, fdata->pgoff);
++
++	ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
+ 	if (ret < 0) {
++		fdata->type = VM_FAULT_SIGBUS;
+ 		mlog_errno(ret);
+ 		goto out;
+ 	}
+ 
+-	page = filemap_nopage(area, address, type);
++	page = filemap_fault(area, fdata);
+ 
+-	ret = sigprocmask(SIG_SETMASK, &oldset, NULL);
++	ret = ocfs2_vm_op_unblock_sigs(&oldset);
+ 	if (ret < 0)
+ 		mlog_errno(ret);
+ out:
+@@ -76,28 +86,136 @@
+ 	return page;
+ }
+ 
+-static struct vm_operations_struct ocfs2_file_vm_ops = {
+-	.nopage = ocfs2_nopage,
+-};
++static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh,
++				struct page *page)
++{
++	int ret;
++	struct address_space *mapping = inode->i_mapping;
++	loff_t pos = page->index << PAGE_CACHE_SHIFT;
++	unsigned int len = PAGE_CACHE_SIZE;
++	pgoff_t last_index;
++	struct page *locked_page = NULL;
++	void *fsdata;
++	loff_t size = i_size_read(inode);
+ 
+-int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
++	/*
++	 * Another node might have truncated while we were waiting on
++	 * cluster locks.
++	 */
++	last_index = size >> PAGE_CACHE_SHIFT;
++	if (page->index > last_index) {
++		ret = -EINVAL;
++		goto out;
++	}
++
++	/*
++	 * The i_size check above doesn't catch the case where nodes
++	 * truncated and then re-extended the file. We'll re-check the
++	 * page mapping after taking the page lock inside of
++	 * ocfs2_write_begin_nolock().
++	 */
++	if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
++		ret = -EINVAL;
++		goto out;
++	}
++
++	/*
++	 * Call ocfs2_write_begin() and ocfs2_write_end() to take
++	 * advantage of the allocation code there. We pass a write
++	 * length of the whole page (chopped to i_size) to make sure
++	 * the whole thing is allocated.
++	 *
++	 * Since we know the page is up to date, we don't have to
++	 * worry about ocfs2_write_begin() skipping some buffer reads
++	 * because the "write" would invalidate their data.
++	 */
++	if (page->index == last_index)
++		len = size & ~PAGE_CACHE_MASK;
++
++	ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page,
++				       &fsdata, di_bh, page);
++	if (ret) {
++		if (ret != -ENOSPC)
++			mlog_errno(ret);
++		goto out;
++	}
++
++	ret = ocfs2_write_end_nolock(mapping, pos, len, len, locked_page,
++				     fsdata);
++	if (ret < 0) {
++		mlog_errno(ret);
++		goto out;
++	}
++	BUG_ON(ret != len);
++	ret = 0;
++out:
++	return ret;
++}
++
++static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+ {
+-	int ret = 0, lock_level = 0;
+-	struct ocfs2_super *osb = OCFS2_SB(file->f_dentry->d_inode->i_sb);
++	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
++	struct buffer_head *di_bh = NULL;
++	sigset_t blocked, oldset;
++	int ret, ret2;
++
++	ret = ocfs2_vm_op_block_sigs(&blocked, &oldset);
++	if (ret < 0) {
++		mlog_errno(ret);
++		return ret;
++	}
+ 
+ 	/*
+-	 * Only support shared writeable mmap for local mounts which
+-	 * don't know about holes.
++	 * The cluster locks taken will block a truncate from another
++	 * node. Taking the data lock will also ensure that we don't
++	 * attempt page truncation as part of a downconvert.
+ 	 */
+-	if ((!ocfs2_mount_local(osb) || ocfs2_sparse_alloc(osb)) &&
+-	    ((vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_MAYSHARE)) &&
+-	    ((vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_MAYWRITE))) {
+-		mlog(0, "disallow shared writable mmaps %lx\n", vma->vm_flags);
+-		/* This is -EINVAL because generic_file_readonly_mmap
+-		 * returns it in a similar situation. */
+-		return -EINVAL;
++	ret = ocfs2_meta_lock(inode, &di_bh, 1);
++	if (ret < 0) {
++		mlog_errno(ret);
++		goto out;
+ 	}
+ 
++	/*
++	 * The alloc sem should be enough to serialize with
++	 * ocfs2_truncate_file() changing i_size as well as any thread
++	 * modifying the inode btree.
++	 */
++	down_write(&OCFS2_I(inode)->ip_alloc_sem);
++
++	ret = ocfs2_data_lock(inode, 1);
++	if (ret < 0) {
++		mlog_errno(ret);
++		goto out_meta_unlock;
++	}
++
++	ret = __ocfs2_page_mkwrite(inode, di_bh, page);
++
++	ocfs2_data_unlock(inode, 1);
++
++out_meta_unlock:
++	up_write(&OCFS2_I(inode)->ip_alloc_sem);
++
++	brelse(di_bh);
++	ocfs2_meta_unlock(inode, 1);
++
++out:
++	ret2 = ocfs2_vm_op_unblock_sigs(&oldset);
++	if (ret2 < 0)
++		mlog_errno(ret2);
++
++	return ret;
++}
++
++static struct vm_operations_struct ocfs2_file_vm_ops = {
++	.fault		= ocfs2_fault,
++	.page_mkwrite	= ocfs2_page_mkwrite,
++};
++
++int ocfs2_mmap(struct file *file, struct vm_area_struct *vma)
++{
++	int ret = 0, lock_level = 0;
++
+ 	ret = ocfs2_meta_lock_atime(file->f_dentry->d_inode,
+ 				    file->f_vfsmnt, &lock_level);
+ 	if (ret < 0) {
+@@ -107,6 +225,7 @@
+ 	ocfs2_meta_unlock(file->f_dentry->d_inode, lock_level);
+ out:
+ 	vma->vm_ops = &ocfs2_file_vm_ops;
++	vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
+ 	return 0;
+ }
+ 
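
The fault path above factors its signal handling into
ocfs2_vm_op_block_sigs() and ocfs2_vm_op_unblock_sigs(): block every signal,
do the cluster-locked work, then restore the caller's mask, so the locking
paths never return -ERESTARTSYS mid-fault. The same bracketing pattern as a
runnable userspace sketch (the kernel uses its in-kernel sigprocmask(), but
the shape is identical):

	#include <signal.h>
	#include <stdio.h>

	int main(void)
	{
		sigset_t blocked, oldset;

		sigfillset(&blocked);
		if (sigprocmask(SIG_BLOCK, &blocked, &oldset) < 0) {
			perror("sigprocmask");
			return 1;
		}

		/* Critical section: work that must not be interrupted
		 * by a signal delivery. */
		puts("signals blocked");

		sigprocmask(SIG_SETMASK, &oldset, NULL);	/* restore */
		return 0;
	}
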
+diff -Nurb linux-2.6.22-570/fs/ocfs2/namei.c linux-2.6.22-try2/fs/ocfs2/namei.c
+--- linux-2.6.22-570/fs/ocfs2/namei.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/ocfs2/namei.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1684,7 +1684,7 @@
+ 		u32 offset = 0;
+ 
+ 		inode->i_op = &ocfs2_symlink_inode_operations;
+-		status = ocfs2_do_extend_allocation(osb, inode, &offset, 1,
++		status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0,
+ 						    new_fe_bh,
+ 						    handle, data_ac, NULL,
+ 						    NULL);
+diff -Nurb linux-2.6.22-570/fs/ocfs2/ocfs2.h linux-2.6.22-try2/fs/ocfs2/ocfs2.h
+--- linux-2.6.22-570/fs/ocfs2/ocfs2.h	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/ocfs2/ocfs2.h	2007-12-19 15:29:23.000000000 -0500
+@@ -220,6 +220,7 @@
+ 	u16 max_slots;
+ 	s16 node_num;
+ 	s16 slot_num;
++	s16 preferred_slot;
+ 	int s_sectsize_bits;
+ 	int s_clustersize;
+ 	int s_clustersize_bits;
+@@ -306,6 +307,19 @@
+ 	return 0;
+ }
+ 
++static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
++{
++	/*
++	 * Support for sparse files is a pre-requisite
++	 */
++	if (!ocfs2_sparse_alloc(osb))
++		return 0;
++
++	if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_UNWRITTEN)
++		return 1;
++	return 0;
++}
++
+ /* set / clear functions because cluster events can make these happen
+  * in parallel so we want the transitions to be atomic. this also
+  * means that any future flags osb_flags must be protected by spinlock
+diff -Nurb linux-2.6.22-570/fs/ocfs2/ocfs2_fs.h linux-2.6.22-try2/fs/ocfs2/ocfs2_fs.h
+--- linux-2.6.22-570/fs/ocfs2/ocfs2_fs.h	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/ocfs2/ocfs2_fs.h	2007-12-19 15:29:23.000000000 -0500
+@@ -88,7 +88,7 @@
+ #define OCFS2_FEATURE_COMPAT_SUPP	OCFS2_FEATURE_COMPAT_BACKUP_SB
+ #define OCFS2_FEATURE_INCOMPAT_SUPP	(OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
+ 					 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC)
+-#define OCFS2_FEATURE_RO_COMPAT_SUPP	0
++#define OCFS2_FEATURE_RO_COMPAT_SUPP	OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
+ 
+ /*
+  * Heartbeat-only devices are missing journals and other files.  The
+@@ -116,6 +116,11 @@
+  */
+ #define OCFS2_FEATURE_COMPAT_BACKUP_SB		0x0001
+ 
++/*
++ * Unwritten extents support.
++ */
++#define OCFS2_FEATURE_RO_COMPAT_UNWRITTEN	0x0001
++
+ /* The byte offset of the first backup block will be 1G.
+  * The following will be 4G, 16G, 64G, 256G and 1T.
+  */
+@@ -174,6 +179,32 @@
+ #define OCFS2_IOC32_SETFLAGS	_IOW('f', 2, int)
+ 
+ /*
++ * Space reservation / allocation / free ioctls and argument structure
++ * are designed to be compatible with XFS.
++ *
++ * ALLOCSP* and FREESP* are not and will never be supported, but are
++ * included here for completeness.
++ */
++struct ocfs2_space_resv {
++	__s16		l_type;
++	__s16		l_whence;
++	__s64		l_start;
++	__s64		l_len;		/* len == 0 means until end of file */
++	__s32		l_sysid;
++	__u32		l_pid;
++	__s32		l_pad[4];	/* reserve area			    */
++};
++
++#define OCFS2_IOC_ALLOCSP		_IOW ('X', 10, struct ocfs2_space_resv)
++#define OCFS2_IOC_FREESP		_IOW ('X', 11, struct ocfs2_space_resv)
++#define OCFS2_IOC_RESVSP		_IOW ('X', 40, struct ocfs2_space_resv)
++#define OCFS2_IOC_UNRESVSP	_IOW ('X', 41, struct ocfs2_space_resv)
++#define OCFS2_IOC_ALLOCSP64	_IOW ('X', 36, struct ocfs2_space_resv)
++#define OCFS2_IOC_FREESP64	_IOW ('X', 37, struct ocfs2_space_resv)
++#define OCFS2_IOC_RESVSP64	_IOW ('X', 42, struct ocfs2_space_resv)
++#define OCFS2_IOC_UNRESVSP64	_IOW ('X', 43, struct ocfs2_space_resv)
++
++/*
+  * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
+  */
+ #define OCFS2_JOURNAL_DIRTY_FL	(0x00000001)	/* Journal needs recovery */
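
The reservation ioctls above are argument-compatible with XFS, so existing
space-reservation tools carry over. A hedged userspace sketch of
preallocating 1MiB at end-of-file with OCFS2_IOC_RESVSP64; the struct and
ioctl number are copied locally, and a kernel carrying this patch is assumed:

	#include <stdio.h>
	#include <stdint.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>

	struct ocfs2_space_resv {
		int16_t		l_type;
		int16_t		l_whence;
		int64_t		l_start;
		int64_t		l_len;
		int32_t		l_sysid;
		uint32_t	l_pid;
		int32_t		l_pad[4];
	};

	#define OCFS2_IOC_RESVSP64	_IOW('X', 42, struct ocfs2_space_resv)

	int main(int argc, char **argv)
	{
		struct ocfs2_space_resv sr = {
			.l_whence = 2,			/* SEEK_END */
			.l_start  = 0,
			.l_len    = 1024 * 1024,	/* reserve 1MiB */
		};
		int fd;

		if (argc < 2) {
			fprintf(stderr, "usage: %s <ocfs2 file>\n", argv[0]);
			return 1;
		}
		fd = open(argv[1], O_WRONLY);
		if (fd < 0 || ioctl(fd, OCFS2_IOC_RESVSP64, &sr) < 0) {
			perror(argv[1]);
			return 1;
		}
		close(fd);
		return 0;
	}
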
+diff -Nurb linux-2.6.22-570/fs/ocfs2/slot_map.c linux-2.6.22-try2/fs/ocfs2/slot_map.c
+--- linux-2.6.22-570/fs/ocfs2/slot_map.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/slot_map.c	2007-12-19 15:29:23.000000000 -0500
+@@ -121,17 +121,25 @@
+ 	return ret;
+ }
+ 
+-static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si)
++static s16 __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, s16 preferred)
+ {
+ 	int i;
+ 	s16 ret = OCFS2_INVALID_SLOT;
+ 
++	if (preferred >= 0 && preferred < si->si_num_slots) {
++		if (OCFS2_INVALID_SLOT == si->si_global_node_nums[preferred]) {
++			ret = preferred;
++			goto out;
++		}
++	}
++
+ 	for(i = 0; i < si->si_num_slots; i++) {
+ 		if (OCFS2_INVALID_SLOT == si->si_global_node_nums[i]) {
+ 			ret = (s16) i;
+ 			break;
+ 		}
+ 	}
++out:
+ 	return ret;
+ }
+ 
+@@ -248,7 +256,7 @@
+ 	if (slot == OCFS2_INVALID_SLOT) {
+ 		/* if no slot yet, then just take 1st available
+ 		 * one. */
+-		slot = __ocfs2_find_empty_slot(si);
++		slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+ 		if (slot == OCFS2_INVALID_SLOT) {
+ 			spin_unlock(&si->si_lock);
+ 			mlog(ML_ERROR, "no free slots available!\n");
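This hunk lets a node ask for a specific slot instead of the first free one; osb->preferred_slot is filled in from a mount option parsed in super.c. A sketch of requesting a slot at mount time; the option name "preferred_slot" is an assumption here, since the Opt_* table entry lies outside this excerpt:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Option name assumed; match it to the Opt_* table in super.c. */
	if (mount("/dev/sdb1", "/mnt/ocfs2", "ocfs2", 0,
		  "preferred_slot=2") < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}

If slot 2 is already claimed by another node, __ocfs2_find_empty_slot() above silently falls back to the first free slot.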
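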
+diff -Nurb linux-2.6.22-570/fs/ocfs2/suballoc.c linux-2.6.22-try2/fs/ocfs2/suballoc.c
+--- linux-2.6.22-570/fs/ocfs2/suballoc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/suballoc.c	2007-12-19 15:29:23.000000000 -0500
+@@ -98,14 +98,6 @@
+ 				    u16 chain);
+ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
+ 						     u32 wanted);
+-static int ocfs2_free_suballoc_bits(handle_t *handle,
+-				    struct inode *alloc_inode,
+-				    struct buffer_head *alloc_bh,
+-				    unsigned int start_bit,
+-				    u64 bg_blkno,
+-				    unsigned int count);
+-static inline u64 ocfs2_which_suballoc_group(u64 block,
+-					     unsigned int bit);
+ static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
+ 						   u64 bg_blkno,
+ 						   u16 bg_bit_off);
+@@ -496,13 +488,7 @@
+ 
+ 	(*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe);
+ 	(*ac)->ac_which = OCFS2_AC_USE_META;
+-
+-#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
+-	slot = 0;
+-#else
+ 	slot = osb->slot_num;
+-#endif
+-
+ 	(*ac)->ac_group_search = ocfs2_block_group_search;
+ 
+ 	status = ocfs2_reserve_suballoc_bits(osb, (*ac),
+@@ -1626,7 +1612,7 @@
+ /*
+  * expects the suballoc inode to already be locked.
+  */
+-static int ocfs2_free_suballoc_bits(handle_t *handle,
++int ocfs2_free_suballoc_bits(handle_t *handle,
+ 				    struct inode *alloc_inode,
+ 				    struct buffer_head *alloc_bh,
+ 				    unsigned int start_bit,
+@@ -1703,13 +1689,6 @@
+ 	return status;
+ }
+ 
+-static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
+-{
+-	u64 group = block - (u64) bit;
+-
+-	return group;
+-}
+-
+ int ocfs2_free_dinode(handle_t *handle,
+ 		      struct inode *inode_alloc_inode,
+ 		      struct buffer_head *inode_alloc_bh,
+@@ -1723,19 +1702,6 @@
+ 					inode_alloc_bh, bit, bg_blkno, 1);
+ }
+ 
+-int ocfs2_free_extent_block(handle_t *handle,
+-			    struct inode *eb_alloc_inode,
+-			    struct buffer_head *eb_alloc_bh,
+-			    struct ocfs2_extent_block *eb)
+-{
+-	u64 blk = le64_to_cpu(eb->h_blkno);
+-	u16 bit = le16_to_cpu(eb->h_suballoc_bit);
+-	u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
+-
+-	return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh,
+-					bit, bg_blkno, 1);
+-}
+-
+ int ocfs2_free_clusters(handle_t *handle,
+ 		       struct inode *bitmap_inode,
+ 		       struct buffer_head *bitmap_bh,
+diff -Nurb linux-2.6.22-570/fs/ocfs2/suballoc.h linux-2.6.22-try2/fs/ocfs2/suballoc.h
+--- linux-2.6.22-570/fs/ocfs2/suballoc.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/suballoc.h	2007-12-19 15:29:23.000000000 -0500
+@@ -86,20 +86,29 @@
+ 			 u32 *cluster_start,
+ 			 u32 *num_clusters);
+ 
++int ocfs2_free_suballoc_bits(handle_t *handle,
++			     struct inode *alloc_inode,
++			     struct buffer_head *alloc_bh,
++			     unsigned int start_bit,
++			     u64 bg_blkno,
++			     unsigned int count);
+ int ocfs2_free_dinode(handle_t *handle,
+ 		      struct inode *inode_alloc_inode,
+ 		      struct buffer_head *inode_alloc_bh,
+ 		      struct ocfs2_dinode *di);
+-int ocfs2_free_extent_block(handle_t *handle,
+-			    struct inode *eb_alloc_inode,
+-			    struct buffer_head *eb_alloc_bh,
+-			    struct ocfs2_extent_block *eb);
+ int ocfs2_free_clusters(handle_t *handle,
+ 			struct inode *bitmap_inode,
+ 			struct buffer_head *bitmap_bh,
+ 			u64 start_blk,
+ 			unsigned int num_clusters);
+ 
++static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
++{
++	u64 group = block - (u64) bit;
++
++	return group;
++}
++
+ static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb,
+ 					  u64 bg_blkno)
+ {
+diff -Nurb linux-2.6.22-570/fs/ocfs2/super.c linux-2.6.22-try2/fs/ocfs2/super.c
+--- linux-2.6.22-570/fs/ocfs2/super.c	2007-12-12 18:08:43.000000000 -0500
++++ linux-2.6.22-try2/fs/ocfs2/super.c	2007-12-19 15:29:23.000000000 -0500
+@@ -82,7 +82,8 @@
+ MODULE_LICENSE("GPL");
+ 
+ static int ocfs2_parse_options(struct super_block *sb, char *options,
+-			       unsigned long *mount_opt, int is_remount);
++			       unsigned long *mount_opt, s16 *slot,
++			       int is_remount);
+ static void ocfs2_put_super(struct super_block *sb);
+ static int ocfs2_mount_volume(struct super_block *sb);
+ static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
+@@ -114,8 +115,6 @@
+ static struct inode *ocfs2_alloc_inode(struct super_block *sb);
+ static void ocfs2_destroy_inode(struct inode *inode);
+ 
+-static unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
+-
+ static const struct super_operations ocfs2_sops = {
+ 	.statfs		= ocfs2_statfs,
+ 	.alloc_inode	= ocfs2_alloc_inode,
+@@ -323,7 +322,7 @@
+ /* From xfs_super.c:xfs_max_file_offset
+  * Copyright (c) 2000-2004 Silicon Graphics, Inc.
+  */
+-static unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
++unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
+ {
+ 	unsigned int pagefactor = 1;
+ 	unsigned int bitshift = BITS_PER_LONG - 1;
+@@ -360,9 +359,10 @@
+ 	int incompat_features;
+ 	int ret = 0;
+ 	unsigned long parsed_options;
++	s16 slot;
+ 	struct ocfs2_super *osb = OCFS2_SB(sb);
+ 
+-	if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
++	if (!ocfs2_parse_options(sb, data, &parsed_options, &slot, 1)) {
+ 		ret = -EINVAL;
+ 		goto out;
+ 	}
+@@ -546,6 +546,7 @@
+ 	struct dentry *root;
+ 	int status, sector_size;
+ 	unsigned long parsed_opt;
++	s16 slot;
+ 	struct inode *inode = NULL;
+ 	struct ocfs2_super *osb = NULL;
+ 	struct buffer_head *bh = NULL;
+@@ -553,7 +554,7 @@
+ 
+ 	mlog_entry("%p, %p, %i", sb, data, silent);
+ 
+-	if (!ocfs2_parse_options(sb, data, &parsed_opt, 0)) {
++	if (!ocfs2_parse_options(sb, data, &parsed_opt, &slot, 0)) {
+ 		status = -EINVAL;
+ 		goto read_super_error;
+ 	}
+@@ -583,6 +584,7 @@
+ 	brelse(bh);
+ 	bh = NULL;
+ 	osb->s_mount_opt = parsed_opt;
++	osb->preferred_slot = slot;
+ 
+ 	sb->s_magic = OCFS2_SUPER_MAGIC;
+ 
+@@ -728,6 +730,7 @@
+ static int ocfs2_parse_options(struct super_block *sb,
+ 			       char *options,
+ 			       unsigned long *mount_opt,
++			       s16 *slot,
+ 			       int is_remount)
+ {
+ 	int status;
+@@ -737,6 +740,7 @@
+ 		   options ? options : "(none)");
+ 
+ 	*mount_opt = 0;
++	*slot = OCFS2_INVALID_SLOT;
+ 
+ 	if (!options) {
+ 		status = 1;
+diff -Nurb linux-2.6.22-570/fs/ocfs2/super.h linux-2.6.22-try2/fs/ocfs2/super.h
+--- linux-2.6.22-570/fs/ocfs2/super.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ocfs2/super.h	2007-12-19 15:29:23.000000000 -0500
+@@ -45,4 +45,6 @@
+ 
+ #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
+ 
++unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
++
+ #endif /* OCFS2_SUPER_H */
+diff -Nurb linux-2.6.22-570/fs/open.c linux-2.6.22-try2/fs/open.c
+--- linux-2.6.22-570/fs/open.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/open.c	2007-12-19 15:29:24.000000000 -0500
+@@ -362,6 +362,92 @@
+ #endif
+ 
+ /*
++ * sys_fallocate - preallocate blocks or free preallocated blocks
++ * @fd: the file descriptor
++ * @mode: mode specifies if fallocate should preallocate blocks OR free
++ *	  (unallocate) preallocated blocks. Currently only FA_ALLOCATE and
++ *	  (deallocate) preallocated blocks. Currently only FA_ALLOCATE and
++ * @offset: The offset within file, from where (un)allocation is being
++ *	    requested. It should not have a negative value.
++ * @len: The amount (in bytes) of space to be (un)allocated, from the offset.
++ *
++ * This system call, depending on the mode, preallocates or deallocates blocks
++ * for a file. The range of blocks depends on the value of offset and len
++ * arguments provided by the user/application. For FA_ALLOCATE mode, if this
++ * system call succeeds, subsequent writes to the file in the given range
++ * (specified by offset & len) should not fail - even if the file system
++ * later becomes full. Hence the preallocation done is persistent (valid
++ * even after reopen of the file and remount/reboot).
++ *
++ * It is expected that the ->fallocate() inode operation implemented by the
++ * individual file systems will update the file size and/or ctime/mtime
++ * depending on the mode and also on the success of the operation.
++ *
++ * Note: In case the file system does not support preallocation,
++ * posix_fallocate() should fall back to the library implementation (i.e.
++ * allocating zero-filled new blocks to the file).
++ *
++ * Return Values
++ *	0	: On SUCCESS a value of zero is returned.
++ *	error	: On Failure, an error code will be returned.
++ * An error code of -ENOSYS or -EOPNOTSUPP should make posix_fallocate()
++ * fall back on library implementation of fallocate.
++ *
++ * <TBD> Generic fallocate to be added for file systems that do not
++ *	 support it.
++ */
++asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len)
++{
++	struct file *file;
++	struct inode *inode;
++	long ret = -EINVAL;
++
++	if (offset < 0 || len <= 0)
++		goto out;
++
++	/* Return error if mode is not supported */
++	ret = -EOPNOTSUPP;
++	if (mode != FA_ALLOCATE && mode != FA_DEALLOCATE)
++		goto out;
++
++	ret = -EBADF;
++	file = fget(fd);
++	if (!file)
++		goto out;
++	if (!(file->f_mode & FMODE_WRITE))
++		goto out_fput;
++
++	inode = file->f_path.dentry->d_inode;
++
++	ret = -ESPIPE;
++	if (S_ISFIFO(inode->i_mode))
++		goto out_fput;
++
++	ret = -ENODEV;
++	/*
++	 * Let individual file system decide if it supports preallocation
++	 * for directories or not.
++	 */
++	if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
++		goto out_fput;
++
++	ret = -EFBIG;
++	/* Check for wrap through zero too */
++	if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
++		goto out_fput;
++
++	if (inode->i_op && inode->i_op->fallocate)
++		ret = inode->i_op->fallocate(inode, mode, offset, len);
++	else
++		ret = -ENOSYS;
++
++out_fput:
++	fput(file);
++out:
++	return ret;
++}
++
++/*
+  * access() needs to use the real uid/gid, not the effective uid/gid.
+  * We do this by temporarily clearing all FS-related capabilities and
+  * switching the fsuid/fsgid around to the real ones.
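A caller sketch for the new system call. __NR_fallocate and the FA_* mode constants are defined elsewhere in this patch series, so both are assumptions here (FA_ALLOCATE taken as 0), and a 64-bit ABI is assumed so each loff_t argument passes as a single register:

#define _GNU_SOURCE
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef FA_ALLOCATE
#define FA_ALLOCATE 0		/* assumed value of the new mode flag */
#endif

int main(void)
{
	int fd = open("prealloc.dat", O_WRONLY | O_CREAT, 0644);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Persistently reserve 16 MiB at offset 0; on a kernel without
	 * the syscall this fails with ENOSYS and posix_fallocate()
	 * would fall back to writing zero-filled blocks. */
	if (syscall(__NR_fallocate, fd, FA_ALLOCATE,
		    (long long)0, (long long)16 << 20))
		perror("fallocate");
	close(fd);
	return 0;
}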
+diff -Nurb linux-2.6.22-570/fs/partitions/check.c linux-2.6.22-try2/fs/partitions/check.c
+--- linux-2.6.22-570/fs/partitions/check.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/partitions/check.c	2007-12-19 15:29:23.000000000 -0500
+@@ -397,7 +397,6 @@
+ 		static struct attribute addpartattr = {
+ 			.name = "whole_disk",
+ 			.mode = S_IRUSR | S_IRGRP | S_IROTH,
+-			.owner = THIS_MODULE,
+ 		};
+ 
+ 		sysfs_create_file(&p->kobj, &addpartattr);
+diff -Nurb linux-2.6.22-570/fs/proc/array.c linux-2.6.22-try2/fs/proc/array.c
+--- linux-2.6.22-570/fs/proc/array.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/proc/array.c	2007-12-19 15:29:24.000000000 -0500
+@@ -291,6 +291,15 @@
+ 	return buffer;
+ }
+ 
++static inline char *task_context_switch_counts(struct task_struct *p,
++						char *buffer)
++{
++	return buffer + sprintf(buffer, "voluntary_ctxt_switches:\t%lu\n"
++			    "nonvoluntary_ctxt_switches:\t%lu\n",
++			    p->nvcsw,
++			    p->nivcsw);
++}
++
+ static inline char *task_cap(struct task_struct *p, char *buffer)
+ {
+ 	struct vx_info *vxi = p->vx_info;
+@@ -328,6 +337,7 @@
+ #if defined(CONFIG_S390)
+ 	buffer = task_show_regs(task, buffer);
+ #endif
++	buffer = task_context_switch_counts(task, buffer);
+ 	return buffer - orig;
+ }
+ 
+@@ -426,8 +436,9 @@
+ 
+ 	/* Temporary variable needed for gcc-2.96 */
+ 	/* convert timespec -> nsec*/
+-	start_time = (unsigned long long)task->start_time.tv_sec * NSEC_PER_SEC
+-				+ task->start_time.tv_nsec;
++	start_time =
++		(unsigned long long)task->real_start_time.tv_sec * NSEC_PER_SEC
++				+ task->real_start_time.tv_nsec;
+ 	/* convert nsec -> ticks */
+ 	start_time = nsec_to_clock_t(start_time);
+ 
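The two counters task_context_switch_counts() appends to /proc/<pid>/status can be sampled with a trivial reader; a minimal sketch, assuming nothing beyond the format strings above:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/status", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Matches both voluntary_ctxt_switches and
	 * nonvoluntary_ctxt_switches lines. */
	while (fgets(line, sizeof(line), f))
		if (strstr(line, "ctxt_switches"))
			fputs(line, stdout);
	fclose(f);
	return 0;
}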
+diff -Nurb linux-2.6.22-570/fs/proc/base.c linux-2.6.22-try2/fs/proc/base.c
+--- linux-2.6.22-570/fs/proc/base.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/proc/base.c	2007-12-19 15:29:24.000000000 -0500
+@@ -67,7 +67,7 @@
+ #include <linux/mount.h>
+ #include <linux/security.h>
+ #include <linux/ptrace.h>
+-#include <linux/seccomp.h>
++#include <linux/container.h>
+ #include <linux/cpuset.h>
+ #include <linux/audit.h>
+ #include <linux/poll.h>
+@@ -490,7 +490,7 @@
+ 		count = PROC_BLOCK_SIZE;
+ 
+ 	length = -ENOMEM;
+-	if (!(page = __get_free_page(GFP_KERNEL)))
++	if (!(page = __get_free_page(GFP_TEMPORARY)))
+ 		goto out;
+ 
+ 	length = PROC_I(inode)->op.proc_read(task, (char*)page);
+@@ -530,7 +530,7 @@
+ 		goto out;
+ 
+ 	ret = -ENOMEM;
+-	page = (char *)__get_free_page(GFP_USER);
++	page = (char *)__get_free_page(GFP_TEMPORARY);
+ 	if (!page)
+ 		goto out;
+ 
+@@ -600,7 +600,7 @@
+ 		goto out;
+ 
+ 	copied = -ENOMEM;
+-	page = (char *)__get_free_page(GFP_USER);
++	page = (char *)__get_free_page(GFP_TEMPORARY);
+ 	if (!page)
+ 		goto out;
+ 
+@@ -633,7 +633,7 @@
+ }
+ #endif
+ 
+-static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
++loff_t mem_lseek(struct file * file, loff_t offset, int orig)
+ {
+ 	switch (orig) {
+ 	case 0:
+@@ -711,42 +711,6 @@
+ 	.write		= oom_adjust_write,
+ };
+ 
+-#ifdef CONFIG_MMU
+-static ssize_t clear_refs_write(struct file *file, const char __user *buf,
+-				size_t count, loff_t *ppos)
+-{
+-	struct task_struct *task;
+-	char buffer[PROC_NUMBUF], *end;
+-	struct mm_struct *mm;
+-
+-	memset(buffer, 0, sizeof(buffer));
+-	if (count > sizeof(buffer) - 1)
+-		count = sizeof(buffer) - 1;
+-	if (copy_from_user(buffer, buf, count))
+-		return -EFAULT;
+-	if (!simple_strtol(buffer, &end, 0))
+-		return -EINVAL;
+-	if (*end == '\n')
+-		end++;
+-	task = get_proc_task(file->f_path.dentry->d_inode);
+-	if (!task)
+-		return -ESRCH;
+-	mm = get_task_mm(task);
+-	if (mm) {
+-		clear_refs_smap(mm);
+-		mmput(mm);
+-	}
+-	put_task_struct(task);
+-	if (end - buffer == 0)
+-		return -EIO;
+-	return end - buffer;
+-}
+-
+-static struct file_operations proc_clear_refs_operations = {
+-	.write		= clear_refs_write,
+-};
+-#endif
+-
+ #ifdef CONFIG_AUDITSYSCALL
+ #define TMPBUFLEN 21
+ static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
+@@ -786,7 +750,7 @@
+ 		/* No partial writes. */
+ 		return -EINVAL;
+ 	}
+-	page = (char*)__get_free_page(GFP_USER);
++	page = (char*)__get_free_page(GFP_TEMPORARY);
+ 	if (!page)
+ 		return -ENOMEM;
+ 	length = -EFAULT;
+@@ -815,71 +779,6 @@
+ };
+ #endif
+ 
+-#ifdef CONFIG_SECCOMP
+-static ssize_t seccomp_read(struct file *file, char __user *buf,
+-			    size_t count, loff_t *ppos)
+-{
+-	struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
+-	char __buf[20];
+-	size_t len;
+-
+-	if (!tsk)
+-		return -ESRCH;
+-	/* no need to print the trailing zero, so use only len */
+-	len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
+-	put_task_struct(tsk);
+-
+-	return simple_read_from_buffer(buf, count, ppos, __buf, len);
+-}
+-
+-static ssize_t seccomp_write(struct file *file, const char __user *buf,
+-			     size_t count, loff_t *ppos)
+-{
+-	struct task_struct *tsk = get_proc_task(file->f_dentry->d_inode);
+-	char __buf[20], *end;
+-	unsigned int seccomp_mode;
+-	ssize_t result;
+-
+-	result = -ESRCH;
+-	if (!tsk)
+-		goto out_no_task;
+-
+-	/* can set it only once to be even more secure */
+-	result = -EPERM;
+-	if (unlikely(tsk->seccomp.mode))
+-		goto out;
+-
+-	result = -EFAULT;
+-	memset(__buf, 0, sizeof(__buf));
+-	count = min(count, sizeof(__buf) - 1);
+-	if (copy_from_user(__buf, buf, count))
+-		goto out;
+-
+-	seccomp_mode = simple_strtoul(__buf, &end, 0);
+-	if (*end == '\n')
+-		end++;
+-	result = -EINVAL;
+-	if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
+-		tsk->seccomp.mode = seccomp_mode;
+-		set_tsk_thread_flag(tsk, TIF_SECCOMP);
+-	} else
+-		goto out;
+-	result = -EIO;
+-	if (unlikely(!(end - __buf)))
+-		goto out;
+-	result = end - __buf;
+-out:
+-	put_task_struct(tsk);
+-out_no_task:
+-	return result;
+-}
+-
+-static const struct file_operations proc_seccomp_operations = {
+-	.read		= seccomp_read,
+-	.write		= seccomp_write,
+-};
+-#endif /* CONFIG_SECCOMP */
+-
+ #ifdef CONFIG_FAULT_INJECTION
+ static ssize_t proc_fault_inject_read(struct file * file, char __user * buf,
+ 				      size_t count, loff_t *ppos)
+@@ -954,7 +853,8 @@
+ 			    char __user *buffer, int buflen)
+ {
+ 	struct inode * inode;
+-	char *tmp = (char*)__get_free_page(GFP_KERNEL), *path;
++	char *tmp = (char*)__get_free_page(GFP_TEMPORARY);
++	char *path;
+ 	int len;
+ 
+ 	if (!tmp)
+@@ -1015,7 +915,7 @@
+ 	task_lock(task);
+ 	mm = task->mm;
+ 	if (mm)
+-		dumpable = mm->dumpable;
++		dumpable = get_dumpable(mm);
+ 	task_unlock(task);
+ 	if(dumpable == 1)
+ 		return 1;
+@@ -1744,7 +1644,7 @@
+ 		goto out;
+ 
+ 	length = -ENOMEM;
+-	page = (char*)__get_free_page(GFP_USER);
++	page = (char*)__get_free_page(GFP_TEMPORARY);
+ 	if (!page)
+ 		goto out;
+ 
+@@ -1804,6 +1704,91 @@
+ 
+ #endif
+ 
++#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
++static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf,
++					 size_t count, loff_t *ppos)
++{
++	struct task_struct *task = get_proc_task(file->f_dentry->d_inode);
++	struct mm_struct *mm;
++	char buffer[PROC_NUMBUF];
++	size_t len;
++	int ret;
++
++	if (!task)
++		return -ESRCH;
++
++	ret = 0;
++	mm = get_task_mm(task);
++	if (mm) {
++		len = snprintf(buffer, sizeof(buffer), "%08lx\n",
++			       ((mm->flags & MMF_DUMP_FILTER_MASK) >>
++				MMF_DUMP_FILTER_SHIFT));
++		mmput(mm);
++		ret = simple_read_from_buffer(buf, count, ppos, buffer, len);
++	}
++
++	put_task_struct(task);
++
++	return ret;
++}
++
++static ssize_t proc_coredump_filter_write(struct file *file,
++					  const char __user *buf,
++					  size_t count,
++					  loff_t *ppos)
++{
++	struct task_struct *task;
++	struct mm_struct *mm;
++	char buffer[PROC_NUMBUF], *end;
++	unsigned int val;
++	int ret;
++	int i;
++	unsigned long mask;
++
++	ret = -EFAULT;
++	memset(buffer, 0, sizeof(buffer));
++	if (count > sizeof(buffer) - 1)
++		count = sizeof(buffer) - 1;
++	if (copy_from_user(buffer, buf, count))
++		goto out_no_task;
++
++	ret = -EINVAL;
++	val = (unsigned int)simple_strtoul(buffer, &end, 0);
++	if (*end == '\n')
++		end++;
++	if (end - buffer == 0)
++		goto out_no_task;
++
++	ret = -ESRCH;
++	task = get_proc_task(file->f_dentry->d_inode);
++	if (!task)
++		goto out_no_task;
++
++	ret = end - buffer;
++	mm = get_task_mm(task);
++	if (!mm)
++		goto out_no_mm;
++
++	for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) {
++		if (val & mask)
++			set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
++		else
++			clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags);
++	}
++
++	mmput(mm);
++ out_no_mm:
++	put_task_struct(task);
++ out_no_task:
++	return ret;
++}
++
++static const struct file_operations proc_coredump_filter_operations = {
++	.read		= proc_coredump_filter_read,
++	.write		= proc_coredump_filter_write,
++};
++#endif
++
+ /*
+  * /proc/self:
+  */
+@@ -1995,18 +1980,22 @@
+ 	REG("numa_maps",  S_IRUGO, numa_maps),
+ #endif
+ 	REG("mem",        S_IRUSR|S_IWUSR, mem),
+-#ifdef CONFIG_SECCOMP
+-	REG("seccomp",    S_IRUSR|S_IWUSR, seccomp),
+-#endif
+ 	LNK("cwd",        cwd),
+ 	LNK("root",       root),
+ 	LNK("exe",        exe),
+ 	REG("mounts",     S_IRUGO, mounts),
+ 	REG("mountstats", S_IRUSR, mountstats),
+ #ifdef CONFIG_MMU
++#ifdef CONFIG_PROC_CLEAR_REFS
+ 	REG("clear_refs", S_IWUSR, clear_refs),
++#endif
++#ifdef CONFIG_PROC_SMAPS
+ 	REG("smaps",      S_IRUGO, smaps),
+ #endif
++#ifdef CONFIG_PROC_PAGEMAP
++	REG("pagemap",    S_IRUSR, pagemap),
++#endif
++#endif
+ #ifdef CONFIG_SECURITY
+ 	DIR("attr",       S_IRUGO|S_IXUGO, attr_dir),
+ #endif
+@@ -2016,7 +2005,7 @@
+ #ifdef CONFIG_SCHEDSTATS
+ 	INF("schedstat",  S_IRUGO, pid_schedstat),
+ #endif
+-#ifdef CONFIG_CPUSETS
++#ifdef CONFIG_PROC_PID_CPUSET
+ 	REG("cpuset",     S_IRUGO, cpuset),
+ #endif
+ 	INF("vinfo",      S_IRUGO, pid_vx_info),
+@@ -2029,6 +2018,9 @@
+ #ifdef CONFIG_FAULT_INJECTION
+ 	REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
+ #endif
++#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE)
++	REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
++#endif
+ #ifdef CONFIG_TASK_IO_ACCOUNTING
+ 	INF("io",	S_IRUGO, pid_io_accounting),
+ #endif
+@@ -2285,17 +2277,21 @@
+ 	REG("numa_maps", S_IRUGO, numa_maps),
+ #endif
+ 	REG("mem",       S_IRUSR|S_IWUSR, mem),
+-#ifdef CONFIG_SECCOMP
+-	REG("seccomp",   S_IRUSR|S_IWUSR, seccomp),
+-#endif
+ 	LNK("cwd",       cwd),
+ 	LNK("root",      root),
+ 	LNK("exe",       exe),
+ 	REG("mounts",    S_IRUGO, mounts),
+ #ifdef CONFIG_MMU
++#ifdef CONFIG_PROC_CLEAR_REFS
+ 	REG("clear_refs", S_IWUSR, clear_refs),
++#endif
++#ifdef CONFIG_PROC_SMAPS
+ 	REG("smaps",     S_IRUGO, smaps),
+ #endif
++#ifdef CONFIG_PROC_PAGEMAP
++	REG("pagemap",    S_IRUSR, pagemap),
++#endif
++#endif
+ #ifdef CONFIG_SECURITY
+ 	DIR("attr",      S_IRUGO|S_IXUGO, attr_dir),
+ #endif
+@@ -2305,9 +2301,12 @@
+ #ifdef CONFIG_SCHEDSTATS
+ 	INF("schedstat", S_IRUGO, pid_schedstat),
+ #endif
+-#ifdef CONFIG_CPUSETS
++#ifdef CONFIG_PROC_PID_CPUSET
+ 	REG("cpuset",    S_IRUGO, cpuset),
+ #endif
++#ifdef CONFIG_CONTAINERS
++	REG("container",  S_IRUGO, container),
++#endif
+ 	INF("oom_score", S_IRUGO, oom_score),
+ 	REG("oom_adj",   S_IRUGO|S_IWUSR, oom_adjust),
+ #ifdef CONFIG_AUDITSYSCALL
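With coredump_filter in place, the mm's MMF_DUMP_FILTER bits become runtime-tunable per process: the read handler prints the current mask in hex and the write handler parses one back with simple_strtoul(). A sketch that writes a mask to the new file; the meaning of bit 0 (anonymous private mappings) is an assumption here, since the MMF_DUMP_* definitions sit outside this hunk:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/self/coredump_filter", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Base-0 parse on the kernel side, so plain or 0x-prefixed
	 * values both work; keep only (assumed) bit 0. */
	fprintf(f, "0x1\n");
	fclose(f);
	return 0;
}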
+diff -Nurb linux-2.6.22-570/fs/proc/generic.c linux-2.6.22-try2/fs/proc/generic.c
+--- linux-2.6.22-570/fs/proc/generic.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/proc/generic.c	2007-12-19 15:29:24.000000000 -0500
+@@ -74,7 +74,7 @@
+ 		nbytes = MAX_NON_LFS - pos;
+ 
+ 	dp = PDE(inode);
+-	if (!(page = (char*) __get_free_page(GFP_KERNEL)))
++	if (!(page = (char*) __get_free_page(GFP_TEMPORARY)))
+ 		return -ENOMEM;
+ 
+ 	while ((nbytes > 0) && !eof) {
+diff -Nurb linux-2.6.22-570/fs/proc/internal.h linux-2.6.22-try2/fs/proc/internal.h
+--- linux-2.6.22-570/fs/proc/internal.h	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/proc/internal.h	2007-12-19 15:29:24.000000000 -0500
+@@ -46,15 +46,13 @@
+ extern int proc_tgid_stat(struct task_struct *, char *);
+ extern int proc_pid_status(struct task_struct *, char *);
+ extern int proc_pid_statm(struct task_struct *, char *);
++extern loff_t mem_lseek(struct file * file, loff_t offset, int orig);
+ 
+ extern const struct file_operations proc_maps_operations;
+ extern const struct file_operations proc_numa_maps_operations;
+ extern const struct file_operations proc_smaps_operations;
+-
+-extern const struct file_operations proc_maps_operations;
+-extern const struct file_operations proc_numa_maps_operations;
+-extern const struct file_operations proc_smaps_operations;
+-
++extern const struct file_operations proc_clear_refs_operations;
++extern const struct file_operations proc_pagemap_operations;
+ 
+ void free_proc_entry(struct proc_dir_entry *de);
+ 
+diff -Nurb linux-2.6.22-570/fs/proc/proc_misc.c linux-2.6.22-try2/fs/proc/proc_misc.c
+--- linux-2.6.22-570/fs/proc/proc_misc.c	2007-12-12 18:08:36.000000000 -0500
++++ linux-2.6.22-try2/fs/proc/proc_misc.c	2007-12-19 15:29:24.000000000 -0500
+@@ -463,12 +463,14 @@
+ 	unsigned long jif;
+ 	cputime64_t user, nice, system, idle, iowait, irq, softirq, steal;
+ 	u64 sum = 0;
++	struct timespec boottime;
+ 
+ 	user = nice = system = idle = iowait =
+ 		irq = softirq = steal = cputime64_zero;
+-	jif = - wall_to_monotonic.tv_sec;
+-	if (wall_to_monotonic.tv_nsec)
+-		--jif;
++	getboottime(&boottime);
++	jif = boottime.tv_sec;
++	if (boottime.tv_nsec)
++		++jif;
+ 
+ 	for_each_possible_cpu(i) {
+ 		int j;
+diff -Nurb linux-2.6.22-570/fs/proc/task_mmu.c linux-2.6.22-try2/fs/proc/task_mmu.c
+--- linux-2.6.22-570/fs/proc/task_mmu.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/proc/task_mmu.c	2007-12-19 15:29:24.000000000 -0500
+@@ -5,6 +5,7 @@
+ #include <linux/highmem.h>
+ #include <linux/ptrace.h>
+ #include <linux/pagemap.h>
++#include <linux/ptrace.h>
+ #include <linux/mempolicy.h>
+ 
+ #include <asm/elf.h>
+@@ -114,24 +115,123 @@
+ 	seq_printf(m, "%*c", len, ' ');
+ }
+ 
+-struct mem_size_stats
++static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
+ {
+-	unsigned long resident;
+-	unsigned long shared_clean;
+-	unsigned long shared_dirty;
+-	unsigned long private_clean;
+-	unsigned long private_dirty;
+-	unsigned long referenced;
+-};
++	if (vma && vma != priv->tail_vma) {
++		struct mm_struct *mm = vma->vm_mm;
++		up_read(&mm->mmap_sem);
++		mmput(mm);
++	}
++}
+ 
+-struct pmd_walker {
+-	struct vm_area_struct *vma;
+-	void *private;
+-	void (*action)(struct vm_area_struct *, pmd_t *, unsigned long,
+-		       unsigned long, void *);
+-};
++static void *m_start(struct seq_file *m, loff_t *pos)
++{
++	struct proc_maps_private *priv = m->private;
++	unsigned long last_addr = m->version;
++	struct mm_struct *mm;
++	struct vm_area_struct *vma, *tail_vma = NULL;
++	loff_t l = *pos;
++
++	/* Clear the per syscall fields in priv */
++	priv->task = NULL;
++	priv->tail_vma = NULL;
++
++	/*
++	 * We remember last_addr rather than next_addr to hit with
++	 * mmap_cache most of the time. We have zero last_addr at
++	 * the beginning and also after lseek. We will have -1 last_addr
++	 * after the end of the vmas.
++	 */
++
++	if (last_addr == -1UL)
++		return NULL;
++
++	priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
++	if (!priv->task)
++		return NULL;
++
++	mm = get_task_mm(priv->task);
++	if (!mm)
++		return NULL;
++
++	priv->tail_vma = tail_vma = get_gate_vma(priv->task);
++	down_read(&mm->mmap_sem);
++
++	/* Start with last addr hint */
++	if (last_addr && (vma = find_vma(mm, last_addr))) {
++		vma = vma->vm_next;
++		goto out;
++	}
++
++	/*
++	 * Check the vma index is within the range and do
++	 * sequential scan until m_index.
++	 */
++	vma = NULL;
++	if ((unsigned long)l < mm->map_count) {
++		vma = mm->mmap;
++		while (l-- && vma)
++			vma = vma->vm_next;
++		goto out;
++	}
++
++	if (l != mm->map_count)
++		tail_vma = NULL; /* After gate vma */
++
++out:
++	if (vma)
++		return vma;
++
++	/* End of vmas has been reached */
++	m->version = (tail_vma != NULL)? 0: -1UL;
++	up_read(&mm->mmap_sem);
++	mmput(mm);
++	return tail_vma;
++}
++
++static void *m_next(struct seq_file *m, void *v, loff_t *pos)
++{
++	struct proc_maps_private *priv = m->private;
++	struct vm_area_struct *vma = v;
++	struct vm_area_struct *tail_vma = priv->tail_vma;
++
++	(*pos)++;
++	if (vma && (vma != tail_vma) && vma->vm_next)
++		return vma->vm_next;
++	vma_stop(priv, vma);
++	return (vma != tail_vma)? tail_vma: NULL;
++}
++
++static void m_stop(struct seq_file *m, void *v)
++{
++	struct proc_maps_private *priv = m->private;
++	struct vm_area_struct *vma = v;
++
++	vma_stop(priv, vma);
++	if (priv->task)
++		put_task_struct(priv->task);
++}
++
++static int do_maps_open(struct inode *inode, struct file *file,
++			struct seq_operations *ops)
++{
++	struct proc_maps_private *priv;
++	int ret = -ENOMEM;
++	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
++	if (priv) {
++		priv->pid = proc_pid(inode);
++		ret = seq_open(file, ops);
++		if (!ret) {
++			struct seq_file *m = file->private_data;
++			m->private = priv;
++		} else {
++			kfree(priv);
++		}
++	}
++	return ret;
++}
+ 
+-static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
++static int show_map(struct seq_file *m, void *v)
+ {
+ 	struct proc_maps_private *priv = m->private;
+ 	struct task_struct *task = priv->task;
+@@ -191,38 +291,47 @@
+ 	}
+ 	seq_putc(m, '\n');
+ 
+-	if (mss)
+-		seq_printf(m,
+-			   "Size:           %8lu kB\n"
+-			   "Rss:            %8lu kB\n"
+-			   "Shared_Clean:   %8lu kB\n"
+-			   "Shared_Dirty:   %8lu kB\n"
+-			   "Private_Clean:  %8lu kB\n"
+-			   "Private_Dirty:  %8lu kB\n"
+-			   "Referenced:     %8lu kB\n",
+-			   (vma->vm_end - vma->vm_start) >> 10,
+-			   mss->resident >> 10,
+-			   mss->shared_clean  >> 10,
+-			   mss->shared_dirty  >> 10,
+-			   mss->private_clean >> 10,
+-			   mss->private_dirty >> 10,
+-			   mss->referenced >> 10);
+-
+ 	if (m->count < m->size)  /* vma is copied successfully */
+ 		m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
+ 	return 0;
+ }
+ 
+-static int show_map(struct seq_file *m, void *v)
++static struct seq_operations proc_pid_maps_op = {
++	.start	= m_start,
++	.next	= m_next,
++	.stop	= m_stop,
++	.show	= show_map
++};
++
++static int maps_open(struct inode *inode, struct file *file)
+ {
+-	return show_map_internal(m, v, NULL);
++	return do_maps_open(inode, file, &proc_pid_maps_op);
+ }
+ 
+-static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+-			    unsigned long addr, unsigned long end,
++const struct file_operations proc_maps_operations = {
++	.open		= maps_open,
++	.read		= seq_read,
++	.llseek		= seq_lseek,
++	.release	= seq_release_private,
++};
++
++#ifdef CONFIG_PROC_SMAPS
++struct mem_size_stats
++{
++	struct vm_area_struct *vma;
++	unsigned long resident;
++	unsigned long shared_clean;
++	unsigned long shared_dirty;
++	unsigned long private_clean;
++	unsigned long private_dirty;
++	unsigned long referenced;
++};
++
++static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+ 			    void *private)
+ {
+ 	struct mem_size_stats *mss = private;
++	struct vm_area_struct *vma = mss->vma;
+ 	pte_t *pte, ptent;
+ 	spinlock_t *ptl;
+ 	struct page *page;
+@@ -256,12 +365,71 @@
+ 	}
+ 	pte_unmap_unlock(pte - 1, ptl);
+ 	cond_resched();
++	return 0;
+ }
+ 
+-static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+-				 unsigned long addr, unsigned long end,
+-				 void *private)
++static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
++
++static int show_smap(struct seq_file *m, void *v)
+ {
++	struct vm_area_struct *vma = v;
++	struct mem_size_stats mss;
++	int ret;
++
++	memset(&mss, 0, sizeof mss);
++	mss.vma = vma;
++	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
++		walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
++				&smaps_walk, &mss);
++
++	ret = show_map(m, v);
++	if (ret)
++		return ret;
++
++	seq_printf(m,
++		   "Size:           %8lu kB\n"
++		   "Rss:            %8lu kB\n"
++		   "Shared_Clean:   %8lu kB\n"
++		   "Shared_Dirty:   %8lu kB\n"
++		   "Private_Clean:  %8lu kB\n"
++		   "Private_Dirty:  %8lu kB\n"
++		   "Referenced:     %8lu kB\n",
++		   (vma->vm_end - vma->vm_start) >> 10,
++		   mss.resident >> 10,
++		   mss.shared_clean  >> 10,
++		   mss.shared_dirty  >> 10,
++		   mss.private_clean >> 10,
++		   mss.private_dirty >> 10,
++		   mss.referenced >> 10);
++
++	return ret;
++}
++
++static struct seq_operations proc_pid_smaps_op = {
++	.start	= m_start,
++	.next	= m_next,
++	.stop	= m_stop,
++	.show	= show_smap
++};
++
++static int smaps_open(struct inode *inode, struct file *file)
++{
++	return do_maps_open(inode, file, &proc_pid_smaps_op);
++}
++
++const struct file_operations proc_smaps_operations = {
++	.open		= smaps_open,
++	.read		= seq_read,
++	.llseek		= seq_lseek,
++	.release	= seq_release_private,
++};
++#endif
++
++#ifdef CONFIG_PROC_CLEAR_REFS
++static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
++				unsigned long end, void *private)
++{
++	struct vm_area_struct *vma = private;
+ 	pte_t *pte, ptent;
+ 	spinlock_t *ptl;
+ 	struct page *page;
+@@ -282,236 +450,52 @@
+ 	}
+ 	pte_unmap_unlock(pte - 1, ptl);
+ 	cond_resched();
++	return 0;
+ }
+ 
+-static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud,
+-				  unsigned long addr, unsigned long end)
+-{
+-	pmd_t *pmd;
+-	unsigned long next;
+-
+-	for (pmd = pmd_offset(pud, addr); addr != end;
+-	     pmd++, addr = next) {
+-		next = pmd_addr_end(addr, end);
+-		if (pmd_none_or_clear_bad(pmd))
+-			continue;
+-		walker->action(walker->vma, pmd, addr, next, walker->private);
+-	}
+-}
+-
+-static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd,
+-				  unsigned long addr, unsigned long end)
+-{
+-	pud_t *pud;
+-	unsigned long next;
+-
+-	for (pud = pud_offset(pgd, addr); addr != end;
+-	     pud++, addr = next) {
+-		next = pud_addr_end(addr, end);
+-		if (pud_none_or_clear_bad(pud))
+-			continue;
+-		walk_pmd_range(walker, pud, addr, next);
+-	}
+-}
+-
+-/*
+- * walk_page_range - walk the page tables of a VMA with a callback
+- * @vma - VMA to walk
+- * @action - callback invoked for every bottom-level (PTE) page table
+- * @private - private data passed to the callback function
+- *
+- * Recursively walk the page table for the memory area in a VMA, calling
+- * a callback for every bottom-level (PTE) page table.
+- */
+-static inline void walk_page_range(struct vm_area_struct *vma,
+-				   void (*action)(struct vm_area_struct *,
+-						  pmd_t *, unsigned long,
+-						  unsigned long, void *),
+-				   void *private)
+-{
+-	unsigned long addr = vma->vm_start;
+-	unsigned long end = vma->vm_end;
+-	struct pmd_walker walker = {
+-		.vma		= vma,
+-		.private	= private,
+-		.action		= action,
+-	};
+-	pgd_t *pgd;
+-	unsigned long next;
+-
+-	for (pgd = pgd_offset(vma->vm_mm, addr); addr != end;
+-	     pgd++, addr = next) {
+-		next = pgd_addr_end(addr, end);
+-		if (pgd_none_or_clear_bad(pgd))
+-			continue;
+-		walk_pud_range(&walker, pgd, addr, next);
+-	}
+-}
+-
+-static int show_smap(struct seq_file *m, void *v)
+-{
+-	struct vm_area_struct *vma = v;
+-	struct mem_size_stats mss;
++static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
+ 
+-	memset(&mss, 0, sizeof mss);
+-	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
+-		walk_page_range(vma, smaps_pte_range, &mss);
+-	return show_map_internal(m, v, &mss);
+-}
+-
+-void clear_refs_smap(struct mm_struct *mm)
++static ssize_t clear_refs_write(struct file *file, const char __user *buf,
++				size_t count, loff_t *ppos)
+ {
++	struct task_struct *task;
++	char buffer[13], *end;
++	struct mm_struct *mm;
+ 	struct vm_area_struct *vma;
+ 
++	memset(buffer, 0, sizeof(buffer));
++	if (count > sizeof(buffer) - 1)
++		count = sizeof(buffer) - 1;
++	if (copy_from_user(buffer, buf, count))
++		return -EFAULT;
++	if (!simple_strtol(buffer, &end, 0))
++		return -EINVAL;
++	if (*end == '\n')
++		end++;
++	task = get_proc_task(file->f_path.dentry->d_inode);
++	if (!task)
++		return -ESRCH;
++	mm = get_task_mm(task);
++	if (mm) {
+ 	down_read(&mm->mmap_sem);
+ 	for (vma = mm->mmap; vma; vma = vma->vm_next)
+-		if (vma->vm_mm && !is_vm_hugetlb_page(vma))
+-			walk_page_range(vma, clear_refs_pte_range, NULL);
++			if (!is_vm_hugetlb_page(vma))
++				walk_page_range(mm, vma->vm_start, vma->vm_end,
++						&clear_refs_walk, vma);
+ 	flush_tlb_mm(mm);
+ 	up_read(&mm->mmap_sem);
+-}
+-
+-static void *m_start(struct seq_file *m, loff_t *pos)
+-{
+-	struct proc_maps_private *priv = m->private;
+-	unsigned long last_addr = m->version;
+-	struct mm_struct *mm;
+-	struct vm_area_struct *vma, *tail_vma = NULL;
+-	loff_t l = *pos;
+-
+-	/* Clear the per syscall fields in priv */
+-	priv->task = NULL;
+-	priv->tail_vma = NULL;
+-
+-	/*
+-	 * We remember last_addr rather than next_addr to hit with
+-	 * mmap_cache most of the time. We have zero last_addr at
+-	 * the beginning and also after lseek. We will have -1 last_addr
+-	 * after the end of the vmas.
+-	 */
+-
+-	if (last_addr == -1UL)
+-		return NULL;
+-
+-	priv->task = get_pid_task(priv->pid, PIDTYPE_PID);
+-	if (!priv->task)
+-		return NULL;
+-
+-	mm = get_task_mm(priv->task);
+-	if (!mm)
+-		return NULL;
+-
+-	priv->tail_vma = tail_vma = get_gate_vma(priv->task);
+-	down_read(&mm->mmap_sem);
+-
+-	/* Start with last addr hint */
+-	if (last_addr && (vma = find_vma(mm, last_addr))) {
+-		vma = vma->vm_next;
+-		goto out;
+-	}
+-
+-	/*
+-	 * Check the vma index is within the range and do
+-	 * sequential scan until m_index.
+-	 */
+-	vma = NULL;
+-	if ((unsigned long)l < mm->map_count) {
+-		vma = mm->mmap;
+-		while (l-- && vma)
+-			vma = vma->vm_next;
+-		goto out;
+-	}
+-
+-	if (l != mm->map_count)
+-		tail_vma = NULL; /* After gate vma */
+-
+-out:
+-	if (vma)
+-		return vma;
+-
+-	/* End of vmas has been reached */
+-	m->version = (tail_vma != NULL)? 0: -1UL;
+-	up_read(&mm->mmap_sem);
+-	mmput(mm);
+-	return tail_vma;
+-}
+-
+-static void vma_stop(struct proc_maps_private *priv, struct vm_area_struct *vma)
+-{
+-	if (vma && vma != priv->tail_vma) {
+-		struct mm_struct *mm = vma->vm_mm;
+-		up_read(&mm->mmap_sem);
+ 		mmput(mm);
+ 	}
++	put_task_struct(task);
++	if (end - buffer == 0)
++		return -EIO;
++	return end - buffer;
+ }
+ 
+-static void *m_next(struct seq_file *m, void *v, loff_t *pos)
+-{
+-	struct proc_maps_private *priv = m->private;
+-	struct vm_area_struct *vma = v;
+-	struct vm_area_struct *tail_vma = priv->tail_vma;
+-
+-	(*pos)++;
+-	if (vma && (vma != tail_vma) && vma->vm_next)
+-		return vma->vm_next;
+-	vma_stop(priv, vma);
+-	return (vma != tail_vma)? tail_vma: NULL;
+-}
+-
+-static void m_stop(struct seq_file *m, void *v)
+-{
+-	struct proc_maps_private *priv = m->private;
+-	struct vm_area_struct *vma = v;
+-
+-	vma_stop(priv, vma);
+-	if (priv->task)
+-		put_task_struct(priv->task);
+-}
+-
+-static struct seq_operations proc_pid_maps_op = {
+-	.start	= m_start,
+-	.next	= m_next,
+-	.stop	= m_stop,
+-	.show	= show_map
+-};
+-
+-static struct seq_operations proc_pid_smaps_op = {
+-	.start	= m_start,
+-	.next	= m_next,
+-	.stop	= m_stop,
+-	.show	= show_smap
+-};
+-
+-static int do_maps_open(struct inode *inode, struct file *file,
+-			struct seq_operations *ops)
+-{
+-	struct proc_maps_private *priv;
+-	int ret = -ENOMEM;
+-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+-	if (priv) {
+-		priv->pid = proc_pid(inode);
+-		ret = seq_open(file, ops);
+-		if (!ret) {
+-			struct seq_file *m = file->private_data;
+-			m->private = priv;
+-		} else {
+-			kfree(priv);
+-		}
+-	}
+-	return ret;
+-}
+-
+-static int maps_open(struct inode *inode, struct file *file)
+-{
+-	return do_maps_open(inode, file, &proc_pid_maps_op);
+-}
+-
+-const struct file_operations proc_maps_operations = {
+-	.open		= maps_open,
+-	.read		= seq_read,
+-	.llseek		= seq_lseek,
+-	.release	= seq_release_private,
++const struct file_operations proc_clear_refs_operations = {
++	.write		= clear_refs_write,
+ };
++#endif
+ 
+ #ifdef CONFIG_NUMA
+ extern int show_numa_map(struct seq_file *m, void *v);
+@@ -547,14 +531,211 @@
+ };
+ #endif
+ 
+-static int smaps_open(struct inode *inode, struct file *file)
++#ifdef CONFIG_PROC_PAGEMAP
++struct pagemapread {
++	struct mm_struct *mm;
++	unsigned long next;
++	unsigned long *buf;
++	pte_t *ptebuf;
++	unsigned long pos;
++	size_t count;
++	int index;
++	char __user *out;
++};
++
++static int flush_pagemap(struct pagemapread *pm)
+ {
+-	return do_maps_open(inode, file, &proc_pid_smaps_op);
++	int n = min(pm->count, pm->index * sizeof(unsigned long));
++	if (copy_to_user(pm->out, pm->buf, n))
++		return -EFAULT;
++	pm->out += n;
++	pm->pos += n;
++	pm->count -= n;
++	pm->index = 0;
++	cond_resched();
++	return 0;
+ }
+ 
+-const struct file_operations proc_smaps_operations = {
+-	.open		= smaps_open,
+-	.read		= seq_read,
+-	.llseek		= seq_lseek,
+-	.release	= seq_release_private,
++static int add_to_pagemap(unsigned long addr, unsigned long pfn,
++			  struct pagemapread *pm)
++{
++	pm->buf[pm->index++] = pfn;
++	pm->next = addr + PAGE_SIZE;
++	if (pm->index * sizeof(unsigned long) >= PAGE_SIZE ||
++	    pm->index * sizeof(unsigned long) >= pm->count)
++		return flush_pagemap(pm);
++	return 0;
++}
++
++static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
++			     void *private)
++{
++	struct pagemapread *pm = private;
++	pte_t *pte;
++	int err;
++
++	pte = pte_offset_map(pmd, addr);
++
++#ifdef CONFIG_HIGHPTE
++	/* copy PTE directory to temporary buffer and unmap it */
++	memcpy(pm->ptebuf, pte, PAGE_ALIGN((unsigned long)pte) - (unsigned long)pte);
++	pte_unmap(pte);
++	pte = pm->ptebuf;
++#endif
++
++	for (; addr != end; pte++, addr += PAGE_SIZE) {
++		if (addr < pm->next)
++			continue;
++		if (!pte_present(*pte))
++			err = add_to_pagemap(addr, -1, pm);
++		else
++			err = add_to_pagemap(addr, pte_pfn(*pte), pm);
++		if (err)
++			return err;
++	}
++
++#ifndef CONFIG_HIGHPTE
++	pte_unmap(pte - 1);
++#endif
++
++	return 0;
++}
++
++static int pagemap_fill(struct pagemapread *pm, unsigned long end)
++{
++	int ret;
++
++	while (pm->next != end) {
++		ret = add_to_pagemap(pm->next, -1UL, pm);
++		if (ret)
++			return ret;
++	}
++	return 0;
++}
++
++static struct mm_walk pagemap_walk = { .pmd_entry = pagemap_pte_range };
++
++/*
++ * /proc/pid/pagemap - an array mapping virtual pages to pfns
++ *
++ * For each page in the address space, this file contains one long
++ * representing the corresponding physical page frame number (PFN) or
++ * -1 if the page isn't present. This allows determining precisely
++ * which pages are mapped and comparing mapped pages between
++ * processes.
++ *
++ * Efficient users of this interface will use /proc/pid/maps to
++ * determine which areas of memory are actually mapped and llseek to
++ * skip over unmapped regions.
++ *
++ * The first 4 bytes of this file form a simple header:
++ *
++ * first byte:   0 for big endian, 1 for little
++ * second byte:  page shift (eg 12 for 4096 byte pages)
++ * third byte:   entry size in bytes (currently either 4 or 8)
++ * fourth byte:  header size
++ */
++static ssize_t pagemap_read(struct file *file, char __user *buf,
++			    size_t count, loff_t *ppos)
++{
++	struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
++	unsigned long src = *ppos;
++	unsigned long *page;
++	unsigned long addr, end, vend, svpfn, evpfn;
++	struct mm_struct *mm;
++	struct vm_area_struct *vma;
++	struct pagemapread pm;
++	int ret = -ESRCH;
++
++	if (!task)
++		goto out_no_task;
++
++	ret = -EACCES;
++	if (!ptrace_may_attach(task))
++		goto out;
++
++	ret = -EIO;
++	svpfn = src / sizeof(unsigned long) - 1;
++	addr = PAGE_SIZE * svpfn;
++	if ((svpfn + 1) * sizeof(unsigned long) != src)
++		goto out;
++	evpfn = min((src + count) / sizeof(unsigned long),
++		    ((~0UL) >> PAGE_SHIFT) + 1);
++	count = (evpfn - svpfn) * sizeof(unsigned long);
++	end = PAGE_SIZE * evpfn;
++
++	ret = -ENOMEM;
++	page = kzalloc(PAGE_SIZE, GFP_USER);
++	if (!page)
++		goto out;
++
++#ifdef CONFIG_HIGHPTE
++	pm.ptebuf = kzalloc(PAGE_SIZE, GFP_USER);
++	if (!pm.ptebuf)
++		goto out_free;
++#endif
++
++	ret = 0;
++	mm = get_task_mm(task);
++	if (!mm)
++		goto out_freepte;
++
++	pm.mm = mm;
++	pm.next = addr;
++	pm.buf = page;
++	pm.pos = src;
++	pm.count = count;
++	pm.index = 0;
++	pm.out = buf;
++
++	if (svpfn == -1) {
++		add_to_pagemap(pm.next, 0, &pm);
++		((char *)page)[0] = (ntohl(1) != 1);
++		((char *)page)[1] = PAGE_SHIFT;
++		((char *)page)[2] = sizeof(unsigned long);
++		((char *)page)[3] = sizeof(unsigned long);
++	}
++
++	down_read(&mm->mmap_sem);
++	vma = find_vma(mm, pm.next);
++	while (pm.count > 0 && vma) {
++		if (!ptrace_may_attach(task)) {
++			ret = -EIO;
++			goto out_mm;
++		}
++		vend = min(vma->vm_start - 1, end - 1) + 1;
++		ret = pagemap_fill(&pm, vend);
++		if (ret || !pm.count)
++			break;
++		vend = min(vma->vm_end - 1, end - 1) + 1;
++		ret = walk_page_range(mm, vma->vm_start, vend,
++				      &pagemap_walk, &pm);
++		vma = vma->vm_next;
++	}
++	up_read(&mm->mmap_sem);
++
++	ret = pagemap_fill(&pm, end);
++
++	*ppos = pm.pos;
++	if (!ret)
++		ret = pm.pos - src;
++
++out_mm:
++	mmput(mm);
++out_freepte:
++#ifdef CONFIG_HIGHPTE
++	kfree(pm.ptebuf);
++out_free:
++#endif
++	kfree(page);
++out:
++	put_task_struct(task);
++out_no_task:
++	return ret;
++}
++
++const struct file_operations proc_pagemap_operations = {
++	.llseek		= mem_lseek, /* borrow this */
++	.read		= pagemap_read,
+ };
++#endif
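A reader sketch for the format documented above: the first word of the file carries the 4-byte header, and the entry for virtual page N sits at byte offset (N + 1) * sizeof(unsigned long). A 64-bit userland is assumed so the offsets fit in fseek's long:

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	unsigned long header, pfn, vaddr;
	FILE *f = fopen("/proc/self/pagemap", "r");

	if (!f || fread(&header, sizeof(header), 1, f) != 1) {
		perror("pagemap");
		return 1;
	}
	printf("endian=%u shift=%u entry_size=%u header_size=%u\n",
	       ((unsigned char *)&header)[0], ((unsigned char *)&header)[1],
	       ((unsigned char *)&header)[2], ((unsigned char *)&header)[3]);

	/* Look up the frame backing one of our own stack pages. */
	vaddr = (unsigned long)&header;
	fseek(f, (vaddr / getpagesize() + 1) * sizeof(unsigned long),
	      SEEK_SET);
	if (fread(&pfn, sizeof(pfn), 1, f) == 1)
		printf("vaddr %#lx -> pfn %#lx\n", vaddr, pfn);
	fclose(f);
	return 0;
}

Per the comment above, an efficient consumer would first parse /proc/self/maps and seek only across mapped regions instead of scanning the whole address space.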
+diff -Nurb linux-2.6.22-570/fs/ramfs/inode.c linux-2.6.22-try2/fs/ramfs/inode.c
+--- linux-2.6.22-570/fs/ramfs/inode.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/ramfs/inode.c	2007-12-19 15:29:24.000000000 -0500
+@@ -60,6 +60,7 @@
+ 		inode->i_blocks = 0;
+ 		inode->i_mapping->a_ops = &ramfs_aops;
+ 		inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
++		mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
+ 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ 		switch (mode & S_IFMT) {
+ 		default:
+diff -Nurb linux-2.6.22-570/fs/revoke.c linux-2.6.22-try2/fs/revoke.c
+--- linux-2.6.22-570/fs/revoke.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/revoke.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,777 @@
++/*
++ * fs/revoke.c - Invalidate all current open file descriptors of an inode.
++ *
++ * Copyright (C) 2006-2007  Pekka Enberg
++ *
++ * This file is released under the GPLv2.
++ */
++
++#include <linux/file.h>
++#include <linux/fs.h>
++#include <linux/namei.h>
++#include <linux/magic.h>
++#include <linux/mm.h>
++#include <linux/mman.h>
++#include <linux/module.h>
++#include <linux/mount.h>
++#include <linux/sched.h>
++#include <linux/revoked_fs_i.h>
++#include <linux/syscalls.h>
++
++/**
++ * fileset - an array of file pointers.
++ * @files:    the array of file pointers
++ * @nr:               number of elements in the array
++ * @end:      index to next unused file pointer
++ */
++struct fileset {
++	struct file	**files;
++	unsigned long	nr;
++	unsigned long	end;
++};
++
++/**
++ * revoke_details - details of the revoke operation
++ * @fset:             set of files that point to a revoked inode
++ * @restore_start:    index of the first file pointer that is still in
++ *                    use by a file descriptor but whose real file has
++ *                    not yet been revoked
++ */
++struct revoke_details {
++	struct fileset	*fset;
++	unsigned long	restore_start;
++};
++
++static struct kmem_cache *revokefs_inode_cache;
++
++static inline bool fset_is_full(struct fileset *set)
++{
++	return set->nr == set->end;
++}
++
++static inline struct file *fset_get_filp(struct fileset *set)
++{
++	return set->files[set->end++];
++}
++
++static struct fileset *alloc_fset(unsigned long size)
++{
++	struct fileset *fset;
++
++	fset = kzalloc(sizeof *fset, GFP_KERNEL);
++	if (!fset)
++		return NULL;
++
++	fset->files = kcalloc(size, sizeof(struct file *), GFP_KERNEL);
++	if (!fset->files) {
++		kfree(fset);
++		return NULL;
++	}
++	fset->nr = size;
++	return fset;
++}
++
++static void free_fset(struct fileset *fset)
++{
++	int i;
++
++	for (i = fset->end; i < fset->nr; i++)
++		fput(fset->files[i]);
++
++	kfree(fset->files);
++	kfree(fset);
++}
++
++/*
++ * Revoked file descriptors point to inodes in the revokefs filesystem.
++ */
++static struct vfsmount *revokefs_mnt;
++
++static struct file *get_revoked_file(void)
++{
++	struct dentry *dentry;
++	struct inode *inode;
++	struct file *filp;
++	struct qstr name;
++
++	filp = get_empty_filp();
++	if (!filp)
++		goto err;
++
++	inode = new_inode(revokefs_mnt->mnt_sb);
++	if (!inode)
++		goto err_inode;
++
++	name.name = "revoked_file";
++	name.len = strlen(name.name);
++	dentry = d_alloc(revokefs_mnt->mnt_sb->s_root, &name);
++	if (!dentry)
++		goto err_dentry;
++
++	d_instantiate(dentry, inode);
++
++	filp->f_mapping = inode->i_mapping;
++	filp->f_dentry = dget(dentry);
++	filp->f_vfsmnt = mntget(revokefs_mnt);
++	filp->f_op = fops_get(inode->i_fop);
++	filp->f_pos = 0;
++
++	return filp;
++
++  err_dentry:
++	iput(inode);
++  err_inode:
++	fput(filp);
++  err:
++	return NULL;
++}
++
++static inline bool can_revoke_file(struct file *file, struct inode *inode,
++				   struct file *to_exclude)
++{
++	if (!file || file == to_exclude)
++		return false;
++
++	return file->f_dentry->d_inode == inode;
++}
++
++/*
++ * 	LOCKING: task_lock(owner)
++ */
++static int revoke_fds(struct task_struct *owner,
++		      struct inode *inode,
++		      struct file *to_exclude, struct fileset *fset)
++{
++	struct files_struct *files;
++	struct fdtable *fdt;
++	unsigned int fd;
++	int err = 0;
++
++	files = get_files_struct(owner);
++	if (!files)
++		goto out;
++
++	spin_lock(&files->file_lock);
++	fdt = files_fdtable(files);
++
++	for (fd = 0; fd < fdt->max_fds; fd++) {
++		struct revokefs_inode_info *info;
++		struct file *filp, *new_filp;
++		struct inode *new_inode;
++
++		filp = fcheck_files(files, fd);
++		if (!can_revoke_file(filp, inode, to_exclude))
++			continue;
++
++		if (!filp->f_op->revoke) {
++			err = -EOPNOTSUPP;
++			goto failed;
++		}
++
++		if (fset_is_full(fset)) {
++			err = -ENOMEM;
++			goto failed;
++		}
++
++		new_filp = fset_get_filp(fset);
++
++		/*
++		 * Replace original struct file pointer with a pointer to
++		 * a 'revoked file.'  After this point, we don't need to worry
++		 * about racing with sys_close or sys_dup.
++		 */
++		rcu_assign_pointer(fdt->fd[fd], new_filp);
++
++		/*
++		 * Hold on to task until we can take down the file and its
++		 * mmap.
++		 */
++		get_task_struct(owner);
++
++		new_inode = new_filp->f_dentry->d_inode;
++		make_revoked_inode(new_inode, inode->i_mode & S_IFMT);
++
++		info = revokefs_i(new_inode);
++		info->fd = fd;
++		info->file = filp;
++		info->owner = owner;
++	}
++  failed:
++	spin_unlock(&files->file_lock);
++	put_files_struct(files);
++  out:
++	return err;
++}
++
++static inline bool can_revoke_vma(struct vm_area_struct *vma,
++				  struct inode *inode, struct file *to_exclude)
++{
++	struct file *file = vma->vm_file;
++
++	if (vma->vm_flags & VM_REVOKED)
++		return false;
++
++	if (!file || file == to_exclude)
++		return false;
++
++	return file->f_path.dentry->d_inode == inode;
++}
++
++static int __revoke_break_cow(struct task_struct *tsk, struct inode *inode,
++			      struct file *to_exclude)
++{
++	struct mm_struct *mm = tsk->mm;
++	struct vm_area_struct *vma;
++	int err = 0;
++
++	down_read(&mm->mmap_sem);
++	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
++		int ret;
++
++		if (vma->vm_flags & VM_SHARED)
++			continue;
++
++		if (!can_revoke_vma(vma, inode, to_exclude))
++			continue;
++
++		ret = get_user_pages(tsk, tsk->mm, vma->vm_start,
++				     vma_pages(vma), 1, 1, NULL, NULL);
++		if (ret < 0) {
++			err = ret;
++			break;
++		}
++
++		unlink_file_vma(vma);
++		fput(vma->vm_file);
++		vma->vm_file = NULL;
++	}
++	up_read(&mm->mmap_sem);
++	return err;
++}
++
++static int revoke_break_cow(struct fileset *fset, struct inode *inode,
++			    struct file *to_exclude)
++{
++	unsigned long i;
++	int err = 0;
++
++	for (i = 0; i < fset->end; i++) {
++		struct revokefs_inode_info *info;
++		struct file *this;
++
++		this = fset->files[i];
++		info = revokefs_i(this->f_dentry->d_inode);
++
++		err = __revoke_break_cow(info->owner, inode, to_exclude);
++		if (err)
++			break;
++	}
++	return err;
++}
++
++/*
++ *	 LOCKING: down_write(&mm->mmap_sem)
++ *	 	    -> spin_lock(&mapping->i_mmap_lock)
++ */
++static int revoke_vma(struct vm_area_struct *vma, struct zap_details *details)
++{
++	unsigned long restart_addr, start_addr, end_addr;
++	int need_break;
++
++	start_addr = vma->vm_start;
++	end_addr = vma->vm_end;
++
++  again:
++	restart_addr = zap_page_range(vma, start_addr, end_addr - start_addr,
++				      details);
++
++	need_break = need_resched() || need_lockbreak(details->i_mmap_lock);
++	if (need_break)
++		goto out_need_break;
++
++	if (restart_addr < end_addr) {
++		start_addr = restart_addr;
++		goto again;
++	}
++	vma->vm_flags |= VM_REVOKED;
++	return 0;
++
++  out_need_break:
++	spin_unlock(details->i_mmap_lock);
++	cond_resched();
++	spin_lock(details->i_mmap_lock);
++	return -EINTR;
++}
++
++/*
++ *	LOCKING: spin_lock(&mapping->i_mmap_lock)
++ */
++static int revoke_mm(struct mm_struct *mm, struct address_space *mapping,
++		     struct file *to_exclude)
++{
++	struct vm_area_struct *vma;
++	struct zap_details details;
++	int err = 0;
++
++	details.i_mmap_lock = &mapping->i_mmap_lock;
++
++	/*
++	 * If ->mmap_sem is under contention, we continue scanning other
++	 * mms and try again later.
++	 */
++	if (!down_write_trylock(&mm->mmap_sem)) {
++		err = -EAGAIN;
++		goto out;
++	}
++	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
++		if (!(vma->vm_flags & VM_SHARED))
++			continue;
++
++		if (!can_revoke_vma(vma, mapping->host, to_exclude))
++			continue;
++
++		err = revoke_vma(vma, &details);
++		if (err)
++			break;
++
++		__unlink_file_vma(vma);
++		fput(vma->vm_file);
++		vma->vm_file = NULL;
++	}
++	up_write(&mm->mmap_sem);
++  out:
++	return err;
++}
++
++/*
++ *	LOCKING: spin_lock(&mapping->i_mmap_lock)
++ */
++static void revoke_mapping_tree(struct address_space *mapping,
++				struct file *to_exclude)
++{
++	struct vm_area_struct *vma;
++	struct prio_tree_iter iter;
++	int try_again = 0;
++
++  restart:
++	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) {
++		int err;
++
++		if (!(vma->vm_flags & VM_SHARED))
++			continue;
++
++		if (likely(!can_revoke_vma(vma, mapping->host, to_exclude)))
++			continue;
++
++		err = revoke_mm(vma->vm_mm, mapping, to_exclude);
++		if (err == -EAGAIN)
++			try_again = 1;
++
++		goto restart;
++	}
++	if (try_again) {
++		cond_resched();
++		goto restart;
++	}
++}
++
++/*
++ *	LOCKING: spin_lock(&mapping->i_mmap_lock)
++ */
++static void revoke_mapping_list(struct address_space *mapping,
++				struct file *to_exclude)
++{
++	struct vm_area_struct *vma;
++	int try_again = 0;
++
++  restart:
++	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) {
++		int err;
++
++		if (likely(!can_revoke_vma(vma, mapping->host, to_exclude)))
++			continue;
++
++		err = revoke_mm(vma->vm_mm, mapping, to_exclude);
++		if (err == -EAGAIN) {
++			try_again = 1;
++			continue;
++		}
++		if (err == -EINTR)
++			goto restart;
++	}
++	if (try_again) {
++		cond_resched();
++		goto restart;
++	}
++}
++
++static void revoke_mapping(struct address_space *mapping, struct file *to_exclude)
++{
++	spin_lock(&mapping->i_mmap_lock);
++	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
++		revoke_mapping_tree(mapping, to_exclude);
++	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
++		revoke_mapping_list(mapping, to_exclude);
++	spin_unlock(&mapping->i_mmap_lock);
++}
++
++static void restore_file(struct revokefs_inode_info *info)
++{
++	struct files_struct *files;
++
++	files = get_files_struct(info->owner);
++	if (files) {
++		struct fdtable *fdt;
++		struct file *filp;
++
++		spin_lock(&files->file_lock);
++		fdt = files_fdtable(files);
++
++		filp = fdt->fd[info->fd];
++		if (filp)
++			fput(filp);
++
++		rcu_assign_pointer(fdt->fd[info->fd], info->file);
++		FD_SET(info->fd, fdt->close_on_exec);
++		spin_unlock(&files->file_lock);
++		put_files_struct(files);
++	}
++	put_task_struct(info->owner);
++	info->owner = NULL;	/* To avoid double-restore. */
++}
++
++static void restore_files(struct revoke_details *details)
++{
++	unsigned long i;
++
++	for (i = details->restore_start; i < details->fset->end; i++) {
++		struct revokefs_inode_info *info;
++		struct file *filp;
++
++		filp = details->fset->files[i];
++		info = revokefs_i(filp->f_dentry->d_inode);
++
++		restore_file(info);
++	}
++}
++
++static int revoke_files(struct revoke_details *details)
++{
++	unsigned long i;
++	int err = 0;
++
++	for (i = 0; i < details->fset->end; i++) {
++		struct revokefs_inode_info *info;
++		struct file *this, *filp;
++		struct inode *inode;
++
++		this = details->fset->files[i];
++		inode = this->f_dentry->d_inode;
++		info = revokefs_i(inode);
++
++		/*
++		 * Increase count before attempting to close file as
++		 * a partially closed file can no longer be restored.
++		 */
++		details->restore_start++;
++		filp = info->file;
++		err = filp->f_op->revoke(filp, inode->i_mapping);
++		put_task_struct(info->owner);
++		info->owner = NULL;	/* To avoid restoring closed file. */
++		if (err)
++			goto out;
++	}
++  out:
++	return err;
++}
++
++/*
++ *	Returns the maximum number of file descriptors pointing to an inode.
++ *
++ *	LOCKING: read_lock(&tasklist_lock)
++ */
++static unsigned long inode_fds(struct inode *inode, struct file *to_exclude)
++{
++	struct task_struct *g, *p;
++	unsigned long nr_fds = 0;
++
++	do_each_thread(g, p) {
++		struct files_struct *files;
++		struct fdtable *fdt;
++		unsigned int fd;
++
++		files = get_files_struct(p);
++		if (!files)
++			continue;
++
++		spin_lock(&files->file_lock);
++		fdt = files_fdtable(files);
++		for (fd = 0; fd < fdt->max_fds; fd++) {
++			struct file *file;
++
++			file = fcheck_files(files, fd);
++			if (can_revoke_file(file, inode, to_exclude)) {
++				nr_fds += fdt->max_fds;
++				break;
++			}
++		}
++		spin_unlock(&files->file_lock);
++		put_files_struct(files);
++	}
++	while_each_thread(g, p);
++	return nr_fds;
++}
++
++static struct fileset *__alloc_revoke_fset(unsigned long size)
++{
++	struct fileset *fset;
++	int i;
++
++	fset = alloc_fset(size);
++	if (!fset)
++		return NULL;
++
++	for (i = 0; i < fset->nr; i++) {
++		struct file *filp;
++
++		filp = get_revoked_file();
++		if (!filp)
++			goto err;
++
++		fset->files[i] = filp;
++	}
++	return fset;
++  err:
++	free_fset(fset);
++	return NULL;
++}
++
++static struct fileset *alloc_revoke_fset(struct inode *inode, struct file *to_exclude)
++{
++	unsigned long nr_fds;
++
++	read_lock(&tasklist_lock);
++	nr_fds = inode_fds(inode, to_exclude);
++	read_unlock(&tasklist_lock);
++
++	return __alloc_revoke_fset(nr_fds);
++}
++
++static int do_revoke(struct inode *inode, struct file *to_exclude)
++{
++	struct revoke_details details;
++	struct fileset *fset = NULL;
++	struct task_struct *g, *p;
++	int err = 0;
++
++	if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) {
++		err = -EPERM;
++		goto out;
++	}
++
++  retry:
++	if (signal_pending(current)) {
++		err = -ERESTARTSYS;
++		goto out;
++	}
++
++	/*
++	 * Pre-allocate memory because the first pass is done under
++	 * tasklist_lock.
++	 */
++	fset = alloc_revoke_fset(inode, to_exclude);
++	if (!fset) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	read_lock(&tasklist_lock);
++
++	/*
++	 * If someone forked while we were allocating memory, try again.
++	 */
++	if (inode_fds(inode, to_exclude) > fset->nr) {
++		read_unlock(&tasklist_lock);
++		free_fset(fset);
++		goto retry;
++	}
++
++	details.fset = fset;
++	details.restore_start = 0;
++
++	/*
++	 * First revoke the descriptors. After we are done, no one can start
++	 * new operations on them.
++	 */
++	do_each_thread(g, p) {
++		err = revoke_fds(p, inode, to_exclude, fset);
++		if (err)
++			goto exit_loop;
++	}
++	while_each_thread(g, p);
++  exit_loop:
++	read_unlock(&tasklist_lock);
++
++	if (err)
++		goto out_restore;
++
++	/*
++	 * Take down shared memory mappings.
++	 */
++	revoke_mapping(inode->i_mapping, to_exclude);
++
++	/*
++	 * Break COW for private mappings.
++	 */
++	err = revoke_break_cow(fset, inode, to_exclude);
++	if (err)
++		goto out_restore;
++
++	/*
++	 * Now, revoke the files for good.
++	 */
++	err = revoke_files(&details);
++	if (err)
++		goto out_restore;
++
++  out_free_table:
++	free_fset(fset);
++  out:
++	return err;
++
++  out_restore:
++	restore_files(&details);
++	goto out_free_table;
++}
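++
++/*
++ * Recap of the sequence above (a sketch, not normative): descriptors
++ * are detached first under tasklist_lock, then shared mappings are
++ * torn down and COW is broken on private ones, and only then is
++ * ->revoke() called on each captured file.  Any failure along the way
++ * rolls back through restore_files() for descriptors still restorable.
++ */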
++
++asmlinkage long sys_revokeat(int dfd, const char __user * filename)
++{
++	struct nameidata nd;
++	int err;
++
++	err = __user_walk_fd(dfd, filename, 0, &nd);
++	if (!err) {
++		err = do_revoke(nd.dentry->d_inode, NULL);
++		path_release(&nd);
++	}
++	return err;
++}
++
++asmlinkage long sys_frevoke(unsigned int fd)
++{
++	struct file *file = fget(fd);
++	int err = -EBADF;
++
++	if (file) {
++		err = do_revoke(file->f_dentry->d_inode, file);
++		fput(file);
++	}
++	return err;
++}
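++
++/*
++ * Illustrative use from userspace; a sketch only.  It assumes both
++ * calls are wired into the architecture's syscall table, and
++ * __NR_revokeat / __NR_frevoke below are placeholders, not numbers
++ * defined by this excerpt:
++ *
++ *	int fd = open("/tmp/shared", O_RDWR);
++ *	syscall(__NR_frevoke, fd);	 all other users lose access,
++ *					 the caller's fd stays valid
++ *	syscall(__NR_revokeat, AT_FDCWD, "/tmp/shared");
++ *					 revoke by path, excluding no one
++ */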
++
++int generic_file_revoke(struct file *file, struct address_space *new_mapping)
++{
++	struct address_space *mapping = file->f_mapping;
++	int err;
++
++	/*
++	 * Flush pending writes.
++	 */
++	err = do_fsync(file, 1);
++	if (err)
++		goto out;
++
++	file->f_mapping = new_mapping;
++
++	/*
++	 * Make pending reads fail.
++	 */
++	err = invalidate_inode_pages2(mapping);
++
++  out:
++	return err;
++}
++EXPORT_SYMBOL(generic_file_revoke);
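++
++/*
++ * A filesystem opts in through the ->revoke() file operation invoked
++ * from revoke_files() above.  Sketch of the wiring (the myfs_* name is
++ * illustrative):
++ *
++ *	const struct file_operations myfs_file_ops = {
++ *		...
++ *		.revoke	= generic_file_revoke,
++ *	};
++ */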
++
++/*
++ *	Filesystem for revoked files.
++ */
++
++static struct inode *revokefs_alloc_inode(struct super_block *sb)
++{
++	struct revokefs_inode_info *info;
++
++	info = kmem_cache_alloc(revokefs_inode_cache, GFP_KERNEL);
++	if (!info)
++		return NULL;
++
++	return &info->vfs_inode;
++}
++
++static void revokefs_destroy_inode(struct inode *inode)
++{
++	kmem_cache_free(revokefs_inode_cache, revokefs_i(inode));
++}
++
++static struct super_operations revokefs_super_ops = {
++	.alloc_inode = revokefs_alloc_inode,
++	.destroy_inode = revokefs_destroy_inode,
++	.drop_inode = generic_delete_inode,
++};
++
++static int revokefs_get_sb(struct file_system_type *fs_type,
++			   int flags, const char *dev_name, void *data,
++			   struct vfsmount *mnt)
++{
++	return get_sb_pseudo(fs_type, "revoke:", &revokefs_super_ops,
++			     REVOKEFS_MAGIC, mnt);
++}
++
++static struct file_system_type revokefs_fs_type = {
++	.name = "revokefs",
++	.get_sb = revokefs_get_sb,
++	.kill_sb = kill_anon_super
++};
++
++static void revokefs_init_inode(void *obj, struct kmem_cache *cache,
++				unsigned long flags)
++{
++	struct revokefs_inode_info *info = obj;
++
++	info->owner = NULL;
++	inode_init_once(&info->vfs_inode);
++}
++
++static int __init revokefs_init(void)
++{
++	int err = -ENOMEM;
++
++	revokefs_inode_cache =
++	    kmem_cache_create("revokefs_inode_cache",
++			      sizeof(struct revokefs_inode_info),
++			      0,
++			      (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT |
++			       SLAB_MEM_SPREAD), revokefs_init_inode, NULL);
++	if (!revokefs_inode_cache)
++		goto out;
++
++	err = register_filesystem(&revokefs_fs_type);
++	if (err)
++		goto err_register;
++
++	revokefs_mnt = kern_mount(&revokefs_fs_type);
++	if (IS_ERR(revokefs_mnt)) {
++		err = PTR_ERR(revokefs_mnt);
++		goto err_mnt;
++	}
++  out:
++	return err;
++  err_mnt:
++	unregister_filesystem(&revokefs_fs_type);
++  err_register:
++	kmem_cache_destroy(revokefs_inode_cache);
++	return err;
++}
++
++late_initcall(revokefs_init);
+diff -Nurb linux-2.6.22-570/fs/revoked_inode.c linux-2.6.22-try2/fs/revoked_inode.c
+--- linux-2.6.22-570/fs/revoked_inode.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/revoked_inode.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,417 @@
++/*
++ * fs/revoked_inode.c
++ *
++ * Copyright (C) 2007  Pekka Enberg
++ *
++ * Provide stub functions for revoked inodes. Based on fs/bad_inode.c which is
++ *
++ * Copyright (C) 1997  Stephen Tweedie
++ *
++ * This file is released under the GPLv2.
++ */
++
++#include <linux/fs.h>
++#include <linux/module.h>
++#include <linux/stat.h>
++#include <linux/time.h>
++#include <linux/smp_lock.h>
++#include <linux/namei.h>
++#include <linux/poll.h>
++#include <linux/revoked_fs_i.h>
++
++static loff_t revoked_file_llseek(struct file *file, loff_t offset, int origin)
++{
++	return -EBADF;
++}
++
++static ssize_t revoked_file_read(struct file *filp, char __user * buf,
++				 size_t size, loff_t * ppos)
++{
++	return -EBADF;
++}
++
++static ssize_t revoked_special_file_read(struct file *filp, char __user * buf,
++					 size_t size, loff_t * ppos)
++{
++	return 0;
++}
++
++static ssize_t revoked_file_write(struct file *filp, const char __user * buf,
++				  size_t siz, loff_t * ppos)
++{
++	return -EBADF;
++}
++
++static ssize_t revoked_file_aio_read(struct kiocb *iocb,
++				     const struct iovec *iov,
++				     unsigned long nr_segs, loff_t pos)
++{
++	return -EBADF;
++}
++
++static ssize_t revoked_file_aio_write(struct kiocb *iocb,
++				      const struct iovec *iov,
++				      unsigned long nr_segs, loff_t pos)
++{
++	return -EBADF;
++}
++
++static int revoked_file_readdir(struct file *filp, void *dirent,
++				filldir_t filldir)
++{
++	return -EBADF;
++}
++
++static unsigned int revoked_file_poll(struct file *filp, poll_table * wait)
++{
++	return POLLERR;
++}
++
++static int revoked_file_ioctl(struct inode *inode, struct file *filp,
++			      unsigned int cmd, unsigned long arg)
++{
++	return -EBADF;
++}
++
++static long revoked_file_unlocked_ioctl(struct file *file, unsigned cmd,
++					unsigned long arg)
++{
++	return -EBADF;
++}
++
++static long revoked_file_compat_ioctl(struct file *file, unsigned int cmd,
++				      unsigned long arg)
++{
++	return -EBADF;
++}
++
++static int revoked_file_mmap(struct file *file, struct vm_area_struct *vma)
++{
++	return -EBADF;
++}
++
++static int revoked_file_open(struct inode *inode, struct file *filp)
++{
++	return -EBADF;
++}
++
++static int revoked_file_flush(struct file *file, fl_owner_t id)
++{
++	return filp_close(file, id);
++}
++
++static int revoked_file_release(struct inode *inode, struct file *filp)
++{
++	return -EBADF;
++}
++
++static int revoked_file_fsync(struct file *file, struct dentry *dentry,
++			      int datasync)
++{
++	return -EBADF;
++}
++
++static int revoked_file_aio_fsync(struct kiocb *iocb, int datasync)
++{
++	return -EBADF;
++}
++
++static int revoked_file_fasync(int fd, struct file *filp, int on)
++{
++	return -EBADF;
++}
++
++static int revoked_file_lock(struct file *file, int cmd, struct file_lock *fl)
++{
++	return -EBADF;
++}
++
++static ssize_t revoked_file_sendfile(struct file *in_file, loff_t * ppos,
++				     size_t count, read_actor_t actor,
++				     void *target)
++{
++	return -EBADF;
++}
++
++static ssize_t revoked_file_sendpage(struct file *file, struct page *page,
++				     int off, size_t len, loff_t * pos,
++				     int more)
++{
++	return -EBADF;
++}
++
++static unsigned long revoked_file_get_unmapped_area(struct file *file,
++						    unsigned long addr,
++						    unsigned long len,
++						    unsigned long pgoff,
++						    unsigned long flags)
++{
++	return -EBADF;
++}
++
++static int revoked_file_check_flags(int flags)
++{
++	return -EBADF;
++}
++
++static int revoked_file_dir_notify(struct file *file, unsigned long arg)
++{
++	return -EBADF;
++}
++
++static int revoked_file_flock(struct file *filp, int cmd, struct file_lock *fl)
++{
++	return -EBADF;
++}
++
++static ssize_t revoked_file_splice_write(struct pipe_inode_info *pipe,
++					 struct file *out, loff_t * ppos,
++					 size_t len, unsigned int flags)
++{
++	return -EBADF;
++}
++
++static ssize_t revoked_file_splice_read(struct file *in, loff_t * ppos,
++					struct pipe_inode_info *pipe,
++					size_t len, unsigned int flags)
++{
++	return -EBADF;
++}
++
++static const struct file_operations revoked_file_ops = {
++	.llseek = revoked_file_llseek,
++	.read = revoked_file_read,
++	.write = revoked_file_write,
++	.aio_read = revoked_file_aio_read,
++	.aio_write = revoked_file_aio_write,
++	.readdir = revoked_file_readdir,
++	.poll = revoked_file_poll,
++	.ioctl = revoked_file_ioctl,
++	.unlocked_ioctl = revoked_file_unlocked_ioctl,
++	.compat_ioctl = revoked_file_compat_ioctl,
++	.mmap = revoked_file_mmap,
++	.open = revoked_file_open,
++	.flush = revoked_file_flush,
++	.release = revoked_file_release,
++	.fsync = revoked_file_fsync,
++	.aio_fsync = revoked_file_aio_fsync,
++	.fasync = revoked_file_fasync,
++	.lock = revoked_file_lock,
++	.sendfile = revoked_file_sendfile,
++	.sendpage = revoked_file_sendpage,
++	.get_unmapped_area = revoked_file_get_unmapped_area,
++	.check_flags = revoked_file_check_flags,
++	.dir_notify = revoked_file_dir_notify,
++	.flock = revoked_file_flock,
++	.splice_write = revoked_file_splice_write,
++	.splice_read = revoked_file_splice_read,
++};
++
++static const struct file_operations revoked_special_file_ops = {
++	.llseek = revoked_file_llseek,
++	.read = revoked_special_file_read,
++	.write = revoked_file_write,
++	.aio_read = revoked_file_aio_read,
++	.aio_write = revoked_file_aio_write,
++	.readdir = revoked_file_readdir,
++	.poll = revoked_file_poll,
++	.ioctl = revoked_file_ioctl,
++	.unlocked_ioctl = revoked_file_unlocked_ioctl,
++	.compat_ioctl = revoked_file_compat_ioctl,
++	.mmap = revoked_file_mmap,
++	.open = revoked_file_open,
++	.flush = revoked_file_flush,
++	.release = revoked_file_release,
++	.fsync = revoked_file_fsync,
++	.aio_fsync = revoked_file_aio_fsync,
++	.fasync = revoked_file_fasync,
++	.lock = revoked_file_lock,
++	.sendfile = revoked_file_sendfile,
++	.sendpage = revoked_file_sendpage,
++	.get_unmapped_area = revoked_file_get_unmapped_area,
++	.check_flags = revoked_file_check_flags,
++	.dir_notify = revoked_file_dir_notify,
++	.flock = revoked_file_flock,
++	.splice_write = revoked_file_splice_write,
++	.splice_read = revoked_file_splice_read,
++};
++
++static int revoked_inode_create(struct inode *dir, struct dentry *dentry,
++				int mode, struct nameidata *nd)
++{
++	return -EBADF;
++}
++
++static struct dentry *revoked_inode_lookup(struct inode *dir,
++					   struct dentry *dentry,
++					   struct nameidata *nd)
++{
++	return ERR_PTR(-EBADF);
++}
++
++static int revoked_inode_link(struct dentry *old_dentry, struct inode *dir,
++			      struct dentry *dentry)
++{
++	return -EBADF;
++}
++
++static int revoked_inode_unlink(struct inode *dir, struct dentry *dentry)
++{
++	return -EBADF;
++}
++
++static int revoked_inode_symlink(struct inode *dir, struct dentry *dentry,
++				 const char *symname)
++{
++	return -EBADF;
++}
++
++static int revoked_inode_mkdir(struct inode *dir, struct dentry *dentry,
++			       int mode)
++{
++	return -EBADF;
++}
++
++static int revoked_inode_rmdir(struct inode *dir, struct dentry *dentry)
++{
++	return -EBADF;
++}
++
++static int revoked_inode_mknod(struct inode *dir, struct dentry *dentry,
++			       int mode, dev_t rdev)
++{
++	return -EBADF;
++}
++
++static int revoked_inode_rename(struct inode *old_dir,
++				struct dentry *old_dentry,
++				struct inode *new_dir,
++				struct dentry *new_dentry)
++{
++	return -EBADF;
++}
++
++static int revoked_inode_readlink(struct dentry *dentry, char __user * buffer,
++				  int buflen)
++{
++	return -EBADF;
++}
++
++static int revoked_inode_permission(struct inode *inode, int mask,
++				    struct nameidata *nd)
++{
++	return -EBADF;
++}
++
++static int revoked_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
++				 struct kstat *stat)
++{
++	return -EBADF;
++}
++
++static int revoked_inode_setattr(struct dentry *direntry, struct iattr *attrs)
++{
++	return -EBADF;
++}
++
++static int revoked_inode_setxattr(struct dentry *dentry, const char *name,
++				  const void *value, size_t size, int flags)
++{
++	return -EBADF;
++}
++
++static ssize_t revoked_inode_getxattr(struct dentry *dentry, const char *name,
++				      void *buffer, size_t size)
++{
++	return -EBADF;
++}
++
++static ssize_t revoked_inode_listxattr(struct dentry *dentry, char *buffer,
++				       size_t buffer_size)
++{
++	return -EBADF;
++}
++
++static int revoked_inode_removexattr(struct dentry *dentry, const char *name)
++{
++	return -EBADF;
++}
++
++static struct inode_operations revoked_inode_ops = {
++	.create = revoked_inode_create,
++	.lookup = revoked_inode_lookup,
++	.link = revoked_inode_link,
++	.unlink = revoked_inode_unlink,
++	.symlink = revoked_inode_symlink,
++	.mkdir = revoked_inode_mkdir,
++	.rmdir = revoked_inode_rmdir,
++	.mknod = revoked_inode_mknod,
++	.rename = revoked_inode_rename,
++	.readlink = revoked_inode_readlink,
++	/* follow_link must be no-op, otherwise unmounting this inode
++	   won't work */
++	/* put_link returns void */
++	/* truncate returns void */
++	.permission = revoked_inode_permission,
++	.getattr = revoked_inode_getattr,
++	.setattr = revoked_inode_setattr,
++	.setxattr = revoked_inode_setxattr,
++	.getxattr = revoked_inode_getxattr,
++	.listxattr = revoked_inode_listxattr,
++	.removexattr = revoked_inode_removexattr,
++	/* truncate_range returns void */
++};
++
++static int revoked_readpage(struct file *file, struct page *page)
++{
++	return -EIO;
++}
++
++static int revoked_writepage(struct page *page, struct writeback_control *wbc)
++{
++	return -EIO;
++}
++
++static int revoked_prepare_write(struct file *file, struct page *page,
++				 unsigned from, unsigned to)
++{
++	return -EIO;
++}
++
++static int revoked_commit_write(struct file *file, struct page *page,
++				unsigned from, unsigned to)
++{
++	return -EIO;
++}
++
++static ssize_t revoked_direct_IO(int rw, struct kiocb *iocb,
++				 const struct iovec *iov, loff_t offset,
++				 unsigned long nr_segs)
++{
++	return -EIO;
++}
++
++static const struct address_space_operations revoked_aops = {
++        .readpage       = revoked_readpage,
++        .writepage      = revoked_writepage,
++        .prepare_write  = revoked_prepare_write,
++        .commit_write   = revoked_commit_write,
++        .direct_IO      = revoked_direct_IO,
++};
++
++void make_revoked_inode(struct inode *inode, int mode)
++{
++	remove_inode_hash(inode);
++
++	inode->i_mode = mode;
++	inode->i_atime = inode->i_mtime = inode->i_ctime =
++	    current_fs_time(inode->i_sb);
++	inode->i_op = &revoked_inode_ops;
++
++	if (special_file(mode))
++		inode->i_fop = &revoked_special_file_ops;
++	else
++		inode->i_fop = &revoked_file_ops;
++
++	inode->i_mapping->a_ops = &revoked_aops;
++}
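++
++/*
++ * Net effect, illustratively: once make_revoked_inode() has run, the
++ * stub tables above make existing references behave as if closed:
++ *
++ *	read(fd, buf, n)	fails with EBADF
++ *	mmap(..., fd, 0)	fails with EBADF
++ *	poll(...)		reports POLLERR
++ *
++ * except that read() on a revoked special file returns 0 (EOF), per
++ * revoked_special_file_read() above.
++ */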
+diff -Nurb linux-2.6.22-570/fs/splice.c linux-2.6.22-try2/fs/splice.c
+--- linux-2.6.22-570/fs/splice.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/fs/splice.c	2007-12-19 15:29:23.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/module.h>
+ #include <linux/syscalls.h>
+ #include <linux/uio.h>
++#include <linux/security.h>
+ 
+ struct partial_page {
+ 	unsigned int offset;
+@@ -932,6 +933,10 @@
+ 	if (unlikely(ret < 0))
+ 		return ret;
+ 
++	ret = security_file_permission(out, MAY_WRITE);
++	if (unlikely(ret < 0))
++		return ret;
++
+ 	return out->f_op->splice_write(pipe, out, ppos, len, flags);
+ }
+ 
+@@ -954,6 +959,18 @@
+ 	if (unlikely(ret < 0))
+ 		return ret;
+ 
++	ret = security_file_permission(in, MAY_READ);
++	if (unlikely(ret < 0))
++		return ret;
++
++	isize = i_size_read(in->f_mapping->host);
++	if (unlikely(*ppos >= isize))
++		return 0;
++
++	left = isize - *ppos;
++	if (unlikely(left < len))
++		len = left;
++
+ 	return in->f_op->splice_read(in, ppos, pipe, len, flags);
+ }
+ 
+@@ -1272,6 +1289,7 @@
+ static long do_vmsplice(struct file *file, const struct iovec __user *iov,
+ 			unsigned long nr_segs, unsigned int flags)
+ {
++	long err;
+ 	struct pipe_inode_info *pipe;
+ 	struct page *pages[PIPE_BUFFERS];
+ 	struct partial_page partial[PIPE_BUFFERS];
+@@ -1290,6 +1308,10 @@
+ 	else if (unlikely(!nr_segs))
+ 		return 0;
+ 
++	err = security_file_permission(file, MAY_WRITE);
++	if (unlikely(err < 0))
++		return err;
++
+ 	spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
+ 					    flags & SPLICE_F_GIFT);
+ 	if (spd.nr_pages <= 0)
+diff -Nurb linux-2.6.22-570/fs/stack.c linux-2.6.22-try2/fs/stack.c
+--- linux-2.6.22-570/fs/stack.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/stack.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1,8 +1,20 @@
++/*
++ * Copyright (c) 2006-2007 Erez Zadok
++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2006-2007 Stony Brook University
++ * Copyright (c) 2006-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
+ #include <linux/module.h>
+ #include <linux/fs.h>
+ #include <linux/fs_stack.h>
+ 
+-/* does _NOT_ require i_mutex to be held.
++/*
++ * does _NOT_ require i_mutex to be held.
+  *
+  * This function cannot be inlined since i_size_{read,write} is rather
+  * heavy-weight on 32-bit systems
+@@ -14,7 +26,8 @@
+ }
+ EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
+ 
+-/* copy all attributes; get_nlinks is optional way to override the i_nlink
++/*
++ * copy all attributes; get_nlinks is optional way to override the i_nlink
+  * copying
+  */
+ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
+diff -Nurb linux-2.6.22-570/fs/sync.c linux-2.6.22-try2/fs/sync.c
+--- linux-2.6.22-570/fs/sync.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/sync.c	2007-12-19 15:46:56.000000000 -0500
+@@ -174,6 +174,14 @@
+  * already-instantiated disk blocks, there are no guarantees here that the data
+  * will be available after a crash.
+  */
++/* It would be nice if people remember that not all the world's an i386
++   when they introduce new system calls */
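++/* sync_file_range2() reorders the arguments so that each 64-bit loff_t
++   lands on an even/odd register pair on 32-bit ABIs such as ARM EABI,
++   instead of spilling around a pad register.  A caller would use it
++   as, e.g. (sketch):
++	syscall(__NR_sync_file_range2, fd, SYNC_FILE_RANGE_WRITE,
++		offset, nbytes);					*/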
++asmlinkage long sys_sync_file_range2(int fd, unsigned int flags,
++				     loff_t offset, loff_t nbytes)
++{
++	return sys_sync_file_range(fd, offset, nbytes, flags);
++}
++
+ asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes,
+ 					unsigned int flags)
+ {
+diff -Nurb linux-2.6.22-570/fs/sysfs/bin.c linux-2.6.22-try2/fs/sysfs/bin.c
+--- linux-2.6.22-570/fs/sysfs/bin.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/sysfs/bin.c	2007-12-19 15:29:23.000000000 -0500
+@@ -20,29 +20,41 @@
+ 
+ #include "sysfs.h"
+ 
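++/*
++ * Per-open-file state: @mutex serializes read/write/mmap against one
++ * another, @buffer is a one-page bounce buffer for copy_{to,from}_user,
++ * and @mmapped records that the active reference stays pinned until
++ * release().
++ */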
++struct bin_buffer {
++	struct mutex	mutex;
++	void		*buffer;
++	int		mmapped;
++};
++
+ static int
+ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
+ {
+-	struct bin_attribute * attr = to_bin_attr(dentry);
+-	struct kobject * kobj = to_kobj(dentry->d_parent);
++	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
++	struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
++	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
++	int rc;
++
++	/* need attr_sd for attr, its parent for kobj */
++	if (!sysfs_get_active_two(attr_sd))
++		return -ENODEV;
++
++	rc = -EIO;
++	if (attr->read)
++		rc = attr->read(kobj, attr, buffer, off, count);
+ 
+-	if (!attr->read)
+-		return -EIO;
++	sysfs_put_active_two(attr_sd);
+ 
+-	return attr->read(kobj, buffer, off, count);
++	return rc;
+ }
+ 
+ static ssize_t
+-read(struct file * file, char __user * userbuf, size_t count, loff_t * off)
++read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
+ {
+-	char *buffer = file->private_data;
++	struct bin_buffer *bb = file->private_data;
+ 	struct dentry *dentry = file->f_path.dentry;
+ 	int size = dentry->d_inode->i_size;
+ 	loff_t offs = *off;
+-	int ret;
+-
+-	if (count > PAGE_SIZE)
+-		count = PAGE_SIZE;
++	int count = min_t(size_t, bytes, PAGE_SIZE);
+ 
+ 	if (size) {
+ 		if (offs > size)
+@@ -51,43 +63,56 @@
+ 			count = size - offs;
+ 	}
+ 
+-	ret = fill_read(dentry, buffer, offs, count);
+-	if (ret < 0) 
+-		return ret;
+-	count = ret;
++	mutex_lock(&bb->mutex);
+ 
+-	if (copy_to_user(userbuf, buffer, count))
+-		return -EFAULT;
++	count = fill_read(dentry, bb->buffer, offs, count);
++	if (count < 0)
++		goto out_unlock;
++
++	if (copy_to_user(userbuf, bb->buffer, count)) {
++		count = -EFAULT;
++		goto out_unlock;
++	}
+ 
+-	pr_debug("offs = %lld, *off = %lld, count = %zd\n", offs, *off, count);
++	pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count);
+ 
+ 	*off = offs + count;
+ 
++ out_unlock:
++	mutex_unlock(&bb->mutex);
+ 	return count;
+ }
+ 
+ static int
+ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
+ {
+-	struct bin_attribute *attr = to_bin_attr(dentry);
+-	struct kobject *kobj = to_kobj(dentry->d_parent);
++	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
++	struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
++	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
++	int rc;
+ 
+-	if (!attr->write)
+-		return -EIO;
++	/* need attr_sd for attr, its parent for kobj */
++	if (!sysfs_get_active_two(attr_sd))
++		return -ENODEV;
+ 
+-	return attr->write(kobj, buffer, offset, count);
++	rc = -EIO;
++	if (attr->write)
++		rc = attr->write(kobj, attr, buffer, offset, count);
++
++	sysfs_put_active_two(attr_sd);
++
++	return rc;
+ }
+ 
+-static ssize_t write(struct file * file, const char __user * userbuf,
+-		     size_t count, loff_t * off)
++static ssize_t write(struct file *file, const char __user *userbuf,
++		     size_t bytes, loff_t *off)
+ {
+-	char *buffer = file->private_data;
++	struct bin_buffer *bb = file->private_data;
+ 	struct dentry *dentry = file->f_path.dentry;
+ 	int size = dentry->d_inode->i_size;
+ 	loff_t offs = *off;
++	int count = min_t(size_t, bytes, PAGE_SIZE);
+ 
+-	if (count > PAGE_SIZE)
+-		count = PAGE_SIZE;
+ 	if (size) {
+ 		if (offs > size)
+ 			return 0;
+@@ -95,72 +120,100 @@
+ 			count = size - offs;
+ 	}
+ 
+-	if (copy_from_user(buffer, userbuf, count))
+-		return -EFAULT;
++	mutex_lock(&bb->mutex);
+ 
+-	count = flush_write(dentry, buffer, offs, count);
++	if (copy_from_user(bb->buffer, userbuf, count)) {
++		count = -EFAULT;
++		goto out_unlock;
++	}
++
++	count = flush_write(dentry, bb->buffer, offs, count);
+ 	if (count > 0)
+ 		*off = offs + count;
++
++ out_unlock:
++	mutex_unlock(&bb->mutex);
+ 	return count;
+ }
+ 
+ static int mmap(struct file *file, struct vm_area_struct *vma)
+ {
+-	struct dentry *dentry = file->f_path.dentry;
+-	struct bin_attribute *attr = to_bin_attr(dentry);
+-	struct kobject *kobj = to_kobj(dentry->d_parent);
++	struct bin_buffer *bb = file->private_data;
++	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
++	struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
++	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
++	int rc;
++
++	mutex_lock(&bb->mutex);
++
++	/* need attr_sd for attr, its parent for kobj */
++	if (!sysfs_get_active_two(attr_sd)) {
++		mutex_unlock(&bb->mutex);
++		return -ENODEV;
++	}
++
++	rc = -EINVAL;
++	if (attr->mmap)
++		rc = attr->mmap(kobj, attr, vma);
++
++	if (rc == 0 && !bb->mmapped)
++		bb->mmapped = 1;
++	else
++		sysfs_put_active_two(attr_sd);
+ 
+-	if (!attr->mmap)
+-		return -EINVAL;
++	mutex_unlock(&bb->mutex);
+ 
+-	return attr->mmap(kobj, attr, vma);
++	return rc;
+ }
+ 
+ static int open(struct inode * inode, struct file * file)
+ {
+-	struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
+-	struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
+-	int error = -EINVAL;
+-
+-	if (!kobj || !attr)
+-		goto Done;
+-
+-	/* Grab the module reference for this attribute if we have one */
+-	error = -ENODEV;
+-	if (!try_module_get(attr->attr.owner)) 
+-		goto Done;
++	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
++	struct bin_attribute *attr = attr_sd->s_elem.bin_attr.bin_attr;
++	struct bin_buffer *bb = NULL;
++	int error;
++
++	/* need attr_sd for attr */
++	if (!sysfs_get_active(attr_sd))
++		return -ENODEV;
+ 
+ 	error = -EACCES;
+ 	if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap))
+-		goto Error;
++		goto err_out;
+ 	if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap))
+-		goto Error;
++		goto err_out;
+ 
+ 	error = -ENOMEM;
+-	file->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+-	if (!file->private_data)
+-		goto Error;
+-
+-	error = 0;
+-    goto Done;
+-
+- Error:
+-	module_put(attr->attr.owner);
+- Done:
+-	if (error)
+-		kobject_put(kobj);
++	bb = kzalloc(sizeof(*bb), GFP_KERNEL);
++	if (!bb)
++		goto err_out;
++
++	bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
++	if (!bb->buffer)
++		goto err_out;
++
++	mutex_init(&bb->mutex);
++	file->private_data = bb;
++
++	/* open succeeded, put active reference and pin attr_sd */
++	sysfs_put_active(attr_sd);
++	sysfs_get(attr_sd);
++	return 0;
++
++ err_out:
++	sysfs_put_active(attr_sd);
++	kfree(bb);
+ 	return error;
+ }
+ 
+ static int release(struct inode * inode, struct file * file)
+ {
+-	struct kobject * kobj = to_kobj(file->f_path.dentry->d_parent);
+-	struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
+-	u8 * buffer = file->private_data;
+-
+-	kobject_put(kobj);
+-	module_put(attr->attr.owner);
+-	kfree(buffer);
++	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
++	struct bin_buffer *bb = file->private_data;
++
++	if (bb->mmapped)
++		sysfs_put_active_two(attr_sd);
++	sysfs_put(attr_sd);
++	kfree(bb->buffer);
++	kfree(bb);
+ 	return 0;
+ }
+ 
+@@ -181,9 +234,9 @@
+ 
+ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+ {
+-	BUG_ON(!kobj || !kobj->dentry || !attr);
++	BUG_ON(!kobj || !kobj->sd || !attr);
+ 
+-	return sysfs_add_file(kobj->dentry, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
++	return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR);
+ }
+ 
+ 
+@@ -195,7 +248,7 @@
+ 
+ void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+ {
+-	if (sysfs_hash_and_remove(kobj->dentry, attr->attr.name) < 0) {
++	if (sysfs_hash_and_remove(kobj->sd, attr->attr.name) < 0) {
+ 		printk(KERN_ERR "%s: "
+ 			"bad dentry or inode or no such file: \"%s\"\n",
+ 			__FUNCTION__, attr->attr.name);
+diff -Nurb linux-2.6.22-570/fs/sysfs/dir.c linux-2.6.22-try2/fs/sysfs/dir.c
+--- linux-2.6.22-570/fs/sysfs/dir.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/sysfs/dir.c	2007-12-19 15:29:23.000000000 -0500
+@@ -9,21 +9,337 @@
+ #include <linux/module.h>
+ #include <linux/kobject.h>
+ #include <linux/namei.h>
++#include <linux/idr.h>
++#include <linux/completion.h>
+ #include <asm/semaphore.h>
+ #include "sysfs.h"
+ 
+-DECLARE_RWSEM(sysfs_rename_sem);
+-spinlock_t sysfs_lock = SPIN_LOCK_UNLOCKED;
++DEFINE_MUTEX(sysfs_mutex);
++spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
++
++static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
++static DEFINE_IDA(sysfs_ino_ida);
++
++/**
++ *	sysfs_link_sibling - link sysfs_dirent into sibling list
++ *	@sd: sysfs_dirent of interest
++ *
++ *	Link @sd into its sibling list which starts from
++ *	sd->s_parent->s_children.
++ *
++ *	Locking:
++ *	mutex_lock(sysfs_mutex)
++ */
++void sysfs_link_sibling(struct sysfs_dirent *sd)
++{
++	struct sysfs_dirent *parent_sd = sd->s_parent;
++
++	BUG_ON(sd->s_sibling);
++	sd->s_sibling = parent_sd->s_children;
++	parent_sd->s_children = sd;
++}
++
++/**
++ *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
++ *	@sd: sysfs_dirent of interest
++ *
++ *	Unlink @sd from its sibling list which starts from
++ *	sd->s_parent->s_children.
++ *
++ *	Locking:
++ *	mutex_lock(sysfs_mutex)
++ */
++void sysfs_unlink_sibling(struct sysfs_dirent *sd)
++{
++	struct sysfs_dirent **pos;
++
++	for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
++		if (*pos == sd) {
++			*pos = sd->s_sibling;
++			sd->s_sibling = NULL;
++			break;
++		}
++	}
++}
++
++/**
++ *	sysfs_get_dentry - get dentry for the given sysfs_dirent
++ *	@sd: sysfs_dirent of interest
++ *
++ *	Get dentry for @sd.  Dentry is looked up if currently not
++ *	present.  This function climbs sysfs_dirent tree till it
++ *	reaches a sysfs_dirent with valid dentry attached and descends
++ *	down from there looking up dentry for each step.
++ *
++ *	LOCKING:
++ *	Kernel thread context (may sleep)
++ *
++ *	RETURNS:
++ *	Pointer to found dentry on success, ERR_PTR() value on error.
++ */
++struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
++{
++	struct sysfs_dirent *cur;
++	struct dentry *parent_dentry, *dentry;
++	int i, depth;
++
++	/* Find the first parent which has valid s_dentry and get the
++	 * dentry.
++	 */
++	mutex_lock(&sysfs_mutex);
++ restart0:
++	spin_lock(&sysfs_assoc_lock);
++ restart1:
++	spin_lock(&dcache_lock);
++
++	dentry = NULL;
++	depth = 0;
++	cur = sd;
++	while (!cur->s_dentry || !cur->s_dentry->d_inode) {
++		if (cur->s_flags & SYSFS_FLAG_REMOVED) {
++			dentry = ERR_PTR(-ENOENT);
++			depth = 0;
++			break;
++		}
++		cur = cur->s_parent;
++		depth++;
++	}
++	if (!IS_ERR(dentry))
++		dentry = dget_locked(cur->s_dentry);
++
++	spin_unlock(&dcache_lock);
++	spin_unlock(&sysfs_assoc_lock);
++
++	/* from the found dentry, look up depth times */
++	while (depth--) {
++		/* find and get depth'th ancestor */
++		for (cur = sd, i = 0; cur && i < depth; i++)
++			cur = cur->s_parent;
++
++		/* This can happen if tree structure was modified due
++		 * to move/rename.  Restart.
++		 */
++		if (i != depth) {
++			dput(dentry);
++			goto restart0;
++		}
++
++		sysfs_get(cur);
++
++		mutex_unlock(&sysfs_mutex);
++
++		/* look it up */
++		parent_dentry = dentry;
++		dentry = lookup_one_len_kern(cur->s_name, parent_dentry,
++					     strlen(cur->s_name));
++		dput(parent_dentry);
++
++		if (IS_ERR(dentry)) {
++			sysfs_put(cur);
++			return dentry;
++		}
++
++		mutex_lock(&sysfs_mutex);
++		spin_lock(&sysfs_assoc_lock);
++
++		/* This, again, can happen if tree structure has
++		 * changed and we looked up the wrong thing.  Restart.
++		 */
++		if (cur->s_dentry != dentry) {
++			dput(dentry);
++			sysfs_put(cur);
++			goto restart1;
++		}
++
++		spin_unlock(&sysfs_assoc_lock);
++
++		sysfs_put(cur);
++	}
++
++	mutex_unlock(&sysfs_mutex);
++	return dentry;
++}
++
++/**
++ *	sysfs_get_active - get an active reference to sysfs_dirent
++ *	@sd: sysfs_dirent to get an active reference to
++ *
++ *	Get an active reference to @sd.  This function is a noop if
++ *	@sd is NULL.
++ *
++ *	RETURNS:
++ *	Pointer to @sd on success, NULL on failure.
++ */
++struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
++{
++	if (unlikely(!sd))
++		return NULL;
++
++	while (1) {
++		int v, t;
++
++		v = atomic_read(&sd->s_active);
++		if (unlikely(v < 0))
++			return NULL;
++
++		t = atomic_cmpxchg(&sd->s_active, v, v + 1);
++		if (likely(t == v))
++			return sd;
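++		/* t < 0: sysfs_deactivate() hit between the read and
++		 * the cmpxchg; refuse, as for v < 0 above */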
++		if (t < 0)
++			return NULL;
++
++		cpu_relax();
++	}
++}
++
++/**
++ *	sysfs_put_active - put an active reference to sysfs_dirent
++ *	@sd: sysfs_dirent to put an active reference to
++ *
++ *	Put an active reference to @sd.  This function is a noop if
++ *	@sd is NULL.
++ */
++void sysfs_put_active(struct sysfs_dirent *sd)
++{
++	struct completion *cmpl;
++	int v;
++
++	if (unlikely(!sd))
++		return;
++
++	v = atomic_dec_return(&sd->s_active);
++	if (likely(v != SD_DEACTIVATED_BIAS))
++		return;
++
++	/* atomic_dec_return() is a mb(), we'll always see the updated
++	 * sd->s_sibling.
++	 */
++	cmpl = (void *)sd->s_sibling;
++	complete(cmpl);
++}
++
++/**
++ *	sysfs_get_active_two - get active references to sysfs_dirent and parent
++ *	@sd: sysfs_dirent of interest
++ *
++ *	Get active reference to @sd and its parent.  Parent's active
++ *	reference is grabbed first.  This function is a noop if @sd
++ *	is NULL.
++ *
++ *	RETURNS:
++ *	Pointer to @sd on success, NULL on failure.
++ */
++struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
++{
++	if (sd) {
++		if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
++			return NULL;
++		if (unlikely(!sysfs_get_active(sd))) {
++			sysfs_put_active(sd->s_parent);
++			return NULL;
++		}
++	}
++	return sd;
++}
++
++/**
++ *	sysfs_put_active_two - put active references to sysfs_dirent and parent
++ *	@sd: sysfs_dirent of interest
++ *
++ *	Put active references to @sd and its parent.  This function
++ *	is a noop if @sd is NULL.
++ */
++void sysfs_put_active_two(struct sysfs_dirent *sd)
++{
++	if (sd) {
++		sysfs_put_active(sd);
++		sysfs_put_active(sd->s_parent);
++	}
++}
++
++/**
++ *	sysfs_deactivate - deactivate sysfs_dirent
++ *	@sd: sysfs_dirent to deactivate
++ *
++ *	Deny new active references and drain existing ones.
++ */
++static void sysfs_deactivate(struct sysfs_dirent *sd)
++{
++	DECLARE_COMPLETION_ONSTACK(wait);
++	int v;
++
++	BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
++	sd->s_sibling = (void *)&wait;
++
++	/* atomic_add_return() is a mb(), put_active() will always see
++	 * the updated sd->s_sibling.
++	 */
++	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
++
++	if (v != SD_DEACTIVATED_BIAS)
++		wait_for_completion(&wait);
++
++	sd->s_sibling = NULL;
++}
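++
++/*
++ * The s_active scheme is a drainable reference count.  Reduced to its
++ * essentials (BIAS standing in for SD_DEACTIVATED_BIAS, a large
++ * negative value), the protocol is:
++ *
++ *	get:	v = read(count); fail if v < 0;
++ *		retry cmpxchg(count, v, v + 1) until it sticks
++ *	put:	if (atomic_dec_return(count) == BIAS) complete(waiter)
++ *	drain:	if (atomic_add_return(BIAS, count) != BIAS)
++ *			wait_for_completion(waiter)
++ *
++ * Once drained, no new active references can be taken and all old
++ * ones have been dropped, so the node can safely go away.
++ */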
++
++static int sysfs_alloc_ino(ino_t *pino)
++{
++	int ino, rc;
++
++ retry:
++	spin_lock(&sysfs_ino_lock);
++	rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
++	spin_unlock(&sysfs_ino_lock);
++
++	if (rc == -EAGAIN) {
++		if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
++			goto retry;
++		rc = -ENOMEM;
++	}
++
++	*pino = ino;
++	return rc;
++}
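++
++/*
++ * Note on the retry loop above: ida_get_new_above() returns -EAGAIN
++ * when the IDA needs more memory, and ida_pre_get() preloads it.  The
++ * preload may sleep (GFP_KERNEL), which is why it runs outside
++ * sysfs_ino_lock before the allocation is retried.
++ */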
++
++static void sysfs_free_ino(ino_t ino)
++{
++	spin_lock(&sysfs_ino_lock);
++	ida_remove(&sysfs_ino_ida, ino);
++	spin_unlock(&sysfs_ino_lock);
++}
++
++void release_sysfs_dirent(struct sysfs_dirent * sd)
++{
++	struct sysfs_dirent *parent_sd;
++
++ repeat:
++	/* Moving/renaming is always done while holding reference.
++	 * sd->s_parent won't change beneath us.
++	 */
++	parent_sd = sd->s_parent;
++
++	if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
++		sysfs_put(sd->s_elem.symlink.target_sd);
++	if (sysfs_type(sd) & SYSFS_COPY_NAME)
++		kfree(sd->s_name);
++	kfree(sd->s_iattr);
++	sysfs_free_ino(sd->s_ino);
++	kmem_cache_free(sysfs_dir_cachep, sd);
++
++	sd = parent_sd;
++	if (sd && atomic_dec_and_test(&sd->s_count))
++		goto repeat;
++}
+ 
+ static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
+ {
+ 	struct sysfs_dirent * sd = dentry->d_fsdata;
+ 
+ 	if (sd) {
+-		/* sd->s_dentry is protected with sysfs_lock.  This
+-		 * allows sysfs_drop_dentry() to dereference it.
++		/* sd->s_dentry is protected with sysfs_assoc_lock.
++		 * This allows sysfs_drop_dentry() to dereference it.
+ 		 */
+-		spin_lock(&sysfs_lock);
++		spin_lock(&sysfs_assoc_lock);
+ 
+ 		/* The dentry might have been deleted or another
+ 		 * lookup could have happened updating sd->s_dentry to
+@@ -32,7 +348,7 @@
+ 		 */
+ 		if (sd->s_dentry == dentry)
+ 			sd->s_dentry = NULL;
+-		spin_unlock(&sysfs_lock);
++		spin_unlock(&sysfs_assoc_lock);
+ 		sysfs_put(sd);
+ 	}
+ 	iput(inode);
+@@ -42,260 +358,402 @@
+ 	.d_iput		= sysfs_d_iput,
+ };
+ 
+-static unsigned int sysfs_inode_counter;
+-ino_t sysfs_get_inum(void)
++struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
+ {
+-	if (unlikely(sysfs_inode_counter < 3))
+-		sysfs_inode_counter = 3;
+-	return sysfs_inode_counter++;
+-}
++	char *dup_name = NULL;
++	struct sysfs_dirent *sd = NULL;
+ 
+-/*
+- * Allocates a new sysfs_dirent and links it to the parent sysfs_dirent
+- */
+-static struct sysfs_dirent * __sysfs_new_dirent(void * element)
+-{
+-	struct sysfs_dirent * sd;
++	if (type & SYSFS_COPY_NAME) {
++		name = dup_name = kstrdup(name, GFP_KERNEL);
++		if (!name)
++			goto err_out;
++	}
+ 
+ 	sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
+ 	if (!sd)
+-		return NULL;
++		goto err_out;
++
++	if (sysfs_alloc_ino(&sd->s_ino))
++		goto err_out;
+ 
+-	sd->s_ino = sysfs_get_inum();
+ 	atomic_set(&sd->s_count, 1);
++	atomic_set(&sd->s_active, 0);
+ 	atomic_set(&sd->s_event, 1);
+-	INIT_LIST_HEAD(&sd->s_children);
+-	INIT_LIST_HEAD(&sd->s_sibling);
+-	sd->s_element = element;
++
++	sd->s_name = name;
++	sd->s_mode = mode;
++	sd->s_flags = type;
+ 
+ 	return sd;
++
++ err_out:
++	kfree(dup_name);
++	kmem_cache_free(sysfs_dir_cachep, sd);
++	return NULL;
+ }
+ 
+-static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd,
+-			      struct sysfs_dirent *sd)
++/**
++ *	sysfs_attach_dentry - associate sysfs_dirent with dentry
++ *	@sd: target sysfs_dirent
++ *	@dentry: dentry to associate
++ *
++ *	Associate @sd with @dentry.  This is protected by
++ *	sysfs_assoc_lock to avoid race with sysfs_d_iput().
++ *
++ *	LOCKING:
++ *	mutex_lock(sysfs_mutex)
++ */
++static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
+ {
+-	if (sd)
+-		list_add(&sd->s_sibling, &parent_sd->s_children);
++	dentry->d_op = &sysfs_dentry_ops;
++	dentry->d_fsdata = sysfs_get(sd);
++
++	/* protect sd->s_dentry against sysfs_d_iput */
++	spin_lock(&sysfs_assoc_lock);
++	sd->s_dentry = dentry;
++	spin_unlock(&sysfs_assoc_lock);
++
++	d_rehash(dentry);
+ }
+ 
+-static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd,
+-						void * element)
++static int sysfs_ilookup_test(struct inode *inode, void *arg)
+ {
+-	struct sysfs_dirent *sd;
+-	sd = __sysfs_new_dirent(element);
+-	__sysfs_list_dirent(parent_sd, sd);
+-	return sd;
++	struct sysfs_dirent *sd = arg;
++	return inode->i_ino == sd->s_ino;
+ }
+ 
+-/*
++/**
++ *	sysfs_addrm_start - prepare for sysfs_dirent add/remove
++ *	@acxt: pointer to sysfs_addrm_cxt to be used
++ *	@parent_sd: parent sysfs_dirent
+  *
+- * Return -EEXIST if there is already a sysfs element with the same name for
+- * the same parent.
++ *	This function is called when the caller is about to add or
++ *	remove sysfs_dirent under @parent_sd.  This function acquires
++ *	sysfs_mutex, grabs inode for @parent_sd if available and lock
++ *	i_mutex of it.  @acxt is used to keep and pass context to
++ *	other addrm functions.
+  *
+- * called with parent inode's i_mutex held
++ *	LOCKING:
++ *	Kernel thread context (may sleep).  sysfs_mutex is locked on
++ *	return.  i_mutex of parent inode is locked on return if
++ *	available.
+  */
+-int sysfs_dirent_exist(struct sysfs_dirent *parent_sd,
+-			  const unsigned char *new)
++void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
++		       struct sysfs_dirent *parent_sd)
+ {
+-	struct sysfs_dirent * sd;
++	struct inode *inode;
+ 
+-	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+-		if (sd->s_element) {
+-			const unsigned char *existing = sysfs_get_name(sd);
+-			if (strcmp(existing, new))
+-				continue;
+-			else
+-				return -EEXIST;
+-		}
+-	}
++	memset(acxt, 0, sizeof(*acxt));
++	acxt->parent_sd = parent_sd;
+ 
+-	return 0;
++	/* Lookup parent inode.  inode initialization and I_NEW
++	 * clearing are protected by sysfs_mutex.  By grabbing it and
++	 * looking up with _nowait variant, inode state can be
++	 * determined reliably.
++	 */
++	mutex_lock(&sysfs_mutex);
++
++	inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
++				parent_sd);
++
++	if (inode && !(inode->i_state & I_NEW)) {
++		/* parent inode available */
++		acxt->parent_inode = inode;
++
++		/* sysfs_mutex is below i_mutex in lock hierarchy.
++		 * First, trylock i_mutex.  If fails, unlock
++		 * sysfs_mutex and lock them in order.
++		 */
++		if (!mutex_trylock(&inode->i_mutex)) {
++			mutex_unlock(&sysfs_mutex);
++			mutex_lock(&inode->i_mutex);
++			mutex_lock(&sysfs_mutex);
++		}
++	} else
++		iput(inode);
+ }
+ 
++/**
++ *	sysfs_add_one - add sysfs_dirent to parent
++ *	@acxt: addrm context to use
++ *	@sd: sysfs_dirent to be added
++ *
++ *	Get @acxt->parent_sd and set sd->s_parent to it and increment
++ *	nlink of parent inode if @sd is a directory.  @sd is NOT
++ *	linked into the children list of the parent.  The caller
++ *	should invoke sysfs_link_sibling() after this function
++ *	completes if @sd needs to be on the children list.
++ *
++ *	This function should be called between calls to
++ *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
++ *	passed the same @acxt as passed to sysfs_addrm_start().
++ *
++ *	LOCKING:
++ *	Determined by sysfs_addrm_start().
++ */
++void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
++{
++	sd->s_parent = sysfs_get(acxt->parent_sd);
++
++	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
++		inc_nlink(acxt->parent_inode);
++
++	acxt->cnt++;
++}
+ 
+-static struct sysfs_dirent *
+-__sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type)
++/**
++ *	sysfs_remove_one - remove sysfs_dirent from parent
++ *	@acxt: addrm context to use
++ *	@sd: sysfs_dirent to be removed
++ *
++ *	Mark @sd removed and drop nlink of parent inode if @sd is a
++ *	directory.  @sd is NOT unlinked from the children list of the
++ *	parent.  The caller is responsible for removing @sd from the
++ *	children list before calling this function.
++ *
++ *	This function should be called between calls to
++ *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
++ *	passed the same @acxt as passed to sysfs_addrm_start().
++ *
++ *	LOCKING:
++ *	Determined by sysfs_addrm_start().
++ */
++void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
+ {
+-	struct sysfs_dirent * sd;
++	BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));
+ 
+-	sd = __sysfs_new_dirent(element);
+-	if (!sd)
+-		goto out;
++	sd->s_flags |= SYSFS_FLAG_REMOVED;
++	sd->s_sibling = acxt->removed;
++	acxt->removed = sd;
+ 
+-	sd->s_mode = mode;
+-	sd->s_type = type;
+-	sd->s_dentry = dentry;
+-	if (dentry) {
+-		dentry->d_fsdata = sysfs_get(sd);
+-		dentry->d_op = &sysfs_dentry_ops;
+-	}
++	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
++		drop_nlink(acxt->parent_inode);
+ 
+-out:
+-	return sd;
++	acxt->cnt++;
+ }
+ 
+-int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry,
+-			void * element, umode_t mode, int type)
++/**
++ *	sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
++ *	@sd: target sysfs_dirent
++ *
++ *	Drop dentry for @sd.  @sd must have been unlinked from its
++ *	parent on entry to this function such that it can't be looked
++ *	up anymore.
++ *
++ *	@sd->s_dentry which is protected with sysfs_assoc_lock points
++ *	to the currently associated dentry, but we hold no reference
++ *	to it and may be racing with dput().  Grab dcache_lock and
++ *	verify dentry before dropping it.  If @sd->s_dentry is NULL or
++ *	dput() beats us, no need to bother.
++ */
++static void sysfs_drop_dentry(struct sysfs_dirent *sd)
+ {
+-	struct sysfs_dirent *sd;
++	struct dentry *dentry = NULL;
++	struct inode *inode;
+ 
+-	sd = __sysfs_make_dirent(dentry, element, mode, type);
+-	__sysfs_list_dirent(parent_sd, sd);
++	/* We're not holding a reference to ->s_dentry dentry but the
++	 * field will stay valid as long as sysfs_assoc_lock is held.
++	 */
++	spin_lock(&sysfs_assoc_lock);
++	spin_lock(&dcache_lock);
+ 
+-	return sd ? 0 : -ENOMEM;
++	/* drop dentry if it's there and dput() didn't kill it yet */
++	if (sd->s_dentry && sd->s_dentry->d_inode) {
++		dentry = dget_locked(sd->s_dentry);
++		spin_lock(&dentry->d_lock);
++		__d_drop(dentry);
++		spin_unlock(&dentry->d_lock);
++	}
++
++	spin_unlock(&dcache_lock);
++	spin_unlock(&sysfs_assoc_lock);
++
++	/* dentries for shadowed inodes are pinned, unpin */
++	if (dentry && sysfs_is_shadowed_inode(dentry->d_inode))
++		dput(dentry);
++	dput(dentry);
++
++	/* adjust nlink and update timestamp */
++	inode = ilookup(sysfs_sb, sd->s_ino);
++	if (inode) {
++		mutex_lock(&inode->i_mutex);
++
++		inode->i_ctime = CURRENT_TIME;
++		drop_nlink(inode);
++		if (sysfs_type(sd) == SYSFS_DIR)
++			drop_nlink(inode);
++
++		mutex_unlock(&inode->i_mutex);
++		iput(inode);
++	}
+ }
+ 
+-static int init_dir(struct inode * inode)
++/**
++ *	sysfs_addrm_finish - finish up sysfs_dirent add/remove
++ *	@acxt: addrm context to finish up
++ *
++ *	Finish up sysfs_dirent add/remove.  Resources acquired by
++ *	sysfs_addrm_start() are released and removed sysfs_dirents are
++ *	cleaned up.  Timestamps on the parent inode are updated.
++ *
++ *	LOCKING:
++ *	All mutexes acquired by sysfs_addrm_start() are released.
++ *
++ *	RETURNS:
++ *	Number of added/removed sysfs_dirents since sysfs_addrm_start().
++ */
++int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
+ {
+-	inode->i_op = &sysfs_dir_inode_operations;
+-	inode->i_fop = &sysfs_dir_operations;
++	/* release resources acquired by sysfs_addrm_start() */
++	mutex_unlock(&sysfs_mutex);
++	if (acxt->parent_inode) {
++		struct inode *inode = acxt->parent_inode;
++
++		/* if added/removed, update timestamps on the parent */
++		if (acxt->cnt)
++			inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ 
+-	/* directory inodes start off with i_nlink == 2 (for "." entry) */
+-	inc_nlink(inode);
+-	return 0;
++		mutex_unlock(&inode->i_mutex);
++		iput(inode);
++	}
++
++	/* kill removed sysfs_dirents */
++	while (acxt->removed) {
++		struct sysfs_dirent *sd = acxt->removed;
++
++		acxt->removed = sd->s_sibling;
++		sd->s_sibling = NULL;
++
++		sysfs_drop_dentry(sd);
++		sysfs_deactivate(sd);
++		sysfs_put(sd);
++	}
++
++	return acxt->cnt;
+ }
+ 
+-static int init_file(struct inode * inode)
++/**
++ *	sysfs_find_dirent - find sysfs_dirent with the given name
++ *	@parent_sd: sysfs_dirent to search under
++ *	@name: name to look for
++ *
++ *	Look for sysfs_dirent with name @name under @parent_sd.
++ *
++ *	LOCKING:
++ *	mutex_lock(sysfs_mutex)
++ *
++ *	RETURNS:
++ *	Pointer to sysfs_dirent if found, NULL if not.
++ */
++struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
++				       const unsigned char *name)
+ {
+-	inode->i_size = PAGE_SIZE;
+-	inode->i_fop = &sysfs_file_operations;
+-	return 0;
++	struct sysfs_dirent *sd;
++
++	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
++		if (sysfs_type(sd) && !strcmp(sd->s_name, name))
++			return sd;
++	return NULL;
+ }
+ 
+-static int init_symlink(struct inode * inode)
++/**
++ *	sysfs_get_dirent - find and get sysfs_dirent with the given name
++ *	@parent_sd: sysfs_dirent to search under
++ *	@name: name to look for
++ *
++ *	Look for sysfs_dirent with name @name under @parent_sd and get
++ *	it if found.
++ *
++ *	LOCKING:
++ *	Kernel thread context (may sleep).  Grabs sysfs_mutex.
++ *
++ *	RETURNS:
++ *	Pointer to sysfs_dirent if found, NULL if not.
++ */
++struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
++				      const unsigned char *name)
+ {
+-	inode->i_op = &sysfs_symlink_inode_operations;
+-	return 0;
++	struct sysfs_dirent *sd;
++
++	mutex_lock(&sysfs_mutex);
++	sd = sysfs_find_dirent(parent_sd, name);
++	sysfs_get(sd);
++	mutex_unlock(&sysfs_mutex);
++
++	return sd;
+ }
+ 
+-static int create_dir(struct kobject * k, struct dentry * p,
+-		      const char * n, struct dentry ** d)
++static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
++		      const char *name, struct sysfs_dirent **p_sd)
+ {
+-	int error;
+ 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
++	struct sysfs_addrm_cxt acxt;
++	struct sysfs_dirent *sd;
+ 
+-	mutex_lock(&p->d_inode->i_mutex);
+-	*d = lookup_one_len(n, p, strlen(n));
+-	if (!IS_ERR(*d)) {
+- 		if (sysfs_dirent_exist(p->d_fsdata, n))
+-  			error = -EEXIST;
+-  		else
+-			error = sysfs_make_dirent(p->d_fsdata, *d, k, mode,
+-								SYSFS_DIR);
+-		if (!error) {
+-			error = sysfs_create(*d, mode, init_dir);
+-			if (!error) {
+-				inc_nlink(p->d_inode);
+-				(*d)->d_op = &sysfs_dentry_ops;
+-				d_rehash(*d);
++	/* allocate */
++	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
++	if (!sd)
++		return -ENOMEM;
++	sd->s_elem.dir.kobj = kobj;
++
++	/* link in */
++	sysfs_addrm_start(&acxt, parent_sd);
++	if (!sysfs_find_dirent(parent_sd, name)) {
++		sysfs_add_one(&acxt, sd);
++		sysfs_link_sibling(sd);
+ 			}
++	if (sysfs_addrm_finish(&acxt)) {
++		*p_sd = sd;
++		return 0;
+ 		}
+-		if (error && (error != -EEXIST)) {
+-			struct sysfs_dirent *sd = (*d)->d_fsdata;
+-			if (sd) {
+- 				list_del_init(&sd->s_sibling);
++
+ 				sysfs_put(sd);
+-			}
+-			d_drop(*d);
+-		}
+-		dput(*d);
+-	} else
+-		error = PTR_ERR(*d);
+-	mutex_unlock(&p->d_inode->i_mutex);
+-	return error;
++	return -EEXIST;
+ }
+ 
+-
+-int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d)
++int sysfs_create_subdir(struct kobject *kobj, const char *name,
++			struct sysfs_dirent **p_sd)
+ {
+-	return create_dir(k,k->dentry,n,d);
++	return create_dir(kobj, kobj->sd, name, p_sd);
+ }
+ 
+ /**
+  *	sysfs_create_dir - create a directory for an object.
+  *	@kobj:		object we're creating directory for. 
+- *	@shadow_parent:	parent parent object.
++ *	@shadow_parent:	parent object.
+  */
+-
+-int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent)
++int sysfs_create_dir(struct kobject *kobj,
++		     struct sysfs_dirent *shadow_parent_sd)
+ {
+-	struct dentry * dentry = NULL;
+-	struct dentry * parent;
++	struct sysfs_dirent *parent_sd, *sd;
+ 	int error = 0;
+ 
+ 	BUG_ON(!kobj);
+ 
+-	if (shadow_parent)
+-		parent = shadow_parent;
++	if (shadow_parent_sd)
++		parent_sd = shadow_parent_sd;
+ 	else if (kobj->parent)
+-		parent = kobj->parent->dentry;
++		parent_sd = kobj->parent->sd;
+ 	else if (sysfs_mount && sysfs_mount->mnt_sb)
+-		parent = sysfs_mount->mnt_sb->s_root;
++		parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
+ 	else
+ 		return -EFAULT;
+ 
+-	error = create_dir(kobj,parent,kobject_name(kobj),&dentry);
++	error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
+ 	if (!error)
+-		kobj->dentry = dentry;
+-	return error;
+-}
+-
+-/* attaches attribute's sysfs_dirent to the dentry corresponding to the
+- * attribute file
+- */
+-static int sysfs_attach_attr(struct sysfs_dirent * sd, struct dentry * dentry)
+-{
+-	struct attribute * attr = NULL;
+-	struct bin_attribute * bin_attr = NULL;
+-	int (* init) (struct inode *) = NULL;
+-	int error = 0;
+-
+-        if (sd->s_type & SYSFS_KOBJ_BIN_ATTR) {
+-                bin_attr = sd->s_element;
+-                attr = &bin_attr->attr;
+-        } else {
+-                attr = sd->s_element;
+-                init = init_file;
+-        }
+-
+-	dentry->d_fsdata = sysfs_get(sd);
+-	/* protect sd->s_dentry against sysfs_d_iput */
+-	spin_lock(&sysfs_lock);
+-	sd->s_dentry = dentry;
+-	spin_unlock(&sysfs_lock);
+-	error = sysfs_create(dentry, (attr->mode & S_IALLUGO) | S_IFREG, init);
+-	if (error) {
+-		sysfs_put(sd);
++		kobj->sd = sd;
+ 		return error;
+-	}
+-
+-        if (bin_attr) {
+-		dentry->d_inode->i_size = bin_attr->size;
+-		dentry->d_inode->i_fop = &bin_fops;
+-	}
+-	dentry->d_op = &sysfs_dentry_ops;
+-	d_rehash(dentry);
+-
+-	return 0;
+ }
+ 
+-static int sysfs_attach_link(struct sysfs_dirent * sd, struct dentry * dentry)
++static int sysfs_count_nlink(struct sysfs_dirent *sd)
+ {
+-	int err = 0;
++	struct sysfs_dirent *child;
++	int nr = 0;
+ 
+-	dentry->d_fsdata = sysfs_get(sd);
+-	/* protect sd->s_dentry against sysfs_d_iput */
+-	spin_lock(&sysfs_lock);
+-	sd->s_dentry = dentry;
+-	spin_unlock(&sysfs_lock);
+-	err = sysfs_create(dentry, S_IFLNK|S_IRWXUGO, init_symlink);
+-	if (!err) {
+-		dentry->d_op = &sysfs_dentry_ops;
+-		d_rehash(dentry);
+-	} else
+-		sysfs_put(sd);
+-
+-	return err;
++	for (child = sd->s_children; child; child = child->s_sibling)
++		if (sysfs_type(child) == SYSFS_DIR)
++			nr++;
++	return nr + 2;
+ }
+ 
+ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
+@@ -303,24 +761,60 @@
+ {
+ 	struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
+ 	struct sysfs_dirent * sd;
+-	int err = 0;
++	struct bin_attribute *bin_attr;
++	struct inode *inode;
++	int found = 0;
+ 
+-	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+-		if (sd->s_type & SYSFS_NOT_PINNED) {
+-			const unsigned char * name = sysfs_get_name(sd);
++	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
++		if (sysfs_type(sd) &&
++		    !strcmp(sd->s_name, dentry->d_name.name)) {
++			found = 1;
++			break;
++		}
++	}
+ 
+-			if (strcmp(name, dentry->d_name.name))
+-				continue;
++	/* no such entry */
++	if (!found)
++		return NULL;
+ 
+-			if (sd->s_type & SYSFS_KOBJ_LINK)
+-				err = sysfs_attach_link(sd, dentry);
+-			else
+-				err = sysfs_attach_attr(sd, dentry);
++	/* attach dentry and inode */
++	inode = sysfs_get_inode(sd);
++	if (!inode)
++		return ERR_PTR(-ENOMEM);
++
++	mutex_lock(&sysfs_mutex);
++
++	if (inode->i_state & I_NEW) {
++		/* initialize inode according to type */
++		switch (sysfs_type(sd)) {
++		case SYSFS_DIR:
++			inode->i_op = &sysfs_dir_inode_operations;
++			inode->i_fop = &sysfs_dir_operations;
++			inode->i_nlink = sysfs_count_nlink(sd);
+ 			break;
++		case SYSFS_KOBJ_ATTR:
++			inode->i_size = PAGE_SIZE;
++			inode->i_fop = &sysfs_file_operations;
++			break;
++		case SYSFS_KOBJ_BIN_ATTR:
++			bin_attr = sd->s_elem.bin_attr.bin_attr;
++			inode->i_size = bin_attr->size;
++			inode->i_fop = &bin_fops;
++			break;
++		case SYSFS_KOBJ_LINK:
++			inode->i_op = &sysfs_symlink_inode_operations;
++			break;
++		default:
++			BUG();
+ 		}
+ 	}
+ 
+-	return ERR_PTR(err);
++	sysfs_instantiate(dentry, inode);
++	sysfs_attach_dentry(sd, dentry);
++
++	mutex_unlock(&sysfs_mutex);
++
++	return NULL;
+ }
+ 
+ const struct inode_operations sysfs_dir_inode_operations = {
+@@ -328,58 +822,46 @@
+ 	.setattr	= sysfs_setattr,
+ };
+ 
+-static void remove_dir(struct dentry * d)
++static void remove_dir(struct sysfs_dirent *sd)
+ {
+-	struct dentry * parent = dget(d->d_parent);
+-	struct sysfs_dirent * sd;
++	struct sysfs_addrm_cxt acxt;
+ 
+-	mutex_lock(&parent->d_inode->i_mutex);
+-	d_delete(d);
+-	sd = d->d_fsdata;
+- 	list_del_init(&sd->s_sibling);
+-	sysfs_put(sd);
+-	if (d->d_inode)
+-		simple_rmdir(parent->d_inode,d);
+-
+-	pr_debug(" o %s removing done (%d)\n",d->d_name.name,
+-		 atomic_read(&d->d_count));
+-
+-	mutex_unlock(&parent->d_inode->i_mutex);
+-	dput(parent);
++	sysfs_addrm_start(&acxt, sd->s_parent);
++	sysfs_unlink_sibling(sd);
++	sysfs_remove_one(&acxt, sd);
++	sysfs_addrm_finish(&acxt);
+ }
+ 
+-void sysfs_remove_subdir(struct dentry * d)
++void sysfs_remove_subdir(struct sysfs_dirent *sd)
+ {
+-	remove_dir(d);
++	remove_dir(sd);
+ }
+ 
+ 
+-static void __sysfs_remove_dir(struct dentry *dentry)
++static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
+ {
+-	struct sysfs_dirent * parent_sd;
+-	struct sysfs_dirent * sd, * tmp;
++	struct sysfs_addrm_cxt acxt;
++	struct sysfs_dirent **pos;
+ 
+-	dget(dentry);
+-	if (!dentry)
++	if (!dir_sd)
+ 		return;
+ 
+-	pr_debug("sysfs %s: removing dir\n",dentry->d_name.name);
+-	mutex_lock(&dentry->d_inode->i_mutex);
+-	parent_sd = dentry->d_fsdata;
+-	list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
+-		if (!sd->s_element || !(sd->s_type & SYSFS_NOT_PINNED))
+-			continue;
+-		list_del_init(&sd->s_sibling);
+-		sysfs_drop_dentry(sd, dentry);
+-		sysfs_put(sd);
++	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
++	sysfs_addrm_start(&acxt, dir_sd);
++	pos = &dir_sd->s_children;
++	while (*pos) {
++		struct sysfs_dirent *sd = *pos;
++
++		if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) {
++			*pos = sd->s_sibling;
++			sd->s_sibling = NULL;
++			sysfs_remove_one(&acxt, sd);
++		} else
++			pos = &(*pos)->s_sibling;
+ 	}
+-	mutex_unlock(&dentry->d_inode->i_mutex);
++	sysfs_addrm_finish(&acxt);
+ 
+-	remove_dir(dentry);
+-	/**
+-	 * Drop reference from dget() on entrance.
+-	 */
+-	dput(dentry);
++	remove_dir(dir_sd);
+ }
+ 
+ /**
+@@ -393,102 +875,166 @@
+ 
+ void sysfs_remove_dir(struct kobject * kobj)
+ {
+-	__sysfs_remove_dir(kobj->dentry);
+-	kobj->dentry = NULL;
++	struct sysfs_dirent *sd = kobj->sd;
++
++	spin_lock(&sysfs_assoc_lock);
++	kobj->sd = NULL;
++	spin_unlock(&sysfs_assoc_lock);
++
++	__sysfs_remove_dir(sd);
+ }
+ 
+-int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent,
++int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
+ 		     const char *new_name)
+ {
+-	int error = 0;
+-	struct dentry * new_dentry;
++	struct sysfs_dirent *sd = kobj->sd;
++	struct dentry *new_parent = NULL;
++	struct dentry *old_dentry = NULL, *new_dentry = NULL;
++	const char *dup_name = NULL;
++	int error;
+ 
+-	if (!new_parent)
+-		return -EFAULT;
++	/* get dentries */
++	old_dentry = sysfs_get_dentry(sd);
++	if (IS_ERR(old_dentry)) {
++		error = PTR_ERR(old_dentry);
++		goto out_dput;
++	}
++
++	new_parent = sysfs_get_dentry(new_parent_sd);
++	if (IS_ERR(new_parent)) {
++		error = PTR_ERR(new_parent);
++		goto out_dput;
++	}
+ 
+-	down_write(&sysfs_rename_sem);
++	/* lock new_parent and get dentry for new name */
+ 	mutex_lock(&new_parent->d_inode->i_mutex);
+ 
+ 	new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
+-	if (!IS_ERR(new_dentry)) {
+-		/* By allowing two different directories with the
+-		 * same d_parent we allow this routine to move
+-		 * between different shadows of the same directory
++	if (IS_ERR(new_dentry)) {
++		error = PTR_ERR(new_dentry);
++		goto out_unlock;
++	}
++
++	/* By allowing two different directories with the same
++	 * d_parent we allow this routine to move between different
++	 * shadows of the same directory
+ 		 */
+-		if (kobj->dentry->d_parent->d_inode != new_parent->d_inode)
+-			return -EINVAL;
+-		else if (new_dentry->d_parent->d_inode != new_parent->d_inode)
+-			error = -EINVAL;
+-		else if (new_dentry == kobj->dentry)
+ 			error = -EINVAL;
+-		else if (!new_dentry->d_inode) {
++	if (old_dentry->d_parent->d_inode != new_parent->d_inode ||
++	    new_dentry->d_parent->d_inode != new_parent->d_inode ||
++	    old_dentry == new_dentry)
++		goto out_unlock;
++
++	error = -EEXIST;
++	if (new_dentry->d_inode)
++		goto out_unlock;
++
++	/* rename kobject and sysfs_dirent */
++	error = -ENOMEM;
++	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
++	if (!new_name)
++		goto out_drop;
++
+ 			error = kobject_set_name(kobj, "%s", new_name);
+-			if (!error) {
+-				struct sysfs_dirent *sd, *parent_sd;
++	if (error)
++		goto out_drop;
++
++	dup_name = sd->s_name;
++	sd->s_name = new_name;
+ 
++	/* move under the new parent */
+ 				d_add(new_dentry, NULL);
+-				d_move(kobj->dentry, new_dentry);
++	d_move(sd->s_dentry, new_dentry);
+ 
+-				sd = kobj->dentry->d_fsdata;
+-				parent_sd = new_parent->d_fsdata;
++	mutex_lock(&sysfs_mutex);
+ 
+-				list_del_init(&sd->s_sibling);
+-				list_add(&sd->s_sibling, &parent_sd->s_children);
+-			}
+-			else
++	sysfs_unlink_sibling(sd);
++	sysfs_get(new_parent_sd);
++	sysfs_put(sd->s_parent);
++	sd->s_parent = new_parent_sd;
++	sysfs_link_sibling(sd);
++
++	mutex_unlock(&sysfs_mutex);
++
++	error = 0;
++	goto out_unlock;
++
++ out_drop:
+ 				d_drop(new_dentry);
+-		} else
+-			error = -EEXIST;
+-		dput(new_dentry);
+-	}
++ out_unlock:
+ 	mutex_unlock(&new_parent->d_inode->i_mutex);
+-	up_write(&sysfs_rename_sem);
+-
++ out_dput:
++	kfree(dup_name);
++	dput(new_parent);
++	dput(old_dentry);
++	dput(new_dentry);
+ 	return error;
+ }
+ 
+-int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent)
++int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
+ {
+-	struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry;
+-	struct sysfs_dirent *new_parent_sd, *sd;
++	struct sysfs_dirent *sd = kobj->sd;
++	struct sysfs_dirent *new_parent_sd;
++	struct dentry *old_parent, *new_parent = NULL;
++	struct dentry *old_dentry = NULL, *new_dentry = NULL;
+ 	int error;
+ 
+-	old_parent_dentry = kobj->parent ?
+-		kobj->parent->dentry : sysfs_mount->mnt_sb->s_root;
+-	new_parent_dentry = new_parent ?
+-		new_parent->dentry : sysfs_mount->mnt_sb->s_root;
++	BUG_ON(!sd->s_parent);
++	new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;
++
++	/* get dentries */
++	old_dentry = sysfs_get_dentry(sd);
++	if (IS_ERR(old_dentry)) {
++		error = PTR_ERR(old_dentry);
++		goto out_dput;
++	}
++	old_parent = sd->s_parent->s_dentry;
+ 
+-	if (old_parent_dentry->d_inode == new_parent_dentry->d_inode)
+-		return 0;	/* nothing to move */
++	new_parent = sysfs_get_dentry(new_parent_sd);
++	if (IS_ERR(new_parent)) {
++		error = PTR_ERR(new_parent);
++		goto out_dput;
++	}
++
++	if (old_parent->d_inode == new_parent->d_inode) {
++		error = 0;
++		goto out_dput;	/* nothing to move */
++	}
+ again:
+-	mutex_lock(&old_parent_dentry->d_inode->i_mutex);
+-	if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) {
+-		mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
++	mutex_lock(&old_parent->d_inode->i_mutex);
++	if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
++		mutex_unlock(&old_parent->d_inode->i_mutex);
+ 		goto again;
+ 	}
+ 
+-	new_parent_sd = new_parent_dentry->d_fsdata;
+-	sd = kobj->dentry->d_fsdata;
+-
+-	new_dentry = lookup_one_len(kobj->name, new_parent_dentry,
+-				    strlen(kobj->name));
++	new_dentry = lookup_one_len(kobj->name, new_parent, strlen(kobj->name));
+ 	if (IS_ERR(new_dentry)) {
+ 		error = PTR_ERR(new_dentry);
+-		goto out;
++		goto out_unlock;
+ 	} else
+ 		error = 0;
+ 	d_add(new_dentry, NULL);
+-	d_move(kobj->dentry, new_dentry);
++	d_move(sd->s_dentry, new_dentry);
+ 	dput(new_dentry);
+ 
+ 	/* Remove from old parent's list and insert into new parent's list. */
+-	list_del_init(&sd->s_sibling);
+-	list_add(&sd->s_sibling, &new_parent_sd->s_children);
++	mutex_lock(&sysfs_mutex);
+ 
+-out:
+-	mutex_unlock(&new_parent_dentry->d_inode->i_mutex);
+-	mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
++	sysfs_unlink_sibling(sd);
++	sysfs_get(new_parent_sd);
++	sysfs_put(sd->s_parent);
++	sd->s_parent = new_parent_sd;
++	sysfs_link_sibling(sd);
+ 
++	mutex_unlock(&sysfs_mutex);
++
++ out_unlock:
++	mutex_unlock(&new_parent->d_inode->i_mutex);
++	mutex_unlock(&old_parent->d_inode->i_mutex);
++ out_dput:
++	dput(new_parent);
++	dput(old_dentry);
++	dput(new_dentry);
+ 	return error;
+ }
+ 
+@@ -496,23 +1042,27 @@
+ {
+ 	struct dentry * dentry = file->f_path.dentry;
+ 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
++	struct sysfs_dirent * sd;
+ 
+-	mutex_lock(&dentry->d_inode->i_mutex);
+-	file->private_data = sysfs_new_dirent(parent_sd, NULL);
+-	mutex_unlock(&dentry->d_inode->i_mutex);
+-
+-	return file->private_data ? 0 : -ENOMEM;
++	sd = sysfs_new_dirent("_DIR_", 0, 0);
++	if (sd) {
++		mutex_lock(&sysfs_mutex);
++		sd->s_parent = sysfs_get(parent_sd);
++		sysfs_link_sibling(sd);
++		mutex_unlock(&sysfs_mutex);
++	}
+ 
++	file->private_data = sd;
++	return sd ? 0 : -ENOMEM;
+ }
+ 
+ static int sysfs_dir_close(struct inode *inode, struct file *file)
+ {
+-	struct dentry * dentry = file->f_path.dentry;
+ 	struct sysfs_dirent * cursor = file->private_data;
+ 
+-	mutex_lock(&dentry->d_inode->i_mutex);
+-	list_del_init(&cursor->s_sibling);
+-	mutex_unlock(&dentry->d_inode->i_mutex);
++	mutex_lock(&sysfs_mutex);
++	sysfs_unlink_sibling(cursor);
++	mutex_unlock(&sysfs_mutex);
+ 
+ 	release_sysfs_dirent(cursor);
+ 
+@@ -530,7 +1080,7 @@
+ 	struct dentry *dentry = filp->f_path.dentry;
+ 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
+ 	struct sysfs_dirent *cursor = filp->private_data;
+-	struct list_head *p, *q = &cursor->s_sibling;
++	struct sysfs_dirent **pos;
+ 	ino_t ino;
+ 	int i = filp->f_pos;
+ 
+@@ -543,38 +1093,52 @@
+ 			i++;
+ 			/* fallthrough */
+ 		case 1:
+-			ino = parent_ino(dentry);
++			if (parent_sd->s_parent)
++				ino = parent_sd->s_parent->s_ino;
++			else
++				ino = parent_sd->s_ino;
+ 			if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
+ 				break;
+ 			filp->f_pos++;
+ 			i++;
+ 			/* fallthrough */
+ 		default:
++			mutex_lock(&sysfs_mutex);
++
++			pos = &parent_sd->s_children;
++			while (*pos != cursor)
++				pos = &(*pos)->s_sibling;
++
++			/* unlink cursor */
++			*pos = cursor->s_sibling;
++
+ 			if (filp->f_pos == 2)
+-				list_move(q, &parent_sd->s_children);
++				pos = &parent_sd->s_children;
+ 
+-			for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
+-				struct sysfs_dirent *next;
++			for ( ; *pos; pos = &(*pos)->s_sibling) {
++				struct sysfs_dirent *next = *pos;
+ 				const char * name;
+ 				int len;
+ 
+-				next = list_entry(p, struct sysfs_dirent,
+-						   s_sibling);
+-				if (!next->s_element)
++				if (!sysfs_type(next))
+ 					continue;
+ 
+-				name = sysfs_get_name(next);
++				name = next->s_name;
+ 				len = strlen(name);
+ 				ino = next->s_ino;
+ 
+ 				if (filldir(dirent, name, len, filp->f_pos, ino,
+ 						 dt_type(next)) < 0)
+-					return 0;
++					break;
+ 
+-				list_move(q, p);
+-				p = q;
+ 				filp->f_pos++;
+ 			}
++
++			/* put cursor back in */
++			cursor->s_sibling = *pos;
++			*pos = cursor;
++
++			mutex_unlock(&sysfs_mutex);
+ 	}
+ 	return 0;
+ }
+@@ -583,7 +1147,6 @@
+ {
+ 	struct dentry * dentry = file->f_path.dentry;
+ 
+-	mutex_lock(&dentry->d_inode->i_mutex);
+ 	switch (origin) {
+ 		case 1:
+ 			offset += file->f_pos;
+@@ -591,31 +1154,35 @@
+ 			if (offset >= 0)
+ 				break;
+ 		default:
+-			mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
+ 			return -EINVAL;
+ 	}
+ 	if (offset != file->f_pos) {
++		mutex_lock(&sysfs_mutex);
++
+ 		file->f_pos = offset;
+ 		if (file->f_pos >= 2) {
+ 			struct sysfs_dirent *sd = dentry->d_fsdata;
+ 			struct sysfs_dirent *cursor = file->private_data;
+-			struct list_head *p;
++			struct sysfs_dirent **pos;
+ 			loff_t n = file->f_pos - 2;
+ 
+-			list_del(&cursor->s_sibling);
+-			p = sd->s_children.next;
+-			while (n && p != &sd->s_children) {
+-				struct sysfs_dirent *next;
+-				next = list_entry(p, struct sysfs_dirent,
+-						   s_sibling);
+-				if (next->s_element)
++			sysfs_unlink_sibling(cursor);
++
++			pos = &sd->s_children;
++			while (n && *pos) {
++				struct sysfs_dirent *next = *pos;
++				if (sysfs_type(next))
+ 					n--;
+-				p = p->next;
++				pos = &(*pos)->s_sibling;
+ 			}
+-			list_add_tail(&cursor->s_sibling, p);
++
++			cursor->s_sibling = *pos;
++			*pos = cursor;
+ 		}
++
++		mutex_unlock(&sysfs_mutex);
+ 	}
+-	mutex_unlock(&dentry->d_inode->i_mutex);
++
+ 	return offset;
+ }
+ 
+@@ -628,12 +1195,20 @@
+ int sysfs_make_shadowed_dir(struct kobject *kobj,
+ 	void * (*follow_link)(struct dentry *, struct nameidata *))
+ {
++	struct dentry *dentry;
+ 	struct inode *inode;
+ 	struct inode_operations *i_op;
+ 
+-	inode = kobj->dentry->d_inode;
+-	if (inode->i_op != &sysfs_dir_inode_operations)
++	/* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */
++	dentry = sysfs_get_dentry(kobj->sd);
++	if (IS_ERR(dentry))
++		return PTR_ERR(dentry);
++
++	inode = dentry->d_inode;
++	if (inode->i_op != &sysfs_dir_inode_operations) {
++		dput(dentry);
+ 		return -EINVAL;
++	}
+ 
+ 	i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
+ 	if (!i_op)
+@@ -658,54 +1233,72 @@
+  *	directory.
+  */
+ 
+-struct dentry *sysfs_create_shadow_dir(struct kobject *kobj)
++struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
+ {
+-	struct sysfs_dirent *sd;
+-	struct dentry *parent, *dir, *shadow;
++	struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
++	struct dentry *dir, *parent, *shadow;
+ 	struct inode *inode;
++	struct sysfs_dirent *sd;
++	struct sysfs_addrm_cxt acxt;
+ 
+-	dir = kobj->dentry;
+-	inode = dir->d_inode;
++	dir = sysfs_get_dentry(kobj->sd);
++	if (IS_ERR(dir)) {
++		sd = (void *)dir;
++		goto out;
++	}
+ 	parent = dir->d_parent;
+-	shadow = ERR_PTR(-EINVAL);
++
++	inode = dir->d_inode;
++	sd = ERR_PTR(-EINVAL);
+ 	if (!sysfs_is_shadowed_inode(inode))
+-		goto out;
++		goto out_dput;
+ 
+ 	shadow = d_alloc(parent, &dir->d_name);
+ 	if (!shadow)
+ 		goto nomem;
+ 
+-	sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR);
++	sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
+ 	if (!sd)
+ 		goto nomem;
++	sd->s_elem.dir.kobj = kobj;
+ 
++	sysfs_addrm_start(&acxt, parent_sd);
++
++	/* add but don't link into children list */
++	sysfs_add_one(&acxt, sd);
++
++	/* attach and instantiate dentry */
++	sysfs_attach_dentry(sd, shadow);
+ 	d_instantiate(shadow, igrab(inode));
+-	inc_nlink(inode);
+-	inc_nlink(parent->d_inode);
+-	shadow->d_op = &sysfs_dentry_ops;
++	inc_nlink(inode);	/* tj: synchronization? */
++
++	sysfs_addrm_finish(&acxt);
+ 
+ 	dget(shadow);		/* Extra count - pin the dentry in core */
+ 
+-out:
+-	return shadow;
+-nomem:
++	goto out_dput;
++
++ nomem:
+ 	dput(shadow);
+-	shadow = ERR_PTR(-ENOMEM);
+-	goto out;
++	sd = ERR_PTR(-ENOMEM);
++ out_dput:
++	dput(dir);
++ out:
++	return sd;
+ }
+ 
+ /**
+  *	sysfs_remove_shadow_dir - remove an object's directory.
+- *	@shadow: dentry of shadow directory
++ *	@shadow_sd: sysfs_dirent of shadow directory
+  *
+  *	The only thing special about this is that we remove any files in
+  *	the directory before we remove the directory, and we've inlined
+ *	what used to be sysfs_rmdir() below, instead of calling it separately.
+  */
+ 
+-void sysfs_remove_shadow_dir(struct dentry *shadow)
++void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
+ {
+-	__sysfs_remove_dir(shadow);
++	__sysfs_remove_dir(shadow_sd);
+ }
+ 
+ const struct file_operations sysfs_dir_operations = {
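
The dir.c hunks above replace the doubly linked s_children list_head with a
singly linked sibling chain, walked everywhere with a pointer-to-pointer
cursor so that a node can be spliced out without tracking a separate
"previous" element.  A minimal standalone sketch of that idiom, with
illustrative names that are not part of the patch:

    #include <stddef.h>

    struct node {
        struct node *sibling;
        int type;
    };

    /* Unlink and return the first node matching @type, or NULL. */
    static struct node *unlink_first(struct node **head, int type)
    {
        struct node **pos;

        for (pos = head; *pos; pos = &(*pos)->sibling) {
            if ((*pos)->type == type) {
                struct node *sd = *pos;

                *pos = sd->sibling;     /* splice out of the chain */
                sd->sibling = NULL;
                return sd;
            }
        }
        return NULL;
    }
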
+diff -Nurb linux-2.6.22-570/fs/sysfs/file.c linux-2.6.22-try2/fs/sysfs/file.c
+--- linux-2.6.22-570/fs/sysfs/file.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/fs/sysfs/file.c	2007-12-19 15:46:06.000000000 -0500
+@@ -50,29 +50,15 @@
+ 	.store	= subsys_attr_store,
+ };
+ 
+-/**
+- *	add_to_collection - add buffer to a collection
+- *	@buffer:	buffer to be added
+- *	@node:		inode of set to add to
+- */
+-
+-static inline void
+-add_to_collection(struct sysfs_buffer *buffer, struct inode *node)
+-{
+-	struct sysfs_buffer_collection *set = node->i_private;
+-
+-	mutex_lock(&node->i_mutex);
+-	list_add(&buffer->associates, &set->associates);
+-	mutex_unlock(&node->i_mutex);
+-}
+-
+-static inline void
+-remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
+-{
+-	mutex_lock(&node->i_mutex);
+-	list_del(&buffer->associates);
+-	mutex_unlock(&node->i_mutex);
+-}
++struct sysfs_buffer {
++	size_t			count;
++	loff_t			pos;
++	char			* page;
++	struct sysfs_ops	* ops;
++	struct semaphore	sem;
++	int			needs_read_fill;
++	int			event;
++};
+ 
+ /**
+  *	fill_read_buffer - allocate and fill buffer from object.
+@@ -87,9 +73,8 @@
+  */
+ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
+ {
+-	struct sysfs_dirent * sd = dentry->d_fsdata;
+-	struct attribute * attr = to_attr(dentry);
+-	struct kobject * kobj = to_kobj(dentry->d_parent);
++	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
++	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+ 	struct sysfs_ops * ops = buffer->ops;
+ 	int ret = 0;
+ 	ssize_t count;
+@@ -99,8 +84,15 @@
+ 	if (!buffer->page)
+ 		return -ENOMEM;
+ 
+-	buffer->event = atomic_read(&sd->s_event);
+-	count = ops->show(kobj,attr,buffer->page);
++	/* need attr_sd for attr and ops, its parent for kobj */
++	if (!sysfs_get_active_two(attr_sd))
++		return -ENODEV;
++
++	buffer->event = atomic_read(&attr_sd->s_event);
++	count = ops->show(kobj, attr_sd->s_elem.attr.attr, buffer->page);
++
++	sysfs_put_active_two(attr_sd);
++
+ 	BUG_ON(count > (ssize_t)PAGE_SIZE);
+ 	if (count >= 0) {
+ 		buffer->needs_read_fill = 0;
+@@ -138,9 +130,6 @@
+ 
+ 	down(&buffer->sem);
+ 	if (buffer->needs_read_fill) {
+-		if (buffer->orphaned)
+-			retval = -ENODEV;
+-		else
+ 			retval = fill_read_buffer(file->f_path.dentry,buffer);
+ 		if (retval)
+ 			goto out;
+@@ -199,11 +188,20 @@
+ static int 
+ flush_write_buffer(struct dentry * dentry, struct sysfs_buffer * buffer, size_t count)
+ {
+-	struct attribute * attr = to_attr(dentry);
+-	struct kobject * kobj = to_kobj(dentry->d_parent);
++	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
++	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+ 	struct sysfs_ops * ops = buffer->ops;
++	int rc;
+ 
+-	return ops->store(kobj,attr,buffer->page,count);
++	/* need attr_sd for attr and ops, its parent for kobj */
++	if (!sysfs_get_active_two(attr_sd))
++		return -ENODEV;
++
++	rc = ops->store(kobj, attr_sd->s_elem.attr.attr, buffer->page, count);
++
++	sysfs_put_active_two(attr_sd);
++
++	return rc;
+ }
+ 
+ 
+@@ -231,37 +229,26 @@
+ 	ssize_t len;
+ 
+ 	down(&buffer->sem);
+-	if (buffer->orphaned) {
+-		len = -ENODEV;
+-		goto out;
+-	}
+ 	len = fill_write_buffer(buffer, buf, count);
+ 	if (len > 0)
+ 		len = flush_write_buffer(file->f_path.dentry, buffer, len);
+ 	if (len > 0)
+ 		*ppos += len;
+-out:
+ 	up(&buffer->sem);
+ 	return len;
+ }
+ 
+ static int sysfs_open_file(struct inode *inode, struct file *file)
+ {
+-	struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
+-	struct attribute * attr = to_attr(file->f_path.dentry);
+-	struct sysfs_buffer_collection *set;
++	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
++	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+ 	struct sysfs_buffer * buffer;
+ 	struct sysfs_ops * ops = NULL;
+-	int error = 0;
+-
+-	if (!kobj || !attr)
+-		goto Einval;
++	int error;
+ 
+-	/* Grab the module reference for this attribute if we have one */
+-	if (!try_module_get(attr->owner)) {
+-		error = -ENODEV;
+-		goto Done;
+-	}
++	/* need attr_sd for attr and ops, its parent for kobj */
++	if (!sysfs_get_active_two(attr_sd))
++		return -ENODEV;
+ 
+ 	/* if the kobject has no ktype, then we assume that it is a subsystem
+ 	 * itself, and use ops for it.
+@@ -277,7 +264,7 @@
+ 	 * or the subsystem have no operations.
+ 	 */
+ 	if (!ops)
+-		goto Eaccess;
++		goto err_out;
+ 
+ 	/* make sure we have a collection to add our buffers to */
+ 	mutex_lock(&inode->i_mutex);
+@@ -299,7 +286,7 @@
+ 	if (file->f_mode & FMODE_WRITE) {
+ 
+ 		if (!(inode->i_mode & S_IWUGO) || !ops->store)
+-			goto Eaccess;
++			goto err_out;
+ 
+ 	}
+ 
+@@ -309,48 +296,38 @@
+ 	 */
+ 	if (file->f_mode & FMODE_READ) {
+ 		if (!(inode->i_mode & S_IRUGO) || !ops->show)
+-			goto Eaccess;
++			goto err_out;
+ 	}
+ 
+ 	/* No error? Great, allocate a buffer for the file, and store
+ 	 * it in file->private_data for easy access.
+ 	 */
++	error = -ENOMEM;
+ 	buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
+-	if (buffer) {
+-		INIT_LIST_HEAD(&buffer->associates);
++	if (!buffer)
++		goto err_out;
++
+ 		init_MUTEX(&buffer->sem);
+ 		buffer->needs_read_fill = 1;
+ 		buffer->ops = ops;
+-		add_to_collection(buffer, inode);
+ 		file->private_data = buffer;
+-	} else
+-		error = -ENOMEM;
+-	goto Done;
+ 
+- Einval:
+-	error = -EINVAL;
+-	goto Done;
+- Eaccess:
+-	error = -EACCES;
+-	module_put(attr->owner);
+- Done:
+-	if (error)
+-		kobject_put(kobj);
++	/* open succeeded, put active references and pin attr_sd */
++	sysfs_put_active_two(attr_sd);
++	sysfs_get(attr_sd);
++	return 0;
++
++ err_out:
++	sysfs_put_active_two(attr_sd);
+ 	return error;
+ }
+ 
+ static int sysfs_release(struct inode * inode, struct file * filp)
+ {
+-	struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
+-	struct attribute * attr = to_attr(filp->f_path.dentry);
+-	struct module * owner = attr->owner;
+-	struct sysfs_buffer * buffer = filp->private_data;
++	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
++	struct sysfs_buffer *buffer = filp->private_data;
+ 
+-	if (buffer)
+-		remove_from_collection(buffer, inode);
+-	kobject_put(kobj);
+-	/* After this point, attr should not be accessed. */
+-	module_put(owner);
++	sysfs_put(attr_sd);
+ 
+ 	if (buffer) {
+ 		if (buffer->page)
+@@ -377,57 +354,43 @@
+ static unsigned int sysfs_poll(struct file *filp, poll_table *wait)
+ {
+ 	struct sysfs_buffer * buffer = filp->private_data;
+-	struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
+-	struct sysfs_dirent * sd = filp->f_path.dentry->d_fsdata;
+-	int res = 0;
++	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
++	struct kobject *kobj = attr_sd->s_parent->s_elem.dir.kobj;
+ 
+-	poll_wait(filp, &kobj->poll, wait);
++	/* need parent for the kobj, grab both */
++	if (!sysfs_get_active_two(attr_sd))
++		goto trigger;
+ 
+-	if (buffer->event != atomic_read(&sd->s_event)) {
+-		res = POLLERR|POLLPRI;
+-		buffer->needs_read_fill = 1;
+-	}
+-
+-	return res;
+-}
++	poll_wait(filp, &kobj->poll, wait);
+ 
++	sysfs_put_active_two(attr_sd);
+ 
+-static struct dentry *step_down(struct dentry *dir, const char * name)
+-{
+-	struct dentry * de;
++	if (buffer->event != atomic_read(&attr_sd->s_event))
++		goto trigger;
+ 
+-	if (dir == NULL || dir->d_inode == NULL)
+-		return NULL;
++	return 0;
+ 
+-	mutex_lock(&dir->d_inode->i_mutex);
+-	de = lookup_one_len(name, dir, strlen(name));
+-	mutex_unlock(&dir->d_inode->i_mutex);
+-	dput(dir);
+-	if (IS_ERR(de))
+-		return NULL;
+-	if (de->d_inode == NULL) {
+-		dput(de);
+-		return NULL;
+-	}
+-	return de;
++ trigger:
++	buffer->needs_read_fill = 1;
++	return POLLERR|POLLPRI;
+ }
+ 
+-void sysfs_notify(struct kobject * k, char *dir, char *attr)
++void sysfs_notify(struct kobject *k, char *dir, char *attr)
+ {
+-	struct dentry *de = k->dentry;
+-	if (de)
+-		dget(de);
+-	if (de && dir)
+-		de = step_down(de, dir);
+-	if (de && attr)
+-		de = step_down(de, attr);
+-	if (de) {
+-		struct sysfs_dirent * sd = de->d_fsdata;
+-		if (sd)
++	struct sysfs_dirent *sd = k->sd;
++
++	mutex_lock(&sysfs_mutex);
++
++	if (sd && dir)
++		sd = sysfs_find_dirent(sd, dir);
++	if (sd && attr)
++		sd = sysfs_find_dirent(sd, attr);
++	if (sd) {
+ 			atomic_inc(&sd->s_event);
+ 		wake_up_interruptible(&k->poll);
+-		dput(de);
+ 	}
++
++	mutex_unlock(&sysfs_mutex);
+ }
+ EXPORT_SYMBOL_GPL(sysfs_notify);
+ 
+@@ -441,19 +404,30 @@
+ };
+ 
+ 
+-int sysfs_add_file(struct dentry * dir, const struct attribute * attr, int type)
++int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
++		   int type)
+ {
+-	struct sysfs_dirent * parent_sd = dir->d_fsdata;
+ 	umode_t mode = (attr->mode & S_IALLUGO) | S_IFREG;
+-	int error = -EEXIST;
++	struct sysfs_addrm_cxt acxt;
++	struct sysfs_dirent *sd;
+ 
+-	mutex_lock(&dir->d_inode->i_mutex);
+-	if (!sysfs_dirent_exist(parent_sd, attr->name))
+-		error = sysfs_make_dirent(parent_sd, NULL, (void *)attr,
+-					  mode, type);
+-	mutex_unlock(&dir->d_inode->i_mutex);
++	sd = sysfs_new_dirent(attr->name, mode, type);
++	if (!sd)
++		return -ENOMEM;
++	sd->s_elem.attr.attr = (void *)attr;
+ 
+-	return error;
++	sysfs_addrm_start(&acxt, dir_sd);
++
++	if (!sysfs_find_dirent(dir_sd, attr->name)) {
++		sysfs_add_one(&acxt, sd);
++		sysfs_link_sibling(sd);
++	}
++
++	if (sysfs_addrm_finish(&acxt))
++		return 0;
++
++	sysfs_put(sd);
++	return -EEXIST;
+ }
+ 
+ 
+@@ -465,9 +439,9 @@
+ 
+ int sysfs_create_file(struct kobject * kobj, const struct attribute * attr)
+ {
+-	BUG_ON(!kobj || !kobj->dentry || !attr);
++	BUG_ON(!kobj || !kobj->sd || !attr);
+ 
+-	return sysfs_add_file(kobj->dentry, attr, SYSFS_KOBJ_ATTR);
++	return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR);
+ 
+ }
+ 
+@@ -481,16 +455,16 @@
+ int sysfs_add_file_to_group(struct kobject *kobj,
+ 		const struct attribute *attr, const char *group)
+ {
+-	struct dentry *dir;
++	struct sysfs_dirent *dir_sd;
+ 	int error;
+ 
+-	dir = lookup_one_len(group, kobj->dentry, strlen(group));
+-	if (IS_ERR(dir))
+-		error = PTR_ERR(dir);
+-	else {
+-		error = sysfs_add_file(dir, attr, SYSFS_KOBJ_ATTR);
+-		dput(dir);
+-	}
++	dir_sd = sysfs_get_dirent(kobj->sd, group);
++	if (!dir_sd)
++		return -ENOENT;
++
++	error = sysfs_add_file(dir_sd, attr, SYSFS_KOBJ_ATTR);
++	sysfs_put(dir_sd);
++
+ 	return error;
+ }
+ EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);
+@@ -503,30 +477,31 @@
+  */
+ int sysfs_update_file(struct kobject * kobj, const struct attribute * attr)
+ {
+-	struct dentry * dir = kobj->dentry;
+-	struct dentry * victim;
+-	int res = -ENOENT;
+-
+-	mutex_lock(&dir->d_inode->i_mutex);
+-	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
+-	if (!IS_ERR(victim)) {
+-		/* make sure dentry is really there */
+-		if (victim->d_inode && 
+-		    (victim->d_parent->d_inode == dir->d_inode)) {
+-			victim->d_inode->i_mtime = CURRENT_TIME;
+-			fsnotify_modify(victim);
+-			res = 0;
+-		} else
+-			d_drop(victim);
++	struct sysfs_dirent *victim_sd = NULL;
++	struct dentry *victim = NULL;
++	int rc;
++
++	rc = -ENOENT;
++	victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
++	if (!victim_sd)
++		goto out;
+ 		
+-		/**
+-		 * Drop the reference acquired from lookup_one_len() above.
+-		 */
+-		dput(victim);
++	victim = sysfs_get_dentry(victim_sd);
++	if (IS_ERR(victim)) {
++		rc = PTR_ERR(victim);
++		victim = NULL;
++		goto out;
+ 	}
+-	mutex_unlock(&dir->d_inode->i_mutex);
+ 
+-	return res;
++	mutex_lock(&victim->d_inode->i_mutex);
++	victim->d_inode->i_mtime = CURRENT_TIME;
++	fsnotify_modify(victim);
++	mutex_unlock(&victim->d_inode->i_mutex);
++	rc = 0;
++ out:
++	dput(victim);
++	sysfs_put(victim_sd);
++	return rc;
+ }
+ 
+ 
+@@ -539,30 +514,34 @@
+  */
+ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
+ {
+-	struct dentry *dir = kobj->dentry;
+-	struct dentry *victim;
++	struct sysfs_dirent *victim_sd = NULL;
++	struct dentry *victim = NULL;
+ 	struct inode * inode;
+ 	struct iattr newattrs;
+-	int res = -ENOENT;
++	int rc;
++
++	rc = -ENOENT;
++	victim_sd = sysfs_get_dirent(kobj->sd, attr->name);
++	if (!victim_sd)
++		goto out;
++
++	victim = sysfs_get_dentry(victim_sd);
++	if (IS_ERR(victim)) {
++		rc = PTR_ERR(victim);
++		victim = NULL;
++		goto out;
++	}
+ 
+-	mutex_lock(&dir->d_inode->i_mutex);
+-	victim = lookup_one_len(attr->name, dir, strlen(attr->name));
+-	if (!IS_ERR(victim)) {
+-		if (victim->d_inode &&
+-		    (victim->d_parent->d_inode == dir->d_inode)) {
+ 			inode = victim->d_inode;
+ 			mutex_lock(&inode->i_mutex);
+-			newattrs.ia_mode = (mode & S_IALLUGO) |
+-						(inode->i_mode & ~S_IALLUGO);
++	newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
+ 			newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
+-			res = notify_change(victim, &newattrs);
++	rc = notify_change(victim, &newattrs);
+ 			mutex_unlock(&inode->i_mutex);
+-		}
++ out:
+ 		dput(victim);
+-	}
+-	mutex_unlock(&dir->d_inode->i_mutex);
+-
+-	return res;
++	sysfs_put(victim_sd);
++	return rc;
+ }
+ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
+ 
+@@ -577,7 +556,7 @@
+ 
+ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
+ {
+-	sysfs_hash_and_remove(kobj->dentry, attr->name);
++	sysfs_hash_and_remove(kobj->sd, attr->name);
+ }
+ 
+ 
+@@ -590,12 +569,12 @@
+ void sysfs_remove_file_from_group(struct kobject *kobj,
+ 		const struct attribute *attr, const char *group)
+ {
+-	struct dentry *dir;
++	struct sysfs_dirent *dir_sd;
+ 
+-	dir = lookup_one_len(group, kobj->dentry, strlen(group));
+-	if (!IS_ERR(dir)) {
+-		sysfs_hash_and_remove(dir, attr->name);
+-		dput(dir);
++	dir_sd = sysfs_get_dirent(kobj->sd, group);
++	if (dir_sd) {
++		sysfs_hash_and_remove(dir_sd, attr->name);
++		sysfs_put(dir_sd);
+ 	}
+ }
+ EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
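
A pattern worth calling out in the file.c conversion above: every ->show()
and ->store() invocation is now bracketed by sysfs_get_active_two() and
sysfs_put_active_two(), so the attribute and its parent kobject stay alive
for exactly the duration of the callback and a removed attribute fails like
an unplugged device.  Reduced to a template (the helper below is
illustrative; the real callers are fill_read_buffer() and
flush_write_buffer() above):

    /* attr_sd, ops and kobj are obtained exactly as in fill_read_buffer() */
    static ssize_t guarded_show(struct sysfs_dirent *attr_sd,
                                struct sysfs_ops *ops,
                                struct kobject *kobj, char *page)
    {
        ssize_t count;

        /* pin attr_sd and its parent across the callback */
        if (!sysfs_get_active_two(attr_sd))
            return -ENODEV;

        count = ops->show(kobj, attr_sd->s_elem.attr.attr, page);

        sysfs_put_active_two(attr_sd);
        return count;
    }
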
+diff -Nurb linux-2.6.22-570/fs/sysfs/group.c linux-2.6.22-try2/fs/sysfs/group.c
+--- linux-2.6.22-570/fs/sysfs/group.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/sysfs/group.c	2007-12-19 15:29:23.000000000 -0500
+@@ -18,26 +18,25 @@
+ #include "sysfs.h"
+ 
+ 
+-static void remove_files(struct dentry * dir, 
+-			 const struct attribute_group * grp)
++static void remove_files(struct sysfs_dirent *dir_sd,
++			 const struct attribute_group *grp)
+ {
+ 	struct attribute *const* attr;
+ 
+ 	for (attr = grp->attrs; *attr; attr++)
+-		sysfs_hash_and_remove(dir,(*attr)->name);
++		sysfs_hash_and_remove(dir_sd, (*attr)->name);
+ }
+ 
+-static int create_files(struct dentry * dir,
+-			const struct attribute_group * grp)
++static int create_files(struct sysfs_dirent *dir_sd,
++			const struct attribute_group *grp)
+ {
+ 	struct attribute *const* attr;
+ 	int error = 0;
+ 
+-	for (attr = grp->attrs; *attr && !error; attr++) {
+-		error = sysfs_add_file(dir, *attr, SYSFS_KOBJ_ATTR);
+-	}
++	for (attr = grp->attrs; *attr && !error; attr++)
++		error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
+ 	if (error)
+-		remove_files(dir,grp);
++		remove_files(dir_sd, grp);
+ 	return error;
+ }
+ 
+@@ -45,44 +44,44 @@
+ int sysfs_create_group(struct kobject * kobj, 
+ 		       const struct attribute_group * grp)
+ {
+-	struct dentry * dir;
++	struct sysfs_dirent *sd;
+ 	int error;
+ 
+-	BUG_ON(!kobj || !kobj->dentry);
++	BUG_ON(!kobj || !kobj->sd);
+ 
+ 	if (grp->name) {
+-		error = sysfs_create_subdir(kobj,grp->name,&dir);
++		error = sysfs_create_subdir(kobj, grp->name, &sd);
+ 		if (error)
+ 			return error;
+ 	} else
+-		dir = kobj->dentry;
+-	dir = dget(dir);
+-	if ((error = create_files(dir,grp))) {
++		sd = kobj->sd;
++	sysfs_get(sd);
++	error = create_files(sd, grp);
++	if (error) {
+ 		if (grp->name)
+-			sysfs_remove_subdir(dir);
++			sysfs_remove_subdir(sd);
+ 	}
+-	dput(dir);
++	sysfs_put(sd);
+ 	return error;
+ }
+ 
+ void sysfs_remove_group(struct kobject * kobj, 
+ 			const struct attribute_group * grp)
+ {
+-	struct dentry * dir;
++	struct sysfs_dirent *dir_sd = kobj->sd;
++	struct sysfs_dirent *sd;
+ 
+ 	if (grp->name) {
+-		dir = lookup_one_len_kern(grp->name, kobj->dentry,
+-				strlen(grp->name));
+-		BUG_ON(IS_ERR(dir));
+-	}
+-	else
+-		dir = dget(kobj->dentry);
++		sd = sysfs_get_dirent(dir_sd, grp->name);
++		BUG_ON(!sd);
++	} else
++		sd = sysfs_get(dir_sd);
+ 
+-	remove_files(dir,grp);
++	remove_files(sd, grp);
+ 	if (grp->name)
+-		sysfs_remove_subdir(dir);
+-	/* release the ref. taken in this routine */
+-	dput(dir);
++		sysfs_remove_subdir(sd);
++
++	sysfs_put(sd);
+ }
+ 
+ 
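
The group.c rework keeps the external contract unchanged, so callers are
untouched.  For reference, a hedged usage sketch (the attribute objects are
made-up placeholders, normally produced by attribute-declaration macros):

    /* hypothetical struct attribute objects defined elsewhere */
    static struct attribute *demo_attrs[] = {
        &demo_foo_attr,
        &demo_bar_attr,
        NULL,               /* terminator expected by create_files() */
    };

    static struct attribute_group demo_group = {
        .name  = "demo",    /* non-NULL name: created as a subdirectory */
        .attrs = demo_attrs,
    };

    /* all-or-nothing: if any file fails, create_files() removes the
     * ones already created and the subdirectory is torn down again */
    error = sysfs_create_group(kobj, &demo_group);

    /* ...and at teardown: */
    sysfs_remove_group(kobj, &demo_group);
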
+diff -Nurb linux-2.6.22-570/fs/sysfs/inode.c linux-2.6.22-try2/fs/sysfs/inode.c
+--- linux-2.6.22-570/fs/sysfs/inode.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/sysfs/inode.c	2007-12-19 15:29:23.000000000 -0500
+@@ -133,10 +133,8 @@
+  */
+ static struct lock_class_key sysfs_inode_imutex_key;
+ 
+-struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
++void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode)
+ {
+-	struct inode * inode = new_inode(sysfs_sb);
+-	if (inode) {
+ 		inode->i_blocks = 0;
+ 		inode->i_mapping->a_ops = &sysfs_aops;
+ 		inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
+@@ -151,169 +149,78 @@
+ 			 */
+ 			set_inode_attr(inode, sd->s_iattr);
+ 		} else
+-			set_default_inode_attr(inode, mode);
+-	}
+-	return inode;
++		set_default_inode_attr(inode, sd->s_mode);
+ }
+ 
+-int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
+-{
+-	int error = 0;
+-	struct inode * inode = NULL;
+-	if (dentry) {
+-		if (!dentry->d_inode) {
+-			struct sysfs_dirent * sd = dentry->d_fsdata;
+-			if ((inode = sysfs_new_inode(mode, sd))) {
+-				if (dentry->d_parent && dentry->d_parent->d_inode) {
+-					struct inode *p_inode = dentry->d_parent->d_inode;
+-					p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
+-				}
+-				goto Proceed;
+-			}
+-			else 
+-				error = -ENOMEM;
+-		} else
+-			error = -EEXIST;
+-	} else 
+-		error = -ENOENT;
+-	goto Done;
+-
+- Proceed:
+-	if (init)
+-		error = init(inode);
+-	if (!error) {
+-		d_instantiate(dentry, inode);
+-		if (S_ISDIR(mode))
+-			dget(dentry);  /* pin only directory dentry in core */
+-	} else
+-		iput(inode);
+- Done:
+-	return error;
+-}
+-
+-/*
+- * Get the name for corresponding element represented by the given sysfs_dirent
++/**
++ *	sysfs_get_inode - get inode for sysfs_dirent
++ *	@sd: sysfs_dirent to allocate inode for
++ *
++ *	Get inode for @sd.  If such an inode doesn't exist, a new inode
++ *	is allocated and basics are initialized.  New inode is
++ *	returned locked.
++ *
++ *	LOCKING:
++ *	Kernel thread context (may sleep).
++ *
++ *	RETURNS:
++ *	Pointer to allocated inode on success, NULL on failure.
+  */
+-const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
++struct inode * sysfs_get_inode(struct sysfs_dirent *sd)
+ {
+-	struct attribute * attr;
+-	struct bin_attribute * bin_attr;
+-	struct sysfs_symlink  * sl;
+-
+-	BUG_ON(!sd || !sd->s_element);
+-
+-	switch (sd->s_type) {
+-		case SYSFS_DIR:
+-			/* Always have a dentry so use that */
+-			return sd->s_dentry->d_name.name;
+-
+-		case SYSFS_KOBJ_ATTR:
+-			attr = sd->s_element;
+-			return attr->name;
+-
+-		case SYSFS_KOBJ_BIN_ATTR:
+-			bin_attr = sd->s_element;
+-			return bin_attr->attr.name;
+-
+-		case SYSFS_KOBJ_LINK:
+-			sl = sd->s_element;
+-			return sl->link_name;
+-	}
+-	return NULL;
+-}
++	struct inode *inode;
+ 
+-static inline void orphan_all_buffers(struct inode *node)
+-{
+-	struct sysfs_buffer_collection *set;
+-	struct sysfs_buffer *buf;
++	inode = iget_locked(sysfs_sb, sd->s_ino);
++	if (inode && (inode->i_state & I_NEW))
++		sysfs_init_inode(sd, inode);
+ 
+-	mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD);
+-	set = node->i_private;
+-	if (set) {
+-		list_for_each_entry(buf, &set->associates, associates) {
+-			down(&buf->sem);
+-			buf->orphaned = 1;
+-			up(&buf->sem);
+-		}
+-	}
+-	mutex_unlock(&node->i_mutex);
++	return inode;
+ }
+ 
+-
+-/*
+- * Unhashes the dentry corresponding to given sysfs_dirent
+- * Called with parent inode's i_mutex held.
++/**
++ *	sysfs_instantiate - instantiate dentry
++ *	@dentry: dentry to be instantiated
++ *	@inode: inode associated with @sd
++ *
++ *	Unlock @inode if locked and instantiate @dentry with @inode.
++ *
++ *	LOCKING:
++ *	None.
+  */
+-void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
++void sysfs_instantiate(struct dentry *dentry, struct inode *inode)
+ {
+-	struct dentry *dentry = NULL;
+-	struct inode *inode;
+-
+-	/* We're not holding a reference to ->s_dentry dentry but the
+-	 * field will stay valid as long as sysfs_lock is held.
+-	 */
+-	spin_lock(&sysfs_lock);
+-	spin_lock(&dcache_lock);
++	BUG_ON(!dentry || dentry->d_inode);
+ 
+-	/* dget dentry if it's still alive */
+-	if (sd->s_dentry && sd->s_dentry->d_inode)
+-		dentry = dget_locked(sd->s_dentry);
+-
+-	spin_unlock(&dcache_lock);
+-	spin_unlock(&sysfs_lock);
+-
+-	/* drop dentry */
+-	if (dentry) {
+-		spin_lock(&dcache_lock);
+-		spin_lock(&dentry->d_lock);
+-		if (!d_unhashed(dentry) && dentry->d_inode) {
+-			inode = dentry->d_inode;
+-			spin_lock(&inode->i_lock);
+-			__iget(inode);
+-			spin_unlock(&inode->i_lock);
+-			dget_locked(dentry);
+-			__d_drop(dentry);
+-			spin_unlock(&dentry->d_lock);
+-			spin_unlock(&dcache_lock);
+-			simple_unlink(parent->d_inode, dentry);
+-			orphan_all_buffers(inode);
+-			iput(inode);
+-		} else {
+-			spin_unlock(&dentry->d_lock);
+-			spin_unlock(&dcache_lock);
+-		}
++	if (inode->i_state & I_NEW)
++		unlock_new_inode(inode);
+ 
+-		dput(dentry);
+-	}
++	d_instantiate(dentry, inode);
+ }
+ 
+-int sysfs_hash_and_remove(struct dentry * dir, const char * name)
++int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
+ {
+-	struct sysfs_dirent * sd;
+-	struct sysfs_dirent * parent_sd;
+-	int found = 0;
++	struct sysfs_addrm_cxt acxt;
++	struct sysfs_dirent **pos, *sd;
+ 
+-	if (!dir)
++	if (!dir_sd)
+ 		return -ENOENT;
+ 
+-	if (dir->d_inode == NULL)
+-		/* no inode means this hasn't been made visible yet */
+-		return -ENOENT;
++	sysfs_addrm_start(&acxt, dir_sd);
++
++	for (pos = &dir_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
++		sd = *pos;
+ 
+-	parent_sd = dir->d_fsdata;
+-	mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
+-	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+-		if (!sd->s_element)
++		if (!sysfs_type(sd))
+ 			continue;
+-		if (!strcmp(sysfs_get_name(sd), name)) {
+-			list_del_init(&sd->s_sibling);
+-			sysfs_drop_dentry(sd, dir);
+-			sysfs_put(sd);
+-			found = 1;
++		if (!strcmp(sd->s_name, name)) {
++			*pos = sd->s_sibling;
++			sd->s_sibling = NULL;
++			sysfs_remove_one(&acxt, sd);
+ 			break;
+ 		}
+ 	}
+-	mutex_unlock(&dir->d_inode->i_mutex);
+ 
+-	return found ? 0 : -ENOENT;
++	if (sysfs_addrm_finish(&acxt))
++		return 0;
++	return -ENOENT;
+ }
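
The inode.c rework above moves sysfs onto the stock iget_locked() idiom:
look the inode up by number in the inode cache and initialize it only when
the VFS hands it back flagged I_NEW.  The patch defers unlock_new_inode()
to sysfs_instantiate(); folded into one helper, the idiom looks like this
(a sketch, assuming a filesystem-supplied init hook):

    static struct inode *get_inode_once(struct super_block *sb,
                                        unsigned long ino,
                                        void (*init)(struct inode *))
    {
        struct inode *inode = iget_locked(sb, ino);

        if (inode && (inode->i_state & I_NEW)) {
            init(inode);             /* runs exactly once per inode */
            unlock_new_inode(inode); /* publish to concurrent lookups */
        }
        return inode;                /* NULL on allocation failure */
    }
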
+diff -Nurb linux-2.6.22-570/fs/sysfs/mount.c linux-2.6.22-try2/fs/sysfs/mount.c
+--- linux-2.6.22-570/fs/sysfs/mount.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/fs/sysfs/mount.c	2007-12-19 15:29:23.000000000 -0500
+@@ -17,28 +17,18 @@
+ struct super_block * sysfs_sb = NULL;
+ struct kmem_cache *sysfs_dir_cachep;
+ 
+-static void sysfs_clear_inode(struct inode *inode);
+-
+ static const struct super_operations sysfs_ops = {
+ 	.statfs		= simple_statfs,
+ 	.drop_inode	= sysfs_delete_inode,
+-	.clear_inode	= sysfs_clear_inode,
+ };
+ 
+-static struct sysfs_dirent sysfs_root = {
+-	.s_sibling	= LIST_HEAD_INIT(sysfs_root.s_sibling),
+-	.s_children	= LIST_HEAD_INIT(sysfs_root.s_children),
+-	.s_element	= NULL,
+-	.s_type		= SYSFS_ROOT,
+-	.s_iattr	= NULL,
++struct sysfs_dirent sysfs_root = {
++	.s_count	= ATOMIC_INIT(1),
++	.s_flags	= SYSFS_ROOT,
++	.s_mode		= S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
+ 	.s_ino		= 1,
+ };
+ 
+-static void sysfs_clear_inode(struct inode *inode)
+-{
+-	kfree(inode->i_private);
+-}
+-
+ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
+ {
+ 	struct inode *inode;
+@@ -51,17 +41,18 @@
+ 	sb->s_time_gran = 1;
+ 	sysfs_sb = sb;
+ 
+-	inode = sysfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
+-				 &sysfs_root);
+-	if (inode) {
++	inode = new_inode(sysfs_sb);
++	if (!inode) {
++		pr_debug("sysfs: could not get root inode\n");
++		return -ENOMEM;
++	}
++
++	sysfs_init_inode(&sysfs_root, inode);
++
+ 		inode->i_op = &sysfs_dir_inode_operations;
+ 		inode->i_fop = &sysfs_dir_operations;
+ 		/* directory inodes start off with i_nlink == 2 (for "." entry) */
+ 		inc_nlink(inode);
+-	} else {
+-		pr_debug("sysfs: could not get root inode\n");
+-		return -ENOMEM;
+-	}
+ 
+ 	root = d_alloc_root(inode);
+ 	if (!root) {
+diff -Nurb linux-2.6.22-570/fs/sysfs/symlink.c linux-2.6.22-try2/fs/sysfs/symlink.c
+--- linux-2.6.22-570/fs/sysfs/symlink.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/sysfs/symlink.c	2007-12-19 15:29:23.000000000 -0500
+@@ -11,71 +11,39 @@
+ 
+ #include "sysfs.h"
+ 
+-static int object_depth(struct kobject * kobj)
++static int object_depth(struct sysfs_dirent *sd)
+ {
+-	struct kobject * p = kobj;
+ 	int depth = 0;
+-	do { depth++; } while ((p = p->parent));
++
++	for (; sd->s_parent; sd = sd->s_parent)
++		depth++;
++
+ 	return depth;
+ }
+ 
+-static int object_path_length(struct kobject * kobj)
++static int object_path_length(struct sysfs_dirent * sd)
+ {
+-	struct kobject * p = kobj;
+ 	int length = 1;
+-	do {
+-		length += strlen(kobject_name(p)) + 1;
+-		p = p->parent;
+-	} while (p);
++
++	for (; sd->s_parent; sd = sd->s_parent)
++		length += strlen(sd->s_name) + 1;
++
+ 	return length;
+ }
+ 
+-static void fill_object_path(struct kobject * kobj, char * buffer, int length)
++static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length)
+ {
+-	struct kobject * p;
+-
+ 	--length;
+-	for (p = kobj; p; p = p->parent) {
+-		int cur = strlen(kobject_name(p));
++	for (; sd->s_parent; sd = sd->s_parent) {
++		int cur = strlen(sd->s_name);
+ 
+ 		/* back up enough to print this bus id with '/' */
+ 		length -= cur;
+-		strncpy(buffer + length,kobject_name(p),cur);
++		strncpy(buffer + length, sd->s_name, cur);
+ 		*(buffer + --length) = '/';
+ 	}
+ }
+ 
+-static int sysfs_add_link(struct dentry * parent, const char * name, struct kobject * target)
+-{
+-	struct sysfs_dirent * parent_sd = parent->d_fsdata;
+-	struct sysfs_symlink * sl;
+-	int error = 0;
+-
+-	error = -ENOMEM;
+-	sl = kmalloc(sizeof(*sl), GFP_KERNEL);
+-	if (!sl)
+-		goto exit1;
+-
+-	sl->link_name = kmalloc(strlen(name) + 1, GFP_KERNEL);
+-	if (!sl->link_name)
+-		goto exit2;
+-
+-	strcpy(sl->link_name, name);
+-	sl->target_kobj = kobject_get(target);
+-
+-	error = sysfs_make_dirent(parent_sd, NULL, sl, S_IFLNK|S_IRWXUGO,
+-				SYSFS_KOBJ_LINK);
+-	if (!error)
+-		return 0;
+-
+-	kobject_put(target);
+-	kfree(sl->link_name);
+-exit2:
+-	kfree(sl);
+-exit1:
+-	return error;
+-}
+-
+ /**
+  *	sysfs_create_link - create symlink between two objects.
+  *	@kobj:	object whose directory we're creating the link in.
+@@ -84,24 +52,57 @@
+  */
+ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
+ {
+-	struct dentry *dentry = NULL;
+-	int error = -EEXIST;
++	struct sysfs_dirent *parent_sd = NULL;
++	struct sysfs_dirent *target_sd = NULL;
++	struct sysfs_dirent *sd = NULL;
++	struct sysfs_addrm_cxt acxt;
++	int error;
+ 
+ 	BUG_ON(!name);
+ 
+ 	if (!kobj) {
+ 		if (sysfs_mount && sysfs_mount->mnt_sb)
+-			dentry = sysfs_mount->mnt_sb->s_root;
++			parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
+ 	} else
+-		dentry = kobj->dentry;
++		parent_sd = kobj->sd;
++
++	error = -EFAULT;
++	if (!parent_sd)
++		goto out_put;
++
++	/* target->sd can go away beneath us but is protected with
++	 * sysfs_assoc_lock.  Fetch target_sd from it.
++	 */
++	spin_lock(&sysfs_assoc_lock);
++	if (target->sd)
++		target_sd = sysfs_get(target->sd);
++	spin_unlock(&sysfs_assoc_lock);
++
++	error = -ENOENT;
++	if (!target_sd)
++		goto out_put;
++
++	error = -ENOMEM;
++	sd = sysfs_new_dirent(name, S_IFLNK|S_IRWXUGO, SYSFS_KOBJ_LINK);
++	if (!sd)
++		goto out_put;
++	sd->s_elem.symlink.target_sd = target_sd;
++
++	sysfs_addrm_start(&acxt, parent_sd);
++
++	if (!sysfs_find_dirent(parent_sd, name)) {
++		sysfs_add_one(&acxt, sd);
++		sysfs_link_sibling(sd);
++	}
+ 
+-	if (!dentry)
+-		return -EFAULT;
++	if (sysfs_addrm_finish(&acxt))
++		return 0;
+ 
+-	mutex_lock(&dentry->d_inode->i_mutex);
+-	if (!sysfs_dirent_exist(dentry->d_fsdata, name))
+-		error = sysfs_add_link(dentry, name, target);
+-	mutex_unlock(&dentry->d_inode->i_mutex);
++	error = -EEXIST;
++	/* fall through */
++ out_put:
++	sysfs_put(target_sd);
++	sysfs_put(sd);
+ 	return error;
+ }
+ 
+@@ -114,17 +115,17 @@
+ 
+ void sysfs_remove_link(struct kobject * kobj, const char * name)
+ {
+-	sysfs_hash_and_remove(kobj->dentry,name);
++	sysfs_hash_and_remove(kobj->sd, name);
+ }
+ 
+-static int sysfs_get_target_path(struct kobject * kobj, struct kobject * target,
+-				 char *path)
++static int sysfs_get_target_path(struct sysfs_dirent * parent_sd,
++				 struct sysfs_dirent * target_sd, char *path)
+ {
+ 	char * s;
+ 	int depth, size;
+ 
+-	depth = object_depth(kobj);
+-	size = object_path_length(target) + depth * 3 - 1;
++	depth = object_depth(parent_sd);
++	size = object_path_length(target_sd) + depth * 3 - 1;
+ 	if (size > PATH_MAX)
+ 		return -ENAMETOOLONG;
+ 
+@@ -133,7 +134,7 @@
+ 	for (s = path; depth--; s += 3)
+ 		strcpy(s,"../");
+ 
+-	fill_object_path(target, path, size);
++	fill_object_path(target_sd, path, size);
+ 	pr_debug("%s: path = '%s'\n", __FUNCTION__, path);
+ 
+ 	return 0;
+@@ -141,27 +142,16 @@
+ 
+ static int sysfs_getlink(struct dentry *dentry, char * path)
+ {
+-	struct kobject *kobj, *target_kobj;
+-	int error = 0;
+-
+-	kobj = sysfs_get_kobject(dentry->d_parent);
+-	if (!kobj)
+-		return -EINVAL;
+-
+-	target_kobj = sysfs_get_kobject(dentry);
+-	if (!target_kobj) {
+-		kobject_put(kobj);
+-		return -EINVAL;
+-	}
+-
+-	down_read(&sysfs_rename_sem);
+-	error = sysfs_get_target_path(kobj, target_kobj, path);
+-	up_read(&sysfs_rename_sem);
++	struct sysfs_dirent *sd = dentry->d_fsdata;
++	struct sysfs_dirent *parent_sd = sd->s_parent;
++	struct sysfs_dirent *target_sd = sd->s_elem.symlink.target_sd;
++	int error;
++
++	mutex_lock(&sysfs_mutex);
++	error = sysfs_get_target_path(parent_sd, target_sd, path);
++	mutex_unlock(&sysfs_mutex);
+ 	
+-	kobject_put(kobj);
+-	kobject_put(target_kobj);
+ 	return error;
+-
+ }
+ 
+ static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd)
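
The symlink.c changes above compute link bodies purely from sysfs_dirent
parent chains: one "../" per level of the link's own directory depth,
followed by the target's path from the root.  A standalone userspace check
of that arithmetic (all names made up):

    #include <stdio.h>
    #include <string.h>

    struct sd { struct sd *parent; const char *name; };

    /* append the path of @sd from the root; the root itself is unnamed */
    static void emit_abs(const struct sd *sd, char *buf, size_t len)
    {
        if (!sd->parent)
            return;
        emit_abs(sd->parent, buf, len);
        snprintf(buf + strlen(buf), len - strlen(buf), "%s%s",
                 sd->parent->parent ? "/" : "", sd->name);
    }

    int main(void)
    {
        struct sd root  = { NULL, "" };
        struct sd class = { &root, "class" }, net = { &class, "net" };
        struct sd devs  = { &root, "devices" }, tgt = { &devs, "eth0" };
        char buf[256] = "";
        const struct sd *p;

        /* one "../" per level of the link's parent directory */
        for (p = &net; p->parent; p = p->parent)
            strncat(buf, "../", sizeof(buf) - strlen(buf) - 1);
        emit_abs(&tgt, buf, sizeof(buf));
        printf("%s\n", buf);    /* prints: ../../devices/eth0 */
        return 0;
    }
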
+diff -Nurb linux-2.6.22-570/fs/sysfs/sysfs.h linux-2.6.22-try2/fs/sysfs/sysfs.h
+--- linux-2.6.22-570/fs/sysfs/sysfs.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/sysfs/sysfs.h	2007-12-19 15:29:23.000000000 -0500
+@@ -1,9 +1,40 @@
++struct sysfs_elem_dir {
++	struct kobject		* kobj;
++};
++
++struct sysfs_elem_symlink {
++	struct sysfs_dirent	* target_sd;
++};
++
++struct sysfs_elem_attr {
++	struct attribute	* attr;
++};
++
++struct sysfs_elem_bin_attr {
++	struct bin_attribute	* bin_attr;
++};
++
++/*
++ * As long as an s_count reference is held, the sysfs_dirent itself is
++ * accessible.  Dereferencing s_elem or any other outer entity
++ * requires an s_active reference.
++ */
+ struct sysfs_dirent {
+ 	atomic_t		s_count;
+-	struct list_head	s_sibling;
+-	struct list_head	s_children;
+-	void 			* s_element;
+-	int			s_type;
++	atomic_t		s_active;
++	struct sysfs_dirent	* s_parent;
++	struct sysfs_dirent	* s_sibling;
++	struct sysfs_dirent	* s_children;
++	const char		* s_name;
++
++	union {
++		struct sysfs_elem_dir		dir;
++		struct sysfs_elem_symlink	symlink;
++		struct sysfs_elem_attr		attr;
++		struct sysfs_elem_bin_attr	bin_attr;
++	}			s_elem;
++
++	unsigned int		s_flags;
+ 	umode_t			s_mode;
+ 	ino_t			s_ino;
+ 	struct dentry		* s_dentry;
+@@ -11,30 +42,60 @@
+ 	atomic_t		s_event;
+ };
+ 
++#define SD_DEACTIVATED_BIAS	INT_MIN
++
++struct sysfs_addrm_cxt {
++	struct sysfs_dirent	*parent_sd;
++	struct inode		*parent_inode;
++	struct sysfs_dirent	*removed;
++	int			cnt;
++};
++
+ extern struct vfsmount * sysfs_mount;
++extern struct sysfs_dirent sysfs_root;
+ extern struct kmem_cache *sysfs_dir_cachep;
+ 
+-extern void sysfs_delete_inode(struct inode *inode);
+-extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
+-extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
++extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
++extern void sysfs_link_sibling(struct sysfs_dirent *sd);
++extern void sysfs_unlink_sibling(struct sysfs_dirent *sd);
++extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
++extern void sysfs_put_active(struct sysfs_dirent *sd);
++extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
++extern void sysfs_put_active_two(struct sysfs_dirent *sd);
++extern void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
++			      struct sysfs_dirent *parent_sd);
++extern void sysfs_add_one(struct sysfs_addrm_cxt *acxt,
++			  struct sysfs_dirent *sd);
++extern void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
++			     struct sysfs_dirent *sd);
++extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
+ 
+-extern int sysfs_dirent_exist(struct sysfs_dirent *, const unsigned char *);
+-extern int sysfs_make_dirent(struct sysfs_dirent *, struct dentry *, void *,
+-				umode_t, int);
+-
+-extern int sysfs_add_file(struct dentry *, const struct attribute *, int);
+-extern int sysfs_hash_and_remove(struct dentry * dir, const char * name);
++extern void sysfs_delete_inode(struct inode *inode);
++extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode);
++extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd);
++extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode);
++
++extern void release_sysfs_dirent(struct sysfs_dirent * sd);
++extern struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
++					      const unsigned char *name);
++extern struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
++					     const unsigned char *name);
++extern struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode,
++					     int type);
++
++extern int sysfs_add_file(struct sysfs_dirent *dir_sd,
++			  const struct attribute *attr, int type);
++extern int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
+ extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
+ 
+-extern int sysfs_create_subdir(struct kobject *, const char *, struct dentry **);
+-extern void sysfs_remove_subdir(struct dentry *);
++extern int sysfs_create_subdir(struct kobject *kobj, const char *name,
++			       struct sysfs_dirent **p_sd);
++extern void sysfs_remove_subdir(struct sysfs_dirent *sd);
+ 
+-extern const unsigned char * sysfs_get_name(struct sysfs_dirent *sd);
+-extern void sysfs_drop_dentry(struct sysfs_dirent *sd, struct dentry *parent);
+ extern int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
+ 
+-extern spinlock_t sysfs_lock;
+-extern struct rw_semaphore sysfs_rename_sem;
++extern spinlock_t sysfs_assoc_lock;
++extern struct mutex sysfs_mutex;
+ extern struct super_block * sysfs_sb;
+ extern const struct file_operations sysfs_dir_operations;
+ extern const struct file_operations sysfs_file_operations;
+@@ -42,73 +103,9 @@
+ extern const struct inode_operations sysfs_dir_inode_operations;
+ extern const struct inode_operations sysfs_symlink_inode_operations;
+ 
+-struct sysfs_symlink {
+-	char * link_name;
+-	struct kobject * target_kobj;
+-};
+-
+-struct sysfs_buffer {
+-	struct list_head		associates;
+-	size_t				count;
+-	loff_t				pos;
+-	char				* page;
+-	struct sysfs_ops		* ops;
+-	struct semaphore		sem;
+-	int				orphaned;
+-	int				needs_read_fill;
+-	int				event;
+-};
+-
+-struct sysfs_buffer_collection {
+-	struct list_head	associates;
+-};
+-
+-static inline struct kobject * to_kobj(struct dentry * dentry)
+-{
+-	struct sysfs_dirent * sd = dentry->d_fsdata;
+-	return ((struct kobject *) sd->s_element);
+-}
+-
+-static inline struct attribute * to_attr(struct dentry * dentry)
+-{
+-	struct sysfs_dirent * sd = dentry->d_fsdata;
+-	return ((struct attribute *) sd->s_element);
+-}
+-
+-static inline struct bin_attribute * to_bin_attr(struct dentry * dentry)
+-{
+-	struct sysfs_dirent * sd = dentry->d_fsdata;
+-	return ((struct bin_attribute *) sd->s_element);
+-}
+-
+-static inline struct kobject *sysfs_get_kobject(struct dentry *dentry)
++static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
+ {
+-	struct kobject * kobj = NULL;
+-
+-	spin_lock(&dcache_lock);
+-	if (!d_unhashed(dentry)) {
+-		struct sysfs_dirent * sd = dentry->d_fsdata;
+-		if (sd->s_type & SYSFS_KOBJ_LINK) {
+-			struct sysfs_symlink * sl = sd->s_element;
+-			kobj = kobject_get(sl->target_kobj);
+-		} else
+-			kobj = kobject_get(sd->s_element);
+-	}
+-	spin_unlock(&dcache_lock);
+-
+-	return kobj;
+-}
+-
+-static inline void release_sysfs_dirent(struct sysfs_dirent * sd)
+-{
+-	if (sd->s_type & SYSFS_KOBJ_LINK) {
+-		struct sysfs_symlink * sl = sd->s_element;
+-		kfree(sl->link_name);
+-		kobject_put(sl->target_kobj);
+-		kfree(sl);
+-	}
+-	kfree(sd->s_iattr);
+-	kmem_cache_free(sysfs_dir_cachep, sd);
++	return sd->s_flags & SYSFS_TYPE_MASK;
+ }
+ 
+ static inline struct sysfs_dirent * sysfs_get(struct sysfs_dirent * sd)
+@@ -122,7 +119,7 @@
+ 
+ static inline void sysfs_put(struct sysfs_dirent * sd)
+ {
+-	if (atomic_dec_and_test(&sd->s_count))
++	if (sd && atomic_dec_and_test(&sd->s_count))
+ 		release_sysfs_dirent(sd);
+ }
+ 
+diff -Nurb linux-2.6.22-570/fs/unionfs/Makefile linux-2.6.22-try2/fs/unionfs/Makefile
+--- linux-2.6.22-570/fs/unionfs/Makefile	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,7 @@
++obj-$(CONFIG_UNION_FS) += unionfs.o
++
++unionfs-y := subr.o dentry.o file.o inode.o main.o super.o \
++	rdstate.o copyup.o dirhelper.o rename.o unlink.o \
++	lookup.o commonfops.o dirfops.o sioq.o mmap.o
++
++unionfs-$(CONFIG_UNION_FS_XATTR) += xattr.o
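
The commonfops.c file added next opens with copyup_deleted_file(), whose
silly-rename temp names have a fixed layout: the literal ".unionfs", the
inode number zero-padded to two hex digits per byte, then the retry counter
likewise.  A quick userspace check of that arithmetic (assuming an 8-byte
inode number and a 4-byte counter):

    #include <stdio.h>

    int main(void)
    {
        unsigned long ino = 0x1234;     /* pretend inode number */
        unsigned int counter = 7;
        const int i_inosize   = sizeof(ino) * 2;        /* 16 digits */
        const int countersize = sizeof(counter) * 2;    /*  8 digits */
        char name[sizeof(".unionfs") + 16 + 8];

        sprintf(name, ".unionfs%*.*lx", i_inosize, i_inosize, ino);
        sprintf(name + sizeof(".unionfs") - 1 + i_inosize,
                "%*.*x", countersize, countersize, counter);
        printf("%s\n", name);   /* .unionfs000000000000123400000007 */
        return 0;
    }
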
+diff -Nurb linux-2.6.22-570/fs/unionfs/commonfops.c linux-2.6.22-try2/fs/unionfs/commonfops.c
+--- linux-2.6.22-570/fs/unionfs/commonfops.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/commonfops.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,748 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * 1) Copyup the file
++ * 2) Rename the file to '.unionfs<original inode#><counter>' - obviously
++ * stolen from NFS's silly rename
++ */
++static int copyup_deleted_file(struct file *file, struct dentry *dentry,
++			       int bstart, int bindex)
++{
++	static unsigned int counter;
++	const int i_inosize = sizeof(dentry->d_inode->i_ino) * 2;
++	const int countersize = sizeof(counter) * 2;
++	const int nlen = sizeof(".unionfs") + i_inosize + countersize - 1;
++	char name[nlen + 1];
++
++	int err;
++	struct dentry *tmp_dentry = NULL;
++	struct dentry *hidden_dentry;
++	struct dentry *hidden_dir_dentry = NULL;
++
++	hidden_dentry = unionfs_lower_dentry_idx(dentry, bstart);
++
++	sprintf(name, ".unionfs%*.*lx",
++		i_inosize, i_inosize, hidden_dentry->d_inode->i_ino);
++
++retry:
++	/*
++	 * Loop, looking for an unused temp name to copyup to.
++	 *
++	 * It's somewhat silly that we look for a free temp name in the
++	 * source branch (bstart) instead of the dest branch (bindex), where
++	 * the final name will be created.  We _will_ catch it if somehow
++	 * the name exists in the dest branch, but it'd be nice to catch it
++	 * sooner than later.
++	 */
++	tmp_dentry = NULL;
++	do {
++		char *suffix = name + nlen - countersize;
++
++		dput(tmp_dentry);
++		counter++;
++		sprintf(suffix, "%*.*x", countersize, countersize, counter);
++
++		printk(KERN_DEBUG "unionfs: trying to rename %s to %s\n",
++		       dentry->d_name.name, name);
++
++		tmp_dentry = lookup_one_len(name, hidden_dentry->d_parent,
++					    nlen);
++		if (IS_ERR(tmp_dentry)) {
++			err = PTR_ERR(tmp_dentry);
++			goto out;
++		}
++	} while (tmp_dentry->d_inode != NULL);	/* need negative dentry */
++	dput(tmp_dentry);
++
++	err = copyup_named_file(dentry->d_parent->d_inode, file, name, bstart,
++				bindex, file->f_dentry->d_inode->i_size);
++	if (err == -EEXIST)
++		goto retry;
++	else if (err)
++		goto out;
++
++	/* bring it to the same state as an unlinked file */
++	hidden_dentry = unionfs_lower_dentry_idx(dentry, dbstart(dentry));
++	hidden_dir_dentry = lock_parent(hidden_dentry);
++	err = vfs_unlink(hidden_dir_dentry->d_inode, hidden_dentry);
++	unlock_dir(hidden_dir_dentry);
++
++out:
++	return err;
++}
++
++/*
++ * put all references held by upper struct file and free lower file pointer
++ * array
++ */
++static void cleanup_file(struct file *file)
++{
++	int bindex, bstart, bend;
++	struct file **lf;
++	struct super_block *sb = file->f_dentry->d_sb;
++
++	lf = UNIONFS_F(file)->lower_files;
++	bstart = fbstart(file);
++	bend = fbend(file);
++
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		if (unionfs_lower_file_idx(file, bindex)) {
++			/*
++			 * Find new index of matching branch with an open
++			 * file, since branches could have been added or
++			 * deleted causing the one with open files to shift.
++			 */
++			int i;	/* holds (possibly) updated branch index */
++			int old_bid;
++
++			old_bid = UNIONFS_F(file)->saved_branch_ids[bindex];
++			i = branch_id_to_idx(sb, old_bid);
++			if (i < 0)
++				printk(KERN_ERR "unionfs: no superblock for "
++				       "file %p\n", file);
++			else {
++				/* decrement count of open files */
++				branchput(sb, i);
++				/*
++				 * fput will perform an mntput for us on the
++				 * correct branch.  Although we're using the
++				 * file's old branch configuration, bindex,
++				 * which is the old index, correctly points
++				 * to the right branch in the file's branch
++				 * list.  In other words, we're going to
++				 * mntput the correct branch even if
++				 * branches have been added/removed.
++				 */
++				fput(unionfs_lower_file_idx(file, bindex));
++			}
++		}
++	}
++
++	UNIONFS_F(file)->lower_files = NULL;
++	kfree(lf);
++	kfree(UNIONFS_F(file)->saved_branch_ids);
++	/* set to NULL because caller needs to know if to kfree on error */
++	UNIONFS_F(file)->saved_branch_ids = NULL;
++}
++
++/* open all lower files for a given file */
++static int open_all_files(struct file *file)
++{
++	int bindex, bstart, bend, err = 0;
++	struct file *hidden_file;
++	struct dentry *hidden_dentry;
++	struct dentry *dentry = file->f_dentry;
++	struct super_block *sb = dentry->d_sb;
++
++	bstart = dbstart(dentry);
++	bend = dbend(dentry);
++
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (!hidden_dentry)
++			continue;
++
++		dget(hidden_dentry);
++		unionfs_mntget(dentry, bindex);
++		branchget(sb, bindex);
++
++		hidden_file =
++			dentry_open(hidden_dentry,
++				    unionfs_lower_mnt_idx(dentry, bindex),
++				    file->f_flags);
++		if (IS_ERR(hidden_file)) {
++			err = PTR_ERR(hidden_file);
++			goto out;
++		}
++		unionfs_set_lower_file_idx(file, bindex, hidden_file);
++	}
++out:
++	return err;
++}
++
++/* open the highest priority file for a given upper file */
++static int open_highest_file(struct file *file, int willwrite)
++{
++	int bindex, bstart, bend, err = 0;
++	struct file *hidden_file;
++	struct dentry *hidden_dentry;
++
++	struct dentry *dentry = file->f_dentry;
++	struct inode *parent_inode = dentry->d_parent->d_inode;
++	struct super_block *sb = dentry->d_sb;
++	size_t inode_size = dentry->d_inode->i_size;
++
++	bstart = dbstart(dentry);
++	bend = dbend(dentry);
++
++	hidden_dentry = unionfs_lower_dentry(dentry);
++	if (willwrite && IS_WRITE_FLAG(file->f_flags) && is_robranch(dentry)) {
++		for (bindex = bstart - 1; bindex >= 0; bindex--) {
++			err = copyup_file(parent_inode, file, bstart, bindex,
++					  inode_size);
++			if (!err)
++				break;
++		}
++		atomic_set(&UNIONFS_F(file)->generation,
++			   atomic_read(&UNIONFS_I(dentry->d_inode)->
++				       generation));
++		goto out;
++	}
++
++	dget(hidden_dentry);
++	unionfs_mntget(dentry, bstart);
++	branchget(sb, bstart);
++	hidden_file = dentry_open(hidden_dentry,
++				  unionfs_lower_mnt_idx(dentry, bstart),
++				  file->f_flags);
++	if (IS_ERR(hidden_file)) {
++		err = PTR_ERR(hidden_file);
++		goto out;
++	}
++	unionfs_set_lower_file(file, hidden_file);
++	/* Fix up the position. */
++	hidden_file->f_pos = file->f_pos;
++
++	memcpy(&hidden_file->f_ra, &file->f_ra, sizeof(struct file_ra_state));
++out:
++	return err;
++}
++
++/* perform a delayed copyup of a read-write file on a read-only branch */
++static int do_delayed_copyup(struct file *file, struct dentry *dentry)
++{
++	int bindex, bstart, bend, err = 0;
++	struct inode *parent_inode = dentry->d_parent->d_inode;
++	loff_t inode_size = file->f_dentry->d_inode->i_size;
++
++	bstart = fbstart(file);
++	bend = fbend(file);
++
++	BUG_ON(!S_ISREG(file->f_dentry->d_inode->i_mode));
++
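++	/*
++	 * Walk from bstart - 1 toward branch 0 (higher priority) and stop
++	 * at the first branch that accepts the copy; e.g., with branch 0
++	 * writable and the file on read-only branch 1, the copy lands on
++	 * branch 0.
++	 */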
++	for (bindex = bstart - 1; bindex >= 0; bindex--) {
++		if (!d_deleted(file->f_dentry))
++			err = copyup_file(parent_inode, file, bstart,
++					  bindex, inode_size);
++		else
++			err = copyup_deleted_file(file, dentry, bstart,
++						  bindex);
++
++		if (!err)
++			break;
++	}
++	if (!err && (bstart > fbstart(file))) {
++		bend = fbend(file);
++		for (bindex = bstart; bindex <= bend; bindex++) {
++			if (unionfs_lower_file_idx(file, bindex)) {
++				branchput(dentry->d_sb, bindex);
++				fput(unionfs_lower_file_idx(file, bindex));
++				unionfs_set_lower_file_idx(file, bindex, NULL);
++			}
++		}
++		fbend(file) = bend;
++	}
++	return err;
++}
++
++/*
++ * Revalidate the struct file
++ * @file: file to revalidate
++ * @willwrite: 1 if caller may cause changes to the file; 0 otherwise.
++ */
++int unionfs_file_revalidate(struct file *file, int willwrite)
++{
++	struct super_block *sb;
++	struct dentry *dentry;
++	int sbgen, fgen, dgen;
++	int bstart, bend;
++	int size;
++
++	int err = 0;
++
++	dentry = file->f_dentry;
++	unionfs_lock_dentry(dentry);
++	sb = dentry->d_sb;
++
++	/*
++	 * First revalidate the dentry inside struct file,
++	 * but not unhashed dentries.
++	 */
++	if (!d_deleted(dentry) &&
++	    !__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out_nofree;
++	}
++
++	sbgen = atomic_read(&UNIONFS_SB(sb)->generation);
++	dgen = atomic_read(&UNIONFS_D(dentry)->generation);
++	fgen = atomic_read(&UNIONFS_F(file)->generation);
++
++	BUG_ON(sbgen > dgen);
++
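++	/*
++	 * A sketch of the generation scheme assumed here: branch management
++	 * (e.g. "-o remount,add/del") bumps the superblock generation, so
++	 * sbgen > fgen means this file's lower-file array predates the
++	 * current branch configuration and must be rebuilt.
++	 */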
++	/*
++	 * There are two cases we are interested in.  The first is if the
++	 * file's generation is older than the superblock's.  The second is
++	 * if someone has copied up this file from underneath us; then we
++	 * also need to refresh things.
++	 */
++	if (!d_deleted(dentry) &&
++	    (sbgen > fgen || dbstart(dentry) != fbstart(file))) {
++		/* First we throw out the existing files. */
++		cleanup_file(file);
++
++		/* Now we reopen the file(s) as in unionfs_open. */
++		bstart = fbstart(file) = dbstart(dentry);
++		bend = fbend(file) = dbend(dentry);
++
++		size = sizeof(struct file *) * sbmax(sb);
++		UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
++		if (!UNIONFS_F(file)->lower_files) {
++			err = -ENOMEM;
++			goto out;
++		}
++		size = sizeof(int) * sbmax(sb);
++		UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
++		if (!UNIONFS_F(file)->saved_branch_ids) {
++			err = -ENOMEM;
++			goto out;
++		}
++
++		if (S_ISDIR(dentry->d_inode->i_mode)) {
++			/* We need to open all the files. */
++			err = open_all_files(file);
++			if (err)
++				goto out;
++		} else {
++			/* We only open the highest priority branch. */
++			err = open_highest_file(file, willwrite);
++			if (err)
++				goto out;
++		}
++		atomic_set(&UNIONFS_F(file)->generation,
++			   atomic_read(&UNIONFS_I(dentry->d_inode)->
++				       generation));
++	}
++
++	/* Copyup on the first write to a file on a readonly branch. */
++	if (willwrite && IS_WRITE_FLAG(file->f_flags) &&
++	    !IS_WRITE_FLAG(unionfs_lower_file(file)->f_flags) &&
++	    is_robranch(dentry)) {
++		printk(KERN_DEBUG "unionfs: Doing delayed copyup of a "
++		       "read-write file on a read-only branch.\n");
++		err = do_delayed_copyup(file, dentry);
++	}
++
++out:
++	if (err) {
++		kfree(UNIONFS_F(file)->lower_files);
++		kfree(UNIONFS_F(file)->saved_branch_ids);
++	}
++out_nofree:
++	unionfs_unlock_dentry(dentry);
++	return err;
++}
++
++/* unionfs_open helper function: open a directory */
++static int __open_dir(struct inode *inode, struct file *file)
++{
++	struct dentry *hidden_dentry;
++	struct file *hidden_file;
++	int bindex, bstart, bend;
++
++	bstart = fbstart(file) = dbstart(file->f_dentry);
++	bend = fbend(file) = dbend(file->f_dentry);
++
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_dentry =
++			unionfs_lower_dentry_idx(file->f_dentry, bindex);
++		if (!hidden_dentry)
++			continue;
++
++		dget(hidden_dentry);
++		unionfs_mntget(file->f_dentry, bindex);
++		hidden_file = dentry_open(hidden_dentry,
++					  unionfs_lower_mnt_idx(file->f_dentry,
++								bindex),
++					  file->f_flags);
++		if (IS_ERR(hidden_file))
++			return PTR_ERR(hidden_file);
++
++		unionfs_set_lower_file_idx(file, bindex, hidden_file);
++
++		/*
++		 * The branchget goes after the open, because otherwise
++		 * we would miss the reference on release.
++		 */
++		branchget(inode->i_sb, bindex);
++	}
++
++	return 0;
++}
++
++/* unionfs_open helper function: open a file */
++static int __open_file(struct inode *inode, struct file *file)
++{
++	struct dentry *hidden_dentry;
++	struct file *hidden_file;
++	int hidden_flags;
++	int bindex, bstart, bend;
++
++	hidden_dentry = unionfs_lower_dentry(file->f_dentry);
++	hidden_flags = file->f_flags;
++
++	bstart = fbstart(file) = dbstart(file->f_dentry);
++	bend = fbend(file) = dbend(file->f_dentry);
++
++	/*
++	 * If the file exists on a read-only branch, decide whether the
++	 * open must copy it up right away (O_TRUNC modifies the file) or
++	 * whether the copyup can be deferred until the first write.
++	 */
++	if (hidden_dentry->d_inode && is_robranch(file->f_dentry)) {
++		/*
++		 * if the open will change the file, copy it up otherwise
++		 * defer it.
++		 */
++		if (hidden_flags & O_TRUNC) {
++			int size = 0;
++			int err = -EROFS;
++
++			/* copyup the file */
++			for (bindex = bstart - 1; bindex >= 0; bindex--) {
++				err = copyup_file(
++					file->f_dentry->d_parent->d_inode,
++					file, bstart, bindex, size);
++				if (!err)
++					break;
++			}
++			return err;
++		}
++		hidden_flags &= ~(OPEN_WRITE_FLAGS);
++	}
++
++	dget(hidden_dentry);
++
++	/*
++	 * dentry_open() will decrement the mnt refcount on error;
++	 * otherwise fput() will do an mntput() for us upon file close.
++	 */
++	unionfs_mntget(file->f_dentry, bstart);
++	hidden_file =
++		dentry_open(hidden_dentry,
++			    unionfs_lower_mnt_idx(file->f_dentry, bstart),
++			    hidden_flags);
++	if (IS_ERR(hidden_file))
++		return PTR_ERR(hidden_file);
++
++	unionfs_set_lower_file(file, hidden_file);
++	branchget(inode->i_sb, bstart);
++
++	return 0;
++}
++
++int unionfs_open(struct inode *inode, struct file *file)
++{
++	int err = 0;
++	struct file *hidden_file = NULL;
++	struct dentry *dentry = NULL;
++	int bindex = 0, bstart = 0, bend = 0;
++	int size;
++
++	unionfs_read_lock(inode->i_sb);
++
++	file->private_data =
++		kzalloc(sizeof(struct unionfs_file_info), GFP_KERNEL);
++	if (!UNIONFS_F(file)) {
++		err = -ENOMEM;
++		goto out_nofree;
++	}
++	fbstart(file) = -1;
++	fbend(file) = -1;
++	atomic_set(&UNIONFS_F(file)->generation,
++		   atomic_read(&UNIONFS_I(inode)->generation));
++
++	size = sizeof(struct file *) * sbmax(inode->i_sb);
++	UNIONFS_F(file)->lower_files = kzalloc(size, GFP_KERNEL);
++	if (!UNIONFS_F(file)->lower_files) {
++		err = -ENOMEM;
++		goto out;
++	}
++	size = sizeof(int) * sbmax(inode->i_sb);
++	UNIONFS_F(file)->saved_branch_ids = kzalloc(size, GFP_KERNEL);
++	if (!UNIONFS_F(file)->saved_branch_ids) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	dentry = file->f_dentry;
++	unionfs_lock_dentry(dentry);
++
++	bstart = fbstart(file) = dbstart(dentry);
++	bend = fbend(file) = dbend(dentry);
++
++	/* increment, so that we can flush appropriately */
++	atomic_inc(&UNIONFS_I(dentry->d_inode)->totalopens);
++
++	/*
++	 * open all directories and make the unionfs file struct point to
++	 * these hidden file structs
++	 */
++	if (S_ISDIR(inode->i_mode))
++		err = __open_dir(inode, file);	/* open a dir */
++	else
++		err = __open_file(inode, file);	/* open a file */
++
++	/* freeing the allocated resources, and fput the opened files */
++	if (err) {
++		atomic_dec(&UNIONFS_I(dentry->d_inode)->totalopens);
++		for (bindex = bstart; bindex <= bend; bindex++) {
++			hidden_file = unionfs_lower_file_idx(file, bindex);
++			if (!hidden_file)
++				continue;
++
++			branchput(file->f_dentry->d_sb, bindex);
++			/* fput calls dput for hidden_dentry */
++			fput(hidden_file);
++		}
++	}
++
++	unionfs_unlock_dentry(dentry);
++
++out:
++	if (err) {
++		kfree(UNIONFS_F(file)->lower_files);
++		kfree(UNIONFS_F(file)->saved_branch_ids);
++		kfree(UNIONFS_F(file));
++	}
++out_nofree:
++	unionfs_read_unlock(inode->i_sb);
++	return err;
++}
++
++/*
++ * release all lower object references & free the file info structure
++ *
++ * The caller need not grab the sb info's rwsem; we take it here and
++ * release it on every return path.
++ */
++int unionfs_file_release(struct inode *inode, struct file *file)
++{
++	struct file *hidden_file = NULL;
++	struct unionfs_file_info *fileinfo;
++	struct unionfs_inode_info *inodeinfo;
++	struct super_block *sb = inode->i_sb;
++	int bindex, bstart, bend;
++	int fgen;
++	int err;
++
++	unionfs_read_lock(sb);
++	/*
++	 * Yes, we have to revalidate this file even if it's being released.
++	 * This is important for open-but-unlinked files, as well as mmap
++	 * support.
++	 */
++	if ((err = unionfs_file_revalidate(file, 1))) {
++		unionfs_read_unlock(sb);
++		return err;
++	}
++	fileinfo = UNIONFS_F(file);
++	BUG_ON(file->f_dentry->d_inode != inode);
++	inodeinfo = UNIONFS_I(inode);
++
++	/* fput all the hidden files */
++	fgen = atomic_read(&fileinfo->generation);
++	bstart = fbstart(file);
++	bend = fbend(file);
++
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_file = unionfs_lower_file_idx(file, bindex);
++
++		if (hidden_file) {
++			fput(hidden_file);
++			branchput(inode->i_sb, bindex);
++		}
++	}
++	kfree(fileinfo->lower_files);
++	kfree(fileinfo->saved_branch_ids);
++
++	if (fileinfo->rdstate) {
++		fileinfo->rdstate->access = jiffies;
++		printk(KERN_DEBUG "unionfs: saving rdstate with cookie "
++		       "%u [%d.%lld]\n",
++		       fileinfo->rdstate->cookie,
++		       fileinfo->rdstate->bindex,
++		       (long long)fileinfo->rdstate->dirpos);
++		spin_lock(&inodeinfo->rdlock);
++		inodeinfo->rdcount++;
++		list_add_tail(&fileinfo->rdstate->cache,
++			      &inodeinfo->readdircache);
++		mark_inode_dirty(inode);
++		spin_unlock(&inodeinfo->rdlock);
++		fileinfo->rdstate = NULL;
++	}
++	kfree(fileinfo);
++	unionfs_read_unlock(sb);
++	return 0;
++}
++
++/* pass the ioctl to the lower fs */
++static long do_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
++{
++	struct file *hidden_file;
++	int err;
++
++	hidden_file = unionfs_lower_file(file);
++
++	/* guard against a NULL lower file before calling the LSM hook */
++	if (!hidden_file || !hidden_file->f_op) {
++		err = -ENOTTY;
++		goto out;
++	}
++
++	err = security_file_ioctl(hidden_file, cmd, arg);
++	if (err)
++		goto out;
++
++	err = -ENOTTY;
++	if (hidden_file->f_op->unlocked_ioctl) {
++		err = hidden_file->f_op->unlocked_ioctl(hidden_file, cmd, arg);
++	} else if (hidden_file->f_op->ioctl) {
++		lock_kernel();
++		err = hidden_file->f_op->ioctl(hidden_file->f_dentry->d_inode,
++					       hidden_file, cmd, arg);
++		unlock_kernel();
++	}
++
++out:
++	return err;
++}
++
++/*
++ * return to user-space the branch indices containing the file in question
++ *
++ * We use an fd_set, which limits the number of branches to FD_SETSIZE
++ * (currently 1024): plenty for most configurations.
++ */
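++/*
++ * A hypothetical user-space sketch of this interface (error handling
++ * omitted):
++ *
++ *	fd_set branches;
++ *	int bend = ioctl(fd, UNIONFS_IOCTL_QUERYFILE, &branches);
++ *	for (int i = 0; i <= bend; i++)
++ *		if (FD_ISSET(i, &branches))
++ *			printf("branch %d has the file\n", i);
++ */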
++static int unionfs_ioctl_queryfile(struct file *file, unsigned int cmd,
++				   unsigned long arg)
++{
++	int err = 0;
++	fd_set branchlist;
++
++	int bstart = 0, bend = 0, bindex = 0;
++	struct dentry *dentry, *hidden_dentry;
++
++	dentry = file->f_dentry;
++	unionfs_lock_dentry(dentry);
++	if ((err = unionfs_partial_lookup(dentry)))
++		goto out;
++	bstart = dbstart(dentry);
++	bend = dbend(dentry);
++
++	FD_ZERO(&branchlist);
++
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (!hidden_dentry)
++			continue;
++		if (hidden_dentry->d_inode)
++			FD_SET(bindex, &branchlist);
++	}
++
++	err = copy_to_user((void __user *)arg, &branchlist, sizeof(fd_set));
++	if (err)
++		err = -EFAULT;
++
++out:
++	unionfs_unlock_dentry(dentry);
++	return err < 0 ? err : bend;
++}
++
++long unionfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
++{
++	long err;
++
++	unionfs_read_lock(file->f_path.dentry->d_sb);
++
++	if ((err = unionfs_file_revalidate(file, 1)))
++		goto out;
++
++	/* check if asked for local commands */
++	switch (cmd) {
++	case UNIONFS_IOCTL_INCGEN:
++		/* Increment the superblock generation count */
++		printk("unionfs: incgen ioctl deprecated; "
++		       "use \"-o remount,incgen\"\n");
++		err = -ENOSYS;
++		break;
++
++	case UNIONFS_IOCTL_QUERYFILE:
++		/* Return list of branches containing the given file */
++		err = unionfs_ioctl_queryfile(file, cmd, arg);
++		break;
++
++	default:
++		/* pass the ioctl down */
++		err = do_ioctl(file, cmd, arg);
++		break;
++	}
++
++out:
++	unionfs_read_unlock(file->f_path.dentry->d_sb);
++	return err;
++}
++
++int unionfs_flush(struct file *file, fl_owner_t id)
++{
++	int err = 0;
++	struct file *hidden_file = NULL;
++	struct dentry *dentry = file->f_dentry;
++	int bindex, bstart, bend;
++
++	unionfs_read_lock(file->f_path.dentry->d_sb);
++
++	if ((err = unionfs_file_revalidate(file, 1)))
++		goto out;
++
++	if (!atomic_dec_and_test(&UNIONFS_I(dentry->d_inode)->totalopens))
++		goto out;
++
++	unionfs_lock_dentry(dentry);
++
++	bstart = fbstart(file);
++	bend = fbend(file);
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_file = unionfs_lower_file_idx(file, bindex);
++
++		if (hidden_file && hidden_file->f_op &&
++		    hidden_file->f_op->flush) {
++			err = hidden_file->f_op->flush(hidden_file, id);
++			if (err)
++				goto out_lock;
++
++			/* if there are no more refs to the dentry, dput it */
++			if (d_deleted(dentry)) {
++				dput(unionfs_lower_dentry_idx(dentry, bindex));
++				unionfs_set_lower_dentry_idx(dentry, bindex,
++							     NULL);
++			}
++		}
++
++	}
++
++out_lock:
++	unionfs_unlock_dentry(dentry);
++out:
++	unionfs_read_unlock(file->f_path.dentry->d_sb);
++	return err;
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/copyup.c linux-2.6.22-try2/fs/unionfs/copyup.c
+--- linux-2.6.22-570/fs/unionfs/copyup.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/copyup.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,806 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * For detailed explanation of copyup see:
++ * Documentation/filesystems/unionfs/concepts.txt
++ */
++
++/* forward definitions */
++static int copyup_named_dentry(struct inode *dir, struct dentry *dentry,
++			       int bstart, int new_bindex, const char *name,
++			       int namelen, struct file **copyup_file,
++			       loff_t len);
++static struct dentry *create_parents_named(struct inode *dir,
++					   struct dentry *dentry,
++					   const char *name, int bindex);
++
++#ifdef CONFIG_UNION_FS_XATTR
++/* copyup all extended attrs for a given dentry */
++static int copyup_xattrs(struct dentry *old_hidden_dentry,
++			 struct dentry *new_hidden_dentry)
++{
++	int err = 0;
++	ssize_t list_size = -1;
++	char *name_list = NULL;
++	char *attr_value = NULL;
++	char *name_list_orig = NULL;
++
++	list_size = vfs_listxattr(old_hidden_dentry, NULL, 0);
++
++	if (list_size <= 0) {
++		err = list_size;
++		goto out;
++	}
++
++	name_list = unionfs_xattr_alloc(list_size + 1, XATTR_LIST_MAX);
++	if (!name_list || IS_ERR(name_list)) {
++		err = name_list ? PTR_ERR(name_list) : -ENOMEM;
++		goto out;
++	}
++	list_size = vfs_listxattr(old_hidden_dentry, name_list, list_size);
++	attr_value = unionfs_xattr_alloc(XATTR_SIZE_MAX, XATTR_SIZE_MAX);
++	if (!attr_value || IS_ERR(attr_value)) {
++		err = attr_value ? PTR_ERR(attr_value) : -ENOMEM;
++		goto out;
++	}
++	name_list_orig = name_list;
++	while (*name_list) {
++		ssize_t size;
++
++		/* Lock here since vfs_getxattr doesn't lock for us */
++		mutex_lock(&old_hidden_dentry->d_inode->i_mutex);
++		size = vfs_getxattr(old_hidden_dentry, name_list,
++				    attr_value, XATTR_SIZE_MAX);
++		mutex_unlock(&old_hidden_dentry->d_inode->i_mutex);
++		if (size < 0) {
++			err = size;
++			goto out;
++		}
++
++		if (size > XATTR_SIZE_MAX) {
++			err = -E2BIG;
++			goto out;
++		}
++		/* Don't lock here since vfs_setxattr does it for us. */
++		err = vfs_setxattr(new_hidden_dentry, name_list, attr_value,
++				   size, 0);
++
++		if (err < 0)
++			goto out;
++		name_list += strlen(name_list) + 1;
++	}
++out:
++	name_list = name_list_orig;
++
++	if (name_list)
++		unionfs_xattr_free(name_list, list_size + 1);
++	if (attr_value)
++		unionfs_xattr_free(attr_value, XATTR_SIZE_MAX);
++	/* It is no big deal if this fails, we just roll with the punches. */
++	if (err == -ENOTSUPP || err == -EOPNOTSUPP)
++		err = 0;
++	return err;
++}
++#endif /* CONFIG_UNION_FS_XATTR */
++
++/* Copy up the times, ownership, and mode from the existing lower dentry. */
++static int copyup_permissions(struct super_block *sb,
++			      struct dentry *old_hidden_dentry,
++			      struct dentry *new_hidden_dentry)
++{
++	struct inode *i = old_hidden_dentry->d_inode;
++	struct iattr newattrs;
++	int err;
++
++	newattrs.ia_atime = i->i_atime;
++	newattrs.ia_mtime = i->i_mtime;
++	newattrs.ia_ctime = i->i_ctime;
++
++	newattrs.ia_gid = i->i_gid;
++	newattrs.ia_uid = i->i_uid;
++
++	newattrs.ia_mode = i->i_mode;
++
++	newattrs.ia_valid = ATTR_CTIME | ATTR_ATIME | ATTR_MTIME |
++		ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_FORCE |
++		ATTR_GID | ATTR_UID | ATTR_MODE;
++
++	err = notify_change(new_hidden_dentry, &newattrs);
++
++	return err;
++}
++
++int copyup_dentry(struct inode *dir, struct dentry *dentry,
++		  int bstart, int new_bindex,
++		  struct file **copyup_file, loff_t len)
++{
++	return copyup_named_dentry(dir, dentry, bstart, new_bindex,
++				   dentry->d_name.name,
++				   dentry->d_name.len, copyup_file, len);
++}
++
++/*
++ * create the new device/file/directory; use copyup_permissions() to copy
++ * up the times, ownership, and mode
++ *
++ * if the object being copied up is a regular file, the file is only created,
++ * the contents have to be copied up separately
++ */
++static int __copyup_ndentry(struct dentry *old_hidden_dentry,
++			    struct dentry *new_hidden_dentry,
++			    struct dentry *new_hidden_parent_dentry,
++			    char *symbuf)
++{
++	int err = 0;
++	umode_t old_mode = old_hidden_dentry->d_inode->i_mode;
++	struct sioq_args args;
++
++	if (S_ISDIR(old_mode)) {
++		args.mkdir.parent = new_hidden_parent_dentry->d_inode;
++		args.mkdir.dentry = new_hidden_dentry;
++		args.mkdir.mode = old_mode;
++
++		run_sioq(__unionfs_mkdir, &args);
++		err = args.err;
++	} else if (S_ISLNK(old_mode)) {
++		args.symlink.parent = new_hidden_parent_dentry->d_inode;
++		args.symlink.dentry = new_hidden_dentry;
++		args.symlink.symbuf = symbuf;
++		args.symlink.mode = old_mode;
++
++		run_sioq(__unionfs_symlink, &args);
++		err = args.err;
++	} else if (S_ISBLK(old_mode) || S_ISCHR(old_mode) ||
++		   S_ISFIFO(old_mode) || S_ISSOCK(old_mode)) {
++		args.mknod.parent = new_hidden_parent_dentry->d_inode;
++		args.mknod.dentry = new_hidden_dentry;
++		args.mknod.mode = old_mode;
++		args.mknod.dev = old_hidden_dentry->d_inode->i_rdev;
++
++		run_sioq(__unionfs_mknod, &args);
++		err = args.err;
++	} else if (S_ISREG(old_mode)) {
++		args.create.parent = new_hidden_parent_dentry->d_inode;
++		args.create.dentry = new_hidden_dentry;
++		args.create.mode = old_mode;
++		args.create.nd = NULL;
++
++		run_sioq(__unionfs_create, &args);
++		err = args.err;
++	} else {
++		printk(KERN_ERR "unionfs: unknown inode type %d\n",
++		       old_mode);
++		BUG();
++	}
++
++	return err;
++}
++
++static int __copyup_reg_data(struct dentry *dentry,
++			     struct dentry *new_hidden_dentry, int new_bindex,
++			     struct dentry *old_hidden_dentry, int old_bindex,
++			     struct file **copyup_file, loff_t len)
++{
++	struct super_block *sb = dentry->d_sb;
++	struct file *input_file;
++	struct file *output_file;
++	mm_segment_t old_fs;
++	char *buf = NULL;
++	ssize_t read_bytes, write_bytes;
++	loff_t size;
++	int err = 0;
++
++	/* open old file */
++	unionfs_mntget(dentry, old_bindex);
++	branchget(sb, old_bindex);
++	input_file = dentry_open(old_hidden_dentry,
++				 unionfs_lower_mnt_idx(dentry, old_bindex),
++				 O_RDONLY | O_LARGEFILE);
++	if (IS_ERR(input_file)) {
++		dput(old_hidden_dentry);
++		err = PTR_ERR(input_file);
++		goto out;
++	}
++	if (!input_file->f_op || !input_file->f_op->read) {
++		err = -EINVAL;
++		goto out_close_in;
++	}
++
++	/* open new file */
++	dget(new_hidden_dentry);
++	unionfs_mntget(dentry, new_bindex);
++	branchget(sb, new_bindex);
++	output_file = dentry_open(new_hidden_dentry,
++				  unionfs_lower_mnt_idx(dentry, new_bindex),
++				  O_WRONLY | O_LARGEFILE);
++	if (IS_ERR(output_file)) {
++		err = PTR_ERR(output_file);
++		goto out_close_in2;
++	}
++	if (!output_file->f_op || !output_file->f_op->write) {
++		err = -EINVAL;
++		goto out_close_out;
++	}
++
++	/* allocating a buffer */
++	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
++	if (!buf) {
++		err = -ENOMEM;
++		goto out_close_out;
++	}
++
++	input_file->f_pos = 0;
++	output_file->f_pos = 0;
++
++	old_fs = get_fs();
++	set_fs(KERNEL_DS);
++
++	size = len;
++	err = 0;
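++	/*
++	 * Copy in PAGE_SIZE chunks: "size" is how much to read this round
++	 * (a full page, or the final partial chunk once len drops below
++	 * PAGE_SIZE), while "len" counts down the bytes left to copy.
++	 */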
++	do {
++		if (len >= PAGE_SIZE)
++			size = PAGE_SIZE;
++		else if ((len < PAGE_SIZE) && (len > 0))
++			size = len;
++
++		len -= PAGE_SIZE;
++
++		read_bytes =
++			input_file->f_op->read(input_file,
++					       (char __user *)buf, size,
++					       &input_file->f_pos);
++		if (read_bytes <= 0) {
++			err = read_bytes;
++			break;
++		}
++
++		write_bytes =
++			output_file->f_op->write(output_file,
++						 (char __user *)buf,
++						 read_bytes,
++						 &output_file->f_pos);
++		if ((write_bytes < 0) || (write_bytes < read_bytes)) {
++			err = write_bytes;
++			break;
++		}
++	} while ((read_bytes > 0) && (len > 0));
++
++	set_fs(old_fs);
++
++	kfree(buf);
++
++	if (!err)
++		err = output_file->f_op->fsync(output_file,
++					       new_hidden_dentry, 0);
++
++	if (err)
++		goto out_close_out;
++
++	if (copyup_file) {
++		*copyup_file = output_file;
++		goto out_close_in;
++	}
++
++out_close_out:
++	fput(output_file);
++
++out_close_in2:
++	branchput(sb, new_bindex);
++
++out_close_in:
++	fput(input_file);
++
++out:
++	branchput(sb, old_bindex);
++
++	return err;
++}
++
++/*
++ * dput the lower references for old and new dentry & clear a lower dentry
++ * pointer
++ */
++static void __clear(struct dentry *dentry, struct dentry *old_hidden_dentry,
++		    int old_bstart, int old_bend,
++		    struct dentry *new_hidden_dentry, int new_bindex)
++{
++	/* get rid of the hidden dentry and all its traces */
++	unionfs_set_lower_dentry_idx(dentry, new_bindex, NULL);
++	set_dbstart(dentry, old_bstart);
++	set_dbend(dentry, old_bend);
++
++	dput(new_hidden_dentry);
++	dput(old_hidden_dentry);
++}
++
++/* copy up a dentry to a file of specified name */
++static int copyup_named_dentry(struct inode *dir, struct dentry *dentry,
++			       int bstart, int new_bindex, const char *name,
++			       int namelen, struct file **copyup_file,
++			       loff_t len)
++{
++	struct dentry *new_hidden_dentry;
++	struct dentry *old_hidden_dentry = NULL;
++	struct super_block *sb;
++	int err = 0;
++	int old_bindex;
++	int old_bstart;
++	int old_bend;
++	struct dentry *new_hidden_parent_dentry = NULL;
++	mm_segment_t oldfs;
++	char *symbuf = NULL;
++
++	verify_locked(dentry);
++
++	old_bindex = bstart;
++	old_bstart = dbstart(dentry);
++	old_bend = dbend(dentry);
++
++	BUG_ON(new_bindex < 0);
++	BUG_ON(new_bindex >= old_bindex);
++
++	sb = dir->i_sb;
++
++	if ((err = is_robranch_super(sb, new_bindex)))
++		goto out;
++
++	/* Create the directory structure above this dentry. */
++	new_hidden_dentry =
++		create_parents_named(dir, dentry, name, new_bindex);
++	if (IS_ERR(new_hidden_dentry)) {
++		err = PTR_ERR(new_hidden_dentry);
++		goto out;
++	}
++
++	old_hidden_dentry = unionfs_lower_dentry_idx(dentry, old_bindex);
++	/* we conditionally dput this old_hidden_dentry at end of function */
++	dget(old_hidden_dentry);
++
++	/* For symlinks, we must read the link before we lock the directory. */
++	if (S_ISLNK(old_hidden_dentry->d_inode->i_mode)) {
++
++		symbuf = kmalloc(PATH_MAX, GFP_KERNEL);
++		if (!symbuf) {
++			__clear(dentry, old_hidden_dentry,
++				old_bstart, old_bend,
++				new_hidden_dentry, new_bindex);
++			err = -ENOMEM;
++			goto out_free;
++		}
++
++		oldfs = get_fs();
++		set_fs(KERNEL_DS);
++		err = old_hidden_dentry->d_inode->i_op->readlink(
++			old_hidden_dentry,
++			(char __user *)symbuf,
++			PATH_MAX);
++		set_fs(oldfs);
++		if (err < 0) {
++			__clear(dentry, old_hidden_dentry,
++				old_bstart, old_bend,
++				new_hidden_dentry, new_bindex);
++			goto out_free;
++		}
++		symbuf[err] = '\0';
++	}
++
++	/* Now we lock the parent, and create the object in the new branch. */
++	new_hidden_parent_dentry = lock_parent(new_hidden_dentry);
++
++	/* create the new inode */
++	err = __copyup_ndentry(old_hidden_dentry, new_hidden_dentry,
++			       new_hidden_parent_dentry, symbuf);
++
++	if (err) {
++		__clear(dentry, old_hidden_dentry,
++			old_bstart, old_bend,
++			new_hidden_dentry, new_bindex);
++		goto out_unlock;
++	}
++
++	/* We actually copyup the file here. */
++	if (S_ISREG(old_hidden_dentry->d_inode->i_mode))
++		err = __copyup_reg_data(dentry, new_hidden_dentry, new_bindex,
++					old_hidden_dentry, old_bindex,
++					copyup_file, len);
++	if (err)
++		goto out_unlink;
++
++	/* Set permissions. */
++	if ((err = copyup_permissions(sb, old_hidden_dentry,
++				      new_hidden_dentry)))
++		goto out_unlink;
++
++#ifdef CONFIG_UNION_FS_XATTR
++	/* Selinux uses extended attributes for permissions. */
++	if ((err = copyup_xattrs(old_hidden_dentry, new_hidden_dentry)))
++		goto out_unlink;
++#endif
++
++	/* do not allow files getting deleted to be re-interposed */
++	if (!d_deleted(dentry))
++		unionfs_reinterpose(dentry);
++
++	goto out_unlock;
++
++out_unlink:
++	/*
++	 * copyup failed, because we possibly ran out of space or
++	 * quota, or something else happened so let's unlink; we don't
++	 * really care about the return value of vfs_unlink
++	 */
++	vfs_unlink(new_hidden_parent_dentry->d_inode, new_hidden_dentry);
++
++	if (copyup_file) {
++		/* need to close the file */
++
++		fput(*copyup_file);
++		branchput(sb, new_bindex);
++	}
++
++	/*
++	 * TODO: should we reset the error to something like -EIO?
++	 *
++	 * If we don't reset, the user may get some nonsensical errors, but
++	 * on the other hand, if we reset to EIO, we guarantee that the user
++	 * will get a "confusing" error message.
++	 */
++
++out_unlock:
++	unlock_dir(new_hidden_parent_dentry);
++
++out_free:
++	/*
++	 * If old_hidden_dentry was a directory, we need to dput it.  If it
++	 * was a file, then it was already dput indirectly by other
++	 * functions we call above which operate on regular files.
++	 */
++	if (old_hidden_dentry && old_hidden_dentry->d_inode &&
++	    S_ISDIR(old_hidden_dentry->d_inode->i_mode))
++		dput(old_hidden_dentry);
++	kfree(symbuf);
++
++out:
++	return err;
++}
++
++/*
++ * This function creates a copy of a file represented by 'file' which
++ * currently resides in branch 'bstart' to branch 'new_bindex.'  The copy
++ * will be named "name".
++ */
++int copyup_named_file(struct inode *dir, struct file *file, char *name,
++		      int bstart, int new_bindex, loff_t len)
++{
++	int err = 0;
++	struct file *output_file = NULL;
++
++	err = copyup_named_dentry(dir, file->f_dentry, bstart,
++				  new_bindex, name, strlen(name), &output_file,
++				  len);
++	if (!err) {
++		fbstart(file) = new_bindex;
++		unionfs_set_lower_file_idx(file, new_bindex, output_file);
++	}
++
++	return err;
++}
++
++/*
++ * This function creates a copy of a file represented by 'file' which
++ * currently resides in branch 'bstart' to branch 'new_bindex'.
++ */
++int copyup_file(struct inode *dir, struct file *file, int bstart,
++		int new_bindex, loff_t len)
++{
++	int err = 0;
++	struct file *output_file = NULL;
++
++	err = copyup_dentry(dir, file->f_dentry, bstart, new_bindex,
++			    &output_file, len);
++	if (!err) {
++		fbstart(file) = new_bindex;
++		unionfs_set_lower_file_idx(file, new_bindex, output_file);
++	}
++
++	return err;
++}
++
++/*
++ * This function replicates the directory structure up to the given dentry
++ * in the bindex branch.  It can also create the directory structure
++ * recursively to the right.
++ */
++struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
++			      int bindex)
++{
++	return create_parents_named(dir, dentry, dentry->d_name.name, bindex);
++}
++
++/* purge a dentry's lower-branch states (dput/mntput, etc.) */
++static void __cleanup_dentry(struct dentry *dentry, int bindex,
++			     int old_bstart, int old_bend)
++{
++	int loop_start;
++	int loop_end;
++	int new_bstart = -1;
++	int new_bend = -1;
++	int i;
++
++	loop_start = min(old_bstart, bindex);
++	loop_end = max(old_bend, bindex);
++
++	/*
++	 * This loop sets the bstart and bend for the new dentry by
++	 * traversing from left to right.  It also dputs all negative
++	 * dentries except bindex
++	 */
++	for (i = loop_start; i <= loop_end; i++) {
++		if (!unionfs_lower_dentry_idx(dentry, i))
++			continue;
++
++		if (i == bindex) {
++			new_bend = i;
++			if (new_bstart < 0)
++				new_bstart = i;
++			continue;
++		}
++
++		if (!unionfs_lower_dentry_idx(dentry, i)->d_inode) {
++			dput(unionfs_lower_dentry_idx(dentry, i));
++			unionfs_set_lower_dentry_idx(dentry, i, NULL);
++
++			unionfs_mntput(dentry, i);
++			unionfs_set_lower_mnt_idx(dentry, i, NULL);
++		} else {
++			if (new_bstart < 0)
++				new_bstart = i;
++			new_bend = i;
++		}
++	}
++
++	if (new_bstart < 0)
++		new_bstart = bindex;
++	if (new_bend < 0)
++		new_bend = bindex;
++	set_dbstart(dentry, new_bstart);
++	set_dbend(dentry, new_bend);
++
++}
++
++/* set lower inode ptr and update bstart & bend if necessary */
++static void __set_inode(struct dentry *upper, struct dentry *lower,
++			int bindex)
++{
++	unionfs_set_lower_inode_idx(upper->d_inode, bindex,
++				    igrab(lower->d_inode));
++	if (likely(ibstart(upper->d_inode) > bindex))
++		ibstart(upper->d_inode) = bindex;
++	if (likely(ibend(upper->d_inode) < bindex))
++		ibend(upper->d_inode) = bindex;
++
++}
++
++/* set lower dentry ptr and update bstart & bend if necessary */
++static void __set_dentry(struct dentry *upper, struct dentry *lower,
++			 int bindex)
++{
++	unionfs_set_lower_dentry_idx(upper, bindex, lower);
++	if (likely(dbstart(upper) > bindex))
++		set_dbstart(upper, bindex);
++	if (likely(dbend(upper) < bindex))
++		set_dbend(upper, bindex);
++}
++
++/*
++ * This function replicates the directory structure up to the given dentry
++ * in the bindex branch.
++ */
++static struct dentry *create_parents_named(struct inode *dir,
++					   struct dentry *dentry,
++					   const char *name, int bindex)
++{
++	int err;
++	struct dentry *child_dentry;
++	struct dentry *parent_dentry;
++	struct dentry *hidden_parent_dentry = NULL;
++	struct dentry *hidden_dentry = NULL;
++	const char *childname;
++	unsigned int childnamelen;
++
++	int nr_dentry;
++	int count = 0;
++
++	int old_bstart;
++	int old_bend;
++	struct dentry **path = NULL;
++	struct super_block *sb;
++
++	verify_locked(dentry);
++
++	if ((err = is_robranch_super(dir->i_sb, bindex))) {
++		hidden_dentry = ERR_PTR(err);
++		goto out;
++	}
++
++	old_bstart = dbstart(dentry);
++	old_bend = dbend(dentry);
++
++	hidden_dentry = ERR_PTR(-ENOMEM);
++
++	/* There is no sense allocating any less than the minimum. */
++	nr_dentry = 1;
++	path = kmalloc(nr_dentry * sizeof(struct dentry *), GFP_KERNEL);
++	if (!path)
++		goto out;
++
++	/* assume the negative dentry of unionfs as the parent dentry */
++	parent_dentry = dentry;
++
++	/*
++	 * This loop finds the first parent that exists in the given branch.
++	 * We start building the directory structure from there.  At the end
++	 * of the loop, the following should hold:
++	 *  - child_dentry is the first nonexistent child
++	 *  - parent_dentry is the first existent parent
++	 *  - path[0] is the deepest child
++	 *  - path[count] is the first child to create
++	 */
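++	/*
++	 * For example (hypothetical layout): copying up a/b/c/d when only
++	 * "a" exists in the branch ends the loop with path[0] = d,
++	 * path[1] = c, path[2] = b, count = 2, and parent_dentry = a.
++	 */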
++	do {
++		child_dentry = parent_dentry;
++
++		/* find the parent directory dentry in unionfs */
++		parent_dentry = child_dentry->d_parent;
++		unionfs_lock_dentry(parent_dentry);
++
++		/* find out the hidden_parent_dentry in the given branch */
++		hidden_parent_dentry =
++			unionfs_lower_dentry_idx(parent_dentry, bindex);
++
++		/* grow path table */
++		if (count == nr_dentry) {
++			void *p;
++
++			nr_dentry *= 2;
++			p = krealloc(path,
++				     nr_dentry * sizeof(struct dentry *),
++				     GFP_KERNEL);
++			if (!p) {
++				hidden_dentry = ERR_PTR(-ENOMEM);
++				goto out;
++			}
++			path = p;
++		}
++
++		/* store the child dentry */
++		path[count++] = child_dentry;
++	} while (!hidden_parent_dentry);
++	count--;
++
++	sb = dentry->d_sb;
++
++	/*
++	 * This is basically while(child_dentry != dentry).  This loop is
++	 * horrible to follow and should be replaced with cleaner code.
++	 */
++	while (1) {
++		/* get hidden parent dir in the current branch */
++		hidden_parent_dentry =
++			unionfs_lower_dentry_idx(parent_dentry, bindex);
++		unionfs_unlock_dentry(parent_dentry);
++
++		/* init the values to lookup */
++		childname = child_dentry->d_name.name;
++		childnamelen = child_dentry->d_name.len;
++
++		if (child_dentry != dentry) {
++			/* lookup child in the underlying file system */
++			hidden_dentry =
++				lookup_one_len(childname, hidden_parent_dentry,
++					       childnamelen);
++			if (IS_ERR(hidden_dentry))
++				goto out;
++		} else {
++
++			/*
++			 * The given name may differ from the child's own
++			 * name (e.g. a whiteout name); look it up in the
++			 * underlying file system.
++			 */
++			hidden_dentry =
++				lookup_one_len(name, hidden_parent_dentry,
++					       strlen(name));
++			if (IS_ERR(hidden_dentry))
++				goto out;
++
++			/*
++			 * Replace the current dentry (if any) with the new
++			 * one.
++			 */
++			dput(unionfs_lower_dentry_idx(dentry, bindex));
++			unionfs_set_lower_dentry_idx(dentry, bindex,
++						     hidden_dentry);
++
++			__cleanup_dentry(dentry, bindex, old_bstart, old_bend);
++			break;
++		}
++
++		if (hidden_dentry->d_inode) {
++			/*
++			 * since this already exists we dput to avoid
++			 * multiple references on the same dentry
++			 */
++			dput(hidden_dentry);
++		} else {
++			struct sioq_args args;
++
++			/* it's a negative dentry, create a new dir */
++			hidden_parent_dentry = lock_parent(hidden_dentry);
++
++			args.mkdir.parent = hidden_parent_dentry->d_inode;
++			args.mkdir.dentry = hidden_dentry;
++			args.mkdir.mode = child_dentry->d_inode->i_mode;
++
++			run_sioq(__unionfs_mkdir, &args);
++			err = args.err;
++
++			if (!err)
++				err = copyup_permissions(dir->i_sb,
++							 child_dentry,
++							 hidden_dentry);
++			unlock_dir(hidden_parent_dentry);
++			if (err) {
++				struct inode *inode = hidden_dentry->d_inode;
++				/*
++				 * If we get here, it means that we
++				 * created a new dentry+inode, but copying
++				 * permissions failed.  Therefore, we
++				 * should delete this inode and dput the
++				 * dentry so as not to leave cruft behind.
++				 *
++				 * XXX: call dentry_iput() instead, but
++				 * then we have to export that symbol.
++				 */
++				if (hidden_dentry->d_op &&
++				    hidden_dentry->d_op->d_iput)
++					hidden_dentry->d_op->d_iput(hidden_dentry,
++								    inode);
++				else
++					iput(inode);
++				hidden_dentry->d_inode = NULL;
++
++				dput(hidden_dentry);
++				hidden_dentry = ERR_PTR(err);
++				goto out;
++			}
++
++		}
++
++		__set_inode(child_dentry, hidden_dentry, bindex);
++		__set_dentry(child_dentry, hidden_dentry, bindex);
++
++		parent_dentry = child_dentry;
++		child_dentry = path[--count];
++	}
++out:
++	/* cleanup any leftover locks from the do/while loop above */
++	if (IS_ERR(hidden_dentry))
++		while (count)
++			unionfs_unlock_dentry(path[count--]);
++	kfree(path);
++	return hidden_dentry;
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/dentry.c linux-2.6.22-try2/fs/unionfs/dentry.c
+--- linux-2.6.22-570/fs/unionfs/dentry.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/dentry.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,353 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * Revalidate a single dentry.
++ * Assume that dentry's info node is locked.
++ * Assume that parent(s) are all valid already, but
++ * the child may not yet be valid.
++ * Returns 1 if valid, 0 otherwise.
++ */
++static int __unionfs_d_revalidate_one(struct dentry *dentry,
++				      struct nameidata *nd)
++{
++	int valid = 1;		/* default is valid (1); invalid is 0. */
++	struct dentry *hidden_dentry;
++	int bindex, bstart, bend;
++	int sbgen, dgen;
++	int positive = 0;
++	int locked = 0;
++	int interpose_flag;
++
++	struct nameidata lowernd; /* TODO: be gentler to the stack */
++
++	if (nd)
++		memcpy(&lowernd, nd, sizeof(struct nameidata));
++	else
++		memset(&lowernd, 0, sizeof(struct nameidata));
++
++	verify_locked(dentry);
++
++	/* if the dentry is unhashed, do NOT revalidate */
++	if (d_deleted(dentry)) {
++		printk(KERN_DEBUG "unionfs: unhashed dentry being "
++		       "revalidated: %*s\n",
++		       dentry->d_name.len, dentry->d_name.name);
++		goto out;
++	}
++
++	BUG_ON(dbstart(dentry) == -1);
++	if (dentry->d_inode)
++		positive = 1;
++	dgen = atomic_read(&UNIONFS_D(dentry)->generation);
++	sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
++	/*
++	 * If the superblock generation has moved past this dentry's, then
++	 * the branch configuration changed underneath us; drop the stale
++	 * lower dentries and inodes and redo the lookup below.
++	 */
++	if (sbgen != dgen) {
++		struct dentry *result;
++		int pdgen;
++
++		/* The root entry should always be valid */
++		BUG_ON(IS_ROOT(dentry));
++
++		/* We can't work correctly if our parent isn't valid. */
++		pdgen = atomic_read(&UNIONFS_D(dentry->d_parent)->generation);
++		BUG_ON(pdgen != sbgen);	/* should never happen here */
++
++		/* Free the pointers for our inodes and this dentry. */
++		bstart = dbstart(dentry);
++		bend = dbend(dentry);
++		if (bstart >= 0) {
++			struct dentry *hidden_dentry;
++			for (bindex = bstart; bindex <= bend; bindex++) {
++				hidden_dentry =
++					unionfs_lower_dentry_idx(dentry,
++								 bindex);
++				dput(hidden_dentry);
++			}
++		}
++		set_dbstart(dentry, -1);
++		set_dbend(dentry, -1);
++
++		interpose_flag = INTERPOSE_REVAL_NEG;
++		if (positive) {
++			interpose_flag = INTERPOSE_REVAL;
++			/*
++			 * During BRM, the VFS could already hold a lock on
++			 * a file being read, so don't lock it again
++			 * (deadlock), but if you lock it in this function,
++			 * then release it here too.
++			 */
++			if (!mutex_is_locked(&dentry->d_inode->i_mutex)) {
++				mutex_lock(&dentry->d_inode->i_mutex);
++				locked = 1;
++			}
++
++			bstart = ibstart(dentry->d_inode);
++			bend = ibend(dentry->d_inode);
++			if (bstart >= 0) {
++				struct inode *hidden_inode;
++				for (bindex = bstart; bindex <= bend;
++				     bindex++) {
++					hidden_inode =
++						unionfs_lower_inode_idx(
++							dentry->d_inode,
++							bindex);
++					iput(hidden_inode);
++				}
++			}
++			kfree(UNIONFS_I(dentry->d_inode)->lower_inodes);
++			UNIONFS_I(dentry->d_inode)->lower_inodes = NULL;
++			ibstart(dentry->d_inode) = -1;
++			ibend(dentry->d_inode) = -1;
++			if (locked)
++				mutex_unlock(&dentry->d_inode->i_mutex);
++		}
++
++		result = unionfs_lookup_backend(dentry, &lowernd,
++						interpose_flag);
++		if (result) {
++			if (IS_ERR(result)) {
++				valid = 0;
++				goto out;
++			}
++			/*
++			 * current unionfs_lookup_backend() doesn't return
++			 * a valid dentry
++			 */
++			dput(dentry);
++			dentry = result;
++		}
++
++		if (positive && UNIONFS_I(dentry->d_inode)->stale) {
++			make_bad_inode(dentry->d_inode);
++			d_drop(dentry);
++			valid = 0;
++			goto out;
++		}
++		goto out;
++	}
++
++	/* The revalidation must occur across all branches */
++	bstart = dbstart(dentry);
++	bend = dbend(dentry);
++	BUG_ON(bstart == -1);
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (!hidden_dentry || !hidden_dentry->d_op
++		    || !hidden_dentry->d_op->d_revalidate)
++			continue;
++		if (!hidden_dentry->d_op->d_revalidate(hidden_dentry,
++						       &lowernd))
++			valid = 0;
++	}
++
++	if (!dentry->d_inode)
++		valid = 0;
++
++	if (valid) {
++		fsstack_copy_attr_all(dentry->d_inode,
++				      unionfs_lower_inode(dentry->d_inode),
++				      unionfs_get_nlinks);
++		fsstack_copy_inode_size(dentry->d_inode,
++					unionfs_lower_inode(dentry->d_inode));
++	}
++
++out:
++	return valid;
++}
++
++/*
++ * Revalidate a parent chain of dentries, then the actual node.
++ * Assumes that dentry is locked, but will lock all parents if/when needed.
++ */
++int __unionfs_d_revalidate_chain(struct dentry *dentry, struct nameidata *nd)
++{
++	int valid = 0;		/* default is invalid (0); valid is 1. */
++	struct dentry **chain = NULL; /* chain of dentries to reval */
++	int chain_len = 0;
++	struct dentry *dtmp;
++	int sbgen, dgen, i;
++	int saved_bstart, saved_bend, bindex;
++
++	/* find length of chain needed to revalidate */
++	/* XXX: should I grab some global (dcache?) lock? */
++	chain_len = 0;
++	sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
++	dtmp = dentry->d_parent;
++	dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
++	while (sbgen != dgen) {
++		/* The root entry should always be valid */
++		BUG_ON(IS_ROOT(dtmp));
++		chain_len++;
++		dtmp = dtmp->d_parent;
++		dgen = atomic_read(&UNIONFS_D(dtmp)->generation);
++	}
++	if (chain_len == 0)
++		goto out_this;	/* shortcut if parents are OK */
++
++	/*
++	 * Allocate array of dentries to reval.  We could use linked lists,
++	 * but the number of entries we need to alloc here is often small,
++	 * and short lived, so locality will be better.
++	 */
++	chain = kzalloc(chain_len * sizeof(struct dentry *), GFP_KERNEL);
++	if (!chain) {
++		printk("unionfs: no more memory in %s\n", __FUNCTION__);
++		goto out;
++	}
++
++	/*
++	 * Take a reference on each dentry in the chain, from the immediate
++	 * parent upward; each is then locked and revalidated one at a
++	 * time below, in parent-to-child order.
++	 */
++	dtmp = dentry->d_parent;
++	for (i = chain_len - 1; i >= 0; i--) {
++		chain[i] = dget(dtmp);
++		dtmp = dtmp->d_parent;
++	}
++
++	/*
++	 * call __unionfs_d_revalidate() on each dentry, but in parent to
++	 * child order.
++	 */
++	for (i=0; i<chain_len; i++) {
++		unionfs_lock_dentry(chain[i]);
++		saved_bstart = dbstart(chain[i]);
++		saved_bend = dbend(chain[i]);
++		sbgen = atomic_read(&UNIONFS_SB(dentry->d_sb)->generation);
++		dgen = atomic_read(&UNIONFS_D(chain[i])->generation);
++
++		valid = __unionfs_d_revalidate_one(chain[i], nd);
++		/* XXX: is this the correct mntput condition?! */
++		if (valid && chain_len > 0 &&
++		    sbgen != dgen && chain[i]->d_inode &&
++		    S_ISDIR(chain[i]->d_inode->i_mode)) {
++			for (bindex = saved_bstart; bindex <= saved_bend;
++			     bindex++)
++				unionfs_mntput(chain[i], bindex);
++		}
++		unionfs_unlock_dentry(chain[i]);
++
++		if (!valid)
++			goto out_free;
++	}
++
++
++out_this:
++	/* finally, lock this dentry and revalidate it */
++	verify_locked(dentry);
++	dgen = atomic_read(&UNIONFS_D(dentry)->generation);
++	valid = __unionfs_d_revalidate_one(dentry, nd);
++
++	/*
++	 * If __unionfs_d_revalidate_one() succeeded above, then it will
++	 * have incremented the refcnt of the mnt's, but also the branch
++	 * indices of the dentry will have been updated (to take into
++	 * account any branch insertions/deletions).  So the current
++	 * dbstart/dbend match the current, and new, indices of the mnts
++	 * which __unionfs_d_revalidate_one has incremented.  Note: the "if"
++	 * test below does not depend on whether chain_len was 0 or greater.
++	 */
++	if (valid && sbgen != dgen)
++		for (bindex = dbstart(dentry);
++		     bindex <= dbend(dentry);
++		     bindex++)
++			unionfs_mntput(dentry, bindex);
++
++out_free:
++	/* unlock/dput all dentries in chain and return status */
++	if (chain_len > 0) {
++		for (i=0; i<chain_len; i++)
++			dput(chain[i]);
++		kfree(chain);
++	}
++out:
++	return valid;
++}
++
++static int unionfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
++{
++	int err;
++
++	unionfs_read_lock(dentry->d_sb);
++
++	unionfs_lock_dentry(dentry);
++	err = __unionfs_d_revalidate_chain(dentry, nd);
++	unionfs_unlock_dentry(dentry);
++
++	unionfs_read_unlock(dentry->d_sb);
++
++	return err;
++}
++
++/*
++ * At this point no one can reference this dentry, so we don't have to be
++ * careful about concurrent access.
++ */
++static void unionfs_d_release(struct dentry *dentry)
++{
++	int bindex, bstart, bend;
++
++	unionfs_read_lock(dentry->d_sb);
++
++	/* this could be a negative dentry, so check first */
++	if (!UNIONFS_D(dentry)) {
++		printk(KERN_DEBUG "unionfs: dentry without private data: %.*s",
++		       dentry->d_name.len, dentry->d_name.name);
++		goto out;
++	} else if (dbstart(dentry) < 0) {
++		/* this is due to a failed lookup */
++		printk(KERN_DEBUG "unionfs: dentry without hidden "
++		       "dentries: %.*s",
++		       dentry->d_name.len, dentry->d_name.name);
++		goto out_free;
++	}
++
++	/* Release all the hidden dentries */
++	bstart = dbstart(dentry);
++	bend = dbend(dentry);
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		dput(unionfs_lower_dentry_idx(dentry, bindex));
++		unionfs_mntput(dentry, bindex);
++
++		unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
++		unionfs_set_lower_mnt_idx(dentry, bindex, NULL);
++	}
++	/* free private data (unionfs_dentry_info) here */
++	kfree(UNIONFS_D(dentry)->lower_paths);
++	UNIONFS_D(dentry)->lower_paths = NULL;
++
++out_free:
++	/* No need to unlock it, because it has disappeared. */
++	free_dentry_private_data(dentry);
++
++out:
++	unionfs_read_unlock(dentry->d_sb);
++	return;
++}
++
++struct dentry_operations unionfs_dops = {
++	.d_revalidate	= unionfs_d_revalidate,
++	.d_release	= unionfs_d_release,
++};
+diff -Nurb linux-2.6.22-570/fs/unionfs/dirfops.c linux-2.6.22-try2/fs/unionfs/dirfops.c
+--- linux-2.6.22-570/fs/unionfs/dirfops.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/dirfops.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,276 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/* Make sure our rdstate is playing by the rules. */
++static void verify_rdstate_offset(struct unionfs_dir_state *rdstate)
++{
++	BUG_ON(rdstate->offset >= DIREOF);
++	BUG_ON(rdstate->cookie >= MAXRDCOOKIE);
++}
++
++struct unionfs_getdents_callback {
++	struct unionfs_dir_state *rdstate;
++	void *dirent;
++	int entries_written;
++	int filldir_called;
++	int filldir_error;
++	filldir_t filldir;
++	struct super_block *sb;
++};
++
++/* based on the generic filldir in fs/readdir.c */
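++/*
++ * Whiteout entries carry the UNIONFS_WHPFX prefix in the lower branch
++ * (".wh." by convention), so a whiteout for "foo" is stored as ".wh.foo".
++ * The prefix is stripped here and the entry recorded, so that "foo" from
++ * lower-priority branches is suppressed rather than listed.
++ */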
++static int unionfs_filldir(void *dirent, const char *name, int namelen,
++			   loff_t offset, u64 ino, unsigned int d_type)
++{
++	struct unionfs_getdents_callback *buf = dirent;
++	struct filldir_node *found = NULL;
++	int err = 0;
++	int is_wh_entry = 0;
++
++	buf->filldir_called++;
++
++	if ((namelen > UNIONFS_WHLEN) &&
++	    !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
++		name += UNIONFS_WHLEN;
++		namelen -= UNIONFS_WHLEN;
++		is_wh_entry = 1;
++	}
++
++	found = find_filldir_node(buf->rdstate, name, namelen);
++
++	if (found)
++		goto out;
++
++	/* if 'name' isn't a whiteout, filldir it. */
++	if (!is_wh_entry) {
++		off_t pos = rdstate2offset(buf->rdstate);
++		u64 unionfs_ino = ino;
++
++		if (!err) {
++			err = buf->filldir(buf->dirent, name, namelen, pos,
++					   unionfs_ino, d_type);
++			buf->rdstate->offset++;
++			verify_rdstate_offset(buf->rdstate);
++		}
++	}
++	/*
++	 * If we did fill it, stuff it in our hash, otherwise return an
++	 * error.
++	 */
++	if (err) {
++		buf->filldir_error = err;
++		goto out;
++	}
++	buf->entries_written++;
++	if ((err = add_filldir_node(buf->rdstate, name, namelen,
++				    buf->rdstate->bindex, is_wh_entry)))
++		buf->filldir_error = err;
++
++out:
++	return err;
++}
++
++static int unionfs_readdir(struct file *file, void *dirent, filldir_t filldir)
++{
++	int err = 0;
++	struct file *hidden_file = NULL;
++	struct inode *inode = NULL;
++	struct unionfs_getdents_callback buf;
++	struct unionfs_dir_state *uds;
++	int bend;
++	loff_t offset;
++
++	unionfs_read_lock(file->f_path.dentry->d_sb);
++
++	if ((err = unionfs_file_revalidate(file, 0)))
++		goto out;
++
++	inode = file->f_dentry->d_inode;
++
++	uds = UNIONFS_F(file)->rdstate;
++	if (!uds) {
++		if (file->f_pos == DIREOF) {
++			goto out;
++		} else if (file->f_pos > 0) {
++			uds = find_rdstate(inode, file->f_pos);
++			if (!uds) {
++				err = -ESTALE;
++				goto out;
++			}
++			UNIONFS_F(file)->rdstate = uds;
++		} else {
++			init_rdstate(file);
++			uds = UNIONFS_F(file)->rdstate;
++		}
++	}
++	bend = fbend(file);
++
++	while (uds->bindex <= bend) {
++		hidden_file = unionfs_lower_file_idx(file, uds->bindex);
++		if (!hidden_file) {
++			uds->bindex++;
++			uds->dirpos = 0;
++			continue;
++		}
++
++		/* prepare callback buffer */
++		buf.filldir_called = 0;
++		buf.filldir_error = 0;
++		buf.entries_written = 0;
++		buf.dirent = dirent;
++		buf.filldir = filldir;
++		buf.rdstate = uds;
++		buf.sb = inode->i_sb;
++
++		/* Read starting from where we last left off. */
++		offset = vfs_llseek(hidden_file, uds->dirpos, SEEK_SET);
++		if (offset < 0) {
++			err = offset;
++			goto out;
++		}
++		err = vfs_readdir(hidden_file, unionfs_filldir, &buf);
++
++		/* Save the position for when we continue. */
++		offset = vfs_llseek(hidden_file, 0, SEEK_CUR);
++		if (offset < 0) {
++			err = offset;
++			goto out;
++		}
++		uds->dirpos = offset;
++
++		/* Copy the atime. */
++		fsstack_copy_attr_atime(inode, hidden_file->f_dentry->d_inode);
++
++		if (err < 0)
++			goto out;
++
++		if (buf.filldir_error)
++			break;
++
++		if (!buf.entries_written) {
++			uds->bindex++;
++			uds->dirpos = 0;
++		}
++	}
++
++	if (!buf.filldir_error && uds->bindex >= bend) {
++		/* Save the number of hash entries for next time. */
++		UNIONFS_I(inode)->hashsize = uds->hashentries;
++		free_rdstate(uds);
++		UNIONFS_F(file)->rdstate = NULL;
++		file->f_pos = DIREOF;
++	} else
++		file->f_pos = rdstate2offset(uds);
++
++out:
++	unionfs_read_unlock(file->f_path.dentry->d_sb);
++	return err;
++}
++
++/*
++ * This is not meant to be a generic repositioning function.  If you do
++ * things that aren't supported, then we return EINVAL.
++ *
++ * What is allowed:
++ *  (1) seeking to the same position that you are currently at
++ *	This really has no effect, but returns where you are.
++ *  (2) seeking to the beginning of the file
++ *	This throws out all state, and lets you begin again.
++ */
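++/*
++ * For instance (a sketch of the expected behavior): lseek(dirfd, 0,
++ * SEEK_SET) resets the directory stream, while a nonzero SEEK_SET offset
++ * succeeds only if it matches the current rdstate cookie (or a cached
++ * one); anything else fails with -EINVAL.
++ */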
++static loff_t unionfs_dir_llseek(struct file *file, loff_t offset, int origin)
++{
++	struct unionfs_dir_state *rdstate;
++	loff_t err;
++
++	unionfs_read_lock(file->f_path.dentry->d_sb);
++
++	if ((err = unionfs_file_revalidate(file, 0)))
++		goto out;
++
++	rdstate = UNIONFS_F(file)->rdstate;
++
++	/*
++	 * we let users seek to their current position, but not anywhere
++	 * else.
++	 */
++	if (!offset) {
++		switch (origin) {
++		case SEEK_SET:
++			if (rdstate) {
++				free_rdstate(rdstate);
++				UNIONFS_F(file)->rdstate = NULL;
++			}
++			init_rdstate(file);
++			err = 0;
++			break;
++		case SEEK_CUR:
++			err = file->f_pos;
++			break;
++		case SEEK_END:
++			/* Unsupported, because we would break everything.  */
++			err = -EINVAL;
++			break;
++		}
++	} else {
++		switch (origin) {
++		case SEEK_SET:
++			if (rdstate) {
++				if (offset == rdstate2offset(rdstate))
++					err = offset;
++				else if (file->f_pos == DIREOF)
++					err = DIREOF;
++				else
++					err = -EINVAL;
++			} else {
++				rdstate = find_rdstate(file->f_dentry->d_inode,
++						       offset);
++				if (rdstate) {
++					UNIONFS_F(file)->rdstate = rdstate;
++					err = rdstate->offset;
++				} else
++					err = -EINVAL;
++			}
++			break;
++		case SEEK_CUR:
++		case SEEK_END:
++			/* Unsupported, because we would break everything.  */
++			err = -EINVAL;
++			break;
++		}
++	}
++
++out:
++	unionfs_read_unlock(file->f_path.dentry->d_sb);
++	return err;
++}
++
++/*
++ * Trimmed directory operations: we shouldn't pass everything down,
++ * since we don't want to operate on partial directories.
++ */
++struct file_operations unionfs_dir_fops = {
++	.llseek		= unionfs_dir_llseek,
++	.read		= generic_read_dir,
++	.readdir	= unionfs_readdir,
++	.unlocked_ioctl	= unionfs_ioctl,
++	.open		= unionfs_open,
++	.release	= unionfs_file_release,
++	.flush		= unionfs_flush,
++};
+diff -Nurb linux-2.6.22-570/fs/unionfs/dirhelper.c linux-2.6.22-try2/fs/unionfs/dirhelper.c
+--- linux-2.6.22-570/fs/unionfs/dirhelper.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/dirhelper.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,273 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * Delete all of the whiteouts in a given directory, for rmdir.
++ *
++ * The hidden directory inode should be locked by the caller.
++ */
++int do_delete_whiteouts(struct dentry *dentry, int bindex,
++			struct unionfs_dir_state *namelist)
++{
++	int err = 0;
++	struct dentry *hidden_dir_dentry = NULL;
++	struct dentry *hidden_dentry;
++	char *name = NULL, *p;
++	struct inode *hidden_dir;
++
++	int i;
++	struct list_head *pos;
++	struct filldir_node *cursor;
++
++	/* Find the hidden parent dentry. */
++	hidden_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++	BUG_ON(!S_ISDIR(hidden_dir_dentry->d_inode->i_mode));
++	hidden_dir = hidden_dir_dentry->d_inode;
++	BUG_ON(!S_ISDIR(hidden_dir->i_mode));
++
++	err = -ENOMEM;
++	name = __getname();
++	if (!name)
++		goto out;
++	strcpy(name, UNIONFS_WHPFX);
++	p = name + UNIONFS_WHLEN;
++
++	err = 0;
++	for (i = 0; !err && i < namelist->size; i++) {
++		list_for_each(pos, &namelist->list[i]) {
++			cursor =
++				list_entry(pos, struct filldir_node,
++					   file_list);
++			/* Only operate on whiteouts in this branch. */
++			if (cursor->bindex != bindex)
++				continue;
++			if (!cursor->whiteout)
++				continue;
++
++			strcpy(p, cursor->name);
++			hidden_dentry =
++				lookup_one_len(name, hidden_dir_dentry,
++					       cursor->namelen +
++					       UNIONFS_WHLEN);
++			if (IS_ERR(hidden_dentry)) {
++				err = PTR_ERR(hidden_dentry);
++				break;
++			}
++			if (hidden_dentry->d_inode)
++				err = vfs_unlink(hidden_dir, hidden_dentry);
++			dput(hidden_dentry);
++			if (err)
++				break;
++		}
++	}
++
++	__putname(name);
++
++	/* After all of the removals, we should copy the attributes once. */
++	fsstack_copy_attr_times(dentry->d_inode, hidden_dir_dentry->d_inode);
++
++out:
++	return err;
++}
++
++/* delete whiteouts in a dir (for rmdir operation) using sioq if necessary */
++int delete_whiteouts(struct dentry *dentry, int bindex,
++		     struct unionfs_dir_state *namelist)
++{
++	int err;
++	struct super_block *sb;
++	struct dentry *hidden_dir_dentry;
++	struct inode *hidden_dir;
++
++	struct sioq_args args;
++
++	sb = dentry->d_sb;
++
++	BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
++	BUG_ON(bindex < dbstart(dentry));
++	BUG_ON(bindex > dbend(dentry));
++	err = is_robranch_super(sb, bindex);
++	if (err)
++		goto out;
++
++	hidden_dir_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++	BUG_ON(!S_ISDIR(hidden_dir_dentry->d_inode->i_mode));
++	hidden_dir = hidden_dir_dentry->d_inode;
++	BUG_ON(!S_ISDIR(hidden_dir->i_mode));
++
++	mutex_lock(&hidden_dir->i_mutex);
++	if (!permission(hidden_dir, MAY_WRITE | MAY_EXEC, NULL))
++		err = do_delete_whiteouts(dentry, bindex, namelist);
++	else {
++		args.deletewh.namelist = namelist;
++		args.deletewh.dentry = dentry;
++		args.deletewh.bindex = bindex;
++		run_sioq(__delete_whiteouts, &args);
++		err = args.err;
++	}
++	mutex_unlock(&hidden_dir->i_mutex);
++
++out:
++	return err;
++}
++
++#define RD_NONE 0
++#define RD_CHECK_EMPTY 1
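++
++/*
++ * In RD_CHECK_EMPTY mode, the filldir callback below fails with
++ * -ENOTEMPTY as soon as it sees an entry that is neither "."/".." nor a
++ * whiteout (and was not already whited out in an earlier branch);
++ * otherwise entries are only recorded in the rdstate table.
++ */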
++/* The callback structure for check_empty. */
++struct unionfs_rdutil_callback {
++	int err;
++	int filldir_called;
++	struct unionfs_dir_state *rdstate;
++	int mode;
++};
++
++/* This filldir callback verifies that only whiteouts exist in a directory. */
++static int readdir_util_callback(void *dirent, const char *name, int namelen,
++				 loff_t offset, u64 ino, unsigned int d_type)
++{
++	int err = 0;
++	struct unionfs_rdutil_callback *buf = dirent;
++	int whiteout = 0;
++	struct filldir_node *found;
++
++	buf->filldir_called = 1;
++
++	if (name[0] == '.' && (namelen == 1 ||
++			       (name[1] == '.' && namelen == 2)))
++		goto out;
++
++	if (namelen > UNIONFS_WHLEN &&
++	    !strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN)) {
++		namelen -= UNIONFS_WHLEN;
++		name += UNIONFS_WHLEN;
++		whiteout = 1;
++	}
++
++	found = find_filldir_node(buf->rdstate, name, namelen);
++	/* If it was found in the table, there was a previous whiteout. */
++	if (found)
++		goto out;
++
++	/*
++	 * if it wasn't found and isn't a whiteout, the directory isn't
++	 * empty.
++	 */
++	err = -ENOTEMPTY;
++	if ((buf->mode == RD_CHECK_EMPTY) && !whiteout)
++		goto out;
++
++	err = add_filldir_node(buf->rdstate, name, namelen,
++			       buf->rdstate->bindex, whiteout);
++
++out:
++	buf->err = err;
++	return err;
++}
++
++/* Is a directory logically empty? */
++int check_empty(struct dentry *dentry, struct unionfs_dir_state **namelist)
++{
++	int err = 0;
++	struct dentry *hidden_dentry = NULL;
++	struct super_block *sb;
++	struct file *hidden_file;
++	struct unionfs_rdutil_callback *buf = NULL;
++	int bindex, bstart, bend, bopaque;
++
++	sb = dentry->d_sb;
++
++	BUG_ON(!S_ISDIR(dentry->d_inode->i_mode));
++
++	if ((err = unionfs_partial_lookup(dentry)))
++		goto out;
++
++	bstart = dbstart(dentry);
++	bend = dbend(dentry);
++	bopaque = dbopaque(dentry);
++	if (0 <= bopaque && bopaque < bend)
++		bend = bopaque;
++
++	buf = kmalloc(sizeof(struct unionfs_rdutil_callback), GFP_KERNEL);
++	if (!buf) {
++		err = -ENOMEM;
++		goto out;
++	}
++	buf->err = 0;
++	buf->mode = RD_CHECK_EMPTY;
++	buf->rdstate = alloc_rdstate(dentry->d_inode, bstart);
++	if (!buf->rdstate) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	/* Process the hidden directories with rdutil_callback as a filldir. */
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (!hidden_dentry)
++			continue;
++		if (!hidden_dentry->d_inode)
++			continue;
++		if (!S_ISDIR(hidden_dentry->d_inode->i_mode))
++			continue;
++
++		dget(hidden_dentry);
++		unionfs_mntget(dentry, bindex);
++		branchget(sb, bindex);
++		hidden_file =
++			dentry_open(hidden_dentry,
++				    unionfs_lower_mnt_idx(dentry, bindex),
++				    O_RDONLY);
++		if (IS_ERR(hidden_file)) {
++			err = PTR_ERR(hidden_file);
++			dput(hidden_dentry);
++			branchput(sb, bindex);
++			goto out;
++		}
++
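++		/*
++		 * A single vfs_readdir() call may return before the whole
++		 * directory has been read, so repeat until a pass makes no
++		 * filldir callbacks at all, meaning this lower directory is
++		 * exhausted.
++		 */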
++		do {
++			buf->filldir_called = 0;
++			buf->rdstate->bindex = bindex;
++			err = vfs_readdir(hidden_file,
++					  readdir_util_callback, buf);
++			if (buf->err)
++				err = buf->err;
++		} while ((err >= 0) && buf->filldir_called);
++
++		/* fput calls dput for hidden_dentry */
++		fput(hidden_file);
++		branchput(sb, bindex);
++
++		if (err < 0)
++			goto out;
++	}
++
++out:
++	if (buf) {
++		if (namelist && !err)
++			*namelist = buf->rdstate;
++		else if (buf->rdstate)
++			free_rdstate(buf->rdstate);
++		kfree(buf);
++	}
++
++	return err;
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/fanout.h linux-2.6.22-try2/fs/unionfs/fanout.h
+--- linux-2.6.22-570/fs/unionfs/fanout.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/fanout.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,308 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#ifndef _FANOUT_H_
++#define _FANOUT_H_
++
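++/*
++ * A unionfs object "fans out" to an array of lower objects, one per
++ * branch: files keep lower_files[], inodes lower_inodes[], dentries
++ * lower_paths[], and the superblock a data[] array.  The bstart/bend
++ * indices delimit the range of branches for which a lower object exists.
++ */
++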
++/*
++ * Inode to private data
++ *
++ * Since we use containers and the struct inode is _inside_ the
++ * unionfs_inode_info structure, UNIONFS_I will always (given a non-NULL
++ * inode pointer) return a valid non-NULL pointer.
++ */
++static inline struct unionfs_inode_info *UNIONFS_I(const struct inode *inode)
++{
++	return container_of(inode, struct unionfs_inode_info, vfs_inode);
++}
++
++#define ibstart(ino) (UNIONFS_I(ino)->bstart)
++#define ibend(ino) (UNIONFS_I(ino)->bend)
++
++/* Superblock to private data */
++#define UNIONFS_SB(super) ((struct unionfs_sb_info *)(super)->s_fs_info)
++#define sbstart(sb) 0
++#define sbend(sb) (UNIONFS_SB(sb)->bend)
++#define sbmax(sb) (UNIONFS_SB(sb)->bend + 1)
++#define sbhbid(sb) (UNIONFS_SB(sb)->high_branch_id)
++
++/* File to private Data */
++#define UNIONFS_F(file) ((struct unionfs_file_info *)((file)->private_data))
++#define fbstart(file) (UNIONFS_F(file)->bstart)
++#define fbend(file) (UNIONFS_F(file)->bend)
++
++/* macros to manipulate branch IDs stored in our superblock */
++static inline int branch_id(struct super_block *sb, int index)
++{
++	BUG_ON(!sb || index < 0);
++	return UNIONFS_SB(sb)->data[index].branch_id;
++}
++
++static inline void set_branch_id(struct super_block *sb, int index, int val)
++{
++	BUG_ON(!sb || index < 0);
++	UNIONFS_SB(sb)->data[index].branch_id = val;
++}
++
++static inline void new_branch_id(struct super_block *sb, int index)
++{
++	BUG_ON(!sb || index < 0);
++	set_branch_id(sb, index, ++UNIONFS_SB(sb)->high_branch_id);
++}
++
++/*
++ * Find the new index of a matching branch, given an existing superblock
++ * and a known (possibly old) id.  This is needed because branches could
++ * have been added/deleted, causing the branches of any open files to
++ * shift.
++ *
++ * @sb: the new superblock which may have new/different branch IDs
++ * @id: the old/existing id we're looking for
++ * Returns index of newly found branch (0 or greater), -1 otherwise.
++ */
++static inline int branch_id_to_idx(struct super_block *sb, int id)
++{
++	int i;
++	for (i = 0; i < sbmax(sb); i++) {
++		if (branch_id(sb, i) == id)
++			return i;
++	}
++	/*
++	 * XXX: maybe we should BUG_ON if the new branch index is not
++	 * found? (really, that should never happen).
++	 */
++	printk(KERN_WARNING "unionfs: cannot find branch with id %d\n", id);
++	return -1;
++}
++
++/* File to lower file. */
++static inline struct file *unionfs_lower_file(const struct file *f)
++{
++	BUG_ON(!f);
++	return UNIONFS_F(f)->lower_files[fbstart(f)];
++}
++
++static inline struct file *unionfs_lower_file_idx(const struct file *f,
++						  int index)
++{
++	BUG_ON(!f || index < 0);
++	return UNIONFS_F(f)->lower_files[index];
++}
++
++static inline void unionfs_set_lower_file_idx(struct file *f, int index,
++					      struct file *val)
++{
++	BUG_ON(!f || index < 0);
++	UNIONFS_F(f)->lower_files[index] = val;
++	/* save branch ID (may be redundant?) */
++	UNIONFS_F(f)->saved_branch_ids[index] =
++		branch_id((f)->f_dentry->d_sb, index);
++}
++
++static inline void unionfs_set_lower_file(struct file *f, struct file *val)
++{
++	BUG_ON(!f);
++	unionfs_set_lower_file_idx((f), fbstart(f), (val));
++}
++
++/* Inode to lower inode. */
++static inline struct inode *unionfs_lower_inode(const struct inode *i)
++{
++	BUG_ON(!i);
++	return UNIONFS_I(i)->lower_inodes[ibstart(i)];
++}
++
++static inline struct inode *unionfs_lower_inode_idx(const struct inode *i,
++						    int index)
++{
++	BUG_ON(!i || index < 0);
++	return UNIONFS_I(i)->lower_inodes[index];
++}
++
++static inline void unionfs_set_lower_inode_idx(struct inode *i, int index,
++					       struct inode *val)
++{
++	BUG_ON(!i || index < 0);
++	UNIONFS_I(i)->lower_inodes[index] = val;
++}
++
++static inline void unionfs_set_lower_inode(struct inode *i, struct inode *val)
++{
++	BUG_ON(!i);
++	UNIONFS_I(i)->lower_inodes[ibstart(i)] = val;
++}
++
++/* Superblock to lower superblock. */
++static inline struct super_block *unionfs_lower_super(
++					const struct super_block *sb)
++{
++	BUG_ON(!sb);
++	return UNIONFS_SB(sb)->data[sbstart(sb)].sb;
++}
++
++static inline struct super_block *unionfs_lower_super_idx(
++					const struct super_block *sb,
++					int index)
++{
++	BUG_ON(!sb || index < 0);
++	return UNIONFS_SB(sb)->data[index].sb;
++}
++
++static inline void unionfs_set_lower_super_idx(struct super_block *sb,
++					       int index,
++					       struct super_block *val)
++{
++	BUG_ON(!sb || index < 0);
++	UNIONFS_SB(sb)->data[index].sb = val;
++}
++
++static inline void unionfs_set_lower_super(struct super_block *sb,
++					   struct super_block *val)
++{
++	BUG_ON(!sb);
++	UNIONFS_SB(sb)->data[sbstart(sb)].sb = val;
++}
++
++/* Branch count macros. */
++static inline int branch_count(const struct super_block *sb, int index)
++{
++	BUG_ON(!sb || index < 0);
++	return atomic_read(&UNIONFS_SB(sb)->data[index].open_files);
++}
++
++static inline void set_branch_count(struct super_block *sb, int index, int val)
++{
++	BUG_ON(!sb || index < 0);
++	atomic_set(&UNIONFS_SB(sb)->data[index].open_files, val);
++}
++
++static inline void branchget(struct super_block *sb, int index)
++{
++	BUG_ON(!sb || index < 0);
++	atomic_inc(&UNIONFS_SB(sb)->data[index].open_files);
++}
++
++static inline void branchput(struct super_block *sb, int index)
++{
++	BUG_ON(!sb || index < 0);
++	atomic_dec(&UNIONFS_SB(sb)->data[index].open_files);
++}
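++
++/*
++ * Note: open_files counts how many open files reference each branch.
++ * branchget/branchput bracket opens and releases, presumably so that
++ * branch-management code can tell when a branch is still in use.
++ */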
++
++/* Dentry macros */
++static inline struct unionfs_dentry_info *UNIONFS_D(const struct dentry *dent)
++{
++	BUG_ON(!dent);
++	return dent->d_fsdata;
++}
++
++static inline int dbstart(const struct dentry *dent)
++{
++	BUG_ON(!dent);
++	return UNIONFS_D(dent)->bstart;
++}
++
++static inline void set_dbstart(struct dentry *dent, int val)
++{
++	BUG_ON(!dent);
++	UNIONFS_D(dent)->bstart = val;
++}
++
++static inline int dbend(const struct dentry *dent)
++{
++	BUG_ON(!dent);
++	return UNIONFS_D(dent)->bend;
++}
++
++static inline void set_dbend(struct dentry *dent, int val)
++{
++	BUG_ON(!dent);
++	UNIONFS_D(dent)->bend = val;
++}
++
++static inline int dbopaque(const struct dentry *dent)
++{
++	BUG_ON(!dent);
++	return UNIONFS_D(dent)->bopaque;
++}
++
++static inline void set_dbopaque(struct dentry *dent, int val)
++{
++	BUG_ON(!dent);
++	UNIONFS_D(dent)->bopaque = val;
++}
++
++static inline void unionfs_set_lower_dentry_idx(struct dentry *dent, int index,
++						struct dentry *val)
++{
++	BUG_ON(!dent || index < 0);
++	UNIONFS_D(dent)->lower_paths[index].dentry = val;
++}
++
++static inline struct dentry *unionfs_lower_dentry_idx(
++				const struct dentry *dent,
++				int index)
++{
++	BUG_ON(!dent || index < 0);
++	return UNIONFS_D(dent)->lower_paths[index].dentry;
++}
++
++static inline struct dentry *unionfs_lower_dentry(const struct dentry *dent)
++{
++	BUG_ON(!dent);
++	return unionfs_lower_dentry_idx(dent, dbstart(dent));
++}
++
++static inline void unionfs_set_lower_mnt_idx(struct dentry *dent, int index,
++					     struct vfsmount *mnt)
++{
++	BUG_ON(!dent || index < 0);
++	UNIONFS_D(dent)->lower_paths[index].mnt = mnt;
++}
++
++static inline struct vfsmount *unionfs_lower_mnt_idx(
++					const struct dentry *dent,
++					int index)
++{
++	BUG_ON(!dent || index < 0);
++	return UNIONFS_D(dent)->lower_paths[index].mnt;
++}
++
++static inline struct vfsmount *unionfs_lower_mnt(const struct dentry *dent)
++{
++	BUG_ON(!dent);
++	return unionfs_lower_mnt_idx(dent, dbstart(dent));
++}
++
++/* Helpers for locking a dentry. */
++static inline void unionfs_lock_dentry(struct dentry *d)
++{
++	BUG_ON(!d);
++	mutex_lock(&UNIONFS_D(d)->lock);
++}
++
++static inline void unionfs_unlock_dentry(struct dentry *d)
++{
++	BUG_ON(!d);
++	mutex_unlock(&UNIONFS_D(d)->lock);
++}
++
++static inline void verify_locked(struct dentry *d)
++{
++	BUG_ON(!d);
++	BUG_ON(!mutex_is_locked(&UNIONFS_D(d)->lock));
++}
++
++#endif	/* _FANOUT_H_ */
+diff -Nurb linux-2.6.22-570/fs/unionfs/file.c linux-2.6.22-try2/fs/unionfs/file.c
+--- linux-2.6.22-570/fs/unionfs/file.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/file.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,149 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*******************
++ * File Operations *
++ *******************/
++
++static ssize_t unionfs_read(struct file *file, char __user *buf,
++			    size_t count, loff_t *ppos)
++{
++	int err;
++
++	unionfs_read_lock(file->f_path.dentry->d_sb);
++
++	if ((err = unionfs_file_revalidate(file, 0)))
++		goto out;
++
++	err = do_sync_read(file, buf, count, ppos);
++
++	if (err >= 0)
++		touch_atime(unionfs_lower_mnt(file->f_path.dentry),
++			    unionfs_lower_dentry(file->f_path.dentry));
++
++out:
++	unionfs_read_unlock(file->f_path.dentry->d_sb);
++	return err;
++}
++
++static ssize_t unionfs_aio_read(struct kiocb *iocb, const struct iovec *iov,
++				unsigned long nr_segs, loff_t pos)
++{
++	int err = 0;
++	struct file *file = iocb->ki_filp;
++
++	unionfs_read_lock(file->f_path.dentry->d_sb);
++
++	if ((err = unionfs_file_revalidate(file, 0)))
++		goto out;
++
++	err = generic_file_aio_read(iocb, iov, nr_segs, pos);
++
++	if (err == -EIOCBQUEUED)
++		err = wait_on_sync_kiocb(iocb);
++
++	if (err >= 0)
++		touch_atime(unionfs_lower_mnt(file->f_path.dentry),
++			    unionfs_lower_dentry(file->f_path.dentry));
++
++out:
++	unionfs_read_unlock(file->f_path.dentry->d_sb);
++	return err;
++}
++
++static ssize_t unionfs_write(struct file *file, const char __user *buf,
++			     size_t count, loff_t *ppos)
++{
++	int err = 0;
++
++	unionfs_read_lock(file->f_path.dentry->d_sb);
++
++	if ((err = unionfs_file_revalidate(file, 1)))
++		goto out;
++
++	err = do_sync_write(file, buf, count, ppos);
++
++out:
++	unionfs_read_unlock(file->f_path.dentry->d_sb);
++	return err;
++}
++
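++/*
++ * Directories use the separate unionfs_dir_fops; regular files reject
++ * readdir with -ENOTDIR.  This stub only fills the ->readdir slot of
++ * unionfs_main_fops below.
++ */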
++static int unionfs_file_readdir(struct file *file, void *dirent,
++				filldir_t filldir)
++{
++	return -ENOTDIR;
++}
++
++static int unionfs_mmap(struct file *file, struct vm_area_struct *vma)
++{
++	int err = 0;
++	int willwrite;
++	struct file *lower_file;
++
++	unionfs_read_lock(file->f_path.dentry->d_sb);
++
++	if ((err = unionfs_file_revalidate(file, 1)))
++		goto out;
++
++	/*
++	 * This might be deferred to mmap's writepage: treat the mapping
++	 * as a write if both VM_SHARED and VM_WRITE are already set in
++	 * vm_flags (which is what this OR-and-compare tests).
++	 */
++	willwrite = ((vma->vm_flags | VM_SHARED | VM_WRITE) == vma->vm_flags);
++	if ((err = unionfs_file_revalidate(file, willwrite)))
++		goto out;
++
++	/*
++	 * File systems which do not implement ->writepage may use
++	 * generic_file_readonly_mmap as their ->mmap op.  If you call
++	 * generic_file_readonly_mmap with VM_WRITE, you get -EINVAL.  But
++	 * we cannot call the lower ->mmap op, so we have no way of being
++	 * told that writeable mappings won't work.  Therefore, our only
++	 * choice is to check whether the lower file system supports
++	 * ->writepage, and if not, return -EINVAL (the same error that
++	 * generic_file_readonly_mmap returns in that case).
++	 */
++	lower_file = unionfs_lower_file(file);
++	if (willwrite && !lower_file->f_mapping->a_ops->writepage) {
++		err = -EINVAL;
++		printk(KERN_ERR "unionfs: branch %d file system does not "
++		       "support writeable mmap\n", fbstart(file));
++	} else {
++		err = generic_file_mmap(file, vma);
++		if (err)
++			printk(KERN_ERR "unionfs: generic_file_mmap "
++			       "failed %d\n", err);
++	}
++
++out:
++	unionfs_read_unlock(file->f_path.dentry->d_sb);
++	return err;
++}
++
++struct file_operations unionfs_main_fops = {
++	.llseek		= generic_file_llseek,
++	.read		= unionfs_read,
++	.aio_read       = unionfs_aio_read,
++	.write		= unionfs_write,
++	.aio_write      = generic_file_aio_write,
++	.readdir	= unionfs_file_readdir,
++	.unlocked_ioctl	= unionfs_ioctl,
++	.mmap		= unionfs_mmap,
++	.open		= unionfs_open,
++	.flush		= unionfs_flush,
++	.release	= unionfs_file_release,
++	.fsync		= file_fsync,
++	.sendfile	= generic_file_sendfile,
++};
+diff -Nurb linux-2.6.22-570/fs/unionfs/inode.c linux-2.6.22-try2/fs/unionfs/inode.c
+--- linux-2.6.22-570/fs/unionfs/inode.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/inode.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,1138 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++static int unionfs_create(struct inode *parent, struct dentry *dentry,
++			  int mode, struct nameidata *nd)
++{
++	int err = 0;
++	struct dentry *hidden_dentry = NULL;
++	struct dentry *wh_dentry = NULL;
++	struct dentry *new_hidden_dentry;
++	struct dentry *hidden_parent_dentry = NULL;
++	int bindex = 0, bstart;
++	char *name = NULL;
++	int valid = 0;
++
++	unionfs_read_lock(dentry->d_sb);
++	unionfs_lock_dentry(dentry);
++
++	unionfs_lock_dentry(dentry->d_parent);
++	valid = __unionfs_d_revalidate_chain(dentry->d_parent, nd);
++	unionfs_unlock_dentry(dentry->d_parent);
++	if (!valid) {
++		err = -ESTALE;	/* same as what real_lookup does */
++		goto out;
++	}
++	valid = __unionfs_d_revalidate_chain(dentry, nd);
++	/*
++	 * It's only a bug if this dentry was not negative and couldn't be
++	 * revalidated (shouldn't happen).
++	 */
++	BUG_ON(!valid && dentry->d_inode);
++
++	/* We start out in the leftmost branch. */
++	bstart = dbstart(dentry);
++	hidden_dentry = unionfs_lower_dentry(dentry);
++
++	/*
++	 * check if whiteout exists in this branch, i.e. lookup .wh.foo
++	 * first.
++	 */
++	name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
++	if (IS_ERR(name)) {
++		err = PTR_ERR(name);
++		goto out;
++	}
++
++	wh_dentry = lookup_one_len(name, hidden_dentry->d_parent,
++				   dentry->d_name.len + UNIONFS_WHLEN);
++	if (IS_ERR(wh_dentry)) {
++		err = PTR_ERR(wh_dentry);
++		wh_dentry = NULL;
++		goto out;
++	}
++
++	if (wh_dentry->d_inode) {
++		/*
++		 * .wh.foo has been found.
++		 * First truncate it and then rename it to foo (hence having
++		 * the same overall effect as a normal create.
++		 * the same overall effect as a normal create).
++		struct dentry *hidden_dir_dentry;
++		struct iattr newattrs;
++
++		mutex_lock(&wh_dentry->d_inode->i_mutex);
++		newattrs.ia_valid = ATTR_CTIME | ATTR_MODE | ATTR_ATIME
++			| ATTR_MTIME | ATTR_UID | ATTR_GID | ATTR_FORCE
++			| ATTR_KILL_SUID | ATTR_KILL_SGID;
++
++		newattrs.ia_mode = mode & ~current->fs->umask;
++		newattrs.ia_uid = current->fsuid;
++		newattrs.ia_gid = current->fsgid;
++
++		if (wh_dentry->d_inode->i_size != 0) {
++			newattrs.ia_valid |= ATTR_SIZE;
++			newattrs.ia_size = 0;
++		}
++
++		err = notify_change(wh_dentry, &newattrs);
++
++		mutex_unlock(&wh_dentry->d_inode->i_mutex);
++
++		if (err)
++			printk(KERN_WARNING "unionfs: %s:%d: notify_change "
++			       "failed: %d, ignoring...\n",
++			       __FILE__, __LINE__, err);
++
++		new_hidden_dentry = unionfs_lower_dentry(dentry);
++		dget(new_hidden_dentry);
++
++		hidden_dir_dentry = dget_parent(wh_dentry);
++		lock_rename(hidden_dir_dentry, hidden_dir_dentry);
++
++		if (!(err = is_robranch_super(dentry->d_sb, bstart))) {
++			err = vfs_rename(hidden_dir_dentry->d_inode,
++					 wh_dentry,
++					 hidden_dir_dentry->d_inode,
++					 new_hidden_dentry);
++		}
++		if (!err) {
++			fsstack_copy_attr_times(parent,
++						new_hidden_dentry->d_parent->
++						d_inode);
++			fsstack_copy_inode_size(parent,
++						new_hidden_dentry->d_parent->
++						d_inode);
++			parent->i_nlink = unionfs_get_nlinks(parent);
++		}
++
++		unlock_rename(hidden_dir_dentry, hidden_dir_dentry);
++		dput(hidden_dir_dentry);
++
++		dput(new_hidden_dentry);
++
++		if (err) {
++			/* exit if the error returned was NOT -EROFS */
++			if (!IS_COPYUP_ERR(err))
++				goto out;
++			/*
++			 * We were not able to create the file in this
++			 * branch, so we try to create it in the next
++			 * branch to the left.
++			 */
++			bstart--;
++		} else {
++			/*
++			 * reset the unionfs dentry to point to the .wh.foo
++			 * entry.
++			 */
++
++			/* Discard any old reference. */
++			dput(unionfs_lower_dentry(dentry));
++
++			/* Trade one reference to another. */
++			unionfs_set_lower_dentry_idx(dentry, bstart,
++						     wh_dentry);
++			wh_dentry = NULL;
++
++			err = unionfs_interpose(dentry, parent->i_sb, 0);
++			goto out;
++		}
++	}
++
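++	/*
++	 * No whiteout was found (or renaming it failed on a read-only
++	 * branch): try to create the file in the leftmost possible branch,
++	 * starting at bstart and moving one branch to the left after each
++	 * copyup error.
++	 */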
++	for (bindex = bstart; bindex >= 0; bindex--) {
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (!hidden_dentry) {
++			/*
++			 * if hidden_dentry is NULL, create the entire
++			 * dentry directory structure in branch 'bindex'.
++			 * hidden_dentry will NOT be null when bindex == bstart
++			 * because lookup passed us a negative unionfs dentry
++			 * pointing to a lone negative underlying dentry.
++			 */
++			hidden_dentry = create_parents(parent, dentry, bindex);
++			if (!hidden_dentry || IS_ERR(hidden_dentry)) {
++				if (IS_ERR(hidden_dentry))
++					err = PTR_ERR(hidden_dentry);
++				continue;
++			}
++		}
++
++		hidden_parent_dentry = lock_parent(hidden_dentry);
++		if (IS_ERR(hidden_parent_dentry)) {
++			err = PTR_ERR(hidden_parent_dentry);
++			goto out;
++		}
++		/* We shouldn't create things in a read-only branch. */
++		if (!(err = is_robranch_super(dentry->d_sb, bindex)))
++			err = vfs_create(hidden_parent_dentry->d_inode,
++					 hidden_dentry, mode, nd);
++
++		if (err || !hidden_dentry->d_inode) {
++			unlock_dir(hidden_parent_dentry);
++
++			/* break out of for loop if the error wasn't -EROFS */
++			if (!IS_COPYUP_ERR(err))
++				break;
++		} else {
++			err = unionfs_interpose(dentry, parent->i_sb, 0);
++			if (!err) {
++				fsstack_copy_attr_times(parent,
++							hidden_parent_dentry->
++							d_inode);
++				fsstack_copy_inode_size(parent,
++							hidden_parent_dentry->
++							d_inode);
++				/* update no. of links on parent directory */
++				parent->i_nlink = unionfs_get_nlinks(parent);
++			}
++			unlock_dir(hidden_parent_dentry);
++			break;
++		}
++	}
++
++out:
++	dput(wh_dentry);
++	kfree(name);
++
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(dentry->d_sb);
++	return err;
++}
++
++static struct dentry *unionfs_lookup(struct inode *parent,
++				     struct dentry *dentry,
++				     struct nameidata *nd)
++{
++	struct path path_save;
++	struct dentry *ret;
++
++	unionfs_read_lock(dentry->d_sb);
++
++	/* save the dentry & vfsmnt from namei */
++	if (nd) {
++		path_save.dentry = nd->dentry;
++		path_save.mnt = nd->mnt;
++	}
++
++	/* The locking is done by unionfs_lookup_backend. */
++	ret = unionfs_lookup_backend(dentry, nd, INTERPOSE_LOOKUP);
++
++	/* restore the dentry & vfsmnt in namei */
++	if (nd) {
++		nd->dentry = path_save.dentry;
++		nd->mnt = path_save.mnt;
++	}
++
++	unionfs_read_unlock(dentry->d_sb);
++
++	return ret;
++}
++
++static int unionfs_link(struct dentry *old_dentry, struct inode *dir,
++			struct dentry *new_dentry)
++{
++	int err = 0;
++	struct dentry *hidden_old_dentry = NULL;
++	struct dentry *hidden_new_dentry = NULL;
++	struct dentry *hidden_dir_dentry = NULL;
++	struct dentry *whiteout_dentry;
++	char *name = NULL;
++
++	unionfs_read_lock(old_dentry->d_sb);
++	unionfs_double_lock_dentry(new_dentry, old_dentry);
++
++	if (!__unionfs_d_revalidate_chain(old_dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++	if (new_dentry->d_inode &&
++	    !__unionfs_d_revalidate_chain(new_dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	hidden_new_dentry = unionfs_lower_dentry(new_dentry);
++
++	/*
++	 * check if a whiteout exists in the branch of the new dentry, i.e.
++	 * lookup .wh.foo first.  If present, delete it.
++	 */
++	name = alloc_whname(new_dentry->d_name.name, new_dentry->d_name.len);
++	if (IS_ERR(name)) {
++		err = PTR_ERR(name);
++		goto out;
++	}
++
++	whiteout_dentry = lookup_one_len(name, hidden_new_dentry->d_parent,
++					 new_dentry->d_name.len +
++					 UNIONFS_WHLEN);
++	if (IS_ERR(whiteout_dentry)) {
++		err = PTR_ERR(whiteout_dentry);
++		goto out;
++	}
++
++	if (!whiteout_dentry->d_inode) {
++		dput(whiteout_dentry);
++		whiteout_dentry = NULL;
++	} else {
++		/* found a .wh.foo entry, unlink it and then call vfs_link() */
++		hidden_dir_dentry = lock_parent(whiteout_dentry);
++		err = is_robranch_super(new_dentry->d_sb, dbstart(new_dentry));
++		if (!err)
++			err = vfs_unlink(hidden_dir_dentry->d_inode,
++					 whiteout_dentry);
++
++		fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode);
++		dir->i_nlink = unionfs_get_nlinks(dir);
++		unlock_dir(hidden_dir_dentry);
++		hidden_dir_dentry = NULL;
++		dput(whiteout_dentry);
++		if (err)
++			goto out;
++	}
++
++	if (dbstart(old_dentry) != dbstart(new_dentry)) {
++		hidden_new_dentry =
++			create_parents(dir, new_dentry, dbstart(old_dentry));
++		err = PTR_ERR(hidden_new_dentry);
++		if (IS_COPYUP_ERR(err))
++			goto docopyup;
++		if (!hidden_new_dentry || IS_ERR(hidden_new_dentry))
++			goto out;
++	}
++	hidden_new_dentry = unionfs_lower_dentry(new_dentry);
++	hidden_old_dentry = unionfs_lower_dentry(old_dentry);
++
++	BUG_ON(dbstart(old_dentry) != dbstart(new_dentry));
++	hidden_dir_dentry = lock_parent(hidden_new_dentry);
++	if (!(err = is_robranch(old_dentry)))
++		err = vfs_link(hidden_old_dentry, hidden_dir_dentry->d_inode,
++			       hidden_new_dentry);
++	unlock_dir(hidden_dir_dentry);
++
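++/*
++ * If the link failed because the branch is read-only, copy the source
++ * file up to the first branch to the left that accepts it, and retry
++ * vfs_link() there.
++ */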
++docopyup:
++	if (IS_COPYUP_ERR(err)) {
++		int old_bstart = dbstart(old_dentry);
++		int bindex;
++
++		for (bindex = old_bstart - 1; bindex >= 0; bindex--) {
++			err = copyup_dentry(old_dentry->d_parent->d_inode,
++					    old_dentry, old_bstart,
++					    bindex, NULL,
++					    old_dentry->d_inode->i_size);
++			if (!err) {
++				hidden_new_dentry =
++					create_parents(dir, new_dentry,
++						       bindex);
++				hidden_old_dentry =
++					unionfs_lower_dentry(old_dentry);
++				hidden_dir_dentry =
++					lock_parent(hidden_new_dentry);
++				/* do vfs_link */
++				err = vfs_link(hidden_old_dentry,
++					       hidden_dir_dentry->d_inode,
++					       hidden_new_dentry);
++				unlock_dir(hidden_dir_dentry);
++				goto check_link;
++			}
++		}
++		goto out;
++	}
++
++check_link:
++	if (err || !hidden_new_dentry->d_inode)
++		goto out;
++
++	/* It's a hard link, so use the same inode. */
++	new_dentry->d_inode = igrab(old_dentry->d_inode);
++	d_instantiate(new_dentry, new_dentry->d_inode);
++	fsstack_copy_attr_all(dir, hidden_new_dentry->d_parent->d_inode,
++			      unionfs_get_nlinks);
++	fsstack_copy_inode_size(dir, hidden_new_dentry->d_parent->d_inode);
++
++	/* propagate number of hard-links */
++	old_dentry->d_inode->i_nlink = unionfs_get_nlinks(old_dentry->d_inode);
++
++out:
++	if (!new_dentry->d_inode)
++		d_drop(new_dentry);
++
++	kfree(name);
++
++	unionfs_unlock_dentry(new_dentry);
++	unionfs_unlock_dentry(old_dentry);
++
++	unionfs_read_unlock(old_dentry->d_sb);
++
++	return err;
++}
++
++static int unionfs_symlink(struct inode *dir, struct dentry *dentry,
++			   const char *symname)
++{
++	int err = 0;
++	struct dentry *hidden_dentry = NULL;
++	struct dentry *whiteout_dentry = NULL;
++	struct dentry *hidden_dir_dentry = NULL;
++	umode_t mode;
++	int bindex = 0, bstart;
++	char *name = NULL;
++
++	unionfs_read_lock(dentry->d_sb);
++	unionfs_lock_dentry(dentry);
++
++	if (dentry->d_inode &&
++	    !__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	/* We start out in the leftmost branch. */
++	bstart = dbstart(dentry);
++
++	hidden_dentry = unionfs_lower_dentry(dentry);
++
++	/*
++	 * check if whiteout exists in this branch, i.e. lookup .wh.foo
++	 * first.  If present, delete it.
++	 */
++	name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
++	if (IS_ERR(name)) {
++		err = PTR_ERR(name);
++		goto out;
++	}
++
++	whiteout_dentry =
++		lookup_one_len(name, hidden_dentry->d_parent,
++			       dentry->d_name.len + UNIONFS_WHLEN);
++	if (IS_ERR(whiteout_dentry)) {
++		err = PTR_ERR(whiteout_dentry);
++		goto out;
++	}
++
++	if (!whiteout_dentry->d_inode) {
++		dput(whiteout_dentry);
++		whiteout_dentry = NULL;
++	} else {
++		/*
++		 * found a .wh.foo entry, unlink it and then call
++		 * vfs_symlink().
++		 */
++		hidden_dir_dentry = lock_parent(whiteout_dentry);
++
++		if (!(err = is_robranch_super(dentry->d_sb, bstart)))
++			err = vfs_unlink(hidden_dir_dentry->d_inode,
++					 whiteout_dentry);
++		dput(whiteout_dentry);
++
++		fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode);
++		/* propagate number of hard-links */
++		dir->i_nlink = unionfs_get_nlinks(dir);
++
++		unlock_dir(hidden_dir_dentry);
++
++		if (err) {
++			/* exit if the error returned was NOT -EROFS */
++			if (!IS_COPYUP_ERR(err))
++				goto out;
++			/*
++			 * should now try to create the symlink in another
++			 * branch.
++			 */
++			bstart--;
++		}
++	}
++
++	/*
++	 * We deleted the whiteout if it was present; now do a normal
++	 * vfs_symlink() with possible recursive directory creation.
++	 */
++	for (bindex = bstart; bindex >= 0; bindex--) {
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (!hidden_dentry) {
++			/*
++			 * if hidden_dentry is NULL, create the entire
++			 * dentry directory structure in branch 'bindex'.
++			 * hidden_dentry will NOT be null when bindex ==
++			 * bstart because lookup passed us a negative
++			 * unionfs dentry pointing to a lone negative
++			 * underlying dentry
++			 */
++			hidden_dentry = create_parents(dir, dentry, bindex);
++			if (!hidden_dentry || IS_ERR(hidden_dentry)) {
++				if (IS_ERR(hidden_dentry))
++					err = PTR_ERR(hidden_dentry);
++
++				printk(KERN_DEBUG "unionfs: hidden dentry "
++				       "NULL (or error) for bindex = %d\n",
++				       bindex);
++				continue;
++			}
++		}
++
++		hidden_dir_dentry = lock_parent(hidden_dentry);
++
++		if (!(err = is_robranch_super(dentry->d_sb, bindex))) {
++			mode = S_IALLUGO;
++			err = vfs_symlink(hidden_dir_dentry->d_inode,
++					  hidden_dentry, symname, mode);
++		}
++		unlock_dir(hidden_dir_dentry);
++
++		if (err || !hidden_dentry->d_inode) {
++			/*
++			 * break out of for loop if error returned was NOT
++			 * -EROFS.
++			 */
++			if (!IS_COPYUP_ERR(err))
++				break;
++		} else {
++			err = unionfs_interpose(dentry, dir->i_sb, 0);
++			if (!err) {
++				fsstack_copy_attr_times(dir,
++							hidden_dir_dentry->
++							d_inode);
++				fsstack_copy_inode_size(dir,
++							hidden_dir_dentry->
++							d_inode);
++				/*
++				 * update number of links on parent
++				 * directory.
++				 */
++				dir->i_nlink = unionfs_get_nlinks(dir);
++			}
++			break;
++		}
++	}
++
++out:
++	if (!dentry->d_inode)
++		d_drop(dentry);
++
++	kfree(name);
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(dentry->d_sb);
++	return err;
++}
++
++static int unionfs_mkdir(struct inode *parent, struct dentry *dentry, int mode)
++{
++	int err = 0;
++	struct dentry *hidden_dentry = NULL, *whiteout_dentry = NULL;
++	struct dentry *hidden_parent_dentry = NULL;
++	int bindex = 0, bstart;
++	char *name = NULL;
++	int whiteout_unlinked = 0;
++	struct sioq_args args;
++
++	unionfs_read_lock(dentry->d_sb);
++	unionfs_lock_dentry(dentry);
++
++	if (dentry->d_inode &&
++	    !__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	bstart = dbstart(dentry);
++
++	hidden_dentry = unionfs_lower_dentry(dentry);
++
++	/*
++	 * check if whiteout exists in this branch, i.e. lookup .wh.foo
++	 * first.
++	 */
++	name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
++	if (IS_ERR(name)) {
++		err = PTR_ERR(name);
++		goto out;
++	}
++
++	whiteout_dentry = lookup_one_len(name, hidden_dentry->d_parent,
++					 dentry->d_name.len + UNIONFS_WHLEN);
++	if (IS_ERR(whiteout_dentry)) {
++		err = PTR_ERR(whiteout_dentry);
++		goto out;
++	}
++
++	if (!whiteout_dentry->d_inode) {
++		dput(whiteout_dentry);
++		whiteout_dentry = NULL;
++	} else {
++		hidden_parent_dentry = lock_parent(whiteout_dentry);
++
++		/* found a .wh.foo entry; remove it, then do vfs_mkdir */
++		if (!(err = is_robranch_super(dentry->d_sb, bstart))) {
++			args.unlink.parent = hidden_parent_dentry->d_inode;
++			args.unlink.dentry = whiteout_dentry;
++			run_sioq(__unionfs_unlink, &args);
++			err = args.err;
++		}
++		dput(whiteout_dentry);
++
++		unlock_dir(hidden_parent_dentry);
++
++		if (err) {
++			/* exit if the error returned was NOT -EROFS */
++			if (!IS_COPYUP_ERR(err))
++				goto out;
++			bstart--;
++		} else
++			whiteout_unlinked = 1;
++	}
++
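++	/*
++	 * Create the directory in the leftmost writable branch.  On
++	 * success, drop any lower dentries to the right of it and mark the
++	 * new directory opaque, so that same-named entries in branches to
++	 * the right stay hidden.
++	 */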
++	for (bindex = bstart; bindex >= 0; bindex--) {
++		int i;
++		int bend = dbend(dentry);
++
++		if (is_robranch_super(dentry->d_sb, bindex))
++			continue;
++
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (!hidden_dentry) {
++			hidden_dentry = create_parents(parent, dentry, bindex);
++			if (!hidden_dentry || IS_ERR(hidden_dentry)) {
++				printk(KERN_DEBUG "unionfs: hidden dentry "
++				       "NULL for bindex = %d\n", bindex);
++				continue;
++			}
++		}
++
++		hidden_parent_dentry = lock_parent(hidden_dentry);
++
++		if (IS_ERR(hidden_parent_dentry)) {
++			err = PTR_ERR(hidden_parent_dentry);
++			goto out;
++		}
++
++		err = vfs_mkdir(hidden_parent_dentry->d_inode, hidden_dentry,
++				mode);
++
++		unlock_dir(hidden_parent_dentry);
++
++		/* did the mkdir succeed? */
++		if (err)
++			break;
++
++		for (i = bindex + 1; i < bend; i++) {
++			if (unionfs_lower_dentry_idx(dentry, i)) {
++				dput(unionfs_lower_dentry_idx(dentry, i));
++				unionfs_set_lower_dentry_idx(dentry, i, NULL);
++			}
++		}
++		set_dbend(dentry, bindex);
++
++		err = unionfs_interpose(dentry, parent->i_sb, 0);
++		if (!err) {
++			fsstack_copy_attr_times(parent,
++						hidden_parent_dentry->d_inode);
++			fsstack_copy_inode_size(parent,
++						hidden_parent_dentry->d_inode);
++
++			/* update number of links on parent directory */
++			parent->i_nlink = unionfs_get_nlinks(parent);
++		}
++
++		err = make_dir_opaque(dentry, dbstart(dentry));
++		if (err) {
++			printk(KERN_ERR "unionfs: mkdir: error creating "
++			       ".wh.__dir_opaque: %d\n", err);
++			goto out;
++		}
++
++		/* we are done! */
++		break;
++	}
++
++out:
++	if (!dentry->d_inode)
++		d_drop(dentry);
++
++	kfree(name);
++
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(dentry->d_sb);
++	return err;
++}
++
++static int unionfs_mknod(struct inode *dir, struct dentry *dentry, int mode,
++			 dev_t dev)
++{
++	int err = 0;
++	struct dentry *hidden_dentry = NULL, *whiteout_dentry = NULL;
++	struct dentry *hidden_parent_dentry = NULL;
++	int bindex = 0, bstart;
++	char *name = NULL;
++	int whiteout_unlinked = 0;
++
++	unionfs_read_lock(dentry->d_sb);
++	unionfs_lock_dentry(dentry);
++
++	if (dentry->d_inode &&
++	    !__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	bstart = dbstart(dentry);
++
++	hidden_dentry = unionfs_lower_dentry(dentry);
++
++	/*
++	 * check if whiteout exists in this branch, i.e. lookup .wh.foo
++	 * first.
++	 */
++	name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
++	if (IS_ERR(name)) {
++		err = PTR_ERR(name);
++		goto out;
++	}
++
++	whiteout_dentry = lookup_one_len(name, hidden_dentry->d_parent,
++					 dentry->d_name.len + UNIONFS_WHLEN);
++	if (IS_ERR(whiteout_dentry)) {
++		err = PTR_ERR(whiteout_dentry);
++		goto out;
++	}
++
++	if (!whiteout_dentry->d_inode) {
++		dput(whiteout_dentry);
++		whiteout_dentry = NULL;
++	} else {
++		/* found .wh.foo, unlink it */
++		hidden_parent_dentry = lock_parent(whiteout_dentry);
++
++		/* found a .wh.foo entry; remove it, then do vfs_mknod */
++		if (!(err = is_robranch_super(dentry->d_sb, bstart)))
++			err = vfs_unlink(hidden_parent_dentry->d_inode,
++					 whiteout_dentry);
++		dput(whiteout_dentry);
++
++		unlock_dir(hidden_parent_dentry);
++
++		if (err) {
++			if (!IS_COPYUP_ERR(err))
++				goto out;
++
++			bstart--;
++		} else
++			whiteout_unlinked = 1;
++	}
++
++	for (bindex = bstart; bindex >= 0; bindex--) {
++		if (is_robranch_super(dentry->d_sb, bindex))
++			continue;
++
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (!hidden_dentry) {
++			hidden_dentry = create_parents(dir, dentry, bindex);
++			if (IS_ERR(hidden_dentry)) {
++				printk(KERN_DEBUG "unionfs: failed to create "
++				       "parents on %d, err = %ld\n",
++				       bindex, PTR_ERR(hidden_dentry));
++				continue;
++			}
++		}
++
++		hidden_parent_dentry = lock_parent(hidden_dentry);
++		if (IS_ERR(hidden_parent_dentry)) {
++			err = PTR_ERR(hidden_parent_dentry);
++			goto out;
++		}
++
++		err = vfs_mknod(hidden_parent_dentry->d_inode,
++				hidden_dentry, mode, dev);
++
++		if (err) {
++			unlock_dir(hidden_parent_dentry);
++			break;
++		}
++
++		err = unionfs_interpose(dentry, dir->i_sb, 0);
++		if (!err) {
++			fsstack_copy_attr_times(dir,
++						hidden_parent_dentry->d_inode);
++			fsstack_copy_inode_size(dir,
++						hidden_parent_dentry->d_inode);
++			/* update number of links on parent directory */
++			dir->i_nlink = unionfs_get_nlinks(dir);
++		}
++		unlock_dir(hidden_parent_dentry);
++
++		break;
++	}
++
++out:
++	if (!dentry->d_inode)
++		d_drop(dentry);
++
++	kfree(name);
++
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(dentry->d_sb);
++	return err;
++}
++
++static int unionfs_readlink(struct dentry *dentry, char __user *buf,
++			    int bufsiz)
++{
++	int err;
++	struct dentry *hidden_dentry;
++
++	unionfs_read_lock(dentry->d_sb);
++	unionfs_lock_dentry(dentry);
++
++	if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	hidden_dentry = unionfs_lower_dentry(dentry);
++
++	if (!hidden_dentry->d_inode->i_op ||
++	    !hidden_dentry->d_inode->i_op->readlink) {
++		err = -EINVAL;
++		goto out;
++	}
++
++	err = hidden_dentry->d_inode->i_op->readlink(hidden_dentry,
++						     buf, bufsiz);
++	if (err > 0)
++		fsstack_copy_attr_atime(dentry->d_inode,
++					hidden_dentry->d_inode);
++
++out:
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(dentry->d_sb);
++	return err;
++}
++
++/*
++ * Check if dentry is valid or not, as per our generation numbers.
++ * @dentry: dentry to check.
++ * Returns 1 (valid) or 0 (invalid/stale).
++ */
++static inline int is_valid_dentry(struct dentry *dentry)
++{
++	BUG_ON(!UNIONFS_D(dentry));
++	BUG_ON(!UNIONFS_SB(dentry->d_sb));
++	return (atomic_read(&UNIONFS_D(dentry)->generation) ==
++			atomic_read(&UNIONFS_SB(dentry->d_sb)->generation));
++}
++
++/* We don't lock the dentry here, because readlink does the heavy lifting. */
++static void *unionfs_follow_link(struct dentry *dentry, struct nameidata *nd)
++{
++	char *buf;
++	int len = PAGE_SIZE, err;
++	mm_segment_t old_fs;
++
++	/*
++	 * FIXME: Really nasty...we can get called from two distinct places:
++	 * 1) read_link - locks the dentry
++	 * 2) VFS lookup code - does NOT lock the dentry
++	 *
++	 * The proper thing would be to call dentry revalidate; however, it
++	 * expects a locked dentry, and we can't cleanly guarantee that.
++	 */
++	BUG_ON(!is_valid_dentry(dentry));
++
++	unionfs_read_lock(dentry->d_sb);
++
++	/* This is freed by the put_link method assuming a successful call. */
++	buf = kmalloc(len, GFP_KERNEL);
++	if (!buf) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	/* read the symlink, and then we will follow it */
++	old_fs = get_fs();
++	set_fs(KERNEL_DS);
++	err = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
++	set_fs(old_fs);
++	if (err < 0) {
++		kfree(buf);
++		buf = NULL;
++		goto out;
++	}
++	buf[err] = 0;
++	nd_set_link(nd, buf);
++	err = 0;
++
++out:
++	unionfs_read_unlock(dentry->d_sb);
++	return ERR_PTR(err);
++}
++
++/* FIXME: We may not have to lock here */
++static void unionfs_put_link(struct dentry *dentry, struct nameidata *nd,
++			     void *cookie)
++{
++	unionfs_read_lock(dentry->d_sb);
++	kfree(nd_get_link(nd));
++	unionfs_read_unlock(dentry->d_sb);
++}
++
++/*
++ * Basically copied from the kernel vfs permission(), but we've changed
++ * the following:
++ *   (1) the IS_RDONLY check is skipped, and
++ *   (2) if you set the mount option `mode=nfsro', we assume that -EACCES
++ *   means that the export is read-only and we should check standard Unix
++ *   permissions.  This means that NFS ACL checks (or other advanced
++ *   permission features) are bypassed.  Note, however, that we do call
++ *   security_inode_permission, so the security checks inside SELinux,
++ *   etc., are still performed.
++ */
++static int inode_permission(struct inode *inode, int mask,
++			    struct nameidata *nd, int bindex)
++{
++	int retval, submask;
++
++	if (mask & MAY_WRITE) {
++		/* The first branch is allowed to be really readonly. */
++		if (bindex == 0) {
++			umode_t mode = inode->i_mode;
++			if (IS_RDONLY(inode) &&
++			    (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
++				return -EROFS;
++		}
++		/*
++		 * Nobody gets write access to an immutable file.
++		 */
++		if (IS_IMMUTABLE(inode))
++			return -EACCES;
++	}
++
++	/* Ordinary permission routines do not understand MAY_APPEND. */
++	submask = mask & ~MAY_APPEND;
++	if (inode->i_op && inode->i_op->permission) {
++		retval = inode->i_op->permission(inode, submask, nd);
++		if ((retval == -EACCES) && (submask & MAY_WRITE) &&
++		    (!strcmp("nfs", (inode)->i_sb->s_type->name)) &&
++		    (nd) && (nd->mnt) && (nd->mnt->mnt_sb)) {
++			int perms;
++			perms = branchperms(nd->mnt->mnt_sb, bindex);
++			if (perms & MAY_NFSRO)
++				retval = generic_permission(inode, submask,
++							    NULL);
++		}
++	} else
++		retval = generic_permission(inode, submask, NULL);
++
++	if (retval && retval != -EROFS)	/* ignore EROFS */
++		return retval;
++
++	retval = security_inode_permission(inode, mask, nd);
++	return ((retval == -EROFS) ? 0 : retval);	/* ignore EROFS */
++}
++
++static int unionfs_permission(struct inode *inode, int mask,
++			      struct nameidata *nd)
++{
++	struct inode *hidden_inode = NULL;
++	int err = 0;
++	int bindex, bstart, bend;
++	const int is_file = !S_ISDIR(inode->i_mode);
++	const int write_mask = (mask & MAY_WRITE) && !(mask & MAY_READ);
++
++	unionfs_read_lock(inode->i_sb);
++
++	bstart = ibstart(inode);
++	bend = ibend(inode);
++	if (bstart < 0 || bend < 0) {
++		/*
++		 * With branch-management, we can get a stale inode here.
++		 * If so, we return ESTALE back to link_path_walk, which
++		 * would discard the dcache entry and re-lookup the
++		 * dentry+inode.  This should be equivalent to issuing
++		 * __unionfs_d_revalidate_chain on nd.dentry here.
++		 */
++		err = -ESTALE;	/* force revalidate */
++		goto out;
++	}
++
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_inode = unionfs_lower_inode_idx(inode, bindex);
++		if (!hidden_inode)
++			continue;
++
++		/*
++		 * check the condition for D-F-D underlying files/directories;
++		 * we don't have to check files if we are checking
++		 * directories.
++		 */
++		if (!is_file && !S_ISDIR(hidden_inode->i_mode))
++			continue;
++
++		/*
++		 * We use our own special version of permission, such that
++		 * only the first branch returns -EROFS.
++		 */
++		err = inode_permission(hidden_inode, mask, nd, bindex);
++
++		/*
++		 * The permissions are an intersection of the overall directory
++		 * permissions, so we fail if one fails.
++		 */
++		if (err)
++			goto out;
++
++		/* only the leftmost file matters. */
++		if (is_file || write_mask) {
++			if (is_file && write_mask) {
++				err = get_write_access(hidden_inode);
++				if (!err)
++					put_write_access(hidden_inode);
++			}
++			break;
++		}
++	}
++
++out:
++	unionfs_read_unlock(inode->i_sb);
++	return err;
++}
++
++static int unionfs_setattr(struct dentry *dentry, struct iattr *ia)
++{
++	int err = 0;
++	struct dentry *hidden_dentry;
++	struct inode *inode = NULL;
++	struct inode *hidden_inode = NULL;
++	int bstart, bend, bindex;
++	int i;
++	int copyup = 0;
++
++	unionfs_read_lock(dentry->d_sb);
++	unionfs_lock_dentry(dentry);
++
++	if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	bstart = dbstart(dentry);
++	bend = dbend(dentry);
++	inode = dentry->d_inode;
++
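++	/*
++	 * Apply the attribute change to the leftmost lower inode.  If that
++	 * inode sits on a read-only branch, first copy the file up to a
++	 * branch further left (truncated to ia_size when ATTR_SIZE is
++	 * requested), then apply notify_change() there.
++	 */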
++	for (bindex = bstart; (bindex <= bend) || (bindex == bstart);
++	     bindex++) {
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (!hidden_dentry)
++			continue;
++		BUG_ON(hidden_dentry->d_inode == NULL);
++
++		/* If the file is on a read only branch */
++		if (is_robranch_super(dentry->d_sb, bindex)
++		    || IS_RDONLY(hidden_dentry->d_inode)) {
++			if (copyup || (bindex != bstart))
++				continue;
++			/* Only if it's the leftmost file, copy up the file. */
++			for (i = bstart - 1; i >= 0; i--) {
++				loff_t size = dentry->d_inode->i_size;
++				if (ia->ia_valid & ATTR_SIZE)
++					size = ia->ia_size;
++				err = copyup_dentry(dentry->d_parent->d_inode,
++						    dentry, bstart, i, NULL,
++						    size);
++
++				if (!err) {
++					copyup = 1;
++					hidden_dentry =
++						unionfs_lower_dentry(dentry);
++					break;
++				}
++				/*
++				 * if error is in the leftmost branch, pass
++				 * it up.
++				 */
++				if (i == 0)
++					goto out;
++			}
++
++		}
++		err = notify_change(hidden_dentry, ia);
++		if (err)
++			goto out;
++		break;
++	}
++
++	/* for mmap */
++	if (ia->ia_valid & ATTR_SIZE) {
++		if (ia->ia_size != i_size_read(inode)) {
++			err = vmtruncate(inode, ia->ia_size);
++			if (err)
++				printk(KERN_ERR
++				       "unionfs_setattr: vmtruncate failed\n");
++		}
++	}
++
++	/* get the size from the first hidden inode */
++	hidden_inode = unionfs_lower_inode(dentry->d_inode);
++	fsstack_copy_attr_all(inode, hidden_inode, unionfs_get_nlinks);
++	fsstack_copy_inode_size(inode, hidden_inode);
++
++out:
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(dentry->d_sb);
++	return err;
++}
++
++struct inode_operations unionfs_symlink_iops = {
++	.readlink	= unionfs_readlink,
++	.permission	= unionfs_permission,
++	.follow_link	= unionfs_follow_link,
++	.setattr	= unionfs_setattr,
++	.put_link	= unionfs_put_link,
++};
++
++struct inode_operations unionfs_dir_iops = {
++	.create		= unionfs_create,
++	.lookup		= unionfs_lookup,
++	.link		= unionfs_link,
++	.unlink		= unionfs_unlink,
++	.symlink	= unionfs_symlink,
++	.mkdir		= unionfs_mkdir,
++	.rmdir		= unionfs_rmdir,
++	.mknod		= unionfs_mknod,
++	.rename		= unionfs_rename,
++	.permission	= unionfs_permission,
++	.setattr	= unionfs_setattr,
++#ifdef CONFIG_UNION_FS_XATTR
++	.setxattr	= unionfs_setxattr,
++	.getxattr	= unionfs_getxattr,
++	.removexattr	= unionfs_removexattr,
++	.listxattr	= unionfs_listxattr,
++#endif
++};
++
++struct inode_operations unionfs_main_iops = {
++	.permission	= unionfs_permission,
++	.setattr	= unionfs_setattr,
++#ifdef CONFIG_UNION_FS_XATTR
++	.setxattr	= unionfs_setxattr,
++	.getxattr	= unionfs_getxattr,
++	.removexattr	= unionfs_removexattr,
++	.listxattr	= unionfs_listxattr,
++#endif
++};
+diff -Nurb linux-2.6.22-570/fs/unionfs/lookup.c linux-2.6.22-try2/fs/unionfs/lookup.c
+--- linux-2.6.22-570/fs/unionfs/lookup.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/lookup.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,549 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/* is the filename valid, i.e. not a whiteout or an opaque dir marker? */
++static int is_validname(const char *name)
++{
++	if (!strncmp(name, UNIONFS_WHPFX, UNIONFS_WHLEN))
++		return 0;
++	if (!strncmp(name, UNIONFS_DIR_OPAQUE_NAME,
++		     sizeof(UNIONFS_DIR_OPAQUE_NAME) - 1))
++		return 0;
++	return 1;
++}
++
++/* The rest of these are utility functions for lookup. */
++static noinline int is_opaque_dir(struct dentry *dentry, int bindex)
++{
++	int err = 0;
++	struct dentry *hidden_dentry;
++	struct dentry *wh_hidden_dentry;
++	struct inode *hidden_inode;
++	struct sioq_args args;
++
++	hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++	hidden_inode = hidden_dentry->d_inode;
++
++	BUG_ON(!S_ISDIR(hidden_inode->i_mode));
++
++	mutex_lock(&hidden_inode->i_mutex);
++
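++	/*
++	 * If the current task may search the lower directory, look up the
++	 * opaque marker directly; otherwise defer the lookup to the sioq
++	 * helper thread, which presumably runs with sufficient privileges.
++	 */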
++	if (!permission(hidden_inode, MAY_EXEC, NULL))
++		wh_hidden_dentry =
++			lookup_one_len(UNIONFS_DIR_OPAQUE, hidden_dentry,
++				       sizeof(UNIONFS_DIR_OPAQUE) - 1);
++	else {
++		args.is_opaque.dentry = hidden_dentry;
++		run_sioq(__is_opaque_dir, &args);
++		wh_hidden_dentry = args.ret;
++	}
++
++	mutex_unlock(&hidden_inode->i_mutex);
++
++	if (IS_ERR(wh_hidden_dentry)) {
++		err = PTR_ERR(wh_hidden_dentry);
++		goto out;
++	}
++
++	/* This is an opaque dir iff wh_hidden_dentry is positive */
++	err = !!wh_hidden_dentry->d_inode;
++
++	dput(wh_hidden_dentry);
++out:
++	return err;
++}
++
++/* main (and complex) driver function for Unionfs's lookup */
++struct dentry *unionfs_lookup_backend(struct dentry *dentry,
++				      struct nameidata *nd, int lookupmode)
++{
++	int err = 0;
++	struct dentry *hidden_dentry = NULL;
++	struct dentry *wh_hidden_dentry = NULL;
++	struct dentry *hidden_dir_dentry = NULL;
++	struct dentry *parent_dentry = NULL;
++	int bindex, bstart, bend, bopaque;
++	int dentry_count = 0;	/* Number of positive dentries. */
++	int first_dentry_offset = -1; /* -1 is uninitialized */
++	struct dentry *first_dentry = NULL;
++	struct dentry *first_hidden_dentry = NULL;
++	struct vfsmount *first_hidden_mnt = NULL;
++	int locked_parent = 0;
++	int locked_child = 0;
++	int allocated_new_info = 0;
++
++	int opaque;
++	char *whname = NULL;
++	const char *name;
++	int namelen;
++
++	/*
++	 * We should already have a lock on this dentry in the case of a
++	 * partial lookup, or a revalidation. Otherwise it is returned from
++	 * new_dentry_private_data already locked.
++	 */
++	if (lookupmode == INTERPOSE_PARTIAL || lookupmode == INTERPOSE_REVAL ||
++	    lookupmode == INTERPOSE_REVAL_NEG)
++		verify_locked(dentry);
++	else {
++		BUG_ON(UNIONFS_D(dentry) != NULL);
++		locked_child = 1;
++	}
++
++	switch (lookupmode) {
++	case INTERPOSE_PARTIAL:
++		break;
++	case INTERPOSE_LOOKUP:
++		if ((err = new_dentry_private_data(dentry)))
++			goto out;
++		allocated_new_info = 1;
++		break;
++	default:
++		if ((err = realloc_dentry_private_data(dentry)))
++			goto out;
++		allocated_new_info = 1;
++		break;
++	}
++
++	/* must initialize dentry operations */
++	dentry->d_op = &unionfs_dops;
++
++	parent_dentry = dget_parent(dentry);
++	/* We never partial lookup the root directory. */
++	if (parent_dentry != dentry) {
++		unionfs_lock_dentry(parent_dentry);
++		locked_parent = 1;
++	} else {
++		dput(parent_dentry);
++		parent_dentry = NULL;
++		goto out;
++	}
++
++	name = dentry->d_name.name;
++	namelen = dentry->d_name.len;
++
++	/* No dentries should get created for possible whiteout names. */
++	if (!is_validname(name)) {
++		err = -EPERM;
++		goto out_free;
++	}
++
++	/* Now start the actual lookup procedure. */
++	bstart = dbstart(parent_dentry);
++	bend = dbend(parent_dentry);
++	bopaque = dbopaque(parent_dentry);
++	BUG_ON(bstart < 0);
++
++	/*
++	 * It would be ideal if we could convert partial lookups to only have
++	 * to do this work when they really need to.  It could probably improve
++	 * performance quite a bit, and maybe simplify the rest of the code.
++	 */
++	if (lookupmode == INTERPOSE_PARTIAL) {
++		bstart++;
++		if ((bopaque != -1) && (bopaque < bend))
++			bend = bopaque;
++	}
++
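++	/*
++	 * Main loop: walk the branches from left (highest priority) to
++	 * right, stopping early if we find a whiteout or an opaque
++	 * directory.
++	 */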
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (lookupmode == INTERPOSE_PARTIAL && hidden_dentry)
++			continue;
++		BUG_ON(hidden_dentry != NULL);
++
++		hidden_dir_dentry =
++			unionfs_lower_dentry_idx(parent_dentry, bindex);
++
++		/* if the parent hidden dentry does not exist, skip this branch */
++		if (!(hidden_dir_dentry && hidden_dir_dentry->d_inode))
++			continue;
++
++		/* also skip it if the parent isn't a directory. */
++		if (!S_ISDIR(hidden_dir_dentry->d_inode->i_mode))
++			continue;
++
++		/* Reuse the whiteout name because its value doesn't change. */
++		if (!whname) {
++			whname = alloc_whname(name, namelen);
++			if (IS_ERR(whname)) {
++				err = PTR_ERR(whname);
++				goto out_free;
++			}
++		}
++
++		/* check if whiteout exists in this branch: lookup .wh.foo */
++		wh_hidden_dentry = lookup_one_len(whname, hidden_dir_dentry,
++						  namelen + UNIONFS_WHLEN);
++		if (IS_ERR(wh_hidden_dentry)) {
++			dput(first_hidden_dentry);
++			unionfs_mntput(first_dentry, first_dentry_offset);
++			err = PTR_ERR(wh_hidden_dentry);
++			goto out_free;
++		}
++
++		if (wh_hidden_dentry->d_inode) {
++			/* We found a whiteout, so let's give up. */
++			if (S_ISREG(wh_hidden_dentry->d_inode->i_mode)) {
++				set_dbend(dentry, bindex);
++				set_dbopaque(dentry, bindex);
++				dput(wh_hidden_dentry);
++				break;
++			}
++			err = -EIO;
++			printk(KERN_NOTICE "unionfs: EIO: invalid whiteout "
++			       "entry type %d.\n",
++			       wh_hidden_dentry->d_inode->i_mode);
++			dput(wh_hidden_dentry);
++			dput(first_hidden_dentry);
++			unionfs_mntput(first_dentry, first_dentry_offset);
++			goto out_free;
++		}
++
++		dput(wh_hidden_dentry);
++		wh_hidden_dentry = NULL;
++
++		/* Now do regular lookup; lookup foo */
++		nd->dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		/* FIXME: fix following line for mount point crossing */
++		nd->mnt = unionfs_lower_mnt_idx(parent_dentry, bindex);
++
++		hidden_dentry = lookup_one_len_nd(name, hidden_dir_dentry,
++						  namelen, nd);
++		if (IS_ERR(hidden_dentry)) {
++			dput(first_hidden_dentry);
++			unionfs_mntput(first_dentry, first_dentry_offset);
++			err = PTR_ERR(hidden_dentry);
++			goto out_free;
++		}
++
++		/*
++		 * Store the first negative dentry specially, because if they
++		 * are all negative we need this for future creates.
++		 */
++		if (!hidden_dentry->d_inode) {
++			if (!first_hidden_dentry && (dbstart(dentry) == -1)) {
++				first_hidden_dentry = hidden_dentry;
++				/*
++				 * FIXME: following line needs to be changed
++				 * to allow mount-point crossing
++				 */
++				first_dentry = parent_dentry;
++				first_hidden_mnt =
++					unionfs_mntget(parent_dentry, bindex);
++				first_dentry_offset = bindex;
++			} else
++				dput(hidden_dentry);
++
++			continue;
++		}
++
++		/* number of positive dentries */
++		dentry_count++;
++
++		/* store underlying dentry */
++		if (dbstart(dentry) == -1)
++			set_dbstart(dentry, bindex);
++		unionfs_set_lower_dentry_idx(dentry, bindex, hidden_dentry);
++		/*
++		 * FIXME: the following line needs to get fixed to allow
++		 * mount-point crossing
++		 */
++		unionfs_set_lower_mnt_idx(dentry, bindex,
++					  unionfs_mntget(parent_dentry,
++							 bindex));
++		set_dbend(dentry, bindex);
++
++		/* update parent directory's atime with the bindex */
++		fsstack_copy_attr_atime(parent_dentry->d_inode,
++					hidden_dir_dentry->d_inode);
++
++		/* We terminate file lookups here. */
++		if (!S_ISDIR(hidden_dentry->d_inode->i_mode)) {
++			if (lookupmode == INTERPOSE_PARTIAL)
++				continue;
++			if (dentry_count == 1)
++				goto out_positive;
++			/* This can only happen with mixed D-*-F-* */
++			BUG_ON(!S_ISDIR(unionfs_lower_dentry(dentry)->
++					d_inode->i_mode));
++			continue;
++		}
++
++		opaque = is_opaque_dir(dentry, bindex);
++		if (opaque < 0) {
++			dput(first_hidden_dentry);
++			unionfs_mntput(first_dentry, first_dentry_offset);
++			err = opaque;
++			goto out_free;
++		} else if (opaque) {
++			set_dbend(dentry, bindex);
++			set_dbopaque(dentry, bindex);
++			break;
++		}
++	}
++
++	if (dentry_count)
++		goto out_positive;
++	else
++		goto out_negative;
++
++out_negative:
++	if (lookupmode == INTERPOSE_PARTIAL)
++		goto out;
++
++	/* If we've only got negative dentries, then use the leftmost one. */
++	if (lookupmode == INTERPOSE_REVAL) {
++		if (dentry->d_inode)
++			UNIONFS_I(dentry->d_inode)->stale = 1;
++
++		goto out;
++	}
++	/* This should only happen if we found a whiteout. */
++	if (first_dentry_offset == -1) {
++		nd->dentry = dentry;
++		/* FIXME: fix following line for mount point crossing */
++		nd->mnt = unionfs_lower_mnt_idx(parent_dentry, bindex);
++
++		first_hidden_dentry =
++			lookup_one_len_nd(name, hidden_dir_dentry,
++					  namelen, nd);
++		first_dentry_offset = bindex;
++		if (IS_ERR(first_hidden_dentry)) {
++			err = PTR_ERR(first_hidden_dentry);
++			goto out;
++		}
++
++		/*
++		 * FIXME: the following line needs to be changed to allow
++		 * mount-point crossing
++		 */
++		first_dentry = dentry;
++		first_hidden_mnt = unionfs_mntget(dentry, bindex);
++	}
++	unionfs_set_lower_dentry_idx(dentry, first_dentry_offset,
++				     first_hidden_dentry);
++	unionfs_set_lower_mnt_idx(dentry, first_dentry_offset,
++				  first_hidden_mnt);
++	set_dbstart(dentry, first_dentry_offset);
++	set_dbend(dentry, first_dentry_offset);
++
++	if (lookupmode == INTERPOSE_REVAL_NEG)
++		BUG_ON(dentry->d_inode != NULL);
++	else
++		d_add(dentry, NULL);
++	goto out;
++
++/* This part of the code is for positive dentries. */
++out_positive:
++	BUG_ON(dentry_count <= 0);
++
++	/*
++	 * If we're holding onto the first negative dentry & corresponding
++	 * vfsmount - throw it out.
++	 */
++	dput(first_hidden_dentry);
++	unionfs_mntput(first_dentry, first_dentry_offset);
++
++	/* Partial lookups need to re-interpose, or throw away older negs. */
++	if (lookupmode == INTERPOSE_PARTIAL) {
++		if (dentry->d_inode) {
++			unionfs_reinterpose(dentry);
++			goto out;
++		}
++
++		/*
++		 * This somehow turned positive, so it is as if we had a
++		 * negative revalidation.
++		 */
++		lookupmode = INTERPOSE_REVAL_NEG;
++
++		update_bstart(dentry);
++		bstart = dbstart(dentry);
++		bend = dbend(dentry);
++	}
++
++	err = unionfs_interpose(dentry, dentry->d_sb, lookupmode);
++	if (err)
++		goto out_drop;
++
++	goto out;
++
++out_drop:
++	d_drop(dentry);
++
++out_free:
++	/* should dput all the underlying dentries on error condition */
++	bstart = dbstart(dentry);
++	if (bstart >= 0) {
++		bend = dbend(dentry);
++		for (bindex = bstart; bindex <= bend; bindex++) {
++			dput(unionfs_lower_dentry_idx(dentry, bindex));
++			unionfs_mntput(dentry, bindex);
++		}
++	}
++	kfree(UNIONFS_D(dentry)->lower_paths);
++	UNIONFS_D(dentry)->lower_paths = NULL;
++	set_dbstart(dentry, -1);
++	set_dbend(dentry, -1);
++
++out:
++	if (!err && UNIONFS_D(dentry)) {
++		BUG_ON(dbend(dentry) > UNIONFS_D(dentry)->bcount);
++		BUG_ON(dbend(dentry) > sbmax(dentry->d_sb));
++		BUG_ON(dbstart(dentry) < 0);
++	}
++	kfree(whname);
++	if (locked_parent)
++		unionfs_unlock_dentry(parent_dentry);
++	dput(parent_dentry);
++	if (locked_child || (err && allocated_new_info))
++		unionfs_unlock_dentry(dentry);
++	return ERR_PTR(err);
++}
++
++/* This is a utility function that fills in a unionfs dentry */
++int unionfs_partial_lookup(struct dentry *dentry)
++{
++	struct dentry *tmp;
++	struct nameidata nd = { .flags = 0 };
++
++	tmp = unionfs_lookup_backend(dentry, &nd, INTERPOSE_PARTIAL);
++	if (!tmp)
++		return 0;
++	if (IS_ERR(tmp))
++		return PTR_ERR(tmp);
++	/* need to change the interface */
++	BUG_ON(tmp != dentry);
++	return -ENOSYS;
++}
++
++/* The dentry cache is just so we have properly sized dentries. */
++static struct kmem_cache *unionfs_dentry_cachep;
++int unionfs_init_dentry_cache(void)
++{
++	unionfs_dentry_cachep =
++		kmem_cache_create("unionfs_dentry",
++				  sizeof(struct unionfs_dentry_info),
++				  0, SLAB_RECLAIM_ACCOUNT, NULL, NULL);
++
++	return (unionfs_dentry_cachep ? 0 : -ENOMEM);
++}
++
++void unionfs_destroy_dentry_cache(void)
++{
++	if (unionfs_dentry_cachep)
++		kmem_cache_destroy(unionfs_dentry_cachep);
++}
++
++void free_dentry_private_data(struct dentry *dentry)
++{
++	if (!dentry || !dentry->d_fsdata)
++		return;
++	kmem_cache_free(unionfs_dentry_cachep, dentry->d_fsdata);
++	dentry->d_fsdata = NULL;
++}
++
++static inline int __realloc_dentry_private_data(struct dentry *dentry)
++{
++	struct unionfs_dentry_info *info = UNIONFS_D(dentry);
++	void *p;
++	int size;
++
++	BUG_ON(!info);
++
++	size = sizeof(struct path) * sbmax(dentry->d_sb);
++	p = krealloc(info->lower_paths, size, GFP_ATOMIC);
++	if (!p)
++		return -ENOMEM;
++
++	info->lower_paths = p;
++
++	info->bstart = -1;
++	info->bend = -1;
++	info->bopaque = -1;
++	info->bcount = sbmax(dentry->d_sb);
++	atomic_set(&info->generation,
++			atomic_read(&UNIONFS_SB(dentry->d_sb)->generation));
++
++	memset(info->lower_paths, 0, size);
++
++	return 0;
++}
++
++/* UNIONFS_D(dentry)->lock must be locked */
++int realloc_dentry_private_data(struct dentry *dentry)
++{
++	if (!__realloc_dentry_private_data(dentry))
++		return 0;
++
++	kfree(UNIONFS_D(dentry)->lower_paths);
++	free_dentry_private_data(dentry);
++	return -ENOMEM;
++}
++
++/*
++ * Allocate new dentry private data.  On success the dentry is returned
++ * with info->lock held; the caller is responsible for unlocking it.
++ */
++int new_dentry_private_data(struct dentry *dentry)
++{
++	struct unionfs_dentry_info *info = UNIONFS_D(dentry);
++
++	BUG_ON(info);
++
++	info = kmem_cache_alloc(unionfs_dentry_cachep, GFP_ATOMIC);
++	if (!info)
++		return -ENOMEM;
++
++	mutex_init(&info->lock);
++	mutex_lock(&info->lock);
++
++	info->lower_paths = NULL;
++
++	dentry->d_fsdata = info;
++
++	if (!__realloc_dentry_private_data(dentry))
++		return 0;
++
++	mutex_unlock(&info->lock);
++	free_dentry_private_data(dentry);
++	return -ENOMEM;
++}
++
++/*
++ * scan through the lower dentry objects, and set bstart to reflect the
++ * starting branch
++ */
++void update_bstart(struct dentry *dentry)
++{
++	int bindex;
++	int bstart = dbstart(dentry);
++	int bend = dbend(dentry);
++	struct dentry *hidden_dentry;
++
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (!hidden_dentry)
++			continue;
++		if (hidden_dentry->d_inode) {
++			set_dbstart(dentry, bindex);
++			break;
++		}
++		dput(hidden_dentry);
++		unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
++	}
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/main.c linux-2.6.22-try2/fs/unionfs/main.c
+--- linux-2.6.22-570/fs/unionfs/main.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/main.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,729 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++#include <linux/module.h>
++#include <linux/moduleparam.h>
++
++/*
++ * Connect a unionfs dentry/inode with several lower ones.  This is
++ * the classic stackable file system "vnode interposition" action.
++ *
++ * @sb: unionfs's super_block
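++ * @flag: one of the INTERPOSE_* modes, selecting how the new inode gets
++ *        attached to the dentry (see the switch at the end)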
++ */
++int unionfs_interpose(struct dentry *dentry, struct super_block *sb, int flag)
++{
++	struct inode *hidden_inode;
++	struct dentry *hidden_dentry;
++	int err = 0;
++	struct inode *inode;
++	int is_negative_dentry = 1;
++	int bindex, bstart, bend;
++
++	verify_locked(dentry);
++
++	bstart = dbstart(dentry);
++	bend = dbend(dentry);
++
++	/* Make sure that we didn't get a negative dentry. */
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		if (unionfs_lower_dentry_idx(dentry, bindex) &&
++		    unionfs_lower_dentry_idx(dentry, bindex)->d_inode) {
++			is_negative_dentry = 0;
++			break;
++		}
++	}
++	BUG_ON(is_negative_dentry);
++
++	/*
++	 * We allocate our new inode below, by calling iget.
++	 * iget will call our read_inode which will initialize some
++	 * of the new inode's fields
++	 */
++
++	/*
++	 * On revalidate we've already got our own inode and just need
++	 * to fix it up.
++	 */
++	if (flag == INTERPOSE_REVAL) {
++		inode = dentry->d_inode;
++		UNIONFS_I(inode)->bstart = -1;
++		UNIONFS_I(inode)->bend = -1;
++		atomic_set(&UNIONFS_I(inode)->generation,
++			   atomic_read(&UNIONFS_SB(sb)->generation));
++
++		UNIONFS_I(inode)->lower_inodes =
++			kcalloc(sbmax(sb), sizeof(struct inode *), GFP_KERNEL);
++		if (!UNIONFS_I(inode)->lower_inodes) {
++			err = -ENOMEM;
++			goto out;
++		}
++	} else {
++		/* get unique inode number for unionfs */
++		inode = iget(sb, iunique(sb, UNIONFS_ROOT_INO));
++		if (!inode) {
++			err = -EACCES;
++			goto out;
++		}
++
++		if (atomic_read(&inode->i_count) > 1)
++			goto skip;
++	}
++
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (!hidden_dentry) {
++			unionfs_set_lower_inode_idx(inode, bindex, NULL);
++			continue;
++		}
++
++		/* skip negative dentries; else grab and store the lower inode */
++		if (!hidden_dentry->d_inode)
++			continue;
++
++		unionfs_set_lower_inode_idx(inode, bindex,
++					    igrab(hidden_dentry->d_inode));
++	}
++
++	ibstart(inode) = dbstart(dentry);
++	ibend(inode) = dbend(dentry);
++
++	/* Use attributes from the first branch. */
++	hidden_inode = unionfs_lower_inode(inode);
++
++	/* Use different set of inode ops for symlinks & directories */
++	if (S_ISLNK(hidden_inode->i_mode))
++		inode->i_op = &unionfs_symlink_iops;
++	else if (S_ISDIR(hidden_inode->i_mode))
++		inode->i_op = &unionfs_dir_iops;
++
++	/* Use different set of file ops for directories */
++	if (S_ISDIR(hidden_inode->i_mode))
++		inode->i_fop = &unionfs_dir_fops;
++
++	/* properly initialize special inodes */
++	if (S_ISBLK(hidden_inode->i_mode) || S_ISCHR(hidden_inode->i_mode) ||
++	    S_ISFIFO(hidden_inode->i_mode) || S_ISSOCK(hidden_inode->i_mode))
++		init_special_inode(inode, hidden_inode->i_mode,
++				   hidden_inode->i_rdev);
++
++	/* all well, copy inode attributes */
++	fsstack_copy_attr_all(inode, hidden_inode, unionfs_get_nlinks);
++	fsstack_copy_inode_size(inode, hidden_inode);
++
++skip:
++	/* only (our) lookup wants to do a d_add */
++	switch (flag) {
++	case INTERPOSE_DEFAULT:
++	case INTERPOSE_REVAL_NEG:
++		d_instantiate(dentry, inode);
++		break;
++	case INTERPOSE_LOOKUP:
++		err = PTR_ERR(d_splice_alias(inode, dentry));
++		break;
++	case INTERPOSE_REVAL:
++		/* Do nothing. */
++		break;
++	default:
++		printk(KERN_ERR "unionfs: invalid interpose flag passed!");
++		BUG();
++	}
++
++out:
++	return err;
++}
++
++/* like interpose above, but for an already existing dentry */
++void unionfs_reinterpose(struct dentry *dentry)
++{
++	struct dentry *hidden_dentry;
++	struct inode *inode;
++	int bindex, bstart, bend;
++
++	verify_locked(dentry);
++
++	/* This is a pre-allocated inode */
++	inode = dentry->d_inode;
++
++	bstart = dbstart(dentry);
++	bend = dbend(dentry);
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++		if (!hidden_dentry)
++			continue;
++
++		if (!hidden_dentry->d_inode)
++			continue;
++		if (unionfs_lower_inode_idx(inode, bindex))
++			continue;
++		unionfs_set_lower_inode_idx(inode, bindex,
++					    igrab(hidden_dentry->d_inode));
++	}
++	ibstart(inode) = dbstart(dentry);
++	ibend(inode) = dbend(dentry);
++}
++
++/*
++ * make sure the branch we just looked up (nd) makes sense:
++ *
++ * 1) we're not trying to stack unionfs on top of unionfs
++ * 2) it exists
++ * 3) is a directory
++ */
++int check_branch(struct nameidata *nd)
++{
++	if (!strcmp(nd->dentry->d_sb->s_type->name, "unionfs"))
++		return -EINVAL;
++	if (!nd->dentry->d_inode)
++		return -ENOENT;
++	if (!S_ISDIR(nd->dentry->d_inode->i_mode))
++		return -ENOTDIR;
++	return 0;
++}
++
++/* checks if two hidden_dentries have overlapping branches */
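++/*
++ * (i.e., whether one is an ancestor of the other: /a and /a/b overlap,
++ * while /a/b and /a/c do not)
++ */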
++static int is_branch_overlap(struct dentry *dent1, struct dentry *dent2)
++{
++	struct dentry *dent = NULL;
++
++	dent = dent1;
++	while ((dent != dent2) && (dent->d_parent != dent))
++		dent = dent->d_parent;
++
++	if (dent == dent2)
++		return 1;
++
++	dent = dent2;
++	while ((dent != dent1) && (dent->d_parent != dent))
++		dent = dent->d_parent;
++
++	return (dent == dent1);
++}
++
++/*
++ * Parse branch mode helper function
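++ * ("ro" yields MAY_READ, "rw" yields MAY_READ|MAY_WRITE, and a missing
++ * or unrecognized mode yields 0 so the caller can apply its default)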
++ */
++int __parse_branch_mode(const char *name)
++{
++	if (!name)
++		return 0;
++	if (!strcmp(name, "ro"))
++		return MAY_READ;
++	if (!strcmp(name, "rw"))
++		return (MAY_READ | MAY_WRITE);
++	return 0;
++}
++
++/*
++ * Parse "ro" or "rw" options, but default to "rw" of no mode options
++ * was specified.
++ */
++int parse_branch_mode(const char *name)
++{
++	int perms =  __parse_branch_mode(name);
++
++	if (perms == 0)
++		perms = MAY_READ | MAY_WRITE;
++	return perms;
++}
++
++/* 
++ * parse the dirs= mount argument
++ *
++ * We don't need to lock the superblock private data's rwsem, as we get
++ * called only by unionfs_read_super - it is still a long time before anyone
++ * can even get a reference to us.
++ */
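++/*
++ * For example, a (hypothetical) mount like
++ *
++ *   mount -t unionfs -o dirs=/writable=rw:/ro1=ro:/ro2 none /mnt/union
++ *
++ * yields three branches; the leftmost branch must be writable, which is
++ * enforced below.
++ */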
++static int parse_dirs_option(struct super_block *sb, struct unionfs_dentry_info
++			     *hidden_root_info, char *options)
++{
++	struct nameidata nd;
++	char *name;
++	int err = 0;
++	int branches = 1;
++	int bindex = 0;
++	int i = 0;
++	int j = 0;
++
++	struct dentry *dent1;
++	struct dentry *dent2;
++
++	if (options[0] == '\0') {
++		printk(KERN_WARNING "unionfs: no branches specified\n");
++		err = -EINVAL;
++		goto out;
++	}
++
++	/*
++	 * Each colon means we have a separator; this is really just a rough
++	 * guess, since strsep will handle empty fields for us.
++	 */
++	for (i = 0; options[i]; i++)
++		if (options[i] == ':')
++			branches++;
++
++	/* allocate space for underlying pointers to hidden dentry */
++	UNIONFS_SB(sb)->data =
++		kcalloc(branches, sizeof(struct unionfs_data), GFP_KERNEL);
++	if (!UNIONFS_SB(sb)->data) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	hidden_root_info->lower_paths =
++		kcalloc(branches, sizeof(struct path), GFP_KERNEL);
++	if (!hidden_root_info->lower_paths) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	/* now parsing a string such as "b1:b2=rw:b3=ro:b4" */
++	branches = 0;
++	while ((name = strsep(&options, ":")) != NULL) {
++		int perms;
++		char *mode = strchr(name, '=');
++
++		if (!name || !*name)
++			continue;
++
++		branches++;
++
++		/* strip off '=' if any */
++		if (mode)
++			*mode++ = '\0';
++
++		perms = parse_branch_mode(mode);
++		if (!bindex && !(perms & MAY_WRITE)) {
++			err = -EINVAL;
++			goto out;
++		}
++
++		err = path_lookup(name, LOOKUP_FOLLOW, &nd);
++		if (err) {
++			printk(KERN_WARNING "unionfs: error accessing "
++			       "hidden directory '%s' (error %d)\n",
++			       name, err);
++			goto out;
++		}
++
++		if ((err = check_branch(&nd))) {
++			printk(KERN_WARNING "unionfs: hidden directory "
++			       "'%s' is not a valid branch\n", name);
++			path_release(&nd);
++			goto out;
++		}
++
++		hidden_root_info->lower_paths[bindex].dentry = nd.dentry;
++		hidden_root_info->lower_paths[bindex].mnt = nd.mnt;
++
++		set_branchperms(sb, bindex, perms);
++		set_branch_count(sb, bindex, 0);
++		new_branch_id(sb, bindex);
++
++		if (hidden_root_info->bstart < 0)
++			hidden_root_info->bstart = bindex;
++		hidden_root_info->bend = bindex;
++		bindex++;
++	}
++
++	if (branches == 0) {
++		printk(KERN_WARNING "unionfs: no branches specified\n");
++		err = -EINVAL;
++		goto out;
++	}
++
++	BUG_ON(branches != (hidden_root_info->bend + 1));
++
++	/*
++	 * Ensure that no overlaps exist in the branches.
++	 *
++	 * This test is required because the Linux kernel has no support
++	 * currently for ensuring coherency between stackable layers and
++	 * branches.  If we were to allow overlapping branches, it would be
++	 * possible, for example, to delete a file via one branch, which
++	 * would not be reflected in another branch.  Such incoherency could
++	 * lead to inconsistencies and even kernel oopses.  Rather than
++	 * implement hacks to work around some of these cache-coherency
++	 * problems, we prevent branch overlapping, for now.  A complete
++	 * solution will involve proper kernel/VFS support for cache
++	 * coherency, at which time we could safely remove this
++	 * branch-overlapping test.
++	 */
++	for (i = 0; i < branches; i++) {
++		for (j = i + 1; j < branches; j++) {
++			dent1 = hidden_root_info->lower_paths[i].dentry;
++			dent2 = hidden_root_info->lower_paths[j].dentry;
++
++			if (is_branch_overlap(dent1, dent2)) {
++				printk(KERN_WARNING "unionfs: branches %d and "
++				       "%d overlap\n", i, j);
++				err = -EINVAL;
++				goto out;
++			}
++		}
++	}
++
++out:
++	if (err) {
++		for (i = 0; i < branches; i++)
++			if (hidden_root_info->lower_paths[i].dentry) {
++				dput(hidden_root_info->lower_paths[i].dentry);
++				/* initialize: can't use unionfs_mntput here */
++				mntput(hidden_root_info->lower_paths[i].mnt);
++			}
++
++		kfree(hidden_root_info->lower_paths);
++		kfree(UNIONFS_SB(sb)->data);
++
++		/*
++		 * MUST clear the pointers to prevent potential double free if
++		 * the caller dies later on
++		 */
++		hidden_root_info->lower_paths = NULL;
++		UNIONFS_SB(sb)->data = NULL;
++	}
++	return err;
++}
++
++/*
++ * Parse mount options.  See the manual page for usage instructions.
++ *
++ * Returns the dentry object of the lower-level (hidden) directory;
++ * We want to mount our stackable file system on top of that hidden directory.
++ */
++static struct unionfs_dentry_info *unionfs_parse_options(
++					struct super_block *sb,
++					char *options)
++{
++	struct unionfs_dentry_info *hidden_root_info;
++	char *optname;
++	int err = 0;
++	int bindex;
++	int dirsfound = 0;
++
++	/* allocate private data area */
++	err = -ENOMEM;
++	hidden_root_info =
++		kzalloc(sizeof(struct unionfs_dentry_info), GFP_KERNEL);
++	if (!hidden_root_info)
++		goto out_error;
++	hidden_root_info->bstart = -1;
++	hidden_root_info->bend = -1;
++	hidden_root_info->bopaque = -1;
++
++	while ((optname = strsep(&options, ",")) != NULL) {
++		char *optarg;
++		char *endptr;
++		int intval;
++
++		if (!optname || !*optname)
++			continue;
++
++		optarg = strchr(optname, '=');
++		if (optarg)
++			*optarg++ = '\0';
++
++		/*
++		 * All of our options take an argument now. Insert ones that
++		 * don't, above this check.
++		 */
++		if (!optarg) {
++			printk("unionfs: %s requires an argument.\n", optname);
++			err = -EINVAL;
++			goto out_error;
++		}
++
++		if (!strcmp("dirs", optname)) {
++			if (++dirsfound > 1) {
++				printk(KERN_WARNING
++				       "unionfs: multiple dirs specified\n");
++				err = -EINVAL;
++				goto out_error;
++			}
++			err = parse_dirs_option(sb, hidden_root_info, optarg);
++			if (err)
++				goto out_error;
++			continue;
++		}
++
++		/* All of these options require an integer argument. */
++		intval = simple_strtoul(optarg, &endptr, 0);
++		if (*endptr) {
++			printk(KERN_WARNING
++			       "unionfs: invalid %s option '%s'\n",
++			       optname, optarg);
++			err = -EINVAL;
++			goto out_error;
++		}
++
++		err = -EINVAL;
++		printk(KERN_WARNING
++		       "unionfs: unrecognized option '%s'\n", optname);
++		goto out_error;
++	}
++	if (dirsfound != 1) {
++		printk(KERN_WARNING "unionfs: dirs option required\n");
++		err = -EINVAL;
++		goto out_error;
++	}
++	goto out;
++
++out_error:
++	if (hidden_root_info && hidden_root_info->lower_paths) {
++		for (bindex = hidden_root_info->bstart;
++		     bindex >= 0 && bindex <= hidden_root_info->bend;
++		     bindex++) {
++			struct dentry *d;
++			struct vfsmount *m;
++
++			d = hidden_root_info->lower_paths[bindex].dentry;
++			m = hidden_root_info->lower_paths[bindex].mnt;
++
++			dput(d);
++			/* initializing: can't use unionfs_mntput here */
++			mntput(m);
++		}
++	}
++
++	kfree(hidden_root_info->lower_paths);
++	kfree(hidden_root_info);
++
++	kfree(UNIONFS_SB(sb)->data);
++	UNIONFS_SB(sb)->data = NULL;
++
++	hidden_root_info = ERR_PTR(err);
++out:
++	return hidden_root_info;
++}
++
++/*
++ * our custom d_alloc_root work-alike
++ *
++ * we can't use d_alloc_root if we want to use our own interpose function
++ * unchanged, so we simply call our own "fake" d_alloc_root
++ */
++static struct dentry *unionfs_d_alloc_root(struct super_block *sb)
++{
++	struct dentry *ret = NULL;
++
++	if (sb) {
++		static const struct qstr name = { .name = "/", .len = 1 };
++
++		ret = d_alloc(NULL, &name);
++		if (ret) {
++			ret->d_op = &unionfs_dops;
++			ret->d_sb = sb;
++			ret->d_parent = ret;
++		}
++	}
++	return ret;
++}
++
++/*
++ * There is no need to lock the unionfs_super_info's rwsem as there is no
++ * way anyone can have a reference to the superblock at this point in time.
++ */
++static int unionfs_read_super(struct super_block *sb, void *raw_data,
++			      int silent)
++{
++	int err = 0;
++
++	struct unionfs_dentry_info *hidden_root_info = NULL;
++	int bindex, bstart, bend;
++
++	if (!raw_data) {
++		printk(KERN_WARNING
++		       "unionfs: read_super: missing data argument\n");
++		err = -EINVAL;
++		goto out;
++	}
++
++	/* Allocate superblock private data */
++	sb->s_fs_info = kzalloc(sizeof(struct unionfs_sb_info), GFP_KERNEL);
++	if (!UNIONFS_SB(sb)) {
++		printk(KERN_WARNING "unionfs: read_super: out of memory\n");
++		err = -ENOMEM;
++		goto out;
++	}
++
++	UNIONFS_SB(sb)->bend = -1;
++	atomic_set(&UNIONFS_SB(sb)->generation, 1);
++	init_rwsem(&UNIONFS_SB(sb)->rwsem);
++	UNIONFS_SB(sb)->high_branch_id = -1; /* -1 == invalid branch ID */
++
++	hidden_root_info = unionfs_parse_options(sb, raw_data);
++	if (IS_ERR(hidden_root_info)) {
++		printk(KERN_WARNING
++		       "unionfs: read_super: error while parsing options "
++		       "(err = %ld)\n", PTR_ERR(hidden_root_info));
++		err = PTR_ERR(hidden_root_info);
++		hidden_root_info = NULL;
++		goto out_free;
++	}
++	if (hidden_root_info->bstart == -1) {
++		err = -ENOENT;
++		goto out_free;
++	}
++
++	/* set the hidden superblock field of upper superblock */
++	bstart = hidden_root_info->bstart;
++	BUG_ON(bstart != 0);
++	sbend(sb) = bend = hidden_root_info->bend;
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		struct dentry *d = hidden_root_info->lower_paths[bindex].dentry;
++		unionfs_set_lower_super_idx(sb, bindex, d->d_sb);
++	}
++
++	/* s_maxbytes: maximum file size, from the highest priority branch */
++	sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
++
++	sb->s_op = &unionfs_sops;
++
++	/* See comment next to the definition of unionfs_d_alloc_root */
++	sb->s_root = unionfs_d_alloc_root(sb);
++	if (!sb->s_root) {
++		err = -ENOMEM;
++		goto out_dput;
++	}
++
++	/* link the upper and lower dentries */
++	sb->s_root->d_fsdata = NULL;
++	if ((err = new_dentry_private_data(sb->s_root)))
++		goto out_freedpd;
++
++	/* Set the hidden dentries for s_root */
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		struct dentry *d;
++		struct vfsmount *m;
++
++		d = hidden_root_info->lower_paths[bindex].dentry;
++		m = hidden_root_info->lower_paths[bindex].mnt;
++
++		unionfs_set_lower_dentry_idx(sb->s_root, bindex, d);
++		unionfs_set_lower_mnt_idx(sb->s_root, bindex, m);
++	}
++	set_dbstart(sb->s_root, bstart);
++	set_dbend(sb->s_root, bend);
++
++	/* Set the generation number to one, since this is for the mount. */
++	atomic_set(&UNIONFS_D(sb->s_root)->generation, 1);
++
++	/* call interpose to create the upper level inode */
++	err = unionfs_interpose(sb->s_root, sb, 0);
++	unionfs_unlock_dentry(sb->s_root);
++	if (!err)
++		goto out;
++	/* else fall through */
++
++out_freedpd:
++	if (UNIONFS_D(sb->s_root)) {
++		kfree(UNIONFS_D(sb->s_root)->lower_paths);
++		free_dentry_private_data(sb->s_root);
++	}
++	dput(sb->s_root);
++
++out_dput:
++	if (hidden_root_info && !IS_ERR(hidden_root_info)) {
++		for (bindex = hidden_root_info->bstart;
++		     bindex <= hidden_root_info->bend; bindex++) {
++			struct dentry *d;
++			struct vfsmount *m;
++
++			d = hidden_root_info->lower_paths[bindex].dentry;
++			m = hidden_root_info->lower_paths[bindex].mnt;
++
++			dput(d);
++			/* initializing: can't use unionfs_mntput here */
++			mntput(m);
++		}
++		kfree(hidden_root_info->lower_paths);
++		kfree(hidden_root_info);
++		hidden_root_info = NULL;
++	}
++
++out_free:
++	kfree(UNIONFS_SB(sb)->data);
++	kfree(UNIONFS_SB(sb));
++	sb->s_fs_info = NULL;
++
++out:
++	if (hidden_root_info && !IS_ERR(hidden_root_info)) {
++		kfree(hidden_root_info->lower_paths);
++		kfree(hidden_root_info);
++	}
++	return err;
++}
++
++static int unionfs_get_sb(struct file_system_type *fs_type,
++			  int flags, const char *dev_name,
++			  void *raw_data, struct vfsmount *mnt)
++{
++	return get_sb_nodev(fs_type, flags, raw_data, unionfs_read_super, mnt);
++}
++
++static struct file_system_type unionfs_fs_type = {
++	.owner		= THIS_MODULE,
++	.name		= "unionfs",
++	.get_sb		= unionfs_get_sb,
++	.kill_sb	= generic_shutdown_super,
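++	/* FS_REVAL_DOT: have the VFS revalidate "." and ".." for staleness */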
++	.fs_flags	= FS_REVAL_DOT,
++};
++
++static int __init init_unionfs_fs(void)
++{
++	int err;
++
++	printk("Registering unionfs " UNIONFS_VERSION "\n");
++
++	if ((err = unionfs_init_filldir_cache()))
++		goto out;
++	if ((err = unionfs_init_inode_cache()))
++		goto out;
++	if ((err = unionfs_init_dentry_cache()))
++		goto out;
++	if ((err = init_sioq()))
++		goto out;
++	err = register_filesystem(&unionfs_fs_type);
++out:
++	if (err) {
++		stop_sioq();
++		unionfs_destroy_filldir_cache();
++		unionfs_destroy_inode_cache();
++		unionfs_destroy_dentry_cache();
++	}
++	return err;
++}
++
++static void __exit exit_unionfs_fs(void)
++{
++	stop_sioq();
++	unionfs_destroy_filldir_cache();
++	unionfs_destroy_inode_cache();
++	unionfs_destroy_dentry_cache();
++	unregister_filesystem(&unionfs_fs_type);
++	printk("Completed unionfs module unload.\n");
++}
++
++MODULE_AUTHOR("Erez Zadok, Filesystems and Storage Lab, Stony Brook University"
++	      " (http://www.fsl.cs.sunysb.edu)");
++MODULE_DESCRIPTION("Unionfs " UNIONFS_VERSION
++		   " (http://unionfs.filesystems.org)");
++MODULE_LICENSE("GPL");
++
++module_init(init_unionfs_fs);
++module_exit(exit_unionfs_fs);
+diff -Nurb linux-2.6.22-570/fs/unionfs/mmap.c linux-2.6.22-try2/fs/unionfs/mmap.c
+--- linux-2.6.22-570/fs/unionfs/mmap.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/mmap.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,348 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2006      Shaya Potter
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of State University of New York
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * Unionfs doesn't implement ->writepages, which is OK with the VFS and
++ * keeps our code simpler and smaller.  Nevertheless, somehow, our own
++ * ->writepage must be called so we can sync the upper pages with the lower
++ * pages: otherwise data changed at the upper layer won't get written to the
++ * lower layer.
++ *
++ * Some lower file systems (e.g., NFS) expect the VFS to call its writepages
++ * only, which in turn will call generic_writepages and invoke each of the
++ * lower file system's ->writepage.  NFS in particular uses the
++ * wbc->fs_private field in its nfs_writepage, which is set in its
++ * nfs_writepages.  So if we don't call the lower nfs_writepages first, then
++ * NFS's nfs_writepage will dereference a NULL wbc->fs_private and cause an
++ * OOPS.  If, however, we implement a unionfs_writepages and then we do call
++ * the lower nfs_writepages, then we "lose control" over the pages we're
++ * trying to write to the lower file system: we won't be writing our own
++ * new/modified data from the upper pages to the lower pages, and any
++ * mmap-based changes are lost.
++ *
++ * This is a fundamental cache-coherency problem in Linux.  The kernel isn't
++ * able to support such stacking abstractions cleanly.  One possible clean
++ * way would be that a lower file system's ->writepage method have some sort
++ * of a callback to validate if any upper pages for the same file+offset
++ * exist and have newer content in them.
++ *
++ * This whole NULL ptr dereference is triggered at the lower file system
++ * (NFS) because the wbc->for_writepages is set to 1.  Therefore, to avoid
++ * this NULL pointer dereference, we set this flag to 0 and restore it upon
++ * exit.  This probably means that we're slightly less efficient in writing
++ * pages out, doing them one at a time, but at least we avoid the oops until
++ * such day as Linux can better support address_space_ops in a stackable
++ * fashion.
++ */
++int unionfs_writepage(struct page *page, struct writeback_control *wbc)
++{
++	int err = -EIO;
++	struct inode *inode;
++	struct inode *lower_inode;
++	struct page *lower_page;
++	char *kaddr, *lower_kaddr;
++	int saved_for_writepages = wbc->for_writepages;
++
++	inode = page->mapping->host;
++	lower_inode = unionfs_lower_inode(inode);
++
++	/* find lower page (returns a locked page) */
++	lower_page = grab_cache_page(lower_inode->i_mapping, page->index);
++	if (!lower_page)
++		goto out;
++
++	/* map both pages and copy the upper page into the lower page */
++	kaddr = kmap(page);
++	lower_kaddr = kmap(lower_page);
++
++	memcpy(lower_kaddr, kaddr, PAGE_CACHE_SIZE);
++
++	kunmap(page);
++	kunmap(lower_page);
++
++	BUG_ON(!lower_inode->i_mapping->a_ops->writepage);
++
++	/* workaround for some lower file systems: see big comment on top */
++	if (wbc->for_writepages && !wbc->fs_private)
++		wbc->for_writepages = 0;
++
++	/* call lower writepage (expects locked page) */
++	err = lower_inode->i_mapping->a_ops->writepage(lower_page, wbc);
++	wbc->for_writepages = saved_for_writepages; /* restore value */
++
++	/*
++	 * update mtime and ctime of lower level file system
++	 * unionfs' mtime and ctime are updated by generic_file_write
++	 */
++	lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
++
++	page_cache_release(lower_page);	/* b/c grab_cache_page increased refcnt */
++
++	if (err)
++		ClearPageUptodate(page);
++	else
++		SetPageUptodate(page);
++
++out:
++	unlock_page(page);
++	return err;
++}
++
++/*
++ * readpage is called from generic_page_read and the fault handler.
++ * If your file system uses generic_page_read for the read op, it
++ * must implement readpage.
++ *
++ * Readpage expects a locked page, and must unlock it.
++ */
++static int unionfs_do_readpage(struct file *file, struct page *page)
++{
++	int err = -EIO;
++	struct dentry *dentry;
++	struct file *lower_file = NULL;
++	struct inode *inode, *lower_inode;
++	char *page_data;
++	struct page *lower_page;
++	char *lower_page_data;
++
++	dentry = file->f_dentry;
++	if (UNIONFS_F(file) == NULL) {
++		err = -ENOENT;
++		goto out_err;
++	}
++
++	lower_file = unionfs_lower_file(file);
++	inode = dentry->d_inode;
++	lower_inode = unionfs_lower_inode(inode);
++
++	lower_page = NULL;
++
++	/* find lower page (returns a locked page) */
++	lower_page = read_cache_page(lower_inode->i_mapping,
++				     page->index,
++				     (filler_t *) lower_inode->i_mapping->
++				     a_ops->readpage, (void *)lower_file);
++
++	if (IS_ERR(lower_page)) {
++		err = PTR_ERR(lower_page);
++		lower_page = NULL;
++		goto out_release;
++	}
++
++	/*
++	 * wait for the page data to show up
++	 * (readpage signals completion by unlocking the page)
++	 */
++	wait_on_page_locked(lower_page);
++	if (!PageUptodate(lower_page)) {
++		/*
++		 * call readpage() again if we returned from wait_on_page
++		 * with a page that's not up-to-date; that can happen when a
++		 * partial page has a few buffers which are ok, but not the
++		 * whole page.
++		 */
++		lock_page(lower_page);
++		err = lower_inode->i_mapping->a_ops->readpage(lower_file,
++							      lower_page);
++		if (err) {
++			lower_page = NULL;
++			goto out_release;
++		}
++
++		wait_on_page_locked(lower_page);
++		if (!PageUptodate(lower_page)) {
++			err = -EIO;
++			goto out_release;
++		}
++	}
++
++	/* map pages, get their addresses */
++	page_data = (char *)kmap(page);
++	lower_page_data = (char *)kmap(lower_page);
++
++	memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE);
++
++	err = 0;
++
++	kunmap(lower_page);
++	kunmap(page);
++
++out_release:
++	if (lower_page)
++		page_cache_release(lower_page);	/* undo read_cache_page */
++
++	if (err == 0)
++		SetPageUptodate(page);
++	else
++		ClearPageUptodate(page);
++
++out_err:
++	return err;
++}
++
++int unionfs_readpage(struct file *file, struct page *page)
++{
++	int err;
++
++	unionfs_read_lock(file->f_dentry->d_sb);
++
++	if ((err = unionfs_file_revalidate(file, 0)))
++		goto out;
++
++	err = unionfs_do_readpage(file, page);
++
++	if (!err)
++		touch_atime(unionfs_lower_mnt(file->f_path.dentry),
++			    unionfs_lower_dentry(file->f_path.dentry));
++
++	/*
++	 * we have to unlock our page, b/c we _might_ have gotten a locked
++	 * page.  but we no longer have to wakeup on our page here, b/c
++	 * UnlockPage does it
++	 */
++out:
++	unlock_page(page);
++	unionfs_read_unlock(file->f_dentry->d_sb);
++
++	return err;
++}
++
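++/*
++ * prepare_write only needs to revalidate the unionfs file: commit_write
++ * below does the real work, writing through vfs_write to the lower file,
++ * so there is no lower page to prepare here.
++ */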
++int unionfs_prepare_write(struct file *file, struct page *page, unsigned from,
++			  unsigned to)
++{
++	int err;
++
++	unionfs_read_lock(file->f_dentry->d_sb);
++
++	err = unionfs_file_revalidate(file, 1);
++
++	unionfs_read_unlock(file->f_dentry->d_sb);
++
++	return err;
++}
++
++int unionfs_commit_write(struct file *file, struct page *page, unsigned from,
++			 unsigned to)
++{
++	int err = -ENOMEM;
++	struct inode *inode, *lower_inode;
++	struct file *lower_file = NULL;
++	loff_t pos;
++	unsigned bytes = to - from;
++	char *page_data = NULL;
++	mm_segment_t old_fs;
++
++	BUG_ON(file == NULL);
++
++	unionfs_read_lock(file->f_dentry->d_sb);
++
++	if ((err = unionfs_file_revalidate(file, 1)))
++		goto out;
++
++	inode = page->mapping->host;
++	lower_inode = unionfs_lower_inode(inode);
++
++	if (UNIONFS_F(file) != NULL)
++		lower_file = unionfs_lower_file(file);
++
++	/* FIXME: is this assertion right here? */
++	BUG_ON(lower_file == NULL);
++
++	page_data = (char *)kmap(page);
++	lower_file->f_pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + from;
++
++	/*
++	 * SP: I use vfs_write instead of copying page data and the
++	 * prepare_write/commit_write combo because file systems like
++	 * GFS/OCFS2 don't like things touching their pages directly;
++	 * calling the underlying write op, while a little bit slower,
++	 * will go through all the FS-specific code as well.
++	 */
++	old_fs = get_fs();
++	set_fs(KERNEL_DS);
++	err = vfs_write(lower_file, page_data + from, bytes,
++			&lower_file->f_pos);
++	set_fs(old_fs);
++
++	kunmap(page);
++
++	if (err < 0)
++		goto out;
++
++	inode->i_blocks = lower_inode->i_blocks;
++	/* we may have to update i_size */
++	pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
++	if (pos > i_size_read(inode))
++		i_size_write(inode, pos);
++
++	/*
++	 * update mtime and ctime of lower level file system
++	 * unionfs' mtime and ctime are updated by generic_file_write
++	 */
++	lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
++
++	mark_inode_dirty_sync(inode);
++
++out:
++	if (err < 0)
++		ClearPageUptodate(page);
++
++	unionfs_read_unlock(file->f_dentry->d_sb);
++	return err;		/* a non-negative err (bytes written) means success */
++}
++
++void unionfs_sync_page(struct page *page)
++{
++	struct inode *inode;
++	struct inode *lower_inode;
++	struct page *lower_page;
++	struct address_space *mapping;
++
++	inode = page->mapping->host;
++	lower_inode = unionfs_lower_inode(inode);
++
++	/* find lower page (returns a locked page) */
++	lower_page = grab_cache_page(lower_inode->i_mapping, page->index);
++	if (!lower_page)
++		goto out;
++
++	/* do the actual sync */
++	mapping = lower_page->mapping;
++	/*
++	 * XXX: can we optimize ala RAIF and set the lower page to be
++	 * discarded after a successful sync_page?
++	 */
++	if (mapping && mapping->a_ops && mapping->a_ops->sync_page)
++		mapping->a_ops->sync_page(lower_page);
++
++	unlock_page(lower_page);	/* b/c grab_cache_page locked it */
++	page_cache_release(lower_page);	/* b/c grab_cache_page increased refcnt */
++
++out:
++	return;
++}
++
++struct address_space_operations unionfs_aops = {
++	.writepage	= unionfs_writepage,
++	.readpage	= unionfs_readpage,
++	.prepare_write	= unionfs_prepare_write,
++	.commit_write	= unionfs_commit_write,
++	.sync_page	= unionfs_sync_page,
++};
+diff -Nurb linux-2.6.22-570/fs/unionfs/rdstate.c linux-2.6.22-try2/fs/unionfs/rdstate.c
+--- linux-2.6.22-570/fs/unionfs/rdstate.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/rdstate.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,282 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/* This file contains the routines for maintaining readdir state. */
++
++/*
++ * There are two structures here: rdstate, which contains a hash table
++ * of the second structure, the filldir_node.
++ */
++
++/*
++ * This is a struct kmem_cache for filldir nodes, because we allocate a lot
++ * of them and they shouldn't waste memory.  If the node has a small name
++ * (as defined by the dentry structure), then we use an inline name to
++ * preserve kmalloc space.
++ */
++static struct kmem_cache *unionfs_filldir_cachep;
++
++int unionfs_init_filldir_cache(void)
++{
++	unionfs_filldir_cachep =
++		kmem_cache_create("unionfs_filldir",
++				  sizeof(struct filldir_node), 0,
++				  SLAB_RECLAIM_ACCOUNT, NULL, NULL);
++
++	return (unionfs_filldir_cachep ? 0 : -ENOMEM);
++}
++
++void unionfs_destroy_filldir_cache(void)
++{
++	if (unionfs_filldir_cachep)
++		kmem_cache_destroy(unionfs_filldir_cachep);
++}
++
++/*
++ * This is a tuning parameter that tells us roughly how big to make the
++ * hash table in directory entries per page.  This isn't perfect, but
++ * at least we get a hash table size that shouldn't be too overloaded.
++ * The following averages are based on my home directory.
++ * 14.44693	Overall
++ * 12.29	Single Page Directories
++ * 117.93	Multi-page directories
++ */
++#define DENTPAGE 4096
++#define DENTPERONEPAGE 12
++#define DENTPERPAGE 118
++#define MINHASHSIZE 1
++static int guesstimate_hash_size(struct inode *inode)
++{
++	struct inode *hidden_inode;
++	int bindex;
++	int hashsize = MINHASHSIZE;
++
++	if (UNIONFS_I(inode)->hashsize > 0)
++		return UNIONFS_I(inode)->hashsize;
++
++	for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
++		if (!(hidden_inode = unionfs_lower_inode_idx(inode, bindex)))
++			continue;
++
++		if (hidden_inode->i_size == DENTPAGE)
++			hashsize += DENTPERONEPAGE;
++		else
++			hashsize += (hidden_inode->i_size / DENTPAGE) *
++				DENTPERPAGE;
++	}
++
++	return hashsize;
++}
++
++int init_rdstate(struct file *file)
++{
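++	/*
++	 * rdstate2offset() packs a (cookie, offset) pair into a single
++	 * loff_t, so loff_t must be exactly two unsigned ints wide.
++	 */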
++	BUG_ON(sizeof(loff_t) !=
++	       (sizeof(unsigned int) + sizeof(unsigned int)));
++	BUG_ON(UNIONFS_F(file)->rdstate != NULL);
++
++	UNIONFS_F(file)->rdstate = alloc_rdstate(file->f_dentry->d_inode,
++						 fbstart(file));
++
++	return (UNIONFS_F(file)->rdstate ? 0 : -ENOMEM);
++}
++
++struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos)
++{
++	struct unionfs_dir_state *rdstate = NULL;
++	struct list_head *pos;
++
++	spin_lock(&UNIONFS_I(inode)->rdlock);
++	list_for_each(pos, &UNIONFS_I(inode)->readdircache) {
++		struct unionfs_dir_state *r =
++			list_entry(pos, struct unionfs_dir_state, cache);
++		if (fpos == rdstate2offset(r)) {
++			UNIONFS_I(inode)->rdcount--;
++			list_del(&r->cache);
++			rdstate = r;
++			break;
++		}
++	}
++	spin_unlock(&UNIONFS_I(inode)->rdlock);
++	return rdstate;
++}
++
++struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex)
++{
++	int i = 0;
++	int hashsize;
++	unsigned long mallocsize = sizeof(struct unionfs_dir_state);
++	struct unionfs_dir_state *rdstate;
++
++	hashsize = guesstimate_hash_size(inode);
++	mallocsize += hashsize * sizeof(struct list_head);
++	mallocsize = __roundup_pow_of_two(mallocsize);
++
++	/* This should give us about 500 entries anyway. */
++	if (mallocsize > PAGE_SIZE)
++		mallocsize = PAGE_SIZE;
++
++	hashsize = (mallocsize - sizeof(struct unionfs_dir_state)) /
++		sizeof(struct list_head);
++
++	rdstate = kmalloc(mallocsize, GFP_KERNEL);
++	if (!rdstate)
++		return NULL;
++
++	spin_lock(&UNIONFS_I(inode)->rdlock);
++	if (UNIONFS_I(inode)->cookie >= (MAXRDCOOKIE - 1))
++		UNIONFS_I(inode)->cookie = 1;
++	else
++		UNIONFS_I(inode)->cookie++;
++
++	rdstate->cookie = UNIONFS_I(inode)->cookie;
++	spin_unlock(&UNIONFS_I(inode)->rdlock);
++	rdstate->offset = 1;
++	rdstate->access = jiffies;
++	rdstate->bindex = bindex;
++	rdstate->dirpos = 0;
++	rdstate->hashentries = 0;
++	rdstate->size = hashsize;
++	for (i = 0; i < rdstate->size; i++)
++		INIT_LIST_HEAD(&rdstate->list[i]);
++
++	return rdstate;
++}
++
++static void free_filldir_node(struct filldir_node *node)
++{
++	if (node->namelen >= DNAME_INLINE_LEN_MIN)
++		kfree(node->name);
++	kmem_cache_free(unionfs_filldir_cachep, node);
++}
++
++void free_rdstate(struct unionfs_dir_state *state)
++{
++	struct filldir_node *tmp;
++	int i;
++
++	for (i = 0; i < state->size; i++) {
++		struct list_head *head = &(state->list[i]);
++		struct list_head *pos, *n;
++
++		/* traverse the list and deallocate space */
++		list_for_each_safe(pos, n, head) {
++			tmp = list_entry(pos, struct filldir_node, file_list);
++			list_del(&tmp->file_list);
++			free_filldir_node(tmp);
++		}
++	}
++
++	kfree(state);
++}
++
++struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
++				       const char *name, int namelen)
++{
++	int index;
++	unsigned int hash;
++	struct list_head *head;
++	struct list_head *pos;
++	struct filldir_node *cursor = NULL;
++	int found = 0;
++
++	BUG_ON(namelen <= 0);
++
++	hash = full_name_hash(name, namelen);
++	index = hash % rdstate->size;
++
++	head = &(rdstate->list[index]);
++	list_for_each(pos, head) {
++		cursor = list_entry(pos, struct filldir_node, file_list);
++
++		if (cursor->namelen == namelen && cursor->hash == hash &&
++		    !strncmp(cursor->name, name, namelen)) {
++			/*
++			 * a duplicate exists, and hence no need to create
++			 * entry to the list
++			 */
++			found = 1;
++
++			/*
++			 * if the duplicate is in this branch, then the file
++			 * system is corrupted.
++			 */
++			if (cursor->bindex == rdstate->bindex) {
++				printk(KERN_DEBUG "unionfs: filldir: possible "
++				       "I/O error: a file is duplicated "
++				       "in the same branch %d: %s\n",
++				       rdstate->bindex, cursor->name);
++			}
++			break;
++		}
++	}
++
++	if (!found)
++		cursor = NULL;
++
++	return cursor;
++}
++
++int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name,
++		     int namelen, int bindex, int whiteout)
++{
++	struct filldir_node *new;
++	unsigned int hash;
++	int index;
++	int err = 0;
++	struct list_head *head;
++
++	BUG_ON(namelen <= 0);
++
++	hash = full_name_hash(name, namelen);
++	index = hash % rdstate->size;
++	head = &(rdstate->list[index]);
++
++	new = kmem_cache_alloc(unionfs_filldir_cachep, GFP_KERNEL);
++	if (!new) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	INIT_LIST_HEAD(&new->file_list);
++	new->namelen = namelen;
++	new->hash = hash;
++	new->bindex = bindex;
++	new->whiteout = whiteout;
++
++	if (namelen < DNAME_INLINE_LEN_MIN)
++		new->name = new->iname;
++	else {
++		new->name = kmalloc(namelen + 1, GFP_KERNEL);
++		if (!new->name) {
++			kmem_cache_free(unionfs_filldir_cachep, new);
++			new = NULL;
++			goto out;
++		}
++	}
++
++	memcpy(new->name, name, namelen);
++	new->name[namelen] = '\0';
++
++	rdstate->hashentries++;
++
++	list_add(&(new->file_list), head);
++out:
++	return err;
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/rename.c linux-2.6.22-try2/fs/unionfs/rename.c
+--- linux-2.6.22-570/fs/unionfs/rename.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/rename.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,477 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++static int __unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
++			    struct inode *new_dir, struct dentry *new_dentry,
++			    int bindex, struct dentry **wh_old)
++{
++	int err = 0;
++	struct dentry *hidden_old_dentry;
++	struct dentry *hidden_new_dentry;
++	struct dentry *hidden_old_dir_dentry;
++	struct dentry *hidden_new_dir_dentry;
++	struct dentry *hidden_wh_dentry;
++	struct dentry *hidden_wh_dir_dentry;
++	char *wh_name = NULL;
++
++	hidden_new_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
++	hidden_old_dentry = unionfs_lower_dentry_idx(old_dentry, bindex);
++
++	if (!hidden_new_dentry) {
++		hidden_new_dentry =
++			create_parents(new_dentry->d_parent->d_inode,
++				       new_dentry, bindex);
++		if (IS_ERR(hidden_new_dentry)) {
++			printk(KERN_DEBUG "unionfs: error creating directory "
++			       "tree for rename, bindex = %d, err = %ld\n",
++			       bindex, PTR_ERR(hidden_new_dentry));
++			err = PTR_ERR(hidden_new_dentry);
++			goto out;
++		}
++	}
++
++	wh_name = alloc_whname(new_dentry->d_name.name,
++			       new_dentry->d_name.len);
++	if (IS_ERR(wh_name)) {
++		err = PTR_ERR(wh_name);
++		goto out;
++	}
++
++	hidden_wh_dentry = lookup_one_len(wh_name, hidden_new_dentry->d_parent,
++					  new_dentry->d_name.len +
++					  UNIONFS_WHLEN);
++	if (IS_ERR(hidden_wh_dentry)) {
++		err = PTR_ERR(hidden_wh_dentry);
++		goto out;
++	}
++
++	if (hidden_wh_dentry->d_inode) {
++		/* get rid of the whiteout that is existing */
++		if (hidden_new_dentry->d_inode) {
++			printk(KERN_WARNING "unionfs: both a whiteout and a "
++			       "dentry exist when doing a rename!\n");
++			err = -EIO;
++
++			dput(hidden_wh_dentry);
++			goto out;
++		}
++
++		hidden_wh_dir_dentry = lock_parent(hidden_wh_dentry);
++		if (!(err = is_robranch_super(old_dentry->d_sb, bindex)))
++			err = vfs_unlink(hidden_wh_dir_dentry->d_inode,
++					 hidden_wh_dentry);
++
++		dput(hidden_wh_dentry);
++		unlock_dir(hidden_wh_dir_dentry);
++		if (err)
++			goto out;
++	} else
++		dput(hidden_wh_dentry);
++
++	dget(hidden_old_dentry);
++	hidden_old_dir_dentry = dget_parent(hidden_old_dentry);
++	hidden_new_dir_dentry = dget_parent(hidden_new_dentry);
++
++	lock_rename(hidden_old_dir_dentry, hidden_new_dir_dentry);
++
++	err = is_robranch_super(old_dentry->d_sb, bindex);
++	if (err)
++		goto out_unlock;
++
++	/*
++	 * Prepare to whiteout old_dentry: look up the (negative) whiteout
++	 * dentry here.  The caller will create the actual whiteout, and
++	 * must dput(*wh_old).
++	 */
++	if (wh_old) {
++		char *whname;
++		whname = alloc_whname(old_dentry->d_name.name,
++				      old_dentry->d_name.len);
++		err = PTR_ERR(whname);
++		if (IS_ERR(whname))
++			goto out_unlock;
++		*wh_old = lookup_one_len(whname, hidden_old_dir_dentry,
++					 old_dentry->d_name.len +
++					 UNIONFS_WHLEN);
++		kfree(whname);
++		err = PTR_ERR(*wh_old);
++		if (IS_ERR(*wh_old)) {
++			*wh_old = NULL;
++			goto out_unlock;
++		}
++	}
++
++	err = vfs_rename(hidden_old_dir_dentry->d_inode, hidden_old_dentry,
++			 hidden_new_dir_dentry->d_inode, hidden_new_dentry);
++
++out_unlock:
++	unlock_rename(hidden_old_dir_dentry, hidden_new_dir_dentry);
++
++	dput(hidden_old_dir_dentry);
++	dput(hidden_new_dir_dentry);
++	dput(hidden_old_dentry);
++
++out:
++	if (!err) {
++		/* Fixup the new_dentry. */
++		if (bindex < dbstart(new_dentry))
++			set_dbstart(new_dentry, bindex);
++		else if (bindex > dbend(new_dentry))
++			set_dbend(new_dentry, bindex);
++	}
++
++	kfree(wh_name);
++
++	return err;
++}
++
++/*
++ * Main rename code.  This is sufficiently complex that it's documented in
++ * Documentation/filesystems/unionfs/rename.txt.  This routine calls
++ * __unionfs_rename() above to perform some of the work.
++ */
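++/*
++ * Rough sketch of the steps below: rename the source in its leftmost
++ * branch; unlink any copies of the destination in branches to its left;
++ * on a copyup error (read-only branch) copy the source up and retry; and
++ * finally create a whiteout for the source name where needed.
++ */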
++static int do_unionfs_rename(struct inode *old_dir,
++			     struct dentry *old_dentry,
++			     struct inode *new_dir,
++			     struct dentry *new_dentry)
++{
++	int err = 0;
++	int bindex, bwh_old;
++	int old_bstart, old_bend;
++	int new_bstart, new_bend;
++	int do_copyup = -1;
++	struct dentry *parent_dentry;
++	int local_err = 0;
++	int eio = 0;
++	int revert = 0;
++	struct dentry *wh_old = NULL;
++
++	old_bstart = dbstart(old_dentry);
++	bwh_old = old_bstart;
++	old_bend = dbend(old_dentry);
++	parent_dentry = old_dentry->d_parent;
++
++	new_bstart = dbstart(new_dentry);
++	new_bend = dbend(new_dentry);
++
++	/* Rename source to destination. */
++	err = __unionfs_rename(old_dir, old_dentry, new_dir, new_dentry,
++			       old_bstart, &wh_old);
++	if (err) {
++		if (!IS_COPYUP_ERR(err))
++			goto out;
++		do_copyup = old_bstart - 1;
++	} else
++		revert = 1;
++
++	/*
++	 * Unlink all instances of destination that exist to the left of
++	 * bstart of source.  On error, revert and bail out.
++	 */
++	for (bindex = old_bstart - 1; bindex >= new_bstart; bindex--) {
++		struct dentry *unlink_dentry;
++		struct dentry *unlink_dir_dentry;
++
++		unlink_dentry = unionfs_lower_dentry_idx(new_dentry, bindex);
++		if (!unlink_dentry)
++			continue;
++
++		unlink_dir_dentry = lock_parent(unlink_dentry);
++		if (!(err = is_robranch_super(old_dir->i_sb, bindex)))
++			err = vfs_unlink(unlink_dir_dentry->d_inode,
++					 unlink_dentry);
++
++		fsstack_copy_attr_times(new_dentry->d_parent->d_inode,
++					unlink_dir_dentry->d_inode);
++		/* propagate number of hard-links */
++		new_dentry->d_parent->d_inode->i_nlink =
++			unionfs_get_nlinks(new_dentry->d_parent->d_inode);
++
++		unlock_dir(unlink_dir_dentry);
++		if (!err) {
++			if (bindex != new_bstart) {
++				dput(unlink_dentry);
++				unionfs_set_lower_dentry_idx(new_dentry,
++							     bindex, NULL);
++			}
++		} else if (IS_COPYUP_ERR(err)) {
++			do_copyup = bindex - 1;
++		} else if (revert) {
++			dput(wh_old);
++			goto revert;
++		}
++	}
++
++	if (do_copyup != -1) {
++		for (bindex = do_copyup; bindex >= 0; bindex--) {
++			/*
++			 * copy up the file into some branch to the left, so
++			 * that we can rename it
++			 */
++			err = copyup_dentry(old_dentry->d_parent->d_inode,
++					    old_dentry, old_bstart, bindex,
++					    NULL, old_dentry->d_inode->i_size);
++			if (!err) {
++				dput(wh_old);
++				bwh_old = bindex;
++				err = __unionfs_rename(old_dir, old_dentry,
++						       new_dir, new_dentry,
++						       bindex, &wh_old);
++				break;
++			}
++		}
++	}
++
++	/* make it opaque */
++	if (S_ISDIR(old_dentry->d_inode->i_mode)) {
++		err = make_dir_opaque(old_dentry, dbstart(old_dentry));
++		if (err)
++			goto revert;
++	}
++
++	/*
++	 * Create whiteout for source, only if:
++	 * (1) There is more than one underlying instance of source.
++	 * (2) We did a copy_up
++	 */
++	if ((old_bstart != old_bend) || (do_copyup != -1)) {
++		struct dentry *hidden_parent;
++		BUG_ON(!wh_old || wh_old->d_inode || bwh_old < 0);
++		hidden_parent = lock_parent(wh_old);
++		local_err = vfs_create(hidden_parent->d_inode, wh_old, S_IRUGO,
++				       NULL);
++		unlock_dir(hidden_parent);
++		if (!local_err)
++			set_dbopaque(old_dentry, bwh_old);
++		else {
++			/*
++			 * we can't fix anything now, so we cop out and use
++			 * -EIO.
++			 */
++			printk(KERN_ERR "unionfs: can't create a whiteout for "
++			       "the source in rename!\n");
++			err = -EIO;
++		}
++	}
++
++out:
++	dput(wh_old);
++	return err;
++
++revert:
++	/* Do revert here. */
++	local_err = unionfs_refresh_hidden_dentry(new_dentry, old_bstart);
++	if (local_err) {
++		printk(KERN_WARNING "unionfs: revert failed in rename: "
++		       "the new refresh failed.\n");
++		eio = -EIO;
++	}
++
++	local_err = unionfs_refresh_hidden_dentry(old_dentry, old_bstart);
++	if (local_err) {
++		printk(KERN_WARNING "unionfs: revert failed in rename: "
++		       "the old refresh failed.\n");
++		eio = -EIO;
++		goto revert_out;
++	}
++
++	if (!unionfs_lower_dentry_idx(new_dentry, bindex) ||
++	    !unionfs_lower_dentry_idx(new_dentry, bindex)->d_inode) {
++		printk(KERN_WARNING "unionfs: revert failed in rename: "
++		       "the object disappeared from under us!\n");
++		eio = -EIO;
++		goto revert_out;
++	}
++
++	if (unionfs_lower_dentry_idx(old_dentry, bindex) &&
++	    unionfs_lower_dentry_idx(old_dentry, bindex)->d_inode) {
++		printk(KERN_WARNING "unionfs: revert failed in rename: "
++		       "the object was created underneath us!\n");
++		eio = -EIO;
++		goto revert_out;
++	}
++
++	local_err = __unionfs_rename(new_dir, new_dentry,
++				     old_dir, old_dentry, old_bstart, NULL);
++
++	/* If we can't fix it, then we cop out with -EIO. */
++	if (local_err) {
++		printk(KERN_WARNING "unionfs: revert failed in rename!\n");
++		eio = -EIO;
++	}
++
++	local_err = unionfs_refresh_hidden_dentry(new_dentry, bindex);
++	if (local_err)
++		eio = -EIO;
++	local_err = unionfs_refresh_hidden_dentry(old_dentry, bindex);
++	if (local_err)
++		eio = -EIO;
++
++revert_out:
++	if (eio)
++		err = eio;
++	return err;
++}
++
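++/*
++ * lookup_whiteout() scans the parent's branches from left to right and
++ * returns the first whiteout dentry found with a positive inode (the
++ * caller must dput() it), or ERR_PTR(-ENOENT) if no branch carries a
++ * whiteout for this name.
++ */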
++static struct dentry *lookup_whiteout(struct dentry *dentry)
++{
++	char *whname;
++	int bindex = -1, bstart = -1, bend = -1;
++	struct dentry *parent, *hidden_parent, *wh_dentry;
++
++	whname = alloc_whname(dentry->d_name.name, dentry->d_name.len);
++	if (IS_ERR(whname))
++		return (void *)whname;
++
++	parent = dget_parent(dentry);
++	unionfs_lock_dentry(parent);
++	bstart = dbstart(parent);
++	bend = dbend(parent);
++	wh_dentry = ERR_PTR(-ENOENT);
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_parent = unionfs_lower_dentry_idx(parent, bindex);
++		if (!hidden_parent)
++			continue;
++		wh_dentry = lookup_one_len(whname, hidden_parent,
++					   dentry->d_name.len + UNIONFS_WHLEN);
++		if (IS_ERR(wh_dentry))
++			continue;
++		if (wh_dentry->d_inode)
++			break;
++		dput(wh_dentry);
++		wh_dentry = ERR_PTR(-ENOENT);
++	}
++	unionfs_unlock_dentry(parent);
++	dput(parent);
++	kfree(whname);
++	return wh_dentry;
++}
++
++/*
++ * We can't copy up a directory, because it may involve huge numbers of
++ * children, etc.  Doing that in the kernel would be bad, so instead we
++ * return -EXDEV to the user-space utility that caused this (e.g., mv(1)
++ * falls back to a recursive copy), and let user space recurse and ask
++ * us to copy up each file separately.
++ */
++static int may_rename_dir(struct dentry *dentry)
++{
++	int err, bstart;
++
++	err = check_empty(dentry, NULL);
++	if (err == -ENOTEMPTY) {
++		if (is_robranch(dentry))
++			return -EXDEV;
++	} else if (err)
++		return err;
++
++	bstart = dbstart(dentry);
++	if (dbend(dentry) == bstart || dbopaque(dentry) == bstart)
++		return 0;
++
++	set_dbstart(dentry, bstart + 1);
++	err = check_empty(dentry, NULL);
++	set_dbstart(dentry, bstart);
++	if (err == -ENOTEMPTY)
++		err = -EXDEV;
++	return err;
++}
++
++int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
++		   struct inode *new_dir, struct dentry *new_dentry)
++{
++	int err = 0;
++	struct dentry *wh_dentry;
++
++	unionfs_read_lock(old_dentry->d_sb);
++	unionfs_double_lock_dentry(old_dentry, new_dentry);
++
++	if (!__unionfs_d_revalidate_chain(old_dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++	if (!d_deleted(new_dentry) && new_dentry->d_inode &&
++	    !__unionfs_d_revalidate_chain(new_dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	if (!S_ISDIR(old_dentry->d_inode->i_mode))
++		err = unionfs_partial_lookup(old_dentry);
++	else
++		err = may_rename_dir(old_dentry);
++
++	if (err)
++		goto out;
++
++	err = unionfs_partial_lookup(new_dentry);
++	if (err)
++		goto out;
++
++	/*
++	 * If new_dentry is already hidden because of a whiteout, simply
++	 * overwrite it, even if the whited-out dir is not empty.
++	 */
++	wh_dentry = lookup_whiteout(new_dentry);
++	if (!IS_ERR(wh_dentry))
++		dput(wh_dentry);
++	else if (new_dentry->d_inode) {
++		if (S_ISDIR(old_dentry->d_inode->i_mode) !=
++		    S_ISDIR(new_dentry->d_inode->i_mode)) {
++			err = S_ISDIR(old_dentry->d_inode->i_mode) ?
++				-ENOTDIR : -EISDIR;
++			goto out;
++		}
++
++		if (S_ISDIR(new_dentry->d_inode->i_mode)) {
++			struct unionfs_dir_state *namelist;
++			/* check if this unionfs directory is empty or not */
++			err = check_empty(new_dentry, &namelist);
++			if (err)
++				goto out;
++
++			if (!is_robranch(new_dentry))
++				err = delete_whiteouts(new_dentry,
++						       dbstart(new_dentry),
++						       namelist);
++
++			free_rdstate(namelist);
++
++			if (err)
++				goto out;
++		}
++	}
++	err = do_unionfs_rename(old_dir, old_dentry, new_dir, new_dentry);
++
++out:
++	if (err)
++		/* clear the new_dentry stuff created */
++		d_drop(new_dentry);
++	else
++		/*
++		 * force re-lookup since the dir on ro branch is not renamed,
++		 * and hidden dentries still indicate the un-renamed ones.
++		 */
++		if (S_ISDIR(old_dentry->d_inode->i_mode))
++			atomic_dec(&UNIONFS_D(old_dentry)->generation);
++
++	unionfs_unlock_dentry(new_dentry);
++	unionfs_unlock_dentry(old_dentry);
++	unionfs_read_unlock(old_dentry->d_sb);
++	return err;
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/sioq.c linux-2.6.22-try2/fs/unionfs/sioq.c
+--- linux-2.6.22-570/fs/unionfs/sioq.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/sioq.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,118 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * Super-user I/O workqueue: sometimes we need to perform actions which
++ * would fail due to the Unix permissions on the parent directory (e.g.,
++ * rmdir a directory which appears empty, but in reality contains
++ * whiteouts).
++ */
++
++static struct workqueue_struct *superio_workqueue;
++
++int __init init_sioq(void)
++{
++	int err;
++
++	superio_workqueue = create_workqueue("unionfs_siod");
++	if (!IS_ERR(superio_workqueue))
++		return 0;
++
++	err = PTR_ERR(superio_workqueue);
++	printk(KERN_ERR "unionfs: create_workqueue failed %d\n", err);
++	superio_workqueue = NULL;
++	return err;
++}
++
++void stop_sioq(void)
++{
++	if (superio_workqueue)
++		destroy_workqueue(superio_workqueue);
++}
++
++void run_sioq(work_func_t func, struct sioq_args *args)
++{
++	INIT_WORK(&args->work, func);
++
++	init_completion(&args->comp);
++	while (!queue_work(superio_workqueue, &args->work)) {
++		/* TODO: do accounting if needed */
++		schedule();
++	}
++	wait_for_completion(&args->comp);
++}
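++
++/*
++ * Typical caller pattern (an illustrative sketch, not a verbatim caller;
++ * error handling elided):
++ *
++ *	struct sioq_args args;
++ *
++ *	args.unlink.parent = dir;
++ *	args.unlink.dentry = dentry;
++ *	run_sioq(__unionfs_unlink, &args);
++ *	err = args.err;
++ *
++ * run_sioq() does not return until the helper signals the completion,
++ * so 'args' may safely live on the caller's stack.
++ */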
++
++void __unionfs_create(struct work_struct *work)
++{
++	struct sioq_args *args = container_of(work, struct sioq_args, work);
++	struct create_args *c = &args->create;
++
++	args->err = vfs_create(c->parent, c->dentry, c->mode, c->nd);
++	complete(&args->comp);
++}
++
++void __unionfs_mkdir(struct work_struct *work)
++{
++	struct sioq_args *args = container_of(work, struct sioq_args, work);
++	struct mkdir_args *m = &args->mkdir;
++
++	args->err = vfs_mkdir(m->parent, m->dentry, m->mode);
++	complete(&args->comp);
++}
++
++void __unionfs_mknod(struct work_struct *work)
++{
++	struct sioq_args *args = container_of(work, struct sioq_args, work);
++	struct mknod_args *m = &args->mknod;
++
++	args->err = vfs_mknod(m->parent, m->dentry, m->mode, m->dev);
++	complete(&args->comp);
++}
++
++void __unionfs_symlink(struct work_struct *work)
++{
++	struct sioq_args *args = container_of(work, struct sioq_args, work);
++	struct symlink_args *s = &args->symlink;
++
++	args->err = vfs_symlink(s->parent, s->dentry, s->symbuf, s->mode);
++	complete(&args->comp);
++}
++
++void __unionfs_unlink(struct work_struct *work)
++{
++	struct sioq_args *args = container_of(work, struct sioq_args, work);
++	struct unlink_args *u = &args->unlink;
++
++	args->err = vfs_unlink(u->parent, u->dentry);
++	complete(&args->comp);
++}
++
++void __delete_whiteouts(struct work_struct *work)
++{
++	struct sioq_args *args = container_of(work, struct sioq_args, work);
++	struct deletewh_args *d = &args->deletewh;
++
++	args->err = do_delete_whiteouts(d->dentry, d->bindex, d->namelist);
++	complete(&args->comp);
++}
++
++void __is_opaque_dir(struct work_struct *work)
++{
++	struct sioq_args *args = container_of(work, struct sioq_args, work);
++
++	args->ret = lookup_one_len(UNIONFS_DIR_OPAQUE, args->is_opaque.dentry,
++				   sizeof(UNIONFS_DIR_OPAQUE) - 1);
++	complete(&args->comp);
++}
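++
++/*
++ * Note that, unlike the helpers above, __is_opaque_dir() reports its
++ * result through args->ret: the dentry from lookup_one_len() (or an
++ * ERR_PTR), which the caller is expected to dput() on success.
++ */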
+diff -Nurb linux-2.6.22-570/fs/unionfs/sioq.h linux-2.6.22-try2/fs/unionfs/sioq.h
+--- linux-2.6.22-570/fs/unionfs/sioq.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/sioq.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,91 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#ifndef _SIOQ_H
++#define _SIOQ_H
++
++struct deletewh_args {
++	struct unionfs_dir_state *namelist;
++	struct dentry *dentry;
++	int bindex;
++};
++
++struct is_opaque_args {
++	struct dentry *dentry;
++};
++
++struct create_args {
++	struct inode *parent;
++	struct dentry *dentry;
++	umode_t mode;
++	struct nameidata *nd;
++};
++
++struct mkdir_args {
++	struct inode *parent;
++	struct dentry *dentry;
++	umode_t mode;
++};
++
++struct mknod_args {
++	struct inode *parent;
++	struct dentry *dentry;
++	umode_t mode;
++	dev_t dev;
++};
++
++struct symlink_args {
++	struct inode *parent;
++	struct dentry *dentry;
++	char *symbuf;
++	umode_t mode;
++};
++
++struct unlink_args {
++	struct inode *parent;
++	struct dentry *dentry;
++};
++
++
++struct sioq_args {
++	struct completion comp;
++	struct work_struct work;
++	int err;
++	void *ret;
++
++	union {
++		struct deletewh_args deletewh;
++		struct is_opaque_args is_opaque;
++		struct create_args create;
++		struct mkdir_args mkdir;
++		struct mknod_args mknod;
++		struct symlink_args symlink;
++		struct unlink_args unlink;
++	};
++};
++
++/* Extern definitions for SIOQ functions */
++extern int __init init_sioq(void);
++extern void stop_sioq(void);
++extern void run_sioq(work_func_t func, struct sioq_args *args);
++
++/* Extern definitions for our privilege escalation helpers */
++extern void __unionfs_create(struct work_struct *work);
++extern void __unionfs_mkdir(struct work_struct *work);
++extern void __unionfs_mknod(struct work_struct *work);
++extern void __unionfs_symlink(struct work_struct *work);
++extern void __unionfs_unlink(struct work_struct *work);
++extern void __delete_whiteouts(struct work_struct *work);
++extern void __is_opaque_dir(struct work_struct *work);
++
++#endif /* _SIOQ_H */
+diff -Nurb linux-2.6.22-570/fs/unionfs/subr.c linux-2.6.22-try2/fs/unionfs/subr.c
+--- linux-2.6.22-570/fs/unionfs/subr.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/subr.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,238 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * Pass a unionfs dentry and a branch index.  This tries to create a
++ * whiteout for the filename in dentry, starting in branch 'start'.  On
++ * error, it proceeds to the branch to the left.
++ */
++int create_whiteout(struct dentry *dentry, int start)
++{
++	int bstart, bend, bindex;
++	struct dentry *hidden_dir_dentry;
++	struct dentry *hidden_dentry;
++	struct dentry *hidden_wh_dentry;
++	char *name = NULL;
++	int err = -EINVAL;
++
++	verify_locked(dentry);
++
++	bstart = dbstart(dentry);
++	bend = dbend(dentry);
++
++	/* create dentry's whiteout equivalent */
++	name = alloc_whname(dentry->d_name.name, dentry->d_name.len);
++	if (IS_ERR(name)) {
++		err = PTR_ERR(name);
++		goto out;
++	}
++
++	for (bindex = start; bindex >= 0; bindex--) {
++		hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++
++		if (!hidden_dentry) {
++			/*
++			 * If the hidden dentry is not present, create the
++			 * entire hidden dentry directory structure and
++			 * proceed.  Since we just want to create the
++			 * whiteout, we only need the parent dentry, and
++			 * hence get rid of this dentry.
++			 */
++			hidden_dentry = create_parents(dentry->d_inode,
++						       dentry, bindex);
++			if (!hidden_dentry || IS_ERR(hidden_dentry)) {
++				printk(KERN_DEBUG "unionfs: create_parents "
++				       "failed for bindex = %d\n", bindex);
++				continue;
++			}
++		}
++
++		hidden_wh_dentry =
++			lookup_one_len(name, hidden_dentry->d_parent,
++				       dentry->d_name.len + UNIONFS_WHLEN);
++		if (IS_ERR(hidden_wh_dentry))
++			continue;
++
++		/*
++		 * The whiteout already exists. This used to be impossible,
++		 * but now is possible because of opaqueness.
++		 */
++		if (hidden_wh_dentry->d_inode) {
++			dput(hidden_wh_dentry);
++			err = 0;
++			goto out;
++		}
++
++		hidden_dir_dentry = lock_parent(hidden_wh_dentry);
++		if (!(err = is_robranch_super(dentry->d_sb, bindex)))
++			err = vfs_create(hidden_dir_dentry->d_inode,
++					 hidden_wh_dentry,
++					 ~current->fs->umask & S_IRWXUGO,
++					 NULL);
++		unlock_dir(hidden_dir_dentry);
++		dput(hidden_wh_dentry);
++
++		if (!err || !IS_COPYUP_ERR(err))
++			break;
++	}
++
++	/* set dbopaque so that lookup will not proceed after this branch */
++	if (!err)
++		set_dbopaque(dentry, bindex);
++
++out:
++	kfree(name);
++	return err;
++}
++
++/*
++ * This is a helper function for rename, which can be left with hosed
++ * (stale) lower dentries when it needs to revert.
++ */
++int unionfs_refresh_hidden_dentry(struct dentry *dentry, int bindex)
++{
++	struct dentry *hidden_dentry;
++	struct dentry *hidden_parent;
++	int err = 0;
++
++	verify_locked(dentry);
++
++	unionfs_lock_dentry(dentry->d_parent);
++	hidden_parent = unionfs_lower_dentry_idx(dentry->d_parent, bindex);
++	unionfs_unlock_dentry(dentry->d_parent);
++
++	BUG_ON(!S_ISDIR(hidden_parent->d_inode->i_mode));
++
++	hidden_dentry = lookup_one_len(dentry->d_name.name, hidden_parent,
++				       dentry->d_name.len);
++	if (IS_ERR(hidden_dentry)) {
++		err = PTR_ERR(hidden_dentry);
++		goto out;
++	}
++
++	dput(unionfs_lower_dentry_idx(dentry, bindex));
++	iput(unionfs_lower_inode_idx(dentry->d_inode, bindex));
++	unionfs_set_lower_inode_idx(dentry->d_inode, bindex, NULL);
++
++	if (!hidden_dentry->d_inode) {
++		dput(hidden_dentry);
++		unionfs_set_lower_dentry_idx(dentry, bindex, NULL);
++	} else {
++		unionfs_set_lower_dentry_idx(dentry, bindex, hidden_dentry);
++		unionfs_set_lower_inode_idx(dentry->d_inode, bindex,
++					    igrab(hidden_dentry->d_inode));
++	}
++
++out:
++	return err;
++}
++
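++/*
++ * Mark a directory opaque at branch 'bindex' by creating an empty
++ * UNIONFS_DIR_OPAQUE marker file inside the lower directory; once set,
++ * lookup stops descending into branches to the right of this one.
++ */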
++int make_dir_opaque(struct dentry *dentry, int bindex)
++{
++	int err = 0;
++	struct dentry *hidden_dentry, *diropq;
++	struct inode *hidden_dir;
++
++	hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++	hidden_dir = hidden_dentry->d_inode;
++	BUG_ON(!S_ISDIR(dentry->d_inode->i_mode) ||
++	       !S_ISDIR(hidden_dir->i_mode));
++
++	mutex_lock(&hidden_dir->i_mutex);
++	diropq = lookup_one_len(UNIONFS_DIR_OPAQUE, hidden_dentry,
++				sizeof(UNIONFS_DIR_OPAQUE) - 1);
++	if (IS_ERR(diropq)) {
++		err = PTR_ERR(diropq);
++		goto out;
++	}
++
++	if (!diropq->d_inode)
++		err = vfs_create(hidden_dir, diropq, S_IRUGO, NULL);
++	if (!err)
++		set_dbopaque(dentry, bindex);
++
++	dput(diropq);
++
++out:
++	mutex_unlock(&hidden_dir->i_mutex);
++	return err;
++}
++
++/*
++ * Returns the sum of the i_nlink values of all the underlying inodes of
++ * the passed inode.
++ */
++int unionfs_get_nlinks(struct inode *inode)
++{
++	int sum_nlinks = 0;
++	int dirs = 0;
++	int bindex;
++	struct inode *hidden_inode;
++
++	/* don't bother to do all the work since we're unlinked */
++	if (inode->i_nlink == 0)
++		return 0;
++
++	if (!S_ISDIR(inode->i_mode))
++		return unionfs_lower_inode(inode)->i_nlink;
++
++	for (bindex = ibstart(inode); bindex <= ibend(inode); bindex++) {
++		hidden_inode = unionfs_lower_inode_idx(inode, bindex);
++
++		/* ignore files */
++		if (!hidden_inode || !S_ISDIR(hidden_inode->i_mode))
++			continue;
++
++		BUG_ON(hidden_inode->i_nlink < 0);
++
++		/* A deleted directory. */
++		if (hidden_inode->i_nlink == 0)
++			continue;
++		dirs++;
++
++		/*
++		 * A broken directory...
++		 *
++		 * Some filesystems don't properly set the number of links
++		 * on empty directories
++		 */
++		if (hidden_inode->i_nlink == 1)
++			sum_nlinks += 2;
++		else
++			sum_nlinks += (hidden_inode->i_nlink - 2);
++	}
++
++	return (!dirs ? 0 : sum_nlinks + 2);
++}
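++
++/*
++ * Worked example: a union directory backed by two lower directories,
++ * each with one subdirectory (i_nlink == 3), sums to
++ * (3 - 2) + (3 - 2) + 2 = 4 -- the same i_nlink a single directory
++ * with two subdirectories would have.
++ */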
++
++/* construct whiteout filename */
++char *alloc_whname(const char *name, int len)
++{
++	char *buf;
++
++	buf = kmalloc(len + UNIONFS_WHLEN + 1, GFP_KERNEL);
++	if (!buf)
++		return ERR_PTR(-ENOMEM);
++
++	strcpy(buf, UNIONFS_WHPFX);
++	strlcat(buf, name, len + UNIONFS_WHLEN + 1);
++
++	return buf;
++}
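++
++/*
++ * For example, assuming UNIONFS_WHPFX is ".wh." (with UNIONFS_WHLEN == 4),
++ * alloc_whname("foo", 3) returns an 8-byte buffer holding ".wh.foo\0";
++ * callers pass it to lookup_one_len() with len + UNIONFS_WHLEN.
++ */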
+diff -Nurb linux-2.6.22-570/fs/unionfs/super.c linux-2.6.22-try2/fs/unionfs/super.c
+--- linux-2.6.22-570/fs/unionfs/super.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/super.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,1002 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/*
++ * The inode cache is used with alloc_inode for both our inode info and the
++ * vfs inode.
++ */
++static struct kmem_cache *unionfs_inode_cachep;
++
++static void unionfs_read_inode(struct inode *inode)
++{
++	extern struct address_space_operations unionfs_aops;
++	int size;
++	struct unionfs_inode_info *info = UNIONFS_I(inode);
++
++	unionfs_read_lock(inode->i_sb);
++
++	memset(info, 0, offsetof(struct unionfs_inode_info, vfs_inode));
++	info->bstart = -1;
++	info->bend = -1;
++	atomic_set(&info->generation,
++		   atomic_read(&UNIONFS_SB(inode->i_sb)->generation));
++	spin_lock_init(&info->rdlock);
++	info->rdcount = 1;
++	info->hashsize = -1;
++	INIT_LIST_HEAD(&info->readdircache);
++
++	size = sbmax(inode->i_sb) * sizeof(struct inode *);
++	info->lower_inodes = kzalloc(size, GFP_KERNEL);
++	if (!info->lower_inodes) {
++		printk(KERN_ERR "unionfs: no kernel memory when allocating "
++		       "lower-pointer array!\n");
++		BUG();
++	}
++
++	inode->i_version++;
++	inode->i_op = &unionfs_main_iops;
++	inode->i_fop = &unionfs_main_fops;
++
++	inode->i_mapping->a_ops = &unionfs_aops;
++
++	unionfs_read_unlock(inode->i_sb);
++}
++
++/*
++ * We define delete_inode because there are two VFS paths that may
++ * destroy an inode: one of them calls clear_inode() before doing
++ * everything else that's needed, and the other is fine.  This way we
++ * truncate the inode size (and its pages) and then clear our own inode,
++ * which will do an iput on our inode and on the lower inode.
++ *
++ * No need to lock sb info's rwsem.
++ */
++static void unionfs_delete_inode(struct inode *inode)
++{
++	inode->i_size = 0;	/* every f/s seems to do that */
++
++	if (inode->i_data.nrpages)
++		truncate_inode_pages(&inode->i_data, 0);
++
++	clear_inode(inode);
++}
++
++/*
++ * final actions when unmounting a file system
++ *
++ * No need to lock rwsem.
++ */
++static void unionfs_put_super(struct super_block *sb)
++{
++	int bindex, bstart, bend;
++	struct unionfs_sb_info *spd;
++	int leaks = 0;
++
++	spd = UNIONFS_SB(sb);
++	if (!spd)
++		return;
++
++	bstart = sbstart(sb);
++	bend = sbend(sb);
++
++	/* Make sure we have no leaks of branchget/branchput. */
++	for (bindex = bstart; bindex <= bend; bindex++)
++		if (branch_count(sb, bindex) != 0) {
++			printk("unionfs: branch %d has %d references left!\n",
++			       bindex, branch_count(sb, bindex));
++			leaks = 1;
++		}
++	BUG_ON(leaks != 0);
++
++	kfree(spd->data);
++	kfree(spd);
++	sb->s_fs_info = NULL;
++}
++
++/*
++ * Since people use this to answer the "How big of a file can I write?"
++ * question, we report the size of the highest priority branch as the size of
++ * the union.
++ */
++static int unionfs_statfs(struct dentry *dentry, struct kstatfs *buf)
++{
++	int err	= 0;
++	struct super_block *sb;
++	struct dentry *lower_dentry;
++
++	sb = dentry->d_sb;
++
++	unionfs_read_lock(sb);
++	unionfs_lock_dentry(dentry);
++
++	if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	lower_dentry = unionfs_lower_dentry(sb->s_root);
++	err = vfs_statfs(lower_dentry, buf);
++
++	/* set return buf to our f/s to avoid confusing user-level utils */
++	buf->f_type = UNIONFS_SUPER_MAGIC;
++
++	/*
++	 * Our maximum file-name length is shorter by a few bytes because every
++	 * file name could potentially be whited-out.
++	 */
++	buf->f_namelen -= UNIONFS_WHLEN;
++
++	memset(&buf->f_fsid, 0, sizeof(__kernel_fsid_t));
++	memset(&buf->f_spare, 0, sizeof(buf->f_spare));
++
++out:
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(sb);
++	return err;
++}
++
++/* handle mode changing during remount */
++static noinline int do_remount_mode_option(char *optarg, int cur_branches,
++					   struct unionfs_data *new_data,
++					   struct path *new_lower_paths)
++{
++	int err = -EINVAL;
++	int perms, idx;
++	char *modename = strchr(optarg, '=');
++	struct nameidata nd;
++
++	/* by now, optarg contains the branch name */
++	if (!*optarg) {
++		printk("unionfs: no branch specified for mode change.\n");
++		goto out;
++	}
++	if (!modename) {
++		printk("unionfs: branch \"%s\" requires a mode.\n", optarg);
++		goto out;
++	}
++	*modename++ = '\0';
++	perms = __parse_branch_mode(modename);
++	if (perms == 0) {
++		printk("unionfs: invalid mode \"%s\" for \"%s\".\n",
++		       modename, optarg);
++		goto out;
++	}
++
++	/*
++	 * Find matching branch index.  For now, this assumes that nothing
++	 * has been mounted on top of this Unionfs stack.  Once we have /odf
++	 * and cache-coherency resolved, we'll address the branch-path
++	 * uniqueness.
++	 */
++	err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
++	if (err) {
++		printk(KERN_WARNING "unionfs: error accessing "
++		       "hidden directory \"%s\" (error %d)\n",
++		       optarg, err);
++		goto out;
++	}
++	for (idx=0; idx<cur_branches; idx++)
++		if (nd.mnt == new_lower_paths[idx].mnt &&
++		    nd.dentry == new_lower_paths[idx].dentry)
++			break;
++	path_release(&nd);	/* no longer needed */
++	if (idx == cur_branches) {
++		err = -ENOENT;	/* err may have been reset above */
++		printk(KERN_WARNING "unionfs: branch \"%s\" "
++		       "not found\n", optarg);
++		goto out;
++	}
++	/* check/change mode for existing branch */
++	/* we don't warn if perms==branchperms */
++	new_data[idx].branchperms = perms;
++	err = 0;
++out:
++	return err;
++}
++
++/* handle branch deletion during remount */
++static noinline int do_remount_del_option(char *optarg, int cur_branches,
++					  struct unionfs_data *new_data,
++					  struct path *new_lower_paths)
++{
++	int err = -EINVAL;
++	int idx;
++	struct nameidata nd;
++
++	/* optarg contains the branch name to delete */
++
++	/*
++	 * Find matching branch index.  For now, this assumes that nothing
++	 * has been mounted on top of this Unionfs stack.  Once we have /odf
++	 * and cache-coherency resolved, we'll address the branch-path
++	 * uniqueness.
++	 */
++	err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
++	if (err) {
++		printk(KERN_WARNING "unionfs: error accessing "
++		       "hidden directory \"%s\" (error %d)\n",
++		       optarg, err);
++		goto out;
++	}
++	for (idx=0; idx < cur_branches; idx++)
++		if (nd.mnt == new_lower_paths[idx].mnt &&
++		    nd.dentry == new_lower_paths[idx].dentry)
++			break;
++	path_release(&nd);	/* no longer needed */
++	if (idx == cur_branches) {
++		printk(KERN_WARNING "unionfs: branch \"%s\" "
++		       "not found\n", optarg);
++		err = -ENOENT;
++		goto out;
++	}
++	/* check if there are any open files on the branch to be deleted */
++	if (atomic_read(&new_data[idx].open_files) > 0) {
++		err = -EBUSY;
++		goto out;
++	}
++
++	/*
++	 * Now we have to delete the branch.  First, release any handles it
++	 * has.  Then, move the remaining array indexes past "idx" in
++	 * new_data and new_lower_paths one to the left.  Finally, adjust
++	 * cur_branches.
++	 */
++	pathput(&new_lower_paths[idx]);
++
++	if (idx < cur_branches - 1) {
++		/* if idx==cur_branches-1, we delete last branch: easy */
++		memmove(&new_data[idx], &new_data[idx+1],
++			(cur_branches - 1 - idx) *
++			sizeof(struct unionfs_data));
++		memmove(&new_lower_paths[idx], &new_lower_paths[idx+1],
++			(cur_branches - 1 - idx) * sizeof(struct path));
++	}
++
++	err = 0;
++out:
++	return err;
++}
++
++/* handle branch insertion during remount */
++static noinline int do_remount_add_option(char *optarg, int cur_branches,
++					  struct unionfs_data *new_data,
++					  struct path *new_lower_paths,
++					  int *high_branch_id)
++{
++	int err = -EINVAL;
++	int perms;
++	int idx = 0;		/* default: insert at beginning */
++	char *new_branch , *modename = NULL;
++	struct nameidata nd;
++
++	/*
++	 * optarg can be of several forms:
++	 *
++	 * /bar:/foo		insert /foo before /bar
++	 * /bar:/foo=ro		insert /foo in ro mode before /bar
++	 * /foo			insert /foo in the beginning (prepend)
++	 * :/foo		insert /foo at the end (append)
++	 */
++	if (*optarg == ':') {	/* append? */
++		new_branch = optarg + 1; /* skip ':' */
++		idx = cur_branches;
++		goto found_insertion_point;
++	}
++	new_branch = strchr(optarg, ':');
++	if (!new_branch) {	/* prepend? */
++		new_branch = optarg;
++		goto found_insertion_point;
++	}
++	*new_branch++ = '\0';	/* holds path+mode of new branch */
++
++	/*
++	 * Find matching branch index.  For now, this assumes that nothing
++	 * has been mounted on top of this Unionfs stack.  Once we have /odf
++	 * and cache-coherency resolved, we'll address the branch-path
++	 * uniqueness.
++	 */
++	err = path_lookup(optarg, LOOKUP_FOLLOW, &nd);
++	if (err) {
++		printk(KERN_WARNING "unionfs: error accessing "
++		       "hidden directory \"%s\" (error %d)\n",
++		       optarg, err);
++		goto out;
++	}
++	for (idx=0; idx < cur_branches; idx++)
++		if (nd.mnt == new_lower_paths[idx].mnt &&
++		    nd.dentry == new_lower_paths[idx].dentry)
++			break;
++	path_release(&nd);	/* no longer needed */
++	if (idx == cur_branches) {
++		printk(KERN_WARNING "unionfs: branch \"%s\" "
++		       "not found\n", optarg);
++		err = -ENOENT;
++		goto out;
++	}
++
++	/*
++	 * At this point idx holds the index before which the new branch
++	 * should be inserted.
++	 */
++found_insertion_point:
++	/* find the mode for the new branch */
++	if (new_branch)
++		modename = strchr(new_branch, '=');
++	if (modename)
++		*modename++ = '\0';
++	perms = parse_branch_mode(modename);
++
++	if (!new_branch || !*new_branch) {
++		printk(KERN_WARNING "unionfs: null new branch\n");
++		err = -EINVAL;
++		goto out;
++	}
++	err = path_lookup(new_branch, LOOKUP_FOLLOW, &nd);
++	if (err) {
++		printk(KERN_WARNING "unionfs: error accessing "
++		       "hidden directory \"%s\" (error %d)\n",
++		       new_branch, err);
++		goto out;
++	}
++	/*
++	 * It's probably safe now to check the new branch to insert.  Note:
++	 * we don't allow inserting branches which are themselves unionfs
++	 * mounts (check_branch returns -EINVAL in that case).  This is
++	 * because this code base doesn't support stacking unionfs: the ODF
++	 * code base supports that correctly.
++	 */
++	if ((err = check_branch(&nd))) {
++		printk(KERN_WARNING "unionfs: hidden directory "
++		       "\"%s\" is not a valid branch\n", optarg);
++		path_release(&nd);
++		goto out;
++	}
++
++	/*
++	 * Now we have to insert the new branch.  But first, move the bits
++	 * to make space for the new branch, if needed.  Finally, adjust
++	 * cur_branches.
++	 * We don't release nd here; it's kept until umount/remount.
++	 */
++	if (idx < cur_branches) {
++		/* if idx==cur_branches, we append: easy */
++		memmove(&new_data[idx+1], &new_data[idx],
++			(cur_branches - idx) * sizeof(struct unionfs_data));
++		memmove(&new_lower_paths[idx+1], &new_lower_paths[idx],
++			(cur_branches - idx) * sizeof(struct path));
++	}
++	new_lower_paths[idx].dentry = nd.dentry;
++	new_lower_paths[idx].mnt = nd.mnt;
++
++	new_data[idx].sb = nd.dentry->d_sb;
++	atomic_set(&new_data[idx].open_files, 0);
++	new_data[idx].branchperms = perms;
++	new_data[idx].branch_id = ++*high_branch_id; /* assign new branch ID */
++
++	err = 0;
++out:
++	return err;
++}
++
++
++/*
++ * Support branch management options on remount.
++ *
++ * See Documentation/filesystems/unionfs/ for details.
++ *
++ * @flags: numeric mount options
++ * @options: mount options string
++ *
++ * This function can rearrange a mounted union dynamically, adding and
++ * removing branches, including changing branch modes.  Clearly this has to
++ * be done safely and atomically.  Luckily, the VFS already calls this
++ * function with lock_super(sb) and lock_kernel() held, preventing
++ * concurrent mixing of new mounts, remounts, and unmounts.  Moreover,
++ * do_remount_sb(), our caller function, already called shrink_dcache_sb(sb)
++ * to purge dentries/inodes from our superblock, and also called
++ * fsync_super(sb) to purge any dirty pages.  So we're good.
++ *
++ * XXX: however, our remount code may also need to invalidate mapped pages
++ * so as to force them to be re-gotten from the (newly reconfigured) lower
++ * branches.  This has to wait for proper mmap and cache coherency support
++ * in the VFS.
++ *
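++ * Example invocations (illustrative sketches, assuming a union mounted
++ * at /mnt/union; see the documentation above for authoritative syntax):
++ *
++ *	mount -o remount,add=/foo=rw /mnt/union
++ *	mount -o remount,del=/foo /mnt/union
++ *	mount -o remount,mode=/foo=ro /mnt/union
++ *	mount -o remount,incgen /mnt/union
++ *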
++ */
++static int unionfs_remount_fs(struct super_block *sb, int *flags,
++			      char *options)
++{
++	int err = 0;
++	int i;
++	char *optionstmp, *tmp_to_free;	/* kstrdup'ed of "options" */
++	char *optname;
++	int cur_branches = 0;	/* no. of current branches */
++	int new_branches = 0;	/* no. of branches actually left in the end */
++	int add_branches;	/* est. no. of branches to add */
++	int del_branches;	/* est. no. of branches to del */
++	int max_branches;	/* max possible no. of branches */
++	struct unionfs_data *new_data = NULL, *tmp_data = NULL;
++	struct path *new_lower_paths = NULL, *tmp_lower_paths = NULL;
++	struct inode **new_lower_inodes = NULL;
++	int new_high_branch_id;	/* new high branch ID */
++	int size;		/* memory allocation size, temp var */
++	int old_ibstart, old_ibend;
++
++	unionfs_write_lock(sb);
++
++	/*
++	 * The VFS will take care of "ro" and "rw" flags, and we can safely
++	 * ignore MS_SILENT, but anything else left over is an error.  So we
++	 * need to check if any other flags may have been passed (none are
++	 * allowed/supported as of now).
++	 */
++	if ((*flags & ~(MS_RDONLY | MS_SILENT)) != 0) {
++		printk(KERN_WARNING
++		       "unionfs: remount flags 0x%x unsupported\n", *flags);
++		err = -EINVAL;
++		goto out_error;
++	}
++
++	/*
++	 * If 'options' is NULL, it's probably because the user just changed
++	 * the union to a "ro" or "rw" and the VFS took care of it.  So
++	 * nothing to do and we're done.
++	 */
++	if (!options || options[0] == '\0')
++		goto out_error;
++
++	/*
++	 * Find out how many branches we will have in the end, counting
++	 * "add" and "del" commands.  Copy the "options" string because
++	 * strsep modifies the string and we need it later.
++	 */
++	optionstmp = tmp_to_free = kstrdup(options, GFP_KERNEL);
++	if (!optionstmp) {
++		err = -ENOMEM;
++		goto out_free;
++	}
++	new_branches = cur_branches = sbmax(sb); /* current no. branches */
++	add_branches = del_branches = 0;
++	new_high_branch_id = sbhbid(sb); /* save current high_branch_id */
++	while ((optname = strsep(&optionstmp, ",")) != NULL) {
++		char *optarg;
++
++		if (!optname || !*optname)
++			continue;
++
++		optarg = strchr(optname, '=');
++		if (optarg)
++			*optarg++ = '\0';
++
++		if (!strcmp("add", optname))
++			add_branches++;
++		else if (!strcmp("del", optname))
++			del_branches++;
++	}
++	kfree(tmp_to_free);
++	/* after all changes, will we have at least one branch left? */
++	if ((new_branches + add_branches - del_branches) < 1) {
++		printk(KERN_WARNING
++		       "unionfs: no branches left after remount\n");
++		err = -EINVAL;
++		goto out_free;
++	}
++
++	/*
++	 * Since we haven't actually parsed all the add/del options, nor
++	 * have we checked them for errors, we don't know for sure how many
++	 * branches we will have after all changes have taken place.  In
++	 * fact, the total number of branches left could be less than what
++	 * we have now.  So we need to allocate space for a temporary
++	 * placeholder that is at least as large as the maximum number of
++	 * branches we *could* have, which is the current number plus all
++	 * the additions.  Once we're done with these temp placeholders, we
++	 * may have to re-allocate the final size, copy over from the temp,
++	 * and then free the temps (done near the end of this function).
++	 */
++	max_branches = cur_branches + add_branches;
++	/* allocate space for new pointers to hidden dentry */
++	tmp_data = kcalloc(max_branches,
++			   sizeof(struct unionfs_data), GFP_KERNEL);
++	if (!tmp_data) {
++		err = -ENOMEM;
++		goto out_free;
++	}
++	/* allocate space for new pointers to lower paths */
++	tmp_lower_paths = kcalloc(max_branches,
++				  sizeof(struct path), GFP_KERNEL);
++	if (!tmp_lower_paths) {
++		err = -ENOMEM;
++		goto out_free;
++	}
++	/* copy current info into new placeholders, incrementing refcnts */
++	memcpy(tmp_data, UNIONFS_SB(sb)->data,
++	       cur_branches * sizeof(struct unionfs_data));
++	memcpy(tmp_lower_paths, UNIONFS_D(sb->s_root)->lower_paths,
++	       cur_branches * sizeof(struct path));
++	for (i=0; i<cur_branches; i++)
++		pathget(&tmp_lower_paths[i]); /* drop refs at end of fxn */
++
++	/*******************************************************************
++	 * For each branch command, do path_lookup on the requested branch,
++	 * and apply the change to a temp branch list.  To handle errors, we
++	 * already dup'ed the old arrays (above), and increased the refcnts
++	 * on various f/s objects.  So now we can do all the path_lookups
++	 * and branch-management commands on the new arrays.  If we fail
++	 * midway, we free the tmp arrays and *put all objects.  If we succeed,
++	 * then we free old arrays and *put its objects, and then replace
++	 * the arrays with the new tmp list (we may have to re-allocate the
++	 * memory because the temp lists could have been larger than what we
++	 * actually needed).
++	 *******************************************************************/
++
++	while ((optname = strsep(&options, ",")) != NULL) {
++		char *optarg;
++
++		if (!optname || !*optname)
++			continue;
++		/*
++		 * At this stage optname holds a comma-delimited option, but
++		 * without the commas.  Next, we need to break the string on
++		 * the '=' symbol to separate CMD=ARG, where ARG itself can
++		 * be KEY=VAL.  For example, in mode=/foo=rw, CMD is "mode",
++		 * KEY is "/foo", and VAL is "rw".
++		 */
++		optarg = strchr(optname, '=');
++		if (optarg)
++			*optarg++ = '\0';
++		/* incgen remount option (instead of old ioctl) */
++		if (!strcmp("incgen", optname)) {
++			err = 0;
++			goto out_no_change;
++		}
++
++		/*
++		 * All of our options take an argument now.  (Insert ones
++		 * that don't above this check.)  So at this stage optname
++		 * contains the CMD part and optarg contains the ARG part.
++		 */
++		if (!optarg || !*optarg) {
++			printk("unionfs: all remount options require "
++			       "an argument (%s).\n", optname);
++			err = -EINVAL;
++			goto out_release;
++		}
++
++		if (!strcmp("add", optname)) {
++			err = do_remount_add_option(optarg, new_branches,
++						    tmp_data,
++						    tmp_lower_paths,
++						    &new_high_branch_id);
++			if (err)
++				goto out_release;
++			new_branches++;
++			if (new_branches > UNIONFS_MAX_BRANCHES) {
++				printk("unionfs: command exceeds "
++				       "%d branches\n", UNIONFS_MAX_BRANCHES);
++				err = -E2BIG;
++				goto out_release;
++			}
++			continue;
++		}
++		if (!strcmp("del", optname)) {
++			err = do_remount_del_option(optarg, new_branches,
++						    tmp_data,
++						    tmp_lower_paths);
++			if (err)
++				goto out_release;
++			new_branches--;
++			continue;
++		}
++		if (!strcmp("mode", optname)) {
++			err = do_remount_mode_option(optarg, new_branches,
++						     tmp_data,
++						     tmp_lower_paths);
++			if (err)
++				goto out_release;
++			continue;
++		}
++
++		/*
++		 * When you use "mount -o remount,ro", mount(8) will
++		 * reportedly pass the original dirs= string from
++		 * /proc/mounts.  So for now, we have to ignore dirs= and
++		 * not consider it an error, unless we want to allow users
++		 * to pass dirs= in remount.  Note that to allow the VFS to
++		 * actually process the ro/rw remount options, we have to
++		 * return 0 from this function.
++		 */
++		if (!strcmp("dirs", optname)) {
++			printk(KERN_WARNING
++			       "unionfs: remount ignoring option \"%s\".\n",
++			       optname);
++			continue;
++		}
++
++		err = -EINVAL;
++		printk(KERN_WARNING
++		       "unionfs: unrecognized option \"%s\"\n", optname);
++		goto out_release;
++	}
++
++out_no_change:
++
++	/******************************************************************
++	 * WE'RE ALMOST DONE: check if leftmost branch might be read-only,
++	 * see if we need to allocate a small-sized new vector, copy the
++	 * vectors to their correct place, release the refcnt of the older
++	 * ones, and return.  Also handle invalidating any pages that will
++	 * have to be re-read.
++	 *******************************************************************/
++
++	if (!(tmp_data[0].branchperms & MAY_WRITE)) {
++		printk("unionfs: leftmost branch cannot be read-only "
++		       "(use \"remount,ro\" to create a read-only union)\n");
++		err = -EINVAL;
++		goto out_release;
++	}
++
++	/* (re)allocate space for new pointers to hidden dentry */
++	size = new_branches * sizeof(struct unionfs_data);
++	new_data = krealloc(tmp_data, size, GFP_KERNEL);
++	if (!new_data) {
++		err = -ENOMEM;
++		goto out_release;
++	}
++
++	/* allocate space for new pointers to lower paths */
++	size = new_branches * sizeof(struct path);
++	new_lower_paths = krealloc(tmp_lower_paths, size, GFP_KERNEL);
++	if (!new_lower_paths) {
++		err = -ENOMEM;
++		goto out_release;
++	}
++
++	/* allocate space for new pointers to lower inodes */
++	new_lower_inodes = kcalloc(new_branches,
++				   sizeof(struct inode *), GFP_KERNEL);
++	if (!new_lower_inodes) {
++		err = -ENOMEM;
++		goto out_release;
++	}
++
++	/*
++	 * OK, just before we actually put the new set of branches in place,
++	 * we need to ensure that our own f/s has no dirty objects left.
++	 * Luckily, do_remount_sb() already calls shrink_dcache_sb(sb) and
++	 * fsync_super(sb), taking care of dentries, inodes, and dirty
++	 * pages.  So all that's left is for us to invalidate any leftover
++	 * (non-dirty) pages to ensure that they will be re-read from the
++	 * new lower branches (and to support mmap).
++	 */
++
++	/*
++	 * Now we call drop_pagecache_sb() to invalidate all pages in this
++	 * super.  This function calls invalidate_inode_pages(mapping),
++	 * which calls invalidate_mapping_pages(): the latter, however, will
++	 * not invalidate pages which are dirty, locked, under writeback, or
++	 * mapped into page tables.  We shouldn't have to worry about dirty
++	 * or under-writeback pages, because do_remount_sb() called
++	 * fsync_super() which would not have returned until all dirty pages
++	 * were flushed.
++	 *
++	 * But do we have to worry about locked pages?  Is there any chance
++	 * that in here we'll get locked pages?
++	 *
++	 * XXX: what about pages mapped into pagetables?  Are these pages
++	 * which user processes may have mmap(2)'ed?  If so, then we need to
++	 * invalidate those too, no?  Maybe we'll have to write our own
++	 * version of invalidate_mapping_pages() which also handles mapped
++	 * pages.
++	 *
++	 * XXX: Alternatively, maybe we should call truncate_inode_pages(),
++	 * which uses two passes over the pages list, and will truncate all
++	 * pages.
++	 */
++	drop_pagecache_sb(sb);
++
++	/* copy new vectors into their correct place */
++	tmp_data = UNIONFS_SB(sb)->data;
++	UNIONFS_SB(sb)->data = new_data;
++	new_data = NULL;	/* so don't free good pointers below */
++	tmp_lower_paths = UNIONFS_D(sb->s_root)->lower_paths;
++	UNIONFS_D(sb->s_root)->lower_paths = new_lower_paths;
++	new_lower_paths = NULL;	/* so don't free good pointers below */
++
++	/* update our unionfs_sb_info and root dentry index of last branch */
++	i = sbmax(sb);		/* save no. of branches to release at end */
++	sbend(sb) = new_branches - 1;
++	set_dbend(sb->s_root, new_branches - 1);
++	old_ibstart = ibstart(sb->s_root->d_inode);
++	old_ibend = ibend(sb->s_root->d_inode);
++	ibend(sb->s_root->d_inode) = new_branches - 1;
++	UNIONFS_D(sb->s_root)->bcount = new_branches;
++	new_branches = i; /* no. of branches to release below */
++
++	/*
++	 * Update lower inodes: 3 steps
++	 * 1. grab ref on all new lower inodes
++	 */
++	for (i=dbstart(sb->s_root); i<=dbend(sb->s_root); i++) {
++		struct dentry *lower_dentry =
++			unionfs_lower_dentry_idx(sb->s_root, i);
++		atomic_inc(&lower_dentry->d_inode->i_count);
++		new_lower_inodes[i] = lower_dentry->d_inode;
++	}
++	/* 2. release reference on all older lower inodes */
++	for (i=old_ibstart; i<=old_ibend; i++) {
++		iput(unionfs_lower_inode_idx(sb->s_root->d_inode, i));
++		unionfs_set_lower_inode_idx(sb->s_root->d_inode, i, NULL);
++	}
++	kfree(UNIONFS_I(sb->s_root->d_inode)->lower_inodes);
++	/* 3. update root dentry's inode to new lower_inodes array */
++	UNIONFS_I(sb->s_root->d_inode)->lower_inodes = new_lower_inodes;
++	new_lower_inodes = NULL;
++
++	/* maxbytes may have changed */
++	sb->s_maxbytes = unionfs_lower_super_idx(sb, 0)->s_maxbytes;
++	/* update high branch ID */
++	sbhbid(sb) = new_high_branch_id;
++
++	/* update our sb->generation for revalidating objects */
++	i = atomic_inc_return(&UNIONFS_SB(sb)->generation);
++	atomic_set(&UNIONFS_D(sb->s_root)->generation, i);
++	atomic_set(&UNIONFS_I(sb->s_root->d_inode)->generation, i);
++
++	err = 0;		/* reset to success */
++
++	if (!(*flags & MS_SILENT))
++		printk("unionfs: new generation number %d\n", i);
++
++	/*
++	 * The code above falls through to the next label, and releases the
++	 * refcnts of the older ones (stored in tmp_*): if we fell through
++	 * here, it means success.  However, if we jump directly to this
++	 * label from any error above, then an error occurred after we
++	 * grabbed various refcnts, and so we have to release the
++	 * temporarily constructed structures.
++	 */
++out_release:
++	/* no need to cleanup/release anything in tmp_data */
++	if (tmp_lower_paths)
++		for (i=0; i<new_branches; i++)
++			pathput(&tmp_lower_paths[i]);
++out_free:
++	kfree(tmp_lower_paths);
++	kfree(tmp_data);
++	kfree(new_lower_paths);
++	kfree(new_data);
++	kfree(new_lower_inodes);
++out_error:
++	unionfs_write_unlock(sb);
++	return err;
++}
++
++/*
++ * Called by iput() when the inode reference count reached zero
++ * and the inode is not hashed anywhere.  Used to clear anything
++ * that needs to be, before the inode is completely destroyed and put
++ * on the inode free list.
++ *
++ * No need to lock sb info's rwsem.
++ */
++static void unionfs_clear_inode(struct inode *inode)
++{
++	int bindex, bstart, bend;
++	struct inode *hidden_inode;
++	struct list_head *pos, *n;
++	struct unionfs_dir_state *rdstate;
++
++	list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
++		rdstate = list_entry(pos, struct unionfs_dir_state, cache);
++		list_del(&rdstate->cache);
++		free_rdstate(rdstate);
++	}
++
++	/*
++	 * Decrement a reference to a hidden_inode, which was incremented
++	 * by our read_inode when it was created initially.
++	 */
++	bstart = ibstart(inode);
++	bend = ibend(inode);
++	if (bstart >= 0) {
++		for (bindex = bstart; bindex <= bend; bindex++) {
++			hidden_inode = unionfs_lower_inode_idx(inode, bindex);
++			if (!hidden_inode)
++				continue;
++			iput(hidden_inode);
++		}
++	}
++
++	kfree(UNIONFS_I(inode)->lower_inodes);
++	UNIONFS_I(inode)->lower_inodes = NULL;
++}
++
++static struct inode *unionfs_alloc_inode(struct super_block *sb)
++{
++	struct unionfs_inode_info *i;
++
++	i = kmem_cache_alloc(unionfs_inode_cachep, GFP_KERNEL);
++	if (!i)
++		return NULL;
++
++	/* memset everything up to the inode to 0 */
++	memset(i, 0, offsetof(struct unionfs_inode_info, vfs_inode));
++
++	i->vfs_inode.i_version = 1;
++	return &i->vfs_inode;
++}
++
++static void unionfs_destroy_inode(struct inode *inode)
++{
++	kmem_cache_free(unionfs_inode_cachep, UNIONFS_I(inode));
++}
++
++/* unionfs inode cache constructor */
++static void init_once(void *v, struct kmem_cache *cachep, unsigned long flags)
++{
++	struct unionfs_inode_info *i = v;
++
++	inode_init_once(&i->vfs_inode);
++}
++
++int unionfs_init_inode_cache(void)
++{
++	int err = 0;
++
++	unionfs_inode_cachep =
++		kmem_cache_create("unionfs_inode_cache",
++				  sizeof(struct unionfs_inode_info), 0,
++				  SLAB_RECLAIM_ACCOUNT, init_once, NULL);
++	if (!unionfs_inode_cachep)
++		err = -ENOMEM;
++	return err;
++}
++
++/* unionfs inode cache destructor */
++void unionfs_destroy_inode_cache(void)
++{
++	if (unionfs_inode_cachep)
++		kmem_cache_destroy(unionfs_inode_cachep);
++}
++
++/*
++ * Called when we have a dirty inode; here we only throw out the
++ * parts of our readdir list that are too old.
++ *
++ * No need to grab sb info's rwsem.
++ */
++static int unionfs_write_inode(struct inode *inode, int sync)
++{
++	struct list_head *pos, *n;
++	struct unionfs_dir_state *rdstate;
++
++	spin_lock(&UNIONFS_I(inode)->rdlock);
++	list_for_each_safe(pos, n, &UNIONFS_I(inode)->readdircache) {
++		rdstate = list_entry(pos, struct unionfs_dir_state, cache);
++		/* We keep this list in LRU order. */
++		if ((rdstate->access + RDCACHE_JIFFIES) > jiffies)
++			break;
++		UNIONFS_I(inode)->rdcount--;
++		list_del(&rdstate->cache);
++		free_rdstate(rdstate);
++	}
++	spin_unlock(&UNIONFS_I(inode)->rdlock);
++
++	return 0;
++}
++
++/*
++ * Used only with NFS, to kill any pending RPC tasks, so that subsequent
++ * code can actually succeed and won't leave behind tasks that need handling.
++ */
++static void unionfs_umount_begin(struct vfsmount *mnt, int flags)
++{
++	struct super_block *sb, *hidden_sb;
++	struct vfsmount *hidden_mnt;
++	int bindex, bstart, bend;
++
++	if (!(flags & MNT_FORCE))
++		/*
++		 * we are not being MNT_FORCE'd, therefore we should emulate
++		 * old behavior
++		 */
++		return;
++
++	sb = mnt->mnt_sb;
++
++	unionfs_read_lock(sb);
++
++	bstart = sbstart(sb);
++	bend = sbend(sb);
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		hidden_mnt = unionfs_lower_mnt_idx(sb->s_root, bindex);
++		hidden_sb = unionfs_lower_super_idx(sb, bindex);
++
++		if (hidden_mnt && hidden_sb && hidden_sb->s_op &&
++		    hidden_sb->s_op->umount_begin)
++			hidden_sb->s_op->umount_begin(hidden_mnt, flags);
++	}
++
++	unionfs_read_unlock(sb);
++}
++
++static int unionfs_show_options(struct seq_file *m, struct vfsmount *mnt)
++{
++	struct super_block *sb = mnt->mnt_sb;
++	int ret = 0;
++	char *tmp_page;
++	char *path;
++	int bindex, bstart, bend;
++	int perms;
++
++	unionfs_read_lock(sb);
++
++	unionfs_lock_dentry(sb->s_root);
++
++	tmp_page = (char*) __get_free_page(GFP_KERNEL);
++	if (!tmp_page) {
++		ret = -ENOMEM;
++		goto out;
++	}
++
++	bstart = sbstart(sb);
++	bend = sbend(sb);
++
++	seq_printf(m, ",dirs=");
++	for (bindex = bstart; bindex <= bend; bindex++) {
++		path = d_path(unionfs_lower_dentry_idx(sb->s_root, bindex),
++			      unionfs_lower_mnt_idx(sb->s_root, bindex),
++			      tmp_page, PAGE_SIZE);
++		if (IS_ERR(path)) {
++			ret = PTR_ERR(path);
++			goto out;
++		}
++
++		perms = branchperms(sb, bindex);
++
++		seq_printf(m, "%s=%s", path,
++			   perms & MAY_WRITE ? "rw" : "ro");
++		if (bindex != bend)
++			seq_printf(m, ":");
++	}
++
++out:
++	free_page((unsigned long) tmp_page);
++
++	unionfs_unlock_dentry(sb->s_root);
++
++	unionfs_read_unlock(sb);
++
++	return ret;
++}
++
++struct super_operations unionfs_sops = {
++	.read_inode	= unionfs_read_inode,
++	.delete_inode	= unionfs_delete_inode,
++	.put_super	= unionfs_put_super,
++	.statfs		= unionfs_statfs,
++	.remount_fs	= unionfs_remount_fs,
++	.clear_inode	= unionfs_clear_inode,
++	.umount_begin	= unionfs_umount_begin,
++	.show_options	= unionfs_show_options,
++	.write_inode	= unionfs_write_inode,
++	.alloc_inode	= unionfs_alloc_inode,
++	.destroy_inode	= unionfs_destroy_inode,
++};
+diff -Nurb linux-2.6.22-570/fs/unionfs/union.h linux-2.6.22-try2/fs/unionfs/union.h
+--- linux-2.6.22-570/fs/unionfs/union.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/union.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,467 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#ifndef _UNION_H_
++#define _UNION_H_
++
++#include <linux/dcache.h>
++#include <linux/file.h>
++#include <linux/list.h>
++#include <linux/fs.h>
++#include <linux/mm.h>
++#include <linux/module.h>
++#include <linux/mount.h>
++#include <linux/namei.h>
++#include <linux/page-flags.h>
++#include <linux/pagemap.h>
++#include <linux/poll.h>
++#include <linux/security.h>
++#include <linux/seq_file.h>
++#include <linux/slab.h>
++#include <linux/spinlock.h>
++#include <linux/smp_lock.h>
++#include <linux/statfs.h>
++#include <linux/string.h>
++#include <linux/vmalloc.h>
++#include <linux/writeback.h>
++#include <linux/buffer_head.h>
++#include <linux/xattr.h>
++#include <linux/fs_stack.h>
++#include <linux/magic.h>
++#include <linux/log2.h>
++
++#include <asm/mman.h>
++#include <asm/system.h>
++
++#include <linux/union_fs.h>
++
++/* the file system name */
++#define UNIONFS_NAME "unionfs"
++
++/* unionfs root inode number */
++#define UNIONFS_ROOT_INO     1
++
++/* number of times we try to get a unique temporary file name */
++#define GET_TMPNAM_MAX_RETRY	5
++
++/* maximum number of branches we support, to avoid memory blowup */
++#define UNIONFS_MAX_BRANCHES	128
++
++/* Operations vectors defined in specific files. */
++extern struct file_operations unionfs_main_fops;
++extern struct file_operations unionfs_dir_fops;
++extern struct inode_operations unionfs_main_iops;
++extern struct inode_operations unionfs_dir_iops;
++extern struct inode_operations unionfs_symlink_iops;
++extern struct super_operations unionfs_sops;
++extern struct dentry_operations unionfs_dops;
++
++/* How long should an entry be allowed to persist */
++#define RDCACHE_JIFFIES	(5*HZ)
++
++/* file private data. */
++struct unionfs_file_info {
++	int bstart;
++	int bend;
++	atomic_t generation;
++
++	struct unionfs_dir_state *rdstate;
++	struct file **lower_files;
++	int *saved_branch_ids; /* IDs of branches when file was opened */
++};
++
++/* unionfs inode data in memory */
++struct unionfs_inode_info {
++	int bstart;
++	int bend;
++	atomic_t generation;
++	int stale;
++	/* Stuff for readdir over NFS. */
++	spinlock_t rdlock;
++	struct list_head readdircache;
++	int rdcount;
++	int hashsize;
++	int cookie;
++
++	/* The hidden inodes */
++	struct inode **lower_inodes;
++	/* to keep track of reads/writes for unlinks before closes */
++	atomic_t totalopens;
++
++	struct inode vfs_inode;
++};
++
++/* unionfs dentry data in memory */
++struct unionfs_dentry_info {
++	/*
++	 * This mutex is used to lock the dentry as soon as we get into a
++	 * unionfs function from the VFS.  Our lock ordering is that children
++	 * go before their parents.
++	 */
++	struct mutex lock;
++	int bstart;
++	int bend;
++	int bopaque;
++	int bcount;
++	atomic_t generation;
++	struct path *lower_paths;
++};
++
++/* These are the pointers to our various objects. */
++struct unionfs_data {
++	struct super_block *sb;
++	atomic_t open_files;	/* number of open files on branch */
++	int branchperms;
++	int branch_id;		/* unique branch ID at re/mount time */
++};
++
++/* unionfs super-block data in memory */
++struct unionfs_sb_info {
++	int bend;
++
++	atomic_t generation;
++
++	/*
++	 * This rwsem is used to make sure that:
++	 *   1) a branch management operation will not begin before all
++	 *      currently in-flight operations complete
++	 *   2) new operations do not execute until the currently running
++	 *      branch management operation completes
++	 */
++	struct rw_semaphore rwsem;
++	int high_branch_id;	/* last unique branch ID given */
++	struct unionfs_data *data;
++};
++
++/*
++ * structure for making the linked list of entries by readdir on left branch
++ * to compare with entries on right branch
++ */
++struct filldir_node {
++	struct list_head file_list;	/* list for directory entries */
++	char *name;		/* name entry */
++	int hash;		/* name hash */
++	int namelen;		/* name length; name is not NUL-terminated */
++
++	/*
++	 * the branch index lets us detect duplicate whiteouts and files in
++	 * the same branch, in which case we return -EIO.
++	 */
++	int bindex;
++
++	/* is this a whiteout entry? */
++	int whiteout;
++
++	/* Inline name, so we don't need to separately kmalloc small ones */
++	char iname[DNAME_INLINE_LEN_MIN];
++};
++
++/* Directory hash table. */
++struct unionfs_dir_state {
++	unsigned int cookie;	/* the cookie, based off of rdversion */
++	unsigned int offset;	/* The entry we have returned. */
++	int bindex;
++	loff_t dirpos;		/* offset within the lower level directory */
++	int size;		/* How big is the hash table? */
++	int hashentries;	/* How many entries have been inserted? */
++	unsigned long access;
++
++	/* This cache list is used when the inode keeps us around. */
++	struct list_head cache;
++	struct list_head list[0];
++};
++
++/* include miscellaneous macros */
++#include "fanout.h"
++#include "sioq.h"
++
++/* externs for cache creation/deletion routines */
++extern void unionfs_destroy_filldir_cache(void);
++extern int unionfs_init_filldir_cache(void);
++extern int unionfs_init_inode_cache(void);
++extern void unionfs_destroy_inode_cache(void);
++extern int unionfs_init_dentry_cache(void);
++extern void unionfs_destroy_dentry_cache(void);
++
++/* Initialize and free readdir-specific state. */
++extern int init_rdstate(struct file *file);
++extern struct unionfs_dir_state *alloc_rdstate(struct inode *inode, int bindex);
++extern struct unionfs_dir_state *find_rdstate(struct inode *inode, loff_t fpos);
++extern void free_rdstate(struct unionfs_dir_state *state);
++extern int add_filldir_node(struct unionfs_dir_state *rdstate, const char *name,
++			    int namelen, int bindex, int whiteout);
++extern struct filldir_node *find_filldir_node(struct unionfs_dir_state *rdstate,
++					      const char *name, int namelen);
++
++extern struct dentry **alloc_new_dentries(int objs);
++extern struct unionfs_data *alloc_new_data(int objs);
++
++/* We can only use 32 bits of offset for rdstate --- blech! */
++#define DIREOF (0xfffff)
++#define RDOFFBITS 20		/* This is the number of bits in DIREOF. */
++#define MAXRDCOOKIE (0xfff)
++/* Turn an rdstate into an offset. */
++static inline off_t rdstate2offset(struct unionfs_dir_state *buf)
++{
++	off_t tmp;
++
++	tmp = ((buf->cookie & MAXRDCOOKIE) << RDOFFBITS)
++		| (buf->offset & DIREOF);
++	return tmp;
++}
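++
++/*
++ * Illustrative sketch (editor's note, not part of the original code): how
++ * the cookie and offset pack into the 32 usable bits.  With MAXRDCOOKIE =
++ * 0xfff (12 bits) and DIREOF = 0xfffff (20 bits), a cookie of 0x002 and an
++ * offset of 0x00005 pack as:
++ *
++ *	(0x002 << 20) | 0x00005 == 0x00200005
++ *
++ * so the cookie occupies bits 20-31 and the in-directory offset bits 0-19.
++ */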
++
++#define unionfs_read_lock(sb)	 down_read(&UNIONFS_SB(sb)->rwsem)
++#define unionfs_read_unlock(sb)	 up_read(&UNIONFS_SB(sb)->rwsem)
++#define unionfs_write_lock(sb)	 down_write(&UNIONFS_SB(sb)->rwsem)
++#define unionfs_write_unlock(sb) up_write(&UNIONFS_SB(sb)->rwsem)
++
++static inline void unionfs_double_lock_dentry(struct dentry *d1,
++					      struct dentry *d2)
++{
++	if (d2 < d1) {
++		struct dentry *tmp = d1;
++		d1 = d2;
++		d2 = tmp;
++	}
++	unionfs_lock_dentry(d1);
++	unionfs_lock_dentry(d2);
++}
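++
++/*
++ * Note (editor's sketch): locking the lower-addressed dentry first gives a
++ * global lock order, so two tasks double-locking the same pair as (d1, d2)
++ * and (d2, d1) cannot deadlock -- both take the locks in the same order.
++ */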
++
++extern int realloc_dentry_private_data(struct dentry *dentry);
++extern int new_dentry_private_data(struct dentry *dentry);
++extern void free_dentry_private_data(struct dentry *dentry);
++extern void update_bstart(struct dentry *dentry);
++
++/*
++ * EXTERNALS:
++ */
++
++/* replicates the directory structure up to the given dentry in the given branch */
++extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
++				     int bindex);
++extern int make_dir_opaque(struct dentry *dir, int bindex);
++
++/* partial lookup */
++extern int unionfs_partial_lookup(struct dentry *dentry);
++
++/*
++ * Pass a unionfs dentry and a branch index, and this will try to create a
++ * whiteout for the dentry in branch 'start'.
++ *
++ * On error, it will proceed to try the next branch to the left.
++ */
++extern int create_whiteout(struct dentry *dentry, int start);
++/* copies a file from dbstart to newbindex branch */
++extern int copyup_file(struct inode *dir, struct file *file, int bstart,
++		       int newbindex, loff_t size);
++extern int copyup_named_file(struct inode *dir, struct file *file,
++			     char *name, int bstart, int new_bindex,
++			     loff_t len);
++/* copies a dentry from dbstart to newbindex branch */
++extern int copyup_dentry(struct inode *dir, struct dentry *dentry, int bstart,
++			 int new_bindex, struct file **copyup_file,
++			 loff_t len);
++
++extern int remove_whiteouts(struct dentry *dentry,
++			    struct dentry *hidden_dentry, int bindex);
++
++extern int do_delete_whiteouts(struct dentry *dentry, int bindex,
++			       struct unionfs_dir_state *namelist);
++
++extern int unionfs_get_nlinks(struct inode *inode);
++
++/* Is this directory empty? Returns 0 if empty, -ENOTEMPTY if not. */
++extern int check_empty(struct dentry *dentry,
++		       struct unionfs_dir_state **namelist);
++/* Delete whiteouts from this directory in branch bindex. */
++extern int delete_whiteouts(struct dentry *dentry, int bindex,
++			    struct unionfs_dir_state *namelist);
++
++/* Re-lookup a hidden dentry. */
++extern int unionfs_refresh_hidden_dentry(struct dentry *dentry, int bindex);
++
++extern void unionfs_reinterpose(struct dentry *this_dentry);
++extern struct super_block *unionfs_duplicate_super(struct super_block *sb);
++
++/* Locking functions. */
++extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl);
++extern int unionfs_getlk(struct file *file, struct file_lock *fl);
++
++/* Common file operations. */
++extern int unionfs_file_revalidate(struct file *file, int willwrite);
++extern int unionfs_open(struct inode *inode, struct file *file);
++extern int unionfs_file_release(struct inode *inode, struct file *file);
++extern int unionfs_flush(struct file *file, fl_owner_t id);
++extern long unionfs_ioctl(struct file *file, unsigned int cmd,
++			  unsigned long arg);
++
++/* Inode operations */
++extern int unionfs_rename(struct inode *old_dir, struct dentry *old_dentry,
++			  struct inode *new_dir, struct dentry *new_dentry);
++extern int unionfs_unlink(struct inode *dir, struct dentry *dentry);
++extern int unionfs_rmdir(struct inode *dir, struct dentry *dentry);
++
++extern int __unionfs_d_revalidate_chain(struct dentry *dentry,
++					struct nameidata *nd);
++
++/* The values for unionfs_interpose's flag. */
++#define INTERPOSE_DEFAULT	0
++#define INTERPOSE_LOOKUP	1
++#define INTERPOSE_REVAL		2
++#define INTERPOSE_REVAL_NEG	3
++#define INTERPOSE_PARTIAL	4
++
++extern int unionfs_interpose(struct dentry *this_dentry,
++			     struct super_block *sb, int flag);
++
++#ifdef CONFIG_UNION_FS_XATTR
++/* Extended attribute functions. */
++extern void *unionfs_xattr_alloc(size_t size, size_t limit);
++extern void unionfs_xattr_free(void *ptr, size_t size);
++
++extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name,
++				void *value, size_t size);
++extern int unionfs_removexattr(struct dentry *dentry, const char *name);
++extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list,
++				 size_t size);
++extern int unionfs_setxattr(struct dentry *dentry, const char *name,
++			    const void *value, size_t size, int flags);
++#endif /* CONFIG_UNION_FS_XATTR */
++
++/* The root directory is unhashed, but isn't deleted. */
++static inline int d_deleted(struct dentry *d)
++{
++	return d_unhashed(d) && (d != d->d_sb->s_root);
++}
++
++struct dentry *unionfs_lookup_backend(struct dentry *dentry,
++				      struct nameidata *nd, int lookupmode);
++
++/* unionfs_permission: check if we should bypass an error to facilitate copyup */
++#define IS_COPYUP_ERR(err) ((err) == -EROFS)
++
++/* unionfs_open: check if we need to copy up the file */
++#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND)
++#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS)
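++
++/*
++ * Example use (editor's sketch): a file opened on a read-only branch with
++ * IS_WRITE_FLAG(file->f_flags) true must first be copied up to a writable
++ * branch, presumably via unionfs_open()/unionfs_file_revalidate() above.
++ */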
++
++static inline int branchperms(const struct super_block *sb, int index)
++{
++	BUG_ON(index < 0);
++
++	return UNIONFS_SB(sb)->data[index].branchperms;
++}
++
++static inline int set_branchperms(struct super_block *sb, int index, int perms)
++{
++	BUG_ON(index < 0);
++
++	UNIONFS_SB(sb)->data[index].branchperms = perms;
++
++	return perms;
++}
++
++/* Is this file on a read-only branch? */
++static inline int is_robranch_super(const struct super_block *sb, int index)
++{
++	int ret;
++
++	ret = (!(branchperms(sb, index) & MAY_WRITE)) ? -EROFS : 0;
++	return ret;
++}
++
++/* Is this file on a read-only branch? */
++static inline int is_robranch_idx(const struct dentry *dentry, int index)
++{
++	int err = 0;
++
++	BUG_ON(index < 0);
++
++	if ((!(branchperms(dentry->d_sb, index) & MAY_WRITE)) ||
++	    IS_RDONLY(unionfs_lower_dentry_idx(dentry, index)->d_inode))
++		err = -EROFS;
++	return err;
++}
++
++static inline int is_robranch(const struct dentry *dentry)
++{
++	int index;
++
++	index = UNIONFS_D(dentry)->bstart;
++	BUG_ON(index < 0);
++
++	return is_robranch_idx(dentry, index);
++}
++
++/* What we use to name whiteouts. */
++#define UNIONFS_WHPFX ".wh."
++#define UNIONFS_WHLEN 4
++/*
++ * If a directory contains this file, then it is opaque.  The name starts
++ * with the .wh. prefix so that lookup hides it like any other whiteout.
++ */
++#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque"
++#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME
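++
++/*
++ * Example (editor's note): deleting "foo" from a read-only branch creates
++ * the whiteout ".wh.foo" in a higher-priority branch; marking a directory
++ * opaque creates ".wh.__dir_opaque" inside it, which stops lookup from
++ * falling through to lower branches.
++ */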
++
++#ifndef DEFAULT_POLLMASK
++#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
++#endif
++
++/*
++ * EXTERNALS:
++ */
++extern char *alloc_whname(const char *name, int len);
++extern int check_branch(struct nameidata *nd);
++extern int __parse_branch_mode(const char *name);
++extern int parse_branch_mode(const char *name);
++
++/*
++ * These two functions are here because it is kind of daft to copy and paste
++ * the contents of the two functions to 32+ places in unionfs
++ */
++static inline struct dentry *lock_parent(struct dentry *dentry)
++{
++	struct dentry *dir = dget(dentry->d_parent);
++
++	mutex_lock(&dir->d_inode->i_mutex);
++	return dir;
++}
++
++static inline void unlock_dir(struct dentry *dir)
++{
++	mutex_unlock(&dir->d_inode->i_mutex);
++	dput(dir);
++}
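++
++/*
++ * Typical pairing (editor's sketch, based on the callers in this file):
++ *
++ *	hidden_dir_dentry = lock_parent(hidden_dentry);
++ *	err = vfs_unlink(hidden_dir_dentry->d_inode, hidden_dentry);
++ *	unlock_dir(hidden_dir_dentry);
++ *
++ * lock_parent() takes a reference and i_mutex on the parent; unlock_dir()
++ * releases both.
++ */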
++
++static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
++					      int bindex)
++{
++	BUG_ON(!dentry || bindex < 0);
++
++	return mntget(unionfs_lower_mnt_idx(dentry, bindex));
++}
++
++static inline void unionfs_mntput(struct dentry *dentry, int bindex)
++{
++	if (!dentry)
++		return;
++
++	BUG_ON(bindex < 0);
++
++	mntput(unionfs_lower_mnt_idx(dentry, bindex));
++}
++#endif	/* not _UNION_H_ */
+diff -Nurb linux-2.6.22-570/fs/unionfs/unlink.c linux-2.6.22-try2/fs/unionfs/unlink.c
+--- linux-2.6.22-570/fs/unionfs/unlink.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/unlink.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,176 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/* unlink a file by creating a whiteout */
++static int unionfs_unlink_whiteout(struct inode *dir, struct dentry *dentry)
++{
++	struct dentry *hidden_dentry;
++	struct dentry *hidden_dir_dentry;
++	int bindex;
++	int err = 0;
++
++	if ((err = unionfs_partial_lookup(dentry)))
++		goto out;
++
++	bindex = dbstart(dentry);
++
++	hidden_dentry = unionfs_lower_dentry_idx(dentry, bindex);
++	if (!hidden_dentry)
++		goto out;
++
++	hidden_dir_dentry = lock_parent(hidden_dentry);
++
++	/* avoid destroying the hidden inode if the file is in use */
++	dget(hidden_dentry);
++	if (!(err = is_robranch_super(dentry->d_sb, bindex)))
++		err = vfs_unlink(hidden_dir_dentry->d_inode, hidden_dentry);
++	dput(hidden_dentry);
++	fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode);
++	unlock_dir(hidden_dir_dentry);
++
++	if (err && !IS_COPYUP_ERR(err))
++		goto out;
++
++	if (err) {
++		if (dbstart(dentry) == 0)
++			goto out;
++
++		err = create_whiteout(dentry, dbstart(dentry) - 1);
++	} else if (dbopaque(dentry) != -1) {
++		/* There is a hidden lower-priority file with the same name. */
++		err = create_whiteout(dentry, dbopaque(dentry));
++	} else {
++		err = create_whiteout(dentry, dbstart(dentry));
++	}
++
++out:
++	if (!err)
++		dentry->d_inode->i_nlink--;
++
++	/* We don't want to leave negative leftover dentries for revalidate. */
++	if (!err && (dbopaque(dentry) != -1))
++		update_bstart(dentry);
++
++	return err;
++}
++
++int unionfs_unlink(struct inode *dir, struct dentry *dentry)
++{
++	int err = 0;
++
++	unionfs_read_lock(dentry->d_sb);
++	unionfs_lock_dentry(dentry);
++
++	if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	err = unionfs_unlink_whiteout(dir, dentry);
++	/* call d_drop so the system "forgets" about us */
++	if (!err)
++		d_drop(dentry);
++
++out:
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(dentry->d_sb);
++	return err;
++}
++
++static int unionfs_rmdir_first(struct inode *dir, struct dentry *dentry,
++			       struct unionfs_dir_state *namelist)
++{
++	int err;
++	struct dentry *hidden_dentry;
++	struct dentry *hidden_dir_dentry = NULL;
++
++	/* Here we need to remove whiteout entries. */
++	err = delete_whiteouts(dentry, dbstart(dentry), namelist);
++	if (err)
++		goto out;
++
++	hidden_dentry = unionfs_lower_dentry(dentry);
++
++	hidden_dir_dentry = lock_parent(hidden_dentry);
++
++	/* avoid destroying the hidden inode if the file is in use */
++	dget(hidden_dentry);
++	if (!(err = is_robranch(dentry)))
++		err = vfs_rmdir(hidden_dir_dentry->d_inode, hidden_dentry);
++	dput(hidden_dentry);
++
++	fsstack_copy_attr_times(dir, hidden_dir_dentry->d_inode);
++	/* propagate number of hard-links */
++	dentry->d_inode->i_nlink = unionfs_get_nlinks(dentry->d_inode);
++
++out:
++	if (hidden_dir_dentry)
++		unlock_dir(hidden_dir_dentry);
++	return err;
++}
++
++int unionfs_rmdir(struct inode *dir, struct dentry *dentry)
++{
++	int err = 0;
++	struct unionfs_dir_state *namelist = NULL;
++
++	unionfs_read_lock(dentry->d_sb);
++	unionfs_lock_dentry(dentry);
++
++	if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	/* check if this unionfs directory is empty or not */
++	err = check_empty(dentry, &namelist);
++	if (err)
++		goto out;
++
++	err = unionfs_rmdir_first(dir, dentry, namelist);
++	/* create whiteout */
++	if (!err) {
++		err = create_whiteout(dentry, dbstart(dentry));
++	} else {
++		int new_err;
++
++		if (dbstart(dentry) == 0)
++			goto out;
++
++		/* exit if the error returned was NOT -EROFS */
++		if (!IS_COPYUP_ERR(err))
++			goto out;
++
++		new_err = create_whiteout(dentry, dbstart(dentry) - 1);
++		if (new_err != -EEXIST)
++			err = new_err;
++	}
++
++out:
++	/* call d_drop so the system "forgets" about us */
++	if (!err)
++		d_drop(dentry);
++
++	if (namelist)
++		free_rdstate(namelist);
++
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(dentry->d_sb);
++	return err;
++}
+diff -Nurb linux-2.6.22-570/fs/unionfs/xattr.c linux-2.6.22-try2/fs/unionfs/xattr.c
+--- linux-2.6.22-570/fs/unionfs/xattr.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/fs/unionfs/xattr.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,161 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2003-2006 Charles P. Wright
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2005-2006 Junjiro Okajima
++ * Copyright (c) 2005      Arun M. Krishnakumar
++ * Copyright (c) 2004-2006 David P. Quigley
++ * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
++ * Copyright (c) 2003      Puja Gupta
++ * Copyright (c) 2003      Harikesavan Krishnan
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include "union.h"
++
++/* This is lifted from fs/xattr.c */
++void *unionfs_xattr_alloc(size_t size, size_t limit)
++{
++	void *ptr;
++
++	if (size > limit)
++		return ERR_PTR(-E2BIG);
++
++	if (!size)		/* size request, no buffer is needed */
++		return NULL;
++	else if (size <= PAGE_SIZE)
++		ptr = kmalloc(size, GFP_KERNEL);
++	else
++		ptr = vmalloc(size);
++	if (!ptr)
++		return ERR_PTR(-ENOMEM);
++	return ptr;
++}
++
++void unionfs_xattr_free(void *ptr, size_t size)
++{
++	if (!size)		/* size request, no buffer was needed */
++		return;
++	else if (size <= PAGE_SIZE)
++		kfree(ptr);
++	else
++		vfree(ptr);
++}
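++
++/*
++ * Usage sketch (editor's note): the allocator picks kmalloc for requests
++ * up to a page and vmalloc beyond that, so the matching free must be told
++ * the size:
++ *
++ *	buf = unionfs_xattr_alloc(size, XATTR_SIZE_MAX);
++ *	if (IS_ERR(buf))
++ *		return PTR_ERR(buf);
++ *	...
++ *	unionfs_xattr_free(buf, size);
++ */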
++
++/*
++ * BKL held by caller.
++ * dentry->d_inode->i_mutex locked
++ */
++ssize_t unionfs_getxattr(struct dentry *dentry, const char *name, void *value,
++			 size_t size)
++{
++	struct dentry *hidden_dentry = NULL;
++	int err = -EOPNOTSUPP;
++
++	unionfs_read_lock(dentry->d_sb);
++	unionfs_lock_dentry(dentry);
++
++	if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	hidden_dentry = unionfs_lower_dentry(dentry);
++
++	err = vfs_getxattr(hidden_dentry, (char *)name, value, size);
++
++out:
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(dentry->d_sb);
++	return err;
++}
++
++/*
++ * BKL held by caller.
++ * dentry->d_inode->i_mutex locked
++ */
++int unionfs_setxattr(struct dentry *dentry, const char *name,
++		     const void *value, size_t size, int flags)
++{
++	struct dentry *hidden_dentry = NULL;
++	int err = -EOPNOTSUPP;
++
++	unionfs_read_lock(dentry->d_sb);
++	unionfs_lock_dentry(dentry);
++
++	if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	hidden_dentry = unionfs_lower_dentry(dentry);
++
++	err = vfs_setxattr(hidden_dentry, (char *)name, (void *)value,
++			   size, flags);
++
++out:
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(dentry->d_sb);
++	return err;
++}
++
++/*
++ * BKL held by caller.
++ * dentry->d_inode->i_mutex locked
++ */
++int unionfs_removexattr(struct dentry *dentry, const char *name)
++{
++	struct dentry *hidden_dentry = NULL;
++	int err = -EOPNOTSUPP;
++
++	unionfs_read_lock(dentry->d_sb);
++	unionfs_lock_dentry(dentry);
++
++	if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	hidden_dentry = unionfs_lower_dentry(dentry);
++
++	err = vfs_removexattr(hidden_dentry, (char *)name);
++
++out:
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(dentry->d_sb);
++	return err;
++}
++
++/*
++ * BKL held by caller.
++ * dentry->d_inode->i_mutex locked
++ */
++ssize_t unionfs_listxattr(struct dentry *dentry, char *list, size_t size)
++{
++	struct dentry *hidden_dentry = NULL;
++	int err = -EOPNOTSUPP;
++
++	unionfs_read_lock(dentry->d_sb);
++	unionfs_lock_dentry(dentry);
++
++	if (!__unionfs_d_revalidate_chain(dentry, NULL)) {
++		err = -ESTALE;
++		goto out;
++	}
++
++	hidden_dentry = unionfs_lower_dentry(dentry);
++
++	err = vfs_listxattr(hidden_dentry, list, size);
++
++out:
++	unionfs_unlock_dentry(dentry);
++	unionfs_read_unlock(dentry->d_sb);
++	return err;
++}
+diff -Nurb linux-2.6.22-570/fs/xfs/linux-2.6/xfs_file.c linux-2.6.22-try2/fs/xfs/linux-2.6/xfs_file.c
+--- linux-2.6.22-570/fs/xfs/linux-2.6/xfs_file.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/fs/xfs/linux-2.6/xfs_file.c	2007-12-19 15:29:24.000000000 -0500
+@@ -246,18 +246,19 @@
+ 
+ #ifdef CONFIG_XFS_DMAPI
+ STATIC struct page *
+-xfs_vm_nopage(
+-	struct vm_area_struct	*area,
+-	unsigned long		address,
+-	int			*type)
++xfs_vm_fault(
++	struct vm_area_struct	*vma,
++	struct fault_data	*fdata)
+ {
+-	struct inode	*inode = area->vm_file->f_path.dentry->d_inode;
++	struct inode	*inode = vma->vm_file->f_path.dentry->d_inode;
+ 	bhv_vnode_t	*vp = vn_from_inode(inode);
+ 
+ 	ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
+-	if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), area, 0))
++	if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0)) {
++		fdata->type = VM_FAULT_SIGBUS;
+ 		return NULL;
+-	return filemap_nopage(area, address, type);
++	}
++	return filemap_fault(vma, fdata);
+ }
+ #endif /* CONFIG_XFS_DMAPI */
+ 
+@@ -343,6 +344,7 @@
+ 	struct vm_area_struct *vma)
+ {
+ 	vma->vm_ops = &xfs_file_vm_ops;
++	vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
+ 
+ #ifdef CONFIG_XFS_DMAPI
+ 	if (vn_from_inode(filp->f_path.dentry->d_inode)->v_vfsp->vfs_flag & VFS_DMI)
+@@ -501,14 +503,12 @@
+ };
+ 
+ static struct vm_operations_struct xfs_file_vm_ops = {
+-	.nopage		= filemap_nopage,
+-	.populate	= filemap_populate,
++	.fault		= filemap_fault,
+ };
+ 
+ #ifdef CONFIG_XFS_DMAPI
+ static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
+-	.nopage		= xfs_vm_nopage,
+-	.populate	= filemap_populate,
++	.fault		= xfs_vm_fault,
+ #ifdef HAVE_VMOP_MPROTECT
+ 	.mprotect	= xfs_vm_mprotect,
+ #endif
+diff -Nurb linux-2.6.22-570/fs/xfs/linux-2.6/xfs_super.c linux-2.6.22-try2/fs/xfs/linux-2.6/xfs_super.c
+--- linux-2.6.22-570/fs/xfs/linux-2.6/xfs_super.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/fs/xfs/linux-2.6/xfs_super.c	2007-12-19 15:29:24.000000000 -0500
+@@ -570,6 +570,7 @@
+ 	bhv_vfs_sync_work_t	*work, *n;
+ 	LIST_HEAD		(tmp);
+ 
++	set_freezable();
+ 	timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
+ 	for (;;) {
+ 		timeleft = schedule_timeout_interruptible(timeleft);
+diff -Nurb linux-2.6.22-570/include/acpi/acmacros.h linux-2.6.22-try2/include/acpi/acmacros.h
+--- linux-2.6.22-570/include/acpi/acmacros.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/acpi/acmacros.h	2007-12-19 15:29:22.000000000 -0500
+@@ -486,6 +486,8 @@
+ #define ACPI_FUNCTION_NAME(name)
+ #endif
+ 
++#ifdef DEBUG_FUNC_TRACE
++
+ #define ACPI_FUNCTION_TRACE(a)          ACPI_FUNCTION_NAME(a) \
+ 			  acpi_ut_trace(ACPI_DEBUG_PARAMETERS)
+ #define ACPI_FUNCTION_TRACE_PTR(a,b)    ACPI_FUNCTION_NAME(a) \
+@@ -563,6 +565,27 @@
+ 
+ #endif				/* ACPI_SIMPLE_RETURN_MACROS */
+ 
++#else /* !DEBUG_FUNC_TRACE */
++
++#define ACPI_FUNCTION_TRACE(a)
++#define ACPI_FUNCTION_TRACE_PTR(a,b)
++#define ACPI_FUNCTION_TRACE_U32(a,b)
++#define ACPI_FUNCTION_TRACE_STR(a,b)
++#define ACPI_FUNCTION_EXIT
++#define ACPI_FUNCTION_STATUS_EXIT(s)
++#define ACPI_FUNCTION_VALUE_EXIT(s)
++#define ACPI_FUNCTION_ENTRY()
++
++#define return_VOID                     return
++#define return_ACPI_STATUS(s)           return(s)
++#define return_VALUE(s)                 return(s)
++#define return_UINT8(s)                 return(s)
++#define return_UINT32(s)                return(s)
++#define return_PTR(s)                   return(s)
++
++#endif /* DEBUG_FUNC_TRACE */
++
+ /* Conditional execution */
+ 
+ #define ACPI_DEBUG_EXEC(a)              a
+diff -Nurb linux-2.6.22-570/include/acpi/acoutput.h linux-2.6.22-try2/include/acpi/acoutput.h
+--- linux-2.6.22-570/include/acpi/acoutput.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/acpi/acoutput.h	2007-12-19 15:29:22.000000000 -0500
+@@ -178,8 +178,8 @@
+ 
+ /* Defaults for debug_level, debug and normal */
+ 
+-#define ACPI_DEBUG_DEFAULT          (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR | ACPI_LV_DEBUG_OBJECT)
+-#define ACPI_NORMAL_DEFAULT         (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR | ACPI_LV_DEBUG_OBJECT)
++#define ACPI_DEBUG_DEFAULT          (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR)
++#define ACPI_NORMAL_DEFAULT         (ACPI_LV_INIT | ACPI_LV_WARN | ACPI_LV_ERROR)
+ #define ACPI_DEBUG_ALL              (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL)
+ 
+ #endif				/* __ACOUTPUT_H__ */
+diff -Nurb linux-2.6.22-570/include/acpi/platform/acenv.h linux-2.6.22-try2/include/acpi/platform/acenv.h
+--- linux-2.6.22-570/include/acpi/platform/acenv.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/acpi/platform/acenv.h	2007-12-19 15:29:22.000000000 -0500
+@@ -136,7 +136,7 @@
+ 
+ /*! [Begin] no source code translation */
+ 
+-#if defined(__linux__)
++#if defined(_LINUX) || defined(__linux__)
+ #include "aclinux.h"
+ 
+ #elif defined(_AED_EFI)
+diff -Nurb linux-2.6.22-570/include/acpi/platform/aclinux.h linux-2.6.22-try2/include/acpi/platform/aclinux.h
+--- linux-2.6.22-570/include/acpi/platform/aclinux.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/acpi/platform/aclinux.h	2007-12-19 15:29:22.000000000 -0500
+@@ -91,7 +91,10 @@
+ #define ACPI_USE_NATIVE_DIVIDE
+ #endif
+ 
++#ifndef __cdecl
+ #define __cdecl
++#endif
++
+ #define ACPI_FLUSH_CPU_CACHE()
+ #endif				/* __KERNEL__ */
+ 
+diff -Nurb linux-2.6.22-570/include/acpi/processor.h linux-2.6.22-try2/include/acpi/processor.h
+--- linux-2.6.22-570/include/acpi/processor.h	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/include/acpi/processor.h	2007-12-19 15:47:40.000000000 -0500
+@@ -21,6 +21,8 @@
+ #define ACPI_PSD_REV0_REVISION		0	/* Support for _PSD as in ACPI 3.0 */
+ #define ACPI_PSD_REV0_ENTRIES		5
+ 
++#define ACPI_TSD_REV0_REVISION		0	/* Support for _TSD as in ACPI 3.0 */
++#define ACPI_TSD_REV0_ENTRIES		5
+ /*
+  * Types of coordination defined in ACPI 3.0. Same macros can be used across
+  * P, C and T states
+@@ -125,17 +127,53 @@
+ 
+ /* Throttling Control */
+ 
++struct acpi_tsd_package {
++	acpi_integer num_entries;
++	acpi_integer revision;
++	acpi_integer domain;
++	acpi_integer coord_type;
++	acpi_integer num_processors;
++} __attribute__ ((packed));
++
++struct acpi_ptc_register {
++	u8 descriptor;
++	u16 length;
++	u8 space_id;
++	u8 bit_width;
++	u8 bit_offset;
++	u8 reserved;
++	u64 address;
++} __attribute__ ((packed));
++
++struct acpi_processor_tx_tss {
++	acpi_integer freqpercentage;	/* percent of core CPU frequency */
++	acpi_integer power;	/* milliWatts */
++	acpi_integer transition_latency;	/* microseconds */
++	acpi_integer control;	/* control value */
++	acpi_integer status;	/* success indicator */
++};
+ struct acpi_processor_tx {
+ 	u16 power;
+ 	u16 performance;
+ };
+ 
++struct acpi_processor;
+ struct acpi_processor_throttling {
+-	int state;
++	unsigned int state;
++	unsigned int platform_limit;
++	struct acpi_pct_register control_register;
++	struct acpi_pct_register status_register;
++	unsigned int state_count;
++	struct acpi_processor_tx_tss *states_tss;
++	struct acpi_tsd_package domain_info;
++	cpumask_t shared_cpu_map;
++	int (*acpi_processor_get_throttling) (struct acpi_processor *pr);
++	int (*acpi_processor_set_throttling) (struct acpi_processor *pr,
++					      int state);
++
+ 	u32 address;
+ 	u8 duty_offset;
+ 	u8 duty_width;
+-	int state_count;
+ 	struct acpi_processor_tx states[ACPI_PROCESSOR_MAX_THROTTLING];
+ };
+ 
+@@ -161,6 +199,7 @@
+ 	u8 bm_check:1;
+ 	u8 has_cst:1;
+ 	u8 power_setup_done:1;
++	u8 bm_rld_set:1;
+ };
+ 
+ struct acpi_processor {
+@@ -169,6 +208,9 @@
+ 	u32 id;
+ 	u32 pblk;
+ 	int performance_platform_limit;
++	int throttling_platform_limit;
++	/* _TPC limit: 0 means throttling states 0..n are all available */
++
+ 	struct acpi_processor_flags flags;
+ 	struct acpi_processor_power power;
+ 	struct acpi_processor_performance *performance;
+@@ -270,7 +312,7 @@
+ 
+ /* in processor_throttling.c */
+ int acpi_processor_get_throttling_info(struct acpi_processor *pr);
+-int acpi_processor_set_throttling(struct acpi_processor *pr, int state);
++extern int acpi_processor_set_throttling(struct acpi_processor *pr, int state);
+ extern struct file_operations acpi_processor_throttling_fops;
+ 
+ /* in processor_idle.c */
+@@ -279,6 +321,9 @@
+ int acpi_processor_cst_has_changed(struct acpi_processor *pr);
+ int acpi_processor_power_exit(struct acpi_processor *pr,
+ 			      struct acpi_device *device);
++
++extern struct cpuidle_driver acpi_idle_driver;
++void acpi_max_cstate_changed(void);
+ int acpi_processor_suspend(struct acpi_device * device, pm_message_t state);
+ int acpi_processor_resume(struct acpi_device * device);
+ 
+diff -Nurb linux-2.6.22-570/include/asm-alpha/page.h linux-2.6.22-try2/include/asm-alpha/page.h
+--- linux-2.6.22-570/include/asm-alpha/page.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-alpha/page.h	2007-12-19 15:29:24.000000000 -0500
+@@ -17,7 +17,8 @@
+ extern void clear_page(void *page);
+ #define clear_user_page(page, vaddr, pg)	clear_page(page)
+ 
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vmaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+ 
+ extern void copy_page(void * _to, void * _from);
+diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop13xx/adma.h linux-2.6.22-try2/include/asm-arm/arch-iop13xx/adma.h
+--- linux-2.6.22-570/include/asm-arm/arch-iop13xx/adma.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/asm-arm/arch-iop13xx/adma.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,544 @@
++/*
++ * Copyright(c) 2006, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#ifndef _ADMA_H
++#define _ADMA_H
++#include <linux/types.h>
++#include <linux/io.h>
++#include <asm/hardware.h>
++#include <asm/hardware/iop_adma.h>
++
++#define ADMA_ACCR(chan)	(chan->mmr_base + 0x0)
++#define ADMA_ACSR(chan)	(chan->mmr_base + 0x4)
++#define ADMA_ADAR(chan)	(chan->mmr_base + 0x8)
++#define ADMA_IIPCR(chan)	(chan->mmr_base + 0x18)
++#define ADMA_IIPAR(chan)	(chan->mmr_base + 0x1c)
++#define ADMA_IIPUAR(chan)	(chan->mmr_base + 0x20)
++#define ADMA_ANDAR(chan)	(chan->mmr_base + 0x24)
++#define ADMA_ADCR(chan)	(chan->mmr_base + 0x28)
++#define ADMA_CARMD(chan)	(chan->mmr_base + 0x2c)
++#define ADMA_ABCR(chan)	(chan->mmr_base + 0x30)
++#define ADMA_DLADR(chan)	(chan->mmr_base + 0x34)
++#define ADMA_DUADR(chan)	(chan->mmr_base + 0x38)
++#define ADMA_SLAR(src, chan)	(chan->mmr_base + (0x3c + (src << 3)))
++#define ADMA_SUAR(src, chan)	(chan->mmr_base + (0x40 + (src << 3)))
++
++struct iop13xx_adma_src {
++	u32 src_addr;
++	union {
++		u32 upper_src_addr;
++		struct {
++			unsigned int pq_upper_src_addr:24;
++			unsigned int pq_dmlt:8;
++		};
++	};
++};
++
++struct iop13xx_adma_desc_ctrl {
++	unsigned int int_en:1;
++	unsigned int xfer_dir:2;
++	unsigned int src_select:4;
++	unsigned int zero_result:1;
++	unsigned int block_fill_en:1;
++	unsigned int crc_gen_en:1;
++	unsigned int crc_xfer_dis:1;
++	unsigned int crc_seed_fetch_dis:1;
++	unsigned int status_write_back_en:1;
++	unsigned int endian_swap_en:1;
++	unsigned int reserved0:2;
++	unsigned int pq_update_xfer_en:1;
++	unsigned int dual_xor_en:1;
++	unsigned int pq_xfer_en:1;
++	unsigned int p_xfer_dis:1;
++	unsigned int reserved1:10;
++	unsigned int relax_order_en:1;
++	unsigned int no_snoop_en:1;
++};
++
++struct iop13xx_adma_byte_count {
++	unsigned int byte_count:24;
++	unsigned int host_if:3;
++	unsigned int reserved:2;
++	unsigned int zero_result_err_q:1;
++	unsigned int zero_result_err:1;
++	unsigned int tx_complete:1;
++};
++
++struct iop13xx_adma_desc_hw {
++	u32 next_desc;
++	union {
++		u32 desc_ctrl;
++		struct iop13xx_adma_desc_ctrl desc_ctrl_field;
++	};
++	union {
++		u32 crc_addr;
++		u32 block_fill_data;
++		u32 q_dest_addr;
++	};
++	union {
++		u32 byte_count;
++		struct iop13xx_adma_byte_count byte_count_field;
++	};
++	union {
++		u32 dest_addr;
++		u32 p_dest_addr;
++	};
++	union {
++		u32 upper_dest_addr;
++		u32 pq_upper_dest_addr;
++	};
++	struct iop13xx_adma_src src[1];
++};
++
++struct iop13xx_adma_desc_dual_xor {
++	u32 next_desc;
++	u32 desc_ctrl;
++	u32 reserved;
++	u32 byte_count;
++	u32 h_dest_addr;
++	u32 h_upper_dest_addr;
++	u32 src0_addr;
++	u32 upper_src0_addr;
++	u32 src1_addr;
++	u32 upper_src1_addr;
++	u32 h_src_addr;
++	u32 h_upper_src_addr;
++	u32 d_src_addr;
++	u32 d_upper_src_addr;
++	u32 d_dest_addr;
++	u32 d_upper_dest_addr;
++};
++
++struct iop13xx_adma_desc_pq_update {
++	u32 next_desc;
++	u32 desc_ctrl;
++	u32 reserved;
++	u32 byte_count;
++	u32 p_dest_addr;
++	u32 p_upper_dest_addr;
++	u32 src0_addr;
++	u32 upper_src0_addr;
++	u32 src1_addr;
++	u32 upper_src1_addr;
++	u32 p_src_addr;
++	u32 p_upper_src_addr;
++	u32 q_src_addr;
++	struct {
++		unsigned int q_upper_src_addr:24;
++		unsigned int q_dmlt:8;
++	};
++	u32 q_dest_addr;
++	u32 q_upper_dest_addr;
++};
++
++static inline int iop_adma_get_max_xor(void)
++{
++	return 16;
++}
++
++static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
++{
++	return __raw_readl(ADMA_ADAR(chan));
++}
++
++static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan,
++						u32 next_desc_addr)
++{
++	__raw_writel(next_desc_addr, ADMA_ANDAR(chan));
++}
++
++#define ADMA_STATUS_BUSY (1 << 13)
++
++static inline char iop_chan_is_busy(struct iop_adma_chan *chan)
++{
++	if (__raw_readl(ADMA_ACSR(chan)) &
++		ADMA_STATUS_BUSY)
++		return 1;
++	else
++		return 0;
++}
++
++static inline int
++iop_chan_get_desc_align(struct iop_adma_chan *chan, int num_slots)
++{
++	return 1;
++}
++#define iop_desc_is_aligned(x, y) 1
++
++static inline int
++iop_chan_memcpy_slot_count(size_t len, int *slots_per_op)
++{
++	*slots_per_op = 1;
++	return 1;
++}
++
++#define iop_chan_interrupt_slot_count(s, c) iop_chan_memcpy_slot_count(0, s)
++
++static inline int
++iop_chan_memset_slot_count(size_t len, int *slots_per_op)
++{
++	*slots_per_op = 1;
++	return 1;
++}
++
++static inline int
++iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
++{
++	int num_slots;
++	/* slots_to_find = 1 for basic descriptor + 1 per 4 sources above 1
++	 * (1 source => 8 bytes) (1 slot => 32 bytes)
++	 */
++	num_slots = 1 + (((src_cnt - 1) << 3) >> 5);
++	if (((src_cnt - 1) << 3) & 0x1f)
++		num_slots++;
++
++	*slots_per_op = num_slots;
++
++	return num_slots;
++}
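++
++/*
++ * Worked example (editor's note): each source takes 8 bytes and each slot
++ * holds 32 bytes, so for src_cnt = 5 the sources need (5 - 1) * 8 = 32
++ * bytes = 1 extra slot beyond the basic descriptor, giving 2 slots total;
++ * for src_cnt = 2 the 8 leftover bytes still round up to 1 extra slot.
++ */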
++
++#define ADMA_MAX_BYTE_COUNT	(16 * 1024 * 1024)
++#define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
++#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
++#define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT
++#define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o)
++
++static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
++					struct iop_adma_chan *chan)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	return hw_desc->dest_addr;
++}
++
++static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
++					struct iop_adma_chan *chan)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	return hw_desc->byte_count_field.byte_count;
++}
++
++static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc,
++					struct iop_adma_chan *chan,
++					int src_idx)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	return hw_desc->src[src_idx].src_addr;
++}
++
++static inline u32 iop_desc_get_src_count(struct iop_adma_desc_slot *desc,
++					struct iop_adma_chan *chan)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	return hw_desc->desc_ctrl_field.src_select + 1;
++}
++
++static inline void
++iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	union {
++		u32 value;
++		struct iop13xx_adma_desc_ctrl field;
++	} u_desc_ctrl;
++
++	u_desc_ctrl.value = 0;
++	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
++	u_desc_ctrl.field.int_en = int_en;
++	hw_desc->desc_ctrl = u_desc_ctrl.value;
++	hw_desc->crc_addr = 0;
++}
++
++static inline void
++iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	union {
++		u32 value;
++		struct iop13xx_adma_desc_ctrl field;
++	} u_desc_ctrl;
++
++	u_desc_ctrl.value = 0;
++	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
++	u_desc_ctrl.field.block_fill_en = 1;
++	u_desc_ctrl.field.int_en = int_en;
++	hw_desc->desc_ctrl = u_desc_ctrl.value;
++	hw_desc->crc_addr = 0;
++}
++
++/* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */
++static inline void
++iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	union {
++		u32 value;
++		struct iop13xx_adma_desc_ctrl field;
++	} u_desc_ctrl;
++
++	u_desc_ctrl.value = 0;
++	u_desc_ctrl.field.src_select = src_cnt - 1;
++	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
++	u_desc_ctrl.field.int_en = int_en;
++	hw_desc->desc_ctrl = u_desc_ctrl.value;
++	hw_desc->crc_addr = 0;
++
++}
++#define iop_desc_init_null_xor(d, s, i) iop_desc_init_xor(d, s, i)
++
++/* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */
++static inline int
++iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	union {
++		u32 value;
++		struct iop13xx_adma_desc_ctrl field;
++	} u_desc_ctrl;
++
++	u_desc_ctrl.value = 0;
++	u_desc_ctrl.field.src_select = src_cnt - 1;
++	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
++	u_desc_ctrl.field.zero_result = 1;
++	u_desc_ctrl.field.status_write_back_en = 1;
++	u_desc_ctrl.field.int_en = int_en;
++	hw_desc->desc_ctrl = u_desc_ctrl.value;
++	hw_desc->crc_addr = 0;
++
++	return 1;
++}
++
++static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
++					struct iop_adma_chan *chan,
++					u32 byte_count)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	hw_desc->byte_count = byte_count;
++}
++
++static inline void
++iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
++{
++	int slots_per_op = desc->slots_per_op;
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
++	int i = 0;
++
++	if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
++		hw_desc->byte_count = len;
++	} else {
++		do {
++			iter = iop_hw_desc_slot_idx(hw_desc, i);
++			iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
++			len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
++			i += slots_per_op;
++		} while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
++
++		if (len) {
++			iter = iop_hw_desc_slot_idx(hw_desc, i);
++			iter->byte_count = len;
++		}
++	}
++}
++
++static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
++					struct iop_adma_chan *chan,
++					dma_addr_t addr)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	hw_desc->dest_addr = addr;
++	hw_desc->upper_dest_addr = 0;
++}
++
++static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
++					dma_addr_t addr)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	hw_desc->src[0].src_addr = addr;
++	hw_desc->src[0].upper_src_addr = 0;
++}
++
++static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
++					int src_idx, dma_addr_t addr)
++{
++	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter;
++	int i = 0;
++
++	do {
++		iter = iop_hw_desc_slot_idx(hw_desc, i);
++		iter->src[src_idx].src_addr = addr;
++		iter->src[src_idx].upper_src_addr = 0;
++		slot_cnt -= slots_per_op;
++		if (slot_cnt) {
++			i += slots_per_op;
++			addr += IOP_ADMA_XOR_MAX_BYTE_COUNT;
++		}
++	} while (slot_cnt);
++}
++
++static inline void
++iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
++	struct iop_adma_chan *chan)
++{
++	iop_desc_init_memcpy(desc, 1);
++	iop_desc_set_byte_count(desc, chan, 0);
++	iop_desc_set_dest_addr(desc, chan, 0);
++	iop_desc_set_memcpy_src_addr(desc, 0);
++}
++
++#define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr
++
++static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
++					u32 next_desc_addr)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	BUG_ON(hw_desc->next_desc);
++	hw_desc->next_desc = next_desc_addr;
++}
++
++static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	return hw_desc->next_desc;
++}
++
++static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	hw_desc->next_desc = 0;
++}
++
++static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
++						u32 val)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	hw_desc->block_fill_data = val;
++}
++
++static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
++{
++	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
++	struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
++	struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field;
++
++	BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result));
++
++	if (desc_ctrl.pq_xfer_en)
++		return byte_count.zero_result_err_q;
++	else
++		return byte_count.zero_result_err;
++}
++
++static inline void iop_chan_append(struct iop_adma_chan *chan)
++{
++	u32 adma_accr;
++
++	adma_accr = __raw_readl(ADMA_ACCR(chan));
++	adma_accr |= 0x2;
++	__raw_writel(adma_accr, ADMA_ACCR(chan));
++}
++
++static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan)
++{
++	do { } while (0);
++}
++
++static inline u32 iop_chan_get_status(struct iop_adma_chan *chan)
++{
++	return __raw_readl(ADMA_ACSR(chan));
++}
++
++static inline void iop_chan_disable(struct iop_adma_chan *chan)
++{
++	u32 adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan));
++	adma_chan_ctrl &= ~0x1;
++	__raw_writel(adma_chan_ctrl, ADMA_ACCR(chan));
++}
++
++static inline void iop_chan_enable(struct iop_adma_chan *chan)
++{
++	u32 adma_chan_ctrl;
++
++	adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan));
++	adma_chan_ctrl |= 0x1;
++	__raw_writel(adma_chan_ctrl, ADMA_ACCR(chan));
++}
++
++static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan)
++{
++	u32 status = __raw_readl(ADMA_ACSR(chan));
++	status &= (1 << 12);
++	__raw_writel(status, ADMA_ACSR(chan));
++}
++
++static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan)
++{
++	u32 status = __raw_readl(ADMA_ACSR(chan));
++	status &= (1 << 11);
++	__raw_writel(status, ADMA_ACSR(chan));
++}
++
++static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan)
++{
++	u32 status = __raw_readl(ADMA_ACSR(chan));
++	status &= (1 << 9) | (1 << 5) | (1 << 4) | (1 << 3);
++	__raw_writel(status, ADMA_ACSR(chan));
++}
++
++static inline int
++iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan)
++{
++	return test_bit(9, &status);
++}
++
++static inline int
++iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan)
++{
++	return test_bit(5, &status);
++}
++
++static inline int
++iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan)
++{
++	return test_bit(4, &status);
++}
++
++static inline int
++iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan)
++{
++	return test_bit(3, &status);
++}
++
++static inline int
++iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan)
++{
++	return 0;
++}
++
++static inline int
++iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan)
++{
++	return 0;
++}
++
++static inline int
++iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan)
++{
++	return 0;
++}
++
++#endif /* _ADMA_H */
+diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop13xx/iop13xx.h linux-2.6.22-try2/include/asm-arm/arch-iop13xx/iop13xx.h
+--- linux-2.6.22-570/include/asm-arm/arch-iop13xx/iop13xx.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-arm/arch-iop13xx/iop13xx.h	2007-12-19 15:29:23.000000000 -0500
+@@ -166,12 +166,22 @@
+ #define IOP13XX_INIT_I2C_1	      (1 << 1)
+ #define IOP13XX_INIT_I2C_2	      (1 << 2)
+ 
++/* ADMA selection flags */
++/* INIT_ADMA_DEFAULT = Rely on CONFIG_IOP13XX_ADMA* */
++#define IOP13XX_INIT_ADMA_DEFAULT     (0)
++#define IOP13XX_INIT_ADMA_0           (1 << 0)
++#define IOP13XX_INIT_ADMA_1           (1 << 1)
++#define IOP13XX_INIT_ADMA_2           (1 << 2)
++
++/* Platform devices */
+ #define IQ81340_NUM_UART     2
+ #define IQ81340_NUM_I2C      3
+ #define IQ81340_NUM_PHYS_MAP_FLASH 1
+-#define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART +\
+-				IQ81340_NUM_I2C +\
+-				IQ81340_NUM_PHYS_MAP_FLASH)
++#define IQ81340_NUM_ADMA     		3
++#define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART + \
++				IQ81340_NUM_I2C + \
++				IQ81340_NUM_PHYS_MAP_FLASH + \
++				IQ81340_NUM_ADMA)
+ 
+ /*========================== PMMR offsets for key registers ============*/
+ #define IOP13XX_ATU0_PMMR_OFFSET   	0x00048000
+@@ -444,22 +454,6 @@
+ /*==============================ADMA UNITS===============================*/
+ #define IOP13XX_ADMA_PHYS_BASE(chan)	IOP13XX_REG_ADDR32_PHYS((chan << 9))
+ #define IOP13XX_ADMA_UPPER_PA(chan)	(IOP13XX_ADMA_PHYS_BASE(chan) + 0xc0)
+-#define IOP13XX_ADMA_OFFSET(chan, ofs)	IOP13XX_REG_ADDR32((chan << 9) + (ofs))
+-
+-#define IOP13XX_ADMA_ACCR(chan)      IOP13XX_ADMA_OFFSET(chan, 0x0)
+-#define IOP13XX_ADMA_ACSR(chan)      IOP13XX_ADMA_OFFSET(chan, 0x4)
+-#define IOP13XX_ADMA_ADAR(chan)      IOP13XX_ADMA_OFFSET(chan, 0x8)
+-#define IOP13XX_ADMA_IIPCR(chan)     IOP13XX_ADMA_OFFSET(chan, 0x18)
+-#define IOP13XX_ADMA_IIPAR(chan)     IOP13XX_ADMA_OFFSET(chan, 0x1c)
+-#define IOP13XX_ADMA_IIPUAR(chan)    IOP13XX_ADMA_OFFSET(chan, 0x20)
+-#define IOP13XX_ADMA_ANDAR(chan)     IOP13XX_ADMA_OFFSET(chan, 0x24)
+-#define IOP13XX_ADMA_ADCR(chan)      IOP13XX_ADMA_OFFSET(chan, 0x28)
+-#define IOP13XX_ADMA_CARMD(chan)     IOP13XX_ADMA_OFFSET(chan, 0x2c)
+-#define IOP13XX_ADMA_ABCR(chan)      IOP13XX_ADMA_OFFSET(chan, 0x30)
+-#define IOP13XX_ADMA_DLADR(chan)     IOP13XX_ADMA_OFFSET(chan, 0x34)
+-#define IOP13XX_ADMA_DUADR(chan)     IOP13XX_ADMA_OFFSET(chan, 0x38)
+-#define IOP13XX_ADMA_SLAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x3c + (src <<3))
+-#define IOP13XX_ADMA_SUAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x40 + (src <<3))
+ 
+ /*==============================XSI BRIDGE===============================*/
+ #define IOP13XX_XBG_BECSR		IOP13XX_REG_ADDR32(0x178c)
+diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop32x/adma.h linux-2.6.22-try2/include/asm-arm/arch-iop32x/adma.h
+--- linux-2.6.22-570/include/asm-arm/arch-iop32x/adma.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/asm-arm/arch-iop32x/adma.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,5 @@
++#ifndef IOP32X_ADMA_H
++#define IOP32X_ADMA_H
++#include <asm/hardware/iop3xx-adma.h>
++#endif
++
+diff -Nurb linux-2.6.22-570/include/asm-arm/arch-iop33x/adma.h linux-2.6.22-try2/include/asm-arm/arch-iop33x/adma.h
+--- linux-2.6.22-570/include/asm-arm/arch-iop33x/adma.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/asm-arm/arch-iop33x/adma.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,5 @@
++#ifndef IOP33X_ADMA_H
++#define IOP33X_ADMA_H
++#include <asm/hardware/iop3xx-adma.h>
++#endif
++
+diff -Nurb linux-2.6.22-570/include/asm-arm/hardware/iop3xx-adma.h linux-2.6.22-try2/include/asm-arm/hardware/iop3xx-adma.h
+--- linux-2.6.22-570/include/asm-arm/hardware/iop3xx-adma.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/asm-arm/hardware/iop3xx-adma.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,891 @@
++/*
++ * Copyright © 2006, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#ifndef _ADMA_H
++#define _ADMA_H
++#include <linux/types.h>
++#include <linux/io.h>
++#include <asm/hardware.h>
++#include <asm/hardware/iop_adma.h>
++
++/* Memory copy units */
++#define DMA_CCR(chan)		(chan->mmr_base + 0x0)
++#define DMA_CSR(chan)		(chan->mmr_base + 0x4)
++#define DMA_DAR(chan)		(chan->mmr_base + 0xc)
++#define DMA_NDAR(chan)		(chan->mmr_base + 0x10)
++#define DMA_PADR(chan)		(chan->mmr_base + 0x14)
++#define DMA_PUADR(chan)	(chan->mmr_base + 0x18)
++#define DMA_LADR(chan)		(chan->mmr_base + 0x1c)
++#define DMA_BCR(chan)		(chan->mmr_base + 0x20)
++#define DMA_DCR(chan)		(chan->mmr_base + 0x24)
++
++/* Application accelerator unit  */
++#define AAU_ACR(chan)		(chan->mmr_base + 0x0)
++#define AAU_ASR(chan)		(chan->mmr_base + 0x4)
++#define AAU_ADAR(chan)		(chan->mmr_base + 0x8)
++#define AAU_ANDAR(chan)	(chan->mmr_base + 0xc)
++#define AAU_SAR(src, chan)	(chan->mmr_base + (0x10 + ((src) << 2)))
++#define AAU_DAR(chan)		(chan->mmr_base + 0x20)
++#define AAU_ABCR(chan)		(chan->mmr_base + 0x24)
++#define AAU_ADCR(chan)		(chan->mmr_base + 0x28)
++#define AAU_SAR_EDCR(src_edc)	(chan->mmr_base + (0x02c + ((src_edc-4) << 2)))
++#define AAU_EDCR0_IDX	8
++#define AAU_EDCR1_IDX	17
++#define AAU_EDCR2_IDX	26
++
++#define DMA0_ID 0
++#define DMA1_ID 1
++#define AAU_ID 2
++
++struct iop3xx_aau_desc_ctrl {
++	unsigned int int_en:1;
++	unsigned int blk1_cmd_ctrl:3;
++	unsigned int blk2_cmd_ctrl:3;
++	unsigned int blk3_cmd_ctrl:3;
++	unsigned int blk4_cmd_ctrl:3;
++	unsigned int blk5_cmd_ctrl:3;
++	unsigned int blk6_cmd_ctrl:3;
++	unsigned int blk7_cmd_ctrl:3;
++	unsigned int blk8_cmd_ctrl:3;
++	unsigned int blk_ctrl:2;
++	unsigned int dual_xor_en:1;
++	unsigned int tx_complete:1;
++	unsigned int zero_result_err:1;
++	unsigned int zero_result_en:1;
++	unsigned int dest_write_en:1;
++};
++
++struct iop3xx_aau_e_desc_ctrl {
++	unsigned int reserved:1;
++	unsigned int blk1_cmd_ctrl:3;
++	unsigned int blk2_cmd_ctrl:3;
++	unsigned int blk3_cmd_ctrl:3;
++	unsigned int blk4_cmd_ctrl:3;
++	unsigned int blk5_cmd_ctrl:3;
++	unsigned int blk6_cmd_ctrl:3;
++	unsigned int blk7_cmd_ctrl:3;
++	unsigned int blk8_cmd_ctrl:3;
++	unsigned int reserved2:7;
++};
++
++struct iop3xx_dma_desc_ctrl {
++	unsigned int pci_transaction:4;
++	unsigned int int_en:1;
++	unsigned int dac_cycle_en:1;
++	unsigned int mem_to_mem_en:1;
++	unsigned int crc_data_tx_en:1;
++	unsigned int crc_gen_en:1;
++	unsigned int crc_seed_dis:1;
++	unsigned int reserved:21;
++	unsigned int crc_tx_complete:1;
++};
++
++struct iop3xx_desc_dma {
++	u32 next_desc;
++	union {
++		u32 pci_src_addr;
++		u32 pci_dest_addr;
++		u32 src_addr;
++	};
++	union {
++		u32 upper_pci_src_addr;
++		u32 upper_pci_dest_addr;
++	};
++	union {
++		u32 local_pci_src_addr;
++		u32 local_pci_dest_addr;
++		u32 dest_addr;
++	};
++	u32 byte_count;
++	union {
++		u32 desc_ctrl;
++		struct iop3xx_dma_desc_ctrl desc_ctrl_field;
++	};
++	u32 crc_addr;
++};
++
++struct iop3xx_desc_aau {
++	u32 next_desc;
++	u32 src[4];
++	u32 dest_addr;
++	u32 byte_count;
++	union {
++		u32 desc_ctrl;
++		struct iop3xx_aau_desc_ctrl desc_ctrl_field;
++	};
++	union {
++		u32 src_addr;
++		u32 e_desc_ctrl;
++		struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
++	} src_edc[31];
++};
++
++struct iop3xx_aau_gfmr {
++	unsigned int gfmr1:8;
++	unsigned int gfmr2:8;
++	unsigned int gfmr3:8;
++	unsigned int gfmr4:8;
++};
++
++struct iop3xx_desc_pq_xor {
++	u32 next_desc;
++	u32 src[3];
++	union {
++		u32 data_mult1;
++		struct iop3xx_aau_gfmr data_mult1_field;
++	};
++	u32 dest_addr;
++	u32 byte_count;
++	union {
++		u32 desc_ctrl;
++		struct iop3xx_aau_desc_ctrl desc_ctrl_field;
++	};
++	union {
++		u32 src_addr;
++		u32 e_desc_ctrl;
++		struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
++		u32 data_multiplier;
++		struct iop3xx_aau_gfmr data_mult_field;
++		u32 reserved;
++	} src_edc_gfmr[19];
++};
++
++struct iop3xx_desc_dual_xor {
++	u32 next_desc;
++	u32 src0_addr;
++	u32 src1_addr;
++	u32 h_src_addr;
++	u32 d_src_addr;
++	u32 h_dest_addr;
++	u32 byte_count;
++	union {
++		u32 desc_ctrl;
++		struct iop3xx_aau_desc_ctrl desc_ctrl_field;
++	};
++	u32 d_dest_addr;
++};
++
++union iop3xx_desc {
++	struct iop3xx_desc_aau *aau;
++	struct iop3xx_desc_dma *dma;
++	struct iop3xx_desc_pq_xor *pq_xor;
++	struct iop3xx_desc_dual_xor *dual_xor;
++	void *ptr;
++};
++
++static inline int iop_adma_get_max_xor(void)
++{
++	return 32;
++}
++
++static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
++{
++	int id = chan->device->id;
++
++	switch (id) {
++	case DMA0_ID:
++	case DMA1_ID:
++		return __raw_readl(DMA_DAR(chan));
++	case AAU_ID:
++		return __raw_readl(AAU_ADAR(chan));
++	default:
++		BUG();
++	}
++	return 0;
++}
++
++static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan,
++						u32 next_desc_addr)
++{
++	int id = chan->device->id;
++
++	switch (id) {
++	case DMA0_ID:
++	case DMA1_ID:
++		__raw_writel(next_desc_addr, DMA_NDAR(chan));
++		break;
++	case AAU_ID:
++		__raw_writel(next_desc_addr, AAU_ANDAR(chan));
++		break;
++	}
++
++}
++
++#define IOP_ADMA_STATUS_BUSY (1 << 10)
++#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT (1024)
++#define IOP_ADMA_XOR_MAX_BYTE_COUNT (16 * 1024 * 1024)
++#define IOP_ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024)
++
++static inline int iop_chan_is_busy(struct iop_adma_chan *chan)
++{
++	u32 status = __raw_readl(DMA_CSR(chan));
++	return (status & IOP_ADMA_STATUS_BUSY) ? 1 : 0;
++}
++
++static inline int iop_desc_is_aligned(struct iop_adma_desc_slot *desc,
++					int num_slots)
++{
++	/* num_slots will only ever be 1, 2, 4, or 8 */
++	return (desc->idx & (num_slots - 1)) ? 0 : 1;
++}
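++
++/*
++ * Example (editor's note): because num_slots is a power of two, the mask
++ * test is a cheap modulo -- with num_slots = 4, idx = 6 gives 6 & 3 = 2,
++ * so the descriptor is not aligned, while idx = 8 gives 8 & 3 = 0.
++ */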
++
++/* to do: support large (i.e. > hw max) buffer sizes */
++static inline int iop_chan_memcpy_slot_count(size_t len, int *slots_per_op)
++{
++	*slots_per_op = 1;
++	return 1;
++}
++
++/* to do: support large (i.e. > hw max) buffer sizes */
++static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op)
++{
++	*slots_per_op = 1;
++	return 1;
++}
++
++static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt,
++					int *slots_per_op)
++{
++	static const int slot_count_table[] = { 0,
++						1, 1, 1, 1, /* 01 - 04 */
++						2, 2, 2, 2, /* 05 - 08 */
++						4, 4, 4, 4, /* 09 - 12 */
++						4, 4, 4, 4, /* 13 - 16 */
++						8, 8, 8, 8, /* 17 - 20 */
++						8, 8, 8, 8, /* 21 - 24 */
++						8, 8, 8, 8, /* 25 - 28 */
++						8, 8, 8, 8, /* 29 - 32 */
++					      };
++	*slots_per_op = slot_count_table[src_cnt];
++	return *slots_per_op;
++}
++
++static inline int
++iop_chan_interrupt_slot_count(int *slots_per_op, struct iop_adma_chan *chan)
++{
++	switch (chan->device->id) {
++	case DMA0_ID:
++	case DMA1_ID:
++		return iop_chan_memcpy_slot_count(0, slots_per_op);
++	case AAU_ID:
++		return iop3xx_aau_xor_slot_count(0, 2, slots_per_op);
++	default:
++		BUG();
++	}
++	return 0;
++}
++
++static inline int iop_chan_xor_slot_count(size_t len, int src_cnt,
++						int *slots_per_op)
++{
++	int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
++
++	if (len <= IOP_ADMA_XOR_MAX_BYTE_COUNT)
++		return slot_cnt;
++
++	len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
++	while (len > IOP_ADMA_XOR_MAX_BYTE_COUNT) {
++		len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
++		slot_cnt += *slots_per_op;
++	}
++
++	if (len)
++		slot_cnt += *slots_per_op;
++
++	return slot_cnt;
++}
++
++/* zero sum on iop3xx is limited to 1k at a time so it requires multiple
++ * descriptors
++ */
++static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
++						int *slots_per_op)
++{
++	int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
++
++	if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT)
++		return slot_cnt;
++
++	len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
++	while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
++		len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
++		slot_cnt += *slots_per_op;
++	}
++
++	if (len)
++		slot_cnt += *slots_per_op;
++
++	return slot_cnt;
++}
++
++static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc,
++					struct iop_adma_chan *chan)
++{
++	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++
++	switch (chan->device->id) {
++	case DMA0_ID:
++	case DMA1_ID:
++		return hw_desc.dma->dest_addr;
++	case AAU_ID:
++		return hw_desc.aau->dest_addr;
++	default:
++		BUG();
++	}
++	return 0;
++}
++
++static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
++					struct iop_adma_chan *chan)
++{
++	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++
++	switch (chan->device->id) {
++	case DMA0_ID:
++	case DMA1_ID:
++		return hw_desc.dma->byte_count;
++	case AAU_ID:
++		return hw_desc.aau->byte_count;
++	default:
++		BUG();
++	}
++	return 0;
++}
++
++/* translate the src_idx to a descriptor word index; src_edc[] words 4, 13,
++ * and 22 hold EDCR0-2 rather than source addresses, so the table skips them
++ */
++static inline int __desc_idx(int src_idx)
++{
++	static const int desc_idx_table[] = { 0, 0, 0, 0,
++					      0, 1, 2, 3,
++					      5, 6, 7, 8,
++					      9, 10, 11, 12,
++					      14, 15, 16, 17,
++					      18, 19, 20, 21,
++					      23, 24, 25, 26,
++					      27, 28, 29, 30,
++					    };
++
++	return desc_idx_table[src_idx];
++}
++
++static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc,
++					struct iop_adma_chan *chan,
++					int src_idx)
++{
++	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++
++	switch (chan->device->id) {
++	case DMA0_ID:
++	case DMA1_ID:
++		return hw_desc.dma->src_addr;
++	case AAU_ID:
++		break;
++	default:
++		BUG();
++	}
++
++	if (src_idx < 4)
++		return hw_desc.aau->src[src_idx];
++	else
++		return hw_desc.aau->src_edc[__desc_idx(src_idx)].src_addr;
++}
++
++static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc,
++					int src_idx, dma_addr_t addr)
++{
++	if (src_idx < 4)
++		hw_desc->src[src_idx] = addr;
++	else
++		hw_desc->src_edc[__desc_idx(src_idx)].src_addr = addr;
++}
++
++static inline void
++iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
++{
++	struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
++	union {
++		u32 value;
++		struct iop3xx_dma_desc_ctrl field;
++	} u_desc_ctrl;
++
++	u_desc_ctrl.value = 0;
++	u_desc_ctrl.field.mem_to_mem_en = 1;
++	u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */
++	u_desc_ctrl.field.int_en = int_en;
++	hw_desc->desc_ctrl = u_desc_ctrl.value;
++	hw_desc->upper_pci_src_addr = 0;
++	hw_desc->crc_addr = 0;
++}
++
++static inline void
++iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
++{
++	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
++	union {
++		u32 value;
++		struct iop3xx_aau_desc_ctrl field;
++	} u_desc_ctrl;
++
++	u_desc_ctrl.value = 0;
++	u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */
++	u_desc_ctrl.field.dest_write_en = 1;
++	u_desc_ctrl.field.int_en = int_en;
++	hw_desc->desc_ctrl = u_desc_ctrl.value;
++}
++
++static inline u32
++iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt, int int_en)
++{
++	int i, shift;
++	u32 edcr;
++	union {
++		u32 value;
++		struct iop3xx_aau_desc_ctrl field;
++	} u_desc_ctrl;
++
++	u_desc_ctrl.value = 0;
++	switch (src_cnt) {
++	case 25 ... 32:
++		u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
++		edcr = 0;
++		shift = 1;
++		for (i = 24; i < src_cnt; i++) {
++			edcr |= (1 << shift);
++			shift += 3;
++		}
++		hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = edcr;
++		src_cnt = 24;
++		/* fall through */
++	case 17 ... 24:
++		if (!u_desc_ctrl.field.blk_ctrl) {
++			hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
++			u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
++		}
++		edcr = 0;
++		shift = 1;
++		for (i = 16; i < src_cnt; i++) {
++			edcr |= (1 << shift);
++			shift += 3;
++		}
++		hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = edcr;
++		src_cnt = 16;
++		/* fall through */
++	case 9 ... 16:
++		if (!u_desc_ctrl.field.blk_ctrl)
++			u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
++		edcr = 0;
++		shift = 1;
++		for (i = 8; i < src_cnt; i++) {
++			edcr |= (1 << shift);
++			shift += 3;
++		}
++		hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = edcr;
++		src_cnt = 8;
++		/* fall through */
++	case 2 ... 8:
++		shift = 1;
++		for (i = 0; i < src_cnt; i++) {
++			u_desc_ctrl.value |= (1 << shift);
++			shift += 3;
++		}
++
++		if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
++			u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
++	}
++
++	u_desc_ctrl.field.dest_write_en = 1;
++	u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */
++	u_desc_ctrl.field.int_en = int_en;
++	hw_desc->desc_ctrl = u_desc_ctrl.value;
++
++	return u_desc_ctrl.value;
++}
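++
++/* Worked example (not in the original patch): for src_cnt = 18, sources
++ * 1-8 get their 3-bit command fields set in desc_ctrl, sources 9-16 in
++ * EDCR0 and 17-18 in EDCR1; the per-source value 1 appears to select the
++ * XOR command, hence the shift += 3 walk across the fields.
++ */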
++
++static inline void
++iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
++{
++	iop3xx_desc_init_xor(desc->hw_desc, src_cnt, int_en);
++}
++
++/* return the number of operations */
++static inline int
++iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
++{
++	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
++	struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter;
++	union {
++		u32 value;
++		struct iop3xx_aau_desc_ctrl field;
++	} u_desc_ctrl;
++	int i, j;
++
++	hw_desc = desc->hw_desc;
++
++	for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0;
++		i += slots_per_op, j++) {
++		iter = iop_hw_desc_slot_idx(hw_desc, i);
++		u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, int_en);
++		u_desc_ctrl.field.dest_write_en = 0;
++		u_desc_ctrl.field.zero_result_en = 1;
++		u_desc_ctrl.field.int_en = int_en;
++		iter->desc_ctrl = u_desc_ctrl.value;
++
++		/* for the subsequent descriptors preserve the store queue
++		 * and chain them together; each slot is IOP_ADMA_SLOT_SIZE
++		 * (32) bytes, hence the i << 5 in the next_desc address
++		 */
++		if (i) {
++			prev_hw_desc =
++				iop_hw_desc_slot_idx(hw_desc, i - slots_per_op);
++			prev_hw_desc->next_desc = (u32) (desc->phys + (i << 5));
++		}
++	}
++
++	return j;
++}
++
++static inline void
++iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
++{
++	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
++	union {
++		u32 value;
++		struct iop3xx_aau_desc_ctrl field;
++	} u_desc_ctrl;
++
++	u_desc_ctrl.value = 0;
++	switch (src_cnt) {
++	case 25 ... 32:
++		u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
++		hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
++		/* fall through */
++	case 17 ... 24:
++		if (!u_desc_ctrl.field.blk_ctrl) {
++			hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
++			u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
++		}
++		hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = 0;
++		/* fall through */
++	case 9 ... 16:
++		if (!u_desc_ctrl.field.blk_ctrl)
++			u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
++		hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = 0;
++		/* fall through */
++	case 1 ... 8:
++		if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
++			u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
++	}
++
++	u_desc_ctrl.field.dest_write_en = 0;
++	u_desc_ctrl.field.int_en = int_en;
++	hw_desc->desc_ctrl = u_desc_ctrl.value;
++}
++
++static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
++					struct iop_adma_chan *chan,
++					u32 byte_count)
++{
++	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++
++	switch (chan->device->id) {
++	case DMA0_ID:
++	case DMA1_ID:
++		hw_desc.dma->byte_count = byte_count;
++		break;
++	case AAU_ID:
++		hw_desc.aau->byte_count = byte_count;
++		break;
++	default:
++		BUG();
++	}
++}
++
++static inline void
++iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
++			struct iop_adma_chan *chan)
++{
++	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++
++	switch (chan->device->id) {
++	case DMA0_ID:
++	case DMA1_ID:
++		iop_desc_init_memcpy(desc, 1);
++		hw_desc.dma->byte_count = 0;
++		hw_desc.dma->dest_addr = 0;
++		hw_desc.dma->src_addr = 0;
++		break;
++	case AAU_ID:
++		iop_desc_init_null_xor(desc, 2, 1);
++		hw_desc.aau->byte_count = 0;
++		hw_desc.aau->dest_addr = 0;
++		hw_desc.aau->src[0] = 0;
++		hw_desc.aau->src[1] = 0;
++		break;
++	default:
++		BUG();
++	}
++}
++
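++/* e.g. (not in the original patch) len = 2560 splits into two full
++ * 1024-byte sub-descriptors plus a final 512-byte one, mirroring the
++ * slot accounting in iop_chan_zero_sum_slot_count() above.
++ */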
++static inline void
++iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
++{
++	int slots_per_op = desc->slots_per_op;
++	struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
++	int i = 0;
++
++	if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
++		hw_desc->byte_count = len;
++	} else {
++		do {
++			iter = iop_hw_desc_slot_idx(hw_desc, i);
++			iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
++			len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
++			i += slots_per_op;
++		} while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
++
++		if (len) {
++			iter = iop_hw_desc_slot_idx(hw_desc, i);
++			iter->byte_count = len;
++		}
++	}
++}
++
++static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
++					struct iop_adma_chan *chan,
++					dma_addr_t addr)
++{
++	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++
++	switch (chan->device->id) {
++	case DMA0_ID:
++	case DMA1_ID:
++		hw_desc.dma->dest_addr = addr;
++		break;
++	case AAU_ID:
++		hw_desc.aau->dest_addr = addr;
++		break;
++	default:
++		BUG();
++	}
++}
++
++static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
++					dma_addr_t addr)
++{
++	struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
++	hw_desc->src_addr = addr;
++}
++
++static inline void
++iop_desc_set_zero_sum_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
++				dma_addr_t addr)
++{
++	struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
++	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
++	int i;
++
++	for (i = 0; (slot_cnt -= slots_per_op) >= 0;
++		i += slots_per_op, addr += IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
++		iter = iop_hw_desc_slot_idx(hw_desc, i);
++		iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
++	}
++}
++
++static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
++					int src_idx, dma_addr_t addr)
++{
++	struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
++	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
++	int i;
++
++	for (i = 0; (slot_cnt -= slots_per_op) >= 0;
++		i += slots_per_op, addr += IOP_ADMA_XOR_MAX_BYTE_COUNT) {
++		iter = iop_hw_desc_slot_idx(hw_desc, i);
++		iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
++	}
++}
++
++static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
++					u32 next_desc_addr)
++{
++	/* hw_desc->next_desc is the same location for all channels */
++	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++	BUG_ON(hw_desc.dma->next_desc);
++	hw_desc.dma->next_desc = next_desc_addr;
++}
++
++static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc)
++{
++	/* hw_desc->next_desc is the same location for all channels */
++	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++	return hw_desc.dma->next_desc;
++}
++
++static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc)
++{
++	/* hw_desc->next_desc is the same location for all channels */
++	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
++	hw_desc.dma->next_desc = 0;
++}
++
++static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
++						u32 val)
++{
++	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
++	hw_desc->src[0] = val;
++}
++
++static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
++{
++	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
++	struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
++
++	BUG_ON(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
++	return desc_ctrl.zero_result_err;
++}
++
++static inline void iop_chan_append(struct iop_adma_chan *chan)
++{
++	u32 dma_chan_ctrl;
++	/* workaround dropped interrupts on 3xx */
++	mod_timer(&chan->cleanup_watchdog, jiffies + msecs_to_jiffies(3));
++
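++	/* bit 1 of the CCR appears to be the chain-resume control (an
++	 * inference from usage; it is distinct from bit 0, the channel
++	 * enable toggled by iop_chan_disable()/iop_chan_enable() below)
++	 */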
++	dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
++	dma_chan_ctrl |= 0x2;
++	__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
++}
++
++static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan)
++{
++	if (!busy)
++		del_timer(&chan->cleanup_watchdog);
++}
++
++static inline u32 iop_chan_get_status(struct iop_adma_chan *chan)
++{
++	return __raw_readl(DMA_CSR(chan));
++}
++
++static inline void iop_chan_disable(struct iop_adma_chan *chan)
++{
++	u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
++	dma_chan_ctrl &= ~1;
++	__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
++}
++
++static inline void iop_chan_enable(struct iop_adma_chan *chan)
++{
++	u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
++
++	dma_chan_ctrl |= 1;
++	__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
++}
++
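++/* CSR bit 9 flags end of transfer and bit 8 end of chain; the
++ * read-mask-write sequence below suggests the bits are
++ * write-one-to-clear (an assumption, the IOP3xx manuals are not
++ * part of this patch).
++ */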
++static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan)
++{
++	u32 status = __raw_readl(DMA_CSR(chan));
++	status &= (1 << 9);
++	__raw_writel(status, DMA_CSR(chan));
++}
++
++static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan)
++{
++	u32 status = __raw_readl(DMA_CSR(chan));
++	status &= (1 << 8);
++	__raw_writel(status, DMA_CSR(chan));
++}
++
++static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan)
++{
++	u32 status = __raw_readl(DMA_CSR(chan));
++
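++	/* mask the error bits decoded by the iop_is_err_* helpers below:
++	 * bit 5 internal master abort, bit 3 PCI master abort,
++	 * bit 2 PCI target abort, bit 1 split transaction error
++	 */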
++	switch (chan->device->id) {
++	case DMA0_ID:
++	case DMA1_ID:
++		status &= (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1);
++		break;
++	case AAU_ID:
++		status &= (1 << 5);
++		break;
++	default:
++		BUG();
++	}
++
++	__raw_writel(status, DMA_CSR(chan));
++}
++
++static inline int
++iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan)
++{
++	return 0;
++}
++
++static inline int
++iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan)
++{
++	return 0;
++}
++
++static inline int
++iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan)
++{
++	return 0;
++}
++
++static inline int
++iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan)
++{
++	return test_bit(5, &status);
++}
++
++static inline int
++iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan)
++{
++	switch (chan->device->id) {
++	case DMA0_ID:
++	case DMA1_ID:
++		return test_bit(2, &status);
++	default:
++		return 0;
++	}
++}
++
++static inline int
++iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan)
++{
++	switch (chan->device->id) {
++	case DMA0_ID:
++	case DMA1_ID:
++		return test_bit(3, &status);
++	default:
++		return 0;
++	}
++}
++
++static inline int
++iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan)
++{
++	switch (chan->device->id) {
++	case DMA0_ID:
++	case DMA1_ID:
++		return test_bit(1, &status);
++	default:
++		return 0;
++	}
++}
++#endif /* _ADMA_H */
+diff -Nurb linux-2.6.22-570/include/asm-arm/hardware/iop3xx.h linux-2.6.22-try2/include/asm-arm/hardware/iop3xx.h
+--- linux-2.6.22-570/include/asm-arm/hardware/iop3xx.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-arm/hardware/iop3xx.h	2007-12-19 15:29:23.000000000 -0500
+@@ -144,24 +144,9 @@
+ #define IOP3XX_IAR		(volatile u32 *)IOP3XX_REG_ADDR(0x0380)
+ 
+ /* DMA Controller  */
+-#define IOP3XX_DMA0_CCR		(volatile u32 *)IOP3XX_REG_ADDR(0x0400)
+-#define IOP3XX_DMA0_CSR		(volatile u32 *)IOP3XX_REG_ADDR(0x0404)
+-#define IOP3XX_DMA0_DAR		(volatile u32 *)IOP3XX_REG_ADDR(0x040c)
+-#define IOP3XX_DMA0_NDAR	(volatile u32 *)IOP3XX_REG_ADDR(0x0410)
+-#define IOP3XX_DMA0_PADR	(volatile u32 *)IOP3XX_REG_ADDR(0x0414)
+-#define IOP3XX_DMA0_PUADR	(volatile u32 *)IOP3XX_REG_ADDR(0x0418)
+-#define IOP3XX_DMA0_LADR	(volatile u32 *)IOP3XX_REG_ADDR(0x041c)
+-#define IOP3XX_DMA0_BCR		(volatile u32 *)IOP3XX_REG_ADDR(0x0420)
+-#define IOP3XX_DMA0_DCR		(volatile u32 *)IOP3XX_REG_ADDR(0x0424)
+-#define IOP3XX_DMA1_CCR		(volatile u32 *)IOP3XX_REG_ADDR(0x0440)
+-#define IOP3XX_DMA1_CSR		(volatile u32 *)IOP3XX_REG_ADDR(0x0444)
+-#define IOP3XX_DMA1_DAR		(volatile u32 *)IOP3XX_REG_ADDR(0x044c)
+-#define IOP3XX_DMA1_NDAR	(volatile u32 *)IOP3XX_REG_ADDR(0x0450)
+-#define IOP3XX_DMA1_PADR	(volatile u32 *)IOP3XX_REG_ADDR(0x0454)
+-#define IOP3XX_DMA1_PUADR	(volatile u32 *)IOP3XX_REG_ADDR(0x0458)
+-#define IOP3XX_DMA1_LADR	(volatile u32 *)IOP3XX_REG_ADDR(0x045c)
+-#define IOP3XX_DMA1_BCR		(volatile u32 *)IOP3XX_REG_ADDR(0x0460)
+-#define IOP3XX_DMA1_DCR		(volatile u32 *)IOP3XX_REG_ADDR(0x0464)
++#define IOP3XX_DMA_PHYS_BASE(chan) (IOP3XX_PERIPHERAL_PHYS_BASE + \
+					(0x400 + ((chan) << 6)))
++#define IOP3XX_DMA_UPPER_PA(chan)  (IOP3XX_DMA_PHYS_BASE(chan) + 0x27)
+ 
+ /* Peripheral bus interface  */
+ #define IOP3XX_PBCR		(volatile u32 *)IOP3XX_REG_ADDR(0x0680)
+@@ -210,48 +195,8 @@
+ #define IOP_TMR_RATIO_1_1  0x00
+ 
+ /* Application accelerator unit  */
+-#define IOP3XX_AAU_ACR		(volatile u32 *)IOP3XX_REG_ADDR(0x0800)
+-#define IOP3XX_AAU_ASR		(volatile u32 *)IOP3XX_REG_ADDR(0x0804)
+-#define IOP3XX_AAU_ADAR		(volatile u32 *)IOP3XX_REG_ADDR(0x0808)
+-#define IOP3XX_AAU_ANDAR	(volatile u32 *)IOP3XX_REG_ADDR(0x080c)
+-#define IOP3XX_AAU_SAR1		(volatile u32 *)IOP3XX_REG_ADDR(0x0810)
+-#define IOP3XX_AAU_SAR2		(volatile u32 *)IOP3XX_REG_ADDR(0x0814)
+-#define IOP3XX_AAU_SAR3		(volatile u32 *)IOP3XX_REG_ADDR(0x0818)
+-#define IOP3XX_AAU_SAR4		(volatile u32 *)IOP3XX_REG_ADDR(0x081c)
+-#define IOP3XX_AAU_DAR		(volatile u32 *)IOP3XX_REG_ADDR(0x0820)
+-#define IOP3XX_AAU_ABCR		(volatile u32 *)IOP3XX_REG_ADDR(0x0824)
+-#define IOP3XX_AAU_ADCR		(volatile u32 *)IOP3XX_REG_ADDR(0x0828)
+-#define IOP3XX_AAU_SAR5		(volatile u32 *)IOP3XX_REG_ADDR(0x082c)
+-#define IOP3XX_AAU_SAR6		(volatile u32 *)IOP3XX_REG_ADDR(0x0830)
+-#define IOP3XX_AAU_SAR7		(volatile u32 *)IOP3XX_REG_ADDR(0x0834)
+-#define IOP3XX_AAU_SAR8		(volatile u32 *)IOP3XX_REG_ADDR(0x0838)
+-#define IOP3XX_AAU_EDCR0	(volatile u32 *)IOP3XX_REG_ADDR(0x083c)
+-#define IOP3XX_AAU_SAR9		(volatile u32 *)IOP3XX_REG_ADDR(0x0840)
+-#define IOP3XX_AAU_SAR10	(volatile u32 *)IOP3XX_REG_ADDR(0x0844)
+-#define IOP3XX_AAU_SAR11	(volatile u32 *)IOP3XX_REG_ADDR(0x0848)
+-#define IOP3XX_AAU_SAR12	(volatile u32 *)IOP3XX_REG_ADDR(0x084c)
+-#define IOP3XX_AAU_SAR13	(volatile u32 *)IOP3XX_REG_ADDR(0x0850)
+-#define IOP3XX_AAU_SAR14	(volatile u32 *)IOP3XX_REG_ADDR(0x0854)
+-#define IOP3XX_AAU_SAR15	(volatile u32 *)IOP3XX_REG_ADDR(0x0858)
+-#define IOP3XX_AAU_SAR16	(volatile u32 *)IOP3XX_REG_ADDR(0x085c)
+-#define IOP3XX_AAU_EDCR1	(volatile u32 *)IOP3XX_REG_ADDR(0x0860)
+-#define IOP3XX_AAU_SAR17	(volatile u32 *)IOP3XX_REG_ADDR(0x0864)
+-#define IOP3XX_AAU_SAR18	(volatile u32 *)IOP3XX_REG_ADDR(0x0868)
+-#define IOP3XX_AAU_SAR19	(volatile u32 *)IOP3XX_REG_ADDR(0x086c)
+-#define IOP3XX_AAU_SAR20	(volatile u32 *)IOP3XX_REG_ADDR(0x0870)
+-#define IOP3XX_AAU_SAR21	(volatile u32 *)IOP3XX_REG_ADDR(0x0874)
+-#define IOP3XX_AAU_SAR22	(volatile u32 *)IOP3XX_REG_ADDR(0x0878)
+-#define IOP3XX_AAU_SAR23	(volatile u32 *)IOP3XX_REG_ADDR(0x087c)
+-#define IOP3XX_AAU_SAR24	(volatile u32 *)IOP3XX_REG_ADDR(0x0880)
+-#define IOP3XX_AAU_EDCR2	(volatile u32 *)IOP3XX_REG_ADDR(0x0884)
+-#define IOP3XX_AAU_SAR25	(volatile u32 *)IOP3XX_REG_ADDR(0x0888)
+-#define IOP3XX_AAU_SAR26	(volatile u32 *)IOP3XX_REG_ADDR(0x088c)
+-#define IOP3XX_AAU_SAR27	(volatile u32 *)IOP3XX_REG_ADDR(0x0890)
+-#define IOP3XX_AAU_SAR28	(volatile u32 *)IOP3XX_REG_ADDR(0x0894)
+-#define IOP3XX_AAU_SAR29	(volatile u32 *)IOP3XX_REG_ADDR(0x0898)
+-#define IOP3XX_AAU_SAR30	(volatile u32 *)IOP3XX_REG_ADDR(0x089c)
+-#define IOP3XX_AAU_SAR31	(volatile u32 *)IOP3XX_REG_ADDR(0x08a0)
+-#define IOP3XX_AAU_SAR32	(volatile u32 *)IOP3XX_REG_ADDR(0x08a4)
++#define IOP3XX_AAU_PHYS_BASE (IOP3XX_PERIPHERAL_PHYS_BASE + 0x800)
++#define IOP3XX_AAU_UPPER_PA (IOP3XX_AAU_PHYS_BASE + 0xa7)
+ 
+ /* I2C bus interface unit  */
+ #define IOP3XX_ICR0		(volatile u32 *)IOP3XX_REG_ADDR(0x1680)
+@@ -329,6 +274,9 @@
+ 	asm volatile("mcr p6, 0, %0, c6, c1, 0" : : "r" (val));
+ }
+ 
++extern struct platform_device iop3xx_dma_0_channel;
++extern struct platform_device iop3xx_dma_1_channel;
++extern struct platform_device iop3xx_aau_channel;
+ extern struct platform_device iop3xx_i2c0_device;
+ extern struct platform_device iop3xx_i2c1_device;
+ 
+diff -Nurb linux-2.6.22-570/include/asm-arm/hardware/iop_adma.h linux-2.6.22-try2/include/asm-arm/hardware/iop_adma.h
+--- linux-2.6.22-570/include/asm-arm/hardware/iop_adma.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/asm-arm/hardware/iop_adma.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,120 @@
++/*
++ * Copyright © 2006, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#ifndef IOP_ADMA_H
++#define IOP_ADMA_H
++#include <linux/types.h>
++#include <linux/dmaengine.h>
++#include <linux/interrupt.h>
++
++#define IOP_ADMA_SLOT_SIZE 32
++#define IOP_ADMA_THRESHOLD 4
++
++/**
++ * struct iop_adma_device - internal representation of an ADMA device
++ * @pdev: Platform device
++ * @id: HW ADMA Device selector
++ * @dma_desc_pool: base of DMA descriptor region (DMA address)
++ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
++ * @common: embedded struct dma_device
++ */
++struct iop_adma_device {
++	struct platform_device *pdev;
++	int id;
++	dma_addr_t dma_desc_pool;
++	void *dma_desc_pool_virt;
++	struct dma_device common;
++};
++
++/**
++ * struct iop_adma_chan - internal representation of an ADMA channel
++ * @pending: allows batching of hardware operations
++ * @completed_cookie: identifier for the most recently completed operation
++ * @lock: serializes enqueue/dequeue operations to the slot pool
++ * @mmr_base: memory mapped register base
++ * @chain: device chain view of the descriptors
++ * @device: parent device
++ * @common: common dmaengine channel object members
++ * @last_used: placeholder for allocation to continue from where it left off
++ * @all_slots: complete domain of slots usable by the channel
++ * @cleanup_watchdog: workaround for missed interrupts on iop3xx
++ * @slots_allocated: records the actual size of the descriptor slot pool
++ * @irq_tasklet: bottom half where iop_adma_slot_cleanup runs
++ */
++struct iop_adma_chan {
++	int pending;
++	dma_cookie_t completed_cookie;
++	spinlock_t lock; /* protects the descriptor slot pool */
++	void __iomem *mmr_base;
++	struct list_head chain;
++	struct iop_adma_device *device;
++	struct dma_chan common;
++	struct iop_adma_desc_slot *last_used;
++	struct list_head all_slots;
++	struct timer_list cleanup_watchdog;
++	int slots_allocated;
++	struct tasklet_struct irq_tasklet;
++};
++
++/**
++ * struct iop_adma_desc_slot - IOP-ADMA software descriptor
++ * @slot_node: node on the iop_adma_chan.all_slots list
++ * @chain_node: node on the iop_adma_chan.chain list
++ * @hw_desc: virtual address of the hardware descriptor chain
++ * @phys: hardware address of the hardware descriptor chain
++ * @group_head: first operation in a transaction
++ * @slot_cnt: total slots used in a transaction (group of operations)
++ * @slots_per_op: number of slots per operation
++ * @idx: pool index
++ * @unmap_src_cnt: number of xor sources
++ * @unmap_len: transaction bytecount
++ * @async_tx: support for the async_tx api
++ * @group_list: list of slots that make up a multi-descriptor transaction
++ *	(for example, transfer lengths larger than the supported hw max)
++ * @xor_check_result: result of the zero-sum check
++ * @crc32_result: result of the CRC calculation
++ */
++struct iop_adma_desc_slot {
++	struct list_head slot_node;
++	struct list_head chain_node;
++	void *hw_desc;
++	dma_addr_t phys;
++	struct iop_adma_desc_slot *group_head;
++	u16 slot_cnt;
++	u16 slots_per_op;
++	u16 idx;
++	u16 unmap_src_cnt;
++	size_t unmap_len;
++	struct dma_async_tx_descriptor async_tx;
++	struct list_head group_list;
++	union {
++		u32 *xor_check_result;
++		u32 *crc32_result;
++	};
++};
++
++struct iop_adma_platform_data {
++	int hw_id;
++	dma_cap_mask_t cap_mask;
++	size_t pool_size;
++};
++
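++/* each software slot maps onto a 32-byte (IOP_ADMA_SLOT_SIZE) hardware
++ * descriptor, hence the << 5 in iop_hw_desc_slot_idx() below
++ */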
++#define to_iop_sw_desc(addr_hw_desc) \
++	container_of(addr_hw_desc, struct iop_adma_desc_slot, hw_desc)
++#define iop_hw_desc_slot_idx(hw_desc, idx) \
++	((void *) (((unsigned long) (hw_desc)) + ((idx) << 5)))
++#endif /* IOP_ADMA_H */
+diff -Nurb linux-2.6.22-570/include/asm-arm/kgdb.h linux-2.6.22-try2/include/asm-arm/kgdb.h
+--- linux-2.6.22-570/include/asm-arm/kgdb.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/asm-arm/kgdb.h	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,103 @@
++/*
++ * include/asm-arm/kgdb.h
++ *
++ * ARM KGDB support
++ *
++ * Author: Deepak Saxena <dsaxena@mvista.com>
++ *
++ * Copyright (C) 2002 MontaVista Software Inc.
++ *
++ */
++
++#ifndef __ASM_KGDB_H__
++#define __ASM_KGDB_H__
++
++#include <asm/ptrace.h>
++#include <asm-generic/kgdb.h>
++
++
++/*
++ * GDB assumes that we're a user process being debugged, so
++ * it will send us an SWI instruction to write into memory as the
++ * debug trap. When an SWI occurs, the next instruction addr is
++ * placed into R14_svc before jumping to the vector trap.
++ * This doesn't work for kernel debugging: we are already in SVC
++ * mode, so we would lose the kernel's LR, which is a bad thing.
++ *
++ * By doing this as an undefined instruction trap, we force a mode
++ * switch from SVC to UND mode, allowing us to save full kernel state.
++ *
++ * We also define a KGDB_COMPILED_BREAK which can be used to compile
++ * in breakpoints. This is important for things like sysrq-G and for
++ * the initial breakpoint from trap_init().
++ *
++ * Note to ARM HW designers: Add real trap support like SH && PPC to
++ * make our lives much much simpler. :)
++ */
++#define	BREAK_INSTR_SIZE		4
++#define GDB_BREAKINST                   0xef9f0001
++#define KGDB_BREAKINST                  0xe7ffdefe
++#define KGDB_COMPILED_BREAK             0xe7ffdeff
++#define CACHE_FLUSH_IS_SAFE		1
++
++#ifndef	__ASSEMBLY__
++
++#define	BREAKPOINT()			asm(".word 	0xe7ffdeff")
++
++
++extern void kgdb_handle_bus_error(void);
++extern int kgdb_fault_expected;
++#endif /* !__ASSEMBLY__ */
++
++/*
++ * From Kevin Hilman:
++ *
++ * gdb is expecting the following registers layout.
++ *
++ * r0-r15: 1 long word each
++ * f0-f7:  unused, 3 long words each !!
++ * fps:    unused, 1 long word
++ * cpsr:   1 long word
++ *
++ * Even though f0-f7 and fps are not used, they need to be
++ * present in the registers sent for correct processing in
++ * the host-side gdb.
++ *
++ * In particular, it is crucial that CPSR is in the right place,
++ * otherwise gdb will not be able to correctly interpret stepping over
++ * conditional branches.
++ */
++#define _GP_REGS 		16
++#define _FP_REGS 		8
++#define _EXTRA_REGS 		2
++#define	GDB_MAX_REGS		(_GP_REGS + (_FP_REGS * 3) + _EXTRA_REGS)
++
++#define	KGDB_MAX_NO_CPUS	1
++#define	BUFMAX			400
++#define	NUMREGBYTES		(GDB_MAX_REGS << 2)
++#define	NUMCRITREGBYTES		(32 << 2)
++
++#define	_R0		0
++#define	_R1		1
++#define	_R2		2
++#define	_R3		3
++#define	_R4		4
++#define	_R5		5
++#define	_R6		6
++#define	_R7		7
++#define	_R8		8
++#define	_R9		9
++#define	_R10		10
++#define	_FP		11
++#define	_IP		12
++#define	_SP		13
++#define	_LR		14
++#define	_PC		15
++#define	_CPSR		(GDB_MAX_REGS - 1)
++
++/* So that we can denote the end of a frame for tracing, in the simple
++ * case. */
++#define CFI_END_FRAME(func)	__CFI_END_FRAME(_PC,_SP,func)
++
++#endif /* __ASM_KGDB_H__ */
+diff -Nurb linux-2.6.22-570/include/asm-arm/system.h linux-2.6.22-try2/include/asm-arm/system.h
+--- linux-2.6.22-570/include/asm-arm/system.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-arm/system.h	2007-12-19 15:29:24.000000000 -0500
+@@ -360,6 +360,41 @@
+ extern void disable_hlt(void);
+ extern void enable_hlt(void);
+ 
++#ifndef CONFIG_SMP
++/*
++ * Atomic compare and exchange.
++ */
++#define __HAVE_ARCH_CMPXCHG	1
++
++extern unsigned long wrong_size_cmpxchg(volatile void *ptr);
++
++static inline unsigned long __cmpxchg(volatile void *ptr,
++				    unsigned long old,
++				    unsigned long new, int size)
++{
++	unsigned long flags, prev;
++	volatile unsigned long *p = ptr;
++
++	if (size == 4) {
++		local_irq_save(flags);
++		if ((prev = *p) == old)
++			*p = new;
++		local_irq_restore(flags);
++		return prev;
++	} else
++		return wrong_size_cmpxchg(ptr);
++}
++
++#define cmpxchg(ptr,o,n)					  	\
++({									\
++     __typeof__(*(ptr)) _o_ = (o);					\
++     __typeof__(*(ptr)) _n_ = (n);					\
++     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		\
++			   	 (unsigned long)_n_, sizeof(*(ptr)));	\
++})
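++
++/* A minimal usage sketch (not part of the original patch; "counter" is a
++ * hypothetical unsigned long variable):
++ *
++ *	unsigned long old;
++ *	do {
++ *		old = counter;
++ *	} while (cmpxchg(&counter, old, old + 1) != old);
++ *
++ * On this UP implementation the atomicity comes from the IRQ-disabled
++ * window inside __cmpxchg().
++ */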
++
++#endif
++
+ #endif /* __ASSEMBLY__ */
+ 
+ #define arch_align_stack(x) (x)
+diff -Nurb linux-2.6.22-570/include/asm-cris/page.h linux-2.6.22-try2/include/asm-cris/page.h
+--- linux-2.6.22-570/include/asm-cris/page.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-cris/page.h	2007-12-19 15:29:24.000000000 -0500
+@@ -20,7 +20,8 @@
+ #define clear_user_page(page, vaddr, pg)    clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
+ 
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+ 
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-generic/kgdb.h linux-2.6.22-try2/include/asm-generic/kgdb.h
+--- linux-2.6.22-570/include/asm-generic/kgdb.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/asm-generic/kgdb.h	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,100 @@
++/*
++ * include/asm-generic/kgdb.h
++ *
++ * This provides the assembly level information so that KGDB can provide
++ * a GDB that has been patched with enough information to know to stop
++ * trying to unwind the function.
++ *
++ * Author: Tom Rini <trini@kernel.crashing.org>
++ *
++ * 2005 (c) MontaVista Software, Inc.
++ * 2006 (c) Embedded Alley Solutions, Inc.
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#ifndef __ASM_GENERIC_KGDB_H__
++#define __ASM_GENERIC_KGDB_H__
++
++#ifdef CONFIG_X86
++/**
++ *	kgdb_skipexception - Bail out of KGDB when we've been triggered.
++ *	@exception: Exception vector number
++ *	@regs: Current &struct pt_regs.
++ *
++ *	On some architectures we need to skip a breakpoint exception when
++ *	it occurs after a breakpoint has been removed.
++ */
++int kgdb_skipexception(int exception, struct pt_regs *regs);
++#else
++#define kgdb_skipexception(exception, regs)	0
++#endif
++
++#if defined(CONFIG_X86)
++/**
++ *	kgdb_post_master_code - Save error vector/code numbers.
++ *	@regs: Original pt_regs.
++ *	@e_vector: Original error vector.
++ *	@err_code: Original error code.
++ *
++ *	This is needed on architectures which support SMP and KGDB.
++ *	This function is called after all the slave cpus have been put
++ *	to a known spin state and the master CPU has control over KGDB.
++ */
++extern void kgdb_post_master_code(struct pt_regs *regs, int e_vector,
++				  int err_code);
++
++/**
++ *	kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb.
++ *	@regs: Current &struct pt_regs.
++ *
++ *	This function will be called if the particular architecture must
++ *	disable hardware debugging while it is processing gdb packets or
++ *	handling an exception.
++ */
++extern void kgdb_disable_hw_debug(struct pt_regs *regs);
++#else
++#define kgdb_disable_hw_debug(regs)		do { } while (0)
++#define kgdb_post_master_code(regs, v, c)	do { } while (0)
++#endif
++
++#ifdef CONFIG_KGDB_ARCH_HAS_SHADOW_INFO
++/**
++ *	kgdb_shadowinfo - Get shadowed information on @threadid.
++ *	@regs: The &struct pt_regs of the current process.
++ *	@buffer: A buffer of %BUFMAX size.
++ *	@threadid: The thread id of the shadowed process to get information on.
++ */
++extern void kgdb_shadowinfo(struct pt_regs *regs, char *buffer,
++			    unsigned threadid);
++
++/**
++ *	kgdb_get_shadow_thread - Get the shadowed &task_struct of @threadid.
++ *	@regs: The &struct pt_regs of the current thread.
++ *	@threadid: The thread id of the shadowed process to get information on.
++ *
++ *	RETURN:
++ *	This returns a pointer to the &struct task_struct of the shadowed
++ *	thread, @threadid.
++ */
++extern struct task_struct *kgdb_get_shadow_thread(struct pt_regs *regs,
++						  int threadid);
++
++/**
++ *	kgdb_shadow_regs - Return the shadowed registers of @threadid.
++ *	@regs: The &struct pt_regs of the current thread.
++ *	@threadid: The thread id we want the &struct pt_regs for.
++ *
++ *	RETURN:
++ *	A pointer to the &struct pt_regs of the shadowed thread @threadid.
++ */
++extern struct pt_regs *kgdb_shadow_regs(struct pt_regs *regs, int threadid);
++#else
++#define kgdb_shadowinfo(regs, buf, threadid)		do { } while (0)
++#define kgdb_get_shadow_thread(regs, threadid)		NULL
++#define kgdb_shadow_regs(regs, threadid)		NULL
++#endif
++
++#endif				/* __ASM_GENERIC_KGDB_H__ */
+diff -Nurb linux-2.6.22-570/include/asm-generic/vmlinux.lds.h linux-2.6.22-try2/include/asm-generic/vmlinux.lds.h
+--- linux-2.6.22-570/include/asm-generic/vmlinux.lds.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-generic/vmlinux.lds.h	2007-12-19 15:29:23.000000000 -0500
+@@ -127,6 +127,8 @@
+ 		*(__ksymtab_strings)					\
+ 	}								\
+ 									\
++	EH_FRAME							\
++									\
+ 	/* Built-in module parameters. */				\
+ 	__param : AT(ADDR(__param) - LOAD_OFFSET) {			\
+ 		VMLINUX_SYMBOL(__start___param) = .;			\
+@@ -177,6 +179,26 @@
+ 		*(.kprobes.text)					\
+ 		VMLINUX_SYMBOL(__kprobes_text_end) = .;
+ 
++#ifdef CONFIG_STACK_UNWIND
++#define EH_FRAME							\
++		/* Unwind data binary search table */			\
++		. = ALIGN(8);						\
++        	.eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) {	\
++			VMLINUX_SYMBOL(__start_unwind_hdr) = .;		\
++			*(.eh_frame_hdr)				\
++			VMLINUX_SYMBOL(__end_unwind_hdr) = .;		\
++		}							\
++		/* Unwind data */					\
++		. = ALIGN(8);						\
++		.eh_frame : AT(ADDR(.eh_frame) - LOAD_OFFSET) {		\
++			VMLINUX_SYMBOL(__start_unwind) = .;		\
++		  	*(.eh_frame)					\
++			VMLINUX_SYMBOL(__end_unwind) = .;		\
++		}
++#else
++#define EH_FRAME
++#endif
++
+ 		/* DWARF debug sections.
+ 		Symbols in the DWARF debugging sections are relative to
+ 		the beginning of the section so we begin them at 0.  */
+diff -Nurb linux-2.6.22-570/include/asm-h8300/page.h linux-2.6.22-try2/include/asm-h8300/page.h
+--- linux-2.6.22-570/include/asm-h8300/page.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-h8300/page.h	2007-12-19 15:29:24.000000000 -0500
+@@ -22,7 +22,8 @@
+ #define clear_user_page(page, vaddr, pg)	clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+ 
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+ 
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-i386/kdebug.h linux-2.6.22-try2/include/asm-i386/kdebug.h
+--- linux-2.6.22-570/include/asm-i386/kdebug.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-i386/kdebug.h	2007-12-19 15:29:24.000000000 -0500
+@@ -28,6 +28,7 @@
+ 	DIE_CALL,
+ 	DIE_NMI_IPI,
+ 	DIE_PAGE_FAULT,
++	DIE_PAGE_FAULT_NO_CONTEXT,
+ };
+ 
+ #endif
+diff -Nurb linux-2.6.22-570/include/asm-i386/kgdb.h linux-2.6.22-try2/include/asm-i386/kgdb.h
+--- linux-2.6.22-570/include/asm-i386/kgdb.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/asm-i386/kgdb.h	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,51 @@
++#ifdef __KERNEL__
++#ifndef _ASM_KGDB_H_
++#define _ASM_KGDB_H_
++
++#include <asm-generic/kgdb.h>
++
++/*
++ * Copyright (C) 2001-2004 Amit S. Kale
++ */
++
++/************************************************************************/
++/* BUFMAX defines the maximum number of characters in inbound/outbound buffers */
++/* at least NUMREGBYTES*2 are needed for register packets */
++/* A longer buffer is needed to list all threads */
++#define BUFMAX			1024
++
++/* Number of bytes of registers.  */
++#define NUMREGBYTES		64
++/* Number of bytes of registers we need to save for a setjmp/longjmp. */
++#define NUMCRITREGBYTES		24
++
++/*
++ *  Note that this register image is in a different order than
++ *  the register image that Linux produces at interrupt time.
++ *
++ *  Linux's register image is defined by struct pt_regs in ptrace.h.
++ *  Just why GDB uses a different order is a historical mystery.
++ */
++enum regnames { _EAX,		/* 0 */
++	_ECX,			/* 1 */
++	_EDX,			/* 2 */
++	_EBX,			/* 3 */
++	_ESP,			/* 4 */
++	_EBP,			/* 5 */
++	_ESI,			/* 6 */
++	_EDI,			/* 7 */
++	_PC,			/* 8 also known as eip */
++	_PS,			/* 9 also known as eflags */
++	_CS,			/* 10 */
++	_SS,			/* 11 */
++	_DS,			/* 12 */
++	_ES,			/* 13 */
++	_FS,			/* 14 */
++	_GS			/* 15 */
++};
++
++#define BREAKPOINT()		asm("   int $3");
++#define BREAK_INSTR_SIZE	1
++#define CACHE_FLUSH_IS_SAFE	1
++#endif				/* _ASM_KGDB_H_ */
++#endif				/* __KERNEL__ */
+diff -Nurb linux-2.6.22-570/include/asm-i386/page.h linux-2.6.22-try2/include/asm-i386/page.h
+--- linux-2.6.22-570/include/asm-i386/page.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-i386/page.h	2007-12-19 15:29:24.000000000 -0500
+@@ -34,7 +34,8 @@
+ #define clear_user_page(page, vaddr, pg)	clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+ 
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+ 
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-i386/unistd.h linux-2.6.22-try2/include/asm-i386/unistd.h
+--- linux-2.6.22-570/include/asm-i386/unistd.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-i386/unistd.h	2007-12-19 15:29:24.000000000 -0500
+@@ -329,10 +329,13 @@
+ #define __NR_signalfd		321
+ #define __NR_timerfd		322
+ #define __NR_eventfd		323
++#define __NR_revokeat		324
++#define __NR_frevoke		325
++#define __NR_fallocate		326
+ 
+ #ifdef __KERNEL__
+ 
+-#define NR_syscalls 324
++#define NR_syscalls 327
+ 
+ #define __ARCH_WANT_IPC_PARSE_VERSION
+ #define __ARCH_WANT_OLD_READDIR
+diff -Nurb linux-2.6.22-570/include/asm-i386/unwind.h linux-2.6.22-try2/include/asm-i386/unwind.h
+--- linux-2.6.22-570/include/asm-i386/unwind.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-i386/unwind.h	2007-12-19 15:29:23.000000000 -0500
+@@ -1,6 +1,95 @@
+ #ifndef _ASM_I386_UNWIND_H
+ #define _ASM_I386_UNWIND_H
+ 
++/*
++ * Copyright (C) 2002-2006 Novell, Inc.
++ *	Jan Beulich <jbeulich@novell.com>
++ * This code is released under version 2 of the GNU GPL.
++ */
++
++#ifdef CONFIG_STACK_UNWIND
++
++#include <linux/sched.h>
++#include <asm/fixmap.h>
++#include <asm/ptrace.h>
++#include <asm/uaccess.h>
++
++struct unwind_frame_info
++{
++	struct pt_regs regs;
++	struct task_struct *task;
++	unsigned call_frame:1;
++};
++
++#define UNW_PC(frame)        (frame)->regs.eip
++#define UNW_SP(frame)        (frame)->regs.esp
++#ifdef CONFIG_FRAME_POINTER
++#define UNW_FP(frame)        (frame)->regs.ebp
++#define FRAME_RETADDR_OFFSET 4
++#define FRAME_LINK_OFFSET    0
++#define STACK_BOTTOM(tsk)    STACK_LIMIT((tsk)->thread.esp0)
++#define STACK_TOP(tsk)       ((tsk)->thread.esp0)
++#else
++#define UNW_FP(frame) ((void)(frame), 0)
++#endif
++#define STACK_LIMIT(ptr)     (((ptr) - 1) & ~(THREAD_SIZE - 1))
++
++#define UNW_REGISTER_INFO \
++	PTREGS_INFO(eax), \
++	PTREGS_INFO(ecx), \
++	PTREGS_INFO(edx), \
++	PTREGS_INFO(ebx), \
++	PTREGS_INFO(esp), \
++	PTREGS_INFO(ebp), \
++	PTREGS_INFO(esi), \
++	PTREGS_INFO(edi), \
++	PTREGS_INFO(eip)
++
++#define UNW_DEFAULT_RA(raItem, dataAlign) \
++	((raItem).where == Memory && \
++	 !((raItem).value * (dataAlign) + 4))
++
++static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
++                                            /*const*/ struct pt_regs *regs)
++{
++	if (user_mode_vm(regs))
++		info->regs = *regs;
++	else {
++		memcpy(&info->regs, regs, offsetof(struct pt_regs, esp));
++		info->regs.esp = (unsigned long)&regs->esp;
++		info->regs.xss = __KERNEL_DS;
++	}
++}
++
++static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
++{
++	memset(&info->regs, 0, sizeof(info->regs));
++	info->regs.eip = info->task->thread.eip;
++	info->regs.xcs = __KERNEL_CS;
++	__get_user(info->regs.ebp, (long *)info->task->thread.esp);
++	info->regs.esp = info->task->thread.esp;
++	info->regs.xss = __KERNEL_DS;
++	info->regs.xds = __USER_DS;
++	info->regs.xes = __USER_DS;
++	info->regs.xfs = __KERNEL_PERCPU;
++}
++
++extern asmlinkage int arch_unwind_init_running(struct unwind_frame_info *,
++                                               asmlinkage int (*callback)(struct unwind_frame_info *,
++                                                                          void *arg),
++                                               void *arg);
++
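++/* heuristic: treat the frame as user mode if the saved segment registers
++ * say so, if eip or esp fall below PAGE_OFFSET, or if eip is inside the
++ * vsyscall (FIX_VDSO) page
++ */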
++static inline int arch_unw_user_mode(/*const*/ struct unwind_frame_info *info)
++{
++	return user_mode_vm(&info->regs)
++	       || info->regs.eip < PAGE_OFFSET
++	       || (info->regs.eip >= __fix_to_virt(FIX_VDSO)
++	           && info->regs.eip < __fix_to_virt(FIX_VDSO) + PAGE_SIZE)
++	       || info->regs.esp < PAGE_OFFSET;
++}
++
++#else
++
+ #define UNW_PC(frame) ((void)(frame), 0)
+ #define UNW_SP(frame) ((void)(frame), 0)
+ #define UNW_FP(frame) ((void)(frame), 0)
+@@ -10,4 +99,6 @@
+ 	return 0;
+ }
+ 
++#endif
++
+ #endif /* _ASM_I386_UNWIND_H */
+diff -Nurb linux-2.6.22-570/include/asm-ia64/kdebug.h linux-2.6.22-try2/include/asm-ia64/kdebug.h
+--- linux-2.6.22-570/include/asm-ia64/kdebug.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-ia64/kdebug.h	2007-12-19 15:29:24.000000000 -0500
+@@ -69,6 +69,7 @@
+ 	DIE_KDEBUG_LEAVE,
+ 	DIE_KDUMP_ENTER,
+ 	DIE_KDUMP_LEAVE,
++	DIE_PAGE_FAULT_NO_CONTEXT,
+ };
+ 
+ #endif
+diff -Nurb linux-2.6.22-570/include/asm-ia64/kgdb.h linux-2.6.22-try2/include/asm-ia64/kgdb.h
+--- linux-2.6.22-570/include/asm-ia64/kgdb.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/asm-ia64/kgdb.h	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,37 @@
++#ifdef __KERNEL__
++#ifndef _ASM_KGDB_H_
++#define _ASM_KGDB_H_
++
++/*
++ * Copyright (C) 2001-2004 Amit S. Kale
++ */
++
++#include <linux/threads.h>
++#include <asm-generic/kgdb.h>
++
++/************************************************************************/
++/* BUFMAX defines the maximum number of characters in inbound/outbound buffers */
++/* at least NUMREGBYTES*2 are needed for register packets */
++/* A longer buffer is needed to list all threads */
++#define BUFMAX			1024
++
++/* Number of bytes of registers.  We set this to 0 so that certain GDB
++ * packets will fail, forcing the use of others, which are more friendly
++ * on ia64. */
++#define NUMREGBYTES		0
++
++#define NUMCRITREGBYTES		(70*8)
++#define JMP_REGS_ALIGNMENT	__attribute__ ((aligned (16)))
++
++#define BREAKNUM		0x00003333300LL
++#define KGDBBREAKNUM		0x6665UL
++#define BREAKPOINT()		asm volatile ("break.m 0x6665")
++#define BREAK_INSTR_SIZE	16
++#define CACHE_FLUSH_IS_SAFE	1
++
++struct pt_regs;
++extern volatile int kgdb_hwbreak_sstep[NR_CPUS];
++extern void smp_send_nmi_allbutself(void);
++extern void kgdb_wait_ipi(struct pt_regs *);
++#endif				/* _ASM_KGDB_H_ */
++#endif				/* __KERNEL__ */
+diff -Nurb linux-2.6.22-570/include/asm-ia64/page.h linux-2.6.22-try2/include/asm-ia64/page.h
+--- linux-2.6.22-570/include/asm-ia64/page.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-ia64/page.h	2007-12-19 15:29:24.000000000 -0500
+@@ -87,9 +87,10 @@
+ } while (0)
+ 
+ 
+-#define alloc_zeroed_user_highpage(vma, vaddr) \
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr)		\
+ ({						\
+-	struct page *page = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr); \
++	struct page *page = alloc_page_vma(				\
++		GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr);	\
+ 	if (page)				\
+  		flush_dcache_page(page);	\
+ 	page;					\
+diff -Nurb linux-2.6.22-570/include/asm-ia64/processor.h linux-2.6.22-try2/include/asm-ia64/processor.h
+--- linux-2.6.22-570/include/asm-ia64/processor.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-ia64/processor.h	2007-12-19 15:29:24.000000000 -0500
+@@ -295,9 +295,9 @@
+ 	regs->ar_bspstore = current->thread.rbs_bot;						\
+ 	regs->ar_fpsr = FPSR_DEFAULT;								\
+ 	regs->loadrs = 0;									\
+-	regs->r8 = current->mm->dumpable;	/* set "don't zap registers" flag */		\
++	regs->r8 = get_dumpable(current->mm);	/* set "don't zap registers" flag */		\
+ 	regs->r12 = new_sp - 16;	/* allocate 16 byte scratch area */			\
+-	if (unlikely(!current->mm->dumpable)) {							\
++	if (unlikely(!get_dumpable(current->mm))) {							\
+ 		/*										\
+ 		 * Zap scratch regs to avoid leaking bits between processes with different	\
+ 		 * uid/privileges.								\
+diff -Nurb linux-2.6.22-570/include/asm-m32r/page.h linux-2.6.22-try2/include/asm-m32r/page.h
+--- linux-2.6.22-570/include/asm-m32r/page.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-m32r/page.h	2007-12-19 15:29:24.000000000 -0500
+@@ -15,7 +15,8 @@
+ #define clear_user_page(page, vaddr, pg)	clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+ 
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+ 
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-m68knommu/page.h linux-2.6.22-try2/include/asm-m68knommu/page.h
+--- linux-2.6.22-570/include/asm-m68knommu/page.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-m68knommu/page.h	2007-12-19 15:29:24.000000000 -0500
+@@ -22,7 +22,8 @@
+ #define clear_user_page(page, vaddr, pg)	clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+ 
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+ 
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-mips/asmmacro-32.h linux-2.6.22-try2/include/asm-mips/asmmacro-32.h
+--- linux-2.6.22-570/include/asm-mips/asmmacro-32.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-mips/asmmacro-32.h	2007-12-19 15:29:24.000000000 -0500
+@@ -11,6 +11,28 @@
+ #include <asm/regdef.h>
+ #include <asm/fpregdef.h>
+ #include <asm/mipsregs.h>
++#include <asm/gdb-stub.h>
++
++	.macro	fpu_save_double_kgdb stack status tmp1=t0
++	cfc1	\tmp1,  fcr31
++	sdc1	$f0, GDB_FR_FPR0(\stack)
++	sdc1	$f2, GDB_FR_FPR2(\stack)
++	sdc1	$f4, GDB_FR_FPR4(\stack)
++	sdc1	$f6, GDB_FR_FPR6(\stack)
++	sdc1	$f8, GDB_FR_FPR8(\stack)
++	sdc1	$f10, GDB_FR_FPR10(\stack)
++	sdc1	$f12, GDB_FR_FPR12(\stack)
++	sdc1	$f14, GDB_FR_FPR14(\stack)
++	sdc1	$f16, GDB_FR_FPR16(\stack)
++	sdc1	$f18, GDB_FR_FPR18(\stack)
++	sdc1	$f20, GDB_FR_FPR20(\stack)
++	sdc1	$f22, GDB_FR_FPR22(\stack)
++	sdc1	$f24, GDB_FR_FPR24(\stack)
++	sdc1	$f26, GDB_FR_FPR26(\stack)
++	sdc1	$f28, GDB_FR_FPR28(\stack)
++	sdc1	$f30, GDB_FR_FPR30(\stack)
++	sw	\tmp1, GDB_FR_FSR(\stack)
++	.endm
+ 
+ 	.macro	fpu_save_double thread status tmp1=t0
+ 	cfc1	\tmp1,  fcr31
+@@ -91,6 +113,27 @@
+ 	ctc1	\tmp, fcr31
+ 	.endm
+ 
++	.macro	fpu_restore_double_kgdb stack status tmp=t0
++	lw	\tmp, GDB_FR_FSR(\stack)
++	ldc1	$f0,  GDB_FR_FPR0(\stack)
++	ldc1	$f2,  GDB_FR_FPR2(\stack)
++	ldc1	$f4,  GDB_FR_FPR4(\stack)
++	ldc1	$f6,  GDB_FR_FPR6(\stack)
++	ldc1	$f8,  GDB_FR_FPR8(\stack)
++	ldc1	$f10, GDB_FR_FPR10(\stack)
++	ldc1	$f12, GDB_FR_FPR12(\stack)
++	ldc1	$f14, GDB_FR_FPR14(\stack)
++	ldc1	$f16, GDB_FR_FPR16(\stack)
++	ldc1	$f18, GDB_FR_FPR18(\stack)
++	ldc1	$f20, GDB_FR_FPR20(\stack)
++	ldc1	$f22, GDB_FR_FPR22(\stack)
++	ldc1	$f24, GDB_FR_FPR24(\stack)
++	ldc1	$f26, GDB_FR_FPR26(\stack)
++	ldc1	$f28, GDB_FR_FPR28(\stack)
++	ldc1	$f30, GDB_FR_FPR30(\stack)
++	ctc1	\tmp, fcr31
++	.endm
++
+ 	.macro	fpu_restore_single thread tmp=t0
+ 	lw	\tmp, THREAD_FCR31(\thread)
+ 	lwc1	$f0,  THREAD_FPR0(\thread)
+diff -Nurb linux-2.6.22-570/include/asm-mips/asmmacro-64.h linux-2.6.22-try2/include/asm-mips/asmmacro-64.h
+--- linux-2.6.22-570/include/asm-mips/asmmacro-64.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-mips/asmmacro-64.h	2007-12-19 15:29:24.000000000 -0500
+@@ -12,6 +12,7 @@
+ #include <asm/regdef.h>
+ #include <asm/fpregdef.h>
+ #include <asm/mipsregs.h>
++#include <asm/gdb-stub.h>
+ 
+ 	.macro	fpu_save_16even thread tmp=t0
+ 	cfc1	\tmp, fcr31
+@@ -53,6 +54,46 @@
+ 	sdc1	$f31, THREAD_FPR31(\thread)
+ 	.endm
+ 
++	.macro	fpu_save_16odd_kgdb stack
++	sdc1	$f1, GDB_FR_FPR1(\stack)
++	sdc1	$f3, GDB_FR_FPR3(\stack)
++	sdc1	$f5, GDB_FR_FPR5(\stack)
++	sdc1	$f7, GDB_FR_FPR7(\stack)
++	sdc1	$f9, GDB_FR_FPR9(\stack)
++	sdc1	$f11, GDB_FR_FPR11(\stack)
++	sdc1	$f13, GDB_FR_FPR13(\stack)
++	sdc1	$f15, GDB_FR_FPR15(\stack)
++	sdc1	$f17, GDB_FR_FPR17(\stack)
++	sdc1	$f19, GDB_FR_FPR19(\stack)
++	sdc1	$f21, GDB_FR_FPR21(\stack)
++	sdc1	$f23, GDB_FR_FPR23(\stack)
++	sdc1	$f25, GDB_FR_FPR25(\stack)
++	sdc1	$f27, GDB_FR_FPR27(\stack)
++	sdc1	$f29, GDB_FR_FPR29(\stack)
++	sdc1	$f31, GDB_FR_FPR31(\stack)
++	.endm
++
++	.macro	fpu_save_16even_kgdb stack tmp=t0
++	cfc1	\tmp,  fcr31
++	sdc1	$f0, GDB_FR_FPR0(\stack)
++	sdc1	$f2, GDB_FR_FPR2(\stack)
++	sdc1	$f4, GDB_FR_FPR4(\stack)
++	sdc1	$f6, GDB_FR_FPR6(\stack)
++	sdc1	$f8, GDB_FR_FPR8(\stack)
++	sdc1	$f10, GDB_FR_FPR10(\stack)
++	sdc1	$f12, GDB_FR_FPR12(\stack)
++	sdc1	$f14, GDB_FR_FPR14(\stack)
++	sdc1	$f16, GDB_FR_FPR16(\stack)
++	sdc1	$f18, GDB_FR_FPR18(\stack)
++	sdc1	$f20, GDB_FR_FPR20(\stack)
++	sdc1	$f22, GDB_FR_FPR22(\stack)
++	sdc1	$f24, GDB_FR_FPR24(\stack)
++	sdc1	$f26, GDB_FR_FPR26(\stack)
++	sdc1	$f28, GDB_FR_FPR28(\stack)
++	sdc1	$f30, GDB_FR_FPR30(\stack)
++	sw	\tmp, GDB_FR_FSR(\stack)
++	.endm
++
+ 	.macro	fpu_save_double thread status tmp
+ 	sll	\tmp, \status, 5
+ 	bgez	\tmp, 2f
+@@ -61,6 +102,15 @@
+ 	fpu_save_16even \thread \tmp
+ 	.endm
+ 
++	.macro	fpu_save_double_kgdb stack status tmp
++	sll	\tmp, \status, 5
++	bgez	\tmp, 2f
++	nop
++	fpu_save_16odd_kgdb \stack
++2:
++	fpu_save_16even_kgdb \stack \tmp
++	.endm
++
+ 	.macro	fpu_restore_16even thread tmp=t0
+ 	lw	\tmp, THREAD_FCR31(\thread)
+ 	ldc1	$f0,  THREAD_FPR0(\thread)
+@@ -101,6 +151,46 @@
+ 	ldc1	$f31, THREAD_FPR31(\thread)
+ 	.endm
+ 
++	.macro	fpu_restore_16even_kgdb stack tmp=t0
++	lw	\tmp, GDB_FR_FSR(\stack)
++	ldc1	$f0,  GDB_FR_FPR0(\stack)
++	ldc1	$f2,  GDB_FR_FPR2(\stack)
++	ldc1	$f4,  GDB_FR_FPR4(\stack)
++	ldc1	$f6,  GDB_FR_FPR6(\stack)
++	ldc1	$f8,  GDB_FR_FPR8(\stack)
++	ldc1	$f10, GDB_FR_FPR10(\stack)
++	ldc1	$f12, GDB_FR_FPR12(\stack)
++	ldc1	$f14, GDB_FR_FPR14(\stack)
++	ldc1	$f16, GDB_FR_FPR16(\stack)
++	ldc1	$f18, GDB_FR_FPR18(\stack)
++	ldc1	$f20, GDB_FR_FPR20(\stack)
++	ldc1	$f22, GDB_FR_FPR22(\stack)
++	ldc1	$f24, GDB_FR_FPR24(\stack)
++	ldc1	$f26, GDB_FR_FPR26(\stack)
++	ldc1	$f28, GDB_FR_FPR28(\stack)
++	ldc1	$f30, GDB_FR_FPR30(\stack)
++	ctc1	\tmp, fcr31
++	.endm
++
++	.macro	fpu_restore_16odd_kgdb stack
++	ldc1	$f1,  GDB_FR_FPR1(\stack)
++	ldc1	$f3,  GDB_FR_FPR3(\stack)
++	ldc1	$f5,  GDB_FR_FPR5(\stack)
++	ldc1	$f7,  GDB_FR_FPR7(\stack)
++	ldc1	$f9,  GDB_FR_FPR9(\stack)
++	ldc1	$f11, GDB_FR_FPR11(\stack)
++	ldc1	$f13, GDB_FR_FPR13(\stack)
++	ldc1	$f15, GDB_FR_FPR15(\stack)
++	ldc1	$f17, GDB_FR_FPR17(\stack)
++	ldc1	$f19, GDB_FR_FPR19(\stack)
++	ldc1	$f21, GDB_FR_FPR21(\stack)
++	ldc1	$f23, GDB_FR_FPR23(\stack)
++	ldc1	$f25, GDB_FR_FPR25(\stack)
++	ldc1	$f27, GDB_FR_FPR27(\stack)
++	ldc1	$f29, GDB_FR_FPR29(\stack)
++	ldc1	$f31, GDB_FR_FPR31(\stack)
++	.endm
++
+ 	.macro	fpu_restore_double thread status tmp
+ 	sll	\tmp, \status, 5
+ 	bgez	\tmp, 1f				# 16 register mode?
+@@ -109,6 +199,15 @@
+ 1:	fpu_restore_16even \thread \tmp
+ 	.endm
+ 
++	.macro	fpu_restore_double_kgdb stack status tmp
++	sll	\tmp, \status, 5
++	bgez	\tmp, 1f				# 16 register mode?
++	nop
++
++	fpu_restore_16odd_kgdb \stack
++1:	fpu_restore_16even_kgdb \stack \tmp
++	.endm
++
+ 	.macro	cpu_save_nonscratch thread
+ 	LONG_S	s0, THREAD_REG16(\thread)
+ 	LONG_S	s1, THREAD_REG17(\thread)
+diff -Nurb linux-2.6.22-570/include/asm-mips/kdebug.h linux-2.6.22-try2/include/asm-mips/kdebug.h
+--- linux-2.6.22-570/include/asm-mips/kdebug.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-mips/kdebug.h	2007-12-19 15:29:24.000000000 -0500
+@@ -1 +1,30 @@
+-#include <asm-generic/kdebug.h>
++/*
++ *
++ * Copyright (C) 2004  MontaVista Software Inc.
++ * Author: Manish Lachwani, mlachwani@mvista.com or manish@koffee-break.com
++ *
++ * This program is free software; you can redistribute  it and/or modify it
++ * under  the terms of  the GNU General  Public License as published by the
++ * Free Software Foundation;  either version 2 of the  License, or (at your
++ * option) any later version.
++ *
++ */
++#ifndef _MIPS_KDEBUG_H
++#define _MIPS_KDEBUG_H
++
++#include <linux/notifier.h>
++
++struct pt_regs;
++
++extern struct atomic_notifier_head mips_die_head;
++
++enum die_val {
++	DIE_OOPS = 1,
++	DIE_PANIC,
++	DIE_DIE,
++	DIE_KERNELDEBUG,
++	DIE_TRAP,
++	DIE_PAGE_FAULT,
++};
++
++#endif /* _MIPS_KDEBUG_H */
+diff -Nurb linux-2.6.22-570/include/asm-mips/kgdb.h linux-2.6.22-try2/include/asm-mips/kgdb.h
+--- linux-2.6.22-570/include/asm-mips/kgdb.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/asm-mips/kgdb.h	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,41 @@
++#ifdef __KERNEL__
++#ifndef _ASM_KGDB_H_
++#define _ASM_KGDB_H_
++
++#include <asm/sgidefs.h>
++#include <asm-generic/kgdb.h>
++
++#ifndef __ASSEMBLY__
++#if (_MIPS_ISA == _MIPS_ISA_MIPS1) || (_MIPS_ISA == _MIPS_ISA_MIPS2) || (_MIPS_ISA == _MIPS_ISA_MIPS32)
++
++typedef u32 gdb_reg_t;
++
++#elif (_MIPS_ISA == _MIPS_ISA_MIPS3) || (_MIPS_ISA == _MIPS_ISA_MIPS4) || (_MIPS_ISA == _MIPS_ISA_MIPS64)
++
++#ifdef CONFIG_32BIT
++typedef u32 gdb_reg_t;
++#else /* CONFIG_CPU_32BIT */
++typedef u64 gdb_reg_t;
++#endif
++#else
++#error "Need to set typedef for gdb_reg_t"
++#endif /* _MIPS_ISA */
++
++#define BUFMAX			2048
++#define NUMREGBYTES		(90*sizeof(gdb_reg_t))
++#define NUMCRITREGBYTES		(12*sizeof(gdb_reg_t))
++#define BREAK_INSTR_SIZE	4
++#define BREAKPOINT()		__asm__ __volatile__(		\
++					".globl breakinst\n\t"	\
++					".set\tnoreorder\n\t"	\
++					"nop\n"			\
++					"breakinst:\tbreak\n\t"	\
++					"nop\n\t"		\
++					".set\treorder")
++#define CACHE_FLUSH_IS_SAFE	0
++
++extern int kgdb_early_setup;
++
++#endif				/* !__ASSEMBLY__ */
++#endif				/* _ASM_KGDB_H_ */
++#endif				/* __KERNEL__ */
+diff -Nurb linux-2.6.22-570/include/asm-mips/ptrace.h linux-2.6.22-try2/include/asm-mips/ptrace.h
+--- linux-2.6.22-570/include/asm-mips/ptrace.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-mips/ptrace.h	2007-12-19 15:29:24.000000000 -0500
+@@ -28,7 +28,7 @@
+  * system call/exception. As usual the registers k0/k1 aren't being saved.
+  */
+ struct pt_regs {
+-#ifdef CONFIG_32BIT
++#if defined(CONFIG_32BIT) || defined(CONFIG_KGDB)
+ 	/* Pad bytes for argument save space on the stack. */
+ 	unsigned long pad0[6];
+ #endif
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/cputable.h linux-2.6.22-try2/include/asm-powerpc/cputable.h
+--- linux-2.6.22-570/include/asm-powerpc/cputable.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/cputable.h	2007-12-19 15:29:22.000000000 -0500
+@@ -111,7 +111,7 @@
+ /* CPU kernel features */
+ 
+ /* Retain the 32b definitions all use bottom half of word */
+-#define CPU_FTR_SPLIT_ID_CACHE		ASM_CONST(0x0000000000000001)
++#define CPU_FTR_COHERENT_ICACHE		ASM_CONST(0x0000000000000001)
+ #define CPU_FTR_L2CR			ASM_CONST(0x0000000000000002)
+ #define CPU_FTR_SPEC7450		ASM_CONST(0x0000000000000004)
+ #define CPU_FTR_ALTIVEC			ASM_CONST(0x0000000000000008)
+@@ -135,6 +135,7 @@
+ #define CPU_FTR_PPC_LE			ASM_CONST(0x0000000000200000)
+ #define CPU_FTR_REAL_LE			ASM_CONST(0x0000000000400000)
+ #define CPU_FTR_FPU_UNAVAILABLE		ASM_CONST(0x0000000000800000)
++#define CPU_FTR_UNIFIED_ID_CACHE	ASM_CONST(0x0000000001000000)
+ 
+ /*
+  * Add the 64-bit processor unique features in the top half of the word;
+@@ -154,7 +155,6 @@
+ #define CPU_FTR_MMCRA			LONG_ASM_CONST(0x0000004000000000)
+ #define CPU_FTR_CTRL			LONG_ASM_CONST(0x0000008000000000)
+ #define CPU_FTR_SMT			LONG_ASM_CONST(0x0000010000000000)
+-#define CPU_FTR_COHERENT_ICACHE		LONG_ASM_CONST(0x0000020000000000)
+ #define CPU_FTR_LOCKLESS_TLBIE		LONG_ASM_CONST(0x0000040000000000)
+ #define CPU_FTR_CI_LARGE_PAGE		LONG_ASM_CONST(0x0000100000000000)
+ #define CPU_FTR_PAUSE_ZERO		LONG_ASM_CONST(0x0000200000000000)
+@@ -206,164 +206,163 @@
+ 		     !defined(CONFIG_POWER3) && !defined(CONFIG_POWER4) && \
+ 		     !defined(CONFIG_BOOKE))
+ 
+-#define CPU_FTRS_PPC601	(CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE)
+-#define CPU_FTRS_603	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_PPC601	(CPU_FTR_COMMON | CPU_FTR_601 | CPU_FTR_HPTE_TABLE | \
++	CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE)
++#define CPU_FTRS_603	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | \
+ 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_604	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_604	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_USE_TB | CPU_FTR_604_PERF_MON | CPU_FTR_HPTE_TABLE | \
+ 	    CPU_FTR_PPC_LE)
+-#define CPU_FTRS_740_NOTAU	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_740_NOTAU	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ 	    CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_740	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_740	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ 	    CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ 	    CPU_FTR_PPC_LE)
+-#define CPU_FTRS_750	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_750	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ 	    CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ 	    CPU_FTR_PPC_LE)
+-#define CPU_FTRS_750CL	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_750CL	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ 	    CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ 	    CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_750FX1	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_750FX1	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ 	    CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ 	    CPU_FTR_DUAL_PLL_750FX | CPU_FTR_NO_DPM | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_750FX2	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_750FX2	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ 	    CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ 	    CPU_FTR_NO_DPM | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_750FX	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_750FX	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ 	    CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ 	    CPU_FTR_DUAL_PLL_750FX | CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_750GX	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_750GX	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ 	    CPU_FTR_TAU | CPU_FTR_HPTE_TABLE | CPU_FTR_MAYBE_CAN_NAP | \
+ 	    CPU_FTR_DUAL_PLL_750FX | CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7400_NOTAU	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7400_NOTAU	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_HPTE_TABLE | \
+ 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7400	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7400	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB | CPU_FTR_L2CR | \
+ 	    CPU_FTR_TAU | CPU_FTR_ALTIVEC_COMP | CPU_FTR_HPTE_TABLE | \
+ 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7450_20	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7450_20	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_USE_TB | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ 	    CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ 	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7450_21	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7450_21	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_USE_TB | \
+ 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ 	    CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ 	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
+ 	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7450_23	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7450_23	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_USE_TB | \
+ 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ 	    CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ 	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7455_1	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7455_1	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_USE_TB | \
+ 	    CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | CPU_FTR_L3CR | \
+ 	    CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | CPU_FTR_HAS_HIGH_BATS | \
+ 	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7455_20	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7455_20	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_USE_TB | \
+ 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ 	    CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ 	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_L3_DISABLE_NAP | \
+ 	    CPU_FTR_NEED_COHERENT | CPU_FTR_HAS_HIGH_BATS | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7455	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7455	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_USE_TB | \
+ 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ 	    CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ 	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \
+ 	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7447_10	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7447_10	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_USE_TB | \
+ 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ 	    CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ 	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \
+ 	    CPU_FTR_NEED_COHERENT | CPU_FTR_NO_BTIC | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7447	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7447	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_USE_TB | \
+ 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ 	    CPU_FTR_L3CR | CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ 	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \
+ 	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7447A	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7447A	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_USE_TB | \
+ 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ 	    CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ 	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \
+ 	    CPU_FTR_NEED_COHERENT | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_7448	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_7448	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_USE_TB | \
+ 	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_L2CR | CPU_FTR_ALTIVEC_COMP | \
+ 	    CPU_FTR_HPTE_TABLE | CPU_FTR_SPEC7450 | \
+ 	    CPU_FTR_NAP_DISABLE_L2_PR | CPU_FTR_HAS_HIGH_BATS | \
+ 	    CPU_FTR_PPC_LE)
+-#define CPU_FTRS_82XX	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_82XX	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_MAYBE_CAN_DOZE | CPU_FTR_USE_TB)
+-#define CPU_FTRS_G2_LE	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | \
++#define CPU_FTRS_G2_LE	(CPU_FTR_MAYBE_CAN_DOZE | \
+ 	    CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS)
+-#define CPU_FTRS_E300	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | \
++#define CPU_FTRS_E300	(CPU_FTR_MAYBE_CAN_DOZE | \
+ 	    CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS | \
+ 	    CPU_FTR_COMMON)
+-#define CPU_FTRS_E300C2	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_MAYBE_CAN_DOZE | \
++#define CPU_FTRS_E300C2	(CPU_FTR_MAYBE_CAN_DOZE | \
+ 	    CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_HAS_HIGH_BATS | \
+ 	    CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
+-#define CPU_FTRS_CLASSIC32	(CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE | \
++#define CPU_FTRS_CLASSIC32	(CPU_FTR_COMMON | \
+ 	    CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE)
+-#define CPU_FTRS_8XX	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB)
+-#define CPU_FTRS_40X	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
+-	    CPU_FTR_NODSISRALIGN)
+-#define CPU_FTRS_44X	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
+-	    CPU_FTR_NODSISRALIGN)
+-#define CPU_FTRS_E200	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN)
+-#define CPU_FTRS_E500	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
+-	    CPU_FTR_NODSISRALIGN)
+-#define CPU_FTRS_E500_2	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_8XX	(CPU_FTR_USE_TB)
++#define CPU_FTRS_40X	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN)
++#define CPU_FTRS_44X	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN)
++#define CPU_FTRS_E200	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | \
++	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_UNIFIED_ID_CACHE)
++#define CPU_FTRS_E500	(CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN)
++#define CPU_FTRS_E500_2	(CPU_FTR_USE_TB | \
+ 	    CPU_FTR_BIG_PHYS | CPU_FTR_NODSISRALIGN)
+ #define CPU_FTRS_GENERIC_32	(CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN)
+ 
+ /* 64-bit CPUs */
+-#define CPU_FTRS_POWER3	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_POWER3	(CPU_FTR_USE_TB | \
+ 	    CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | CPU_FTR_PPC_LE)
+-#define CPU_FTRS_RS64	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_RS64	(CPU_FTR_USE_TB | \
+ 	    CPU_FTR_HPTE_TABLE | CPU_FTR_IABR | \
+ 	    CPU_FTR_MMCRA | CPU_FTR_CTRL)
+-#define CPU_FTRS_POWER4	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_POWER4	(CPU_FTR_USE_TB | \
+ 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+ 	    CPU_FTR_MMCRA)
+-#define CPU_FTRS_PPC970	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_PPC970	(CPU_FTR_USE_TB | \
+ 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+ 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_CAN_NAP | CPU_FTR_MMCRA)
+-#define CPU_FTRS_POWER5	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_POWER5	(CPU_FTR_USE_TB | \
+ 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+ 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
+ 	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
+ 	    CPU_FTR_PURR)
+-#define CPU_FTRS_POWER6 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_POWER6 (CPU_FTR_USE_TB | \
+ 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+ 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
+ 	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
+ 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
+ 	    CPU_FTR_DSCR)
+-#define CPU_FTRS_CELL	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_CELL	(CPU_FTR_USE_TB | \
+ 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
+ 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
+ 	    CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE | CPU_FTR_CELL_TB_BUG)
+-#define CPU_FTRS_PA6T (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_PA6T (CPU_FTR_USE_TB | \
+ 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
+ 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \
+ 	    CPU_FTR_PURR | CPU_FTR_REAL_LE)
+-#define CPU_FTRS_COMPATIBLE	(CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
++#define CPU_FTRS_COMPATIBLE	(CPU_FTR_USE_TB | \
+ 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2)
+ 
+ #ifdef __powerpc64__
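
The net effect of the cputable hunk is that CPU_FTR_COHERENT_ICACHE now lives in the low, 32-bit-visible half of the feature word (with CPU_FTR_UNIFIED_ID_CACHE replacing the retired CPU_FTR_SPLIT_ID_CACHE), so common cache-flush code can test one bit on both 32- and 64-bit kernels. A sketch of the test this enables, assuming the existing cpu_has_feature() accessor and the low-level __flush_icache_range() helper:

	/* Sketch: CPUs with a coherent icache can skip the flush entirely;
	 * this mirrors the check the in-tree flush_icache_range() makes. */
	static void example_flush_icache(unsigned long start,
					 unsigned long stop)
	{
		if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
			return;	/* hardware keeps I- and D-side coherent */
		__flush_icache_range(start, stop);
	}
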
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/floppy.h linux-2.6.22-try2/include/asm-powerpc/floppy.h
+--- linux-2.6.22-570/include/asm-powerpc/floppy.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/floppy.h	2007-12-19 15:29:22.000000000 -0500
+@@ -29,7 +29,7 @@
+ #define fd_free_irq()           free_irq(FLOPPY_IRQ, NULL);
+ 
+ #include <linux/pci.h>
+-#include <asm/ppc-pci.h>	/* for ppc64_isabridge_dev */
++#include <asm/ppc-pci.h>	/* for isa_bridge_pcidev */
+ 
+ #define fd_dma_setup(addr,size,mode,io) fd_ops->_dma_setup(addr,size,mode,io)
+ 
+@@ -139,12 +139,12 @@
+ 	if (bus_addr 
+ 	    && (addr != prev_addr || size != prev_size || dir != prev_dir)) {
+ 		/* different from last time -- unmap prev */
+-		pci_unmap_single(ppc64_isabridge_dev, bus_addr, prev_size, prev_dir);
++		pci_unmap_single(isa_bridge_pcidev, bus_addr, prev_size, prev_dir);
+ 		bus_addr = 0;
+ 	}
+ 
+ 	if (!bus_addr)	/* need to map it */
+-		bus_addr = pci_map_single(ppc64_isabridge_dev, addr, size, dir);
++		bus_addr = pci_map_single(isa_bridge_pcidev, addr, size, dir);
+ 
+ 	/* remember this one as prev */
+ 	prev_addr = addr;
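
Beyond the s/ppc64_isabridge_dev/isa_bridge_pcidev/ rename, the floppy hunk keeps the same memoized mapping scheme: the previous pci_map_single() result is reused until the buffer, size, or direction changes. The pattern in isolation, as a pared-down sketch (assumes <linux/pci.h>; not the in-tree code):

	/* Sketch: keep the previous DMA mapping alive across calls and
	 * only remap when the parameters actually change. */
	static dma_addr_t cached_map(struct pci_dev *bridge, void *addr,
				     unsigned long size, int dir)
	{
		static void *prev_addr;
		static unsigned long prev_size;
		static int prev_dir;
		static dma_addr_t bus_addr;

		if (bus_addr &&
		    (addr != prev_addr || size != prev_size || dir != prev_dir)) {
			/* different from last time: unmap the old one */
			pci_unmap_single(bridge, bus_addr, prev_size, prev_dir);
			bus_addr = 0;
		}
		if (!bus_addr)
			bus_addr = pci_map_single(bridge, addr, size, dir);

		prev_addr = addr;
		prev_size = size;
		prev_dir = dir;
		return bus_addr;
	}
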
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/io.h linux-2.6.22-try2/include/asm-powerpc/io.h
+--- linux-2.6.22-570/include/asm-powerpc/io.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/io.h	2007-12-19 15:29:22.000000000 -0500
+@@ -607,9 +607,9 @@
+  *
+  * * iounmap undoes such a mapping and can be hooked
+  *
+- * * __ioremap_explicit (and the pending __iounmap_explicit) are low level
+- *   functions to create hand-made mappings for use only by the PCI code
+- *   and cannot currently be hooked.
++ * * __ioremap_at (and the pending __iounmap_at) are low level functions to
++ *   create hand-made mappings for use only by the PCI code and cannot
++ *   currently be hooked. Must be page aligned.
+  *
+  * * __ioremap is the low level implementation used by ioremap and
+  *   ioremap_flags and cannot be hooked (but can be used by a hook on one
+@@ -629,19 +629,9 @@
+ 			       unsigned long flags);
+ extern void __iounmap(volatile void __iomem *addr);
+ 
+-extern int __ioremap_explicit(phys_addr_t p_addr, unsigned long v_addr,
++extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea,
+ 		     	      unsigned long size, unsigned long flags);
+-extern int __iounmap_explicit(volatile void __iomem *start,
+-			      unsigned long size);
+-
+-extern void __iomem * reserve_phb_iospace(unsigned long size);
+-
+-/* Those are more 32 bits only functions */
+-extern unsigned long iopa(unsigned long addr);
+-extern unsigned long mm_ptov(unsigned long addr) __attribute_const__;
+-extern void io_block_mapping(unsigned long virt, phys_addr_t phys,
+-			     unsigned int size, int flags);
+-
++extern void __iounmap_at(void *ea, unsigned long size);
+ 
+ /*
+  * When CONFIG_PPC_INDIRECT_IO is set, we use the generic iomap implementation
+@@ -651,8 +641,8 @@
+  */
+ #define HAVE_ARCH_PIO_SIZE		1
+ #define PIO_OFFSET			0x00000000UL
+-#define PIO_MASK			0x3fffffffUL
+-#define PIO_RESERVED			0x40000000UL
++#define PIO_MASK			(FULL_IO_SIZE - 1)
++#define PIO_RESERVED			(FULL_IO_SIZE)
+ 
+ #define mmio_read16be(addr)		readw_be(addr)
+ #define mmio_read32be(addr)		readl_be(addr)
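
Deriving PIO_MASK as FULL_IO_SIZE - 1 only works because FULL_IO_SIZE (2 GB here) is a power of two, so the token test and the offset extraction are each a single compare or AND. A toy illustration of roughly how the generic iomap layer distinguishes PIO tokens from direct MMIO addresses (values assumed from this patch):

	#include <stdio.h>

	#define FULL_IO_SIZE	0x80000000ul
	#define PIO_OFFSET	0x00000000ul
	#define PIO_MASK	(FULL_IO_SIZE - 1)
	#define PIO_RESERVED	(FULL_IO_SIZE)

	int main(void)
	{
		unsigned long port = 0x3f8;	/* classic ISA UART base */

		/* Tokens below PIO_RESERVED are IO-space offsets; anything
		 * at or above it is treated as a real MMIO address. */
		if (port < PIO_RESERVED)
			printf("PIO offset 0x%lx\n",
			       (port - PIO_OFFSET) & PIO_MASK);
		else
			printf("direct MMIO at 0x%lx\n", port);
		return 0;
	}
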
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/kgdb.h linux-2.6.22-try2/include/asm-powerpc/kgdb.h
+--- linux-2.6.22-570/include/asm-powerpc/kgdb.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/asm-powerpc/kgdb.h	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,75 @@
++/*
++ * include/asm-powerpc/kgdb.h
++ *
++ * The PowerPC (32/64) specific defines / externs for KGDB.  Based on
++ * the previous 32bit and 64bit specific files, which had the following
++ * copyrights:
++ *
++ * PPC64 Mods (C) 2005 Frank Rowand (frowand@mvista.com)
++ * PPC Mods (C) 2004 Tom Rini (trini@mvista.com)
++ * PPC Mods (C) 2003 John Whitney (john.whitney@timesys.com)
++ * PPC Mods (C) 1998 Michael Tesch (tesch@cs.wisc.edu)
++ *
++ *
++ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
++ * Author: Tom Rini <trini@kernel.crashing.org>
++ *
++ * 2006 (c) MontaVista Software, Inc. This file is licensed under
++ * the terms of the GNU General Public License version 2. This program
++ * is licensed "as is" without any warranty of any kind, whether express
++ * or implied.
++ */
++#ifdef __KERNEL__
++#ifndef __POWERPC_KGDB_H__
++#define __POWERPC_KGDB_H__
++
++#include <asm-generic/kgdb.h>
++
++#ifndef __ASSEMBLY__
++
++#define BREAK_INSTR_SIZE	4
++#define BUFMAX			((NUMREGBYTES * 2) + 512)
++#define OUTBUFMAX		((NUMREGBYTES * 2) + 512)
++#define BREAKPOINT()		asm(".long 0x7d821008"); /* twge r2, r2 */
++#define CACHE_FLUSH_IS_SAFE	1
++
++/* The number of bytes of registers we have to save depends on a few
++ * things.  For 64-bit we default to not including the vector registers
++ * and vector state registers. */
++#ifdef CONFIG_PPC64
++/*
++ * 64 bit (8 byte) registers:
++ *   32 gpr, 32 fpr, nip, msr, link, ctr
++ * 32 bit (4 byte) registers:
++ *   ccr, xer, fpscr
++ */
++#define NUMREGBYTES		((68 * 8) + (3 * 4))
++#if 0
++/* The following adds in vector registers and vector state registers. */
++/* 128 bit (16 byte) registers:
++ *   32 vr
++ * 64 bit (8 byte) registers:
++ *   32 gpr, 32 fpr, nip, msr, link, ctr
++ * 32 bit (4 byte) registers:
++ *   ccr, xer, fpscr, vscr, vrsave
++ */
++#define NUMREGBYTES		((128 * 16) + (68 * 8) + (5 * 4))
++#endif
++#define NUMCRITREGBYTES		184
++#else /* CONFIG_PPC32 */
++/* On non-E500 family PPC32 we determine the size by picking the last
++ * register we need, but on E500 we skip sections so we list what we
++ * need to store, and add it up. */
++#ifndef CONFIG_E500
++#define MAXREG			(PT_FPSCR+1)
++#else
++/* 32 GPRs (8 bytes), nip, msr, ccr, link, ctr, xer, acc (8 bytes), spefscr */
++#define MAXREG                 ((32*2)+6+2+1)
++#endif
++#define NUMREGBYTES		(MAXREG * sizeof(int))
++/* CR/LR, R1, R2, R13-R31 inclusive. */
++#define NUMCRITREGBYTES		(23 * sizeof(int))
++#endif /* 32/64 */
++#endif /* !(__ASSEMBLY__) */
++#endif /* !__POWERPC_KGDB_H__ */
++#endif /* __KERNEL__ */
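
The 64-bit NUMREGBYTES is just the register inventory added up: 68 eight-byte registers (32 GPRs, 32 FPRs, nip, msr, link, ctr) plus three four-byte ones (ccr, xer, fpscr), 556 bytes in total. A build-time sanity check of that arithmetic, as a sketch (BUILD_BUG_ON is the in-kernel idiom; a negatively-sized array works anywhere):

	/* Sketch: fails to compile if the PPC64 register-buffer
	 * arithmetic ever stops adding up to 556 bytes. */
	#define PPC64_NUMREGBYTES	((68 * 8) + (3 * 4))
	static char numregbytes_is_556[PPC64_NUMREGBYTES == 556 ? 1 : -1];
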
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/lppaca.h linux-2.6.22-try2/include/asm-powerpc/lppaca.h
+--- linux-2.6.22-570/include/asm-powerpc/lppaca.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/lppaca.h	2007-12-19 15:29:22.000000000 -0500
+@@ -98,7 +98,7 @@
+ 	u64	saved_gpr5;		// Saved GPR5                   x30-x37
+ 
+ 	u8	reserved4;		// Reserved			x38-x38
+-	u8	cpuctls_task_attrs;	// Task attributes for cpuctls  x39-x39
++	u8	donate_dedicated_cpu;	// Donate dedicated CPU cycles  x39-x39
+ 	u8	fpregs_in_use;		// FP regs in use               x3A-x3A
+ 	u8	pmcregs_in_use;		// PMC regs in use              x3B-x3B
+ 	volatile u32 saved_decr;	// Saved Decr Value             x3C-x3F
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/mmu-hash32.h linux-2.6.22-try2/include/asm-powerpc/mmu-hash32.h
+--- linux-2.6.22-570/include/asm-powerpc/mmu-hash32.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/asm-powerpc/mmu-hash32.h	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,91 @@
++#ifndef _ASM_POWERPC_MMU_HASH32_H_
++#define _ASM_POWERPC_MMU_HASH32_H_
++/*
++ * 32-bit hash table MMU support
++ */
++
++/*
++ * BATs
++ */
++
++/* Block size masks */
++#define BL_128K	0x000
++#define BL_256K 0x001
++#define BL_512K 0x003
++#define BL_1M   0x007
++#define BL_2M   0x00F
++#define BL_4M   0x01F
++#define BL_8M   0x03F
++#define BL_16M  0x07F
++#define BL_32M  0x0FF
++#define BL_64M  0x1FF
++#define BL_128M 0x3FF
++#define BL_256M 0x7FF
++
++/* BAT Access Protection */
++#define BPP_XX	0x00		/* No access */
++#define BPP_RX	0x01		/* Read only */
++#define BPP_RW	0x02		/* Read/write */
++
++#ifndef __ASSEMBLY__
++struct ppc_bat {
++	struct {
++		unsigned long bepi:15;	/* Effective page index (virtual address) */
++		unsigned long :4;	/* Unused */
++		unsigned long bl:11;	/* Block size mask */
++		unsigned long vs:1;	/* Supervisor valid */
++		unsigned long vp:1;	/* User valid */
++	} batu; 		/* Upper register */
++	struct {
++		unsigned long brpn:15;	/* Real page index (physical address) */
++		unsigned long :10;	/* Unused */
++		unsigned long w:1;	/* Write-thru cache */
++		unsigned long i:1;	/* Cache inhibit */
++		unsigned long m:1;	/* Memory coherence */
++		unsigned long g:1;	/* Guarded (MBZ in IBAT) */
++		unsigned long :1;	/* Unused */
++		unsigned long pp:2;	/* Page access protections */
++	} batl;			/* Lower register */
++};
++#endif /* !__ASSEMBLY__ */
++
++/*
++ * Hash table
++ */
++
++/* Values for PP (assumes Ks=0, Kp=1) */
++#define PP_RWXX	0	/* Supervisor read/write, User none */
++#define PP_RWRX 1	/* Supervisor read/write, User read */
++#define PP_RWRW 2	/* Supervisor read/write, User read/write */
++#define PP_RXRX 3	/* Supervisor read,       User read */
++
++#ifndef __ASSEMBLY__
++
++/* Hardware Page Table Entry */
++struct hash_pte {
++	unsigned long v:1;	/* Entry is valid */
++	unsigned long vsid:24;	/* Virtual segment identifier */
++	unsigned long h:1;	/* Hash algorithm indicator */
++	unsigned long api:6;	/* Abbreviated page index */
++	unsigned long rpn:20;	/* Real (physical) page number */
++	unsigned long    :3;	/* Unused */
++	unsigned long r:1;	/* Referenced */
++	unsigned long c:1;	/* Changed */
++	unsigned long w:1;	/* Write-thru cache mode */
++	unsigned long i:1;	/* Cache inhibited */
++	unsigned long m:1;	/* Memory coherence */
++	unsigned long g:1;	/* Guarded */
++	unsigned long  :1;	/* Unused */
++	unsigned long pp:2;	/* Page protection */
++};
++
++typedef struct {
++	unsigned long id;
++	unsigned long vdso_base;
++} mm_context_t;
++
++typedef unsigned long phys_addr_t;
++
++#endif /* !__ASSEMBLY__ */
++
++#endif /* _ASM_POWERPC_MMU_HASH32_H_ */
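
The ppc_bat bitfields follow the hardware layout of the upper/lower BAT register pair, relying on the MSB-first bitfield packing of big-endian 32-bit PowerPC. A hypothetical sketch of describing a 256 MB supervisor read/write block with this struct; real setup would also write the IBAT/DBAT SPRs, which is omitted here:

	/* Sketch: fill in a 256MB, supervisor-only, read/write,
	 * coherent block translation. Values are illustrative. */
	static void example_fill_bat(struct ppc_bat *bat,
				     unsigned long virt, unsigned long phys)
	{
		bat->batu.bepi = virt >> 17;	/* top 15 effective bits */
		bat->batu.bl   = BL_256M;	/* block length mask */
		bat->batu.vs   = 1;		/* valid in supervisor mode */
		bat->batu.vp   = 0;		/* not valid in user mode */

		bat->batl.brpn = phys >> 17;	/* top 15 real bits */
		bat->batl.w = bat->batl.i = bat->batl.g = 0;
		bat->batl.m    = 1;		/* memory coherence on */
		bat->batl.pp   = BPP_RW;	/* read/write access */
	}
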
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/mmu-hash64.h linux-2.6.22-try2/include/asm-powerpc/mmu-hash64.h
+--- linux-2.6.22-570/include/asm-powerpc/mmu-hash64.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/mmu-hash64.h	2007-12-19 15:29:22.000000000 -0500
+@@ -103,12 +103,12 @@
+ 
+ #ifndef __ASSEMBLY__
+ 
+-typedef struct {
++struct hash_pte {
+ 	unsigned long v;
+ 	unsigned long r;
+-} hpte_t;
++};
+ 
+-extern hpte_t *htab_address;
++extern struct hash_pte *htab_address;
+ extern unsigned long htab_size_bytes;
+ extern unsigned long htab_hash_mask;
+ 
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/mmu.h linux-2.6.22-try2/include/asm-powerpc/mmu.h
+--- linux-2.6.22-570/include/asm-powerpc/mmu.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/mmu.h	2007-12-19 15:29:22.000000000 -0500
+@@ -5,6 +5,9 @@
+ #ifdef CONFIG_PPC64
+ /* 64-bit classic hash table MMU */
+ #  include <asm/mmu-hash64.h>
++#elif defined(CONFIG_PPC_STD_MMU)
++/* 32-bit classic hash table MMU */
++#  include <asm/mmu-hash32.h>
+ #elif defined(CONFIG_44x)
+ /* 44x-style software loaded TLB */
+ #  include <asm/mmu-44x.h>
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/pci-bridge.h linux-2.6.22-try2/include/asm-powerpc/pci-bridge.h
+--- linux-2.6.22-570/include/asm-powerpc/pci-bridge.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/pci-bridge.h	2007-12-19 15:29:22.000000000 -0500
+@@ -31,6 +31,7 @@
+ 	int last_busno;
+ 
+ 	void __iomem *io_base_virt;
++	void *io_base_alloc;
+ 	resource_size_t io_base_phys;
+ 
+ 	/* Some machines have a non 1:1 mapping of
+@@ -70,19 +71,22 @@
+ 	int	devfn;			/* pci device and function number */
+ 	int	class_code;		/* pci device class */
+ 
+-#ifdef CONFIG_PPC_PSERIES
++	struct  pci_controller *phb;	/* for pci devices */
++	struct	iommu_table *iommu_table;	/* for phb's or bridges */
++	struct	pci_dev *pcidev;	/* back-pointer to the pci device */
++	struct	device_node *node;	/* back-pointer to the device_node */
++
++	int	pci_ext_config_space;	/* for pci devices */
++
++#ifdef CONFIG_EEH
+ 	int	eeh_mode;		/* See eeh.h for possible EEH_MODEs */
+ 	int	eeh_config_addr;
+ 	int	eeh_pe_config_addr; /* new-style partition endpoint address */
+ 	int 	eeh_check_count;	/* # times driver ignored error */
+ 	int 	eeh_freeze_count;	/* # times this device froze up. */
+-#endif
+-	int	pci_ext_config_space;	/* for pci devices */
+-	struct  pci_controller *phb;	/* for pci devices */
+-	struct	iommu_table *iommu_table;	/* for phb's or bridges */
+-	struct	pci_dev *pcidev;	/* back-pointer to the pci device */
+-	struct	device_node *node;	/* back-pointer to the device_node */
++	int 	eeh_false_positives;	/* # times this device reported #ff's */
+ 	u32	config_space[16];	/* saved PCI config space */
++#endif
+ };
+ 
+ /* Get the pointer to a device_node's pci_dn */
+@@ -164,6 +168,11 @@
+ }
+ #endif
+ 
++extern void isa_bridge_find_early(struct pci_controller *hose);
++
++extern int pcibios_unmap_io_space(struct pci_bus *bus);
++extern int pcibios_map_io_space(struct pci_bus *bus);
++
+ /* Return values for ppc_md.pci_probe_mode function */
+ #define PCI_PROBE_NONE		-1	/* Don't look at this bus at all */
+ #define PCI_PROBE_NORMAL	0	/* Do normal PCI probing */
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/pci.h linux-2.6.22-try2/include/asm-powerpc/pci.h
+--- linux-2.6.22-570/include/asm-powerpc/pci.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/pci.h	2007-12-19 15:29:22.000000000 -0500
+@@ -220,10 +220,6 @@
+ 	return root;
+ }
+ 
+-extern int unmap_bus_range(struct pci_bus *bus);
+-
+-extern int remap_bus_range(struct pci_bus *bus);
+-
+ extern void pcibios_fixup_device_resources(struct pci_dev *dev,
+ 			struct pci_bus *bus);
+ 
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/pgtable-ppc32.h linux-2.6.22-try2/include/asm-powerpc/pgtable-ppc32.h
+--- linux-2.6.22-570/include/asm-powerpc/pgtable-ppc32.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/pgtable-ppc32.h	2007-12-19 15:29:22.000000000 -0500
+@@ -6,11 +6,7 @@
+ #ifndef __ASSEMBLY__
+ #include <linux/sched.h>
+ #include <linux/threads.h>
+-#include <asm/processor.h>		/* For TASK_SIZE */
+-#include <asm/mmu.h>
+-#include <asm/page.h>
+ #include <asm/io.h>			/* For sub-arch specific PPC_PIN_SIZE */
+-struct mm_struct;
+ 
+ extern unsigned long va_to_phys(unsigned long address);
+ extern pte_t *va_to_pte(unsigned long address);
+@@ -488,14 +484,6 @@
+ #define pfn_pte(pfn, prot)	__pte(((pte_basic_t)(pfn) << PFN_SHIFT_OFFSET) |\
+ 					pgprot_val(prot))
+ #define mk_pte(page, prot)	pfn_pte(page_to_pfn(page), prot)
+-
+-/*
+- * ZERO_PAGE is a global shared page that is always zero: used
+- * for zero-mapped memory areas etc..
+- */
+-extern unsigned long empty_zero_page[1024];
+-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+-
+ #endif /* __ASSEMBLY__ */
+ 
+ #define pte_none(pte)		((pte_val(pte) & ~_PTE_NONE_MASK) == 0)
+@@ -734,10 +722,6 @@
+ #define pte_unmap(pte)		kunmap_atomic(pte, KM_PTE0)
+ #define pte_unmap_nested(pte)	kunmap_atomic(pte, KM_PTE1)
+ 
+-extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
+-
+-extern void paging_init(void);
+-
+ /*
+  * Encode and decode a swap entry.
+  * Note that the bits we use in a PTE for representing a swap entry
+@@ -755,40 +739,6 @@
+ #define pte_to_pgoff(pte)	(pte_val(pte) >> 3)
+ #define pgoff_to_pte(off)	((pte_t) { ((off) << 3) | _PAGE_FILE })
+ 
+-/* CONFIG_APUS */
+-/* For virtual address to physical address conversion */
+-extern void cache_clear(__u32 addr, int length);
+-extern void cache_push(__u32 addr, int length);
+-extern int mm_end_of_chunk (unsigned long addr, int len);
+-extern unsigned long iopa(unsigned long addr);
+-extern unsigned long mm_ptov(unsigned long addr) __attribute_const__;
+-
+-/* Values for nocacheflag and cmode */
+-/* These are not used by the APUS kernel_map, but prevents
+-   compilation errors. */
+-#define	KERNELMAP_FULL_CACHING		0
+-#define	KERNELMAP_NOCACHE_SER		1
+-#define	KERNELMAP_NOCACHE_NONSER	2
+-#define	KERNELMAP_NO_COPYBACK		3
+-
+-/*
+- * Map some physical address range into the kernel address space.
+- */
+-extern unsigned long kernel_map(unsigned long paddr, unsigned long size,
+-				int nocacheflag, unsigned long *memavailp );
+-
+-/*
+- * Set cache mode of (kernel space) address range.
+- */
+-extern void kernel_set_cachemode (unsigned long address, unsigned long size,
+-                                 unsigned int cmode);
+-
+-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+-#define kern_addr_valid(addr)	(1)
+-
+-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
+-		remap_pfn_range(vma, vaddr, pfn, size, prot)
+-
+ /*
+  * No page table caches to initialise
+  */
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/pgtable-ppc64.h linux-2.6.22-try2/include/asm-powerpc/pgtable-ppc64.h
+--- linux-2.6.22-570/include/asm-powerpc/pgtable-ppc64.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/pgtable-ppc64.h	2007-12-19 15:29:22.000000000 -0500
+@@ -7,11 +7,7 @@
+ 
+ #ifndef __ASSEMBLY__
+ #include <linux/stddef.h>
+-#include <asm/processor.h>		/* For TASK_SIZE */
+-#include <asm/mmu.h>
+-#include <asm/page.h>
+ #include <asm/tlbflush.h>
+-struct mm_struct;
+ #endif /* __ASSEMBLY__ */
+ 
+ #ifdef CONFIG_PPC_64K_PAGES
+@@ -27,7 +23,7 @@
+  */
+ #define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
+                 	    PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
+-#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE)
++#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
+ 
+ #if TASK_SIZE_USER64 > PGTABLE_RANGE
+ #error TASK_SIZE_USER64 exceeds pagetable range
+@@ -37,19 +33,28 @@
+ #error TASK_SIZE_USER64 exceeds user VSID range
+ #endif
+ 
++
+ /*
+  * Define the address range of the vmalloc VM area.
+  */
+ #define VMALLOC_START ASM_CONST(0xD000000000000000)
+-#define VMALLOC_SIZE  ASM_CONST(0x80000000000)
++#define VMALLOC_SIZE  (PGTABLE_RANGE >> 1)
+ #define VMALLOC_END   (VMALLOC_START + VMALLOC_SIZE)
+ 
+ /*
+- * Define the address range of the imalloc VM area.
+- */
+-#define PHBS_IO_BASE	VMALLOC_END
+-#define IMALLOC_BASE	(PHBS_IO_BASE + 0x80000000ul)	/* Reserve 2 gigs for PHBs */
+-#define IMALLOC_END	(VMALLOC_START + PGTABLE_RANGE)
++ * Define the address ranges for MMIO and IO space :
++ *
++ *  ISA_IO_BASE = VMALLOC_END, 64K reserved area
++ *  PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
++ * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
++ */
++#define FULL_IO_SIZE	0x80000000ul
++#define  ISA_IO_BASE	(VMALLOC_END)
++#define  ISA_IO_END	(VMALLOC_END + 0x10000ul)
++#define  PHB_IO_BASE	(ISA_IO_END)
++#define  PHB_IO_END	(VMALLOC_END + FULL_IO_SIZE)
++#define IOREMAP_BASE	(PHB_IO_END)
++#define IOREMAP_END	(VMALLOC_START + PGTABLE_RANGE)
+ 
+ /*
+  * Region IDs
+@@ -134,16 +139,6 @@
+ #define __S110	PAGE_SHARED_X
+ #define __S111	PAGE_SHARED_X
+ 
+-#ifndef __ASSEMBLY__
+-
+-/*
+- * ZERO_PAGE is a global shared page that is always zero: used
+- * for zero-mapped memory areas etc..
+- */
+-extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
+-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+-#endif /* __ASSEMBLY__ */
+-
+ #ifdef CONFIG_HUGETLB_PAGE
+ 
+ #define HAVE_ARCH_UNMAPPED_AREA
+@@ -442,10 +437,6 @@
+ #define pgd_ERROR(e) \
+ 	printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+ 
+-extern pgd_t swapper_pg_dir[];
+-
+-extern void paging_init(void);
+-
+ /* Encode and de-code a swap entry */
+ #define __swp_type(entry)	(((entry).val >> 1) & 0x3f)
+ #define __swp_offset(entry)	((entry).val >> 8)
+@@ -456,17 +447,6 @@
+ #define pgoff_to_pte(off)	((pte_t) {((off) << PTE_RPN_SHIFT)|_PAGE_FILE})
+ #define PTE_FILE_MAX_BITS	(BITS_PER_LONG - PTE_RPN_SHIFT)
+ 
+-/*
+- * kern_addr_valid is intended to indicate whether an address is a valid
+- * kernel address.  Most 32-bit archs define it as always true (like this)
+- * but most 64-bit archs actually perform a test.  What should we do here?
+- * The only use is in fs/ncpfs/dir.c
+- */
+-#define kern_addr_valid(addr)	(1)
+-
+-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
+-		remap_pfn_range(vma, vaddr, pfn, size, prot)
+-
+ void pgtable_cache_init(void);
+ 
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/pgtable.h linux-2.6.22-try2/include/asm-powerpc/pgtable.h
+--- linux-2.6.22-570/include/asm-powerpc/pgtable.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/pgtable.h	2007-12-19 15:29:22.000000000 -0500
+@@ -2,6 +2,13 @@
+ #define _ASM_POWERPC_PGTABLE_H
+ #ifdef __KERNEL__
+ 
++#ifndef __ASSEMBLY__
++#include <asm/processor.h>		/* For TASK_SIZE */
++#include <asm/mmu.h>
++#include <asm/page.h>
++struct mm_struct;
++#endif /* !__ASSEMBLY__ */
++
+ #if defined(CONFIG_PPC64)
+ #  include <asm/pgtable-ppc64.h>
+ #else
+@@ -9,6 +16,27 @@
+ #endif
+ 
+ #ifndef __ASSEMBLY__
++/*
++ * ZERO_PAGE is a global shared page that is always zero: used
++ * for zero-mapped memory areas etc..
++ */
++extern unsigned long empty_zero_page[];
++#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
++
++extern pgd_t swapper_pg_dir[];
++
++extern void paging_init(void);
++
++/*
++ * kern_addr_valid is intended to indicate whether an address is a valid
++ * kernel address.  Most 32-bit archs define it as always true (like this)
++ * but most 64-bit archs actually perform a test.  What should we do here?
++ */
++#define kern_addr_valid(addr)	(1)
++
++#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)		\
++		remap_pfn_range(vma, vaddr, pfn, size, prot)
++
+ #include <asm-generic/pgtable.h>
+ #endif /* __ASSEMBLY__ */
+ 
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/ppc-pci.h linux-2.6.22-try2/include/asm-powerpc/ppc-pci.h
+--- linux-2.6.22-570/include/asm-powerpc/ppc-pci.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/ppc-pci.h	2007-12-19 15:29:22.000000000 -0500
+@@ -26,7 +26,7 @@
+ 
+ extern void find_and_init_phbs(void);
+ 
+-extern struct pci_dev *ppc64_isabridge_dev;	/* may be NULL if no ISA bus */
++extern struct pci_dev *isa_bridge_pcidev;	/* may be NULL if no ISA bus */
+ 
+ /** Bus Unit ID macros; get low and hi 32-bits of the 64-bit BUID */
+ #define BUID_HI(buid) ((buid) >> 32)
+@@ -47,8 +47,8 @@
+ extern unsigned long get_phb_buid (struct device_node *);
+ extern int rtas_setup_phb(struct pci_controller *phb);
+ 
+-/* From pSeries_pci.h */
+-extern void pSeries_final_fixup(void);
++/* From iSeries PCI */
++extern void iSeries_pcibios_init(void);
+ 
+ extern unsigned long pci_probe_only;
+ 
+@@ -139,6 +139,9 @@
+  */
+ struct device_node * find_device_pe(struct device_node *dn);
+ 
++void eeh_sysfs_add_device(struct pci_dev *pdev);
++void eeh_sysfs_remove_device(struct pci_dev *pdev);
++
+ #endif /* CONFIG_EEH */
+ 
+ #else /* CONFIG_PCI */
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/ptrace.h linux-2.6.22-try2/include/asm-powerpc/ptrace.h
+--- linux-2.6.22-570/include/asm-powerpc/ptrace.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/ptrace.h	2007-12-19 15:29:22.000000000 -0500
+@@ -92,6 +92,11 @@
+ 		set_thread_flag(TIF_NOERROR); \
+ 	} while(0)
+ 
++struct task_struct;
++extern unsigned long ptrace_get_reg(struct task_struct *task, int regno);
++extern int ptrace_put_reg(struct task_struct *task, int regno,
++			  unsigned long data);
++
+ /*
+  * We use the least-significant bit of the trap field to indicate
+  * whether we have saved the full set of registers, or only a
+@@ -158,9 +163,7 @@
+ 
+ #define PT_NIP	32
+ #define PT_MSR	33
+-#ifdef __KERNEL__
+ #define PT_ORIG_R3 34
+-#endif
+ #define PT_CTR	35
+ #define PT_LNK	36
+ #define PT_XER	37
+@@ -169,11 +172,12 @@
+ #define PT_MQ	39
+ #else
+ #define PT_SOFTE 39
++#endif
+ #define PT_TRAP	40
+ #define PT_DAR	41
+ #define PT_DSISR 42
+ #define PT_RESULT 43
+-#endif
++#define PT_REGS_COUNT 44
+ 
+ #define PT_FPR0	48	/* each FP reg occupies 2 slots in this space */
+ 
+@@ -229,7 +233,17 @@
+ #define PTRACE_GET_DEBUGREG	25
+ #define PTRACE_SET_DEBUGREG	26
+ 
+-/* Additional PTRACE requests implemented on PowerPC. */
++/* (new) PTRACE requests using the same numbers as x86 and the same
++ * argument ordering. Additionally, they support more registers.
++ */
++#define PTRACE_GETREGS            12
++#define PTRACE_SETREGS            13
++#define PTRACE_GETFPREGS          14
++#define PTRACE_SETFPREGS          15
++#define PTRACE_GETREGS64	  22
++#define PTRACE_SETREGS64	  23
++
++/* (old) PTRACE requests with inverted arguments */
+ #define PPC_PTRACE_GETREGS	0x99	/* Get GPRs 0 - 31 */
+ #define PPC_PTRACE_SETREGS	0x98	/* Set GPRs 0 - 31 */
+ #define PPC_PTRACE_GETFPREGS	0x97	/* Get FPRs 0 - 31 */
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/syscalls.h linux-2.6.22-try2/include/asm-powerpc/syscalls.h
+--- linux-2.6.22-570/include/asm-powerpc/syscalls.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/syscalls.h	2007-12-19 15:29:22.000000000 -0500
+@@ -43,16 +43,9 @@
+ 
+ asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset,
+ 		size_t sigsetsize);
+-
+-#ifndef __powerpc64__
+-asmlinkage long sys_sigaltstack(const stack_t __user *uss,
+-		stack_t __user *uoss, int r5, int r6, int r7, int r8,
+-		struct pt_regs *regs);
+-#else /* __powerpc64__ */
+ asmlinkage long sys_sigaltstack(const stack_t __user *uss,
+ 		stack_t __user *uoss, unsigned long r5, unsigned long r6,
+ 		unsigned long r7, unsigned long r8, struct pt_regs *regs);
+-#endif /* __powerpc64__ */
+ 
+ #endif /* __KERNEL__ */
+ #endif /* __ASM_POWERPC_SYSCALLS_H */
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/systbl.h linux-2.6.22-try2/include/asm-powerpc/systbl.h
+--- linux-2.6.22-570/include/asm-powerpc/systbl.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/asm-powerpc/systbl.h	2007-12-19 15:29:24.000000000 -0500
+@@ -312,3 +312,4 @@
+ COMPAT_SYS_SPU(timerfd)
+ SYSCALL_SPU(eventfd)
+ COMPAT_SYS_SPU(sync_file_range2)
++COMPAT_SYS(fallocate)
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/thread_info.h linux-2.6.22-try2/include/asm-powerpc/thread_info.h
+--- linux-2.6.22-570/include/asm-powerpc/thread_info.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/thread_info.h	2007-12-19 15:29:22.000000000 -0500
+@@ -113,8 +113,8 @@
+ #define TIF_POLLING_NRFLAG	4	/* true if poll_idle() is polling
+ 					   TIF_NEED_RESCHED */
+ #define TIF_32BIT		5	/* 32 bit binary */
+-#define TIF_RUNLATCH		6	/* Is the runlatch enabled? */
+-#define TIF_ABI_PENDING		7	/* 32/64 bit switch needed */
++#define TIF_PERFMON_WORK	6	/* work for pfm_handle_work() */
++#define TIF_PERFMON_CTXSW	7	/* perfmon needs ctxsw calls */
+ #define TIF_SYSCALL_AUDIT	8	/* syscall auditing active */
+ #define TIF_SINGLESTEP		9	/* singlestepping active */
+ #define TIF_MEMDIE		10
+@@ -123,6 +123,8 @@
+ #define TIF_NOERROR		14	/* Force successful syscall return */
+ #define TIF_RESTORE_SIGMASK	15	/* Restore signal mask in do_signal */
+ #define TIF_FREEZE		16	/* Freezing for suspend */
++#define TIF_RUNLATCH		17	/* Is the runlatch enabled? */
++#define TIF_ABI_PENDING		18	/* 32/64 bit switch needed */
+ 
+ /* as above, but as bit values */
+ #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
+@@ -131,8 +133,8 @@
+ #define _TIF_NEED_RESCHED	(1<<TIF_NEED_RESCHED)
+ #define _TIF_POLLING_NRFLAG	(1<<TIF_POLLING_NRFLAG)
+ #define _TIF_32BIT		(1<<TIF_32BIT)
+-#define _TIF_RUNLATCH		(1<<TIF_RUNLATCH)
+-#define _TIF_ABI_PENDING	(1<<TIF_ABI_PENDING)
++#define _TIF_PERFMON_WORK	(1<<TIF_PERFMON_WORK)
++#define _TIF_PERFMON_CTXSW	(1<<TIF_PERFMON_CTXSW)
+ #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
+ #define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
+ #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+@@ -140,6 +142,8 @@
+ #define _TIF_NOERROR		(1<<TIF_NOERROR)
+ #define _TIF_RESTORE_SIGMASK	(1<<TIF_RESTORE_SIGMASK)
+ #define _TIF_FREEZE		(1<<TIF_FREEZE)
++#define _TIF_RUNLATCH		(1<<TIF_RUNLATCH)
++#define _TIF_ABI_PENDING	(1<<TIF_ABI_PENDING)
+ #define _TIF_SYSCALL_T_OR_A	(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP)
+ 
+ #define _TIF_USER_WORK_MASK	(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/tlbflush.h linux-2.6.22-try2/include/asm-powerpc/tlbflush.h
+--- linux-2.6.22-570/include/asm-powerpc/tlbflush.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-powerpc/tlbflush.h	2007-12-19 15:29:22.000000000 -0500
+@@ -155,6 +155,11 @@
+ {
+ }
+ 
++/* Private function for use by PCI IO mapping code */
++extern void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
++				     unsigned long end);
++
++
+ #endif
+ 
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-powerpc/unistd.h linux-2.6.22-try2/include/asm-powerpc/unistd.h
+--- linux-2.6.22-570/include/asm-powerpc/unistd.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/asm-powerpc/unistd.h	2007-12-19 15:29:24.000000000 -0500
+@@ -331,10 +331,11 @@
+ #define __NR_timerfd		306
+ #define __NR_eventfd		307
+ #define __NR_sync_file_range2	308
++#define __NR_fallocate		309
+ 
+ #ifdef __KERNEL__
+ 
+-#define __NR_syscalls		309
++#define __NR_syscalls		310
+ 
+ #define __NR__exit __NR_exit
+ #define NR_syscalls	__NR_syscalls
+diff -Nurb linux-2.6.22-570/include/asm-ppc/kgdb.h linux-2.6.22-try2/include/asm-ppc/kgdb.h
+--- linux-2.6.22-570/include/asm-ppc/kgdb.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-ppc/kgdb.h	2007-12-19 15:29:24.000000000 -0500
+@@ -1,57 +1,18 @@
+-/*
+- * kgdb.h: Defines and declarations for serial line source level
+- *         remote debugging of the Linux kernel using gdb.
+- *
+- * PPC Mods (C) 1998 Michael Tesch (tesch@cs.wisc.edu)
+- *
+- * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
+- */
+ #ifdef __KERNEL__
+-#ifndef _PPC_KGDB_H
+-#define _PPC_KGDB_H
+-
++#ifndef __PPC_KGDB_H__
++#define __PPC_KGDB_H__
++#include <asm-powerpc/kgdb.h>
+ #ifndef __ASSEMBLY__
+-
+-/* Things specific to the gen550 backend. */
+-struct uart_port;
+-
+-extern void gen550_progress(char *, unsigned short);
+-extern void gen550_kgdb_map_scc(void);
+-extern void gen550_init(int, struct uart_port *);
+-
+-/* Things specific to the pmac backend. */
+-extern void zs_kgdb_hook(int tty_num);
+-
+-/* To init the kgdb engine. (called by serial hook)*/
+-extern void set_debug_traps(void);
+-
+-/* To enter the debugger explicitly. */
+-extern void breakpoint(void);
+-
+-/* For taking exceptions
++ /* Hooks for taking exceptions;
++  * these are defined in traps.c.
++  */
+-extern int (*debugger)(struct pt_regs *regs);
++struct pt_regs;
++extern void (*debugger)(struct pt_regs *regs);
+ extern int (*debugger_bpt)(struct pt_regs *regs);
+ extern int (*debugger_sstep)(struct pt_regs *regs);
+ extern int (*debugger_iabr_match)(struct pt_regs *regs);
+ extern int (*debugger_dabr_match)(struct pt_regs *regs);
+ extern void (*debugger_fault_handler)(struct pt_regs *regs);
+-
+-/* What we bring to the party */
+-int kgdb_bpt(struct pt_regs *regs);
+-int kgdb_sstep(struct pt_regs *regs);
+-void kgdb(struct pt_regs *regs);
+-int kgdb_iabr_match(struct pt_regs *regs);
+-int kgdb_dabr_match(struct pt_regs *regs);
+-
+-/*
+- * external low-level support routines (ie macserial.c)
+- */
+-extern void kgdb_interruptible(int); /* control interrupts from serial */
+-extern void putDebugChar(char);   /* write a single character      */
+-extern char getDebugChar(void);   /* read and return a single char */
+-
+-#endif /* !(__ASSEMBLY__) */
+-#endif /* !(_PPC_KGDB_H) */
++#endif /* !__ASSEMBLY__ */
++#endif /* __PPC_KGDB_H__ */
+ #endif /* __KERNEL__ */
+diff -Nurb linux-2.6.22-570/include/asm-ppc/machdep.h linux-2.6.22-try2/include/asm-ppc/machdep.h
+--- linux-2.6.22-570/include/asm-ppc/machdep.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-ppc/machdep.h	2007-12-19 15:29:24.000000000 -0500
+@@ -72,9 +72,7 @@
+ 	unsigned long	(*find_end_of_memory)(void);
+ 	void		(*setup_io_mappings)(void);
+ 
+-	void		(*early_serial_map)(void);
+   	void		(*progress)(char *, unsigned short);
+-	void		(*kgdb_map_scc)(void);
+ 
+ 	unsigned char 	(*nvram_read_val)(int addr);
+ 	void		(*nvram_write_val)(int addr, unsigned char val);
+diff -Nurb linux-2.6.22-570/include/asm-ppc/mv64x60.h linux-2.6.22-try2/include/asm-ppc/mv64x60.h
+--- linux-2.6.22-570/include/asm-ppc/mv64x60.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-ppc/mv64x60.h	2007-12-19 15:29:24.000000000 -0500
+@@ -348,6 +348,8 @@
+ 
+ void mv64x60_progress_init(u32 base);
+ void mv64x60_mpsc_progress(char *s, unsigned short hex);
++struct platform_device * mv64x60_early_get_pdev_data(const char *name,
++		int id, int remove);
+ 
+ extern struct mv64x60_32bit_window
+ 	gt64260_32bit_windows[MV64x60_32BIT_WIN_COUNT];
+diff -Nurb linux-2.6.22-570/include/asm-ppc/mv64x60_defs.h linux-2.6.22-try2/include/asm-ppc/mv64x60_defs.h
+--- linux-2.6.22-570/include/asm-ppc/mv64x60_defs.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-ppc/mv64x60_defs.h	2007-12-19 15:29:24.000000000 -0500
+@@ -57,7 +57,8 @@
+ #define	MV64x60_IRQ_I2C				37
+ #define	MV64x60_IRQ_BRG				39
+ #define	MV64x60_IRQ_MPSC_0			40
+-#define	MV64x60_IRQ_MPSC_1			42
++#define	MV64360_IRQ_MPSC_1			41
++#define	GT64260_IRQ_MPSC_1			42
+ #define	MV64x60_IRQ_COMM			43
+ #define	MV64x60_IRQ_P0_GPP_0_7			56
+ #define	MV64x60_IRQ_P0_GPP_8_15			57
+diff -Nurb linux-2.6.22-570/include/asm-s390/page.h linux-2.6.22-try2/include/asm-s390/page.h
+--- linux-2.6.22-570/include/asm-s390/page.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-s390/page.h	2007-12-19 15:29:24.000000000 -0500
+@@ -64,7 +64,8 @@
+ #define clear_user_page(page, vaddr, pg)	clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+ 
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+ 
+ /*
+diff -Nurb linux-2.6.22-570/include/asm-sh/kgdb.h linux-2.6.22-try2/include/asm-sh/kgdb.h
+--- linux-2.6.22-570/include/asm-sh/kgdb.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-sh/kgdb.h	2007-12-19 15:29:24.000000000 -0500
+@@ -2,78 +2,41 @@
+  * May be copied or modified under the terms of the GNU General Public
+  * License.  See linux/COPYING for more information.
+  *
+- * Based on original code by Glenn Engel, Jim Kingdon,
+- * David Grothe <dave@gcom.com>, Tigran Aivazian, <tigran@sco.com> and
+- * Amit S. Kale <akale@veritas.com>
++ * Based on earlier versions modified or written by: Glenn Engel,
++ * Jim Kingdon, David Grothe <dave@gcom.com>, Tigran Aivazian <tigran@sco.com>,
++ * Amit S. Kale <akale@veritas.com>, sh-stub.c from Ben Lee and
++ * Steve Chamberlain, and Henry Bell <henry.bell@st.com>.
+  * 
+- * Super-H port based on sh-stub.c (Ben Lee and Steve Chamberlain) by
+- * Henry Bell <henry.bell@st.com>
+- * 
+- * Header file for low-level support for remote debug using GDB. 
++ * Maintainer: Tom Rini <trini@kernel.crashing.org>
+  *
+  */
+ 
+ #ifndef __KGDB_H
+ #define __KGDB_H
+ 
+-#include <asm/ptrace.h>
+-#include <asm/cacheflush.h>
++#include <asm-generic/kgdb.h>
+ 
+-struct console;
++/* Based on sh-gdb.c from gdb-6.1 by Glenn Engel at HP,
++ * Ben Lee and Steve Chamberlain. */
++#define NUMREGBYTES	112	/* 92 */
++#define NUMCRITREGBYTES	(9 << 2)
++#define BUFMAX		400
+ 
+-/* Same as pt_regs but has vbr in place of syscall_nr */
++#ifndef __ASSEMBLY__
+ struct kgdb_regs {
+         unsigned long regs[16];
+         unsigned long pc;
+         unsigned long pr;
+-        unsigned long sr;
+         unsigned long gbr;
++        unsigned long vbr;
+         unsigned long mach;
+         unsigned long macl;
+-        unsigned long vbr;
++        unsigned long sr;
+ };
+ 
+-/* State info */
+-extern char kgdb_in_gdb_mode;
+-extern int kgdb_done_init;
+-extern int kgdb_enabled;
+-extern int kgdb_nofault;	/* Ignore bus errors (in gdb mem access) */
+-extern int kgdb_halt;		/* Execute initial breakpoint at startup */
+-extern char in_nmi;		/* Debounce flag to prevent NMI reentry*/
+-
+-/* SCI */
+-extern int kgdb_portnum;
+-extern int kgdb_baud;
+-extern char kgdb_parity;
+-extern char kgdb_bits;
+-
+-/* Init and interface stuff */
+-extern int kgdb_init(void);
+-extern int (*kgdb_getchar)(void);
+-extern void (*kgdb_putchar)(int);
+-
+-/* Trap functions */
+-typedef void (kgdb_debug_hook_t)(struct pt_regs *regs);
+-typedef void (kgdb_bus_error_hook_t)(void);
+-extern kgdb_debug_hook_t  *kgdb_debug_hook;
+-extern kgdb_bus_error_hook_t *kgdb_bus_err_hook;
+-
+-/* Console */
+-void kgdb_console_write(struct console *co, const char *s, unsigned count);
+-extern int kgdb_console_setup(struct console *, char *);
+-
+-/* Prototypes for jmp fns */
+-#define _JBLEN 9
+-typedef        int jmp_buf[_JBLEN];
+-extern void    longjmp(jmp_buf __jmpb, int __retval);
+-extern int     setjmp(jmp_buf __jmpb);
+-
+-/* Forced breakpoint */
+-#define breakpoint()					\
+-do {							\
+-	if (kgdb_enabled)				\
+-		__asm__ __volatile__("trapa   #0x3c");	\
+-} while (0)
++#define BREAKPOINT()		asm("trapa #0xff");
++#define BREAK_INSTR_SIZE	2
++#define CACHE_FLUSH_IS_SAFE	1
+ 
+ /* KGDB should be able to flush all kernel text space */
+ #if defined(CONFIG_CPU_SH4)
+@@ -100,4 +63,5 @@
+ {
+ 	return hexchars[x & 0xf];
+ }
++#endif				/* !__ASSEMBLY__ */
+ #endif
+diff -Nurb linux-2.6.22-570/include/asm-sh/system.h linux-2.6.22-try2/include/asm-sh/system.h
+--- linux-2.6.22-570/include/asm-sh/system.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-sh/system.h	2007-12-19 15:29:24.000000000 -0500
+@@ -264,6 +264,45 @@
+ #define instruction_size(insn)	(2)
+ #endif
+ 
++static inline unsigned long __cmpxchg_u32(volatile int * m, unsigned long old,
++	unsigned long new)
++{
++	__u32 retval;
++	unsigned long flags;
++
++	local_irq_save(flags);
++	retval = *m;
++	if (retval == old)
++		*m = new;
++	local_irq_restore(flags);       /* implies memory barrier  */
++	return retval;
++}
++
++/* This function doesn't exist, so you'll get a linker error
++ * if something tries to do an invalid cmpxchg(). */
++extern void __cmpxchg_called_with_bad_pointer(void);
++
++#define __HAVE_ARCH_CMPXCHG	1
++
++static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old,
++		unsigned long new, int size)
++{
++	switch (size) {
++	case 4:
++		return __cmpxchg_u32(ptr, old, new);
++	}
++	__cmpxchg_called_with_bad_pointer();
++	return old;
++}
++
++#define cmpxchg(ptr,o,n)						 \
++  ({									 \
++     __typeof__(*(ptr)) _o_ = (o);					 \
++     __typeof__(*(ptr)) _n_ = (n);					 \
++     (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_,		 \
++				    (unsigned long)_n_, sizeof(*(ptr))); \
++  })
++
+ /* XXX
+  * disable hlt during certain critical i/o operations
+  */
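
With cmpxchg() in place, SH code can use the standard compare-and-swap retry loop; on this UP-oriented implementation it is the irq-disable window inside __cmpxchg_u32() that makes the read-modify-write atomic. Typical usage, sketched:

	/* Sketch: lock-free increment built on the cmpxchg() added above.
	 * The loop retries until no other context changed *v in between. */
	static void atomic_style_inc(volatile int *v)
	{
		int old, new;

		do {
			old = *v;
			new = old + 1;
		} while (cmpxchg(v, old, new) != old);
	}
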
+diff -Nurb linux-2.6.22-570/include/asm-um/thread_info.h linux-2.6.22-try2/include/asm-um/thread_info.h
+--- linux-2.6.22-570/include/asm-um/thread_info.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-um/thread_info.h	2007-12-19 15:29:24.000000000 -0500
+@@ -52,10 +52,21 @@
+ 	return ti;
+ }
+ 
++#ifdef CONFIG_DEBUG_STACK_USAGE
++
++#define alloc_thread_info(tsk) \
++	((struct thread_info *) __get_free_pages(GFP_KERNEL | __GFP_ZERO, \
++						 CONFIG_KERNEL_STACK_ORDER))
++#else
++
+ /* thread information allocation */
+ #define alloc_thread_info(tsk) \
+-	((struct thread_info *) kmalloc(THREAD_SIZE, GFP_KERNEL))
+-#define free_thread_info(ti) kfree(ti)
++	((struct thread_info *) __get_free_pages(GFP_KERNEL, \
++						 CONFIG_KERNEL_STACK_ORDER))
++#endif
++
++#define free_thread_info(ti) \
++	free_pages((unsigned long)(ti),CONFIG_KERNEL_STACK_ORDER)
+ 
+ #endif
+ 
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/kdebug.h linux-2.6.22-try2/include/asm-x86_64/kdebug.h
+--- linux-2.6.22-570/include/asm-x86_64/kdebug.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-x86_64/kdebug.h	2007-12-19 15:29:24.000000000 -0500
+@@ -23,6 +23,7 @@
+ 	DIE_CALL,
+ 	DIE_NMI_IPI,
+ 	DIE_PAGE_FAULT,
++	DIE_PAGE_FAULT_NO_CONTEXT,
+ };
+ 
+ extern void printk_address(unsigned long address);
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/kgdb.h linux-2.6.22-try2/include/asm-x86_64/kgdb.h
+--- linux-2.6.22-570/include/asm-x86_64/kgdb.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/asm-x86_64/kgdb.h	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,52 @@
++#ifdef __KERNEL__
++#ifndef _ASM_KGDB_H_
++#define _ASM_KGDB_H_
++
++/*
++ * Copyright (C) 2001-2004 Amit S. Kale
++ */
++
++#include <asm-generic/kgdb.h>
++
++/*
++ *  Note that this register image is in a different order than
++ *  the register image that Linux produces at interrupt time.
++ *
++ *  Linux's register image is defined by struct pt_regs in ptrace.h.
++ *  Just why GDB uses a different order is a historical mystery.
++ */
++#define _RAX	0
++#define _RDX	1
++#define _RCX	2
++#define _RBX	3
++#define _RSI	4
++#define _RDI	5
++#define _RBP	6
++#define _RSP	7
++#define _R8	8
++#define _R9	9
++#define _R10	10
++#define _R11	11
++#define _R12	12
++#define _R13	13
++#define _R14	14
++#define _R15	15
++#define _PC	16
++#define _PS	17
++
++/* Number of bytes of registers.  */
++#define NUMREGBYTES		((_PS+1)*8)
++#define NUMCRITREGBYTES		(8 * 8)		/* 8 registers. */
++
++#ifndef __ASSEMBLY__
++/* BUFMAX defines the maximum number of characters in inbound/outbound
++ * buffers at least NUMREGBYTES*2 are needed for register packets, and
++ * a longer buffer is needed to list all threads. */
++#define BUFMAX			1024
++#define BREAKPOINT()		asm("   int $3");
++#define CHECK_EXCEPTION_STACK() ((&__get_cpu_var(init_tss))[0].ist[0])
++#define BREAK_INSTR_SIZE	1
++#define CACHE_FLUSH_IS_SAFE	1
++#endif				/* !__ASSEMBLY__ */
++#endif				/* _ASM_KGDB_H_ */
++#endif				/* __KERNEL__ */
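
Because GDB's numbering (_RAX ... _PS above) does not match the member order of struct pt_regs, the stub has to translate explicitly when building register packets. A partial sketch of that translation; field names follow 2.6.22's x86_64 pt_regs, and only a few registers are shown where the real stub covers all of _RAX.._PS:

	/* Sketch: copy pt_regs fields into a GDB-ordered buffer using
	 * the index macros defined above. */
	static void pt_regs_to_gdb(const struct pt_regs *regs,
				   unsigned long gdb_regs[_PS + 1])
	{
		gdb_regs[_RAX] = regs->rax;
		gdb_regs[_RDX] = regs->rdx;
		gdb_regs[_RCX] = regs->rcx;
		gdb_regs[_RBX] = regs->rbx;
		gdb_regs[_RSP] = regs->rsp;
		gdb_regs[_PC]  = regs->rip;
		gdb_regs[_PS]  = regs->eflags;
	}
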
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/page.h linux-2.6.22-try2/include/asm-x86_64/page.h
+--- linux-2.6.22-570/include/asm-x86_64/page.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-x86_64/page.h	2007-12-19 15:29:24.000000000 -0500
+@@ -48,7 +48,8 @@
+ #define clear_user_page(page, vaddr, pg)	clear_page(page)
+ #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
+ 
+-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
++#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
++	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+ #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+ /*
+  * These are used to make use of C type-checking..
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/proto.h linux-2.6.22-try2/include/asm-x86_64/proto.h
+--- linux-2.6.22-570/include/asm-x86_64/proto.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-x86_64/proto.h	2007-12-19 15:29:23.000000000 -0500
+@@ -75,8 +75,6 @@
+ extern void early_quirks(void);
+ extern void check_efer(void);
+ 
+-extern int unhandled_signal(struct task_struct *tsk, int sig);
+-
+ extern void select_idle_routine(const struct cpuinfo_x86 *c);
+ 
+ extern unsigned long table_start, table_end;
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/system.h linux-2.6.22-try2/include/asm-x86_64/system.h
+--- linux-2.6.22-570/include/asm-x86_64/system.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-x86_64/system.h	2007-12-19 15:29:24.000000000 -0500
+@@ -22,7 +22,9 @@
+ 
+ /* Save restore flags to clear handle leaking NT */
+ #define switch_to(prev,next,last) \
+-	asm volatile(SAVE_CONTEXT						    \
++	asm volatile(".globl __switch_to_begin\n\t"				    \
++		     "__switch_to_begin:\n\t"					  \
++		     SAVE_CONTEXT						  \
+ 		     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
+ 		     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	  \
+ 		     "call __switch_to\n\t"					  \
+@@ -34,6 +36,8 @@
+ 		     "movq %%rax,%%rdi\n\t" 					  \
+ 		     "jc   ret_from_fork\n\t"					  \
+ 		     RESTORE_CONTEXT						    \
++		     "\n.globl __switch_to_end\n\t"				  \
++		     "__switch_to_end:\n\t"					  \
+ 		     : "=a" (last)					  	  \
+ 		     : [next] "S" (next), [prev] "D" (prev),			  \
+ 		       [threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/unistd.h linux-2.6.22-try2/include/asm-x86_64/unistd.h
+--- linux-2.6.22-570/include/asm-x86_64/unistd.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/asm-x86_64/unistd.h	2007-12-19 15:29:24.000000000 -0500
+@@ -630,6 +630,8 @@
+ __SYSCALL(__NR_timerfd, sys_timerfd)
+ #define __NR_eventfd		284
+ __SYSCALL(__NR_eventfd, sys_eventfd)
++#define __NR_fallocate		285
++__SYSCALL(__NR_fallocate, sys_fallocate)
+ 
+ #ifndef __NO_STUBS
+ #define __ARCH_WANT_OLD_READDIR
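
Usage sketch (illustrative only, not part of the patch): __NR_eventfd already
occupies slot 284, so fallocate takes 285. Until glibc grows a wrapper, the new
syscall can be exercised directly; FA_ALLOCATE is defined in the linux/fs.h hunk
later in this patch, and is duplicated here because userspace headers do not
carry it yet:

	#include <fcntl.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	#define FA_ALLOCATE	0x1	/* from the linux/fs.h hunk below */

	int main(void)
	{
		int fd = open("testfile", O_CREAT | O_WRONLY, 0644);
		long ret;

		if (fd < 0)
			return 1;
		/* preallocate 1 MiB starting at offset 0 */
		ret = syscall(285 /* __NR_fallocate */, fd, FA_ALLOCATE,
			      (long long)0, (long long)1 << 20);
		return ret == 0 ? 0 : 1;
	}
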
+diff -Nurb linux-2.6.22-570/include/asm-x86_64/unwind.h linux-2.6.22-try2/include/asm-x86_64/unwind.h
+--- linux-2.6.22-570/include/asm-x86_64/unwind.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/asm-x86_64/unwind.h	2007-12-19 15:29:23.000000000 -0500
+@@ -1,6 +1,100 @@
+ #ifndef _ASM_X86_64_UNWIND_H
+ #define _ASM_X86_64_UNWIND_H
+ 
++/*
++ * Copyright (C) 2002-2006 Novell, Inc.
++ *	Jan Beulich <jbeulich@novell.com>
++ * This code is released under version 2 of the GNU GPL.
++ */
++
++#ifdef CONFIG_STACK_UNWIND
++
++#include <linux/sched.h>
++#include <asm/ptrace.h>
++#include <asm/uaccess.h>
++#include <asm/vsyscall.h>
++
++struct unwind_frame_info
++{
++	struct pt_regs regs;
++	struct task_struct *task;
++	unsigned call_frame:1;
++};
++
++#define UNW_PC(frame)        (frame)->regs.rip
++#define UNW_SP(frame)        (frame)->regs.rsp
++#ifdef CONFIG_FRAME_POINTER
++#define UNW_FP(frame)        (frame)->regs.rbp
++#define FRAME_RETADDR_OFFSET 8
++#define FRAME_LINK_OFFSET    0
++#define STACK_BOTTOM(tsk)    (((tsk)->thread.rsp0 - 1) & ~(THREAD_SIZE - 1))
++#define STACK_TOP(tsk)       ((tsk)->thread.rsp0)
++#endif
++/* Might need to account for the special exception and interrupt handling
++   stacks here, since normally
++	EXCEPTION_STACK_ORDER < THREAD_ORDER < IRQSTACK_ORDER,
++   but the construct is needed only for getting across the stack switch to
++   the interrupt stack - thus considering the IRQ stack itself is unnecessary,
++   and the overhead of comparing against all exception handling stacks seems
++   undesirable. */
++#define STACK_LIMIT(ptr)     (((ptr) - 1) & ~(THREAD_SIZE - 1))
++
++#define UNW_REGISTER_INFO \
++	PTREGS_INFO(rax), \
++	PTREGS_INFO(rdx), \
++	PTREGS_INFO(rcx), \
++	PTREGS_INFO(rbx), \
++	PTREGS_INFO(rsi), \
++	PTREGS_INFO(rdi), \
++	PTREGS_INFO(rbp), \
++	PTREGS_INFO(rsp), \
++	PTREGS_INFO(r8), \
++	PTREGS_INFO(r9), \
++	PTREGS_INFO(r10), \
++	PTREGS_INFO(r11), \
++	PTREGS_INFO(r12), \
++	PTREGS_INFO(r13), \
++	PTREGS_INFO(r14), \
++	PTREGS_INFO(r15), \
++	PTREGS_INFO(rip)
++
++#define UNW_DEFAULT_RA(raItem, dataAlign) \
++	((raItem).where == Memory && \
++	 !((raItem).value * (dataAlign) + 8))
++
++static inline void arch_unw_init_frame_info(struct unwind_frame_info *info,
++                                            /*const*/ struct pt_regs *regs)
++{
++	info->regs = *regs;
++}
++
++static inline void arch_unw_init_blocked(struct unwind_frame_info *info)
++{
++	extern const char thread_return[];
++
++	memset(&info->regs, 0, sizeof(info->regs));
++	info->regs.rip = (unsigned long)thread_return;
++	info->regs.cs = __KERNEL_CS;
++	__get_user(info->regs.rbp, (unsigned long *)info->task->thread.rsp);
++	info->regs.rsp = info->task->thread.rsp;
++	info->regs.ss = __KERNEL_DS;
++}
++
++extern int arch_unwind_init_running(struct unwind_frame_info *,
++                                    int (*callback)(struct unwind_frame_info *,
++                                                    void *arg),
++                                    void *arg);
++
++static inline int arch_unw_user_mode(const struct unwind_frame_info *info)
++{
++	return user_mode(&info->regs)
++	       || (long)info->regs.rip >= 0
++	       || (info->regs.rip >= VSYSCALL_START && info->regs.rip < VSYSCALL_END)
++	       || (long)info->regs.rsp >= 0;
++}
++
++#else
++
+ #define UNW_PC(frame) ((void)(frame), 0UL)
+ #define UNW_SP(frame) ((void)(frame), 0UL)
+ 
+@@ -9,4 +103,6 @@
+ 	return 0;
+ }
+ 
++#endif
++
+ #endif /* _ASM_X86_64_UNWIND_H */
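
Usage sketch (illustrative only, not part of the patch): arch_unwind_init_running()
invokes the callback once with a frame info initialized to the current context; the
generic unwind() stepping loop lives outside this header and is omitted here. A
minimal consumer that dumps the initial frame:

	static int dump_one_frame(struct unwind_frame_info *info, void *arg)
	{
		printk("pc=%016lx sp=%016lx\n", UNW_PC(info), UNW_SP(info));
		return 0;
	}

	static void dump_running_stack(void)
	{
		struct unwind_frame_info info;

		arch_unwind_init_running(&info, dump_one_frame, NULL);
	}
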
+diff -Nurb linux-2.6.22-570/include/linux/Kbuild linux-2.6.22-try2/include/linux/Kbuild
+--- linux-2.6.22-570/include/linux/Kbuild	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/linux/Kbuild	2007-12-19 15:29:23.000000000 -0500
+@@ -91,7 +91,6 @@
+ header-y += in_route.h
+ header-y += ioctl.h
+ header-y += ipmi_msgdefs.h
+-header-y += ip_mp_alg.h
+ header-y += ipsec.h
+ header-y += ipx.h
+ header-y += irda.h
+diff -Nurb linux-2.6.22-570/include/linux/acpi.h linux-2.6.22-try2/include/linux/acpi.h
+--- linux-2.6.22-570/include/linux/acpi.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/acpi.h	2007-12-19 15:29:22.000000000 -0500
+@@ -206,11 +206,8 @@
+ {
+ 	return max_cstate;
+ }
+-static inline void acpi_set_cstate_limit(unsigned int new_limit)
+-{
+-	max_cstate = new_limit;
+-	return;
+-}
++extern void (*acpi_do_set_cstate_limit)(void);
++extern void acpi_set_cstate_limit(unsigned int new_limit);
+ #else
+ static inline unsigned int acpi_get_cstate_limit(void) { return 0; }
+ static inline void acpi_set_cstate_limit(unsigned int new_limit) { return; }
+diff -Nurb linux-2.6.22-570/include/linux/async_tx.h linux-2.6.22-try2/include/linux/async_tx.h
+--- linux-2.6.22-570/include/linux/async_tx.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/linux/async_tx.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,156 @@
++/*
++ * Copyright © 2006, Intel Corporation.
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms and conditions of the GNU General Public License,
++ * version 2, as published by the Free Software Foundation.
++ *
++ * This program is distributed in the hope it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
++ * more details.
++ *
++ * You should have received a copy of the GNU General Public License along with
++ * this program; if not, write to the Free Software Foundation, Inc.,
++ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ */
++#ifndef _ASYNC_TX_H_
++#define _ASYNC_TX_H_
++#include <linux/dmaengine.h>
++#include <linux/spinlock.h>
++#include <linux/interrupt.h>
++
++/**
++ * dma_chan_ref - object used to manage dma channels received from the
++ *   dmaengine core.
++ * @chan - the channel being tracked
++ * @node - node for the channel to be placed on async_tx_master_list
++ * @rcu - for list_del_rcu
++ * @count - number of times this channel is listed in the pool
++ *	(for channels with multiple capabilities)
++ */
++struct dma_chan_ref {
++	struct dma_chan *chan;
++	struct list_head node;
++	struct rcu_head rcu;
++	atomic_t count;
++};
++
++/**
++ * async_tx_flags - modifiers for the async_* calls
++ * @ASYNC_TX_XOR_ZERO_DST: this flag must be used for xor operations where
++ * the destination address is not a source.  The asynchronous case handles this
++ * implicitly, the synchronous case needs to zero the destination block.
++ * @ASYNC_TX_XOR_DROP_DST: this flag must be used if the destination address is
++ * also one of the source addresses.  In the synchronous case the destination
++ * address is an implied source, whereas in the asynchronous case it must be listed
++ * as a source.  The destination address must be the first address in the source
++ * array.
++ * @ASYNC_TX_ASSUME_COHERENT: skip cache maintenance operations
++ * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a
++ * dependency chain
++ * @ASYNC_TX_DEP_ACK: ack the dependency descriptor.  Useful for chaining.
++ * @ASYNC_TX_KMAP_SRC: if the transaction is to be performed synchronously
++ * take an atomic mapping (KM_USER0) on the source page(s)
++ * @ASYNC_TX_KMAP_DST: if the transaction is to be performed synchronously
++ * take an atomic mapping (KM_USER0) on the dest page(s)
++ */
++enum async_tx_flags {
++	ASYNC_TX_XOR_ZERO_DST	 = (1 << 0),
++	ASYNC_TX_XOR_DROP_DST	 = (1 << 1),
++	ASYNC_TX_ASSUME_COHERENT = (1 << 2),
++	ASYNC_TX_ACK		 = (1 << 3),
++	ASYNC_TX_DEP_ACK	 = (1 << 4),
++	ASYNC_TX_KMAP_SRC	 = (1 << 5),
++	ASYNC_TX_KMAP_DST	 = (1 << 6),
++};
++
++#ifdef CONFIG_DMA_ENGINE
++void async_tx_issue_pending_all(void);
++enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx);
++void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx);
++struct dma_chan *
++async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
++	enum dma_transaction_type tx_type);
++#else
++static inline void async_tx_issue_pending_all(void)
++{
++	do { } while (0);
++}
++
++static inline enum dma_status
++dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx)
++{
++	return DMA_SUCCESS;
++}
++
++static inline void
++async_tx_run_dependencies(struct dma_async_tx_descriptor *tx)
++{
++	do { } while (0);
++}
++
++static inline struct dma_chan *
++async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx,
++	enum dma_transaction_type tx_type)
++{
++	return NULL;
++}
++#endif
++
++/**
++ * async_tx_sync_epilog - actions to take if an operation is run synchronously
++ * @flags: async_tx flags
++ * @depend_tx: transaction depends on depend_tx
++ * @cb_fn: function to call when the transaction completes
++ * @cb_fn_param: parameter to pass to the callback routine
++ */
++static inline void
++async_tx_sync_epilog(unsigned long flags,
++	struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_fn_param)
++{
++	if (cb_fn)
++		cb_fn(cb_fn_param);
++
++	if (depend_tx && (flags & ASYNC_TX_DEP_ACK))
++		async_tx_ack(depend_tx);
++}
++
++void
++async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx,
++	enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_fn_param);
++
++struct dma_async_tx_descriptor *
++async_xor(struct page *dest, struct page **src_list, unsigned int offset,
++	int src_cnt, size_t len, enum async_tx_flags flags,
++	struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_fn_param);
++
++struct dma_async_tx_descriptor *
++async_xor_zero_sum(struct page *dest, struct page **src_list,
++	unsigned int offset, int src_cnt, size_t len,
++	u32 *result, enum async_tx_flags flags,
++	struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_fn_param);
++
++struct dma_async_tx_descriptor *
++async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
++	unsigned int src_offset, size_t len, enum async_tx_flags flags,
++	struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_fn_param);
++
++struct dma_async_tx_descriptor *
++async_memset(struct page *dest, int val, unsigned int offset,
++	size_t len, enum async_tx_flags flags,
++	struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_fn_param);
++
++struct dma_async_tx_descriptor *
++async_trigger_callback(enum async_tx_flags flags,
++	struct dma_async_tx_descriptor *depend_tx,
++	dma_async_tx_callback cb_fn, void *cb_fn_param);
++#endif /* _ASYNC_TX_H_ */
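
Usage sketch (illustrative only, not part of the patch): the simplest consumer of
this API submits one operation and waits for it. async_memcpy() falls back to a
synchronous memcpy (and returns NULL) when no capable channel is available, so the
descriptor must be checked before waiting:

	static void copy_page_async(struct page *dst, struct page *src)
	{
		struct dma_async_tx_descriptor *tx;

		/* dest, src, dest_offset, src_offset, len, flags,
		 * depend_tx, cb_fn, cb_fn_param */
		tx = async_memcpy(dst, src, 0, 0, PAGE_SIZE,
				  ASYNC_TX_ACK, NULL, NULL, NULL);
		if (tx)
			dma_wait_for_async_tx(tx);
	}

Passing ASYNC_TX_ACK tells the engine the descriptor will not be used as the head
of a dependency chain, per the flag documentation above.
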
+diff -Nurb linux-2.6.22-570/include/linux/configfs.h linux-2.6.22-try2/include/linux/configfs.h
+--- linux-2.6.22-570/include/linux/configfs.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/configfs.h	2007-12-19 15:29:23.000000000 -0500
+@@ -75,7 +75,6 @@
+ extern void config_item_init_type_name(struct config_item *item,
+ 				       const char *name,
+ 				       struct config_item_type *type);
+-extern void config_item_cleanup(struct config_item *);
+ 
+ extern struct config_item * config_item_get(struct config_item *);
+ extern void config_item_put(struct config_item *);
+@@ -157,6 +156,7 @@
+ 	struct config_item *(*make_item)(struct config_group *group, const char *name);
+ 	struct config_group *(*make_group)(struct config_group *group, const char *name);
+ 	int (*commit_item)(struct config_item *item);
++	void (*disconnect_notify)(struct config_group *group, struct config_item *item);
+ 	void (*drop_item)(struct config_group *group, struct config_item *item);
+ };
+ 
+@@ -175,6 +175,11 @@
+ int configfs_register_subsystem(struct configfs_subsystem *subsys);
+ void configfs_unregister_subsystem(struct configfs_subsystem *subsys);
+ 
++/* These functions can sleep and can alloc with GFP_KERNEL */
++/* WARNING: These cannot be called underneath configfs callbacks!! */
++int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target);
++void configfs_undepend_item(struct configfs_subsystem *subsys, struct config_item *target);
++
+ #endif  /* __KERNEL__ */
+ 
+ #endif /* _CONFIGFS_H_ */
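
Usage sketch (illustrative only, not part of the patch): per the warning above,
the depend/undepend pair may only be called from process context outside configfs
callbacks. my_subsys and my_item are hypothetical:

	static int pin_item(struct configfs_subsystem *my_subsys,
			    struct config_item *my_item)
	{
		int ret = configfs_depend_item(my_subsys, my_item);

		if (ret)
			return ret;	/* item is going away; don't use it */
		/* ... use the item; it cannot be removed while pinned ... */
		configfs_undepend_item(my_subsys, my_item);
		return 0;
	}
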
+diff -Nurb linux-2.6.22-570/include/linux/container.h linux-2.6.22-try2/include/linux/container.h
+--- linux-2.6.22-570/include/linux/container.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/linux/container.h	2007-12-19 15:29:25.000000000 -0500
+@@ -0,0 +1,295 @@
++#ifndef _LINUX_CONTAINER_H
++#define _LINUX_CONTAINER_H
++/*
++ *  container interface
++ *
++ *  Copyright (C) 2003 BULL SA
++ *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
++ *
++ */
++
++#include <linux/sched.h>
++#include <linux/kref.h>
++#include <linux/cpumask.h>
++#include <linux/nodemask.h>
++#include <linux/rcupdate.h>
++
++#ifdef CONFIG_CONTAINERS
++
++struct containerfs_root;
++struct inode;
++
++extern int container_init_early(void);
++extern int container_init(void);
++extern void container_init_smp(void);
++extern void container_lock(void);
++extern void container_unlock(void);
++extern void container_fork(struct task_struct *p);
++extern void container_fork_callbacks(struct task_struct *p);
++extern void container_exit(struct task_struct *p, int run_callbacks);
++
++extern struct file_operations proc_container_operations;
++
++/* Define the enumeration of all container subsystems */
++#define SUBSYS(_x) _x ## _subsys_id,
++enum container_subsys_id {
++#include <linux/container_subsys.h>
++	CONTAINER_SUBSYS_COUNT
++};
++#undef SUBSYS
++
++/* Per-subsystem/per-container state maintained by the system. */
++struct container_subsys_state {
++	/* The container that this subsystem is attached to. Useful
++	 * for subsystems that want to know about the container
++	 * hierarchy structure */
++	struct container *container;
++
++	/* State maintained by the container system to allow
++	 * subsystems to be "busy". Should be accessed via css_get()
++	 * and css_put() */
++
++	atomic_t refcnt;
++};
++
++/*
++ * Call css_get() to hold a reference on the container;
++ *
++ */
++
++static inline void css_get(struct container_subsys_state *css)
++{
++	atomic_inc(&css->refcnt);
++}
++/*
++ * css_put() should be called to release a reference taken by
++ * css_get()
++ */
++void css_put(struct container_subsys_state *css);
++
++struct container {
++	unsigned long flags;		/* "unsigned long" so bitops work */
++
++	/* count users of this container. >0 means busy, but doesn't
++	 * necessarily indicate the number of tasks in the
++	 * container */
++	atomic_t count;
++
++	/*
++	 * We link our 'sibling' struct into our parent's 'children'.
++	 * Our children link their 'sibling' into our 'children'.
++	 */
++	struct list_head sibling;	/* my parent's children */
++	struct list_head children;	/* my children */
++
++	struct container *parent;	/* my parent */
++	struct dentry *dentry;	  	/* container fs entry */
++
++	/* Private pointers for each registered subsystem */
++	struct container_subsys_state *subsys[CONTAINER_SUBSYS_COUNT];
++
++	struct containerfs_root *root;
++	struct container *top_container;
++
++	/*
++	 * List of cg_container_links pointing at css_groups with
++	 * tasks in this container. Protected by css_group_lock
++	 */
++	struct list_head css_groups;
++
++	/*
++	 * Linked list running through all containers that can
++	 * potentially be reaped by the release agent. Protected by
++	 * container_mutex
++	 */
++	struct list_head release_list;
++};
++
++/* A css_group is a structure holding pointers to a set of
++ * container_subsys_state objects. This saves space in the task struct
++ * object and speeds up fork()/exit(), since a single inc/dec and a
++ * list_add()/del() can bump the reference count on the entire
++ * container set for a task.
++ */
++
++struct css_group {
++
++	/* Reference count */
++	struct kref ref;
++
++	/*
++	 * List running through all container groups. Protected by
++	 * css_group_lock
++	 */
++	struct list_head list;
++
++	/*
++	 * List running through all tasks using this container
++	 * group. Protected by css_group_lock
++	 */
++	struct list_head tasks;
++
++	/*
++	 * List of cg_container_link objects on link chains from
++	 * containers referenced from this css_group. Protected by
++	 * css_group_lock
++	 */
++	struct list_head cg_links;
++
++	/* Set of subsystem states, one for each subsystem. NULL for
++	 * subsystems that aren't part of this hierarchy. These
++	 * pointers reduce the number of dereferences required to get
++	 * from a task to its state for a given container, but result
++	 * in increased space usage if tasks are in wildly different
++	 * groupings across different hierarchies. This array is
++	 * immutable after creation */
++	struct container_subsys_state *subsys[CONTAINER_SUBSYS_COUNT];
++
++};
++
++/* struct cftype:
++ *
++ * The files in the container filesystem mostly have very simple read/write
++ * handling; some common functions take care of it. Nevertheless, some cases
++ * (such as reading tasks) are special, so this structure is defined for
++ * every kind of file.
++ *
++ * When reading/writing to a file:
++ *	- the container to use is in file->f_dentry->d_parent->d_fsdata
++ *	- the 'cftype' of the file is file->f_dentry->d_fsdata
++ */
++
++#define MAX_CFTYPE_NAME 64
++struct cftype {
++	/* By convention, the name should begin with the name of the
++	 * subsystem, followed by a period */
++	char name[MAX_CFTYPE_NAME];
++	int private;
++	int (*open) (struct inode *inode, struct file *file);
++	ssize_t (*read) (struct container *cont, struct cftype *cft,
++			 struct file *file,
++			 char __user *buf, size_t nbytes, loff_t *ppos);
++	/*
++	 * read_uint() is a shortcut for the common case of returning a
++	 * single integer. Use it in place of read()
++	 */
++	u64 (*read_uint) (struct container *cont, struct cftype *cft);
++	ssize_t (*write) (struct container *cont, struct cftype *cft,
++			  struct file *file,
++			  const char __user *buf, size_t nbytes, loff_t *ppos);
++	int (*release) (struct inode *inode, struct file *file);
++};
++
++/* Add a new file to the given container directory. Should only be
++ * called by subsystems from within a populate() method */
++int container_add_file(struct container *cont, const struct cftype *cft);
++
++/* Add a set of new files to the given container directory. Should
++ * only be called by subsystems from within a populate() method */
++int container_add_files(struct container *cont, const struct cftype cft[],
++			int count);
++
++int container_is_removed(const struct container *cont);
++
++int container_path(const struct container *cont, char *buf, int buflen);
++
++int container_task_count(const struct container *cont);
++
++/* Return true if the container is a descendant of the current container */
++int container_is_descendant(const struct container *cont);
++
++/* Container subsystem type. See Documentation/containers.txt for details */
++
++struct container_subsys {
++	int (*create)(struct container_subsys *ss,
++		      struct container *cont);
++	void (*destroy)(struct container_subsys *ss, struct container *cont);
++	int (*can_attach)(struct container_subsys *ss,
++			  struct container *cont, struct task_struct *tsk);
++	void (*attach)(struct container_subsys *ss, struct container *cont,
++			struct container *old_cont, struct task_struct *tsk);
++	void (*fork)(struct container_subsys *ss, struct task_struct *task);
++	void (*exit)(struct container_subsys *ss, struct task_struct *task);
++	int (*populate)(struct container_subsys *ss,
++			struct container *cont);
++	void (*post_clone)(struct container_subsys *ss, struct container *cont);
++	void (*bind)(struct container_subsys *ss, struct container *root);
++	int subsys_id;
++	int active;
++	int early_init;
++#define MAX_CONTAINER_TYPE_NAMELEN 32
++	const char *name;
++
++	/* Protected by RCU */
++	struct containerfs_root *root;
++
++	struct list_head sibling;
++
++	void *private;
++};
++
++#define SUBSYS(_x) extern struct container_subsys _x ## _subsys;
++#include <linux/container_subsys.h>
++#undef SUBSYS
++
++static inline struct container_subsys_state *container_subsys_state(
++	struct container *cont, int subsys_id)
++{
++	return cont->subsys[subsys_id];
++}
++
++static inline struct container_subsys_state *task_subsys_state(
++	struct task_struct *task, int subsys_id)
++{
++	return rcu_dereference(task->containers->subsys[subsys_id]);
++}
++
++static inline struct container* task_container(struct task_struct *task,
++					       int subsys_id)
++{
++	return task_subsys_state(task, subsys_id)->container;
++}
++
++int container_clone(struct task_struct *tsk, struct container_subsys *ss);
++
++/* A container_iter should be treated as an opaque object */
++struct container_iter {
++	struct list_head *cg_link;
++	struct list_head *task;
++};
++
++/* To iterate across the tasks in a container:
++ *
++ * 1) call container_iter_start to initialize an iterator
++ *
++ * 2) call container_iter_next() to retrieve member tasks until it
++ *    returns NULL or until you want to end the iteration
++ *
++ * 3) call container_iter_end() to destroy the iterator.
++ */
++void container_iter_start(struct container *cont, struct container_iter *it);
++struct task_struct *container_iter_next(struct container *cont,
++					struct container_iter *it);
++void container_iter_end(struct container *cont, struct container_iter *it);
++
++void container_set_release_agent_path(struct container_subsys *ss,
++				      const char *path);
++
++#else /* !CONFIG_CONTAINERS */
++
++static inline int container_init_early(void) { return 0; }
++static inline int container_init(void) { return 0; }
++static inline void container_init_smp(void) {}
++static inline void container_fork(struct task_struct *p) {}
++static inline void container_fork_callbacks(struct task_struct *p) {}
++static inline void container_exit(struct task_struct *p, int callbacks) {}
++
++static inline void container_lock(void) {}
++static inline void container_unlock(void) {}
++
++#endif /* !CONFIG_CONTAINERS */
++
++#endif /* _LINUX_CONTAINER_H */
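
Usage sketch (illustrative only, not part of the patch): the three-step iterator
protocol documented above, applied to counting the member tasks of a container:

	static int count_tasks(struct container *cont)
	{
		struct container_iter it;
		struct task_struct *tsk;
		int n = 0;

		container_iter_start(cont, &it);
		while ((tsk = container_iter_next(cont, &it)) != NULL)
			n++;
		container_iter_end(cont, &it);
		return n;
	}

(container_task_count() already provides this; the sketch only illustrates the
calling convention.)
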
+diff -Nurb linux-2.6.22-570/include/linux/container_subsys.h linux-2.6.22-try2/include/linux/container_subsys.h
+--- linux-2.6.22-570/include/linux/container_subsys.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/linux/container_subsys.h	2007-12-19 15:29:25.000000000 -0500
+@@ -0,0 +1,32 @@
++/* Add subsystem definitions of the form SUBSYS(<name>) in this
++ * file. Surround each one by a line of comment markers so that
++ * patches don't collide
++ */
++
++/* */
++
++/* */
++
++#ifdef CONFIG_CONTAINER_CPUACCT
++SUBSYS(cpuacct)
++#endif
++
++/* */
++
++#ifdef CONFIG_CPUSETS
++SUBSYS(cpuset)
++#endif
++
++/* */
++
++#ifdef CONFIG_CONTAINER_DEBUG
++SUBSYS(debug)
++#endif
++
++/* */
++
++#ifdef CONFIG_CONTAINER_NS
++SUBSYS(ns)
++#endif
++
++/* */
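
Usage sketch (illustrative only, not part of the patch): this x-macro header is
included twice by container.h with different SUBSYS() definitions. With
CONFIG_CPUSETS=y, the two expansions become roughly:

	/* from "#define SUBSYS(_x) _x ## _subsys_id," */
	enum container_subsys_id {
		cpuset_subsys_id,
		/* ... other configured subsystems ... */
		CONTAINER_SUBSYS_COUNT
	};

	/* from "#define SUBSYS(_x) extern struct container_subsys _x ## _subsys;" */
	extern struct container_subsys cpuset_subsys;
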
+diff -Nurb linux-2.6.22-570/include/linux/cpu_acct.h linux-2.6.22-try2/include/linux/cpu_acct.h
+--- linux-2.6.22-570/include/linux/cpu_acct.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/linux/cpu_acct.h	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,14 @@
++
++#ifndef _LINUX_CPU_ACCT_H
++#define _LINUX_CPU_ACCT_H
++
++#include <linux/container.h>
++#include <asm/cputime.h>
++
++#ifdef CONFIG_CONTAINER_CPUACCT
++extern void cpuacct_charge(struct task_struct *, cputime_t cputime);
++#else
++static inline void cpuacct_charge(struct task_struct *p, cputime_t cputime) {}
++#endif
++
++#endif
+diff -Nurb linux-2.6.22-570/include/linux/cpuidle.h linux-2.6.22-try2/include/linux/cpuidle.h
+--- linux-2.6.22-570/include/linux/cpuidle.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/linux/cpuidle.h	2007-12-19 15:29:22.000000000 -0500
+@@ -0,0 +1,189 @@
++/*
++ * cpuidle.h - a generic framework for CPU idle power management
++ *
++ * (C) 2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
++ *          Shaohua Li <shaohua.li@intel.com>
++ *          Adam Belay <abelay@novell.com>
++ *
++ * This code is licenced under the GPL.
++ */
++
++#ifndef _LINUX_CPUIDLE_H
++#define _LINUX_CPUIDLE_H
++
++#include <linux/percpu.h>
++#include <linux/list.h>
++#include <linux/module.h>
++#include <linux/kobject.h>
++#include <linux/completion.h>
++
++#define CPUIDLE_STATE_MAX	8
++#define CPUIDLE_NAME_LEN	16
++
++struct cpuidle_device;
++
++
++/****************************
++ * CPUIDLE DEVICE INTERFACE *
++ ****************************/
++
++struct cpuidle_state {
++	char		name[CPUIDLE_NAME_LEN];
++	void		*driver_data;
++
++	unsigned int	flags;
++	unsigned int	exit_latency; /* in US */
++	unsigned int	power_usage; /* in mW */
++	unsigned int	target_residency; /* in US */
++
++	unsigned int	usage;
++	unsigned int	time; /* in US */
++
++	int (*enter)	(struct cpuidle_device *dev,
++			 struct cpuidle_state *state);
++};
++
++/* Idle State Flags */
++#define CPUIDLE_FLAG_TIME_VALID	(0x01) /* is residency time measurable? */
++#define CPUIDLE_FLAG_CHECK_BM	(0x02) /* BM activity will exit state */
++#define CPUIDLE_FLAG_SHALLOW	(0x10) /* low latency, minimal savings */
++#define CPUIDLE_FLAG_BALANCED	(0x20) /* medium latency, moderate savings */
++#define CPUIDLE_FLAG_DEEP	(0x40) /* high latency, large savings */
++
++#define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
++
++/**
++ * cpuidle_get_statedata - retrieves private driver state data
++ * @state: the state
++ */
++static inline void * cpuidle_get_statedata(struct cpuidle_state *state)
++{
++	return state->driver_data;
++}
++
++/**
++ * cpuidle_set_statedata - stores private driver state data
++ * @state: the state
++ * @data: the private data
++ */
++static inline void
++cpuidle_set_statedata(struct cpuidle_state *state, void *data)
++{
++	state->driver_data = data;
++}
++
++struct cpuidle_state_kobj {
++	struct cpuidle_state *state;
++	struct completion kobj_unregister;
++	struct kobject kobj;
++};
++
++struct cpuidle_device {
++	unsigned int		status;
++	int			cpu;
++
++	int			last_residency;
++	int			state_count;
++	struct cpuidle_state	states[CPUIDLE_STATE_MAX];
++	struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
++	struct cpuidle_state	*last_state;
++
++	struct list_head 	device_list;
++	struct kobject		kobj;
++	struct completion	kobj_unregister;
++	void			*governor_data;
++};
++
++DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
++
++/* Device Status Flags */
++#define CPUIDLE_STATUS_DETECTED		 (0x1)
++#define CPUIDLE_STATUS_DRIVER_ATTACHED	 (0x2)
++#define CPUIDLE_STATUS_GOVERNOR_ATTACHED (0x4)
++#define CPUIDLE_STATUS_DOIDLE		 (CPUIDLE_STATUS_DETECTED | \
++					  CPUIDLE_STATUS_DRIVER_ATTACHED | \
++					  CPUIDLE_STATUS_GOVERNOR_ATTACHED)
++
++/**
++ * cpuidle_get_last_residency - retrieves the last state's residency time
++ * @dev: the target CPU
++ *
++ * NOTE: this value is invalid if CPUIDLE_FLAG_TIME_VALID isn't set
++ */
++static inline int cpuidle_get_last_residency(struct cpuidle_device *dev)
++{
++	return dev->last_residency;
++}
++
++
++/****************************
++ * CPUIDLE DRIVER INTERFACE *
++ ****************************/
++
++struct cpuidle_driver {
++	char			name[CPUIDLE_NAME_LEN];
++	struct list_head 	driver_list;
++
++	int  (*init)		(struct cpuidle_device *dev);
++	void (*exit)		(struct cpuidle_device *dev);
++	int  (*redetect)	(struct cpuidle_device *dev);
++
++	int  (*bm_check)	(void);
++
++	struct module 		*owner;
++};
++
++#ifdef CONFIG_CPU_IDLE
++
++extern int cpuidle_register_driver(struct cpuidle_driver *drv);
++extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
++extern int cpuidle_force_redetect(struct cpuidle_device *dev, struct cpuidle_driver *drv);
++extern int cpuidle_force_redetect_devices(struct cpuidle_driver *drv);
++
++#else
++
++static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
++{return 0;}
++static inline void cpuidle_unregister_driver(struct cpuidle_driver *drv) { }
++static inline int cpuidle_force_redetect(struct cpuidle_device *dev, struct cpuidle_driver *drv)
++{return 0;}
++static inline int cpuidle_force_redetect_devices(struct cpuidle_driver *drv)
++{return 0;}
++
++#endif
++
++/******************************
++ * CPUIDLE GOVERNOR INTERFACE *
++ ******************************/
++
++struct cpuidle_governor {
++	char			name[CPUIDLE_NAME_LEN];
++	struct list_head 	governor_list;
++
++	int  (*init)		(struct cpuidle_device *dev);
++	void (*exit)		(struct cpuidle_device *dev);
++	void (*scan)		(struct cpuidle_device *dev);
++
++	int  (*select)		(struct cpuidle_device *dev);
++	void (*reflect)		(struct cpuidle_device *dev);
++
++	struct module 		*owner;
++};
++
++#ifdef CONFIG_CPU_IDLE
++
++extern int cpuidle_register_governor(struct cpuidle_governor *gov);
++extern void cpuidle_unregister_governor(struct cpuidle_governor *gov);
++extern int cpuidle_get_bm_activity(void);
++
++#else
++
++static inline int cpuidle_register_governor(struct cpuidle_governor *gov)
++{return 0;}
++static inline void cpuidle_unregister_governor(struct cpuidle_governor *gov) { }
++static inline int cpuidle_get_bm_activity(void)
++{return 0;}
++
++#endif
++
++#endif /* _LINUX_CPUIDLE_H */
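
Usage sketch (illustrative only, not part of the patch): a driver's ->enter hook
retrieves its private per-state data via cpuidle_get_statedata(); the return value
is apparently recorded as last_residency (time spent in the state, in us), per the
fields above. struct my_cstate and my_hw_idle() are hypothetical:

	struct my_cstate;				/* hypothetical */
	extern int my_hw_idle(struct my_cstate *cx);	/* hypothetical: idles,
							 * returns residency in us */

	static int my_enter(struct cpuidle_device *dev,
			    struct cpuidle_state *state)
	{
		struct my_cstate *cx = cpuidle_get_statedata(state);

		return my_hw_idle(cx);
	}
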
+diff -Nurb linux-2.6.22-570/include/linux/cpuset.h linux-2.6.22-try2/include/linux/cpuset.h
+--- linux-2.6.22-570/include/linux/cpuset.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/cpuset.h	2007-12-19 15:29:24.000000000 -0500
+@@ -11,6 +11,7 @@
+ #include <linux/sched.h>
+ #include <linux/cpumask.h>
+ #include <linux/nodemask.h>
++#include <linux/container.h>
+ 
+ #ifdef CONFIG_CPUSETS
+ 
+@@ -19,8 +20,6 @@
+ extern int cpuset_init_early(void);
+ extern int cpuset_init(void);
+ extern void cpuset_init_smp(void);
+-extern void cpuset_fork(struct task_struct *p);
+-extern void cpuset_exit(struct task_struct *p);
+ extern cpumask_t cpuset_cpus_allowed(struct task_struct *p);
+ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
+ #define cpuset_current_mems_allowed (current->mems_allowed)
+@@ -75,13 +74,13 @@
+ 
+ extern void cpuset_track_online_nodes(void);
+ 
++extern int current_cpuset_is_being_rebound(void);
++
+ #else /* !CONFIG_CPUSETS */
+ 
+ static inline int cpuset_init_early(void) { return 0; }
+ static inline int cpuset_init(void) { return 0; }
+ static inline void cpuset_init_smp(void) {}
+-static inline void cpuset_fork(struct task_struct *p) {}
+-static inline void cpuset_exit(struct task_struct *p) {}
+ 
+ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p)
+ {
+@@ -146,6 +145,11 @@
+ 
+ static inline void cpuset_track_online_nodes(void) {}
+ 
++static inline int current_cpuset_is_being_rebound(void)
++{
++	return 0;
++}
++
+ #endif /* !CONFIG_CPUSETS */
+ 
+ #endif /* _LINUX_CPUSET_H */
+diff -Nurb linux-2.6.22-570/include/linux/device.h linux-2.6.22-try2/include/linux/device.h
+--- linux-2.6.22-570/include/linux/device.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/device.h	2007-12-19 15:29:22.000000000 -0500
+@@ -238,7 +238,6 @@
+  * @devt: for internal use by the driver core only.
+  * @node: for internal use by the driver core only.
+  * @kobj: for internal use by the driver core only.
+- * @devt_attr: for internal use by the driver core only.
+  * @groups: optional additional groups to be created
+  * @dev: if set, a symlink to the struct device is created in the sysfs
+  * directory for this struct class device.
+@@ -263,8 +262,6 @@
+ 	struct kobject		kobj;
+ 	struct class		* class;	/* required */
+ 	dev_t			devt;		/* dev_t, creates the sysfs "dev" */
+-	struct class_device_attribute *devt_attr;
+-	struct class_device_attribute uevent_attr;
+ 	struct device		* dev;		/* not necessary, but nice to have */
+ 	void			* class_data;	/* class-specific data */
+ 	struct class_device	*parent;	/* parent of this child device, if there is one */
+@@ -419,8 +416,6 @@
+ 	struct device_type	*type;
+ 	unsigned		is_registered:1;
+ 	unsigned		uevent_suppress:1;
+-	struct device_attribute uevent_attr;
+-	struct device_attribute *devt_attr;
+ 
+ 	struct semaphore	sem;	/* semaphore to synchronize calls to
+ 					 * its driver.
+diff -Nurb linux-2.6.22-570/include/linux/dmaengine.h linux-2.6.22-try2/include/linux/dmaengine.h
+--- linux-2.6.22-570/include/linux/dmaengine.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/dmaengine.h	2007-12-19 15:29:23.000000000 -0500
+@@ -21,29 +21,40 @@
+ #ifndef DMAENGINE_H
+ #define DMAENGINE_H
+ 
+-#ifdef CONFIG_DMA_ENGINE
+-
+ #include <linux/device.h>
+ #include <linux/uio.h>
+ #include <linux/kref.h>
+ #include <linux/completion.h>
+ #include <linux/rcupdate.h>
++#include <linux/dma-mapping.h>
+ 
+ /**
+- * enum dma_event - resource PNP/power managment events
++ * enum dma_state - resource PNP/power management state
+  * @DMA_RESOURCE_SUSPEND: DMA device going into low power state
+  * @DMA_RESOURCE_RESUME: DMA device returning to full power
+- * @DMA_RESOURCE_ADDED: DMA device added to the system
++ * @DMA_RESOURCE_AVAILABLE: DMA device available to the system
+  * @DMA_RESOURCE_REMOVED: DMA device removed from the system
+  */
+-enum dma_event {
++enum dma_state {
+ 	DMA_RESOURCE_SUSPEND,
+ 	DMA_RESOURCE_RESUME,
+-	DMA_RESOURCE_ADDED,
++	DMA_RESOURCE_AVAILABLE,
+ 	DMA_RESOURCE_REMOVED,
+ };
+ 
+ /**
++ * enum dma_state_client - state of the channel in the client
++ * @DMA_ACK: client would like to use, or was using this channel
++ * @DMA_DUP: client has already seen this channel, or is not using this channel
++ * @DMA_NAK: client does not want to see any more channels
++ */
++enum dma_state_client {
++	DMA_ACK,
++	DMA_DUP,
++	DMA_NAK,
++};
++
++/**
+  * typedef dma_cookie_t - an opaque DMA cookie
+  *
+  * if dma_cookie_t is >0 it's a DMA request cookie, <0 it's an error code
+@@ -65,6 +76,31 @@
+ };
+ 
+ /**
++ * enum dma_transaction_type - DMA transaction types/indexes
++ */
++enum dma_transaction_type {
++	DMA_MEMCPY,
++	DMA_XOR,
++	DMA_PQ_XOR,
++	DMA_DUAL_XOR,
++	DMA_PQ_UPDATE,
++	DMA_ZERO_SUM,
++	DMA_PQ_ZERO_SUM,
++	DMA_MEMSET,
++	DMA_MEMCPY_CRC32C,
++	DMA_INTERRUPT,
++};
++
++/* last transaction type for creation of the capabilities mask */
++#define DMA_TX_TYPE_END (DMA_INTERRUPT + 1)
++
++/**
++ * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
++ * See linux/cpumask.h
++ */
++typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t;
++
++/**
+  * struct dma_chan_percpu - the per-CPU part of struct dma_chan
+  * @refcount: local_t used for open-coded "bigref" counting
+  * @memcpy_count: transaction counter
+@@ -80,7 +116,6 @@
+ 
+ /**
+  * struct dma_chan - devices supply DMA channels, clients use them
+- * @client: ptr to the client user of this chan, will be %NULL when unused
+  * @device: ptr to the dma device who supplies this channel, always !%NULL
+  * @cookie: last cookie value returned to client
+  * @chan_id: channel ID for sysfs
+@@ -88,12 +123,10 @@
+  * @refcount: kref, used in "bigref" slow-mode
+  * @slow_ref: indicates that the DMA channel is free
+  * @rcu: the DMA channel's RCU head
+- * @client_node: used to add this to the client chan list
+  * @device_node: used to add this to the device chan list
+  * @local: per-cpu pointer to a struct dma_chan_percpu
+  */
+ struct dma_chan {
+-	struct dma_client *client;
+ 	struct dma_device *device;
+ 	dma_cookie_t cookie;
+ 
+@@ -105,11 +138,11 @@
+ 	int slow_ref;
+ 	struct rcu_head rcu;
+ 
+-	struct list_head client_node;
+ 	struct list_head device_node;
+ 	struct dma_chan_percpu *local;
+ };
+ 
++
+ void dma_chan_cleanup(struct kref *kref);
+ 
+ static inline void dma_chan_get(struct dma_chan *chan)
+@@ -134,27 +167,68 @@
+ 
+ /*
+  * typedef dma_event_callback - function pointer to a DMA event callback
+- */
+-typedef void (*dma_event_callback) (struct dma_client *client,
+-		struct dma_chan *chan, enum dma_event event);
++ * For each channel added to the system this routine is called for each client.
++ * If the client would like to use the channel it returns '1' to signal (ack)
++ * the dmaengine core to take out a reference on the channel and its
++ * corresponding device.  A client must not 'ack' an available channel more
++ * than once.  When a channel is removed all clients are notified.  If a client
++ * is using the channel it must 'ack' the removal.  A client must not 'ack' a
++ * removed channel more than once.
++ * @client - 'this' pointer for the client context
++ * @chan - channel to be acted upon
++ * @state - available or removed
++ */
++struct dma_client;
++typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client,
++		struct dma_chan *chan, enum dma_state state);
+ 
+ /**
+  * struct dma_client - info on the entity making use of DMA services
+  * @event_callback: func ptr to call when something happens
+- * @chan_count: number of chans allocated
+- * @chans_desired: number of chans requested. Can be +/- chan_count
+- * @lock: protects access to the channels list
+- * @channels: the list of DMA channels allocated
++ * @cap_mask: only return channels that satisfy the requested capabilities
++ *  a value of zero corresponds to any capability
+  * @global_node: list_head for global dma_client_list
+  */
+ struct dma_client {
+ 	dma_event_callback	event_callback;
+-	unsigned int		chan_count;
+-	unsigned int		chans_desired;
++	dma_cap_mask_t		cap_mask;
++	struct list_head	global_node;
++};
+ 
++typedef void (*dma_async_tx_callback)(void *dma_async_param);
++/**
++ * struct dma_async_tx_descriptor - async transaction descriptor
++ * @cookie: tracking cookie for this transaction, set to -EBUSY if
++ *	this tx is sitting on a dependency list
++ * @ack: the descriptor can not be reused until the client acknowledges
++ *	receipt, i.e. has had a chance to establish any dependency chains
++ * @callback: routine to call after this operation is complete
++ * @callback_param: general parameter to pass to the callback routine
++ * @chan: target channel for this operation
++ * @tx_submit: execute an operation
++ * @tx_set_dest: set a destination address in a hardware descriptor
++ * @tx_set_src: set a source address in a hardware descriptor
++ * @depend_list: at completion this list of transactions are submitted
++ * @depend_node: allow this transaction to be executed after another
++ *	transaction has completed
++ * @parent: pointer to the next level up in the dependency chain
++ * @lock: protect the dependency list
++ */
++struct dma_async_tx_descriptor {
++	dma_cookie_t cookie;
++	int ack;
++	dma_async_tx_callback callback;
++	void *callback_param;
++	struct dma_chan *chan;
++	dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx);
++	void (*tx_set_dest)(dma_addr_t addr,
++		struct dma_async_tx_descriptor *tx, int index);
++	void (*tx_set_src)(dma_addr_t addr,
++		struct dma_async_tx_descriptor *tx, int index);
++	struct list_head depend_list;
++	struct list_head depend_node;
++	struct dma_async_tx_descriptor *parent;
+ 	spinlock_t		lock;
+-	struct list_head	channels;
+-	struct list_head	global_node;
+ };
+ 
+ /**
+@@ -162,141 +236,130 @@
+  * @chancnt: how many DMA channels are supported
+  * @channels: the list of struct dma_chan
+  * @global_node: list_head for global dma_device_list
++ * @cap_mask: one or more dma_capability flags
++ * @max_xor: maximum number of xor sources, 0 if no capability
+  * @refcount: reference count
+  * @done: IO completion struct
+  * @dev_id: unique device ID
++ * @dev: struct device reference for dma mapping api
+  * @device_alloc_chan_resources: allocate resources and return the
+  *	number of allocated descriptors
+  * @device_free_chan_resources: release DMA channel's resources
+- * @device_memcpy_buf_to_buf: memcpy buf pointer to buf pointer
+- * @device_memcpy_buf_to_pg: memcpy buf pointer to struct page
+- * @device_memcpy_pg_to_pg: memcpy struct page/offset to struct page/offset
+- * @device_memcpy_complete: poll the status of an IOAT DMA transaction
+- * @device_memcpy_issue_pending: push appended descriptors to hardware
++ * @device_prep_dma_memcpy: prepares a memcpy operation
++ * @device_prep_dma_xor: prepares a xor operation
++ * @device_prep_dma_zero_sum: prepares a zero_sum operation
++ * @device_prep_dma_memset: prepares a memset operation
++ * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
++ * @device_dependency_added: async_tx notifies the channel about new deps
++ * @device_issue_pending: push pending transactions to hardware
+  */
+ struct dma_device {
+ 
+ 	unsigned int chancnt;
+ 	struct list_head channels;
+ 	struct list_head global_node;
++	dma_cap_mask_t  cap_mask;
++	int max_xor;
+ 
+ 	struct kref refcount;
+ 	struct completion done;
+ 
+ 	int dev_id;
++	struct device *dev;
+ 
+ 	int (*device_alloc_chan_resources)(struct dma_chan *chan);
+ 	void (*device_free_chan_resources)(struct dma_chan *chan);
+-	dma_cookie_t (*device_memcpy_buf_to_buf)(struct dma_chan *chan,
+-			void *dest, void *src, size_t len);
+-	dma_cookie_t (*device_memcpy_buf_to_pg)(struct dma_chan *chan,
+-			struct page *page, unsigned int offset, void *kdata,
+-			size_t len);
+-	dma_cookie_t (*device_memcpy_pg_to_pg)(struct dma_chan *chan,
+-			struct page *dest_pg, unsigned int dest_off,
+-			struct page *src_pg, unsigned int src_off, size_t len);
+-	enum dma_status (*device_memcpy_complete)(struct dma_chan *chan,
++
++	struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
++		struct dma_chan *chan, size_t len, int int_en);
++	struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
++		struct dma_chan *chan, unsigned int src_cnt, size_t len,
++		int int_en);
++	struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
++		struct dma_chan *chan, unsigned int src_cnt, size_t len,
++		u32 *result, int int_en);
++	struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
++		struct dma_chan *chan, int value, size_t len, int int_en);
++	struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
++		struct dma_chan *chan);
++
++	void (*device_dependency_added)(struct dma_chan *chan);
++	enum dma_status (*device_is_tx_complete)(struct dma_chan *chan,
+ 			dma_cookie_t cookie, dma_cookie_t *last,
+ 			dma_cookie_t *used);
+-	void (*device_memcpy_issue_pending)(struct dma_chan *chan);
++	void (*device_issue_pending)(struct dma_chan *chan);
+ };
+ 
+ /* --- public DMA engine API --- */
+ 
+-struct dma_client *dma_async_client_register(dma_event_callback event_callback);
++void dma_async_client_register(struct dma_client *client);
+ void dma_async_client_unregister(struct dma_client *client);
+-void dma_async_client_chan_request(struct dma_client *client,
+-		unsigned int number);
++void dma_async_client_chan_request(struct dma_client *client);
++dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
++	void *dest, void *src, size_t len);
++dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
++	struct page *page, unsigned int offset, void *kdata, size_t len);
++dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
++	struct page *dest_pg, unsigned int dest_off, struct page *src_pg,
++	unsigned int src_off, size_t len);
++void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
++	struct dma_chan *chan);
+ 
+-/**
+- * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
+- * @chan: DMA channel to offload copy to
+- * @dest: destination address (virtual)
+- * @src: source address (virtual)
+- * @len: length
+- *
+- * Both @dest and @src must be mappable to a bus address according to the
+- * DMA mapping API rules for streaming mappings.
+- * Both @dest and @src must stay memory resident (kernel memory or locked
+- * user space pages).
+- */
+-static inline dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan,
+-	void *dest, void *src, size_t len)
++static inline void
++async_tx_ack(struct dma_async_tx_descriptor *tx)
+ {
+-	int cpu = get_cpu();
+-	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
+-	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
+-	put_cpu();
+-
+-	return chan->device->device_memcpy_buf_to_buf(chan, dest, src, len);
++	tx->ack = 1;
+ }
+ 
+-/**
+- * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
+- * @chan: DMA channel to offload copy to
+- * @page: destination page
+- * @offset: offset in page to copy to
+- * @kdata: source address (virtual)
+- * @len: length
+- *
+- * Both @page/@offset and @kdata must be mappable to a bus address according
+- * to the DMA mapping API rules for streaming mappings.
+- * Both @page/@offset and @kdata must stay memory resident (kernel memory or
+- * locked user space pages)
+- */
+-static inline dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan,
+-	struct page *page, unsigned int offset, void *kdata, size_t len)
++#define first_dma_cap(mask) __first_dma_cap(&(mask))
++static inline int __first_dma_cap(const dma_cap_mask_t *srcp)
+ {
+-	int cpu = get_cpu();
+-	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
+-	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
+-	put_cpu();
++	return min_t(int, DMA_TX_TYPE_END,
++		find_first_bit(srcp->bits, DMA_TX_TYPE_END));
++}
+ 
+-	return chan->device->device_memcpy_buf_to_pg(chan, page, offset,
+-	                                             kdata, len);
++#define next_dma_cap(n, mask) __next_dma_cap((n), &(mask))
++static inline int __next_dma_cap(int n, const dma_cap_mask_t *srcp)
++{
++	return min_t(int, DMA_TX_TYPE_END,
++		find_next_bit(srcp->bits, DMA_TX_TYPE_END, n+1));
+ }
+ 
+-/**
+- * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
+- * @chan: DMA channel to offload copy to
+- * @dest_pg: destination page
+- * @dest_off: offset in page to copy to
+- * @src_pg: source page
+- * @src_off: offset in page to copy from
+- * @len: length
+- *
+- * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
+- * address according to the DMA mapping API rules for streaming mappings.
+- * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
+- * (kernel memory or locked user space pages).
+- */
+-static inline dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan,
+-	struct page *dest_pg, unsigned int dest_off, struct page *src_pg,
+-	unsigned int src_off, size_t len)
++#define dma_cap_set(tx, mask) __dma_cap_set((tx), &(mask))
++static inline void
++__dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp)
+ {
+-	int cpu = get_cpu();
+-	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
+-	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
+-	put_cpu();
++	set_bit(tx_type, dstp->bits);
++}
+ 
+-	return chan->device->device_memcpy_pg_to_pg(chan, dest_pg, dest_off,
+-	                                            src_pg, src_off, len);
++#define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask))
++static inline int
++__dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp)
++{
++	return test_bit(tx_type, srcp->bits);
+ }
+ 
++#define for_each_dma_cap_mask(cap, mask) \
++	for ((cap) = first_dma_cap(mask);	\
++		(cap) < DMA_TX_TYPE_END;	\
++		(cap) = next_dma_cap((cap), (mask)))
++
+ /**
+- * dma_async_memcpy_issue_pending - flush pending copies to HW
++ * dma_async_issue_pending - flush pending transactions to HW
+  * @chan: target DMA channel
+  *
+  * This allows drivers to push copies to HW in batches,
+  * reducing MMIO writes where possible.
+  */
+-static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan)
++static inline void dma_async_issue_pending(struct dma_chan *chan)
+ {
+-	return chan->device->device_memcpy_issue_pending(chan);
++	return chan->device->device_issue_pending(chan);
+ }
+ 
++#define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan)
++
+ /**
+- * dma_async_memcpy_complete - poll for transaction completion
++ * dma_async_is_tx_complete - poll for transaction completion
+  * @chan: DMA channel
+  * @cookie: transaction identifier to check status of
+  * @last: returns last completed cookie, can be NULL
+@@ -306,12 +369,15 @@
+  * internal state and can be used with dma_async_is_complete() to check
+  * the status of multiple cookies without re-checking hardware state.
+  */
+-static inline enum dma_status dma_async_memcpy_complete(struct dma_chan *chan,
++static inline enum dma_status dma_async_is_tx_complete(struct dma_chan *chan,
+ 	dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used)
+ {
+-	return chan->device->device_memcpy_complete(chan, cookie, last, used);
++	return chan->device->device_is_tx_complete(chan, cookie, last, used);
+ }
+ 
++#define dma_async_memcpy_complete(chan, cookie, last, used)\
++	dma_async_is_tx_complete(chan, cookie, last, used)
++
+ /**
+  * dma_async_is_complete - test a cookie against chan state
+  * @cookie: transaction identifier to test status of
+@@ -334,6 +400,7 @@
+ 	return DMA_IN_PROGRESS;
+ }
+ 
++enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie);
+ 
+ /* --- DMA device --- */
+ 
+@@ -362,5 +429,4 @@
+ 	struct dma_pinned_list *pinned_list, struct page *page,
+ 	unsigned int offset, size_t len);
+ 
+-#endif /* CONFIG_DMA_ENGINE */
+ #endif /* DMAENGINE_H */
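
Usage sketch (illustrative only, not part of the patch): under the new scheme a
client declares the capabilities it wants, registers, and then acks or ignores
each channel it is offered, following the callback contract documented above:

	static enum dma_state_client
	my_event(struct dma_client *client, struct dma_chan *chan,
		 enum dma_state state)
	{
		switch (state) {
		case DMA_RESOURCE_AVAILABLE:
			return DMA_ACK;	/* take a reference on the channel */
		case DMA_RESOURCE_REMOVED:
			return DMA_ACK;	/* acknowledge the removal */
		default:
			return DMA_DUP;
		}
	}

	static struct dma_client my_client = {
		.event_callback = my_event,
	};

	static int __init my_init(void)
	{
		dma_cap_set(DMA_MEMCPY, my_client.cap_mask);
		dma_async_client_register(&my_client);
		dma_async_client_chan_request(&my_client);
		return 0;
	}
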
+diff -Nurb linux-2.6.22-570/include/linux/etherdevice.h linux-2.6.22-try2/include/linux/etherdevice.h
+--- linux-2.6.22-570/include/linux/etherdevice.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/etherdevice.h	2007-12-19 15:29:23.000000000 -0500
+@@ -40,12 +40,6 @@
+ 					 struct hh_cache *hh);
+ 
+ extern struct net_device *alloc_etherdev(int sizeof_priv);
+-static inline void eth_copy_and_sum (struct sk_buff *dest, 
+-				     const unsigned char *src, 
+-				     int len, int base)
+-{
+-	memcpy (dest->data, src, len);
+-}
+ 
+ /**
+  * is_zero_ether_addr - Determine if give Ethernet address is all zeros.
+diff -Nurb linux-2.6.22-570/include/linux/freezer.h linux-2.6.22-try2/include/linux/freezer.h
+--- linux-2.6.22-570/include/linux/freezer.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/freezer.h	2007-12-19 15:29:24.000000000 -0500
+@@ -115,6 +115,14 @@
+ 	return !!(p->flags & PF_FREEZER_SKIP);
+ }
+ 
++/*
++ * Tell the freezer that the current task should be frozen by it
++ */
++static inline void set_freezable(void)
++{
++	current->flags &= ~PF_NOFREEZE;
++}
++
+ #else
+ static inline int frozen(struct task_struct *p) { return 0; }
+ static inline int freezing(struct task_struct *p) { return 0; }
+@@ -130,4 +138,5 @@
+ static inline void freezer_do_not_count(void) {}
+ static inline void freezer_count(void) {}
+ static inline int freezer_should_skip(struct task_struct *p) { return 0; }
++static inline void set_freezable(void) {}
+ #endif
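
Usage sketch (illustrative only, not part of the patch): with kernel threads
non-freezable by default, a thread that should participate in suspend opts in
with set_freezable() and then polls try_to_freeze() in its main loop:

	static int my_thread(void *unused)
	{
		set_freezable();	/* clear PF_NOFREEZE for this task */
		while (!kthread_should_stop()) {
			try_to_freeze();
			/* ... do one unit of work ... */
		}
		return 0;
	}
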
+diff -Nurb linux-2.6.22-570/include/linux/fs.h linux-2.6.22-try2/include/linux/fs.h
+--- linux-2.6.22-570/include/linux/fs.h	2007-12-12 18:08:41.000000000 -0500
++++ linux-2.6.22-try2/include/linux/fs.h	2007-12-19 15:51:08.000000000 -0500
+@@ -283,6 +283,17 @@
+ #define SYNC_FILE_RANGE_WRITE		2
+ #define SYNC_FILE_RANGE_WAIT_AFTER	4
+ 
++/*
++ * sys_fallocate modes
++ * Currently sys_fallocate supports two modes:
++ * FA_ALLOCATE  : This is the preallocate mode, using which an application/user
++ *		  may request (pre)allocation of blocks.
++ * FA_DEALLOCATE: This is the deallocate mode, which can be used to free
++ *		  the preallocated blocks.
++ */
++#define FA_ALLOCATE	0x1
++#define FA_DEALLOCATE	0x2
++
+ #ifdef __KERNEL__
+ 
+ #include <linux/linkage.h>
+@@ -300,6 +311,7 @@
+ #include <linux/init.h>
+ #include <linux/pid.h>
+ #include <linux/mutex.h>
++#include <linux/sysctl.h>
+ 
+ #include <asm/atomic.h>
+ #include <asm/semaphore.h>
+@@ -1139,6 +1151,7 @@
+ 	int (*flock) (struct file *, int, struct file_lock *);
+ 	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
+ 	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
++	int (*revoke)(struct file *, struct address_space *);
+ };
+ 
+ struct inode_operations {
+@@ -1164,6 +1177,8 @@
+ 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
+ 	int (*removexattr) (struct dentry *, const char *);
+ 	void (*truncate_range)(struct inode *, loff_t, loff_t);
++ 	long (*fallocate)(struct inode *inode, int mode, loff_t offset,
++ 			  loff_t len);
+ 	int (*sync_flags) (struct inode *);
+ };
+ 
+@@ -1809,6 +1824,13 @@
+ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+ 		size_t len, unsigned int flags);
+ 
++/* fs/revoke.c */
++#ifdef CONFIG_MMU
++extern int generic_file_revoke(struct file *, struct address_space *);
++#else
++#define generic_file_revoke NULL
++#endif
++
+ extern void
+ file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
+ extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
+@@ -2077,5 +2099,9 @@
+ { }
+ #endif	/* CONFIG_SECURITY */
+ 
++int proc_nr_files(ctl_table *table, int write, struct file *filp,
++		  void __user *buffer, size_t *lenp, loff_t *ppos);
++
++
+ #endif /* __KERNEL__ */
+ #endif /* _LINUX_FS_H */
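
Usage sketch (illustrative only, not part of the patch): a filesystem advertises
preallocation support by filling the new ->fallocate hook in its inode_operations.
myfs_fallocate() is hypothetical:

	static long myfs_fallocate(struct inode *inode, int mode,
				   loff_t offset, loff_t len)
	{
		if (mode != FA_ALLOCATE && mode != FA_DEALLOCATE)
			return -EOPNOTSUPP;
		/* ... reserve or release blocks for [offset, offset+len) ... */
		return 0;
	}

	static const struct inode_operations myfs_file_inode_ops = {
		.fallocate	= myfs_fallocate,
	};
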
+diff -Nurb linux-2.6.22-570/include/linux/fs_stack.h linux-2.6.22-try2/include/linux/fs_stack.h
+--- linux-2.6.22-570/include/linux/fs_stack.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/fs_stack.h	2007-12-19 15:29:23.000000000 -0500
+@@ -1,7 +1,19 @@
++/*
++ * Copyright (c) 2006-2007 Erez Zadok
++ * Copyright (c) 2006-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2006-2007 Stony Brook University
++ * Copyright (c) 2006-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
+ #ifndef _LINUX_FS_STACK_H
+ #define _LINUX_FS_STACK_H
+ 
+-/* This file defines generic functions used primarily by stackable
++/*
++ * This file defines generic functions used primarily by stackable
+  * filesystems; none of these functions require i_mutex to be held.
+  */
+ 
+@@ -11,7 +23,8 @@
+ extern void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
+ 				int (*get_nlinks)(struct inode *));
+ 
+-extern void fsstack_copy_inode_size(struct inode *dst, const struct inode *src);
++extern void fsstack_copy_inode_size(struct inode *dst,
++				    const struct inode *src);
+ 
+ /* inlines */
+ static inline void fsstack_copy_attr_atime(struct inode *dest,
+diff -Nurb linux-2.6.22-570/include/linux/gfp.h linux-2.6.22-try2/include/linux/gfp.h
+--- linux-2.6.22-570/include/linux/gfp.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/gfp.h	2007-12-19 15:29:24.000000000 -0500
+@@ -30,6 +30,9 @@
+  * cannot handle allocation failures.
+  *
+  * __GFP_NORETRY: The VM implementation must not retry indefinitely.
++ *
++ * __GFP_MOVABLE: Flag that this page will be movable by the page migration
++ * mechanism or reclaimed
+  */
+ #define __GFP_WAIT	((__force gfp_t)0x10u)	/* Can wait and reschedule? */
+ #define __GFP_HIGH	((__force gfp_t)0x20u)	/* Should access emergency pools? */
+@@ -45,15 +48,21 @@
+ #define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
+ #define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+ #define __GFP_THISNODE	((__force gfp_t)0x40000u)/* No fallback, no policies */
++#define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
++#define __GFP_MOVABLE	((__force gfp_t)0x100000u)  /* Page is movable */
+ 
+-#define __GFP_BITS_SHIFT 20	/* Room for 20 __GFP_FOO bits */
++#define __GFP_BITS_SHIFT 21	/* Room for 21 __GFP_FOO bits */
+ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+ 
+ /* if you forget to add the bitmask here kernel will crash, period */
+ #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
+ 			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
+ 			__GFP_NOFAIL|__GFP_NORETRY|__GFP_COMP| \
+-			__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
++			__GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE| \
++			__GFP_RECLAIMABLE|__GFP_MOVABLE)
++
++/* This mask makes up all the page-mobility-related flags */
++#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
+ 
+ /* This equals 0, but use constants in case they ever change */
+ #define GFP_NOWAIT	(GFP_ATOMIC & ~__GFP_HIGH)
+@@ -62,9 +71,20 @@
+ #define GFP_NOIO	(__GFP_WAIT)
+ #define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
+ #define GFP_KERNEL	(__GFP_WAIT | __GFP_IO | __GFP_FS)
++#define GFP_TEMPORARY	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
++			 __GFP_RECLAIMABLE)
+ #define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
+ #define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
+ 			 __GFP_HIGHMEM)
++#define GFP_HIGHUSER_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
++				 __GFP_HARDWALL | __GFP_HIGHMEM | \
++				 __GFP_MOVABLE)
++#define GFP_NOFS_PAGECACHE	(__GFP_WAIT | __GFP_IO | __GFP_MOVABLE)
++#define GFP_USER_PAGECACHE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
++				 __GFP_HARDWALL | __GFP_MOVABLE)
++#define GFP_HIGHUSER_PAGECACHE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
++				 __GFP_HARDWALL | __GFP_HIGHMEM | \
++				 __GFP_MOVABLE)
+ 
+ #ifdef CONFIG_NUMA
+ #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
+@@ -99,6 +119,12 @@
+ 	return ZONE_NORMAL;
+ }
+ 
++static inline gfp_t set_migrateflags(gfp_t gfp, gfp_t migrate_flags)
++{
++	BUG_ON((gfp & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
++	return (gfp & ~(GFP_MOVABLE_MASK)) | migrate_flags;
++}
++
+ /*
+  * There is only one page-allocator function, and two main namespaces to
+  * it. The alloc_page*() variants return 'struct page *' and as such
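
For reference, a minimal sketch of how a caller combines a base GFP mask
with the new mobility flags via set_migrateflags(); the helper below is
hypothetical and not part of the patch:

	/* Tag an allocation as movable so the grouping allocator can
	 * place the page in a MOVABLE block. */
	static struct page *alloc_movable_page(void)
	{
		gfp_t gfp = set_migrateflags(GFP_HIGHUSER, __GFP_MOVABLE);

		return alloc_page(gfp);
	}
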
+diff -Nurb linux-2.6.22-570/include/linux/highmem.h linux-2.6.22-try2/include/linux/highmem.h
+--- linux-2.6.22-570/include/linux/highmem.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/highmem.h	2007-12-19 15:29:24.000000000 -0500
+@@ -73,10 +73,27 @@
+ }
+ 
+ #ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
++/**
++ * __alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA with caller-specified movable GFP flags
++ * @movableflags: The GFP flags related to the page's future ability to move, e.g. __GFP_MOVABLE
++ * @vma: The VMA the page is to be allocated for
++ * @vaddr: The virtual address the page will be inserted into
++ *
++ * This function will allocate a page for a VMA, but the caller is expected
++ * to specify via movableflags whether the page will be movable in the
++ * future or not.
++ *
++ * An architecture may override this function by defining
++ * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE and providing its own
++ * implementation.
++ */
+ static inline struct page *
+-alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
++__alloc_zeroed_user_highpage(gfp_t movableflags,
++			struct vm_area_struct *vma,
++			unsigned long vaddr)
+ {
+-	struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr);
++	struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags,
++			vma, vaddr);
+ 
+ 	if (page)
+ 		clear_user_highpage(page, vaddr);
+@@ -85,6 +102,36 @@
+ }
+ #endif
+ 
++/**
++ * alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA
++ * @vma: The VMA the page is to be allocated for
++ * @vaddr: The virtual address the page will be inserted into
++ *
++ * This function will allocate a page for a VMA that the caller knows will
++ * not be movable in the future via move_pages() or reclaim. If it is
++ * known that the page can move, use alloc_zeroed_user_highpage_movable().
++ */
++static inline struct page *
++alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
++{
++	return __alloc_zeroed_user_highpage(0, vma, vaddr);
++}
++
++/**
++ * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move
++ * @vma: The VMA the page is to be allocated for
++ * @vaddr: The virtual address the page will be inserted into
++ *
++ * This function will allocate a page for a VMA that the caller knows will
++ * be able to migrate in the future using move_pages() or be reclaimed.
++ */
++static inline struct page *
++alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
++					unsigned long vaddr)
++{
++	return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr);
++}
++
+ static inline void clear_highpage(struct page *page)
+ {
+ 	void *kaddr = kmap_atomic(page, KM_USER0);
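
To illustrate the intended split between the two variants above (a sketch,
not part of the patch): a path whose pages can later migrate or be reclaimed
picks the _movable flavour, anything pinned keeps the plain one:

	/* Hypothetical helper for an anonymous write fault; anonymous
	 * pages are swappable and therefore movable. */
	static struct page *anon_alloc(struct vm_area_struct *vma,
				       unsigned long address)
	{
		return alloc_zeroed_user_highpage_movable(vma, address);
	}
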
+diff -Nurb linux-2.6.22-570/include/linux/hugetlb.h linux-2.6.22-try2/include/linux/hugetlb.h
+--- linux-2.6.22-570/include/linux/hugetlb.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/hugetlb.h	2007-12-19 15:29:24.000000000 -0500
+@@ -15,6 +15,7 @@
+ }
+ 
+ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
++int hugetlb_treat_movable_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *);
+ int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
+ int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
+ void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
+@@ -29,6 +30,8 @@
+ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
+ 
+ extern unsigned long max_huge_pages;
++extern unsigned long hugepages_treat_as_movable;
++extern gfp_t htlb_alloc_mask;
+ extern const unsigned long hugetlb_zero, hugetlb_infinity;
+ extern int sysctl_hugetlb_shm_group;
+ 
+diff -Nurb linux-2.6.22-570/include/linux/if_link.h linux-2.6.22-try2/include/linux/if_link.h
+--- linux-2.6.22-570/include/linux/if_link.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/if_link.h	2007-12-19 15:29:23.000000000 -0500
+@@ -76,6 +76,8 @@
+ #define IFLA_WEIGHT IFLA_WEIGHT
+ 	IFLA_OPERSTATE,
+ 	IFLA_LINKMODE,
++	IFLA_LINKINFO,
++#define IFLA_LINKINFO IFLA_LINKINFO
+ 	__IFLA_MAX
+ };
+ 
+@@ -140,4 +142,49 @@
+ 	__u32	retrans_time;
+ };
+ 
++enum
++{
++	IFLA_INFO_UNSPEC,
++	IFLA_INFO_KIND,
++	IFLA_INFO_DATA,
++	IFLA_INFO_XSTATS,
++	__IFLA_INFO_MAX,
++};
++
++#define IFLA_INFO_MAX	(__IFLA_INFO_MAX - 1)
++
++/* VLAN section */
++
++enum
++{
++	IFLA_VLAN_UNSPEC,
++	IFLA_VLAN_ID,
++	IFLA_VLAN_FLAGS,
++	IFLA_VLAN_EGRESS_QOS,
++	IFLA_VLAN_INGRESS_QOS,
++	__IFLA_VLAN_MAX,
++};
++
++#define IFLA_VLAN_MAX	(__IFLA_VLAN_MAX - 1)
++
++struct ifla_vlan_flags {
++	__u32	flags;
++	__u32	mask;
++};
++
++enum
++{
++	IFLA_VLAN_QOS_UNSPEC,
++	IFLA_VLAN_QOS_MAPPING,
++	__IFLA_VLAN_QOS_MAX
++};
++
++#define IFLA_VLAN_QOS_MAX	(__IFLA_VLAN_QOS_MAX - 1)
++
++struct ifla_vlan_qos_mapping
++{
++	__u32 from;
++	__u32 to;
++};
++
+ #endif /* _LINUX_IF_LINK_H */
+diff -Nurb linux-2.6.22-570/include/linux/if_tun.h linux-2.6.22-try2/include/linux/if_tun.h
+--- linux-2.6.22-570/include/linux/if_tun.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/if_tun.h	2007-12-19 15:29:23.000000000 -0500
+@@ -36,6 +36,7 @@
+ 	unsigned long 		flags;
+ 	int			attached;
+ 	uid_t			owner;
++	gid_t			group;
+ 
+ 	wait_queue_head_t	read_wait;
+ 	struct sk_buff_head	readq;
+@@ -78,6 +79,7 @@
+ #define TUNSETPERSIST _IOW('T', 203, int) 
+ #define TUNSETOWNER   _IOW('T', 204, int)
+ #define TUNSETLINK    _IOW('T', 205, int)
++#define TUNSETGROUP   _IOW('T', 206, int)
+ 
+ /* TUNSETIFF ifr flags */
+ #define IFF_TUN		0x0001
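
From userspace the new TUNSETGROUP ioctl is used just like TUNSETOWNER. A
compilable sketch (device name and group id are placeholders):

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/if.h>
	#include <linux/if_tun.h>

	/* Attach to a tap device and restrict it to one group. */
	int tun_open_for_group(const char *name, gid_t gid)
	{
		struct ifreq ifr;
		int fd = open("/dev/net/tun", O_RDWR);

		if (fd < 0)
			return -1;
		memset(&ifr, 0, sizeof(ifr));
		ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
		strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
		if (ioctl(fd, TUNSETIFF, &ifr) < 0 ||
		    ioctl(fd, TUNSETGROUP, gid) < 0) {
			close(fd);
			return -1;
		}
		return fd;
	}
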
+diff -Nurb linux-2.6.22-570/include/linux/if_vlan.h linux-2.6.22-try2/include/linux/if_vlan.h
+--- linux-2.6.22-570/include/linux/if_vlan.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/if_vlan.h	2007-12-19 15:29:23.000000000 -0500
+@@ -99,7 +99,7 @@
+ }
+ 
+ struct vlan_priority_tci_mapping {
+-	unsigned long priority;
++	u32 priority;
+ 	unsigned short vlan_qos; /* This should be shifted when first set, so we only do it
+ 				  * at provisioning time.
+ 				  * ((skb->priority << 13) & 0xE000)
+@@ -112,7 +112,10 @@
+ 	/** This will be the mapping that correlates skb->priority to
+ 	 * 3 bits of VLAN QOS tags...
+ 	 */
+-	unsigned long ingress_priority_map[8];
++	unsigned int nr_ingress_mappings;
++	u32 ingress_priority_map[8];
++
++	unsigned int nr_egress_mappings;
+ 	struct vlan_priority_tci_mapping *egress_priority_map[16]; /* hash table */
+ 
+ 	unsigned short vlan_id;        /*  The VLAN Identifier for this interface. */
+@@ -395,6 +398,10 @@
+ 	GET_VLAN_VID_CMD /* Get the VID of this VLAN (specified by name) */
+ };
+ 
++enum vlan_flags {
++	VLAN_FLAG_REORDER_HDR	= 0x1,
++};
++
+ enum vlan_name_types {
+ 	VLAN_NAME_TYPE_PLUS_VID, /* Name will look like:  vlan0005 */
+ 	VLAN_NAME_TYPE_RAW_PLUS_VID, /* name will look like:  eth1.0005 */
+diff -Nurb linux-2.6.22-570/include/linux/init_task.h linux-2.6.22-try2/include/linux/init_task.h
+--- linux-2.6.22-570/include/linux/init_task.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/linux/init_task.h	2007-12-19 15:29:24.000000000 -0500
+@@ -8,6 +8,7 @@
+ #include <linux/lockdep.h>
+ #include <linux/ipc.h>
+ #include <linux/pid_namespace.h>
++#include <linux/user_namespace.h>
+ 
+ #define INIT_FDTABLE \
+ {							\
+@@ -78,6 +79,7 @@
+ 	.uts_ns		= &init_uts_ns,					\
+ 	.mnt_ns		= NULL,						\
+ 	INIT_IPC_NS(ipc_ns)						\
++	.user_ns	= &init_user_ns,				\
+ }
+ 
+ #define INIT_SIGHAND(sighand) {						\
+diff -Nurb linux-2.6.22-570/include/linux/io.h linux-2.6.22-try2/include/linux/io.h
+--- linux-2.6.22-570/include/linux/io.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/io.h	2007-12-19 15:29:24.000000000 -0500
+@@ -63,32 +63,7 @@
+ void __iomem * devm_ioremap_nocache(struct device *dev, unsigned long offset,
+ 				    unsigned long size);
+ void devm_iounmap(struct device *dev, void __iomem *addr);
+-
+-/**
+- *	check_signature		-	find BIOS signatures
+- *	@io_addr: mmio address to check
+- *	@signature:  signature block
+- *	@length: length of signature
+- *
+- *	Perform a signature comparison with the mmio address io_addr. This
+- *	address should have been obtained by ioremap.
+- *	Returns 1 on a match.
+- */
+-
+-static inline int check_signature(const volatile void __iomem *io_addr,
+-	const unsigned char *signature, int length)
+-{
+-	int retval = 0;
+-	do {
+-		if (readb(io_addr) != *signature)
+-			goto out;
+-		io_addr++;
+-		signature++;
+-		length--;
+-	} while (length);
+-	retval = 1;
+-out:
+-	return retval;
+-}
++int check_signature(const volatile void __iomem *io_addr,
++			const unsigned char *signature, int length);
+ 
+ #endif /* _LINUX_IO_H */
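
Callers are unchanged by moving check_signature() out of line; a typical
probe still reads as below (sketch; the signature and address are made up):

	static int my_probe(void)
	{
		static const unsigned char sig[] = "MYBIOS";
		void __iomem *base = ioremap(0x000d0000, 0x100);
		int found;

		if (!base)
			return -ENOMEM;
		found = check_signature(base, sig, sizeof(sig) - 1);
		iounmap(base);
		return found ? 0 : -ENODEV;
	}
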
+diff -Nurb linux-2.6.22-570/include/linux/ip_mp_alg.h linux-2.6.22-try2/include/linux/ip_mp_alg.h
+--- linux-2.6.22-570/include/linux/ip_mp_alg.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/ip_mp_alg.h	1969-12-31 19:00:00.000000000 -0500
+@@ -1,22 +0,0 @@
+-/* ip_mp_alg.h: IPV4 multipath algorithm support, user-visible values.
+- *
+- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
+- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
+- */
+-
+-#ifndef _LINUX_IP_MP_ALG_H
+-#define _LINUX_IP_MP_ALG_H
+-
+-enum ip_mp_alg {
+-	IP_MP_ALG_NONE,
+-	IP_MP_ALG_RR,
+-	IP_MP_ALG_DRR,
+-	IP_MP_ALG_RANDOM,
+-	IP_MP_ALG_WRANDOM,
+-	__IP_MP_ALG_MAX
+-};
+-
+-#define IP_MP_ALG_MAX (__IP_MP_ALG_MAX - 1)
+-
+-#endif /* _LINUX_IP_MP_ALG_H */
+-
+diff -Nurb linux-2.6.22-570/include/linux/ipc.h linux-2.6.22-try2/include/linux/ipc.h
+--- linux-2.6.22-570/include/linux/ipc.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/linux/ipc.h	2007-12-19 15:29:24.000000000 -0500
+@@ -93,6 +93,7 @@
+ 
+ #ifdef CONFIG_SYSVIPC
+ #define INIT_IPC_NS(ns)		.ns		= &init_ipc_ns,
++extern void free_ipc_ns(struct kref *kref);
+ extern struct ipc_namespace *copy_ipcs(unsigned long flags,
+ 						struct ipc_namespace *ns);
+ #else
+@@ -104,13 +105,9 @@
+ }
+ #endif
+ 
+-#ifdef CONFIG_IPC_NS
+-extern void free_ipc_ns(struct kref *kref);
+-#endif
+-
+ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
+ {
+-#ifdef CONFIG_IPC_NS
++#ifdef CONFIG_SYSVIPC
+ 	if (ns)
+ 		kref_get(&ns->kref);
+ #endif
+@@ -119,7 +116,7 @@
+ 
+ static inline void put_ipc_ns(struct ipc_namespace *ns)
+ {
+-#ifdef CONFIG_IPC_NS
++#ifdef CONFIG_SYSVIPC
+ 	kref_put(&ns->kref, free_ipc_ns);
+ #endif
+ }
+@@ -127,5 +124,3 @@
+ #endif /* __KERNEL__ */
+ 
+ #endif /* _LINUX_IPC_H */
+-
+-
+diff -Nurb linux-2.6.22-570/include/linux/ipv6.h linux-2.6.22-try2/include/linux/ipv6.h
+--- linux-2.6.22-570/include/linux/ipv6.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/ipv6.h	2007-12-19 15:29:23.000000000 -0500
+@@ -247,7 +247,7 @@
+ 	__u16			lastopt;
+ 	__u32			nhoff;
+ 	__u16			flags;
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 	__u16			dsthao;
+ #endif
+ 
+diff -Nurb linux-2.6.22-570/include/linux/kgdb.h linux-2.6.22-try2/include/linux/kgdb.h
+--- linux-2.6.22-570/include/linux/kgdb.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/linux/kgdb.h	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,291 @@
++/*
++ * include/linux/kgdb.h
++ *
++ * This provides the hooks and functions that KGDB needs to share between
++ * the core, I/O and arch-specific portions.
++ *
++ * Author: Amit Kale <amitkale@linsyssoft.com> and
++ *         Tom Rini <trini@kernel.crashing.org>
++ *
++ * 2001-2004 (c) Amit S. Kale and 2003-2005 (c) MontaVista Software, Inc.
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++#ifdef __KERNEL__
++#ifndef _KGDB_H_
++#define _KGDB_H_
++
++#include <asm/atomic.h>
++
++#ifdef CONFIG_KGDB
++#include <asm/kgdb.h>
++#include <linux/serial_8250.h>
++#include <linux/linkage.h>
++#include <linux/init.h>
++
++#ifndef CHECK_EXCEPTION_STACK
++#define CHECK_EXCEPTION_STACK()	1
++#endif
++
++struct tasklet_struct;
++struct pt_regs;
++struct task_struct;
++struct uart_port;
++
++#ifdef CONFIG_KGDB_CONSOLE
++extern struct console kgdbcons;
++#endif
++
++/* To enter the debugger explicitly. */
++extern void breakpoint(void);
++extern int kgdb_connected;
++extern int kgdb_may_fault;
++extern struct tasklet_struct kgdb_tasklet_breakpoint;
++
++extern atomic_t kgdb_setting_breakpoint;
++extern atomic_t cpu_doing_single_step;
++extern atomic_t kgdb_sync_softlockup[NR_CPUS];
++
++extern struct task_struct *kgdb_usethread, *kgdb_contthread;
++
++enum kgdb_bptype {
++	bp_breakpoint = '0',
++	bp_hardware_breakpoint,
++	bp_write_watchpoint,
++	bp_read_watchpoint,
++	bp_access_watchpoint
++};
++
++enum kgdb_bpstate {
++	bp_none = 0,
++	bp_removed,
++	bp_set,
++	bp_active
++};
++
++struct kgdb_bkpt {
++	unsigned long bpt_addr;
++	unsigned char saved_instr[BREAK_INSTR_SIZE];
++	enum kgdb_bptype type;
++	enum kgdb_bpstate state;
++};
++
++/* The maximum number of KGDB I/O modules that can be loaded */
++#define MAX_KGDB_IO_HANDLERS 3
++
++#ifndef MAX_BREAKPOINTS
++#define MAX_BREAKPOINTS		1000
++#endif
++
++#define KGDB_HW_BREAKPOINT	1
++
++/* Required functions. */
++/**
++ *	kgdb_arch_init - Perform any architecture-specific initialization.
++ *
++ *	This function will handle the initialization of any architecture
++ *	specific hooks.
++ */
++extern int kgdb_arch_init(void);
++
++/**
++ *	regs_to_gdb_regs - Convert ptrace regs to GDB regs
++ *	@gdb_regs: A pointer to hold the registers in the order GDB wants.
++ *	@regs: The &struct pt_regs of the current process.
++ *
++ *	Convert the pt_regs in @regs into the format for registers that
++ *	GDB expects, stored in @gdb_regs.
++ */
++extern void regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs);
++
++/**
++ *	sleeping_regs_to_gdb_regs - Convert ptrace regs to GDB regs
++ *	@gdb_regs: A pointer to hold the registers in the order GDB wants.
++ *	@p: The &struct task_struct of the desired process.
++ *
++ *	Convert the register values of the sleeping process in @p to
++ *	the format that GDB expects.
++ *	This function is called when kgdb does not have access to the
++ *	&struct pt_regs and therefore it should fill the gdb registers
++ *	@gdb_regs with what has been saved in the &struct thread_struct
++ *	thread field during switch_to.
++ */
++extern void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs,
++					struct task_struct *p);
++
++/**
++ *	gdb_regs_to_regs - Convert GDB regs to ptrace regs.
++ *	@gdb_regs: A pointer to hold the registers we've received from GDB.
++ *	@regs: A pointer to a &struct pt_regs to hold these values in.
++ *
++ *	Convert the GDB regs in @gdb_regs into the pt_regs, and store them
++ *	in @regs.
++ */
++extern void gdb_regs_to_regs(unsigned long *gdb_regs, struct pt_regs *regs);
++
++/**
++ *	kgdb_arch_handle_exception - Handle architecture specific GDB packets.
++ *	@vector: The error vector of the exception that happened.
++ *	@signo: The signal number of the exception that happened.
++ *	@err_code: The error code of the exception that happened.
++ *	@remcom_in_buffer: The buffer of the packet we have read.
++ *	@remcom_out_buffer: The buffer, of %BUFMAX to write a packet into.
++ *	@regs: The &struct pt_regs of the current process.
++ *
++ *	This function MUST handle the 'c' and 's' command packets,
++ *	as well as packets to set/remove a hardware breakpoint, if used.
++ *	If there are additional packets which the hardware needs to handle,
++ *	they are handled here.  The code should return -1 if it wants to
++ *	process more packets, and a %0 or %1 if it wants to exit from the
++ *	kgdb hook.
++ */
++extern int kgdb_arch_handle_exception(int vector, int signo, int err_code,
++				      char *remcom_in_buffer,
++				      char *remcom_out_buffer,
++				      struct pt_regs *regs);
++
++/**
++ * 	kgdb_roundup_cpus - Get other CPUs into a holding pattern
++ * 	@flags: Current IRQ state
++ *
++ * 	On SMP systems, we need to get the attention of the other CPUs
++ * 	and get them into a known state.  This should do what is needed
++ * 	to get the other CPUs to call kgdb_wait(). Note that on some arches,
++ *	the NMI approach is not used for rounding up all the CPUs. For example,
++ *	in the case of MIPS, smp_call_function() is used to round up CPUs. In
++ *	this case, we have to make sure that interrupts are enabled before
++ *	calling smp_call_function(). The argument to this function is
++ *	the flags that will be used when restoring the interrupts. There is
++ *	a local_irq_save() call before kgdb_roundup_cpus().
++ *
++ *	On non-SMP systems, this is not called.
++ */
++extern void kgdb_roundup_cpus(unsigned long flags);
++
++#ifndef JMP_REGS_ALIGNMENT
++#define JMP_REGS_ALIGNMENT
++#endif
++
++extern unsigned long kgdb_fault_jmp_regs[];
++
++/**
++ *	kgdb_fault_setjmp - Store state in case we fault.
++ *	@curr_context: An array to store state into.
++ *
++ *	Certain functions may try to access memory, and in doing so may
++ *	cause a fault.  When this happens, we trap it, restore state to
++ *	this call, and let ourselves know that something bad has happened.
++ */
++extern asmlinkage int kgdb_fault_setjmp(unsigned long *curr_context);
++
++/**
++ *	kgdb_fault_longjmp - Restore state when we have faulted.
++ *	@curr_context: The previously stored state.
++ *
++ *	When something bad does happen, this function is called to
++ *	restore the known good state, and set the return value to 1, so
++ *	we know something bad happened.
++ */
++extern asmlinkage void kgdb_fault_longjmp(unsigned long *curr_context);
++
++/* Optional functions. */
++extern int kgdb_validate_break_address(unsigned long addr);
++extern int kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr);
++extern int kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle);
++
++/**
++ * struct kgdb_arch - Describe architecture-specific values.
++ * @gdb_bpt_instr: The instruction to trigger a breakpoint.
++ * @flags: Flags for the breakpoint, currently just %KGDB_HW_BREAKPOINT.
++ * @shadowth: A value of %1 indicates we shadow information on processes.
++ * @set_breakpoint: Allow an architecture to specify how to set a software
++ * breakpoint.
++ * @remove_breakpoint: Allow an architecture to specify how to remove a
++ * software breakpoint.
++ * @set_hw_breakpoint: Allow an architecture to specify how to set a hardware
++ * breakpoint.
++ * @remove_hw_breakpoint: Allow an architecture to specify how to remove a
++ * hardware breakpoint.
++ *
++ * The @shadowth flag is an option to shadow information not retrievable by
++ * gdb otherwise.  This is deprecated in favor of a binutils which supports
++ * CFI macros.
++ */
++struct kgdb_arch {
++	unsigned char gdb_bpt_instr[BREAK_INSTR_SIZE];
++	unsigned long flags;
++	unsigned shadowth;
++	int (*set_breakpoint) (unsigned long, char *);
++	int (*remove_breakpoint)(unsigned long, char *);
++	int (*set_hw_breakpoint)(unsigned long, int, enum kgdb_bptype);
++	int (*remove_hw_breakpoint)(unsigned long, int, enum kgdb_bptype);
++	void (*remove_all_hw_break)(void);
++	void (*correct_hw_break)(void);
++};
++
++/* Thread reference */
++typedef unsigned char threadref[8];
++
++/**
++ * struct kgdb_io - Describe the interface for an I/O driver to talk with KGDB.
++ * @read_char: Pointer to a function that will return one char.
++ * @write_char: Pointer to a function that will write one char.
++ * @flush: Pointer to a function that will flush any pending writes.
++ * @init: Pointer to a function that will initialize the device.
++ * @late_init: Pointer to a function that will do any setup that has
++ * other dependencies.
++ * @pre_exception: Pointer to a function that will do any prep work for
++ * the I/O driver.
++ * @post_exception: Pointer to a function that will do any cleanup work
++ * for the I/O driver.
++ *
++ * The @init and @late_init function pointers allow for an I/O driver
++ * such as a serial driver to fully initialize the port with @init and
++ * be called very early, yet safely call request_irq() later in the boot
++ * sequence.
++ *
++ * @init is allowed to return a non-zero value to indicate failure.
++ * If this is called early on, then KGDB will try again when it would call
++ * @late_init.  If it has failed later in boot as well, the user will be
++ * notified.
++ */
++struct kgdb_io {
++	int (*read_char) (void);
++	void (*write_char) (u8);
++	void (*flush) (void);
++	int (*init) (void);
++	void (*late_init) (void);
++	void (*pre_exception) (void);
++	void (*post_exception) (void);
++};
++
++extern struct kgdb_io kgdb_io_ops;
++extern struct kgdb_arch arch_kgdb_ops;
++extern int kgdb_initialized;
++
++extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops);
++extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops);
++
++extern void __init kgdb8250_add_port(int i, struct uart_port *serial_req);
++extern void __init kgdb8250_add_platform_port(int i, struct plat_serial8250_port *serial_req);
++
++extern int kgdb_hex2long(char **ptr, long *long_val);
++extern char *kgdb_mem2hex(char *mem, char *buf, int count);
++extern char *kgdb_hex2mem(char *buf, char *mem, int count);
++extern int kgdb_get_mem(char *addr, unsigned char *buf, int count);
++extern int kgdb_set_mem(char *addr, unsigned char *buf, int count);
++
++int kgdb_isremovedbreak(unsigned long addr);
++
++extern int kgdb_handle_exception(int ex_vector, int signo, int err_code,
++				struct pt_regs *regs);
++extern void kgdb_nmihook(int cpu, void *regs);
++extern int debugger_step;
++extern atomic_t debugger_active;
++#else
++/* Stubs for when KGDB is not set. */
++static const atomic_t debugger_active = ATOMIC_INIT(0);
++#endif				/* CONFIG_KGDB */
++#endif				/* _KGDB_H_ */
++#endif				/* __KERNEL__ */
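
A skeletal I/O driver registration against the interface above; the
my_uart_* primitives are hypothetical polled-mode helpers:

	static int my_read_char(void)
	{
		return my_uart_poll_rx();	/* blocks until a byte arrives */
	}

	static void my_write_char(u8 c)
	{
		my_uart_poll_tx(c);
	}

	static int my_io_init(void)
	{
		return my_uart_setup(115200);	/* may fail early in boot */
	}

	static struct kgdb_io my_kgdb_io = {
		.read_char	= my_read_char,
		.write_char	= my_write_char,
		.init		= my_io_init,
	};

	static int __init my_kgdb_io_register(void)
	{
		return kgdb_register_io_module(&my_kgdb_io);
	}
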
+diff -Nurb linux-2.6.22-570/include/linux/kmod.h linux-2.6.22-try2/include/linux/kmod.h
+--- linux-2.6.22-570/include/linux/kmod.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/kmod.h	2007-12-19 15:29:23.000000000 -0500
+@@ -36,13 +36,57 @@
+ #define try_then_request_module(x, mod...) ((x) ?: (request_module(mod), (x)))
+ 
+ struct key;
+-extern int call_usermodehelper_keys(char *path, char *argv[], char *envp[],
+-				    struct key *session_keyring, int wait);
++struct file;
++struct subprocess_info;
++
++/* Allocate a subprocess_info structure */
++struct subprocess_info *call_usermodehelper_setup(char *path,
++						  char **argv, char **envp);
++
++/* Set various pieces of state into the subprocess_info structure */
++void call_usermodehelper_setkeys(struct subprocess_info *info,
++				 struct key *session_keyring);
++int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
++				  struct file **filp);
++void call_usermodehelper_setcleanup(struct subprocess_info *info,
++				    void (*cleanup)(char **argv, char **envp));
++
++enum umh_wait {
++	UMH_NO_WAIT = -1,	/* don't wait at all */
++	UMH_WAIT_EXEC = 0,	/* wait for the exec, but not the process */
++	UMH_WAIT_PROC = 1,	/* wait for the process to complete */
++};
++
++/* Actually execute the sub-process */
++int call_usermodehelper_exec(struct subprocess_info *info, enum umh_wait wait);
++
++/* Free the subprocess_info. This is only needed if you're not going
++   to call call_usermodehelper_exec */
++void call_usermodehelper_freeinfo(struct subprocess_info *info);
+ 
+ static inline int
+-call_usermodehelper(char *path, char **argv, char **envp, int wait)
++call_usermodehelper(char *path, char **argv, char **envp, enum umh_wait wait)
+ {
+-	return call_usermodehelper_keys(path, argv, envp, NULL, wait);
++	struct subprocess_info *info;
++
++	info = call_usermodehelper_setup(path, argv, envp);
++	if (info == NULL)
++		return -ENOMEM;
++	return call_usermodehelper_exec(info, wait);
++}
++
++static inline int
++call_usermodehelper_keys(char *path, char **argv, char **envp,
++			 struct key *session_keyring, enum umh_wait wait)
++{
++	struct subprocess_info *info;
++
++	info = call_usermodehelper_setup(path, argv, envp);
++	if (info == NULL)
++		return -ENOMEM;
++
++	call_usermodehelper_setkeys(info, session_keyring);
++	return call_usermodehelper_exec(info, wait);
+ }
+ 
+ extern void usermodehelper_init(void);
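
The split API also lets callers attach state before execution, which the
old one-shot helpers could not. A sketch (helper path, argv and envp are
made up for illustration):

	static void helper_cleanup(char **argv, char **envp)
	{
		/* free any argv/envp storage the caller allocated */
	}

	static int run_event_helper(void)
	{
		char *argv[] = { "/sbin/event-helper", NULL };
		char *envp[] = { "HOME=/", "PATH=/sbin:/bin:/usr/bin", NULL };
		struct subprocess_info *info;

		info = call_usermodehelper_setup(argv[0], argv, envp);
		if (info == NULL)
			return -ENOMEM;
		call_usermodehelper_setcleanup(info, helper_cleanup);
		return call_usermodehelper_exec(info, UMH_WAIT_PROC);
	}
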
+diff -Nurb linux-2.6.22-570/include/linux/kobject.h linux-2.6.22-try2/include/linux/kobject.h
+--- linux-2.6.22-570/include/linux/kobject.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/kobject.h	2007-12-19 15:29:23.000000000 -0500
+@@ -55,7 +55,7 @@
+ 	struct kobject		* parent;
+ 	struct kset		* kset;
+ 	struct kobj_type	* ktype;
+-	struct dentry		* dentry;
++	struct sysfs_dirent	* sd;
+ 	wait_queue_head_t	poll;
+ };
+ 
+@@ -71,12 +71,13 @@
+ extern void kobject_cleanup(struct kobject *);
+ 
+ extern int __must_check kobject_add(struct kobject *);
+-extern int __must_check kobject_shadow_add(struct kobject *, struct dentry *);
++extern int __must_check kobject_shadow_add(struct kobject *kobj,
++					   struct sysfs_dirent *shadow_parent);
+ extern void kobject_del(struct kobject *);
+ 
+ extern int __must_check kobject_rename(struct kobject *, const char *new_name);
+ extern int __must_check kobject_shadow_rename(struct kobject *kobj,
+-						struct dentry *new_parent,
++					      struct sysfs_dirent *new_parent,
+ 						const char *new_name);
+ extern int __must_check kobject_move(struct kobject *, struct kobject *);
+ 
+diff -Nurb linux-2.6.22-570/include/linux/ktime.h linux-2.6.22-try2/include/linux/ktime.h
+--- linux-2.6.22-570/include/linux/ktime.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/ktime.h	2007-12-19 15:29:23.000000000 -0500
+@@ -279,6 +279,16 @@
+ 	return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec;
+ }
+ 
++static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier)
++{
++       return ktime_to_us(ktime_sub(later, earlier));
++}
++
++static inline ktime_t ktime_add_us(const ktime_t kt, const u64 usec)
++{
++	return ktime_add_ns(kt, usec * 1000);
++}
++
+ /*
+  * The resolution of the clocks. The resolution value is returned in
+  * the clock_getres() system call to give application programmers an
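
Typical use of the two helpers added above, measuring an interval and
computing a deadline (sketch; the work() callback is hypothetical):

	static s64 time_work_us(void (*work)(void))
	{
		ktime_t start = ktime_get();

		work();
		return ktime_us_delta(ktime_get(), start);
	}

	static ktime_t deadline_in(u64 usec)
	{
		return ktime_add_us(ktime_get(), usec);
	}
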
+diff -Nurb linux-2.6.22-570/include/linux/magic.h linux-2.6.22-try2/include/linux/magic.h
+--- linux-2.6.22-570/include/linux/magic.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/linux/magic.h	2007-12-19 15:29:24.000000000 -0500
+@@ -36,8 +36,12 @@
+ #define REISERFS_SUPER_MAGIC_STRING	"ReIsErFs"
+ #define REISER2FS_SUPER_MAGIC_STRING	"ReIsEr2Fs"
+ #define REISER2FS_JR_SUPER_MAGIC_STRING	"ReIsEr3Fs"
++#define REVOKEFS_MAGIC		0x5245564B      /* REVK */
++
++#define UNIONFS_SUPER_MAGIC 0xf15f083d
+ 
+ #define SMB_SUPER_MAGIC		0x517B
+ #define USBDEVICE_SUPER_MAGIC	0x9fa2
++#define CONTAINER_SUPER_MAGIC	0x27e0eb
+ 
+ #endif /* __LINUX_MAGIC_H__ */
+diff -Nurb linux-2.6.22-570/include/linux/mempolicy.h linux-2.6.22-try2/include/linux/mempolicy.h
+--- linux-2.6.22-570/include/linux/mempolicy.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/mempolicy.h	2007-12-19 15:29:24.000000000 -0500
+@@ -148,18 +148,10 @@
+ 					const nodemask_t *new);
+ extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
+ extern void mpol_fix_fork_child_flag(struct task_struct *p);
+-#define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x))
+-
+-#ifdef CONFIG_CPUSETS
+-#define current_cpuset_is_being_rebound() \
+-				(cpuset_being_rebound == current->cpuset)
+-#else
+-#define current_cpuset_is_being_rebound() 0
+-#endif
+ 
+ extern struct mempolicy default_policy;
+ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
+-		unsigned long addr);
++		unsigned long addr, gfp_t gfp_flags);
+ extern unsigned slab_node(struct mempolicy *policy);
+ 
+ extern enum zone_type policy_zone;
+@@ -173,8 +165,6 @@
+ int do_migrate_pages(struct mm_struct *mm,
+ 	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
+ 
+-extern void *cpuset_being_rebound;	/* Trigger mpol_copy vma rebind */
+-
+ #else
+ 
+ struct mempolicy {};
+@@ -253,12 +243,10 @@
+ {
+ }
+ 
+-#define set_cpuset_being_rebound(x) do {} while (0)
+-
+ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
+-		unsigned long addr)
++		unsigned long addr, gfp_t gfp_flags)
+ {
+-	return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER);
++	return NODE_DATA(0)->node_zonelists + gfp_zone(gfp_flags);
+ }
+ 
+ static inline int do_migrate_pages(struct mm_struct *mm,
+diff -Nurb linux-2.6.22-570/include/linux/mm.h linux-2.6.22-try2/include/linux/mm.h
+--- linux-2.6.22-570/include/linux/mm.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/mm.h	2007-12-19 15:29:24.000000000 -0500
+@@ -170,6 +170,13 @@
+ #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
+ #define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */
+ 
++#define VM_CAN_INVALIDATE 0x08000000	/* The mapping may be invalidated,
++					 * eg. truncate or invalidate_inode_*.
++					 * In this case, do_no_page must
++					 * return with the page locked.
++					 */
++#define VM_CAN_NONLINEAR 0x10000000	/* Has ->fault & does nonlinear pages */
++
+ #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
+ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
+ #endif
+@@ -192,6 +199,25 @@
+  */
+ extern pgprot_t protection_map[16];
+ 
++#define FAULT_FLAG_WRITE	0x01
++#define FAULT_FLAG_NONLINEAR	0x02
++
++/*
++ * fault_data is filled in by the page fault handler and passed to the
++ * vma's ->fault function. That function is responsible for filling in
++ * 'type', which is the type of fault if a page is returned, or the type
++ * of error if NULL is returned.
++ *
++ * pgoff should be used in favour of address, if possible. If pgoff is
++ * used, one may set VM_CAN_NONLINEAR in the vma->vm_flags to get
++ * nonlinear mapping support.
++ */
++struct fault_data {
++	unsigned long address;
++	pgoff_t pgoff;
++	unsigned int flags;
++	int type;
++};
+ 
+ /*
+  * These are the virtual MM functions - opening of an area, closing and
+@@ -201,9 +227,15 @@
+ struct vm_operations_struct {
+ 	void (*open)(struct vm_area_struct * area);
+ 	void (*close)(struct vm_area_struct * area);
+-	struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int *type);
+-	unsigned long (*nopfn)(struct vm_area_struct * area, unsigned long address);
+-	int (*populate)(struct vm_area_struct * area, unsigned long address, unsigned long len, pgprot_t prot, unsigned long pgoff, int nonblock);
++	struct page *(*fault)(struct vm_area_struct *vma,
++			struct fault_data *fdata);
++	struct page *(*nopage)(struct vm_area_struct *area,
++			unsigned long address, int *type);
++	unsigned long (*nopfn)(struct vm_area_struct *area,
++			unsigned long address);
++	int (*populate)(struct vm_area_struct *area, unsigned long address,
++			unsigned long len, pgprot_t prot, unsigned long pgoff,
++			int nonblock);
+ 
+ 	/* notification that a previously read-only page is about to become
+ 	 * writable, if an error is returned it will cause a SIGBUS */
+@@ -656,7 +688,6 @@
+  */
+ #define NOPAGE_SIGBUS	(NULL)
+ #define NOPAGE_OOM	((struct page *) (-1))
+-#define NOPAGE_REFAULT	((struct page *) (-2))	/* Return to userspace, rerun */
+ 
+ /*
+  * Error return values for the *_nopfn functions
+@@ -744,6 +775,16 @@
+ 		struct vm_area_struct *start_vma, unsigned long start_addr,
+ 		unsigned long end_addr, unsigned long *nr_accounted,
+ 		struct zap_details *);
++
++struct mm_walk {
++	int (*pgd_entry)(pgd_t *, unsigned long, unsigned long, void *);
++	int (*pud_entry)(pud_t *, unsigned long, unsigned long, void *);
++	int (*pmd_entry)(pmd_t *, unsigned long, unsigned long, void *);
++	int (*pte_entry)(pte_t *, unsigned long, unsigned long, void *);
++};
++
++int walk_page_range(struct mm_struct *, unsigned long addr, unsigned long end,
++		    struct mm_walk *walk, void *private);
+ void free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
+ 		unsigned long end, unsigned long floor, unsigned long ceiling);
+ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *start_vma,
+@@ -1058,6 +1099,7 @@
+ extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
+ extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
+ 	struct rb_node **, struct rb_node *);
++extern void __unlink_file_vma(struct vm_area_struct *);
+ extern void unlink_file_vma(struct vm_area_struct *);
+ extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
+ 	unsigned long addr, unsigned long len, pgoff_t pgoff);
+@@ -1097,9 +1139,11 @@
+ 				       loff_t lstart, loff_t lend);
+ 
+ /* generic vm_area_ops exported for stackable file systems */
+-extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *);
+-extern int filemap_populate(struct vm_area_struct *, unsigned long,
+-		unsigned long, pgprot_t, unsigned long, int);
++extern struct page *filemap_fault(struct vm_area_struct *, struct fault_data *);
++extern struct page * __deprecated_for_modules
++filemap_nopage(struct vm_area_struct *, unsigned long, int *);
++extern int __deprecated_for_modules filemap_populate(struct vm_area_struct *,
++		unsigned long, unsigned long, pgprot_t, unsigned long, int);
+ 
+ /* mm/page-writeback.c */
+ int write_one_page(struct page *page, int wait);
+@@ -1199,6 +1243,7 @@
+ 					void __user *, size_t *, loff_t *);
+ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
+ 			unsigned long lru_pages);
++extern void drop_pagecache_sb(struct super_block *);
+ void drop_pagecache(void);
+ void drop_slab(void);
+ 
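
A minimal ->fault handler under the interface added to mm.h above (sketch;
my_lookup_page() is a hypothetical driver lookup). The handler reports the
fault type through fdata->type and returns the page, or NULL on error:

	static struct page *my_fault(struct vm_area_struct *vma,
				     struct fault_data *fdata)
	{
		struct page *page = my_lookup_page(vma->vm_file, fdata->pgoff);

		if (!page) {
			fdata->type = VM_FAULT_SIGBUS;
			return NULL;
		}
		fdata->type = VM_FAULT_MINOR;
		return page;	/* locked, if the vma sets VM_CAN_INVALIDATE */
	}

	static struct vm_operations_struct my_vm_ops = {
		.fault	= my_fault,
	};
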
+diff -Nurb linux-2.6.22-570/include/linux/mmc/card.h linux-2.6.22-try2/include/linux/mmc/card.h
+--- linux-2.6.22-570/include/linux/mmc/card.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/mmc/card.h	2007-12-19 15:29:23.000000000 -0500
+@@ -72,6 +72,7 @@
+ #define MMC_STATE_READONLY	(1<<1)		/* card is read-only */
+ #define MMC_STATE_HIGHSPEED	(1<<2)		/* card is in high speed mode */
+ #define MMC_STATE_BLOCKADDR	(1<<3)		/* card uses block-addressing */
++#define MMC_STATE_LOCKED	(1<<4)		/* card is currently locked */
+ 	u32			raw_cid[4];	/* raw card CID */
+ 	u32			raw_csd[4];	/* raw card CSD */
+ 	u32			raw_scr[2];	/* raw card SCR */
+@@ -89,11 +90,16 @@
+ #define mmc_card_readonly(c)	((c)->state & MMC_STATE_READONLY)
+ #define mmc_card_highspeed(c)	((c)->state & MMC_STATE_HIGHSPEED)
+ #define mmc_card_blockaddr(c)	((c)->state & MMC_STATE_BLOCKADDR)
++#define mmc_card_locked(c)	((c)->state & MMC_STATE_LOCKED)
++
++#define mmc_card_lockable(c)	(((c)->csd.cmdclass & CCC_LOCK_CARD) && \
++				((c)->host->caps & MMC_CAP_BYTEBLOCK))
+ 
+ #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
+ #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
+ #define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED)
+ #define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
++#define mmc_card_set_locked(c)	((c)->state |= MMC_STATE_LOCKED)
+ 
+ #define mmc_card_name(c)	((c)->cid.prod_name)
+ #define mmc_card_id(c)		((c)->dev.bus_id)
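
A sketch of how the new state and capability tests combine in a
hypothetical caller:

	/* Only attempt a CMD42 unlock on cards that are locked and that
	 * advertise the lock/unlock command class. */
	static int mmc_may_unlock(struct mmc_card *card)
	{
		return mmc_card_locked(card) && mmc_card_lockable(card);
	}
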
+diff -Nurb linux-2.6.22-570/include/linux/mmc/mmc.h linux-2.6.22-try2/include/linux/mmc/mmc.h
+--- linux-2.6.22-570/include/linux/mmc/mmc.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/mmc/mmc.h	2007-12-19 15:29:23.000000000 -0500
+@@ -253,5 +253,13 @@
+ #define MMC_SWITCH_MODE_CLEAR_BITS	0x02	/* Clear bits which are 1 in value */
+ #define MMC_SWITCH_MODE_WRITE_BYTE	0x03	/* Set target to value */
+ 
++/*
++ * MMC_LOCK_UNLOCK modes
++ */
++#define MMC_LOCK_MODE_ERASE	(1<<3)
++#define MMC_LOCK_MODE_UNLOCK	(1<<2)
++#define MMC_LOCK_MODE_CLR_PWD	(1<<1)
++#define MMC_LOCK_MODE_SET_PWD	(1<<0)
++
+ #endif  /* MMC_MMC_PROTOCOL_H */
+ 
+diff -Nurb linux-2.6.22-570/include/linux/mmzone.h linux-2.6.22-try2/include/linux/mmzone.h
+--- linux-2.6.22-570/include/linux/mmzone.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/mmzone.h	2007-12-19 15:29:24.000000000 -0500
+@@ -13,6 +13,7 @@
+ #include <linux/init.h>
+ #include <linux/seqlock.h>
+ #include <linux/nodemask.h>
++#include <linux/pageblock-flags.h>
+ #include <asm/atomic.h>
+ #include <asm/page.h>
+ 
+@@ -24,8 +25,24 @@
+ #endif
+ #define MAX_ORDER_NR_PAGES (1 << (MAX_ORDER - 1))
+ 
++#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY
++#define MIGRATE_UNMOVABLE     0
++#define MIGRATE_RECLAIMABLE   1
++#define MIGRATE_MOVABLE       2
++#define MIGRATE_TYPES         3
++#else
++#define MIGRATE_UNMOVABLE     0
+#define MIGRATE_RECLAIMABLE   0
++#define MIGRATE_MOVABLE       0
++#define MIGRATE_TYPES         1
++#endif
++
++#define for_each_migratetype_order(order, type) \
++	for (order = 0; order < MAX_ORDER; order++) \
++		for (type = 0; type < MIGRATE_TYPES; type++)
++
+ struct free_area {
+-	struct list_head	free_list;
++	struct list_head	free_list[MIGRATE_TYPES];
+ 	unsigned long		nr_free;
+ };
+ 
+@@ -213,6 +230,14 @@
+ #endif
+ 	struct free_area	free_area[MAX_ORDER];
+ 
++#ifndef CONFIG_SPARSEMEM
++	/*
++	 * Flags for a MAX_ORDER_NR_PAGES block. See pageblock-flags.h.
++	 * In SPARSEMEM, this map is stored in struct mem_section
++	 */
++	unsigned long		*pageblock_flags;
++#endif /* CONFIG_SPARSEMEM */
++
+ 
+ 	ZONE_PADDING(_pad1_)
+ 
+@@ -468,6 +493,7 @@
+ void get_zone_counts(unsigned long *active, unsigned long *inactive,
+ 			unsigned long *free);
+ void build_all_zonelists(void);
++void raise_kswapd_order(unsigned int order);
+ void wakeup_kswapd(struct zone *zone, int order);
+ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+ 		int classzone_idx, int alloc_flags);
+@@ -662,6 +688,9 @@
+ #define PAGES_PER_SECTION       (1UL << PFN_SECTION_SHIFT)
+ #define PAGE_SECTION_MASK	(~(PAGES_PER_SECTION-1))
+ 
++#define SECTION_BLOCKFLAGS_BITS \
++		((SECTION_SIZE_BITS - (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS)
++
+ #if (MAX_ORDER - 1 + PAGE_SHIFT) > SECTION_SIZE_BITS
+ #error Allocator MAX_ORDER exceeds SECTION_SIZE
+ #endif
+@@ -681,6 +710,7 @@
+ 	 * before using it wrong.
+ 	 */
+ 	unsigned long section_mem_map;
++	DECLARE_BITMAP(pageblock_flags, SECTION_BLOCKFLAGS_BITS);
+ };
+ 
+ #ifdef CONFIG_SPARSEMEM_EXTREME
+diff -Nurb linux-2.6.22-570/include/linux/mnt_namespace.h linux-2.6.22-try2/include/linux/mnt_namespace.h
+--- linux-2.6.22-570/include/linux/mnt_namespace.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/mnt_namespace.h	2007-12-19 15:29:24.000000000 -0500
+@@ -14,7 +14,7 @@
+ 	int event;
+ };
+ 
+-extern struct mnt_namespace *copy_mnt_ns(int, struct mnt_namespace *,
++extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
+ 		struct fs_struct *);
+ extern void __put_mnt_ns(struct mnt_namespace *ns);
+ 
+diff -Nurb linux-2.6.22-570/include/linux/module.h linux-2.6.22-try2/include/linux/module.h
+--- linux-2.6.22-570/include/linux/module.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/module.h	2007-12-19 15:29:24.000000000 -0500
+@@ -227,8 +227,17 @@
+ 	MODULE_STATE_LIVE,
+ 	MODULE_STATE_COMING,
+ 	MODULE_STATE_GOING,
++ 	MODULE_STATE_GONE,
+ };
+ 
++#ifdef CONFIG_KGDB
++#define MAX_SECTNAME 31
++struct mod_section {
++       void *address;
++       char name[MAX_SECTNAME + 1];
++};
++#endif
++
+ /* Similar stuff for section attributes. */
+ struct module_sect_attr
+ {
+@@ -256,6 +265,13 @@
+ 	/* Unique handle for this module */
+ 	char name[MODULE_NAME_LEN];
+ 
++#ifdef CONFIG_KGDB
++	/* keep kgdb info at the beginning so that gdb doesn't have a chance
++	 * to miss any fields */
++	unsigned long num_sections;
++	struct mod_section *mod_sections;
++#endif
++
+ 	/* Sysfs stuff. */
+ 	struct module_kobject mkobj;
+ 	struct module_param_attrs *param_attrs;
+diff -Nurb linux-2.6.22-570/include/linux/namei.h linux-2.6.22-try2/include/linux/namei.h
+--- linux-2.6.22-570/include/linux/namei.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/namei.h	2007-12-19 15:29:23.000000000 -0500
+@@ -3,6 +3,7 @@
+ 
+ #include <linux/dcache.h>
+ #include <linux/linkage.h>
++#include <linux/mount.h>
+ 
+ struct vfsmount;
+ 
+@@ -81,9 +82,16 @@
+ extern struct file *nameidata_to_filp(struct nameidata *nd, int flags);
+ extern void release_open_intent(struct nameidata *);
+ 
+-extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
++extern struct dentry * lookup_one_len_nd(const char *, struct dentry *,
++					 int, struct nameidata *);
+ extern struct dentry *lookup_one_len_kern(const char *, struct dentry *, int);
+ 
++static inline struct dentry *lookup_one_len(const char *name,
++			struct dentry *dir, int len)
++{
++	return lookup_one_len_nd(name, dir, len, NULL);
++}
++
+ extern int follow_down(struct vfsmount **, struct dentry **);
+ extern int follow_up(struct vfsmount **, struct dentry **);
+ 
+@@ -100,4 +108,16 @@
+ 	return nd->saved_names[nd->depth];
+ }
+ 
++static inline void pathget(struct path *path)
++{
++	mntget(path->mnt);
++	dget(path->dentry);
++}
++
++static inline void pathput(struct path *path)
++{
++	dput(path->dentry);
++	mntput(path->mnt);
++}
++
+ #endif /* _LINUX_NAMEI_H */
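
pathget()/pathput() pair a vfsmount reference with a dentry reference. A
short sketch of pinning a path beyond the current call and releasing it
later:

	static struct path saved;	/* hypothetical long-lived reference */

	static void stash_path(struct vfsmount *mnt, struct dentry *dentry)
	{
		saved.mnt = mnt;
		saved.dentry = dentry;
		pathget(&saved);	/* grabs both references */
	}

	static void release_path(void)
	{
		pathput(&saved);	/* drops the dentry, then the mount */
	}
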
+diff -Nurb linux-2.6.22-570/include/linux/netdevice.h linux-2.6.22-try2/include/linux/netdevice.h
+--- linux-2.6.22-570/include/linux/netdevice.h	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/include/linux/netdevice.h	2007-12-19 15:29:23.000000000 -0500
+@@ -314,9 +314,10 @@
+ 	/* Net device features */
+ 	unsigned long		features;
+ #define NETIF_F_SG		1	/* Scatter/gather IO. */
+-#define NETIF_F_IP_CSUM		2	/* Can checksum only TCP/UDP over IPv4. */
++#define NETIF_F_IP_CSUM		2	/* Can checksum TCP/UDP over IPv4. */
+ #define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
+ #define NETIF_F_HW_CSUM		8	/* Can checksum all the packets. */
++#define NETIF_F_IPV6_CSUM	16	/* Can checksum TCP/UDP over IPV6 */
+ #define NETIF_F_HIGHDMA		32	/* Can DMA to high memory. */
+ #define NETIF_F_FRAGLIST	64	/* Scatter/gather IO. */
+ #define NETIF_F_HW_VLAN_TX	128	/* Transmit VLAN hw acceleration */
+@@ -338,8 +339,11 @@
+ 	/* List of features with software fallbacks. */
+ #define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6)
+ 
++
+ #define NETIF_F_GEN_CSUM	(NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
+-#define NETIF_F_ALL_CSUM	(NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM)
++#define NETIF_F_V4_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
++#define NETIF_F_V6_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
++#define NETIF_F_ALL_CSUM	(NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
+ 
+ 	struct net_device	*next_sched;
+ 
+@@ -540,13 +544,16 @@
+ 	struct device		dev;
+ 	/* space for optional statistics and wireless sysfs groups */
+ 	struct attribute_group  *sysfs_groups[3];
++
++	/* rtnetlink link ops */
++	const struct rtnl_link_ops *rtnl_link_ops;
+ };
+ #define to_net_dev(d) container_of(d, struct net_device, dev)
+ 
+ #define	NETDEV_ALIGN		32
+ #define	NETDEV_ALIGN_CONST	(NETDEV_ALIGN - 1)
+ 
+-static inline void *netdev_priv(struct net_device *dev)
++static inline void *netdev_priv(const struct net_device *dev)
+ {
+ 	return (char *)dev + ((sizeof(struct net_device)
+ 					+ NETDEV_ALIGN_CONST)
+diff -Nurb linux-2.6.22-570/include/linux/netlink.h linux-2.6.22-try2/include/linux/netlink.h
+--- linux-2.6.22-570/include/linux/netlink.h	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/include/linux/netlink.h	2007-12-19 15:29:22.000000000 -0500
+@@ -21,7 +21,7 @@
+ #define NETLINK_DNRTMSG		14	/* DECnet routing messages */
+ #define NETLINK_KOBJECT_UEVENT	15	/* Kernel messages to userspace */
+ #define NETLINK_GENERIC		16
+-/* leave room for NETLINK_DM (DM Events) */
++#define NETLINK_DM		17	/* Device Mapper */
+ #define NETLINK_SCSITRANSPORT	18	/* SCSI Transports */
+ #define NETLINK_ECRYPTFS	19
+ 
+diff -Nurb linux-2.6.22-570/include/linux/netpoll.h linux-2.6.22-try2/include/linux/netpoll.h
+--- linux-2.6.22-570/include/linux/netpoll.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/netpoll.h	2007-12-19 15:29:24.000000000 -0500
+@@ -16,7 +16,7 @@
+ 	struct net_device *dev;
+ 	char dev_name[IFNAMSIZ];
+ 	const char *name;
+-	void (*rx_hook)(struct netpoll *, int, char *, int);
++	void (*rx_hook)(struct netpoll *, int, char *, int, struct sk_buff *);
+ 
+ 	u32 local_ip, remote_ip;
+ 	u16 local_port, remote_port;
+diff -Nurb linux-2.6.22-570/include/linux/nfs4.h linux-2.6.22-try2/include/linux/nfs4.h
+--- linux-2.6.22-570/include/linux/nfs4.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/nfs4.h	2007-12-19 15:29:23.000000000 -0500
+@@ -15,6 +15,7 @@
+ 
+ #include <linux/types.h>
+ 
++#define NFS4_BITMAP_SIZE	2
+ #define NFS4_VERIFIER_SIZE	8
+ #define NFS4_STATEID_SIZE	16
+ #define NFS4_FHSIZE		128
+diff -Nurb linux-2.6.22-570/include/linux/nfs4_mount.h linux-2.6.22-try2/include/linux/nfs4_mount.h
+--- linux-2.6.22-570/include/linux/nfs4_mount.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/nfs4_mount.h	2007-12-19 15:29:23.000000000 -0500
+@@ -65,6 +65,7 @@
+ #define NFS4_MOUNT_NOCTO	0x0010	/* 1 */
+ #define NFS4_MOUNT_NOAC		0x0020	/* 1 */
+ #define NFS4_MOUNT_STRICTLOCK	0x1000	/* 1 */
++#define NFS4_MOUNT_UNSHARED	0x8000	/* 1 */
+ #define NFS4_MOUNT_FLAGMASK	0xFFFF
+ 
+ #endif
+diff -Nurb linux-2.6.22-570/include/linux/nfs_fs.h linux-2.6.22-try2/include/linux/nfs_fs.h
+--- linux-2.6.22-570/include/linux/nfs_fs.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/nfs_fs.h	2007-12-19 15:29:23.000000000 -0500
+@@ -30,7 +30,9 @@
+ #ifdef __KERNEL__
+ 
+ #include <linux/in.h>
++#include <linux/kref.h>
+ #include <linux/mm.h>
++#include <linux/namei.h>
+ #include <linux/pagemap.h>
+ #include <linux/rbtree.h>
+ #include <linux/rwsem.h>
+@@ -69,9 +71,8 @@
+ 
+ struct nfs4_state;
+ struct nfs_open_context {
+-	atomic_t count;
+-	struct vfsmount *vfsmnt;
+-	struct dentry *dentry;
++	struct kref kref;
++	struct path path;
+ 	struct rpc_cred *cred;
+ 	struct nfs4_state *state;
+ 	fl_owner_t lockowner;
+@@ -156,12 +157,9 @@
+ 	 * This is the list of dirty unwritten pages.
+ 	 */
+ 	spinlock_t		req_lock;
+-	struct list_head	dirty;
+-	struct list_head	commit;
+ 	struct radix_tree_root	nfs_page_tree;
+ 
+-	unsigned int		ndirty,
+-				ncommit,
++	unsigned long		ncommit,
+ 				npages;
+ 
+ 	/* Open contexts for shared mmap writes */
+diff -Nurb linux-2.6.22-570/include/linux/nfs_fs_sb.h linux-2.6.22-try2/include/linux/nfs_fs_sb.h
+--- linux-2.6.22-570/include/linux/nfs_fs_sb.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/nfs_fs_sb.h	2007-12-19 15:29:23.000000000 -0500
+@@ -16,7 +16,6 @@
+ #define NFS_CS_INITING		1		/* busy initialising */
+ 	int			cl_nfsversion;	/* NFS protocol version */
+ 	unsigned long		cl_res_state;	/* NFS resources state */
+-#define NFS_CS_RPCIOD		0		/* - rpciod started */
+ #define NFS_CS_CALLBACK		1		/* - callback started */
+ #define NFS_CS_IDMAP		2		/* - idmap started */
+ #define NFS_CS_RENEWD		3		/* - renewd started */
+diff -Nurb linux-2.6.22-570/include/linux/nfs_mount.h linux-2.6.22-try2/include/linux/nfs_mount.h
+--- linux-2.6.22-570/include/linux/nfs_mount.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/linux/nfs_mount.h	2007-12-19 15:49:20.000000000 -0500
+@@ -62,6 +62,7 @@
+ #define NFS_MOUNT_STRICTLOCK	0x1000	/* reserved for NFSv4 */
+ #define NFS_MOUNT_SECFLAVOUR	0x2000	/* 5 */
+ #define NFS_MOUNT_NORDIRPLUS	0x4000	/* 5 */
++#define NFS_MOUNT_UNSHARED	0x8000	/* 5 */
+ #define NFS_MOUNT_TAGGED	0x8000	/* context tagging */
+ #define NFS_MOUNT_FLAGMASK	0xFFFF
+ 
+diff -Nurb linux-2.6.22-570/include/linux/nfs_page.h linux-2.6.22-try2/include/linux/nfs_page.h
+--- linux-2.6.22-570/include/linux/nfs_page.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/nfs_page.h	2007-12-19 15:29:23.000000000 -0500
+@@ -16,12 +16,13 @@
+ #include <linux/sunrpc/auth.h>
+ #include <linux/nfs_xdr.h>
+ 
+-#include <asm/atomic.h>
++#include <linux/kref.h>
+ 
+ /*
+  * Valid flags for the radix tree
+  */
+-#define NFS_PAGE_TAG_WRITEBACK	0
++#define NFS_PAGE_TAG_LOCKED	0
++#define NFS_PAGE_TAG_COMMIT	1
+ 
+ /*
+  * Valid flags for a dirty buffer
+@@ -33,8 +34,7 @@
+ 
+ struct nfs_inode;
+ struct nfs_page {
+-	struct list_head	wb_list,	/* Defines state of page: */
+-				*wb_list_head;	/*      read/write/commit */
++	struct list_head	wb_list;	/* Defines state of page: */
+ 	struct page		*wb_page;	/* page to read in/write out */
+ 	struct nfs_open_context	*wb_context;	/* File state context info */
+ 	atomic_t		wb_complete;	/* i/os we're waiting for */
+@@ -42,7 +42,7 @@
+ 	unsigned int		wb_offset,	/* Offset & ~PAGE_CACHE_MASK */
+ 				wb_pgbase,	/* Start of page data */
+ 				wb_bytes;	/* Length of request */
+-	atomic_t		wb_count;	/* reference count */
++	struct kref		wb_kref;	/* reference count */
+ 	unsigned long		wb_flags;
+ 	struct nfs_writeverf	wb_verf;	/* Commit cookie */
+ };
+@@ -71,8 +71,8 @@
+ extern	void nfs_release_request(struct nfs_page *req);
+ 
+ 
+-extern	int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst,
+-			  pgoff_t idx_start, unsigned int npages);
++extern	int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
++			  pgoff_t idx_start, unsigned int npages, int tag);
+ extern	void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
+ 			     struct inode *inode,
+ 			     int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int),
+@@ -84,12 +84,11 @@
+ extern	void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
+ extern  int nfs_wait_on_request(struct nfs_page *);
+ extern	void nfs_unlock_request(struct nfs_page *req);
+-extern  int nfs_set_page_writeback_locked(struct nfs_page *req);
+-extern  void nfs_clear_page_writeback(struct nfs_page *req);
++extern  void nfs_clear_page_tag_locked(struct nfs_page *req);
+ 
+ 
+ /*
+- * Lock the page of an asynchronous request without incrementing the wb_count
++ * Lock the page of an asynchronous request without getting a new reference
+  */
+ static inline int
+ nfs_lock_request_dontget(struct nfs_page *req)
+@@ -98,14 +97,14 @@
+ }
+ 
+ /*
+- * Lock the page of an asynchronous request
++ * Lock the page of an asynchronous request and take a reference
+  */
+ static inline int
+ nfs_lock_request(struct nfs_page *req)
+ {
+ 	if (test_and_set_bit(PG_BUSY, &req->wb_flags))
+ 		return 0;
+-	atomic_inc(&req->wb_count);
++	kref_get(&req->wb_kref);
+ 	return 1;
+ }
+ 
+@@ -118,7 +117,6 @@
+ nfs_list_add_request(struct nfs_page *req, struct list_head *head)
+ {
+ 	list_add_tail(&req->wb_list, head);
+-	req->wb_list_head = head;
+ }
+ 
+ 
+@@ -132,7 +130,6 @@
+ 	if (list_empty(&req->wb_list))
+ 		return;
+ 	list_del_init(&req->wb_list);
+-	req->wb_list_head = NULL;
+ }
+ 
+ static inline struct nfs_page *
+diff -Nurb linux-2.6.22-570/include/linux/nfs_xdr.h linux-2.6.22-try2/include/linux/nfs_xdr.h
+--- linux-2.6.22-570/include/linux/nfs_xdr.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/nfs_xdr.h	2007-12-19 15:29:23.000000000 -0500
+@@ -144,6 +144,7 @@
+ 	nfs4_stateid		delegation;
+ 	__u32			do_recall;
+ 	__u64			maxsize;
++	__u32			attrset[NFS4_BITMAP_SIZE];
+ };
+ 
+ /*
+diff -Nurb linux-2.6.22-570/include/linux/nsproxy.h linux-2.6.22-try2/include/linux/nsproxy.h
+--- linux-2.6.22-570/include/linux/nsproxy.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/linux/nsproxy.h	2007-12-19 15:50:41.000000000 -0500
+@@ -10,6 +10,12 @@
+ struct ipc_namespace;
+ struct pid_namespace;
+ 
++#ifdef CONFIG_CONTAINER_NS
++int ns_container_clone(struct task_struct *tsk);
++#else
++static inline int ns_container_clone(struct task_struct *tsk) { return 0; }
++#endif
++
+ /*
+  * A structure to contain pointers to all per-process
+  * namespaces - fs (mount), uts, network, sysvipc, etc.
+@@ -29,6 +35,7 @@
+ 	struct ipc_namespace *ipc_ns;
+ 	struct mnt_namespace *mnt_ns;
+ 	struct pid_namespace *pid_ns;
++	struct user_namespace *user_ns;
+ };
+ extern struct nsproxy init_nsproxy;
+ 
+diff -Nurb linux-2.6.22-570/include/linux/pageblock-flags.h linux-2.6.22-try2/include/linux/pageblock-flags.h
+--- linux-2.6.22-570/include/linux/pageblock-flags.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/linux/pageblock-flags.h	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,52 @@
++/*
++ * Macros for manipulating and testing flags related to a
++ * MAX_ORDER_NR_PAGES block of pages.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation version 2 of the License
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ *
++ * Copyright (C) IBM Corporation, 2006
++ *
++ * Original author, Mel Gorman
++ * Major cleanups and reduction of bit operations, Andy Whitcroft
++ */
++#ifndef PAGEBLOCK_FLAGS_H
++#define PAGEBLOCK_FLAGS_H
++
++#include <linux/types.h>
++
++/* Macro to aid the definition of ranges of bits */
++#define PB_range(name, required_bits) \
++	name, name ## _end = (name + required_bits) - 1
++
++/* Bit indices that affect a whole block of pages */
++enum pageblock_bits {
++	PB_range(PB_migrate, 2), /* 2 bits required for migrate types */
++	NR_PAGEBLOCK_BITS
++};
++
++/* Forward declaration */
++struct page;
++
++/* Declarations for getting and setting flags. See mm/page_alloc.c */
++unsigned long get_pageblock_flags_group(struct page *page,
++					int start_bitidx, int end_bitidx);
++void set_pageblock_flags_group(struct page *page, unsigned long flags,
++					int start_bitidx, int end_bitidx);
++
++#define get_pageblock_flags(page) \
++			get_pageblock_flags_group(page, 0, NR_PAGEBLOCK_BITS-1)
++#define set_pageblock_flags(page) \
++			set_pageblock_flags_group(page, 0, NR_PAGEBLOCK_BITS-1)
++
++#endif	/* PAGEBLOCK_FLAGS_H */
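/* Editorial sketch (not from this patch): PB_range(PB_migrate, 2) above
 * expands to "PB_migrate, PB_migrate_end = (PB_migrate + 2) - 1", i.e.
 * bits 0-1, so NR_PAGEBLOCK_BITS is 2.  A reader of the migrate-type
 * bits would then look roughly like this; the helper name is
 * hypothetical. */
static inline unsigned long sketch_get_migratetype(struct page *page)
{
	return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
}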
+diff -Nurb linux-2.6.22-570/include/linux/pci_ids.h linux-2.6.22-try2/include/linux/pci_ids.h
+--- linux-2.6.22-570/include/linux/pci_ids.h	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/include/linux/pci_ids.h	2007-12-19 15:29:23.000000000 -0500
+@@ -2003,6 +2003,7 @@
+ 
+ #define PCI_VENDOR_ID_ENE		0x1524
+ #define PCI_DEVICE_ID_ENE_CB712_SD	0x0550
++#define PCI_DEVICE_ID_ENE_CB712_SD_2	0x0551
+ #define PCI_DEVICE_ID_ENE_1211		0x1211
+ #define PCI_DEVICE_ID_ENE_1225		0x1225
+ #define PCI_DEVICE_ID_ENE_1410		0x1410
+diff -Nurb linux-2.6.22-570/include/linux/pid_namespace.h linux-2.6.22-try2/include/linux/pid_namespace.h
+--- linux-2.6.22-570/include/linux/pid_namespace.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/pid_namespace.h	2007-12-19 15:29:24.000000000 -0500
+@@ -29,7 +29,7 @@
+ 	kref_get(&ns->kref);
+ }
+ 
+-extern struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *ns);
++extern struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *ns);
+ extern void free_pid_ns(struct kref *kref);
+ 
+ static inline void put_pid_ns(struct pid_namespace *ns)
+diff -Nurb linux-2.6.22-570/include/linux/pnp.h linux-2.6.22-try2/include/linux/pnp.h
+--- linux-2.6.22-570/include/linux/pnp.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/pnp.h	2007-12-19 15:29:22.000000000 -0500
+@@ -335,6 +335,10 @@
+ 	int (*set)(struct pnp_dev *dev, struct pnp_resource_table *res);
+ 	int (*disable)(struct pnp_dev *dev);
+ 
++	/* protocol specific suspend/resume */
++	int (*suspend)(struct pnp_dev *dev, pm_message_t state);
++	int (*resume)(struct pnp_dev *dev);
++
+ 	/* used by pnp layer only (look but don't touch) */
+ 	unsigned char		number;		/* protocol number*/
+ 	struct device		dev;		/* link to driver model */
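/* Editorial sketch (not from this patch): a protocol driver would wire the
 * new suspend/resume hooks into its struct pnp_protocol roughly as below.
 * The foo_* names are hypothetical stand-ins; pnpbios/pnpacpi would be the
 * real users. */
static int foo_pnp_suspend(struct pnp_dev *dev, pm_message_t state)
{
	return 0;	/* quiesce the device for 'state' */
}

static int foo_pnp_resume(struct pnp_dev *dev)
{
	return 0;	/* re-activate the device */
}

static struct pnp_protocol foo_protocol = {
	.suspend = foo_pnp_suspend,
	.resume  = foo_pnp_resume,
};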
+diff -Nurb linux-2.6.22-570/include/linux/prctl.h linux-2.6.22-try2/include/linux/prctl.h
+--- linux-2.6.22-570/include/linux/prctl.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/prctl.h	2007-12-19 15:29:24.000000000 -0500
+@@ -59,4 +59,8 @@
+ # define PR_ENDIAN_LITTLE	1	/* True little endian mode */
+ # define PR_ENDIAN_PPC_LITTLE	2	/* "PowerPC" pseudo little endian */
+ 
++/* Get/set process seccomp mode */
++#define PR_GET_SECCOMP	21
++#define PR_SET_SECCOMP	22
++
+ #endif /* _LINUX_PRCTL_H */
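/* Editorial sketch (not from this patch): with the new codes, userspace
 * toggles seccomp through prctl(2); assuming libc headers that carry the
 * definitions, mode 1 (strict) is entered like this.  Note that once in
 * strict mode only read/write/exit/sigreturn are allowed, so even a
 * follow-up prctl(PR_GET_SECCOMP) would be fatal. */
#include <sys/prctl.h>

int sketch_enter_strict_seccomp(void)
{
	return prctl(PR_SET_SECCOMP, 1);	/* 0 on success, -1/EINVAL if unsupported */
}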
+diff -Nurb linux-2.6.22-570/include/linux/proc_fs.h linux-2.6.22-try2/include/linux/proc_fs.h
+--- linux-2.6.22-570/include/linux/proc_fs.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/linux/proc_fs.h	2007-12-19 15:29:24.000000000 -0500
+@@ -105,7 +105,6 @@
+ unsigned long task_vsize(struct mm_struct *);
+ int task_statm(struct mm_struct *, int *, int *, int *, int *);
+ char *task_mem(struct mm_struct *, char *);
+-void clear_refs_smap(struct mm_struct *mm);
+ 
+ struct proc_dir_entry *de_get(struct proc_dir_entry *de);
+ void de_put(struct proc_dir_entry *de);
+diff -Nurb linux-2.6.22-570/include/linux/raid/raid5.h linux-2.6.22-try2/include/linux/raid/raid5.h
+--- linux-2.6.22-570/include/linux/raid/raid5.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/raid/raid5.h	2007-12-19 15:29:23.000000000 -0500
+@@ -116,13 +116,46 @@
+  *  attach a request to an active stripe (add_stripe_bh())
+  *     lockdev attach-buffer unlockdev
+  *  handle a stripe (handle_stripe())
+- *     lockstripe clrSTRIPE_HANDLE ... (lockdev check-buffers unlockdev) .. change-state .. record io needed unlockstripe schedule io
++ *     lockstripe clrSTRIPE_HANDLE ...
++ *		(lockdev check-buffers unlockdev) ..
++ *		change-state ..
++ *		record io/ops needed unlockstripe schedule io/ops
+  *  release an active stripe (release_stripe())
+  *     lockdev if (!--cnt) { if  STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev
+  *
+  * The refcount counts each thread that has activated the stripe,
+  * plus raid5d if it is handling it, plus one for each active request
+- * on a cached buffer.
++ * on a cached buffer, and plus one if the stripe is undergoing stripe
++ * operations.
++ *
++ * Stripe operations are performed outside the stripe lock,
++ * the stripe operations are:
++ * -copying data between the stripe cache and user application buffers
++ * -computing blocks to save a disk access, or to recover a missing block
++ * -updating the parity on a write operation (reconstruct write and
++ *  read-modify-write)
++ * -checking parity correctness
++ * -running i/o to disk
++ * These operations are carried out by raid5_run_ops which uses the async_tx
++ * api to (optionally) offload operations to dedicated hardware engines.
++ * When requesting an operation handle_stripe sets the pending bit for the
++ * operation and increments the count.  raid5_run_ops is then run whenever
++ * the count is non-zero.
++ * There are some critical dependencies between the operations that prevent some
++ * from being requested while another is in flight.
++ * 1/ Parity check operations destroy the in cache version of the parity block,
++ *    so we prevent parity dependent operations like writes and compute_blocks
++ *    from starting while a check is in progress.  Some dma engines can perform
++ *    the check without damaging the parity block, in these cases the parity
++ *    block is re-marked up to date (assuming the check was successful) and is
++ *    not re-read from disk.
++ * 2/ When a write operation is requested we immediately lock the affected
++ *    blocks, and mark them as not up to date.  This causes new read requests
++ *    to be held off, as well as parity checks and compute block operations.
++ * 3/ Once a compute block operation has been requested handle_stripe treats
++ *    that block as if it is up to date.  raid5_run_ops guarantees that any
++ *    operation that is dependent on the compute block result is initiated after
++ *    the compute block completes.
+  */
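/* Editorial sketch (not from this patch), assuming the ops fields added to
 * struct stripe_head below: per the comment above, handle_stripe requests an
 * operation by setting its pending bit and bumping the count, after which
 * raid5_run_ops fires.  Variable names are hypothetical. */
static void sketch_request_compute(struct stripe_head *sh, int disk)
{
	set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
	sh->ops.target = disk;	/* block to (re)compute */
	sh->ops.count++;	/* raid5_run_ops runs while count is non-zero */
}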
+ 
+ struct stripe_head {
+@@ -136,15 +169,46 @@
+ 	spinlock_t		lock;
+ 	int			bm_seq;	/* sequence number for bitmap flushes */
+ 	int			disks;			/* disks in stripe */
++	/* stripe_operations
++	 * @pending - pending ops flags (set for request->issue->complete)
++	 * @ack - submitted ops flags (set for issue->complete)
++	 * @complete - completed ops flags (set for complete)
++	 * @target - STRIPE_OP_COMPUTE_BLK target
++	 * @count - raid5_run_ops is set to run when this is non-zero
++	 */
++	struct stripe_operations {
++		unsigned long	   pending;
++		unsigned long	   ack;
++		unsigned long	   complete;
++		int		   target;
++		int		   count;
++		u32		   zero_sum_result;
++	} ops;
+ 	struct r5dev {
+ 		struct bio	req;
+ 		struct bio_vec	vec;
+ 		struct page	*page;
+-		struct bio	*toread, *towrite, *written;
++		struct bio	*toread, *read, *towrite, *written;
+ 		sector_t	sector;			/* sector of this page */
+ 		unsigned long	flags;
+ 	} dev[1]; /* allocated with extra space depending of RAID geometry */
+ };
++
++/* stripe_head_state - collects and tracks the dynamic state of a stripe_head
++ *     for handle_stripe.  It is only valid under spin_lock(sh->lock);
++ */
++struct stripe_head_state {
++	int syncing, expanding, expanded;
++	int locked, uptodate, to_read, to_write, failed, written;
++	int to_fill, compute, req_compute, non_overwrite, dirty;
++	int failed_num;
++};
++
++/* r6_state - extra state data only relevant to r6 */
++struct r6_state {
++	int p_failed, q_failed, qd_idx, failed_num[2];
++};
++
+ /* Flags */
+ #define	R5_UPTODATE	0	/* page contains current data */
+ #define	R5_LOCKED	1	/* IO has been submitted on "req" */
+@@ -158,6 +222,15 @@
+ #define	R5_ReWrite	9	/* have tried to over-write the readerror */
+ 
+ #define	R5_Expanded	10	/* This block now has post-expand data */
++#define	R5_Wantcompute	11 /* compute_block in progress, treat as
++				    * uptodate
++				    */
++#define	R5_Wantfill	12 /* dev->toread contains a bio that needs
++				    * filling
++				    */
++#define	R5_Wantprexor	13 /* distinguish blocks ready for rmw from
++				    * other "towrites"
++				    */
+ /*
+  * Write method
+  */
+@@ -180,6 +253,24 @@
+ #define	STRIPE_EXPAND_SOURCE	10
+ #define	STRIPE_EXPAND_READY	11
+ /*
++ * Operations flags (in issue order)
++ */
++#define STRIPE_OP_BIOFILL	0
++#define STRIPE_OP_COMPUTE_BLK	1
++#define STRIPE_OP_PREXOR	2
++#define STRIPE_OP_BIODRAIN	3
++#define STRIPE_OP_POSTXOR	4
++#define STRIPE_OP_CHECK	5
++#define STRIPE_OP_IO		6
++
++/* modifiers to the base operations
++ * STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back
++ * STRIPE_OP_MOD_DMA_CHECK - parity is not corrupted by the check
++ */
++#define STRIPE_OP_MOD_REPAIR_PD 7
++#define STRIPE_OP_MOD_DMA_CHECK 8
++
++/*
+  * Plugging:
+  *
+  * To improve write throughput, we need to delay the handling of some
+diff -Nurb linux-2.6.22-570/include/linux/raid/xor.h linux-2.6.22-try2/include/linux/raid/xor.h
+--- linux-2.6.22-570/include/linux/raid/xor.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/raid/xor.h	2007-12-19 15:29:23.000000000 -0500
+@@ -3,9 +3,10 @@
+ 
+ #include <linux/raid/md.h>
+ 
+-#define MAX_XOR_BLOCKS 5
++#define MAX_XOR_BLOCKS 4
+ 
+-extern void xor_block(unsigned int count, unsigned int bytes, void **ptr);
++extern void xor_blocks(unsigned int count, unsigned int bytes,
++	void *dest, void **srcs);
+ 
+ struct xor_block_template {
+         struct xor_block_template *next;
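/* Editorial sketch (not from this patch): the reworked xor_blocks() takes
 * the destination separately instead of as ptr[0], so a two-source XOR that
 * used to pass a three-entry array now reads like this (buffer names are
 * hypothetical; count is capped at MAX_XOR_BLOCKS sources). */
static void sketch_xor_two(void *dest, void *a, void *b, unsigned int bytes)
{
	void *srcs[2] = { a, b };

	xor_blocks(2, bytes, dest, srcs);	/* dest ^= a ^ b */
}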
+diff -Nurb linux-2.6.22-570/include/linux/reboot.h linux-2.6.22-try2/include/linux/reboot.h
+--- linux-2.6.22-570/include/linux/reboot.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/reboot.h	2007-12-19 15:29:23.000000000 -0500
+@@ -67,6 +67,11 @@
+ 
+ void ctrl_alt_del(void);
+ 
++#define POWEROFF_CMD_PATH_LEN	256
++extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN];
++
++extern int orderly_poweroff(bool force);
++
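/* Editorial sketch (not from this patch): a typical caller, e.g. a thermal
 * driver shutting the machine down cleanly.  With force set, the kernel
 * falls back to an emergency power-off if running the userspace helper
 * (poweroff_cmd, /sbin/poweroff by default) fails. */
static void sketch_critical_overheat(void)
{
	orderly_poweroff(true);
}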
+ /*
+  * Emergency restart, callable from an interrupt handler.
+  */
+diff -Nurb linux-2.6.22-570/include/linux/revoked_fs_i.h linux-2.6.22-try2/include/linux/revoked_fs_i.h
+--- linux-2.6.22-570/include/linux/revoked_fs_i.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/linux/revoked_fs_i.h	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,18 @@
++#ifndef _LINUX_REVOKED_FS_I_H
++#define _LINUX_REVOKED_FS_I_H
++
++struct revokefs_inode_info {
++	struct task_struct *owner;
++	struct file *file;
++	unsigned int fd;
++	struct inode vfs_inode;
++};
++
++static inline struct revokefs_inode_info *revokefs_i(struct inode *inode)
++{
++	return container_of(inode, struct revokefs_inode_info, vfs_inode);
++}
++
++void make_revoked_inode(struct inode *, int);
++
++#endif
+diff -Nurb linux-2.6.22-570/include/linux/rtnetlink.h linux-2.6.22-try2/include/linux/rtnetlink.h
+--- linux-2.6.22-570/include/linux/rtnetlink.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/rtnetlink.h	2007-12-19 15:29:23.000000000 -0500
+@@ -261,7 +261,7 @@
+ 	RTA_FLOW,
+ 	RTA_CACHEINFO,
+ 	RTA_SESSION,
+-	RTA_MP_ALGO,
++	RTA_MP_ALGO, /* no longer used */
+ 	RTA_TABLE,
+ 	__RTA_MAX
+ };
+@@ -570,10 +570,16 @@
+ }
+ 
+ extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len);
++extern int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
++				        struct rtattr *rta, int len);
+ 
+ #define rtattr_parse_nested(tb, max, rta) \
+ 	rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta)))
+ 
++#define rtattr_parse_nested_compat(tb, max, rta, data, len) \
++({	data = RTA_PAYLOAD(rta) >= len ? RTA_DATA(rta) : NULL; \
++	__rtattr_parse_nested_compat(tb, max, rta, len); })
++
+ extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo);
+ extern int rtnl_unicast(struct sk_buff *skb, u32 pid);
+ extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
+@@ -638,6 +644,18 @@
+ ({	(start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \
+ 	(skb)->len; })
+ 
++#define RTA_NEST_COMPAT(skb, type, attrlen, data) \
++({	struct rtattr *__start = (struct rtattr *)skb_tail_pointer(skb); \
++	RTA_PUT(skb, type, attrlen, data); \
++	RTA_NEST(skb, type); \
++	__start; })
++
++#define RTA_NEST_COMPAT_END(skb, start) \
++({	struct rtattr *__nest = (void *)(start) + NLMSG_ALIGN((start)->rta_len); \
++	(start)->rta_len = skb_tail_pointer(skb) - (unsigned char *)(start); \
++	RTA_NEST_END(skb, __nest); \
++	(skb)->len; })
++
+ #define RTA_NEST_CANCEL(skb, start) \
+ ({	if (start) \
+ 		skb_trim(skb, (unsigned char *) (start) - (skb)->data); \
+diff -Nurb linux-2.6.22-570/include/linux/sched.h linux-2.6.22-try2/include/linux/sched.h
+--- linux-2.6.22-570/include/linux/sched.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/linux/sched.h	2007-12-19 15:50:06.000000000 -0500
+@@ -26,6 +26,7 @@
+ #define CLONE_STOPPED		0x02000000	/* Start in stopped state */
+ #define CLONE_NEWUTS		0x04000000	/* New utsname group? */
+ #define CLONE_NEWIPC		0x08000000	/* New ipcs */
++#define CLONE_NEWUSER		0x10000000	/* New user namespace */
+ #define CLONE_KTHREAD		0x10000000	/* clone a kernel thread */
+ 
+ /*
+@@ -266,6 +267,7 @@
+ asmlinkage void schedule(void);
+ 
+ struct nsproxy;
++struct user_namespace;
+ 
+ /* Maximum number of active map areas.. This is a random (large) number */
+ #define DEFAULT_MAX_MAP_COUNT	65536
+@@ -325,6 +327,27 @@
+ 		(mm)->hiwater_vm = (mm)->total_vm;	\
+ } while (0)
+ 
++extern void set_dumpable(struct mm_struct *mm, int value);
++extern int get_dumpable(struct mm_struct *mm);
++
++/* mm flags */
++/* dumpable bits */
++#define MMF_DUMPABLE      0  /* core dump is permitted */
++#define MMF_DUMP_SECURELY 1  /* core file is readable only by root */
++#define MMF_DUMPABLE_BITS 2
++
++/* coredump filter bits */
++#define MMF_DUMP_ANON_PRIVATE	2
++#define MMF_DUMP_ANON_SHARED	3
++#define MMF_DUMP_MAPPED_PRIVATE	4
++#define MMF_DUMP_MAPPED_SHARED	5
++#define MMF_DUMP_FILTER_SHIFT	MMF_DUMPABLE_BITS
++#define MMF_DUMP_FILTER_BITS	4
++#define MMF_DUMP_FILTER_MASK \
++	(((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT)
++#define MMF_DUMP_FILTER_DEFAULT \
++	((1 << MMF_DUMP_ANON_PRIVATE) |	(1 << MMF_DUMP_ANON_SHARED))
++
+ struct mm_struct {
+ 	struct vm_area_struct * mmap;		/* list of VMAs */
+ 	struct rb_root mm_rb;
+@@ -383,7 +406,7 @@
+ 	unsigned int token_priority;
+ 	unsigned int last_interval;
+ 
+-	unsigned char dumpable:2;
++	unsigned long flags; /* Must use atomic bitops to access the bits */
+ 
+ 	/* coredumping support */
+ 	int core_waiters;
+@@ -757,9 +780,6 @@
+ #endif
+ };
+ 
+-extern int partition_sched_domains(cpumask_t *partition1,
+-				    cpumask_t *partition2);
+-
+ /*
+  * Maximum cache size the migration-costs auto-tuning code will
+  * search from:
+@@ -770,8 +790,6 @@
+ 
+ 
+ struct io_context;			/* See blkdev.h */
+-struct cpuset;
+-
+ #define NGROUPS_SMALL		32
+ #define NGROUPS_PER_BLOCK	((int)(PAGE_SIZE / sizeof(gid_t)))
+ struct group_info {
+@@ -912,7 +930,7 @@
+ 	unsigned int rt_priority;
+ 	cputime_t utime, stime;
+ 	unsigned long nvcsw, nivcsw; /* context switch counts */
+-	struct timespec start_time;
++	struct timespec start_time, real_start_time;
+ /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
+ 	unsigned long min_flt, maj_flt;
+ 
+@@ -1067,11 +1085,16 @@
+ 	short il_next;
+ #endif
+ #ifdef CONFIG_CPUSETS
+-	struct cpuset *cpuset;
+ 	nodemask_t mems_allowed;
+ 	int cpuset_mems_generation;
+ 	int cpuset_mem_spread_rotor;
+ #endif
++#ifdef CONFIG_CONTAINERS
++	/* Container info protected by css_group_lock */
++	struct css_group *containers;
++	/* cg_list protected by css_group_lock and tsk->alloc_lock */
++	struct list_head cg_list;
++#endif
+ 	struct robust_list_head __user *robust_list;
+ #ifdef CONFIG_COMPAT
+ 	struct compat_robust_list_head __user *compat_robust_list;
+@@ -1514,7 +1537,8 @@
+ /*
+  * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
+  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
+- * pins the final release of task.io_context.  Also protects ->cpuset.
++ * pins the final release of task.io_context.  Also protects ->cpuset and
++ * ->container.subsys[].
+  *
+  * Nests both inside and outside of read_lock(&tasklist_lock).
+  * It must not be nested with write_lock_irq(&tasklist_lock),
+diff -Nurb linux-2.6.22-570/include/linux/seccomp.h linux-2.6.22-try2/include/linux/seccomp.h
+--- linux-2.6.22-570/include/linux/seccomp.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/seccomp.h	2007-12-19 15:29:24.000000000 -0500
+@@ -4,8 +4,6 @@
+ 
+ #ifdef CONFIG_SECCOMP
+ 
+-#define NR_SECCOMP_MODES 1
+-
+ #include <linux/thread_info.h>
+ #include <asm/seccomp.h>
+ 
+@@ -23,6 +21,9 @@
+ 	return unlikely(test_ti_thread_flag(ti, TIF_SECCOMP));
+ }
+ 
++extern long prctl_get_seccomp(void);
++extern long prctl_set_seccomp(unsigned long);
++
+ #else /* CONFIG_SECCOMP */
+ 
+ typedef struct { } seccomp_t;
+@@ -34,6 +35,16 @@
+ 	return 0;
+ }
+ 
++static inline long prctl_get_seccomp(void)
++{
++	return -EINVAL;
++}
++
++static inline long prctl_set_seccomp(unsigned long arg2)
++{
++	return -EINVAL;
++}
++
+ #endif /* CONFIG_SECCOMP */
+ 
+ #endif /* _LINUX_SECCOMP_H */
+diff -Nurb linux-2.6.22-570/include/linux/security.h linux-2.6.22-try2/include/linux/security.h
+--- linux-2.6.22-570/include/linux/security.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/security.h	2007-12-19 15:29:23.000000000 -0500
+@@ -71,6 +71,7 @@
+ extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb);
+ extern int cap_netlink_recv(struct sk_buff *skb, int cap);
+ 
++extern unsigned long mmap_min_addr;
+ /*
+  * Values used in the task_security_ops calls
+  */
+@@ -1241,8 +1242,9 @@
+ 	int (*file_ioctl) (struct file * file, unsigned int cmd,
+ 			   unsigned long arg);
+ 	int (*file_mmap) (struct file * file,
+-			  unsigned long reqprot,
+-			  unsigned long prot, unsigned long flags);
++			  unsigned long reqprot, unsigned long prot,
++			  unsigned long flags, unsigned long addr,
++			  unsigned long addr_only);
+ 	int (*file_mprotect) (struct vm_area_struct * vma,
+ 			      unsigned long reqprot,
+ 			      unsigned long prot);
+@@ -1814,9 +1816,12 @@
+ 
+ static inline int security_file_mmap (struct file *file, unsigned long reqprot,
+ 				      unsigned long prot,
+-				      unsigned long flags)
++				      unsigned long flags,
++				      unsigned long addr,
++				      unsigned long addr_only)
+ {
+-	return security_ops->file_mmap (file, reqprot, prot, flags);
++	return security_ops->file_mmap (file, reqprot, prot, flags, addr,
++					addr_only);
+ }
+ 
+ static inline int security_file_mprotect (struct vm_area_struct *vma,
+@@ -2489,7 +2494,9 @@
+ 
+ static inline int security_file_mmap (struct file *file, unsigned long reqprot,
+ 				      unsigned long prot,
+-				      unsigned long flags)
++				      unsigned long flags,
++				      unsigned long addr,
++				      unsigned long addr_only)
+ {
+ 	return 0;
+ }
+diff -Nurb linux-2.6.22-570/include/linux/serial_8250.h linux-2.6.22-try2/include/linux/serial_8250.h
+--- linux-2.6.22-570/include/linux/serial_8250.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/serial_8250.h	2007-12-19 15:29:24.000000000 -0500
+@@ -57,6 +57,7 @@
+ 
+ int serial8250_register_port(struct uart_port *);
+ void serial8250_unregister_port(int line);
++void serial8250_unregister_by_port(struct uart_port *port);
+ void serial8250_suspend_port(int line);
+ void serial8250_resume_port(int line);
+ 
+diff -Nurb linux-2.6.22-570/include/linux/signal.h linux-2.6.22-try2/include/linux/signal.h
+--- linux-2.6.22-570/include/linux/signal.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/signal.h	2007-12-19 15:29:23.000000000 -0500
+@@ -238,12 +238,15 @@
+ extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
+ extern long do_sigpending(void __user *, unsigned long);
+ extern int sigprocmask(int, sigset_t *, sigset_t *);
++extern int show_unhandled_signals;
+ 
+ struct pt_regs;
+ extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
+ 
+ extern struct kmem_cache *sighand_cachep;
+ 
++int unhandled_signal(struct task_struct *tsk, int sig);
++
+ /*
+  * In POSIX a signal is sent either to a specific thread (Linux task)
+  * or to the process as a whole (Linux thread group).  How the signal
+diff -Nurb linux-2.6.22-570/include/linux/skbuff.h linux-2.6.22-try2/include/linux/skbuff.h
+--- linux-2.6.22-570/include/linux/skbuff.h	2007-12-12 18:08:39.000000000 -0500
++++ linux-2.6.22-try2/include/linux/skbuff.h	2007-12-19 15:29:23.000000000 -0500
+@@ -147,8 +147,8 @@
+ 
+ /* We divide dataref into two halves.  The higher 16 bits hold references
+  * to the payload part of skb->data.  The lower 16 bits hold references to
+- * the entire skb->data.  It is up to the users of the skb to agree on
+- * where the payload starts.
++ * the entire skb->data.  A clone of a headerless skb holds the length of
++ * the header in skb->hdr_len.
+  *
+  * All users must obey the rule that the skb->data reference count must be
+  * greater than or equal to the payload reference count.
+@@ -206,6 +206,7 @@
+  *	@len: Length of actual data
+  *	@data_len: Data length
+  *	@mac_len: Length of link layer header
++ *	@hdr_len: writable header length of cloned skb
+  *	@csum: Checksum (must include start/offset pair)
+  *	@csum_start: Offset from skb->head where checksumming should start
+  *	@csum_offset: Offset from csum_start where checksum should be stored
+@@ -260,8 +261,9 @@
+ 	char			cb[48];
+ 
+ 	unsigned int		len,
+-				data_len,
+-				mac_len;
++				data_len;
++	__u16			mac_len,
++				hdr_len;
+ 	union {
+ 		__wsum		csum;
+ 		struct {
+@@ -1323,6 +1325,20 @@
+ }
+ 
+ /**
++ *	skb_clone_writable - is the header of a clone writable
++ *	@skb: buffer to check
++ *	@len: length up to which to write
++ *
++ *	Returns true if modifying the header part of the cloned buffer
++ *	does not require the data to be copied.
++ */
++static inline int skb_clone_writable(struct sk_buff *skb, int len)
++{
++	return !skb_header_cloned(skb) &&
++	       skb_headroom(skb) + len <= skb->hdr_len;
++}
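/* Editorial sketch (not from this patch): a typical caller that wants to
 * rewrite 'len' header bytes in place, unsharing the data only when a clone
 * still references the header. */
static int sketch_make_header_writable(struct sk_buff *skb, unsigned int len)
{
	if (skb_cloned(skb) && !skb_clone_writable(skb, len) &&
	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
		return -ENOMEM;
	return 0;	/* header bytes up to 'len' may now be modified */
}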
++
++/**
+  *	skb_cow - copy header of skb when it is required
+  *	@skb: buffer to cow
+  *	@headroom: needed headroom
+diff -Nurb linux-2.6.22-570/include/linux/slab.h linux-2.6.22-try2/include/linux/slab.h
+--- linux-2.6.22-570/include/linux/slab.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/slab.h	2007-12-19 15:29:24.000000000 -0500
+@@ -26,12 +26,14 @@
+ #define SLAB_HWCACHE_ALIGN	0x00002000UL	/* Align objs on cache lines */
+ #define SLAB_CACHE_DMA		0x00004000UL	/* Use GFP_DMA memory */
+ #define SLAB_STORE_USER		0x00010000UL	/* DEBUG: Store the last owner for bug hunting */
+-#define SLAB_RECLAIM_ACCOUNT	0x00020000UL	/* Objects are reclaimable */
+ #define SLAB_PANIC		0x00040000UL	/* Panic if kmem_cache_create() fails */
+ #define SLAB_DESTROY_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */
+ #define SLAB_MEM_SPREAD		0x00100000UL	/* Spread some memory over cpuset */
+ #define SLAB_TRACE		0x00200000UL	/* Trace allocations and frees */
+ 
++/* The following flags affect the page allocator grouping pages by mobility */
++#define SLAB_RECLAIM_ACCOUNT	0x00020000UL		/* Objects are reclaimable */
++#define SLAB_TEMPORARY		SLAB_RECLAIM_ACCOUNT	/* Objects are short-lived */
+ /*
+  * struct kmem_cache related prototypes
+  */
+diff -Nurb linux-2.6.22-570/include/linux/string.h linux-2.6.22-try2/include/linux/string.h
+--- linux-2.6.22-570/include/linux/string.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/string.h	2007-12-19 15:29:23.000000000 -0500
+@@ -105,8 +105,12 @@
+ #endif
+ 
+ extern char *kstrdup(const char *s, gfp_t gfp);
++extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
+ extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
+ 
++extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
++extern void argv_free(char **argv);
++
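/* Editorial sketch (not from this patch): argv_split() returns a
 * NULL-terminated vector of kmalloc'ed copies of the whitespace-separated
 * words, freed as a unit with argv_free(); this is what the
 * orderly_poweroff() helper uses to build its usermode-helper arguments. */
static int sketch_split_cmd(const char *cmd)
{
	int argc;
	char **argv = argv_split(GFP_KERNEL, cmd, &argc);

	if (!argv)
		return -ENOMEM;
	/* argv[0] .. argv[argc - 1] hold the individual words */
	argv_free(argv);
	return 0;
}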
+ #ifdef __cplusplus
+ }
+ #endif
+diff -Nurb linux-2.6.22-570/include/linux/sunrpc/auth.h linux-2.6.22-try2/include/linux/sunrpc/auth.h
+--- linux-2.6.22-570/include/linux/sunrpc/auth.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/linux/sunrpc/auth.h	2007-12-19 15:29:23.000000000 -0500
+@@ -16,6 +16,7 @@
+ #include <linux/sunrpc/xdr.h>
+ 
+ #include <asm/atomic.h>
++#include <linux/rcupdate.h>
+ 
+ /* size of the nodename buffer */
+ #define UNX_MAXNODENAME	32
+@@ -31,22 +32,28 @@
+ /*
+  * Client user credentials
+  */
++struct rpc_auth;
++struct rpc_credops;
+ struct rpc_cred {
+ 	struct hlist_node	cr_hash;	/* hash chain */
+-	struct rpc_credops *	cr_ops;
+-	unsigned long		cr_expire;	/* when to gc */
+-	atomic_t		cr_count;	/* ref count */
+-	unsigned short		cr_flags;	/* various flags */
++	struct list_head	cr_lru;		/* lru garbage collection */
++	struct rcu_head		cr_rcu;
++	struct rpc_auth *	cr_auth;
++	const struct rpc_credops *cr_ops;
+ #ifdef RPC_DEBUG
+ 	unsigned long		cr_magic;	/* 0x0f4aa4f0 */
+ #endif
++	unsigned long		cr_expire;	/* when to gc */
++	unsigned long		cr_flags;	/* various flags */
++	atomic_t		cr_count;	/* ref count */
+ 
+ 	uid_t			cr_uid;
+ 
+ 	/* per-flavor data */
+ };
+-#define RPCAUTH_CRED_NEW	0x0001
+-#define RPCAUTH_CRED_UPTODATE	0x0002
++#define RPCAUTH_CRED_NEW	0
++#define RPCAUTH_CRED_UPTODATE	1
++#define RPCAUTH_CRED_HASHED	2
+ 
+ #define RPCAUTH_CRED_MAGIC	0x0f4aa4f0
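/* Editorial sketch (not from this patch): the RPCAUTH_CRED_* values are now
 * bit numbers rather than masks, so tests and updates switch from
 * "cr_flags & RPCAUTH_CRED_UPTODATE" style to the atomic bitops on the
 * (now unsigned long) cr_flags field: */
static inline int sketch_cred_uptodate(struct rpc_cred *cred)
{
	return test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
}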
+ 
+@@ -57,10 +64,10 @@
+ #define RPC_CREDCACHE_MASK	(RPC_CREDCACHE_NR - 1)
+ struct rpc_cred_cache {
+ 	struct hlist_head	hashtable[RPC_CREDCACHE_NR];
+-	unsigned long		nextgc;		/* next garbage collection */
+-	unsigned long		expire;		/* cache expiry interval */
++	spinlock_t		lock;
+ };
+ 
++struct rpc_authops;
+ struct rpc_auth {
+ 	unsigned int		au_cslack;	/* call cred size estimate */
+ 				/* guess at number of u32's auth adds before
+@@ -70,7 +77,7 @@
+ 	unsigned int		au_verfsize;
+ 
+ 	unsigned int		au_flags;	/* various flags */
+-	struct rpc_authops *	au_ops;		/* operations */
++	const struct rpc_authops *au_ops;		/* operations */
+ 	rpc_authflavor_t	au_flavor;	/* pseudoflavor (note may
+ 						 * differ from the flavor in
+ 						 * au_ops->au_flavor in gss
+@@ -116,17 +123,19 @@
+ 						void *, __be32 *, void *);
+ };
+ 
+-extern struct rpc_authops	authunix_ops;
+-extern struct rpc_authops	authnull_ops;
+-#ifdef CONFIG_SUNRPC_SECURE
+-extern struct rpc_authops	authdes_ops;
+-#endif
++extern const struct rpc_authops	authunix_ops;
++extern const struct rpc_authops	authnull_ops;
++
++void __init		rpc_init_authunix(void);
++void __init		rpcauth_init_module(void);
++void __exit		rpcauth_remove_module(void);
+ 
+-int			rpcauth_register(struct rpc_authops *);
+-int			rpcauth_unregister(struct rpc_authops *);
++int			rpcauth_register(const struct rpc_authops *);
++int			rpcauth_unregister(const struct rpc_authops *);
+ struct rpc_auth *	rpcauth_create(rpc_authflavor_t, struct rpc_clnt *);
+-void			rpcauth_destroy(struct rpc_auth *);
++void			rpcauth_release(struct rpc_auth *);
+ struct rpc_cred *	rpcauth_lookup_credcache(struct rpc_auth *, struct auth_cred *, int);
++void			rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
+ struct rpc_cred *	rpcauth_lookupcred(struct rpc_auth *, int);
+ struct rpc_cred *	rpcauth_bindcred(struct rpc_task *);
+ void			rpcauth_holdcred(struct rpc_task *);
+@@ -139,8 +148,9 @@
+ int			rpcauth_refreshcred(struct rpc_task *);
+ void			rpcauth_invalcred(struct rpc_task *);
+ int			rpcauth_uptodatecred(struct rpc_task *);
+-int			rpcauth_init_credcache(struct rpc_auth *, unsigned long);
+-void			rpcauth_free_credcache(struct rpc_auth *);
++int			rpcauth_init_credcache(struct rpc_auth *);
++void			rpcauth_destroy_credcache(struct rpc_auth *);
++void			rpcauth_clear_credcache(struct rpc_cred_cache *);
+ 
+ static inline
+ struct rpc_cred *	get_rpccred(struct rpc_cred *cred)
+diff -Nurb linux-2.6.22-570/include/linux/sunrpc/auth_gss.h linux-2.6.22-try2/include/linux/sunrpc/auth_gss.h
+--- linux-2.6.22-570/include/linux/sunrpc/auth_gss.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/sunrpc/auth_gss.h	2007-12-19 15:29:23.000000000 -0500
+@@ -85,11 +85,6 @@
+ 	struct gss_upcall_msg	*gc_upcall;
+ };
+ 
+-#define gc_uid			gc_base.cr_uid
+-#define gc_count		gc_base.cr_count
+-#define gc_flags		gc_base.cr_flags
+-#define gc_expire		gc_base.cr_expire
+-
+ #endif /* __KERNEL__ */
+ #endif /* _LINUX_SUNRPC_AUTH_GSS_H */
+ 
+diff -Nurb linux-2.6.22-570/include/linux/sunrpc/clnt.h linux-2.6.22-try2/include/linux/sunrpc/clnt.h
+--- linux-2.6.22-570/include/linux/sunrpc/clnt.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/linux/sunrpc/clnt.h	2007-12-19 15:48:09.000000000 -0500
+@@ -24,8 +24,10 @@
+  * The high-level client handle
+  */
+ struct rpc_clnt {
+-	atomic_t		cl_count;	/* Number of clones */
+-	atomic_t		cl_users;	/* number of references */
++	struct kref		cl_kref;	/* Number of references */
++	struct list_head	cl_clients;	/* Global list of clients */
++	struct list_head	cl_tasks;	/* List of tasks */
++	spinlock_t		cl_lock;	/* spinlock */
+ 	struct rpc_xprt *	cl_xprt;	/* transport */
+ 	struct rpc_procinfo *	cl_procinfo;	/* procedure info */
+ 	u32			cl_prog,	/* RPC program number */
+@@ -41,10 +43,7 @@
+ 	unsigned int		cl_softrtry : 1,/* soft timeouts */
+ 				cl_intr     : 1,/* interruptible */
+ 				cl_discrtry : 1,/* disconnect before retry */
+-				cl_autobind : 1,/* use getport() */
+-				cl_oneshot  : 1,/* dispose after use */
+-				cl_dead     : 1,/* abandoned */
+-				cl_tag      : 1;/* context tagging */
++				cl_autobind : 1;/* use getport() */
+ 
+ 	struct rpc_rtt *	cl_rtt;		/* RTO estimator data */
+ 
+@@ -111,17 +110,15 @@
+ #define RPC_CLNT_CREATE_HARDRTRY	(1UL << 0)
+ #define RPC_CLNT_CREATE_INTR		(1UL << 1)
+ #define RPC_CLNT_CREATE_AUTOBIND	(1UL << 2)
+-#define RPC_CLNT_CREATE_ONESHOT		(1UL << 3)
+-#define RPC_CLNT_CREATE_NONPRIVPORT	(1UL << 4)
+-#define RPC_CLNT_CREATE_NOPING		(1UL << 5)
+-#define RPC_CLNT_CREATE_DISCRTRY	(1UL << 6)
++#define RPC_CLNT_CREATE_NONPRIVPORT	(1UL << 3)
++#define RPC_CLNT_CREATE_NOPING		(1UL << 4)
++#define RPC_CLNT_CREATE_DISCRTRY	(1UL << 5)
+ 
+ struct rpc_clnt *rpc_create(struct rpc_create_args *args);
+ struct rpc_clnt	*rpc_bind_new_program(struct rpc_clnt *,
+ 				struct rpc_program *, int);
+ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
+-int		rpc_shutdown_client(struct rpc_clnt *);
+-int		rpc_destroy_client(struct rpc_clnt *);
++void		rpc_shutdown_client(struct rpc_clnt *);
+ void		rpc_release_client(struct rpc_clnt *);
+ int		rpcb_register(u32, u32, int, unsigned short, int *);
+ void		rpcb_getport(struct rpc_task *);
+@@ -133,13 +130,14 @@
+ 			       void *calldata);
+ int		rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg,
+ 			      int flags);
++struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred,
++			       int flags);
+ void		rpc_restart_call(struct rpc_task *);
+ void		rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
+ void		rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
+ void		rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
+ size_t		rpc_max_payload(struct rpc_clnt *);
+ void		rpc_force_rebind(struct rpc_clnt *);
+-int		rpc_ping(struct rpc_clnt *clnt, int flags);
+ size_t		rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
+ char *		rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
+ 
+diff -Nurb linux-2.6.22-570/include/linux/sunrpc/gss_api.h linux-2.6.22-try2/include/linux/sunrpc/gss_api.h
+--- linux-2.6.22-570/include/linux/sunrpc/gss_api.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/sunrpc/gss_api.h	2007-12-19 15:29:23.000000000 -0500
+@@ -77,7 +77,7 @@
+ 	struct module		*gm_owner;
+ 	struct xdr_netobj	gm_oid;
+ 	char			*gm_name;
+-	struct gss_api_ops	*gm_ops;
++	const struct gss_api_ops *gm_ops;
+ 	/* pseudoflavors supported by this mechanism: */
+ 	int			gm_pf_num;
+ 	struct pf_desc *	gm_pfs;
+diff -Nurb linux-2.6.22-570/include/linux/sunrpc/rpc_pipe_fs.h linux-2.6.22-try2/include/linux/sunrpc/rpc_pipe_fs.h
+--- linux-2.6.22-570/include/linux/sunrpc/rpc_pipe_fs.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/sunrpc/rpc_pipe_fs.h	2007-12-19 15:29:23.000000000 -0500
+@@ -23,9 +23,11 @@
+ 	void *private;
+ 	struct list_head pipe;
+ 	struct list_head in_upcall;
++	struct list_head in_downcall;
+ 	int pipelen;
+ 	int nreaders;
+ 	int nwriters;
++	int nkern_readwriters;
+ 	wait_queue_head_t waitq;
+ #define RPC_PIPE_WAIT_FOR_OPEN	1
+ 	int flags;
+diff -Nurb linux-2.6.22-570/include/linux/sunrpc/sched.h linux-2.6.22-try2/include/linux/sunrpc/sched.h
+--- linux-2.6.22-570/include/linux/sunrpc/sched.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/sunrpc/sched.h	2007-12-19 15:29:23.000000000 -0500
+@@ -110,11 +110,6 @@
+ 	if (!list_empty(head) &&  \
+ 	    ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1))
+ 
+-/* .. and walking list of all tasks */
+-#define	alltask_for_each(task, pos, head) \
+-	list_for_each(pos, head) \
+-		if ((task=list_entry(pos, struct rpc_task, tk_task)),1)
+-
+ typedef void			(*rpc_action)(struct rpc_task *);
+ 
+ struct rpc_call_ops {
+diff -Nurb linux-2.6.22-570/include/linux/syscalls.h linux-2.6.22-try2/include/linux/syscalls.h
+--- linux-2.6.22-570/include/linux/syscalls.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/linux/syscalls.h	2007-12-19 15:48:38.000000000 -0500
+@@ -110,6 +110,9 @@
+ asmlinkage long sys_capset(cap_user_header_t header,
+ 				const cap_user_data_t data);
+ asmlinkage long sys_personality(u_long personality);
++asmlinkage long sys_sync_file_range2(int fd, unsigned int flags,
++				     loff_t offset, loff_t nbytes);
++
+ 
+ asmlinkage long sys_sigpending(old_sigset_t __user *set);
+ asmlinkage long sys_sigprocmask(int how, old_sigset_t __user *set,
+@@ -612,7 +615,11 @@
+ asmlinkage long sys_timerfd(int ufd, int clockid, int flags,
+ 			    const struct itimerspec __user *utmr);
+ asmlinkage long sys_eventfd(unsigned int count);
++asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
+ 
+ int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
+ 
++asmlinkage long sys_revokeat(int dfd, const char __user *filename);
++asmlinkage long sys_frevoke(unsigned int fd);
++
+ #endif
+diff -Nurb linux-2.6.22-570/include/linux/sysctl.h linux-2.6.22-try2/include/linux/sysctl.h
+--- linux-2.6.22-570/include/linux/sysctl.h	2007-12-12 18:08:39.000000000 -0500
++++ linux-2.6.22-try2/include/linux/sysctl.h	2007-12-19 15:29:24.000000000 -0500
+@@ -166,6 +166,7 @@
+ 	KERN_MAX_LOCK_DEPTH=74,
+ 	KERN_NMI_WATCHDOG=75, /* int: enable/disable nmi watchdog */
+ 	KERN_PANIC_ON_NMI=76, /* int: whether we will panic on an unrecovered */
++	KERN_POWEROFF_CMD=77,	/* string: poweroff command line */
+ };
+ 
+ 
+@@ -208,6 +209,7 @@
+ 	VM_PANIC_ON_OOM=33,	/* panic at out-of-memory */
+ 	VM_VDSO_ENABLED=34,	/* map VDSO into new processes? */
+ 	VM_MIN_SLAB=35,		 /* Percent pages ignored by zone reclaim */
++	VM_HUGETLB_TREAT_MOVABLE=36, /* Allocate hugepages from ZONE_MOVABLE */
+ 
+ 	/* s390 vm cmm sysctls */
+ 	VM_CMM_PAGES=1111,
+@@ -843,6 +845,9 @@
+ };
+ 
+ /* CTL_DEBUG names: */
++enum {
++	DEBUG_UNHANDLED_SIGNALS = 1,
++};
+ 
+ /* CTL_DEV names: */
+ enum {
+diff -Nurb linux-2.6.22-570/include/linux/sysdev.h linux-2.6.22-try2/include/linux/sysdev.h
+--- linux-2.6.22-570/include/linux/sysdev.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/sysdev.h	2007-12-19 15:29:22.000000000 -0500
+@@ -101,8 +101,7 @@
+ 
+ #define _SYSDEV_ATTR(_name,_mode,_show,_store)			\
+ {								\
+-	.attr = { .name = __stringify(_name), .mode = _mode,	\
+-		 .owner = THIS_MODULE },			\
++	.attr = { .name = __stringify(_name), .mode = _mode },	\
+ 	.show	= _show,					\
+ 	.store	= _store,					\
+ }
+diff -Nurb linux-2.6.22-570/include/linux/sysfs.h linux-2.6.22-try2/include/linux/sysfs.h
+--- linux-2.6.22-570/include/linux/sysfs.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/linux/sysfs.h	2007-12-19 15:29:23.000000000 -0500
+@@ -21,7 +21,12 @@
+ struct module;
+ struct nameidata;
+ struct dentry;
++struct sysfs_dirent;
+ 
++/* FIXME
++ * The *owner field is no longer used, but leave around
++ * until the tree gets cleaned up fully.
++ */
+ struct attribute {
+ 	const char		* name;
+ 	struct module 		* owner;
+@@ -41,13 +46,13 @@
+  */
+ 
+ #define __ATTR(_name,_mode,_show,_store) { \
+-	.attr = {.name = __stringify(_name), .mode = _mode, .owner = THIS_MODULE },	\
++	.attr = {.name = __stringify(_name), .mode = _mode },	\
+ 	.show	= _show,					\
+ 	.store	= _store,					\
+ }
+ 
+ #define __ATTR_RO(_name) { \
+-	.attr	= { .name = __stringify(_name), .mode = 0444, .owner = THIS_MODULE },	\
++	.attr	= { .name = __stringify(_name), .mode = 0444 },	\
+ 	.show	= _name##_show,	\
+ }
+ 
+@@ -61,8 +66,10 @@
+ 	struct attribute	attr;
+ 	size_t			size;
+ 	void			*private;
+-	ssize_t (*read)(struct kobject *, char *, loff_t, size_t);
+-	ssize_t (*write)(struct kobject *, char *, loff_t, size_t);
++	ssize_t (*read)(struct kobject *, struct bin_attribute *,
++			char *, loff_t, size_t);
++	ssize_t (*write)(struct kobject *, struct bin_attribute *,
++			 char *, loff_t, size_t);
+ 	int (*mmap)(struct kobject *, struct bin_attribute *attr,
+ 		    struct vm_area_struct *vma);
+ };
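/* Editorial sketch (not from this patch): a ->read() implementation updated
 * to the new prototype, which now receives the bin_attribute directly
 * instead of requiring a lookup from the kobject (names hypothetical). */
static ssize_t sketch_bin_read(struct kobject *kobj, struct bin_attribute *attr,
			       char *buf, loff_t off, size_t count)
{
	/* attr->private and attr->size are reachable without container_of() */
	if (off >= attr->size)
		return 0;
	if (off + count > attr->size)
		count = attr->size - off;
	memcpy(buf, (char *)attr->private + off, count);
	return count;
}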
+@@ -72,12 +79,16 @@
+ 	ssize_t	(*store)(struct kobject *,struct attribute *,const char *, size_t);
+ };
+ 
++#define SYSFS_TYPE_MASK		0x00ff
+ #define SYSFS_ROOT		0x0001
+ #define SYSFS_DIR		0x0002
+ #define SYSFS_KOBJ_ATTR 	0x0004
+ #define SYSFS_KOBJ_BIN_ATTR	0x0008
+ #define SYSFS_KOBJ_LINK 	0x0020
+-#define SYSFS_NOT_PINNED	(SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR | SYSFS_KOBJ_LINK)
++#define SYSFS_COPY_NAME		(SYSFS_DIR | SYSFS_KOBJ_LINK)
++
++#define SYSFS_FLAG_MASK		~SYSFS_TYPE_MASK
++#define SYSFS_FLAG_REMOVED	0x0100
+ 
+ #ifdef CONFIG_SYSFS
+ 
+@@ -85,13 +96,14 @@
+ 		void (*func)(void *), void *data, struct module *owner);
+ 
+ extern int __must_check
+-sysfs_create_dir(struct kobject *, struct dentry *);
++sysfs_create_dir(struct kobject *kobj, struct sysfs_dirent *shadow_parent_sd);
+ 
+ extern void
+ sysfs_remove_dir(struct kobject *);
+ 
+ extern int __must_check
+-sysfs_rename_dir(struct kobject *, struct dentry *, const char *new_name);
++sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
++		 const char *new_name);
+ 
+ extern int __must_check
+ sysfs_move_dir(struct kobject *, struct kobject *);
+@@ -131,8 +143,8 @@
+ 
+ extern int sysfs_make_shadowed_dir(struct kobject *kobj,
+ 	void * (*follow_link)(struct dentry *, struct nameidata *));
+-extern struct dentry *sysfs_create_shadow_dir(struct kobject *kobj);
+-extern void sysfs_remove_shadow_dir(struct dentry *dir);
++extern struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj);
++extern void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd);
+ 
+ extern int __must_check sysfs_init(void);
+ 
+@@ -144,7 +156,8 @@
+ 	return -ENOSYS;
+ }
+ 
+-static inline int sysfs_create_dir(struct kobject * k, struct dentry *shadow)
++static inline int sysfs_create_dir(struct kobject *kobj,
++				   struct sysfs_dirent *shadow_parent_sd)
+ {
+ 	return 0;
+ }
+@@ -154,8 +167,8 @@
+ 	;
+ }
+ 
+-static inline int sysfs_rename_dir(struct kobject * k,
+-					struct dentry *new_parent,
++static inline int sysfs_rename_dir(struct kobject *kobj,
++				   struct sysfs_dirent *new_parent_sd,
+ 					const char *new_name)
+ {
+ 	return 0;
+diff -Nurb linux-2.6.22-570/include/linux/taskstats.h linux-2.6.22-try2/include/linux/taskstats.h
+--- linux-2.6.22-570/include/linux/taskstats.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/taskstats.h	2007-12-19 15:29:24.000000000 -0500
+@@ -31,7 +31,7 @@
+  */
+ 
+ 
+-#define TASKSTATS_VERSION	4
++#define TASKSTATS_VERSION	5
+ #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
+ 					 * in linux/sched.h */
+ 
+@@ -149,6 +149,9 @@
+ 	__u64	read_bytes;		/* bytes of read I/O */
+ 	__u64	write_bytes;		/* bytes of write I/O */
+ 	__u64	cancelled_write_bytes;	/* bytes of cancelled write I/O */
++
++	__u64  nvcsw;			/* voluntary_ctxt_switches */
++	__u64  nivcsw;			/* nonvoluntary_ctxt_switches */
+ };
+ 
+ 
+diff -Nurb linux-2.6.22-570/include/linux/tick.h linux-2.6.22-try2/include/linux/tick.h
+--- linux-2.6.22-570/include/linux/tick.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/tick.h	2007-12-19 15:29:22.000000000 -0500
+@@ -40,6 +40,7 @@
+  * @idle_sleeps:	Number of idle calls, where the sched tick was stopped
+  * @idle_entrytime:	Time when the idle call was entered
+  * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
++ * @sleep_length:	Duration of the current idle sleep
+  */
+ struct tick_sched {
+ 	struct hrtimer			sched_timer;
+@@ -52,6 +53,7 @@
+ 	unsigned long			idle_sleeps;
+ 	ktime_t				idle_entrytime;
+ 	ktime_t				idle_sleeptime;
++	ktime_t				sleep_length;
+ 	unsigned long			last_jiffies;
+ 	unsigned long			next_jiffies;
+ 	ktime_t				idle_expires;
+@@ -100,10 +102,18 @@
+ extern void tick_nohz_stop_sched_tick(void);
+ extern void tick_nohz_restart_sched_tick(void);
+ extern void tick_nohz_update_jiffies(void);
++extern ktime_t tick_nohz_get_sleep_length(void);
++extern unsigned long tick_nohz_get_idle_jiffies(void);
+ # else
+ static inline void tick_nohz_stop_sched_tick(void) { }
+ static inline void tick_nohz_restart_sched_tick(void) { }
+ static inline void tick_nohz_update_jiffies(void) { }
++static inline ktime_t tick_nohz_get_sleep_length(void)
++{
++	ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
++
++	return len;
++}
+ # endif /* !NO_HZ */
+ 
+ #endif
+diff -Nurb linux-2.6.22-570/include/linux/union_fs.h linux-2.6.22-try2/include/linux/union_fs.h
+--- linux-2.6.22-570/include/linux/union_fs.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/linux/union_fs.h	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,29 @@
++/*
++ * Copyright (c) 2003-2007 Erez Zadok
++ * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
++ * Copyright (c) 2003-2007 Stony Brook University
++ * Copyright (c) 2003-2007 The Research Foundation of SUNY
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#ifndef _LINUX_UNION_FS_H
++#define _LINUX_UNION_FS_H
++
++#define UNIONFS_VERSION  "2.0"
++/*
++ * DEFINITIONS FOR USER AND KERNEL CODE:
++ */
++# define UNIONFS_IOCTL_INCGEN		_IOR(0x15, 11, int)
++# define UNIONFS_IOCTL_QUERYFILE	_IOR(0x15, 15, int)
++
++/* We don't support normal remount, but unionctl uses it. */
++# define UNIONFS_REMOUNT_MAGIC		0x4a5a4380
++
++/* should be at least LAST_USED_UNIONFS_PERMISSION<<1 */
++#define MAY_NFSRO			16
++
++#endif /* _LINUX_UNION_FS_H */
++
+diff -Nurb linux-2.6.22-570/include/linux/unwind.h linux-2.6.22-try2/include/linux/unwind.h
+--- linux-2.6.22-570/include/linux/unwind.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/unwind.h	2007-12-19 15:29:23.000000000 -0500
+@@ -14,6 +14,63 @@
+ 
+ struct module;
+ 
++#ifdef CONFIG_STACK_UNWIND
++
++#include <asm/unwind.h>
++
++#ifndef ARCH_UNWIND_SECTION_NAME
++#define ARCH_UNWIND_SECTION_NAME ".eh_frame"
++#endif
++
++/*
++ * Initialize unwind support.
++ */
++extern void unwind_init(void);
++extern void unwind_setup(void);
++
++#ifdef CONFIG_MODULES
++
++extern void *unwind_add_table(struct module *,
++                              const void *table_start,
++                              unsigned long table_size);
++
++extern void unwind_remove_table(void *handle, int init_only);
++
++#endif
++
++extern int unwind_init_frame_info(struct unwind_frame_info *,
++                                  struct task_struct *,
++                                  /*const*/ struct pt_regs *);
++
++/*
++ * Prepare to unwind a blocked task.
++ */
++extern int unwind_init_blocked(struct unwind_frame_info *,
++                               struct task_struct *);
++
++/*
++ * Prepare to unwind the currently running thread.
++ */
++extern int unwind_init_running(struct unwind_frame_info *,
++                               asmlinkage int (*callback)(struct unwind_frame_info *,
++                                                          void *arg),
++                               void *arg);
++
++/*
++ * Unwind to the previous frame.  Returns 0 if successful, negative
++ * number in case of an error.
++ */
++extern int unwind(struct unwind_frame_info *);
++
++/*
++ * Unwind until the return pointer is in user-land (or until an error
++ * occurs).  Returns 0 if successful, negative number in case of
++ * error.
++ */
++extern int unwind_to_user(struct unwind_frame_info *);
++
++#else
++
+ struct unwind_frame_info {};
+ 
+ static inline void unwind_init(void) {}
+@@ -28,12 +85,12 @@
+ 	return NULL;
+ }
+ 
++#endif
++
+ static inline void unwind_remove_table(void *handle, int init_only)
+ {
+ }
+ 
+-#endif
+-
+ static inline int unwind_init_frame_info(struct unwind_frame_info *info,
+                                          struct task_struct *tsk,
+                                          const struct pt_regs *regs)
+@@ -65,4 +122,6 @@
+ 	return -ENOSYS;
+ }
+ 
++#endif
++
+ #endif /* _LINUX_UNWIND_H */
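/* Editorial sketch (not from this patch): with CONFIG_STACK_UNWIND, the
 * restored API walks a blocked task's stack roughly like this; per-frame
 * inspection of the register state is elided. */
static int sketch_walk_blocked(struct task_struct *tsk)
{
	struct unwind_frame_info info;

	if (unwind_init_blocked(&info, tsk) != 0)
		return -EINVAL;
	while (unwind(&info) == 0)
		;	/* examine info's frame state here */
	return 0;
}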
+diff -Nurb linux-2.6.22-570/include/linux/user_namespace.h linux-2.6.22-try2/include/linux/user_namespace.h
+--- linux-2.6.22-570/include/linux/user_namespace.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/include/linux/user_namespace.h	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,61 @@
++#ifndef _LINUX_USER_NAMESPACE_H
++#define _LINUX_USER_NAMESPACE_H
++
++#include <linux/kref.h>
++#include <linux/nsproxy.h>
++#include <linux/sched.h>
++#include <linux/err.h>
++
++#define UIDHASH_BITS	(CONFIG_BASE_SMALL ? 3 : 8)
++#define UIDHASH_SZ	(1 << UIDHASH_BITS)
++
++struct user_namespace {
++	struct kref		kref;
++	struct list_head	uidhash_table[UIDHASH_SZ];
++	struct user_struct	*root_user;
++};
++
++extern struct user_namespace init_user_ns;
++
++#ifdef CONFIG_USER_NS
++
++static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
++{
++	if (ns)
++		kref_get(&ns->kref);
++	return ns;
++}
++
++extern struct user_namespace *copy_user_ns(int flags,
++					   struct user_namespace *old_ns);
++extern void free_user_ns(struct kref *kref);
++
++static inline void put_user_ns(struct user_namespace *ns)
++{
++	if (ns)
++		kref_put(&ns->kref, free_user_ns);
++}
++
++#else
++
++static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
++{
++	return &init_user_ns;
++}
++
++static inline struct user_namespace *copy_user_ns(int flags,
++						  struct user_namespace *old_ns)
++{
++	if (flags & CLONE_NEWUSER)
++		return ERR_PTR(-EINVAL);
++
++	return NULL;
++}
++
++static inline void put_user_ns(struct user_namespace *ns)
++{
++}
++
++#endif
++
++#endif /* _LINUX_USER_NAMESPACE_H */
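/* Editorial sketch (not from this patch): reference counting follows the
 * usual kref pattern; with the nsproxy change earlier in this patch, a
 * user namespace is reached and pinned via the task's nsproxy. */
static struct user_namespace *sketch_pin_user_ns(struct task_struct *tsk)
{
	return get_user_ns(tsk->nsproxy->user_ns);	/* drop with put_user_ns() */
}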
+diff -Nurb linux-2.6.22-570/include/linux/utsname.h linux-2.6.22-try2/include/linux/utsname.h
+--- linux-2.6.22-570/include/linux/utsname.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/utsname.h	2007-12-19 15:29:24.000000000 -0500
+@@ -48,26 +48,14 @@
+ 	kref_get(&ns->kref);
+ }
+ 
+-#ifdef CONFIG_UTS_NS
+-extern struct uts_namespace *copy_utsname(int flags, struct uts_namespace *ns);
++extern struct uts_namespace *copy_utsname(unsigned long flags,
++					struct uts_namespace *ns);
+ extern void free_uts_ns(struct kref *kref);
+ 
+ static inline void put_uts_ns(struct uts_namespace *ns)
+ {
+ 	kref_put(&ns->kref, free_uts_ns);
+ }
+-#else
+-static inline struct uts_namespace *copy_utsname(int flags,
+-						struct uts_namespace *ns)
+-{
+-	return ns;
+-}
+-
+-static inline void put_uts_ns(struct uts_namespace *ns)
+-{
+-}
+-#endif
+-
+ static inline struct new_utsname *utsname(void)
+ {
+ 	return &current->nsproxy->uts_ns->name;
+diff -Nurb linux-2.6.22-570/include/linux/vmalloc.h linux-2.6.22-try2/include/linux/vmalloc.h
+--- linux-2.6.22-570/include/linux/vmalloc.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/linux/vmalloc.h	2007-12-19 15:29:22.000000000 -0500
+@@ -65,9 +65,10 @@
+ 					  unsigned long flags, int node,
+ 					  gfp_t gfp_mask);
+ extern struct vm_struct *remove_vm_area(void *addr);
++
+ extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
+ 			struct page ***pages);
+-extern void unmap_vm_area(struct vm_struct *area);
++extern void unmap_kernel_range(unsigned long addr, unsigned long size);
+ 
+ /*
+  *	Internals.  Don't use..
+diff -Nurb linux-2.6.22-570/include/net/addrconf.h linux-2.6.22-try2/include/net/addrconf.h
+--- linux-2.6.22-570/include/net/addrconf.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/net/addrconf.h	2007-12-19 15:29:23.000000000 -0500
+@@ -61,7 +61,7 @@
+ extern int			ipv6_chk_addr(struct in6_addr *addr,
+ 					      struct net_device *dev,
+ 					      int strict);
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ extern int			ipv6_chk_home_addr(struct in6_addr *addr);
+ #endif
+ extern struct inet6_ifaddr *	ipv6_get_ifaddr(struct in6_addr *addr,
+diff -Nurb linux-2.6.22-570/include/net/dst.h linux-2.6.22-try2/include/net/dst.h
+--- linux-2.6.22-570/include/net/dst.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/net/dst.h	2007-12-19 15:29:23.000000000 -0500
+@@ -47,7 +47,6 @@
+ #define DST_NOXFRM		2
+ #define DST_NOPOLICY		4
+ #define DST_NOHASH		8
+-#define DST_BALANCED            0x10
+ 	unsigned long		expires;
+ 
+ 	unsigned short		header_len;	/* more space at head required */
+diff -Nurb linux-2.6.22-570/include/net/flow.h linux-2.6.22-try2/include/net/flow.h
+--- linux-2.6.22-570/include/net/flow.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/net/flow.h	2007-12-19 15:29:23.000000000 -0500
+@@ -67,20 +67,16 @@
+ 
+ 		__be32		spi;
+ 
+-#ifdef CONFIG_IPV6_MIP6
+ 		struct {
+ 			__u8	type;
+ 		} mht;
+-#endif
+ 	} uli_u;
+ #define fl_ip_sport	uli_u.ports.sport
+ #define fl_ip_dport	uli_u.ports.dport
+ #define fl_icmp_type	uli_u.icmpt.type
+ #define fl_icmp_code	uli_u.icmpt.code
+ #define fl_ipsec_spi	uli_u.spi
+-#ifdef CONFIG_IPV6_MIP6
+ #define fl_mh_type	uli_u.mht.type
+-#endif
+ 	__u32           secid;	/* used by xfrm; see secid.txt */
+ } __attribute__((__aligned__(BITS_PER_LONG/8)));
+ 
+diff -Nurb linux-2.6.22-570/include/net/ip_fib.h linux-2.6.22-try2/include/net/ip_fib.h
+--- linux-2.6.22-570/include/net/ip_fib.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/net/ip_fib.h	2007-12-19 15:29:23.000000000 -0500
+@@ -39,7 +39,6 @@
+ 	int			fc_mx_len;
+ 	int			fc_mp_len;
+ 	u32			fc_flow;
+-	u32			fc_mp_alg;
+ 	u32			fc_nlflags;
+ 	struct nl_info		fc_nlinfo;
+  };
+@@ -86,9 +85,6 @@
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+ 	int			fib_power;
+ #endif
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	u32			fib_mp_alg;
+-#endif
+ 	struct fib_nh		fib_nh[0];
+ #define fib_dev		fib_nh[0].nh_dev
+ };
+@@ -103,10 +99,6 @@
+ 	unsigned char	nh_sel;
+ 	unsigned char	type;
+ 	unsigned char	scope;
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	__be32          network;
+-	__be32          netmask;
+-#endif
+ 	struct fib_info *fi;
+ #ifdef CONFIG_IP_MULTIPLE_TABLES
+ 	struct fib_rule	*r;
+@@ -145,14 +137,6 @@
+ #define FIB_RES_DEV(res)		(FIB_RES_NH(res).nh_dev)
+ #define FIB_RES_OIF(res)		(FIB_RES_NH(res).nh_oif)
+ 
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-#define FIB_RES_NETWORK(res)		((res).network)
+-#define FIB_RES_NETMASK(res)	        ((res).netmask)
+-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+-#define FIB_RES_NETWORK(res)		(0)
+-#define FIB_RES_NETMASK(res)	        (0)
+-#endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */
+-
+ struct fib_table {
+ 	struct hlist_node tb_hlist;
+ 	u32		tb_id;
+diff -Nurb linux-2.6.22-570/include/net/ip_mp_alg.h linux-2.6.22-try2/include/net/ip_mp_alg.h
+--- linux-2.6.22-570/include/net/ip_mp_alg.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/net/ip_mp_alg.h	1969-12-31 19:00:00.000000000 -0500
+@@ -1,96 +0,0 @@
+-/* ip_mp_alg.h: IPV4 multipath algorithm support.
+- *
+- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
+- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
+- */
+-
+-#ifndef _NET_IP_MP_ALG_H
+-#define _NET_IP_MP_ALG_H
+-
+-#include <linux/ip_mp_alg.h>
+-#include <net/flow.h>
+-#include <net/route.h>
+-
+-struct fib_nh;
+-
+-struct ip_mp_alg_ops {
+-	void	(*mp_alg_select_route)(const struct flowi *flp,
+-				       struct rtable *rth, struct rtable **rp);
+-	void	(*mp_alg_flush)(void);
+-	void	(*mp_alg_set_nhinfo)(__be32 network, __be32 netmask,
+-				     unsigned char prefixlen,
+-				     const struct fib_nh *nh);
+-	void	(*mp_alg_remove)(struct rtable *rth);
+-};
+-
+-extern int multipath_alg_register(struct ip_mp_alg_ops *, enum ip_mp_alg);
+-extern void multipath_alg_unregister(struct ip_mp_alg_ops *, enum ip_mp_alg);
+-
+-extern struct ip_mp_alg_ops *ip_mp_alg_table[];
+-
+-static inline int multipath_select_route(const struct flowi *flp,
+-					 struct rtable *rth,
+-					 struct rtable **rp)
+-{
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
+-
+-	/* mp_alg_select_route _MUST_ be implemented */
+-	if (ops && (rth->u.dst.flags & DST_BALANCED)) {
+-		ops->mp_alg_select_route(flp, rth, rp);
+-		return 1;
+-	}
+-#endif
+-	return 0;
+-}
+-
+-static inline void multipath_flush(void)
+-{
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	int i;
+-
+-	for (i = IP_MP_ALG_NONE; i <= IP_MP_ALG_MAX; i++) {
+-		struct ip_mp_alg_ops *ops = ip_mp_alg_table[i];
+-
+-		if (ops && ops->mp_alg_flush)
+-			ops->mp_alg_flush();
+-	}
+-#endif
+-}
+-
+-static inline void multipath_set_nhinfo(struct rtable *rth,
+-					__be32 network, __be32 netmask,
+-					unsigned char prefixlen,
+-					const struct fib_nh *nh)
+-{
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
+-
+-	if (ops && ops->mp_alg_set_nhinfo)
+-		ops->mp_alg_set_nhinfo(network, netmask, prefixlen, nh);
+-#endif
+-}
+-
+-static inline void multipath_remove(struct rtable *rth)
+-{
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	struct ip_mp_alg_ops *ops = ip_mp_alg_table[rth->rt_multipath_alg];
+-
+-	if (ops && ops->mp_alg_remove &&
+-	    (rth->u.dst.flags & DST_BALANCED))
+-		ops->mp_alg_remove(rth);
+-#endif
+-}
+-
+-static inline int multipath_comparekeys(const struct flowi *flp1,
+-					const struct flowi *flp2)
+-{
+-	return flp1->fl4_dst == flp2->fl4_dst &&
+-		flp1->fl4_src == flp2->fl4_src &&
+-		flp1->oif == flp2->oif &&
+-		flp1->mark == flp2->mark &&
+-		!((flp1->fl4_tos ^ flp2->fl4_tos) &
+-		  (IPTOS_RT_MASK | RTO_ONLINK));
+-}
+-
+-#endif /* _NET_IP_MP_ALG_H */
+diff -Nurb linux-2.6.22-570/include/net/mip6.h linux-2.6.22-try2/include/net/mip6.h
+--- linux-2.6.22-570/include/net/mip6.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/net/mip6.h	2007-12-19 15:29:23.000000000 -0500
+@@ -54,8 +54,4 @@
+ #define IP6_MH_TYPE_BERROR	7   /* Binding Error */
+ #define IP6_MH_TYPE_MAX		IP6_MH_TYPE_BERROR
+ 
+-extern int mip6_init(void);
+-extern void mip6_fini(void);
+-extern int mip6_mh_filter(struct sock *sk, struct sk_buff *skb);
+-
+ #endif
+diff -Nurb linux-2.6.22-570/include/net/netlink.h linux-2.6.22-try2/include/net/netlink.h
+--- linux-2.6.22-570/include/net/netlink.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/net/netlink.h	2007-12-19 15:29:23.000000000 -0500
+@@ -118,6 +118,9 @@
+  * Nested Attributes Construction:
+  *   nla_nest_start(skb, type)		start a nested attribute
+  *   nla_nest_end(skb, nla)		finalize a nested attribute
++ *   nla_nest_compat_start(skb, type,	start a nested compat attribute
++ *			   len, data)
++ *   nla_nest_compat_end(skb, nla)	finalize a nested compat attribute
+  *   nla_nest_cancel(skb, nla)		cancel nested attribute construction
+  *
+  * Attribute Length Calculations:
+@@ -152,6 +155,7 @@
+  *   nla_find_nested()			find attribute in nested attributes
+  *   nla_parse()			parse and validate stream of attrs
+  *   nla_parse_nested()			parse nested attributes
++ *   nla_parse_nested_compat()		parse nested compat attributes
+  *   nla_for_each_attr()		loop over all attributes
+  *   nla_for_each_nested()		loop over the nested attributes
+  *=========================================================================
+@@ -170,6 +174,7 @@
+ 	NLA_FLAG,
+ 	NLA_MSECS,
+ 	NLA_NESTED,
++	NLA_NESTED_COMPAT,
+ 	NLA_NUL_STRING,
+ 	NLA_BINARY,
+ 	__NLA_TYPE_MAX,
+@@ -190,6 +195,7 @@
+  *    NLA_NUL_STRING       Maximum length of string (excluding NUL)
+  *    NLA_FLAG             Unused
+  *    NLA_BINARY           Maximum length of attribute payload
++ *    NLA_NESTED_COMPAT    Exact length of structure payload
+  *    All other            Exact length of attribute payload
+  *
+  * Example:
+@@ -733,6 +739,39 @@
+ {
+ 	return nla_parse(tb, maxtype, nla_data(nla), nla_len(nla), policy);
+ }
++
++/**
++ * nla_parse_nested_compat - parse nested compat attributes
++ * @tb: destination array with maxtype+1 elements
++ * @maxtype: maximum attribute type to be expected
++ * @nla: attribute containing the nested attributes
++ * @data: pointer to point to contained structure
++ * @len: length of contained structure
++ * @policy: validation policy
++ *
++ * Parse a nested compat attribute. The compat attribute contains a structure
++ * and optionally a set of nested attributes. On success the data pointer
++ * points to the nested data and tb contains the parsed attributes
++ * (see nla_parse).
++ */
++static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype,
++					    struct nlattr *nla,
++					    const struct nla_policy *policy,
++					    int len)
++{
++	if (nla_len(nla) < len)
++		return -1;
++	if (nla_len(nla) >= NLA_ALIGN(len) + sizeof(struct nlattr))
++		return nla_parse_nested(tb, maxtype,
++					nla_data(nla) + NLA_ALIGN(len),
++					policy);
++	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
++	return 0;
++}
++
++#define nla_parse_nested_compat(tb, maxtype, nla, policy, data, len) \
++({	data = nla_len(nla) >= len ? nla_data(nla) : NULL; \
++	__nla_parse_nested_compat(tb, maxtype, nla, policy, len); })
+ /**
+  * nla_put_u8 - Add a u16 netlink attribute to a socket buffer
+  * @skb: socket buffer to add attribute to
+@@ -965,6 +1004,51 @@
+ }
+ 
+ /**
++ * nla_nest_compat_start - Start a new level of nested compat attributes
++ * @skb: socket buffer to add attributes to
++ * @attrtype: attribute type of container
++ * @attrlen: length of structure
++ * @data: pointer to structure
++ *
++ * Start a nested compat attribute that contains both a structure and
++ * a set of nested attributes.
++ *
++ * Returns the container attribute
++ */
++static inline struct nlattr *nla_nest_compat_start(struct sk_buff *skb,
++						   int attrtype, int attrlen,
++						   const void *data)
++{
++	struct nlattr *start = (struct nlattr *)skb_tail_pointer(skb);
++
++	if (nla_put(skb, attrtype, attrlen, data) < 0)
++		return NULL;
++	if (nla_nest_start(skb, attrtype) == NULL) {
++		nlmsg_trim(skb, start);
++		return NULL;
++	}
++	return start;
++}
++
++/**
++ * nla_nest_compat_end - Finalize nesting of compat attributes
++ * @skb: socket buffer the attributes are stored in
++ * @start: container attribute
++ *
++ * Corrects the container attribute header to include all the
++ * appended attributes.
++ *
++ * Returns the total data length of the skb.
++ */
++static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start)
++{
++	struct nlattr *nest = (void *)start + NLMSG_ALIGN(start->nla_len);
++
++	start->nla_len = skb_tail_pointer(skb) - (unsigned char *)start;
++	return nla_nest_end(skb, nest);
++}
++
++/**
+  * nla_nest_cancel - Cancel nesting of attributes
+  * @skb: socket buffer the message is stored in
+  * @start: container attribute
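
A usage sketch of the nested-compat helpers added above (not part of the
patch; MY_OPTS, the MYA_* attributes, struct my_opt and my_policy are all
invented for illustration). The dump side emits the structure followed by
the optional nested attributes; the parse side recovers both:

#include <net/netlink.h>

struct my_opt {				/* hypothetical structure payload */
	__u32 rate;
};

enum { MYA_UNSPEC, MYA_GAP, __MYA_MAX };
#define MYA_MAX (__MYA_MAX - 1)
#define MY_OPTS 1			/* hypothetical container attr type */

static const struct nla_policy my_policy[MYA_MAX + 1] = {
	[MYA_GAP] = { .type = NLA_U32 },
};

/* dump: structure first, then the optional nested attributes */
static int my_dump(struct sk_buff *skb, const struct my_opt *opt, u32 gap)
{
	struct nlattr *start;

	start = nla_nest_compat_start(skb, MY_OPTS, sizeof(*opt), opt);
	if (start == NULL)
		return -EMSGSIZE;
	NLA_PUT_U32(skb, MYA_GAP, gap);
	return nla_nest_compat_end(skb, start);

nla_put_failure:
	return -EMSGSIZE;
}

/* parse: "opt" ends up pointing at the structure, tb[] at the attrs */
static int my_parse(struct nlattr *nla)
{
	struct nlattr *tb[MYA_MAX + 1];
	struct my_opt *opt;
	int err;

	err = nla_parse_nested_compat(tb, MYA_MAX, nla, my_policy,
				      opt, sizeof(*opt));
	if (err < 0)
		return err;
	return opt ? 0 : -EINVAL;
}
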
+diff -Nurb linux-2.6.22-570/include/net/rawv6.h linux-2.6.22-try2/include/net/rawv6.h
+--- linux-2.6.22-570/include/net/rawv6.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/net/rawv6.h	2007-12-19 15:29:23.000000000 -0500
+@@ -3,6 +3,8 @@
+ 
+ #ifdef __KERNEL__
+ 
++#include <net/protocol.h>
++
+ #define RAWV6_HTABLE_SIZE	MAX_INET_PROTOS
+ extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
+ extern rwlock_t raw_v6_lock;
+@@ -23,6 +25,13 @@
+ 					  int type, int code, 
+ 					  int offset, __be32 info);
+ 
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
++int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
++					   struct sk_buff *skb));
++int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
++					     struct sk_buff *skb));
++#endif
++
+ #endif
+ 
+ #endif
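
A sketch of how a Mobile IPv6 module could hook in through the new
registration functions (assumes CONFIG_IPV6_MIP6; the filter body is a
placeholder, and the meaning of its return value is defined by the
raw-socket delivery path rather than by this hunk):

#include <linux/module.h>
#include <net/rawv6.h>

/* placeholder filter; real MIPv6 code would inspect the MH header here */
static int example_mh_filter(struct sock *sk, struct sk_buff *skb)
{
	return 0;
}

static int __init example_init(void)
{
	return rawv6_mh_filter_register(example_mh_filter);
}

static void __exit example_exit(void)
{
	rawv6_mh_filter_unregister(example_mh_filter);
}

module_init(example_init);
module_exit(example_exit);
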
+diff -Nurb linux-2.6.22-570/include/net/route.h linux-2.6.22-try2/include/net/route.h
+--- linux-2.6.22-570/include/net/route.h	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/include/net/route.h	2007-12-19 15:29:23.000000000 -0500
+@@ -66,7 +66,6 @@
+ 	
+ 	unsigned		rt_flags;
+ 	__u16			rt_type;
+-	__u16			rt_multipath_alg;
+ 
+ 	__be32			rt_dst;	/* Path destination	*/
+ 	__be32			rt_src;	/* Path source		*/
+diff -Nurb linux-2.6.22-570/include/net/rtnetlink.h linux-2.6.22-try2/include/net/rtnetlink.h
+--- linux-2.6.22-570/include/net/rtnetlink.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/net/rtnetlink.h	2007-12-19 15:29:23.000000000 -0500
+@@ -22,4 +22,62 @@
+ 		return AF_UNSPEC;
+ }
+ 
++/**
++ *	struct rtnl_link_ops - rtnetlink link operations
++ *
++ *	@list: Used internally
++ *	@kind: Identifier
++ *	@maxtype: Highest device specific netlink attribute number
++ *	@policy: Netlink policy for device specific attribute validation
++ *	@validate: Optional validation function for netlink/changelink parameters
++ *	@priv_size: sizeof net_device private space
++ *	@setup: net_device setup function
++ *	@newlink: Function for configuring and registering a new device
++ *	@changelink: Function for changing parameters of an existing device
++ *	@dellink: Function to remove a device
++ *	@get_size: Function to calculate required room for dumping device
++ *		   specific netlink attributes
++ *	@fill_info: Function to dump device specific netlink attributes
++ *	@get_xstats_size: Function to calculate required room for dumping device
++ *			  specific statistics
++ *	@fill_xstats: Function to dump device specific statistics
++ */
++struct rtnl_link_ops {
++	struct list_head	list;
++
++	const char		*kind;
++
++	size_t			priv_size;
++	void			(*setup)(struct net_device *dev);
++
++	int			maxtype;
++	const struct nla_policy	*policy;
++	int			(*validate)(struct nlattr *tb[],
++					    struct nlattr *data[]);
++
++	int			(*newlink)(struct net_device *dev,
++					   struct nlattr *tb[],
++					   struct nlattr *data[]);
++	int			(*changelink)(struct net_device *dev,
++					      struct nlattr *tb[],
++					      struct nlattr *data[]);
++	void			(*dellink)(struct net_device *dev);
++
++	size_t			(*get_size)(const struct net_device *dev);
++	int			(*fill_info)(struct sk_buff *skb,
++					     const struct net_device *dev);
++
++	size_t			(*get_xstats_size)(const struct net_device *dev);
++	int			(*fill_xstats)(struct sk_buff *skb,
++					       const struct net_device *dev);
++};
++
++extern int	__rtnl_link_register(struct rtnl_link_ops *ops);
++extern void	__rtnl_link_unregister(struct rtnl_link_ops *ops);
++
++extern int	rtnl_link_register(struct rtnl_link_ops *ops);
++extern void	rtnl_link_unregister(struct rtnl_link_ops *ops);
++
++#define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
++
+ #endif
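
For illustration, a minimal driver registration against the new ops (the
"foo" names and struct foo_priv are invented; only the mandatory pieces
are shown):

#include <linux/module.h>
#include <linux/netdevice.h>
#include <net/rtnetlink.h>

struct foo_priv {			/* lives in the netdev private area */
	int dummy;
};

static void foo_setup(struct net_device *dev)
{
	/* initialize link-type defaults: type, flags, xmit routine... */
}

static struct rtnl_link_ops foo_link_ops = {
	.kind		= "foo",
	.priv_size	= sizeof(struct foo_priv),
	.setup		= foo_setup,
};

static int __init foo_init(void)
{
	return rtnl_link_register(&foo_link_ops);
}

static void __exit foo_exit(void)
{
	rtnl_link_unregister(&foo_link_ops);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_ALIAS_RTNL_LINK("foo");
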
+diff -Nurb linux-2.6.22-570/include/net/tipc/tipc_port.h linux-2.6.22-try2/include/net/tipc/tipc_port.h
+--- linux-2.6.22-570/include/net/tipc/tipc_port.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/net/tipc/tipc_port.h	2007-12-19 15:29:23.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+  * include/net/tipc/tipc_port.h: Include file for privileged access to TIPC ports
+  * 
+- * Copyright (c) 1994-2006, Ericsson AB
+- * Copyright (c) 2005, Wind River Systems
++ * Copyright (c) 1994-2007, Ericsson AB
++ * Copyright (c) 2005-2007, Wind River Systems
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+@@ -55,6 +55,7 @@
+  * @conn_unacked: number of unacknowledged messages received from peer port
+  * @published: non-zero if port has one or more associated names
+  * @congested: non-zero if cannot send because of link or port congestion
++ * @max_pkt: maximum packet size "hint" used when building messages sent by port
+  * @ref: unique reference to port in TIPC object registry
+  * @phdr: preformatted message header used when sending messages
+  */
+@@ -68,6 +69,7 @@
+ 	u32 conn_unacked;
+ 	int published;
+ 	u32 congested;
++	u32 max_pkt;
+ 	u32 ref;
+ 	struct tipc_msg phdr;
+ };
+diff -Nurb linux-2.6.22-570/include/net/xfrm.h linux-2.6.22-try2/include/net/xfrm.h
+--- linux-2.6.22-570/include/net/xfrm.h	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/include/net/xfrm.h	2007-12-19 15:29:23.000000000 -0500
+@@ -19,9 +19,19 @@
+ #include <net/ipv6.h>
+ #include <net/ip6_fib.h>
+ 
++#define XFRM_PROTO_ESP		50
++#define XFRM_PROTO_AH		51
++#define XFRM_PROTO_COMP		108
++#define XFRM_PROTO_IPIP		4
++#define XFRM_PROTO_IPV6		41
++#define XFRM_PROTO_ROUTING	IPPROTO_ROUTING
++#define XFRM_PROTO_DSTOPTS	IPPROTO_DSTOPTS
++
+ #define XFRM_ALIGN8(len)	(((len) + 7) & ~7)
+ #define MODULE_ALIAS_XFRM_MODE(family, encap) \
+ 	MODULE_ALIAS("xfrm-mode-" __stringify(family) "-" __stringify(encap))
++#define MODULE_ALIAS_XFRM_TYPE(family, proto) \
++	MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto))
+ 
+ extern struct sock *xfrm_nl;
+ extern u32 sysctl_xfrm_aevent_etime;
+@@ -509,11 +519,9 @@
+ 	case IPPROTO_ICMPV6:
+ 		port = htons(fl->fl_icmp_type);
+ 		break;
+-#ifdef CONFIG_IPV6_MIP6
+ 	case IPPROTO_MH:
+ 		port = htons(fl->fl_mh_type);
+ 		break;
+-#endif
+ 	default:
+ 		port = 0;	/*XXX*/
+ 	}
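
The new type alias mirrors the existing mode alias. For example, an IPv4
ESP implementation would declare:

MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP);
/* __stringify() expands the macro arguments first, so with AF_INET == 2
 * and XFRM_PROTO_ESP == 50 this is MODULE_ALIAS("xfrm-type-2-50"),
 * letting the xfrm core demand-load type modules by (family, proto). */
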
+diff -Nurb linux-2.6.22-570/include/scsi/iscsi_if.h linux-2.6.22-try2/include/scsi/iscsi_if.h
+--- linux-2.6.22-570/include/scsi/iscsi_if.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/scsi/iscsi_if.h	2007-12-19 15:29:23.000000000 -0500
+@@ -48,6 +48,7 @@
+ 	ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT	= UEVENT_BASE + 14,
+ 
+ 	ISCSI_UEVENT_TGT_DSCVR		= UEVENT_BASE + 15,
++	ISCSI_UEVENT_SET_HOST_PARAM	= UEVENT_BASE + 16,
+ 
+ 	/* up events */
+ 	ISCSI_KEVENT_RECV_PDU		= KEVENT_BASE + 1,
+@@ -71,6 +72,8 @@
+ 		/* messages u -> k */
+ 		struct msg_create_session {
+ 			uint32_t	initial_cmdsn;
++			uint16_t	cmds_max;
++			uint16_t	queue_depth;
+ 		} c_session;
+ 		struct msg_destroy_session {
+ 			uint32_t	sid;
+@@ -136,6 +139,11 @@
+ 			 */
+ 			uint32_t	enable;
+ 		} tgt_dscvr;
++		struct msg_set_host_param {
++			uint32_t	host_no;
++			uint32_t	param; /* enum iscsi_host_param */
++			uint32_t	len;
++		} set_host_param;
+ 	} u;
+ 	union {
+ 		/* messages k -> u */
+@@ -223,6 +231,11 @@
+ 	ISCSI_PARAM_CONN_PORT,
+ 	ISCSI_PARAM_CONN_ADDRESS,
+ 
++	ISCSI_PARAM_USERNAME,
++	ISCSI_PARAM_USERNAME_IN,
++	ISCSI_PARAM_PASSWORD,
++	ISCSI_PARAM_PASSWORD_IN,
++
+ 	/* must always be last */
+ 	ISCSI_PARAM_MAX,
+ };
+@@ -249,6 +262,24 @@
+ #define ISCSI_SESS_RECOVERY_TMO		(1 << ISCSI_PARAM_SESS_RECOVERY_TMO)
+ #define ISCSI_CONN_PORT			(1 << ISCSI_PARAM_CONN_PORT)
+ #define ISCSI_CONN_ADDRESS		(1 << ISCSI_PARAM_CONN_ADDRESS)
++#define ISCSI_USERNAME			(1 << ISCSI_PARAM_USERNAME)
++#define ISCSI_USERNAME_IN		(1 << ISCSI_PARAM_USERNAME_IN)
++#define ISCSI_PASSWORD			(1 << ISCSI_PARAM_PASSWORD)
++#define ISCSI_PASSWORD_IN		(1 << ISCSI_PARAM_PASSWORD_IN)
++
++/* iSCSI HBA params */
++enum iscsi_host_param {
++	ISCSI_HOST_PARAM_HWADDRESS,
++	ISCSI_HOST_PARAM_INITIATOR_NAME,
++	ISCSI_HOST_PARAM_NETDEV_NAME,
++	ISCSI_HOST_PARAM_IPADDRESS,
++	ISCSI_HOST_PARAM_MAX,
++};
++
++#define ISCSI_HOST_HWADDRESS		(1 << ISCSI_HOST_PARAM_HWADDRESS)
++#define ISCSI_HOST_INITIATOR_NAME	(1 << ISCSI_HOST_PARAM_INITIATOR_NAME)
++#define ISCSI_HOST_NETDEV_NAME		(1 << ISCSI_HOST_PARAM_NETDEV_NAME)
++#define ISCSI_HOST_IPADDRESS		(1 << ISCSI_HOST_PARAM_IPADDRESS)
+ 
+ #define iscsi_ptr(_handle) ((void*)(unsigned long)_handle)
+ #define iscsi_handle(_ptr) ((uint64_t)(unsigned long)_ptr)
+@@ -272,6 +303,9 @@
+ #define CAP_MULTI_CONN		0x40
+ #define CAP_TEXT_NEGO		0x80
+ #define CAP_MARKERS		0x100
++#define CAP_FW_DB		0x200
++#define CAP_SENDTARGETS_OFFLOAD	0x400
++#define CAP_DATA_PATH_OFFLOAD	0x800
+ 
+ /*
+  * These flags describes reason of stop_conn() call
+diff -Nurb linux-2.6.22-570/include/scsi/libiscsi.h linux-2.6.22-try2/include/scsi/libiscsi.h
+--- linux-2.6.22-570/include/scsi/libiscsi.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/scsi/libiscsi.h	2007-12-19 15:29:23.000000000 -0500
+@@ -48,9 +48,8 @@
+ #define debug_scsi(fmt...)
+ #endif
+ 
+-#define ISCSI_XMIT_CMDS_MAX	128	/* must be power of 2 */
+-#define ISCSI_MGMT_CMDS_MAX	32	/* must be power of 2 */
+-#define ISCSI_CONN_MAX			1
++#define ISCSI_DEF_XMIT_CMDS_MAX	128	/* must be power of 2 */
++#define ISCSI_MGMT_CMDS_MAX	16	/* must be power of 2 */
+ 
+ #define ISCSI_MGMT_ITT_OFFSET	0xa00
+ 
+@@ -73,6 +72,8 @@
+ #define ISCSI_AGE_SHIFT			28
+ #define ISCSI_AGE_MASK			(0xf << ISCSI_AGE_SHIFT)
+ 
++#define ISCSI_ADDRESS_BUF_LEN		64
++
+ struct iscsi_mgmt_task {
+ 	/*
+ 	 * Becuae LLDs allocate their hdr differently, this is a pointer to
+@@ -80,7 +81,7 @@
+ 	 */
+ 	struct iscsi_hdr	*hdr;
+ 	char			*data;		/* mgmt payload */
+-	int			data_count;	/* counts data to be sent */
++	unsigned		data_count;	/* counts data to be sent */
+ 	uint32_t		itt;		/* this ITT */
+ 	void			*dd_data;	/* driver/transport data */
+ 	struct list_head	running;
+@@ -90,6 +91,7 @@
+ 	ISCSI_TASK_COMPLETED,
+ 	ISCSI_TASK_PENDING,
+ 	ISCSI_TASK_RUNNING,
++	ISCSI_TASK_ABORTING,
+ };
+ 
+ struct iscsi_cmd_task {
+@@ -99,16 +101,14 @@
+ 	 */
+ 	struct iscsi_cmd	*hdr;
+ 	int			itt;		/* this ITT */
+-	int			datasn;		/* DataSN */
+ 
+ 	uint32_t		unsol_datasn;
+-	int			imm_count;	/* imm-data (bytes)   */
+-	int			unsol_count;	/* unsolicited (bytes)*/
++	unsigned		imm_count;	/* imm-data (bytes)   */
++	unsigned		unsol_count;	/* unsolicited (bytes)*/
+ 	/* offset in unsolicited stream (bytes); */
+-	int			unsol_offset;
+-	int			data_count;	/* remaining Data-Out */
++	unsigned		unsol_offset;
++	unsigned		data_count;	/* remaining Data-Out */
+ 	struct scsi_cmnd	*sc;		/* associated SCSI cmd*/
+-	int			total_length;
+ 	struct iscsi_conn	*conn;		/* used connection    */
+ 	struct iscsi_mgmt_task	*mtask;		/* tmf mtask in progr */
+ 
+@@ -152,18 +152,11 @@
+ 	struct iscsi_cmd_task	*ctask;		/* xmit ctask in progress */
+ 
+ 	/* xmit */
+-	struct kfifo		*immqueue;	/* immediate xmit queue */
+ 	struct kfifo		*mgmtqueue;	/* mgmt (control) xmit queue */
+ 	struct list_head	mgmt_run_list;	/* list of control tasks */
+ 	struct list_head	xmitqueue;	/* data-path cmd queue */
+ 	struct list_head	run_list;	/* list of cmds in progress */
+ 	struct work_struct	xmitwork;	/* per-conn. xmit workqueue */
+-	/*
+-	 * serializes connection xmit, access to kfifos:
+-	 * xmitqueue, immqueue, mgmtqueue
+-	 */
+-	struct mutex		xmitmutex;
+-
+ 	unsigned long		suspend_tx;	/* suspend Tx */
+ 	unsigned long		suspend_rx;	/* suspend Rx */
+ 
+@@ -174,8 +167,8 @@
+ 	int			tmabort_state;	/* see TMABORT_INITIAL, etc.*/
+ 
+ 	/* negotiated params */
+-	int			max_recv_dlength; /* initiator_max_recv_dsl*/
+-	int			max_xmit_dlength; /* target_max_recv_dsl */
++	unsigned		max_recv_dlength; /* initiator_max_recv_dsl*/
++	unsigned		max_xmit_dlength; /* target_max_recv_dsl */
+ 	int			hdrdgst_en;
+ 	int			datadgst_en;
+ 	int			ifmarker_en;
+@@ -183,6 +176,12 @@
+ 	/* values userspace uses to id a conn */
+ 	int			persistent_port;
+ 	char			*persistent_address;
++	/* remote portal currently connected to */
++	int			portal_port;
++	char			portal_address[ISCSI_ADDRESS_BUF_LEN];
++	/* local address */
++	int			local_port;
++	char			local_address[ISCSI_ADDRESS_BUF_LEN];
+ 
+ 	/* MIB-statistics */
+ 	uint64_t		txdata_octets;
+@@ -213,18 +212,25 @@
+ 
+ 	/* configuration */
+ 	int			initial_r2t_en;
+-	int			max_r2t;
++	unsigned		max_r2t;
+ 	int			imm_data_en;
+-	int			first_burst;
+-	int			max_burst;
++	unsigned		first_burst;
++	unsigned		max_burst;
+ 	int			time2wait;
+ 	int			time2retain;
+ 	int			pdu_inorder_en;
+ 	int			dataseq_inorder_en;
+ 	int			erl;
+ 	int			tpgt;
++	char			*username;
++	char			*username_in;
++	char			*password;
++	char			*password_in;
+ 	char			*targetname;
+-
++	char			*initiatorname;
++	/* hw address or netdev iscsi connection is bound to */
++	char			*hwaddress;
++	char			*netdev;
+ 	/* control data */
+ 	struct iscsi_transport	*tt;
+ 	struct Scsi_Host	*host;
+@@ -255,12 +261,22 @@
+ extern int iscsi_queuecommand(struct scsi_cmnd *sc,
+ 			      void (*done)(struct scsi_cmnd *));
+ 
++
++/*
++ * iSCSI host helpers.
++ */
++extern int iscsi_host_set_param(struct Scsi_Host *shost,
++				enum iscsi_host_param param, char *buf,
++				int buflen);
++extern int iscsi_host_get_param(struct Scsi_Host *shost,
++				enum iscsi_host_param param, char *buf);
++
+ /*
+  * session management
+  */
+ extern struct iscsi_cls_session *
+ iscsi_session_setup(struct iscsi_transport *, struct scsi_transport_template *,
+-		    int, int, uint32_t, uint32_t *);
++		    uint16_t, uint16_t, int, int, uint32_t, uint32_t *);
+ extern void iscsi_session_teardown(struct iscsi_cls_session *);
+ extern struct iscsi_session *class_to_transport_session(struct iscsi_cls_session *);
+ extern void iscsi_session_recovery_timedout(struct iscsi_cls_session *);
+@@ -289,8 +305,7 @@
+ /*
+  * pdu and task processing
+  */
+-extern int iscsi_check_assign_cmdsn(struct iscsi_session *,
+-				    struct iscsi_nopin *);
++extern void iscsi_update_cmdsn(struct iscsi_session *, struct iscsi_nopin *);
+ extern void iscsi_prep_unsolicit_data_pdu(struct iscsi_cmd_task *,
+ 					struct iscsi_data *hdr);
+ extern int iscsi_conn_send_pdu(struct iscsi_cls_conn *, struct iscsi_hdr *,
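
A sketch of the widened session-setup call. Reading the two new uint16_t
arguments as the cmds_max and queue_depth values carried by the new
msg_create_session fields is an inference from this patch, and all
example_* names are placeholders:

#include <scsi/libiscsi.h>

static struct iscsi_cls_session *
example_create_session(struct iscsi_transport *tt,
		       struct scsi_transport_template *scsit,
		       uint16_t cmds_max, uint16_t qdepth,
		       uint32_t initial_cmdsn, uint32_t *host_no)
{
	/* the two int arguments keep their prior role as per-task
	 * allocation sizes; zero is a placeholder here */
	return iscsi_session_setup(tt, scsit, cmds_max, qdepth,
				   0, 0, initial_cmdsn, host_no);
}
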
+diff -Nurb linux-2.6.22-570/include/scsi/scsi_cmnd.h linux-2.6.22-try2/include/scsi/scsi_cmnd.h
+--- linux-2.6.22-570/include/scsi/scsi_cmnd.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/scsi/scsi_cmnd.h	2007-12-19 15:29:23.000000000 -0500
+@@ -135,4 +135,24 @@
+ extern struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *, gfp_t);
+ extern void scsi_free_sgtable(struct scatterlist *, int);
+ 
++extern int scsi_dma_map(struct scsi_cmnd *cmd);
++extern void scsi_dma_unmap(struct scsi_cmnd *cmd);
++
++#define scsi_sg_count(cmd) ((cmd)->use_sg)
++#define scsi_sglist(cmd) ((struct scatterlist *)(cmd)->request_buffer)
++#define scsi_bufflen(cmd) ((cmd)->request_bufflen)
++
++static inline void scsi_set_resid(struct scsi_cmnd *cmd, int resid)
++{
++	cmd->resid = resid;
++}
++
++static inline int scsi_get_resid(struct scsi_cmnd *cmd)
++{
++	return cmd->resid;
++}
++
++#define scsi_for_each_sg(cmd, sg, nseg, __i)			\
++	for (__i = 0, sg = scsi_sglist(cmd); __i < (nseg); __i++, (sg)++)
++
+ #endif /* _SCSI_SCSI_CMND_H */
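
A sketch of the new accessors in a low-level driver's I/O path (the
example_* names are placeholders for programming a hardware S/G list):

#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
#include <scsi/scsi_cmnd.h>

/* placeholder: program one hardware scatter/gather element */
static void example_hw_add_sg(dma_addr_t addr, unsigned int len)
{
}

static int example_build_sglist(struct scsi_cmnd *cmd)
{
	struct scatterlist *sg;
	int i, nseg;

	nseg = scsi_dma_map(cmd);	/* maps scsi_sglist(cmd) for DMA */
	if (nseg < 0)
		return nseg;		/* mapping failed */

	scsi_for_each_sg(cmd, sg, nseg, i)
		example_hw_add_sg(sg_dma_address(sg), sg_dma_len(sg));

	/* the completion path undoes this with scsi_dma_unmap(cmd) */
	return 0;
}
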
+diff -Nurb linux-2.6.22-570/include/scsi/scsi_device.h linux-2.6.22-try2/include/scsi/scsi_device.h
+--- linux-2.6.22-570/include/scsi/scsi_device.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/scsi/scsi_device.h	2007-12-19 15:29:23.000000000 -0500
+@@ -287,6 +287,7 @@
+ extern void scsi_target_unblock(struct device *);
+ extern void scsi_remove_target(struct device *);
+ extern void int_to_scsilun(unsigned int, struct scsi_lun *);
++extern int scsilun_to_int(struct scsi_lun *);
+ extern const char *scsi_device_state_name(enum scsi_device_state);
+ extern int scsi_is_sdev_device(const struct device *);
+ extern int scsi_is_target_device(const struct device *);
+diff -Nurb linux-2.6.22-570/include/scsi/scsi_host.h linux-2.6.22-try2/include/scsi/scsi_host.h
+--- linux-2.6.22-570/include/scsi/scsi_host.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/scsi/scsi_host.h	2007-12-19 15:29:23.000000000 -0500
+@@ -339,12 +339,6 @@
+ 	enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *);
+ 
+ 	/*
+-	 * suspend support
+-	 */
+-	int (*resume)(struct scsi_device *);
+-	int (*suspend)(struct scsi_device *, pm_message_t state);
+-
+-	/*
+ 	 * Name of proc directory
+ 	 */
+ 	char *proc_name;
+@@ -677,6 +671,10 @@
+ #define shost_printk(prefix, shost, fmt, a...)	\
+ 	dev_printk(prefix, &(shost)->shost_gendev, fmt, ##a)
+ 
++static inline void *shost_priv(struct Scsi_Host *shost)
++{
++	return (void *)shost->hostdata;
++}
+ 
+ int scsi_is_host_device(const struct device *);
+ 
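
The shost_priv() helper simply returns the hostdata area, replacing the
open-coded casts drivers use today. A sketch (struct example_hba is
invented):

#include <scsi/scsi_host.h>

struct example_hba {			/* invented driver-private type */
	void __iomem *regs;
};

/* shost allocated via scsi_host_alloc(&tmpl, sizeof(struct example_hba)) */
static void example_init_hba(struct Scsi_Host *shost)
{
	struct example_hba *hba = shost_priv(shost);

	hba->regs = NULL;		/* set up driver-private state */
}
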
+diff -Nurb linux-2.6.22-570/include/scsi/scsi_transport_fc.h linux-2.6.22-try2/include/scsi/scsi_transport_fc.h
+--- linux-2.6.22-570/include/scsi/scsi_transport_fc.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/scsi/scsi_transport_fc.h	2007-12-19 15:29:23.000000000 -0500
+@@ -19,7 +19,7 @@
+  *
+  *  ========
+  *
+- *  Copyright (C) 2004-2005   James Smart, Emulex Corporation
++ *  Copyright (C) 2004-2007   James Smart, Emulex Corporation
+  *    Rewrite for host, target, device, and remote port attributes,
+  *    statistics, and service functions...
+  *
+@@ -62,8 +62,10 @@
+ 	FC_PORTTYPE_NLPORT,		/* (Public) Loop w/ FLPort */
+ 	FC_PORTTYPE_LPORT,		/* (Private) Loop w/o FLPort */
+ 	FC_PORTTYPE_PTP,		/* Point to Point w/ another NPort */
++	FC_PORTTYPE_NPIV,		/* VPORT based on NPIV */
+ };
+ 
++
+ /*
+  * fc_port_state: If you alter this, you also need to alter scsi_transport_fc.c
+  * (for the ascii descriptions).
+@@ -84,6 +86,25 @@
+ 
+ 
+ /* 
++ * fc_vport_state: If you alter this, you also need to alter
++ * scsi_transport_fc.c (for the ascii descriptions).
++ */
++enum fc_vport_state {
++	FC_VPORT_UNKNOWN,
++	FC_VPORT_ACTIVE,
++	FC_VPORT_DISABLED,
++	FC_VPORT_LINKDOWN,
++	FC_VPORT_INITIALIZING,
++	FC_VPORT_NO_FABRIC_SUPP,
++	FC_VPORT_NO_FABRIC_RSCS,
++	FC_VPORT_FABRIC_LOGOUT,
++	FC_VPORT_FABRIC_REJ_WWN,
++	FC_VPORT_FAILED,
++};
++
++
++
++/*
+  * FC Classes of Service
+  * Note: values are not enumerated, as they can be "or'd" together
+  * for reporting (e.g. report supported_classes). If you alter this list,
+@@ -124,18 +145,116 @@
+ };
+ 
+ /*
+- * FC Remote Port Roles
++ * FC Port Roles
+  * Note: values are not enumerated, as they can be "or'd" together
+  * for reporting (e.g. report roles). If you alter this list,
+  * you also need to alter scsi_transport_fc.c (for the ascii descriptions).
+  */
+-#define FC_RPORT_ROLE_UNKNOWN			0x00
+-#define FC_RPORT_ROLE_FCP_TARGET		0x01
+-#define FC_RPORT_ROLE_FCP_INITIATOR		0x02
+-#define FC_RPORT_ROLE_IP_PORT			0x04
++#define FC_PORT_ROLE_UNKNOWN			0x00
++#define FC_PORT_ROLE_FCP_TARGET			0x01
++#define FC_PORT_ROLE_FCP_INITIATOR		0x02
++#define FC_PORT_ROLE_IP_PORT			0x04
++
++/* The following are for compatibility */
++#define FC_RPORT_ROLE_UNKNOWN			FC_PORT_ROLE_UNKNOWN
++#define FC_RPORT_ROLE_FCP_TARGET		FC_PORT_ROLE_FCP_TARGET
++#define FC_RPORT_ROLE_FCP_INITIATOR		FC_PORT_ROLE_FCP_INITIATOR
++#define FC_RPORT_ROLE_IP_PORT			FC_PORT_ROLE_IP_PORT
++
++
++/* Macro for use in defining Virtual Port attributes */
++#define FC_VPORT_ATTR(_name,_mode,_show,_store)				\
++struct class_device_attribute class_device_attr_vport_##_name = 	\
++	__ATTR(_name,_mode,_show,_store)
+ 
+ 
+ /*
++ * FC Virtual Port Attributes
++ *
++ * This structure exists for each FC port that is a virtual FC port.
++ * Virtual ports share the physical link with the physical port. Each
++ * virtual port has a unique presence on the SAN, and may be instantiated
++ * via NPIV, Virtual Fabrics, or additional ALPAs. As the vport is a
++ * unique presence, each vport has its own view of the fabric,
++ * authentication privilege, and priorities.
++ *
++ * A virtual port may support one or more FC4 roles. Typically it is an
++ * FCP Initiator. It could be an FCP Target, or exist solely for an IP
++ * over FC role. FC port attributes for the vport will be reported on any
++ * fc_host class object allocated for an FCP Initiator.
++ *
++ * --
++ *
++ * Fixed attributes are not expected to change. The driver is
++ * expected to set these values after receiving the fc_vport structure
++ * via the vport_create() call from the transport.
++ * The transport fully manages all get functions w/o driver interaction.
++ *
++ * Dynamic attributes are expected to change. The driver participates
++ * in all get/set operations via functions provided by the driver.
++ *
++ * Private attributes are transport-managed values. They are fully
++ * managed by the transport w/o driver interaction.
++ */
++
++#define FC_VPORT_SYMBOLIC_NAMELEN		64
++struct fc_vport {
++	/* Fixed Attributes */
++
++	/* Dynamic Attributes */
++
++	/* Private (Transport-managed) Attributes */
++	enum fc_vport_state vport_state;
++	enum fc_vport_state vport_last_state;
++	u64 node_name;
++	u64 port_name;
++	u32 roles;
++	u32 vport_id;		/* Admin Identifier for the vport */
++	enum fc_port_type vport_type;
++	char symbolic_name[FC_VPORT_SYMBOLIC_NAMELEN];
++
++	/* exported data */
++	void *dd_data;			/* Used for driver-specific storage */
++
++	/* internal data */
++	struct Scsi_Host *shost;	/* Physical Port Parent */
++	unsigned int channel;
++	u32 number;
++	u8 flags;
++	struct list_head peers;
++	struct device dev;
++	struct work_struct vport_delete_work;
++} __attribute__((aligned(sizeof(unsigned long))));
++
++/* bit field values for struct fc_vport "flags" field: */
++#define FC_VPORT_CREATING		0x01
++#define FC_VPORT_DELETING		0x02
++#define FC_VPORT_DELETED		0x04
++#define FC_VPORT_DEL			0x06	/* Any DELETE state */
++
++#define	dev_to_vport(d)				\
++	container_of(d, struct fc_vport, dev)
++#define transport_class_to_vport(classdev)	\
++	dev_to_vport(classdev->dev)
++#define vport_to_shost(v)			\
++	(v->shost)
++#define vport_to_shost_channel(v)		\
++	(v->channel)
++#define vport_to_parent(v)			\
++	(v->dev.parent)
++
++
++/* Error return codes for vport_create() callback */
++#define VPCERR_UNSUPPORTED		-ENOSYS		/* no driver/adapter
++							   support */
++#define VPCERR_BAD_WWN			-ENOTUNIQ	/* driver validation
++							   of WWNs failed */
++#define VPCERR_NO_FABRIC_SUPP		-EOPNOTSUPP	/* Fabric connection
++							   is loop or the
++							   Fabric Port does
++							   not support NPIV */
++
++/*
+  * fc_rport_identifiers: This set of data contains all elements
+  * to uniquely identify a remote FC port. The driver uses this data
+  * to report the existence of a remote FC port in the topology. Internally,
+@@ -149,6 +268,7 @@
+ 	u32 roles;
+ };
+ 
++
+ /* Macro for use in defining Remote Port attributes */
+ #define FC_RPORT_ATTR(_name,_mode,_show,_store)				\
+ struct class_device_attribute class_device_attr_rport_##_name = 	\
+@@ -343,6 +463,7 @@
+ 	u8  supported_fc4s[FC_FC4_LIST_SIZE];
+ 	u32 supported_speeds;
+ 	u32 maxframe_size;
++	u16 max_npiv_vports;
+ 	char serial_number[FC_SERIAL_NUMBER_SIZE];
+ 
+ 	/* Dynamic Attributes */
+@@ -361,8 +482,11 @@
+ 	/* internal data */
+ 	struct list_head rports;
+ 	struct list_head rport_bindings;
++	struct list_head vports;
+ 	u32 next_rport_number;
+ 	u32 next_target_id;
++	u32 next_vport_number;
++	u16 npiv_vports_inuse;
+ 
+ 	/* work queues for rport state manipulation */
+ 	char work_q_name[KOBJ_NAME_LEN];
+@@ -388,6 +512,8 @@
+ 	(((struct fc_host_attrs *)(x)->shost_data)->supported_speeds)
+ #define fc_host_maxframe_size(x)	\
+ 	(((struct fc_host_attrs *)(x)->shost_data)->maxframe_size)
++#define fc_host_max_npiv_vports(x)	\
++	(((struct fc_host_attrs *)(x)->shost_data)->max_npiv_vports)
+ #define fc_host_serial_number(x)	\
+ 	(((struct fc_host_attrs *)(x)->shost_data)->serial_number)
+ #define fc_host_port_id(x)	\
+@@ -412,10 +538,16 @@
+ 	(((struct fc_host_attrs *)(x)->shost_data)->rports)
+ #define fc_host_rport_bindings(x) \
+ 	(((struct fc_host_attrs *)(x)->shost_data)->rport_bindings)
++#define fc_host_vports(x) \
++	(((struct fc_host_attrs *)(x)->shost_data)->vports)
+ #define fc_host_next_rport_number(x) \
+ 	(((struct fc_host_attrs *)(x)->shost_data)->next_rport_number)
+ #define fc_host_next_target_id(x) \
+ 	(((struct fc_host_attrs *)(x)->shost_data)->next_target_id)
++#define fc_host_next_vport_number(x) \
++	(((struct fc_host_attrs *)(x)->shost_data)->next_vport_number)
++#define fc_host_npiv_vports_inuse(x)	\
++	(((struct fc_host_attrs *)(x)->shost_data)->npiv_vports_inuse)
+ #define fc_host_work_q_name(x) \
+ 	(((struct fc_host_attrs *)(x)->shost_data)->work_q_name)
+ #define fc_host_work_q(x) \
+@@ -452,8 +584,14 @@
+ 	void    (*dev_loss_tmo_callbk)(struct fc_rport *);
+ 	void	(*terminate_rport_io)(struct fc_rport *);
+ 
++	void	(*set_vport_symbolic_name)(struct fc_vport *);
++	int  	(*vport_create)(struct fc_vport *, bool);
++	int	(*vport_disable)(struct fc_vport *, bool);
++	int  	(*vport_delete)(struct fc_vport *);
++
+ 	/* allocation lengths for host-specific data */
+ 	u32	 			dd_fcrport_size;
++	u32	 			dd_fcvport_size;
+ 
+ 	/* 
+ 	 * The driver sets these to tell the transport class it
+@@ -512,7 +650,7 @@
+ 
+ 	switch (rport->port_state) {
+ 	case FC_PORTSTATE_ONLINE:
+-		if (rport->roles & FC_RPORT_ROLE_FCP_TARGET)
++		if (rport->roles & FC_PORT_ROLE_FCP_TARGET)
+ 			result = 0;
+ 		else if (rport->flags & FC_RPORT_DEVLOSS_PENDING)
+ 			result = DID_IMM_RETRY << 16;
+@@ -549,6 +687,27 @@
+ 	wwn[7] = inm & 0xff;
+ }
+ 
++/**
++ * fc_vport_set_state() - called to set a vport's state. Saves the old state,
++ *   excepting the transitory states of initializing and sending the ELS
++ *   traffic to instantiate the vport on the link.
++ *
++ * Assumes the driver has surrounded this with the proper locking to ensure
++ * a coherent state change.
++ *
++ * @vport:	virtual port whose state is changing
++ * @new_state:  new state
++ **/
++static inline void
++fc_vport_set_state(struct fc_vport *vport, enum fc_vport_state new_state)
++{
++	if ((new_state != FC_VPORT_UNKNOWN) &&
++	    (new_state != FC_VPORT_INITIALIZING))
++		vport->vport_last_state = vport->vport_state;
++	vport->vport_state = new_state;
++}
++
++
+ struct scsi_transport_template *fc_attach_transport(
+ 			struct fc_function_template *);
+ void fc_release_transport(struct scsi_transport_template *);
+@@ -567,5 +726,6 @@
+ 	 *   be sure to read the Vendor Type and ID formatting requirements
+ 	 *   specified in scsi_netlink.h
+ 	 */
++int fc_vport_terminate(struct fc_vport *vport);
+ 
+ #endif /* SCSI_TRANSPORT_FC_H */
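
A sketch of an LLD wiring up the NPIV callbacks and using
fc_vport_set_state() (all example_* names are invented, and the fabric
login step stands in for driver-specific logic):

#include <scsi/scsi_transport_fc.h>

struct example_vport_priv {		/* sized via dd_fcvport_size below */
	int instance;
};

static int example_vport_create(struct fc_vport *vport, bool disable)
{
	fc_vport_set_state(vport, FC_VPORT_INITIALIZING);
	/* ... fabric login (FDISC) for this vport would happen here ... */
	fc_vport_set_state(vport, disable ? FC_VPORT_DISABLED
					  : FC_VPORT_ACTIVE);
	return 0;
}

static int example_vport_disable(struct fc_vport *vport, bool disable)
{
	fc_vport_set_state(vport, disable ? FC_VPORT_DISABLED
					  : FC_VPORT_ACTIVE);
	return 0;
}

static int example_vport_delete(struct fc_vport *vport)
{
	/* undo the fabric login and free driver state for this vport */
	return 0;
}

static struct fc_function_template example_fc_template = {
	.vport_create	 = example_vport_create,
	.vport_disable	 = example_vport_disable,
	.vport_delete	 = example_vport_delete,
	.dd_fcvport_size = sizeof(struct example_vport_priv),
};
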
+diff -Nurb linux-2.6.22-570/include/scsi/scsi_transport_iscsi.h linux-2.6.22-try2/include/scsi/scsi_transport_iscsi.h
+--- linux-2.6.22-570/include/scsi/scsi_transport_iscsi.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/include/scsi/scsi_transport_iscsi.h	2007-12-19 15:29:23.000000000 -0500
+@@ -79,7 +79,8 @@
+ 	char *name;
+ 	unsigned int caps;
+ 	/* LLD sets this to indicate what values it can export to sysfs */
+-	unsigned int param_mask;
++	uint64_t param_mask;
++	uint64_t host_param_mask;
+ 	struct scsi_host_template *host_template;
+ 	/* LLD connection data size */
+ 	int conndata_size;
+@@ -89,7 +90,8 @@
+ 	unsigned int max_conn;
+ 	unsigned int max_cmd_len;
+ 	struct iscsi_cls_session *(*create_session) (struct iscsi_transport *it,
+-		struct scsi_transport_template *t, uint32_t sn, uint32_t *hn);
++		struct scsi_transport_template *t, uint16_t, uint16_t,
++		uint32_t sn, uint32_t *hn);
+ 	void (*destroy_session) (struct iscsi_cls_session *session);
+ 	struct iscsi_cls_conn *(*create_conn) (struct iscsi_cls_session *sess,
+ 				uint32_t cid);
+@@ -105,14 +107,18 @@
+ 			       enum iscsi_param param, char *buf);
+ 	int (*get_session_param) (struct iscsi_cls_session *session,
+ 				  enum iscsi_param param, char *buf);
++	int (*get_host_param) (struct Scsi_Host *shost,
++				enum iscsi_host_param param, char *buf);
++	int (*set_host_param) (struct Scsi_Host *shost,
++			       enum iscsi_host_param param, char *buf,
++			       int buflen);
+ 	int (*send_pdu) (struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
+ 			 char *data, uint32_t data_size);
+ 	void (*get_stats) (struct iscsi_cls_conn *conn,
+ 			   struct iscsi_stats *stats);
+ 	void (*init_cmd_task) (struct iscsi_cmd_task *ctask);
+ 	void (*init_mgmt_task) (struct iscsi_conn *conn,
+-				struct iscsi_mgmt_task *mtask,
+-				char *data, uint32_t data_size);
++				struct iscsi_mgmt_task *mtask);
+ 	int (*xmit_cmd_task) (struct iscsi_conn *conn,
+ 			      struct iscsi_cmd_task *ctask);
+ 	void (*cleanup_cmd_task) (struct iscsi_conn *conn,
+@@ -124,7 +130,7 @@
+ 			   uint64_t *ep_handle);
+ 	int (*ep_poll) (uint64_t ep_handle, int timeout_ms);
+ 	void (*ep_disconnect) (uint64_t ep_handle);
+-	int (*tgt_dscvr) (enum iscsi_tgt_dscvr type, uint32_t host_no,
++	int (*tgt_dscvr) (struct Scsi_Host *shost, enum iscsi_tgt_dscvr type,
+ 			  uint32_t enable, struct sockaddr *dst_addr);
+ };
+ 
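
A sketch of a transport advertising and implementing the new
host-parameter hooks (example_* names and the hardware address are
placeholders; only HWADDRESS is handled, and the remaining transport
fields are elided):

#include <linux/kernel.h>
#include <scsi/iscsi_if.h>
#include <scsi/scsi_transport_iscsi.h>

static int example_get_host_param(struct Scsi_Host *shost,
				  enum iscsi_host_param param, char *buf)
{
	switch (param) {
	case ISCSI_HOST_PARAM_HWADDRESS:
		/* placeholder address; a real driver reports its HBA MAC */
		return sprintf(buf, "00:11:22:33:44:55\n");
	default:
		return -ENOSYS;
	}
}

static struct iscsi_transport example_transport = {
	.name		 = "example",
	.host_param_mask = ISCSI_HOST_HWADDRESS,
	.get_host_param	 = example_get_host_param,
	/* remaining ops and fields elided */
};
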
+diff -Nurb linux-2.6.22-570/init/Kconfig linux-2.6.22-try2/init/Kconfig
+--- linux-2.6.22-570/init/Kconfig	2007-12-12 18:08:41.000000000 -0500
++++ linux-2.6.22-try2/init/Kconfig	2007-12-19 15:29:25.000000000 -0500
+@@ -120,15 +120,6 @@
+ 	  section 6.4 of the Linux Programmer's Guide, available from
+ 	  <http://www.tldp.org/guides.html>.
+ 
+-config IPC_NS
+-	bool "IPC Namespaces"
+-	depends on SYSVIPC
+-	default n
+-	help
+-	  Support ipc namespaces.  This allows containers, i.e. virtual
+-	  environments, to use ipc namespaces to provide different ipc
+-	  objects for different servers.  If unsure, say N.
+-
+ config SYSVIPC_SYSCTL
+ 	bool
+ 	depends on SYSVIPC
+@@ -218,13 +209,14 @@
+ 
+ 	  Say N if unsure.
+ 
+-config UTS_NS
+-	bool "UTS Namespaces"
++config USER_NS
++	bool "User Namespaces (EXPERIMENTAL)"
+ 	default n
++	depends on EXPERIMENTAL
+ 	help
+-	  Support uts namespaces.  This allows containers, i.e.
+-	  vservers, to use uts namespaces to provide different
+-	  uts info for different servers.  If unsure, say N.
++	  Support user namespaces.  This allows containers, i.e.
++	  vservers, to use user namespaces to provide different
++	  user info for different servers.  If unsure, say N.
+ 
+ config AUDIT
+ 	bool "Auditing support"
+@@ -298,9 +290,23 @@
+ 	depends on !OOM_PANIC
+ 	default y
+ 
++config CONTAINERS
++	bool
++
++config CONTAINER_DEBUG
++	bool "Example debug container subsystem"
++	select CONTAINERS
++	help
++	  This option enables a simple container subsystem that
++	  exports useful debugging information about the containers
++	  framework
++
++	  Say N if unsure
++
+ config CPUSETS
+ 	bool "Cpuset support"
+ 	depends on SMP
++	select CONTAINERS
+ 	help
+ 	  This option will let you create and manage CPUSETs which
+ 	  allow dynamically partitioning a system into sets of CPUs and
+@@ -329,6 +335,27 @@
+ 	  If you are using a distro that was released in 2006 or later,
+ 	  it should be safe to say N here.
+ 
++config CONTAINER_CPUACCT
++	bool "Simple CPU accounting container subsystem"
++	select CONTAINERS
++	help
++	  Provides a simple Resource Controller for monitoring the
++	  total CPU consumed by the tasks in a container
++
++config CONTAINER_NS
++	bool "Namespace container subsystem"
++	select CONTAINERS
++	help
++	  Provides a simple namespace container subsystem to
++	  provide hierarchical naming of sets of namespaces,
++	  for instance virtual servers and checkpoint/restart
++	  jobs.
++
++config PROC_PID_CPUSET
++	bool "Include legacy /proc/<pid>/cpuset file"
++	depends on CPUSETS
++	default y
++
+ config RELAY
+ 	bool "Kernel->user space relay support (formerly relayfs)"
+ 	help
+@@ -605,6 +632,33 @@
+ 
+ endchoice
+ 
++config PROC_SMAPS
++	default y
++	bool "Enable /proc/pid/smaps support" if EMBEDDED && PROC_FS && MMU
++	help
++	  The /proc/pid/smaps interface reports a process's private and
++          shared memory per mapping. Disabling this interface will reduce
++          the size of the kernel for small machines.
++
++config PROC_CLEAR_REFS
++	default y
++	bool "Enable /proc/pid/clear_refs support" if EMBEDDED && PROC_FS && MMU
++	help
++	  The /proc/pid/clear_refs interface allows clearing the
++          referenced bits on a process's memory maps to allow monitoring
++          working set size. Disabling this interface will reduce
++          the size of the kernel for small machines.
++
++config PROC_PAGEMAP
++	default y
++	bool "Enable /proc/pid/pagemap support" if EMBEDDED && PROC_FS && MMU
++	help
++	  The /proc/pid/pagemap interface allows reading the
++          kernel's virtual memory to page frame mapping to determine which
++          individual pages a process has mapped and which pages it shares
++          with other processes. Disabling this interface will reduce the
++          size of the kernel for small machines.
++
+ endmenu		# General setup
+ 
+ config RT_MUTEXES
+@@ -620,6 +674,19 @@
+ 	default 0 if BASE_FULL
+ 	default 1 if !BASE_FULL
+ 
++config PAGE_GROUP_BY_MOBILITY
++	bool "Group pages based on their mobility in the page allocator"
++	def_bool y
++	help
++	  The standard allocator will fragment memory over time, which means
++	  that high-order allocations will fail even if kswapd is running. If
++	  this option is set, the allocator will try to group page types
++	  based on their ability to migrate or be reclaimed. This is a
++	  best-effort attempt at lowering fragmentation, which a few workloads
++	  care about. The cost is a more complex allocator that may perform
++	  more slowly. If you are interested in working with large pages, say
++	  Y and set /proc/sys/vm/min_free_kbytes to 16374. Otherwise, say N.
++
+ menu "Loadable module support"
+ 
+ config MODULES
+diff -Nurb linux-2.6.22-570/init/do_mounts_initrd.c linux-2.6.22-try2/init/do_mounts_initrd.c
+--- linux-2.6.22-570/init/do_mounts_initrd.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/init/do_mounts_initrd.c	2007-12-19 15:29:24.000000000 -0500
+@@ -56,12 +56,9 @@
+ 	sys_chroot(".");
+ 
+ 	pid = kernel_thread(do_linuxrc, "/linuxrc", SIGCHLD);
+-	if (pid > 0) {
+-		while (pid != sys_wait4(-1, NULL, 0, NULL)) {
+-			try_to_freeze();
++	if (pid > 0)
++		while (pid != sys_wait4(-1, NULL, 0, NULL))
+ 			yield();
+-		}
+-	}
+ 
+ 	/* move initrd to rootfs' /old */
+ 	sys_fchdir(old_fd);
+diff -Nurb linux-2.6.22-570/init/main.c linux-2.6.22-try2/init/main.c
+--- linux-2.6.22-570/init/main.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/init/main.c	2007-12-19 15:29:24.000000000 -0500
+@@ -39,6 +39,7 @@
+ #include <linux/writeback.h>
+ #include <linux/cpu.h>
+ #include <linux/cpuset.h>
++#include <linux/container.h>
+ #include <linux/efi.h>
+ #include <linux/tick.h>
+ #include <linux/interrupt.h>
+@@ -502,6 +503,7 @@
+ 	char * command_line;
+ 	extern struct kernel_param __start___param[], __stop___param[];
+ 
++	container_init_early();
+ 	smp_setup_processor_id();
+ 
+ 	/*
+@@ -627,6 +629,7 @@
+ #ifdef CONFIG_PROC_FS
+ 	proc_root_init();
+ #endif
++	container_init();
+ 	cpuset_init();
+ 	taskstats_init_early();
+ 	delayacct_init();
+diff -Nurb linux-2.6.22-570/ipc/msg.c linux-2.6.22-try2/ipc/msg.c
+--- linux-2.6.22-570/ipc/msg.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/ipc/msg.c	2007-12-19 15:29:24.000000000 -0500
+@@ -88,7 +88,7 @@
+ static int sysvipc_msg_proc_show(struct seq_file *s, void *it);
+ #endif
+ 
+-static void __ipc_init __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
++static void __msg_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+ {
+ 	ns->ids[IPC_MSG_IDS] = ids;
+ 	ns->msg_ctlmax = MSGMAX;
+@@ -97,7 +97,6 @@
+ 	ipc_init_ids(ids, ns->msg_ctlmni);
+ }
+ 
+-#ifdef CONFIG_IPC_NS
+ int msg_init_ns(struct ipc_namespace *ns)
+ {
+ 	struct ipc_ids *ids;
+@@ -129,7 +128,6 @@
+ 	kfree(ns->ids[IPC_MSG_IDS]);
+ 	ns->ids[IPC_MSG_IDS] = NULL;
+ }
+-#endif
+ 
+ void __init msg_init(void)
+ {
+diff -Nurb linux-2.6.22-570/ipc/sem.c linux-2.6.22-try2/ipc/sem.c
+--- linux-2.6.22-570/ipc/sem.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/ipc/sem.c	2007-12-19 15:29:24.000000000 -0500
+@@ -123,7 +123,7 @@
+ #define sc_semopm	sem_ctls[2]
+ #define sc_semmni	sem_ctls[3]
+ 
+-static void __ipc_init __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
++static void __sem_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+ {
+ 	ns->ids[IPC_SEM_IDS] = ids;
+ 	ns->sc_semmsl = SEMMSL;
+@@ -134,7 +134,6 @@
+ 	ipc_init_ids(ids, ns->sc_semmni);
+ }
+ 
+-#ifdef CONFIG_IPC_NS
+ int sem_init_ns(struct ipc_namespace *ns)
+ {
+ 	struct ipc_ids *ids;
+@@ -166,7 +165,6 @@
+ 	kfree(ns->ids[IPC_SEM_IDS]);
+ 	ns->ids[IPC_SEM_IDS] = NULL;
+ }
+-#endif
+ 
+ void __init sem_init (void)
+ {
+diff -Nurb linux-2.6.22-570/ipc/shm.c linux-2.6.22-try2/ipc/shm.c
+--- linux-2.6.22-570/ipc/shm.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/ipc/shm.c	2007-12-19 15:29:24.000000000 -0500
+@@ -79,7 +79,7 @@
+ static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
+ #endif
+ 
+-static void __ipc_init __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
++static void __shm_init_ns(struct ipc_namespace *ns, struct ipc_ids *ids)
+ {
+ 	ns->ids[IPC_SHM_IDS] = ids;
+ 	ns->shm_ctlmax = SHMMAX;
+@@ -100,7 +100,6 @@
+ 		shm_destroy(ns, shp);
+ }
+ 
+-#ifdef CONFIG_IPC_NS
+ int shm_init_ns(struct ipc_namespace *ns)
+ {
+ 	struct ipc_ids *ids;
+@@ -132,7 +131,6 @@
+ 	kfree(ns->ids[IPC_SHM_IDS]);
+ 	ns->ids[IPC_SHM_IDS] = NULL;
+ }
+-#endif
+ 
+ void __init shm_init (void)
+ {
+@@ -234,13 +232,13 @@
+ 	mutex_unlock(&shm_ids(ns).mutex);
+ }
+ 
+-static struct page *shm_nopage(struct vm_area_struct *vma,
+-			       unsigned long address, int *type)
++static struct page *shm_fault(struct vm_area_struct *vma,
++					struct fault_data *fdata)
+ {
+ 	struct file *file = vma->vm_file;
+ 	struct shm_file_data *sfd = shm_file_data(file);
+ 
+-	return sfd->vm_ops->nopage(vma, address, type);
++	return sfd->vm_ops->fault(vma, fdata);
+ }
+ 
+ #ifdef CONFIG_NUMA
+@@ -279,6 +277,7 @@
+ 	if (ret != 0)
+ 		return ret;
+ 	sfd->vm_ops = vma->vm_ops;
++	BUG_ON(!sfd->vm_ops->fault);
+ 	vma->vm_ops = &shm_vm_ops;
+ 	shm_open(vma);
+ 
+@@ -337,7 +336,7 @@
+ static struct vm_operations_struct shm_vm_ops = {
+ 	.open	= shm_open,	/* callback for a new vm-area open */
+ 	.close	= shm_close,	/* callback for when the vm-area is released */
+-	.nopage	= shm_nopage,
++	.fault	= shm_fault,
+ #if defined(CONFIG_NUMA)
+ 	.set_policy = shm_set_policy,
+ 	.get_policy = shm_get_policy,
+diff -Nurb linux-2.6.22-570/ipc/util.c linux-2.6.22-try2/ipc/util.c
+--- linux-2.6.22-570/ipc/util.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/ipc/util.c	2007-12-19 15:29:24.000000000 -0500
+@@ -52,7 +52,6 @@
+ 	},
+ };
+ 
+-#ifdef CONFIG_IPC_NS
+ static struct ipc_namespace *clone_ipc_ns(struct ipc_namespace *old_ns)
+ {
+ 	int err;
+@@ -114,14 +113,6 @@
+ 	atomic_dec(&vs_global_ipc_ns);
+ 	kfree(ns);
+ }
+-#else
+-struct ipc_namespace *copy_ipcs(unsigned long flags, struct ipc_namespace *ns)
+-{
+-	if (flags & CLONE_NEWIPC)
+-		return ERR_PTR(-EINVAL);
+-	return ns;
+-}
+-#endif
+ 
+ /**
+  *	ipc_init	-	initialise IPC subsystem
+@@ -149,7 +140,7 @@
+  *	array itself. 
+  */
+  
+-void __ipc_init ipc_init_ids(struct ipc_ids* ids, int size)
++void ipc_init_ids(struct ipc_ids* ids, int size)
+ {
+ 	int i;
+ 
+diff -Nurb linux-2.6.22-570/ipc/util.h linux-2.6.22-try2/ipc/util.h
+--- linux-2.6.22-570/ipc/util.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/ipc/util.h	2007-12-19 15:29:24.000000000 -0500
+@@ -41,12 +41,8 @@
+ };
+ 
+ struct seq_file;
+-#ifdef CONFIG_IPC_NS
+-#define __ipc_init
+-#else
+-#define __ipc_init	__init
+-#endif
+-void __ipc_init ipc_init_ids(struct ipc_ids *ids, int size);
++
++void ipc_init_ids(struct ipc_ids *ids, int size);
+ #ifdef CONFIG_PROC_FS
+ void __init ipc_init_proc_interface(const char *path, const char *header,
+ 		int ids, int (*show)(struct seq_file *, void *));
+diff -Nurb linux-2.6.22-570/kernel/Makefile linux-2.6.22-try2/kernel/Makefile
+--- linux-2.6.22-570/kernel/Makefile	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/Makefile	2007-12-19 15:29:25.000000000 -0500
+@@ -4,11 +4,12 @@
+ 
+ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+ 	    exit.o itimer.o time.o softirq.o resource.o \
+-	    sysctl.o capability.o ptrace.o timer.o user.o \
++	    sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \
+ 	    signal.o sys.o kmod.o workqueue.o pid.o \
+ 	    rcupdate.o extable.o params.o posix-timers.o \
+ 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
+-	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o
++	    hrtimer.o rwsem.o latency.o nsproxy.o srcu.o die_notifier.o \
++	    utsname.o
+ 
+ obj-y	  += vserver/
+ 
+@@ -33,16 +34,22 @@
+ obj-$(CONFIG_UID16) += uid16.o
+ obj-$(CONFIG_MODULES) += module.o
+ obj-$(CONFIG_KALLSYMS) += kallsyms.o
++obj-$(CONFIG_STACK_UNWIND) += unwind.o
+ obj-$(CONFIG_PM) += power/
+ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
+ obj-$(CONFIG_KEXEC) += kexec.o
+ obj-$(CONFIG_COMPAT) += compat.o
++obj-$(CONFIG_CONTAINERS) += container.o
++obj-$(CONFIG_CONTAINER_DEBUG) += container_debug.o
+ obj-$(CONFIG_CPUSETS) += cpuset.o
++obj-$(CONFIG_CONTAINER_CPUACCT) += cpu_acct.o
++obj-$(CONFIG_CONTAINER_NS) += ns_container.o
+ obj-$(CONFIG_IKCONFIG) += configs.o
+ obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
+ obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
+ obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
+ obj-$(CONFIG_KPROBES) += kprobes.o
++obj-$(CONFIG_KGDB) += kgdb.o
+ obj-$(CONFIG_SYSFS) += ksysfs.o
+ obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
+ obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
+@@ -50,7 +57,6 @@
+ obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
+ obj-$(CONFIG_RELAY) += relay.o
+ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
+-obj-$(CONFIG_UTS_NS) += utsname.o
+ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
+ obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
+ 
+diff -Nurb linux-2.6.22-570/kernel/audit.c linux-2.6.22-try2/kernel/audit.c
+--- linux-2.6.22-570/kernel/audit.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/kernel/audit.c	2007-12-19 15:29:24.000000000 -0500
+@@ -391,6 +391,7 @@
+ {
+ 	struct sk_buff *skb;
+ 
++	set_freezable();
+ 	while (!kthread_should_stop()) {
+ 		skb = skb_dequeue(&audit_skb_queue);
+ 		wake_up(&audit_backlog_wait);
+diff -Nurb linux-2.6.22-570/kernel/auditsc.c linux-2.6.22-try2/kernel/auditsc.c
+--- linux-2.6.22-570/kernel/auditsc.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/kernel/auditsc.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1500,6 +1500,7 @@
+ 			context->names[idx].ino = (unsigned long)-1;
+ 	}
+ }
++EXPORT_SYMBOL(__audit_inode_child);
+ 
+ /**
+  * auditsc_get_stamp - get local copies of audit_context values
+diff -Nurb linux-2.6.22-570/kernel/container.c linux-2.6.22-try2/kernel/container.c
+--- linux-2.6.22-570/kernel/container.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/kernel/container.c	2007-12-19 22:03:06.000000000 -0500
+@@ -0,0 +1,2545 @@
++/*
++ *  kernel/container.c
++ *
++ *  Generic process-grouping system.
++ *
++ *  Based originally on the cpuset system, extracted by Paul Menage
++ *  Copyright (C) 2006 Google, Inc
++ *
++ *  Copyright notices from the original cpuset code:
++ *  --------------------------------------------------
++ *  Copyright (C) 2003 BULL SA.
++ *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
++ *
++ *  Portions derived from Patrick Mochel's sysfs code.
++ *  sysfs is Copyright (c) 2001-3 Patrick Mochel
++ *
++ *  2003-10-10 Written by Simon Derr.
++ *  2003-10-22 Updates by Stephen Hemminger.
++ *  2004 May-July Rework by Paul Jackson.
++ *  ---------------------------------------------------
++ *
++ *  This file is subject to the terms and conditions of the GNU General Public
++ *  License.  See the file COPYING in the main directory of the Linux
++ *  distribution for more details.
++ */
++
++#include <linux/cpu.h>
++#include <linux/cpumask.h>
++#include <linux/container.h>
++#include <linux/err.h>
++#include <linux/errno.h>
++#include <linux/file.h>
++#include <linux/fs.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <linux/kernel.h>
++#include <linux/kmod.h>
++#include <linux/list.h>
++#include <linux/mempolicy.h>
++#include <linux/mm.h>
++#include <linux/mutex.h>
++#include <linux/module.h>
++#include <linux/mount.h>
++#include <linux/namei.h>
++#include <linux/pagemap.h>
++#include <linux/proc_fs.h>
++#include <linux/rcupdate.h>
++#include <linux/uaccess.h>
++#include <linux/sched.h>
++#include <linux/seq_file.h>
++#include <linux/security.h>
++#include <linux/slab.h>
++#include <linux/magic.h>
++#include <linux/smp_lock.h>
++#include <linux/spinlock.h>
++#include <linux/stat.h>
++#include <linux/string.h>
++#include <linux/time.h>
++#include <linux/backing-dev.h>
++#include <linux/sort.h>
++
++#include <asm/atomic.h>
++
++static DEFINE_MUTEX(container_mutex);
++
++/* Generate an array of container subsystem pointers */
++#define SUBSYS(_x) &_x ## _subsys,
++
++static struct container_subsys *subsys[] = {
++#include <linux/container_subsys.h>
++};
++
++/* A containerfs_root represents the root of a container hierarchy,
++ * and may be associated with a superblock to form an active
++ * hierarchy */
++struct containerfs_root {
++	struct super_block *sb;
++
++	/* The bitmask of subsystems attached to this hierarchy */
++	unsigned long subsys_bits;
++
++	/* A list running through the attached subsystems */
++	struct list_head subsys_list;
++
++	/* The root container for this hierarchy */
++	struct container top_container;
++
++	/* Tracks how many containers are currently defined in hierarchy.*/
++	int number_of_containers;
++
++	/* A list running through the mounted hierarchies */
++	struct list_head root_list;
++
++	/* The path to use for release notifications. No locking
++	 * between setting and use - so if userspace updates this
++	 * while subcontainers exist, you could miss a
++	 * notification. We ensure that it's always a valid
++	 * NUL-terminated string */
++	char release_agent_path[PATH_MAX];
++};
++
++
++/* The "rootnode" hierarchy is the "dummy hierarchy", reserved for the
++ * subsystems that are otherwise unattached - it never has more than a
++ * single container, and all tasks are part of that container. */
++
++static struct containerfs_root rootnode;
++
++/* The list of hierarchy roots */
++
++static LIST_HEAD(roots);
++static int root_count;
++
++/* dummytop is a shorthand for the dummy hierarchy's top container */
++#define dummytop (&rootnode.top_container)
++
++/* This flag indicates whether tasks in the fork and exit paths should
++ * take callback_mutex and check for fork/exit handlers to call. This
++ * avoids us having to do extra work in the fork/exit path if none of the
++ * subsystems need to be called.
++ */
++static int need_forkexit_callback;
++
++/* bits in struct container flags field */
++enum {
++	/* Container is dead */
++	CONT_REMOVED,
++	/* Container has previously had a child container or a task,
++	 * but no longer (only if CONT_NOTIFY_ON_RELEASE is set) */
++	CONT_RELEASABLE,
++	/* Container requires release notifications to userspace */
++	CONT_NOTIFY_ON_RELEASE,
++};
++
++/* convenient tests for these bits */
++inline int container_is_removed(const struct container *cont)
++{
++	return test_bit(CONT_REMOVED, &cont->flags);
++}
++
++inline int container_is_releasable(const struct container *cont)
++{
++	const int bits =
++		(1 << CONT_RELEASABLE) |
++		(1 << CONT_NOTIFY_ON_RELEASE);
++	return (cont->flags & bits) == bits;
++}
++
++inline int notify_on_release(const struct container *cont)
++{
++	return test_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags);
++}
++
++/* for_each_subsys() allows you to iterate on each subsystem attached to
++ * an active hierarchy */
++#define for_each_subsys(_root, _ss) \
++list_for_each_entry(_ss, &_root->subsys_list, sibling)
++
++/* for_each_root() allows you to iterate across the active hierarchies */
++#define for_each_root(_root) \
++list_for_each_entry(_root, &roots, root_list)
++
++/* the list of containers eligible for automatic release */
++static LIST_HEAD(release_list);
++static void container_release_agent(struct work_struct *work);
++static DECLARE_WORK(release_agent_work, container_release_agent);
++static void check_for_release(struct container *cont);
++
++/* Link structure for associating css_group objects with containers */
++struct cg_container_link {
++	/*
++	 * List running through cg_container_links associated with a
++	 * container, anchored on container->css_groups
++	 */
++	struct list_head cont_link_list;
++	/*
++	 * List running through cg_container_links pointing at a
++	 * single css_group object, anchored on css_group->cg_links
++	 */
++	struct list_head cg_link_list;
++	struct css_group *cg;
++};
++
++/* The default css_group - used by init and its children prior to any
++ * hierarchies being mounted. It contains a pointer to the root state
++ * for each subsystem. Also used to anchor the list of css_groups. Not
++ * reference-counted, to improve performance when child containers
++ * haven't been created.
++ */
++
++static struct css_group init_css_group;
++static struct cg_container_link init_css_group_link;
++
++/* css_group_lock protects the list of css_group objects, and the
++ * chain of tasks off each css_group. Nests inside task->alloc_lock */
++static DEFINE_RWLOCK(css_group_lock);
++static int css_group_count;
++
++
++/* When we create or destroy a css_group, the operation simply
++ * takes/releases a reference count on all the containers referenced
++ * by subsystems in this css_group. This can end up multiple-counting
++ * some containers, but that's OK - the ref-count is just a
++ * busy/not-busy indicator; ensuring that we only count each container
++ * once would require taking a global lock to ensure that no
++ * subsystems moved between hierarchies while we were doing so.
++ *
++ * Possible TODO: decide at boot time based on the number of
++ * registered subsystems and the number of CPUs or NUMA nodes whether
++ * it's better for performance to ref-count every subsystem, or to
++ * take a global lock and only add one ref count to each hierarchy.
++ */
++
++/*
++ * unlink a css_group from the list and free it
++ */
++static void unlink_css_group(struct css_group *cg)
++{
++	write_lock(&css_group_lock);
++	list_del(&cg->list);
++	css_group_count--;
++	while (!list_empty(&cg->cg_links)) {
++		struct cg_container_link *link;
++		link = list_entry(cg->cg_links.next,
++				  struct cg_container_link, cg_link_list);
++		list_del(&link->cg_link_list);
++		list_del(&link->cont_link_list);
++		kfree(link);
++	}
++	write_unlock(&css_group_lock);
++}
++
++static void release_css_group(struct kref *k)
++{
++	int i;
++	struct css_group *cg = container_of(k, struct css_group, ref);
++
++	BUG_ON(!mutex_is_locked(&container_mutex));
++	unlink_css_group(cg);
++	for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++		struct container *cont = cg->subsys[i]->container;
++		if (atomic_dec_and_test(&cont->count) &&
++		    container_is_releasable(cont)) {
++			check_for_release(cont);
++		}
++	}
++	kfree(cg);
++}
++
++/*
++ * In the task exit path we want to avoid taking container_mutex
++ * unless absolutely necessary, so the release process is slightly
++ * different.
++ */
++static void release_css_group_taskexit(struct kref *k)
++{
++	int i;
++	struct css_group *cg = container_of(k, struct css_group, ref);
++
++	unlink_css_group(cg);
++	for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++		struct container *cont = cg->subsys[i]->container;
++		if (notify_on_release(cont)) {
++			mutex_lock(&container_mutex);
++			set_bit(CONT_RELEASABLE, &cont->flags);
++			if (atomic_dec_and_test(&cont->count))
++				check_for_release(cont);
++			mutex_unlock(&container_mutex);
++		} else {
++			atomic_dec(&cont->count);
++		}
++	}
++	kfree(cg);
++}
++
++/*
++ * refcounted get/put for css_group objects
++ */
++static inline void get_css_group(struct css_group *cg)
++{
++	kref_get(&cg->ref);
++}
++
++static inline void put_css_group(struct css_group *cg)
++{
++	kref_put(&cg->ref, release_css_group);
++}
++
++static inline void put_css_group_taskexit(struct css_group *cg)
++{
++	kref_put(&cg->ref, release_css_group_taskexit);
++}
++
++/*
++ * find_existing_css_group() is a helper for
++ * find_css_group(), and checks to see whether an existing
++ * css_group is suitable. This currently walks a linked-list for
++ * simplicity; a later patch will use a hash table for better
++ * performance
++ *
++ * oldcg: the container group that we're using before the container
++ * transition
++ *
++ * cont: the container that we're moving into
++ *
++ * template: location in which to build the desired set of subsystem
++ * state objects for the new container group
++ */
++
++static struct css_group *find_existing_css_group(
++	struct css_group *oldcg,
++	struct container *cont,
++	struct container_subsys_state *template[])
++{
++	int i;
++	struct containerfs_root *root = cont->root;
++	struct list_head *l = &init_css_group.list;
++
++	/* Build the set of subsystem state objects that we want to
++	 * see in the new css_group */
++	for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++		if (root->subsys_bits & (1ull << i)) {
++			/* Subsystem is in this hierarchy. So we want
++			 * the subsystem state from the new
++			 * container */
++			template[i] = cont->subsys[i];
++		} else {
++			/* Subsystem is not in this hierarchy, so we
++			 * don't want to change the subsystem state */
++			template[i] = oldcg->subsys[i];
++		}
++	}
++
++	/* Look through existing container groups to find one to reuse */
++	do {
++		struct css_group *cg =
++			list_entry(l, struct css_group, list);
++
++		if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) {
++			/* All subsystems matched */
++			return cg;
++		}
++		/* Try the next container group */
++		l = l->next;
++	} while (l != &init_css_group.list);
++
++	/* No existing container group matched */
++	return NULL;
++}
++
++/*
++ * allocate_cg_links() allocates "count" cg_container_link structures
++ * and chains them on tmp through their cont_link_list fields. Returns 0 on
++ * success or a negative error
++ */
++
++static int allocate_cg_links(int count, struct list_head *tmp)
++{
++	struct cg_container_link *link;
++	int i;
++	INIT_LIST_HEAD(tmp);
++	for (i = 0; i < count; i++) {
++		link = kmalloc(sizeof(*link), GFP_KERNEL);
++		if (!link) {
++			while (!list_empty(tmp)) {
++				link = list_entry(tmp->next,
++						  struct cg_container_link,
++						  cont_link_list);
++				list_del(&link->cont_link_list);
++				kfree(link);
++			}
++			return -ENOMEM;
++		}
++		list_add(&link->cont_link_list, tmp);
++	}
++	return 0;
++}
++
++/*
++ * find_css_group() takes an existing container group and a
++ * container object, and returns a css_group object that's
++ * equivalent to the old group, but with the given container
++ * substituted into the appropriate hierarchy. Must be called with
++ * container_mutex held.
++ */
++
++static struct css_group *find_css_group(
++	struct css_group *oldcg, struct container *cont)
++{
++	struct css_group *res;
++	struct container_subsys_state *template[CONTAINER_SUBSYS_COUNT];
++	int i;
++
++	struct list_head tmp_cg_links;
++	struct cg_container_link *link;
++
++	/* First see if we already have a container group that matches
++	 * the desired set */
++	write_lock(&css_group_lock);
++	res = find_existing_css_group(oldcg, cont, template);
++	if (res)
++		get_css_group(res);
++	write_unlock(&css_group_lock);
++
++	if (res)
++		return res;
++
++	res = kmalloc(sizeof(*res), GFP_KERNEL);
++	if (!res)
++		return NULL;
++
++	/* Allocate all the cg_container_link objects that we'll need */
++	if (allocate_cg_links(root_count, &tmp_cg_links) < 0) {
++		kfree(res);
++		return NULL;
++	}
++
++	kref_init(&res->ref);
++	INIT_LIST_HEAD(&res->cg_links);
++	INIT_LIST_HEAD(&res->tasks);
++
++	/* Copy the set of subsystem state objects generated in
++	 * find_existing_css_group() */
++	memcpy(res->subsys, template, sizeof(res->subsys));
++
++	write_lock(&css_group_lock);
++	/* Add reference counts and links from the new css_group. */
++	for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++		struct container *cont = res->subsys[i]->container;
++		struct container_subsys *ss = subsys[i];
++		atomic_inc(&cont->count);
++		/*
++		 * We want to add a link once per container, so we
++		 * only do it for the first subsystem in each
++		 * hierarchy
++		 */
++		if (ss->root->subsys_list.next == &ss->sibling) {
++			BUG_ON(list_empty(&tmp_cg_links));
++			link = list_entry(tmp_cg_links.next,
++					  struct cg_container_link,
++					  cont_link_list);
++			list_del(&link->cont_link_list);
++			list_add(&link->cont_link_list, &cont->css_groups);
++			link->cg = res;
++			list_add(&link->cg_link_list, &res->cg_links);
++		}
++	}
++	if (list_empty(&rootnode.subsys_list)) {
++		link = list_entry(tmp_cg_links.next,
++				  struct cg_container_link,
++				  cont_link_list);
++		list_del(&link->cont_link_list);
++		list_add(&link->cont_link_list, &dummytop->css_groups);
++		link->cg = res;
++		list_add(&link->cg_link_list, &res->cg_links);
++	}
++
++	BUG_ON(!list_empty(&tmp_cg_links));
++
++	/* Link this container group into the list */
++	list_add(&res->list, &init_css_group.list);
++	css_group_count++;
++	write_unlock(&css_group_lock);
++
++	return res;
++}
++
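++/*
++ * Sketch of the intended calling pattern (see attach_task() below
++ * for the real caller); with container_mutex held:
++ *
++ *	newcg = find_css_group(oldcg, cont);
++ *	if (!newcg)
++ *		return -ENOMEM;
++ *	rcu_assign_pointer(tsk->containers, newcg);
++ *	...
++ *	put_css_group(oldcg);
++ *
++ * find_css_group() returns the group with a reference already held,
++ * so the caller only drops the reference on the group it replaced.
++ */
++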
++/*
++ * There is one global container mutex. We also require taking
++ * task_lock() when dereferencing a task's container subsys pointers.
++ * See "The task_lock() exception", at the end of this comment.
++ *
++ * A task must hold container_mutex to modify containers.
++ *
++ * Any task can increment and decrement the count field without lock.
++ * So in general, code holding container_mutex can't rely on the count
++ * field not changing.  However, if the count goes to zero, then only
++ * attach_task() can increment it again.  A count of zero means that
++ * no tasks are currently attached, so there is no way for a task
++ * attached to that container to fork (the other way to increment
++ * the count).  So code holding container_mutex can safely
++ * assume that if the count is zero, it will stay zero. Similarly, if
++ * a task holds container_mutex on a container with zero count, it
++ * knows that the container won't be removed, as container_rmdir()
++ * needs that mutex.
++ *
++ * The container_common_file_write handler for operations that modify
++ * the container hierarchy holds container_mutex across the entire operation,
++ * single threading all such container modifications across the system.
++ *
++ * The fork and exit callbacks container_fork() and container_exit(), don't
++ * (usually) take container_mutex.  These are the two most performance
++ * critical pieces of code here.  The exception occurs on container_exit(),
++ * when a task in a notify_on_release container exits.  Then container_mutex
++ * is taken, and if the container count is zero, a usermode call made
++ * to /sbin/container_release_agent with the name of the container (path
++ * relative to the root of container file system) as the argument.
++ *
++ * A container can only be deleted if both its 'count' of using tasks
++ * is zero, and its list of 'children' containers is empty.  Since all
++ * tasks in the system use _some_ container, and since there is always at
++ * least one task in the system (init, pid == 1), therefore, top_container
++ * always has either children containers and/or using tasks.  So we don't
++ * need a special hack to ensure that top_container cannot be deleted.
++ *
++ *	The task_lock() exception
++ *
++ * The need for this exception arises from the action of
++ * attach_task(), which overwrites one task's container pointer with
++ * another.  It does so while holding container_mutex; however, there
++ * are several performance critical places that need to reference
++ * task->container without the expense of grabbing a system global
++ * mutex.  Therefore, except as noted below, when dereferencing or, as
++ * in attach_task(), modifying a task's container pointer we use
++ * task_lock(), which acts on a spinlock (task->alloc_lock) already in
++ * the task_struct routinely used for such matters.
++ *
++ * P.S.  One more locking exception: RCU is used to guard the
++ * update of a task's container pointer by attach_task().
++ */
++
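++/*
++ * For illustration only: per the rules above, a hypothetical reader
++ * that needs a stable view of a task's container pointers without
++ * taking the global mutex would use task_lock():
++ *
++ *	task_lock(tsk);
++ *	cg = tsk->containers;
++ *	... use cg; it can't be switched out from under us here ...
++ *	task_unlock(tsk);
++ */
++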
++/**
++ * container_lock - lock out any changes to container structures
++ *
++ */
++
++void container_lock(void)
++{
++	mutex_lock(&container_mutex);
++}
++
++/**
++ * container_unlock - release lock on container changes
++ *
++ * Undo the lock taken in a previous container_lock() call.
++ */
++
++void container_unlock(void)
++{
++	mutex_unlock(&container_mutex);
++}
++
++/*
++ * A couple of forward declarations are required, due to the cyclic
++ * reference loop:
++ * container_mkdir -> container_create -> container_populate_dir ->
++ * container_add_file -> container_create_file -> container_dir_inode_operations
++ * -> container_mkdir.
++ */
++
++static int container_mkdir(struct inode *dir, struct dentry *dentry, int mode);
++static int container_rmdir(struct inode *unused_dir, struct dentry *dentry);
++static int container_populate_dir(struct container *cont);
++static struct inode_operations container_dir_inode_operations;
++static struct file_operations proc_containerstats_operations;
++
++static struct inode *container_new_inode(mode_t mode, struct super_block *sb)
++{
++	struct inode *inode = new_inode(sb);
++	static struct backing_dev_info container_backing_dev_info = {
++		.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
++	};
++
++	if (inode) {
++		inode->i_mode = mode;
++		inode->i_uid = current->fsuid;
++		inode->i_gid = current->fsgid;
++		inode->i_blocks = 0;
++		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
++		inode->i_mapping->backing_dev_info = &container_backing_dev_info;
++	}
++	return inode;
++}
++
++static void container_diput(struct dentry *dentry, struct inode *inode)
++{
++	/* is dentry a directory? if so, kfree() the associated container */
++	if (S_ISDIR(inode->i_mode)) {
++		struct container *cont = dentry->d_fsdata;
++		BUG_ON(!(container_is_removed(cont)));
++		kfree(cont);
++	}
++	iput(inode);
++}
++
++static struct dentry *container_get_dentry(struct dentry *parent,
++					   const char *name)
++{
++	struct dentry *d = lookup_one_len(name, parent, strlen(name));
++	static struct dentry_operations container_dops = {
++		.d_iput = container_diput,
++	};
++
++	if (!IS_ERR(d))
++		d->d_op = &container_dops;
++	return d;
++}
++
++static void remove_dir(struct dentry *d)
++{
++	struct dentry *parent = dget(d->d_parent);
++
++	d_delete(d);
++	simple_rmdir(parent->d_inode, d);
++	dput(parent);
++}
++
++static void container_clear_directory(struct dentry *dentry)
++{
++	struct list_head *node;
++
++	BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex));
++	spin_lock(&dcache_lock);
++	node = dentry->d_subdirs.next;
++	while (node != &dentry->d_subdirs) {
++		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
++		list_del_init(node);
++		if (d->d_inode) {
++			/* This should never be called on a container
++			 * directory with child containers */
++			BUG_ON(d->d_inode->i_mode & S_IFDIR);
++			d = dget_locked(d);
++			spin_unlock(&dcache_lock);
++			d_delete(d);
++			simple_unlink(dentry->d_inode, d);
++			dput(d);
++			spin_lock(&dcache_lock);
++		}
++		node = dentry->d_subdirs.next;
++	}
++	spin_unlock(&dcache_lock);
++}
++
++/*
++ * NOTE: the dentry must have been dget()'ed
++ */
++static void container_d_remove_dir(struct dentry *dentry)
++{
++	container_clear_directory(dentry);
++
++	spin_lock(&dcache_lock);
++	list_del_init(&dentry->d_u.d_child);
++	spin_unlock(&dcache_lock);
++	remove_dir(dentry);
++}
++
++static int rebind_subsystems(struct containerfs_root *root,
++			      unsigned long final_bits)
++{
++	unsigned long added_bits, removed_bits;
++	struct container *cont = &root->top_container;
++	int i;
++
++	removed_bits = root->subsys_bits & ~final_bits;
++	added_bits = final_bits & ~root->subsys_bits;
++	/* Check that any added subsystems are currently free */
++	for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++		unsigned long long bit = 1ull << i;
++		struct container_subsys *ss = subsys[i];
++		if (!(bit & added_bits))
++			continue;
++		if (ss->root != &rootnode) {
++			/* Subsystem isn't free */
++			return -EBUSY;
++		}
++	}
++
++	/* Currently we don't handle adding/removing subsystems when
++	 * any subcontainers exist. This is theoretically supportable
++	 * but involves complex error handling, so it's being left until
++	 * later */
++	if (!list_empty(&cont->children))
++		return -EBUSY;
++
++	/* Process each subsystem */
++	for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++		struct container_subsys *ss = subsys[i];
++		unsigned long bit = 1UL << i;
++		if (bit & added_bits) {
++			/* We're binding this subsystem to this hierarchy */
++			BUG_ON(cont->subsys[i]);
++			BUG_ON(!dummytop->subsys[i]);
++			BUG_ON(dummytop->subsys[i]->container != dummytop);
++			cont->subsys[i] = dummytop->subsys[i];
++			cont->subsys[i]->container = cont;
++			list_add(&ss->sibling, &root->subsys_list);
++			rcu_assign_pointer(ss->root, root);
++			if (ss->bind)
++				ss->bind(ss, cont);
++
++		} else if (bit & removed_bits) {
++			/* We're removing this subsystem */
++			BUG_ON(cont->subsys[i] != dummytop->subsys[i]);
++			BUG_ON(cont->subsys[i]->container != cont);
++			if (ss->bind)
++				ss->bind(ss, dummytop);
++			dummytop->subsys[i]->container = dummytop;
++			cont->subsys[i] = NULL;
++			rcu_assign_pointer(subsys[i]->root, &rootnode);
++			list_del(&ss->sibling);
++		} else if (bit & final_bits) {
++			/* Subsystem state should already exist */
++			BUG_ON(!cont->subsys[i]);
++		} else {
++			/* Subsystem state shouldn't exist */
++			BUG_ON(cont->subsys[i]);
++		}
++	}
++	root->subsys_bits = final_bits;
++	synchronize_rcu();
++
++	return 0;
++}
++
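++/*
++ * Worked example (subsystem numbering hypothetical): if a hierarchy
++ * currently has subsys_bits == 0x3 (subsystems 0 and 1) and is asked
++ * for final_bits == 0x6 (subsystems 1 and 2), then removed_bits ==
++ * 0x1 and added_bits == 0x4, so subsystem 0 is handed back to the
++ * dummy hierarchy and subsystem 2 is bound in its place.
++ */
++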
++/*
++ * Release the last use of a hierarchy.  Will never be called when
++ * there are active subcontainers since each subcontainer bumps the
++ * value of sb->s_active.
++ */
++static void container_put_super(struct super_block *sb)
++{
++	struct containerfs_root *root = sb->s_fs_info;
++	struct container *cont = &root->top_container;
++	int ret;
++
++	root->sb = NULL;
++	sb->s_fs_info = NULL;
++
++	mutex_lock(&container_mutex);
++
++	BUG_ON(root->number_of_containers != 1);
++	BUG_ON(!list_empty(&cont->children));
++	BUG_ON(!list_empty(&cont->sibling));
++	BUG_ON(!root->subsys_bits);
++
++	/* Rebind all subsystems back to the default hierarchy */
++	ret = rebind_subsystems(root, 0);
++	BUG_ON(ret);
++
++	write_lock(&css_group_lock);
++	while (!list_empty(&cont->css_groups)) {
++		struct cg_container_link *link;
++		link = list_entry(cont->css_groups.next,
++				  struct cg_container_link, cont_link_list);
++		list_del(&link->cg_link_list);
++		list_del(&link->cont_link_list);
++		kfree(link);
++	}
++	write_unlock(&css_group_lock);
++
++	list_del(&root->root_list);
++	root_count--;
++	kfree(root);
++	mutex_unlock(&container_mutex);
++}
++
++static int container_show_options(struct seq_file *seq, struct vfsmount *vfs)
++{
++	struct containerfs_root *root = vfs->mnt_sb->s_fs_info;
++	struct container_subsys *ss;
++
++	for_each_subsys(root, ss)
++		seq_printf(seq, ",%s", ss->name);
++	return 0;
++}
++
++/* Convert a hierarchy specifier into a bitmask. LL=container_mutex */
++static int parse_containerfs_options(char *opts, unsigned long *bits)
++{
++	char *token, *o = opts ?: "all";
++
++	*bits = 0;
++
++	while ((token = strsep(&o, ",")) != NULL) {
++		if (!*token)
++			return -EINVAL;
++		if (!strcmp(token, "all")) {
++			*bits = (1 << CONTAINER_SUBSYS_COUNT) - 1;
++		} else {
++			struct container_subsys *ss;
++			int i;
++			for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++				ss = subsys[i];
++				if (!strcmp(token, ss->name)) {
++					*bits |= 1 << i;
++					break;
++				}
++			}
++			if (i == CONTAINER_SUBSYS_COUNT)
++				return -ENOENT;
++		}
++	}
++
++	/* We can't have an empty hierarchy */
++	if (!*bits)
++		return -EINVAL;
++
++	return 0;
++}
++
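++/*
++ * Example (names hypothetical): a mount such as
++ *
++ *	mount -t container -o cpuset container /containers
++ *
++ * hands "cpuset" to parse_containerfs_options(), which sets the bit
++ * of each subsystem named in the option string; "-o all", or passing
++ * no options at all, selects every registered subsystem.
++ */
++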
++static int container_remount(struct super_block *sb, int *flags, char *data)
++{
++	int ret = 0;
++	unsigned long subsys_bits;
++	struct containerfs_root *root = sb->s_fs_info;
++	struct container *cont = &root->top_container;
++
++	mutex_lock(&cont->dentry->d_inode->i_mutex);
++	mutex_lock(&container_mutex);
++
++	/* See what subsystems are wanted */
++	ret = parse_containerfs_options(data, &subsys_bits);
++	if (ret)
++		goto out_unlock;
++
++	ret = rebind_subsystems(root, subsys_bits);
++
++	/* (re)populate subsystem files */
++	if (!ret)
++		container_populate_dir(cont);
++
++ out_unlock:
++	mutex_unlock(&container_mutex);
++	mutex_unlock(&cont->dentry->d_inode->i_mutex);
++	return ret;
++}
++
++static struct super_operations container_ops = {
++	.statfs = simple_statfs,
++	.drop_inode = generic_delete_inode,
++	.put_super = container_put_super,
++	.show_options = container_show_options,
++	.remount_fs = container_remount,
++};
++
++static int container_fill_super(struct super_block *sb, void *options,
++				int unused_silent)
++{
++	struct inode *inode;
++	struct dentry *root;
++	struct containerfs_root *hroot = options;
++
++	sb->s_blocksize = PAGE_CACHE_SIZE;
++	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
++	sb->s_magic = CONTAINER_SUPER_MAGIC;
++	sb->s_op = &container_ops;
++
++	inode = container_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR, sb);
++	if (!inode)
++		return -ENOMEM;
++
++	inode->i_op = &container_dir_inode_operations;
++	inode->i_fop = &simple_dir_operations;
++	/* directories start off with i_nlink == 2 (for "." entry) */
++	inc_nlink(inode);
++
++	root = d_alloc_root(inode);
++	if (!root) {
++		iput(inode);
++		return -ENOMEM;
++	}
++	sb->s_root = root;
++	root->d_fsdata = &hroot->top_container;
++	hroot->top_container.dentry = root;
++
++	strcpy(hroot->release_agent_path, "");
++	sb->s_fs_info = hroot;
++	hroot->sb = sb;
++
++	return 0;
++}
++
++static void init_container_root(struct containerfs_root *root)
++{
++	struct container *cont = &root->top_container;
++	INIT_LIST_HEAD(&root->subsys_list);
++	root->number_of_containers = 1;
++	cont->root = root;
++	cont->top_container = cont;
++	INIT_LIST_HEAD(&cont->sibling);
++	INIT_LIST_HEAD(&cont->children);
++	INIT_LIST_HEAD(&cont->css_groups);
++	INIT_LIST_HEAD(&cont->release_list);
++	list_add(&root->root_list, &roots);
++	root_count++;
++}
++
++static int container_get_sb(struct file_system_type *fs_type,
++			 int flags, const char *unused_dev_name,
++			 void *data, struct vfsmount *mnt)
++{
++	unsigned long subsys_bits = 0;
++	int ret = 0;
++	struct containerfs_root *root = NULL;
++	int use_existing = 0;
++
++	mutex_lock(&container_mutex);
++
++	/* First find the desired set of resource controllers */
++	ret = parse_containerfs_options(data, &subsys_bits);
++	if (ret)
++		goto out_unlock;
++
++	/* See if we already have a hierarchy containing this set */
++
++	for_each_root(root) {
++		/* We match - use this hierarchy */
++		if (root->subsys_bits == subsys_bits) {
++			use_existing = 1;
++			break;
++		}
++		/* We clash - fail */
++		if (root->subsys_bits & subsys_bits) {
++			ret = -EBUSY;
++			goto out_unlock;
++		}
++	}
++
++	if (!use_existing) {
++		/* We need a new root */
++		struct list_head tmp_cg_links, *l;
++		root = kzalloc(sizeof(*root), GFP_KERNEL);
++		if (!root) {
++			ret = -ENOMEM;
++			goto out_unlock;
++		}
++		/* We're accessing css_group_count without locking
++		 * here, but that's OK - it can only be increased by
++		 * someone holding container_lock, and that's us. The
++		 * worst that can happen is that we have some link
++		 * structures left over */
++		ret = allocate_cg_links(css_group_count, &tmp_cg_links);
++		if (ret < 0) {
++			kfree(root);
++			goto out_unlock;
++		}
++		init_container_root(root);
++
++		/* Link the top container in this hierarchy into all
++		 * the css_group objects */
++		write_lock(&css_group_lock);
++		l = &init_css_group.list;
++		do {
++			struct css_group *cg;
++			struct cg_container_link *link;
++			cg = list_entry(l, struct css_group, list);
++			BUG_ON(list_empty(&tmp_cg_links));
++			link = list_entry(tmp_cg_links.next,
++					  struct cg_container_link,
++					  cont_link_list);
++			list_del(&link->cont_link_list);
++			link->cg = cg;
++			list_add(&link->cont_link_list,
++				 &root->top_container.css_groups);
++			list_add(&link->cg_link_list, &cg->cg_links);
++			l = l->next;
++		} while (l != &init_css_group.list);
++		write_unlock(&css_group_lock);
++
++		while (!list_empty(&tmp_cg_links)) {
++			/* Probably shouldn't happen */
++			struct cg_container_link *link;
++			printk(KERN_INFO "Freeing unused cg_container_link\n");
++			link = list_entry(tmp_cg_links.next,
++					  struct cg_container_link,
++					  cont_link_list);
++			list_del(&link->cont_link_list);
++			kfree(link);
++		}
++	}
++
++	if (!root->sb) {
++		/* We need a new superblock for this container combination */
++		struct container *cont = &root->top_container;
++
++		BUG_ON(root->subsys_bits);
++		ret = get_sb_nodev(fs_type, flags, root,
++				   container_fill_super, mnt);
++		if (ret)
++			goto out_unlock;
++
++		BUG_ON(!list_empty(&cont->sibling));
++		BUG_ON(!list_empty(&cont->children));
++		BUG_ON(root->number_of_containers != 1);
++
++		ret = rebind_subsystems(root, subsys_bits);
++
++		/* It's safe to nest i_mutex inside container_mutex in
++		 * this case, since no-one else can be accessing this
++		 * directory yet */
++		mutex_lock(&cont->dentry->d_inode->i_mutex);
++		container_populate_dir(cont);
++		mutex_unlock(&cont->dentry->d_inode->i_mutex);
++		BUG_ON(ret);
++	} else {
++		/* Reuse the existing superblock */
++		down_write(&(root->sb->s_umount));
++		ret = simple_set_mnt(mnt, root->sb);
++		if (!ret)
++			atomic_inc(&root->sb->s_active);
++	}
++
++ out_unlock:
++	mutex_unlock(&container_mutex);
++	return ret;
++}
++
++static struct file_system_type container_fs_type = {
++	.name = "container",
++	.get_sb = container_get_sb,
++	.kill_sb = kill_litter_super,
++};
++
++static inline struct container *__d_cont(struct dentry *dentry)
++{
++	return dentry->d_fsdata;
++}
++
++static inline struct cftype *__d_cft(struct dentry *dentry)
++{
++	return dentry->d_fsdata;
++}
++
++/*
++ * Called with container_mutex held.  Writes path of container into buf.
++ * Returns 0 on success, -errno on error.
++ */
++int container_path(const struct container *cont, char *buf, int buflen)
++{
++	char *start;
++
++	start = buf + buflen;
++
++	*--start = '\0';
++	for (;;) {
++		int len = cont->dentry->d_name.len;
++		if ((start -= len) < buf)
++			return -ENAMETOOLONG;
++		memcpy(start, cont->dentry->d_name.name, len);
++		cont = cont->parent;
++		if (!cont)
++			break;
++		if (!cont->parent)
++			continue;
++		if (--start < buf)
++			return -ENAMETOOLONG;
++		*start = '/';
++	}
++	memmove(buf, start, buf + buflen - start);
++	return 0;
++}
++
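++/*
++ * Usage sketch (buffer size illustrative): with container_mutex held,
++ *
++ *	char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
++ *	if (buf && !container_path(cont, buf, PAGE_SIZE))
++ *		printk(KERN_DEBUG "container is %s\n", buf);
++ *	kfree(buf);
++ *
++ * The path is built backwards from the end of the buffer, so a
++ * container whose directory is foo/bar under the hierarchy root
++ * comes out as "/foo/bar".
++ */
++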
++static void get_first_subsys(const struct container *cont,
++			struct container_subsys_state **css, int *subsys_id)
++{
++	const struct containerfs_root *root = cont->root;
++	const struct container_subsys *test_ss;
++	BUG_ON(list_empty(&root->subsys_list));
++	test_ss = list_entry(root->subsys_list.next,
++			     struct container_subsys, sibling);
++	if (css) {
++		*css = cont->subsys[test_ss->subsys_id];
++		BUG_ON(!*css);
++	}
++	if (subsys_id)
++		*subsys_id = test_ss->subsys_id;
++}
++
++/*
++ * Attach task 'tsk' to container 'cont'
++ *
++ * Call holding container_mutex.  May take task_lock of
++ * the task 'pid' during call.
++ */
++static int attach_task(struct container *cont, struct task_struct *tsk)
++{
++	int retval = 0;
++	struct container_subsys *ss;
++	struct container *oldcont;
++	struct css_group *cg = tsk->containers;
++	struct css_group *newcg;
++	struct containerfs_root *root = cont->root;
++	int subsys_id;
++
++	get_first_subsys(cont, NULL, &subsys_id);
++
++	/* Nothing to do if the task is already in that container */
++	oldcont = task_container(tsk, subsys_id);
++	if (cont == oldcont)
++		return 0;
++
++	for_each_subsys(root, ss) {
++		if (ss->can_attach) {
++			retval = ss->can_attach(ss, cont, tsk);
++			if (retval) {
++				return retval;
++			}
++		}
++	}
++
++	/* Locate or allocate a new css_group for this task,
++	 * based on its final set of containers */
++	newcg = find_css_group(cg, cont);
++	if (!newcg) {
++		return -ENOMEM;
++	}
++
++	task_lock(tsk);
++	if (tsk->flags & PF_EXITING) {
++		task_unlock(tsk);
++		put_css_group(newcg);
++		return -ESRCH;
++	}
++	rcu_assign_pointer(tsk->containers, newcg);
++	if (!list_empty(&tsk->cg_list)) {
++		write_lock(&css_group_lock);
++		list_del(&tsk->cg_list);
++		list_add(&tsk->cg_list, &newcg->tasks);
++		write_unlock(&css_group_lock);
++	}
++	task_unlock(tsk);
++
++	for_each_subsys(root, ss) {
++		if (ss->attach) {
++			ss->attach(ss, cont, oldcont, tsk);
++		}
++	}
++	set_bit(CONT_RELEASABLE, &oldcont->flags);
++	synchronize_rcu();
++	put_css_group(cg);
++	return 0;
++}
++
++/*
++ * Attach task with pid 'pid' to container 'cont'. Call with
++ * container_mutex, may take task_lock of task
++ */
++static int attach_task_by_pid(struct container *cont, char *pidbuf)
++{
++	pid_t pid;
++	struct task_struct *tsk;
++	int ret;
++
++	if (sscanf(pidbuf, "%d", &pid) != 1)
++		return -EIO;
++
++	if (pid) {
++		rcu_read_lock();
++		tsk = find_task_by_pid(pid);
++		if (!tsk || tsk->flags & PF_EXITING) {
++			rcu_read_unlock();
++			return -ESRCH;
++		}
++		get_task_struct(tsk);
++		rcu_read_unlock();
++
++		if ((current->euid) && (current->euid != tsk->uid)
++		    && (current->euid != tsk->suid)) {
++			put_task_struct(tsk);
++			return -EACCES;
++		}
++	} else {
++		tsk = current;
++		get_task_struct(tsk);
++	}
++
++	ret = attach_task(cont, tsk);
++	put_task_struct(tsk);
++	return ret;
++}
++
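++/*
++ * From userspace (mount point hypothetical), a write to a
++ * container's "tasks" file ends up here:
++ *
++ *	echo $PID > /containers/foo/tasks
++ *
++ * container_common_file_write() routes FILE_TASKLIST writes to this
++ * function; writing "0" attaches the writing task itself.
++ */
++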
++/* The various types of files and directories in a container file system */
++
++enum container_filetype {
++	FILE_ROOT,
++	FILE_DIR,
++	FILE_TASKLIST,
++	FILE_NOTIFY_ON_RELEASE,
++	FILE_RELEASABLE,
++	FILE_RELEASE_AGENT,
++};
++
++static ssize_t container_common_file_write(struct container *cont,
++					   struct cftype *cft,
++					   struct file *file,
++					   const char __user *userbuf,
++					   size_t nbytes, loff_t *unused_ppos)
++{
++	enum container_filetype type = cft->private;
++	char *buffer;
++	int retval = 0;
++
++	if (nbytes >= PATH_MAX)
++		return -E2BIG;
++
++	/* +1 for nul-terminator */
++	buffer = kmalloc(nbytes + 1, GFP_KERNEL);
++	if (buffer == NULL)
++		return -ENOMEM;
++
++	if (copy_from_user(buffer, userbuf, nbytes)) {
++		retval = -EFAULT;
++		goto out1;
++	}
++	buffer[nbytes] = 0;	/* nul-terminate */
++
++	mutex_lock(&container_mutex);
++
++	if (container_is_removed(cont)) {
++		retval = -ENODEV;
++		goto out2;
++	}
++
++	switch (type) {
++	case FILE_TASKLIST:
++		retval = attach_task_by_pid(cont, buffer);
++		break;
++	case FILE_NOTIFY_ON_RELEASE:
++		clear_bit(CONT_RELEASABLE, &cont->flags);
++		if (simple_strtoul(buffer, NULL, 10) != 0)
++			set_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags);
++		else
++			clear_bit(CONT_NOTIFY_ON_RELEASE, &cont->flags);
++		break;
++	case FILE_RELEASE_AGENT:
++	{
++		struct containerfs_root *root = cont->root;
++		if (nbytes < sizeof(root->release_agent_path)) {
++			/* We never write anything other than '\0'
++			 * into the last char of release_agent_path,
++			 * so it always remains a NUL-terminated
++			 * string */
++			strncpy(root->release_agent_path, buffer, nbytes);
++			root->release_agent_path[nbytes] = 0;
++		} else {
++			retval = -ENOSPC;
++		}
++		break;
++	}
++	default:
++		retval = -EINVAL;
++		goto out2;
++	}
++
++	if (retval == 0)
++		retval = nbytes;
++out2:
++	mutex_unlock(&container_mutex);
++out1:
++	kfree(buffer);
++	return retval;
++}
++
++static ssize_t container_file_write(struct file *file, const char __user *buf,
++						size_t nbytes, loff_t *ppos)
++{
++	struct cftype *cft = __d_cft(file->f_dentry);
++	struct container *cont = __d_cont(file->f_dentry->d_parent);
++
++	if (!cft)
++		return -ENODEV;
++	if (!cft->write)
++		return -EINVAL;
++
++	return cft->write(cont, cft, file, buf, nbytes, ppos);
++}
++
++static ssize_t container_read_uint(struct container *cont, struct cftype *cft,
++				   struct file *file,
++				   char __user *buf, size_t nbytes,
++				   loff_t *ppos)
++{
++	char tmp[64];
++	u64 val = cft->read_uint(cont, cft);
++	int len = sprintf(tmp, "%llu\n", (unsigned long long) val);
++
++	return simple_read_from_buffer(buf, nbytes, ppos, tmp, len);
++}
++
++static ssize_t container_common_file_read(struct container *cont,
++					  struct cftype *cft,
++					  struct file *file,
++					  char __user *buf,
++					  size_t nbytes, loff_t *ppos)
++{
++	enum container_filetype type = cft->private;
++	char *page;
++	ssize_t retval = 0;
++	char *s;
++
++	if (!(page = (char *)__get_free_page(GFP_KERNEL)))
++		return -ENOMEM;
++
++	s = page;
++
++	switch (type) {
++	case FILE_RELEASE_AGENT:
++	{
++		struct containerfs_root *root;
++		size_t n;
++		mutex_lock(&container_mutex);
++		root = cont->root;
++		n = strnlen(root->release_agent_path,
++			    sizeof(root->release_agent_path));
++		n = min(n, (size_t) PAGE_SIZE - 1);
++		strncpy(s, root->release_agent_path, n);
++		mutex_unlock(&container_mutex);
++		s += n;
++		break;
++	}
++	default:
++		retval = -EINVAL;
++		goto out;
++	}
++	*s++ = '\n';
++
++	retval = simple_read_from_buffer(buf, nbytes, ppos, page, s - page);
++out:
++	free_page((unsigned long)page);
++	return retval;
++}
++
++static ssize_t container_file_read(struct file *file, char __user *buf,
++				   size_t nbytes, loff_t *ppos)
++{
++	struct cftype *cft = __d_cft(file->f_dentry);
++	struct container *cont = __d_cont(file->f_dentry->d_parent);
++
++	if (!cft)
++		return -ENODEV;
++
++	if (cft->read)
++		return cft->read(cont, cft, file, buf, nbytes, ppos);
++	if (cft->read_uint)
++		return container_read_uint(cont, cft, file, buf, nbytes, ppos);
++	return -EINVAL;
++}
++
++static int container_file_open(struct inode *inode, struct file *file)
++{
++	int err;
++	struct cftype *cft;
++
++	err = generic_file_open(inode, file);
++	if (err)
++		return err;
++
++	cft = __d_cft(file->f_dentry);
++	if (!cft)
++		return -ENODEV;
++	if (cft->open)
++		err = cft->open(inode, file);
++	else
++		err = 0;
++
++	return err;
++}
++
++static int container_file_release(struct inode *inode, struct file *file)
++{
++	struct cftype *cft = __d_cft(file->f_dentry);
++	if (cft->release)
++		return cft->release(inode, file);
++	return 0;
++}
++
++/*
++ * container_rename - Only allow simple rename of directories in place.
++ */
++static int container_rename(struct inode *old_dir, struct dentry *old_dentry,
++			    struct inode *new_dir, struct dentry *new_dentry)
++{
++	if (!S_ISDIR(old_dentry->d_inode->i_mode))
++		return -ENOTDIR;
++	if (new_dentry->d_inode)
++		return -EEXIST;
++	if (old_dir != new_dir)
++		return -EIO;
++	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
++}
++
++static struct file_operations container_file_operations = {
++	.read = container_file_read,
++	.write = container_file_write,
++	.llseek = generic_file_llseek,
++	.open = container_file_open,
++	.release = container_file_release,
++};
++
++static struct inode_operations container_dir_inode_operations = {
++	.lookup = simple_lookup,
++	.mkdir = container_mkdir,
++	.rmdir = container_rmdir,
++	.rename = container_rename,
++};
++
++static int container_create_file(struct dentry *dentry, int mode,
++				struct super_block *sb)
++{
++	struct inode *inode;
++
++	if (!dentry)
++		return -ENOENT;
++	if (dentry->d_inode)
++		return -EEXIST;
++
++	inode = container_new_inode(mode, sb);
++	if (!inode)
++		return -ENOMEM;
++
++	if (S_ISDIR(mode)) {
++		inode->i_op = &container_dir_inode_operations;
++		inode->i_fop = &simple_dir_operations;
++
++		/* start off with i_nlink == 2 (for "." entry) */
++		inc_nlink(inode);
++
++		/* start with the directory inode held, so that we can
++		 * populate it without racing with another mkdir */
++		mutex_lock(&inode->i_mutex);
++	} else if (S_ISREG(mode)) {
++		inode->i_size = 0;
++		inode->i_fop = &container_file_operations;
++	}
++
++	d_instantiate(dentry, inode);
++	dget(dentry);	/* Extra count - pin the dentry in core */
++	return 0;
++}
++
++/*
++ *	container_create_dir - create a directory for an object.
++ *	cont:	the container we create the directory for.
++ *		It must have a valid ->parent field,
++ *		and we are going to fill its ->dentry field.
++ *	dentry:	dentry of the directory to create.
++ *	mode:	mode to set on new directory.
++ */
++static int container_create_dir(struct container *cont, struct dentry *dentry,
++				int mode)
++{
++	struct dentry *parent;
++	int error = 0;
++
++	parent = cont->parent->dentry;
++	if (IS_ERR(dentry))
++		return PTR_ERR(dentry);
++	error = container_create_file(dentry, S_IFDIR | mode, cont->root->sb);
++	if (!error) {
++		dentry->d_fsdata = cont;
++		inc_nlink(parent->d_inode);
++		cont->dentry = dentry;
++	}
++	dput(dentry);
++
++	return error;
++}
++
++int container_add_file(struct container *cont, const struct cftype *cft)
++{
++	struct dentry *dir = cont->dentry;
++	struct dentry *dentry;
++	int error;
++
++	BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
++	dentry = container_get_dentry(dir, cft->name);
++	if (!IS_ERR(dentry)) {
++		error = container_create_file(dentry, 0644 | S_IFREG,
++						cont->root->sb);
++		if (!error)
++			dentry->d_fsdata = (void *)cft;
++		dput(dentry);
++	} else
++		error = PTR_ERR(dentry);
++	return error;
++}
++
++int container_add_files(struct container *cont, const struct cftype cft[],
++			int count)
++{
++	int i, err;
++	for (i = 0; i < count; i++) {
++		err = container_add_file(cont, &cft[i]);
++		if (err)
++			return err;
++	}
++	return 0;
++}
++
++/* Count the number of tasks in a container. */
++
++int container_task_count(const struct container *cont)
++{
++	int count = 0;
++	struct list_head *l;
++
++	read_lock(&css_group_lock);
++	l = cont->css_groups.next;
++	while (l != &cont->css_groups) {
++		struct cg_container_link *link =
++			list_entry(l, struct cg_container_link, cont_link_list);
++		count += atomic_read(&link->cg->ref.refcount);
++		l = l->next;
++	}
++	read_unlock(&css_group_lock);
++	return count;
++}
++
++/* Advance a list_head iterator pointing into a list of cg_container_links */
++static void container_advance_iter(struct container *cont,
++					  struct container_iter *it)
++{
++	struct list_head *l = it->cg_link;
++	struct cg_container_link *link;
++	struct css_group *cg;
++
++	/* Advance to the next non-empty css_group */
++	do {
++		l = l->next;
++		if (l == &cont->css_groups) {
++			it->cg_link = NULL;
++			return;
++		}
++		link = list_entry(l, struct cg_container_link, cont_link_list);
++		cg = link->cg;
++	} while (list_empty(&cg->tasks));
++	it->cg_link = l;
++	it->task = cg->tasks.next;
++}
++
++void container_iter_start(struct container *cont, struct container_iter *it)
++{
++	read_lock(&css_group_lock);
++	it->cg_link = &cont->css_groups;
++	container_advance_iter(cont, it);
++}
++
++struct task_struct *container_iter_next(struct container *cont,
++					struct container_iter *it)
++{
++	struct task_struct *res;
++	struct list_head *l = it->task;
++
++	/* If the iterator's cg_link is NULL, there are no more tasks */
++	if (!it->cg_link)
++		return NULL;
++	res = list_entry(l, struct task_struct, cg_list);
++	/* Advance iterator to find next entry */
++	l = l->next;
++	if (l == &res->containers->tasks) {
++		/* We reached the end of this task list - move on to
++		 * the next cg_container_link */
++		container_advance_iter(cont, it);
++	} else {
++		it->task = l;
++	}
++	return res;
++}
++
++void container_iter_end(struct container *cont, struct container_iter *it)
++{
++	read_unlock(&css_group_lock);
++}
++
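++/*
++ * Typical iteration pattern (see pid_array_load() below for a real
++ * caller).  css_group_lock is read-held from container_iter_start()
++ * to container_iter_end(), so the loop body must not sleep:
++ *
++ *	struct container_iter it;
++ *	struct task_struct *tsk;
++ *
++ *	container_iter_start(cont, &it);
++ *	while ((tsk = container_iter_next(cont, &it)))
++ *		... examine tsk ...
++ *	container_iter_end(cont, &it);
++ */
++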
++/*
++ * Stuff for reading the 'tasks' file.
++ *
++ * Reading this file can return large amounts of data if a container has
++ * *lots* of attached tasks. So it may need several calls to read(),
++ * but we cannot guarantee that the information we produce is correct
++ * unless we produce it entirely atomically.
++ *
++ * Upon tasks file open(), a struct ctr_struct is allocated, that
++ * will have a pointer to an array (also allocated here).  The struct
++ * ctr_struct * is stored in file->private_data.  Its resources will
++ * be freed by release() when the file is closed.  The array is used
++ * to sprintf the PIDs and then used by read().
++ */
++struct ctr_struct {
++	char *buf;
++	int bufsz;
++};
++
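++/*
++ * For illustration (mount point hypothetical), a plain
++ *
++ *	cat /containers/foo/tasks
++ *
++ * exercises the whole protocol: open() snapshots the pid list in
++ * container_tasks_open(), read() copies it out via
++ * container_tasks_read(), and the final close() frees the snapshot
++ * in container_tasks_release().
++ */
++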
++/*
++ * Load into 'pidarray' up to 'npids' of the tasks using container
++ * 'cont'.  Return actual number of pids loaded.  No need to
++ * task_lock(p) when reading out p->container, since the iterator
++ * holds css_group_lock across the walk, so the css_group can't go
++ * away, and is immutable after creation.
++ */
++static int pid_array_load(pid_t *pidarray, int npids, struct container *cont)
++{
++	int n = 0;
++	struct container_iter it;
++	struct task_struct *tsk;
++	container_iter_start(cont, &it);
++	while ((tsk = container_iter_next(cont, &it))) {
++		if (unlikely(n == npids))
++			break;
++		pidarray[n++] = pid_nr(task_pid(tsk));
++	}
++	container_iter_end(cont, &it);
++	return n;
++}
++
++static int cmppid(const void *a, const void *b)
++{
++	return *(pid_t *)a - *(pid_t *)b;
++}
++
++/*
++ * Convert array 'a' of 'npids' pid_t's to a string of newline separated
++ * decimal pids in 'buf'.  Don't write more than 'sz' chars, but return
++ * count 'cnt' of how many chars would be written if buf were large enough.
++ */
++static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
++{
++	int cnt = 0;
++	int i;
++
++	for (i = 0; i < npids; i++)
++		cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
++	return cnt;
++}
++
++/*
++ * Handle an open on 'tasks' file.  Prepare a buffer listing the
++ * process id's of tasks currently attached to the container being opened.
++ *
++ * Does not require any specific container mutexes, and does not take any.
++ */
++static int container_tasks_open(struct inode *unused, struct file *file)
++{
++	struct container *cont = __d_cont(file->f_dentry->d_parent);
++	struct ctr_struct *ctr;
++	pid_t *pidarray;
++	int npids;
++	char c;
++
++	if (!(file->f_mode & FMODE_READ))
++		return 0;
++
++	ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
++	if (!ctr)
++		goto err0;
++
++	/*
++	 * If container gets more users after we read count, we won't have
++	 * enough space - tough.  This race is indistinguishable to the
++	 * caller from the case that the additional container users didn't
++	 * show up until sometime later on.
++	 */
++	npids = container_task_count(cont);
++	if (npids) {
++		pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
++		if (!pidarray)
++			goto err1;
++
++		npids = pid_array_load(pidarray, npids, cont);
++		sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
++
++		/* Call pid_array_to_buf() twice, first just to get bufsz */
++		ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
++		ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
++		if (!ctr->buf)
++			goto err2;
++		ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
++
++		kfree(pidarray);
++	} else {
++		ctr->buf = NULL;
++		ctr->bufsz = 0;
++	}
++	file->private_data = ctr;
++	return 0;
++
++err2:
++	kfree(pidarray);
++err1:
++	kfree(ctr);
++err0:
++	return -ENOMEM;
++}
++
++static ssize_t container_tasks_read(struct container *cont,
++				    struct cftype *cft,
++				    struct file *file, char __user *buf,
++				    size_t nbytes, loff_t *ppos)
++{
++	struct ctr_struct *ctr = file->private_data;
++
++	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
++}
++
++static int container_tasks_release(struct inode *unused_inode,
++					struct file *file)
++{
++	struct ctr_struct *ctr;
++
++	if (file->f_mode & FMODE_READ) {
++		ctr = file->private_data;
++		kfree(ctr->buf);
++		kfree(ctr);
++	}
++	return 0;
++}
++
++static u64 container_read_notify_on_release(struct container *cont,
++					    struct cftype *cft)
++{
++	return notify_on_release(cont);
++}
++
++static u64 container_read_releasable(struct container *cont, struct cftype *cft)
++{
++	return test_bit(CONT_RELEASABLE, &cont->flags);
++}
++
++/*
++ * for the common functions, 'private' gives the type of file
++ */
++static struct cftype files[] = {
++	{
++		.name = "tasks",
++		.open = container_tasks_open,
++		.read = container_tasks_read,
++		.write = container_common_file_write,
++		.release = container_tasks_release,
++		.private = FILE_TASKLIST,
++	},
++
++	{
++		.name = "notify_on_release",
++		.read_uint = container_read_notify_on_release,
++		.write = container_common_file_write,
++		.private = FILE_NOTIFY_ON_RELEASE,
++	},
++
++	{
++		.name = "releasable",
++		.read_uint = container_read_releasable,
++		.private = FILE_RELEASABLE,
++	}
++};
++
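++/*
++ * A subsystem can publish extra control files with the same
++ * machinery.  A minimal sketch (names hypothetical), typically
++ * wired up from the subsystem's populate() callback:
++ *
++ *	static u64 foo_read_uint(struct container *cont,
++ *				 struct cftype *cft)
++ *	{
++ *		return 42;	// some per-container value
++ *	}
++ *
++ *	static struct cftype cft_foo = {
++ *		.name = "foo.value",
++ *		.read_uint = foo_read_uint,
++ *	};
++ *
++ *	// in foo_populate():
++ *	//	return container_add_file(cont, &cft_foo);
++ */
++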
++static struct cftype cft_release_agent = {
++	.name = "release_agent",
++	.read = container_common_file_read,
++	.write = container_common_file_write,
++	.private = FILE_RELEASE_AGENT,
++};
++
++static int container_populate_dir(struct container *cont)
++{
++	int err;
++	struct container_subsys *ss;
++
++	/* First clear out any existing files */
++	container_clear_directory(cont->dentry);
++
++	err = container_add_files(cont, files, ARRAY_SIZE(files));
++	if (err < 0)
++		return err;
++
++	if (cont == cont->top_container) {
++		if ((err = container_add_file(cont, &cft_release_agent)) < 0)
++			return err;
++	}
++
++	for_each_subsys(cont->root, ss) {
++		if (ss->populate && (err = ss->populate(ss, cont)) < 0)
++			return err;
++	}
++
++	return 0;
++}
++
++static void init_container_css(struct container_subsys *ss,
++			       struct container *cont)
++{
++	struct container_subsys_state *css = cont->subsys[ss->subsys_id];
++	css->container = cont;
++	atomic_set(&css->refcnt, 0);
++}
++
++/*
++ *	container_create - create a container
++ *	parent:	container that will be parent of the new container.
++ *	dentry:	dentry of the new container's directory.
++ *	mode:		mode to set on new inode
++ *
++ *	Must be called with the mutex on the parent inode held
++ */
++
++static long container_create(struct container *parent, struct dentry *dentry,
++			     int mode)
++{
++	struct container *cont;
++	struct containerfs_root *root = parent->root;
++	int err = 0;
++	struct container_subsys *ss;
++	struct super_block *sb = root->sb;
++
++	cont = kzalloc(sizeof(*cont), GFP_KERNEL);
++	if (!cont)
++		return -ENOMEM;
++
++	/* Grab a reference on the superblock so the hierarchy doesn't
++	 * get deleted on unmount if there are child containers.  This
++	 * can be done outside container_mutex, since the sb can't
++	 * disappear while someone has an open control file on the
++	 * fs */
++	atomic_inc(&sb->s_active);
++
++	mutex_lock(&container_mutex);
++
++	cont->flags = 0;
++	INIT_LIST_HEAD(&cont->sibling);
++	INIT_LIST_HEAD(&cont->children);
++	INIT_LIST_HEAD(&cont->css_groups);
++	INIT_LIST_HEAD(&cont->release_list);
++
++	cont->parent = parent;
++	cont->root = parent->root;
++	cont->top_container = parent->top_container;
++
++	for_each_subsys(root, ss) {
++		err = ss->create(ss, cont);
++		if (err)
++			goto err_destroy;
++		init_container_css(ss, cont);
++	}
++
++	list_add(&cont->sibling, &cont->parent->children);
++	root->number_of_containers++;
++
++	err = container_create_dir(cont, dentry, mode);
++	if (err < 0)
++		goto err_remove;
++
++	/* The container directory was pre-locked for us */
++	BUG_ON(!mutex_is_locked(&cont->dentry->d_inode->i_mutex));
++
++	err = container_populate_dir(cont);
++	/* If err < 0, we have a half-filled directory - oh well ;) */
++
++	mutex_unlock(&container_mutex);
++	mutex_unlock(&cont->dentry->d_inode->i_mutex);
++
++	return 0;
++
++ err_remove:
++
++	list_del(&cont->sibling);
++	root->number_of_containers--;
++
++ err_destroy:
++
++	for_each_subsys(root, ss) {
++		if (cont->subsys[ss->subsys_id])
++			ss->destroy(ss, cont);
++	}
++
++	mutex_unlock(&container_mutex);
++
++	/* Release the reference count that we took on the superblock */
++	deactivate_super(sb);
++
++	kfree(cont);
++	return err;
++}
++
++static int container_mkdir(struct inode *dir, struct dentry *dentry, int mode)
++{
++	struct container *c_parent = dentry->d_parent->d_fsdata;
++
++	/* the vfs holds inode->i_mutex already */
++	return container_create(c_parent, dentry, mode | S_IFDIR);
++}
++
++static inline int container_has_css_refs(struct container *cont)
++{
++	/* Check the reference count on each subsystem. Since we
++	 * already established that there are no tasks in the
++	 * container, if the css refcount is also 0, then there should
++	 * be no outstanding references, so the subsystem is safe to
++	 * destroy */
++	struct container_subsys *ss;
++	for_each_subsys(cont->root, ss) {
++		struct container_subsys_state *css;
++		css = cont->subsys[ss->subsys_id];
++		if (atomic_read(&css->refcnt)) {
++			return 1;
++		}
++	}
++	return 0;
++}
++
++static int container_rmdir(struct inode *unused_dir, struct dentry *dentry)
++{
++	struct container *cont = dentry->d_fsdata;
++	struct dentry *d;
++	struct container *parent;
++	struct container_subsys *ss;
++	struct super_block *sb;
++	struct containerfs_root *root;
++
++	/* the vfs holds both inode->i_mutex already */
++
++	mutex_lock(&container_mutex);
++	if (atomic_read(&cont->count) != 0) {
++		mutex_unlock(&container_mutex);
++		return -EBUSY;
++	}
++	if (!list_empty(&cont->children)) {
++		mutex_unlock(&container_mutex);
++		return -EBUSY;
++	}
++
++	parent = cont->parent;
++	root = cont->root;
++	sb = root->sb;
++
++	if (container_has_css_refs(cont)) {
++		mutex_unlock(&container_mutex);
++		return -EBUSY;
++	}
++
++	for_each_subsys(root, ss) {
++		if (cont->subsys[ss->subsys_id])
++			ss->destroy(ss, cont);
++	}
++
++	set_bit(CONT_REMOVED, &cont->flags);
++	/* delete my sibling from parent->children */
++	list_del(&cont->sibling);
++	spin_lock(&cont->dentry->d_lock);
++	d = dget(cont->dentry);
++	cont->dentry = NULL;
++	spin_unlock(&d->d_lock);
++
++	container_d_remove_dir(d);
++	dput(d);
++	root->number_of_containers--;
++
++	if (!list_empty(&cont->release_list))
++		list_del(&cont->release_list);
++	set_bit(CONT_RELEASABLE, &parent->flags);
++	check_for_release(parent);
++
++	mutex_unlock(&container_mutex);
++	/* Drop the active superblock reference that we took when we
++	 * created the container */
++	deactivate_super(sb);
++	return 0;
++}
++
++static void container_init_subsys(struct container_subsys *ss)
++{
++	int retval;
++	struct container_subsys_state *css;
++	struct list_head *l;
++	printk(KERN_ERR "Initializing container subsys %s\n", ss->name);
++
++	/* Create the top container state for this subsystem */
++	ss->root = &rootnode;
++	retval = ss->create(ss, dummytop);
++	BUG_ON(retval);
++	BUG_ON(!dummytop->subsys[ss->subsys_id]);
++	init_container_css(ss, dummytop);
++	css = dummytop->subsys[ss->subsys_id];
++
++	/* Update all container groups to contain a subsys
++	 * pointer to this state - since the subsystem is
++	 * newly registered, all tasks and hence all container
++	 * groups are in the subsystem's top container. */
++	write_lock(&css_group_lock);
++	l = &init_css_group.list;
++	do {
++		struct css_group *cg =
++			list_entry(l, struct css_group, list);
++		cg->subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
++		l = l->next;
++	} while (l != &init_css_group.list);
++	write_unlock(&css_group_lock);
++
++	/* If this subsystem requested that it be notified with fork
++	 * events, we should send it one now for every process in the
++	 * system */
++	if (ss->fork) {
++		struct task_struct *g, *p;
++
++		read_lock(&tasklist_lock);
++		do_each_thread(g, p) {
++			ss->fork(ss, p);
++		} while_each_thread(g, p);
++		read_unlock(&tasklist_lock);
++	}
++
++	need_forkexit_callback |= ss->fork || ss->exit;
++
++	ss->active = 1;
++}
++
++/**
++ * container_init_early - initialize containers at system boot, and
++ * initialize any subsystems that request early init.
++ */
++int __init container_init_early(void)
++{
++	int i;
++	kref_init(&init_css_group.ref);
++	kref_get(&init_css_group.ref);
++	INIT_LIST_HEAD(&init_css_group.list);
++	INIT_LIST_HEAD(&init_css_group.cg_links);
++	INIT_LIST_HEAD(&init_css_group.tasks);
++	css_group_count = 1;
++	init_container_root(&rootnode);
++	init_task.containers = &init_css_group;
++
++	init_css_group_link.cg = &init_css_group;
++	list_add(&init_css_group_link.cont_link_list,
++		 &rootnode.top_container.css_groups);
++	list_add(&init_css_group_link.cg_link_list,
++		 &init_css_group.cg_links);
++
++	for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++		struct container_subsys *ss = subsys[i];
++
++		BUG_ON(!ss->name);
++		BUG_ON(strlen(ss->name) > MAX_CONTAINER_TYPE_NAMELEN);
++		BUG_ON(!ss->create);
++		BUG_ON(!ss->destroy);
++		if (ss->subsys_id != i) {
++			printk(KERN_ERR "Subsys %s id == %d\n",
++			       ss->name, ss->subsys_id);
++			BUG();
++		}
++
++		if (ss->early_init)
++			container_init_subsys(ss);
++	}
++	return 0;
++}
++
++/**
++ * container_init - register container filesystem and /proc file, and
++ * initialize any subsystems that didn't request early init.
++ */
++int __init container_init(void)
++{
++	int err;
++	int i;
++	struct proc_dir_entry *entry;
++
++	for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++		struct container_subsys *ss = subsys[i];
++		if (!ss->early_init)
++			container_init_subsys(ss);
++	}
++
++	err = register_filesystem(&container_fs_type);
++	if (err < 0)
++		goto out;
++
++	entry = create_proc_entry("containers", 0, NULL);
++	if (entry)
++		entry->proc_fops = &proc_containerstats_operations;
++
++out:
++	return err;
++}
++
++/*
++ * proc_container_show()
++ *  - Print task's container paths into seq_file, one line for each hierarchy
++ *  - Used for /proc/<pid>/container.
++ *  - No need to task_lock(tsk) on this tsk->container reference, as it
++ *    doesn't really matter if tsk->container changes after we read it,
++ *    and we take container_mutex, keeping attach_task() from changing it
++ *    anyway.  No need to check that tsk->container != NULL, thanks to
++ *    the_top_container_hack in container_exit(), which sets an exiting task's
++ *    container to top_container.
++ */
++
++/* TODO: Use a proper seq_file iterator */
++static int proc_container_show(struct seq_file *m, void *v)
++{
++	struct pid *pid;
++	struct task_struct *tsk;
++	char *buf;
++	int retval;
++	struct containerfs_root *root;
++
++	retval = -ENOMEM;
++	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
++	if (!buf)
++		goto out;
++
++	retval = -ESRCH;
++	pid = m->private;
++	tsk = get_pid_task(pid, PIDTYPE_PID);
++	if (!tsk)
++		goto out_free;
++
++	retval = 0;
++
++	mutex_lock(&container_mutex);
++
++	for_each_root(root) {
++		struct container_subsys *ss;
++		struct container *cont;
++		int subsys_id;
++		int count = 0;
++
++		/* Skip this hierarchy if it has no active subsystems */
++		if (!root->subsys_bits)
++			continue;
++		for_each_subsys(root, ss)
++			seq_printf(m, "%s%s", count++ ? "," : "", ss->name);
++		seq_putc(m, ':');
++		get_first_subsys(&root->top_container, NULL, &subsys_id);
++		cont = task_container(tsk, subsys_id);
++		retval = container_path(cont, buf, PAGE_SIZE);
++		if (retval < 0)
++			goto out_unlock;
++		seq_puts(m, buf);
++		seq_putc(m, '\n');
++	}
++
++out_unlock:
++	mutex_unlock(&container_mutex);
++	put_task_struct(tsk);
++out_free:
++	kfree(buf);
++out:
++	return retval;
++}
++
++static int container_open(struct inode *inode, struct file *file)
++{
++	struct pid *pid = PROC_I(inode)->pid;
++	return single_open(file, proc_container_show, pid);
++}
++
++struct file_operations proc_container_operations = {
++	.open		= container_open,
++	.read		= seq_read,
++	.llseek		= seq_lseek,
++	.release	= single_release,
++};
++
++/* Display information about each subsystem and each hierarchy */
++static int proc_containerstats_show(struct seq_file *m, void *v)
++{
++	int i;
++	struct containerfs_root *root;
++
++	mutex_lock(&container_mutex);
++	seq_puts(m, "Hierarchies:\n");
++	for_each_root(root) {
++		struct container_subsys *ss;
++		int first = 1;
++		seq_printf(m, "%p: bits=%lx containers=%d (", root,
++			   root->subsys_bits, root->number_of_containers);
++		for_each_subsys(root, ss) {
++			seq_printf(m, "%s%s", first ? "" : ", ", ss->name);
++			first = 0;
++		}
++		seq_putc(m, ')');
++		if (root->sb) {
++			seq_printf(m, " s_active=%d",
++				   atomic_read(&root->sb->s_active));
++		}
++		seq_putc(m, '\n');
++	}
++	seq_puts(m, "Subsystems:\n");
++	for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++		struct container_subsys *ss = subsys[i];
++		seq_printf(m, "%d: name=%s hierarchy=%p\n",
++			   i, ss->name, ss->root);
++	}
++	seq_printf(m, "Container groups: %d\n", css_group_count);
++	mutex_unlock(&container_mutex);
++	return 0;
++}
++
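++/*
++ * Sample /proc/containers output (values hypothetical, format as
++ * produced above):
++ *
++ *	Hierarchies:
++ *	c12ed000: bits=1 containers=3 (cpuset) s_active=4
++ *	Subsystems:
++ *	0: name=cpuset hierarchy=c12ed000
++ *	Container groups: 5
++ */
++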
++static int containerstats_open(struct inode *inode, struct file *file)
++{
++	return single_open(file, proc_containerstats_show, 0);
++}
++
++static struct file_operations proc_containerstats_operations = {
++	.open = containerstats_open,
++	.read = seq_read,
++	.llseek = seq_lseek,
++	.release = single_release,
++};
++
++/**
++ * container_fork - attach a newly forked task to its parent's container.
++ * @child: pointer to task_struct of the newly forked child process.
++ *
++ * Description: A task inherits its parent's container at fork().
++ *
++ * A pointer to the shared css_group was automatically copied in
++ * fork.c by dup_task_struct().  However, we ignore that copy, since
++ * it was not made under the protection of RCU or container_mutex, so
++ * might no longer be a valid container pointer.  attach_task() might
++ * have already changed current->containers, allowing the previously
++ * referenced container group to be removed and freed.
++ *
++ * At the point that container_fork() is called, 'current' is the parent
++ * task, and the passed argument 'child' points to the child task.
++ */
++void container_fork(struct task_struct *child)
++{
++	write_lock(&css_group_lock);
++	child->containers = current->containers;
++	get_css_group(child->containers);
++	list_add(&child->cg_list, &child->containers->tasks);
++	write_unlock(&css_group_lock);
++}
++
++/**
++ * container_fork_callbacks - called on a new task very soon before
++ * adding it to the tasklist. No need to take any locks since no-one
++ * can be operating on this task
++ */
++void container_fork_callbacks(struct task_struct *child)
++{
++	if (need_forkexit_callback) {
++		int i;
++		for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++			struct container_subsys *ss = subsys[i];
++			if (ss->fork)
++				ss->fork(ss, child);
++		}
++	}
++}
++
++/**
++ * container_exit - detach container from exiting task
++ * @tsk: pointer to task_struct of exiting process
++ *
++ * Description: Detach container from @tsk and release it.
++ *
++ * Note that containers marked notify_on_release force every task in
++ * them to take the global container_mutex when exiting.
++ * This could impact scaling on very large systems.  Be reluctant to
++ * use notify_on_release containers where very high task exit scaling
++ * is required on large systems.
++ *
++ * the_top_container_hack:
++ *
++ *    Set the exiting task's container to the root container (top_container).
++ *
++ *    We call container_exit() while the task is still competent to
++ *    handle notify_on_release(), then leave the task attached to the
++ *    root container in each hierarchy for the remainder of its exit.
++ *
++ *    To do this properly, we would increment the reference count on
++ *    top_container, and near the very end of the kernel/exit.c do_exit()
++ *    code we would add a second container function call, to drop that
++ *    reference.  This would just create an unnecessary hot spot on
++ *    the top_container reference count, to no avail.
++ *
++ *    Normally, holding a reference to a container without bumping its
++ *    count is unsafe.  The container could go away, or someone could
++ *    attach us to a different container, decrementing the count on
++ *    the first container that we never incremented.  But in this case,
++ *    top_container isn't going away, and either task has PF_EXITING set,
++ *    which wards off any attach_task() attempts, or task is a failed
++ *    fork, never visible to attach_task.
++ *
++ */
++void container_exit(struct task_struct *tsk, int run_callbacks)
++{
++	int i;
++	struct css_group *cg = NULL;
++
++	if (run_callbacks && need_forkexit_callback) {
++		for (i = 0; i < CONTAINER_SUBSYS_COUNT; i++) {
++			struct container_subsys *ss = subsys[i];
++			if (ss->exit)
++				ss->exit(ss, tsk);
++		}
++	}
++
++	/* Reassign the task to the init_css_group. */
++	task_lock(tsk);
++	write_lock(&css_group_lock);
++	list_del(&tsk->cg_list);
++	write_unlock(&css_group_lock);
++
++	cg = tsk->containers;
++	tsk->containers = &init_css_group;
++	task_unlock(tsk);
++	if (cg)
++		put_css_group_taskexit(cg);
++}
++
++/**
++ * container_clone - duplicate the current container in the hierarchy
++ * that the given subsystem is attached to, and move this task into
++ * the new child
++ */
++int container_clone(struct task_struct *tsk, struct container_subsys *subsys)
++{
++	struct dentry *dentry;
++	int ret = 0;
++	char nodename[MAX_CONTAINER_TYPE_NAMELEN];
++	struct container *parent, *child;
++	struct inode *inode;
++	struct css_group *cg;
++	struct containerfs_root *root;
++	struct container_subsys *ss;
++
++	/* We shouldn't be called by an unregistered subsystem */
++	BUG_ON(!subsys->active);
++
++	/* First figure out what hierarchy and container we're dealing
++	 * with, and pin them so we can drop container_mutex */
++	mutex_lock(&container_mutex);
++ again:
++	root = subsys->root;
++	if (root == &rootnode) {
++		printk(KERN_INFO
++		       "Not cloning container for unused subsystem %s\n",
++		       subsys->name);
++		mutex_unlock(&container_mutex);
++		return 0;
++	}
++	cg = tsk->containers;
++	parent = task_container(tsk, subsys->subsys_id);
++
++	snprintf(nodename, MAX_CONTAINER_TYPE_NAMELEN, "node_%d", tsk->pid);
++
++	/* Pin the hierarchy */
++	atomic_inc(&parent->root->sb->s_active);
++
++	/* Keep the container alive */
++	get_css_group(cg);
++	mutex_unlock(&container_mutex);
++
++	/* Now do the VFS work to create a container */
++	inode = parent->dentry->d_inode;
++
++	/* Hold the parent directory mutex across this operation to
++	 * stop anyone else deleting the new container */
++	mutex_lock(&inode->i_mutex);
++	dentry = container_get_dentry(parent->dentry, nodename);
++	if (IS_ERR(dentry)) {
++		printk(KERN_INFO
++		       "Couldn't allocate dentry for %s: %ld\n", nodename,
++		       PTR_ERR(dentry));
++		ret = PTR_ERR(dentry);
++		goto out_release;
++	}
++
++	/* Create the container directory, which also creates the container */
++	ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755);
++	child = __d_cont(dentry);
++	dput(dentry);
++	if (ret) {
++		printk(KERN_INFO
++		       "Failed to create container %s: %d\n", nodename,
++		       ret);
++		goto out_release;
++	}
++
++	if (!child) {
++		printk(KERN_INFO
++		       "Couldn't find new container %s\n", nodename);
++		ret = -ENOMEM;
++		goto out_release;
++	}
++
++	/* The container now exists. Retake container_mutex and check
++	 * that we're still in the same state that we thought we
++	 * were. */
++	mutex_lock(&container_mutex);
++	if ((root != subsys->root) ||
++	    (parent != task_container(tsk, subsys->subsys_id))) {
++		/* Aargh, we raced ... */
++		mutex_unlock(&inode->i_mutex);
++		put_css_group(cg);
++
++		deactivate_super(parent->root->sb);
++		/* The container is still accessible in the VFS, but
++		 * we're not going to try to rmdir() it at this
++		 * point. */
++		printk(KERN_INFO
++		       "Race in container_clone() - leaking container %s\n",
++		       nodename);
++		goto again;
++	}
++
++	/* do any required auto-setup */
++	for_each_subsys(root, ss) {
++		if (ss->post_clone)
++			ss->post_clone(ss, child);
++	}
++
++	/* All seems fine. Finish by moving the task into the new container */
++	ret = attach_task(child, tsk);
++	mutex_unlock(&container_mutex);
++
++ out_release:
++	mutex_unlock(&inode->i_mutex);
++
++	mutex_lock(&container_mutex);
++	put_css_group(cg);
++	mutex_unlock(&container_mutex);
++	deactivate_super(parent->root->sb);
++	return ret;
++}
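++/*
++ * Example (sketch): a namespace-tracking subsystem could call
++ * container_clone(current, &its_subsys) from an unshare path so that
++ * the calling task lands in a fresh child container named node_<pid>.
++ * The subsystem pointer here is illustrative; any registered subsystem
++ * attached to a mounted hierarchy works.
++ */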
++
++/* See if "cont" is a descendant of the current task's container in
++ * the appropriate hierarchy */
++
++int container_is_descendant(const struct container *cont)
++{
++	struct container *target;
++	int subsys_id;
++
++	if (cont == dummytop)
++		return 1;
++	get_first_subsys(cont, NULL, &subsys_id);
++	target = task_container(current, subsys_id);
++	while (cont != target && cont != cont->top_container)
++		cont = cont->parent;
++	return cont == target;
++}
++
++static void check_for_release(struct container *cont)
++{
++	BUG_ON(!mutex_is_locked(&container_mutex));
++	if (container_is_releasable(cont) && !atomic_read(&cont->count)
++	    && list_empty(&cont->children) && !container_has_css_refs(cont)) {
++		/* Container is currently removable. If it's not
++		 * already queued for a userspace notification, queue
++		 * it now */
++		if (list_empty(&cont->release_list)) {
++			list_add(&cont->release_list, &release_list);
++			schedule_work(&release_agent_work);
++		}
++	}
++}
++
++void css_put(struct container_subsys_state *css)
++{
++	struct container *cont = css->container;
++	if (notify_on_release(cont)) {
++		mutex_lock(&container_mutex);
++		set_bit(CONT_RELEASABLE, &cont->flags);
++		if (atomic_dec_and_test(&css->refcnt)) {
++			check_for_release(cont);
++		}
++		mutex_unlock(&container_mutex);
++	} else {
++		atomic_dec(&css->refcnt);
++	}
++}
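++/*
++ * Minimal usage sketch: css_put() pairs with a matching reference grab
++ * on the same container_subsys_state (css_get(), defined elsewhere in
++ * this patch):
++ *
++ *	css_get(css);
++ *	... blocking work that must keep the container alive ...
++ *	css_put(css);	(may queue a release notification)
++ */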
++
++void container_set_release_agent_path(struct container_subsys *ss,
++				      const char *path)
++{
++	mutex_lock(&container_mutex);
++	strcpy(ss->root->release_agent_path, path);
++	mutex_unlock(&container_mutex);
++}
++
++/*
++ * Notify userspace when a container is released, by running the
++ * configured release agent with the name of the container (path
++ * relative to the root of container file system) as the argument.
++ *
++ * Most likely, this user command will try to rmdir this container.
++ *
++ * This races with the possibility that some other task will be
++ * attached to this container before it is removed, or that some other
++ * user task will 'mkdir' a child container of this container.  That's ok.
++ * The presumed 'rmdir' will fail quietly if this container is no longer
++ * unused, and this container will be reprieved from its death sentence,
++ * to continue to serve a useful existence.  Next time it's released,
++ * we will get notified again, if it still has 'notify_on_release' set.
++ *
++ * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
++ * means only wait until the task is successfully execve()'d.  The
++ * separate release agent task is forked by call_usermodehelper(),
++ * then control in this thread returns here, without waiting for the
++ * release agent task.  We don't bother to wait because the caller of
++ * this routine has no use for the exit status of the release agent
++ * task, so no sense holding our caller up for that.
++ *
++ */
++
++static void container_release_agent(struct work_struct *work)
++{
++	BUG_ON(work != &release_agent_work);
++	mutex_lock(&container_mutex);
++	while (!list_empty(&release_list)) {
++		char *argv[3], *envp[3];
++		int i;
++		char *pathbuf;
++		struct container *cont = list_entry(release_list.next,
++						    struct container,
++						    release_list);
++		list_del_init(&cont->release_list);
++
++		pathbuf = kmalloc(PAGE_SIZE, GFP_KERNEL);
++		if (!pathbuf)
++			continue;
++
++		if (container_path(cont, pathbuf, PAGE_SIZE) < 0) {
++			kfree(pathbuf);
++			continue;
++		}
++
++		i = 0;
++		argv[i++] = cont->root->release_agent_path;
++		argv[i++] = (char *)pathbuf;
++		argv[i] = NULL;
++
++		i = 0;
++		/* minimal command environment */
++		envp[i++] = "HOME=/";
++		envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
++		envp[i] = NULL;
++
++		/* Drop the lock while we invoke the usermode helper,
++		 * since the exec could involve hitting disk and hence
++		 * be a slow process */
++		mutex_unlock(&container_mutex);
++		call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
++		kfree(pathbuf);
++		mutex_lock(&container_mutex);
++	}
++	mutex_unlock(&container_mutex);
++}
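++/*
++ * Example (sketch) of a minimal release agent: it is invoked with the
++ * container's path, relative to the container filesystem root, as
++ * argv[1].  Assuming the hierarchy is mounted at /containers:
++ *
++ *	#!/bin/sh
++ *	rmdir "/containers$1"
++ */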
+diff -Nurb linux-2.6.22-570/kernel/container_debug.c linux-2.6.22-try2/kernel/container_debug.c
+--- linux-2.6.22-570/kernel/container_debug.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/kernel/container_debug.c	2007-12-19 15:29:25.000000000 -0500
+@@ -0,0 +1,89 @@
++/*
++ * kernel/container_debug.c - Example container subsystem that
++ * exposes debug info
++ *
++ * Copyright (C) Google Inc, 2007
++ *
++ * Developed by Paul Menage (menage@google.com)
++ *
++ */
++
++#include <linux/container.h>
++#include <linux/fs.h>
++
++static int debug_create(struct container_subsys *ss, struct container *cont)
++{
++	struct container_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL);
++	if (!css)
++		return -ENOMEM;
++	cont->subsys[debug_subsys_id] = css;
++	return 0;
++}
++
++static void debug_destroy(struct container_subsys *ss, struct container *cont)
++{
++	kfree(cont->subsys[debug_subsys_id]);
++}
++
++static u64 container_refcount_read(struct container *cont, struct cftype *cft)
++{
++	return atomic_read(&cont->count);
++}
++
++static u64 taskcount_read(struct container *cont, struct cftype *cft)
++{
++	u64 count;
++	container_lock();
++	count = container_task_count(cont);
++	container_unlock();
++	return count;
++}
++
++static u64 current_css_group_read(struct container *cont, struct cftype *cft)
++{
++	return (u64) current->containers;
++}
++
++static u64 current_css_group_refcount_read(struct container *cont,
++					   struct cftype *cft)
++{
++	u64 count;
++	rcu_read_lock();
++	count = atomic_read(&current->containers->ref.refcount);
++	rcu_read_unlock();
++	return count;
++}
++
++static struct cftype files[] =  {
++	{
++		.name = "debug.container_refcount",
++		.read_uint = container_refcount_read,
++	},
++	{
++		.name = "debug.taskcount",
++		.read_uint = taskcount_read,
++	},
++	{
++		.name = "debug.current_css_group",
++		.read_uint = current_css_group_read,
++	},
++	{
++		.name = "debug.current_css_group_refcount",
++		.read_uint = current_css_group_refcount_read,
++	},
++};
++
++static int debug_populate(struct container_subsys *ss, struct container *cont)
++{
++	return container_add_files(cont, files, ARRAY_SIZE(files));
++}
++
++struct container_subsys debug_subsys = {
++	.name = "debug",
++	.create = debug_create,
++	.destroy = debug_destroy,
++	.populate = debug_populate,
++	.subsys_id = debug_subsys_id,
++};
+diff -Nurb linux-2.6.22-570/kernel/cpu_acct.c linux-2.6.22-try2/kernel/cpu_acct.c
+--- linux-2.6.22-570/kernel/cpu_acct.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/kernel/cpu_acct.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,185 @@
++/*
++ * kernel/cpu_acct.c - CPU accounting container subsystem
++ *
++ * Copyright (C) Google Inc, 2006
++ *
++ * Developed by Paul Menage (menage@google.com) and Balbir Singh
++ * (balbir@in.ibm.com)
++ *
++ */
++
++/*
++ * Example container subsystem for reporting total CPU usage of tasks in a
++ * container, along with percentage load over a time interval
++ */
++
++#include <linux/module.h>
++#include <linux/container.h>
++#include <linux/fs.h>
++#include <asm/div64.h>
++
++struct cpuacct {
++	struct container_subsys_state css;
++	spinlock_t lock;
++	/* total time used by this class */
++	cputime64_t time;
++
++	/* time when next load calculation occurs */
++	u64 next_interval_check;
++
++	/* time used in current period */
++	cputime64_t current_interval_time;
++
++	/* time used in last period */
++	cputime64_t last_interval_time;
++};
++
++struct container_subsys cpuacct_subsys;
++
++static inline struct cpuacct *container_ca(struct container *cont)
++{
++	return container_of(container_subsys_state(cont, cpuacct_subsys_id),
++			    struct cpuacct, css);
++}
++
++static inline struct cpuacct *task_ca(struct task_struct *task)
++{
++	return container_of(task_subsys_state(task, cpuacct_subsys_id),
++			    struct cpuacct, css);
++}
++
++#define INTERVAL (HZ * 10)
++
++static inline u64 next_interval_boundary(u64 now)
++{
++	/* calculate the next interval boundary beyond the
++	 * current time */
++	do_div(now, INTERVAL);
++	return (now + 1) * INTERVAL;
++}
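++/*
++ * Worked example: with HZ == 1000, INTERVAL is 10000 jiffies.  For
++ * now == 12345, do_div() leaves the quotient 1 in 'now' and returns
++ * the remainder, so the function yields (1 + 1) * 10000 == 20000,
++ * the next 10-second boundary.
++ */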
++
++static int cpuacct_create(struct container_subsys *ss, struct container *cont)
++{
++	struct cpuacct *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
++	if (!ca)
++		return -ENOMEM;
++	spin_lock_init(&ca->lock);
++	ca->next_interval_check = next_interval_boundary(get_jiffies_64());
++	cont->subsys[cpuacct_subsys_id] = &ca->css;
++	return 0;
++}
++
++static void cpuacct_destroy(struct container_subsys *ss,
++			    struct container *cont)
++{
++	kfree(container_ca(cont));
++}
++
++/* Lazily update the load calculation if necessary. Called with ca locked */
++static void cpuusage_update(struct cpuacct *ca)
++{
++	u64 now = get_jiffies_64();
++	/* If we're not due for an update, return */
++	if (ca->next_interval_check > now)
++		return;
++
++	if (ca->next_interval_check <= (now - INTERVAL)) {
++		/* If it's been more than an interval since the last
++		 * check, then catch up - the last interval must have
++		 * been zero load */
++		ca->last_interval_time = 0;
++		ca->next_interval_check = next_interval_boundary(now);
++	} else {
++		/* If a steal takes the last interval time negative,
++		 * then we just ignore it */
++		if ((s64)ca->current_interval_time > 0) {
++			ca->last_interval_time = ca->current_interval_time;
++		} else {
++			ca->last_interval_time = 0;
++		}
++		ca->next_interval_check += INTERVAL;
++	}
++	ca->current_interval_time = 0;
++}
++
++static u64 cpuusage_read(struct container *cont,
++			 struct cftype *cft)
++{
++	struct cpuacct *ca = container_ca(cont);
++	u64 time;
++
++	spin_lock_irq(&ca->lock);
++	cpuusage_update(ca);
++	time = cputime64_to_jiffies64(ca->time);
++	spin_unlock_irq(&ca->lock);
++
++	/* Convert 64-bit jiffies to milliseconds */
++	time *= 1000;
++	do_div(time, HZ);
++	return time;
++}
++
++static u64 load_read(struct container *cont,
++		     struct cftype *cft)
++{
++	struct cpuacct *ca = container_ca(cont);
++	u64 time;
++
++	/* Find the time used in the previous interval */
++	spin_lock_irq(&ca->lock);
++	cpuusage_update(ca);
++	time = cputime64_to_jiffies64(ca->last_interval_time);
++	spin_unlock_irq(&ca->lock);
++
++	/* Convert time to a percentage, to give the load in the
++	 * previous period */
++	time *= 100;
++	do_div(time, INTERVAL);
++
++	return time;
++}
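++/*
++ * Worked example: if tasks in this container consumed 2500 jiffies of
++ * CPU over the previous 10000-jiffy interval (HZ == 1000), load_read()
++ * reports 2500 * 100 / 10000 == 25, i.e. 25% load.
++ */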
++
++static struct cftype files[] = {
++	{
++		.name = "cpuacct.usage",
++		.read_uint = cpuusage_read,
++	},
++	{
++		.name = "cpuacct.load",
++		.read_uint = load_read,
++	}
++};
++
++static int cpuacct_populate(struct container_subsys *ss,
++			    struct container *cont)
++{
++	return container_add_files(cont, files, ARRAY_SIZE(files));
++}
++
++void cpuacct_charge(struct task_struct *task, cputime_t cputime)
++{
++	struct cpuacct *ca;
++	unsigned long flags;
++
++	if (!cpuacct_subsys.active)
++		return;
++	rcu_read_lock();
++	ca = task_ca(task);
++	if (ca) {
++		spin_lock_irqsave(&ca->lock, flags);
++		cpuusage_update(ca);
++		ca->time = cputime64_add(ca->time, cputime);
++		ca->current_interval_time =
++			cputime64_add(ca->current_interval_time, cputime);
++		spin_unlock_irqrestore(&ca->lock, flags);
++	}
++	rcu_read_unlock();
++}
++
++struct container_subsys cpuacct_subsys = {
++	.name = "cpuacct",
++	.create = cpuacct_create,
++	.destroy = cpuacct_destroy,
++	.populate = cpuacct_populate,
++	.subsys_id = cpuacct_subsys_id,
++};
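++/*
++ * Usage sketch from userspace, assuming a container hierarchy with the
++ * cpuacct subsystem mounted at /containers (paths are illustrative):
++ *
++ *	cat /containers/mygroup/cpuacct.usage	# total CPU time, in ms
++ *	cat /containers/mygroup/cpuacct.load	# % load, previous interval
++ */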
+diff -Nurb linux-2.6.22-570/kernel/cpuset.c linux-2.6.22-try2/kernel/cpuset.c
+--- linux-2.6.22-570/kernel/cpuset.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/kernel/cpuset.c	2007-12-19 15:29:25.000000000 -0500
+@@ -5,6 +5,7 @@
+  *
+  *  Copyright (C) 2003 BULL SA.
+  *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
++ *  Copyright (C) 2006 Google, Inc
+  *
+  *  Portions derived from Patrick Mochel's sysfs code.
+  *  sysfs is Copyright (c) 2001-3 Patrick Mochel
+@@ -12,6 +13,7 @@
+  *  2003-10-10 Written by Simon Derr.
+  *  2003-10-22 Updates by Stephen Hemminger.
+  *  2004 May-July Rework by Paul Jackson.
++ *  2006 Rework by Paul Menage to use generic containers
+  *
+  *  This file is subject to the terms and conditions of the GNU General Public
+  *  License.  See the file COPYING in the main directory of the Linux
+@@ -53,8 +55,6 @@
+ #include <asm/atomic.h>
+ #include <linux/mutex.h>
+ 
+-#define CPUSET_SUPER_MAGIC		0x27e0eb
+-
+ /*
+  * Tracks how many cpusets are currently defined in system.
+  * When there is only one cpuset (the root cpuset) we can
+@@ -62,6 +62,10 @@
+  */
+ int number_of_cpusets __read_mostly;
+ 
++/* Retrieve the cpuset from a container */
++struct container_subsys cpuset_subsys;
++struct cpuset;
++
+ /* See "Frequency meter" comments, below. */
+ 
+ struct fmeter {
+@@ -72,24 +76,13 @@
+ };
+ 
+ struct cpuset {
++	struct container_subsys_state css;
++
+ 	unsigned long flags;		/* "unsigned long" so bitops work */
+ 	cpumask_t cpus_allowed;		/* CPUs allowed to tasks in cpuset */
+ 	nodemask_t mems_allowed;	/* Memory Nodes allowed to tasks */
+ 
+-	/*
+-	 * Count is atomic so can incr (fork) or decr (exit) without a lock.
+-	 */
+-	atomic_t count;			/* count tasks using this cpuset */
+-
+-	/*
+-	 * We link our 'sibling' struct into our parents 'children'.
+-	 * Our children link their 'sibling' into our 'children'.
+-	 */
+-	struct list_head sibling;	/* my parents children */
+-	struct list_head children;	/* my children */
+-
+ 	struct cpuset *parent;		/* my parent */
+-	struct dentry *dentry;		/* cpuset fs entry */
+ 
+ 	/*
+ 	 * Copy of global cpuset_mems_generation as of the most
+@@ -100,13 +93,32 @@
+ 	struct fmeter fmeter;		/* memory_pressure filter */
+ };
+ 
++/* Update the cpuset for a container */
++static inline void set_container_cs(struct container *cont, struct cpuset *cs)
++{
++	cont->subsys[cpuset_subsys_id] = &cs->css;
++}
++
++/* Retrieve the cpuset for a container */
++static inline struct cpuset *container_cs(struct container *cont)
++{
++	return container_of(container_subsys_state(cont, cpuset_subsys_id),
++			    struct cpuset, css);
++}
++
++/* Retrieve the cpuset for a task */
++static inline struct cpuset *task_cs(struct task_struct *task)
++{
++	return container_of(task_subsys_state(task, cpuset_subsys_id),
++			    struct cpuset, css);
++}
++
+ /* bits in struct cpuset flags field */
+ typedef enum {
+ 	CS_CPU_EXCLUSIVE,
+ 	CS_MEM_EXCLUSIVE,
+ 	CS_MEMORY_MIGRATE,
+-	CS_REMOVED,
+-	CS_NOTIFY_ON_RELEASE,
+ 	CS_SPREAD_PAGE,
+ 	CS_SPREAD_SLAB,
+ } cpuset_flagbits_t;
+@@ -122,16 +134,6 @@
+ 	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
+ }
+ 
+-static inline int is_removed(const struct cpuset *cs)
+-{
+-	return test_bit(CS_REMOVED, &cs->flags);
+-}
+-
+-static inline int notify_on_release(const struct cpuset *cs)
+-{
+-	return test_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
+-}
+-
+ static inline int is_memory_migrate(const struct cpuset *cs)
+ {
+ 	return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
+@@ -172,14 +174,8 @@
+ 	.flags = ((1 << CS_CPU_EXCLUSIVE) | (1 << CS_MEM_EXCLUSIVE)),
+ 	.cpus_allowed = CPU_MASK_ALL,
+ 	.mems_allowed = NODE_MASK_ALL,
+-	.count = ATOMIC_INIT(0),
+-	.sibling = LIST_HEAD_INIT(top_cpuset.sibling),
+-	.children = LIST_HEAD_INIT(top_cpuset.children),
+ };
+ 
+-static struct vfsmount *cpuset_mount;
+-static struct super_block *cpuset_sb;
+-
+ /*
+  * We have two global cpuset mutexes below.  They can nest.
+  * It is ok to first take manage_mutex, then nest callback_mutex.  We also
+@@ -263,297 +259,36 @@
+  * the routine cpuset_update_task_memory_state().
+  */
+ 
+-static DEFINE_MUTEX(manage_mutex);
+ static DEFINE_MUTEX(callback_mutex);
+ 
+-/*
+- * A couple of forward declarations required, due to cyclic reference loop:
+- *  cpuset_mkdir -> cpuset_create -> cpuset_populate_dir -> cpuset_add_file
+- *  -> cpuset_create_file -> cpuset_dir_inode_operations -> cpuset_mkdir.
+- */
+-
+-static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode);
+-static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry);
+-
+-static struct backing_dev_info cpuset_backing_dev_info = {
+-	.ra_pages = 0,		/* No readahead */
+-	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+-};
+-
+-static struct inode *cpuset_new_inode(mode_t mode)
+-{
+-	struct inode *inode = new_inode(cpuset_sb);
+-
+-	if (inode) {
+-		inode->i_mode = mode;
+-		inode->i_uid = current->fsuid;
+-		inode->i_gid = current->fsgid;
+-		inode->i_blocks = 0;
+-		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+-		inode->i_mapping->backing_dev_info = &cpuset_backing_dev_info;
+-	}
+-	return inode;
+-}
+-
+-static void cpuset_diput(struct dentry *dentry, struct inode *inode)
+-{
+-	/* is dentry a directory ? if so, kfree() associated cpuset */
+-	if (S_ISDIR(inode->i_mode)) {
+-		struct cpuset *cs = dentry->d_fsdata;
+-		BUG_ON(!(is_removed(cs)));
+-		kfree(cs);
+-	}
+-	iput(inode);
+-}
+-
+-static struct dentry_operations cpuset_dops = {
+-	.d_iput = cpuset_diput,
+-};
+-
+-static struct dentry *cpuset_get_dentry(struct dentry *parent, const char *name)
+-{
+-	struct dentry *d = lookup_one_len(name, parent, strlen(name));
+-	if (!IS_ERR(d))
+-		d->d_op = &cpuset_dops;
+-	return d;
+-}
+-
+-static void remove_dir(struct dentry *d)
+-{
+-	struct dentry *parent = dget(d->d_parent);
+-
+-	d_delete(d);
+-	simple_rmdir(parent->d_inode, d);
+-	dput(parent);
+-}
+-
+-/*
+- * NOTE : the dentry must have been dget()'ed
+- */
+-static void cpuset_d_remove_dir(struct dentry *dentry)
+-{
+-	struct list_head *node;
+-
+-	spin_lock(&dcache_lock);
+-	node = dentry->d_subdirs.next;
+-	while (node != &dentry->d_subdirs) {
+-		struct dentry *d = list_entry(node, struct dentry, d_u.d_child);
+-		list_del_init(node);
+-		if (d->d_inode) {
+-			d = dget_locked(d);
+-			spin_unlock(&dcache_lock);
+-			d_delete(d);
+-			simple_unlink(dentry->d_inode, d);
+-			dput(d);
+-			spin_lock(&dcache_lock);
+-		}
+-		node = dentry->d_subdirs.next;
+-	}
+-	list_del_init(&dentry->d_u.d_child);
+-	spin_unlock(&dcache_lock);
+-	remove_dir(dentry);
+-}
+-
+-static struct super_operations cpuset_ops = {
+-	.statfs = simple_statfs,
+-	.drop_inode = generic_delete_inode,
+-};
+-
+-static int cpuset_fill_super(struct super_block *sb, void *unused_data,
+-							int unused_silent)
+-{
+-	struct inode *inode;
+-	struct dentry *root;
+-
+-	sb->s_blocksize = PAGE_CACHE_SIZE;
+-	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+-	sb->s_magic = CPUSET_SUPER_MAGIC;
+-	sb->s_op = &cpuset_ops;
+-	cpuset_sb = sb;
+-
+-	inode = cpuset_new_inode(S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR);
+-	if (inode) {
+-		inode->i_op = &simple_dir_inode_operations;
+-		inode->i_fop = &simple_dir_operations;
+-		/* directories start off with i_nlink == 2 (for "." entry) */
+-		inc_nlink(inode);
+-	} else {
+-		return -ENOMEM;
+-	}
+-
+-	root = d_alloc_root(inode);
+-	if (!root) {
+-		iput(inode);
+-		return -ENOMEM;
+-	}
+-	sb->s_root = root;
+-	return 0;
+-}
+-
++/* This is ugly, but preserves the userspace API for existing cpuset
++ * users. If someone tries to mount the "cpuset" filesystem, we
++ * silently switch it to mount "container" instead */
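++/*
++ * Illustratively, after this change:
++ *
++ *	mount -t cpuset none /dev/cpuset
++ *
++ * behaves like
++ *
++ *	mount -t container -o cpuset none /dev/cpuset
++ *
++ * with the release agent path preset to /sbin/cpuset_release_agent.
++ */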
+ static int cpuset_get_sb(struct file_system_type *fs_type,
+ 			 int flags, const char *unused_dev_name,
+ 			 void *data, struct vfsmount *mnt)
+ {
+-	return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt);
++	struct file_system_type *container_fs = get_fs_type("container");
++	int ret = -ENODEV;
++	if (container_fs) {
++		ret = container_fs->get_sb(container_fs, flags,
++					   unused_dev_name,
++					   "cpuset", mnt);
++		put_filesystem(container_fs);
++		if (!ret) {
++			container_set_release_agent_path(
++				&cpuset_subsys,
++				"/sbin/cpuset_release_agent");
++		}
++	}
++	return ret;
+ }
+ 
+ static struct file_system_type cpuset_fs_type = {
+ 	.name = "cpuset",
+ 	.get_sb = cpuset_get_sb,
+-	.kill_sb = kill_litter_super,
+-};
+-
+-/* struct cftype:
+- *
+- * The files in the cpuset filesystem mostly have a very simple read/write
+- * handling, some common function will take care of it. Nevertheless some cases
+- * (read tasks) are special and therefore I define this structure for every
+- * kind of file.
+- *
+- *
+- * When reading/writing to a file:
+- *	- the cpuset to use in file->f_path.dentry->d_parent->d_fsdata
+- *	- the 'cftype' of the file is file->f_path.dentry->d_fsdata
+- */
+-
+-struct cftype {
+-	char *name;
+-	int private;
+-	int (*open) (struct inode *inode, struct file *file);
+-	ssize_t (*read) (struct file *file, char __user *buf, size_t nbytes,
+-							loff_t *ppos);
+-	int (*write) (struct file *file, const char __user *buf, size_t nbytes,
+-							loff_t *ppos);
+-	int (*release) (struct inode *inode, struct file *file);
+ };
+ 
+-static inline struct cpuset *__d_cs(struct dentry *dentry)
+-{
+-	return dentry->d_fsdata;
+-}
+-
+-static inline struct cftype *__d_cft(struct dentry *dentry)
+-{
+-	return dentry->d_fsdata;
+-}
+-
+-/*
+- * Call with manage_mutex held.  Writes path of cpuset into buf.
+- * Returns 0 on success, -errno on error.
+- */
+-
+-static int cpuset_path(const struct cpuset *cs, char *buf, int buflen)
+-{
+-	char *start;
+-
+-	start = buf + buflen;
+-
+-	*--start = '\0';
+-	for (;;) {
+-		int len = cs->dentry->d_name.len;
+-		if ((start -= len) < buf)
+-			return -ENAMETOOLONG;
+-		memcpy(start, cs->dentry->d_name.name, len);
+-		cs = cs->parent;
+-		if (!cs)
+-			break;
+-		if (!cs->parent)
+-			continue;
+-		if (--start < buf)
+-			return -ENAMETOOLONG;
+-		*start = '/';
+-	}
+-	memmove(buf, start, buf + buflen - start);
+-	return 0;
+-}
+-
+-/*
+- * Notify userspace when a cpuset is released, by running
+- * /sbin/cpuset_release_agent with the name of the cpuset (path
+- * relative to the root of cpuset file system) as the argument.
+- *
+- * Most likely, this user command will try to rmdir this cpuset.
+- *
+- * This races with the possibility that some other task will be
+- * attached to this cpuset before it is removed, or that some other
+- * user task will 'mkdir' a child cpuset of this cpuset.  That's ok.
+- * The presumed 'rmdir' will fail quietly if this cpuset is no longer
+- * unused, and this cpuset will be reprieved from its death sentence,
+- * to continue to serve a useful existence.  Next time it's released,
+- * we will get notified again, if it still has 'notify_on_release' set.
+- *
+- * The final arg to call_usermodehelper() is 0, which means don't
+- * wait.  The separate /sbin/cpuset_release_agent task is forked by
+- * call_usermodehelper(), then control in this thread returns here,
+- * without waiting for the release agent task.  We don't bother to
+- * wait because the caller of this routine has no use for the exit
+- * status of the /sbin/cpuset_release_agent task, so no sense holding
+- * our caller up for that.
+- *
+- * When we had only one cpuset mutex, we had to call this
+- * without holding it, to avoid deadlock when call_usermodehelper()
+- * allocated memory.  With two locks, we could now call this while
+- * holding manage_mutex, but we still don't, so as to minimize
+- * the time manage_mutex is held.
+- */
+-
+-static void cpuset_release_agent(const char *pathbuf)
+-{
+-	char *argv[3], *envp[3];
+-	int i;
+-
+-	if (!pathbuf)
+-		return;
+-
+-	i = 0;
+-	argv[i++] = "/sbin/cpuset_release_agent";
+-	argv[i++] = (char *)pathbuf;
+-	argv[i] = NULL;
+-
+-	i = 0;
+-	/* minimal command environment */
+-	envp[i++] = "HOME=/";
+-	envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+-	envp[i] = NULL;
+-
+-	call_usermodehelper(argv[0], argv, envp, 0);
+-	kfree(pathbuf);
+-}
+-
+-/*
+- * Either cs->count of using tasks transitioned to zero, or the
+- * cs->children list of child cpusets just became empty.  If this
+- * cs is notify_on_release() and now both the user count is zero and
+- * the list of children is empty, prepare cpuset path in a kmalloc'd
+- * buffer, to be returned via ppathbuf, so that the caller can invoke
+- * cpuset_release_agent() with it later on, once manage_mutex is dropped.
+- * Call here with manage_mutex held.
+- *
+- * This check_for_release() routine is responsible for kmalloc'ing
+- * pathbuf.  The above cpuset_release_agent() is responsible for
+- * kfree'ing pathbuf.  The caller of these routines is responsible
+- * for providing a pathbuf pointer, initialized to NULL, then
+- * calling check_for_release() with manage_mutex held and the address
+- * of the pathbuf pointer, then dropping manage_mutex, then calling
+- * cpuset_release_agent() with pathbuf, as set by check_for_release().
+- */
+-
+-static void check_for_release(struct cpuset *cs, char **ppathbuf)
+-{
+-	if (notify_on_release(cs) && atomic_read(&cs->count) == 0 &&
+-	    list_empty(&cs->children)) {
+-		char *buf;
+-
+-		buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+-		if (!buf)
+-			return;
+-		if (cpuset_path(cs, buf, PAGE_SIZE) < 0)
+-			kfree(buf);
+-		else
+-			*ppathbuf = buf;
+-	}
+-}
+-
+ /*
+  * Return in *pmask the portion of a cpusets's cpus_allowed that
+  * are online.  If none are online, walk up the cpuset hierarchy
+@@ -651,20 +386,19 @@
+ 	struct task_struct *tsk = current;
+ 	struct cpuset *cs;
+ 
+-	if (tsk->cpuset == &top_cpuset) {
++	if (task_cs(tsk) == &top_cpuset) {
+ 		/* Don't need rcu for top_cpuset.  It's never freed. */
+ 		my_cpusets_mem_gen = top_cpuset.mems_generation;
+ 	} else {
+ 		rcu_read_lock();
+-		cs = rcu_dereference(tsk->cpuset);
+-		my_cpusets_mem_gen = cs->mems_generation;
++		my_cpusets_mem_gen = task_cs(current)->mems_generation;
+ 		rcu_read_unlock();
+ 	}
+ 
+ 	if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) {
+ 		mutex_lock(&callback_mutex);
+ 		task_lock(tsk);
+-		cs = tsk->cpuset;	/* Maybe changed when task not locked */
++		cs = task_cs(tsk); /* Maybe changed when task not locked */
+ 		guarantee_online_mems(cs, &tsk->mems_allowed);
+ 		tsk->cpuset_mems_generation = cs->mems_generation;
+ 		if (is_spread_page(cs))
+@@ -719,11 +453,12 @@
+ 
+ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
+ {
++	struct container *cont;
+ 	struct cpuset *c, *par;
+ 
+ 	/* Each of our child cpusets must be a subset of us */
+-	list_for_each_entry(c, &cur->children, sibling) {
+-		if (!is_cpuset_subset(c, trial))
++	list_for_each_entry(cont, &cur->css.container->children, sibling) {
++		if (!is_cpuset_subset(container_cs(cont), trial))
+ 			return -EBUSY;
+ 	}
+ 
+@@ -738,7 +473,8 @@
+ 		return -EACCES;
+ 
+ 	/* If either I or some sibling (!= me) is exclusive, we can't overlap */
+-	list_for_each_entry(c, &par->children, sibling) {
++	list_for_each_entry(cont, &par->css.container->children, sibling) {
++		c = container_cs(cont);
+ 		if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
+ 		    c != cur &&
+ 		    cpus_intersects(trial->cpus_allowed, c->cpus_allowed))
+@@ -753,68 +489,13 @@
+ }
+ 
+ /*
+- * For a given cpuset cur, partition the system as follows
+- * a. All cpus in the parent cpuset's cpus_allowed that are not part of any
+- *    exclusive child cpusets
+- * b. All cpus in the current cpuset's cpus_allowed that are not part of any
+- *    exclusive child cpusets
+- * Build these two partitions by calling partition_sched_domains
+- *
+- * Call with manage_mutex held.  May nest a call to the
+- * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
+- * Must not be called holding callback_mutex, because we must
+- * not call lock_cpu_hotplug() while holding callback_mutex.
+- */
+-
+-static void update_cpu_domains(struct cpuset *cur)
+-{
+-	struct cpuset *c, *par = cur->parent;
+-	cpumask_t pspan, cspan;
+-
+-	if (par == NULL || cpus_empty(cur->cpus_allowed))
+-		return;
+-
+-	/*
+-	 * Get all cpus from parent's cpus_allowed not part of exclusive
+-	 * children
+-	 */
+-	pspan = par->cpus_allowed;
+-	list_for_each_entry(c, &par->children, sibling) {
+-		if (is_cpu_exclusive(c))
+-			cpus_andnot(pspan, pspan, c->cpus_allowed);
+-	}
+-	if (!is_cpu_exclusive(cur)) {
+-		cpus_or(pspan, pspan, cur->cpus_allowed);
+-		if (cpus_equal(pspan, cur->cpus_allowed))
+-			return;
+-		cspan = CPU_MASK_NONE;
+-	} else {
+-		if (cpus_empty(pspan))
+-			return;
+-		cspan = cur->cpus_allowed;
+-		/*
+-		 * Get all cpus from current cpuset's cpus_allowed not part
+-		 * of exclusive children
+-		 */
+-		list_for_each_entry(c, &cur->children, sibling) {
+-			if (is_cpu_exclusive(c))
+-				cpus_andnot(cspan, cspan, c->cpus_allowed);
+-		}
+-	}
+-
+-	lock_cpu_hotplug();
+-	partition_sched_domains(&pspan, &cspan);
+-	unlock_cpu_hotplug();
+-}
+-
+-/*
+  * Call with manage_mutex held.  May take callback_mutex during call.
+  */
+ 
+ static int update_cpumask(struct cpuset *cs, char *buf)
+ {
+ 	struct cpuset trialcs;
+-	int retval, cpus_unchanged;
++	int retval;
+ 
+ 	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
+ 	if (cs == &top_cpuset)
+@@ -836,17 +517,15 @@
+ 	}
+ 	cpus_and(trialcs.cpus_allowed, trialcs.cpus_allowed, cpu_online_map);
+ 	/* cpus_allowed cannot be empty for a cpuset with attached tasks. */
+-	if (atomic_read(&cs->count) && cpus_empty(trialcs.cpus_allowed))
++	if (container_task_count(cs->css.container) &&
++	    cpus_empty(trialcs.cpus_allowed))
+ 		return -ENOSPC;
+ 	retval = validate_change(cs, &trialcs);
+ 	if (retval < 0)
+ 		return retval;
+-	cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
+ 	mutex_lock(&callback_mutex);
+ 	cs->cpus_allowed = trialcs.cpus_allowed;
+ 	mutex_unlock(&callback_mutex);
+-	if (is_cpu_exclusive(cs) && !cpus_unchanged)
+-		update_cpu_domains(cs);
+ 	return 0;
+ }
+ 
+@@ -895,7 +574,7 @@
+ 	do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL);
+ 
+ 	mutex_lock(&callback_mutex);
+-	guarantee_online_mems(tsk->cpuset, &tsk->mems_allowed);
++	guarantee_online_mems(task_cs(tsk), &tsk->mems_allowed);
+ 	mutex_unlock(&callback_mutex);
+ }
+ 
+@@ -913,16 +592,19 @@
+  * their mempolicies to the cpusets new mems_allowed.
+  */
+ 
++static void *cpuset_being_rebound;
++
+ static int update_nodemask(struct cpuset *cs, char *buf)
+ {
+ 	struct cpuset trialcs;
+ 	nodemask_t oldmem;
+-	struct task_struct *g, *p;
++	struct task_struct *p;
+ 	struct mm_struct **mmarray;
+ 	int i, n, ntasks;
+ 	int migrate;
+ 	int fudge;
+ 	int retval;
++	struct container_iter it;
+ 
+ 	/* top_cpuset.mems_allowed tracks node_online_map; it's read-only */
+ 	if (cs == &top_cpuset)
+@@ -949,7 +631,8 @@
+ 		goto done;
+ 	}
+ 	/* mems_allowed cannot be empty for a cpuset with attached tasks. */
+-	if (atomic_read(&cs->count) && nodes_empty(trialcs.mems_allowed)) {
++	if (container_task_count(cs->css.container) &&
++	    nodes_empty(trialcs.mems_allowed)) {
+ 		retval = -ENOSPC;
+ 		goto done;
+ 	}
+@@ -962,7 +645,7 @@
+ 	cs->mems_generation = cpuset_mems_generation++;
+ 	mutex_unlock(&callback_mutex);
+ 
+-	set_cpuset_being_rebound(cs);		/* causes mpol_copy() rebind */
++	cpuset_being_rebound = cs;		/* causes mpol_copy() rebind */
+ 
+ 	fudge = 10;				/* spare mmarray[] slots */
+ 	fudge += cpus_weight(cs->cpus_allowed);	/* imagine one fork-bomb/cpu */
+@@ -976,37 +659,37 @@
+ 	 * enough mmarray[] w/o using GFP_ATOMIC.
+ 	 */
+ 	while (1) {
+-		ntasks = atomic_read(&cs->count);	/* guess */
++		ntasks = container_task_count(cs->css.container);  /* guess */
+ 		ntasks += fudge;
+ 		mmarray = kmalloc(ntasks * sizeof(*mmarray), GFP_KERNEL);
+ 		if (!mmarray)
+ 			goto done;
+-		write_lock_irq(&tasklist_lock);		/* block fork */
+-		if (atomic_read(&cs->count) <= ntasks)
++		read_lock(&tasklist_lock);		/* block fork */
++		if (container_task_count(cs->css.container) <= ntasks)
+ 			break;				/* got enough */
+-		write_unlock_irq(&tasklist_lock);	/* try again */
++		read_unlock(&tasklist_lock);		/* try again */
+ 		kfree(mmarray);
+ 	}
+ 
+ 	n = 0;
+ 
+ 	/* Load up mmarray[] with mm reference for each task in cpuset. */
+-	do_each_thread(g, p) {
++	container_iter_start(cs->css.container, &it);
++	while ((p = container_iter_next(cs->css.container, &it))) {
+ 		struct mm_struct *mm;
+ 
+ 		if (n >= ntasks) {
+ 			printk(KERN_WARNING
+ 				"Cpuset mempolicy rebind incomplete.\n");
+-			continue;
++			break;
+ 		}
+-		if (p->cpuset != cs)
+-			continue;
+ 		mm = get_task_mm(p);
+ 		if (!mm)
+ 			continue;
+ 		mmarray[n++] = mm;
+-	} while_each_thread(g, p);
+-	write_unlock_irq(&tasklist_lock);
++	}
++	container_iter_end(cs->css.container, &it);
++	read_unlock(&tasklist_lock);
+ 
+ 	/*
+ 	 * Now that we've dropped the tasklist spinlock, we can
+@@ -1033,12 +716,17 @@
+ 
+ 	/* We're done rebinding vma's to this cpusets new mems_allowed. */
+ 	kfree(mmarray);
+-	set_cpuset_being_rebound(NULL);
++	cpuset_being_rebound = NULL;
+ 	retval = 0;
+ done:
+ 	return retval;
+ }
+ 
++int current_cpuset_is_being_rebound(void)
++{
++	return task_cs(current) == cpuset_being_rebound;
++}
++
+ /*
+  * Call with manage_mutex held.
+  */
+@@ -1067,7 +755,7 @@
+ {
+ 	int turning_on;
+ 	struct cpuset trialcs;
+-	int err, cpu_exclusive_changed;
++	int err;
+ 
+ 	turning_on = (simple_strtoul(buf, NULL, 10) != 0);
+ 
+@@ -1080,14 +768,10 @@
+ 	err = validate_change(cs, &trialcs);
+ 	if (err < 0)
+ 		return err;
+-	cpu_exclusive_changed =
+-		(is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
+ 	mutex_lock(&callback_mutex);
+ 	cs->flags = trialcs.flags;
+ 	mutex_unlock(&callback_mutex);
+ 
+-	if (cpu_exclusive_changed)
+-                update_cpu_domains(cs);
+ 	return 0;
+ }
+ 
+@@ -1189,85 +873,34 @@
+ 	return val;
+ }
+ 
+-/*
+- * Attack task specified by pid in 'pidbuf' to cpuset 'cs', possibly
+- * writing the path of the old cpuset in 'ppathbuf' if it needs to be
+- * notified on release.
+- *
+- * Call holding manage_mutex.  May take callback_mutex and task_lock of
+- * the task 'pid' during call.
+- */
+-
+-static int attach_task(struct cpuset *cs, char *pidbuf, char **ppathbuf)
++int cpuset_can_attach(struct container_subsys *ss,
++		      struct container *cont, struct task_struct *tsk)
+ {
+-	pid_t pid;
+-	struct task_struct *tsk;
+-	struct cpuset *oldcs;
+-	cpumask_t cpus;
+-	nodemask_t from, to;
+-	struct mm_struct *mm;
+-	int retval;
++	struct cpuset *cs = container_cs(cont);
+ 
+-	if (sscanf(pidbuf, "%d", &pid) != 1)
+-		return -EIO;
+ 	if (cpus_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))
+ 		return -ENOSPC;
+ 
+-	if (pid) {
+-		read_lock(&tasklist_lock);
+-
+-		tsk = find_task_by_pid(pid);
+-		if (!tsk || tsk->flags & PF_EXITING) {
+-			read_unlock(&tasklist_lock);
+-			return -ESRCH;
+-		}
+-
+-		get_task_struct(tsk);
+-		read_unlock(&tasklist_lock);
+-
+-		if ((current->euid) && (current->euid != tsk->uid)
+-		    && (current->euid != tsk->suid)) {
+-			put_task_struct(tsk);
+-			return -EACCES;
+-		}
+-	} else {
+-		tsk = current;
+-		get_task_struct(tsk);
+-	}
++	return security_task_setscheduler(tsk, 0, NULL);
++}
+ 
+-	retval = security_task_setscheduler(tsk, 0, NULL);
+-	if (retval) {
+-		put_task_struct(tsk);
+-		return retval;
+-	}
++void cpuset_attach(struct container_subsys *ss,
++		   struct container *cont, struct container *oldcont,
++		   struct task_struct *tsk)
++{
++	cpumask_t cpus;
++	nodemask_t from, to;
++	struct mm_struct *mm;
++	struct cpuset *cs = container_cs(cont);
++	struct cpuset *oldcs = container_cs(oldcont);
+ 
+ 	mutex_lock(&callback_mutex);
+-
+-	task_lock(tsk);
+-	oldcs = tsk->cpuset;
+-	/*
+-	 * After getting 'oldcs' cpuset ptr, be sure still not exiting.
+-	 * If 'oldcs' might be the top_cpuset due to the_top_cpuset_hack
+-	 * then fail this attach_task(), to avoid breaking top_cpuset.count.
+-	 */
+-	if (tsk->flags & PF_EXITING) {
+-		task_unlock(tsk);
+-		mutex_unlock(&callback_mutex);
+-		put_task_struct(tsk);
+-		return -ESRCH;
+-	}
+-	atomic_inc(&cs->count);
+-	rcu_assign_pointer(tsk->cpuset, cs);
+-	task_unlock(tsk);
+-
+ 	guarantee_online_cpus(cs, &cpus);
+ 	set_cpus_allowed(tsk, cpus);
++	mutex_unlock(&callback_mutex);
+ 
+ 	from = oldcs->mems_allowed;
+ 	to = cs->mems_allowed;
+-
+-	mutex_unlock(&callback_mutex);
+-
+ 	mm = get_task_mm(tsk);
+ 	if (mm) {
+ 		mpol_rebind_mm(mm, &to);
+@@ -1276,40 +909,31 @@
+ 		mmput(mm);
+ 	}
+ 
+-	put_task_struct(tsk);
+-	synchronize_rcu();
+-	if (atomic_dec_and_test(&oldcs->count))
+-		check_for_release(oldcs, ppathbuf);
+-	return 0;
+ }
+ 
+ /* The various types of files and directories in a cpuset file system */
+ 
+ typedef enum {
+-	FILE_ROOT,
+-	FILE_DIR,
+ 	FILE_MEMORY_MIGRATE,
+ 	FILE_CPULIST,
+ 	FILE_MEMLIST,
+ 	FILE_CPU_EXCLUSIVE,
+ 	FILE_MEM_EXCLUSIVE,
+-	FILE_NOTIFY_ON_RELEASE,
+ 	FILE_MEMORY_PRESSURE_ENABLED,
+ 	FILE_MEMORY_PRESSURE,
+ 	FILE_SPREAD_PAGE,
+ 	FILE_SPREAD_SLAB,
+-	FILE_TASKLIST,
+ } cpuset_filetype_t;
+ 
+-static ssize_t cpuset_common_file_write(struct file *file,
++static ssize_t cpuset_common_file_write(struct container *cont,
++					struct cftype *cft,
++					struct file *file,
+ 					const char __user *userbuf,
+ 					size_t nbytes, loff_t *unused_ppos)
+ {
+-	struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
+-	struct cftype *cft = __d_cft(file->f_path.dentry);
++	struct cpuset *cs = container_cs(cont);
+ 	cpuset_filetype_t type = cft->private;
+ 	char *buffer;
+-	char *pathbuf = NULL;
+ 	int retval = 0;
+ 
+ 	/* Crude upper limit on largest legitimate cpulist user might write. */
+@@ -1326,9 +950,9 @@
+ 	}
+ 	buffer[nbytes] = 0;	/* nul-terminate */
+ 
+-	mutex_lock(&manage_mutex);
++	container_lock();
+ 
+-	if (is_removed(cs)) {
++	if (container_is_removed(cont)) {
+ 		retval = -ENODEV;
+ 		goto out2;
+ 	}
+@@ -1346,9 +970,6 @@
+ 	case FILE_MEM_EXCLUSIVE:
+ 		retval = update_flag(CS_MEM_EXCLUSIVE, cs, buffer);
+ 		break;
+-	case FILE_NOTIFY_ON_RELEASE:
+-		retval = update_flag(CS_NOTIFY_ON_RELEASE, cs, buffer);
+-		break;
+ 	case FILE_MEMORY_MIGRATE:
+ 		retval = update_flag(CS_MEMORY_MIGRATE, cs, buffer);
+ 		break;
+@@ -1366,9 +987,6 @@
+ 		retval = update_flag(CS_SPREAD_SLAB, cs, buffer);
+ 		cs->mems_generation = cpuset_mems_generation++;
+ 		break;
+-	case FILE_TASKLIST:
+-		retval = attach_task(cs, buffer, &pathbuf);
+-		break;
+ 	default:
+ 		retval = -EINVAL;
+ 		goto out2;
+@@ -1377,30 +995,12 @@
+ 	if (retval == 0)
+ 		retval = nbytes;
+ out2:
+-	mutex_unlock(&manage_mutex);
+-	cpuset_release_agent(pathbuf);
++	container_unlock();
+ out1:
+ 	kfree(buffer);
+ 	return retval;
+ }
+ 
+-static ssize_t cpuset_file_write(struct file *file, const char __user *buf,
+-						size_t nbytes, loff_t *ppos)
+-{
+-	ssize_t retval = 0;
+-	struct cftype *cft = __d_cft(file->f_path.dentry);
+-	if (!cft)
+-		return -ENODEV;
+-
+-	/* special function ? */
+-	if (cft->write)
+-		retval = cft->write(file, buf, nbytes, ppos);
+-	else
+-		retval = cpuset_common_file_write(file, buf, nbytes, ppos);
+-
+-	return retval;
+-}
+-
+ /*
+  * These ascii lists should be read in a single call, by using a user
+  * buffer large enough to hold the entire map.  If read in smaller
+@@ -1435,17 +1035,19 @@
+ 	return nodelist_scnprintf(page, PAGE_SIZE, mask);
+ }
+ 
+-static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
++static ssize_t cpuset_common_file_read(struct container *cont,
++				       struct cftype *cft,
++				       struct file *file,
++				       char __user *buf,
+ 				size_t nbytes, loff_t *ppos)
+ {
+-	struct cftype *cft = __d_cft(file->f_path.dentry);
+-	struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
++	struct cpuset *cs = container_cs(cont);
+ 	cpuset_filetype_t type = cft->private;
+ 	char *page;
+ 	ssize_t retval = 0;
+ 	char *s;
+ 
+-	if (!(page = (char *)__get_free_page(GFP_KERNEL)))
++	if (!(page = (char *)__get_free_page(GFP_TEMPORARY)))
+ 		return -ENOMEM;
+ 
+ 	s = page;
+@@ -1463,9 +1065,6 @@
+ 	case FILE_MEM_EXCLUSIVE:
+ 		*s++ = is_mem_exclusive(cs) ? '1' : '0';
+ 		break;
+-	case FILE_NOTIFY_ON_RELEASE:
+-		*s++ = notify_on_release(cs) ? '1' : '0';
+-		break;
+ 	case FILE_MEMORY_MIGRATE:
+ 		*s++ = is_memory_migrate(cs) ? '1' : '0';
+ 		break;
+@@ -1493,390 +1092,140 @@
+ 	return retval;
+ }
+ 
+-static ssize_t cpuset_file_read(struct file *file, char __user *buf, size_t nbytes,
+-								loff_t *ppos)
+-{
+-	ssize_t retval = 0;
+-	struct cftype *cft = __d_cft(file->f_path.dentry);
+-	if (!cft)
+-		return -ENODEV;
+-
+-	/* special function ? */
+-	if (cft->read)
+-		retval = cft->read(file, buf, nbytes, ppos);
+-	else
+-		retval = cpuset_common_file_read(file, buf, nbytes, ppos);
+-
+-	return retval;
+-}
+-
+-static int cpuset_file_open(struct inode *inode, struct file *file)
+-{
+-	int err;
+-	struct cftype *cft;
+-
+-	err = generic_file_open(inode, file);
+-	if (err)
+-		return err;
+-
+-	cft = __d_cft(file->f_path.dentry);
+-	if (!cft)
+-		return -ENODEV;
+-	if (cft->open)
+-		err = cft->open(inode, file);
+-	else
+-		err = 0;
+-
+-	return err;
+-}
+-
+-static int cpuset_file_release(struct inode *inode, struct file *file)
+-{
+-	struct cftype *cft = __d_cft(file->f_path.dentry);
+-	if (cft->release)
+-		return cft->release(inode, file);
+-	return 0;
+-}
+-
+-/*
+- * cpuset_rename - Only allow simple rename of directories in place.
+- */
+-static int cpuset_rename(struct inode *old_dir, struct dentry *old_dentry,
+-                  struct inode *new_dir, struct dentry *new_dentry)
+-{
+-	if (!S_ISDIR(old_dentry->d_inode->i_mode))
+-		return -ENOTDIR;
+-	if (new_dentry->d_inode)
+-		return -EEXIST;
+-	if (old_dir != new_dir)
+-		return -EIO;
+-	return simple_rename(old_dir, old_dentry, new_dir, new_dentry);
+-}
+-
+-static const struct file_operations cpuset_file_operations = {
+-	.read = cpuset_file_read,
+-	.write = cpuset_file_write,
+-	.llseek = generic_file_llseek,
+-	.open = cpuset_file_open,
+-	.release = cpuset_file_release,
+-};
+-
+-static const struct inode_operations cpuset_dir_inode_operations = {
+-	.lookup = simple_lookup,
+-	.mkdir = cpuset_mkdir,
+-	.rmdir = cpuset_rmdir,
+-	.rename = cpuset_rename,
+-};
+-
+-static int cpuset_create_file(struct dentry *dentry, int mode)
+-{
+-	struct inode *inode;
+-
+-	if (!dentry)
+-		return -ENOENT;
+-	if (dentry->d_inode)
+-		return -EEXIST;
+-
+-	inode = cpuset_new_inode(mode);
+-	if (!inode)
+-		return -ENOMEM;
+-
+-	if (S_ISDIR(mode)) {
+-		inode->i_op = &cpuset_dir_inode_operations;
+-		inode->i_fop = &simple_dir_operations;
+-
+-		/* start off with i_nlink == 2 (for "." entry) */
+-		inc_nlink(inode);
+-	} else if (S_ISREG(mode)) {
+-		inode->i_size = 0;
+-		inode->i_fop = &cpuset_file_operations;
+-	}
+-
+-	d_instantiate(dentry, inode);
+-	dget(dentry);	/* Extra count - pin the dentry in core */
+-	return 0;
+-}
+-
+-/*
+- *	cpuset_create_dir - create a directory for an object.
+- *	cs:	the cpuset we create the directory for.
+- *		It must have a valid ->parent field
+- *		And we are going to fill its ->dentry field.
+- *	name:	The name to give to the cpuset directory. Will be copied.
+- *	mode:	mode to set on new directory.
+- */
+-
+-static int cpuset_create_dir(struct cpuset *cs, const char *name, int mode)
+-{
+-	struct dentry *dentry = NULL;
+-	struct dentry *parent;
+-	int error = 0;
+-
+-	parent = cs->parent->dentry;
+-	dentry = cpuset_get_dentry(parent, name);
+-	if (IS_ERR(dentry))
+-		return PTR_ERR(dentry);
+-	error = cpuset_create_file(dentry, S_IFDIR | mode);
+-	if (!error) {
+-		dentry->d_fsdata = cs;
+-		inc_nlink(parent->d_inode);
+-		cs->dentry = dentry;
+-	}
+-	dput(dentry);
+-
+-	return error;
+-}
+-
+-static int cpuset_add_file(struct dentry *dir, const struct cftype *cft)
+-{
+-	struct dentry *dentry;
+-	int error;
+-
+-	mutex_lock(&dir->d_inode->i_mutex);
+-	dentry = cpuset_get_dentry(dir, cft->name);
+-	if (!IS_ERR(dentry)) {
+-		error = cpuset_create_file(dentry, 0644 | S_IFREG);
+-		if (!error)
+-			dentry->d_fsdata = (void *)cft;
+-		dput(dentry);
+-	} else
+-		error = PTR_ERR(dentry);
+-	mutex_unlock(&dir->d_inode->i_mutex);
+-	return error;
+-}
+-
+-/*
+- * Stuff for reading the 'tasks' file.
+- *
+- * Reading this file can return large amounts of data if a cpuset has
+- * *lots* of attached tasks. So it may need several calls to read(),
+- * but we cannot guarantee that the information we produce is correct
+- * unless we produce it entirely atomically.
+- *
+- * Upon tasks file open(), a struct ctr_struct is allocated, that
+- * will have a pointer to an array (also allocated here).  The struct
+- * ctr_struct * is stored in file->private_data.  Its resources will
+- * be freed by release() when the file is closed.  The array is used
+- * to sprintf the PIDs and then used by read().
+- */
+-
+-/* cpusets_tasks_read array */
+-
+-struct ctr_struct {
+-	char *buf;
+-	int bufsz;
+-};
+-
+-/*
+- * Load into 'pidarray' up to 'npids' of the tasks using cpuset 'cs'.
+- * Return actual number of pids loaded.  No need to task_lock(p)
+- * when reading out p->cpuset, as we don't really care if it changes
+- * on the next cycle, and we are not going to try to dereference it.
+- */
+-static int pid_array_load(pid_t *pidarray, int npids, struct cpuset *cs)
+-{
+-	int n = 0;
+-	struct task_struct *g, *p;
+-
+-	read_lock(&tasklist_lock);
+-
+-	do_each_thread(g, p) {
+-		if (p->cpuset == cs) {
+-			if (unlikely(n == npids))
+-				goto array_full;
+-			pidarray[n++] = p->pid;
+-		}
+-	} while_each_thread(g, p);
+-
+-array_full:
+-	read_unlock(&tasklist_lock);
+-	return n;
+-}
+-
+-static int cmppid(const void *a, const void *b)
+-{
+-	return *(pid_t *)a - *(pid_t *)b;
+-}
+-
+-/*
+- * Convert array 'a' of 'npids' pid_t's to a string of newline separated
+- * decimal pids in 'buf'.  Don't write more than 'sz' chars, but return
+- * count 'cnt' of how many chars would be written if buf were large enough.
+- */
+-static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
+-{
+-	int cnt = 0;
+-	int i;
+-
+-	for (i = 0; i < npids; i++)
+-		cnt += snprintf(buf + cnt, max(sz - cnt, 0), "%d\n", a[i]);
+-	return cnt;
+-}
+-
+-/*
+- * Handle an open on 'tasks' file.  Prepare a buffer listing the
+- * process id's of tasks currently attached to the cpuset being opened.
+- *
+- * Does not require any specific cpuset mutexes, and does not take any.
+- */
+-static int cpuset_tasks_open(struct inode *unused, struct file *file)
+-{
+-	struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
+-	struct ctr_struct *ctr;
+-	pid_t *pidarray;
+-	int npids;
+-	char c;
+-
+-	if (!(file->f_mode & FMODE_READ))
+-		return 0;
+-
+-	ctr = kmalloc(sizeof(*ctr), GFP_KERNEL);
+-	if (!ctr)
+-		goto err0;
+-
+-	/*
+-	 * If cpuset gets more users after we read count, we won't have
+-	 * enough space - tough.  This race is indistinguishable to the
+-	 * caller from the case that the additional cpuset users didn't
+-	 * show up until sometime later on.
+-	 */
+-	npids = atomic_read(&cs->count);
+-	pidarray = kmalloc(npids * sizeof(pid_t), GFP_KERNEL);
+-	if (!pidarray)
+-		goto err1;
+-
+-	npids = pid_array_load(pidarray, npids, cs);
+-	sort(pidarray, npids, sizeof(pid_t), cmppid, NULL);
+-
+-	/* Call pid_array_to_buf() twice, first just to get bufsz */
+-	ctr->bufsz = pid_array_to_buf(&c, sizeof(c), pidarray, npids) + 1;
+-	ctr->buf = kmalloc(ctr->bufsz, GFP_KERNEL);
+-	if (!ctr->buf)
+-		goto err2;
+-	ctr->bufsz = pid_array_to_buf(ctr->buf, ctr->bufsz, pidarray, npids);
+-
+-	kfree(pidarray);
+-	file->private_data = ctr;
+-	return 0;
+-
+-err2:
+-	kfree(pidarray);
+-err1:
+-	kfree(ctr);
+-err0:
+-	return -ENOMEM;
+-}
+-
+-static ssize_t cpuset_tasks_read(struct file *file, char __user *buf,
+-						size_t nbytes, loff_t *ppos)
+-{
+-	struct ctr_struct *ctr = file->private_data;
+ 
+-	return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
+-}
+ 
+-static int cpuset_tasks_release(struct inode *unused_inode, struct file *file)
+-{
+-	struct ctr_struct *ctr;
+ 
+-	if (file->f_mode & FMODE_READ) {
+-		ctr = file->private_data;
+-		kfree(ctr->buf);
+-		kfree(ctr);
+-	}
+-	return 0;
+-}
+ 
+ /*
+  * for the common functions, 'private' gives the type of file
+  */
+ 
+-static struct cftype cft_tasks = {
+-	.name = "tasks",
+-	.open = cpuset_tasks_open,
+-	.read = cpuset_tasks_read,
+-	.release = cpuset_tasks_release,
+-	.private = FILE_TASKLIST,
+-};
+-
+ static struct cftype cft_cpus = {
+ 	.name = "cpus",
++	.read = cpuset_common_file_read,
++	.write = cpuset_common_file_write,
+ 	.private = FILE_CPULIST,
+ };
+ 
+ static struct cftype cft_mems = {
+ 	.name = "mems",
++	.read = cpuset_common_file_read,
++	.write = cpuset_common_file_write,
+ 	.private = FILE_MEMLIST,
+ };
+ 
+ static struct cftype cft_cpu_exclusive = {
+ 	.name = "cpu_exclusive",
++	.read = cpuset_common_file_read,
++	.write = cpuset_common_file_write,
+ 	.private = FILE_CPU_EXCLUSIVE,
+ };
+ 
+ static struct cftype cft_mem_exclusive = {
+ 	.name = "mem_exclusive",
++	.read = cpuset_common_file_read,
++	.write = cpuset_common_file_write,
+ 	.private = FILE_MEM_EXCLUSIVE,
+ };
+ 
+-static struct cftype cft_notify_on_release = {
+-	.name = "notify_on_release",
+-	.private = FILE_NOTIFY_ON_RELEASE,
+-};
+-
+ static struct cftype cft_memory_migrate = {
+ 	.name = "memory_migrate",
++	.read = cpuset_common_file_read,
++	.write = cpuset_common_file_write,
+ 	.private = FILE_MEMORY_MIGRATE,
+ };
+ 
+ static struct cftype cft_memory_pressure_enabled = {
+ 	.name = "memory_pressure_enabled",
++	.read = cpuset_common_file_read,
++	.write = cpuset_common_file_write,
+ 	.private = FILE_MEMORY_PRESSURE_ENABLED,
+ };
+ 
+ static struct cftype cft_memory_pressure = {
+ 	.name = "memory_pressure",
++	.read = cpuset_common_file_read,
++	.write = cpuset_common_file_write,
+ 	.private = FILE_MEMORY_PRESSURE,
+ };
+ 
+ static struct cftype cft_spread_page = {
+ 	.name = "memory_spread_page",
++	.read = cpuset_common_file_read,
++	.write = cpuset_common_file_write,
+ 	.private = FILE_SPREAD_PAGE,
+ };
+ 
+ static struct cftype cft_spread_slab = {
+ 	.name = "memory_spread_slab",
++	.read = cpuset_common_file_read,
++	.write = cpuset_common_file_write,
+ 	.private = FILE_SPREAD_SLAB,
+ };
+ 
+-static int cpuset_populate_dir(struct dentry *cs_dentry)
++int cpuset_populate(struct container_subsys *ss, struct container *cont)
+ {
+ 	int err;
+ 
+-	if ((err = cpuset_add_file(cs_dentry, &cft_cpus)) < 0)
++	if ((err = container_add_file(cont, &cft_cpus)) < 0)
+ 		return err;
+-	if ((err = cpuset_add_file(cs_dentry, &cft_mems)) < 0)
++	if ((err = container_add_file(cont, &cft_mems)) < 0)
+ 		return err;
+-	if ((err = cpuset_add_file(cs_dentry, &cft_cpu_exclusive)) < 0)
++	if ((err = container_add_file(cont, &cft_cpu_exclusive)) < 0)
+ 		return err;
+-	if ((err = cpuset_add_file(cs_dentry, &cft_mem_exclusive)) < 0)
++	if ((err = container_add_file(cont, &cft_mem_exclusive)) < 0)
+ 		return err;
+-	if ((err = cpuset_add_file(cs_dentry, &cft_notify_on_release)) < 0)
++	if ((err = container_add_file(cont, &cft_memory_migrate)) < 0)
+ 		return err;
+-	if ((err = cpuset_add_file(cs_dentry, &cft_memory_migrate)) < 0)
++	if ((err = container_add_file(cont, &cft_memory_pressure)) < 0)
+ 		return err;
+-	if ((err = cpuset_add_file(cs_dentry, &cft_memory_pressure)) < 0)
++	if ((err = container_add_file(cont, &cft_spread_page)) < 0)
+ 		return err;
+-	if ((err = cpuset_add_file(cs_dentry, &cft_spread_page)) < 0)
+-		return err;
+-	if ((err = cpuset_add_file(cs_dentry, &cft_spread_slab)) < 0)
+-		return err;
+-	if ((err = cpuset_add_file(cs_dentry, &cft_tasks)) < 0)
++	if ((err = container_add_file(cont, &cft_spread_slab)) < 0)
+ 		return err;
++	/* memory_pressure_enabled is in root cpuset only */
++	if (!cont->parent)
++		return container_add_file(cont, &cft_memory_pressure_enabled);
+ 	return 0;
+ }
+ 
+ /*
++ * post_clone() is called at the end of container_clone().
++ * 'container' was just created automatically as a result of
++ * a container_clone(), and the current task is about to
++ * be moved into 'container'.
++ *
++ * Currently we refuse to set up the container - leaving its cpus
++ * and mems empty, so the task cannot be attached, which in turn
++ * fails the sys_unshare() or clone() that initiated it - if any
++ * sibling cpusets have exclusive cpus or mems.
++ *
++ * If this becomes a problem for some users who wish to
++ * allow that scenario, then cpuset_post_clone() could be
++ * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
++ * (and likewise for mems) to the new container.
++ */
++void cpuset_post_clone(struct container_subsys *ss,
++		struct container *container)
++{
++	struct container *parent, *child;
++	struct cpuset *cs, *parent_cs;
++
++	parent = container->parent;
++	list_for_each_entry(child, &parent->children, sibling) {
++		cs = container_cs(child);
++		if (is_mem_exclusive(cs) || is_cpu_exclusive(cs))
++			return;
++	}
++	cs = container_cs(container);
++	parent_cs = container_cs(parent);
++
++	cs->mems_allowed = parent_cs->mems_allowed;
++	cs->cpus_allowed = parent_cs->cpus_allowed;
++	return;
++}
++
++/*
+  *	cpuset_create - create a cpuset
+  *	parent:	cpuset that will be parent of the new cpuset.
+  *	name:		name of the new cpuset. Will be strcpy'ed.
+@@ -1885,124 +1234,62 @@
+  *	Must be called with the mutex on the parent inode held
+  */
+ 
+-static long cpuset_create(struct cpuset *parent, const char *name, int mode)
++int cpuset_create(struct container_subsys *ss, struct container *cont)
+ {
+ 	struct cpuset *cs;
+-	int err;
++	struct cpuset *parent;
+ 
++	if (!cont->parent) {
++		/* This is early initialization for the top container */
++		set_container_cs(cont, &top_cpuset);
++		top_cpuset.css.container = cont;
++		top_cpuset.mems_generation = cpuset_mems_generation++;
++		return 0;
++	}
++	parent = container_cs(cont->parent);
+ 	cs = kmalloc(sizeof(*cs), GFP_KERNEL);
+ 	if (!cs)
+ 		return -ENOMEM;
+ 
+-	mutex_lock(&manage_mutex);
+ 	cpuset_update_task_memory_state();
+ 	cs->flags = 0;
+-	if (notify_on_release(parent))
+-		set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags);
+ 	if (is_spread_page(parent))
+ 		set_bit(CS_SPREAD_PAGE, &cs->flags);
+ 	if (is_spread_slab(parent))
+ 		set_bit(CS_SPREAD_SLAB, &cs->flags);
+ 	cs->cpus_allowed = CPU_MASK_NONE;
+ 	cs->mems_allowed = NODE_MASK_NONE;
+-	atomic_set(&cs->count, 0);
+-	INIT_LIST_HEAD(&cs->sibling);
+-	INIT_LIST_HEAD(&cs->children);
+ 	cs->mems_generation = cpuset_mems_generation++;
+ 	fmeter_init(&cs->fmeter);
+ 
+ 	cs->parent = parent;
+-
+-	mutex_lock(&callback_mutex);
+-	list_add(&cs->sibling, &cs->parent->children);
++	set_container_cs(cont, cs);
++	cs->css.container = cont;
+ 	number_of_cpusets++;
+-	mutex_unlock(&callback_mutex);
+-
+-	err = cpuset_create_dir(cs, name, mode);
+-	if (err < 0)
+-		goto err;
+-
+-	/*
+-	 * Release manage_mutex before cpuset_populate_dir() because it
+-	 * will down() this new directory's i_mutex and if we race with
+-	 * another mkdir, we might deadlock.
+-	 */
+-	mutex_unlock(&manage_mutex);
+-
+-	err = cpuset_populate_dir(cs->dentry);
+-	/* If err < 0, we have a half-filled directory - oh well ;) */
+ 	return 0;
+-err:
+-	list_del(&cs->sibling);
+-	mutex_unlock(&manage_mutex);
+-	kfree(cs);
+-	return err;
+-}
+-
+-static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+-{
+-	struct cpuset *c_parent = dentry->d_parent->d_fsdata;
+-
+-	/* the vfs holds inode->i_mutex already */
+-	return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
+ }
+ 
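++/*
++ * container_cs() and set_container_cs(), used throughout this file,
++ * are assumed to be thin wrappers around the generic per-subsystem
++ * state pointer, roughly along these lines (sketch only):
++ *
++ *	static inline struct cpuset *container_cs(struct container *cont)
++ *	{
++ *		return container_of(container_subsys_state(cont,
++ *				cpuset_subsys_id), struct cpuset, css);
++ *	}
++ */
++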
+-/*
+- * Locking note on the strange update_flag() call below:
+- *
+- * If the cpuset being removed is marked cpu_exclusive, then simulate
+- * turning cpu_exclusive off, which will call update_cpu_domains().
+- * The lock_cpu_hotplug() call in update_cpu_domains() must not be
+- * made while holding callback_mutex.  Elsewhere the kernel nests
+- * callback_mutex inside lock_cpu_hotplug() calls.  So the reverse
+- * nesting would risk an ABBA deadlock.
+- */
+-
+-static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
++void cpuset_destroy(struct container_subsys *ss, struct container *cont)
+ {
+-	struct cpuset *cs = dentry->d_fsdata;
+-	struct dentry *d;
+-	struct cpuset *parent;
+-	char *pathbuf = NULL;
+-
+-	/* the vfs holds both inode->i_mutex already */
++	struct cpuset *cs = container_cs(cont);
+ 
+-	mutex_lock(&manage_mutex);
+ 	cpuset_update_task_memory_state();
+-	if (atomic_read(&cs->count) > 0) {
+-		mutex_unlock(&manage_mutex);
+-		return -EBUSY;
+-	}
+-	if (!list_empty(&cs->children)) {
+-		mutex_unlock(&manage_mutex);
+-		return -EBUSY;
+-	}
+-	if (is_cpu_exclusive(cs)) {
+-		int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0");
+-		if (retval < 0) {
+-			mutex_unlock(&manage_mutex);
+-			return retval;
+-		}
+-	}
+-	parent = cs->parent;
+-	mutex_lock(&callback_mutex);
+-	set_bit(CS_REMOVED, &cs->flags);
+-	list_del(&cs->sibling);	/* delete my sibling from parent->children */
+-	spin_lock(&cs->dentry->d_lock);
+-	d = dget(cs->dentry);
+-	cs->dentry = NULL;
+-	spin_unlock(&d->d_lock);
+-	cpuset_d_remove_dir(d);
+-	dput(d);
+ 	number_of_cpusets--;
+-	mutex_unlock(&callback_mutex);
+-	if (list_empty(&parent->children))
+-		check_for_release(parent, &pathbuf);
+-	mutex_unlock(&manage_mutex);
+-	cpuset_release_agent(pathbuf);
+-	return 0;
++	kfree(cs);
+ }
+ 
++struct container_subsys cpuset_subsys = {
++	.name = "cpuset",
++	.create = cpuset_create,
++	.destroy  = cpuset_destroy,
++	.can_attach = cpuset_can_attach,
++	.attach = cpuset_attach,
++	.populate = cpuset_populate,
++	.post_clone = cpuset_post_clone,
++	.subsys_id = cpuset_subsys_id,
++	.early_init = 1,
++};
++
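++/*
++ * With the subsystem table above registered, per-task lookups reduce
++ * to the generic task_subsys_state() call; task_cs(), used in the
++ * hunks below, is presumably defined along these lines (sketch only):
++ *
++ *	static inline struct cpuset *task_cs(struct task_struct *task)
++ *	{
++ *		return container_of(task_subsys_state(task,
++ *				cpuset_subsys_id), struct cpuset, css);
++ *	}
++ */
++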
+ /*
+  * cpuset_init_early - just enough so that the calls to
+  * cpuset_update_task_memory_state() in early init code
+@@ -2011,13 +1298,11 @@
+ 
+ int __init cpuset_init_early(void)
+ {
+-	struct task_struct *tsk = current;
+-
+-	tsk->cpuset = &top_cpuset;
+-	tsk->cpuset->mems_generation = cpuset_mems_generation++;
++	top_cpuset.mems_generation = cpuset_mems_generation++;
+ 	return 0;
+ }
+ 
++
+ /**
+  * cpuset_init - initialize cpusets at system boot
+  *
+@@ -2026,8 +1311,7 @@
+ 
+ int __init cpuset_init(void)
+ {
+-	struct dentry *root;
+-	int err;
++	int err = 0;
+ 
+ 	top_cpuset.cpus_allowed = CPU_MASK_ALL;
+ 	top_cpuset.mems_allowed = NODE_MASK_ALL;
+@@ -2035,30 +1319,12 @@
+ 	fmeter_init(&top_cpuset.fmeter);
+ 	top_cpuset.mems_generation = cpuset_mems_generation++;
+ 
+-	init_task.cpuset = &top_cpuset;
+-
+ 	err = register_filesystem(&cpuset_fs_type);
+ 	if (err < 0)
+-		goto out;
+-	cpuset_mount = kern_mount(&cpuset_fs_type);
+-	if (IS_ERR(cpuset_mount)) {
+-		printk(KERN_ERR "cpuset: could not mount!\n");
+-		err = PTR_ERR(cpuset_mount);
+-		cpuset_mount = NULL;
+-		goto out;
+-	}
+-	root = cpuset_mount->mnt_sb->s_root;
+-	root->d_fsdata = &top_cpuset;
+-	inc_nlink(root->d_inode);
+-	top_cpuset.dentry = root;
+-	root->d_inode->i_op = &cpuset_dir_inode_operations;
+-	number_of_cpusets = 1;
+-	err = cpuset_populate_dir(root);
+-	/* memory_pressure_enabled is in root cpuset only */
+-	if (err == 0)
+-		err = cpuset_add_file(root, &cft_memory_pressure_enabled);
+-out:
+ 	return err;
++
++	number_of_cpusets = 1;
++	return 0;
+ }
+ 
+ /*
+@@ -2084,10 +1350,12 @@
+ 
+ static void guarantee_online_cpus_mems_in_subtree(const struct cpuset *cur)
+ {
++	struct container *cont;
+ 	struct cpuset *c;
+ 
+ 	/* Each of our child cpusets mems must be online */
+-	list_for_each_entry(c, &cur->children, sibling) {
++	list_for_each_entry(cont, &cur->css.container->children, sibling) {
++		c = container_cs(cont);
+ 		guarantee_online_cpus_mems_in_subtree(c);
+ 		if (!cpus_empty(c->cpus_allowed))
+ 			guarantee_online_cpus(c, &c->cpus_allowed);
+@@ -2114,7 +1382,7 @@
+ 
+ static void common_cpu_mem_hotplug_unplug(void)
+ {
+-	mutex_lock(&manage_mutex);
++	container_lock();
+ 	mutex_lock(&callback_mutex);
+ 
+ 	guarantee_online_cpus_mems_in_subtree(&top_cpuset);
+@@ -2122,7 +1390,7 @@
+ 	top_cpuset.mems_allowed = node_online_map;
+ 
+ 	mutex_unlock(&callback_mutex);
+-	mutex_unlock(&manage_mutex);
++	container_unlock();
+ }
+ 
+ /*
+@@ -2170,109 +1438,7 @@
+ }
+ 
+ /**
+- * cpuset_fork - attach newly forked task to its parents cpuset.
+- * @tsk: pointer to task_struct of forking parent process.
+- *
+- * Description: A task inherits its parent's cpuset at fork().
+- *
+- * A pointer to the shared cpuset was automatically copied in fork.c
+- * by dup_task_struct().  However, we ignore that copy, since it was
+- * not made under the protection of task_lock(), so might no longer be
+- * a valid cpuset pointer.  attach_task() might have already changed
+- * current->cpuset, allowing the previously referenced cpuset to
+- * be removed and freed.  Instead, we task_lock(current) and copy
+- * its present value of current->cpuset for our freshly forked child.
+- *
+- * At the point that cpuset_fork() is called, 'current' is the parent
+- * task, and the passed argument 'child' points to the child task.
+- **/
+ 
+-void cpuset_fork(struct task_struct *child)
+-{
+-	task_lock(current);
+-	child->cpuset = current->cpuset;
+-	atomic_inc(&child->cpuset->count);
+-	task_unlock(current);
+-}
+-
+-/**
+- * cpuset_exit - detach cpuset from exiting task
+- * @tsk: pointer to task_struct of exiting process
+- *
+- * Description: Detach cpuset from @tsk and release it.
+- *
+- * Note that cpusets marked notify_on_release force every task in
+- * them to take the global manage_mutex mutex when exiting.
+- * This could impact scaling on very large systems.  Be reluctant to
+- * use notify_on_release cpusets where very high task exit scaling
+- * is required on large systems.
+- *
+- * Don't even think about derefencing 'cs' after the cpuset use count
+- * goes to zero, except inside a critical section guarded by manage_mutex
+- * or callback_mutex.   Otherwise a zero cpuset use count is a license to
+- * any other task to nuke the cpuset immediately, via cpuset_rmdir().
+- *
+- * This routine has to take manage_mutex, not callback_mutex, because
+- * it is holding that mutex while calling check_for_release(),
+- * which calls kmalloc(), so can't be called holding callback_mutex().
+- *
+- * the_top_cpuset_hack:
+- *
+- *    Set the exiting tasks cpuset to the root cpuset (top_cpuset).
+- *
+- *    Don't leave a task unable to allocate memory, as that is an
+- *    accident waiting to happen should someone add a callout in
+- *    do_exit() after the cpuset_exit() call that might allocate.
+- *    If a task tries to allocate memory with an invalid cpuset,
+- *    it will oops in cpuset_update_task_memory_state().
+- *
+- *    We call cpuset_exit() while the task is still competent to
+- *    handle notify_on_release(), then leave the task attached to
+- *    the root cpuset (top_cpuset) for the remainder of its exit.
+- *
+- *    To do this properly, we would increment the reference count on
+- *    top_cpuset, and near the very end of the kernel/exit.c do_exit()
+- *    code we would add a second cpuset function call, to drop that
+- *    reference.  This would just create an unnecessary hot spot on
+- *    the top_cpuset reference count, to no avail.
+- *
+- *    Normally, holding a reference to a cpuset without bumping its
+- *    count is unsafe.   The cpuset could go away, or someone could
+- *    attach us to a different cpuset, decrementing the count on
+- *    the first cpuset that we never incremented.  But in this case,
+- *    top_cpuset isn't going away, and either task has PF_EXITING set,
+- *    which wards off any attach_task() attempts, or task is a failed
+- *    fork, never visible to attach_task.
+- *
+- *    Another way to do this would be to set the cpuset pointer
+- *    to NULL here, and check in cpuset_update_task_memory_state()
+- *    for a NULL pointer.  This hack avoids that NULL check, for no
+- *    cost (other than this way too long comment ;).
+- **/
+-
+-void cpuset_exit(struct task_struct *tsk)
+-{
+-	struct cpuset *cs;
+-
+-	task_lock(current);
+-	cs = tsk->cpuset;
+-	tsk->cpuset = &top_cpuset;	/* the_top_cpuset_hack - see above */
+-	task_unlock(current);
+-
+-	if (notify_on_release(cs)) {
+-		char *pathbuf = NULL;
+-
+-		mutex_lock(&manage_mutex);
+-		if (atomic_dec_and_test(&cs->count))
+-			check_for_release(cs, &pathbuf);
+-		mutex_unlock(&manage_mutex);
+-		cpuset_release_agent(pathbuf);
+-	} else {
+-		atomic_dec(&cs->count);
+-	}
+-}
+-
+-/**
+  * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
+  * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
+  *
+@@ -2288,7 +1454,7 @@
+ 
+ 	mutex_lock(&callback_mutex);
+ 	task_lock(tsk);
+-	guarantee_online_cpus(tsk->cpuset, &mask);
++	guarantee_online_cpus(task_cs(tsk), &mask);
+ 	task_unlock(tsk);
+ 	mutex_unlock(&callback_mutex);
+ 
+@@ -2316,7 +1482,7 @@
+ 
+ 	mutex_lock(&callback_mutex);
+ 	task_lock(tsk);
+-	guarantee_online_mems(tsk->cpuset, &mask);
++	guarantee_online_mems(task_cs(tsk), &mask);
+ 	task_unlock(tsk);
+ 	mutex_unlock(&callback_mutex);
+ 
+@@ -2447,7 +1613,7 @@
+ 	mutex_lock(&callback_mutex);
+ 
+ 	task_lock(current);
+-	cs = nearest_exclusive_ancestor(current->cpuset);
++	cs = nearest_exclusive_ancestor(task_cs(current));
+ 	task_unlock(current);
+ 
+ 	allowed = node_isset(node, cs->mems_allowed);
+@@ -2584,7 +1750,7 @@
+ 		task_unlock(current);
+ 		goto done;
+ 	}
+-	cs1 = nearest_exclusive_ancestor(current->cpuset);
++	cs1 = nearest_exclusive_ancestor(task_cs(current));
+ 	task_unlock(current);
+ 
+ 	task_lock((struct task_struct *)p);
+@@ -2592,7 +1758,7 @@
+ 		task_unlock((struct task_struct *)p);
+ 		goto done;
+ 	}
+-	cs2 = nearest_exclusive_ancestor(p->cpuset);
++	cs2 = nearest_exclusive_ancestor(task_cs((struct task_struct *)p));
+ 	task_unlock((struct task_struct *)p);
+ 
+ 	overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed);
+@@ -2628,14 +1794,12 @@
+ 
+ void __cpuset_memory_pressure_bump(void)
+ {
+-	struct cpuset *cs;
+-
+ 	task_lock(current);
+-	cs = current->cpuset;
+-	fmeter_markevent(&cs->fmeter);
++	fmeter_markevent(&task_cs(current)->fmeter);
+ 	task_unlock(current);
+ }
+ 
++#ifdef CONFIG_PROC_PID_CPUSET
+ /*
+  * proc_cpuset_show()
+  *  - Print tasks cpuset path into seq_file.
+@@ -2652,6 +1816,7 @@
+ 	struct pid *pid;
+ 	struct task_struct *tsk;
+ 	char *buf;
++	struct container_subsys_state *css;
+ 	int retval;
+ 
+ 	retval = -ENOMEM;
+@@ -2666,15 +1831,15 @@
+ 		goto out_free;
+ 
+ 	retval = -EINVAL;
+-	mutex_lock(&manage_mutex);
+-
+-	retval = cpuset_path(tsk->cpuset, buf, PAGE_SIZE);
++	container_lock();
++	css = task_subsys_state(tsk, cpuset_subsys_id);
++	retval = container_path(css->container, buf, PAGE_SIZE);
+ 	if (retval < 0)
+ 		goto out_unlock;
+ 	seq_puts(m, buf);
+ 	seq_putc(m, '\n');
+ out_unlock:
+-	mutex_unlock(&manage_mutex);
++	container_unlock();
+ 	put_task_struct(tsk);
+ out_free:
+ 	kfree(buf);
+@@ -2694,6 +1859,7 @@
+ 	.llseek		= seq_lseek,
+ 	.release	= single_release,
+ };
++#endif /* CONFIG_PROC_PID_CPUSET */
+ 
+ /* Display task cpus_allowed, mems_allowed in /proc/<pid>/status file. */
+ char *cpuset_task_status_allowed(struct task_struct *task, char *buffer)
+diff -Nurb linux-2.6.22-570/kernel/exit.c linux-2.6.22-try2/kernel/exit.c
+--- linux-2.6.22-570/kernel/exit.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/exit.c	2007-12-19 15:29:24.000000000 -0500
+@@ -31,7 +31,8 @@
+ #include <linux/mempolicy.h>
+ #include <linux/taskstats_kern.h>
+ #include <linux/delayacct.h>
+-#include <linux/cpuset.h>
++#include <linux/freezer.h>
++#include <linux/container.h>
+ #include <linux/syscalls.h>
+ #include <linux/signal.h>
+ #include <linux/posix-timers.h>
+@@ -393,6 +394,11 @@
+ 	 * they would be locked into memory.
+ 	 */
+ 	exit_mm(current);
++	/*
++	 * We don't want to have TIF_FREEZE set if the system-wide hibernation
++	 * or suspend transition begins right now.
++	 */
++	current->flags |= PF_NOFREEZE;
+ 
+ 	set_special_pids(1, 1);
+ 	proc_clear_tty(current);
+@@ -875,6 +881,34 @@
+ 		release_task(tsk);
+ }
+ 
++#ifdef CONFIG_DEBUG_STACK_USAGE
++static void check_stack_usage(void)
++{
++	static DEFINE_SPINLOCK(low_water_lock);
++	static int lowest_to_date = THREAD_SIZE;
++	unsigned long *n = end_of_stack(current);
++	unsigned long free;
++
++	while (*n == 0)
++		n++;
++	free = (unsigned long)n - (unsigned long)end_of_stack(current);
++
++	if (free >= lowest_to_date)
++		return;
++
++	spin_lock(&low_water_lock);
++	if (free < lowest_to_date) {
++		printk(KERN_WARNING "%s used greatest stack depth: %lu bytes "
++				"left\n",
++				current->comm, free);
++		lowest_to_date = free;
++	}
++	spin_unlock(&low_water_lock);
++}
++#else
++static inline void check_stack_usage(void) {}
++#endif
++
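++/*
++ * The scan above relies on the thread stack being zero-filled when it
++ * is allocated, so the first non-zero word past end_of_stack() marks
++ * the deepest the stack has ever grown (the stack grows down):
++ *
++ *	end_of_stack(p)                               stack base
++ *	| 0 0 0 ... 0 | first non-zero word | live frames ... |
++ *	|<--- free --->|
++ */
++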
+ fastcall NORET_TYPE void do_exit(long code)
+ {
+ 	struct task_struct *tsk = current;
+@@ -966,8 +1000,9 @@
+ 	exit_sem(tsk);
+ 	__exit_files(tsk);
+ 	__exit_fs(tsk);
++	check_stack_usage();
+ 	exit_thread();
+-	cpuset_exit(tsk);
++	container_exit(tsk, 1);
+ 	exit_keys(tsk);
+ 
+ 	if (group_dead && tsk->signal->leader)
+diff -Nurb linux-2.6.22-570/kernel/fork.c linux-2.6.22-try2/kernel/fork.c
+--- linux-2.6.22-570/kernel/fork.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/fork.c	2007-12-19 15:52:37.000000000 -0500
+@@ -29,7 +29,7 @@
+ #include <linux/nsproxy.h>
+ #include <linux/capability.h>
+ #include <linux/cpu.h>
+-#include <linux/cpuset.h>
++#include <linux/container.h>
+ #include <linux/security.h>
+ #include <linux/swap.h>
+ #include <linux/syscalls.h>
+@@ -342,6 +342,8 @@
+ 	atomic_set(&mm->mm_count, 1);
+ 	init_rwsem(&mm->mmap_sem);
+ 	INIT_LIST_HEAD(&mm->mmlist);
++	mm->flags = (current->mm) ? current->mm->flags
++				  : MMF_DUMP_FILTER_DEFAULT;
+ 	mm->core_waiters = 0;
+ 	mm->nr_ptes = 0;
+ 	__set_mm_counter(mm, file_rss, 0);
+@@ -936,7 +938,7 @@
+ {
+ 	unsigned long new_flags = p->flags;
+ 
+-	new_flags &= ~(PF_SUPERPRIV | PF_NOFREEZE);
++	new_flags &= ~PF_SUPERPRIV;
+ 	new_flags |= PF_FORKNOEXEC;
+ 	if (!(clone_flags & CLONE_PTRACE))
+ 		p->ptrace = 0;
+@@ -977,6 +979,7 @@
+ {
+ 	int retval;
+ 	struct task_struct *p = NULL;
++	int container_callbacks_done = 0;
+ 	struct vx_info *vxi;
+ 	struct nx_info *nxi;
+ 
+@@ -1061,11 +1064,6 @@
+ 	delayacct_tsk_init(p);	/* Must remain after dup_task_struct() */
+ 	copy_flags(clone_flags, p);
+ 	p->pid = pid_nr(pid);
+-	retval = -EFAULT;
+-	if (clone_flags & CLONE_PARENT_SETTID)
+-		if (put_user(p->pid, parent_tidptr))
+-			goto bad_fork_cleanup_delays_binfmt;
+-
+ 	INIT_LIST_HEAD(&p->children);
+ 	INIT_LIST_HEAD(&p->sibling);
+ 	p->vfork_done = NULL;
+@@ -1095,17 +1093,19 @@
+ 
+ 	p->lock_depth = -1;		/* -1 = no lock */
+ 	do_posix_clock_monotonic_gettime(&p->start_time);
++	p->real_start_time = p->start_time;
++	monotonic_to_bootbased(&p->real_start_time);
+ 	p->security = NULL;
+ 	p->io_context = NULL;
+ 	p->io_wait = NULL;
+ 	p->audit_context = NULL;
+-	cpuset_fork(p);
++	container_fork(p);
+ #ifdef CONFIG_NUMA
+  	p->mempolicy = mpol_copy(p->mempolicy);
+  	if (IS_ERR(p->mempolicy)) {
+  		retval = PTR_ERR(p->mempolicy);
+  		p->mempolicy = NULL;
+- 		goto bad_fork_cleanup_cpuset;
++ 		goto bad_fork_cleanup_container;
+  	}
+ 	mpol_fix_fork_child_flag(p);
+ #endif
+@@ -1215,6 +1215,12 @@
+ 	/* Perform scheduler related setup. Assign this task to a CPU. */
+ 	sched_fork(p, clone_flags);
+ 
++	/* Now that the task is set up, run container callbacks if
++	 * necessary. We need to run them before the task is visible
++	 * on the tasklist. */
++	container_fork_callbacks(p);
++	container_callbacks_done = 1;
++
+ 	/* Need tasklist lock for parent etc handling! */
+ 	write_lock_irq(&tasklist_lock);
+ 
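++
++	/*
++	 * Fork-time container lifecycle as wired up by this patch
++	 * (sketch):
++	 *
++	 *	container_fork(p);            attach p to parent's containers
++	 *	...
++	 *	container_fork_callbacks(p);  subsystem fork callbacks, run
++	 *	                              before p is on the tasklist
++	 *	...
++	 * bad_fork_*:
++	 *	container_exit(p, container_callbacks_done);
++	 */
++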
+@@ -1314,6 +1320,14 @@
+ 	if (nxi)
+ 		claim_nx_info(nxi, p);
+ 	write_unlock_irq(&tasklist_lock);
++
++	/*
++	 * Now that we know the fork has succeeded, record the new
++	 * TID.  It's too late to back out if this fails.
++	 */
++	if (clone_flags & CLONE_PARENT_SETTID)
++		put_user(p->pid, parent_tidptr);
++
+ 	proc_fork_connector(p);
+ 	return p;
+ 
+@@ -1341,10 +1355,9 @@
+ bad_fork_cleanup_policy:
+ #ifdef CONFIG_NUMA
+ 	mpol_free(p->mempolicy);
+-bad_fork_cleanup_cpuset:
++bad_fork_cleanup_container:
+ #endif
+-	cpuset_exit(p);
+-bad_fork_cleanup_delays_binfmt:
++	container_exit(p, container_callbacks_done);
+ 	delayacct_tsk_free(p);
+ 	if (p->binfmt)
+ 		module_put(p->binfmt->module);
+@@ -1661,7 +1674,7 @@
+ 	err = -EINVAL;
+ 	if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+ 				CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
+-				CLONE_NEWUTS|CLONE_NEWIPC))
++				CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWUSER))
+ 		goto bad_unshare_out;
+ 
+ 	if ((err = unshare_thread(unshare_flags)))
+diff -Nurb linux-2.6.22-570/kernel/kgdb.c linux-2.6.22-try2/kernel/kgdb.c
+--- linux-2.6.22-570/kernel/kgdb.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/kernel/kgdb.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,1866 @@
++/*
++ * kernel/kgdb.c
++ *
++ * Maintainer: Jason Wessel <jason.wessel@windriver.com>
++ *
++ * Copyright (C) 2000-2001 VERITAS Software Corporation.
++ * Copyright (C) 2002-2004 Timesys Corporation
++ * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com>
++ * Copyright (C) 2004 Pavel Machek <pavel@suse.cz>
++ * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
++ * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
++ * Copyright (C) 2005-2007 Wind River Systems, Inc.
++ *
++ * Contributors at various stages not listed above:
++ *  Jason Wessel ( jason.wessel@windriver.com )
++ *  George Anzinger <george@mvista.com>
++ *  Anurekh Saxena (anurekh.saxena@timesys.com)
++ *  Lake Stevens Instrument Division (Glenn Engel)
++ *  Jim Kingdon, Cygnus Support.
++ *
++ * Original KGDB stub: David Grothe <dave@gcom.com>,
++ * Tigran Aivazian <tigran@sco.com>
++ *
++ * This file is licensed under the terms of the GNU General Public License
++ * version 2. This program is licensed "as is" without any warranty of any
++ * kind, whether express or implied.
++ */
++
++#include <linux/string.h>
++#include <linux/kernel.h>
++#include <linux/interrupt.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/spinlock.h>
++#include <linux/delay.h>
++#include <linux/mm.h>
++#include <linux/threads.h>
++#include <linux/reboot.h>
++#include <asm/system.h>
++#include <asm/ptrace.h>
++#include <asm/uaccess.h>
++#include <linux/kgdb.h>
++#include <asm/atomic.h>
++#include <linux/notifier.h>
++#include <linux/module.h>
++#include <asm/cacheflush.h>
++#include <linux/init.h>
++#include <linux/sysrq.h>
++#include <linux/console.h>
++#include <linux/pid_namespace.h>
++#include <asm/byteorder.h>
++
++extern int pid_max;
++/* How many times to count all of the waiting CPUs */
++#define ROUNDUP_WAIT		640000	/* Arbitrary, increase if needed. */
++#define BUF_THREAD_ID_SIZE	16
++
++/*
++ * kgdb_initialized with a value of 1 indicates that kgdb is setup and is
++ * all ready to serve breakpoints and other kernel exceptions.  A value of
++ * -1 indicates that we have tried to initialize early, and need to try
++ * again later.
++ */
++int kgdb_initialized;
++/* Is a host GDB connected to us? */
++int kgdb_connected;
++/* Could we be about to try to access a bad memory location? If so we
++ * also need to flag that this has happened. */
++int kgdb_may_fault;
++#ifdef CONFIG_PREEMPT
++static int kgdb_fault_preempt_count;
++#endif
++
++/* All the KGDB handlers are installed */
++int kgdb_from_module_registered = 0;
++/* Guard for recursive entry */
++static int exception_level = 0;
++
++/* We provide a kgdb_io_ops structure that may be overridden. */
++struct kgdb_io __attribute__ ((weak)) kgdb_io_ops;
++
++static struct kgdb_io kgdb_io_ops_prev[MAX_KGDB_IO_HANDLERS];
++static int kgdb_io_handler_cnt = 0;
++
++/* Export the following symbols for use with kernel modules */
++EXPORT_SYMBOL(kgdb_io_ops);
++EXPORT_SYMBOL(kgdb_tasklet_breakpoint);
++EXPORT_SYMBOL(kgdb_connected);
++EXPORT_SYMBOL(kgdb_register_io_module);
++EXPORT_SYMBOL(kgdb_unregister_io_module);
++EXPORT_SYMBOL(debugger_active);
++
++/*
++ * Holds information about breakpoints in a kernel. These breakpoints are
++ * added and removed by gdb.
++ */
++struct kgdb_bkpt kgdb_break[MAX_BREAKPOINTS];
++
++struct kgdb_arch *kgdb_ops = &arch_kgdb_ops;
++
++static const char hexchars[] = "0123456789abcdef";
++
++static spinlock_t slavecpulocks[NR_CPUS];
++static atomic_t procindebug[NR_CPUS];
++atomic_t kgdb_setting_breakpoint;
++EXPORT_SYMBOL(kgdb_setting_breakpoint);
++struct task_struct *kgdb_usethread, *kgdb_contthread;
++
++int debugger_step;
++atomic_t debugger_active;
++
++/* Our I/O buffers. */
++static char remcom_in_buffer[BUFMAX];
++static char remcom_out_buffer[BUFMAX];
++/* Storage for the registers, in GDB format. */
++static unsigned long gdb_regs[(NUMREGBYTES + sizeof(unsigned long) - 1) /
++			      sizeof(unsigned long)];
++/* Storage of registers for handling a fault. */
++unsigned long kgdb_fault_jmp_regs[NUMCRITREGBYTES / sizeof(unsigned long)]
++ JMP_REGS_ALIGNMENT;
++static int kgdb_notify_reboot(struct notifier_block *this,
++				unsigned long code ,void *x);
++struct debuggerinfo_struct {
++	void *debuggerinfo;
++	struct task_struct *task;
++} kgdb_info[NR_CPUS];
++
++/* to keep track of the CPU which is doing the single stepping */
++atomic_t cpu_doing_single_step = ATOMIC_INIT(-1);
++
++atomic_t  kgdb_sync_softlockup[NR_CPUS] = {ATOMIC_INIT(0)};
++
++/* reboot notifier block */
++static struct notifier_block kgdb_reboot_notifier = {
++	.notifier_call  = kgdb_notify_reboot,
++	.next           = NULL,
++	.priority       = INT_MAX,
++};
++
++int __attribute__ ((weak))
++     kgdb_validate_break_address(unsigned long addr)
++{
++	int error = 0;
++	char tmp_variable[BREAK_INSTR_SIZE];
++	error = kgdb_get_mem((char *)addr, tmp_variable, BREAK_INSTR_SIZE);
++	return error;
++}
++
++int __attribute__ ((weak))
++     kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr)
++{
++	int error = 0;
++	if ((error = kgdb_get_mem((char *)addr,
++		saved_instr, BREAK_INSTR_SIZE)) < 0)
++			return error;
++
++	if ((error = kgdb_set_mem((char *)addr, kgdb_ops->gdb_bpt_instr,
++		BREAK_INSTR_SIZE)) < 0)
++			return error;
++	return 0;
++}
++
++int __attribute__ ((weak))
++     kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle)
++{
++	int error = 0;
++	if ((error = kgdb_set_mem((char *)addr, (char *)bundle,
++		BREAK_INSTR_SIZE)) < 0)
++			return error;
++	return 0;
++}
++
++unsigned long __attribute__ ((weak))
++    kgdb_arch_pc(int exception, struct pt_regs *regs)
++{
++	return instruction_pointer(regs);
++}
++
++static int hex(char ch)
++{
++	if ((ch >= 'a') && (ch <= 'f'))
++		return (ch - 'a' + 10);
++	if ((ch >= '0') && (ch <= '9'))
++		return (ch - '0');
++	if ((ch >= 'A') && (ch <= 'F'))
++		return (ch - 'A' + 10);
++	return (-1);
++}
++
++/* scan for the sequence $<data>#<checksum> */
++static void get_packet(char *buffer)
++{
++	unsigned char checksum;
++	unsigned char xmitcsum;
++	int count;
++	char ch;
++	if (!kgdb_io_ops.read_char)
++		return;
++	do {
++		/* Spin and wait around for the start character, ignore all
++		 * other characters */
++		while ((ch = (kgdb_io_ops.read_char())) != '$') ;
++		kgdb_connected = 1;
++		checksum = 0;
++		xmitcsum = -1;
++
++		count = 0;
++
++		/* now, read until a # or end of buffer is found */
++		while (count < (BUFMAX - 1)) {
++			ch = kgdb_io_ops.read_char();
++			if (ch == '#')
++				break;
++			checksum = checksum + ch;
++			buffer[count] = ch;
++			count = count + 1;
++		}
++		buffer[count] = 0;
++
++		if (ch == '#') {
++			xmitcsum = hex(kgdb_io_ops.read_char()) << 4;
++			xmitcsum += hex(kgdb_io_ops.read_char());
++
++			if (checksum != xmitcsum)
++				/* failed checksum */
++				kgdb_io_ops.write_char('-');
++			else
++				/* successful transfer */
++				kgdb_io_ops.write_char('+');
++			if (kgdb_io_ops.flush)
++				kgdb_io_ops.flush();
++		}
++	} while (checksum != xmitcsum);
++}
++
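++/*
++ * Worked example of the framing handled above: GDB's "read registers"
++ * request is the single character 'g'.  The checksum is the byte sum
++ * modulo 256 - here just ASCII 'g' (0x67) - so the exchange is:
++ *
++ *	gdb -> stub:  $g#67
++ *	stub -> gdb:  +              (ACK, then its own $...#cs reply)
++ *
++ * A bad checksum is answered with '-' and the sender retransmits.
++ */
++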
++static void kgdb_set_may_fault(void)
++{
++	kgdb_may_fault = 1;
++#ifdef CONFIG_PREEMPT
++	kgdb_fault_preempt_count = preempt_count();
++#endif
++}
++
++static void kgdb_unset_may_fault(void)
++{
++	kgdb_may_fault = 0;
++#ifdef CONFIG_PREEMPT
++	preempt_count() = kgdb_fault_preempt_count;
++#endif
++}
++
++/*
++ * Send the packet in buffer.
++ * Check for gdb connection if asked for.
++ */
++static void put_packet(char *buffer)
++{
++	unsigned char checksum;
++	int count;
++	char ch;
++
++	if (!kgdb_io_ops.write_char)
++		return;
++	/* $<packet info>#<checksum>. */
++	while (1) {
++		kgdb_io_ops.write_char('$');
++		checksum = 0;
++		count = 0;
++
++		while ((ch = buffer[count])) {
++			kgdb_io_ops.write_char(ch);
++			checksum += ch;
++			count++;
++		}
++
++		kgdb_io_ops.write_char('#');
++		kgdb_io_ops.write_char(hexchars[checksum >> 4]);
++		kgdb_io_ops.write_char(hexchars[checksum % 16]);
++		if (kgdb_io_ops.flush)
++			kgdb_io_ops.flush();
++
++		/* Now see what we get in reply. */
++		ch = kgdb_io_ops.read_char();
++
++		if (ch == 3)
++			ch = kgdb_io_ops.read_char();
++
++		/* If we get an ACK, we are done. */
++		if (ch == '+')
++			return;
++
++		/* If we get the start of another packet, this means
++		 * that GDB is attempting to reconnect.  We will NAK
++		 * the packet being sent, and stop trying to send this
++		 * packet. */
++		if (ch == '$') {
++			kgdb_io_ops.write_char('-');
++			if (kgdb_io_ops.flush)
++				kgdb_io_ops.flush();
++			return;
++		}
++	}
++}
++
++/*
++ * convert the memory pointed to by mem into hex, placing result in buf
++ * return a pointer to the last char put in buf (null). May return an error.
++ */
++char *kgdb_mem2hex(char *mem, char *buf, int count)
++{
++	kgdb_set_may_fault();
++	if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) {
++		kgdb_unset_may_fault();
++		return ERR_PTR(-EINVAL);
++	}
++	/* Accessing some registers in a single load instruction is
++	 * required to avoid bad side effects for some I/O registers.
++	 */
++	if ((count == 2) && (((long)mem & 1) == 0)) {
++		unsigned short tmp_s = *(unsigned short *)mem;
++		mem += 2;
++#ifdef __BIG_ENDIAN
++		*buf++ = hexchars[(tmp_s >> 12) & 0xf];
++		*buf++ = hexchars[(tmp_s >> 8) & 0xf];
++		*buf++ = hexchars[(tmp_s >> 4) & 0xf];
++		*buf++ = hexchars[tmp_s & 0xf];
++#else
++		*buf++ = hexchars[(tmp_s >> 4) & 0xf];
++		*buf++ = hexchars[tmp_s & 0xf];
++		*buf++ = hexchars[(tmp_s >> 12) & 0xf];
++		*buf++ = hexchars[(tmp_s >> 8) & 0xf];
++#endif
++	} else if ((count == 4) && (((long)mem & 3) == 0)) {
++		unsigned long tmp_l = *(unsigned int *)mem;
++		mem += 4;
++#ifdef __BIG_ENDIAN
++		*buf++ = hexchars[(tmp_l >> 28) & 0xf];
++		*buf++ = hexchars[(tmp_l >> 24) & 0xf];
++		*buf++ = hexchars[(tmp_l >> 20) & 0xf];
++		*buf++ = hexchars[(tmp_l >> 16) & 0xf];
++		*buf++ = hexchars[(tmp_l >> 12) & 0xf];
++		*buf++ = hexchars[(tmp_l >> 8) & 0xf];
++		*buf++ = hexchars[(tmp_l >> 4) & 0xf];
++		*buf++ = hexchars[tmp_l & 0xf];
++#else
++		*buf++ = hexchars[(tmp_l >> 4) & 0xf];
++		*buf++ = hexchars[tmp_l & 0xf];
++		*buf++ = hexchars[(tmp_l >> 12) & 0xf];
++		*buf++ = hexchars[(tmp_l >> 8) & 0xf];
++		*buf++ = hexchars[(tmp_l >> 20) & 0xf];
++		*buf++ = hexchars[(tmp_l >> 16) & 0xf];
++		*buf++ = hexchars[(tmp_l >> 28) & 0xf];
++		*buf++ = hexchars[(tmp_l >> 24) & 0xf];
++#endif
++#ifdef CONFIG_64BIT
++	} else if ((count == 8) && (((long)mem & 7) == 0)) {
++		unsigned long long tmp_ll = *(unsigned long long *)mem;
++		mem += 8;
++#ifdef __BIG_ENDIAN
++		*buf++ = hexchars[(tmp_ll >> 60) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 56) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 52) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 48) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 44) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 40) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 36) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 32) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 28) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 24) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 20) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 16) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 12) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 8) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 4) & 0xf];
++		*buf++ = hexchars[tmp_ll & 0xf];
++#else
++		*buf++ = hexchars[(tmp_ll >> 4) & 0xf];
++		*buf++ = hexchars[tmp_ll & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 12) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 8) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 20) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 16) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 28) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 24) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 36) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 32) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 44) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 40) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 52) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 48) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 60) & 0xf];
++		*buf++ = hexchars[(tmp_ll >> 56) & 0xf];
++#endif
++#endif
++	} else {
++		while (count-- > 0) {
++			unsigned char ch = *mem++;
++			*buf++ = hexchars[ch >> 4];
++			*buf++ = hexchars[ch & 0xf];
++		}
++	}
++	kgdb_unset_may_fault();
++	*buf = 0;
++	return (buf);
++}
++
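++/*
++ * Note that the word-sized fast paths above emit nibbles in memory
++ * byte order, not numeric order: a 4-byte aligned read of the value
++ * 0x12345678 on a little-endian machine (bytes 78 56 34 12 in memory)
++ * is encoded as "78563412", which is what GDB expects for target
++ * byte order.
++ */
++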
++/*
++ * Copy the binary array pointed to by buf into mem.  Fix $, #, and
++ * 0x7d escaped with 0x7d.  Return a pointer to the character after
++ * the last byte written.
++ */
++static char *kgdb_ebin2mem(char *buf, char *mem, int count)
++{
++	kgdb_set_may_fault();
++	if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) {
++		kgdb_unset_may_fault();
++		return ERR_PTR(-EINVAL);
++	}
++	for (; count > 0; count--, buf++) {
++		if (*buf == 0x7d)
++			*mem++ = *(++buf) ^ 0x20;
++		else
++			*mem++ = *buf;
++	}
++	kgdb_unset_may_fault();
++	return mem;
++}
++
++/*
++ * convert the hex array pointed to by buf into binary to be placed in mem
++ * return a pointer to the character AFTER the last byte written
++ * May return an error.
++ */
++char *kgdb_hex2mem(char *buf, char *mem, int count)
++{
++	kgdb_set_may_fault();
++	if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) {
++		kgdb_unset_may_fault();
++		return ERR_PTR(-EINVAL);
++	}
++	if ((count == 2) && (((long)mem & 1) == 0)) {
++		unsigned short tmp_s = 0;
++#ifdef __BIG_ENDIAN
++		tmp_s |= hex(*buf++) << 12;
++		tmp_s |= hex(*buf++) << 8;
++		tmp_s |= hex(*buf++) << 4;
++		tmp_s |= hex(*buf++);
++#else
++		tmp_s |= hex(*buf++) << 4;
++		tmp_s |= hex(*buf++);
++		tmp_s |= hex(*buf++) << 12;
++		tmp_s |= hex(*buf++) << 8;
++#endif
++		*(unsigned short *)mem = tmp_s;
++		mem += 2;
++	} else if ((count == 4) && (((long)mem & 3) == 0)) {
++		unsigned long tmp_l = 0;
++#ifdef __BIG_ENDIAN
++		tmp_l |= hex(*buf++) << 28;
++		tmp_l |= hex(*buf++) << 24;
++		tmp_l |= hex(*buf++) << 20;
++		tmp_l |= hex(*buf++) << 16;
++		tmp_l |= hex(*buf++) << 12;
++		tmp_l |= hex(*buf++) << 8;
++		tmp_l |= hex(*buf++) << 4;
++		tmp_l |= hex(*buf++);
++#else
++		tmp_l |= hex(*buf++) << 4;
++		tmp_l |= hex(*buf++);
++		tmp_l |= hex(*buf++) << 12;
++		tmp_l |= hex(*buf++) << 8;
++		tmp_l |= hex(*buf++) << 20;
++		tmp_l |= hex(*buf++) << 16;
++		tmp_l |= hex(*buf++) << 28;
++		tmp_l |= hex(*buf++) << 24;
++#endif
++		*(unsigned long *)mem = tmp_l;
++		mem += 4;
++	} else {
++		int i;
++		for (i = 0; i < count; i++) {
++			unsigned char ch = hex(*buf++) << 4;
++			ch |= hex(*buf++);
++			*mem++ = ch;
++		}
++	}
++	kgdb_unset_may_fault();
++	return (mem);
++}
++
++/*
++ * While we find nice hex chars, build a long_val.
++ * Return number of chars processed.
++ */
++int kgdb_hex2long(char **ptr, long *long_val)
++{
++	int hex_val, num = 0;
++
++	*long_val = 0;
++
++	while (**ptr) {
++		hex_val = hex(**ptr);
++		if (hex_val >= 0) {
++			*long_val = (*long_val << 4) | hex_val;
++			num++;
++		} else
++			break;
++
++		(*ptr)++;
++	}
++
++	return (num);
++}
++
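++/*
++ * Usage sketch, as in the 'm' packet handler below: given the payload
++ * "7b2,4",
++ *
++ *	kgdb_hex2long(&ptr, &addr);	 addr = 0x7b2, ptr now at ","
++ *	if (*ptr++ == ',')
++ *		kgdb_hex2long(&ptr, &length);	 length = 4
++ */
++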
++/* Write memory due to an 'M' or 'X' packet. */
++static char *write_mem_msg(int binary)
++{
++	char *ptr = &remcom_in_buffer[1];
++	unsigned long addr, length;
++
++	if (kgdb_hex2long(&ptr, &addr) > 0 && *(ptr++) == ',' &&
++	    kgdb_hex2long(&ptr, &length) > 0 && *(ptr++) == ':') {
++		if (binary)
++			ptr = kgdb_ebin2mem(ptr, (char *)addr, length);
++		else
++			ptr = kgdb_hex2mem(ptr, (char *)addr, length);
++		if (CACHE_FLUSH_IS_SAFE)
++			flush_icache_range(addr, addr + length + 1);
++		if (IS_ERR(ptr))
++			return ptr;
++		return NULL;
++	}
++
++	return ERR_PTR(-EINVAL);
++}
++
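++/*
++ * Packet shapes handled above (illustrative): "M7b2,2:dead" writes
++ * the two bytes 0xde 0xad at address 0x7b2 from hex digits, while
++ * "X7b2,2:<binary>" does the same from 0x7d-escaped binary; anything
++ * malformed earns -EINVAL.
++ */
++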
++static inline char *pack_hex_byte(char *pkt, int byte)
++{
++	*pkt++ = hexchars[(byte >> 4) & 0xf];
++	*pkt++ = hexchars[(byte & 0xf)];
++	return pkt;
++}
++
++static inline void error_packet(char *pkt, int error)
++{
++	error = -error;
++	pkt[0] = 'E';
++	pkt[1] = hexchars[(error / 10)];
++	pkt[2] = hexchars[(error % 10)];
++	pkt[3] = '\0';
++}
++
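++/*
++ * Example: error_packet(pkt, -EINVAL), with EINVAL == 22, yields the
++ * three-character reply "E22" (two decimal digits, as encoded above).
++ */
++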
++static char *pack_threadid(char *pkt, threadref * id)
++{
++	char *limit;
++	unsigned char *altid;
++
++	altid = (unsigned char *)id;
++	limit = pkt + BUF_THREAD_ID_SIZE;
++	while (pkt < limit)
++		pkt = pack_hex_byte(pkt, *altid++);
++
++	return pkt;
++}
++
++void int_to_threadref(threadref * id, int value)
++{
++	unsigned char *scan;
++	int i = 4;
++
++	scan = (unsigned char *)id;
++	while (i--)
++		*scan++ = 0;
++	*scan++ = (value >> 24) & 0xff;
++	*scan++ = (value >> 16) & 0xff;
++	*scan++ = (value >> 8) & 0xff;
++	*scan++ = (value & 0xff);
++}
++
++static struct task_struct *getthread(struct pt_regs *regs, int tid)
++{
++	if (init_pid_ns.last_pid == 0)
++		return current;
++
++	if (num_online_cpus() &&
++	    (tid >= pid_max + num_online_cpus() + kgdb_ops->shadowth))
++		return NULL;
++
++	if (kgdb_ops->shadowth && (tid >= pid_max + num_online_cpus()))
++		return kgdb_get_shadow_thread(regs, tid - pid_max -
++					      num_online_cpus());
++
++	if (tid >= pid_max)
++		return idle_task(tid - pid_max);
++
++	if (!tid)
++		return NULL;
++
++	return find_task_by_pid(tid);
++}
++
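++/*
++ * The thread-id space decoded above, assuming for illustration
++ * pid_max = 32768 on a two-CPU box:
++ *
++ *	tid 1 .. 32767     ->  find_task_by_pid(tid)
++ *	tid 32768, 32769   ->  idle_task(0), idle_task(1)
++ *	tid 32770 ..       ->  arch shadow threads, if kgdb_ops->shadowth
++ */
++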
++#ifdef CONFIG_SMP
++static void kgdb_wait(struct pt_regs *regs)
++{
++	unsigned long flags;
++	int processor;
++
++	local_irq_save(flags);
++	processor = raw_smp_processor_id();
++	kgdb_info[processor].debuggerinfo = regs;
++	kgdb_info[processor].task = current;
++	atomic_set(&procindebug[processor], 1);
++	atomic_set(&kgdb_sync_softlockup[raw_smp_processor_id()], 1);
++
++	/* Wait till master processor goes completely into the debugger.
++	 * FIXME: this looks racy */
++	while (!atomic_read(&procindebug[atomic_read(&debugger_active) - 1])) {
++		int i = 10;	/* an arbitrary number */
++
++		while (--i)
++			cpu_relax();
++	}
++
++	/* Wait till master processor is done with debugging */
++	spin_lock(&slavecpulocks[processor]);
++
++	kgdb_info[processor].debuggerinfo = NULL;
++	kgdb_info[processor].task = NULL;
++
++	/* fix up hardware debug registers on local cpu */
++	if (kgdb_ops->correct_hw_break)
++		kgdb_ops->correct_hw_break();
++	/* Signal the master processor that we are done */
++	atomic_set(&procindebug[processor], 0);
++	spin_unlock(&slavecpulocks[processor]);
++	local_irq_restore(flags);
++}
++#endif
++
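++/*
++ * Rough master/slave handshake between kgdb_wait() above and the
++ * exception handler below (sketch):
++ *
++ *	master CPU                        slave CPU (kgdb_wait)
++ *	----------                        ---------------------
++ *	spin_lock(slavecpulocks[i])
++ *	kgdb_roundup_cpus()  -- IPI -->   procindebug[cpu] = 1
++ *	... talk to gdb ...               spin_lock(slavecpulocks[cpu])
++ *	spin_unlock(slavecpulocks[i])     lock acquired: fix hw breaks
++ *	wait for procindebug[i] == 0 <--  procindebug[cpu] = 0, unlock
++ */
++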
++int kgdb_get_mem(char *addr, unsigned char *buf, int count)
++{
++	kgdb_set_may_fault();
++	if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) {
++		kgdb_unset_may_fault();
++		return -EINVAL;
++	}
++	while (count) {
++		if ((unsigned long)addr < TASK_SIZE) {
++			kgdb_unset_may_fault();
++			return -EINVAL;
++		}
++		*buf++ = *addr++;
++		count--;
++	}
++	kgdb_unset_may_fault();
++	return 0;
++}
++
++int kgdb_set_mem(char *addr, unsigned char *buf, int count)
++{
++	kgdb_set_may_fault();
++	if ((kgdb_fault_setjmp(kgdb_fault_jmp_regs)) != 0) {
++		kgdb_unset_may_fault();
++		return -EINVAL;
++	}
++	while (count) {
++		if ((unsigned long)addr < TASK_SIZE) {
++			kgdb_unset_may_fault();
++			return -EINVAL;
++		}
++		*addr++ = *buf++;
++		count--;
++	}
++	kgdb_unset_may_fault();
++	return 0;
++}
++
++int kgdb_activate_sw_breakpoints(void)
++{
++	int i;
++	int error = 0;
++	unsigned long addr;
++	for (i = 0; i < MAX_BREAKPOINTS; i++) {
++		if (kgdb_break[i].state != bp_set)
++			continue;
++		addr = kgdb_break[i].bpt_addr;
++		if ((error = kgdb_arch_set_breakpoint(addr,
++					kgdb_break[i].saved_instr)))
++			return error;
++
++		if (CACHE_FLUSH_IS_SAFE) {
++			if (current->mm && addr < TASK_SIZE)
++				flush_cache_range(current->mm->mmap_cache,
++						addr, addr + BREAK_INSTR_SIZE);
++			else
++				flush_icache_range(addr, addr +
++						BREAK_INSTR_SIZE);
++		}
++
++		kgdb_break[i].state = bp_active;
++	}
++	return 0;
++}
++
++static int kgdb_set_sw_break(unsigned long addr)
++{
++	int i, breakno = -1;
++	int error = 0;
++	if ((error = kgdb_validate_break_address(addr)) < 0)
++		return error;
++	for (i = 0; i < MAX_BREAKPOINTS; i++) {
++		if ((kgdb_break[i].state == bp_set) &&
++			(kgdb_break[i].bpt_addr == addr))
++			return -EEXIST;
++	}
++	for (i = 0; i < MAX_BREAKPOINTS; i++) {
++		if (kgdb_break[i].state == bp_removed &&
++				kgdb_break[i].bpt_addr == addr) {
++			breakno = i;
++			break;
++		}
++	}
++
++	if (breakno == -1) {
++		for (i = 0; i < MAX_BREAKPOINTS; i++) {
++			if (kgdb_break[i].state == bp_none) {
++				breakno = i;
++				break;
++			}
++		}
++	}
++	if (breakno == -1)
++		return -E2BIG;
++
++	kgdb_break[breakno].state = bp_set;
++	kgdb_break[breakno].type = bp_breakpoint;
++	kgdb_break[breakno].bpt_addr = addr;
++
++	return 0;
++}
++
++int kgdb_deactivate_sw_breakpoints(void)
++{
++	int i;
++	int error = 0;
++	unsigned long addr;
++	for (i = 0; i < MAX_BREAKPOINTS; i++) {
++		if (kgdb_break[i].state != bp_active)
++			continue;
++		addr = kgdb_break[i].bpt_addr;
++		if ((error = kgdb_arch_remove_breakpoint(addr,
++					kgdb_break[i].saved_instr)))
++			return error;
++
++		if (CACHE_FLUSH_IS_SAFE && current->mm &&
++				addr < TASK_SIZE)
++			flush_cache_range(current->mm->mmap_cache,
++					addr, addr + BREAK_INSTR_SIZE);
++		else if (CACHE_FLUSH_IS_SAFE)
++			flush_icache_range(addr,
++					addr + BREAK_INSTR_SIZE);
++		kgdb_break[i].state = bp_set;
++	}
++	return 0;
++}
++
++static int kgdb_remove_sw_break(unsigned long addr)
++{
++	int i;
++
++	for (i = 0; i < MAX_BREAKPOINTS; i++) {
++		if ((kgdb_break[i].state == bp_set) &&
++			(kgdb_break[i].bpt_addr == addr)) {
++			kgdb_break[i].state = bp_removed;
++			return 0;
++		}
++	}
++	return -ENOENT;
++}
++
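++/*
++ * Breakpoint slot lifecycle implied by the helpers above and below:
++ *
++ *	bp_none --set_sw_break--> bp_set --activate--> bp_active
++ *	bp_active --deactivate--> bp_set --remove_sw_break--> bp_removed
++ *	bp_removed --set_sw_break at same addr--> bp_set (slot reused)
++ *
++ * Only bp_set slots get the breakpoint instruction written out by
++ * kgdb_activate_sw_breakpoints(); bp_active slots have it live in
++ * memory.
++ */
++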
++int kgdb_isremovedbreak(unsigned long addr)
++{
++	int i;
++	for (i = 0; i < MAX_BREAKPOINTS; i++) {
++		if ((kgdb_break[i].state == bp_removed) &&
++			(kgdb_break[i].bpt_addr == addr)) {
++			return 1;
++		}
++	}
++	return 0;
++}
++
++int remove_all_break(void)
++{
++	int i;
++	int error;
++	unsigned long addr;
++
++	/* Clear memory breakpoints. */
++	for (i = 0; i < MAX_BREAKPOINTS; i++) {
++		if (kgdb_break[i].state != bp_set)
++			continue;
++		addr = kgdb_break[i].bpt_addr;
++		if ((error = kgdb_arch_remove_breakpoint(addr,
++					kgdb_break[i].saved_instr)))
++			return error;
++		kgdb_break[i].state = bp_removed;
++	}
++
++	/* Clear hardware breakpoints. */
++	if (kgdb_ops->remove_all_hw_break)
++		kgdb_ops->remove_all_hw_break();
++
++	return 0;
++}
++
++static inline int shadow_pid(int realpid)
++{
++	if (realpid) {
++		return realpid;
++	}
++	return pid_max + raw_smp_processor_id();
++}
++
++static char gdbmsgbuf[BUFMAX + 1];
++static void kgdb_msg_write(const char *s, int len)
++{
++	int i;
++	int wcount;
++	char *bufptr;
++
++	/* 'O'utput */
++	gdbmsgbuf[0] = 'O';
++
++	/* Fill and send buffers... */
++	while (len > 0) {
++		bufptr = gdbmsgbuf + 1;
++
++		/* Calculate how many this time */
++		if ((len << 1) > (BUFMAX - 2))
++			wcount = (BUFMAX - 2) >> 1;
++		else
++			wcount = len;
++
++		/* Pack in hex chars */
++		for (i = 0; i < wcount; i++)
++			bufptr = pack_hex_byte(bufptr, s[i]);
++		*bufptr = '\0';
++
++		/* Move up */
++		s += wcount;
++		len -= wcount;
++
++		/* Write packet */
++		put_packet(gdbmsgbuf);
++	}
++}
++
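++/*
++ * Example: kgdb_msg_write("Hi\n", 3) hex-encodes the payload after an
++ * 'O' marker, giving "O48690a" ('H' = 0x48, 'i' = 0x69, '\n' = 0x0a),
++ * which put_packet() frames as:
++ *
++ *	$O48690a#bb
++ *
++ * GDB prints such packets on the user's console rather than treating
++ * them as command replies.
++ */
++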
++/*
++ * This function does all command processing for interfacing to gdb.
++ *
++ * Locking hierarchy:
++ *	interface locks, if any (begin_session)
++ *	kgdb lock (debugger_active)
++ *
++ * Note that since we can be in here prior to our cpumask being filled
++ * out, we err on the side of caution and loop over NR_CPUS instead
++ * of using for_each_online_cpu().
++ *
++ */
++int kgdb_handle_exception(int ex_vector, int signo, int err_code,
++			  struct pt_regs *linux_regs)
++{
++	unsigned long length, addr;
++	char *ptr;
++	unsigned long flags;
++	unsigned i;
++	long threadid;
++	threadref thref;
++	struct task_struct *thread = NULL;
++	unsigned procid;
++	int numshadowth = num_online_cpus() + kgdb_ops->shadowth;
++	long kgdb_usethreadid = 0;
++	int error = 0, all_cpus_synced = 0;
++	struct pt_regs *shadowregs;
++	int processor = raw_smp_processor_id();
++	void *local_debuggerinfo;
++
++	/* Handle recursive entry to the debugger; panic only if we
++	 * cannot recover by removing the offending breakpoint. */
++	if (atomic_read(&debugger_active) == raw_smp_processor_id() + 1) {
++		exception_level++;
++		addr = kgdb_arch_pc(ex_vector, linux_regs);
++		kgdb_deactivate_sw_breakpoints();
++		if (kgdb_remove_sw_break(addr) == 0) {
++			/* If the breakpoint was removed cleanly at the place
++			 * the exception occurred, try to recover and print a
++			 * warning to the end user, because the user planted a
++			 * breakpoint in a place that KGDB needs in order to
++			 * function.
++			 */
++			exception_level = 0;
++			kgdb_skipexception(ex_vector, linux_regs);
++			kgdb_activate_sw_breakpoints();
++			printk(KERN_CRIT "KGDB: re-enter exception: breakpoint removed\n");
++			WARN_ON(1);
++			return 0;
++		}
++		remove_all_break();
++		kgdb_skipexception(ex_vector, linux_regs);
++		if (exception_level > 1)
++			panic("Recursive entry to debugger");
++
++		printk(KERN_CRIT "KGDB: re-enter exception: ALL breakpoints removed\n");
++		panic("Recursive entry to debugger");
++		return 0;
++	}
++
++ acquirelock:
++
++	/*
++	 * Interrupts will be restored by the 'trap return' code, except when
++	 * single stepping.
++	 */
++	local_irq_save(flags);
++
++	/* Hold debugger_active */
++	procid = raw_smp_processor_id();
++
++	while (cmpxchg(&atomic_read(&debugger_active), 0, (procid + 1)) != 0) {
++		int i = 25;	/* an arbitrary number */
++
++		while (--i)
++			cpu_relax();
++
++		if (atomic_read(&cpu_doing_single_step) != -1 &&
++				atomic_read(&cpu_doing_single_step) != procid)
++			udelay(1);
++	}
++
++	atomic_set(&kgdb_sync_softlockup[raw_smp_processor_id()], 1);
++
++	/*
++	 * Don't enter if the last instance of the exception handler wanted to
++	 * come into the debugger again.
++	 */
++	if (atomic_read(&cpu_doing_single_step) != -1 &&
++	    atomic_read(&cpu_doing_single_step) != procid) {
++		atomic_set(&debugger_active, 0);
++		local_irq_restore(flags);
++		goto acquirelock;
++	}
++
++	/*
++	 * Don't enter if we have hit a removed breakpoint.
++	 */
++	if (kgdb_skipexception(ex_vector, linux_regs))
++		goto kgdb_restore;
++
++	/*
++	 * Call the I/O drivers pre_exception routine
++	 * if the I/O driver defined one
++	 */
++	if (kgdb_io_ops.pre_exception)
++		kgdb_io_ops.pre_exception();
++
++	kgdb_info[processor].debuggerinfo = linux_regs;
++	kgdb_info[processor].task = current;
++
++	kgdb_disable_hw_debug(linux_regs);
++
++	if (!debugger_step || !kgdb_contthread)
++		for (i = 0; i < NR_CPUS; i++)
++			spin_lock(&slavecpulocks[i]);
++
++#ifdef CONFIG_SMP
++	/* Make sure we get the other CPUs */
++	if (!debugger_step || !kgdb_contthread)
++		kgdb_roundup_cpus(flags);
++#endif
++
++	/* spin_lock code is good enough as a barrier so we don't
++	 * need one here */
++	atomic_set(&procindebug[processor], 1);
++
++	/* Wait a reasonable time for the other CPUs to be notified and
++	 * be waiting for us.  Very early on this could be imperfect
++	 * as num_online_cpus() could be 0.*/
++	for (i = 0; i < ROUNDUP_WAIT; i++) {
++		int cpu, num = 0;
++		for (cpu = 0; cpu < NR_CPUS; cpu++) {
++			if (atomic_read(&procindebug[cpu]))
++				num++;
++		}
++		if (num >= num_online_cpus()) {
++			all_cpus_synced = 1;
++			break;
++		}
++	}
++
++	/* Clear the out buffer. */
++	memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
++
++	/* Master processor is completely in the debugger */
++	kgdb_post_master_code(linux_regs, ex_vector, err_code);
++	kgdb_deactivate_sw_breakpoints();
++	debugger_step = 0;
++	kgdb_contthread = NULL;
++	exception_level = 0;
++
++	if (kgdb_connected) {
++		/* If we're still unable to roundup all of the CPUs,
++		 * send an 'O' packet informing the user again. */
++		if (!all_cpus_synced)
++			kgdb_msg_write("Not all CPUs have been synced for "
++				       "KGDB\n", 39);
++		/* Reply to host that an exception has occurred */
++		ptr = remcom_out_buffer;
++		*ptr++ = 'T';
++		*ptr++ = hexchars[(signo >> 4) % 16];
++		*ptr++ = hexchars[signo % 16];
++		ptr += strlen(strcpy(ptr, "thread:"));
++		int_to_threadref(&thref, shadow_pid(current->pid));
++		ptr = pack_threadid(ptr, &thref);
++		*ptr++ = ';';
++
++		put_packet(remcom_out_buffer);
++	}
++
++	kgdb_usethread = kgdb_info[processor].task;
++	kgdb_usethreadid = shadow_pid(kgdb_info[processor].task->pid);
++
++	while (kgdb_io_ops.read_char) {
++		char *bpt_type;
++		error = 0;
++
++		/* Clear the out buffer. */
++		memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
++
++		get_packet(remcom_in_buffer);
++
++		switch (remcom_in_buffer[0]) {
++		case '?':
++			/* We know that this packet is only sent
++			 * during initial connect.  So to be safe,
++			 * we clear out our breakpoints now in case
++			 * GDB is reconnecting. */
++			remove_all_break();
++			/* Also, if we haven't been able to roundup all
++			 * CPUs, send an 'O' packet informing the user
++			 * as much.  Only need to do this once. */
++			if (!all_cpus_synced)
++				kgdb_msg_write("Not all CPUs have been "
++					       "synced for KGDB\n", 39);
++			remcom_out_buffer[0] = 'S';
++			remcom_out_buffer[1] = hexchars[signo >> 4];
++			remcom_out_buffer[2] = hexchars[signo % 16];
++			break;
++
++		case 'g':	/* return the value of the CPU registers */
++			thread = kgdb_usethread;
++
++			if (!thread) {
++				thread = kgdb_info[processor].task;
++				local_debuggerinfo =
++				    kgdb_info[processor].debuggerinfo;
++			} else {
++				local_debuggerinfo = NULL;
++				for (i = 0; i < NR_CPUS; i++) {
++					/* Try to find the task on some other
++					 * or possibly this node if we do not
++					 * find the matching task then we try
++					 * to approximate the results.
++					 */
++					if (thread == kgdb_info[i].task)
++						local_debuggerinfo =
++						    kgdb_info[i].debuggerinfo;
++				}
++			}
++
++			/* All threads that don't have debuggerinfo should be
++			 * in __schedule() sleeping, since all other CPUs
++			 * are in kgdb_wait, and thus have debuggerinfo. */
++			if (kgdb_ops->shadowth &&
++			    kgdb_usethreadid >= pid_max + num_online_cpus()) {
++				shadowregs = kgdb_shadow_regs(linux_regs,
++						kgdb_usethreadid - pid_max -
++						num_online_cpus());
++				if (!shadowregs) {
++					error_packet(remcom_out_buffer,
++						     -EINVAL);
++					break;
++				}
++				regs_to_gdb_regs(gdb_regs, shadowregs);
++			} else if (local_debuggerinfo)
++				regs_to_gdb_regs(gdb_regs, local_debuggerinfo);
++			else {
++				/* Pull stuff saved during
++				 * switch_to; nothing else is
++				 * accessible (or even particularly relevant).
++				 * This should be enough for a stack trace. */
++				sleeping_thread_to_gdb_regs(gdb_regs, thread);
++			}
++			kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer,
++				     NUMREGBYTES);
++			break;
++
++			/* set the value of the CPU registers - return OK */
++		case 'G':
++			kgdb_hex2mem(&remcom_in_buffer[1], (char *)gdb_regs,
++				     NUMREGBYTES);
++
++			if (kgdb_usethread && kgdb_usethread != current)
++				error_packet(remcom_out_buffer, -EINVAL);
++			else {
++				gdb_regs_to_regs(gdb_regs, linux_regs);
++				strcpy(remcom_out_buffer, "OK");
++			}
++			break;
++
++			/* mAA..AA,LLLL  Read LLLL bytes at address AA..AA */
++		case 'm':
++			ptr = &remcom_in_buffer[1];
++			if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' &&
++			    kgdb_hex2long(&ptr, &length) > 0) {
++				if (IS_ERR(ptr = kgdb_mem2hex((char *)addr,
++							      remcom_out_buffer,
++							      length)))
++					error_packet(remcom_out_buffer,
++						     PTR_ERR(ptr));
++			} else
++				error_packet(remcom_out_buffer, -EINVAL);
++			break;
++
++			/* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */
++		case 'M':
++			if (IS_ERR(ptr = write_mem_msg(0)))
++				error_packet(remcom_out_buffer, PTR_ERR(ptr));
++			else
++				strcpy(remcom_out_buffer, "OK");
++			break;
++			/* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */
++		case 'X':
++			if (IS_ERR(ptr = write_mem_msg(1)))
++				error_packet(remcom_out_buffer, PTR_ERR(ptr));
++			else
++				strcpy(remcom_out_buffer, "OK");
++			break;
++
++			/* kill or detach. KGDB should treat this like a
++			 * continue.
++			 */
++		case 'D':
++			if ((error = remove_all_break()) < 0) {
++				error_packet(remcom_out_buffer, error);
++			} else {
++				strcpy(remcom_out_buffer, "OK");
++				kgdb_connected = 0;
++			}
++			put_packet(remcom_out_buffer);
++			goto default_handle;
++
++		case 'k':
++			/* Don't care about error from remove_all_break */
++			remove_all_break();
++			kgdb_connected = 0;
++			goto default_handle;
++
++			/* Reboot */
++		case 'R':
++			/* For now, only honor R0 */
++			if (strcmp(remcom_in_buffer, "R0") == 0) {
++				printk(KERN_CRIT "Executing reboot\n");
++				strcpy(remcom_out_buffer, "OK");
++				put_packet(remcom_out_buffer);
++				emergency_sync();
++				/* Execution should not return from
++				 * machine_restart()
++				 */
++				machine_restart(NULL);
++				kgdb_connected = 0;
++				goto default_handle;
++			}
++
++			/* query */
++		case 'q':
++			switch (remcom_in_buffer[1]) {
++			case 's':
++			case 'f':
++				if (memcmp(remcom_in_buffer + 2, "ThreadInfo",
++					   10)) {
++					error_packet(remcom_out_buffer,
++						     -EINVAL);
++					break;
++				}
++
++				/*
++				 * If pidhash_init() has not yet
++				 * completed, there isn't much we
++				 * can give back.
++				 */
++				if (init_pid_ns.last_pid == 0) {
++					if (remcom_in_buffer[1] == 'f')
++						strcpy(remcom_out_buffer,
++						       "m0000000000000001");
++					break;
++				}
++
++				if (remcom_in_buffer[1] == 'f')
++					threadid = 1;
++				remcom_out_buffer[0] = 'm';
++				ptr = remcom_out_buffer + 1;
++				for (i = 0; i < 17 && threadid < pid_max +
++				     numshadowth; threadid++) {
++					thread = getthread(linux_regs,
++							   threadid);
++					if (thread) {
++						int_to_threadref(&thref,
++								 threadid);
++						pack_threadid(ptr, &thref);
++						ptr += 16;
++						*(ptr++) = ',';
++						i++;
++					}
++				}
++				*(--ptr) = '\0';
++				break;
++
++			case 'C':
++				/* Current thread id */
++				strcpy(remcom_out_buffer, "QC");
++
++				threadid = shadow_pid(current->pid);
++
++				int_to_threadref(&thref, threadid);
++				pack_threadid(remcom_out_buffer + 2, &thref);
++				break;
++			case 'T':
++				if (memcmp(remcom_in_buffer + 1,
++					   "ThreadExtraInfo,", 16)) {
++					error_packet(remcom_out_buffer,
++						     -EINVAL);
++					break;
++				}
++				threadid = 0;
++				ptr = remcom_in_buffer + 17;
++				kgdb_hex2long(&ptr, &threadid);
++				if (!getthread(linux_regs, threadid)) {
++					error_packet(remcom_out_buffer,
++						     -EINVAL);
++					break;
++				}
++				if (threadid < pid_max) {
++					kgdb_mem2hex(getthread(linux_regs,
++							       threadid)->comm,
++						     remcom_out_buffer, 16);
++				} else if (threadid >= pid_max +
++					   num_online_cpus()) {
++					kgdb_shadowinfo(linux_regs,
++							remcom_out_buffer,
++							threadid - pid_max -
++							num_online_cpus());
++				} else {
++					static char tmpstr[23 +
++							   BUF_THREAD_ID_SIZE];
++					sprintf(tmpstr, "Shadow task %d"
++						" for pid 0",
++						(int)(threadid - pid_max));
++					kgdb_mem2hex(tmpstr, remcom_out_buffer,
++						     strlen(tmpstr));
++				}
++				break;
++			}
++			break;
++
++			/* task related */
++		case 'H':
++			switch (remcom_in_buffer[1]) {
++			case 'g':
++				ptr = &remcom_in_buffer[2];
++				kgdb_hex2long(&ptr, &threadid);
++				thread = getthread(linux_regs, threadid);
++				if (!thread && threadid > 0) {
++					error_packet(remcom_out_buffer,
++						     -EINVAL);
++					break;
++				}
++				kgdb_usethread = thread;
++				kgdb_usethreadid = threadid;
++				strcpy(remcom_out_buffer, "OK");
++				break;
++
++			case 'c':
++				ptr = &remcom_in_buffer[2];
++				kgdb_hex2long(&ptr, &threadid);
++				if (!threadid) {
++					kgdb_contthread = NULL;
++				} else {
++					thread = getthread(linux_regs,
++							   threadid);
++					if (!thread && threadid > 0) {
++						error_packet(remcom_out_buffer,
++							     -EINVAL);
++						break;
++					}
++					kgdb_contthread = thread;
++				}
++				strcpy(remcom_out_buffer, "OK");
++				break;
++			}
++			break;
++
++			/* Query thread status */
++		case 'T':
++			ptr = &remcom_in_buffer[1];
++			kgdb_hex2long(&ptr, &threadid);
++			thread = getthread(linux_regs, threadid);
++			if (thread)
++				strcpy(remcom_out_buffer, "OK");
++			else
++				error_packet(remcom_out_buffer, -EINVAL);
++			break;
++		/* Since GDB-5.3, it's been drafted that '0' is a software
++		 * breakpoint, '1' is a hardware breakpoint, so let's do
++		 * that.
++		 */
++		case 'z':
++		case 'Z':
++			bpt_type = &remcom_in_buffer[1];
++			ptr = &remcom_in_buffer[2];
++
++			if (kgdb_ops->set_hw_breakpoint && *bpt_type >= '1') {
++				/* Unsupported */
++				if (*bpt_type > '4')
++					break;
++			} else if (*bpt_type != '0' && *bpt_type != '1')
++				/* Unsupported. */
++				break;
++			/* Test if this is a hardware breakpoint, and
++			 * if we support it. */
++			if (*bpt_type == '1' &&
++			    !(kgdb_ops->flags & KGDB_HW_BREAKPOINT))
++				/* Unsupported. */
++				break;
++
++			if (*(ptr++) != ',') {
++				error_packet(remcom_out_buffer, -EINVAL);
++				break;
++			} else if (kgdb_hex2long(&ptr, &addr)) {
++				if (*(ptr++) != ',' ||
++				    !kgdb_hex2long(&ptr, &length)) {
++					error_packet(remcom_out_buffer,
++						     -EINVAL);
++					break;
++				}
++			} else {
++				error_packet(remcom_out_buffer, -EINVAL);
++				break;
++			}
++
++			if (remcom_in_buffer[0] == 'Z' && *bpt_type == '0')
++				error = kgdb_set_sw_break(addr);
++			else if (remcom_in_buffer[0] == 'z' && *bpt_type == '0')
++				error = kgdb_remove_sw_break(addr);
++			else if (remcom_in_buffer[0] == 'Z')
++				error = kgdb_ops->set_hw_breakpoint(addr,
++								    (int)length,
++								    *bpt_type);
++			else if (remcom_in_buffer[0] == 'z')
++				error = kgdb_ops->remove_hw_breakpoint(addr,
++						(int)length, *bpt_type);
++
++			if (error == 0)
++				strcpy(remcom_out_buffer, "OK");
++			else
++				error_packet(remcom_out_buffer, error);
++
++			break;
++		case 'c':
++		case 's':
++			if (kgdb_contthread && kgdb_contthread != current) {
++				/* Can't switch threads in kgdb */
++				error_packet(remcom_out_buffer, -EINVAL);
++				break;
++			}
++			kgdb_activate_sw_breakpoints();
++			/* Fall through to default processing */
++		default:
++		      default_handle:
++			error = kgdb_arch_handle_exception(ex_vector, signo,
++							   err_code,
++							   remcom_in_buffer,
++							   remcom_out_buffer,
++							   linux_regs);
++
++			if (error >= 0 || remcom_in_buffer[0] == 'D' ||
++			    remcom_in_buffer[0] == 'k')
++				goto kgdb_exit;
++
++		}		/* switch */
++
++		/* reply to the request */
++		put_packet(remcom_out_buffer);
++	}
++
++ kgdb_exit:
++	/*
++	 * Call the I/O driver's post_exception routine
++	 * if the I/O driver defined one.
++	 */
++	if (kgdb_io_ops.post_exception)
++		kgdb_io_ops.post_exception();
++
++	kgdb_info[processor].debuggerinfo = NULL;
++	kgdb_info[processor].task = NULL;
++	atomic_set(&procindebug[processor], 0);
++
++	if (!debugger_step || !kgdb_contthread) {
++		for (i = 0; i < NR_CPUS; i++)
++			spin_unlock(&slavecpulocks[i]);
++		/* Wait till all the processors have quit
++		 * from the debugger. */
++		for (i = 0; i < NR_CPUS; i++) {
++			while (atomic_read(&procindebug[i])) {
++				int j = 10;	/* an arbitrary number */
++
++				while (--j)
++					cpu_relax();
++			}
++		}
++	}
++
++#ifdef CONFIG_SMP
++	/* This delay has a real purpose.  The problem is that if you
++	 * are single-stepping, you are sending an NMI to all the
++	 * other processors to stop them.  Interrupts come in, but
++	 * don't get handled.  Then you let them go just long enough
++	 * to get into their interrupt routines and use up some stack.
++	 * You stop them again, and then do the same thing.  After a
++	 * while you blow the stack on the other processors.  This
++	 * delay gives some time for interrupts to be cleared out on
++	 * the other processors.
++	 */
++	if (debugger_step)
++		mdelay(2);
++#endif
++ kgdb_restore:
++	/* Free debugger_active */
++	atomic_set(&debugger_active, 0);
++	local_irq_restore(flags);
++
++	return error;
++}
++
++/*
++ * GDB places a breakpoint at this function to find out about
++ * dynamically loaded objects.  It is deliberately not static, so that
++ * exactly one instance of this symbol exists in the kernel.
++ */
++
++int module_event(struct notifier_block *self, unsigned long val, void *data)
++{
++	return 0;
++}
++
++static struct notifier_block kgdb_module_load_nb = {
++	.notifier_call = module_event,
++};
++
++void kgdb_nmihook(int cpu, void *regs)
++{
++#ifdef CONFIG_SMP
++	if (!atomic_read(&procindebug[cpu]) &&
++	    atomic_read(&debugger_active) != (cpu + 1))
++		kgdb_wait((struct pt_regs *)regs);
++#endif
++}
++
++/*
++ * This is called when a panic happens.  All we need to do is
++ * breakpoint().
++ */
++static int kgdb_panic_notify(struct notifier_block *self, unsigned long cmd,
++			     void *ptr)
++{
++	breakpoint();
++
++	return 0;
++}
++
++static struct notifier_block kgdb_panic_notifier = {
++	.notifier_call = kgdb_panic_notify,
++};
++
++/*
++ * Initialization that needs to be done in either of our entry points.
++ */
++static void __init kgdb_internal_init(void)
++{
++	int i;
++
++	/* Initialize our spinlocks. */
++	for (i = 0; i < NR_CPUS; i++)
++		spin_lock_init(&slavecpulocks[i]);
++
++	for (i = 0; i < MAX_BREAKPOINTS; i++)
++		kgdb_break[i].state = bp_none;
++
++	/* Initialize the I/O handles */
++	memset(&kgdb_io_ops_prev, 0, sizeof(kgdb_io_ops_prev));
++
++	/* We can't do much if this fails */
++	register_module_notifier(&kgdb_module_load_nb);
++
++	kgdb_initialized = 1;
++}
++
++static void kgdb_register_for_panic(void)
++{
++	/* Register for panics. */
++	/* The registration is done in the kgdb_register_for_panic
++	 * routine because KGDB should not try to handle a panic when
++	 * no kgdb_io_ops are set up.  It is assumed that the
++	 * kgdb_io_ops are set up by the time this routine is called.
++	 */
++	if (!kgdb_from_module_registered) {
++		atomic_notifier_chain_register(&panic_notifier_list,
++					&kgdb_panic_notifier);
++		kgdb_from_module_registered = 1;
++	}
++}
++
++static void kgdb_unregister_for_panic(void)
++{
++	/* When this routine is called KGDB should unregister from the
++	 * panic handler and clean up, making sure it is not handling any
++	 * break exceptions at the time.
++	 */
++	if (kgdb_from_module_registered) {
++		kgdb_from_module_registered = 0;
++		atomic_notifier_chain_unregister(&panic_notifier_list,
++					  &kgdb_panic_notifier);
++	}
++}
++
++int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops)
++{
++
++	if (kgdb_connected) {
++		printk(KERN_ERR "kgdb: Cannot load I/O module while KGDB "
++		       "connected.\n");
++		return -EINVAL;
++	}
++
++	/* Save the old values so they can be restored */
++	if (kgdb_io_handler_cnt >= MAX_KGDB_IO_HANDLERS) {
++		printk(KERN_ERR "kgdb: No more I/O handles available.\n");
++		return -EINVAL;
++	}
++
++	/* Check to see if there is an existing driver and if so save its
++	 * values.  Also check to make sure the same driver was not trying
++	 * to re-register.
++	 */
++	if (kgdb_io_ops.read_char != NULL &&
++	    kgdb_io_ops.read_char != local_kgdb_io_ops->read_char) {
++		memcpy(&kgdb_io_ops_prev[kgdb_io_handler_cnt],
++		       &kgdb_io_ops, sizeof(struct kgdb_io));
++		kgdb_io_handler_cnt++;
++	}
++
++	/* Initialize the io values for this module */
++	memcpy(&kgdb_io_ops, local_kgdb_io_ops, sizeof(struct kgdb_io));
++
++	/* Make the call to register kgdb if it is not initialized. */
++	kgdb_register_for_panic();
++
++	return 0;
++}
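++
++/*
++ * Sketch: an I/O driver (names illustrative) fills in a struct kgdb_io
++ * and registers it with the routine above; only members referenced in
++ * this file are shown:
++ *
++ *	static struct kgdb_io my_serial_io = {
++ *		.read_char = my_read_char,
++ *		.init      = my_init,
++ *		.late_init = my_late_init,
++ *	};
++ *
++ *	kgdb_register_io_module(&my_serial_io);
++ */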
++
++void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops)
++{
++	int i;
++
++	/* Unregister KGDB if there were no other prior io hooks, else
++	 * restore the io hooks.
++	 */
++	if (kgdb_io_handler_cnt > 0 && kgdb_io_ops_prev[0].read_char != NULL) {
++		/* First check if the hook that is in use is the one being
++		 * removed */
++		if (kgdb_io_ops.read_char == local_kgdb_io_ops->read_char) {
++			/* Set 'i' to the index from which the list
++			 * should be shifted */
++			i = kgdb_io_handler_cnt - 1;
++			memcpy(&kgdb_io_ops, &kgdb_io_ops_prev[i],
++			       sizeof(struct kgdb_io));
++		} else {
++			/* Simple case to remove an entry for an I/O handler
++			 * that is not in use */
++			for (i = 0; i < kgdb_io_handler_cnt; i++) {
++				if (kgdb_io_ops_prev[i].read_char ==
++				    local_kgdb_io_ops->read_char)
++					break;
++			}
++		}
++
++		/* Shift all the entries in the handler array so it is
++		 * ordered from oldest to newest.
++		 */
++		kgdb_io_handler_cnt--;
++		for (; i < kgdb_io_handler_cnt; i++) {
++			memcpy(&kgdb_io_ops_prev[i], &kgdb_io_ops_prev[i + 1],
++			       sizeof(struct kgdb_io));
++		}
++		/* Clear the last element, which is no longer in use. */
++		memset(&kgdb_io_ops_prev[kgdb_io_handler_cnt], 0,
++				sizeof(struct kgdb_io));
++
++		if (kgdb_connected)
++			printk(KERN_ERR "kgdb: WARNING: I/O method changed "
++			       "while kgdb was connected.\n");
++	} else {
++		/* KGDB is no longer able to communicate out, so
++		 * unregister our hooks and reset state. */
++		kgdb_unregister_for_panic();
++		if (kgdb_connected) {
++			printk(KERN_CRIT "kgdb: I/O module was unloaded while "
++					"a debugging session was running.  "
++					"KGDB will be reset.\n");
++			if (remove_all_break() < 0)
++				printk(KERN_CRIT "kgdb: Reset failed.\n");
++			kgdb_connected = 0;
++		}
++		memset(&kgdb_io_ops, 0, sizeof(struct kgdb_io));
++	}
++}
++
++/*
++ * There are times we need to use a tasklet to cause a breakpoint,
++ * because calling breakpoint() directly at that point might be fatal.
++ * We have to check that the exception stack is set up, as tasklets may
++ * be scheduled before it is.  When that happens, it is up to the
++ * architecture to schedule the tasklet once it is safe to run.
++ */
++static void kgdb_tasklet_bpt(unsigned long ing)
++{
++	if (CHECK_EXCEPTION_STACK())
++		breakpoint();
++}
++
++DECLARE_TASKLET(kgdb_tasklet_breakpoint, kgdb_tasklet_bpt, 0);
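++
++/*
++ * Sketch: code running where a direct breakpoint() would be unsafe
++ * defers the trap instead:
++ *
++ *	tasklet_schedule(&kgdb_tasklet_breakpoint);
++ */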
++
++/*
++ * This function can be called very early, either via early_param() or
++ * an explicit breakpoint() early on.
++ */
++static void __init kgdb_early_entry(void)
++{
++	/* Let the architecture do any setup that it needs to. */
++	kgdb_arch_init();
++
++	/*
++	 * Don't try to do anything until the architecture is able to
++	 * set up the exception stack.  In that case, it is up to the
++	 * architecture to hook in and call us back once it is ready.
++	 */
++
++	if (!CHECK_EXCEPTION_STACK()) {
++		kgdb_initialized = -1;
++		/* Any kind of breakpoint is deferred to late_init. */
++		return;
++	}
++
++	/* Now try the I/O. */
++	/* For early entry kgdb_io_ops.init must be defined */
++	if (!kgdb_io_ops.init || kgdb_io_ops.init()) {
++		/* Try again later. */
++		kgdb_initialized = -1;
++		return;
++	}
++
++	/* Finish up. */
++	kgdb_internal_init();
++
++	/* If kgdb_io_ops.init is defined at this point, it did not come
++	 * from a kernel module but was set up statically by a built-in
++	 * driver, so the panic registration can be performed now.
++	 */
++	if (kgdb_io_ops.init)
++		kgdb_register_for_panic();
++}
++
++/*
++ * This function is always invoked to make sure that KGDB grabs
++ * everything it needs so that it can get involved if something happens
++ * while the system is running.  If kgdb_early_entry() has already
++ * been invoked, there is little we need to do.
++ */
++static int __init kgdb_late_entry(void)
++{
++	int need_break = 0;
++
++	/* If kgdb_initialized is -1 then we were passed kgdbwait. */
++	if (kgdb_initialized == -1)
++		need_break = 1;
++
++	/*
++	 * If we haven't tried to initialize KGDB yet, we need to call
++	 * kgdb_arch_init before moving onto the I/O.
++	 */
++	if (!kgdb_initialized)
++		kgdb_arch_init();
++
++	if (kgdb_initialized != 1) {
++		if (kgdb_io_ops.init && kgdb_io_ops.init()) {
++			/* When KGDB allows I/O via modules and the core
++			 * I/O init fails, KGDB must default to deferring
++			 * the I/O setup, and print an appropriate error
++			 * about it.
++			 */
++			printk(KERN_ERR "kgdb: Could not setup core I/O "
++			       "for KGDB.\n");
++			printk(KERN_INFO "kgdb: Deferring I/O setup to kernel "
++			       "module.\n");
++			memset(&kgdb_io_ops, 0, sizeof(struct kgdb_io));
++		}
++
++		kgdb_internal_init();
++
++		/* If kgdb_io_ops.init is defined at this point, it did not
++		 * come from a kernel module but was set up statically by a
++		 * built-in driver, so the panic registration can be
++		 * performed now.
++		 */
++		if (kgdb_io_ops.init)
++			kgdb_register_for_panic();
++	}
++
++	/* Register with the reboot notifier list. */
++	register_reboot_notifier(&kgdb_reboot_notifier);
++
++	/* Now do any late init of the I/O. */
++	if (kgdb_io_ops.late_init)
++		kgdb_io_ops.late_init();
++
++	if (need_break) {
++		printk(KERN_CRIT "kgdb: Waiting for connection from remote"
++		       " gdb...\n");
++		breakpoint();
++	}
++
++	return 0;
++}
++
++late_initcall(kgdb_late_entry);
++
++/*
++ * This function will generate a breakpoint exception.  It is used at the
++ * beginning of a program to sync up with a debugger and can be used
++ * otherwise as a quick means to stop program execution and "break" into
++ * the debugger.
++ */
++void breakpoint(void)
++{
++	atomic_set(&kgdb_setting_breakpoint, 1);
++	wmb();
++	BREAKPOINT();
++	wmb();
++	atomic_set(&kgdb_setting_breakpoint, 0);
++}
++
++EXPORT_SYMBOL(breakpoint);
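++
++/*
++ * Sketch: a hard-coded debugger entry at a suspect spot (the condition
++ * is illustrative):
++ *
++ *	if (data_looks_corrupt)
++ *		breakpoint();
++ */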
++
++#ifdef CONFIG_MAGIC_SYSRQ
++static void sysrq_handle_gdb(int key, struct tty_struct *tty)
++{
++	printk("Entering GDB stub\n");
++	breakpoint();
++}
++static struct sysrq_key_op sysrq_gdb_op = {
++	.handler = sysrq_handle_gdb,
++	.help_msg = "Gdb",
++	.action_msg = "GDB",
++};
++
++static int gdb_register_sysrq(void)
++{
++	printk("Registering GDB sysrq handler\n");
++	register_sysrq_key('g', &sysrq_gdb_op);
++	return 0;
++}
++
++module_init(gdb_register_sysrq);
++#endif
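++
++/*
++ * With CONFIG_MAGIC_SYSRQ the stub can also be entered from a running
++ * system via the 'g' key registered above, e.g. (from userspace):
++ *
++ *	echo g > /proc/sysrq-trigger
++ */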
++
++static int kgdb_notify_reboot(struct notifier_block *this,
++                            unsigned long code, void *x)
++{
++
++	unsigned long flags;
++
++	/* If we're debugging, or KGDB has not connected, don't try
++	 * to print. */
++	if (!kgdb_connected || atomic_read(&debugger_active) != 0)
++		return 0;
++	if (code == SYS_RESTART || code == SYS_HALT ||
++	    code == SYS_POWER_OFF) {
++		local_irq_save(flags);
++		put_packet("X00");
++		local_irq_restore(flags);
++	}
++	return NOTIFY_DONE;
++}
++
++#ifdef CONFIG_KGDB_CONSOLE
++void kgdb_console_write(struct console *co, const char *s, unsigned count)
++{
++	unsigned long flags;
++
++	/* If we're debugging, or KGDB has not connected, don't try
++	 * to print. */
++	if (!kgdb_connected || atomic_read(&debugger_active) != 0)
++		return;
++
++	local_irq_save(flags);
++	kgdb_msg_write(s, count);
++	local_irq_restore(flags);
++}
++
++struct console kgdbcons = {
++	.name = "kgdb",
++	.write = kgdb_console_write,
++	.flags = CON_PRINTBUFFER | CON_ENABLED,
++};
++static int __init kgdb_console_init(void)
++{
++	register_console(&kgdbcons);
++	return 0;
++}
++
++console_initcall(kgdb_console_init);
++#endif
++
++static int __init opt_kgdb_enter(char *str)
++{
++	/* We've already done this by an explicit breakpoint() call. */
++	if (kgdb_initialized)
++		return 0;
++
++	kgdb_early_entry();
++	if (kgdb_initialized == 1)
++		printk(KERN_CRIT "Waiting for connection from remote "
++		       "gdb...\n");
++	else {
++		printk(KERN_CRIT "KGDB cannot initialize I/O yet.\n");
++		return 0;
++	}
++
++	breakpoint();
++
++	return 0;
++}
++
++early_param("kgdbwait", opt_kgdb_enter);
+diff -Nurb linux-2.6.22-570/kernel/kmod.c linux-2.6.22-try2/kernel/kmod.c
+--- linux-2.6.22-570/kernel/kmod.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/kernel/kmod.c	2007-12-19 15:29:23.000000000 -0500
+@@ -119,9 +119,10 @@
+ 	char **argv;
+ 	char **envp;
+ 	struct key *ring;
+-	int wait;
++	enum umh_wait wait;
+ 	int retval;
+ 	struct file *stdin;
++	void (*cleanup)(char **argv, char **envp);
+ };
+ 
+ /*
+@@ -180,6 +181,14 @@
+ 	do_exit(0);
+ }
+ 
++void call_usermodehelper_freeinfo(struct subprocess_info *info)
++{
++	if (info->cleanup)
++		(*info->cleanup)(info->argv, info->envp);
++	kfree(info);
++}
++EXPORT_SYMBOL(call_usermodehelper_freeinfo);
++
+ /* Keventd can't block, but this (a child) can. */
+ static int wait_for_helper(void *data)
+ {
+@@ -216,8 +225,8 @@
+ 			sub_info->retval = ret;
+ 	}
+ 
+-	if (sub_info->wait < 0)
+-		kfree(sub_info);
++	if (sub_info->wait == UMH_NO_WAIT)
++		call_usermodehelper_freeinfo(sub_info);
+ 	else
+ 		complete(sub_info->complete);
+ 	return 0;
+@@ -229,101 +238,102 @@
+ 	struct subprocess_info *sub_info =
+ 		container_of(work, struct subprocess_info, work);
+ 	pid_t pid;
+-	int wait = sub_info->wait;
++	enum umh_wait wait = sub_info->wait;
+ 
+ 	/* CLONE_VFORK: wait until the usermode helper has execve'd
+ 	 * successfully We need the data structures to stay around
+ 	 * until that is done.  */
+-	if (wait)
++	if (wait == UMH_WAIT_PROC)
+ 		pid = kernel_thread(wait_for_helper, sub_info,
+ 				    CLONE_FS | CLONE_FILES | SIGCHLD);
+ 	else
+ 		pid = kernel_thread(____call_usermodehelper, sub_info,
+ 				    CLONE_VFORK | SIGCHLD);
+ 
+-	if (wait < 0)
+-		return;
+-
+-	if (pid < 0) {
++	switch (wait) {
++	case UMH_NO_WAIT:
++		break;
++
++	case UMH_WAIT_PROC:
++		if (pid > 0)
++			break;
+ 		sub_info->retval = pid;
++		/* FALLTHROUGH */
++
++	case UMH_WAIT_EXEC:
+ 		complete(sub_info->complete);
+-	} else if (!wait)
+-		complete(sub_info->complete);
++	}
+ }
+ 
+ /**
+- * call_usermodehelper_keys - start a usermode application
+- * @path: pathname for the application
+- * @argv: null-terminated argument list
+- * @envp: null-terminated environment list
+- * @session_keyring: session keyring for process (NULL for an empty keyring)
+- * @wait: wait for the application to finish and return status.
+- *        when -1 don't wait at all, but you get no useful error back when
+- *        the program couldn't be exec'ed. This makes it safe to call
+- *        from interrupt context.
++ * call_usermodehelper_setup - prepare to call a usermode helper
++ * @path: path to usermode executable
++ * @argv: arg vector for process
++ * @envp: environment for process
+  *
+- * Runs a user-space application.  The application is started
+- * asynchronously if wait is not set, and runs as a child of keventd.
+- * (ie. it runs with full root capabilities).
+- *
+- * Must be called from process context.  Returns a negative error code
+- * if program was not execed successfully, or 0.
++ * Returns either NULL on allocation failure, or a subprocess_info
++ * structure.  This should be passed to call_usermodehelper_exec to
++ * exec the process and free the structure.
+  */
+-int call_usermodehelper_keys(char *path, char **argv, char **envp,
+-			     struct key *session_keyring, int wait)
++struct subprocess_info *call_usermodehelper_setup(char *path,
++						  char **argv, char **envp)
+ {
+-	DECLARE_COMPLETION_ONSTACK(done);
+ 	struct subprocess_info *sub_info;
+-	int retval;
+-
+-	if (!khelper_wq)
+-		return -EBUSY;
+-
+-	if (path[0] == '\0')
+-		return 0;
+-
+ 	sub_info = kzalloc(sizeof(struct subprocess_info),  GFP_ATOMIC);
+ 	if (!sub_info)
+-		return -ENOMEM;
++		goto out;
+ 
+ 	INIT_WORK(&sub_info->work, __call_usermodehelper);
+-	sub_info->complete = &done;
+ 	sub_info->path = path;
+ 	sub_info->argv = argv;
+ 	sub_info->envp = envp;
+-	sub_info->ring = session_keyring;
+-	sub_info->wait = wait;
+ 
+-	queue_work(khelper_wq, &sub_info->work);
+-	if (wait < 0) /* task has freed sub_info */
+-		return 0;
+-	wait_for_completion(&done);
+-	retval = sub_info->retval;
+-	kfree(sub_info);
+-	return retval;
++  out:
++	return sub_info;
+ }
+-EXPORT_SYMBOL(call_usermodehelper_keys);
++EXPORT_SYMBOL(call_usermodehelper_setup);
+ 
+-int call_usermodehelper_pipe(char *path, char **argv, char **envp,
+-			     struct file **filp)
++/**
++ * call_usermodehelper_setkeys - set the session keys for usermode helper
++ * @info: a subprocess_info returned by call_usermodehelper_setup
++ * @session_keyring: the session keyring for the process
++ */
++void call_usermodehelper_setkeys(struct subprocess_info *info,
++				 struct key *session_keyring)
+ {
+-	DECLARE_COMPLETION(done);
+-	struct subprocess_info sub_info = {
+-		.work		= __WORK_INITIALIZER(sub_info.work,
+-						     __call_usermodehelper),
+-		.complete	= &done,
+-		.path		= path,
+-		.argv		= argv,
+-		.envp		= envp,
+-		.retval		= 0,
+-	};
+-	struct file *f;
++	info->ring = session_keyring;
++}
++EXPORT_SYMBOL(call_usermodehelper_setkeys);
+ 
+-	if (!khelper_wq)
+-		return -EBUSY;
++/**
++ * call_usermodehelper_setcleanup - set a cleanup function
++ * @info: a subprocess_info returned by call_usermodehelper_setup
++ * @cleanup: a cleanup function
++ *
++ * The cleanup function is called just before the subprocess_info is
++ * freed.  This can be used for freeing the argv and envp.  The
++ * function must be runnable in either a process context or the
++ * context in which call_usermodehelper_exec is called.
++ */
++void call_usermodehelper_setcleanup(struct subprocess_info *info,
++				    void (*cleanup)(char **argv, char **envp))
++{
++	info->cleanup = cleanup;
++}
++EXPORT_SYMBOL(call_usermodehelper_setcleanup);
+ 
+-	if (path[0] == '\0')
+-		return 0;
++/**
++ * call_usermodehelper_stdinpipe - set up a pipe to be used for stdin
++ * @sub_info: a subprocess_info returned by call_usermodehelper_setup
++ * @filp: set to the write-end of a pipe
++ *
++ * This constructs a pipe, and sets the read end to be the stdin of the
++ * subprocess, and returns the write-end in *@filp.
++ */
++int call_usermodehelper_stdinpipe(struct subprocess_info *sub_info,
++				  struct file **filp)
++{
++	struct file *f;
+ 
+ 	f = create_write_pipe();
+ 	if (IS_ERR(f))
+@@ -335,11 +345,85 @@
+ 		free_write_pipe(*filp);
+ 		return PTR_ERR(f);
+ 	}
+-	sub_info.stdin = f;
++	sub_info->stdin = f;
++
++	return 0;
++}
++EXPORT_SYMBOL(call_usermodehelper_stdinpipe);
+ 
+-	queue_work(khelper_wq, &sub_info.work);
++/**
++ * call_usermodehelper_exec - start a usermode application
++ * @sub_info: information about the subprocess
++ * @wait: wait for the application to finish and return status.
++ *        When UMH_NO_WAIT, don't wait at all, but you get no useful error
++ *        back when the program couldn't be exec'ed.  This makes it safe to
++ *        call from interrupt context.
++ *
++ * Runs a user-space application.  The application is started
++ * asynchronously if wait is not set, and runs as a child of keventd.
++ * (ie. it runs with full root capabilities).
++ */
++int call_usermodehelper_exec(struct subprocess_info *sub_info,
++			     enum umh_wait wait)
++{
++	DECLARE_COMPLETION_ONSTACK(done);
++	int retval;
++
++	if (sub_info->path[0] == '\0') {
++		retval = 0;
++		goto out;
++	}
++
++	if (!khelper_wq) {
++		retval = -EBUSY;
++		goto out;
++	}
++
++	sub_info->complete = &done;
++	sub_info->wait = wait;
++
++	queue_work(khelper_wq, &sub_info->work);
++	if (wait == UMH_NO_WAIT) /* task has freed sub_info */
++		return 0;
+ 	wait_for_completion(&done);
+-	return sub_info.retval;
++	retval = sub_info->retval;
++
++  out:
++	call_usermodehelper_freeinfo(sub_info);
++	return retval;
++}
++EXPORT_SYMBOL(call_usermodehelper_exec);
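++
++/*
++ * Sketch: fire-and-forget use of the split API above (the helper path
++ * and argv/envp contents are illustrative):
++ *
++ *	struct subprocess_info *info;
++ *	char *argv[] = { "/sbin/some-helper", NULL };
++ *	char *envp[] = { "HOME=/", "PATH=/sbin:/bin", NULL };
++ *
++ *	info = call_usermodehelper_setup(argv[0], argv, envp);
++ *	if (info)
++ *		call_usermodehelper_exec(info, UMH_NO_WAIT);
++ */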
++
++/**
++ * call_usermodehelper_pipe - call a usermode helper process with a pipe stdin
++ * @path: path to usermode executable
++ * @argv: arg vector for process
++ * @envp: environment for process
++ * @filp: set to the write-end of a pipe
++ *
++ * This is a simple wrapper which executes a usermode-helper function
++ * with a pipe as stdin.  It is implemented entirely in terms of
++ * lower-level call_usermodehelper_* functions.
++ */
++int call_usermodehelper_pipe(char *path, char **argv, char **envp,
++			     struct file **filp)
++{
++	struct subprocess_info *sub_info;
++	int ret;
++
++	sub_info = call_usermodehelper_setup(path, argv, envp);
++	if (sub_info == NULL)
++		return -ENOMEM;
++
++	ret = call_usermodehelper_stdinpipe(sub_info, filp);
++	if (ret < 0)
++		goto out;
++
++	return call_usermodehelper_exec(sub_info, UMH_WAIT_PROC);
++
++  out:
++	call_usermodehelper_freeinfo(sub_info);
++	return ret;
+ }
+ EXPORT_SYMBOL(call_usermodehelper_pipe);
+ 
+diff -Nurb linux-2.6.22-570/kernel/module.c linux-2.6.22-try2/kernel/module.c
+--- linux-2.6.22-570/kernel/module.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/kernel/module.c	2007-12-19 15:29:24.000000000 -0500
+@@ -67,6 +67,7 @@
+ /* List of modules, protected by module_mutex AND modlist_lock */
+ static DEFINE_MUTEX(module_mutex);
+ static LIST_HEAD(modules);
++static DECLARE_MUTEX(notify_mutex);
+ 
+ static BLOCKING_NOTIFIER_HEAD(module_notify_list);
+ 
+@@ -488,8 +489,7 @@
+         mod->field = NULL;                                            \
+ }                                                                     \
+ static struct module_attribute modinfo_##field = {                    \
+-	.attr = { .name = __stringify(field), .mode = 0444,           \
+-		  .owner = THIS_MODULE },                             \
++	.attr = { .name = __stringify(field), .mode = 0444 },         \
+ 	.show = show_modinfo_##field,                                 \
+ 	.setup = setup_modinfo_##field,                               \
+ 	.test = modinfo_##field##_exists,                             \
+@@ -713,6 +713,12 @@
+ 	if (ret != 0)
+ 		goto out;
+ 
++	down(&notify_mutex);
++	blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING,
++			mod);
++	up(&notify_mutex);
++
++
+ 	/* Never wait if forced. */
+ 	if (!forced && module_refcount(mod) != 0)
+ 		wait_for_zero_refcount(mod);
+@@ -725,6 +731,11 @@
+ 	}
+ 	free_module(mod);
+ 
++	down(&notify_mutex);
++	blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GONE,
++			NULL);
++	up(&notify_mutex);
++
+  out:
+ 	mutex_unlock(&module_mutex);
+ 	return ret;
+@@ -793,7 +804,7 @@
+ }
+ 
+ static struct module_attribute refcnt = {
+-	.attr = { .name = "refcnt", .mode = 0444, .owner = THIS_MODULE },
++	.attr = { .name = "refcnt", .mode = 0444 },
+ 	.show = show_refcnt,
+ };
+ 
+@@ -846,12 +857,15 @@
+ 	case MODULE_STATE_GOING:
+ 		state = "going";
+ 		break;
++	case MODULE_STATE_GONE:
++		state = "gone";
++		break;
+ 	}
+ 	return sprintf(buffer, "%s\n", state);
+ }
+ 
+ static struct module_attribute initstate = {
+-	.attr = { .name = "initstate", .mode = 0444, .owner = THIS_MODULE },
++	.attr = { .name = "initstate", .mode = 0444 },
+ 	.show = show_initstate,
+ };
+ 
+@@ -1032,7 +1046,6 @@
+ 		sattr->mattr.show = module_sect_show;
+ 		sattr->mattr.store = NULL;
+ 		sattr->mattr.attr.name = sattr->name;
+-		sattr->mattr.attr.owner = mod;
+ 		sattr->mattr.attr.mode = S_IRUGO;
+ 		*(gattr++) = &(sattr++)->mattr.attr;
+ 	}
+@@ -1090,7 +1103,6 @@
+ 		if (!attr->test ||
+ 		    (attr->test && attr->test(mod))) {
+ 			memcpy(temp_attr, attr, sizeof(*temp_attr));
+-			temp_attr->attr.owner = mod;
+ 			error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr);
+ 			++temp_attr;
+ 		}
+@@ -1212,6 +1224,11 @@
+ 	/* Arch-specific cleanup. */
+ 	module_arch_cleanup(mod);
+ 
++#ifdef CONFIG_KGDB
++	/* kgdb info */
++	vfree(mod->mod_sections);
++#endif
++
+ 	/* Module unload stuff */
+ 	module_unload_free(mod);
+ 
+@@ -1471,6 +1488,31 @@
+ 	}
+ }
+ 
++#ifdef CONFIG_KGDB
++int add_modsects(struct module *mod, Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
++		 const char *secstrings)
++{
++	int i;
++
++	mod->num_sections = hdr->e_shnum - 1;
++	mod->mod_sections = vmalloc((hdr->e_shnum - 1) *
++		sizeof(struct mod_section));
++
++	if (mod->mod_sections == NULL) {
++		return -ENOMEM;
++	}
++
++	for (i = 1; i < hdr->e_shnum; i++) {
++		mod->mod_sections[i - 1].address = (void *)sechdrs[i].sh_addr;
++		strncpy(mod->mod_sections[i - 1].name,
++			secstrings + sechdrs[i].sh_name, MAX_SECTNAME);
++		mod->mod_sections[i - 1].name[MAX_SECTNAME] = '\0';
++	}
++
++	return 0;
++}
++#endif
++
+ #ifdef CONFIG_KALLSYMS
+ static int is_exported(const char *name, const struct module *mod)
+ {
+@@ -1886,6 +1928,12 @@
+ 
+ 	add_kallsyms(mod, sechdrs, symindex, strindex, secstrings);
+ 
++#ifdef CONFIG_KGDB
++	err = add_modsects(mod, hdr, sechdrs, secstrings);
++	if (err < 0)
++		goto nomodsectinfo;
++#endif
++
+ 	err = module_finalize(hdr, sechdrs, mod);
+ 	if (err < 0)
+ 		goto cleanup;
+@@ -1946,6 +1994,11 @@
+  arch_cleanup:
+ 	module_arch_cleanup(mod);
+  cleanup:
++
++#ifdef CONFIG_KGDB
++nomodsectinfo:
++	vfree(mod->mod_sections);
++#endif
+ 	module_unload_free(mod);
+ 	module_free(mod, mod->module_init);
+  free_core:
+@@ -2017,6 +2070,10 @@
+ 		/* Init routine failed: abort.  Try to protect us from
+                    buggy refcounters. */
+ 		mod->state = MODULE_STATE_GOING;
++		down(&notify_mutex);
++		blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING,
++				mod);
++		up(&notify_mutex);
+ 		synchronize_sched();
+ 		if (mod->unsafe)
+ 			printk(KERN_ERR "%s: module is now stuck!\n",
+diff -Nurb linux-2.6.22-570/kernel/ns_container.c linux-2.6.22-try2/kernel/ns_container.c
+--- linux-2.6.22-570/kernel/ns_container.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/kernel/ns_container.c	2007-12-19 15:29:25.000000000 -0500
+@@ -0,0 +1,99 @@
++/*
++ * ns_container.c - namespace container subsystem
++ *
++ * Copyright 2006, 2007 IBM Corp
++ */
++
++#include <linux/module.h>
++#include <linux/container.h>
++#include <linux/fs.h>
++
++struct ns_container {
++	struct container_subsys_state css;
++	spinlock_t lock;
++};
++
++struct container_subsys ns_subsys;
++
++static inline struct ns_container *container_to_ns(
++		struct container *container)
++{
++	return container_of(container_subsys_state(container, ns_subsys_id),
++			    struct ns_container, css);
++}
++
++int ns_container_clone(struct task_struct *task)
++{
++	return container_clone(task, &ns_subsys);
++}
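++
++/*
++ * Sketch: the nsproxy code calls this after setting up new namespaces,
++ * so the task lands in a fresh child of its current container:
++ *
++ *	err = ns_container_clone(tsk);
++ *	if (err)
++ *		put_nsproxy(new_ns);
++ */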
++
++/*
++ * Rules:
++ *   1. you can only enter a container which is a child of your current
++ *     container
++ *   2. you can only place another process into a container if
++ *     a. you have CAP_SYS_ADMIN
++ *     b. your container is an ancestor of task's destination container
++ *       (hence either you are in the same container as task, or in an
++ *        ancestor container thereof)
++ */
++static int ns_can_attach(struct container_subsys *ss,
++		struct container *new_container, struct task_struct *task)
++{
++	struct container *orig;
++
++	if (current != task) {
++		if (!capable(CAP_SYS_ADMIN))
++			return -EPERM;
++
++		if (!container_is_descendant(new_container))
++			return -EPERM;
++	}
++
++	if (atomic_read(&new_container->count) != 0)
++		return -EPERM;
++
++	orig = task_container(task, ns_subsys_id);
++	if (orig && orig != new_container->parent)
++		return -EPERM;
++
++	return 0;
++}
++
++/*
++ * Rules: you can only create a container if
++ *     1. you are capable(CAP_SYS_ADMIN)
++ *     2. the target container is a descendant of your own container
++ */
++static int ns_create(struct container_subsys *ss, struct container *container)
++{
++	struct ns_container *ns_container;
++
++	if (!capable(CAP_SYS_ADMIN))
++		return -EPERM;
++	if (!container_is_descendant(container))
++		return -EPERM;
++
++	ns_container = kzalloc(sizeof(*ns_container), GFP_KERNEL);
++	if (!ns_container)
++		return -ENOMEM;
++	spin_lock_init(&ns_container->lock);
++	container->subsys[ns_subsys.subsys_id] = &ns_container->css;
++	return 0;
++}
++
++static void ns_destroy(struct container_subsys *ss,
++			struct container *container)
++{
++	struct ns_container *ns_container;
++
++	ns_container = container_to_ns(container);
++	kfree(ns_container);
++}
++
++struct container_subsys ns_subsys = {
++	.name = "ns",
++	.can_attach = ns_can_attach,
++	.create = ns_create,
++	.destroy  = ns_destroy,
++	.subsys_id = ns_subsys_id,
++};
+diff -Nurb linux-2.6.22-570/kernel/nsproxy.c linux-2.6.22-try2/kernel/nsproxy.c
+--- linux-2.6.22-570/kernel/nsproxy.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/nsproxy.c	2007-12-19 21:24:51.000000000 -0500
+@@ -23,6 +23,8 @@
+ #include <linux/vserver/global.h>
+ #include <linux/vserver/debug.h>
+ 
++static struct kmem_cache *nsproxy_cachep;
++
+ struct nsproxy init_nsproxy = INIT_NSPROXY(init_nsproxy);
+ 
+ void get_task_namespaces(struct task_struct *tsk)
+@@ -83,8 +85,15 @@
+ 	if (IS_ERR(new_nsp->pid_ns))
+ 		goto out_pid;
+ 
++	new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns);
++	if (IS_ERR(new_nsp->user_ns))
++		goto out_user;
++
+ 	return new_nsp;
+ 
++out_user:
++	if (new_nsp->pid_ns)
++		put_pid_ns(new_nsp->pid_ns);
+ out_pid:
+ 	if (new_nsp->ipc_ns)
+ 		put_ipc_ns(new_nsp->ipc_ns);
+@@ -95,11 +104,11 @@
+ 	if (new_nsp->mnt_ns)
+ 		put_mnt_ns(new_nsp->mnt_ns);
+ out_ns:
+-	kfree(new_nsp);
+-	return ERR_PTR(-ENOMEM);
++	kmem_cache_free(nsproxy_cachep, new_nsp);
++	return ERR_PTR(err);
+ }
+ 
+-static struct nsproxy *create_new_namespaces(int flags, struct task_struct *tsk,
++static struct nsproxy *create_new_namespaces(unsigned long flags, struct task_struct *tsk,
+ 			struct fs_struct *new_fs)
+ {
+ 	return unshare_namespaces(flags, tsk->nsproxy, new_fs);
+@@ -130,7 +139,7 @@
+  * called from clone.  This now handles copy for nsproxy and all
+  * namespaces therein.
+  */
+-int copy_namespaces(int flags, struct task_struct *tsk)
++int copy_namespaces(unsigned long flags, struct task_struct *tsk)
+ {
+ 	struct nsproxy *old_ns = tsk->nsproxy;
+ 	struct nsproxy *new_ns = NULL;
+@@ -144,7 +153,7 @@
+ 
+ 	get_nsproxy(old_ns);
+ 
+-	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
++	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER)))
+ 		return 0;
+ 
+ 	if (!capable(CAP_SYS_ADMIN)) {
+@@ -158,7 +167,14 @@
+ 		goto out;
+ 	}
+ 
++	err = ns_container_clone(tsk);
++	if (err) {
++		put_nsproxy(new_ns);
++		goto out;
++	}
++
+ 	tsk->nsproxy = new_ns;
++
+ out:
+ 	put_nsproxy(old_ns);
+ 	vxdprintk(VXD_CBIT(space, 3),
+@@ -194,25 +210,33 @@
+ 		"unshare_nsproxy_namespaces(0x%08lx,[%p])",
+ 		unshare_flags, current->nsproxy);
+ 
+-	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC)))
++	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
++			       CLONE_NEWUSER)))
+ 		return 0;
+ 
+-#ifndef CONFIG_IPC_NS
+-	if (unshare_flags & CLONE_NEWIPC)
+-		return -EINVAL;
+-#endif
+-
+-#ifndef CONFIG_UTS_NS
+-	if (unshare_flags & CLONE_NEWUTS)
+-		return -EINVAL;
+-#endif
+-
+ 	if (!capable(CAP_SYS_ADMIN))
+ 		return -EPERM;
+ 
+ 	*new_nsp = create_new_namespaces(unshare_flags, current,
+ 				new_fs ? new_fs : current->fs);
+-	if (IS_ERR(*new_nsp))
++	if (IS_ERR(*new_nsp)) {
+ 		err = PTR_ERR(*new_nsp);
++		goto out;
++	}
++
++	err = ns_container_clone(current);
++	if (err)
++		put_nsproxy(*new_nsp);
++
++out:
+ 	return err;
+ }
++
++static int __init nsproxy_cache_init(void)
++{
++	nsproxy_cachep = kmem_cache_create("nsproxy", sizeof(struct nsproxy),
++					   0, SLAB_PANIC, NULL, NULL);
++	return 0;
++}
++
++module_init(nsproxy_cache_init);
+diff -Nurb linux-2.6.22-570/kernel/params.c linux-2.6.22-try2/kernel/params.c
+--- linux-2.6.22-570/kernel/params.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/kernel/params.c	2007-12-19 15:29:22.000000000 -0500
+@@ -491,7 +491,6 @@
+ 			pattr->mattr.show = param_attr_show;
+ 			pattr->mattr.store = param_attr_store;
+ 			pattr->mattr.attr.name = (char *)&kp->name[name_skip];
+-			pattr->mattr.attr.owner = mk->mod;
+ 			pattr->mattr.attr.mode = kp->perm;
+ 			*(gattr++) = &(pattr++)->mattr.attr;
+ 		}
+diff -Nurb linux-2.6.22-570/kernel/pid.c linux-2.6.22-try2/kernel/pid.c
+--- linux-2.6.22-570/kernel/pid.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/pid.c	2007-12-19 15:29:24.000000000 -0500
+@@ -379,7 +379,7 @@
+ }
+ EXPORT_SYMBOL_GPL(find_get_pid);
+ 
+-struct pid_namespace *copy_pid_ns(int flags, struct pid_namespace *old_ns)
++struct pid_namespace *copy_pid_ns(unsigned long flags, struct pid_namespace *old_ns)
+ {
+ 	BUG_ON(!old_ns);
+ 	get_pid_ns(old_ns);
+diff -Nurb linux-2.6.22-570/kernel/ptrace.c linux-2.6.22-try2/kernel/ptrace.c
+--- linux-2.6.22-570/kernel/ptrace.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/ptrace.c	2007-12-19 15:29:24.000000000 -0500
+@@ -143,7 +143,7 @@
+ 		return -EPERM;
+ 	smp_rmb();
+ 	if (task->mm)
+-		dumpable = task->mm->dumpable;
++		dumpable = get_dumpable(task->mm);
+ 	if (!dumpable && !capable(CAP_SYS_PTRACE))
+ 		return -EPERM;
+ 	if (!vx_check(task->xid, VS_ADMIN_P|VS_IDENT))
+diff -Nurb linux-2.6.22-570/kernel/rcutorture.c linux-2.6.22-try2/kernel/rcutorture.c
+--- linux-2.6.22-570/kernel/rcutorture.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/kernel/rcutorture.c	2007-12-19 15:29:24.000000000 -0500
+@@ -40,6 +40,7 @@
+ #include <linux/moduleparam.h>
+ #include <linux/percpu.h>
+ #include <linux/notifier.h>
++#include <linux/freezer.h>
+ #include <linux/cpu.h>
+ #include <linux/random.h>
+ #include <linux/delay.h>
+@@ -518,7 +519,6 @@
+ 
+ 	VERBOSE_PRINTK_STRING("rcu_torture_writer task started");
+ 	set_user_nice(current, 19);
+-	current->flags |= PF_NOFREEZE;
+ 
+ 	do {
+ 		schedule_timeout_uninterruptible(1);
+@@ -558,7 +558,6 @@
+ 
+ 	VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task started");
+ 	set_user_nice(current, 19);
+-	current->flags |= PF_NOFREEZE;
+ 
+ 	do {
+ 		schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
+@@ -589,7 +588,6 @@
+ 
+ 	VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
+ 	set_user_nice(current, 19);
+-	current->flags |= PF_NOFREEZE;
+ 
+ 	do {
+ 		idx = cur_ops->readlock();
+diff -Nurb linux-2.6.22-570/kernel/rtmutex-tester.c linux-2.6.22-try2/kernel/rtmutex-tester.c
+--- linux-2.6.22-570/kernel/rtmutex-tester.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/kernel/rtmutex-tester.c	2007-12-19 15:29:24.000000000 -0500
+@@ -260,6 +260,7 @@
+ 	int ret;
+ 
+ 	current->flags |= PF_MUTEX_TESTER;
++	set_freezable();
+ 	allow_signal(SIGHUP);
+ 
+ 	for(;;) {
+diff -Nurb linux-2.6.22-570/kernel/sched.c linux-2.6.22-try2/kernel/sched.c
+--- linux-2.6.22-570/kernel/sched.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/sched.c	2007-12-19 21:36:30.000000000 -0500
+@@ -51,8 +51,10 @@
+ #include <linux/times.h>
+ #include <linux/tsacct_kern.h>
+ #include <linux/kprobes.h>
++#include <linux/kgdb.h>
+ #include <linux/delayacct.h>
+ #include <linux/reciprocal_div.h>
++#include <linux/cpu_acct.h>
+ 
+ #include <asm/tlb.h>
+ #include <asm/unistd.h>
+@@ -3399,9 +3401,14 @@
+ 	struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat;
+ 	struct vx_info *vxi = p->vx_info;  /* p is _always_ current */
+ 	cputime64_t tmp;
++	struct rq *rq = this_rq();
+ 	int nice = (TASK_NICE(p) > 0);
+ 
+ 	p->utime = cputime_add(p->utime, cputime);
++
++	if (p != rq->idle)
++		cpuacct_charge(p, cputime);
++
+ 	vx_account_user(vxi, cputime, nice);
+ 
+ 	/* Add user time to cpustat. */
+@@ -3435,9 +3444,10 @@
+ 		cpustat->irq = cputime64_add(cpustat->irq, tmp);
+ 	else if (softirq_count())
+ 		cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
+-	else if (p != rq->idle)
++	else if (p != rq->idle) {
+ 		cpustat->system = cputime64_add(cpustat->system, tmp);
+-	else if (atomic_read(&rq->nr_iowait) > 0)
++		cpuacct_charge(p, cputime);
++	} else if (atomic_read(&rq->nr_iowait) > 0)
+ 		cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
+ 	else
+ 		cpustat->idle = cputime64_add(cpustat->idle, tmp);
+@@ -3462,8 +3472,10 @@
+ 			cpustat->iowait = cputime64_add(cpustat->iowait, tmp);
+ 		else
+ 			cpustat->idle = cputime64_add(cpustat->idle, tmp);
+-	} else
++	} else {
+ 		cpustat->steal = cputime64_add(cpustat->steal, tmp);
++		cpuacct_charge(p, -tmp);
++	}
+ }
+ 
+ static void task_running_tick(struct rq *rq, struct task_struct *p, int cpu)
+@@ -5287,8 +5299,6 @@
+ 		struct migration_req *req;
+ 		struct list_head *head;
+ 
+-		try_to_freeze();
+-
+ 		spin_lock_irq(&rq->lock);
+ 
+ 		if (cpu_is_offline(cpu)) {
+@@ -5522,7 +5532,6 @@
+ 		p = kthread_create(migration_thread, hcpu, "migration/%d",cpu);
+ 		if (IS_ERR(p))
+ 			return NOTIFY_BAD;
+-		p->flags |= PF_NOFREEZE;
+ 		kthread_bind(p, cpu);
+ 		/* Must be high prio: stop_machine expects to yield to it. */
+ 		rq = task_rq_lock(p, &flags);
+@@ -6926,33 +6935,6 @@
+ 	arch_destroy_sched_domains(cpu_map);
+ }
+ 
+-/*
+- * Partition sched domains as specified by the cpumasks below.
+- * This attaches all cpus from the cpumasks to the NULL domain,
+- * waits for a RCU quiescent period, recalculates sched
+- * domain information and then attaches them back to the
+- * correct sched domains
+- * Call with hotplug lock held
+- */
+-int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2)
+-{
+-	cpumask_t change_map;
+-	int err = 0;
+-
+-	cpus_and(*partition1, *partition1, cpu_online_map);
+-	cpus_and(*partition2, *partition2, cpu_online_map);
+-	cpus_or(change_map, *partition1, *partition2);
+-
+-	/* Detach sched domains from all of the affected cpus */
+-	detach_destroy_domains(&change_map);
+-	if (!cpus_empty(*partition1))
+-		err = build_sched_domains(partition1);
+-	if (!err && !cpus_empty(*partition2))
+-		err = build_sched_domains(partition2);
+-
+-	return err;
+-}
+-
+ #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+ int arch_reinit_sched_domains(void)
+ {
+@@ -7177,6 +7159,9 @@
+ #ifdef in_atomic
+ 	static unsigned long prev_jiffy;	/* ratelimiting */
+ 
++	if (atomic_read(&debugger_active))
++		return;
++
+ 	if ((in_atomic() || irqs_disabled()) &&
+ 	    system_state == SYSTEM_RUNNING && !oops_in_progress) {
+ 		if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
+diff -Nurb linux-2.6.22-570/kernel/seccomp.c linux-2.6.22-try2/kernel/seccomp.c
+--- linux-2.6.22-570/kernel/seccomp.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/kernel/seccomp.c	2007-12-19 15:29:24.000000000 -0500
+@@ -10,6 +10,7 @@
+ #include <linux/sched.h>
+ 
+ /* #define SECCOMP_DEBUG 1 */
++#define NR_SECCOMP_MODES 1
+ 
+ /*
+  * Secure computing mode 1 allows only read/write/exit/sigreturn.
+@@ -54,3 +55,28 @@
+ #endif
+ 	do_exit(SIGKILL);
+ }
++
++long prctl_get_seccomp(void)
++{
++	return current->seccomp.mode;
++}
++
++long prctl_set_seccomp(unsigned long seccomp_mode)
++{
++	long ret;
++
++	/* can set it only once to be even more secure */
++	ret = -EPERM;
++	if (unlikely(current->seccomp.mode))
++		goto out;
++
++	ret = -EINVAL;
++	if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
++		current->seccomp.mode = seccomp_mode;
++		set_thread_flag(TIF_SECCOMP);
++		ret = 0;
++	}
++
++ out:
++	return ret;
++}
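++
++/*
++ * Sketch: userspace drives this through prctl(2) via PR_GET_SECCOMP and
++ * PR_SET_SECCOMP (wired up in kernel/sys.c); note that mode 1 cannot be
++ * left again:
++ *
++ *	prctl(PR_GET_SECCOMP);		returns 0 while unconfined
++ *	prctl(PR_SET_SECCOMP, 1);	one-way switch into mode 1
++ */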
+diff -Nurb linux-2.6.22-570/kernel/signal.c linux-2.6.22-try2/kernel/signal.c
+--- linux-2.6.22-570/kernel/signal.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/signal.c	2007-12-19 15:29:23.000000000 -0500
+@@ -257,6 +257,16 @@
+ 	}
+ }
+ 
++int unhandled_signal(struct task_struct *tsk, int sig)
++{
++	if (is_init(tsk))
++		return 1;
++	if (tsk->ptrace & PT_PTRACED)
++		return 0;
++	return (tsk->sighand->action[sig-1].sa.sa_handler == SIG_IGN) ||
++		(tsk->sighand->action[sig-1].sa.sa_handler == SIG_DFL);
++}
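++
++/*
++ * Sketch of the intended use in the arch fault paths (the sysctl is
++ * show-unhandled-signals; the fault-handler placement is illustrative):
++ *
++ *	if (show_unhandled_signals && unhandled_signal(tsk, signr))
++ *		printk(KERN_INFO "%s: unhandled signal %d\n",
++ *		       tsk->comm, signr);
++ */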
++
+ 
+ /* Notify the system that a driver wants to block all signals for this
+  * process, and wants to be notified if any signals at all were to be
+diff -Nurb linux-2.6.22-570/kernel/softirq.c linux-2.6.22-try2/kernel/softirq.c
+--- linux-2.6.22-570/kernel/softirq.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/softirq.c	2007-12-19 15:29:24.000000000 -0500
+@@ -14,6 +14,7 @@
+ #include <linux/notifier.h>
+ #include <linux/percpu.h>
+ #include <linux/cpu.h>
++#include <linux/freezer.h>
+ #include <linux/kthread.h>
+ #include <linux/rcupdate.h>
+ #include <linux/smp.h>
+@@ -304,11 +305,6 @@
+ 	if (!in_interrupt() && local_softirq_pending())
+ 		invoke_softirq();
+ 
+-#ifdef CONFIG_NO_HZ
+-	/* Make sure that timer wheel updates are propagated */
+-	if (!in_interrupt() && idle_cpu(smp_processor_id()) && !need_resched())
+-		tick_nohz_stop_sched_tick();
+-#endif
+ 	preempt_enable_no_resched();
+ }
+ 
+@@ -490,7 +486,6 @@
+ static int ksoftirqd(void * __bind_cpu)
+ {
+ 	set_user_nice(current, 19);
+-	current->flags |= PF_NOFREEZE;
+ 
+ 	set_current_state(TASK_INTERRUPTIBLE);
+ 
+diff -Nurb linux-2.6.22-570/kernel/softlockup.c linux-2.6.22-try2/kernel/softlockup.c
+--- linux-2.6.22-570/kernel/softlockup.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/kernel/softlockup.c	2007-12-19 15:29:24.000000000 -0500
+@@ -10,9 +10,11 @@
+ #include <linux/cpu.h>
+ #include <linux/init.h>
+ #include <linux/delay.h>
++#include <linux/freezer.h>
+ #include <linux/kthread.h>
+ #include <linux/notifier.h>
+ #include <linux/module.h>
++#include <linux/kgdb.h>
+ 
+ static DEFINE_SPINLOCK(print_lock);
+ 
+@@ -47,6 +49,9 @@
+ void touch_softlockup_watchdog(void)
+ {
+ 	__raw_get_cpu_var(touch_timestamp) = get_timestamp();
++#ifdef CONFIG_KGDB
++	atomic_set(&kgdb_sync_softlockup[raw_smp_processor_id()], 0);
++#endif
+ }
+ EXPORT_SYMBOL(touch_softlockup_watchdog);
+ 
+@@ -116,7 +121,6 @@
+ 	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+ 
+ 	sched_setscheduler(current, SCHED_FIFO, &param);
+-	current->flags |= PF_NOFREEZE;
+ 
+ 	/* initialize timestamp */
+ 	touch_softlockup_watchdog();
+diff -Nurb linux-2.6.22-570/kernel/sys.c linux-2.6.22-try2/kernel/sys.c
+--- linux-2.6.22-570/kernel/sys.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/sys.c	2007-12-19 21:25:05.000000000 -0500
+@@ -31,6 +31,7 @@
+ #include <linux/cn_proc.h>
+ #include <linux/getcpu.h>
+ #include <linux/task_io_accounting_ops.h>
++#include <linux/seccomp.h>
+ #include <linux/cpu.h>
+ 
+ #include <linux/compat.h>
+@@ -1043,7 +1044,7 @@
+ 			return -EPERM;
+ 	}
+ 	if (new_egid != old_egid) {
+-		current->mm->dumpable = suid_dumpable;
++		set_dumpable(current->mm, suid_dumpable);
+ 		smp_wmb();
+ 	}
+ 	if (rgid != (gid_t) -1 ||
+@@ -1073,13 +1074,13 @@
+ 
+ 	if (capable(CAP_SETGID)) {
+ 		if (old_egid != gid) {
+-			current->mm->dumpable = suid_dumpable;
++			set_dumpable(current->mm, suid_dumpable);
+ 			smp_wmb();
+ 		}
+ 		current->gid = current->egid = current->sgid = current->fsgid = gid;
+ 	} else if ((gid == current->gid) || (gid == current->sgid)) {
+ 		if (old_egid != gid) {
+-			current->mm->dumpable = suid_dumpable;
++			set_dumpable(current->mm, suid_dumpable);
+ 			smp_wmb();
+ 		}
+ 		current->egid = current->fsgid = gid;
+@@ -1110,7 +1111,7 @@
+ 	switch_uid(new_user);
+ 
+ 	if (dumpclear) {
+-		current->mm->dumpable = suid_dumpable;
++		set_dumpable(current->mm, suid_dumpable);
+ 		smp_wmb();
+ 	}
+ 	current->uid = new_ruid;
+@@ -1166,7 +1167,7 @@
+ 		return -EAGAIN;
+ 
+ 	if (new_euid != old_euid) {
+-		current->mm->dumpable = suid_dumpable;
++		set_dumpable(current->mm, suid_dumpable);
+ 		smp_wmb();
+ 	}
+ 	current->fsuid = current->euid = new_euid;
+@@ -1216,7 +1217,7 @@
+ 		return -EPERM;
+ 
+ 	if (old_euid != uid) {
+-		current->mm->dumpable = suid_dumpable;
++		set_dumpable(current->mm, suid_dumpable);
+ 		smp_wmb();
+ 	}
+ 	current->fsuid = current->euid = uid;
+@@ -1261,7 +1262,7 @@
+ 	}
+ 	if (euid != (uid_t) -1) {
+ 		if (euid != current->euid) {
+-			current->mm->dumpable = suid_dumpable;
++			set_dumpable(current->mm, suid_dumpable);
+ 			smp_wmb();
+ 		}
+ 		current->euid = euid;
+@@ -1311,7 +1312,7 @@
+ 	}
+ 	if (egid != (gid_t) -1) {
+ 		if (egid != current->egid) {
+-			current->mm->dumpable = suid_dumpable;
++			set_dumpable(current->mm, suid_dumpable);
+ 			smp_wmb();
+ 		}
+ 		current->egid = egid;
+@@ -1357,7 +1358,7 @@
+ 	    uid == current->suid || uid == current->fsuid || 
+ 	    capable(CAP_SETUID)) {
+ 		if (uid != old_fsuid) {
+-			current->mm->dumpable = suid_dumpable;
++			set_dumpable(current->mm, suid_dumpable);
+ 			smp_wmb();
+ 		}
+ 		current->fsuid = uid;
+@@ -1386,7 +1387,7 @@
+ 	    gid == current->sgid || gid == current->fsgid || 
+ 	    capable(CAP_SETGID)) {
+ 		if (gid != old_fsgid) {
+-			current->mm->dumpable = suid_dumpable;
++			set_dumpable(current->mm, suid_dumpable);
+ 			smp_wmb();
+ 		}
+ 		current->fsgid = gid;
+@@ -2185,14 +2186,14 @@
+ 			error = put_user(current->pdeath_signal, (int __user *)arg2);
+ 			break;
+ 		case PR_GET_DUMPABLE:
+-			error = current->mm->dumpable;
++			error = get_dumpable(current->mm);
+ 			break;
+ 		case PR_SET_DUMPABLE:
+ 			if (arg2 < 0 || arg2 > 1) {
+ 				error = -EINVAL;
+ 				break;
+ 			}
+-			current->mm->dumpable = arg2;
++			set_dumpable(current->mm, arg2);
+ 			break;
+ 
+ 		case PR_SET_UNALIGN:
+@@ -2261,6 +2262,13 @@
+ 			error = SET_ENDIAN(current, arg2);
+ 			break;
+ 
++		case PR_GET_SECCOMP:
++			error = prctl_get_seccomp();
++			break;
++		case PR_SET_SECCOMP:
++			error = prctl_set_seccomp(arg2);
++			break;
++
+ 		default:
+ 			error = -EINVAL;
+ 			break;
+@@ -2297,3 +2305,61 @@
+ 	}
+ 	return err ? -EFAULT : 0;
+ }
++
++char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
++
++static void argv_cleanup(char **argv, char **envp)
++{
++	argv_free(argv);
++}
++
++/**
++ * orderly_poweroff - Trigger an orderly system poweroff
++ * @force: force poweroff if command execution fails
++ *
++ * This may be called from any context to trigger a system shutdown.
++ * If the orderly shutdown fails, it will force an immediate shutdown.
++ */
++int orderly_poweroff(bool force)
++{
++	int argc;
++	char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
++	static char *envp[] = {
++		"HOME=/",
++		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
++		NULL
++	};
++	int ret = -ENOMEM;
++	struct subprocess_info *info;
++
++	if (argv == NULL) {
++		printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
++		       __func__, poweroff_cmd);
++		goto out;
++	}
++
++	info = call_usermodehelper_setup(argv[0], argv, envp);
++	if (info == NULL) {
++		argv_free(argv);
++		goto out;
++	}
++
++	call_usermodehelper_setcleanup(info, argv_cleanup);
++
++	ret = call_usermodehelper_exec(info, UMH_NO_WAIT);
++
++  out:
++	if (ret && force) {
++		printk(KERN_WARNING "Failed to start orderly shutdown: "
++		       "forcing the issue\n");
++
++		/* I guess this should try to kick off some daemon to
++		   sync and poweroff asap.  Or not even bother syncing
++		   if we're doing an emergency shutdown? */
++		emergency_sync();
++		kernel_power_off();
++	}
++
++	return ret;
++}
++EXPORT_SYMBOL_GPL(orderly_poweroff);
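++
++/*
++ * Sketch: a caller such as a thermal or UPS driver requests an orderly
++ * shutdown, forcing one if the helper cannot be run:
++ *
++ *	orderly_poweroff(true);
++ */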
+diff -Nurb linux-2.6.22-570/kernel/sysctl.c linux-2.6.22-try2/kernel/sysctl.c
+--- linux-2.6.22-570/kernel/sysctl.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/sysctl.c	2007-12-19 15:29:24.000000000 -0500
+@@ -45,13 +45,12 @@
+ #include <linux/syscalls.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/acpi.h>
++#include <linux/reboot.h>
++#include <linux/fs.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/processor.h>
+ 
+-extern int proc_nr_files(ctl_table *table, int write, struct file *filp,
+-                     void __user *buffer, size_t *lenp, loff_t *ppos);
+-
+ #ifdef CONFIG_X86
+ #include <asm/nmi.h>
+ #include <asm/stacktrace.h>
+@@ -203,7 +202,10 @@
+ 		.mode		= 0555,
+ 		.child		= dev_table,
+ 	},
+-
++/*
++ * NOTE: do not add new entries to this table unless you have read
++ * Documentation/sysctl/ctl_unnumbered.txt
++ */
+ 	{ .ctl_name = 0 }
+ };
+ 
+@@ -217,6 +219,15 @@
+ 		.proc_handler	= &proc_dointvec,
+ 	},
+ 	{
++		.ctl_name	= KERN_POWEROFF_CMD,
++		.procname	= "poweroff_cmd",
++		.data		= &poweroff_cmd,
++		.maxlen		= POWEROFF_CMD_PATH_LEN,
++		.mode		= 0644,
++		.proc_handler	= &proc_dostring,
++		.strategy	= &sysctl_string,
++	},
++	{
+ 		.ctl_name	= KERN_CORE_USES_PID,
+ 		.procname	= "core_uses_pid",
+ 		.data		= &core_uses_pid,
+@@ -625,7 +636,20 @@
+ 		.proc_handler   = &proc_dointvec,
+ 	},
+ #endif
+-
++#ifdef CONFIG_SECURITY
++	{
++		.ctl_name	= CTL_UNNUMBERED,
++		.procname	= "mmap_min_addr",
++		.data		= &mmap_min_addr,
++		.maxlen         = sizeof(unsigned long),
++		.mode		= 0644,
++		.proc_handler	= &proc_doulongvec_minmax,
++	},
++#endif
++/*
++ * NOTE: do not add new entries to this table unless you have read
++ * Documentation/sysctl/ctl_unnumbered.txt
++ */
+ 	{ .ctl_name = 0 }
+ };
+ 
+@@ -744,6 +768,14 @@
+ 		.mode		= 0644,
+ 		.proc_handler	= &proc_dointvec,
+ 	 },
++	 {
++		.ctl_name	= VM_HUGETLB_TREAT_MOVABLE,
++		.procname	= "hugepages_treat_as_movable",
++		.data		= &hugepages_treat_as_movable,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &hugetlb_treat_movable_handler,
++	},
+ #endif
+ 	{
+ 		.ctl_name	= VM_LOWMEM_RESERVE_RATIO,
+@@ -892,6 +924,10 @@
+ 		.extra1		= &zero,
+ 	},
+ #endif
++/*
++ * NOTE: do not add new entries to this table unless you have read
++ * Documentation/sysctl/ctl_unnumbered.txt
++ */
+ 	{ .ctl_name = 0 }
+ };
+ 
+@@ -1032,10 +1068,28 @@
+ 		.child		= binfmt_misc_table,
+ 	},
+ #endif
++/*
++ * NOTE: do not add new entries to this table unless you have read
++ * Documentation/sysctl/ctl_unnumbered.txt
++ */
+ 	{ .ctl_name = 0 }
+ };
+ 
+ static ctl_table debug_table[] = {
++#ifdef CONFIG_X86
++	{
++		.ctl_name	= DEBUG_UNHANDLED_SIGNALS,
++		.procname	= "show-unhandled-signals",
++		.data		= &show_unhandled_signals,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= proc_dointvec
++	},
++#endif
++/*
++ * NOTE: do not add new entries to this table unless you have read
++ * Documentation/sysctl/ctl_unnumbered.txt
++ */
+ 	{ .ctl_name = 0 }
+ };
+ 
+diff -Nurb linux-2.6.22-570/kernel/taskstats.c linux-2.6.22-try2/kernel/taskstats.c
+--- linux-2.6.22-570/kernel/taskstats.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/kernel/taskstats.c	2007-12-19 15:29:24.000000000 -0500
+@@ -196,6 +196,8 @@
+ 
+ 	/* fill in basic acct fields */
+ 	stats->version = TASKSTATS_VERSION;
++	stats->nvcsw = tsk->nvcsw;
++	stats->nivcsw = tsk->nivcsw;
+ 	bacct_add_tsk(stats, tsk);
+ 
+ 	/* fill in extended acct fields */
+@@ -242,6 +244,8 @@
+ 		 */
+ 		delayacct_add_tsk(stats, tsk);
+ 
++		stats->nvcsw += tsk->nvcsw;
++		stats->nivcsw += tsk->nivcsw;
+ 	} while_each_thread(first, tsk);
+ 
+ 	unlock_task_sighand(first, &flags);
+diff -Nurb linux-2.6.22-570/kernel/time/tick-sched.c linux-2.6.22-try2/kernel/time/tick-sched.c
+--- linux-2.6.22-570/kernel/time/tick-sched.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/kernel/time/tick-sched.c	2007-12-19 15:29:22.000000000 -0500
+@@ -153,6 +153,7 @@
+ 	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies, flags;
+ 	struct tick_sched *ts;
+ 	ktime_t last_update, expires, now, delta;
++	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
+ 	int cpu;
+ 
+ 	local_irq_save(flags);
+@@ -290,11 +291,34 @@
+ out:
+ 	ts->next_jiffies = next_jiffies;
+ 	ts->last_jiffies = last_jiffies;
++	ts->sleep_length = ktime_sub(dev->next_event, now);
+ end:
+ 	local_irq_restore(flags);
+ }
+ 
+ /**
++ * tick_nohz_get_sleep_length - return the length of the current sleep
++ *
++ * Called from power state control code with interrupts disabled
++ */
++ktime_t tick_nohz_get_sleep_length(void)
++{
++	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
++
++	return ts->sleep_length;
++}
++
++/**
++ * tick_nohz_get_idle_jiffies - returns the current idle jiffie count
++ */
++unsigned long tick_nohz_get_idle_jiffies(void)
++{
++	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
++
++	return ts->idle_jiffies;
++}
++
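++/*
++ * Sketch: a power-management governor would consult the projected sleep
++ * time when choosing how deeply to idle (the threshold name is
++ * illustrative):
++ *
++ *	ktime_t slept = tick_nohz_get_sleep_length();
++ *
++ *	if (ktime_to_ns(slept) > target_residency_ns)
++ *		choose_deeper_idle_state();
++ */
++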
++/**
+  * nohz_restart_sched_tick - restart the idle tick from the idle task
+  *
+  * Restart the idle tick when the CPU is woken up from idle
+diff -Nurb linux-2.6.22-570/kernel/timer.c linux-2.6.22-try2/kernel/timer.c
+--- linux-2.6.22-570/kernel/timer.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/timer.c	2007-12-19 15:52:07.000000000 -0500
+@@ -36,6 +36,7 @@
+ #include <linux/delay.h>
+ #include <linux/tick.h>
+ #include <linux/kallsyms.h>
++#include <linux/kgdb.h>
+ #include <linux/vs_base.h>
+ #include <linux/vs_cvirt.h>
+ #include <linux/vs_pid.h>
+@@ -886,7 +887,13 @@
+  */
+ void run_local_timers(void)
+ {
++#ifdef CONFIG_KGDB
++	int this_cpu = smp_processor_id();
++#endif
+ 	raise_softirq(TIMER_SOFTIRQ);
++#ifdef CONFIG_KGDB
++	if (!atomic_read(&kgdb_sync_softlockup[this_cpu]))
++#endif
+ 	softlockup_tick();
+ }
+ 
+diff -Nurb linux-2.6.22-570/kernel/unwind.c linux-2.6.22-try2/kernel/unwind.c
+--- linux-2.6.22-570/kernel/unwind.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/kernel/unwind.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,1288 @@
++/*
++ * Copyright (C) 2002-2006 Novell, Inc.
++ *	Jan Beulich <jbeulich@novell.com>
++ * This code is released under version 2 of the GNU GPL.
++ *
++ * A simple API for unwinding kernel stacks.  This is used for
++ * debugging and error reporting purposes.  The kernel doesn't need
++ * full-blown stack unwinding with all the bells and whistles, so there
++ * is not much point in implementing the full Dwarf2 unwind API.
++ */
++
++#include <linux/unwind.h>
++#include <linux/module.h>
++#include <linux/bootmem.h>
++#include <linux/sort.h>
++#include <linux/stop_machine.h>
++#include <linux/uaccess.h>
++#include <asm/sections.h>
++#include <asm/uaccess.h>
++#include <asm/unaligned.h>
++
++extern const char __start_unwind[], __end_unwind[];
++extern const u8 __start_unwind_hdr[], __end_unwind_hdr[];
++
++#define MAX_STACK_DEPTH 8
++
++#define EXTRA_INFO(f) { \
++		BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
++		                  % FIELD_SIZEOF(struct unwind_frame_info, f)) \
++		+ offsetof(struct unwind_frame_info, f) \
++		  / FIELD_SIZEOF(struct unwind_frame_info, f), \
++		FIELD_SIZEOF(struct unwind_frame_info, f) \
++	}
++#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
++
++static const struct {
++	unsigned offs:BITS_PER_LONG / 2;
++	unsigned width:BITS_PER_LONG / 2;
++} reg_info[] = {
++	UNW_REGISTER_INFO
++};
++
++#undef PTREGS_INFO
++#undef EXTRA_INFO
++
++#ifndef REG_INVALID
++#define REG_INVALID(r) (reg_info[r].width == 0)
++#endif
++
++#define DW_CFA_nop                          0x00
++#define DW_CFA_set_loc                      0x01
++#define DW_CFA_advance_loc1                 0x02
++#define DW_CFA_advance_loc2                 0x03
++#define DW_CFA_advance_loc4                 0x04
++#define DW_CFA_offset_extended              0x05
++#define DW_CFA_restore_extended             0x06
++#define DW_CFA_undefined                    0x07
++#define DW_CFA_same_value                   0x08
++#define DW_CFA_register                     0x09
++#define DW_CFA_remember_state               0x0a
++#define DW_CFA_restore_state                0x0b
++#define DW_CFA_def_cfa                      0x0c
++#define DW_CFA_def_cfa_register             0x0d
++#define DW_CFA_def_cfa_offset               0x0e
++#define DW_CFA_def_cfa_expression           0x0f
++#define DW_CFA_expression                   0x10
++#define DW_CFA_offset_extended_sf           0x11
++#define DW_CFA_def_cfa_sf                   0x12
++#define DW_CFA_def_cfa_offset_sf            0x13
++#define DW_CFA_val_offset                   0x14
++#define DW_CFA_val_offset_sf                0x15
++#define DW_CFA_val_expression               0x16
++#define DW_CFA_lo_user                      0x1c
++#define DW_CFA_GNU_window_save              0x2d
++#define DW_CFA_GNU_args_size                0x2e
++#define DW_CFA_GNU_negative_offset_extended 0x2f
++#define DW_CFA_hi_user                      0x3f
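++
++/*
++ * In the CFI instruction encoding the two top bits of the first byte
++ * select the primary opcodes: 0x40|delta advances the current location,
++ * 0x80|reg is followed by a ULEB128 factored CFA offset for that
++ * register, and 0xc0|reg is DW_CFA_restore.  A zero top pair selects
++ * one of the extended opcodes defined above.
++ */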
++
++#define DW_EH_PE_FORM     0x07
++#define DW_EH_PE_native   0x00
++#define DW_EH_PE_leb128   0x01
++#define DW_EH_PE_data2    0x02
++#define DW_EH_PE_data4    0x03
++#define DW_EH_PE_data8    0x04
++#define DW_EH_PE_signed   0x08
++#define DW_EH_PE_ADJUST   0x70
++#define DW_EH_PE_abs      0x00
++#define DW_EH_PE_pcrel    0x10
++#define DW_EH_PE_textrel  0x20
++#define DW_EH_PE_datarel  0x30
++#define DW_EH_PE_funcrel  0x40
++#define DW_EH_PE_aligned  0x50
++#define DW_EH_PE_indirect 0x80
++#define DW_EH_PE_omit     0xff
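++
++/*
++ * Layout of a pointer-encoding byte: the low nibble selects the storage
++ * form (native word, data2/4/8 or LEB128, optionally signed), bits 4-6
++ * select the base the value is relative to (absolute, pc-, text- or
++ * data-relative), and bit 7 requests an extra indirection through the
++ * computed address.  0xff means the datum is omitted entirely.
++ */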
++
++typedef unsigned long uleb128_t;
++typedef   signed long sleb128_t;
++#define sleb128abs __builtin_labs
++
++static struct unwind_table {
++	struct {
++		unsigned long pc;
++		unsigned long range;
++	} core, init;
++	const void *address;
++	unsigned long size;
++	const unsigned char *header;
++	unsigned long hdrsz;
++	struct unwind_table *link;
++	const char *name;
++} root_table;
++
++struct unwind_item {
++	enum item_location {
++		Nowhere,
++		Memory,
++		Register,
++		Value
++	} where;
++	uleb128_t value;
++};
++
++struct unwind_state {
++	uleb128_t loc, org;
++	const u8 *cieStart, *cieEnd;
++	uleb128_t codeAlign;
++	sleb128_t dataAlign;
++	struct cfa {
++		uleb128_t reg, offs;
++	} cfa;
++	struct unwind_item regs[ARRAY_SIZE(reg_info)];
++	unsigned stackDepth:8;
++	unsigned version:8;
++	const u8 *label;
++	const u8 *stack[MAX_STACK_DEPTH];
++};
++
++static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
++
++static unsigned unwind_debug;
++static int __init unwind_debug_setup(char *s)
++{
++	unwind_debug = simple_strtoul(s, NULL, 0);
++	return 1;
++}
++__setup("unwind_debug=", unwind_debug_setup);
++#define dprintk(lvl, fmt, args...) \
++	((void)(lvl > unwind_debug \
++	 || printk(KERN_DEBUG "unwind: " fmt "\n", ##args)))
++
++static struct unwind_table *find_table(unsigned long pc)
++{
++	struct unwind_table *table;
++
++	for (table = &root_table; table; table = table->link)
++		if ((pc >= table->core.pc
++		     && pc < table->core.pc + table->core.range)
++		    || (pc >= table->init.pc
++		        && pc < table->init.pc + table->init.range))
++			break;
++
++	return table;
++}
++
++static unsigned long read_pointer(const u8 **pLoc,
++                                  const void *end,
++                                  signed ptrType,
++                                  unsigned long text_base,
++                                  unsigned long data_base);
++
++static void init_unwind_table(struct unwind_table *table,
++                              const char *name,
++                              const void *core_start,
++                              unsigned long core_size,
++                              const void *init_start,
++                              unsigned long init_size,
++                              const void *table_start,
++                              unsigned long table_size,
++                              const u8 *header_start,
++                              unsigned long header_size)
++{
++	const u8 *ptr = header_start + 4;
++	const u8 *end = header_start + header_size;
++
++	table->core.pc = (unsigned long)core_start;
++	table->core.range = core_size;
++	table->init.pc = (unsigned long)init_start;
++	table->init.range = init_size;
++	table->address = table_start;
++	table->size = table_size;
++	/* See if the linker-provided table looks valid. */
++	if (header_size <= 4
++	    || header_start[0] != 1
++	    || (void *)read_pointer(&ptr, end, header_start[1], 0, 0)
++	       != table_start
++	    || !read_pointer(&ptr, end, header_start[2], 0, 0)
++	    || !read_pointer(&ptr, end, header_start[3], 0,
++	                     (unsigned long)header_start)
++	    || !read_pointer(&ptr, end, header_start[3], 0,
++	                     (unsigned long)header_start))
++		header_start = NULL;
++	table->hdrsz = header_size;
++	smp_wmb();
++	table->header = header_start;
++	table->link = NULL;
++	table->name = name;
++}
++
++void __init unwind_init(void)
++{
++	init_unwind_table(&root_table, "kernel",
++	                  _text, _end - _text,
++	                  NULL, 0,
++	                  __start_unwind, __end_unwind - __start_unwind,
++	                  __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr);
++}
++
++static const u32 bad_cie, not_fde;
++static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *);
++static signed fde_pointer_type(const u32 *cie);
++
++struct eh_frame_hdr_table_entry {
++	unsigned long start, fde;
++};
++
++static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2)
++{
++	const struct eh_frame_hdr_table_entry *e1 = p1;
++	const struct eh_frame_hdr_table_entry *e2 = p2;
++
++	return (e1->start > e2->start) - (e1->start < e2->start);
++}
++
++static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size)
++{
++	struct eh_frame_hdr_table_entry *e1 = p1;
++	struct eh_frame_hdr_table_entry *e2 = p2;
++	unsigned long v;
++
++	v = e1->start;
++	e1->start = e2->start;
++	e2->start = v;
++	v = e1->fde;
++	e1->fde = e2->fde;
++	e2->fde = v;
++}
++
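++/*
++ * Build the equivalent of a .eh_frame_hdr binary search table when the
++ * linker did not emit a usable one: validate each FDE in a first pass,
++ * then record an (initial location, FDE address) pair per FDE and sort
++ * the pairs by location for the binary search in unwind().
++ */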
++static void __init setup_unwind_table(struct unwind_table *table,
++					void *(*alloc)(unsigned long))
++{
++	const u8 *ptr;
++	unsigned long tableSize = table->size, hdrSize;
++	unsigned n;
++	const u32 *fde;
++	struct {
++		u8 version;
++		u8 eh_frame_ptr_enc;
++		u8 fde_count_enc;
++		u8 table_enc;
++		unsigned long eh_frame_ptr;
++		unsigned int fde_count;
++		struct eh_frame_hdr_table_entry table[];
++	} __attribute__((__packed__)) *header;
++
++	if (table->header)
++		return;
++
++	if (table->hdrsz)
++		printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n",
++		       table->name);
++
++	if (tableSize & (sizeof(*fde) - 1))
++		return;
++
++	for (fde = table->address, n = 0;
++	     tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde;
++	     tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
++		const u32 *cie = cie_for_fde(fde, table);
++		signed ptrType;
++
++		if (cie == &not_fde)
++			continue;
++		if (cie == NULL
++		    || cie == &bad_cie
++		    || (ptrType = fde_pointer_type(cie)) < 0)
++			return;
++		ptr = (const u8 *)(fde + 2);
++		if (!read_pointer(&ptr,
++		                  (const u8 *)(fde + 1) + *fde,
++		                  ptrType, 0, 0))
++			return;
++		++n;
++	}
++
++	if (tableSize || !n)
++		return;
++
++	hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int)
++	        + 2 * n * sizeof(unsigned long);
++	dprintk(2, "Binary lookup table size for %s: %lu bytes", table->name, hdrSize);
++	header = alloc(hdrSize);
++	if (!header)
++		return;
++	header->version          = 1;
++	header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native;
++	header->fde_count_enc    = DW_EH_PE_abs|DW_EH_PE_data4;
++	header->table_enc        = DW_EH_PE_abs|DW_EH_PE_native;
++	put_unaligned((unsigned long)table->address, &header->eh_frame_ptr);
++	BUILD_BUG_ON(offsetof(typeof(*header), fde_count)
++	             % __alignof(typeof(header->fde_count)));
++	header->fde_count        = n;
++
++	BUILD_BUG_ON(offsetof(typeof(*header), table)
++	             % __alignof(typeof(*header->table)));
++	for (fde = table->address, tableSize = table->size, n = 0;
++	     tableSize;
++	     tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) {
++		const u32 *cie = fde + 1 - fde[1] / sizeof(*fde);
++
++		if (!fde[1])
++			continue; /* this is a CIE */
++		ptr = (const u8 *)(fde + 2);
++		header->table[n].start = read_pointer(&ptr,
++		                                      (const u8 *)(fde + 1) + *fde,
++		                                      fde_pointer_type(cie), 0, 0);
++		header->table[n].fde = (unsigned long)fde;
++		++n;
++	}
++	WARN_ON(n != header->fde_count);
++
++	sort(header->table,
++	     n,
++	     sizeof(*header->table),
++	     cmp_eh_frame_hdr_table_entries,
++	     swap_eh_frame_hdr_table_entries);
++
++	table->hdrsz = hdrSize;
++	smp_wmb();
++	table->header = (const void *)header;
++}
++
++static void *__init balloc(unsigned long sz)
++{
++	return __alloc_bootmem_nopanic(sz,
++	                               sizeof(unsigned int),
++	                               __pa(MAX_DMA_ADDRESS));
++}
++
++void __init unwind_setup(void)
++{
++	setup_unwind_table(&root_table, balloc);
++}
++
++#ifdef CONFIG_MODULES
++
++static struct unwind_table *last_table;
++
++/* Must be called with module_mutex held. */
++void *unwind_add_table(struct module *module,
++                       const void *table_start,
++                       unsigned long table_size)
++{
++	struct unwind_table *table;
++
++	if (table_size <= 0)
++		return NULL;
++
++	table = kmalloc(sizeof(*table), GFP_KERNEL);
++	if (!table)
++		return NULL;
++
++	init_unwind_table(table, module->name,
++	                  module->module_core, module->core_size,
++	                  module->module_init, module->init_size,
++	                  table_start, table_size,
++	                  NULL, 0);
++
++	if (last_table)
++		last_table->link = table;
++	else
++		root_table.link = table;
++	last_table = table;
++
++	return table;
++}
++
++struct unlink_table_info
++{
++	struct unwind_table *table;
++	int init_only;
++};
++
++static int unlink_table(void *arg)
++{
++	struct unlink_table_info *info = arg;
++	struct unwind_table *table = info->table, *prev;
++
++	for (prev = &root_table; prev->link && prev->link != table; prev = prev->link)
++		;
++
++	if (prev->link) {
++		if (info->init_only) {
++			table->init.pc = 0;
++			table->init.range = 0;
++			info->table = NULL;
++		} else {
++			prev->link = table->link;
++			if (!prev->link)
++				last_table = prev;
++		}
++	} else
++		info->table = NULL;
++
++	return 0;
++}
++
++/* Must be called with module_mutex held. */
++void unwind_remove_table(void *handle, int init_only)
++{
++	struct unwind_table *table = handle;
++	struct unlink_table_info info;
++
++	if (!table || table == &root_table)
++		return;
++
++	if (init_only && table == last_table) {
++		table->init.pc = 0;
++		table->init.range = 0;
++		return;
++	}
++
++	info.table = table;
++	info.init_only = init_only;
++	stop_machine_run(unlink_table, &info, NR_CPUS);
++
++	if (info.table)
++		kfree(table);
++}
++
++#endif /* CONFIG_MODULES */
++
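++/*
++ * DWARF stores variable-length integers as LEB128: seven payload bits
++ * per byte, least significant group first, the top bit marking a
++ * continuation byte.  For example, the unsigned value 624485 (0x98765)
++ * is encoded as 0xe5 0x8e 0x26:
++ *
++ *	0xe5 & 0x7f -> 0x65 << 0
++ *	0x8e & 0x7f -> 0x0e << 7
++ *	0x26        -> 0x26 << 14	(top bit clear: stop)
++ *
++ * The decoders below also bail out (setting cur = end + 1) when a value
++ * would overflow the result type.
++ */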
++static uleb128_t get_uleb128(const u8 **pcur, const u8 *end)
++{
++	const u8 *cur = *pcur;
++	uleb128_t value;
++	unsigned shift;
++
++	for (shift = 0, value = 0; cur < end; shift += 7) {
++		if (shift + 7 > 8 * sizeof(value)
++		    && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
++			cur = end + 1;
++			break;
++		}
++		value |= (uleb128_t)(*cur & 0x7f) << shift;
++		if (!(*cur++ & 0x80))
++			break;
++	}
++	*pcur = cur;
++
++	return value;
++}
++
++static sleb128_t get_sleb128(const u8 **pcur, const u8 *end)
++{
++	const u8 *cur = *pcur;
++	sleb128_t value;
++	unsigned shift;
++
++	for (shift = 0, value = 0; cur < end; shift += 7) {
++		if (shift + 7 > 8 * sizeof(value)
++		    && (*cur & 0x7fU) >= (1U << (8 * sizeof(value) - shift))) {
++			cur = end + 1;
++			break;
++		}
++		value |= (sleb128_t)(*cur & 0x7f) << shift;
++		if (!(*cur & 0x80)) {
++			value |= -(*cur++ & 0x40) << shift;
++			break;
++		}
++	}
++	*pcur = cur;
++
++	return value;
++}
++
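++/*
++ * .eh_frame record layout: each entry starts with a 32-bit length that
++ * excludes the length word itself, followed by a CIE-id word that is
++ * zero for a CIE and, for an FDE, holds the distance back to the owning
++ * CIE.  cie_for_fde() validates exactly this.
++ */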
++static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table)
++{
++	const u32 *cie;
++
++	if (!*fde || (*fde & (sizeof(*fde) - 1)))
++		return &bad_cie;
++	if (!fde[1])
++		return &not_fde; /* this is a CIE */
++	if ((fde[1] & (sizeof(*fde) - 1))
++	    || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address)
++		return NULL; /* this is not a valid FDE */
++	cie = fde + 1 - fde[1] / sizeof(*fde);
++	if (*cie <= sizeof(*cie) + 4
++	    || *cie >= fde[1] - sizeof(*fde)
++	    || (*cie & (sizeof(*cie) - 1))
++	    || cie[1])
++		return NULL; /* this is not a (valid) CIE */
++	return cie;
++}
++
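++/*
++ * Decode one encoded pointer: fetch the raw datum according to the form
++ * bits, apply the base adjustment (a pc-relative value is added to the
++ * address the datum was read from), and dereference the result if
++ * DW_EH_PE_indirect is set.  Returns 0 on any decoding failure.
++ */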
++static unsigned long read_pointer(const u8 **pLoc,
++                                  const void *end,
++                                  signed ptrType,
++                                  unsigned long text_base,
++                                  unsigned long data_base)
++{
++	unsigned long value = 0;
++	union {
++		const u8 *p8;
++		const u16 *p16u;
++		const s16 *p16s;
++		const u32 *p32u;
++		const s32 *p32s;
++		const unsigned long *pul;
++	} ptr;
++
++	if (ptrType < 0 || ptrType == DW_EH_PE_omit) {
++		dprintk(1, "Invalid pointer encoding %02X (%p,%p).", ptrType, *pLoc, end);
++		return 0;
++	}
++	ptr.p8 = *pLoc;
++	switch(ptrType & DW_EH_PE_FORM) {
++	case DW_EH_PE_data2:
++		if (end < (const void *)(ptr.p16u + 1)) {
++			dprintk(1, "Data16 overrun (%p,%p).", ptr.p8, end);
++			return 0;
++		}
++		if(ptrType & DW_EH_PE_signed)
++			value = get_unaligned(ptr.p16s++);
++		else
++			value = get_unaligned(ptr.p16u++);
++		break;
++	case DW_EH_PE_data4:
++#ifdef CONFIG_64BIT
++		if (end < (const void *)(ptr.p32u + 1)) {
++			dprintk(1, "Data32 overrun (%p,%p).", ptr.p8, end);
++			return 0;
++		}
++		if(ptrType & DW_EH_PE_signed)
++			value = get_unaligned(ptr.p32s++);
++		else
++			value = get_unaligned(ptr.p32u++);
++		break;
++	case DW_EH_PE_data8:
++		BUILD_BUG_ON(sizeof(u64) != sizeof(value));
++#else
++		BUILD_BUG_ON(sizeof(u32) != sizeof(value));
++#endif
++	case DW_EH_PE_native:
++		if (end < (const void *)(ptr.pul + 1)) {
++			dprintk(1, "DataUL overrun (%p,%p).", ptr.p8, end);
++			return 0;
++		}
++		value = get_unaligned(ptr.pul++);
++		break;
++	case DW_EH_PE_leb128:
++		BUILD_BUG_ON(sizeof(uleb128_t) > sizeof(value));
++		value = ptrType & DW_EH_PE_signed
++		        ? get_sleb128(&ptr.p8, end)
++		        : get_uleb128(&ptr.p8, end);
++		if ((const void *)ptr.p8 > end) {
++			dprintk(1, "DataLEB overrun (%p,%p).", ptr.p8, end);
++			return 0;
++		}
++		break;
++	default:
++		dprintk(2, "Cannot decode pointer type %02X (%p,%p).",
++		        ptrType, ptr.p8, end);
++		return 0;
++	}
++	switch(ptrType & DW_EH_PE_ADJUST) {
++	case DW_EH_PE_abs:
++		break;
++	case DW_EH_PE_pcrel:
++		value += (unsigned long)*pLoc;
++		break;
++	case DW_EH_PE_textrel:
++		if (likely(text_base)) {
++			value += text_base;
++			break;
++		}
++		dprintk(2, "Text-relative encoding %02X (%p,%p), but zero text base.",
++		        ptrType, *pLoc, end);
++		return 0;
++	case DW_EH_PE_datarel:
++		if (likely(data_base)) {
++			value += data_base;
++			break;
++		}
++		dprintk(2, "Data-relative encoding %02X (%p,%p), but zero data base.",
++		        ptrType, *pLoc, end);
++		return 0;
++	default:
++		dprintk(2, "Cannot adjust pointer type %02X (%p,%p).",
++		        ptrType, *pLoc, end);
++		return 0;
++	}
++	if ((ptrType & DW_EH_PE_indirect)
++	    && probe_kernel_address((unsigned long *)value, value)) {
++		dprintk(1, "Cannot read indirect value %lx (%p,%p).",
++		        value, *pLoc, end);
++		return 0;
++	}
++	*pLoc = ptr.p8;
++
++	return value;
++}
++
++static signed fde_pointer_type(const u32 *cie)
++{
++	const u8 *ptr = (const u8 *)(cie + 2);
++	unsigned version = *ptr;
++
++	if (version != 1)
++		return -1; /* unsupported */
++	if (*++ptr) {
++		const char *aug;
++		const u8 *end = (const u8 *)(cie + 1) + *cie;
++		uleb128_t len;
++
++		/* check if augmentation size is first (and thus present) */
++		if (*ptr != 'z')
++			return -1;
++		/* check if augmentation string is nul-terminated */
++		if ((ptr = memchr(aug = (const void *)ptr, 0, end - ptr)) == NULL)
++			return -1;
++		++ptr; /* skip terminator */
++		get_uleb128(&ptr, end); /* skip code alignment */
++		get_sleb128(&ptr, end); /* skip data alignment */
++		/* skip return address column */
++		version <= 1 ? (void)++ptr : (void)get_uleb128(&ptr, end);
++		len = get_uleb128(&ptr, end); /* augmentation length */
++		if (ptr + len < ptr || ptr + len > end)
++			return -1;
++		end = ptr + len;
++		while (*++aug) {
++			if (ptr >= end)
++				return -1;
++			switch(*aug) {
++			case 'L':
++				++ptr;
++				break;
++			case 'P': {
++					signed ptrType = *ptr++;
++
++					if (!read_pointer(&ptr, end, ptrType, 0, 0)
++					    || ptr > end)
++						return -1;
++				}
++				break;
++			case 'R':
++				return *ptr;
++			default:
++				return -1;
++			}
++		}
++	}
++	return DW_EH_PE_native|DW_EH_PE_abs;
++}
++
++static int advance_loc(unsigned long delta, struct unwind_state *state)
++{
++	state->loc += delta * state->codeAlign;
++
++	return delta > 0;
++}
++
++static void set_rule(uleb128_t reg,
++                     enum item_location where,
++                     uleb128_t value,
++                     struct unwind_state *state)
++{
++	if (reg < ARRAY_SIZE(state->regs)) {
++		state->regs[reg].where = where;
++		state->regs[reg].value = value;
++	}
++}
++
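++/*
++ * Interpret the CFI byte code: replay the CIE's initial instructions,
++ * then the FDE's, advancing a virtual location and accumulating
++ * per-register rules in *state until targetLoc has been passed (or
++ * the instruction stream ends).
++ */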
++static int processCFI(const u8 *start,
++                      const u8 *end,
++                      unsigned long targetLoc,
++                      signed ptrType,
++                      struct unwind_state *state)
++{
++	union {
++		const u8 *p8;
++		const u16 *p16;
++		const u32 *p32;
++	} ptr;
++	int result = 1;
++
++	if (start != state->cieStart) {
++		state->loc = state->org;
++		result = processCFI(state->cieStart, state->cieEnd, 0, ptrType, state);
++		if (targetLoc == 0 && state->label == NULL)
++			return result;
++	}
++	for (ptr.p8 = start; result && ptr.p8 < end; ) {
++		switch(*ptr.p8 >> 6) {
++			uleb128_t value;
++
++		case 0:
++			switch(*ptr.p8++) {
++			case DW_CFA_nop:
++				break;
++			case DW_CFA_set_loc:
++				state->loc = read_pointer(&ptr.p8, end, ptrType, 0, 0);
++				if (state->loc == 0)
++					result = 0;
++				break;
++			case DW_CFA_advance_loc1:
++				result = ptr.p8 < end && advance_loc(*ptr.p8++, state);
++				break;
++			case DW_CFA_advance_loc2:
++				result = ptr.p8 <= end + 2
++				         && advance_loc(*ptr.p16++, state);
++				break;
++			case DW_CFA_advance_loc4:
++				result = ptr.p8 <= end + 4
++				         && advance_loc(*ptr.p32++, state);
++				break;
++			case DW_CFA_offset_extended:
++				value = get_uleb128(&ptr.p8, end);
++				set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
++				break;
++			case DW_CFA_val_offset:
++				value = get_uleb128(&ptr.p8, end);
++				set_rule(value, Value, get_uleb128(&ptr.p8, end), state);
++				break;
++			case DW_CFA_offset_extended_sf:
++				value = get_uleb128(&ptr.p8, end);
++				set_rule(value, Memory, get_sleb128(&ptr.p8, end), state);
++				break;
++			case DW_CFA_val_offset_sf:
++				value = get_uleb128(&ptr.p8, end);
++				set_rule(value, Value, get_sleb128(&ptr.p8, end), state);
++				break;
++			case DW_CFA_restore_extended:
++			case DW_CFA_undefined:
++			case DW_CFA_same_value:
++				set_rule(get_uleb128(&ptr.p8, end), Nowhere, 0, state);
++				break;
++			case DW_CFA_register:
++				value = get_uleb128(&ptr.p8, end);
++				set_rule(value,
++				         Register,
++				         get_uleb128(&ptr.p8, end), state);
++				break;
++			case DW_CFA_remember_state:
++				if (ptr.p8 == state->label) {
++					state->label = NULL;
++					return 1;
++				}
++				if (state->stackDepth >= MAX_STACK_DEPTH) {
++					dprintk(1, "State stack overflow (%p,%p).", ptr.p8, end);
++					return 0;
++				}
++				state->stack[state->stackDepth++] = ptr.p8;
++				break;
++			case DW_CFA_restore_state:
++				if (state->stackDepth) {
++					const uleb128_t loc = state->loc;
++					const u8 *label = state->label;
++
++					state->label = state->stack[state->stackDepth - 1];
++					memcpy(&state->cfa, &badCFA, sizeof(state->cfa));
++					memset(state->regs, 0, sizeof(state->regs));
++					state->stackDepth = 0;
++					result = processCFI(start, end, 0, ptrType, state);
++					state->loc = loc;
++					state->label = label;
++				} else {
++					dprintk(1, "State stack underflow (%p,%p).", ptr.p8, end);
++					return 0;
++				}
++				break;
++			case DW_CFA_def_cfa:
++				state->cfa.reg = get_uleb128(&ptr.p8, end);
++				/*nobreak*/
++			case DW_CFA_def_cfa_offset:
++				state->cfa.offs = get_uleb128(&ptr.p8, end);
++				break;
++			case DW_CFA_def_cfa_sf:
++				state->cfa.reg = get_uleb128(&ptr.p8, end);
++				/*nobreak*/
++			case DW_CFA_def_cfa_offset_sf:
++				state->cfa.offs = get_sleb128(&ptr.p8, end)
++				                  * state->dataAlign;
++				break;
++			case DW_CFA_def_cfa_register:
++				state->cfa.reg = get_uleb128(&ptr.p8, end);
++				break;
++			/*todo case DW_CFA_def_cfa_expression: */
++			/*todo case DW_CFA_expression: */
++			/*todo case DW_CFA_val_expression: */
++			case DW_CFA_GNU_args_size:
++				get_uleb128(&ptr.p8, end);
++				break;
++			case DW_CFA_GNU_negative_offset_extended:
++				value = get_uleb128(&ptr.p8, end);
++				set_rule(value,
++				         Memory,
++				         (uleb128_t)0 - get_uleb128(&ptr.p8, end), state);
++				break;
++			case DW_CFA_GNU_window_save:
++			default:
++				dprintk(1, "Unrecognized CFI op %02X (%p,%p).", ptr.p8[-1], ptr.p8 - 1, end);
++				result = 0;
++				break;
++			}
++			break;
++		case 1:
++			result = advance_loc(*ptr.p8++ & 0x3f, state);
++			break;
++		case 2:
++			value = *ptr.p8++ & 0x3f;
++			set_rule(value, Memory, get_uleb128(&ptr.p8, end), state);
++			break;
++		case 3:
++			set_rule(*ptr.p8++ & 0x3f, Nowhere, 0, state);
++			break;
++		}
++		if (ptr.p8 > end) {
++			dprintk(1, "Data overrun (%p,%p).", ptr.p8, end);
++			result = 0;
++		}
++		if (result && targetLoc != 0 && targetLoc < state->loc)
++			return 1;
++	}
++
++	if (result && ptr.p8 < end)
++		dprintk(1, "Data underrun (%p,%p).", ptr.p8, end);
++
++	return result
++	   && ptr.p8 == end
++	   && (targetLoc == 0
++	    || (/*todo While in theory this should apply, gcc in practice omits
++	          everything past the function prolog, and hence the location
++	          never reaches the end of the function.
++	        targetLoc < state->loc &&*/ state->label == NULL));
++}
++
++/* Unwind to the previous frame.  Returns 0 if successful, a negative
++ * number in case of an error. */
++int unwind(struct unwind_frame_info *frame)
++{
++#define FRAME_REG(r, t) (((t *)frame)[reg_info[r].offs])
++	const u32 *fde = NULL, *cie = NULL;
++	const u8 *ptr = NULL, *end = NULL;
++	unsigned long pc = UNW_PC(frame) - frame->call_frame, sp;
++	unsigned long startLoc = 0, endLoc = 0, cfa;
++	unsigned i;
++	signed ptrType = -1;
++	uleb128_t retAddrReg = 0;
++	const struct unwind_table *table;
++	struct unwind_state state;
++
++	if (UNW_PC(frame) == 0)
++		return -EINVAL;
++	if ((table = find_table(pc)) != NULL
++	    && !(table->size & (sizeof(*fde) - 1))) {
++		const u8 *hdr = table->header;
++		unsigned long tableSize;
++
++		smp_rmb();
++		if (hdr && hdr[0] == 1) {
++			switch(hdr[3] & DW_EH_PE_FORM) {
++			case DW_EH_PE_native: tableSize = sizeof(unsigned long); break;
++			case DW_EH_PE_data2: tableSize = 2; break;
++			case DW_EH_PE_data4: tableSize = 4; break;
++			case DW_EH_PE_data8: tableSize = 8; break;
++			default: tableSize = 0; break;
++			}
++			ptr = hdr + 4;
++			end = hdr + table->hdrsz;
++			if (tableSize
++			    && read_pointer(&ptr, end, hdr[1], 0, 0)
++			       == (unsigned long)table->address
++			    && (i = read_pointer(&ptr, end, hdr[2], 0, 0)) > 0
++			    && i == (end - ptr) / (2 * tableSize)
++			    && !((end - ptr) % (2 * tableSize))) {
++				do {
++					const u8 *cur = ptr + (i / 2) * (2 * tableSize);
++
++					startLoc = read_pointer(&cur,
++					                        cur + tableSize,
++					                        hdr[3], 0,
++					                        (unsigned long)hdr);
++					if (pc < startLoc)
++						i /= 2;
++					else {
++						ptr = cur - tableSize;
++						i = (i + 1) / 2;
++					}
++				} while (startLoc && i > 1);
++				if (i == 1
++				    && (startLoc = read_pointer(&ptr,
++				                                ptr + tableSize,
++				                                hdr[3], 0,
++				                                (unsigned long)hdr)) != 0
++				    && pc >= startLoc)
++					fde = (void *)read_pointer(&ptr,
++					                           ptr + tableSize,
++					                           hdr[3], 0,
++					                           (unsigned long)hdr);
++			}
++		}
++		if(hdr && !fde)
++			dprintk(3, "Binary lookup for %lx failed.", pc);
++
++		if (fde != NULL) {
++			cie = cie_for_fde(fde, table);
++			ptr = (const u8 *)(fde + 2);
++			if(cie != NULL
++			   && cie != &bad_cie
++			   && cie != &not_fde
++			   && (ptrType = fde_pointer_type(cie)) >= 0
++			   && read_pointer(&ptr,
++			                   (const u8 *)(fde + 1) + *fde,
++			                   ptrType, 0, 0) == startLoc) {
++				if (!(ptrType & DW_EH_PE_indirect))
++					ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
++				endLoc = startLoc
++				         + read_pointer(&ptr,
++				                        (const u8 *)(fde + 1) + *fde,
++				                        ptrType, 0, 0);
++				if(pc >= endLoc)
++					fde = NULL;
++			} else
++				fde = NULL;
++			if(!fde)
++				dprintk(1, "Binary lookup result for %lx discarded.", pc);
++		}
++		if (fde == NULL) {
++			for (fde = table->address, tableSize = table->size;
++			     cie = NULL, tableSize > sizeof(*fde)
++			     && tableSize - sizeof(*fde) >= *fde;
++			     tableSize -= sizeof(*fde) + *fde,
++			     fde += 1 + *fde / sizeof(*fde)) {
++				cie = cie_for_fde(fde, table);
++				if (cie == &bad_cie) {
++					cie = NULL;
++					break;
++				}
++				if (cie == NULL
++				    || cie == &not_fde
++				    || (ptrType = fde_pointer_type(cie)) < 0)
++					continue;
++				ptr = (const u8 *)(fde + 2);
++				startLoc = read_pointer(&ptr,
++				                        (const u8 *)(fde + 1) + *fde,
++				                        ptrType, 0, 0);
++				if (!startLoc)
++					continue;
++				if (!(ptrType & DW_EH_PE_indirect))
++					ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed;
++				endLoc = startLoc
++				         + read_pointer(&ptr,
++				                        (const u8 *)(fde + 1) + *fde,
++				                        ptrType, 0, 0);
++				if (pc >= startLoc && pc < endLoc)
++					break;
++			}
++			if(!fde)
++				dprintk(3, "Linear lookup for %lx failed.", pc);
++		}
++	}
++	if (cie != NULL) {
++		memset(&state, 0, sizeof(state));
++		state.cieEnd = ptr; /* keep here temporarily */
++		ptr = (const u8 *)(cie + 2);
++		end = (const u8 *)(cie + 1) + *cie;
++		frame->call_frame = 1;
++		if ((state.version = *ptr) != 1)
++			cie = NULL; /* unsupported version */
++		else if (*++ptr) {
++			/* check if augmentation size is first (and thus present) */
++			if (*ptr == 'z') {
++				while (++ptr < end && *ptr) {
++					switch(*ptr) {
++					/* check for ignorable (or already handled)
++					 * nul-terminated augmentation string */
++					case 'L':
++					case 'P':
++					case 'R':
++						continue;
++					case 'S':
++						frame->call_frame = 0;
++						continue;
++					default:
++						break;
++					}
++					break;
++				}
++			}
++			if (ptr >= end || *ptr)
++				cie = NULL;
++		}
++		if(!cie)
++			dprintk(1, "CIE unusable (%p,%p).", ptr, end);
++		++ptr;
++	}
++	if (cie != NULL) {
++		/* get code alignment factor */
++		state.codeAlign = get_uleb128(&ptr, end);
++		/* get data alignment factor */
++		state.dataAlign = get_sleb128(&ptr, end);
++		if (state.codeAlign == 0 || state.dataAlign == 0 || ptr >= end)
++			cie = NULL;
++		else if (UNW_PC(frame) % state.codeAlign
++		         || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
++			dprintk(1, "Input pointer(s) misaligned (%lx,%lx).",
++			        UNW_PC(frame), UNW_SP(frame));
++			return -EPERM;
++		} else {
++			retAddrReg = state.version <= 1 ? *ptr++ : get_uleb128(&ptr, end);
++			/* skip augmentation */
++			if (((const char *)(cie + 2))[1] == 'z') {
++				uleb128_t augSize = get_uleb128(&ptr, end);
++
++				ptr += augSize;
++			}
++			if (ptr > end
++			   || retAddrReg >= ARRAY_SIZE(reg_info)
++			   || REG_INVALID(retAddrReg)
++			   || reg_info[retAddrReg].width != sizeof(unsigned long))
++				cie = NULL;
++		}
++		if(!cie)
++			dprintk(1, "CIE validation failed (%p,%p).", ptr, end);
++	}
++	if (cie != NULL) {
++		state.cieStart = ptr;
++		ptr = state.cieEnd;
++		state.cieEnd = end;
++		end = (const u8 *)(fde + 1) + *fde;
++		/* skip augmentation */
++		if (((const char *)(cie + 2))[1] == 'z') {
++			uleb128_t augSize = get_uleb128(&ptr, end);
++
++			if ((ptr += augSize) > end)
++				fde = NULL;
++		}
++		if(!fde)
++			dprintk(1, "FDE validation failed (%p,%p).", ptr, end);
++	}
++	if (cie == NULL || fde == NULL) {
++#ifdef CONFIG_FRAME_POINTER
++		unsigned long top, bottom;
++
++		if ((UNW_SP(frame) | UNW_FP(frame)) % sizeof(unsigned long))
++			return -EPERM;
++		top = STACK_TOP(frame->task);
++		bottom = STACK_BOTTOM(frame->task);
++# if FRAME_RETADDR_OFFSET < 0
++		if (UNW_SP(frame) < top
++		    && UNW_FP(frame) <= UNW_SP(frame)
++		    && bottom < UNW_FP(frame)
++# else
++		if (UNW_SP(frame) > top
++		    && UNW_FP(frame) >= UNW_SP(frame)
++		    && bottom > UNW_FP(frame)
++# endif
++		   && !((UNW_SP(frame) | UNW_FP(frame))
++		        & (sizeof(unsigned long) - 1))) {
++			unsigned long link;
++
++			if (!probe_kernel_address(
++			                (unsigned long *)(UNW_FP(frame)
++			                                  + FRAME_LINK_OFFSET),
++						  link)
++# if FRAME_RETADDR_OFFSET < 0
++			   && link > bottom && link < UNW_FP(frame)
++# else
++			   && link > UNW_FP(frame) && link < bottom
++# endif
++			   && !(link & (sizeof(link) - 1))
++			   && !probe_kernel_address(
++			                  (unsigned long *)(UNW_FP(frame)
++			                                    + FRAME_RETADDR_OFFSET), UNW_PC(frame))) {
++				UNW_SP(frame) = UNW_FP(frame) + FRAME_RETADDR_OFFSET
++# if FRAME_RETADDR_OFFSET < 0
++					-
++# else
++					+
++# endif
++					  sizeof(UNW_PC(frame));
++				UNW_FP(frame) = link;
++				return 0;
++			}
++		}
++#endif
++		return -ENXIO;
++	}
++	state.org = startLoc;
++	memcpy(&state.cfa, &badCFA, sizeof(state.cfa));
++	/* process instructions */
++	if (!processCFI(ptr, end, pc, ptrType, &state)
++	   || state.loc > endLoc
++	   || state.regs[retAddrReg].where == Nowhere
++	   || state.cfa.reg >= ARRAY_SIZE(reg_info)
++	   || reg_info[state.cfa.reg].width != sizeof(unsigned long)
++	   || FRAME_REG(state.cfa.reg, unsigned long) % sizeof(unsigned long)
++	   || state.cfa.offs % sizeof(unsigned long)) {
++		dprintk(1, "Unusable unwind info (%p,%p).", ptr, end);
++		return -EIO;
++	}
++	/* update frame */
++#ifndef CONFIG_AS_CFI_SIGNAL_FRAME
++	if(frame->call_frame
++	   && !UNW_DEFAULT_RA(state.regs[retAddrReg], state.dataAlign))
++		frame->call_frame = 0;
++#endif
++	cfa = FRAME_REG(state.cfa.reg, unsigned long) + state.cfa.offs;
++	startLoc = min((unsigned long)UNW_SP(frame), cfa);
++	endLoc = max((unsigned long)UNW_SP(frame), cfa);
++	if (STACK_LIMIT(startLoc) != STACK_LIMIT(endLoc)) {
++		startLoc = min(STACK_LIMIT(cfa), cfa);
++		endLoc = max(STACK_LIMIT(cfa), cfa);
++	}
++#ifndef CONFIG_64BIT
++# define CASES CASE(8); CASE(16); CASE(32)
++#else
++# define CASES CASE(8); CASE(16); CASE(32); CASE(64)
++#endif
++	pc = UNW_PC(frame);
++	sp = UNW_SP(frame);
++	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
++		if (REG_INVALID(i)) {
++			if (state.regs[i].where == Nowhere)
++				continue;
++			dprintk(1, "Cannot restore register %u (%d).",
++			        i, state.regs[i].where);
++			return -EIO;
++		}
++		switch(state.regs[i].where) {
++		default:
++			break;
++		case Register:
++			if (state.regs[i].value >= ARRAY_SIZE(reg_info)
++			   || REG_INVALID(state.regs[i].value)
++			   || reg_info[i].width > reg_info[state.regs[i].value].width) {
++				dprintk(1, "Cannot restore register %u from register %lu.",
++				        i, state.regs[i].value);
++				return -EIO;
++			}
++			switch(reg_info[state.regs[i].value].width) {
++#define CASE(n) \
++			case sizeof(u##n): \
++				state.regs[i].value = FRAME_REG(state.regs[i].value, \
++				                                const u##n); \
++				break
++			CASES;
++#undef CASE
++			default:
++				dprintk(1, "Unsupported register size %u (%lu).",
++				        reg_info[state.regs[i].value].width,
++				        state.regs[i].value);
++				return -EIO;
++			}
++			break;
++		}
++	}
++	for (i = 0; i < ARRAY_SIZE(state.regs); ++i) {
++		if (REG_INVALID(i))
++			continue;
++		switch(state.regs[i].where) {
++		case Nowhere:
++			if (reg_info[i].width != sizeof(UNW_SP(frame))
++			   || &FRAME_REG(i, __typeof__(UNW_SP(frame)))
++			      != &UNW_SP(frame))
++				continue;
++			UNW_SP(frame) = cfa;
++			break;
++		case Register:
++			switch(reg_info[i].width) {
++#define CASE(n) case sizeof(u##n): \
++				FRAME_REG(i, u##n) = state.regs[i].value; \
++				break
++			CASES;
++#undef CASE
++			default:
++				dprintk(1, "Unsupported register size %u (%u).",
++				        reg_info[i].width, i);
++				return -EIO;
++			}
++			break;
++		case Value:
++			if (reg_info[i].width != sizeof(unsigned long)) {
++				dprintk(1, "Unsupported value size %u (%u).",
++				        reg_info[i].width, i);
++				return -EIO;
++			}
++			FRAME_REG(i, unsigned long) = cfa + state.regs[i].value
++			                                    * state.dataAlign;
++			break;
++		case Memory: {
++				unsigned long addr = cfa + state.regs[i].value
++				                           * state.dataAlign;
++
++				if ((state.regs[i].value * state.dataAlign)
++				    % sizeof(unsigned long)
++				    || addr < startLoc
++				    || addr + sizeof(unsigned long) < addr
++				    || addr + sizeof(unsigned long) > endLoc) {
++					dprintk(1, "Bad memory location %lx (%lx).",
++					        addr, state.regs[i].value);
++					return -EIO;
++				}
++				switch(reg_info[i].width) {
++#define CASE(n)     case sizeof(u##n): \
++					probe_kernel_address((u##n *)addr, FRAME_REG(i, u##n)); \
++					break
++				CASES;
++#undef CASE
++				default:
++					dprintk(1, "Unsupported memory size %u (%u).",
++					        reg_info[i].width, i);
++					return -EIO;
++				}
++			}
++			break;
++		}
++	}
++
++	if (UNW_PC(frame) % state.codeAlign
++	    || UNW_SP(frame) % sleb128abs(state.dataAlign)) {
++		dprintk(1, "Output pointer(s) misaligned (%lx,%lx).",
++		        UNW_PC(frame), UNW_SP(frame));
++		return -EIO;
++	}
++	if (pc == UNW_PC(frame) && sp == UNW_SP(frame)) {
++		dprintk(1, "No progress (%lx,%lx).", pc, sp);
++		return -EIO;
++	}
++
++	return 0;
++#undef CASES
++#undef FRAME_REG
++}
++EXPORT_SYMBOL(unwind);
++
++int unwind_init_frame_info(struct unwind_frame_info *info,
++                           struct task_struct *tsk,
++                           /*const*/ struct pt_regs *regs)
++{
++	info->task = tsk;
++	info->call_frame = 0;
++	arch_unw_init_frame_info(info, regs);
++
++	return 0;
++}
++EXPORT_SYMBOL(unwind_init_frame_info);
++
++/*
++ * Prepare to unwind a blocked task.
++ */
++int unwind_init_blocked(struct unwind_frame_info *info,
++                        struct task_struct *tsk)
++{
++	info->task = tsk;
++	info->call_frame = 0;
++	arch_unw_init_blocked(info);
++
++	return 0;
++}
++EXPORT_SYMBOL(unwind_init_blocked);
++
++/*
++ * Prepare to unwind the currently running thread.
++ */
++int unwind_init_running(struct unwind_frame_info *info,
++                        asmlinkage int (*callback)(struct unwind_frame_info *,
++                                                   void *arg),
++                        void *arg)
++{
++	info->task = current;
++	info->call_frame = 0;
++
++	return arch_unwind_init_running(info, callback, arg);
++}
++EXPORT_SYMBOL(unwind_init_running);
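++
++/*
++ * Minimal usage sketch (assumes an architecture implementing the
++ * arch_unw_* hooks; print_symbol() is just one way to report frames):
++ *
++ *	struct unwind_frame_info info;
++ *
++ *	if (unwind_init_blocked(&info, task) == 0)
++ *		while (unwind(&info) == 0 && UNW_PC(&info))
++ *			print_symbol("%s\n", UNW_PC(&info));
++ */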
++
+diff -Nurb linux-2.6.22-570/kernel/user.c linux-2.6.22-try2/kernel/user.c
+--- linux-2.6.22-570/kernel/user.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/user.c	2007-12-19 21:35:36.000000000 -0500
+@@ -14,17 +14,17 @@
+ #include <linux/bitops.h>
+ #include <linux/key.h>
+ #include <linux/interrupt.h>
++#include <linux/module.h>
++#include <linux/user_namespace.h>
+ 
+ /*
+  * UID task count cache, to get fast user lookup in "alloc_uid"
+  * when changing user ID's (ie setuid() and friends).
+  */
+ 
+-#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8)
+-#define UIDHASH_SZ		(1 << UIDHASH_BITS)
+ #define UIDHASH_MASK		(UIDHASH_SZ - 1)
+-#define __uidhashfn(xid,uid)	((((uid) >> UIDHASH_BITS) + ((uid)^(xid))) & UIDHASH_MASK)
+-#define uidhashentry(xid,uid)	(uidhash_table + __uidhashfn((xid),(uid)))
++#define __uidhashfn(uid)	((((uid) >> UIDHASH_BITS) + (uid)) & UIDHASH_MASK)
++#define uidhashentry(ns, uid)	((ns)->uidhash_table + __uidhashfn((uid)))
+ 
+ static struct kmem_cache *uid_cachep;
+ static struct list_head uidhash_table[UIDHASH_SZ];
+@@ -94,9 +94,10 @@
+ {
+ 	struct user_struct *ret;
+ 	unsigned long flags;
++	struct user_namespace *ns = current->nsproxy->user_ns;
+ 
+ 	spin_lock_irqsave(&uidhash_lock, flags);
+-	ret = uid_hash_find(xid, uid, uidhashentry(xid, uid));
++	ret = uid_hash_find(uid, uidhashentry(ns, uid));
+ 	spin_unlock_irqrestore(&uidhash_lock, flags);
+ 	return ret;
+ }
+@@ -120,9 +121,9 @@
+ 	}
+ }
+ 
+-struct user_struct * alloc_uid(xid_t xid, uid_t uid)
++struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
+ {
+-	struct list_head *hashent = uidhashentry(xid, uid);
++	struct list_head *hashent = uidhashentry(ns, uid);
+ 	struct user_struct *up;
+ 
+ 	spin_lock_irq(&uidhash_lock);
+@@ -212,11 +213,11 @@
+ 			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ 
+ 	for(n = 0; n < UIDHASH_SZ; ++n)
+-		INIT_LIST_HEAD(uidhash_table + n);
++		INIT_LIST_HEAD(init_user_ns.uidhash_table + n);
+ 
+ 	/* Insert the root user immediately (init already runs as root) */
+ 	spin_lock_irq(&uidhash_lock);
+-	uid_hash_insert(&root_user, uidhashentry(0,0));
++	uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0));
+ 	spin_unlock_irq(&uidhash_lock);
+ 
+ 	return 0;
+diff -Nurb linux-2.6.22-570/kernel/user_namespace.c linux-2.6.22-try2/kernel/user_namespace.c
+--- linux-2.6.22-570/kernel/user_namespace.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/kernel/user_namespace.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,87 @@
++/*
++ *  This program is free software; you can redistribute it and/or
++ *  modify it under the terms of the GNU General Public License as
++ *  published by the Free Software Foundation, version 2 of the
++ *  License.
++ */
++
++#include <linux/module.h>
++#include <linux/version.h>
++#include <linux/nsproxy.h>
++#include <linux/user_namespace.h>
++
++struct user_namespace init_user_ns = {
++	.kref = {
++		.refcount	= ATOMIC_INIT(2),
++	},
++	.root_user = &root_user,
++};
++
++EXPORT_SYMBOL_GPL(init_user_ns);
++
++#ifdef CONFIG_USER_NS
++
++/*
++ * Clone a new ns, copying an original user ns and setting its refcount to 1
++ * @old_ns: namespace to clone
++ * Return ERR_PTR(-ENOMEM) on allocation failure, the new ns otherwise
++ */
++static struct user_namespace *clone_user_ns(struct user_namespace *old_ns)
++{
++	struct user_namespace *ns;
++	struct user_struct *new_user;
++	int n;
++
++	ns = kmalloc(sizeof(struct user_namespace), GFP_KERNEL);
++	if (!ns)
++		return ERR_PTR(-ENOMEM);
++
++	kref_init(&ns->kref);
++
++	for (n = 0; n < UIDHASH_SZ; ++n)
++		INIT_LIST_HEAD(ns->uidhash_table + n);
++
++	/* Insert new root user.  */
++	ns->root_user = alloc_uid(ns, 0);
++	if (!ns->root_user) {
++		kfree(ns);
++		return ERR_PTR(-ENOMEM);
++	}
++
++	/* Reset current->user with a new one */
++	new_user = alloc_uid(ns, current->uid);
++	if (!new_user) {
++		free_uid(ns->root_user);
++		kfree(ns);
++		return ERR_PTR(-ENOMEM);
++	}
++
++	switch_uid(new_user);
++	return ns;
++}
++
++struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns)
++{
++	struct user_namespace *new_ns;
++
++	BUG_ON(!old_ns);
++	get_user_ns(old_ns);
++
++	if (!(flags & CLONE_NEWUSER))
++		return old_ns;
++
++	new_ns = clone_user_ns(old_ns);
++
++	put_user_ns(old_ns);
++	return new_ns;
++}
++
++void free_user_ns(struct kref *kref)
++{
++	struct user_namespace *ns;
++
++	ns = container_of(kref, struct user_namespace, kref);
++	kfree(ns);
++}
++
++#endif /* CONFIG_USER_NS */
+diff -Nurb linux-2.6.22-570/kernel/utsname.c linux-2.6.22-try2/kernel/utsname.c
+--- linux-2.6.22-570/kernel/utsname.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/kernel/utsname.c	2007-12-19 21:36:04.000000000 -0500
+@@ -25,11 +25,12 @@
+ 	struct uts_namespace *ns;
+ 
+ 	ns = kmalloc(sizeof(struct uts_namespace), GFP_KERNEL);
+-	if (ns) {
+-		memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
+-		kref_init(&ns->kref);
+-		atomic_inc(&vs_global_uts_ns);
+-	}
++	if (!ns)
++		return ERR_PTR(-ENOMEM);
++
++	memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
++
++	kref_init(&ns->kref);
+ 	return ns;
+ }
+ 
+@@ -39,7 +40,7 @@
+  * utsname of this process won't be seen by parent, and vice
+  * versa.
+  */
+-struct uts_namespace *copy_utsname(int flags, struct uts_namespace *old_ns)
++struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns)
+ {
+ 	struct uts_namespace *new_ns;
+ 
+diff -Nurb linux-2.6.22-570/kernel/utsname_sysctl.c linux-2.6.22-try2/kernel/utsname_sysctl.c
+--- linux-2.6.22-570/kernel/utsname_sysctl.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/kernel/utsname_sysctl.c	2007-12-19 15:29:24.000000000 -0500
+@@ -18,10 +18,7 @@
+ static void *get_uts(ctl_table *table, int write)
+ {
+ 	char *which = table->data;
+-#ifdef CONFIG_UTS_NS
+-	struct uts_namespace *uts_ns = current->nsproxy->uts_ns;
+-	which = (which - (char *)&init_uts_ns) + (char *)uts_ns;
+-#endif
++
+ 	if (!write)
+ 		down_read(&uts_sem);
+ 	else
+diff -Nurb linux-2.6.22-570/kernel/workqueue.c linux-2.6.22-try2/kernel/workqueue.c
+--- linux-2.6.22-570/kernel/workqueue.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/kernel/workqueue.c	2007-12-19 15:29:24.000000000 -0500
+@@ -282,8 +282,8 @@
+ 	struct cpu_workqueue_struct *cwq = __cwq;
+ 	DEFINE_WAIT(wait);
+ 
+-	if (!cwq->wq->freezeable)
+-		current->flags |= PF_NOFREEZE;
++	if (cwq->wq->freezeable)
++		set_freezable();
+ 
+ 	set_user_nice(current, -5);
+ 
+diff -Nurb linux-2.6.22-570/lib/Kconfig.debug linux-2.6.22-try2/lib/Kconfig.debug
+--- linux-2.6.22-570/lib/Kconfig.debug	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/lib/Kconfig.debug	2007-12-19 15:29:24.000000000 -0500
+@@ -364,6 +364,24 @@
+ 	  some architectures or if you use external debuggers.
+ 	  If you don't debug the kernel, you can say N.
+ 
++config UNWIND_INFO
++	bool "Compile the kernel with frame unwind information"
++	depends on !IA64 && !PARISC && !ARM
++	depends on !MODULES || !(MIPS || PPC || SUPERH || V850)
++	help
++	  If you say Y here the resulting kernel image will be slightly larger
++	  but not slower, and it will give very useful debugging information.
++	  If you don't debug the kernel, you can say N, but we may not be able
++	  to solve problems without frame unwind information or frame pointers.
++
++config STACK_UNWIND
++	bool "Stack unwind support"
++	depends on UNWIND_INFO
++	depends on X86
++	help
++	  This enables more precise stack traces, omitting all unrelated
++	  occurrences of pointers into kernel code from the dump.
++
+ config FORCED_INLINING
+ 	bool "Force gcc to inline functions marked 'inline'"
+ 	depends on DEBUG_KERNEL
+@@ -409,6 +427,9 @@
+ config FAULT_INJECTION
+ 	bool "Fault-injection framework"
+ 	depends on DEBUG_KERNEL
++	# could support fp on X86_32 here too, but let's not
++	select UNWIND_INFO if X86
++	select STACK_UNWIND if X86
+ 	help
+ 	  Provide fault-injection framework.
+ 	  For more details, see Documentation/fault-injection/.
+@@ -445,3 +466,5 @@
+ 	select FRAME_POINTER
+ 	help
+ 	  Provide stacktrace filter for fault-injection capabilities
++
++source "lib/Kconfig.kgdb"
+diff -Nurb linux-2.6.22-570/lib/Kconfig.kgdb linux-2.6.22-try2/lib/Kconfig.kgdb
+--- linux-2.6.22-570/lib/Kconfig.kgdb	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/lib/Kconfig.kgdb	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,255 @@
++
++config WANT_EXTRA_DEBUG_INFORMATION
++	bool
++	select DEBUG_INFO
++	select UNWIND_INFO
++	select FRAME_POINTER if X86 || SUPERH
++	default n
++
++config UNWIND_INFO
++	bool
++	default n
++
++config KGDB
++	bool "KGDB: kernel debugging with remote gdb"
++	select WANT_EXTRA_DEBUG_INFORMATION
++	select KGDB_ARCH_HAS_SHADOW_INFO if X86_64
++	depends on DEBUG_KERNEL && (ARM || X86 || MIPS || (SUPERH && !SUPERH64) || IA64 || PPC)
++	help
++	  If you say Y here, it will be possible to remotely debug the
++	  kernel using gdb.  Documentation of the kernel debugger is available
++	  at http://kgdb.sourceforge.net as well as in DocBook form
++	  in Documentation/DocBook/.  If unsure, say N.
++
++config KGDB_ARCH_HAS_SHADOW_INFO
++	bool
++
++config KGDB_CONSOLE
++	bool "KGDB: Console messages through gdb"
++	depends on KGDB
++	help
++	  If you say Y here, console messages will appear through gdb.
++	  Other consoles such as tty or ttyS will continue to work as usual.
++	  Note that if you use this in conjunction with KGDBOE and the
++	  ethernet driver runs into an error condition during use with KGDB,
++	  it is possible to hit an infinite recursion, causing the kernel
++	  to crash and typically reboot.  For this reason, it is preferable
++	  to use NETCONSOLE in conjunction with KGDBOE instead of
++	  KGDB_CONSOLE.
++
++choice
++	prompt "Method for KGDB communication"
++	depends on KGDB
++	default KGDB_MPSC if SERIAL_MPSC
++	default KGDB_CPM_UART if (CPM2 || 8xx)
++	default KGDB_SIBYTE if SIBYTE_SB1xxx_SOC
++	default KGDB_TXX9 if CPU_TX49XX
++	default KGDB_SH_SCI if SERIAL_SH_SCI
++	default KGDB_PXA_SERIAL if ARCH_PXA
++	default KGDB_AMBA_PL011 if ARM_AMBA
++	default KGDB_8250_NOMODULE
++	help
++	  There are a number of different ways in which you can communicate
++	  with KGDB.  The most common is via serial, with the 8250 driver
++	  (should your hardware have an 8250, or ns1655x style uart).
++	  Another option is to use the NETPOLL framework and UDP, should
++	  your ethernet card support this.  Other options may exist.
++	  You can elect to have one core I/O driver that is built into the
++	  kernel for debugging as the kernel is booting, or to use only
++	  kernel modules.
++
++config KGDB_ONLY_MODULES
++	bool "KGDB: Use only kernel modules for I/O"
++	depends on MODULES
++	help
++	  Use only kernel modules to configure KGDB I/O after the
++	  kernel is booted.
++
++config KGDB_8250_NOMODULE
++	bool "KGDB: On generic serial port (8250)"
++	select KGDB_8250
++	help
++	  Uses generic serial port (8250) to communicate with the host
++	  GDB.  This is independent of the normal (SERIAL_8250) driver
++	  for this chipset.
++
++config KGDBOE_NOMODULE
++	bool "KGDB: On ethernet - in kernel"
++	select KGDBOE
++	help
++	  Uses the NETPOLL API to communicate with the host GDB via UDP.
++	  In order for this to work, the ethernet interface specified must
++	  support the NETPOLL API, and this must be initialized at boot.
++	  See the documentation for syntax.
++
++config KGDB_MPSC
++	bool "KGDB: On MV64x60 MPSC"
++	depends on SERIAL_MPSC
++	help
++	  Uses a Marvell GT64260B or MV64x60 Multi-Purpose Serial
++	  Controller (MPSC) channel. Note that the GT64260A is not
++	  supported.
++
++config KGDB_CPM_UART
++	bool "KGDB: On CPM UART"
++	depends on PPC && (CPM2 || 8xx)
++	help
++	  Uses CPM UART to communicate with the host GDB.
++
++config KGDB_SIBYTE
++	bool "KGDB: On Broadcom SB1xxx serial port"
++	depends on MIPS && SIBYTE_SB1xxx_SOC
++
++config KGDB_TXX9
++	bool "KGDB: On TX49xx serial port"
++	depends on MIPS && CPU_TX49XX
++	help
++	  Uses TX49xx serial port to communicate with the host KGDB.
++
++config KGDB_SH_SCI
++	bool "KGDB: On SH SCI(F) serial port"
++	depends on SUPERH && SERIAL_SH_SCI
++	help
++	  Uses the SH SCI(F) serial port to communicate with the host GDB.
++
++config KGDB_AMBA_PL011
++	bool "KGDB: On ARM AMBA PL011 Serial Port"
++	depends on ARM && ARCH_VERSATILE
++	help
++	  Enables the KGDB serial driver for the AMBA bus PL011 serial
++	  devices from ARM.
++
++config KGDB_PXA_SERIAL
++	bool "KGDB: On the PXA2xx serial port"
++	depends on ARCH_PXA
++	help
++	  Enables the KGDB serial driver for the Intel PXA SoC.
++endchoice
++
++choice
++	prompt "PXA UART to use for KGDB"
++	depends on KGDB_PXA_SERIAL
++	default KGDB_PXA_FFUART
++
++config KGDB_PXA_FFUART
++	bool "FFUART"
++
++config KGDB_PXA_BTUART
++	bool "BTUART"
++
++config KGDB_PXA_STUART
++	bool "STUART"
++endchoice
++
++choice
++	prompt "SCC/SMC to use for KGDB"
++	depends on KGDB_CPM_UART
++	default KGDB_CPM_UART_SCC4 if ADS8272
++
++config KGDB_CPM_UART_SCC1
++	bool "SCC1"
++	depends on SERIAL_CPM_SCC1
++
++config KGDB_CPM_UART_SCC2
++	bool "SCC2"
++	depends on SERIAL_CPM_SCC2
++
++config KGDB_CPM_UART_SCC3
++	bool "SCC3"
++	depends on SERIAL_CPM_SCC3
++
++config KGDB_CPM_UART_SCC4
++	bool "SCC4"
++	depends on SERIAL_CPM_SCC4
++
++config KGDB_CPM_UART_SMC1
++	bool "SMC1"
++	depends on SERIAL_CPM_SMC1
++
++config KGDB_CPM_UART_SMC2
++	bool "SMC2"
++	depends on SERIAL_CPM_SMC2
++endchoice
++
++config KGDBOE
++	tristate "KGDB: On ethernet" if !KGDBOE_NOMODULE
++	depends on m && KGDB
++	select NETPOLL
++	select NETPOLL_TRAP
++	help
++	  Uses the NETPOLL API to communicate with the host GDB via UDP.
++	  In order for this to work, the ethernet interface specified must
++	  support the NETPOLL API, and this must be initialized at boot.
++	  See the documentation for syntax.
++
++config KGDB_8250
++	tristate "KGDB: On generic serial port (8250)" if !KGDB_8250_NOMODULE
++	depends on m && KGDB_ONLY_MODULES
++	help
++	  Uses generic serial port (8250) to communicate with the host
++	  GDB.  This is independent of the normal (SERIAL_8250) driver
++	  for this chipset.
++
++config KGDB_SIMPLE_SERIAL
++	bool "Simple selection of KGDB serial port"
++	depends on KGDB_8250_NOMODULE
++	default y
++	help
++	  If you say Y here, you will only have to pick the baud rate
++	  and port number that you wish to use for KGDB.  Note that this
++	  only works on architectures that register known serial ports
++	  early on.  If you say N, you will have to provide, either here
++	  or on the command line, the type (I/O or MMIO), IRQ and
++	  address to use.  If in doubt, say Y.
++
++config KGDB_BAUDRATE
++	int "Debug serial port baud rate"
++	depends on (KGDB_8250 && KGDB_SIMPLE_SERIAL) || \
++		KGDB_MPSC || KGDB_CPM_UART || \
++		KGDB_TXX9 || KGDB_PXA_SERIAL || KGDB_AMBA_PL011
++	default "115200"
++	help
++	  gdb and the kernel stub need to agree on the baud rate to be
++	  used.  Standard rates from 9600 to 115200 are allowed, and this
++	  may be overridden via the commandline.
++
++config KGDB_PORT_NUM
++	int "Serial port number for KGDB"
++	range 0 1 if KGDB_MPSC
++	range 0 3
++	depends on (KGDB_8250 && KGDB_SIMPLE_SERIAL) || KGDB_MPSC || KGDB_TXX9
++	default "1"
++	help
++	  Pick the port number (0 based) for KGDB to use.
++
++config KGDB_AMBA_BASE
++	hex "AMBA PL011 Serial Port Base Address"
++	default 0x101f2000 if ARCH_VERSATILE
++	depends on KGDB_AMBA_PL011
++	help
++	  Base address of the AMBA port that KGDB will use.
++
++config KGDB_AMBA_UARTCLK
++	int "AMBAPL011 Serial UART Clock Frequency"
++	default 24000000 if ARCH_VERSATILE
++	depends on KGDB_AMBA_PL011
++	help
++	  Frequency (in Hz) of the ARM AMBA UART clock.
++
++config KGDB_AMBA_IRQ
++	int "AMBA PL011 Serial Port IRQ"
++	default 13 if ARCH_VERSATILE
++	depends on KGDB_AMBA_PL011
++	help
++	  Pick the IRQ of the AMBA port that KGDB will use.
++
++config KGDB_8250_CONF_STRING
++	string "Configuration string for KGDB"
++	depends on KGDB_8250_NOMODULE && !KGDB_SIMPLE_SERIAL
++	default "io,2f8,115200,3" if X86
++	help
++	  The format of this string should be <io or
++	  mmio>,<address>,<baud rate>,<irq>.  For example, to use the
++	  serial port on an i386 box located at 0x2f8 and 115200 baud
++	  on IRQ 3, use:
++	  io,2f8,115200,3
+diff -Nurb linux-2.6.22-570/lib/Makefile linux-2.6.22-try2/lib/Makefile
+--- linux-2.6.22-570/lib/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/lib/Makefile	2007-12-19 15:29:24.000000000 -0500
+@@ -5,9 +5,10 @@
+ lib-y := ctype.o string.o vsprintf.o cmdline.o \
+ 	 rbtree.o radix-tree.o dump_stack.o \
+ 	 idr.o int_sqrt.o bitmap.o extable.o prio_tree.o \
+-	 sha1.o irq_regs.o reciprocal_div.o
++	 sha1.o irq_regs.o reciprocal_div.o argv_split.o \
++	 check_signature.o
+ 
+-lib-$(CONFIG_MMU) += ioremap.o
++lib-$(CONFIG_MMU) += ioremap.o pagewalk.o
+ lib-$(CONFIG_SMP) += cpumask.o
+ 
+ lib-y	+= kobject.o kref.o kobject_uevent.o klist.o
+diff -Nurb linux-2.6.22-570/lib/argv_split.c linux-2.6.22-try2/lib/argv_split.c
+--- linux-2.6.22-570/lib/argv_split.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/lib/argv_split.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,170 @@
++/*
++ * Helper function for splitting a string into an argv-like array.
++ */
++
++#ifndef TEST
++#include <linux/kernel.h>
++#include <linux/ctype.h>
++#include <linux/bug.h>
++#endif
++
++static const char *skip_sep(const char *cp)
++{
++	while (*cp && isspace(*cp))
++		cp++;
++
++	return cp;
++}
++
++static const char *skip_arg(const char *cp)
++{
++	while (*cp && !isspace(*cp))
++		cp++;
++
++	return cp;
++}
++
++static int count_argc(const char *str)
++{
++	int count = 0;
++
++	while (*str) {
++		str = skip_sep(str);
++		if (*str) {
++			count++;
++			str = skip_arg(str);
++		}
++	}
++
++	return count;
++}
++
++/**
++ * argv_free - free an argv
++ * @argv: the argument vector to be freed
++ *
++ * Frees an argv and the strings it points to.
++ */
++void argv_free(char **argv)
++{
++	char **p;
++	for (p = argv; *p; p++)
++		kfree(*p);
++
++	kfree(argv);
++}
++EXPORT_SYMBOL(argv_free);
++
++/**
++ * argv_split - split a string at whitespace, returning an argv
++ * @gfp: the GFP mask used to allocate memory
++ * @str: the string to be split
++ * @argcp: returned argument count
++ *
++ * Returns an array of pointers to strings which are split out from
++ * @str.  This is performed by strictly splitting on white-space; no
++ * quote processing is performed.  Multiple whitespace characters are
++ * considered to be a single argument separator.  The returned array
++ * is always NULL-terminated.  Returns NULL on memory allocation
++ * failure.
++ */
++char **argv_split(gfp_t gfp, const char *str, int *argcp)
++{
++	int argc = count_argc(str);
++	char **argv = kzalloc(sizeof(*argv) * (argc+1), gfp);
++	char **argvp;
++
++	if (argv == NULL)
++		goto out;
++
++	*argcp = argc;
++	argvp = argv;
++
++	while (*str) {
++		str = skip_sep(str);
++
++		if (*str) {
++			const char *p = str;
++			char *t;
++
++			str = skip_arg(str);
++
++			t = kstrndup(p, str-p, gfp);
++			if (t == NULL)
++				goto fail;
++			*argvp++ = t;
++		}
++	}
++	*argvp = NULL;
++
++  out:
++	return argv;
++
++  fail:
++	argv_free(argv);
++	return NULL;
++}
++EXPORT_SYMBOL(argv_split);
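++
++/*
++ * Example usage (sketch):
++ *
++ *	int argc;
++ *	char **argv = argv_split(GFP_KERNEL, "one two three", &argc);
++ *
++ *	if (argv) {
++ *		... use argc (here 3) and argv[0..argc-1] ...
++ *		argv_free(argv);
++ *	}
++ */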
++
++#ifdef TEST
++#define _GNU_SOURCE
++#include <ctype.h>
++#include <stdlib.h>
++#include <stdio.h>
++#include <string.h>
++
++typedef enum {
++	GFP_KERNEL,
++} gfp_t;
++#define kzalloc(size, x)	malloc(size)
++#define kfree(x)		free(x)
++#define kstrndup(s, n, gfp)	strndup(s, n)
++#define BUG()	abort()
++
++int main() {
++	const char *testvec[] = {
++		"",
++		"x",
++		"\"",
++		"\\\0",
++		"\"",
++		"test one two three",
++		"arg\"foo\"bar biff",
++		"one two\\ three four",
++		"one \"two three\" four",
++		NULL,
++	};
++	const char **t;
++
++	for (t = testvec; *t; t++) {
++		char **argv;
++		int argc;
++		char **a;
++
++		printf("%d: test [%s]\n", t-testvec, *t);
++
++		argv = argv_split(GFP_KERNEL, *t, &argc);
++
++		printf("argc=%d vec=", argc);
++		for (a = argv; *a; a++)
++			printf("[%s] ", *a);
++		printf("\n");
++
++		argv_free(argv);
++	}
++
++	return 0;
++}
++#endif
+diff -Nurb linux-2.6.22-570/lib/check_signature.c linux-2.6.22-try2/lib/check_signature.c
+--- linux-2.6.22-570/lib/check_signature.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/lib/check_signature.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,36 @@
++#include <linux/io.h>
++#include <linux/module.h>
++
++/**
++ *	check_signature		-	find BIOS signatures
++ *	@io_addr: mmio address to check
++ *	@signature:  signature block
++ *	@length: length of signature
++ *
++ *	Perform a signature comparison with the mmio address io_addr. This
++ *	address should have been obtained by ioremap.
++ *	Returns 1 on a match.
++ */
++
++int check_signature(const volatile void __iomem *io_addr,
++			const unsigned char *signature, int length)
++{
++	while (length--) {
++		if (readb(io_addr) != *signature)
++			return 0;
++		io_addr++;
++		signature++;
++	}
++	return 1;
++}
++EXPORT_SYMBOL(check_signature);
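++
++/*
++ * Example (sketch; "_MP_", the MP floating-pointer signature, is used
++ * purely for illustration):
++ *
++ *	void __iomem *p = ioremap(0xf0000, 0x10000);
++ *
++ *	if (p && check_signature(p, "_MP_", 4))
++ *		...
++ */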
+diff -Nurb linux-2.6.22-570/lib/kobject.c linux-2.6.22-try2/lib/kobject.c
+--- linux-2.6.22-570/lib/kobject.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/lib/kobject.c	2007-12-19 15:29:23.000000000 -0500
+@@ -44,7 +44,7 @@
+ 	return error;
+ }
+ 
+-static int create_dir(struct kobject * kobj, struct dentry *shadow_parent)
++static int create_dir(struct kobject *kobj, struct sysfs_dirent *shadow_parent)
+ {
+ 	int error = 0;
+ 	if (kobject_name(kobj)) {
+@@ -162,7 +162,7 @@
+  *	@shadow_parent: sysfs directory to add to.
+  */
+ 
+-int kobject_shadow_add(struct kobject * kobj, struct dentry *shadow_parent)
++int kobject_shadow_add(struct kobject *kobj, struct sysfs_dirent *shadow_parent)
+ {
+ 	int error = 0;
+ 	struct kobject * parent;
+@@ -338,7 +338,7 @@
+ 	/* Note : if we want to send the new name alone, not the full path,
+ 	 * we could probably use kobject_name(kobj); */
+ 
+-	error = sysfs_rename_dir(kobj, kobj->parent->dentry, new_name);
++	error = sysfs_rename_dir(kobj, kobj->parent->sd, new_name);
+ 
+ 	/* This function is mostly/only used for network interface.
+ 	 * Some hotplug package track interfaces by their name and
+@@ -361,8 +361,8 @@
+  *	@new_name: object's new name
+  */
+ 
+-int kobject_shadow_rename(struct kobject * kobj, struct dentry *new_parent,
+-			  const char *new_name)
++int kobject_shadow_rename(struct kobject *kobj,
++			  struct sysfs_dirent *new_parent, const char *new_name)
+ {
+ 	int error = 0;
+ 
+diff -Nurb linux-2.6.22-570/lib/kobject_uevent.c linux-2.6.22-try2/lib/kobject_uevent.c
+--- linux-2.6.22-570/lib/kobject_uevent.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/lib/kobject_uevent.c	2007-12-19 15:29:23.000000000 -0500
+@@ -208,7 +208,7 @@
+ 		argv [0] = uevent_helper;
+ 		argv [1] = (char *)subsystem;
+ 		argv [2] = NULL;
+-		call_usermodehelper (argv[0], argv, envp, 0);
++		call_usermodehelper (argv[0], argv, envp, UMH_WAIT_EXEC);
+ 	}
+ 
+ exit:
+diff -Nurb linux-2.6.22-570/lib/pagewalk.c linux-2.6.22-try2/lib/pagewalk.c
+--- linux-2.6.22-570/lib/pagewalk.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/lib/pagewalk.c	2007-12-19 15:29:24.000000000 -0500
+@@ -0,0 +1,112 @@
++#include <linux/mm.h>
++#include <linux/highmem.h>
++
++static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
++			  struct mm_walk *walk, void *private)
++{
++	pte_t *pte;
++	int err;
++
++	for (pte = pte_offset_map(pmd, addr); addr != end;
++	     addr += PAGE_SIZE, pte++) {
++		if (pte_none(*pte))
++			continue;
++		err = walk->pte_entry(pte, addr, addr + PAGE_SIZE, private);
++		if (err) {
++			pte_unmap(pte);
++			return err;
++		}
++	}
++	pte_unmap(pte);
++	return 0;
++}
++
++static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
++			  struct mm_walk *walk, void *private)
++{
++	pmd_t *pmd;
++	unsigned long next;
++	int err;
++
++	for (pmd = pmd_offset(pud, addr); addr != end;
++	     pmd++, addr = next) {
++		next = pmd_addr_end(addr, end);
++		if (pmd_none_or_clear_bad(pmd))
++			continue;
++		if (walk->pmd_entry) {
++			err = walk->pmd_entry(pmd, addr, next, private);
++			if (err)
++				return err;
++		}
++		if (walk->pte_entry) {
++			err = walk_pte_range(pmd, addr, next, walk, private);
++			if (err)
++				return err;
++		}
++	}
++	return 0;
++}
++
++static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
++			  struct mm_walk *walk, void *private)
++{
++	pud_t *pud;
++	unsigned long next;
++	int err;
++
++	for (pud = pud_offset(pgd, addr); addr != end;
++	     pud++, addr = next) {
++		next = pud_addr_end(addr, end);
++		if (pud_none_or_clear_bad(pud))
++			continue;
++		if (walk->pud_entry) {
++			err = walk->pud_entry(pud, addr, next, private);
++			if (err)
++				return err;
++		}
++		if (walk->pmd_entry || walk->pte_entry) {
++			err = walk_pmd_range(pud, addr, next, walk, private);
++			if (err)
++				return err;
++		}
++	}
++	return 0;
++}
++
++/**
++ * walk_page_range - walk a memory map's page tables with a callback
++ * @mm: memory map to walk
++ * @addr: starting address
++ * @end: ending address
++ * @walk: set of callbacks to invoke for each level of the tree
++ * @private: private data passed to the callback function
++ *
++ * Recursively walk the page table for the memory area in a VMA, calling
++ * a callback for every bottom-level (PTE) page table.
++ */
++int walk_page_range(struct mm_struct *mm,
++		    unsigned long addr, unsigned long end,
++		    struct mm_walk *walk, void *private)
++{
++	pgd_t *pgd;
++	unsigned long next;
++	int err;
++
++	for (pgd = pgd_offset(mm, addr); addr != end;
++	     pgd++, addr = next) {
++		next = pgd_addr_end(addr, end);
++		if (pgd_none_or_clear_bad(pgd))
++			continue;
++		if (walk->pgd_entry) {
++			err = walk->pgd_entry(pgd, addr, next, private);
++			if (err)
++				return err;
++		}
++		if (walk->pud_entry || walk->pmd_entry || walk->pte_entry) {
++			err = walk_pud_range(pgd, addr, next, walk, private);
++			if (err)
++				return err;
++		}
++	}
++	return 0;
++}
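
A caller supplies only the callbacks it cares about; levels with a NULL
callback are still descended but not reported. A hedged sketch of a
PTE-level user that counts present entries (struct mm_walk and the callback
signature are taken from this patch, the function names are invented):

	/* Hypothetical walker sketch -- counts present PTEs in a range. */
	static int count_pte(pte_t *pte, unsigned long addr,
			     unsigned long end, void *private)
	{
		if (pte_present(*pte))
			(*(unsigned long *)private)++;
		return 0;
	}

	static unsigned long count_present_ptes(struct mm_struct *mm,
				unsigned long start, unsigned long end)
	{
		unsigned long n = 0;
		struct mm_walk walk = { .pte_entry = count_pte };

		walk_page_range(mm, start, end, &walk, &n);
		return n;
	}
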
+diff -Nurb linux-2.6.22-570/lib/radix-tree.c linux-2.6.22-try2/lib/radix-tree.c
+--- linux-2.6.22-570/lib/radix-tree.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/lib/radix-tree.c	2007-12-19 15:29:24.000000000 -0500
+@@ -93,7 +93,8 @@
+ 	struct radix_tree_node *ret;
+ 	gfp_t gfp_mask = root_gfp_mask(root);
+ 
+-	ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
++	ret = kmem_cache_alloc(radix_tree_node_cachep,
++				set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
+ 	if (ret == NULL && !(gfp_mask & __GFP_WAIT)) {
+ 		struct radix_tree_preload *rtp;
+ 
+@@ -137,7 +138,8 @@
+ 	rtp = &__get_cpu_var(radix_tree_preloads);
+ 	while (rtp->nr < ARRAY_SIZE(rtp->nodes)) {
+ 		preempt_enable();
+-		node = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask);
++		node = kmem_cache_alloc(radix_tree_node_cachep,
++				set_migrateflags(gfp_mask, __GFP_RECLAIMABLE));
+ 		if (node == NULL)
+ 			goto out;
+ 		preempt_disable();
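
Both allocation sites now tag radix-tree nodes as reclaimable so the
grouping-by-mobility code later in this patch keeps them out of movable
pageblocks. set_migrateflags() itself is defined elsewhere in this series;
its assumed shape is roughly the sketch below (an assumption, not the
verbatim definition):

	/* Assumed helper shape -- the real definition lives in the
	 * grouping-by-mobility part of this series. */
	static inline gfp_t set_migrateflags(gfp_t gfp, gfp_t migrate_flags)
	{
		BUG_ON((gfp & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
		return (gfp & ~GFP_MOVABLE_MASK) | migrate_flags;
	}
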
+diff -Nurb linux-2.6.22-570/mm/filemap.c linux-2.6.22-try2/mm/filemap.c
+--- linux-2.6.22-570/mm/filemap.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/mm/filemap.c	2007-12-19 15:29:24.000000000 -0500
+@@ -1334,39 +1334,38 @@
+ #define MMAP_LOTSAMISS  (100)
+ 
+ /**
+- * filemap_nopage - read in file data for page fault handling
+- * @area:	the applicable vm_area
+- * @address:	target address to read in
+- * @type:	returned with VM_FAULT_{MINOR,MAJOR} if not %NULL
++ * filemap_fault - read in file data for page fault handling
++ * @vma:	user vma (not used)
++ * @fdata:	the applicable fault_data
+  *
+- * filemap_nopage() is invoked via the vma operations vector for a
++ * filemap_fault() is invoked via the vma operations vector for a
+  * mapped memory region to read in file data during a page fault.
+  *
+  * The goto's are kind of ugly, but this streamlines the normal case of having
+  * it in the page cache, and handles the special cases reasonably without
+  * having a lot of duplicated code.
+  */
+-struct page *filemap_nopage(struct vm_area_struct *area,
+-				unsigned long address, int *type)
++struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
+ {
+ 	int error;
+-	struct file *file = area->vm_file;
++	struct file *file = vma->vm_file;
+ 	struct address_space *mapping = file->f_mapping;
+ 	struct file_ra_state *ra = &file->f_ra;
+ 	struct inode *inode = mapping->host;
+ 	struct page *page;
+-	unsigned long size, pgoff;
+-	int did_readaround = 0, majmin = VM_FAULT_MINOR;
++	unsigned long size;
++	int did_readaround = 0;
+ 
+-	pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
++	fdata->type = VM_FAULT_MINOR;
++
++	BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
+ 
+-retry_all:
+ 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+-	if (pgoff >= size)
++	if (fdata->pgoff >= size)
+ 		goto outside_data_content;
+ 
+ 	/* If we don't want any read-ahead, don't bother */
+-	if (VM_RandomReadHint(area))
++	if (VM_RandomReadHint(vma))
+ 		goto no_cached_page;
+ 
+ 	/*
+@@ -1375,19 +1374,19 @@
+ 	 *
+ 	 * For sequential accesses, we use the generic readahead logic.
+ 	 */
+-	if (VM_SequentialReadHint(area))
+-		page_cache_readahead(mapping, ra, file, pgoff, 1);
++	if (VM_SequentialReadHint(vma))
++		page_cache_readahead(mapping, ra, file, fdata->pgoff, 1);
+ 
+ 	/*
+ 	 * Do we have something in the page cache already?
+ 	 */
+ retry_find:
+-	page = find_get_page(mapping, pgoff);
++	page = find_lock_page(mapping, fdata->pgoff);
+ 	if (!page) {
+ 		unsigned long ra_pages;
+ 
+-		if (VM_SequentialReadHint(area)) {
+-			handle_ra_miss(mapping, ra, pgoff);
++		if (VM_SequentialReadHint(vma)) {
++			handle_ra_miss(mapping, ra, fdata->pgoff);
+ 			goto no_cached_page;
+ 		}
+ 		ra->mmap_miss++;
+@@ -1404,7 +1403,7 @@
+ 		 * check did_readaround, as this is an inner loop.
+ 		 */
+ 		if (!did_readaround) {
+-			majmin = VM_FAULT_MAJOR;
++			fdata->type = VM_FAULT_MAJOR;
+ 			count_vm_event(PGMAJFAULT);
+ 		}
+ 		did_readaround = 1;
+@@ -1412,11 +1411,11 @@
+ 		if (ra_pages) {
+ 			pgoff_t start = 0;
+ 
+-			if (pgoff > ra_pages / 2)
+-				start = pgoff - ra_pages / 2;
++			if (fdata->pgoff > ra_pages / 2)
++				start = fdata->pgoff - ra_pages / 2;
+ 			do_page_cache_readahead(mapping, file, start, ra_pages);
+ 		}
+-		page = find_get_page(mapping, pgoff);
++		page = find_lock_page(mapping, fdata->pgoff);
+ 		if (!page)
+ 			goto no_cached_page;
+ 	}
+@@ -1425,19 +1424,23 @@
+ 		ra->mmap_hit++;
+ 
+ 	/*
+-	 * Ok, found a page in the page cache, now we need to check
+-	 * that it's up-to-date.
++	 * We have a locked page in the page cache, now we need to check
++	 * that it's up-to-date. If not, it is going to be due to an error.
+ 	 */
+-	if (!PageUptodate(page))
++	if (unlikely(!PageUptodate(page)))
+ 		goto page_not_uptodate;
+ 
+-success:
++	/* Must recheck i_size under page lock */
++	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
++	if (unlikely(fdata->pgoff >= size)) {
++		unlock_page(page);
++		goto outside_data_content;
++	}
++
+ 	/*
+ 	 * Found the page and have a reference on it.
+ 	 */
+ 	mark_page_accessed(page);
+-	if (type)
+-		*type = majmin;
+ 	return page;
+ 
+ outside_data_content:
+@@ -1445,15 +1448,17 @@
+ 	 * An external ptracer can access pages that normally aren't
+ 	 * accessible..
+ 	 */
+-	if (area->vm_mm == current->mm)
+-		return NOPAGE_SIGBUS;
++	if (vma->vm_mm == current->mm) {
++		fdata->type = VM_FAULT_SIGBUS;
++		return NULL;
++	}
+ 	/* Fall through to the non-read-ahead case */
+ no_cached_page:
+ 	/*
+ 	 * We're only likely to ever get here if MADV_RANDOM is in
+ 	 * effect.
+ 	 */
+-	error = page_cache_read(file, pgoff);
++	error = page_cache_read(file, fdata->pgoff);
+ 
+ 	/*
+ 	 * The page we want has now been added to the page cache.
+@@ -1469,12 +1474,15 @@
+ 	 * to schedule I/O.
+ 	 */
+ 	if (error == -ENOMEM)
+-		return NOPAGE_OOM;
+-	return NOPAGE_SIGBUS;
++		fdata->type = VM_FAULT_OOM;
++	else
++		fdata->type = VM_FAULT_SIGBUS;
++	return NULL;
+ 
+ page_not_uptodate:
++	/* IO error path */
+ 	if (!did_readaround) {
+-		majmin = VM_FAULT_MAJOR;
++		fdata->type = VM_FAULT_MAJOR;
+ 		count_vm_event(PGMAJFAULT);
+ 	}
+ 
+@@ -1484,38 +1492,39 @@
+ 	 * because there really aren't any performance issues here
+ 	 * and we need to check for errors.
+ 	 */
+-	lock_page(page);
+-
+-	/* Somebody truncated the page on us? */
+-	if (!page->mapping) {
+-		unlock_page(page);
+-		page_cache_release(page);
+-		goto retry_all;
+-	}
+-
+-	/* Somebody else successfully read it in? */
+-	if (PageUptodate(page)) {
+-		unlock_page(page);
+-		goto success;
+-	}
+ 	ClearPageError(page);
+ 	error = mapping->a_ops->readpage(file, page);
+-	if (!error) {
+-		wait_on_page_locked(page);
+-		if (PageUptodate(page))
+-			goto success;
+-	} else if (error == AOP_TRUNCATED_PAGE) {
+ 		page_cache_release(page);
++
++	if (!error || error == AOP_TRUNCATED_PAGE)
+ 		goto retry_find;
+-	}
+ 
+-	/*
+-	 * Things didn't work out. Return zero to tell the
+-	 * mm layer so, possibly freeing the page cache page first.
+-	 */
++	/* Things didn't work out. Return zero to tell the mm layer so. */
+ 	shrink_readahead_size_eio(file, ra);
+-	page_cache_release(page);
+-	return NOPAGE_SIGBUS;
++	fdata->type = VM_FAULT_SIGBUS;
++	return NULL;
++}
++EXPORT_SYMBOL(filemap_fault);
++
++/*
++ * filemap_nopage and filemap_populate are legacy exports that are not used
++ * in tree. Scheduled for removal.
++ */
++struct page *filemap_nopage(struct vm_area_struct *area,
++				unsigned long address, int *type)
++{
++	struct page *page;
++	struct fault_data fdata;
++	fdata.address = address;
++	fdata.pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
++			+ area->vm_pgoff;
++	fdata.flags = 0;
++
++	page = filemap_fault(area, &fdata);
++	if (type)
++		*type = fdata.type;
++
++	return page;
+ }
+ EXPORT_SYMBOL(filemap_nopage);
+ 
+@@ -1693,8 +1702,7 @@
+ EXPORT_SYMBOL(filemap_populate);
+ 
+ struct vm_operations_struct generic_file_vm_ops = {
+-	.nopage		= filemap_nopage,
+-	.populate	= filemap_populate,
++	.fault		= filemap_fault,
+ };
+ 
+ /* This is used for a general mmap of a disk file */
+@@ -1707,6 +1715,7 @@
+ 		return -ENOEXEC;
+ 	file_accessed(file);
+ 	vma->vm_ops = &generic_file_vm_ops;
++	vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
+ 	return 0;
+ }
+ 
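
Under the new convention a driver's ->fault handler receives the page
offset and flags through struct fault_data, returns a referenced (and,
with VM_CAN_INVALIDATE, locked) page, and reports the outcome through
fdata->type. A hedged sketch for a hypothetical driver that serves pages
from a preallocated array (every name below is invented):

	/* Hypothetical driver sketch -- struct and names are invented. */
	struct mydrv {
		struct page **pages;
		unsigned long nr_pages;
	};

	static struct page *mydrv_fault(struct vm_area_struct *vma,
					struct fault_data *fdata)
	{
		struct mydrv *drv = vma->vm_private_data;

		if (fdata->pgoff >= drv->nr_pages) {
			fdata->type = VM_FAULT_SIGBUS;
			return NULL;
		}
		get_page(drv->pages[fdata->pgoff]);
		fdata->type = VM_FAULT_MINOR;
		return drv->pages[fdata->pgoff];
	}

	static struct vm_operations_struct mydrv_vm_ops = {
		.fault	= mydrv_fault,
	};
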
+diff -Nurb linux-2.6.22-570/mm/filemap_xip.c linux-2.6.22-try2/mm/filemap_xip.c
+--- linux-2.6.22-570/mm/filemap_xip.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/mm/filemap_xip.c	2007-12-19 15:29:24.000000000 -0500
+@@ -228,62 +228,67 @@
+ }
+ 
+ /*
+- * xip_nopage() is invoked via the vma operations vector for a
++ * xip_fault() is invoked via the vma operations vector for a
+  * mapped memory region to read in file data during a page fault.
+  *
+- * This function is derived from filemap_nopage, but used for execute in place
++ * This function is derived from filemap_fault, but used for execute in place
+  */
+-static struct page *
+-xip_file_nopage(struct vm_area_struct * area,
+-		   unsigned long address,
+-		   int *type)
++static struct page *xip_file_fault(struct vm_area_struct *area,
++					struct fault_data *fdata)
+ {
+ 	struct file *file = area->vm_file;
+ 	struct address_space *mapping = file->f_mapping;
+ 	struct inode *inode = mapping->host;
+ 	struct page *page;
+-	unsigned long size, pgoff, endoff;
++	pgoff_t size;
+ 
+-	pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
+-		+ area->vm_pgoff;
+-	endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT)
+-		+ area->vm_pgoff;
++	/* XXX: are VM_FAULT_ codes OK? */
+ 
+ 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+-	if (pgoff >= size)
+-		return NOPAGE_SIGBUS;
++	if (fdata->pgoff >= size) {
++		fdata->type = VM_FAULT_SIGBUS;
++		return NULL;
++	}
+ 
+-	page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
++	page = mapping->a_ops->get_xip_page(mapping,
++					fdata->pgoff*(PAGE_SIZE/512), 0);
+ 	if (!IS_ERR(page))
+ 		goto out;
+-	if (PTR_ERR(page) != -ENODATA)
+-		return NOPAGE_SIGBUS;
++	if (PTR_ERR(page) != -ENODATA) {
++		fdata->type = VM_FAULT_OOM;
++		return NULL;
++	}
+ 
+ 	/* sparse block */
+ 	if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
+ 	    (area->vm_flags & (VM_SHARED| VM_MAYSHARE)) &&
+ 	    (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
+ 		/* maybe shared writable, allocate new block */
+-		page = mapping->a_ops->get_xip_page (mapping,
+-			pgoff*(PAGE_SIZE/512), 1);
+-		if (IS_ERR(page))
+-			return NOPAGE_SIGBUS;
++		page = mapping->a_ops->get_xip_page(mapping,
++					fdata->pgoff*(PAGE_SIZE/512), 1);
++		if (IS_ERR(page)) {
++			fdata->type = VM_FAULT_SIGBUS;
++			return NULL;
++		}
+ 		/* unmap page at pgoff from all other vmas */
+-		__xip_unmap(mapping, pgoff);
++		__xip_unmap(mapping, fdata->pgoff);
+ 	} else {
+ 		/* not shared and writable, use xip_sparse_page() */
+ 		page = xip_sparse_page();
+-		if (!page)
+-			return NOPAGE_OOM;
++		if (!page) {
++			fdata->type = VM_FAULT_OOM;
++			return NULL;
++		}
+ 	}
+ 
+ out:
++	fdata->type = VM_FAULT_MINOR;
+ 	page_cache_get(page);
+ 	return page;
+ }
+ 
+ static struct vm_operations_struct xip_file_vm_ops = {
+-	.nopage         = xip_file_nopage,
++	.fault	= xip_file_fault,
+ };
+ 
+ int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
+@@ -292,6 +297,7 @@
+ 
+ 	file_accessed(file);
+ 	vma->vm_ops = &xip_file_vm_ops;
++	vma->vm_flags |= VM_CAN_NONLINEAR;
+ 	return 0;
+ }
+ EXPORT_SYMBOL_GPL(xip_file_mmap);
+diff -Nurb linux-2.6.22-570/mm/fremap.c linux-2.6.22-try2/mm/fremap.c
+--- linux-2.6.22-570/mm/fremap.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/mm/fremap.c	2007-12-19 15:29:24.000000000 -0500
+@@ -129,6 +129,25 @@
+ 	return err;
+ }
+ 
++static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma,
++			unsigned long addr, unsigned long size, pgoff_t pgoff)
++{
++	int err;
++
++	do {
++		err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
++		if (err)
++			return err;
++
++		size -= PAGE_SIZE;
++		addr += PAGE_SIZE;
++		pgoff++;
++	} while (size);
++
++	return 0;
++}
++
+ /***
+  * sys_remap_file_pages - remap arbitrary pages of a shared backing store
+  *                        file within an existing vma.
+@@ -186,15 +205,27 @@
+ 	 * the single existing vma.  vm_private_data is used as a
+ 	 * swapout cursor in a VM_NONLINEAR vma.
+ 	 */
+-	if (vma && (vma->vm_flags & VM_SHARED) &&
+-		(!vma->vm_private_data || (vma->vm_flags & VM_NONLINEAR)) &&
+-		vma->vm_ops && vma->vm_ops->populate &&
+-			end > start && start >= vma->vm_start &&
+-				end <= vma->vm_end) {
++	if (!vma || !(vma->vm_flags & VM_SHARED))
++		goto out;
++
++	if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
++		goto out;
++
++	if ((!vma->vm_ops || !vma->vm_ops->populate) &&
++					!(vma->vm_flags & VM_CAN_NONLINEAR))
++		goto out;
++
++	if (end <= start || start < vma->vm_start || end > vma->vm_end)
++		goto out;
+ 
+ 		/* Must set VM_NONLINEAR before any pages are populated. */
+-		if (pgoff != linear_page_index(vma, start) &&
+-		    !(vma->vm_flags & VM_NONLINEAR)) {
++	if (!(vma->vm_flags & VM_NONLINEAR)) {
++		/* Don't need a nonlinear mapping, exit success */
++		if (pgoff == linear_page_index(vma, start)) {
++			err = 0;
++			goto out;
++		}
++
+ 			if (!has_write_lock) {
+ 				up_read(&mm->mmap_sem);
+ 				down_write(&mm->mmap_sem);
+@@ -211,8 +242,17 @@
+ 			spin_unlock(&mapping->i_mmap_lock);
+ 		}
+ 
+-		err = vma->vm_ops->populate(vma, start, size,
+-					    vma->vm_page_prot,
++	if (vma->vm_flags & VM_CAN_NONLINEAR) {
++		err = populate_range(mm, vma, start, size, pgoff);
++		if (!err && !(flags & MAP_NONBLOCK)) {
++			if (unlikely(has_write_lock)) {
++				downgrade_write(&mm->mmap_sem);
++				has_write_lock = 0;
++			}
++			make_pages_present(start, start+size);
++		}
++	} else
++		err = vma->vm_ops->populate(vma, start, size, vma->vm_page_prot,
+ 					    pgoff, flags & MAP_NONBLOCK);
+ 
+ 		/*
+@@ -220,7 +260,8 @@
+ 		 * it after ->populate completes, and that would prevent
+ 		 * downgrading the lock.  (Locks can't be upgraded).
+ 		 */
+-	}
++
++out:
+ 	if (likely(!has_write_lock))
+ 		up_read(&mm->mmap_sem);
+ 	else
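
populate_range() above is the kernel side of remap_file_pages(2) for vmas
that set VM_CAN_NONLINEAR: it installs one file PTE per page. The userspace
view, as a self-contained sketch (Linux-specific; the temp file path is
arbitrary):

	/* Userspace sketch: show file page 2 at offset 0 of the window. */
	#define _GNU_SOURCE
	#include <sys/mman.h>
	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		long psz = sysconf(_SC_PAGESIZE);
		int fd = open("/tmp/remap-test", O_RDWR | O_CREAT, 0600);
		char *p;

		if (fd < 0 || ftruncate(fd, 4 * psz) < 0) {
			perror("setup");
			return 1;
		}
		p = mmap(NULL, 4 * psz, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		/* The vma becomes nonlinear; populate_range() installs
		 * the PTE for file page 2 at window page 0. */
		if (remap_file_pages(p, psz, 0, 2, 0) < 0) {
			perror("remap_file_pages");
			return 1;
		}
		return 0;
	}
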
+diff -Nurb linux-2.6.22-570/mm/hugetlb.c linux-2.6.22-try2/mm/hugetlb.c
+--- linux-2.6.22-570/mm/hugetlb.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/mm/hugetlb.c	2007-12-19 22:07:18.000000000 -0500
+@@ -28,6 +28,9 @@
+ static struct list_head hugepage_freelists[MAX_NUMNODES];
+ static unsigned int nr_huge_pages_node[MAX_NUMNODES];
+ static unsigned int free_huge_pages_node[MAX_NUMNODES];
++gfp_t htlb_alloc_mask = GFP_HIGHUSER;
++unsigned long hugepages_treat_as_movable;
++
+ /*
+  * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
+  */
+@@ -67,14 +70,15 @@
+ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
+ 				unsigned long address)
+ {
+-	int nid = numa_node_id();
++	int nid;
+ 	struct page *page = NULL;
+-	struct zonelist *zonelist = huge_zonelist(vma, address);
++	struct zonelist *zonelist = huge_zonelist(vma, address,
++						htlb_alloc_mask);
+ 	struct zone **z;
+ 
+ 	for (z = zonelist->zones; *z; z++) {
+ 		nid = zone_to_nid(*z);
+-		if (cpuset_zone_allowed_softwall(*z, GFP_HIGHUSER) &&
++		if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) &&
+ 		    !list_empty(&hugepage_freelists[nid]))
+ 			break;
+ 	}
+@@ -114,7 +118,7 @@
+ 	prev_nid = nid;
+ 	spin_unlock(&nid_lock);
+ 
+-	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
++	page = alloc_pages_node(nid, htlb_alloc_mask|__GFP_COMP|__GFP_NOWARN,
+ 					HUGETLB_PAGE_ORDER);
+ 	if (page) {
+ 		set_compound_page_dtor(page, free_huge_page);
+@@ -264,6 +268,19 @@
+ 	max_huge_pages = set_max_huge_pages(max_huge_pages);
+ 	return 0;
+ }
++
++int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
++			struct file *file, void __user *buffer,
++			size_t *length, loff_t *ppos)
++{
++	proc_dointvec(table, write, file, buffer, length, ppos);
++	if (hugepages_treat_as_movable)
++		htlb_alloc_mask = GFP_HIGH_MOVABLE;
++	else
++		htlb_alloc_mask = GFP_HIGHUSER;
++	return 0;
++}
++
+ #endif /* CONFIG_SYSCTL */
+ 
+ int hugetlb_report_meminfo(char *buf)
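
The handler above rewrites htlb_alloc_mask whenever the sysctl is written,
so subsequent huge page allocations come from the movable area when
enabled. Assuming the sysctl registration elsewhere in this series exposes
the knob as /proc/sys/vm/hugepages_treat_as_movable (an assumption, not
shown in this hunk), toggling it from userspace is just:

	/* Userspace sketch -- the proc path is an assumption, see above. */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/vm/hugepages_treat_as_movable", "w");

		if (!f) {
			perror("fopen");
			return 1;
		}
		fputs("1\n", f);  /* huge pages now use GFP_HIGH_MOVABLE */
		return fclose(f) ? 1 : 0;
	}
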
+diff -Nurb linux-2.6.22-570/mm/memory.c linux-2.6.22-try2/mm/memory.c
+--- linux-2.6.22-570/mm/memory.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/mm/memory.c	2007-12-19 22:05:46.000000000 -0500
+@@ -1052,7 +1052,8 @@
+ 		if (pages)
+ 			foll_flags |= FOLL_GET;
+ 		if (!write && !(vma->vm_flags & VM_LOCKED) &&
+-		    (!vma->vm_ops || !vma->vm_ops->nopage))
++		    (!vma->vm_ops || (!vma->vm_ops->nopage &&
++					!vma->vm_ops->fault)))
+ 			foll_flags |= FOLL_ANON;
+ 
+ 		do {
+@@ -1712,11 +1713,11 @@
+ 	if (unlikely(anon_vma_prepare(vma)))
+ 		goto oom;
+ 	if (old_page == ZERO_PAGE(address)) {
+-		new_page = alloc_zeroed_user_highpage(vma, address);
++		new_page = alloc_zeroed_user_highpage_movable(vma, address);
+ 		if (!new_page)
+ 			goto oom;
+ 	} else {
+-		new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
++		new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+ 		if (!new_page)
+ 			goto oom;
+ 		cow_user_page(new_page, old_page, address, vma);
+@@ -1828,6 +1829,13 @@
+ 	unsigned long restart_addr;
+ 	int need_break;
+ 
++	/*
++	 * Files that support invalidating or truncating portions of the
++	 * file from under mmapped areas must set the VM_CAN_INVALIDATE flag
++	 * and have their ->fault (or legacy ->nopage) handler return the
++	 * page locked.
++	 */
++	BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
++
+ again:
+ 	restart_addr = vma->vm_truncate_count;
+ 	if (is_restart_addr(restart_addr) && start_addr < restart_addr) {
+@@ -1956,17 +1964,8 @@
+ 
+ 	spin_lock(&mapping->i_mmap_lock);
+ 
+-	/* serialize i_size write against truncate_count write */
+-	smp_wmb();
+-	/* Protect against page faults, and endless unmapping loops */
++	/* Protect against endless unmapping loops */
+ 	mapping->truncate_count++;
+-	/*
+-	 * For archs where spin_lock has inclusive semantics like ia64
+-	 * this smp_mb() will prevent to read pagetable contents
+-	 * before the truncate_count increment is visible to
+-	 * other cpus.
+-	 */
+-	smp_mb();
+ 	if (unlikely(is_restart_addr(mapping->truncate_count))) {
+ 		if (mapping->truncate_count == 0)
+ 			reset_vma_truncate_counts(mapping);
+@@ -2005,8 +2004,18 @@
+ 	if (IS_SWAPFILE(inode))
+ 		goto out_busy;
+ 	i_size_write(inode, offset);
++
++	/*
++	 * unmap_mapping_range is called twice, first simply for efficiency
++	 * so that truncate_inode_pages does fewer single-page unmaps. However
++	 * after this first call, and before truncate_inode_pages finishes,
++	 * it is possible for private pages to be COWed, which remain after
++	 * truncate_inode_pages finishes, hence the second unmap_mapping_range
++	 * call must be made for correctness.
++	 */
+ 	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+ 	truncate_inode_pages(mapping, offset);
++	unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
+ 	goto out_truncate;
+ 
+ do_expand:
+@@ -2046,6 +2055,7 @@
+ 	down_write(&inode->i_alloc_sem);
+ 	unmap_mapping_range(mapping, offset, (end - offset), 1);
+ 	truncate_inode_pages_range(mapping, offset, end);
++	unmap_mapping_range(mapping, offset, (end - offset), 1);
+ 	inode->i_op->truncate_range(inode, offset, end);
+ 	up_write(&inode->i_alloc_sem);
+ 	mutex_unlock(&inode->i_mutex);
+@@ -2208,7 +2218,6 @@
+ 
+ 	/* No need to invalidate - it was non-present before */
+ 	update_mmu_cache(vma, address, pte);
+-	lazy_mmu_prot_update(pte);
+ unlock:
+ 	pte_unmap_unlock(page_table, ptl);
+ out:
+@@ -2241,7 +2250,7 @@
+ 			goto oom;
+ 		if (unlikely(anon_vma_prepare(vma)))
+ 			goto oom;
+-		page = alloc_zeroed_user_highpage(vma, address);
++		page = alloc_zeroed_user_highpage_movable(vma, address);
+ 		if (!page)
+ 			goto oom;
+ 
+@@ -2284,10 +2293,10 @@
+ }
+ 
+ /*
+- * do_no_page() tries to create a new page mapping. It aggressively
++ * __do_fault() tries to create a new page mapping. It aggressively
+  * tries to share with existing pages, but makes a separate copy if
+- * the "write_access" parameter is true in order to avoid the next
+- * page fault.
++ * the FAULT_FLAG_WRITE is set in the flags parameter in order to avoid
++ * the next page fault.
+  *
+  * As this is called only for pages that do not currently exist, we
+  * do not need to flush old virtual caches or the TLB.
+@@ -2296,92 +2305,85 @@
+  * but allow concurrent faults), and pte mapped but not yet locked.
+  * We return with mmap_sem still held, but pte unmapped and unlocked.
+  */
+-static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
++static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ 		unsigned long address, pte_t *page_table, pmd_t *pmd,
+-		int write_access)
+-{
++ 		pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
++  {
+ 	spinlock_t *ptl;
+-	struct page *new_page;
+-	struct address_space *mapping = NULL;
++	struct page *page, *faulted_page;
+ 	pte_t entry;
+-	unsigned int sequence = 0;
+-	int ret = VM_FAULT_MINOR;
+ 	int anon = 0;
+ 	struct page *dirty_page = NULL;
++	struct fault_data fdata;
++
++	fdata.address = address & PAGE_MASK;
++	fdata.pgoff = pgoff;
++	fdata.flags = flags;
+ 
+ 	pte_unmap(page_table);
+ 	BUG_ON(vma->vm_flags & VM_PFNMAP);
+ 
+-	if (!vx_rss_avail(mm, 1))
++	if (likely(vma->vm_ops->fault)) {
++		fdata.type = -1;
++		faulted_page = vma->vm_ops->fault(vma, &fdata);
++		WARN_ON(fdata.type == -1);
++		if (unlikely(!faulted_page))
++			return fdata.type;
++	} else {
++		/* Legacy ->nopage path */
++		fdata.type = VM_FAULT_MINOR;
++		faulted_page = vma->vm_ops->nopage(vma, address & PAGE_MASK,
++								&fdata.type);
++		/* no page was available -- either SIGBUS or OOM */
++		if (unlikely(faulted_page == NOPAGE_SIGBUS))
++			return VM_FAULT_SIGBUS;
++		else if (unlikely(faulted_page == NOPAGE_OOM))
+ 		return VM_FAULT_OOM;
++	}
+ 
+-	if (vma->vm_file) {
+-		mapping = vma->vm_file->f_mapping;
+-		sequence = mapping->truncate_count;
+-		smp_rmb(); /* serializes i_size against truncate_count */
+-	}
+-retry:
+-	new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
+-	/*
+-	 * No smp_rmb is needed here as long as there's a full
+-	 * spin_lock/unlock sequence inside the ->nopage callback
+-	 * (for the pagecache lookup) that acts as an implicit
+-	 * smp_mb() and prevents the i_size read to happen
+-	 * after the next truncate_count read.
++	/*
++	 * For consistency in subsequent calls, make the faulted_page always
++	 * locked.
+ 	 */
+-
+-	/* no page was available -- either SIGBUS, OOM or REFAULT */
+-	if (unlikely(new_page == NOPAGE_SIGBUS))
+-		return VM_FAULT_SIGBUS;
+-	else if (unlikely(new_page == NOPAGE_OOM))
+-		return VM_FAULT_OOM;
+-	else if (unlikely(new_page == NOPAGE_REFAULT))
+-		return VM_FAULT_MINOR;
++	if (unlikely(!(vma->vm_flags & VM_CAN_INVALIDATE)))
++		lock_page(faulted_page);
++	else
++		BUG_ON(!PageLocked(faulted_page));
+ 
+ 	/*
+ 	 * Should we do an early C-O-W break?
+ 	 */
+-	if (write_access) {
++	page = faulted_page;
++	if (flags & FAULT_FLAG_WRITE) {
+ 		if (!(vma->vm_flags & VM_SHARED)) {
+-			struct page *page;
+-
+-			if (unlikely(anon_vma_prepare(vma)))
+-				goto oom;
+-			page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+-			if (!page)
+-				goto oom;
+-			copy_user_highpage(page, new_page, address, vma);
+-			page_cache_release(new_page);
+-			new_page = page;
+ 			anon = 1;
+-
++			if (unlikely(anon_vma_prepare(vma))) {
++				fdata.type = VM_FAULT_OOM;
++				goto out;
++			}
++			page = alloc_page_vma(GFP_HIGHUSER, vma, address);
++			if (!page) {
++				fdata.type = VM_FAULT_OOM;
++				goto out;
++			}
++			copy_user_highpage(page, faulted_page, address, vma);
+ 		} else {
+-			/* if the page will be shareable, see if the backing
++			/*
++			 * If the page will be shareable, see if the backing
+ 			 * address space wants to know that the page is about
+-			 * to become writable */
++			 * to become writable
++			 */
+ 			if (vma->vm_ops->page_mkwrite &&
+-			    vma->vm_ops->page_mkwrite(vma, new_page) < 0
+-			    ) {
+-				page_cache_release(new_page);
+-				return VM_FAULT_SIGBUS;
++			    vma->vm_ops->page_mkwrite(vma, page) < 0) {
++				fdata.type = VM_FAULT_SIGBUS;
++				anon = 1; /* no anon but release faulted_page */
++				goto out;
++			}
+ 			}
+ 		}
++
+ 	}
+ 
+ 	page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+-	/*
+-	 * For a file-backed vma, someone could have truncated or otherwise
+-	 * invalidated this page.  If unmap_mapping_range got called,
+-	 * retry getting the page.
+-	 */
+-	if (mapping && unlikely(sequence != mapping->truncate_count)) {
+-		pte_unmap_unlock(page_table, ptl);
+-		page_cache_release(new_page);
+-		cond_resched();
+-		sequence = mapping->truncate_count;
+-		smp_rmb();
+-		goto retry;
+-	}
+ 
+ 	/*
+ 	 * This silly early PAGE_DIRTY setting removes a race
+@@ -2394,43 +2396,68 @@
+ 	 * handle that later.
+ 	 */
+ 	/* Only go through if we didn't race with anybody else... */
+-	if (pte_none(*page_table)) {
+-		flush_icache_page(vma, new_page);
+-		entry = mk_pte(new_page, vma->vm_page_prot);
+-		if (write_access)
++	if (likely(pte_same(*page_table, orig_pte))) {
++		flush_icache_page(vma, page);
++		entry = mk_pte(page, vma->vm_page_prot);
++		if (flags & FAULT_FLAG_WRITE)
+ 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ 		set_pte_at(mm, address, page_table, entry);
+ 		if (anon) {
+ 			inc_mm_counter(mm, anon_rss);
+-			lru_cache_add_active(new_page);
+-			page_add_new_anon_rmap(new_page, vma, address);
++			lru_cache_add_active(page);
++			page_add_new_anon_rmap(page, vma, address);
+ 		} else {
+ 			inc_mm_counter(mm, file_rss);
+-			page_add_file_rmap(new_page);
+-			if (write_access) {
+-				dirty_page = new_page;
++			page_add_file_rmap(page);
++			if (flags & FAULT_FLAG_WRITE) {
++				dirty_page = page;
+ 				get_page(dirty_page);
+ 			}
+ 		}
+-	} else {
+-		/* One of our sibling threads was faster, back out. */
+-		page_cache_release(new_page);
+-		goto unlock;
+-	}
+ 
+-	/* no need to invalidate: a not-present page shouldn't be cached */
++		/* no need to invalidate: a not-present page won't be cached */
+ 	update_mmu_cache(vma, address, entry);
+ 	lazy_mmu_prot_update(entry);
+-unlock:
++	} else {
++		if (anon)
++			page_cache_release(page);
++		else
++			anon = 1; /* no anon but release faulted_page */
++	}
++
+ 	pte_unmap_unlock(page_table, ptl);
+-	if (dirty_page) {
++
++out:
++	unlock_page(faulted_page);
++	if (anon)
++		page_cache_release(faulted_page);
++	else if (dirty_page) {
+ 		set_page_dirty_balance(dirty_page);
+ 		put_page(dirty_page);
+ 	}
+-	return ret;
+-oom:
+-	page_cache_release(new_page);
+-	return VM_FAULT_OOM;
++
++	return fdata.type;
++}
++
++static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
++		unsigned long address, pte_t *page_table, pmd_t *pmd,
++		int write_access, pte_t orig_pte)
++{
++	pgoff_t pgoff = (((address & PAGE_MASK)
++			- vma->vm_start) >> PAGE_CACHE_SHIFT) + vma->vm_pgoff;
++	unsigned int flags = (write_access ? FAULT_FLAG_WRITE : 0);
++
++	return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte);
++}
++
++static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
++		unsigned long address, pte_t *page_table, pmd_t *pmd,
++		int write_access, pgoff_t pgoff, pte_t orig_pte)
++{
++	unsigned int flags = FAULT_FLAG_NONLINEAR |
++				(write_access ? FAULT_FLAG_WRITE : 0);
++
++	return __do_fault(mm, vma, address, page_table, pmd, pgoff, flags, orig_pte);
+ }
+ 
+ /*
+@@ -2509,9 +2536,14 @@
+ 		print_bad_pte(vma, orig_pte, address);
+ 		return VM_FAULT_OOM;
+ 	}
+-	/* We can then assume vm->vm_ops && vma->vm_ops->populate */
+ 
+ 	pgoff = pte_to_pgoff(orig_pte);
++
++	if (vma->vm_ops && vma->vm_ops->fault)
++		return do_nonlinear_fault(mm, vma, address, page_table, pmd,
++					write_access, pgoff, orig_pte);
++
++	/* We can then assume vm->vm_ops && vma->vm_ops->populate */
+ 	err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
+ 					vma->vm_page_prot, pgoff, 0);
+ 	if (err == -ENOMEM)
+@@ -2546,10 +2578,9 @@
+ 	if (!pte_present(entry)) {
+ 		if (pte_none(entry)) {
+ 			if (vma->vm_ops) {
+-				if (vma->vm_ops->nopage)
+-					return do_no_page(mm, vma, address,
+-							  pte, pmd,
+-							  write_access);
++				if (vma->vm_ops->fault || vma->vm_ops->nopage)
++					return do_linear_fault(mm, vma, address,
++						pte, pmd, write_access, entry);
+ 				if (unlikely(vma->vm_ops->nopfn))
+ 					return do_no_pfn(mm, vma, address, pte,
+ 							 pmd, write_access);
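
do_linear_fault() derives the file page offset from the faulting address
before handing off to __do_fault(). The arithmetic can be checked
standalone with made-up numbers (4K pages, so PAGE_CACHE_SHIFT equals
PAGE_SHIFT, which is 12 here):

	/* Standalone check of the pgoff computation -- values invented. */
	#include <stdio.h>

	#define PAGE_SHIFT	12
	#define PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))

	int main(void)
	{
		unsigned long vm_start = 0x2a000000UL;	/* vma base */
		unsigned long vm_pgoff = 16;		/* file page at base */
		unsigned long address  = 0x2a003456UL;	/* faulting address */
		unsigned long pgoff = (((address & PAGE_MASK) - vm_start)
					>> PAGE_SHIFT) + vm_pgoff;

		printf("pgoff = %lu\n", pgoff);	/* 3 pages in + 16 = 19 */
		return 0;
	}
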
+diff -Nurb linux-2.6.22-570/mm/mempolicy.c linux-2.6.22-try2/mm/mempolicy.c
+--- linux-2.6.22-570/mm/mempolicy.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/mm/mempolicy.c	2007-12-19 15:29:24.000000000 -0500
+@@ -594,7 +594,7 @@
+ 
+ static struct page *new_node_page(struct page *page, unsigned long node, int **x)
+ {
+-	return alloc_pages_node(node, GFP_HIGHUSER, 0);
++	return alloc_pages_node(node, GFP_HIGHUSER_MOVABLE, 0);
+ }
+ 
+ /*
+@@ -710,7 +710,8 @@
+ {
+ 	struct vm_area_struct *vma = (struct vm_area_struct *)private;
+ 
+-	return alloc_page_vma(GFP_HIGHUSER, vma, page_address_in_vma(page, vma));
++	return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
++					page_address_in_vma(page, vma));
+ }
+ #else
+ 
+@@ -1202,7 +1203,8 @@
+ 
+ #ifdef CONFIG_HUGETLBFS
+ /* Return a zonelist suitable for a huge page allocation. */
+-struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
++struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
++							gfp_t gfp_flags)
+ {
+ 	struct mempolicy *pol = get_vma_policy(current, vma, addr);
+ 
+@@ -1210,7 +1212,7 @@
+ 		unsigned nid;
+ 
+ 		nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
+-		return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER);
++		return NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_flags);
+ 	}
+ 	return zonelist_policy(GFP_HIGHUSER, pol);
+ }
+@@ -1309,7 +1311,6 @@
+  * keeps mempolicies cpuset relative after its cpuset moves.  See
+  * further kernel/cpuset.c update_nodemask().
+  */
+-void *cpuset_being_rebound;
+ 
+ /* Slow path of a mempolicy copy */
+ struct mempolicy *__mpol_copy(struct mempolicy *old)
+@@ -1908,4 +1909,3 @@
+ 		m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
+ 	return 0;
+ }
+-
+diff -Nurb linux-2.6.22-570/mm/migrate.c linux-2.6.22-try2/mm/migrate.c
+--- linux-2.6.22-570/mm/migrate.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/mm/migrate.c	2007-12-19 15:29:24.000000000 -0500
+@@ -761,7 +761,8 @@
+ 
+ 	*result = &pm->status;
+ 
+-	return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0);
++	return alloc_pages_node(pm->node,
++				GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
+ }
+ 
+ /*
+diff -Nurb linux-2.6.22-570/mm/mmap.c linux-2.6.22-try2/mm/mmap.c
+--- linux-2.6.22-570/mm/mmap.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/mm/mmap.c	2007-12-19 15:29:24.000000000 -0500
+@@ -202,6 +202,17 @@
+ }
+ 
+ /*
++ * Requires inode->i_mapping->i_mmap_lock
++ */
++void __unlink_file_vma(struct vm_area_struct *vma)
++{
++	struct file *file = vma->vm_file;
++	struct address_space *mapping = file->f_mapping;
++
++	__remove_shared_vm_struct(vma, file, mapping);
++}
++
++/*
+  * Unlink a file-based vm structure from its prio_tree, to hide
+  * vma from rmap and vmtruncate before freeing its page tables.
+  */
+@@ -1023,7 +1034,7 @@
+ 		}
+ 	}
+ 
+-	error = security_file_mmap(file, reqprot, prot, flags);
++	error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
+ 	if (error)
+ 		return error;
+ 		
+@@ -1150,12 +1161,8 @@
+ 		vx_vmlocked_add(mm, len >> PAGE_SHIFT);
+ 		make_pages_present(addr, addr + len);
+ 	}
+-	if (flags & MAP_POPULATE) {
+-		up_write(&mm->mmap_sem);
+-		sys_remap_file_pages(addr, len, 0,
+-					pgoff, flags & MAP_NONBLOCK);
+-		down_write(&mm->mmap_sem);
+-	}
++	if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
++		make_pages_present(addr, addr + len);
+ 	return addr;
+ 
+ unmap_and_free_vma:
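
With the hunk above, MAP_POPULATE no longer round-trips through
sys_remap_file_pages(); it simply pre-faults the range unless MAP_NONBLOCK
is also given. Seen from userspace, the first access below should not take
a major fault (self-contained sketch; the file path is arbitrary):

	/* Userspace sketch of MAP_POPULATE pre-faulting. */
	#define _GNU_SOURCE
	#include <sys/mman.h>
	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		long psz = sysconf(_SC_PAGESIZE);
		int fd = open("/etc/passwd", O_RDONLY);
		char *p;

		if (fd < 0)
			return 1;
		p = mmap(NULL, psz, PROT_READ,
			 MAP_PRIVATE | MAP_POPULATE, fd, 0);
		if (p == MAP_FAILED) {
			perror("mmap");
			return 1;
		}
		printf("%c\n", p[0]);	/* page already resident */
		return 0;
	}
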
+diff -Nurb linux-2.6.22-570/mm/mremap.c linux-2.6.22-try2/mm/mremap.c
+--- linux-2.6.22-570/mm/mremap.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/mm/mremap.c	2007-12-19 15:29:23.000000000 -0500
+@@ -292,6 +292,10 @@
+ 		if ((addr <= new_addr) && (addr+old_len) > new_addr)
+ 			goto out;
+ 
++		ret = security_file_mmap(0, 0, 0, 0, new_addr, 1);
++		if (ret)
++			goto out;
++
+ 		ret = do_munmap(mm, new_addr, new_len);
+ 		if (ret)
+ 			goto out;
+@@ -394,8 +398,13 @@
+ 
+ 			new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
+ 						vma->vm_pgoff, map_flags);
++			if (new_addr & ~PAGE_MASK) {
+ 			ret = new_addr;
+-			if (new_addr & ~PAGE_MASK)
++				goto out;
++			}
++
++			ret = security_file_mmap(0, 0, 0, 0, new_addr, 1);
++			if (ret)
+ 				goto out;
+ 		}
+ 		ret = move_vma(vma, addr, old_len, new_len, new_addr);
+diff -Nurb linux-2.6.22-570/mm/nommu.c linux-2.6.22-try2/mm/nommu.c
+--- linux-2.6.22-570/mm/nommu.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/mm/nommu.c	2007-12-19 15:29:24.000000000 -0500
+@@ -639,7 +639,7 @@
+ 	}
+ 
+ 	/* allow the security API to have its say */
+-	ret = security_file_mmap(file, reqprot, prot, flags);
++	ret = security_file_mmap(file, reqprot, prot, flags, addr, 0);
+ 	if (ret < 0)
+ 		return ret;
+ 
+@@ -1336,8 +1336,7 @@
+ 	return 0;
+ }
+ 
+-struct page *filemap_nopage(struct vm_area_struct *area,
+-			unsigned long address, int *type)
++struct page *filemap_fault(struct vm_area_struct *vma, struct fault_data *fdata)
+ {
+ 	BUG();
+ 	return NULL;
+diff -Nurb linux-2.6.22-570/mm/page_alloc.c linux-2.6.22-try2/mm/page_alloc.c
+--- linux-2.6.22-570/mm/page_alloc.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/mm/page_alloc.c	2007-12-19 15:29:24.000000000 -0500
+@@ -143,6 +143,42 @@
+ EXPORT_SYMBOL(nr_node_ids);
+ #endif
+ 
++#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY
++static inline int get_pageblock_migratetype(struct page *page)
++{
++	return get_pageblock_flags_group(page, PB_migrate, PB_migrate_end);
++}
++
++static void set_pageblock_migratetype(struct page *page, int migratetype)
++{
++	set_pageblock_flags_group(page, (unsigned long)migratetype,
++					PB_migrate, PB_migrate_end);
++}
++
++static inline int gfpflags_to_migratetype(gfp_t gfp_flags)
++{
++	WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);
++
++	return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) |
++		((gfp_flags & __GFP_RECLAIMABLE) != 0);
++}
++
++#else
++static inline int get_pageblock_migratetype(struct page *page)
++{
++	return MIGRATE_UNMOVABLE;
++}
++
++static void set_pageblock_migratetype(struct page *page, int migratetype)
++{
++}
++
++static inline int gfpflags_to_migratetype(gfp_t gfp_flags)
++{
++	return MIGRATE_UNMOVABLE;
++}
++#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */
++
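
gfpflags_to_migratetype() packs the two mobility bits into a small index
that lines up with the MIGRATE_* enum. The mapping can be checked
standalone; the flag values below are illustrative (only their
distinctness matters), the bit math is the same as above:

	/* Standalone check of the gfp -> migratetype mapping. */
	#include <stdio.h>

	#define __GFP_RECLAIMABLE	(1u << 0)	/* illustrative */
	#define __GFP_MOVABLE		(1u << 1)	/* illustrative */

	enum { MIGRATE_UNMOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE };

	static int to_migratetype(unsigned int gfp)
	{
		return (((gfp & __GFP_MOVABLE) != 0) << 1) |
			((gfp & __GFP_RECLAIMABLE) != 0);
	}

	int main(void)
	{
		printf("%d %d %d\n",
		       to_migratetype(0),			/* 0 */
		       to_migratetype(__GFP_RECLAIMABLE),	/* 1 */
		       to_migratetype(__GFP_MOVABLE));		/* 2 */
		return 0;
	}
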
+ #ifdef CONFIG_DEBUG_VM
+ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
+ {
+@@ -397,6 +433,7 @@
+ {
+ 	unsigned long page_idx;
+ 	int order_size = 1 << order;
++	int migratetype = get_pageblock_migratetype(page);
+ 
+ 	if (unlikely(PageCompound(page)))
+ 		destroy_compound_page(page, order);
+@@ -409,7 +446,6 @@
+ 	__mod_zone_page_state(zone, NR_FREE_PAGES, order_size);
+ 	while (order < MAX_ORDER-1) {
+ 		unsigned long combined_idx;
+-		struct free_area *area;
+ 		struct page *buddy;
+ 
+ 		buddy = __page_find_buddy(page, page_idx, order);
+@@ -417,8 +453,7 @@
+ 			break;		/* Move the buddy up one level. */
+ 
+ 		list_del(&buddy->lru);
+-		area = zone->free_area + order;
+-		area->nr_free--;
++		zone->free_area[order].nr_free--;
+ 		rmv_page_order(buddy);
+ 		combined_idx = __find_combined_index(page_idx, order);
+ 		page = page + (combined_idx - page_idx);
+@@ -426,7 +461,8 @@
+ 		order++;
+ 	}
+ 	set_page_order(page, order);
+-	list_add(&page->lru, &zone->free_area[order].free_list);
++	list_add(&page->lru,
++		&zone->free_area[order].free_list[migratetype]);
+ 	zone->free_area[order].nr_free++;
+ }
+ 
+@@ -566,7 +602,8 @@
+  * -- wli
+  */
+ static inline void expand(struct zone *zone, struct page *page,
+- 	int low, int high, struct free_area *area)
++	int low, int high, struct free_area *area,
++	int migratetype)
+ {
+ 	unsigned long size = 1 << high;
+ 
+@@ -575,7 +612,7 @@
+ 		high--;
+ 		size >>= 1;
+ 		VM_BUG_ON(bad_range(zone, &page[size]));
+-		list_add(&page[size].lru, &area->free_list);
++		list_add(&page[size].lru, &area->free_list[migratetype]);
+ 		area->nr_free++;
+ 		set_page_order(&page[size], high);
+ 	}
+@@ -628,31 +665,172 @@
+ 	return 0;
+ }
+ 
++#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY
++/*
++ * This array describes the order in which free lists are fallen back on
++ * when the free lists for the desired migrate type are depleted
++ */
++static int fallbacks[MIGRATE_TYPES][MIGRATE_TYPES-1] = {
++	[MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE   },
++	[MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE   },
++	[MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE },
++};
++
++/*
++ * Move the free pages in a range to the free lists of the requested type.
++ * Note that start_page and end_page are not aligned on a MAX_ORDER_NR_PAGES
++ * boundary. If alignment is required, use move_freepages_block().
++ */
++int move_freepages(struct zone *zone,
++			struct page *start_page, struct page *end_page,
++			int migratetype)
++{
++	struct page *page;
++	unsigned long order;
++	int blocks_moved = 0;
++
++#ifndef CONFIG_HOLES_IN_ZONE
++	/*
++	 * page_zone is not safe to call in this context when
++	 * CONFIG_HOLES_IN_ZONE is set. This bug check is probably redundant
++	 * anyway as we check zone boundaries in move_freepages_block().
++	 * Remove at a later date when no bug reports exist related to
++	 * CONFIG_PAGE_GROUP_BY_MOBILITY
++	 */
++	BUG_ON(page_zone(start_page) != page_zone(end_page));
++#endif
++
++	for (page = start_page; page <= end_page;) {
++		if (!pfn_valid_within(page_to_pfn(page))) {
++			page++;
++			continue;
++		}
++
++		if (!PageBuddy(page)) {
++			page++;
++			continue;
++		}
++
++		order = page_order(page);
++		list_del(&page->lru);
++		list_add(&page->lru,
++			&zone->free_area[order].free_list[migratetype]);
++		page += 1 << order;
++		blocks_moved++;
++	}
++
++	return blocks_moved;
++}
++
++int move_freepages_block(struct zone *zone, struct page *page, int migratetype)
++{
++	unsigned long start_pfn, end_pfn;
++	struct page *start_page, *end_page;
++
++	start_pfn = page_to_pfn(page);
++	start_pfn = start_pfn & ~(MAX_ORDER_NR_PAGES-1);
++	start_page = pfn_to_page(start_pfn);
++	end_page = start_page + MAX_ORDER_NR_PAGES - 1;
++	end_pfn = start_pfn + MAX_ORDER_NR_PAGES - 1;
++
++	/* Do not cross zone boundaries */
++	if (start_pfn < zone->zone_start_pfn)
++		start_page = page;
++	if (end_pfn >= zone->zone_start_pfn + zone->spanned_pages)
++		return 0;
++
++	return move_freepages(zone, start_page, end_page, migratetype);
++}
++
++/* Remove an element from the buddy allocator from the fallback list */
++static struct page *__rmqueue_fallback(struct zone *zone, int order,
++						int start_migratetype)
++{
++	struct free_area * area;
++	int current_order;
++	struct page *page;
++	int migratetype, i;
++
++	/* Find the largest possible block of pages in the other list */
++	for (current_order = MAX_ORDER-1; current_order >= order;
++						--current_order) {
++		for (i = 0; i < MIGRATE_TYPES - 1; i++) {
++			migratetype = fallbacks[start_migratetype][i];
++
++			area = &(zone->free_area[current_order]);
++			if (list_empty(&area->free_list[migratetype]))
++				continue;
++
++			page = list_entry(area->free_list[migratetype].next,
++					struct page, lru);
++			area->nr_free--;
++
++			/*
++			 * If breaking a large block of pages, move all free
++			 * pages to the preferred allocation list
++			 */
++			if (unlikely(current_order >= MAX_ORDER / 2)) {
++				migratetype = start_migratetype;
++				move_freepages_block(zone, page, migratetype);
++			}
++
++			/* Remove the page from the freelists */
++			list_del(&page->lru);
++			rmv_page_order(page);
++			__mod_zone_page_state(zone, NR_FREE_PAGES,
++							-(1UL << order));
++
++			if (current_order == MAX_ORDER - 1)
++				set_pageblock_migratetype(page,
++							start_migratetype);
++
++			expand(zone, page, order, current_order, area, migratetype);
++			return page;
++		}
++	}
++
++	return NULL;
++}
++#else
++static struct page *__rmqueue_fallback(struct zone *zone, int order,
++						int start_migratetype)
++{
++	return NULL;
++}
++#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */
++
+ /* 
+  * Do the hard work of removing an element from the buddy allocator.
+  * Call me with the zone->lock already held.
+  */
+-static struct page *__rmqueue(struct zone *zone, unsigned int order)
++static struct page *__rmqueue(struct zone *zone, unsigned int order,
++						int migratetype)
+ {
+ 	struct free_area * area;
+ 	unsigned int current_order;
+ 	struct page *page;
+ 
++	/* Find a page of the appropriate size in the preferred list */
+ 	for (current_order = order; current_order < MAX_ORDER; ++current_order) {
+-		area = zone->free_area + current_order;
+-		if (list_empty(&area->free_list))
++		area = &(zone->free_area[current_order]);
++		if (list_empty(&area->free_list[migratetype]))
+ 			continue;
+ 
+-		page = list_entry(area->free_list.next, struct page, lru);
++		page = list_entry(area->free_list[migratetype].next,
++							struct page, lru);
+ 		list_del(&page->lru);
+ 		rmv_page_order(page);
+ 		area->nr_free--;
+ 		__mod_zone_page_state(zone, NR_FREE_PAGES, - (1UL << order));
+-		expand(zone, page, order, current_order, area);
+-		return page;
++		expand(zone, page, order, current_order, area, migratetype);
++		goto got_page;
+ 	}
+ 
+-	return NULL;
++	page = __rmqueue_fallback(zone, order, migratetype);
++
++got_page:
++
++	return page;
+ }
+ 
+ /* 
+@@ -661,16 +839,18 @@
+  * Returns the number of new pages which were placed at *list.
+  */
+ static int rmqueue_bulk(struct zone *zone, unsigned int order, 
+-			unsigned long count, struct list_head *list)
++			unsigned long count, struct list_head *list,
++			int migratetype)
+ {
+ 	int i;
+ 	
+ 	spin_lock(&zone->lock);
+ 	for (i = 0; i < count; ++i) {
+-		struct page *page = __rmqueue(zone, order);
++		struct page *page = __rmqueue(zone, order, migratetype);
+ 		if (unlikely(page == NULL))
+ 			break;
+-		list_add_tail(&page->lru, list);
++		list_add(&page->lru, list);
++		set_page_private(page, migratetype);
+ 	}
+ 	spin_unlock(&zone->lock);
+ 	return i;
+@@ -732,7 +912,7 @@
+ {
+ 	unsigned long pfn, max_zone_pfn;
+ 	unsigned long flags;
+-	int order;
++	int order, t;
+ 	struct list_head *curr;
+ 
+ 	if (!zone->spanned_pages)
+@@ -749,15 +929,15 @@
+ 				swsusp_unset_page_free(page);
+ 		}
+ 
+-	for (order = MAX_ORDER - 1; order >= 0; --order)
+-		list_for_each(curr, &zone->free_area[order].free_list) {
++	for_each_migratetype_order(order, t) {
++		list_for_each(curr, &zone->free_area[order].free_list[t]) {
+ 			unsigned long i;
+ 
+ 			pfn = page_to_pfn(list_entry(curr, struct page, lru));
+ 			for (i = 0; i < (1UL << order); i++)
+ 				swsusp_set_page_free(pfn_to_page(pfn + i));
+ 		}
+-
++	}
+ 	spin_unlock_irqrestore(&zone->lock, flags);
+ }
+ 
+@@ -797,6 +977,7 @@
+ 	local_irq_save(flags);
+ 	__count_vm_event(PGFREE);
+ 	list_add(&page->lru, &pcp->list);
++	set_page_private(page, get_pageblock_migratetype(page));
+ 	pcp->count++;
+ 	if (pcp->count >= pcp->high) {
+ 		free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+@@ -846,6 +1027,7 @@
+ 	struct page *page;
+ 	int cold = !!(gfp_flags & __GFP_COLD);
+ 	int cpu;
++	int migratetype = gfpflags_to_migratetype(gfp_flags);
+ 
+ again:
+ 	cpu  = get_cpu();
+@@ -856,16 +1038,32 @@
+ 		local_irq_save(flags);
+ 		if (!pcp->count) {
+ 			pcp->count = rmqueue_bulk(zone, 0,
+-						pcp->batch, &pcp->list);
++					pcp->batch, &pcp->list, migratetype);
+ 			if (unlikely(!pcp->count))
+ 				goto failed;
+ 		}
++
++#ifdef CONFIG_PAGE_GROUP_BY_MOBILITY
++		/* Find a page of the appropriate migrate type */
++		list_for_each_entry(page, &pcp->list, lru)
++			if (page_private(page) == migratetype)
++				break;
++
++		/* Allocate more to the pcp list if necessary */
++		if (unlikely(&page->lru == &pcp->list)) {
++			pcp->count += rmqueue_bulk(zone, 0,
++					pcp->batch, &pcp->list, migratetype);
++			page = list_entry(pcp->list.next, struct page, lru);
++		}
++#else
+ 		page = list_entry(pcp->list.next, struct page, lru);
++#endif /* CONFIG_PAGE_GROUP_BY_MOBILITY */
++
+ 		list_del(&page->lru);
+ 		pcp->count--;
+ 	} else {
+ 		spin_lock_irqsave(&zone->lock, flags);
+-		page = __rmqueue(zone, order);
++		page = __rmqueue(zone, order, migratetype);
+ 		spin_unlock(&zone->lock);
+ 		if (!page)
+ 			goto failed;
+@@ -1952,6 +2150,16 @@
+ 		init_page_count(page);
+ 		reset_page_mapcount(page);
+ 		SetPageReserved(page);
++
++		/*
++		 * Mark the block movable so that blocks are reserved for
++		 * movable allocations at startup. This forces kernel
++		 * allocations to reserve their own blocks rather than
++		 * leaking throughout the address space during boot, when
++		 * many long-lived kernel allocations are made.
++		 */
++		set_pageblock_migratetype(page, MIGRATE_MOVABLE);
++
+ 		INIT_LIST_HEAD(&page->lru);
+ #ifdef WANT_PAGE_VIRTUAL
+ 		/* The shift won't overflow because ZONE_NORMAL is below 4G. */
+@@ -1964,9 +2172,9 @@
+ void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
+ 				unsigned long size)
+ {
+-	int order;
+-	for (order = 0; order < MAX_ORDER ; order++) {
+-		INIT_LIST_HEAD(&zone->free_area[order].free_list);
++	int order, t;
++	for_each_migratetype_order(order, t) {
++		INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
+ 		zone->free_area[order].nr_free = 0;
+ 	}
+ }
+@@ -2584,6 +2792,41 @@
+ 							realtotalpages);
+ }
+ 
++#ifndef CONFIG_SPARSEMEM
++/*
++ * Calculate the size of the zone->pageblock_flags bitmap, rounded to an
++ * unsigned long. Start by making sure zonesize is a multiple of
++ * MAX_ORDER-1 by rounding up, then figure 1 NR_PAGEBLOCK_BITS worth of
++ * bits per MAX_ORDER-1, round what is now in bits up to the nearest long
++ * in bits, and return it in bytes.
++ */
++static unsigned long __init usemap_size(unsigned long zonesize)
++{
++	unsigned long usemapsize;
++
++	usemapsize = roundup(zonesize, MAX_ORDER_NR_PAGES);
++	usemapsize = usemapsize >> (MAX_ORDER-1);
++	usemapsize *= NR_PAGEBLOCK_BITS;
++	usemapsize = roundup(usemapsize, 8 * sizeof(unsigned long));
++
++	return usemapsize / 8;
++}
++
++static void __init setup_usemap(struct pglist_data *pgdat,
++				struct zone *zone, unsigned long zonesize)
++{
++	unsigned long usemapsize = usemap_size(zonesize);
++	zone->pageblock_flags = NULL;
++	if (usemapsize) {
++		zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize);
++		memset(zone->pageblock_flags, 0, usemapsize);
++	}
++}
++#else
++static void inline setup_usemap(struct pglist_data *pgdat,
++				struct zone *zone, unsigned long zonesize) {}
++#endif /* CONFIG_SPARSEMEM */
++
+ /*
+  * Set up the zone data structures:
+  *   - mark all pages reserved
+@@ -2664,6 +2907,7 @@
+ 		if (!size)
+ 			continue;
+ 
++		setup_usemap(pgdat, zone, size);
+ 		ret = init_currently_empty_zone(zone, zone_start_pfn,
+ 						size, MEMMAP_EARLY);
+ 		BUG_ON(ret);
+@@ -3363,6 +3607,21 @@
+ 			for (order = 0; ((1UL << order) << PAGE_SHIFT) < size; order++)
+ 				;
+ 			table = (void*) __get_free_pages(GFP_ATOMIC, order);
++			/*
++			 * If bucketsize is not a power-of-two, we may free
++			 * some pages at the end of hash table.
++			 */
++			if (table) {
++				unsigned long alloc_end = (unsigned long)table +
++						(PAGE_SIZE << order);
++				unsigned long used = (unsigned long)table +
++						PAGE_ALIGN(size);
++				split_page(virt_to_page(table), order);
++				while (used < alloc_end) {
++					free_page(used);
++					used += PAGE_SIZE;
++				}
++			}
+ 		}
+ 	} while (!table && size > PAGE_SIZE && --log2qty);
+ 
+@@ -3396,4 +3655,79 @@
+ EXPORT_SYMBOL(page_to_pfn);
+ #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
+ 
++/* Return a pointer to the bitmap storing bits affecting a block of pages */
++static inline unsigned long *get_pageblock_bitmap(struct zone *zone,
++							unsigned long pfn)
++{
++#ifdef CONFIG_SPARSEMEM
++	return __pfn_to_section(pfn)->pageblock_flags;
++#else
++	return zone->pageblock_flags;
++#endif /* CONFIG_SPARSEMEM */
++}
+ 
++static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
++{
++#ifdef CONFIG_SPARSEMEM
++	pfn &= (PAGES_PER_SECTION-1);
++	return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS;
++#else
++	pfn = pfn - zone->zone_start_pfn;
++	return (pfn >> (MAX_ORDER-1)) * NR_PAGEBLOCK_BITS;
++#endif /* CONFIG_SPARSEMEM */
++}
++
++/**
++ * get_pageblock_flags_group - Return the requested group of flags for the MAX_ORDER_NR_PAGES block of pages
++ * @page: The page within the block of interest
++ * @start_bitidx: The first bit of interest to retrieve
++ * @end_bitidx: The last bit of interest
++ * returns pageblock_bits flags
++ */
++unsigned long get_pageblock_flags_group(struct page *page,
++					int start_bitidx, int end_bitidx)
++{
++	struct zone *zone;
++	unsigned long *bitmap;
++	unsigned long pfn, bitidx;
++	unsigned long flags = 0;
++	unsigned long value = 1;
++
++	zone = page_zone(page);
++	pfn = page_to_pfn(page);
++	bitmap = get_pageblock_bitmap(zone, pfn);
++	bitidx = pfn_to_bitidx(zone, pfn);
++
++	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
++		if (test_bit(bitidx + start_bitidx, bitmap))
++			flags |= value;
++
++	return flags;
++}
++
++/**
++ * set_pageblock_flags_group - Set the requested group of flags for a MAX_ORDER_NR_PAGES block of pages
++ * @page: The page within the block of interest
++ * @start_bitidx: The first bit of interest
++ * @end_bitidx: The last bit of interest
++ * @flags: The flags to set
++ */
++void set_pageblock_flags_group(struct page *page, unsigned long flags,
++					int start_bitidx, int end_bitidx)
++{
++	struct zone *zone;
++	unsigned long *bitmap;
++	unsigned long pfn, bitidx;
++	unsigned long value = 1;
++
++	zone = page_zone(page);
++	pfn = page_to_pfn(page);
++	bitmap = get_pageblock_bitmap(zone, pfn);
++	bitidx = pfn_to_bitidx(zone, pfn);
++
++	for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
++		if (flags & value)
++			__set_bit(bitidx + start_bitidx, bitmap);
++		else
++			__clear_bit(bitidx + start_bitidx, bitmap);
++}
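
The two accessors above store NR_PAGEBLOCK_BITS flag bits per MAX_ORDER
block in one flat bitmap. The packing logic, modelled standalone with
made-up sizes (the helper names and array size below are invented):

	/* Standalone model of the pageblock flag accessors. */
	#include <stdio.h>

	#define BITS_PER_BLOCK	3
	#define BITS_PER_LONG	(8 * sizeof(unsigned long))

	static unsigned long bitmap[4];

	static void set_block_flags(int block, unsigned long flags)
	{
		int i, bit = block * BITS_PER_BLOCK;

		for (i = 0; i < BITS_PER_BLOCK; i++, bit++, flags >>= 1) {
			if (flags & 1)
				bitmap[bit / BITS_PER_LONG] |=
					1UL << (bit % BITS_PER_LONG);
			else
				bitmap[bit / BITS_PER_LONG] &=
					~(1UL << (bit % BITS_PER_LONG));
		}
	}

	static unsigned long get_block_flags(int block)
	{
		unsigned long flags = 0;
		int i, bit = block * BITS_PER_BLOCK;

		for (i = 0; i < BITS_PER_BLOCK; i++, bit++)
			if (bitmap[bit / BITS_PER_LONG] &
			    (1UL << (bit % BITS_PER_LONG)))
				flags |= 1UL << i;
		return flags;
	}

	int main(void)
	{
		set_block_flags(5, 0x2);	/* tag block 5 */
		printf("%lx\n", get_block_flags(5));	/* prints 2 */
		return 0;
	}
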
+diff -Nurb linux-2.6.22-570/mm/pdflush.c linux-2.6.22-try2/mm/pdflush.c
+--- linux-2.6.22-570/mm/pdflush.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/mm/pdflush.c	2007-12-19 15:29:24.000000000 -0500
+@@ -92,6 +92,7 @@
+ static int __pdflush(struct pdflush_work *my_work)
+ {
+ 	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
++	set_freezable();
+ 	my_work->fn = NULL;
+ 	my_work->who = current;
+ 	INIT_LIST_HEAD(&my_work->list);
+diff -Nurb linux-2.6.22-570/mm/rmap.c linux-2.6.22-try2/mm/rmap.c
+--- linux-2.6.22-570/mm/rmap.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/mm/rmap.c	2007-12-19 15:29:24.000000000 -0500
+@@ -622,8 +622,10 @@
+ 			printk (KERN_EMERG "  page->count = %x\n", page_count(page));
+ 			printk (KERN_EMERG "  page->mapping = %p\n", page->mapping);
+ 			print_symbol (KERN_EMERG "  vma->vm_ops = %s\n", (unsigned long)vma->vm_ops);
+-			if (vma->vm_ops)
++			if (vma->vm_ops) {
+ 				print_symbol (KERN_EMERG "  vma->vm_ops->nopage = %s\n", (unsigned long)vma->vm_ops->nopage);
++				print_symbol (KERN_EMERG "  vma->vm_ops->fault = %s\n", (unsigned long)vma->vm_ops->fault);
++			}
+ 			if (vma->vm_file && vma->vm_file->f_op)
+ 				print_symbol (KERN_EMERG "  vma->vm_file->f_op->mmap = %s\n", (unsigned long)vma->vm_file->f_op->mmap);
+ 			BUG();
+diff -Nurb linux-2.6.22-570/mm/shmem.c linux-2.6.22-try2/mm/shmem.c
+--- linux-2.6.22-570/mm/shmem.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/mm/shmem.c	2007-12-19 15:29:24.000000000 -0500
+@@ -81,6 +81,7 @@
+ 	SGP_READ,	/* don't exceed i_size, don't allocate page */
+ 	SGP_CACHE,	/* don't exceed i_size, may allocate page */
+ 	SGP_WRITE,	/* may exceed i_size, may allocate page */
++	SGP_FAULT,	/* same as SGP_CACHE, return with page locked */
+ };
+ 
+ static int shmem_getpage(struct inode *inode, unsigned long idx,
+@@ -92,8 +93,11 @@
+ 	 * The above definition of ENTRIES_PER_PAGE, and the use of
+ 	 * BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
+ 	 * might be reconsidered if it ever diverges from PAGE_SIZE.
++	 *
++	 * Mobility flags are masked out as swap vectors cannot move
+ 	 */
+-	return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
++	return alloc_pages((gfp_mask & ~GFP_MOVABLE_MASK) | __GFP_ZERO,
++				PAGE_CACHE_SHIFT-PAGE_SHIFT);
+ }
+ 
+ static inline void shmem_dir_free(struct page *page)
+@@ -371,7 +375,7 @@
+ 		}
+ 
+ 		spin_unlock(&info->lock);
+-		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
++		page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
+ 		if (page)
+ 			set_page_private(page, 0);
+ 		spin_lock(&info->lock);
+@@ -1110,6 +1114,10 @@
+ 
+ 	if (idx >= SHMEM_MAX_INDEX)
+ 		return -EFBIG;
++
++	if (type)
++		*type = VM_FAULT_MINOR;
++
+ 	/*
+ 	 * Normally, filepage is NULL on entry, and either found
+ 	 * uptodate immediately, or allocated and zeroed, or read
+@@ -1299,8 +1307,10 @@
+ 	}
+ done:
+ 	if (*pagep != filepage) {
+-		unlock_page(filepage);
+ 		*pagep = filepage;
++		if (sgp != SGP_FAULT)
++			unlock_page(filepage);
++
+ 	}
+ 	return 0;
+ 
+@@ -1312,72 +1322,29 @@
+ 	return error;
+ }
+ 
+-static struct page *shmem_nopage(struct vm_area_struct *vma,
+-				 unsigned long address, int *type)
++static struct page *shmem_fault(struct vm_area_struct *vma,
++					struct fault_data *fdata)
+ {
+ 	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ 	struct page *page = NULL;
+-	unsigned long idx;
+ 	int error;
+ 
+-	idx = (address - vma->vm_start) >> PAGE_SHIFT;
+-	idx += vma->vm_pgoff;
+-	idx >>= PAGE_CACHE_SHIFT - PAGE_SHIFT;
+-	if (((loff_t) idx << PAGE_CACHE_SHIFT) >= i_size_read(inode))
+-		return NOPAGE_SIGBUS;
+-
+-	error = shmem_getpage(inode, idx, &page, SGP_CACHE, type);
+-	if (error)
+-		return (error == -ENOMEM)? NOPAGE_OOM: NOPAGE_SIGBUS;
+-
+-	mark_page_accessed(page);
+-	return page;
+-}
+-
+-static int shmem_populate(struct vm_area_struct *vma,
+-	unsigned long addr, unsigned long len,
+-	pgprot_t prot, unsigned long pgoff, int nonblock)
+-{
+-	struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+-	struct mm_struct *mm = vma->vm_mm;
+-	enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
+-	unsigned long size;
+-
+-	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+-	if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
+-		return -EINVAL;
++	BUG_ON(!(vma->vm_flags & VM_CAN_INVALIDATE));
+ 
+-	while ((long) len > 0) {
+-		struct page *page = NULL;
+-		int err;
+-		/*
+-		 * Will need changing if PAGE_CACHE_SIZE != PAGE_SIZE
+-		 */
+-		err = shmem_getpage(inode, pgoff, &page, sgp, NULL);
+-		if (err)
+-			return err;
+-		/* Page may still be null, but only if nonblock was set. */
+-		if (page) {
+-			mark_page_accessed(page);
+-			err = install_page(mm, vma, addr, page, prot);
+-			if (err) {
+-				page_cache_release(page);
+-				return err;
+-			}
+-		} else if (vma->vm_flags & VM_NONLINEAR) {
+-			/* No page was found just because we can't read it in
+-			 * now (being here implies nonblock != 0), but the page
+-			 * may exist, so set the PTE to fault it in later. */
+-    			err = install_file_pte(mm, vma, addr, pgoff, prot);
+-			if (err)
+-	    			return err;
++	if (((loff_t)fdata->pgoff << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
++		fdata->type = VM_FAULT_SIGBUS;
++		return NULL;
+ 		}
+ 
+-		len -= PAGE_SIZE;
+-		addr += PAGE_SIZE;
+-		pgoff++;
++	error = shmem_getpage(inode, fdata->pgoff, &page,
++						SGP_FAULT, &fdata->type);
++	if (error) {
++		fdata->type = ((error == -ENOMEM)?VM_FAULT_OOM:VM_FAULT_SIGBUS);
++		return NULL;
+ 	}
+-	return 0;
++
++	mark_page_accessed(page);
++	return page;
+ }
+ 
+ #ifdef CONFIG_NUMA
+@@ -1424,6 +1391,7 @@
+ {
+ 	file_accessed(file);
+ 	vma->vm_ops = &shmem_vm_ops;
++	vma->vm_flags |= VM_CAN_INVALIDATE | VM_CAN_NONLINEAR;
+ 	return 0;
+ }
+ 
+@@ -2477,8 +2445,7 @@
+ };
+ 
+ static struct vm_operations_struct shmem_vm_ops = {
+-	.nopage		= shmem_nopage,
+-	.populate	= shmem_populate,
++	.fault		= shmem_fault,
+ #ifdef CONFIG_NUMA
+ 	.set_policy     = shmem_set_policy,
+ 	.get_policy     = shmem_get_policy,
+@@ -2614,5 +2581,6 @@
+ 		fput(vma->vm_file);
+ 	vma->vm_file = file;
+ 	vma->vm_ops = &shmem_vm_ops;
++	vma->vm_flags |= VM_CAN_INVALIDATE;
+ 	return 0;
+ }
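
With shmem converted, the old nopage/populate pair collapses into a single ->fault handler that reports its outcome through struct fault_data. A minimal handler in the new style might look like the sketch below; struct fault_data and the VM_FAULT_* protocol are assumed from the fault-vs-invalidate patches earlier in this series, and EXAMPLE_MAX_PAGES is invented for the bounds check:

	/* Sketch of a minimal ->fault handler in the new style. */
	static struct page *example_fault(struct vm_area_struct *vma,
					  struct fault_data *fdata)
	{
		struct page *page;

		if (fdata->pgoff >= EXAMPLE_MAX_PAGES) {
			fdata->type = VM_FAULT_SIGBUS;	/* outside the object */
			return NULL;
		}
		page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
		if (!page) {
			fdata->type = VM_FAULT_OOM;
			return NULL;
		}
		fdata->type = VM_FAULT_MINOR;
		return page;	/* the fault core maps it for us */
	}
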
+diff -Nurb linux-2.6.22-570/mm/slab.c linux-2.6.22-try2/mm/slab.c
+--- linux-2.6.22-570/mm/slab.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/mm/slab.c	2007-12-19 15:29:24.000000000 -0500
+@@ -1639,6 +1639,8 @@
+ #endif
+ 
+ 	flags |= cachep->gfpflags;
++	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
++		flags |= __GFP_RECLAIMABLE;
+ 
+ 	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+ 	if (!page)
+diff -Nurb linux-2.6.22-570/mm/slub.c linux-2.6.22-try2/mm/slub.c
+--- linux-2.6.22-570/mm/slub.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/mm/slub.c	2007-12-19 15:29:24.000000000 -0500
+@@ -985,6 +985,9 @@
+ 	if (s->flags & SLAB_CACHE_DMA)
+ 		flags |= SLUB_DMA;
+ 
++	if (s->flags & SLAB_RECLAIM_ACCOUNT)
++		flags |= __GFP_RECLAIMABLE;
++
+ 	if (node == -1)
+ 		page = alloc_pages(flags, s->order);
+ 	else
+@@ -1989,6 +1992,7 @@
+ #ifdef CONFIG_NUMA
+ 	s->defrag_ratio = 100;
+ #endif
++	raise_kswapd_order(s->order);
+ 
+ 	if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA))
+ 		return 1;
+diff -Nurb linux-2.6.22-570/mm/swap_state.c linux-2.6.22-try2/mm/swap_state.c
+--- linux-2.6.22-570/mm/swap_state.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/mm/swap_state.c	2007-12-19 15:29:24.000000000 -0500
+@@ -334,7 +334,8 @@
+ 		 * Get a new page to read into from swap.
+ 		 */
+ 		if (!new_page) {
+-			new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
++			new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
++								vma, addr);
+ 			if (!new_page)
+ 				break;		/* Out of memory */
+ 		}
+diff -Nurb linux-2.6.22-570/mm/truncate.c linux-2.6.22-try2/mm/truncate.c
+--- linux-2.6.22-570/mm/truncate.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/mm/truncate.c	2007-12-19 15:29:24.000000000 -0500
+@@ -82,7 +82,7 @@
+ /*
+  * If truncate cannot remove the fs-private metadata from the page, the page
+  * becomes anonymous.  It will be left on the LRU and may even be mapped into
+- * user pagetables if we're racing with filemap_nopage().
++ * user pagetables if we're racing with filemap_fault().
+  *
+  * We need to bale out if page->mapping is no longer equal to the original
+  * mapping.  This happens a) when the VM reclaimed the page while we waited on
+@@ -192,6 +192,11 @@
+ 				unlock_page(page);
+ 				continue;
+ 			}
++			if (page_mapped(page)) {
++				unmap_mapping_range(mapping,
++				  (loff_t)page_index<<PAGE_CACHE_SHIFT,
++				  PAGE_CACHE_SIZE, 0);
++			}
+ 			truncate_complete_page(mapping, page);
+ 			unlock_page(page);
+ 		}
+@@ -229,6 +234,11 @@
+ 				break;
+ 			lock_page(page);
+ 			wait_on_page_writeback(page);
++			if (page_mapped(page)) {
++				unmap_mapping_range(mapping,
++				  (loff_t)page->index<<PAGE_CACHE_SHIFT,
++				  PAGE_CACHE_SIZE, 0);
++			}
+ 			if (page->index > next)
+ 				next = page->index;
+ 			next++;
+@@ -397,7 +407,7 @@
+ 				break;
+ 			}
+ 			wait_on_page_writeback(page);
+-			while (page_mapped(page)) {
++			if (page_mapped(page)) {
+ 				if (!did_range_unmap) {
+ 					/*
+ 					 * Zap the rest of the file in one hit.
+@@ -417,6 +427,7 @@
+ 					  PAGE_CACHE_SIZE, 0);
+ 				}
+ 			}
++			BUG_ON(page_mapped(page));
+ 			ret = do_launder_page(mapping, page);
+ 			if (ret == 0 && !invalidate_complete_page2(mapping, page))
+ 				ret = -EIO;
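
All of the new call sites follow the same shape: unmap_mapping_range() works in byte offsets, so zapping the ptes for a single page cache page at index idx looks like the sketch below (the final 0 means private COW copies are left alone):

	/* Sketch: zap exactly one page's worth of user mappings. */
	static void zap_one_page(struct address_space *mapping, pgoff_t idx)
	{
		unmap_mapping_range(mapping, (loff_t)idx << PAGE_CACHE_SHIFT,
				    PAGE_CACHE_SIZE, 0);
	}
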
+diff -Nurb linux-2.6.22-570/mm/util.c linux-2.6.22-try2/mm/util.c
+--- linux-2.6.22-570/mm/util.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/mm/util.c	2007-12-19 15:29:23.000000000 -0500
+@@ -18,9 +18,8 @@
+ }
+ EXPORT_SYMBOL(__kzalloc);
+ 
+-/*
++/**
+  * kstrdup - allocate space for and copy an existing string
+- *
+  * @s: the string to duplicate
+  * @gfp: the GFP mask used in the kmalloc() call when allocating memory
+  */
+@@ -41,6 +40,32 @@
+ EXPORT_SYMBOL(kstrdup);
+ 
+ /**
++ * kstrndup - allocate space for and copy an existing string
++ * @s: the string to duplicate
++ * @max: read at most @max chars from @s
++ * @gfp: the GFP mask used in the kmalloc() call when allocating memory
++ */
++char *kstrndup(const char *s, size_t max, gfp_t gfp)
++{
++	size_t len;
++	char *buf;
++
++	if (!s)
++		return NULL;
++
++	len = strlen(s);
++	if (len > max)
++		len = max;
++	buf = kmalloc_track_caller(len+1, gfp);
++	if (buf) {
++		memcpy(buf, s, len);
++		buf[len] = '\0';
++	}
++	return buf;
++}
++EXPORT_SYMBOL(kstrndup);
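
A typical kstrndup() caller bounds a copy of a name whose termination it does not trust. A small sketch; struct example_dev and the 15-character cap are invented for the example:

	#include <linux/slab.h>
	#include <linux/string.h>

	struct example_dev {
		char *label;
	};

	static int example_set_label(struct example_dev *d, const char *name)
	{
		char *label = kstrndup(name, 15, GFP_KERNEL); /* 15 chars + NUL */

		if (!label)
			return -ENOMEM;
		kfree(d->label);
		d->label = label;
		return 0;
	}
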
++
++/**
+  * kmemdup - duplicate region of memory
+  *
+  * @src: memory region to duplicate
+@@ -60,7 +85,6 @@
+ 
+ /*
+  * strndup_user - duplicate an existing string from user space
+- *
+  * @s: The string to duplicate
+  * @n: Maximum number of bytes to copy, including the trailing NUL.
+  */
+diff -Nurb linux-2.6.22-570/mm/vmalloc.c linux-2.6.22-try2/mm/vmalloc.c
+--- linux-2.6.22-570/mm/vmalloc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/mm/vmalloc.c	2007-12-19 15:29:22.000000000 -0500
+@@ -68,12 +68,12 @@
+ 	} while (pud++, addr = next, addr != end);
+ }
+ 
+-void unmap_vm_area(struct vm_struct *area)
++void unmap_kernel_range(unsigned long addr, unsigned long size)
+ {
+ 	pgd_t *pgd;
+ 	unsigned long next;
+-	unsigned long addr = (unsigned long) area->addr;
+-	unsigned long end = addr + area->size;
++	unsigned long start = addr;
++	unsigned long end = addr + size;
+ 
+ 	BUG_ON(addr >= end);
+ 	pgd = pgd_offset_k(addr);
+@@ -84,7 +84,12 @@
+ 			continue;
+ 		vunmap_pud_range(pgd, addr, next);
+ 	} while (pgd++, addr = next, addr != end);
+-	flush_tlb_kernel_range((unsigned long) area->addr, end);
++	flush_tlb_kernel_range(start, end);
++}
++
++static void unmap_vm_area(struct vm_struct *area)
++{
++	unmap_kernel_range((unsigned long)area->addr, area->size);
+ }
+ 
+ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
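
Exporting the guts as unmap_kernel_range() lets code that builds kernel mappings without a struct vm_struct tear them down symmetrically, TLB flush included. A hedged sketch of such a caller:

	/* Sketch: drop a kernel mapping of "count" pages at addr that was
	 * established outside the vmalloc allocator. */
	static void example_teardown(unsigned long addr, unsigned int count)
	{
		unmap_kernel_range(addr, (unsigned long)count << PAGE_SHIFT);
	}
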
+diff -Nurb linux-2.6.22-570/mm/vmscan.c linux-2.6.22-try2/mm/vmscan.c
+--- linux-2.6.22-570/mm/vmscan.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/mm/vmscan.c	2007-12-19 15:29:24.000000000 -0500
+@@ -1272,6 +1272,34 @@
+ 	return nr_reclaimed;
+ }
+ 
++static unsigned int kswapd_min_order __read_mostly;
++
++static inline int kswapd_order(unsigned int order)
++{
++	return max(kswapd_min_order, order);
++}
++
++/**
++ * raise_kswapd_order - Raise the minimum order that kswapd reclaims
++ * @order: The minimum order kswapd should reclaim at
++ *
++ * kswapd normally reclaims at order 0 unless there is a higher-order
++ * allocation being serviced. This function is used to set the minimum
++ * order that kswapd reclaims at when it is known there will be regular
++ * high-order allocations at a given order.
++ */
++void raise_kswapd_order(unsigned int order)
++{
++	if (order >= MAX_ORDER)
++		return;
++
++	/* Update order if necessary and inform if changed */
++	if (order > kswapd_min_order) {
++		kswapd_min_order = order;
++		printk(KERN_INFO "kswapd reclaim order set to %d\n", order);
++	}
++}
++
+ /*
+  * The background pageout daemon, started as a kernel thread
+  * from the init process. 
+@@ -1314,13 +1342,14 @@
+ 	 * trying to free the first piece of memory in the first place).
+ 	 */
+ 	tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
++	set_freezable();
+ 
+-	order = 0;
++	order = kswapd_order(0);
+ 	for ( ; ; ) {
+ 		unsigned long new_order;
+ 
+ 		prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
+-		new_order = pgdat->kswapd_max_order;
++		new_order = kswapd_order(pgdat->kswapd_max_order);
+ 		pgdat->kswapd_max_order = 0;
+ 		if (order < new_order) {
+ 			/*
+@@ -1332,7 +1361,7 @@
+ 			if (!freezing(current))
+ 				schedule();
+ 
+-			order = pgdat->kswapd_max_order;
++			order = kswapd_order(pgdat->kswapd_max_order);
+ 		}
+ 		finish_wait(&pgdat->kswapd_wait, &wait);
+ 
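
A subsystem that knows it will keep making higher-order allocations can now prime kswapd once at init time; the slub change above does exactly this for its slab orders. A minimal sketch:

	/* Sketch: ask kswapd to keep order-2 blocks reclaimable; orders
	 * >= MAX_ORDER are silently ignored by raise_kswapd_order(). */
	static int __init example_init(void)
	{
		raise_kswapd_order(2);
		return 0;
	}
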
+diff -Nurb linux-2.6.22-570/net/8021q/Makefile linux-2.6.22-try2/net/8021q/Makefile
+--- linux-2.6.22-570/net/8021q/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/8021q/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -4,7 +4,7 @@
+ 
+ obj-$(CONFIG_VLAN_8021Q) += 8021q.o
+ 
+-8021q-objs := vlan.o vlan_dev.o
++8021q-objs := vlan.o vlan_dev.o vlan_netlink.o
+ 
+ ifeq ($(CONFIG_PROC_FS),y)
+ 8021q-objs += vlanproc.o
+diff -Nurb linux-2.6.22-570/net/8021q/vlan.c linux-2.6.22-try2/net/8021q/vlan.c
+--- linux-2.6.22-570/net/8021q/vlan.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/8021q/vlan.c	2007-12-19 15:29:23.000000000 -0500
+@@ -97,15 +97,22 @@
+ 
+ 	/* Register us to receive netdevice events */
+ 	err = register_netdevice_notifier(&vlan_notifier_block);
+-	if (err < 0) {
+-		dev_remove_pack(&vlan_packet_type);
+-		vlan_proc_cleanup();
+-		return err;
+-	}
++	if (err < 0)
++		goto err1;
+ 
+-	vlan_ioctl_set(vlan_ioctl_handler);
++	err = vlan_netlink_init();
++	if (err < 0)
++		goto err2;
+ 
++	vlan_ioctl_set(vlan_ioctl_handler);
+ 	return 0;
++
++err2:
++	unregister_netdevice_notifier(&vlan_notifier_block);
++err1:
++	vlan_proc_cleanup();
++	dev_remove_pack(&vlan_packet_type);
++	return err;
+ }
+ 
+ /* Cleanup all vlan devices
+@@ -136,6 +143,7 @@
+ {
+ 	int i;
+ 
++	vlan_netlink_fini();
+ 	vlan_ioctl_set(NULL);
+ 
+ 	/* Un-register us from receiving netdevice events */
+@@ -197,6 +205,34 @@
+ 	kfree(grp);
+ }
+ 
++static struct vlan_group *vlan_group_alloc(int ifindex)
++{
++	struct vlan_group *grp;
++	unsigned int size;
++	unsigned int i;
++
++	grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL);
++	if (!grp)
++		return NULL;
++
++	size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN;
++
++	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) {
++		grp->vlan_devices_arrays[i] = kzalloc(size, GFP_KERNEL);
++		if (!grp->vlan_devices_arrays[i])
++			goto err;
++	}
++
++	grp->real_dev_ifindex = ifindex;
++	hlist_add_head_rcu(&grp->hlist,
++			   &vlan_group_hash[vlan_grp_hashfn(ifindex)]);
++	return grp;
++
++err:
++	vlan_group_free(grp);
++	return NULL;
++}
++
+ static void vlan_rcu_free(struct rcu_head *rcu)
+ {
+ 	vlan_group_free(container_of(rcu, struct vlan_group, rcu));
+@@ -278,47 +314,62 @@
+ 	return ret;
+ }
+ 
+-static int unregister_vlan_device(const char *vlan_IF_name)
++int unregister_vlan_device(struct net_device *dev)
+ {
+-	struct net_device *dev = NULL;
+ 	int ret;
+ 
+-
+-	dev = dev_get_by_name(vlan_IF_name);
+-	ret = -EINVAL;
+-	if (dev) {
+-		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+-			rtnl_lock();
+-
+ 			ret = unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
+ 						  VLAN_DEV_INFO(dev)->vlan_id);
+-
+-			dev_put(dev);
+ 			unregister_netdevice(dev);
+ 
+-			rtnl_unlock();
+-
+ 			if (ret == 1)
+ 				ret = 0;
++	return ret;
++}
++
++/*
++ * vlan network devices have devices nesting below it, and are a special
++ * "super class" of normal network devices; split their locks off into a
++ * separate class since they always nest.
++ */
++static struct lock_class_key vlan_netdev_xmit_lock_key;
++
++static int vlan_dev_init(struct net_device *dev)
++{
++	struct net_device *real_dev = VLAN_DEV_INFO(dev)->real_dev;
++
++	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
++	dev->flags  = real_dev->flags & ~IFF_UP;
++	dev->iflink = real_dev->ifindex;
++	dev->state  = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
++					  (1<<__LINK_STATE_DORMANT))) |
++		      (1<<__LINK_STATE_PRESENT);
++
++	/* TODO: maybe just assign it to be ETHERNET? */
++	dev->type = real_dev->type;
++
++	memcpy(dev->broadcast, real_dev->broadcast, real_dev->addr_len);
++	memcpy(dev->dev_addr, real_dev->dev_addr, real_dev->addr_len);
++	dev->addr_len = real_dev->addr_len;
++
++	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
++		dev->hard_header     = real_dev->hard_header;
++		dev->hard_header_len = real_dev->hard_header_len;
++		dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit;
++		dev->rebuild_header  = real_dev->rebuild_header;
+ 		} else {
+-			printk(VLAN_ERR
+-			       "%s: ERROR:	Tried to remove a non-vlan device "
+-			       "with VLAN code, name: %s  priv_flags: %hX\n",
+-			       __FUNCTION__, dev->name, dev->priv_flags);
+-			dev_put(dev);
+-			ret = -EPERM;
+-		}
+-	} else {
+-#ifdef VLAN_DEBUG
+-		printk(VLAN_DBG "%s: WARNING: Could not find dev.\n", __FUNCTION__);
+-#endif
+-		ret = -EINVAL;
++		dev->hard_header     = vlan_dev_hard_header;
++		dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
++		dev->hard_start_xmit = vlan_dev_hard_start_xmit;
++		dev->rebuild_header  = vlan_dev_rebuild_header;
+ 	}
++	dev->hard_header_parse = real_dev->hard_header_parse;
+ 
+-	return ret;
++	lockdep_set_class(&dev->_xmit_lock, &vlan_netdev_xmit_lock_key);
++	return 0;
+ }
+ 
+-static void vlan_setup(struct net_device *new_dev)
++void vlan_setup(struct net_device *new_dev)
+ {
+ 	SET_MODULE_OWNER(new_dev);
+ 
+@@ -338,6 +389,7 @@
+ 
+ 	/* set up method calls */
+ 	new_dev->change_mtu = vlan_dev_change_mtu;
++	new_dev->init = vlan_dev_init;
+ 	new_dev->open = vlan_dev_open;
+ 	new_dev->stop = vlan_dev_stop;
+ 	new_dev->set_mac_address = vlan_dev_set_mac_address;
+@@ -366,77 +418,110 @@
+ 	}
+ }
+ 
+-/*
+- * vlan network devices have devices nesting below it, and are a special
+- * "super class" of normal network devices; split their locks off into a
+- * separate class since they always nest.
+- */
+-static struct lock_class_key vlan_netdev_xmit_lock_key;
+-
+-
+-/*  Attach a VLAN device to a mac address (ie Ethernet Card).
+- *  Returns the device that was created, or NULL if there was
+- *  an error of some kind.
+- */
+-static struct net_device *register_vlan_device(const char *eth_IF_name,
+-					       unsigned short VLAN_ID)
++int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id)
+ {
+-	struct vlan_group *grp;
+-	struct net_device *new_dev;
+-	struct net_device *real_dev; /* the ethernet device */
+-	char name[IFNAMSIZ];
+-	int i;
+-
+-#ifdef VLAN_DEBUG
+-	printk(VLAN_DBG "%s: if_name -:%s:-	vid: %i\n",
+-		__FUNCTION__, eth_IF_name, VLAN_ID);
+-#endif
+-
+-	if (VLAN_ID >= VLAN_VID_MASK)
+-		goto out_ret_null;
+-
+-	/* find the device relating to eth_IF_name. */
+-	real_dev = dev_get_by_name(eth_IF_name);
+-	if (!real_dev)
+-		goto out_ret_null;
+-
+ 	if (real_dev->features & NETIF_F_VLAN_CHALLENGED) {
+ 		printk(VLAN_DBG "%s: VLANs not supported on %s.\n",
+ 			__FUNCTION__, real_dev->name);
+-		goto out_put_dev;
++		return -EOPNOTSUPP;
+ 	}
+ 
+ 	if ((real_dev->features & NETIF_F_HW_VLAN_RX) &&
+ 	    !real_dev->vlan_rx_register) {
+ 		printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
+ 			__FUNCTION__, real_dev->name);
+-		goto out_put_dev;
++		return -EOPNOTSUPP;
+ 	}
+ 
+ 	if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
+ 	    (!real_dev->vlan_rx_add_vid || !real_dev->vlan_rx_kill_vid)) {
+ 		printk(VLAN_DBG "%s: Device %s has buggy VLAN hw accel.\n",
+ 			__FUNCTION__, real_dev->name);
+-		goto out_put_dev;
++		return -EOPNOTSUPP;
+ 	}
+ 
+-	/* From this point on, all the data structures must remain
+-	 * consistent.
+-	 */
+-	rtnl_lock();
+-
+ 	/* The real device must be up and operating in order to
+ 	 * assosciate a VLAN device with it.
+ 	 */
+ 	if (!(real_dev->flags & IFF_UP))
+-		goto out_unlock;
++		return -ENETDOWN;
+ 
+-	if (__find_vlan_dev(real_dev, VLAN_ID) != NULL) {
++	if (__find_vlan_dev(real_dev, vlan_id) != NULL) {
+ 		/* was already registered. */
+ 		printk(VLAN_DBG "%s: ALREADY had VLAN registered\n", __FUNCTION__);
+-		goto out_unlock;
++		return -EEXIST;
+ 	}
+ 
++	return 0;
++}
++
++int register_vlan_dev(struct net_device *dev)
++{
++	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
++	struct net_device *real_dev = vlan->real_dev;
++	unsigned short vlan_id = vlan->vlan_id;
++	struct vlan_group *grp, *ngrp = NULL;
++	int err;
++
++	grp = __vlan_find_group(real_dev->ifindex);
++	if (!grp) {
++		ngrp = grp = vlan_group_alloc(real_dev->ifindex);
++		if (!grp)
++			return -ENOBUFS;
++	}
++
++	err = register_netdevice(dev);
++	if (err < 0)
++		goto out_free_group;
++
++	/* Account for reference in struct vlan_dev_info */
++	dev_hold(real_dev);
++
++	vlan_transfer_operstate(real_dev, dev);
++	linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */
++
++	/* So, got the sucker initialized, now let's place
++	 * it into our local structure.
++	 */
++	vlan_group_set_device(grp, vlan_id, dev);
++	if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
++		real_dev->vlan_rx_register(real_dev, ngrp);
++	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
++		real_dev->vlan_rx_add_vid(real_dev, vlan_id);
++
++	if (vlan_proc_add_dev(dev) < 0)
++		printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n",
++		       dev->name);
++	return 0;
++
++out_free_group:
++	if (ngrp)
++		vlan_group_free(ngrp);
++	return err;
++}
++
++/*  Attach a VLAN device to a mac address (ie Ethernet Card).
++ *  Returns 0 if the device was created or a negative error code otherwise.
++ */
++static int register_vlan_device(struct net_device *real_dev,
++				unsigned short VLAN_ID)
++{
++	struct net_device *new_dev;
++	char name[IFNAMSIZ];
++	int err;
++
++#ifdef VLAN_DEBUG
++	printk(VLAN_DBG "%s: if_name -:%s:-	vid: %i\n",
++		__FUNCTION__, real_dev->name, VLAN_ID);
++#endif
++
++	if (VLAN_ID >= VLAN_VID_MASK)
++		return -ERANGE;
++
++	err = vlan_check_real_dev(real_dev, VLAN_ID);
++	if (err < 0)
++		return err;
++
+ 	/* Gotta set up the fields for the device. */
+ #ifdef VLAN_DEBUG
+ 	printk(VLAN_DBG "About to allocate name, vlan_name_type: %i\n",
+@@ -471,138 +556,40 @@
+ 			       vlan_setup);
+ 
+ 	if (new_dev == NULL)
+-		goto out_unlock;
+-
+-#ifdef VLAN_DEBUG
+-	printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name);
+-#endif
+-	/* IFF_BROADCAST|IFF_MULTICAST; ??? */
+-	new_dev->flags = real_dev->flags;
+-	new_dev->flags &= ~IFF_UP;
+-
+-	new_dev->state = (real_dev->state & ((1<<__LINK_STATE_NOCARRIER) |
+-					     (1<<__LINK_STATE_DORMANT))) |
+-			 (1<<__LINK_STATE_PRESENT);
++		return -ENOBUFS;
+ 
+ 	/* need 4 bytes for extra VLAN header info,
+ 	 * hope the underlying device can handle it.
+ 	 */
+ 	new_dev->mtu = real_dev->mtu;
+ 
+-	/* TODO: maybe just assign it to be ETHERNET? */
+-	new_dev->type = real_dev->type;
+-
+-	new_dev->hard_header_len = real_dev->hard_header_len;
+-	if (!(real_dev->features & NETIF_F_HW_VLAN_TX)) {
+-		/* Regular ethernet + 4 bytes (18 total). */
+-		new_dev->hard_header_len += VLAN_HLEN;
+-	}
+-
++#ifdef VLAN_DEBUG
++	printk(VLAN_DBG "Allocated new name -:%s:-\n", new_dev->name);
+ 	VLAN_MEM_DBG("new_dev->priv malloc, addr: %p  size: %i\n",
+ 		     new_dev->priv,
+ 		     sizeof(struct vlan_dev_info));
+-
+-	memcpy(new_dev->broadcast, real_dev->broadcast, real_dev->addr_len);
+-	memcpy(new_dev->dev_addr, real_dev->dev_addr, real_dev->addr_len);
+-	new_dev->addr_len = real_dev->addr_len;
+-
+-	if (real_dev->features & NETIF_F_HW_VLAN_TX) {
+-		new_dev->hard_header = real_dev->hard_header;
+-		new_dev->hard_start_xmit = vlan_dev_hwaccel_hard_start_xmit;
+-		new_dev->rebuild_header = real_dev->rebuild_header;
+-	} else {
+-		new_dev->hard_header = vlan_dev_hard_header;
+-		new_dev->hard_start_xmit = vlan_dev_hard_start_xmit;
+-		new_dev->rebuild_header = vlan_dev_rebuild_header;
+-	}
+-	new_dev->hard_header_parse = real_dev->hard_header_parse;
++#endif
+ 
+ 	VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
+ 	VLAN_DEV_INFO(new_dev)->real_dev = real_dev;
+ 	VLAN_DEV_INFO(new_dev)->dent = NULL;
+-	VLAN_DEV_INFO(new_dev)->flags = 1;
+-
+-#ifdef VLAN_DEBUG
+-	printk(VLAN_DBG "About to go find the group for idx: %i\n",
+-	       real_dev->ifindex);
+-#endif
++	VLAN_DEV_INFO(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
+ 
+-	if (register_netdevice(new_dev))
++	new_dev->rtnl_link_ops = &vlan_link_ops;
++	err = register_vlan_dev(new_dev);
++	if (err < 0)
+ 		goto out_free_newdev;
+ 
+-	lockdep_set_class(&new_dev->_xmit_lock, &vlan_netdev_xmit_lock_key);
+-
+-	new_dev->iflink = real_dev->ifindex;
+-	vlan_transfer_operstate(real_dev, new_dev);
+-	linkwatch_fire_event(new_dev); /* _MUST_ call rfc2863_policy() */
+-
+-	/* So, got the sucker initialized, now lets place
+-	 * it into our local structure.
+-	 */
+-	grp = __vlan_find_group(real_dev->ifindex);
+-
+-	/* Note, we are running under the RTNL semaphore
+-	 * so it cannot "appear" on us.
+-	 */
+-	if (!grp) { /* need to add a new group */
+-		grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL);
+-		if (!grp)
+-			goto out_free_unregister;
+-
+-		for (i=0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++) {
+-			grp->vlan_devices_arrays[i] = kzalloc(
+-				sizeof(struct net_device *)*VLAN_GROUP_ARRAY_PART_LEN,
+-				GFP_KERNEL);
+-
+-			if (!grp->vlan_devices_arrays[i])
+-				goto out_free_arrays;
+-		}
+-
+-		/* printk(KERN_ALERT "VLAN REGISTER:  Allocated new group.\n"); */
+-		grp->real_dev_ifindex = real_dev->ifindex;
+-
+-		hlist_add_head_rcu(&grp->hlist,
+-				   &vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]);
+-
+-		if (real_dev->features & NETIF_F_HW_VLAN_RX)
+-			real_dev->vlan_rx_register(real_dev, grp);
+-	}
+-
+-	vlan_group_set_device(grp, VLAN_ID, new_dev);
+-
+-	if (vlan_proc_add_dev(new_dev)<0)/* create it's proc entry */
+-		printk(KERN_WARNING "VLAN: failed to add proc entry for %s\n",
+-							 new_dev->name);
+-
+-	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
+-		real_dev->vlan_rx_add_vid(real_dev, VLAN_ID);
+-
+-	rtnl_unlock();
+-
+-
++	/* Account for reference in struct vlan_dev_info */
++	dev_hold(real_dev);
+ #ifdef VLAN_DEBUG
+ 	printk(VLAN_DBG "Allocated new device successfully, returning.\n");
+ #endif
+-	return new_dev;
+-
+-out_free_arrays:
+-	vlan_group_free(grp);
+-
+-out_free_unregister:
+-	unregister_netdev(new_dev);
+-	goto out_unlock;
++	return 0;
+ 
+ out_free_newdev:
+ 	free_netdev(new_dev);
+-
+-out_unlock:
+-	rtnl_unlock();
+-
+-out_put_dev:
+-	dev_put(real_dev);
+-
+-out_ret_null:
+-	return NULL;
++	return err;
+ }
+ 
+ static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr)
+@@ -693,9 +680,10 @@
+  */
+ static int vlan_ioctl_handler(void __user *arg)
+ {
+-	int err = 0;
++	int err;
+ 	unsigned short vid = 0;
+ 	struct vlan_ioctl_args args;
++	struct net_device *dev = NULL;
+ 
+ 	if (copy_from_user(&args, arg, sizeof(struct vlan_ioctl_args)))
+ 		return -EFAULT;
+@@ -708,35 +696,61 @@
+ 	printk(VLAN_DBG "%s: args.cmd: %x\n", __FUNCTION__, args.cmd);
+ #endif
+ 
++	rtnl_lock();
++
++	switch (args.cmd) {
++	case SET_VLAN_INGRESS_PRIORITY_CMD:
++	case SET_VLAN_EGRESS_PRIORITY_CMD:
++	case SET_VLAN_FLAG_CMD:
++	case ADD_VLAN_CMD:
++	case DEL_VLAN_CMD:
++	case GET_VLAN_REALDEV_NAME_CMD:
++	case GET_VLAN_VID_CMD:
++		err = -ENODEV;
++		dev = __dev_get_by_name(args.device1);
++		if (!dev)
++			goto out;
++
++		err = -EINVAL;
++		if (args.cmd != ADD_VLAN_CMD &&
++		    !(dev->priv_flags & IFF_802_1Q_VLAN))
++			goto out;
++	}
++
+ 	switch (args.cmd) {
+ 	case SET_VLAN_INGRESS_PRIORITY_CMD:
++		err = -EPERM;
+ 		if (!capable(CAP_NET_ADMIN))
+-			return -EPERM;
+-		err = vlan_dev_set_ingress_priority(args.device1,
++			break;
++		vlan_dev_set_ingress_priority(dev,
+ 						    args.u.skb_priority,
+ 						    args.vlan_qos);
+ 		break;
+ 
+ 	case SET_VLAN_EGRESS_PRIORITY_CMD:
++		err = -EPERM;
+ 		if (!capable(CAP_NET_ADMIN))
+-			return -EPERM;
+-		err = vlan_dev_set_egress_priority(args.device1,
++			break;
++		err = vlan_dev_set_egress_priority(dev,
+ 						   args.u.skb_priority,
+ 						   args.vlan_qos);
+ 		break;
+ 
+ 	case SET_VLAN_FLAG_CMD:
++		err = -EPERM;
+ 		if (!capable(CAP_NET_ADMIN))
+-			return -EPERM;
+-		err = vlan_dev_set_vlan_flag(args.device1,
++			break;
++		err = vlan_dev_set_vlan_flag(dev,
+ 					     args.u.flag,
+ 					     args.vlan_qos);
+ 		break;
+ 
+ 	case SET_VLAN_NAME_TYPE_CMD:
++		err = -EPERM;
+ 		if (!capable(CAP_NET_ADMIN))
+-			return -EPERM;
++			break;
+-		if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) {
++		if ((args.u.name_type >= 0) &&
++		    (args.u.name_type < VLAN_NAME_TYPE_HIGHEST)) {
+ 			vlan_name_type = args.u.name_type;
+ 			err = 0;
+ 		} else {
+@@ -745,26 +759,17 @@
+ 		break;
+ 
+ 	case ADD_VLAN_CMD:
++		err = -EPERM;
+ 		if (!capable(CAP_NET_ADMIN))
+-			return -EPERM;
+-		/* we have been given the name of the Ethernet Device we want to
+-		 * talk to:  args.dev1	 We also have the
+-		 * VLAN ID:  args.u.VID
+-		 */
+-		if (register_vlan_device(args.device1, args.u.VID)) {
+-			err = 0;
+-		} else {
+-			err = -EINVAL;
+-		}
++			break;
++		err = register_vlan_device(dev, args.u.VID);
+ 		break;
+ 
+ 	case DEL_VLAN_CMD:
++		err = -EPERM;
+ 		if (!capable(CAP_NET_ADMIN))
+-			return -EPERM;
+-		/* Here, the args.dev1 is the actual VLAN we want
+-		 * to get rid of.
+-		 */
+-		err = unregister_vlan_device(args.device1);
++			break;
++		err = unregister_vlan_device(dev);
+ 		break;
+ 
+ 	case GET_VLAN_INGRESS_PRIORITY_CMD:
+@@ -788,9 +793,9 @@
+ 		err = -EINVAL;
+ 		break;
+ 	case GET_VLAN_REALDEV_NAME_CMD:
+-		err = vlan_dev_get_realdev_name(args.device1, args.u.device2);
+-		if (err)
+-			goto out;
++		err = 0;
++		vlan_dev_get_realdev_name(dev, args.u.device2);
+ 		if (copy_to_user(arg, &args,
+ 				 sizeof(struct vlan_ioctl_args))) {
+ 			err = -EFAULT;
+@@ -798,9 +801,7 @@
+ 		break;
+ 
+ 	case GET_VLAN_VID_CMD:
+-		err = vlan_dev_get_vid(args.device1, &vid);
+-		if (err)
+-			goto out;
++		err = 0;
++		vlan_dev_get_vid(dev, &vid);
+ 		args.u.VID = vid;
+ 		if (copy_to_user(arg, &args,
+ 				 sizeof(struct vlan_ioctl_args))) {
+@@ -812,9 +813,11 @@
+ 		/* pass on to underlying device instead?? */
+ 		printk(VLAN_DBG "%s: Unknown VLAN CMD: %x \n",
+ 			__FUNCTION__, args.cmd);
+-		return -EINVAL;
++		err = -EINVAL;
++		break;
+ 	}
+ out:
++	rtnl_unlock();
+ 	return err;
+ }
+ 
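
The reworked handler still speaks the classic vconfig ABI; it just resolves the device once and holds the rtnl lock across the whole operation. From userspace the ADD_VLAN_CMD path is driven roughly as below, a sketch of what vconfig does with error handling trimmed; it requires CAP_NET_ADMIN:

	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/socket.h>
	#include <linux/sockios.h>
	#include <linux/if_vlan.h>

	static int add_vlan(const char *ifname, unsigned short vid)
	{
		struct vlan_ioctl_args args;
		int fd, ret;

		fd = socket(AF_INET, SOCK_STREAM, 0);
		if (fd < 0)
			return -1;
		memset(&args, 0, sizeof(args));
		args.cmd = ADD_VLAN_CMD;
		strncpy(args.device1, ifname, sizeof(args.device1) - 1);
		args.u.VID = vid;
		ret = ioctl(fd, SIOCSIFVLAN, &args);
		close(fd);
		return ret;
	}
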
+diff -Nurb linux-2.6.22-570/net/8021q/vlan.h linux-2.6.22-try2/net/8021q/vlan.h
+--- linux-2.6.22-570/net/8021q/vlan.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/8021q/vlan.h	2007-12-19 15:29:23.000000000 -0500
+@@ -62,11 +62,24 @@
+ int vlan_dev_open(struct net_device* dev);
+ int vlan_dev_stop(struct net_device* dev);
+ int vlan_dev_ioctl(struct net_device* dev, struct ifreq *ifr, int cmd);
+-int vlan_dev_set_ingress_priority(char* dev_name, __u32 skb_prio, short vlan_prio);
+-int vlan_dev_set_egress_priority(char* dev_name, __u32 skb_prio, short vlan_prio);
+-int vlan_dev_set_vlan_flag(char* dev_name, __u32 flag, short flag_val);
+-int vlan_dev_get_realdev_name(const char* dev_name, char* result);
+-int vlan_dev_get_vid(const char* dev_name, unsigned short* result);
++void vlan_dev_set_ingress_priority(const struct net_device *dev,
++				   u32 skb_prio, short vlan_prio);
++int vlan_dev_set_egress_priority(const struct net_device *dev,
++				 u32 skb_prio, short vlan_prio);
++int vlan_dev_set_vlan_flag(const struct net_device *dev,
++			   u32 flag, short flag_val);
++void vlan_dev_get_realdev_name(const struct net_device *dev, char *result);
++void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result);
+ void vlan_dev_set_multicast_list(struct net_device *vlan_dev);
+ 
++int vlan_check_real_dev(struct net_device *real_dev, unsigned short vlan_id);
++void vlan_setup(struct net_device *dev);
++int register_vlan_dev(struct net_device *dev);
++int unregister_vlan_device(struct net_device *dev);
++
++int vlan_netlink_init(void);
++void vlan_netlink_fini(void);
++
++extern struct rtnl_link_ops vlan_link_ops;
++
+ #endif /* !(__BEN_VLAN_802_1Q_INC__) */
+diff -Nurb linux-2.6.22-570/net/8021q/vlan_dev.c linux-2.6.22-try2/net/8021q/vlan_dev.c
+--- linux-2.6.22-570/net/8021q/vlan_dev.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/net/8021q/vlan_dev.c	2007-12-19 15:29:23.000000000 -0500
+@@ -73,7 +73,7 @@
+ 
+ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb)
+ {
+-	if (VLAN_DEV_INFO(skb->dev)->flags & 1) {
++	if (VLAN_DEV_INFO(skb->dev)->flags & VLAN_FLAG_REORDER_HDR) {
+ 		if (skb_shared(skb) || skb_cloned(skb)) {
+ 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
+ 			kfree_skb(skb);
+@@ -360,7 +360,8 @@
+ 	 * header shuffling in the hard_start_xmit.  Users can turn off this
+ 	 * REORDER behaviour with the vconfig tool.
+ 	 */
+-	build_vlan_header = ((VLAN_DEV_INFO(dev)->flags & 1) == 0);
++	if (!(VLAN_DEV_INFO(dev)->flags & VLAN_FLAG_REORDER_HDR))
++		build_vlan_header = 1;
+ 
+ 	if (build_vlan_header) {
+ 		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
+@@ -544,136 +545,83 @@
+ 	return 0;
+ }
+ 
+-int vlan_dev_set_ingress_priority(char *dev_name, __u32 skb_prio, short vlan_prio)
++void vlan_dev_set_ingress_priority(const struct net_device *dev,
++				   u32 skb_prio, short vlan_prio)
+ {
+-	struct net_device *dev = dev_get_by_name(dev_name);
++	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+ 
+-	if (dev) {
+-		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+-			/* see if a priority mapping exists.. */
+-			VLAN_DEV_INFO(dev)->ingress_priority_map[vlan_prio & 0x7] = skb_prio;
+-			dev_put(dev);
+-			return 0;
+-		}
++	if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio)
++		vlan->nr_ingress_mappings--;
++	else if (!vlan->ingress_priority_map[vlan_prio & 0x7] && skb_prio)
++		vlan->nr_ingress_mappings++;
+ 
+-		dev_put(dev);
+-	}
+-	return -EINVAL;
++	vlan->ingress_priority_map[vlan_prio & 0x7] = skb_prio;
+ }
+ 
+-int vlan_dev_set_egress_priority(char *dev_name, __u32 skb_prio, short vlan_prio)
++int vlan_dev_set_egress_priority(const struct net_device *dev,
++				 u32 skb_prio, short vlan_prio)
+ {
+-	struct net_device *dev = dev_get_by_name(dev_name);
++	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
+ 	struct vlan_priority_tci_mapping *mp = NULL;
+ 	struct vlan_priority_tci_mapping *np;
++	u32 vlan_qos = (vlan_prio << 13) & 0xE000;
+ 
+-	if (dev) {
+-		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+ 			/* See if a priority mapping exists.. */
+-			mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF];
++	mp = vlan->egress_priority_map[skb_prio & 0xF];
+ 			while (mp) {
+ 				if (mp->priority == skb_prio) {
+-					mp->vlan_qos = ((vlan_prio << 13) & 0xE000);
+-					dev_put(dev);
++			if (mp->vlan_qos && !vlan_qos)
++				vlan->nr_egress_mappings--;
++			else if (!mp->vlan_qos && vlan_qos)
++				vlan->nr_egress_mappings++;
++			mp->vlan_qos = vlan_qos;
+ 					return 0;
+ 				}
+ 				mp = mp->next;
+ 			}
+ 
+ 			/* Create a new mapping then. */
+-			mp = VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF];
++	mp = vlan->egress_priority_map[skb_prio & 0xF];
+ 			np = kmalloc(sizeof(struct vlan_priority_tci_mapping), GFP_KERNEL);
+-			if (np) {
++	if (!np)
++		return -ENOBUFS;
++
+ 				np->next = mp;
+ 				np->priority = skb_prio;
+-				np->vlan_qos = ((vlan_prio << 13) & 0xE000);
+-				VLAN_DEV_INFO(dev)->egress_priority_map[skb_prio & 0xF] = np;
+-				dev_put(dev);
++	np->vlan_qos = vlan_qos;
++	vlan->egress_priority_map[skb_prio & 0xF] = np;
++	if (vlan_qos)
++		vlan->nr_egress_mappings++;
+ 				return 0;
+-			} else {
+-				dev_put(dev);
+-				return -ENOBUFS;
+-			}
+-		}
+-		dev_put(dev);
+-	}
+-	return -EINVAL;
+ }
+ 
+-/* Flags are defined in the vlan_dev_info class in include/linux/if_vlan.h file. */
+-int vlan_dev_set_vlan_flag(char *dev_name, __u32 flag, short flag_val)
++/* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */
++int vlan_dev_set_vlan_flag(const struct net_device *dev,
++			   u32 flag, short flag_val)
+ {
+-	struct net_device *dev = dev_get_by_name(dev_name);
+-
+-	if (dev) {
+-		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+ 			/* verify flag is supported */
+-			if (flag == 1) {
++	if (flag == VLAN_FLAG_REORDER_HDR) {
+ 				if (flag_val) {
+-					VLAN_DEV_INFO(dev)->flags |= 1;
++			VLAN_DEV_INFO(dev)->flags |= VLAN_FLAG_REORDER_HDR;
+ 				} else {
+-					VLAN_DEV_INFO(dev)->flags &= ~1;
++			VLAN_DEV_INFO(dev)->flags &= ~VLAN_FLAG_REORDER_HDR;
+ 				}
+-				dev_put(dev);
+ 				return 0;
+-			} else {
+-				printk(KERN_ERR  "%s: flag %i is not valid.\n",
+-					__FUNCTION__, (int)(flag));
+-				dev_put(dev);
+-				return -EINVAL;
+ 			}
+-		} else {
+-			printk(KERN_ERR
+-			       "%s: %s is not a vlan device, priv_flags: %hX.\n",
+-			       __FUNCTION__, dev->name, dev->priv_flags);
+-			dev_put(dev);
+-		}
+-	} else {
+-		printk(KERN_ERR  "%s: Could not find device: %s\n",
+-			__FUNCTION__, dev_name);
+-	}
+-
++	printk(KERN_ERR "%s: flag %i is not valid.\n", __FUNCTION__, flag);
+ 	return -EINVAL;
+ }
+ 
+-
+-int vlan_dev_get_realdev_name(const char *dev_name, char* result)
++void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
+ {
+-	struct net_device *dev = dev_get_by_name(dev_name);
+-	int rv = 0;
+-	if (dev) {
+-		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+ 			strncpy(result, VLAN_DEV_INFO(dev)->real_dev->name, 23);
+-			rv = 0;
+-		} else {
+-			rv = -EINVAL;
+-		}
+-		dev_put(dev);
+-	} else {
+-		rv = -ENODEV;
+-	}
+-	return rv;
+ }
+ 
+-int vlan_dev_get_vid(const char *dev_name, unsigned short* result)
++void vlan_dev_get_vid(const struct net_device *dev, unsigned short *result)
+ {
+-	struct net_device *dev = dev_get_by_name(dev_name);
+-	int rv = 0;
+-	if (dev) {
+-		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+ 			*result = VLAN_DEV_INFO(dev)->vlan_id;
+-			rv = 0;
+-		} else {
+-			rv = -EINVAL;
+-		}
+-		dev_put(dev);
+-	} else {
+-		rv = -ENODEV;
+-	}
+-	return rv;
+ }
+ 
+-
+ int vlan_dev_set_mac_address(struct net_device *dev, void *addr_struct_p)
+ {
+ 	struct sockaddr *addr = (struct sockaddr *)(addr_struct_p);
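
The egress map stores the 802.1p priority pre-shifted into the TCI's PCP field, which is why vlan_qos is masked with 0xE000 throughout: transmit can then OR it straight into the tag. A sketch of the encoding, with CFI left clear:

	/* Sketch: TCI = PCP (bits 13-15) | CFI (bit 12) | VID (bits 0-11). */
	static inline u16 example_build_tci(unsigned short vid, u32 vlan_qos)
	{
		return (vid & 0x0fff) | (vlan_qos & 0xe000);
	}
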
+diff -Nurb linux-2.6.22-570/net/8021q/vlan_netlink.c linux-2.6.22-try2/net/8021q/vlan_netlink.c
+--- linux-2.6.22-570/net/8021q/vlan_netlink.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/net/8021q/vlan_netlink.c	2007-12-19 15:29:23.000000000 -0500
+@@ -0,0 +1,236 @@
++/*
++ *	VLAN netlink control interface
++ *
++ * 	Copyright (c) 2007 Patrick McHardy <kaber@trash.net>
++ *
++ *	This program is free software; you can redistribute it and/or
++ *	modify it under the terms of the GNU General Public License
++ *	version 2 as published by the Free Software Foundation.
++ */
++
++#include <linux/kernel.h>
++#include <linux/netdevice.h>
++#include <linux/if_vlan.h>
++#include <net/netlink.h>
++#include <net/rtnetlink.h>
++#include "vlan.h"
++
++
++static const struct nla_policy vlan_policy[IFLA_VLAN_MAX + 1] = {
++	[IFLA_VLAN_ID]		= { .type = NLA_U16 },
++	[IFLA_VLAN_FLAGS]	= { .len = sizeof(struct ifla_vlan_flags) },
++	[IFLA_VLAN_EGRESS_QOS]	= { .type = NLA_NESTED },
++	[IFLA_VLAN_INGRESS_QOS] = { .type = NLA_NESTED },
++};
++
++static const struct nla_policy vlan_map_policy[IFLA_VLAN_QOS_MAX + 1] = {
++	[IFLA_VLAN_QOS_MAPPING] = { .len = sizeof(struct ifla_vlan_qos_mapping) },
++};
++
++
++static inline int vlan_validate_qos_map(struct nlattr *attr)
++{
++	if (!attr)
++		return 0;
++	return nla_validate_nested(attr, IFLA_VLAN_QOS_MAX, vlan_map_policy);
++}
++
++static int vlan_validate(struct nlattr *tb[], struct nlattr *data[])
++{
++	struct ifla_vlan_flags *flags;
++	u16 id;
++	int err;
++
++	if (!data)
++		return -EINVAL;
++
++	if (data[IFLA_VLAN_ID]) {
++		id = nla_get_u16(data[IFLA_VLAN_ID]);
++		if (id >= VLAN_VID_MASK)
++			return -ERANGE;
++	}
++	if (data[IFLA_VLAN_FLAGS]) {
++		flags = nla_data(data[IFLA_VLAN_FLAGS]);
++		if ((flags->flags & flags->mask) & ~VLAN_FLAG_REORDER_HDR)
++			return -EINVAL;
++	}
++
++	err = vlan_validate_qos_map(data[IFLA_VLAN_INGRESS_QOS]);
++	if (err < 0)
++		return err;
++	err = vlan_validate_qos_map(data[IFLA_VLAN_EGRESS_QOS]);
++	if (err < 0)
++		return err;
++	return 0;
++}
++
++static int vlan_changelink(struct net_device *dev,
++			   struct nlattr *tb[], struct nlattr *data[])
++{
++	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
++	struct ifla_vlan_flags *flags;
++	struct ifla_vlan_qos_mapping *m;
++	struct nlattr *attr;
++	int rem;
++
++	if (data[IFLA_VLAN_FLAGS]) {
++		flags = nla_data(data[IFLA_VLAN_FLAGS]);
++		vlan->flags = (vlan->flags & ~flags->mask) |
++			      (flags->flags & flags->mask);
++	}
++	if (data[IFLA_VLAN_INGRESS_QOS]) {
++		nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
++			m = nla_data(attr);
++			vlan_dev_set_ingress_priority(dev, m->to, m->from);
++		}
++	}
++	if (data[IFLA_VLAN_EGRESS_QOS]) {
++		nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) {
++			m = nla_data(attr);
++			vlan_dev_set_egress_priority(dev, m->from, m->to);
++		}
++	}
++	return 0;
++}
++
++static int vlan_newlink(struct net_device *dev,
++			struct nlattr *tb[], struct nlattr *data[])
++{
++	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
++	struct net_device *real_dev;
++	int err;
++
++	if (!data[IFLA_VLAN_ID])
++		return -EINVAL;
++
++	if (!tb[IFLA_LINK])
++		return -EINVAL;
++	real_dev = __dev_get_by_index(nla_get_u32(tb[IFLA_LINK]));
++	if (!real_dev)
++		return -ENODEV;
++
++	vlan->vlan_id  = nla_get_u16(data[IFLA_VLAN_ID]);
++	vlan->real_dev = real_dev;
++	vlan->flags    = VLAN_FLAG_REORDER_HDR;
++
++	err = vlan_check_real_dev(real_dev, vlan->vlan_id);
++	if (err < 0)
++		return err;
++
++	if (!tb[IFLA_MTU])
++		dev->mtu = real_dev->mtu;
++	else if (dev->mtu > real_dev->mtu)
++		return -EINVAL;
++
++	err = vlan_changelink(dev, tb, data);
++	if (err < 0)
++		return err;
++
++	return register_vlan_dev(dev);
++}
++
++static void vlan_dellink(struct net_device *dev)
++{
++	unregister_vlan_device(dev);
++}
++
++static inline size_t vlan_qos_map_size(unsigned int n)
++{
++	if (n == 0)
++		return 0;
++	/* IFLA_VLAN_{EGRESS,INGRESS}_QOS + n * IFLA_VLAN_QOS_MAPPING */
++	return nla_total_size(sizeof(struct nlattr)) +
++	       nla_total_size(sizeof(struct ifla_vlan_qos_mapping)) * n;
++}
++
++static size_t vlan_get_size(const struct net_device *dev)
++{
++	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
++
++	return nla_total_size(2) +	/* IFLA_VLAN_ID */
++	       vlan_qos_map_size(vlan->nr_ingress_mappings) +
++	       vlan_qos_map_size(vlan->nr_egress_mappings);
++}
++
++static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
++{
++	struct vlan_dev_info *vlan = VLAN_DEV_INFO(dev);
++	struct vlan_priority_tci_mapping *pm;
++	struct ifla_vlan_flags f;
++	struct ifla_vlan_qos_mapping m;
++	struct nlattr *nest;
++	unsigned int i;
++
++	NLA_PUT_U16(skb, IFLA_VLAN_ID, VLAN_DEV_INFO(dev)->vlan_id);
++	if (vlan->flags) {
++		f.flags = vlan->flags;
++		f.mask  = ~0;
++		NLA_PUT(skb, IFLA_VLAN_FLAGS, sizeof(f), &f);
++	}
++	if (vlan->nr_ingress_mappings) {
++		nest = nla_nest_start(skb, IFLA_VLAN_INGRESS_QOS);
++		if (nest == NULL)
++			goto nla_put_failure;
++
++		for (i = 0; i < ARRAY_SIZE(vlan->ingress_priority_map); i++) {
++			if (!vlan->ingress_priority_map[i])
++				continue;
++
++			m.from = i;
++			m.to   = vlan->ingress_priority_map[i];
++			NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING,
++				sizeof(m), &m);
++		}
++		nla_nest_end(skb, nest);
++	}
++
++	if (vlan->nr_egress_mappings) {
++		nest = nla_nest_start(skb, IFLA_VLAN_EGRESS_QOS);
++		if (nest == NULL)
++			goto nla_put_failure;
++
++		for (i = 0; i < ARRAY_SIZE(vlan->egress_priority_map); i++) {
++			for (pm = vlan->egress_priority_map[i]; pm;
++			     pm = pm->next) {
++				if (!pm->vlan_qos)
++					continue;
++
++				m.from = pm->priority;
++				m.to   = (pm->vlan_qos >> 13) & 0x7;
++				NLA_PUT(skb, IFLA_VLAN_QOS_MAPPING,
++					sizeof(m), &m);
++			}
++		}
++		nla_nest_end(skb, nest);
++	}
++	return 0;
++
++nla_put_failure:
++	return -EMSGSIZE;
++}
++
++struct rtnl_link_ops vlan_link_ops __read_mostly = {
++	.kind		= "vlan",
++	.maxtype	= IFLA_VLAN_MAX,
++	.policy		= vlan_policy,
++	.priv_size	= sizeof(struct vlan_dev_info),
++	.setup		= vlan_setup,
++	.validate	= vlan_validate,
++	.newlink	= vlan_newlink,
++	.changelink	= vlan_changelink,
++	.dellink	= vlan_dellink,
++	.get_size	= vlan_get_size,
++	.fill_info	= vlan_fill_info,
++};
++
++int __init vlan_netlink_init(void)
++{
++	return rtnl_link_register(&vlan_link_ops);
++}
++
++void __exit vlan_netlink_fini(void)
++{
++	rtnl_link_unregister(&vlan_link_ops);
++}
++
++MODULE_ALIAS_RTNL_LINK("vlan");
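
For a device type with no netlink attributes of its own, wiring into rtnl_link is considerably smaller than the vlan case. A hedged sketch, where "example", example_setup and struct example_priv stand in for the driver's usual pieces:

	struct example_priv {
		int placeholder;
	};

	static void example_setup(struct net_device *dev)
	{
		ether_setup(dev);	/* driver's usual one-time init */
	}

	static struct rtnl_link_ops example_link_ops __read_mostly = {
		.kind		= "example",
		.priv_size	= sizeof(struct example_priv),
		.setup		= example_setup,
	};

	static int __init example_init(void)
	{
		return rtnl_link_register(&example_link_ops);
	}

	static void __exit example_exit(void)
	{
		rtnl_link_unregister(&example_link_ops);
	}
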
+diff -Nurb linux-2.6.22-570/net/8021q/vlanproc.c linux-2.6.22-try2/net/8021q/vlanproc.c
+--- linux-2.6.22-570/net/8021q/vlanproc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/8021q/vlanproc.c	2007-12-19 15:29:23.000000000 -0500
+@@ -342,7 +342,7 @@
+ 	seq_printf(seq, "Device: %s", dev_info->real_dev->name);
+ 	/* now show all PRIORITY mappings relating to this VLAN */
+ 	seq_printf(seq,
+-		       "\nINGRESS priority mappings: 0:%lu  1:%lu  2:%lu  3:%lu  4:%lu  5:%lu  6:%lu 7:%lu\n",
++		       "\nINGRESS priority mappings: 0:%u  1:%u  2:%u  3:%u  4:%u  5:%u  6:%u 7:%u\n",
+ 		       dev_info->ingress_priority_map[0],
+ 		       dev_info->ingress_priority_map[1],
+ 		       dev_info->ingress_priority_map[2],
+@@ -357,7 +357,7 @@
+ 		const struct vlan_priority_tci_mapping *mp
+ 			= dev_info->egress_priority_map[i];
+ 		while (mp) {
+-			seq_printf(seq, "%lu:%hu ",
++			seq_printf(seq, "%u:%hu ",
+ 				   mp->priority, ((mp->vlan_qos >> 13) & 0x7));
+ 			mp = mp->next;
+ 		}
+diff -Nurb linux-2.6.22-570/net/bluetooth/bnep/core.c linux-2.6.22-try2/net/bluetooth/bnep/core.c
+--- linux-2.6.22-570/net/bluetooth/bnep/core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/bluetooth/bnep/core.c	2007-12-19 15:29:24.000000000 -0500
+@@ -36,6 +36,7 @@
+ #include <linux/signal.h>
+ #include <linux/init.h>
+ #include <linux/wait.h>
++#include <linux/freezer.h>
+ #include <linux/errno.h>
+ #include <linux/net.h>
+ #include <net/sock.h>
+@@ -474,7 +475,6 @@
+ 
+ 	daemonize("kbnepd %s", dev->name);
+ 	set_user_nice(current, -15);
+-	current->flags |= PF_NOFREEZE;
+ 
+ 	init_waitqueue_entry(&wait, current);
+ 	add_wait_queue(sk->sk_sleep, &wait);
+diff -Nurb linux-2.6.22-570/net/bluetooth/cmtp/core.c linux-2.6.22-try2/net/bluetooth/cmtp/core.c
+--- linux-2.6.22-570/net/bluetooth/cmtp/core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/bluetooth/cmtp/core.c	2007-12-19 15:29:24.000000000 -0500
+@@ -29,6 +29,7 @@
+ #include <linux/slab.h>
+ #include <linux/poll.h>
+ #include <linux/fcntl.h>
++#include <linux/freezer.h>
+ #include <linux/skbuff.h>
+ #include <linux/socket.h>
+ #include <linux/ioctl.h>
+@@ -287,7 +288,6 @@
+ 
+ 	daemonize("kcmtpd_ctr_%d", session->num);
+ 	set_user_nice(current, -15);
+-	current->flags |= PF_NOFREEZE;
+ 
+ 	init_waitqueue_entry(&wait, current);
+ 	add_wait_queue(sk->sk_sleep, &wait);
+diff -Nurb linux-2.6.22-570/net/bluetooth/hidp/core.c linux-2.6.22-try2/net/bluetooth/hidp/core.c
+--- linux-2.6.22-570/net/bluetooth/hidp/core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/bluetooth/hidp/core.c	2007-12-19 15:29:24.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/sched.h>
+ #include <linux/slab.h>
+ #include <linux/poll.h>
++#include <linux/freezer.h>
+ #include <linux/fcntl.h>
+ #include <linux/skbuff.h>
+ #include <linux/socket.h>
+@@ -547,7 +548,6 @@
+ 
+ 	daemonize("khidpd_%04x%04x", vendor, product);
+ 	set_user_nice(current, -15);
+-	current->flags |= PF_NOFREEZE;
+ 
+ 	init_waitqueue_entry(&ctrl_wait, current);
+ 	init_waitqueue_entry(&intr_wait, current);
+diff -Nurb linux-2.6.22-570/net/bluetooth/rfcomm/core.c linux-2.6.22-try2/net/bluetooth/rfcomm/core.c
+--- linux-2.6.22-570/net/bluetooth/rfcomm/core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/bluetooth/rfcomm/core.c	2007-12-19 15:29:24.000000000 -0500
+@@ -33,6 +33,7 @@
+ #include <linux/sched.h>
+ #include <linux/signal.h>
+ #include <linux/init.h>
++#include <linux/freezer.h>
+ #include <linux/wait.h>
+ #include <linux/device.h>
+ #include <linux/net.h>
+@@ -1940,7 +1941,6 @@
+ 
+ 	daemonize("krfcommd");
+ 	set_user_nice(current, -10);
+-	current->flags |= PF_NOFREEZE;
+ 
+ 	BT_DBG("");
+ 
+diff -Nurb linux-2.6.22-570/net/bridge/br_stp_if.c linux-2.6.22-try2/net/bridge/br_stp_if.c
+--- linux-2.6.22-570/net/bridge/br_stp_if.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/bridge/br_stp_if.c	2007-12-19 15:29:23.000000000 -0500
+@@ -125,7 +125,7 @@
+ 	char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
+ 	char *envp[] = { NULL };
+ 
+-	r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
++	r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+ 	if (r == 0) {
+ 		br->stp_enabled = BR_USER_STP;
+ 		printk(KERN_INFO "%s: userspace STP started\n", br->dev->name);
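
call_usermodehelper()'s wait argument becomes an enum in this series, so the bare 1 is spelled UMH_WAIT_PROC: block until the helper exits and propagate its exit status. A sketch of a caller; the helper path is made up:

	/* Sketch: run a helper synchronously and return its exit status.
	 * UMH_WAIT_EXEC would only wait for the exec, UMH_NO_WAIT for
	 * neither (names assumed from this series' kmod.h). */
	static int example_run_helper(const char *ifname)
	{
		char *argv[] = { "/sbin/example-helper", (char *)ifname, NULL };
		char *envp[] = { "HOME=/", "PATH=/sbin:/bin", NULL };

		return call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
	}
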
+diff -Nurb linux-2.6.22-570/net/bridge/br_sysfs_br.c linux-2.6.22-try2/net/bridge/br_sysfs_br.c
+--- linux-2.6.22-570/net/bridge/br_sysfs_br.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/bridge/br_sysfs_br.c	2007-12-19 15:29:22.000000000 -0500
+@@ -360,8 +360,9 @@
+  *
+  * Returns the number of bytes read.
+  */
+-static ssize_t brforward_read(struct kobject *kobj, char *buf,
+-			   loff_t off, size_t count)
++static ssize_t brforward_read(struct kobject *kobj,
++			      struct bin_attribute *bin_attr,
++			      char *buf, loff_t off, size_t count)
+ {
+ 	struct device *dev = to_dev(kobj);
+ 	struct net_bridge *br = to_bridge(dev);
+@@ -383,8 +384,7 @@
+ 
+ static struct bin_attribute bridge_forward = {
+ 	.attr = { .name = SYSFS_BRIDGE_FDB,
+-		  .mode = S_IRUGO,
+-		  .owner = THIS_MODULE, },
++		  .mode = S_IRUGO, },
+ 	.read = brforward_read,
+ };
+ 
+diff -Nurb linux-2.6.22-570/net/bridge/br_sysfs_if.c linux-2.6.22-try2/net/bridge/br_sysfs_if.c
+--- linux-2.6.22-570/net/bridge/br_sysfs_if.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/bridge/br_sysfs_if.c	2007-12-19 15:29:22.000000000 -0500
+@@ -29,8 +29,7 @@
+ #define BRPORT_ATTR(_name,_mode,_show,_store)		        \
+ struct brport_attribute brport_attr_##_name = { 	        \
+ 	.attr = {.name = __stringify(_name), 			\
+-		 .mode = _mode, 				\
+-		 .owner = THIS_MODULE, },			\
++		 .mode = _mode },				\
+ 	.show	= _show,					\
+ 	.store	= _store,					\
+ };
+diff -Nurb linux-2.6.22-570/net/core/dev.c linux-2.6.22-try2/net/core/dev.c
+--- linux-2.6.22-570/net/core/dev.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/net/core/dev.c	2007-12-19 15:29:25.000000000 -0500
+@@ -152,9 +152,22 @@
+ static struct list_head ptype_all __read_mostly;	/* Taps */
+ 
+ #ifdef CONFIG_NET_DMA
+-static struct dma_client *net_dma_client;
+-static unsigned int net_dma_count;
+-static spinlock_t net_dma_event_lock;
++struct net_dma {
++	struct dma_client client;
++	spinlock_t lock;
++	cpumask_t channel_mask;
++	struct dma_chan *channels[NR_CPUS];
++};
++
++static enum dma_state_client
++netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
++	enum dma_state state);
++
++static struct net_dma net_dma = {
++	.client = {
++		.event_callback = netdev_dma_event,
++	},
++};
+ #endif
+ 
+ /*
+@@ -822,8 +835,12 @@
+ 	}
+ 	else if (__dev_get_by_name(newname))
+ 		return -EEXIST;
+-	else
++	else {
++		if (strncmp(newname, dev->name, IFNAMSIZ))
++			printk(KERN_INFO "%s renamed to %s\n",
++					dev->name, newname);
+ 		strlcpy(dev->name, newname, IFNAMSIZ);
++	}
+ 
+ 	device_rename(&dev->dev, dev->name);
+ 	hlist_del(&dev->name_hlist);
+@@ -1510,9 +1527,11 @@
+ 		skb_set_transport_header(skb, skb->csum_start -
+ 					      skb_headroom(skb));
+ 
+-		if (!(dev->features & NETIF_F_GEN_CSUM) &&
+-		    (!(dev->features & NETIF_F_IP_CSUM) ||
+-		     skb->protocol != htons(ETH_P_IP)))
++		if (!(dev->features & NETIF_F_GEN_CSUM) &&
++		    !((dev->features & NETIF_F_IP_CSUM) &&
++		      skb->protocol == htons(ETH_P_IP)) &&
++		    !((dev->features & NETIF_F_IPV6_CSUM) &&
++		      skb->protocol == htons(ETH_P_IPV6)))
+ 			if (skb_checksum_help(skb))
+ 				goto out_kfree_skb;
+ 	}
+@@ -2016,12 +2035,13 @@
+ 	 * There may not be any more sk_buffs coming right now, so push
+ 	 * any pending DMA copies to hardware
+ 	 */
+-	if (net_dma_client) {
+-		struct dma_chan *chan;
+-		rcu_read_lock();
+-		list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
++	if (!cpus_empty(net_dma.channel_mask)) {
++		int chan_idx;
++		for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
++			struct dma_chan *chan = net_dma.channels[chan_idx];
++			if (chan)
+ 			dma_async_memcpy_issue_pending(chan);
+-		rcu_read_unlock();
++		}
+ 	}
+ #endif
+ 	return;
+@@ -3113,6 +3133,22 @@
+ 		}
+ 	}
+ 
++	/* Fix illegal checksum combinations */
++	if ((dev->features & NETIF_F_HW_CSUM) &&
++	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
++		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
++		       dev->name);
++		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
++	}
++
++	if ((dev->features & NETIF_F_NO_CSUM) &&
++	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
++		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
++		       dev->name);
++		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
++	}
++
++
+ 	/* Fix illegal SG+CSUM combinations. */
+ 	if ((dev->features & NETIF_F_SG) &&
+ 	    !(dev->features & NETIF_F_ALL_CSUM)) {
+@@ -3569,12 +3605,13 @@
+  * This is called when the number of channels allocated to the net_dma_client
+  * changes.  The net_dma_client tries to have one DMA channel per CPU.
+  */
+-static void net_dma_rebalance(void)
++
++static void net_dma_rebalance(struct net_dma *net_dma)
+ {
+-	unsigned int cpu, i, n;
++	unsigned int cpu, i, n, chan_idx;
+ 	struct dma_chan *chan;
+ 
+-	if (net_dma_count == 0) {
++	if (cpus_empty(net_dma->channel_mask)) {
+ 		for_each_online_cpu(cpu)
+ 			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
+ 		return;
+@@ -3583,10 +3620,12 @@
+ 	i = 0;
+ 	cpu = first_cpu(cpu_online_map);
+ 
+-	rcu_read_lock();
+-	list_for_each_entry(chan, &net_dma_client->channels, client_node) {
+-		n = ((num_online_cpus() / net_dma_count)
+-		   + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
++	for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
++		chan = net_dma->channels[chan_idx];
++
++		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
++		   + (i < (num_online_cpus() %
++			cpus_weight(net_dma->channel_mask)) ? 1 : 0));
+ 
+ 		while(n) {
+ 			per_cpu(softnet_data, cpu).net_dma = chan;
+@@ -3595,7 +3634,6 @@
+ 		}
+ 		i++;
+ 	}
+-	rcu_read_unlock();
+ }
+ 
+ /**
+@@ -3604,23 +3642,53 @@
+  * @chan: DMA channel for the event
+  * @event: event type
+  */
+-static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
+-	enum dma_event event)
+-{
+-	spin_lock(&net_dma_event_lock);
+-	switch (event) {
+-	case DMA_RESOURCE_ADDED:
+-		net_dma_count++;
+-		net_dma_rebalance();
++static enum dma_state_client
++netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
++	enum dma_state state)
++{
++	int i, found = 0, pos = -1;
++	struct net_dma *net_dma =
++		container_of(client, struct net_dma, client);
++	enum dma_state_client ack = DMA_DUP; /* default: take no action */
++
++	spin_lock(&net_dma->lock);
++	switch (state) {
++	case DMA_RESOURCE_AVAILABLE:
++		for (i = 0; i < NR_CPUS; i++)
++			if (net_dma->channels[i] == chan) {
++				found = 1;
++				break;
++			} else if (net_dma->channels[i] == NULL && pos < 0)
++				pos = i;
++
++		if (!found && pos >= 0) {
++			ack = DMA_ACK;
++			net_dma->channels[pos] = chan;
++			cpu_set(pos, net_dma->channel_mask);
++			net_dma_rebalance(net_dma);
++		}
+ 		break;
+ 	case DMA_RESOURCE_REMOVED:
+-		net_dma_count--;
+-		net_dma_rebalance();
++		for (i = 0; i < NR_CPUS; i++)
++			if (net_dma->channels[i] == chan) {
++				found = 1;
++				pos = i;
++				break;
++			}
++
++		if (found) {
++			ack = DMA_ACK;
++			cpu_clear(pos, net_dma->channel_mask);
++			net_dma->channels[i] = NULL;
++			net_dma_rebalance(net_dma);
++		}
+ 		break;
+ 	default:
+ 		break;
+ 	}
+-	spin_unlock(&net_dma_event_lock);
++	spin_unlock(&net_dma->lock);
++
++	return ack;
+ }
+ 
+ /**
+@@ -3628,12 +3696,10 @@
+  */
+ static int __init netdev_dma_register(void)
+ {
+-	spin_lock_init(&net_dma_event_lock);
+-	net_dma_client = dma_async_client_register(netdev_dma_event);
+-	if (net_dma_client == NULL)
+-		return -ENOMEM;
+-
+-	dma_async_client_chan_request(net_dma_client, num_online_cpus());
++	spin_lock_init(&net_dma.lock);
++	dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
++	dma_async_client_register(&net_dma.client);
++	dma_async_client_chan_request(&net_dma.client);
+ 	return 0;
+ }
+ 
+diff -Nurb linux-2.6.22-570/net/core/netpoll.c linux-2.6.22-try2/net/core/netpoll.c
+--- linux-2.6.22-570/net/core/netpoll.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/net/core/netpoll.c	2007-12-19 15:29:24.000000000 -0500
+@@ -503,7 +503,8 @@
+ 
+ 	np->rx_hook(np, ntohs(uh->source),
+ 		    (char *)(uh+1),
+-		    ulen - sizeof(struct udphdr));
++		    ulen - sizeof(struct udphdr),
++		    skb);
+ 
+ 	kfree_skb(skb);
+ 	return 1;
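
The extra skb argument widens the netpoll receive callback so clients see the whole packet, not just the UDP payload. A sketch of a client hook matching the five-argument call above; the hook name and body are hypothetical:

#include <linux/kernel.h>
#include <linux/netpoll.h>
#include <linux/skbuff.h>

/* hypothetical client hook for the widened np->rx_hook() call */
static void my_rx_hook(struct netpoll *np, int source_port,
		       char *data, int len, struct sk_buff *skb)
{
	/* skb exposes metadata the payload-only hook lacked */
	printk(KERN_DEBUG "netpoll rx: %d bytes from port %d on %s\n",
	       len, source_port, skb->dev ? skb->dev->name : "?");
}
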
+diff -Nurb linux-2.6.22-570/net/core/pktgen.c linux-2.6.22-try2/net/core/pktgen.c
+--- linux-2.6.22-570/net/core/pktgen.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/net/core/pktgen.c	2007-12-19 15:29:24.000000000 -0500
+@@ -3284,6 +3284,8 @@
+ 
+ 	set_current_state(TASK_INTERRUPTIBLE);
+ 
++	set_freezable();
++
+ 	while (!kthread_should_stop()) {
+ 		pkt_dev = next_to_run(t);
+ 
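
set_freezable() opts the pktgen thread back into the freezer, since kthreads are now non-freezable by default; a thread that sets it is then expected to call try_to_freeze() somewhere in its loop. A generic sketch of that handshake (the worker body is illustrative, not pktgen's):

#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/sched.h>

/* sketch of a freezable kthread main loop */
static int worker_thread(void *arg)
{
	set_freezable();		/* opt in to the freezer */
	while (!kthread_should_stop()) {
		try_to_freeze();	/* park here across suspend */
		/* ... one unit of work ... */
		schedule_timeout_interruptible(HZ);
	}
	return 0;
}
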
+diff -Nurb linux-2.6.22-570/net/core/rtnetlink.c linux-2.6.22-try2/net/core/rtnetlink.c
+--- linux-2.6.22-570/net/core/rtnetlink.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/net/core/rtnetlink.c	2007-12-19 15:29:23.000000000 -0500
+@@ -97,6 +97,19 @@
+ 	return 0;
+ }
+ 
++int __rtattr_parse_nested_compat(struct rtattr *tb[], int maxattr,
++			         struct rtattr *rta, int len)
++{
++	if (RTA_PAYLOAD(rta) < len)
++		return -1;
++	if (RTA_PAYLOAD(rta) >= RTA_ALIGN(len) + sizeof(struct rtattr)) {
++		rta = RTA_DATA(rta) + RTA_ALIGN(len);
++		return rtattr_parse_nested(tb, maxattr, rta);
++	}
++	memset(tb, 0, sizeof(struct rtattr *) * maxattr);
++	return 0;
++}
++
+ static struct rtnl_link *rtnl_msg_handlers[NPROTO];
+ 
+ static inline int rtm_msgindex(int msgtype)
+@@ -243,6 +256,143 @@
+ 
+ EXPORT_SYMBOL_GPL(rtnl_unregister_all);
+ 
++static LIST_HEAD(link_ops);
++
++/**
++ * __rtnl_link_register - Register rtnl_link_ops with rtnetlink.
++ * @ops: struct rtnl_link_ops * to register
++ *
++ * The caller must hold the rtnl_mutex. This function should be used
++ * by drivers that create devices during module initialization. It
++ * must be called before registering the devices.
++ *
++ * Returns 0 on success or a negative error code.
++ */
++int __rtnl_link_register(struct rtnl_link_ops *ops)
++{
++	list_add_tail(&ops->list, &link_ops);
++	return 0;
++}
++
++EXPORT_SYMBOL_GPL(__rtnl_link_register);
++
++/**
++ * rtnl_link_register - Register rtnl_link_ops with rtnetlink.
++ * @ops: struct rtnl_link_ops * to register
++ *
++ * Returns 0 on success or a negative error code.
++ */
++int rtnl_link_register(struct rtnl_link_ops *ops)
++{
++	int err;
++
++	rtnl_lock();
++	err = __rtnl_link_register(ops);
++	rtnl_unlock();
++	return err;
++}
++
++EXPORT_SYMBOL_GPL(rtnl_link_register);
++
++/**
++ * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
++ * @ops: struct rtnl_link_ops * to unregister
++ *
++ * The caller must hold the rtnl_mutex. This function should be used
++ * by drivers that unregister devices during module unloading. It must
++ * be called after unregistering the devices.
++ */
++void __rtnl_link_unregister(struct rtnl_link_ops *ops)
++{
++	list_del(&ops->list);
++}
++
++EXPORT_SYMBOL_GPL(__rtnl_link_unregister);
++
++/**
++ * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
++ * @ops: struct rtnl_link_ops * to unregister
++ */
++void rtnl_link_unregister(struct rtnl_link_ops *ops)
++{
++	rtnl_lock();
++	__rtnl_link_unregister(ops);
++	rtnl_unlock();
++}
++
++EXPORT_SYMBOL_GPL(rtnl_link_unregister);
++
++static const struct rtnl_link_ops *rtnl_link_ops_get(const char *kind)
++{
++	const struct rtnl_link_ops *ops;
++
++	list_for_each_entry(ops, &link_ops, list) {
++		if (!strcmp(ops->kind, kind))
++			return ops;
++	}
++	return NULL;
++}
++
++static size_t rtnl_link_get_size(const struct net_device *dev)
++{
++	const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
++	size_t size;
++
++	if (!ops)
++		return 0;
++
++	size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
++	       nlmsg_total_size(strlen(ops->kind) + 1);	 /* IFLA_INFO_KIND */
++
++	if (ops->get_size)
++		/* IFLA_INFO_DATA + nested data */
++		size += nlmsg_total_size(sizeof(struct nlattr)) +
++			ops->get_size(dev);
++
++	if (ops->get_xstats_size)
++		size += ops->get_xstats_size(dev);	/* IFLA_INFO_XSTATS */
++
++	return size;
++}
++
++static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev)
++{
++	const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
++	struct nlattr *linkinfo, *data;
++	int err = -EMSGSIZE;
++
++	linkinfo = nla_nest_start(skb, IFLA_LINKINFO);
++	if (linkinfo == NULL)
++		goto out;
++
++	if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0)
++		goto err_cancel_link;
++	if (ops->fill_xstats) {
++		err = ops->fill_xstats(skb, dev);
++		if (err < 0)
++			goto err_cancel_link;
++	}
++	if (ops->fill_info) {
++		data = nla_nest_start(skb, IFLA_INFO_DATA);
++		if (data == NULL)
++			goto err_cancel_link;
++		err = ops->fill_info(skb, dev);
++		if (err < 0)
++			goto err_cancel_data;
++		nla_nest_end(skb, data);
++	}
++
++	nla_nest_end(skb, linkinfo);
++	return 0;
++
++err_cancel_data:
++	nla_nest_cancel(skb, data);
++err_cancel_link:
++	nla_nest_cancel(skb, linkinfo);
++out:
++	return err;
++}
++
+ static const int rtm_min[RTM_NR_FAMILIES] =
+ {
+ 	[RTM_FAM(RTM_NEWLINK)]      = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
+@@ -437,7 +587,7 @@
+ 	a->tx_compressed = b->tx_compressed;
+ };
+ 
+-static inline size_t if_nlmsg_size(void)
++static inline size_t if_nlmsg_size(const struct net_device *dev)
+ {
+ 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
+ 	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
+@@ -452,7 +602,8 @@
+ 	       + nla_total_size(4) /* IFLA_LINK */
+ 	       + nla_total_size(4) /* IFLA_MASTER */
+ 	       + nla_total_size(1) /* IFLA_OPERSTATE */
+-	       + nla_total_size(1); /* IFLA_LINKMODE */
++	       + nla_total_size(1) /* IFLA_LINKMODE */
++	       + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
+ }
+ 
+ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
+@@ -522,6 +673,11 @@
+ 		}
+ 	}
+ 
++	if (dev->rtnl_link_ops) {
++		if (rtnl_link_fill(skb, dev) < 0)
++			goto nla_put_failure;
++	}
++
+ 	return nlmsg_end(skb, nlh);
+ 
+ nla_put_failure:
+@@ -555,6 +711,8 @@
+ 
+ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
+ 	[IFLA_IFNAME]		= { .type = NLA_STRING, .len = IFNAMSIZ-1 },
++	[IFLA_ADDRESS]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
++	[IFLA_BROADCAST]	= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
+ 	[IFLA_MAP]		= { .len = sizeof(struct rtnl_link_ifmap) },
+ 	[IFLA_MTU]		= { .type = NLA_U32 },
+ 	[IFLA_TXQLEN]		= { .type = NLA_U32 },
+@@ -563,44 +721,16 @@
+ 	[IFLA_LINKMODE]		= { .type = NLA_U8 },
+ };
+ 
+-static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+-{
+-	struct ifinfomsg *ifm;
+-	struct net_device *dev;
+-	int err, send_addr_notify = 0, modified = 0;
+-	struct nlattr *tb[IFLA_MAX+1];
+-	char ifname[IFNAMSIZ];
+-
+-	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
+-	if (err < 0)
+-		goto errout;
+-
+-	if (tb[IFLA_IFNAME])
+-		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+-	else
+-		ifname[0] = '\0';
+-
+-	err = -EINVAL;
+-	ifm = nlmsg_data(nlh);
+-	if (ifm->ifi_index > 0)
+-		dev = dev_get_by_index(ifm->ifi_index);
+-	else if (tb[IFLA_IFNAME])
+-		dev = dev_get_by_name(ifname);
+-	else
+-		goto errout;
+-
+-	if (dev == NULL) {
+-		err = -ENODEV;
+-		goto errout;
+-	}
+-
+-	if (tb[IFLA_ADDRESS] &&
+-	    nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
+-		goto errout_dev;
++static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
++	[IFLA_INFO_KIND]	= { .type = NLA_STRING },
++	[IFLA_INFO_DATA]	= { .type = NLA_NESTED },
++};
+ 
+-	if (tb[IFLA_BROADCAST] &&
+-	    nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
+-		goto errout_dev;
++static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
++		      struct nlattr **tb, char *ifname, int modified)
++{
++	int send_addr_notify = 0;
++	int err;
+ 
+ 	if (tb[IFLA_MAP]) {
+ 		struct rtnl_link_ifmap *u_map;
+@@ -608,12 +738,12 @@
+ 
+ 		if (!dev->set_config) {
+ 			err = -EOPNOTSUPP;
+-			goto errout_dev;
++			goto errout;
+ 		}
+ 
+ 		if (!netif_device_present(dev)) {
+ 			err = -ENODEV;
+-			goto errout_dev;
++			goto errout;
+ 		}
+ 
+ 		u_map = nla_data(tb[IFLA_MAP]);
+@@ -626,7 +756,7 @@
+ 
+ 		err = dev->set_config(dev, &k_map);
+ 		if (err < 0)
+-			goto errout_dev;
++			goto errout;
+ 
+ 		modified = 1;
+ 	}
+@@ -637,19 +767,19 @@
+ 
+ 		if (!dev->set_mac_address) {
+ 			err = -EOPNOTSUPP;
+-			goto errout_dev;
++			goto errout;
+ 		}
+ 
+ 		if (!netif_device_present(dev)) {
+ 			err = -ENODEV;
+-			goto errout_dev;
++			goto errout;
+ 		}
+ 
+ 		len = sizeof(sa_family_t) + dev->addr_len;
+ 		sa = kmalloc(len, GFP_KERNEL);
+ 		if (!sa) {
+ 			err = -ENOMEM;
+-			goto errout_dev;
++			goto errout;
+ 		}
+ 		sa->sa_family = dev->type;
+ 		memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
+@@ -657,7 +787,7 @@
+ 		err = dev->set_mac_address(dev, sa);
+ 		kfree(sa);
+ 		if (err)
+-			goto errout_dev;
++			goto errout;
+ 		send_addr_notify = 1;
+ 		modified = 1;
+ 	}
+@@ -665,7 +795,7 @@
+ 	if (tb[IFLA_MTU]) {
+ 		err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
+ 		if (err < 0)
+-			goto errout_dev;
++			goto errout;
+ 		modified = 1;
+ 	}
+ 
+@@ -677,7 +807,7 @@
+ 	if (ifm->ifi_index > 0 && ifname[0]) {
+ 		err = dev_change_name(dev, ifname);
+ 		if (err < 0)
+-			goto errout_dev;
++			goto errout;
+ 		modified = 1;
+ 	}
+ 
+@@ -686,7 +816,6 @@
+ 		send_addr_notify = 1;
+ 	}
+ 
+-
+ 	if (ifm->ifi_flags || ifm->ifi_change) {
+ 		unsigned int flags = ifm->ifi_flags;
+ 
+@@ -714,7 +843,7 @@
+ 
+ 	err = 0;
+ 
+-errout_dev:
++errout:
+ 	if (err < 0 && modified && net_ratelimit())
+ 		printk(KERN_WARNING "A link change request failed with "
+ 		       "some changes committed already. Interface %s may "
+@@ -723,12 +852,231 @@
+ 
+ 	if (send_addr_notify)
+ 		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
++	return err;
++}
+ 
++static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
++{
++	struct ifinfomsg *ifm;
++	struct net_device *dev;
++	int err;
++	struct nlattr *tb[IFLA_MAX+1];
++	char ifname[IFNAMSIZ];
++
++	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
++	if (err < 0)
++		goto errout;
++
++	if (tb[IFLA_IFNAME])
++		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
++	else
++		ifname[0] = '\0';
++
++	err = -EINVAL;
++	ifm = nlmsg_data(nlh);
++	if (ifm->ifi_index > 0)
++		dev = dev_get_by_index(ifm->ifi_index);
++	else if (tb[IFLA_IFNAME])
++		dev = dev_get_by_name(ifname);
++	else
++		goto errout;
++
++	if (dev == NULL) {
++		err = -ENODEV;
++		goto errout;
++	}
++
++	if (tb[IFLA_ADDRESS] &&
++	    nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
++		goto errout_dev;
++
++	if (tb[IFLA_BROADCAST] &&
++	    nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
++		goto errout_dev;
++
++	err = do_setlink(dev, ifm, tb, ifname, 0);
++errout_dev:
+ 	dev_put(dev);
+ errout:
+ 	return err;
+ }
+ 
++static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
++{
++	const struct rtnl_link_ops *ops;
++	struct net_device *dev;
++	struct ifinfomsg *ifm;
++	char ifname[IFNAMSIZ];
++	struct nlattr *tb[IFLA_MAX+1];
++	int err;
++
++	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
++	if (err < 0)
++		return err;
++
++	if (tb[IFLA_IFNAME])
++		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
++
++	ifm = nlmsg_data(nlh);
++	if (ifm->ifi_index > 0)
++		dev = __dev_get_by_index(ifm->ifi_index);
++	else if (tb[IFLA_IFNAME])
++		dev = __dev_get_by_name(ifname);
++	else
++		return -EINVAL;
++
++	if (!dev)
++		return -ENODEV;
++
++	ops = dev->rtnl_link_ops;
++	if (!ops)
++		return -EOPNOTSUPP;
++
++	ops->dellink(dev);
++	return 0;
++}
++
++static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
++{
++	const struct rtnl_link_ops *ops;
++	struct net_device *dev;
++	struct ifinfomsg *ifm;
++	char kind[MODULE_NAME_LEN];
++	char ifname[IFNAMSIZ];
++	struct nlattr *tb[IFLA_MAX+1];
++	struct nlattr *linkinfo[IFLA_INFO_MAX+1];
++	int err;
++
++replay:
++	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
++	if (err < 0)
++		return err;
++
++	if (tb[IFLA_IFNAME])
++		nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
++	else
++		ifname[0] = '\0';
++
++	ifm = nlmsg_data(nlh);
++	if (ifm->ifi_index > 0)
++		dev = __dev_get_by_index(ifm->ifi_index);
++	else if (ifname[0])
++		dev = __dev_get_by_name(ifname);
++	else
++		dev = NULL;
++
++	if (tb[IFLA_LINKINFO]) {
++		err = nla_parse_nested(linkinfo, IFLA_INFO_MAX,
++				       tb[IFLA_LINKINFO], ifla_info_policy);
++		if (err < 0)
++			return err;
++	} else
++		memset(linkinfo, 0, sizeof(linkinfo));
++
++	if (linkinfo[IFLA_INFO_KIND]) {
++		nla_strlcpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
++		ops = rtnl_link_ops_get(kind);
++	} else {
++		kind[0] = '\0';
++		ops = NULL;
++	}
++
++	if (1) {
++		struct nlattr *attr[ops ? ops->maxtype + 1 : 0], **data = NULL;
++
++		if (ops) {
++			if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
++				err = nla_parse_nested(attr, ops->maxtype,
++						       linkinfo[IFLA_INFO_DATA],
++						       ops->policy);
++				if (err < 0)
++					return err;
++				data = attr;
++			}
++			if (ops->validate) {
++				err = ops->validate(tb, data);
++				if (err < 0)
++					return err;
++			}
++		}
++
++		if (dev) {
++			int modified = 0;
++
++			if (nlh->nlmsg_flags & NLM_F_EXCL)
++				return -EEXIST;
++			if (nlh->nlmsg_flags & NLM_F_REPLACE)
++				return -EOPNOTSUPP;
++
++			if (linkinfo[IFLA_INFO_DATA]) {
++				if (!ops || ops != dev->rtnl_link_ops ||
++				    !ops->changelink)
++					return -EOPNOTSUPP;
++
++				err = ops->changelink(dev, tb, data);
++				if (err < 0)
++					return err;
++				modified = 1;
++			}
++
++			return do_setlink(dev, ifm, tb, ifname, modified);
++		}
++
++		if (!(nlh->nlmsg_flags & NLM_F_CREATE))
++			return -ENODEV;
++
++		if (ifm->ifi_index || ifm->ifi_flags || ifm->ifi_change)
++			return -EOPNOTSUPP;
++		if (tb[IFLA_ADDRESS] || tb[IFLA_BROADCAST] || tb[IFLA_MAP] ||
++		    tb[IFLA_MASTER] || tb[IFLA_PROTINFO])
++			return -EOPNOTSUPP;
++
++		if (!ops) {
++#ifdef CONFIG_KMOD
++			if (kind[0]) {
++				__rtnl_unlock();
++				request_module("rtnl-link-%s", kind);
++				rtnl_lock();
++				ops = rtnl_link_ops_get(kind);
++				if (ops)
++					goto replay;
++			}
++#endif
++			return -EOPNOTSUPP;
++		}
++
++		if (!ifname[0])
++			snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
++		dev = alloc_netdev(ops->priv_size, ifname, ops->setup);
++		if (!dev)
++			return -ENOMEM;
++
++		if (strchr(dev->name, '%')) {
++			err = dev_alloc_name(dev, dev->name);
++			if (err < 0)
++				goto err_free;
++		}
++		dev->rtnl_link_ops = ops;
++
++		if (tb[IFLA_MTU])
++			dev->mtu = nla_get_u32(tb[IFLA_MTU]);
++		if (tb[IFLA_TXQLEN])
++			dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
++		if (tb[IFLA_WEIGHT])
++			dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
++		if (tb[IFLA_OPERSTATE])
++			set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
++		if (tb[IFLA_LINKMODE])
++			dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
++
++		err = ops->newlink(dev, tb, data);
++err_free:
++		if (err < 0)
++			free_netdev(dev);
++		return err;
++	}
++}
++
+ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
+ 	struct ifinfomsg *ifm;
+@@ -749,7 +1097,7 @@
+ 	} else
+ 		return -EINVAL;
+ 
+-	nskb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL);
++	nskb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
+ 	if (nskb == NULL) {
+ 		err = -ENOBUFS;
+ 		goto errout;
+@@ -802,7 +1150,7 @@
+ 	if (!nx_dev_visible(current->nx_info, dev))
+ 		return;
+ 
+-	skb = nlmsg_new(if_nlmsg_size(), GFP_KERNEL);
++	skb = nlmsg_new(if_nlmsg_size(dev), GFP_KERNEL);
+ 	if (skb == NULL)
+ 		goto errout;
+ 
+@@ -957,6 +1305,8 @@
+ 
+ 	rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, rtnl_dump_ifinfo);
+ 	rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL);
++	rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL);
++	rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL);
+ 
+ 	rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all);
+ 	rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all);
+@@ -965,6 +1315,7 @@
+ EXPORT_SYMBOL(__rta_fill);
+ EXPORT_SYMBOL(rtattr_strlcpy);
+ EXPORT_SYMBOL(rtattr_parse);
++EXPORT_SYMBOL(__rtattr_parse_nested_compat);
+ EXPORT_SYMBOL(rtnetlink_put_metrics);
+ EXPORT_SYMBOL(rtnl_lock);
+ EXPORT_SYMBOL(rtnl_trylock);
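
Together, the registration helpers and the new rtnl_newlink()/rtnl_dellink() handlers let a driver expose its device type to 'ip link add'/'ip link del'. A minimal, hypothetical skeleton against the ops fields the code above actually uses ('dummy0' and all helper names are invented; kind, setup, newlink and dellink are the fields the handlers require):

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <net/rtnetlink.h>	/* assumed home of struct rtnl_link_ops */

static void dummy0_setup(struct net_device *dev)
{
	ether_setup(dev);	/* plain Ethernet-style device */
}

/* called under rtnl on the device alloc_netdev()'d by rtnl_newlink() */
static int dummy0_newlink(struct net_device *dev,
			  struct nlattr *tb[], struct nlattr *data[])
{
	return register_netdevice(dev);
}

static void dummy0_dellink(struct net_device *dev)
{
	unregister_netdevice(dev);
}

static struct rtnl_link_ops dummy0_link_ops = {
	.kind		= "dummy0",
	.setup		= dummy0_setup,
	.newlink	= dummy0_newlink,
	.dellink	= dummy0_dellink,
};

static int __init dummy0_init(void)
{
	return rtnl_link_register(&dummy0_link_ops);
}

static void __exit dummy0_exit(void)
{
	rtnl_link_unregister(&dummy0_link_ops);
}

module_init(dummy0_init);
module_exit(dummy0_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS("rtnl-link-dummy0");	/* matches request_module("rtnl-link-%s") */

With such a module available, 'ip link add type dummy0' would reach rtnl_newlink() above, loading the module through the alias if necessary.
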
+diff -Nurb linux-2.6.22-570/net/core/skbuff.c linux-2.6.22-try2/net/core/skbuff.c
+--- linux-2.6.22-570/net/core/skbuff.c	2007-12-12 18:08:39.000000000 -0500
++++ linux-2.6.22-try2/net/core/skbuff.c	2007-12-19 15:29:24.000000000 -0500
+@@ -417,6 +417,7 @@
+ 	C(csum);
+ 	C(local_df);
+ 	n->cloned = 1;
++	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
+ 	n->nohdr = 0;
+ 	C(pkt_type);
+ 	C(ip_summed);
+@@ -681,6 +682,7 @@
+ 	skb->network_header   += off;
+ 	skb->mac_header	      += off;
+ 	skb->cloned   = 0;
++	skb->hdr_len  = 0;
+ 	skb->nohdr    = 0;
+ 	atomic_set(&skb_shinfo(skb)->dataref, 1);
+ 	return 0;
+@@ -2012,13 +2014,13 @@
+ 	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
+ 					      sizeof(struct sk_buff),
+ 					      0,
+-					      SLAB_HWCACHE_ALIGN|SLAB_PANIC,
++				SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TEMPORARY,
+ 					      NULL, NULL);
+ 	skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache",
+ 						(2*sizeof(struct sk_buff)) +
+ 						sizeof(atomic_t),
+ 						0,
+-						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
++				SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TEMPORARY,
+ 						NULL, NULL);
+ }
+ 
+diff -Nurb linux-2.6.22-570/net/dccp/ccids/ccid3.c linux-2.6.22-try2/net/dccp/ccids/ccid3.c
+--- linux-2.6.22-570/net/dccp/ccids/ccid3.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/dccp/ccids/ccid3.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+  *  net/dccp/ccids/ccid3.c
+  *
+- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+- *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
++ *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
++ *  Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
+  *
+  *  An implementation of the DCCP protocol
+  *
+@@ -49,7 +49,6 @@
+ 
+ static struct dccp_tx_hist *ccid3_tx_hist;
+ static struct dccp_rx_hist *ccid3_rx_hist;
+-static struct dccp_li_hist *ccid3_li_hist;
+ 
+ /*
+  *	Transmitter Half-Connection Routines
+@@ -194,25 +193,20 @@
+  *	The algorithm is not applicable if RTT < 4 microseconds.
+  */
+ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hctx,
+-						struct timeval *now)
++						ktime_t now)
+ {
+-	suseconds_t delta;
+ 	u32 quarter_rtts;
+ 
+ 	if (unlikely(hctx->ccid3hctx_rtt < 4))	/* avoid divide-by-zero */
+ 		return;
+ 
+-	delta = timeval_delta(now, &hctx->ccid3hctx_t_last_win_count);
+-	DCCP_BUG_ON(delta < 0);
+-
+-	quarter_rtts = (u32)delta / (hctx->ccid3hctx_rtt / 4);
++	quarter_rtts = ktime_us_delta(now, hctx->ccid3hctx_t_last_win_count);
++	quarter_rtts /= hctx->ccid3hctx_rtt / 4;
+ 
+ 	if (quarter_rtts > 0) {
+-		hctx->ccid3hctx_t_last_win_count = *now;
++		hctx->ccid3hctx_t_last_win_count = now;
+ 		hctx->ccid3hctx_last_win_count	+= min_t(u32, quarter_rtts, 5);
+ 		hctx->ccid3hctx_last_win_count	&= 0xF;		/* mod 16 */
+-
+-		ccid3_pr_debug("now at %#X\n", hctx->ccid3hctx_last_win_count);
+ 	}
+ }
+ 
+@@ -312,8 +306,8 @@
+ {
+ 	struct dccp_sock *dp = dccp_sk(sk);
+ 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
+-	struct timeval now;
+-	suseconds_t delay;
++	ktime_t now = ktime_get_real();
++	s64 delay;
+ 
+ 	BUG_ON(hctx == NULL);
+ 
+@@ -325,8 +319,6 @@
+ 	if (unlikely(skb->len == 0))
+ 		return -EBADMSG;
+ 
+-	dccp_timestamp(sk, &now);
+-
+ 	switch (hctx->ccid3hctx_state) {
+ 	case TFRC_SSTATE_NO_SENT:
+ 		sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+@@ -349,7 +341,7 @@
+ 			ccid3_pr_debug("SYN RTT = %uus\n", dp->dccps_syn_rtt);
+ 			hctx->ccid3hctx_rtt  = dp->dccps_syn_rtt;
+ 			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
+-			hctx->ccid3hctx_t_ld = now;
++			hctx->ccid3hctx_t_ld = ktime_to_timeval(now);
+ 		} else {
+ 			/* Sender does not have RTT sample: X = MSS/second */
+ 			hctx->ccid3hctx_x = dp->dccps_mss_cache;
+@@ -361,7 +353,7 @@
+ 		break;
+ 	case TFRC_SSTATE_NO_FBACK:
+ 	case TFRC_SSTATE_FBACK:
+-		delay = timeval_delta(&hctx->ccid3hctx_t_nom, &now);
++		delay = ktime_us_delta(hctx->ccid3hctx_t_nom, now);
+ 		ccid3_pr_debug("delay=%ld\n", (long)delay);
+ 		/*
+ 		 *	Scheduling of packet transmissions [RFC 3448, 4.6]
+@@ -371,10 +363,10 @@
+ 		 * else
+ 		 *       // send the packet in (t_nom - t_now) milliseconds.
+ 		 */
+-		if (delay - (suseconds_t)hctx->ccid3hctx_delta >= 0)
+-			return delay / 1000L;
++		if (delay - (s64)hctx->ccid3hctx_delta >= 1000)
++			return (u32)delay / 1000L;
+ 
+-		ccid3_hc_tx_update_win_count(hctx, &now);
++		ccid3_hc_tx_update_win_count(hctx, now);
+ 		break;
+ 	case TFRC_SSTATE_TERM:
+ 		DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
+@@ -387,8 +379,8 @@
+ 	hctx->ccid3hctx_idle = 0;
+ 
+ 	/* set the nominal send time for the next following packet */
+-	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
+-
++	hctx->ccid3hctx_t_nom = ktime_add_us(hctx->ccid3hctx_t_nom,
++					     hctx->ccid3hctx_t_ipi);
+ 	return 0;
+ }
+ 
+@@ -819,154 +811,6 @@
+ 	return 0;
+ }
+ 
+-/* calculate first loss interval
+- *
+- * returns estimated loss interval in usecs */
+-
+-static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
+-{
+-	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+-	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
+-	u32 x_recv, p;
+-	suseconds_t rtt, delta;
+-	struct timeval tstamp = { 0, };
+-	int interval = 0;
+-	int win_count = 0;
+-	int step = 0;
+-	u64 fval;
+-
+-	list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist,
+-				 dccphrx_node) {
+-		if (dccp_rx_hist_entry_data_packet(entry)) {
+-			tail = entry;
+-
+-			switch (step) {
+-			case 0:
+-				tstamp	  = entry->dccphrx_tstamp;
+-				win_count = entry->dccphrx_ccval;
+-				step = 1;
+-				break;
+-			case 1:
+-				interval = win_count - entry->dccphrx_ccval;
+-				if (interval < 0)
+-					interval += TFRC_WIN_COUNT_LIMIT;
+-				if (interval > 4)
+-					goto found;
+-				break;
+-			}
+-		}
+-	}
+-
+-	if (unlikely(step == 0)) {
+-		DCCP_WARN("%s(%p), packet history has no data packets!\n",
+-			  dccp_role(sk), sk);
+-		return ~0;
+-	}
+-
+-	if (unlikely(interval == 0)) {
+-		DCCP_WARN("%s(%p), Could not find a win_count interval > 0."
+-			  "Defaulting to 1\n", dccp_role(sk), sk);
+-		interval = 1;
+-	}
+-found:
+-	if (!tail) {
+-		DCCP_CRIT("tail is null\n");
+-		return ~0;
+-	}
+-
+-	delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp);
+-	DCCP_BUG_ON(delta < 0);
+-
+-	rtt = delta * 4 / interval;
+-	ccid3_pr_debug("%s(%p), approximated RTT to %dus\n",
+-		       dccp_role(sk), sk, (int)rtt);
+-
+-	/*
+-	 * Determine the length of the first loss interval via inverse lookup.
+-	 * Assume that X_recv can be computed by the throughput equation
+-	 *		    s
+-	 *	X_recv = --------
+-	 *		 R * fval
+-	 * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
+-	 */
+-	if (rtt == 0) {			/* would result in divide-by-zero */
+-		DCCP_WARN("RTT==0\n");
+-		return ~0;
+-	}
+-
+-	dccp_timestamp(sk, &tstamp);
+-	delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
+-	DCCP_BUG_ON(delta <= 0);
+-
+-	x_recv = scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
+-	if (x_recv == 0) {		/* would also trigger divide-by-zero */
+-		DCCP_WARN("X_recv==0\n");
+-		if ((x_recv = hcrx->ccid3hcrx_x_recv) == 0) {
+-			DCCP_BUG("stored value of X_recv is zero");
+-			return ~0;
+-		}
+-	}
+-
+-	fval = scaled_div(hcrx->ccid3hcrx_s, rtt);
+-	fval = scaled_div32(fval, x_recv);
+-	p = tfrc_calc_x_reverse_lookup(fval);
+-
+-	ccid3_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
+-		       "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
+-
+-	if (p == 0)
+-		return ~0;
+-	else
+-		return 1000000 / p;
+-}
+-
+-static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
+-{
+-	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
+-	struct dccp_li_hist_entry *head;
+-	u64 seq_temp;
+-
+-	if (list_empty(&hcrx->ccid3hcrx_li_hist)) {
+-		if (!dccp_li_hist_interval_new(ccid3_li_hist,
+-		   &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss))
+-			return;
+-
+-		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
+-		   struct dccp_li_hist_entry, dccplih_node);
+-		head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk);
+-	} else {
+-		struct dccp_li_hist_entry *entry;
+-		struct list_head *tail;
+-
+-		head = list_entry(hcrx->ccid3hcrx_li_hist.next,
+-		   struct dccp_li_hist_entry, dccplih_node);
+-		/* FIXME win count check removed as was wrong */
+-		/* should make this check with receive history */
+-		/* and compare there as per section 10.2 of RFC4342 */
+-
+-		/* new loss event detected */
+-		/* calculate last interval length */
+-		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
+-		entry = dccp_li_hist_entry_new(ccid3_li_hist, GFP_ATOMIC);
+-
+-		if (entry == NULL) {
+-			DCCP_BUG("out of memory - can not allocate entry");
+-			return;
+-		}
+-
+-		list_add(&entry->dccplih_node, &hcrx->ccid3hcrx_li_hist);
+-
+-		tail = hcrx->ccid3hcrx_li_hist.prev;
+-		list_del(tail);
+-		kmem_cache_free(ccid3_li_hist->dccplih_slab, tail);
+-
+-		/* Create the newest interval */
+-		entry->dccplih_seqno = seq_loss;
+-		entry->dccplih_interval = seq_temp;
+-		entry->dccplih_win_count = win_loss;
+-	}
+-}
+-
+ static int ccid3_hc_rx_detect_loss(struct sock *sk,
+ 				    struct dccp_rx_hist_entry *packet)
+ {
+@@ -992,7 +836,14 @@
+ 	while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
+ 	   > TFRC_RECV_NUM_LATE_LOSS) {
+ 		loss = 1;
+-		ccid3_hc_rx_update_li(sk, hcrx->ccid3hcrx_seqno_nonloss,
++		dccp_li_update_li(sk,
++				  &hcrx->ccid3hcrx_li_hist,
++				  &hcrx->ccid3hcrx_hist,
++				  &hcrx->ccid3hcrx_tstamp_last_feedback,
++				  hcrx->ccid3hcrx_s,
++				  hcrx->ccid3hcrx_bytes_recv,
++				  hcrx->ccid3hcrx_x_recv,
++				  hcrx->ccid3hcrx_seqno_nonloss,
+ 		   hcrx->ccid3hcrx_ccval_nonloss);
+ 		tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
+ 		dccp_inc_seqno(&tmp_seqno);
+@@ -1152,7 +1003,7 @@
+ 	dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist);
+ 
+ 	/* Empty loss interval history */
+-	dccp_li_hist_purge(ccid3_li_hist, &hcrx->ccid3hcrx_li_hist);
++	dccp_li_hist_purge(&hcrx->ccid3hcrx_li_hist);
+ }
+ 
+ static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
+@@ -1236,19 +1087,12 @@
+ 	if (ccid3_tx_hist == NULL)
+ 		goto out_free_rx;
+ 
+-	ccid3_li_hist = dccp_li_hist_new("ccid3");
+-	if (ccid3_li_hist == NULL)
+-		goto out_free_tx;
+-
+ 	rc = ccid_register(&ccid3);
+ 	if (rc != 0)
+-		goto out_free_loss_interval_history;
++		goto out_free_tx;
+ out:
+ 	return rc;
+ 
+-out_free_loss_interval_history:
+-	dccp_li_hist_delete(ccid3_li_hist);
+-	ccid3_li_hist = NULL;
+ out_free_tx:
+ 	dccp_tx_hist_delete(ccid3_tx_hist);
+ 	ccid3_tx_hist = NULL;
+@@ -1271,10 +1115,6 @@
+ 		dccp_rx_hist_delete(ccid3_rx_hist);
+ 		ccid3_rx_hist = NULL;
+ 	}
+-	if (ccid3_li_hist != NULL) {
+-		dccp_li_hist_delete(ccid3_li_hist);
+-		ccid3_li_hist = NULL;
+-	}
+ }
+ module_exit(ccid3_module_exit);
+ 
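
The timeval-to-ktime_t conversion replaces each timeval_delta() pair with a single ktime_us_delta(). The window-counter update above then boils down to something like this sketch (the helper name is hypothetical; the arithmetic mirrors ccid3_hc_tx_update_win_count()):

#include <linux/ktime.h>
#include <linux/types.h>

/* hypothetical helper: quarter-RTTs elapsed since 'last' */
static u32 quarter_rtts_since(ktime_t last, u32 rtt_us)
{
	s64 delta_us;

	if (rtt_us < 4)			/* avoid divide-by-zero, as above */
		return 0;

	delta_us = ktime_us_delta(ktime_get_real(), last);
	return (u32)delta_us / (rtt_us / 4);
}
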
+diff -Nurb linux-2.6.22-570/net/dccp/ccids/ccid3.h linux-2.6.22-try2/net/dccp/ccids/ccid3.h
+--- linux-2.6.22-570/net/dccp/ccids/ccid3.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/dccp/ccids/ccid3.h	2007-12-19 15:29:23.000000000 -0500
+@@ -36,6 +36,7 @@
+ #ifndef _DCCP_CCID3_H_
+ #define _DCCP_CCID3_H_
+ 
++#include <linux/ktime.h>
+ #include <linux/list.h>
+ #include <linux/time.h>
+ #include <linux/types.h>
+@@ -108,10 +109,10 @@
+ 	enum ccid3_hc_tx_states		ccid3hctx_state:8;
+ 	u8				ccid3hctx_last_win_count;
+ 	u8				ccid3hctx_idle;
+-	struct timeval			ccid3hctx_t_last_win_count;
++	ktime_t				ccid3hctx_t_last_win_count;
+ 	struct timer_list		ccid3hctx_no_feedback_timer;
+ 	struct timeval			ccid3hctx_t_ld;
+-	struct timeval			ccid3hctx_t_nom;
++	ktime_t				ccid3hctx_t_nom;
+ 	u32				ccid3hctx_delta;
+ 	struct list_head		ccid3hctx_hist;
+ 	struct ccid3_options_received	ccid3hctx_options_received;
+diff -Nurb linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.c linux-2.6.22-try2/net/dccp/ccids/lib/loss_interval.c
+--- linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/dccp/ccids/lib/loss_interval.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+  *  net/dccp/ccids/lib/loss_interval.c
+  *
+- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+- *  Copyright (c) 2005-6 Ian McDonald <ian.mcdonald@jandi.co.nz>
++ *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
++ *  Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
+  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+  *
+  *  This program is free software; you can redistribute it and/or modify
+@@ -15,58 +15,38 @@
+ #include <net/sock.h>
+ #include "../../dccp.h"
+ #include "loss_interval.h"
++#include "packet_history.h"
++#include "tfrc.h"
+ 
+-struct dccp_li_hist *dccp_li_hist_new(const char *name)
+-{
+-	struct dccp_li_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC);
+-	static const char dccp_li_hist_mask[] = "li_hist_%s";
+-	char *slab_name;
+-
+-	if (hist == NULL)
+-		goto out;
+-
+-	slab_name = kmalloc(strlen(name) + sizeof(dccp_li_hist_mask) - 1,
+-			    GFP_ATOMIC);
+-	if (slab_name == NULL)
+-		goto out_free_hist;
++#define DCCP_LI_HIST_IVAL_F_LENGTH  8
+ 
+-	sprintf(slab_name, dccp_li_hist_mask, name);
+-	hist->dccplih_slab = kmem_cache_create(slab_name,
+-					     sizeof(struct dccp_li_hist_entry),
+-					       0, SLAB_HWCACHE_ALIGN,
+-					       NULL, NULL);
+-	if (hist->dccplih_slab == NULL)
+-		goto out_free_slab_name;
+-out:
+-	return hist;
+-out_free_slab_name:
+-	kfree(slab_name);
+-out_free_hist:
+-	kfree(hist);
+-	hist = NULL;
+-	goto out;
+-}
++struct dccp_li_hist_entry {
++	struct list_head dccplih_node;
++	u64		 dccplih_seqno:48,
++			 dccplih_win_count:4;
++	u32		 dccplih_interval;
++};
+ 
+-EXPORT_SYMBOL_GPL(dccp_li_hist_new);
++struct kmem_cache *dccp_li_cachep __read_mostly;
+ 
+-void dccp_li_hist_delete(struct dccp_li_hist *hist)
++static inline struct dccp_li_hist_entry *dccp_li_hist_entry_new(const gfp_t prio)
+ {
+-	const char* name = kmem_cache_name(hist->dccplih_slab);
+-
+-	kmem_cache_destroy(hist->dccplih_slab);
+-	kfree(name);
+-	kfree(hist);
++	return kmem_cache_alloc(dccp_li_cachep, prio);
+ }
+ 
+-EXPORT_SYMBOL_GPL(dccp_li_hist_delete);
++static inline void dccp_li_hist_entry_delete(struct dccp_li_hist_entry *entry)
++{
++	if (entry != NULL)
++		kmem_cache_free(dccp_li_cachep, entry);
++}
+ 
+-void dccp_li_hist_purge(struct dccp_li_hist *hist, struct list_head *list)
++void dccp_li_hist_purge(struct list_head *list)
+ {
+ 	struct dccp_li_hist_entry *entry, *next;
+ 
+ 	list_for_each_entry_safe(entry, next, list, dccplih_node) {
+ 		list_del_init(&entry->dccplih_node);
+-		kmem_cache_free(hist->dccplih_slab, entry);
++		kmem_cache_free(dccp_li_cachep, entry);
+ 	}
+ }
+ 
+@@ -118,16 +98,16 @@
+ 
+ EXPORT_SYMBOL_GPL(dccp_li_hist_calc_i_mean);
+ 
+-int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+-   struct list_head *list, const u64 seq_loss, const u8 win_loss)
++static int dccp_li_hist_interval_new(struct list_head *list,
++				     const u64 seq_loss, const u8 win_loss)
+ {
+ 	struct dccp_li_hist_entry *entry;
+ 	int i;
+ 
+ 	for (i = 0; i < DCCP_LI_HIST_IVAL_F_LENGTH; i++) {
+-		entry = dccp_li_hist_entry_new(hist, GFP_ATOMIC);
++		entry = dccp_li_hist_entry_new(GFP_ATOMIC);
+ 		if (entry == NULL) {
+-			dccp_li_hist_purge(hist, list);
++			dccp_li_hist_purge(list);
+ 			DCCP_BUG("loss interval list entry is NULL");
+ 			return 0;
+ 		}
+@@ -140,4 +120,176 @@
+ 	return 1;
+ }
+ 
+-EXPORT_SYMBOL_GPL(dccp_li_hist_interval_new);
++/* calculate first loss interval
++ *
++ * returns estimated loss interval in usecs */
++static u32 dccp_li_calc_first_li(struct sock *sk,
++				 struct list_head *hist_list,
++				 struct timeval *last_feedback,
++				 u16 s, u32 bytes_recv,
++				 u32 previous_x_recv)
++{
++	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
++	u32 x_recv, p;
++	suseconds_t rtt, delta;
++	struct timeval tstamp = { 0, 0 };
++	int interval = 0;
++	int win_count = 0;
++	int step = 0;
++	u64 fval;
++
++	list_for_each_entry_safe(entry, next, hist_list, dccphrx_node) {
++		if (dccp_rx_hist_entry_data_packet(entry)) {
++			tail = entry;
++
++			switch (step) {
++			case 0:
++				tstamp	  = entry->dccphrx_tstamp;
++				win_count = entry->dccphrx_ccval;
++				step = 1;
++				break;
++			case 1:
++				interval = win_count - entry->dccphrx_ccval;
++				if (interval < 0)
++					interval += TFRC_WIN_COUNT_LIMIT;
++				if (interval > 4)
++					goto found;
++				break;
++			}
++		}
++	}
++
++	if (unlikely(step == 0)) {
++		DCCP_WARN("%s(%p), packet history has no data packets!\n",
++			  dccp_role(sk), sk);
++		return ~0;
++	}
++
++		DCCP_WARN("%s(%p), Could not find a win_count interval > 0. "
++		DCCP_WARN("%s(%p), Could not find a win_count interval > 0."
++			  "Defaulting to 1\n", dccp_role(sk), sk);
++		interval = 1;
++	}
++found:
++	if (!tail) {
++		DCCP_CRIT("tail is null\n");
++		return ~0;
++	}
++
++	delta = timeval_delta(&tstamp, &tail->dccphrx_tstamp);
++	DCCP_BUG_ON(delta < 0);
++
++	rtt = delta * 4 / interval;
++	dccp_pr_debug("%s(%p), approximated RTT to %dus\n",
++		      dccp_role(sk), sk, (int)rtt);
++
++	/*
++	 * Determine the length of the first loss interval via inverse lookup.
++	 * Assume that X_recv can be computed by the throughput equation
++	 *		    s
++	 *	X_recv = --------
++	 *		 R * fval
++	 * Find some p such that f(p) = fval; return 1/p [RFC 3448, 6.3.1].
++	 */
++	if (rtt == 0) {			/* would result in divide-by-zero */
++		DCCP_WARN("RTT==0\n");
++		return ~0;
++	}
++
++	dccp_timestamp(sk, &tstamp);
++	delta = timeval_delta(&tstamp, last_feedback);
++	DCCP_BUG_ON(delta <= 0);
++
++	x_recv = scaled_div32(bytes_recv, delta);
++	if (x_recv == 0) {		/* would also trigger divide-by-zero */
++		DCCP_WARN("X_recv==0\n");
++		if (previous_x_recv == 0) {
++			DCCP_BUG("stored value of X_recv is zero");
++			return ~0;
++		}
++		x_recv = previous_x_recv;
++	}
++
++	fval = scaled_div(s, rtt);
++	fval = scaled_div32(fval, x_recv);
++	p = tfrc_calc_x_reverse_lookup(fval);
++
++	dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
++		      "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
++
++	if (p == 0)
++		return ~0;
++	else
++		return 1000000 / p;
++}
++
++void dccp_li_update_li(struct sock *sk,
++		       struct list_head *li_hist_list,
++		       struct list_head *hist_list,
++		       struct timeval *last_feedback, u16 s, u32 bytes_recv,
++                       u32 previous_x_recv, u64 seq_loss, u8 win_loss)
++{
++	struct dccp_li_hist_entry *head;
++	u64 seq_temp;
++
++	if (list_empty(li_hist_list)) {
++		if (!dccp_li_hist_interval_new(li_hist_list, seq_loss,
++					       win_loss))
++			return;
++
++		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
++				  dccplih_node);
++		head->dccplih_interval = dccp_li_calc_first_li(sk, hist_list,
++							       last_feedback,
++							       s, bytes_recv,
++							       previous_x_recv);
++	} else {
++		struct dccp_li_hist_entry *entry;
++		struct list_head *tail;
++
++		head = list_entry(li_hist_list->next, struct dccp_li_hist_entry,
++				  dccplih_node);
++		/* FIXME win count check removed as was wrong */
++		/* should make this check with receive history */
++		/* and compare there as per section 10.2 of RFC4342 */
++
++		/* new loss event detected */
++		/* calculate last interval length */
++		seq_temp = dccp_delta_seqno(head->dccplih_seqno, seq_loss);
++		entry = dccp_li_hist_entry_new(GFP_ATOMIC);
++
++		if (entry == NULL) {
++			DCCP_BUG("out of memory - cannot allocate entry");
++			return;
++		}
++
++		list_add(&entry->dccplih_node, li_hist_list);
++
++		tail = li_hist_list->prev;
++		list_del(tail);
++		kmem_cache_free(dccp_li_cachep, tail);
++
++		/* Create the newest interval */
++		entry->dccplih_seqno = seq_loss;
++		entry->dccplih_interval = seq_temp;
++		entry->dccplih_win_count = win_loss;
++	}
++}
++
++EXPORT_SYMBOL_GPL(dccp_li_update_li);
++
++static __init int dccp_li_init(void)
++{
++	dccp_li_cachep = kmem_cache_create("dccp_li_hist",
++					   sizeof(struct dccp_li_hist_entry),
++					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
++	return dccp_li_cachep == NULL ? -ENOBUFS : 0;
++}
++
++static __exit void dccp_li_exit(void)
++{
++	kmem_cache_destroy(dccp_li_cachep);
++}
++
++module_init(dccp_li_init);
++module_exit(dccp_li_exit);
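
The block comment in dccp_li_calc_first_li() inverts the TFRC throughput equation X_recv = s / (R * f(p)): compute f(p) from the measured s, R and X_recv, look up p, and report 1/p as the first interval. Stripped of the history walk and sanity checks, the core arithmetic is roughly (a sketch reusing the fixed-point helpers already called above):

#include "tfrc.h"	/* scaled_div(), scaled_div32(), tfrc_calc_x_reverse_lookup() */

/* sketch: first loss interval (usecs) from packet size s, RTT (usecs)
 * and receive rate x_recv, per RFC 3448, 6.3.1 */
static u32 first_li(u16 s, suseconds_t rtt, u32 x_recv)
{
	u64 fval = scaled_div(s, rtt);		/* s / R */
	u32 p;

	fval = scaled_div32(fval, x_recv);	/* (s / R) / X_recv = f(p) */
	p = tfrc_calc_x_reverse_lookup(fval);	/* invert f() to get p */

	return p == 0 ? ~0 : 1000000 / p;	/* interval = 1/p, in usecs */
}
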
+diff -Nurb linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.h linux-2.6.22-try2/net/dccp/ccids/lib/loss_interval.h
+--- linux-2.6.22-570/net/dccp/ccids/lib/loss_interval.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/dccp/ccids/lib/loss_interval.h	2007-12-19 15:29:23.000000000 -0500
+@@ -3,8 +3,8 @@
+ /*
+  *  net/dccp/ccids/lib/loss_interval.h
+  *
+- *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+- *  Copyright (c) 2005 Ian McDonald <ian.mcdonald@jandi.co.nz>
++ *  Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
++ *  Copyright (c) 2005-7 Ian McDonald <ian.mcdonald@jandi.co.nz>
+  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+  *
+  *  This program is free software; you can redistribute it and/or modify it
+@@ -14,44 +14,16 @@
+  */
+ 
+ #include <linux/list.h>
+-#include <linux/slab.h>
+ #include <linux/time.h>
+ 
+-#define DCCP_LI_HIST_IVAL_F_LENGTH  8
+-
+-struct dccp_li_hist {
+-	struct kmem_cache *dccplih_slab;
+-};
+-
+-extern struct dccp_li_hist *dccp_li_hist_new(const char *name);
+-extern void dccp_li_hist_delete(struct dccp_li_hist *hist);
+-
+-struct dccp_li_hist_entry {
+-	struct list_head dccplih_node;
+-	u64		 dccplih_seqno:48,
+-			 dccplih_win_count:4;
+-	u32		 dccplih_interval;
+-};
+-
+-static inline struct dccp_li_hist_entry *
+-		dccp_li_hist_entry_new(struct dccp_li_hist *hist,
+-				       const gfp_t prio)
+-{
+-	return kmem_cache_alloc(hist->dccplih_slab, prio);
+-}
+-
+-static inline void dccp_li_hist_entry_delete(struct dccp_li_hist *hist,
+-					     struct dccp_li_hist_entry *entry)
+-{
+-	if (entry != NULL)
+-		kmem_cache_free(hist->dccplih_slab, entry);
+-}
+-
+-extern void dccp_li_hist_purge(struct dccp_li_hist *hist,
+-			       struct list_head *list);
++extern void dccp_li_hist_purge(struct list_head *list);
+ 
+ extern u32 dccp_li_hist_calc_i_mean(struct list_head *list);
+ 
+-extern int dccp_li_hist_interval_new(struct dccp_li_hist *hist,
+-   struct list_head *list, const u64 seq_loss, const u8 win_loss);
++extern void dccp_li_update_li(struct sock *sk,
++			      struct list_head *li_hist_list,
++			      struct list_head *hist_list,
++			      struct timeval *last_feedback, u16 s,
++			      u32 bytes_recv, u32 previous_x_recv,
++			      u64 seq_loss, u8 win_loss);
+ #endif /* _DCCP_LI_HIST_ */
+diff -Nurb linux-2.6.22-570/net/dccp/dccp.h linux-2.6.22-try2/net/dccp/dccp.h
+--- linux-2.6.22-570/net/dccp/dccp.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/dccp/dccp.h	2007-12-19 15:29:23.000000000 -0500
+@@ -184,7 +184,7 @@
+ /*
+  * 	Checksumming routines
+  */
+-static inline int dccp_csum_coverage(const struct sk_buff *skb)
++static inline unsigned int dccp_csum_coverage(const struct sk_buff *skb)
+ {
+ 	const struct dccp_hdr* dh = dccp_hdr(skb);
+ 
+@@ -195,7 +195,7 @@
+ 
+ static inline void dccp_csum_outgoing(struct sk_buff *skb)
+ {
+-	int cov = dccp_csum_coverage(skb);
++	unsigned int cov = dccp_csum_coverage(skb);
+ 
+ 	if (cov >= skb->len)
+ 		dccp_hdr(skb)->dccph_cscov = 0;
+diff -Nurb linux-2.6.22-570/net/ipv4/Kconfig linux-2.6.22-try2/net/ipv4/Kconfig
+--- linux-2.6.22-570/net/ipv4/Kconfig	2007-12-12 18:08:39.000000000 -0500
++++ linux-2.6.22-try2/net/ipv4/Kconfig	2007-12-19 15:29:23.000000000 -0500
+@@ -116,48 +116,6 @@
+ 	  equal "cost" and chooses one of them in a non-deterministic fashion
+ 	  if a matching packet arrives.
+ 
+-config IP_ROUTE_MULTIPATH_CACHED
+-	bool "IP: equal cost multipath with caching support (EXPERIMENTAL)"
+-	depends on IP_ROUTE_MULTIPATH
+-	help
+-	  Normally, equal cost multipath routing is not supported by the
+-	  routing cache. If you say Y here, alternative routes are cached
+-	  and on cache lookup a route is chosen in a configurable fashion.
+-
+-	  If unsure, say N.
+-
+-config IP_ROUTE_MULTIPATH_RR
+-	tristate "MULTIPATH: round robin algorithm"
+-	depends on IP_ROUTE_MULTIPATH_CACHED
+-	help
+-	  Multipath routes are chosen according to Round Robin
+-
+-config IP_ROUTE_MULTIPATH_RANDOM
+-	tristate "MULTIPATH: random algorithm"
+-	depends on IP_ROUTE_MULTIPATH_CACHED
+-	help
+-	  Multipath routes are chosen in a random fashion. Actually,
+-	  there is no weight for a route. The advantage of this policy
+-	  is that it is implemented stateless and therefore introduces only
+-	  a very small delay.
+-
+-config IP_ROUTE_MULTIPATH_WRANDOM
+-	tristate "MULTIPATH: weighted random algorithm"
+-	depends on IP_ROUTE_MULTIPATH_CACHED
+-	help
+-	  Multipath routes are chosen in a weighted random fashion. 
+-	  The per route weights are the weights visible via ip route 2. As the
+-	  corresponding state management introduces some overhead routing delay
+-	  is increased.
+-
+-config IP_ROUTE_MULTIPATH_DRR
+-	tristate "MULTIPATH: interface round robin algorithm"
+-	depends on IP_ROUTE_MULTIPATH_CACHED
+-	help
+-	  Connections are distributed in a round robin fashion over the
+-	  available interfaces. This policy makes sense if the connections 
+-	  should be primarily distributed on interfaces and not on routes. 
+-
+ config IP_ROUTE_VERBOSE
+ 	bool "IP: verbose route monitoring"
+ 	depends on IP_ADVANCED_ROUTER
+diff -Nurb linux-2.6.22-570/net/ipv4/Makefile linux-2.6.22-try2/net/ipv4/Makefile
+--- linux-2.6.22-570/net/ipv4/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv4/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -29,14 +29,9 @@
+ obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
+ obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
+ obj-$(CONFIG_IP_PNP) += ipconfig.o
+-obj-$(CONFIG_IP_ROUTE_MULTIPATH_RR) += multipath_rr.o
+-obj-$(CONFIG_IP_ROUTE_MULTIPATH_RANDOM) += multipath_random.o
+-obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o
+-obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o
+ obj-$(CONFIG_NETFILTER)	+= netfilter.o netfilter/
+ obj-$(CONFIG_IP_VS) += ipvs/
+ obj-$(CONFIG_INET_DIAG) += inet_diag.o 
+-obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
+ obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o
+ obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o
+ obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o
+diff -Nurb linux-2.6.22-570/net/ipv4/af_inet.c linux-2.6.22-try2/net/ipv4/af_inet.c
+--- linux-2.6.22-570/net/ipv4/af_inet.c	2007-12-12 18:08:39.000000000 -0500
++++ linux-2.6.22-try2/net/ipv4/af_inet.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1183,6 +1183,9 @@
+ 	int ihl;
+ 	int id;
+ 
++	if (!(features & NETIF_F_V4_CSUM))
++		features &= ~NETIF_F_SG;
++
+ 	if (unlikely(skb_shinfo(skb)->gso_type &
+ 		     ~(SKB_GSO_TCPV4 |
+ 		       SKB_GSO_UDP |
+diff -Nurb linux-2.6.22-570/net/ipv4/ah4.c linux-2.6.22-try2/net/ipv4/ah4.c
+--- linux-2.6.22-570/net/ipv4/ah4.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/net/ipv4/ah4.c	2007-12-19 15:29:23.000000000 -0500
+@@ -339,3 +339,4 @@
+ module_init(ah4_init);
+ module_exit(ah4_fini);
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_AH);
+diff -Nurb linux-2.6.22-570/net/ipv4/esp4.c linux-2.6.22-try2/net/ipv4/esp4.c
+--- linux-2.6.22-570/net/ipv4/esp4.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv4/esp4.c	2007-12-19 15:29:23.000000000 -0500
+@@ -481,3 +481,4 @@
+ module_init(esp4_init);
+ module_exit(esp4_fini);
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP);
+diff -Nurb linux-2.6.22-570/net/ipv4/fib_frontend.c linux-2.6.22-try2/net/ipv4/fib_frontend.c
+--- linux-2.6.22-570/net/ipv4/fib_frontend.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv4/fib_frontend.c	2007-12-19 15:29:23.000000000 -0500
+@@ -453,7 +453,6 @@
+ 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
+ 	[RTA_PROTOINFO]		= { .type = NLA_U32 },
+ 	[RTA_FLOW]		= { .type = NLA_U32 },
+-	[RTA_MP_ALGO]		= { .type = NLA_U32 },
+ };
+ 
+ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh,
+@@ -515,9 +514,6 @@
+ 		case RTA_FLOW:
+ 			cfg->fc_flow = nla_get_u32(attr);
+ 			break;
+-		case RTA_MP_ALGO:
+-			cfg->fc_mp_alg = nla_get_u32(attr);
+-			break;
+ 		case RTA_TABLE:
+ 			cfg->fc_table = nla_get_u32(attr);
+ 			break;
+diff -Nurb linux-2.6.22-570/net/ipv4/fib_semantics.c linux-2.6.22-try2/net/ipv4/fib_semantics.c
+--- linux-2.6.22-570/net/ipv4/fib_semantics.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv4/fib_semantics.c	2007-12-19 15:29:23.000000000 -0500
+@@ -42,7 +42,6 @@
+ #include <net/tcp.h>
+ #include <net/sock.h>
+ #include <net/ip_fib.h>
+-#include <net/ip_mp_alg.h>
+ #include <net/netlink.h>
+ #include <net/nexthop.h>
+ 
+@@ -697,13 +696,6 @@
+ 			goto err_inval;
+ 	}
+ #endif
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	if (cfg->fc_mp_alg) {
+-		if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
+-		    cfg->fc_mp_alg > IP_MP_ALG_MAX)
+-			goto err_inval;
+-	}
+-#endif
+ 
+ 	err = -ENOBUFS;
+ 	if (fib_info_cnt >= fib_hash_size) {
+@@ -791,10 +783,6 @@
+ #endif
+ 	}
+ 
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	fi->fib_mp_alg = cfg->fc_mp_alg;
+-#endif
+-
+ 	if (fib_props[cfg->fc_type].error) {
+ 		if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
+ 			goto err_inval;
+@@ -940,10 +928,6 @@
+ 	res->type = fa->fa_type;
+ 	res->scope = fa->fa_scope;
+ 	res->fi = fa->fa_info;
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	res->netmask = mask;
+-	res->network = zone & inet_make_mask(prefixlen);
+-#endif
+ 	atomic_inc(&res->fi->fib_clntref);
+ 	return 0;
+ }
+diff -Nurb linux-2.6.22-570/net/ipv4/ip_output.c linux-2.6.22-try2/net/ipv4/ip_output.c
+--- linux-2.6.22-570/net/ipv4/ip_output.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv4/ip_output.c	2007-12-19 15:29:23.000000000 -0500
+@@ -837,7 +837,7 @@
+ 	 */
+ 	if (transhdrlen &&
+ 	    length + fragheaderlen <= mtu &&
+-	    rt->u.dst.dev->features & NETIF_F_ALL_CSUM &&
++	    rt->u.dst.dev->features & NETIF_F_V4_CSUM &&
+ 	    !exthdrlen)
+ 		csummode = CHECKSUM_PARTIAL;
+ 
+diff -Nurb linux-2.6.22-570/net/ipv4/ipcomp.c linux-2.6.22-try2/net/ipv4/ipcomp.c
+--- linux-2.6.22-570/net/ipv4/ipcomp.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/net/ipv4/ipcomp.c	2007-12-19 15:29:23.000000000 -0500
+@@ -486,3 +486,4 @@
+ MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173");
+ MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
+ 
++MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP);
+diff -Nurb linux-2.6.22-570/net/ipv4/multipath.c linux-2.6.22-try2/net/ipv4/multipath.c
+--- linux-2.6.22-570/net/ipv4/multipath.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv4/multipath.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,55 +0,0 @@
+-/* multipath.c: IPV4 multipath algorithm support.
+- *
+- * Copyright (C) 2004, 2005 Einar Lueck <elueck@de.ibm.com>
+- * Copyright (C) 2005 David S. Miller <davem@davemloft.net>
+- */
+-
+-#include <linux/module.h>
+-#include <linux/errno.h>
+-#include <linux/netdevice.h>
+-#include <linux/spinlock.h>
+-
+-#include <net/ip_mp_alg.h>
+-
+-static DEFINE_SPINLOCK(alg_table_lock);
+-struct ip_mp_alg_ops *ip_mp_alg_table[IP_MP_ALG_MAX + 1];
+-
+-int multipath_alg_register(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
+-{
+-	struct ip_mp_alg_ops **slot;
+-	int err;
+-
+-	if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX ||
+-	    !ops->mp_alg_select_route)
+-		return -EINVAL;
+-
+-	spin_lock(&alg_table_lock);
+-	slot = &ip_mp_alg_table[n];
+-	if (*slot != NULL) {
+-		err = -EBUSY;
+-	} else {
+-		*slot = ops;
+-		err = 0;
+-	}
+-	spin_unlock(&alg_table_lock);
+-
+-	return err;
+-}
+-EXPORT_SYMBOL(multipath_alg_register);
+-
+-void multipath_alg_unregister(struct ip_mp_alg_ops *ops, enum ip_mp_alg n)
+-{
+-	struct ip_mp_alg_ops **slot;
+-
+-	if (n < IP_MP_ALG_NONE || n > IP_MP_ALG_MAX)
+-		return;
+-
+-	spin_lock(&alg_table_lock);
+-	slot = &ip_mp_alg_table[n];
+-	if (*slot == ops)
+-		*slot = NULL;
+-	spin_unlock(&alg_table_lock);
+-
+-	synchronize_net();
+-}
+-EXPORT_SYMBOL(multipath_alg_unregister);
+diff -Nurb linux-2.6.22-570/net/ipv4/multipath_drr.c linux-2.6.22-try2/net/ipv4/multipath_drr.c
+--- linux-2.6.22-570/net/ipv4/multipath_drr.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv4/multipath_drr.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,249 +0,0 @@
+-/*
+- *              Device round robin policy for multipath.
+- *
+- *
+- * Version:	$Id: multipath_drr.c,v 1.1.2.1 2004/09/16 07:42:34 elueck Exp $
+- *
+- * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
+- *
+- *		This program is free software; you can redistribute it and/or
+- *		modify it under the terms of the GNU General Public License
+- *		as published by the Free Software Foundation; either version
+- *		2 of the License, or (at your option) any later version.
+- */
+-
+-#include <asm/system.h>
+-#include <asm/uaccess.h>
+-#include <linux/types.h>
+-#include <linux/errno.h>
+-#include <linux/timer.h>
+-#include <linux/mm.h>
+-#include <linux/kernel.h>
+-#include <linux/fcntl.h>
+-#include <linux/stat.h>
+-#include <linux/socket.h>
+-#include <linux/in.h>
+-#include <linux/inet.h>
+-#include <linux/netdevice.h>
+-#include <linux/inetdevice.h>
+-#include <linux/igmp.h>
+-#include <linux/proc_fs.h>
+-#include <linux/seq_file.h>
+-#include <linux/module.h>
+-#include <linux/mroute.h>
+-#include <linux/init.h>
+-#include <net/ip.h>
+-#include <net/protocol.h>
+-#include <linux/skbuff.h>
+-#include <net/sock.h>
+-#include <net/icmp.h>
+-#include <net/udp.h>
+-#include <net/raw.h>
+-#include <linux/notifier.h>
+-#include <linux/if_arp.h>
+-#include <linux/netfilter_ipv4.h>
+-#include <net/ipip.h>
+-#include <net/checksum.h>
+-#include <net/ip_mp_alg.h>
+-
+-struct multipath_device {
+-	int		ifi; /* interface index of device */
+-	atomic_t	usecount;
+-	int 		allocated;
+-};
+-
+-#define MULTIPATH_MAX_DEVICECANDIDATES 10
+-
+-static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES];
+-static DEFINE_SPINLOCK(state_lock);
+-
+-static int inline __multipath_findslot(void)
+-{
+-	int i;
+-
+-	for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
+-		if (state[i].allocated == 0)
+-			return i;
+-	}
+-	return -1;
+-}
+-
+-static int inline __multipath_finddev(int ifindex)
+-{
+-	int i;
+-
+-	for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++) {
+-		if (state[i].allocated != 0 &&
+-		    state[i].ifi == ifindex)
+-			return i;
+-	}
+-	return -1;
+-}
+-
+-static int drr_dev_event(struct notifier_block *this,
+-			 unsigned long event, void *ptr)
+-{
+-	struct net_device *dev = ptr;
+-	int devidx;
+-
+-	switch (event) {
+-	case NETDEV_UNREGISTER:
+-	case NETDEV_DOWN:
+-		spin_lock_bh(&state_lock);
+-
+-		devidx = __multipath_finddev(dev->ifindex);
+-		if (devidx != -1) {
+-			state[devidx].allocated = 0;
+-			state[devidx].ifi = 0;
+-			atomic_set(&state[devidx].usecount, 0);
+-		}
+-
+-		spin_unlock_bh(&state_lock);
+-		break;
+-	}
+-
+-	return NOTIFY_DONE;
+-}
+-
+-static struct notifier_block drr_dev_notifier = {
+-	.notifier_call	= drr_dev_event,
+-};
+-
+-
+-static void drr_safe_inc(atomic_t *usecount)
+-{
+-	int n;
+-
+-	atomic_inc(usecount);
+-
+-	n = atomic_read(usecount);
+-	if (n <= 0) {
+-		int i;
+-
+-		spin_lock_bh(&state_lock);
+-
+-		for (i = 0; i < MULTIPATH_MAX_DEVICECANDIDATES; i++)
+-			atomic_set(&state[i].usecount, 0);
+-
+-		spin_unlock_bh(&state_lock);
+-	}
+-}
+-
+-static void drr_select_route(const struct flowi *flp,
+-			     struct rtable *first, struct rtable **rp)
+-{
+-	struct rtable *nh, *result, *cur_min;
+-	int min_usecount = -1;
+-	int devidx = -1;
+-	int cur_min_devidx = -1;
+-
+-	/* 1. make sure all alt. nexthops have the same GC related data */
+-	/* 2. determine the new candidate to be returned */
+-	result = NULL;
+-	cur_min = NULL;
+-	for (nh = rcu_dereference(first); nh;
+-	     nh = rcu_dereference(nh->u.dst.rt_next)) {
+-		if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
+-		    multipath_comparekeys(&nh->fl, flp)) {
+-			int nh_ifidx = nh->u.dst.dev->ifindex;
+-
+-			nh->u.dst.lastuse = jiffies;
+-			nh->u.dst.__use++;
+-			if (result != NULL)
+-				continue;
+-
+-			/* search for the output interface */
+-
+-			/* this is not SMP safe, only add/remove are
+-			 * SMP safe as wrong usecount updates have no big
+-			 * impact
+-			 */
+-			devidx = __multipath_finddev(nh_ifidx);
+-			if (devidx == -1) {
+-				/* add the interface to the array
+-				 * SMP safe
+-				 */
+-				spin_lock_bh(&state_lock);
+-
+-				/* due to SMP: search again */
+-				devidx = __multipath_finddev(nh_ifidx);
+-				if (devidx == -1) {
+-					/* add entry for device */
+-					devidx = __multipath_findslot();
+-					if (devidx == -1) {
+-						/* unlikely but possible */
+-						continue;
+-					}
+-
+-					state[devidx].allocated = 1;
+-					state[devidx].ifi = nh_ifidx;
+-					atomic_set(&state[devidx].usecount, 0);
+-					min_usecount = 0;
+-				}
+-
+-				spin_unlock_bh(&state_lock);
+-			}
+-
+-			if (min_usecount == 0) {
+-				/* if the device has not been used it is
+-				 * the primary target
+-				 */
+-				drr_safe_inc(&state[devidx].usecount);
+-				result = nh;
+-			} else {
+-				int count =
+-					atomic_read(&state[devidx].usecount);
+-
+-				if (min_usecount == -1 ||
+-				    count < min_usecount) {
+-					cur_min = nh;
+-					cur_min_devidx = devidx;
+-					min_usecount = count;
+-				}
+-			}
+-		}
+-	}
+-
+-	if (!result) {
+-		if (cur_min) {
+-			drr_safe_inc(&state[cur_min_devidx].usecount);
+-			result = cur_min;
+-		} else {
+-			result = first;
+-		}
+-	}
+-
+-	*rp = result;
+-}
+-
+-static struct ip_mp_alg_ops drr_ops = {
+-	.mp_alg_select_route	=	drr_select_route,
+-};
+-
+-static int __init drr_init(void)
+-{
+-	int err = register_netdevice_notifier(&drr_dev_notifier);
+-
+-	if (err)
+-		return err;
+-
+-	err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR);
+-	if (err)
+-		goto fail;
+-
+-	return 0;
+-
+-fail:
+-	unregister_netdevice_notifier(&drr_dev_notifier);
+-	return err;
+-}
+-
+-static void __exit drr_exit(void)
+-{
+-	unregister_netdevice_notifier(&drr_dev_notifier);
+-	multipath_alg_unregister(&drr_ops, IP_MP_ALG_DRR);
+-}
+-
+-module_init(drr_init);
+-module_exit(drr_exit);
+-MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-570/net/ipv4/multipath_random.c linux-2.6.22-try2/net/ipv4/multipath_random.c
+--- linux-2.6.22-570/net/ipv4/multipath_random.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv4/multipath_random.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,114 +0,0 @@
+-/*
+- *              Random policy for multipath.
+- *
+- *
+- * Version:	$Id: multipath_random.c,v 1.1.2.3 2004/09/21 08:42:11 elueck Exp $
+- *
+- * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
+- *
+- *		This program is free software; you can redistribute it and/or
+- *		modify it under the terms of the GNU General Public License
+- *		as published by the Free Software Foundation; either version
+- *		2 of the License, or (at your option) any later version.
+- */
+-
+-#include <asm/system.h>
+-#include <asm/uaccess.h>
+-#include <linux/types.h>
+-#include <linux/errno.h>
+-#include <linux/timer.h>
+-#include <linux/mm.h>
+-#include <linux/kernel.h>
+-#include <linux/fcntl.h>
+-#include <linux/stat.h>
+-#include <linux/socket.h>
+-#include <linux/in.h>
+-#include <linux/inet.h>
+-#include <linux/netdevice.h>
+-#include <linux/inetdevice.h>
+-#include <linux/igmp.h>
+-#include <linux/proc_fs.h>
+-#include <linux/seq_file.h>
+-#include <linux/module.h>
+-#include <linux/mroute.h>
+-#include <linux/init.h>
+-#include <linux/random.h>
+-#include <net/ip.h>
+-#include <net/protocol.h>
+-#include <linux/skbuff.h>
+-#include <net/sock.h>
+-#include <net/icmp.h>
+-#include <net/udp.h>
+-#include <net/raw.h>
+-#include <linux/notifier.h>
+-#include <linux/if_arp.h>
+-#include <linux/netfilter_ipv4.h>
+-#include <net/ipip.h>
+-#include <net/checksum.h>
+-#include <net/ip_mp_alg.h>
+-
+-#define MULTIPATH_MAX_CANDIDATES 40
+-
+-static void random_select_route(const struct flowi *flp,
+-				struct rtable *first,
+-				struct rtable **rp)
+-{
+-	struct rtable *rt;
+-	struct rtable *decision;
+-	unsigned char candidate_count = 0;
+-
+-	/* count all candidate */
+-	for (rt = rcu_dereference(first); rt;
+-	     rt = rcu_dereference(rt->u.dst.rt_next)) {
+-		if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
+-		    multipath_comparekeys(&rt->fl, flp))
+-			++candidate_count;
+-	}
+-
+-	/* choose a random candidate */
+-	decision = first;
+-	if (candidate_count > 1) {
+-		unsigned char i = 0;
+-		unsigned char candidate_no = (unsigned char)
+-			(random32() % candidate_count);
+-
+-		/* find chosen candidate and adjust GC data for all candidates
+-		 * to ensure they stay in cache
+-		 */
+-		for (rt = first; rt; rt = rt->u.dst.rt_next) {
+-			if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
+-			    multipath_comparekeys(&rt->fl, flp)) {
+-				rt->u.dst.lastuse = jiffies;
+-
+-				if (i == candidate_no)
+-					decision = rt;
+-
+-				if (i >= candidate_count)
+-					break;
+-
+-				i++;
+-			}
+-		}
+-	}
+-
+-	decision->u.dst.__use++;
+-	*rp = decision;
+-}
+-
+-static struct ip_mp_alg_ops random_ops = {
+-	.mp_alg_select_route	=	random_select_route,
+-};
+-
+-static int __init random_init(void)
+-{
+-	return multipath_alg_register(&random_ops, IP_MP_ALG_RANDOM);
+-}
+-
+-static void __exit random_exit(void)
+-{
+-	multipath_alg_unregister(&random_ops, IP_MP_ALG_RANDOM);
+-}
+-
+-module_init(random_init);
+-module_exit(random_exit);
+-MODULE_LICENSE("GPL");
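
random_select_route() above does a uniform pick in two passes over the route-cache chain: count the DST_BALANCED entries whose keys match the flow, then walk again to a random32()-chosen index, defaulting to the chain head when fewer than two candidates match. A self-contained userspace model of the same two-pass pick (struct node and the key match are invented stand-ins):

    #include <stdio.h>
    #include <stdlib.h>

    struct node { int key; struct node *next; };

    static struct node *pick_uniform(struct node *head, int want)
    {
            struct node *n, *pick = head;
            unsigned int count = 0, idx, i = 0;

            for (n = head; n; n = n->next)          /* pass 1: count matches */
                    if (n->key == want)
                            count++;

            if (count > 1) {
                    idx = (unsigned int)rand() % count; /* random32() upstream */
                    for (n = head; n; n = n->next) {    /* pass 2: walk to idx */
                            if (n->key != want)
                                    continue;
                            if (i == idx) {
                                    pick = n;
                                    break;
                            }
                            i++;
                    }
            }
            return pick;                    /* falls back to the chain head */
    }

    int main(void)
    {
            struct node c = { 1, NULL }, b = { 1, &c }, a = { 2, &b };

            srand(42);
            printf("picked key=%d\n", pick_uniform(&a, 1)->key);
            return 0;
    }
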
+diff -Nurb linux-2.6.22-570/net/ipv4/multipath_rr.c linux-2.6.22-try2/net/ipv4/multipath_rr.c
+--- linux-2.6.22-570/net/ipv4/multipath_rr.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv4/multipath_rr.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,95 +0,0 @@
+-/*
+- *              Round robin policy for multipath.
+- *
+- *
+- * Version:	$Id: multipath_rr.c,v 1.1.2.2 2004/09/16 07:42:34 elueck Exp $
+- *
+- * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
+- *
+- *		This program is free software; you can redistribute it and/or
+- *		modify it under the terms of the GNU General Public License
+- *		as published by the Free Software Foundation; either version
+- *		2 of the License, or (at your option) any later version.
+- */
+-
+-#include <asm/system.h>
+-#include <asm/uaccess.h>
+-#include <linux/types.h>
+-#include <linux/errno.h>
+-#include <linux/timer.h>
+-#include <linux/mm.h>
+-#include <linux/kernel.h>
+-#include <linux/fcntl.h>
+-#include <linux/stat.h>
+-#include <linux/socket.h>
+-#include <linux/in.h>
+-#include <linux/inet.h>
+-#include <linux/netdevice.h>
+-#include <linux/inetdevice.h>
+-#include <linux/igmp.h>
+-#include <linux/proc_fs.h>
+-#include <linux/seq_file.h>
+-#include <linux/module.h>
+-#include <linux/mroute.h>
+-#include <linux/init.h>
+-#include <net/ip.h>
+-#include <net/protocol.h>
+-#include <linux/skbuff.h>
+-#include <net/sock.h>
+-#include <net/icmp.h>
+-#include <net/udp.h>
+-#include <net/raw.h>
+-#include <linux/notifier.h>
+-#include <linux/if_arp.h>
+-#include <linux/netfilter_ipv4.h>
+-#include <net/ipip.h>
+-#include <net/checksum.h>
+-#include <net/ip_mp_alg.h>
+-
+-static void rr_select_route(const struct flowi *flp,
+-			    struct rtable *first, struct rtable **rp)
+-{
+-	struct rtable *nh, *result, *min_use_cand = NULL;
+-	int min_use = -1;
+-
+-	/* 1. make sure all alt. nexthops have the same GC related data
+-	 * 2. determine the new candidate to be returned
+-	 */
+-	result = NULL;
+-	for (nh = rcu_dereference(first); nh;
+-	     nh = rcu_dereference(nh->u.dst.rt_next)) {
+-		if ((nh->u.dst.flags & DST_BALANCED) != 0 &&
+-		    multipath_comparekeys(&nh->fl, flp)) {
+-			nh->u.dst.lastuse = jiffies;
+-
+-			if (min_use == -1 || nh->u.dst.__use < min_use) {
+-				min_use = nh->u.dst.__use;
+-				min_use_cand = nh;
+-			}
+-		}
+-	}
+-	result = min_use_cand;
+-	if (!result)
+-		result = first;
+-
+-	result->u.dst.__use++;
+-	*rp = result;
+-}
+-
+-static struct ip_mp_alg_ops rr_ops = {
+-	.mp_alg_select_route	=	rr_select_route,
+-};
+-
+-static int __init rr_init(void)
+-{
+-	return multipath_alg_register(&rr_ops, IP_MP_ALG_RR);
+-}
+-
+-static void __exit rr_exit(void)
+-{
+-	multipath_alg_unregister(&rr_ops, IP_MP_ALG_RR);
+-}
+-
+-module_init(rr_init);
+-module_exit(rr_exit);
+-MODULE_LICENSE("GPL");
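
rr_select_route() above approximates round robin by choosing the matching entry with the smallest u.dst.__use counter and then incrementing it, so the winner becomes the least-favoured candidate on the next pass. The same least-used scan, modelled in plain C (types invented):

    #include <stdio.h>

    struct ent { int match; int use; };

    static struct ent *pick_rr(struct ent *v, int n)
    {
            struct ent *best = NULL;
            int i;

            for (i = 0; i < n; i++)
                    if (v[i].match && (!best || v[i].use < best->use))
                            best = &v[i];
            if (!best)
                    best = &v[0];   /* fall back to the head, as the kernel does */
            best->use++;            /* bump the winner's use count */
            return best;
    }

    int main(void)
    {
            struct ent v[3] = { { 1, 5 }, { 1, 2 }, { 1, 2 } };
            struct ent *p = pick_rr(v, 3);

            printf("picked index %ld\n", (long)(p - v)); /* 1: first minimum */
            return 0;
    }
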
+diff -Nurb linux-2.6.22-570/net/ipv4/multipath_wrandom.c linux-2.6.22-try2/net/ipv4/multipath_wrandom.c
+--- linux-2.6.22-570/net/ipv4/multipath_wrandom.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv4/multipath_wrandom.c	1969-12-31 19:00:00.000000000 -0500
+@@ -1,329 +0,0 @@
+-/*
+- *              Weighted random policy for multipath.
+- *
+- *
+- * Version:	$Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $
+- *
+- * Authors:	Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de>
+- *
+- *		This program is free software; you can redistribute it and/or
+- *		modify it under the terms of the GNU General Public License
+- *		as published by the Free Software Foundation; either version
+- *		2 of the License, or (at your option) any later version.
+- */
+-
+-#include <asm/system.h>
+-#include <asm/uaccess.h>
+-#include <linux/types.h>
+-#include <linux/errno.h>
+-#include <linux/timer.h>
+-#include <linux/mm.h>
+-#include <linux/kernel.h>
+-#include <linux/fcntl.h>
+-#include <linux/stat.h>
+-#include <linux/socket.h>
+-#include <linux/in.h>
+-#include <linux/inet.h>
+-#include <linux/netdevice.h>
+-#include <linux/inetdevice.h>
+-#include <linux/igmp.h>
+-#include <linux/proc_fs.h>
+-#include <linux/seq_file.h>
+-#include <linux/module.h>
+-#include <linux/mroute.h>
+-#include <linux/init.h>
+-#include <linux/random.h>
+-#include <net/ip.h>
+-#include <net/protocol.h>
+-#include <linux/skbuff.h>
+-#include <net/sock.h>
+-#include <net/icmp.h>
+-#include <net/udp.h>
+-#include <net/raw.h>
+-#include <linux/notifier.h>
+-#include <linux/if_arp.h>
+-#include <linux/netfilter_ipv4.h>
+-#include <net/ipip.h>
+-#include <net/checksum.h>
+-#include <net/ip_fib.h>
+-#include <net/ip_mp_alg.h>
+-
+-#define MULTIPATH_STATE_SIZE 15
+-
+-struct multipath_candidate {
+-	struct multipath_candidate	*next;
+-	int				power;
+-	struct rtable			*rt;
+-};
+-
+-struct multipath_dest {
+-	struct list_head	list;
+-
+-	const struct fib_nh	*nh_info;
+-	__be32			netmask;
+-	__be32			network;
+-	unsigned char		prefixlen;
+-
+-	struct rcu_head		rcu;
+-};
+-
+-struct multipath_bucket {
+-	struct list_head	head;
+-	spinlock_t		lock;
+-};
+-
+-struct multipath_route {
+-	struct list_head	list;
+-
+-	int			oif;
+-	__be32			gw;
+-	struct list_head	dests;
+-
+-	struct rcu_head		rcu;
+-};
+-
+-/* state: primarily weight per route information */
+-static struct multipath_bucket state[MULTIPATH_STATE_SIZE];
+-
+-static unsigned char __multipath_lookup_weight(const struct flowi *fl,
+-					       const struct rtable *rt)
+-{
+-	const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE;
+-	struct multipath_route *r;
+-	struct multipath_route *target_route = NULL;
+-	struct multipath_dest *d;
+-	int weight = 1;
+-
+-	/* lookup the weight information for a certain route */
+-	rcu_read_lock();
+-
+-	/* find state entry for gateway or add one if necessary */
+-	list_for_each_entry_rcu(r, &state[state_idx].head, list) {
+-		if (r->gw == rt->rt_gateway &&
+-		    r->oif == rt->idev->dev->ifindex) {
+-			target_route = r;
+-			break;
+-		}
+-	}
+-
+-	if (!target_route) {
+-		/* this should not happen... but we are prepared */
+-		printk( KERN_CRIT"%s: missing state for gateway: %u and " \
+-			"device %d\n", __FUNCTION__, rt->rt_gateway,
+-			rt->idev->dev->ifindex);
+-		goto out;
+-	}
+-
+-	/* find state entry for destination */
+-	list_for_each_entry_rcu(d, &target_route->dests, list) {
+-		__be32 targetnetwork = fl->fl4_dst &
+-			inet_make_mask(d->prefixlen);
+-
+-		if ((targetnetwork & d->netmask) == d->network) {
+-			weight = d->nh_info->nh_weight;
+-			goto out;
+-		}
+-	}
+-
+-out:
+-	rcu_read_unlock();
+-	return weight;
+-}
+-
+-static void wrandom_init_state(void)
+-{
+-	int i;
+-
+-	for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
+-		INIT_LIST_HEAD(&state[i].head);
+-		spin_lock_init(&state[i].lock);
+-	}
+-}
+-
+-static void wrandom_select_route(const struct flowi *flp,
+-				 struct rtable *first,
+-				 struct rtable **rp)
+-{
+-	struct rtable *rt;
+-	struct rtable *decision;
+-	struct multipath_candidate *first_mpc = NULL;
+-	struct multipath_candidate *mpc, *last_mpc = NULL;
+-	int power = 0;
+-	int last_power;
+-	int selector;
+-	const size_t size_mpc = sizeof(struct multipath_candidate);
+-
+-	/* collect all candidates and identify their weights */
+-	for (rt = rcu_dereference(first); rt;
+-	     rt = rcu_dereference(rt->u.dst.rt_next)) {
+-		if ((rt->u.dst.flags & DST_BALANCED) != 0 &&
+-		    multipath_comparekeys(&rt->fl, flp)) {
+-			struct multipath_candidate* mpc =
+-				(struct multipath_candidate*)
+-				kmalloc(size_mpc, GFP_ATOMIC);
+-
+-			if (!mpc)
+-				return;
+-
+-			power += __multipath_lookup_weight(flp, rt) * 10000;
+-
+-			mpc->power = power;
+-			mpc->rt = rt;
+-			mpc->next = NULL;
+-
+-			if (!first_mpc)
+-				first_mpc = mpc;
+-			else
+-				last_mpc->next = mpc;
+-
+-			last_mpc = mpc;
+-		}
+-	}
+-
+-	/* choose a weighted random candidate */
+-	decision = first;
+-	selector = random32() % power;
+-	last_power = 0;
+-
+-	/* select candidate, adjust GC data and cleanup local state */
+-	decision = first;
+-	last_mpc = NULL;
+-	for (mpc = first_mpc; mpc; mpc = mpc->next) {
+-		mpc->rt->u.dst.lastuse = jiffies;
+-		if (last_power <= selector && selector < mpc->power)
+-			decision = mpc->rt;
+-
+-		last_power = mpc->power;
+-		kfree(last_mpc);
+-		last_mpc = mpc;
+-	}
+-
+-	/* concurrent __multipath_flush may lead to !last_mpc */
+-	kfree(last_mpc);
+-
+-	decision->u.dst.__use++;
+-	*rp = decision;
+-}
+-
+-static void wrandom_set_nhinfo(__be32 network,
+-			       __be32 netmask,
+-			       unsigned char prefixlen,
+-			       const struct fib_nh *nh)
+-{
+-	const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE;
+-	struct multipath_route *r, *target_route = NULL;
+-	struct multipath_dest *d, *target_dest = NULL;
+-
+-	/* store the weight information for a certain route */
+-	spin_lock_bh(&state[state_idx].lock);
+-
+-	/* find state entry for gateway or add one if necessary */
+-	list_for_each_entry_rcu(r, &state[state_idx].head, list) {
+-		if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) {
+-			target_route = r;
+-			break;
+-		}
+-	}
+-
+-	if (!target_route) {
+-		const size_t size_rt = sizeof(struct multipath_route);
+-		target_route = (struct multipath_route *)
+-			kmalloc(size_rt, GFP_ATOMIC);
+-
+-		target_route->gw = nh->nh_gw;
+-		target_route->oif = nh->nh_oif;
+-		memset(&target_route->rcu, 0, sizeof(struct rcu_head));
+-		INIT_LIST_HEAD(&target_route->dests);
+-
+-		list_add_rcu(&target_route->list, &state[state_idx].head);
+-	}
+-
+-	/* find state entry for destination or add one if necessary */
+-	list_for_each_entry_rcu(d, &target_route->dests, list) {
+-		if (d->nh_info == nh) {
+-			target_dest = d;
+-			break;
+-		}
+-	}
+-
+-	if (!target_dest) {
+-		const size_t size_dst = sizeof(struct multipath_dest);
+-		target_dest = (struct multipath_dest*)
+-			kmalloc(size_dst, GFP_ATOMIC);
+-
+-		target_dest->nh_info = nh;
+-		target_dest->network = network;
+-		target_dest->netmask = netmask;
+-		target_dest->prefixlen = prefixlen;
+-		memset(&target_dest->rcu, 0, sizeof(struct rcu_head));
+-
+-		list_add_rcu(&target_dest->list, &target_route->dests);
+-	}
+-	/* else: we already stored this info for another destination =>
+-	 * we are finished
+-	 */
+-
+-	spin_unlock_bh(&state[state_idx].lock);
+-}
+-
+-static void __multipath_free(struct rcu_head *head)
+-{
+-	struct multipath_route *rt = container_of(head, struct multipath_route,
+-						  rcu);
+-	kfree(rt);
+-}
+-
+-static void __multipath_free_dst(struct rcu_head *head)
+-{
+-	struct multipath_dest *dst = container_of(head,
+-						  struct multipath_dest,
+-						  rcu);
+-	kfree(dst);
+-}
+-
+-static void wrandom_flush(void)
+-{
+-	int i;
+-
+-	/* defere delete to all entries */
+-	for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) {
+-		struct multipath_route *r;
+-
+-		spin_lock_bh(&state[i].lock);
+-		list_for_each_entry_rcu(r, &state[i].head, list) {
+-			struct multipath_dest *d;
+-			list_for_each_entry_rcu(d, &r->dests, list) {
+-				list_del_rcu(&d->list);
+-				call_rcu(&d->rcu,
+-					 __multipath_free_dst);
+-			}
+-			list_del_rcu(&r->list);
+-			call_rcu(&r->rcu,
+-				 __multipath_free);
+-		}
+-
+-		spin_unlock_bh(&state[i].lock);
+-	}
+-}
+-
+-static struct ip_mp_alg_ops wrandom_ops = {
+-	.mp_alg_select_route	=	wrandom_select_route,
+-	.mp_alg_flush		=	wrandom_flush,
+-	.mp_alg_set_nhinfo	=	wrandom_set_nhinfo,
+-};
+-
+-static int __init wrandom_init(void)
+-{
+-	wrandom_init_state();
+-
+-	return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM);
+-}
+-
+-static void __exit wrandom_exit(void)
+-{
+-	multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM);
+-}
+-
+-module_init(wrandom_init);
+-module_exit(wrandom_exit);
+-MODULE_LICENSE("GPL");
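
wrandom_select_route() above builds a cumulative-weight list: each matching candidate's weight (scaled by 10000) extends the running power total, one selector is drawn with random32() % power, and the candidate whose span contains the selector wins; the per-candidate GFP_ATOMIC allocations are freed during the same walk. A self-contained model of the cumulative draw (weights invented):

    #include <stdio.h>
    #include <stdlib.h>

    static int pick_weighted(const int *weight, int n)
    {
            int total = 0, cum = 0, sel, i;

            for (i = 0; i < n; i++)
                    total += weight[i];
            sel = rand() % total;           /* random32() % power upstream */
            for (i = 0; i < n; i++) {
                    cum += weight[i];       /* candidate i spans [cum-w, cum) */
                    if (sel < cum)
                            return i;
            }
            return n - 1;                   /* unreachable while total > 0 */
    }

    int main(void)
    {
            int w[3] = { 1, 3, 6 };         /* expect ~10%/30%/60% of draws */
            int hist[3] = { 0, 0, 0 }, i;

            srand(42);
            for (i = 0; i < 10000; i++)
                    hist[pick_weighted(w, 3)]++;
            printf("%d %d %d\n", hist[0], hist[1], hist[2]);
            return 0;
    }
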
+diff -Nurb linux-2.6.22-570/net/ipv4/netfilter/nf_nat_helper.c linux-2.6.22-try2/net/ipv4/netfilter/nf_nat_helper.c
+--- linux-2.6.22-570/net/ipv4/netfilter/nf_nat_helper.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv4/netfilter/nf_nat_helper.c	2007-12-19 15:29:23.000000000 -0500
+@@ -178,7 +178,7 @@
+ 	datalen = (*pskb)->len - iph->ihl*4;
+ 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ 		if (!(rt->rt_flags & RTCF_LOCAL) &&
+-		    (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
++		    (*pskb)->dev->features & NETIF_F_V4_CSUM) {
+ 			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
+ 			(*pskb)->csum_start = skb_headroom(*pskb) +
+ 					      skb_network_offset(*pskb) +
+@@ -265,7 +265,7 @@
+ 
+ 	if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) {
+ 		if (!(rt->rt_flags & RTCF_LOCAL) &&
+-		    (*pskb)->dev->features & NETIF_F_ALL_CSUM) {
++		    (*pskb)->dev->features & NETIF_F_V4_CSUM) {
+ 			(*pskb)->ip_summed = CHECKSUM_PARTIAL;
+ 			(*pskb)->csum_start = skb_headroom(*pskb) +
+ 					      skb_network_offset(*pskb) +
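
The two nf_nat_helper.c hunks narrow the offload test from NETIF_F_ALL_CSUM to NETIF_F_V4_CSUM: this path mangles IPv4/TCP packets, so a device advertising only IPv6 checksumming must not be handed a CHECKSUM_PARTIAL skb here. (The mirror-image switch to NETIF_F_V6_CSUM appears in the ipv6_sockglue.c hunk below.) The flag relationships, as best recalled from 2.6.22-era include/linux/netdevice.h, demonstrated with invented bit values:

    #include <stdio.h>

    /* Only the set relations matter: V4 = GEN|IP, V6 = GEN|IPV6,
     * ALL = V4|V6 (a recollection, not copied from the tree). */
    #define F_IP_CSUM       0x01    /* device checksums IPv4 only */
    #define F_IPV6_CSUM     0x02    /* device checksums IPv6 only */
    #define F_GEN_CSUM      0x04    /* protocol-agnostic checksumming */
    #define F_V4_CSUM       (F_GEN_CSUM | F_IP_CSUM)
    #define F_V6_CSUM       (F_GEN_CSUM | F_IPV6_CSUM)
    #define F_ALL_CSUM      (F_V4_CSUM | F_V6_CSUM)

    int main(void)
    {
            unsigned int v6_only_dev = F_IPV6_CSUM;

            /* old test: a v6-only device wrongly passes for IPv4 offload */
            printf("& ALL_CSUM -> %d\n", !!(v6_only_dev & F_ALL_CSUM));
            /* new test: it no longer does */
            printf("& V4_CSUM  -> %d\n", !!(v6_only_dev & F_V4_CSUM));
            return 0;
    }
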
+diff -Nurb linux-2.6.22-570/net/ipv4/route.c linux-2.6.22-try2/net/ipv4/route.c
+--- linux-2.6.22-570/net/ipv4/route.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv4/route.c	2007-12-19 15:29:23.000000000 -0500
+@@ -101,7 +101,6 @@
+ #include <net/tcp.h>
+ #include <net/icmp.h>
+ #include <net/xfrm.h>
+-#include <net/ip_mp_alg.h>
+ #include <net/netevent.h>
+ #include <net/rtnetlink.h>
+ #ifdef CONFIG_SYSCTL
+@@ -495,13 +494,11 @@
+ 
+ static __inline__ void rt_free(struct rtable *rt)
+ {
+-	multipath_remove(rt);
+ 	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+ }
+ 
+ static __inline__ void rt_drop(struct rtable *rt)
+ {
+-	multipath_remove(rt);
+ 	ip_rt_put(rt);
+ 	call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
+ }
+@@ -574,52 +571,6 @@
+ 		(fl1->iif ^ fl2->iif)) == 0;
+ }
+ 
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-static struct rtable **rt_remove_balanced_route(struct rtable **chain_head,
+-						struct rtable *expentry,
+-						int *removed_count)
+-{
+-	int passedexpired = 0;
+-	struct rtable **nextstep = NULL;
+-	struct rtable **rthp = chain_head;
+-	struct rtable *rth;
+-
+-	if (removed_count)
+-		*removed_count = 0;
+-
+-	while ((rth = *rthp) != NULL) {
+-		if (rth == expentry)
+-			passedexpired = 1;
+-
+-		if (((*rthp)->u.dst.flags & DST_BALANCED) != 0  &&
+-		    compare_keys(&(*rthp)->fl, &expentry->fl)) {
+-			if (*rthp == expentry) {
+-				*rthp = rth->u.dst.rt_next;
+-				continue;
+-			} else {
+-				*rthp = rth->u.dst.rt_next;
+-				rt_free(rth);
+-				if (removed_count)
+-					++(*removed_count);
+-			}
+-		} else {
+-			if (!((*rthp)->u.dst.flags & DST_BALANCED) &&
+-			    passedexpired && !nextstep)
+-				nextstep = &rth->u.dst.rt_next;
+-
+-			rthp = &rth->u.dst.rt_next;
+-		}
+-	}
+-
+-	rt_free(expentry);
+-	if (removed_count)
+-		++(*removed_count);
+-
+-	return nextstep;
+-}
+-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+-
+-
+ /* This runs via a timer and thus is always in BH context. */
+ static void rt_check_expire(unsigned long dummy)
+ {
+@@ -658,22 +609,8 @@
+ 			}
+ 
+ 			/* Cleanup aged off entries. */
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-			/* remove all related balanced entries if necessary */
+-			if (rth->u.dst.flags & DST_BALANCED) {
+-				rthp = rt_remove_balanced_route(
+-					&rt_hash_table[i].chain,
+-					rth, NULL);
+-				if (!rthp)
+-					break;
+-			} else {
+-				*rthp = rth->u.dst.rt_next;
+-				rt_free(rth);
+-			}
+-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+ 			*rthp = rth->u.dst.rt_next;
+ 			rt_free(rth);
+-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+ 		}
+ 		spin_unlock(rt_hash_lock_addr(i));
+ 
+@@ -721,9 +658,6 @@
+ 	if (delay < 0)
+ 		delay = ip_rt_min_delay;
+ 
+-	/* flush existing multipath state*/
+-	multipath_flush();
+-
+ 	spin_lock_bh(&rt_flush_lock);
+ 
+ 	if (del_timer(&rt_flush_timer) && delay > 0 && rt_deadline) {
+@@ -842,30 +776,9 @@
+ 					rthp = &rth->u.dst.rt_next;
+ 					continue;
+ 				}
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-				/* remove all related balanced entries
+-				 * if necessary
+-				 */
+-				if (rth->u.dst.flags & DST_BALANCED) {
+-					int r;
+-
+-					rthp = rt_remove_balanced_route(
+-						&rt_hash_table[k].chain,
+-						rth,
+-						&r);
+-					goal -= r;
+-					if (!rthp)
+-						break;
+-				} else {
+-					*rthp = rth->u.dst.rt_next;
+-					rt_free(rth);
+-					goal--;
+-				}
+-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+ 				*rthp = rth->u.dst.rt_next;
+ 				rt_free(rth);
+ 				goal--;
+-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+ 			}
+ 			spin_unlock_bh(rt_hash_lock_addr(k));
+ 			if (goal <= 0)
+@@ -939,12 +852,7 @@
+ 
+ 	spin_lock_bh(rt_hash_lock_addr(hash));
+ 	while ((rth = *rthp) != NULL) {
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-		if (!(rth->u.dst.flags & DST_BALANCED) &&
+-		    compare_keys(&rth->fl, &rt->fl)) {
+-#else
+ 		if (compare_keys(&rth->fl, &rt->fl)) {
+-#endif
+ 			/* Put it first */
+ 			*rthp = rth->u.dst.rt_next;
+ 			/*
+@@ -1774,10 +1682,6 @@
+ 
+ 	atomic_set(&rth->u.dst.__refcnt, 1);
+ 	rth->u.dst.flags= DST_HOST;
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	if (res->fi->fib_nhs > 1)
+-		rth->u.dst.flags |= DST_BALANCED;
+-#endif
+ 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
+ 		rth->u.dst.flags |= DST_NOPOLICY;
+ 	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
+@@ -1812,7 +1716,7 @@
+ 	return err;
+ }
+ 
+-static inline int ip_mkroute_input_def(struct sk_buff *skb,
++static inline int ip_mkroute_input(struct sk_buff *skb,
+ 				       struct fib_result* res,
+ 				       const struct flowi *fl,
+ 				       struct in_device *in_dev,
+@@ -1837,63 +1741,6 @@
+ 	return rt_intern_hash(hash, rth, (struct rtable**)&skb->dst);
+ }
+ 
+-static inline int ip_mkroute_input(struct sk_buff *skb,
+-				   struct fib_result* res,
+-				   const struct flowi *fl,
+-				   struct in_device *in_dev,
+-				   __be32 daddr, __be32 saddr, u32 tos)
+-{
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	struct rtable* rth = NULL, *rtres;
+-	unsigned char hop, hopcount;
+-	int err = -EINVAL;
+-	unsigned int hash;
+-
+-	if (res->fi)
+-		hopcount = res->fi->fib_nhs;
+-	else
+-		hopcount = 1;
+-
+-	/* distinguish between multipath and singlepath */
+-	if (hopcount < 2)
+-		return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
+-					    saddr, tos);
+-
+-	/* add all alternatives to the routing cache */
+-	for (hop = 0; hop < hopcount; hop++) {
+-		res->nh_sel = hop;
+-
+-		/* put reference to previous result */
+-		if (hop)
+-			ip_rt_put(rtres);
+-
+-		/* create a routing cache entry */
+-		err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos,
+-				      &rth);
+-		if (err)
+-			return err;
+-
+-		/* put it into the cache */
+-		hash = rt_hash(daddr, saddr, fl->iif);
+-		err = rt_intern_hash(hash, rth, &rtres);
+-		if (err)
+-			return err;
+-
+-		/* forward hop information to multipath impl. */
+-		multipath_set_nhinfo(rth,
+-				     FIB_RES_NETWORK(*res),
+-				     FIB_RES_NETMASK(*res),
+-				     res->prefixlen,
+-				     &FIB_RES_NH(*res));
+-	}
+-	skb->dst = &rtres->u.dst;
+-	return err;
+-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED  */
+-	return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos);
+-#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED  */
+-}
+-
+-
+ /*
+  *	NOTE. We drop all the packets that has local source
+  *	addresses, because every properly looped back packet
+@@ -2211,13 +2058,6 @@
+ 
+ 	atomic_set(&rth->u.dst.__refcnt, 1);
+ 	rth->u.dst.flags= DST_HOST;
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	if (res->fi) {
+-		rth->rt_multipath_alg = res->fi->fib_mp_alg;
+-		if (res->fi->fib_nhs > 1)
+-			rth->u.dst.flags |= DST_BALANCED;
+-	}
+-#endif
+ 	if (IN_DEV_CONF_GET(in_dev, NOXFRM))
+ 		rth->u.dst.flags |= DST_NOXFRM;
+ 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
+@@ -2277,7 +2117,7 @@
+ 	return err;
+ }
+ 
+-static inline int ip_mkroute_output_def(struct rtable **rp,
++static inline int ip_mkroute_output(struct rtable **rp,
+ 					struct fib_result* res,
+ 					const struct flowi *fl,
+ 					const struct flowi *oldflp,
+@@ -2295,68 +2135,6 @@
+ 	return err;
+ }
+ 
+-static inline int ip_mkroute_output(struct rtable** rp,
+-				    struct fib_result* res,
+-				    const struct flowi *fl,
+-				    const struct flowi *oldflp,
+-				    struct net_device *dev_out,
+-				    unsigned flags)
+-{
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	unsigned char hop;
+-	unsigned hash;
+-	int err = -EINVAL;
+-	struct rtable *rth = NULL;
+-
+-	if (res->fi && res->fi->fib_nhs > 1) {
+-		unsigned char hopcount = res->fi->fib_nhs;
+-
+-		for (hop = 0; hop < hopcount; hop++) {
+-			struct net_device *dev2nexthop;
+-
+-			res->nh_sel = hop;
+-
+-			/* hold a work reference to the output device */
+-			dev2nexthop = FIB_RES_DEV(*res);
+-			dev_hold(dev2nexthop);
+-
+-			/* put reference to previous result */
+-			if (hop)
+-				ip_rt_put(*rp);
+-
+-			err = __mkroute_output(&rth, res, fl, oldflp,
+-					       dev2nexthop, flags);
+-
+-			if (err != 0)
+-				goto cleanup;
+-
+-			hash = rt_hash(oldflp->fl4_dst, oldflp->fl4_src,
+-					oldflp->oif);
+-			err = rt_intern_hash(hash, rth, rp);
+-
+-			/* forward hop information to multipath impl. */
+-			multipath_set_nhinfo(rth,
+-					     FIB_RES_NETWORK(*res),
+-					     FIB_RES_NETMASK(*res),
+-					     res->prefixlen,
+-					     &FIB_RES_NH(*res));
+-		cleanup:
+-			/* release work reference to output device */
+-			dev_put(dev2nexthop);
+-
+-			if (err != 0)
+-				return err;
+-		}
+-		return err;
+-	} else {
+-		return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out,
+-					     flags);
+-	}
+-#else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
+-	return ip_mkroute_output_def(rp, res, fl, oldflp, dev_out, flags);
+-#endif
+-}
+-
+ /*
+  * Major route resolver routine.
+  */
+@@ -2570,17 +2348,6 @@
+ 		    rth->fl.mark == flp->mark &&
+ 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
+ 			    (IPTOS_RT_MASK | RTO_ONLINK))) {
+-
+-			/* check for multipath routes and choose one if
+-			 * necessary
+-			 */
+-			if (multipath_select_route(flp, rth, rp)) {
+-				dst_hold(&(*rp)->u.dst);
+-				RT_CACHE_STAT_INC(out_hit);
+-				rcu_read_unlock_bh();
+-				return 0;
+-			}
+-
+ 			rth->u.dst.lastuse = jiffies;
+ 			dst_hold(&rth->u.dst);
+ 			rth->u.dst.__use++;
+@@ -2729,10 +2496,6 @@
+ 	if (rt->u.dst.tclassid)
+ 		NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid);
+ #endif
+-#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
+-	if (rt->rt_multipath_alg != IP_MP_ALG_NONE)
+-		NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg);
+-#endif
+ 	if (rt->fl.iif)
+ 		NLA_PUT_BE32(skb, RTA_PREFSRC, rt->rt_spec_dst);
+ 	else if (rt->rt_src != rt->fl.fl4_src)
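
With the policy modules gone, route.c sheds every CONFIG_IP_ROUTE_MULTIPATH_CACHED branch: the multipath_remove()/multipath_flush() calls, the DST_BALANCED special cases in expiry and garbage collection, the multipath_select_route() fast path in the output lookup, and the ip_mkroute_input/output wrappers (the *_def variants simply take back the original names). The surviving free helpers reduce to plain RCU-deferred frees, i.e. after this hunk they read:

    static __inline__ void rt_free(struct rtable *rt)
    {
            call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
    }

    static __inline__ void rt_drop(struct rtable *rt)
    {
            ip_rt_put(rt);
            call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free);
    }
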
+diff -Nurb linux-2.6.22-570/net/ipv4/tcp_ipv4.c linux-2.6.22-try2/net/ipv4/tcp_ipv4.c
+--- linux-2.6.22-570/net/ipv4/tcp_ipv4.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/net/ipv4/tcp_ipv4.c	2007-12-19 15:29:23.000000000 -0500
+@@ -2054,10 +2054,7 @@
+ 		struct hlist_node *node;
+ 		struct inet_timewait_sock *tw;
+ 
+-		/* We can reschedule _before_ having picked the target: */
+-		cond_resched_softirq();
+-
+-		read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
++		read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
+ 			vxdprintk(VXD_CBIT(net, 6),
+ 				"sk,egf: %p [#%d] (from %d)",
+@@ -2082,7 +2079,7 @@
+ 			rc = tw;
+ 			goto out;
+ 		}
+-		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
++		read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ 		st->state = TCP_SEQ_STATE_ESTABLISHED;
+ 	}
+ out:
+@@ -2110,14 +2107,11 @@
+ 			cur = tw;
+ 			goto out;
+ 		}
+-		read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
++		read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ 		st->state = TCP_SEQ_STATE_ESTABLISHED;
+ 
+-		/* We can reschedule between buckets: */
+-		cond_resched_softirq();
+-
+ 		if (++st->bucket < tcp_hashinfo.ehash_size) {
+-			read_lock(&tcp_hashinfo.ehash[st->bucket].lock);
++			read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ 			sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);
+ 		} else {
+ 			cur = NULL;
+@@ -2167,7 +2161,6 @@
+ 
+ 	if (!rc) {
+ 		inet_listen_unlock(&tcp_hashinfo);
+-		local_bh_disable();
+ 		st->state = TCP_SEQ_STATE_ESTABLISHED;
+ 		rc	  = established_get_idx(seq, pos);
+ 	}
+@@ -2200,7 +2193,6 @@
+ 		rc = listening_get_next(seq, v);
+ 		if (!rc) {
+ 			inet_listen_unlock(&tcp_hashinfo);
+-			local_bh_disable();
+ 			st->state = TCP_SEQ_STATE_ESTABLISHED;
+ 			rc	  = established_get_first(seq);
+ 		}
+@@ -2232,8 +2224,7 @@
+ 	case TCP_SEQ_STATE_TIME_WAIT:
+ 	case TCP_SEQ_STATE_ESTABLISHED:
+ 		if (v)
+-			read_unlock(&tcp_hashinfo.ehash[st->bucket].lock);
+-		local_bh_enable();
++			read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ 		break;
+ 	}
+ }
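
The tcp_ipv4.c hunk reworks the /proc/net/tcp iterator's locking: instead of one long local_bh_disable() region with plain read_lock() per ehash bucket plus explicit cond_resched_softirq() calls, each bucket now takes read_lock_bh()/read_unlock_bh() on its own, so bottom halves are disabled only while a bucket lock is actually held and the walk can be preempted between buckets without manual rescheduling. Schematically (kernel API sketch, 2.6.22-era rwlock ehash):

    static void walk_established(rwlock_t *bucket_locks, int nbuckets)
    {
            int i;

            for (i = 0; i < nbuckets; i++) {
                    read_lock_bh(&bucket_locks[i]);
                    /* ... walk chain and timewait list of bucket i ... */
                    read_unlock_bh(&bucket_locks[i]);
            }
    }
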
+diff -Nurb linux-2.6.22-570/net/ipv4/xfrm4_tunnel.c linux-2.6.22-try2/net/ipv4/xfrm4_tunnel.c
+--- linux-2.6.22-570/net/ipv4/xfrm4_tunnel.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv4/xfrm4_tunnel.c	2007-12-19 15:29:23.000000000 -0500
+@@ -109,3 +109,4 @@
+ module_init(ipip_init);
+ module_exit(ipip_fini);
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP);
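
xfrm4_tunnel.c gains a MODULE_ALIAS_XFRM_TYPE line; the same pattern recurs below for ah6, esp6, ipcomp6, xfrm6_tunnel and mip6. The alias lets the xfrm core autoload the right type module by address family and protocol number when a state is created. Presumed expansion (a recollection of include/net/xfrm.h of this vintage, not verified here):

    #define MODULE_ALIAS_XFRM_TYPE(family, proto) \
            MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto))
    /* AF_INET (2) + XFRM_PROTO_IPIP (4) -> MODULE_ALIAS("xfrm-type-2-4") */
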
+diff -Nurb linux-2.6.22-570/net/ipv6/Kconfig linux-2.6.22-try2/net/ipv6/Kconfig
+--- linux-2.6.22-570/net/ipv6/Kconfig	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/net/ipv6/Kconfig	2007-12-19 15:29:23.000000000 -0500
+@@ -109,7 +109,7 @@
+ 	  If unsure, say Y.
+ 
+ config IPV6_MIP6
+-	bool "IPv6: Mobility (EXPERIMENTAL)"
++	tristate "IPv6: Mobility (EXPERIMENTAL)"
+ 	depends on IPV6 && EXPERIMENTAL
+ 	select XFRM
+ 	---help---
+diff -Nurb linux-2.6.22-570/net/ipv6/Makefile linux-2.6.22-try2/net/ipv6/Makefile
+--- linux-2.6.22-570/net/ipv6/Makefile	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv6/Makefile	2007-12-19 15:29:23.000000000 -0500
+@@ -14,7 +14,6 @@
+ 	xfrm6_output.o
+ ipv6-$(CONFIG_NETFILTER) += netfilter.o
+ ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
+-ipv6-$(CONFIG_IPV6_MIP6) += mip6.o
+ ipv6-$(CONFIG_PROC_FS) += proc.o
+ 
+ ipv6-objs += $(ipv6-y)
+@@ -28,6 +27,7 @@
+ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
+ obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
+ obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
++obj-$(CONFIG_IPV6_MIP6) += mip6.o
+ obj-$(CONFIG_NETFILTER)	+= netfilter/
+ 
+ obj-$(CONFIG_IPV6_SIT) += sit.o
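
IPV6_MIP6 goes from bool to tristate, and mip6.o moves out of the always-built ipv6-objs list into its own obj-$(CONFIG_IPV6_MIP6) rule so it can build as a module. That is also why the many #ifdef CONFIG_IPV6_MIP6 tests in the hunks below all become the two-symbol form: kconfig defines CONFIG_IPV6_MIP6_MODULE instead of CONFIG_IPV6_MIP6 when the option is =m, so built-in code that must cope with either build needs:

    #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
    /* compiled when MIP6 support is built in (=y) or modular (=m) */
    #endif
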
+diff -Nurb linux-2.6.22-570/net/ipv6/addrconf.c linux-2.6.22-try2/net/ipv6/addrconf.c
+--- linux-2.6.22-570/net/ipv6/addrconf.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/net/ipv6/addrconf.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1047,7 +1047,7 @@
+ 			}
+ 
+ 			/* Rule 4: Prefer home address */
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 			if (hiscore.rule < 4) {
+ 				if (ifa_result->flags & IFA_F_HOMEADDRESS)
+ 					hiscore.attrs |= IPV6_SADDR_SCORE_HOA;
+@@ -2861,7 +2861,7 @@
+ }
+ #endif	/* CONFIG_PROC_FS */
+ 
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ /* Check if address is a home address configured on any interface. */
+ int ipv6_chk_home_addr(struct in6_addr *addr)
+ {
+diff -Nurb linux-2.6.22-570/net/ipv6/af_inet6.c linux-2.6.22-try2/net/ipv6/af_inet6.c
+--- linux-2.6.22-570/net/ipv6/af_inet6.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/net/ipv6/af_inet6.c	2007-12-19 15:29:23.000000000 -0500
+@@ -59,9 +59,6 @@
+ #ifdef CONFIG_IPV6_TUNNEL
+ #include <net/ip6_tunnel.h>
+ #endif
+-#ifdef CONFIG_IPV6_MIP6
+-#include <net/mip6.h>
+-#endif
+ 
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+@@ -876,9 +873,6 @@
+ 	ipv6_frag_init();
+ 	ipv6_nodata_init();
+ 	ipv6_destopt_init();
+-#ifdef CONFIG_IPV6_MIP6
+-	mip6_init();
+-#endif
+ 
+ 	/* Init v6 transport protocols. */
+ 	udpv6_init();
+@@ -944,9 +938,7 @@
+ 
+ 	/* Cleanup code parts. */
+ 	ipv6_packet_cleanup();
+-#ifdef CONFIG_IPV6_MIP6
+-	mip6_fini();
+-#endif
++
+ 	addrconf_cleanup();
+ 	ip6_flowlabel_cleanup();
+ 	ip6_route_cleanup();
+diff -Nurb linux-2.6.22-570/net/ipv6/ah6.c linux-2.6.22-try2/net/ipv6/ah6.c
+--- linux-2.6.22-570/net/ipv6/ah6.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv6/ah6.c	2007-12-19 15:29:23.000000000 -0500
+@@ -74,7 +74,7 @@
+ 	return 0;
+ }
+ 
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ /**
+  *	ipv6_rearrange_destopt - rearrange IPv6 destination options header
+  *	@iph: IPv6 header
+@@ -132,6 +132,8 @@
+ bad:
+ 	return;
+ }
++#else
++static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) {}
+ #endif
+ 
+ /**
+@@ -189,10 +191,8 @@
+ 	while (exthdr.raw < end) {
+ 		switch (nexthdr) {
+ 		case NEXTHDR_DEST:
+-#ifdef CONFIG_IPV6_MIP6
+ 			if (dir == XFRM_POLICY_OUT)
+ 				ipv6_rearrange_destopt(iph, exthdr.opth);
+-#endif
+ 		case NEXTHDR_HOP:
+ 			if (!zero_out_mutable_opts(exthdr.opth)) {
+ 				LIMIT_NETDEBUG(
+@@ -228,7 +228,7 @@
+ 	u8 nexthdr;
+ 	char tmp_base[8];
+ 	struct {
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 		struct in6_addr saddr;
+ #endif
+ 		struct in6_addr daddr;
+@@ -255,7 +255,7 @@
+ 			err = -ENOMEM;
+ 			goto error;
+ 		}
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 		memcpy(tmp_ext, &top_iph->saddr, extlen);
+ #else
+ 		memcpy(tmp_ext, &top_iph->daddr, extlen);
+@@ -294,7 +294,7 @@
+ 
+ 	memcpy(top_iph, tmp_base, sizeof(tmp_base));
+ 	if (tmp_ext) {
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 		memcpy(&top_iph->saddr, tmp_ext, extlen);
+ #else
+ 		memcpy(&top_iph->daddr, tmp_ext, extlen);
+@@ -554,3 +554,4 @@
+ module_exit(ah6_fini);
+ 
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH);
+diff -Nurb linux-2.6.22-570/net/ipv6/datagram.c linux-2.6.22-try2/net/ipv6/datagram.c
+--- linux-2.6.22-570/net/ipv6/datagram.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv6/datagram.c	2007-12-19 15:29:23.000000000 -0500
+@@ -658,7 +658,7 @@
+ 
+ 			switch (rthdr->type) {
+ 			case IPV6_SRCRT_TYPE_0:
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 			case IPV6_SRCRT_TYPE_2:
+ #endif
+ 				break;
+diff -Nurb linux-2.6.22-570/net/ipv6/esp6.c linux-2.6.22-try2/net/ipv6/esp6.c
+--- linux-2.6.22-570/net/ipv6/esp6.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv6/esp6.c	2007-12-19 15:29:23.000000000 -0500
+@@ -421,3 +421,4 @@
+ module_exit(esp6_fini);
+ 
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP);
+diff -Nurb linux-2.6.22-570/net/ipv6/exthdrs.c linux-2.6.22-try2/net/ipv6/exthdrs.c
+--- linux-2.6.22-570/net/ipv6/exthdrs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv6/exthdrs.c	2007-12-19 15:29:23.000000000 -0500
+@@ -42,7 +42,7 @@
+ #include <net/ndisc.h>
+ #include <net/ip6_route.h>
+ #include <net/addrconf.h>
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ #include <net/xfrm.h>
+ #endif
+ 
+@@ -90,6 +90,7 @@
+  bad:
+ 	return -1;
+ }
++EXPORT_SYMBOL_GPL(ipv6_find_tlv);
+ 
+ /*
+  *	Parsing tlv encoded headers.
+@@ -196,7 +197,7 @@
+   Destination options header.
+  *****************************/
+ 
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
+ {
+ 	struct sk_buff *skb = *skbp;
+@@ -270,7 +271,7 @@
+ #endif
+ 
+ static struct tlvtype_proc tlvprocdestopt_lst[] = {
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 	{
+ 		.type	= IPV6_TLV_HAO,
+ 		.func	= ipv6_dest_hao,
+@@ -283,7 +284,7 @@
+ {
+ 	struct sk_buff *skb = *skbp;
+ 	struct inet6_skb_parm *opt = IP6CB(skb);
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 	__u16 dstbuf;
+ #endif
+ 	struct dst_entry *dst;
+@@ -298,7 +299,7 @@
+ 	}
+ 
+ 	opt->lastopt = opt->dst1 = skb_network_header_len(skb);
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 	dstbuf = opt->dst1;
+ #endif
+ 
+@@ -308,7 +309,7 @@
+ 		skb = *skbp;
+ 		skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
+ 		opt = IP6CB(skb);
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 		opt->nhoff = dstbuf;
+ #else
+ 		opt->nhoff = opt->dst1;
+@@ -427,7 +428,7 @@
+ looped_back:
+ 	if (hdr->segments_left == 0) {
+ 		switch (hdr->type) {
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 		case IPV6_SRCRT_TYPE_2:
+ 			/* Silently discard type 2 header unless it was
+ 			 * processed by own
+@@ -463,7 +464,7 @@
+ 			return -1;
+ 		}
+ 		break;
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 	case IPV6_SRCRT_TYPE_2:
+ 		/* Silently discard invalid RTH type 2 */
+ 		if (hdr->hdrlen != 2 || hdr->segments_left != 1) {
+@@ -520,7 +521,7 @@
+ 	addr += i - 1;
+ 
+ 	switch (hdr->type) {
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 	case IPV6_SRCRT_TYPE_2:
+ 		if (xfrm6_input_addr(skb, (xfrm_address_t *)addr,
+ 				     (xfrm_address_t *)&ipv6_hdr(skb)->saddr,
+diff -Nurb linux-2.6.22-570/net/ipv6/icmp.c linux-2.6.22-try2/net/ipv6/icmp.c
+--- linux-2.6.22-570/net/ipv6/icmp.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/net/ipv6/icmp.c	2007-12-19 15:29:23.000000000 -0500
+@@ -272,7 +272,7 @@
+ 	return 0;
+ }
+ 
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ static void mip6_addr_swap(struct sk_buff *skb)
+ {
+ 	struct ipv6hdr *iph = ipv6_hdr(skb);
+diff -Nurb linux-2.6.22-570/net/ipv6/ip6_output.c linux-2.6.22-try2/net/ipv6/ip6_output.c
+--- linux-2.6.22-570/net/ipv6/ip6_output.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/net/ipv6/ip6_output.c	2007-12-19 15:29:23.000000000 -0500
+@@ -543,7 +543,7 @@
+ 			found_rhdr = 1;
+ 			break;
+ 		case NEXTHDR_DEST:
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
+ 				break;
+ #endif
+diff -Nurb linux-2.6.22-570/net/ipv6/ipcomp6.c linux-2.6.22-try2/net/ipv6/ipcomp6.c
+--- linux-2.6.22-570/net/ipv6/ipcomp6.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/net/ipv6/ipcomp6.c	2007-12-19 15:29:23.000000000 -0500
+@@ -501,4 +501,4 @@
+ MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173");
+ MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>");
+ 
+-
++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP);
+diff -Nurb linux-2.6.22-570/net/ipv6/ipv6_sockglue.c linux-2.6.22-try2/net/ipv6/ipv6_sockglue.c
+--- linux-2.6.22-570/net/ipv6/ipv6_sockglue.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/net/ipv6/ipv6_sockglue.c	2007-12-19 15:29:23.000000000 -0500
+@@ -123,7 +123,7 @@
+ 	struct ipv6hdr *ipv6h;
+ 	struct inet6_protocol *ops;
+ 
+-	if (!(features & NETIF_F_HW_CSUM))
++	if (!(features & NETIF_F_V6_CSUM))
+ 		features &= ~NETIF_F_SG;
+ 
+ 	if (unlikely(skb_shinfo(skb)->gso_type &
+@@ -417,7 +417,7 @@
+ 			struct ipv6_rt_hdr *rthdr = opt->srcrt;
+ 			switch (rthdr->type) {
+ 			case IPV6_SRCRT_TYPE_0:
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 			case IPV6_SRCRT_TYPE_2:
+ #endif
+ 				break;
+diff -Nurb linux-2.6.22-570/net/ipv6/mip6.c linux-2.6.22-try2/net/ipv6/mip6.c
+--- linux-2.6.22-570/net/ipv6/mip6.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv6/mip6.c	2007-12-19 15:29:23.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include <net/sock.h>
+ #include <net/ipv6.h>
+ #include <net/ip6_checksum.h>
++#include <net/rawv6.h>
+ #include <net/xfrm.h>
+ #include <net/mip6.h>
+ 
+@@ -86,7 +87,7 @@
+ 	return len;
+ }
+ 
+-int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
++static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
+ {
+ 	struct ip6_mh *mh;
+ 
+@@ -471,7 +472,7 @@
+ 	.remote_addr	= mip6_xfrm_addr,
+ };
+ 
+-int __init mip6_init(void)
++static int __init mip6_init(void)
+ {
+ 	printk(KERN_INFO "Mobile IPv6\n");
+ 
+@@ -483,18 +484,35 @@
+ 		printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__);
+ 		goto mip6_rthdr_xfrm_fail;
+ 	}
++	if (rawv6_mh_filter_register(mip6_mh_filter) < 0) {
++		printk(KERN_INFO "%s: can't add rawv6 mh filter\n", __FUNCTION__);
++		goto mip6_rawv6_mh_fail;
++	}
++
++
+ 	return 0;
+ 
++ mip6_rawv6_mh_fail:
++	xfrm_unregister_type(&mip6_rthdr_type, AF_INET6);
+  mip6_rthdr_xfrm_fail:
+ 	xfrm_unregister_type(&mip6_destopt_type, AF_INET6);
+  mip6_destopt_xfrm_fail:
+ 	return -EAGAIN;
+ }
+ 
+-void __exit mip6_fini(void)
++static void __exit mip6_fini(void)
+ {
++	if (rawv6_mh_filter_unregister(mip6_mh_filter) < 0)
++		printk(KERN_INFO "%s: can't remove rawv6 mh filter\n", __FUNCTION__);
+ 	if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0)
+ 		printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__);
+ 	if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0)
+ 		printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__);
+ }
++
++module_init(mip6_init);
++module_exit(mip6_fini);
++
++MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS);
++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING);
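
mip6.c itself becomes loadable: mip6_init()/mip6_fini() turn static, module_init()/module_exit() and a license tag are added, MH filtering is routed through the new rawv6_mh_filter_register() hook (see the raw.c hunk below), and the init failure ladder grows a rung that unwinds the rthdr and destopt xfrm types in reverse registration order. The boilerplate the hunk adds, as a minimal skeleton (kernel module code, demo_ names invented):

    #include <linux/module.h>

    static int __init demo_init(void)  { return 0; }
    static void __exit demo_exit(void) { }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");
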
+diff -Nurb linux-2.6.22-570/net/ipv6/raw.c linux-2.6.22-try2/net/ipv6/raw.c
+--- linux-2.6.22-570/net/ipv6/raw.c	2007-12-12 18:08:34.000000000 -0500
++++ linux-2.6.22-try2/net/ipv6/raw.c	2007-12-19 15:29:23.000000000 -0500
+@@ -49,7 +49,7 @@
+ #include <net/udp.h>
+ #include <net/inet_common.h>
+ #include <net/tcp_states.h>
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ #include <net/mip6.h>
+ #endif
+ 
+@@ -137,6 +137,28 @@
+ 	return 0;
+ }
+ 
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
++static int (*mh_filter)(struct sock *sock, struct sk_buff *skb);
++
++int rawv6_mh_filter_register(int (*filter)(struct sock *sock,
++					   struct sk_buff *skb))
++{
++	rcu_assign_pointer(mh_filter, filter);
++	return 0;
++}
++EXPORT_SYMBOL(rawv6_mh_filter_register);
++
++int rawv6_mh_filter_unregister(int (*filter)(struct sock *sock,
++					     struct sk_buff *skb))
++{
++	rcu_assign_pointer(mh_filter, NULL);
++	synchronize_rcu();
++	return 0;
++}
++EXPORT_SYMBOL(rawv6_mh_filter_unregister);
++
++#endif
++
+ /*
+  *	demultiplex raw sockets.
+  *	(should consider queueing the skb in the sock receive_queue
+@@ -178,16 +200,22 @@
+ 		case IPPROTO_ICMPV6:
+ 			filtered = icmpv6_filter(sk, skb);
+ 			break;
+-#ifdef CONFIG_IPV6_MIP6
++
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 		case IPPROTO_MH:
++		{
+ 			/* XXX: To validate MH only once for each packet,
+ 			 * this is placed here. It should be after checking
+ 			 * xfrm policy, however it doesn't. The checking xfrm
+ 			 * policy is placed in rawv6_rcv() because it is
+ 			 * required for each socket.
+ 			 */
+-			filtered = mip6_mh_filter(sk, skb);
++			int (*filter)(struct sock *sock, struct sk_buff *skb);
++
++			filter = rcu_dereference(mh_filter);
++			filtered = filter ? filter(sk, skb) : 0;
+ 			break;
++		}
+ #endif
+ 		default:
+ 			filtered = 0;
+@@ -611,9 +639,7 @@
+ 	struct iovec *iov;
+ 	u8 __user *type = NULL;
+ 	u8 __user *code = NULL;
+-#ifdef CONFIG_IPV6_MIP6
+ 	u8 len = 0;
+-#endif
+ 	int probed = 0;
+ 	int i;
+ 
+@@ -646,7 +672,6 @@
+ 				probed = 1;
+ 			}
+ 			break;
+-#ifdef CONFIG_IPV6_MIP6
+ 		case IPPROTO_MH:
+ 			if (iov->iov_base && iov->iov_len < 1)
+ 				break;
+@@ -660,7 +685,6 @@
+ 				len += iov->iov_len;
+ 
+ 			break;
+-#endif
+ 		default:
+ 			probed = 1;
+ 			break;
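
The raw.c hunk is the other half of the MIP6 modularisation: a single RCU-protected function pointer replaces the direct mip6_mh_filter() call. Registration publishes with rcu_assign_pointer(), the demux path samples the pointer once with rcu_dereference() (NULL simply means no filter loaded), and unregistration stores NULL and then synchronize_rcu()s so in-flight readers drain before the filter module can go away. The reader side, reduced to its shape (kernel API sketch, not standalone):

    static int (*mh_hook)(struct sock *sk, struct sk_buff *skb);

    static int run_mh_hook(struct sock *sk, struct sk_buff *skb)
    {
            int (*f)(struct sock *sk, struct sk_buff *skb);

            f = rcu_dereference(mh_hook);   /* sample the pointer exactly once */
            return f ? f(sk, skb) : 0;      /* NULL: no filter module loaded */
    }
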
+diff -Nurb linux-2.6.22-570/net/ipv6/xfrm6_policy.c linux-2.6.22-try2/net/ipv6/xfrm6_policy.c
+--- linux-2.6.22-570/net/ipv6/xfrm6_policy.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/net/ipv6/xfrm6_policy.c	2007-12-19 15:29:23.000000000 -0500
+@@ -18,7 +18,7 @@
+ #include <net/ip.h>
+ #include <net/ipv6.h>
+ #include <net/ip6_route.h>
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ #include <net/mip6.h>
+ #endif
+ 
+@@ -318,7 +318,7 @@
+ 			fl->proto = nexthdr;
+ 			return;
+ 
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 		case IPPROTO_MH:
+ 			if (pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
+ 				struct ip6_mh *mh;
+diff -Nurb linux-2.6.22-570/net/ipv6/xfrm6_state.c linux-2.6.22-try2/net/ipv6/xfrm6_state.c
+--- linux-2.6.22-570/net/ipv6/xfrm6_state.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv6/xfrm6_state.c	2007-12-19 15:29:23.000000000 -0500
+@@ -65,7 +65,7 @@
+ 		goto end;
+ 
+ 	/* Rule 2: select MIPv6 RO or inbound trigger */
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 	for (i = 0; i < n; i++) {
+ 		if (src[i] &&
+ 		    (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION ||
+@@ -130,7 +130,7 @@
+ 		goto end;
+ 
+ 	/* Rule 2: select MIPv6 RO or inbound trigger */
+-#ifdef CONFIG_IPV6_MIP6
++#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ 	for (i = 0; i < n; i++) {
+ 		if (src[i] &&
+ 		    (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION ||
+diff -Nurb linux-2.6.22-570/net/ipv6/xfrm6_tunnel.c linux-2.6.22-try2/net/ipv6/xfrm6_tunnel.c
+--- linux-2.6.22-570/net/ipv6/xfrm6_tunnel.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/ipv6/xfrm6_tunnel.c	2007-12-19 15:29:23.000000000 -0500
+@@ -379,3 +379,4 @@
+ module_init(xfrm6_tunnel_init);
+ module_exit(xfrm6_tunnel_fini);
+ MODULE_LICENSE("GPL");
++MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6);
+diff -Nurb linux-2.6.22-570/net/irda/irias_object.c linux-2.6.22-try2/net/irda/irias_object.c
+--- linux-2.6.22-570/net/irda/irias_object.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/irda/irias_object.c	2007-12-19 15:29:23.000000000 -0500
+@@ -36,39 +36,6 @@
+  */
+ struct ias_value irias_missing = { IAS_MISSING, 0, 0, 0, {0}};
+ 
+-/*
+- * Function strndup (str, max)
+- *
+- *    My own kernel version of strndup!
+- *
+- * Faster, check boundary... Jean II
+- */
+-static char *strndup(char *str, size_t max)
+-{
+-	char *new_str;
+-	int len;
+-
+-	/* Check string */
+-	if (str == NULL)
+-		return NULL;
+-	/* Check length, truncate */
+-	len = strlen(str);
+-	if(len > max)
+-		len = max;
+-
+-	/* Allocate new string */
+-	new_str = kmalloc(len + 1, GFP_ATOMIC);
+-	if (new_str == NULL) {
+-		IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
+-		return NULL;
+-	}
+-
+-	/* Copy and truncate */
+-	memcpy(new_str, str, len);
+-	new_str[len] = '\0';
+-
+-	return new_str;
+-}
+ 
+ /*
+  * Function ias_new_object (name, id)
+@@ -90,7 +57,7 @@
+ 	}
+ 
+ 	obj->magic = IAS_OBJECT_MAGIC;
+-	obj->name = strndup(name, IAS_MAX_CLASSNAME);
++	obj->name = kstrndup(name, IAS_MAX_CLASSNAME, GFP_ATOMIC);
+ 	if (!obj->name) {
+ 		IRDA_WARNING("%s(), Unable to allocate name!\n",
+ 			     __FUNCTION__);
+@@ -360,7 +327,7 @@
+ 	}
+ 
+ 	attrib->magic = IAS_ATTRIB_MAGIC;
+-	attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
++	attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC);
+ 
+ 	/* Insert value */
+ 	attrib->value = irias_new_integer_value(value);
+@@ -404,7 +371,7 @@
+ 	}
+ 
+ 	attrib->magic = IAS_ATTRIB_MAGIC;
+-	attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
++	attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC);
+ 
+ 	attrib->value = irias_new_octseq_value( octets, len);
+ 	if (!attrib->name || !attrib->value) {
+@@ -446,7 +413,7 @@
+ 	}
+ 
+ 	attrib->magic = IAS_ATTRIB_MAGIC;
+-	attrib->name = strndup(name, IAS_MAX_ATTRIBNAME);
++	attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC);
+ 
+ 	attrib->value = irias_new_string_value(value);
+ 	if (!attrib->name || !attrib->value) {
+@@ -506,7 +473,7 @@
+ 
+ 	value->type = IAS_STRING;
+ 	value->charset = CS_ASCII;
+-	value->t.string = strndup(string, IAS_MAX_STRING);
++	value->t.string = kstrndup(string, IAS_MAX_STRING, GFP_ATOMIC);
+ 	if (!value->t.string) {
+ 		IRDA_WARNING("%s: Unable to kmalloc!\n", __FUNCTION__);
+ 		kfree(value);
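
irias_object.c drops its private strndup() in favour of the kernel's kstrndup(str, max, GFP_ATOMIC). Besides removing duplicated code, kstrndup bounds the scan with strnlen() where the old helper ran a full strlen() before truncating. A userspace model of the semantics (gfp mask omitted):

    #define _POSIX_C_SOURCE 200809L
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static char *kstrndup_demo(const char *s, size_t max)
    {
            size_t len;
            char *p;

            if (!s)
                    return NULL;
            len = strnlen(s, max);          /* never scan past max */
            p = malloc(len + 1);
            if (!p)
                    return NULL;
            memcpy(p, s, len);
            p[len] = '\0';                  /* always NUL-terminated */
            return p;
    }

    int main(void)
    {
            char *p = kstrndup_demo("Device:IrDA", 6);

            printf("%s\n", p);              /* prints "Device" */
            free(p);
            return 0;
    }
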
+diff -Nurb linux-2.6.22-570/net/mac80211/ieee80211_ioctl.c linux-2.6.22-try2/net/mac80211/ieee80211_ioctl.c
+--- linux-2.6.22-570/net/mac80211/ieee80211_ioctl.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/mac80211/ieee80211_ioctl.c	2007-12-19 15:29:23.000000000 -0500
+@@ -838,6 +838,29 @@
+ }
+ 
+ 
++static int ieee80211_ioctl_giwrate(struct net_device *dev,
++				  struct iw_request_info *info,
++				  struct iw_param *rate, char *extra)
++{
++	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
++	struct sta_info *sta;
++	struct ieee80211_sub_if_data *sdata;
++
++	sdata = IEEE80211_DEV_TO_SUB_IF(dev);
++	if (sdata->type == IEEE80211_IF_TYPE_STA)
++		sta = sta_info_get(local, sdata->u.sta.bssid);
++	else
++		return -EOPNOTSUPP;
++	if (!sta)
++		return -ENODEV;
++	if (sta->txrate < local->oper_hw_mode->num_rates)
++		rate->value = local->oper_hw_mode->rates[sta->txrate].rate * 100000;
++	else
++		rate->value = 0;
++	sta_info_put(sta);
++	return 0;
++}
++
+ static int ieee80211_ioctl_siwrts(struct net_device *dev,
+ 				  struct iw_request_info *info,
+ 				  struct iw_param *rts, char *extra)
+@@ -1779,7 +1802,7 @@
+ 	(iw_handler) NULL,				/* -- hole -- */
+ 	(iw_handler) NULL,				/* -- hole -- */
+ 	(iw_handler) NULL,				/* SIOCSIWRATE */
+-	(iw_handler) NULL,				/* SIOCGIWRATE */
++	(iw_handler) ieee80211_ioctl_giwrate,		/* SIOCGIWRATE */
+ 	(iw_handler) ieee80211_ioctl_siwrts,		/* SIOCSIWRTS */
+ 	(iw_handler) ieee80211_ioctl_giwrts,		/* SIOCGIWRTS */
+ 	(iw_handler) ieee80211_ioctl_siwfrag,		/* SIOCSIWFRAG */
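
mac80211 gains ieee80211_ioctl_giwrate() and wires it into the previously empty SIOCGIWRATE slot, so tools such as iwconfig can report the current TX bit rate on STA interfaces. sta->txrate indexes oper_hw_mode->rates[], whose .rate field is (as best recalled) in units of 100 kbit/s; wireless extensions want bit/s, hence the *100000. The unit conversion, checked in isolation:

    #include <stdio.h>

    int main(void)
    {
            int rate_100kbps = 540;                 /* a 54 Mbit/s rate entry */
            long bps = (long)rate_100kbps * 100000; /* as in the handler */

            printf("%ld bit/s (%.0f Mbit/s)\n", bps, bps / 1e6);
            return 0;
    }
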
+diff -Nurb linux-2.6.22-570/net/netfilter/core.c linux-2.6.22-try2/net/netfilter/core.c
+--- linux-2.6.22-570/net/netfilter/core.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/netfilter/core.c	2007-12-19 15:29:23.000000000 -0500
+@@ -203,7 +203,9 @@
+ 		return 0;
+ 
+ 	/* Not exclusive use of packet?  Must copy. */
+-	if (skb_shared(*pskb) || skb_cloned(*pskb))
++	if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len))
++		goto copy_skb;
++	if (skb_shared(*pskb))
+ 		goto copy_skb;
+ 
+ 	return pskb_may_pull(*pskb, writable_len);
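
skb_make_writable() in netfilter/core.c stops copying every cloned skb: a clone is now copied only when skb_clone_writable() says the first writable_len bytes cannot be edited in place, while shared skbs still always copy. The new decision as a pure truth-table demo (must_copy() is an invented stand-in for the kernel predicates):

    #include <stdio.h>

    static int must_copy(int shared, int cloned, int clone_writable)
    {
            if (cloned && !clone_writable)
                    return 1;
            return shared;
    }

    int main(void)
    {
            /* old code copied for any clone; a writable clone now avoids it */
            printf("writable clone : copy=%d\n", must_copy(0, 1, 1)); /* 0 */
            printf("shared-hdr clone: copy=%d\n", must_copy(0, 1, 0)); /* 1 */
            printf("shared skb      : copy=%d\n", must_copy(1, 0, 0)); /* 1 */
            return 0;
    }
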
+diff -Nurb linux-2.6.22-570/net/netlink/attr.c linux-2.6.22-try2/net/netlink/attr.c
+--- linux-2.6.22-570/net/netlink/attr.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/netlink/attr.c	2007-12-19 15:29:23.000000000 -0500
+@@ -72,6 +72,17 @@
+ 			return -ERANGE;
+ 		break;
+ 
++	case NLA_NESTED_COMPAT:
++		if (attrlen < pt->len)
++			return -ERANGE;
++		if (attrlen < NLA_ALIGN(pt->len))
++			break;
++		if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN)
++			return -ERANGE;
++		nla = nla_data(nla) + NLA_ALIGN(pt->len);
++		if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla))
++			return -ERANGE;
++		break;
+ 	default:
+ 		if (pt->len)
+ 			minlen = pt->len;
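
attr.c teaches the netlink validator about NLA_NESTED_COMPAT: a fixed-size binary head of pt->len bytes, padded to NLA_ALIGN, optionally followed by one nested attribute whose header and payload must still fit inside attrlen. The checks run incrementally so a short attribute fails at the first violated boundary. The offset arithmetic, spelled out (pt_len and the payload size are invented):

    #include <stdio.h>

    #define NLA_ALIGNTO     4
    #define NLA_ALIGN(len)  (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
    #define NLA_HDRLEN      4   /* aligned sizeof(struct nlattr): 2+2 bytes */

    int main(void)
    {
            int pt_len = 6;             /* hypothetical fixed-struct size */
            int nested_payload = 10;
            int need = NLA_ALIGN(pt_len) + NLA_HDRLEN + nested_payload;

            printf("fixed part ends at %d, nested attr needs %d total\n",
                   NLA_ALIGN(pt_len), need);
            return 0;
    }
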
+diff -Nurb linux-2.6.22-570/net/sched/sch_generic.c linux-2.6.22-try2/net/sched/sch_generic.c
+--- linux-2.6.22-570/net/sched/sch_generic.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/sched/sch_generic.c	2007-12-19 15:29:23.000000000 -0500
+@@ -59,122 +59,143 @@
+ 	spin_unlock_bh(&dev->queue_lock);
+ }
+ 
+-/*
+-   dev->queue_lock serializes queue accesses for this device
+-   AND dev->qdisc pointer itself.
++static inline int qdisc_qlen(struct Qdisc *q)
++{
++	return q->q.qlen;
++}
+ 
+-   netif_tx_lock serializes accesses to device driver.
++static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev,
++				  struct Qdisc *q)
++{
++	if (unlikely(skb->next))
++		dev->gso_skb = skb;
++	else
++		q->ops->requeue(skb, q);
+ 
+-   dev->queue_lock and netif_tx_lock are mutually exclusive,
+-   if one is grabbed, another must be free.
+- */
++	netif_schedule(dev);
++	return 0;
++}
+ 
++static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev,
++					      struct Qdisc *q)
++{
++	struct sk_buff *skb;
+ 
+-/* Kick device.
++	if ((skb = dev->gso_skb))
++		dev->gso_skb = NULL;
++	else
++		skb = q->dequeue(q);
+ 
+-   Returns:  0  - queue is empty or throttled.
+-	    >0  - queue is not empty.
++	return skb;
++}
+ 
+-   NOTE: Called under dev->queue_lock with locally disabled BH.
+-*/
++static inline int handle_dev_cpu_collision(struct sk_buff *skb,
++					   struct net_device *dev,
++					   struct Qdisc *q)
++{
++	int ret;
+ 
++	if (unlikely(dev->xmit_lock_owner == smp_processor_id())) {
++		/*
++		 * Same CPU holding the lock. It may be a transient
++		 * configuration error, when hard_start_xmit() recurses. We
++		 * detect it by checking xmit owner and drop the packet when
++		 * deadloop is detected. Return OK to try the next skb.
++		 */
++		kfree_skb(skb);
++		if (net_ratelimit())
++			printk(KERN_WARNING "Dead loop on netdevice %s, "
++			       "fix it urgently!\n", dev->name);
++		ret = qdisc_qlen(q);
++	} else {
++		/*
++		 * Another cpu is holding lock, requeue & delay xmits for
++		 * some time.
++		 */
++		__get_cpu_var(netdev_rx_stat).cpu_collision++;
++		ret = dev_requeue_skb(skb, dev, q);
++	}
++
++	return ret;
++}
++
++/*
++ * NOTE: Called under dev->queue_lock with locally disabled BH.
++ *
++ * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this
++ * device at a time. dev->queue_lock serializes queue accesses for
++ * this device AND dev->qdisc pointer itself.
++ *
++ *  netif_tx_lock serializes accesses to device driver.
++ *
++ *  dev->queue_lock and netif_tx_lock are mutually exclusive,
++ *  if one is grabbed, another must be free.
++ *
++ * Note, that this procedure can be called by a watchdog timer
++ *
++ * Returns to the caller:
++ *				0  - queue is empty or throttled.
++ *				>0 - queue is not empty.
++ *
++ */
+ static inline int qdisc_restart(struct net_device *dev)
+ {
+ 	struct Qdisc *q = dev->qdisc;
+ 	struct sk_buff *skb;
++	unsigned lockless;
++	int ret;
+ 
+ 	/* Dequeue packet */
+-	if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) {
+-		unsigned nolock = (dev->features & NETIF_F_LLTX);
+-
+-		dev->gso_skb = NULL;
++	if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL))
++		return 0;
+ 
+ 		/*
+-		 * When the driver has LLTX set it does its own locking
+-		 * in start_xmit. No need to add additional overhead by
+-		 * locking again. These checks are worth it because
+-		 * even uncongested locks can be quite expensive.
+-		 * The driver can do trylock like here too, in case
+-		 * of lock congestion it should return -1 and the packet
+-		 * will be requeued.
+-		 */
+-		if (!nolock) {
+-			if (!netif_tx_trylock(dev)) {
+-			collision:
+-				/* So, someone grabbed the driver. */
+-
+-				/* It may be transient configuration error,
+-				   when hard_start_xmit() recurses. We detect
+-				   it by checking xmit owner and drop the
+-				   packet when deadloop is detected.
++	 * When the driver has LLTX set, it does its own locking in
++	 * start_xmit. These checks are worth it because even uncongested
++	 * locks can be quite expensive. The driver can do a trylock, as
++	 * is being done here; in case of lock contention it should return
++	 * NETDEV_TX_LOCKED and the packet will be requeued.
+ 				*/
+-				if (dev->xmit_lock_owner == smp_processor_id()) {
+-					kfree_skb(skb);
+-					if (net_ratelimit())
+-						printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+-					goto out;
+-				}
+-				__get_cpu_var(netdev_rx_stat).cpu_collision++;
+-				goto requeue;
+-			}
++	lockless = (dev->features & NETIF_F_LLTX);
++
++	if (!lockless && !netif_tx_trylock(dev)) {
++		/* Another CPU grabbed the driver tx lock */
++		return handle_dev_cpu_collision(skb, dev, q);
+ 		}
+ 
+-		{
+ 			/* And release queue */
+ 			spin_unlock(&dev->queue_lock);
+ 
+-			if (!netif_queue_stopped(dev)) {
+-				int ret;
+-
+ 				ret = dev_hard_start_xmit(skb, dev);
+-				if (ret == NETDEV_TX_OK) {
+-					if (!nolock) {
+-						netif_tx_unlock(dev);
+-					}
+-					spin_lock(&dev->queue_lock);
+-					q = dev->qdisc;
+-					goto out;
+-				}
+-				if (ret == NETDEV_TX_LOCKED && nolock) {
+-					spin_lock(&dev->queue_lock);
+-					q = dev->qdisc;
+-					goto collision;
+-				}
+-			}
+ 
+-			/* NETDEV_TX_BUSY - we need to requeue */
+-			/* Release the driver */
+-			if (!nolock) {
++	if (!lockless)
+ 				netif_tx_unlock(dev);
+-			}
++
+ 			spin_lock(&dev->queue_lock);
+ 			q = dev->qdisc;
+-		}
+ 
+-		/* Device kicked us out :(
+-		   This is possible in three cases:
++	switch (ret) {
++	case NETDEV_TX_OK:
++		/* Driver sent out skb successfully */
++		ret = qdisc_qlen(q);
++		break;
+ 
+-		   0. driver is locked
+-		   1. fastroute is enabled
+-		   2. device cannot determine busy state
+-		      before start of transmission (f.e. dialout)
+-		   3. device is buggy (ppp)
+-		 */
++	case NETDEV_TX_LOCKED:
++		/* Driver try lock failed */
++		ret = handle_dev_cpu_collision(skb, dev, q);
++		break;
+ 
+-requeue:
+-		if (unlikely(q == &noop_qdisc))
+-			kfree_skb(skb);
+-		else if (skb->next)
+-			dev->gso_skb = skb;
+-		else
+-			q->ops->requeue(skb, q);
+-		netif_schedule(dev);
++	default:
++		/* Driver returned NETDEV_TX_BUSY - requeue skb */
++		if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
++			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
++			       dev->name, ret, q->q.qlen);
++
++		ret = dev_requeue_skb(skb, dev, q);
++		break;
+ 	}
+-	return 0;
+ 
+-out:
+-	BUG_ON((int) q->q.qlen < 0);
+-	return q->q.qlen;
++	return ret;
+ }
+ 
+ void __qdisc_run(struct net_device *dev)
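
The sch_generic.c hunk above restructures qdisc_restart() around three small helpers (dev_dequeue_skb, dev_requeue_skb, handle_dev_cpu_collision) and a switch on the driver's NETDEV_TX_* return, replacing the goto-heavy original; note that the netif_queue_stopped() test disappears from this path, and an unexpected return code now logs a ratelimited BUG warning before requeueing. The resulting control flow, schematically (comment sketch of the code above, not new code):

    /*
     *   skb = dev_dequeue_skb(dev, q);        // gso_skb first, else q->dequeue
     *   if (!skb) return 0;                   // queue empty: stop __qdisc_run
     *   if (!LLTX && !netif_tx_trylock(dev))
     *           return handle_dev_cpu_collision(skb, dev, q);
     *   unlock queue; ret = dev_hard_start_xmit(skb, dev); relock queue;
     *   switch (ret) {
     *   case NETDEV_TX_OK:     return qdisc_qlen(q);   // >0 keeps the loop going
     *   case NETDEV_TX_LOCKED: return handle_dev_cpu_collision(skb, dev, q);
     *   default:               return dev_requeue_skb(skb, dev, q); // TX_BUSY
     *   }
     */
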
+diff -Nurb linux-2.6.22-570/net/sunrpc/auth.c linux-2.6.22-try2/net/sunrpc/auth.c
+--- linux-2.6.22-570/net/sunrpc/auth.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/net/sunrpc/auth.c	2007-12-19 15:29:23.000000000 -0500
+@@ -19,12 +19,16 @@
+ # define RPCDBG_FACILITY	RPCDBG_AUTH
+ #endif
+ 
+-static struct rpc_authops *	auth_flavors[RPC_AUTH_MAXFLAVOR] = {
++static DEFINE_SPINLOCK(rpc_authflavor_lock);
++static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
+ 	&authnull_ops,		/* AUTH_NULL */
+ 	&authunix_ops,		/* AUTH_UNIX */
+ 	NULL,			/* others can be loadable modules */
+ };
+ 
++static LIST_HEAD(cred_unused);
++static unsigned long number_cred_unused;
++
+ static u32
+ pseudoflavor_to_flavor(u32 flavor) {
+ 	if (flavor >= RPC_AUTH_MAXFLAVOR)
+@@ -33,55 +37,67 @@
+ }
+ 
+ int
+-rpcauth_register(struct rpc_authops *ops)
++rpcauth_register(const struct rpc_authops *ops)
+ {
+ 	rpc_authflavor_t flavor;
++	int ret = -EPERM;
+ 
+ 	if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
+ 		return -EINVAL;
+-	if (auth_flavors[flavor] != NULL)
+-		return -EPERM;		/* what else? */
++	spin_lock(&rpc_authflavor_lock);
++	if (auth_flavors[flavor] == NULL) {
+ 	auth_flavors[flavor] = ops;
+-	return 0;
++		ret = 0;
++	}
++	spin_unlock(&rpc_authflavor_lock);
++	return ret;
+ }
+ 
+ int
+-rpcauth_unregister(struct rpc_authops *ops)
++rpcauth_unregister(const struct rpc_authops *ops)
+ {
+ 	rpc_authflavor_t flavor;
++	int ret = -EPERM;
+ 
+ 	if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
+ 		return -EINVAL;
+-	if (auth_flavors[flavor] != ops)
+-		return -EPERM;		/* what else? */
++	spin_lock(&rpc_authflavor_lock);
++	if (auth_flavors[flavor] == ops) {
+ 	auth_flavors[flavor] = NULL;
+-	return 0;
++		ret = 0;
++	}
++	spin_unlock(&rpc_authflavor_lock);
++	return ret;
+ }
+ 
+ struct rpc_auth *
+ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
+ {
+ 	struct rpc_auth		*auth;
+-	struct rpc_authops	*ops;
++	const struct rpc_authops *ops;
+ 	u32			flavor = pseudoflavor_to_flavor(pseudoflavor);
+ 
+ 	auth = ERR_PTR(-EINVAL);
+ 	if (flavor >= RPC_AUTH_MAXFLAVOR)
+ 		goto out;
+ 
+-	/* FIXME - auth_flavors[] really needs an rw lock,
+-	 * and module refcounting. */
+ #ifdef CONFIG_KMOD
+ 	if ((ops = auth_flavors[flavor]) == NULL)
+ 		request_module("rpc-auth-%u", flavor);
+ #endif
+-	if ((ops = auth_flavors[flavor]) == NULL)
++	spin_lock(&rpc_authflavor_lock);
++	ops = auth_flavors[flavor];
++	if (ops == NULL || !try_module_get(ops->owner)) {
++		spin_unlock(&rpc_authflavor_lock);
+ 		goto out;
++	}
++	spin_unlock(&rpc_authflavor_lock);
+ 	auth = ops->create(clnt, pseudoflavor);
++	module_put(ops->owner);
+ 	if (IS_ERR(auth))
+ 		return auth;
+ 	if (clnt->cl_auth)
+-		rpcauth_destroy(clnt->cl_auth);
++		rpcauth_release(clnt->cl_auth);
+ 	clnt->cl_auth = auth;
+ 
+ out:
+@@ -89,7 +105,7 @@
+ }
+ 
+ void
+-rpcauth_destroy(struct rpc_auth *auth)
++rpcauth_release(struct rpc_auth *auth)
+ {
+ 	if (!atomic_dec_and_test(&auth->au_count))
+ 		return;
+@@ -98,11 +114,31 @@
+ 
+ static DEFINE_SPINLOCK(rpc_credcache_lock);
+ 
++static void
++rpcauth_unhash_cred_locked(struct rpc_cred *cred)
++{
++	hlist_del_rcu(&cred->cr_hash);
++	smp_mb__before_clear_bit();
++	clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
++}
++
++static void
++rpcauth_unhash_cred(struct rpc_cred *cred)
++{
++	spinlock_t *cache_lock;
++
++	cache_lock = &cred->cr_auth->au_credcache->lock;
++	spin_lock(cache_lock);
++	if (atomic_read(&cred->cr_count) == 0)
++		rpcauth_unhash_cred_locked(cred);
++	spin_unlock(cache_lock);
++}
++
+ /*
+  * Initialize RPC credential cache
+  */
+ int
+-rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire)
++rpcauth_init_credcache(struct rpc_auth *auth)
+ {
+ 	struct rpc_cred_cache *new;
+ 	int i;
+@@ -112,8 +148,7 @@
+ 		return -ENOMEM;
+ 	for (i = 0; i < RPC_CREDCACHE_NR; i++)
+ 		INIT_HLIST_HEAD(&new->hashtable[i]);
+-	new->expire = expire;
+-	new->nextgc = jiffies + (expire >> 1);
++	spin_lock_init(&new->lock);
+ 	auth->au_credcache = new;
+ 	return 0;
+ }
+@@ -122,13 +157,13 @@
+  * Destroy a list of credentials
+  */
+ static inline
+-void rpcauth_destroy_credlist(struct hlist_head *head)
++void rpcauth_destroy_credlist(struct list_head *head)
+ {
+ 	struct rpc_cred *cred;
+ 
+-	while (!hlist_empty(head)) {
+-		cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
+-		hlist_del_init(&cred->cr_hash);
++	while (!list_empty(head)) {
++		cred = list_entry(head->next, struct rpc_cred, cr_lru);
++		list_del_init(&cred->cr_lru);
+ 		put_rpccred(cred);
+ 	}
+ }
+@@ -138,58 +173,95 @@
+  * that are not referenced.
+  */
+ void
+-rpcauth_free_credcache(struct rpc_auth *auth)
++rpcauth_clear_credcache(struct rpc_cred_cache *cache)
+ {
+-	struct rpc_cred_cache *cache = auth->au_credcache;
+-	HLIST_HEAD(free);
+-	struct hlist_node *pos, *next;
++	LIST_HEAD(free);
++	struct hlist_head *head;
+ 	struct rpc_cred	*cred;
+ 	int		i;
+ 
+ 	spin_lock(&rpc_credcache_lock);
++	spin_lock(&cache->lock);
+ 	for (i = 0; i < RPC_CREDCACHE_NR; i++) {
+-		hlist_for_each_safe(pos, next, &cache->hashtable[i]) {
+-			cred = hlist_entry(pos, struct rpc_cred, cr_hash);
+-			__hlist_del(&cred->cr_hash);
+-			hlist_add_head(&cred->cr_hash, &free);
++		head = &cache->hashtable[i];
++		while (!hlist_empty(head)) {
++			cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
++			get_rpccred(cred);
++			if (!list_empty(&cred->cr_lru)) {
++				list_del(&cred->cr_lru);
++				number_cred_unused--;
+ 		}
++			list_add_tail(&cred->cr_lru, &free);
++			rpcauth_unhash_cred_locked(cred);
+ 	}
++	}
++	spin_unlock(&cache->lock);
+ 	spin_unlock(&rpc_credcache_lock);
+ 	rpcauth_destroy_credlist(&free);
+ }
+ 
+-static void
+-rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free)
++/*
++ * Destroy the RPC credential cache
++ */
++void
++rpcauth_destroy_credcache(struct rpc_auth *auth)
+ {
+-	if (atomic_read(&cred->cr_count) != 1)
+-	       return;
+-	if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire))
+-		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+-	if (!(cred->cr_flags & RPCAUTH_CRED_UPTODATE)) {
+-		__hlist_del(&cred->cr_hash);
+-		hlist_add_head(&cred->cr_hash, free);
++	struct rpc_cred_cache *cache = auth->au_credcache;
++
++	if (cache) {
++		auth->au_credcache = NULL;
++		rpcauth_clear_credcache(cache);
++		kfree(cache);
+ 	}
+ }
+ 
+ /*
+  * Remove stale credentials. Avoid sleeping inside the loop.
+  */
+-static void
+-rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free)
++static int
++rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
+ {
+-	struct rpc_cred_cache *cache = auth->au_credcache;
+-	struct hlist_node *pos, *next;
++	spinlock_t *cache_lock;
+ 	struct rpc_cred	*cred;
+-	int		i;
+ 
+-	dprintk("RPC:       gc'ing RPC credentials for auth %p\n", auth);
+-	for (i = 0; i < RPC_CREDCACHE_NR; i++) {
+-		hlist_for_each_safe(pos, next, &cache->hashtable[i]) {
+-			cred = hlist_entry(pos, struct rpc_cred, cr_hash);
+-			rpcauth_prune_expired(auth, cred, free);
++	while (!list_empty(&cred_unused)) {
++		cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru);
++		list_del_init(&cred->cr_lru);
++		number_cred_unused--;
++		if (atomic_read(&cred->cr_count) != 0)
++			continue;
++		cache_lock = &cred->cr_auth->au_credcache->lock;
++		spin_lock(cache_lock);
++		if (atomic_read(&cred->cr_count) == 0) {
++			get_rpccred(cred);
++			list_add_tail(&cred->cr_lru, free);
++			rpcauth_unhash_cred_locked(cred);
++			nr_to_scan--;
+ 		}
++		spin_unlock(cache_lock);
++		if (nr_to_scan == 0)
++			break;
+ 	}
+-	cache->nextgc = jiffies + cache->expire;
++	return nr_to_scan;
++}
++
++/*
++ * Run memory cache shrinker.
++ */
++static int
++rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
++{
++	LIST_HEAD(free);
++	int res;
++
++	if (list_empty(&cred_unused))
++		return 0;
++	spin_lock(&rpc_credcache_lock);
++	nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan);
++	res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
++	spin_unlock(&rpc_credcache_lock);
++	rpcauth_destroy_credlist(&free);
++	return res;
+ }
+ 
+ /*
+@@ -199,53 +271,56 @@
+ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
+ 		int flags)
+ {
++	LIST_HEAD(free);
+ 	struct rpc_cred_cache *cache = auth->au_credcache;
+-	HLIST_HEAD(free);
+-	struct hlist_node *pos, *next;
+-	struct rpc_cred	*new = NULL,
+-			*cred = NULL;
++	struct hlist_node *pos;
++	struct rpc_cred	*cred = NULL,
++			*entry, *new;
+ 	int		nr = 0;
+ 
+ 	if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS))
+ 		nr = acred->uid & RPC_CREDCACHE_MASK;
+-retry:
+-	spin_lock(&rpc_credcache_lock);
+-	if (time_before(cache->nextgc, jiffies))
+-		rpcauth_gc_credcache(auth, &free);
+-	hlist_for_each_safe(pos, next, &cache->hashtable[nr]) {
+-		struct rpc_cred *entry;
+-		entry = hlist_entry(pos, struct rpc_cred, cr_hash);
+-		if (entry->cr_ops->crmatch(acred, entry, flags)) {
+-			hlist_del(&entry->cr_hash);
+-			cred = entry;
+-			break;
+-		}
+-		rpcauth_prune_expired(auth, entry, &free);
+-	}
+-	if (new) {
+-		if (cred)
+-			hlist_add_head(&new->cr_hash, &free);
+-		else
+-			cred = new;
++
++	rcu_read_lock();
++	hlist_for_each_entry_rcu(entry, pos, &cache->hashtable[nr], cr_hash) {
++		if (!entry->cr_ops->crmatch(acred, entry, flags))
++			continue;
++		spin_lock(&cache->lock);
++		if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
++			spin_unlock(&cache->lock);
++			continue;
+ 	}
+-	if (cred) {
+-		hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]);
+-		get_rpccred(cred);
++		cred = get_rpccred(entry);
++		spin_unlock(&cache->lock);
++		break;
+ 	}
+-	spin_unlock(&rpc_credcache_lock);
++	rcu_read_unlock();
+ 
+-	rpcauth_destroy_credlist(&free);
++	if (cred != NULL)
++		goto found;
+ 
+-	if (!cred) {
+ 		new = auth->au_ops->crcreate(auth, acred, flags);
+-		if (!IS_ERR(new)) {
+-#ifdef RPC_DEBUG
+-			new->cr_magic = RPCAUTH_CRED_MAGIC;
+-#endif
+-			goto retry;
+-		} else
++	if (IS_ERR(new)) {
++		cred = new;
++		goto out;
++	}
++
++	spin_lock(&cache->lock);
++	hlist_for_each_entry(entry, pos, &cache->hashtable[nr], cr_hash) {
++		if (!entry->cr_ops->crmatch(acred, entry, flags))
++			continue;
++		cred = get_rpccred(entry);
++		break;
++	}
++	if (cred == NULL) {
+ 			cred = new;
+-	} else if ((cred->cr_flags & RPCAUTH_CRED_NEW)
++		set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
++		hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]);
++	} else
++		list_add_tail(&new->cr_lru, &free);
++	spin_unlock(&cache->lock);
++found:
++	if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)
+ 			&& cred->cr_ops->cr_init != NULL
+ 			&& !(flags & RPCAUTH_LOOKUP_NEW)) {
+ 		int res = cred->cr_ops->cr_init(auth, cred);
+@@ -254,8 +329,9 @@
+ 			cred = ERR_PTR(res);
+ 		}
+ 	}
+-
+-	return (struct rpc_cred *) cred;
++	rpcauth_destroy_credlist(&free);
++out:
++	return cred;
+ }
+ 
+ struct rpc_cred *
+@@ -277,6 +353,23 @@
+ 	return ret;
+ }
+ 
++void
++rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
++		  struct rpc_auth *auth, const struct rpc_credops *ops)
++{
++	INIT_HLIST_NODE(&cred->cr_hash);
++	INIT_LIST_HEAD(&cred->cr_lru);
++	atomic_set(&cred->cr_count, 1);
++	cred->cr_auth = auth;
++	cred->cr_ops = ops;
++	cred->cr_expire = jiffies;
++#ifdef RPC_DEBUG
++	cred->cr_magic = RPCAUTH_CRED_MAGIC;
++#endif
++	cred->cr_uid = acred->uid;
++}
++EXPORT_SYMBOL(rpcauth_init_cred);
++
+ struct rpc_cred *
+ rpcauth_bindcred(struct rpc_task *task)
+ {
+@@ -317,9 +410,31 @@
+ void
+ put_rpccred(struct rpc_cred *cred)
+ {
+-	cred->cr_expire = jiffies;
++	/* Fast path for unhashed credentials */
++	if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
++		goto need_lock;
++
+ 	if (!atomic_dec_and_test(&cred->cr_count))
+ 		return;
++	goto out_destroy;
++need_lock:
++	if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
++		return;
++	if (!list_empty(&cred->cr_lru)) {
++		number_cred_unused--;
++		list_del_init(&cred->cr_lru);
++	}
++	if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0)
++		rpcauth_unhash_cred(cred);
++	else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
++		cred->cr_expire = jiffies;
++		list_add_tail(&cred->cr_lru, &cred_unused);
++		number_cred_unused++;
++		spin_unlock(&rpc_credcache_lock);
++		return;
++	}
++	spin_unlock(&rpc_credcache_lock);
++out_destroy:
+ 	cred->cr_ops->crdestroy(cred);
+ }
+ 
+@@ -404,17 +519,34 @@
+ void
+ rpcauth_invalcred(struct rpc_task *task)
+ {
++	struct rpc_cred *cred = task->tk_msg.rpc_cred;
++
+ 	dprintk("RPC: %5u invalidating %s cred %p\n",
+-		task->tk_pid, task->tk_auth->au_ops->au_name, task->tk_msg.rpc_cred);
+-	spin_lock(&rpc_credcache_lock);
+-	if (task->tk_msg.rpc_cred)
+-		task->tk_msg.rpc_cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
+-	spin_unlock(&rpc_credcache_lock);
++		task->tk_pid, task->tk_auth->au_ops->au_name, cred);
++	if (cred)
++		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ }
+ 
+ int
+ rpcauth_uptodatecred(struct rpc_task *task)
+ {
+-	return !(task->tk_msg.rpc_cred) ||
+-		(task->tk_msg.rpc_cred->cr_flags & RPCAUTH_CRED_UPTODATE);
++	struct rpc_cred *cred = task->tk_msg.rpc_cred;
++
++	return cred == NULL ||
++		test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0;
++}
++
++
++static struct shrinker *rpc_cred_shrinker;
++
++void __init rpcauth_init_module(void)
++{
++	rpc_init_authunix();
++	rpc_cred_shrinker = set_shrinker(DEFAULT_SEEKS, rpcauth_cache_shrinker);
++}
++
++void __exit rpcauth_remove_module(void)
++{
++	if (rpc_cred_shrinker != NULL)
++		remove_shrinker(rpc_cred_shrinker);
+ }
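
The rewritten rpcauth_lookup_credcache() above is the standard
lockless-lookup/locked-insert idiom: readers walk the hash chain under
rcu_read_lock() and, before taking a reference, re-check under the per-cache
spinlock that the entry is still hashed; a miss falls through to crcreate()
plus a second lookup under the lock, so two racing creators cannot both
insert. (The same file also hooks the LRU of unused creds into the VM through
set_shrinker(), the 2.6.22-era shrinker API.) A minimal sketch of the read
side, with simplified stand-in types and the four-argument
hlist_for_each_entry_rcu() this kernel generation uses:

	#include <linux/list.h>
	#include <linux/rcupdate.h>
	#include <linux/spinlock.h>
	#include <linux/bitops.h>
	#include <asm/atomic.h>

	#define ITEM_HASHED	0		/* flag bit number */

	struct item {
		struct hlist_node	hash;
		unsigned long		flags;
		atomic_t		count;
		int			key;
	};

	static struct item *cache_lookup(struct hlist_head *head,
					 spinlock_t *lock, int key)
	{
		struct item *it;
		struct hlist_node *pos;

		rcu_read_lock();
		hlist_for_each_entry_rcu(it, pos, head, hash) {
			if (it->key != key)
				continue;
			spin_lock(lock);
			/* re-check: it may have been unhashed since we saw it */
			if (!test_bit(ITEM_HASHED, &it->flags)) {
				spin_unlock(lock);
				continue;
			}
			atomic_inc(&it->count);
			spin_unlock(lock);
			rcu_read_unlock();
			return it;
		}
		rcu_read_unlock();
		return NULL;	/* caller allocates, re-looks-up, inserts under lock */
	}

The re-check under the lock is what makes the unlocked walk safe:
rpcauth_unhash_cred_locked() clears the HASHED bit while holding the same
lock, so a reader can never pin an entry that a concurrent prune has already
decided to free.
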
+diff -Nurb linux-2.6.22-570/net/sunrpc/auth_gss/auth_gss.c linux-2.6.22-try2/net/sunrpc/auth_gss/auth_gss.c
+--- linux-2.6.22-570/net/sunrpc/auth_gss/auth_gss.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/sunrpc/auth_gss/auth_gss.c	2007-12-19 15:29:23.000000000 -0500
+@@ -54,9 +54,9 @@
+ #include <linux/sunrpc/gss_api.h>
+ #include <asm/uaccess.h>
+ 
+-static struct rpc_authops authgss_ops;
++static const struct rpc_authops authgss_ops;
+ 
+-static struct rpc_credops gss_credops;
++static const struct rpc_credops gss_credops;
+ 
+ #ifdef RPC_DEBUG
+ # define RPCDBG_FACILITY	RPCDBG_AUTH
+@@ -64,7 +64,6 @@
+ 
+ #define NFS_NGROUPS	16
+ 
+-#define GSS_CRED_EXPIRE		(60 * HZ)	/* XXX: reasonable? */
+ #define GSS_CRED_SLACK		1024		/* XXX: unused */
+ /* length of a krb5 verifier (48), plus data added before arguments when
+  * using integrity (two 4-byte integers): */
+@@ -85,10 +84,8 @@
+ 	struct rpc_auth rpc_auth;
+ 	struct gss_api_mech *mech;
+ 	enum rpc_gss_svc service;
+-	struct list_head upcalls;
+ 	struct rpc_clnt *client;
+ 	struct dentry *dentry;
+-	spinlock_t lock;
+ };
+ 
+ static void gss_destroy_ctx(struct gss_cl_ctx *);
+@@ -116,8 +113,8 @@
+ 	write_lock(&gss_ctx_lock);
+ 	old = gss_cred->gc_ctx;
+ 	gss_cred->gc_ctx = ctx;
+-	cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
+-	cred->cr_flags &= ~RPCAUTH_CRED_NEW;
++	set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
++	clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
+ 	write_unlock(&gss_ctx_lock);
+ 	if (old)
+ 		gss_put_ctx(old);
+@@ -130,7 +127,7 @@
+ 	int res = 0;
+ 
+ 	read_lock(&gss_ctx_lock);
+-	if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx)
++	if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) && gss_cred->gc_ctx)
+ 		res = 1;
+ 	read_unlock(&gss_ctx_lock);
+ 	return res;
+@@ -269,10 +266,10 @@
+ }
+ 
+ static struct gss_upcall_msg *
+-__gss_find_upcall(struct gss_auth *gss_auth, uid_t uid)
++__gss_find_upcall(struct rpc_inode *rpci, uid_t uid)
+ {
+ 	struct gss_upcall_msg *pos;
+-	list_for_each_entry(pos, &gss_auth->upcalls, list) {
++	list_for_each_entry(pos, &rpci->in_downcall, list) {
+ 		if (pos->uid != uid)
+ 			continue;
+ 		atomic_inc(&pos->count);
+@@ -290,24 +287,24 @@
+ static inline struct gss_upcall_msg *
+ gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg)
+ {
++	struct inode *inode = gss_auth->dentry->d_inode;
++	struct rpc_inode *rpci = RPC_I(inode);
+ 	struct gss_upcall_msg *old;
+ 
+-	spin_lock(&gss_auth->lock);
+-	old = __gss_find_upcall(gss_auth, gss_msg->uid);
++	spin_lock(&inode->i_lock);
++	old = __gss_find_upcall(rpci, gss_msg->uid);
+ 	if (old == NULL) {
+ 		atomic_inc(&gss_msg->count);
+-		list_add(&gss_msg->list, &gss_auth->upcalls);
++		list_add(&gss_msg->list, &rpci->in_downcall);
+ 	} else
+ 		gss_msg = old;
+-	spin_unlock(&gss_auth->lock);
++	spin_unlock(&inode->i_lock);
+ 	return gss_msg;
+ }
+ 
+ static void
+ __gss_unhash_msg(struct gss_upcall_msg *gss_msg)
+ {
+-	if (list_empty(&gss_msg->list))
+-		return;
+ 	list_del_init(&gss_msg->list);
+ 	rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+ 	wake_up_all(&gss_msg->waitqueue);
+@@ -318,10 +315,14 @@
+ gss_unhash_msg(struct gss_upcall_msg *gss_msg)
+ {
+ 	struct gss_auth *gss_auth = gss_msg->auth;
++	struct inode *inode = gss_auth->dentry->d_inode;
+ 
+-	spin_lock(&gss_auth->lock);
++	if (list_empty(&gss_msg->list))
++		return;
++	spin_lock(&inode->i_lock);
++	if (!list_empty(&gss_msg->list))
+ 	__gss_unhash_msg(gss_msg);
+-	spin_unlock(&gss_auth->lock);
++	spin_unlock(&inode->i_lock);
+ }
+ 
+ static void
+@@ -330,16 +331,16 @@
+ 	struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred,
+ 			struct gss_cred, gc_base);
+ 	struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall;
++	struct inode *inode = gss_msg->auth->dentry->d_inode;
+ 
+-	BUG_ON(gss_msg == NULL);
+ 	if (gss_msg->ctx)
+ 		gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx));
+ 	else
+ 		task->tk_status = gss_msg->msg.errno;
+-	spin_lock(&gss_msg->auth->lock);
++	spin_lock(&inode->i_lock);
+ 	gss_cred->gc_upcall = NULL;
+ 	rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+-	spin_unlock(&gss_msg->auth->lock);
++	spin_unlock(&inode->i_lock);
+ 	gss_release_msg(gss_msg);
+ }
+ 
+@@ -386,11 +387,12 @@
+ gss_refresh_upcall(struct rpc_task *task)
+ {
+ 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
+-	struct gss_auth *gss_auth = container_of(task->tk_client->cl_auth,
++	struct gss_auth *gss_auth = container_of(cred->cr_auth,
+ 			struct gss_auth, rpc_auth);
+ 	struct gss_cred *gss_cred = container_of(cred,
+ 			struct gss_cred, gc_base);
+ 	struct gss_upcall_msg *gss_msg;
++	struct inode *inode = gss_auth->dentry->d_inode;
+ 	int err = 0;
+ 
+ 	dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid,
+@@ -400,7 +402,7 @@
+ 		err = PTR_ERR(gss_msg);
+ 		goto out;
+ 	}
+-	spin_lock(&gss_auth->lock);
++	spin_lock(&inode->i_lock);
+ 	if (gss_cred->gc_upcall != NULL)
+ 		rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL);
+ 	else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
+@@ -411,7 +413,7 @@
+ 		rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL);
+ 	} else
+ 		err = gss_msg->msg.errno;
+-	spin_unlock(&gss_auth->lock);
++	spin_unlock(&inode->i_lock);
+ 	gss_release_msg(gss_msg);
+ out:
+ 	dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n",
+@@ -422,6 +424,7 @@
+ static inline int
+ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+ {
++	struct inode *inode = gss_auth->dentry->d_inode;
+ 	struct rpc_cred *cred = &gss_cred->gc_base;
+ 	struct gss_upcall_msg *gss_msg;
+ 	DEFINE_WAIT(wait);
+@@ -435,12 +438,12 @@
+ 	}
+ 	for (;;) {
+ 		prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE);
+-		spin_lock(&gss_auth->lock);
++		spin_lock(&inode->i_lock);
+ 		if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) {
+-			spin_unlock(&gss_auth->lock);
++			spin_unlock(&inode->i_lock);
+ 			break;
+ 		}
+-		spin_unlock(&gss_auth->lock);
++		spin_unlock(&inode->i_lock);
+ 		if (signalled()) {
+ 			err = -ERESTARTSYS;
+ 			goto out_intr;
+@@ -489,12 +492,11 @@
+ 	const void *p, *end;
+ 	void *buf;
+ 	struct rpc_clnt *clnt;
+-	struct gss_auth *gss_auth;
+-	struct rpc_cred *cred;
+ 	struct gss_upcall_msg *gss_msg;
++	struct inode *inode = filp->f_path.dentry->d_inode;
+ 	struct gss_cl_ctx *ctx;
+ 	uid_t uid;
+-	int err = -EFBIG;
++	ssize_t err = -EFBIG;
+ 
+ 	if (mlen > MSG_BUF_MAXSIZE)
+ 		goto out;
+@@ -503,7 +505,7 @@
+ 	if (!buf)
+ 		goto out;
+ 
+-	clnt = RPC_I(filp->f_path.dentry->d_inode)->private;
++	clnt = RPC_I(inode)->private;
+ 	err = -EFAULT;
+ 	if (copy_from_user(buf, src, mlen))
+ 		goto err;
+@@ -519,43 +521,38 @@
+ 	ctx = gss_alloc_context();
+ 	if (ctx == NULL)
+ 		goto err;
+-	err = 0;
+-	gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth);
+-	p = gss_fill_context(p, end, ctx, gss_auth->mech);
++
++	err = -ENOENT;
++	/* Find a matching upcall */
++	spin_lock(&inode->i_lock);
++	gss_msg = __gss_find_upcall(RPC_I(inode), uid);
++	if (gss_msg == NULL) {
++		spin_unlock(&inode->i_lock);
++		goto err_put_ctx;
++	}
++	list_del_init(&gss_msg->list);
++	spin_unlock(&inode->i_lock);
++
++	p = gss_fill_context(p, end, ctx, gss_msg->auth->mech);
+ 	if (IS_ERR(p)) {
+ 		err = PTR_ERR(p);
+-		if (err != -EACCES)
+-			goto err_put_ctx;
++		gss_msg->msg.errno = (err == -EACCES) ? -EACCES : -EAGAIN;
++		goto err_release_msg;
+ 	}
+-	spin_lock(&gss_auth->lock);
+-	gss_msg = __gss_find_upcall(gss_auth, uid);
+-	if (gss_msg) {
+-		if (err == 0 && gss_msg->ctx == NULL)
+ 			gss_msg->ctx = gss_get_ctx(ctx);
+-		gss_msg->msg.errno = err;
++	err = mlen;
++
++err_release_msg:
++	spin_lock(&inode->i_lock);
+ 		__gss_unhash_msg(gss_msg);
+-		spin_unlock(&gss_auth->lock);
++	spin_unlock(&inode->i_lock);
+ 		gss_release_msg(gss_msg);
+-	} else {
+-		struct auth_cred acred = { .uid = uid };
+-		spin_unlock(&gss_auth->lock);
+-		cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, RPCAUTH_LOOKUP_NEW);
+-		if (IS_ERR(cred)) {
+-			err = PTR_ERR(cred);
+-			goto err_put_ctx;
+-		}
+-		gss_cred_set_ctx(cred, gss_get_ctx(ctx));
+-	}
+-	gss_put_ctx(ctx);
+-	kfree(buf);
+-	dprintk("RPC:       gss_pipe_downcall returning length %Zu\n", mlen);
+-	return mlen;
+ err_put_ctx:
+ 	gss_put_ctx(ctx);
+ err:
+ 	kfree(buf);
+ out:
+-	dprintk("RPC:       gss_pipe_downcall returning %d\n", err);
++	dprintk("RPC:       gss_pipe_downcall returning %Zd\n", err);
+ 	return err;
+ }
+ 
+@@ -563,27 +560,21 @@
+ gss_pipe_release(struct inode *inode)
+ {
+ 	struct rpc_inode *rpci = RPC_I(inode);
+-	struct rpc_clnt *clnt;
+-	struct rpc_auth *auth;
+-	struct gss_auth *gss_auth;
+-
+-	clnt = rpci->private;
+-	auth = clnt->cl_auth;
+-	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
+-	spin_lock(&gss_auth->lock);
+-	while (!list_empty(&gss_auth->upcalls)) {
+ 		struct gss_upcall_msg *gss_msg;
+ 
+-		gss_msg = list_entry(gss_auth->upcalls.next,
++	spin_lock(&inode->i_lock);
++	while (!list_empty(&rpci->in_downcall)) {
++
++		gss_msg = list_entry(rpci->in_downcall.next,
+ 				struct gss_upcall_msg, list);
+ 		gss_msg->msg.errno = -EPIPE;
+ 		atomic_inc(&gss_msg->count);
+ 		__gss_unhash_msg(gss_msg);
+-		spin_unlock(&gss_auth->lock);
++		spin_unlock(&inode->i_lock);
+ 		gss_release_msg(gss_msg);
+-		spin_lock(&gss_auth->lock);
++		spin_lock(&inode->i_lock);
+ 	}
+-	spin_unlock(&gss_auth->lock);
++	spin_unlock(&inode->i_lock);
+ }
+ 
+ static void
+@@ -637,8 +628,6 @@
+ 	gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
+ 	if (gss_auth->service == 0)
+ 		goto err_put_mech;
+-	INIT_LIST_HEAD(&gss_auth->upcalls);
+-	spin_lock_init(&gss_auth->lock);
+ 	auth = &gss_auth->rpc_auth;
+ 	auth->au_cslack = GSS_CRED_SLACK >> 2;
+ 	auth->au_rslack = GSS_VERF_SLACK >> 2;
+@@ -646,10 +635,6 @@
+ 	auth->au_flavor = flavor;
+ 	atomic_set(&auth->au_count, 1);
+ 
+-	err = rpcauth_init_credcache(auth, GSS_CRED_EXPIRE);
+-	if (err)
+-		goto err_put_mech;
+-
+ 	gss_auth->dentry = rpc_mkpipe(clnt->cl_dentry, gss_auth->mech->gm_name,
+ 			clnt, &gss_upcall_ops, RPC_PIPE_WAIT_FOR_OPEN);
+ 	if (IS_ERR(gss_auth->dentry)) {
+@@ -657,7 +642,13 @@
+ 		goto err_put_mech;
+ 	}
+ 
++	err = rpcauth_init_credcache(auth);
++	if (err)
++		goto err_unlink_pipe;
++
+ 	return auth;
++err_unlink_pipe:
++	rpc_unlink(gss_auth->dentry);
+ err_put_mech:
+ 	gss_mech_put(gss_auth->mech);
+ err_free:
+@@ -675,12 +666,13 @@
+ 	dprintk("RPC:       destroying GSS authenticator %p flavor %d\n",
+ 			auth, auth->au_flavor);
+ 
++	rpcauth_destroy_credcache(auth);
++
+ 	gss_auth = container_of(auth, struct gss_auth, rpc_auth);
+ 	rpc_unlink(gss_auth->dentry);
+ 	gss_auth->dentry = NULL;
+ 	gss_mech_put(gss_auth->mech);
+ 
+-	rpcauth_free_credcache(auth);
+ 	kfree(gss_auth);
+ 	module_put(THIS_MODULE);
+ }
+@@ -701,17 +693,27 @@
+ }
+ 
+ static void
+-gss_destroy_cred(struct rpc_cred *rc)
++gss_free_cred(struct gss_cred *cred)
+ {
+-	struct gss_cred *cred = container_of(rc, struct gss_cred, gc_base);
+-
+-	dprintk("RPC:       gss_destroy_cred \n");
+-
++	dprintk("RPC:       gss_free_cred %p\n", cred);
+ 	if (cred->gc_ctx)
+ 		gss_put_ctx(cred->gc_ctx);
+ 	kfree(cred);
+ }
+ 
++static void
++gss_free_cred_callback(struct rcu_head *head)
++{
++	struct gss_cred *cred = container_of(head, struct gss_cred, gc_base.cr_rcu);
++	gss_free_cred(cred);
++}
++
++static void
++gss_destroy_cred(struct rpc_cred *rc)
++{
++	call_rcu(&rc->cr_rcu, gss_free_cred_callback);
++}
++
+ /*
+  * Lookup RPCSEC_GSS cred for the current process
+  */
+@@ -734,15 +736,12 @@
+ 	if (!(cred = kzalloc(sizeof(*cred), GFP_KERNEL)))
+ 		goto out_err;
+ 
+-	atomic_set(&cred->gc_count, 1);
+-	cred->gc_uid = acred->uid;
++	rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops);
+ 	/*
+ 	 * Note: in order to force a call to call_refresh(), we deliberately
+ 	 * fail to flag the credential as RPCAUTH_CRED_UPTODATE.
+ 	 */
+-	cred->gc_flags = 0;
+-	cred->gc_base.cr_ops = &gss_credops;
+-	cred->gc_base.cr_flags = RPCAUTH_CRED_NEW;
++	cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW;
+ 	cred->gc_service = gss_auth->service;
+ 	return &cred->gc_base;
+ 
+@@ -774,7 +773,7 @@
+ 	 * we don't really care if the credential has expired or not,
+ 	 * since the caller should be prepared to reinitialise it.
+ 	 */
+-	if ((flags & RPCAUTH_LOOKUP_NEW) && (rc->cr_flags & RPCAUTH_CRED_NEW))
++	if ((flags & RPCAUTH_LOOKUP_NEW) && test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags))
+ 		goto out;
+ 	/* Don't match with creds that have expired. */
+ 	if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry))
+@@ -830,7 +829,7 @@
+ 	mic.data = (u8 *)(p + 1);
+ 	maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
+ 	if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
+-		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ 	} else if (maj_stat != 0) {
+ 		printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
+ 		goto out_put_ctx;
+@@ -883,7 +882,7 @@
+ 
+ 	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
+ 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+-		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ 	if (maj_stat)
+ 		goto out_bad;
+ 	/* We leave it to unwrap to calculate au_rslack. For now we just
+@@ -937,7 +936,7 @@
+ 	maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
+ 	status = -EIO; /* XXX? */
+ 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+-		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ 	else if (maj_stat)
+ 		return status;
+ 	q = xdr_encode_opaque(p, NULL, mic.len);
+@@ -1036,7 +1035,7 @@
+ 	/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
+ 	 * done anyway, so it's safe to put the request on the wire: */
+ 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+-		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ 	else if (maj_stat)
+ 		return status;
+ 
+@@ -1123,7 +1122,7 @@
+ 
+ 	maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
+ 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+-		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ 	if (maj_stat != GSS_S_COMPLETE)
+ 		return status;
+ 	return 0;
+@@ -1148,7 +1147,7 @@
+ 
+ 	maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
+ 	if (maj_stat == GSS_S_CONTEXT_EXPIRED)
+-		cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE;
++		clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
+ 	if (maj_stat != GSS_S_COMPLETE)
+ 		return status;
+ 	if (ntohl(*(*p)++) != rqstp->rq_seqno)
+@@ -1199,7 +1198,7 @@
+ 	return status;
+ }
+ 
+-static struct rpc_authops authgss_ops = {
++static const struct rpc_authops authgss_ops = {
+ 	.owner		= THIS_MODULE,
+ 	.au_flavor	= RPC_AUTH_GSS,
+ #ifdef RPC_DEBUG
+@@ -1211,7 +1210,7 @@
+ 	.crcreate	= gss_create_cred
+ };
+ 
+-static struct rpc_credops gss_credops = {
++static const struct rpc_credops gss_credops = {
+ 	.cr_name	= "AUTH_GSS",
+ 	.crdestroy	= gss_destroy_cred,
+ 	.cr_init	= gss_cred_init,
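
Because creds can now be found without any lock, gss_destroy_cred() must not
kfree() them directly; it defers the free through call_rcu() so that every
reader still walking the hash chain finishes first. The rcu_head lives in the
base cred and the callback recovers the enclosing structure with
container_of(). The shape in isolation (stand-in names):

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct cred_like {
		struct rcu_head	rcu;
		/* ... payload ... */
	};

	static void cred_free_rcu(struct rcu_head *head)
	{
		struct cred_like *c = container_of(head, struct cred_like, rcu);
		kfree(c);	/* runs after all current RCU readers drop out */
	}

	static void cred_destroy(struct cred_like *c)
	{
		call_rcu(&c->rcu, cred_free_rcu);
	}
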
+diff -Nurb linux-2.6.22-570/net/sunrpc/auth_gss/gss_krb5_mech.c linux-2.6.22-try2/net/sunrpc/auth_gss/gss_krb5_mech.c
+--- linux-2.6.22-570/net/sunrpc/auth_gss/gss_krb5_mech.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/sunrpc/auth_gss/gss_krb5_mech.c	2007-12-19 15:29:23.000000000 -0500
+@@ -201,7 +201,7 @@
+ 	kfree(kctx);
+ }
+ 
+-static struct gss_api_ops gss_kerberos_ops = {
++static const struct gss_api_ops gss_kerberos_ops = {
+ 	.gss_import_sec_context	= gss_import_sec_context_kerberos,
+ 	.gss_get_mic		= gss_get_mic_kerberos,
+ 	.gss_verify_mic		= gss_verify_mic_kerberos,
+diff -Nurb linux-2.6.22-570/net/sunrpc/auth_gss/gss_spkm3_mech.c linux-2.6.22-try2/net/sunrpc/auth_gss/gss_spkm3_mech.c
+--- linux-2.6.22-570/net/sunrpc/auth_gss/gss_spkm3_mech.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/sunrpc/auth_gss/gss_spkm3_mech.c	2007-12-19 15:29:23.000000000 -0500
+@@ -202,7 +202,7 @@
+ 	return err;
+ }
+ 
+-static struct gss_api_ops gss_spkm3_ops = {
++static const struct gss_api_ops gss_spkm3_ops = {
+ 	.gss_import_sec_context	= gss_import_sec_context_spkm3,
+ 	.gss_get_mic		= gss_get_mic_spkm3,
+ 	.gss_verify_mic		= gss_verify_mic_spkm3,
+diff -Nurb linux-2.6.22-570/net/sunrpc/auth_null.c linux-2.6.22-try2/net/sunrpc/auth_null.c
+--- linux-2.6.22-570/net/sunrpc/auth_null.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/sunrpc/auth_null.c	2007-12-19 15:29:23.000000000 -0500
+@@ -76,7 +76,7 @@
+ static int
+ nul_refresh(struct rpc_task *task)
+ {
+-	task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
++	set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
+ 	return 0;
+ }
+ 
+@@ -101,7 +101,7 @@
+ 	return p;
+ }
+ 
+-struct rpc_authops authnull_ops = {
++const struct rpc_authops authnull_ops = {
+ 	.owner		= THIS_MODULE,
+ 	.au_flavor	= RPC_AUTH_NULL,
+ #ifdef RPC_DEBUG
+@@ -122,7 +122,7 @@
+ };
+ 
+ static
+-struct rpc_credops	null_credops = {
++const struct rpc_credops null_credops = {
+ 	.cr_name	= "AUTH_NULL",
+ 	.crdestroy	= nul_destroy_cred,
+ 	.crmatch	= nul_match,
+@@ -133,9 +133,11 @@
+ 
+ static
+ struct rpc_cred null_cred = {
++	.cr_lru		= LIST_HEAD_INIT(null_cred.cr_lru),
++	.cr_auth	= &null_auth,
+ 	.cr_ops		= &null_credops,
+ 	.cr_count	= ATOMIC_INIT(1),
+-	.cr_flags	= RPCAUTH_CRED_UPTODATE,
++	.cr_flags	= 1UL << RPCAUTH_CRED_UPTODATE,
+ #ifdef RPC_DEBUG
+ 	.cr_magic	= RPCAUTH_CRED_MAGIC,
+ #endif
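
Across auth.c, auth_gss.c and this file, cr_flags turns from an int updated
with |= and &= under rpc_credcache_lock into an unsigned long driven by the
atomic bitops, so flag flips need no lock at all. Two consequences show up in
the hunks: flags become bit numbers rather than masks (hence the
1UL << RPCAUTH_CRED_UPTODATE in the static initializer, where set_bit()
cannot run), and ordering against other stores needs explicit barriers, as
rpcauth_unhash_cred_locked() does with smp_mb__before_clear_bit(). The
conversion in miniature:

	#include <linux/bitops.h>

	#define F_UPTODATE	0			/* bit number, not a mask */

	static unsigned long flags = 1UL << F_UPTODATE;	/* static init still needs a mask */

	static void mark_stale(void)
	{
		clear_bit(F_UPTODATE, &flags);		/* atomic RMW, no spinlock */
	}

	static int still_valid(void)
	{
		return test_bit(F_UPTODATE, &flags) != 0;
	}
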
+diff -Nurb linux-2.6.22-570/net/sunrpc/auth_unix.c linux-2.6.22-try2/net/sunrpc/auth_unix.c
+--- linux-2.6.22-570/net/sunrpc/auth_unix.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/net/sunrpc/auth_unix.c	2007-12-19 22:09:36.000000000 -0500
+@@ -22,11 +22,6 @@
+ 	gid_t			uc_gids[NFS_NGROUPS];
+ };
+ #define uc_uid			uc_base.cr_uid
+-#define uc_count		uc_base.cr_count
+-#define uc_flags		uc_base.cr_flags
+-#define uc_expire		uc_base.cr_expire
+-
+-#define UNX_CRED_EXPIRE		(60 * HZ)
+ 
+ #define UNX_WRITESLACK		(21 + (UNX_MAXNODENAME >> 2))
+ 
+@@ -36,15 +31,14 @@
+ 
+ static struct rpc_auth		unix_auth;
+ static struct rpc_cred_cache	unix_cred_cache;
+-static struct rpc_credops	unix_credops;
++static const struct rpc_credops	unix_credops;
+ 
+ static struct rpc_auth *
+ unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
+ {
+ 	dprintk("RPC:       creating UNIX authenticator for client %p\n",
+ 			clnt);
+-	if (atomic_inc_return(&unix_auth.au_count) == 0)
+-		unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1);
++	atomic_inc(&unix_auth.au_count);
+ 	return &unix_auth;
+ }
+ 
+@@ -52,7 +46,7 @@
+ unx_destroy(struct rpc_auth *auth)
+ {
+ 	dprintk("RPC:       destroying UNIX authenticator %p\n", auth);
+-	rpcauth_free_credcache(auth);
++	rpcauth_clear_credcache(auth->au_credcache);
+ }
+ 
+ /*
+@@ -76,8 +70,8 @@
+ 	if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
+ 		return ERR_PTR(-ENOMEM);
+ 
+-	atomic_set(&cred->uc_count, 1);
+-	cred->uc_flags = RPCAUTH_CRED_UPTODATE;
++	rpcauth_init_cred(&cred->uc_base, acred, &unix_auth, &unix_credops);
++	cred->uc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
+ 	if (flags & RPCAUTH_LOOKUP_ROOTCREDS) {
+ 		cred->uc_uid = 0;
+ 		cred->uc_gid = 0;
+@@ -88,7 +82,6 @@
+ 		if (groups > NFS_NGROUPS)
+ 			groups = NFS_NGROUPS;
+ 
+-		cred->uc_uid = acred->uid;
+ 		cred->uc_gid = acred->gid;
+ 		cred->uc_tag = acred->tag;
+ 		for (i = 0; i < groups; i++)
+@@ -96,17 +89,31 @@
+ 		if (i < NFS_NGROUPS)
+ 		  cred->uc_gids[i] = NOGROUP;
+ 	}
+-	cred->uc_base.cr_ops = &unix_credops;
+ 
+-	return (struct rpc_cred *) cred;
++	return &cred->uc_base;
+ }
+ 
+ static void
+-unx_destroy_cred(struct rpc_cred *cred)
++unx_free_cred(struct unx_cred *cred)
+ {
++ 	dprintk("RPC:       unx_free_cred %p\n", cred);
+ 	kfree(cred);
+ }
+ 
++static void
++unx_free_cred_callback(struct rcu_head *head)
++{
++	struct unx_cred *cred = container_of(head, struct unx_cred, uc_base.cr_rcu);
++	unx_free_cred(cred);
++}
++
++static void
++unx_destroy_cred(struct rpc_cred *cred)
++{
++	call_rcu(&cred->cr_rcu, unx_free_cred_callback);
++}
++
++
+ /*
+  * Match credentials against current process creds.
+  * The root_override argument takes care of cases where the caller may
+@@ -115,7 +122,7 @@
+ static int
+ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
+ {
+-	struct unx_cred	*cred = (struct unx_cred *) rcred;
++	struct unx_cred	*cred = container_of(rcred, struct unx_cred, uc_base);
+ 	int		i;
+ 
+ 	if (!(flags & RPCAUTH_LOOKUP_ROOTCREDS)) {
+@@ -147,7 +154,7 @@
+ unx_marshal(struct rpc_task *task, __be32 *p)
+ {
+ 	struct rpc_clnt	*clnt = task->tk_client;
+-	struct unx_cred	*cred = (struct unx_cred *) task->tk_msg.rpc_cred;
++	struct unx_cred	*cred = container_of(task->tk_msg.rpc_cred, struct unx_cred, uc_base);
+ 	__be32		*base, *hold;
+ 	int		i, tag;
+ 
+@@ -183,7 +190,7 @@
+ static int
+ unx_refresh(struct rpc_task *task)
+ {
+-	task->tk_msg.rpc_cred->cr_flags |= RPCAUTH_CRED_UPTODATE;
++	set_bit(RPCAUTH_CRED_UPTODATE, &task->tk_msg.rpc_cred->cr_flags);
+ 	return 0;
+ }
+ 
+@@ -212,7 +219,12 @@
+ 	return p;
+ }
+ 
+-struct rpc_authops	authunix_ops = {
++void __init rpc_init_authunix(void)
++{
++	spin_lock_init(&unix_cred_cache.lock);
++}
++
++const struct rpc_authops authunix_ops = {
+ 	.owner		= THIS_MODULE,
+ 	.au_flavor	= RPC_AUTH_UNIX,
+ #ifdef RPC_DEBUG
+@@ -226,7 +238,6 @@
+ 
+ static
+ struct rpc_cred_cache	unix_cred_cache = {
+-	.expire		= UNX_CRED_EXPIRE,
+ };
+ 
+ static
+@@ -240,7 +251,7 @@
+ };
+ 
+ static
+-struct rpc_credops	unix_credops = {
++const struct rpc_credops unix_credops = {
+ 	.cr_name	= "AUTH_UNIX",
+ 	.crdestroy	= unx_destroy_cred,
+ 	.crmatch	= unx_match,
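
unx_create_cred() now hands the shared fields to rpcauth_init_cred(), and the
old (struct unx_cred *) rcred casts become container_of() on the embedded
uc_base member -- the kernel's usual way of expressing a subclass, and the
safe one, since container_of() keeps working even if the base member is not
placed first. A stand-alone sketch of the idiom (names are stand-ins):

	#include <linux/kernel.h>

	struct base_cred {
		unsigned long	flags;
	};

	struct unix_like_cred {
		struct base_cred	base;	/* embedded "base class" */
		int			gid;
	};

	static struct unix_like_cred *to_unix(struct base_cred *b)
	{
		/* correct wherever 'base' sits inside the outer struct */
		return container_of(b, struct unix_like_cred, base);
	}
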
+diff -Nurb linux-2.6.22-570/net/sunrpc/clnt.c linux-2.6.22-try2/net/sunrpc/clnt.c
+--- linux-2.6.22-570/net/sunrpc/clnt.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/net/sunrpc/clnt.c	2007-12-19 15:29:23.000000000 -0500
+@@ -45,6 +45,12 @@
+ 	dprintk("RPC: %5u %s (status %d)\n", t->tk_pid,		\
+ 			__FUNCTION__, t->tk_status)
+ 
++/*
++ * All RPC clients are linked into this list
++ */
++static LIST_HEAD(all_clients);
++static DEFINE_SPINLOCK(rpc_client_lock);
++
+ static DECLARE_WAIT_QUEUE_HEAD(destroy_wait);
+ 
+ 
+@@ -67,6 +73,21 @@
+ static __be32 *	call_header(struct rpc_task *task);
+ static __be32 *	call_verify(struct rpc_task *task);
+ 
++static int	rpc_ping(struct rpc_clnt *clnt, int flags);
++
++static void rpc_register_client(struct rpc_clnt *clnt)
++{
++	spin_lock(&rpc_client_lock);
++	list_add(&clnt->cl_clients, &all_clients);
++	spin_unlock(&rpc_client_lock);
++}
++
++static void rpc_unregister_client(struct rpc_clnt *clnt)
++{
++	spin_lock(&rpc_client_lock);
++	list_del(&clnt->cl_clients);
++	spin_unlock(&rpc_client_lock);
++}
+ 
+ static int
+ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
+@@ -112,6 +133,9 @@
+ 	dprintk("RPC:       creating %s client for %s (xprt %p)\n",
+ 			program->name, servname, xprt);
+ 
++	err = rpciod_up();
++	if (err)
++		goto out_no_rpciod;
+ 	err = -EINVAL;
+ 	if (!xprt)
+ 		goto out_no_xprt;
+@@ -122,8 +146,6 @@
+ 	clnt = kzalloc(sizeof(*clnt), GFP_KERNEL);
+ 	if (!clnt)
+ 		goto out_err;
+-	atomic_set(&clnt->cl_users, 0);
+-	atomic_set(&clnt->cl_count, 1);
+ 	clnt->cl_parent = clnt;
+ 
+ 	clnt->cl_server = clnt->cl_inline_name;
+@@ -149,6 +171,8 @@
+ 	if (clnt->cl_metrics == NULL)
+ 		goto out_no_stats;
+ 	clnt->cl_program  = program;
++	INIT_LIST_HEAD(&clnt->cl_tasks);
++	spin_lock_init(&clnt->cl_lock);
+ 
+ 	if (!xprt_bound(clnt->cl_xprt))
+ 		clnt->cl_autobind = 1;
+@@ -156,6 +180,8 @@
+ 	clnt->cl_rtt = &clnt->cl_rtt_default;
+ 	rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval);
+ 
++	kref_init(&clnt->cl_kref);
++
+ 	err = rpc_setup_pipedir(clnt, program->pipe_dir_name);
+ 	if (err < 0)
+ 		goto out_no_path;
+@@ -173,6 +199,7 @@
+ 	if (clnt->cl_nodelen > UNX_MAXNODENAME)
+ 		clnt->cl_nodelen = UNX_MAXNODENAME;
+ 	memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen);
++	rpc_register_client(clnt);
+ 	return clnt;
+ 
+ out_no_auth:
+@@ -189,6 +216,8 @@
+ out_err:
+ 	xprt_put(xprt);
+ out_no_xprt:
++	rpciod_down();
++out_no_rpciod:
+ 	return ERR_PTR(err);
+ }
+ 
+@@ -246,8 +275,6 @@
+ 		clnt->cl_intr = 1;
+ 	if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
+ 		clnt->cl_autobind = 1;
+-	if (args->flags & RPC_CLNT_CREATE_ONESHOT)
+-		clnt->cl_oneshot = 1;
+ 	if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
+ 		clnt->cl_discrtry = 1;
+ 	/* TODO: handle RPC_CLNT_CREATE_TAGGED
+@@ -271,24 +298,25 @@
+ 	new = kmemdup(clnt, sizeof(*new), GFP_KERNEL);
+ 	if (!new)
+ 		goto out_no_clnt;
+-	atomic_set(&new->cl_count, 1);
+-	atomic_set(&new->cl_users, 0);
++	new->cl_parent = clnt;
++	/* Turn off autobind on clones */
++	new->cl_autobind = 0;
++	INIT_LIST_HEAD(&new->cl_tasks);
++	spin_lock_init(&new->cl_lock);
++	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
+ 	new->cl_metrics = rpc_alloc_iostats(clnt);
+ 	if (new->cl_metrics == NULL)
+ 		goto out_no_stats;
++	kref_init(&new->cl_kref);
+ 	err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
+ 	if (err != 0)
+ 		goto out_no_path;
+-	new->cl_parent = clnt;
+-	atomic_inc(&clnt->cl_count);
+-	new->cl_xprt = xprt_get(clnt->cl_xprt);
+-	/* Turn off autobind on clones */
+-	new->cl_autobind = 0;
+-	new->cl_oneshot = 0;
+-	new->cl_dead = 0;
+-	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
+ 	if (new->cl_auth)
+ 		atomic_inc(&new->cl_auth->au_count);
++	xprt_get(clnt->cl_xprt);
++	kref_get(&clnt->cl_kref);
++	rpc_register_client(new);
++	rpciod_up();
+ 	return new;
+ out_no_path:
+ 	rpc_free_iostats(new->cl_metrics);
+@@ -301,52 +329,34 @@
+ 
+ /*
+  * Properly shut down an RPC client, terminating all outstanding
+- * requests. Note that we must be certain that cl_oneshot and
+- * cl_dead are cleared, or else the client would be destroyed
+- * when the last task releases it.
++ * requests.
+  */
+-int
+-rpc_shutdown_client(struct rpc_clnt *clnt)
++void rpc_shutdown_client(struct rpc_clnt *clnt)
+ {
+-	dprintk("RPC:       shutting down %s client for %s, tasks=%d\n",
+-			clnt->cl_protname, clnt->cl_server,
+-			atomic_read(&clnt->cl_users));
+-
+-	while (atomic_read(&clnt->cl_users) > 0) {
+-		/* Don't let rpc_release_client destroy us */
+-		clnt->cl_oneshot = 0;
+-		clnt->cl_dead = 0;
++	dprintk("RPC:       shutting down %s client for %s\n",
++			clnt->cl_protname, clnt->cl_server);
++
++	while (!list_empty(&clnt->cl_tasks)) {
+ 		rpc_killall_tasks(clnt);
+ 		wait_event_timeout(destroy_wait,
+-			!atomic_read(&clnt->cl_users), 1*HZ);
+-	}
+-
+-	if (atomic_read(&clnt->cl_users) < 0) {
+-		printk(KERN_ERR "RPC: rpc_shutdown_client clnt %p tasks=%d\n",
+-				clnt, atomic_read(&clnt->cl_users));
+-#ifdef RPC_DEBUG
+-		rpc_show_tasks();
+-#endif
+-		BUG();
++			list_empty(&clnt->cl_tasks), 1*HZ);
+ 	}
+ 
+-	return rpc_destroy_client(clnt);
++	rpc_release_client(clnt);
+ }
+ 
+ /*
+- * Delete an RPC client
++ * Free an RPC client
+  */
+-int
+-rpc_destroy_client(struct rpc_clnt *clnt)
++static void
++rpc_free_client(struct kref *kref)
+ {
+-	if (!atomic_dec_and_test(&clnt->cl_count))
+-		return 1;
+-	BUG_ON(atomic_read(&clnt->cl_users) != 0);
++	struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref);
+ 
+ 	dprintk("RPC:       destroying %s client for %s\n",
+ 			clnt->cl_protname, clnt->cl_server);
+ 	if (clnt->cl_auth) {
+-		rpcauth_destroy(clnt->cl_auth);
++		rpcauth_release(clnt->cl_auth);
+ 		clnt->cl_auth = NULL;
+ 	}
+ 	if (!IS_ERR(clnt->cl_dentry)) {
+@@ -354,33 +364,31 @@
+ 		rpc_put_mount();
+ 	}
+ 	if (clnt->cl_parent != clnt) {
+-		rpc_destroy_client(clnt->cl_parent);
++		rpc_release_client(clnt->cl_parent);
+ 		goto out_free;
+ 	}
+ 	if (clnt->cl_server != clnt->cl_inline_name)
+ 		kfree(clnt->cl_server);
+ out_free:
++	rpc_unregister_client(clnt);
+ 	rpc_free_iostats(clnt->cl_metrics);
+ 	clnt->cl_metrics = NULL;
+ 	xprt_put(clnt->cl_xprt);
++	rpciod_down();
+ 	kfree(clnt);
+-	return 0;
+ }
+ 
+ /*
+- * Release an RPC client
++ * Release reference to the RPC client
+  */
+ void
+ rpc_release_client(struct rpc_clnt *clnt)
+ {
+-	dprintk("RPC:       rpc_release_client(%p, %d)\n",
+-			clnt, atomic_read(&clnt->cl_users));
++	dprintk("RPC:       rpc_release_client(%p)\n", clnt);
+ 
+-	if (!atomic_dec_and_test(&clnt->cl_users))
+-		return;
++	if (list_empty(&clnt->cl_tasks))
+ 	wake_up(&destroy_wait);
+-	if (clnt->cl_oneshot || clnt->cl_dead)
+-		rpc_destroy_client(clnt);
++	kref_put(&clnt->cl_kref, rpc_free_client);
+ }
+ 
+ /**
+@@ -471,82 +479,96 @@
+ 	rpc_restore_sigmask(oldset);
+ }
+ 
+-/*
+- * New rpc_call implementation
+- */
+-int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
++static
++struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt,
++		struct rpc_message *msg,
++		int flags,
++		const struct rpc_call_ops *ops,
++		void *data)
+ {
+-	struct rpc_task	*task;
++	struct rpc_task *task, *ret;
+ 	sigset_t	oldset;
+-	int		status;
+-
+-	/* If this client is slain all further I/O fails */
+-	if (clnt->cl_dead)
+-		return -EIO;
+-
+-	BUG_ON(flags & RPC_TASK_ASYNC);
+ 
+-	task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL);
+-	if (task == NULL)
+-		return -ENOMEM;
++	task = rpc_new_task(clnt, flags, ops, data);
++	if (task == NULL) {
++		rpc_release_calldata(ops, data);
++		return ERR_PTR(-ENOMEM);
++	}
+ 
+-	/* Mask signals on RPC calls _and_ GSS_AUTH upcalls */
++	/* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
+ 	rpc_task_sigmask(task, &oldset);
+-
+-	/* Set up the call info struct and execute the task */
++	if (msg != NULL) {
+ 	rpc_call_setup(task, msg, 0);
+-	if (task->tk_status == 0) {
++		if (task->tk_status != 0) {
++			ret = ERR_PTR(task->tk_status);
++			rpc_put_task(task);
++			goto out;
++		}
++	}
+ 		atomic_inc(&task->tk_count);
+ 		rpc_execute(task);
+-	}
++	ret = task;
++out:
++	rpc_restore_sigmask(&oldset);
++	return ret;
++}
++
++/**
++ * rpc_call_sync - Perform a synchronous RPC call
++ * @clnt: pointer to RPC client
++ * @msg: RPC call parameters
++ * @flags: RPC call flags
++ */
++int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
++{
++	struct rpc_task	*task;
++	int status;
++
++	BUG_ON(flags & RPC_TASK_ASYNC);
++
++	task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
+ 	status = task->tk_status;
+ 	rpc_put_task(task);
+-	rpc_restore_sigmask(&oldset);
+ 	return status;
+ }
+ 
+-/*
+- * New rpc_call implementation
++/**
++ * rpc_call_async - Perform an asynchronous RPC call
++ * @clnt: pointer to RPC client
++ * @msg: RPC call parameters
++ * @flags: RPC call flags
++ * @ops: RPC call ops
++ * @data: user call data
+  */
+ int
+ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
+ 	       const struct rpc_call_ops *tk_ops, void *data)
+ {
+ 	struct rpc_task	*task;
+-	sigset_t	oldset;
+-	int		status;
+ 
+-	/* If this client is slain all further I/O fails */
+-	status = -EIO;
+-	if (clnt->cl_dead)
+-		goto out_release;
+-
+-	flags |= RPC_TASK_ASYNC;
+-
+-	/* Create/initialize a new RPC task */
+-	status = -ENOMEM;
+-	if (!(task = rpc_new_task(clnt, flags, tk_ops, data)))
+-		goto out_release;
+-
+-	/* Mask signals on GSS_AUTH upcalls */
+-	rpc_task_sigmask(task, &oldset);
+-
+-	rpc_call_setup(task, msg, 0);
+-
+-	/* Set up the call info struct and execute the task */
+-	status = task->tk_status;
+-	if (status == 0)
+-		rpc_execute(task);
+-	else
++	task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data);
++	if (IS_ERR(task))
++		return PTR_ERR(task);
+ 		rpc_put_task(task);
+-
+-	rpc_restore_sigmask(&oldset);
+-	return status;
+-out_release:
+-	rpc_release_calldata(tk_ops, data);
+-	return status;
++	return 0;
+ }
+ 
++/**
++ * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
++ * @clnt: pointer to RPC client
++ * @flags: RPC flags
++ * @ops: RPC call ops
++ * @data: user call data
++ */
++struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
++					const struct rpc_call_ops *tk_ops,
++					void *data)
++{
++	return rpc_do_run_task(clnt, NULL, flags, tk_ops, data);
++}
++EXPORT_SYMBOL(rpc_run_task);
+ 
+ void
+ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
+@@ -1424,7 +1446,7 @@
+ 	.p_decode = rpcproc_decode_null,
+ };
+ 
+-int rpc_ping(struct rpc_clnt *clnt, int flags)
++static int rpc_ping(struct rpc_clnt *clnt, int flags)
+ {
+ 	struct rpc_message msg = {
+ 		.rpc_proc = &rpcproc_null,
+@@ -1435,3 +1457,51 @@
+ 	put_rpccred(msg.rpc_cred);
+ 	return err;
+ }
++
++struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int flags)
++{
++	struct rpc_message msg = {
++		.rpc_proc = &rpcproc_null,
++		.rpc_cred = cred,
++	};
++	return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL);
++}
++EXPORT_SYMBOL(rpc_call_null);
++
++#ifdef RPC_DEBUG
++void rpc_show_tasks(void)
++{
++	struct rpc_clnt *clnt;
++	struct rpc_task *t;
++
++	spin_lock(&rpc_client_lock);
++	if (list_empty(&all_clients))
++		goto out;
++	printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
++		"-rpcwait -action- ---ops--\n");
++	list_for_each_entry(clnt, &all_clients, cl_clients) {
++		if (list_empty(&clnt->cl_tasks))
++			continue;
++		spin_lock(&clnt->cl_lock);
++		list_for_each_entry(t, &clnt->cl_tasks, tk_task) {
++			const char *rpc_waitq = "none";
++
++			if (RPC_IS_QUEUED(t))
++				rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
++
++			printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
++				t->tk_pid,
++				(t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
++				t->tk_flags, t->tk_status,
++				t->tk_client,
++				(t->tk_client ? t->tk_client->cl_prog : 0),
++				t->tk_rqstp, t->tk_timeout,
++				rpc_waitq,
++				t->tk_action, t->tk_ops);
++		}
++		spin_unlock(&clnt->cl_lock);
++	}
++out:
++	spin_unlock(&rpc_client_lock);
++}
++#endif
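
The clnt.c changes replace the fragile cl_count/cl_users pair (and the
oneshot/dead flags) with one struct kref plus a per-client task list:
rpc_new_task() takes kref_get(), rpc_release_client() does kref_put(), and
rpc_free_client() runs exactly once when the count reaches zero, while
rpc_shutdown_client() merely waits for cl_tasks to drain. The kref core of
that, reduced to a sketch:

	#include <linux/kref.h>
	#include <linux/slab.h>

	struct client {
		struct kref	kref;
		/* ... */
	};

	static void client_free(struct kref *kref)
	{
		kfree(container_of(kref, struct client, kref));
	}

	static struct client *client_get(struct client *c)
	{
		kref_get(&c->kref);	/* caller must already hold a reference */
		return c;
	}

	static void client_put(struct client *c)
	{
		kref_put(&c->kref, client_free);	/* frees on last put */
	}

The allocation side pairs this with kref_init(&c->kref), as rpc_new_client()
and rpc_clone_client() do above.
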
+diff -Nurb linux-2.6.22-570/net/sunrpc/rpc_pipe.c linux-2.6.22-try2/net/sunrpc/rpc_pipe.c
+--- linux-2.6.22-570/net/sunrpc/rpc_pipe.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/sunrpc/rpc_pipe.c	2007-12-19 15:29:23.000000000 -0500
+@@ -14,7 +14,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/mount.h>
+ #include <linux/namei.h>
+-#include <linux/dnotify.h>
++#include <linux/fsnotify.h>
+ #include <linux/kernel.h>
+ 
+ #include <asm/ioctls.h>
+@@ -344,7 +344,7 @@
+ 		mutex_lock(&inode->i_mutex);
+ 		clnt = RPC_I(inode)->private;
+ 		if (clnt) {
+-			atomic_inc(&clnt->cl_users);
++			kref_get(&clnt->cl_kref);
+ 			m->private = clnt;
+ 		} else {
+ 			single_release(inode, file);
+@@ -448,6 +448,15 @@
+ 	simple_release_fs(&rpc_mount, &rpc_mount_count);
+ }
+ 
++static int rpc_delete_dentry(struct dentry *dentry)
++{
++	return 1;
++}
++
++static struct dentry_operations rpc_dentry_operations = {
++	.d_delete = rpc_delete_dentry,
++};
++
+ static int
+ rpc_lookup_parent(char *path, struct nameidata *nd)
+ {
+@@ -506,7 +515,7 @@
+  * FIXME: This probably has races.
+  */
+ static void
+-rpc_depopulate(struct dentry *parent)
++rpc_depopulate(struct dentry *parent, int start, int eof)
+ {
+ 	struct inode *dir = parent->d_inode;
+ 	struct list_head *pos, *next;
+@@ -518,6 +527,10 @@
+ 	spin_lock(&dcache_lock);
+ 	list_for_each_safe(pos, next, &parent->d_subdirs) {
+ 		dentry = list_entry(pos, struct dentry, d_u.d_child);
++		if (!dentry->d_inode ||
++				dentry->d_inode->i_ino < start ||
++				dentry->d_inode->i_ino >= eof)
++			continue;
+ 		spin_lock(&dentry->d_lock);
+ 		if (!d_unhashed(dentry)) {
+ 			dget_locked(dentry);
+@@ -533,11 +546,11 @@
+ 	if (n) {
+ 		do {
+ 			dentry = dvec[--n];
+-			if (dentry->d_inode) {
+-				rpc_close_pipes(dentry->d_inode);
++			if (S_ISREG(dentry->d_inode->i_mode))
+ 				simple_unlink(dir, dentry);
+-			}
+-			inode_dir_notify(dir, DN_DELETE);
++		else if (S_ISDIR(dentry->d_inode->i_mode))
++				simple_rmdir(dir, dentry);
++			d_delete(dentry);
+ 			dput(dentry);
+ 		} while (n);
+ 		goto repeat;
+@@ -560,6 +573,7 @@
+ 		dentry = d_alloc_name(parent, files[i].name);
+ 		if (!dentry)
+ 			goto out_bad;
++		dentry->d_op = &rpc_dentry_operations;
+ 		mode = files[i].mode;
+ 		inode = rpc_get_inode(dir->i_sb, mode);
+ 		if (!inode) {
+@@ -574,6 +588,7 @@
+ 		if (S_ISDIR(mode))
+ 			inc_nlink(dir);
+ 		d_add(dentry, inode);
++		fsnotify_create(dir, dentry);
+ 	}
+ 	mutex_unlock(&dir->i_mutex);
+ 	return 0;
+@@ -595,7 +610,7 @@
+ 	inode->i_ino = iunique(dir->i_sb, 100);
+ 	d_instantiate(dentry, inode);
+ 	inc_nlink(dir);
+-	inode_dir_notify(dir, DN_CREATE);
++	fsnotify_mkdir(dir, dentry);
+ 	return 0;
+ out_err:
+ 	printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n",
+@@ -607,21 +622,14 @@
+ __rpc_rmdir(struct inode *dir, struct dentry *dentry)
+ {
+ 	int error;
+-
+-	shrink_dcache_parent(dentry);
+-	if (d_unhashed(dentry))
+-		return 0;
+-	if ((error = simple_rmdir(dir, dentry)) != 0)
++	error = simple_rmdir(dir, dentry);
++	if (!error)
++		d_delete(dentry);
+ 		return error;
+-	if (!error) {
+-		inode_dir_notify(dir, DN_DELETE);
+-		d_drop(dentry);
+-	}
+-	return 0;
+ }
+ 
+ static struct dentry *
+-rpc_lookup_create(struct dentry *parent, const char *name, int len)
++rpc_lookup_create(struct dentry *parent, const char *name, int len, int exclusive)
+ {
+ 	struct inode *dir = parent->d_inode;
+ 	struct dentry *dentry;
+@@ -630,7 +638,9 @@
+ 	dentry = lookup_one_len(name, parent, len);
+ 	if (IS_ERR(dentry))
+ 		goto out_err;
+-	if (dentry->d_inode) {
++	if (!dentry->d_inode)
++		dentry->d_op = &rpc_dentry_operations;
++	else if (exclusive) {
+ 		dput(dentry);
+ 		dentry = ERR_PTR(-EEXIST);
+ 		goto out_err;
+@@ -649,7 +659,7 @@
+ 
+ 	if ((error = rpc_lookup_parent(path, nd)) != 0)
+ 		return ERR_PTR(error);
+-	dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len);
++	dentry = rpc_lookup_create(nd->dentry, nd->last.name, nd->last.len, 1);
+ 	if (IS_ERR(dentry))
+ 		rpc_release_path(nd);
+ 	return dentry;
+@@ -681,7 +691,7 @@
+ 	rpc_release_path(&nd);
+ 	return dentry;
+ err_depopulate:
+-	rpc_depopulate(dentry);
++	rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF);
+ 	__rpc_rmdir(dir, dentry);
+ err_dput:
+ 	dput(dentry);
+@@ -701,7 +711,7 @@
+ 	parent = dget_parent(dentry);
+ 	dir = parent->d_inode;
+ 	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+-	rpc_depopulate(dentry);
++	rpc_depopulate(dentry, RPCAUTH_info, RPCAUTH_EOF);
+ 	error = __rpc_rmdir(dir, dentry);
+ 	dput(dentry);
+ 	mutex_unlock(&dir->i_mutex);
+@@ -716,10 +726,21 @@
+ 	struct inode *dir, *inode;
+ 	struct rpc_inode *rpci;
+ 
+-	dentry = rpc_lookup_create(parent, name, strlen(name));
++	dentry = rpc_lookup_create(parent, name, strlen(name), 0);
+ 	if (IS_ERR(dentry))
+ 		return dentry;
+ 	dir = parent->d_inode;
++	if (dentry->d_inode) {
++		rpci = RPC_I(dentry->d_inode);
++		if (rpci->private != private ||
++				rpci->ops != ops ||
++				rpci->flags != flags) {
++			dput(dentry);
++			dentry = ERR_PTR(-EBUSY);
++		}
++		rpci->nkern_readwriters++;
++		goto out;
++	}
+ 	inode = rpc_get_inode(dir->i_sb, S_IFIFO | S_IRUSR | S_IWUSR);
+ 	if (!inode)
+ 		goto err_dput;
+@@ -730,7 +751,8 @@
+ 	rpci->private = private;
+ 	rpci->flags = flags;
+ 	rpci->ops = ops;
+-	inode_dir_notify(dir, DN_CREATE);
++	rpci->nkern_readwriters = 1;
++	fsnotify_create(dir, dentry);
+ 	dget(dentry);
+ out:
+ 	mutex_unlock(&dir->i_mutex);
+@@ -754,13 +776,11 @@
+ 	parent = dget_parent(dentry);
+ 	dir = parent->d_inode;
+ 	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+-	if (!d_unhashed(dentry)) {
+-		d_drop(dentry);
+-		if (dentry->d_inode) {
++	if (--RPC_I(dentry->d_inode)->nkern_readwriters == 0) {
+ 			rpc_close_pipes(dentry->d_inode);
+ 			error = simple_unlink(dir, dentry);
+-		}
+-		inode_dir_notify(dir, DN_DELETE);
++		if (!error)
++			d_delete(dentry);
+ 	}
+ 	dput(dentry);
+ 	mutex_unlock(&dir->i_mutex);
+@@ -833,6 +853,7 @@
+ 	rpci->nreaders = 0;
+ 	rpci->nwriters = 0;
+ 	INIT_LIST_HEAD(&rpci->in_upcall);
++	INIT_LIST_HEAD(&rpci->in_downcall);
+ 	INIT_LIST_HEAD(&rpci->pipe);
+ 	rpci->pipelen = 0;
+ 	init_waitqueue_head(&rpci->waitq);
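
Two small VFS idioms carry the rpc_pipe.c rework: fsnotify_create() and
fsnotify_mkdir() replace the old inode_dir_notify() calls, and a
dentry_operations whose d_delete always returns 1 tells the dcache to prune
rpc_pipefs dentries on the final dput() instead of caching them -- which is
what lets rpc_depopulate() and __rpc_rmdir() rely on plain d_delete(). The
operations table in isolation (2.6.22 d_delete signature):

	static int always_delete_dentry(struct dentry *dentry)
	{
		return 1;	/* nonzero: drop the dentry when the last ref goes */
	}

	static struct dentry_operations prune_dentry_ops = {
		.d_delete = always_delete_dentry,
	};

	/* attached at lookup/create time: dentry->d_op = &prune_dentry_ops; */
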
+diff -Nurb linux-2.6.22-570/net/sunrpc/rpcb_clnt.c linux-2.6.22-try2/net/sunrpc/rpcb_clnt.c
+--- linux-2.6.22-570/net/sunrpc/rpcb_clnt.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/sunrpc/rpcb_clnt.c	2007-12-19 15:29:23.000000000 -0500
+@@ -184,8 +184,7 @@
+ 		.program	= &rpcb_program,
+ 		.version	= version,
+ 		.authflavor	= RPC_AUTH_UNIX,
+-		.flags		= (RPC_CLNT_CREATE_ONESHOT |
+-				   RPC_CLNT_CREATE_NOPING),
++		.flags		= RPC_CLNT_CREATE_NOPING,
+ 	};
+ 
+ 	((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT);
+@@ -238,6 +237,7 @@
+ 
+ 	error = rpc_call_sync(rpcb_clnt, &msg, 0);
+ 
++	rpc_shutdown_client(rpcb_clnt);
+ 	if (error < 0)
+ 		printk(KERN_WARNING "RPC: failed to contact local rpcbind "
+ 				"server (errno %d).\n", -error);
+@@ -286,6 +286,7 @@
+ 		return PTR_ERR(rpcb_clnt);
+ 
+ 	status = rpc_call_sync(rpcb_clnt, &msg, 0);
++	rpc_shutdown_client(rpcb_clnt);
+ 
+ 	if (status >= 0) {
+ 		if (map.r_port != 0)
+@@ -379,6 +380,7 @@
+ 	}
+ 
+ 	child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
++	rpc_release_client(rpcb_clnt);
+ 	if (IS_ERR(child)) {
+ 		status = -EIO;
+ 		dprintk("RPC: %5u rpcb_getport rpc_run_task failed\n",
+diff -Nurb linux-2.6.22-570/net/sunrpc/sched.c linux-2.6.22-try2/net/sunrpc/sched.c
+--- linux-2.6.22-570/net/sunrpc/sched.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/sunrpc/sched.c	2007-12-19 15:29:23.000000000 -0500
+@@ -25,7 +25,6 @@
+ #ifdef RPC_DEBUG
+ #define RPCDBG_FACILITY		RPCDBG_SCHED
+ #define RPC_TASK_MAGIC_ID	0xf00baa
+-static int			rpc_task_id;
+ #endif
+ 
+ /*
+@@ -40,7 +39,6 @@
+ static mempool_t	*rpc_buffer_mempool __read_mostly;
+ 
+ static void			__rpc_default_timer(struct rpc_task *task);
+-static void			rpciod_killall(void);
+ static void			rpc_async_schedule(struct work_struct *);
+ static void			 rpc_release_task(struct rpc_task *task);
+ 
+@@ -50,23 +48,13 @@
+ static RPC_WAITQ(delay_queue, "delayq");
+ 
+ /*
+- * All RPC tasks are linked into this list
+- */
+-static LIST_HEAD(all_tasks);
+-
+-/*
+  * rpciod-related stuff
+  */
+ static DEFINE_MUTEX(rpciod_mutex);
+-static unsigned int		rpciod_users;
++static atomic_t rpciod_users = ATOMIC_INIT(0);
+ struct workqueue_struct *rpciod_workqueue;
+ 
+ /*
+- * Spinlock for other critical sections of code.
+- */
+-static DEFINE_SPINLOCK(rpc_sched_lock);
+-
+-/*
+  * Disable the timer for a given RPC task. Should be called with
+  * queue->lock and bh_disabled in order to avoid races within
+  * rpc_run_timer().
+@@ -267,18 +255,33 @@
+ 	return 0;
+ }
+ 
++#ifdef RPC_DEBUG
++static void rpc_task_set_debuginfo(struct rpc_task *task)
++{
++	static atomic_t rpc_pid;
++
++	task->tk_magic = RPC_TASK_MAGIC_ID;
++	task->tk_pid = atomic_inc_return(&rpc_pid);
++}
++#else
++static inline void rpc_task_set_debuginfo(struct rpc_task *task)
++{
++}
++#endif
++
+ static void rpc_set_active(struct rpc_task *task)
+ {
++	struct rpc_clnt *clnt;
+ 	if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0)
+ 		return;
+-	spin_lock(&rpc_sched_lock);
+-#ifdef RPC_DEBUG
+-	task->tk_magic = RPC_TASK_MAGIC_ID;
+-	task->tk_pid = rpc_task_id++;
+-#endif
++	rpc_task_set_debuginfo(task);
+ 	/* Add to global list of all tasks */
+-	list_add_tail(&task->tk_task, &all_tasks);
+-	spin_unlock(&rpc_sched_lock);
++	clnt = task->tk_client;
++	if (clnt != NULL) {
++		spin_lock(&clnt->cl_lock);
++		list_add_tail(&task->tk_task, &clnt->cl_tasks);
++		spin_unlock(&clnt->cl_lock);
++	}
+ }
+ 
+ /*
+@@ -818,6 +821,7 @@
+ 	if (tk_ops->rpc_call_prepare != NULL)
+ 		task->tk_action = rpc_prepare_task;
+ 	task->tk_calldata = calldata;
++	INIT_LIST_HEAD(&task->tk_task);
+ 
+ 	/* Initialize retry counters */
+ 	task->tk_garb_retry = 2;
+@@ -830,7 +834,7 @@
+ 	task->tk_workqueue = rpciod_workqueue;
+ 
+ 	if (clnt) {
+-		atomic_inc(&clnt->cl_users);
++		kref_get(&clnt->cl_kref);
+ 		if (clnt->cl_softrtry)
+ 			task->tk_flags |= RPC_TASK_SOFT;
+ 		if (!clnt->cl_intr)
+@@ -860,9 +864,7 @@
+ }
+ 
+ /*
+- * Create a new task for the specified client.  We have to
+- * clean up after an allocation failure, as the client may
+- * have specified "oneshot".
++ * Create a new task for the specified client.
+  */
+ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
+ {
+@@ -870,7 +872,7 @@
+ 
+ 	task = rpc_alloc_task();
+ 	if (!task)
+-		goto cleanup;
++		goto out;
+ 
+ 	rpc_init_task(task, clnt, flags, tk_ops, calldata);
+ 
+@@ -878,16 +880,6 @@
+ 	task->tk_flags |= RPC_TASK_DYNAMIC;
+ out:
+ 	return task;
+-
+-cleanup:
+-	/* Check whether to release the client */
+-	if (clnt) {
+-		printk("rpc_new_task: failed, users=%d, oneshot=%d\n",
+-			atomic_read(&clnt->cl_users), clnt->cl_oneshot);
+-		atomic_inc(&clnt->cl_users); /* pretend we were used ... */
+-		rpc_release_client(clnt);
+-	}
+-	goto out;
+ }
+ 
+ 
+@@ -920,11 +912,13 @@
+ #endif
+ 	dprintk("RPC: %5u release task\n", task->tk_pid);
+ 
+-	/* Remove from global task list */
+-	spin_lock(&rpc_sched_lock);
++	if (!list_empty(&task->tk_task)) {
++		struct rpc_clnt *clnt = task->tk_client;
++		/* Remove from client task list */
++		spin_lock(&clnt->cl_lock);
+ 	list_del(&task->tk_task);
+-	spin_unlock(&rpc_sched_lock);
+-
++		spin_unlock(&clnt->cl_lock);
++	}
+ 	BUG_ON (RPC_IS_QUEUED(task));
+ 
+ 	/* Synchronously delete any running timer */
+@@ -939,29 +933,6 @@
+ 	rpc_put_task(task);
+ }
+ 
+-/**
+- * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
+- * @clnt: pointer to RPC client
+- * @flags: RPC flags
+- * @ops: RPC call ops
+- * @data: user call data
+- */
+-struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
+-					const struct rpc_call_ops *ops,
+-					void *data)
+-{
+-	struct rpc_task *task;
+-	task = rpc_new_task(clnt, flags, ops, data);
+-	if (task == NULL) {
+-		rpc_release_calldata(ops, data);
+-		return ERR_PTR(-ENOMEM);
+-	}
+-	atomic_inc(&task->tk_count);
+-	rpc_execute(task);
+-	return task;
+-}
+-EXPORT_SYMBOL(rpc_run_task);
+-
+ /*
+  * Kill all tasks for the given client.
+  * XXX: kill their descendants as well?
+@@ -969,44 +940,25 @@
+ void rpc_killall_tasks(struct rpc_clnt *clnt)
+ {
+ 	struct rpc_task	*rovr;
+-	struct list_head *le;
+ 
+-	dprintk("RPC:       killing all tasks for client %p\n", clnt);
+ 
++	if (list_empty(&clnt->cl_tasks))
++		return;
++	dprintk("RPC:       killing all tasks for client %p\n", clnt);
+ 	/*
+ 	 * Spin lock all_tasks to prevent changes...
+ 	 */
+-	spin_lock(&rpc_sched_lock);
+-	alltask_for_each(rovr, le, &all_tasks) {
++	spin_lock(&clnt->cl_lock);
++	list_for_each_entry(rovr, &clnt->cl_tasks, tk_task) {
+ 		if (! RPC_IS_ACTIVATED(rovr))
+ 			continue;
+-		if (!clnt || rovr->tk_client == clnt) {
++		if (!(rovr->tk_flags & RPC_TASK_KILLED)) {
+ 			rovr->tk_flags |= RPC_TASK_KILLED;
+ 			rpc_exit(rovr, -EIO);
+ 			rpc_wake_up_task(rovr);
+ 		}
+ 	}
+-	spin_unlock(&rpc_sched_lock);
+-}
+-
+-static void rpciod_killall(void)
+-{
+-	unsigned long flags;
+-
+-	while (!list_empty(&all_tasks)) {
+-		clear_thread_flag(TIF_SIGPENDING);
+-		rpc_killall_tasks(NULL);
+-		flush_workqueue(rpciod_workqueue);
+-		if (!list_empty(&all_tasks)) {
+-			dprintk("RPC:       rpciod_killall: waiting for tasks "
+-					"to exit\n");
+-			yield();
+-		}
+-	}
+-
+-	spin_lock_irqsave(&current->sighand->siglock, flags);
+-	recalc_sigpending();
+-	spin_unlock_irqrestore(&current->sighand->siglock, flags);
++	spin_unlock(&clnt->cl_lock);
+ }
+ 
+ /*
+@@ -1018,28 +970,27 @@
+ 	struct workqueue_struct *wq;
+ 	int error = 0;
+ 
++	if (atomic_inc_not_zero(&rpciod_users))
++		return 0;
++
+ 	mutex_lock(&rpciod_mutex);
+-	dprintk("RPC:       rpciod_up: users %u\n", rpciod_users);
+-	rpciod_users++;
+-	if (rpciod_workqueue)
+-		goto out;
+-	/*
+-	 * If there's no pid, we should be the first user.
+-	 */
+-	if (rpciod_users > 1)
+-		printk(KERN_WARNING "rpciod_up: no workqueue, %u users??\n", rpciod_users);
++
++	/* Guard against races with rpciod_down() */
++	if (rpciod_workqueue != NULL)
++		goto out_ok;
+ 	/*
+ 	 * Create the rpciod thread and wait for it to start.
+ 	 */
++	dprintk("RPC:       creating workqueue rpciod\n");
+ 	error = -ENOMEM;
+ 	wq = create_workqueue("rpciod");
+-	if (wq == NULL) {
+-		printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error);
+-		rpciod_users--;
++	if (wq == NULL)
+ 		goto out;
+-	}
++
+ 	rpciod_workqueue = wq;
+ 	error = 0;
++out_ok:
++	atomic_inc(&rpciod_users);
+ out:
+ 	mutex_unlock(&rpciod_mutex);
+ 	return error;
+@@ -1048,58 +999,18 @@
+ void
+ rpciod_down(void)
+ {
+-	mutex_lock(&rpciod_mutex);
+-	dprintk("RPC:       rpciod_down sema %u\n", rpciod_users);
+-	if (rpciod_users) {
+-		if (--rpciod_users)
+-			goto out;
+-	} else
+-		printk(KERN_WARNING "rpciod_down: no users??\n");
++	if (!atomic_dec_and_test(&rpciod_users))
++		return;
+ 
+-	if (!rpciod_workqueue) {
+-		dprintk("RPC:       rpciod_down: Nothing to do!\n");
+-		goto out;
+-	}
+-	rpciod_killall();
++	mutex_lock(&rpciod_mutex);
++	dprintk("RPC:       destroying workqueue rpciod\n");
+ 
++	if (atomic_read(&rpciod_users) == 0 && rpciod_workqueue != NULL) {
+ 	destroy_workqueue(rpciod_workqueue);
+ 	rpciod_workqueue = NULL;
+- out:
+-	mutex_unlock(&rpciod_mutex);
+-}
+-
+-#ifdef RPC_DEBUG
+-void rpc_show_tasks(void)
+-{
+-	struct list_head *le;
+-	struct rpc_task *t;
+-
+-	spin_lock(&rpc_sched_lock);
+-	if (list_empty(&all_tasks)) {
+-		spin_unlock(&rpc_sched_lock);
+-		return;
+-	}
+-	printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
+-		"-rpcwait -action- ---ops--\n");
+-	alltask_for_each(t, le, &all_tasks) {
+-		const char *rpc_waitq = "none";
+-
+-		if (RPC_IS_QUEUED(t))
+-			rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq);
+-
+-		printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n",
+-			t->tk_pid,
+-			(t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1),
+-			t->tk_flags, t->tk_status,
+-			t->tk_client,
+-			(t->tk_client ? t->tk_client->cl_prog : 0),
+-			t->tk_rqstp, t->tk_timeout,
+-			rpc_waitq,
+-			t->tk_action, t->tk_ops);
+ 	}
+-	spin_unlock(&rpc_sched_lock);
++	mutex_unlock(&rpciod_mutex);
+ }
+-#endif
+ 
+ void
+ rpc_destroy_mempool(void)
+diff -Nurb linux-2.6.22-570/net/sunrpc/sunrpc_syms.c linux-2.6.22-try2/net/sunrpc/sunrpc_syms.c
+--- linux-2.6.22-570/net/sunrpc/sunrpc_syms.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/sunrpc/sunrpc_syms.c	2007-12-19 15:29:23.000000000 -0500
+@@ -28,15 +28,11 @@
+ EXPORT_SYMBOL(rpc_sleep_on);
+ EXPORT_SYMBOL(rpc_wake_up_next);
+ EXPORT_SYMBOL(rpc_wake_up_task);
+-EXPORT_SYMBOL(rpciod_down);
+-EXPORT_SYMBOL(rpciod_up);
+-EXPORT_SYMBOL(rpc_new_task);
+ EXPORT_SYMBOL(rpc_wake_up_status);
+ 
+ /* RPC client functions */
+ EXPORT_SYMBOL(rpc_clone_client);
+ EXPORT_SYMBOL(rpc_bind_new_program);
+-EXPORT_SYMBOL(rpc_destroy_client);
+ EXPORT_SYMBOL(rpc_shutdown_client);
+ EXPORT_SYMBOL(rpc_killall_tasks);
+ EXPORT_SYMBOL(rpc_call_sync);
+@@ -61,7 +57,7 @@
+ EXPORT_SYMBOL(rpcauth_create);
+ EXPORT_SYMBOL(rpcauth_lookupcred);
+ EXPORT_SYMBOL(rpcauth_lookup_credcache);
+-EXPORT_SYMBOL(rpcauth_free_credcache);
++EXPORT_SYMBOL(rpcauth_destroy_credcache);
+ EXPORT_SYMBOL(rpcauth_init_credcache);
+ EXPORT_SYMBOL(put_rpccred);
+ 
+@@ -156,6 +152,7 @@
+ 	cache_register(&ip_map_cache);
+ 	cache_register(&unix_gid_cache);
+ 	init_socket_xprt();
++	rpcauth_init_module();
+ out:
+ 	return err;
+ }
+@@ -163,6 +160,7 @@
+ static void __exit
+ cleanup_sunrpc(void)
+ {
++	rpcauth_remove_module();
+ 	cleanup_socket_xprt();
+ 	unregister_rpc_pipefs();
+ 	rpc_destroy_mempool();
+diff -Nurb linux-2.6.22-570/net/sunrpc/xprt.c linux-2.6.22-try2/net/sunrpc/xprt.c
+--- linux-2.6.22-570/net/sunrpc/xprt.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/sunrpc/xprt.c	2007-12-19 15:29:23.000000000 -0500
+@@ -127,7 +127,7 @@
+ 		clear_bit(XPRT_LOCKED, &xprt->state);
+ 		smp_mb__after_clear_bit();
+ 	} else
+-		schedule_work(&xprt->task_cleanup);
++		queue_work(rpciod_workqueue, &xprt->task_cleanup);
+ }
+ 
+ /*
+@@ -515,7 +515,7 @@
+ 	if (xprt_connecting(xprt))
+ 		xprt_release_write(xprt, NULL);
+ 	else
+-		schedule_work(&xprt->task_cleanup);
++		queue_work(rpciod_workqueue, &xprt->task_cleanup);
+ 	return;
+ out_abort:
+ 	spin_unlock(&xprt->transport_lock);
+diff -Nurb linux-2.6.22-570/net/sunrpc/xprtsock.c linux-2.6.22-try2/net/sunrpc/xprtsock.c
+--- linux-2.6.22-570/net/sunrpc/xprtsock.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/sunrpc/xprtsock.c	2007-12-19 15:29:23.000000000 -0500
+@@ -653,8 +653,7 @@
+ 
+ 	dprintk("RPC:       xs_destroy xprt %p\n", xprt);
+ 
+-	cancel_delayed_work(&transport->connect_worker);
+-	flush_scheduled_work();
++	cancel_rearming_delayed_work(&transport->connect_worker);
+ 
+ 	xprt_disconnect(xprt);
+ 	xs_close(xprt);
+@@ -1001,7 +1000,7 @@
+ 		/* Try to schedule an autoclose RPC calls */
+ 		set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ 		if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
+-			schedule_work(&xprt->task_cleanup);
++			queue_work(rpciod_workqueue, &xprt->task_cleanup);
+ 	default:
+ 		xprt_disconnect(xprt);
+ 	}
+@@ -1410,18 +1409,16 @@
+ 		dprintk("RPC:       xs_connect delayed xprt %p for %lu "
+ 				"seconds\n",
+ 				xprt, xprt->reestablish_timeout / HZ);
+-		schedule_delayed_work(&transport->connect_worker,
++		queue_delayed_work(rpciod_workqueue,
++				   &transport->connect_worker,
+ 					xprt->reestablish_timeout);
+ 		xprt->reestablish_timeout <<= 1;
+ 		if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
+ 			xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
+ 	} else {
+ 		dprintk("RPC:       xs_connect scheduled xprt %p\n", xprt);
+-		schedule_delayed_work(&transport->connect_worker, 0);
+-
+-		/* flush_scheduled_work can sleep... */
+-		if (!RPC_IS_ASYNC(task))
+-			flush_scheduled_work();
++		queue_delayed_work(rpciod_workqueue,
++				   &transport->connect_worker, 0);
+ 	}
+ }
+ 
+diff -Nurb linux-2.6.22-570/net/tipc/eth_media.c linux-2.6.22-try2/net/tipc/eth_media.c
+--- linux-2.6.22-570/net/tipc/eth_media.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/tipc/eth_media.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+  * net/tipc/eth_media.c: Ethernet bearer support for TIPC
+  *
+- * Copyright (c) 2001-2006, Ericsson AB
+- * Copyright (c) 2005-2006, Wind River Systems
++ * Copyright (c) 2001-2007, Ericsson AB
++ * Copyright (c) 2005-2007, Wind River Systems
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+@@ -87,6 +87,9 @@
+ /**
+  * recv_msg - handle incoming TIPC message from an Ethernet interface
+  *
++ * Accept only packets explicitly sent to this node, or broadcast packets;
++ * ignore packets sent using Ethernet multicast, and traffic sent to other
++ * nodes (which can happen if the interface is running in promiscuous mode).
+  * Routine truncates any Ethernet padding/CRC appended to the message,
+  * and ensures message size matches actual length
+  */
+@@ -98,9 +101,7 @@
+ 	u32 size;
+ 
+ 	if (likely(eb_ptr->bearer)) {
+-	       if (likely(!dev->promiscuity) ||
+-		   !memcmp(skb_mac_header(buf), dev->dev_addr, ETH_ALEN) ||
+-		   !memcmp(skb_mac_header(buf), dev->broadcast, ETH_ALEN)) {
++		if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
+ 			size = msg_size((struct tipc_msg *)buf->data);
+ 			skb_trim(buf, size);
+ 			if (likely(buf->len == size)) {
+diff -Nurb linux-2.6.22-570/net/tipc/link.c linux-2.6.22-try2/net/tipc/link.c
+--- linux-2.6.22-570/net/tipc/link.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/tipc/link.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+  * net/tipc/link.c: TIPC link code
+  *
+- * Copyright (c) 1996-2006, Ericsson AB
+- * Copyright (c) 2004-2006, Wind River Systems
++ * Copyright (c) 1996-2007, Ericsson AB
++ * Copyright (c) 2004-2007, Wind River Systems
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+@@ -1260,7 +1260,7 @@
+ 	 * (Must not hold any locks while building message.)
+ 	 */
+ 
+-	res = msg_build(hdr, msg_sect, num_sect, sender->max_pkt,
++	res = msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt,
+ 			!sender->user_port, &buf);
+ 
+ 	read_lock_bh(&tipc_net_lock);
+@@ -1271,7 +1271,7 @@
+ 		if (likely(l_ptr)) {
+ 			if (likely(buf)) {
+ 				res = link_send_buf_fast(l_ptr, buf,
+-							 &sender->max_pkt);
++							 &sender->publ.max_pkt);
+ 				if (unlikely(res < 0))
+ 					buf_discard(buf);
+ exit:
+@@ -1299,12 +1299,12 @@
+ 			 * then re-try fast path or fragment the message
+ 			 */
+ 
+-			sender->max_pkt = link_max_pkt(l_ptr);
++			sender->publ.max_pkt = link_max_pkt(l_ptr);
+ 			tipc_node_unlock(node);
+ 			read_unlock_bh(&tipc_net_lock);
+ 
+ 
+-			if ((msg_hdr_sz(hdr) + res) <= sender->max_pkt)
++			if ((msg_hdr_sz(hdr) + res) <= sender->publ.max_pkt)
+ 				goto again;
+ 
+ 			return link_send_sections_long(sender, msg_sect,
+@@ -1357,7 +1357,7 @@
+ 
+ again:
+ 	fragm_no = 1;
+-	max_pkt = sender->max_pkt - INT_H_SIZE;
++	max_pkt = sender->publ.max_pkt - INT_H_SIZE;
+ 		/* leave room for tunnel header in case of link changeover */
+ 	fragm_sz = max_pkt - INT_H_SIZE;
+ 		/* leave room for fragmentation header in each fragment */
+@@ -1463,7 +1463,7 @@
+ 			goto reject;
+ 		}
+ 		if (link_max_pkt(l_ptr) < max_pkt) {
+-			sender->max_pkt = link_max_pkt(l_ptr);
++			sender->publ.max_pkt = link_max_pkt(l_ptr);
+ 			tipc_node_unlock(node);
+ 			for (; buf_chain; buf_chain = buf) {
+ 				buf = buf_chain->next;
+diff -Nurb linux-2.6.22-570/net/tipc/port.c linux-2.6.22-try2/net/tipc/port.c
+--- linux-2.6.22-570/net/tipc/port.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/tipc/port.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+  * net/tipc/port.c: TIPC port code
+  *
+- * Copyright (c) 1992-2006, Ericsson AB
+- * Copyright (c) 2004-2005, Wind River Systems
++ * Copyright (c) 1992-2007, Ericsson AB
++ * Copyright (c) 2004-2007, Wind River Systems
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+@@ -239,6 +239,8 @@
+ 	}
+ 
+ 	tipc_port_lock(ref);
++	p_ptr->publ.usr_handle = usr_handle;
++	p_ptr->publ.max_pkt = MAX_PKT_DEFAULT;
+ 	p_ptr->publ.ref = ref;
+ 	msg = &p_ptr->publ.phdr;
+ 	msg_init(msg, DATA_LOW, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, 0);
+@@ -248,11 +250,9 @@
+ 	msg_set_importance(msg,importance);
+ 	p_ptr->last_in_seqno = 41;
+ 	p_ptr->sent = 1;
+-	p_ptr->publ.usr_handle = usr_handle;
+ 	INIT_LIST_HEAD(&p_ptr->wait_list);
+ 	INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
+ 	p_ptr->congested_link = NULL;
+-	p_ptr->max_pkt = MAX_PKT_DEFAULT;
+ 	p_ptr->dispatcher = dispatcher;
+ 	p_ptr->wakeup = wakeup;
+ 	p_ptr->user_port = NULL;
+@@ -1243,7 +1243,7 @@
+ 	res = TIPC_OK;
+ exit:
+ 	tipc_port_unlock(p_ptr);
+-	p_ptr->max_pkt = tipc_link_get_max_pkt(peer->node, ref);
++	p_ptr->publ.max_pkt = tipc_link_get_max_pkt(peer->node, ref);
+ 	return res;
+ }
+ 
+diff -Nurb linux-2.6.22-570/net/tipc/port.h linux-2.6.22-try2/net/tipc/port.h
+--- linux-2.6.22-570/net/tipc/port.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/tipc/port.h	2007-12-19 15:29:23.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+  * net/tipc/port.h: Include file for TIPC port code
+  *
+- * Copyright (c) 1994-2006, Ericsson AB
+- * Copyright (c) 2004-2005, Wind River Systems
++ * Copyright (c) 1994-2007, Ericsson AB
++ * Copyright (c) 2004-2007, Wind River Systems
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+@@ -81,7 +81,6 @@
+  * @acked:
+  * @publications: list of publications for port
+  * @pub_count: total # of publications port has made during its lifetime
+- * @max_pkt: maximum packet size "hint" used when building messages sent by port
+  * @probing_state:
+  * @probing_interval:
+  * @last_in_seqno:
+@@ -102,7 +101,6 @@
+ 	u32 acked;
+ 	struct list_head publications;
+ 	u32 pub_count;
+-	u32 max_pkt;
+ 	u32 probing_state;
+ 	u32 probing_interval;
+ 	u32 last_in_seqno;
+diff -Nurb linux-2.6.22-570/net/tipc/socket.c linux-2.6.22-try2/net/tipc/socket.c
+--- linux-2.6.22-570/net/tipc/socket.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/net/tipc/socket.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1,8 +1,8 @@
+ /*
+  * net/tipc/socket.c: TIPC socket API
+  *
+- * Copyright (c) 2001-2006, Ericsson AB
+- * Copyright (c) 2004-2006, Wind River Systems
++ * Copyright (c) 2001-2007, Ericsson AB
++ * Copyright (c) 2004-2007, Wind River Systems
+  * All rights reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+@@ -607,23 +607,24 @@
+ static int send_stream(struct kiocb *iocb, struct socket *sock,
+ 		       struct msghdr *m, size_t total_len)
+ {
++	struct tipc_port *tport;
+ 	struct msghdr my_msg;
+ 	struct iovec my_iov;
+ 	struct iovec *curr_iov;
+ 	int curr_iovlen;
+ 	char __user *curr_start;
++	u32 hdr_size;
+ 	int curr_left;
+ 	int bytes_to_send;
+ 	int bytes_sent;
+ 	int res;
+ 
+-	if (likely(total_len <= TIPC_MAX_USER_MSG_SIZE))
+-		return send_packet(iocb, sock, m, total_len);
+-
+-	/* Can only send large data streams if already connected */
++	/* Handle special cases where there is no connection */
+ 
+ 	if (unlikely(sock->state != SS_CONNECTED)) {
+-		if (sock->state == SS_DISCONNECTING)
++		if (sock->state == SS_UNCONNECTED)
++			return send_packet(iocb, sock, m, total_len);
++		else if (sock->state == SS_DISCONNECTING)
+ 			return -EPIPE;
+ 		else
+ 			return -ENOTCONN;
+@@ -648,17 +649,25 @@
+ 	my_msg.msg_name = NULL;
+ 	bytes_sent = 0;
+ 
++	tport = tipc_sk(sock->sk)->p;
++	hdr_size = msg_hdr_sz(&tport->phdr);
++
+ 	while (curr_iovlen--) {
+ 		curr_start = curr_iov->iov_base;
+ 		curr_left = curr_iov->iov_len;
+ 
+ 		while (curr_left) {
+-			bytes_to_send = (curr_left < TIPC_MAX_USER_MSG_SIZE)
+-				? curr_left : TIPC_MAX_USER_MSG_SIZE;
++			bytes_to_send = tport->max_pkt - hdr_size;
++			if (bytes_to_send > TIPC_MAX_USER_MSG_SIZE)
++				bytes_to_send = TIPC_MAX_USER_MSG_SIZE;
++			if (curr_left < bytes_to_send)
++				bytes_to_send = curr_left;
+ 			my_iov.iov_base = curr_start;
+ 			my_iov.iov_len = bytes_to_send;
+ 			if ((res = send_packet(iocb, sock, &my_msg, 0)) < 0) {
+-				return bytes_sent ? bytes_sent : res;
++				if (bytes_sent != 0)
++					res = bytes_sent;
++				return res;
+ 			}
+ 			curr_left -= bytes_to_send;
+ 			curr_start += bytes_to_send;
+@@ -1600,33 +1609,6 @@
+ }
+ 
+ /**
+- * Placeholders for non-implemented functionality
+- *
+- * Returns error code (POSIX-compliant where defined)
+- */
+-
+-static int ioctl(struct socket *s, u32 cmd, unsigned long arg)
+-{
+-	return -EINVAL;
+-}
+-
+-static int no_mmap(struct file *file, struct socket *sock,
+-		   struct vm_area_struct *vma)
+-{
+-	return -EINVAL;
+-}
+-static ssize_t no_sendpage(struct socket *sock, struct page *page,
+-			   int offset, size_t size, int flags)
+-{
+-	return -EINVAL;
+-}
+-
+-static int no_skpair(struct socket *s1, struct socket *s2)
+-{
+-	return -EOPNOTSUPP;
+-}
+-
+-/**
+  * Protocol switches for the various types of TIPC sockets
+  */
+ 
+@@ -1636,19 +1618,19 @@
+ 	.release	= release,
+ 	.bind		= bind,
+ 	.connect	= connect,
+-	.socketpair	= no_skpair,
++	.socketpair	= sock_no_socketpair,
+ 	.accept		= accept,
+ 	.getname	= get_name,
+ 	.poll		= poll,
+-	.ioctl		= ioctl,
++	.ioctl		= sock_no_ioctl,
+ 	.listen		= listen,
+ 	.shutdown	= shutdown,
+ 	.setsockopt	= setsockopt,
+ 	.getsockopt	= getsockopt,
+ 	.sendmsg	= send_msg,
+ 	.recvmsg	= recv_msg,
+-	.mmap		= no_mmap,
+-	.sendpage	= no_sendpage
++	.mmap		= sock_no_mmap,
++	.sendpage	= sock_no_sendpage
+ };
+ 
+ static struct proto_ops packet_ops = {
+@@ -1657,19 +1639,19 @@
+ 	.release	= release,
+ 	.bind		= bind,
+ 	.connect	= connect,
+-	.socketpair	= no_skpair,
++	.socketpair	= sock_no_socketpair,
+ 	.accept		= accept,
+ 	.getname	= get_name,
+ 	.poll		= poll,
+-	.ioctl		= ioctl,
++	.ioctl		= sock_no_ioctl,
+ 	.listen		= listen,
+ 	.shutdown	= shutdown,
+ 	.setsockopt	= setsockopt,
+ 	.getsockopt	= getsockopt,
+ 	.sendmsg	= send_packet,
+ 	.recvmsg	= recv_msg,
+-	.mmap		= no_mmap,
+-	.sendpage	= no_sendpage
++	.mmap		= sock_no_mmap,
++	.sendpage	= sock_no_sendpage
+ };
+ 
+ static struct proto_ops stream_ops = {
+@@ -1678,19 +1660,19 @@
+ 	.release	= release,
+ 	.bind		= bind,
+ 	.connect	= connect,
+-	.socketpair	= no_skpair,
++	.socketpair	= sock_no_socketpair,
+ 	.accept		= accept,
+ 	.getname	= get_name,
+ 	.poll		= poll,
+-	.ioctl		= ioctl,
++	.ioctl		= sock_no_ioctl,
+ 	.listen		= listen,
+ 	.shutdown	= shutdown,
+ 	.setsockopt	= setsockopt,
+ 	.getsockopt	= getsockopt,
+ 	.sendmsg	= send_stream,
+ 	.recvmsg	= recv_stream,
+-	.mmap		= no_mmap,
+-	.sendpage	= no_sendpage
++	.mmap		= sock_no_mmap,
++	.sendpage	= sock_no_sendpage
+ };
+ 
+ static struct net_proto_family tipc_family_ops = {
+diff -Nurb linux-2.6.22-570/rej linux-2.6.22-try2/rej
+--- linux-2.6.22-570/rej	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/rej	2007-12-19 15:33:57.000000000 -0500
+@@ -0,0 +1,28 @@
++vi -o ./drivers/dma/ioatdma.c ./drivers/dma/ioatdma.c.rej
++vi -o ./fs/nfs/super.c ./fs/nfs/super.c.rej
++vi -o ./fs/ocfs2/aops.c ./fs/ocfs2/aops.c.rej
++vi -o ./fs/ocfs2/file.c ./fs/ocfs2/file.c.rej
++vi -o ./fs/ocfs2/super.c ./fs/ocfs2/super.c.rej
++vi -o ./fs/proc/base.c ./fs/proc/base.c.rej
++vi -o ./fs/sysfs/file.c ./fs/sysfs/file.c.rej
++vi -o ./fs/sync.c ./fs/sync.c.rej
++vi -o ./include/acpi/processor.h ./include/acpi/processor.h.rej
++vi -o ./include/linux/sunrpc/clnt.h ./include/linux/sunrpc/clnt.h.rej
++vi -o ./include/linux/syscalls.h ./include/linux/syscalls.h.rej
++vi -o ./include/linux/nfs_mount.h ./include/linux/nfs_mount.h.rej
++vi -o ./include/linux/sched.h ./include/linux/sched.h.rej
++vi -o ./include/linux/nsproxy.h ./include/linux/nsproxy.h.rej
++vi -o ./include/linux/fs.h ./include/linux/fs.h.rej
++vi -o ./kernel/timer.c ./kernel/timer.c.rej
++vi -o ./kernel/fork.c ./kernel/fork.c.rej
++vi -o ./kernel/nsproxy.c ./kernel/nsproxy.c.rej
++vi -o ./kernel/sys.c ./kernel/sys.c.rej
++vi -o ./kernel/user.c ./kernel/user.c.rej
++vi -o ./kernel/utsname.c ./kernel/utsname.c.rej
++vi -o ./kernel/sched.c ./kernel/sched.c.rej
++vi -o ./kernel/container.c ./kernel/container.c.rej
++vi -o ./mm/memory.c ./mm/memory.c.rej
++vi -o ./mm/hugetlb.c ./mm/hugetlb.c.rej
++vi -o ./net/bridge/br_if.c ./net/bridge/br_if.c.rej
++vi -o ./net/sunrpc/auth_unix.c ./net/sunrpc/auth_unix.c.rej
++vi -o ./scripts/checksyscalls.sh ./scripts/checksyscalls.sh.rej
+diff -Nurb linux-2.6.22-570/security/commoncap.c linux-2.6.22-try2/security/commoncap.c
+--- linux-2.6.22-570/security/commoncap.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/security/commoncap.c	2007-12-19 15:29:24.000000000 -0500
+@@ -150,7 +150,7 @@
+ 
+ 	if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
+ 	    !cap_issubset (new_permitted, current->cap_permitted)) {
+-		current->mm->dumpable = suid_dumpable;
++		set_dumpable(current->mm, suid_dumpable);
+ 
+ 		if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) {
+ 			if (!capable(CAP_SETUID)) {
+diff -Nurb linux-2.6.22-570/security/dummy.c linux-2.6.22-try2/security/dummy.c
+--- linux-2.6.22-570/security/dummy.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/security/dummy.c	2007-12-19 15:29:24.000000000 -0500
+@@ -131,7 +131,7 @@
+ static void dummy_bprm_apply_creds (struct linux_binprm *bprm, int unsafe)
+ {
+ 	if (bprm->e_uid != current->uid || bprm->e_gid != current->gid) {
+-		current->mm->dumpable = suid_dumpable;
++		set_dumpable(current->mm, suid_dumpable);
+ 
+ 		if ((unsafe & ~LSM_UNSAFE_PTRACE_CAP) && !capable(CAP_SETUID)) {
+ 			bprm->e_uid = current->uid;
+@@ -421,8 +421,12 @@
+ 
+ static int dummy_file_mmap (struct file *file, unsigned long reqprot,
+ 			    unsigned long prot,
+-			    unsigned long flags)
++			    unsigned long flags,
++			    unsigned long addr,
++			    unsigned long addr_only)
+ {
++	if (addr < mmap_min_addr)
++		return -EACCES;
+ 	return 0;
+ }
+ 
+diff -Nurb linux-2.6.22-570/security/keys/request_key.c linux-2.6.22-try2/security/keys/request_key.c
+--- linux-2.6.22-570/security/keys/request_key.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/security/keys/request_key.c	2007-12-19 15:29:23.000000000 -0500
+@@ -108,7 +108,8 @@
+ 	argv[i] = NULL;
+ 
+ 	/* do it */
+-	ret = call_usermodehelper_keys(argv[0], argv, envp, keyring, 1);
++	ret = call_usermodehelper_keys(argv[0], argv, envp, keyring,
++				       UMH_WAIT_PROC);
+ 
+ error_link:
+ 	key_put(keyring);
+diff -Nurb linux-2.6.22-570/security/security.c linux-2.6.22-try2/security/security.c
+--- linux-2.6.22-570/security/security.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/security/security.c	2007-12-19 15:29:23.000000000 -0500
+@@ -24,6 +24,7 @@
+ extern void security_fixup_ops(struct security_operations *ops);
+ 
+ struct security_operations *security_ops;	/* Initialized to NULL */
++unsigned long mmap_min_addr;		/* 0 means no protection */
+ 
+ static inline int verify(struct security_operations *ops)
+ {
+@@ -176,4 +177,5 @@
+ EXPORT_SYMBOL_GPL(unregister_security);
+ EXPORT_SYMBOL_GPL(mod_reg_security);
+ EXPORT_SYMBOL_GPL(mod_unreg_security);
++EXPORT_SYMBOL_GPL(mmap_min_addr);
+ EXPORT_SYMBOL(security_ops);
+diff -Nurb linux-2.6.22-570/security/selinux/avc.c linux-2.6.22-try2/security/selinux/avc.c
+--- linux-2.6.22-570/security/selinux/avc.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/security/selinux/avc.c	2007-12-19 15:29:23.000000000 -0500
+@@ -586,7 +586,7 @@
+ 				}
+ 			}
+ 			if (inode)
+-				audit_log_format(ab, " dev=%s ino=%ld",
++				audit_log_format(ab, " dev=%s ino=%lu",
+ 						 inode->i_sb->s_id,
+ 						 inode->i_ino);
+ 			break;
+@@ -832,6 +832,7 @@
+  * @tsid: target security identifier
+  * @tclass: target security class
+  * @requested: requested permissions, interpreted based on @tclass
++ * @flags:  AVC_STRICT or 0
+  * @avd: access vector decisions
+  *
+  * Check the AVC to determine whether the @requested permissions are granted
+@@ -847,6 +848,7 @@
+  */
+ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
+                          u16 tclass, u32 requested,
++			 unsigned flags,
+                          struct av_decision *avd)
+ {
+ 	struct avc_node *node;
+@@ -874,7 +876,7 @@
+ 	denied = requested & ~(p_ae->avd.allowed);
+ 
+ 	if (!requested || denied) {
+-		if (selinux_enforcing)
++		if (selinux_enforcing || (flags & AVC_STRICT))
+ 			rc = -EACCES;
+ 		else
+ 			if (node)
+@@ -909,7 +911,7 @@
+ 	struct av_decision avd;
+ 	int rc;
+ 
+-	rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, &avd);
++	rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd);
+ 	avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata);
+ 	return rc;
+ }
+diff -Nurb linux-2.6.22-570/security/selinux/hooks.c linux-2.6.22-try2/security/selinux/hooks.c
+--- linux-2.6.22-570/security/selinux/hooks.c	2007-12-12 18:08:37.000000000 -0500
++++ linux-2.6.22-try2/security/selinux/hooks.c	2007-12-19 15:29:23.000000000 -0500
+@@ -111,6 +111,9 @@
+ /* Original (dummy) security module. */
+ static struct security_operations *original_ops = NULL;
+ 
++/* Did we enable minimum mmap address checking? */
++static int enabled_mmap_min_addr;
++
+ /* Minimal support for a secondary security module,
+    just to allow the use of the dummy or capability modules.
+    The owlsm module can alternatively be used as a secondary
+@@ -1593,6 +1596,7 @@
+ 		rc = avc_has_perm_noaudit(tsec->sid, tsec->sid,
+ 					SECCLASS_CAPABILITY,
+ 					CAP_TO_MASK(CAP_SYS_ADMIN),
++					0,
+ 					NULL);
+ 
+ 	if (rc == 0)
+@@ -2570,12 +2574,16 @@
+ }
+ 
+ static int selinux_file_mmap(struct file *file, unsigned long reqprot,
+-			     unsigned long prot, unsigned long flags)
++			     unsigned long prot, unsigned long flags,
++			     unsigned long addr, unsigned long addr_only)
+ {
+-	int rc;
++	int rc = 0;
++	u32 sid = ((struct task_security_struct *)(current->security))->sid;
+ 
+-	rc = secondary_ops->file_mmap(file, reqprot, prot, flags);
+-	if (rc)
++	if (addr < mmap_min_addr)
++		rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT,
++				  MEMPROTECT__MMAP_ZERO, NULL);
++	if (rc || addr_only)
+ 		return rc;
+ 
+ 	if (selinux_checkreqprot)
+@@ -4628,7 +4636,7 @@
+ 		if (p->ptrace & PT_PTRACED) {
+ 			error = avc_has_perm_noaudit(tsec->ptrace_sid, sid,
+ 						     SECCLASS_PROCESS,
+-						     PROCESS__PTRACE, &avd);
++						     PROCESS__PTRACE, 0, &avd);
+ 			if (!error)
+ 				tsec->sid = sid;
+ 			task_unlock(p);
+@@ -4910,6 +4918,16 @@
+ 	sel_inode_cache = kmem_cache_create("selinux_inode_security",
+ 					    sizeof(struct inode_security_struct),
+ 					    0, SLAB_PANIC, NULL, NULL);
++
++	/*
++	 * Tasks cannot mmap below this without the mmap_zero permission.
++	 * If not enabled already, do so by setting it to 64KB.
++	 */
++	if (mmap_min_addr == 0) {
++		enabled_mmap_min_addr = 1;
++		mmap_min_addr = 65536;
++	}
++
+ 	avc_init();
+ 
+ 	original_ops = secondary_ops = security_ops;
+@@ -5060,6 +5078,10 @@
+ 	selinux_disabled = 1;
+ 	selinux_enabled = 0;
+ 
++	/* Disable minimum mmap address check only if we enabled it */
++	if (enabled_mmap_min_addr)
++		mmap_min_addr = 0;
++
+ 	/* Reset security_ops to the secondary module, dummy or capability. */
+ 	security_ops = secondary_ops;
+ 
+diff -Nurb linux-2.6.22-570/security/selinux/include/av_perm_to_string.h linux-2.6.22-try2/security/selinux/include/av_perm_to_string.h
+--- linux-2.6.22-570/security/selinux/include/av_perm_to_string.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/security/selinux/include/av_perm_to_string.h	2007-12-19 15:29:23.000000000 -0500
+@@ -158,3 +158,4 @@
+    S_(SECCLASS_KEY, KEY__CREATE, "create")
+    S_(SECCLASS_DCCP_SOCKET, DCCP_SOCKET__NODE_BIND, "node_bind")
+    S_(SECCLASS_DCCP_SOCKET, DCCP_SOCKET__NAME_CONNECT, "name_connect")
++   S_(SECCLASS_MEMPROTECT, MEMPROTECT__MMAP_ZERO, "mmap_zero")
+diff -Nurb linux-2.6.22-570/security/selinux/include/av_permissions.h linux-2.6.22-try2/security/selinux/include/av_permissions.h
+--- linux-2.6.22-570/security/selinux/include/av_permissions.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/security/selinux/include/av_permissions.h	2007-12-19 15:29:23.000000000 -0500
+@@ -823,3 +823,4 @@
+ #define DCCP_SOCKET__NAME_BIND                    0x00200000UL
+ #define DCCP_SOCKET__NODE_BIND                    0x00400000UL
+ #define DCCP_SOCKET__NAME_CONNECT                 0x00800000UL
++#define MEMPROTECT__MMAP_ZERO                     0x00000001UL
+diff -Nurb linux-2.6.22-570/security/selinux/include/avc.h linux-2.6.22-try2/security/selinux/include/avc.h
+--- linux-2.6.22-570/security/selinux/include/avc.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/security/selinux/include/avc.h	2007-12-19 15:29:23.000000000 -0500
+@@ -102,8 +102,10 @@
+                u16 tclass, u32 requested,
+                struct av_decision *avd, int result, struct avc_audit_data *auditdata);
+ 
++#define AVC_STRICT 1 /* Ignore permissive mode. */
+ int avc_has_perm_noaudit(u32 ssid, u32 tsid,
+                          u16 tclass, u32 requested,
++			 unsigned flags,
+                          struct av_decision *avd);
+ 
+ int avc_has_perm(u32 ssid, u32 tsid,
+diff -Nurb linux-2.6.22-570/security/selinux/include/class_to_string.h linux-2.6.22-try2/security/selinux/include/class_to_string.h
+--- linux-2.6.22-570/security/selinux/include/class_to_string.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/security/selinux/include/class_to_string.h	2007-12-19 15:29:23.000000000 -0500
+@@ -63,3 +63,4 @@
+     S_("key")
+     S_(NULL)
+     S_("dccp_socket")
++    S_("memprotect")
+diff -Nurb linux-2.6.22-570/security/selinux/include/flask.h linux-2.6.22-try2/security/selinux/include/flask.h
+--- linux-2.6.22-570/security/selinux/include/flask.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/security/selinux/include/flask.h	2007-12-19 15:29:23.000000000 -0500
+@@ -49,6 +49,7 @@
+ #define SECCLASS_PACKET                                  57
+ #define SECCLASS_KEY                                     58
+ #define SECCLASS_DCCP_SOCKET                             60
++#define SECCLASS_MEMPROTECT                              61
+ 
+ /*
+  * Security identifier indices for initial entities
+diff -Nurb linux-2.6.22-570/security/selinux/include/security.h linux-2.6.22-try2/security/selinux/include/security.h
+--- linux-2.6.22-570/security/selinux/include/security.h	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/security/selinux/include/security.h	2007-12-19 15:29:23.000000000 -0500
+@@ -41,6 +41,7 @@
+ 
+ int security_load_policy(void * data, size_t len);
+ 
++#define SEL_VEC_MAX 32
+ struct av_decision {
+ 	u32 allowed;
+ 	u32 decided;
+@@ -87,6 +88,9 @@
+ 
+ int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid);
+ 
++int security_get_classes(char ***classes, int *nclasses);
++int security_get_permissions(char *class, char ***perms, int *nperms);
++
+ #define SECURITY_FS_USE_XATTR		1 /* use xattr */
+ #define SECURITY_FS_USE_TRANS		2 /* use transition SIDs, e.g. devpts/tmpfs */
+ #define SECURITY_FS_USE_TASK		3 /* use task SIDs, e.g. pipefs/sockfs */
+diff -Nurb linux-2.6.22-570/security/selinux/selinuxfs.c linux-2.6.22-try2/security/selinux/selinuxfs.c
+--- linux-2.6.22-570/security/selinux/selinuxfs.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/security/selinux/selinuxfs.c	2007-12-19 15:29:23.000000000 -0500
+@@ -67,6 +67,10 @@
+ static int bool_num = 0;
+ static int *bool_pending_values = NULL;
+ 
++/* global data for classes */
++static struct dentry *class_dir = NULL;
++static unsigned long last_class_ino;
++
+ extern void selnl_notify_setenforce(int val);
+ 
+ /* Check whether a task is allowed to use a security operation. */
+@@ -106,6 +110,7 @@
+ 
+ #define SEL_INITCON_INO_OFFSET 	0x01000000
+ #define SEL_BOOL_INO_OFFSET	0x02000000
++#define SEL_CLASS_INO_OFFSET	0x04000000
+ #define SEL_INO_MASK		0x00ffffff
+ 
+ #define TMPBUFLEN	12
+@@ -237,6 +242,11 @@
+ 
+ /* declaration for sel_write_load */
+ static int sel_make_bools(void);
++static int sel_make_classes(void);
++
++/* declaration for sel_make_class_dirs */
++static int sel_make_dir(struct inode *dir, struct dentry *dentry,
++			unsigned long *ino);
+ 
+ static ssize_t sel_read_mls(struct file *filp, char __user *buf,
+ 				size_t count, loff_t *ppos)
+@@ -287,10 +297,18 @@
+ 		goto out;
+ 
+ 	ret = sel_make_bools();
++	if (ret) {
++		length = ret;
++		goto out1;
++	}
++
++	ret = sel_make_classes();
+ 	if (ret)
+ 		length = ret;
+ 	else
+ 		length = count;
++
++out1:
+ 	audit_log(current->audit_context, GFP_KERNEL, AUDIT_MAC_POLICY_LOAD,
+ 		"policy loaded auid=%u",
+ 		audit_get_loginuid(current->audit_context));
+@@ -940,9 +958,8 @@
+ 	.write          = sel_commit_bools_write,
+ };
+ 
+-/* delete booleans - partial revoke() from
+- * fs/proc/generic.c proc_kill_inodes */
+-static void sel_remove_bools(struct dentry *de)
++/* partial revoke() from fs/proc/generic.c proc_kill_inodes */
++static void sel_remove_entries(struct dentry *de)
+ {
+ 	struct list_head *p, *node;
+ 	struct super_block *sb = de->d_sb;
+@@ -998,7 +1015,7 @@
+ 	kfree(bool_pending_values);
+ 	bool_pending_values = NULL;
+ 
+-	sel_remove_bools(dir);
++	sel_remove_entries(dir);
+ 
+ 	if (!(page = (char*)get_zeroed_page(GFP_KERNEL)))
+ 		return -ENOMEM;
+@@ -1048,7 +1065,7 @@
+ 	return ret;
+ err:
+ 	kfree(values);
+-	sel_remove_bools(dir);
++	sel_remove_entries(dir);
+ 	ret = -ENOMEM;
+ 	goto out;
+ }
+@@ -1294,7 +1311,227 @@
+ 	return ret;
+ }
+ 
+-static int sel_make_dir(struct inode *dir, struct dentry *dentry)
++static inline unsigned int sel_div(unsigned long a, unsigned long b)
++{
++	return a / b - (a % b < 0);
++}
++
++static inline unsigned long sel_class_to_ino(u16 class)
++{
++	return (class * (SEL_VEC_MAX + 1)) | SEL_CLASS_INO_OFFSET;
++}
++
++static inline u16 sel_ino_to_class(unsigned long ino)
++{
++	return sel_div(ino & SEL_INO_MASK, SEL_VEC_MAX + 1);
++}
++
++static inline unsigned long sel_perm_to_ino(u16 class, u32 perm)
++{
++	return (class * (SEL_VEC_MAX + 1) + perm) | SEL_CLASS_INO_OFFSET;
++}
++
++static inline u32 sel_ino_to_perm(unsigned long ino)
++{
++	return (ino & SEL_INO_MASK) % (SEL_VEC_MAX + 1);
++}
++
++static ssize_t sel_read_class(struct file * file, char __user *buf,
++				size_t count, loff_t *ppos)
++{
++	ssize_t rc, len;
++	char *page;
++	unsigned long ino = file->f_path.dentry->d_inode->i_ino;
++
++	page = (char *)__get_free_page(GFP_KERNEL);
++	if (!page) {
++		rc = -ENOMEM;
++		goto out;
++	}
++
++	len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_class(ino));
++	rc = simple_read_from_buffer(buf, count, ppos, page, len);
++	free_page((unsigned long)page);
++out:
++	return rc;
++}
++
++static const struct file_operations sel_class_ops = {
++	.read		= sel_read_class,
++};
++
++static ssize_t sel_read_perm(struct file * file, char __user *buf,
++				size_t count, loff_t *ppos)
++{
++	ssize_t rc, len;
++	char *page;
++	unsigned long ino = file->f_path.dentry->d_inode->i_ino;
++
++	page = (char *)__get_free_page(GFP_KERNEL);
++	if (!page) {
++		rc = -ENOMEM;
++		goto out;
++	}
++
++	len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_perm(ino));
++	rc = simple_read_from_buffer(buf, count, ppos, page, len);
++	free_page((unsigned long)page);
++out:
++	return rc;
++}
++
++static const struct file_operations sel_perm_ops = {
++	.read		= sel_read_perm,
++};
++
++static int sel_make_perm_files(char *objclass, int classvalue,
++				struct dentry *dir)
++{
++	int i, rc = 0, nperms;
++	char **perms;
++
++	rc = security_get_permissions(objclass, &perms, &nperms);
++	if (rc)
++		goto out;
++
++	for (i = 0; i < nperms; i++) {
++		struct inode *inode;
++		struct dentry *dentry;
++
++		dentry = d_alloc_name(dir, perms[i]);
++		if (!dentry) {
++			rc = -ENOMEM;
++			goto out1;
++		}
++
++		inode = sel_make_inode(dir->d_sb, S_IFREG|S_IRUGO);
++		if (!inode) {
++			rc = -ENOMEM;
++			goto out1;
++		}
++		inode->i_fop = &sel_perm_ops;
++		/* i+1 since perm values are 1-indexed */
++		inode->i_ino = sel_perm_to_ino(classvalue, i+1);
++		d_add(dentry, inode);
++	}
++
++out1:
++	for (i = 0; i < nperms; i++)
++		kfree(perms[i]);
++	kfree(perms);
++out:
++	return rc;
++}
++
++static int sel_make_class_dir_entries(char *classname, int index,
++					struct dentry *dir)
++{
++	struct dentry *dentry = NULL;
++	struct inode *inode = NULL;
++	int rc;
++
++	dentry = d_alloc_name(dir, "index");
++	if (!dentry) {
++		rc = -ENOMEM;
++		goto out;
++	}
++
++	inode = sel_make_inode(dir->d_sb, S_IFREG|S_IRUGO);
++	if (!inode) {
++		rc = -ENOMEM;
++		goto out;
++	}
++
++	inode->i_fop = &sel_class_ops;
++	inode->i_ino = sel_class_to_ino(index);
++	d_add(dentry, inode);
++
++	dentry = d_alloc_name(dir, "perms");
++	if (!dentry) {
++		rc = -ENOMEM;
++		goto out;
++	}
++
++	rc = sel_make_dir(dir->d_inode, dentry, &last_class_ino);
++	if (rc)
++		goto out;
++
++	rc = sel_make_perm_files(classname, index, dentry);
++
++out:
++	return rc;
++}
++
++static void sel_remove_classes(void)
++{
++	struct list_head *class_node;
++
++	list_for_each(class_node, &class_dir->d_subdirs) {
++		struct dentry *class_subdir = list_entry(class_node,
++					struct dentry, d_u.d_child);
++		struct list_head *class_subdir_node;
++
++		list_for_each(class_subdir_node, &class_subdir->d_subdirs) {
++			struct dentry *d = list_entry(class_subdir_node,
++						struct dentry, d_u.d_child);
++
++			if (d->d_inode)
++				if (d->d_inode->i_mode & S_IFDIR)
++					sel_remove_entries(d);
++		}
++
++		sel_remove_entries(class_subdir);
++	}
++
++	sel_remove_entries(class_dir);
++}
++
++static int sel_make_classes(void)
++{
++	int rc = 0, nclasses, i;
++	char **classes;
++
++	/* delete any existing entries */
++	sel_remove_classes();
++
++	rc = security_get_classes(&classes, &nclasses);
++	if (rc < 0)
++		goto out;
++
++	/* +2 since classes are 1-indexed */
++	last_class_ino = sel_class_to_ino(nclasses+2);
++
++	for (i = 0; i < nclasses; i++) {
++		struct dentry *class_name_dir;
++
++		class_name_dir = d_alloc_name(class_dir, classes[i]);
++		if (!class_name_dir) {
++			rc = -ENOMEM;
++			goto out1;
++		}
++
++		rc = sel_make_dir(class_dir->d_inode, class_name_dir,
++				&last_class_ino);
++		if (rc)
++			goto out1;
++
++		/* i+1 since class values are 1-indexed */
++		rc = sel_make_class_dir_entries(classes[i], i+1,
++				class_name_dir);
++		if (rc)
++			goto out1;
++	}
++
++out1:
++	for (i = 0; i < nclasses; i++)
++		kfree(classes[i]);
++	kfree(classes);
++out:
++	return rc;
++}
++
++static int sel_make_dir(struct inode *dir, struct dentry *dentry,
++			unsigned long *ino)
+ {
+ 	int ret = 0;
+ 	struct inode *inode;
+@@ -1306,7 +1543,7 @@
+ 	}
+ 	inode->i_op = &simple_dir_inode_operations;
+ 	inode->i_fop = &simple_dir_operations;
+-	inode->i_ino = ++sel_last_ino;
++	inode->i_ino = ++(*ino);
+ 	/* directory inodes start off with i_nlink == 2 (for "." entry) */
+ 	inc_nlink(inode);
+ 	d_add(dentry, inode);
+@@ -1352,7 +1589,7 @@
+ 		goto err;
+ 	}
+ 
+-	ret = sel_make_dir(root_inode, dentry);
++	ret = sel_make_dir(root_inode, dentry, &sel_last_ino);
+ 	if (ret)
+ 		goto err;
+ 
+@@ -1385,7 +1622,7 @@
+ 		goto err;
+ 	}
+ 
+-	ret = sel_make_dir(root_inode, dentry);
++	ret = sel_make_dir(root_inode, dentry, &sel_last_ino);
+ 	if (ret)
+ 		goto err;
+ 
+@@ -1399,7 +1636,7 @@
+ 		goto err;
+ 	}
+ 
+-	ret = sel_make_dir(root_inode, dentry);
++	ret = sel_make_dir(root_inode, dentry, &sel_last_ino);
+ 	if (ret)
+ 		goto err;
+ 
+@@ -1407,6 +1644,18 @@
+ 	if (ret)
+ 		goto err;
+ 
++	dentry = d_alloc_name(sb->s_root, "class");
++	if (!dentry) {
++		ret = -ENOMEM;
++		goto err;
++	}
++
++	ret = sel_make_dir(root_inode, dentry, &sel_last_ino);
++	if (ret)
++		goto err;
++
++	class_dir = dentry;
++
+ out:
+ 	return ret;
+ err:
+diff -Nurb linux-2.6.22-570/security/selinux/ss/policydb.c linux-2.6.22-try2/security/selinux/ss/policydb.c
+--- linux-2.6.22-570/security/selinux/ss/policydb.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/security/selinux/ss/policydb.c	2007-12-19 15:29:23.000000000 -0500
+@@ -21,6 +21,7 @@
+  */
+ 
+ #include <linux/kernel.h>
++#include <linux/sched.h>
+ #include <linux/slab.h>
+ #include <linux/string.h>
+ #include <linux/errno.h>
+@@ -598,6 +599,7 @@
+ 	struct range_trans *rt, *lrt = NULL;
+ 
+ 	for (i = 0; i < SYM_NUM; i++) {
++		cond_resched();
+ 		hashtab_map(p->symtab[i].table, destroy_f[i], NULL);
+ 		hashtab_destroy(p->symtab[i].table);
+ 	}
+@@ -612,6 +614,7 @@
+ 	avtab_destroy(&p->te_avtab);
+ 
+ 	for (i = 0; i < OCON_NUM; i++) {
++		cond_resched();
+ 		c = p->ocontexts[i];
+ 		while (c) {
+ 			ctmp = c;
+@@ -623,6 +626,7 @@
+ 
+ 	g = p->genfs;
+ 	while (g) {
++		cond_resched();
+ 		kfree(g->fstype);
+ 		c = g->head;
+ 		while (c) {
+@@ -639,18 +643,21 @@
+ 	cond_policydb_destroy(p);
+ 
+ 	for (tr = p->role_tr; tr; tr = tr->next) {
++		cond_resched();
+ 		kfree(ltr);
+ 		ltr = tr;
+ 	}
+ 	kfree(ltr);
+ 
+ 	for (ra = p->role_allow; ra; ra = ra -> next) {
++		cond_resched();
+ 		kfree(lra);
+ 		lra = ra;
+ 	}
+ 	kfree(lra);
+ 
+ 	for (rt = p->range_tr; rt; rt = rt -> next) {
++		cond_resched();
+ 		if (lrt) {
+ 			ebitmap_destroy(&lrt->target_range.level[0].cat);
+ 			ebitmap_destroy(&lrt->target_range.level[1].cat);
+diff -Nurb linux-2.6.22-570/security/selinux/ss/services.c linux-2.6.22-try2/security/selinux/ss/services.c
+--- linux-2.6.22-570/security/selinux/ss/services.c	2007-07-08 19:32:17.000000000 -0400
++++ linux-2.6.22-try2/security/selinux/ss/services.c	2007-12-19 15:29:23.000000000 -0500
+@@ -1587,19 +1587,18 @@
+ 			   u32 *nel)
+ {
+ 	struct context *fromcon, usercon;
+-	u32 *mysids, *mysids2, sid;
++	u32 *mysids = NULL, *mysids2, sid;
+ 	u32 mynel = 0, maxnel = SIDS_NEL;
+ 	struct user_datum *user;
+ 	struct role_datum *role;
+-	struct av_decision avd;
+ 	struct ebitmap_node *rnode, *tnode;
+ 	int rc = 0, i, j;
+ 
+-	if (!ss_initialized) {
+ 		*sids = NULL;
+ 		*nel = 0;
++
++	if (!ss_initialized)
+ 		goto out;
+-	}
+ 
+ 	POLICY_RDLOCK;
+ 
+@@ -1635,17 +1634,9 @@
+ 			if (mls_setup_user_range(fromcon, user, &usercon))
+ 				continue;
+ 
+-			rc = context_struct_compute_av(fromcon, &usercon,
+-						       SECCLASS_PROCESS,
+-						       PROCESS__TRANSITION,
+-						       &avd);
+-			if (rc ||  !(avd.allowed & PROCESS__TRANSITION))
+-				continue;
+ 			rc = sidtab_context_to_sid(&sidtab, &usercon, &sid);
+-			if (rc) {
+-				kfree(mysids);
++			if (rc)
+ 				goto out_unlock;
+-			}
+ 			if (mynel < maxnel) {
+ 				mysids[mynel++] = sid;
+ 			} else {
+@@ -1653,7 +1644,6 @@
+ 				mysids2 = kcalloc(maxnel, sizeof(*mysids2), GFP_ATOMIC);
+ 				if (!mysids2) {
+ 					rc = -ENOMEM;
+-					kfree(mysids);
+ 					goto out_unlock;
+ 				}
+ 				memcpy(mysids2, mysids, mynel * sizeof(*mysids2));
+@@ -1664,11 +1654,32 @@
+ 		}
+ 	}
+ 
+-	*sids = mysids;
+-	*nel = mynel;
+-
+ out_unlock:
+ 	POLICY_RDUNLOCK;
++	if (rc || !mynel) {
++		kfree(mysids);
++		goto out;
++	}
++
++	mysids2 = kcalloc(mynel, sizeof(*mysids2), GFP_KERNEL);
++	if (!mysids2) {
++		rc = -ENOMEM;
++		kfree(mysids);
++		goto out;
++	}
++	for (i = 0, j = 0; i < mynel; i++) {
++		rc = avc_has_perm_noaudit(fromsid, mysids[i],
++					  SECCLASS_PROCESS,
++					  PROCESS__TRANSITION, AVC_STRICT,
++					  NULL);
++		if (!rc)
++			mysids2[j++] = mysids[i];
++		cond_resched();
++	}
++	rc = 0;
++	kfree(mysids);
++	*sids = mysids2;
++	*nel = j;
+ out:
+ 	return rc;
+ }
+@@ -1996,6 +2007,101 @@
+ 	return rc;
+ }
+ 
++static int get_classes_callback(void *k, void *d, void *args)
++{
++	struct class_datum *datum = d;
++	char *name = k, **classes = args;
++	int value = datum->value - 1;
++
++	classes[value] = kstrdup(name, GFP_ATOMIC);
++	if (!classes[value])
++		return -ENOMEM;
++
++	return 0;
++}
++
++int security_get_classes(char ***classes, int *nclasses)
++{
++	int rc = -ENOMEM;
++
++	POLICY_RDLOCK;
++
++	*nclasses = policydb.p_classes.nprim;
++	*classes = kcalloc(*nclasses, sizeof(*classes), GFP_ATOMIC);
++	if (!*classes)
++		goto out;
++
++	rc = hashtab_map(policydb.p_classes.table, get_classes_callback,
++			*classes);
++	if (rc < 0) {
++		int i;
++		for (i = 0; i < *nclasses; i++)
++			kfree((*classes)[i]);
++		kfree(*classes);
++	}
++
++out:
++	POLICY_RDUNLOCK;
++	return rc;
++}
++
++static int get_permissions_callback(void *k, void *d, void *args)
++{
++	struct perm_datum *datum = d;
++	char *name = k, **perms = args;
++	int value = datum->value - 1;
++
++	perms[value] = kstrdup(name, GFP_ATOMIC);
++	if (!perms[value])
++		return -ENOMEM;
++
++	return 0;
++}
++
++int security_get_permissions(char *class, char ***perms, int *nperms)
++{
++	int rc = -ENOMEM, i;
++	struct class_datum *match;
++
++	POLICY_RDLOCK;
++
++	match = hashtab_search(policydb.p_classes.table, class);
++	if (!match) {
++		printk(KERN_ERR "%s:  unrecognized class %s\n",
++			__FUNCTION__, class);
++		rc = -EINVAL;
++		goto out;
++	}
++
++	*nperms = match->permissions.nprim;
++	*perms = kcalloc(*nperms, sizeof(*perms), GFP_ATOMIC);
++	if (!*perms)
++		goto out;
++
++	if (match->comdatum) {
++		rc = hashtab_map(match->comdatum->permissions.table,
++				get_permissions_callback, *perms);
++		if (rc < 0)
++			goto err;
++	}
++
++	rc = hashtab_map(match->permissions.table, get_permissions_callback,
++			*perms);
++	if (rc < 0)
++		goto err;
++
++out:
++	POLICY_RDUNLOCK;
++	return rc;
++
++err:
++	POLICY_RDUNLOCK;
++	for (i = 0; i < *nperms; i++)
++		kfree((*perms)[i]);
++	kfree(*perms);
++	return rc;
++}
++
+ struct selinux_audit_rule {
+ 	u32 au_seqno;
+ 	struct context au_ctxt;
+diff -Nurb linux-2.6.22-570/trellis-mm1-1.sh linux-2.6.22-try2/trellis-mm1-1.sh
+--- linux-2.6.22-570/trellis-mm1-1.sh	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2/trellis-mm1-1.sh	2007-12-19 14:36:24.000000000 -0500
+@@ -0,0 +1,142 @@
++cat ../broken-out/origin.patch | patch -p1
++cat ../broken-out/ioatdma-fix-section-mismatches.patch | patch -p1
++cat ../broken-out/introduce-fixed-sys_sync_file_range2-syscall-implement-on.patch | patch -p1
++cat ../broken-out/git-acpi.patch | patch -p1
++cat ../broken-out/agk-dm-dm-netlink.patch | patch -p1
++cat ../broken-out/git-powerpc.patch | patch -p1
++cat ../broken-out/make-drivers-char-hvc_consoleckhvcd-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-release_sysfs_dirent-to-dirc.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-allocate-inode-number-using-ida.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_put-ignore-null-sd.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-error-handling-in-binattr-write.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-flatten-cleanup-paths-in-sysfs_add_link-and-create_dir.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-flatten-and-fix-sysfs_rename_dir-error-handling.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs_dirent-creation-functions.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_parent.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-add-sysfs_dirent-s_name.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_dirent-s_element-a-union.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-kobj_sysfs_assoc_lock.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reimplement-symlink-using-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-bin_buffer.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_dirent-active-reference-and-immediate-disconnect.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-kill-attribute-file-orphaning.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-separate-out-sysfs_attach_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reimplement-sysfs_drop_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-kill-unnecessary-attribute-owner.patch | patch -p1
++cat ../broken-out/gregkh-driver-driver-core-make-devt_attr-and-uevent_attr-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_alloc_ino-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-parent-refcounting-during-rename-and-move.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-reorganize-sysfs_new_indoe-and-sysfs_create.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-iget_locked-instead-of-new_inode.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-s_active-functions-to-fs-sysfs-dirc.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-slim-down-sysfs_dirent-s_active.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-singly-linked-list-for-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-fix-oops-in-sysfs_drop_dentry-on-x86_64.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-sysfs_drop_dentry-access-inodes-using-ilookup.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-rename-sysfs_dirent-s_type-to-s_flags-and-make-room-for-flags.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_flag_removed-flag.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_find_dirent-and-sysfs_get_dirent.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-kobj-point-to-sysfs_dirent-instead-of-dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-consolidate-sysfs-spinlocks.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-use-sysfs_mutex-to-protect-the-sysfs_dirent-tree.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-restructure-add-remove-paths-and-fix-inode-update.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-move-sysfs_drop_dentry-to-dirc-and-make-it-static.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-implement-sysfs_get_dentry.patch | patch -p1
++cat ../broken-out/gregkh-driver-sysfs-make-directory-dentries-and-inodes-reclaimable.patch | patch -p1
++cat ../broken-out/gregkh-driver-block-device.patch | patch -p1
++cat ../broken-out/revert-gregkh-driver-block-device.patch | patch -p1
++cat ../broken-out/driver-core-check-return-code-of-sysfs_create_link.patch | patch -p1
++cat ../broken-out/git-md-accel.patch | patch -p1
++cat ../broken-out/git-mmc.patch | patch -p1
++cat ../broken-out/git-net.patch | patch -p1
++cat ../broken-out/tun-tap-allow-group-ownership-of-tun-tap-devices.patch | patch -p1
++cat ../broken-out/git-nfs.patch | patch -p1
++cat ../broken-out/git-ocfs2.patch | patch -p1
++cat ../broken-out/git-selinux.patch | patch -p1
++cat ../broken-out/revert-acpi-change-for-scsi.patch | patch -p1
++cat ../broken-out/git-scsi-misc.patch | patch -p1
++cat ../broken-out/git-unionfs.patch | patch -p1
++cat ../broken-out/x86_64-mm-unwinder.patch | patch -p1
++cat ../broken-out/x86_64-mm-xencleanup-add-kstrndup.patch | patch -p1
++cat ../broken-out/x86_64-mm-xencleanup-add-argv_split.patch | patch -p1
++cat ../broken-out/x86_64-mm-xencleanup-split-usermodehelper-setup-from-execution.patch | patch -p1
++cat ../broken-out/x86_64-mm-add-common-orderly_poweroff.patch | patch -p1
++cat ../broken-out/x86_64-mm-xencleanup-tidy-up-usermode-helper-waiting-a-bit.patch | patch -p1
++cat ../broken-out/x86_64-mm-xen-add-the-xen-virtual-network-device-driver.patch | patch -p1
++cat ../broken-out/i386-show-unhandled-signals.patch | patch -p1
++cat ../broken-out/git-kgdb.patch | patch -p1
++cat ../broken-out/hugetlb-remove-unnecessary-nid-initialization.patch | patch -p1
++cat ../broken-out/mm-alloc_large_system_hash-can-free-some-memory-for.patch | patch -p1
++cat ../broken-out/mm-fix-fault-vs-invalidate-race-for-linear-mappings.patch | patch -p1
++cat ../broken-out/mm-fix-fault-vs-invalidate-race-for-linear-mappings-fix.patch | patch -p1
++cat ../broken-out/mm-merge-populate-and-nopage-into-fault-fixes-nonlinear.patch | patch -p1
++cat ../broken-out/add-a-bitmap-that-is-used-to-track-flags-affecting-a-block-of-pages.patch | patch -p1
++cat ../broken-out/add-__gfp_movable-for-callers-to-flag-allocations-from-high-memory-that-may-be-migrated.patch | patch -p1
++cat ../broken-out/split-the-free-lists-for-movable-and-unmovable-allocations.patch | patch -p1
++cat ../broken-out/choose-pages-from-the-per-cpu-list-based-on-migration-type.patch | patch -p1
++cat ../broken-out/add-a-configure-option-to-group-pages-by-mobility.patch | patch -p1
++cat ../broken-out/move-free-pages-between-lists-on-steal.patch | patch -p1
++cat ../broken-out/group-short-lived-and-reclaimable-kernel-allocations.patch | patch -p1
++cat ../broken-out/allow-huge-page-allocations-to-use-gfp_high_movable.patch | patch -p1
++cat ../broken-out/maps2-uninline-some-functions-in-the-page-walker.patch | patch -p1
++cat ../broken-out/maps2-eliminate-the-pmd_walker-struct-in-the-page-walker.patch | patch -p1
++cat ../broken-out/maps2-remove-vma-from-args-in-the-page-walker.patch | patch -p1
++cat ../broken-out/maps2-propagate-errors-from-callback-in-page-walker.patch | patch -p1
++cat ../broken-out/maps2-add-callbacks-for-each-level-to-page-walker.patch | patch -p1
++cat ../broken-out/maps2-move-the-page-walker-code-to-lib.patch | patch -p1
++cat ../broken-out/maps2-simplify-interdependence-of-proc-pid-maps-and-smaps.patch | patch -p1
++cat ../broken-out/maps2-move-clear_refs-code-to-task_mmuc.patch | patch -p1
++cat ../broken-out/maps2-regroup-task_mmu-by-interface.patch | patch -p1
++cat ../broken-out/maps2-make-proc-pid-smaps-optional-under-config_embedded.patch | patch -p1
++cat ../broken-out/maps2-make-proc-pid-clear_refs-option-under-config_embedded.patch | patch -p1
++cat ../broken-out/maps2-add-proc-pid-pagemap-interface.patch | patch -p1
++cat ../broken-out/have-kswapd-keep-a-minimum-order-free-other-than-order-0.patch | patch -p1
++cat ../broken-out/freezer-make-kernel-threads-nonfreezable-by-default.patch | patch -p1
++cat ../broken-out/freezer-make-kernel-threads-nonfreezable-by-default-fix.patch | patch -p1
++cat ../broken-out/freezer-make-kernel-threads-nonfreezable-by-default-fix-2.patch | patch -p1
++cat ../broken-out/uml-use-get_free_pages-to-allocate-kernel-stacks.patch | patch -p1
++cat ../broken-out/add-generic-exit-time-stack-depth-checking-to-config_debug_stack_usage.patch | patch -p1
++cat ../broken-out/cpuset-remove-sched-domain-hooks-from-cpusets.patch | patch -p1
++cat ../broken-out/clone-flag-clone_parent_tidptr-leaves-invalid-results-in-memory.patch | patch -p1
++cat ../broken-out/use-boot-based-time-for-process-start-time-and-boot-time.patch | patch -p1
++cat ../broken-out/reduce-cpusetc-write_lock_irq-to-read_lock.patch | patch -p1
++cat ../broken-out/reduce-cpusetc-write_lock_irq-to-read_lock-fix.patch | patch -p1
++cat ../broken-out/taskstats-add-context-switch-counters.patch | patch -p1
++cat ../broken-out/taskstats-add-context-switch-counters-fix.patch | patch -p1
++cat ../broken-out/remove-config_uts_ns-and-config_ipc_ns.patch | patch -p1
++cat ../broken-out/user-namespace-add-the-framework.patch | patch -p1
++cat ../broken-out/user-namespace-add-unshare.patch | patch -p1
++cat ../broken-out/mm-fix-create_new_namespaces-return-value.patch | patch -p1
++cat ../broken-out/add-a-kmem_cache-for-nsproxy-objects.patch | patch -p1
++cat ../broken-out/namespace-ensure-clone_flags-are-always-stored-in-an-unsigned-long.patch | patch -p1
++cat ../broken-out/sysctlc-add-text-telling-people-to-use-ctl_unnumbered.patch | patch -p1
++cat ../broken-out/proper-prototype-for-proc_nr_files.patch | patch -p1
++cat ../broken-out/move-seccomp-from-proc-to-a-prctl.patch | patch -p1
++cat ../broken-out/uninline-check_signature.patch | patch -p1
++cat ../broken-out/revoke-core-code.patch | patch -p1
++cat ../broken-out/revoke-wire-up-i386-system-calls.patch | patch -p1
++cat ../broken-out/fallocate-implementation-on-i86-x86_64-and-powerpc.patch | patch -p1
++cat ../broken-out/coredump-masking-reimplementation-of-dumpable-using-two-flags.patch | patch -p1
++cat ../broken-out/coredump-masking-add-an-interface-for-core-dump-filter.patch | patch -p1
++cat ../broken-out/cpuset-zero-malloc-revert-the-old-cpuset-fix.patch | patch -p1
++cat ../broken-out/containersv10-basic-container-framework.patch | patch -p1
++cat ../broken-out/containersv10-basic-container-framework-fix.patch | patch -p1
++cat ../broken-out/containersv10-basic-container-framework-fix-for-bad-lock-balance-in-containers.patch | patch -p1
++cat ../broken-out/containersv10-example-cpu-accounting-subsystem.patch | patch -p1
++cat ../broken-out/containersv10-add-tasks-file-interface.patch | patch -p1
++cat ../broken-out/containersv10-add-tasks-file-interface-fix.patch | patch -p1
++cat ../broken-out/containersv10-add-tasks-file-interface-fix-2.patch | patch -p1
++cat ../broken-out/containersv10-add-fork-exit-hooks.patch | patch -p1
++cat ../broken-out/containersv10-add-fork-exit-hooks-fix.patch | patch -p1
++cat ../broken-out/containersv10-add-container_clone-interface.patch | patch -p1
++cat ../broken-out/containersv10-add-procfs-interface.patch | patch -p1
++cat ../broken-out/containersv10-add-procfs-interface-fix.patch | patch -p1
++cat ../broken-out/containersv10-make-cpusets-a-client-of-containers.patch | patch -p1
++cat ../broken-out/containersv10-share-css_group-arrays-between-tasks-with-same-container-memberships.patch | patch -p1
++cat ../broken-out/containersv10-share-css_group-arrays-between-tasks-with-same-container-memberships-cpuset-zero-malloc-fix-for-new-containers.patch | patch -p1
++cat ../broken-out/containersv10-simple-debug-info-subsystem.patch | patch -p1
++cat ../broken-out/containersv10-support-for-automatic-userspace-release-agents.patch | patch -p1
++cat ../broken-out/containers-implement-subsys-post_clone.patch | patch -p1
++cat ../broken-out/containers-implement-namespace-tracking-subsystem-v3.patch | patch -p1
++cat ../broken-out/keep-track-of-network-interface-renaming.patch | patch -p1
++cat ../broken-out/v2.6.22-rc6-mm1-netns23.patch | patch -p1
diff --git a/trellis-netns.patch b/trellis-netns.patch
new file mode 100644
index 000000000..5e852a0e2
--- /dev/null
+++ b/trellis-netns.patch
@@ -0,0 +1,24088 @@
+diff -Nurb linux-2.6.22-try2/arch/ia64/hp/sim/simeth.c linux-2.6.22-try2-netns/arch/ia64/hp/sim/simeth.c
+--- linux-2.6.22-try2/arch/ia64/hp/sim/simeth.c	2007-12-19 13:37:12.000000000 -0500
++++ linux-2.6.22-try2-netns/arch/ia64/hp/sim/simeth.c	2007-12-19 22:49:13.000000000 -0500
+@@ -300,6 +300,9 @@
+ 		return NOTIFY_DONE;
+ 	}
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE;
+ 
+ 	/*
+diff -Nurb linux-2.6.22-try2/arch/s390/appldata/appldata_net_sum.c linux-2.6.22-try2-netns/arch/s390/appldata/appldata_net_sum.c
+--- linux-2.6.22-try2/arch/s390/appldata/appldata_net_sum.c	2007-12-19 13:37:20.000000000 -0500
++++ linux-2.6.22-try2-netns/arch/s390/appldata/appldata_net_sum.c	2007-12-19 22:49:13.000000000 -0500
+@@ -16,6 +16,7 @@
+ #include <linux/errno.h>
+ #include <linux/kernel_stat.h>
+ #include <linux/netdevice.h>
++#include <net/net_namespace.h>
+ 
+ #include "appldata.h"
+ 
+@@ -107,7 +108,7 @@
+ 	tx_dropped = 0;
+ 	collisions = 0;
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		stats = dev->get_stats(dev);
+ 		rx_packets += stats->rx_packets;
+ 		tx_packets += stats->tx_packets;
+diff -Nurb linux-2.6.22-try2/arch/sparc64/solaris/ioctl.c linux-2.6.22-try2-netns/arch/sparc64/solaris/ioctl.c
+--- linux-2.6.22-try2/arch/sparc64/solaris/ioctl.c	2007-12-19 13:37:22.000000000 -0500
++++ linux-2.6.22-try2-netns/arch/sparc64/solaris/ioctl.c	2007-12-19 22:49:13.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/compat.h>
+ 
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/termios.h>
+@@ -686,7 +687,7 @@
+ 			int i = 0;
+ 			
+ 			read_lock_bh(&dev_base_lock);
+-			for_each_netdev(d)
++			for_each_netdev(&init_net, d)
+ 				i++;
+ 			read_unlock_bh(&dev_base_lock);
+ 
+diff -Nurb linux-2.6.22-try2/drivers/atm/idt77252.c linux-2.6.22-try2-netns/drivers/atm/idt77252.c
+--- linux-2.6.22-try2/drivers/atm/idt77252.c	2007-12-19 13:37:27.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/atm/idt77252.c	2007-12-19 22:49:13.000000000 -0500
+@@ -3576,7 +3576,7 @@
+ 	 * XXX: <hack>
+ 	 */
+ 	sprintf(tname, "eth%d", card->index);
+-	tmp = dev_get_by_name(tname);	/* jhs: was "tmp = dev_get(tname);" */
++	tmp = dev_get_by_name(&init_net, tname);	/* jhs: was "tmp = dev_get(tname);" */
+ 	if (tmp) {
+ 		memcpy(card->atmdev->esi, tmp->dev_addr, 6);
+ 
+diff -Nurb linux-2.6.22-try2/drivers/base/class.c linux-2.6.22-try2-netns/drivers/base/class.c
+--- linux-2.6.22-try2/drivers/base/class.c	2007-12-19 15:29:22.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/base/class.c	2007-12-19 22:49:13.000000000 -0500
+@@ -134,6 +134,17 @@
+ 	}
+ }
+ 
++static int class_setup_shadowing(struct class *cls)
++{
++	const struct shadow_dir_operations *shadow_ops;
++
++	shadow_ops = cls->shadow_ops;
++	if (!shadow_ops)
++		return 0;
++
++	return sysfs_enable_shadowing(&cls->subsys.kobj, shadow_ops);
++}
++
+ int class_register(struct class * cls)
+ {
+ 	int error;
+@@ -152,11 +163,22 @@
+ 	subsys_set_kset(cls, class_subsys);
+ 
+ 	error = subsystem_register(&cls->subsys);
+-	if (!error) {
+-		error = add_class_attrs(class_get(cls));
+-		class_put(cls);
+-	}
++	if (error)
++		goto out;
++
++	error = class_setup_shadowing(cls);
++	if (error)
++		goto out_unregister;
++
++	error = add_class_attrs(cls);
++	if (error)
++		goto out_unregister;
++
++out:
+ 	return error;
++out_unregister:
++	subsystem_unregister(&cls->subsys);
++	goto out;
+ }
+ 
+ void class_unregister(struct class * cls)
+diff -Nurb linux-2.6.22-try2/drivers/base/core.c linux-2.6.22-try2-netns/drivers/base/core.c
+--- linux-2.6.22-try2/drivers/base/core.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/base/core.c	2007-12-19 22:49:13.000000000 -0500
+@@ -622,8 +622,14 @@
+ 			return kobj;
+ 
+ 		/* or create a new class-directory at the parent device */
+-		return kobject_kset_add_dir(&dev->class->class_dirs,
++		kobj = kobject_kset_add_dir(&dev->class->class_dirs,
+ 					    parent_kobj, dev->class->name);
++
++		/* If we created a new class-directory setup shadowing */
++		if (kobj && dev->class->shadow_ops)
++			sysfs_enable_shadowing(kobj, dev->class->shadow_ops);
++
++		return kobj;
+ 	}
+ 
+ 	if (parent)
+@@ -913,8 +919,8 @@
+ 		/* If this is not a "fake" compatible device, remove the
+ 		 * symlink from the class to the device. */
+ 		if (dev->kobj.parent != &dev->class->subsys.kobj)
+-			sysfs_remove_link(&dev->class->subsys.kobj,
+-					  dev->bus_id);
++			sysfs_delete_link(&dev->class->subsys.kobj,
++					  &dev->kobj, dev->bus_id);
+ 		if (parent) {
+ #ifdef CONFIG_SYSFS_DEPRECATED
+ 			char *class_name = make_class_name(dev->class->name,
+@@ -1212,6 +1218,13 @@
+ 	strlcpy(old_device_name, dev->bus_id, BUS_ID_SIZE);
+ 	strlcpy(dev->bus_id, new_name, BUS_ID_SIZE);
+ 
++	if (dev->class && (dev->kobj.parent != &dev->class->subsys.kobj)) {
++		error = sysfs_rename_link(&dev->class->subsys.kobj,
++			&dev->kobj, old_device_name, new_name);
++		if (error)
++			goto out;
++	}
++
+ 	error = kobject_rename(&dev->kobj, new_name);
+ 	if (error) {
+ 		strlcpy(dev->bus_id, old_device_name, BUS_ID_SIZE);
+@@ -1220,27 +1233,17 @@
+ 
+ #ifdef CONFIG_SYSFS_DEPRECATED
+ 	if (old_class_name) {
++		error = -ENOMEM;
+ 		new_class_name = make_class_name(dev->class->name, &dev->kobj);
+-		if (new_class_name) {
+-			error = sysfs_create_link(&dev->parent->kobj,
+-						  &dev->kobj, new_class_name);
++		if (!new_class_name)
++			goto out;
++
++		error = sysfs_rename_link(&dev->parent->kobj, &dev->kobj,
++					  old_class_name, new_class_name);
+ 			if (error)
+ 				goto out;
+-			sysfs_remove_link(&dev->parent->kobj, old_class_name);
+-		}
+ 	}
+ #endif
+-
+-	if (dev->class) {
+-		sysfs_remove_link(&dev->class->subsys.kobj, old_device_name);
+-		error = sysfs_create_link(&dev->class->subsys.kobj, &dev->kobj,
+-					  dev->bus_id);
+-		if (error) {
+-			/* Uh... how to unravel this if restoring can fail? */
+-			dev_err(dev, "%s: sysfs_create_symlink failed (%d)\n",
+-				__FUNCTION__, error);
+-		}
+-	}
+ out:
+ 	put_device(dev);
+ 
+diff -Nurb linux-2.6.22-try2/drivers/block/aoe/aoecmd.c linux-2.6.22-try2-netns/drivers/block/aoe/aoecmd.c
+--- linux-2.6.22-try2/drivers/block/aoe/aoecmd.c	2007-12-19 13:37:27.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/block/aoe/aoecmd.c	2007-12-19 22:49:13.000000000 -0500
+@@ -9,6 +9,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/netdevice.h>
+ #include <linux/genhd.h>
++#include <net/net_namespace.h>
+ #include <asm/unaligned.h>
+ #include "aoe.h"
+ 
+@@ -194,7 +195,7 @@
+ 	sl = sl_tail = NULL;
+ 
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(ifp) {
++	for_each_netdev(&init_net, ifp) {
+ 		dev_hold(ifp);
+ 		if (!is_aoe_netif(ifp))
+ 			goto cont;
+diff -Nurb linux-2.6.22-try2/drivers/block/aoe/aoenet.c linux-2.6.22-try2-netns/drivers/block/aoe/aoenet.c
+--- linux-2.6.22-try2/drivers/block/aoe/aoenet.c	2007-12-19 13:37:27.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/block/aoe/aoenet.c	2007-12-19 22:49:13.000000000 -0500
+@@ -8,6 +8,7 @@
+ #include <linux/blkdev.h>
+ #include <linux/netdevice.h>
+ #include <linux/moduleparam.h>
++#include <net/net_namespace.h>
+ #include <asm/unaligned.h>
+ #include "aoe.h"
+ 
+@@ -114,6 +115,9 @@
+ 	struct aoe_hdr *h;
+ 	u32 n;
+ 
++	if (ifp->nd_net != &init_net)
++		goto exit;
++
+ 	skb = skb_share_check(skb, GFP_ATOMIC);
+ 	if (skb == NULL)
+ 		return 0;
+diff -Nurb linux-2.6.22-try2/drivers/connector/connector.c linux-2.6.22-try2-netns/drivers/connector/connector.c
+--- linux-2.6.22-try2/drivers/connector/connector.c	2007-12-19 13:37:28.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/connector/connector.c	2007-12-19 22:49:13.000000000 -0500
+@@ -446,7 +446,7 @@
+ 	dev->id.idx = cn_idx;
+ 	dev->id.val = cn_val;
+ 
+-	dev->nls = netlink_kernel_create(NETLINK_CONNECTOR,
++	dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR,
+ 					 CN_NETLINK_USERS + 0xf,
+ 					 dev->input, NULL, THIS_MODULE);
+ 	if (!dev->nls)
+diff -Nurb linux-2.6.22-try2/drivers/infiniband/core/addr.c linux-2.6.22-try2-netns/drivers/infiniband/core/addr.c
+--- linux-2.6.22-try2/drivers/infiniband/core/addr.c	2007-12-19 13:37:29.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/infiniband/core/addr.c	2007-12-19 22:49:13.000000000 -0500
+@@ -110,7 +110,7 @@
+ 	__be32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
+ 	int ret;
+ 
+-	dev = ip_dev_find(ip);
++	dev = ip_dev_find(&init_net, ip);
+ 	if (!dev)
+ 		return -EADDRNOTAVAIL;
+ 
+@@ -157,6 +157,7 @@
+ 	u32 dst_ip = dst_in->sin_addr.s_addr;
+ 
+ 	memset(&fl, 0, sizeof fl);
++	fl.fl_net = &init_net;
+ 	fl.nl_u.ip4_u.daddr = dst_ip;
+ 	if (ip_route_output_key(&rt, &fl))
+ 		return;
+@@ -178,6 +179,7 @@
+ 	int ret;
+ 
+ 	memset(&fl, 0, sizeof fl);
++	fl.fl_net = &init_net;
+ 	fl.nl_u.ip4_u.daddr = dst_ip;
+ 	fl.nl_u.ip4_u.saddr = src_ip;
+ 	ret = ip_route_output_key(&rt, &fl);
+@@ -262,7 +264,7 @@
+ 	__be32 dst_ip = dst_in->sin_addr.s_addr;
+ 	int ret;
+ 
+-	dev = ip_dev_find(dst_ip);
++	dev = ip_dev_find(&init_net, dst_ip);
+ 	if (!dev)
+ 		return -EADDRNOTAVAIL;
+ 
+diff -Nurb linux-2.6.22-try2/drivers/infiniband/core/cma.c linux-2.6.22-try2-netns/drivers/infiniband/core/cma.c
+--- linux-2.6.22-try2/drivers/infiniband/core/cma.c	2007-12-19 13:37:29.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/infiniband/core/cma.c	2007-12-19 22:49:13.000000000 -0500
+@@ -1267,7 +1267,7 @@
+ 	atomic_inc(&conn_id->dev_remove);
+ 	conn_id->state = CMA_CONNECT;
+ 
+-	dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
++	dev = ip_dev_find(&init_net, iw_event->local_addr.sin_addr.s_addr);
+ 	if (!dev) {
+ 		ret = -EADDRNOTAVAIL;
+ 		cma_enable_remove(conn_id);
+@@ -1880,18 +1880,18 @@
+ 	if (ret)
+ 		goto err1;
+ 
+-	if (port > sysctl_local_port_range[1]) {
+-		if (next_port != sysctl_local_port_range[0]) {
++	if (port > init_net.sysctl_local_port_range[1]) {
++		if (next_port != init_net.sysctl_local_port_range[0]) {
+ 			idr_remove(ps, port);
+-			next_port = sysctl_local_port_range[0];
++			next_port = init_net.sysctl_local_port_range[0];
+ 			goto retry;
+ 		}
+ 		ret = -EADDRNOTAVAIL;
+ 		goto err2;
+ 	}
+ 
+-	if (port == sysctl_local_port_range[1])
+-		next_port = sysctl_local_port_range[0];
++	if (port == init_net.sysctl_local_port_range[1])
++		next_port = init_net.sysctl_local_port_range[0];
+ 	else
+ 		next_port = port + 1;
+ 
+@@ -2774,8 +2774,9 @@
+ 
+ 	get_random_bytes(&next_port, sizeof next_port);
+ 	next_port = ((unsigned int) next_port %
+-		    (sysctl_local_port_range[1] - sysctl_local_port_range[0])) +
+-		    sysctl_local_port_range[0];
++		     (init_net.sysctl_local_port_range[1] - 
++		      init_net.sysctl_local_port_range[0])) +
++		    init_net.sysctl_local_port_range[0];
+ 	cma_wq = create_singlethread_workqueue("rdma_cm");
+ 	if (!cma_wq)
+ 		return -ENOMEM;
+diff -Nurb linux-2.6.22-try2/drivers/isdn/divert/divert_procfs.c linux-2.6.22-try2-netns/drivers/isdn/divert/divert_procfs.c
+--- linux-2.6.22-try2/drivers/isdn/divert/divert_procfs.c	2007-12-19 13:37:29.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/isdn/divert/divert_procfs.c	2007-12-19 22:49:13.000000000 -0500
+@@ -17,6 +17,7 @@
+ #include <linux/fs.h>
+ #endif
+ #include <linux/isdnif.h>
++#include <net/net_namespace.h>
+ #include "isdn_divert.h"
+ 
+ 
+@@ -284,12 +285,12 @@
+ 	init_waitqueue_head(&rd_queue);
+ 
+ #ifdef CONFIG_PROC_FS
+-	isdn_proc_entry = proc_mkdir("net/isdn", NULL);
++	isdn_proc_entry = proc_mkdir("isdn", init_net.proc_net);
+ 	if (!isdn_proc_entry)
+ 		return (-1);
+ 	isdn_divert_entry = create_proc_entry("divert", S_IFREG | S_IRUGO, isdn_proc_entry);
+ 	if (!isdn_divert_entry) {
+-		remove_proc_entry("net/isdn", NULL);
++		remove_proc_entry("isdn", init_net.proc_net);
+ 		return (-1);
+ 	}
+ 	isdn_divert_entry->proc_fops = &isdn_fops; 
+@@ -309,7 +310,7 @@
+ 
+ #ifdef CONFIG_PROC_FS
+ 	remove_proc_entry("divert", isdn_proc_entry);
+-	remove_proc_entry("net/isdn", NULL);
++	remove_proc_entry("isdn", init_net.proc_net);
+ #endif	/* CONFIG_PROC_FS */
+ 
+ 	return (0);
+diff -Nurb linux-2.6.22-try2/drivers/isdn/hardware/eicon/diva_didd.c linux-2.6.22-try2-netns/drivers/isdn/hardware/eicon/diva_didd.c
+--- linux-2.6.22-try2/drivers/isdn/hardware/eicon/diva_didd.c	2007-12-19 13:37:29.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/isdn/hardware/eicon/diva_didd.c	2007-12-19 22:49:13.000000000 -0500
+@@ -15,6 +15,7 @@
+ #include <linux/init.h>
+ #include <linux/kernel.h>
+ #include <linux/proc_fs.h>
++#include <net/net_namespace.h>
+ 
+ #include "platform.h"
+ #include "di_defs.h"
+@@ -86,7 +87,7 @@
+ 
+ static int DIVA_INIT_FUNCTION create_proc(void)
+ {
+-	proc_net_eicon = proc_mkdir("net/eicon", NULL);
++	proc_net_eicon = proc_mkdir("eicon", init_net.proc_net);
+ 
+ 	if (proc_net_eicon) {
+ 		if ((proc_didd =
+@@ -102,7 +103,7 @@
+ static void remove_proc(void)
+ {
+ 	remove_proc_entry(DRIVERLNAME, proc_net_eicon);
+-	remove_proc_entry("net/eicon", NULL);
++	remove_proc_entry("eicon", init_net.proc_net);
+ }
+ 
+ static int DIVA_INIT_FUNCTION divadidd_init(void)
+diff -Nurb linux-2.6.22-try2/drivers/isdn/hysdn/hysdn_procconf.c linux-2.6.22-try2-netns/drivers/isdn/hysdn/hysdn_procconf.c
+--- linux-2.6.22-try2/drivers/isdn/hysdn/hysdn_procconf.c	2007-12-19 13:37:29.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/isdn/hysdn/hysdn_procconf.c	2007-12-19 22:49:13.000000000 -0500
+@@ -392,7 +392,7 @@
+ 	hysdn_card *card;
+ 	unsigned char conf_name[20];
+ 
+-	hysdn_proc_entry = proc_mkdir(PROC_SUBDIR_NAME, proc_net);
++	hysdn_proc_entry = proc_mkdir(PROC_SUBDIR_NAME, init_net.proc_net);
+ 	if (!hysdn_proc_entry) {
+ 		printk(KERN_ERR "HYSDN: unable to create hysdn subdir\n");
+ 		return (-1);
+@@ -437,5 +437,5 @@
+ 		card = card->next;	/* point to next card */
+ 	}
+ 
+-	remove_proc_entry(PROC_SUBDIR_NAME, proc_net);
++	remove_proc_entry(PROC_SUBDIR_NAME, init_net.proc_net);
+ }
+diff -Nurb linux-2.6.22-try2/drivers/net/Kconfig linux-2.6.22-try2-netns/drivers/net/Kconfig
+--- linux-2.6.22-try2/drivers/net/Kconfig	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/Kconfig	2007-12-19 22:49:13.000000000 -0500
+@@ -119,6 +119,26 @@
+ 
+ 	  If you don't know what to use this for, you don't need it.
+ 
++config ETUN
++	tristate "Ethernet tunnel device driver support"
++	depends on SYSFS
++	---help---
++	  ETUN provides a pair of network devices that can be used for
++	  configuring interesting topologies.  What one device transmits
++	  the other receives and vice versa.  The link level framing
++	  is ethernet for wide compatibility with network stacks.
++
++	  To compile this driver as a module, choose M here: the module
++	  will be called etun.
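++
++	  Device pairs are created and destroyed at runtime through
++	  module parameters, for example (device names are only
++	  illustrative):
++	    echo etun0,etun1 > /sys/module/etun/parameters/newif
++	    echo etun0 > /sys/module/etun/parameters/delif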
++
++	  If you don't know what to use this for, you don't need it.
++
+ config NET_SB1000
+ 	tristate "General Instruments Surfboard 1000"
+ 	depends on PNP
+diff -Nurb linux-2.6.22-try2/drivers/net/Makefile linux-2.6.22-try2-netns/drivers/net/Makefile
+--- linux-2.6.22-try2/drivers/net/Makefile	2007-12-19 15:29:24.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/Makefile	2007-12-19 22:49:13.000000000 -0500
+@@ -186,6 +186,7 @@
+ obj-$(CONFIG_MACMACE) += macmace.o
+ obj-$(CONFIG_MAC89x0) += mac89x0.o
+ obj-$(CONFIG_TUN) += tun.o
++obj-$(CONFIG_ETUN) += etun.o
+ obj-$(CONFIG_NET_NETX) += netx-eth.o
+ obj-$(CONFIG_DL2K) += dl2k.o
+ obj-$(CONFIG_R8169) += r8169.o
+diff -Nurb linux-2.6.22-try2/drivers/net/bonding/bond_3ad.c linux-2.6.22-try2-netns/drivers/net/bonding/bond_3ad.c
+--- linux-2.6.22-try2/drivers/net/bonding/bond_3ad.c	2007-12-19 13:37:30.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/bonding/bond_3ad.c	2007-12-19 22:49:13.000000000 -0500
+@@ -29,6 +29,7 @@
+ #include <linux/ethtool.h>
+ #include <linux/if_bonding.h>
+ #include <linux/pkt_sched.h>
++#include <net/net_namespace.h>
+ #include "bonding.h"
+ #include "bond_3ad.h"
+ 
+@@ -2448,6 +2449,9 @@
+ 	struct slave *slave = NULL;
+ 	int ret = NET_RX_DROP;
+ 
++	if (dev->nd_net != &init_net)
++		goto out;
++
+ 	if (!(dev->flags & IFF_MASTER))
+ 		goto out;
+ 
+diff -Nurb linux-2.6.22-try2/drivers/net/bonding/bond_alb.c linux-2.6.22-try2-netns/drivers/net/bonding/bond_alb.c
+--- linux-2.6.22-try2/drivers/net/bonding/bond_alb.c	2007-12-19 13:37:30.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/bonding/bond_alb.c	2007-12-19 22:49:13.000000000 -0500
+@@ -345,6 +345,9 @@
+ 	struct arp_pkt *arp = (struct arp_pkt *)skb->data;
+ 	int res = NET_RX_DROP;
+ 
++	if (bond_dev->nd_net != &init_net)
++		goto out;
++
+ 	if (!(bond_dev->flags & IFF_MASTER))
+ 		goto out;
+ 
+diff -Nurb linux-2.6.22-try2/drivers/net/bonding/bond_main.c linux-2.6.22-try2-netns/drivers/net/bonding/bond_main.c
+--- linux-2.6.22-try2/drivers/net/bonding/bond_main.c	2007-12-19 13:37:30.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/bonding/bond_main.c	2007-12-19 22:49:13.000000000 -0500
+@@ -75,6 +75,7 @@
+ #include <linux/if_vlan.h>
+ #include <linux/if_bonding.h>
+ #include <net/route.h>
++#include <net/net_namespace.h>
+ #include "bonding.h"
+ #include "bond_3ad.h"
+ #include "bond_alb.h"
+@@ -2376,6 +2377,7 @@
+ 		 * can tag the ARP with the proper VLAN tag.
+ 		 */
+ 		memset(&fl, 0, sizeof(fl));
++		fl.fl_net = &init_net;
+ 		fl.fl4_dst = targets[i];
+ 		fl.fl4_tos = RTO_ONLINK;
+ 
+@@ -2485,6 +2487,9 @@
+ 	unsigned char *arp_ptr;
+ 	u32 sip, tip;
+ 
++	if (dev->nd_net != &init_net)
++		goto out;
++
+ 	if (!(dev->priv_flags & IFF_BONDING) || !(dev->flags & IFF_MASTER))
+ 		goto out;
+ 
+@@ -3172,7 +3177,7 @@
+ {
+ 	int len = strlen(DRV_NAME);
+ 
+-	for (bond_proc_dir = proc_net->subdir; bond_proc_dir;
++	for (bond_proc_dir = init_net.proc_net->subdir; bond_proc_dir;
+ 	     bond_proc_dir = bond_proc_dir->next) {
+ 		if ((bond_proc_dir->namelen == len) &&
+ 		    !memcmp(bond_proc_dir->name, DRV_NAME, len)) {
+@@ -3181,7 +3186,7 @@
+ 	}
+ 
+ 	if (!bond_proc_dir) {
+-		bond_proc_dir = proc_mkdir(DRV_NAME, proc_net);
++		bond_proc_dir = proc_mkdir(DRV_NAME, init_net.proc_net);
+ 		if (bond_proc_dir) {
+ 			bond_proc_dir->owner = THIS_MODULE;
+ 		} else {
+@@ -3216,7 +3221,7 @@
+ 			bond_proc_dir->owner = NULL;
+ 		}
+ 	} else {
+-		remove_proc_entry(DRV_NAME, proc_net);
++		remove_proc_entry(DRV_NAME, init_net.proc_net);
+ 		bond_proc_dir = NULL;
+ 	}
+ }
+@@ -3323,6 +3328,9 @@
+ {
+ 	struct net_device *event_dev = (struct net_device *)ptr;
+ 
++	if (event_dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	dprintk("event_dev: %s, event: %lx\n",
+ 		(event_dev ? event_dev->name : "None"),
+ 		event);
+@@ -3740,7 +3748,7 @@
+ 	}
+ 
+ 	down_write(&(bonding_rwsem));
+-	slave_dev = dev_get_by_name(ifr->ifr_slave);
++	slave_dev = dev_get_by_name(&init_net, ifr->ifr_slave);
+ 
+ 	dprintk("slave_dev=%p: \n", slave_dev);
+ 
+diff -Nurb linux-2.6.22-try2/drivers/net/bonding/bond_sysfs.c linux-2.6.22-try2-netns/drivers/net/bonding/bond_sysfs.c
+--- linux-2.6.22-try2/drivers/net/bonding/bond_sysfs.c	2007-12-19 13:37:31.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/bonding/bond_sysfs.c	2007-12-19 22:49:13.000000000 -0500
+@@ -35,6 +35,7 @@
+ #include <linux/ctype.h>
+ #include <linux/inet.h>
+ #include <linux/rtnetlink.h>
++#include <net/net_namespace.h>
+ 
+ /* #define BONDING_DEBUG 1 */
+ #include "bonding.h"
+@@ -299,7 +300,7 @@
+ 		read_unlock_bh(&bond->lock);
+ 		printk(KERN_INFO DRV_NAME ": %s: Adding slave %s.\n",
+ 		       bond->dev->name, ifname);
+-		dev = dev_get_by_name(ifname);
++		dev = dev_get_by_name(&init_net, ifname);
+ 		if (!dev) {
+ 			printk(KERN_INFO DRV_NAME
+ 			       ": %s: Interface %s does not exist!\n",
+diff -Nurb linux-2.6.22-try2/drivers/net/eql.c linux-2.6.22-try2-netns/drivers/net/eql.c
+--- linux-2.6.22-try2/drivers/net/eql.c	2007-12-19 13:37:31.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/eql.c	2007-12-19 22:49:13.000000000 -0500
+@@ -116,6 +116,7 @@
+ #include <linux/init.h>
+ #include <linux/timer.h>
+ #include <linux/netdevice.h>
++#include <net/net_namespace.h>
+ 
+ #include <linux/if.h>
+ #include <linux/if_arp.h>
+@@ -412,7 +413,7 @@
+ 	if (copy_from_user(&srq, srqp, sizeof (slaving_request_t)))
+ 		return -EFAULT;
+ 
+-	slave_dev  = dev_get_by_name(srq.slave_name);
++	slave_dev  = dev_get_by_name(&init_net, srq.slave_name);
+ 	if (slave_dev) {
+ 		if ((master_dev->flags & IFF_UP) == IFF_UP) {
+ 			/* slave is not a master & not already a slave: */
+@@ -460,7 +461,7 @@
+ 	if (copy_from_user(&srq, srqp, sizeof (slaving_request_t)))
+ 		return -EFAULT;
+ 
+-	slave_dev = dev_get_by_name(srq.slave_name);
++	slave_dev = dev_get_by_name(&init_net, srq.slave_name);
+ 	ret = -EINVAL;
+ 	if (slave_dev) {
+ 		spin_lock_bh(&eql->queue.lock);
+@@ -493,7 +494,7 @@
+ 	if (copy_from_user(&sc, scp, sizeof (slave_config_t)))
+ 		return -EFAULT;
+ 
+-	slave_dev = dev_get_by_name(sc.slave_name);
++	slave_dev = dev_get_by_name(&init_net, sc.slave_name);
+ 	if (!slave_dev)
+ 		return -ENODEV;
+ 
+@@ -528,7 +529,7 @@
+ 	if (copy_from_user(&sc, scp, sizeof (slave_config_t)))
+ 		return -EFAULT;
+ 
+-	slave_dev = dev_get_by_name(sc.slave_name);
++	slave_dev = dev_get_by_name(&init_net, sc.slave_name);
+ 	if (!slave_dev)
+ 		return -ENODEV;
+ 
+diff -Nurb linux-2.6.22-try2/drivers/net/etun.c linux-2.6.22-try2-netns/drivers/net/etun.c
+--- linux-2.6.22-try2/drivers/net/etun.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/etun.c	2007-12-19 22:49:13.000000000 -0500
+@@ -0,0 +1,506 @@
++/*
++ *  ETUN - Universal ETUN device driver.
++ *  Copyright (C) 2006 Linux Networx
++ *
++ */
++
++#define DRV_NAME	"etun"
++#define DRV_VERSION	"1.0"
++#define DRV_DESCRIPTION	"Ethernet pseudo tunnel device driver"
++#define DRV_COPYRIGHT	"(C) 2007 Linux Networx"
++
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/skbuff.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/ethtool.h>
++#include <linux/rtnetlink.h>
++#include <linux/if.h>
++#include <linux/if_ether.h>
++#include <linux/ctype.h>
++#include <linux/nsproxy.h>
++#include <net/net_namespace.h>
++#include <net/dst.h>
++
++
++/* Device checksum strategy.
++ *
++ * etun is designed to be a pair of virtual devices
++ * connecting two network stack instances.
++ *
++ * Typically it will either be used with ethernet bridging or
++ * it will be used to route packets between the two stacks.
++ *
++ * The only checksum offloading I can do is to completely
++ * skip the checksumming step altogether.
++ *
++ * When used for ethernet bridging I don't believe any
++ * checksum offloading is safe.
++ * - If my source is an external interface the checksum may be
++ *   invalid, so I don't want to report I have already checked it.
++ * - If my destination is an external interface I don't want to put
++ *   a packet on the wire without someone having computed the checksum.
++ *
++ * When used for routing between two stacks checksums should
++ * be as unnecessary as they are on the loopback device.
++ *
++ * So by default I am safe and disable checksumming and
++ * other advanced features like SG and TSO.
++ *
++ * However, because I think these features could be useful
++ * I provide the ethtool functions to enable/disable
++ * them at runtime.
++ *
++ * If you think you can correctly enable these, go ahead.
++ * For checksums, both the transmitter and the receiver must
++ * agree before they are actually disabled.
++ */
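++
++/* For example (device names are illustrative; a pair only skips
++ * checksumming in a given direction once both halves opt in):
++ *
++ *	ethtool -K etun0 tx on	# transmitter sets NETIF_F_NO_CSUM
++ *	ethtool -K etun1 rx on	# receiver reports CHECKSUM_UNNECESSARY
++ */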
++
++#define ETUN_NUM_STATS 1
++static struct {
++	const char string[ETH_GSTRING_LEN];
++} ethtool_stats_keys[ETUN_NUM_STATS] = {
++	{ "partner_ifindex" },
++};
++
++struct etun_info {
++	struct net_device	*rx_dev;
++	unsigned		ip_summed;
++	struct net_device_stats	stats;
++	struct list_head	list;
++	struct net_device	*dev;
++};
++
++/*
++ * I have to hold the rtnl_lock during device delete.
++ * So I use the rtnl_lock to protect my list manipulations
++ * as well.  Crude but simple.
++ */
++static LIST_HEAD(etun_list);
++
++/*
++ * The higher levels take care of making this non-reentrant (it's
++ * called with bh's disabled).
++ */
++static int etun_xmit(struct sk_buff *skb, struct net_device *tx_dev)
++{
++	struct etun_info *tx_info = tx_dev->priv;
++	struct net_device *rx_dev = tx_info->rx_dev;
++	struct etun_info *rx_info = rx_dev->priv;
++
++	tx_info->stats.tx_packets++;
++	tx_info->stats.tx_bytes += skb->len;
++
++	/* Drop the skb state that was needed to get here */
++	skb_orphan(skb);
++	if (skb->dst)
++		skb->dst = dst_pop(skb->dst);	/* Allow for smart routing */
++
++	/* Switch to the receiving device */
++	skb->pkt_type = PACKET_HOST;
++	skb->protocol = eth_type_trans(skb, rx_dev);
++	skb->dev = rx_dev;
++	skb->ip_summed = CHECKSUM_NONE;
++
++	/* If both halves agree no checksum is needed */
++	if (tx_dev->features & NETIF_F_NO_CSUM)
++		skb->ip_summed = rx_info->ip_summed;
++
++	rx_dev->last_rx = jiffies;
++	rx_info->stats.rx_packets++;
++	rx_info->stats.rx_bytes += skb->len;
++	netif_rx(skb);
++
++	return 0;
++}
++
++static struct net_device_stats *etun_get_stats(struct net_device *dev)
++{
++	struct etun_info *info = dev->priv;
++	return &info->stats;
++}
++
++/* ethtool interface */
++static int etun_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
++{
++	cmd->supported		= 0;
++	cmd->advertising	= 0;
++	cmd->speed		= SPEED_10000; /* Memory is fast! */
++	cmd->duplex		= DUPLEX_FULL;
++	cmd->port		= PORT_TP;
++	cmd->phy_address	= 0;
++	cmd->transceiver	= XCVR_INTERNAL;
++	cmd->autoneg		= AUTONEG_DISABLE;
++	cmd->maxtxpkt		= 0;
++	cmd->maxrxpkt		= 0;
++	return 0;
++}
++
++static void etun_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
++{
++	strcpy(info->driver, DRV_NAME);
++	strcpy(info->version, DRV_VERSION);
++	strcpy(info->fw_version, "N/A");
++}
++
++static void etun_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
++{
++	switch(stringset) {
++	case ETH_SS_STATS:
++		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
++		break;
++	case ETH_SS_TEST:
++	default:
++		break;
++	}
++}
++
++static int etun_get_stats_count(struct net_device *dev)
++{
++	return ETUN_NUM_STATS;
++}
++
++static void etun_get_ethtool_stats(struct net_device *dev,
++	struct ethtool_stats *stats, u64 *data)
++{
++	struct etun_info *info = dev->priv;
++
++	data[0] = info->rx_dev->ifindex;
++}
++
++static u32 etun_get_rx_csum(struct net_device *dev)
++{
++	struct etun_info *info = dev->priv;
++	return info->ip_summed == CHECKSUM_UNNECESSARY;
++}
++
++static int etun_set_rx_csum(struct net_device *dev, u32 data)
++{
++	struct etun_info *info = dev->priv;
++
++	info->ip_summed = data ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
++
++	return 0;
++}
++
++static u32 etun_get_tx_csum(struct net_device *dev)
++{
++	return (dev->features & NETIF_F_NO_CSUM) != 0;
++}
++
++static int etun_set_tx_csum(struct net_device *dev, u32 data)
++{
++	dev->features &= ~NETIF_F_NO_CSUM;
++	if (data)
++		dev->features |= NETIF_F_NO_CSUM;
++
++	return 0;
++}
++
++static struct ethtool_ops etun_ethtool_ops = {
++	.get_settings		= etun_get_settings,
++	.get_drvinfo		= etun_get_drvinfo,
++	.get_link		= ethtool_op_get_link,
++	.get_rx_csum		= etun_get_rx_csum,
++	.set_rx_csum		= etun_set_rx_csum,
++	.get_tx_csum		= etun_get_tx_csum,
++	.set_tx_csum		= etun_set_tx_csum,
++	.get_sg			= ethtool_op_get_sg,
++	.set_sg			= ethtool_op_set_sg,
++#if 0 /* Does just setting the bit successfully emulate TSO? */
++	.get_tso		= ethtool_op_get_tso,
++	.set_tso		= ethtool_op_set_tso,
++#endif
++	.get_strings		= etun_get_strings,
++	.get_stats_count	= etun_get_stats_count,
++	.get_ethtool_stats	= etun_get_ethtool_stats,
++	.get_perm_addr		= ethtool_op_get_perm_addr,
++};
++
++static int etun_open(struct net_device *tx_dev)
++{
++	struct etun_info *tx_info = tx_dev->priv;
++	struct net_device *rx_dev = tx_info->rx_dev;
++	/* If we attempt to bring up etun in the small window before
++	 * it is connected to its partner, return an error.
++	 */
++	if (!rx_dev)
++		return -ENOTCONN;
++	if (rx_dev->flags & IFF_UP) {
++		netif_carrier_on(tx_dev);
++		netif_carrier_on(rx_dev);
++	}
++	netif_start_queue(tx_dev);
++	return 0;
++}
++
++static int etun_stop(struct net_device *tx_dev)
++{
++	struct etun_info *tx_info = tx_dev->priv;
++	struct net_device *rx_dev = tx_info->rx_dev;
++	netif_stop_queue(tx_dev);
++	if (netif_carrier_ok(tx_dev)) {
++		netif_carrier_off(tx_dev);
++		netif_carrier_off(rx_dev);
++	}
++	return 0;
++}
++
++static int etun_change_mtu(struct net_device *dev, int new_mtu)
++{
++	/* Don't allow ridiculously small mtus */
++	if (new_mtu < (ETH_ZLEN - ETH_HLEN))
++		return -EINVAL;
++	dev->mtu = new_mtu;
++	return 0;
++}
++
++static void etun_set_multicast_list(struct net_device *dev)
++{
++	/* Nothing sane I can do here */
++	return;
++}
++
++static int etun_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
++{
++	return -EOPNOTSUPP;
++}
++
++/* Only allow letters and numbers in an etun device name */
++static int is_valid_name(const char *name)
++{
++	const char *ptr;
++	for (ptr = name; *ptr; ptr++) {
++		if (!isalnum(*ptr))
++			return 0;
++	}
++	return 1;
++}
++
++static struct net_device *etun_alloc(struct net *net, const char *name)
++{
++	struct net_device *dev;
++	struct etun_info *info;
++	int err;
++
++	if (!name || !is_valid_name(name))
++		return ERR_PTR(-EINVAL);
++
++	dev = alloc_netdev(sizeof(struct etun_info), name, ether_setup);
++	if (!dev)
++		return ERR_PTR(-ENOMEM);
++
++	info = dev->priv;
++	info->dev = dev;
++	dev->nd_net = net;
++
++	random_ether_addr(dev->dev_addr);
++	dev->tx_queue_len	= 0; /* A queue is silly for a loopback device */
++	dev->hard_start_xmit	= etun_xmit;
++	dev->get_stats		= etun_get_stats;
++	dev->open		= etun_open;
++	dev->stop		= etun_stop;
++	dev->set_multicast_list	= etun_set_multicast_list;
++	dev->do_ioctl		= etun_ioctl;
++	dev->features		= NETIF_F_FRAGLIST
++				  | NETIF_F_HIGHDMA
++				  | NETIF_F_LLTX;
++	dev->flags		= IFF_BROADCAST | IFF_MULTICAST | IFF_PROMISC;
++	dev->ethtool_ops	= &etun_ethtool_ops;
++	dev->destructor		= free_netdev;
++	dev->change_mtu		= etun_change_mtu;
++	err = register_netdev(dev);
++	if (err) {
++		free_netdev(dev);
++		dev = ERR_PTR(err);
++		goto out;
++	}
++	netif_carrier_off(dev);
++out:
++	return dev;
++}
++
++static int etun_alloc_pair(struct net *net, const char *name0, const char *name1)
++{
++	struct net_device *dev0, *dev1;
++	struct etun_info *info0, *info1;
++
++	dev0 = etun_alloc(net, name0);
++	if (IS_ERR(dev0)) {
++		return PTR_ERR(dev0);
++	}
++	info0 = dev0->priv;
++
++	dev1 = etun_alloc(net, name1);
++	if (IS_ERR(dev1)) {
++		unregister_netdev(dev0);
++		return PTR_ERR(dev1);
++	}
++	info1 = dev1->priv;
++
++	dev_hold(dev0);
++	dev_hold(dev1);
++	info0->rx_dev = dev1;
++	info1->rx_dev = dev0;
++
++	/* Only place one member of the pair on the list
++	 * so I don't confuse list_for_each_entry_safe
++	 * by deleting two list entries at once.
++	 */
++	rtnl_lock();
++	list_add(&info0->list, &etun_list);
++	INIT_LIST_HEAD(&info1->list);
++	rtnl_unlock();
++
++	return 0;
++}
++
++static int etun_unregister_pair(struct net_device *dev0)
++{
++	struct etun_info *info0, *info1;
++	struct net_device *dev1;
++
++	ASSERT_RTNL();
++
++	if (!dev0)
++		return -ENODEV;
++
++	/* Ensure my network devices are not passing packets */
++	dev_close(dev0);
++	info0 = dev0->priv;
++	dev1  = info0->rx_dev;
++	info1 = dev1->priv;
++	dev_close(dev1);
++
++	/* Drop the cross device references */
++	dev_put(dev0);
++	dev_put(dev1);
++
++	/* Remove from the etun list */
++	if (!list_empty(&info0->list))
++		list_del_init(&info0->list);
++	if (!list_empty(&info1->list))
++		list_del_init(&info1->list);
++
++	unregister_netdevice(dev0);
++	unregister_netdevice(dev1);
++	return 0;
++}
++
++static int etun_noget(char *buffer, struct kernel_param *kp)
++{
++	return 0;
++}
++
++static int etun_newif(const char *val, struct kernel_param *kp)
++{
++	char name0[IFNAMSIZ], name1[IFNAMSIZ];
++	const char *mid;
++	int len, len0, len1;
++	if (!capable(CAP_NET_ADMIN))
++		return -EPERM;
++
++	/* Avoid frustration by removing trailing whitespace */
++	len = strlen(val);
++	while (isspace(val[len - 1]))
++		len--;
++
++	/* Split the string into 2 names */
++	mid = memchr(val, ',', len);
++	if (!mid)
++		return -EINVAL;
++
++	/* Get the first device name */
++	len0 = mid - val;
++	if (len0 > sizeof(name0) - 1)
++		len0 = sizeof(name0) - 1;
++	strncpy(name0, val, len0);
++	name0[len0] = '\0';
++
++	/* And the second device name */
++	len1 = len - (len0 + 1);
++	if (len1 > sizeof(name1) - 1)
++		len1 = sizeof(name1) - 1;
++	strncpy(name1, mid + 1, len1);
++	name1[len1] = '\0';
++
++	return etun_alloc_pair(current->nsproxy->net_ns, name0, name1);
++}
++
++static int etun_delif(const char *val, struct kernel_param *kp)
++{
++	char name[IFNAMSIZ];
++	int len;
++	struct net_device *dev;
++	int err;
++	if (!capable(CAP_NET_ADMIN))
++		return -EPERM;
++
++	/* Avoid frustration by removing trailing whitespace */
++	len = strlen(val);
++	while (isspace(val[len - 1]))
++		len--;
++
++	/* Get the device name */
++	if (len > sizeof(name) - 1)
++		return -EINVAL;
++	strncpy(name, val, len);
++	name[len] = '\0';
++
++	/* Double check I don't have strange characters in my device name */
++	if (!is_valid_name(name))
++		return -EINVAL;
++
++	rtnl_lock();
++	err = -ENODEV;
++	dev = __dev_get_by_name(current->nsproxy->net_ns, name);
++	err = etun_unregister_pair(dev);
++	rtnl_unlock();
++	return err;
++}
++
++static int __init etun_init(void)
++{
++	printk(KERN_INFO "etun: %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
++	printk(KERN_INFO "etun: %s\n", DRV_COPYRIGHT);
++
++	return 0;
++}
++
++static void etun_cleanup(void)
++{
++	struct etun_info *info, *tmp;
++	rtnl_lock();
++	list_for_each_entry_safe(info, tmp, &etun_list, list) {
++		etun_unregister_pair(info->dev);
++	}
++	rtnl_unlock();
++}
++
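++/*
++ * Usage sketch (device names are illustrative):
++ *
++ *	# create a connected pair etun0 <-> etun1
++ *	echo 'etun0,etun1' > /sys/module/etun/parameters/newif
++ *
++ *	# destroy the pair by naming either end
++ *	echo 'etun0' > /sys/module/etun/parameters/delif
++ */
++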
++module_param_call(newif, etun_newif, etun_noget, NULL, S_IWUSR);
++module_param_call(delif, etun_delif, etun_noget, NULL, S_IWUSR);
++module_init(etun_init);
++module_exit(etun_cleanup);
++MODULE_DESCRIPTION(DRV_DESCRIPTION);
++MODULE_AUTHOR("Eric Biederman <ebiederm@xmission.com>");
++MODULE_LICENSE("GPL");
+diff -Nurb linux-2.6.22-try2/drivers/net/hamradio/bpqether.c linux-2.6.22-try2-netns/drivers/net/hamradio/bpqether.c
+--- linux-2.6.22-try2/drivers/net/hamradio/bpqether.c	2007-12-19 13:37:31.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/hamradio/bpqether.c	2007-12-19 22:49:13.000000000 -0500
+@@ -83,6 +83,7 @@
+ 
+ #include <net/ip.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+ 
+ #include <linux/bpqether.h>
+ 
+@@ -172,6 +173,9 @@
+ 	struct ethhdr *eth;
+ 	struct bpqdev *bpq;
+ 
++	if (dev->nd_net != &init_net)
++		goto drop;
++
+ 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+ 		return NET_RX_DROP;
+ 
+@@ -559,6 +563,9 @@
+ {
+ 	struct net_device *dev = (struct net_device *)ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (!dev_is_ethdev(dev))
+ 		return NOTIFY_DONE;
+ 
+@@ -594,7 +601,7 @@
+ static int __init bpq_init_driver(void)
+ {
+ #ifdef CONFIG_PROC_FS
+-	if (!proc_net_fops_create("bpqether", S_IRUGO, &bpq_info_fops)) {
++	if (!proc_net_fops_create(&init_net, "bpqether", S_IRUGO, &bpq_info_fops)) {
+ 		printk(KERN_ERR
+ 			"bpq: cannot create /proc/net/bpqether entry.\n");
+ 		return -ENOENT;
+@@ -618,7 +625,7 @@
+ 
+ 	unregister_netdevice_notifier(&bpq_dev_notifier);
+ 
+-	proc_net_remove("bpqether");
++	proc_net_remove(&init_net, "bpqether");
+ 
+ 	rtnl_lock();
+ 	while (!list_empty(&bpq_devices)) {
+diff -Nurb linux-2.6.22-try2/drivers/net/hamradio/scc.c linux-2.6.22-try2-netns/drivers/net/hamradio/scc.c
+--- linux-2.6.22-try2/drivers/net/hamradio/scc.c	2007-12-19 13:37:31.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/hamradio/scc.c	2007-12-19 22:49:13.000000000 -0500
+@@ -174,6 +174,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/bitops.h>
+ 
++#include <net/net_namespace.h>
+ #include <net/ax25.h>
+ 
+ #include <asm/irq.h>
+@@ -2114,7 +2115,7 @@
+ 	}
+ 	rtnl_unlock();
+ 
+-	proc_net_fops_create("z8530drv", 0, &scc_net_seq_fops);
++	proc_net_fops_create(&init_net, "z8530drv", 0, &scc_net_seq_fops);
+ 
+ 	return 0;
+ }
+@@ -2169,7 +2170,7 @@
+ 	if (Vector_Latch)
+ 		release_region(Vector_Latch, 1);
+ 
+-	proc_net_remove("z8530drv");
++	proc_net_remove(&init_net, "z8530drv");
+ }
+ 
+ MODULE_AUTHOR("Joerg Reuter <jreuter@yaina.de>");
+diff -Nurb linux-2.6.22-try2/drivers/net/hamradio/yam.c linux-2.6.22-try2-netns/drivers/net/hamradio/yam.c
+--- linux-2.6.22-try2/drivers/net/hamradio/yam.c	2007-12-19 13:37:31.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/hamradio/yam.c	2007-12-19 22:49:13.000000000 -0500
+@@ -61,6 +61,7 @@
+ #include <linux/etherdevice.h>
+ #include <linux/skbuff.h>
+ #include <net/ax25.h>
++#include <net/net_namespace.h>
+ 
+ #include <linux/kernel.h>
+ #include <linux/proc_fs.h>
+@@ -1142,7 +1143,7 @@
+ 	yam_timer.expires = jiffies + HZ / 100;
+ 	add_timer(&yam_timer);
+ 
+-	proc_net_fops_create("yam", S_IRUGO, &yam_info_fops);
++	proc_net_fops_create(&init_net, "yam", S_IRUGO, &yam_info_fops);
+ 	return 0;
+  error:
+ 	while (--i >= 0) {
+@@ -1174,7 +1175,7 @@
+ 		kfree(p);
+ 	}
+ 
+-	proc_net_remove("yam");
++	proc_net_remove(&init_net, "yam");
+ }
+ 
+ /* --------------------------------------------------------------------- */
+diff -Nurb linux-2.6.22-try2/drivers/net/ibmveth.c linux-2.6.22-try2-netns/drivers/net/ibmveth.c
+--- linux-2.6.22-try2/drivers/net/ibmveth.c	2007-12-19 15:29:22.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/ibmveth.c	2007-12-19 22:49:13.000000000 -0500
+@@ -47,6 +47,7 @@
+ #include <linux/mm.h>
+ #include <linux/ethtool.h>
+ #include <linux/proc_fs.h>
++#include <net/net_namespace.h>
+ #include <asm/semaphore.h>
+ #include <asm/hvcall.h>
+ #include <asm/atomic.h>
+@@ -97,7 +98,7 @@
+ static struct kobj_type ktype_veth_pool;
+ 
+ #ifdef CONFIG_PROC_FS
+-#define IBMVETH_PROC_DIR "net/ibmveth"
++#define IBMVETH_PROC_DIR "ibmveth"
+ static struct proc_dir_entry *ibmveth_proc_dir;
+ #endif
+ 
+@@ -1093,7 +1094,7 @@
+ #ifdef CONFIG_PROC_FS
+ static void ibmveth_proc_register_driver(void)
+ {
+-	ibmveth_proc_dir = proc_mkdir(IBMVETH_PROC_DIR, NULL);
++	ibmveth_proc_dir = proc_mkdir(IBMVETH_PROC_DIR, init_net.proc_net);
+ 	if (ibmveth_proc_dir) {
+ 		SET_MODULE_OWNER(ibmveth_proc_dir);
+ 	}
+@@ -1101,7 +1102,7 @@
+ 
+ static void ibmveth_proc_unregister_driver(void)
+ {
+-	remove_proc_entry(IBMVETH_PROC_DIR, NULL);
++	remove_proc_entry(IBMVETH_PROC_DIR, init_net.proc_net);
+ }
+ 
+ static void *ibmveth_seq_start(struct seq_file *seq, loff_t *pos)
+diff -Nurb linux-2.6.22-try2/drivers/net/loopback.c linux-2.6.22-try2-netns/drivers/net/loopback.c
+--- linux-2.6.22-try2/drivers/net/loopback.c	2007-12-19 13:37:32.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/loopback.c	2007-12-19 22:49:13.000000000 -0500
+@@ -57,6 +57,7 @@
+ #include <linux/ip.h>
+ #include <linux/tcp.h>
+ #include <linux/percpu.h>
++#include <net/net_namespace.h>
+ 
+ struct pcpu_lstats {
+ 	unsigned long packets;
+@@ -199,39 +200,52 @@
+ 	.get_rx_csum		= always_on,
+ };
+ 
++static int loopback_net_init(struct net *net)
++{
++	struct net_device *lo = &net->loopback_dev;
+ /*
+  * The loopback device is special. There is only one instance and
+  * it is statically allocated. Don't do this for other devices.
+  */
+-struct net_device loopback_dev = {
+-	.name	 		= "lo",
+-	.get_stats		= &get_stats,
+-	.mtu			= (16 * 1024) + 20 + 20 + 12,
+-	.hard_start_xmit	= loopback_xmit,
+-	.hard_header		= eth_header,
+-	.hard_header_cache	= eth_header_cache,
+-	.header_cache_update	= eth_header_cache_update,
+-	.hard_header_len	= ETH_HLEN,	/* 14	*/
+-	.addr_len		= ETH_ALEN,	/* 6	*/
+-	.tx_queue_len		= 0,
+-	.type			= ARPHRD_LOOPBACK,	/* 0x0001*/
+-	.rebuild_header		= eth_rebuild_header,
+-	.flags			= IFF_LOOPBACK,
+-	.features 		= NETIF_F_SG | NETIF_F_FRAGLIST
++	strcpy(lo->name, "lo");
++	lo->get_stats		= &get_stats,
++	lo->mtu			= (16 * 1024) + 20 + 20 + 12,
++	lo->hard_start_xmit	= loopback_xmit,
++	lo->hard_header		= eth_header,
++	lo->hard_header_cache	= eth_header_cache,
++	lo->header_cache_update	= eth_header_cache_update,
++	lo->hard_header_len	= ETH_HLEN,	/* 14	*/
++	lo->addr_len		= ETH_ALEN,	/* 6	*/
++	lo->tx_queue_len	= 0,
++	lo->type		= ARPHRD_LOOPBACK,	/* 0x0001*/
++	lo->rebuild_header	= eth_rebuild_header,
++	lo->flags		= IFF_LOOPBACK,
++	lo->features 		= NETIF_F_SG | NETIF_F_FRAGLIST
+ #ifdef LOOPBACK_TSO
+ 				  | NETIF_F_TSO
+ #endif
+ 				  | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA
+-				  | NETIF_F_LLTX,
+-	.ethtool_ops		= &loopback_ethtool_ops,
++				  | NETIF_F_LLTX
++				  | NETIF_F_NETNS_LOCAL,
++	lo->ethtool_ops		= &loopback_ethtool_ops,
++	lo->nd_net = net;
++	return register_netdev(lo);
++}
++
++static void loopback_net_exit(struct net *net)
++{
++	unregister_netdev(&net->loopback_dev);
++}
++
++static struct pernet_operations loopback_net_ops = {
++	.init = loopback_net_init,
++	.exit = loopback_net_exit,
+ };
+ 
+ /* Setup and register the loopback device. */
+ static int __init loopback_init(void)
+ {
+-	return register_netdev(&loopback_dev);
++	return register_pernet_device(&loopback_net_ops);
+ };
+ 
+ module_init(loopback_init);
+-
+-EXPORT_SYMBOL(loopback_dev);
+diff -Nurb linux-2.6.22-try2/drivers/net/pppoe.c linux-2.6.22-try2-netns/drivers/net/pppoe.c
+--- linux-2.6.22-try2/drivers/net/pppoe.c	2007-12-19 13:37:34.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/pppoe.c	2007-12-19 22:49:13.000000000 -0500
+@@ -78,6 +78,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+ 
++#include <net/net_namespace.h>
+ #include <net/sock.h>
+ 
+ #include <asm/uaccess.h>
+@@ -210,7 +211,7 @@
+ 	struct net_device *dev;
+ 	int ifindex;
+ 
+-	dev = dev_get_by_name(sp->sa_addr.pppoe.dev);
++	dev = dev_get_by_name(&init_net, sp->sa_addr.pppoe.dev);
+ 	if(!dev)
+ 		return NULL;
+ 	ifindex = dev->ifindex;
+@@ -295,6 +296,9 @@
+ {
+ 	struct net_device *dev = (struct net_device *) ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	/* Only look at sockets that are using this specific device. */
+ 	switch (event) {
+ 	case NETDEV_CHANGEMTU:
+@@ -380,6 +384,9 @@
+ 	struct pppoe_hdr *ph;
+ 	struct pppox_sock *po;
+ 
++	if (dev->nd_net != &init_net)
++		goto drop;
++
+ 	if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr)))
+ 		goto drop;
+ 
+@@ -412,6 +419,9 @@
+ 	struct pppoe_hdr *ph;
+ 	struct pppox_sock *po;
+ 
++	if (dev->nd_net != &init_net)
++		goto abort;
++
+ 	if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr)))
+ 		goto abort;
+ 
+@@ -471,12 +481,12 @@
+  * Initialize a new struct sock.
+  *
+  **********************************************************************/
+-static int pppoe_create(struct socket *sock)
++static int pppoe_create(struct net *net, struct socket *sock)
+ {
+ 	int error = -ENOMEM;
+ 	struct sock *sk;
+ 
+-	sk = sk_alloc(PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1);
++	sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, 1);
+ 	if (!sk)
+ 		goto out;
+ 
+@@ -588,7 +598,7 @@
+ 
+ 	/* Don't re-bind if sid==0 */
+ 	if (sp->sa_addr.pppoe.sid != 0) {
+-		dev = dev_get_by_name(sp->sa_addr.pppoe.dev);
++		dev = dev_get_by_name(&init_net, sp->sa_addr.pppoe.dev);
+ 
+ 		error = -ENODEV;
+ 		if (!dev)
+@@ -1064,7 +1074,7 @@
+ {
+ 	struct proc_dir_entry *p;
+ 
+-	p = create_proc_entry("net/pppoe", S_IRUGO, NULL);
++	p = create_proc_entry("pppoe", S_IRUGO, init_net.proc_net);
+ 	if (!p)
+ 		return -ENOMEM;
+ 
+@@ -1135,7 +1145,7 @@
+ 	dev_remove_pack(&pppoes_ptype);
+ 	dev_remove_pack(&pppoed_ptype);
+ 	unregister_netdevice_notifier(&pppoe_notifier);
+-	remove_proc_entry("net/pppoe", NULL);
++	remove_proc_entry("pppoe", init_net.proc_net);
+ 	proto_unregister(&pppoe_sk_proto);
+ }
+ 
+diff -Nurb linux-2.6.22-try2/drivers/net/pppox.c linux-2.6.22-try2-netns/drivers/net/pppox.c
+--- linux-2.6.22-try2/drivers/net/pppox.c	2007-12-19 13:37:34.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/pppox.c	2007-12-19 22:49:13.000000000 -0500
+@@ -107,10 +107,13 @@
+ 
+ EXPORT_SYMBOL(pppox_ioctl);
+ 
+-static int pppox_create(struct socket *sock, int protocol)
++static int pppox_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	int rc = -EPROTOTYPE;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	if (protocol < 0 || protocol > PX_MAX_PROTO)
+ 		goto out;
+ 
+@@ -126,7 +129,7 @@
+ 	    !try_module_get(pppox_protos[protocol]->owner))
+ 		goto out;
+ 
+-	rc = pppox_protos[protocol]->create(sock);
++	rc = pppox_protos[protocol]->create(net, sock);
+ 
+ 	module_put(pppox_protos[protocol]->owner);
+ out:
+diff -Nurb linux-2.6.22-try2/drivers/net/shaper.c linux-2.6.22-try2-netns/drivers/net/shaper.c
+--- linux-2.6.22-try2/drivers/net/shaper.c	2007-12-19 13:37:34.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/shaper.c	2007-12-19 22:49:13.000000000 -0500
+@@ -86,6 +86,7 @@
+ 
+ #include <net/dst.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+ 
+ struct shaper_cb {
+ 	unsigned long	shapeclock;		/* Time it should go out */
+@@ -488,7 +489,7 @@
+ 	{
+ 		case SHAPER_SET_DEV:
+ 		{
+-			struct net_device *them=__dev_get_by_name(ss->ss_name);
++			struct net_device *them=__dev_get_by_name(&init_net, ss->ss_name);
+ 			if(them==NULL)
+ 				return -ENODEV;
+ 			if(sh->dev)
+diff -Nurb linux-2.6.22-try2/drivers/net/tokenring/lanstreamer.c linux-2.6.22-try2-netns/drivers/net/tokenring/lanstreamer.c
+--- linux-2.6.22-try2/drivers/net/tokenring/lanstreamer.c	2007-12-19 13:37:37.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/tokenring/lanstreamer.c	2007-12-19 22:49:13.000000000 -0500
+@@ -250,7 +250,7 @@
+ #if STREAMER_NETWORK_MONITOR
+ #ifdef CONFIG_PROC_FS
+ 	if (!dev_streamer)
+-		create_proc_read_entry("net/streamer_tr", 0, 0,
++		create_proc_read_entry("streamer_tr", 0, init_net.proc_net,
+ 					streamer_proc_info, NULL); 
+ 	streamer_priv->next = dev_streamer;
+ 	dev_streamer = streamer_priv;
+@@ -423,7 +423,7 @@
+ 			}
+ 		}
+ 		if (!dev_streamer)
+-			remove_proc_entry("net/streamer_tr", NULL);
++			remove_proc_entry("streamer_tr", init_net.proc_net);
+ 	}
+ #endif
+ #endif
+diff -Nurb linux-2.6.22-try2/drivers/net/tokenring/olympic.c linux-2.6.22-try2-netns/drivers/net/tokenring/olympic.c
+--- linux-2.6.22-try2/drivers/net/tokenring/olympic.c	2007-12-19 13:37:37.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/tokenring/olympic.c	2007-12-19 22:49:13.000000000 -0500
+@@ -101,6 +101,7 @@
+ #include <linux/bitops.h>
+ #include <linux/jiffies.h>
+ 
++#include <net/net_namespace.h>
+ #include <net/checksum.h>
+ 
+ #include <asm/io.h>
+@@ -268,9 +269,9 @@
+ 	printk("Olympic: %s registered as: %s\n",olympic_priv->olympic_card_name,dev->name);
+ 	if (olympic_priv->olympic_network_monitor) { /* Must go after register_netdev as we need the device name */ 
+ 		char proc_name[20] ; 
+-		strcpy(proc_name,"net/olympic_") ; 
++		strcpy(proc_name,"olympic_") ; 
+ 		strcat(proc_name,dev->name) ; 
+-		create_proc_read_entry(proc_name,0,NULL,olympic_proc_info,(void *)dev) ; 
++		create_proc_read_entry(proc_name,0,init_net.proc_net,olympic_proc_info,(void *)dev) ; 
+ 		printk("Olympic: Network Monitor information: /proc/%s\n",proc_name); 
+ 	}
+ 	return  0 ;
+@@ -1752,9 +1753,9 @@
+ 
+ 	if (olympic_priv->olympic_network_monitor) { 
+ 		char proc_name[20] ; 
+-		strcpy(proc_name,"net/olympic_") ; 
++		strcpy(proc_name,"olympic_") ; 
+ 		strcat(proc_name,dev->name) ;
+-		remove_proc_entry(proc_name,NULL); 
++		remove_proc_entry(proc_name,init_net.proc_net);
+ 	}
+ 	unregister_netdev(dev) ; 
+ 	iounmap(olympic_priv->olympic_mmio) ; 
+diff -Nurb linux-2.6.22-try2/drivers/net/tun.c linux-2.6.22-try2-netns/drivers/net/tun.c
+--- linux-2.6.22-try2/drivers/net/tun.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/tun.c	2007-12-19 22:49:13.000000000 -0500
+@@ -62,6 +62,7 @@
+ #include <linux/if_ether.h>
+ #include <linux/if_tun.h>
+ #include <linux/crc32.h>
++#include <net/net_namespace.h>
+ 
+ #include <asm/system.h>
+ #include <asm/uaccess.h>
+@@ -475,7 +476,7 @@
+ 		     !capable(CAP_NET_ADMIN))
+ 			return -EPERM;
+ 	}
+-	else if (__dev_get_by_name(ifr->ifr_name))
++	else if (__dev_get_by_name(&init_net, ifr->ifr_name))
+ 		return -EINVAL;
+ 	else {
+ 		char *name;
+diff -Nurb linux-2.6.22-try2/drivers/net/wan/dlci.c linux-2.6.22-try2-netns/drivers/net/wan/dlci.c
+--- linux-2.6.22-try2/drivers/net/wan/dlci.c	2007-12-19 13:37:38.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/wan/dlci.c	2007-12-19 22:49:13.000000000 -0500
+@@ -361,7 +361,7 @@
+ 
+ 
+ 	/* validate slave device */
+-	slave = dev_get_by_name(dlci->devname);
++	slave = dev_get_by_name(&init_net, dlci->devname);
+ 	if (!slave)
+ 		return -ENODEV;
+ 
+@@ -427,7 +427,7 @@
+ 	int			err;
+ 
+ 	/* validate slave device */
+-	master = __dev_get_by_name(dlci->devname);
++	master = __dev_get_by_name(&init_net, dlci->devname);
+ 	if (!master)
+ 		return(-ENODEV);
+ 
+@@ -513,6 +513,9 @@
+ {
+ 	struct net_device *dev = (struct net_device *) ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (event == NETDEV_UNREGISTER) {
+ 		struct dlci_local *dlp;
+ 
+diff -Nurb linux-2.6.22-try2/drivers/net/wan/hdlc.c linux-2.6.22-try2-netns/drivers/net/wan/hdlc.c
+--- linux-2.6.22-try2/drivers/net/wan/hdlc.c	2007-12-19 13:37:38.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/wan/hdlc.c	2007-12-19 22:49:13.000000000 -0500
+@@ -36,6 +36,7 @@
+ #include <linux/rtnetlink.h>
+ #include <linux/notifier.h>
+ #include <linux/hdlc.h>
++#include <net/net_namespace.h>
+ 
+ 
+ static const char* version = "HDLC support module revision 1.21";
+@@ -66,6 +67,12 @@
+ 		    struct packet_type *p, struct net_device *orig_dev)
+ {
+ 	struct hdlc_device_desc *desc = dev_to_desc(dev);
++
++	if (dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
++
+ 	if (desc->netif_rx)
+ 		return desc->netif_rx(skb);
+ 
+@@ -102,6 +109,9 @@
+ 	unsigned long flags;
+ 	int on;
+  
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (dev->get_stats != hdlc_get_stats)
+ 		return NOTIFY_DONE; /* not an HDLC device */
+  
+diff -Nurb linux-2.6.22-try2/drivers/net/wan/lapbether.c linux-2.6.22-try2-netns/drivers/net/wan/lapbether.c
+--- linux-2.6.22-try2/drivers/net/wan/lapbether.c	2007-12-19 13:37:38.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/wan/lapbether.c	2007-12-19 22:49:13.000000000 -0500
+@@ -91,6 +91,9 @@
+ 	int len, err;
+ 	struct lapbethdev *lapbeth;
+ 
++	if (dev->nd_net != &init_net)
++		goto drop;
++
+ 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+ 		return NET_RX_DROP;
+ 
+@@ -391,6 +394,9 @@
+ 	struct lapbethdev *lapbeth;
+ 	struct net_device *dev = ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (!dev_is_ethdev(dev))
+ 		return NOTIFY_DONE;
+ 
+diff -Nurb linux-2.6.22-try2/drivers/net/wan/sbni.c linux-2.6.22-try2-netns/drivers/net/wan/sbni.c
+--- linux-2.6.22-try2/drivers/net/wan/sbni.c	2007-12-19 13:37:38.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/wan/sbni.c	2007-12-19 22:49:13.000000000 -0500
+@@ -54,6 +54,7 @@
+ #include <linux/init.h>
+ #include <linux/delay.h>
+ 
++#include <net/net_namespace.h>
+ #include <net/arp.h>
+ 
+ #include <asm/io.h>
+@@ -1362,7 +1363,7 @@
+ 
+ 		if (copy_from_user( slave_name, ifr->ifr_data, sizeof slave_name ))
+ 			return -EFAULT;
+-		slave_dev = dev_get_by_name( slave_name );
++		slave_dev = dev_get_by_name(&init_net, slave_name );
+ 		if( !slave_dev  ||  !(slave_dev->flags & IFF_UP) ) {
+ 			printk( KERN_ERR "%s: trying to enslave non-active "
+ 				"device %s\n", dev->name, slave_name );
+diff -Nurb linux-2.6.22-try2/drivers/net/wan/syncppp.c linux-2.6.22-try2-netns/drivers/net/wan/syncppp.c
+--- linux-2.6.22-try2/drivers/net/wan/syncppp.c	2007-12-19 13:37:38.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/wan/syncppp.c	2007-12-19 22:49:13.000000000 -0500
+@@ -51,6 +51,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/rcupdate.h>
+ 
++#include <net/net_namespace.h>
+ #include <net/syncppp.h>
+ 
+ #include <asm/byteorder.h>
+@@ -1445,6 +1446,11 @@
+ 
+ static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev)
+ {
++	if (dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
++
+ 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+ 		return NET_RX_DROP;
+ 	sppp_input(dev,skb);
+diff -Nurb linux-2.6.22-try2/drivers/net/wireless/hostap/hostap_main.c linux-2.6.22-try2-netns/drivers/net/wireless/hostap/hostap_main.c
+--- linux-2.6.22-try2/drivers/net/wireless/hostap/hostap_main.c	2007-12-19 13:37:38.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/wireless/hostap/hostap_main.c	2007-12-19 22:49:13.000000000 -0500
+@@ -24,6 +24,7 @@
+ #include <linux/rtnetlink.h>
+ #include <linux/wireless.h>
+ #include <linux/etherdevice.h>
++#include <net/net_namespace.h>
+ #include <net/iw_handler.h>
+ #include <net/ieee80211.h>
+ #include <net/ieee80211_crypt.h>
+@@ -1094,8 +1095,8 @@
+ 
+ static int __init hostap_init(void)
+ {
+-	if (proc_net != NULL) {
+-		hostap_proc = proc_mkdir("hostap", proc_net);
++	if (init_net.proc_net != NULL) {
++		hostap_proc = proc_mkdir("hostap", init_net.proc_net);
+ 		if (!hostap_proc)
+ 			printk(KERN_WARNING "Failed to mkdir "
+ 			       "/proc/net/hostap\n");
+@@ -1110,7 +1111,7 @@
+ {
+ 	if (hostap_proc != NULL) {
+ 		hostap_proc = NULL;
+-		remove_proc_entry("hostap", proc_net);
++		remove_proc_entry("hostap", init_net.proc_net);
+ 	}
+ }
+ 
+diff -Nurb linux-2.6.22-try2/drivers/net/wireless/strip.c linux-2.6.22-try2-netns/drivers/net/wireless/strip.c
+--- linux-2.6.22-try2/drivers/net/wireless/strip.c	2007-12-19 13:37:38.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/net/wireless/strip.c	2007-12-19 22:49:13.000000000 -0500
+@@ -107,6 +107,7 @@
+ #include <linux/serialP.h>
+ #include <linux/rcupdate.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+ 
+ #include <linux/ip.h>
+ #include <linux/tcp.h>
+@@ -1971,7 +1972,7 @@
+ 		      sizeof(zero_address))) {
+ 		struct net_device *dev;
+ 		read_lock_bh(&dev_base_lock);
+-		for_each_netdev(dev) {
++		for_each_netdev(&init_net, dev) {
+ 			if (dev->type == strip_info->dev->type &&
+ 			    !memcmp(dev->dev_addr,
+ 				    &strip_info->true_dev_addr,
+@@ -2787,7 +2788,7 @@
+ 	/*
+ 	 * Register the status file with /proc
+ 	 */
+-	proc_net_fops_create("strip", S_IFREG | S_IRUGO, &strip_seq_fops);
++	proc_net_fops_create(&init_net, "strip", S_IFREG | S_IRUGO, &strip_seq_fops);
+ 
+ 	return status;
+ }
+@@ -2809,7 +2810,7 @@
+ 	}
+ 
+ 	/* Unregister with the /proc/net file here. */
+-	proc_net_remove("strip");
++	proc_net_remove(&init_net, "strip");
+ 
+ 	if ((i = tty_unregister_ldisc(N_STRIP)))
+ 		printk(KERN_ERR "STRIP: can't unregister line discipline (err = %d)\n", i);
+diff -Nurb linux-2.6.22-try2/drivers/parisc/led.c linux-2.6.22-try2-netns/drivers/parisc/led.c
+--- linux-2.6.22-try2/drivers/parisc/led.c	2007-12-19 13:37:38.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/parisc/led.c	2007-12-19 22:49:13.000000000 -0500
+@@ -359,7 +359,7 @@
+ 	 * for reading should be OK */
+ 	read_lock(&dev_base_lock);
+ 	rcu_read_lock();
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 	    struct net_device_stats *stats;
+ 	    struct in_device *in_dev = __in_dev_get_rcu(dev);
+ 	    if (!in_dev || !in_dev->ifa_list)
+diff -Nurb linux-2.6.22-try2/drivers/s390/net/qeth_main.c linux-2.6.22-try2-netns/drivers/s390/net/qeth_main.c
+--- linux-2.6.22-try2/drivers/s390/net/qeth_main.c	2007-12-19 13:37:38.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/s390/net/qeth_main.c	2007-12-19 22:49:13.000000000 -0500
+@@ -8127,7 +8127,7 @@
+ 	neigh->parms = neigh_parms_clone(parms);
+ 	rcu_read_unlock();
+ 
+-	neigh->type = inet_addr_type(*(__be32 *) neigh->primary_key);
++	neigh->type = inet_addr_type(&init_net, *(__be32 *) neigh->primary_key);
+ 	neigh->nud_state = NUD_NOARP;
+ 	neigh->ops = arp_direct_ops;
+ 	neigh->output = neigh->ops->queue_xmit;
+diff -Nurb linux-2.6.22-try2/drivers/scsi/scsi_netlink.c linux-2.6.22-try2-netns/drivers/scsi/scsi_netlink.c
+--- linux-2.6.22-try2/drivers/scsi/scsi_netlink.c	2007-12-19 13:37:39.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/scsi/scsi_netlink.c	2007-12-19 22:49:13.000000000 -0500
+@@ -167,7 +167,7 @@
+ 		return;
+ 	}
+ 
+-	scsi_nl_sock = netlink_kernel_create(NETLINK_SCSITRANSPORT,
++	scsi_nl_sock = netlink_kernel_create(&init_net, NETLINK_SCSITRANSPORT,
+ 				SCSI_NL_GRP_CNT, scsi_nl_rcv, NULL,
+ 				THIS_MODULE);
+ 	if (!scsi_nl_sock) {
+diff -Nurb linux-2.6.22-try2/drivers/scsi/scsi_transport_iscsi.c linux-2.6.22-try2-netns/drivers/scsi/scsi_transport_iscsi.c
+--- linux-2.6.22-try2/drivers/scsi/scsi_transport_iscsi.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/drivers/scsi/scsi_transport_iscsi.c	2007-12-19 22:49:13.000000000 -0500
+@@ -1523,7 +1523,7 @@
+ 	if (err)
+ 		goto unregister_conn_class;
+ 
+-	nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, NULL,
++	nls = netlink_kernel_create(&init_net, NETLINK_ISCSI, 1, iscsi_if_rx, NULL,
+ 			THIS_MODULE);
+ 	if (!nls) {
+ 		err = -ENOBUFS;
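
Note on the two SCSI hunks above: with this series netlink_kernel_create() takes
the owning namespace as its first argument, and in-kernel users that are not
namespace-aware simply bind to init_net. A minimal caller after the change could
look like the sketch below; NETLINK_USERSOCK and the my_* names are illustrative,
not part of this patch.

#include <linux/netlink.h>
#include <net/net_namespace.h>

static struct sock *my_nl_sock;

static void my_nl_rcv(struct sock *sk, int len)
{
	/* drain sk->sk_receive_queue here */
}

static int __init my_nl_init(void)
{
	/* bind to the initial namespace; a namespace-aware user would
	 * instead create one socket per net from a pernet init hook */
	my_nl_sock = netlink_kernel_create(&init_net, NETLINK_USERSOCK,
					   0, my_nl_rcv, NULL, THIS_MODULE);
	return my_nl_sock ? 0 : -ENOMEM;
}
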
+diff -Nurb linux-2.6.22-try2/edit linux-2.6.22-try2-netns/edit
+--- linux-2.6.22-try2/edit	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2-netns/edit	2007-12-19 22:57:06.000000000 -0500
+@@ -0,0 +1,19 @@
++vi -o ./fs/proc/root.c ./fs/proc/root.c.rej
++vi -o ./include/linux/nsproxy.h ./include/linux/nsproxy.h.rej
++vi -o ./include/linux/sched.h ./include/linux/sched.h.rej
++vi -o ./include/net/inet_timewait_sock.h ./include/net/inet_timewait_sock.h.rej
++vi -o ./include/net/route.h ./include/net/route.h.rej
++vi -o ./include/net/sock.h ./include/net/sock.h.rej
++vi -o ./kernel/nsproxy.c ./kernel/nsproxy.c.rej
++vi -o ./lib/Makefile ./lib/Makefile.rej
++vi -o ./net/core/dev.c ./net/core/dev.c.rej
++vi -o ./net/core/rtnetlink.c ./net/core/rtnetlink.c.rej
++vi -o ./net/core/sock.c ./net/core/sock.c.rej
++vi -o ./net/ipv4/af_inet.c ./net/ipv4/af_inet.c.rej
++vi -o ./net/ipv4/inet_connection_sock.c ./net/ipv4/inet_connection_sock.c.rej
++vi -o ./net/ipv4/inet_hashtables.c ./net/ipv4/inet_hashtables.c.rej
++vi -o ./net/ipv4/raw.c ./net/ipv4/raw.c.rej
++vi -o ./net/ipv4/tcp_ipv4.c ./net/ipv4/tcp_ipv4.c.rej
++vi -o ./net/ipv4/udp.c ./net/ipv4/udp.c.rej
++vi -o ./net/ipv6/addrconf.c ./net/ipv6/addrconf.c.rej
++vi -o ./net/unix/af_unix.c ./net/unix/af_unix.c.rej
+diff -Nurb linux-2.6.22-try2/fs/afs/netdevices.c linux-2.6.22-try2-netns/fs/afs/netdevices.c
+--- linux-2.6.22-try2/fs/afs/netdevices.c	2007-12-19 13:37:40.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/afs/netdevices.c	2007-12-19 22:49:13.000000000 -0500
+@@ -8,6 +8,7 @@
+ #include <linux/inetdevice.h>
+ #include <linux/netdevice.h>
+ #include <linux/if_arp.h>
++#include <net/net_namespace.h>
+ #include "internal.h"
+ 
+ /*
+@@ -23,7 +24,7 @@
+ 		BUG();
+ 
+ 	rtnl_lock();
+-	dev = __dev_getfirstbyhwtype(ARPHRD_ETHER);
++	dev = __dev_getfirstbyhwtype(&init_net, ARPHRD_ETHER);
+ 	if (dev) {
+ 		memcpy(mac, dev->dev_addr, maclen);
+ 		ret = 0;
+@@ -47,7 +48,7 @@
+ 	ASSERT(maxbufs > 0);
+ 
+ 	rtnl_lock();
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if (dev->type == ARPHRD_LOOPBACK && !wantloopback)
+ 			continue;
+ 		idev = __in_dev_get_rtnl(dev);
+diff -Nurb linux-2.6.22-try2/fs/compat_ioctl.c linux-2.6.22-try2-netns/fs/compat_ioctl.c
+--- linux-2.6.22-try2/fs/compat_ioctl.c	2007-12-19 13:37:40.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/compat_ioctl.c	2007-12-19 22:49:13.000000000 -0500
+@@ -319,22 +319,21 @@
+ 
+ static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg)
+ {
+-	struct net_device *dev;
+-	struct ifreq32 ifr32;
++	struct ifreq __user *uifr;
+ 	int err;
+ 
+-	if (copy_from_user(&ifr32, compat_ptr(arg), sizeof(ifr32)))
++	uifr = compat_alloc_user_space(sizeof(struct ifreq));
++	if (copy_in_user(uifr, compat_ptr(arg), sizeof(struct ifreq32)))
+ 		return -EFAULT;
+ 
+-	dev = dev_get_by_index(ifr32.ifr_ifindex);
+-	if (!dev)
+-		return -ENODEV;
++	err = sys_ioctl(fd, SIOCGIFNAME, (unsigned long)uifr);
++	if (err)
++		return err;
+ 
+-	strlcpy(ifr32.ifr_name, dev->name, sizeof(ifr32.ifr_name));
+-	dev_put(dev);
++	if (copy_in_user(compat_ptr(arg), uifr, sizeof(struct ifreq32)))
++		return -EFAULT;
+ 	
+-	err = copy_to_user(compat_ptr(arg), &ifr32, sizeof(ifr32));
+-	return (err ? -EFAULT : 0);
++	return 0;
+ }
+ 
+ static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg)
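
A note on the dev_ifname32() rewrite above: the old compat path duplicated the
device lookup in kernel space, which would now have to pick a namespace by hand.
Forwarding to sys_ioctl() instead lets the native SIOCGIFNAME handler resolve
the ifindex in the caller's own namespace. The idiom, reduced to a hypothetical
helper (names are illustrative, not part of the patch):

static int compat_fwd_ifreq_ioctl(unsigned int fd, unsigned int cmd,
				  unsigned long arg)
{
	struct ifreq __user *uifr;
	int err;

	/* widen the 32-bit layout into a scratch user buffer */
	uifr = compat_alloc_user_space(sizeof(struct ifreq));
	if (copy_in_user(uifr, compat_ptr(arg), sizeof(struct ifreq32)))
		return -EFAULT;

	/* let the namespace-aware native handler do the work */
	err = sys_ioctl(fd, cmd, (unsigned long)uifr);
	if (err)
		return err;

	/* copy the result back into the 32-bit layout */
	if (copy_in_user(compat_ptr(arg), uifr, sizeof(struct ifreq32)))
		return -EFAULT;
	return 0;
}
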
+diff -Nurb linux-2.6.22-try2/fs/proc/Makefile linux-2.6.22-try2-netns/fs/proc/Makefile
+--- linux-2.6.22-try2/fs/proc/Makefile	2007-12-19 13:37:46.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/proc/Makefile	2007-12-19 22:49:13.000000000 -0500
+@@ -11,6 +11,7 @@
+ 		proc_tty.o proc_misc.o
+ 
+ proc-$(CONFIG_PROC_SYSCTL)	+= proc_sysctl.o
++proc-$(CONFIG_NET)		+= proc_net.o
+ proc-$(CONFIG_PROC_KCORE)	+= kcore.o
+ proc-$(CONFIG_PROC_VMCORE)	+= vmcore.o
+ proc-$(CONFIG_PROC_DEVICETREE)	+= proc_devtree.o
+diff -Nurb linux-2.6.22-try2/fs/proc/internal.h linux-2.6.22-try2-netns/fs/proc/internal.h
+--- linux-2.6.22-try2/fs/proc/internal.h	2007-12-19 15:29:24.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/proc/internal.h	2007-12-19 22:49:13.000000000 -0500
+@@ -17,6 +17,11 @@
+ #else
+ static inline void proc_sys_init(void) { }
+ #endif
++#ifdef CONFIG_NET
++extern int proc_net_init(void);
++#else
++static inline int proc_net_init(void) { return 0; }
++#endif
+ 
+ struct vmalloc_info {
+ 	unsigned long	used;
+diff -Nurb linux-2.6.22-try2/fs/proc/proc_net.c linux-2.6.22-try2-netns/fs/proc/proc_net.c
+--- linux-2.6.22-try2/fs/proc/proc_net.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/proc/proc_net.c	2007-12-19 22:49:13.000000000 -0500
+@@ -0,0 +1,154 @@
++/*
++ *  linux/fs/proc/net.c
++ *
++ *  Copyright (C) 2007
++ *
++ *  Author: Eric Biederman <ebiederm@xmission.com>
++ *
++ *  proc net directory handling functions
++ */
++
++#include <asm/uaccess.h>
++
++#include <linux/errno.h>
++#include <linux/time.h>
++#include <linux/proc_fs.h>
++#include <linux/stat.h>
++#include <linux/init.h>
++#include <linux/sched.h>
++#include <linux/module.h>
++#include <linux/bitops.h>
++#include <linux/smp_lock.h>
++#include <linux/mount.h>
++#include <linux/nsproxy.h>
++#include <net/net_namespace.h>
++
++#include "internal.h"
++
++static struct proc_dir_entry *proc_net_shadow;
++
++static struct dentry *proc_net_shadow_dentry(struct dentry *parent,
++						struct proc_dir_entry *de)
++{
++	struct dentry *shadow = NULL;
++	struct inode *inode;
++	if (!de)
++		goto out;
++	de_get(de);
++	inode = proc_get_inode(parent->d_inode->i_sb, de->low_ino, de);
++	if (!inode)
++		goto out_de_put;
++	shadow = d_alloc_name(parent, de->name);
++	if (!shadow)
++		goto out_iput;
++	shadow->d_op = parent->d_op; /* proc_dentry_operations */
++	d_instantiate(shadow, inode);
++out:
++	return shadow;
++out_iput:
++	iput(inode);
++out_de_put:
++	de_put(de);
++	goto out;
++}
++
++static void *proc_net_follow_link(struct dentry *parent, struct nameidata *nd)
++{
++	struct net *net = current->nsproxy->net_ns;
++	struct dentry *shadow;
++	shadow = proc_net_shadow_dentry(parent, net->proc_net);
++	if (!shadow)
++		return ERR_PTR(-ENOENT);
++
++	dput(nd->dentry);
++	/* My dentry count is 1 and that should be enough as the 
++	 * shadow dentry is thrown away immediately.
++	 */
++	nd->dentry = shadow;
++	return NULL;
++}
++
++static struct dentry *proc_net_lookup(struct inode *dir, struct dentry *dentry,
++				      struct nameidata *nd)
++{
++	struct net *net = current->nsproxy->net_ns;
++	struct dentry *shadow;
++	
++	shadow = proc_net_shadow_dentry(nd->dentry, net->proc_net);
++	if (!shadow)
++		return ERR_PTR(-ENOENT);
++
++	dput(nd->dentry);
++	nd->dentry = shadow;
++	
++	return shadow->d_inode->i_op->lookup(shadow->d_inode, dentry, nd);
++}
++
++static int proc_net_setattr(struct dentry *dentry, struct iattr *iattr)
++{
++	struct net *net = current->nsproxy->net_ns;
++	struct dentry *shadow;
++	int ret;
++
++	shadow = proc_net_shadow_dentry(dentry->d_parent, net->proc_net);
++	if (!shadow)
++		return -ENOENT;
++	ret = shadow->d_inode->i_op->setattr(shadow, iattr);
++	dput(shadow);
++	return ret;
++}
++
++static const struct file_operations proc_net_dir_operations = {
++	.read			= generic_read_dir,
++};
++
++static struct inode_operations proc_net_dir_inode_operations = {
++	.follow_link	= proc_net_follow_link,
++	.lookup		= proc_net_lookup,
++	.setattr	= proc_net_setattr,
++};
++
++
++static int proc_net_ns_init(struct net *net)
++{
++	struct proc_dir_entry *netd, *net_statd;
++
++	netd = proc_mkdir("net", &net->proc_net_root);
++	if (!netd)
++		return -EEXIST;
++
++	net_statd = proc_mkdir("stat", netd);
++	if (!net_statd) {
++		remove_proc_entry("net", &net->proc_net_root);
++		return -EEXIST;
++	}
++
++	netd->data = net;
++	net_statd->data = net;
++	net->proc_net_root.data = net;
++	net->proc_net = netd;
++	net->proc_net_stat = net_statd;
++
++	return 0;
++}
++
++static void proc_net_ns_exit(struct net *net)
++{
++	remove_proc_entry("stat", net->proc_net);
++	remove_proc_entry("net", &net->proc_net_root);
++
++}
++
++struct pernet_operations proc_net_ns_ops = {
++	.init = proc_net_ns_init,
++	.exit = proc_net_ns_exit,
++};
++
++int proc_net_init(void)
++{
++	proc_net_shadow = proc_mkdir("net", NULL);
++	proc_net_shadow->proc_iops = &proc_net_dir_inode_operations;
++	proc_net_shadow->proc_fops = &proc_net_dir_operations;
++
++	return register_pernet_subsys(&proc_net_ns_ops);
++}
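
The new proc_net.c makes the top-level /proc/net a shadow: its follow_link()
and lookup() redirect every access to current->nsproxy->net_ns->proc_net, while
proc_net_ns_init()/proc_net_ns_exit() build and tear down the per-namespace
net/ and net/stat directories. Other subsystems hook the same lifecycle through
register_pernet_subsys(); a minimal registration, with illustrative my_* names,
would look like:

#include <net/net_namespace.h>

static int my_net_init(struct net *net)
{
	/* set up per-namespace state, e.g. files under net->proc_net,
	 * the directory created by proc_net_ns_init() above */
	return 0;
}

static void my_net_exit(struct net *net)
{
	/* undo whatever my_net_init() created */
}

static struct pernet_operations my_net_ops = {
	.init = my_net_init,
	.exit = my_net_exit,
};

static int __init my_subsys_init(void)
{
	return register_pernet_subsys(&my_net_ops);
}
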
+diff -Nurb linux-2.6.22-try2/fs/proc/root.c linux-2.6.22-try2-netns/fs/proc/root.c
+--- linux-2.6.22-try2/fs/proc/root.c	2007-12-19 13:37:46.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/proc/root.c	2007-12-19 22:57:39.000000000 -0500
+@@ -21,11 +21,11 @@
+ 
+ #include "internal.h"
+ 
+-struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
+ struct proc_dir_entry *proc_virtual;
+ 
+ extern void proc_vx_init(void);
+ 
++struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver;
+ static int proc_get_sb(struct file_system_type *fs_type,
+ 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+ {
+@@ -64,8 +64,8 @@
+ 		return;
+ 	}
+ 	proc_misc_init();
+-	proc_net = proc_mkdir("net", NULL);
+-	proc_net_stat = proc_mkdir("net/stat", NULL);
++
++	proc_net_init();
+ 
+ #ifdef CONFIG_SYSVIPC
+ 	proc_mkdir("sysvipc", NULL);
+@@ -163,7 +163,5 @@
+ EXPORT_SYMBOL(remove_proc_entry);
+ EXPORT_SYMBOL(proc_root);
+ EXPORT_SYMBOL(proc_root_fs);
+-EXPORT_SYMBOL(proc_net);
+-EXPORT_SYMBOL(proc_net_stat);
+ EXPORT_SYMBOL(proc_bus);
+ EXPORT_SYMBOL(proc_root_driver);
+diff -Nurb linux-2.6.22-try2/fs/sysfs/bin.c linux-2.6.22-try2-netns/fs/sysfs/bin.c
+--- linux-2.6.22-try2/fs/sysfs/bin.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/sysfs/bin.c	2007-12-19 22:49:13.000000000 -0500
+@@ -248,7 +248,7 @@
+ 
+ void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+ {
+-	if (sysfs_hash_and_remove(kobj->sd, attr->attr.name) < 0) {
++	if (sysfs_hash_and_remove(kobj, kobj->sd, attr->attr.name) < 0) {
+ 		printk(KERN_ERR "%s: "
+ 			"bad dentry or inode or no such file: \"%s\"\n",
+ 			__FUNCTION__, attr->attr.name);
+diff -Nurb linux-2.6.22-try2/fs/sysfs/dir.c linux-2.6.22-try2-netns/fs/sysfs/dir.c
+--- linux-2.6.22-try2/fs/sysfs/dir.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/sysfs/dir.c	2007-12-19 22:49:13.000000000 -0500
+@@ -14,12 +14,33 @@
+ #include <asm/semaphore.h>
+ #include "sysfs.h"
+ 
++static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd);
++
+ DEFINE_MUTEX(sysfs_mutex);
+ spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
+ 
+ static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
+ static DEFINE_IDA(sysfs_ino_ida);
+ 
++static struct sysfs_dirent *find_shadow_sd(struct sysfs_dirent *parent_sd, const void *target)
++{
++	/* Find the shadow directory for the specified tag */
++	struct sysfs_dirent *sd;
++
++	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
++		if (sd->s_name != target)
++			continue;
++		break;
++	}
++	return sd;
++}
++
++static const void *find_shadow_tag(struct kobject *kobj)
++{
++	/* Find the tag the current kobj is cached with */
++	return kobj->sd->s_parent->s_name;
++}
++
+ /**
+  *	sysfs_link_sibling - link sysfs_dirent into sibling list
+  *	@sd: sysfs_dirent of interest
+@@ -323,6 +344,7 @@
+ 	if (sysfs_type(sd) & SYSFS_COPY_NAME)
+ 		kfree(sd->s_name);
+ 	kfree(sd->s_iattr);
++	if (sysfs_type(sd) != SYSFS_SHADOW_DIR)
+ 	sysfs_free_ino(sd->s_ino);
+ 	kmem_cache_free(sysfs_dir_cachep, sd);
+ 
+@@ -413,6 +435,7 @@
+ 	sd->s_dentry = dentry;
+ 	spin_unlock(&sysfs_assoc_lock);
+ 
++	if (dentry->d_flags & DCACHE_UNHASHED)
+ 	d_rehash(dentry);
+ }
+ 
+@@ -568,8 +591,9 @@
+ 	spin_unlock(&dcache_lock);
+ 	spin_unlock(&sysfs_assoc_lock);
+ 
+-	/* dentries for shadowed inodes are pinned, unpin */
+-	if (dentry && sysfs_is_shadowed_inode(dentry->d_inode))
++	/* dentries for shadowed directories are pinned, unpin */
++	if ((sysfs_type(sd) == SYSFS_SHADOW_DIR) ||
++	    (sd->s_flags & SYSFS_FLAG_SHADOWED))
+ 		dput(dentry);
+ 	dput(dentry);
+ 
+@@ -624,6 +648,7 @@
+ 		acxt->removed = sd->s_sibling;
+ 		sd->s_sibling = NULL;
+ 
++		sysfs_prune_shadow_sd(sd->s_parent);
+ 		sysfs_drop_dentry(sd);
+ 		sysfs_deactivate(sd);
+ 		sysfs_put(sd);
+@@ -689,6 +714,7 @@
+ 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
+ 	struct sysfs_addrm_cxt acxt;
+ 	struct sysfs_dirent *sd;
++	int err;
+ 
+ 	/* allocate */
+ 	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
+@@ -698,17 +724,24 @@
+ 
+ 	/* link in */
+ 	sysfs_addrm_start(&acxt, parent_sd);
+-	if (!sysfs_find_dirent(parent_sd, name)) {
++	err = -ENOENT;
++	if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd))
++		goto addrm_finish;
++
++	err = -EEXIST;
++	if (!sysfs_find_dirent(acxt.parent_sd, name)) {
+ 		sysfs_add_one(&acxt, sd);
+ 		sysfs_link_sibling(sd);
++		err = 0;
+ 	}
++addrm_finish:
+ 	if (sysfs_addrm_finish(&acxt)) {
+ 		*p_sd = sd;
+ 		return 0;
+ 	}
+ 
+ 	sysfs_put(sd);
+-	return -EEXIST;
++	return err;
+ }
+ 
+ int sysfs_create_subdir(struct kobject *kobj, const char *name,
+@@ -720,19 +753,15 @@
+ /**
+  *	sysfs_create_dir - create a directory for an object.
+  *	@kobj:		object we're creating directory for. 
+- *	@shadow_parent:	parent object.
+  */
+-int sysfs_create_dir(struct kobject *kobj,
+-		     struct sysfs_dirent *shadow_parent_sd)
++int sysfs_create_dir(struct kobject * kobj)
+ {
+ 	struct sysfs_dirent *parent_sd, *sd;
+ 	int error = 0;
+ 
+ 	BUG_ON(!kobj);
+ 
+-	if (shadow_parent_sd)
+-		parent_sd = shadow_parent_sd;
+-	else if (kobj->parent)
++	if (kobj->parent)
+ 		parent_sd = kobj->parent->sd;
+ 	else if (sysfs_mount && sysfs_mount->mnt_sb)
+ 		parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
+@@ -817,18 +846,56 @@
+ 	return NULL;
+ }
+ 
++static void *sysfs_shadow_follow_link(struct dentry *dentry, struct nameidata *nd)
++{
++	struct sysfs_dirent *sd;
++	struct dentry *dest;
++
++	sd = dentry->d_fsdata;
++	dest = NULL;
++	if (sd->s_flags & SYSFS_FLAG_SHADOWED) {
++		const struct shadow_dir_operations *shadow_ops;
++		const void *tag;
++
++		mutex_lock(&sysfs_mutex);
++
++		shadow_ops = dentry->d_inode->i_private;
++		tag = shadow_ops->current_tag();
++
++		sd = find_shadow_sd(sd, tag);
++		if (sd)
++			dest = sd->s_dentry;
++		dget(dest);
++
++		mutex_unlock(&sysfs_mutex);
++	}
++	if (!dest)
++		dest = dget(dentry);
++	dput(nd->dentry);
++	nd->dentry = dest;
++
++	return NULL;
++}
++
++
+ const struct inode_operations sysfs_dir_inode_operations = {
+ 	.lookup		= sysfs_lookup,
+ 	.setattr	= sysfs_setattr,
++	.follow_link	= sysfs_shadow_follow_link,
+ };
+ 
++static void __remove_dir(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
++{
++	sysfs_unlink_sibling(sd);
++	sysfs_remove_one(acxt, sd);
++}
++
+ static void remove_dir(struct sysfs_dirent *sd)
+ {
+ 	struct sysfs_addrm_cxt acxt;
+ 
+ 	sysfs_addrm_start(&acxt, sd->s_parent);
+-	sysfs_unlink_sibling(sd);
+-	sysfs_remove_one(&acxt, sd);
++	__remove_dir(&acxt, sd);
+ 	sysfs_addrm_finish(&acxt);
+ }
+ 
+@@ -837,17 +904,11 @@
+ 	remove_dir(sd);
+ }
+ 
+-
+-static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
++static void sysfs_empty_dir(struct sysfs_addrm_cxt *acxt,
++			    struct sysfs_dirent *dir_sd)
+ {
+-	struct sysfs_addrm_cxt acxt;
+ 	struct sysfs_dirent **pos;
+ 
+-	if (!dir_sd)
+-		return;
+-
+-	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
+-	sysfs_addrm_start(&acxt, dir_sd);
+ 	pos = &dir_sd->s_children;
+ 	while (*pos) {
+ 		struct sysfs_dirent *sd = *pos;
+@@ -855,10 +916,39 @@
+ 		if (sysfs_type(sd) && sysfs_type(sd) != SYSFS_DIR) {
+ 			*pos = sd->s_sibling;
+ 			sd->s_sibling = NULL;
+-			sysfs_remove_one(&acxt, sd);
++			sysfs_remove_one(acxt, sd);
+ 		} else
+ 			pos = &(*pos)->s_sibling;
+ 	}
++}
++
++static void sysfs_remove_shadows(struct sysfs_addrm_cxt * acxt,
++					struct sysfs_dirent *dir_sd)
++{
++	struct sysfs_dirent **pos;
++
++	pos = &dir_sd->s_children;
++	while (*pos) {
++		struct sysfs_dirent *sd = *pos;
++
++		sysfs_empty_dir(acxt, sd);
++		__remove_dir(acxt, sd);
++	}
++}
++
++static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
++{
++	struct sysfs_addrm_cxt acxt;
++
++	if (!dir_sd)
++		return;
++
++	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
++	sysfs_addrm_start(&acxt, dir_sd);
++	if (sysfs_type(dir_sd) == SYSFS_DIR)
++		sysfs_empty_dir(&acxt, dir_sd);
++	else
++		sysfs_remove_shadows(&acxt, dir_sd);
+ 	sysfs_addrm_finish(&acxt);
+ 
+ 	remove_dir(dir_sd);
+@@ -884,89 +974,77 @@
+ 	__sysfs_remove_dir(sd);
+ }
+ 
+-int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
+-		     const char *new_name)
++int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
+ {
+-	struct sysfs_dirent *sd = kobj->sd;
+-	struct dentry *new_parent = NULL;
+-	struct dentry *old_dentry = NULL, *new_dentry = NULL;
+-	const char *dup_name = NULL;
++	struct dentry *old_dentry, *new_dentry, *parent;
++	struct sysfs_addrm_cxt acxt;
++	struct sysfs_dirent *sd;
++	const char *dup_name;
+ 	int error;
+ 
+-	/* get dentries */
+-	old_dentry = sysfs_get_dentry(sd);
+-	if (IS_ERR(old_dentry)) {
+-		error = PTR_ERR(old_dentry);
+-		goto out_dput;
+-	}
++	dup_name = NULL;
++	new_dentry = NULL;
+ 
+-	new_parent = sysfs_get_dentry(new_parent_sd);
+-	if (IS_ERR(new_parent)) {
+-		error = PTR_ERR(new_parent);
+-		goto out_dput;
+-	}
++	sd = kobj->sd;
++	sysfs_addrm_start(&acxt, sd->s_parent);
++	error = -ENOENT;
++	if (!sysfs_resolve_for_create(kobj, &acxt.parent_sd))
++		goto addrm_finish;
+ 
+-	/* lock new_parent and get dentry for new name */
+-	mutex_lock(&new_parent->d_inode->i_mutex);
++	error = -EEXIST;
++	if (sysfs_find_dirent(acxt.parent_sd, new_name))
++		goto addrm_finish;
+ 
+-	new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
++	error = -EINVAL;
++	if ((sd->s_parent == acxt.parent_sd) &&
++	    (strcmp(new_name, sd->s_name) == 0))
++		goto addrm_finish;
++
++	old_dentry = sd->s_dentry;
++	parent = acxt.parent_sd->s_dentry;
++	if (old_dentry) {
++		old_dentry = sd->s_dentry;
++		parent = acxt.parent_sd->s_dentry;
++		new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
+ 	if (IS_ERR(new_dentry)) {
+ 		error = PTR_ERR(new_dentry);
+-		goto out_unlock;
++			goto addrm_finish;
+ 	}
+ 
+-	/* By allowing two different directories with the same
+-	 * d_parent we allow this routine to move between different
+-	 * shadows of the same directory
+-	 */
+ 	error = -EINVAL;
+-	if (old_dentry->d_parent->d_inode != new_parent->d_inode ||
+-	    new_dentry->d_parent->d_inode != new_parent->d_inode ||
+-	    old_dentry == new_dentry)
+-		goto out_unlock;
+-
+-	error = -EEXIST;
+-	if (new_dentry->d_inode)
+-		goto out_unlock;
++		if (old_dentry == new_dentry)
++			goto addrm_finish;
++	}
+ 
+ 	/* rename kobject and sysfs_dirent */
+ 	error = -ENOMEM;
+ 	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
+ 	if (!new_name)
+-		goto out_drop;
++		goto addrm_finish;
+ 
+ 	error = kobject_set_name(kobj, "%s", new_name);
+ 	if (error)
+-		goto out_drop;
++		goto addrm_finish;
+ 
+ 	dup_name = sd->s_name;
+ 	sd->s_name = new_name;
+ 
+ 	/* move under the new parent */
+-	d_add(new_dentry, NULL);
+-	d_move(sd->s_dentry, new_dentry);
+-
+-	mutex_lock(&sysfs_mutex);
+-
+ 	sysfs_unlink_sibling(sd);
+-	sysfs_get(new_parent_sd);
++	sysfs_get(acxt.parent_sd);
+ 	sysfs_put(sd->s_parent);
+-	sd->s_parent = new_parent_sd;
++	sd->s_parent = acxt.parent_sd;
+ 	sysfs_link_sibling(sd);
+ 
+-	mutex_unlock(&sysfs_mutex);
+-
++	if (new_dentry) {
++		d_add(new_dentry, NULL);
++		d_move(old_dentry, new_dentry);
++	}
+ 	error = 0;
+-	goto out_unlock;
++addrm_finish:
++	sysfs_addrm_finish(&acxt);
+ 
+- out_drop:
+-	d_drop(new_dentry);
+- out_unlock:
+-	mutex_unlock(&new_parent->d_inode->i_mutex);
+- out_dput:
+ 	kfree(dup_name);
+-	dput(new_parent);
+-	dput(old_dentry);
+ 	dput(new_dentry);
+ 	return error;
+ }
+@@ -1103,8 +1181,11 @@
+ 			i++;
+ 			/* fallthrough */
+ 		default:
+-			mutex_lock(&sysfs_mutex);
++			/* If I am the shadow master return nothing. */
++			if (parent_sd->s_flags & SYSFS_FLAG_SHADOWED)
++				return 0;
+ 
++			mutex_lock(&sysfs_mutex);
+ 			pos = &parent_sd->s_children;
+ 			while (*pos != cursor)
+ 				pos = &(*pos)->s_sibling;
+@@ -1186,125 +1267,192 @@
+ 	return offset;
+ }
+ 
++const struct file_operations sysfs_dir_operations = {
++	.open		= sysfs_dir_open,
++	.release	= sysfs_dir_close,
++	.llseek		= sysfs_dir_lseek,
++	.read		= generic_read_dir,
++	.readdir	= sysfs_readdir,
++};
+ 
+-/**
+- *	sysfs_make_shadowed_dir - Setup so a directory can be shadowed
+- *	@kobj:	object we're creating shadow of.
+- */
+ 
+-int sysfs_make_shadowed_dir(struct kobject *kobj,
+-	void * (*follow_link)(struct dentry *, struct nameidata *))
++static void sysfs_prune_shadow_sd(struct sysfs_dirent *sd)
+ {
+-	struct dentry *dentry;
+-	struct inode *inode;
+-	struct inode_operations *i_op;
++	struct sysfs_addrm_cxt acxt;
+ 
+-	/* get dentry for @kobj->sd, dentry of a shadowed dir is pinned */
+-	dentry = sysfs_get_dentry(kobj->sd);
+-	if (IS_ERR(dentry))
+-		return PTR_ERR(dentry);
++	/* If a shadow directory goes empty remove it. */
++	if (sysfs_type(sd) != SYSFS_SHADOW_DIR)
++		return;
+ 
+-	inode = dentry->d_inode;
+-	if (inode->i_op != &sysfs_dir_inode_operations) {
+-		dput(dentry);
+-		return -EINVAL;
+-	}
++	if (sd->s_children)
++		return;
+ 
+-	i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
+-	if (!i_op)
+-		return -ENOMEM;
++	sysfs_addrm_start(&acxt, sd->s_parent);
+ 
+-	memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op));
+-	i_op->follow_link = follow_link;
++	if (sd->s_flags & SYSFS_FLAG_REMOVED)
++		goto addrm_finish;
+ 
+-	/* Locking of inode->i_op?
+-	 * Since setting i_op is a single word write and they
+-	 * are atomic we should be ok here.
+-	 */
+-	inode->i_op = i_op;
+-	return 0;
+-}
++	if (sd->s_children)
++		goto addrm_finish;
+ 
+-/**
+- *	sysfs_create_shadow_dir - create a shadow directory for an object.
+- *	@kobj:	object we're creating directory for.
+- *
+- *	sysfs_make_shadowed_dir must already have been called on this
+- *	directory.
+- */
++	__remove_dir(&acxt, sd);
++addrm_finish:
++	sysfs_addrm_finish(&acxt);
++}
+ 
+-struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
++static struct sysfs_dirent *add_shadow_sd(struct sysfs_dirent *parent_sd, const void *tag)
+ {
+-	struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
+-	struct dentry *dir, *parent, *shadow;
++	struct sysfs_dirent *sd = NULL;
++	struct dentry *dir, *shadow;
+ 	struct inode *inode;
+-	struct sysfs_dirent *sd;
+-	struct sysfs_addrm_cxt acxt;
+-
+-	dir = sysfs_get_dentry(kobj->sd);
+-	if (IS_ERR(dir)) {
+-		sd = (void *)dir;
+-		goto out;
+-	}
+-	parent = dir->d_parent;
+ 
++	dir = parent_sd->s_dentry;
+ 	inode = dir->d_inode;
+-	sd = ERR_PTR(-EINVAL);
+-	if (!sysfs_is_shadowed_inode(inode))
+-		goto out_dput;
+ 
+-	shadow = d_alloc(parent, &dir->d_name);
++	shadow = d_alloc(dir->d_parent, &dir->d_name);
+ 	if (!shadow)
+-		goto nomem;
++		goto out;
+ 
+-	sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
++	/* Since the shadow directory is reachable make it look
++	 * like it is actually hashed.
++	 */
++	shadow->d_hash.pprev = &shadow->d_hash.next;
++	shadow->d_hash.next = NULL;
++	shadow->d_flags &= ~DCACHE_UNHASHED;
++
++	sd = sysfs_new_dirent(tag, parent_sd->s_mode, SYSFS_SHADOW_DIR);
+ 	if (!sd)
+-		goto nomem;
+-	sd->s_elem.dir.kobj = kobj;
++		goto error;
+ 
+-	sysfs_addrm_start(&acxt, parent_sd);
++	sd->s_elem.dir.kobj = parent_sd->s_elem.dir.kobj;
++	sd->s_parent = sysfs_get(parent_sd);
+ 
+-	/* add but don't link into children list */
+-	sysfs_add_one(&acxt, sd);
++	/* Use the inode number of the parent we are shadowing */
++	sysfs_free_ino(sd->s_ino);
++	sd->s_ino = parent_sd->s_ino;
++
++	inc_nlink(inode);
++	inc_nlink(dir->d_parent->d_inode);
+ 
+-	/* attach and instantiate dentry */
++	sysfs_link_sibling(sd);
++	__iget(inode);
++	sysfs_instantiate(shadow, inode);
+ 	sysfs_attach_dentry(sd, shadow);
+-	d_instantiate(shadow, igrab(inode));
+-	inc_nlink(inode);	/* tj: synchronization? */
++out:
++	return sd;
++error:
++	dput(shadow);
++	goto out;
++}
+ 
+-	sysfs_addrm_finish(&acxt);
++int sysfs_resolve_for_create(struct kobject *kobj,
++				struct sysfs_dirent **parent_sd)
++{
++	const struct shadow_dir_operations *shadow_ops;
++	struct sysfs_dirent *sd, *shadow_sd;
++
++	sd = *parent_sd;
++	if (sysfs_type(sd) == SYSFS_SHADOW_DIR)
++		sd = sd->s_parent;
++
++	if (sd->s_flags & SYSFS_FLAG_SHADOWED) {
++		const void *tag;
++
++		shadow_ops = sd->s_dentry->d_inode->i_private;
++		tag = shadow_ops->kobject_tag(kobj);
++
++		shadow_sd = find_shadow_sd(sd, tag);
++		if (!shadow_sd)
++			shadow_sd = add_shadow_sd(sd, tag);
++		sd = shadow_sd;
++	}
++	if (sd) {
++		*parent_sd = sd;
++		return 1;
++	}
++	return 0;
++}
+ 
+-	dget(shadow);		/* Extra count - pin the dentry in core */
++int sysfs_resolve_for_remove(struct kobject *kobj,
++				struct sysfs_dirent **parent_sd)
++{
++	struct sysfs_dirent *sd;
++	/* If dentry is a shadow directory find the shadow that is
++	 * stored under the same tag as kobj.  This allows removal
++	 * of dirents to function properly even if the value of
++	 * kobject_tag() has changed since we initially created
++	 * the dirents assoctated with kobj.
++	 */
+ 
+-	goto out_dput;
++	sd = *parent_sd;
++	if (sysfs_type(sd) == SYSFS_SHADOW_DIR)
++		sd = sd->s_parent;
++	if (sd->s_flags & SYSFS_FLAG_SHADOWED) {
++		const void *tag;
+ 
+- nomem:
+-	dput(shadow);
+-	sd = ERR_PTR(-ENOMEM);
+- out_dput:
+-	dput(dir);
+- out:
+-	return sd;
++		tag = find_shadow_tag(kobj);
++		sd = find_shadow_sd(sd, tag);
++	}
++	if (sd) {
++		*parent_sd = sd;
++		return 1;
++	}
++	return 0;
+ }
+ 
+ /**
+- *	sysfs_remove_shadow_dir - remove an object's directory.
+- *	@shadow_sd: sysfs_dirent of shadow directory
++ *	sysfs_enable_shadowing - Automatically create shadows of a directory
++ *	@kobj:	object to automatically shadow
+  *
+- *	The only thing special about this is that we remove any files in
+- *	the directory before we remove the directory, and we've inlined
+- *	what used to be sysfs_rmdir() below, instead of calling separately.
++ *	Once shadowing has been enabled on a directory the contents
++ *	of the directory become dependent upon context.
++ *
++ *	shadow_ops->current_tag() returns the context for the current
++ *	process.
++ *
++ *	shadow_ops->kobject_tag() returns the context that a given kobj
++ *	resides in.
++ *
++ *	Using those methods the sysfs code on shadowed directories
++ *	carefully stores the files so that when we lookup files
++ *	we get the proper answer for our context.
++ *
++ *	If the context of a kobject is changed it is expected that
++ *	the kobject will be renamed so the appropriate sysfs data structures
++ *	can be updated.
+  */
+-
+-void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
++int sysfs_enable_shadowing(struct kobject *kobj,
++	const struct shadow_dir_operations *shadow_ops)
+ {
+-	__sysfs_remove_dir(shadow_sd);
++	struct sysfs_dirent *sd;
++	struct dentry *dentry;
++	int err;
++
++	/* Find the dentry for the shadowed directory and
++	 * increase it's count.
++	 */
++	err = -ENOENT;
++	sd = kobj->sd;
++	dentry = sysfs_get_dentry(sd);
++	if (!dentry)
++		goto out;
++
++	mutex_lock(&sysfs_mutex);
++	err = -EINVAL;
++	/* We can only enable shadowing on empty directories
++	 * where shadowing is not already enabled.
++	 */
++	if (!sd->s_children && (sysfs_type(sd) == SYSFS_DIR) &&
++	    !(sd->s_flags & SYSFS_FLAG_REMOVED) &&
++	    !(sd->s_flags & SYSFS_FLAG_SHADOWED)) {
++		sd->s_flags |= SYSFS_FLAG_SHADOWED;
++		dentry->d_inode->i_private = (void *)shadow_ops;
++		err = 0;
++	}
++	mutex_unlock(&sysfs_mutex);
++out:
++	if (err)
++		dput(dentry);
++	return err;
+ }
+ 
+-const struct file_operations sysfs_dir_operations = {
+-	.open		= sysfs_dir_open,
+-	.release	= sysfs_dir_close,
+-	.llseek		= sysfs_dir_lseek,
+-	.read		= generic_read_dir,
+-	.readdir	= sysfs_readdir,
+-};
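
The sysfs changes above replace the old one-off shadow directories with tagged
shadowing: a directory marked SYSFS_FLAG_SHADOWED keeps one SYSFS_SHADOW_DIR
child per tag, current_tag() picks the child for the running process, and
kobject_tag() picks the child a given object belongs in. Assuming the tag is
the struct net pointer itself, as the network namespace series uses for
per-namespace device directories, the two hooks might be filled in as below
(struct shadow_dir_operations is defined elsewhere in the series; the netdev_*
names are illustrative):

static const void *netdev_current_tag(void)
{
	return current->nsproxy->net_ns;
}

static const void *netdev_kobject_tag(struct kobject *kobj)
{
	struct net_device *ndev =
		to_net_dev(container_of(kobj, struct device, kobj));
	return ndev->nd_net;
}

static const struct shadow_dir_operations netdev_shadow_ops = {
	.current_tag = netdev_current_tag,
	.kobject_tag = netdev_kobject_tag,
};

/* enabled once on an empty directory, e.g. at class registration: */
/*	err = sysfs_enable_shadowing(dir_kobj, &netdev_shadow_ops);  */
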
+diff -Nurb linux-2.6.22-try2/fs/sysfs/file.c linux-2.6.22-try2-netns/fs/sysfs/file.c
+--- linux-2.6.22-try2/fs/sysfs/file.c	2007-12-19 15:46:06.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/sysfs/file.c	2007-12-19 22:49:13.000000000 -0500
+@@ -556,7 +556,7 @@
+ 
+ void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
+ {
+-	sysfs_hash_and_remove(kobj->sd, attr->name);
++	sysfs_hash_and_remove(kobj, kobj->sd, attr->name);
+ }
+ 
+ 
+@@ -573,7 +573,7 @@
+ 
+ 	dir_sd = sysfs_get_dirent(kobj->sd, group);
+ 	if (dir_sd) {
+-		sysfs_hash_and_remove(dir_sd, attr->name);
++		sysfs_hash_and_remove(kobj, dir_sd, attr->name);
+ 		sysfs_put(dir_sd);
+ 	}
+ }
+diff -Nurb linux-2.6.22-try2/fs/sysfs/group.c linux-2.6.22-try2-netns/fs/sysfs/group.c
+--- linux-2.6.22-try2/fs/sysfs/group.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/sysfs/group.c	2007-12-19 22:49:13.000000000 -0500
+@@ -13,21 +13,20 @@
+ #include <linux/dcache.h>
+ #include <linux/namei.h>
+ #include <linux/err.h>
+-#include <linux/fs.h>
+ #include <asm/semaphore.h>
+ #include "sysfs.h"
+ 
+ 
+-static void remove_files(struct sysfs_dirent *dir_sd,
++static void remove_files(struct kobject *kobj, struct sysfs_dirent *dir_sd,
+ 			 const struct attribute_group *grp)
+ {
+ 	struct attribute *const* attr;
+ 
+ 	for (attr = grp->attrs; *attr; attr++)
+-		sysfs_hash_and_remove(dir_sd, (*attr)->name);
++		sysfs_hash_and_remove(kobj, dir_sd, (*attr)->name);
+ }
+ 
+-static int create_files(struct sysfs_dirent *dir_sd,
++static int create_files(struct kobject *kobj, struct sysfs_dirent *dir_sd,
+ 			const struct attribute_group *grp)
+ {
+ 	struct attribute *const* attr;
+@@ -36,7 +35,7 @@
+ 	for (attr = grp->attrs; *attr && !error; attr++)
+ 		error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR);
+ 	if (error)
+-		remove_files(dir_sd, grp);
++		remove_files(kobj, dir_sd, grp);
+ 	return error;
+ }
+ 
+@@ -56,7 +55,7 @@
+ 	} else
+ 		sd = kobj->sd;
+ 	sysfs_get(sd);
+-	error = create_files(sd, grp);
++	error = create_files(kobj, sd, grp);
+ 	if (error) {
+ 		if (grp->name)
+ 			sysfs_remove_subdir(sd);
+@@ -77,7 +76,7 @@
+ 	} else
+ 		sd = sysfs_get(dir_sd);
+ 
+-	remove_files(sd, grp);
++	remove_files(kobj, sd, grp);
+ 	if (grp->name)
+ 		sysfs_remove_subdir(sd);
+ 
+diff -Nurb linux-2.6.22-try2/fs/sysfs/inode.c linux-2.6.22-try2-netns/fs/sysfs/inode.c
+--- linux-2.6.22-try2/fs/sysfs/inode.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/sysfs/inode.c	2007-12-19 22:49:13.000000000 -0500
+@@ -34,16 +34,6 @@
+ 	.setattr	= sysfs_setattr,
+ };
+ 
+-void sysfs_delete_inode(struct inode *inode)
+-{
+-	/* Free the shadowed directory inode operations */
+-	if (sysfs_is_shadowed_inode(inode)) {
+-		kfree(inode->i_op);
+-		inode->i_op = NULL;
+-	}
+-	return generic_delete_inode(inode);
+-}
+-
+ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
+ {
+ 	struct inode * inode = dentry->d_inode;
+@@ -197,17 +187,16 @@
+ 	d_instantiate(dentry, inode);
+ }
+ 
+-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
++int sysfs_hash_and_remove(struct kobject *kobj, struct sysfs_dirent *dir_sd, const char *name)
+ {
+ 	struct sysfs_addrm_cxt acxt;
+ 	struct sysfs_dirent **pos, *sd;
+ 
+-	if (!dir_sd)
+-		return -ENOENT;
+-
+ 	sysfs_addrm_start(&acxt, dir_sd);
++	if (!sysfs_resolve_for_remove(kobj, &acxt.parent_sd))
++		goto addrm_finish;
+ 
+-	for (pos = &dir_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
++	for (pos = &acxt.parent_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
+ 		sd = *pos;
+ 
+ 		if (!sysfs_type(sd))
+@@ -219,7 +208,7 @@
+ 			break;
+ 		}
+ 	}
+-
++addrm_finish:
+ 	if (sysfs_addrm_finish(&acxt))
+ 		return 0;
+ 	return -ENOENT;
+diff -Nurb linux-2.6.22-try2/fs/sysfs/mount.c linux-2.6.22-try2-netns/fs/sysfs/mount.c
+--- linux-2.6.22-try2/fs/sysfs/mount.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/sysfs/mount.c	2007-12-19 22:49:13.000000000 -0500
+@@ -19,7 +19,7 @@
+ 
+ static const struct super_operations sysfs_ops = {
+ 	.statfs		= simple_statfs,
+-	.drop_inode	= sysfs_delete_inode,
++	.drop_inode	= generic_delete_inode,
+ };
+ 
+ struct sysfs_dirent sysfs_root = {
+diff -Nurb linux-2.6.22-try2/fs/sysfs/symlink.c linux-2.6.22-try2-netns/fs/sysfs/symlink.c
+--- linux-2.6.22-try2/fs/sysfs/symlink.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/sysfs/symlink.c	2007-12-19 22:49:13.000000000 -0500
+@@ -15,8 +15,11 @@
+ {
+ 	int depth = 0;
+ 
+-	for (; sd->s_parent; sd = sd->s_parent)
++	for (; sd->s_parent; sd = sd->s_parent) {
++		if (sysfs_type(sd) == SYSFS_SHADOW_DIR)
++			continue;
+ 		depth++;
++	}
+ 
+ 	return depth;
+ }
+@@ -25,17 +28,24 @@
+ {
+ 	int length = 1;
+ 
+-	for (; sd->s_parent; sd = sd->s_parent)
++	for (; sd->s_parent; sd = sd->s_parent) {
++		if (sysfs_type(sd) == SYSFS_SHADOW_DIR)
++			continue;
+ 		length += strlen(sd->s_name) + 1;
++	}
+ 
+ 	return length;
+ }
+ 
+ static void fill_object_path(struct sysfs_dirent *sd, char *buffer, int length)
+ {
++	int cur;
+ 	--length;
+ 	for (; sd->s_parent; sd = sd->s_parent) {
+-		int cur = strlen(sd->s_name);
++		if (sysfs_type(sd) == SYSFS_SHADOW_DIR)
++			continue;
++
++		cur = strlen(sd->s_name);
+ 
+ 		/* back up enough to print this bus id with '/' */
+ 		length -= cur;
+@@ -89,12 +99,15 @@
+ 	sd->s_elem.symlink.target_sd = target_sd;
+ 
+ 	sysfs_addrm_start(&acxt, parent_sd);
++	if (!sysfs_resolve_for_create(target, &acxt.parent_sd))
++		goto addrm_finish;
+ 
+-	if (!sysfs_find_dirent(parent_sd, name)) {
++	if (!sysfs_find_dirent(acxt.parent_sd, name)) {
+ 		sysfs_add_one(&acxt, sd);
+ 		sysfs_link_sibling(sd);
+ 	}
+ 
++addrm_finish:
+ 	if (sysfs_addrm_finish(&acxt))
+ 		return 0;
+ 
+@@ -108,6 +121,21 @@
+ 
+ 
+ /**
++ *	sysfs_delete_link - remove symlink in object's directory.
++ *	@kobj:	object we're acting for.
++ *	@targ:	object we're pointing to.
++ *	@name:	name of the symlink to remove.
++ *
++ *	Unlike sysfs_remove_link sysfs_delete_link has enough information
++ *	to successfully delete symlinks in shadow directories.
++ */
++void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
++			const char *name)
++{
++	sysfs_hash_and_remove(targ, kobj->sd, name);
++}
++
++/**
+  *	sysfs_remove_link - remove symlink in object's directory.
+  *	@kobj:	object we're acting for.
+  *	@name:	name of the symlink to remove.
+@@ -115,7 +143,23 @@
+ 
+ void sysfs_remove_link(struct kobject * kobj, const char * name)
+ {
+-	sysfs_hash_and_remove(kobj->sd, name);
++	sysfs_hash_and_remove(kobj, kobj->sd, name);
++}
++
++/**
++ *	sysfs_rename_link - rename symlink in object's directory.
++ *	@kobj:	object we're acting for.
++ *	@targ:	object we're pointing to.
++ *	@old:	previous name of the symlink.
++ *	@new:	new name of the symlink.
++ *
++ *	A helper function for the common rename symlink idiom.
++ */
++int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
++			const char *old, const char *new)
++{
++	sysfs_delete_link(kobj, targ, old);
++	return sysfs_create_link(kobj, targ, new);
+ }
+ 
+ static int sysfs_get_target_path(struct sysfs_dirent * parent_sd,
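
sysfs_delete_link() exists because plain sysfs_remove_link() only knows the
directory the symlink lives in; under shadowing the correct shadow child is
resolved from the *target* object's tag, so the target has to be passed down to
sysfs_hash_and_remove(). sysfs_rename_link() just wraps the delete-then-create
idiom; a typical caller (illustrative names) would be a device rename path:

/* refresh the class-directory symlink after a device rename;
 * old_name/new_name would come from the rename request */
static int refresh_device_link(struct kobject *class_dir,
			       struct kobject *dev_kobj,
			       const char *old_name, const char *new_name)
{
	return sysfs_rename_link(class_dir, dev_kobj, old_name, new_name);
}
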
+diff -Nurb linux-2.6.22-try2/fs/sysfs/sysfs.h linux-2.6.22-try2-netns/fs/sysfs/sysfs.h
+--- linux-2.6.22-try2/fs/sysfs/sysfs.h	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/fs/sysfs/sysfs.h	2007-12-19 22:49:13.000000000 -0500
+@@ -58,6 +58,12 @@
+ extern struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd);
+ extern void sysfs_link_sibling(struct sysfs_dirent *sd);
+ extern void sysfs_unlink_sibling(struct sysfs_dirent *sd);
++
++extern int sysfs_resolve_for_create(struct kobject *kobj,
++				    struct sysfs_dirent **parent_sd);
++extern int sysfs_resolve_for_remove(struct kobject *kobj,
++				    struct sysfs_dirent **parent_sd);
++
+ extern struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd);
+ extern void sysfs_put_active(struct sysfs_dirent *sd);
+ extern struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
+@@ -70,7 +76,6 @@
+ 			     struct sysfs_dirent *sd);
+ extern int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
+ 
+-extern void sysfs_delete_inode(struct inode *inode);
+ extern void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode);
+ extern struct inode * sysfs_get_inode(struct sysfs_dirent *sd);
+ extern void sysfs_instantiate(struct dentry *dentry, struct inode *inode);
+@@ -85,7 +90,8 @@
+ 
+ extern int sysfs_add_file(struct sysfs_dirent *dir_sd,
+ 			  const struct attribute *attr, int type);
+-extern int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
++extern int sysfs_hash_and_remove(struct kobject *kobj,
++				 struct sysfs_dirent *dir_sd, const char *name);
+ extern struct sysfs_dirent *sysfs_find(struct sysfs_dirent *dir, const char * name);
+ 
+ extern int sysfs_create_subdir(struct kobject *kobj, const char *name,
+@@ -122,8 +128,3 @@
+ 	if (sd && atomic_dec_and_test(&sd->s_count))
+ 		release_sysfs_dirent(sd);
+ }
+-
+-static inline int sysfs_is_shadowed_inode(struct inode *inode)
+-{
+-	return S_ISDIR(inode->i_mode) && inode->i_op->follow_link;
+-}
+diff -Nurb linux-2.6.22-try2/include/linux/device.h linux-2.6.22-try2-netns/include/linux/device.h
+--- linux-2.6.22-try2/include/linux/device.h	2007-12-19 15:29:22.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/device.h	2007-12-19 22:49:13.000000000 -0500
+@@ -200,6 +200,8 @@
+ 
+ 	int	(*suspend)(struct device *, pm_message_t state);
+ 	int	(*resume)(struct device *);
++
++	const struct shadow_dir_operations *shadow_ops;
+ };
+ 
+ extern int __must_check class_register(struct class *);
+diff -Nurb linux-2.6.22-try2/include/linux/if_bridge.h linux-2.6.22-try2-netns/include/linux/if_bridge.h
+--- linux-2.6.22-try2/include/linux/if_bridge.h	2007-12-19 13:37:51.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/if_bridge.h	2007-12-19 22:49:13.000000000 -0500
+@@ -104,7 +104,7 @@
+ 
+ #include <linux/netdevice.h>
+ 
+-extern void brioctl_set(int (*ioctl_hook)(unsigned int, void __user *));
++extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
+ extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
+ 					       struct sk_buff *skb);
+ extern int (*br_should_route_hook)(struct sk_buff **pskb);
+diff -Nurb linux-2.6.22-try2/include/linux/if_pppox.h linux-2.6.22-try2-netns/include/linux/if_pppox.h
+--- linux-2.6.22-try2/include/linux/if_pppox.h	2007-12-19 13:37:51.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/if_pppox.h	2007-12-19 22:49:13.000000000 -0500
+@@ -160,7 +160,7 @@
+ struct module;
+ 
+ struct pppox_proto {
+-	int		(*create)(struct socket *sock);
++	int		(*create)(struct net *net, struct socket *sock);
+ 	int		(*ioctl)(struct socket *sock, unsigned int cmd,
+ 				 unsigned long arg);
+ 	struct module	*owner;
+diff -Nurb linux-2.6.22-try2/include/linux/if_vlan.h linux-2.6.22-try2-netns/include/linux/if_vlan.h
+--- linux-2.6.22-try2/include/linux/if_vlan.h	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/if_vlan.h	2007-12-19 22:49:13.000000000 -0500
+@@ -62,7 +62,7 @@
+ #define VLAN_VID_MASK	0xfff
+ 
+ /* found in socket.c */
+-extern void vlan_ioctl_set(int (*hook)(void __user *));
++extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
+ 
+ #define VLAN_NAME "vlan"
+ 
+diff -Nurb linux-2.6.22-try2/include/linux/inetdevice.h linux-2.6.22-try2-netns/include/linux/inetdevice.h
+--- linux-2.6.22-try2/include/linux/inetdevice.h	2007-12-19 13:37:51.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/inetdevice.h	2007-12-19 22:49:13.000000000 -0500
+@@ -17,8 +17,6 @@
+ 	DECLARE_BITMAP(state, __NET_IPV4_CONF_MAX - 1);
+ };
+ 
+-extern struct ipv4_devconf ipv4_devconf;
+-
+ struct in_device
+ {
+ 	struct net_device	*dev;
+@@ -44,7 +42,7 @@
+ };
+ 
+ #define IPV4_DEVCONF(cnf, attr) ((cnf).data[NET_IPV4_CONF_ ## attr - 1])
+-#define IPV4_DEVCONF_ALL(attr) IPV4_DEVCONF(ipv4_devconf, attr)
++#define IPV4_DEVCONF_ALL(net, attr) IPV4_DEVCONF(*((net)->ipv4_devconf), attr)
+ 
+ static inline int ipv4_devconf_get(struct in_device *in_dev, int index)
+ {
+@@ -71,14 +69,14 @@
+ 	ipv4_devconf_set((in_dev), NET_IPV4_CONF_ ## attr, (val))
+ 
+ #define IN_DEV_ANDCONF(in_dev, attr) \
+-	(IPV4_DEVCONF_ALL(attr) && IN_DEV_CONF_GET((in_dev), attr))
++	(IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr) && IN_DEV_CONF_GET((in_dev), attr))
+ #define IN_DEV_ORCONF(in_dev, attr) \
+-	(IPV4_DEVCONF_ALL(attr) || IN_DEV_CONF_GET((in_dev), attr))
++	(IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr) || IN_DEV_CONF_GET((in_dev), attr))
+ #define IN_DEV_MAXCONF(in_dev, attr) \
+-	(max(IPV4_DEVCONF_ALL(attr), IN_DEV_CONF_GET((in_dev), attr)))
++	(max(IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, attr), IN_DEV_CONF_GET((in_dev), attr)))
+ 
+ #define IN_DEV_FORWARD(in_dev)		IN_DEV_CONF_GET((in_dev), FORWARDING)
+-#define IN_DEV_MFORWARD(in_dev)		(IPV4_DEVCONF_ALL(MC_FORWARDING) && \
++#define IN_DEV_MFORWARD(in_dev)		(IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, MC_FORWARDING) && \
+ 					 IPV4_DEVCONF((in_dev)->cnf, \
+ 						      MC_FORWARDING))
+ #define IN_DEV_RPFILTER(in_dev)		IN_DEV_ANDCONF((in_dev), RP_FILTER)
+@@ -127,15 +125,15 @@
+ extern int register_inetaddr_notifier(struct notifier_block *nb);
+ extern int unregister_inetaddr_notifier(struct notifier_block *nb);
+ 
+-extern struct net_device 	*ip_dev_find(__be32 addr);
++extern struct net_device 	*ip_dev_find(struct net *net, __be32 addr);
+ extern int		inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b);
+-extern int		devinet_ioctl(unsigned int cmd, void __user *);
++extern int		devinet_ioctl(struct net *net, unsigned int cmd, void __user *);
+ extern void		devinet_init(void);
+-extern struct in_device	*inetdev_by_index(int);
++extern struct in_device	*inetdev_by_index(struct net *, int);
+ extern __be32		inet_select_addr(const struct net_device *dev, __be32 dst, int scope);
+-extern __be32		inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope);
++extern __be32		inet_confirm_addr(struct net *net, const struct net_device *dev, __be32 dst, __be32 local, int scope);
+ extern struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask);
+-extern void		inet_forward_change(void);
++extern void		inet_forward_change(struct net *net);
+ 
+ static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa)
+ {
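
With ipv4_devconf no longer a global, IPV4_DEVCONF_ALL() has to be told which
namespace's "all" settings to read, and the IN_DEV_* helpers derive it from
(in_dev)->dev->nd_net. A call-site migration sketch (the helper below is
illustrative, not part of the patch):

/* before:  if (IPV4_DEVCONF_ALL(FORWARDING)) ...
 * after:   if (IPV4_DEVCONF_ALL(dev->nd_net, FORWARDING)) ... */
static int dev_forwarding_enabled(struct net_device *dev)
{
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	if (!in_dev)
		return 0;
	/* namespace-wide switch AND the per-device setting */
	return IN_DEV_ANDCONF(in_dev, FORWARDING);
}
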
+diff -Nurb linux-2.6.22-try2/include/linux/init_task.h linux-2.6.22-try2-netns/include/linux/init_task.h
+--- linux-2.6.22-try2/include/linux/init_task.h	2007-12-19 15:29:24.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/init_task.h	2007-12-19 22:49:13.000000000 -0500
+@@ -9,6 +9,7 @@
+ #include <linux/ipc.h>
+ #include <linux/pid_namespace.h>
+ #include <linux/user_namespace.h>
++#include <net/net_namespace.h>
+ 
+ #define INIT_FDTABLE \
+ {							\
+@@ -78,6 +79,7 @@
+ 	.nslock		= __SPIN_LOCK_UNLOCKED(nsproxy.nslock),		\
+ 	.uts_ns		= &init_uts_ns,					\
+ 	.mnt_ns		= NULL,						\
++	.net_ns		= &init_net,					\
+ 	INIT_IPC_NS(ipc_ns)						\
+ 	.user_ns	= &init_user_ns,				\
+ }
+diff -Nurb linux-2.6.22-try2/include/linux/kobject.h linux-2.6.22-try2-netns/include/linux/kobject.h
+--- linux-2.6.22-try2/include/linux/kobject.h	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/kobject.h	2007-12-19 22:49:13.000000000 -0500
+@@ -71,14 +71,9 @@
+ extern void kobject_cleanup(struct kobject *);
+ 
+ extern int __must_check kobject_add(struct kobject *);
+-extern int __must_check kobject_shadow_add(struct kobject *kobj,
+-					   struct sysfs_dirent *shadow_parent);
+ extern void kobject_del(struct kobject *);
+ 
+ extern int __must_check kobject_rename(struct kobject *, const char *new_name);
+-extern int __must_check kobject_shadow_rename(struct kobject *kobj,
+-					      struct sysfs_dirent *new_parent,
+-					      const char *new_name);
+ extern int __must_check kobject_move(struct kobject *, struct kobject *);
+ 
+ extern int __must_check kobject_register(struct kobject *);
+diff -Nurb linux-2.6.22-try2/include/linux/net.h linux-2.6.22-try2-netns/include/linux/net.h
+--- linux-2.6.22-try2/include/linux/net.h	2007-12-19 13:37:51.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/net.h	2007-12-19 22:49:13.000000000 -0500
+@@ -23,6 +23,7 @@
+ 
+ struct poll_table_struct;
+ struct inode;
++struct net;
+ 
+ #define NPROTO		34		/* should be enough for now..	*/
+ 
+@@ -170,7 +171,7 @@
+ 
+ struct net_proto_family {
+ 	int		family;
+-	int		(*create)(struct socket *sock, int protocol);
++	int		(*create)(struct net *net, struct socket *sock, int protocol);
+ 	struct module	*owner;
+ };
+ 
+diff -Nurb linux-2.6.22-try2/include/linux/netdevice.h linux-2.6.22-try2-netns/include/linux/netdevice.h
+--- linux-2.6.22-try2/include/linux/netdevice.h	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/netdevice.h	2007-12-19 22:49:13.000000000 -0500
+@@ -39,6 +39,7 @@
+ #include <linux/percpu.h>
+ #include <linux/dmaengine.h>
+ 
++struct net;
+ struct vlan_group;
+ struct ethtool_ops;
+ struct netpoll_info;
+@@ -326,6 +327,7 @@
+ #define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
+ #define NETIF_F_GSO		2048	/* Enable software GSO. */
+ #define NETIF_F_LLTX		4096	/* LockLess TX */
++#define NETIF_F_NETNS_LOCAL	8192	/* Does not change network namespaces */
+ 
+ 	/* Segmentation offload features */
+ #define NETIF_F_GSO_SHIFT	16
+@@ -537,6 +539,9 @@
+ 	void                    (*poll_controller)(struct net_device *dev);
+ #endif
+ 
++	/* Network namespace this network device is inside */
++	struct net		*nd_net;
++
+ 	/* bridge stuff */
+ 	struct net_bridge_port	*br_port;
+ 
+@@ -583,45 +588,48 @@
+ #include <linux/interrupt.h>
+ #include <linux/notifier.h>
+ 
+-extern struct net_device		loopback_dev;		/* The loopback */
+-extern struct list_head			dev_base_head;		/* All devices */
+ extern rwlock_t				dev_base_lock;		/* Device list lock */
+ 
+-#define for_each_netdev(d)		\
+-		list_for_each_entry(d, &dev_base_head, dev_list)
+-#define for_each_netdev_safe(d, n)	\
+-		list_for_each_entry_safe(d, n, &dev_base_head, dev_list)
+-#define for_each_netdev_continue(d)		\
+-		list_for_each_entry_continue(d, &dev_base_head, dev_list)
+-#define net_device_entry(lh)	list_entry(lh, struct net_device, dev_list)
+-
+-static inline struct net_device *next_net_device(struct net_device *dev)
+-{
+-	struct list_head *lh;
+ 
+-	lh = dev->dev_list.next;
+-	return lh == &dev_base_head ? NULL : net_device_entry(lh);
+-}
++#define for_each_netdev(net, d)		\
++		list_for_each_entry(d, &(net)->dev_base_head, dev_list)
++#define for_each_netdev_safe(net, d, n)	\
++		list_for_each_entry_safe(d, n, &(net)->dev_base_head, dev_list)
++#define for_each_netdev_continue(net, d)		\
++		list_for_each_entry_continue(d, &(net)->dev_base_head, dev_list)
++#define net_device_entry(lh)	list_entry(lh, struct net_device, dev_list)
+ 
+-static inline struct net_device *first_net_device(void)
+-{
+-	return list_empty(&dev_base_head) ? NULL :
+-		net_device_entry(dev_base_head.next);
+-}
++#define next_net_device(d) 						\
++({									\
++	struct net_device *dev = d;					\
++	struct list_head *lh;						\
++	struct net *net;						\
++									\
++	net = dev->nd_net;						\
++	lh = dev->dev_list.next;					\
++	lh == &net->dev_base_head ? NULL : net_device_entry(lh);	\
++})
++
++#define first_net_device(N)					\
++({								\
++	struct net *NET = (N);					\
++	list_empty(&NET->dev_base_head) ? NULL :		\
++		net_device_entry(NET->dev_base_head.next);	\
++})
+ 
+ extern int 			netdev_boot_setup_check(struct net_device *dev);
+ extern unsigned long		netdev_boot_base(const char *prefix, int unit);
+-extern struct net_device    *dev_getbyhwaddr(unsigned short type, char *hwaddr);
+-extern struct net_device *dev_getfirstbyhwtype(unsigned short type);
+-extern struct net_device *__dev_getfirstbyhwtype(unsigned short type);
++extern struct net_device    *dev_getbyhwaddr(struct net *net, unsigned short type, char *hwaddr);
++extern struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
++extern struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type);
+ extern void		dev_add_pack(struct packet_type *pt);
+ extern void		dev_remove_pack(struct packet_type *pt);
+ extern void		__dev_remove_pack(struct packet_type *pt);
+ 
+-extern struct net_device	*dev_get_by_flags(unsigned short flags,
++extern struct net_device	*dev_get_by_flags(struct net *net, unsigned short flags,
+ 						  unsigned short mask);
+-extern struct net_device	*dev_get_by_name(const char *name);
+-extern struct net_device	*__dev_get_by_name(const char *name);
++extern struct net_device	*dev_get_by_name(struct net *net, const char *name);
++extern struct net_device	*__dev_get_by_name(struct net *net, const char *name);
+ extern int		dev_alloc_name(struct net_device *dev, const char *name);
+ extern int		dev_open(struct net_device *dev);
+ extern int		dev_close(struct net_device *dev);
+@@ -632,9 +640,9 @@
+ extern void		synchronize_net(void);
+ extern int 		register_netdevice_notifier(struct notifier_block *nb);
+ extern int		unregister_netdevice_notifier(struct notifier_block *nb);
+-extern int		call_netdevice_notifiers(unsigned long val, void *v);
+-extern struct net_device	*dev_get_by_index(int ifindex);
+-extern struct net_device	*__dev_get_by_index(int ifindex);
++extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev);
++extern struct net_device	*dev_get_by_index(struct net *net, int ifindex);
++extern struct net_device	*__dev_get_by_index(struct net *net, int ifindex);
+ extern int		dev_restart(struct net_device *dev);
+ #ifdef CONFIG_NETPOLL_TRAP
+ extern int		netpoll_trap(void);
+@@ -739,11 +747,13 @@
+ #define HAVE_NETIF_RECEIVE_SKB 1
+ extern int		netif_receive_skb(struct sk_buff *skb);
+ extern int		dev_valid_name(const char *name);
+-extern int		dev_ioctl(unsigned int cmd, void __user *);
+-extern int		dev_ethtool(struct ifreq *);
++extern int		dev_ioctl(struct net *net, unsigned int cmd, void __user *);
++extern int		dev_ethtool(struct net *net, struct ifreq *);
+ extern unsigned		dev_get_flags(const struct net_device *);
+ extern int		dev_change_flags(struct net_device *, unsigned);
+ extern int		dev_change_name(struct net_device *, char *);
++extern int		dev_change_net_namespace(struct net_device *,
++						 struct net *, const char *);
+ extern int		dev_set_mtu(struct net_device *, int);
+ extern int		dev_set_mac_address(struct net_device *,
+ 					    struct sockaddr *);
+@@ -1013,7 +1023,7 @@
+ extern void		netdev_state_change(struct net_device *dev);
+ extern void		netdev_features_change(struct net_device *dev);
+ /* Load a device via the kmod */
+-extern void		dev_load(const char *name);
++extern void		dev_load(struct net *net, const char *name);
+ extern void		dev_mcast_init(void);
+ extern int		netdev_max_backlog;
+ extern int		weight_p;
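
Every device-list walker now names a namespace explicitly: for_each_netdev()
and friends take a struct net *, and the old global loopback_dev and
dev_base_head externs are gone. Callers that have not been converted pass
&init_net, as the strip, led and afs hunks earlier in this patch do. A
converted iteration under the usual dev_base_lock rules looks like this
(illustrative helper):

static int count_running_devices(struct net *net)
{
	struct net_device *dev;
	int count = 0;

	read_lock(&dev_base_lock);
	for_each_netdev(net, dev)
		if (netif_running(dev))
			count++;
	read_unlock(&dev_base_lock);
	return count;
}
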
+diff -Nurb linux-2.6.22-try2/include/linux/netfilter/x_tables.h linux-2.6.22-try2-netns/include/linux/netfilter/x_tables.h
+--- linux-2.6.22-try2/include/linux/netfilter/x_tables.h	2007-12-19 13:37:51.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/netfilter/x_tables.h	2007-12-19 22:49:13.000000000 -0500
+@@ -289,7 +289,7 @@
+ 			   unsigned int size, const char *table, unsigned int hook,
+ 			   unsigned short proto, int inv_proto);
+ 
+-extern int xt_register_table(struct xt_table *table,
++extern int xt_register_table(struct net *net, struct xt_table *table,
+ 			     struct xt_table_info *bootstrap,
+ 			     struct xt_table_info *newinfo);
+ extern void *xt_unregister_table(struct xt_table *table);
+@@ -306,7 +306,7 @@
+ extern int xt_find_revision(int af, const char *name, u8 revision, int target,
+ 			    int *err);
+ 
+-extern struct xt_table *xt_find_table_lock(int af, const char *name);
++extern struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name);
+ extern void xt_table_unlock(struct xt_table *t);
+ 
+ extern int xt_proto_init(int af);
+diff -Nurb linux-2.6.22-try2/include/linux/netfilter.h linux-2.6.22-try2-netns/include/linux/netfilter.h
+--- linux-2.6.22-try2/include/linux/netfilter.h	2007-12-19 13:37:51.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/netfilter.h	2007-12-19 22:49:13.000000000 -0500
+@@ -362,11 +362,6 @@
+ #endif
+ }
+ 
+-#ifdef CONFIG_PROC_FS
+-#include <linux/proc_fs.h>
+-extern struct proc_dir_entry *proc_net_netfilter;
+-#endif
+-
+ #else /* !CONFIG_NETFILTER */
+ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb)
+ #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb)
+diff -Nurb linux-2.6.22-try2/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.22-try2-netns/include/linux/netfilter_ipv4/ip_tables.h
+--- linux-2.6.22-try2/include/linux/netfilter_ipv4/ip_tables.h	2007-12-19 13:37:52.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/netfilter_ipv4/ip_tables.h	2007-12-19 22:49:13.000000000 -0500
+@@ -292,7 +292,7 @@
+ #include <linux/init.h>
+ extern void ipt_init(void) __init;
+ 
+-extern int ipt_register_table(struct xt_table *table,
++extern int ipt_register_table(struct net *net, struct xt_table *table,
+ 			      const struct ipt_replace *repl);
+ extern void ipt_unregister_table(struct xt_table *table);
+ 
+diff -Nurb linux-2.6.22-try2/include/linux/netfilter_ipv4.h linux-2.6.22-try2-netns/include/linux/netfilter_ipv4.h
+--- linux-2.6.22-try2/include/linux/netfilter_ipv4.h	2007-12-19 13:37:52.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/netfilter_ipv4.h	2007-12-19 22:49:13.000000000 -0500
+@@ -75,7 +75,7 @@
+ #define SO_ORIGINAL_DST 80
+ 
+ #ifdef __KERNEL__
+-extern int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type);
++extern int ip_route_me_harder(struct net *net, struct sk_buff **pskb, unsigned addr_type);
+ extern int ip_xfrm_me_harder(struct sk_buff **pskb);
+ extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+ 				   unsigned int dataoff, u_int8_t protocol);
+diff -Nurb linux-2.6.22-try2/include/linux/netlink.h linux-2.6.22-try2-netns/include/linux/netlink.h
+--- linux-2.6.22-try2/include/linux/netlink.h	2007-12-19 15:29:22.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/netlink.h	2007-12-19 22:49:13.000000000 -0500
+@@ -27,6 +27,8 @@
+ 
+ #define MAX_LINKS 32		
+ 
++struct net;
++
+ struct sockaddr_nl
+ {
+ 	sa_family_t	nl_family;	/* AF_NETLINK	*/
+@@ -157,7 +159,8 @@
+ #define NETLINK_CREDS(skb)	(&NETLINK_CB((skb)).creds)
+ 
+ 
+-extern struct sock *netlink_kernel_create(int unit, unsigned int groups,
++extern struct sock *netlink_kernel_create(struct net *net,
++					  int unit,unsigned int groups,
+ 					  void (*input)(struct sock *sk, int len),
+ 					  struct mutex *cb_mutex,
+ 					  struct module *module);
+@@ -204,6 +207,7 @@
+ 
+ struct netlink_notify
+ {
++	struct net *net;
+ 	int pid;
+ 	int protocol;
+ };
+diff -Nurb linux-2.6.22-try2/include/linux/nsproxy.h linux-2.6.22-try2-netns/include/linux/nsproxy.h
+--- linux-2.6.22-try2/include/linux/nsproxy.h	2007-12-19 15:50:41.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/nsproxy.h	2007-12-19 22:57:59.000000000 -0500
+@@ -36,6 +36,7 @@
+ 	struct mnt_namespace *mnt_ns;
+ 	struct pid_namespace *pid_ns;
+ 	struct user_namespace *user_ns;
++	struct net *net_ns;
+ };
+ extern struct nsproxy init_nsproxy;
+ 
+diff -Nurb linux-2.6.22-try2/include/linux/proc_fs.h linux-2.6.22-try2-netns/include/linux/proc_fs.h
+--- linux-2.6.22-try2/include/linux/proc_fs.h	2007-12-19 15:29:24.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/proc_fs.h	2007-12-19 22:49:13.000000000 -0500
+@@ -86,8 +86,6 @@
+ 
+ extern struct proc_dir_entry proc_root;
+ extern struct proc_dir_entry *proc_root_fs;
+-extern struct proc_dir_entry *proc_net;
+-extern struct proc_dir_entry *proc_net_stat;
+ extern struct proc_dir_entry *proc_bus;
+ extern struct proc_dir_entry *proc_root_driver;
+ extern struct proc_dir_entry *proc_root_kcore;
+@@ -112,6 +110,10 @@
+ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
+ 						struct proc_dir_entry *parent);
+ extern void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
++static inline void remove_proc_pde(struct proc_dir_entry *pde)
++{
++	remove_proc_entry(pde->name, pde->parent);
++}
+ 
+ extern struct vfsmount *proc_mnt;
+ extern int proc_fill_super(struct super_block *,void *,int);
+@@ -181,42 +183,18 @@
+ 	return res;
+ }
+  
+-static inline struct proc_dir_entry *proc_net_create(const char *name,
+-	mode_t mode, get_info_t *get_info)
+-{
+-	return create_proc_info_entry(name,mode,proc_net,get_info);
+-}
+-
+-static inline struct proc_dir_entry *proc_net_fops_create(const char *name,
+-	mode_t mode, const struct file_operations *fops)
+-{
+-	struct proc_dir_entry *res = create_proc_entry(name, mode, proc_net);
+-	if (res)
+-		res->proc_fops = fops;
+-	return res;
+-}
+-
+-static inline void proc_net_remove(const char *name)
+-{
+-	remove_proc_entry(name,proc_net);
+-}
+-
+ #else
+ 
+ #define proc_root_driver NULL
+-#define proc_net NULL
+ #define proc_bus NULL
+ 
+-#define proc_net_fops_create(name, mode, fops)  ({ (void)(mode), NULL; })
+-#define proc_net_create(name, mode, info)	({ (void)(mode), NULL; })
+-static inline void proc_net_remove(const char *name) {}
+-
+ static inline void proc_flush_task(struct task_struct *task) { }
+ 
+ static inline struct proc_dir_entry *create_proc_entry(const char *name,
+ 	mode_t mode, struct proc_dir_entry *parent) { return NULL; }
+ 
+ #define remove_proc_entry(name, parent) do {} while (0)
++#define remove_proc_pde(PDE) do {} while (0)
+ 
+ static inline struct proc_dir_entry *proc_symlink(const char *name,
+ 		struct proc_dir_entry *parent,const char *dest) {return NULL;}
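
The new remove_proc_pde() helper above lets per-namespace teardown code drop a /proc entry through the pointer saved at creation time, instead of re-deriving the name and parent directory. A minimal sketch of the intended pattern; the proc_net_example field and function name are hypothetical, for illustration only:

static void example_net_exit(struct net *net)
{
	/* net->proc_net_example is a hypothetical per-net field holding
	 * the proc_dir_entry pointer returned at creation time */
	remove_proc_pde(net->proc_net_example);
}
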
+diff -Nurb linux-2.6.22-try2/include/linux/rtnetlink.h linux-2.6.22-try2-netns/include/linux/rtnetlink.h
+--- linux-2.6.22-try2/include/linux/rtnetlink.h	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/rtnetlink.h	2007-12-19 22:49:13.000000000 -0500
+@@ -580,11 +580,11 @@
+ ({	data = RTA_PAYLOAD(rta) >= len ? RTA_DATA(rta) : NULL; \
+ 	__rtattr_parse_nested_compat(tb, max, rta, len); })
+ 
+-extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo);
+-extern int rtnl_unicast(struct sk_buff *skb, u32 pid);
+-extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
++extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo);
++extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid);
++extern int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
+ 		       struct nlmsghdr *nlh, gfp_t flags);
+-extern void rtnl_set_sk_err(u32 group, int error);
++extern void rtnl_set_sk_err(struct net *net, u32 group, int error);
+ extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
+ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst,
+ 			      u32 id, u32 ts, u32 tsage, long expires,
+diff -Nurb linux-2.6.22-try2/include/linux/sched.h linux-2.6.22-try2-netns/include/linux/sched.h
+--- linux-2.6.22-try2/include/linux/sched.h	2007-12-19 15:50:06.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/sched.h	2007-12-19 22:58:22.000000000 -0500
+@@ -28,6 +28,7 @@
+ #define CLONE_NEWIPC		0x08000000	/* New ipcs */
+ #define CLONE_NEWUSER		0x10000000	/* New user namespace */
+ #define CLONE_KTHREAD		0x10000000	/* clone a kernel thread */
++#define CLONE_NEWNET		0x40000000	/* New network namespace */
+ 
+ /*
+  * Scheduling policies
+diff -Nurb linux-2.6.22-try2/include/linux/socket.h linux-2.6.22-try2-netns/include/linux/socket.h
+--- linux-2.6.22-try2/include/linux/socket.h	2007-12-19 13:37:52.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/socket.h	2007-12-19 22:49:13.000000000 -0500
+@@ -24,7 +24,6 @@
+ #include <linux/types.h>		/* pid_t			*/
+ #include <linux/compiler.h>		/* __user			*/
+ 
+-extern int sysctl_somaxconn;
+ #ifdef CONFIG_PROC_FS
+ struct seq_file;
+ extern void socket_seq_show(struct seq_file *seq);
+diff -Nurb linux-2.6.22-try2/include/linux/sysctl.h linux-2.6.22-try2-netns/include/linux/sysctl.h
+--- linux-2.6.22-try2/include/linux/sysctl.h	2007-12-19 15:29:24.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/sysctl.h	2007-12-19 22:49:13.000000000 -0500
+@@ -31,6 +31,7 @@
+ 
+ struct file;
+ struct completion;
++struct net;
+ 
+ #define CTL_MAXNAME 10		/* how many path components do we allow in a
+ 				   call to sysctl?   In other words, what is
+@@ -985,6 +986,7 @@
+ 			       void __user *oldval, size_t __user *oldlenp,
+ 			       void __user *newval, size_t newlen);
+ 
++extern ctl_handler sysctl_data;
+ extern ctl_handler sysctl_string;
+ extern ctl_handler sysctl_intvec;
+ extern ctl_handler sysctl_jiffies;
+@@ -1061,6 +1063,12 @@
+ 
+ void unregister_sysctl_table(struct ctl_table_header * table);
+ 
++#ifdef CONFIG_NET
++extern struct ctl_table_header *register_net_sysctl_table(struct net *net, struct ctl_table *table);
++extern void unregister_net_sysctl_table(struct ctl_table_header *header);
++extern ctl_table net_root_table[];
++#endif
++
+ #else /* __KERNEL__ */
+ 
+ #endif /* __KERNEL__ */
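
For reference, a sketch of how a subsystem would use the per-net registration hook declared above. The table's ->data fields point into init_net; register_net_sysctl_table() is expected to rebase them onto the given namespace (see the table_fixup() machinery added to kernel/sysctl.c later in this patch). The table and function names here are illustrative, not part of the patch:

static struct ctl_table example_net_table[] = {
	{
		.ctl_name	= CTL_UNNUMBERED,
		.procname	= "somaxconn",
		.data		= &init_net.sysctl_somaxconn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{ .ctl_name = 0 }
};

static int example_register(struct net *net)
{
	struct ctl_table_header *hdr;

	/* the header would be stashed in per-net state so it can be
	 * passed to unregister_net_sysctl_table() on namespace exit */
	hdr = register_net_sysctl_table(net, example_net_table);
	return hdr ? 0 : -ENOMEM;
}
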
+diff -Nurb linux-2.6.22-try2/include/linux/sysfs.h linux-2.6.22-try2-netns/include/linux/sysfs.h
+--- linux-2.6.22-try2/include/linux/sysfs.h	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/include/linux/sysfs.h	2007-12-19 22:49:13.000000000 -0500
+@@ -19,9 +19,6 @@
+ 
+ struct kobject;
+ struct module;
+-struct nameidata;
+-struct dentry;
+-struct sysfs_dirent;
+ 
+ /* FIXME
+  * The *owner field is no longer used, but leave around
+@@ -79,16 +76,23 @@
+ 	ssize_t	(*store)(struct kobject *,struct attribute *,const char *, size_t);
+ };
+ 
++struct shadow_dir_operations {
++	const void *(*current_tag)(void);
++	const void *(*kobject_tag)(struct kobject *kobj);
++};
++
+ #define SYSFS_TYPE_MASK		0x00ff
+ #define SYSFS_ROOT		0x0001
+ #define SYSFS_DIR		0x0002
+ #define SYSFS_KOBJ_ATTR 	0x0004
+ #define SYSFS_KOBJ_BIN_ATTR	0x0008
+ #define SYSFS_KOBJ_LINK 	0x0020
++#define SYSFS_SHADOW_DIR	0x0040
+ #define SYSFS_COPY_NAME		(SYSFS_DIR | SYSFS_KOBJ_LINK)
+ 
+ #define SYSFS_FLAG_MASK		~SYSFS_TYPE_MASK
+ #define SYSFS_FLAG_REMOVED	0x0100
++#define SYSFS_FLAG_SHADOWED	0x0200
+ 
+ #ifdef CONFIG_SYSFS
+ 
+@@ -96,14 +100,13 @@
+ 		void (*func)(void *), void *data, struct module *owner);
+ 
+ extern int __must_check
+-sysfs_create_dir(struct kobject *kobj, struct sysfs_dirent *shadow_parent_sd);
++sysfs_create_dir(struct kobject *);
+ 
+ extern void
+ sysfs_remove_dir(struct kobject *);
+ 
+ extern int __must_check
+-sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
+-		 const char *new_name);
++sysfs_rename_dir(struct kobject *kobj, const char *new_name);
+ 
+ extern int __must_check
+ sysfs_move_dir(struct kobject *, struct kobject *);
+@@ -126,6 +129,13 @@
+ extern void
+ sysfs_remove_link(struct kobject *, const char * name);
+ 
++extern int
++sysfs_rename_link(struct kobject *kobj, struct kobject *target,
++			const char *old_name, const char *new_name);
++
++extern void
++sysfs_delete_link(struct kobject *dir, struct kobject *targ, const char *name);
++
+ int __must_check sysfs_create_bin_file(struct kobject *kobj,
+ 					struct bin_attribute *attr);
+ void sysfs_remove_bin_file(struct kobject *kobj, struct bin_attribute *attr);
+@@ -140,11 +150,7 @@
+ 
+ void sysfs_notify(struct kobject * k, char *dir, char *attr);
+ 
+-
+-extern int sysfs_make_shadowed_dir(struct kobject *kobj,
+-	void * (*follow_link)(struct dentry *, struct nameidata *));
+-extern struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj);
+-extern void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd);
++int sysfs_enable_shadowing(struct kobject *, const struct shadow_dir_operations *);
+ 
+ extern int __must_check sysfs_init(void);
+ 
+@@ -156,8 +162,7 @@
+ 	return -ENOSYS;
+ }
+ 
+-static inline int sysfs_create_dir(struct kobject *kobj,
+-				   struct sysfs_dirent *shadow_parent_sd)
++static inline int sysfs_create_dir(struct kobject * kobj)
+ {
+ 	return 0;
+ }
+@@ -167,9 +172,7 @@
+ 	;
+ }
+ 
+-static inline int sysfs_rename_dir(struct kobject *kobj,
+-				   struct sysfs_dirent *new_parent_sd,
+-				   const char *new_name)
++static inline int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
+ {
+ 	return 0;
+ }
+@@ -208,6 +211,17 @@
+ 	;
+ }
+ 
++static inline int
++sysfs_rename_link(struct kobject * k, struct kobject *t,
++			const char *old_name, const char * new_name)
++{
++	return 0;
++}
++
++static inline void
++sysfs_delete_link(struct kobject *k, struct kobject *t, const char *name)
++{
++}
+ 
+ static inline int sysfs_create_bin_file(struct kobject * k, struct bin_attribute * a)
+ {
+@@ -244,8 +258,8 @@
+ {
+ }
+ 
+-static inline int sysfs_make_shadowed_dir(struct kobject *kobj,
+-	void * (*follow_link)(struct dentry *, struct nameidata *))
++static inline int sysfs_enable_shadowing(struct kobject *kobj,
++				const struct shadow_dir_operations *shadow_ops)
+ {
+ 	return 0;
+ }
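
The shadow_dir_operations interface replaces the old follow_link-style shadowing with two callbacks: current_tag() names the view the calling process should see, and kobject_tag() names the view an object belongs to; a shadowed directory then shows only objects whose tag matches the viewer's. A sketch of network-namespace tagging, assuming the per-device nd_net field and the to_net_dev() helper from elsewhere in the netns series (neither is defined in this hunk):

static const void *net_current_tag(void)
{
	/* the viewing process is identified by its network namespace */
	return current->nsproxy->net_ns;
}

static const void *net_kobject_tag(struct kobject *kobj)
{
	struct device *dev = container_of(kobj, struct device, kobj);

	/* nd_net (assumed field) is the namespace the device lives in */
	return to_net_dev(dev)->nd_net;
}

static const struct shadow_dir_operations net_shadow_ops = {
	.current_tag	= net_current_tag,
	.kobject_tag	= net_kobject_tag,
};

sysfs_enable_shadowing() would then be called once on the class directory's kobject, after which each namespace resolves that directory to its own set of entries.
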
+diff -Nurb linux-2.6.22-try2/include/net/af_unix.h linux-2.6.22-try2-netns/include/net/af_unix.h
+--- linux-2.6.22-try2/include/net/af_unix.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/af_unix.h	2007-12-19 22:49:13.000000000 -0500
+@@ -91,12 +91,11 @@
+ #define unix_sk(__sk) ((struct unix_sock *)__sk)
+ 
+ #ifdef CONFIG_SYSCTL
+-extern int sysctl_unix_max_dgram_qlen;
+-extern void unix_sysctl_register(void);
+-extern void unix_sysctl_unregister(void);
++extern void unix_sysctl_register(struct net *net);
++extern void unix_sysctl_unregister(struct net *net);
+ #else
+-static inline void unix_sysctl_register(void) {}
+-static inline void unix_sysctl_unregister(void) {}
++static inline void unix_sysctl_register(struct net *net) {}
++static inline void unix_sysctl_unregister(struct net *net) {}
+ #endif
+ #endif
+ #endif
+diff -Nurb linux-2.6.22-try2/include/net/arp.h linux-2.6.22-try2-netns/include/net/arp.h
+--- linux-2.6.22-try2/include/net/arp.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/arp.h	2007-12-19 22:49:13.000000000 -0500
+@@ -11,7 +11,7 @@
+ 
+ extern void	arp_init(void);
+ extern int	arp_find(unsigned char *haddr, struct sk_buff *skb);
+-extern int	arp_ioctl(unsigned int cmd, void __user *arg);
++extern int	arp_ioctl(struct net *net, unsigned int cmd, void __user *arg);
+ extern void     arp_send(int type, int ptype, __be32 dest_ip,
+ 			 struct net_device *dev, __be32 src_ip,
+ 			 unsigned char *dest_hw, unsigned char *src_hw, unsigned char *th);
+diff -Nurb linux-2.6.22-try2/include/net/fib_rules.h linux-2.6.22-try2-netns/include/net/fib_rules.h
+--- linux-2.6.22-try2/include/net/fib_rules.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/fib_rules.h	2007-12-19 22:49:13.000000000 -0500
+@@ -56,12 +56,12 @@
+ 	int			(*fill)(struct fib_rule *, struct sk_buff *,
+ 					struct nlmsghdr *,
+ 					struct fib_rule_hdr *);
+-	u32			(*default_pref)(void);
++	u32			(*default_pref)(struct fib_rules_ops *ops);
+ 	size_t			(*nlmsg_payload)(struct fib_rule *);
+ 
+ 	/* Called after modifications to the rules set, must flush
+ 	 * the route cache if one exists. */
+-	void			(*flush_cache)(void);
++	void			(*flush_cache)(struct fib_rules_ops *ops);
+ 
+ 	int			nlgroup;
+ 	const struct nla_policy	*policy;
+@@ -101,8 +101,8 @@
+ 	return frh->table;
+ }
+ 
+-extern int			fib_rules_register(struct fib_rules_ops *);
+-extern int			fib_rules_unregister(struct fib_rules_ops *);
++extern int			fib_rules_register(struct net *net, struct fib_rules_ops *);
++extern int			fib_rules_unregister(struct net *net, struct fib_rules_ops *);
+ 
+ extern int			fib_rules_lookup(struct fib_rules_ops *,
+ 						 struct flowi *, int flags,
+diff -Nurb linux-2.6.22-try2/include/net/flow.h linux-2.6.22-try2-netns/include/net/flow.h
+--- linux-2.6.22-try2/include/net/flow.h	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/flow.h	2007-12-19 22:49:13.000000000 -0500
+@@ -8,9 +8,11 @@
+ #define _NET_FLOW_H
+ 
+ #include <linux/in6.h>
++#include <net/net_namespace.h>
+ #include <asm/atomic.h>
+ 
+ struct flowi {
++	struct net *fl_net;
+ 	int	oif;
+ 	int	iif;
+ 	__u32	mark;
+diff -Nurb linux-2.6.22-try2/include/net/inet6_hashtables.h linux-2.6.22-try2-netns/include/net/inet6_hashtables.h
+--- linux-2.6.22-try2/include/net/inet6_hashtables.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/inet6_hashtables.h	2007-12-19 22:49:13.000000000 -0500
+@@ -62,31 +62,31 @@
+ 					   const __be16 sport,
+ 					   const struct in6_addr *daddr,
+ 					   const u16 hnum,
+-					   const int dif);
++					   const int dif, struct net *net);
+ 
+ extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
+ 					  const struct in6_addr *daddr,
+ 					  const unsigned short hnum,
+-					  const int dif);
++					  const int dif, struct net *net);
+ 
+ static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo,
+ 					  const struct in6_addr *saddr,
+ 					  const __be16 sport,
+ 					  const struct in6_addr *daddr,
+ 					  const u16 hnum,
+-					  const int dif)
++					  const int dif, struct net *net)
+ {
+ 	struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport,
+-						     daddr, hnum, dif);
++						     daddr, hnum, dif, net);
+ 	if (sk)
+ 		return sk;
+ 
+-	return inet6_lookup_listener(hashinfo, daddr, hnum, dif);
++	return inet6_lookup_listener(hashinfo, daddr, hnum, dif, net);
+ }
+ 
+ extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
+ 				 const struct in6_addr *saddr, const __be16 sport,
+ 				 const struct in6_addr *daddr, const __be16 dport,
+-				 const int dif);
++				 const int dif, struct net *net);
+ #endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */
+ #endif /* _INET6_HASHTABLES_H */
+diff -Nurb linux-2.6.22-try2/include/net/inet_hashtables.h linux-2.6.22-try2-netns/include/net/inet_hashtables.h
+--- linux-2.6.22-try2/include/net/inet_hashtables.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/inet_hashtables.h	2007-12-19 22:49:13.000000000 -0500
+@@ -75,6 +75,7 @@
+  * ports are created in O(1) time?  I thought so. ;-)	-DaveM
+  */
+ struct inet_bind_bucket {
++	struct net		*net;
+ 	unsigned short		port;
+ 	signed short		fastreuse;
+ 	struct hlist_node	node;
+@@ -138,34 +139,35 @@
+ extern struct inet_bind_bucket *
+ 		    inet_bind_bucket_create(struct kmem_cache *cachep,
+ 					    struct inet_bind_hashbucket *head,
++					    struct net *net,
+ 					    const unsigned short snum);
+ extern void inet_bind_bucket_destroy(struct kmem_cache *cachep,
+ 				     struct inet_bind_bucket *tb);
+ 
+-static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
++static inline int inet_bhashfn(struct net *net, const __u16 lport, const int bhash_size)
+ {
+-	return lport & (bhash_size - 1);
++	return (((unsigned long)net) ^ lport) & (bhash_size - 1);
+ }
+ 
+ extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
+ 			   const unsigned short snum);
+ 
+ /* These can have wildcards, don't try too hard. */
+-static inline int inet_lhashfn(const unsigned short num)
++static inline int inet_lhashfn(struct net *net, const unsigned short num)
+ {
+-	return num & (INET_LHTABLE_SIZE - 1);
++	return (((unsigned long)net) ^ num) & (INET_LHTABLE_SIZE - 1);
+ }
+ 
+ static inline int inet_sk_listen_hashfn(const struct sock *sk)
+ {
+-	return inet_lhashfn(inet_sk(sk)->num);
++	return inet_lhashfn(sk->sk_net, inet_sk(sk)->num);
+ }
+ 
+ /* Caller must disable local BH processing. */
+ static inline void __inet_inherit_port(struct inet_hashinfo *table,
+ 				       struct sock *sk, struct sock *child)
+ {
+-	const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
++	const int bhash = inet_bhashfn(sk->sk_net, inet_sk(child)->num, table->bhash_size);
+ 	struct inet_bind_hashbucket *head = &table->bhash[bhash];
+ 	struct inet_bind_bucket *tb;
+ 
+@@ -274,12 +276,13 @@
+ extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
+ 					   const __be32 daddr,
+ 					   const unsigned short hnum,
+-					   const int dif);
++					   const int dif, struct net *net);
+ 
+ static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo,
+-						__be32 daddr, __be16 dport, int dif)
++						__be32 daddr, __be16 dport,
++						int dif, struct net *net)
+ {
+-	return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif);
++	return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif, net);
+ }
+ 
+ /* Socket demux engine toys. */
+@@ -313,30 +316,34 @@
+ 				   (((__force __u64)(__be32)(__daddr)) << 32) | \
+ 				   ((__force __u64)(__be32)(__saddr)));
+ #endif /* __BIG_ENDIAN */
+-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
++#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net)\
+ 	(((__sk)->sk_hash == (__hash))				&&	\
+ 	 ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie))	&&	\
+ 	 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
+-	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+-#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\
++	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \
++	 ((__sk)->sk_net == __net))
++#define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net)\
+ 	(((__sk)->sk_hash == (__hash))				&&	\
+ 	 ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) &&	\
+ 	 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
+-	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
++	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \
++	 ((__sk)->sk_net == __net))
+ #else /* 32-bit arch */
+ #define INET_ADDR_COOKIE(__name, __saddr, __daddr)
+-#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)	\
++#define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif, __net) \
+ 	(((__sk)->sk_hash == (__hash))				&&	\
+ 	 (inet_sk(__sk)->daddr		== (__saddr))		&&	\
+ 	 (inet_sk(__sk)->rcv_saddr	== (__daddr))		&&	\
+ 	 ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports))	&&	\
+-	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+-#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif)	\
++	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \
++	 ((__sk)->sk_net == __net))
++#define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif, __net) \
+ 	(((__sk)->sk_hash == (__hash))				&&	\
+ 	 (inet_twsk(__sk)->tw_daddr	== (__saddr))		&&	\
+ 	 (inet_twsk(__sk)->tw_rcv_saddr	== (__daddr))		&&	\
+ 	 ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) &&	\
+-	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
++	 (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))) && \
++	 ((__sk)->sk_net == __net))
+ #endif /* 64-bit arch */
+ 
+ /*
+@@ -349,7 +356,7 @@
+ 	__inet_lookup_established(struct inet_hashinfo *hashinfo,
+ 				  const __be32 saddr, const __be16 sport,
+ 				  const __be32 daddr, const u16 hnum,
+-				  const int dif)
++				  const int dif, struct net *net)
+ {
+ 	INET_ADDR_COOKIE(acookie, saddr, daddr)
+ 	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
+@@ -358,19 +365,19 @@
+ 	/* Optimize here for direct hit, only listening connections can
+ 	 * have wildcards anyways.
+ 	 */
+-	unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport);
++	unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
+ 	struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash);
+ 
+ 	prefetch(head->chain.first);
+ 	read_lock(&head->lock);
+ 	sk_for_each(sk, node, &head->chain) {
+-		if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
++		if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif, net))
+ 			goto hit; /* You sunk my battleship! */
+ 	}
+ 
+ 	/* Must check for a TIME_WAIT'er before going to listener hash. */
+ 	sk_for_each(sk, node, &head->twchain) {
+-		if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif))
++		if (INET_TW_MATCH(sk, hash, acookie, saddr, daddr, ports, dif, net))
+ 			goto hit;
+ 	}
+ 	sk = NULL;
+@@ -386,32 +393,32 @@
+ 	inet_lookup_established(struct inet_hashinfo *hashinfo,
+ 				const __be32 saddr, const __be16 sport,
+ 				const __be32 daddr, const __be16 dport,
+-				const int dif)
++				const int dif, struct net *net)
+ {
+ 	return __inet_lookup_established(hashinfo, saddr, sport, daddr,
+-					 ntohs(dport), dif);
++					 ntohs(dport), dif, net);
+ }
+ 
+ static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo,
+ 					 const __be32 saddr, const __be16 sport,
+ 					 const __be32 daddr, const __be16 dport,
+-					 const int dif)
++					 const int dif, struct net *net)
+ {
+ 	u16 hnum = ntohs(dport);
+ 	struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr,
+-						    hnum, dif);
+-	return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif);
++						    hnum, dif, net);
++	return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif, net);
+ }
+ 
+ static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
+ 				       const __be32 saddr, const __be16 sport,
+ 				       const __be32 daddr, const __be16 dport,
+-				       const int dif)
++				       const int dif, struct net *net)
+ {
+ 	struct sock *sk;
+ 
+ 	local_bh_disable();
+-	sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif);
++	sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif, net);
+ 	local_bh_enable();
+ 
+ 	return sk;
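
All of the lookup helpers above now take the namespace explicitly: the hash functions fold the struct net pointer into the bucket index, and the MATCH macros make the final sk_net comparison authoritative, so identical four-tuples in different namespaces can coexist. An illustrative sketch of an updated caller (this function is not part of the patch), with the namespace supplied by the receive path:

static struct sock *example_tcp_v4_demux(struct sk_buff *skb,
					 struct net *net)
{
	const struct iphdr *iph = ip_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);

	/* established sockets are tried first, then listeners; both
	 * hash with and compare against 'net' */
	return __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
			     iph->daddr, th->dest, inet_iif(skb), net);
}
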
+diff -Nurb linux-2.6.22-try2/include/net/inet_sock.h linux-2.6.22-try2-netns/include/net/inet_sock.h
+--- linux-2.6.22-try2/include/net/inet_sock.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/inet_sock.h	2007-12-19 22:49:13.000000000 -0500
+@@ -171,10 +171,12 @@
+ extern u32 inet_ehash_secret;
+ extern void build_ehash_secret(void);
+ 
+-static inline unsigned int inet_ehashfn(const __be32 laddr, const __u16 lport,
++static inline unsigned int inet_ehashfn(struct net *net,
++					const __be32 laddr, const __u16 lport,
+ 					const __be32 faddr, const __be16 fport)
+ {
+-	return jhash_2words((__force __u32) laddr ^ (__force __u32) faddr,
++	return jhash_2words((__force __u32) laddr ^ (__force __u32) faddr ^
++			    (__force __u32) ((unsigned long)net),
+ 			    ((__u32) lport) << 16 | (__force __u32)fport,
+ 			    inet_ehash_secret);
+ }
+@@ -187,7 +189,7 @@
+ 	const __be32 faddr = inet->daddr;
+ 	const __be16 fport = inet->dport;
+ 
+-	return inet_ehashfn(laddr, lport, faddr, fport);
++	return inet_ehashfn(sk->sk_net, laddr, lport, faddr, fport);
+ }
+ 
+ #endif	/* _INET_SOCK_H */
+diff -Nurb linux-2.6.22-try2/include/net/inet_timewait_sock.h linux-2.6.22-try2-netns/include/net/inet_timewait_sock.h
+--- linux-2.6.22-try2/include/net/inet_timewait_sock.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/inet_timewait_sock.h	2007-12-19 22:58:33.000000000 -0500
+@@ -115,6 +115,7 @@
+ #define tw_refcnt		__tw_common.skc_refcnt
+ #define tw_hash			__tw_common.skc_hash
+ #define tw_prot			__tw_common.skc_prot
++#define tw_net			__tw_common.skc_net
+ #define tw_xid		__tw_common.skc_xid
+ #define tw_vx_info		__tw_common.skc_vx_info
+ #define tw_nid		__tw_common.skc_nid
+diff -Nurb linux-2.6.22-try2/include/net/inetpeer.h linux-2.6.22-try2-netns/include/net/inetpeer.h
+--- linux-2.6.22-try2/include/net/inetpeer.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/inetpeer.h	2007-12-19 22:49:13.000000000 -0500
+@@ -15,6 +15,8 @@
+ #include <linux/spinlock.h>
+ #include <asm/atomic.h>
+ 
++struct net;
++
+ struct inet_peer
+ {
+ 	/* group together avl_left,avl_right,v4daddr to speedup lookups */
+@@ -22,7 +24,11 @@
+ 	__be32			v4daddr;	/* peer's address */
+ 	__u16			avl_height;
+ 	__u16			ip_id_count;	/* IP ID for the next packet */
+-	struct inet_peer	*unused_next, **unused_prevp;
++	union {
++		struct inet_peer	*unused_next;
++		struct net 		*net;
++	} u;
++	struct inet_peer	**unused_prevp;
+ 	__u32			dtime;		/* the time of last use of not
+ 						 * referenced entries */
+ 	atomic_t		refcnt;
+@@ -34,7 +40,7 @@
+ void			inet_initpeers(void) __init;
+ 
+ /* can be called with or without local BH being disabled */
+-struct inet_peer	*inet_getpeer(__be32 daddr, int create);
++struct inet_peer	*inet_getpeer(struct net *net, __be32 daddr, int create);
+ 
+ /* can be called from BH context or outside */
+ extern void inet_putpeer(struct inet_peer *p);
+diff -Nurb linux-2.6.22-try2/include/net/ip.h linux-2.6.22-try2-netns/include/net/ip.h
+--- linux-2.6.22-try2/include/net/ip.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/ip.h	2007-12-19 22:49:13.000000000 -0500
+@@ -149,13 +149,6 @@
+ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
+ 		   unsigned int len); 
+ 
+-struct ipv4_config
+-{
+-	int	log_martians;
+-	int	no_pmtu_disc;
+-};
+-
+-extern struct ipv4_config ipv4_config;
+ DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics);
+ #define IP_INC_STATS(field)		SNMP_INC_STATS(ip_statistics, field)
+ #define IP_INC_STATS_BH(field)		SNMP_INC_STATS_BH(ip_statistics, field)
+@@ -171,27 +164,6 @@
+ extern int snmp_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
+ extern void snmp_mib_free(void *ptr[2]);
+ 
+-extern int sysctl_local_port_range[2];
+-extern int sysctl_ip_default_ttl;
+-extern int sysctl_ip_nonlocal_bind;
+-
+-/* From ip_fragment.c */
+-extern int sysctl_ipfrag_high_thresh; 
+-extern int sysctl_ipfrag_low_thresh;
+-extern int sysctl_ipfrag_time;
+-extern int sysctl_ipfrag_secret_interval;
+-extern int sysctl_ipfrag_max_dist;
+-
+-/* From inetpeer.c */
+-extern int inet_peer_threshold;
+-extern int inet_peer_minttl;
+-extern int inet_peer_maxttl;
+-extern int inet_peer_gc_mintime;
+-extern int inet_peer_gc_maxtime;
+-
+-/* From ip_output.c */
+-extern int sysctl_ip_dynaddr;
+-
+ extern void ipfrag_init(void);
+ 
+ #ifdef CONFIG_INET
+@@ -332,8 +304,6 @@
+ };
+ 
+ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user);
+-extern int ip_frag_nqueues;
+-extern atomic_t ip_frag_mem;
+ 
+ /*
+  *	Functions provided by ip_forward.c
+@@ -392,5 +362,6 @@
+ #endif
+ 
+ extern struct ctl_table ipv4_table[];
++extern struct ctl_table multi_ipv4_table[];
+ 
+ #endif	/* _IP_H */
+diff -Nurb linux-2.6.22-try2/include/net/ip_fib.h linux-2.6.22-try2-netns/include/net/ip_fib.h
+--- linux-2.6.22-try2/include/net/ip_fib.h	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/ip_fib.h	2007-12-19 22:49:13.000000000 -0500
+@@ -85,6 +85,10 @@
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+ 	int			fib_power;
+ #endif
++#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
++	u32			fib_mp_alg;
++#endif
++	struct net *		fib_net;
+ 	struct fib_nh		fib_nh[0];
+ #define fib_dev		fib_nh[0].nh_dev
+ };
+@@ -155,43 +159,43 @@
+ 
+ #ifndef CONFIG_IP_MULTIPLE_TABLES
+ 
+-extern struct fib_table *ip_fib_local_table;
+-extern struct fib_table *ip_fib_main_table;
+-
+-static inline struct fib_table *fib_get_table(u32 id)
++static inline struct fib_table *fib_get_table(struct net *net, u32 id)
+ {
+ 	if (id != RT_TABLE_LOCAL)
+-		return ip_fib_main_table;
+-	return ip_fib_local_table;
++		return net->ip_fib_main_table;
++	return net->ip_fib_local_table;
+ }
+ 
+-static inline struct fib_table *fib_new_table(u32 id)
++static inline struct fib_table *fib_new_table(struct net *net, u32 id)
+ {
+-	return fib_get_table(id);
++	return fib_get_table(net, id);
+ }
+ 
+ static inline int fib_lookup(const struct flowi *flp, struct fib_result *res)
+ {
+-	if (ip_fib_local_table->tb_lookup(ip_fib_local_table, flp, res) &&
+-	    ip_fib_main_table->tb_lookup(ip_fib_main_table, flp, res))
++	struct net *net = flp->fl_net;
++	struct fib_table *local_table = net->ip_fib_local_table;
++	struct fib_table *main_table = net->ip_fib_main_table;
++	if (local_table->tb_lookup(local_table, flp, res) &&
++	    main_table->tb_lookup(main_table, flp, res))
+ 		return -ENETUNREACH;
+ 	return 0;
+ }
+ 
+ static inline void fib_select_default(const struct flowi *flp, struct fib_result *res)
+ {
++	struct net *net = flp->fl_net;
++	struct fib_table *main_table = net->ip_fib_main_table;
+ 	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
+-		ip_fib_main_table->tb_select_default(ip_fib_main_table, flp, res);
++		main_table->tb_select_default(main_table, flp, res);
+ }
+ 
+ #else /* CONFIG_IP_MULTIPLE_TABLES */
+-#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL)
+-#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN)
+ 
+ extern int fib_lookup(struct flowi *flp, struct fib_result *res);
+ 
+-extern struct fib_table *fib_new_table(u32 id);
+-extern struct fib_table *fib_get_table(u32 id);
++extern struct fib_table *fib_new_table(struct net *net, u32 id);
++extern struct fib_table *fib_get_table(struct net *net, u32 id);
+ extern void fib_select_default(const struct flowi *flp, struct fib_result *res);
+ 
+ #endif /* CONFIG_IP_MULTIPLE_TABLES */
+@@ -207,15 +211,17 @@
+ 
+ /* Exported by fib_semantics.c */
+ extern int ip_fib_check_default(__be32 gw, struct net_device *dev);
+-extern int fib_sync_down(__be32 local, struct net_device *dev, int force);
++extern int fib_sync_down(struct net *net, __be32 local, struct net_device *dev, int force);
+ extern int fib_sync_up(struct net_device *dev);
+ extern __be32  __fib_res_prefsrc(struct fib_result *res);
+ 
+ /* Exported by fib_hash.c */
+ extern struct fib_table *fib_hash_init(u32 id);
++extern void fib_hash_exit(struct fib_table *tb);
+ 
+ #ifdef CONFIG_IP_MULTIPLE_TABLES
+-extern void __init fib4_rules_init(void);
++extern void fib4_rules_init(struct net * net);
++extern void fib4_rules_exit(struct net * net);
+ 
+ #ifdef CONFIG_NET_CLS_ROUTE
+ extern u32 fib_rules_tclass(struct fib_result *res);
+@@ -258,8 +264,11 @@
+ }
+ 
+ #ifdef CONFIG_PROC_FS
+-extern int  fib_proc_init(void);
+-extern void fib_proc_exit(void);
++extern int  fib_proc_init(struct net * net);
++extern void fib_proc_exit(struct net * net);
+ #endif
+ 
++extern int  fib_info_init(struct net *net);
++extern void fib_info_exit(struct net *net);
++
+ #endif  /* _NET_FIB_H */
+diff -Nurb linux-2.6.22-try2/include/net/llc_conn.h linux-2.6.22-try2-netns/include/net/llc_conn.h
+--- linux-2.6.22-try2/include/net/llc_conn.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/llc_conn.h	2007-12-19 22:49:13.000000000 -0500
+@@ -93,7 +93,7 @@
+ 	return skb->cb[sizeof(skb->cb) - 1];
+ }
+ 
+-extern struct sock *llc_sk_alloc(int family, gfp_t priority,
++extern struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority,
+ 				 struct proto *prot);
+ extern void llc_sk_free(struct sock *sk);
+ 
+diff -Nurb linux-2.6.22-try2/include/net/neighbour.h linux-2.6.22-try2-netns/include/net/neighbour.h
+--- linux-2.6.22-try2/include/net/neighbour.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/neighbour.h	2007-12-19 22:49:13.000000000 -0500
+@@ -34,6 +34,7 @@
+ 
+ struct neigh_parms
+ {
++	struct net *net;
+ 	struct net_device *dev;
+ 	struct neigh_parms *next;
+ 	int	(*neigh_setup)(struct neighbour *);
+@@ -126,6 +127,7 @@
+ struct pneigh_entry
+ {
+ 	struct pneigh_entry	*next;
++	struct net		*net;
+ 	struct net_device		*dev;
+ 	u8			flags;
+ 	u8			key[0];
+@@ -187,6 +189,7 @@
+ 					     const void *pkey,
+ 					     struct net_device *dev);
+ extern struct neighbour *	neigh_lookup_nodev(struct neigh_table *tbl,
++						   struct net *net,
+ 						   const void *pkey);
+ extern struct neighbour *	neigh_create(struct neigh_table *tbl,
+ 					     const void *pkey,
+@@ -205,21 +208,24 @@
+ 						struct net_device *dev);
+ 
+ extern struct neigh_parms	*neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl);
++extern struct neigh_parms	*neigh_parms_alloc_default(struct neigh_table *tbl, struct net *net);
+ extern void			neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms);
+ extern void			neigh_parms_destroy(struct neigh_parms *parms);
+ extern unsigned long		neigh_rand_reach_time(unsigned long base);
+ 
+ extern void			pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
+ 					       struct sk_buff *skb);
+-extern struct pneigh_entry	*pneigh_lookup(struct neigh_table *tbl, const void *key, struct net_device *dev, int creat);
+-extern int			pneigh_delete(struct neigh_table *tbl, const void *key, struct net_device *dev);
++extern struct pneigh_entry	*pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev, int creat);
++extern int			pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev);
+ 
+ extern void neigh_app_ns(struct neighbour *n);
+ extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie);
+ extern void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *));
+ extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *));
+ 
+ struct neigh_seq_state {
++	struct net *net;
+ 	struct neigh_table *tbl;
+ 	void *(*neigh_sub_iter)(struct neigh_seq_state *state,
+ 				struct neighbour *n, loff_t *pos);
+diff -Nurb linux-2.6.22-try2/include/net/net_namespace.h linux-2.6.22-try2-netns/include/net/net_namespace.h
+--- linux-2.6.22-try2/include/net/net_namespace.h	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/net_namespace.h	2007-12-19 22:49:13.000000000 -0500
+@@ -0,0 +1,236 @@
++/*
++ * Operations on the network namespace
++ */
++#ifndef __NET_NET_NAMESPACE_H
++#define __NET_NET_NAMESPACE_H
++
++#include <asm/atomic.h>
++#include <linux/workqueue.h>
++#include <linux/list.h>
++#include <linux/proc_fs.h>
++#include <linux/sysctl.h>
++#include <linux/netdevice.h>
++#include <linux/timer.h>
++
++struct sock;
++struct xt_af_pernet;
++struct ipv4_devconf;
++struct neigh_parms;
++struct inet_peer;
++struct xt_table;
++struct net {
++	atomic_t count;		/* To decide when the network namespace
++				 * should go away
++				 */
++	atomic_t use_count;	/* For references we destroy on demand */
++	struct list_head list;	/* list of network namespace structures */
++	struct work_struct work;	/* work struct for freeing */
++
++#ifdef CONFIG_PROC_FS
++	struct proc_dir_entry *proc_net;
++	struct proc_dir_entry *proc_net_stat;
++	struct proc_dir_entry proc_net_root;
++# ifdef CONFIG_NETFILTER
++	struct proc_dir_entry *proc_net_netfilter;
++# endif
++#endif
++#ifdef CONFIG_SYSCTL
++	struct ctl_table_header net_table_header;
++#endif
++	struct net_device	loopback_dev;		/* The loopback */
++	struct list_head	dev_base_head;		/* All devices */
++
++	struct hlist_head 	*dev_name_head;
++	struct hlist_head	*dev_index_head;
++
++	struct sock *		rtnl;	/* rtnetlink socket */
++
++
++	/* core netfilter */
++	struct xt_af_pernet *	xtn;
++
++	/* core fib_rules */
++	struct list_head	rules_ops;
++	spinlock_t		rules_mod_lock;
++
++#ifdef CONFIG_XFRM
++	u32			sysctl_xfrm_aevent_etime;
++	u32			sysctl_xfrm_aevent_rseqth;
++	int			sysctl_xfrm_larval_drop;
++	u32			sysctl_xfrm_acq_expires;
++#endif /* CONFIG_XFRM */
++
++	int			sysctl_somaxconn;
++
++#ifdef CONFIG_PACKET
++	/* List of all packet sockets. */
++	rwlock_t		packet_sklist_lock;
++	struct hlist_head	packet_sklist;
++#endif /* CONFIG_PACKET */
++#ifdef CONFIG_UNIX
++	int			sysctl_unix_max_dgram_qlen;
++	void *			unix_sysctl;
++#endif /* CONFIG_UNIX */
++#ifdef CONFIG_IP_MULTIPLE_TABLES
++	void *			fib4_table;
++#endif /* CONFIG_IP_MULTIPLE_TABLES */
++#ifdef CONFIG_IP_FIB_HASH
++	int			fn_hash_last_dflt;
++#endif
++#ifdef CONFIG_IP_FIB_TRIE
++	int			trie_last_dflt;
++#endif
++#ifndef CONFIG_IP_MULTIPLE_TABLES
++	struct fib_table	*ip_fib_local_table;
++	struct fib_table	*ip_fib_main_table;
++#endif
++	struct hlist_head	*ip_fib_table_hash;
++	struct sock		*nlfl;
++
++	/* fib_semantics */
++	struct hlist_head	*fib_info_hash;
++	struct hlist_head	*fib_info_laddrhash;
++	unsigned int		fib_info_hash_size;
++					  int unit, unsigned int groups,
++	struct hlist_head	*fib_info_devhash;
++
++	/* af_inet.c */
++	int			sysctl_ip_nonlocal_bind; /* __read_mostly */
++	int			sysctl_ip_default_ttl;	/* __read_mostly */
++	int			sysctl_ipfrag_high_thresh;
++	int			sysctl_ipfrag_low_thresh;
++	int			sysctl_ipfrag_time;
++	int			sysctl_ipfrag_secret_interval;
++	int			sysctl_ipfrag_max_dist;
++	int			sysctl_ipv4_no_pmtu_disc;
++	int			sysctl_local_port_range[2];
++	int			sysctl_ip_dynaddr;
++	int			sysctl_tcp_timestamps;	/* __read_mostly */
++	int			sysctl_tcp_window_scaling; /* __read_mostly */
++	/* inetpeer.c */
++	int			inet_peer_threshold;
++	int			inet_peer_minttl;
++	int			inet_peer_maxttl;
++	int			inet_peer_gc_mintime;
++	int			inet_peer_gc_maxtime;
++
++	/* devinet */
++	struct ipv4_devconf	*ipv4_devconf;
++	struct ipv4_devconf	*ipv4_devconf_dflt;
++
++	/* arp.c */
++	struct neigh_parms	*arp_neigh_parms_default;
++
++	/* icmp.c */
++	struct socket 		**__icmp_socket;
++
++	/* inetpeer.c */
++	struct inet_peer	*peer_root;
++	int			peer_total;
++	struct inet_peer	*inet_peer_unused_head;
++	struct inet_peer	**inet_peer_unused_tailp;
++	struct timer_list	peer_periodic_timer;
++
++	/* ip_fragment.c */
++	struct hlist_head	*ipq_hash;
++	u32			ipfrag_hash_rnd;
++	struct list_head	ipq_lru_list;
++	int			ip_frag_nqueues;
++	atomic_t		ip_frag_mem;
++	struct timer_list	ipfrag_secret_timer;
++
++	/* udp.c */
++	int			udp_port_rover;
++
++	/* iptable_filter.c */
++	struct xt_table		*ip_packet_filter;
++};
++
++extern struct net init_net;
++extern struct list_head net_namespace_list;
++
++extern struct net *copy_net_ns(unsigned long flags, struct net *net_ns);
++extern void __put_net(struct net *net);
++
++static inline struct net *get_net(struct net *net)
++{
++	atomic_inc(&net->count);
++	return net;
++}
++
++static inline void put_net(struct net *net)
++{
++	if (atomic_dec_and_test(&net->count))
++		__put_net(net);
++}
++
++static inline struct net *hold_net(struct net *net)
++{
++	atomic_inc(&net->use_count);
++	return net;
++}
++
++static inline void release_net(struct net *net)
++{
++	atomic_dec(&net->use_count);
++}
++
++extern void net_lock(void);
++extern void net_unlock(void);
++
++#define for_each_net(VAR)				\
++	list_for_each_entry(VAR, &net_namespace_list, list)
++
++
++struct pernet_operations {
++	struct list_head list;
++	int (*init)(struct net *net);
++	void (*exit)(struct net *net);
++};
++
++extern int register_pernet_subsys(struct pernet_operations *);
++extern void unregister_pernet_subsys(struct pernet_operations *);
++extern int register_pernet_device(struct pernet_operations *);
++extern void unregister_pernet_device(struct pernet_operations *);
++
++#ifdef CONFIG_PROC_FS
++static inline struct net *PDE_NET(struct proc_dir_entry *pde)
++{
++	return pde->parent->data;
++}
++
++static inline struct net *PROC_NET(const struct inode *inode)
++{
++	return PDE_NET(PDE(inode));
++}
++
++static inline struct proc_dir_entry *proc_net_create(struct net *net,
++	const char *name, mode_t mode, get_info_t *get_info)
++{
++	return create_proc_info_entry(name, mode, net->proc_net, get_info);
++}
++
++static inline struct proc_dir_entry *proc_net_fops_create(struct net *net,
++	const char *name, mode_t mode, const struct file_operations *fops)
++{
++	struct proc_dir_entry *res =
++		create_proc_entry(name, mode, net->proc_net);
++	if (res)
++		res->proc_fops = fops;
++	return res;
++}
++
++static inline void proc_net_remove(struct net *net, const char *name)
++{
++	remove_proc_entry(name, net->proc_net);
++}
++
++#else
++
++#define proc_net_fops_create(net, name, mode, fops)  ({ (void)(mode), NULL; })
++#define proc_net_create(net, name, mode, info)	({ (void)(mode), NULL; })
++static inline void proc_net_remove(struct net *net, const char *name) {}
++
++#endif /* CONFIG_PROC_FS */
++
++#endif /* __NET_NET_NAMESPACE_H */
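
The pernet_operations interface declared above is the conversion pattern the rest of the series follows: init is expected to run once per namespace and exit on namespace teardown, with register_pernet_subsys() for protocol subsystems and register_pernet_device() for device-layer code. A minimal sketch, all "example" names illustrative:

static int example_net_init(struct net *net)
{
	/* allocate and attach this subsystem's per-namespace state */
	return 0;
}

static void example_net_exit(struct net *net)
{
	/* release the per-namespace state */
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.exit = example_net_exit,
};

static int __init example_init(void)
{
	return register_pernet_subsys(&example_net_ops);
}

static void __exit example_exit(void)
{
	unregister_pernet_subsys(&example_net_ops);
}
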
+diff -Nurb linux-2.6.22-try2/include/net/netlink.h linux-2.6.22-try2-netns/include/net/netlink.h
+--- linux-2.6.22-try2/include/net/netlink.h	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/netlink.h	2007-12-19 22:49:13.000000000 -0500
+@@ -218,6 +218,7 @@
+ struct nl_info {
+ 	struct nlmsghdr		*nlh;
+ 	u32			pid;
++	struct net 		*net;
+ };
+ 
+ extern void		netlink_run_queue(struct sock *sk, unsigned int *qlen,
+diff -Nurb linux-2.6.22-try2/include/net/pkt_cls.h linux-2.6.22-try2-netns/include/net/pkt_cls.h
+--- linux-2.6.22-try2/include/net/pkt_cls.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/pkt_cls.h	2007-12-19 22:49:13.000000000 -0500
+@@ -2,6 +2,7 @@
+ #define __NET_PKT_CLS_H
+ 
+ #include <linux/pkt_cls.h>
++#include <net/net_namespace.h>
+ #include <net/sch_generic.h>
+ #include <net/act_api.h>
+ 
+@@ -357,7 +358,7 @@
+ 	if (indev[0]) {
+ 		if  (!skb->iif)
+ 			return 0;
+-		dev = __dev_get_by_index(skb->iif);
++		dev = __dev_get_by_index(&init_net, skb->iif);
+ 		if (!dev || strcmp(indev, dev->name))
+ 			return 0;
+ 	}
+diff -Nurb linux-2.6.22-try2/include/net/protocol.h linux-2.6.22-try2-netns/include/net/protocol.h
+--- linux-2.6.22-try2/include/net/protocol.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/protocol.h	2007-12-19 22:49:13.000000000 -0500
+@@ -86,6 +86,7 @@
+ #define INET_PROTOSW_REUSE 0x01	     /* Are ports automatically reusable? */
+ #define INET_PROTOSW_PERMANENT 0x02  /* Permanent protocols are unremovable. */
+ #define INET_PROTOSW_ICSK      0x04  /* Is this an inet_connection_sock? */
++#define INET_PROTOSW_NETNS     0x08  /* Multiple namespaces support? */
+ 
+ extern struct net_protocol *inet_protocol_base;
+ extern struct net_protocol *inet_protos[MAX_INET_PROTOS];
+diff -Nurb linux-2.6.22-try2/include/net/raw.h linux-2.6.22-try2-netns/include/net/raw.h
+--- linux-2.6.22-try2/include/net/raw.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/raw.h	2007-12-19 22:49:13.000000000 -0500
+@@ -34,7 +34,7 @@
+ extern rwlock_t raw_v4_lock;
+ 
+ 
+-extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
++extern struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num,
+ 				    __be32 raddr, __be32 laddr,
+ 				    int dif, int tag);
+ 
+diff -Nurb linux-2.6.22-try2/include/net/route.h linux-2.6.22-try2-netns/include/net/route.h
+--- linux-2.6.22-try2/include/net/route.h	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/route.h	2007-12-19 22:58:46.000000000 -0500
+@@ -27,6 +27,7 @@
+ #include <net/dst.h>
+ #include <net/inetpeer.h>
+ #include <net/flow.h>
++#include <net/sock.h>
+ #include <net/inet_sock.h>
+ #include <linux/in_route.h>
+ #include <linux/rtnetlink.h>
+@@ -122,9 +123,9 @@
+ extern unsigned short	ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu);
+ extern void		ip_rt_send_redirect(struct sk_buff *skb);
+ 
+-extern unsigned		inet_addr_type(__be32 addr);
++extern unsigned		inet_addr_type(struct net *net, __be32 addr);
+ extern void		ip_rt_multicast_event(struct in_device *);
+-extern int		ip_rt_ioctl(unsigned int cmd, void __user *arg);
++extern int		ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg);
+ extern void		ip_rt_get_source(u8 *src, struct rtable *rt);
+ extern int		ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb);
+ 
+@@ -153,7 +154,8 @@
+ 				   __be16 sport, __be16 dport, struct sock *sk,
+ 				   int flags)
+ {
+-	struct flowi fl = { .oif = oif,
++	struct flowi fl = { .fl_net = sk->sk_net,
++			    .oif = oif,
+ 			    .nl_u = { .ip4_u = { .daddr = dst,
+ 						 .saddr = src,
+ 						 .tos   = tos } },
+@@ -198,6 +200,7 @@
+ 		struct flowi fl;
+ 
+ 		memcpy(&fl, &(*rp)->fl, sizeof(fl));
++		fl.fl_net = sk->sk_net;
+ 		fl.fl_ip_sport = sport;
+ 		fl.fl_ip_dport = dport;
+ 		fl.proto = protocol;
+diff -Nurb linux-2.6.22-try2/include/net/sock.h linux-2.6.22-try2-netns/include/net/sock.h
+--- linux-2.6.22-try2/include/net/sock.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/sock.h	2007-12-19 22:59:14.000000000 -0500
+@@ -55,6 +55,7 @@
+ #include <asm/atomic.h>
+ #include <net/dst.h>
+ #include <net/checksum.h>
++#include <net/net_namespace.h>
+ 
+ /*
+  * This structure really needs to be cleaned up.
+@@ -105,6 +106,7 @@
+  *	@skc_refcnt: reference count
+  *	@skc_hash: hash value used with various protocol lookup tables
+  *	@skc_prot: protocol handlers inside a network family
++ *	@skc_net: reference to the network namespace of this socket
+  *
+  *	This is the minimal network layer representation of sockets, the header
+  *	for struct sock and struct inet_timewait_sock.
+@@ -119,6 +121,7 @@
+ 	atomic_t		skc_refcnt;
+ 	unsigned int		skc_hash;
+ 	struct proto		*skc_prot;
++	struct net		*skc_net;
+ 	xid_t			skc_xid;
+ 	struct vx_info		*skc_vx_info;
+ 	nid_t			skc_nid;
+@@ -199,6 +202,7 @@
+ #define sk_refcnt		__sk_common.skc_refcnt
+ #define sk_hash			__sk_common.skc_hash
+ #define sk_prot			__sk_common.skc_prot
++#define sk_net			__sk_common.skc_net
+ #define sk_xid			__sk_common.skc_xid
+ #define sk_vx_info		__sk_common.skc_vx_info
+ #define sk_nid			__sk_common.skc_nid
+@@ -781,7 +785,7 @@
+ 				SINGLE_DEPTH_NESTING)
+ #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
+ 
+-extern struct sock		*sk_alloc(int family,
++extern struct sock		*sk_alloc(struct net *net, int family,
+ 					  gfp_t priority,
+ 					  struct proto *prot, int zero_it);
+ extern void			sk_free(struct sock *sk);
+@@ -1010,6 +1014,7 @@
+ #endif
+ 
+ 	memcpy(nsk, osk, osk->sk_prot->obj_size);
++	get_net(nsk->sk_net);
+ #ifdef CONFIG_SECURITY_NETWORK
+ 	nsk->sk_security = sptr;
+ 	security_sk_clone(osk, nsk);
+@@ -1373,6 +1378,7 @@
+ 
+ #ifdef CONFIG_SYSCTL
+ extern struct ctl_table core_table[];
++extern struct ctl_table multi_core_table[];
+ #endif
+ 
+ extern int sysctl_optmem_max;
+diff -Nurb linux-2.6.22-try2/include/net/tcp.h linux-2.6.22-try2-netns/include/net/tcp.h
+--- linux-2.6.22-try2/include/net/tcp.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/tcp.h	2007-12-19 22:49:13.000000000 -0500
+@@ -191,8 +191,6 @@
+ extern struct inet_timewait_death_row tcp_death_row;
+ 
+ /* sysctl variables for tcp */
+-extern int sysctl_tcp_timestamps;
+-extern int sysctl_tcp_window_scaling;
+ extern int sysctl_tcp_sack;
+ extern int sysctl_tcp_fin_timeout;
+ extern int sysctl_tcp_keepalive_time;
+@@ -1293,6 +1291,7 @@
+ };
+ 
+ struct tcp_iter_state {
++	struct net		*net;
+ 	sa_family_t		family;
+ 	enum tcp_seq_states	state;
+ 	struct sock		*syn_wait_sk;
+@@ -1300,8 +1299,8 @@
+ 	struct seq_operations	seq_ops;
+ };
+ 
+-extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo);
+-extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);
++extern int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo);
++extern void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo);
+ 
+ extern struct request_sock_ops tcp_request_sock_ops;
+ 
+diff -Nurb linux-2.6.22-try2/include/net/udp.h linux-2.6.22-try2-netns/include/net/udp.h
+--- linux-2.6.22-try2/include/net/udp.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/udp.h	2007-12-19 22:49:13.000000000 -0500
+@@ -160,6 +160,7 @@
+ };
+ 
+ struct udp_iter_state {
++	struct net 		*net;
+ 	sa_family_t		family;
+ 	struct hlist_head	*hashtable;
+ 	int			bucket;
+@@ -167,8 +168,8 @@
+ };
+ 
+ #ifdef CONFIG_PROC_FS
+-extern int udp_proc_register(struct udp_seq_afinfo *afinfo);
+-extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo);
++extern int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo);
++extern void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo);
+ 
+ extern int  udp4_proc_init(void);
+ extern void udp4_proc_exit(void);
+diff -Nurb linux-2.6.22-try2/include/net/wext.h linux-2.6.22-try2-netns/include/net/wext.h
+--- linux-2.6.22-try2/include/net/wext.h	2007-12-19 13:37:54.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/wext.h	2007-12-19 22:49:13.000000000 -0500
+@@ -5,16 +5,23 @@
+  * wireless extensions interface to the core code
+  */
+ 
++struct net;
++
+ #ifdef CONFIG_WIRELESS_EXT
+-extern int wext_proc_init(void);
+-extern int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
++extern int wext_proc_init(struct net *net);
++extern void wext_proc_exit(struct net *net);
++extern int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+ 			     void __user *arg);
+ #else
+-static inline int wext_proc_init(void)
++static inline int wext_proc_init(struct net *net)
+ {
+ 	return 0;
+ }
+-static inline int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
++static inline void wext_proc_exit(struct net *net)
++{
++	return;
++}
++static inline int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+ 				    void __user *arg)
+ {
+ 	return -EINVAL;
+diff -Nurb linux-2.6.22-try2/include/net/xfrm.h linux-2.6.22-try2-netns/include/net/xfrm.h
+--- linux-2.6.22-try2/include/net/xfrm.h	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/include/net/xfrm.h	2007-12-19 22:49:13.000000000 -0500
+@@ -34,8 +34,6 @@
+ 	MODULE_ALIAS("xfrm-type-" __stringify(family) "-" __stringify(proto))
+ 
+ extern struct sock *xfrm_nl;
+-extern u32 sysctl_xfrm_aevent_etime;
+-extern u32 sysctl_xfrm_aevent_rseqth;
+ 
+ extern struct mutex xfrm_cfg_mutex;
+ 
+diff -Nurb linux-2.6.22-try2/kernel/audit.c linux-2.6.22-try2-netns/kernel/audit.c
+--- linux-2.6.22-try2/kernel/audit.c	2007-12-19 15:29:24.000000000 -0500
++++ linux-2.6.22-try2-netns/kernel/audit.c	2007-12-19 22:49:13.000000000 -0500
+@@ -795,8 +795,8 @@
+ 
+ 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
+ 	       audit_default ? "enabled" : "disabled");
+-	audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive,
+-					   NULL, THIS_MODULE);
++	audit_sock = netlink_kernel_create(&init_net, NETLINK_AUDIT, 0,
++					   audit_receive, NULL, THIS_MODULE);
+ 	if (!audit_sock)
+ 		audit_panic("cannot initialize netlink socket");
+ 	else
+diff -Nurb linux-2.6.22-try2/kernel/nsproxy.c linux-2.6.22-try2-netns/kernel/nsproxy.c
+--- linux-2.6.22-try2/kernel/nsproxy.c	2007-12-19 21:24:51.000000000 -0500
++++ linux-2.6.22-try2-netns/kernel/nsproxy.c	2007-12-19 23:01:55.000000000 -0500
+@@ -19,6 +19,7 @@
+ #include <linux/init_task.h>
+ #include <linux/mnt_namespace.h>
+ #include <linux/utsname.h>
++#include <net/net_namespace.h>
+ #include <linux/pid_namespace.h>
+ #include <linux/vserver/global.h>
+ #include <linux/vserver/debug.h>
+@@ -89,8 +90,17 @@
+ 	if (IS_ERR(new_nsp->user_ns))
+ 		goto out_user;
+ 
++	new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns);
++	if (IS_ERR(new_nsp->net_ns))
++		goto out_net;
++
+ 	return new_nsp;
+ 
++out_net:
++	if (new_nsp->user_ns)
++		put_user_ns(new_nsp->user_ns);
+ out_user:
+ 	if (new_nsp->pid_ns)
+ 		put_pid_ns(new_nsp->pid_ns);
+@@ -153,9 +163,15 @@
+ 
+ 	get_nsproxy(old_ns);
+ 
+-	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER)))
++	if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWNET)))
+ 		return 0;
+ 
++	 #ifndef CONFIG_NET_NS
++#ifndef CONFIG_NET_NS
++	if (flags & CLONE_NEWNET)
++		return -EINVAL;
++#endif
++
+ 		err = -EPERM;
+ 		goto out;
+@@ -211,9 +227,13 @@
+ 		unshare_flags, current->nsproxy);
+ 
+ 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
+-			       CLONE_NEWUSER)))
++			       CLONE_NEWUSER | CLONE_NEWNET)))
+ 		return 0;
+ 
++#ifndef CONFIG_NET_NS
++	if (unshare_flags & CLONE_NEWNET)
++		return -EINVAL;
++#endif
+ 	if (!capable(CAP_SYS_ADMIN))
+ 		return -EPERM;
+ 
+diff -Nurb linux-2.6.22-try2/kernel/sysctl.c linux-2.6.22-try2-netns/kernel/sysctl.c
+--- linux-2.6.22-try2/kernel/sysctl.c	2007-12-19 15:29:24.000000000 -0500
++++ linux-2.6.22-try2-netns/kernel/sysctl.c	2007-12-19 22:49:13.000000000 -0500
+@@ -47,6 +47,7 @@
+ #include <linux/acpi.h>
+ #include <linux/reboot.h>
+ #include <linux/fs.h>
++#include <net/net_namespace.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/processor.h>
+@@ -139,6 +140,10 @@
+ 			       void __user *buffer, size_t *lenp, loff_t *ppos);
+ #endif
+ 
++#ifdef CONFIG_NET
++static void sysctl_net_init(struct net *net);
++#endif
++
+ static ctl_table root_table[];
+ static struct ctl_table_header root_table_header =
+ 	{ root_table, LIST_HEAD_INIT(root_table_header.ctl_entry) };
+@@ -1151,6 +1156,11 @@
+ {
+ 	struct ctl_table_header *head;
+ 	struct list_head *tmp;
++#ifdef CONFIG_NET
++	struct net *net = current->nsproxy->net_ns;
++
++	if (!net->net_table_header.ctl_table)
++		sysctl_net_init(net);
++#endif
++
+ 	spin_lock(&sysctl_lock);
+ 	if (prev) {
+ 		tmp = &prev->ctl_entry;
+@@ -1168,6 +1178,10 @@
+ 	next:
+ 		tmp = tmp->next;
+ 		if (tmp == &root_table_header.ctl_entry)
++#ifdef CONFIG_NET
++			tmp = &net->net_table_header.ctl_entry;
++		else if (tmp == &net->net_table_header.ctl_entry)
++#endif
+ 			break;
+ 	}
+ 	spin_unlock(&sysctl_lock);
+@@ -1283,7 +1297,6 @@
+ 			void __user *newval, size_t newlen)
+ {
+ 	int op = 0, rc;
+-	size_t len;
+ 
+ 	if (oldval)
+ 		op |= 004;
+@@ -1304,25 +1317,10 @@
+ 	/* If there is no strategy routine, or if the strategy returns
+ 	 * zero, proceed with automatic r/w */
+ 	if (table->data && table->maxlen) {
+-		if (oldval && oldlenp) {
+-			if (get_user(len, oldlenp))
+-				return -EFAULT;
+-			if (len) {
+-				if (len > table->maxlen)
+-					len = table->maxlen;
+-				if(copy_to_user(oldval, table->data, len))
+-					return -EFAULT;
+-				if(put_user(len, oldlenp))
+-					return -EFAULT;
+-			}
+-		}
+-		if (newval && newlen) {
+-			len = newlen;
+-			if (len > table->maxlen)
+-				len = table->maxlen;
+-			if(copy_from_user(table->data, newval, len))
+-				return -EFAULT;
+-		}
++		rc = sysctl_data(table, name, nlen, oldval, oldlenp,
++				 newval, newlen);
++		if (rc < 0)
++			return rc;
+ 	}
+ 	return 0;
+ }
+@@ -1413,7 +1411,8 @@
+  * This routine returns %NULL on a failure to register, and a pointer
+  * to the table header on success.
+  */
+-struct ctl_table_header *register_sysctl_table(ctl_table * table)
++static struct ctl_table_header *__register_sysctl_table(
++	struct ctl_table_header *root, ctl_table * table)
+ {
+ 	struct ctl_table_header *tmp;
+ 	tmp = kmalloc(sizeof(struct ctl_table_header), GFP_KERNEL);
+@@ -1425,11 +1424,16 @@
+ 	tmp->unregistering = NULL;
+ 	sysctl_set_parent(NULL, table);
+ 	spin_lock(&sysctl_lock);
+-	list_add_tail(&tmp->ctl_entry, &root_table_header.ctl_entry);
++	list_add_tail(&tmp->ctl_entry, &root->ctl_entry);
+ 	spin_unlock(&sysctl_lock);
+ 	return tmp;
+ }
+ 
++struct ctl_table_header *register_sysctl_table(ctl_table *table)
++{
++	return __register_sysctl_table(&root_table_header, table);
++}
++
+ /**
+  * unregister_sysctl_table - unregister a sysctl table hierarchy
+  * @header: the header returned from register_sysctl_table
+@@ -1446,6 +1450,92 @@
+ 	kfree(header);
+ }
+ 
++#ifdef CONFIG_NET
++
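++/*
++ * If @addr points inside the [start, start + size) window (the init_net
++ * instance being cloned), translate it to the matching offset in @new;
++ * otherwise return it unchanged.
++ */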
++static void *fixup_table_addr(void *addr,
++			      const char *start, size_t size, const char *new)
++{
++	char *ptr = addr;
++	if ((ptr >= start) && (ptr < (start + size)))
++		ptr += new - start;
++	return ptr;
++}
++
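++/*
++ * Walk a ctl_table tree and rewrite every data/extra1/extra2 pointer
++ * that referenced the old instance so that it points at the new one.
++ */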
++static void table_fixup(struct ctl_table *table,
++			const void *start, size_t size, const void *new)
++{
++	for (; table->ctl_name || table->procname; table++) {
++		table->data   = fixup_table_addr(table->data, start, size, new);
++		table->extra1 = fixup_table_addr(table->extra1, start, size, new);
++		table->extra2 = fixup_table_addr(table->extra2, start, size, new);
++
++		/* Whee recursive functions on the kernel stack */
++		if (table->child)
++			table_fixup(table->child, start, size, new);
++	}
++}
++
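++/* Count the entries of a ctl_table tree, including each table's NULL terminator. */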
++static unsigned count_table_entries(struct ctl_table *table)
++{
++	unsigned entries = 0;
++	for (; table->ctl_name || table->procname; table++) {
++		entries += 1;
++
++		if (table->child)
++			entries += count_table_entries(table->child);
++	}
++	entries += 1; /* Null terminating entry */
++	return entries;
++}
++
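++/*
++ * Flatten a ctl_table tree into @dest: copy this table's entries, then
++ * append each child table after the NULL terminator and repoint the
++ * copied parent entries at the copied children.
++ */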
++static struct ctl_table *copy_table_entries(
++	struct ctl_table *dest, struct ctl_table *src)
++{
++	struct ctl_table *table = dest;
++	for (; src->ctl_name || src->procname; src++)
++		*dest++ = *src;
++	dest++; /* Null terminating entry */
++	for (; table->ctl_name || table->procname; table++) {
++		if (table->child) {
++			struct ctl_table *child = dest;
++			dest = copy_table_entries(dest, table->child);
++			table->child = child;
++		}
++	}
++	return dest;
++}
++
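++/*
++ * Lazily build the per-namespace sysctl tree: clone net_root_table and
++ * redirect every pointer that referenced init_net at the new namespace.
++ */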
++static void sysctl_net_init(struct net *net)
++{
++	unsigned entries;
++	struct ctl_table *table;
++
++	entries = count_table_entries(net_root_table);
++	table = kzalloc(sizeof(*table) * entries, GFP_KERNEL);
++	if (!table)
++		return;
++	/* FIXME free table... */
++
++	copy_table_entries(table, net_root_table);
++	table_fixup(table, &init_net, sizeof(init_net), net);
++
++	net->net_table_header.ctl_table = table;
++	INIT_LIST_HEAD(&net->net_table_header.ctl_entry);
++}
++
++struct ctl_table_header *register_net_sysctl_table(struct net *net, struct ctl_table *table)
++{
++	if (!net->net_table_header.ctl_table)
++		sysctl_net_init(net);
++	table_fixup(table, &init_net, sizeof(init_net), net);
++	return __register_sysctl_table(&net->net_table_header, table);
++}
++EXPORT_SYMBOL_GPL(register_net_sysctl_table);
++
++void unregister_net_sysctl_table(struct ctl_table_header *header)
++{
++	unregister_sysctl_table(header);
++}
++EXPORT_SYMBOL_GPL(unregister_net_sysctl_table);
++#endif
++
+ #else /* !CONFIG_SYSCTL */
+ struct ctl_table_header *register_sysctl_table(ctl_table * table)
+ {
+@@ -2221,6 +2311,40 @@
+  * General sysctl support routines 
+  */
+ 
++/* The generic sysctl data routine (used if no strategy routine supplied) */
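++/* Returns 1 when the transfer is handled here, or a negative errno. */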
++int sysctl_data(ctl_table *table, int __user *name, int nlen,
++		void __user *oldval, size_t __user *oldlenp,
++		void __user *newval, size_t newlen)
++{
++	size_t len;
++
++	/* Get out if we don't have a variable */
++	if (!table->data || !table->maxlen)
++		return -ENOTDIR;
++
++	if (oldval && oldlenp) {
++		if (get_user(len, oldlenp))
++			return -EFAULT;
++		if (len) {
++			if (len > table->maxlen)
++				len = table->maxlen;
++			if (copy_to_user(oldval, table->data, len))
++				return -EFAULT;
++			if (put_user(len, oldlenp))
++				return -EFAULT;
++		}
++	}
++
++	if (newval && newlen) {
++		if (newlen > table->maxlen)
++			newlen = table->maxlen;
++
++		if (copy_from_user(table->data, newval, newlen))
++			return -EFAULT;
++	}
++	return 1;
++}
++
+ /* The generic string strategy routine: */
+ int sysctl_string(ctl_table *table, int __user *name, int nlen,
+ 		  void __user *oldval, size_t __user *oldlenp,
+@@ -2409,6 +2533,13 @@
+ 	return -ENOSYS;
+ }
+ 
++int sysctl_data(ctl_table *table, int __user *name, int nlen,
++		  void __user *oldval, size_t __user *oldlenp,
++		  void __user *newval, size_t newlen)
++{
++	return -ENOSYS;
++}
++
+ int sysctl_string(ctl_table *table, int __user *name, int nlen,
+ 		  void __user *oldval, size_t __user *oldlenp,
+ 		  void __user *newval, size_t newlen)
+@@ -2456,4 +2587,5 @@
+ EXPORT_SYMBOL(sysctl_jiffies);
+ EXPORT_SYMBOL(sysctl_ms_jiffies);
+ EXPORT_SYMBOL(sysctl_string);
++EXPORT_SYMBOL(sysctl_data);
+ EXPORT_SYMBOL(unregister_sysctl_table);
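For reference, a hedged sketch of how callers are expected to use the
per-namespace helpers added above. Table entries carry pointers into
init_net, and register_net_sysctl_table() rewrites them in place for the
target namespace via table_fixup(); "example_value" and "example_hdr" are
hypothetical fields used only for illustration:

	static struct ctl_table example_table[] = {
		{
			.procname	= "example_value",
			.data		= &init_net.example_value,	/* hypothetical field */
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec,
		},
		{}
	};

	static int example_net_init(struct net *net)
	{
		/* example_hdr is a hypothetical per-net member */
		net->example_hdr = register_net_sysctl_table(net, example_table);
		return net->example_hdr ? 0 : -ENOMEM;
	}

	static void example_net_exit(struct net *net)
	{
		unregister_net_sysctl_table(net->example_hdr);
	}

Because the fixup is done in place, a table registered this way must still
hold its init_net pointers when it is passed in.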
+diff -Nurb linux-2.6.22-try2/lib/kobject.c linux-2.6.22-try2-netns/lib/kobject.c
+--- linux-2.6.22-try2/lib/kobject.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/lib/kobject.c	2007-12-19 22:49:18.000000000 -0500
+@@ -44,11 +44,11 @@
+ 	return error;
+ }
+ 
+-static int create_dir(struct kobject *kobj, struct sysfs_dirent *shadow_parent)
++static int create_dir(struct kobject * kobj)
+ {
+ 	int error = 0;
+ 	if (kobject_name(kobj)) {
+-		error = sysfs_create_dir(kobj, shadow_parent);
++		error = sysfs_create_dir(kobj);
+ 		if (!error) {
+ 			if ((error = populate_dir(kobj)))
+ 				sysfs_remove_dir(kobj);
+@@ -157,12 +157,11 @@
+ }
+ 
+ /**
+- *	kobject_shadow_add - add an object to the hierarchy.
++ *	kobject_add - add an object to the hierarchy.
+  *	@kobj:	object.
+- *	@shadow_parent: sysfs directory to add to.
+  */
+ 
+-int kobject_shadow_add(struct kobject *kobj, struct sysfs_dirent *shadow_parent)
++int kobject_add(struct kobject * kobj)
+ {
+ 	int error = 0;
+ 	struct kobject * parent;
+@@ -194,7 +193,7 @@
+ 		kobj->parent = parent;
+ 	}
+ 
+-	error = create_dir(kobj, shadow_parent);
++	error = create_dir(kobj);
+ 	if (error) {
+ 		/* unlink does the kobject_put() for us */
+ 		unlink(kobj);
+@@ -216,16 +215,6 @@
+ }
+ 
+ /**
+- *	kobject_add - add an object to the hierarchy.
+- *	@kobj:	object.
+- */
+-int kobject_add(struct kobject * kobj)
+-{
+-	return kobject_shadow_add(kobj, NULL);
+-}
+-
+-
+-/**
+  *	kobject_register - initialize and add an object.
+  *	@kobj:	object in question.
+  */
+@@ -338,7 +327,7 @@
+ 	/* Note : if we want to send the new name alone, not the full path,
+ 	 * we could probably use kobject_name(kobj); */
+ 
+-	error = sysfs_rename_dir(kobj, kobj->parent->sd, new_name);
++	error = sysfs_rename_dir(kobj, new_name);
+ 
+ 	/* This function is mostly/only used for network interface.
+ 	 * Some hotplug package track interfaces by their name and
+@@ -355,27 +344,6 @@
+ }
+ 
+ /**
+- *	kobject_rename - change the name of an object
+- *	@kobj:	object in question.
+- *	@new_parent: object's new parent
+- *	@new_name: object's new name
+- */
+-
+-int kobject_shadow_rename(struct kobject *kobj,
+-			  struct sysfs_dirent *new_parent, const char *new_name)
+-{
+-	int error = 0;
+-
+-	kobj = kobject_get(kobj);
+-	if (!kobj)
+-		return -EINVAL;
+-	error = sysfs_rename_dir(kobj, new_parent, new_name);
+-	kobject_put(kobj);
+-
+-	return error;
+-}
+-
+-/**
+  *	kobject_move - move object to another parent
+  *	@kobj:	object in question.
+  *	@new_parent: object's new parent (can be NULL)
+diff -Nurb linux-2.6.22-try2/lib/kobject_uevent.c linux-2.6.22-try2-netns/lib/kobject_uevent.c
+--- linux-2.6.22-try2/lib/kobject_uevent.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/lib/kobject_uevent.c	2007-12-19 22:49:18.000000000 -0500
+@@ -290,9 +290,8 @@
+ #if defined(CONFIG_NET)
+ static int __init kobject_uevent_init(void)
+ {
+-	uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL,
+-					    NULL, THIS_MODULE);
+-
++	uevent_sock = netlink_kernel_create(&init_net, NETLINK_KOBJECT_UEVENT,
++					    1, NULL, NULL, THIS_MODULE);
+ 	if (!uevent_sock) {
+ 		printk(KERN_ERR
+ 		       "kobject_uevent: unable to create netlink socket!\n");
+diff -Nurb linux-2.6.22-try2/net/802/tr.c linux-2.6.22-try2-netns/net/802/tr.c
+--- linux-2.6.22-try2/net/802/tr.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/802/tr.c	2007-12-19 22:49:18.000000000 -0500
+@@ -36,6 +36,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/init.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+ 
+ static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev);
+ static void rif_check_expire(unsigned long dummy);
+@@ -532,7 +533,7 @@
+ 		seq_puts(seq,
+ 		     "if     TR address       TTL   rcf   routing segments\n");
+ 	else {
+-		struct net_device *dev = dev_get_by_index(entry->iface);
++		struct net_device *dev = dev_get_by_index(&init_net, entry->iface);
+ 		long ttl = (long) (entry->last_used + sysctl_tr_rif_timeout)
+ 				- (long) jiffies;
+ 
+@@ -639,7 +640,7 @@
+ 	rif_timer.function = rif_check_expire;
+ 	add_timer(&rif_timer);
+ 
+-	proc_net_fops_create("tr_rif", S_IRUGO, &rif_seq_fops);
++	proc_net_fops_create(&init_net, "tr_rif", S_IRUGO, &rif_seq_fops);
+ 	return 0;
+ }
+ 
+diff -Nurb linux-2.6.22-try2/net/8021q/vlan.c linux-2.6.22-try2-netns/net/8021q/vlan.c
+--- linux-2.6.22-try2/net/8021q/vlan.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/8021q/vlan.c	2007-12-19 22:49:18.000000000 -0500
+@@ -31,6 +31,7 @@
+ #include <net/arp.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/notifier.h>
++#include <net/net_namespace.h>
+ 
+ #include <linux/if_vlan.h>
+ #include "vlan.h"
+@@ -50,7 +51,7 @@
+ static char vlan_buggyright[] = "David S. Miller <davem@redhat.com>";
+ 
+ static int vlan_device_event(struct notifier_block *, unsigned long, void *);
+-static int vlan_ioctl_handler(void __user *);
++static int vlan_ioctl_handler(struct net *net, void __user *);
+ static int unregister_vlan_dev(struct net_device *, unsigned short );
+ 
+ static struct notifier_block vlan_notifier_block = {
+@@ -124,7 +125,7 @@
+ 	struct net_device *dev, *nxt;
+ 
+ 	rtnl_lock();
+-	for_each_netdev_safe(dev, nxt) {
++	for_each_netdev_safe(&init_net, dev, nxt) {
+ 		if (dev->priv_flags & IFF_802_1Q_VLAN) {
+ 			unregister_vlan_dev(VLAN_DEV_INFO(dev)->real_dev,
+ 					    VLAN_DEV_INFO(dev)->vlan_id);
+@@ -599,6 +600,9 @@
+ 	int i, flgs;
+ 	struct net_device *vlandev;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (!grp)
+ 		goto out;
+ 
+@@ -678,7 +682,7 @@
+  *	o execute requested action or pass command to the device driver
+  *   arg is really a struct vlan_ioctl_args __user *.
+  */
+-static int vlan_ioctl_handler(void __user *arg)
++static int vlan_ioctl_handler(struct net *net, void __user *arg)
+ {
+ 	int err;
+ 	unsigned short vid = 0;
+@@ -707,7 +711,7 @@
+ 	case GET_VLAN_REALDEV_NAME_CMD:
+ 	case GET_VLAN_VID_CMD:
+ 		err = -ENODEV;
+-		dev = __dev_get_by_name(args.device1);
++		dev = __dev_get_by_name(&init_net, args.device1);
+ 		if (!dev)
+ 			goto out;
+ 
+diff -Nurb linux-2.6.22-try2/net/8021q/vlan_dev.c linux-2.6.22-try2-netns/net/8021q/vlan_dev.c
+--- linux-2.6.22-try2/net/8021q/vlan_dev.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/8021q/vlan_dev.c	2007-12-19 22:49:18.000000000 -0500
+@@ -132,6 +132,11 @@
+ 
+ 	vhdr = (struct vlan_hdr *)(skb->data);
+ 
++	if (dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
++
+ 	/* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */
+ 	vlan_TCI = ntohs(vhdr->h_vlan_TCI);
+ 
+@@ -776,7 +781,7 @@
+ 		break;
+ 
+ 	case SIOCETHTOOL:
+-		err = dev_ethtool(&ifrr);
++		err = dev_ethtool(real_dev->nd_net, &ifrr);
+ 	}
+ 
+ 	if (!err)
+diff -Nurb linux-2.6.22-try2/net/8021q/vlan_netlink.c linux-2.6.22-try2-netns/net/8021q/vlan_netlink.c
+--- linux-2.6.22-try2/net/8021q/vlan_netlink.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/8021q/vlan_netlink.c	2007-12-19 22:49:18.000000000 -0500
+@@ -11,6 +11,7 @@
+ #include <linux/kernel.h>
+ #include <linux/netdevice.h>
+ #include <linux/if_vlan.h>
++#include <net/net_namespace.h>
+ #include <net/netlink.h>
+ #include <net/rtnetlink.h>
+ #include "vlan.h"
+@@ -105,7 +106,7 @@
+ 
+ 	if (!tb[IFLA_LINK])
+ 		return -EINVAL;
+-	real_dev = __dev_get_by_index(nla_get_u32(tb[IFLA_LINK]));
++	real_dev = __dev_get_by_index(&init_net, nla_get_u32(tb[IFLA_LINK]));
+ 	if (!real_dev)
+ 		return -ENODEV;
+ 
+diff -Nurb linux-2.6.22-try2/net/8021q/vlanproc.c linux-2.6.22-try2-netns/net/8021q/vlanproc.c
+--- linux-2.6.22-try2/net/8021q/vlanproc.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/8021q/vlanproc.c	2007-12-19 22:49:18.000000000 -0500
+@@ -33,6 +33,7 @@
+ #include <linux/fs.h>
+ #include <linux/netdevice.h>
+ #include <linux/if_vlan.h>
++#include <net/net_namespace.h>
+ #include "vlanproc.h"
+ #include "vlan.h"
+ 
+@@ -143,7 +144,7 @@
+ 		remove_proc_entry(name_conf, proc_vlan_dir);
+ 
+ 	if (proc_vlan_dir)
+-		proc_net_remove(name_root);
++		proc_net_remove(&init_net, name_root);
+ 
+ 	/* Dynamically added entries should be cleaned up as their vlan_device
+ 	 * is removed, so we should not have to take care of it here...
+@@ -156,7 +157,7 @@
+ 
+ int __init vlan_proc_init(void)
+ {
+-	proc_vlan_dir = proc_mkdir(name_root, proc_net);
++	proc_vlan_dir = proc_mkdir(name_root, init_net.proc_net);
+ 	if (proc_vlan_dir) {
+ 		proc_vlan_conf = create_proc_entry(name_conf,
+ 						   S_IFREG|S_IRUSR|S_IWUSR,
+@@ -253,7 +254,7 @@
+ 	if (*pos == 0)
+ 		return SEQ_START_TOKEN;
+ 
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if (!is_vlan_dev(dev))
+ 			continue;
+ 
+@@ -272,9 +273,9 @@
+ 
+ 	dev = (struct net_device *)v;
+ 	if (v == SEQ_START_TOKEN)
+-		dev = net_device_entry(&dev_base_head);
++		dev = net_device_entry(&init_net.dev_base_head);
+ 
+-	for_each_netdev_continue(dev) {
++	for_each_netdev_continue(&init_net, dev) {
+ 		if (!is_vlan_dev(dev))
+ 			continue;
+ 
+diff -Nurb linux-2.6.22-try2/net/Kconfig linux-2.6.22-try2-netns/net/Kconfig
+--- linux-2.6.22-try2/net/Kconfig	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/Kconfig	2007-12-19 22:49:18.000000000 -0500
+@@ -27,6 +27,13 @@
+ 
+ menu "Networking options"
+ 
++config NET_NS
++	bool "Network namespace support"
++	depends on EXPERIMENTAL
++	help
++	  Support for what appears to user space as multiple instances of the
++	  network stack.
++
+ source "net/packet/Kconfig"
+ source "net/unix/Kconfig"
+ source "net/xfrm/Kconfig"
+diff -Nurb linux-2.6.22-try2/net/appletalk/aarp.c linux-2.6.22-try2-netns/net/appletalk/aarp.c
+--- linux-2.6.22-try2/net/appletalk/aarp.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/appletalk/aarp.c	2007-12-19 22:49:18.000000000 -0500
+@@ -330,15 +330,19 @@
+ static int aarp_device_event(struct notifier_block *this, unsigned long event,
+ 			     void *ptr)
+ {
++	struct net_device *dev = ptr;
+ 	int ct;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (event == NETDEV_DOWN) {
+ 		write_lock_bh(&aarp_lock);
+ 
+ 		for (ct = 0; ct < AARP_HASH_SIZE; ct++) {
+-			__aarp_expire_device(&resolved[ct], ptr);
+-			__aarp_expire_device(&unresolved[ct], ptr);
+-			__aarp_expire_device(&proxies[ct], ptr);
++			__aarp_expire_device(&resolved[ct], dev);
++			__aarp_expire_device(&unresolved[ct], dev);
++			__aarp_expire_device(&proxies[ct], dev);
+ 		}
+ 
+ 		write_unlock_bh(&aarp_lock);
+@@ -712,6 +716,9 @@
+ 	struct atalk_addr sa, *ma, da;
+ 	struct atalk_iface *ifa;
+ 
++	if (dev->nd_net != &init_net)
++		goto out0;
++
+ 	/* We only do Ethernet SNAP AARP. */
+ 	if (dev->type != ARPHRD_ETHER)
+ 		goto out0;
+diff -Nurb linux-2.6.22-try2/net/appletalk/atalk_proc.c linux-2.6.22-try2-netns/net/appletalk/atalk_proc.c
+--- linux-2.6.22-try2/net/appletalk/atalk_proc.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/appletalk/atalk_proc.c	2007-12-19 22:49:18.000000000 -0500
+@@ -13,6 +13,7 @@
+ #include <linux/seq_file.h>
+ #include <net/sock.h>
+ #include <linux/atalk.h>
++#include <net/net_namespace.h>
+ 
+ 
+ static __inline__ struct atalk_iface *atalk_get_interface_idx(loff_t pos)
+@@ -271,7 +272,7 @@
+ 	struct proc_dir_entry *p;
+ 	int rc = -ENOMEM;
+ 
+-	atalk_proc_dir = proc_mkdir("atalk", proc_net);
++	atalk_proc_dir = proc_mkdir("atalk", init_net.proc_net);
+ 	if (!atalk_proc_dir)
+ 		goto out;
+ 	atalk_proc_dir->owner = THIS_MODULE;
+@@ -306,7 +307,7 @@
+ out_route:
+ 	remove_proc_entry("interface", atalk_proc_dir);
+ out_interface:
+-	remove_proc_entry("atalk", proc_net);
++	remove_proc_entry("atalk", init_net.proc_net);
+ 	goto out;
+ }
+ 
+@@ -316,5 +317,5 @@
+ 	remove_proc_entry("route", atalk_proc_dir);
+ 	remove_proc_entry("socket", atalk_proc_dir);
+ 	remove_proc_entry("arp", atalk_proc_dir);
+-	remove_proc_entry("atalk", proc_net);
++	remove_proc_entry("atalk", init_net.proc_net);
+ }
+diff -Nurb linux-2.6.22-try2/net/appletalk/ddp.c linux-2.6.22-try2-netns/net/appletalk/ddp.c
+--- linux-2.6.22-try2/net/appletalk/ddp.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/appletalk/ddp.c	2007-12-19 22:49:18.000000000 -0500
+@@ -647,9 +647,14 @@
+ static int ddp_device_event(struct notifier_block *this, unsigned long event,
+ 			    void *ptr)
+ {
++	struct net_device *dev = ptr;
++
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (event == NETDEV_DOWN)
+ 		/* Discard any use of this */
+-		atalk_dev_down(ptr);
++		atalk_dev_down(dev);
+ 
+ 	return NOTIFY_DONE;
+ }
+@@ -672,7 +677,7 @@
+ 	if (copy_from_user(&atreq, arg, sizeof(atreq)))
+ 		return -EFAULT;
+ 
+-	dev = __dev_get_by_name(atreq.ifr_name);
++	dev = __dev_get_by_name(&init_net, atreq.ifr_name);
+ 	if (!dev)
+ 		return -ENODEV;
+ 
+@@ -896,7 +901,7 @@
+ 				if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1))
+ 					return -EFAULT;
+ 				name[IFNAMSIZ-1] = '\0';
+-				dev = __dev_get_by_name(name);
++				dev = __dev_get_by_name(&init_net, name);
+ 				if (!dev)
+ 					return -ENODEV;
+ 			}
+@@ -1024,11 +1029,14 @@
+  * Create a socket. Initialise the socket, blank the addresses
+  * set the state.
+  */
+-static int atalk_create(struct socket *sock, int protocol)
++static int atalk_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 	int rc = -ESOCKTNOSUPPORT;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	/*
+ 	 * We permit SOCK_DGRAM and RAW is an extension. It is trivial to do
+ 	 * and gives you the full ELAP frame. Should be handy for CAP 8)
+@@ -1036,7 +1044,7 @@
+ 	if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
+ 		goto out;
+ 	rc = -ENOMEM;
+-	sk = sk_alloc(PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1);
++	sk = sk_alloc(net, PF_APPLETALK, GFP_KERNEL, &ddp_proto, 1);
+ 	if (!sk)
+ 		goto out;
+ 	rc = 0;
+@@ -1265,7 +1273,7 @@
+ 
+ static int handle_ip_over_ddp(struct sk_buff *skb)
+ {
+-	struct net_device *dev = __dev_get_by_name("ipddp0");
++	struct net_device *dev = __dev_get_by_name(&init_net, "ipddp0");
+ 	struct net_device_stats *stats;
+ 
+ 	/* This needs to be able to handle ipddp"N" devices */
+@@ -1398,6 +1406,9 @@
+ 	int origlen;
+ 	__u16 len_hops;
+ 
++	if (dev->nd_net != &init_net)
++		goto freeit;
++
+ 	/* Don't mangle buffer if shared */
+ 	if (!(skb = skb_share_check(skb, GFP_ATOMIC)))
+ 		goto out;
+@@ -1483,6 +1494,9 @@
+ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev,
+ 		     struct packet_type *pt, struct net_device *orig_dev)
+ {
++	if (dev->nd_net != &init_net)
++		goto freeit;
++
+ 	/* Expand any short form frames */
+ 	if (skb_mac_header(skb)[2] == 1) {
+ 		struct ddpehdr *ddp;
+diff -Nurb linux-2.6.22-try2/net/atm/clip.c linux-2.6.22-try2-netns/net/atm/clip.c
+--- linux-2.6.22-try2/net/atm/clip.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/atm/clip.c	2007-12-19 22:49:18.000000000 -0500
+@@ -293,7 +293,7 @@
+ 	struct neigh_parms *parms;
+ 
+ 	DPRINTK("clip_constructor (neigh %p, entry %p)\n", neigh, entry);
+-	neigh->type = inet_addr_type(entry->ip);
++	neigh->type = inet_addr_type(&init_net, entry->ip);
+ 	if (neigh->type != RTN_UNICAST)
+ 		return -EINVAL;
+ 
+@@ -525,7 +525,10 @@
+ 	struct atmarp_entry *entry;
+ 	int error;
+ 	struct clip_vcc *clip_vcc;
+-	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} };
++	struct flowi fl = { 
++		.fl_net = &init_net,
++		.nl_u = { .ip4_u = { .daddr = ip, .tos = 1}} 
++	};
+ 	struct rtable *rt;
+ 
+ 	if (vcc->push != clip_push) {
+@@ -620,6 +623,9 @@
+ {
+ 	struct net_device *dev = arg;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (event == NETDEV_UNREGISTER) {
+ 		neigh_ifdown(&clip_tbl, dev);
+ 		return NOTIFY_DONE;
+@@ -954,6 +960,7 @@
+ 
+ 	seq = file->private_data;
+ 	seq->private = state;
++	state->ns.net = get_net(PROC_NET(inode));
+ out:
+ 	return rc;
+ 
+@@ -962,11 +969,19 @@
+ 	goto out;
+ }
+ 
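++/* Matches the get_net() taken in arp_seq_open(). */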
++static int arp_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct clip_seq_state *state = seq->private;
++	put_net(state->ns.net);
++	return seq_release_private(inode, file);
++}
++
+ static const struct file_operations arp_seq_fops = {
+ 	.open		= arp_seq_open,
+ 	.read		= seq_read,
+ 	.llseek		= seq_lseek,
+-	.release	= seq_release_private,
++	.release	= arp_seq_release,
+ 	.owner		= THIS_MODULE
+ };
+ #endif
+diff -Nurb linux-2.6.22-try2/net/atm/common.c linux-2.6.22-try2-netns/net/atm/common.c
+--- linux-2.6.22-try2/net/atm/common.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/atm/common.c	2007-12-19 22:49:18.000000000 -0500
+@@ -132,7 +132,7 @@
+ 	.obj_size = sizeof(struct atm_vcc),
+ };
+ 
+-int vcc_create(struct socket *sock, int protocol, int family)
++int vcc_create(struct net *net, struct socket *sock, int protocol, int family)
+ {
+ 	struct sock *sk;
+ 	struct atm_vcc *vcc;
+@@ -140,7 +140,7 @@
+ 	sock->sk = NULL;
+ 	if (sock->type == SOCK_STREAM)
+ 		return -EINVAL;
+-	sk = sk_alloc(family, GFP_KERNEL, &vcc_proto, 1);
++	sk = sk_alloc(net, family, GFP_KERNEL, &vcc_proto, 1);
+ 	if (!sk)
+ 		return -ENOMEM;
+ 	sock_init_data(sock, sk);
+diff -Nurb linux-2.6.22-try2/net/atm/common.h linux-2.6.22-try2-netns/net/atm/common.h
+--- linux-2.6.22-try2/net/atm/common.h	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/atm/common.h	2007-12-19 22:49:18.000000000 -0500
+@@ -10,7 +10,7 @@
+ #include <linux/poll.h> /* for poll_table */
+ 
+ 
+-int vcc_create(struct socket *sock, int protocol, int family);
++int vcc_create(struct net *net, struct socket *sock, int protocol, int family);
+ int vcc_release(struct socket *sock);
+ int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
+ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+diff -Nurb linux-2.6.22-try2/net/atm/mpc.c linux-2.6.22-try2-netns/net/atm/mpc.c
+--- linux-2.6.22-try2/net/atm/mpc.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/atm/mpc.c	2007-12-19 22:49:18.000000000 -0500
+@@ -244,7 +244,7 @@
+ 	char name[IFNAMSIZ];
+ 
+ 	sprintf(name, "lec%d", itf);
+-	dev = dev_get_by_name(name);
++	dev = dev_get_by_name(&init_net, name);
+ 
+ 	return dev;
+ }
+@@ -956,6 +956,10 @@
+ 	struct lec_priv *priv;
+ 
+ 	dev = (struct net_device *)dev_ptr;
++
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (dev->name == NULL || strncmp(dev->name, "lec", 3))
+ 		return NOTIFY_DONE; /* we are only interested in lec:s */
+ 
+diff -Nurb linux-2.6.22-try2/net/atm/proc.c linux-2.6.22-try2-netns/net/atm/proc.c
+--- linux-2.6.22-try2/net/atm/proc.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/atm/proc.c	2007-12-19 22:49:18.000000000 -0500
+@@ -22,6 +22,7 @@
+ #include <linux/netdevice.h>
+ #include <linux/atmclip.h>
+ #include <linux/init.h> /* for __init */
++#include <net/net_namespace.h>
+ #include <net/atmclip.h>
+ #include <asm/uaccess.h>
+ #include <asm/atomic.h>
+@@ -475,7 +476,7 @@
+ 		if (e->dirent)
+ 			remove_proc_entry(e->name, atm_proc_root);
+ 	}
+-	remove_proc_entry("net/atm", NULL);
++	remove_proc_entry("atm", init_net.proc_net);
+ }
+ 
+ int __init atm_proc_init(void)
+@@ -483,7 +484,7 @@
+ 	static struct atm_proc_entry *e;
+ 	int ret;
+ 
+-	atm_proc_root = proc_mkdir("net/atm",NULL);
++	atm_proc_root = proc_mkdir("atm", init_net.proc_net);
+ 	if (!atm_proc_root)
+ 		goto err_out;
+ 	for (e = atm_proc_ents; e->name; e++) {
+diff -Nurb linux-2.6.22-try2/net/atm/pvc.c linux-2.6.22-try2-netns/net/atm/pvc.c
+--- linux-2.6.22-try2/net/atm/pvc.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/atm/pvc.c	2007-12-19 22:49:18.000000000 -0500
+@@ -124,10 +124,13 @@
+ };
+ 
+ 
+-static int pvc_create(struct socket *sock,int protocol)
++static int pvc_create(struct net *net, struct socket *sock,int protocol)
+ {
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	sock->ops = &pvc_proto_ops;
+-	return vcc_create(sock, protocol, PF_ATMPVC);
++	return vcc_create(net, sock, protocol, PF_ATMPVC);
+ }
+ 
+ 
+diff -Nurb linux-2.6.22-try2/net/atm/svc.c linux-2.6.22-try2-netns/net/atm/svc.c
+--- linux-2.6.22-try2/net/atm/svc.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/atm/svc.c	2007-12-19 22:49:18.000000000 -0500
+@@ -33,7 +33,7 @@
+ #endif
+ 
+ 
+-static int svc_create(struct socket *sock,int protocol);
++static int svc_create(struct net *net, struct socket *sock,int protocol);
+ 
+ 
+ /*
+@@ -335,7 +335,7 @@
+ 
+ 	lock_sock(sk);
+ 
+-	error = svc_create(newsock,0);
++	error = svc_create(sk->sk_net, newsock,0);
+ 	if (error)
+ 		goto out;
+ 
+@@ -636,12 +636,15 @@
+ };
+ 
+ 
+-static int svc_create(struct socket *sock,int protocol)
++static int svc_create(struct net *net, struct socket *sock,int protocol)
+ {
+ 	int error;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	sock->ops = &svc_proto_ops;
+-	error = vcc_create(sock, protocol, AF_ATMSVC);
++	error = vcc_create(net, sock, protocol, AF_ATMSVC);
+ 	if (error) return error;
+ 	ATM_SD(sock)->local.sas_family = AF_ATMSVC;
+ 	ATM_SD(sock)->remote.sas_family = AF_ATMSVC;
+diff -Nurb linux-2.6.22-try2/net/ax25/af_ax25.c linux-2.6.22-try2-netns/net/ax25/af_ax25.c
+--- linux-2.6.22-try2/net/ax25/af_ax25.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ax25/af_ax25.c	2007-12-19 22:49:18.000000000 -0500
+@@ -47,6 +47,7 @@
+ #include <net/tcp_states.h>
+ #include <net/ip.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+ 
+ 
+ 
+@@ -103,6 +104,9 @@
+ {
+ 	struct net_device *dev = (struct net_device *)ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	/* Reject non AX.25 devices */
+ 	if (dev->type != ARPHRD_AX25)
+ 		return NOTIFY_DONE;
+@@ -627,7 +631,7 @@
+ 			break;
+ 		}
+ 
+-		dev = dev_get_by_name(devname);
++		dev = dev_get_by_name(&init_net, devname);
+ 		if (dev == NULL) {
+ 			res = -ENODEV;
+ 			break;
+@@ -779,11 +783,14 @@
+ 	.obj_size = sizeof(struct sock),
+ };
+ 
+-static int ax25_create(struct socket *sock, int protocol)
++static int ax25_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 	ax25_cb *ax25;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	switch (sock->type) {
+ 	case SOCK_DGRAM:
+ 		if (protocol == 0 || protocol == PF_AX25)
+@@ -829,7 +836,7 @@
+ 		return -ESOCKTNOSUPPORT;
+ 	}
+ 
+-	if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL)
++	if ((sk = sk_alloc(net, PF_AX25, GFP_ATOMIC, &ax25_proto, 1)) == NULL)
+ 		return -ENOMEM;
+ 
+ 	ax25 = sk->sk_protinfo = ax25_create_cb();
+@@ -854,7 +861,7 @@
+ 	struct sock *sk;
+ 	ax25_cb *ax25, *oax25;
+ 
+-	if ((sk = sk_alloc(PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL)
++	if ((sk = sk_alloc(osk->sk_net, PF_AX25, GFP_ATOMIC, osk->sk_prot, 1)) == NULL)
+ 		return NULL;
+ 
+ 	if ((ax25 = ax25_create_cb()) == NULL) {
+@@ -1998,9 +2005,9 @@
+ 	register_netdevice_notifier(&ax25_dev_notifier);
+ 	ax25_register_sysctl();
+ 
+-	proc_net_fops_create("ax25_route", S_IRUGO, &ax25_route_fops);
+-	proc_net_fops_create("ax25", S_IRUGO, &ax25_info_fops);
+-	proc_net_fops_create("ax25_calls", S_IRUGO, &ax25_uid_fops);
++	proc_net_fops_create(&init_net, "ax25_route", S_IRUGO, &ax25_route_fops);
++	proc_net_fops_create(&init_net, "ax25", S_IRUGO, &ax25_info_fops);
++	proc_net_fops_create(&init_net, "ax25_calls", S_IRUGO, &ax25_uid_fops);
+ out:
+ 	return rc;
+ }
+@@ -2014,9 +2021,9 @@
+ 
+ static void __exit ax25_exit(void)
+ {
+-	proc_net_remove("ax25_route");
+-	proc_net_remove("ax25");
+-	proc_net_remove("ax25_calls");
++	proc_net_remove(&init_net, "ax25_route");
++	proc_net_remove(&init_net, "ax25");
++	proc_net_remove(&init_net, "ax25_calls");
+ 	ax25_rt_free();
+ 	ax25_uid_free();
+ 	ax25_dev_free();
+diff -Nurb linux-2.6.22-try2/net/ax25/ax25_in.c linux-2.6.22-try2-netns/net/ax25/ax25_in.c
+--- linux-2.6.22-try2/net/ax25/ax25_in.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ax25/ax25_in.c	2007-12-19 22:49:18.000000000 -0500
+@@ -451,6 +451,11 @@
+ 	skb->sk = NULL;		/* Initially we don't know who it's for */
+ 	skb->destructor = NULL;	/* Who initializes this, dammit?! */
+ 
++	if (dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
++
+ 	if ((*skb->data & 0x0F) != 0) {
+ 		kfree_skb(skb);	/* Not a KISS data frame */
+ 		return 0;
+diff -Nurb linux-2.6.22-try2/net/bluetooth/af_bluetooth.c linux-2.6.22-try2-netns/net/bluetooth/af_bluetooth.c
+--- linux-2.6.22-try2/net/bluetooth/af_bluetooth.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bluetooth/af_bluetooth.c	2007-12-19 22:49:18.000000000 -0500
+@@ -95,10 +95,13 @@
+ }
+ EXPORT_SYMBOL(bt_sock_unregister);
+ 
+-static int bt_sock_create(struct socket *sock, int proto)
++static int bt_sock_create(struct net *net, struct socket *sock, int proto)
+ {
+ 	int err;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	if (proto < 0 || proto >= BT_MAX_PROTO)
+ 		return -EINVAL;
+ 
+@@ -113,7 +116,7 @@
+ 	read_lock(&bt_proto_lock);
+ 
+ 	if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) {
+-		err = bt_proto[proto]->create(sock, proto);
++		err = bt_proto[proto]->create(net, sock, proto);
+ 		module_put(bt_proto[proto]->owner);
+ 	}
+ 
+diff -Nurb linux-2.6.22-try2/net/bluetooth/bnep/sock.c linux-2.6.22-try2-netns/net/bluetooth/bnep/sock.c
+--- linux-2.6.22-try2/net/bluetooth/bnep/sock.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bluetooth/bnep/sock.c	2007-12-19 22:49:18.000000000 -0500
+@@ -204,7 +204,7 @@
+ 	.obj_size	= sizeof(struct bt_sock)
+ };
+ 
+-static int bnep_sock_create(struct socket *sock, int protocol)
++static int bnep_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 
+@@ -213,7 +213,7 @@
+ 	if (sock->type != SOCK_RAW)
+ 		return -ESOCKTNOSUPPORT;
+ 
+-	sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1);
++	sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &bnep_proto, 1);
+ 	if (!sk)
+ 		return -ENOMEM;
+ 
+diff -Nurb linux-2.6.22-try2/net/bluetooth/cmtp/sock.c linux-2.6.22-try2-netns/net/bluetooth/cmtp/sock.c
+--- linux-2.6.22-try2/net/bluetooth/cmtp/sock.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bluetooth/cmtp/sock.c	2007-12-19 22:49:18.000000000 -0500
+@@ -195,7 +195,7 @@
+ 	.obj_size	= sizeof(struct bt_sock)
+ };
+ 
+-static int cmtp_sock_create(struct socket *sock, int protocol)
++static int cmtp_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 
+@@ -204,7 +204,7 @@
+ 	if (sock->type != SOCK_RAW)
+ 		return -ESOCKTNOSUPPORT;
+ 
+-	sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1);
++	sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &cmtp_proto, 1);
+ 	if (!sk)
+ 		return -ENOMEM;
+ 
+diff -Nurb linux-2.6.22-try2/net/bluetooth/hci_sock.c linux-2.6.22-try2-netns/net/bluetooth/hci_sock.c
+--- linux-2.6.22-try2/net/bluetooth/hci_sock.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bluetooth/hci_sock.c	2007-12-19 22:49:18.000000000 -0500
+@@ -618,7 +618,7 @@
+ 	.obj_size	= sizeof(struct hci_pinfo)
+ };
+ 
+-static int hci_sock_create(struct socket *sock, int protocol)
++static int hci_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 
+@@ -629,7 +629,7 @@
+ 
+ 	sock->ops = &hci_sock_ops;
+ 
+-	sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1);
++	sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hci_sk_proto, 1);
+ 	if (!sk)
+ 		return -ENOMEM;
+ 
+diff -Nurb linux-2.6.22-try2/net/bluetooth/hidp/sock.c linux-2.6.22-try2-netns/net/bluetooth/hidp/sock.c
+--- linux-2.6.22-try2/net/bluetooth/hidp/sock.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bluetooth/hidp/sock.c	2007-12-19 22:49:18.000000000 -0500
+@@ -246,7 +246,7 @@
+ 	.obj_size	= sizeof(struct bt_sock)
+ };
+ 
+-static int hidp_sock_create(struct socket *sock, int protocol)
++static int hidp_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 
+@@ -255,7 +255,7 @@
+ 	if (sock->type != SOCK_RAW)
+ 		return -ESOCKTNOSUPPORT;
+ 
+-	sk = sk_alloc(PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1);
++	sk = sk_alloc(net, PF_BLUETOOTH, GFP_ATOMIC, &hidp_proto, 1);
+ 	if (!sk)
+ 		return -ENOMEM;
+ 
+diff -Nurb linux-2.6.22-try2/net/bluetooth/l2cap.c linux-2.6.22-try2-netns/net/bluetooth/l2cap.c
+--- linux-2.6.22-try2/net/bluetooth/l2cap.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bluetooth/l2cap.c	2007-12-19 22:49:18.000000000 -0500
+@@ -518,11 +518,11 @@
+ 	.obj_size	= sizeof(struct l2cap_pinfo)
+ };
+ 
+-static struct sock *l2cap_sock_alloc(struct socket *sock, int proto, gfp_t prio)
++static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
+ {
+ 	struct sock *sk;
+ 
+-	sk = sk_alloc(PF_BLUETOOTH, prio, &l2cap_proto, 1);
++	sk = sk_alloc(net, PF_BLUETOOTH, prio, &l2cap_proto, 1);
+ 	if (!sk)
+ 		return NULL;
+ 
+@@ -543,7 +543,7 @@
+ 	return sk;
+ }
+ 
+-static int l2cap_sock_create(struct socket *sock, int protocol)
++static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 
+@@ -560,7 +560,7 @@
+ 
+ 	sock->ops = &l2cap_sock_ops;
+ 
+-	sk = l2cap_sock_alloc(sock, protocol, GFP_ATOMIC);
++	sk = l2cap_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+ 	if (!sk)
+ 		return -ENOMEM;
+ 
+@@ -1425,7 +1425,7 @@
+ 		goto response;
+ 	}
+ 
+-	sk = l2cap_sock_alloc(NULL, BTPROTO_L2CAP, GFP_ATOMIC);
++	sk = l2cap_sock_alloc(parent->sk_net, NULL, BTPROTO_L2CAP, GFP_ATOMIC);
+ 	if (!sk)
+ 		goto response;
+ 
+diff -Nurb linux-2.6.22-try2/net/bluetooth/rfcomm/sock.c linux-2.6.22-try2-netns/net/bluetooth/rfcomm/sock.c
+--- linux-2.6.22-try2/net/bluetooth/rfcomm/sock.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bluetooth/rfcomm/sock.c	2007-12-19 22:49:18.000000000 -0500
+@@ -282,12 +282,12 @@
+ 	.obj_size	= sizeof(struct rfcomm_pinfo)
+ };
+ 
+-static struct sock *rfcomm_sock_alloc(struct socket *sock, int proto, gfp_t prio)
++static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
+ {
+ 	struct rfcomm_dlc *d;
+ 	struct sock *sk;
+ 
+-	sk = sk_alloc(PF_BLUETOOTH, prio, &rfcomm_proto, 1);
++	sk = sk_alloc(net, PF_BLUETOOTH, prio, &rfcomm_proto, 1);
+ 	if (!sk)
+ 		return NULL;
+ 
+@@ -323,7 +323,7 @@
+ 	return sk;
+ }
+ 
+-static int rfcomm_sock_create(struct socket *sock, int protocol)
++static int rfcomm_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 
+@@ -336,7 +336,7 @@
+ 
+ 	sock->ops = &rfcomm_sock_ops;
+ 
+-	sk = rfcomm_sock_alloc(sock, protocol, GFP_ATOMIC);
++	sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+ 	if (!sk)
+ 		return -ENOMEM;
+ 
+@@ -868,7 +868,7 @@
+ 		goto done;
+ 	}
+ 
+-	sk = rfcomm_sock_alloc(NULL, BTPROTO_RFCOMM, GFP_ATOMIC);
++	sk = rfcomm_sock_alloc(parent->sk_net, NULL, BTPROTO_RFCOMM, GFP_ATOMIC);
+ 	if (!sk)
+ 		goto done;
+ 
+diff -Nurb linux-2.6.22-try2/net/bluetooth/sco.c linux-2.6.22-try2-netns/net/bluetooth/sco.c
+--- linux-2.6.22-try2/net/bluetooth/sco.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bluetooth/sco.c	2007-12-19 22:49:18.000000000 -0500
+@@ -414,11 +414,11 @@
+ 	.obj_size	= sizeof(struct sco_pinfo)
+ };
+ 
+-static struct sock *sco_sock_alloc(struct socket *sock, int proto, gfp_t prio)
++static struct sock *sco_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio)
+ {
+ 	struct sock *sk;
+ 
+-	sk = sk_alloc(PF_BLUETOOTH, prio, &sco_proto, 1);
++	sk = sk_alloc(net, PF_BLUETOOTH, prio, &sco_proto, 1);
+ 	if (!sk)
+ 		return NULL;
+ 
+@@ -439,7 +439,7 @@
+ 	return sk;
+ }
+ 
+-static int sco_sock_create(struct socket *sock, int protocol)
++static int sco_sock_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 
+@@ -452,7 +452,7 @@
+ 
+ 	sock->ops = &sco_sock_ops;
+ 
+-	sk = sco_sock_alloc(sock, protocol, GFP_ATOMIC);
++	sk = sco_sock_alloc(net, sock, protocol, GFP_ATOMIC);
+ 	if (!sk)
+ 		return -ENOMEM;
+ 
+@@ -807,7 +807,7 @@
+ 
+ 		bh_lock_sock(parent);
+ 
+-		sk = sco_sock_alloc(NULL, BTPROTO_SCO, GFP_ATOMIC);
++		sk = sco_sock_alloc(parent->sk_net, NULL, BTPROTO_SCO, GFP_ATOMIC);
+ 		if (!sk) {
+ 			bh_unlock_sock(parent);
+ 			goto done;
+diff -Nurb linux-2.6.22-try2/net/bridge/br_if.c linux-2.6.22-try2-netns/net/bridge/br_if.c
+--- linux-2.6.22-try2/net/bridge/br_if.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bridge/br_if.c	2007-12-19 22:49:18.000000000 -0500
+@@ -45,7 +45,7 @@
+ 
+ 	old_fs = get_fs();
+ 	set_fs(KERNEL_DS);
+-	err = dev_ethtool(&ifr);
++	err = dev_ethtool(dev->nd_net, &ifr);
+ 	set_fs(old_fs);
+ 
+ 	if (!err) {
+@@ -314,7 +314,7 @@
+ 	int ret = 0;
+ 
+ 	rtnl_lock();
+-	dev = __dev_get_by_name(name);
++	dev = __dev_get_by_name(&init_net, name);
+ 	if (dev == NULL)
+ 		ret =  -ENXIO; 	/* Could not find device */
+ 
+@@ -455,7 +455,7 @@
+ 	struct net_device *dev, *nxt;
+ 
+ 	rtnl_lock();
+-	for_each_netdev_safe(dev, nxt)
++	for_each_netdev_safe(&init_net, dev, nxt)
+ 		if (dev->priv_flags & IFF_EBRIDGE)
+ 			del_br(dev->priv);
+ 	rtnl_unlock();
+diff -Nurb linux-2.6.22-try2/net/bridge/br_ioctl.c linux-2.6.22-try2-netns/net/bridge/br_ioctl.c
+--- linux-2.6.22-try2/net/bridge/br_ioctl.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bridge/br_ioctl.c	2007-12-19 22:49:18.000000000 -0500
+@@ -18,6 +18,7 @@
+ #include <linux/if_bridge.h>
+ #include <linux/netdevice.h>
+ #include <linux/times.h>
++#include <net/net_namespace.h>
+ #include <asm/uaccess.h>
+ #include "br_private.h"
+ 
+@@ -27,7 +28,7 @@
+ 	struct net_device *dev;
+ 	int i = 0;
+ 
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if (i >= num)
+ 			break;
+ 		if (dev->priv_flags & IFF_EBRIDGE)
+@@ -90,7 +91,7 @@
+ 	if (!capable(CAP_NET_ADMIN))
+ 		return -EPERM;
+ 
+-	dev = dev_get_by_index(ifindex);
++	dev = dev_get_by_index(&init_net, ifindex);
+ 	if (dev == NULL)
+ 		return -EINVAL;
+ 
+@@ -364,7 +365,7 @@
+ 	return -EOPNOTSUPP;
+ }
+ 
+-int br_ioctl_deviceless_stub(unsigned int cmd, void __user *uarg)
++int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uarg)
+ {
+ 	switch (cmd) {
+ 	case SIOCGIFBR:
+diff -Nurb linux-2.6.22-try2/net/bridge/br_netfilter.c linux-2.6.22-try2-netns/net/bridge/br_netfilter.c
+--- linux-2.6.22-try2/net/bridge/br_netfilter.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bridge/br_netfilter.c	2007-12-19 22:49:18.000000000 -0500
+@@ -310,6 +310,7 @@
+ 		if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
+ 			struct rtable *rt;
+ 			struct flowi fl = {
++				.fl_net = &init_net,
+ 				.nl_u = {
+ 					.ip4_u = {
+ 						 .daddr = iph->daddr,
+@@ -518,6 +519,10 @@
+ 	if (unlikely(!pskb_may_pull(skb, len)))
+ 		goto out;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) ||
+ 	    IS_PPPOE_IPV6(skb)) {
+ #ifdef CONFIG_SYSCTL
+@@ -591,6 +596,10 @@
+ {
+ 	struct sk_buff *skb = *pskb;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	if (skb->dst == (struct dst_entry *)&__fake_rtable) {
+ 		dst_release(skb->dst);
+ 		skb->dst = NULL;
+@@ -635,6 +644,10 @@
+ 	struct net_device *parent;
+ 	int pf;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	if (!skb->nf_bridge)
+ 		return NF_ACCEPT;
+ 
+@@ -674,6 +687,10 @@
+ 	struct sk_buff *skb = *pskb;
+ 	struct net_device **d = (struct net_device **)(skb->cb);
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ #ifdef CONFIG_SYSCTL
+ 	if (!brnf_call_arptables)
+ 		return NF_ACCEPT;
+@@ -718,6 +735,10 @@
+ 	struct sk_buff *skb = *pskb;
+ 	struct nf_bridge_info *nf_bridge;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	if (!skb->nf_bridge)
+ 		return NF_ACCEPT;
+ 
+@@ -762,6 +783,10 @@
+ 	struct net_device *realoutdev = bridge_parent(skb->dev);
+ 	int pf;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ #ifdef CONFIG_NETFILTER_DEBUG
+ 	/* Be very paranoid. This probably won't happen anymore, but let's
+ 	 * keep the check just to be sure... */
+@@ -833,6 +858,10 @@
+ 				   const struct net_device *out,
+ 				   int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	if ((*pskb)->nf_bridge &&
+ 	    !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
+ 		return NF_STOP;
+diff -Nurb linux-2.6.22-try2/net/bridge/br_netlink.c linux-2.6.22-try2-netns/net/bridge/br_netlink.c
+--- linux-2.6.22-try2/net/bridge/br_netlink.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bridge/br_netlink.c	2007-12-19 22:49:18.000000000 -0500
+@@ -12,6 +12,8 @@
+ 
+ #include <linux/kernel.h>
+ #include <net/rtnetlink.h>
++#include <net/net_namespace.h>
++#include <net/sock.h>
+ #include "br_private.h"
+ 
+ static inline size_t br_nlmsg_size(void)
+@@ -95,10 +97,10 @@
+ 		kfree_skb(skb);
+ 		goto errout;
+ 	}
+-	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
++	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
+ errout:
+ 	if (err < 0)
+-		rtnl_set_sk_err(RTNLGRP_LINK, err);
++		rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err);
+ }
+ 
+ /*
+@@ -106,11 +108,15 @@
+  */
+ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct net_device *dev;
+ 	int idx;
+ 
++	if (net != &init_net)
++		return 0;
++
+ 	idx = 0;
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		/* not a bridge port */
+ 		if (dev->br_port == NULL || idx < cb->args[0])
+ 			goto skip;
+@@ -134,12 +140,16 @@
+  */
+ static int br_rtm_setlink(struct sk_buff *skb,  struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct ifinfomsg *ifm;
+ 	struct nlattr *protinfo;
+ 	struct net_device *dev;
+ 	struct net_bridge_port *p;
+ 	u8 new_state;
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ 	if (nlmsg_len(nlh) < sizeof(*ifm))
+ 		return -EINVAL;
+ 
+@@ -155,7 +165,7 @@
+ 	if (new_state > BR_STATE_BLOCKING)
+ 		return -EINVAL;
+ 
+-	dev = __dev_get_by_index(ifm->ifi_index);
++	dev = __dev_get_by_index(&init_net, ifm->ifi_index);
+ 	if (!dev)
+ 		return -ENODEV;
+ 
+diff -Nurb linux-2.6.22-try2/net/bridge/br_notify.c linux-2.6.22-try2-netns/net/bridge/br_notify.c
+--- linux-2.6.22-try2/net/bridge/br_notify.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bridge/br_notify.c	2007-12-19 22:49:18.000000000 -0500
+@@ -15,6 +15,7 @@
+ 
+ #include <linux/kernel.h>
+ #include <linux/rtnetlink.h>
++#include <net/net_namespace.h>
+ 
+ #include "br_private.h"
+ 
+@@ -36,6 +37,9 @@
+ 	struct net_bridge_port *p = dev->br_port;
+ 	struct net_bridge *br;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	/* not a port of a bridge */
+ 	if (p == NULL)
+ 		return NOTIFY_DONE;
+diff -Nurb linux-2.6.22-try2/net/bridge/br_private.h linux-2.6.22-try2-netns/net/bridge/br_private.h
+--- linux-2.6.22-try2/net/bridge/br_private.h	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bridge/br_private.h	2007-12-19 22:49:18.000000000 -0500
+@@ -196,7 +196,7 @@
+ 
+ /* br_ioctl.c */
+ extern int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
+-extern int br_ioctl_deviceless_stub(unsigned int cmd, void __user *arg);
++extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *arg);
+ 
+ /* br_netfilter.c */
+ #ifdef CONFIG_BRIDGE_NETFILTER
+diff -Nurb linux-2.6.22-try2/net/bridge/br_stp_bpdu.c linux-2.6.22-try2-netns/net/bridge/br_stp_bpdu.c
+--- linux-2.6.22-try2/net/bridge/br_stp_bpdu.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bridge/br_stp_bpdu.c	2007-12-19 22:49:18.000000000 -0500
+@@ -17,6 +17,7 @@
+ #include <linux/netfilter_bridge.h>
+ #include <linux/etherdevice.h>
+ #include <linux/llc.h>
++#include <net/net_namespace.h>
+ #include <net/llc.h>
+ #include <net/llc_pdu.h>
+ #include <asm/unaligned.h>
+@@ -141,6 +142,9 @@
+ 	struct net_bridge *br;
+ 	const unsigned char *buf;
+ 
++	if (dev->nd_net != &init_net)
++		goto err;
++
+ 	if (!p)
+ 		goto err;
+ 
+diff -Nurb linux-2.6.22-try2/net/bridge/netfilter/ebt_ulog.c linux-2.6.22-try2-netns/net/bridge/netfilter/ebt_ulog.c
+--- linux-2.6.22-try2/net/bridge/netfilter/ebt_ulog.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bridge/netfilter/ebt_ulog.c	2007-12-19 22:49:18.000000000 -0500
+@@ -301,8 +301,9 @@
+ 		spin_lock_init(&ulog_buffers[i].lock);
+ 	}
+ 
+-	ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS,
+-					  NULL, NULL, THIS_MODULE);
++	ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG,
++					  EBT_ULOG_MAXNLGROUPS, NULL, NULL,
++					  THIS_MODULE);
+ 	if (!ebtulognl)
+ 		ret = -ENOMEM;
+ 	else if ((ret = ebt_register_watcher(&ulog)))
+diff -Nurb linux-2.6.22-try2/net/bridge/netfilter/ebtable_filter.c linux-2.6.22-try2-netns/net/bridge/netfilter/ebtable_filter.c
+--- linux-2.6.22-try2/net/bridge/netfilter/ebtable_filter.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bridge/netfilter/ebtable_filter.c	2007-12-19 22:49:18.000000000 -0500
+@@ -64,6 +64,10 @@
+ ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in,
+    const struct net_device *out, int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	return ebt_do_table(hook, pskb, in, out, &frame_filter);
+ }
+ 
+diff -Nurb linux-2.6.22-try2/net/bridge/netfilter/ebtable_nat.c linux-2.6.22-try2-netns/net/bridge/netfilter/ebtable_nat.c
+--- linux-2.6.22-try2/net/bridge/netfilter/ebtable_nat.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bridge/netfilter/ebtable_nat.c	2007-12-19 22:49:18.000000000 -0500
+@@ -64,6 +64,10 @@
+ ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+    , const struct net_device *out, int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	return ebt_do_table(hook, pskb, in, out, &frame_nat);
+ }
+ 
+@@ -71,6 +75,10 @@
+ ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in
+    , const struct net_device *out, int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	return ebt_do_table(hook, pskb, in, out, &frame_nat);
+ }
+ 
+diff -Nurb linux-2.6.22-try2/net/bridge/netfilter/ebtables.c linux-2.6.22-try2-netns/net/bridge/netfilter/ebtables.c
+--- linux-2.6.22-try2/net/bridge/netfilter/ebtables.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/bridge/netfilter/ebtables.c	2007-12-19 22:49:18.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/smp.h>
+ #include <linux/cpumask.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ /* needed for logical [in,out]-dev filtering */
+ #include "../br_private.h"
+ 
+@@ -1438,6 +1439,9 @@
+ {
+ 	int ret;
+ 
++	if (sk->sk_net != &init_net)
++		return -ENOPROTOOPT;
++
+ 	switch(cmd) {
+ 	case EBT_SO_SET_ENTRIES:
+ 		ret = do_replace(user, len);
+@@ -1457,6 +1461,9 @@
+ 	struct ebt_replace tmp;
+ 	struct ebt_table *t;
+ 
++	if (sk->sk_net != &init_net)
++		return -ENOPROTOOPT;
++
+ 	if (copy_from_user(&tmp, user, sizeof(tmp)))
+ 		return -EFAULT;
+ 
+diff -Nurb linux-2.6.22-try2/net/core/Makefile linux-2.6.22-try2-netns/net/core/Makefile
+--- linux-2.6.22-try2/net/core/Makefile	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/Makefile	2007-12-19 22:49:18.000000000 -0500
+@@ -3,7 +3,7 @@
+ #
+ 
+ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
+-	 gen_stats.o gen_estimator.o
++	 gen_stats.o gen_estimator.o net_namespace.o
+ 
+ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
+ 
+diff -Nurb linux-2.6.22-try2/net/core/dev.c linux-2.6.22-try2-netns/net/core/dev.c
+--- linux-2.6.22-try2/net/core/dev.c	2007-12-19 15:29:25.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/dev.c	2007-12-19 23:03:34.000000000 -0500
+@@ -116,6 +116,7 @@
+ #include <linux/dmaengine.h>
+ #include <linux/err.h>
+ #include <linux/ctype.h>
++#include <net/net_namespace.h>
+ #include <linux/if_arp.h>
+ #include <linux/vs_inet.h>
+ 
+@@ -189,25 +190,50 @@
+  * unregister_netdevice(), which must be called with the rtnl
+  * semaphore held.
+  */
+-LIST_HEAD(dev_base_head);
+ DEFINE_RWLOCK(dev_base_lock);
+ 
+-EXPORT_SYMBOL(dev_base_head);
+ EXPORT_SYMBOL(dev_base_lock);
+ 
+ #define NETDEV_HASHBITS	8
+-static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
+-static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
++#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
+ 
+-static inline struct hlist_head *dev_name_hash(const char *name)
++static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
+ {
+ 	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+-	return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
++	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
+ }
+ 
+-static inline struct hlist_head *dev_index_hash(int ifindex)
++static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
+ {
+-	return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
++	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
++}
++
++/* Device list insertion */
++static int list_netdevice(struct net_device *dev)
++{
++	struct net *net = dev->nd_net;
++
++	ASSERT_RTNL();
++
++	write_lock_bh(&dev_base_lock);
++	list_add_tail(&dev->dev_list, &net->dev_base_head);
++	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
++	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
++	write_unlock_bh(&dev_base_lock);
++	return 0;
++}
++
++/* Device list removal */
++static void unlist_netdevice(struct net_device *dev)
++{
++	ASSERT_RTNL();
++
++	/* Unlink dev from the device chain */
++	write_lock_bh(&dev_base_lock);
++	list_del(&dev->dev_list);
++	hlist_del(&dev->name_hlist);
++	hlist_del(&dev->index_hlist);
++	write_unlock_bh(&dev_base_lock);
+ }
+ 
+ /*
+@@ -490,7 +516,7 @@
+ 	 * If device already registered then return base of 1
+ 	 * to indicate not to probe for this interface
+ 	 */
+-	if (__dev_get_by_name(name))
++	if (__dev_get_by_name(&init_net, name))
+ 		return 1;
+ 
+ 	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
+@@ -545,11 +571,11 @@
+  *	careful with locks.
+  */
+ 
+-struct net_device *__dev_get_by_name(const char *name)
++struct net_device *__dev_get_by_name(struct net *net, const char *name)
+ {
+ 	struct hlist_node *p;
+ 
+-	hlist_for_each(p, dev_name_hash(name)) {
++	hlist_for_each(p, dev_name_hash(net, name)) {
+ 		struct net_device *dev
+ 			= hlist_entry(p, struct net_device, name_hlist);
+ 		if (!strncmp(dev->name, name, IFNAMSIZ))
+@@ -569,12 +595,12 @@
+  *	matching device is found.
+  */
+ 
+-struct net_device *dev_get_by_name(const char *name)
++struct net_device *dev_get_by_name(struct net *net, const char *name)
+ {
+ 	struct net_device *dev;
+ 
+ 	read_lock(&dev_base_lock);
+-	dev = __dev_get_by_name(name);
++	dev = __dev_get_by_name(net, name);
+ 	if (dev)
+ 		dev_hold(dev);
+ 	read_unlock(&dev_base_lock);
+@@ -592,11 +618,11 @@
+  *	or @dev_base_lock.
+  */
+ 
+-struct net_device *__dev_get_by_index(int ifindex)
++struct net_device *__dev_get_by_index(struct net *net, int ifindex)
+ {
+ 	struct hlist_node *p;
+ 
+-	hlist_for_each(p, dev_index_hash(ifindex)) {
++	hlist_for_each(p, dev_index_hash(net, ifindex)) {
+ 		struct net_device *dev
+ 			= hlist_entry(p, struct net_device, index_hlist);
+ 		if (dev->ifindex == ifindex)
+@@ -616,12 +642,12 @@
+  *	dev_put to indicate they have finished with it.
+  */
+ 
+-struct net_device *dev_get_by_index(int ifindex)
++struct net_device *dev_get_by_index(struct net *net, int ifindex)
+ {
+ 	struct net_device *dev;
+ 
+ 	read_lock(&dev_base_lock);
+-	dev = __dev_get_by_index(ifindex);
++	dev = __dev_get_by_index(net, ifindex);
+ 	if (dev)
+ 		dev_hold(dev);
+ 	read_unlock(&dev_base_lock);
+@@ -642,13 +668,13 @@
+  *	If the API was consistent this would be __dev_get_by_hwaddr
+  */
+ 
+-struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
++struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
+ {
+ 	struct net_device *dev;
+ 
+ 	ASSERT_RTNL();
+ 
+-	for_each_netdev(dev)
++	for_each_netdev(&init_net, dev)
+ 		if (dev->type == type &&
+ 		    !memcmp(dev->dev_addr, ha, dev->addr_len))
+ 			return dev;
+@@ -658,12 +684,12 @@
+ 
+ EXPORT_SYMBOL(dev_getbyhwaddr);
+ 
+-struct net_device *__dev_getfirstbyhwtype(unsigned short type)
++struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
+ {
+ 	struct net_device *dev;
+ 
+ 	ASSERT_RTNL();
+-	for_each_netdev(dev)
++	for_each_netdev(net, dev)
+ 		if (dev->type == type)
+ 			return dev;
+ 
+@@ -672,12 +698,12 @@
+ 
+ EXPORT_SYMBOL(__dev_getfirstbyhwtype);
+ 
+-struct net_device *dev_getfirstbyhwtype(unsigned short type)
++struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
+ {
+ 	struct net_device *dev;
+ 
+ 	rtnl_lock();
+-	dev = __dev_getfirstbyhwtype(type);
++	dev = __dev_getfirstbyhwtype(net, type);
+ 	if (dev)
+ 		dev_hold(dev);
+ 	rtnl_unlock();
+@@ -697,13 +723,13 @@
+  *	dev_put to indicate they have finished with it.
+  */
+ 
+-struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
++struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
+ {
+ 	struct net_device *dev, *ret;
+ 
+ 	ret = NULL;
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev) {
++	for_each_netdev(net, dev) {
+ 		if (((dev->flags ^ if_flags) & mask) == 0) {
+ 			dev_hold(dev);
+ 			ret = dev;
+@@ -740,9 +766,10 @@
+ }
+ 
+ /**
+- *	dev_alloc_name - allocate a name for a device
+- *	@dev: device
++ *	__dev_alloc_name - allocate a name for a device
++ *	@net: network namespace to allocate the device name in
+  *	@name: name format string
++ *	@buf:  scratch buffer and result name string
+  *
+  *	Passed a format string - eg "lt%d" it will try and find a suitable
+  *	id. It scans list of devices to build up a free map, then chooses
+@@ -753,10 +780,9 @@
+  *	Returns the number of the unit assigned or a negative errno code.
+  */
+ 
+-int dev_alloc_name(struct net_device *dev, const char *name)
++static int __dev_alloc_name(struct net *net, const char *name, char *buf)
+ {
+ 	int i = 0;
+-	char buf[IFNAMSIZ];
+ 	const char *p;
+ 	const int max_netdevices = 8*PAGE_SIZE;
+ 	long *inuse;
+@@ -777,14 +803,14 @@
+ 		if (!inuse)
+ 			return -ENOMEM;
+ 
+-		for_each_netdev(d) {
++		for_each_netdev(net, d) {
+ 			if (!sscanf(d->name, name, &i))
+ 				continue;
+ 			if (i < 0 || i >= max_netdevices)
+ 				continue;
+ 
+ 			/*  avoid cases where sscanf is not exact inverse of printf */
+-			snprintf(buf, sizeof(buf), name, i);
++			snprintf(buf, IFNAMSIZ, name, i);
+ 			if (!strncmp(buf, d->name, IFNAMSIZ))
+ 				set_bit(i, inuse);
+ 		}
+@@ -793,11 +819,9 @@
+ 		free_page((unsigned long) inuse);
+ 	}
+ 
+-	snprintf(buf, sizeof(buf), name, i);
+-	if (!__dev_get_by_name(buf)) {
+-		strlcpy(dev->name, buf, IFNAMSIZ);
++	snprintf(buf, IFNAMSIZ, name, i);
++	if (!__dev_get_by_name(net, buf))
+ 		return i;
+-	}
+ 
+ 	/* It is possible to run out of possible slots
+ 	 * when the name is long and there isn't enough space left
+@@ -806,6 +830,34 @@
+ 	return -ENFILE;
+ }
+ 
++/**
++ *	dev_alloc_name - allocate a name for a device
++ *	@dev: device
++ *	@name: name format string
++ *
++ *	Passed a format string - eg "lt%d" it will try and find a suitable
++ *	id. It scans list of devices to build up a free map, then chooses
++ *	the first empty slot. The caller must hold the dev_base or rtnl lock
++ *	while allocating the name and adding the device in order to avoid
++ *	duplicates.
++ *	Limited to bits_per_byte * page size devices (i.e. 32K on most platforms).
++ *	Returns the number of the unit assigned or a negative errno code.
++ */
++
++int dev_alloc_name(struct net_device *dev, const char *name)
++{
++	char buf[IFNAMSIZ];
++	struct net *net;
++	int ret;
++
++	BUG_ON(!dev->nd_net);
++	net = dev->nd_net;
++	ret = __dev_alloc_name(net, name, buf);
++	if (ret >= 0)
++		strlcpy(dev->name, buf, IFNAMSIZ);
++	return ret;
++}
++
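The split leaves __dev_alloc_name() doing the per-namespace format-string scan into a caller-supplied buffer, while this wrapper commits the winning name to dev->name. A minimal caller sketch (hypothetical driver code, not part of the patch; the locking rule is the one stated in the comment above):

/* Hypothetical caller, illustration only: claim the first free
 * "veth%d" slot in the namespace the device belongs to. */
static int example_pick_name(struct net_device *dev)
{
	int unit;

	rtnl_lock();				/* dev_alloc_name() needs rtnl or dev_base */
	unit = dev_alloc_name(dev, "veth%d");	/* scans dev->nd_net only */
	rtnl_unlock();
	if (unit < 0)
		return unit;	/* e.g. -ENFILE when every slot is taken */
	return 0;		/* dev->name now holds e.g. "veth0" */
}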
+ 
+ /**
+  *	dev_change_name - change name of a device
+@@ -818,9 +870,12 @@
+ int dev_change_name(struct net_device *dev, char *newname)
+ {
+ 	int err = 0;
++	struct net *net;
+ 
+ 	ASSERT_RTNL();
++	BUG_ON(!dev->nd_net);
+ 
++	net = dev->nd_net;
+ 	if (dev->flags & IFF_UP)
+ 		return -EBUSY;
+ 
+@@ -833,7 +888,7 @@
+ 			return err;
+ 		strcpy(newname, dev->name);
+ 	}
+-	else if (__dev_get_by_name(newname))
++	else if (__dev_get_by_name(net, newname))
+ 		return -EEXIST;
+ 	else {
+ 		if (strncmp(newname, dev->name, IFNAMSIZ))
+@@ -844,7 +899,7 @@
+ 
+ 	device_rename(&dev->dev, dev->name);
+ 	hlist_del(&dev->name_hlist);
+-	hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
++	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
+ 	raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
+ 
+ 	return err;
+@@ -888,12 +943,12 @@
+  *	available in this kernel then it becomes a nop.
+  */
+ 
+-void dev_load(const char *name)
++void dev_load(struct net *net, const char *name)
+ {
+ 	struct net_device *dev;
+ 
+ 	read_lock(&dev_base_lock);
+-	dev = __dev_get_by_name(name);
++	dev = __dev_get_by_name(net, name);
+ 	read_unlock(&dev_base_lock);
+ 
+ 	if (!dev && capable(CAP_SYS_MODULE))
+@@ -1036,6 +1091,8 @@
+ }
+ 
+ 
++static int dev_boot_phase = 1;
++
+ /*
+  *	Device change register/unregister. These are not inline or static
+  *	as we export them to the world.
+@@ -1062,14 +1119,17 @@
+ 
+ 	rtnl_lock();
+ 	err = raw_notifier_chain_register(&netdev_chain, nb);
+-	if (!err) {
+-		for_each_netdev(dev) {
++	if (!err && !dev_boot_phase) {
++		struct net *net;
++		for_each_net(net) {
++			for_each_netdev(net, dev) {
+ 			nb->notifier_call(nb, NETDEV_REGISTER, dev);
+ 
+ 			if (dev->flags & IFF_UP)
+ 				nb->notifier_call(nb, NETDEV_UP, dev);
+ 		}
+ 	}
++	}
+ 	rtnl_unlock();
+ 	return err;
+ }
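Notifiers that are not yet namespace-aware have to opt out of events for foreign namespaces; dst.c and fib_rules.c later in this patch both guard their handlers the same way. The idiom, reduced to a sketch:

/* Sketch of the filtering idiom used by not-yet-converted notifiers
 * (cf. dst_dev_event() and fib_rules_event() below). */
static int example_device_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (dev->nd_net != &init_net)	/* only the initial namespace */
		return NOTIFY_DONE;

	/* ... handle NETDEV_REGISTER, NETDEV_UP, ... as before ... */
	return NOTIFY_DONE;
}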
+@@ -1103,9 +1163,9 @@
+  *	are as for raw_notifier_call_chain().
+  */
+ 
+-int call_netdevice_notifiers(unsigned long val, void *v)
++int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
+ {
+-	return raw_notifier_call_chain(&netdev_chain, val, v);
++	return raw_notifier_call_chain(&netdev_chain, val, dev);
+ }
+ 
+ /* When > 0 there are consumers of rx skb time stamps */
+@@ -2083,7 +2143,7 @@
+  *	match.  --pb
+  */
+ 
+-static int dev_ifname(struct ifreq __user *arg)
++static int dev_ifname(struct net *net, struct ifreq __user *arg)
+ {
+ 	struct net_device *dev;
+ 	struct ifreq ifr;
+@@ -2096,7 +2156,7 @@
+ 		return -EFAULT;
+ 
+ 	read_lock(&dev_base_lock);
+-	dev = __dev_get_by_index(ifr.ifr_ifindex);
++	dev = __dev_get_by_index(net, ifr.ifr_ifindex);
+ 	if (!dev) {
+ 		read_unlock(&dev_base_lock);
+ 		return -ENODEV;
+@@ -2116,7 +2176,7 @@
+  *	Thus we will need a 'compatibility mode'.
+  */
+ 
+-static int dev_ifconf(char __user *arg)
++static int dev_ifconf(struct net *net, char __user *arg)
+ {
+ 	struct ifconf ifc;
+ 	struct net_device *dev;
+@@ -2140,7 +2200,7 @@
+ 	 */
+ 
+ 	total = 0;
+-	for_each_netdev(dev) {
++	for_each_netdev(net, dev) {
+ 		if (!nx_dev_visible(current->nx_info, dev))
+ 			continue;
+ 		for (i = 0; i < NPROTO; i++) {
+@@ -2176,6 +2236,7 @@
+  */
+ void *dev_seq_start(struct seq_file *seq, loff_t *pos)
+ {
++	struct net *net = seq->private;
+ 	loff_t off;
+ 	struct net_device *dev;
+ 
+@@ -2184,7 +2245,7 @@
+ 		return SEQ_START_TOKEN;
+ 
+ 	off = 1;
+-	for_each_netdev(dev)
++	for_each_netdev(net, dev)
+ 		if (off++ == *pos)
+ 			return dev;
+ 
+@@ -2193,9 +2254,10 @@
+ 
+ void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
++	struct net *net = seq->private;
+ 	++*pos;
+ 	return v == SEQ_START_TOKEN ?
+-		first_net_device() : next_net_device((struct net_device *)v);
++		first_net_device(net) : next_net_device((struct net_device *)v);
+ }
+ 
+ void dev_seq_stop(struct seq_file *seq, void *v)
+@@ -2294,7 +2356,22 @@
+ 
+ static int dev_seq_open(struct inode *inode, struct file *file)
+ {
+-	return seq_open(file, &dev_seq_ops);
++	struct seq_file *seq;
++	int res;
++	res = seq_open(file, &dev_seq_ops);
++	if (!res) {
++		seq = file->private_data;
++		seq->private = get_net(PROC_NET(inode));
++	}
++	return res;
++}
++
++static int dev_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct net *net = seq->private;
++	put_net(net);
++	return seq_release(inode, file);
+ }
+ 
+ static const struct file_operations dev_seq_fops = {
+@@ -2302,7 +2379,7 @@
+ 	.open    = dev_seq_open,
+ 	.read    = seq_read,
+ 	.llseek  = seq_lseek,
+-	.release = seq_release,
++	.release = dev_seq_release,
+ };
+ 
+ static const struct seq_operations softnet_seq_ops = {
+@@ -2454,30 +2531,49 @@
+ };
+ 
+ 
+-static int __init dev_proc_init(void)
++static int dev_proc_net_init(struct net *net)
+ {
+ 	int rc = -ENOMEM;
+ 
+-	if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
++	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
+ 		goto out;
+-	if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
++	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
+ 		goto out_dev;
+-	if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
+-		goto out_dev2;
+-
+-	if (wext_proc_init())
++	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
+ 		goto out_softnet;
++
++	if (wext_proc_init(net))
++		goto out_ptype;
+ 	rc = 0;
+ out:
+ 	return rc;
++out_ptype:
++	proc_net_remove(net, "ptype");
+ out_softnet:
+-	proc_net_remove("ptype");
+-out_dev2:
+-	proc_net_remove("softnet_stat");
++	proc_net_remove(net, "softnet_stat");
+ out_dev:
+-	proc_net_remove("dev");
++	proc_net_remove(net, "dev");
+ 	goto out;
+ }
++
++static void dev_proc_net_exit(struct net *net)
++{
++	wext_proc_exit(net);
++
++	proc_net_remove(net, "ptype");
++	proc_net_remove(net, "softnet_stat");
++	proc_net_remove(net, "dev");
++}
++
++static struct pernet_operations dev_proc_ops = {
++	.init = dev_proc_net_init,
++	.exit = dev_proc_net_exit,
++};
++
++static int __init dev_proc_init(void)
++{
++	return register_pernet_subsys(&dev_proc_ops);
++}
+ #else
+ #define dev_proc_init() 0
+ #endif	/* CONFIG_PROC_FS */
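This init/exit pair plus a pernet_operations registration is the template the patch applies to every proc file it converts (dev_mcast below follows it too), with the matching seq_open()/seq_release() wrappers pinning the namespace via get_net(PROC_NET(inode)) and put_net(). Reduced to a sketch for a hypothetical single file ("example" and example_fops are placeholders):

/* Hedged template, illustration only: the per-net proc conversion
 * this patch repeats for dev, softnet_stat, ptype and dev_mcast. */
static int example_net_init(struct net *net)
{
	if (!proc_net_fops_create(net, "example", S_IRUGO, &example_fops))
		return -ENOMEM;
	return 0;
}

static void example_net_exit(struct net *net)
{
	proc_net_remove(net, "example");
}

static struct pernet_operations example_net_ops = {
	.init = example_net_init,
	.exit = example_net_exit,
};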
+@@ -2711,10 +2807,10 @@
+ /*
+  *	Perform the SIOCxIFxxx calls.
+  */
+-static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
++static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
+ {
+ 	int err;
+-	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
++	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+ 
+ 	if (!dev)
+ 		return -ENODEV;
+@@ -2867,7 +2963,7 @@
+  *	positive or a negative errno code on error.
+  */
+ 
+-int dev_ioctl(unsigned int cmd, void __user *arg)
++int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+ {
+ 	struct ifreq ifr;
+ 	int ret;
+@@ -2880,12 +2976,12 @@
+ 
+ 	if (cmd == SIOCGIFCONF) {
+ 		rtnl_lock();
+-		ret = dev_ifconf((char __user *) arg);
++		ret = dev_ifconf(net, (char __user *) arg);
+ 		rtnl_unlock();
+ 		return ret;
+ 	}
+ 	if (cmd == SIOCGIFNAME)
+-		return dev_ifname((struct ifreq __user *)arg);
++		return dev_ifname(net, (struct ifreq __user *)arg);
+ 
+ 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ 		return -EFAULT;
+@@ -2915,9 +3011,9 @@
+ 		case SIOCGIFMAP:
+ 		case SIOCGIFINDEX:
+ 		case SIOCGIFTXQLEN:
+-			dev_load(ifr.ifr_name);
++			dev_load(net, ifr.ifr_name);
+ 			read_lock(&dev_base_lock);
+-			ret = dev_ifsioc(&ifr, cmd);
++			ret = dev_ifsioc(net, &ifr, cmd);
+ 			read_unlock(&dev_base_lock);
+ 			if (!ret) {
+ 				if (colon)
+@@ -2929,9 +3025,9 @@
+ 			return ret;
+ 
+ 		case SIOCETHTOOL:
+-			dev_load(ifr.ifr_name);
++			dev_load(net, ifr.ifr_name);
+ 			rtnl_lock();
+-			ret = dev_ethtool(&ifr);
++			ret = dev_ethtool(net, &ifr);
+ 			rtnl_unlock();
+ 			if (!ret) {
+ 				if (colon)
+@@ -2953,9 +3049,9 @@
+ 		case SIOCSIFNAME:
+ 			if (!capable(CAP_NET_ADMIN))
+ 				return -EPERM;
+-			dev_load(ifr.ifr_name);
++			dev_load(net, ifr.ifr_name);
+ 			rtnl_lock();
+-			ret = dev_ifsioc(&ifr, cmd);
++			ret = dev_ifsioc(net, &ifr, cmd);
+ 			rtnl_unlock();
+ 			if (!ret) {
+ 				if (colon)
+@@ -2994,9 +3090,9 @@
+ 			/* fall through */
+ 		case SIOCBONDSLAVEINFOQUERY:
+ 		case SIOCBONDINFOQUERY:
+-			dev_load(ifr.ifr_name);
++			dev_load(net, ifr.ifr_name);
+ 			rtnl_lock();
+-			ret = dev_ifsioc(&ifr, cmd);
++			ret = dev_ifsioc(net, &ifr, cmd);
+ 			rtnl_unlock();
+ 			return ret;
+ 
+@@ -3016,9 +3112,9 @@
+ 			if (cmd == SIOCWANDEV ||
+ 			    (cmd >= SIOCDEVPRIVATE &&
+ 			     cmd <= SIOCDEVPRIVATE + 15)) {
+-				dev_load(ifr.ifr_name);
++				dev_load(net, ifr.ifr_name);
+ 				rtnl_lock();
+-				ret = dev_ifsioc(&ifr, cmd);
++				ret = dev_ifsioc(net, &ifr, cmd);
+ 				rtnl_unlock();
+ 				if (!ret && copy_to_user(arg, &ifr,
+ 							 sizeof(struct ifreq)))
+@@ -3027,7 +3123,7 @@
+ 			}
+ 			/* Take care of Wireless Extensions */
+ 			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
+-				return wext_handle_ioctl(&ifr, cmd, arg);
++				return wext_handle_ioctl(net, &ifr, cmd, arg);
+ 			return -EINVAL;
+ 	}
+ }
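With dev_ioctl() now taking the namespace explicitly, the socket layer is expected to pass the namespace its socket was created in; a hedged sketch of that call site (which lives elsewhere in the series, not in this file):

/* Hypothetical call-site sketch: the socket ioctl path supplying the
 * caller's namespace, taken from the socket rather than a global. */
struct net *net = sock->sk->sk_net;
err = dev_ioctl(net, cmd, (void __user *)arg);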
+@@ -3040,19 +3136,17 @@
+  *	number.  The caller must hold the rtnl semaphore or the
+  *	dev_base_lock to be sure it remains unique.
+  */
+-static int dev_new_index(void)
++static int dev_new_index(struct net *net)
+ {
+ 	static int ifindex;
+ 	for (;;) {
+ 		if (++ifindex <= 0)
+ 			ifindex = 1;
+-		if (!__dev_get_by_index(ifindex))
++		if (!__dev_get_by_index(net, ifindex))
+ 			return ifindex;
+ 	}
+ }
+ 
+-static int dev_boot_phase = 1;
+-
+ /* Delayed registration/unregisteration */
+ static DEFINE_SPINLOCK(net_todo_list_lock);
+ static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
+@@ -3086,6 +3180,7 @@
+ 	struct hlist_head *head;
+ 	struct hlist_node *p;
+ 	int ret;
++	struct net *net;
+ 
+ 	BUG_ON(dev_boot_phase);
+ 	ASSERT_RTNL();
+@@ -3094,6 +3189,8 @@
+ 
+ 	/* When net_device's are persistent, this will be fatal. */
+ 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
++	BUG_ON(!dev->nd_net);
++	net = dev->nd_net;
+ 
+ 	spin_lock_init(&dev->queue_lock);
+ 	spin_lock_init(&dev->_xmit_lock);
+@@ -3118,12 +3215,12 @@
+ 		goto out;
+ 	}
+ 
+-	dev->ifindex = dev_new_index();
++	dev->ifindex = dev_new_index(net);
+ 	if (dev->iflink == -1)
+ 		dev->iflink = dev->ifindex;
+ 
+ 	/* Check for existence of name */
+-	head = dev_name_hash(dev->name);
++	head = dev_name_hash(net, dev->name);
+ 	hlist_for_each(p, head) {
+ 		struct net_device *d
+ 			= hlist_entry(p, struct net_device, name_hlist);
+@@ -3200,12 +3297,8 @@
+ 	set_bit(__LINK_STATE_PRESENT, &dev->state);
+ 
+ 	dev_init_scheduler(dev);
+-	write_lock_bh(&dev_base_lock);
+-	list_add_tail(&dev->dev_list, &dev_base_head);
+-	hlist_add_head(&dev->name_hlist, head);
+-	hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
+ 	dev_hold(dev);
+-	write_unlock_bh(&dev_base_lock);
++	list_netdevice(dev);
+ 
+ 	/* Notify protocols, that a new device appeared. */
+ 	raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
+@@ -3415,6 +3508,7 @@
+ 	dev = (struct net_device *)
+ 		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
+ 	dev->padded = (char *)dev - (char *)p;
++	dev->nd_net = &init_net;
+ 
+ 	if (sizeof_priv)
+ 		dev->priv = netdev_priv(dev);
+@@ -3493,11 +3587,7 @@
+ 		dev_close(dev);
+ 
+ 	/* And unlink it from device chain. */
+-	write_lock_bh(&dev_base_lock);
+-	list_del(&dev->dev_list);
+-	hlist_del(&dev->name_hlist);
+-	hlist_del(&dev->index_hlist);
+-	write_unlock_bh(&dev_base_lock);
++	unlist_netdevice(dev);
+ 
+ 	dev->reg_state = NETREG_UNREGISTERING;
+ 
+@@ -3555,6 +3645,122 @@
+ 
+ EXPORT_SYMBOL(unregister_netdev);
+ 
++/**
++ *	dev_change_net_namespace - move device to a different network namespace
++ *	@dev: device
++ *	@net: network namespace
++ *	@pat: If not NULL name pattern to try if the current device name
++ *	      is already taken in the destination network namespace.
++ *
++ *	This function shuts down a device interface and moves it
++ *	to a new network namespace. On success 0 is returned, on
++ *	a failure a negative errno code is returned.
++ *
++ *	Callers must hold the rtnl semaphore.
++ */
++
++int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
++{
++	char buf[IFNAMSIZ];
++	const char *destname;
++	int err;
++
++	ASSERT_RTNL();
++
++	/* Don't allow namespace local devices to be moved. */
++	err = -EINVAL;
++	if (dev->features & NETIF_F_NETNS_LOCAL)
++		goto out;
++
++	/* Ensure the device has been registered */
++	err = -EINVAL;
++	if (dev->reg_state != NETREG_REGISTERED)
++		goto out;
++	
++	/* Get out if there is nothing to do */
++	err = 0;
++	if (dev->nd_net == net)
++		goto out;
++
++	/* Pick the destination device name, and ensure
++	 * we can use it in the destination network namespace.
++	 */
++	err = -EEXIST;
++	destname = dev->name;
++	if (__dev_get_by_name(net, destname)) {
++		/* We get here if we can't use the current device name */
++		if (!pat)
++			goto out;
++		if (!dev_valid_name(pat))
++			goto out;
++		if (strchr(pat, '%')) {
++			if (__dev_alloc_name(net, pat, buf) < 0)
++				goto out;
++			destname = buf;
++		} else
++			destname = pat;
++		if (__dev_get_by_name(net, destname))
++			goto out;
++	}
++
++	/*
++	 * And now a mini version of register_netdevice and unregister_netdevice.
++	 */
++
++	/* If device is running close it first. */
++	if (dev->flags & IFF_UP)
++		dev_close(dev);
++
++	/* And unlink it from device chain */
++	err = -ENODEV;
++	unlist_netdevice(dev);
++	
++	synchronize_net();
++	
++	/* Shutdown queueing discipline. */
++	dev_shutdown(dev);
++
++	/* Notify protocols, that we are about to destroy
++	   this device. They should clean all the things.
++	*/
++	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
++	
++	/*
++	 *	Flush the multicast chain
++	 */
++	dev_mc_discard(dev);
++
++	/* Actually switch the network namespace */
++	dev->nd_net = net;
++	
++	/* Assign the new device name */
++	if (destname != dev->name)
++		strcpy(dev->name, destname);
++
++	/* If there is an ifindex conflict assign a new one */
++	if (__dev_get_by_index(net, dev->ifindex)) {
++		int iflink = (dev->iflink == dev->ifindex);
++		dev->ifindex = dev_new_index(net);
++		if (iflink)
++			dev->iflink = dev->ifindex;
++	}
++
++	/* Fixup sysfs */
++	err = device_rename(&dev->dev, dev->name);
++	BUG_ON(err);
++
++	/* Add the device back in the hashes */
++	list_netdevice(dev);
++
++	/* Notify protocols, that a new device appeared. */
++	call_netdevice_notifiers(NETDEV_REGISTER, dev);
++
++	synchronize_net();
++	err = 0;
++out:
++	return err;
++}
++
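A hedged usage sketch for the new primitive (hypothetical caller; "eth1" and new_net are placeholders): move a device out of the initial namespace, falling back to a "dev%d" pattern if the name is already taken on the other side:

/* Illustration only: relocate a device under the rtnl semaphore. */
struct net_device *dev;
int err = -ENODEV;

rtnl_lock();
dev = __dev_get_by_name(&init_net, "eth1");
if (dev)
	err = dev_change_net_namespace(dev, new_net, "dev%d");
rtnl_unlock();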
+ static int dev_cpu_callback(struct notifier_block *nfb,
+ 			    unsigned long action,
+ 			    void *ocpu)
+@@ -3745,6 +3951,74 @@
+ }
+ EXPORT_SYMBOL(netdev_compute_features);
+ 
++/* Initialize per network namespace state */
++static int netdev_init(struct net *net)
++{
++	int i;
++	INIT_LIST_HEAD(&net->dev_base_head);
++
++	net->dev_name_head = kmalloc(
++		sizeof(*net->dev_name_head)*NETDEV_HASHENTRIES, GFP_KERNEL);
++	if (!net->dev_name_head)
++		return -ENOMEM;
++
++	net->dev_index_head = kmalloc(
++		sizeof(*net->dev_index_head)*NETDEV_HASHENTRIES, GFP_KERNEL);
++	if (!net->dev_index_head) {
++		kfree(net->dev_name_head);
++		return -ENOMEM;
++	}
++
++	for (i = 0; i < NETDEV_HASHENTRIES; i++)
++		INIT_HLIST_HEAD(&net->dev_name_head[i]);
++	
++	for (i = 0; i < NETDEV_HASHENTRIES; i++)
++		INIT_HLIST_HEAD(&net->dev_index_head[i]);
++
++	return 0;
++}
++
++static void netdev_exit(struct net *net)
++{
++	kfree(net->dev_name_head);
++	kfree(net->dev_index_head);
++}
++
++static struct pernet_operations netdev_net_ops = {
++	.init = netdev_init,
++	.exit = netdev_exit,
++};
++
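netdev_init() gives each namespace its own name and ifindex hash tables, replacing the global dev_name_head[]/dev_index_head[] arrays torn out of the boot-time init below. The lookup helpers the patch calls (dev_name_hash(), dev_index_hash()) are not shown in this hunk; they presumably reduce to something like this sketch (hash function assumed, and NETDEV_HASHENTRIES assumed to be a power of two):

/* Hedged sketch of the per-net name-hash helper referenced by
 * __dev_get_by_name() and dev_change_name() above. */
static struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &net->dev_name_head[hash & (NETDEV_HASHENTRIES - 1)];
}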
++static void default_device_exit(struct net *net)
++{
++	struct net_device *dev, *next;
++	/*
++	 * Push all migratable network devices back to the
++	 * initial network namespace.
++	 */
++	rtnl_lock();
++	for_each_netdev_safe(net, dev, next) {
++		int err;
++
++		/* Ignore unmovable devices (e.g. loopback) */
++		if (dev->features & NETIF_F_NETNS_LOCAL)
++			continue;
++
++		/* Push remaining network devices to init_net */
++		err = dev_change_net_namespace(dev, &init_net, "dev%d");
++		if (err) {
++			printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n",
++				__func__, dev->name, err);
++			unregister_netdevice(dev);
++		}
++	}
++	rtnl_unlock();
++}
++
++static struct pernet_operations default_device_ops = {
++	.exit = default_device_exit,
++};
++
+ /*
+  *	Initialize the DEV module. At boot time this walks the device list and
+  *	unhooks any devices that fail to initialise (normally hardware not
+@@ -3772,11 +4047,11 @@
+ 	for (i = 0; i < 16; i++)
+ 		INIT_LIST_HEAD(&ptype_base[i]);
+ 
+-	for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
+-		INIT_HLIST_HEAD(&dev_name_head[i]);
++	if (register_pernet_subsys(&netdev_net_ops))
++		goto out;
+ 
+-	for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
+-		INIT_HLIST_HEAD(&dev_index_head[i]);
++	if (register_pernet_device(&default_device_ops))
++		goto out;
+ 
+ 	/*
+ 	 *	Initialise the packet receive queues.
+diff -Nurb linux-2.6.22-try2/net/core/dev_mcast.c linux-2.6.22-try2-netns/net/core/dev_mcast.c
+--- linux-2.6.22-try2/net/core/dev_mcast.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/dev_mcast.c	2007-12-19 22:49:18.000000000 -0500
+@@ -46,6 +46,7 @@
+ #include <linux/skbuff.h>
+ #include <net/sock.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+ 
+ 
+ /*
+@@ -219,11 +220,12 @@
+ #ifdef CONFIG_PROC_FS
+ static void *dev_mc_seq_start(struct seq_file *seq, loff_t *pos)
+ {
++	struct net *net = seq->private;
+ 	struct net_device *dev;
+ 	loff_t off = 0;
+ 
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev) {
++	for_each_netdev(net, dev) {
+ 		if (off++ == *pos)
+ 			return dev;
+ 	}
+@@ -272,7 +274,22 @@
+ 
+ static int dev_mc_seq_open(struct inode *inode, struct file *file)
+ {
+-	return seq_open(file, &dev_mc_seq_ops);
++	struct seq_file *seq;
++	int res;
++	res = seq_open(file, &dev_mc_seq_ops);
++	if (!res) {
++		seq = file->private_data;
++		seq->private = get_net(PROC_NET(inode));
++	}
++	return res;
++}
++
++static int dev_mc_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct net *net = seq->private;
++	put_net(net);
++	return seq_release(inode, file);
+ }
+ 
+ static const struct file_operations dev_mc_seq_fops = {
+@@ -280,14 +297,31 @@
+ 	.open    = dev_mc_seq_open,
+ 	.read    = seq_read,
+ 	.llseek  = seq_lseek,
+-	.release = seq_release,
++	.release = dev_mc_seq_release,
+ };
+ 
+ #endif
+ 
++static int dev_mc_net_init(struct net *net)
++{
++	if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
++		return -ENOMEM;
++	return 0;
++}
++
++static void dev_mc_net_exit(struct net *net)
++{
++	proc_net_remove(net, "dev_mcast");
++}
++
++static struct pernet_operations dev_mc_net_ops = {
++	.init = dev_mc_net_init,
++	.exit = dev_mc_net_exit,
++};
++
+ void __init dev_mcast_init(void)
+ {
+-	proc_net_fops_create("dev_mcast", 0, &dev_mc_seq_fops);
++	register_pernet_subsys(&dev_mc_net_ops);
+ }
+ 
+ EXPORT_SYMBOL(dev_mc_add);
+diff -Nurb linux-2.6.22-try2/net/core/dst.c linux-2.6.22-try2-netns/net/core/dst.c
+--- linux-2.6.22-try2/net/core/dst.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/dst.c	2007-12-19 22:49:18.000000000 -0500
+@@ -15,7 +15,8 @@
+ #include <linux/skbuff.h>
+ #include <linux/string.h>
+ #include <linux/types.h>
+ 
++#include <net/net_namespace.h>
+ #include <net/dst.h>
+ 
+ /* Locking strategy:
+@@ -236,13 +238,14 @@
+ 	if (!unregister) {
+ 		dst->input = dst->output = dst_discard;
+ 	} else {
+-		dst->dev = &loopback_dev;
+-		dev_hold(&loopback_dev);
++		struct net *net = dev->nd_net;
++		dst->dev = &net->loopback_dev;
++		dev_hold(dst->dev);
+ 		dev_put(dev);
+ 		if (dst->neighbour && dst->neighbour->dev == dev) {
+-			dst->neighbour->dev = &loopback_dev;
++			dst->neighbour->dev = &net->loopback_dev;
+ 			dev_put(dev);
+-			dev_hold(&loopback_dev);
++			dev_hold(dst->neighbour->dev);
+ 		}
+ 	}
+ }
+@@ -252,6 +255,9 @@
+ 	struct net_device *dev = ptr;
+ 	struct dst_entry *dst;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	switch (event) {
+ 	case NETDEV_UNREGISTER:
+ 	case NETDEV_DOWN:
+diff -Nurb linux-2.6.22-try2/net/core/ethtool.c linux-2.6.22-try2-netns/net/core/ethtool.c
+--- linux-2.6.22-try2/net/core/ethtool.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/ethtool.c	2007-12-19 22:49:18.000000000 -0500
+@@ -798,9 +798,9 @@
+ 
+ /* The main entry point in this file.  Called from net/core/dev.c */
+ 
+-int dev_ethtool(struct ifreq *ifr)
++int dev_ethtool(struct net *net, struct ifreq *ifr)
+ {
+-	struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
++	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
+ 	void __user *useraddr = ifr->ifr_data;
+ 	u32 ethcmd;
+ 	int rc;
+diff -Nurb linux-2.6.22-try2/net/core/fib_rules.c linux-2.6.22-try2-netns/net/core/fib_rules.c
+--- linux-2.6.22-try2/net/core/fib_rules.c	2007-12-19 13:37:56.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/fib_rules.c	2007-12-19 22:49:18.000000000 -0500
+@@ -11,21 +11,20 @@
+ #include <linux/types.h>
+ #include <linux/kernel.h>
+ #include <linux/list.h>
++#include <net/net_namespace.h>
++#include <net/sock.h>
+ #include <net/fib_rules.h>
+ 
+-static LIST_HEAD(rules_ops);
+-static DEFINE_SPINLOCK(rules_mod_lock);
+-
+-static void notify_rule_change(int event, struct fib_rule *rule,
++static void notify_rule_change(struct net *net, int event, struct fib_rule *rule,
+ 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+ 			       u32 pid);
+ 
+-static struct fib_rules_ops *lookup_rules_ops(int family)
++static struct fib_rules_ops *lookup_rules_ops(struct net *net, int family)
+ {
+ 	struct fib_rules_ops *ops;
+ 
+ 	rcu_read_lock();
+-	list_for_each_entry_rcu(ops, &rules_ops, list) {
++	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
+ 		if (ops->family == family) {
+ 			if (!try_module_get(ops->owner))
+ 				ops = NULL;
+@@ -47,10 +46,10 @@
+ static void flush_route_cache(struct fib_rules_ops *ops)
+ {
+ 	if (ops->flush_cache)
+-		ops->flush_cache();
++		ops->flush_cache(ops);
+ }
+ 
+-int fib_rules_register(struct fib_rules_ops *ops)
++int fib_rules_register(struct net *net, struct fib_rules_ops *ops)
+ {
+ 	int err = -EEXIST;
+ 	struct fib_rules_ops *o;
+@@ -63,15 +62,16 @@
+ 	    ops->action == NULL)
+ 		return -EINVAL;
+ 
+-	spin_lock(&rules_mod_lock);
+-	list_for_each_entry(o, &rules_ops, list)
++	spin_lock(&net->rules_mod_lock);
++	list_for_each_entry(o, &net->rules_ops, list)
+ 		if (ops->family == o->family)
+ 			goto errout;
+ 
+-	list_add_tail_rcu(&ops->list, &rules_ops);
++	hold_net(net);
++	list_add_tail_rcu(&ops->list, &net->rules_ops);
+ 	err = 0;
+ errout:
+-	spin_unlock(&rules_mod_lock);
++	spin_unlock(&net->rules_mod_lock);
+ 
+ 	return err;
+ }
+@@ -88,13 +88,13 @@
+ 	}
+ }
+ 
+-int fib_rules_unregister(struct fib_rules_ops *ops)
++int fib_rules_unregister(struct net *net, struct fib_rules_ops *ops)
+ {
+ 	int err = 0;
+ 	struct fib_rules_ops *o;
+ 
+-	spin_lock(&rules_mod_lock);
+-	list_for_each_entry(o, &rules_ops, list) {
++	spin_lock(&net->rules_mod_lock);
++	list_for_each_entry(o, &net->rules_ops, list) {
+ 		if (o == ops) {
+ 			list_del_rcu(&o->list);
+ 			cleanup_ops(ops);
+@@ -104,9 +104,11 @@
+ 
+ 	err = -ENOENT;
+ out:
+-	spin_unlock(&rules_mod_lock);
++	spin_unlock(&net->rules_mod_lock);
+ 
+ 	synchronize_rcu();
++	if (!err)
++		release_net(net);
+ 
+ 	return err;
+ }
+@@ -197,6 +199,7 @@
+ 
+ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ 	struct fib_rules_ops *ops = NULL;
+ 	struct fib_rule *rule, *r, *last = NULL;
+@@ -206,7 +209,7 @@
+ 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+ 		goto errout;
+ 
+-	ops = lookup_rules_ops(frh->family);
++	ops = lookup_rules_ops(net, frh->family);
+ 	if (ops == NULL) {
+ 		err = EAFNOSUPPORT;
+ 		goto errout;
+@@ -234,7 +237,7 @@
+ 
+ 		rule->ifindex = -1;
+ 		nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ);
+-		dev = __dev_get_by_name(rule->ifname);
++		dev = __dev_get_by_name(net, rule->ifname);
+ 		if (dev)
+ 			rule->ifindex = dev->ifindex;
+ 	}
+@@ -256,7 +259,7 @@
+ 	rule->table = frh_get_table(frh, tb);
+ 
+ 	if (!rule->pref && ops->default_pref)
+-		rule->pref = ops->default_pref();
++		rule->pref = ops->default_pref(ops);
+ 
+ 	err = -EINVAL;
+ 	if (tb[FRA_GOTO]) {
+@@ -319,7 +322,7 @@
+ 	else
+ 		list_add_rcu(&rule->list, ops->rules_list);
+ 
+-	notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
++	notify_rule_change(net, RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid);
+ 	flush_route_cache(ops);
+ 	rules_ops_put(ops);
+ 	return 0;
+@@ -333,6 +336,7 @@
+ 
+ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct fib_rule_hdr *frh = nlmsg_data(nlh);
+ 	struct fib_rules_ops *ops = NULL;
+ 	struct fib_rule *rule, *tmp;
+@@ -342,7 +346,7 @@
+ 	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+ 		goto errout;
+ 
+-	ops = lookup_rules_ops(frh->family);
++	ops = lookup_rules_ops(net, frh->family);
+ 	if (ops == NULL) {
+ 		err = EAFNOSUPPORT;
+ 		goto errout;
+@@ -408,7 +412,7 @@
+ 		}
+ 
+ 		synchronize_rcu();
+-		notify_rule_change(RTM_DELRULE, rule, ops, nlh,
++		notify_rule_change(net, RTM_DELRULE, rule, ops, nlh,
+ 				   NETLINK_CB(skb).pid);
+ 		fib_rule_put(rule);
+ 		flush_route_cache(ops);
+@@ -514,13 +518,17 @@
+ 
+ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct fib_rules_ops *ops;
+ 	int idx = 0, family;
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ 	family = rtnl_msg_family(cb->nlh);
+ 	if (family != AF_UNSPEC) {
+ 		/* Protocol specific dump request */
+-		ops = lookup_rules_ops(family);
++		ops = lookup_rules_ops(net, family);
+ 		if (ops == NULL)
+ 			return -EAFNOSUPPORT;
+ 
+@@ -528,7 +536,7 @@
+ 	}
+ 
+ 	rcu_read_lock();
+-	list_for_each_entry_rcu(ops, &rules_ops, list) {
++	list_for_each_entry_rcu(ops, &net->rules_ops, list) {
+ 		if (idx < cb->args[0] || !try_module_get(ops->owner))
+ 			goto skip;
+ 
+@@ -545,7 +553,7 @@
+ 	return skb->len;
+ }
+ 
+-static void notify_rule_change(int event, struct fib_rule *rule,
++static void notify_rule_change(struct net *net, int event, struct fib_rule *rule,
+ 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
+ 			       u32 pid)
+ {
+@@ -563,10 +571,10 @@
+ 		kfree_skb(skb);
+ 		goto errout;
+ 	}
+-	err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL);
++	err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
+ errout:
+ 	if (err < 0)
+-		rtnl_set_sk_err(ops->nlgroup, err);
++		rtnl_set_sk_err(net, ops->nlgroup, err);
+ }
+ 
+ static void attach_rules(struct list_head *rules, struct net_device *dev)
+@@ -594,19 +602,23 @@
+ 			    void *ptr)
+ {
+ 	struct net_device *dev = ptr;
++	struct net *net = dev->nd_net;
+ 	struct fib_rules_ops *ops;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	ASSERT_RTNL();
+ 	rcu_read_lock();
+ 
+ 	switch (event) {
+ 	case NETDEV_REGISTER:
+-		list_for_each_entry(ops, &rules_ops, list)
++		list_for_each_entry(ops, &net->rules_ops, list)
+ 			attach_rules(ops->rules_list, dev);
+ 		break;
+ 
+ 	case NETDEV_UNREGISTER:
+-		list_for_each_entry(ops, &rules_ops, list)
++		list_for_each_entry(ops, &net->rules_ops, list)
+ 			detach_rules(ops->rules_list, dev);
+ 		break;
+ 	}
+@@ -620,13 +632,28 @@
+ 	.notifier_call = fib_rules_event,
+ };
+ 
++static int fib_rules_net_init(struct net *net)
++{
++	INIT_LIST_HEAD(&net->rules_ops);
++	spin_lock_init(&net->rules_mod_lock);
++	return 0;
++}
++
++static struct pernet_operations fib_rules_net_ops = {
++	.init = fib_rules_net_init,
++};
++
+ static int __init fib_rules_init(void)
+ {
++	int ret;
+ 	rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL);
+ 	rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL);
+ 	rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule);
+ 
+-	return register_netdevice_notifier(&fib_rules_notifier);
++	ret = register_pernet_subsys(&fib_rules_net_ops);
++	if (!ret)
++		ret = register_netdevice_notifier(&fib_rules_notifier);
++	return ret;
+ }
+ 
+ subsys_initcall(fib_rules_init);
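With rules_ops and its lock moved into struct net, each address family now registers its ops against an explicit namespace. A hedged sketch of a per-family caller under the new signatures (fib4_rules_ops stands in for any family's ops):

/* Illustration only: per-family registration with the new signatures. */
err = fib_rules_register(net, &fib4_rules_ops);
if (err < 0)
	return err;
/* ... and symmetrically on teardown: */
fib_rules_unregister(net, &fib4_rules_ops);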
+diff -Nurb linux-2.6.22-try2/net/core/neighbour.c linux-2.6.22-try2-netns/net/core/neighbour.c
+--- linux-2.6.22-try2/net/core/neighbour.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/neighbour.c	2007-12-19 22:49:18.000000000 -0500
+@@ -33,6 +33,7 @@
+ #include <linux/rtnetlink.h>
+ #include <linux/random.h>
+ #include <linux/string.h>
++#include <net/net_namespace.h>
+ 
+ #define NEIGH_DEBUG 1
+ 
+@@ -361,7 +362,7 @@
+ 	return n;
+ }
+ 
+-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
++struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net * net, const void *pkey)
+ {
+ 	struct neighbour *n;
+ 	int key_len = tbl->key_len;
+@@ -371,7 +372,8 @@
+ 
+ 	read_lock_bh(&tbl->lock);
+ 	for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
+-		if (!memcmp(n->primary_key, pkey, key_len)) {
++		if (!memcmp(n->primary_key, pkey, key_len) &&
++		    (net == n->dev->nd_net)) {
+ 			neigh_hold(n);
+ 			NEIGH_CACHE_STAT_INC(tbl, hits);
+ 			break;
+@@ -449,7 +451,8 @@
+ 	goto out;
+ }
+ 
+-struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
++struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
++				    struct net * net, const void *pkey,
+ 				    struct net_device *dev, int creat)
+ {
+ 	struct pneigh_entry *n;
+@@ -465,6 +468,7 @@
+ 
+ 	for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
+ 		if (!memcmp(n->key, pkey, key_len) &&
++		    (n->net == net) &&
+ 		    (n->dev == dev || !n->dev)) {
+ 			read_unlock_bh(&tbl->lock);
+ 			goto out;
+@@ -479,6 +483,7 @@
+ 	if (!n)
+ 		goto out;
+ 
++	n->net = hold_net(net);
+ 	memcpy(n->key, pkey, key_len);
+ 	n->dev = dev;
+ 	if (dev)
+@@ -501,7 +506,7 @@
+ }
+ 
+ 
+-int pneigh_delete(struct neigh_table *tbl, const void *pkey,
++int pneigh_delete(struct neigh_table *tbl, struct net * net, const void *pkey,
+ 		  struct net_device *dev)
+ {
+ 	struct pneigh_entry *n, **np;
+@@ -516,13 +521,15 @@
+ 	write_lock_bh(&tbl->lock);
+ 	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
+ 	     np = &n->next) {
+-		if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
++		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
++		    (n->net == net)) {
+ 			*np = n->next;
+ 			write_unlock_bh(&tbl->lock);
+ 			if (tbl->pdestructor)
+ 				tbl->pdestructor(n);
+ 			if (n->dev)
+ 				dev_put(n->dev);
++			release_net(n->net);
+ 			kfree(n);
+ 			return 0;
+ 		}
+@@ -545,6 +552,7 @@
+ 					tbl->pdestructor(n);
+ 				if (n->dev)
+ 					dev_put(n->dev);
++				release_net(n->net);
+ 				kfree(n);
+ 				continue;
+ 			}
+@@ -1266,12 +1274,37 @@
+ 	spin_unlock(&tbl->proxy_queue.lock);
+ }
+ 
++static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
++						      struct net * net, int ifindex)
++{
++	struct neigh_parms *p;
++	
++	for (p = &tbl->parms; p; p = p->next) {
++		if (p->net != net)
++			continue;
++		if ((p->dev && p->dev->ifindex == ifindex) ||
++		    (!p->dev && !ifindex))
++			return p;
++	}
++
++	return NULL;
++}
+ 
+ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
+ 				      struct neigh_table *tbl)
+ {
+-	struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
++	struct neigh_parms *p, *ref;
++	struct net * net;
++
++	net = &init_net;
++	if (dev)
++		net = dev->nd_net;
++
++	ref = lookup_neigh_params(tbl, net, 0);
++	if (!ref)
++		return NULL;
+ 
++	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
+ 	if (p) {
+ 		p->tbl		  = tbl;
+ 		atomic_set(&p->refcnt, 1);
+@@ -1287,6 +1320,7 @@
+ 			dev_hold(dev);
+ 			p->dev = dev;
+ 		}
++		p->net = hold_net(net);
+ 		p->sysctl_table = NULL;
+ 		write_lock_bh(&tbl->lock);
+ 		p->next		= tbl->parms.next;
+@@ -1296,6 +1330,22 @@
+ 	return p;
+ }
+ 
++struct neigh_parms *neigh_parms_alloc_default(struct neigh_table *tbl,
++					      struct net *net)
++{
++	struct neigh_parms *parms;
++
++	if (net != &init_net) {
++		parms = neigh_parms_alloc(NULL, tbl);
++		if (!parms)
++			return NULL;
++		release_net(parms->net);
++		parms->net = hold_net(net);
++	} else
++		parms = neigh_parms_clone(&tbl->parms);
++	return parms;
++}
++
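A hedged consumer sketch (hypothetical; arp_tbl stands in for any neigh_table): a protocol that used to call neigh_parms_clone(&tbl->parms) directly can now ask for namespace-tagged defaults:

/* Illustration only: namespace-aware default parms for a table. */
struct neigh_parms *parms = neigh_parms_alloc_default(&arp_tbl, net);
if (!parms)
	return -ENOBUFS;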
+ static void neigh_rcu_free_parms(struct rcu_head *head)
+ {
+ 	struct neigh_parms *parms =
+@@ -1328,6 +1376,7 @@
+ 
+ void neigh_parms_destroy(struct neigh_parms *parms)
+ {
++	release_net(parms->net);
+ 	kfree(parms);
+ }
+ 
+@@ -1338,6 +1387,7 @@
+ 	unsigned long now = jiffies;
+ 	unsigned long phsize;
+ 
++	tbl->parms.net = &init_net;
+ 	atomic_set(&tbl->parms.refcnt, 1);
+ 	INIT_RCU_HEAD(&tbl->parms.rcu_head);
+ 	tbl->parms.reachable_time =
+@@ -1353,7 +1403,7 @@
+ 		panic("cannot create neighbour cache statistics");
+ 
+ #ifdef CONFIG_PROC_FS
+-	tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
++	tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat);
+ 	if (!tbl->pde)
+ 		panic("cannot create neighbour proc dir entry");
+ 	tbl->pde->proc_fops = &neigh_stat_seq_fops;
+@@ -1443,6 +1493,7 @@
+ 
+ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct ndmsg *ndm;
+ 	struct nlattr *dst_attr;
+ 	struct neigh_table *tbl;
+@@ -1458,7 +1509,7 @@
+ 
+ 	ndm = nlmsg_data(nlh);
+ 	if (ndm->ndm_ifindex) {
+-		dev = dev_get_by_index(ndm->ndm_ifindex);
++		dev = dev_get_by_index(net, ndm->ndm_ifindex);
+ 		if (dev == NULL) {
+ 			err = -ENODEV;
+ 			goto out;
+@@ -1477,7 +1528,7 @@
+ 			goto out_dev_put;
+ 
+ 		if (ndm->ndm_flags & NTF_PROXY) {
+-			err = pneigh_delete(tbl, nla_data(dst_attr), dev);
++			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
+ 			goto out_dev_put;
+ 		}
+ 
+@@ -1508,6 +1559,7 @@
+ 
+ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct ndmsg *ndm;
+ 	struct nlattr *tb[NDA_MAX+1];
+ 	struct neigh_table *tbl;
+@@ -1524,7 +1576,7 @@
+ 
+ 	ndm = nlmsg_data(nlh);
+ 	if (ndm->ndm_ifindex) {
+-		dev = dev_get_by_index(ndm->ndm_ifindex);
++		dev = dev_get_by_index(net, ndm->ndm_ifindex);
+ 		if (dev == NULL) {
+ 			err = -ENODEV;
+ 			goto out;
+@@ -1553,7 +1605,7 @@
+ 			struct pneigh_entry *pn;
+ 
+ 			err = -ENOBUFS;
+-			pn = pneigh_lookup(tbl, dst, dev, 1);
++			pn = pneigh_lookup(tbl, net, dst, dev, 1);
+ 			if (pn) {
+ 				pn->flags = ndm->ndm_flags;
+ 				err = 0;
+@@ -1748,19 +1800,6 @@
+ 	return -EMSGSIZE;
+ }
+ 
+-static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
+-						      int ifindex)
+-{
+-	struct neigh_parms *p;
+-
+-	for (p = &tbl->parms; p; p = p->next)
+-		if ((p->dev && p->dev->ifindex == ifindex) ||
+-		    (!p->dev && !ifindex))
+-			return p;
+-
+-	return NULL;
+-}
+-
+ static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
+ 	[NDTA_NAME]		= { .type = NLA_STRING },
+ 	[NDTA_THRESH1]		= { .type = NLA_U32 },
+@@ -1788,6 +1827,7 @@
+ 
+ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct neigh_table *tbl;
+ 	struct ndtmsg *ndtmsg;
+ 	struct nlattr *tb[NDTA_MAX+1];
+@@ -1837,7 +1877,7 @@
+ 		if (tbp[NDTPA_IFINDEX])
+ 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
+ 
+-		p = lookup_neigh_params(tbl, ifindex);
++		p = lookup_neigh_params(tbl, net, ifindex);
+ 		if (p == NULL) {
+ 			err = -ENOENT;
+ 			goto errout_tbl_lock;
+@@ -1912,6 +1952,7 @@
+ 
+ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	int family, tidx, nidx = 0;
+ 	int tbl_skip = cb->args[0];
+ 	int neigh_skip = cb->args[1];
+@@ -1931,8 +1972,11 @@
+ 				       NLM_F_MULTI) <= 0)
+ 			break;
+ 
+-		for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
+-			if (nidx < neigh_skip)
++		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
++			if (net != p->net)
++				continue;
++
++			if (nidx++ < neigh_skip)
+ 				continue;
+ 
+ 			if (neightbl_fill_param_info(skb, tbl, p,
+@@ -2003,6 +2047,7 @@
+ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
+ 			    struct netlink_callback *cb)
+ {
++	struct net * net = skb->sk->sk_net;
+ 	struct neighbour *n;
+ 	int rc, h, s_h = cb->args[1];
+ 	int idx, s_idx = idx = cb->args[2];
+@@ -2013,8 +2058,12 @@
+ 			continue;
+ 		if (h > s_h)
+ 			s_idx = 0;
+-		for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
+-			if (idx < s_idx)
++		for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
++			int lidx;
++			if (n->dev->nd_net != net)
++				continue;
++			lidx = idx++;
++			if (lidx < s_idx)
+ 				continue;
+ 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
+ 					    cb->nlh->nlmsg_seq,
+@@ -2109,6 +2158,7 @@
+ static struct neighbour *neigh_get_first(struct seq_file *seq)
+ {
+ 	struct neigh_seq_state *state = seq->private;
++	struct net * net = state->net;
+ 	struct neigh_table *tbl = state->tbl;
+ 	struct neighbour *n = NULL;
+ 	int bucket = state->bucket;
+@@ -2118,6 +2168,8 @@
+ 		n = tbl->hash_buckets[bucket];
+ 
+ 		while (n) {
++			if (n->dev->nd_net != net)
++				goto next;
+ 			if (state->neigh_sub_iter) {
+ 				loff_t fakep = 0;
+ 				void *v;
+@@ -2147,6 +2199,7 @@
+ 					loff_t *pos)
+ {
+ 	struct neigh_seq_state *state = seq->private;
++	struct net * net = state->net;
+ 	struct neigh_table *tbl = state->tbl;
+ 
+ 	if (state->neigh_sub_iter) {
+@@ -2158,6 +2211,8 @@
+ 
+ 	while (1) {
+ 		while (n) {
++			if (n->dev->nd_net != net)
++				goto next;
+ 			if (state->neigh_sub_iter) {
+ 				void *v = state->neigh_sub_iter(state, n, pos);
+ 				if (v)
+@@ -2204,6 +2259,7 @@
+ static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
+ {
+ 	struct neigh_seq_state *state = seq->private;
++	struct net * net = state->net;
+ 	struct neigh_table *tbl = state->tbl;
+ 	struct pneigh_entry *pn = NULL;
+ 	int bucket = state->bucket;
+@@ -2211,6 +2267,8 @@
+ 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
+ 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
+ 		pn = tbl->phash_buckets[bucket];
++		while (pn && (pn->net != net))
++			pn = pn->next;
+ 		if (pn)
+ 			break;
+ 	}
+@@ -2224,6 +2282,7 @@
+ 					    loff_t *pos)
+ {
+ 	struct neigh_seq_state *state = seq->private;
++	struct net * net = state->net;
+ 	struct neigh_table *tbl = state->tbl;
+ 
+ 	pn = pn->next;
+@@ -2231,6 +2290,8 @@
+ 		if (++state->bucket > PNEIGH_HASHMASK)
+ 			break;
+ 		pn = tbl->phash_buckets[state->bucket];
++		while (pn && (pn->net != net))
++			pn = pn->next;
+ 		if (pn)
+ 			break;
+ 	}
+@@ -2433,6 +2494,7 @@
+ 
+ static void __neigh_notify(struct neighbour *n, int type, int flags)
+ {
++	struct net * net = n->dev->nd_net;
+ 	struct sk_buff *skb;
+ 	int err = -ENOBUFS;
+ 
+@@ -2447,10 +2509,10 @@
+ 		kfree_skb(skb);
+ 		goto errout;
+ 	}
+-	err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
++	err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+ errout:
+ 	if (err < 0)
+-		rtnl_set_sk_err(RTNLGRP_NEIGH, err);
++		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
+ }
+ 
+ void neigh_app_ns(struct neighbour *n)
+@@ -2648,6 +2710,7 @@
+ 
+ 	if (!t)
+ 		return -ENOBUFS;
++
+ 	t->neigh_vars[0].data  = &p->mcast_probes;
+ 	t->neigh_vars[1].data  = &p->ucast_probes;
+ 	t->neigh_vars[2].data  = &p->app_probes;
+@@ -2716,7 +2779,7 @@
+ 	t->neigh_proto_dir[0].child    = t->neigh_neigh_dir;
+ 	t->neigh_root_dir[0].child     = t->neigh_proto_dir;
+ 
+-	t->sysctl_header = register_sysctl_table(t->neigh_root_dir);
++	t->sysctl_header = register_net_sysctl_table(p->net, t->neigh_root_dir);
+ 	if (!t->sysctl_header) {
+ 		err = -ENOBUFS;
+ 		goto free_procname;
+@@ -2738,7 +2801,7 @@
+ 	if (p->sysctl_table) {
+ 		struct neigh_sysctl_table *t = p->sysctl_table;
+ 		p->sysctl_table = NULL;
+-		unregister_sysctl_table(t->sysctl_header);
++		unregister_net_sysctl_table(t->sysctl_header);
+ 		kfree(t->neigh_dev[0].procname);
+ 		kfree(t);
+ 	}
+@@ -2771,6 +2834,7 @@
+ EXPORT_SYMBOL(neigh_lookup);
+ EXPORT_SYMBOL(neigh_lookup_nodev);
+ EXPORT_SYMBOL(neigh_parms_alloc);
++EXPORT_SYMBOL(neigh_parms_alloc_default);
+ EXPORT_SYMBOL(neigh_parms_release);
+ EXPORT_SYMBOL(neigh_rand_reach_time);
+ EXPORT_SYMBOL(neigh_resolve_output);
+diff -Nurb linux-2.6.22-try2/net/core/net-sysfs.c linux-2.6.22-try2-netns/net/core/net-sysfs.c
+--- linux-2.6.22-try2/net/core/net-sysfs.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/net-sysfs.c	2007-12-19 22:49:18.000000000 -0500
+@@ -13,7 +13,9 @@
+ #include <linux/kernel.h>
+ #include <linux/netdevice.h>
+ #include <linux/if_arp.h>
++#include <linux/nsproxy.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/wireless.h>
+ #include <net/iw_handler.h>
+@@ -29,16 +31,16 @@
+ }
+ 
+ /* use same locking rules as GIF* ioctl's */
+-static ssize_t netdev_show(const struct device *dev,
++static ssize_t netdev_show(const struct device *device,
+ 			   struct device_attribute *attr, char *buf,
+ 			   ssize_t (*format)(const struct net_device *, char *))
+ {
+-	struct net_device *net = to_net_dev(dev);
++	struct net_device *dev = to_net_dev(device);
+ 	ssize_t ret = -EINVAL;
+ 
+ 	read_lock(&dev_base_lock);
+-	if (dev_isalive(net))
+-		ret = (*format)(net, buf);
++	if (dev_isalive(dev))
++		ret = (*format)(dev, buf);
+ 	read_unlock(&dev_base_lock);
+ 
+ 	return ret;
+@@ -46,9 +48,9 @@
+ 
+ /* generate a show function for simple field */
+ #define NETDEVICE_SHOW(field, format_string)				\
+-static ssize_t format_##field(const struct net_device *net, char *buf)	\
++static ssize_t format_##field(const struct net_device *dev, char *buf)	\
+ {									\
+-	return sprintf(buf, format_string, net->field);			\
++	return sprintf(buf, format_string, dev->field);			\
+ }									\
+ static ssize_t show_##field(struct device *dev,				\
+ 			    struct device_attribute *attr, char *buf)	\
+@@ -58,11 +60,11 @@
+ 
+ 
+ /* use same locking and permission rules as SIF* ioctl's */
+-static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
++static ssize_t netdev_store(struct device *device, struct device_attribute *attr,
+ 			    const char *buf, size_t len,
+ 			    int (*set)(struct net_device *, unsigned long))
+ {
+-	struct net_device *net = to_net_dev(dev);
++	struct net_device *dev = to_net_dev(device);
+ 	char *endp;
+ 	unsigned long new;
+ 	int ret = -EINVAL;
+@@ -75,8 +77,8 @@
+ 		goto err;
+ 
+ 	rtnl_lock();
+-	if (dev_isalive(net)) {
+-		if ((ret = (*set)(net, new)) == 0)
++	if (dev_isalive(dev)) {
++		if ((ret = (*set)(dev, new)) == 0)
+ 			ret = len;
+ 	}
+ 	rtnl_unlock();
+@@ -103,45 +105,45 @@
+ 	return cp - buf;
+ }
+ 
+-static ssize_t show_address(struct device *dev, struct device_attribute *attr,
++static ssize_t show_address(struct device *device, struct device_attribute *attr,
+ 			    char *buf)
+ {
+-	struct net_device *net = to_net_dev(dev);
++	struct net_device *dev = to_net_dev(device);
+ 	ssize_t ret = -EINVAL;
+ 
+ 	read_lock(&dev_base_lock);
+-	if (dev_isalive(net))
+-	    ret = format_addr(buf, net->dev_addr, net->addr_len);
++	if (dev_isalive(dev))
++	    ret = format_addr(buf, dev->dev_addr, dev->addr_len);
+ 	read_unlock(&dev_base_lock);
+ 	return ret;
+ }
+ 
+-static ssize_t show_broadcast(struct device *dev,
++static ssize_t show_broadcast(struct device *device,
+ 			    struct device_attribute *attr, char *buf)
+ {
+-	struct net_device *net = to_net_dev(dev);
+-	if (dev_isalive(net))
+-		return format_addr(buf, net->broadcast, net->addr_len);
++	struct net_device *dev = to_net_dev(device);
++	if (dev_isalive(dev))
++		return format_addr(buf, dev->broadcast, dev->addr_len);
+ 	return -EINVAL;
+ }
+ 
+-static ssize_t show_carrier(struct device *dev,
++static ssize_t show_carrier(struct device *device,
+ 			    struct device_attribute *attr, char *buf)
+ {
+-	struct net_device *netdev = to_net_dev(dev);
+-	if (netif_running(netdev)) {
+-		return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev));
++	struct net_device *dev = to_net_dev(device);
++	if (netif_running(dev)) {
++		return sprintf(buf, fmt_dec, !!netif_carrier_ok(dev));
+ 	}
+ 	return -EINVAL;
+ }
+ 
+-static ssize_t show_dormant(struct device *dev,
++static ssize_t show_dormant(struct device *device,
+ 			    struct device_attribute *attr, char *buf)
+ {
+-	struct net_device *netdev = to_net_dev(dev);
++	struct net_device *dev = to_net_dev(device);
+ 
+-	if (netif_running(netdev))
+-		return sprintf(buf, fmt_dec, !!netif_dormant(netdev));
++	if (netif_running(dev))
++		return sprintf(buf, fmt_dec, !!netif_dormant(dev));
+ 
+ 	return -EINVAL;
+ }
+@@ -156,15 +158,15 @@
+ 	"up"
+ };
+ 
+-static ssize_t show_operstate(struct device *dev,
++static ssize_t show_operstate(struct device *device,
+ 			      struct device_attribute *attr, char *buf)
+ {
+-	const struct net_device *netdev = to_net_dev(dev);
++	const struct net_device *dev = to_net_dev(device);
+ 	unsigned char operstate;
+ 
+ 	read_lock(&dev_base_lock);
+-	operstate = netdev->operstate;
+-	if (!netif_running(netdev))
++	operstate = dev->operstate;
++	if (!netif_running(dev))
+ 		operstate = IF_OPER_DOWN;
+ 	read_unlock(&dev_base_lock);
+ 
+@@ -177,57 +179,57 @@
+ /* read-write attributes */
+ NETDEVICE_SHOW(mtu, fmt_dec);
+ 
+-static int change_mtu(struct net_device *net, unsigned long new_mtu)
++static int change_mtu(struct net_device *dev, unsigned long new_mtu)
+ {
+-	return dev_set_mtu(net, (int) new_mtu);
++	return dev_set_mtu(dev, (int) new_mtu);
+ }
+ 
+-static ssize_t store_mtu(struct device *dev, struct device_attribute *attr,
++static ssize_t store_mtu(struct device *device, struct device_attribute *attr,
+ 			 const char *buf, size_t len)
+ {
+-	return netdev_store(dev, attr, buf, len, change_mtu);
++	return netdev_store(device, attr, buf, len, change_mtu);
+ }
+ 
+ NETDEVICE_SHOW(flags, fmt_hex);
+ 
+-static int change_flags(struct net_device *net, unsigned long new_flags)
++static int change_flags(struct net_device *dev, unsigned long new_flags)
+ {
+-	return dev_change_flags(net, (unsigned) new_flags);
++	return dev_change_flags(dev, (unsigned) new_flags);
+ }
+ 
+-static ssize_t store_flags(struct device *dev, struct device_attribute *attr,
++static ssize_t store_flags(struct device *device, struct device_attribute *attr,
+ 			   const char *buf, size_t len)
+ {
+-	return netdev_store(dev, attr, buf, len, change_flags);
++	return netdev_store(device, attr, buf, len, change_flags);
+ }
+ 
+ NETDEVICE_SHOW(tx_queue_len, fmt_ulong);
+ 
+-static int change_tx_queue_len(struct net_device *net, unsigned long new_len)
++static int change_tx_queue_len(struct net_device *dev, unsigned long new_len)
+ {
+-	net->tx_queue_len = new_len;
++	dev->tx_queue_len = new_len;
+ 	return 0;
+ }
+ 
+-static ssize_t store_tx_queue_len(struct device *dev,
++static ssize_t store_tx_queue_len(struct device *device,
+ 				  struct device_attribute *attr,
+ 				  const char *buf, size_t len)
+ {
+-	return netdev_store(dev, attr, buf, len, change_tx_queue_len);
++	return netdev_store(device, attr, buf, len, change_tx_queue_len);
+ }
+ 
+ NETDEVICE_SHOW(weight, fmt_dec);
+ 
+-static int change_weight(struct net_device *net, unsigned long new_weight)
++static int change_weight(struct net_device *dev, unsigned long new_weight)
+ {
+-	net->weight = new_weight;
++	dev->weight = new_weight;
+ 	return 0;
+ }
+ 
+-static ssize_t store_weight(struct device *dev, struct device_attribute *attr,
++static ssize_t store_weight(struct device *device, struct device_attribute *attr,
+ 			    const char *buf, size_t len)
+ {
+-	return netdev_store(dev, attr, buf, len, change_weight);
++	return netdev_store(device, attr, buf, len, change_weight);
+ }
+ 
+ static struct device_attribute net_class_attributes[] = {
+@@ -447,6 +449,23 @@
+ 	kfree((char *)dev - dev->padded);
+ }
+ 
++static const void *net_current_tag(void)
++{
++	return current->nsproxy->net_ns;
++}
++
++static const void *net_kobject_tag(struct kobject *kobj)
++{
++	struct net_device *dev;
++	dev = container_of(kobj, struct net_device, dev.kobj);
++	return dev->nd_net;
++}
++
++static const struct shadow_dir_operations net_shadow_dir_operations = {
++	.current_tag = net_current_tag,
++	.kobject_tag = net_kobject_tag,
++};
++
+ static struct class net_class = {
+ 	.name = "net",
+ 	.dev_release = netdev_release,
+@@ -454,42 +473,43 @@
+ #ifdef CONFIG_HOTPLUG
+ 	.dev_uevent = netdev_uevent,
+ #endif
++	.shadow_ops = &net_shadow_dir_operations,
+ };
+ 
+ /* Delete sysfs entries but hold kobject reference until after all
+  * netdev references are gone.
+  */
+-void netdev_unregister_sysfs(struct net_device * net)
++void netdev_unregister_sysfs(struct net_device * dev)
+ {
+-	struct device *dev = &(net->dev);
++	struct device *device = &(dev->dev);
+ 
+-	kobject_get(&dev->kobj);
+-	device_del(dev);
++	kobject_get(&device->kobj);
++	device_del(device);
+ }
+ 
+ /* Create sysfs entries for network device. */
+-int netdev_register_sysfs(struct net_device *net)
++int netdev_register_sysfs(struct net_device *dev)
+ {
+-	struct device *dev = &(net->dev);
+-	struct attribute_group **groups = net->sysfs_groups;
++	struct device *device = &(dev->dev);
++	struct attribute_group **groups = dev->sysfs_groups;
+ 
+-	device_initialize(dev);
+-	dev->class = &net_class;
+-	dev->platform_data = net;
+-	dev->groups = groups;
++	device_initialize(device);
++	device->class = &net_class;
++	device->platform_data = dev;
++	device->groups = groups;
+ 
+ 	BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
+-	strlcpy(dev->bus_id, net->name, BUS_ID_SIZE);
++	strlcpy(device->bus_id, dev->name, BUS_ID_SIZE);
+ 
+-	if (net->get_stats)
++	if (dev->get_stats)
+ 		*groups++ = &netstat_group;
+ 
+ #ifdef CONFIG_WIRELESS_EXT
+-	if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats)
++	if (dev->wireless_handlers && dev->wireless_handlers->get_wireless_stats)
+ 		*groups++ = &wireless_group;
+ #endif
+ 
+-	return device_add(dev);
++	return device_add(device);
+ }
+ 
+ int netdev_sysfs_init(void)
+diff -Nurb linux-2.6.22-try2/net/core/net_namespace.c linux-2.6.22-try2-netns/net/core/net_namespace.c
+--- linux-2.6.22-try2/net/core/net_namespace.c	1969-12-31 19:00:00.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/net_namespace.c	2007-12-19 22:49:18.000000000 -0500
+@@ -0,0 +1,332 @@
++#include <linux/workqueue.h>
++#include <linux/rtnetlink.h>
++#include <linux/cache.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/delay.h>
++#include <net/net_namespace.h>
++
++/*
++ *	Our network namespace constructor/destructor lists
++ */
++
++static LIST_HEAD(pernet_list);
++static struct list_head *first_device = &pernet_list;
++static DEFINE_MUTEX(net_mutex);
++
++static DEFINE_MUTEX(net_list_mutex);
++LIST_HEAD(net_namespace_list);
++
++static struct kmem_cache *net_cachep;
++
++struct net init_net;
++EXPORT_SYMBOL_GPL(init_net);
++
++void net_lock(void)
++{
++	mutex_lock(&net_list_mutex);
++}
++
++void net_unlock(void)
++{
++	mutex_unlock(&net_list_mutex);
++}
++
++static struct net *net_alloc(void)
++{
++	return kmem_cache_alloc(net_cachep, GFP_KERNEL);
++}
++
++static void net_free(struct net *net)
++{
++	if (!net)
++		return;
++
++	if (unlikely(atomic_read(&net->use_count) != 0)) {
++		printk(KERN_EMERG "network namespace not free! Usage: %d\n",
++			atomic_read(&net->use_count));
++		return;
++	}
++
++	kmem_cache_free(net_cachep, net);
++}
++
++static void cleanup_net(struct work_struct *work)
++{
++	struct pernet_operations *ops;
++	struct list_head *ptr;
++	struct net *net;
++
++	net = container_of(work, struct net, work);
++
++	mutex_lock(&net_mutex);
++
++	/* Don't let anyone else find us. */
++	net_lock();
++	list_del(&net->list);
++	net_unlock();
++
++	/* Run all of the network namespace exit methods */
++	list_for_each_prev(ptr, &pernet_list) {
++		ops = list_entry(ptr, struct pernet_operations, list);
++		if (ops->exit)
++			ops->exit(net);
++	}
++
++	mutex_unlock(&net_mutex);
++
++	/* Ensure there are no outstanding rcu callbacks using this
++	 * network namespace.
++	 */
++	rcu_barrier();
++
++	/* Finally it is safe to free my network namespace structure */
++	net_free(net);
++}
++
++
++void __put_net(struct net *net)
++{
++	/* Cleanup the network namespace in process context */
++	INIT_WORK(&net->work, cleanup_net);
++	schedule_work(&net->work);
++}
++EXPORT_SYMBOL_GPL(__put_net);
++
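__put_net() is the slow path behind the refcounting helpers used throughout this patch (get_net()/put_net() on net->count, hold_net()/release_net() on net->use_count). The inline fast paths are assumed to live in net/net_namespace.h, roughly:

/* Hedged sketch, header not shown in this hunk: the inline fast
 * paths that pair with __put_net() above. */
static inline struct net *get_net(struct net *net)
{
	atomic_inc(&net->count);
	return net;
}

static inline void put_net(struct net *net)
{
	if (atomic_dec_and_test(&net->count))
		__put_net(net);		/* defers teardown to cleanup_net() */
}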
++/*
++ * setup_net runs the initializers for the network namespace object.
++ */
++static int setup_net(struct net *net)
++{
++	/* Must be called with net_mutex held */
++	struct pernet_operations *ops;
++	struct list_head *ptr;
++	int error;
++
++	memset(net, 0, sizeof(struct net));
++	atomic_set(&net->count, 1);
++	atomic_set(&net->use_count, 0);
++
++	error = 0;
++	list_for_each(ptr, &pernet_list) {
++		ops = list_entry(ptr, struct pernet_operations, list);
++		if (ops->init) {
++			error = ops->init(net);
++			if (error < 0)
++				goto out_undo;
++		}
++	}
++out:
++	return error;
++out_undo:
++	/* Walk through the list backwards calling the exit functions
++	 * for the pernet modules whose init functions did not fail.
++	 */
++	for (ptr = ptr->prev; ptr != &pernet_list; ptr = ptr->prev) {
++		ops = list_entry(ptr, struct pernet_operations, list);
++		if (ops->exit)
++			ops->exit(net);
++	}
++	goto out;
++}
++
++struct net *copy_net_ns(unsigned long flags, struct net *old_net)
++{
++	struct net *new_net = NULL;
++	int err;
++
++	get_net(old_net);
++
++	if (!(flags & CLONE_NEWNET))
++		return old_net;
++
++	err = -EPERM;
++	if (!capable(CAP_SYS_ADMIN))
++		goto out;
++
++	err = -ENOMEM;
++	new_net = net_alloc();
++	if (!new_net)
++		goto out;
++
++	mutex_lock(&net_mutex);
++	err = setup_net(new_net);
++	if (err)
++		goto out_unlock;
++
++	net_lock();
++	list_add_tail(&new_net->list, &net_namespace_list);
++	net_unlock();
++
++
++out_unlock:
++	mutex_unlock(&net_mutex);
++out:
++	put_net(old_net);
++	if (err) {
++		net_free(new_net);
++		new_net = ERR_PTR(err);
++	}
++	return new_net;
++}
++
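copy_net_ns() is shaped for the nsproxy copy path, which lives elsewhere in the series: without CLONE_NEWNET it hands back a reference to the old namespace, otherwise it builds and registers a fresh one. A hedged caller sketch:

/* Illustration only: the assumed nsproxy hookup for CLONE_NEWNET. */
new_nsproxy->net_ns = copy_net_ns(flags, old_nsproxy->net_ns);
if (IS_ERR(new_nsproxy->net_ns))
	return PTR_ERR(new_nsproxy->net_ns);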
++static int __init net_ns_init(void)
++{
++	int err;
++
++	printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
++	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
++					SMP_CACHE_BYTES,
++					SLAB_PANIC, NULL, NULL);
++	mutex_lock(&net_mutex);
++	err = setup_net(&init_net);
++
++	net_lock();
++	list_add_tail(&init_net.list, &net_namespace_list);
++	net_unlock();
++
++	mutex_unlock(&net_mutex);
++	if (err)
++		panic("Could not setup the initial network namespace");
++
++	return 0;
++}
++
++pure_initcall(net_ns_init);
++
++static int register_pernet_operations(struct list_head *list,
++				      struct pernet_operations *ops)
++{
++	struct net *net, *undo_net;
++	int error;
++
++	error = 0;
++	list_add_tail(&ops->list, list);
++	for_each_net(net) {
++		if (ops->init) {
++			error = ops->init(net);
++			if (error)
++				goto out_undo;
++		}
++	}
++out:
++	return error;
++
++out_undo:
++	/* If I have an error, clean up all the namespaces I initialized */
++	list_del(&ops->list);
++	for_each_net(undo_net) {
++		if (undo_net == net)
++			goto undone;
++		if (ops->exit)
++			ops->exit(undo_net);
++	}
++undone:
++	goto out;
++}
++
++static void unregister_pernet_operations(struct pernet_operations *ops)
++{
++	struct net *net;
++
++	list_del(&ops->list);
++	for_each_net(net)
++		if (ops->exit)
++			ops->exit(net);
++}
++
++/**
++ *      register_pernet_subsys - register a network namespace subsystem
++ *	@ops:  pernet operations structure for the subsystem
++ *
++ *	Register a subsystem which has init and exit functions
++ *	that are called when network namespaces are created and
++ *	destroyed respectively.
++ *
++ *	When registered, all network namespace init functions are
++ *	called for every existing network namespace, allowing kernel
++ *	modules to have a race-free view of the set of network namespaces.
++ *
++ *	When a new network namespace is created all of the init
++ *	methods are called in the order in which they were registered.
++ *
++ *	When a network namespace is destroyed all of the exit methods
++ *	are called in the reverse of the order in which they were
++ *	registered.
++ */
++int register_pernet_subsys(struct pernet_operations *ops)
++{
++	int error;
++	mutex_lock(&net_mutex);
++	error = register_pernet_operations(first_device, ops);
++	mutex_unlock(&net_mutex);
++	return error;
++}
++EXPORT_SYMBOL_GPL(register_pernet_subsys);
++
++/**
++ *      unregister_pernet_subsys - unregister a network namespace subsystem
++ *	@ops: pernet operations structure to manipulate
++ *
++ *	Remove the pernet operations structure from the list to be
++ *	used when network namespaces are created or destroyed.  In
++ *	addition run the exit method for all existing network
++ *	namespaces.
++ */
++void unregister_pernet_subsys(struct pernet_operations *ops)
++{
++	mutex_lock(&net_mutex);
++	unregister_pernet_operations(ops);
++	mutex_unlock(&net_mutex);
++}
++EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
++
++/**
++ *      register_pernet_device - register a network namespace device
++ *	@ops:  pernet operations structure for the subsystem
++ *
++ *	Register a device which has init and exit functions
++ *	that are called when network namespaces are created and
++ *	destroyed respectively.
++ *
++ *	When registered, all network namespace init functions are
++ *	called for every existing network namespace, allowing kernel
++ *	modules to have a race-free view of the set of network namespaces.
++ *
++ *	When a new network namespace is created all of the init
++ *	methods are called in the order in which they were registered.
++ *
++ *	When a network namespace is destroyed all of the exit methods
++ *	are called in the reverse of the order with which they were
++ *	registered.
++ */
++int register_pernet_device(struct pernet_operations *ops)
++{
++	int error;
++	mutex_lock(&net_mutex);
++	error = register_pernet_operations(&pernet_list, ops);
++	if (!error && (first_device == &pernet_list))
++		first_device = &ops->list;
++	mutex_unlock(&net_mutex);
++	return error;
++}
++EXPORT_SYMBOL_GPL(register_pernet_device);
++
++/**
++ *      unregister_pernet_device - unregister a network namespace netdevice
++ *	@ops: pernet operations structure to manipulate
++ *
++ *	Remove the pernet operations structure from the list to be
++ *	used when network namespaces are created or destroyed.  In
++ *	addition run the exit method for all existing network
++ *	namespaces.
++ */
++void unregister_pernet_device(struct pernet_operations *ops)
++{
++	mutex_lock(&net_mutex);
++	if (&ops->list == first_device)
++		first_device = first_device->next;
++	unregister_pernet_operations(ops);
++	mutex_unlock(&net_mutex);
++}
++EXPORT_SYMBOL_GPL(unregister_pernet_device);
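
[Editorial note: the kernel-doc above describes the contract; a minimal sketch of a module plugging into the pernet API follows. All names below (subsys_net_init, subsys_net_ops, the module hooks) are illustrative, not part of this patch.]

#include <linux/module.h>
#include <net/net_namespace.h>

/* Called for every existing namespace at registration time, and
 * again each time a new network namespace is created. */
static int subsys_net_init(struct net *net)
{
	/* allocate and initialize per-namespace state here */
	return 0;
}

/* Called when a namespace is torn down, and for every namespace
 * when the module unregisters. */
static void subsys_net_exit(struct net *net)
{
	/* release per-namespace state here */
}

static struct pernet_operations subsys_net_ops = {
	.init = subsys_net_init,
	.exit = subsys_net_exit,
};

static int __init subsys_init(void)
{
	return register_pernet_subsys(&subsys_net_ops);
}

static void __exit subsys_exit(void)
{
	unregister_pernet_subsys(&subsys_net_ops);
}

module_init(subsys_init);
module_exit(subsys_exit);
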
+diff -Nurb linux-2.6.22-try2/net/core/netpoll.c linux-2.6.22-try2-netns/net/core/netpoll.c
+--- linux-2.6.22-try2/net/core/netpoll.c	2007-12-19 15:29:24.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/netpoll.c	2007-12-19 22:49:18.000000000 -0500
+@@ -634,7 +634,7 @@
+ 	int err;
+ 
+ 	if (np->dev_name)
+-		ndev = dev_get_by_name(np->dev_name);
++		ndev = dev_get_by_name(&init_net, np->dev_name);
+ 	if (!ndev) {
+ 		printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
+ 		       np->name, np->dev_name);
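
[Editorial note: the netpoll hunk above is the first of many mechanical conversions in this patch: device lookups now take an explicit struct net. The converted calling pattern, sketched with an illustrative device name; code not yet made namespace-aware passes &init_net.]

	/* old: ndev = dev_get_by_name("eth0");  -- implicitly global */
	struct net_device *ndev = dev_get_by_name(&init_net, "eth0");
	if (ndev) {
		/* ... use the device ... */
		dev_put(ndev);	/* dev_get_by_name returns a held reference */
	}
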
+diff -Nurb linux-2.6.22-try2/net/core/pktgen.c linux-2.6.22-try2-netns/net/core/pktgen.c
+--- linux-2.6.22-try2/net/core/pktgen.c	2007-12-19 15:29:24.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/pktgen.c	2007-12-19 22:49:18.000000000 -0500
+@@ -155,6 +155,7 @@
+ #include <net/checksum.h>
+ #include <net/ipv6.h>
+ #include <net/addrconf.h>
++#include <net/net_namespace.h>
+ #include <asm/byteorder.h>
+ #include <linux/rcupdate.h>
+ #include <asm/bitops.h>
+@@ -1903,6 +1904,9 @@
+ {
+ 	struct net_device *dev = ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	/* It is OK that we do not hold the group lock right now,
+ 	 * as we run under the RTNL lock.
+ 	 */
+@@ -1933,7 +1937,7 @@
+ 		pkt_dev->odev = NULL;
+ 	}
+ 
+-	odev = dev_get_by_name(ifname);
++	odev = dev_get_by_name(&init_net, ifname);
+ 	if (!odev) {
+ 		printk("pktgen: no such netdevice: \"%s\"\n", ifname);
+ 		return -ENODEV;
+@@ -3570,7 +3574,7 @@
+ 
+ 	printk(version);
+ 
+-	pg_proc_dir = proc_mkdir(PG_PROC_DIR, proc_net);
++	pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net);
+ 	if (!pg_proc_dir)
+ 		return -ENODEV;
+ 	pg_proc_dir->owner = THIS_MODULE;
+@@ -3579,7 +3583,7 @@
+ 	if (pe == NULL) {
+ 		printk("pktgen: ERROR: cannot create %s procfs entry.\n",
+ 		       PGCTRL);
+-		proc_net_remove(PG_PROC_DIR);
++		proc_net_remove(&init_net, PG_PROC_DIR);
+ 		return -EINVAL;
+ 	}
+ 
+@@ -3602,7 +3606,7 @@
+ 		printk("pktgen: ERROR: Initialization failed for all threads\n");
+ 		unregister_netdevice_notifier(&pktgen_notifier_block);
+ 		remove_proc_entry(PGCTRL, pg_proc_dir);
+-		proc_net_remove(PG_PROC_DIR);
++		proc_net_remove(&init_net, PG_PROC_DIR);
+ 		return -ENODEV;
+ 	}
+ 
+@@ -3629,7 +3633,7 @@
+ 
+ 	/* Clean up proc file system */
+ 	remove_proc_entry(PGCTRL, pg_proc_dir);
+-	proc_net_remove(PG_PROC_DIR);
++	proc_net_remove(&init_net, PG_PROC_DIR);
+ }
+ 
+ module_init(pg_init);
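
[Editorial note: the pktgen notifier above shows a second recurring idiom: subsystems that are not yet namespace-aware bail out of netdevice events for devices outside the initial namespace. A schematic version, with a made-up handler name:]

	static int example_device_event(struct notifier_block *this,
					unsigned long event, void *ptr)
	{
		struct net_device *dev = ptr;

		/* dev->nd_net is the namespace this device lives in */
		if (dev->nd_net != &init_net)
			return NOTIFY_DONE;	/* ignore foreign namespaces */

		/* ... handle the event for init_net only ... */
		return NOTIFY_DONE;
	}
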
+diff -Nurb linux-2.6.22-try2/net/core/rtnetlink.c linux-2.6.22-try2-netns/net/core/rtnetlink.c
+--- linux-2.6.22-try2/net/core/rtnetlink.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/rtnetlink.c	2007-12-19 23:04:00.000000000 -0500
+@@ -59,7 +59,6 @@
+ };
+ 
+ static DEFINE_MUTEX(rtnl_mutex);
+-static struct sock *rtnl;
+ 
+ void rtnl_lock(void)
+ {
+@@ -73,9 +72,17 @@
+ 
+ void rtnl_unlock(void)
+ {
++	struct net *net;
+ 	mutex_unlock(&rtnl_mutex);
++
++	net_lock();
++	for_each_net(net) {
++		struct sock *rtnl = net->rtnl;
+-	if (rtnl && rtnl->sk_receive_queue.qlen)
+-		rtnl->sk_data_ready(rtnl, 0);
++		if (rtnl && rtnl->sk_receive_queue.qlen)
++			rtnl->sk_data_ready(rtnl, 0);
++	}
++	net_unlock();
++
+ 	netdev_run_todo();
+ }
+ 
+@@ -446,8 +453,9 @@
+ 	return ret;
+ }
+ 
+-int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
++int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned group, int echo)
+ {
++	struct sock *rtnl = net->rtnl;
+ 	int err = 0;
+ 
+ 	NETLINK_CB(skb).dst_group = group;
+@@ -459,14 +467,17 @@
+ 	return err;
+ }
+ 
+-int rtnl_unicast(struct sk_buff *skb, u32 pid)
++int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
+ {
++	struct sock *rtnl = net->rtnl;
++
+ 	return nlmsg_unicast(rtnl, skb, pid);
+ }
+ 
+-int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group,
++int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
+ 		struct nlmsghdr *nlh, gfp_t flags)
+ {
++	struct sock *rtnl = net->rtnl;
+ 	int report = 0;
+ 
+ 	if (nlh)
+@@ -475,8 +486,10 @@
+ 	return nlmsg_notify(rtnl, skb, pid, group, report, flags);
+ }
+ 
+-void rtnl_set_sk_err(u32 group, int error)
++void rtnl_set_sk_err(struct net *net, u32 group, int error)
+ {
++	struct sock *rtnl = net->rtnl;
++
+ 	netlink_set_err(rtnl, 0, group, error);
+ }
+ 
+@@ -687,12 +700,13 @@
+ 
+ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	int idx;
+ 	int s_idx = cb->args[0];
+ 	struct net_device *dev;
+ 
+ 	idx = 0;
+-	for_each_netdev(dev) {
++	for_each_netdev(net, dev) {
+ 		if (!nx_dev_visible(skb->sk->sk_nx_info, dev))
+ 			continue;
+ 		if (idx < s_idx)
+@@ -857,6 +871,7 @@
+ 
+ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct ifinfomsg *ifm;
+ 	struct net_device *dev;
+ 	int err;
+@@ -875,9 +890,9 @@
+ 	err = -EINVAL;
+ 	ifm = nlmsg_data(nlh);
+ 	if (ifm->ifi_index > 0)
+-		dev = dev_get_by_index(ifm->ifi_index);
++		dev = dev_get_by_index(net, ifm->ifi_index);
+ 	else if (tb[IFLA_IFNAME])
+-		dev = dev_get_by_name(ifname);
++		dev = dev_get_by_name(net, ifname);
+ 	else
+ 		goto errout;
+ 
+@@ -903,6 +918,7 @@
+ 
+ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	const struct rtnl_link_ops *ops;
+ 	struct net_device *dev;
+ 	struct ifinfomsg *ifm;
+@@ -919,9 +935,9 @@
+ 
+ 	ifm = nlmsg_data(nlh);
+ 	if (ifm->ifi_index > 0)
+-		dev = __dev_get_by_index(ifm->ifi_index);
++		dev = __dev_get_by_index(net, ifm->ifi_index);
+ 	else if (tb[IFLA_IFNAME])
+-		dev = __dev_get_by_name(ifname);
++		dev = __dev_get_by_name(net, ifname);
+ 	else
+ 		return -EINVAL;
+ 
+@@ -938,6 +954,7 @@
+ 
+ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	const struct rtnl_link_ops *ops;
+ 	struct net_device *dev;
+ 	struct ifinfomsg *ifm;
+@@ -959,9 +976,9 @@
+ 
+ 	ifm = nlmsg_data(nlh);
+ 	if (ifm->ifi_index > 0)
+-		dev = __dev_get_by_index(ifm->ifi_index);
++		dev = __dev_get_by_index(net, ifm->ifi_index);
+ 	else if (ifname[0])
+-		dev = __dev_get_by_name(ifname);
++		dev = __dev_get_by_name(net, ifname);
+ 	else
+ 		dev = NULL;
+ 
+@@ -1079,6 +1096,7 @@
+ 
+ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct ifinfomsg *ifm;
+ 	struct nlattr *tb[IFLA_MAX+1];
+ 	struct net_device *dev = NULL;
+@@ -1091,7 +1109,7 @@
+ 
+ 	ifm = nlmsg_data(nlh);
+ 	if (ifm->ifi_index > 0) {
+-		dev = dev_get_by_index(ifm->ifi_index);
++		dev = dev_get_by_index(net, ifm->ifi_index);
+ 		if (dev == NULL)
+ 			return -ENODEV;
+ 	} else
+@@ -1111,7 +1129,7 @@
+ 		kfree_skb(nskb);
+ 		goto errout;
+ 	}
+-	err = rtnl_unicast(nskb, NETLINK_CB(skb).pid);
++	err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid);
+ errout:
+ 	dev_put(dev);
+ 
+@@ -1144,6 +1162,7 @@
+ 
+ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
+ {
++	struct net *net = dev->nd_net;
+ 	struct sk_buff *skb;
+ 	int err = -ENOBUFS;
+ 
+@@ -1161,10 +1180,10 @@
+ 		kfree_skb(skb);
+ 		goto errout;
+ 	}
+-	err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
++	err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+ errout:
+ 	if (err < 0)
+-		rtnl_set_sk_err(RTNLGRP_LINK, err);
++		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+ }
+ 
+ /* Protected by RTNL sempahore.  */
+@@ -1175,6 +1194,7 @@
+ 
+ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	rtnl_doit_func doit;
+ 	int sz_idx, kind;
+ 	int min_len;
+@@ -1203,6 +1223,7 @@
+ 		return -EPERM;
+ 
+ 	if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) {
++		struct sock *rtnl;
+ 		rtnl_dumpit_func dumpit;
+ 
+ 		dumpit = rtnl_get_dumpit(family, type);
+@@ -1210,6 +1231,7 @@
+ 			return -EOPNOTSUPP;
+ 
+ 		__rtnl_unlock();
++		rtnl = net->rtnl;
+ 		err = netlink_dump_start(rtnl, skb, nlh, dumpit, NULL);
+ 		rtnl_lock();
+ 		return err;
+@@ -1259,6 +1281,10 @@
+ static int rtnetlink_event(struct notifier_block *this, unsigned long event, void *ptr)
+ {
+ 	struct net_device *dev = ptr;
++
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	switch (event) {
+ 	case NETDEV_UNREGISTER:
+ 		rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
+@@ -1284,6 +1310,36 @@
+ 	.notifier_call	= rtnetlink_event,
+ };
+ 
++
++static int rtnetlink_net_init(struct net *net)
++{
++	struct sock *sk;
++	sk = netlink_kernel_create(net, NETLINK_ROUTE, RTNLGRP_MAX,
++				   rtnetlink_rcv, &rtnl_mutex, THIS_MODULE);
++	if (!sk)
++		return -ENOMEM;
++
++	/* Don't hold an extra reference on the namespace */
++	put_net(sk->sk_net);
++	net->rtnl = sk;
++	return 0;
++}
++
++static void rtnetlink_net_exit(struct net *net)
++{
++	/* At the last minute, lie and say this is a socket for the
++	 * initial network namespace, so that the socket is safe to
++	 * free.
++	 */
++	net->rtnl->sk_net = get_net(&init_net);
++	sock_put(net->rtnl);
++}
++
++static struct pernet_operations rtnetlink_net_ops = {
++	.init = rtnetlink_net_init,
++	.exit = rtnetlink_net_exit,
++};
++
+ void __init rtnetlink_init(void)
+ {
+ 	int i;
+@@ -1296,10 +1352,9 @@
+ 	if (!rta_buf)
+ 		panic("rtnetlink_init: cannot allocate rta_buf\n");
+ 
+-	rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv,
+-				     &rtnl_mutex, THIS_MODULE);
+-	if (rtnl == NULL)
++	if (register_pernet_subsys(&rtnetlink_net_ops))
+ 		panic("rtnetlink_init: cannot initialize rtnetlink\n");
++
+ 	netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV);
+ 	register_netdevice_notifier(&rtnetlink_dev_notifier);
+ 
+diff -Nurb linux-2.6.22-try2/net/core/sock.c linux-2.6.22-try2-netns/net/core/sock.c
+--- linux-2.6.22-try2/net/core/sock.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/sock.c	2007-12-19 23:04:11.000000000 -0500
+@@ -123,6 +123,7 @@
+ #include <net/sock.h>
+ #include <net/xfrm.h>
+ #include <linux/ipsec.h>
++#include <net/net_namespace.h>
+ 
+ #include <linux/filter.h>
+ #include <linux/vs_socket.h>
+@@ -360,6 +361,7 @@
+ 		    char __user *optval, int optlen)
+ {
+ 	struct sock *sk=sock->sk;
++	struct net *net = sk->sk_net;
+ 	struct sk_filter *filter;
+ 	int val;
+ 	int valbool;
+@@ -614,7 +616,7 @@
+ 			if (devname[0] == '\0') {
+ 				sk->sk_bound_dev_if = 0;
+ 			} else {
+-				struct net_device *dev = dev_get_by_name(devname);
++				struct net_device *dev = dev_get_by_name(net, devname);
+ 				if (!dev) {
+ 					ret = -ENODEV;
+ 					break;
+@@ -867,7 +869,7 @@
+  *	@prot: struct proto associated with this new sock instance
+  *	@zero_it: if we should zero the newly allocated sock
+  */
+-struct sock *sk_alloc(int family, gfp_t priority,
++struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
+ 		      struct proto *prot, int zero_it)
+ {
+ 	struct sock *sk = NULL;
+@@ -888,6 +890,7 @@
+ 			 */
+ 			sk->sk_prot = sk->sk_prot_creator = prot;
+ 			sock_lock_init(sk);
++			sk->sk_net = get_net(net);
+ 		}
+ 		sock_vx_init(sk);
+ 		sock_nx_init(sk);
+@@ -929,6 +932,7 @@
+ 		       __FUNCTION__, atomic_read(&sk->sk_omem_alloc));
+ 
+ 	security_sk_free(sk);
++	put_net(sk->sk_net);
+ 	vx_sock_dec(sk);
+ 	clr_vx_info(&sk->sk_vx_info);
+ 	sk->sk_xid = -1;
+@@ -943,7 +947,7 @@
+ 
+ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
+ {
+-	struct sock *newsk = sk_alloc(sk->sk_family, priority, sk->sk_prot, 0);
++	struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0);
+ 
+ 	if (newsk != NULL) {
+ 		struct sk_filter *filter;
+@@ -2017,7 +2021,7 @@
+ static int __init proto_init(void)
+ {
+ 	/* register /proc/net/protocols */
+-	return proc_net_fops_create("protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
++	return proc_net_fops_create(&init_net, "protocols", S_IRUGO, &proto_seq_fops) == NULL ? -ENOBUFS : 0;
+ }
+ 
+ subsys_initcall(proto_init);
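
[Editorial note: with the sock.c changes above, every socket pins the namespace it was created in -- sk_alloc() stores a counted reference via get_net(), and sk destruction drops it with put_net(). A hypothetical helper makes the contract explicit:]

	/* illustrative only, not part of the patch */
	static struct sock *example_alloc_sock(struct net *net, struct proto *prot)
	{
		struct sock *sk = sk_alloc(net, PF_INET, GFP_KERNEL, prot, 1);

		if (!sk)
			return NULL;
		/* sk->sk_net == net; the namespace cannot go away
		 * while this socket exists */
		return sk;
	}
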
+diff -Nurb linux-2.6.22-try2/net/core/sysctl_net_core.c linux-2.6.22-try2-netns/net/core/sysctl_net_core.c
+--- linux-2.6.22-try2/net/core/sysctl_net_core.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/core/sysctl_net_core.c	2007-12-19 22:49:18.000000000 -0500
+@@ -9,25 +9,10 @@
+ #include <linux/sysctl.h>
+ #include <linux/module.h>
+ #include <linux/socket.h>
++#include <linux/netdevice.h>
++#include <net/xfrm.h>
+ #include <net/sock.h>
+ 
+-#ifdef CONFIG_SYSCTL
+-
+-extern int netdev_max_backlog;
+-extern int weight_p;
+-
+-extern __u32 sysctl_wmem_max;
+-extern __u32 sysctl_rmem_max;
+-
+-extern int sysctl_core_destroy_delay;
+-
+-#ifdef CONFIG_XFRM
+-extern u32 sysctl_xfrm_aevent_etime;
+-extern u32 sysctl_xfrm_aevent_rseqth;
+-extern int sysctl_xfrm_larval_drop;
+-extern u32 sysctl_xfrm_acq_expires;
+-#endif
+-
+ ctl_table core_table[] = {
+ #ifdef CONFIG_NET
+ 	{
+@@ -103,11 +88,32 @@
+ 		.mode		= 0644,
+ 		.proc_handler	= &proc_dointvec
+ 	},
++#endif /* CONFIG_NET */
++	{
++		.ctl_name	= NET_CORE_BUDGET,
++		.procname	= "netdev_budget",
++		.data		= &netdev_budget,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec
++	},
++	{
++		.ctl_name	= NET_CORE_WARNINGS,
++		.procname	= "warnings",
++		.data		= &net_msg_warn,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec
++	},
++	{ .ctl_name = 0 }
++};
++
++struct ctl_table multi_core_table[] = {
+ #ifdef CONFIG_XFRM
+ 	{
+ 		.ctl_name	= NET_CORE_AEVENT_ETIME,
+ 		.procname	= "xfrm_aevent_etime",
+-		.data		= &sysctl_xfrm_aevent_etime,
++		.data		= &init_net.sysctl_xfrm_aevent_etime,
+ 		.maxlen		= sizeof(u32),
+ 		.mode		= 0644,
+ 		.proc_handler	= &proc_dointvec
+@@ -115,7 +121,7 @@
+ 	{
+ 		.ctl_name	= NET_CORE_AEVENT_RSEQTH,
+ 		.procname	= "xfrm_aevent_rseqth",
+-		.data		= &sysctl_xfrm_aevent_rseqth,
++		.data		= &init_net.sysctl_xfrm_aevent_rseqth,
+ 		.maxlen		= sizeof(u32),
+ 		.mode		= 0644,
+ 		.proc_handler	= &proc_dointvec
+@@ -123,7 +129,7 @@
+ 	{
+ 		.ctl_name	= CTL_UNNUMBERED,
+ 		.procname	= "xfrm_larval_drop",
+-		.data		= &sysctl_xfrm_larval_drop,
++		.data		= &init_net.sysctl_xfrm_larval_drop,
+ 		.maxlen		= sizeof(int),
+ 		.mode		= 0644,
+ 		.proc_handler	= &proc_dointvec
+@@ -131,38 +137,19 @@
+ 	{
+ 		.ctl_name	= CTL_UNNUMBERED,
+ 		.procname	= "xfrm_acq_expires",
+-		.data		= &sysctl_xfrm_acq_expires,
++		.data		= &init_net.sysctl_xfrm_acq_expires,
+ 		.maxlen		= sizeof(int),
+ 		.mode		= 0644,
+ 		.proc_handler	= &proc_dointvec
+ 	},
+ #endif /* CONFIG_XFRM */
+-#endif /* CONFIG_NET */
+ 	{
+ 		.ctl_name	= NET_CORE_SOMAXCONN,
+ 		.procname	= "somaxconn",
+-		.data		= &sysctl_somaxconn,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec
+-	},
+-	{
+-		.ctl_name	= NET_CORE_BUDGET,
+-		.procname	= "netdev_budget",
+-		.data		= &netdev_budget,
++		.data		= &init_net.sysctl_somaxconn,
+ 		.maxlen		= sizeof(int),
+ 		.mode		= 0644,
+ 		.proc_handler	= &proc_dointvec
+ 	},
+-	{
+-		.ctl_name	= NET_CORE_WARNINGS,
+-		.procname	= "warnings",
+-		.data		= &net_msg_warn,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec
+-	},
+-	{ .ctl_name = 0 }
++	{}
+ };
+-
+-#endif
+diff -Nurb linux-2.6.22-try2/net/dccp/ipv4.c linux-2.6.22-try2-netns/net/dccp/ipv4.c
+--- linux-2.6.22-try2/net/dccp/ipv4.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/dccp/ipv4.c	2007-12-19 22:49:18.000000000 -0500
+@@ -202,6 +202,7 @@
+  */
+ static void dccp_v4_err(struct sk_buff *skb, u32 info)
+ {
++	struct net *net = skb->dev->nd_net;
+ 	const struct iphdr *iph = (struct iphdr *)skb->data;
+ 	const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data +
+ 							(iph->ihl << 2));
+@@ -213,13 +214,16 @@
+ 	__u64 seq;
+ 	int err;
+ 
++	if (skb->dev->nd_net != &init_net)
++		return;
++
+ 	if (skb->len < (iph->ihl << 2) + 8) {
+ 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ 		return;
+ 	}
+ 
+ 	sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport,
+-			 iph->saddr, dh->dccph_sport, inet_iif(skb));
++			 iph->saddr, dh->dccph_sport, inet_iif(skb), net);
+ 	if (sk == NULL) {
+ 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ 		return;
+@@ -441,7 +445,7 @@
+ 	nsk = inet_lookup_established(&dccp_hashinfo,
+ 				      iph->saddr, dh->dccph_sport,
+ 				      iph->daddr, dh->dccph_dport,
+-				      inet_iif(skb));
++				      inet_iif(skb), sk->sk_net);
+ 	if (nsk != NULL) {
+ 		if (nsk->sk_state != DCCP_TIME_WAIT) {
+ 			bh_lock_sock(nsk);
+@@ -458,7 +462,8 @@
+ 					   struct sk_buff *skb)
+ {
+ 	struct rtable *rt;
+-	struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif,
++	struct flowi fl = { .fl_net = &init_net,
++			    .oif = ((struct rtable *)skb->dst)->rt_iif,
+ 			    .nl_u = { .ip4_u =
+ 				      { .daddr = ip_hdr(skb)->saddr,
+ 					.saddr = ip_hdr(skb)->daddr,
+@@ -809,11 +814,16 @@
+ /* this is called when real data arrives */
+ static int dccp_v4_rcv(struct sk_buff *skb)
+ {
++	struct net *net = skb->dev->nd_net;
+ 	const struct dccp_hdr *dh;
+ 	const struct iphdr *iph;
+ 	struct sock *sk;
+ 	int min_cov;
+ 
++	if (skb->dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
+ 	/* Step 1: Check header basics */
+ 
+ 	if (dccp_invalid_packet(skb))
+@@ -852,7 +862,7 @@
+ 	 *	Look up flow ID in table and get corresponding socket */
+ 	sk = __inet_lookup(&dccp_hashinfo,
+ 			   iph->saddr, dh->dccph_sport,
+-			   iph->daddr, dh->dccph_dport, inet_iif(skb));
++			   iph->daddr, dh->dccph_dport, inet_iif(skb), net);
+ 	/*
+ 	 * Step 2:
+ 	 *	If no socket ...
+diff -Nurb linux-2.6.22-try2/net/dccp/ipv6.c linux-2.6.22-try2-netns/net/dccp/ipv6.c
+--- linux-2.6.22-try2/net/dccp/ipv6.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/dccp/ipv6.c	2007-12-19 22:49:18.000000000 -0500
+@@ -94,6 +94,7 @@
+ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ 			int type, int code, int offset, __be32 info)
+ {
++	struct net *net = skb->dev->nd_net;
+ 	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
+ 	const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset);
+ 	struct ipv6_pinfo *np;
+@@ -102,7 +103,7 @@
+ 	__u64 seq;
+ 
+ 	sk = inet6_lookup(&dccp_hashinfo, &hdr->daddr, dh->dccph_dport,
+-			  &hdr->saddr, dh->dccph_sport, inet6_iif(skb));
++			  &hdr->saddr, dh->dccph_sport, inet6_iif(skb), net);
+ 
+ 	if (sk == NULL) {
+ 		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+@@ -142,6 +143,7 @@
+ 			   for now.
+ 			 */
+ 			memset(&fl, 0, sizeof(fl));
++			fl.fl_net = &init_net;
+ 			fl.proto = IPPROTO_DCCP;
+ 			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ 			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+@@ -242,6 +244,7 @@
+ 	int err = -1;
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	fl.proto = IPPROTO_DCCP;
+ 	ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
+ 	ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr);
+@@ -358,6 +361,7 @@
+ 						      &rxip6h->daddr);
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	ipv6_addr_copy(&fl.fl6_dst, &rxip6h->saddr);
+ 	ipv6_addr_copy(&fl.fl6_src, &rxip6h->daddr);
+ 
+@@ -407,7 +411,7 @@
+ 	nsk = __inet6_lookup_established(&dccp_hashinfo,
+ 					 &iph->saddr, dh->dccph_sport,
+ 					 &iph->daddr, ntohs(dh->dccph_dport),
+-					 inet6_iif(skb));
++					 inet6_iif(skb), sk->sk_net);
+ 	if (nsk != NULL) {
+ 		if (nsk->sk_state != DCCP_TIME_WAIT) {
+ 			bh_lock_sock(nsk);
+@@ -584,6 +588,7 @@
+ 		struct flowi fl;
+ 
+ 		memset(&fl, 0, sizeof(fl));
++		fl.fl_net = &init_net;
+ 		fl.proto = IPPROTO_DCCP;
+ 		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
+ 		if (opt != NULL && opt->srcrt != NULL) {
+@@ -819,6 +824,7 @@
+ {
+ 	const struct dccp_hdr *dh;
+ 	struct sk_buff *skb = *pskb;
++	struct net *net = skb->dev->nd_net;
+ 	struct sock *sk;
+ 	int min_cov;
+ 
+@@ -849,7 +855,7 @@
+ 	sk = __inet6_lookup(&dccp_hashinfo, &ipv6_hdr(skb)->saddr,
+ 			    dh->dccph_sport,
+ 			    &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport),
+-			    inet6_iif(skb));
++			    inet6_iif(skb), net);
+ 	/*
+ 	 * Step 2:
+ 	 *	If no socket ...
+@@ -937,6 +943,7 @@
+ 		return -EAFNOSUPPORT;
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 
+ 	if (np->sndflow) {
+ 		fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
+diff -Nurb linux-2.6.22-try2/net/dccp/probe.c linux-2.6.22-try2-netns/net/dccp/probe.c
+--- linux-2.6.22-try2/net/dccp/probe.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/dccp/probe.c	2007-12-19 22:49:18.000000000 -0500
+@@ -30,6 +30,7 @@
+ #include <linux/module.h>
+ #include <linux/kfifo.h>
+ #include <linux/vmalloc.h>
++#include <net/net_namespace.h>
+ 
+ #include "dccp.h"
+ #include "ccid.h"
+@@ -168,7 +169,7 @@
+ 	if (IS_ERR(dccpw.fifo))
+ 		return PTR_ERR(dccpw.fifo);
+ 
+-	if (!proc_net_fops_create(procname, S_IRUSR, &dccpprobe_fops))
++	if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &dccpprobe_fops))
+ 		goto err0;
+ 
+ 	ret = register_jprobe(&dccp_send_probe);
+@@ -178,7 +179,7 @@
+ 	pr_info("DCCP watch registered (port=%d)\n", port);
+ 	return 0;
+ err1:
+-	proc_net_remove(procname);
++	proc_net_remove(&init_net, procname);
+ err0:
+ 	kfifo_free(dccpw.fifo);
+ 	return ret;
+@@ -188,7 +189,7 @@
+ static __exit void dccpprobe_exit(void)
+ {
+ 	kfifo_free(dccpw.fifo);
+-	proc_net_remove(procname);
++	proc_net_remove(&init_net, procname);
+ 	unregister_jprobe(&dccp_send_probe);
+ 
+ }
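
[Editorial note: dccp_probe shows a third idiom: /proc/net is now per-namespace, so creation and removal of entries name the namespace explicitly. In outline -- the entry name and fops are placeholders:]

	static const struct file_operations example_fops;	/* placeholder */

	static int __init example_init(void)
	{
		/* creates /proc/net/example in the initial namespace */
		if (!proc_net_fops_create(&init_net, "example", S_IRUSR, &example_fops))
			return -ENOMEM;
		return 0;
	}

	static void __exit example_exit(void)
	{
		proc_net_remove(&init_net, "example");
	}
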
+diff -Nurb linux-2.6.22-try2/net/decnet/af_decnet.c linux-2.6.22-try2-netns/net/decnet/af_decnet.c
+--- linux-2.6.22-try2/net/decnet/af_decnet.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/decnet/af_decnet.c	2007-12-19 22:49:18.000000000 -0500
+@@ -131,6 +131,7 @@
+ #include <net/neighbour.h>
+ #include <net/dst.h>
+ #include <net/fib_rules.h>
++#include <net/net_namespace.h>
+ #include <net/dn.h>
+ #include <net/dn_nsp.h>
+ #include <net/dn_dev.h>
+@@ -470,10 +471,10 @@
+ 	.obj_size		= sizeof(struct dn_sock),
+ };
+ 
+-static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp)
++static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp)
+ {
+ 	struct dn_scp *scp;
+-	struct sock *sk = sk_alloc(PF_DECnet, gfp, &dn_proto, 1);
++	struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, 1);
+ 
+ 	if  (!sk)
+ 		goto out;
+@@ -674,10 +675,13 @@
+ 
+ 
+ 
+-static int dn_create(struct socket *sock, int protocol)
++static int dn_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	switch(sock->type) {
+ 		case SOCK_SEQPACKET:
+ 			if (protocol != DNPROTO_NSP)
+@@ -690,7 +694,7 @@
+ 	}
+ 
+ 
+-	if ((sk = dn_alloc_sock(sock, GFP_KERNEL)) == NULL)
++	if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL)) == NULL)
+ 		return -ENOBUFS;
+ 
+ 	sk->sk_protocol = protocol;
+@@ -747,7 +751,7 @@
+ 		if (dn_ntohs(saddr->sdn_nodeaddrl)) {
+ 			read_lock(&dev_base_lock);
+ 			ldev = NULL;
+-			for_each_netdev(dev) {
++			for_each_netdev(&init_net, dev) {
+ 				if (!dev->dn_ptr)
+ 					continue;
+ 				if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) {
+@@ -943,6 +947,7 @@
+ 
+ 	err = -EHOSTUNREACH;
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	fl.oif = sk->sk_bound_dev_if;
+ 	fl.fld_dst = dn_saddr2dn(&scp->peer);
+ 	fl.fld_src = dn_saddr2dn(&scp->addr);
+@@ -1090,7 +1095,7 @@
+ 
+ 	cb = DN_SKB_CB(skb);
+ 	sk->sk_ack_backlog--;
+-	newsk = dn_alloc_sock(newsock, sk->sk_allocation);
++	newsk = dn_alloc_sock(sk->sk_net, newsock, sk->sk_allocation);
+ 	if (newsk == NULL) {
+ 		release_sock(sk);
+ 		kfree_skb(skb);
+@@ -2085,6 +2090,9 @@
+ {
+ 	struct net_device *dev = (struct net_device *)ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	switch(event) {
+ 		case NETDEV_UP:
+ 			dn_dev_up(dev);
+@@ -2399,7 +2407,7 @@
+ 	dev_add_pack(&dn_dix_packet_type);
+ 	register_netdevice_notifier(&dn_dev_notifier);
+ 
+-	proc_net_fops_create("decnet", S_IRUGO, &dn_socket_seq_fops);
++	proc_net_fops_create(&init_net, "decnet", S_IRUGO, &dn_socket_seq_fops);
+ 	dn_register_sysctl();
+ out:
+ 	return rc;
+@@ -2428,7 +2436,7 @@
+ 	dn_neigh_cleanup();
+ 	dn_fib_cleanup();
+ 
+-	proc_net_remove("decnet");
++	proc_net_remove(&init_net, "decnet");
+ 
+ 	proto_unregister(&dn_proto);
+ }
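
[Editorial note: dn_create() above illustrates how the protocol-family ->create() hook changed: it now receives the target namespace, and families that have not been converted refuse anything but init_net. In outline:]

	static int example_create(struct net *net, struct socket *sock, int protocol)
	{
		/* not namespace-aware yet: only the initial namespace works */
		if (net != &init_net)
			return -EAFNOSUPPORT;

		/* ... allocate the sock with sk_alloc(net, ...) ... */
		return 0;
	}
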
+diff -Nurb linux-2.6.22-try2/net/decnet/dn_dev.c linux-2.6.22-try2-netns/net/decnet/dn_dev.c
+--- linux-2.6.22-try2/net/decnet/dn_dev.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/decnet/dn_dev.c	2007-12-19 22:49:18.000000000 -0500
+@@ -47,6 +47,7 @@
+ #include <net/flow.h>
+ #include <net/fib_rules.h>
+ #include <net/netlink.h>
++#include <net/net_namespace.h>
+ #include <net/dn.h>
+ #include <net/dn_dev.h>
+ #include <net/dn_route.h>
+@@ -513,7 +514,7 @@
+ 	ifr->ifr_name[IFNAMSIZ-1] = 0;
+ 
+ #ifdef CONFIG_KMOD
+-	dev_load(ifr->ifr_name);
++	dev_load(&init_net, ifr->ifr_name);
+ #endif
+ 
+ 	switch(cmd) {
+@@ -531,7 +532,7 @@
+ 
+ 	rtnl_lock();
+ 
+-	if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL) {
++	if ((dev = __dev_get_by_name(&init_net, ifr->ifr_name)) == NULL) {
+ 		ret = -ENODEV;
+ 		goto done;
+ 	}
+@@ -629,7 +630,7 @@
+ {
+ 	struct net_device *dev;
+ 	struct dn_dev *dn_dev = NULL;
+-	dev = dev_get_by_index(ifindex);
++	dev = dev_get_by_index(&init_net, ifindex);
+ 	if (dev) {
+ 		dn_dev = dev->dn_ptr;
+ 		dev_put(dev);
+@@ -647,12 +648,16 @@
+ 
+ static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct nlattr *tb[IFA_MAX+1];
+ 	struct dn_dev *dn_db;
+ 	struct ifaddrmsg *ifm;
+ 	struct dn_ifaddr *ifa, **ifap;
+ 	int err = -EADDRNOTAVAIL;
+ 
++	if (net != &init_net)
++		goto errout;
++
+ 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
+ 	if (err < 0)
+ 		goto errout;
+@@ -679,6 +684,7 @@
+ 
+ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct nlattr *tb[IFA_MAX+1];
+ 	struct net_device *dev;
+ 	struct dn_dev *dn_db;
+@@ -686,6 +692,9 @@
+ 	struct dn_ifaddr *ifa;
+ 	int err;
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy);
+ 	if (err < 0)
+ 		return err;
+@@ -694,7 +703,7 @@
+ 		return -EINVAL;
+ 
+ 	ifm = nlmsg_data(nlh);
+-	if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL)
++	if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL)
+ 		return -ENODEV;
+ 
+ 	if ((dn_db = dev->dn_ptr) == NULL) {
+@@ -783,24 +792,28 @@
+ 		kfree_skb(skb);
+ 		goto errout;
+ 	}
+-	err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
++	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
+ errout:
+ 	if (err < 0)
+-		rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err);
++		rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err);
+ }
+ 
+ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	int idx, dn_idx = 0, skip_ndevs, skip_naddr;
+ 	struct net_device *dev;
+ 	struct dn_dev *dn_db;
+ 	struct dn_ifaddr *ifa;
+ 
++	if (net != &init_net)
++		return 0;
++
+ 	skip_ndevs = cb->args[0];
+ 	skip_naddr = cb->args[1];
+ 
+ 	idx = 0;
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if (idx < skip_ndevs)
+ 			goto cont;
+ 		else if (idx > skip_ndevs) {
+@@ -869,10 +882,10 @@
+ 		rv = dn_dev_get_first(dev, addr);
+ 		read_unlock(&dev_base_lock);
+ 		dev_put(dev);
+-		if (rv == 0 || dev == &loopback_dev)
++		if (rv == 0 || dev == &init_net.loopback_dev)
+ 			return rv;
+ 	}
+-	dev = &loopback_dev;
++	dev = &init_net.loopback_dev;
+ 	dev_hold(dev);
+ 	goto last_chance;
+ }
+@@ -1299,7 +1312,7 @@
+ 	struct net_device *dev;
+ 
+ 	rtnl_lock();
+-	for_each_netdev(dev)
++	for_each_netdev(&init_net, dev)
+ 		dn_dev_down(dev);
+ 	rtnl_unlock();
+ 
+@@ -1310,7 +1323,7 @@
+ 	struct net_device *dev;
+ 
+ 	rtnl_lock();
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if (dev->flags & IFF_UP)
+ 			dn_dev_up(dev);
+ 	}
+@@ -1344,7 +1357,7 @@
+ 		return SEQ_START_TOKEN;
+ 
+ 	i = 1;
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if (!is_dn_dev(dev))
+ 			continue;
+ 
+@@ -1363,9 +1376,9 @@
+ 
+ 	dev = (struct net_device *)v;
+ 	if (v == SEQ_START_TOKEN)
+-		dev = net_device_entry(&dev_base_head);
++		dev = net_device_entry(&init_net.dev_base_head);
+ 
+-	for_each_netdev_continue(dev) {
++	for_each_netdev_continue(&init_net, dev) {
+ 		if (!is_dn_dev(dev))
+ 			continue;
+ 
+@@ -1465,7 +1478,7 @@
+ 	rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL);
+ 	rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr);
+ 
+-	proc_net_fops_create("decnet_dev", S_IRUGO, &dn_dev_seq_fops);
++	proc_net_fops_create(&init_net, "decnet_dev", S_IRUGO, &dn_dev_seq_fops);
+ 
+ #ifdef CONFIG_SYSCTL
+ 	{
+@@ -1486,7 +1499,7 @@
+ 	}
+ #endif /* CONFIG_SYSCTL */
+ 
+-	proc_net_remove("decnet_dev");
++	proc_net_remove(&init_net, "decnet_dev");
+ 
+ 	dn_dev_devices_off();
+ }
+diff -Nurb linux-2.6.22-try2/net/decnet/dn_fib.c linux-2.6.22-try2-netns/net/decnet/dn_fib.c
+--- linux-2.6.22-try2/net/decnet/dn_fib.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/decnet/dn_fib.c	2007-12-19 22:49:18.000000000 -0500
+@@ -203,8 +203,6 @@
+ 		struct flowi fl;
+ 		struct dn_fib_res res;
+ 
+-		memset(&fl, 0, sizeof(fl));
+-
+ 		if (nh->nh_flags&RTNH_F_ONLINK) {
+ 			struct net_device *dev;
+ 
+@@ -212,7 +210,7 @@
+ 				return -EINVAL;
+ 			if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST)
+ 				return -EINVAL;
+-			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
++			if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL)
+ 				return -ENODEV;
+ 			if (!(dev->flags&IFF_UP))
+ 				return -ENETDOWN;
+@@ -223,6 +221,7 @@
+ 		}
+ 
+ 		memset(&fl, 0, sizeof(fl));
++		fl.fl_net = &init_net;
+ 		fl.fld_dst = nh->nh_gw;
+ 		fl.oif = nh->nh_oif;
+ 		fl.fld_scope = r->rtm_scope + 1;
+@@ -255,7 +254,7 @@
+ 		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
+ 			return -EINVAL;
+ 
+-		dev = __dev_get_by_index(nh->nh_oif);
++		dev = __dev_get_by_index(&init_net, nh->nh_oif);
+ 		if (dev == NULL || dev->dn_ptr == NULL)
+ 			return -ENODEV;
+ 		if (!(dev->flags&IFF_UP))
+@@ -355,7 +354,7 @@
+ 		if (nhs != 1 || nh->nh_gw)
+ 			goto err_inval;
+ 		nh->nh_scope = RT_SCOPE_NOWHERE;
+-		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
++		nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif);
+ 		err = -ENODEV;
+ 		if (nh->nh_dev == NULL)
+ 			goto failure;
+@@ -506,10 +505,14 @@
+ 
+ static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct dn_fib_table *tb;
+ 	struct rtattr **rta = arg;
+ 	struct rtmsg *r = NLMSG_DATA(nlh);
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ 	if (dn_fib_check_attr(r, rta))
+ 		return -EINVAL;
+ 
+@@ -522,10 +525,14 @@
+ 
+ static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct dn_fib_table *tb;
+ 	struct rtattr **rta = arg;
+ 	struct rtmsg *r = NLMSG_DATA(nlh);
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ 	if (dn_fib_check_attr(r, rta))
+ 		return -EINVAL;
+ 
+@@ -602,7 +609,7 @@
+ 
+ 	/* Scan device list */
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		dn_db = dev->dn_ptr;
+ 		if (dn_db == NULL)
+ 			continue;
+diff -Nurb linux-2.6.22-try2/net/decnet/dn_neigh.c linux-2.6.22-try2-netns/net/decnet/dn_neigh.c
+--- linux-2.6.22-try2/net/decnet/dn_neigh.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/decnet/dn_neigh.c	2007-12-19 22:49:18.000000000 -0500
+@@ -38,6 +38,7 @@
+ #include <linux/rcupdate.h>
+ #include <linux/jhash.h>
+ #include <asm/atomic.h>
++#include <net/net_namespace.h>
+ #include <net/neighbour.h>
+ #include <net/dst.h>
+ #include <net/flow.h>
+@@ -591,6 +592,7 @@
+ 
+ 	seq          = file->private_data;
+ 	seq->private = s;
++	s->net = get_net(PROC_NET(inode));
+ out:
+ 	return rc;
+ out_kfree:
+@@ -598,12 +600,20 @@
+ 	goto out;
+ }
+ 
++static int dn_neigh_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct neigh_seq_state *state = seq->private;
++	put_net(state->net);
++	return seq_release_private(inode, file);
++}
++
+ static const struct file_operations dn_neigh_seq_fops = {
+ 	.owner		= THIS_MODULE,
+ 	.open		= dn_neigh_seq_open,
+ 	.read		= seq_read,
+ 	.llseek		= seq_lseek,
+-	.release	= seq_release_private,
++	.release	= dn_neigh_seq_release,
+ };
+ 
+ #endif
+@@ -611,11 +621,11 @@
+ void __init dn_neigh_init(void)
+ {
+ 	neigh_table_init(&dn_neigh_table);
+-	proc_net_fops_create("decnet_neigh", S_IRUGO, &dn_neigh_seq_fops);
++	proc_net_fops_create(&init_net, "decnet_neigh", S_IRUGO, &dn_neigh_seq_fops);
+ }
+ 
+ void __exit dn_neigh_cleanup(void)
+ {
+-	proc_net_remove("decnet_neigh");
++	proc_net_remove(&init_net, "decnet_neigh");
+ 	neigh_table_clear(&dn_neigh_table);
+ }
+diff -Nurb linux-2.6.22-try2/net/decnet/dn_nsp_out.c linux-2.6.22-try2-netns/net/decnet/dn_nsp_out.c
+--- linux-2.6.22-try2/net/decnet/dn_nsp_out.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/decnet/dn_nsp_out.c	2007-12-19 22:49:18.000000000 -0500
+@@ -91,6 +91,7 @@
+ 	}
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	fl.oif = sk->sk_bound_dev_if;
+ 	fl.fld_src = dn_saddr2dn(&scp->addr);
+ 	fl.fld_dst = dn_saddr2dn(&scp->peer);
+diff -Nurb linux-2.6.22-try2/net/decnet/dn_route.c linux-2.6.22-try2-netns/net/decnet/dn_route.c
+--- linux-2.6.22-try2/net/decnet/dn_route.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/decnet/dn_route.c	2007-12-19 22:49:18.000000000 -0500
+@@ -82,6 +82,7 @@
+ #include <net/dst.h>
+ #include <net/flow.h>
+ #include <net/fib_rules.h>
++#include <net/net_namespace.h>
+ #include <net/dn.h>
+ #include <net/dn_dev.h>
+ #include <net/dn_nsp.h>
+@@ -583,6 +584,9 @@
+ 	struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr;
+ 	unsigned char padlen = 0;
+ 
++	if (dev->nd_net != &init_net)
++		goto dump_it;
++
+ 	if (dn == NULL)
+ 		goto dump_it;
+ 
+@@ -877,13 +881,14 @@
+ 
+ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard)
+ {
+-	struct flowi fl = { .nl_u = { .dn_u =
++	struct flowi fl = { .fl_net = &init_net,
++			    .nl_u = { .dn_u =
+ 				      { .daddr = oldflp->fld_dst,
+ 					.saddr = oldflp->fld_src,
+ 					.scope = RT_SCOPE_UNIVERSE,
+ 				     } },
+ 			    .mark = oldflp->mark,
+-			    .iif = loopback_dev.ifindex,
++			    .iif = init_net.loopback_dev.ifindex,
+ 			    .oif = oldflp->oif };
+ 	struct dn_route *rt = NULL;
+ 	struct net_device *dev_out = NULL, *dev;
+@@ -900,11 +905,11 @@
+ 		       "dn_route_output_slow: dst=%04x src=%04x mark=%d"
+ 		       " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst),
+ 		       dn_ntohs(oldflp->fld_src),
+-		       oldflp->mark, loopback_dev.ifindex, oldflp->oif);
++		       oldflp->mark, init_net.loopback_dev.ifindex, oldflp->oif);
+ 
+ 	/* If we have an output interface, verify its a DECnet device */
+ 	if (oldflp->oif) {
+-		dev_out = dev_get_by_index(oldflp->oif);
++		dev_out = dev_get_by_index(&init_net, oldflp->oif);
+ 		err = -ENODEV;
+ 		if (dev_out && dev_out->dn_ptr == NULL) {
+ 			dev_put(dev_out);
+@@ -925,7 +930,7 @@
+ 			goto out;
+ 		}
+ 		read_lock(&dev_base_lock);
+-		for_each_netdev(dev) {
++		for_each_netdev(&init_net, dev) {
+ 			if (!dev->dn_ptr)
+ 				continue;
+ 			if (!dn_dev_islocal(dev, oldflp->fld_src))
+@@ -953,7 +958,7 @@
+ 		err = -EADDRNOTAVAIL;
+ 		if (dev_out)
+ 			dev_put(dev_out);
+-		dev_out = &loopback_dev;
++		dev_out = &init_net.loopback_dev;
+ 		dev_hold(dev_out);
+ 		if (!fl.fld_dst) {
+ 			fl.fld_dst =
+@@ -962,7 +967,7 @@
+ 			if (!fl.fld_dst)
+ 				goto out;
+ 		}
+-		fl.oif = loopback_dev.ifindex;
++		fl.oif = init_net.loopback_dev.ifindex;
+ 		res.type = RTN_LOCAL;
+ 		goto make_route;
+ 	}
+@@ -995,7 +1000,7 @@
+ 		 * here
+ 		 */
+ 		if (!try_hard) {
+-			neigh = neigh_lookup_nodev(&dn_neigh_table, &fl.fld_dst);
++			neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst);
+ 			if (neigh) {
+ 				if ((oldflp->oif &&
+ 				    (neigh->dev->ifindex != oldflp->oif)) ||
+@@ -1008,7 +1013,7 @@
+ 					if (dev_out)
+ 						dev_put(dev_out);
+ 					if (dn_dev_islocal(neigh->dev, fl.fld_dst)) {
+-						dev_out = &loopback_dev;
++						dev_out = &init_net.loopback_dev;
+ 						res.type = RTN_LOCAL;
+ 					} else {
+ 						dev_out = neigh->dev;
+@@ -1029,7 +1034,7 @@
+ 		/* Possible improvement - check all devices for local addr */
+ 		if (dn_dev_islocal(dev_out, fl.fld_dst)) {
+ 			dev_put(dev_out);
+-			dev_out = &loopback_dev;
++			dev_out = &init_net.loopback_dev;
+ 			dev_hold(dev_out);
+ 			res.type = RTN_LOCAL;
+ 			goto select_source;
+@@ -1065,7 +1070,7 @@
+ 			fl.fld_src = fl.fld_dst;
+ 		if (dev_out)
+ 			dev_put(dev_out);
+-		dev_out = &loopback_dev;
++		dev_out = &init_net.loopback_dev;
+ 		dev_hold(dev_out);
+ 		fl.oif = dev_out->ifindex;
+ 		if (res.fi)
+@@ -1103,6 +1108,7 @@
+ 	atomic_set(&rt->u.dst.__refcnt, 1);
+ 	rt->u.dst.flags   = DST_HOST;
+ 
++	rt->fl.fl_net     = &init_net;
+ 	rt->fl.fld_src    = oldflp->fld_src;
+ 	rt->fl.fld_dst    = oldflp->fld_dst;
+ 	rt->fl.oif        = oldflp->oif;
+@@ -1226,7 +1232,8 @@
+ 	int flags = 0;
+ 	__le16 gateway = 0;
+ 	__le16 local_src = 0;
+-	struct flowi fl = { .nl_u = { .dn_u =
++	struct flowi fl = { .fl_net = &init_net,
++			    .nl_u = { .dn_u =
+ 				     { .daddr = cb->dst,
+ 				       .saddr = cb->src,
+ 				       .scope = RT_SCOPE_UNIVERSE,
+@@ -1374,6 +1381,7 @@
+ 	rt->rt_dst_map    = fl.fld_dst;
+ 	rt->rt_src_map    = fl.fld_src;
+ 
++	rt->fl.fl_net     = &init_net;
+ 	rt->fl.fld_src    = cb->src;
+ 	rt->fl.fld_dst    = cb->dst;
+ 	rt->fl.oif        = 0;
+@@ -1526,6 +1534,7 @@
+  */
+ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = in_skb->sk->sk_net;
+ 	struct rtattr **rta = arg;
+ 	struct rtmsg *rtm = NLMSG_DATA(nlh);
+ 	struct dn_route *rt = NULL;
+@@ -1534,7 +1543,11 @@
+ 	struct sk_buff *skb;
+ 	struct flowi fl;
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	fl.proto = DNPROTO_NSP;
+ 
+ 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+@@ -1552,7 +1565,7 @@
+ 
+ 	if (fl.iif) {
+ 		struct net_device *dev;
+-		if ((dev = dev_get_by_index(fl.iif)) == NULL) {
++		if ((dev = dev_get_by_index(&init_net, fl.iif)) == NULL) {
+ 			kfree_skb(skb);
+ 			return -ENODEV;
+ 		}
+@@ -1598,7 +1611,7 @@
+ 		goto out_free;
+ 	}
+ 
+-	return rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
++	return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+ 
+ out_free:
+ 	kfree_skb(skb);
+@@ -1611,10 +1624,14 @@
+  */
+ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct dn_route *rt;
+ 	int h, s_h;
+ 	int idx, s_idx;
+ 
++	if (net != &init_net)
++		return 0;
++
+ 	if (NLMSG_PAYLOAD(cb->nlh, 0) < sizeof(struct rtmsg))
+ 		return -EINVAL;
+ 	if (!(((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED))
+@@ -1814,7 +1831,7 @@
+ 
+ 	dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
+ 
+-	proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops);
++	proc_net_fops_create(&init_net, "decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops);
+ 
+ #ifdef CONFIG_DECNET_ROUTER
+ 	rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, dn_fib_dump);
+@@ -1829,6 +1846,6 @@
+ 	del_timer(&dn_route_timer);
+ 	dn_run_flush(0);
+ 
+-	proc_net_remove("decnet_cache");
++	proc_net_remove(&init_net, "decnet_cache");
+ }
+ 
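
[Editorial note: the dn_route.c conversions above all hinge on one structural change: the flow key now records which namespace a route lookup runs in. The pattern, with illustrative field values:]

	struct flowi fl = {
		.fl_net = &init_net,	/* namespace for the route lookup */
		.oif    = 0,		/* no fixed output interface */
	};
	/* ... fill in the protocol addresses, then hand &fl to the lookup ... */
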
+diff -Nurb linux-2.6.22-try2/net/decnet/dn_rules.c linux-2.6.22-try2-netns/net/decnet/dn_rules.c
+--- linux-2.6.22-try2/net/decnet/dn_rules.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/decnet/dn_rules.c	2007-12-19 22:49:18.000000000 -0500
+@@ -186,7 +186,10 @@
+ 
+ unsigned dnet_addr_type(__le16 addr)
+ {
+-	struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } };
++	struct flowi fl = {
++		.fl_net = &init_net,
++		.nl_u = { .dn_u = { .daddr = addr } }
++	};
+ 	struct dn_fib_res res;
+ 	unsigned ret = RTN_UNICAST;
+ 	struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
+@@ -223,7 +226,7 @@
+ 	return -ENOBUFS;
+ }
+ 
+-static u32 dn_fib_rule_default_pref(void)
++static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops)
+ {
+ 	struct list_head *pos;
+ 	struct fib_rule *rule;
+@@ -240,7 +243,7 @@
+ 	return 0;
+ }
+ 
+-static void dn_fib_rule_flush_cache(void)
++static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
+ {
+ 	dn_rt_cache_flush(-1);
+ }
+@@ -265,12 +268,12 @@
+ void __init dn_fib_rules_init(void)
+ {
+ 	list_add_tail(&default_rule.common.list, &dn_fib_rules);
+-	fib_rules_register(&dn_fib_rules_ops);
++	fib_rules_register(&init_net, &dn_fib_rules_ops);
+ }
+ 
+ void __exit dn_fib_rules_cleanup(void)
+ {
+-	fib_rules_unregister(&dn_fib_rules_ops);
++	fib_rules_unregister(&init_net, &dn_fib_rules_ops);
+ }
+ 
+ 
+diff -Nurb linux-2.6.22-try2/net/decnet/dn_table.c linux-2.6.22-try2-netns/net/decnet/dn_table.c
+--- linux-2.6.22-try2/net/decnet/dn_table.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/decnet/dn_table.c	2007-12-19 22:49:18.000000000 -0500
+@@ -375,10 +375,10 @@
+ 		kfree_skb(skb);
+ 		goto errout;
+ 	}
+-	err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
++	err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
+ errout:
+ 	if (err < 0)
+-		rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err);
++		rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err);
+ }
+ 
+ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
+@@ -463,12 +463,16 @@
+ 
+ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	unsigned int h, s_h;
+ 	unsigned int e = 0, s_e;
+ 	struct dn_fib_table *tb;
+ 	struct hlist_node *node;
+ 	int dumped = 0;
+ 
++	if (net != &init_net)
++		return 0;
++
+ 	if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) &&
+ 		((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)
+ 			return dn_cache_dump(skb, cb);
+diff -Nurb linux-2.6.22-try2/net/decnet/netfilter/dn_rtmsg.c linux-2.6.22-try2-netns/net/decnet/netfilter/dn_rtmsg.c
+--- linux-2.6.22-try2/net/decnet/netfilter/dn_rtmsg.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/decnet/netfilter/dn_rtmsg.c	2007-12-19 22:49:18.000000000 -0500
+@@ -93,6 +93,10 @@
+ 			const struct net_device *out,
+ 			int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in ? in : out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	dnrmg_send_peer(*pskb);
+ 	return NF_ACCEPT;
+ }
+@@ -137,7 +141,8 @@
+ {
+ 	int rv = 0;
+ 
+-	dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
++	dnrmg = netlink_kernel_create(&init_net,
++				      NETLINK_DNRTMSG, DNRNG_NLGRP_MAX,
+ 				      dnrmg_receive_user_sk, NULL, THIS_MODULE);
+ 	if (dnrmg == NULL) {
+ 		printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
+diff -Nurb linux-2.6.22-try2/net/decnet/sysctl_net_decnet.c linux-2.6.22-try2-netns/net/decnet/sysctl_net_decnet.c
+--- linux-2.6.22-try2/net/decnet/sysctl_net_decnet.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/decnet/sysctl_net_decnet.c	2007-12-19 22:49:18.000000000 -0500
+@@ -259,7 +259,7 @@
+ 
+ 		devname[newlen] = 0;
+ 
+-		dev = dev_get_by_name(devname);
++		dev = dev_get_by_name(&init_net, devname);
+ 		if (dev == NULL)
+ 			return -ENODEV;
+ 
+@@ -299,7 +299,7 @@
+ 		devname[*lenp] = 0;
+ 		strip_it(devname);
+ 
+-		dev = dev_get_by_name(devname);
++		dev = dev_get_by_name(&init_net, devname);
+ 		if (dev == NULL)
+ 			return -ENODEV;
+ 
+diff -Nurb linux-2.6.22-try2/net/econet/af_econet.c linux-2.6.22-try2-netns/net/econet/af_econet.c
+--- linux-2.6.22-try2/net/econet/af_econet.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/econet/af_econet.c	2007-12-19 22:49:18.000000000 -0500
+@@ -608,12 +608,15 @@
+  *	Create an Econet socket
+  */
+ 
+-static int econet_create(struct socket *sock, int protocol)
++static int econet_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 	struct econet_sock *eo;
+ 	int err;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	/* Econet only provides datagram services. */
+ 	if (sock->type != SOCK_DGRAM)
+ 		return -ESOCKTNOSUPPORT;
+@@ -621,7 +624,7 @@
+ 	sock->state = SS_UNCONNECTED;
+ 
+ 	err = -ENOBUFS;
+-	sk = sk_alloc(PF_ECONET, GFP_KERNEL, &econet_proto, 1);
++	sk = sk_alloc(net, PF_ECONET, GFP_KERNEL, &econet_proto, 1);
+ 	if (sk == NULL)
+ 		goto out;
+ 
+@@ -659,7 +662,7 @@
+ 	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
+ 		return -EFAULT;
+ 
+-	if ((dev = dev_get_by_name(ifr.ifr_name)) == NULL)
++	if ((dev = dev_get_by_name(&init_net, ifr.ifr_name)) == NULL)
+ 		return -ENODEV;
+ 
+ 	sec = (struct sockaddr_ec *)&ifr.ifr_addr;
+@@ -1062,6 +1065,9 @@
+ 	struct sock *sk;
+ 	struct ec_device *edev = dev->ec_ptr;
+ 
++	if (dev->nd_net != &init_net)
++		goto drop;
++
+ 	if (skb->pkt_type == PACKET_OTHERHOST)
+ 		goto drop;
+ 
+@@ -1116,6 +1122,9 @@
+ 	struct net_device *dev = (struct net_device *)data;
+ 	struct ec_device *edev;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	switch (msg) {
+ 	case NETDEV_UNREGISTER:
+ 		/* A device has gone down - kill any data we hold for it. */
+diff -Nurb linux-2.6.22-try2/net/ieee80211/ieee80211_module.c linux-2.6.22-try2-netns/net/ieee80211/ieee80211_module.c
+--- linux-2.6.22-try2/net/ieee80211/ieee80211_module.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ieee80211/ieee80211_module.c	2007-12-19 22:49:18.000000000 -0500
+@@ -264,7 +264,7 @@
+ 	struct proc_dir_entry *e;
+ 
+ 	ieee80211_debug_level = debug;
+-	ieee80211_proc = proc_mkdir(DRV_NAME, proc_net);
++	ieee80211_proc = proc_mkdir(DRV_NAME, init_net.proc_net);
+ 	if (ieee80211_proc == NULL) {
+ 		IEEE80211_ERROR("Unable to create " DRV_NAME
+ 				" proc directory\n");
+@@ -273,7 +273,7 @@
+ 	e = create_proc_entry("debug_level", S_IFREG | S_IRUGO | S_IWUSR,
+ 			      ieee80211_proc);
+ 	if (!e) {
+-		remove_proc_entry(DRV_NAME, proc_net);
++		remove_proc_entry(DRV_NAME, init_net.proc_net);
+ 		ieee80211_proc = NULL;
+ 		return -EIO;
+ 	}
+@@ -293,7 +293,7 @@
+ #ifdef CONFIG_IEEE80211_DEBUG
+ 	if (ieee80211_proc) {
+ 		remove_proc_entry("debug_level", ieee80211_proc);
+-		remove_proc_entry(DRV_NAME, proc_net);
++		remove_proc_entry(DRV_NAME, init_net.proc_net);
+ 		ieee80211_proc = NULL;
+ 	}
+ #endif				/* CONFIG_IEEE80211_DEBUG */
+diff -Nurb linux-2.6.22-try2/net/ipv4/af_inet.c linux-2.6.22-try2-netns/net/ipv4/af_inet.c
+--- linux-2.6.22-try2/net/ipv4/af_inet.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/af_inet.c	2007-12-19 23:20:19.000000000 -0500
+@@ -244,7 +244,7 @@
+  *	Create an inet socket.
+  */
+ 
+-static int inet_create(struct socket *sock, int protocol)
++static int inet_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 	struct list_head *p;
+@@ -310,6 +310,10 @@
+ 			goto out_rcu_unlock;
+ 	}
+ 
++	err = -EPROTONOSUPPORT;
++	if (!(answer->flags & INET_PROTOSW_NETNS) && (net != &init_net))
++		goto out_rcu_unlock;
++
+ 	err = -EPERM;
+ 	if ((protocol == IPPROTO_ICMP) &&
+ 		nx_capable(answer->capability, NXC_RAW_ICMP))
+@@ -326,7 +330,7 @@
+ 	BUG_TRAP(answer_prot->slab != NULL);
+ 
+ 	err = -ENOBUFS;
+-	sk = sk_alloc(PF_INET, GFP_KERNEL, answer_prot, 1);
++	sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, 1);
+ 	if (sk == NULL)
+ 		goto out;
+ 
+@@ -344,7 +348,7 @@
+ 			inet->hdrincl = 1;
+ 	}
+ 
+-	if (ipv4_config.no_pmtu_disc)
++	if (net->sysctl_ipv4_no_pmtu_disc)
+ 		inet->pmtudisc = IP_PMTUDISC_DONT;
+ 	else
+ 		inet->pmtudisc = IP_PMTUDISC_WANT;
+@@ -423,12 +427,12 @@
+ }
+ 
+ /* It is off by default, see below. */
+-int sysctl_ip_nonlocal_bind __read_mostly;
+ 
+ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ {
+ 	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
+ 	struct sock *sk = sock->sk;
++	struct net *net = sk->sk_net;
+ 	struct inet_sock *inet = inet_sk(sk);
+ 	struct nx_v4_sock_addr nsa;
+ 	unsigned short snum;
+@@ -448,7 +452,7 @@
+ 	if (err)
+ 		goto out;
+ 
+-	chk_addr_ret = inet_addr_type(nsa.saddr);
++	chk_addr_ret = inet_addr_type(net, nsa.saddr);
+ 
+ 	/* Not specified by any standard per-se, however it breaks too
+ 	 * many applications when removed.  It is unfortunate since
+@@ -458,7 +462,7 @@
+ 	 *  is temporarily down)
+ 	 */
+ 	err = -EADDRNOTAVAIL;
+-	if (!sysctl_ip_nonlocal_bind &&
++	if (!net->sysctl_ip_nonlocal_bind &&
+ 	    !inet->freebind &&
+ 	    nsa.saddr != INADDR_ANY &&
+ 	    chk_addr_ret != RTN_LOCAL &&
+@@ -787,6 +791,7 @@
+ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+ {
+ 	struct sock *sk = sock->sk;
++	struct net *net = sk->sk_net;
+ 	int err = 0;
+ 
+ 	switch (cmd) {
+@@ -799,12 +804,12 @@
+ 		case SIOCADDRT:
+ 		case SIOCDELRT:
+ 		case SIOCRTMSG:
+-			err = ip_rt_ioctl(cmd, (void __user *)arg);
++			err = ip_rt_ioctl(net, cmd, (void __user *)arg);
+ 			break;
+ 		case SIOCDARP:
+ 		case SIOCGARP:
+ 		case SIOCSARP:
+-			err = arp_ioctl(cmd, (void __user *)arg);
++			err = arp_ioctl(net, cmd, (void __user *)arg);
+ 			break;
+ 		case SIOCGIFADDR:
+ 		case SIOCSIFADDR:
+@@ -817,7 +822,7 @@
+ 		case SIOCSIFPFLAGS:
+ 		case SIOCGIFPFLAGS:
+ 		case SIOCSIFFLAGS:
+-			err = devinet_ioctl(cmd, (void __user *)arg);
++			err = devinet_ioctl(net, cmd, (void __user *)arg);
+ 			break;
+ 		default:
+ 			if (sk->sk_prot->ioctl)
+@@ -927,7 +932,8 @@
+ 		.capability = -1,
+ 		.no_check =   0,
+ 		.flags =      INET_PROTOSW_PERMANENT |
+-			      INET_PROTOSW_ICSK,
++			      INET_PROTOSW_ICSK |
++			      INET_PROTOSW_NETNS,
+ 	},
+ 
+ 	{
+@@ -937,7 +943,8 @@
+ 		.ops =        &inet_dgram_ops,
+ 		.capability = -1,
+ 		.no_check =   UDP_CSUM_DEFAULT,
+-		.flags =      INET_PROTOSW_PERMANENT,
++		.flags =      INET_PROTOSW_PERMANENT |
++			      INET_PROTOSW_NETNS,
+        },
+ 
+ 
+@@ -948,7 +955,8 @@
+ 	       .ops =        &inet_sockraw_ops,
+ 	       .capability = CAP_NET_RAW,
+ 	       .no_check =   UDP_CSUM_DEFAULT,
+-	       .flags =      INET_PROTOSW_REUSE,
++	       .flags =      INET_PROTOSW_REUSE |
++			     INET_PROTOSW_NETNS,
+        }
+ };
+ 
+@@ -1029,8 +1037,6 @@
+  *      Shall we try to damage output packets if routing dev changes?
+  */
+ 
+-int sysctl_ip_dynaddr __read_mostly;
+-
+ static int inet_sk_reselect_saddr(struct sock *sk)
+ {
+ 	struct inet_sock *inet = inet_sk(sk);
+@@ -1059,7 +1065,7 @@
+ 	if (new_saddr == old_saddr)
+ 		return 0;
+ 
+-	if (sysctl_ip_dynaddr > 1) {
++	if (sk->sk_net->sysctl_ip_dynaddr > 1) {
+ 		printk(KERN_INFO "%s(): shifting inet->"
+ 				 "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n",
+ 		       __FUNCTION__,
+@@ -1098,6 +1104,7 @@
+ 		daddr = inet->opt->faddr;
+ {
+ 	struct flowi fl = {
++		.fl_net = sk->sk_net,
+ 		.oif = sk->sk_bound_dev_if,
+ 		.nl_u = {
+ 			.ip4_u = {
+@@ -1127,7 +1134,7 @@
+ 		 * Other protocols have to map its equivalent state to TCP_SYN_SENT.
+ 		 * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
+ 		 */
+-		if (!sysctl_ip_dynaddr ||
++		if (!sk->sk_net->sysctl_ip_dynaddr ||
+ 		    sk->sk_state != TCP_SYN_SENT ||
+ 		    (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
+ 		    (err = inet_sk_reselect_saddr(sk)) != 0)
+@@ -1356,6 +1363,24 @@
+ 	.gso_segment = inet_gso_segment,
+ };
+ 
++
++static int inet_net_init(struct net *net)
++{
++	net->sysctl_ip_default_ttl = IPDEFTTL;
++	net->sysctl_ip_dynaddr = 0;
++
++	return 0;
++}
++
++static void inet_net_exit(struct net *net)
++{
++}
++
++static struct pernet_operations inet_net_ops = {
++	.init = inet_net_init,
++	.exit = inet_net_exit,
++};
++
+ static int __init inet_init(void)
+ {
+ 	struct sk_buff *dummy_skb;
+@@ -1377,6 +1402,10 @@
+ 	if (rc)
+ 		goto out_unregister_udp_proto;
+ 
++	rc = register_pernet_subsys(&inet_net_ops);
++	if (rc)
++		goto out_unregister_raw_proto;
++
+ 	/*
+ 	 *	Tell SOCKET that we are alive...
+ 	 */
+@@ -1453,6 +1482,8 @@
+ 	rc = 0;
+ out:
+ 	return rc;
++out_unregister_raw_proto:
++	proto_unregister(&raw_prot);
+ out_unregister_udp_proto:
+ 	proto_unregister(&udp_prot);
+ out_unregister_tcp_proto:
+@@ -1475,15 +1506,11 @@
+ 		goto out_tcp;
+ 	if (udp4_proc_init())
+ 		goto out_udp;
+-	if (fib_proc_init())
+-		goto out_fib;
+ 	if (ip_misc_proc_init())
+ 		goto out_misc;
+ out:
+ 	return rc;
+ out_misc:
+-	fib_proc_exit();
+-out_fib:
+ 	udp4_proc_exit();
+ out_udp:
+ 	tcp4_proc_exit();
+@@ -1519,4 +1546,3 @@
+ EXPORT_SYMBOL(inet_stream_ops);
+ EXPORT_SYMBOL(inet_unregister_protosw);
+ EXPORT_SYMBOL(net_statistics);
+-EXPORT_SYMBOL(sysctl_ip_nonlocal_bind);
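
[Editorial note: the af_inet.c hunks above move several IPv4 knobs (ip_default_ttl, ip_dynaddr, ip_nonlocal_bind, no_pmtu_disc) from globals into struct net, so call sites reach them through the socket's namespace. A hypothetical accessor shows the shape:]

	/* illustrative helper, not in the patch */
	static inline int ip_dynaddr_enabled(const struct sock *sk)
	{
		return sk->sk_net->sysctl_ip_dynaddr;
	}
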
+diff -Nurb linux-2.6.22-try2/net/ipv4/ah4.c linux-2.6.22-try2-netns/net/ipv4/ah4.c
+--- linux-2.6.22-try2/net/ipv4/ah4.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ah4.c	2007-12-19 22:49:18.000000000 -0500
+@@ -198,6 +198,9 @@
+ 	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2));
+ 	struct xfrm_state *x;
+ 
++	if (skb->dev->nd_net != &init_net)
++		return;
++
+ 	if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+ 	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ 		return;
+diff -Nurb linux-2.6.22-try2/net/ipv4/arp.c linux-2.6.22-try2-netns/net/ipv4/arp.c
+--- linux-2.6.22-try2/net/ipv4/arp.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/arp.c	2007-12-19 22:49:18.000000000 -0500
+@@ -109,6 +109,7 @@
+ #include <net/protocol.h>
+ #include <net/tcp.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ #include <net/arp.h>
+ #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+ #include <net/ax25.h>
+@@ -235,10 +236,11 @@
+ {
+ 	__be32 addr = *(__be32*)neigh->primary_key;
+ 	struct net_device *dev = neigh->dev;
++	struct net *net = dev->nd_net;
+ 	struct in_device *in_dev;
+ 	struct neigh_parms *parms;
+ 
+-	neigh->type = inet_addr_type(addr);
++	neigh->type = inet_addr_type(net, addr);
+ 
+ 	rcu_read_lock();
+ 	in_dev = __in_dev_get_rcu(dev);
+@@ -332,6 +334,7 @@
+ 	__be32 saddr = 0;
+ 	u8  *dst_ha = NULL;
+ 	struct net_device *dev = neigh->dev;
++	struct net *net = dev->nd_net;
+ 	__be32 target = *(__be32*)neigh->primary_key;
+ 	int probes = atomic_read(&neigh->probes);
+ 	struct in_device *in_dev = in_dev_get(dev);
+@@ -342,14 +345,14 @@
+ 	switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
+ 	default:
+ 	case 0:		/* By default announce any local IP */
+-		if (skb && inet_addr_type(ip_hdr(skb)->saddr) == RTN_LOCAL)
++		if (skb && inet_addr_type(net, ip_hdr(skb)->saddr) == RTN_LOCAL)
+ 			saddr = ip_hdr(skb)->saddr;
+ 		break;
+ 	case 1:		/* Restrict announcements of saddr in same subnet */
+ 		if (!skb)
+ 			break;
+ 		saddr = ip_hdr(skb)->saddr;
+-		if (inet_addr_type(saddr) == RTN_LOCAL) {
++		if (inet_addr_type(net, saddr) == RTN_LOCAL) {
+ 			/* saddr should be known to target */
+ 			if (inet_addr_onlink(in_dev, target, saddr))
+ 				break;
+@@ -386,6 +389,7 @@
+ static int arp_ignore(struct in_device *in_dev, struct net_device *dev,
+ 		      __be32 sip, __be32 tip)
+ {
++	struct net *net = dev->nd_net;
+ 	int scope;
+ 
+ 	switch (IN_DEV_ARP_IGNORE(in_dev)) {
+@@ -416,13 +420,15 @@
+ 	default:
+ 		return 0;
+ 	}
+-	return !inet_confirm_addr(dev, sip, tip, scope);
++	return !inet_confirm_addr(net, dev, sip, tip, scope);
+ }
+ 
+ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
+ {
+-	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip,
+-						 .saddr = tip } } };
++	struct flowi fl = {
++		.fl_net = dev->nd_net,
++		.nl_u = { .ip4_u = { .daddr = sip, .saddr = tip } }
++	};
+ 	struct rtable *rt;
+ 	int flag = 0;
+ 	/*unsigned long now; */
+@@ -469,6 +475,7 @@
+ int arp_find(unsigned char *haddr, struct sk_buff *skb)
+ {
+ 	struct net_device *dev = skb->dev;
++	struct net *net = dev->nd_net;
+ 	__be32 paddr;
+ 	struct neighbour *n;
+ 
+@@ -480,7 +487,7 @@
+ 
+ 	paddr = ((struct rtable*)skb->dst)->rt_gateway;
+ 
+-	if (arp_set_predefined(inet_addr_type(paddr), haddr, paddr, dev))
++	if (arp_set_predefined(inet_addr_type(net, paddr), haddr, paddr, dev))
+ 		return 0;
+ 
+ 	n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
+@@ -704,6 +711,7 @@
+ static int arp_process(struct sk_buff *skb)
+ {
+ 	struct net_device *dev = skb->dev;
++	struct net *net = dev->nd_net;
+ 	struct in_device *in_dev = in_dev_get(dev);
+ 	struct arphdr *arp;
+ 	unsigned char *arp_ptr;
+@@ -824,7 +832,7 @@
+ 	/* Special case: IPv4 duplicate address detection packet (RFC2131) */
+ 	if (sip == 0) {
+ 		if (arp->ar_op == htons(ARPOP_REQUEST) &&
+-		    inet_addr_type(tip) == RTN_LOCAL &&
++		    inet_addr_type(net, tip) == RTN_LOCAL &&
+ 		    !arp_ignore(in_dev,dev,sip,tip))
+ 			arp_send(ARPOP_REPLY,ETH_P_ARP,tip,dev,tip,sha,dev->dev_addr,dev->dev_addr);
+ 		goto out;
+@@ -854,7 +862,7 @@
+ 		} else if (IN_DEV_FORWARD(in_dev)) {
+ 			if ((rt->rt_flags&RTCF_DNAT) ||
+ 			    (addr_type == RTN_UNICAST  && rt->u.dst.dev != dev &&
+-			     (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) {
++			     (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0)))) {
+ 				n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
+ 				if (n)
+ 					neigh_release(n);
+@@ -877,14 +885,14 @@
+ 
+ 	n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
+ 
+-	if (IPV4_DEVCONF_ALL(ARP_ACCEPT)) {
++	if (IPV4_DEVCONF_ALL(net, ARP_ACCEPT)) {
+ 		/* Unsolicited ARP is not accepted by default.
+ 		   It is possible, that this option should be enabled for some
+ 		   devices (strip is candidate)
+ 		 */
+ 		if (n == NULL &&
+ 		    arp->ar_op == htons(ARPOP_REPLY) &&
+-		    inet_addr_type(sip) == RTN_UNICAST)
++		    inet_addr_type(net, sip) == RTN_UNICAST)
+ 			n = __neigh_lookup(&arp_tbl, &sip, dev, -1);
+ 	}
+ 
+@@ -966,7 +974,7 @@
+  *	Set (create) an ARP cache entry.
+  */
+ 
+-static int arp_req_set(struct arpreq *r, struct net_device * dev)
++static int arp_req_set(struct net *net, struct arpreq *r, struct net_device * dev)
+ {
+ 	__be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr;
+ 	struct neighbour *neigh;
+@@ -977,17 +985,17 @@
+ 		if (mask && mask != htonl(0xFFFFFFFF))
+ 			return -EINVAL;
+ 		if (!dev && (r->arp_flags & ATF_COM)) {
+-			dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data);
++			dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, r->arp_ha.sa_data);
+ 			if (!dev)
+ 				return -ENODEV;
+ 		}
+ 		if (mask) {
+-			if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL)
++			if (pneigh_lookup(&arp_tbl, net, &ip, dev, 1) == NULL)
+ 				return -ENOBUFS;
+ 			return 0;
+ 		}
+ 		if (dev == NULL) {
+-			IPV4_DEVCONF_ALL(PROXY_ARP) = 1;
++			IPV4_DEVCONF_ALL(net, PROXY_ARP) = 1;
+ 			return 0;
+ 		}
+ 		if (__in_dev_get_rtnl(dev)) {
+@@ -1000,8 +1008,10 @@
+ 	if (r->arp_flags & ATF_PERM)
+ 		r->arp_flags |= ATF_COM;
+ 	if (dev == NULL) {
+-		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
+-							 .tos = RTO_ONLINK } } };
++		struct flowi fl = {
++			.fl_net = net,
++			.nl_u = { .ip4_u = { .daddr = ip, .tos = RTO_ONLINK } }
++		};
+ 		struct rtable * rt;
+ 		if ((err = ip_route_output_key(&rt, &fl)) != 0)
+ 			return err;
+@@ -1080,7 +1090,7 @@
+ 	return err;
+ }
+ 
+-static int arp_req_delete(struct arpreq *r, struct net_device * dev)
++static int arp_req_delete(struct net *net, struct arpreq *r, struct net_device * dev)
+ {
+ 	int err;
+ 	__be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
+@@ -1090,10 +1100,10 @@
+ 		__be32 mask =
+ 		       ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
+ 		if (mask == htonl(0xFFFFFFFF))
+-			return pneigh_delete(&arp_tbl, &ip, dev);
++			return pneigh_delete(&arp_tbl, net, &ip, dev);
+ 		if (mask == 0) {
+ 			if (dev == NULL) {
+-				IPV4_DEVCONF_ALL(PROXY_ARP) = 0;
++				IPV4_DEVCONF_ALL(net, PROXY_ARP) = 0;
+ 				return 0;
+ 			}
+ 			if (__in_dev_get_rtnl(dev)) {
+@@ -1107,8 +1117,10 @@
+ 	}
+ 
+ 	if (dev == NULL) {
+-		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
+-							 .tos = RTO_ONLINK } } };
++		struct flowi fl = {
++			.fl_net = net,
++			.nl_u = { .ip4_u = { .daddr = ip, .tos = RTO_ONLINK } }
++		};
+ 		struct rtable * rt;
+ 		if ((err = ip_route_output_key(&rt, &fl)) != 0)
+ 			return err;
+@@ -1133,7 +1145,7 @@
+  *	Handle an ARP layer I/O control request.
+  */
+ 
+-int arp_ioctl(unsigned int cmd, void __user *arg)
++int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+ {
+ 	int err;
+ 	struct arpreq r;
+@@ -1165,7 +1177,7 @@
+ 	rtnl_lock();
+ 	if (r.arp_dev[0]) {
+ 		err = -ENODEV;
+-		if ((dev = __dev_get_by_name(r.arp_dev)) == NULL)
++		if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL)
+ 			goto out;
+ 
+ 		/* Mmmm... It is wrong... ARPHRD_NETROM==0 */
+@@ -1181,10 +1193,10 @@
+ 
+ 	switch (cmd) {
+ 	case SIOCDARP:
+-		err = arp_req_delete(&r, dev);
++		err = arp_req_delete(net, &r, dev);
+ 		break;
+ 	case SIOCSARP:
+-		err = arp_req_set(&r, dev);
++		err = arp_req_set(net, &r, dev);
+ 		break;
+ 	case SIOCGARP:
+ 		err = arp_req_get(&r, dev);
+@@ -1201,6 +1213,9 @@
+ {
+ 	struct net_device *dev = ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	switch (event) {
+ 	case NETDEV_CHANGEADDR:
+ 		neigh_changeaddr(&arp_tbl, dev);
+@@ -1227,6 +1242,54 @@
+ }
+ 
+ 
++static int arp_proc_init(struct net *net);
++static void arp_proc_exit(struct net *net);
++
++
++static int arp_net_init(struct net *net)
++{
++	int error;
++	if ((error = arp_proc_init(net)))
++		goto out_proc;
++
++	error = -ENOMEM;
++	net->arp_neigh_parms_default = neigh_parms_alloc_default(&arp_tbl, net);
++	if (!net->arp_neigh_parms_default)
++		goto out_parm;
++
++#ifdef CONFIG_SYSCTL
++	if ((error = neigh_sysctl_register(
++		     NULL, net->arp_neigh_parms_default,
++		     NET_IPV4, NET_IPV4_NEIGH, "ipv4", NULL, NULL)))
++		goto out_sysctl;
++#endif
++
++out:
++	return error;
++
++#ifdef CONFIG_SYSCTL
++out_sysctl:
++	neigh_parms_release(&arp_tbl, net->arp_neigh_parms_default);
++#endif
++out_parm:
++	arp_proc_exit(net);
++out_proc:
++	goto out;
++}
++
++static void arp_net_exit(struct net *net)
++{
++#ifdef CONFIG_SYSCTL
++	neigh_sysctl_unregister(net->arp_neigh_parms_default);
++#endif
++	neigh_parms_release(&arp_tbl, net->arp_neigh_parms_default);
++	arp_proc_exit(net);
++}
++
++static struct pernet_operations arp_net_ops = {
++	.init = arp_net_init,
++	.exit = arp_net_exit,
++};
++
+ /*
+  *	Called once on startup.
+  */
+@@ -1236,18 +1299,12 @@
+ 	.func =	arp_rcv,
+ };
+ 
+-static int arp_proc_init(void);
+-
+ void __init arp_init(void)
+ {
+ 	neigh_table_init(&arp_tbl);
+ 
+ 	dev_add_pack(&arp_packet_type);
+-	arp_proc_init();
+-#ifdef CONFIG_SYSCTL
+-	neigh_sysctl_register(NULL, &arp_tbl.parms, NET_IPV4,
+-			      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
+-#endif
++	register_pernet_subsys(&arp_net_ops);
+ 	register_netdevice_notifier(&arp_netdev_notifier);
+ }
+ 
+@@ -1383,6 +1440,8 @@
+ 
+ 	seq	     = file->private_data;
+ 	seq->private = s;
++	s->net = get_net(PROC_NET(inode));
++
+ out:
+ 	return rc;
+ out_kfree:
+@@ -1390,28 +1449,46 @@
+ 	goto out;
+ }
+ 
++static int arp_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct neigh_seq_state *state = seq->private;
++	put_net(state->net);
++	return seq_release_private(inode, file);
++}
++
+ static const struct file_operations arp_seq_fops = {
+ 	.owner		= THIS_MODULE,
+ 	.open           = arp_seq_open,
+ 	.read           = seq_read,
+ 	.llseek         = seq_lseek,
+-	.release	= seq_release_private,
++	.release	= arp_seq_release,
+ };
+ 
+-static int __init arp_proc_init(void)
++static int arp_proc_init(struct net *net)
+ {
+-	if (!proc_net_fops_create("arp", S_IRUGO, &arp_seq_fops))
++	if (!proc_net_fops_create(net, "arp", S_IRUGO, &arp_seq_fops))
+ 		return -ENOMEM;
+ 	return 0;
+ }
+ 
++static void arp_proc_exit(struct net *net)
++{
++	proc_net_remove(net, "arp");
++}
++
+ #else /* CONFIG_PROC_FS */
+ 
+-static int __init arp_proc_init(void)
++static int arp_proc_init(struct net *net)
+ {
+ 	return 0;
+ }
+ 
++static void arp_proc_exit(struct net *net)
++{
++}
++
+ #endif /* CONFIG_PROC_FS */
+ 
+ EXPORT_SYMBOL(arp_broken_ops);
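
Two things in the arp.c conversion deserve a note. First, the /proc/net/arp open path now pins the namespace with get_net() and the new arp_seq_release() drops that reference, so a namespace cannot be torn down while the file is held open. Second, arp_net_init() unwinds in strict reverse order of setup. The open/release pairing, reduced to its skeleton (illustrative; PROC_NET() resolving a namespace from the proc inode is as used in this patch):

static int example_seq_open(struct inode *inode, struct file *file)
{
	struct neigh_seq_state *s = kzalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		return -ENOMEM;
	/* seq_open() plumbing elided */
	s->net = get_net(PROC_NET(inode));	/* pin the namespace */
	return 0;
}

static int example_seq_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct neigh_seq_state *state = seq->private;

	put_net(state->net);			/* balance the get_net() */
	return seq_release_private(inode, file);
}
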
+diff -Nurb linux-2.6.22-try2/net/ipv4/devinet.c linux-2.6.22-try2-netns/net/ipv4/devinet.c
+--- linux-2.6.22-try2/net/ipv4/devinet.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/devinet.c	2007-12-19 22:49:18.000000000 -0500
+@@ -63,7 +63,7 @@
+ #include <net/ip_fib.h>
+ #include <net/rtnetlink.h>
+ 
+-struct ipv4_devconf ipv4_devconf = {
++static struct ipv4_devconf ipv4_devconf_template = {
+ 	.data = {
+ 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
+ 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
+@@ -72,7 +72,7 @@
+ 	},
+ };
+ 
+-static struct ipv4_devconf ipv4_devconf_dflt = {
++static struct ipv4_devconf ipv4_devconf_dflt_template = {
+ 	.data = {
+ 		[NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
+ 		[NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
+@@ -82,7 +82,7 @@
+ 	},
+ };
+ 
+-#define IPV4_DEVCONF_DFLT(attr) IPV4_DEVCONF(ipv4_devconf_dflt, attr)
++#define IPV4_DEVCONF_DFLT(net, attr) IPV4_DEVCONF(*((net)->ipv4_devconf_dflt), attr)
+ 
+ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
+ 	[IFA_LOCAL]     	= { .type = NLA_U32 },
+@@ -98,7 +98,7 @@
+ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
+ 			 int destroy);
+ #ifdef CONFIG_SYSCTL
+-static void devinet_sysctl_register(struct in_device *in_dev,
++static void devinet_sysctl_register(struct net *net, struct in_device *in_dev,
+ 				    struct ipv4_devconf *p);
+ static void devinet_sysctl_unregister(struct ipv4_devconf *p);
+ #endif
+@@ -149,6 +149,7 @@
+ 
+ static struct in_device *inetdev_init(struct net_device *dev)
+ {
++	struct net *net = dev->nd_net;
+ 	struct in_device *in_dev;
+ 
+ 	ASSERT_RTNL();
+@@ -157,7 +158,7 @@
+ 	if (!in_dev)
+ 		goto out;
+ 	INIT_RCU_HEAD(&in_dev->rcu_head);
+-	memcpy(&in_dev->cnf, &ipv4_devconf_dflt, sizeof(in_dev->cnf));
++	memcpy(&in_dev->cnf, net->ipv4_devconf_dflt, sizeof(in_dev->cnf));
+ 	in_dev->cnf.sysctl = NULL;
+ 	in_dev->dev = dev;
+ 	if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
+@@ -173,7 +174,7 @@
+ 	in_dev_hold(in_dev);
+ 
+ #ifdef CONFIG_SYSCTL
+-	devinet_sysctl_register(in_dev, &in_dev->cnf);
++	devinet_sysctl_register(net, in_dev, &in_dev->cnf);
+ #endif
+ 	ip_mc_init_dev(in_dev);
+ 	if (dev->flags & IFF_UP)
+@@ -203,8 +204,6 @@
+ 	ASSERT_RTNL();
+ 
+ 	dev = in_dev->dev;
+-	if (dev == &loopback_dev)
+-		return;
+ 
+ 	in_dev->dead = 1;
+ 
+@@ -415,12 +414,12 @@
+ 	return inet_insert_ifa(ifa);
+ }
+ 
+-struct in_device *inetdev_by_index(int ifindex)
++struct in_device *inetdev_by_index(struct net *net, int ifindex)
+ {
+ 	struct net_device *dev;
+ 	struct in_device *in_dev = NULL;
+ 	read_lock(&dev_base_lock);
+-	dev = __dev_get_by_index(ifindex);
++	dev = __dev_get_by_index(net, ifindex);
+ 	if (dev)
+ 		in_dev = in_dev_get(dev);
+ 	read_unlock(&dev_base_lock);
+@@ -444,6 +443,7 @@
+ 
+ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct nlattr *tb[IFA_MAX+1];
+ 	struct in_device *in_dev;
+ 	struct ifaddrmsg *ifm;
+@@ -457,7 +457,7 @@
+ 		goto errout;
+ 
+ 	ifm = nlmsg_data(nlh);
+-	in_dev = inetdev_by_index(ifm->ifa_index);
++	in_dev = inetdev_by_index(net, ifm->ifa_index);
+ 	if (in_dev == NULL) {
+ 		err = -ENODEV;
+ 		goto errout;
+@@ -488,7 +488,7 @@
+ 	return err;
+ }
+ 
+-static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh)
++static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
+ {
+ 	struct nlattr *tb[IFA_MAX+1];
+ 	struct in_ifaddr *ifa;
+@@ -507,7 +507,7 @@
+ 		goto errout;
+ 	}
+ 
+-	dev = __dev_get_by_index(ifm->ifa_index);
++	dev = __dev_get_by_index(net, ifm->ifa_index);
+ 	if (dev == NULL) {
+ 		err = -ENODEV;
+ 		goto errout;
+@@ -564,11 +564,12 @@
+ 
+ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct in_ifaddr *ifa;
+ 
+ 	ASSERT_RTNL();
+ 
+-	ifa = rtm_to_ifaddr(nlh);
++	ifa = rtm_to_ifaddr(net, nlh);
+ 	if (IS_ERR(ifa))
+ 		return PTR_ERR(ifa);
+ 
+@@ -600,7 +601,7 @@
+ }
+ 
+ 
+-int devinet_ioctl(unsigned int cmd, void __user *arg)
++int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+ {
+ 	struct ifreq ifr;
+ 	struct sockaddr_in sin_orig;
+@@ -629,7 +630,7 @@
+ 		*colon = 0;
+ 
+ #ifdef CONFIG_KMOD
+-	dev_load(ifr.ifr_name);
++	dev_load(net, ifr.ifr_name);
+ #endif
+ 
+ 	switch (cmd) {
+@@ -670,7 +671,7 @@
+ 	rtnl_lock();
+ 
+ 	ret = -ENODEV;
+-	if ((dev = __dev_get_by_name(ifr.ifr_name)) == NULL)
++	if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
+ 		goto done;
+ 
+ 	if (colon)
+@@ -889,6 +890,7 @@
+ 
+ __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
+ {
++	struct net *net = dev->nd_net;
+ 	__be32 addr = 0;
+ 	struct in_device *in_dev;
+ 
+@@ -919,7 +921,7 @@
+ 	 */
+ 	read_lock(&dev_base_lock);
+ 	rcu_read_lock();
+-	for_each_netdev(dev) {
++	for_each_netdev(net, dev) {
+ 		if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
+ 			continue;
+ 
+@@ -982,7 +984,7 @@
+  * - local: address, 0=autoselect the local address
+  * - scope: maximum allowed scope value for the local address
+  */
+-__be32 inet_confirm_addr(const struct net_device *dev, __be32 dst, __be32 local, int scope)
++__be32 inet_confirm_addr(struct net *net, const struct net_device *dev, __be32 dst, __be32 local, int scope)
+ {
+ 	__be32 addr = 0;
+ 	struct in_device *in_dev;
+@@ -998,7 +1000,7 @@
+ 
+ 	read_lock(&dev_base_lock);
+ 	rcu_read_lock();
+-	for_each_netdev(dev) {
++	for_each_netdev(net, dev) {
+ 		if ((in_dev = __in_dev_get_rcu(dev))) {
+ 			addr = confirm_addr_indev(in_dev, dst, local, scope);
+ 			if (addr)
+@@ -1059,6 +1061,7 @@
+ 			 void *ptr)
+ {
+ 	struct net_device *dev = ptr;
++	struct net *net = dev->nd_net;
+ 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ 
+ 	ASSERT_RTNL();
+@@ -1066,7 +1069,7 @@
+ 	if (!in_dev) {
+ 		if (event == NETDEV_REGISTER) {
+ 			in_dev = inetdev_init(dev);
+-			if (dev == &loopback_dev) {
++			if (dev == &net->loopback_dev) {
+ 				if (!in_dev)
+ 					panic("devinet: "
+ 					      "Failed to create loopback\n");
+@@ -1085,7 +1088,7 @@
+ 	case NETDEV_UP:
+ 		if (dev->mtu < 68)
+ 			break;
+-		if (dev == &loopback_dev) {
++		if (dev == &net->loopback_dev) {
+ 			struct in_ifaddr *ifa;
+ 			if ((ifa = inet_alloc_ifa()) != NULL) {
+ 				ifa->ifa_local =
+@@ -1122,7 +1125,7 @@
+ 		neigh_sysctl_unregister(in_dev->arp_parms);
+ 		neigh_sysctl_register(dev, in_dev->arp_parms, NET_IPV4,
+ 				      NET_IPV4_NEIGH, "ipv4", NULL, NULL);
+-		devinet_sysctl_register(in_dev, &in_dev->cnf);
++		devinet_sysctl_register(net, in_dev, &in_dev->cnf);
+ #endif
+ 		break;
+ 	}
+@@ -1185,6 +1188,7 @@
+ 
+ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	int idx, ip_idx;
+ 	struct net_device *dev;
+ 	struct in_device *in_dev;
+@@ -1194,7 +1198,7 @@
+ 
+ 	s_ip_idx = ip_idx = cb->args[1];
+ 	idx = 0;
+-	for_each_netdev(dev) {
++	for_each_netdev(net, dev) {
+ 		if (idx < s_idx)
+ 			goto cont;
+ 		if (idx > s_idx)
+@@ -1228,6 +1232,7 @@
+ 		      u32 pid)
+ {
+ 	struct sk_buff *skb;
++	struct net *net = ifa->ifa_dev->dev->nd_net;
+ 	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+ 	int err = -ENOBUFS;
+ 
+@@ -1242,25 +1247,25 @@
+ 		kfree_skb(skb);
+ 		goto errout;
+ 	}
+-	err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
++	err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
+ errout:
+ 	if (err < 0)
+-		rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err);
++		rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
+ }
+ 
+ #ifdef CONFIG_SYSCTL
+ 
+-static void devinet_copy_dflt_conf(int i)
++static void devinet_copy_dflt_conf(struct net *net, int i)
+ {
+ 	struct net_device *dev;
+ 
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev) {
++	for_each_netdev(net, dev) {
+ 		struct in_device *in_dev;
+ 		rcu_read_lock();
+ 		in_dev = __in_dev_get_rcu(dev);
+ 		if (in_dev && !test_bit(i, in_dev->cnf.state))
+-			in_dev->cnf.data[i] = ipv4_devconf_dflt.data[i];
++			in_dev->cnf.data[i] = net->ipv4_devconf_dflt->data[i];
+ 		rcu_read_unlock();
+ 	}
+ 	read_unlock(&dev_base_lock);
+@@ -1274,12 +1279,13 @@
+ 
+ 	if (write) {
+ 		struct ipv4_devconf *cnf = ctl->extra1;
++		struct net *net = ctl->extra2;
+ 		int i = (int *)ctl->data - cnf->data;
+ 
+ 		set_bit(i, cnf->state);
+ 
+-		if (cnf == &ipv4_devconf_dflt)
+-			devinet_copy_dflt_conf(i);
++		if (cnf == net->ipv4_devconf_dflt)
++			devinet_copy_dflt_conf(net, i);
+ 	}
+ 
+ 	return ret;
+@@ -1291,6 +1297,7 @@
+ {
+ 	struct ipv4_devconf *cnf;
+ 	int *valp = table->data;
++	struct net *net;
+ 	int new;
+ 	int i;
+ 
+@@ -1325,26 +1332,27 @@
+ 	*valp = new;
+ 
+ 	cnf = table->extra1;
++	net = table->extra2;
+ 	i = (int *)table->data - cnf->data;
+ 
+ 	set_bit(i, cnf->state);
+ 
+-	if (cnf == &ipv4_devconf_dflt)
+-		devinet_copy_dflt_conf(i);
++	if (cnf == net->ipv4_devconf_dflt)
++		devinet_copy_dflt_conf(net, i);
+ 
+ 	return 1;
+ }
+ 
+-void inet_forward_change(void)
++void inet_forward_change(struct net *net)
+ {
+ 	struct net_device *dev;
+-	int on = IPV4_DEVCONF_ALL(FORWARDING);
++	int on = IPV4_DEVCONF_ALL(net, FORWARDING);
+ 
+-	IPV4_DEVCONF_ALL(ACCEPT_REDIRECTS) = !on;
+-	IPV4_DEVCONF_DFLT(FORWARDING) = on;
++	IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
++	IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
+ 
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev) {
++	for_each_netdev(net, dev) {
+ 		struct in_device *in_dev;
+ 		rcu_read_lock();
+ 		in_dev = __in_dev_get_rcu(dev);
+@@ -1364,11 +1372,12 @@
+ 	int *valp = ctl->data;
+ 	int val = *valp;
+ 	int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
++	struct net *net = ctl->extra2;
+ 
+ 	if (write && *valp != val) {
+-		if (valp == &IPV4_DEVCONF_ALL(FORWARDING))
+-			inet_forward_change();
+-		else if (valp != &IPV4_DEVCONF_DFLT(FORWARDING))
++		if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
++			inet_forward_change(net);
++		else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
+ 			rt_cache_flush(0);
+ 	}
+ 
+@@ -1407,13 +1416,14 @@
+ 	{ \
+ 		.ctl_name	= NET_IPV4_CONF_ ## attr, \
+ 		.procname	= name, \
+-		.data		= ipv4_devconf.data + \
++		.data		= ipv4_devconf_template.data + \
+ 				  NET_IPV4_CONF_ ## attr - 1, \
+ 		.maxlen		= sizeof(int), \
+ 		.mode		= mval, \
+ 		.proc_handler	= proc, \
+ 		.strategy	= sysctl, \
+-		.extra1		= &ipv4_devconf, \
++		.extra1		= &ipv4_devconf_template, \
++		.extra2		= &init_net, \
+ 	}
+ 
+ #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
+@@ -1503,25 +1513,29 @@
+ 	},
+ };
+ 
+-static void devinet_sysctl_register(struct in_device *in_dev,
++static void devinet_sysctl_register(struct net *net, struct in_device *in_dev,
+ 				    struct ipv4_devconf *p)
+ {
+ 	int i;
+ 	struct net_device *dev = in_dev ? in_dev->dev : NULL;
+-	struct devinet_sysctl_table *t = kmemdup(&devinet_sysctl, sizeof(*t),
+-						 GFP_KERNEL);
++	struct devinet_sysctl_table *t;
+ 	char *dev_name = NULL;
+ 
++	t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
+ 	if (!t)
+ 		return;
+ 	for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
+-		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
++		t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf_template;
+ 		t->devinet_vars[i].extra1 = p;
++		t->devinet_vars[i].extra2 = net;
+ 	}
+ 
+ 	if (dev) {
+ 		dev_name = dev->name;
+ 		t->devinet_dev[0].ctl_name = dev->ifindex;
++	} else if (p == net->ipv4_devconf) {
++		dev_name = "all";
++		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_ALL;
+ 	} else {
+ 		dev_name = "default";
+ 		t->devinet_dev[0].ctl_name = NET_PROTO_CONF_DEFAULT;
+@@ -1542,7 +1556,7 @@
+ 	t->devinet_proto_dir[0].child = t->devinet_conf_dir;
+ 	t->devinet_root_dir[0].child  = t->devinet_proto_dir;
+ 
+-	t->sysctl_header = register_sysctl_table(t->devinet_root_dir);
++	t->sysctl_header = register_net_sysctl_table(net, t->devinet_root_dir);
+ 	if (!t->sysctl_header)
+ 	    goto free_procname;
+ 
+@@ -1562,26 +1576,59 @@
+ 	if (p->sysctl) {
+ 		struct devinet_sysctl_table *t = p->sysctl;
+ 		p->sysctl = NULL;
+-		unregister_sysctl_table(t->sysctl_header);
++		unregister_net_sysctl_table(t->sysctl_header);
+ 		kfree(t->devinet_dev[0].procname);
+ 		kfree(t);
+ 	}
+ }
+ #endif
+ 
++static int devinet_net_init(struct net *net)
++{
++#ifdef CONFIG_SYSCTL
++	net->ipv4_devconf = kmemdup(&ipv4_devconf_template,
++				    sizeof(ipv4_devconf_template), GFP_KERNEL);
++	if (!net->ipv4_devconf)
++		return -ENOMEM;
++
++	net->ipv4_devconf_dflt = kmemdup(&ipv4_devconf_dflt_template,
++					 sizeof(ipv4_devconf_dflt_template),
++					 GFP_KERNEL);
++	if (!net->ipv4_devconf_dflt) {
++		kfree(net->ipv4_devconf);
++		return -ENOMEM;
++	}
++
++	devinet_sysctl_register(net, NULL, net->ipv4_devconf);
++	devinet_sysctl_register(net, NULL, net->ipv4_devconf_dflt);
++
++	multi_ipv4_table[0].data = &IPV4_DEVCONF_ALL(net, FORWARDING);
++#endif
++	return 0;
++}
++
++static void devinet_net_exit(struct net *net)
++{
++#ifdef CONFIG_SYSCTL
++	devinet_sysctl_unregister(net->ipv4_devconf_dflt);
++	devinet_sysctl_unregister(net->ipv4_devconf);
++	kfree(net->ipv4_devconf_dflt);
++	kfree(net->ipv4_devconf);
++#endif
++}
++
++static struct pernet_operations devinet_net_ops = {
++	.init = devinet_net_init,
++	.exit = devinet_net_exit,
++};
++
+ void __init devinet_init(void)
+ {
++	register_pernet_subsys(&devinet_net_ops);
+ 	register_gifconf(PF_INET, inet_gifconf);
+ 	register_netdevice_notifier(&ip_netdev_notifier);
+ 
+ 	rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
+ 	rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
+ 	rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
+-#ifdef CONFIG_SYSCTL
+-	devinet_sysctl.sysctl_header =
+-		register_sysctl_table(devinet_sysctl.devinet_root_dir);
+-	devinet_sysctl_register(NULL, &ipv4_devconf_dflt);
+-#endif
+ }
+ 
+ EXPORT_SYMBOL(in_dev_finish_destroy);
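
In devinet.c the two global ipv4_devconf structures become per-net kmemdup() copies of compile-time templates, so a sysctl write in one namespace can no longer leak into another. One caveat: devinet_net_init() allocates those copies only under CONFIG_SYSCTL, while inetdev_init() and the IPV4_DEVCONF_DFLT() macro dereference them unconditionally, so a !CONFIG_SYSCTL build would oops on the first device. A hypothetical rearrangement (not part of this patch) that keeps only the sysctl registration conditional:

static int devinet_net_init_sketch(struct net *net)
{
	net->ipv4_devconf = kmemdup(&ipv4_devconf_template,
				    sizeof(ipv4_devconf_template), GFP_KERNEL);
	net->ipv4_devconf_dflt = kmemdup(&ipv4_devconf_dflt_template,
					 sizeof(ipv4_devconf_dflt_template),
					 GFP_KERNEL);
	if (!net->ipv4_devconf || !net->ipv4_devconf_dflt)
		goto fail;
#ifdef CONFIG_SYSCTL
	devinet_sysctl_register(net, NULL, net->ipv4_devconf);
	devinet_sysctl_register(net, NULL, net->ipv4_devconf_dflt);
#endif
	return 0;
fail:
	kfree(net->ipv4_devconf);	/* kfree(NULL) is safe */
	kfree(net->ipv4_devconf_dflt);
	return -ENOMEM;
}
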
+diff -Nurb linux-2.6.22-try2/net/ipv4/esp4.c linux-2.6.22-try2-netns/net/ipv4/esp4.c
+--- linux-2.6.22-try2/net/ipv4/esp4.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/esp4.c	2007-12-19 22:49:18.000000000 -0500
+@@ -307,6 +307,9 @@
+ 	struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2));
+ 	struct xfrm_state *x;
+ 
++	if (skb->dev->nd_net != &init_net)
++		return;
++
+ 	if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+ 	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ 		return;
+diff -Nurb linux-2.6.22-try2/net/ipv4/fib_frontend.c linux-2.6.22-try2-netns/net/ipv4/fib_frontend.c
+--- linux-2.6.22-try2/net/ipv4/fib_frontend.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/fib_frontend.c	2007-12-19 22:49:18.000000000 -0500
+@@ -51,38 +51,34 @@
+ 
+ #ifndef CONFIG_IP_MULTIPLE_TABLES
+ 
+-struct fib_table *ip_fib_local_table;
+-struct fib_table *ip_fib_main_table;
+-
+ #define FIB_TABLE_HASHSZ 1
+-static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+ 
+ #else
+ 
+ #define FIB_TABLE_HASHSZ 256
+-static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+ 
+-struct fib_table *fib_new_table(u32 id)
++struct fib_table *fib_new_table(struct net *net, u32 id)
+ {
+ 	struct fib_table *tb;
+ 	unsigned int h;
+ 
+ 	if (id == 0)
+ 		id = RT_TABLE_MAIN;
+-	tb = fib_get_table(id);
++	tb = fib_get_table(net, id);
+ 	if (tb)
+ 		return tb;
+ 	tb = fib_hash_init(id);
+ 	if (!tb)
+ 		return NULL;
+ 	h = id & (FIB_TABLE_HASHSZ - 1);
+-	hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]);
++	hlist_add_head_rcu(&tb->tb_hlist, &net->ip_fib_table_hash[h]);
+ 	return tb;
+ }
+ 
+-struct fib_table *fib_get_table(u32 id)
++struct fib_table *fib_get_table(struct net *net, u32 id)
+ {
+ 	struct fib_table *tb;
++	struct hlist_head *head;
+ 	struct hlist_node *node;
+ 	unsigned int h;
+ 
+@@ -90,7 +86,8 @@
+ 		id = RT_TABLE_MAIN;
+ 	h = id & (FIB_TABLE_HASHSZ - 1);
+ 	rcu_read_lock();
+-	hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) {
++	head = &net->ip_fib_table_hash[h];
++	hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
+ 		if (tb->tb_id == id) {
+ 			rcu_read_unlock();
+ 			return tb;
+@@ -99,9 +96,10 @@
+ 	rcu_read_unlock();
+ 	return NULL;
+ }
++
+ #endif /* CONFIG_IP_MULTIPLE_TABLES */
+ 
+-static void fib_flush(void)
++static void fib_flush(struct net *net)
+ {
+ 	int flushed = 0;
+ 	struct fib_table *tb;
+@@ -109,7 +107,8 @@
+ 	unsigned int h;
+ 
+ 	for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
+-		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist)
++		struct hlist_head *head = &net->ip_fib_table_hash[h];
++		hlist_for_each_entry(tb, node, head, tb_hlist)
+ 			flushed += tb->tb_flush(tb);
+ 	}
+ 
+@@ -121,18 +120,23 @@
+  *	Find the first device with a given source address.
+  */
+ 
+-struct net_device * ip_dev_find(__be32 addr)
++struct net_device * ip_dev_find(struct net *net, __be32 addr)
+ {
+-	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
++	struct flowi fl = { 
++		.fl_net = net,
++		.nl_u = { .ip4_u = { .daddr = addr } }
++	};
+ 	struct fib_result res;
+ 	struct net_device *dev = NULL;
++	struct fib_table *local_table;
+ 
+ #ifdef CONFIG_IP_MULTIPLE_TABLES
+ 	res.r = NULL;
+ #endif
+ 
+-	if (!ip_fib_local_table ||
+-	    ip_fib_local_table->tb_lookup(ip_fib_local_table, &fl, &res))
++	local_table = fib_get_table(net, RT_TABLE_LOCAL);
++	if (!local_table ||
++	    local_table->tb_lookup(local_table, &fl, &res))
+ 		return NULL;
+ 	if (res.type != RTN_LOCAL)
+ 		goto out;
+@@ -145,11 +149,15 @@
+ 	return dev;
+ }
+ 
+-unsigned inet_addr_type(__be32 addr)
++unsigned inet_addr_type(struct net *net, __be32 addr)
+ {
+-	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
++	struct flowi		fl = {
++		.fl_net = net,
++		.nl_u = { .ip4_u = { .daddr = addr } }
++	};
+ 	struct fib_result	res;
+ 	unsigned ret = RTN_BROADCAST;
++	struct fib_table *local_table;
+ 
+ 	if (ZERONET(addr) || BADCLASS(addr))
+ 		return RTN_BROADCAST;
+@@ -160,10 +168,10 @@
+ 	res.r = NULL;
+ #endif
+ 
+-	if (ip_fib_local_table) {
++	local_table = fib_get_table(net, RT_TABLE_LOCAL);
++	if (local_table) {
+ 		ret = RTN_UNICAST;
+-		if (!ip_fib_local_table->tb_lookup(ip_fib_local_table,
+-						   &fl, &res)) {
++		if (!local_table->tb_lookup(local_table, &fl, &res)) {
+ 			ret = res.type;
+ 			fib_res_put(&res);
+ 		}
+@@ -183,7 +191,8 @@
+ 			struct net_device *dev, __be32 *spec_dst, u32 *itag)
+ {
+ 	struct in_device *in_dev;
+-	struct flowi fl = { .nl_u = { .ip4_u =
++	struct flowi fl = { .fl_net = dev->nd_net,
++			    .nl_u = { .ip4_u =
+ 				      { .daddr = src,
+ 					.saddr = dst,
+ 					.tos = tos } },
+@@ -267,13 +276,16 @@
+ 	return len + nla_total_size(4);
+ }
+ 
+-static int rtentry_to_fib_config(int cmd, struct rtentry *rt,
++static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
+ 				 struct fib_config *cfg)
+ {
+ 	__be32 addr;
+ 	int plen;
+ 
+ 	memset(cfg, 0, sizeof(*cfg));
++	cfg->fc_nlinfo.pid = 0;
++	cfg->fc_nlinfo.nlh = NULL;
++	cfg->fc_nlinfo.net = net;
+ 
+ 	if (rt->rt_dst.sa_family != AF_INET)
+ 		return -EAFNOSUPPORT;
+@@ -334,7 +346,7 @@
+ 		colon = strchr(devname, ':');
+ 		if (colon)
+ 			*colon = 0;
+-		dev = __dev_get_by_name(devname);
++		dev = __dev_get_by_name(net, devname);
+ 		if (!dev)
+ 			return -ENODEV;
+ 		cfg->fc_oif = dev->ifindex;
+@@ -357,7 +369,7 @@
+ 	if (rt->rt_gateway.sa_family == AF_INET && addr) {
+ 		cfg->fc_gw = addr;
+ 		if (rt->rt_flags & RTF_GATEWAY &&
+-		    inet_addr_type(addr) == RTN_UNICAST)
++		    inet_addr_type(net, addr) == RTN_UNICAST)
+ 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
+ 	}
+ 
+@@ -398,7 +410,7 @@
+  *	Handle IP routing ioctl calls. These are used to manipulate the routing tables
+  */
+ 
+-int ip_rt_ioctl(unsigned int cmd, void __user *arg)
++int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+ {
+ 	struct fib_config cfg;
+ 	struct rtentry rt;
+@@ -414,18 +426,18 @@
+ 			return -EFAULT;
+ 
+ 		rtnl_lock();
+-		err = rtentry_to_fib_config(cmd, &rt, &cfg);
++		err = rtentry_to_fib_config(net, cmd, &rt, &cfg);
+ 		if (err == 0) {
+ 			struct fib_table *tb;
+ 
+ 			if (cmd == SIOCDELRT) {
+-				tb = fib_get_table(cfg.fc_table);
++				tb = fib_get_table(net, cfg.fc_table);
+ 				if (tb)
+ 					err = tb->tb_delete(tb, &cfg);
+ 				else
+ 					err = -ESRCH;
+ 			} else {
+-				tb = fib_new_table(cfg.fc_table);
++				tb = fib_new_table(net, cfg.fc_table);
+ 				if (tb)
+ 					err = tb->tb_insert(tb, &cfg);
+ 				else
+@@ -480,6 +492,7 @@
+ 
+ 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+ 	cfg->fc_nlinfo.nlh = nlh;
++	cfg->fc_nlinfo.net = skb->sk->sk_net;
+ 
+ 	if (cfg->fc_type > RTN_MAX) {
+ 		err = -EINVAL;
+@@ -527,6 +540,7 @@
+ 
+ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct fib_config cfg;
+ 	struct fib_table *tb;
+ 	int err;
+@@ -535,7 +549,7 @@
+ 	if (err < 0)
+ 		goto errout;
+ 
+-	tb = fib_get_table(cfg.fc_table);
++	tb = fib_get_table(net, cfg.fc_table);
+ 	if (tb == NULL) {
+ 		err = -ESRCH;
+ 		goto errout;
+@@ -548,6 +562,7 @@
+ 
+ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct fib_config cfg;
+ 	struct fib_table *tb;
+ 	int err;
+@@ -556,7 +571,7 @@
+ 	if (err < 0)
+ 		goto errout;
+ 
+-	tb = fib_new_table(cfg.fc_table);
++	tb = fib_new_table(net, cfg.fc_table);
+ 	if (tb == NULL) {
+ 		err = -ENOBUFS;
+ 		goto errout;
+@@ -569,6 +584,7 @@
+ 
+ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	unsigned int h, s_h;
+ 	unsigned int e = 0, s_e;
+ 	struct fib_table *tb;
+@@ -583,8 +599,9 @@
+ 	s_e = cb->args[1];
+ 
+ 	for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
++		struct hlist_head *head = &net->ip_fib_table_hash[h];
+ 		e = 0;
+-		hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) {
++		hlist_for_each_entry(tb, node, head, tb_hlist) {
+ 			if (e < s_e)
+ 				goto next;
+ 			if (dumped)
+@@ -613,6 +630,7 @@
+ 
+ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
+ {
++	struct net *net = ifa->ifa_dev->dev->nd_net;
+ 	struct fib_table *tb;
+ 	struct fib_config cfg = {
+ 		.fc_protocol = RTPROT_KERNEL,
+@@ -622,12 +640,13 @@
+ 		.fc_prefsrc = ifa->ifa_local,
+ 		.fc_oif = ifa->ifa_dev->dev->ifindex,
+ 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
++		.fc_nlinfo.net = net,
+ 	};
+ 
+ 	if (type == RTN_UNICAST)
+-		tb = fib_new_table(RT_TABLE_MAIN);
++		tb = fib_new_table(net, RT_TABLE_MAIN);
+ 	else
+-		tb = fib_new_table(RT_TABLE_LOCAL);
++		tb = fib_new_table(net, RT_TABLE_LOCAL);
+ 
+ 	if (tb == NULL)
+ 		return;
+@@ -688,6 +707,7 @@
+ {
+ 	struct in_device *in_dev = ifa->ifa_dev;
+ 	struct net_device *dev = in_dev->dev;
++	struct net *net = dev->nd_net;
+ 	struct in_ifaddr *ifa1;
+ 	struct in_ifaddr *prim = ifa;
+ 	__be32 brd = ifa->ifa_address|~ifa->ifa_mask;
+@@ -736,15 +756,15 @@
+ 		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
+ 
+ 		/* Check, that this local address finally disappeared. */
+-		if (inet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
++		if (inet_addr_type(net, ifa->ifa_local) != RTN_LOCAL) {
+ 			/* And the last, but not the least thing.
+ 			   We must flush stray FIB entries.
+ 
+ 			   First of all, we scan fib_info list searching
+ 			   for stray nexthop entries, then ignite fib_flush.
+ 			*/
+-			if (fib_sync_down(ifa->ifa_local, NULL, 0))
+-				fib_flush();
++			if (fib_sync_down(net, ifa->ifa_local, NULL, 0))
++				fib_flush(net);
+ 		}
+ 	}
+ #undef LOCAL_OK
+@@ -753,11 +773,12 @@
+ #undef BRD1_OK
+ }
+ 
+-static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
++static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn, struct fib_table *tb )
+ {
+ 
+ 	struct fib_result       res;
+-	struct flowi            fl = { .mark = frn->fl_mark,
++	struct flowi            fl = { .fl_net = net,
++				       .mark = frn->fl_mark,
+ 				       .nl_u = { .ip4_u = { .daddr = frn->fl_addr,
+ 							    .tos = frn->fl_tos,
+ 							    .scope = frn->fl_scope } } };
+@@ -786,6 +807,7 @@
+ 
+ static void nl_fib_input(struct sock *sk, int len)
+ {
++	struct net *net = sk->sk_net;
+ 	struct sk_buff *skb = NULL;
+ 	struct nlmsghdr *nlh = NULL;
+ 	struct fib_result_nl *frn;
+@@ -804,9 +826,9 @@
+ 	}
+ 
+ 	frn = (struct fib_result_nl *) NLMSG_DATA(nlh);
+-	tb = fib_get_table(frn->tb_id_in);
++	tb = fib_get_table(net, frn->tb_id_in);
+ 
+-	nl_fib_lookup(frn, tb);
++	nl_fib_lookup(net, frn, tb);
+ 
+ 	pid = NETLINK_CB(skb).pid;       /* pid of sending process */
+ 	NETLINK_CB(skb).pid = 0;         /* from kernel */
+@@ -814,16 +836,36 @@
+ 	netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
+ }
+ 
+-static void nl_fib_lookup_init(void)
++static int nl_fib_lookup_init(struct net *net)
+ {
+-      netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, NULL,
+-      			    THIS_MODULE);
++	int error = -ENOMEM;
++	struct sock *sk;
++	sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, nl_fib_input,
++					NULL, THIS_MODULE);
++	if (sk) {
++		/* Don't hold an extra reference on the namespace */
++		put_net(sk->sk_net);
++		net->nlfl = sk;
++		error = 0;
++	}
++	return error;
++}
++
++static void nl_fib_lookup_exit(struct net *net)
++{
++	/* At the last minute lie and say this is a socket for the
++	 * initial network namespace.  So the socket will be safe to
++	 * free.
++	 */
++	net->nlfl->sk_net = get_net(&init_net);
++	sock_put(net->nlfl);
+ }
+ 
+ static void fib_disable_ip(struct net_device *dev, int force)
+ {
+-	if (fib_sync_down(0, dev, force))
+-		fib_flush();
++	struct net *net = dev->nd_net;
++	if (fib_sync_down(net, 0, dev, force))
++		fib_flush(net);
+ 	rt_cache_flush(0);
+ 	arp_ifdown(dev);
+ }
+@@ -860,6 +902,9 @@
+ 	struct net_device *dev = ptr;
+ 	struct in_device *in_dev = __in_dev_get_rtnl(dev);
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (event == NETDEV_UNREGISTER) {
+ 		fib_disable_ip(dev, 2);
+ 		return NOTIFY_DONE;
+@@ -889,6 +934,85 @@
+ 	return NOTIFY_DONE;
+ }
+ 
++static int ip_fib_net_init(struct net *net)
++{
++	unsigned int i;
++
++	net->ip_fib_table_hash = kzalloc(
++		sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL);
++	if (!net->ip_fib_table_hash)
++		return -ENOMEM;
++
++	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
++		INIT_HLIST_HEAD(&net->ip_fib_table_hash[i]);
++#ifndef CONFIG_IP_MULTIPLE_TABLES
++	net->ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
++	hlist_add_head_rcu(&net->ip_fib_local_table->tb_hlist,
++				&net->ip_fib_table_hash[0]);
++	net->ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN);
++	hlist_add_head_rcu(&net->ip_fib_main_table->tb_hlist,
++				&net->ip_fib_table_hash[0]);
++#else
++	fib4_rules_init(net);
++#endif
++	return 0;
++}
++
++static void ip_fib_net_exit(struct net *net)
++{
++	unsigned int i;
++
++#ifdef CONFIG_IP_MULTIPLE_TABLES
++	fib4_rules_exit(net);
++#endif
++
++	synchronize_rcu(); /* needed? */
++	for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
++		struct fib_table *tb;
++		struct hlist_head *head;
++		struct hlist_node *node, *tmp;
++
++		head = &net->ip_fib_table_hash[i];
++		hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) {
++			hlist_del(node);
++			fib_hash_exit(tb);
++		}
++	}
++	kfree(net->ip_fib_table_hash);
++}
++
++static int fib_net_init(struct net *net)
++{
++	int error;
++
++	if ((error = ip_fib_net_init(net)))
++		goto out;
++	if ((error = fib_info_init(net)))
++		goto out_info;
++	if ((error = nl_fib_lookup_init(net)))
++		goto out_nlfl;
++	if ((error = fib_proc_init(net)))
++		goto out_proc;
++out:
++	return error;
++out_proc:
++	nl_fib_lookup_exit(net);
++out_nlfl:
++	fib_info_exit(net);
++out_info:
++	ip_fib_net_exit(net);
++	goto out;
++}
++
++static void fib_net_exit(struct net *net)
++{
++	fib_proc_exit(net);
++	nl_fib_lookup_exit(net);
++	fib_info_exit(net);
++	ip_fib_net_exit(net);
++}
++
+ static struct notifier_block fib_inetaddr_notifier = {
+ 	.notifier_call =fib_inetaddr_event,
+ };
+@@ -897,28 +1021,20 @@
+ 	.notifier_call =fib_netdev_event,
+ };
+ 
++static struct pernet_operations fib_net_ops = {
++	.init = fib_net_init,
++	.exit = fib_net_exit,
++};
++
+ void __init ip_fib_init(void)
+ {
+-	unsigned int i;
+-
+-	for (i = 0; i < FIB_TABLE_HASHSZ; i++)
+-		INIT_HLIST_HEAD(&fib_table_hash[i]);
+-#ifndef CONFIG_IP_MULTIPLE_TABLES
+-	ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL);
+-	hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]);
+-	ip_fib_main_table  = fib_hash_init(RT_TABLE_MAIN);
+-	hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]);
+-#else
+-	fib4_rules_init();
+-#endif
+-
+-	register_netdevice_notifier(&fib_netdev_notifier);
+-	register_inetaddr_notifier(&fib_inetaddr_notifier);
+-	nl_fib_lookup_init();
+-
+ 	rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL);
+ 	rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL);
+ 	rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib);
++
++	register_pernet_subsys(&fib_net_ops);
++	register_netdevice_notifier(&fib_netdev_notifier);
++	register_inetaddr_notifier(&fib_inetaddr_notifier);
+ }
+ 
+ EXPORT_SYMBOL(inet_addr_type);
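
fib_frontend.c moves the FIB table hash, the nl_fib_lookup netlink socket, and the notifier setup behind pernet_operations. The subtlest part is the netlink socket's reference cycle: netlink_kernel_create() pins the namespace through sk->sk_net, which would keep every namespace alive forever, so nl_fib_lookup_init() drops that reference immediately and nl_fib_lookup_exit() briefly re-points the socket at init_net so sock_put() can run its normal teardown safely. Callers, meanwhile, must now name the namespace explicitly; a hypothetical caller under the new signatures, mirroring inet_rtm_newroute():

static int example_add_route(struct net *net, struct fib_config *cfg)
{
	/* table ids now resolve within a single namespace only */
	struct fib_table *tb = fib_new_table(net, cfg->fc_table);

	if (!tb)
		return -ENOBUFS;
	return tb->tb_insert(tb, cfg);
}
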
+diff -Nurb linux-2.6.22-try2/net/ipv4/fib_hash.c linux-2.6.22-try2-netns/net/ipv4/fib_hash.c
+--- linux-2.6.22-try2/net/ipv4/fib_hash.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/fib_hash.c	2007-12-19 22:49:18.000000000 -0500
+@@ -40,6 +40,7 @@
+ #include <net/route.h>
+ #include <net/tcp.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ #include <net/ip_fib.h>
+ 
+ #include "fib_lookup.h"
+@@ -274,11 +275,10 @@
+ 	return err;
+ }
+ 
+-static int fn_hash_last_dflt=-1;
+-
+ static void
+ fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
+ {
++	struct net *net = flp->fl_net;
+ 	int order, last_idx;
+ 	struct hlist_node *node;
+ 	struct fib_node *f;
+@@ -316,12 +316,12 @@
+ 				if (next_fi != res->fi)
+ 					break;
+ 			} else if (!fib_detect_death(fi, order, &last_resort,
+-						     &last_idx, &fn_hash_last_dflt)) {
++						     &last_idx, &net->fn_hash_last_dflt)) {
+ 				if (res->fi)
+ 					fib_info_put(res->fi);
+ 				res->fi = fi;
+ 				atomic_inc(&fi->fib_clntref);
+-				fn_hash_last_dflt = order;
++				net->fn_hash_last_dflt = order;
+ 				goto out;
+ 			}
+ 			fi = next_fi;
+@@ -330,16 +330,16 @@
+ 	}
+ 
+ 	if (order <= 0 || fi == NULL) {
+-		fn_hash_last_dflt = -1;
++		net->fn_hash_last_dflt = -1;
+ 		goto out;
+ 	}
+ 
+-	if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) {
++	if (!fib_detect_death(fi, order, &last_resort, &last_idx, &net->fn_hash_last_dflt)) {
+ 		if (res->fi)
+ 			fib_info_put(res->fi);
+ 		res->fi = fi;
+ 		atomic_inc(&fi->fib_clntref);
+-		fn_hash_last_dflt = order;
++		net->fn_hash_last_dflt = order;
+ 		goto out;
+ 	}
+ 
+@@ -350,7 +350,7 @@
+ 		if (last_resort)
+ 			atomic_inc(&last_resort->fib_clntref);
+ 	}
+-	fn_hash_last_dflt = last_idx;
++	net->fn_hash_last_dflt = last_idx;
+ out:
+ 	read_unlock(&fib_hash_lock);
+ }
+@@ -759,11 +759,15 @@
+ 	return skb->len;
+ }
+ 
+-#ifdef CONFIG_IP_MULTIPLE_TABLES
++void fib_hash_exit(struct fib_table *tb)
++{
++	if (!tb)
++		return;
++	fn_hash_flush(tb);
++	kfree(tb);
++}
++
+ struct fib_table * fib_hash_init(u32 id)
+-#else
+-struct fib_table * __init fib_hash_init(u32 id)
+-#endif
+ {
+ 	struct fib_table *tb;
+ 
+@@ -799,6 +803,7 @@
+ #ifdef CONFIG_PROC_FS
+ 
+ struct fib_iter_state {
++	struct net *net;
+ 	struct fn_zone	*zone;
+ 	int		bucket;
+ 	struct hlist_head *hash_head;
+@@ -812,7 +817,8 @@
+ static struct fib_alias *fib_get_first(struct seq_file *seq)
+ {
+ 	struct fib_iter_state *iter = seq->private;
+-	struct fn_hash *table = (struct fn_hash *) ip_fib_main_table->tb_data;
++	struct fib_table *main_table = fib_get_table(iter->net, RT_TABLE_MAIN);
++	struct fn_hash *table = (struct fn_hash *) main_table->tb_data;
+ 
+ 	iter->bucket    = 0;
+ 	iter->hash_head = NULL;
+@@ -948,10 +954,11 @@
+ 
+ static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
+ {
++	struct fib_iter_state *iter = seq->private;
+ 	void *v = NULL;
+ 
+ 	read_lock(&fib_hash_lock);
+-	if (ip_fib_main_table)
++	if (fib_get_table(iter->net, RT_TABLE_MAIN))
+ 		v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
+ 	return v;
+ }
+@@ -1051,6 +1058,7 @@
+ 
+ 	seq	     = file->private_data;
+ 	seq->private = s;
++	s->net = get_net(PROC_NET(inode));
+ out:
+ 	return rc;
+ out_kfree:
+@@ -1058,23 +1066,32 @@
+ 	goto out;
+ }
+ 
++static int fib_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct fib_iter_state *iter = seq->private;
++	put_net(iter->net);
++	return seq_release_private(inode, file);
++}
++
+ static const struct file_operations fib_seq_fops = {
+ 	.owner		= THIS_MODULE,
+ 	.open           = fib_seq_open,
+ 	.read           = seq_read,
+ 	.llseek         = seq_lseek,
+-	.release	= seq_release_private,
++	.release	= fib_seq_release,
+ };
+ 
+-int __init fib_proc_init(void)
++int fib_proc_init(struct net *net)
+ {
+-	if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops))
++	net->fn_hash_last_dflt = -1;
++	if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_seq_fops))
+ 		return -ENOMEM;
+ 	return 0;
+ }
+ 
+-void __init fib_proc_exit(void)
++void fib_proc_exit(struct net *net)
+ {
+-	proc_net_remove("route");
++	proc_net_remove(net, "route");
+ }
+ #endif /* CONFIG_PROC_FS */
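
The fib_hash.c change is mostly mechanical, but note that fn_hash_last_dflt -- the round-robin cursor used when several default routes exist -- moves into struct net, so default-gateway failover in one namespace no longer perturbs the choice made in another. fib_proc_init() resets it per namespace; restated in isolation (illustrative):

static void example_reset_default_cursor(struct net *net)
{
	/* -1 means "no default route selected yet" in this namespace */
	net->fn_hash_last_dflt = -1;
}
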
+diff -Nurb linux-2.6.22-try2/net/ipv4/fib_rules.c linux-2.6.22-try2-netns/net/ipv4/fib_rules.c
+--- linux-2.6.22-try2/net/ipv4/fib_rules.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/fib_rules.c	2007-12-19 22:49:18.000000000 -0500
+@@ -32,8 +32,6 @@
+ #include <net/ip_fib.h>
+ #include <net/fib_rules.h>
+ 
+-static struct fib_rules_ops fib4_rules_ops;
+-
+ struct fib4_rule
+ {
+ 	struct fib_rule		common;
+@@ -49,35 +47,14 @@
+ #endif
+ };
+ 
+-static struct fib4_rule default_rule = {
+-	.common = {
+-		.refcnt =	ATOMIC_INIT(2),
+-		.pref =		0x7FFF,
+-		.table =	RT_TABLE_DEFAULT,
+-		.action =	FR_ACT_TO_TBL,
+-	},
++struct fib4_rule_table {
++	struct list_head	fib4_rules;
++	struct fib4_rule	default_rule;
++	struct fib4_rule	main_rule;
++	struct fib4_rule	local_rule;
++	struct fib_rules_ops	fib4_rules_ops;
+ };
+ 
+-static struct fib4_rule main_rule = {
+-	.common = {
+-		.refcnt =	ATOMIC_INIT(2),
+-		.pref =		0x7FFE,
+-		.table =	RT_TABLE_MAIN,
+-		.action =	FR_ACT_TO_TBL,
+-	},
+-};
+-
+-static struct fib4_rule local_rule = {
+-	.common = {
+-		.refcnt =	ATOMIC_INIT(2),
+-		.table =	RT_TABLE_LOCAL,
+-		.action =	FR_ACT_TO_TBL,
+-		.flags =	FIB_RULE_PERMANENT,
+-	},
+-};
+-
+-static LIST_HEAD(fib4_rules);
+-
+ #ifdef CONFIG_NET_CLS_ROUTE
+ u32 fib_rules_tclass(struct fib_result *res)
+ {
+@@ -87,12 +64,14 @@
+ 
+ int fib_lookup(struct flowi *flp, struct fib_result *res)
+ {
++	struct net *net = flp->fl_net;
++	struct fib4_rule_table *table = net->fib4_table;
+ 	struct fib_lookup_arg arg = {
+ 		.result = res,
+ 	};
+ 	int err;
+ 
+-	err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg);
++	err = fib_rules_lookup(&table->fib4_rules_ops, flp, 0, &arg);
+ 	res->r = arg.rule;
+ 
+ 	return err;
+@@ -122,7 +101,7 @@
+ 		goto errout;
+ 	}
+ 
+-	if ((tbl = fib_get_table(rule->table)) == NULL)
++	if ((tbl = fib_get_table(flp->fl_net, rule->table)) == NULL)
+ 		goto errout;
+ 
+ 	err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result);
+@@ -138,7 +117,7 @@
+ 	if (res->r && res->r->action == FR_ACT_TO_TBL &&
+ 	    FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
+ 		struct fib_table *tb;
+-		if ((tb = fib_get_table(res->r->table)) != NULL)
++		if ((tb = fib_get_table(flp->fl_net, res->r->table)) != NULL)
+ 			tb->tb_select_default(tb, flp, res);
+ 	}
+ }
+@@ -159,13 +138,13 @@
+ 	return 1;
+ }
+ 
+-static struct fib_table *fib_empty_table(void)
++static struct fib_table *fib_empty_table(struct net *net)
+ {
+ 	u32 id;
+ 
+ 	for (id = 1; id <= RT_TABLE_MAX; id++)
+-		if (fib_get_table(id) == NULL)
+-			return fib_new_table(id);
++		if (fib_get_table(net, id) == NULL)
++			return fib_new_table(net, id);
+ 	return NULL;
+ }
+ 
+@@ -178,6 +157,7 @@
+ 			       struct nlmsghdr *nlh, struct fib_rule_hdr *frh,
+ 			       struct nlattr **tb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	int err = -EINVAL;
+ 	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
+ 
+@@ -188,7 +168,7 @@
+ 		if (rule->action == FR_ACT_TO_TBL) {
+ 			struct fib_table *table;
+ 
+-			table = fib_empty_table();
++			table = fib_empty_table(net);
+ 			if (table == NULL) {
+ 				err = -ENOBUFS;
+ 				goto errout;
+@@ -274,14 +254,15 @@
+ 	return -ENOBUFS;
+ }
+ 
+-static u32 fib4_rule_default_pref(void)
++static u32 fib4_rule_default_pref(struct fib_rules_ops *ops)
+ {
+-	struct list_head *pos;
++	struct list_head *list, *pos;
+ 	struct fib_rule *rule;
+ 
+-	if (!list_empty(&fib4_rules)) {
+-		pos = fib4_rules.next;
+-		if (pos->next != &fib4_rules) {
++	list = ops->rules_list;
++	if (!list_empty(list)) {
++		pos = list->next;
++		if (pos->next != list) {
+ 			rule = list_entry(pos->next, struct fib_rule, list);
+ 			if (rule->pref)
+ 				return rule->pref - 1;
+@@ -298,12 +279,37 @@
+ 	       + nla_total_size(4); /* flow */
+ }
+ 
+-static void fib4_rule_flush_cache(void)
++static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
+ {
+ 	rt_cache_flush(-1);
+ }
+ 
+-static struct fib_rules_ops fib4_rules_ops = {
++static struct fib4_rule_table fib4_rule_table = {
++	.default_rule = {
++		.common = {
++			.refcnt =	ATOMIC_INIT(2),
++			.pref =		0x7FFF,
++			.table =	RT_TABLE_DEFAULT,
++			.action =	FR_ACT_TO_TBL,
++		},
++	},
++	.main_rule = {
++		.common = {
++			.refcnt =	ATOMIC_INIT(2),
++			.pref =		0x7FFE,
++			.table =	RT_TABLE_MAIN,
++			.action =	FR_ACT_TO_TBL,
++		},
++	},
++	.local_rule = {
++		.common = {
++			.refcnt =	ATOMIC_INIT(2),
++			.table =	RT_TABLE_LOCAL,
++			.action =	FR_ACT_TO_TBL,
++			.flags =	FIB_RULE_PERMANENT,
++		},
++	},
++	.fib4_rules_ops = {
+ 	.family		= AF_INET,
+ 	.rule_size	= sizeof(struct fib4_rule),
+ 	.addr_size	= sizeof(u32),
+@@ -317,15 +323,34 @@
+ 	.flush_cache	= fib4_rule_flush_cache,
+ 	.nlgroup	= RTNLGRP_IPV4_RULE,
+ 	.policy		= fib4_rule_policy,
+-	.rules_list	= &fib4_rules,
++		.rules_list	= &fib4_rule_table.fib4_rules,
+ 	.owner		= THIS_MODULE,
++	},
+ };
+ 
+-void __init fib4_rules_init(void)
++
++void fib4_rules_init(struct net *net)
+ {
+-	list_add_tail(&local_rule.common.list, &fib4_rules);
+-	list_add_tail(&main_rule.common.list, &fib4_rules);
+-	list_add_tail(&default_rule.common.list, &fib4_rules);
++	struct fib4_rule_table *table;
++	table = kmemdup(&fib4_rule_table, sizeof(*table), GFP_KERNEL);
++	if (!table)
++		return;
++	INIT_LIST_HEAD(&table->fib4_rules);
++	list_add_tail(&table->local_rule.common.list,   &table->fib4_rules);
++	list_add_tail(&table->main_rule.common.list,    &table->fib4_rules);
++	list_add_tail(&table->default_rule.common.list, &table->fib4_rules);
++	table->fib4_rules_ops.rules_list = &table->fib4_rules;
++	if (fib_rules_register(net, &table->fib4_rules_ops)) {
++		kfree(table);
++		return;
++	}
++	net->fib4_table = table;
++}
+ 
+-	fib_rules_register(&fib4_rules_ops);
++void fib4_rules_exit(struct net *net)
++{
++	struct fib4_rule_table *table = net->fib4_table;
++	if (table)
++		fib_rules_unregister(net, &table->fib4_rules_ops);
++	kfree(table);
+ }
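
fib_rules.c bundles the three built-in rules and their ops into one fib4_rule_table so the whole set can be kmemdup()'d per namespace. The re-pointing after the copy matters: the template's rules_list aims at the template's own list head, so each copy must aim its ops at its own embedded list, or every namespace would share one rule list. Note also that fib4_rules_init() returns void and swallows allocation failure, leaving net->fib4_table NULL for fib_lookup() to trip over. The clone-and-repoint step in isolation (illustrative):

static struct fib4_rule_table *example_clone_rule_table(void)
{
	struct fib4_rule_table *t = kmemdup(&fib4_rule_table, sizeof(*t),
					    GFP_KERNEL);

	if (!t)
		return NULL;
	INIT_LIST_HEAD(&t->fib4_rules);
	list_add_tail(&t->local_rule.common.list,   &t->fib4_rules);
	list_add_tail(&t->main_rule.common.list,    &t->fib4_rules);
	list_add_tail(&t->default_rule.common.list, &t->fib4_rules);
	t->fib4_rules_ops.rules_list = &t->fib4_rules;	/* own list, not the template's */
	return t;
}
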
+diff -Nurb linux-2.6.22-try2/net/ipv4/fib_semantics.c linux-2.6.22-try2-netns/net/ipv4/fib_semantics.c
+--- linux-2.6.22-try2/net/ipv4/fib_semantics.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/fib_semantics.c	2007-12-19 22:49:18.000000000 -0500
+@@ -50,14 +50,9 @@
+ #define FSprintk(a...)
+ 
+ static DEFINE_SPINLOCK(fib_info_lock);
+-static struct hlist_head *fib_info_hash;
+-static struct hlist_head *fib_info_laddrhash;
+-static unsigned int fib_hash_size;
+-static unsigned int fib_info_cnt;
+ 
+ #define DEVINDEX_HASHBITS 8
+ #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
+-static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
+ 
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+ 
+@@ -153,7 +148,8 @@
+ 			dev_put(nh->nh_dev);
+ 		nh->nh_dev = NULL;
+ 	} endfor_nexthops(fi);
+-	fib_info_cnt--;
++	fi->fib_net->fib_info_cnt--;
++	release_net(fi->fib_net);
+ 	kfree(fi);
+ }
+ 
+@@ -196,9 +192,9 @@
+ 	return 0;
+ }
+ 
+-static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
++static inline unsigned int fib_info_hashfn(struct net *net, const struct fib_info *fi)
+ {
+-	unsigned int mask = (fib_hash_size - 1);
++	unsigned int mask = net->fib_info_hash_size - 1;
+ 	unsigned int val = fi->fib_nhs;
+ 
+ 	val ^= fi->fib_protocol;
+@@ -208,15 +204,15 @@
+ 	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
+ }
+ 
+-static struct fib_info *fib_find_info(const struct fib_info *nfi)
++static struct fib_info *fib_find_info(struct net *net, const struct fib_info *nfi)
+ {
+ 	struct hlist_head *head;
+ 	struct hlist_node *node;
+ 	struct fib_info *fi;
+ 	unsigned int hash;
+ 
+-	hash = fib_info_hashfn(nfi);
+-	head = &fib_info_hash[hash];
++	hash = fib_info_hashfn(net, nfi);
++	head = &net->fib_info_hash[hash];
+ 
+ 	hlist_for_each_entry(fi, node, head, fib_hash) {
+ 		if (fi->fib_nhs != nfi->fib_nhs)
+@@ -249,6 +245,7 @@
+ 
+ int ip_fib_check_default(__be32 gw, struct net_device *dev)
+ {
++	struct net *net = dev->nd_net;
+ 	struct hlist_head *head;
+ 	struct hlist_node *node;
+ 	struct fib_nh *nh;
+@@ -257,7 +254,7 @@
+ 	spin_lock(&fib_info_lock);
+ 
+ 	hash = fib_devindex_hashfn(dev->ifindex);
+-	head = &fib_info_devhash[hash];
++	head = &net->fib_info_devhash[hash];
+ 	hlist_for_each_entry(nh, node, head, nh_hash) {
+ 		if (nh->nh_dev == dev &&
+ 		    nh->nh_gw == gw &&
+@@ -320,11 +317,11 @@
+ 		kfree_skb(skb);
+ 		goto errout;
+ 	}
+-	err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
++	err = rtnl_notify(skb, info->net, info->pid, RTNLGRP_IPV4_ROUTE,
+ 			  info->nlh, GFP_KERNEL);
+ errout:
+ 	if (err < 0)
+-		rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
++		rtnl_set_sk_err(info->net, RTNLGRP_IPV4_ROUTE, err);
+ }
+ 
+ /* Return the first fib alias matching TOS with
+@@ -517,6 +514,7 @@
+ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
+ 			struct fib_nh *nh)
+ {
++	struct net *net = cfg->fc_nlinfo.net;
+ 	int err;
+ 
+ 	if (nh->nh_gw) {
+@@ -531,9 +529,9 @@
+ 
+ 			if (cfg->fc_scope >= RT_SCOPE_LINK)
+ 				return -EINVAL;
+-			if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
++			if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
+ 				return -EINVAL;
+-			if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
++			if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
+ 				return -ENODEV;
+ 			if (!(dev->flags&IFF_UP))
+ 				return -ENETDOWN;
+@@ -544,6 +542,7 @@
+ 		}
+ 		{
+ 			struct flowi fl = {
++				.fl_net = net,
+ 				.nl_u = {
+ 					.ip4_u = {
+ 						.daddr = nh->nh_gw,
+@@ -580,7 +579,7 @@
+ 		if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
+ 			return -EINVAL;
+ 
+-		in_dev = inetdev_by_index(nh->nh_oif);
++		in_dev = inetdev_by_index(net, nh->nh_oif);
+ 		if (in_dev == NULL)
+ 			return -ENODEV;
+ 		if (!(in_dev->dev->flags&IFF_UP)) {
+@@ -595,9 +594,9 @@
+ 	return 0;
+ }
+ 
+-static inline unsigned int fib_laddr_hashfn(__be32 val)
++static inline unsigned int fib_laddr_hashfn(struct net *net, __be32 val)
+ {
+-	unsigned int mask = (fib_hash_size - 1);
++	unsigned int mask = net->fib_info_hash_size - 1;
+ 
+ 	return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
+ }
+@@ -622,21 +621,22 @@
+ 		free_pages((unsigned long) hash, get_order(bytes));
+ }
+ 
+-static void fib_hash_move(struct hlist_head *new_info_hash,
++static void fib_hash_move(struct net *net,
++			  struct hlist_head *new_info_hash,
+ 			  struct hlist_head *new_laddrhash,
+ 			  unsigned int new_size)
+ {
+ 	struct hlist_head *old_info_hash, *old_laddrhash;
+-	unsigned int old_size = fib_hash_size;
++	unsigned int old_size = net->fib_info_hash_size;
+ 	unsigned int i, bytes;
+ 
+ 	spin_lock_bh(&fib_info_lock);
+-	old_info_hash = fib_info_hash;
+-	old_laddrhash = fib_info_laddrhash;
+-	fib_hash_size = new_size;
++	old_info_hash = net->fib_info_hash;
++	old_laddrhash = net->fib_info_laddrhash;
++	net->fib_info_hash_size = new_size;
+ 
+ 	for (i = 0; i < old_size; i++) {
+-		struct hlist_head *head = &fib_info_hash[i];
++		struct hlist_head *head = &net->fib_info_hash[i];
+ 		struct hlist_node *node, *n;
+ 		struct fib_info *fi;
+ 
+@@ -646,15 +646,15 @@
+ 
+ 			hlist_del(&fi->fib_hash);
+ 
+-			new_hash = fib_info_hashfn(fi);
++			new_hash = fib_info_hashfn(net, fi);
+ 			dest = &new_info_hash[new_hash];
+ 			hlist_add_head(&fi->fib_hash, dest);
+ 		}
+ 	}
+-	fib_info_hash = new_info_hash;
++	net->fib_info_hash = new_info_hash;
+ 
+ 	for (i = 0; i < old_size; i++) {
+-		struct hlist_head *lhead = &fib_info_laddrhash[i];
++		struct hlist_head *lhead = &net->fib_info_laddrhash[i];
+ 		struct hlist_node *node, *n;
+ 		struct fib_info *fi;
+ 
+@@ -664,12 +664,12 @@
+ 
+ 			hlist_del(&fi->fib_lhash);
+ 
+-			new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
++			new_hash = fib_laddr_hashfn(net, fi->fib_prefsrc);
+ 			ldest = &new_laddrhash[new_hash];
+ 			hlist_add_head(&fi->fib_lhash, ldest);
+ 		}
+ 	}
+-	fib_info_laddrhash = new_laddrhash;
++	net->fib_info_laddrhash = new_laddrhash;
+ 
+ 	spin_unlock_bh(&fib_info_lock);
+ 
+@@ -680,6 +680,7 @@
+ 
+ struct fib_info *fib_create_info(struct fib_config *cfg)
+ {
++	struct net *net = cfg->fc_nlinfo.net;
+ 	int err;
+ 	struct fib_info *fi = NULL;
+ 	struct fib_info *ofi;
+@@ -698,8 +699,8 @@
+ #endif
+ 
+ 	err = -ENOBUFS;
+-	if (fib_info_cnt >= fib_hash_size) {
+-		unsigned int new_size = fib_hash_size << 1;
++	if (net->fib_info_cnt >= net->fib_info_hash_size) {
++		unsigned int new_size = net->fib_info_hash_size << 1;
+ 		struct hlist_head *new_info_hash;
+ 		struct hlist_head *new_laddrhash;
+ 		unsigned int bytes;
+@@ -716,18 +717,19 @@
+ 			memset(new_info_hash, 0, bytes);
+ 			memset(new_laddrhash, 0, bytes);
+ 
+-			fib_hash_move(new_info_hash, new_laddrhash, new_size);
++			fib_hash_move(net, new_info_hash, new_laddrhash, new_size);
+ 		}
+ 
+-		if (!fib_hash_size)
++		if (!net->fib_info_hash_size)
+ 			goto failure;
+ 	}
+ 
+ 	fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
+ 	if (fi == NULL)
+ 		goto failure;
+-	fib_info_cnt++;
++	net->fib_info_cnt++;
+ 
++	fi->fib_net = hold_net(net);
+ 	fi->fib_protocol = cfg->fc_protocol;
+ 	fi->fib_flags = cfg->fc_flags;
+ 	fi->fib_priority = cfg->fc_priority;
+@@ -799,7 +801,7 @@
+ 		if (nhs != 1 || nh->nh_gw)
+ 			goto err_inval;
+ 		nh->nh_scope = RT_SCOPE_NOWHERE;
+-		nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
++		nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
+ 		err = -ENODEV;
+ 		if (nh->nh_dev == NULL)
+ 			goto failure;
+@@ -813,12 +815,12 @@
+ 	if (fi->fib_prefsrc) {
+ 		if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
+ 		    fi->fib_prefsrc != cfg->fc_dst)
+-			if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
++			if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
+ 				goto err_inval;
+ 	}
+ 
+ link_it:
+-	if ((ofi = fib_find_info(fi)) != NULL) {
++	if ((ofi = fib_find_info(net, fi)) != NULL) {
+ 		fi->fib_dead = 1;
+ 		free_fib_info(fi);
+ 		ofi->fib_treeref++;
+@@ -829,11 +831,13 @@
+ 	atomic_inc(&fi->fib_clntref);
+ 	spin_lock_bh(&fib_info_lock);
+ 	hlist_add_head(&fi->fib_hash,
+-		       &fib_info_hash[fib_info_hashfn(fi)]);
++		       &net->fib_info_hash[fib_info_hashfn(net, fi)]);
+ 	if (fi->fib_prefsrc) {
+ 		struct hlist_head *head;
++		unsigned int hash;
+ 
+-		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
++		hash = fib_laddr_hashfn(net, fi->fib_prefsrc);
++		head = &net->fib_info_laddrhash[hash];
+ 		hlist_add_head(&fi->fib_lhash, head);
+ 	}
+ 	change_nexthops(fi) {
+@@ -843,7 +847,7 @@
+ 		if (!nh->nh_dev)
+ 			continue;
+ 		hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
+-		head = &fib_info_devhash[hash];
++		head = &net->fib_info_devhash[hash];
+ 		hlist_add_head(&nh->nh_hash, head);
+ 	} endfor_nexthops(fi)
+ 	spin_unlock_bh(&fib_info_lock);
+@@ -1030,7 +1034,7 @@
+    - device went down -> we must shutdown all nexthops going via it.
+  */
+ 
+-int fib_sync_down(__be32 local, struct net_device *dev, int force)
++int fib_sync_down(struct net *net, __be32 local, struct net_device *dev, int force)
+ {
+ 	int ret = 0;
+ 	int scope = RT_SCOPE_NOWHERE;
+@@ -1038,9 +1042,9 @@
+ 	if (force)
+ 		scope = -1;
+ 
+-	if (local && fib_info_laddrhash) {
+-		unsigned int hash = fib_laddr_hashfn(local);
+-		struct hlist_head *head = &fib_info_laddrhash[hash];
++	if (local && net->fib_info_laddrhash) {
++		unsigned int hash = fib_laddr_hashfn(net, local);
++		struct hlist_head *head = &net->fib_info_laddrhash[hash];
+ 		struct hlist_node *node;
+ 		struct fib_info *fi;
+ 
+@@ -1055,7 +1059,7 @@
+ 	if (dev) {
+ 		struct fib_info *prev_fi = NULL;
+ 		unsigned int hash = fib_devindex_hashfn(dev->ifindex);
+-		struct hlist_head *head = &fib_info_devhash[hash];
++		struct hlist_head *head = &net->fib_info_devhash[hash];
+ 		struct hlist_node *node;
+ 		struct fib_nh *nh;
+ 
+@@ -1108,6 +1112,7 @@
+ 
+ int fib_sync_up(struct net_device *dev)
+ {
++	struct net *net = dev->nd_net;
+ 	struct fib_info *prev_fi;
+ 	unsigned int hash;
+ 	struct hlist_head *head;
+@@ -1120,7 +1125,7 @@
+ 
+ 	prev_fi = NULL;
+ 	hash = fib_devindex_hashfn(dev->ifindex);
+-	head = &fib_info_devhash[hash];
++	head = &net->fib_info_devhash[hash];
+ 	ret = 0;
+ 
+ 	hlist_for_each_entry(nh, node, head, nh_hash) {
+@@ -1210,3 +1215,17 @@
+ 	spin_unlock_bh(&fib_multipath_lock);
+ }
+ #endif
++
++int fib_info_init(struct net *net)
++{
++	net->fib_info_devhash = kzalloc(
++		sizeof(struct hlist_head)*DEVINDEX_HASHSIZE, GFP_KERNEL);
++	if (!net->fib_info_devhash)
++		return -ENOMEM;
++	return 0;
++}
++
++void fib_info_exit(struct net *net)
++{
++	kfree(net->fib_info_devhash);
++}
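
The fib_semantics.c hunks above are a mechanical conversion: the file-scope fib_info_hash, fib_info_laddrhash, fib_info_devhash, fib_hash_size and fib_info_cnt globals become per-namespace fields, every hash function gains a struct net argument, and fib_info_init()/fib_info_exit() manage the fixed-size device hash. A sketch of the struct net fields this assumes, with names taken from the hunks and everything else elided:

	struct net {
		/* ... unrelated namespace state ... */

		/* IPv4 fib_info tables, formerly globals in fib_semantics.c */
		struct hlist_head	*fib_info_hash;		/* fib_info_hashfn(net, fi) */
		struct hlist_head	*fib_info_laddrhash;	/* fib_laddr_hashfn(net, prefsrc) */
		struct hlist_head	*fib_info_devhash;	/* DEVINDEX_HASHSIZE buckets */
		unsigned int		fib_info_hash_size;	/* power of two, doubled on demand */
		unsigned int		fib_info_cnt;

		int			trie_last_dflt;		/* fib_trie default route cursor */
	};
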
+diff -Nurb linux-2.6.22-try2/net/ipv4/fib_trie.c linux-2.6.22-try2-netns/net/ipv4/fib_trie.c
+--- linux-2.6.22-try2/net/ipv4/fib_trie.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/fib_trie.c	2007-12-19 22:49:18.000000000 -0500
+@@ -78,6 +78,7 @@
+ #include <net/route.h>
+ #include <net/tcp.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ #include <net/ip_fib.h>
+ #include "fib_lookup.h"
+ 
+@@ -172,7 +173,6 @@
+ static void tnode_free(struct tnode *tn);
+ 
+ static struct kmem_cache *fn_alias_kmem __read_mostly;
+-static struct trie *trie_local = NULL, *trie_main = NULL;
+ 
+ 
+ /* rcu_read_lock needs to be hold by caller from readside */
+@@ -290,11 +290,10 @@
+ 	WARN_ON(tn && tn->pos+tn->bits > 32);
+ }
+ 
+-static int halve_threshold = 25;
+-static int inflate_threshold = 50;
+-static int halve_threshold_root = 8;
+-static int inflate_threshold_root = 15;
+-
++static const int halve_threshold = 25;
++static const int inflate_threshold = 50;
++static const int halve_threshold_root = 15;
++static const int inflate_threshold_root = 25;
+ 
+ static void __alias_free_mem(struct rcu_head *head)
+ {
+@@ -1771,11 +1770,10 @@
+ 	return found;
+ }
+ 
+-static int trie_last_dflt = -1;
+-
+ static void
+ fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
+ {
++	struct net *net = flp->fl_net;
+ 	struct trie *t = (struct trie *) tb->tb_data;
+ 	int order, last_idx;
+ 	struct fib_info *fi = NULL;
+@@ -1819,28 +1817,28 @@
+ 			if (next_fi != res->fi)
+ 				break;
+ 		} else if (!fib_detect_death(fi, order, &last_resort,
+-					     &last_idx, &trie_last_dflt)) {
++					     &last_idx, &net->trie_last_dflt)) {
+ 			if (res->fi)
+ 				fib_info_put(res->fi);
+ 			res->fi = fi;
+ 			atomic_inc(&fi->fib_clntref);
+-			trie_last_dflt = order;
++			net->trie_last_dflt = order;
+ 			goto out;
+ 		}
+ 		fi = next_fi;
+ 		order++;
+ 	}
+ 	if (order <= 0 || fi == NULL) {
+-		trie_last_dflt = -1;
++		net->trie_last_dflt = -1;
+ 		goto out;
+ 	}
+ 
+-	if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
++	if (!fib_detect_death(fi, order, &last_resort, &last_idx, &net->trie_last_dflt)) {
+ 		if (res->fi)
+ 			fib_info_put(res->fi);
+ 		res->fi = fi;
+ 		atomic_inc(&fi->fib_clntref);
+-		trie_last_dflt = order;
++		net->trie_last_dflt = order;
+ 		goto out;
+ 	}
+ 	if (last_idx >= 0) {
+@@ -1850,7 +1848,7 @@
+ 		if (last_resort)
+ 			atomic_inc(&last_resort->fib_clntref);
+ 	}
+-	trie_last_dflt = last_idx;
++	net->trie_last_dflt = last_idx;
+  out:;
+ 	rcu_read_unlock();
+ }
+@@ -1957,11 +1955,15 @@
+ 
+ /* Fix more generic FIB names for init later */
+ 
+-#ifdef CONFIG_IP_MULTIPLE_TABLES
++void fib_hash_exit(struct fib_table *tb)
++{
++	if (!tb)
++		return;
++	fn_trie_flush(tb);
++	kfree(tb);
++}
++
+ struct fib_table * fib_hash_init(u32 id)
+-#else
+-struct fib_table * __init fib_hash_init(u32 id)
+-#endif
+ {
+ 	struct fib_table *tb;
+ 	struct trie *t;
+@@ -1991,11 +1993,6 @@
+ 	trie_init(t);
+ 
+ 	if (id == RT_TABLE_LOCAL)
+-		trie_local = t;
+-	else if (id == RT_TABLE_MAIN)
+-		trie_main = t;
+-
+-	if (id == RT_TABLE_LOCAL)
+ 		printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
+ 
+ 	return tb;
+@@ -2004,6 +2001,8 @@
+ #ifdef CONFIG_PROC_FS
+ /* Depth first Trie walk iterator */
+ struct fib_trie_iter {
++	struct net *net;
++	struct trie *trie_local, *trie_main;
+ 	struct tnode *tnode;
+ 	struct trie *trie;
+ 	unsigned index;
+@@ -2170,7 +2169,21 @@
+ 
+ static int fib_triestat_seq_show(struct seq_file *seq, void *v)
+ {
++	struct net *net = seq->private;
++	struct trie *trie_local, *trie_main;
+ 	struct trie_stat *stat;
++	struct fib_table *tb;
++
++	trie_local = NULL;
++	tb = fib_get_table(net, RT_TABLE_LOCAL);
++	if (tb)
++		trie_local = (struct trie *) tb->tb_data;
++
++	trie_main = NULL;
++	tb = fib_get_table(net, RT_TABLE_MAIN);
++	if (tb)
++		trie_main = (struct trie *) tb->tb_data;
++
+ 
+ 	stat = kmalloc(sizeof(*stat), GFP_KERNEL);
+ 	if (!stat)
+@@ -2197,7 +2210,15 @@
+ 
+ static int fib_triestat_seq_open(struct inode *inode, struct file *file)
+ {
+-	return single_open(file, fib_triestat_seq_show, NULL);
++	return single_open(file, fib_triestat_seq_show,
++				get_net(PROC_NET(inode)));
++}
++
++static int fib_triestat_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	put_net(seq->private);
++	return single_release(inode, file);
+ }
+ 
+ static const struct file_operations fib_triestat_fops = {
+@@ -2205,7 +2226,7 @@
+ 	.open	= fib_triestat_seq_open,
+ 	.read	= seq_read,
+ 	.llseek	= seq_lseek,
+-	.release = single_release,
++	.release = fib_triestat_seq_release,
+ };
+ 
+ static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
+@@ -2214,13 +2235,13 @@
+ 	loff_t idx = 0;
+ 	struct node *n;
+ 
+-	for (n = fib_trie_get_first(iter, trie_local);
++	for (n = fib_trie_get_first(iter, iter->trie_local);
+ 	     n; ++idx, n = fib_trie_get_next(iter)) {
+ 		if (pos == idx)
+ 			return n;
+ 	}
+ 
+-	for (n = fib_trie_get_first(iter, trie_main);
++	for (n = fib_trie_get_first(iter, iter->trie_main);
+ 	     n; ++idx, n = fib_trie_get_next(iter)) {
+ 		if (pos == idx)
+ 			return n;
+@@ -2230,10 +2251,23 @@
+ 
+ static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
+ {
++	struct fib_trie_iter *iter = seq->private;
++	struct fib_table *tb;
++
++	if (!iter->trie_local) {
++		tb = fib_get_table(iter->net, RT_TABLE_LOCAL);
++		if (tb)
++			iter->trie_local = (struct trie *) tb->tb_data;
++	}
++	if (!iter->trie_main) {
++		tb = fib_get_table(iter->net, RT_TABLE_MAIN);
++		if (tb)
++			iter->trie_main = (struct trie *) tb->tb_data;
++	}
+ 	rcu_read_lock();
+ 	if (*pos == 0)
+ 		return SEQ_START_TOKEN;
+-	return fib_trie_get_idx(seq->private, *pos - 1);
++	return fib_trie_get_idx(iter, *pos - 1);
+ }
+ 
+ static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+@@ -2251,8 +2285,8 @@
+ 		return v;
+ 
+ 	/* continue scan in next trie */
+-	if (iter->trie == trie_local)
+-		return fib_trie_get_first(iter, trie_main);
++	if (iter->trie == iter->trie_local)
++		return fib_trie_get_first(iter, iter->trie_main);
+ 
+ 	return NULL;
+ }
+@@ -2318,7 +2352,7 @@
+ 		return 0;
+ 
+ 	if (!NODE_PARENT(n)) {
+-		if (iter->trie == trie_local)
++		if (iter->trie == iter->trie_local)
+ 			seq_puts(seq, "<local>:\n");
+ 		else
+ 			seq_puts(seq, "<main>:\n");
+@@ -2384,6 +2418,7 @@
+ 	seq	     = file->private_data;
+ 	seq->private = s;
+ 	memset(s, 0, sizeof(*s));
++	s->net = get_net(PROC_NET(inode));
+ out:
+ 	return rc;
+ out_kfree:
+@@ -2391,12 +2426,20 @@
+ 	goto out;
+ }
+ 
++static int fib_trie_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct fib_trie_iter *iter = seq->private;
++	put_net(iter->net);
++	return seq_release_private(inode, file);
++}
++
+ static const struct file_operations fib_trie_fops = {
+ 	.owner  = THIS_MODULE,
+ 	.open   = fib_trie_seq_open,
+ 	.read   = seq_read,
+ 	.llseek = seq_lseek,
+-	.release = seq_release_private,
++	.release = fib_trie_seq_release,
+ };
+ 
+ static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
+@@ -2434,7 +2477,7 @@
+ 		return 0;
+ 	}
+ 
+-	if (iter->trie == trie_local)
++	if (iter->trie == iter->trie_local)
+ 		return 0;
+ 	if (IS_TNODE(l))
+ 		return 0;
+@@ -2505,6 +2548,7 @@
+ 	seq	     = file->private_data;
+ 	seq->private = s;
+ 	memset(s, 0, sizeof(*s));
++	s->net = get_net(PROC_NET(inode));
+ out:
+ 	return rc;
+ out_kfree:
+@@ -2517,35 +2561,37 @@
+ 	.open   = fib_route_seq_open,
+ 	.read   = seq_read,
+ 	.llseek = seq_lseek,
+-	.release = seq_release_private,
++	.release = fib_trie_seq_release,
+ };
+ 
+-int __init fib_proc_init(void)
++int fib_proc_init(struct net *net)
+ {
+-	if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops))
++	net->trie_last_dflt = -1;
++
++	if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
+ 		goto out1;
+ 
+-	if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops))
++	if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO, &fib_triestat_fops))
+ 		goto out2;
+ 
+-	if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops))
++	if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
+ 		goto out3;
+ 
+ 	return 0;
+ 
+ out3:
+-	proc_net_remove("fib_triestat");
++	proc_net_remove(net, "fib_triestat");
+ out2:
+-	proc_net_remove("fib_trie");
++	proc_net_remove(net, "fib_trie");
+ out1:
+ 	return -ENOMEM;
+ }
+ 
+-void __init fib_proc_exit(void)
++void fib_proc_exit(struct net *net)
+ {
+-	proc_net_remove("fib_trie");
+-	proc_net_remove("fib_triestat");
+-	proc_net_remove("route");
++	proc_net_remove(net, "fib_trie");
++	proc_net_remove(net, "fib_triestat");
++	proc_net_remove(net, "route");
+ }
+ 
+ #endif /* CONFIG_PROC_FS */
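
Every /proc file converted above follows one discipline: the open hook pins the inode's namespace with get_net() and stores it as the seq_file private pointer, and a matching release hook drops that reference, so the namespace cannot go away while the file is held open. A condensed sketch of the pairing, assuming the PROC_NET() accessor used in these hunks:

	static int example_seq_open(struct inode *inode, struct file *file)
	{
		/* Pin the namespace for as long as the file stays open. */
		return single_open(file, example_seq_show,
				   get_net(PROC_NET(inode)));
	}

	static int example_seq_release(struct inode *inode, struct file *file)
	{
		struct seq_file *seq = file->private_data;

		put_net(seq->private);	/* drop the reference taken at open */
		return single_release(inode, file);
	}
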
+diff -Nurb linux-2.6.22-try2/net/ipv4/icmp.c linux-2.6.22-try2-netns/net/ipv4/icmp.c
+--- linux-2.6.22-try2/net/ipv4/icmp.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/icmp.c	2007-12-19 22:49:18.000000000 -0500
+@@ -229,14 +229,13 @@
+  *
+  *	On SMP we have one ICMP socket per-cpu.
+  */
+-static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;
+-#define icmp_socket	__get_cpu_var(__icmp_socket)
++#define icmp_socket(NET) (*per_cpu_ptr((NET)->__icmp_socket, smp_processor_id()))
+ 
+-static __inline__ int icmp_xmit_lock(void)
++static __inline__ int icmp_xmit_lock(struct net *net)
+ {
+ 	local_bh_disable();
+ 
+-	if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) {
++	if (unlikely(!spin_trylock(&icmp_socket(net)->sk->sk_lock.slock))) {
+ 		/* This can happen if the output path signals a
+ 		 * dst_link_failure() for an outgoing ICMP packet.
+ 		 */
+@@ -246,9 +245,9 @@
+ 	return 0;
+ }
+ 
+-static void icmp_xmit_unlock(void)
++static void icmp_xmit_unlock(struct net *net)
+ {
+-	spin_unlock_bh(&icmp_socket->sk->sk_lock.slock);
++	spin_unlock_bh(&icmp_socket(net)->sk->sk_lock.slock);
+ }
+ 
+ /*
+@@ -347,19 +346,20 @@
+ static void icmp_push_reply(struct icmp_bxm *icmp_param,
+ 			    struct ipcm_cookie *ipc, struct rtable *rt)
+ {
++	struct net *net = icmp_param->skb->dev->nd_net;
+ 	struct sk_buff *skb;
+ 
+-	if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
++	if (ip_append_data(icmp_socket(net)->sk, icmp_glue_bits, icmp_param,
+ 			   icmp_param->data_len+icmp_param->head_len,
+ 			   icmp_param->head_len,
+ 			   ipc, rt, MSG_DONTWAIT) < 0)
+-		ip_flush_pending_frames(icmp_socket->sk);
+-	else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
++		ip_flush_pending_frames(icmp_socket(net)->sk);
++	else if ((skb = skb_peek(&icmp_socket(net)->sk->sk_write_queue)) != NULL) {
+ 		struct icmphdr *icmph = icmp_hdr(skb);
+ 		__wsum csum = 0;
+ 		struct sk_buff *skb1;
+ 
+-		skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) {
++		skb_queue_walk(&icmp_socket(net)->sk->sk_write_queue, skb1) {
+ 			csum = csum_add(csum, skb1->csum);
+ 		}
+ 		csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
+@@ -367,7 +367,7 @@
+ 						 icmp_param->head_len, csum);
+ 		icmph->checksum = csum_fold(csum);
+ 		skb->ip_summed = CHECKSUM_NONE;
+-		ip_push_pending_frames(icmp_socket->sk);
++		ip_push_pending_frames(icmp_socket(net)->sk);
+ 	}
+ }
+ 
+@@ -377,7 +377,8 @@
+ 
+ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
+ {
+-	struct sock *sk = icmp_socket->sk;
++	struct net *net = icmp_param->skb->dev->nd_net;
++	struct sock *sk = icmp_socket(net)->sk;
+ 	struct inet_sock *inet = inet_sk(sk);
+ 	struct ipcm_cookie ipc;
+ 	struct rtable *rt = (struct rtable *)skb->dst;
+@@ -386,7 +387,7 @@
+ 	if (ip_options_echo(&icmp_param->replyopts, skb))
+ 		return;
+ 
+-	if (icmp_xmit_lock())
++	if (icmp_xmit_lock(net))
+ 		return;
+ 
+ 	icmp_param->data.icmph.checksum = 0;
+@@ -401,7 +402,8 @@
+ 			daddr = icmp_param->replyopts.faddr;
+ 	}
+ 	{
+-		struct flowi fl = { .nl_u = { .ip4_u =
++		struct flowi fl = { .fl_net = net,
++				    .nl_u = { .ip4_u =
+ 					      { .daddr = daddr,
+ 						.saddr = rt->rt_spec_dst,
+ 						.tos = RT_TOS(ip_hdr(skb)->tos) } },
+@@ -415,7 +417,7 @@
+ 		icmp_push_reply(icmp_param, &ipc, rt);
+ 	ip_rt_put(rt);
+ out_unlock:
+-	icmp_xmit_unlock();
++	icmp_xmit_unlock(net);
+ }
+ 
+ 
+@@ -436,6 +438,7 @@
+ 	int room;
+ 	struct icmp_bxm icmp_param;
+ 	struct rtable *rt = (struct rtable *)skb_in->dst;
++	struct net *net;
+ 	struct ipcm_cookie ipc;
+ 	__be32 saddr;
+ 	u8  tos;
+@@ -443,6 +446,7 @@
+ 	if (!rt)
+ 		goto out;
+ 
++	net = rt->fl.fl_net;
+ 	/*
+ 	 *	Find the original header. It is expected to be valid, of course.
+ 	 *	Check this, icmp_send is called from the most obscure devices
+@@ -505,7 +509,7 @@
+ 		}
+ 	}
+ 
+-	if (icmp_xmit_lock())
++	if (icmp_xmit_lock(net))
+ 		return;
+ 
+ 	/*
+@@ -517,7 +521,7 @@
+ 		struct net_device *dev = NULL;
+ 
+ 		if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr)
+-			dev = dev_get_by_index(rt->fl.iif);
++			dev = dev_get_by_index(&init_net, rt->fl.iif);
+ 
+ 		if (dev) {
+ 			saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);
+@@ -545,12 +549,13 @@
+ 	icmp_param.skb	  = skb_in;
+ 	icmp_param.offset = skb_network_offset(skb_in);
+ 	icmp_out_count(icmp_param.data.icmph.type);
+-	inet_sk(icmp_socket->sk)->tos = tos;
++	inet_sk(icmp_socket(net)->sk)->tos = tos;
+ 	ipc.addr = iph->saddr;
+ 	ipc.opt = &icmp_param.replyopts;
+ 
+ 	{
+ 		struct flowi fl = {
++			.fl_net = net,
+ 			.nl_u = {
+ 				.ip4_u = {
+ 					.daddr = icmp_param.replyopts.srr ?
+@@ -593,7 +598,7 @@
+ ende:
+ 	ip_rt_put(rt);
+ out_unlock:
+-	icmp_xmit_unlock();
++	icmp_xmit_unlock(net);
+ out:;
+ }
+ 
+@@ -604,6 +609,7 @@
+ 
+ static void icmp_unreach(struct sk_buff *skb)
+ {
++	struct net *net = skb->dev->nd_net;
+ 	struct iphdr *iph;
+ 	struct icmphdr *icmph;
+ 	int hash, protocol;
+@@ -634,7 +640,7 @@
+ 		case ICMP_PORT_UNREACH:
+ 			break;
+ 		case ICMP_FRAG_NEEDED:
+-			if (ipv4_config.no_pmtu_disc) {
++			if (net->sysctl_ipv4_no_pmtu_disc) {
+ 				LIMIT_NETDEBUG(KERN_INFO "ICMP: %u.%u.%u.%u: "
+ 							 "fragmentation needed "
+ 							 "and DF set.\n",
+@@ -678,7 +684,7 @@
+ 	 */
+ 
+ 	if (!sysctl_icmp_ignore_bogus_error_responses &&
+-	    inet_addr_type(iph->daddr) == RTN_BROADCAST) {
++	    inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
+ 		if (net_ratelimit())
+ 			printk(KERN_WARNING "%u.%u.%u.%u sent an invalid ICMP "
+ 					    "type %u, code %u "
+@@ -707,7 +713,7 @@
+ 	hash = protocol & (MAX_INET_PROTOS - 1);
+ 	read_lock(&raw_v4_lock);
+ 	if ((raw_sk = sk_head(&raw_v4_htable[hash])) != NULL) {
+-		while ((raw_sk = __raw_v4_lookup(raw_sk, protocol, iph->daddr,
++		while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, iph->daddr,
+ 						 iph->saddr,
+ 						 skb->dev->ifindex, skb->skb_tag)) != NULL) {
+ 			raw_err(raw_sk, skb, info);
+@@ -1179,29 +1185,54 @@
+ 	},
+ };
+ 
+-void __init icmp_init(struct net_proto_family *ops)
++static void icmp_net_exit(struct net *net)
+ {
+-	struct inet_sock *inet;
++	struct socket **sock;
+ 	int i;
+ 
+ 	for_each_possible_cpu(i) {
++		sock = percpu_ptr(net->__icmp_socket, i);
++		if (!*sock)
++			continue;
++		/* At the last minute lie and say this is a socket for
++		 * the initial network namespace.  So the socket will
++		 * be safe to free.
++		 */
++		(*sock)->sk->sk_net = get_net(&init_net);
++		sock_release(*sock);
++		*sock = NULL;
++	}
++	percpu_free(net->__icmp_socket);
++}
++
++static int icmp_net_init(struct net *net)
++{
++	struct socket **sock;
++	struct inet_sock *inet;
+ 		int err;
++	int i;
++
++	net->__icmp_socket = alloc_percpu(struct socket *);
++	if (!net->__icmp_socket)
++		return -ENOMEM;
++
++	for_each_possible_cpu(i) {
+ 
+-		err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP,
+-				       &per_cpu(__icmp_socket, i));
++		sock = percpu_ptr(net->__icmp_socket, i);
+ 
++		err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, sock);
+ 		if (err < 0)
+-			panic("Failed to create the ICMP control socket.\n");
++			goto fail;
+ 
+-		per_cpu(__icmp_socket, i)->sk->sk_allocation = GFP_ATOMIC;
++		(*sock)->sk->sk_allocation = GFP_ATOMIC;
+ 
+ 		/* Enough space for 2 64K ICMP packets, including
+ 		 * sk_buff struct overhead.
+ 		 */
+-		per_cpu(__icmp_socket, i)->sk->sk_sndbuf =
++		(*sock)->sk->sk_sndbuf =
+ 			(2 * ((64 * 1024) + sizeof(struct sk_buff)));
+ 
+-		inet = inet_sk(per_cpu(__icmp_socket, i)->sk);
++		inet = inet_sk((*sock)->sk);
+ 		inet->uc_ttl = -1;
+ 		inet->pmtudisc = IP_PMTUDISC_DONT;
+ 
+@@ -1209,8 +1240,27 @@
+ 		 * see it, we do not wish this socket to see incoming
+ 		 * packets.
+ 		 */
+-		per_cpu(__icmp_socket, i)->sk->sk_prot->unhash(per_cpu(__icmp_socket, i)->sk);
++		(*sock)->sk->sk_prot->unhash((*sock)->sk);
++
++		/* Don't hold an extra reference on the namespace */
++		put_net((*sock)->sk->sk_net);
+ 	}
++	return 0;
++fail:
++	icmp_net_exit(net);
++	return err;
++}
++
++static struct pernet_operations icmp_net_ops = {
++	.init = icmp_net_init,
++	.exit = icmp_net_exit,
++};
++
++void __init icmp_init(struct net_proto_family *ops)
++{
++	if (register_pernet_subsys(&icmp_net_ops))
++		panic("Failed to create the ICMP control socket.\n");
+ }
+ 
+ EXPORT_SYMBOL(icmp_err_convert);
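
The icmp.c conversion replaces the static DEFINE_PER_CPU socket with an alloc_percpu() array hung off struct net and managed through pernet_operations. The subtle part is reference counting: a kernel socket normally pins its owning namespace, which would keep the namespace alive forever, so icmp_net_init() drops that self-reference and icmp_net_exit() reparents each socket to init_net just before sock_release(), so the release path still sees a valid, referenced namespace. A stripped-down sketch of the lifecycle (socket tuning and the unwind path elided):

	static int example_net_init(struct net *net)
	{
		struct socket **sock;
		int i, err;

		net->__icmp_socket = alloc_percpu(struct socket *);
		if (!net->__icmp_socket)
			return -ENOMEM;

		for_each_possible_cpu(i) {
			sock = percpu_ptr(net->__icmp_socket, i);
			err = sock_create_kern(PF_INET, SOCK_RAW, IPPROTO_ICMP, sock);
			if (err < 0)
				return err;	/* the real code unwinds via the exit hook */
			/* Break the socket's self-reference on its namespace. */
			put_net((*sock)->sk->sk_net);
		}
		return 0;
	}

	static void example_net_exit(struct net *net)
	{
		struct socket **sock;
		int i;

		for_each_possible_cpu(i) {
			sock = percpu_ptr(net->__icmp_socket, i);
			if (!*sock)
				continue;
			/* Reparent so sock_release() drops a reference we do hold. */
			(*sock)->sk->sk_net = get_net(&init_net);
			sock_release(*sock);
		}
		percpu_free(net->__icmp_socket);
	}
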
+diff -Nurb linux-2.6.22-try2/net/ipv4/igmp.c linux-2.6.22-try2-netns/net/ipv4/igmp.c
+--- linux-2.6.22-try2/net/ipv4/igmp.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/igmp.c	2007-12-19 22:49:18.000000000 -0500
+@@ -97,6 +97,7 @@
+ #include <net/route.h>
+ #include <net/sock.h>
+ #include <net/checksum.h>
++#include <net/net_namespace.h>
+ #include <linux/netfilter_ipv4.h>
+ #ifdef CONFIG_IP_MROUTE
+ #include <linux/mroute.h>
+@@ -129,12 +130,12 @@
+  */
+ 
+ #define IGMP_V1_SEEN(in_dev) \
+-	(IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 1 || \
++	(IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, FORCE_IGMP_VERSION) == 1 || \
+ 	 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 1 || \
+ 	 ((in_dev)->mr_v1_seen && \
+ 	  time_before(jiffies, (in_dev)->mr_v1_seen)))
+ #define IGMP_V2_SEEN(in_dev) \
+-	(IPV4_DEVCONF_ALL(FORCE_IGMP_VERSION) == 2 || \
++	(IPV4_DEVCONF_ALL((in_dev)->dev->nd_net, FORCE_IGMP_VERSION) == 2 || \
+ 	 IN_DEV_CONF_GET((in_dev), FORCE_IGMP_VERSION) == 2 || \
+ 	 ((in_dev)->mr_v2_seen && \
+ 	  time_before(jiffies, (in_dev)->mr_v2_seen)))
+@@ -296,7 +297,8 @@
+ 		return NULL;
+ 
+ 	{
+-		struct flowi fl = { .oif = dev->ifindex,
++		struct flowi fl = { .fl_net = &init_net,
++				    .oif = dev->ifindex,
+ 				    .nl_u = { .ip4_u = {
+ 				    .daddr = IGMPV3_ALL_MCR } },
+ 				    .proto = IPPROTO_IGMP };
+@@ -646,7 +648,8 @@
+ 		dst = group;
+ 
+ 	{
+-		struct flowi fl = { .oif = dev->ifindex,
++		struct flowi fl = { .fl_net = &init_net,
++				    .oif = dev->ifindex,
+ 				    .nl_u = { .ip4_u = { .daddr = dst } },
+ 				    .proto = IPPROTO_IGMP };
+ 		if (ip_route_output_key(&rt, &fl))
+@@ -929,6 +932,11 @@
+ 	struct in_device *in_dev = in_dev_get(skb->dev);
+ 	int len = skb->len;
+ 
++	if (skb->dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
++
+ 	if (in_dev==NULL) {
+ 		kfree_skb(skb);
+ 		return 0;
+@@ -1393,20 +1401,22 @@
+ 
+ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
+ {
+-	struct flowi fl = { .nl_u = { .ip4_u =
+-				      { .daddr = imr->imr_multiaddr.s_addr } } };
++	struct flowi fl = {
++		.fl_net = &init_net,
++		.nl_u = { .ip4_u = { .daddr = imr->imr_multiaddr.s_addr } }
++	};
+ 	struct rtable *rt;
+ 	struct net_device *dev = NULL;
+ 	struct in_device *idev = NULL;
+ 
+ 	if (imr->imr_ifindex) {
+-		idev = inetdev_by_index(imr->imr_ifindex);
++		idev = inetdev_by_index(&init_net, imr->imr_ifindex);
+ 		if (idev)
+ 			__in_dev_put(idev);
+ 		return idev;
+ 	}
+ 	if (imr->imr_address.s_addr) {
+-		dev = ip_dev_find(imr->imr_address.s_addr);
++		dev = ip_dev_find(&init_net, imr->imr_address.s_addr);
+ 		if (!dev)
+ 			return NULL;
+ 		dev_put(dev);
+@@ -2234,7 +2244,7 @@
+ 		struct in_device *in_dev;
+ 		inet->mc_list = iml->next;
+ 
+-		in_dev = inetdev_by_index(iml->multi.imr_ifindex);
++		in_dev = inetdev_by_index(&init_net, iml->multi.imr_ifindex);
+ 		(void) ip_mc_leave_src(sk, iml, in_dev);
+ 		if (in_dev != NULL) {
+ 			ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr);
+@@ -2291,7 +2301,7 @@
+ 	struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq);
+ 
+ 	state->in_dev = NULL;
+-	for_each_netdev(state->dev) {
++	for_each_netdev(&init_net, state->dev) {
+ 		struct in_device *in_dev;
+ 		in_dev = in_dev_get(state->dev);
+ 		if (!in_dev)
+@@ -2453,7 +2463,7 @@
+ 
+ 	state->idev = NULL;
+ 	state->im = NULL;
+-	for_each_netdev(state->dev) {
++	for_each_netdev(&init_net, state->dev) {
+ 		struct in_device *idev;
+ 		idev = in_dev_get(state->dev);
+ 		if (unlikely(idev == NULL))
+@@ -2613,8 +2623,8 @@
+ 
+ int __init igmp_mc_proc_init(void)
+ {
+-	proc_net_fops_create("igmp", S_IRUGO, &igmp_mc_seq_fops);
+-	proc_net_fops_create("mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
++	proc_net_fops_create(&init_net, "igmp", S_IRUGO, &igmp_mc_seq_fops);
++	proc_net_fops_create(&init_net, "mcfilter", S_IRUGO, &igmp_mcf_seq_fops);
+ 	return 0;
+ }
+ #endif
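
igmp.c is not made namespace-aware at this stage: igmp_rcv() simply drops packets arriving in any namespace other than the initial one, and the route lookups hard-code .fl_net = &init_net. This is the stop-gap guard several not-yet-converted receive paths use:

	static int example_rcv(struct sk_buff *skb)
	{
		/* Only the initial namespace is supported for now;
		 * silently discard traffic seen in any other. */
		if (skb->dev->nd_net != &init_net) {
			kfree_skb(skb);
			return 0;
		}
		/* ... normal receive processing ... */
		return 0;
	}
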
+diff -Nurb linux-2.6.22-try2/net/ipv4/inet_connection_sock.c linux-2.6.22-try2-netns/net/ipv4/inet_connection_sock.c
+--- linux-2.6.22-try2/net/ipv4/inet_connection_sock.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/inet_connection_sock.c	2007-12-19 23:25:05.000000000 -0500
+@@ -32,7 +32,7 @@
+ /*
+  * This array holds the first and last local port number.
+  */
+-int sysctl_local_port_range[2] = { 32768, 61000 };
++//int sysctl_local_port_range[2] = { 32768, 61000 };
+ 
+ int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+ {
+@@ -74,6 +74,7 @@
+ 
+ 	sk_for_each_bound(sk2, node, &tb->owners) {
+ 		if (sk != sk2 &&
++		    (sk->sk_net == sk2->sk_net) &&
+ 		    !inet_v6_ipv6only(sk2) &&
+ 		    (!sk->sk_bound_dev_if ||
+ 		     !sk2->sk_bound_dev_if ||
+@@ -98,6 +99,7 @@
+ 		      int (*bind_conflict)(const struct sock *sk,
+ 					   const struct inet_bind_bucket *tb))
+ {
++	struct net *net = sk->sk_net;
+ 	struct inet_bind_hashbucket *head;
+ 	struct hlist_node *node;
+ 	struct inet_bind_bucket *tb;
+@@ -105,16 +107,16 @@
+ 
+ 	local_bh_disable();
+ 	if (!snum) {
+-		int low = sysctl_local_port_range[0];
+-		int high = sysctl_local_port_range[1];
++		int low = net->sysctl_local_port_range[0];
++		int high = net->sysctl_local_port_range[1];
+ 		int remaining = (high - low) + 1;
+ 		int rover = net_random() % (high - low) + low;
+ 
+ 		do {
+-			head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)];
++			head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)];
+ 			spin_lock(&head->lock);
+ 			inet_bind_bucket_for_each(tb, node, &head->chain)
+-				if (tb->port == rover)
++				if ((tb->port == rover) && (tb->net == net))
+ 					goto next;
+ 			break;
+ 		next:
+@@ -138,10 +140,10 @@
+ 		 */
+ 		snum = rover;
+ 	} else {
+-		head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)];
++		head = &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)];
+ 		spin_lock(&head->lock);
+ 		inet_bind_bucket_for_each(tb, node, &head->chain)
+-			if (tb->port == snum)
++			if ((tb->port == snum) && (tb->net == net))
+ 				goto tb_found;
+ 	}
+ 	tb = NULL;
+@@ -161,7 +163,7 @@
+ 	}
+ tb_not_found:
+ 	ret = 1;
+-	if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL)
++	if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, net, snum)) == NULL)
+ 		goto fail_unlock;
+ 	if (hlist_empty(&tb->owners)) {
+ 		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
+@@ -341,7 +343,8 @@
+ 	struct rtable *rt;
+ 	const struct inet_request_sock *ireq = inet_rsk(req);
+ 	struct ip_options *opt = inet_rsk(req)->opt;
+-	struct flowi fl = { .oif = sk->sk_bound_dev_if,
++	struct flowi fl = { .fl_net = sk->sk_net,
++			    .oif = sk->sk_bound_dev_if,
+ 			    .nl_u = { .ip4_u =
+ 				      { .daddr = ((opt && opt->srr) ?
+ 						  opt->faddr :
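
The inet_connection_sock.c hunks above retire the global sysctl_local_port_range[] in favor of a per-namespace pair read at bind time, so each namespace can tune its own ephemeral-port range. The selection step, as the patched code performs it:

	int low = net->sysctl_local_port_range[0];	/* default 32768 */
	int high = net->sysctl_local_port_range[1];	/* default 61000 */
	int remaining = (high - low) + 1;
	int rover = net_random() % (high - low) + low;	/* first candidate */

(Note the modulo uses high - low rather than remaining, so high itself is never the first candidate; that quirk is inherited from the pre-namespace code, not introduced here.)
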
+diff -Nurb linux-2.6.22-try2/net/ipv4/inet_diag.c linux-2.6.22-try2-netns/net/ipv4/inet_diag.c
+--- linux-2.6.22-try2/net/ipv4/inet_diag.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/inet_diag.c	2007-12-19 22:49:20.000000000 -0500
+@@ -227,6 +227,7 @@
+ static int inet_diag_get_exact(struct sk_buff *in_skb,
+ 			       const struct nlmsghdr *nlh)
+ {
++	struct net *net = in_skb->sk->sk_net;
+ 	int err;
+ 	struct sock *sk;
+ 	struct inet_diag_req *req = NLMSG_DATA(nlh);
+@@ -242,7 +243,7 @@
+ 		/* TODO: lback */
+ 		sk = inet_lookup(hashinfo, req->id.idiag_dst[0],
+ 				 req->id.idiag_dport, req->id.idiag_src[0],
+-				 req->id.idiag_sport, req->id.idiag_if);
++				 req->id.idiag_sport, req->id.idiag_if, net);
+ 	}
+ #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+ 	else if (req->idiag_family == AF_INET6) {
+@@ -251,7 +252,7 @@
+ 				  req->id.idiag_dport,
+ 				  (struct in6_addr *)req->id.idiag_src,
+ 				  req->id.idiag_sport,
+-				  req->id.idiag_if);
++				  req->id.idiag_if, net);
+ 	}
+ #endif
+ 	else {
+@@ -906,8 +907,8 @@
+ 	if (!inet_diag_table)
+ 		goto out;
+ 
+-	idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv,
+-					NULL, THIS_MODULE);
++	idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0,
++					inet_diag_rcv, NULL, THIS_MODULE);
+ 	if (idiagnl == NULL)
+ 		goto out_free_table;
+ 	err = 0;
+diff -Nurb linux-2.6.22-try2/net/ipv4/inet_hashtables.c linux-2.6.22-try2-netns/net/ipv4/inet_hashtables.c
+--- linux-2.6.22-try2/net/ipv4/inet_hashtables.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/inet_hashtables.c	2007-12-19 23:27:05.000000000 -0500
+@@ -29,11 +29,13 @@
+  */
+ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
+ 						 struct inet_bind_hashbucket *head,
++						 struct net *net,
+ 						 const unsigned short snum)
+ {
+ 	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
+ 
+ 	if (tb != NULL) {
++		tb->net	      = net;
+ 		tb->port      = snum;
+ 		tb->fastreuse = 0;
+ 		INIT_HLIST_HEAD(&tb->owners);
+@@ -66,7 +68,7 @@
+  */
+ static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk)
+ {
+-	const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size);
++	const int bhash = inet_bhashfn(sk->sk_net, inet_sk(sk)->num, hashinfo->bhash_size);
+ 	struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
+ 	struct inet_bind_bucket *tb;
+ 
+@@ -127,7 +129,7 @@
+ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head,
+ 					      const __be32 daddr,
+ 					      const unsigned short hnum,
+-					      const int dif)
++					      const int dif, struct net *net)
+ {
+ 	struct sock *result = NULL, *sk;
+ 	const struct hlist_node *node;
+@@ -149,6 +151,8 @@
+ 					continue;
+ 				score += 2;
+ 			}
++			if (sk->sk_net != net)
++				continue;
+ 			if (score == 5)
+ 				return sk;
+ 			if (score > hiscore) {
+@@ -163,22 +167,22 @@
+ /* Optimize the common listener case. */
+ struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo,
+ 				    const __be32 daddr, const unsigned short hnum,
+-				    const int dif)
++				    const int dif, struct net *net)
+ {
+ 	struct sock *sk = NULL;
+ 	const struct hlist_head *head;
+ 
+ 	read_lock(&hashinfo->lhash_lock);
+ 	head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
+ 	if (!hlist_empty(head)) {
+ 		const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
+ 
+ 		if (inet->num == hnum && !sk->sk_node.next &&
+ 		    v4_inet_addr_match(sk->sk_nx_info, daddr, inet->rcv_saddr) &&
+ 		    (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
+-		    !sk->sk_bound_dev_if)
++		    !sk->sk_bound_dev_if && (sk->sk_net == net))
+ 			goto sherry_cache;
+-		sk = inet_lookup_listener_slow(head, daddr, hnum, dif);
++		sk = inet_lookup_listener_slow(head, daddr, hnum, dif, net);
+ 	}
+ 	if (sk) {
+ sherry_cache:
+@@ -196,12 +200,13 @@
+ {
+ 	struct inet_hashinfo *hinfo = death_row->hashinfo;
+ 	struct inet_sock *inet = inet_sk(sk);
++	struct net *net = sk->sk_net;
+ 	__be32 daddr = inet->rcv_saddr;
+ 	__be32 saddr = inet->daddr;
+ 	int dif = sk->sk_bound_dev_if;
+ 	INET_ADDR_COOKIE(acookie, saddr, daddr)
+ 	const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport);
+-	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
++	unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
+ 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+ 	struct sock *sk2;
+ 	const struct hlist_node *node;
+@@ -214,7 +219,7 @@
+ 	sk_for_each(sk2, node, &head->twchain) {
+ 		tw = inet_twsk(sk2);
+ 
+-		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
++		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, net)) {
+ 			if (twsk_unique(sk, sk2, twp))
+ 				goto unique;
+ 			else
+@@ -225,7 +230,7 @@
+ 
+ 	/* And established part... */
+ 	sk_for_each(sk2, node, &head->chain) {
+-		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
++		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif, net))
+ 			goto not_unique;
+ 	}
+ 
+@@ -271,6 +276,7 @@
+ int inet_hash_connect(struct inet_timewait_death_row *death_row,
+ 		      struct sock *sk)
+ {
++	struct net *net = sk->sk_net;
+ 	struct inet_hashinfo *hinfo = death_row->hashinfo;
+ 	const unsigned short snum = inet_sk(sk)->num;
+ 	struct inet_bind_hashbucket *head;
+@@ -278,8 +284,8 @@
+ 	int ret;
+ 
+ 	if (!snum) {
+-		int low = sysctl_local_port_range[0];
+-		int high = sysctl_local_port_range[1];
++		int low = net->sysctl_local_port_range[0];
++		int high = net->sysctl_local_port_range[1];
+ 		int range = high - low;
+ 		int i;
+ 		int port;
+@@ -291,7 +297,7 @@
+ 		local_bh_disable();
+ 		for (i = 1; i <= range; i++) {
+ 			port = low + (i + offset) % range;
+-			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
++			head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)];
+ 			spin_lock(&head->lock);
+ 
+ 			/* Does not bother with rcv_saddr checks,
+@@ -299,7 +305,7 @@
+ 			 * unique enough.
+ 			 */
+ 			inet_bind_bucket_for_each(tb, node, &head->chain) {
+-				if (tb->port == port) {
++				if ((tb->port == port) && (tb->net == net)) {
+ 					BUG_TRAP(!hlist_empty(&tb->owners));
+ 					if (tb->fastreuse >= 0)
+ 						goto next_port;
+@@ -311,7 +317,7 @@
+ 				}
+ 			}
+ 
+-			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
++			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, net, port);
+ 			if (!tb) {
+ 				spin_unlock(&head->lock);
+ 				break;
+@@ -346,7 +352,7 @@
+ 		goto out;
+ 	}
+ 
+-	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
++	head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
+ 	tb  = inet_csk(sk)->icsk_bind_hash;
+ 	spin_lock_bh(&head->lock);
+ 	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
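
inet_hashtables.c keeps one global bind-bucket table but tags each bucket with its owning namespace: inet_bind_bucket_create() records tb->net, every scan now matches (port, net) pairs, and inet_bhashfn() takes the net so equal ports from different namespaces need not share a chain. The hash itself is defined in headers outside this excerpt; a hypothetical shape, shown only to illustrate why the net argument is threaded through:

	static inline int inet_bhashfn(struct net *net, const __u16 lport,
				       const int bhash_size)
	{
		/* Assumed mixing: fold the namespace identity into the
		 * bucket index; the real definition lives elsewhere in
		 * this patch. */
		return (lport ^ ((unsigned long)net >> 8)) & (bhash_size - 1);
	}
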
+diff -Nurb linux-2.6.22-try2/net/ipv4/inet_timewait_sock.c linux-2.6.22-try2-netns/net/ipv4/inet_timewait_sock.c
+--- linux-2.6.22-try2/net/ipv4/inet_timewait_sock.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/inet_timewait_sock.c	2007-12-19 22:49:20.000000000 -0500
+@@ -31,7 +31,7 @@
+ 	write_unlock(&ehead->lock);
+ 
+ 	/* Disassociate with bind bucket. */
+-	bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)];
++	bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_net, tw->tw_num, hashinfo->bhash_size)];
+ 	spin_lock(&bhead->lock);
+ 	tb = tw->tw_tb;
+ 	__hlist_del(&tw->tw_bind_node);
+@@ -65,7 +65,7 @@
+ 	   Note, that any socket with inet->num != 0 MUST be bound in
+ 	   binding cache, even if it is closed.
+ 	 */
+-	bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)];
++	bhead = &hashinfo->bhash[inet_bhashfn(sk->sk_net, inet->num, hashinfo->bhash_size)];
+ 	spin_lock(&bhead->lock);
+ 	tw->tw_tb = icsk->icsk_bind_hash;
+ 	BUG_TRAP(icsk->icsk_bind_hash);
+diff -Nurb linux-2.6.22-try2/net/ipv4/inetpeer.c linux-2.6.22-try2-netns/net/ipv4/inetpeer.c
+--- linux-2.6.22-try2/net/ipv4/inetpeer.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/inetpeer.c	2007-12-19 22:49:20.000000000 -0500
+@@ -81,71 +81,94 @@
+ 	.avl_height	= 0
+ };
+ #define peer_avl_empty (&peer_fake_node)
+-static struct inet_peer *peer_root = peer_avl_empty;
+ static DEFINE_RWLOCK(peer_pool_lock);
+ #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */
+ 
+-static int peer_total;
+-/* Exported for sysctl_net_ipv4.  */
+-int inet_peer_threshold __read_mostly = 65536 + 128;	/* start to throw entries more
+-					 * aggressively at this stage */
+-int inet_peer_minttl __read_mostly = 120 * HZ;	/* TTL under high load: 120 sec */
+-int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;	/* usual time to live: 10 min */
+-int inet_peer_gc_mintime __read_mostly = 10 * HZ;
+-int inet_peer_gc_maxtime __read_mostly = 120 * HZ;
+-
+-static struct inet_peer *inet_peer_unused_head;
+-static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head;
+ static DEFINE_SPINLOCK(inet_peer_unused_lock);
+ 
+ static void peer_check_expire(unsigned long dummy);
+-static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0);
+ 
++static int inet_peers_net_init(struct net *net);
++static void inet_peers_net_exit(struct net *net);
++static struct pernet_operations inet_peers_net_ops = {
++	.init = inet_peers_net_init,
++	.exit = inet_peers_net_exit,
++};
+ 
+ /* Called from ip_output.c:ip_init  */
+ void __init inet_initpeers(void)
+ {
++	peer_cachep = kmem_cache_create("inet_peer_cache",
++			sizeof(struct inet_peer),
++			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
++			NULL, NULL);
++
++	register_pernet_subsys(&inet_peers_net_ops);
++}
++
++static int inet_peers_net_init(struct net *net)
++{
+ 	struct sysinfo si;
+ 
++	net->peer_root = peer_avl_empty;
++	net->inet_peer_unused_tailp = &net->inet_peer_unused_head;
++
++	net->inet_peer_threshold = 65536 + 128;	/* start to throw entries more
++						 * aggressively at this stage */
++	net->inet_peer_minttl = 120 * HZ;	/* TTL under high load: 120 sec */
++	net->inet_peer_maxttl = 10 * 60 * HZ;	/* usual time to live: 10 min */
++	net->inet_peer_gc_mintime = 10 * HZ;
++	net->inet_peer_gc_maxtime = 120 * HZ;
++
+ 	/* Use the straight interface to information about memory. */
+ 	si_meminfo(&si);
++
+ 	/* The values below were suggested by Alexey Kuznetsov
+ 	 * <kuznet@ms2.inr.ac.ru>.  I don't have any opinion about the values
+ 	 * myself.  --SAW
+ 	 */
+ 	if (si.totalram <= (32768*1024)/PAGE_SIZE)
+-		inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
++		net->inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
+ 	if (si.totalram <= (16384*1024)/PAGE_SIZE)
+-		inet_peer_threshold >>= 1; /* about 512KB */
++		net->inet_peer_threshold >>= 1; /* about 512KB */
+ 	if (si.totalram <= (8192*1024)/PAGE_SIZE)
+-		inet_peer_threshold >>= 2; /* about 128KB */
++		net->inet_peer_threshold >>= 2; /* about 128KB */
+ 
+-	peer_cachep = kmem_cache_create("inet_peer_cache",
+-			sizeof(struct inet_peer),
+-			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+-			NULL, NULL);
+ 
++	init_timer(&net->peer_periodic_timer);
++	net->peer_periodic_timer.function = peer_check_expire;
+ 	/* All the timers, started at system startup tend
+ 	   to synchronize. Perturb it a bit.
+ 	 */
+-	peer_periodic_timer.expires = jiffies
+-		+ net_random() % inet_peer_gc_maxtime
+-		+ inet_peer_gc_maxtime;
+-	add_timer(&peer_periodic_timer);
++	net->peer_periodic_timer.expires = jiffies
++		+ net_random() % net->inet_peer_gc_maxtime
++		+ net->inet_peer_gc_maxtime;
++	/* Remember our namespace */
++	net->peer_periodic_timer.data = (unsigned long)net;
++	add_timer(&net->peer_periodic_timer);
++
++	return 0;
++}
++
++static void inet_peers_net_exit(struct net *net)
++{
++	del_timer(&net->peer_periodic_timer);
++	/* CHECKME: do we still need to release this namespace's remaining peer entries here? */
+ }
+ 
+ /* Called with or without local BH being disabled. */
+-static void unlink_from_unused(struct inet_peer *p)
++static void unlink_from_unused(struct net *net, struct inet_peer *p)
+ {
+ 	spin_lock_bh(&inet_peer_unused_lock);
+ 	if (p->unused_prevp != NULL) {
+ 		/* On unused list. */
+-		*p->unused_prevp = p->unused_next;
+-		if (p->unused_next != NULL)
+-			p->unused_next->unused_prevp = p->unused_prevp;
++		*p->unused_prevp = p->u.unused_next;
++		if (p->u.unused_next != NULL)
++			p->u.unused_next->unused_prevp = p->unused_prevp;
+ 		else
+-			inet_peer_unused_tailp = p->unused_prevp;
++			net->inet_peer_unused_tailp = p->unused_prevp;
+ 		p->unused_prevp = NULL; /* mark it as removed */
++		p->u.net = hold_net(net); /* Remember the net */
+ 	}
+ 	spin_unlock_bh(&inet_peer_unused_lock);
+ }
+@@ -160,9 +183,9 @@
+ 	struct inet_peer *u, **v;				\
+ 	if (_stack) {						\
+ 		stackptr = _stack;				\
+-		*stackptr++ = &peer_root;			\
++		*stackptr++ = &net->peer_root;			\
+ 	}							\
+-	for (u = peer_root; u != peer_avl_empty; ) {		\
++	for (u = net->peer_root; u != peer_avl_empty; ) {	\
+ 		if (_daddr == u->v4daddr)			\
+ 			break;					\
+ 		if ((__force __u32)_daddr < (__force __u32)u->v4daddr)	\
+@@ -279,7 +302,7 @@
+ } while(0)
+ 
+ /* May be called with local BH enabled. */
+-static void unlink_from_pool(struct inet_peer *p)
++static void unlink_from_pool(struct net *net, struct inet_peer *p)
+ {
+ 	int do_free;
+ 
+@@ -317,7 +340,7 @@
+ 			delp[1] = &t->avl_left; /* was &p->avl_left */
+ 		}
+ 		peer_avl_rebalance(stack, stackptr);
+-		peer_total--;
++		net->peer_total--;
+ 		do_free = 1;
+ 	}
+ 	write_unlock_bh(&peer_pool_lock);
+@@ -335,13 +358,13 @@
+ }
+ 
+ /* May be called with local BH enabled. */
+-static int cleanup_once(unsigned long ttl)
++static int cleanup_once(struct net *net, unsigned long ttl)
+ {
+ 	struct inet_peer *p;
+ 
+ 	/* Remove the first entry from the list of unused nodes. */
+ 	spin_lock_bh(&inet_peer_unused_lock);
+-	p = inet_peer_unused_head;
++	p = net->inet_peer_unused_head;
+ 	if (p != NULL) {
+ 		__u32 delta = (__u32)jiffies - p->dtime;
+ 		if (delta < ttl) {
+@@ -349,12 +372,13 @@
+ 			spin_unlock_bh(&inet_peer_unused_lock);
+ 			return -1;
+ 		}
+-		inet_peer_unused_head = p->unused_next;
+-		if (p->unused_next != NULL)
+-			p->unused_next->unused_prevp = p->unused_prevp;
++		net->inet_peer_unused_head = p->u.unused_next;
++		if (p->u.unused_next != NULL)
++			p->u.unused_next->unused_prevp = p->unused_prevp;
+ 		else
+-			inet_peer_unused_tailp = p->unused_prevp;
++			net->inet_peer_unused_tailp = p->unused_prevp;
+ 		p->unused_prevp = NULL; /* mark as not on the list */
++		p->u.net = hold_net(net);
+ 		/* Grab an extra reference to prevent node disappearing
+ 		 * before unlink_from_pool() call. */
+ 		atomic_inc(&p->refcnt);
+@@ -367,12 +391,12 @@
+ 		 * happen because of entry limits in route cache. */
+ 		return -1;
+ 
+-	unlink_from_pool(p);
++	unlink_from_pool(net, p);
+ 	return 0;
+ }
+ 
+ /* Called with or without local BH being disabled. */
+-struct inet_peer *inet_getpeer(__be32 daddr, int create)
++struct inet_peer *inet_getpeer(struct net *net, __be32 daddr, int create)
+ {
+ 	struct inet_peer *p, *n;
+ 	struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr;
+@@ -387,7 +411,7 @@
+ 	if (p != peer_avl_empty) {
+ 		/* The existing node has been found. */
+ 		/* Remove the entry from unused list if it was there. */
+-		unlink_from_unused(p);
++		unlink_from_unused(net, p);
+ 		return p;
+ 	}
+ 
+@@ -413,13 +437,13 @@
+ 	/* Link the node. */
+ 	link_to_pool(n);
+ 	n->unused_prevp = NULL; /* not on the list */
+-	peer_total++;
++	n->u.net = hold_net(net); /* Remember the net */
++	net->peer_total++;
+ 	write_unlock_bh(&peer_pool_lock);
+ 
+-	if (peer_total >= inet_peer_threshold)
++	if (net->peer_total >= net->inet_peer_threshold)
+ 		/* Remove one less-recently-used entry. */
+-		cleanup_once(0);
+-
++		cleanup_once(net, 0);
+ 	return n;
+ 
+ out_free:
+@@ -427,25 +451,26 @@
+ 	atomic_inc(&p->refcnt);
+ 	write_unlock_bh(&peer_pool_lock);
+ 	/* Remove the entry from unused list if it was there. */
+-	unlink_from_unused(p);
++	unlink_from_unused(net, p);
+ 	/* Free the preallocated node. */
+ 	kmem_cache_free(peer_cachep, n);
+ 	return p;
+ }
+ 
+ /* Called with local BH disabled. */
+-static void peer_check_expire(unsigned long dummy)
++static void peer_check_expire(unsigned long arg)
+ {
++	struct net *net = (void *)arg;
+ 	unsigned long now = jiffies;
+ 	int ttl;
+ 
+-	if (peer_total >= inet_peer_threshold)
+-		ttl = inet_peer_minttl;
++	if (net->peer_total >= net->inet_peer_threshold)
++		ttl = net->inet_peer_minttl;
+ 	else
+-		ttl = inet_peer_maxttl
+-				- (inet_peer_maxttl - inet_peer_minttl) / HZ *
+-					peer_total / inet_peer_threshold * HZ;
+-	while (!cleanup_once(ttl)) {
++		ttl = net->inet_peer_maxttl
++			- (net->inet_peer_maxttl - net->inet_peer_minttl) / HZ *
++				net->peer_total / net->inet_peer_threshold * HZ;
++	while (!cleanup_once(net, ttl)) {
+ 		if (jiffies != now)
+ 			break;
+ 	}
+@@ -453,25 +478,30 @@
+ 	/* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
+ 	 * interval depending on the total number of entries (more entries,
+ 	 * less interval). */
+-	if (peer_total >= inet_peer_threshold)
+-		peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
++	if (net->peer_total >= net->inet_peer_threshold)
++		net->peer_periodic_timer.expires = jiffies
++			+ net->inet_peer_gc_mintime;
+ 	else
+-		peer_periodic_timer.expires = jiffies
+-			+ inet_peer_gc_maxtime
+-			- (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
+-				peer_total / inet_peer_threshold * HZ;
+-	add_timer(&peer_periodic_timer);
++		net->peer_periodic_timer.expires = jiffies
++			+ net->inet_peer_gc_maxtime
++			- (net->inet_peer_gc_maxtime - net->inet_peer_gc_mintime) / HZ *
++				net->peer_total / net->inet_peer_threshold * HZ;
++	add_timer(&net->peer_periodic_timer);
+ }
+ 
+ void inet_putpeer(struct inet_peer *p)
+ {
+ 	spin_lock_bh(&inet_peer_unused_lock);
+ 	if (atomic_dec_and_test(&p->refcnt)) {
+-		p->unused_prevp = inet_peer_unused_tailp;
+-		p->unused_next = NULL;
+-		*inet_peer_unused_tailp = p;
+-		inet_peer_unused_tailp = &p->unused_next;
++		struct net *net = p->u.net;
++
++		p->unused_prevp = net->inet_peer_unused_tailp;
++		p->u.unused_next = NULL;
++		*net->inet_peer_unused_tailp = p;
++		net->inet_peer_unused_tailp = &p->u.unused_next;
+ 		p->dtime = (__u32)jiffies;
++
++		release_net(net);
+ 	}
+ 	spin_unlock_bh(&inet_peer_unused_lock);
+ }
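
inetpeer.c moves the garbage-collection timer into struct net and passes the namespace through the timer's data word, the standard idiom of this era since timer callbacks receive only an unsigned long. The pattern in isolation:

	static void example_check_expire(unsigned long arg)
	{
		struct net *net = (struct net *)arg;	/* recover the namespace */

		/* ... expire this namespace's unused peers ... */

		net->peer_periodic_timer.expires = jiffies + net->inet_peer_gc_mintime;
		add_timer(&net->peer_periodic_timer);
	}

	static int example_net_init(struct net *net)
	{
		init_timer(&net->peer_periodic_timer);
		net->peer_periodic_timer.function = example_check_expire;
		net->peer_periodic_timer.data = (unsigned long)net;	/* stash it */
		net->peer_periodic_timer.expires = jiffies + net->inet_peer_gc_maxtime;
		add_timer(&net->peer_periodic_timer);
		return 0;
	}
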
+diff -Nurb linux-2.6.22-try2/net/ipv4/ip_fragment.c linux-2.6.22-try2-netns/net/ipv4/ip_fragment.c
+--- linux-2.6.22-try2/net/ipv4/ip_fragment.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ip_fragment.c	2007-12-19 22:49:20.000000000 -0500
+@@ -49,21 +49,6 @@
+  * as well. Or notify me, at least. --ANK
+  */
+ 
+-/* Fragment cache limits. We will commit 256K at one time. Should we
+- * cross that limit we will prune down to 192K. This should cope with
+- * even the most extreme cases without allowing an attacker to measurably
+- * harm machine performance.
+- */
+-int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
+-int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
+-
+-int sysctl_ipfrag_max_dist __read_mostly = 64;
+-
+-/* Important NOTE! Fragment queue must be destroyed before MSL expires.
+- * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
+- */
+-int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
+-
+ struct ipfrag_skb_cb
+ {
+ 	struct inet_skb_parm	h;
+@@ -96,6 +81,7 @@
+ 	int             iif;
+ 	unsigned int    rid;
+ 	struct inet_peer *peer;
++	struct net 	*net;
+ };
+ 
+ /* Hash table. */
+@@ -103,17 +89,13 @@
+ #define IPQ_HASHSZ	64
+ 
+ /* Per-bucket lock is easy to add now. */
+-static struct hlist_head ipq_hash[IPQ_HASHSZ];
+ static DEFINE_RWLOCK(ipfrag_lock);
+-static u32 ipfrag_hash_rnd;
+-static LIST_HEAD(ipq_lru_list);
+-int ip_frag_nqueues = 0;
+ 
+ static __inline__ void __ipq_unlink(struct ipq *qp)
+ {
+ 	hlist_del(&qp->list);
+ 	list_del(&qp->lru_list);
+-	ip_frag_nqueues--;
++	qp->net->ip_frag_nqueues--;
+ }
+ 
+ static __inline__ void ipq_unlink(struct ipq *ipq)
+@@ -123,70 +105,71 @@
+ 	write_unlock(&ipfrag_lock);
+ }
+ 
+-static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
++static unsigned int ipqhashfn(struct net *net, __be16 id, __be32 saddr, __be32 daddr, u8 prot)
+ {
+ 	return jhash_3words((__force u32)id << 16 | prot,
+ 			    (__force u32)saddr, (__force u32)daddr,
+-			    ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
++			    net->ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
+ }
+ 
+-static struct timer_list ipfrag_secret_timer;
+-int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
+-
+-static void ipfrag_secret_rebuild(unsigned long dummy)
++static void ipfrag_secret_rebuild(unsigned long arg)
+ {
++	struct net *net = (void *)arg;
+ 	unsigned long now = jiffies;
+ 	int i;
+ 
+ 	write_lock(&ipfrag_lock);
+-	get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
++	get_random_bytes(&net->ipfrag_hash_rnd, sizeof(u32));
+ 	for (i = 0; i < IPQ_HASHSZ; i++) {
+ 		struct ipq *q;
++		struct hlist_head *head;
+ 		struct hlist_node *p, *n;
+ 
+-		hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) {
+-			unsigned int hval = ipqhashfn(q->id, q->saddr,
++		head = &net->ipq_hash[i];
++		hlist_for_each_entry_safe(q, p, n, head, list) {
++			unsigned int hval = ipqhashfn(net, q->id, q->saddr,
+ 						      q->daddr, q->protocol);
+ 
+ 			if (hval != i) {
+ 				hlist_del(&q->list);
+ 
+ 				/* Relink to new hash chain. */
+-				hlist_add_head(&q->list, &ipq_hash[hval]);
++				hlist_add_head(&q->list, &net->ipq_hash[hval]);
+ 			}
+ 		}
+ 	}
+ 	write_unlock(&ipfrag_lock);
+ 
+-	mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
++	mod_timer(&net->ipfrag_secret_timer,
++		now + net->sysctl_ipfrag_secret_interval);
+ }
+ 
+-atomic_t ip_frag_mem = ATOMIC_INIT(0);	/* Memory used for fragments */
+-
+ /* Memory Tracking Functions. */
+-static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
++static __inline__ void frag_kfree_skb(struct net *net, struct sk_buff *skb, int *work)
+ {
+ 	if (work)
+ 		*work -= skb->truesize;
+-	atomic_sub(skb->truesize, &ip_frag_mem);
++	atomic_sub(skb->truesize, &net->ip_frag_mem);
+ 	kfree_skb(skb);
+ }
+ 
+ static __inline__ void frag_free_queue(struct ipq *qp, int *work)
+ {
++	struct net *net = qp->net;
+ 	if (work)
+ 		*work -= sizeof(struct ipq);
+-	atomic_sub(sizeof(struct ipq), &ip_frag_mem);
++	atomic_sub(sizeof(struct ipq), &net->ip_frag_mem);
++	release_net(net);
+ 	kfree(qp);
+ }
+ 
+-static __inline__ struct ipq *frag_alloc_queue(void)
++static __inline__ struct ipq *frag_alloc_queue(struct net *net)
+ {
+ 	struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
+ 
+ 	if (!qp)
+ 		return NULL;
+-	atomic_add(sizeof(struct ipq), &ip_frag_mem);
++	atomic_add(sizeof(struct ipq), &net->ip_frag_mem);
+ 	return qp;
+ }
+ 
+@@ -209,7 +192,7 @@
+ 	while (fp) {
+ 		struct sk_buff *xp = fp->next;
+ 
+-		frag_kfree_skb(fp, work);
++		frag_kfree_skb(qp->net, fp, work);
+ 		fp = xp;
+ 	}
+ 
+@@ -241,23 +224,23 @@
+ /* Memory limiting on fragments.  Evictor trashes the oldest
+  * fragment queue until we are back under the threshold.
+  */
+-static void ip_evictor(void)
++static void ip_evictor(struct net *net)
+ {
+ 	struct ipq *qp;
+ 	struct list_head *tmp;
+ 	int work;
+ 
+-	work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
++	work = atomic_read(&net->ip_frag_mem) - net->sysctl_ipfrag_low_thresh;
+ 	if (work <= 0)
+ 		return;
+ 
+ 	while (work > 0) {
+ 		read_lock(&ipfrag_lock);
+-		if (list_empty(&ipq_lru_list)) {
++		if (list_empty(&net->ipq_lru_list)) {
+ 			read_unlock(&ipfrag_lock);
+ 			return;
+ 		}
+-		tmp = ipq_lru_list.next;
++		tmp = net->ipq_lru_list.next;
+ 		qp = list_entry(tmp, struct ipq, lru_list);
+ 		atomic_inc(&qp->refcnt);
+ 		read_unlock(&ipfrag_lock);
+@@ -292,7 +275,7 @@
+ 	if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
+ 		struct sk_buff *head = qp->fragments;
+ 		/* Send an ICMP "Fragment Reassembly Timeout" message. */
+-		if ((head->dev = dev_get_by_index(qp->iif)) != NULL) {
++		if ((head->dev = dev_get_by_index(qp->net, qp->iif)) != NULL) {
+ 			icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
+ 			dev_put(head->dev);
+ 		}
+@@ -304,7 +287,7 @@
+ 
+ /* Creation primitives. */
+ 
+-static struct ipq *ip_frag_intern(struct ipq *qp_in)
++static struct ipq *ip_frag_intern(struct net *net, struct ipq *qp_in)
+ {
+ 	struct ipq *qp;
+ #ifdef CONFIG_SMP
+@@ -313,14 +296,14 @@
+ 	unsigned int hash;
+ 
+ 	write_lock(&ipfrag_lock);
+-	hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
++	hash = ipqhashfn(net, qp_in->id, qp_in->saddr, qp_in->daddr,
+ 			 qp_in->protocol);
+ #ifdef CONFIG_SMP
+ 	/* With SMP race we have to recheck hash table, because
+ 	 * such entry could be created on other cpu, while we
+ 	 * promoted read lock to write lock.
+ 	 */
+-	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
++	hlist_for_each_entry(qp, n, &net->ipq_hash[hash], list) {
+ 		if (qp->id == qp_in->id		&&
+ 		    qp->saddr == qp_in->saddr	&&
+ 		    qp->daddr == qp_in->daddr	&&
+@@ -336,26 +319,27 @@
+ #endif
+ 	qp = qp_in;
+ 
+-	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time))
++	if (!mod_timer(&qp->timer, jiffies + net->sysctl_ipfrag_time))
+ 		atomic_inc(&qp->refcnt);
+ 
+ 	atomic_inc(&qp->refcnt);
+-	hlist_add_head(&qp->list, &ipq_hash[hash]);
++	hlist_add_head(&qp->list, &net->ipq_hash[hash]);
+ 	INIT_LIST_HEAD(&qp->lru_list);
+-	list_add_tail(&qp->lru_list, &ipq_lru_list);
+-	ip_frag_nqueues++;
++	list_add_tail(&qp->lru_list, &net->ipq_lru_list);
++	net->ip_frag_nqueues++;
+ 	write_unlock(&ipfrag_lock);
+ 	return qp;
+ }
+ 
+ /* Add an entry to the 'ipq' queue for a newly received IP datagram. */
+-static struct ipq *ip_frag_create(struct iphdr *iph, u32 user)
++static struct ipq *ip_frag_create(struct net *net, struct iphdr *iph, u32 user)
+ {
+ 	struct ipq *qp;
+ 
+-	if ((qp = frag_alloc_queue()) == NULL)
++	if ((qp = frag_alloc_queue(net)) == NULL)
+ 		goto out_nomem;
+ 
++	qp->net = hold_net(net);
+ 	qp->protocol = iph->protocol;
+ 	qp->last_in = 0;
+ 	qp->id = iph->id;
+@@ -366,7 +350,8 @@
+ 	qp->meat = 0;
+ 	qp->fragments = NULL;
+ 	qp->iif = 0;
+-	qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
++	qp->peer = net->sysctl_ipfrag_max_dist ?
++		inet_getpeer(net, iph->saddr, 1) : NULL;
+ 
+ 	/* Initialize a timer for this entry. */
+ 	init_timer(&qp->timer);
+@@ -375,7 +360,7 @@
+ 	spin_lock_init(&qp->lock);
+ 	atomic_set(&qp->refcnt, 1);
+ 
+-	return ip_frag_intern(qp);
++	return ip_frag_intern(net, qp);
+ 
+ out_nomem:
+ 	LIMIT_NETDEBUG(KERN_ERR "ip_frag_create: no memory left !\n");
+@@ -385,7 +370,7 @@
+ /* Find the correct entry in the "incomplete datagrams" queue for
+  * this IP datagram, and create new one, if nothing is found.
+  */
+-static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
++static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
+ {
+ 	__be16 id = iph->id;
+ 	__be32 saddr = iph->saddr;
+@@ -396,8 +381,8 @@
+ 	struct hlist_node *n;
+ 
+ 	read_lock(&ipfrag_lock);
+-	hash = ipqhashfn(id, saddr, daddr, protocol);
+-	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
++	hash = ipqhashfn(net, id, saddr, daddr, protocol);
++	hlist_for_each_entry(qp, n, &net->ipq_hash[hash], list) {
+ 		if (qp->id == id		&&
+ 		    qp->saddr == saddr	&&
+ 		    qp->daddr == daddr	&&
+@@ -410,14 +395,14 @@
+ 	}
+ 	read_unlock(&ipfrag_lock);
+ 
+-	return ip_frag_create(iph, user);
++	return ip_frag_create(net, iph, user);
+ }
+ 
+ /* Is the fragment too far ahead to be part of ipq? */
+ static inline int ip_frag_too_far(struct ipq *qp)
+ {
+ 	struct inet_peer *peer = qp->peer;
+-	unsigned int max = sysctl_ipfrag_max_dist;
++	unsigned int max = qp->net->sysctl_ipfrag_max_dist;
+ 	unsigned int start, end;
+ 
+ 	int rc;
+@@ -442,7 +427,7 @@
+ {
+ 	struct sk_buff *fp;
+ 
+-	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
++	if (!mod_timer(&qp->timer, jiffies + qp->net->sysctl_ipfrag_time)) {
+ 		atomic_inc(&qp->refcnt);
+ 		return -ETIMEDOUT;
+ 	}
+@@ -450,7 +435,7 @@
+ 	fp = qp->fragments;
+ 	do {
+ 		struct sk_buff *xp = fp->next;
+-		frag_kfree_skb(fp, NULL);
++		frag_kfree_skb(qp->net, fp, NULL);
+ 		fp = xp;
+ 	} while (fp);
+ 
+@@ -466,6 +451,7 @@
+ /* Add new segment to existing queue. */
+ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+ {
++	struct net *net = qp->net;
+ 	struct sk_buff *prev, *next;
+ 	int flags, offset;
+ 	int ihl, end;
+@@ -576,7 +562,7 @@
+ 				qp->fragments = next;
+ 
+ 			qp->meat -= free_it->len;
+-			frag_kfree_skb(free_it, NULL);
++			frag_kfree_skb(net, free_it, NULL);
+ 		}
+ 	}
+ 
+@@ -594,12 +580,12 @@
+ 	skb->dev = NULL;
+ 	qp->stamp = skb->tstamp;
+ 	qp->meat += skb->len;
+-	atomic_add(skb->truesize, &ip_frag_mem);
++	atomic_add(skb->truesize, &net->ip_frag_mem);
+ 	if (offset == 0)
+ 		qp->last_in |= FIRST_IN;
+ 
+ 	write_lock(&ipfrag_lock);
+-	list_move_tail(&qp->lru_list, &ipq_lru_list);
++	list_move_tail(&qp->lru_list, &net->ipq_lru_list);
+ 	write_unlock(&ipfrag_lock);
+ 
+ 	return;
+@@ -613,6 +599,7 @@
+ 
+ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
+ {
++	struct net *net = qp->net;
+ 	struct iphdr *iph;
+ 	struct sk_buff *fp, *head = qp->fragments;
+ 	int len;
+@@ -654,12 +641,12 @@
+ 		head->len -= clone->len;
+ 		clone->csum = 0;
+ 		clone->ip_summed = head->ip_summed;
+-		atomic_add(clone->truesize, &ip_frag_mem);
++		atomic_add(clone->truesize, &net->ip_frag_mem);
+ 	}
+ 
+ 	skb_shinfo(head)->frag_list = head->next;
+ 	skb_push(head, head->data - skb_network_header(head));
+-	atomic_sub(head->truesize, &ip_frag_mem);
++	atomic_sub(head->truesize, &net->ip_frag_mem);
+ 
+ 	for (fp=head->next; fp; fp = fp->next) {
+ 		head->data_len += fp->len;
+@@ -669,7 +656,7 @@
+ 		else if (head->ip_summed == CHECKSUM_COMPLETE)
+ 			head->csum = csum_add(head->csum, fp->csum);
+ 		head->truesize += fp->truesize;
+-		atomic_sub(fp->truesize, &ip_frag_mem);
++		atomic_sub(fp->truesize, &net->ip_frag_mem);
+ 	}
+ 
+ 	head->next = NULL;
+@@ -700,19 +687,20 @@
+ /* Process an incoming IP datagram fragment. */
+ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
+ {
++	struct net *net = skb->dev->nd_net;
+ 	struct ipq *qp;
+ 	struct net_device *dev;
+ 
+ 	IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
+ 
+ 	/* Start by cleaning up the memory. */
+-	if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
+-		ip_evictor();
++	if (atomic_read(&net->ip_frag_mem) > net->sysctl_ipfrag_high_thresh)
++		ip_evictor(net);
+ 
+ 	dev = skb->dev;
+ 
+ 	/* Lookup (or create) queue header */
+-	if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
++	if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
+ 		struct sk_buff *ret = NULL;
+ 
+ 		spin_lock(&qp->lock);
+@@ -733,15 +721,69 @@
+ 	return NULL;
+ }
+ 
+-void __init ipfrag_init(void)
++static int ipfrag_net_init(struct net *net)
+ {
+-	ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
++	struct timer_list *secret_timer;
++	int i;
++
++	/* Fragment cache limits. We will commit 256K at one time. Should we
++	 * cross that limit we will prune down to 192K. This should cope with
++	 * even the most extreme cases without allowing an attacker to measurably
++	 * harm machine performance.
++	 */
++	net->sysctl_ipfrag_high_thresh = 256*1024;
++	net->sysctl_ipfrag_low_thresh = 192*1024;
++	net->sysctl_ipfrag_max_dist = 64;
++
++	/* Important NOTE! Fragment queue must be destroyed before MSL expires.
++	 * RFC 791 wrongly proposes prolonging the timer by TTL on each fragment arrival.
++	 */
++	net->sysctl_ipfrag_time = IP_FRAG_TIME;
++
++	net->sysctl_ipfrag_secret_interval = 10 * 60 * HZ;
++
++	net->ipq_hash = kzalloc(sizeof(*net->ipq_hash)*IPQ_HASHSZ, GFP_KERNEL);
++	if (!net->ipq_hash)
++		return -ENOMEM;
++
++	for (i = 0; i < IPQ_HASHSZ; i++)
++		INIT_HLIST_HEAD(&net->ipq_hash[i]);
++	INIT_LIST_HEAD(&net->ipq_lru_list);
++	net->ip_frag_nqueues = 0;
++	atomic_set(&net->ip_frag_mem, 0);
++
++	net->ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+ 				 (jiffies ^ (jiffies >> 6)));
+ 
+-	init_timer(&ipfrag_secret_timer);
+-	ipfrag_secret_timer.function = ipfrag_secret_rebuild;
+-	ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
+-	add_timer(&ipfrag_secret_timer);
++	secret_timer = &net->ipfrag_secret_timer;
++	init_timer(secret_timer);
++	secret_timer->function = ipfrag_secret_rebuild;
++	secret_timer->expires = jiffies + net->sysctl_ipfrag_secret_interval;
++	secret_timer->data = (unsigned long)net;
++	add_timer(secret_timer);
++
++	return 0;
++}
++
++static void ipfrag_net_exit(struct net *net)
++{
++	del_timer(&net->ipfrag_secret_timer);
++
++	net->sysctl_ipfrag_low_thresh = 0;
++	while (atomic_read(&net->ip_frag_mem))
++		ip_evictor(net);
++
++	kfree(net->ipq_hash);
++}
++
++static struct pernet_operations ipfrag_net_ops = {
++	.init = ipfrag_net_init,
++	.exit = ipfrag_net_exit,
++};
++
++void ipfrag_init(void)
++{
++	register_pernet_subsys(&ipfrag_net_ops);
+ }
+ 
+ EXPORT_SYMBOL(ip_defrag);
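
The ip_fragment.c conversion above is the template for the rest of this series: formerly global state moves into struct net, and setup/teardown runs through pernet_operations so each namespace gets its own init and exit pass. Below is a minimal sketch of that skeleton; the example_* names are hypothetical, and it assumes an example_counter field has been added to struct net alongside the ipq_* fields shown above.

    #include <linux/init.h>
    #include <net/net_namespace.h>

    /* Hypothetical per-namespace state; assumes 'example_counter'
     * was added to struct net, like the ipq_* fields above. */
    static int example_net_init(struct net *net)
    {
            net->example_counter = 0;       /* per-namespace, not global */
            return 0;                       /* nonzero aborts namespace creation */
    }

    static void example_net_exit(struct net *net)
    {
            /* free anything init allocated; called once per dying namespace */
    }

    static struct pernet_operations example_net_ops = {
            .init = example_net_init,
            .exit = example_net_exit,
    };

    static int __init example_init(void)
    {
            /* runs .init for init_net and for every namespace created later */
            return register_pernet_subsys(&example_net_ops);
    }

Note how ipfrag_net_exit() above first zeroes the low threshold and drains queues via ip_evictor() before freeing the hash, so a dying namespace cannot leave fragment memory charged against it.
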
+diff -Nurb linux-2.6.22-try2/net/ipv4/ip_gre.c linux-2.6.22-try2-netns/net/ipv4/ip_gre.c
+--- linux-2.6.22-try2/net/ipv4/ip_gre.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ip_gre.c	2007-12-19 22:49:20.000000000 -0500
+@@ -262,7 +262,7 @@
+ 		int i;
+ 		for (i=1; i<100; i++) {
+ 			sprintf(name, "gre%d", i);
+-			if (__dev_get_by_name(name) == NULL)
++			if (__dev_get_by_name(&init_net, name) == NULL)
+ 				break;
+ 		}
+ 		if (i==100)
+@@ -397,6 +397,9 @@
+ 	struct flowi fl;
+ 	struct rtable *rt;
+ 
++	if (skb->dev->nd_net != &init_net)
++		return;
++
+ 	if (p[1] != htons(ETH_P_IP))
+ 		return;
+ 
+@@ -475,6 +478,7 @@
+ 
+ 	/* Try to guess incoming interface */
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	fl.fl4_dst = eiph->saddr;
+ 	fl.fl4_tos = RT_TOS(eiph->tos);
+ 	fl.proto = IPPROTO_GRE;
+@@ -559,6 +563,10 @@
+ 	struct ip_tunnel *tunnel;
+ 	int    offset = 4;
+ 
++	if (skb->dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
+ 	if (!pskb_may_pull(skb, 16))
+ 		goto drop_nolock;
+ 
+@@ -740,7 +748,8 @@
+ 	}
+ 
+ 	{
+-		struct flowi fl = { .oif = tunnel->parms.link,
++		struct flowi fl = { .fl_net = &init_net,
++				    .oif = tunnel->parms.link,
+ 				    .nl_u = { .ip4_u =
+ 					      { .daddr = dst,
+ 						.saddr = tiph->saddr,
+@@ -1095,7 +1104,8 @@
+ 	struct ip_tunnel *t = netdev_priv(dev);
+ 
+ 	if (MULTICAST(t->parms.iph.daddr)) {
+-		struct flowi fl = { .oif = t->parms.link,
++		struct flowi fl = { .fl_net = &init_net,
++				    .oif = t->parms.link,
+ 				    .nl_u = { .ip4_u =
+ 					      { .daddr = t->parms.iph.daddr,
+ 						.saddr = t->parms.iph.saddr,
+@@ -1118,7 +1128,7 @@
+ {
+ 	struct ip_tunnel *t = netdev_priv(dev);
+ 	if (MULTICAST(t->parms.iph.daddr) && t->mlink) {
+-		struct in_device *in_dev = inetdev_by_index(t->mlink);
++		struct in_device *in_dev = inetdev_by_index(&init_net, t->mlink);
+ 		if (in_dev) {
+ 			ip_mc_dec_group(in_dev, t->parms.iph.daddr);
+ 			in_dev_put(in_dev);
+@@ -1168,7 +1178,8 @@
+ 	/* Guess output device to choose reasonable mtu and hard_header_len */
+ 
+ 	if (iph->daddr) {
+-		struct flowi fl = { .oif = tunnel->parms.link,
++		struct flowi fl = { .fl_net = &init_net,
++				    .oif = tunnel->parms.link,
+ 				    .nl_u = { .ip4_u =
+ 					      { .daddr = iph->daddr,
+ 						.saddr = iph->saddr,
+@@ -1195,7 +1206,7 @@
+ 	}
+ 
+ 	if (!tdev && tunnel->parms.link)
+-		tdev = __dev_get_by_index(tunnel->parms.link);
++		tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+ 
+ 	if (tdev) {
+ 		hlen = tdev->hard_header_len;
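
ip_gre.c itself is not converted; its receive and error handlers are simply fenced off so that traffic from any namespace other than init_net is dropped or ignored. A sketch of that guard, using a hypothetical example_rcv handler and the 2.6.22-era nd_net field (later kernels wrap this as dev_net()):

    static int example_rcv(struct sk_buff *skb)
    {
            /* tunnels are still global state: refuse traffic from
             * namespaces other than the initial one */
            if (skb->dev->nd_net != &init_net) {
                    kfree_skb(skb);         /* rx path owns the skb */
                    return 0;
            }
            /* ... normal processing; init_net may be assumed below ... */
            return 0;
    }

Error handlers such as ipgre_err() return early without freeing, because the ICMP dispatch still owns the skb there; that is why the two guards in the hunk above differ.
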
+diff -Nurb linux-2.6.22-try2/net/ipv4/ip_input.c linux-2.6.22-try2-netns/net/ipv4/ip_input.c
+--- linux-2.6.22-try2/net/ipv4/ip_input.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ip_input.c	2007-12-19 22:49:20.000000000 -0500
+@@ -280,6 +280,9 @@
+ 	struct iphdr *iph;
+ 	struct net_device *dev = skb->dev;
+ 
++	if (skb->dev->nd_net != &init_net)
++		goto drop;
++
+ 	/* It looks as overkill, because not all
+ 	   IP options require packet mangling.
+ 	   But it is the easiest for now, especially taking
+diff -Nurb linux-2.6.22-try2/net/ipv4/ip_options.c linux-2.6.22-try2-netns/net/ipv4/ip_options.c
+--- linux-2.6.22-try2/net/ipv4/ip_options.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ip_options.c	2007-12-19 22:49:20.000000000 -0500
+@@ -151,7 +151,7 @@
+ 						__be32 addr;
+ 
+ 						memcpy(&addr, sptr+soffset-1, 4);
+-						if (inet_addr_type(addr) != RTN_LOCAL) {
++						if (inet_addr_type(&init_net, addr) != RTN_LOCAL) {
+ 							dopt->ts_needtime = 1;
+ 							soffset += 8;
+ 						}
+@@ -400,7 +400,7 @@
+ 					{
+ 						__be32 addr;
+ 						memcpy(&addr, &optptr[optptr[2]-1], 4);
+-						if (inet_addr_type(addr) == RTN_UNICAST)
++						if (inet_addr_type(&init_net, addr) == RTN_UNICAST)
+ 							break;
+ 						if (skb)
+ 							timeptr = (__be32*)&optptr[optptr[2]+3];
+diff -Nurb linux-2.6.22-try2/net/ipv4/ip_output.c linux-2.6.22-try2-netns/net/ipv4/ip_output.c
+--- linux-2.6.22-try2/net/ipv4/ip_output.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ip_output.c	2007-12-19 22:49:20.000000000 -0500
+@@ -83,8 +83,6 @@
+ #include <linux/netlink.h>
+ #include <linux/tcp.h>
+ 
+-int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
+-
+ /* Generate a checksum for an outgoing IP datagram. */
+ __inline__ void ip_send_check(struct iphdr *iph)
+ {
+@@ -317,7 +315,8 @@
+ 			daddr = opt->faddr;
+ 
+ 		{
+-			struct flowi fl = { .oif = sk->sk_bound_dev_if,
++			struct flowi fl = { .fl_net = sk->sk_net,
++					    .oif = sk->sk_bound_dev_if,
+ 					    .nl_u = { .ip4_u =
+ 						      { .daddr = daddr,
+ 							.saddr = inet->saddr,
+@@ -1352,7 +1351,8 @@
+ 	}
+ 
+ 	{
+-		struct flowi fl = { .oif = arg->bound_dev_if,
++		struct flowi fl = { .fl_net = sk->sk_net,
++				    .oif = arg->bound_dev_if,
+ 				    .nl_u = { .ip4_u =
+ 					      { .daddr = daddr,
+ 						.saddr = rt->rt_spec_dst,
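
Routing becomes namespace-relative by carrying the namespace in the flow key itself: every struct flowi built in ip_output.c now sets .fl_net before the lookup. A sketch of an output-route lookup under that assumption (hypothetical function, minimal key):

    static struct rtable *example_output_route(struct sock *sk, __be32 daddr)
    {
            struct rtable *rt;
            struct flowi fl = {
                    .fl_net = sk->sk_net,           /* socket's namespace */
                    .oif    = sk->sk_bound_dev_if,
                    .nl_u   = { .ip4_u = { .daddr = daddr } },
            };

            /* resolves against the routing tables of fl.fl_net only */
            if (ip_route_output_key(&rt, &fl))
                    return NULL;
            return rt;
    }

Passing the namespace inside the key leaves the many ip_route_output_key() call sites otherwise unchanged apart from the one extra initializer.
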
+diff -Nurb linux-2.6.22-try2/net/ipv4/ip_sockglue.c linux-2.6.22-try2-netns/net/ipv4/ip_sockglue.c
+--- linux-2.6.22-try2/net/ipv4/ip_sockglue.c	2007-12-19 13:37:57.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ip_sockglue.c	2007-12-19 22:49:20.000000000 -0500
+@@ -411,6 +411,7 @@
+ static int do_ip_setsockopt(struct sock *sk, int level,
+ 			    int optname, char __user *optval, int optlen)
+ {
++	struct net *net = sk->sk_net;
+ 	struct inet_sock *inet = inet_sk(sk);
+ 	int val=0,err;
+ 
+@@ -596,13 +597,13 @@
+ 				err = 0;
+ 				break;
+ 			}
+-			dev = ip_dev_find(mreq.imr_address.s_addr);
++			dev = ip_dev_find(net, mreq.imr_address.s_addr);
+ 			if (dev) {
+ 				mreq.imr_ifindex = dev->ifindex;
+ 				dev_put(dev);
+ 			}
+ 		} else
+-			dev = __dev_get_by_index(mreq.imr_ifindex);
++			dev = __dev_get_by_index(net, mreq.imr_ifindex);
+ 
+ 
+ 		err = -EADDRNOTAVAIL;
+@@ -956,6 +957,7 @@
+ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
+ 			    char __user *optval, int __user *optlen)
+ {
++	struct net *net = sk->sk_net;
+ 	struct inet_sock *inet = inet_sk(sk);
+ 	int val;
+ 	int len;
+@@ -1023,7 +1025,7 @@
+ 		break;
+ 	case IP_TTL:
+ 		val = (inet->uc_ttl == -1 ?
+-		       sysctl_ip_default_ttl :
++		       net->sysctl_ip_default_ttl :
+ 		       inet->uc_ttl);
+ 		break;
+ 	case IP_HDRINCL:
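
do_ip_setsockopt() and do_ip_getsockopt() recover the namespace from the socket (sk->sk_net in this tree) and then read per-namespace sysctls such as sysctl_ip_default_ttl from it, replacing the global that ip_output.c just lost. A sketch of the TTL fallback as a hypothetical helper:

    static int example_effective_ttl(struct sock *sk)
    {
            struct net *net = sk->sk_net;   /* namespace the socket lives in */
            struct inet_sock *inet = inet_sk(sk);

            /* -1 means "unset": fall back to this namespace's default
             * rather than the old global sysctl_ip_default_ttl */
            return inet->uc_ttl == -1 ? net->sysctl_ip_default_ttl
                                      : inet->uc_ttl;
    }
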
+diff -Nurb linux-2.6.22-try2/net/ipv4/ipcomp.c linux-2.6.22-try2-netns/net/ipv4/ipcomp.c
+--- linux-2.6.22-try2/net/ipv4/ipcomp.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ipcomp.c	2007-12-19 22:49:20.000000000 -0500
+@@ -175,6 +175,9 @@
+ 	struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
+ 	struct xfrm_state *x;
+ 
++	if (skb->dev->nd_net != &init_net)
++		return;
++
+ 	if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
+ 	    icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+ 		return;
+diff -Nurb linux-2.6.22-try2/net/ipv4/ipconfig.c linux-2.6.22-try2-netns/net/ipv4/ipconfig.c
+--- linux-2.6.22-try2/net/ipv4/ipconfig.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ipconfig.c	2007-12-19 22:49:20.000000000 -0500
+@@ -59,6 +59,7 @@
+ #include <net/ip.h>
+ #include <net/ipconfig.h>
+ #include <net/route.h>
++#include <net/net_namespace.h>
+ 
+ #include <asm/uaccess.h>
+ #include <net/checksum.h>
+@@ -184,16 +185,18 @@
+ 	struct ic_device *d, **last;
+ 	struct net_device *dev;
+ 	unsigned short oflags;
++	struct net_device *lo;
+ 
+ 	last = &ic_first_dev;
+ 	rtnl_lock();
+ 
+ 	/* bring loopback device up first */
+-	if (dev_change_flags(&loopback_dev, loopback_dev.flags | IFF_UP) < 0)
+-		printk(KERN_ERR "IP-Config: Failed to open %s\n", loopback_dev.name);
++	lo = &init_net.loopback_dev;
++	if (dev_change_flags(lo, lo->flags | IFF_UP) < 0)
++		printk(KERN_ERR "IP-Config: Failed to open %s\n", lo->name);
+ 
+-	for_each_netdev(dev) {
+-		if (dev == &loopback_dev)
++	for_each_netdev(&init_net, dev) {
++		if (dev == lo)
+ 			continue;
+ 		if (user_dev_name[0] ? !strcmp(dev->name, user_dev_name) :
+ 		    (!(dev->flags & IFF_LOOPBACK) &&
+@@ -283,7 +286,7 @@
+ 
+ 	mm_segment_t oldfs = get_fs();
+ 	set_fs(get_ds());
+-	res = devinet_ioctl(cmd, (struct ifreq __user *) arg);
++	res = devinet_ioctl(&init_net, cmd, (struct ifreq __user *) arg);
+ 	set_fs(oldfs);
+ 	return res;
+ }
+@@ -294,7 +297,7 @@
+ 
+ 	mm_segment_t oldfs = get_fs();
+ 	set_fs(get_ds());
+-	res = ip_rt_ioctl(cmd, (void __user *) arg);
++	res = ip_rt_ioctl(&init_net, cmd, (void __user *) arg);
+ 	set_fs(oldfs);
+ 	return res;
+ }
+@@ -425,6 +428,9 @@
+ 	unsigned char *sha, *tha;		/* s for "source", t for "target" */
+ 	struct ic_device *d;
+ 
++	if (dev->nd_net != &init_net)
++		goto drop;
++
+ 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+ 		return NET_RX_DROP;
+ 
+@@ -834,6 +840,9 @@
+ 	struct ic_device *d;
+ 	int len, ext_len;
+ 
++	if (dev->nd_net != &init_net)
++		goto drop;
++
+ 	/* Perform verifications before taking the lock.  */
+ 	if (skb->pkt_type == PACKET_OTHERHOST)
+ 		goto drop;
+@@ -1253,7 +1262,7 @@
+ 	__be32 addr;
+ 
+ #ifdef CONFIG_PROC_FS
+-	proc_net_fops_create("pnp", S_IRUGO, &pnp_seq_fops);
++	proc_net_fops_create(&init_net, "pnp", S_IRUGO, &pnp_seq_fops);
+ #endif /* CONFIG_PROC_FS */
+ 
+ 	if (!ic_enable)
+diff -Nurb linux-2.6.22-try2/net/ipv4/ipip.c linux-2.6.22-try2-netns/net/ipv4/ipip.c
+--- linux-2.6.22-try2/net/ipv4/ipip.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ipip.c	2007-12-19 22:49:20.000000000 -0500
+@@ -225,7 +225,7 @@
+ 		int i;
+ 		for (i=1; i<100; i++) {
+ 			sprintf(name, "tunl%d", i);
+-			if (__dev_get_by_name(name) == NULL)
++			if (__dev_get_by_name(&init_net, name) == NULL)
+ 				break;
+ 		}
+ 		if (i==100)
+@@ -403,6 +403,7 @@
+ 
+ 	/* Try to guess incoming interface */
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	fl.fl4_daddr = eiph->saddr;
+ 	fl.fl4_tos = RT_TOS(eiph->tos);
+ 	fl.proto = IPPROTO_IPIP;
+@@ -542,7 +543,8 @@
+ 	}
+ 
+ 	{
+-		struct flowi fl = { .oif = tunnel->parms.link,
++		struct flowi fl = { .fl_net = &init_net,
++				    .oif = tunnel->parms.link,
+ 				    .nl_u = { .ip4_u =
+ 					      { .daddr = dst,
+ 						.saddr = tiph->saddr,
+@@ -806,7 +808,8 @@
+ 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+ 
+ 	if (iph->daddr) {
+-		struct flowi fl = { .oif = tunnel->parms.link,
++		struct flowi fl = { .fl_net = &init_net,
++				    .oif = tunnel->parms.link,
+ 				    .nl_u = { .ip4_u =
+ 					      { .daddr = iph->daddr,
+ 						.saddr = iph->saddr,
+@@ -821,7 +824,7 @@
+ 	}
+ 
+ 	if (!tdev && tunnel->parms.link)
+-		tdev = __dev_get_by_index(tunnel->parms.link);
++		tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+ 
+ 	if (tdev) {
+ 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
+diff -Nurb linux-2.6.22-try2/net/ipv4/ipmr.c linux-2.6.22-try2-netns/net/ipv4/ipmr.c
+--- linux-2.6.22-try2/net/ipv4/ipmr.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ipmr.c	2007-12-19 22:49:20.000000000 -0500
+@@ -62,6 +62,7 @@
+ #include <linux/netfilter_ipv4.h>
+ #include <net/ipip.h>
+ #include <net/checksum.h>
++#include <net/net_namespace.h>
+ #include <net/netlink.h>
+ 
+ #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
+@@ -124,7 +125,7 @@
+ {
+ 	struct net_device  *dev;
+ 
+-	dev = __dev_get_by_name("tunl0");
++	dev = __dev_get_by_name(&init_net, "tunl0");
+ 
+ 	if (dev) {
+ 		int err;
+@@ -148,7 +149,7 @@
+ 
+ 		dev = NULL;
+ 
+-		if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
++		if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) {
+ 			dev->flags |= IFF_MULTICAST;
+ 
+ 			in_dev = __in_dev_get_rtnl(dev);
+@@ -320,7 +321,7 @@
+ 			e->error = -ETIMEDOUT;
+ 			memset(&e->msg, 0, sizeof(e->msg));
+ 
+-			rtnl_unicast(skb, NETLINK_CB(skb).pid);
++			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
+ 		} else
+ 			kfree_skb(skb);
+ 	}
+@@ -422,7 +423,7 @@
+ 			return -ENOBUFS;
+ 		break;
+ 	case 0:
+-		dev = ip_dev_find(vifc->vifc_lcl_addr.s_addr);
++		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
+ 		if (!dev)
+ 			return -EADDRNOTAVAIL;
+ 		dev_put(dev);
+@@ -532,7 +533,7 @@
+ 				memset(&e->msg, 0, sizeof(e->msg));
+ 			}
+ 
+-			rtnl_unicast(skb, NETLINK_CB(skb).pid);
++			rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
+ 		} else
+ 			ip_mr_forward(skb, c, 0);
+ 	}
+@@ -848,7 +849,7 @@
+ {
+ 	rtnl_lock();
+ 	if (sk == mroute_socket) {
+-		IPV4_DEVCONF_ALL(MC_FORWARDING)--;
++		IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)--;
+ 
+ 		write_lock_bh(&mrt_lock);
+ 		mroute_socket=NULL;
+@@ -897,7 +898,7 @@
+ 			mroute_socket=sk;
+ 			write_unlock_bh(&mrt_lock);
+ 
+-			IPV4_DEVCONF_ALL(MC_FORWARDING)++;
++			IPV4_DEVCONF_ALL(sk->sk_net, MC_FORWARDING)++;
+ 		}
+ 		rtnl_unlock();
+ 		return ret;
+@@ -1082,13 +1083,18 @@
+ 
+ static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
+ {
++	struct net_device *dev = ptr;
+ 	struct vif_device *v;
+ 	int ct;
++
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (event != NETDEV_UNREGISTER)
+ 		return NOTIFY_DONE;
+ 	v=&vif_table[0];
+ 	for (ct=0;ct<maxvif;ct++,v++) {
+-		if (v->dev==ptr)
++		if (v->dev==dev)
+ 			vif_delete(ct);
+ 	}
+ 	return NOTIFY_DONE;
+@@ -1171,7 +1177,8 @@
+ #endif
+ 
+ 	if (vif->flags&VIFF_TUNNEL) {
+-		struct flowi fl = { .oif = vif->link,
++		struct flowi fl = { .fl_net = &init_net,
++				    .oif = vif->link,
+ 				    .nl_u = { .ip4_u =
+ 					      { .daddr = vif->remote,
+ 						.saddr = vif->local,
+@@ -1181,7 +1188,8 @@
+ 			goto out_free;
+ 		encap = sizeof(struct iphdr);
+ 	} else {
+-		struct flowi fl = { .oif = vif->link,
++		struct flowi fl = { .fl_net = &init_net,
++				    .oif = vif->link,
+ 				    .nl_u = { .ip4_u =
+ 					      { .daddr = iph->daddr,
+ 						.tos = RT_TOS(iph->tos) } },
+@@ -1498,6 +1506,10 @@
+ 	struct iphdr   *encap;
+ 	struct net_device  *reg_dev = NULL;
+ 
++	if (skb->dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
+ 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
+ 		goto drop;
+ 
+@@ -1922,7 +1934,7 @@
+ 	ipmr_expire_timer.function=ipmr_expire_process;
+ 	register_netdevice_notifier(&ip_mr_notifier);
+ #ifdef CONFIG_PROC_FS
+-	proc_net_fops_create("ip_mr_vif", 0, &ipmr_vif_fops);
+-	proc_net_fops_create("ip_mr_cache", 0, &ipmr_mfc_fops);
++	proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops);
++	proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops);
+ #endif
+ }
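
ipmr_device_event() above also gains a namespace check, since netdevice notifiers fire for devices in every namespace while the multicast routing tables are still global. A sketch of that notifier shape, with a hypothetical handler:

    static int example_device_event(struct notifier_block *this,
                                    unsigned long event, void *ptr)
    {
            struct net_device *dev = ptr;

            /* devices from other namespaces are not tracked here */
            if (dev->nd_net != &init_net)
                    return NOTIFY_DONE;

            if (event == NETDEV_UNREGISTER) {
                    /* ... tear down any state referencing dev ... */
            }
            return NOTIFY_DONE;
    }

The original code compared v->dev against the raw void *ptr; materializing struct net_device *dev first, as the hunk now does, is what makes the namespace test possible.
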
+diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_app.c linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_app.c
+--- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_app.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_app.c	2007-12-19 22:49:20.000000000 -0500
+@@ -32,6 +32,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+ #include <linux/mutex.h>
++#include <net/net_namespace.h>
+ 
+ #include <net/ip_vs.h>
+ 
+@@ -616,12 +617,12 @@
+ int ip_vs_app_init(void)
+ {
+ 	/* we will replace it with proc_net_ipvs_create() soon */
+-	proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops);
++	proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
+ 	return 0;
+ }
+ 
+ 
+ void ip_vs_app_cleanup(void)
+ {
+-	proc_net_remove("ip_vs_app");
++	proc_net_remove(&init_net, "ip_vs_app");
+ }
+diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_conn.c
+--- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_conn.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_conn.c	2007-12-19 22:49:20.000000000 -0500
+@@ -34,6 +34,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/jhash.h>
+ #include <linux/random.h>
++#include <net/net_namespace.h>
+ 
+ #include <net/ip_vs.h>
+ 
+@@ -922,7 +923,7 @@
+ 		rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
+ 	}
+ 
+-	proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops);
++	proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
+ 
+ 	/* calculate the random value for connection hash */
+ 	get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
+@@ -938,6 +939,6 @@
+ 
+ 	/* Release the empty cache */
+ 	kmem_cache_destroy(ip_vs_conn_cachep);
+-	proc_net_remove("ip_vs_conn");
++	proc_net_remove(&init_net, "ip_vs_conn");
+ 	vfree(ip_vs_conn_tab);
+ }
+diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_core.c linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_core.c
+--- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_core.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_core.c	2007-12-19 22:49:20.000000000 -0500
+@@ -460,7 +460,7 @@
+ 	   and the destination is RTN_UNICAST (and not local), then create
+ 	   a cache_bypass connection entry */
+ 	if (sysctl_ip_vs_cache_bypass && svc->fwmark
+-	    && (inet_addr_type(iph->daddr) == RTN_UNICAST)) {
++	    && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
+ 		int ret, cs;
+ 		struct ip_vs_conn *cp;
+ 
+@@ -530,6 +530,10 @@
+ 				       const struct net_device *out,
+ 				       int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	if (!((*pskb)->ipvs_property))
+ 		return NF_ACCEPT;
+ 	/* The packet was sent from IPVS, exit this chain */
+@@ -734,6 +738,10 @@
+ 	struct ip_vs_conn *cp;
+ 	int ihl;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	EnterFunction(11);
+ 
+ 	if (skb->ipvs_property)
+@@ -818,7 +826,7 @@
+ 	 * if it came from this machine itself.  So re-compute
+ 	 * the routing information.
+ 	 */
+-	if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
++	if (ip_route_me_harder(&init_net, pskb, RTN_LOCAL) != 0)
+ 		goto drop;
+ 	skb = *pskb;
+ 
+@@ -956,12 +964,16 @@
+ 	int ret, restart;
+ 	int ihl;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/*
+ 	 *	Big tappo: only PACKET_HOST (neither loopback nor mcasts)
+ 	 *	... don't know why 1st test DOES NOT include 2nd (?)
+ 	 */
+ 	if (unlikely(skb->pkt_type != PACKET_HOST
+-		     || skb->dev == &loopback_dev || skb->sk)) {
++		     || skb->dev == &init_net.loopback_dev || skb->sk)) {
+ 		IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
+ 			  skb->pkt_type,
+ 			  ip_hdr(skb)->protocol,
+@@ -1062,6 +1074,10 @@
+ {
+ 	int r;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP)
+ 		return NF_ACCEPT;
+ 
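
Every IPVS hook above opens with the same (in ? in : out)->nd_net test: depending on the hook point only one of the two devices is non-NULL, and whichever is set identifies the packet's namespace. A sketch of the guard in a standalone hook, using the 2.6.22 sk_buff ** hook signature:

    static unsigned int example_hook(unsigned int hooknum,
                                     struct sk_buff **pskb,
                                     const struct net_device *in,
                                     const struct net_device *out,
                                     int (*okfn)(struct sk_buff *))
    {
            /* PRE_ROUTING/LOCAL_IN supply 'in'; POST_ROUTING/LOCAL_OUT
             * supply 'out'; the one that is set names the namespace */
            if ((in ? in : out)->nd_net != &init_net)
                    return NF_ACCEPT;       /* pass foreign traffic through untouched */

            /* ... init_net-only processing ... */
            return NF_ACCEPT;
    }
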
+diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_ctl.c linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_ctl.c
+--- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_ctl.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_ctl.c	2007-12-19 22:49:20.000000000 -0500
+@@ -39,6 +39,7 @@
+ #include <net/ip.h>
+ #include <net/route.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ 
+ #include <asm/uaccess.h>
+ 
+@@ -679,7 +680,7 @@
+ 	conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
+ 
+ 	/* check if local node and update the flags */
+-	if (inet_addr_type(udest->addr) == RTN_LOCAL) {
++	if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
+ 		conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
+ 			| IP_VS_CONN_F_LOCALNODE;
+ 	}
+@@ -731,7 +732,7 @@
+ 
+ 	EnterFunction(2);
+ 
+-	atype = inet_addr_type(udest->addr);
++	atype = inet_addr_type(&init_net, udest->addr);
+ 	if (atype != RTN_LOCAL && atype != RTN_UNICAST)
+ 		return -EINVAL;
+ 
+@@ -1932,6 +1933,9 @@
+ 	struct ip_vs_service *svc;
+ 	struct ip_vs_dest_user *udest;
+ 
++	if (sk->sk_net != &init_net)
++		return -ENOPROTOOPT;
++
+ 	if (!capable(CAP_NET_ADMIN))
+ 		return -EPERM;
+ 
+@@ -2196,6 +2200,9 @@
+ 	unsigned char arg[128];
+ 	int ret = 0;
+ 
++	if (sk->sk_net != &init_net)
++		return -ENOPROTOOPT;
++
+ 	if (!capable(CAP_NET_ADMIN))
+ 		return -EPERM;
+ 
+@@ -2356,8 +2363,8 @@
+ 		return ret;
+ 	}
+ 
+-	proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops);
+-	proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops);
++	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
++	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
+ 
+ 	sysctl_header = register_sysctl_table(vs_root_table);
+ 
+@@ -2390,8 +2397,8 @@
+ 	cancel_work_sync(&defense_work.work);
+ 	ip_vs_kill_estimator(&ip_vs_stats);
+ 	unregister_sysctl_table(sysctl_header);
+-	proc_net_remove("ip_vs_stats");
+-	proc_net_remove("ip_vs");
++	proc_net_remove(&init_net, "ip_vs_stats");
++	proc_net_remove(&init_net, "ip_vs");
+ 	nf_unregister_sockopt(&ip_vs_sockopts);
+ 	LeaveFunction(2);
+ }
+diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_lblcr.c linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_lblcr.c
+--- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_lblcr.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_lblcr.c	2007-12-19 22:49:20.000000000 -0500
+@@ -843,7 +843,7 @@
+ 	INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);
+ 	sysctl_header = register_sysctl_table(lblcr_root_table);
+ #ifdef CONFIG_IP_VS_LBLCR_DEBUG
+-	proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo);
++	proc_net_create(&init_net, "ip_vs_lblcr", 0, ip_vs_lblcr_getinfo);
+ #endif
+ 	return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+ }
+@@ -852,7 +852,7 @@
+ static void __exit ip_vs_lblcr_cleanup(void)
+ {
+ #ifdef CONFIG_IP_VS_LBLCR_DEBUG
+-	proc_net_remove("ip_vs_lblcr");
++	proc_net_remove(&init_net, "ip_vs_lblcr");
+ #endif
+ 	unregister_sysctl_table(sysctl_header);
+ 	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
+diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_sync.c linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_sync.c
+--- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_sync.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_sync.c	2007-12-19 22:49:20.000000000 -0500
+@@ -387,7 +387,7 @@
+ 	struct net_device *dev;
+ 	struct inet_sock *inet = inet_sk(sk);
+ 
+-	if ((dev = __dev_get_by_name(ifname)) == NULL)
++	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ 		return -ENODEV;
+ 
+ 	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+@@ -412,7 +412,7 @@
+ 	int num;
+ 
+ 	if (sync_state == IP_VS_STATE_MASTER) {
+-		if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL)
++		if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
+ 			return -ENODEV;
+ 
+ 		num = (dev->mtu - sizeof(struct iphdr) -
+@@ -423,7 +423,7 @@
+ 		IP_VS_DBG(7, "setting the maximum length of sync sending "
+ 			  "message %d.\n", sync_send_mesg_maxlen);
+ 	} else if (sync_state == IP_VS_STATE_BACKUP) {
+-		if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL)
++		if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
+ 			return -ENODEV;
+ 
+ 		sync_recv_mesg_maxlen = dev->mtu -
+@@ -451,7 +451,7 @@
+ 	memset(&mreq, 0, sizeof(mreq));
+ 	memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
+ 
+-	if ((dev = __dev_get_by_name(ifname)) == NULL)
++	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ 		return -ENODEV;
+ 	if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
+ 		return -EINVAL;
+@@ -472,7 +472,7 @@
+ 	__be32 addr;
+ 	struct sockaddr_in sin;
+ 
+-	if ((dev = __dev_get_by_name(ifname)) == NULL)
++	if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ 		return -ENODEV;
+ 
+ 	addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
+diff -Nurb linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_xmit.c linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_xmit.c
+--- linux-2.6.22-try2/net/ipv4/ipvs/ip_vs_xmit.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/ipvs/ip_vs_xmit.c	2007-12-19 22:49:20.000000000 -0500
+@@ -70,6 +70,7 @@
+ 		if (!(rt = (struct rtable *)
+ 		      __ip_vs_dst_check(dest, rtos, 0))) {
+ 			struct flowi fl = {
++				.fl_net = &init_net,
+ 				.oif = 0,
+ 				.nl_u = {
+ 					.ip4_u = {
+@@ -93,6 +94,7 @@
+ 		spin_unlock(&dest->dst_lock);
+ 	} else {
+ 		struct flowi fl = {
++			.fl_net = &init_net,
+ 			.oif = 0,
+ 			.nl_u = {
+ 				.ip4_u = {
+@@ -160,6 +162,7 @@
+ 	u8     tos = iph->tos;
+ 	int    mtu;
+ 	struct flowi fl = {
++		.fl_net = &init_net,
+ 		.oif = 0,
+ 		.nl_u = {
+ 			.ip4_u = {
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/arp_tables.c linux-2.6.22-try2-netns/net/ipv4/netfilter/arp_tables.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/arp_tables.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/arp_tables.c	2007-12-19 22:49:20.000000000 -0500
+@@ -19,6 +19,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/module.h>
+ #include <linux/init.h>
++#include <net/sock.h>
+ 
+ #include <asm/uaccess.h>
+ #include <linux/mutex.h>
+@@ -773,7 +774,7 @@
+ 	int ret;
+ 	struct arpt_table *t;
+ 
+-	t = xt_find_table_lock(NF_ARP, entries->name);
++	t = xt_find_table_lock(&init_net, NF_ARP, entries->name);
+ 	if (t && !IS_ERR(t)) {
+ 		struct xt_table_info *private = t->private;
+ 		duprintf("t->private->number = %u\n",
+@@ -843,7 +844,7 @@
+ 
+ 	duprintf("arp_tables: Translated table\n");
+ 
+-	t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name),
++	t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, tmp.name),
+ 				    "arptable_%s", tmp.name);
+ 	if (!t || IS_ERR(t)) {
+ 		ret = t ? PTR_ERR(t) : -ENOENT;
+@@ -936,7 +937,7 @@
+ 		goto free;
+ 	}
+ 
+-	t = xt_find_table_lock(NF_ARP, tmp.name);
++	t = xt_find_table_lock(&init_net, NF_ARP, tmp.name);
+ 	if (!t || IS_ERR(t)) {
+ 		ret = t ? PTR_ERR(t) : -ENOENT;
+ 		goto free;
+@@ -971,6 +972,9 @@
+ {
+ 	int ret;
+ 
++	if (sk->sk_net != &init_net)
++		return -ENOPROTOOPT;
++
+ 	if (!capable(CAP_NET_ADMIN))
+ 		return -EPERM;
+ 
+@@ -995,6 +999,9 @@
+ {
+ 	int ret;
+ 
++	if (sk->sk_net != &init_net)
++		return -ENOPROTOOPT;
++
+ 	if (!capable(CAP_NET_ADMIN))
+ 		return -EPERM;
+ 
+@@ -1016,7 +1023,7 @@
+ 		}
+ 		name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
+ 
+-		t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
++		t = try_then_request_module(xt_find_table_lock(&init_net, NF_ARP, name),
+ 					    "arptable_%s", name);
+ 		if (t && !IS_ERR(t)) {
+ 			struct arpt_getinfo info;
+@@ -1116,7 +1123,7 @@
+ 		return ret;
+ 	}
+ 
+-	ret = xt_register_table(table, &bootstrap, newinfo);
++	ret = xt_register_table(&init_net, table, &bootstrap, newinfo);
+ 	if (ret != 0) {
+ 		xt_free_table_info(newinfo);
+ 		return ret;
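
xt_find_table_lock() now takes the owning namespace as its first argument; arp_tables always passes init_net because ARP tables themselves remain global in this series. A sketch of the lookup/unlock pairing, with "filter" standing in for a real table name:

    static int example_table_peek(struct net *net)
    {
            struct arpt_table *t;

            /* request_module() loads arptable_<name> if the table is absent */
            t = try_then_request_module(xt_find_table_lock(net, NF_ARP, "filter"),
                                        "arptable_%s", "filter");
            if (!t || IS_ERR(t))
                    return t ? PTR_ERR(t) : -ENOENT;

            /* ... inspect t->private while the table mutex is held ... */
            xt_table_unlock(t);
            return 0;
    }
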
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/arptable_filter.c linux-2.6.22-try2-netns/net/ipv4/netfilter/arptable_filter.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/arptable_filter.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/arptable_filter.c	2007-12-19 22:49:20.000000000 -0500
+@@ -61,6 +61,10 @@
+ 			      const struct net_device *out,
+ 			      int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	return arpt_do_table(pskb, hook, in, out, &packet_filter);
+ }
+ 
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ip_queue.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ip_queue.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/ip_queue.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ip_queue.c	2007-12-19 22:49:20.000000000 -0500
+@@ -26,6 +26,7 @@
+ #include <linux/mutex.h>
+ #include <net/sock.h>
+ #include <net/route.h>
++#include <net/net_namespace.h>
+ 
+ #define IPQ_QMAX_DEFAULT 1024
+ #define IPQ_PROC_FS_NAME "ip_queue"
+@@ -556,6 +557,9 @@
+ {
+ 	struct net_device *dev = ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	/* Drop any packets associated with the downed device */
+ 	if (event == NETDEV_DOWN)
+ 		ipq_dev_drop(dev->ifindex);
+@@ -575,7 +579,7 @@
+ 	if (event == NETLINK_URELEASE &&
+ 	    n->protocol == NETLINK_FIREWALL && n->pid) {
+ 		write_lock_bh(&queue_lock);
+-		if (n->pid == peer_pid)
++		if ((n->net == &init_net) && (n->pid == peer_pid))
+ 			__ipq_reset();
+ 		write_unlock_bh(&queue_lock);
+ 	}
+@@ -667,14 +671,14 @@
+ 	struct proc_dir_entry *proc;
+ 
+ 	netlink_register_notifier(&ipq_nl_notifier);
+-	ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk,
+-				      NULL, THIS_MODULE);
++	ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
++				      ipq_rcv_sk, NULL, THIS_MODULE);
+ 	if (ipqnl == NULL) {
+ 		printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
+ 		goto cleanup_netlink_notifier;
+ 	}
+ 
+-	proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
++	proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info);
+ 	if (proc)
+ 		proc->owner = THIS_MODULE;
+ 	else {
+@@ -695,8 +699,7 @@
+ cleanup_sysctl:
+ 	unregister_sysctl_table(ipq_sysctl_header);
+ 	unregister_netdevice_notifier(&ipq_dev_notifier);
+-	proc_net_remove(IPQ_PROC_FS_NAME);
+-
++	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
+ cleanup_ipqnl:
+ 	sock_release(ipqnl->sk_socket);
+ 	mutex_lock(&ipqnl_mutex);
+@@ -715,7 +718,7 @@
+ 
+ 	unregister_sysctl_table(ipq_sysctl_header);
+ 	unregister_netdevice_notifier(&ipq_dev_notifier);
+-	proc_net_remove(IPQ_PROC_FS_NAME);
++	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
+ 
+ 	sock_release(ipqnl->sk_socket);
+ 	mutex_lock(&ipqnl_mutex);
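
ip_queue's kernel netlink socket is likewise pinned to a namespace at creation time, and its NETLINK_URELEASE notifier now checks n->net as well as n->pid before resetting the peer. A sketch of the creation call with the prototype used in this tree (void input callback, cb_mutex argument); NETLINK_FIREWALL is reused here purely for illustration:

    static struct sock *example_nl;

    static void example_input(struct sock *sk, int len)
    {
            /* ... drain sk_receive_queue and dispatch messages ... */
    }

    static int __init example_netlink_init(void)
    {
            example_nl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
                                               example_input, NULL, THIS_MODULE);
            return example_nl ? 0 : -ENOMEM;
    }
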
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ip_tables.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ip_tables.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/ip_tables.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ip_tables.c	2007-12-19 22:49:20.000000000 -0500
+@@ -1039,7 +1039,7 @@
+ }
+ #endif
+ 
+-static int get_info(void __user *user, int *len, int compat)
++static int get_info(struct net *net, void __user *user, int *len, int compat)
+ {
+ 	char name[IPT_TABLE_MAXNAMELEN];
+ 	struct xt_table *t;
+@@ -1059,7 +1059,7 @@
+ 	if (compat)
+ 		xt_compat_lock(AF_INET);
+ #endif
+-	t = try_then_request_module(xt_find_table_lock(AF_INET, name),
++	t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
+ 			"iptable_%s", name);
+ 	if (t && !IS_ERR(t)) {
+ 		struct ipt_getinfo info;
+@@ -1099,7 +1099,7 @@
+ }
+ 
+ static int
+-get_entries(struct ipt_get_entries __user *uptr, int *len)
++get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
+ {
+ 	int ret;
+ 	struct ipt_get_entries get;
+@@ -1119,7 +1119,7 @@
+ 		return -EINVAL;
+ 	}
+ 
+-	t = xt_find_table_lock(AF_INET, get.name);
++	t = xt_find_table_lock(net, AF_INET, get.name);
+ 	if (t && !IS_ERR(t)) {
+ 		struct xt_table_info *private = t->private;
+ 		duprintf("t->private->number = %u\n",
+@@ -1142,7 +1142,7 @@
+ }
+ 
+ static int
+-__do_replace(const char *name, unsigned int valid_hooks,
++__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
+ 		struct xt_table_info *newinfo, unsigned int num_counters,
+ 		void __user *counters_ptr)
+ {
+@@ -1159,7 +1159,7 @@
+ 		goto out;
+ 	}
+ 
+-	t = try_then_request_module(xt_find_table_lock(AF_INET, name),
++	t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
+ 				    "iptable_%s", name);
+ 	if (!t || IS_ERR(t)) {
+ 		ret = t ? PTR_ERR(t) : -ENOENT;
+@@ -1211,7 +1211,7 @@
+ }
+ 
+ static int
+-do_replace(void __user *user, unsigned int len)
++do_replace(struct net *net, void __user *user, unsigned int len)
+ {
+ 	int ret;
+ 	struct ipt_replace tmp;
+@@ -1252,7 +1252,7 @@
+ 
+ 	duprintf("ip_tables: Translated table\n");
+ 
+-	ret = __do_replace(tmp.name, tmp.valid_hooks,
++	ret = __do_replace(net, tmp.name, tmp.valid_hooks,
+ 			      newinfo, tmp.num_counters,
+ 			      tmp.counters);
+ 	if (ret)
+@@ -1289,7 +1289,7 @@
+ }
+ 
+ static int
+-do_add_counters(void __user *user, unsigned int len, int compat)
++do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
+ {
+ 	unsigned int i;
+ 	struct xt_counters_info tmp;
+@@ -1341,7 +1341,7 @@
+ 		goto free;
+ 	}
+ 
+-	t = xt_find_table_lock(AF_INET, name);
++	t = xt_find_table_lock(net, AF_INET, name);
+ 	if (!t || IS_ERR(t)) {
+ 		ret = t ? PTR_ERR(t) : -ENOENT;
+ 		goto free;
+@@ -1745,7 +1745,7 @@
+ }
+ 
+ static int
+-compat_do_replace(void __user *user, unsigned int len)
++compat_do_replace(struct net *net, void __user *user, unsigned int len)
+ {
+ 	int ret;
+ 	struct compat_ipt_replace tmp;
+@@ -1786,7 +1786,7 @@
+ 
+ 	duprintf("compat_do_replace: Translated table\n");
+ 
+-	ret = __do_replace(tmp.name, tmp.valid_hooks,
++	ret = __do_replace(net, tmp.name, tmp.valid_hooks,
+ 			      newinfo, tmp.num_counters,
+ 			      compat_ptr(tmp.counters));
+ 	if (ret)
+@@ -1811,11 +1811,11 @@
+ 
+ 	switch (cmd) {
+ 	case IPT_SO_SET_REPLACE:
+-		ret = compat_do_replace(user, len);
++		ret = compat_do_replace(sk->sk_net, user, len);
+ 		break;
+ 
+ 	case IPT_SO_SET_ADD_COUNTERS:
+-		ret = do_add_counters(user, len, 1);
++		ret = do_add_counters(sk->sk_net, user, len, 1);
+ 		break;
+ 
+ 	default:
+@@ -1904,7 +1904,7 @@
+ }
+ 
+ static int
+-compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len)
++compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, int *len)
+ {
+ 	int ret;
+ 	struct compat_ipt_get_entries get;
+@@ -1928,7 +1928,7 @@
+ 	}
+ 
+ 	xt_compat_lock(AF_INET);
+-	t = xt_find_table_lock(AF_INET, get.name);
++	t = xt_find_table_lock(net, AF_INET, get.name);
+ 	if (t && !IS_ERR(t)) {
+ 		struct xt_table_info *private = t->private;
+ 		struct xt_table_info info;
+@@ -1966,10 +1966,10 @@
+ 
+ 	switch (cmd) {
+ 	case IPT_SO_GET_INFO:
+-		ret = get_info(user, len, 1);
++		ret = get_info(sk->sk_net, user, len, 1);
+ 		break;
+ 	case IPT_SO_GET_ENTRIES:
+-		ret = compat_get_entries(user, len);
++		ret = compat_get_entries(sk->sk_net, user, len);
+ 		break;
+ 	default:
+ 		ret = do_ipt_get_ctl(sk, cmd, user, len);
+@@ -1988,11 +1988,11 @@
+ 
+ 	switch (cmd) {
+ 	case IPT_SO_SET_REPLACE:
+-		ret = do_replace(user, len);
++		ret = do_replace(sk->sk_net, user, len);
+ 		break;
+ 
+ 	case IPT_SO_SET_ADD_COUNTERS:
+-		ret = do_add_counters(user, len, 0);
++		ret = do_add_counters(sk->sk_net, user, len, 0);
+ 		break;
+ 
+ 	default:
+@@ -2013,11 +2013,11 @@
+ 
+ 	switch (cmd) {
+ 	case IPT_SO_GET_INFO:
+-		ret = get_info(user, len, 0);
++		ret = get_info(sk->sk_net, user, len, 0);
+ 		break;
+ 
+ 	case IPT_SO_GET_ENTRIES:
+-		ret = get_entries(user, len);
++		ret = get_entries(sk->sk_net, user, len);
+ 		break;
+ 
+ 	case IPT_SO_GET_REVISION_MATCH:
+@@ -2054,7 +2054,7 @@
+ 	return ret;
+ }
+ 
+-int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
++int ipt_register_table(struct net *net, struct xt_table *table, const struct ipt_replace *repl)
+ {
+ 	int ret;
+ 	struct xt_table_info *newinfo;
+@@ -2082,7 +2082,7 @@
+ 		return ret;
+ 	}
+ 
+-	ret = xt_register_table(table, &bootstrap, newinfo);
++	ret = xt_register_table(net, table, &bootstrap, newinfo);
+ 	if (ret != 0) {
+ 		xt_free_table_info(newinfo);
+ 		return ret;
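
The pattern for the ip_tables control plane is to resolve the namespace exactly once, at the sockopt boundary, from sk->sk_net, and to thread that struct net * down through get_info(), get_entries(), do_replace() and do_add_counters(). A condensed sketch of the get-side dispatch (a hypothetical cut-down version of do_ipt_get_ctl above):

    static int example_get_ctl(struct sock *sk, int cmd,
                               void __user *user, int *len)
    {
            struct net *net = sk->sk_net;   /* caller's namespace, resolved once */

            switch (cmd) {
            case IPT_SO_GET_INFO:
                    return get_info(net, user, len, 0);     /* 0 = not compat */
            case IPT_SO_GET_ENTRIES:
                    return get_entries(net, user, len);
            default:
                    return -EINVAL;
            }
    }
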
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ipt_CLUSTERIP.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_CLUSTERIP.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/ipt_CLUSTERIP.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_CLUSTERIP.c	2007-12-19 22:49:20.000000000 -0500
+@@ -27,6 +27,7 @@
+ #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
+ #include <net/netfilter/nf_conntrack.h>
+ #include <net/checksum.h>
++#include <net/net_namespace.h>
+ 
+ #define CLUSTERIP_VERSION "0.8"
+ 
+@@ -427,7 +428,7 @@
+ 				return 0;
+ 			}
+ 
+-			dev = dev_get_by_name(e->ip.iniface);
++			dev = dev_get_by_name(&init_net, e->ip.iniface);
+ 			if (!dev) {
+ 				printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface);
+ 				return 0;
+@@ -523,6 +524,10 @@
+ 	struct arp_payload *payload;
+ 	struct clusterip_config *c;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/* we don't care about non-ethernet and non-ipv4 ARP */
+ 	if (arp->ar_hrd != htons(ARPHRD_ETHER)
+ 	    || arp->ar_pro != htons(ETH_P_IP)
+@@ -735,7 +740,7 @@
+ 		goto cleanup_target;
+ 
+ #ifdef CONFIG_PROC_FS
+-	clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net);
++	clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net);
+ 	if (!clusterip_procdir) {
+ 		printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n");
+ 		ret = -ENOMEM;
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ipt_MASQUERADE.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_MASQUERADE.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/ipt_MASQUERADE.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_MASQUERADE.c	2007-12-19 22:49:20.000000000 -0500
+@@ -131,6 +131,9 @@
+ {
+ 	struct net_device *dev = ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (event == NETDEV_DOWN) {
+ 		/* Device was downed.  Search entire table for
+ 		   conntracks which were associated with that device,
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ipt_REJECT.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_REJECT.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/ipt_REJECT.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_REJECT.c	2007-12-19 22:49:20.000000000 -0500
+@@ -137,7 +137,7 @@
+ 	   )
+ 		addr_type = RTN_LOCAL;
+ 
+-	if (ip_route_me_harder(&nskb, addr_type))
++	if (ip_route_me_harder(&init_net, &nskb, addr_type))
+ 		goto free_nskb;
+ 
+ 	nskb->ip_summed = CHECKSUM_NONE;
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ipt_ULOG.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_ULOG.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/ipt_ULOG.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_ULOG.c	2007-12-19 22:49:20.000000000 -0500
+@@ -419,7 +419,8 @@
+ 	for (i = 0; i < ULOG_MAXNLGROUPS; i++)
+ 		setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
+ 
+-	nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
++	nflognl = netlink_kernel_create(&init_net,
++					NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
+ 					NULL, THIS_MODULE);
+ 	if (!nflognl)
+ 		return -ENOMEM;
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ipt_addrtype.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_addrtype.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/ipt_addrtype.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_addrtype.c	2007-12-19 22:49:20.000000000 -0500
+@@ -24,7 +24,7 @@
+ 
+ static inline int match_type(__be32 addr, u_int16_t mask)
+ {
+-	return !!(mask & (1 << inet_addr_type(addr)));
++	return !!(mask & (1 << inet_addr_type(&init_net, addr)));
+ }
+ 
+ static int match(const struct sk_buff *skb,
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/ipt_recent.c linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_recent.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/ipt_recent.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/ipt_recent.c	2007-12-19 22:49:20.000000000 -0500
+@@ -24,6 +24,7 @@
+ #include <linux/bitops.h>
+ #include <linux/skbuff.h>
+ #include <linux/inet.h>
++#include <net/net_namespace.h>
+ 
+ #include <linux/netfilter/x_tables.h>
+ #include <linux/netfilter_ipv4/ipt_recent.h>
+@@ -485,7 +486,7 @@
+ #ifdef CONFIG_PROC_FS
+ 	if (err)
+ 		return err;
+-	proc_dir = proc_mkdir("ipt_recent", proc_net);
++	proc_dir = proc_mkdir("ipt_recent", init_net.proc_net);
+ 	if (proc_dir == NULL) {
+ 		xt_unregister_match(&recent_match);
+ 		err = -ENOMEM;
+@@ -499,7 +500,7 @@
+ 	BUG_ON(!list_empty(&tables));
+ 	xt_unregister_match(&recent_match);
+ #ifdef CONFIG_PROC_FS
+-	remove_proc_entry("ipt_recent", proc_net);
++	remove_proc_entry("ipt_recent", init_net.proc_net);
+ #endif
+ }
+ 
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/iptable_filter.c linux-2.6.22-try2-netns/net/ipv4/netfilter/iptable_filter.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/iptable_filter.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/iptable_filter.c	2007-12-19 22:49:20.000000000 -0500
+@@ -26,7 +26,7 @@
+ 	struct ipt_replace repl;
+ 	struct ipt_standard entries[3];
+ 	struct ipt_error term;
+-} initial_table __initdata = {
++} initial_table = {
+ 	.repl = {
+ 		.name = "filter",
+ 		.valid_hooks = FILTER_VALID_HOOKS,
+@@ -51,7 +51,7 @@
+ 	.term = IPT_ERROR_INIT,			/* ERROR */
+ };
+ 
+-static struct xt_table packet_filter = {
++static struct xt_table ip_packet_filter_dflt = {
+ 	.name		= "filter",
+ 	.valid_hooks	= FILTER_VALID_HOOKS,
+ 	.lock		= RW_LOCK_UNLOCKED,
+@@ -67,7 +67,9 @@
+ 	 const struct net_device *out,
+ 	 int (*okfn)(struct sk_buff *))
+ {
+-	return ipt_do_table(pskb, hook, in, out, &packet_filter);
++	struct net *net = (in?in:out)->nd_net;
++
++	return ipt_do_table(pskb, hook, in, out, net->ip_packet_filter);
+ }
+ 
+ static unsigned int
+@@ -77,6 +79,8 @@
+ 		   const struct net_device *out,
+ 		   int (*okfn)(struct sk_buff *))
+ {
++	struct net *net = (in?in:out)->nd_net;
++
+ 	/* root is playing with raw sockets. */
+ 	if ((*pskb)->len < sizeof(struct iphdr)
+ 	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+@@ -86,7 +90,7 @@
+ 		return NF_ACCEPT;
+ 	}
+ 
+-	return ipt_do_table(pskb, hook, in, out, &packet_filter);
++	return ipt_do_table(pskb, hook, in, out, net->ip_packet_filter);
+ }
+ 
+ static struct nf_hook_ops ipt_ops[] = {
+@@ -117,6 +121,30 @@
+ static int forward = NF_ACCEPT;
+ module_param(forward, bool, 0000);
+ 
++static int iptable_filter_net_init(struct net *net)
++{
++	/* Allocate the table */
++	net->ip_packet_filter = kmemdup(&ip_packet_filter_dflt,
++					sizeof(*net->ip_packet_filter),
++					GFP_KERNEL);
++	if (!net->ip_packet_filter)
++		return -ENOMEM;
++
++	/* Register table */
++	return ipt_register_table(net, net->ip_packet_filter, &initial_table.repl);
++}
++
++static void iptable_filter_net_exit(struct net *net)
++{
++	ipt_unregister_table(net->ip_packet_filter);
++	kfree(net->ip_packet_filter);
++}
++
++static struct pernet_operations iptable_filter_net_ops = {
++	.init = iptable_filter_net_init,
++	.exit = iptable_filter_net_exit,
++};
++
+ static int __init iptable_filter_init(void)
+ {
+ 	int ret;
+@@ -130,7 +158,7 @@
+ 	initial_table.entries[1].target.verdict = -forward - 1;
+ 
+ 	/* Register table */
+-	ret = ipt_register_table(&packet_filter, &initial_table.repl);
++	ret = register_pernet_subsys(&iptable_filter_net_ops);
+ 	if (ret < 0)
+ 		return ret;
+ 
+@@ -142,14 +170,14 @@
+ 	return ret;
+ 
+  cleanup_table:
+-	ipt_unregister_table(&packet_filter);
++	unregister_pernet_subsys(&iptable_filter_net_ops);
+ 	return ret;
+ }
+ 
+ static void __exit iptable_filter_fini(void)
+ {
+ 	nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+-	ipt_unregister_table(&packet_filter);
++	unregister_pernet_subsys(&iptable_filter_net_ops);
+ }
+ 
+ module_init(iptable_filter_init);
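
iptable_filter is the one table here that becomes genuinely per-namespace: each namespace receives its own copy of the template via kmemdup(), registered and torn down from pernet hooks, and the packet hooks then select net->ip_packet_filter instead of a file-scope static. A sketch of that lifecycle, mirroring the hunk but with the error path tightened; the hunk as written appears to leak the copy if ipt_register_table() fails, so the sketch frees it:

    static int example_filter_net_init(struct net *net)
    {
            int ret;

            /* every namespace gets a private, writable copy of the template */
            net->ip_packet_filter = kmemdup(&ip_packet_filter_dflt,
                                            sizeof(*net->ip_packet_filter),
                                            GFP_KERNEL);
            if (!net->ip_packet_filter)
                    return -ENOMEM;

            ret = ipt_register_table(net, net->ip_packet_filter,
                                     &initial_table.repl);
            if (ret)
                    kfree(net->ip_packet_filter);   /* don't leak on failure */
            return ret;
    }

    static void example_filter_net_exit(struct net *net)
    {
            ipt_unregister_table(net->ip_packet_filter);
            kfree(net->ip_packet_filter);
    }

One table pointer per struct net is what lets ipt_hook()/ipt_local_out_hook() above pick the right table from the packet's own namespace.
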
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/iptable_mangle.c linux-2.6.22-try2-netns/net/ipv4/netfilter/iptable_mangle.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/iptable_mangle.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/iptable_mangle.c	2007-12-19 22:49:20.000000000 -0500
+@@ -80,6 +80,10 @@
+ 	 const struct net_device *out,
+ 	 int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	return ipt_do_table(pskb, hook, in, out, &packet_mangler);
+ }
+ 
+@@ -96,6 +100,10 @@
+ 	__be32 saddr, daddr;
+ 	u_int32_t mark;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/* root is playing with raw sockets. */
+ 	if ((*pskb)->len < sizeof(struct iphdr)
+ 	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+@@ -121,7 +129,7 @@
+ 		    iph->daddr != daddr ||
+ 		    (*pskb)->mark != mark ||
+ 		    iph->tos != tos)
+-			if (ip_route_me_harder(pskb, RTN_UNSPEC))
++			if (ip_route_me_harder(&init_net, pskb, RTN_UNSPEC))
+ 				ret = NF_DROP;
+ 	}
+ 
+@@ -171,7 +179,7 @@
+ 	int ret;
+ 
+ 	/* Register table */
+-	ret = ipt_register_table(&packet_mangler, &initial_table.repl);
++	ret = ipt_register_table(&init_net, &packet_mangler, &initial_table.repl);
+ 	if (ret < 0)
+ 		return ret;
+ 
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/iptable_raw.c linux-2.6.22-try2-netns/net/ipv4/netfilter/iptable_raw.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/iptable_raw.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/iptable_raw.c	2007-12-19 22:49:20.000000000 -0500
+@@ -52,6 +52,10 @@
+ 	 const struct net_device *out,
+ 	 int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	return ipt_do_table(pskb, hook, in, out, &packet_raw);
+ }
+ 
+@@ -96,7 +100,7 @@
+ 	int ret;
+ 
+ 	/* Register table */
+-	ret = ipt_register_table(&packet_raw, &initial_table.repl);
++	ret = ipt_register_table(&init_net, &packet_raw, &initial_table.repl);
+ 	if (ret < 0)
+ 		return ret;
+ 
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c	2007-12-19 22:49:20.000000000 -0500
+@@ -120,6 +120,10 @@
+ 				 const struct net_device *out,
+ 				 int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/* We've seen it coming out the other side: confirm it */
+ 	return nf_conntrack_confirm(pskb);
+ }
+@@ -135,6 +139,10 @@
+ 	struct nf_conn_help *help;
+ 	struct nf_conntrack_helper *helper;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/* This is where we call the helper: as the packet goes out. */
+ 	ct = nf_ct_get(*pskb, &ctinfo);
+ 	if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
+@@ -157,6 +165,10 @@
+ 					  const struct net_device *out,
+ 					  int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/* Previously seen (loopback)?  Ignore.  Do this before
+ 	   fragment check. */
+ 	if ((*pskb)->nfct)
+@@ -180,6 +192,10 @@
+ 				      const struct net_device *out,
+ 				      int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	return nf_conntrack_in(PF_INET, hooknum, pskb);
+ }
+ 
+@@ -189,6 +205,10 @@
+ 					 const struct net_device *out,
+ 					 int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/* root is playing with raw sockets. */
+ 	if ((*pskb)->len < sizeof(struct iphdr)
+ 	    || ip_hdrlen(*pskb) < sizeof(struct iphdr)) {
+@@ -325,6 +345,9 @@
+ 	struct nf_conntrack_tuple_hash *h;
+ 	struct nf_conntrack_tuple tuple;
+ 
++	if (sk->sk_net != &init_net)
++		return -ENOPROTOOPT;
++
+ 	NF_CT_TUPLE_U_BLANK(&tuple);
+ 	tuple.src.u3.ip = inet->rcv_saddr;
+ 	tuple.src.u.tcp.port = inet->sport;
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c	2007-12-19 22:49:20.000000000 -0500
+@@ -11,6 +11,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+ #include <linux/percpu.h>
++#include <net/net_namespace.h>
+ 
+ #include <linux/netfilter.h>
+ #include <net/netfilter/nf_conntrack_core.h>
+@@ -378,16 +379,16 @@
+ {
+ 	struct proc_dir_entry *proc, *proc_exp, *proc_stat;
+ 
+-	proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
++	proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops);
+ 	if (!proc)
+ 		goto err1;
+ 
+-	proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
++	proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440,
+ 					&ip_exp_file_ops);
+ 	if (!proc_exp)
+ 		goto err2;
+ 
+-	proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
++	proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, init_net.proc_net_stat);
+ 	if (!proc_stat)
+ 		goto err3;
+ 
+@@ -397,16 +398,16 @@
+ 	return 0;
+ 
+ err3:
+-	proc_net_remove("ip_conntrack_expect");
++	proc_net_remove(&init_net, "ip_conntrack_expect");
+ err2:
+-	proc_net_remove("ip_conntrack");
++	proc_net_remove(&init_net, "ip_conntrack");
+ err1:
+ 	return -ENOMEM;
+ }
+ 
+ void __exit nf_conntrack_ipv4_compat_fini(void)
+ {
+-	remove_proc_entry("ip_conntrack", proc_net_stat);
+-	proc_net_remove("ip_conntrack_expect");
+-	proc_net_remove("ip_conntrack");
++	remove_proc_entry("ip_conntrack", init_net.proc_net_stat);
++	proc_net_remove(&init_net, "ip_conntrack_expect");
++	proc_net_remove(&init_net, "ip_conntrack");
+ }
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/nf_nat_rule.c linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_nat_rule.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/nf_nat_rule.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_nat_rule.c	2007-12-19 22:49:20.000000000 -0500
+@@ -98,7 +98,10 @@
+ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip)
+ {
+ 	static int warned = 0;
+-	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
++	struct flowi fl = {
++		.fl_net = &init_net,
++		.nl_u = { .ip4_u = { .daddr = dstip } }
++	};
+ 	struct rtable *rt;
+ 
+ 	if (ip_route_output_key(&rt, &fl) != 0)
+@@ -252,7 +255,7 @@
+ {
+ 	int ret;
+ 
+-	ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
++	ret = ipt_register_table(&init_net, &nat_table, &nat_initial_table.repl);
+ 	if (ret != 0)
+ 		return ret;
+ 	ret = xt_register_target(&ipt_snat_reg);
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter/nf_nat_standalone.c linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_nat_standalone.c
+--- linux-2.6.22-try2/net/ipv4/netfilter/nf_nat_standalone.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter/nf_nat_standalone.c	2007-12-19 22:49:20.000000000 -0500
+@@ -83,6 +83,10 @@
+ 	/* maniptype == SRC for postrouting. */
+ 	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/* We never see fragments: conntrack defrags on pre-routing
+ 	   and local-out, and nf_nat_out protects post-routing. */
+ 	NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)));
+@@ -172,6 +176,10 @@
+ 	unsigned int ret;
+ 	__be32 daddr = ip_hdr(*pskb)->daddr;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	ret = nf_nat_fn(hooknum, pskb, in, out, okfn);
+ 	if (ret != NF_DROP && ret != NF_STOLEN &&
+ 	    daddr != ip_hdr(*pskb)->daddr) {
+@@ -194,6 +202,10 @@
+ #endif
+ 	unsigned int ret;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/* root is playing with raw sockets. */
+ 	if ((*pskb)->len < sizeof(struct iphdr) ||
+ 	    ip_hdrlen(*pskb) < sizeof(struct iphdr))
+@@ -227,6 +239,10 @@
+ 	enum ip_conntrack_info ctinfo;
+ 	unsigned int ret;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/* root is playing with raw sockets. */
+ 	if ((*pskb)->len < sizeof(struct iphdr) ||
+ 	    ip_hdrlen(*pskb) < sizeof(struct iphdr))
+@@ -239,7 +255,7 @@
+ 
+ 		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+ 		    ct->tuplehash[!dir].tuple.src.u3.ip) {
+-			if (ip_route_me_harder(pskb, RTN_UNSPEC))
++			if (ip_route_me_harder(&init_net, pskb, RTN_UNSPEC))
+ 				ret = NF_DROP;
+ 		}
+ #ifdef CONFIG_XFRM
+@@ -262,6 +278,10 @@
+ 	struct nf_conn *ct;
+ 	enum ip_conntrack_info ctinfo;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	ct = nf_ct_get(*pskb, &ctinfo);
+ 	if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
+ 		DEBUGP("nf_nat_standalone: adjusting sequence number\n");
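The five hunks above apply one guard to every NAT hook: look at whichever
device handle the hook received (exactly one of in/out is set, depending on
the hook point) and let the packet pass untouched when that device lives
outside the initial namespace. A minimal sketch of the idiom, with a
hypothetical hook name; nd_net and init_net are the fields the patch itself
relies on:

	static unsigned int example_nat_hook(unsigned int hooknum,
					     struct sk_buff **pskb,
					     const struct net_device *in,
					     const struct net_device *out,
					     int (*okfn)(struct sk_buff *))
	{
		/* Hooks before routing set 'in'; hooks after set 'out'. */
		const struct net_device *dev = in ? in : out;

		/* Packets from other namespaces are not NAT'd: accept them. */
		if (dev->nd_net != &init_net)
			return NF_ACCEPT;

		/* ... init_net-only NAT processing would continue here ... */
		return NF_ACCEPT;
	}

Returning NF_ACCEPT rather than NF_DROP matters: NAT is simply disabled
outside init_net, not turned into a firewall.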
+diff -Nurb linux-2.6.22-try2/net/ipv4/netfilter.c linux-2.6.22-try2-netns/net/ipv4/netfilter.c
+--- linux-2.6.22-try2/net/ipv4/netfilter.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/netfilter.c	2007-12-19 22:49:20.000000000 -0500
+@@ -8,7 +8,7 @@
+ #include <net/ip.h>
+ 
+ /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
+-int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
++int ip_route_me_harder(struct net *net, struct sk_buff **pskb, unsigned addr_type)
+ {
+ 	const struct iphdr *iph = ip_hdr(*pskb);
+ 	struct rtable *rt;
+@@ -17,7 +17,8 @@
+ 	unsigned int hh_len;
+ 	unsigned int type;
+ 
+-	type = inet_addr_type(iph->saddr);
++	fl.fl_net = net;
++	type = inet_addr_type(net, iph->saddr);
+ 	if (addr_type == RTN_UNSPEC)
+ 		addr_type = type;
+ 
+@@ -155,12 +156,13 @@
+ 	const struct ip_rt_info *rt_info = nf_info_reroute(info);
+ 
+ 	if (info->hook == NF_IP_LOCAL_OUT) {
++		struct net *net = (info->indev?info->indev:info->outdev)->nd_net;
+ 		const struct iphdr *iph = ip_hdr(*pskb);
+ 
+ 		if (!(iph->tos == rt_info->tos
+ 		      && iph->daddr == rt_info->daddr
+ 		      && iph->saddr == rt_info->saddr))
+-			return ip_route_me_harder(pskb, RTN_UNSPEC);
++			return ip_route_me_harder(net, pskb, RTN_UNSPEC);
+ 	}
+ 	return 0;
+ }
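ip_route_me_harder is representative of the broader calling convention this
patch introduces: helpers that used to consult process-wide state
(inet_addr_type, route lookups through a bare flowi) now take a struct net *
up front, and each caller derives it from whatever context it holds, a
device's nd_net here, a socket's sk_net elsewhere. A hedged sketch of a
caller, with an illustrative function name:

	static unsigned int example_saddr_type(const struct sk_buff *skb)
	{
		/* Derive the namespace from the receiving device ... */
		struct net *net = skb->dev->nd_net;
		const struct iphdr *iph = ip_hdr(skb);

		/* ... and thread it into the now namespace-aware helper. */
		return inet_addr_type(net, iph->saddr);
	}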
+diff -Nurb linux-2.6.22-try2/net/ipv4/proc.c linux-2.6.22-try2-netns/net/ipv4/proc.c
+--- linux-2.6.22-try2/net/ipv4/proc.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/proc.c	2007-12-19 22:49:20.000000000 -0500
+@@ -44,6 +44,7 @@
+ #include <linux/seq_file.h>
+ #include <net/sock.h>
+ #include <net/raw.h>
++#include <net/net_namespace.h>
+ 
+ static int fold_prot_inuse(struct proto *proto)
+ {
+@@ -69,8 +70,9 @@
+ 	seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
+ 	seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
+ 	seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
+-	seq_printf(seq,  "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
+-		   atomic_read(&ip_frag_mem));
++	seq_printf(seq,  "FRAG: inuse %d memory %d\n", 
++		   init_net.ip_frag_nqueues,
++		   atomic_read(&init_net.ip_frag_mem));
+ 	return 0;
+ }
+ 
+@@ -260,7 +262,8 @@
+ 		seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
+ 
+ 	seq_printf(seq, "\nIp: %d %d",
+-		   IPV4_DEVCONF_ALL(FORWARDING) ? 1 : 2, sysctl_ip_default_ttl);
++		   IPV4_DEVCONF_ALL(&init_net, FORWARDING) ? 1 : 2,
++		   init_net.sysctl_ip_default_ttl);
+ 
+ 	for (i = 0; snmp4_ipstats_list[i].name != NULL; i++)
+ 		seq_printf(seq, " %lu",
+@@ -380,20 +383,20 @@
+ {
+ 	int rc = 0;
+ 
+-	if (!proc_net_fops_create("netstat", S_IRUGO, &netstat_seq_fops))
++	if (!proc_net_fops_create(&init_net, "netstat", S_IRUGO, &netstat_seq_fops))
+ 		goto out_netstat;
+ 
+-	if (!proc_net_fops_create("snmp", S_IRUGO, &snmp_seq_fops))
++	if (!proc_net_fops_create(&init_net, "snmp", S_IRUGO, &snmp_seq_fops))
+ 		goto out_snmp;
+ 
+-	if (!proc_net_fops_create("sockstat", S_IRUGO, &sockstat_seq_fops))
++	if (!proc_net_fops_create(&init_net, "sockstat", S_IRUGO, &sockstat_seq_fops))
+ 		goto out_sockstat;
+ out:
+ 	return rc;
+ out_sockstat:
+-	proc_net_remove("snmp");
++	proc_net_remove(&init_net, "snmp");
+ out_snmp:
+-	proc_net_remove("netstat");
++	proc_net_remove(&init_net, "netstat");
+ out_netstat:
+ 	rc = -ENOMEM;
+ 	goto out;
+diff -Nurb linux-2.6.22-try2/net/ipv4/raw.c linux-2.6.22-try2-netns/net/ipv4/raw.c
+--- linux-2.6.22-try2/net/ipv4/raw.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/raw.c	2007-12-19 23:30:30.000000000 -0500
+@@ -73,6 +73,7 @@
+ #include <net/inet_common.h>
+ #include <net/checksum.h>
+ #include <net/xfrm.h>
++#include <net/net_namespace.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+@@ -101,7 +102,7 @@
+ 	write_unlock_bh(&raw_v4_lock);
+ }
+ 
+-struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
++struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, unsigned short num,
+ 			     __be32 raddr, __be32 laddr,
+ 			     int dif, int tag)
+ {
+@@ -110,6 +111,9 @@
+ 	sk_for_each_from(sk, node) {
+ 		struct inet_sock *inet = inet_sk(sk);
+ 
++		if (sk->sk_net != net)
++			continue;
++
+ 		if (inet->num == num 					&&
+ 		    !(inet->daddr && inet->daddr != raddr) 		&&
+ 		    (!sk->sk_nx_info || tag == 1 || sk->sk_nid == tag)	&&
+@@ -152,6 +156,7 @@
+  */
+ int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
+ {
++	struct net *net = skb->dev->nd_net;
+ 	struct sock *sk;
+ 	struct hlist_head *head;
+ 	int delivered = 0;
+@@ -160,7 +165,7 @@
+ 	head = &raw_v4_htable[hash];
+ 	if (hlist_empty(head))
+ 		goto out;
+-	sk = __raw_v4_lookup(__sk_head(head), iph->protocol,
++	sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol,
+ 			     iph->saddr, iph->daddr,
+ 			     skb->dev->ifindex, skb->skb_tag);
+ 
+@@ -173,7 +178,7 @@
+ 			if (clone)
+ 				raw_rcv(sk, clone);
+ 		}
+-		sk = __raw_v4_lookup(sk_next(sk), iph->protocol,
++		sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol,
+ 				     iph->saddr, iph->daddr,
+ 				     skb->dev->ifindex, skb->skb_tag);
+ 	}
+@@ -484,7 +489,8 @@
+ 	}
+ 
+ 	{
+-		struct flowi fl = { .oif = ipc.oif,
++		struct flowi fl = { .fl_net = sk->sk_net,
++				    .oif = ipc.oif,
+ 				    .nl_u = { .ip4_u =
+ 					      { .daddr = daddr,
+ 						.saddr = saddr,
+@@ -574,7 +580,7 @@
+ 	if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
+ 		goto out;
+ 	v4_map_sock_addr(inet, addr, &nsa);
+-	chk_addr_ret = inet_addr_type(nsa.saddr);
++	chk_addr_ret = inet_addr_type(sk->sk_net, nsa.saddr);
+ 	ret = -EADDRNOTAVAIL;
+ 	if (nsa.saddr && chk_addr_ret != RTN_LOCAL &&
+ 	    chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
+@@ -798,6 +804,7 @@
+ 
+ #ifdef CONFIG_PROC_FS
+ struct raw_iter_state {
++	struct net *net;
+ 	int bucket;
+ };
+ 
+@@ -811,11 +818,14 @@
+ 	for (state->bucket = 0; state->bucket < RAWV4_HTABLE_SIZE; ++state->bucket) {
+ 		struct hlist_node *node;
+ 
+-		sk_for_each(sk, node, &raw_v4_htable[state->bucket])
++		sk_for_each(sk, node, &raw_v4_htable[state->bucket]) {
++ 			if (sk->sk_net != state->net)
++ 				continue;
+ 			if (sk->sk_family == PF_INET &&
+ 				nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
+ 				goto found;
+ 	}
++	}
+ 	sk = NULL;
+ found:
+ 	return sk;
+@@ -830,7 +840,7 @@
+ try_again:
+ 		;
+ 	} while (sk && (sk->sk_family != PF_INET ||
+-		!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
++		!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT) || (sk->sk_net != state->net)));
+ 
+ 	if (!sk && ++state->bucket < RAWV4_HTABLE_SIZE) {
+ 		sk = sk_head(&raw_v4_htable[state->bucket]);
+@@ -933,6 +943,7 @@
+ 	seq = file->private_data;
+ 	seq->private = s;
+ 	memset(s, 0, sizeof(*s));
++	s->net = get_net(PROC_NET(inode));
+ out:
+ 	return rc;
+ out_kfree:
+@@ -940,23 +951,46 @@
+ 	goto out;
+ }
+ 
++static int raw_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct raw_iter_state *state = seq->private;
++	put_net(state->net);
++	return seq_release_private(inode, file);
++}
++
+ static const struct file_operations raw_seq_fops = {
+ 	.owner	 = THIS_MODULE,
+ 	.open	 = raw_seq_open,
+ 	.read	 = seq_read,
+ 	.llseek	 = seq_lseek,
+-	.release = seq_release_private,
++	.release = raw_seq_release,
+ };
+ 
+-int __init raw_proc_init(void)
++static int raw_proc_net_init(struct net *net)
+ {
+-	if (!proc_net_fops_create("raw", S_IRUGO, &raw_seq_fops))
++	if (!proc_net_fops_create(net, "raw", S_IRUGO, &raw_seq_fops))
+ 		return -ENOMEM;
+ 	return 0;
+ }
+ 
++static void raw_proc_net_exit(struct net *net)
++{
++	proc_net_remove(net, "raw");
++}
++
++static struct pernet_operations raw_proc_net_ops = {
++	.init = raw_proc_net_init,
++	.exit = raw_proc_net_exit,
++};
++
++int __init raw_proc_init(void)
++{
++	return register_pernet_subsys(&raw_proc_net_ops);
++}
++
+ void __init raw_proc_exit(void)
+ {
+-	proc_net_remove("raw");
++	unregister_pernet_subsys(&raw_proc_net_ops);
+ }
+ #endif /* CONFIG_PROC_FS */
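raw.c shows the full per-net /proc lifecycle in one place:
register_pernet_subsys() arranges for raw_proc_net_init/raw_proc_net_exit to
run once per namespace, and the seq_file pins the namespace between open and
release so the iterator's state->net can never dangle. The open side, already
in the hunk above, does s->net = get_net(PROC_NET(inode)); the matching
release is the part worth isolating, since forgetting it leaks a namespace
reference per reader. A condensed sketch with illustrative names:

	struct example_iter_state {
		struct net *net;	/* pinned while the file stays open */
	};

	static int example_seq_release(struct inode *inode, struct file *file)
	{
		struct seq_file *seq = file->private_data;
		struct example_iter_state *st = seq->private;

		put_net(st->net);	/* pairs with get_net() in open */
		return seq_release_private(inode, file);
	}

The same open/hold, release/put shape recurs below in route.c, tcp_ipv4.c and
udp.c.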
+diff -Nurb linux-2.6.22-try2/net/ipv4/route.c linux-2.6.22-try2-netns/net/ipv4/route.c
+--- linux-2.6.22-try2/net/ipv4/route.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/route.c	2007-12-19 22:49:20.000000000 -0500
+@@ -102,6 +102,7 @@
+ #include <net/icmp.h>
+ #include <net/xfrm.h>
+ #include <net/netevent.h>
++#include <net/net_namespace.h>
+ #include <net/rtnetlink.h>
+ #ifdef CONFIG_SYSCTL
+ #include <linux/sysctl.h>
+@@ -265,6 +266,7 @@
+ 
+ #ifdef CONFIG_PROC_FS
+ struct rt_cache_iter_state {
++	struct net *net;
+ 	int bucket;
+ };
+ 
+@@ -333,6 +335,7 @@
+ 
+ static int rt_cache_seq_show(struct seq_file *seq, void *v)
+ {
++	struct rt_cache_iter_state *st = seq->private;
+ 	if (v == SEQ_START_TOKEN)
+ 		seq_printf(seq, "%-127s\n",
+ 			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
+@@ -342,6 +345,9 @@
+ 		struct rtable *r = v;
+ 		char temp[256];
+ 
++		if (r->fl.fl_net != st->net)
++			return 0;
++
+ 		sprintf(temp, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t"
+ 			      "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X",
+ 			r->u.dst.dev ? r->u.dst.dev->name : "*",
+@@ -384,6 +390,7 @@
+ 	seq          = file->private_data;
+ 	seq->private = s;
+ 	memset(s, 0, sizeof(*s));
++	s->net = get_net(PROC_NET(inode));
+ out:
+ 	return rc;
+ out_kfree:
+@@ -391,12 +398,20 @@
+ 	goto out;
+ }
+ 
++static int rt_cache_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct rt_cache_iter_state *st = seq->private;
++	put_net(st->net);
++	return seq_release_private(inode, file);
++}
++
+ static const struct file_operations rt_cache_seq_fops = {
+ 	.owner	 = THIS_MODULE,
+ 	.open	 = rt_cache_seq_open,
+ 	.read	 = seq_read,
+ 	.llseek	 = seq_lseek,
+-	.release = seq_release_private,
++	.release = rt_cache_seq_release,
+ };
+ 
+ 
+@@ -562,13 +577,14 @@
+ 
+ static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
+ {
+-	return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
++	return (((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
+ 		(fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) |
+ 		(fl1->mark ^ fl2->mark) |
+ 		(*(u16 *)&fl1->nl_u.ip4_u.tos ^
+ 		 *(u16 *)&fl2->nl_u.ip4_u.tos) |
+ 		(fl1->oif ^ fl2->oif) |
+-		(fl1->iif ^ fl2->iif)) == 0;
++		(fl1->iif ^ fl2->iif)) == 0) &&
++		fl1->fl_net == fl2->fl_net;
+ }
+ 
+ /* This runs via a timer and thus is always in BH context. */
+@@ -963,7 +979,7 @@
+ 	static DEFINE_SPINLOCK(rt_peer_lock);
+ 	struct inet_peer *peer;
+ 
+-	peer = inet_getpeer(rt->rt_dst, create);
++	peer = inet_getpeer(rt->fl.fl_net, rt->rt_dst, create);
+ 
+ 	spin_lock_bh(&rt_peer_lock);
+ 	if (rt->peer == NULL) {
+@@ -1056,7 +1072,7 @@
+ 		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
+ 			goto reject_redirect;
+ 	} else {
+-		if (inet_addr_type(new_gw) != RTN_UNICAST)
++		if (inet_addr_type(dev->nd_net, new_gw) != RTN_UNICAST)
+ 			goto reject_redirect;
+ 	}
+ 
+@@ -1097,6 +1113,7 @@
+ 
+ 				/* Copy all the information. */
+ 				*rt = *rth;
++				hold_net(rt->fl.fl_net);
+ 				INIT_RCU_HEAD(&rt->u.dst.rcu_head);
+ 				rt->u.dst.__use		= 1;
+ 				atomic_set(&rt->u.dst.__refcnt, 1);
+@@ -1315,7 +1332,7 @@
+ 	__be32  daddr = iph->daddr;
+ 	unsigned short est_mtu = 0;
+ 
+-	if (ipv4_config.no_pmtu_disc)
++	if (init_net.sysctl_ipv4_no_pmtu_disc)
+ 		return 0;
+ 
+ 	for (i = 0; i < 2; i++) {
+@@ -1397,6 +1414,7 @@
+ 		rt->idev = NULL;
+ 		in_dev_put(idev);
+ 	}
++	release_net(rt->fl.fl_net);
+ }
+ 
+ static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+@@ -1404,8 +1422,9 @@
+ {
+ 	struct rtable *rt = (struct rtable *) dst;
+ 	struct in_device *idev = rt->idev;
+-	if (dev != &loopback_dev && idev && idev->dev == dev) {
+-		struct in_device *loopback_idev = in_dev_get(&loopback_dev);
++	struct net *net = dev->nd_net;
++	if (dev != &net->loopback_dev && idev && idev->dev == dev) {
++		struct in_device *loopback_idev = in_dev_get(&net->loopback_dev);
+ 		if (loopback_idev) {
+ 			rt->idev = loopback_idev;
+ 			in_dev_put(idev);
+@@ -1492,7 +1511,7 @@
+ 		rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
+ 
+ 	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
+-		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
++		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = init_net.sysctl_ip_default_ttl;
+ 	if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU)
+ 		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
+ 	if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0)
+@@ -1513,6 +1532,7 @@
+ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ 				u8 tos, struct net_device *dev, int our)
+ {
++	struct net *net = dev->nd_net;
+ 	unsigned hash;
+ 	struct rtable *rth;
+ 	__be32 spec_dst;
+@@ -1546,6 +1566,7 @@
+ 	rth->u.dst.flags= DST_HOST;
+ 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
+ 		rth->u.dst.flags |= DST_NOPOLICY;
++	rth->fl.fl_net	= hold_net(net);
+ 	rth->fl.fl4_dst	= daddr;
+ 	rth->rt_dst	= daddr;
+ 	rth->fl.fl4_tos	= tos;
+@@ -1557,7 +1578,7 @@
+ #endif
+ 	rth->rt_iif	=
+ 	rth->fl.iif	= dev->ifindex;
+-	rth->u.dst.dev	= &loopback_dev;
++	rth->u.dst.dev	= &net->loopback_dev;
+ 	dev_hold(rth->u.dst.dev);
+ 	rth->idev	= in_dev_get(rth->u.dst.dev);
+ 	rth->fl.oif	= 0;
+@@ -1686,6 +1707,7 @@
+ 		rth->u.dst.flags |= DST_NOPOLICY;
+ 	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
+ 		rth->u.dst.flags |= DST_NOXFRM;
++	rth->fl.fl_net	= hold_net(in_dev->dev->nd_net);
+ 	rth->fl.fl4_dst	= daddr;
+ 	rth->rt_dst	= daddr;
+ 	rth->fl.fl4_tos	= tos;
+@@ -1754,9 +1776,11 @@
+ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ 			       u8 tos, struct net_device *dev)
+ {
++	struct net *net = dev->nd_net;
+ 	struct fib_result res;
+ 	struct in_device *in_dev = in_dev_get(dev);
+-	struct flowi fl = { .nl_u = { .ip4_u =
++	struct flowi fl = { .fl_net = net,
++			    .nl_u = { .ip4_u =
+ 				      { .daddr = daddr,
+ 					.saddr = saddr,
+ 					.tos = tos,
+@@ -1814,7 +1838,7 @@
+ 	if (res.type == RTN_LOCAL) {
+ 		int result;
+ 		result = fib_validate_source(saddr, daddr, tos,
+-					     loopback_dev.ifindex,
++					     net->loopback_dev.ifindex,
+ 					     dev, &spec_dst, &itag);
+ 		if (result < 0)
+ 			goto martian_source;
+@@ -1870,6 +1894,7 @@
+ 	rth->u.dst.flags= DST_HOST;
+ 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
+ 		rth->u.dst.flags |= DST_NOPOLICY;
++	rth->fl.fl_net	= hold_net(net);
+ 	rth->fl.fl4_dst	= daddr;
+ 	rth->rt_dst	= daddr;
+ 	rth->fl.fl4_tos	= tos;
+@@ -1881,7 +1906,7 @@
+ #endif
+ 	rth->rt_iif	=
+ 	rth->fl.iif	= dev->ifindex;
+-	rth->u.dst.dev	= &loopback_dev;
++	rth->u.dst.dev	= &net->loopback_dev;
+ 	dev_hold(rth->u.dst.dev);
+ 	rth->idev	= in_dev_get(rth->u.dst.dev);
+ 	rth->rt_gateway	= daddr;
+@@ -1939,6 +1964,7 @@
+ 	struct rtable * rth;
+ 	unsigned	hash;
+ 	int iif = dev->ifindex;
++	struct net *net = dev->nd_net;
+ 
+ 	tos &= IPTOS_RT_MASK;
+ 	hash = rt_hash(daddr, saddr, iif);
+@@ -1951,7 +1977,8 @@
+ 		    rth->fl.iif == iif &&
+ 		    rth->fl.oif == 0 &&
+ 		    rth->fl.mark == skb->mark &&
+-		    rth->fl.fl4_tos == tos) {
++		    rth->fl.fl4_tos == tos &&
++		    rth->fl.fl_net == net) {
+ 			rth->u.dst.lastuse = jiffies;
+ 			dst_hold(&rth->u.dst);
+ 			rth->u.dst.__use++;
+@@ -2063,6 +2090,7 @@
+ 	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
+ 		rth->u.dst.flags |= DST_NOPOLICY;
+ 
++	rth->fl.fl_net	= hold_net(oldflp->fl_net);
+ 	rth->fl.fl4_dst	= oldflp->fl4_dst;
+ 	rth->fl.fl4_tos	= tos;
+ 	rth->fl.fl4_src	= oldflp->fl4_src;
+@@ -2142,7 +2170,9 @@
+ static int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
+ {
+ 	u32 tos	= RT_FL_TOS(oldflp);
+-	struct flowi fl = { .nl_u = { .ip4_u =
++	struct net *net = oldflp->fl_net;
++	struct flowi fl = { .fl_net = net,
++			    .nl_u = { .ip4_u =
+ 				      { .daddr = oldflp->fl4_dst,
+ 					.saddr = oldflp->fl4_src,
+ 					.tos = tos & IPTOS_RT_MASK,
+@@ -2151,7 +2181,7 @@
+ 						  RT_SCOPE_UNIVERSE),
+ 				      } },
+ 			    .mark = oldflp->mark,
+-			    .iif = loopback_dev.ifindex,
++			    .iif = net->loopback_dev.ifindex,
+ 			    .oif = oldflp->oif };
+ 	struct fib_result res;
+ 	unsigned flags = 0;
+@@ -2173,7 +2203,7 @@
+ 			goto out;
+ 
+ 		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
+-		dev_out = ip_dev_find(oldflp->fl4_src);
++		dev_out = ip_dev_find(net, oldflp->fl4_src);
+ 		if (dev_out == NULL)
+ 			goto out;
+ 
+@@ -2212,7 +2242,7 @@
+ 
+ 
+ 	if (oldflp->oif) {
+-		dev_out = dev_get_by_index(oldflp->oif);
++		dev_out = dev_get_by_index(net, oldflp->oif);
+ 		err = -ENODEV;
+ 		if (dev_out == NULL)
+ 			goto out;
+@@ -2245,9 +2275,9 @@
+ 			fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
+ 		if (dev_out)
+ 			dev_put(dev_out);
+-		dev_out = &loopback_dev;
++		dev_out = &net->loopback_dev;
+ 		dev_hold(dev_out);
+-		fl.oif = loopback_dev.ifindex;
++		fl.oif = net->loopback_dev.ifindex;
+ 		res.type = RTN_LOCAL;
+ 		flags |= RTCF_LOCAL;
+ 		goto make_route;
+@@ -2292,7 +2322,7 @@
+ 			fl.fl4_src = fl.fl4_dst;
+ 		if (dev_out)
+ 			dev_put(dev_out);
+-		dev_out = &loopback_dev;
++		dev_out = &net->loopback_dev;
+ 		dev_hold(dev_out);
+ 		fl.oif = dev_out->ifindex;
+ 		if (res.fi)
+@@ -2346,6 +2376,7 @@
+ 		    rth->fl.iif == 0 &&
+ 		    rth->fl.oif == flp->oif &&
+ 		    rth->fl.mark == flp->mark &&
++		    rth->fl.fl_net == flp->fl_net &&
+ 		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
+ 			    (IPTOS_RT_MASK | RTO_ONLINK))) {
+ 			rth->u.dst.lastuse = jiffies;
+@@ -2522,7 +2553,7 @@
+ 		__be32 dst = rt->rt_dst;
+ 
+ 		if (MULTICAST(dst) && !LOCAL_MCAST(dst) &&
+-		    IPV4_DEVCONF_ALL(MC_FORWARDING)) {
++		    IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) {
+ 			int err = ipmr_get_route(skb, r, nowait);
+ 			if (err <= 0) {
+ 				if (!nowait) {
+@@ -2553,6 +2584,7 @@
+ 
+ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+ {
++	struct net *net = in_skb->sk->sk_net;
+ 	struct rtmsg *rtm;
+ 	struct nlattr *tb[RTA_MAX+1];
+ 	struct rtable *rt = NULL;
+@@ -2591,7 +2623,7 @@
+ 	if (iif) {
+ 		struct net_device *dev;
+ 
+-		dev = __dev_get_by_index(iif);
++		dev = __dev_get_by_index(net, iif);
+ 		if (dev == NULL) {
+ 			err = -ENODEV;
+ 			goto errout_free;
+@@ -2608,6 +2640,7 @@
+ 			err = -rt->u.dst.error;
+ 	} else {
+ 		struct flowi fl = {
++			.fl_net = net,
+ 			.nl_u = {
+ 				.ip4_u = {
+ 					.daddr = dst,
+@@ -2632,7 +2665,7 @@
+ 	if (err <= 0)
+ 		goto errout_free;
+ 
+-	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
++	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
+ errout:
+ 	return err;
+ 
+@@ -2945,6 +2978,48 @@
+ }
+ __setup("rhash_entries=", set_rhash_entries);
+ 
++
++static void ip_rt_net_exit(struct net *net)
++{
++#ifdef CONFIG_PROC_FS
++# ifdef CONFIG_NET_CLS_ROUTE
++	proc_net_remove(net, "rt_acct");
++# endif
++	remove_proc_entry("rt_cache", net->proc_net_stat);
++	proc_net_remove(net, "rt_cache");
++#endif
++	rt_run_flush(0);
++}
++
++static int ip_rt_net_init(struct net *net)
++{
++	int error = -ENOMEM;
++#ifdef CONFIG_PROC_FS
++	struct proc_dir_entry *rtstat_pde;
++	if (!proc_net_fops_create(net, "rt_cache", S_IRUGO, &rt_cache_seq_fops))
++		goto out;
++	if (!(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
++			net->proc_net_stat)))
++		goto out;
++	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
++# ifdef CONFIG_NET_CLS_ROUTE
++	if (!create_proc_read_entry("rt_acct", 0, net->proc_net,
++		    ip_rt_acct_read, NULL))
++		goto out;
++# endif
++#endif
++	error = 0;
++out:
++	if (error)
++		ip_rt_net_exit(net);
++	return error;
++}
++
++struct pernet_operations ip_rt_net_ops = {
++	.init = ip_rt_net_init,
++	.exit = ip_rt_net_exit,
++};
++ 
+ int __init ip_rt_init(void)
+ {
+ 	int rc = 0;
+@@ -3008,20 +3083,7 @@
+ 		ip_rt_secret_interval;
+ 	add_timer(&rt_secret_timer);
+ 
+-#ifdef CONFIG_PROC_FS
+-	{
+-	struct proc_dir_entry *rtstat_pde = NULL; /* keep gcc happy */
+-	if (!proc_net_fops_create("rt_cache", S_IRUGO, &rt_cache_seq_fops) ||
+-	    !(rtstat_pde = create_proc_entry("rt_cache", S_IRUGO,
+-					     proc_net_stat))) {
+-		return -ENOMEM;
+-	}
+-	rtstat_pde->proc_fops = &rt_cpu_seq_fops;
+-	}
+-#ifdef CONFIG_NET_CLS_ROUTE
+-	create_proc_read_entry("rt_acct", 0, proc_net, ip_rt_acct_read, NULL);
+-#endif
+-#endif
++	register_pernet_subsys(&ip_rt_net_ops);
+ #ifdef CONFIG_XFRM
+ 	xfrm_init();
+ 	xfrm4_init();
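Two invariants carry the routing cache across namespaces: a cached rtable
holds a reference on its namespace for as long as it exists (hold_net() at
every creation site, release_net() in ipv4_dst_destroy), and the namespace is
part of the cache key, so equal addresses in different namespaces never
alias. The patch folds the latter into compare_keys' xor chain; written out
plainly, the predicate it computes is roughly this (a hedged restatement, not
the kernel's code):

	static inline int example_keys_equal(const struct flowi *a,
					     const struct flowi *b)
	{
		return a->fl4_dst == b->fl4_dst &&
		       a->fl4_src == b->fl4_src &&
		       a->mark    == b->mark    &&
		       a->fl4_tos == b->fl4_tos &&
		       a->oif     == b->oif     &&
		       a->iif     == b->iif     &&
		       a->fl_net  == b->fl_net;	/* new: namespace in the key */
	}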
+diff -Nurb linux-2.6.22-try2/net/ipv4/syncookies.c linux-2.6.22-try2-netns/net/ipv4/syncookies.c
+--- linux-2.6.22-try2/net/ipv4/syncookies.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/syncookies.c	2007-12-19 22:49:20.000000000 -0500
+@@ -253,7 +253,8 @@
+ 	 * no easy way to do this.
+ 	 */
+ 	{
+-		struct flowi fl = { .nl_u = { .ip4_u =
++		struct flowi fl = { .fl_net = &init_net,
++				    .nl_u = { .ip4_u =
+ 					      { .daddr = ((opt && opt->srr) ?
+ 							  opt->faddr :
+ 							  ireq->rmt_addr),
+diff -Nurb linux-2.6.22-try2/net/ipv4/sysctl_net_ipv4.c linux-2.6.22-try2-netns/net/ipv4/sysctl_net_ipv4.c
+--- linux-2.6.22-try2/net/ipv4/sysctl_net_ipv4.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/sysctl_net_ipv4.c	2007-12-19 22:49:20.000000000 -0500
+@@ -29,21 +29,21 @@
+ static int ip_local_port_range_max[] = { 65535, 65535 };
+ #endif
+ 
+-struct ipv4_config ipv4_config;
+-
+ #ifdef CONFIG_SYSCTL
+ 
+ static
+ int ipv4_sysctl_forward(ctl_table *ctl, int write, struct file * filp,
+ 			void __user *buffer, size_t *lenp, loff_t *ppos)
+ {
+-	int val = IPV4_DEVCONF_ALL(FORWARDING);
++	struct net *net = ctl->extra2;
++	int *valp = ctl->data;
++	int old = *valp;
+ 	int ret;
+ 
+ 	ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
+ 
+-	if (write && IPV4_DEVCONF_ALL(FORWARDING) != val)
+-		inet_forward_change();
++	if (write && *valp != old)
++		inet_forward_change(net);
+ 
+ 	return ret;
+ }
+@@ -53,6 +53,7 @@
+ 			 void __user *oldval, size_t __user *oldlenp,
+ 			 void __user *newval, size_t newlen)
+ {
++	struct net *net = table->extra2;
+ 	int *valp = table->data;
+ 	int new;
+ 
+@@ -85,7 +86,7 @@
+ 	}
+ 
+ 	*valp = new;
+-	inet_forward_change();
++	inet_forward_change(net);
+ 	return 1;
+ }
+ 
+@@ -188,22 +189,6 @@
+ 
+ ctl_table ipv4_table[] = {
+ 	{
+-		.ctl_name	= NET_IPV4_TCP_TIMESTAMPS,
+-		.procname	= "tcp_timestamps",
+-		.data		= &sysctl_tcp_timestamps,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec
+-	},
+-	{
+-		.ctl_name	= NET_IPV4_TCP_WINDOW_SCALING,
+-		.procname	= "tcp_window_scaling",
+-		.data		= &sysctl_tcp_window_scaling,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec
+-	},
+-	{
+ 		.ctl_name	= NET_IPV4_TCP_SACK,
+ 		.procname	= "tcp_sack",
+ 		.data		= &sysctl_tcp_sack,
+@@ -220,40 +205,6 @@
+ 		.proc_handler	= &proc_dointvec
+ 	},
+ 	{
+-		.ctl_name	= NET_IPV4_FORWARD,
+-		.procname	= "ip_forward",
+-		.data		= &IPV4_DEVCONF_ALL(FORWARDING),
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &ipv4_sysctl_forward,
+-		.strategy	= &ipv4_sysctl_forward_strategy
+-	},
+-	{
+-		.ctl_name	= NET_IPV4_DEFAULT_TTL,
+-		.procname	= "ip_default_ttl",
+-		.data		= &sysctl_ip_default_ttl,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &ipv4_doint_and_flush,
+-		.strategy	= &ipv4_doint_and_flush_strategy,
+-	},
+-	{
+-		.ctl_name	= NET_IPV4_NO_PMTU_DISC,
+-		.procname	= "ip_no_pmtu_disc",
+-		.data		= &ipv4_config.no_pmtu_disc,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec
+-	},
+-	{
+-		.ctl_name	= NET_IPV4_NONLOCAL_BIND,
+-		.procname	= "ip_nonlocal_bind",
+-		.data		= &sysctl_ip_nonlocal_bind,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec
+-	},
+-	{
+ 		.ctl_name	= NET_IPV4_TCP_SYN_RETRIES,
+ 		.procname	= "tcp_syn_retries",
+ 		.data		= &sysctl_tcp_syn_retries,
+@@ -286,39 +237,6 @@
+ 		.proc_handler	= &proc_dointvec
+ 	},
+ 	{
+-		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
+-		.procname	= "ipfrag_high_thresh",
+-		.data		= &sysctl_ipfrag_high_thresh,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec
+-	},
+-	{
+-		.ctl_name	= NET_IPV4_IPFRAG_LOW_THRESH,
+-		.procname	= "ipfrag_low_thresh",
+-		.data		= &sysctl_ipfrag_low_thresh,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec
+-	},
+-	{
+-		.ctl_name	= NET_IPV4_DYNADDR,
+-		.procname	= "ip_dynaddr",
+-		.data		= &sysctl_ip_dynaddr,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec
+-	},
+-	{
+-		.ctl_name	= NET_IPV4_IPFRAG_TIME,
+-		.procname	= "ipfrag_time",
+-		.data		= &sysctl_ipfrag_time,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec_jiffies,
+-		.strategy	= &sysctl_jiffies
+-	},
+-	{
+ 		.ctl_name	= NET_IPV4_TCP_KEEPALIVE_TIME,
+ 		.procname	= "tcp_keepalive_time",
+ 		.data		= &sysctl_tcp_keepalive_time,
+@@ -422,17 +340,6 @@
+ 		.proc_handler	= &proc_dointvec
+ 	},
+ 	{
+-		.ctl_name	= NET_IPV4_LOCAL_PORT_RANGE,
+-		.procname	= "ip_local_port_range",
+-		.data		= &sysctl_local_port_range,
+-		.maxlen		= sizeof(sysctl_local_port_range),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec_minmax,
+-		.strategy	= &sysctl_intvec,
+-		.extra1		= ip_local_port_range_min,
+-		.extra2		= ip_local_port_range_max
+-	},
+-	{
+ 		.ctl_name	= NET_IPV4_ICMP_ECHO_IGNORE_ALL,
+ 		.procname	= "icmp_echo_ignore_all",
+ 		.data		= &sysctl_icmp_echo_ignore_all,
+@@ -534,50 +441,6 @@
+ 		.proc_handler	= &proc_dointvec
+ 	},
+ 	{
+-		.ctl_name	= NET_IPV4_INET_PEER_THRESHOLD,
+-		.procname	= "inet_peer_threshold",
+-		.data		= &inet_peer_threshold,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec
+-	},
+-	{
+-		.ctl_name	= NET_IPV4_INET_PEER_MINTTL,
+-		.procname	= "inet_peer_minttl",
+-		.data		= &inet_peer_minttl,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec_jiffies,
+-		.strategy	= &sysctl_jiffies
+-	},
+-	{
+-		.ctl_name	= NET_IPV4_INET_PEER_MAXTTL,
+-		.procname	= "inet_peer_maxttl",
+-		.data		= &inet_peer_maxttl,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec_jiffies,
+-		.strategy	= &sysctl_jiffies
+-	},
+-	{
+-		.ctl_name	= NET_IPV4_INET_PEER_GC_MINTIME,
+-		.procname	= "inet_peer_gc_mintime",
+-		.data		= &inet_peer_gc_mintime,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec_jiffies,
+-		.strategy	= &sysctl_jiffies
+-	},
+-	{
+-		.ctl_name	= NET_IPV4_INET_PEER_GC_MAXTIME,
+-		.procname	= "inet_peer_gc_maxtime",
+-		.data		= &inet_peer_gc_maxtime,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec_jiffies,
+-		.strategy	= &sysctl_jiffies
+-	},
+-	{
+ 		.ctl_name	= NET_TCP_ORPHAN_RETRIES,
+ 		.procname	= "tcp_orphan_retries",
+ 		.data		= &sysctl_tcp_orphan_retries,
+@@ -706,24 +569,6 @@
+ 		.proc_handler	= &proc_dointvec
+ 	},
+ 	{
+-		.ctl_name	= NET_IPV4_IPFRAG_SECRET_INTERVAL,
+-		.procname	= "ipfrag_secret_interval",
+-		.data		= &sysctl_ipfrag_secret_interval,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec_jiffies,
+-		.strategy	= &sysctl_jiffies
+-	},
+-	{
+-		.ctl_name	= NET_IPV4_IPFRAG_MAX_DIST,
+-		.procname	= "ipfrag_max_dist",
+-		.data		= &sysctl_ipfrag_max_dist,
+-		.maxlen		= sizeof(int),
+-		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec_minmax,
+-		.extra1		= &zero
+-	},
+-	{
+ 		.ctl_name	= NET_TCP_NO_METRICS_SAVE,
+ 		.procname	= "tcp_no_metrics_save",
+ 		.data		= &sysctl_tcp_nometrics_save,
+@@ -865,6 +710,170 @@
+ 	{ .ctl_name = 0 }
+ };
+ 
+-#endif /* CONFIG_SYSCTL */
++struct ctl_table multi_ipv4_table[] = {
++	{
++		/* .data is filled in by devinet_net_init.
++		 * As a consequence this table entry must be the first
++		 * entry in multi_ipv4_table.
++		 */
++		.ctl_name	= NET_IPV4_FORWARD,
++		.procname	= "ip_forward",
++		.data		= NULL,
++		.extra2		= &init_net,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &ipv4_sysctl_forward,
++		.strategy	= &ipv4_sysctl_forward_strategy
++	},
++	{
++		.ctl_name	= NET_IPV4_DEFAULT_TTL,
++		.procname	= "ip_default_ttl",
++		.data		= &init_net.sysctl_ip_default_ttl,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &ipv4_doint_and_flush,
++		.strategy	= &ipv4_doint_and_flush_strategy,
++	},
++	{
++		.ctl_name	= NET_IPV4_NO_PMTU_DISC,
++		.procname	= "ip_no_pmtu_disc",
++		.data		= &init_net.sysctl_ipv4_no_pmtu_disc,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec
++	},
++	{
++		.ctl_name	= NET_IPV4_NONLOCAL_BIND,
++		.procname	= "ip_nonlocal_bind",
++		.data		= &init_net.sysctl_ip_nonlocal_bind,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec
++	},
++	{
++		.ctl_name	= NET_IPV4_LOCAL_PORT_RANGE,
++		.procname	= "ip_local_port_range",
++		.data		= &init_net.sysctl_local_port_range,
++		.maxlen		= sizeof(init_net.sysctl_local_port_range),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec_minmax,
++		.strategy	= &sysctl_intvec,
++		.extra1		= ip_local_port_range_min,
++		.extra2		= ip_local_port_range_max
++	},
++	{
++		.ctl_name	= NET_IPV4_IPFRAG_HIGH_THRESH,
++		.procname	= "ipfrag_high_thresh",
++		.data		= &init_net.sysctl_ipfrag_high_thresh,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec
++	},
++	{
++		.ctl_name	= NET_IPV4_IPFRAG_LOW_THRESH,
++		.procname	= "ipfrag_low_thresh",
++		.data		= &init_net.sysctl_ipfrag_low_thresh,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec
++	},
++	{
++		.ctl_name	= NET_IPV4_IPFRAG_TIME,
++		.procname	= "ipfrag_time",
++		.data		= &init_net.sysctl_ipfrag_time,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec_jiffies,
++		.strategy	= &sysctl_jiffies
++	},
++	{
++		.ctl_name	= NET_IPV4_IPFRAG_SECRET_INTERVAL,
++		.procname	= "ipfrag_secret_interval",
++		.data		= &init_net.sysctl_ipfrag_secret_interval,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec_jiffies,
++		.strategy	= &sysctl_jiffies
++	},
++	{
++		.ctl_name	= NET_IPV4_IPFRAG_MAX_DIST,
++		.procname	= "ipfrag_max_dist",
++		.data		= &init_net.sysctl_ipfrag_max_dist,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec_minmax,
++		.extra1		= &zero
++	},
++	{
++		.ctl_name	= NET_IPV4_DYNADDR,
++		.procname	= "ip_dynaddr",
++		.data		= &init_net.sysctl_ip_dynaddr,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec
++	},
++	{
++		.ctl_name	= NET_IPV4_INET_PEER_THRESHOLD,
++		.procname	= "inet_peer_threshold",
++		.data		= &init_net.inet_peer_threshold,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec
++	},
++	{
++		.ctl_name	= NET_IPV4_INET_PEER_MINTTL,
++		.procname	= "inet_peer_minttl",
++		.data		= &init_net.inet_peer_minttl,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec_jiffies,
++		.strategy	= &sysctl_jiffies
++	},
++	{
++		.ctl_name	= NET_IPV4_INET_PEER_MAXTTL,
++		.procname	= "inet_peer_maxttl",
++		.data		= &init_net.inet_peer_maxttl,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec_jiffies,
++		.strategy	= &sysctl_jiffies
++	},
++	{
++		.ctl_name	= NET_IPV4_INET_PEER_GC_MINTIME,
++		.procname	= "inet_peer_gc_mintime",
++		.data		= &init_net.inet_peer_gc_mintime,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec_jiffies,
++		.strategy	= &sysctl_jiffies
++	},
++	{
++		.ctl_name	= NET_IPV4_INET_PEER_GC_MAXTIME,
++		.procname	= "inet_peer_gc_maxtime",
++		.data		= &init_net.inet_peer_gc_maxtime,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec_jiffies,
++		.strategy	= &sysctl_jiffies
++	},
++	{
++		.ctl_name	= NET_IPV4_TCP_TIMESTAMPS,
++		.procname	= "tcp_timestamps",
++		.data		= &init_net.sysctl_tcp_timestamps,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec
++
++	},
++	{
++		.ctl_name	= NET_IPV4_TCP_WINDOW_SCALING,
++		.procname	= "tcp_window_scaling",
++		.data		= &init_net.sysctl_tcp_window_scaling,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec
++	},
++	{}
++};
+ 
+-EXPORT_SYMBOL(ipv4_config);
++#endif /* CONFIG_SYSCTL */
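The comment on the first multi_ipv4_table entry is the key to how this table
is wired up: the ip_forward entry ships with .data = NULL and the owning
namespace in .extra2, and devinet_net_init (per that comment) must point
.data at the right per-namespace field before the table is registered, which
is why the entry has to stay first. A hedged sketch of the kind of fixup that
implies; the function name is hypothetical, the registration step is elided,
and it assumes IPV4_DEVCONF_ALL() yields an addressable lvalue:

	static int example_net_sysctl_init(struct net *net)
	{
		struct ctl_table *tbl;

		tbl = kmemdup(multi_ipv4_table, sizeof(multi_ipv4_table),
			      GFP_KERNEL);
		if (!tbl)
			return -ENOMEM;

		/* ip_forward is entry 0 by construction (see the comment in
		 * the table above): repoint its data and owning namespace. */
		tbl[0].data   = &IPV4_DEVCONF_ALL(net, FORWARDING);
		tbl[0].extra2 = net;

		/* ... register tbl under this namespace's sysctl root ... */
		return 0;
	}

The handlers cooperate with this: ipv4_sysctl_forward reads the namespace
back out of ctl->extra2 rather than assuming init_net.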
+diff -Nurb linux-2.6.22-try2/net/ipv4/tcp.c linux-2.6.22-try2-netns/net/ipv4/tcp.c
+--- linux-2.6.22-try2/net/ipv4/tcp.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/tcp.c	2007-12-19 22:49:20.000000000 -0500
+@@ -2409,6 +2409,23 @@
+ }
+ __setup("thash_entries=", set_thash_entries);
+ 
++static int tcp_net_init(struct net *net)
++{
++	/*
++	 * This array holds the first and last local port number.
++	 */
++	net->sysctl_local_port_range[0] = 32768;
++	net->sysctl_local_port_range[1] = 61000;
++
++	net->sysctl_tcp_timestamps = 1;
++	net->sysctl_tcp_window_scaling = 1;
++	return 0;
++}
++
++static struct pernet_operations tcp_net_ops = {
++	.init = tcp_net_init,
++};
++
+ void __init tcp_init(void)
+ {
+ 	struct sk_buff *skb = NULL;
+@@ -2502,6 +2519,8 @@
+ 	sysctl_tcp_rmem[1] = 87380;
+ 	sysctl_tcp_rmem[2] = max(87380, max_share);
+ 
++	register_pernet_subsys(&tcp_net_ops);
++
+ 	printk(KERN_INFO "TCP: Hash tables configured "
+ 	       "(established %d bind %d)\n",
+ 	       tcp_hashinfo.ehash_size, tcp_hashinfo.bhash_size);
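tcp_net_init gives each new namespace its own copy of what used to be
boot-time globals: the ephemeral port range and the TCP option defaults.
register_pernet_subsys() runs it for init_net at registration time and for
every namespace created afterwards, so these are per-namespace defaults, not
system-wide ones. The read side follows the same split, as the tcp_input.c
and tcp_output.c hunks below show: code holding only an skb goes through
init_net, while code holding a socket uses sk->sk_net. A one-line sketch of
the socket-side read, with an illustrative helper name:

	static inline int example_wscale_enabled(const struct sock *sk)
	{
		/* Per-namespace default set in tcp_net_init() above. */
		return sk->sk_net->sysctl_tcp_window_scaling;
	}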
+diff -Nurb linux-2.6.22-try2/net/ipv4/tcp_input.c linux-2.6.22-try2-netns/net/ipv4/tcp_input.c
+--- linux-2.6.22-try2/net/ipv4/tcp_input.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/tcp_input.c	2007-12-19 22:49:20.000000000 -0500
+@@ -72,8 +72,6 @@
+ #include <asm/unaligned.h>
+ #include <net/netdma.h>
+ 
+-int sysctl_tcp_timestamps __read_mostly = 1;
+-int sysctl_tcp_window_scaling __read_mostly = 1;
+ int sysctl_tcp_sack __read_mostly = 1;
+ int sysctl_tcp_fack __read_mostly = 1;
+ int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
+@@ -2922,7 +2920,7 @@
+ 					break;
+ 				case TCPOPT_WINDOW:
+ 					if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
+-						if (sysctl_tcp_window_scaling) {
++						if (init_net.sysctl_tcp_window_scaling) {
+ 							__u8 snd_wscale = *(__u8 *) ptr;
+ 							opt_rx->wscale_ok = 1;
+ 							if (snd_wscale > 14) {
+@@ -2938,7 +2936,7 @@
+ 				case TCPOPT_TIMESTAMP:
+ 					if (opsize==TCPOLEN_TIMESTAMP) {
+ 						if ((estab && opt_rx->tstamp_ok) ||
+-						    (!estab && sysctl_tcp_timestamps)) {
++						    (!estab && init_net.sysctl_tcp_timestamps)) {
+ 							opt_rx->saw_tstamp = 1;
+ 							opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
+ 							opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
+diff -Nurb linux-2.6.22-try2/net/ipv4/tcp_ipv4.c linux-2.6.22-try2-netns/net/ipv4/tcp_ipv4.c
+--- linux-2.6.22-try2/net/ipv4/tcp_ipv4.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/tcp_ipv4.c	2007-12-19 23:33:32.000000000 -0500
+@@ -71,6 +71,7 @@
+ #include <net/timewait_sock.h>
+ #include <net/xfrm.h>
+ #include <net/netdma.h>
++#include <net/net_namespace.h>
+ 
+ #include <linux/inet.h>
+ #include <linux/ipv6.h>
+@@ -353,6 +354,7 @@
+ 
+ void tcp_v4_err(struct sk_buff *skb, u32 info)
+ {
++	struct net *net = skb->dev->nd_net;
+ 	struct iphdr *iph = (struct iphdr *)skb->data;
+ 	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
+ 	struct tcp_sock *tp;
+@@ -369,7 +371,7 @@
+ 	}
+ 
+ 	sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr,
+-			 th->source, inet_iif(skb));
++			 th->source, inet_iif(skb), net);
+ 	if (!sk) {
+ 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ 		return;
+@@ -1499,7 +1501,8 @@
+ 		return tcp_check_req(sk, skb, req, prev);
+ 
+ 	nsk = inet_lookup_established(&tcp_hashinfo, iph->saddr, th->source,
+-				      iph->daddr, th->dest, inet_iif(skb));
++				      iph->daddr, th->dest, inet_iif(skb),
++				      sk->sk_net);
+ 
+ 	if (nsk) {
+ 		if (nsk->sk_state != TCP_TIME_WAIT) {
+@@ -1618,6 +1621,7 @@
+ 
+ int tcp_v4_rcv(struct sk_buff *skb)
+ {
++	struct net *net = skb->dev->nd_net;
+ 	const struct iphdr *iph;
+ 	struct tcphdr *th;
+ 	struct sock *sk;
+@@ -1657,7 +1661,7 @@
+ 	TCP_SKB_CB(skb)->sacked	 = 0;
+ 
+ 	sk = __inet_lookup(&tcp_hashinfo, iph->saddr, th->source,
+-			   iph->daddr, th->dest, inet_iif(skb));
++			   iph->daddr, th->dest, inet_iif(skb), net);
+ 	if (!sk)
+ 		goto no_tcp_socket;
+ 
+@@ -1732,7 +1736,7 @@
+ 	case TCP_TW_SYN: {
+ 		struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo,
+ 							iph->daddr, th->dest,
+-							inet_iif(skb));
++							inet_iif(skb), net);
+ 		if (sk2) {
+ 			inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ 			inet_twsk_put(inet_twsk(sk));
+@@ -1766,7 +1770,7 @@
+ 	int release_it = 0;
+ 
+ 	if (!rt || rt->rt_dst != inet->daddr) {
+-		peer = inet_getpeer(inet->daddr, 1);
++		peer = inet_getpeer(sk->sk_net, inet->daddr, 1);
+ 		release_it = 1;
+ 	} else {
+ 		if (!rt->peer)
+@@ -1791,7 +1795,7 @@
+ 
+ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
+ {
+-	struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1);
++	struct inet_peer *peer = inet_getpeer(tw->tw_net, tw->tw_daddr, 1);
+ 
+ 	if (peer) {
+ 		const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
+@@ -1980,7 +1984,8 @@
+ 				if (req->sk &&
+ 					!nx_check(req->sk->sk_nid, VS_WATCH_P | VS_IDENT))
+ 					continue;
+-				if (req->rsk_ops->family == st->family) {
++ 				if ((req->rsk_ops->family == st->family) &&
++ 				    (req->sk->sk_net == st->net)) {
+ 					cur = req;
+ 					goto out;
+ 				}
+@@ -2004,6 +2009,8 @@
+ 	}
+ get_sk:
+ 	sk_for_each_from(sk, node) {
++ 		if (sk->sk_net != st->net)
++ 			continue;
+ 		vxdprintk(VXD_CBIT(net, 6), "sk: %p [#%d] (from %d)",
+ 			sk, sk->sk_nid, nx_current_nid());
+ 		if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
+@@ -2056,6 +2063,8 @@
+ 
+ 		read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock);
+ 		sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
++ 			if (sk->sk_net != st->net)
++ 				continue;
+ 			vxdprintk(VXD_CBIT(net, 6),
+ 				"sk,egf: %p [#%d] (from %d)",
+ 				sk, sk->sk_nid, nx_current_nid());
+@@ -2069,6 +2078,8 @@
+ 		st->state = TCP_SEQ_STATE_TIME_WAIT;
+ 		inet_twsk_for_each(tw, node,
+ 				   &tcp_hashinfo.ehash[st->bucket].twchain) {
++ 			if (tw->tw_net != st->net)
++ 				continue;
+ 			vxdprintk(VXD_CBIT(net, 6),
+ 				"tw: %p [#%d] (from %d)",
+ 				tw, tw->tw_nid, nx_current_nid());
+@@ -2099,7 +2110,8 @@
+ 		tw = cur;
+ 		tw = tw_next(tw);
+ get_tw:
+-		while (tw && (tw->tw_family != st->family ||
++		while (tw && ((tw->tw_net != st->net) || 
++					(tw->tw_family != st->family) ||
+ 			!nx_check(tw->tw_nid, VS_WATCH_P | VS_IDENT))) {
+ 			tw = tw_next(tw);
+ 		}
+@@ -2124,6 +2136,8 @@
+ 		vxdprintk(VXD_CBIT(net, 6),
+ 			"sk,egn: %p [#%d] (from %d)",
+ 			sk, sk->sk_nid, nx_current_nid());
++ 		if (sk->sk_net != st->net)
++ 			continue;
+ 		if (!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT))
+ 			continue;
+ 		if (sk->sk_family == st->family)
+@@ -2253,6 +2267,7 @@
+ 		goto out_kfree;
+ 	seq	     = file->private_data;
+ 	seq->private = s;
++	s->net = get_net(PROC_NET(inode));
+ out:
+ 	return rc;
+ out_kfree:
+@@ -2260,20 +2275,30 @@
+ 	goto out;
+ }
+ 
+-int tcp_proc_register(struct tcp_seq_afinfo *afinfo)
++static int tcp_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct tcp_iter_state *st = seq->private;
++	put_net(st->net);
++	return seq_release_private(inode, file);
++}
++
++int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
+ {
+ 	int rc = 0;
+ 	struct proc_dir_entry *p;
+ 
+ 	if (!afinfo)
+ 		return -EINVAL;
++	if (net == &init_net) {
+ 	afinfo->seq_fops->owner		= afinfo->owner;
+ 	afinfo->seq_fops->open		= tcp_seq_open;
+ 	afinfo->seq_fops->read		= seq_read;
+ 	afinfo->seq_fops->llseek	= seq_lseek;
+-	afinfo->seq_fops->release	= seq_release_private;
++		afinfo->seq_fops->release	= tcp_seq_release;
++	}
+ 
+-	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
++	p = proc_net_fops_create(net, afinfo->name, S_IRUGO, afinfo->seq_fops);
+ 	if (p)
+ 		p->data = afinfo;
+ 	else
+@@ -2281,11 +2306,12 @@
+ 	return rc;
+ }
+ 
+-void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo)
++void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
+ {
+ 	if (!afinfo)
+ 		return;
+-	proc_net_remove(afinfo->name);
++	proc_net_remove(net, afinfo->name);
++	if (net == &init_net)
+ 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
+ }
+ 
+@@ -2430,14 +2456,29 @@
+ 	.seq_fops	= &tcp4_seq_fops,
+ };
+ 
++static int tcp4_proc_net_init(struct net *net)
++{
++	return tcp_proc_register(net, &tcp4_seq_afinfo);
++}
++
++static void tcp4_proc_net_exit(struct net *net)
++{
++	tcp_proc_unregister(net, &tcp4_seq_afinfo);
++}
++
++static struct pernet_operations tcp4_proc_net_ops = {
++	.init = tcp4_proc_net_init,
++	.exit = tcp4_proc_net_exit,
++};
++
+ int __init tcp4_proc_init(void)
+ {
+-	return tcp_proc_register(&tcp4_seq_afinfo);
++	return register_pernet_subsys(&tcp4_proc_net_ops);
+ }
+ 
+ void tcp4_proc_exit(void)
+ {
+-	tcp_proc_unregister(&tcp4_seq_afinfo);
++	unregister_pernet_subsys(&tcp4_proc_net_ops);
+ }
+ #endif /* CONFIG_PROC_FS */
+ 
+@@ -2499,6 +2540,5 @@
+ EXPORT_SYMBOL(tcp_proc_register);
+ EXPORT_SYMBOL(tcp_proc_unregister);
+ #endif
+-EXPORT_SYMBOL(sysctl_local_port_range);
+ EXPORT_SYMBOL(sysctl_tcp_low_latency);
+ 
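The lookup changes are the heart of socket isolation: a (saddr, sport, daddr,
dport, dif) tuple is now unique only within a namespace, so inet_lookup and
friends take the namespace as an extra argument and every hash-chain walk
skips sockets owned by someone else. A condensed, illustrative
reimplementation of that filter (not the real inet_lookup):

	static struct sock *example_lookup(struct net *net,
					   struct hlist_head *chain,
					   unsigned int hash)
	{
		struct sock *sk;
		struct hlist_node *node;

		sk_for_each(sk, node, chain) {
			if (sk->sk_net != net)	/* foreign namespace: skip */
				continue;
			if (sk->sk_hash == hash)
				return sk;	/* match in our namespace */
		}
		return NULL;
	}

The /proc iterators in this file apply the identical sk->sk_net != st->net
test, so each namespace's /proc/net/tcp lists only its own sockets.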
+diff -Nurb linux-2.6.22-try2/net/ipv4/tcp_output.c linux-2.6.22-try2-netns/net/ipv4/tcp_output.c
+--- linux-2.6.22-try2/net/ipv4/tcp_output.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/tcp_output.c	2007-12-19 22:49:20.000000000 -0500
+@@ -432,11 +432,11 @@
+ 	sysctl_flags = 0;
+ 	if (unlikely(tcb->flags & TCPCB_FLAG_SYN)) {
+ 		tcp_header_size = sizeof(struct tcphdr) + TCPOLEN_MSS;
+-		if (sysctl_tcp_timestamps) {
++		if (sk->sk_net->sysctl_tcp_timestamps) {
+ 			tcp_header_size += TCPOLEN_TSTAMP_ALIGNED;
+ 			sysctl_flags |= SYSCTL_FLAG_TSTAMPS;
+ 		}
+-		if (sysctl_tcp_window_scaling) {
++		if (sk->sk_net->sysctl_tcp_window_scaling) {
+ 			tcp_header_size += TCPOLEN_WSCALE_ALIGNED;
+ 			sysctl_flags |= SYSCTL_FLAG_WSCALE;
+ 		}
+@@ -2215,7 +2215,7 @@
+ 	 * See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
+ 	 */
+ 	tp->tcp_header_len = sizeof(struct tcphdr) +
+-		(sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
++		(sk->sk_net->sysctl_tcp_timestamps ? TCPOLEN_TSTAMP_ALIGNED : 0);
+ 
+ #ifdef CONFIG_TCP_MD5SIG
+ 	if (tp->af_specific->md5_lookup(sk, sk) != NULL)
+@@ -2238,7 +2238,7 @@
+ 				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
+ 				  &tp->rcv_wnd,
+ 				  &tp->window_clamp,
+-				  sysctl_tcp_window_scaling,
++				  sk->sk_net->sysctl_tcp_window_scaling,
+ 				  &rcv_wscale);
+ 
+ 	tp->rx_opt.rcv_wscale = rcv_wscale;
+diff -Nurb linux-2.6.22-try2/net/ipv4/tcp_probe.c linux-2.6.22-try2-netns/net/ipv4/tcp_probe.c
+--- linux-2.6.22-try2/net/ipv4/tcp_probe.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/tcp_probe.c	2007-12-19 22:49:20.000000000 -0500
+@@ -172,7 +172,7 @@
+ 	if (IS_ERR(tcpw.fifo))
+ 		return PTR_ERR(tcpw.fifo);
+ 
+-	if (!proc_net_fops_create(procname, S_IRUSR, &tcpprobe_fops))
++	if (!proc_net_fops_create(&init_net, procname, S_IRUSR, &tcpprobe_fops))
+ 		goto err0;
+ 
+ 	ret = register_jprobe(&tcp_probe);
+@@ -182,7 +182,7 @@
+ 	pr_info("TCP watch registered (port=%d)\n", port);
+ 	return 0;
+  err1:
+-	proc_net_remove(procname);
++	proc_net_remove(&init_net, procname);
+  err0:
+ 	kfifo_free(tcpw.fifo);
+ 	return ret;
+@@ -192,7 +192,7 @@
+ static __exit void tcpprobe_exit(void)
+ {
+ 	kfifo_free(tcpw.fifo);
+-	proc_net_remove(procname);
++	proc_net_remove(&init_net, procname);
+ 	unregister_jprobe(&tcp_probe);
+ 
+ }
+diff -Nurb linux-2.6.22-try2/net/ipv4/tunnel4.c linux-2.6.22-try2-netns/net/ipv4/tunnel4.c
+--- linux-2.6.22-try2/net/ipv4/tunnel4.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/tunnel4.c	2007-12-19 22:49:20.000000000 -0500
+@@ -75,6 +75,10 @@
+ {
+ 	struct xfrm_tunnel *handler;
+ 
++	if (skb->dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
+ 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ 		goto drop;
+ 
+@@ -113,6 +117,9 @@
+ {
+ 	struct xfrm_tunnel *handler;
+ 
++	if (skb->dev->nd_net != &init_net)
++		return;
++
+ 	for (handler = tunnel4_handlers; handler; handler = handler->next)
+ 		if (!handler->err_handler(skb, info))
+ 			break;
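tunnel4 shows the guard in its protocol-handler form, which differs from the
netfilter form used earlier: a handler reached from the local-delivery path
owns the skb, so bailing out on a foreign namespace means freeing the packet
and returning 0 ("handled"), not passing it along. A minimal sketch:

	static int example_proto_rcv(struct sk_buff *skb)
	{
		if (skb->dev->nd_net != &init_net) {
			kfree_skb(skb);	/* we own it: free, don't leak */
			return 0;	/* consumed, as far as IP knows */
		}
		/* ... init_net-only receive path ... */
		return 0;
	}

Error handlers just return, since there is nothing to free, and, as the
addrconf.c hunk near the end shows, netdevice notifiers use a third variant:
return NOTIFY_DONE.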
+diff -Nurb linux-2.6.22-try2/net/ipv4/udp.c linux-2.6.22-try2-netns/net/ipv4/udp.c
+--- linux-2.6.22-try2/net/ipv4/udp.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/udp.c	2007-12-19 23:34:00.000000000 -0500
+@@ -101,6 +101,7 @@
+ #include <net/route.h>
+ #include <net/checksum.h>
+ #include <net/xfrm.h>
++#include <net/net_namespace.h>
+ #include "udp_impl.h"
+ 
+ /*
+@@ -112,16 +113,17 @@
+ struct hlist_head udp_hash[UDP_HTABLE_SIZE];
+ DEFINE_RWLOCK(udp_hash_lock);
+ 
+-static int udp_port_rover;
+-
+-static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[])
++static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, struct hlist_head udptable[])
+ {
+ 	struct sock *sk;
+ 	struct hlist_node *node;
+ 
+-	sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)])
++	sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) {
++		if (sk->sk_net != net)
++			continue;
+ 		if (sk->sk_hash == num)
+ 			return 1;
++	}
+ 	return 0;
+ }
+ 
+@@ -148,9 +150,9 @@
+ 	if (snum == 0) {
+ 		int best_size_so_far, best, result, i;
+ 
+-		if (*port_rover > sysctl_local_port_range[1] ||
+-		    *port_rover < sysctl_local_port_range[0])
+-			*port_rover = sysctl_local_port_range[0];
++		if (*port_rover > sk->sk_net->sysctl_local_port_range[1] ||
++		    *port_rover < sk->sk_net->sysctl_local_port_range[0])
++			*port_rover = sk->sk_net->sysctl_local_port_range[0];
+ 		best_size_so_far = 32767;
+ 		best = result = *port_rover;
+ 		for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) {
+@@ -158,9 +160,9 @@
+ 
+ 			head = &udptable[result & (UDP_HTABLE_SIZE - 1)];
+ 			if (hlist_empty(head)) {
+-				if (result > sysctl_local_port_range[1])
+-					result = sysctl_local_port_range[0] +
+-						((result - sysctl_local_port_range[0]) &
++				if (result > sk->sk_net->sysctl_local_port_range[1])
++					result = sk->sk_net->sysctl_local_port_range[0] +
++						((result - sk->sk_net->sysctl_local_port_range[0]) &
+ 						 (UDP_HTABLE_SIZE - 1));
+ 				goto gotit;
+ 			}
+@@ -177,11 +179,11 @@
+ 		result = best;
+ 		for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE;
+ 		     i++, result += UDP_HTABLE_SIZE) {
+-			if (result > sysctl_local_port_range[1])
+-				result = sysctl_local_port_range[0]
+-					+ ((result - sysctl_local_port_range[0]) &
++			if (result > sk->sk_net->sysctl_local_port_range[1])
++				result = sk->sk_net->sysctl_local_port_range[0]
++					+ ((result - sk->sk_net->sysctl_local_port_range[0]) &
+ 					   (UDP_HTABLE_SIZE - 1));
+-			if (! __udp_lib_lport_inuse(result, udptable))
++			if (! __udp_lib_lport_inuse(sk->sk_net, result, udptable))
+ 				break;
+ 		}
+ 		if (i >= (1 << 16) / UDP_HTABLE_SIZE)
+@@ -194,6 +196,7 @@
+ 		sk_for_each(sk2, node, head)
+ 			if (sk2->sk_hash == snum                             &&
+ 			    sk2 != sk                                        &&
++			    sk->sk_net == sk2->sk_net			     &&
+ 			    (!sk2->sk_reuse        || !sk->sk_reuse)         &&
+ 			    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+ 			     || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+@@ -216,7 +219,7 @@
+ int udp_get_port(struct sock *sk, unsigned short snum,
+ 			int (*scmp)(const struct sock *, const struct sock *))
+ {
+-	return  __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp);
++	return  __udp_lib_get_port(sk, snum, udp_hash, &sk->sk_net->udp_port_rover, scmp);
+ }
+ 
+ extern int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2);
+@@ -229,7 +232,8 @@
+ /* UDP is nearly always wildcards out the wazoo, it makes no sense to try
+  * harder than this. -DaveM
+  */
+-static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport,
++static struct sock *__udp4_lib_lookup(struct net *net,
++				      __be32 saddr, __be16 sport,
+ 				      __be32 daddr, __be16 dport,
+ 				      int dif, struct hlist_head udptable[])
+ {
+@@ -243,6 +247,9 @@
+ 	sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
+ 		struct inet_sock *inet = inet_sk(sk);
+ 
++		if (sk->sk_net != net)
++			continue;
++
+ 		if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) {
+ 			int score = (sk->sk_family == PF_INET ? 1 : 0);
+ 
+@@ -299,6 +306,9 @@
+ 	sk_for_each_from(s, node) {
+ 		struct inet_sock *inet = inet_sk(s);
+ 
++		if (s->sk_net != sk->sk_net)
++			continue;
++
+ 		if (s->sk_hash != hnum					||
+ 		    (inet->daddr && inet->daddr != rmt_addr)		||
+ 		    (inet->dport != rmt_port && inet->dport)		||
+@@ -328,6 +338,7 @@
+ 
+ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
+ {
++	struct net *net = skb->dev->nd_net;
+ 	struct inet_sock *inet;
+ 	struct iphdr *iph = (struct iphdr*)skb->data;
+ 	struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
+@@ -337,7 +348,7 @@
+ 	int harderr;
+ 	int err;
+ 
+-	sk = __udp4_lib_lookup(iph->daddr, uh->dest, iph->saddr, uh->source,
++	sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source,
+ 			       skb->dev->ifindex, udptable		    );
+ 	if (sk == NULL) {
+ 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+@@ -623,7 +634,8 @@
+ 		rt = (struct rtable*)sk_dst_check(sk, 0);
+ 
+ 	if (rt == NULL) {
+-		struct flowi fl = { .oif = ipc.oif,
++		struct flowi fl = { .fl_net = sk->sk_net,
++				    .oif = ipc.oif,
+ 				    .nl_u = { .ip4_u =
+ 					      { .daddr = faddr,
+ 						.saddr = saddr,
+@@ -1288,6 +1300,7 @@
+ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
+ 		   int proto)
+ {
++	struct net *net = skb->dev->nd_net;
+ 	struct sock *sk;
+ 	struct udphdr *uh = udp_hdr(skb);
+ 	unsigned short ulen;
+@@ -1322,7 +1335,7 @@
+ 	udp_ping_of_death(skb, uh, saddr);
+ #endif
+ 
+-	sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest,
++	sk = __udp4_lib_lookup(net, saddr, uh->source, daddr, uh->dest,
+ 			       skb->dev->ifindex, udptable        );
+ 
+ 	if (sk != NULL) {
+@@ -1651,7 +1664,7 @@
+ 		sk = sk_next(sk);
+ try_again:
+ 		;
+-	} while (sk && (sk->sk_family != state->family ||
++	} while (sk && ((sk->sk_net != state->net) || sk->sk_family != state->family ||
+ 		!nx_check(sk->sk_nid, VS_WATCH_P | VS_IDENT)));
+ 
+ 	if (!sk && ++state->bucket < UDP_HTABLE_SIZE) {
+@@ -1717,6 +1730,7 @@
+ 
+ 	seq	     = file->private_data;
+ 	seq->private = s;
++	s->net	     = get_net(PROC_NET(inode));
+ out:
+ 	return rc;
+ out_kfree:
+@@ -1724,21 +1738,31 @@
+ 	goto out;
+ }
+ 
++static int udp_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct udp_iter_state *state = seq->private;
++	put_net(state->net);
++	return seq_release_private(inode, file);
++}
++
+ /* ------------------------------------------------------------------------ */
+-int udp_proc_register(struct udp_seq_afinfo *afinfo)
++int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo)
+ {
+ 	struct proc_dir_entry *p;
+ 	int rc = 0;
+ 
+ 	if (!afinfo)
+ 		return -EINVAL;
++	if (net == &init_net) {
+ 	afinfo->seq_fops->owner		= afinfo->owner;
+ 	afinfo->seq_fops->open		= udp_seq_open;
+ 	afinfo->seq_fops->read		= seq_read;
+ 	afinfo->seq_fops->llseek	= seq_lseek;
+-	afinfo->seq_fops->release	= seq_release_private;
++		afinfo->seq_fops->release	= udp_seq_release;
++	}
+ 
+-	p = proc_net_fops_create(afinfo->name, S_IRUGO, afinfo->seq_fops);
++	p = proc_net_fops_create(net, afinfo->name, S_IRUGO, afinfo->seq_fops);
+ 	if (p)
+ 		p->data = afinfo;
+ 	else
+@@ -1746,11 +1770,12 @@
+ 	return rc;
+ }
+ 
+-void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
++void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo)
+ {
+ 	if (!afinfo)
+ 		return;
+-	proc_net_remove(afinfo->name);
++	proc_net_remove(net, afinfo->name);
++	if (net == &init_net)
+ 	memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
+ }
+ 
+@@ -1803,14 +1828,30 @@
+ 	.seq_fops	= &udp4_seq_fops,
+ };
+ 
++
++static int udp4_proc_net_init(struct net *net)
++{
++	return udp_proc_register(net, &udp4_seq_afinfo);
++}
++
++static void udp4_proc_net_exit(struct net *net)
++{
++	udp_proc_unregister(net, &udp4_seq_afinfo);
++}
++
++static struct pernet_operations udp4_proc_net_ops = {
++	.init = udp4_proc_net_init,
++	.exit = udp4_proc_net_exit,
++};
++
+ int __init udp4_proc_init(void)
+ {
+-	return udp_proc_register(&udp4_seq_afinfo);
++	return register_pernet_subsys(&udp4_proc_net_ops);
+ }
+ 
+ void udp4_proc_exit(void)
+ {
+-	udp_proc_unregister(&udp4_seq_afinfo);
++	unregister_pernet_subsys(&udp4_proc_net_ops);
+ }
+ #endif /* CONFIG_PROC_FS */
+ 
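Making the port rover and local port range per-namespace keeps ephemeral port
allocation in one namespace from seeing, or racing with, another's. One
subtle line in the hunks above is the wrap-around: when the rover walks past
the top of the range it is folded back in a way that preserves the
candidate's hash bucket, which the search loop depends on. A hedged
restatement of that fold (UDP_HTABLE_SIZE is a power of two, so the masking
is bucket-preserving):

	static inline int example_fold_port(struct net *net, int result)
	{
		int lo = net->sysctl_local_port_range[0];
		int hi = net->sysctl_local_port_range[1];

		if (result > hi)
			/* (lo + ((result - lo) & (H - 1))) mod H equals
			 * result mod H, so the folded port stays in the
			 * same hash bucket. */
			result = lo + ((result - lo) &
				       (UDP_HTABLE_SIZE - 1));
		return result;
	}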
+diff -Nurb linux-2.6.22-try2/net/ipv4/udplite.c linux-2.6.22-try2-netns/net/ipv4/udplite.c
+--- linux-2.6.22-try2/net/ipv4/udplite.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/udplite.c	2007-12-19 22:49:20.000000000 -0500
+@@ -31,11 +31,18 @@
+ 
+ static int udplite_rcv(struct sk_buff *skb)
+ {
++	if (skb->dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
+ 	return __udp4_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE);
+ }
+ 
+ static void udplite_err(struct sk_buff *skb, u32 info)
+ {
++	if (skb->dev->nd_net != &init_net)
++		return;
++
+ 	return __udp4_lib_err(skb, info, udplite_hash);
+ }
+ 
+@@ -103,7 +110,7 @@
+ 	inet_register_protosw(&udplite4_protosw);
+ 
+ #ifdef CONFIG_PROC_FS
+-	if (udp_proc_register(&udplite4_seq_afinfo)) /* udplite4_proc_init() */
++	if (udp_proc_register(&init_net, &udplite4_seq_afinfo)) /* udplite4_proc_init() */
+ 		printk(KERN_ERR "%s: Cannot register /proc!\n", __FUNCTION__);
+ #endif
+ 	return;
+diff -Nurb linux-2.6.22-try2/net/ipv4/xfrm4_input.c linux-2.6.22-try2-netns/net/ipv4/xfrm4_input.c
+--- linux-2.6.22-try2/net/ipv4/xfrm4_input.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/xfrm4_input.c	2007-12-19 22:49:20.000000000 -0500
+@@ -18,6 +18,10 @@
+ 
+ int xfrm4_rcv(struct sk_buff *skb)
+ {
++	if (skb->dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
+ 	return xfrm4_rcv_encap(skb, 0);
+ }
+ 
+diff -Nurb linux-2.6.22-try2/net/ipv4/xfrm4_policy.c linux-2.6.22-try2-netns/net/ipv4/xfrm4_policy.c
+--- linux-2.6.22-try2/net/ipv4/xfrm4_policy.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/xfrm4_policy.c	2007-12-19 22:49:20.000000000 -0500
+@@ -25,6 +25,7 @@
+ {
+ 	struct rtable *rt;
+ 	struct flowi fl_tunnel = {
++		.fl_net = &init_net,
+ 		.nl_u = {
+ 			.ip4_u = {
+ 				.daddr = daddr->a4,
+@@ -73,6 +74,7 @@
+ 	struct rtable *rt0 = (struct rtable*)(*dst_p);
+ 	struct rtable *rt = rt0;
+ 	struct flowi fl_tunnel = {
++		.fl_net = &init_net,
+ 		.nl_u = {
+ 			.ip4_u = {
+ 				.saddr = fl->fl4_src,
+@@ -213,6 +215,7 @@
+ 	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
+ 
+ 	memset(fl, 0, sizeof(struct flowi));
++	fl->fl_net = &init_net;
+ 	if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
+ 		switch (iph->protocol) {
+ 		case IPPROTO_UDP:
+@@ -306,7 +309,7 @@
+ 
+ 	xdst = (struct xfrm_dst *)dst;
+ 	if (xdst->u.rt.idev->dev == dev) {
+-		struct in_device *loopback_idev = in_dev_get(&loopback_dev);
++		struct in_device *loopback_idev = in_dev_get(&init_net.loopback_dev);
+ 		BUG_ON(!loopback_idev);
+ 
+ 		do {
+diff -Nurb linux-2.6.22-try2/net/ipv4/xfrm4_state.c linux-2.6.22-try2-netns/net/ipv4/xfrm4_state.c
+--- linux-2.6.22-try2/net/ipv4/xfrm4_state.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv4/xfrm4_state.c	2007-12-19 22:49:20.000000000 -0500
+@@ -16,7 +16,7 @@
+ 
+ static int xfrm4_init_flags(struct xfrm_state *x)
+ {
+-	if (ipv4_config.no_pmtu_disc)
++	if (init_net.sysctl_ipv4_no_pmtu_disc)
+ 		x->props.flags |= XFRM_STATE_NOPMTUDISC;
+ 	return 0;
+ }
+diff -Nurb linux-2.6.22-try2/net/ipv6/addrconf.c linux-2.6.22-try2-netns/net/ipv6/addrconf.c
+--- linux-2.6.22-try2/net/ipv6/addrconf.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/addrconf.c	2007-12-19 23:35:57.000000000 -0500
+@@ -73,6 +73,7 @@
+ #include <net/tcp.h>
+ #include <net/ip.h>
+ #include <net/netlink.h>
++#include <net/net_namespace.h>
+ #include <net/pkt_sched.h>
+ #include <linux/if_tunnel.h>
+ #include <linux/rtnetlink.h>
+@@ -457,7 +458,7 @@
+ 	struct inet6_dev *idev;
+ 
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		rcu_read_lock();
+ 		idev = __in6_dev_get(dev);
+ 		if (idev) {
+@@ -920,7 +921,7 @@
+ 	read_lock(&dev_base_lock);
+ 	rcu_read_lock();
+ 
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		struct inet6_dev *idev;
+ 		struct inet6_ifaddr *ifa;
+ 
+@@ -1882,7 +1883,7 @@
+ 	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
+ 		goto err_exit;
+ 
+-	dev = __dev_get_by_index(ireq.ifr6_ifindex);
++	dev = __dev_get_by_index(&init_net, ireq.ifr6_ifindex);
+ 
+ 	err = -ENODEV;
+ 	if (dev == NULL)
+@@ -1913,7 +1914,7 @@
+ 
+ 		if (err == 0) {
+ 			err = -ENOBUFS;
+-			if ((dev = __dev_get_by_name(p.name)) == NULL)
++			if ((dev = __dev_get_by_name(&init_net, p.name)) == NULL)
+ 				goto err_exit;
+ 			err = dev_open(dev);
+ 		}
+@@ -1943,7 +1944,7 @@
+ 	if (!valid_lft || prefered_lft > valid_lft)
+ 		return -EINVAL;
+ 
+-	if ((dev = __dev_get_by_index(ifindex)) == NULL)
++	if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL)
+ 		return -ENODEV;
+ 
+ 	if ((idev = addrconf_add_dev(dev)) == NULL)
+@@ -1994,7 +1995,7 @@
+ 	struct inet6_dev *idev;
+ 	struct net_device *dev;
+ 
+-	if ((dev = __dev_get_by_index(ifindex)) == NULL)
++	if ((dev = __dev_get_by_index(&init_net, ifindex)) == NULL)
+ 		return -ENODEV;
+ 
+ 	if ((idev = __in6_dev_get(dev)) == NULL)
+@@ -2089,7 +2090,7 @@
+ 		return;
+ 	}
+ 
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		struct in_device * in_dev = __in_dev_get_rtnl(dev);
+ 		if (in_dev && (dev->flags & IFF_UP)) {
+ 			struct in_ifaddr * ifa;
+@@ -2245,12 +2246,12 @@
+ 
+ 	/* first try to inherit the link-local address from the link device */
+ 	if (idev->dev->iflink &&
+-	    (link_dev = __dev_get_by_index(idev->dev->iflink))) {
++	    (link_dev = __dev_get_by_index(&init_net, idev->dev->iflink))) {
+ 		if (!ipv6_inherit_linklocal(idev, link_dev))
+ 			return;
+ 	}
+ 	/* then try to inherit it from any device */
+-	for_each_netdev(link_dev) {
++	for_each_netdev(&init_net, link_dev) {
+ 		if (!ipv6_inherit_linklocal(idev, link_dev))
+ 			return;
+ 	}
+@@ -2282,6 +2283,9 @@
+ 	struct inet6_dev *idev = __in6_dev_get(dev);
+ 	int run_pending = 0;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	switch(event) {
+ 	case NETDEV_REGISTER:
+ 		if (!idev && dev->mtu >= IPV6_MIN_MTU) {
+@@ -2419,7 +2423,7 @@
+ 
+ 	ASSERT_RTNL();
+ 
+-	if (dev == &loopback_dev && how == 1)
++	if (dev == &init_net.loopback_dev && how == 1)
+ 		how = 0;
+ 
+ 	rt6_ifdown(dev);
+@@ -2850,14 +2854,14 @@
+ 
+ int __init if6_proc_init(void)
+ {
+-	if (!proc_net_fops_create("if_inet6", S_IRUGO, &if6_fops))
++	if (!proc_net_fops_create(&init_net, "if_inet6", S_IRUGO, &if6_fops))
+ 		return -ENOMEM;
+ 	return 0;
+ }
+ 
+ void if6_proc_exit(void)
+ {
+-	proc_net_remove("if_inet6");
++	proc_net_remove(&init_net, "if_inet6");
+ }
+ #endif	/* CONFIG_PROC_FS */
+ 
+@@ -3017,11 +3021,15 @@
+ static int
+ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct ifaddrmsg *ifm;
+ 	struct nlattr *tb[IFA_MAX+1];
+ 	struct in6_addr *pfx;
+ 	int err;
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ 	if (err < 0)
+ 		return err;
+@@ -3074,6 +3082,7 @@
+ static int
+ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct ifaddrmsg *ifm;
+ 	struct nlattr *tb[IFA_MAX+1];
+ 	struct in6_addr *pfx;
+@@ -3083,6 +3092,9 @@
+ 	u8 ifa_flags;
+ 	int err;
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ 	if (err < 0)
+ 		return err;
+@@ -3103,7 +3115,7 @@
+ 		valid_lft = INFINITY_LIFE_TIME;
+ 	}
+ 
+-	dev =  __dev_get_by_index(ifm->ifa_index);
++	dev =  __dev_get_by_index(&init_net, ifm->ifa_index);
+ 	if (dev == NULL)
+ 		return -ENODEV;
+ 
+@@ -3292,7 +3304,7 @@
+ 	s_ip_idx = ip_idx = cb->args[1];
+ 
+ 	idx = 0;
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if (idx < s_idx)
+ 			goto cont;
+ 		if (idx > s_idx)
+@@ -3367,26 +3379,42 @@
+ 
+ static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	enum addr_type_t type = UNICAST_ADDR;
++
++	if (net != &init_net)
++		return 0;
++
+ 	return inet6_dump_addr(skb, cb, type);
+ }
+ 
+ static int inet6_dump_ifmcaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	enum addr_type_t type = MULTICAST_ADDR;
++
++	if (net != &init_net)
++		return 0;
++
+ 	return inet6_dump_addr(skb, cb, type);
+ }
+ 
+ 
+ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	enum addr_type_t type = ANYCAST_ADDR;
++
++	if (net != &init_net)
++		return 0;
++
+ 	return inet6_dump_addr(skb, cb, type);
+ }
+ 
+ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
+ 			     void *arg)
+ {
++	struct net *net = in_skb->sk->sk_net;
+ 	struct ifaddrmsg *ifm;
+ 	struct nlattr *tb[IFA_MAX+1];
+ 	struct in6_addr *addr = NULL;
+@@ -3395,6 +3423,9 @@
+ 	struct sk_buff *skb;
+ 	int err;
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy);
+ 	if (err < 0)
+ 		goto errout;
+@@ -3407,7 +3438,7 @@
+ 
+ 	ifm = nlmsg_data(nlh);
+ 	if (ifm->ifa_index)
+-		dev = __dev_get_by_index(ifm->ifa_index);
++		dev = __dev_get_by_index(&init_net, ifm->ifa_index);
+ 
+ 	if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) {
+ 		err = -EADDRNOTAVAIL;
+@@ -3427,7 +3458,7 @@
+ 		kfree_skb(skb);
+ 		goto errout_ifa;
+ 	}
+-	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
++	err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+ errout_ifa:
+ 	in6_ifa_put(ifa);
+ errout:
+@@ -3450,10 +3481,10 @@
+ 		kfree_skb(skb);
+ 		goto errout;
+ 	}
+-	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
++	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+ errout:
+ 	if (err < 0)
+-		rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
++		rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
+ }
+ 
+ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
+@@ -3612,19 +3643,22 @@
+ 
+ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;

+ 	int idx, err;
+ 	int s_idx = cb->args[0];
+ 	struct net_device *dev;
+ 	struct inet6_dev *idev;
+ 	struct nx_info *nxi = skb->sk ? skb->sk->sk_nx_info : NULL;
+ 
++	if (net != &init_net)
++		return 0;
+ 	/* FIXME: maybe disable ipv6 on non v6 guests?
+ 	if (skb->sk && skb->sk->sk_vx_info)
+ 		return skb->len;	*/
+ 
+ 	read_lock(&dev_base_lock);
+ 	idx = 0;
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if (idx < s_idx)
+ 			goto cont;
+ 		if (!v6_dev_in_nx_info(dev, nxi))
+@@ -3661,10 +3695,10 @@
+ 		kfree_skb(skb);
+ 		goto errout;
+ 	}
+-	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
++	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
+ errout:
+ 	if (err < 0)
+-		rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err);
++		rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_IFADDR, err);
+ }
+ 
+ static inline size_t inet6_prefix_nlmsg_size(void)
+@@ -3730,10 +3764,10 @@
+ 		kfree_skb(skb);
+ 		goto errout;
+ 	}
+-	err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
++	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
+ errout:
+ 	if (err < 0)
+-		rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err);
++		rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_PREFIX, err);
+ }
+ 
+ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
+@@ -4244,16 +4278,16 @@
+ 	 * device and it being up should be removed.
+ 	 */
+ 	rtnl_lock();
+-	if (!ipv6_add_dev(&loopback_dev))
++	if (!ipv6_add_dev(&init_net.loopback_dev))
+ 		err = -ENOMEM;
+ 	rtnl_unlock();
+ 	if (err)
+ 		return err;
+ 
+-	ip6_null_entry.rt6i_idev = in6_dev_get(&loopback_dev);
++	ip6_null_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
+ #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+-	ip6_prohibit_entry.rt6i_idev = in6_dev_get(&loopback_dev);
+-	ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&loopback_dev);
++	ip6_prohibit_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
++	ip6_blk_hole_entry.rt6i_idev = in6_dev_get(&init_net.loopback_dev);
+ #endif
+ 
+ 	register_netdevice_notifier(&ipv6_dev_notf);
+@@ -4304,12 +4338,12 @@
+ 	 *	clean dev list.
+ 	 */
+ 
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if ((idev = __in6_dev_get(dev)) == NULL)
+ 			continue;
+ 		addrconf_ifdown(dev, 1);
+ 	}
+-	addrconf_ifdown(&loopback_dev, 2);
++	addrconf_ifdown(&init_net.loopback_dev, 2);
+ 
+ 	/*
+ 	 *	Check hash table.
+@@ -4335,6 +4369,6 @@
+ 	rtnl_unlock();
+ 
+ #ifdef CONFIG_PROC_FS
+-	proc_net_remove("if_inet6");
++	proc_net_remove(&init_net, "if_inet6");
+ #endif
+ }
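+Note: the addrconf diff above applies one policy to every rtnetlink entry
+point. Dump handlers answer a foreign namespace with an empty dump (return
+0), doit handlers reject it with -EINVAL, and notifications name the
+namespace explicitly through rtnl_notify()/rtnl_unicast()/rtnl_set_sk_err().
+A condensed sketch of a converted pair (both names hypothetical):
+
+    /* Sketch: rtnetlink handlers in a namespaced world. */
+    static int example_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
+    {
+    	struct net *net = skb->sk->sk_net;
+
+    	if (net != &init_net)	/* not converted yet: refuse */
+    		return -EINVAL;
+    	/* ... parse attributes and act ... */
+    	return 0;
+    }
+
+    static int example_dump(struct sk_buff *skb, struct netlink_callback *cb)
+    {
+    	struct net *net = skb->sk->sk_net;
+
+    	if (net != &init_net)	/* not converted yet: empty dump */
+    		return 0;
+    	/* ... fill skb ... */
+    	return skb->len;
+    }
+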
+diff -Nurb linux-2.6.22-try2/net/ipv6/af_inet6.c linux-2.6.22-try2-netns/net/ipv6/af_inet6.c
+--- linux-2.6.22-try2/net/ipv6/af_inet6.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/af_inet6.c	2007-12-19 22:49:20.000000000 -0500
+@@ -82,7 +82,7 @@
+ 	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
+ }
+ 
+-static int inet6_create(struct socket *sock, int protocol)
++static int inet6_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct inet_sock *inet;
+ 	struct ipv6_pinfo *np;
+@@ -95,6 +95,9 @@
+ 	int try_loading_module = 0;
+ 	int err;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	if (sock->type != SOCK_RAW &&
+ 	    sock->type != SOCK_DGRAM &&
+ 	    !inet_ehash_secret)
+@@ -163,7 +166,7 @@
+ 	BUG_TRAP(answer_prot->slab != NULL);
+ 
+ 	err = -ENOBUFS;
+-	sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1);
++	sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, 1);
+ 	if (sk == NULL)
+ 		goto out;
+ 
+@@ -206,7 +209,7 @@
+ 	inet->mc_index	= 0;
+ 	inet->mc_list	= NULL;
+ 
+-	if (ipv4_config.no_pmtu_disc)
++	if (init_net.sysctl_ipv4_no_pmtu_disc)
+ 		inet->pmtudisc = IP_PMTUDISC_DONT;
+ 	else
+ 		inet->pmtudisc = IP_PMTUDISC_WANT;
+@@ -287,7 +290,7 @@
+ 	/* Check if the address belongs to the host. */
+ 	if (addr_type == IPV6_ADDR_MAPPED) {
+ 		v4addr = addr->sin6_addr.s6_addr32[3];
+-		if (inet_addr_type(v4addr) != RTN_LOCAL) {
++		if (inet_addr_type(&init_net, v4addr) != RTN_LOCAL) {
+ 			err = -EADDRNOTAVAIL;
+ 			goto out;
+ 		}
+@@ -313,7 +316,7 @@
+ 					err = -EINVAL;
+ 					goto out;
+ 				}
+-				dev = dev_get_by_index(sk->sk_bound_dev_if);
++				dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+ 				if (!dev) {
+ 					err = -ENODEV;
+ 					goto out;
+@@ -672,6 +675,7 @@
+ 		struct flowi fl;
+ 
+ 		memset(&fl, 0, sizeof(fl));
++		fl.fl_net = &init_net;
+ 		fl.proto = sk->sk_protocol;
+ 		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ 		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
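+Note: af_inet6 shows the socket-creation side of the conversion. A protocol
+family's create hook now receives the caller's namespace, allocates the sock
+against it with sk_alloc(net, ...), and families that are not yet
+namespace-clean bail out early with -EAFNOSUPPORT. A sketch, with
+example_create and example_prot as assumed names:
+
+    /* Sketch: namespace-aware net_proto_family create hook. */
+    static int example_create(struct net *net, struct socket *sock, int protocol)
+    {
+    	struct sock *sk;
+
+    	if (net != &init_net)		/* family not converted yet */
+    		return -EAFNOSUPPORT;
+
+    	sk = sk_alloc(net, PF_INET6, GFP_KERNEL, &example_prot, 1);
+    	if (sk == NULL)
+    		return -ENOBUFS;
+    	/* ... per-protocol initialisation ... */
+    	return 0;
+    }
+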
+diff -Nurb linux-2.6.22-try2/net/ipv6/anycast.c linux-2.6.22-try2-netns/net/ipv6/anycast.c
+--- linux-2.6.22-try2/net/ipv6/anycast.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/anycast.c	2007-12-19 22:49:20.000000000 -0500
+@@ -32,6 +32,7 @@
+ 
+ #include <net/sock.h>
+ #include <net/snmp.h>
++#include <net/net_namespace.h>
+ 
+ #include <net/ipv6.h>
+ #include <net/protocol.h>
+@@ -112,10 +113,10 @@
+ 		} else {
+ 			/* router, no matching interface: just pick one */
+ 
+-			dev = dev_get_by_flags(IFF_UP, IFF_UP|IFF_LOOPBACK);
++			dev = dev_get_by_flags(&init_net, IFF_UP, IFF_UP|IFF_LOOPBACK);
+ 		}
+ 	} else
+-		dev = dev_get_by_index(ifindex);
++		dev = dev_get_by_index(&init_net, ifindex);
+ 
+ 	if (dev == NULL) {
+ 		err = -ENODEV;
+@@ -196,7 +197,7 @@
+ 
+ 	write_unlock_bh(&ipv6_sk_ac_lock);
+ 
+-	dev = dev_get_by_index(pac->acl_ifindex);
++	dev = dev_get_by_index(&init_net, pac->acl_ifindex);
+ 	if (dev) {
+ 		ipv6_dev_ac_dec(dev, &pac->acl_addr);
+ 		dev_put(dev);
+@@ -224,7 +225,7 @@
+ 		if (pac->acl_ifindex != prev_index) {
+ 			if (dev)
+ 				dev_put(dev);
+-			dev = dev_get_by_index(pac->acl_ifindex);
++			dev = dev_get_by_index(&init_net, pac->acl_ifindex);
+ 			prev_index = pac->acl_ifindex;
+ 		}
+ 		if (dev)
+@@ -429,7 +430,7 @@
+ 	if (dev)
+ 		return ipv6_chk_acast_dev(dev, addr);
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev)
++	for_each_netdev(&init_net, dev)
+ 		if (ipv6_chk_acast_dev(dev, addr)) {
+ 			found = 1;
+ 			break;
+@@ -453,7 +454,7 @@
+ 	struct ac6_iter_state *state = ac6_seq_private(seq);
+ 
+ 	state->idev = NULL;
+-	for_each_netdev(state->dev) {
++	for_each_netdev(&init_net, state->dev) {
+ 		struct inet6_dev *idev;
+ 		idev = in6_dev_get(state->dev);
+ 		if (!idev)
+@@ -579,7 +580,7 @@
+ 
+ int __init ac6_proc_init(void)
+ {
+-	if (!proc_net_fops_create("anycast6", S_IRUGO, &ac6_seq_fops))
++	if (!proc_net_fops_create(&init_net, "anycast6", S_IRUGO, &ac6_seq_fops))
+ 		return -ENOMEM;
+ 
+ 	return 0;
+@@ -587,7 +588,7 @@
+ 
+ void ac6_proc_exit(void)
+ {
+-	proc_net_remove("anycast6");
++	proc_net_remove(&init_net, "anycast6");
+ }
+ #endif
+ 
+diff -Nurb linux-2.6.22-try2/net/ipv6/datagram.c linux-2.6.22-try2-netns/net/ipv6/datagram.c
+--- linux-2.6.22-try2/net/ipv6/datagram.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/datagram.c	2007-12-19 22:49:20.000000000 -0500
+@@ -60,6 +60,7 @@
+ 		return -EAFNOSUPPORT;
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	if (np->sndflow) {
+ 		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ 		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+@@ -544,7 +545,7 @@
+ 				if (!src_info->ipi6_ifindex)
+ 					return -EINVAL;
+ 				else {
+-					dev = dev_get_by_index(src_info->ipi6_ifindex);
++					dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex);
+ 					if (!dev)
+ 						return -ENODEV;
+ 				}
+diff -Nurb linux-2.6.22-try2/net/ipv6/fib6_rules.c linux-2.6.22-try2-netns/net/ipv6/fib6_rules.c
+--- linux-2.6.22-try2/net/ipv6/fib6_rules.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/fib6_rules.c	2007-12-19 22:49:20.000000000 -0500
+@@ -244,7 +244,7 @@
+ 	return -ENOBUFS;
+ }
+ 
+-static u32 fib6_rule_default_pref(void)
++static u32 fib6_rule_default_pref(struct fib_rules_ops *ops)
+ {
+ 	return 0x3FFF;
+ }
+@@ -277,10 +277,10 @@
+ 	list_add_tail(&local_rule.common.list, &fib6_rules);
+ 	list_add_tail(&main_rule.common.list, &fib6_rules);
+ 
+-	fib_rules_register(&fib6_rules_ops);
++	fib_rules_register(&init_net, &fib6_rules_ops);
+ }
+ 
+ void fib6_rules_cleanup(void)
+ {
+-	fib_rules_unregister(&fib6_rules_ops);
++	fib_rules_unregister(&init_net, &fib6_rules_ops);
+ }
+diff -Nurb linux-2.6.22-try2/net/ipv6/icmp.c linux-2.6.22-try2-netns/net/ipv6/icmp.c
+--- linux-2.6.22-try2/net/ipv6/icmp.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/icmp.c	2007-12-19 22:49:20.000000000 -0500
+@@ -377,6 +377,7 @@
+ 	mip6_addr_swap(skb);
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	fl.proto = IPPROTO_ICMPV6;
+ 	ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
+ 	if (saddr)
+@@ -495,6 +496,7 @@
+ 	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	fl.proto = IPPROTO_ICMPV6;
+ 	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+ 	if (saddr)
+diff -Nurb linux-2.6.22-try2/net/ipv6/inet6_connection_sock.c linux-2.6.22-try2-netns/net/ipv6/inet6_connection_sock.c
+--- linux-2.6.22-try2/net/ipv6/inet6_connection_sock.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/inet6_connection_sock.c	2007-12-19 22:49:20.000000000 -0500
+@@ -149,6 +149,7 @@
+ 	struct in6_addr *final_p = NULL, final;
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	fl.proto = sk->sk_protocol;
+ 	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ 	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+diff -Nurb linux-2.6.22-try2/net/ipv6/inet6_hashtables.c linux-2.6.22-try2-netns/net/ipv6/inet6_hashtables.c
+--- linux-2.6.22-try2/net/ipv6/inet6_hashtables.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/inet6_hashtables.c	2007-12-19 22:49:20.000000000 -0500
+@@ -61,7 +61,7 @@
+ 					   const __be16 sport,
+ 					   const struct in6_addr *daddr,
+ 					   const u16 hnum,
+-					   const int dif)
++					   const int dif, struct net *net)
+ {
+ 	struct sock *sk;
+ 	const struct hlist_node *node;
+@@ -105,7 +105,7 @@
+ 
+ struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
+ 				   const struct in6_addr *daddr,
+-				   const unsigned short hnum, const int dif)
++				   const unsigned short hnum, const int dif, struct net *net)
+ {
+ 	struct sock *sk;
+ 	const struct hlist_node *node;
+@@ -113,7 +113,7 @@
+ 	int score, hiscore = 0;
+ 
+ 	read_lock(&hashinfo->lhash_lock);
+-	sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) {
++	sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(net, hnum)]) {
+ 		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
+ 			const struct ipv6_pinfo *np = inet6_sk(sk);
+ 
+@@ -152,12 +152,12 @@
+ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
+ 			  const struct in6_addr *saddr, const __be16 sport,
+ 			  const struct in6_addr *daddr, const __be16 dport,
+-			  const int dif)
++			  const int dif, struct net *net)
+ {
+ 	struct sock *sk;
+ 
+ 	local_bh_disable();
+-	sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
++	sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif, net);
+ 	local_bh_enable();
+ 
+ 	return sk;
+@@ -251,6 +251,7 @@
+ int inet6_hash_connect(struct inet_timewait_death_row *death_row,
+ 		       struct sock *sk)
+ {
++	struct net *net = sk->sk_net;
+ 	struct inet_hashinfo *hinfo = death_row->hashinfo;
+ 	const unsigned short snum = inet_sk(sk)->num;
+ 	struct inet_bind_hashbucket *head;
+@@ -258,8 +259,8 @@
+ 	int ret;
+ 
+ 	if (snum == 0) {
+-		const int low = sysctl_local_port_range[0];
+-		const int high = sysctl_local_port_range[1];
++		const int low = net->sysctl_local_port_range[0];
++		const int high = net->sysctl_local_port_range[1];
+ 		const int range = high - low;
+ 		int i, port;
+ 		static u32 hint;
+@@ -270,7 +271,7 @@
+ 		local_bh_disable();
+ 		for (i = 1; i <= range; i++) {
+ 			port = low + (i + offset) % range;
+-			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
++			head = &hinfo->bhash[inet_bhashfn(net, port, hinfo->bhash_size)];
+ 			spin_lock(&head->lock);
+ 
+ 			/* Does not bother with rcv_saddr checks,
+@@ -278,7 +279,7 @@
+ 			 * unique enough.
+ 			 */
+ 			inet_bind_bucket_for_each(tb, node, &head->chain) {
+-				if (tb->port == port) {
++				if ((tb->port == port) && (tb->net == net)) {
+ 					BUG_TRAP(!hlist_empty(&tb->owners));
+ 					if (tb->fastreuse >= 0)
+ 						goto next_port;
+@@ -291,7 +292,7 @@
+ 			}
+ 
+ 			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+-						     head, port);
++						     head, net, port);
+ 			if (!tb) {
+ 				spin_unlock(&head->lock);
+ 				break;
+@@ -326,7 +327,7 @@
+ 		goto out;
+ 	}
+ 
+-	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
++	head = &hinfo->bhash[inet_bhashfn(net, snum, hinfo->bhash_size)];
+ 	tb   = inet_csk(sk)->icsk_bind_hash;
+ 	spin_lock_bh(&head->lock);
+ 
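+Note: inet6_hashtables makes port allocation namespace-aware in two steps.
+The ephemeral range is read from the socket's namespace, and a bind bucket
+only matches when both its port and its namespace agree, so two namespaces
+can bind the same port without colliding. A sketch of the bucket match
+(example_find_bucket is hypothetical; tb->net is the field this patch adds):
+
+    /* Sketch: bind buckets are keyed by (net, port). */
+    static struct inet_bind_bucket *
+    example_find_bucket(struct inet_bind_hashbucket *head,
+    		    struct net *net, unsigned short port)
+    {
+    	struct inet_bind_bucket *tb;
+    	struct hlist_node *node;
+
+    	inet_bind_bucket_for_each(tb, node, &head->chain)
+    		if (tb->port == port && tb->net == net)
+    			return tb;
+    	return NULL;
+    }
+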
+diff -Nurb linux-2.6.22-try2/net/ipv6/ip6_fib.c linux-2.6.22-try2-netns/net/ipv6/ip6_fib.c
+--- linux-2.6.22-try2/net/ipv6/ip6_fib.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/ip6_fib.c	2007-12-19 22:49:20.000000000 -0500
+@@ -361,6 +361,7 @@
+ 
+ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	unsigned int h, s_h;
+ 	unsigned int e = 0, s_e;
+ 	struct rt6_rtnl_dump_arg arg;
+@@ -369,6 +370,9 @@
+ 	struct hlist_node *node;
+ 	int res = 0;
+ 
++	if (net != &init_net)
++		return 0;
++
+ 	s_h = cb->args[0];
+ 	s_e = cb->args[1];
+ 
+@@ -1311,6 +1315,11 @@
+ 
+ static int fib6_clean_node(struct fib6_walker_t *w)
+ {
++	struct nl_info info = {
++		.nlh = NULL,
++		.pid = 0,
++		.net = &init_net,
++	};
+ 	int res;
+ 	struct rt6_info *rt;
+ 	struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;
+@@ -1319,7 +1328,7 @@
+ 		res = c->func(rt, c->arg);
+ 		if (res < 0) {
+ 			w->leaf = rt;
+-			res = fib6_del(rt, NULL);
++			res = fib6_del(rt, &info);
+ 			if (res) {
+ #if RT6_DEBUG >= 2
+ 				printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
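+Note: the fib6_clean_node hunk shows why struct nl_info gained a net
+pointer. fib6_del() uses it to pick the rtnl socket for the deletion
+notification, so internal callers that used to pass NULL must now hand over
+an explicit info block. The same change reappears in ip6_ins_rt() and
+ip6_del_rt() in the route.c diff below; the shared pattern, wrapped in a
+hypothetical caller:
+
+    /* Sketch: kernel-internal fib6 operations still need a namespace
+     * for their netlink notifications. */
+    static int example_del(struct rt6_info *rt)
+    {
+    	struct nl_info info = {
+    		.nlh = NULL,		/* not nlmsg-triggered */
+    		.pid = 0,		/* kernel-originated */
+    		.net = &init_net,	/* notify via init_net's rtnl */
+    	};
+
+    	return fib6_del(rt, &info);
+    }
+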
+diff -Nurb linux-2.6.22-try2/net/ipv6/ip6_flowlabel.c linux-2.6.22-try2-netns/net/ipv6/ip6_flowlabel.c
+--- linux-2.6.22-try2/net/ipv6/ip6_flowlabel.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/ip6_flowlabel.c	2007-12-19 22:49:20.000000000 -0500
+@@ -22,6 +22,7 @@
+ #include <linux/seq_file.h>
+ 
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ 
+ #include <net/ipv6.h>
+ #include <net/ndisc.h>
+@@ -309,6 +310,7 @@
+ 
+ 		msg.msg_controllen = olen;
+ 		msg.msg_control = (void*)(fl->opt+1);
++		flowi.fl_net = &init_net;
+ 		flowi.oif = 0;
+ 
+ 		err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk);
+@@ -690,7 +692,7 @@
+ void ip6_flowlabel_init(void)
+ {
+ #ifdef CONFIG_PROC_FS
+-	proc_net_fops_create("ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops);
++	proc_net_fops_create(&init_net, "ip6_flowlabel", S_IRUGO, &ip6fl_seq_fops);
+ #endif
+ }
+ 
+@@ -698,6 +700,6 @@
+ {
+ 	del_timer(&ip6_fl_gc_timer);
+ #ifdef CONFIG_PROC_FS
+-	proc_net_remove("ip6_flowlabel");
++	proc_net_remove(&init_net, "ip6_flowlabel");
+ #endif
+ }
+diff -Nurb linux-2.6.22-try2/net/ipv6/ip6_input.c linux-2.6.22-try2-netns/net/ipv6/ip6_input.c
+--- linux-2.6.22-try2/net/ipv6/ip6_input.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/ip6_input.c	2007-12-19 22:49:20.000000000 -0500
+@@ -61,6 +61,11 @@
+ 	u32 		pkt_len;
+ 	struct inet6_dev *idev;
+ 
++	if (dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
++
+ 	if (skb->pkt_type == PACKET_OTHERHOST) {
+ 		kfree_skb(skb);
+ 		return 0;
+diff -Nurb linux-2.6.22-try2/net/ipv6/ip6_output.c linux-2.6.22-try2-netns/net/ipv6/ip6_output.c
+--- linux-2.6.22-try2/net/ipv6/ip6_output.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/ip6_output.c	2007-12-19 22:49:20.000000000 -0500
+@@ -423,7 +423,7 @@
+ 
+ 	/* XXX: idev->cnf.proxy_ndp? */
+ 	if (ipv6_devconf.proxy_ndp &&
+-	    pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
++	    pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) {
+ 		int proxied = ip6_forward_proxy_check(skb);
+ 		if (proxied > 0)
+ 			return ip6_input(skb);
+diff -Nurb linux-2.6.22-try2/net/ipv6/ip6_tunnel.c linux-2.6.22-try2-netns/net/ipv6/ip6_tunnel.c
+--- linux-2.6.22-try2/net/ipv6/ip6_tunnel.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/ip6_tunnel.c	2007-12-19 22:49:20.000000000 -0500
+@@ -235,7 +235,7 @@
+ 		int i;
+ 		for (i = 1; i < IP6_TNL_MAX; i++) {
+ 			sprintf(name, "ip6tnl%d", i);
+-			if (__dev_get_by_name(name) == NULL)
++			if (__dev_get_by_name(&init_net, name) == NULL)
+ 				break;
+ 		}
+ 		if (i == IP6_TNL_MAX)
+@@ -651,7 +651,7 @@
+ 		struct net_device *ldev = NULL;
+ 
+ 		if (p->link)
+-			ldev = dev_get_by_index(p->link);
++			ldev = dev_get_by_index(&init_net, p->link);
+ 
+ 		if ((ipv6_addr_is_multicast(&p->laddr) ||
+ 		     likely(ipv6_chk_addr(&p->laddr, ldev, 0))) &&
+@@ -787,7 +787,7 @@
+ 		struct net_device *ldev = NULL;
+ 
+ 		if (p->link)
+-			ldev = dev_get_by_index(p->link);
++			ldev = dev_get_by_index(&init_net, p->link);
+ 
+ 		if (unlikely(!ipv6_chk_addr(&p->laddr, ldev, 0)))
+ 			printk(KERN_WARNING
+diff -Nurb linux-2.6.22-try2/net/ipv6/ipv6_sockglue.c linux-2.6.22-try2-netns/net/ipv6/ipv6_sockglue.c
+--- linux-2.6.22-try2/net/ipv6/ipv6_sockglue.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/ipv6_sockglue.c	2007-12-19 22:49:20.000000000 -0500
+@@ -463,6 +463,7 @@
+ 		struct flowi fl;
+ 		int junk;
+ 
++		fl.fl_net = &init_net;
+ 		fl.fl6_flowlabel = 0;
+ 		fl.oif = sk->sk_bound_dev_if;
+ 
+@@ -547,7 +548,7 @@
+ 		if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != val)
+ 			goto e_inval;
+ 
+-		if (__dev_get_by_index(val) == NULL) {
++		if (__dev_get_by_index(&init_net, val) == NULL) {
+ 			retv = -ENODEV;
+ 			break;
+ 		}
+diff -Nurb linux-2.6.22-try2/net/ipv6/mcast.c linux-2.6.22-try2-netns/net/ipv6/mcast.c
+--- linux-2.6.22-try2/net/ipv6/mcast.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/mcast.c	2007-12-19 22:49:20.000000000 -0500
+@@ -51,6 +51,7 @@
+ 
+ #include <net/sock.h>
+ #include <net/snmp.h>
++#include <net/net_namespace.h>
+ 
+ #include <net/ipv6.h>
+ #include <net/protocol.h>
+@@ -214,7 +215,7 @@
+ 			dst_release(&rt->u.dst);
+ 		}
+ 	} else
+-		dev = dev_get_by_index(ifindex);
++		dev = dev_get_by_index(&init_net, ifindex);
+ 
+ 	if (dev == NULL) {
+ 		sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
+@@ -265,7 +266,7 @@
+ 			*lnk = mc_lst->next;
+ 			write_unlock_bh(&ipv6_sk_mc_lock);
+ 
+-			if ((dev = dev_get_by_index(mc_lst->ifindex)) != NULL) {
++			if ((dev = dev_get_by_index(&init_net, mc_lst->ifindex)) != NULL) {
+ 				struct inet6_dev *idev = in6_dev_get(dev);
+ 
+ 				(void) ip6_mc_leave_src(sk, mc_lst, idev);
+@@ -300,7 +301,7 @@
+ 			dst_release(&rt->u.dst);
+ 		}
+ 	} else
+-		dev = dev_get_by_index(ifindex);
++		dev = dev_get_by_index(&init_net, ifindex);
+ 
+ 	if (!dev)
+ 		return NULL;
+@@ -331,7 +332,7 @@
+ 		np->ipv6_mc_list = mc_lst->next;
+ 		write_unlock_bh(&ipv6_sk_mc_lock);
+ 
+-		dev = dev_get_by_index(mc_lst->ifindex);
++		dev = dev_get_by_index(&init_net, mc_lst->ifindex);
+ 		if (dev) {
+ 			struct inet6_dev *idev = in6_dev_get(dev);
+ 
+@@ -2332,7 +2333,7 @@
+ 	struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq);
+ 
+ 	state->idev = NULL;
+-	for_each_netdev(state->dev) {
++	for_each_netdev(&init_net, state->dev) {
+ 		struct inet6_dev *idev;
+ 		idev = in6_dev_get(state->dev);
+ 		if (!idev)
+@@ -2476,7 +2477,7 @@
+ 
+ 	state->idev = NULL;
+ 	state->im = NULL;
+-	for_each_netdev(state->dev) {
++	for_each_netdev(&init_net, state->dev) {
+ 		struct inet6_dev *idev;
+ 		idev = in6_dev_get(state->dev);
+ 		if (unlikely(idev == NULL))
+@@ -2658,8 +2659,8 @@
+ 	np->hop_limit = 1;
+ 
+ #ifdef CONFIG_PROC_FS
+-	proc_net_fops_create("igmp6", S_IRUGO, &igmp6_mc_seq_fops);
+-	proc_net_fops_create("mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
++	proc_net_fops_create(&init_net, "igmp6", S_IRUGO, &igmp6_mc_seq_fops);
++	proc_net_fops_create(&init_net, "mcfilter6", S_IRUGO, &igmp6_mcf_seq_fops);
+ #endif
+ 
+ 	return 0;
+@@ -2671,7 +2672,7 @@
+ 	igmp6_socket = NULL; /* for safety */
+ 
+ #ifdef CONFIG_PROC_FS
+-	proc_net_remove("mcfilter6");
+-	proc_net_remove("igmp6");
++	proc_net_remove(&init_net, "mcfilter6");
++	proc_net_remove(&init_net, "igmp6");
+ #endif
+ }
+diff -Nurb linux-2.6.22-try2/net/ipv6/ndisc.c linux-2.6.22-try2-netns/net/ipv6/ndisc.c
+--- linux-2.6.22-try2/net/ipv6/ndisc.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/ndisc.c	2007-12-19 22:49:20.000000000 -0500
+@@ -418,6 +418,7 @@
+ 			    int oif)
+ {
+ 	memset(fl, 0, sizeof(*fl));
++	fl->fl_net = &init_net;
+ 	ipv6_addr_copy(&fl->fl6_src, saddr);
+ 	ipv6_addr_copy(&fl->fl6_dst, daddr);
+ 	fl->proto	 	= IPPROTO_ICMPV6;
+@@ -760,7 +761,7 @@
+ 		if (ipv6_chk_acast_addr(dev, &msg->target) ||
+ 		    (idev->cnf.forwarding &&
+ 		     (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
+-		     (pneigh = pneigh_lookup(&nd_tbl,
++		     (pneigh = pneigh_lookup(&nd_tbl, &init_net,
+ 					     &msg->target, dev, 0)) != NULL)) {
+ 			if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
+ 			    skb->pkt_type != PACKET_HOST &&
+@@ -901,7 +902,7 @@
+ 		 */
+ 		if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
+ 		    ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
+-		    pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) {
++		    pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) {
+ 			/* XXX: idev->cnf.prixy_ndp */
+ 			goto out;
+ 		}
+@@ -1525,6 +1526,9 @@
+ {
+ 	struct net_device *dev = ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	switch (event) {
+ 	case NETDEV_CHANGEADDR:
+ 		neigh_changeaddr(&nd_tbl, dev);
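+Note: ndisc's netdevice notifier gets the standard notifier guard. Events
+for devices in another namespace are answered with NOTIFY_DONE, which lets
+the chain continue without acting. The same three lines appear in every
+notifier this patch touches; in isolation (example_device_event is a
+hypothetical name):
+
+    /* Sketch: netdevice notifiers ignore foreign namespaces. */
+    static int example_device_event(struct notifier_block *this,
+    				unsigned long event, void *ptr)
+    {
+    	struct net_device *dev = ptr;
+
+    	if (dev->nd_net != &init_net)	/* not ours: pass */
+    		return NOTIFY_DONE;
+
+    	switch (event) {
+    	case NETDEV_DOWN:
+    		/* ... react to the event ... */
+    		break;
+    	}
+    	return NOTIFY_DONE;
+    }
+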
+diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/ip6_queue.c linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6_queue.c
+--- linux-2.6.22-try2/net/ipv6/netfilter/ip6_queue.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6_queue.c	2007-12-19 22:49:20.000000000 -0500
+@@ -24,6 +24,7 @@
+ #include <linux/sysctl.h>
+ #include <linux/proc_fs.h>
+ #include <linux/mutex.h>
++#include <net/net_namespace.h>
+ #include <net/sock.h>
+ #include <net/ipv6.h>
+ #include <net/ip6_route.h>
+@@ -546,6 +547,9 @@
+ {
+ 	struct net_device *dev = ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	/* Drop any packets associated with the downed device */
+ 	if (event == NETDEV_DOWN)
+ 		ipq_dev_drop(dev->ifindex);
+@@ -565,7 +569,7 @@
+ 	if (event == NETLINK_URELEASE &&
+ 	    n->protocol == NETLINK_IP6_FW && n->pid) {
+ 		write_lock_bh(&queue_lock);
+-		if (n->pid == peer_pid)
++		if ((n->net == &init_net) && (n->pid == peer_pid))
+ 			__ipq_reset();
+ 		write_unlock_bh(&queue_lock);
+ 	}
+@@ -657,14 +661,14 @@
+ 	struct proc_dir_entry *proc;
+ 
+ 	netlink_register_notifier(&ipq_nl_notifier);
+-	ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, NULL,
+-				      THIS_MODULE);
++	ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, ipq_rcv_sk,
++					NULL, THIS_MODULE);
+ 	if (ipqnl == NULL) {
+ 		printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
+ 		goto cleanup_netlink_notifier;
+ 	}
+ 
+-	proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
++	proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info);
+ 	if (proc)
+ 		proc->owner = THIS_MODULE;
+ 	else {
+@@ -685,7 +689,7 @@
+ cleanup_sysctl:
+ 	unregister_sysctl_table(ipq_sysctl_header);
+ 	unregister_netdevice_notifier(&ipq_dev_notifier);
+-	proc_net_remove(IPQ_PROC_FS_NAME);
++	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
+ 
+ cleanup_ipqnl:
+ 	sock_release(ipqnl->sk_socket);
+@@ -705,7 +709,7 @@
+ 
+ 	unregister_sysctl_table(ipq_sysctl_header);
+ 	unregister_netdevice_notifier(&ipq_dev_notifier);
+-	proc_net_remove(IPQ_PROC_FS_NAME);
++	proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
+ 
+ 	sock_release(ipqnl->sk_socket);
+ 	mutex_lock(&ipqnl_mutex);
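+Note: ip6_queue covers the kernel-netlink side. netlink_kernel_create()
+now takes the namespace the socket lives in, and the NETLINK_URELEASE
+notifier must compare namespaces before trusting a pid, since pids are only
+meaningful within one namespace. A sketch distilled from the hunks above
+(the queue_lock taken by the real code is elided; example_rcv_nl_event is a
+hypothetical name):
+
+    /* Sketch: kernel netlink sockets belong to a namespace, and peer
+     * matching must compare (net, pid), not pid alone. */
+    static int example_rcv_nl_event(struct notifier_block *this,
+    				unsigned long event, void *ptr)
+    {
+    	struct netlink_notify *n = ptr;
+
+    	if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW &&
+    	    n->pid && n->net == &init_net && n->pid == peer_pid)
+    		__ipq_reset();
+    	return NOTIFY_DONE;
+    }
+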
+diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/ip6_tables.c linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6_tables.c
+--- linux-2.6.22-try2/net/ipv6/netfilter/ip6_tables.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6_tables.c	2007-12-19 22:49:20.000000000 -0500
+@@ -906,7 +906,7 @@
+ 	int ret;
+ 	struct xt_table *t;
+ 
+-	t = xt_find_table_lock(AF_INET6, entries->name);
++	t = xt_find_table_lock(&init_net, AF_INET6, entries->name);
+ 	if (t && !IS_ERR(t)) {
+ 		struct xt_table_info *private = t->private;
+ 		duprintf("t->private->number = %u\n", private->number);
+@@ -972,7 +972,7 @@
+ 
+ 	duprintf("ip_tables: Translated table\n");
+ 
+-	t = try_then_request_module(xt_find_table_lock(AF_INET6, tmp.name),
++	t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, tmp.name),
+ 				    "ip6table_%s", tmp.name);
+ 	if (!t || IS_ERR(t)) {
+ 		ret = t ? PTR_ERR(t) : -ENOENT;
+@@ -1073,7 +1073,7 @@
+ 		goto free;
+ 	}
+ 
+-	t = xt_find_table_lock(AF_INET6, tmp.name);
++	t = xt_find_table_lock(&init_net, AF_INET6, tmp.name);
+ 	if (!t || IS_ERR(t)) {
+ 		ret = t ? PTR_ERR(t) : -ENOENT;
+ 		goto free;
+@@ -1109,6 +1109,9 @@
+ {
+ 	int ret;
+ 
++	if (sk->sk_net != &init_net)
++		return -ENOPROTOOPT;
++
+ 	if (!capable(CAP_NET_ADMIN))
+ 		return -EPERM;
+ 
+@@ -1134,6 +1137,9 @@
+ {
+ 	int ret;
+ 
++	if (sk->sk_net != &init_net)
++		return -ENOPROTOOPT;
++
+ 	if (!capable(CAP_NET_ADMIN))
+ 		return -EPERM;
+ 
+@@ -1155,7 +1161,7 @@
+ 		}
+ 		name[IP6T_TABLE_MAXNAMELEN-1] = '\0';
+ 
+-		t = try_then_request_module(xt_find_table_lock(AF_INET6, name),
++		t = try_then_request_module(xt_find_table_lock(&init_net, AF_INET6, name),
+ 					    "ip6table_%s", name);
+ 		if (t && !IS_ERR(t)) {
+ 			struct ip6t_getinfo info;
+@@ -1259,7 +1265,7 @@
+ 		return ret;
+ 	}
+ 
+-	ret = xt_register_table(table, &bootstrap, newinfo);
++	ret = xt_register_table(&init_net, table, &bootstrap, newinfo);
+ 	if (ret != 0) {
+ 		xt_free_table_info(newinfo);
+ 		return ret;
+diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/ip6t_REJECT.c linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6t_REJECT.c
+--- linux-2.6.22-try2/net/ipv6/netfilter/ip6t_REJECT.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6t_REJECT.c	2007-12-19 22:49:20.000000000 -0500
+@@ -92,6 +92,7 @@
+ 	}
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	fl.proto = IPPROTO_TCP;
+ 	ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr);
+ 	ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr);
+@@ -172,7 +173,7 @@
+ send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum)
+ {
+ 	if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL)
+-		skb_in->dev = &loopback_dev;
++		skb_in->dev = &init_net.loopback_dev;
+ 
+ 	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL);
+ }
+diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/ip6table_filter.c linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6table_filter.c
+--- linux-2.6.22-try2/net/ipv6/netfilter/ip6table_filter.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6table_filter.c	2007-12-19 22:49:20.000000000 -0500
+@@ -65,6 +65,10 @@
+ 	 const struct net_device *out,
+ 	 int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	return ip6t_do_table(pskb, hook, in, out, &packet_filter);
+ }
+ 
+@@ -75,6 +79,10 @@
+ 		   const struct net_device *out,
+ 		   int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ #if 0
+ 	/* root is playing with raw sockets. */
+ 	if ((*pskb)->len < sizeof(struct iphdr)
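+Note: the ip6table_* hooks all gain the same guard, and the (in ? in : out)
+idiom deserves a word. Depending on the hook point a netfilter hook sees an
+input device, an output device, or both, and whichever is non-NULL
+identifies the namespace. Foreign packets are accepted unfiltered rather
+than dropped, since dropping would black-hole namespaces that carry no
+ip6tables rules of their own. In isolation (example_hook is hypothetical):
+
+    /* Sketch: skip filtering for packets from other namespaces. */
+    static unsigned int example_hook(unsigned int hook, struct sk_buff **pskb,
+    				 const struct net_device *in,
+    				 const struct net_device *out,
+    				 int (*okfn)(struct sk_buff *))
+    {
+    	/* exactly one of in/out may be NULL at this hook point */
+    	if ((in ? in : out)->nd_net != &init_net)
+    		return NF_ACCEPT;
+
+    	return ip6t_do_table(pskb, hook, in, out, &packet_filter);
+    }
+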
+diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/ip6table_mangle.c linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6table_mangle.c
+--- linux-2.6.22-try2/net/ipv6/netfilter/ip6table_mangle.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6table_mangle.c	2007-12-19 22:49:20.000000000 -0500
+@@ -79,6 +79,10 @@
+ 	 const struct net_device *out,
+ 	 int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	return ip6t_do_table(pskb, hook, in, out, &packet_mangler);
+ }
+ 
+@@ -95,6 +99,10 @@
+ 	u_int8_t hop_limit;
+ 	u_int32_t flowlabel, mark;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ #if 0
+ 	/* root is playing with raw sockets. */
+ 	if ((*pskb)->len < sizeof(struct iphdr)
+diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/ip6table_raw.c linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6table_raw.c
+--- linux-2.6.22-try2/net/ipv6/netfilter/ip6table_raw.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/netfilter/ip6table_raw.c	2007-12-19 22:49:20.000000000 -0500
+@@ -57,6 +57,10 @@
+ 	 const struct net_device *out,
+ 	 int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	return ip6t_do_table(pskb, hook, in, out, &packet_raw);
+ }
+ 
+diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c linux-2.6.22-try2-netns/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+--- linux-2.6.22-try2/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c	2007-12-19 22:49:20.000000000 -0500
+@@ -167,6 +167,10 @@
+ 	unsigned char pnum = ipv6_hdr(*pskb)->nexthdr;
+ 
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/* This is where we call the helper: as the packet goes out. */
+ 	ct = nf_ct_get(*pskb, &ctinfo);
+ 	if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
+@@ -203,6 +207,10 @@
+ {
+ 	struct sk_buff *reasm;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/* Previously seen (loopback)?  */
+ 	if ((*pskb)->nfct)
+ 		return NF_ACCEPT;
+@@ -231,6 +239,10 @@
+ {
+ 	struct sk_buff *reasm = (*pskb)->nfct_reasm;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/* This packet is fragmented and has reassembled packet. */
+ 	if (reasm) {
+ 		/* Reassembled packet isn't parsed yet ? */
+@@ -256,6 +268,10 @@
+ 					 const struct net_device *out,
+ 					 int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	/* root is playing with raw sockets. */
+ 	if ((*pskb)->len < sizeof(struct ipv6hdr)) {
+ 		if (net_ratelimit())
+diff -Nurb linux-2.6.22-try2/net/ipv6/netfilter.c linux-2.6.22-try2-netns/net/ipv6/netfilter.c
+--- linux-2.6.22-try2/net/ipv6/netfilter.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/netfilter.c	2007-12-19 22:49:20.000000000 -0500
+@@ -14,6 +14,7 @@
+ 	struct ipv6hdr *iph = ipv6_hdr(skb);
+ 	struct dst_entry *dst;
+ 	struct flowi fl = {
++		.fl_net = &init_net,
+ 		.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
+ 		.mark = skb->mark,
+ 		.nl_u =
+diff -Nurb linux-2.6.22-try2/net/ipv6/proc.c linux-2.6.22-try2-netns/net/ipv6/proc.c
+--- linux-2.6.22-try2/net/ipv6/proc.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/proc.c	2007-12-19 22:49:20.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <net/tcp.h>
+ #include <net/transp_v6.h>
+ #include <net/ipv6.h>
++#include <net/net_namespace.h>
+ 
+ static struct proc_dir_entry *proc_net_devsnmp6;
+ 
+@@ -231,22 +232,22 @@
+ {
+ 	int rc = 0;
+ 
+-	if (!proc_net_fops_create("snmp6", S_IRUGO, &snmp6_seq_fops))
++	if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops))
+ 		goto proc_snmp6_fail;
+ 
+-	proc_net_devsnmp6 = proc_mkdir("dev_snmp6", proc_net);
++	proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net);
+ 	if (!proc_net_devsnmp6)
+ 		goto proc_dev_snmp6_fail;
+ 
+-	if (!proc_net_fops_create("sockstat6", S_IRUGO, &sockstat6_seq_fops))
++	if (!proc_net_fops_create(&init_net, "sockstat6", S_IRUGO, &sockstat6_seq_fops))
+ 		goto proc_sockstat6_fail;
+ out:
+ 	return rc;
+ 
+ proc_sockstat6_fail:
+-	proc_net_remove("dev_snmp6");
++	proc_net_remove(&init_net, "dev_snmp6");
+ proc_dev_snmp6_fail:
+-	proc_net_remove("snmp6");
++	proc_net_remove(&init_net, "snmp6");
+ proc_snmp6_fail:
+ 	rc = -ENOMEM;
+ 	goto out;
+@@ -254,8 +255,8 @@
+ 
+ void ipv6_misc_proc_exit(void)
+ {
+-	proc_net_remove("sockstat6");
+-	proc_net_remove("dev_snmp6");
+-	proc_net_remove("snmp6");
++	proc_net_remove(&init_net, "sockstat6");
++	proc_net_remove(&init_net, "dev_snmp6");
++	proc_net_remove(&init_net, "snmp6");
+ }
+ 
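+Note: proc.c is the template for the /proc/net conversion. Creation and
+removal take the owning namespace, subdirectories hang off net->proc_net
+instead of the global proc_net, and the error unwind must remove entries
+from the same namespace they were created in. A sketch of a paired init
+(all example names and example_fops are assumptions):
+
+    /* Sketch: create and unwind /proc/net entries per namespace. */
+    static int example_proc_init(struct net *net)
+    {
+    	if (!proc_net_fops_create(net, "example", S_IRUGO, &example_fops))
+    		return -ENOMEM;
+    	if (!proc_mkdir("example_dir", net->proc_net)) {
+    		proc_net_remove(net, "example");
+    		return -ENOMEM;
+    	}
+    	return 0;
+    }
+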
+diff -Nurb linux-2.6.22-try2/net/ipv6/raw.c linux-2.6.22-try2-netns/net/ipv6/raw.c
+--- linux-2.6.22-try2/net/ipv6/raw.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/raw.c	2007-12-19 22:49:20.000000000 -0500
+@@ -49,6 +49,7 @@
+ #include <net/udp.h>
+ #include <net/inet_common.h>
+ #include <net/tcp_states.h>
++#include <net/net_namespace.h>
+ #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
+ #include <net/mip6.h>
+ #endif
+@@ -282,7 +283,7 @@
+ 			if (!sk->sk_bound_dev_if)
+ 				goto out;
+ 
+-			dev = dev_get_by_index(sk->sk_bound_dev_if);
++			dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+ 			if (!dev) {
+ 				err = -ENODEV;
+ 				goto out;
+@@ -728,6 +729,7 @@
+ 	 *	Get and verify the address.
+ 	 */
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 
+ 	if (sin6) {
+ 		if (addr_len < SIN6_LEN_RFC2133)
+@@ -1315,13 +1317,13 @@
+ 
+ int __init raw6_proc_init(void)
+ {
+-	if (!proc_net_fops_create("raw6", S_IRUGO, &raw6_seq_fops))
++	if (!proc_net_fops_create(&init_net, "raw6", S_IRUGO, &raw6_seq_fops))
+ 		return -ENOMEM;
+ 	return 0;
+ }
+ 
+ void raw6_proc_exit(void)
+ {
+-	proc_net_remove("raw6");
++	proc_net_remove(&init_net, "raw6");
+ }
+ #endif	/* CONFIG_PROC_FS */
+diff -Nurb linux-2.6.22-try2/net/ipv6/reassembly.c linux-2.6.22-try2-netns/net/ipv6/reassembly.c
+--- linux-2.6.22-try2/net/ipv6/reassembly.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/reassembly.c	2007-12-19 22:49:20.000000000 -0500
+@@ -301,7 +301,7 @@
+ 
+ 	fq_kill(fq);
+ 
+-	dev = dev_get_by_index(fq->iif);
++	dev = dev_get_by_index(&init_net, fq->iif);
+ 	if (!dev)
+ 		goto out;
+ 
+diff -Nurb linux-2.6.22-try2/net/ipv6/route.c linux-2.6.22-try2-netns/net/ipv6/route.c
+--- linux-2.6.22-try2/net/ipv6/route.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/route.c	2007-12-19 22:49:20.000000000 -0500
+@@ -56,6 +56,7 @@
+ #include <net/xfrm.h>
+ #include <net/netevent.h>
+ #include <net/netlink.h>
++#include <net/net_namespace.h>
+ 
+ #include <asm/uaccess.h>
+ 
+@@ -137,7 +138,7 @@
+ 		.dst = {
+ 			.__refcnt	= ATOMIC_INIT(1),
+ 			.__use		= 1,
+-			.dev		= &loopback_dev,
++			.dev		= NULL,
+ 			.obsolete	= -1,
+ 			.error		= -ENETUNREACH,
+ 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+@@ -163,7 +164,7 @@
+ 		.dst = {
+ 			.__refcnt	= ATOMIC_INIT(1),
+ 			.__use		= 1,
+-			.dev		= &loopback_dev,
++			.dev		= NULL,
+ 			.obsolete	= -1,
+ 			.error		= -EACCES,
+ 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+@@ -183,7 +184,7 @@
+ 		.dst = {
+ 			.__refcnt	= ATOMIC_INIT(1),
+ 			.__use		= 1,
+-			.dev		= &loopback_dev,
++			.dev		= NULL,
+ 			.obsolete	= -1,
+ 			.error		= -EINVAL,
+ 			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
+@@ -223,8 +224,8 @@
+ 	struct rt6_info *rt = (struct rt6_info *)dst;
+ 	struct inet6_dev *idev = rt->rt6i_idev;
+ 
+-	if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
+-		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
++	if (dev != &init_net.loopback_dev && idev != NULL && idev->dev == dev) {
++		struct inet6_dev *loopback_idev = in6_dev_get(&init_net.loopback_dev);
+ 		if (loopback_idev != NULL) {
+ 			rt->rt6i_idev = loopback_idev;
+ 			in6_dev_put(idev);
+@@ -564,6 +565,7 @@
+ 			    int oif, int strict)
+ {
+ 	struct flowi fl = {
++		.fl_net = &init_net,
+ 		.oif = oif,
+ 		.nl_u = {
+ 			.ip6_u = {
+@@ -611,7 +613,12 @@
+ 
+ int ip6_ins_rt(struct rt6_info *rt)
+ {
+-	return __ip6_ins_rt(rt, NULL);
++	struct nl_info info = {
++		.nlh = NULL,
++		.pid = 0,
++		.net = &init_net,
++	};
++	return __ip6_ins_rt(rt, &info);
+ }
+ 
+ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
+@@ -742,6 +749,7 @@
+ 	struct ipv6hdr *iph = ipv6_hdr(skb);
+ 	int flags = RT6_LOOKUP_F_HAS_SADDR;
+ 	struct flowi fl = {
++		.fl_net = &init_net,
+ 		.iif = skb->dev->ifindex,
+ 		.nl_u = {
+ 			.ip6_u = {
+@@ -1129,7 +1137,7 @@
+ #endif
+ 	if (cfg->fc_ifindex) {
+ 		err = -ENODEV;
+-		dev = dev_get_by_index(cfg->fc_ifindex);
++		dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
+ 		if (!dev)
+ 			goto out;
+ 		idev = in6_dev_get(dev);
+@@ -1187,12 +1195,12 @@
+ 	if ((cfg->fc_flags & RTF_REJECT) ||
+ 	    (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
+ 		/* hold loopback dev/idev if we haven't done so. */
+-		if (dev != &loopback_dev) {
++		if (dev != &init_net.loopback_dev) {
+ 			if (dev) {
+ 				dev_put(dev);
+ 				in6_dev_put(idev);
+ 			}
+-			dev = &loopback_dev;
++			dev = &init_net.loopback_dev;
+ 			dev_hold(dev);
+ 			idev = in6_dev_get(dev);
+ 			if (!idev) {
+@@ -1333,7 +1341,12 @@
+ 
+ int ip6_del_rt(struct rt6_info *rt)
+ {
+-	return __ip6_del_rt(rt, NULL);
++	struct nl_info info = {
++		.nlh = NULL,
++		.pid = 0,
++		.net = &init_net,
++	};
++	return __ip6_del_rt(rt, &info);
+ }
+ 
+ static int ip6_route_del(struct fib6_config *cfg)
+@@ -1444,6 +1457,7 @@
+ 	int flags = RT6_LOOKUP_F_HAS_SADDR;
+ 	struct ip6rd_flowi rdfl = {
+ 		.fl = {
++			.fl_net = &init_net,
+ 			.oif = dev->ifindex,
+ 			.nl_u = {
+ 				.ip6_u = {
+@@ -1896,13 +1910,13 @@
+ 	if (rt == NULL)
+ 		return ERR_PTR(-ENOMEM);
+ 
+-	dev_hold(&loopback_dev);
++	dev_hold(&init_net.loopback_dev);
+ 	in6_dev_hold(idev);
+ 
+ 	rt->u.dst.flags = DST_HOST;
+ 	rt->u.dst.input = ip6_input;
+ 	rt->u.dst.output = ip6_output;
+-	rt->rt6i_dev = &loopback_dev;
++	rt->rt6i_dev = &init_net.loopback_dev;
+ 	rt->rt6i_idev = idev;
+ 	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
+ 	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
+@@ -2033,6 +2047,7 @@
+ 
+ 	cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
+ 	cfg->fc_nlinfo.nlh = nlh;
++	cfg->fc_nlinfo.net = skb->sk->sk_net;
+ 
+ 	if (tb[RTA_GATEWAY]) {
+ 		nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
+@@ -2078,9 +2093,13 @@
+ 
+ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct fib6_config cfg;
+ 	int err;
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ 	err = rtm_to_fib6_config(skb, nlh, &cfg);
+ 	if (err < 0)
+ 		return err;
+@@ -2090,9 +2109,13 @@
+ 
+ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct fib6_config cfg;
+ 	int err;
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ 	err = rtm_to_fib6_config(skb, nlh, &cfg);
+ 	if (err < 0)
+ 		return err;
+@@ -2227,6 +2250,7 @@
+ 
+ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
+ {
++	struct net *net = in_skb->sk->sk_net;
+ 	struct nlattr *tb[RTA_MAX+1];
+ 	struct rt6_info *rt;
+ 	struct sk_buff *skb;
+@@ -2234,12 +2258,16 @@
+ 	struct flowi fl;
+ 	int err, iif = 0;
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
+ 	if (err < 0)
+ 		goto errout;
+ 
+ 	err = -EINVAL;
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 
+ 	if (tb[RTA_SRC]) {
+ 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
+@@ -2263,7 +2291,7 @@
+ 
+ 	if (iif) {
+ 		struct net_device *dev;
+-		dev = __dev_get_by_index(iif);
++		dev = __dev_get_by_index(&init_net, iif);
+ 		if (!dev) {
+ 			err = -ENODEV;
+ 			goto errout;
+@@ -2293,7 +2321,7 @@
+ 		goto errout;
+ 	}
+ 
+-	err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
++	err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
+ errout:
+ 	return err;
+ }
+@@ -2301,17 +2329,10 @@
+ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
+ {
+ 	struct sk_buff *skb;
+-	u32 pid = 0, seq = 0;
+-	struct nlmsghdr *nlh = NULL;
++	u32 pid = info->pid, seq = info->nlh ? info->nlh->nlmsg_seq : 0;
++	struct nlmsghdr *nlh = info->nlh;
+ 	int err = -ENOBUFS;
+ 
+-	if (info) {
+-		pid = info->pid;
+-		nlh = info->nlh;
+-		if (nlh)
+-			seq = nlh->nlmsg_seq;
+-	}
+-
+ 	skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
+ 	if (skb == NULL)
+ 		goto errout;
+@@ -2323,10 +2344,10 @@
+ 		kfree_skb(skb);
+ 		goto errout;
+ 	}
+-	err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
++	err = rtnl_notify(skb, &init_net, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
+ errout:
+ 	if (err < 0)
+-		rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
++		rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
+ }
+ 
+ /*
+@@ -2558,13 +2579,19 @@
+ 				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ 	ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
+ 
++	/* Perform the initialization we can't perform at compile time */
++	ip6_null_entry.u.dst.dev = &init_net.loopback_dev;
++#ifdef CONFIG_IPV6_MULTIPLE_TABLES
++	ip6_prohibit_entry.u.dst.dev = &init_net.loopback_dev;
++	ip6_blk_hole_entry.u.dst.dev = &init_net.loopback_dev;
++#endif
+ 	fib6_init();
+ #ifdef 	CONFIG_PROC_FS
+-	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
++	p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info);
+ 	if (p)
+ 		p->owner = THIS_MODULE;
+ 
+-	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
++	proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
+ #endif
+ #ifdef CONFIG_XFRM
+ 	xfrm6_init();
+@@ -2584,8 +2611,8 @@
+ 	fib6_rules_cleanup();
+ #endif
+ #ifdef CONFIG_PROC_FS
+-	proc_net_remove("ipv6_route");
+-	proc_net_remove("rt6_stats");
++	proc_net_remove(&init_net, "ipv6_route");
++	proc_net_remove(&init_net, "rt6_stats");
+ #endif
+ #ifdef CONFIG_XFRM
+ 	xfrm6_fini();
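+Note: route.c also deals with an initialisation-order wrinkle. The
+statically defined ip6_null_entry (and its multiple-tables siblings) used to
+name &loopback_dev in their initialisers; with the loopback device moving
+into struct net the patch leaves .dev NULL at build time and fills it in at
+runtime, presumably keeping the door open for a dynamically allocated
+per-namespace loopback later. The two halves of that change sit far apart
+in the diff above; consolidated, the fixup inside ip6_route_init() reads:
+
+    /* Sketch: compile-time NULL, runtime fixup before fib6_init()
+     * makes the entries reachable. */
+    ip6_null_entry.u.dst.dev = &init_net.loopback_dev;
+    #ifdef CONFIG_IPV6_MULTIPLE_TABLES
+    ip6_prohibit_entry.u.dst.dev = &init_net.loopback_dev;
+    ip6_blk_hole_entry.u.dst.dev = &init_net.loopback_dev;
+    #endif
+    fib6_init();
+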
+diff -Nurb linux-2.6.22-try2/net/ipv6/sit.c linux-2.6.22-try2-netns/net/ipv6/sit.c
+--- linux-2.6.22-try2/net/ipv6/sit.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/sit.c	2007-12-19 22:49:20.000000000 -0500
+@@ -167,7 +167,7 @@
+ 		int i;
+ 		for (i=1; i<100; i++) {
+ 			sprintf(name, "sit%d", i);
+-			if (__dev_get_by_name(name) == NULL)
++			if (__dev_get_by_name(&init_net, name) == NULL)
+ 				break;
+ 		}
+ 		if (i==100)
+@@ -283,6 +283,9 @@
+ 	struct sk_buff *skb2;
+ 	struct rt6_info *rt6i;
+ 
++	if (skb->dev->nd_net != &init_net)
++		return;
++
+ 	if (len < hlen + sizeof(struct ipv6hdr))
+ 		return;
+ 	iph6 = (struct ipv6hdr*)(dp + hlen);
+@@ -369,6 +372,10 @@
+ 	struct iphdr *iph;
+ 	struct ip_tunnel *tunnel;
+ 
++	if (skb->dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
+ 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ 		goto out;
+ 
+@@ -474,7 +481,8 @@
+ 	}
+ 
+ 	{
+-		struct flowi fl = { .nl_u = { .ip4_u =
++		struct flowi fl = { .fl_net = &init_net,
++				    .nl_u = { .ip4_u =
+ 					      { .daddr = dst,
+ 						.saddr = tiph->saddr,
+ 						.tos = RT_TOS(tos) } },
+@@ -745,7 +753,8 @@
+ 	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
+ 
+ 	if (iph->daddr) {
+-		struct flowi fl = { .nl_u = { .ip4_u =
++		struct flowi fl = { .fl_net = &init_net,
++				    .nl_u = { .ip4_u =
+ 					      { .daddr = iph->daddr,
+ 						.saddr = iph->saddr,
+ 						.tos = RT_TOS(iph->tos) } },
+@@ -760,7 +769,7 @@
+ 	}
+ 
+ 	if (!tdev && tunnel->parms.link)
+-		tdev = __dev_get_by_index(tunnel->parms.link);
++		tdev = __dev_get_by_index(&init_net, tunnel->parms.link);
+ 
+ 	if (tdev) {
+ 		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
+diff -Nurb linux-2.6.22-try2/net/ipv6/tcp_ipv6.c linux-2.6.22-try2-netns/net/ipv6/tcp_ipv6.c
+--- linux-2.6.22-try2/net/ipv6/tcp_ipv6.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/tcp_ipv6.c	2007-12-19 22:49:20.000000000 -0500
+@@ -143,6 +143,7 @@
+ 		return(-EAFNOSUPPORT);
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 
+ 	if (np->sndflow) {
+ 		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+@@ -330,6 +331,7 @@
+ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ 		int type, int code, int offset, __be32 info)
+ {
++	struct net *net = skb->dev->nd_net;
+ 	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
+ 	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
+ 	struct ipv6_pinfo *np;
+@@ -339,7 +341,7 @@
+ 	__u32 seq;
+ 
+ 	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
+-			  th->source, skb->dev->ifindex);
++			  th->source, skb->dev->ifindex, net);
+ 
+ 	if (sk == NULL) {
+ 		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+@@ -388,6 +390,7 @@
+ 			   for now.
+ 			 */
+ 			memset(&fl, 0, sizeof(fl));
++			fl.fl_net = &init_net;
+ 			fl.proto = IPPROTO_TCP;
+ 			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ 			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+@@ -481,6 +484,7 @@
+ 	int err = -1;
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	fl.proto = IPPROTO_TCP;
+ 	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
+ 	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
+@@ -1066,6 +1070,7 @@
+ 	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+ 	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
+ 
+@@ -1167,6 +1172,7 @@
+ 	buff->csum = csum_partial((char *)t1, tot_len, 0);
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
+ 	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
+ 
+@@ -1224,7 +1230,8 @@
+ 
+ 	nsk = __inet6_lookup_established(&tcp_hashinfo, &ipv6_hdr(skb)->saddr,
+ 					 th->source, &ipv6_hdr(skb)->daddr,
+-					 ntohs(th->dest), inet6_iif(skb));
++					 ntohs(th->dest), inet6_iif(skb),
++					 sk->sk_net);
+ 
+ 	if (nsk) {
+ 		if (nsk->sk_state != TCP_TIME_WAIT) {
+@@ -1414,6 +1421,7 @@
+ 		struct flowi fl;
+ 
+ 		memset(&fl, 0, sizeof(fl));
++		fl.fl_net = &init_net;
+ 		fl.proto = IPPROTO_TCP;
+ 		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
+ 		if (opt && opt->srcrt) {
+@@ -1700,6 +1708,7 @@
+ static int tcp_v6_rcv(struct sk_buff **pskb)
+ {
+ 	struct sk_buff *skb = *pskb;
++	struct net *net = skb->dev->nd_net;
+ 	struct tcphdr *th;
+ 	struct sock *sk;
+ 	int ret;
+@@ -1736,7 +1745,7 @@
+ 
+ 	sk = __inet6_lookup(&tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source,
+ 			    &ipv6_hdr(skb)->daddr, ntohs(th->dest),
+-			    inet6_iif(skb));
++			    inet6_iif(skb), net);
+ 
+ 	if (!sk)
+ 		goto no_tcp_socket;
+@@ -1816,7 +1825,8 @@
+ 
+ 		sk2 = inet6_lookup_listener(&tcp_hashinfo,
+ 					    &ipv6_hdr(skb)->daddr,
+-					    ntohs(th->dest), inet6_iif(skb));
++					    ntohs(th->dest), inet6_iif(skb),
++					    net);
+ 		if (sk2 != NULL) {
+ 			struct inet_timewait_sock *tw = inet_twsk(sk);
+ 			inet_twsk_deschedule(tw, &tcp_death_row);
+@@ -2121,12 +2131,12 @@
+ 
+ int __init tcp6_proc_init(void)
+ {
+-	return tcp_proc_register(&tcp6_seq_afinfo);
++	return tcp_proc_register(&init_net, &tcp6_seq_afinfo);
+ }
+ 
+ void tcp6_proc_exit(void)
+ {
+-	tcp_proc_unregister(&tcp6_seq_afinfo);
++	tcp_proc_unregister(&init_net, &tcp6_seq_afinfo);
+ }
+ #endif
+ 
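+Note: tcp_ipv6 threads the namespace through socket demux. The receive and
+error paths derive it from the ingress device (skb->dev->nd_net) and pass
+it down to inet6_lookup()/__inet6_lookup(), so demux can only ever match
+sockets created in the same namespace. Condensed into a hypothetical helper
+(example_demux is not a function in the patch):
+
+    /* Sketch: the ingress device's namespace scopes the TCP demux. */
+    static struct sock *example_demux(struct sk_buff *skb, struct tcphdr *th)
+    {
+    	struct net *net = skb->dev->nd_net;
+
+    	return __inet6_lookup(&tcp_hashinfo,
+    			      &ipv6_hdr(skb)->saddr, th->source,
+    			      &ipv6_hdr(skb)->daddr, ntohs(th->dest),
+    			      inet6_iif(skb), net);
+    }
+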
+diff -Nurb linux-2.6.22-try2/net/ipv6/udp.c linux-2.6.22-try2-netns/net/ipv6/udp.c
+--- linux-2.6.22-try2/net/ipv6/udp.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/udp.c	2007-12-19 22:49:20.000000000 -0500
+@@ -657,6 +657,7 @@
+ 	ulen += sizeof(struct udphdr);
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 
+ 	if (sin6) {
+ 		if (sin6->sin6_port == 0)
+@@ -967,11 +968,11 @@
+ 
+ int __init udp6_proc_init(void)
+ {
+-	return udp_proc_register(&udp6_seq_afinfo);
++	return udp_proc_register(&init_net, &udp6_seq_afinfo);
+ }
+ 
+ void udp6_proc_exit(void) {
+-	udp_proc_unregister(&udp6_seq_afinfo);
++	udp_proc_unregister(&init_net, &udp6_seq_afinfo);
+ }
+ #endif /* CONFIG_PROC_FS */
+ 
+diff -Nurb linux-2.6.22-try2/net/ipv6/udplite.c linux-2.6.22-try2-netns/net/ipv6/udplite.c
+--- linux-2.6.22-try2/net/ipv6/udplite.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/udplite.c	2007-12-19 22:49:20.000000000 -0500
+@@ -95,11 +95,11 @@
+ 
+ int __init udplite6_proc_init(void)
+ {
+-	return udp_proc_register(&udplite6_seq_afinfo);
++	return udp_proc_register(&init_net, &udplite6_seq_afinfo);
+ }
+ 
+ void udplite6_proc_exit(void)
+ {
+-	udp_proc_unregister(&udplite6_seq_afinfo);
++	udp_proc_unregister(&init_net, &udplite6_seq_afinfo);
+ }
+ #endif
+diff -Nurb linux-2.6.22-try2/net/ipv6/xfrm6_policy.c linux-2.6.22-try2-netns/net/ipv6/xfrm6_policy.c
+--- linux-2.6.22-try2/net/ipv6/xfrm6_policy.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipv6/xfrm6_policy.c	2007-12-19 22:49:20.000000000 -0500
+@@ -40,6 +40,7 @@
+ {
+ 	struct rt6_info *rt;
+ 	struct flowi fl_tunnel = {
++		.fl_net = &init_net,
+ 		.nl_u = {
+ 			.ip6_u = {
+ 				.daddr = *(struct in6_addr *)&daddr->a6,
+@@ -132,6 +133,7 @@
+ 	struct rt6_info *rt0 = (struct rt6_info*)(*dst_p);
+ 	struct rt6_info *rt  = rt0;
+ 	struct flowi fl_tunnel = {
++		.fl_net = &init_net,
+ 		.nl_u = {
+ 			.ip6_u = {
+ 				.saddr = fl->fl6_src,
+@@ -278,6 +280,7 @@
+ 	u8 nexthdr = nh[IP6CB(skb)->nhoff];
+ 
+ 	memset(fl, 0, sizeof(struct flowi));
++	fl->fl_net = &init_net;
+ 	ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
+ 	ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
+ 
+@@ -375,7 +378,7 @@
+ 
+ 	xdst = (struct xfrm_dst *)dst;
+ 	if (xdst->u.rt6.rt6i_idev->dev == dev) {
+-		struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
++		struct inet6_dev *loopback_idev = in6_dev_get(&init_net.loopback_dev);
+ 		BUG_ON(!loopback_idev);
+ 
+ 		do {
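The tcp_ipv6, udp and xfrm6 hunks above show the flow-key side of the change:
struct flowi gains an fl_net member naming the namespace a route lookup
applies to, and every site that builds a flowi is patched to fill it in
(with &init_net until the callers themselves are converted). A condensed
sketch of the recurring shape, not a new call site in the patch:

	struct flowi fl;

	memset(&fl, 0, sizeof(fl));
	fl.fl_net = &init_net;	/* new member introduced by this patch */
	fl.proto = IPPROTO_TCP;
	/* ... address and port fields filled in as before ... */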
+diff -Nurb linux-2.6.22-try2/net/ipx/af_ipx.c linux-2.6.22-try2-netns/net/ipx/af_ipx.c
+--- linux-2.6.22-try2/net/ipx/af_ipx.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipx/af_ipx.c	2007-12-19 22:49:20.000000000 -0500
+@@ -347,6 +347,9 @@
+ 	struct net_device *dev = ptr;
+ 	struct ipx_interface *i, *tmp;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (event != NETDEV_DOWN && event != NETDEV_UP)
+ 		goto out;
+ 
+@@ -986,7 +989,7 @@
+ 	if (intrfc)
+ 		ipxitf_put(intrfc);
+ 
+-	dev = dev_get_by_name(idef->ipx_device);
++	dev = dev_get_by_name(&init_net, idef->ipx_device);
+ 	rc = -ENODEV;
+ 	if (!dev)
+ 		goto out;
+@@ -1094,7 +1097,7 @@
+ 	if (!dlink_type)
+ 		goto out;
+ 
+-	dev = __dev_get_by_name(idef->ipx_device);
++	dev = __dev_get_by_name(&init_net, idef->ipx_device);
+ 	rc = -ENODEV;
+ 	if (!dev)
+ 		goto out;
+@@ -1189,7 +1192,7 @@
+ 		if (copy_from_user(&ifr, arg, sizeof(ifr)))
+ 			break;
+ 		sipx = (struct sockaddr_ipx *)&ifr.ifr_addr;
+-		dev  = __dev_get_by_name(ifr.ifr_name);
++		dev  = __dev_get_by_name(&init_net, ifr.ifr_name);
+ 		rc   = -ENODEV;
+ 		if (!dev)
+ 			break;
+@@ -1360,11 +1363,14 @@
+ 	.obj_size = sizeof(struct ipx_sock),
+ };
+ 
+-static int ipx_create(struct socket *sock, int protocol)
++static int ipx_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	int rc = -ESOCKTNOSUPPORT;
+ 	struct sock *sk;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	/*
+ 	 * SPX support is not anymore in the kernel sources. If you want to
+ 	 * ressurrect it, completing it and making it understand shared skbs,
+@@ -1375,7 +1381,7 @@
+ 		goto out;
+ 
+ 	rc = -ENOMEM;
+-	sk = sk_alloc(PF_IPX, GFP_KERNEL, &ipx_proto, 1);
++	sk = sk_alloc(net, PF_IPX, GFP_KERNEL, &ipx_proto, 1);
+ 	if (!sk)
+ 		goto out;
+ #ifdef IPX_REFCNT_DEBUG
+@@ -1644,6 +1650,9 @@
+ 	u16 ipx_pktsize;
+ 	int rc = 0;
+ 
++	if (dev->nd_net != &init_net)
++		goto drop;
++
+ 	/* Not ours */
+ 	if (skb->pkt_type == PACKET_OTHERHOST)
+ 		goto drop;
+diff -Nurb linux-2.6.22-try2/net/ipx/ipx_proc.c linux-2.6.22-try2-netns/net/ipx/ipx_proc.c
+--- linux-2.6.22-try2/net/ipx/ipx_proc.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/ipx/ipx_proc.c	2007-12-19 22:49:20.000000000 -0500
+@@ -9,6 +9,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/spinlock.h>
+ #include <linux/seq_file.h>
++#include <net/net_namespace.h>
+ #include <net/tcp_states.h>
+ #include <net/ipx.h>
+ 
+@@ -353,7 +354,7 @@
+ 	struct proc_dir_entry *p;
+ 	int rc = -ENOMEM;
+ 
+-	ipx_proc_dir = proc_mkdir("ipx", proc_net);
++	ipx_proc_dir = proc_mkdir("ipx", init_net.proc_net);
+ 
+ 	if (!ipx_proc_dir)
+ 		goto out;
+@@ -381,7 +382,7 @@
+ out_route:
+ 	remove_proc_entry("interface", ipx_proc_dir);
+ out_interface:
+-	remove_proc_entry("ipx", proc_net);
++	remove_proc_entry("ipx", init_net.proc_net);
+ 	goto out;
+ }
+ 
+@@ -390,7 +391,7 @@
+ 	remove_proc_entry("interface", ipx_proc_dir);
+ 	remove_proc_entry("route", ipx_proc_dir);
+ 	remove_proc_entry("socket", ipx_proc_dir);
+-	remove_proc_entry("ipx", proc_net);
++	remove_proc_entry("ipx", init_net.proc_net);
+ }
+ 
+ #else /* CONFIG_PROC_FS */
+diff -Nurb linux-2.6.22-try2/net/irda/af_irda.c linux-2.6.22-try2-netns/net/irda/af_irda.c
+--- linux-2.6.22-try2/net/irda/af_irda.c	2007-12-19 13:37:58.000000000 -0500
++++ linux-2.6.22-try2-netns/net/irda/af_irda.c	2007-12-19 22:49:20.000000000 -0500
+@@ -60,7 +60,7 @@
+ 
+ #include <net/irda/af_irda.h>
+ 
+-static int irda_create(struct socket *sock, int protocol);
++static int irda_create(struct net *net, struct socket *sock, int protocol);
+ 
+ static const struct proto_ops irda_stream_ops;
+ static const struct proto_ops irda_seqpacket_ops;
+@@ -831,7 +831,7 @@
+ 
+ 	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+ 
+-	err = irda_create(newsock, sk->sk_protocol);
++	err = irda_create(sk->sk_net, newsock, sk->sk_protocol);
+ 	if (err)
+ 		return err;
+ 
+@@ -1057,13 +1057,16 @@
+  *    Create IrDA socket
+  *
+  */
+-static int irda_create(struct socket *sock, int protocol)
++static int irda_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 	struct irda_sock *self;
+ 
+ 	IRDA_DEBUG(2, "%s()\n", __FUNCTION__);
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	/* Check for valid socket type */
+ 	switch (sock->type) {
+ 	case SOCK_STREAM:     /* For TTP connections with SAR disabled */
+@@ -1075,7 +1078,7 @@
+ 	}
+ 
+ 	/* Allocate networking socket */
+-	sk = sk_alloc(PF_IRDA, GFP_ATOMIC, &irda_proto, 1);
++	sk = sk_alloc(net, PF_IRDA, GFP_ATOMIC, &irda_proto, 1);
+ 	if (sk == NULL)
+ 		return -ENOMEM;
+ 
+diff -Nurb linux-2.6.22-try2/net/irda/irlap_frame.c linux-2.6.22-try2-netns/net/irda/irlap_frame.c
+--- linux-2.6.22-try2/net/irda/irlap_frame.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/irda/irlap_frame.c	2007-12-19 22:49:20.000000000 -0500
+@@ -1319,6 +1319,9 @@
+ 	int command;
+ 	__u8 control;
+ 
++	if (dev->nd_net != &init_net)
++		goto out;
++
+ 	/* FIXME: should we get our own field? */
+ 	self = (struct irlap_cb *) dev->atalk_ptr;
+ 
+diff -Nurb linux-2.6.22-try2/net/irda/irproc.c linux-2.6.22-try2-netns/net/irda/irproc.c
+--- linux-2.6.22-try2/net/irda/irproc.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/irda/irproc.c	2007-12-19 22:49:20.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/module.h>
+ #include <linux/init.h>
++#include <net/net_namespace.h>
+ 
+ #include <net/irda/irda.h>
+ #include <net/irda/irlap.h>
+@@ -66,7 +67,7 @@
+ 	int i;
+ 	struct proc_dir_entry *d;
+ 
+-	proc_irda = proc_mkdir("irda", proc_net);
++	proc_irda = proc_mkdir("irda", init_net.proc_net);
+ 	if (proc_irda == NULL)
+ 		return;
+ 	proc_irda->owner = THIS_MODULE;
+@@ -92,7 +93,7 @@
+ 		for (i=0; i<ARRAY_SIZE(irda_dirs); i++)
+ 			remove_proc_entry(irda_dirs[i].name, proc_irda);
+ 
+-		remove_proc_entry("irda", proc_net);
++		remove_proc_entry("irda", init_net.proc_net);
+ 		proc_irda = NULL;
+ 	}
+ }
+diff -Nurb linux-2.6.22-try2/net/key/af_key.c linux-2.6.22-try2-netns/net/key/af_key.c
+--- linux-2.6.22-try2/net/key/af_key.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/key/af_key.c	2007-12-19 22:49:20.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/init.h>
+ #include <net/xfrm.h>
+ #include <linux/audit.h>
++#include <net/net_namespace.h>
+ 
+ #include <net/sock.h>
+ 
+@@ -136,11 +137,14 @@
+ 	.obj_size = sizeof(struct pfkey_sock),
+ };
+ 
+-static int pfkey_create(struct socket *sock, int protocol)
++static int pfkey_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 	int err;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	if (!capable(CAP_NET_ADMIN))
+ 		return -EPERM;
+ 	if (sock->type != SOCK_RAW)
+@@ -149,7 +153,7 @@
+ 		return -EPROTONOSUPPORT;
+ 
+ 	err = -ENOMEM;
+-	sk = sk_alloc(PF_KEY, GFP_KERNEL, &key_proto, 1);
++	sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, 1);
+ 	if (sk == NULL)
+ 		goto out;
+ 
+@@ -3781,7 +3785,7 @@
+ static void __exit ipsec_pfkey_exit(void)
+ {
+ 	xfrm_unregister_km(&pfkeyv2_mgr);
+-	remove_proc_entry("net/pfkey", NULL);
++	remove_proc_entry("pfkey", init_net.proc_net);
+ 	sock_unregister(PF_KEY);
+ 	proto_unregister(&key_proto);
+ }
+@@ -3798,7 +3802,7 @@
+ 		goto out_unregister_key_proto;
+ #ifdef CONFIG_PROC_FS
+ 	err = -ENOMEM;
+-	if (create_proc_read_entry("net/pfkey", 0, NULL, pfkey_read_proc, NULL) == NULL)
++	if (create_proc_read_entry("pfkey", 0, init_net.proc_net, pfkey_read_proc, NULL) == NULL)
+ 		goto out_sock_unregister;
+ #endif
+ 	err = xfrm_register_km(&pfkeyv2_mgr);
+diff -Nurb linux-2.6.22-try2/net/llc/af_llc.c linux-2.6.22-try2-netns/net/llc/af_llc.c
+--- linux-2.6.22-try2/net/llc/af_llc.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/llc/af_llc.c	2007-12-19 22:49:20.000000000 -0500
+@@ -150,14 +150,17 @@
+  *	socket type we have available.
+  *	Returns 0 upon success, negative upon failure.
+  */
+-static int llc_ui_create(struct socket *sock, int protocol)
++static int llc_ui_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 	int rc = -ESOCKTNOSUPPORT;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	if (likely(sock->type == SOCK_DGRAM || sock->type == SOCK_STREAM)) {
+ 		rc = -ENOMEM;
+-		sk = llc_sk_alloc(PF_LLC, GFP_KERNEL, &llc_proto);
++		sk = llc_sk_alloc(net, PF_LLC, GFP_KERNEL, &llc_proto);
+ 		if (sk) {
+ 			rc = 0;
+ 			llc_ui_sk_init(sock, sk);
+@@ -249,7 +252,7 @@
+ 	if (!sock_flag(sk, SOCK_ZAPPED))
+ 		goto out;
+ 	rc = -ENODEV;
+-	llc->dev = dev_getfirstbyhwtype(addr->sllc_arphrd);
++	llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
+ 	if (!llc->dev)
+ 		goto out;
+ 	rc = -EUSERS;
+@@ -300,7 +303,7 @@
+ 		goto out;
+ 	rc = -ENODEV;
+ 	rtnl_lock();
+-	llc->dev = dev_getbyhwaddr(addr->sllc_arphrd, addr->sllc_mac);
++	llc->dev = dev_getbyhwaddr(&init_net, addr->sllc_arphrd, addr->sllc_mac);
+ 	rtnl_unlock();
+ 	if (!llc->dev)
+ 		goto out;
+diff -Nurb linux-2.6.22-try2/net/llc/llc_conn.c linux-2.6.22-try2-netns/net/llc/llc_conn.c
+--- linux-2.6.22-try2/net/llc/llc_conn.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/llc/llc_conn.c	2007-12-19 22:49:20.000000000 -0500
+@@ -700,7 +700,7 @@
+ 					     struct llc_addr *saddr,
+ 					     struct llc_addr *daddr)
+ {
+-	struct sock *newsk = llc_sk_alloc(sk->sk_family, GFP_ATOMIC,
++	struct sock *newsk = llc_sk_alloc(sk->sk_net, sk->sk_family, GFP_ATOMIC,
+ 					  sk->sk_prot);
+ 	struct llc_sock *newllc, *llc = llc_sk(sk);
+ 
+@@ -867,9 +867,9 @@
+  *	Allocates a LLC sock and initializes it. Returns the new LLC sock
+  *	or %NULL if there's no memory available for one
+  */
+-struct sock *llc_sk_alloc(int family, gfp_t priority, struct proto *prot)
++struct sock *llc_sk_alloc(struct net *net, int family, gfp_t priority, struct proto *prot)
+ {
+-	struct sock *sk = sk_alloc(family, priority, prot, 1);
++	struct sock *sk = sk_alloc(net, family, priority, prot, 1);
+ 
+ 	if (!sk)
+ 		goto out;
+diff -Nurb linux-2.6.22-try2/net/llc/llc_core.c linux-2.6.22-try2-netns/net/llc/llc_core.c
+--- linux-2.6.22-try2/net/llc/llc_core.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/llc/llc_core.c	2007-12-19 22:49:20.000000000 -0500
+@@ -19,6 +19,7 @@
+ #include <linux/slab.h>
+ #include <linux/string.h>
+ #include <linux/init.h>
++#include <net/net_namespace.h>
+ #include <net/llc.h>
+ 
+ LIST_HEAD(llc_sap_list);
+@@ -162,7 +163,7 @@
+ {
+ 	struct net_device *dev;
+ 
+-	dev = first_net_device();
++	dev = first_net_device(&init_net);
+ 	if (dev != NULL)
+ 		dev = next_net_device(dev);
+ 
+diff -Nurb linux-2.6.22-try2/net/llc/llc_input.c linux-2.6.22-try2-netns/net/llc/llc_input.c
+--- linux-2.6.22-try2/net/llc/llc_input.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/llc/llc_input.c	2007-12-19 22:49:20.000000000 -0500
+@@ -12,6 +12,7 @@
+  * See the GNU General Public License for more details.
+  */
+ #include <linux/netdevice.h>
++#include <net/net_namespace.h>
+ #include <net/llc.h>
+ #include <net/llc_pdu.h>
+ #include <net/llc_sap.h>
+@@ -145,6 +146,9 @@
+ 	int (*rcv)(struct sk_buff *, struct net_device *,
+ 		   struct packet_type *, struct net_device *);
+ 
++	if (dev->nd_net != &init_net)
++		goto drop;
++
+ 	/*
+ 	 * When the interface is in promisc. mode, drop all the crap that it
+ 	 * receives, do not try to analyse it.
+diff -Nurb linux-2.6.22-try2/net/llc/llc_proc.c linux-2.6.22-try2-netns/net/llc/llc_proc.c
+--- linux-2.6.22-try2/net/llc/llc_proc.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/llc/llc_proc.c	2007-12-19 22:49:20.000000000 -0500
+@@ -18,6 +18,7 @@
+ #include <linux/errno.h>
+ #include <linux/seq_file.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ #include <net/llc.h>
+ #include <net/llc_c_ac.h>
+ #include <net/llc_c_ev.h>
+@@ -231,7 +232,7 @@
+ 	int rc = -ENOMEM;
+ 	struct proc_dir_entry *p;
+ 
+-	llc_proc_dir = proc_mkdir("llc", proc_net);
++	llc_proc_dir = proc_mkdir("llc", init_net.proc_net);
+ 	if (!llc_proc_dir)
+ 		goto out;
+ 	llc_proc_dir->owner = THIS_MODULE;
+@@ -254,7 +255,7 @@
+ out_core:
+ 	remove_proc_entry("socket", llc_proc_dir);
+ out_socket:
+-	remove_proc_entry("llc", proc_net);
++	remove_proc_entry("llc", init_net.proc_net);
+ 	goto out;
+ }
+ 
+@@ -262,5 +263,5 @@
+ {
+ 	remove_proc_entry("socket", llc_proc_dir);
+ 	remove_proc_entry("core", llc_proc_dir);
+-	remove_proc_entry("llc", proc_net);
++	remove_proc_entry("llc", init_net.proc_net);
+ }
+diff -Nurb linux-2.6.22-try2/net/netfilter/core.c linux-2.6.22-try2-netns/net/netfilter/core.c
+--- linux-2.6.22-try2/net/netfilter/core.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netfilter/core.c	2007-12-19 22:49:20.000000000 -0500
+@@ -20,6 +20,7 @@
+ #include <linux/proc_fs.h>
+ #include <linux/mutex.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ 
+ #include "nf_internals.h"
+ 
+@@ -280,8 +281,28 @@
+ #endif /* CONFIG_NF_CONNTRACK */
+ 
+ #ifdef CONFIG_PROC_FS
+-struct proc_dir_entry *proc_net_netfilter;
+-EXPORT_SYMBOL(proc_net_netfilter);
++static int netfilter_proc_init(struct net * net)
++{
++	int error = -ENOMEM;
++	net->proc_net_netfilter = proc_mkdir("netfilter", net->proc_net);
++
++	if (net->proc_net_netfilter) {
++		net->proc_net_netfilter->data = net;
++		error = 0;
++	}
++	return error;
++}
++
++static void netfilter_proc_exit(struct net *net)
++{
++	remove_proc_entry("netfilter", net->proc_net);
++}
++
++static struct pernet_operations netfilter_proc_ops = {
++	.init = netfilter_proc_init,
++	.exit = netfilter_proc_exit,
++};
++
+ #endif
+ 
+ void __init netfilter_init(void)
+@@ -293,8 +314,7 @@
+ 	}
+ 
+ #ifdef CONFIG_PROC_FS
+-	proc_net_netfilter = proc_mkdir("netfilter", proc_net);
+-	if (!proc_net_netfilter)
++	if (register_pernet_subsys(&netfilter_proc_ops) < 0)
+ 		panic("cannot create netfilter proc entry");
+ #endif
+ 
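The netfilter/core.c hunk above is the canonical shape of the conversion this
patch repeats everywhere: global state becomes per-namespace state managed by
a struct pernet_operations whose init/exit callbacks run once for each
namespace. A minimal sketch of that pattern follows; the "foo" names are
placeholders, while register_pernet_subsys(), struct pernet_operations and
net->proc_net are the interfaces the patch itself uses.

	#include <linux/proc_fs.h>
	#include <net/net_namespace.h>

	static int foo_net_init(struct net *net)
	{
		/* create this namespace's instance of the state;
		 * a negative return aborts namespace creation */
		if (!proc_mkdir("foo", net->proc_net))
			return -ENOMEM;
		return 0;
	}

	static void foo_net_exit(struct net *net)
	{
		remove_proc_entry("foo", net->proc_net);
	}

	static struct pernet_operations foo_net_ops = {
		.init = foo_net_init,
		.exit = foo_net_exit,
	};

	static int __init foo_init(void)
	{
		/* runs .init for the initial namespace and for
		 * every namespace created afterwards */
		return register_pernet_subsys(&foo_net_ops);
	}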
+diff -Nurb linux-2.6.22-try2/net/netfilter/nf_conntrack_h323_main.c linux-2.6.22-try2-netns/net/netfilter/nf_conntrack_h323_main.c
+--- linux-2.6.22-try2/net/netfilter/nf_conntrack_h323_main.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netfilter/nf_conntrack_h323_main.c	2007-12-19 22:49:20.000000000 -0500
+@@ -724,6 +724,8 @@
+ 
+ 	memset(&fl1, 0, sizeof(fl1));
+ 	memset(&fl2, 0, sizeof(fl2));
++	fl1.fl_net = &init_net;
++	fl2.fl_net = &init_net;
+ 
+ 	switch (family) {
+ 	case AF_INET: {
+diff -Nurb linux-2.6.22-try2/net/netfilter/nf_conntrack_standalone.c linux-2.6.22-try2-netns/net/netfilter/nf_conntrack_standalone.c
+--- linux-2.6.22-try2/net/netfilter/nf_conntrack_standalone.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netfilter/nf_conntrack_standalone.c	2007-12-19 22:49:20.000000000 -0500
+@@ -14,6 +14,7 @@
+ #include <linux/seq_file.h>
+ #include <linux/percpu.h>
+ #include <linux/netdevice.h>
++#include <net/net_namespace.h>
+ #ifdef CONFIG_SYSCTL
+ #include <linux/sysctl.h>
+ #endif
+@@ -419,14 +420,14 @@
+ 		return ret;
+ 
+ #ifdef CONFIG_PROC_FS
+-	proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops);
++	proc = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops);
+ 	if (!proc) goto cleanup_init;
+ 
+-	proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440,
++	proc_exp = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440,
+ 					&exp_file_ops);
+ 	if (!proc_exp) goto cleanup_proc;
+ 
+-	proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat);
++	proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, init_net.proc_net_stat);
+ 	if (!proc_stat)
+ 		goto cleanup_proc_exp;
+ 
+@@ -447,11 +448,11 @@
+  cleanup_proc_stat:
+ #endif
+ #ifdef CONFIG_PROC_FS
+-	remove_proc_entry("nf_conntrack", proc_net_stat);
++	remove_proc_entry("nf_conntrack", init_net.proc_net_stat);
+  cleanup_proc_exp:
+-	proc_net_remove("nf_conntrack_expect");
++	proc_net_remove(&init_net, "nf_conntrack_expect");
+  cleanup_proc:
+-	proc_net_remove("nf_conntrack");
++	proc_net_remove(&init_net, "nf_conntrack");
+  cleanup_init:
+ #endif /* CONFIG_PROC_FS */
+ 	nf_conntrack_cleanup();
+@@ -464,9 +465,9 @@
+ 	unregister_sysctl_table(nf_ct_sysctl_header);
+ #endif
+ #ifdef CONFIG_PROC_FS
+-	remove_proc_entry("nf_conntrack", proc_net_stat);
+-	proc_net_remove("nf_conntrack_expect");
+-	proc_net_remove("nf_conntrack");
++	remove_proc_entry("nf_conntrack", init_net.proc_net_stat);
++	proc_net_remove(&init_net, "nf_conntrack_expect");
++	proc_net_remove(&init_net, "nf_conntrack");
+ #endif /* CONFIG_PROC_FS */
+ 	nf_conntrack_cleanup();
+ }
+diff -Nurb linux-2.6.22-try2/net/netfilter/nf_log.c linux-2.6.22-try2-netns/net/netfilter/nf_log.c
+--- linux-2.6.22-try2/net/netfilter/nf_log.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netfilter/nf_log.c	2007-12-19 22:49:20.000000000 -0500
+@@ -168,7 +168,8 @@
+ #ifdef CONFIG_PROC_FS
+ 	struct proc_dir_entry *pde;
+ 
+-	pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter);
++	pde = create_proc_entry("nf_log", S_IRUGO,
++		init_net.proc_net_netfilter);
+ 	if (!pde)
+ 		return -1;
+ 
+diff -Nurb linux-2.6.22-try2/net/netfilter/nf_queue.c linux-2.6.22-try2-netns/net/netfilter/nf_queue.c
+--- linux-2.6.22-try2/net/netfilter/nf_queue.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netfilter/nf_queue.c	2007-12-19 22:49:20.000000000 -0500
+@@ -346,7 +346,7 @@
+ #ifdef CONFIG_PROC_FS
+ 	struct proc_dir_entry *pde;
+ 
+-	pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter);
++	pde = create_proc_entry("nf_queue", S_IRUGO, init_net.proc_net_netfilter);
+ 	if (!pde)
+ 		return -1;
+ 	pde->proc_fops = &nfqueue_file_ops;
+diff -Nurb linux-2.6.22-try2/net/netfilter/nfnetlink.c linux-2.6.22-try2-netns/net/netfilter/nfnetlink.c
+--- linux-2.6.22-try2/net/netfilter/nfnetlink.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netfilter/nfnetlink.c	2007-12-19 22:49:20.000000000 -0500
+@@ -264,7 +264,7 @@
+ {
+ 	printk("Netfilter messages via NETLINK v%s.\n", nfversion);
+ 
+-	nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX,
++	nfnl = netlink_kernel_create(&init_net, NETLINK_NETFILTER, NFNLGRP_MAX,
+ 				     nfnetlink_rcv, NULL, THIS_MODULE);
+ 	if (!nfnl) {
+ 		printk(KERN_ERR "cannot initialize nfnetlink!\n");
+diff -Nurb linux-2.6.22-try2/net/netfilter/nfnetlink_log.c linux-2.6.22-try2-netns/net/netfilter/nfnetlink_log.c
+--- linux-2.6.22-try2/net/netfilter/nfnetlink_log.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netfilter/nfnetlink_log.c	2007-12-19 22:49:20.000000000 -0500
+@@ -705,7 +705,8 @@
+ 
+ 			hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
+ 				UDEBUG("node = %p\n", inst);
+-				if (n->pid == inst->peer_pid)
++				if ((n->net == &init_net) &&
++				    (n->pid == inst->peer_pid))
+ 					__instance_destroy(inst);
+ 			}
+ 		}
+@@ -1023,7 +1024,7 @@
+ 
+ #ifdef CONFIG_PROC_FS
+ 	proc_nful = create_proc_entry("nfnetlink_log", 0440,
+-				      proc_net_netfilter);
++				      init_net.proc_net_netfilter);
+ 	if (!proc_nful)
+ 		goto cleanup_subsys;
+ 	proc_nful->proc_fops = &nful_file_ops;
+@@ -1043,7 +1044,7 @@
+ {
+ 	nf_log_unregister(&nfulnl_logger);
+ #ifdef CONFIG_PROC_FS
+-	remove_proc_entry("nfnetlink_log", proc_net_netfilter);
++	remove_proc_entry("nfnetlink_log", init_net.proc_net_netfilter);
+ #endif
+ 	nfnetlink_subsys_unregister(&nfulnl_subsys);
+ 	netlink_unregister_notifier(&nfulnl_rtnl_notifier);
+diff -Nurb linux-2.6.22-try2/net/netfilter/nfnetlink_queue.c linux-2.6.22-try2-netns/net/netfilter/nfnetlink_queue.c
+--- linux-2.6.22-try2/net/netfilter/nfnetlink_queue.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netfilter/nfnetlink_queue.c	2007-12-19 22:49:20.000000000 -0500
+@@ -734,6 +734,9 @@
+ {
+ 	struct net_device *dev = ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	/* Drop any packets associated with the downed device */
+ 	if (event == NETDEV_DOWN)
+ 		nfqnl_dev_drop(dev->ifindex);
+@@ -762,7 +765,8 @@
+ 			struct hlist_head *head = &instance_table[i];
+ 
+ 			hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) {
+-				if (n->pid == inst->peer_pid)
++				if ((n->net == &init_net) && 
++				    (n->pid == inst->peer_pid))
+ 					__instance_destroy(inst);
+ 			}
+ 		}
+@@ -1106,7 +1110,7 @@
+ 
+ #ifdef CONFIG_PROC_FS
+ 	proc_nfqueue = create_proc_entry("nfnetlink_queue", 0440,
+-					 proc_net_netfilter);
++					 init_net.proc_net_netfilter);
+ 	if (!proc_nfqueue)
+ 		goto cleanup_subsys;
+ 	proc_nfqueue->proc_fops = &nfqnl_file_ops;
+@@ -1129,7 +1133,7 @@
+ 	nf_unregister_queue_handlers(&nfqh);
+ 	unregister_netdevice_notifier(&nfqnl_dev_notifier);
+ #ifdef CONFIG_PROC_FS
+-	remove_proc_entry("nfnetlink_queue", proc_net_netfilter);
++	remove_proc_entry("nfnetlink_queue", init_net.proc_net_netfilter);
+ #endif
+ 	nfnetlink_subsys_unregister(&nfqnl_subsys);
+ 	netlink_unregister_notifier(&nfqnl_rtnl_notifier);
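Families that are not yet namespace-aware are fenced off rather than
converted: their create hooks refuse foreign namespaces and their packet
handlers and netdev notifiers ignore devices outside init_net, as the ipx,
irda and llc hunks above and the netrom and rose hunks below do. Sketched
here with placeholder "foo"/PF_FOO names; sk_alloc() taking a struct net *
as its first argument is the signature this patch introduces:

	static struct proto foo_proto;	/* placeholder protocol */

	static int foo_create(struct net *net, struct socket *sock, int protocol)
	{
		struct sock *sk;

		if (net != &init_net)		/* initial namespace only */
			return -EAFNOSUPPORT;

		sk = sk_alloc(net, PF_FOO, GFP_KERNEL, &foo_proto, 1);
		if (sk == NULL)
			return -ENOMEM;
		/* ... family-specific initialisation ... */
		return 0;
	}

	static int foo_device_event(struct notifier_block *this,
				    unsigned long event, void *ptr)
	{
		struct net_device *dev = ptr;

		if (dev->nd_net != &init_net)	/* ignore other namespaces */
			return NOTIFY_DONE;
		/* ... handle the event ... */
		return NOTIFY_DONE;
	}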
+diff -Nurb linux-2.6.22-try2/net/netfilter/x_tables.c linux-2.6.22-try2-netns/net/netfilter/x_tables.c
+--- linux-2.6.22-try2/net/netfilter/x_tables.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netfilter/x_tables.c	2007-12-19 22:49:20.000000000 -0500
+@@ -22,6 +22,7 @@
+ #include <linux/vmalloc.h>
+ #include <linux/mutex.h>
+ #include <linux/mm.h>
++#include <net/net_namespace.h>
+ 
+ #include <linux/netfilter/x_tables.h>
+ #include <linux/netfilter_arp.h>
+@@ -37,11 +38,16 @@
+ 	struct mutex mutex;
+ 	struct list_head match;
+ 	struct list_head target;
+-	struct list_head tables;
+ 	struct mutex compat_mutex;
+ };
+ 
+-static struct xt_af *xt;
++
++struct xt_af_pernet {
++	struct list_head tables;
++};
++
++static struct xt_af *xt;
++
+ 
+ #ifdef DEBUG_IP_FIREWALL_USER
+ #define duprintf(format, args...) printk(format , ## args)
+@@ -286,9 +292,9 @@
+ 		return 1;
+ 	}
+ 	if (target == 1)
+-		have_rev = target_revfn(af, name, revision, &best);
++		have_rev = target_revfn(af, name, revision, &best);
+ 	else
+-		have_rev = match_revfn(af, name, revision, &best);
++		have_rev = match_revfn(af, name, revision, &best);
+ 	mutex_unlock(&xt[af].mutex);
+ 
+ 	/* Nothing at all?  Return 0 to try loading module. */
+@@ -533,14 +539,14 @@
+ EXPORT_SYMBOL(xt_free_table_info);
+ 
+ /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
+-struct xt_table *xt_find_table_lock(int af, const char *name)
++struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name)
+ {
+ 	struct xt_table *t;
+ 
+ 	if (mutex_lock_interruptible(&xt[af].mutex) != 0)
+ 		return ERR_PTR(-EINTR);
+ 
+-	list_for_each_entry(t, &xt[af].tables, list)
++	list_for_each_entry(t, &net->xtn[af].tables, list)
+ 		if (strcmp(t->name, name) == 0 && try_module_get(t->me))
+ 			return t;
+ 	mutex_unlock(&xt[af].mutex);
+@@ -596,7 +602,7 @@
+ }
+ EXPORT_SYMBOL_GPL(xt_replace_table);
+ 
+-int xt_register_table(struct xt_table *table,
++int xt_register_table(struct net *net, struct xt_table *table,
+ 		      struct xt_table_info *bootstrap,
+ 		      struct xt_table_info *newinfo)
+ {
+@@ -609,7 +615,7 @@
+ 		return ret;
+ 
+ 	/* Don't autoload: we'd eat our tail... */
+-	list_for_each_entry(t, &xt[table->af].tables, list) {
++	list_for_each_entry(t, &net->xtn[table->af].tables, list) {
+ 		if (strcmp(t->name, table->name) == 0) {
+ 			ret = -EEXIST;
+ 			goto unlock;
+@@ -628,7 +634,7 @@
+ 	/* save number of initial entries */
+ 	private->initial_entries = private->number;
+ 
+-	list_add(&table->list, &xt[table->af].tables);
++	list_add(&table->list, &net->xtn[table->af].tables);
+ 
+ 	ret = 0;
+  unlock:
+@@ -666,7 +672,7 @@
+ 	return pos ? NULL : head;
+ }
+ 
+-static struct list_head *type2list(u_int16_t af, u_int16_t type)
++static struct list_head *type2list(struct net *net, u_int16_t af, u_int16_t type)
+ {
+ 	struct list_head *list;
+ 
+@@ -678,7 +684,7 @@
+ 		list = &xt[af].match;
+ 		break;
+ 	case TABLE:
+-		list = &xt[af].tables;
++		list = &net->xtn[af].tables;
+ 		break;
+ 	default:
+ 		list = NULL;
+@@ -691,6 +697,7 @@
+ static void *xt_tgt_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+ 	struct proc_dir_entry *pde = (struct proc_dir_entry *) seq->private;
++	struct net *net = PDE_NET(pde);
+ 	u_int16_t af = (unsigned long)pde->data & 0xffff;
+ 	u_int16_t type = (unsigned long)pde->data >> 16;
+ 	struct list_head *list;
+@@ -698,7 +705,7 @@
+ 	if (af >= NPROTO)
+ 		return NULL;
+ 
+-	list = type2list(af, type);
++	list = type2list(net, af, type);
+ 	if (!list)
+ 		return NULL;
+ 
+@@ -711,6 +718,7 @@
+ static void *xt_tgt_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
+ 	struct proc_dir_entry *pde = seq->private;
++	struct net *net = PDE_NET(pde);
+ 	u_int16_t af = (unsigned long)pde->data & 0xffff;
+ 	u_int16_t type = (unsigned long)pde->data >> 16;
+ 	struct list_head *list;
+@@ -718,7 +726,7 @@
+ 	if (af >= NPROTO)
+ 		return NULL;
+ 
+-	list = type2list(af, type);
++	list = type2list(net, af, type);
+ 	if (!list)
+ 		return NULL;
+ 
+@@ -759,6 +767,7 @@
+ 	if (!ret) {
+ 		struct seq_file *seq = file->private_data;
+ 		struct proc_dir_entry *pde = PDE(inode);
++		get_net(PROC_NET(inode));
+ 
+ 		seq->private = pde;
+ 	}
+@@ -766,12 +775,18 @@
+ 	return ret;
+ }
+ 
++static int xt_tgt_release(struct inode *inode, struct file *file)
++{
++	put_net(PROC_NET(inode));
++	return seq_release(inode, file);
++}
++
+ static const struct file_operations xt_file_ops = {
+ 	.owner	 = THIS_MODULE,
+ 	.open	 = xt_tgt_open,
+ 	.read	 = seq_read,
+ 	.llseek	 = seq_lseek,
+-	.release = seq_release,
++	.release = xt_tgt_release,
+ };
+ 
+ #define FORMAT_TABLES	"_tables_names"
+@@ -794,7 +809,7 @@
+ #ifdef CONFIG_PROC_FS
+ 	strlcpy(buf, xt_prefix[af], sizeof(buf));
+ 	strlcat(buf, FORMAT_TABLES, sizeof(buf));
+-	proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
++	proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops);
+ 	if (!proc)
+ 		goto out;
+ 	proc->data = (void *) ((unsigned long) af | (TABLE << 16));
+@@ -802,14 +817,14 @@
+ 
+ 	strlcpy(buf, xt_prefix[af], sizeof(buf));
+ 	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+-	proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
++	proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops);
+ 	if (!proc)
+ 		goto out_remove_tables;
+ 	proc->data = (void *) ((unsigned long) af | (MATCH << 16));
+ 
+ 	strlcpy(buf, xt_prefix[af], sizeof(buf));
+ 	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
+-	proc = proc_net_fops_create(buf, 0440, &xt_file_ops);
++	proc = proc_net_fops_create(&init_net, buf, 0440, &xt_file_ops);
+ 	if (!proc)
+ 		goto out_remove_matches;
+ 	proc->data = (void *) ((unsigned long) af | (TARGET << 16));
+@@ -821,12 +836,12 @@
+ out_remove_matches:
+ 	strlcpy(buf, xt_prefix[af], sizeof(buf));
+ 	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+-	proc_net_remove(buf);
++	proc_net_remove(&init_net, buf);
+ 
+ out_remove_tables:
+ 	strlcpy(buf, xt_prefix[af], sizeof(buf));
+ 	strlcat(buf, FORMAT_TABLES, sizeof(buf));
+-	proc_net_remove(buf);
++	proc_net_remove(&init_net, buf);
+ out:
+ 	return -1;
+ #endif
+@@ -840,19 +855,42 @@
+ 
+ 	strlcpy(buf, xt_prefix[af], sizeof(buf));
+ 	strlcat(buf, FORMAT_TABLES, sizeof(buf));
+-	proc_net_remove(buf);
++	proc_net_remove(&init_net, buf);
+ 
+ 	strlcpy(buf, xt_prefix[af], sizeof(buf));
+ 	strlcat(buf, FORMAT_TARGETS, sizeof(buf));
+-	proc_net_remove(buf);
++	proc_net_remove(&init_net, buf);
+ 
+ 	strlcpy(buf, xt_prefix[af], sizeof(buf));
+ 	strlcat(buf, FORMAT_MATCHES, sizeof(buf));
+-	proc_net_remove(buf);
++	proc_net_remove(&init_net, buf);
+ #endif /*CONFIG_PROC_FS*/
+ }
+ EXPORT_SYMBOL_GPL(xt_proto_fini);
+ 
++static int xt_net_init(struct net *net)
++{
++	int i;
++
++	net->xtn = kmalloc(sizeof(struct xt_af_pernet) * NPROTO, GFP_KERNEL);
++	if (!net->xtn)
++		return -ENOMEM;
++
++	for (i = 0; i < NPROTO; i++) {
++		INIT_LIST_HEAD(&net->xtn[i].tables);
++	}
++	return 0;
++}
++
++static void xt_net_exit(struct net *net)
++{
++	kfree(net->xtn);
++}
++
++static struct pernet_operations xt_net_ops = {
++	.init = xt_net_init,
++	.exit = xt_net_exit,
++};
+ 
+ static int __init xt_init(void)
+ {
+@@ -869,13 +907,13 @@
+ #endif
+ 		INIT_LIST_HEAD(&xt[i].target);
+ 		INIT_LIST_HEAD(&xt[i].match);
+-		INIT_LIST_HEAD(&xt[i].tables);
+ 	}
+-	return 0;
++	return register_pernet_subsys(&xt_net_ops);
+ }
+ 
+ static void __exit xt_fini(void)
+ {
++	unregister_pernet_subsys(&xt_net_ops);
+ 	kfree(xt);
+ }
+ 
+diff -Nurb linux-2.6.22-try2/net/netfilter/xt_hashlimit.c linux-2.6.22-try2-netns/net/netfilter/xt_hashlimit.c
+--- linux-2.6.22-try2/net/netfilter/xt_hashlimit.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netfilter/xt_hashlimit.c	2007-12-19 22:49:20.000000000 -0500
+@@ -21,6 +21,7 @@
+ #include <linux/in.h>
+ #include <linux/ip.h>
+ #include <linux/ipv6.h>
++#include <net/net_namespace.h>
+ 
+ #include <linux/netfilter/x_tables.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+@@ -736,13 +737,13 @@
+ 		printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n");
+ 		goto err2;
+ 	}
+-	hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", proc_net);
++	hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net);
+ 	if (!hashlimit_procdir4) {
+ 		printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
+ 				"entry\n");
+ 		goto err3;
+ 	}
+-	hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", proc_net);
++	hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net);
+ 	if (!hashlimit_procdir6) {
+ 		printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
+ 				"entry\n");
+@@ -750,7 +751,7 @@
+ 	}
+ 	return 0;
+ err4:
+-	remove_proc_entry("ipt_hashlimit", proc_net);
++	remove_proc_entry("ipt_hashlimit", init_net.proc_net);
+ err3:
+ 	kmem_cache_destroy(hashlimit_cachep);
+ err2:
+@@ -762,8 +763,8 @@
+ 
+ static void __exit xt_hashlimit_fini(void)
+ {
+-	remove_proc_entry("ipt_hashlimit", proc_net);
+-	remove_proc_entry("ip6t_hashlimit", proc_net);
++	remove_proc_entry("ipt_hashlimit", init_net.proc_net);
++	remove_proc_entry("ip6t_hashlimit", init_net.proc_net);
+ 	kmem_cache_destroy(hashlimit_cachep);
+ 	xt_unregister_matches(xt_hashlimit, ARRAY_SIZE(xt_hashlimit));
+ }
+diff -Nurb linux-2.6.22-try2/net/netlink/af_netlink.c linux-2.6.22-try2-netns/net/netlink/af_netlink.c
+--- linux-2.6.22-try2/net/netlink/af_netlink.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netlink/af_netlink.c	2007-12-19 22:49:20.000000000 -0500
+@@ -63,6 +63,7 @@
+ #include <net/sock.h>
+ #include <net/scm.h>
+ #include <net/netlink.h>
++#include <net/net_namespace.h>
+ 
+ #define NLGRPSZ(x)	(ALIGN(x, sizeof(unsigned long) * 8) / 8)
+ 
+@@ -212,7 +213,7 @@
+ 		wake_up(&nl_table_wait);
+ }
+ 
+-static __inline__ struct sock *netlink_lookup(int protocol, u32 pid)
++static __inline__ struct sock *netlink_lookup(struct net *net, int protocol, u32 pid)
+ {
+ 	struct nl_pid_hash *hash = &nl_table[protocol].hash;
+ 	struct hlist_head *head;
+@@ -222,7 +223,7 @@
+ 	read_lock(&nl_table_lock);
+ 	head = nl_pid_hashfn(hash, pid);
+ 	sk_for_each(sk, node, head) {
+-		if (nlk_sk(sk)->pid == pid) {
++		if ((sk->sk_net == net) && (nlk_sk(sk)->pid == pid)) {
+ 			sock_hold(sk);
+ 			goto found;
+ 		}
+@@ -327,7 +328,7 @@
+ 	 * makes sure updates are visible before bind or setsockopt return. */
+ }
+ 
+-static int netlink_insert(struct sock *sk, u32 pid)
++static int netlink_insert(struct sock *sk, struct net *net, u32 pid)
+ {
+ 	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
+ 	struct hlist_head *head;
+@@ -340,7 +341,7 @@
+ 	head = nl_pid_hashfn(hash, pid);
+ 	len = 0;
+ 	sk_for_each(osk, node, head) {
+-		if (nlk_sk(osk)->pid == pid)
++		if ((osk->sk_net == net) && (nlk_sk(osk)->pid == pid))
+ 			break;
+ 		len++;
+ 	}
+@@ -383,15 +384,15 @@
+ 	.obj_size = sizeof(struct netlink_sock),
+ };
+ 
+-static int __netlink_create(struct socket *sock, struct mutex *cb_mutex,
+-			    int protocol)
++static int __netlink_create(struct net *net, struct socket *sock,
++			    struct mutex *cb_mutex, int protocol)
+ {
+ 	struct sock *sk;
+ 	struct netlink_sock *nlk;
+ 
+ 	sock->ops = &netlink_ops;
+ 
+-	sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
++	sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
+ 	if (!sk)
+ 		return -ENOMEM;
+ 
+@@ -411,7 +412,7 @@
+ 	return 0;
+ }
+ 
+-static int netlink_create(struct socket *sock, int protocol)
++static int netlink_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct module *module = NULL;
+ 	struct mutex *cb_mutex;
+@@ -440,7 +441,7 @@
+ 	cb_mutex = nl_table[protocol].cb_mutex;
+ 	netlink_unlock_table();
+ 
+-	if ((err = __netlink_create(sock, cb_mutex, protocol)) < 0)
++	if ((err = __netlink_create(net, sock, cb_mutex, protocol)) < 0)
+ 		goto out_module;
+ 
+ 	nlk = nlk_sk(sock->sk);
+@@ -477,6 +478,7 @@
+ 
+ 	if (nlk->pid && !nlk->subscriptions) {
+ 		struct netlink_notify n = {
++						.net = sk->sk_net,
+ 						.protocol = sk->sk_protocol,
+ 						.pid = nlk->pid,
+ 					  };
+@@ -505,6 +507,7 @@
+ static int netlink_autobind(struct socket *sock)
+ {
+ 	struct sock *sk = sock->sk;
++	struct net *net = sk->sk_net;
+ 	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
+ 	struct hlist_head *head;
+ 	struct sock *osk;
+@@ -518,6 +521,8 @@
+ 	netlink_table_grab();
+ 	head = nl_pid_hashfn(hash, pid);
+ 	sk_for_each(osk, node, head) {
++		if ((osk->sk_net != net))
++			continue;
+ 		if (nlk_sk(osk)->pid == pid) {
+ 			/* Bind collision, search negative pid values. */
+ 			pid = rover--;
+@@ -529,7 +534,7 @@
+ 	}
+ 	netlink_table_ungrab();
+ 
+-	err = netlink_insert(sk, pid);
++	err = netlink_insert(sk, net, pid);
+ 	if (err == -EADDRINUSE)
+ 		goto retry;
+ 
+@@ -583,6 +588,7 @@
+ static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+ {
+ 	struct sock *sk = sock->sk;
++	struct net *net = sk->sk_net;
+ 	struct netlink_sock *nlk = nlk_sk(sk);
+ 	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
+ 	int err;
+@@ -606,7 +612,7 @@
+ 			return -EINVAL;
+ 	} else {
+ 		err = nladdr->nl_pid ?
+-			netlink_insert(sk, nladdr->nl_pid) :
++			netlink_insert(sk, net, nladdr->nl_pid) :
+ 			netlink_autobind(sock);
+ 		if (err)
+ 			return err;
+@@ -690,10 +696,12 @@
+ static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
+ {
+ 	int protocol = ssk->sk_protocol;
++	struct net *net;
+ 	struct sock *sock;
+ 	struct netlink_sock *nlk;
+ 
+-	sock = netlink_lookup(protocol, pid);
++	net = ssk->sk_net;
++	sock = netlink_lookup(net, protocol, pid);
+ 	if (!sock)
+ 		return ERR_PTR(-ECONNREFUSED);
+ 
+@@ -866,6 +874,7 @@
+ 
+ struct netlink_broadcast_data {
+ 	struct sock *exclude_sk;
++	struct net *net;
+ 	u32 pid;
+ 	u32 group;
+ 	int failure;
+@@ -888,6 +897,9 @@
+ 	    !test_bit(p->group - 1, nlk->groups))
+ 		goto out;
+ 
++	if ((sk->sk_net != p->net))
++		goto out;
++
+ 	if (p->failure) {
+ 		netlink_overrun(sk);
+ 		goto out;
+@@ -926,6 +938,7 @@
+ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+ 		      u32 group, gfp_t allocation)
+ {
++	struct net *net = ssk->sk_net;
+ 	struct netlink_broadcast_data info;
+ 	struct hlist_node *node;
+ 	struct sock *sk;
+@@ -933,6 +946,7 @@
+ 	skb = netlink_trim(skb, allocation);
+ 
+ 	info.exclude_sk = ssk;
++	info.net = net;
+ 	info.pid = pid;
+ 	info.group = group;
+ 	info.failure = 0;
+@@ -981,6 +995,9 @@
+ 	if (sk == p->exclude_sk)
+ 		goto out;
+ 
++	if (sk->sk_net != p->exclude_sk->sk_net)
++		goto out;
++
+ 	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
+ 	    !test_bit(p->group - 1, nlk->groups))
+ 		goto out;
+@@ -1276,7 +1293,7 @@
+  */
+ 
+ struct sock *
+-netlink_kernel_create(int unit, unsigned int groups,
++netlink_kernel_create(struct net *net, int unit, unsigned int groups,
+ 		      void (*input)(struct sock *sk, int len),
+ 		      struct mutex *cb_mutex, struct module *module)
+ {
+@@ -1293,7 +1310,7 @@
+ 	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
+ 		return NULL;
+ 
+-	if (__netlink_create(sock, cb_mutex, unit) < 0)
++	if (__netlink_create(net, sock, cb_mutex, unit) < 0)
+ 		goto out_sock_release;
+ 
+ 	if (groups < 32)
+@@ -1308,18 +1325,20 @@
+ 	if (input)
+ 		nlk_sk(sk)->data_ready = input;
+ 
+-	if (netlink_insert(sk, 0))
++	if (netlink_insert(sk, net, 0))
+ 		goto out_sock_release;
+ 
+ 	nlk = nlk_sk(sk);
+ 	nlk->flags |= NETLINK_KERNEL_SOCKET;
+ 
+ 	netlink_table_grab();
++	if (!nl_table[unit].registered) {
+ 	nl_table[unit].groups = groups;
+ 	nl_table[unit].listeners = listeners;
+ 	nl_table[unit].cb_mutex = cb_mutex;
+ 	nl_table[unit].module = module;
+ 	nl_table[unit].registered = 1;
++	}
+ 	netlink_table_ungrab();
+ 
+ 	return sk;
+@@ -1420,7 +1439,7 @@
+ 	atomic_inc(&skb->users);
+ 	cb->skb = skb;
+ 
+-	sk = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid);
++	sk = netlink_lookup(ssk->sk_net, ssk->sk_protocol, NETLINK_CB(skb).pid);
+ 	if (sk == NULL) {
+ 		netlink_destroy_callback(cb);
+ 		return -ECONNREFUSED;
+@@ -1462,7 +1481,8 @@
+ 	if (!skb) {
+ 		struct sock *sk;
+ 
+-		sk = netlink_lookup(in_skb->sk->sk_protocol,
++		sk = netlink_lookup(in_skb->sk->sk_net,
++				    in_skb->sk->sk_protocol,
+ 				    NETLINK_CB(in_skb).pid);
+ 		if (sk) {
+ 			sk->sk_err = ENOBUFS;
+@@ -1613,6 +1633,7 @@
+ 
+ #ifdef CONFIG_PROC_FS
+ struct nl_seq_iter {
++	struct net *net;
+ 	int link;
+ 	int hash_idx;
+ };
+@@ -1630,6 +1651,8 @@
+ 
+ 		for (j = 0; j <= hash->mask; j++) {
+ 			sk_for_each(s, node, &hash->table[j]) {
++				if (iter->net != s->sk_net)
++					continue;
+ 				if (off == pos) {
+ 					iter->link = i;
+ 					iter->hash_idx = j;
+@@ -1659,11 +1682,14 @@
+ 	if (v == SEQ_START_TOKEN)
+ 		return netlink_seq_socket_idx(seq, 0);
+ 
+-	s = sk_next(v);
++	iter = seq->private;
++	s = v;
++	do {
++		s = sk_next(s);
++	} while (s && (iter->net != s->sk_net));
+ 	if (s)
+ 		return s;
+ 
+-	iter = seq->private;
+ 	i = iter->link;
+ 	j = iter->hash_idx + 1;
+ 
+@@ -1672,6 +1698,8 @@
+ 
+ 		for (; j <= hash->mask; j++) {
+ 			s = sk_head(&hash->table[j]);
++			while (s && (iter->net != s->sk_net))
++				s = sk_next(s);
+ 			if (s) {
+ 				iter->link = i;
+ 				iter->hash_idx = j;
+@@ -1742,15 +1770,24 @@
+ 
+ 	seq = file->private_data;
+ 	seq->private = iter;
++	iter->net = get_net(PROC_NET(inode));
+ 	return 0;
+ }
+ 
++static int netlink_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct nl_seq_iter *iter = seq->private;
++	put_net(iter->net);
++	return seq_release_private(inode, file);
++}
++
+ static const struct file_operations netlink_seq_fops = {
+ 	.owner		= THIS_MODULE,
+ 	.open		= netlink_seq_open,
+ 	.read		= seq_read,
+ 	.llseek		= seq_lseek,
+-	.release	= seq_release_private,
++	.release	= netlink_seq_release,
+ };
+ 
+ #endif
+@@ -1792,6 +1829,27 @@
+ 	.owner	= THIS_MODULE,	/* for consistency 8) */
+ };
+ 
++static int netlink_net_init(struct net *net)
++{
++#ifdef CONFIG_PROC_FS
++	if (!proc_net_fops_create(net, "netlink", 0, &netlink_seq_fops))
++		return -ENOMEM;
++#endif
++	return 0;
++}
++
++static void netlink_net_exit(struct net *net)
++{
++#ifdef CONFIG_PROC_FS
++	proc_net_remove(net, "netlink");
++#endif
++}
++
++static struct pernet_operations netlink_net_ops = {
++	.init = netlink_net_init,
++	.exit = netlink_net_exit,
++};
++
+ static int __init netlink_proto_init(void)
+ {
+ 	struct sk_buff *dummy_skb;
+@@ -1837,9 +1895,7 @@
+ 	}
+ 
+ 	sock_register(&netlink_family_ops);
+-#ifdef CONFIG_PROC_FS
+-	proc_net_fops_create("netlink", 0, &netlink_seq_fops);
+-#endif
++	register_pernet_subsys(&netlink_net_ops);
+ 	/* The netlink device handler may be needed early. */
+ 	rtnetlink_init();
+ out:
+diff -Nurb linux-2.6.22-try2/net/netlink/genetlink.c linux-2.6.22-try2-netns/net/netlink/genetlink.c
+--- linux-2.6.22-try2/net/netlink/genetlink.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netlink/genetlink.c	2007-12-19 22:49:20.000000000 -0500
+@@ -557,8 +557,9 @@
+ 		goto errout_register;
+ 
+ 	netlink_set_nonroot(NETLINK_GENERIC, NL_NONROOT_RECV);
+-	genl_sock = netlink_kernel_create(NETLINK_GENERIC, GENL_MAX_ID,
+-					  genl_rcv, NULL, THIS_MODULE);
++	genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC,
++					  GENL_MAX_ID, genl_rcv, NULL,
++					  THIS_MODULE);
+ 	if (genl_sock == NULL)
+ 		panic("GENL: Cannot initialize generic netlink\n");
+ 
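After the af_netlink.c changes above, kernel-side netlink sockets and pid
lookups are scoped by namespace, so creation names its namespace explicitly.
The declaration below is the signature this patch gives the function; callers
that are not converted yet, like genetlink here, simply pin init_net:

	struct sock *netlink_kernel_create(struct net *net, int unit,
					   unsigned int groups,
					   void (*input)(struct sock *sk, int len),
					   struct mutex *cb_mutex,
					   struct module *module);

	/* e.g. from the genetlink hunk above */
	genl_sock = netlink_kernel_create(&init_net, NETLINK_GENERIC,
					  GENL_MAX_ID, genl_rcv, NULL,
					  THIS_MODULE);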
+diff -Nurb linux-2.6.22-try2/net/netrom/af_netrom.c linux-2.6.22-try2-netns/net/netrom/af_netrom.c
+--- linux-2.6.22-try2/net/netrom/af_netrom.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netrom/af_netrom.c	2007-12-19 22:49:20.000000000 -0500
+@@ -41,6 +41,7 @@
+ #include <net/ip.h>
+ #include <net/tcp_states.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+ #include <linux/init.h>
+ 
+ static int nr_ndevs = 4;
+@@ -105,6 +106,9 @@
+ {
+ 	struct net_device *dev = (struct net_device *)ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (event != NETDEV_DOWN)
+ 		return NOTIFY_DONE;
+ 
+@@ -408,15 +412,18 @@
+ 	.obj_size = sizeof(struct nr_sock),
+ };
+ 
+-static int nr_create(struct socket *sock, int protocol)
++static int nr_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 	struct nr_sock *nr;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	if (sock->type != SOCK_SEQPACKET || protocol != 0)
+ 		return -ESOCKTNOSUPPORT;
+ 
+-	if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL)
++	if ((sk = sk_alloc(net, PF_NETROM, GFP_ATOMIC, &nr_proto, 1)) == NULL)
+ 		return -ENOMEM;
+ 
+ 	nr = nr_sk(sk);
+@@ -458,7 +465,7 @@
+ 	if (osk->sk_type != SOCK_SEQPACKET)
+ 		return NULL;
+ 
+-	if ((sk = sk_alloc(PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL)
++	if ((sk = sk_alloc(osk->sk_net, PF_NETROM, GFP_ATOMIC, osk->sk_prot, 1)) == NULL)
+ 		return NULL;
+ 
+ 	nr = nr_sk(sk);
+@@ -1447,9 +1454,9 @@
+ 
+ 	nr_loopback_init();
+ 
+-	proc_net_fops_create("nr", S_IRUGO, &nr_info_fops);
+-	proc_net_fops_create("nr_neigh", S_IRUGO, &nr_neigh_fops);
+-	proc_net_fops_create("nr_nodes", S_IRUGO, &nr_nodes_fops);
++	proc_net_fops_create(&init_net, "nr", S_IRUGO, &nr_info_fops);
++	proc_net_fops_create(&init_net, "nr_neigh", S_IRUGO, &nr_neigh_fops);
++	proc_net_fops_create(&init_net, "nr_nodes", S_IRUGO, &nr_nodes_fops);
+ out:
+ 	return rc;
+ fail:
+@@ -1477,9 +1484,9 @@
+ {
+ 	int i;
+ 
+-	proc_net_remove("nr");
+-	proc_net_remove("nr_neigh");
+-	proc_net_remove("nr_nodes");
++	proc_net_remove(&init_net, "nr");
++	proc_net_remove(&init_net, "nr_neigh");
++	proc_net_remove(&init_net, "nr_nodes");
+ 	nr_loopback_clear();
+ 
+ 	nr_rt_free();
+diff -Nurb linux-2.6.22-try2/net/netrom/nr_route.c linux-2.6.22-try2-netns/net/netrom/nr_route.c
+--- linux-2.6.22-try2/net/netrom/nr_route.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/netrom/nr_route.c	2007-12-19 22:49:20.000000000 -0500
+@@ -580,7 +580,7 @@
+ {
+ 	struct net_device *dev;
+ 
+-	if ((dev = dev_get_by_name(devname)) == NULL)
++	if ((dev = dev_get_by_name(&init_net, devname)) == NULL)
+ 		return NULL;
+ 
+ 	if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25)
+@@ -598,7 +598,7 @@
+ 	struct net_device *dev, *first = NULL;
+ 
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM)
+ 			if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
+ 				first = dev;
+@@ -618,7 +618,7 @@
+ 	struct net_device *dev;
+ 
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_NETROM && ax25cmp(addr, (ax25_address *)dev->dev_addr) == 0) {
+ 			dev_hold(dev);
+ 			goto out;
+diff -Nurb linux-2.6.22-try2/net/packet/af_packet.c linux-2.6.22-try2-netns/net/packet/af_packet.c
+--- linux-2.6.22-try2/net/packet/af_packet.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/packet/af_packet.c	2007-12-19 22:49:20.000000000 -0500
+@@ -65,6 +65,7 @@
+ #include <net/protocol.h>
+ #include <linux/skbuff.h>
+ #include <net/sock.h>
++#include <net/net_namespace.h>
+ #include <linux/errno.h>
+ #include <linux/timer.h>
+ #include <asm/system.h>
+@@ -135,10 +136,6 @@
+    packet classifier depends on it.
+  */
+ 
+-/* List of all packet sockets. */
+-static HLIST_HEAD(packet_sklist);
+-static DEFINE_RWLOCK(packet_sklist_lock);
+-
+ static atomic_t packet_socks_nr;
+ 
+ 
+@@ -273,6 +270,9 @@
+ 	if (skb->pkt_type == PACKET_LOOPBACK)
+ 		goto out;
+ 
++	if (dev->nd_net != sk->sk_net)
++		goto out;
++
+ 	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
+ 		goto oom;
+ 
+@@ -344,7 +344,7 @@
+ 	 */
+ 
+ 	saddr->spkt_device[13] = 0;
+-	dev = dev_get_by_name(saddr->spkt_device);
++	dev = dev_get_by_name(sk->sk_net, saddr->spkt_device);
+ 	err = -ENODEV;
+ 	if (dev == NULL)
+ 		goto out_unlock;
+@@ -462,6 +462,9 @@
+ 	sk = pt->af_packet_priv;
+ 	po = pkt_sk(sk);
+ 
++	if (dev->nd_net != sk->sk_net)
++		goto drop;
++
+ 	skb->dev = dev;
+ 
+ 	if (dev->hard_header) {
+@@ -578,6 +581,9 @@
+ 	sk = pt->af_packet_priv;
+ 	po = pkt_sk(sk);
+ 
++	if (dev->nd_net != sk->sk_net)
++		goto drop;
++
+ 	if (dev->hard_header) {
+ 		if (sk->sk_type != SOCK_DGRAM)
+ 			skb_push(skb, skb->data - skb_mac_header(skb));
+@@ -738,7 +744,7 @@
+ 	}
+ 
+ 
+-	dev = dev_get_by_index(ifindex);
++	dev = dev_get_by_index(sk->sk_net, ifindex);
+ 	err = -ENXIO;
+ 	if (dev == NULL)
+ 		goto out_unlock;
+@@ -811,15 +817,17 @@
+ {
+ 	struct sock *sk = sock->sk;
+ 	struct packet_sock *po;
++	struct net *net;
+ 
+ 	if (!sk)
+ 		return 0;
+ 
++	net = sk->sk_net;
+ 	po = pkt_sk(sk);
+ 
+-	write_lock_bh(&packet_sklist_lock);
++	write_lock_bh(&net->packet_sklist_lock);
+ 	sk_del_node_init(sk);
+-	write_unlock_bh(&packet_sklist_lock);
++	write_unlock_bh(&net->packet_sklist_lock);
+ 
+ 	/*
+ 	 *	Unhook packet receive handler.
+@@ -933,7 +941,7 @@
+ 		return -EINVAL;
+ 	strlcpy(name,uaddr->sa_data,sizeof(name));
+ 
+-	dev = dev_get_by_name(name);
++	dev = dev_get_by_name(sk->sk_net, name);
+ 	if (dev) {
+ 		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
+ 		dev_put(dev);
+@@ -960,7 +968,7 @@
+ 
+ 	if (sll->sll_ifindex) {
+ 		err = -ENODEV;
+-		dev = dev_get_by_index(sll->sll_ifindex);
++		dev = dev_get_by_index(sk->sk_net, sll->sll_ifindex);
+ 		if (dev == NULL)
+ 			goto out;
+ 	}
+@@ -982,7 +990,7 @@
+  *	Create a packet of type SOCK_PACKET.
+  */
+ 
+-static int packet_create(struct socket *sock, int protocol)
++static int packet_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 	struct packet_sock *po;
+@@ -998,7 +1006,7 @@
+ 	sock->state = SS_UNCONNECTED;
+ 
+ 	err = -ENOBUFS;
+-	sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
++	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, 1);
+ 	if (sk == NULL)
+ 		goto out;
+ 
+@@ -1034,9 +1042,9 @@
+ 		po->running = 1;
+ 	}
+ 
+-	write_lock_bh(&packet_sklist_lock);
+-	sk_add_node(sk, &packet_sklist);
+-	write_unlock_bh(&packet_sklist_lock);
++	write_lock_bh(&net->packet_sklist_lock);
++	sk_add_node(sk, &net->packet_sklist);
++	write_unlock_bh(&net->packet_sklist_lock);
+ 	return(0);
+ out:
+ 	return err;
+@@ -1154,7 +1162,7 @@
+ 		return -EOPNOTSUPP;
+ 
+ 	uaddr->sa_family = AF_PACKET;
+-	dev = dev_get_by_index(pkt_sk(sk)->ifindex);
++	dev = dev_get_by_index(sk->sk_net, pkt_sk(sk)->ifindex);
+ 	if (dev) {
+ 		strlcpy(uaddr->sa_data, dev->name, 15);
+ 		dev_put(dev);
+@@ -1179,7 +1187,7 @@
+ 	sll->sll_family = AF_PACKET;
+ 	sll->sll_ifindex = po->ifindex;
+ 	sll->sll_protocol = po->num;
+-	dev = dev_get_by_index(po->ifindex);
++	dev = dev_get_by_index(sk->sk_net, po->ifindex);
+ 	if (dev) {
+ 		sll->sll_hatype = dev->type;
+ 		sll->sll_halen = dev->addr_len;
+@@ -1231,7 +1239,7 @@
+ 	rtnl_lock();
+ 
+ 	err = -ENODEV;
+-	dev = __dev_get_by_index(mreq->mr_ifindex);
++	dev = __dev_get_by_index(sk->sk_net, mreq->mr_ifindex);
+ 	if (!dev)
+ 		goto done;
+ 
+@@ -1285,7 +1293,7 @@
+ 			if (--ml->count == 0) {
+ 				struct net_device *dev;
+ 				*mlp = ml->next;
+-				dev = dev_get_by_index(ml->ifindex);
++				dev = dev_get_by_index(sk->sk_net, ml->ifindex);
+ 				if (dev) {
+ 					packet_dev_mc(dev, ml, -1);
+ 					dev_put(dev);
+@@ -1313,7 +1321,7 @@
+ 		struct net_device *dev;
+ 
+ 		po->mclist = ml->next;
+-		if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
++		if ((dev = dev_get_by_index(sk->sk_net, ml->ifindex)) != NULL) {
+ 			packet_dev_mc(dev, ml, -1);
+ 			dev_put(dev);
+ 		}
+@@ -1469,9 +1477,10 @@
+ 	struct sock *sk;
+ 	struct hlist_node *node;
+ 	struct net_device *dev = data;
++	struct net *net = dev->nd_net;
+ 
+-	read_lock(&packet_sklist_lock);
+-	sk_for_each(sk, node, &packet_sklist) {
++	read_lock(&net->packet_sklist_lock);
++	sk_for_each(sk, node, &net->packet_sklist) {
+ 		struct packet_sock *po = pkt_sk(sk);
+ 
+ 		switch (msg) {
+@@ -1510,7 +1519,7 @@
+ 			break;
+ 		}
+ 	}
+-	read_unlock(&packet_sklist_lock);
++	read_unlock(&net->packet_sklist_lock);
+ 	return NOTIFY_DONE;
+ }
+ 
+@@ -1878,12 +1887,12 @@
+ };
+ 
+ #ifdef CONFIG_PROC_FS
+-static inline struct sock *packet_seq_idx(loff_t off)
++static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
+ {
+ 	struct sock *s;
+ 	struct hlist_node *node;
+ 
+-	sk_for_each(s, node, &packet_sklist) {
++	sk_for_each(s, node, &net->packet_sklist) {
+ 		if (!off--)
+ 			return s;
+ 	}
+@@ -1892,21 +1901,24 @@
+ 
+ static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
+ {
+-	read_lock(&packet_sklist_lock);
+-	return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
++	struct net *net = seq->private;
++	read_lock(&net->packet_sklist_lock);
++	return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN;
+ }
+ 
+ static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
++	struct net *net = seq->private;
+ 	++*pos;
+ 	return  (v == SEQ_START_TOKEN)
+-		? sk_head(&packet_sklist)
++		? sk_head(&net->packet_sklist)
+ 		: sk_next((struct sock*)v) ;
+ }
+ 
+ static void packet_seq_stop(struct seq_file *seq, void *v)
+ {
+-	read_unlock(&packet_sklist_lock);
++	struct net *net = seq->private;
++	read_unlock(&net->packet_sklist_lock);
+ }
+ 
+ static int packet_seq_show(struct seq_file *seq, void *v)
+@@ -1942,7 +1954,22 @@
+ 
+ static int packet_seq_open(struct inode *inode, struct file *file)
+ {
+-	return seq_open(file, &packet_seq_ops);
++	struct seq_file *seq;
++	int res;
++	res = seq_open(file, &packet_seq_ops);
++	if (!res) {
++		seq = file->private_data;
++		seq->private = get_net(PROC_NET(inode));
++	}
++	return res;
++}
++
++static int packet_seq_release(struct inode *inode, struct file *file)
++	struct seq_file *seq = file->private_data;
++	struct seq_file *seq=  file->private_data;
++	struct net *net = seq->private;
++	put_net(net);
++	return seq_release(inode, file);
+ }
+ 
+ static const struct file_operations packet_seq_fops = {
+@@ -1950,15 +1977,37 @@
+ 	.open		= packet_seq_open,
+ 	.read		= seq_read,
+ 	.llseek		= seq_lseek,
+-	.release	= seq_release,
++	.release	= packet_seq_release,
+ };
+ 
+ #endif
+ 
++static int packet_net_init(struct net *net)
++{
++	rwlock_init(&net->packet_sklist_lock);
++	INIT_HLIST_HEAD(&net->packet_sklist);
++
++	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
++		return -ENOMEM;
++
++	return 0;
++}
++
++static void packet_net_exit(struct net *net)
++{
++	proc_net_remove(net, "packet");
++}
++
++static struct pernet_operations packet_net_ops = {
++	.init = packet_net_init,
++	.exit = packet_net_exit,
++};
++
++
+ static void __exit packet_exit(void)
+ {
+-	proc_net_remove("packet");
+ 	unregister_netdevice_notifier(&packet_netdev_notifier);
++	unregister_pernet_subsys(&packet_net_ops);
+ 	sock_unregister(PF_PACKET);
+ 	proto_unregister(&packet_proto);
+ }
+@@ -1971,8 +2020,8 @@
+ 		goto out;
+ 
+ 	sock_register(&packet_family_ops);
++	register_pernet_subsys(&packet_net_ops);
+ 	register_netdevice_notifier(&packet_netdev_notifier);
+-	proc_net_fops_create("packet", 0, &packet_seq_fops);
+ out:
+ 	return rc;
+ }
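The af_packet.c hunk above also shows the /proc pattern used for per-net
seq_files (the x_tables and af_netlink hunks above get the same treatment):
open resolves the namespace from the inode and takes a reference, release
drops it, so the namespace cannot disappear while the file is open. A
condensed sketch with "foo" as a placeholder; PROC_NET(), get_net() and
put_net() are the helpers the patch relies on:

	static int foo_seq_open(struct inode *inode, struct file *file)
	{
		int res = seq_open(file, &foo_seq_ops);

		if (!res) {
			struct seq_file *seq = file->private_data;
			/* pin the namespace for the file's lifetime */
			seq->private = get_net(PROC_NET(inode));
		}
		return res;
	}

	static int foo_seq_release(struct inode *inode, struct file *file)
	{
		struct seq_file *seq = file->private_data;

		put_net(seq->private);		/* drop the reference */
		return seq_release(inode, file);
	}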
+diff -Nurb linux-2.6.22-try2/net/rose/af_rose.c linux-2.6.22-try2-netns/net/rose/af_rose.c
+--- linux-2.6.22-try2/net/rose/af_rose.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/rose/af_rose.c	2007-12-19 22:49:20.000000000 -0500
+@@ -45,6 +45,7 @@
+ #include <net/tcp_states.h>
+ #include <net/ip.h>
+ #include <net/arp.h>
++#include <net/net_namespace.h>
+ 
+ static int rose_ndevs = 10;
+ 
+@@ -196,6 +197,9 @@
+ {
+ 	struct net_device *dev = (struct net_device *)ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (event != NETDEV_DOWN)
+ 		return NOTIFY_DONE;
+ 
+@@ -498,15 +502,18 @@
+ 	.obj_size = sizeof(struct rose_sock),
+ };
+ 
+-static int rose_create(struct socket *sock, int protocol)
++static int rose_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 	struct rose_sock *rose;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	if (sock->type != SOCK_SEQPACKET || protocol != 0)
+ 		return -ESOCKTNOSUPPORT;
+ 
+-	if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL)
++	if ((sk = sk_alloc(net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL)
+ 		return -ENOMEM;
+ 
+ 	rose = rose_sk(sk);
+@@ -544,7 +551,7 @@
+ 	if (osk->sk_type != SOCK_SEQPACKET)
+ 		return NULL;
+ 
+-	if ((sk = sk_alloc(PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL)
++	if ((sk = sk_alloc(osk->sk_net, PF_ROSE, GFP_ATOMIC, &rose_proto, 1)) == NULL)
+ 		return NULL;
+ 
+ 	rose = rose_sk(sk);
+@@ -1576,10 +1583,10 @@
+ 
+ 	rose_add_loopback_neigh();
+ 
+-	proc_net_fops_create("rose", S_IRUGO, &rose_info_fops);
+-	proc_net_fops_create("rose_neigh", S_IRUGO, &rose_neigh_fops);
+-	proc_net_fops_create("rose_nodes", S_IRUGO, &rose_nodes_fops);
+-	proc_net_fops_create("rose_routes", S_IRUGO, &rose_routes_fops);
++	proc_net_fops_create(&init_net, "rose", S_IRUGO, &rose_info_fops);
++	proc_net_fops_create(&init_net, "rose_neigh", S_IRUGO, &rose_neigh_fops);
++	proc_net_fops_create(&init_net, "rose_nodes", S_IRUGO, &rose_nodes_fops);
++	proc_net_fops_create(&init_net, "rose_routes", S_IRUGO, &rose_routes_fops);
+ out:
+ 	return rc;
+ fail:
+@@ -1606,10 +1613,10 @@
+ {
+ 	int i;
+ 
+-	proc_net_remove("rose");
+-	proc_net_remove("rose_neigh");
+-	proc_net_remove("rose_nodes");
+-	proc_net_remove("rose_routes");
++	proc_net_remove(&init_net, "rose");
++	proc_net_remove(&init_net, "rose_neigh");
++	proc_net_remove(&init_net, "rose_nodes");
++	proc_net_remove(&init_net, "rose_routes");
+ 	rose_loopback_clear();
+ 
+ 	rose_rt_free();
+diff -Nurb linux-2.6.22-try2/net/rose/rose_route.c linux-2.6.22-try2-netns/net/rose/rose_route.c
+--- linux-2.6.22-try2/net/rose/rose_route.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/rose/rose_route.c	2007-12-19 22:49:20.000000000 -0500
+@@ -583,7 +583,7 @@
+ {
+ 	struct net_device *dev;
+ 
+-	if ((dev = dev_get_by_name(devname)) == NULL)
++	if ((dev = dev_get_by_name(&init_net, devname)) == NULL)
+ 		return NULL;
+ 
+ 	if ((dev->flags & IFF_UP) && dev->type == ARPHRD_AX25)
+@@ -601,7 +601,7 @@
+ 	struct net_device *dev, *first = NULL;
+ 
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE)
+ 			if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
+ 				first = dev;
+@@ -619,7 +619,7 @@
+ 	struct net_device *dev;
+ 
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0) {
+ 			dev_hold(dev);
+ 			goto out;
+@@ -636,7 +636,7 @@
+ 	struct net_device *dev;
+ 
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if ((dev->flags & IFF_UP) && dev->type == ARPHRD_ROSE && rosecmp(addr, (rose_address *)dev->dev_addr) == 0)
+ 			goto out;
+ 	}
+diff -Nurb linux-2.6.22-try2/net/rxrpc/af_rxrpc.c linux-2.6.22-try2-netns/net/rxrpc/af_rxrpc.c
+--- linux-2.6.22-try2/net/rxrpc/af_rxrpc.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/rxrpc/af_rxrpc.c	2007-12-19 22:49:20.000000000 -0500
+@@ -14,6 +14,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/poll.h>
+ #include <linux/proc_fs.h>
++#include <net/net_namespace.h>
+ #include <net/sock.h>
+ #include <net/af_rxrpc.h>
+ #include "ar-internal.h"
+@@ -605,13 +606,16 @@
+ /*
+  * create an RxRPC socket
+  */
+-static int rxrpc_create(struct socket *sock, int protocol)
++static int rxrpc_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct rxrpc_sock *rx;
+ 	struct sock *sk;
+ 
+ 	_enter("%p,%d", sock, protocol);
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	/* we support transport protocol UDP only */
+ 	if (protocol != PF_INET)
+ 		return -EPROTONOSUPPORT;
+@@ -622,7 +626,7 @@
+ 	sock->ops = &rxrpc_rpc_ops;
+ 	sock->state = SS_UNCONNECTED;
+ 
+-	sk = sk_alloc(PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1);
++	sk = sk_alloc(net, PF_RXRPC, GFP_KERNEL, &rxrpc_proto, 1);
+ 	if (!sk)
+ 		return -ENOMEM;
+ 
+@@ -829,8 +833,8 @@
+ 	}
+ 
+ #ifdef CONFIG_PROC_FS
+-	proc_net_fops_create("rxrpc_calls", 0, &rxrpc_call_seq_fops);
+-	proc_net_fops_create("rxrpc_conns", 0, &rxrpc_connection_seq_fops);
++	proc_net_fops_create(&init_net, "rxrpc_calls", 0, &rxrpc_call_seq_fops);
++	proc_net_fops_create(&init_net, "rxrpc_conns", 0, &rxrpc_connection_seq_fops);
+ #endif
+ 	return 0;
+ 
+@@ -868,8 +872,8 @@
+ 
+ 	_debug("flush scheduled work");
+ 	flush_workqueue(rxrpc_workqueue);
+-	proc_net_remove("rxrpc_conns");
+-	proc_net_remove("rxrpc_calls");
++	proc_net_remove(&init_net, "rxrpc_conns");
++	proc_net_remove(&init_net, "rxrpc_calls");
+ 	destroy_workqueue(rxrpc_workqueue);
+ 	kmem_cache_destroy(rxrpc_call_jar);
+ 	_leave("");
+diff -Nurb linux-2.6.22-try2/net/sched/act_api.c linux-2.6.22-try2-netns/net/sched/act_api.c
+--- linux-2.6.22-try2/net/sched/act_api.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/sched/act_api.c	2007-12-19 22:49:20.000000000 -0500
+@@ -27,6 +27,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/init.h>
+ #include <linux/kmod.h>
++#include <net/net_namespace.h>
+ #include <net/sock.h>
+ #include <net/sch_generic.h>
+ #include <net/act_api.h>
+@@ -675,7 +676,7 @@
+ 		return -EINVAL;
+ 	}
+ 
+-	return rtnl_unicast(skb, pid);
++	return rtnl_unicast(skb, &init_net, pid);
+ }
+ 
+ static struct tc_action *
+@@ -796,7 +797,7 @@
+ 	nlh->nlmsg_flags |= NLM_F_ROOT;
+ 	module_put(a->ops->owner);
+ 	kfree(a);
+-	err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
++	err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ 	if (err > 0)
+ 		return 0;
+ 
+@@ -859,7 +860,7 @@
+ 
+ 		/* now do the delete */
+ 		tcf_action_destroy(head, 0);
+-		ret = rtnetlink_send(skb, pid, RTNLGRP_TC,
++		ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC,
+ 				     n->nlmsg_flags&NLM_F_ECHO);
+ 		if (ret > 0)
+ 			return 0;
+@@ -903,7 +904,7 @@
+ 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ 	NETLINK_CB(skb).dst_group = RTNLGRP_TC;
+ 
+-	err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
++	err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
+ 	if (err > 0)
+ 		err = 0;
+ 	return err;
+@@ -941,10 +942,14 @@
+ 
+ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct rtattr **tca = arg;
+ 	u32 pid = skb ? NETLINK_CB(skb).pid : 0;
+ 	int ret = 0, ovr = 0;
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ 	if (tca[TCA_ACT_TAB-1] == NULL) {
+ 		printk("tc_ctl_action: received NO action attribs\n");
+ 		return -EINVAL;
+@@ -1014,6 +1019,7 @@
+ static int
+ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct nlmsghdr *nlh;
+ 	unsigned char *b = skb_tail_pointer(skb);
+ 	struct rtattr *x;
+@@ -1023,6 +1029,9 @@
+ 	struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
+ 	struct rtattr *kind = find_dump_kind(cb->nlh);
+ 
++	if (net != &init_net)
++		return 0;
++
+ 	if (kind == NULL) {
+ 		printk("tc_dump_action: action bad kind\n");
+ 		return 0;
+diff -Nurb linux-2.6.22-try2/net/sched/act_mirred.c linux-2.6.22-try2-netns/net/sched/act_mirred.c
+--- linux-2.6.22-try2/net/sched/act_mirred.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/sched/act_mirred.c	2007-12-19 22:49:20.000000000 -0500
+@@ -85,7 +85,7 @@
+ 	parm = RTA_DATA(tb[TCA_MIRRED_PARMS-1]);
+ 
+ 	if (parm->ifindex) {
+-		dev = __dev_get_by_index(parm->ifindex);
++		dev = __dev_get_by_index(&init_net, parm->ifindex);
+ 		if (dev == NULL)
+ 			return -ENODEV;
+ 		switch (dev->type) {
+diff -Nurb linux-2.6.22-try2/net/sched/cls_api.c linux-2.6.22-try2-netns/net/sched/cls_api.c
+--- linux-2.6.22-try2/net/sched/cls_api.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/sched/cls_api.c	2007-12-19 22:49:20.000000000 -0500
+@@ -129,6 +129,7 @@
+ 
+ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct rtattr **tca;
+ 	struct tcmsg *t;
+ 	u32 protocol;
+@@ -145,6 +146,9 @@
+ 	unsigned long fh;
+ 	int err;
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ replay:
+ 	tca = arg;
+ 	t = NLMSG_DATA(n);
+@@ -164,7 +168,7 @@
+ 	/* Find head of filter chain. */
+ 
+ 	/* Find link */
+-	if ((dev = __dev_get_by_index(t->tcm_ifindex)) == NULL)
++	if ((dev = __dev_get_by_index(&init_net, t->tcm_ifindex)) == NULL)
+ 		return -ENODEV;
+ 
+ 	/* Find qdisc */
+@@ -365,7 +369,7 @@
+ 		return -EINVAL;
+ 	}
+ 
+-	return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
++	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ }
+ 
+ struct tcf_dump_args
+@@ -385,6 +389,7 @@
+ 
+ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	int t;
+ 	int s_t;
+ 	struct net_device *dev;
+@@ -395,9 +400,12 @@
+ 	struct Qdisc_class_ops *cops;
+ 	struct tcf_dump_args arg;
+ 
++	if (net != &init_net)
++		return 0;
++
+ 	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+ 		return skb->len;
+-	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
++	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ 		return skb->len;
+ 
+ 	if (!tcm->tcm_parent)
+diff -Nurb linux-2.6.22-try2/net/sched/em_meta.c linux-2.6.22-try2-netns/net/sched/em_meta.c
+--- linux-2.6.22-try2/net/sched/em_meta.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/sched/em_meta.c	2007-12-19 22:49:20.000000000 -0500
+@@ -291,7 +291,7 @@
+ 	 } else  {
+ 		struct net_device *dev;
+ 
+-		dev = dev_get_by_index(skb->sk->sk_bound_dev_if);
++		dev = dev_get_by_index(&init_net, skb->sk->sk_bound_dev_if);
+ 		*err = var_dev(dev, dst);
+ 		if (dev)
+ 			dev_put(dev);
+diff -Nurb linux-2.6.22-try2/net/sched/sch_api.c linux-2.6.22-try2-netns/net/sched/sch_api.c
+--- linux-2.6.22-try2/net/sched/sch_api.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/sched/sch_api.c	2007-12-19 22:49:20.000000000 -0500
+@@ -35,6 +35,7 @@
+ #include <linux/bitops.h>
+ #include <linux/hrtimer.h>
+ 
++#include <net/net_namespace.h>
+ #include <net/netlink.h>
+ #include <net/sock.h>
+ #include <net/pkt_sched.h>
+@@ -609,6 +610,7 @@
+ 
+ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct tcmsg *tcm = NLMSG_DATA(n);
+ 	struct rtattr **tca = arg;
+ 	struct net_device *dev;
+@@ -617,7 +619,10 @@
+ 	struct Qdisc *p = NULL;
+ 	int err;
+ 
+-	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
++	if (net != &init_net)
++		return -EINVAL;
++
++	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ 		return -ENODEV;
+ 
+ 	if (clid) {
+@@ -670,6 +675,7 @@
+ 
+ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct tcmsg *tcm;
+ 	struct rtattr **tca;
+ 	struct net_device *dev;
+@@ -677,6 +683,9 @@
+ 	struct Qdisc *q, *p;
+ 	int err;
+ 
++	if (net != &init_net)
++		return -EINVAL;
++
+ replay:
+ 	/* Reinit, just in case something touches this. */
+ 	tcm = NLMSG_DATA(n);
+@@ -684,7 +693,7 @@
+ 	clid = tcm->tcm_parent;
+ 	q = p = NULL;
+ 
+-	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
++	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ 		return -ENODEV;
+ 
+ 	if (clid) {
+@@ -873,7 +882,7 @@
+ 	}
+ 
+ 	if (skb->len)
+-		return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
++		return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ 
+ err_out:
+ 	kfree_skb(skb);
+@@ -882,16 +891,20 @@
+ 
+ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	int idx, q_idx;
+ 	int s_idx, s_q_idx;
+ 	struct net_device *dev;
+ 	struct Qdisc *q;
+ 
++	if (net != &init_net)
++		return 0;
++
+ 	s_idx = cb->args[0];
+ 	s_q_idx = q_idx = cb->args[1];
+ 	read_lock(&dev_base_lock);
+ 	idx = 0;
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		if (idx < s_idx)
+ 			goto cont;
+ 		if (idx > s_idx)
+@@ -930,6 +943,7 @@
+ 
+ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct tcmsg *tcm = NLMSG_DATA(n);
+ 	struct rtattr **tca = arg;
+ 	struct net_device *dev;
+@@ -942,7 +956,10 @@
+ 	u32 qid = TC_H_MAJ(clid);
+ 	int err;
+ 
+-	if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
++	if (net != &init_net)
++		return -EINVAL;
++
++	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ 		return -ENODEV;
+ 
+ 	/*
+@@ -1096,7 +1113,7 @@
+ 		return -EINVAL;
+ 	}
+ 
+-	return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
++	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+ }
+ 
+ struct qdisc_dump_args
+@@ -1116,6 +1133,7 @@
+ 
+ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	int t;
+ 	int s_t;
+ 	struct net_device *dev;
+@@ -1123,9 +1141,12 @@
+ 	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
+ 	struct qdisc_dump_args arg;
+ 
++	if (net != &init_net)
++		return 0;
++
+ 	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
+ 		return 0;
+-	if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
++	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+ 		return 0;
+ 
+ 	s_t = cb->args[0];
+@@ -1252,7 +1273,7 @@
+ {
+ 	register_qdisc(&pfifo_qdisc_ops);
+ 	register_qdisc(&bfifo_qdisc_ops);
+-	proc_net_fops_create("psched", 0, &psched_fops);
++	proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
+ 
+ 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
+ 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
+diff -Nurb linux-2.6.22-try2/net/sched/sch_ingress.c linux-2.6.22-try2-netns/net/sched/sch_ingress.c
+--- linux-2.6.22-try2/net/sched/sch_ingress.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/sched/sch_ingress.c	2007-12-19 22:49:20.000000000 -0500
+@@ -243,6 +243,10 @@
+ 	struct net_device *dev = skb->dev;
+ 	int fwres=NF_ACCEPT;
+ 
++	/* Only filter packets in the initial network namespace */
++	if ((indev?indev:outdev)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	DPRINTK("ing_hook: skb %s dev=%s len=%u\n",
+ 		skb->sk ? "(owned)" : "(unowned)",
+ 		skb->dev ? (*pskb)->dev->name : "(no dev)",
+diff -Nurb linux-2.6.22-try2/net/sctp/input.c linux-2.6.22-try2-netns/net/sctp/input.c
+--- linux-2.6.22-try2/net/sctp/input.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/sctp/input.c	2007-12-19 22:49:20.000000000 -0500
+@@ -126,6 +126,10 @@
+ 	int family;
+ 	struct sctp_af *af;
+ 
++	if (skb->dev->nd_net != &init_net) {
++		kfree_skb(skb);
++		return 0;
++	}
+ 	if (skb->pkt_type!=PACKET_HOST)
+ 		goto discard_it;
+ 
+@@ -509,6 +513,9 @@
+ 	sk_buff_data_t saveip, savesctp;
+ 	int err;
+ 
++	if (skb->dev->nd_net != &init_net)
++		return;
++
+ 	if (skb->len < ihlen + 8) {
+ 		ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+ 		return;
+diff -Nurb linux-2.6.22-try2/net/sctp/ipv6.c linux-2.6.22-try2-netns/net/sctp/ipv6.c
+--- linux-2.6.22-try2/net/sctp/ipv6.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/sctp/ipv6.c	2007-12-19 22:49:20.000000000 -0500
+@@ -189,6 +189,7 @@
+ 
+ 	memset(&fl, 0, sizeof(fl));
+ 
++	fl.fl_net = &init_net;
+ 	fl.proto = sk->sk_protocol;
+ 
+ 	/* Fill in the dest address from the route entry passed with the skb
+@@ -230,6 +231,7 @@
+ 	struct flowi fl;
+ 
+ 	memset(&fl, 0, sizeof(fl));
++	fl.fl_net = &init_net;
+ 	ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr);
+ 	if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ 		fl.oif = daddr->v6.sin6_scope_id;
+@@ -619,7 +621,7 @@
+ 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+ 	struct sctp6_sock *newsctp6sk;
+ 
+-	newsk = sk_alloc(PF_INET6, GFP_KERNEL, sk->sk_prot, 1);
++	newsk = sk_alloc(sk->sk_net, PF_INET6, GFP_KERNEL, sk->sk_prot, 1);
+ 	if (!newsk)
+ 		goto out;
+ 
+@@ -664,7 +666,7 @@
+ 	newinet->mc_index = 0;
+ 	newinet->mc_list = NULL;
+ 
+-	if (ipv4_config.no_pmtu_disc)
++	if (init_net.sysctl_ipv4_no_pmtu_disc)
+ 		newinet->pmtudisc = IP_PMTUDISC_DONT;
+ 	else
+ 		newinet->pmtudisc = IP_PMTUDISC_WANT;
+@@ -841,7 +843,7 @@
+ 		if (type & IPV6_ADDR_LINKLOCAL) {
+ 			if (!addr->v6.sin6_scope_id)
+ 				return 0;
+-			dev = dev_get_by_index(addr->v6.sin6_scope_id);
++			dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id);
+ 			if (!dev)
+ 				return 0;
+ 			if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) {
+@@ -872,7 +874,7 @@
+ 		if (type & IPV6_ADDR_LINKLOCAL) {
+ 			if (!addr->v6.sin6_scope_id)
+ 				return 0;
+-			dev = dev_get_by_index(addr->v6.sin6_scope_id);
++			dev = dev_get_by_index(&init_net, addr->v6.sin6_scope_id);
+ 			if (!dev)
+ 				return 0;
+ 			if (!ipv6_chk_addr(&addr->v6.sin6_addr, dev, 0)) {
+diff -Nurb linux-2.6.22-try2/net/sctp/protocol.c linux-2.6.22-try2-netns/net/sctp/protocol.c
+--- linux-2.6.22-try2/net/sctp/protocol.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/sctp/protocol.c	2007-12-19 22:49:20.000000000 -0500
+@@ -59,6 +59,7 @@
+ #include <net/addrconf.h>
+ #include <net/inet_common.h>
+ #include <net/inet_ecn.h>
++#include <net/net_namespace.h>
+ 
+ /* Global data structures. */
+ struct sctp_globals sctp_globals __read_mostly;
+@@ -93,7 +94,7 @@
+ {
+ 	if (!proc_net_sctp) {
+ 		struct proc_dir_entry *ent;
+-		ent = proc_mkdir("net/sctp", NULL);
++		ent = proc_mkdir("sctp", init_net.proc_net);
+ 		if (ent) {
+ 			ent->owner = THIS_MODULE;
+ 			proc_net_sctp = ent;
+@@ -126,7 +127,7 @@
+ 
+ 	if (proc_net_sctp) {
+ 		proc_net_sctp = NULL;
+-		remove_proc_entry("net/sctp", NULL);
++		remove_proc_entry("sctp", init_net.proc_net);
+ 	}
+ }
+ 
+@@ -170,7 +171,7 @@
+ 	struct sctp_af *af;
+ 
+ 	read_lock(&dev_base_lock);
+-	for_each_netdev(dev) {
++	for_each_netdev(&init_net, dev) {
+ 		__list_for_each(pos, &sctp_address_families) {
+ 			af = list_entry(pos, struct sctp_af, list);
+ 			af->copy_addrlist(&sctp_local_addr_list, dev);
+@@ -354,13 +355,13 @@
+ /* Should this be available for binding?   */
+ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
+ {
+-	int ret = inet_addr_type(addr->v4.sin_addr.s_addr);
++	int ret = inet_addr_type(&init_net, addr->v4.sin_addr.s_addr);
+ 
+ 
+ 	if (addr->v4.sin_addr.s_addr != INADDR_ANY &&
+ 	   ret != RTN_LOCAL &&
+ 	   !sp->inet.freebind &&
+-	   !sysctl_ip_nonlocal_bind)
++	   !init_net.sysctl_ip_nonlocal_bind)
+ 		return 0;
+ 
+ 	return 1;
+@@ -423,6 +424,7 @@
+ 	union sctp_addr dst_saddr;
+ 
+ 	memset(&fl, 0x0, sizeof(struct flowi));
++	fl.fl_net = &init_net;
+ 	fl.fl4_dst  = daddr->v4.sin_addr.s_addr;
+ 	fl.proto = IPPROTO_SCTP;
+ 	if (asoc) {
+@@ -539,7 +541,7 @@
+ {
+ 	struct inet_sock *inet = inet_sk(sk);
+ 	struct inet_sock *newinet;
+-	struct sock *newsk = sk_alloc(PF_INET, GFP_KERNEL, sk->sk_prot, 1);
++	struct sock *newsk = sk_alloc(sk->sk_net, PF_INET, GFP_KERNEL, sk->sk_prot, 1);
+ 
+ 	if (!newsk)
+ 		goto out;
+@@ -1122,7 +1124,7 @@
+ 	}
+ 
+ 	spin_lock_init(&sctp_port_alloc_lock);
+-	sctp_port_rover = sysctl_local_port_range[0] - 1;
++	sctp_port_rover = init_net.sysctl_local_port_range[0] - 1;
+ 
+ 	printk(KERN_INFO "SCTP: Hash tables configured "
+ 			 "(established %d bind %d)\n",
+diff -Nurb linux-2.6.22-try2/net/sctp/socket.c linux-2.6.22-try2-netns/net/sctp/socket.c
+--- linux-2.6.22-try2/net/sctp/socket.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/sctp/socket.c	2007-12-19 22:49:20.000000000 -0500
+@@ -5021,8 +5021,8 @@
+ 		 * already in the hash table; if not, we use that; if
+ 		 * it is, we try next.
+ 		 */
+-		int low = sysctl_local_port_range[0];
+-		int high = sysctl_local_port_range[1];
++		int low = sk->sk_net->sysctl_local_port_range[0];
++		int high = sk->sk_net->sysctl_local_port_range[1];
+ 		int remaining = (high - low) + 1;
+ 		int rover;
+ 		int index;
+diff -Nurb linux-2.6.22-try2/net/socket.c linux-2.6.22-try2-netns/net/socket.c
+--- linux-2.6.22-try2/net/socket.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/socket.c	2007-12-19 22:49:20.000000000 -0500
+@@ -84,6 +84,7 @@
+ #include <linux/kmod.h>
+ #include <linux/audit.h>
+ #include <linux/wireless.h>
++#include <linux/nsproxy.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/unistd.h>
+@@ -821,9 +822,9 @@
+  */
+ 
+ static DEFINE_MUTEX(br_ioctl_mutex);
+-static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL;
++static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL;
+ 
+-void brioctl_set(int (*hook) (unsigned int, void __user *))
++void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *))
+ {
+ 	mutex_lock(&br_ioctl_mutex);
+ 	br_ioctl_hook = hook;
+@@ -833,9 +834,9 @@
+ EXPORT_SYMBOL(brioctl_set);
+ 
+ static DEFINE_MUTEX(vlan_ioctl_mutex);
+-static int (*vlan_ioctl_hook) (void __user *arg);
++static int (*vlan_ioctl_hook) (struct net *, void __user *arg);
+ 
+-void vlan_ioctl_set(int (*hook) (void __user *))
++void vlan_ioctl_set(int (*hook) (struct net *, void __user *))
+ {
+ 	mutex_lock(&vlan_ioctl_mutex);
+ 	vlan_ioctl_hook = hook;
+@@ -864,16 +865,20 @@
+ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+ {
+ 	struct socket *sock;
++	struct sock *sk;
+ 	void __user *argp = (void __user *)arg;
+ 	int pid, err;
++	struct net *net;
+ 
+ 	sock = file->private_data;
++	sk = sock->sk;
++	net = sk->sk_net;
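++	/* Resolve the namespace from the socket so ioctls act on its devices */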
+ 	if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
+-		err = dev_ioctl(cmd, argp);
++		err = dev_ioctl(net, cmd, argp);
+ 	} else
+ #ifdef CONFIG_WIRELESS_EXT
+ 	if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
+-		err = dev_ioctl(cmd, argp);
++		err = dev_ioctl(net, cmd, argp);
+ 	} else
+ #endif				/* CONFIG_WIRELESS_EXT */
+ 		switch (cmd) {
+@@ -899,7 +904,7 @@
+ 
+ 			mutex_lock(&br_ioctl_mutex);
+ 			if (br_ioctl_hook)
+-				err = br_ioctl_hook(cmd, argp);
++				err = br_ioctl_hook(net, cmd, argp);
+ 			mutex_unlock(&br_ioctl_mutex);
+ 			break;
+ 		case SIOCGIFVLAN:
+@@ -910,7 +915,7 @@
+ 
+ 			mutex_lock(&vlan_ioctl_mutex);
+ 			if (vlan_ioctl_hook)
+-				err = vlan_ioctl_hook(argp);
++				err = vlan_ioctl_hook(net, argp);
+ 			mutex_unlock(&vlan_ioctl_mutex);
+ 			break;
+ 		case SIOCADDDLCI:
+@@ -933,7 +938,7 @@
+ 			 * to the NIC driver.
+ 			 */
+ 			if (err == -ENOIOCTLCMD)
+-				err = dev_ioctl(cmd, argp);
++				err = dev_ioctl(net, cmd, argp);
+ 			break;
+ 		}
+ 	return err;
+@@ -1102,7 +1107,7 @@
+ 	return 0;
+ }
+ 
+-static int __sock_create(int family, int type, int protocol,
++static int __sock_create(struct net *net, int family, int type, int protocol,
+ 			 struct socket **res, int kern)
+ {
+ 	int err;
+@@ -1185,7 +1190,7 @@
+ 	/* Now protected by module ref count */
+ 	rcu_read_unlock();
+ 
+-	err = pf->create(sock, protocol);
++	err = pf->create(net, sock, protocol);
+ 	if (err < 0)
+ 		goto out_module_put;
+ 
+@@ -1224,12 +1229,12 @@
+ 
+ int sock_create(int family, int type, int protocol, struct socket **res)
+ {
+-	return __sock_create(family, type, protocol, res, 0);
++	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
+ }
+ 
+ int sock_create_kern(int family, int type, int protocol, struct socket **res)
+ {
+-	return __sock_create(family, type, protocol, res, 1);
++	return __sock_create(&init_net, family, type, protocol, res, 1);
+ }
+ 
+ asmlinkage long sys_socket(int family, int type, int protocol)
+@@ -1389,8 +1394,6 @@
+  *	ready for listening.
+  */
+ 
+-int sysctl_somaxconn __read_mostly = SOMAXCONN;
+-
+ asmlinkage long sys_listen(int fd, int backlog)
+ {
+ 	struct socket *sock;
+@@ -1398,8 +1401,9 @@
+ 
+ 	sock = sockfd_lookup_light(fd, &err, &fput_needed);
+ 	if (sock) {
+-		if ((unsigned)backlog > sysctl_somaxconn)
+-			backlog = sysctl_somaxconn;
++		struct net *net = sock->sk->sk_net;
++		if ((unsigned)backlog > net->sysctl_somaxconn)
++			backlog = net->sysctl_somaxconn;
+ 
+ 		err = security_socket_listen(sock, backlog);
+ 		if (!err)
+@@ -2189,6 +2193,16 @@
+ 	printk(KERN_INFO "NET: Unregistered protocol family %d\n", family);
+ }
+ 
++static int sock_pernet_init(struct net *net)
++{
++	net->sysctl_somaxconn = SOMAXCONN;
++	return 0;
++}
++
++static struct pernet_operations sock_net_ops = {
++	.init = sock_pernet_init,
++};
++
+ static int __init sock_init(void)
+ {
+ 	/*
+@@ -2217,6 +2231,8 @@
+ 	netfilter_init();
+ #endif
+ 
++	register_pernet_subsys(&sock_net_ops);
++
+ 	return 0;
+ }
+ 
+diff -Nurb linux-2.6.22-try2/net/sunrpc/stats.c linux-2.6.22-try2-netns/net/sunrpc/stats.c
+--- linux-2.6.22-try2/net/sunrpc/stats.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/sunrpc/stats.c	2007-12-19 22:49:20.000000000 -0500
+@@ -21,6 +21,7 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/sunrpc/svcsock.h>
+ #include <linux/sunrpc/metrics.h>
++#include <net/net_namespace.h>
+ 
+ #define RPCDBG_FACILITY	RPCDBG_MISC
+ 
+@@ -265,7 +266,7 @@
+ 	dprintk("RPC:       registering /proc/net/rpc\n");
+ 	if (!proc_net_rpc) {
+ 		struct proc_dir_entry *ent;
+-		ent = proc_mkdir("rpc", proc_net);
++		ent = proc_mkdir("rpc", init_net.proc_net);
+ 		if (ent) {
+ 			ent->owner = THIS_MODULE;
+ 			proc_net_rpc = ent;
+@@ -279,7 +280,7 @@
+ 	dprintk("RPC:       unregistering /proc/net/rpc\n");
+ 	if (proc_net_rpc) {
+ 		proc_net_rpc = NULL;
+-		remove_proc_entry("net/rpc", NULL);
++		remove_proc_entry("rpc", init_net.proc_net);
+ 	}
+ }
+ 
+diff -Nurb linux-2.6.22-try2/net/sysctl_net.c linux-2.6.22-try2-netns/net/sysctl_net.c
+--- linux-2.6.22-try2/net/sysctl_net.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/sysctl_net.c	2007-12-19 22:49:20.000000000 -0500
+@@ -54,3 +54,31 @@
+ #endif
+ 	{ 0 },
+ };
++
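++/* "net" sysctl hierarchy for entries instantiated once per namespace */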
++struct ctl_table multi_net_table[] = {
++	{
++		.ctl_name	= NET_CORE,
++		.procname	= "core",
++		.mode		= 0555,
++		.child		= multi_core_table,
++	},
++#ifdef CONFIG_INET
++	{
++		.ctl_name	= NET_IPV4,
++		.procname	= "ipv4",
++		.mode		= 0555,
++		.child		= multi_ipv4_table,
++	},
++#endif
++	{},
++};
++
++struct ctl_table net_root_table[] = {
++	{
++		.ctl_name	= CTL_NET,
++		.procname	= "net",
++		.mode		= 0555,
++		.child		= multi_net_table,
++	},
++	{},
++};
+diff -Nurb linux-2.6.22-try2/net/tipc/eth_media.c linux-2.6.22-try2-netns/net/tipc/eth_media.c
+--- linux-2.6.22-try2/net/tipc/eth_media.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/tipc/eth_media.c	2007-12-19 22:49:20.000000000 -0500
+@@ -38,6 +38,7 @@
+ #include <net/tipc/tipc_bearer.h>
+ #include <net/tipc/tipc_msg.h>
+ #include <linux/netdevice.h>
++#include <net/net_namespace.h>
+ 
+ #define MAX_ETH_BEARERS		2
+ #define ETH_LINK_PRIORITY	TIPC_DEF_LINK_PRI
+@@ -100,6 +101,11 @@
+ 	struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv;
+ 	u32 size;
+ 
++	if (dev->nd_net != &init_net) {
++		kfree_skb(buf);
++		return 0;
++	}
++
+ 	if (likely(eb_ptr->bearer)) {
+ 		if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
+ 			size = msg_size((struct tipc_msg *)buf->data);
+@@ -129,7 +135,7 @@
+ 
+ 	/* Find device with specified name */
+ 
+-	for_each_netdev(pdev){
++	for_each_netdev(&init_net, pdev){
+ 		if (!strncmp(pdev->name, driver_name, IFNAMSIZ)) {
+ 			dev = pdev;
+ 			break;
+@@ -192,6 +198,9 @@
+ 	struct eth_bearer *eb_ptr = &eth_bearers[0];
+ 	struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	while ((eb_ptr->dev != dev)) {
+ 		if (++eb_ptr == stop)
+ 			return NOTIFY_DONE;	/* couldn't find device */
+diff -Nurb linux-2.6.22-try2/net/tipc/socket.c linux-2.6.22-try2-netns/net/tipc/socket.c
+--- linux-2.6.22-try2/net/tipc/socket.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/net/tipc/socket.c	2007-12-19 22:49:20.000000000 -0500
+@@ -162,13 +162,16 @@
+  *
+  * Returns 0 on success, errno otherwise
+  */
+-static int tipc_create(struct socket *sock, int protocol)
++static int tipc_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct tipc_sock *tsock;
+ 	struct tipc_port *port;
+ 	struct sock *sk;
+ 	u32 ref;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	if (unlikely(protocol != 0))
+ 		return -EPROTONOSUPPORT;
+ 
+@@ -198,7 +201,7 @@
+ 		return -EPROTOTYPE;
+ 	}
+ 
+-	sk = sk_alloc(AF_TIPC, GFP_KERNEL, &tipc_proto, 1);
++	sk = sk_alloc(net, AF_TIPC, GFP_KERNEL, &tipc_proto, 1);
+ 	if (!sk) {
+ 		tipc_deleteport(ref);
+ 		return -ENOMEM;
+@@ -1372,7 +1375,7 @@
+ 	}
+ 	buf = skb_peek(&sock->sk->sk_receive_queue);
+ 
+-	res = tipc_create(newsock, 0);
++	res = tipc_create(sock->sk->sk_net, newsock, 0);
+ 	if (!res) {
+ 		struct tipc_sock *new_tsock = tipc_sk(newsock->sk);
+ 		struct tipc_portid id;
+diff -Nurb linux-2.6.22-try2/net/unix/af_unix.c linux-2.6.22-try2-netns/net/unix/af_unix.c
+--- linux-2.6.22-try2/net/unix/af_unix.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/unix/af_unix.c	2007-12-19 23:38:14.000000000 -0500
+@@ -117,8 +117,8 @@
+ #include <linux/security.h>
+ #include <linux/vs_context.h>
+ #include <linux/vs_limit.h>
++#include <net/net_namespace.h>
+ 
+-int sysctl_unix_max_dgram_qlen __read_mostly = 10;
+ 
+ struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
+ DEFINE_SPINLOCK(unix_table_lock);
+@@ -245,7 +245,8 @@
+ 	spin_unlock(&unix_table_lock);
+ }
+ 
+-static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
++static struct sock *__unix_find_socket_byname(struct net *net,
++					      struct sockaddr_un *sunname,
+ 					      int len, int type, unsigned hash)
+ {
+ 	struct sock *s;
+@@ -254,7 +255,7 @@
+ 	sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
+ 		struct unix_sock *u = unix_sk(s);
+ 
+-		if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT))
++		if (!nx_check(s->sk_nid, VS_WATCH_P | VS_IDENT) || (s->sk_net != net))
+ 			continue;
+ 		if (u->addr->len == len &&
+ 		    !memcmp(u->addr->name, sunname, len))
+@@ -265,21 +266,22 @@
+ 	return s;
+ }
+ 
+-static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
++static inline struct sock *unix_find_socket_byname(struct net *net,
++						   struct sockaddr_un *sunname,
+ 						   int len, int type,
+ 						   unsigned hash)
+ {
+ 	struct sock *s;
+ 
+ 	spin_lock(&unix_table_lock);
+-	s = __unix_find_socket_byname(sunname, len, type, hash);
++	s = __unix_find_socket_byname(net, sunname, len, type, hash);
+ 	if (s)
+ 		sock_hold(s);
+ 	spin_unlock(&unix_table_lock);
+ 	return s;
+ }
+ 
+-static struct sock *unix_find_socket_byinode(struct inode *i)
++static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
+ {
+ 	struct sock *s;
+ 	struct hlist_node *node;
+@@ -289,6 +291,9 @@
+ 		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
+ 		struct dentry *dentry = unix_sk(s)->dentry;
+ 
++		if (s->sk_net != net)
++			continue;
++
+ 		if(dentry && dentry->d_inode == i)
+ 		{
+ 			sock_hold(s);
+@@ -571,7 +576,7 @@
+  */
+ static struct lock_class_key af_unix_sk_receive_queue_lock_key;
+ 
+-static struct sock * unix_create1(struct socket *sock)
++static struct sock * unix_create1(struct net *net, struct socket *sock)
+ {
+ 	struct sock *sk = NULL;
+ 	struct unix_sock *u;
+@@ -579,7 +584,7 @@
+ 	if (atomic_read(&unix_nr_socks) >= 2*get_max_files())
+ 		goto out;
+ 
+-	sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
++	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, 1);
+ 	if (!sk)
+ 		goto out;
+ 
+@@ -590,7 +595,7 @@
+ 				&af_unix_sk_receive_queue_lock_key);
+ 
+ 	sk->sk_write_space	= unix_write_space;
+-	sk->sk_max_ack_backlog	= sysctl_unix_max_dgram_qlen;
++	sk->sk_max_ack_backlog	= net->sysctl_unix_max_dgram_qlen;
+ 	sk->sk_destruct		= unix_sock_destructor;
+ 	u	  = unix_sk(sk);
+ 	u->dentry = NULL;
+@@ -604,7 +609,7 @@
+ 	return sk;
+ }
+ 
+-static int unix_create(struct socket *sock, int protocol)
++static int unix_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	if (protocol && protocol != PF_UNIX)
+ 		return -EPROTONOSUPPORT;
+@@ -631,7 +636,7 @@
+ 		return -ESOCKTNOSUPPORT;
+ 	}
+ 
+-	return unix_create1(sock) ? 0 : -ENOMEM;
++	return unix_create1(net, sock) ? 0 : -ENOMEM;
+ }
+ 
+ static int unix_release(struct socket *sock)
+@@ -649,6 +654,7 @@
+ static int unix_autobind(struct socket *sock)
+ {
+ 	struct sock *sk = sock->sk;
++	struct net *net = sk->sk_net;
+ 	struct unix_sock *u = unix_sk(sk);
+ 	static u32 ordernum = 1;
+ 	struct unix_address * addr;
+@@ -675,7 +681,7 @@
+ 	spin_lock(&unix_table_lock);
+ 	ordernum = (ordernum+1)&0xFFFFF;
+ 
+-	if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
++	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
+ 				      addr->hash)) {
+ 		spin_unlock(&unix_table_lock);
+ 		/* Sanity yield. It is unusual case, but yet... */
+@@ -695,7 +701,8 @@
+ 	return err;
+ }
+ 
+-static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
++static struct sock *unix_find_other(struct net *net,
++				    struct sockaddr_un *sunname, int len,
+ 				    int type, unsigned hash, int *error)
+ {
+ 	struct sock *u;
+@@ -713,7 +720,7 @@
+ 		err = -ECONNREFUSED;
+ 		if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
+ 			goto put_fail;
+-		u=unix_find_socket_byinode(nd.dentry->d_inode);
++		u=unix_find_socket_byinode(net, nd.dentry->d_inode);
+ 		if (!u)
+ 			goto put_fail;
+ 
+@@ -729,7 +736,7 @@
+ 		}
+ 	} else {
+ 		err = -ECONNREFUSED;
+-		u=unix_find_socket_byname(sunname, len, type, hash);
++		u=unix_find_socket_byname(net, sunname, len, type, hash);
+ 		if (u) {
+ 			struct dentry *dentry;
+ 			dentry = unix_sk(u)->dentry;
+@@ -751,6 +758,7 @@
+ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
+ {
+ 	struct sock *sk = sock->sk;
++	struct net *net = sk->sk_net;
+ 	struct unix_sock *u = unix_sk(sk);
+ 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
+ 	struct dentry * dentry = NULL;
+@@ -825,7 +833,7 @@
+ 
+ 	if (!sunaddr->sun_path[0]) {
+ 		err = -EADDRINUSE;
+-		if (__unix_find_socket_byname(sunaddr, addr_len,
++		if (__unix_find_socket_byname(net, sunaddr, addr_len,
+ 					      sk->sk_type, hash)) {
+ 			unix_release_addr(addr);
+ 			goto out_unlock;
+@@ -891,6 +899,7 @@
+ 			      int alen, int flags)
+ {
+ 	struct sock *sk = sock->sk;
++	struct net *net = sk->sk_net;
+ 	struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
+ 	struct sock *other;
+ 	unsigned hash;
+@@ -907,7 +916,7 @@
+ 			goto out;
+ 
+ restart:
+-		other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
++		other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
+ 		if (!other)
+ 			goto out;
+ 
+@@ -987,6 +996,7 @@
+ {
+ 	struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
+ 	struct sock *sk = sock->sk;
++	struct net *net = sk->sk_net;
+ 	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
+ 	struct sock *newsk = NULL;
+ 	struct sock *other = NULL;
+@@ -1015,7 +1025,7 @@
+ 	err = -ENOMEM;
+ 
+ 	/* create new sock for complete connection */
+-	newsk = unix_create1(NULL);
++	newsk = unix_create1(net, NULL);
+ 	if (newsk == NULL)
+ 		goto out;
+ 
+@@ -1026,7 +1036,7 @@
+ 
+ restart:
+ 	/*  Find listening sock. */
+-	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
++	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
+ 	if (!other)
+ 		goto out;
+ 
+@@ -1305,6 +1315,7 @@
+ {
+ 	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
+ 	struct sock *sk = sock->sk;
++	struct net *net = sk->sk_net;
+ 	struct unix_sock *u = unix_sk(sk);
+ 	struct sockaddr_un *sunaddr=msg->msg_name;
+ 	struct sock *other = NULL;
+@@ -1368,7 +1379,7 @@
+ 		if (sunaddr == NULL)
+ 			goto out_free;
+ 
+-		other = unix_find_other(sunaddr, namelen, sk->sk_type,
++		other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
+ 					hash, &err);
+ 		if (other==NULL)
+ 			goto out_free;
+@@ -1974,12 +1985,18 @@
+ 
+ 
+ #ifdef CONFIG_PROC_FS
+-static struct sock *unix_seq_idx(int *iter, loff_t pos)
++struct unix_iter_state {
++	struct net *net;
++	int i;
++};
++static struct sock *unix_seq_idx(struct unix_iter_state *iter, loff_t pos)
+ {
+ 	loff_t off = 0;
+ 	struct sock *s;
+ 
+-	for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
++	for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
++		if (s->sk_net != iter->net)
++			continue;
+ 		if (off == pos)
+ 			return s;
+ 		++off;
+@@ -1990,17 +2007,24 @@
+ 
+ static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
+ {
++	struct unix_iter_state *iter = seq->private;
+ 	spin_lock(&unix_table_lock);
+-	return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
++	return *pos ? unix_seq_idx(iter, *pos - 1) : ((void *) 1);
+ }
+ 
+ static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+ {
++	struct unix_iter_state *iter = seq->private;
++	struct sock *sk = v;
+ 	++*pos;
+ 
+ 	if (v == (void *)1)
+-		return first_unix_socket(seq->private);
+-	return next_unix_socket(seq->private, v);
++		sk = first_unix_socket(&iter->i);
++	else
++		sk = next_unix_socket(&iter->i, sk);
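++	/* Skip sockets that belong to other network namespaces */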
++	while (sk && (sk->sk_net != iter->net))
++		sk = next_unix_socket(&iter->i, sk);
++	return sk;
+ }
+ 
+ static void unix_seq_stop(struct seq_file *seq, void *v)
+@@ -2064,7 +2088,7 @@
+ {
+ 	struct seq_file *seq;
+ 	int rc = -ENOMEM;
+-	int *iter = kmalloc(sizeof(int), GFP_KERNEL);
++	struct unix_iter_state *iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ 
+ 	if (!iter)
+ 		goto out;
+@@ -2075,7 +2099,8 @@
+ 
+ 	seq	     = file->private_data;
+ 	seq->private = iter;
+-	*iter = 0;
++	iter->net = get_net(PROC_NET(inode));
++	iter->i = 0;
+ out:
+ 	return rc;
+ out_kfree:
+@@ -2083,12 +2108,20 @@
+ 	goto out;
+ }
+ 
++static int unix_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct unix_iter_state *iter = seq->private;
++	put_net(iter->net);
++	return seq_release_private(inode, file);
++}
++
+ static const struct file_operations unix_seq_fops = {
+ 	.owner		= THIS_MODULE,
+ 	.open		= unix_seq_open,
+ 	.read		= seq_read,
+ 	.llseek		= seq_lseek,
+-	.release	= seq_release_private,
++	.release	= unix_seq_release,
+ };
+ 
+ #endif
+@@ -2099,6 +2132,33 @@
+ 	.owner	= THIS_MODULE,
+ };
+ 
++
++static int unix_net_init(struct net *net)
++{
++	int error = -ENOMEM;
++
++	net->sysctl_unix_max_dgram_qlen = 10;
++#ifdef CONFIG_PROC_FS
++	if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops))
++		goto out;
++#endif
++	unix_sysctl_register(net);
++	error = 0;
++out:
++	return error;
++}
++
++static void unix_net_exit(struct net *net)
++{
++	unix_sysctl_unregister(net);
++	proc_net_remove(net, "unix");
++}
++
++static struct pernet_operations unix_net_ops = {
++	.init = unix_net_init,
++	.exit = unix_net_exit,
++};
++
+ static int __init af_unix_init(void)
+ {
+ 	int rc = -1;
+@@ -2114,10 +2174,7 @@
+ 	}
+ 
+ 	sock_register(&unix_family_ops);
+-#ifdef CONFIG_PROC_FS
+-	proc_net_fops_create("unix", 0, &unix_seq_fops);
+-#endif
+-	unix_sysctl_register();
++	register_pernet_subsys(&unix_net_ops);
+ out:
+ 	return rc;
+ }
+@@ -2125,9 +2182,8 @@
+ static void __exit af_unix_exit(void)
+ {
+ 	sock_unregister(PF_UNIX);
+-	unix_sysctl_unregister();
+-	proc_net_remove("unix");
+ 	proto_unregister(&unix_proto);
++	unregister_pernet_subsys(&unix_net_ops);
+ }
+ 
+ module_init(af_unix_init);
+diff -Nurb linux-2.6.22-try2/net/unix/sysctl_net_unix.c linux-2.6.22-try2-netns/net/unix/sysctl_net_unix.c
+--- linux-2.6.22-try2/net/unix/sysctl_net_unix.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/unix/sysctl_net_unix.c	2007-12-19 22:49:20.000000000 -0500
+@@ -14,47 +14,71 @@
+ 
+ #include <net/af_unix.h>
+ 
+-static ctl_table unix_table[] = {
++static struct unix_sysctl_table {
++	struct ctl_table_header *sysctl_header;
++	struct ctl_table	unix_table[2];
++	struct ctl_table	unix_net_table[2];
++	struct ctl_table	unix_root_table[2];
++} unix_sysctl = {
++	.unix_table = {
+ 	{
+ 		.ctl_name	= NET_UNIX_MAX_DGRAM_QLEN,
+ 		.procname	= "max_dgram_qlen",
+-		.data		= &sysctl_unix_max_dgram_qlen,
++			.data		= &init_net.sysctl_unix_max_dgram_qlen,
+ 		.maxlen		= sizeof(int),
+ 		.mode		= 0644,
+ 		.proc_handler	= &proc_dointvec
+ 	},
+-	{ .ctl_name = 0 }
+-};
+-
+-static ctl_table unix_net_table[] = {
++		{}
++	},
++	.unix_net_table = {
+ 	{
+ 		.ctl_name	= NET_UNIX,
+ 		.procname	= "unix",
+ 		.mode		= 0555,
+-		.child		= unix_table
++			.child		= unix_sysctl.unix_table
+ 	},
+-	{ .ctl_name = 0 }
+-};
+-
+-static ctl_table unix_root_table[] = {
++		{}
++	},
++	.unix_root_table = {
+ 	{
+ 		.ctl_name	= CTL_NET,
+ 		.procname	= "net",
+ 		.mode		= 0555,
+-		.child		= unix_net_table
++			.child		= unix_sysctl.unix_net_table
+ 	},
+-	{ .ctl_name = 0 }
++		{}
++	}
+ };
+ 
+-static struct ctl_table_header * unix_sysctl_header;
+-
+-void unix_sysctl_register(void)
++void unix_sysctl_register(struct net *net)
+ {
+-	unix_sysctl_header = register_sysctl_table(unix_root_table);
++	struct unix_sysctl_table *table;
++	int i;
++
++	table = kmemdup(&unix_sysctl, sizeof(*table), GFP_KERNEL);
++	if (!table)
++		return;
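++	/* Rebase each data pointer from init_net's copy onto this net's copy */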
++	for (i = 0; i < ARRAY_SIZE(table->unix_table) - 1; i++)
++		table->unix_table[i].data += (char *)net - (char *)&init_net;
++
++	table->unix_net_table[0].child = table->unix_table;
++	table->unix_root_table[0].child = table->unix_net_table;
++
++	table->sysctl_header =
++		register_net_sysctl_table(net, table->unix_root_table);
++	if (!table->sysctl_header) {
++		kfree(table);
++		return;
++	}
++	net->unix_sysctl = table;
+ }
+ 
+-void unix_sysctl_unregister(void)
++void unix_sysctl_unregister(struct net *net)
+ {
+-	unregister_sysctl_table(unix_sysctl_header);
++	struct unix_sysctl_table *table = net->unix_sysctl;
++	if (table)
++		unregister_net_sysctl_table(table->sysctl_header);
++	kfree(table);
+ }
+ 
+diff -Nurb linux-2.6.22-try2/net/wanrouter/wanproc.c linux-2.6.22-try2-netns/net/wanrouter/wanproc.c
+--- linux-2.6.22-try2/net/wanrouter/wanproc.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/wanrouter/wanproc.c	2007-12-19 22:49:20.000000000 -0500
+@@ -28,6 +28,7 @@
+ #include <linux/wanrouter.h>	/* WAN router API definitions */
+ #include <linux/seq_file.h>
+ #include <linux/smp_lock.h>
++#include <net/net_namespace.h>
+ 
+ #include <asm/io.h>
+ 
+@@ -287,7 +288,7 @@
+ int __init wanrouter_proc_init(void)
+ {
+ 	struct proc_dir_entry *p;
+-	proc_router = proc_mkdir(ROUTER_NAME, proc_net);
++	proc_router = proc_mkdir(ROUTER_NAME, init_net.proc_net);
+ 	if (!proc_router)
+ 		goto fail;
+ 
+@@ -303,7 +304,7 @@
+ fail_stat:
+ 	remove_proc_entry("config", proc_router);
+ fail_config:
+-	remove_proc_entry(ROUTER_NAME, proc_net);
++	remove_proc_entry(ROUTER_NAME, init_net.proc_net);
+ fail:
+ 	return -ENOMEM;
+ }
+@@ -316,7 +317,7 @@
+ {
+ 	remove_proc_entry("config", proc_router);
+ 	remove_proc_entry("status", proc_router);
+-	remove_proc_entry(ROUTER_NAME, proc_net);
++	remove_proc_entry(ROUTER_NAME, init_net.proc_net);
+ }
+ 
+ /*
+diff -Nurb linux-2.6.22-try2/net/wireless/wext.c linux-2.6.22-try2-netns/net/wireless/wext.c
+--- linux-2.6.22-try2/net/wireless/wext.c	2007-12-19 13:37:59.000000000 -0500
++++ linux-2.6.22-try2-netns/net/wireless/wext.c	2007-12-19 22:49:20.000000000 -0500
+@@ -95,6 +95,7 @@
+ #include <linux/interrupt.h>
+ 
+ #include <linux/wireless.h>		/* Pretty obvious */
++#include <net/net_namespace.h>
+ #include <net/iw_handler.h>		/* New driver API */
+ #include <net/netlink.h>
+ #include <net/wext.h>
+@@ -672,7 +673,22 @@
+ 
+ static int wireless_seq_open(struct inode *inode, struct file *file)
+ {
+-	return seq_open(file, &wireless_seq_ops);
++	struct seq_file *seq;
++	int res;
++	res = seq_open(file, &wireless_seq_ops);
++	if (!res) {
++		seq = file->private_data;
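++		/* Hold the namespace until wireless_seq_release() drops it */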
++		seq->private = get_net(PROC_NET(inode));
++	}
++	return res;
++}
++
++static int wireless_seq_release(struct inode *inode, struct file *file)
++{
++	struct seq_file *seq = file->private_data;
++	struct net *net = seq->private;
++	put_net(net);
++	return seq_release(inode, file);
+ }
+ 
+ static const struct file_operations wireless_seq_fops = {
+@@ -680,17 +696,22 @@
+ 	.open    = wireless_seq_open,
+ 	.read    = seq_read,
+ 	.llseek  = seq_lseek,
+-	.release = seq_release,
++	.release = wireless_seq_release,
+ };
+ 
+-int __init wext_proc_init(void)
++int wext_proc_init(struct net *net)
+ {
+ 	/* Create /proc/net/wireless entry */
+-	if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops))
++	if (!proc_net_fops_create(net, "wireless", S_IRUGO, &wireless_seq_fops))
+ 		return -ENOMEM;
+ 
+ 	return 0;
+ }
++
++void wext_proc_exit(struct net *net)
++{
++	proc_net_remove(net, "wireless");
++}
+ #endif	/* CONFIG_PROC_FS */
+ 
+ /************************** IOCTL SUPPORT **************************/
+@@ -1010,7 +1031,7 @@
+  * Main IOCTl dispatcher.
+  * Check the type of IOCTL and call the appropriate wrapper...
+  */
+-static int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd)
++static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd)
+ {
+ 	struct net_device *dev;
+ 	iw_handler	handler;
+@@ -1019,7 +1040,7 @@
+ 	 * The copy_to/from_user() of ifr is also dealt with in there */
+ 
+ 	/* Make sure the device exist */
+-	if ((dev = __dev_get_by_name(ifr->ifr_name)) == NULL)
++	if ((dev = __dev_get_by_name(net, ifr->ifr_name)) == NULL)
+ 		return -ENODEV;
+ 
+ 	/* A bunch of special cases, then the generic case...
+@@ -1053,7 +1074,7 @@
+ }
+ 
+ /* entry point from dev ioctl */
+-int wext_handle_ioctl(struct ifreq *ifr, unsigned int cmd,
++int wext_handle_ioctl(struct net *net, struct ifreq *ifr, unsigned int cmd,
+ 		      void __user *arg)
+ {
+ 	int ret;
+@@ -1065,9 +1086,9 @@
+ 	    && !capable(CAP_NET_ADMIN))
+ 		return -EPERM;
+ 
+-	dev_load(ifr->ifr_name);
++	dev_load(net, ifr->ifr_name);
+ 	rtnl_lock();
+-	ret = wireless_process_ioctl(ifr, cmd);
++	ret = wireless_process_ioctl(net, ifr, cmd);
+ 	rtnl_unlock();
+ 	if (IW_IS_GET(cmd) && copy_to_user(arg, ifr, sizeof(struct ifreq)))
+ 		return -EFAULT;
+@@ -1111,8 +1132,13 @@
+ {
+ 	struct sk_buff *skb;
+ 
+-	while ((skb = skb_dequeue(&wireless_nlevent_queue)))
+-		rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
++	while ((skb = skb_dequeue(&wireless_nlevent_queue))) {
++		struct net_device *dev = skb->dev;
++		struct net *net = dev->nd_net;
++		skb->dev = NULL;
++		rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
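++		/* Drop the device reference taken when the event was queued */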
++		dev_put(dev);
++	}
+ }
+ 
+ static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0);
+@@ -1173,6 +1199,9 @@
+ 		kfree_skb(skb);
+ 		return;
+ 	}
++	/* Remember the device until we are in process context */
++	dev_hold(dev);
++	skb->dev = dev;
+ 	NETLINK_CB(skb).dst_group = RTNLGRP_LINK;
+ 	skb_queue_tail(&wireless_nlevent_queue, skb);
+ 	tasklet_schedule(&wireless_nlevent_tasklet);
+diff -Nurb linux-2.6.22-try2/net/x25/af_x25.c linux-2.6.22-try2-netns/net/x25/af_x25.c
+--- linux-2.6.22-try2/net/x25/af_x25.c	2007-12-19 13:38:00.000000000 -0500
++++ linux-2.6.22-try2-netns/net/x25/af_x25.c	2007-12-19 22:49:20.000000000 -0500
+@@ -191,6 +191,9 @@
+ 	struct net_device *dev = ptr;
+ 	struct x25_neigh *nb;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (dev->type == ARPHRD_X25
+ #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
+ 	 || dev->type == ARPHRD_ETHER
+@@ -466,10 +469,10 @@
+ 	.obj_size = sizeof(struct x25_sock),
+ };
+ 
+-static struct sock *x25_alloc_socket(void)
++static struct sock *x25_alloc_socket(struct net *net)
+ {
+ 	struct x25_sock *x25;
+-	struct sock *sk = sk_alloc(AF_X25, GFP_ATOMIC, &x25_proto, 1);
++	struct sock *sk = sk_alloc(net, AF_X25, GFP_ATOMIC, &x25_proto, 1);
+ 
+ 	if (!sk)
+ 		goto out;
+@@ -485,17 +488,20 @@
+ 	return sk;
+ }
+ 
+-static int x25_create(struct socket *sock, int protocol)
++static int x25_create(struct net *net, struct socket *sock, int protocol)
+ {
+ 	struct sock *sk;
+ 	struct x25_sock *x25;
+ 	int rc = -ESOCKTNOSUPPORT;
+ 
++	if (net != &init_net)
++		return -EAFNOSUPPORT;
++
+ 	if (sock->type != SOCK_SEQPACKET || protocol)
+ 		goto out;
+ 
+ 	rc = -ENOMEM;
+-	if ((sk = x25_alloc_socket()) == NULL)
++	if ((sk = x25_alloc_socket(net)) == NULL)
+ 		goto out;
+ 
+ 	x25 = x25_sk(sk);
+@@ -546,7 +552,7 @@
+ 	if (osk->sk_type != SOCK_SEQPACKET)
+ 		goto out;
+ 
+-	if ((sk = x25_alloc_socket()) == NULL)
++	if ((sk = x25_alloc_socket(osk->sk_net)) == NULL)
+ 		goto out;
+ 
+ 	x25 = x25_sk(sk);
+diff -Nurb linux-2.6.22-try2/net/x25/x25_dev.c linux-2.6.22-try2-netns/net/x25/x25_dev.c
+--- linux-2.6.22-try2/net/x25/x25_dev.c	2007-12-19 13:38:00.000000000 -0500
++++ linux-2.6.22-try2-netns/net/x25/x25_dev.c	2007-12-19 22:49:20.000000000 -0500
+@@ -95,6 +95,9 @@
+ 	struct sk_buff *nskb;
+ 	struct x25_neigh *nb;
+ 
++	if (dev->nd_net != &init_net)
++		goto drop;
++
+ 	nskb = skb_copy(skb, GFP_ATOMIC);
+ 	if (!nskb)
+ 		goto drop;
+diff -Nurb linux-2.6.22-try2/net/x25/x25_proc.c linux-2.6.22-try2-netns/net/x25/x25_proc.c
+--- linux-2.6.22-try2/net/x25/x25_proc.c	2007-12-19 13:38:00.000000000 -0500
++++ linux-2.6.22-try2-netns/net/x25/x25_proc.c	2007-12-19 22:49:20.000000000 -0500
+@@ -20,6 +20,7 @@
+ #include <linux/init.h>
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
++#include <net/net_namespace.h>
+ #include <net/sock.h>
+ #include <net/x25.h>
+ 
+@@ -301,7 +302,7 @@
+ 	struct proc_dir_entry *p;
+ 	int rc = -ENOMEM;
+ 
+-	x25_proc_dir = proc_mkdir("x25", proc_net);
++	x25_proc_dir = proc_mkdir("x25", init_net.proc_net);
+ 	if (!x25_proc_dir)
+ 		goto out;
+ 
+@@ -328,7 +329,7 @@
+ out_socket:
+ 	remove_proc_entry("route", x25_proc_dir);
+ out_route:
+-	remove_proc_entry("x25", proc_net);
++	remove_proc_entry("x25", init_net.proc_net);
+ 	goto out;
+ }
+ 
+@@ -337,7 +338,7 @@
+ 	remove_proc_entry("forward", x25_proc_dir);
+ 	remove_proc_entry("route", x25_proc_dir);
+ 	remove_proc_entry("socket", x25_proc_dir);
+-	remove_proc_entry("x25", proc_net);
++	remove_proc_entry("x25", init_net.proc_net);
+ }
+ 
+ #else /* CONFIG_PROC_FS */
+diff -Nurb linux-2.6.22-try2/net/x25/x25_route.c linux-2.6.22-try2-netns/net/x25/x25_route.c
+--- linux-2.6.22-try2/net/x25/x25_route.c	2007-12-19 13:38:00.000000000 -0500
++++ linux-2.6.22-try2-netns/net/x25/x25_route.c	2007-12-19 22:49:20.000000000 -0500
+@@ -129,7 +129,7 @@
+  */
+ struct net_device *x25_dev_get(char *devname)
+ {
+-	struct net_device *dev = dev_get_by_name(devname);
++	struct net_device *dev = dev_get_by_name(&init_net, devname);
+ 
+ 	if (dev &&
+ 	    (!(dev->flags & IFF_UP) || (dev->type != ARPHRD_X25
+diff -Nurb linux-2.6.22-try2/net/xfrm/xfrm_policy.c linux-2.6.22-try2-netns/net/xfrm/xfrm_policy.c
+--- linux-2.6.22-try2/net/xfrm/xfrm_policy.c	2007-12-19 13:38:00.000000000 -0500
++++ linux-2.6.22-try2-netns/net/xfrm/xfrm_policy.c	2007-12-19 22:49:20.000000000 -0500
+@@ -30,8 +30,6 @@
+ 
+ #include "xfrm_hash.h"
+ 
+-int sysctl_xfrm_larval_drop __read_mostly;
+-
+ DEFINE_MUTEX(xfrm_cfg_mutex);
+ EXPORT_SYMBOL(xfrm_cfg_mutex);
+ 
+@@ -1570,7 +1568,7 @@
+ 
+ 		if (unlikely(nx<0)) {
+ 			err = nx;
+-			if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
++			if (err == -EAGAIN && init_net.sysctl_xfrm_larval_drop) {
+ 				/* EREMOTE tells the caller to generate
+ 				 * a one-shot blackhole route.
+ 				 */
+@@ -1954,8 +1952,8 @@
+ void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
+ {
+ 	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
+-		dst->dev = &loopback_dev;
+-		dev_hold(&loopback_dev);
++		dst->dev = &init_net.loopback_dev;
++		dev_hold(dst->dev);
+ 		dev_put(dev);
+ 	}
+ }
+@@ -2357,6 +2355,11 @@
+ 
+ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+ {
++	struct net_device *dev = ptr;
++
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	switch (event) {
+ 	case NETDEV_DOWN:
+ 		xfrm_flush_bundles();
+diff -Nurb linux-2.6.22-try2/net/xfrm/xfrm_state.c linux-2.6.22-try2-netns/net/xfrm/xfrm_state.c
+--- linux-2.6.22-try2/net/xfrm/xfrm_state.c	2007-12-19 13:38:00.000000000 -0500
++++ linux-2.6.22-try2-netns/net/xfrm/xfrm_state.c	2007-12-19 22:49:20.000000000 -0500
+@@ -28,14 +28,6 @@
+ struct sock *xfrm_nl;
+ EXPORT_SYMBOL(xfrm_nl);
+ 
+-u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME;
+-EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
+-
+-u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE;
+-EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
+-
+-u32 sysctl_xfrm_acq_expires __read_mostly = 30;
+-
+ /* Each xfrm_state may be linked to two tables:
+ 
+    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
+@@ -665,8 +657,8 @@
+ 				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
+ 				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
+ 			}
+-			x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
+-			x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
++			x->lft.hard_add_expires_seconds = init_net.sysctl_xfrm_acq_expires;
++			x->timer.expires = jiffies + init_net.sysctl_xfrm_acq_expires*HZ;
+ 			add_timer(&x->timer);
+ 			xfrm_state_num++;
+ 			xfrm_hash_grow_check(x->bydst.next != NULL);
+@@ -815,9 +807,9 @@
+ 		x->props.family = family;
+ 		x->props.mode = mode;
+ 		x->props.reqid = reqid;
+-		x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
++		x->lft.hard_add_expires_seconds = init_net.sysctl_xfrm_acq_expires;
+ 		xfrm_state_hold(x);
+-		x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
++		x->timer.expires = jiffies + init_net.sysctl_xfrm_acq_expires*HZ;
+ 		add_timer(&x->timer);
+ 		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
+ 		h = xfrm_src_hash(daddr, saddr, family);
+@@ -1775,6 +1767,19 @@
+ 
+ EXPORT_SYMBOL(xfrm_init_state);
+ 
++
++static int xfrm_state_pernet_init(struct net *net)
++{
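++	/* Per-namespace defaults for the formerly global xfrm sysctls */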
++	net->sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
++	net->sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
++	net->sysctl_xfrm_acq_expires = 30;
++	return 0;
++}
++
++static struct pernet_operations xfrm_state_net_ops = {
++	.init = xfrm_state_pernet_init,
++};
++
+ void __init xfrm_state_init(void)
+ {
+ 	unsigned int sz;
+@@ -1789,5 +1794,7 @@
+ 	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
+ 
+ 	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
++
++	register_pernet_subsys(&xfrm_state_net_ops);
+ }
+ 
+diff -Nurb linux-2.6.22-try2/net/xfrm/xfrm_user.c linux-2.6.22-try2-netns/net/xfrm/xfrm_user.c
+--- linux-2.6.22-try2/net/xfrm/xfrm_user.c	2007-12-19 13:38:00.000000000 -0500
++++ linux-2.6.22-try2-netns/net/xfrm/xfrm_user.c	2007-12-19 22:49:20.000000000 -0500
+@@ -374,7 +374,8 @@
+ 	return err;
+ }
+ 
+-static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
++static struct xfrm_state *xfrm_state_construct(struct net *net,
++					       struct xfrm_usersa_info *p,
+ 					       struct rtattr **xfrma,
+ 					       int *errp)
+ {
+@@ -410,9 +411,9 @@
+ 		goto error;
+ 
+ 	x->km.seq = p->seq;
+-	x->replay_maxdiff = sysctl_xfrm_aevent_rseqth;
++	x->replay_maxdiff = net->sysctl_xfrm_aevent_rseqth;
+ 	/* sysctl_xfrm_aevent_etime is in 100ms units */
+-	x->replay_maxage = (sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M;
++	x->replay_maxage = (net->sysctl_xfrm_aevent_etime*HZ)/XFRM_AE_ETH_M;
+ 	x->preplay.bitmap = 0;
+ 	x->preplay.seq = x->replay.seq+x->replay_maxdiff;
+ 	x->preplay.oseq = x->replay.oseq +x->replay_maxdiff;
+@@ -436,6 +437,7 @@
+ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh,
+ 		struct rtattr **xfrma)
+ {
++	struct net *net = skb->sk->sk_net;
+ 	struct xfrm_usersa_info *p = NLMSG_DATA(nlh);
+ 	struct xfrm_state *x;
+ 	int err;
+@@ -445,7 +447,7 @@
+ 	if (err)
+ 		return err;
+ 
+-	x = xfrm_state_construct(p, xfrma, &err);
++	x = xfrm_state_construct(net, p, xfrma, &err);
+ 	if (!x)
+ 		return err;
+ 
+@@ -2559,7 +2561,7 @@
+ 
+ 	printk(KERN_INFO "Initializing XFRM netlink socket\n");
+ 
+-	nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX,
++	nlsk = netlink_kernel_create(&init_net, NETLINK_XFRM, XFRMNLGRP_MAX,
+ 				     xfrm_netlink_rcv, NULL, THIS_MODULE);
+ 	if (nlsk == NULL)
+ 		return -ENOMEM;
+diff -Nurb linux-2.6.22-try2/security/selinux/hooks.c linux-2.6.22-try2-netns/security/selinux/hooks.c
+--- linux-2.6.22-try2/security/selinux/hooks.c	2007-12-19 15:29:23.000000000 -0500
++++ linux-2.6.22-try2-netns/security/selinux/hooks.c	2007-12-19 22:49:20.000000000 -0500
+@@ -3231,8 +3231,8 @@
+ /* Range of port numbers used to automatically bind.
+    Need to determine whether we should perform a name_bind
+    permission check between the socket and the port number. */
+-#define ip_local_port_range_0 sysctl_local_port_range[0]
+-#define ip_local_port_range_1 sysctl_local_port_range[1]
++#define ip_local_port_range_0 (sk->sk_net->sysctl_local_port_range[0])
++#define ip_local_port_range_1 (sk->sk_net->sysctl_local_port_range[1])
+ 
+ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen)
+ {
+@@ -3976,6 +3976,10 @@
+ 						const struct net_device *out,
+ 						int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET);
+ }
+ 
+@@ -3987,6 +3991,10 @@
+ 						const struct net_device *out,
+ 						int (*okfn)(struct sk_buff *))
+ {
++	/* Only filter packets in the initial network namespace */
++	if ((in?in:out)->nd_net != &init_net)
++		return NF_ACCEPT;
++
+ 	return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET6);
+ }
+ 
+diff -Nurb linux-2.6.22-try2/security/selinux/netif.c linux-2.6.22-try2-netns/security/selinux/netif.c
+--- linux-2.6.22-try2/security/selinux/netif.c	2007-12-19 13:38:00.000000000 -0500
++++ linux-2.6.22-try2-netns/security/selinux/netif.c	2007-12-19 22:49:20.000000000 -0500
+@@ -20,6 +20,7 @@
+ #include <linux/notifier.h>
+ #include <linux/netdevice.h>
+ #include <linux/rcupdate.h>
++#include <net/net_namespace.h>
+ 
+ #include "security.h"
+ #include "objsec.h"
+@@ -234,6 +235,9 @@
+ {
+ 	struct net_device *dev = ptr;
+ 
++	if (dev->nd_net != &init_net)
++		return NOTIFY_DONE;
++
+ 	if (event == NETDEV_DOWN)
+ 		sel_netif_kill(dev);
+ 
+diff -Nurb linux-2.6.22-try2/security/selinux/netlink.c linux-2.6.22-try2-netns/security/selinux/netlink.c
+--- linux-2.6.22-try2/security/selinux/netlink.c	2007-12-19 13:38:00.000000000 -0500
++++ linux-2.6.22-try2-netns/security/selinux/netlink.c	2007-12-19 22:49:20.000000000 -0500
+@@ -17,6 +17,7 @@
+ #include <linux/skbuff.h>
+ #include <linux/netlink.h>
+ #include <linux/selinux_netlink.h>
++#include <net/net_namespace.h>
+ 
+ static struct sock *selnl;
+ 
+@@ -104,8 +105,8 @@
+ 
+ static int __init selnl_init(void)
+ {
+-	selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, NULL,
+-	                              THIS_MODULE);
++	selnl = netlink_kernel_create(&init_net, NETLINK_SELINUX,
++				      SELNLGRP_MAX, NULL, NULL, THIS_MODULE);
+ 	if (selnl == NULL)
+ 		panic("SELinux:  Cannot create netlink socket.");
+ 	netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV);