Fedora kernel-2.6.17-1.2142_FC4 patched with stable patch-2.6.17.4-vs2.0.2-rc26.diff
[linux-2.6.git] / arch / ia64 / sn / kernel / xpc_main.c
index 177ddb7..1def91f 100644 (file)
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2004-2006 Silicon Graphics, Inc.  All Rights Reserved.
  */
 
 
 #include <linux/syscalls.h>
 #include <linux/cache.h>
 #include <linux/interrupt.h>
-#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/reboot.h>
+#include <linux/completion.h>
 #include <asm/sn/intr.h>
 #include <asm/sn/sn_sal.h>
+#include <asm/kdebug.h>
 #include <asm/uaccess.h>
-#include "xpc.h"
+#include <asm/sn/xpc.h>
 
 
 /* define two XPC debug device structures to be used with dev_dbg() et al */
@@ -79,13 +82,22 @@ struct device *xpc_part = &xpc_part_dbg_subname;
 struct device *xpc_chan = &xpc_chan_dbg_subname;
 
 
+static int xpc_kdebug_ignore;
+
+
 /* systune related variables for /proc/sys directories */
 
-static int xpc_hb_min = 1;
-static int xpc_hb_max = 10;
+static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
+static int xpc_hb_min_interval = 1;
+static int xpc_hb_max_interval = 10;
 
-static int xpc_hb_check_min = 10;
-static int xpc_hb_check_max = 120;
+static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
+static int xpc_hb_check_min_interval = 10;
+static int xpc_hb_check_max_interval = 120;
+
+int xpc_disengage_request_timelimit = XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT;
+static int xpc_disengage_request_min_timelimit = 0;
+static int xpc_disengage_request_max_timelimit = 120;
 
 static ctl_table xpc_sys_xpc_hb_dir[] = {
        {
@@ -96,9 +108,11 @@ static ctl_table xpc_sys_xpc_hb_dir[] = {
                0644,
                NULL,
                &proc_dointvec_minmax,
+               NULL,
                &sysctl_intvec,
                NULL,
-               &xpc_hb_min, &xpc_hb_max
+               &xpc_hb_min_interval,
+               &xpc_hb_max_interval
        },
        {
                2,
@@ -108,9 +122,11 @@ static ctl_table xpc_sys_xpc_hb_dir[] = {
                0644,
                NULL,
                &proc_dointvec_minmax,
+               NULL,
                &sysctl_intvec,
                NULL,
-               &xpc_hb_check_min, &xpc_hb_check_max
+               &xpc_hb_check_min_interval,
+               &xpc_hb_check_max_interval
        },
        {0}
 };
@@ -123,6 +139,20 @@ static ctl_table xpc_sys_xpc_dir[] = {
                0555,
                xpc_sys_xpc_hb_dir
        },
+       {
+               2,
+               "disengage_request_timelimit",
+               &xpc_disengage_request_timelimit,
+               sizeof(int),
+               0644,
+               NULL,
+               &proc_dointvec_minmax,
+               NULL,
+               &sysctl_intvec,
+               NULL,
+               &xpc_disengage_request_min_timelimit,
+               &xpc_disengage_request_max_timelimit
+       },
        {0}
 };
 static ctl_table xpc_sys_dir[] = {
@@ -138,6 +168,8 @@ static ctl_table xpc_sys_dir[] = {
 };
 static struct ctl_table_header *xpc_sysctl;
 
+/* non-zero if any remote partition disengage request was timed out */
+int xpc_disengage_request_timedout;
 
 /* #of IRQs received */
 static atomic_t xpc_act_IRQ_rcvd;
@@ -147,11 +179,11 @@ static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
 
 static unsigned long xpc_hb_check_timeout;
 
-/* xpc_hb_checker thread exited notification */
-static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited);
+/* notification that the xpc_hb_checker thread has exited */
+static DECLARE_COMPLETION(xpc_hb_checker_exited);
 
-/* xpc_discovery thread exited notification */
-static DECLARE_MUTEX_LOCKED(xpc_discovery_exited);
+/* notification that the xpc_discovery thread has exited */
+static DECLARE_COMPLETION(xpc_discovery_exited);
 
 
 static struct timer_list xpc_hb_timer;
@@ -160,6 +192,35 @@ static struct timer_list xpc_hb_timer;
 static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
 
 
+static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
+static struct notifier_block xpc_reboot_notifier = {
+       .notifier_call = xpc_system_reboot,
+};
+
+static int xpc_system_die(struct notifier_block *, unsigned long, void *);
+static struct notifier_block xpc_die_notifier = {
+       .notifier_call = xpc_system_die,
+};
+
+
+/*
+ * Timer function to enforce the timelimit on the partition disengage request.
+ */
+static void
+xpc_timeout_partition_disengage_request(unsigned long data)
+{
+       struct xpc_partition *part = (struct xpc_partition *) data;
+
+
+       DBUG_ON(jiffies < part->disengage_request_timeout);
+
+       (void) xpc_partition_disengaged(part);
+
+       DBUG_ON(part->disengage_request_timeout != 0);
+       DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
+}
+
+
 /*
  * Notify the heartbeat check thread that an IRQ has been received.
  */
@@ -213,12 +274,6 @@ xpc_hb_checker(void *ignore)
 
        while (!(volatile int) xpc_exiting) {
 
-               /* wait for IRQ or timeout */
-               (void) wait_event_interruptible(xpc_act_IRQ_wq,
-                           (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
-                                       jiffies >= xpc_hb_check_timeout ||
-                                               (volatile int) xpc_exiting));
-
                dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
                        "been received\n",
                        (int) (xpc_hb_check_timeout - jiffies),
@@ -239,6 +294,7 @@ xpc_hb_checker(void *ignore)
                }
 
 
+               /* check for outstanding IRQs */
                new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
                if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
                        force_IRQ = 0;
@@ -256,13 +312,19 @@ xpc_hb_checker(void *ignore)
                        xpc_hb_check_timeout = jiffies +
                                           (xpc_hb_check_interval * HZ);
                }
+
+               /* wait for IRQ or timeout */
+               (void) wait_event_interruptible(xpc_act_IRQ_wq,
+                           (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
+                                       jiffies >= xpc_hb_check_timeout ||
+                                               (volatile int) xpc_exiting));
        }
 
        dev_dbg(xpc_part, "heartbeat checker is exiting\n");
 
 
-       /* mark this thread as inactive */
-       up(&xpc_hb_checker_exited);
+       /* mark this thread as having exited */
+       complete(&xpc_hb_checker_exited);
        return 0;
 }
 
@@ -281,8 +343,8 @@ xpc_initiate_discovery(void *ignore)
 
        dev_dbg(xpc_part, "discovery thread is exiting\n");
 
-       /* mark this thread as inactive */
-       up(&xpc_discovery_exited);
+       /* mark this thread as having exited */
+       complete(&xpc_discovery_exited);
        return 0;
 }
 
@@ -308,8 +370,7 @@ xpc_make_first_contact(struct xpc_partition *part)
                        "partition %d\n", XPC_PARTID(part));
 
                /* wait a 1/4 of a second or so */
-               set_current_state(TASK_INTERRUPTIBLE);
-               (void) schedule_timeout(0.25 * HZ);
+               (void) msleep_interruptible(250);
 
                if (part->act_state == XPC_P_DEACTIVATING) {
                        return part->reason;
@@ -336,7 +397,8 @@ static void
 xpc_channel_mgr(struct xpc_partition *part)
 {
        while (part->act_state != XPC_P_DEACTIVATING ||
-                               atomic_read(&part->nchannels_active) > 0) {
+                       atomic_read(&part->nchannels_active) > 0 ||
+                                       !xpc_partition_disengaged(part)) {
 
                xpc_process_channel_activity(part);
 
@@ -360,7 +422,8 @@ xpc_channel_mgr(struct xpc_partition *part)
                                (volatile u64) part->local_IPI_amo != 0 ||
                                ((volatile u8) part->act_state ==
                                                        XPC_P_DEACTIVATING &&
-                               atomic_read(&part->nchannels_active) == 0)));
+                               atomic_read(&part->nchannels_active) == 0 &&
+                               xpc_partition_disengaged(part))));
                atomic_set(&part->channel_mgr_requests, 1);
 
                // >>> Does it need to wakeup periodically as well? In case we
@@ -420,7 +483,7 @@ xpc_activating(void *__partid)
        partid_t partid = (u64) __partid;
        struct xpc_partition *part = &xpc_partitions[partid];
        unsigned long irq_flags;
-       struct sched_param param = { sched_priority: MAX_USER_RT_PRIO - 1 };
+       struct sched_param param = { sched_priority: MAX_RT_PRIO - 1 };
        int ret;
 
 
@@ -482,7 +545,7 @@ xpc_activating(void *__partid)
                return 0;
        }
 
-       XPC_ALLOW_HB(partid, xpc_vars);
+       xpc_allow_hb(partid, xpc_vars);
        xpc_IPI_send_activated(part);
 
 
@@ -492,6 +555,7 @@ xpc_activating(void *__partid)
         */
        (void) xpc_partition_up(part);
 
+       xpc_disallow_hb(partid, xpc_vars);
        xpc_mark_partition_inactive(part);
 
        if (part->reason == xpcReactivating) {
@@ -513,18 +577,21 @@ xpc_activate_partition(struct xpc_partition *part)
 
        spin_lock_irqsave(&part->act_lock, irq_flags);
 
-       pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0);
-
        DBUG_ON(part->act_state != XPC_P_INACTIVE);
 
-       if (pid > 0) {
-               part->act_state = XPC_P_ACTIVATION_REQ;
-               XPC_SET_REASON(part, xpcCloneKThread, __LINE__);
-       } else {
-               XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__);
-       }
+       part->act_state = XPC_P_ACTIVATION_REQ;
+       XPC_SET_REASON(part, xpcCloneKThread, __LINE__);
 
        spin_unlock_irqrestore(&part->act_lock, irq_flags);
+
+       pid = kernel_thread(xpc_activating, (void *) ((u64) partid), 0);
+
+       if (unlikely(pid <= 0)) {
+               spin_lock_irqsave(&part->act_lock, irq_flags);
+               part->act_state = XPC_P_INACTIVE;
+               XPC_SET_REASON(part, xpcCloneKThreadFailed, __LINE__);
+               spin_unlock_irqrestore(&part->act_lock, irq_flags);
+       }
 }
 
 
@@ -670,6 +737,7 @@ xpc_daemonize_kthread(void *args)
        struct xpc_partition *part = &xpc_partitions[partid];
        struct xpc_channel *ch;
        int n_needed;
+       unsigned long irq_flags;
 
 
        daemonize("xpc%02dc%d", partid, ch_number);
@@ -680,13 +748,20 @@ xpc_daemonize_kthread(void *args)
        ch = &part->channels[ch_number];
 
        if (!(ch->flags & XPC_C_DISCONNECTING)) {
-               DBUG_ON(!(ch->flags & XPC_C_CONNECTED));
 
                /* let registerer know that connection has been established */
 
-               if (atomic_read(&ch->kthreads_assigned) == 1) {
+               spin_lock_irqsave(&ch->lock, irq_flags);
+               if (!(ch->flags & XPC_C_CONNECTEDCALLOUT)) {
+                       ch->flags |= XPC_C_CONNECTEDCALLOUT;
+                       spin_unlock_irqrestore(&ch->lock, irq_flags);
+
                        xpc_connected_callout(ch);
 
+                       spin_lock_irqsave(&ch->lock, irq_flags);
+                       ch->flags |= XPC_C_CONNECTEDCALLOUT_MADE;
+                       spin_unlock_irqrestore(&ch->lock, irq_flags);
+
                        /*
                         * It is possible that while the callout was being
                         * made that the remote partition sent some messages.
@@ -699,16 +774,30 @@ xpc_daemonize_kthread(void *args)
                                        !(ch->flags & XPC_C_DISCONNECTING)) {
                                xpc_activate_kthreads(ch, n_needed);
                        }
+               } else {
+                       spin_unlock_irqrestore(&ch->lock, irq_flags);
                }
 
                xpc_kthread_waitmsgs(part, ch);
        }
 
-       if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
-                       ((ch->flags & XPC_C_CONNECTCALLOUT) ||
-                               (ch->reason != xpcUnregistering &&
-                                       ch->reason != xpcOtherUnregistering))) {
-               xpc_disconnected_callout(ch);
+       if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
+               spin_lock_irqsave(&ch->lock, irq_flags);
+               if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+                               !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+                       ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
+                       spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+                       xpc_disconnect_callout(ch, xpcDisconnecting);
+
+                       spin_lock_irqsave(&ch->lock, irq_flags);
+                       ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
+               }
+               spin_unlock_irqrestore(&ch->lock, irq_flags);
+               if (atomic_dec_return(&part->nchannels_engaged) == 0) {
+                       xpc_mark_partition_disengaged(part);
+                       xpc_IPI_send_disengage(part);
+               }
        }
 
 
@@ -740,12 +829,33 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
        unsigned long irq_flags;
        pid_t pid;
        u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
+       struct xpc_partition *part = &xpc_partitions[ch->partid];
 
 
        while (needed-- > 0) {
+
+               /*
+                * The following is done on behalf of the newly created
+                * kthread. That kthread is responsible for doing the
+                * counterpart to the following before it exits.
+                */
+               (void) xpc_part_ref(part);
+               xpc_msgqueue_ref(ch);
+               if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
+                   atomic_inc_return(&part->nchannels_engaged) == 1) {
+                       xpc_mark_partition_engaged(part);
+               }
+
                pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0);
                if (pid < 0) {
                        /* the fork failed */
+                       if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
+                           atomic_dec_return(&part->nchannels_engaged) == 0) {
+                               xpc_mark_partition_disengaged(part);
+                               xpc_IPI_send_disengage(part);
+                       }
+                       xpc_msgqueue_deref(ch);
+                       xpc_part_deref(part);
 
                        if (atomic_read(&ch->kthreads_assigned) <
                                                ch->kthreads_idle_limit) {
@@ -765,14 +875,6 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
                        break;
                }
 
-               /*
-                * The following is done on behalf of the newly created
-                * kthread. That kthread is responsible for doing the
-                * counterpart to the following before it exits.
-                */
-               (void) xpc_part_ref(&xpc_partitions[ch->partid]);
-               xpc_msgqueue_ref(ch);
-               atomic_inc(&ch->kthreads_assigned);
                ch->kthreads_created++; // >>> temporary debug only!!!
        }
 }
@@ -781,93 +883,167 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
 void
 xpc_disconnect_wait(int ch_number)
 {
+       unsigned long irq_flags;
        partid_t partid;
        struct xpc_partition *part;
        struct xpc_channel *ch;
+       int wakeup_channel_mgr;
 
 
        /* now wait for all callouts to the caller's function to cease */
        for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
                part = &xpc_partitions[partid];
 
-               if (xpc_part_ref(part)) {
-                       ch = &part->channels[ch_number];
+               if (!xpc_part_ref(part)) {
+                       continue;
+               }
 
-// >>> how do we keep from falling into the window between our check and going
-// >>> down and coming back up where sema is re-inited?
-                       if (ch->flags & XPC_C_SETUP) {
-                               (void) down(&ch->teardown_sema);
-                       }
+               ch = &part->channels[ch_number];
 
+               if (!(ch->flags & XPC_C_WDISCONNECT)) {
                        xpc_part_deref(part);
+                       continue;
+               }
+
+               wait_for_completion(&ch->wdisconnect_wait);
+
+               spin_lock_irqsave(&ch->lock, irq_flags);
+               DBUG_ON(!(ch->flags & XPC_C_DISCONNECTED));
+               wakeup_channel_mgr = 0;
+
+               if (ch->delayed_IPI_flags) {
+                       if (part->act_state != XPC_P_DEACTIVATING) {
+                               spin_lock(&part->IPI_lock);
+                               XPC_SET_IPI_FLAGS(part->local_IPI_amo,
+                                       ch->number, ch->delayed_IPI_flags);
+                               spin_unlock(&part->IPI_lock);
+                               wakeup_channel_mgr = 1;
+                       }
+                       ch->delayed_IPI_flags = 0;
+               }
+
+               ch->flags &= ~XPC_C_WDISCONNECT;
+               spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+               if (wakeup_channel_mgr) {
+                       xpc_wakeup_channel_mgr(part);
                }
+
+               xpc_part_deref(part);
        }
 }
 
 
 static void
-xpc_do_exit(void)
+xpc_do_exit(enum xpc_retval reason)
 {
        partid_t partid;
-       int active_part_count;
+       int active_part_count, printed_waiting_msg = 0;
        struct xpc_partition *part;
+       unsigned long printmsg_time, disengage_request_timeout = 0;
 
 
-       /* now it's time to eliminate our heartbeat */
-       del_timer_sync(&xpc_hb_timer);
-       xpc_vars->heartbeating_to_mask = 0;
-
-       /* indicate to others that our reserved page is uninitialized */
-       xpc_rsvd_page->vars_pa = 0;
-
-       /*
-        * Ignore all incoming interrupts. Without interupts the heartbeat
-        * checker won't activate any new partitions that may come up.
-        */
-       free_irq(SGI_XPC_ACTIVATE, NULL);
+       /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
+       DBUG_ON(xpc_exiting == 1);
 
        /*
-        * Cause the heartbeat checker and the discovery threads to exit.
-        * We don't want them attempting to activate new partitions as we
-        * try to deactivate the existing ones.
+        * Let the heartbeat checker thread and the discovery thread
+        * (if one is running) know that they should exit. Also wake up
+        * the heartbeat checker thread in case it's sleeping.
         */
        xpc_exiting = 1;
        wake_up_interruptible(&xpc_act_IRQ_wq);
 
-       /* wait for the heartbeat checker thread to mark itself inactive */
-       down(&xpc_hb_checker_exited);
+       /* ignore all incoming interrupts */
+       free_irq(SGI_XPC_ACTIVATE, NULL);
 
-       /* wait for the discovery thread to mark itself inactive */
-       down(&xpc_discovery_exited);
+       /* wait for the discovery thread to exit */
+       wait_for_completion(&xpc_discovery_exited);
 
+       /* wait for the heartbeat checker thread to exit */
+       wait_for_completion(&xpc_hb_checker_exited);
 
-       set_current_state(TASK_INTERRUPTIBLE);
-       schedule_timeout(0.3 * HZ);
-       set_current_state(TASK_RUNNING);
+
+       /* sleep for a 1/3 of a second or so */
+       (void) msleep_interruptible(300);
 
 
        /* wait for all partitions to become inactive */
 
+       printmsg_time = jiffies + (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+       xpc_disengage_request_timedout = 0;
+
        do {
                active_part_count = 0;
 
                for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
                        part = &xpc_partitions[partid];
-                       if (part->act_state != XPC_P_INACTIVE) {
-                               active_part_count++;
 
-                               XPC_DEACTIVATE_PARTITION(part, xpcUnloading);
+                       if (xpc_partition_disengaged(part) &&
+                                       part->act_state == XPC_P_INACTIVE) {
+                               continue;
+                       }
+
+                       active_part_count++;
+
+                       XPC_DEACTIVATE_PARTITION(part, reason);
+
+                       if (part->disengage_request_timeout >
+                                               disengage_request_timeout) {
+                               disengage_request_timeout =
+                                               part->disengage_request_timeout;
                        }
                }
 
-               if (active_part_count) {
-                       set_current_state(TASK_INTERRUPTIBLE);
-                       schedule_timeout(0.3 * HZ);
-                       set_current_state(TASK_RUNNING);
+               if (xpc_partition_engaged(-1UL)) {
+                       if (time_after(jiffies, printmsg_time)) {
+                               dev_info(xpc_part, "waiting for remote "
+                                       "partitions to disengage, timeout in "
+                                       "%ld seconds\n",
+                                       (disengage_request_timeout - jiffies)
+                                                                       / HZ);
+                               printmsg_time = jiffies +
+                                       (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
+                               printed_waiting_msg = 1;
+                       }
+
+               } else if (active_part_count > 0) {
+                       if (printed_waiting_msg) {
+                               dev_info(xpc_part, "waiting for local partition"
+                                       " to disengage\n");
+                               printed_waiting_msg = 0;
+                       }
+
+               } else {
+                       if (!xpc_disengage_request_timedout) {
+                               dev_info(xpc_part, "all partitions have "
+                                       "disengaged\n");
+                       }
+                       break;
                }
 
-       } while (active_part_count > 0);
+               /* sleep for a 1/3 of a second or so */
+               (void) msleep_interruptible(300);
+
+       } while (1);
+
+       DBUG_ON(xpc_partition_engaged(-1UL));
+
+
+       /* indicate to others that our reserved page is uninitialized */
+       xpc_rsvd_page->vars_pa = 0;
+
+       /* now it's time to eliminate our heartbeat */
+       del_timer_sync(&xpc_hb_timer);
+       DBUG_ON(xpc_vars->heartbeating_to_mask != 0);
 
+       if (reason == xpcUnloading) {
+               /* take ourselves off of the reboot_notifier_list */
+               (void) unregister_reboot_notifier(&xpc_reboot_notifier);
+
+               /* take ourselves off of the die_notifier list */
+               (void) unregister_die_notifier(&xpc_die_notifier);
+       }
 
        /* close down protections for IPI operations */
        xpc_restrict_IPI_ops();
@@ -882,6 +1058,156 @@ xpc_do_exit(void)
 }
 
 
+/*
+ * This function is called when the system is being rebooted.
+ */
+static int
+xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
+{
+       enum xpc_retval reason;
+
+
+       switch (event) {
+       case SYS_RESTART:
+               reason = xpcSystemReboot;
+               break;
+       case SYS_HALT:
+               reason = xpcSystemHalt;
+               break;
+       case SYS_POWER_OFF:
+               reason = xpcSystemPoweroff;
+               break;
+       default:
+               reason = xpcSystemGoingDown;
+       }
+
+       xpc_do_exit(reason);
+       return NOTIFY_DONE;
+}
+
+
+/*
+ * Notify other partitions to disengage from all references to our memory.
+ */
+static void
+xpc_die_disengage(void)
+{
+       struct xpc_partition *part;
+       partid_t partid;
+       unsigned long engaged;
+       long time, printmsg_time, disengage_request_timeout;
+
+
+       /* keep xpc_hb_checker thread from doing anything (just in case) */
+       xpc_exiting = 1;
+
+       xpc_vars->heartbeating_to_mask = 0;  /* indicate we're deactivated */
+
+       for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+               part = &xpc_partitions[partid];
+
+               if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part->
+                                                       remote_vars_version)) {
+
+                       /* just in case it was left set by an earlier XPC */
+                       xpc_clear_partition_engaged(1UL << partid);
+                       continue;
+               }
+
+               if (xpc_partition_engaged(1UL << partid) ||
+                                       part->act_state != XPC_P_INACTIVE) {
+                       xpc_request_partition_disengage(part);
+                       xpc_mark_partition_disengaged(part);
+                       xpc_IPI_send_disengage(part);
+               }
+       }
+
+       time = rtc_time();
+       printmsg_time = time +
+               (XPC_DISENGAGE_PRINTMSG_INTERVAL * sn_rtc_cycles_per_second);
+       disengage_request_timeout = time +
+               (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
+
+       /* wait for all other partitions to disengage from us */
+
+       while (1) {
+               engaged = xpc_partition_engaged(-1UL);
+               if (!engaged) {
+                       dev_info(xpc_part, "all partitions have disengaged\n");
+                       break;
+               }
+
+               time = rtc_time();
+               if (time >= disengage_request_timeout) {
+                       for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
+                               if (engaged & (1UL << partid)) {
+                                       dev_info(xpc_part, "disengage from "
+                                               "remote partition %d timed "
+                                               "out\n", partid);
+                               }
+                       }
+                       break;
+               }
+
+               if (time >= printmsg_time) {
+                       dev_info(xpc_part, "waiting for remote partitions to "
+                               "disengage, timeout in %ld seconds\n",
+                               (disengage_request_timeout - time) /
+                                               sn_rtc_cycles_per_second);
+                       printmsg_time = time +
+                                       (XPC_DISENGAGE_PRINTMSG_INTERVAL *
+                                               sn_rtc_cycles_per_second);
+               }
+       }
+}
+
+
+/*
+ * This function is called when the system is being restarted or halted due
+ * to some sort of system failure. If this is the case we need to notify the
+ * other partitions to disengage from all references to our memory.
+ * This function can also be called when our heartbeater could be offlined
+ * for a time. In this case we need to notify other partitions to not worry
+ * about the lack of a heartbeat.
+ */
+static int
+xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
+{
+       switch (event) {
+       case DIE_MACHINE_RESTART:
+       case DIE_MACHINE_HALT:
+               xpc_die_disengage();
+               break;
+
+       case DIE_KDEBUG_ENTER:
+               /* Should lack of heartbeat be ignored by other partitions? */
+               if (!xpc_kdebug_ignore) {
+                       break;
+               }
+               /* fall through */
+       case DIE_MCA_MONARCH_ENTER:
+       case DIE_INIT_MONARCH_ENTER:
+               xpc_vars->heartbeat++;
+               xpc_vars->heartbeat_offline = 1;
+               break;
+
+       case DIE_KDEBUG_LEAVE:
+               /* Is lack of heartbeat being ignored by other partitions? */
+               if (!xpc_kdebug_ignore) {
+                       break;
+               }
+               /* fall through */
+       case DIE_MCA_MONARCH_LEAVE:
+       case DIE_INIT_MONARCH_LEAVE:
+               xpc_vars->heartbeat++;
+               xpc_vars->heartbeat_offline = 0;
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
+
 int __init
 xpc_init(void)
 {
@@ -891,13 +1217,17 @@ xpc_init(void)
        pid_t pid;
 
 
+       if (!ia64_platform_is("sn2")) {
+               return -ENODEV;
+       }
+
        /*
         * xpc_remote_copy_buffer is used as a temporary buffer for bte_copy'ng
-        * both a partition's reserved page and its XPC variables. Its size was
-        * based on the size of a reserved page. So we need to ensure that the
-        * XPC variables will fit as well.
+        * various portions of a partition's reserved page. Its size is based
+        * on the size of the reserved page header and part_nasids mask. So we
+        * need to ensure that the other items will fit as well.
         */
-       if (XPC_VARS_ALIGNED_SIZE > XPC_RSVD_PAGE_ALIGNED_SIZE) {
+       if (XPC_RP_VARS_SIZE > XPC_RP_HEADER_SIZE + XP_NASID_MASK_BYTES) {
                dev_err(xpc_part, "xpc_remote_copy_buffer is not big enough\n");
                return -EPERM;
        }
@@ -926,6 +1256,12 @@ xpc_init(void)
                spin_lock_init(&part->act_lock);
                part->act_state = XPC_P_INACTIVE;
                XPC_SET_REASON(part, 0, 0);
+
+               init_timer(&part->disengage_request_timer);
+               part->disengage_request_timer.function =
+                               xpc_timeout_partition_disengage_request;
+               part->disengage_request_timer.data = (unsigned long) part;
+
                part->setup_state = XPC_P_UNSET;
                init_waitqueue_head(&part->teardown_wq);
                atomic_set(&part->references, 0);
@@ -982,6 +1318,19 @@ xpc_init(void)
        }
 
 
+       /* add ourselves to the reboot_notifier_list */
+       ret = register_reboot_notifier(&xpc_reboot_notifier);
+       if (ret != 0) {
+               dev_warn(xpc_part, "can't register reboot notifier\n");
+       }
+
+       /* add ourselves to the die_notifier list (i.e., ia64die_chain) */
+       ret = register_die_notifier(&xpc_die_notifier);
+       if (ret != 0) {
+               dev_warn(xpc_part, "can't register die notifier\n");
+       }
+
+
        /*
         * Set the beating to other partitions into motion.  This is
         * the last requirement for other partitions' discovery to
@@ -1003,6 +1352,12 @@ xpc_init(void)
                /* indicate to others that our reserved page is uninitialized */
                xpc_rsvd_page->vars_pa = 0;
 
+               /* take ourselves off of the reboot_notifier_list */
+               (void) unregister_reboot_notifier(&xpc_reboot_notifier);
+
+               /* take ourselves off of the die_notifier list */
+               (void) unregister_die_notifier(&xpc_die_notifier);
+
                del_timer_sync(&xpc_hb_timer);
                free_irq(SGI_XPC_ACTIVATE, NULL);
                xpc_restrict_IPI_ops();
@@ -1024,9 +1379,9 @@ xpc_init(void)
                dev_err(xpc_part, "failed while forking discovery thread\n");
 
                /* mark this new thread as a non-starter */
-               up(&xpc_discovery_exited);
+               complete(&xpc_discovery_exited);
 
-               xpc_do_exit();
+               xpc_do_exit(xpcUnloading);
                return -EBUSY;
        }
 
@@ -1045,7 +1400,7 @@ module_init(xpc_init);
 void __exit
 xpc_exit(void)
 {
-       xpc_do_exit();
+       xpc_do_exit(xpcUnloading);
 }
 module_exit(xpc_exit);
 
@@ -1062,3 +1417,11 @@ module_param(xpc_hb_check_interval, int, 0);
 MODULE_PARM_DESC(xpc_hb_check_interval, "Number of seconds between "
                "heartbeat checks.");
 
+module_param(xpc_disengage_request_timelimit, int, 0);
+MODULE_PARM_DESC(xpc_disengage_request_timelimit, "Number of seconds to wait "
+               "for disengage request to complete.");
+
+module_param(xpc_kdebug_ignore, int, 0);
+MODULE_PARM_DESC(xpc_kdebug_ignore, "Should lack of heartbeat be ignored by "
+               "other partitions when dropping into kdebug.");
+