This commit was manufactured by cvs2svn to create branch 'vserver'.
[linux-2.6.git] / arch / ia64 / sn / kernel / xpc_partition.c
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (c) 2004-2005 Silicon Graphics, Inc.  All Rights Reserved.
7  */
8
9
10 /*
11  * Cross Partition Communication (XPC) partition support.
12  *
13  *      This is the part of XPC that detects the presence/absence of
14  *      other partitions. It provides a heartbeat and monitors the
15  *      heartbeats of other partitions.
16  *
17  */
18
19
20 #include <linux/kernel.h>
21 #include <linux/sysctl.h>
22 #include <linux/cache.h>
23 #include <linux/mmzone.h>
24 #include <linux/nodemask.h>
25 #include <asm/sn/bte.h>
26 #include <asm/sn/intr.h>
27 #include <asm/sn/sn_sal.h>
28 #include <asm/sn/nodepda.h>
29 #include <asm/sn/addrs.h>
30 #include "xpc.h"
31
32
33 /* XPC is exiting flag */
34 int xpc_exiting;
35
36
37 /* SH_IPI_ACCESS shub register value on startup */
38 static u64 xpc_sh1_IPI_access;
39 static u64 xpc_sh2_IPI_access0;
40 static u64 xpc_sh2_IPI_access1;
41 static u64 xpc_sh2_IPI_access2;
42 static u64 xpc_sh2_IPI_access3;
43
44
45 /* original protection values for each node */
46 u64 xpc_prot_vec[MAX_COMPACT_NODES];
47
48
49 /* this partition's reserved page */
50 struct xpc_rsvd_page *xpc_rsvd_page;
51
52 /* this partition's XPC variables (within the reserved page) */
53 struct xpc_vars *xpc_vars;
54 struct xpc_vars_part *xpc_vars_part;
55
56
57 /*
58  * For performance reasons, each entry of xpc_partitions[] is cacheline
59  * aligned. And xpc_partitions[] is padded with an additional entry at the
60  * end so that the last legitimate entry doesn't share its cacheline with
61  * another variable.
62  */
63 struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
64
65
66 /*
67  * Generic buffer used to store a local copy of the remote partitions
68  * reserved page or XPC variables.
69  *
70  * xpc_discovery runs only once and is a separate thread that is
71  * very likely going to be processing in parallel with receiving
72  * interrupts.
73  */
74 char ____cacheline_aligned
75                 xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE];
76
77
78 /* systune related variables */
79 int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
80 int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
81
82
83 /*
84  * Given a nasid, get the physical address of the  partition's reserved page
85  * for that nasid. This function returns 0 on any error.
86  */
87 static u64
88 xpc_get_rsvd_page_pa(int nasid, u64 buf, u64 buf_size)
89 {
90         bte_result_t bte_res;
91         s64 status;
92         u64 cookie = 0;
93         u64 rp_pa = nasid;      /* seed with nasid */
94         u64 len = 0;
95
96
97         while (1) {
98
99                 status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
100                                                                 &len);
101
102                 dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
103                         "0x%016lx, address=0x%016lx, len=0x%016lx\n",
104                         status, cookie, rp_pa, len);
105
106                 if (status != SALRET_MORE_PASSES) {
107                         break;
108                 }
109
110                 if (len > buf_size) {
111                         dev_err(xpc_part, "len (=0x%016lx) > buf_size\n", len);
112                         status = SALRET_ERROR;
113                         break;
114                 }
115
116                 bte_res = xp_bte_copy(rp_pa, ia64_tpa(buf), buf_size,
117                                         (BTE_NOTIFY | BTE_WACQUIRE), NULL);
118                 if (bte_res != BTE_SUCCESS) {
119                         dev_dbg(xpc_part, "xp_bte_copy failed %i\n", bte_res);
120                         status = SALRET_ERROR;
121                         break;
122                 }
123         }
124
125         if (status != SALRET_OK) {
126                 rp_pa = 0;
127         }
128         dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
129         return rp_pa;
130 }
131
132
133 /*
134  * Fill the partition reserved page with the information needed by
135  * other partitions to discover we are alive and establish initial
136  * communications.
137  */
138 struct xpc_rsvd_page *
139 xpc_rsvd_page_init(void)
140 {
141         struct xpc_rsvd_page *rp;
142         AMO_t *amos_page;
143         u64 rp_pa, next_cl, nasid_array = 0;
144         int i, ret;
145
146
147         /* get the local reserved page's address */
148
149         rp_pa = xpc_get_rsvd_page_pa(cnodeid_to_nasid(0),
150                                         (u64) xpc_remote_copy_buffer,
151                                                 XPC_RSVD_PAGE_ALIGNED_SIZE);
152         if (rp_pa == 0) {
153                 dev_err(xpc_part, "SAL failed to locate the reserved page\n");
154                 return NULL;
155         }
156         rp = (struct xpc_rsvd_page *) __va(rp_pa);
157
158         if (rp->partid != sn_partition_id) {
159                 dev_err(xpc_part, "the reserved page's partid of %d should be "
160                         "%d\n", rp->partid, sn_partition_id);
161                 return NULL;
162         }
163
164         rp->version = XPC_RP_VERSION;
165
166         /*
167          * Place the XPC variables on the cache line following the
168          * reserved page structure.
169          */
170         next_cl = (u64) rp + XPC_RSVD_PAGE_ALIGNED_SIZE;
171         xpc_vars = (struct xpc_vars *) next_cl;
172
173         /*
174          * Before clearing xpc_vars, see if a page of AMOs had been previously
175          * allocated. If not we'll need to allocate one and set permissions
176          * so that cross-partition AMOs are allowed.
177          *
178          * The allocated AMO page needs MCA reporting to remain disabled after
179          * XPC has unloaded.  To make this work, we keep a copy of the pointer
180          * to this page (i.e., amos_page) in the struct xpc_vars structure,
181          * which is pointed to by the reserved page, and re-use that saved copy
182          * on subsequent loads of XPC. This AMO page is never freed, and its
183          * memory protections are never restricted.
184          */
185         if ((amos_page = xpc_vars->amos_page) == NULL) {
186                 amos_page = (AMO_t *) mspec_kalloc_page(0);
187                 if (amos_page == NULL) {
188                         dev_err(xpc_part, "can't allocate page of AMOs\n");
189                         return NULL;
190                 }
191
192                 /*
193                  * Open up AMO-R/W to cpu.  This is done for Shub 1.1 systems
194                  * when xpc_allow_IPI_ops() is called via xpc_hb_init().
195                  */
196                 if (!enable_shub_wars_1_1()) {
197                         ret = sn_change_memprotect(ia64_tpa((u64) amos_page),
198                                         PAGE_SIZE, SN_MEMPROT_ACCESS_CLASS_1,
199                                         &nasid_array);
200                         if (ret != 0) {
201                                 dev_err(xpc_part, "can't change memory "
202                                         "protections\n");
203                                 mspec_kfree_page((unsigned long) amos_page);
204                                 return NULL;
205                         }
206                 }
207         } else if (!IS_AMO_ADDRESS((u64) amos_page)) {
208                 /*
209                  * EFI's XPBOOT can also set amos_page in the reserved page,
210                  * but it happens to leave it as an uncached physical address
211                  * and we need it to be an uncached virtual, so we'll have to
212                  * convert it.
213                  */
214                 if (!IS_AMO_PHYS_ADDRESS((u64) amos_page)) {
215                         dev_err(xpc_part, "previously used amos_page address "
216                                 "is bad = 0x%p\n", (void *) amos_page);
217                         return NULL;
218                 }
219                 amos_page = (AMO_t *) TO_AMO((u64) amos_page);
220         }
221
222         memset(xpc_vars, 0, sizeof(struct xpc_vars));
223
224         /*
225          * Place the XPC per partition specific variables on the cache line
226          * following the XPC variables structure.
227          */
228         next_cl += XPC_VARS_ALIGNED_SIZE;
229         memset((u64 *) next_cl, 0, sizeof(struct xpc_vars_part) *
230                                                         XP_MAX_PARTITIONS);
231         xpc_vars_part = (struct xpc_vars_part *) next_cl;
232         xpc_vars->vars_part_pa = __pa(next_cl);
233
234         xpc_vars->version = XPC_V_VERSION;
235         xpc_vars->act_nasid = cpuid_to_nasid(0);
236         xpc_vars->act_phys_cpuid = cpu_physical_id(0);
237         xpc_vars->amos_page = amos_page;  /* save for next load of XPC */
238
239
240         /*
241          * Initialize the activation related AMO variables.
242          */
243         xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS);
244         for (i = 1; i < XP_NASID_MASK_WORDS; i++) {
245                 xpc_IPI_init(i + XP_MAX_PARTITIONS);
246         }
247         /* export AMO page's physical address to other partitions */
248         xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page);
249
250         /*
251          * This signifies to the remote partition that our reserved
252          * page is initialized.
253          */
254         (volatile u64) rp->vars_pa = __pa(xpc_vars);
255
256         return rp;
257 }
258
259
260 /*
261  * Change protections to allow IPI operations (and AMO operations on
262  * Shub 1.1 systems).
263  */
264 void
265 xpc_allow_IPI_ops(void)
266 {
267         int node;
268         int nasid;
269
270
271         // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
272
273         if (is_shub2()) {
274                 xpc_sh2_IPI_access0 =
275                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
276                 xpc_sh2_IPI_access1 =
277                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
278                 xpc_sh2_IPI_access2 =
279                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
280                 xpc_sh2_IPI_access3 =
281                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
282
283                 for_each_online_node(node) {
284                         nasid = cnodeid_to_nasid(node);
285                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
286                                                                 -1UL);
287                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
288                                                                 -1UL);
289                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
290                                                                 -1UL);
291                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
292                                                                 -1UL);
293                 }
294
295         } else {
296                 xpc_sh1_IPI_access =
297                         (u64) HUB_L((u64 *) LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
298
299                 for_each_online_node(node) {
300                         nasid = cnodeid_to_nasid(node);
301                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
302                                                                 -1UL);
303
304                         /*
305                          * Since the BIST collides with memory operations on
306                          * SHUB 1.1 sn_change_memprotect() cannot be used.
307                          */
308                         if (enable_shub_wars_1_1()) {
309                                 /* open up everything */
310                                 xpc_prot_vec[node] = (u64) HUB_L((u64 *)
311                                                 GLOBAL_MMR_ADDR(nasid,
312                                                 SH1_MD_DQLP_MMR_DIR_PRIVEC0));
313                                 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
314                                                 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
315                                                                 -1UL);
316                                 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
317                                                 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
318                                                                 -1UL);
319                         }
320                 }
321         }
322 }
323
324
325 /*
326  * Restrict protections to disallow IPI operations (and AMO operations on
327  * Shub 1.1 systems).
328  */
329 void
330 xpc_restrict_IPI_ops(void)
331 {
332         int node;
333         int nasid;
334
335
336         // >>> Change SH_IPI_ACCESS code to use SAL call once it is available.
337
338         if (is_shub2()) {
339
340                 for_each_online_node(node) {
341                         nasid = cnodeid_to_nasid(node);
342                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
343                                                         xpc_sh2_IPI_access0);
344                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
345                                                         xpc_sh2_IPI_access1);
346                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
347                                                         xpc_sh2_IPI_access2);
348                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
349                                                         xpc_sh2_IPI_access3);
350                 }
351
352         } else {
353
354                 for_each_online_node(node) {
355                         nasid = cnodeid_to_nasid(node);
356                         HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
357                                                         xpc_sh1_IPI_access);
358
359                         if (enable_shub_wars_1_1()) {
360                                 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
361                                                 SH1_MD_DQLP_MMR_DIR_PRIVEC0),
362                                                         xpc_prot_vec[node]);
363                                 HUB_S((u64 *) GLOBAL_MMR_ADDR(nasid,
364                                                 SH1_MD_DQRP_MMR_DIR_PRIVEC0),
365                                                         xpc_prot_vec[node]);
366                         }
367                 }
368         }
369 }
370
371
372 /*
373  * At periodic intervals, scan through all active partitions and ensure
374  * their heartbeat is still active.  If not, the partition is deactivated.
375  */
376 void
377 xpc_check_remote_hb(void)
378 {
379         struct xpc_vars *remote_vars;
380         struct xpc_partition *part;
381         partid_t partid;
382         bte_result_t bres;
383
384
385         remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
386
387         for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
388                 if (partid == sn_partition_id) {
389                         continue;
390                 }
391
392                 part = &xpc_partitions[partid];
393
394                 if (part->act_state == XPC_P_INACTIVE ||
395                                 part->act_state == XPC_P_DEACTIVATING) {
396                         continue;
397                 }
398
399                 /* pull the remote_hb cache line */
400                 bres = xp_bte_copy(part->remote_vars_pa,
401                                         ia64_tpa((u64) remote_vars),
402                                         XPC_VARS_ALIGNED_SIZE,
403                                         (BTE_NOTIFY | BTE_WACQUIRE), NULL);
404                 if (bres != BTE_SUCCESS) {
405                         XPC_DEACTIVATE_PARTITION(part,
406                                                 xpc_map_bte_errors(bres));
407                         continue;
408                 }
409
410                 dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
411                         " = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid,
412                         remote_vars->heartbeat, part->last_heartbeat,
413                         remote_vars->kdb_status,
414                         remote_vars->heartbeating_to_mask);
415
416                 if (((remote_vars->heartbeat == part->last_heartbeat) &&
417                         (remote_vars->kdb_status == 0)) ||
418                              !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
419
420                         XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
421                         continue;
422                 }
423
424                 part->last_heartbeat = remote_vars->heartbeat;
425         }
426 }
427
428
429 /*
430  * Get a copy of the remote partition's rsvd page.
431  *
432  * remote_rp points to a buffer that is cacheline aligned for BTE copies and
433  * assumed to be of size XPC_RSVD_PAGE_ALIGNED_SIZE.
434  */
435 static enum xpc_retval
436 xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
437                 struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa)
438 {
439         int bres, i;
440
441
442         /* get the reserved page's physical address */
443
444         *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp,
445                                                 XPC_RSVD_PAGE_ALIGNED_SIZE);
446         if (*remote_rsvd_page_pa == 0) {
447                 return xpcNoRsvdPageAddr;
448         }
449
450
451         /* pull over the reserved page structure */
452
453         bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp),
454                                 XPC_RSVD_PAGE_ALIGNED_SIZE,
455                                 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
456         if (bres != BTE_SUCCESS) {
457                 return xpc_map_bte_errors(bres);
458         }
459
460
461         if (discovered_nasids != NULL) {
462                 for (i = 0; i < XP_NASID_MASK_WORDS; i++) {
463                         discovered_nasids[i] |= remote_rp->part_nasids[i];
464                 }
465         }
466
467
468         /* check that the partid is for another partition */
469
470         if (remote_rp->partid < 1 ||
471                                 remote_rp->partid > (XP_MAX_PARTITIONS - 1)) {
472                 return xpcInvalidPartid;
473         }
474
475         if (remote_rp->partid == sn_partition_id) {
476                 return xpcLocalPartid;
477         }
478
479
480         if (XPC_VERSION_MAJOR(remote_rp->version) !=
481                                         XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
482                 return xpcBadVersion;
483         }
484
485         return xpcSuccess;
486 }
487
488
489 /*
490  * Get a copy of the remote partition's XPC variables.
491  *
492  * remote_vars points to a buffer that is cacheline aligned for BTE copies and
493  * assumed to be of size XPC_VARS_ALIGNED_SIZE.
494  */
495 static enum xpc_retval
496 xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
497 {
498         int bres;
499
500
501         if (remote_vars_pa == 0) {
502                 return xpcVarsNotSet;
503         }
504
505
506         /* pull over the cross partition variables */
507
508         bres = xp_bte_copy(remote_vars_pa, ia64_tpa((u64) remote_vars),
509                                 XPC_VARS_ALIGNED_SIZE,
510                                 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
511         if (bres != BTE_SUCCESS) {
512                 return xpc_map_bte_errors(bres);
513         }
514
515         if (XPC_VERSION_MAJOR(remote_vars->version) !=
516                                         XPC_VERSION_MAJOR(XPC_V_VERSION)) {
517                 return xpcBadVersion;
518         }
519
520         return xpcSuccess;
521 }
522
523
524 /*
525  * Prior code has determine the nasid which generated an IPI.  Inspect
526  * that nasid to determine if its partition needs to be activated or
527  * deactivated.
528  *
529  * A partition is consider "awaiting activation" if our partition
530  * flags indicate it is not active and it has a heartbeat.  A
531  * partition is considered "awaiting deactivation" if our partition
532  * flags indicate it is active but it has no heartbeat or it is not
533  * sending its heartbeat to us.
534  *
535  * To determine the heartbeat, the remote nasid must have a properly
536  * initialized reserved page.
537  */
538 static void
539 xpc_identify_act_IRQ_req(int nasid)
540 {
541         struct xpc_rsvd_page *remote_rp;
542         struct xpc_vars *remote_vars;
543         u64 remote_rsvd_page_pa;
544         u64 remote_vars_pa;
545         partid_t partid;
546         struct xpc_partition *part;
547         enum xpc_retval ret;
548
549
550         /* pull over the reserved page structure */
551
552         remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
553
554         ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa);
555         if (ret != xpcSuccess) {
556                 dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
557                         "which sent interrupt, reason=%d\n", nasid, ret);
558                 return;
559         }
560
561         remote_vars_pa = remote_rp->vars_pa;
562         partid = remote_rp->partid;
563         part = &xpc_partitions[partid];
564
565
566         /* pull over the cross partition variables */
567
568         remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
569
570         ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
571         if (ret != xpcSuccess) {
572
573                 dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
574                         "which sent interrupt, reason=%d\n", nasid, ret);
575
576                 XPC_DEACTIVATE_PARTITION(part, ret);
577                 return;
578         }
579
580
581         part->act_IRQ_rcvd++;
582
583         dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
584                 "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
585                 remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
586
587
588         if (part->act_state == XPC_P_INACTIVE) {
589
590                 part->remote_rp_pa = remote_rsvd_page_pa;
591                 dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n",
592                         part->remote_rp_pa);
593
594                 part->remote_vars_pa = remote_vars_pa;
595                 dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
596                         part->remote_vars_pa);
597
598                 part->last_heartbeat = remote_vars->heartbeat;
599                 dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
600                         part->last_heartbeat);
601
602                 part->remote_vars_part_pa = remote_vars->vars_part_pa;
603                 dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
604                         part->remote_vars_part_pa);
605
606                 part->remote_act_nasid = remote_vars->act_nasid;
607                 dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
608                         part->remote_act_nasid);
609
610                 part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
611                 dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
612                         part->remote_act_phys_cpuid);
613
614                 part->remote_amos_page_pa = remote_vars->amos_page_pa;
615                 dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
616                         part->remote_amos_page_pa);
617
618                 xpc_activate_partition(part);
619
620         } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa ||
621                         !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) {
622
623                 part->reactivate_nasid = nasid;
624                 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
625         }
626 }
627
628
629 /*
630  * Loop through the activation AMO variables and process any bits
631  * which are set.  Each bit indicates a nasid sending a partition
632  * activation or deactivation request.
633  *
634  * Return #of IRQs detected.
635  */
636 int
637 xpc_identify_act_IRQ_sender(void)
638 {
639         int word, bit;
640         u64 nasid_mask;
641         u64 nasid;                      /* remote nasid */
642         int n_IRQs_detected = 0;
643         AMO_t *act_amos;
644         struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
645
646
647         act_amos = xpc_vars->act_amos;
648
649
650         /* scan through act AMO variable looking for non-zero entries */
651         for (word = 0; word < XP_NASID_MASK_WORDS; word++) {
652
653                 nasid_mask = xpc_IPI_receive(&act_amos[word]);
654                 if (nasid_mask == 0) {
655                         /* no IRQs from nasids in this variable */
656                         continue;
657                 }
658
659                 dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
660                         nasid_mask);
661
662
663                 /*
664                  * If this nasid has been added to the machine since
665                  * our partition was reset, this will retain the
666                  * remote nasid in our reserved pages machine mask.
667                  * This is used in the event of module reload.
668                  */
669                 rp->mach_nasids[word] |= nasid_mask;
670
671
672                 /* locate the nasid(s) which sent interrupts */
673
674                 for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
675                         if (nasid_mask & (1UL << bit)) {
676                                 n_IRQs_detected++;
677                                 nasid = XPC_NASID_FROM_W_B(word, bit);
678                                 dev_dbg(xpc_part, "interrupt from nasid %ld\n",
679                                         nasid);
680                                 xpc_identify_act_IRQ_req(nasid);
681                         }
682                 }
683         }
684         return n_IRQs_detected;
685 }
686
687
688 /*
689  * Mark specified partition as active.
690  */
691 enum xpc_retval
692 xpc_mark_partition_active(struct xpc_partition *part)
693 {
694         unsigned long irq_flags;
695         enum xpc_retval ret;
696
697
698         dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
699
700         spin_lock_irqsave(&part->act_lock, irq_flags);
701         if (part->act_state == XPC_P_ACTIVATING) {
702                 part->act_state = XPC_P_ACTIVE;
703                 ret = xpcSuccess;
704         } else {
705                 DBUG_ON(part->reason == xpcSuccess);
706                 ret = part->reason;
707         }
708         spin_unlock_irqrestore(&part->act_lock, irq_flags);
709
710         return ret;
711 }
712
713
714 /*
715  * Notify XPC that the partition is down.
716  */
717 void
718 xpc_deactivate_partition(const int line, struct xpc_partition *part,
719                                 enum xpc_retval reason)
720 {
721         unsigned long irq_flags;
722         partid_t partid = XPC_PARTID(part);
723
724
725         spin_lock_irqsave(&part->act_lock, irq_flags);
726
727         if (part->act_state == XPC_P_INACTIVE) {
728                 XPC_SET_REASON(part, reason, line);
729                 spin_unlock_irqrestore(&part->act_lock, irq_flags);
730                 if (reason == xpcReactivating) {
731                         /* we interrupt ourselves to reactivate partition */
732                         xpc_IPI_send_reactivate(part);
733                 }
734                 return;
735         }
736         if (part->act_state == XPC_P_DEACTIVATING) {
737                 if ((part->reason == xpcUnloading && reason != xpcUnloading) ||
738                                         reason == xpcReactivating) {
739                         XPC_SET_REASON(part, reason, line);
740                 }
741                 spin_unlock_irqrestore(&part->act_lock, irq_flags);
742                 return;
743         }
744
745         part->act_state = XPC_P_DEACTIVATING;
746         XPC_SET_REASON(part, reason, line);
747
748         spin_unlock_irqrestore(&part->act_lock, irq_flags);
749
750         XPC_DISALLOW_HB(partid, xpc_vars);
751
752         dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid,
753                 reason);
754
755         xpc_partition_down(part, reason);
756 }
757
758
759 /*
760  * Mark specified partition as active.
761  */
762 void
763 xpc_mark_partition_inactive(struct xpc_partition *part)
764 {
765         unsigned long irq_flags;
766
767
768         dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
769                 XPC_PARTID(part));
770
771         spin_lock_irqsave(&part->act_lock, irq_flags);
772         part->act_state = XPC_P_INACTIVE;
773         spin_unlock_irqrestore(&part->act_lock, irq_flags);
774         part->remote_rp_pa = 0;
775 }
776
777
778 /*
779  * SAL has provided a partition and machine mask.  The partition mask
780  * contains a bit for each even nasid in our partition.  The machine
781  * mask contains a bit for each even nasid in the entire machine.
782  *
783  * Using those two bit arrays, we can determine which nasids are
784  * known in the machine.  Each should also have a reserved page
785  * initialized if they are available for partitioning.
786  */
787 void
788 xpc_discovery(void)
789 {
790         void *remote_rp_base;
791         struct xpc_rsvd_page *remote_rp;
792         struct xpc_vars *remote_vars;
793         u64 remote_rsvd_page_pa;
794         u64 remote_vars_pa;
795         int region;
796         int max_regions;
797         int nasid;
798         struct xpc_rsvd_page *rp;
799         partid_t partid;
800         struct xpc_partition *part;
801         u64 *discovered_nasids;
802         enum xpc_retval ret;
803
804
805         remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RSVD_PAGE_ALIGNED_SIZE,
806                                                 GFP_KERNEL, &remote_rp_base);
807         if (remote_rp == NULL) {
808                 return;
809         }
810         remote_vars = (struct xpc_vars *) remote_rp;
811
812
813         discovered_nasids = kmalloc(sizeof(u64) * XP_NASID_MASK_WORDS,
814                                                         GFP_KERNEL);
815         if (discovered_nasids == NULL) {
816                 kfree(remote_rp_base);
817                 return;
818         }
819         memset(discovered_nasids, 0, sizeof(u64) * XP_NASID_MASK_WORDS);
820
821         rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
822
823         /*
824          * The term 'region' in this context refers to the minimum number of
825          * nodes that can comprise an access protection grouping. The access
826          * protection is in regards to memory, IOI and IPI.
827          */
828 //>>> move the next two #defines into either include/asm-ia64/sn/arch.h or
829 //>>> include/asm-ia64/sn/addrs.h
830 #define SH1_MAX_REGIONS         64
831 #define SH2_MAX_REGIONS         256
832         max_regions = is_shub2() ? SH2_MAX_REGIONS : SH1_MAX_REGIONS;
833
834         for (region = 0; region < max_regions; region++) {
835
836                 if ((volatile int) xpc_exiting) {
837                         break;
838                 }
839
840                 dev_dbg(xpc_part, "searching region %d\n", region);
841
842                 for (nasid = (region * sn_region_size * 2);
843                      nasid < ((region + 1) * sn_region_size * 2);
844                      nasid += 2) {
845
846                         if ((volatile int) xpc_exiting) {
847                                 break;
848                         }
849
850                         dev_dbg(xpc_part, "checking nasid %d\n", nasid);
851
852
853                         if (XPC_NASID_IN_ARRAY(nasid, rp->part_nasids)) {
854                                 dev_dbg(xpc_part, "PROM indicates Nasid %d is "
855                                         "part of the local partition; skipping "
856                                         "region\n", nasid);
857                                 break;
858                         }
859
860                         if (!(XPC_NASID_IN_ARRAY(nasid, rp->mach_nasids))) {
861                                 dev_dbg(xpc_part, "PROM indicates Nasid %d was "
862                                         "not on Numa-Link network at reset\n",
863                                         nasid);
864                                 continue;
865                         }
866
867                         if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
868                                 dev_dbg(xpc_part, "Nasid %d is part of a "
869                                         "partition which was previously "
870                                         "discovered\n", nasid);
871                                 continue;
872                         }
873
874
875                         /* pull over the reserved page structure */
876
877                         ret = xpc_get_remote_rp(nasid, discovered_nasids,
878                                               remote_rp, &remote_rsvd_page_pa);
879                         if (ret != xpcSuccess) {
880                                 dev_dbg(xpc_part, "unable to get reserved page "
881                                         "from nasid %d, reason=%d\n", nasid,
882                                         ret);
883
884                                 if (ret == xpcLocalPartid) {
885                                         break;
886                                 }
887                                 continue;
888                         }
889
890                         remote_vars_pa = remote_rp->vars_pa;
891
892                         partid = remote_rp->partid;
893                         part = &xpc_partitions[partid];
894
895
896                         /* pull over the cross partition variables */
897
898                         ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
899                         if (ret != xpcSuccess) {
900                                 dev_dbg(xpc_part, "unable to get XPC variables "
901                                         "from nasid %d, reason=%d\n", nasid,
902                                         ret);
903
904                                 XPC_DEACTIVATE_PARTITION(part, ret);
905                                 continue;
906                         }
907
908                         if (part->act_state != XPC_P_INACTIVE) {
909                                 dev_dbg(xpc_part, "partition %d on nasid %d is "
910                                         "already activating\n", partid, nasid);
911                                 break;
912                         }
913
914                         /*
915                          * Register the remote partition's AMOs with SAL so it
916                          * can handle and cleanup errors within that address
917                          * range should the remote partition go down. We don't
918                          * unregister this range because it is difficult to
919                          * tell when outstanding writes to the remote partition
920                          * are finished and thus when it is thus safe to
921                          * unregister. This should not result in wasted space
922                          * in the SAL xp_addr_region table because we should
923                          * get the same page for remote_act_amos_pa after
924                          * module reloads and system reboots.
925                          */
926                         if (sn_register_xp_addr_region(
927                                             remote_vars->amos_page_pa,
928                                                         PAGE_SIZE, 1) < 0) {
929                                 dev_dbg(xpc_part, "partition %d failed to "
930                                         "register xp_addr region 0x%016lx\n",
931                                         partid, remote_vars->amos_page_pa);
932
933                                 XPC_SET_REASON(part, xpcPhysAddrRegFailed,
934                                                 __LINE__);
935                                 break;
936                         }
937
938                         /*
939                          * The remote nasid is valid and available.
940                          * Send an interrupt to that nasid to notify
941                          * it that we are ready to begin activation.
942                          */
943                         dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
944                                 "nasid %d, phys_cpuid 0x%x\n",
945                                 remote_vars->amos_page_pa,
946                                 remote_vars->act_nasid,
947                                 remote_vars->act_phys_cpuid);
948
949                         xpc_IPI_send_activate(remote_vars);
950                 }
951         }
952
953         kfree(discovered_nasids);
954         kfree(remote_rp_base);
955 }
956
957
958 /*
959  * Given a partid, get the nasids owned by that partition from the
960  * remote partition's reserved page.
961  */
962 enum xpc_retval
963 xpc_initiate_partid_to_nasids(partid_t partid, void *nasid_mask)
964 {
965         struct xpc_partition *part;
966         u64 part_nasid_pa;
967         int bte_res;
968
969
970         part = &xpc_partitions[partid];
971         if (part->remote_rp_pa == 0) {
972                 return xpcPartitionDown;
973         }
974
975         part_nasid_pa = part->remote_rp_pa +
976                 (u64) &((struct xpc_rsvd_page *) 0)->part_nasids;
977
978         bte_res = xp_bte_copy(part_nasid_pa, ia64_tpa((u64) nasid_mask),
979                                 L1_CACHE_ALIGN(XP_NASID_MASK_BYTES),
980                                 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
981
982         return xpc_map_bte_errors(bte_res);
983 }
984