2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
6 * Copyright (C) 1992 - 1997, 2000,2002-2003 Silicon Graphics, Inc. All rights reserved.
10 #include <linux/types.h>
11 #include <linux/slab.h>
13 #include <asm/sn/sgi.h>
14 #include <asm/sn/io.h>
15 #include <asm/sn/iograph.h>
16 #include <asm/sn/hcl.h>
17 #include <asm/sn/labelcl.h>
18 #include <asm/sn/sn_private.h>
19 #include <asm/sn/klconfig.h>
20 #include <asm/sn/sn_cpuid.h>
21 #include <asm/sn/pci/pciio.h>
22 #include <asm/sn/pci/pcibr.h>
23 #include <asm/sn/xtalk/xtalk.h>
24 #include <asm/sn/pci/pcibr_private.h>
25 #include <asm/sn/intr.h>
26 #include <asm/sn/ioerror_handling.h>
27 #include <asm/sn/ioerror.h>
28 #include <asm/sn/sn2/shubio.h>
/*
 * Forward declarations for the vertex error-state accessors that are
 * defined later in this file; they are used by the hub error handling
 * code before their definitions appear.
 */
31 error_state_t error_state_get(vertex_hdl_t v);
32 error_return_code_t error_state_set(vertex_hdl_t v,error_state_t new_state);
36 * Get the xtalk provider function pointer for the
52 error_state_t e_state;
56 * Before walking down to the next level, check if
57 * the I/O link is up. If it's been disabled by the
58 * hub ii for some reason, we can't even touch the
61 iio_imem = REMOTE_HUB_L(nasid, IIO_IMEM);
63 if (!(iio_imem & (IIO_IMEM_B0ESD|IIO_IMEM_W0ESD))){
65 * IIO_IMEM_B0ESD getting set, indicates II shutdown
66 * on HUB0 parts. Hopefully that's not true for
70 * If either one of them is shut down, can't
73 return IOERROR_XTALKLEVEL;
76 /* Get the error state of the hub */
77 e_state = error_state_get(hub_v);
79 cnode = nasid_to_cnodeid(nasid);
81 xswitch = NODEPDA(cnode)->basew_xc;
83 /* Set the error state of the crosstalk device to that of
86 if (error_state_set(xswitch , e_state) ==
87 ERROR_RETURN_CODE_CANNOT_SET_STATE)
88 return(IOERROR_UNHANDLED);
90 /* Clean the error state of the hub if we are in the action handling
93 if (e_state == ERROR_STATE_ACTION)
94 (void)error_state_set(hub_v, ERROR_STATE_NONE);
95 /* hand the error off to the switch or the directly
96 * connected crosstalk device.
98 return xtalk_error_handler(xswitch,
99 error_code, mode, ioerror);
104 * Check if the widget in error has been enabled for PIO accesses
107 is_widget_pio_enabled(ioerror_t *ioerror)
115 /* Get the node where the PIO error occurred */
116 IOERROR_GETVALUE(p,ioerror, srcnode);
118 if (src_node == CNODEID_NONE)
121 /* Get the nasid for the cnode */
122 src_nasid = cnodeid_to_nasid(src_node);
123 if (src_nasid == INVALID_NASID)
126 /* Read the Outbound widget access register for this hub */
127 ii_iowa = REMOTE_HUB_L(src_nasid, IIO_IOWA);
128 IOERROR_GETVALUE(p,ioerror, widgetnum);
131 /* Check if the PIOs to the widget with PIO error have been
134 if (ii_iowa & IIO_IOWA_WIDGET(widget))
141 * Hub IO error handling.
143 * Gets invoked for different types of errors found at the hub.
144 * Typically this includes situations from bus error or due to
145 * an error interrupt (mostly generated at the hub).
152 struct io_error_s *ioerror)
154 hubinfo_t hinfo; /* Hub info pointer */
161 hubinfo_get(hub_v, &hinfo);
164 /* Print an error message and return */
167 nasid = hinfo->h_nasid;
173 * Cpu got a bus error while accessing IO space.
174 * hubaddr field in ioerror structure should have
175 * the IO address that caused access error.
179 * Identify if the physical address in hub_error_data
180 * corresponds to small/large window, and accordingly,
181 * get the xtalk address.
185 * Evaluate the widget number and the widget address that
186 * caused the error. Use 'vaddr' if it's there.
187 * This is typically true either during probing
188 * or a kernel driver getting into trouble.
189 * Otherwise, use paddr to figure out widget details.
190 * This is typically true for user mode bus errors while
191 * accessing I/O space.
193 IOERROR_GETVALUE(cp,ioerror,vaddr);
196 * If neither in small window nor in large window range,
197 * outright reject it.
199 IOERROR_GETVALUE(cp,ioerror,vaddr);
200 if (NODE_SWIN_ADDR(nasid, (paddr_t)cp)){
202 xwidgetnum_t widgetnum;
205 IOERROR_GETVALUE(p,ioerror,hubaddr);
207 widgetnum = SWIN_WIDGETNUM(hubaddr);
208 xtalkaddr = SWIN_WIDGETADDR(hubaddr);
210 * differentiate local register vs IO space access
212 IOERROR_SETVALUE(ioerror,widgetnum,widgetnum);
213 IOERROR_SETVALUE(ioerror,xtalkaddr,xtalkaddr);
216 } else if (NODE_BWIN_ADDR(nasid, (paddr_t)cp)){
218 * Address corresponds to large window space.
219 * Convert it to xtalk address.
222 hub_piomap_t bw_piomap;
223 xtalk_piomap_t xt_pmap = NULL;
225 xwidgetnum_t widgetnum;
228 IOERROR_GETVALUE(p,ioerror,hubaddr);
232 * Have to loop to find the correct xtalk_piomap
233 * because they're not allocated on a one-to-one
234 * basis to the window number.
236 for (bigwin=0; bigwin < HUB_NUM_BIG_WINDOW; bigwin++) {
237 bw_piomap = hubinfo_bwin_piomap_get(hinfo,
240 if (bw_piomap->hpio_bigwin_num ==
241 (BWIN_WINDOWNUM(hubaddr) - 1)) {
242 xt_pmap = hub_piomap_xt_piomap(bw_piomap);
249 widgetnum = xtalk_pio_target_get(xt_pmap);
250 xtalkaddr = xtalk_pio_xtalk_addr_get(xt_pmap) + BWIN_WIDGETADDR(hubaddr);
252 IOERROR_SETVALUE(ioerror,widgetnum,widgetnum);
253 IOERROR_SETVALUE(ioerror,xtalkaddr,xtalkaddr);
256 * Make sure that widgetnum does not map to hub
257 * register widget number, as we never use
258 * big window to access hub registers.
260 ASSERT(widgetnum != HUB_REGISTER_WIDGET);
262 } else if (IOERROR_FIELDVALID(ioerror,hubaddr)) {
264 xwidgetnum_t widgetnum;
267 IOERROR_GETVALUE(p,ioerror,hubaddr);
269 if (BWIN_WINDOWNUM(hubaddr)){
270 int window = BWIN_WINDOWNUM(hubaddr) - 1;
272 itte = (hubreg_t)HUB_L(IIO_ITTE_GET(nasid, window));
273 widgetnum = (itte >> IIO_ITTE_WIDGET_SHIFT) &
274 IIO_ITTE_WIDGET_MASK;
275 xtalkaddr = (((itte >> IIO_ITTE_OFFSET_SHIFT) &
276 IIO_ITTE_OFFSET_MASK) <<
278 BWIN_WIDGETADDR(hubaddr);
280 widgetnum = SWIN_WIDGETNUM(hubaddr);
281 xtalkaddr = SWIN_WIDGETADDR(hubaddr);
283 IOERROR_SETVALUE(ioerror,widgetnum,widgetnum);
284 IOERROR_SETVALUE(ioerror,xtalkaddr,xtalkaddr);
287 "hub_ioerror_handler: Invalid address passed"));
289 return IOERROR_INVALIDADDR;
293 IOERROR_GETVALUE(p,ioerror,widgetnum);
294 if ((p) == HUB_REGISTER_WIDGET) {
296 * Error in accessing Hub local register
297 * This should happen mostly in SABLE mode..
301 /* Make sure that the outbound widget access for this
304 if (!is_widget_pio_enabled(ioerror)) {
305 return(IOERROR_HANDLED);
309 retval = hub_xp_error_handler(
310 hub_v, nasid, error_code, mode, ioerror);
315 "hub_ioerror_handler:PIO_READ_ERROR return: %d",
320 case PIO_WRITE_ERROR:
322 * This hub received an interrupt indicating a widget
323 * attached to this hub got a timeout.
324 * widgetnum field should be filled to indicate the
325 * widget that caused error.
327 * NOTE: This hub may have nothing to do with this error.
328 * We are here since the widget attached to the xbow
329 * gets its PIOs through this hub.
331 * There is nothing that can be done at this level.
332 * Just invoke the xtalk error handling mechanism.
334 IOERROR_GETVALUE(p,ioerror,widgetnum);
335 if ((p) == HUB_REGISTER_WIDGET) {
337 /* Make sure that the outbound widget access for this
341 if (!is_widget_pio_enabled(ioerror)) {
342 return(IOERROR_HANDLED);
345 retval = hub_xp_error_handler(
346 hub_v, nasid, error_code, mode, ioerror);
352 * DMA Read error always ends up generating an interrupt
353 * at the widget level, and never at the hub level. So,
354 * we don't expect to come here any time
357 retval = IOERROR_UNHANDLED;
360 case DMA_WRITE_ERROR:
362 * DMA Write error is generated when a write by an I/O
363 * device could not be completed. Problem is, device is
364 * totally unaware of this problem, and would continue
365 * writing to system memory. So, hub has a way to send
366 * an error interrupt on the first error, and bitbucket
367 * all further write transactions.
368 * Coming here indicates that hub detected one such error,
369 * and we need to handle it.
371 * Hub interrupt handler would have extracted physaddr,
372 * widgetnum, and widgetdevice from the CRB
374 * There is nothing special to do here, since gathering
375 * data from crb's is done elsewhere. Just pass the
376 * error to xtalk layer.
378 retval = hub_xp_error_handler(hub_v, nasid, error_code, mode,
384 return IOERROR_BADERRORCODE;
389 * If error was not handled, we may need to take certain action
390 * based on the error code.
391 * For e.g. in case of PIO_READ_ERROR, we may need to release the
392 * PIO Read entry table (they are sticky after errors).
393 * Similarly other cases.
398 if (retval == IOERROR_HWGRAPH_LOOKUP) {
400 * If we get errors very early, we can't traverse
401 * the path using hardware graph.
402 * To handle this situation, we would need functions
403 * which don't depend on the hardware graph vertex to
404 * handle errors. This would break the modularity of the
405 * existing code. Instead we print out the reason for
406 * not handling error, and return. On return, all the
407 * info collected would be dumped. This should provide
408 * sufficient info to analyse the error.
410 printk("Unable to handle IO error: hardware graph not setup\n");
/*
 * Name of the hwgraph label under which a vertex's error state is
 * stored; used by the v_error_state_* macros in this file.
 */
416 #define INFO_LBL_ERROR_STATE "error_state"
/*
 * v_error_state_get(v, s): look up the INFO_LBL_ERROR_STATE label on
 * vertex v with hwgraph_info_get_LBL() and store the result in s.
 * s must be an lvalue, because its address is taken and cast to
 * arbitrary_info_t *.  Callers in this file compare the macro's value
 * against GRAPH_SUCCESS.
 *
 * NOTE(review): the lookup writes through an arbitrary_info_t pointer;
 * confirm that the object passed as s is at least as wide as
 * arbitrary_info_t, otherwise adjacent storage could be overwritten.
 */
418 #define v_error_state_get(v,s) \
419 (hwgraph_info_get_LBL(v,INFO_LBL_ERROR_STATE, (arbitrary_info_t *)&s))
421 #define v_error_state_set(v,s,replace) \
423 hwgraph_info_replace_LBL(v,INFO_LBL_ERROR_STATE,(arbitrary_info_t)s,0) :\
424 hwgraph_info_add_LBL(v,INFO_LBL_ERROR_STATE, (arbitrary_info_t)s))
/*
 * v_error_state_clear(v): remove the INFO_LBL_ERROR_STATE label from
 * vertex v via hwgraph_info_remove_LBL().  The final argument is passed
 * as 0, so any previously stored value is not handed back to the caller.
 */
427 #define v_error_state_clear(v) \
428 (hwgraph_info_remove_LBL(v,INFO_LBL_ERROR_STATE,0))
432 * Get the state of the vertex.
433 * Returns ERROR_STATE_NONE on failure (null vertex or no state label),
434 * current state otherwise
437 error_state_get(vertex_hdl_t v)
441 /* Check if we have a valid hwgraph vertex */
442 if ( v == (vertex_hdl_t)0 )
443 return(ERROR_STATE_NONE);
445 /* Get the labelled info hanging off the vertex which corresponds
448 if (v_error_state_get(v, s) != GRAPH_SUCCESS) {
449 return(ERROR_STATE_NONE);
457 * Set the state of the vertex
458 * Returns ERROR_RETURN_CODE_GENERAL_FAILURE for a null vertex,
459 * ERROR_RETURN_CODE_CANNOT_SET_STATE if the state cannot be stored, ERROR_RETURN_CODE_SUCCESS otherwise
462 error_state_set(vertex_hdl_t v,error_state_t new_state)
464 error_state_t old_state;
467 /* Check if we have a valid hwgraph vertex */
468 if ( v == (vertex_hdl_t)0 )
469 return(ERROR_RETURN_CODE_GENERAL_FAILURE);
472 /* This means that the error state needs to be cleaned */
473 if (new_state == ERROR_STATE_NONE) {
474 /* Make sure that we have an error state */
475 if (v_error_state_get(v,old_state) == GRAPH_SUCCESS)
476 v_error_state_clear(v);
477 return(ERROR_RETURN_CODE_SUCCESS);
480 /* Check if the state information has been set at least once
483 if (v_error_state_get(v,old_state) != GRAPH_SUCCESS)
486 if (v_error_state_set(v,new_state,replace) != GRAPH_SUCCESS) {
487 return(ERROR_RETURN_CODE_CANNOT_SET_STATE);
489 return(ERROR_RETURN_CODE_SUCCESS);