VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
[linux-2.6.git] / arch / ia64 / kernel / fsys.S
1 /*
2  * This file contains the light-weight system call handlers (fsyscall-handlers).
3  *
4  * Copyright (C) 2003 Hewlett-Packard Co
5  *      David Mosberger-Tang <davidm@hpl.hp.com>
6  *
7  * 25-Sep-03 davidm     Implement fsys_rt_sigprocmask().
8  * 18-Feb-03 louisk     Implement fsys_gettimeofday().
9  * 28-Feb-03 davidm     Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
10  *                      probably broke it along the way... ;-)
11  */
12
13 #include <asm/asmmacro.h>
14 #include <asm/errno.h>
15 #include <asm/offsets.h>
16 #include <asm/percpu.h>
17 #include <asm/thread_info.h>
18 #include <asm/sal.h>
19 #include <asm/signal.h>
20 #include <asm/system.h>
21 #include <asm/unistd.h>
22
23 #include "entry.h"
24
25 /*
26  * See Documentation/ia64/fsys.txt for details on fsyscalls.
27  *
28  * On entry to an fsyscall handler:
29  *   r10        = 0 (i.e., defaults to "successful syscall return")
30  *   r11        = saved ar.pfs (a user-level value)
31  *   r15        = system call number
32  *   r16        = "current" task pointer (in normal kernel-mode, this is in r13)
33  *   r32-r39    = system call arguments
34  *   b6         = return address (a user-level value)
35  *   ar.pfs     = previous frame-state (a user-level value)
36  *   PSR.be     = cleared to zero (i.e., little-endian byte order is in effect)
37  *   all other registers may contain values passed in from user-mode
38  *
39  * On return from an fsyscall handler:
40  *   r11        = saved ar.pfs (as passed into the fsyscall handler)
41  *   r15        = system call number (as passed into the fsyscall handler)
42  *   r32-r39    = system call arguments (as passed into the fsyscall handler)
43  *   b6         = return address (as passed into the fsyscall handler)
44  *   ar.pfs     = previous frame-state (as passed into the fsyscall handler)
45  */
46
/*
 * fsys_ni_syscall: light-weight handler that fails the call with ENOSYS.
 *
 * Output: r8 = ENOSYS, r10 = -1 (handlers are entered with r10 = 0, which
 * denotes a successful syscall return).
 */
47 ENTRY(fsys_ni_syscall)
48         .prologue
49         .altrp b6
50         .body
51         mov r8=ENOSYS                   // r8 = error code
52         mov r10=-1                      // r10 = -1: override the "success" default of 0
53         FSYS_RETURN
54 END(fsys_ni_syscall)
55
/*
 * fsys_getpid: light-weight getpid().
 *
 * Loads current->tgid into r8 and returns.  If any TIF_ALLWORK_MASK bit is set in
 * the thread-info flags, the call is handed to fsys_fallback_syscall instead.
 */
56 ENTRY(fsys_getpid)
57         .prologue
58         .altrp b6
59         .body
60         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current_thread_info()->flags
61         ;;
62         ld4 r9=[r9]                             // r9 = thread-info flags
63         add r8=IA64_TASK_TGID_OFFSET,r16        // r8 = &current->tgid
64         ;;
65         and r9=TIF_ALLWORK_MASK,r9              // keep only the pending-work bits
66         ld4 r8=[r8]                             // r8 = current->tgid
67         ;;
68         cmp.ne p8,p0=0,r9                       // p8 <- some kernel work is pending
69 (p8)    br.spnt.many fsys_fallback_syscall      // yes -> do the heavy-weight syscall
70         FSYS_RETURN
71 END(fsys_getpid)
72
/*
 * fsys_getppid: light-weight getppid().
 *
 * Returns current->group_leader->real_parent->tgid in r8.  On SMP, real_parent is
 * re-read after tgid has been loaded and the whole read is redone if it changed.
 * If any TIF_ALLWORK_MASK bit is set in the thread-info flags, the call is handed
 * to fsys_fallback_syscall; this check is made on both SMP and UP builds.
 * r17-r19 are zeroed before returning so that no kernel bits reach user level.
 */
73 ENTRY(fsys_getppid)
74         .prologue
75         .altrp b6
76         .body
77         add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
78         ;;
79         ld8 r17=[r17]                           // r17 = current->group_leader
80         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current_thread_info()->flags
81         ;;
82
83         ld4 r9=[r9]                             // r9 = thread-info flags
84         add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
85         ;;
86         and r9=TIF_ALLWORK_MASK,r9              // keep only the pending-work bits
87
88 1:      ld8 r18=[r17]                           // r18 = current->group_leader->real_parent
89         ;;
90         cmp.ne p8,p0=0,r9                       // p8 <- some kernel work is pending
91         add r8=IA64_TASK_TGID_OFFSET,r18        // r8 = &current->group_leader->real_parent->tgid
92         ;;
93
94         /*
95          * The .acq is needed to ensure that the read of tgid has returned its data before
96          * we re-check "real_parent".
97          */
98         ld4.acq r8=[r8]                         // r8 = current->group_leader->real_parent->tgid
99 #ifdef CONFIG_SMP
100         /*
101          * Re-read current->group_leader->real_parent.
102          */
103         ld8 r19=[r17]                           // r19 = current->group_leader->real_parent
104 (p8)    br.spnt.many fsys_fallback_syscall      // pending work -> do the heavy-weight syscall
105         ;;
106         cmp.ne p6,p0=r18,r19                    // did real_parent change?
107         mov r19=0                       // i must not leak kernel bits...
108 (p6)    br.cond.spnt.few 1b                     // yes -> redo the read of tgid and the check
109         ;;
110         mov r17=0                       // i must not leak kernel bits...
111         mov r18=0                       // i must not leak kernel bits...
112 #else
            /*
             * There is nothing to re-check on UP, but pending work must still
             * divert to the heavy-weight syscall, exactly as on SMP.
             */
    (p8)    br.spnt.many fsys_fallback_syscall      // pending work -> do the heavy-weight syscall
113         mov r17=0                       // i must not leak kernel bits...
114         mov r18=0                       // i must not leak kernel bits...
115         mov r19=0                       // i must not leak kernel bits...
116 #endif
117         FSYS_RETURN
118 END(fsys_getppid)
119
/*
 * fsys_set_tid_address: light-weight set_tid_address().
 *
 * Input:  r32 = user argument (the new clear_child_tid value).
 * Stores r32 into current->clear_child_tid, or -1 if r32 is NaT, and returns the
 * word at IA64_TASK_PID_OFFSET in r8.  The store is issued before the pending-work
 * branch, so it also happens when the call is handed to fsys_fallback_syscall.
 * r17 and r18 are zeroed before returning.
 */
120 ENTRY(fsys_set_tid_address)
121         .prologue
122         .altrp b6
123         .body
124         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current_thread_info()->flags
125         ;;
126         ld4 r9=[r9]                             // r9 = thread-info flags
127         tnat.z p6,p7=r32                // check argument register for being NaT
128         ;;
129         and r9=TIF_ALLWORK_MASK,r9              // keep only the pending-work bits
130         add r8=IA64_TASK_PID_OFFSET,r16         // r8 = &current->pid
131         add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16    // r18 = &current->clear_child_tid
132         ;;
133         ld4 r8=[r8]                             // r8 = current->pid (the value returned)
134         cmp.ne p8,p0=0,r9                       // p8 <- some kernel work is pending
135         mov r17=-1                              // value stored when the argument is NaT
136         ;;
137 (p6)    st8 [r18]=r32                           // argument is not NaT: store it
138 (p7)    st8 [r18]=r17                           // argument is NaT: store -1 instead
139 (p8)    br.spnt.many fsys_fallback_syscall      // pending work -> do the heavy-weight syscall
140         ;;
141         mov r17=0                       // i must not leak kernel bits...
142         mov r18=0                       // i must not leak kernel bits...
143         FSYS_RETURN
144 END(fsys_set_tid_address)
145
146 /*
147  * Note 1: This routine uses floating-point registers, but only with registers that
148  *         operate on integers.  Because of that, we don't need to set ar.fpsr to the
149  *         kernel default value.
150  *
151  * Note 2: For now, we will assume that all CPUs run at the same clock-frequency.
152  *         If that wasn't the case, we would have to disable preemption (e.g.,
153  *         by disabling interrupts) between reading the ITC and reading
154  *         local_cpu_data->nsec_per_cyc.
155  *
156  * Note 3: On platforms where the ITC-drift bit is set in the SAL feature vector,
157  *         we ought to either skip the ITC-based interpolation or run an ntp-like
158  *         daemon to keep the ITCs from drifting too far apart.
159  */
160
/*
 * fsys_gettimeofday: light-weight gettimeofday().
 *
 * Input:  r32 = first argument; seconds are stored at [r32] and microseconds
 *               at [r32+8]
 *         r33 = second argument; only checked for NaT, never dereferenced here
 * Output: r8 = 0 and r10 = 0 on success;
 *         r8 = EINVAL, r10 = -1 if r32 or r33 is NaT;
 *         r8 = EFAULT, r10 = -1 via .fail_efault (the label handed to EX() on the
 *         probes and stores; presumably reached when one of them faults).
 * Falls back to the heavy-weight syscall when a TIF_ALLWORK_MASK bit is set or,
 * on SMP, when the ITC-drift bit is set in sal_platform_features.
 *
 * The time is xtime plus an ITC-based offset, read under the xtime_lock sequence
 * (see .retry); last_nsec_offset is advanced with cmpxchg8 when the new offset
 * is larger than the old one.
 */
161 ENTRY(fsys_gettimeofday)
162         .prologue
163         .altrp b6
164         .body
165         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current_thread_info()->flags
166         addl r3=THIS_CPU(cpu_info),r0           // r3 = address of the local cpu_info
167
168 #ifdef CONFIG_SMP
169         movl r10=__per_cpu_offset
170         movl r2=sal_platform_features
171         ;;
172
173         ld8 r2=[r2]
174         movl r19=xtime                  // xtime is a timespec struct
175
176         ld8 r10=[r10]                   // r10 <- __per_cpu_offset[0]
177         addl r21=THIS_CPU(cpu_info),r0
178         ;;
179         add r10=r21, r10                // r10 <- &cpu_data(time_keeper_id)
180         tbit.nz p8,p0 = r2, IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT
181 (p8)    br.spnt.many fsys_fallback_syscall      // ITCs may drift -> do the heavy-weight syscall
182 #else
183         ;;
184         mov r10=r3
185         movl r19=xtime                  // xtime is a timespec struct
186 #endif
187         ld4 r9=[r9]                     // r9 = thread-info flags
188         movl r17=xtime_lock
189         ;;
190
191         // r32, r33 should contain the 2 args of gettimeofday
192         adds r21=IA64_CPUINFO_ITM_NEXT_OFFSET, r10      // r21 = &itm_next
193         mov r2=-1                       // moved to f11 below, used as the -1 multiplier
194         tnat.nz p6,p7=r32               // guard against NaT args
195         ;;
196
197         adds r10=IA64_CPUINFO_ITM_DELTA_OFFSET, r10     // r10 = &itm_delta
198 (p7)    tnat.nz p6,p0=r33               // r32 is fine: p6 <- r33 is NaT
199 (p6)    br.cond.spnt.few .fail_einval
200
201         adds r8=IA64_CPUINFO_NSEC_PER_CYC_OFFSET, r3    // r8 = &local_cpu_data->nsec_per_cyc
202         movl r24=2361183241434822607    // for division hack (only for / 1000)
203         ;;
204
205         ldf8 f7=[r10]                   // f7 now contains itm_delta
206         setf.sig f11=r2                 // f11 = -1
207         adds r10=8, r32                 // r10 = r32 + 8 (second word, probed below)
208
209         adds r20=IA64_TIMESPEC_TV_NSEC_OFFSET, r19      // r20 = &xtime->tv_nsec
210         movl r26=jiffies
211
212         setf.sig f9=r24                 // f9 is used for division hack
213         movl r27=wall_jiffies
214
215         and r9=TIF_ALLWORK_MASK,r9      // keep only the pending-work bits
216         movl r25=last_nsec_offset
217         ;;
218
219         /*
220          * Verify that we have permission to write to struct timeval.  Note:
221          * Another thread might unmap the mapping before we actually get
222          * to store the result.  That's OK as long as the stores are also
223          * protected by EX().
224          */
225 EX(.fail_efault, probe.w.fault r32, 3)          // this must come _after_ NaT-check
226 EX(.fail_efault, probe.w.fault r10, 3)          // this must come _after_ NaT-check
227         nop 0
228
229         ldf8 f10=[r8]                   // f10 <- local_cpu_data->nsec_per_cyc value
230         cmp.ne p8, p0=0, r9             // p8 <- some kernel work is pending
231 (p8)    br.spnt.many fsys_fallback_syscall
232         ;;
233 .retry: // *** seq = read_seqbegin(&xtime_lock); ***
234         ld4.acq r23=[r17]               // since &xtime_lock == &xtime_lock->sequence
235         ld8 r14=[r25]                   // r14 (old) = last_nsec_offset
236
237         ld8 r28=[r26]                   // r28 = jiffies
238         ld8 r29=[r27]                   // r29 = wall_jiffies
239         ;;
240
241         ldf8 f8=[r21]                   // f8 now contains itm_next
242         mov.m r31=ar.itc                // put time stamp into r31 (ITC) == now
243         sub r28=r29, r28, 1             // r28 now contains "-(lost + 1)"
244         ;;
245
246         ld8 r2=[r19]                    // r2 = sec = xtime.tv_sec
247         ld8 r29=[r20]                   // r29 = nsec = xtime.tv_nsec
248         tbit.nz p9, p10=r23, 0          // p9 <- is_odd(r23), p10 <- is_even(r23)
249
250         setf.sig f6=r28                 // f6 <- -(lost + 1)                            (6 cyc)
251         ;;
252
253         mf
254         xma.l f8=f6, f7, f8     // f8 (last_tick) <- -(lost + 1)*itm_delta + itm_next   (5 cyc)
255         nop 0
256
257         setf.sig f12=r31                // f12 <- ITC                                   (6 cyc)
258         // *** if (unlikely(read_seqretry(&xtime_lock, seq))) continue; ***
259         ld4 r24=[r17]                   // r24 = xtime_lock->sequence (re-read)
260         nop 0
261         ;;
262
263         xma.l f8=f11, f8, f12   // f8 (elapsed_cycles) <- (-1*last_tick + now) = (now - last_tick)
264         nop 0
265         ;;
266
267         getf.sig r18=f8                 // r18 <- (now - last_tick)
268         xmpy.l f8=f8, f10               // f8 <- elapsed_cycles*nsec_per_cyc (5 cyc)
269         add r3=r29, r14                 // r3 = (nsec + old)
270         ;;
271
272         cmp.lt p7, p8=r18, r0           // if now < last_tick, set p7 = 1, p8 = 0
            // NOTE(review): p7/p8 are rewritten by the cmp.leu below before anything
            // reads them, so the result of this compare appears to be unused.
273         getf.sig r18=f8                 // r18 = elapsed_cycles*nsec_per_cyc            (6 cyc)
274         nop 0
275         ;;
276
277 (p10)   cmp.ne p9, p0=r23, r24          // if xtime_lock->sequence != seq, set p9
278         shr.u r18=r18, IA64_NSEC_PER_CYC_SHIFT  // r18 <- offset
279 (p9)    br.spnt.many .retry             // sequence was odd or has changed -> start over
280         ;;
281
282         mov ar.ccv=r14                  // ar.ccv = old                                 (1 cyc)
283         cmp.leu p7, p8=r18, r14         // if (offset <= old), set p7 = 1, p8 = 0
284         ;;
285
286 (p8)    cmpxchg8.rel r24=[r25], r18, ar.ccv     // compare-and-exchange (atomic!)
287 (p8)    add r3=r29, r18                 // r3 = (nsec + offset)
288         ;;
289         shr.u r3=r3, 3                  // initiate dividing r3 by 1000
290         ;;
291         setf.sig f8=r3                  //                                              (6 cyc)
292         mov r10=1000000                 // r10 = 1000000
293         ;;
294 (p8)    cmp.ne.unc p9, p0=r24, r14      // p9 <- last_nsec_offset was not "old" any more
295         xmpy.hu f6=f8, f9               //                                              (5 cyc)
296 (p9)    br.spnt.many .retry             // lost the cmpxchg8 race -> start over
297         ;;
298
299         getf.sig r3=f6                  //                                              (6 cyc)
300         ;;
301         shr.u r3=r3, 4                  // end of division, r3 is divided by 1000 (=usec)
302         ;;
303
304 1:      cmp.geu p7, p0=r3, r10          // while (usec >= 1000000)
305         ;;
306 (p7)    sub r3=r3, r10                  // usec -= 1000000
307 (p7)    adds r2=1, r2                   // ++sec
308 (p7)    br.spnt.many 1b
309
310         // finally: r2 = sec, r3 = usec
311 EX(.fail_efault, st8 [r32]=r2)
312         adds r9=8, r32                  // r9 = address of the second word
313         mov r8=r0                       // success
314         ;;
315 EX(.fail_efault, st8 [r9]=r3)           // store them in the timeval struct
316         mov r10=0                       // r10 was used as scratch: restore the "success" value
317         FSYS_RETURN
318         /*
319          * Note: We are NOT clearing the scratch registers here.  Since the only things
320          *       in those registers are time-related variables and some addresses (which
321          *       can be obtained from System.map), none of this should be security-sensitive
322          *       and we should be fine.
323          */
324
325 .fail_einval:
326         mov r8=EINVAL                   // r8 = EINVAL
327         mov r10=-1                      // r10 = -1
328         FSYS_RETURN
329
330 .fail_efault:
331         mov r8=EFAULT                   // r8 = EFAULT
332         mov r10=-1                      // r10 = -1
333         FSYS_RETURN
334 END(fsys_gettimeofday)
335
336 /*
337  * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
     *
     * Input:  r32 = how, r33 = set, r34 = oset, r35 = sigsetsize.
     * Output: r8 = 0 on success, with the previous mask stored to *oset if oset != NULL.
     * Branches to .fail_einval if how > SIG_SETMASK (unsigned compare), if
     * sigsetsize != _NSIG_WORDS*8, or if r32 or r35 is NaT.  Branches to .fail_efault
     * if r33 or r34 is NaT; the same label is handed to EX() on the user accesses.
     *
     * If set == NULL, only the current mask is reported (.store_mask).  Otherwise the
     * new mask is computed with interrupt delivery masked and, on SMP, with
     * current->sighand->siglock held; SIGKILL and SIGSTOP are filtered out of *set.
     *
     * Falls back to the heavy-weight syscall when a TIF_ALLWORK_MASK bit is set, when
     * group_stop_count > 0 or a signal would be pending under the new mask
     * (.sig_pending), or, on SMP, when the lock is already taken (.lock_contention).
338  */
339 #if _NSIG_WORDS != 1
340 # error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
341 #endif
342 ENTRY(fsys_rt_sigprocmask)
343         .prologue
344         .altrp b6
345         .body
346
347         add r2=IA64_TASK_BLOCKED_OFFSET,r16     // r2 = &current->blocked
348         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current_thread_info()->flags
349         cmp4.ltu p6,p0=SIG_SETMASK,r32          // p6 <- how > SIG_SETMASK (unsigned)
350
351         cmp.ne p15,p0=r0,r34                    // oset != NULL?
352         tnat.nz p8,p0=r34                       // p8 <- oset register is NaT
353         add r31=IA64_TASK_SIGHAND_OFFSET,r16    // r31 = &current->sighand
354         ;;
355         ld8 r3=[r2]                             // read/prefetch current->blocked
356         ld4 r9=[r9]                             // r9 = thread-info flags
357         tnat.nz.or p6,p0=r35                    // p6 |= sigsetsize register is NaT
358
359         cmp.ne.or p6,p0=_NSIG_WORDS*8,r35       // p6 |= sigsetsize != _NSIG_WORDS*8
360         tnat.nz.or p6,p0=r32                    // p6 |= how register is NaT
361 (p6)    br.spnt.few .fail_einval                // fail with EINVAL
362         ;;
363 #ifdef CONFIG_SMP
364         ld8 r31=[r31]                           // r31 <- current->sighand
365 #endif
366         and r9=TIF_ALLWORK_MASK,r9              // keep only the pending-work bits
367         tnat.nz.or p8,p0=r33                    // p8 |= set register is NaT
368         ;;
369         cmp.ne p7,p0=0,r9                       // p7 <- some kernel work is pending
370         cmp.eq p6,p0=r0,r33                     // set == NULL?
371         add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- &current->sighand->siglock (only used on SMP)
372 (p8)    br.spnt.few .fail_efault                // fail with EFAULT
373 (p7)    br.spnt.many fsys_fallback_syscall      // got pending kernel work...
374 (p6)    br.dpnt.many .store_mask                // -> short-circuit to just reading the signal mask
375
376         /* Argh, we actually have to do some work and _update_ the signal mask: */
377
378 EX(.fail_efault, probe.r.fault r33, 3)          // verify user has read-access to *set
379 EX(.fail_efault, ld8 r14=[r33])                 // r14 <- *set
380         mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
381         ;;
382
383         rsm psr.i                               // mask interrupt delivery
384         mov ar.ccv=0                            // compare value for the cmpxchg4 on the lock
385         andcm r14=r14,r17                       // filter out SIGKILL & SIGSTOP
386
387 #ifdef CONFIG_SMP
388         mov r17=1                               // value written to the lock word when acquiring it
389         ;;
390         cmpxchg4.acq r18=[r31],r17,ar.ccv       // try to acquire the lock
391         mov r8=EINVAL                   // default to EINVAL
392         ;;
393         ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
394         cmp4.ne p6,p0=r18,r0            // p6 <- lock word was not 0, i.e. already taken
395 (p6)    br.cond.spnt.many .lock_contention
396         ;;
397 #else
398         ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
399         mov r8=EINVAL                   // default to EINVAL
400 #endif
401         add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
402         add r19=IA64_TASK_SIGNAL_OFFSET,r16
403         cmp4.eq p6,p0=SIG_BLOCK,r32
404         ;;
405         ld8 r19=[r19]                   // r19 <- current->signal
406         cmp4.eq p7,p0=SIG_UNBLOCK,r32
407         cmp4.eq p8,p0=SIG_SETMASK,r32
408         ;;
409         ld8 r18=[r18]                   // r18 <- current->pending.signal
410         .pred.rel.mutex p6,p7,p8
411 (p6)    or r14=r3,r14                   // SIG_BLOCK
412 (p7)    andcm r14=r3,r14                // SIG_UNBLOCK
413
414 (p8)    mov r14=r14                     // SIG_SETMASK
415 (p6)    mov r8=0                        // clear error code
416         // recalc_sigpending()
417         add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
418
419         add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
420         ;;
421         ld4 r17=[r17]           // r17 <- current->signal->group_stop_count
422 (p7)    mov r8=0                // clear error code
423
424         ld8 r19=[r19]           // r19 <- current->signal->shared_pending
425         ;;
426         cmp4.gt p6,p7=r17,r0    // p6/p7 <- (current->signal->group_stop_count > 0)?
427 (p8)    mov r8=0                // clear error code
428
429         or r18=r18,r19          // r18 <- current->pending | current->signal->shared_pending
430         ;;
431         // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
432         andcm r18=r18,r14
433         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = &current_thread_info()->flags
434         ;;
435
436 (p7)    cmp.ne.or.andcm p6,p7=r18,r0            // p6/p7 <- signal pending
437         mov r19=0                                       // i must not leak kernel bits...
438 (p6)    br.cond.dpnt.many .sig_pending
439         ;;
440
441 1:      ld4 r17=[r9]                            // r17 <- current->thread_info->flags
442         ;;
443         mov ar.ccv=r17                          // compare value for the cmpxchg4 on the flags
444         and r18=~_TIF_SIGPENDING,r17            // r18 <- r17 & ~(1 << TIF_SIGPENDING)
445         ;;
446
447         st8 [r2]=r14                            // update current->blocked with new mask
448         cmpxchg4.acq r14=[r9],r18,ar.ccv        // current->thread_info->flags <- r18
449         ;;
450         cmp.ne p6,p0=r17,r14                    // update failed?
451 (p6)    br.cond.spnt.few 1b                     // yes -> retry
452
453 #ifdef CONFIG_SMP
454         st4.rel [r31]=r0                        // release the lock
455 #endif
456         ssm psr.i                               // unmask interrupt delivery again
457         ;;
458
459         srlz.d                                  // ensure psr.i is set again
460         mov r18=0                                       // i must not leak kernel bits...
461
462 .store_mask:
463 EX(.fail_efault, (p15) probe.w.fault r34, 3)    // verify user has write-access to *oset
464 EX(.fail_efault, (p15) st8 [r34]=r3)            // *oset <- mask read before the update
465         mov r2=0                                        // i must not leak kernel bits...
466         mov r3=0                                        // i must not leak kernel bits...
467         mov r8=0                                // return 0
468         mov r9=0                                        // i must not leak kernel bits...
469         mov r14=0                                       // i must not leak kernel bits...
470         mov r17=0                                       // i must not leak kernel bits...
471         mov r31=0                                       // i must not leak kernel bits...
472         FSYS_RETURN
473
474 .sig_pending:
475 #ifdef CONFIG_SMP
476         st4.rel [r31]=r0                        // release the lock
477 #endif
478         ssm psr.i                               // unmask interrupt delivery again
479         ;;
480         srlz.d
481         br.sptk.many fsys_fallback_syscall      // with signal pending, do the heavy-weight syscall
482
483 #ifdef CONFIG_SMP
484 .lock_contention:
485         /* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
486         ssm psr.i                               // unmask interrupt delivery again
487         ;;
488         srlz.d
489         br.sptk.many fsys_fallback_syscall
490 #endif
491 END(fsys_rt_sigprocmask)
492
/*
 * fsys_fallback_syscall: entered by the light-weight handlers when they cannot
 * finish a call themselves.
 *
 * Looks up the heavy-weight entry point for syscall number r15 in sys_call_table,
 * masks interrupt delivery, and loads the registers that fsys_bubble_down expects
 * on entry (r18, r21, r26, r27, r29).  There is no return or branch at the end:
 * execution continues with the code that follows END().
 */
493 ENTRY(fsys_fallback_syscall)
494         .prologue
495         .altrp b6
496         .body
497         /*
498          * We only get here from light-weight syscall handlers.  Thus, we already
499          * know that r15 contains a valid syscall number.  No need to re-check.
500          */
501         adds r17=-1024,r15                      // r17 = syscall number - 1024 (table index)
502         movl r14=sys_call_table
503         ;;
504         rsm psr.i                               // mask interrupt delivery
505         shladd r18=r17,3,r14                    // r18 = &sys_call_table[r17] (8-byte entries)
506         ;;
507         ld8 r18=[r18]                           // load normal (heavy-weight) syscall entry-point
508         mov r29=psr                             // read psr (12 cyc load latency)
509         mov r27=ar.rsc                          // r27 = ar.rsc, as fsys_bubble_down expects
510         mov r21=ar.fpsr                         // r21 = ar.fpsr, as fsys_bubble_down expects
511         mov r26=ar.pfs                          // r26 = ar.pfs, as fsys_bubble_down expects
512 END(fsys_fallback_syscall)
513         /* FALL THROUGH */
514 GLOBAL_ENTRY(fsys_bubble_down)
515         .prologue
516         .altrp b6
517         .body
518         /*
519          * We get here for syscalls that don't have a lightweight handler.  For those, we
520          * need to bubble down into the kernel and that requires setting up a minimal
521          * pt_regs structure, and initializing the CPU state more or less as if an
522          * interruption had occurred.  To make syscall-restarts work, we setup pt_regs
523          * such that cr_iip points to the second instruction in syscall_via_break.
524          * Decrementing the IP hence will restart the syscall via break and not
525          * decrementing IP will return us to the caller, as usual.  Note that we preserve
526          * the value of psr.pp rather than initializing it from dcr.pp.  This makes it
527          * possible to distinguish fsyscall execution from other privileged execution.
528          *
529          * On entry:
530          *      - normal fsyscall handler register usage, except that we also have:
531          *      - r18: address of syscall entry point
532          *      - r21: ar.fpsr
533          *      - r26: ar.pfs
534          *      - r27: ar.rsc
535          *      - r29: psr
536          */
537 #       define PSR_PRESERVED_BITS       (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
538                                          | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \
539                                          | IA64_PSR_IC)
540         /*
541          * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.  The rest we have
542          * to synthesize.
543          */
544 #       define PSR_ONE_BITS             ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
545                                          | IA64_PSR_BN | IA64_PSR_I)
546
547         invala
548         movl r8=PSR_ONE_BITS
549
550         mov r25=ar.unat                 // save ar.unat (5 cyc)
551         movl r9=PSR_PRESERVED_BITS
552
553         mov ar.rsc=0                    // set enforced lazy mode, pl 0, little-endian, loadrs=0
554         movl r28=__kernel_syscall_via_break
555         ;;
556         mov r23=ar.bspstore             // save ar.bspstore (12 cyc)
557         mov r31=pr                      // save pr (2 cyc)
558         mov r20=r1                      // save caller's gp in r20
559         ;;
560         mov r2=r16                      // copy current task addr to addl-addressable register
561         and r9=r9,r29                   // r9 = psr with only PSR_PRESERVED_BITS kept
562         mov r19=b6                      // save b6 (2 cyc)
563         ;;
564         mov psr.l=r9                    // slam the door (17 cyc to srlz.i)
565         or r29=r8,r29                   // construct cr.ipsr value to save
566         addl r22=IA64_RBS_OFFSET,r2     // compute base of RBS
567         ;;
568         // GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks
569         // we may be reading ar.itc after writing to psr.l.  Avoid that message with
570         // this directive:
571         dv_serialize_data
572         mov.m r24=ar.rnat               // read ar.rnat (5 cyc lat)
573         lfetch.fault.excl.nt1 [r22]
574         adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2   // r16 = &current->thread.on_ustack
575
576         // ensure previous insn group is issued before we stall for srlz.i:
577         ;;
578         srlz.i                          // ensure new psr.l has been established
579         /////////////////////////////////////////////////////////////////////////////
580         ////////// from this point on, execution is not interruptible anymore
581         /////////////////////////////////////////////////////////////////////////////
582         addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2    // compute base of memory stack
583         cmp.ne pKStk,pUStk=r0,r0        // set pKStk <- 0, pUStk <- 1
584         ;;
585         st1 [r16]=r0                    // clear current->thread.on_ustack flag
586         mov ar.bspstore=r22             // switch to kernel RBS
587         mov b6=r18                      // copy syscall entry-point to b6 (7 cyc)
588         add r3=TI_FLAGS+IA64_TASK_SIZE,r2       // r3 = &current_thread_info()->flags
589         ;;
590         ld4 r3=[r3]                             // r3 = current_thread_info()->flags
591         mov r18=ar.bsp                  // save (kernel) ar.bsp (12 cyc)
592         mov ar.rsc=0x3                  // set eager mode, pl 0, little-endian, loadrs=0
593         br.call.sptk.many b7=ia64_syscall_setup
594         ;;
595         ssm psr.i                               // unmask interrupt delivery again
596         movl r2=ia64_ret_from_syscall
597         ;;
598         mov rp=r2                               // set the real return addr
599         tbit.z p8,p0=r3,TIF_SYSCALL_TRACE       // p8 <- TIF_SYSCALL_TRACE flag is clear
600
601 (p8)    br.call.sptk.many b6=b6                 // ignore this return addr
602         br.cond.sptk ia64_trace_syscall         // flag is set: go through the tracing path
603 END(fsys_bubble_down)
604
605         .rodata
606         .align 8
607         .globl fsyscall_table
608
609         data8 fsys_bubble_down
610 fsyscall_table:
611         data8 fsys_ni_syscall
612         data8 0                         // exit                 // 1025
613         data8 0                         // read
614         data8 0                         // write
615         data8 0                         // open
616         data8 0                         // close
617         data8 0                         // creat                // 1030
618         data8 0                         // link
619         data8 0                         // unlink
620         data8 0                         // execve
621         data8 0                         // chdir
622         data8 0                         // fchdir               // 1035
623         data8 0                         // utimes
624         data8 0                         // mknod
625         data8 0                         // chmod
626         data8 0                         // chown
627         data8 0                         // lseek                // 1040
628         data8 fsys_getpid               // getpid
629         data8 fsys_getppid              // getppid
630         data8 0                         // mount
631         data8 0                         // umount
632         data8 0                         // setuid               // 1045
633         data8 0                         // getuid
634         data8 0                         // geteuid
635         data8 0                         // ptrace
636         data8 0                         // access
637         data8 0                         // sync                 // 1050
638         data8 0                         // fsync
639         data8 0                         // fdatasync
640         data8 0                         // kill
641         data8 0                         // rename
642         data8 0                         // mkdir                // 1055
643         data8 0                         // rmdir
644         data8 0                         // dup
645         data8 0                         // pipe
646         data8 0                         // times
647         data8 0                         // brk                  // 1060
648         data8 0                         // setgid
649         data8 0                         // getgid
650         data8 0                         // getegid
651         data8 0                         // acct
652         data8 0                         // ioctl                // 1065
653         data8 0                         // fcntl
654         data8 0                         // umask
655         data8 0                         // chroot
656         data8 0                         // ustat
657         data8 0                         // dup2                 // 1070
658         data8 0                         // setreuid
659         data8 0                         // setregid
660         data8 0                         // getresuid
661         data8 0                         // setresuid
662         data8 0                         // getresgid            // 1075
663         data8 0                         // setresgid
664         data8 0                         // getgroups
665         data8 0                         // setgroups
666         data8 0                         // getpgid
667         data8 0                         // setpgid              // 1080
668         data8 0                         // setsid
669         data8 0                         // getsid
670         data8 0                         // sethostname
671         data8 0                         // setrlimit
672         data8 0                         // getrlimit            // 1085
673         data8 0                         // getrusage
674         data8 fsys_gettimeofday         // gettimeofday
675         data8 0                         // settimeofday
676         data8 0                         // select
677         data8 0                         // poll                 // 1090
678         data8 0                         // symlink
679         data8 0                         // readlink
680         data8 0                         // uselib
681         data8 0                         // swapon
682         data8 0                         // swapoff              // 1095
683         data8 0                         // reboot
684         data8 0                         // truncate
685         data8 0                         // ftruncate
686         data8 0                         // fchmod
687         data8 0                         // fchown               // 1100
688         data8 0                         // getpriority
689         data8 0                         // setpriority
690         data8 0                         // statfs
691         data8 0                         // fstatfs
692         data8 0                         // gettid               // 1105
693         data8 0                         // semget
694         data8 0                         // semop
695         data8 0                         // semctl
696         data8 0                         // msgget
697         data8 0                         // msgsnd               // 1110
698         data8 0                         // msgrcv
699         data8 0                         // msgctl
700         data8 0                         // shmget
701         data8 0                         // shmat
702         data8 0                         // shmdt                // 1115
703         data8 0                         // shmctl
704         data8 0                         // syslog
705         data8 0                         // setitimer
706         data8 0                         // getitimer
707         data8 0                                                 // 1120
708         data8 0
709         data8 0
710         data8 0                         // vhangup
711         data8 0                         // lchown
712         data8 0                         // remap_file_pages     // 1125
713         data8 0                         // wait4
714         data8 0                         // sysinfo
715         data8 0                         // clone
716         data8 0                         // setdomainname
717         data8 0                         // newuname             // 1130
718         data8 0                         // adjtimex
719         data8 0
720         data8 0                         // init_module
721         data8 0                         // delete_module
722         data8 0                                                 // 1135
723         data8 0
724         data8 0                         // quotactl
725         data8 0                         // bdflush
726         data8 0                         // sysfs
727         data8 0                         // personality          // 1140
728         data8 0                         // afs_syscall
729         data8 0                         // setfsuid
730         data8 0                         // setfsgid
731         data8 0                         // getdents
732         data8 0                         // flock                // 1145
733         data8 0                         // readv
734         data8 0                         // writev
735         data8 0                         // pread64
736         data8 0                         // pwrite64
737         data8 0                         // sysctl               // 1150
738         data8 0                         // mmap
739         data8 0                         // munmap
740         data8 0                         // mlock
741         data8 0                         // mlockall
742         data8 0                         // mprotect             // 1155
743         data8 0                         // mremap
744         data8 0                         // msync
745         data8 0                         // munlock
746         data8 0                         // munlockall
747         data8 0                         // sched_getparam       // 1160
748         data8 0                         // sched_setparam
749         data8 0                         // sched_getscheduler
750         data8 0                         // sched_setscheduler
751         data8 0                         // sched_yield
752         data8 0                         // sched_get_priority_max       // 1165
753         data8 0                         // sched_get_priority_min
754         data8 0                         // sched_rr_get_interval
755         data8 0                         // nanosleep
756         data8 0                         // nfsservctl
757         data8 0                         // prctl                // 1170
758         data8 0                         // getpagesize
759         data8 0                         // mmap2
760         data8 0                         // pciconfig_read
761         data8 0                         // pciconfig_write
762         data8 0                         // perfmonctl           // 1175
763         data8 0                         // sigaltstack
764         data8 0                         // rt_sigaction
765         data8 0                         // rt_sigpending
766         data8 fsys_rt_sigprocmask       // rt_sigprocmask
767         data8 0                         // rt_sigqueueinfo      // 1180
768         data8 0                         // rt_sigreturn
769         data8 0                         // rt_sigsuspend
770         data8 0                         // rt_sigtimedwait
771         data8 0                         // getcwd
772         data8 0                         // capget               // 1185
773         data8 0                         // capset
774         data8 0                         // sendfile
775         data8 0
776         data8 0
777         data8 0                         // socket               // 1190
778         data8 0                         // bind
779         data8 0                         // connect
780         data8 0                         // listen
781         data8 0                         // accept
782         data8 0                         // getsockname          // 1195
783         data8 0                         // getpeername
784         data8 0                         // socketpair
785         data8 0                         // send
786         data8 0                         // sendto
787         data8 0                         // recv                 // 1200
788         data8 0                         // recvfrom
789         data8 0                         // shutdown
790         data8 0                         // setsockopt
791         data8 0                         // getsockopt
792         data8 0                         // sendmsg              // 1205
793         data8 0                         // recvmsg
794         data8 0                         // pivot_root
795         data8 0                         // mincore
796         data8 0                         // madvise
797         data8 0                         // newstat              // 1210
798         data8 0                         // newlstat
799         data8 0                         // newfstat
800         data8 0                         // clone2
801         data8 0                         // getdents64
802         data8 0                         // getunwind            // 1215
803         data8 0                         // readahead
804         data8 0                         // setxattr
805         data8 0                         // lsetxattr
806         data8 0                         // fsetxattr
807         data8 0                         // getxattr             // 1220
808         data8 0                         // lgetxattr
809         data8 0                         // fgetxattr
810         data8 0                         // listxattr
811         data8 0                         // llistxattr
812         data8 0                         // flistxattr           // 1225
813         data8 0                         // removexattr
814         data8 0                         // lremovexattr
815         data8 0                         // fremovexattr
816         data8 0                         // tkill
817         data8 0                         // futex                // 1230
818         data8 0                         // sched_setaffinity
819         data8 0                         // sched_getaffinity
820         data8 fsys_set_tid_address      // set_tid_address
821         data8 0                         // fadvise64_64
822         data8 0                         // tgkill               // 1235
823         data8 0                         // exit_group
824         data8 0                         // lookup_dcookie
825         data8 0                         // io_setup
826         data8 0                         // io_destroy
827         data8 0                         // io_getevents         // 1240
828         data8 0                         // io_submit
829         data8 0                         // io_cancel
830         data8 0                         // epoll_create
831         data8 0                         // epoll_ctl
832         data8 0                         // epoll_wait           // 1245
833         data8 0                         // restart_syscall
834         data8 0                         // semtimedop
835         data8 0                         // timer_create
836         data8 0                         // timer_settime
837         data8 0                         // timer_gettime        // 1250
838         data8 0                         // timer_getoverrun
839         data8 0                         // timer_delete
840         data8 0                         // clock_settime
841         data8 0                         // clock_gettime
842         data8 0                         // clock_getres         // 1255
843         data8 0                         // clock_nanosleep
844         data8 0                         // fstatfs64
845         data8 0                         // statfs64
846         data8 0
847         data8 0                                                 // 1260
848         data8 0
849         data8 0                         // mq_open
850         data8 0                         // mq_unlink
851         data8 0                         // mq_timedsend
852         data8 0                         // mq_timedreceive      // 1265
853         data8 0                         // mq_notify
854         data8 0                         // mq_getsetattr
855         data8 0                         // kexec_load
856         data8 0
857         data8 0                                                 // 1270
858         data8 0
859         data8 0
860         data8 0
861         data8 0
862         data8 0                                                 // 1275
863         data8 0
864         data8 0
865         data8 0
866         data8 0
867
/*
 * Size check for the table above.  Every slot is emitted with data8
 * (8 bytes), so a table with exactly NR_syscalls slots ends at
 * fsyscall_table + 8*NR_syscalls.  The GNU assembler does not allow .org
 * to move the location counter backwards, so if slots are added to the
 * table without NR_syscalls being increased to match, this directive is
 * rejected and the build fails instead of silently producing an oversized
 * table.  If the table is shorter than NR_syscalls slots, .org pads the
 * gap with zero bytes, which read the same as additional "data8 0" slots.
 */
868         .org fsyscall_table + 8*NR_syscalls     // guard against failures to increase NR_syscalls