ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / arch / ia64 / kernel / fsys.S
1 /*
2  * This file contains the light-weight system call handlers (fsyscall-handlers).
3  *
4  * Copyright (C) 2003 Hewlett-Packard Co
5  *      David Mosberger-Tang <davidm@hpl.hp.com>
6  *
7  * 25-Sep-03 davidm     Implement fsys_rt_sigprocmask().
8  * 18-Feb-03 louisk     Implement fsys_gettimeofday().
9  * 28-Feb-03 davidm     Fixed several bugs in fsys_gettimeofday().  Tuned it some more,
10  *                      probably broke it along the way... ;-)
11  */
12
13 #include <asm/asmmacro.h>
14 #include <asm/errno.h>
15 #include <asm/offsets.h>
16 #include <asm/percpu.h>
17 #include <asm/thread_info.h>
18 #include <asm/sal.h>
19 #include <asm/signal.h>
20 #include <asm/system.h>
21 #include <asm/unistd.h>
22
23 #include "entry.h"
24
25 /*
26  * See Documentation/ia64/fsys.txt for details on fsyscalls.
27  *
28  * On entry to an fsyscall handler:
29  *   r10        = 0 (i.e., defaults to "successful syscall return")
30  *   r11        = saved ar.pfs (a user-level value)
31  *   r15        = system call number
32  *   r16        = "current" task pointer (in normal kernel-mode, this is in r13)
33  *   r32-r39    = system call arguments
34  *   b6         = return address (a user-level value)
35  *   ar.pfs     = previous frame-state (a user-level value)
36  *   PSR.be     = cleared to zero (i.e., little-endian byte order is in effect)
37  *   all other registers may contain values passed in from user-mode
38  *
39  * On return from an fsyscall handler:
40  *   r11        = saved ar.pfs (as passed into the fsyscall handler)
41  *   r15        = system call number (as passed into the fsyscall handler)
42  *   r32-r39    = system call arguments (as passed into the fsyscall handler)
43  *   b6         = return address (as passed into the fsyscall handler)
44  *   ar.pfs     = previous frame-state (as passed into the fsyscall handler)
45  */
46
/*
 * fsys_ni_syscall: fsyscall handler that does no work.  It unconditionally
 * returns with r8 = ENOSYS and r10 = -1.
 */
47 ENTRY(fsys_ni_syscall)
48         .prologue
49         .altrp b6
50         .body
51         mov r8=ENOSYS                   // r8 = error code
52         mov r10=-1                      // replace the r10 = 0 ("successful syscall return") entry default
53         FSYS_RETURN
54 END(fsys_ni_syscall)
55
/*
 * fsys_getpid: light-weight getpid().  Returns current->tgid in r8, unless
 * one of the TIF_ALLWORK_MASK flags is set, in which case control is handed to
 * fsys_fallback_syscall.  r10 is not written, so it keeps its entry value.
 */
56 ENTRY(fsys_getpid)
57         .prologue
58         .altrp b6
59         .body
60         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = address of the thread-info flags word
61         ;;
62         ld4 r9=[r9]                             // r9 = flags
63         add r8=IA64_TASK_TGID_OFFSET,r16
64         ;;
65         and r9=TIF_ALLWORK_MASK,r9              // keep only the "work pending" bits
66         ld4 r8=[r8]                             // r8 = current->tgid
67         ;;
68         cmp.ne p8,p0=0,r9                       // p8 <- some work bit is set
69 (p8)    br.spnt.many fsys_fallback_syscall
70         FSYS_RETURN
71 END(fsys_getpid)
72
/*
 * fsys_getppid: light-weight getppid().
 *
 * Returns current->group_leader->real_parent->tgid in r8.  r10 is not written,
 * so it keeps its entry value.
 *
 * If any TIF_ALLWORK_MASK flag is set in the thread-info flags, the handler
 * branches to fsys_fallback_syscall instead of returning.  This check is made
 * on both SMP and UP builds.
 *
 * On SMP, real_parent is read a second time after the tgid load; if the two
 * reads differ, the tgid load is redone.
 *
 * r17-r19 are zeroed before returning so that no kernel pointers are left in
 * scratch registers.
 */
ENTRY(fsys_getppid)
        .prologue
        .altrp b6
        .body
        add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
        ;;
        ld8 r17=[r17]                           // r17 = current->group_leader
        add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = address of the thread-info flags word
        ;;

        ld4 r9=[r9]                             // r9 = flags
        add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
        ;;
        and r9=TIF_ALLWORK_MASK,r9              // keep only the "work pending" bits

1:      ld8 r18=[r17]                           // r18 = current->group_leader->real_parent
        ;;
        cmp.ne p8,p0=0,r9                       // p8 <- some work bit is set
        add r8=IA64_TASK_TGID_OFFSET,r18        // r8 = &current->group_leader->real_parent->tgid
        ;;

        /*
         * The .acq is needed to ensure that the read of tgid has returned its data before
         * we re-check "real_parent".
         */
        ld4.acq r8=[r8]                         // r8 = current->group_leader->real_parent->tgid
#ifdef CONFIG_SMP
        /*
         * Re-read current->group_leader->real_parent.
         */
        ld8 r19=[r17]                           // r19 = current->group_leader->real_parent
(p8)    br.spnt.many fsys_fallback_syscall      // pending work -> heavy-weight syscall
        ;;
        cmp.ne p6,p0=r18,r19                    // did real_parent change?
        mov r19=0                       // i must not leak kernel bits...
(p6)    br.cond.spnt.few 1b                     // yes -> redo the read of tgid and the check
        ;;
        mov r17=0                       // i must not leak kernel bits...
        mov r18=0                       // i must not leak kernel bits...
#else
        /*
         * There is no second CPU to race with, so no re-check of real_parent is
         * needed, but pending work must still divert us to the heavy-weight path.
         */
(p8)    br.spnt.many fsys_fallback_syscall      // pending work -> heavy-weight syscall
        ;;
        mov r17=0                       // i must not leak kernel bits...
        mov r18=0                       // i must not leak kernel bits...
        mov r19=0                       // i must not leak kernel bits...
#endif
        FSYS_RETURN
END(fsys_getppid)
119
/*
 * fsys_set_tid_address: light-weight set_tid_address().
 *
 * In:   r32 = the single syscall argument
 * Out:  r8  = the 4-byte value loaded from current + IA64_TASK_PID_OFFSET
 *
 * Stores r32 (or -1 if the r32 register is NaT) into the 8-byte field at
 * current + IA64_TASK_CLEAR_CHILD_TID_OFFSET.  The store precedes the
 * pending-work branch in program order, so it is performed even when the
 * handler then hands off to fsys_fallback_syscall.
 */
120 ENTRY(fsys_set_tid_address)
121         .prologue
122         .altrp b6
123         .body
124         add r9=TI_FLAGS+IA64_TASK_SIZE,r16      // r9 = address of the thread-info flags word
125         ;;
126         ld4 r9=[r9]
127         tnat.z p6,p7=r32                // check argument register for being NaT
128         ;;
129         and r9=TIF_ALLWORK_MASK,r9
130         add r8=IA64_TASK_PID_OFFSET,r16
131         add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
132         ;;
133         ld4 r8=[r8]                     // r8 = return value
134         cmp.ne p8,p0=0,r9               // p8 <- some work bit is set
135         mov r17=-1
136         ;;
137 (p6)    st8 [r18]=r32                   // argument is not NaT: store it
138 (p7)    st8 [r18]=r17                   // argument is NaT: store -1 instead
139 (p8)    br.spnt.many fsys_fallback_syscall
140         ;;
141         mov r17=0                       // i must not leak kernel bits...
142         mov r18=0                       // i must not leak kernel bits...
143         FSYS_RETURN
144 END(fsys_set_tid_address)
145
146 /*
147  * Note 1: This routine uses floating-point registers, but only with registers that
148  *         operate on integers.  Because of that, we don't need to set ar.fpsr to the
149  *         kernel default value.
150  *
151  * Note 2: For now, we will assume that all CPUs run at the same clock-frequency.
152  *         If that wasn't the case, we would have to disable preemption (e.g.,
153  *         by disabling interrupts) between reading the ITC and reading
154  *         local_cpu_data->nsec_per_cyc.
155  *
156  * Note 3: On platforms where the ITC-drift bit is set in the SAL feature vector,
157  *         we ought to either skip the ITC-based interpolation or run an ntp-like
158  *         daemon to keep the ITCs from drifting too far apart.
159  */
160
/*
 * fsys_gettimeofday: light-weight gettimeofday().
 *
 * In:   r32 = first argument: address at which the two 8-byte results are stored
 *       r33 = second argument: only checked for NaT; nothing is stored through it here
 * Out:  success: r8 = 0, r10 = 0, seconds stored at [r32], microseconds at [r32+8]
 *       failure: r10 = -1 and
 *                r8 = EINVAL if r32 or r33 is NaT (.fail_einval), or
 *                r8 = EFAULT via the EX() annotations on the probes/stores (.fail_efault)
 *
 * Hands off to fsys_fallback_syscall if any TIF_ALLWORK_MASK flag is set or, on
 * SMP, if the ITC-drift bit is set in sal_platform_features.
 *
 * The time is xtime plus an offset interpolated from the ITC.  The xtime_lock
 * sequence is read before and after the data reads, and the whole computation
 * is retried if it was odd or changed in between.  last_nsec_offset is advanced
 * with cmpxchg8 when the newly computed offset is larger than the stored one.
 *
 * NOTE(review): .fail_einval and .fail_efault are also branched to from
 * fsys_rt_sigprocmask, so they must keep setting both r8 and r10.
 */
161 ENTRY(fsys_gettimeofday)
162         .prologue
163         .altrp b6
164         .body
165         add r9=TI_FLAGS+IA64_TASK_SIZE,r16
166         addl r3=THIS_CPU(cpu_info),r0
167
168         mov.m r31=ar.itc                // put time stamp into r31 (ITC) == now         (35 cyc)
169 #ifdef CONFIG_SMP
170         movl r10=__per_cpu_offset
171         movl r2=sal_platform_features
172         ;;
173
174         ld8 r2=[r2]
175         movl r19=xtime                  // xtime is a timespec struct
176
177         ld8 r10=[r10]                   // r10 <- __per_cpu_offset[0]
178         addl r21=THIS_CPU(cpu_info),r0
179         ;;
180         add r10=r21, r10                // r10 <- &cpu_data(time_keeper_id)
181         tbit.nz p8,p0 = r2, IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT
182 (p8)    br.spnt.many fsys_fallback_syscall
183 #else
184         ;;
185         mov r10=r3
186         movl r19=xtime                  // xtime is a timespec struct
187 #endif
188         ld4 r9=[r9]
189         movl r17=xtime_lock
190         ;;
191
192         // r32, r33 should contain the 2 args of gettimeofday
193         adds r21=IA64_CPUINFO_ITM_NEXT_OFFSET, r10
194         mov r2=-1
195         tnat.nz p6,p7=r32               // guard against NaT args
196         ;;
197
198         adds r10=IA64_CPUINFO_ITM_DELTA_OFFSET, r10
199 (p7)    tnat.nz p6,p0=r33
200 (p6)    br.cond.spnt.few .fail_einval
201
202         adds r8=IA64_CPUINFO_NSEC_PER_CYC_OFFSET, r3
203         movl r24=2361183241434822607    // ceil(2^68 / 125); for division hack (only for / 1000)
204         ;;
205
206         ldf8 f7=[r10]                   // f7 now contains itm_delta
207         setf.sig f11=r2                 // f11 <- -1 (multiplier used below to negate last_tick)
208         adds r10=8, r32                 // r10 <- address of the second 8-byte result slot
209
210         adds r20=IA64_TIMESPEC_TV_NSEC_OFFSET, r19      // r20 = &xtime->tv_nsec
211         movl r26=jiffies
212
213         setf.sig f9=r24                 // f9 is used for division hack
214         movl r27=wall_jiffies
215
216         and r9=TIF_ALLWORK_MASK,r9
217         movl r25=last_nsec_offset
218         ;;
219
220         /*
221          * Verify that we have permission to write to struct timeval.  Note:
222          * Another thread might unmap the mapping before we actually get
223          * to store the result.  That's OK as long as the stores are also
224          * protected by EX().
225          */
226 EX(.fail_efault, probe.w.fault r32, 3)          // this must come _after_ NaT-check
227 EX(.fail_efault, probe.w.fault r10, 3)          // this must come _after_ NaT-check
228         nop 0
229
230         ldf8 f10=[r8]                   // f10 <- local_cpu_data->nsec_per_cyc value
231         cmp.ne p8, p0=0, r9
232 (p8)    br.spnt.many fsys_fallback_syscall
233         ;;
234 .retry: // *** seq = read_seqbegin(&xtime_lock); ***
235         ld4.acq r23=[r17]               // since &xtime_lock == &xtime_lock->sequence
236         ld8 r14=[r25]                   // r14 (old) = last_nsec_offset
237
238         ld8 r28=[r26]                   // r28 = jiffies
239         ld8 r29=[r27]                   // r29 = wall_jiffies
240         ;;
241
242         ldf8 f8=[r21]                   // f8 now contains itm_next
243         sub r28=r29, r28, 1             // r28 now contains "-(lost + 1)"
244         tbit.nz p9, p10=r23, 0          // p9 <- is_odd(r23), p10 <- is_even(r23)
245         ;;
246
247         ld8 r2=[r19]                    // r2 = sec = xtime.tv_sec
248         ld8 r29=[r20]                   // r29 = nsec = xtime.tv_nsec
249
250         setf.sig f6=r28                 // f6 <- -(lost + 1)                            (6 cyc)
251         ;;
252
253         mf
254         xma.l f8=f6, f7, f8     // f8 (last_tick) <- -(lost + 1)*itm_delta + itm_next   (5 cyc)
255         nop 0
256
257         setf.sig f12=r31                // f12 <- ITC                                   (6 cyc)
258         // *** if (unlikely(read_seqretry(&xtime_lock, seq))) continue; ***
259         ld4 r24=[r17]                   // r24 = xtime_lock->sequence (re-read)
260         nop 0
261         ;;
262
263         mov r31=ar.itc                  // re-read ITC in case we .retry                (35 cyc)
264         xma.l f8=f11, f8, f12   // f8 (elapsed_cycles) <- (-1*last_tick + now) = (now - last_tick)
265         nop 0
266         ;;
267
268         getf.sig r18=f8                 // r18 <- (now - last_tick)
269         xmpy.l f8=f8, f10               // f8 <- elapsed_cycles*nsec_per_cyc (5 cyc)
270         add r3=r29, r14                 // r3 = (nsec + old)
271         ;;
272
273         cmp.lt p7, p8=r18, r0           // if now < last_tick, set p7 = 1, p8 = 0
274         getf.sig r18=f8                 // r18 = elapsed_cycles*nsec_per_cyc            (6 cyc)
275         nop 0
276         ;;
277
278 (p10)   cmp.ne p9, p0=r23, r24          // if xtime_lock->sequence != seq, set p9
279         shr.u r18=r18, IA64_NSEC_PER_CYC_SHIFT  // r18 <- offset
280 (p9)    br.spnt.many .retry
281         ;;
282
283         mov ar.ccv=r14                  // ar.ccv = old                                 (1 cyc)
284         cmp.leu p7, p8=r18, r14         // if (offset <= old), set p7 = 1, p8 = 0
285         ;;
286
287 (p8)    cmpxchg8.rel r24=[r25], r18, ar.ccv     // compare-and-exchange (atomic!)
288 (p8)    add r3=r29, r18                 // r3 = (nsec + offset)
289         ;;
290         shr.u r3=r3, 3                  // initiate dividing r3 by 1000
291         ;;
292         setf.sig f8=r3                  //                                              (6 cyc)
293         mov r10=1000000                 // r10 = 1000000
294         ;;
295 (p8)    cmp.ne.unc p9, p0=r24, r14      // p9 <- the cmpxchg8 found a value other than "old"
296         xmpy.hu f6=f8, f9               //                                              (5 cyc)
297 (p9)    br.spnt.many .retry
298         ;;
299
300         getf.sig r3=f6                  //                                              (6 cyc)
301         ;;
302         shr.u r3=r3, 4                  // end of division, r3 is divided by 1000 (=usec)
303         ;;
304
305 1:      cmp.geu p7, p0=r3, r10          // while (usec >= 1000000)
306         ;;
307 (p7)    sub r3=r3, r10                  // usec -= 1000000
308 (p7)    adds r2=1, r2                   // ++sec
309 (p7)    br.spnt.many 1b
310
311         // finally: r2 = sec, r3 = usec
312 EX(.fail_efault, st8 [r32]=r2)
313         adds r9=8, r32
314         mov r8=r0                       // success
315         ;;
316 EX(.fail_efault, st8 [r9]=r3)           // store them in the timeval struct
317         mov r10=0
318         FSYS_RETURN
319         /*
320          * Note: We are NOT clearing the scratch registers here.  Since the only things
321          *       in those registers are time-related variables and some addresses (which
322          *       can be obtained from System.map), none of this should be security-sensitive
323          *       and we should be fine.
324          */
325
326 .fail_einval:
327         mov r8=EINVAL                   // r8 = EINVAL
328         mov r10=-1                      // r10 = -1
329         FSYS_RETURN
330
331 .fail_efault:
332         mov r8=EFAULT                   // r8 = EFAULT
333         mov r10=-1                      // r10 = -1
334         FSYS_RETURN
335 END(fsys_gettimeofday)
336
/*
 * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
 *
 * In:   r32 = how, r33 = set, r34 = oset, r35 = sigsetsize
 *
 * Out:  success: r8 = 0 (r10 keeps its entry value)
 *       EINVAL (via .fail_einval) if
 *              - the how or sigsetsize register is NaT,
 *              - sigsetsize != _NSIG_WORDS*8, or
 *              - set != NULL and how is not SIG_BLOCK, SIG_UNBLOCK or SIG_SETMASK
 *       EFAULT (via .fail_efault) if
 *              - the set or oset register is NaT, or
 *              - an EX()-annotated access to *set or *oset faults
 *
 * The handler gives up and branches to fsys_fallback_syscall when
 *       - any TIF_ALLWORK_MASK flag is set,
 *       - (SMP) the siglock is already held by someone else, or
 *       - a signal would be pending once the new mask is in effect.
 *
 * If set == NULL the mask is not modified and how is not examined; the current
 * mask is simply copied to *oset (when oset != NULL).
 *
 * A bad how value is rejected before the lock is taken and before
 * current->blocked is written, but after *set has been read, so that a bad
 * set pointer still yields EFAULT in preference to EINVAL.
 *
 * .fail_einval and .fail_efault are the labels defined in fsys_gettimeofday.
 */
#if _NSIG_WORDS != 1
# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
#endif
#if (SIG_BLOCK != 0) || (SIG_UNBLOCK != 1) || (SIG_SETMASK != 2)
# error Sorry, fsys_rt_sigprocmask() assumes SIG_BLOCK, SIG_UNBLOCK, SIG_SETMASK are 0, 1, 2.
#endif
ENTRY(fsys_rt_sigprocmask)
        .prologue
        .altrp b6
        .body

        mf                                      // ensure reading of current->blocked is ordered
        add r2=IA64_TASK_BLOCKED_OFFSET,r16
        add r9=TI_FLAGS+IA64_TASK_SIZE,r16
        ;;
        /*
         * Since we're only reading a single word, we can do it
         * atomically without acquiring current->sighand->siglock.  To
         * be on the safe side, we need a fully-ordered load, though:
         */
        ld8.acq r3=[r2]                         // read/prefetch current->blocked
        ld4 r9=[r9]
        add r31=IA64_TASK_SIGHAND_OFFSET,r16
        ;;
#ifdef CONFIG_SMP
        ld8 r31=[r31]                           // r31 <- current->sighand
#endif
        and r9=TIF_ALLWORK_MASK,r9
        tnat.nz p6,p0=r32
        ;;
        cmp.ne p7,p0=0,r9
        tnat.nz.or p6,p0=r35
        tnat.nz p8,p0=r34
        ;;
        cmp.ne p15,p0=r0,r34                    // oset != NULL?
        cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
        tnat.nz.or p8,p0=r33

(p6)    br.spnt.few .fail_einval                // fail with EINVAL
(p7)    br.spnt.many fsys_fallback_syscall      // got pending kernel work...
(p8)    br.spnt.few .fail_efault                // fail with EFAULT
        ;;

        cmp.eq p6,p7=r0,r33                     // set == NULL?
        add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
(p6)    br.dpnt.many .store_mask                // -> short-circuit to just reading the signal mask

        /* Argh, we actually have to do some work and _update_ the signal mask: */

EX(.fail_efault, probe.r.fault r33, 3)          // verify user has read-access to *set
EX(.fail_efault, ld8 r14=[r33])                 // r14 <- *set
        mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
        /*
         * Validate HOW while nothing has been modified yet.  The three legal
         * values are 0, 1 and 2 (checked at build time above), so a single
         * unsigned 32-bit compare against SIG_SETMASK catches everything else,
         * including negative values.
         */
        cmp4.ltu p9,p0=SIG_SETMASK,r32          // p9 <- (unsigned) how > SIG_SETMASK
(p9)    br.spnt.few .fail_einval                // bad HOW value: fail, mask untouched
        ;;

        rsm psr.i                               // mask interrupt delivery
        mov ar.ccv=0
        andcm r14=r14,r17                       // filter out SIGKILL & SIGSTOP

#ifdef CONFIG_SMP
        mov r17=1
        ;;
        cmpxchg4.acq r18=[r31],r17,ar.ccv       // try to acquire the lock
        ;;
        ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
        cmp4.ne p6,p0=r18,r0
(p6)    br.cond.spnt.many .lock_contention
        ;;
#else
        ld8 r3=[r2]                     // re-read current->blocked now that we hold the lock
#endif
        add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
        add r19=IA64_TASK_SIGNAL_OFFSET,r16
        cmp4.eq p6,p0=SIG_BLOCK,r32
        ;;
        ld8 r19=[r19]                   // r19 <- current->signal
        cmp4.eq p7,p0=SIG_UNBLOCK,r32
        cmp4.eq p8,p0=SIG_SETMASK,r32
        ;;
        ld8 r18=[r18]                   // r18 <- current->pending.signal
        .pred.rel.mutex p6,p7,p8
(p6)    or r14=r3,r14                   // SIG_BLOCK
(p7)    andcm r14=r3,r14                // SIG_UNBLOCK

(p8)    mov r14=r14                     // SIG_SETMASK
        // recalc_sigpending()
        add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19

        add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
        ;;
        ld4 r17=[r17]           // r17 <- current->signal->group_stop_count

        ld8 r19=[r19]           // r19 <- current->signal->shared_pending
        ;;
        cmp4.gt p6,p7=r17,r0    // p6/p7 <- (current->signal->group_stop_count > 0)?

        or r18=r18,r19          // r18 <- current->pending | current->signal->shared_pending
        ;;
        // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
        andcm r18=r18,r14
        add r9=TI_FLAGS+IA64_TASK_SIZE,r16
        ;;

(p7)    cmp.ne.or.andcm p6,p7=r18,r0            // p6/p7 <- signal pending
        mov r19=0                                       // i must not leak kernel bits...
(p6)    br.cond.dpnt.many .sig_pending
        ;;

        /*
         * Clear TIF_SIGPENDING in the thread-info flags with a compare-and-exchange
         * loop.  The cmpxchg result goes into r8 (free here, and reset to 0 at
         * .store_mask) so that r14 keeps holding the new mask across a retry.
         */
1:      ld4 r17=[r9]                            // r17 <- current->thread_info->flags
        ;;
        mov ar.ccv=r17
        and r18=~_TIF_SIGPENDING,r17            // r18 <- r17 & ~(1 << TIF_SIGPENDING)
        ;;

        st8 [r2]=r14                            // update current->blocked with new mask
        cmpxchg4.acq r8=[r9],r18,ar.ccv         // current->thread_info->flags <- r18
        ;;
        cmp.ne p6,p0=r17,r8                     // update failed?
(p6)    br.cond.spnt.few 1b                     // yes -> retry

#ifdef CONFIG_SMP
        st4.rel [r31]=r0                        // release the lock
#endif
        ssm psr.i
        ;;

        srlz.d                                  // ensure psr.i is set again
        mov r18=0                                       // i must not leak kernel bits...

.store_mask:
EX(.fail_efault, (p15) probe.w.fault r34, 3)    // verify user has write-access to *oset
EX(.fail_efault, (p15) st8 [r34]=r3)
        mov r2=0                                        // i must not leak kernel bits...
        mov r3=0                                        // i must not leak kernel bits...
        mov r8=0                                // return 0
        mov r9=0                                        // i must not leak kernel bits...
        mov r14=0                                       // i must not leak kernel bits...
        mov r17=0                                       // i must not leak kernel bits...
        mov r31=0                                       // i must not leak kernel bits...
        FSYS_RETURN

.sig_pending:
#ifdef CONFIG_SMP
        st4.rel [r31]=r0                        // release the lock
#endif
        ssm psr.i
        ;;
        srlz.d
        br.sptk.many fsys_fallback_syscall      // with signal pending, do the heavy-weight syscall

#ifdef CONFIG_SMP
.lock_contention:
        /* Rather than spinning here, fall back on doing a heavy-weight syscall.  */
        ssm psr.i
        ;;
        srlz.d
        br.sptk.many fsys_fallback_syscall
#endif
END(fsys_rt_sigprocmask)
502
/*
 * fsys_fallback_syscall: branch target used by the light-weight handlers when
 * they cannot finish the job themselves.  It loads the heavy-weight entry
 * point for syscall number r15 from sys_call_table into r18, captures psr,
 * ar.rsc, ar.fpsr and ar.pfs in r29, r27, r21 and r26, and then runs straight
 * on into fsys_bubble_down (there is no branch or return at the end).
 */
503 ENTRY(fsys_fallback_syscall)
504         .prologue
505         .altrp b6
506         .body
507         /*
508          * We only get here from light-weight syscall handlers.  Thus, we already
509          * know that r15 contains a valid syscall number.  No need to re-check.
510          */
511         adds r17=-1024,r15                      // r17 = syscall number - 1024 (table index)
512         movl r14=sys_call_table
513         ;;
514         shladd r18=r17,3,r14                    // r18 = sys_call_table + 8*r17
515         ;;
516         ld8 r18=[r18]                           // load normal (heavy-weight) syscall entry-point
517         mov r29=psr                             // read psr (12 cyc load latency)
518         mov r27=ar.rsc
519         mov r21=ar.fpsr
520         mov r26=ar.pfs
521 END(fsys_fallback_syscall)
522         /* FALL THROUGH */
/*
 * fsys_bubble_down: enter the kernel proper to run the heavy-weight syscall
 * whose entry point is in r18.  Reached by falling through from
 * fsys_fallback_syscall; the block comment inside the body lists the register
 * state expected on entry.
 *
 * In outline: save the user-level state that is about to be overwritten into
 * scratch registers, restrict psr.l to PSR_PRESERVED_BITS, switch to the
 * kernel register backing store, call ia64_syscall_setup, re-enable
 * interrupts, and finally either call the syscall through b6 with rp set to
 * ia64_ret_from_syscall, or branch to ia64_trace_syscall when
 * TIF_SYSCALL_TRACE is set in the thread-info flags.
 */
523 GLOBAL_ENTRY(fsys_bubble_down)
524         .prologue
525         .altrp b6
526         .body
527         /*
528          * We get here for syscalls that don't have a lightweight handler.  For those, we
529          * need to bubble down into the kernel and that requires setting up a minimal
530          * pt_regs structure, and initializing the CPU state more or less as if an
531          * interruption had occurred.  To make syscall-restarts work, we setup pt_regs
532          * such that cr_iip points to the second instruction in syscall_via_break.
533          * Decrementing the IP hence will restart the syscall via break and not
534          * decrementing IP will return us to the caller, as usual.  Note that we preserve
535          * the value of psr.pp rather than initializing it from dcr.pp.  This makes it
536          * possible to distinguish fsyscall execution from other privileged execution.
537          *
538          * On entry:
539          *      - normal fsyscall handler register usage, except that we also have:
540          *      - r18: address of syscall entry point
541          *      - r21: ar.fpsr
542          *      - r26: ar.pfs
543          *      - r27: ar.rsc
544          *      - r29: psr
545          */
546 #       define PSR_PRESERVED_BITS       (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
547                                          | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \
548                                          | IA64_PSR_IC)
549         /*
550          * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc.  The rest we have
551          * to synthesize.
552          */
553 #       define PSR_ONE_BITS             ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
554                                          | IA64_PSR_BN)
555
556         invala
557         movl r8=PSR_ONE_BITS
558
559         mov r25=ar.unat                 // save ar.unat (5 cyc)
560         movl r9=PSR_PRESERVED_BITS
561
562         mov ar.rsc=0                    // set enforced lazy mode, pl 0, little-endian, loadrs=0
563         movl r28=__kernel_syscall_via_break
564         ;;
565         mov r23=ar.bspstore             // save ar.bspstore (12 cyc)
566         mov r31=pr                      // save pr (2 cyc)
567         mov r20=r1                      // save caller's gp in r20
568         ;;
569         mov r2=r16                      // copy current task addr to addl-addressable register
570         and r9=r9,r29                   // r9 = psr & PSR_PRESERVED_BITS
571         mov r19=b6                      // save b6 (2 cyc)
572         ;;
573         mov psr.l=r9                    // slam the door (17 cyc to srlz.i)
574         or r29=r8,r29                   // construct cr.ipsr value to save
575         addl r22=IA64_RBS_OFFSET,r2     // compute base of RBS
576         ;;
577         mov.m r24=ar.rnat               // read ar.rnat (5 cyc lat)
578         lfetch.fault.excl.nt1 [r22]
579         adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2
580
581         // ensure previous insn group is issued before we stall for srlz.i:
582         ;;
583         srlz.i                          // ensure new psr.l has been established
584         /////////////////////////////////////////////////////////////////////////////
585         ////////// from this point on, execution is not interruptible anymore
586         /////////////////////////////////////////////////////////////////////////////
587         addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2    // compute base of memory stack
588         cmp.ne pKStk,pUStk=r0,r0        // set pKStk <- 0, pUStk <- 1
589         ;;
590         st1 [r16]=r0                    // clear current->thread.on_ustack flag
591         mov ar.bspstore=r22             // switch to kernel RBS
592         mov b6=r18                      // copy syscall entry-point to b6 (7 cyc)
593         add r3=TI_FLAGS+IA64_TASK_SIZE,r2
594         ;;
595         ld4 r3=[r3]                             // r3 = current_thread_info()->flags
596         mov r18=ar.bsp                  // save (kernel) ar.bsp (12 cyc)
597         mov ar.rsc=0x3                  // set eager mode, pl 0, little-endian, loadrs=0
598         br.call.sptk.many b7=ia64_syscall_setup
599         ;;
600         ssm psr.i
601         movl r2=ia64_ret_from_syscall
602         ;;
603         mov rp=r2                               // set the real return addr
604         tbit.z p8,p0=r3,TIF_SYSCALL_TRACE       // p8 <- syscall tracing is off
605
606 (p8)    br.call.sptk.many b6=b6                 // ignore this return addr
607         br.cond.sptk ia64_trace_syscall         // reached only when p8 is clear (tracing on)
608 END(fsys_bubble_down)
609
610         .rodata
611         .align 8
612         .globl fsyscall_table
613
614         data8 fsys_bubble_down
615 fsyscall_table:
616         data8 fsys_ni_syscall
617         data8 0                         // exit                 // 1025
618         data8 0                         // read
619         data8 0                         // write
620         data8 0                         // open
621         data8 0                         // close
622         data8 0                         // creat                // 1030
623         data8 0                         // link
624         data8 0                         // unlink
625         data8 0                         // execve
626         data8 0                         // chdir
627         data8 0                         // fchdir               // 1035
628         data8 0                         // utimes
629         data8 0                         // mknod
630         data8 0                         // chmod
631         data8 0                         // chown
632         data8 0                         // lseek                // 1040
633         data8 fsys_getpid               // getpid
634         data8 fsys_getppid              // getppid
635         data8 0                         // mount
636         data8 0                         // umount
637         data8 0                         // setuid               // 1045
638         data8 0                         // getuid
639         data8 0                         // geteuid
640         data8 0                         // ptrace
641         data8 0                         // access
642         data8 0                         // sync                 // 1050
643         data8 0                         // fsync
644         data8 0                         // fdatasync
645         data8 0                         // kill
646         data8 0                         // rename
647         data8 0                         // mkdir                // 1055
648         data8 0                         // rmdir
649         data8 0                         // dup
650         data8 0                         // pipe
651         data8 0                         // times
652         data8 0                         // brk                  // 1060
653         data8 0                         // setgid
654         data8 0                         // getgid
655         data8 0                         // getegid
656         data8 0                         // acct
657         data8 0                         // ioctl                // 1065
658         data8 0                         // fcntl
659         data8 0                         // umask
660         data8 0                         // chroot
661         data8 0                         // ustat
662         data8 0                         // dup2                 // 1070
663         data8 0                         // setreuid
664         data8 0                         // setregid
665         data8 0                         // getresuid
666         data8 0                         // setresuid
667         data8 0                         // getresgid            // 1075
668         data8 0                         // setresgid
669         data8 0                         // getgroups
670         data8 0                         // setgroups
671         data8 0                         // getpgid
672         data8 0                         // setpgid              // 1080
673         data8 0                         // setsid
674         data8 0                         // getsid
675         data8 0                         // sethostname
676         data8 0                         // setrlimit
677         data8 0                         // getrlimit            // 1085
678         data8 0                         // getrusage
679         data8 fsys_gettimeofday         // gettimeofday
680         data8 0                         // settimeofday
681         data8 0                         // select
682         data8 0                         // poll                 // 1090
683         data8 0                         // symlink
684         data8 0                         // readlink
685         data8 0                         // uselib
686         data8 0                         // swapon
687         data8 0                         // swapoff              // 1095
688         data8 0                         // reboot
689         data8 0                         // truncate
690         data8 0                         // ftruncate
691         data8 0                         // fchmod
692         data8 0                         // fchown               // 1100
693         data8 0                         // getpriority
694         data8 0                         // setpriority
695         data8 0                         // statfs
696         data8 0                         // fstatfs
697         data8 0                         // gettid               // 1105
698         data8 0                         // semget
699         data8 0                         // semop
700         data8 0                         // semctl
701         data8 0                         // msgget
702         data8 0                         // msgsnd               // 1110
703         data8 0                         // msgrcv
704         data8 0                         // msgctl
705         data8 0                         // shmget
706         data8 0                         // shmat
707         data8 0                         // shmdt                // 1115
708         data8 0                         // shmctl
709         data8 0                         // syslog
710         data8 0                         // setitimer
711         data8 0                         // getitimer
712         data8 0                                                 // 1120
713         data8 0
714         data8 0
715         data8 0                         // vhangup
716         data8 0                         // lchown
717         data8 0                         // remap_file_pages     // 1125
718         data8 0                         // wait4
719         data8 0                         // sysinfo
720         data8 0                         // clone
721         data8 0                         // setdomainname
722         data8 0                         // newuname             // 1130
723         data8 0                         // adjtimex
724         data8 0
725         data8 0                         // init_module
726         data8 0                         // delete_module
727         data8 0                                                 // 1135
728         data8 0
729         data8 0                         // quotactl
730         data8 0                         // bdflush
731         data8 0                         // sysfs
732         data8 0                         // personality          // 1140
733         data8 0                         // afs_syscall
734         data8 0                         // setfsuid
735         data8 0                         // setfsgid
736         data8 0                         // getdents
737         data8 0                         // flock                // 1145
738         data8 0                         // readv
739         data8 0                         // writev
740         data8 0                         // pread64
741         data8 0                         // pwrite64
742         data8 0                         // sysctl               // 1150
743         data8 0                         // mmap
744         data8 0                         // munmap
745         data8 0                         // mlock
746         data8 0                         // mlockall
747         data8 0                         // mprotect             // 1155
748         data8 0                         // mremap
749         data8 0                         // msync
750         data8 0                         // munlock
751         data8 0                         // munlockall
752         data8 0                         // sched_getparam       // 1160
753         data8 0                         // sched_setparam
754         data8 0                         // sched_getscheduler
755         data8 0                         // sched_setscheduler
756         data8 0                         // sched_yield
757         data8 0                         // sched_get_priority_max       // 1165
758         data8 0                         // sched_get_priority_min
759         data8 0                         // sched_rr_get_interval
760         data8 0                         // nanosleep
761         data8 0                         // nfsservctl
762         data8 0                         // prctl                // 1170
763         data8 0                         // getpagesize
764         data8 0                         // mmap2
765         data8 0                         // pciconfig_read
766         data8 0                         // pciconfig_write
767         data8 0                         // perfmonctl           // 1175
768         data8 0                         // sigaltstack
769         data8 0                         // rt_sigaction
770         data8 0                         // rt_sigpending
771         data8 fsys_rt_sigprocmask       // rt_sigprocmask
772         data8 0                         // rt_sigqueueinfo      // 1180
773         data8 0                         // rt_sigreturn
774         data8 0                         // rt_sigsuspend
775         data8 0                         // rt_sigtimedwait
776         data8 0                         // getcwd
777         data8 0                         // capget               // 1185
778         data8 0                         // capset
779         data8 0                         // sendfile
780         data8 0
781         data8 0
782         data8 0                         // socket               // 1190
783         data8 0                         // bind
784         data8 0                         // connect
785         data8 0                         // listen
786         data8 0                         // accept
787         data8 0                         // getsockname          // 1195
788         data8 0                         // getpeername
789         data8 0                         // socketpair
790         data8 0                         // send
791         data8 0                         // sendto
792         data8 0                         // recv                 // 1200
793         data8 0                         // recvfrom
794         data8 0                         // shutdown
795         data8 0                         // setsockopt
796         data8 0                         // getsockopt
797         data8 0                         // sendmsg              // 1205
798         data8 0                         // recvmsg
799         data8 0                         // pivot_root
800         data8 0                         // mincore
801         data8 0                         // madvise
802         data8 0                         // newstat              // 1210
803         data8 0                         // newlstat
804         data8 0                         // newfstat
805         data8 0                         // clone2
806         data8 0                         // getdents64
807         data8 0                         // getunwind            // 1215
808         data8 0                         // readahead
809         data8 0                         // setxattr
810         data8 0                         // lsetxattr
811         data8 0                         // fsetxattr
812         data8 0                         // getxattr             // 1220
813         data8 0                         // lgetxattr
814         data8 0                         // fgetxattr
815         data8 0                         // listxattr
816         data8 0                         // llistxattr
817         data8 0                         // flistxattr           // 1225
818         data8 0                         // removexattr
819         data8 0                         // lremovexattr
820         data8 0                         // fremovexattr
821         data8 0                         // tkill
822         data8 0                         // futex                // 1230
823         data8 0                         // sched_setaffinity
824         data8 0                         // sched_getaffinity
825         data8 fsys_set_tid_address      // set_tid_address
826         data8 0                         // fadvise64_64
827         data8 0                         // tgkill               // 1235
828         data8 0                         // exit_group
829         data8 0                         // lookup_dcookie
830         data8 0                         // io_setup
831         data8 0                         // io_destroy
832         data8 0                         // io_getevents         // 1240
833         data8 0                         // io_submit
834         data8 0                         // io_cancel
835         data8 0                         // epoll_create
836         data8 0                         // epoll_ctl
837         data8 0                         // epoll_wait           // 1245
838         data8 0                         // restart_syscall
839         data8 0                         // semtimedop
840         data8 0                         // timer_create
841         data8 0                         // timer_settime
842         data8 0                         // timer_gettime        // 1250
843         data8 0                         // timer_getoverrun
844         data8 0                         // timer_delete
845         data8 0                         // clock_settime
846         data8 0                         // clock_gettime
847         data8 0                         // clock_getres         // 1255
848         data8 0                         // clock_nanosleep
849         data8 0                         // fstatfs64
850         data8 0                         // statfs64
851         data8 0
852         data8 0                                                 // 1260
853         data8 0
854         data8 0                         // mq_open
855         data8 0                         // mq_unlink
856         data8 0                         // mq_timedsend
857         data8 0                         // mq_timedreceive      // 1265
858         data8 0                         // mq_notify
859         data8 0                         // mq_getsetattr
860         data8 0
861         data8 0
862         data8 0                                                 // 1270
863         data8 0
864         data8 0
865         data8 0
866         data8 0
867         data8 0                                                 // 1275
868         data8 0
869         data8 0
870         data8 0
871         data8 0
872
873         .org fsyscall_table + 8*NR_syscalls     // guard against failures to increase NR_syscalls: .org cannot move the location counter backwards, so assembly fails if the data8 entries above extend past fsyscall_table + 8*NR_syscalls