- add r9=TI_FLAGS+IA64_TASK_SIZE,r16
- addl r3=THIS_CPU(cpu_info),r0
-
- mov.m r31=ar.itc // put time stamp into r31 (ITC) == now (35 cyc)
-#ifdef CONFIG_SMP
- movl r10=__per_cpu_offset
- movl r2=sal_platform_features
- ;;
-
- ld8 r2=[r2]
- movl r19=xtime // xtime is a timespec struct
-
- ld8 r10=[r10] // r10 <- __per_cpu_offset[0]
- addl r21=THIS_CPU(cpu_info),r0
- ;;
- add r10=r21, r10 // r10 <- &cpu_data(time_keeper_id)
- tbit.nz p8,p0 = r2, IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT
-(p8) br.spnt.many fsys_fallback_syscall
-#else
- ;;
- mov r10=r3
- movl r19=xtime // xtime is a timespec struct
-#endif
- ld4 r9=[r9]
- movl r17=xtime_lock
- ;;
-
- // r32, r33 should contain the 2 args of gettimeofday
- adds r21=IA64_CPUINFO_ITM_NEXT_OFFSET, r10
- mov r2=-1
- tnat.nz p6,p7=r32 // guard against NaT args
- ;;
-
- adds r10=IA64_CPUINFO_ITM_DELTA_OFFSET, r10
-(p7) tnat.nz p6,p0=r33
-(p6) br.cond.spnt.few .fail_einval
-
- adds r8=IA64_CPUINFO_NSEC_PER_CYC_OFFSET, r3
- movl r24=2361183241434822607 // for division hack (only for / 1000)
- ;;
-
- ldf8 f7=[r10] // f7 now contains itm_delta
- setf.sig f11=r2
- adds r10=8, r32
-
- adds r20=IA64_TIMESPEC_TV_NSEC_OFFSET, r19 // r20 = &xtime->tv_nsec
- movl r26=jiffies
-
- setf.sig f9=r24 // f9 is used for division hack
- movl r27=wall_jiffies
-
- and r9=TIF_ALLWORK_MASK,r9
- movl r25=last_nsec_offset
- ;;
-
- /*
- * Verify that we have permission to write to struct timeval. Note:
- * Another thread might unmap the mapping before we actually get
- * to store the result. That's OK as long as the stores are also
- * protect by EX().
- */
-EX(.fail_efault, probe.w.fault r32, 3) // this must come _after_ NaT-check
-EX(.fail_efault, probe.w.fault r10, 3) // this must come _after_ NaT-check
- nop 0
-
- ldf8 f10=[r8] // f10 <- local_cpu_data->nsec_per_cyc value
- cmp.ne p8, p0=0, r9
-(p8) br.spnt.many fsys_fallback_syscall
- ;;
-.retry: // *** seq = read_seqbegin(&xtime_lock); ***
- ld4.acq r23=[r17] // since &xtime_lock == &xtime_lock->sequence
- ld8 r14=[r25] // r14 (old) = last_nsec_offset
-
- ld8 r28=[r26] // r28 = jiffies
- ld8 r29=[r27] // r29 = wall_jiffies
- ;;
-
- ldf8 f8=[r21] // f8 now contains itm_next
- sub r28=r29, r28, 1 // r28 now contains "-(lost + 1)"
- tbit.nz p9, p10=r23, 0 // p9 <- is_odd(r23), p10 <- is_even(r23)
- ;;
-
- ld8 r2=[r19] // r2 = sec = xtime.tv_sec
- ld8 r29=[r20] // r29 = nsec = xtime.tv_nsec
-
- setf.sig f6=r28 // f6 <- -(lost + 1) (6 cyc)
- ;;
-
+ mov r31 = r32
+ tnat.nz p6,p0 = r33 // guard against NaT argument
+(p6) br.cond.spnt.few .fail_einval
+ mov r30 = CLOCK_DIVIDE_BY_1000
+ ;;
+.gettime:
+ // Register map
+ // Incoming r31 = pointer to address where to place result
+ // r30 = flags determining how time is processed
+ // r2,r3 = temp r4-r7 preserved
+ // r8 = result nanoseconds
+ // r9 = result seconds
+ // r10 = temporary storage for clock difference
+ // r11 = preserved: saved ar.pfs
+ // r12 = preserved: memory stack
+ // r13 = preserved: thread pointer
+ // r14 = debug pointer / usable
+ // r15 = preserved: system call number
+ // r16 = preserved: current task pointer
+ // r17 = wall_to_monotonic field value (tv_sec, then tv_nsec)
+ // r18 = time_interpolator->offset
+ // r19 = address of wall_to_monotonic
+ // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
+ // r21 = shift factor
+ // r22 = address of time interpolator->last_counter
+ // r23 = address of time_interpolator->last_cycle
+ // r24 = address of time_interpolator->offset
+ // r25 = last_cycle value
+ // r26 = last_counter value
+ // r27 = pointer to xtime
+ // r28 = sequence number at the beginning of critical section
+ // r29 = address of seqlock
+ // r30 = time processing flags / memory address
+ // r31 = pointer to result
+ // Predicates
+ // p6,p7 short term use
+ // p8 = timesource ar.itc
+ // p9 = timesource mmio64
+ // p10 = timesource mmio32
+ // p11 = timesource not to be handled by asm code
+ // p12 = memory time source ( = p9 | p10)
+ // p13 = do cmpxchg with time_interpolator_last_cycle
+ // p14 = Divide by 1000
+ // p15 = Add monotonic
+ //
+ // Note that instructions are optimized for McKinley. McKinley can process two
+ // bundles simultaneously and therefore we continuously try to feed the CPU
+ // two bundles and then a stop.
+ tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure
+ mov pr = r30,0xc000 // Set predicates according to function
+ add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
+ movl r20 = time_interpolator
+ ;;
+ ld8 r20 = [r20] // get pointer to time_interpolator structure
+ movl r29 = xtime_lock
+ ld4 r2 = [r2] // process work pending flags
+ movl r27 = xtime
+ ;; // only one bundle here
+ ld8 r21 = [r20] // first quad with control information
+ and r2 = TIF_ALLWORK_MASK,r2
+(p6) br.cond.spnt.few .fail_einval // deferred branch
+ ;;
+ add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
+ extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc
+ extr r8 = r21,0,16 // time_interpolator->source
+ nop.i 123
+ cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
+(p6) br.cond.spnt.many fsys_fallback_syscall
+ ;;
+ cmp.eq p8,p12 = 0,r8 // Check for cpu timer
+ cmp.eq p9,p0 = 1,r8 // MMIO64 ?
+ extr r2 = r21,24,8 // time_interpolator->jitter
+ cmp.eq p10,p0 = 2,r8 // MMIO32 ?
+ cmp.ltu p11,p0 = 2,r8 // function or other clock
+(p11) br.cond.spnt.many fsys_fallback_syscall
+ ;;
+ setf.sig f7 = r3 // Setup for scaling of counter
+(p15) movl r19 = wall_to_monotonic
+(p12) ld8 r30 = [r10]
+ cmp.ne p13,p0 = r2,r0 // need jitter compensation?
+ extr r21 = r21,16,8 // shift factor
+ ;;
+.time_redo:
+ .pred.rel.mutex p8,p9,p10
+ ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes
+(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
+ add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
+(p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues..
+(p10) ld4 r2 = [r30] // readl(ti->address)
+(p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
+ ;; // could be removed by moving the last add upward
+ ld8 r26 = [r22] // time_interpolator->last_counter
+(p13) ld8 r25 = [r23] // time_interpolator->last_cycle
+ add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
+(p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET
+ ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET
+ nop.i 123
+ ;;
+ ld8 r18 = [r24] // time_interpolator->offset
+ ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec
+(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
+ ;;
+(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
+ sub r10 = r2,r26 // current_counter - last_counter
+ ;;
+(p6) sub r10 = r25,r26 // time we got was less than last_cycle
+(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg
+ ;;
+ setf.sig f8 = r10
+ nop.i 123
+ ;;
+(p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv
+EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time
+ xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
+(p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs
+ ;;
+(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
+(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo
+ // simulate tbit.nz.or p7,p0 = r28,0
+ and r28 = ~1,r28 // Make sequence even to force retry if odd
+ getf.sig r2 = f8