- shr.u r8=r8,5 // we flush 32 bytes per iteration
- .save ar.lc, r3
- mov r3=ar.lc // save ar.lc
+ shr.u r23=in0,r20 // start / (stride size)
+ shr.u r22=r22,r20 // (last byte address) / (stride size)
+ shl r21=r21,r20 // r21: stride size of the i-cache(s)
+ ;;
+ sub r8=r22,r23 // number of strides - 1
+ shl r24=r23,r20 // r24: addresses for "fc.i" =
+ // "start" rounded down to stride boundary
+ .save ar.lc,r3
+ mov r3=ar.lc // save ar.lc