VServer 1.9.2 (patch-2.6.8.1-vs1.9.2.diff)
[linux-2.6.git] / arch / sparc64 / lib / VIScopy.S
1 /* $Id: VIScopy.S,v 1.27 2002/02/09 19:49:30 davem Exp $
2  * VIScopy.S: High speed copy operations utilizing the UltraSparc
3  *            Visual Instruction Set.
4  *
5  * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
6  * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
7  */
8
9 #include "VIS.h"
10
11         /* VIS code can be used for numerous copy/set operation variants.
12          * It can be made to work in the kernel, one single instance,
13          * for all of memcpy, copy_to_user, and copy_from_user by setting
14          * the ASI src/dest globals correctly.  Furthermore it can
15          * be used for kernel-->kernel page copies as well, a hook label
16          * is put in here just for this purpose.
17          *
18          * For userland, compiling this without __KERNEL__ defined makes
19          * it work just fine as a generic libc bcopy and memcpy.
20          * If for userland it is compiled with a 32bit gcc (but you need
21          * -Wa,-Av9a for as), the code will just rely on lower 32bits of
22          * IEU registers, if you compile it with 64bit gcc (ie. define
23          * __sparc_v9__), the code will use full 64bit.
24          */
25          
/*
 * Return sequences and exception-table wrappers.
 *
 * Two variants are selected by __KERNEL__.  The kernel variants restore
 * %asi and register every wrapped instruction in __ex_table; the userland
 * variants are plain returns and plain instructions.
 *
 * Note on the wrapper argument lists: the preprocessor splits arguments at
 * commas, so a two-operand instruction such as "LDUB [%o1] ASINORMAL, %o5"
 * arrives as the two parameters x and y and is re-joined as "x,y".
 */
26 #ifdef __KERNEL__
27
28 #include <asm/visasm.h>
29 #include <asm/thread_info.h>
30
/*
 * FPU_CLEAN_RETL: load the byte at [%g6 + TI_CURRENT_DS] into %o1, run
 * VISExit, clear %o0 and return; the delay slot of retl writes %o1 to %asi.
 */
31 #define FPU_CLEAN_RETL                                  \
32         ldub            [%g6 + TI_CURRENT_DS], %o1;     \
33         VISExit                                         \
34         clr             %o0;                            \
35         retl;                                           \
36          wr             %o1, %g0, %asi;
/* FPU_RETL: in this kernel build the body is the same as FPU_CLEAN_RETL. */
37 #define FPU_RETL                                        \
38         ldub            [%g6 + TI_CURRENT_DS], %o1;     \
39         VISExit                                         \
40         clr             %o0;                            \
41         retl;                                           \
42          wr             %o1, %g0, %asi;
/* NORMAL_RETL: same as above but without VISExit. */
43 #define NORMAL_RETL                                     \
44         ldub            [%g6 + TI_CURRENT_DS], %o1;     \
45         clr             %o0;                            \
46         retl;                                           \
47          wr             %o1, %g0, %asi;
/*
 * EX(x,y,a,b): emit instruction "x,y" at local label 98 and, in .fixup,
 * a stub at label 99 that branches to VIScopyfixup_ret with "a, b, %o1"
 * executed in the branch delay slot.  The pair (98b, 99b) is recorded in
 * __ex_table.  Assembly then continues in .text, 4-byte aligned.
 */
48 #define EX(x,y,a,b)                             \
49 98:     x,y;                                    \
50         .section .fixup;                        \
51         .align  4;                              \
52 99:     ba      VIScopyfixup_ret;               \
53          a, b, %o1;                             \
54         .section __ex_table;                    \
55         .align  4;                              \
56         .word   98b, 99b;                       \
57         .text;                                  \
58         .align  4;
/*
 * EX2(x,y,c,d,e,a,b): like EX, but the fixup stub first executes the
 * three-operand instruction "c, d, e" before the branch to
 * VIScopyfixup_ret (again with "a, b, %o1" in the delay slot).
 */
59 #define EX2(x,y,c,d,e,a,b)                      \
60 98:     x,y;                                    \
61         .section .fixup;                        \
62         .align  4;                              \
63 99:     c, d, e;                                \
64         ba      VIScopyfixup_ret;               \
65          a, b, %o1;                             \
66         .section __ex_table;                    \
67         .align  4;                              \
68         .word   98b, 99b;                       \
69         .text;                                  \
70         .align  4;
/*
 * EXO2(x,y): emit "x,y" and record VIScopyfixup_reto2 directly as its
 * handler; no per-site stub is generated.
 */
71 #define EXO2(x,y)                               \
72 98:     x,y;                                    \
73         .section __ex_table;                    \
74         .align  4;                              \
75         .word   98b, VIScopyfixup_reto2;        \
76         .text;                                  \
77         .align  4;
/*
 * EXVISN(x,y,n): emit "x,y" with handler VIScopyfixup_vis<n>, the name
 * being formed by token pasting.
 */
78 #define EXVISN(x,y,n)                           \
79 98:     x,y;                                    \
80         .section __ex_table;                    \
81         .align  4;                              \
82         .word   98b, VIScopyfixup_vis##n;       \
83         .text;                                  \
84         .align  4;
/*
 * EXT(start,end,handler): emit only a four-word table entry
 * "start, 0, end, handler" and no instruction.
 * NOTE(review): presumably a range entry covering start..end - confirm
 * against the exception table lookup code.
 */
85 #define EXT(start,end,handler)                  \
86         .section __ex_table;                    \
87         .align  4;                              \
88         .word   start, 0, end, handler;         \
89         .text;                                  \
90         .align  4;
91 #else
/*
 * Userland return sequences: every variant returns with %g6 copied to
 * %o0 in the delay slot.
 * NOTE(review): %g6 presumably holds the value memcpy must return (the
 * original destination) - it is set outside this view, confirm.
 * Without REGS_64BIT the FPU variants additionally write FPRS_FEF to
 * %fprs before returning.
 */
92 #ifdef REGS_64BIT
93 #define FPU_CLEAN_RETL                          \
94         retl;                                   \
95          mov    %g6, %o0;
96 #define FPU_RETL                                \
97         retl;                                   \
98          mov    %g6, %o0;
99 #else
100 #define FPU_CLEAN_RETL                          \
101         wr      %g0, FPRS_FEF, %fprs;           \
102         retl;                                   \
103          mov    %g6, %o0;
104 #define FPU_RETL                                \
105         wr      %g0, FPRS_FEF, %fprs;           \
106         retl;                                   \
107          mov    %g6, %o0;
108 #endif
109 #define NORMAL_RETL     \
110         retl;           \
111          mov    %g6, %o0;
/*
 * Userland has no exception table: the wrappers reduce to the bare
 * instruction and EXT expands to nothing.
 */
112 #define EX(x,y,a,b)             x,y
113 #define EX2(x,y,c,d,e,a,b)      x,y
114 #define EXO2(x,y)               x,y
115 #define EXVISN(x,y,n)           x,y
116 #define EXT(a,b,c)
117 #endif
/*
 * Shorthands for EXVISN with a fixed handler index: EXVIS uses index 0,
 * EXVIS1..EXVIS4 use indices 1..4.  In the kernel build the index selects
 * the handler symbol VIScopyfixup_vis<n>; in the userland build EXVISN
 * ignores it.
 */
118 #define EXVIS(x,y) EXVISN(x,y,0)
119 #define EXVIS1(x,y) EXVISN(x,y,1)
120 #define EXVIS2(x,y) EXVISN(x,y,2)
121 #define EXVIS3(x,y) EXVISN(x,y,3)
122 #define EXVIS4(x,y) EXVISN(x,y,4)
123
/*
 * FREG_FROB(f1..f9): run faligndata over the eight adjacent pairs
 * (f1,f2), (f2,f3), ... (f8,f9) of the nine double registers named by the
 * arguments.  The eight results always land in %f48, %f50, ... %f62.
 * Arguments are given without the leading '%'.
 */
124 #define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)           \
125         faligndata              %f1, %f2, %f48;                 \
126         faligndata              %f2, %f3, %f50;                 \
127         faligndata              %f3, %f4, %f52;                 \
128         faligndata              %f4, %f5, %f54;                 \
129         faligndata              %f5, %f6, %f56;                 \
130         faligndata              %f6, %f7, %f58;                 \
131         faligndata              %f7, %f8, %f60;                 \
132         faligndata              %f8, %f9, %f62;
133
/*
 * MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt): one 0x40-byte
 * step of the block copy loop.
 *
 *   - LDBLK from [%src] into the register group starting at %fdest
 *   - ASI_SETDST_BLK, then STBLK of the group starting at %fsrc to [%dest]
 *   - %src += 0x40, %len -= 0x40
 *   - if %len reached zero, branch to jmptgt
 *   - %dest += 0x40 sits in the branch delay slot, so it is executed on
 *     both the taken and the fall-through path
 *   - ASI_SETSRC_BLK is executed only on the fall-through path
 *
 * Both memory accesses are wrapped in EXVIS (handler index 0).  LDBLK,
 * STBLK, ASIBLK and the ASI_SET* helpers are defined outside this view
 * (presumably in VIS.h).  Register arguments are given without '%'.
 */
134 #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)    \
135         EXVIS(LDBLK             [%src] ASIBLK, %fdest);         \
136         ASI_SETDST_BLK                                          \
137         EXVIS(STBLK             %fsrc, [%dest] ASIBLK);         \
138         add                     %src, 0x40, %src;               \
139         subcc                   %len, 0x40, %len;               \
140         be,pn                   %xcc, jmptgt;                   \
141          add                    %dest, 0x40, %dest;             \
142         ASI_SETSRC_BLK
143
/*
 * LOOP_CHUNK1/2/3: MAIN_LOOP_CHUNK storing from %f48 and loading into the
 * group starting at %f0, %f16 and %f32 respectively.
 */
144 #define LOOP_CHUNK1(src, dest, len, branch_dest)                \
145         MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
146 #define LOOP_CHUNK2(src, dest, len, branch_dest)                \
147         MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
148 #define LOOP_CHUNK3(src, dest, len, branch_dest)                \
149         MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
150
/*
 * STORE_SYNC(dest, fsrc): block-store the register group starting at
 * %fsrc to [%dest] (wrapped in EXVIS, handler index 0) and advance %dest
 * by 0x40.  The macro itself emits no barrier; the invocation sites in
 * this file follow it with "membar #Sync".
 */
151 #define STORE_SYNC(dest, fsrc)                                  \
152         EXVIS(STBLK             %fsrc, [%dest] ASIBLK);         \
153         add                     %dest, 0x40, %dest;
154
/*
 * STORE_JUMP(dest, fsrc, target): block-store the group starting at %fsrc
 * to [%dest] (wrapped in EXVIS2), advance %dest by 0x40 and branch to
 * target.
 *
 * The macro ends with the branch itself, so whatever instruction follows
 * the invocation becomes its delay slot; the invocation sites in this
 * file place "membar #Sync" there.
 *
 * Kernel variant only: asi_dest is additionally rewritten as
 *     asi_dest = asi_dest ^ ASI_BLK_XOR1 ^ (old asi_dest >> 3)
 * with %g5 used as scratch for the shifted value.
 * NOTE(review): presumably this converts the block ASI back to the
 * matching non-block ASI - confirm against the ASI_BLK_* definitions.
 */
155 #ifdef __KERNEL__
156 #define STORE_JUMP(dest, fsrc, target)                          \
157         srl                     asi_dest, 3, %g5;               \
158         EXVIS2(STBLK            %fsrc, [%dest] ASIBLK);         \
159         xor                    asi_dest, ASI_BLK_XOR1, asi_dest;\
160         add                     %dest, 0x40, %dest;             \
161         xor                     asi_dest, %g5, asi_dest;        \
162         ba,pt                   %xcc, target;
163 #else
164 #define STORE_JUMP(dest, fsrc, target)                          \
165         EXVIS2(STBLK            %fsrc, [%dest] ASIBLK);         \
166         add                     %dest, 0x40, %dest;             \
167         ba,pt                   %xcc, target;
168 #endif
169
/*
 * VISLOOP_PAD: fifteen nops in the userland build, nothing in the kernel
 * build.  It is placed after each visN0..visN3 group of the jump table.
 * NOTE(review): presumably the padding makes every userland group exactly
 * 512 bytes long, matching the "sll %g2, 9" stride used to index the
 * table (the userland STORE_JUMP is three instructions shorter than the
 * kernel one) - confirm before changing the nop count.
 */
170 #ifndef __KERNEL__
171 #define VISLOOP_PAD nop; nop; nop; nop; \
172                     nop; nop; nop; nop; \
173                     nop; nop; nop; nop; \
174                     nop; nop; nop;
175 #else
176 #define VISLOOP_PAD
177 #endif
178
/*
 * FINISH_VISCHUNK(dest, f0, f1, left): emit one more aligned 8-byte
 * doubleword if at least 8 bytes remain.
 *
 *   - ASI_SETDST_NOBLK
 *   - %left -= 8; if the result is negative, branch to vis_out
 *   - faligndata %f0, %f1 -> %f48 is in the delay slot, so %f48 is
 *     computed on both paths
 *   - otherwise store %f48 to [%dest] (wrapped in EXVIS3) and advance
 *     %dest by 8
 *
 * The parameters named f0 and f1 are macro parameters, not fixed
 * registers: the caller passes the two register names to combine.
 */
179 #define FINISH_VISCHUNK(dest, f0, f1, left)                     \
180         ASI_SETDST_NOBLK                                        \
181         subcc                   %left, 8, %left;                \
182         bl,pn                   %xcc, vis_out;                  \
183          faligndata             %f0, %f1, %f48;                 \
184         EXVIS3(STDF             %f48, [%dest] ASINORMAL);       \
185         add                     %dest, 8, %dest;
186
/*
 * UNEVEN_VISCHUNK_LAST(dest, f0, f1, left): %left -= 8 and branch to
 * vis_out if the result is negative.  The delay slot copies register
 * %f0 to %f1 with fsrc1, on both paths.  The dest parameter is not used
 * by the expansion.
 *
 * UNEVEN_VISCHUNK: the same, followed by an annulled unconditional
 * branch to vis_out_slk on the fall-through path.
 */
187 #define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)                \
188         subcc                   %left, 8, %left;                \
189         bl,pn                   %xcc, vis_out;                  \
190          fsrc1                  %f0, %f1;
191 #define UNEVEN_VISCHUNK(dest, f0, f1, left)                     \
192         UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)                \
193         ba,a,pt                 %xcc, vis_out_slk;
194
195         /* Macros for non-VIS memcpy code. */
196 #ifdef REGS_64BIT
197
198 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)                 \
199         ASI_SETSRC_NOBLK                                                \
200         LDX                     [%src + offset + 0x00] ASINORMAL, %t0;  \
201         LDX                     [%src + offset + 0x08] ASINORMAL, %t1;  \
202         LDX                     [%src + offset + 0x10] ASINORMAL, %t2;  \
203         LDX                     [%src + offset + 0x18] ASINORMAL, %t3;  \
204         ASI_SETDST_NOBLK                                                \
205         STW                     %t0, [%dst + offset + 0x04] ASINORMAL;  \
206         srlx                    %t0, 32, %t0;                           \
207         STW                     %t0, [%dst + offset + 0x00] ASINORMAL;  \
208         STW                     %t1, [%dst + offset + 0x0c] ASINORMAL;  \
209         srlx                    %t1, 32, %t1;                           \
210         STW                     %t1, [%dst + offset + 0x08] ASINORMAL;  \
211         STW                     %t2, [%dst + offset + 0x14] ASINORMAL;  \
212         srlx                    %t2, 32, %t2;                           \
213         STW                     %t2, [%dst + offset + 0x10] ASINORMAL;  \
214         STW                     %t3, [%dst + offset + 0x1c] ASINORMAL;  \
215         srlx                    %t3, 32, %t3;                           \
216         STW                     %t3, [%dst + offset + 0x18] ASINORMAL;
217
218 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)            \
219         ASI_SETSRC_NOBLK                                                \
220         LDX                     [%src + offset + 0x00] ASINORMAL, %t0;  \
221         LDX                     [%src + offset + 0x08] ASINORMAL, %t1;  \
222         LDX                     [%src + offset + 0x10] ASINORMAL, %t2;  \
223         LDX                     [%src + offset + 0x18] ASINORMAL, %t3;  \
224         ASI_SETDST_NOBLK                                                \
225         STX                     %t0, [%dst + offset + 0x00] ASINORMAL;  \
226         STX                     %t1, [%dst + offset + 0x08] ASINORMAL;  \
227         STX                     %t2, [%dst + offset + 0x10] ASINORMAL;  \
228         STX                     %t3, [%dst + offset + 0x18] ASINORMAL;  \
229         ASI_SETSRC_NOBLK                                                \
230         LDX                     [%src + offset + 0x20] ASINORMAL, %t0;  \
231         LDX                     [%src + offset + 0x28] ASINORMAL, %t1;  \
232         LDX                     [%src + offset + 0x30] ASINORMAL, %t2;  \
233         LDX                     [%src + offset + 0x38] ASINORMAL, %t3;  \
234         ASI_SETDST_NOBLK                                                \
235         STX                     %t0, [%dst + offset + 0x20] ASINORMAL;  \
236         STX                     %t1, [%dst + offset + 0x28] ASINORMAL;  \
237         STX                     %t2, [%dst + offset + 0x30] ASINORMAL;  \
238         STX                     %t3, [%dst + offset + 0x38] ASINORMAL;
239
240 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)                \
241         ASI_SETSRC_NOBLK                                                \
242         LDX                     [%src - offset - 0x10] ASINORMAL, %t0;  \
243         LDX                     [%src - offset - 0x08] ASINORMAL, %t1;  \
244         ASI_SETDST_NOBLK                                                \
245         STW                     %t0, [%dst - offset - 0x0c] ASINORMAL;  \
246         srlx                    %t0, 32, %t2;                           \
247         STW                     %t2, [%dst - offset - 0x10] ASINORMAL;  \
248         STW                     %t1, [%dst - offset - 0x04] ASINORMAL;  \
249         srlx                    %t1, 32, %t3;                           \
250         STW                     %t3, [%dst - offset - 0x08] ASINORMAL;
251
252 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)                   \
253         ASI_SETSRC_NOBLK                                                \
254         LDX                     [%src - offset - 0x10] ASINORMAL, %t0;  \
255         LDX                     [%src - offset - 0x08] ASINORMAL, %t1;  \
256         ASI_SETDST_NOBLK                                                \
257         STX                     %t0, [%dst - offset - 0x10] ASINORMAL;  \
258         STX                     %t1, [%dst - offset - 0x08] ASINORMAL;
259
260 #else /* !REGS_64BIT */
261
262 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)                 \
263         lduw                    [%src + offset + 0x00], %t0;            \
264         lduw                    [%src + offset + 0x04], %t1;            \
265         lduw                    [%src + offset + 0x08], %t2;            \
266         lduw                    [%src + offset + 0x0c], %t3;            \
267         stw                     %t0, [%dst + offset + 0x00];            \
268         stw                     %t1, [%dst + offset + 0x04];            \
269         stw                     %t2, [%dst + offset + 0x08];            \
270         stw                     %t3, [%dst + offset + 0x0c];            \
271         lduw                    [%src + offset + 0x10], %t0;            \
272         lduw                    [%src + offset + 0x14], %t1;            \
273         lduw                    [%src + offset + 0x18], %t2;            \
274         lduw                    [%src + offset + 0x1c], %t3;            \
275         stw                     %t0, [%dst + offset + 0x10];            \
276         stw                     %t1, [%dst + offset + 0x14];            \
277         stw                     %t2, [%dst + offset + 0x18];            \
278         stw                     %t3, [%dst + offset + 0x1c];
279
280 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)                \
281         lduw                    [%src - offset - 0x10], %t0;            \
282         lduw                    [%src - offset - 0x0c], %t1;            \
283         lduw                    [%src - offset - 0x08], %t2;            \
284         lduw                    [%src - offset - 0x04], %t3;            \
285         stw                     %t0, [%dst - offset - 0x10];            \
286         stw                     %t1, [%dst - offset - 0x0c];            \
287         stw                     %t2, [%dst - offset - 0x08];            \
288         stw                     %t3, [%dst - offset - 0x04];
289
290 #endif /* !REGS_64BIT */
291
292 #ifdef __KERNEL__
293                 .section        __ex_table,#alloc
294                 .section        .fixup,#alloc,#execinstr
295 #endif
296
297                 .text
298                 .align                  32
299                 .globl                  memcpy
300                 .type                   memcpy,@function
301
302                 .globl                  bcopy
303                 .type                   bcopy,@function
304
/*
 * Kernel entry points.  Each one selects the source and destination
 * address spaces through asi_src / asi_dest (names defined outside this
 * view, presumably register aliases from VIS.h) and then branches to the
 * shared copy code at __memcpy_entry when %o2 is non-zero.  The
 * instruction after each brnz is its delay slot and is executed whether
 * or not the branch is taken.
 *
 *   entry              asi_src    asi_dest
 *   memcpy             ASI_P      ASI_P
 *   __copy_from_user   %asi       ASI_P
 *   __copy_to_user     ASI_P      %asi
 *   __copy_in_user     %asi       %asi (copied from asi_src)
 *
 * With %o2 == 0 each entry returns immediately with %o0 cleared.
 *
 * cheetah_patch_copyops overwrites the first two instruction words of
 * memcpy, __copy_from_user, __copy_to_user and __copy_in_user, so the
 * layout of the first two instructions at each label must be preserved.
 */
305 #ifdef __KERNEL__
306 memcpy_private:
307 memcpy:         mov             ASI_P, asi_src                  ! IEU0  Group
308                 brnz,pt         %o2, __memcpy_entry             ! CTI
309                  mov            ASI_P, asi_dest                 ! IEU1
310                 retl
311                  clr            %o0
312
313                 .align                  32
314                 .globl                  __copy_from_user
315                 .type                   __copy_from_user,@function
316 __copy_from_user:rd             %asi, asi_src                   ! IEU0  Group
317                 brnz,pt         %o2, __memcpy_entry             ! CTI
318                  mov            ASI_P, asi_dest                 ! IEU1
/*
 * No return sequence here: with %o2 == 0 execution falls through into
 * __copy_to_user below.  Its brnz is then not taken either, and its
 * retl / clr %o0 provides the return.  asi_src and asi_dest are
 * overwritten on the way.
 */
319
320                 .globl                  __copy_to_user
321                 .type                   __copy_to_user,@function
322 __copy_to_user: mov             ASI_P, asi_src                  ! IEU0  Group
323                 brnz,pt         %o2, __memcpy_entry             ! CTI
324                  rd             %asi, asi_dest                  ! IEU1
325                 retl                                            ! CTI   Group
326                  clr            %o0                             ! IEU0  Group
327
328                 .globl                  __copy_in_user
329                 .type                   __copy_in_user,@function
330 __copy_in_user: rd              %asi, asi_src                   ! IEU0  Group
331                 brnz,pt         %o2, __memcpy_entry             ! CTI
332                  mov            asi_src, asi_dest               ! IEU1
333                 retl                                            ! CTI   Group
334                  clr            %o0                             ! IEU0  Group
335 #endif
336
/*
 * bcopy: exchange %o0 and %o1 (using %g3 as scratch) and continue at
 * memcpy_private when %o2 is greater than or equal to zero.  The second
 * half of the exchange sits in the branch delay slot.  For a negative
 * %o2 the routine returns at once with %o0 cleared.
 *
 * NOTE(review): in this view memcpy_private is defined only inside the
 * __KERNEL__ section; the userland definition is presumably further down
 * the file - confirm.
 */
337 bcopy:          or              %o0, 0, %g3                     ! IEU0  Group
338                 addcc           %o1, 0, %o0                     ! IEU1
339                 brgez,pt        %o2, memcpy_private             ! CTI
340                  or             %g3, 0, %o1                     ! IEU0  Group
341                 retl                                            ! CTI   Group brk forced
342                  clr            %o0                             ! IEU0
343
344
/*
 * cheetah_patch_copyops: redirect the generic copy entry points to their
 * U3* replacements by rewriting code in place.
 *
 * For each (OLD, NEW) pair, ULTRA3_DO_PATCH overwrites the first two
 * instruction words at OLD with
 *
 *      BRANCH_ALWAYS | displacement(NEW - OLD)
 *      NOP                                     (branch delay slot)
 *
 * and then issues "flush %g2" on the patched address.
 *
 * The displacement is the byte distance NEW - OLD divided by four.  It is
 * truncated to the 19-bit displacement field of the branch before being
 * OR-ed into the opcode: "sll 11" discards everything above bit 20 of the
 * 32-bit byte offset and "srl 11 + 2" moves bits 20..2 down to bits 18..0
 * with zero fill.  Without the truncation a negative offset (NEW linked
 * below OLD) would set the upper bits of the word and corrupt the opcode;
 * for forward offsets that fit the field the encoded word is unchanged.
 *
 * Clobbers %g1, %g2, %g3.  Takes no arguments, returns with retl.
 */
345 #ifdef __KERNEL__
346 #define BRANCH_ALWAYS   0x10680000
347 #define NOP             0x01000000
348 #define ULTRA3_DO_PATCH(OLD, NEW)       \
349         sethi   %hi(NEW), %g1; \
350         or      %g1, %lo(NEW), %g1; \
351         sethi   %hi(OLD), %g2; \
352         or      %g2, %lo(OLD), %g2; \
353         sub     %g1, %g2, %g1; \
354         sethi   %hi(BRANCH_ALWAYS), %g3; \
355         sll     %g1, 11, %g1;           /* drop bits above the disp19 range */ \
        srl     %g1, 11 + 2, %g1;       /* bytes -> words, zero-filled to 19 bits */ \
356         or      %g3, %lo(BRANCH_ALWAYS), %g3; \
357         or      %g3, %g1, %g3; \
358         stw     %g3, [%g2]; \
359         sethi   %hi(NOP), %g3; \
360         or      %g3, %lo(NOP), %g3; \
361         stw     %g3, [%g2 + 0x4]; \
362         flush   %g2;
363
364         .globl  cheetah_patch_copyops
365 cheetah_patch_copyops:
366         ULTRA3_DO_PATCH(memcpy, U3memcpy)
367         ULTRA3_DO_PATCH(__copy_from_user, U3copy_from_user)
368         ULTRA3_DO_PATCH(__copy_to_user, U3copy_to_user)
369         ULTRA3_DO_PATCH(__copy_in_user, U3copy_in_user)
370         retl
371          nop
372 #undef BRANCH_ALWAYS
373 #undef NOP
374 #undef ULTRA3_DO_PATCH
375 #endif /* __KERNEL__ */
376
/*
 * VIS_enter / do_dest_8byte_align: bring the destination pointer %o0 up
 * to an 8-byte boundary with byte copies.
 *
 * Registers as used by the visible code: %o0 = destination, %o1 = source,
 * %o2 = remaining length, %g2 = destination & 7 on entry to the
 * alignment code.
 *
 * Kernel build: %g2 and the condition codes are produced by the andcc
 * placed just before the VIS_enter label.
 * NOTE(review): no label in this view targets that instruction;
 * presumably it is reached through an address computed from VIS_enter
 * elsewhere in the file - confirm.
 *
 * Userland build: the condition codes tested by the first branch and the
 * value of %g2 must be set up by the code that jumps here (not visible);
 * the delay slot precomputes %g5 = %o0 & 0x38 for the 64-byte alignment
 * test that follows at dest_is_8byte_aligned.
 */
377         .align                  32
378 #ifdef __KERNEL__
379         andcc                   %o0, 7, %g2                     ! IEU1  Group
380 #endif
381 VIS_enter:
382         be,pt                   %xcc, dest_is_8byte_aligned     ! CTI
383 #ifdef __KERNEL__
384          nop                                                    ! IEU0  Group
385 #else
386          andcc                  %o0, 0x38, %g5                  ! IEU1  Group
387 #endif
/*
 * %g2 becomes 8 - (dest & 7), the number of bytes to copy, and is taken
 * off %o2 up front.  An odd destination copies a single byte first
 * (label 1); the loop at label 2 then copies two bytes per iteration
 * until %g2 reaches zero.
 *
 * The trailing EX/EX2 arguments are fixup code: on a fault the stub
 * optionally restores a register that was already advanced and then
 * computes %o1 = %o2 + %g2 in the delay slot of the branch to
 * VIScopyfixup_ret.
 */
388 do_dest_8byte_align:
389         mov                     8, %g1                          ! IEU0
390         sub                     %g1, %g2, %g2                   ! IEU0  Group
391         andcc                   %o0, 1, %g0                     ! IEU1
392         be,pt                   %icc, 2f                        ! CTI
393          sub                    %o2, %g2, %o2                   ! IEU0  Group
394 1:      ASI_SETSRC_NOBLK                                        ! LSU   Group
395         EX(LDUB                 [%o1] ASINORMAL, %o5, 
396                                 add %o2, %g2)                   ! Load  Group
397         add                     %o1, 1, %o1                     ! IEU0
398         add                     %o0, 1, %o0                     ! IEU1
399         ASI_SETDST_NOBLK                                        ! LSU   Group
400         subcc                   %g2, 1, %g2                     ! IEU1  Group
401         be,pn                   %xcc, 3f                        ! CTI
402          EX2(STB                %o5, [%o0 - 1] ASINORMAL,
403                                 add %g2, 1, %g2,
404                                 add %o2, %g2)                   ! Store
405 2:      ASI_SETSRC_NOBLK                                        ! LSU   Group
406         EX(LDUB                 [%o1] ASINORMAL, %o5, 
407                                 add %o2, %g2)                   ! Load  Group
408         add                     %o0, 2, %o0                     ! IEU0
409         EX2(LDUB                [%o1 + 1] ASINORMAL, %g3,
410                                 sub %o0, 2, %o0,
411                                 add %o2, %g2)                   ! Load  Group
412         ASI_SETDST_NOBLK                                        ! LSU   Group
413         subcc                   %g2, 2, %g2                     ! IEU1  Group
414         EX2(STB                 %o5, [%o0 - 2] ASINORMAL,
415                                 add %g2, 2, %g2,
416                                 add %o2, %g2)                   ! Store
417         add                     %o1, 2, %o1                     ! IEU0
418         bne,pt                  %xcc, 2b                        ! CTI   Group
419          EX2(STB                %g3, [%o0 - 1] ASINORMAL,
420                                 add %g2, 1, %g2,
421                                 add %o2, %g2)                   ! Store
/*
 * dest_is_8byte_aligned: the destination is 8-byte aligned; now copy
 * doublewords until it is 64-byte aligned.
 *
 * Kernel build: VISEntry runs here, then %g5 = %o0 & 0x38 is computed.
 * Userland build: label 3 (end of the byte loop) computes %g5, while the
 * dest_is_8byte_aligned label sits after that instruction because the
 * branch at VIS_enter already computed %g5 in its delay slot.
 */
422 #ifdef __KERNEL__
423 3:
424 dest_is_8byte_aligned:
425         VISEntry
426         andcc                   %o0, 0x38, %g5                  ! IEU1  Group
427 #else
428 3:      andcc                   %o0, 0x38, %g5                  ! IEU1  Group
429 dest_is_8byte_aligned:
430 #endif
/*
 * If %g5 is zero the destination is already 64-byte aligned.  Otherwise
 * %g5 becomes 64 - %g5, the byte count for this stage, and is taken off
 * %o2.  alignaddr derives the doubleword-aligned source address in %g1
 * from %o1; the first source doubleword is preloaded into %f4.
 *
 * The loop at label 1 is unrolled twice: each half loads the next source
 * doubleword, merges it with the previous one through faligndata into
 * %f0 and stores %f0 to [%o0].  %f4 and %f6 swap the roles of "previous"
 * and "next" between the two halves.  %g1, %o1 and %o0 advance by 8 and
 * %g5 drops by 8 per half; the stage ends when %g5 reaches zero.
 */
431         be,pt                   %icc, dest_is_64byte_aligned    ! CTI
432          mov                    64, %g1                         ! IEU0
433         fmovd                   %f0, %f2                        ! FPU
434         sub                     %g1, %g5, %g5                   ! IEU0  Group
435         ASI_SETSRC_NOBLK                                        ! LSU   Group
436         alignaddr               %o1, %g0, %g1                   ! GRU   Group
437         EXO2(LDDF               [%g1] ASINORMAL, %f4)           ! Load  Group
438         sub                     %o2, %g5, %o2                   ! IEU0
439 1:      EX(LDDF                 [%g1 + 0x8] ASINORMAL, %f6,
440                                 add %o2, %g5)                   ! Load  Group
441         add                     %g1, 0x8, %g1                   ! IEU0  Group
442         subcc                   %g5, 8, %g5                     ! IEU1
443         ASI_SETDST_NOBLK                                        ! LSU   Group
444         faligndata              %f4, %f6, %f0                   ! GRU   Group
445         EX2(STDF                %f0, [%o0] ASINORMAL,
446                                 add %g5, 8, %g5,
447                                 add %o2, %g5)                   ! Store
448         add                     %o1, 8, %o1                     ! IEU0  Group
449         be,pn                   %xcc, dest_is_64byte_aligned    ! CTI
450          add                    %o0, 8, %o0                     ! IEU1
451         ASI_SETSRC_NOBLK                                        ! LSU   Group
452         EX(LDDF                 [%g1 + 0x8] ASINORMAL, %f4,
453                                 add %o2, %g5)                   ! Load  Group
454         add                     %g1, 8, %g1                     ! IEU0
455         subcc                   %g5, 8, %g5                     ! IEU1
456         ASI_SETDST_NOBLK                                        ! LSU   Group
457         faligndata              %f6, %f4, %f0                   ! GRU   Group
458         EX2(STDF                %f0, [%o0] ASINORMAL,
459                                 add %g5, 8, %g5,
460                                 add %o2, %g5)                   ! Store
461         add                     %o1, 8, %o1                     ! IEU0
462         ASI_SETSRC_NOBLK                                        ! LSU   Group
463         bne,pt                  %xcc, 1b                        ! CTI   Group
464          add                    %o0, 8, %o0                     ! IEU0
/*
 * dest_is_64byte_aligned: set up the block copy loop and dispatch into
 * the vis00 jump table.
 *
 * Values computed below, in terms of the incoming %o1 (src) and %o2 (len):
 *   %g7 = (len - 0x40) & ~0x3f      bytes handled by the block loop;
 *                                   reduced by 0x80 before dispatch
 *   %g2 = (src >> 3) & 7            doubleword offset of src inside its
 *                                   64-byte block, selects the variant
 *   %g3 = ((len - %g7) & ~7) - 0x10
 *   %o2 = len - %g7 - %g3           what is left after the above
 *   %g1 = src + %g7 + %g3           (%g7 as first computed)
 *   %o1 = src & ~0x3f               block-aligned source, then += 0xc0
 *
 * Three 64-byte blocks at %o1 + 0x00 / 0x40 / 0x80 are preloaded into the
 * register groups starting at %f0, %f16 and %f32.
 *
 * Kernel build: ASI_BLK_OR is OR-ed into asi_src and asi_dest.
 * Userland build: %asi is set to ASI_BLK_P.
 */
465 dest_is_64byte_aligned:
466         membar            #LoadStore | #StoreStore | #StoreLoad ! LSU   Group
467 #ifndef __KERNEL__
468         wr                      %g0, ASI_BLK_P, %asi            ! LSU   Group
469 #endif
470         subcc                   %o2, 0x40, %g7                  ! IEU1  Group
471         mov                     %o1, %g1                        ! IEU0
472         andncc                  %g7, (0x40 - 1), %g7            ! IEU1  Group
473         srl                     %g1, 3, %g2                     ! IEU0
474         sub                     %o2, %g7, %g3                   ! IEU0  Group
475         andn                    %o1, (0x40 - 1), %o1            ! IEU1
476         and                     %g2, 7, %g2                     ! IEU0  Group
477         andncc                  %g3, 0x7, %g3                   ! IEU1
478         fmovd                   %f0, %f2                        ! FPU
479         sub                     %g3, 0x10, %g3                  ! IEU0  Group
480         sub                     %o2, %g7, %o2                   ! IEU1
481 #ifdef __KERNEL__
482         or                      asi_src, ASI_BLK_OR, asi_src    ! IEU0  Group
483         or                      asi_dest, ASI_BLK_OR, asi_dest  ! IEU1
484 #endif
485         alignaddr               %g1, %g0, %g0                   ! GRU   Group
486         add                     %g1, %g7, %g1                   ! IEU0  Group
487         subcc                   %o2, %g3, %o2                   ! IEU1
488         ASI_SETSRC_BLK                                          ! LSU   Group
489         EXVIS1(LDBLK            [%o1 + 0x00] ASIBLK, %f0)       ! LSU   Group
490         add                     %g1, %g3, %g1                   ! IEU0
491         EXVIS1(LDBLK            [%o1 + 0x40] ASIBLK, %f16)      ! LSU   Group
492         sub                     %g7, 0x80, %g7                  ! IEU0
493         EXVIS(LDBLK             [%o1 + 0x80] ASIBLK, %f32)      ! LSU   Group
/*
 * Dispatch: jump to vis00 + %g2 * 512, i.e. to one of eight code variants
 * laid out at a 512-byte stride.  The kernel build forms the absolute
 * address of vis00 with sethi/or; the userland build reads %pc and adds
 * the low part of the link-time distance vis00 - vispc.  In both builds
 * the delay slot advances %o1 past the three preloaded blocks (0xc0
 * bytes) and sets the integer condition codes.
 */
494 #ifdef __KERNEL__
495 vispc:  sll                     %g2, 9, %g2                     ! IEU0  Group
496         sethi                   %hi(vis00), %g5                 ! IEU1
497         or                      %g5, %lo(vis00), %g5            ! IEU0  Group
498         jmpl                    %g5 + %g2, %g0                  ! CTI   Group brk forced
499          addcc                  %o1, 0xc0, %o1                  ! IEU1  Group
500 #else
501                                                                 ! Clk1  Group 8-(
502                                                                 ! Clk2  Group 8-(
503                                                                 ! Clk3  Group 8-(
504                                                                 ! Clk4  Group 8-(
505 vispc:  rd                      %pc, %g5                        ! PDU   Group 8-(
506         addcc                   %g5, %lo(vis00 - vispc), %g5    ! IEU1  Group
507         sll                     %g2, 9, %g2                     ! IEU0
508         jmpl                    %g5 + %g2, %g0                  ! CTI   Group brk forced
509          addcc                  %o1, 0xc0, %o1                  ! IEU1  Group
510 #endif
511         .align                  512             /* OK, here comes the fun part... */
512 vis00:FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) LOOP_CHUNK1(o1, o0, g7, vis01)
513       FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) LOOP_CHUNK2(o1, o0, g7, vis02)
514       FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)  LOOP_CHUNK3(o1, o0, g7, vis03)
515       b,pt                      %xcc, vis00+4; faligndata %f0, %f2, %f48
516 vis01:FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_SYNC(o0, f48) membar #Sync
517       FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)  STORE_JUMP(o0, f48, finish_f0) membar #Sync
518 vis02:FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)  STORE_SYNC(o0, f48) membar #Sync
519       FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_JUMP(o0, f48, finish_f16) membar #Sync
520 vis03:FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_SYNC(o0, f48) membar #Sync
521       FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_JUMP(o0, f48, finish_f32) membar #Sync
522       VISLOOP_PAD
523 vis10:FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) LOOP_CHUNK1(o1, o0, g7, vis11)
524       FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) LOOP_CHUNK2(o1, o0, g7, vis12)
525       FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)  LOOP_CHUNK3(o1, o0, g7, vis13)
526       b,pt                      %xcc, vis10+4; faligndata %f2, %f4, %f48
527 vis11:FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_SYNC(o0, f48) membar #Sync
528       FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)  STORE_JUMP(o0, f48, finish_f2) membar #Sync
529 vis12:FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)  STORE_SYNC(o0, f48) membar #Sync
530       FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_JUMP(o0, f48, finish_f18) membar #Sync
531 vis13:FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_SYNC(o0, f48) membar #Sync
532       FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_JUMP(o0, f48, finish_f34) membar #Sync
533       VISLOOP_PAD
534 vis20:FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) LOOP_CHUNK1(o1, o0, g7, vis21)
535       FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) LOOP_CHUNK2(o1, o0, g7, vis22)
536       FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)  LOOP_CHUNK3(o1, o0, g7, vis23)
537       b,pt                      %xcc, vis20+4; faligndata %f4, %f6, %f48
538 vis21:FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_SYNC(o0, f48) membar #Sync
539       FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)  STORE_JUMP(o0, f48, finish_f4) membar #Sync
540 vis22:FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)  STORE_SYNC(o0, f48) membar #Sync
541       FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_JUMP(o0, f48, finish_f20) membar #Sync
542 vis23:FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_SYNC(o0, f48) membar #Sync
543       FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_JUMP(o0, f48, finish_f36) membar #Sync
544       VISLOOP_PAD
/* vis30: three-phase unrolled loop with the FP register window starting
 * at %f6 (windows f6..f22, f22..f38, f38..f6).  The back-branch re-enters
 * at vis30+4 with faligndata %f6, %f8 -> %f48 in its delay slot.  Exit
 * stubs vis31/vis32/vis33 continue at finish_f6, finish_f22 and
 * finish_f38.
 * NOTE(review): macro bodies are not visible here; the meaning of the +4
 * entry offset should be confirmed against FREG_FROB.
 */
545 vis30:FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) LOOP_CHUNK1(o1, o0, g7, vis31)
546       FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) LOOP_CHUNK2(o1, o0, g7, vis32)
547       FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)  LOOP_CHUNK3(o1, o0, g7, vis33)
548       b,pt                      %xcc, vis30+4; faligndata %f6, %f8, %f48
549 vis31:FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_SYNC(o0, f48) membar #Sync
550       FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)  STORE_JUMP(o0, f48, finish_f6) membar #Sync
551 vis32:FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)  STORE_SYNC(o0, f48) membar #Sync
552       FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_JUMP(o0, f48, finish_f22) membar #Sync
553 vis33:FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_SYNC(o0, f48) membar #Sync
554       FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_JUMP(o0, f48, finish_f38) membar #Sync
555       VISLOOP_PAD
/* vis40: three-phase unrolled loop with the FP register window starting
 * at %f8 (windows f8..f24, f24..f40, f40..f8).  The back-branch re-enters
 * at vis40+4 with faligndata %f8, %f10 -> %f48 in its delay slot.  Exit
 * stubs vis41/vis42/vis43 continue at finish_f8, finish_f24 and
 * finish_f40.
 * NOTE(review): macro bodies are not visible here; the meaning of the +4
 * entry offset should be confirmed against FREG_FROB.
 */
556 vis40:FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) LOOP_CHUNK1(o1, o0, g7, vis41)
557       FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) LOOP_CHUNK2(o1, o0, g7, vis42)
558       FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)  LOOP_CHUNK3(o1, o0, g7, vis43)
559       b,pt                      %xcc, vis40+4; faligndata %f8, %f10, %f48
560 vis41:FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_SYNC(o0, f48) membar #Sync
561       FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)  STORE_JUMP(o0, f48, finish_f8) membar #Sync
562 vis42:FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)  STORE_SYNC(o0, f48) membar #Sync
563       FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_JUMP(o0, f48, finish_f24) membar #Sync
564 vis43:FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_SYNC(o0, f48) membar #Sync
565       FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_JUMP(o0, f48, finish_f40) membar #Sync
566       VISLOOP_PAD
/* vis50: three-phase unrolled loop with the FP register window starting
 * at %f10 (windows f10..f26, f26..f42, f42..f10).  The back-branch
 * re-enters at vis50+4 with faligndata %f10, %f12 -> %f48 in its delay
 * slot.  Exit stubs vis51/vis52/vis53 continue at finish_f10, finish_f26
 * and finish_f42.
 * NOTE(review): macro bodies are not visible here; the meaning of the +4
 * entry offset should be confirmed against FREG_FROB.
 */
567 vis50:FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) LOOP_CHUNK1(o1, o0, g7, vis51)
568       FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) LOOP_CHUNK2(o1, o0, g7, vis52)
569       FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) LOOP_CHUNK3(o1, o0, g7, vis53)
570       b,pt                      %xcc, vis50+4; faligndata %f10, %f12, %f48
571 vis51:FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_SYNC(o0, f48) membar #Sync
572       FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_JUMP(o0, f48, finish_f10) membar #Sync
573 vis52:FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_SYNC(o0, f48) membar #Sync
574       FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_JUMP(o0, f48, finish_f26) membar #Sync
575 vis53:FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_SYNC(o0, f48) membar #Sync
576       FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_JUMP(o0, f48, finish_f42) membar #Sync
577       VISLOOP_PAD
/* vis60: three-phase unrolled loop with the FP register window starting
 * at %f12 (windows f12..f28, f28..f44, f44..f12).  The back-branch
 * re-enters at vis60+4 with faligndata %f12, %f14 -> %f48 in its delay
 * slot.  Exit stubs vis61/vis62/vis63 continue at finish_f12, finish_f28
 * and finish_f44.
 * NOTE(review): macro bodies are not visible here; the meaning of the +4
 * entry offset should be confirmed against FREG_FROB.
 */
578 vis60:FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) LOOP_CHUNK1(o1, o0, g7, vis61)
579       FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) LOOP_CHUNK2(o1, o0, g7, vis62)
580       FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) LOOP_CHUNK3(o1, o0, g7, vis63)
581       b,pt                      %xcc, vis60+4; faligndata %f12, %f14, %f48
582 vis61:FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_SYNC(o0, f48) membar #Sync
583       FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_JUMP(o0, f48, finish_f12) membar #Sync
584 vis62:FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_SYNC(o0, f48) membar #Sync
585       FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_JUMP(o0, f48, finish_f28) membar #Sync
586 vis63:FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_SYNC(o0, f48) membar #Sync
587       FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_JUMP(o0, f48, finish_f44) membar #Sync
588       VISLOOP_PAD
/* vis70: three-phase unrolled loop with the FP register window starting
 * at %f14 (windows f14..f30, f30..f46, f46..f14).  The back-branch
 * re-enters at vis70+4 with faligndata %f14, %f16 -> %f48 in its delay
 * slot.  Exit stubs vis71/vis72/vis73 continue at finish_f14, finish_f30
 * and finish_f46, which are the UNEVEN_VISCHUNK entries of the finish
 * table that follows.
 * NOTE(review): macro bodies are not visible here; the meaning of the +4
 * entry offset should be confirmed against FREG_FROB.
 */
589 vis70:FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) LOOP_CHUNK1(o1, o0, g7, vis71)
590       FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) LOOP_CHUNK2(o1, o0, g7, vis72)
591       FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) LOOP_CHUNK3(o1, o0, g7, vis73)
592       b,pt                      %xcc, vis70+4; faligndata %f14, %f16, %f48
593 vis71:FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_SYNC(o0, f48) membar #Sync
594       FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_JUMP(o0, f48, finish_f14) membar #Sync
595 vis72:FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_SYNC(o0, f48) membar #Sync
596       FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_JUMP(o0, f48, finish_f30) membar #Sync
597 vis73:FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_SYNC(o0, f48) membar #Sync
598       FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, finish_f46) membar #Sync
599       VISLOOP_PAD
/* Tail entry table: one label per even FP register f0..f46, reached from
 * the STORE_JUMP exit stubs of the unrolled loops.  Every entry passes
 * (o0, fN, fN+2, g3) to FINISH_VISCHUNK, except that the entries for f14
 * and f30 use UNEVEN_VISCHUNK with f0 as the second register and the
 * entry for f46 uses UNEVEN_VISCHUNK_LAST.
 * NOTE(review): the three macros are defined outside this excerpt, so
 * whether consecutive entries fall through into each other (and into
 * vis_out_slk after finish_f46) cannot be confirmed from here.
 * finish_f0, finish_f16 and finish_f32 are not referenced by the
 * vis10..vis70 variants; presumably another loop variant uses them.
 */
600 finish_f0:      FINISH_VISCHUNK(o0, f0,  f2,  g3)
601 finish_f2:      FINISH_VISCHUNK(o0, f2,  f4,  g3)
602 finish_f4:      FINISH_VISCHUNK(o0, f4,  f6,  g3)
603 finish_f6:      FINISH_VISCHUNK(o0, f6,  f8,  g3)
604 finish_f8:      FINISH_VISCHUNK(o0, f8,  f10, g3)
605 finish_f10:     FINISH_VISCHUNK(o0, f10, f12, g3)
606 finish_f12:     FINISH_VISCHUNK(o0, f12, f14, g3)
607 finish_f14:     UNEVEN_VISCHUNK(o0, f14, f0,  g3)
608 finish_f16:     FINISH_VISCHUNK(o0, f16, f18, g3)
609 finish_f18:     FINISH_VISCHUNK(o0, f18, f20, g3)
610 finish_f20:     FINISH_VISCHUNK(o0, f20, f22, g3)
611 finish_f22:     FINISH_VISCHUNK(o0, f22, f24, g3)
612 finish_f24:     FINISH_VISCHUNK(o0, f24, f26, g3)
613 finish_f26:     FINISH_VISCHUNK(o0, f26, f28, g3)
614 finish_f28:     FINISH_VISCHUNK(o0, f28, f30, g3)
615 finish_f30:     UNEVEN_VISCHUNK(o0, f30, f0,  g3)
616 finish_f32:     FINISH_VISCHUNK(o0, f32, f34, g3)
617 finish_f34:     FINISH_VISCHUNK(o0, f34, f36, g3)
618 finish_f36:     FINISH_VISCHUNK(o0, f36, f38, g3)
619 finish_f38:     FINISH_VISCHUNK(o0, f38, f40, g3)
620 finish_f40:     FINISH_VISCHUNK(o0, f40, f42, g3)
621 finish_f42:     FINISH_VISCHUNK(o0, f42, f44, g3)
622 finish_f44:     FINISH_VISCHUNK(o0, f44, f46, g3)
623 finish_f46:     UNEVEN_VISCHUNK_LAST(o0, f46, f0,  g3)
/* Post-loop tail of the VIS copy.
 *   vis_out_slk / vis_slk : copy 8 bytes at a time with LDDF + faligndata
 *                           + STDF while %g3 (decremented by 8) stays
 *                           non-negative.
 *   vis_out_slp / vis_out : finish if %o2 is zero, otherwise fall into
 *   vis_slp               : byte-at-a-time loop driven by %o2.
 *   vis_ret               : membar, then FPU_CLEAN_RETL.
 * In the kernel build the srl/xor/xor sequence rewrites asi_src before the
 * non-block accesses.
 * NOTE(review): ASI_BLK_XOR1 and asi_src are defined elsewhere; presumably
 * this maps a block-transfer ASI to its non-block equivalent -- confirm.
 */
624 vis_out_slk:
625 #ifdef __KERNEL__
626         srl             asi_src, 3, %g5                         ! IEU0  Group
627         xor             asi_src, ASI_BLK_XOR1, asi_src          ! IEU1
628         xor             asi_src, %g5, asi_src                   ! IEU0  Group
629 #endif
/* Loop body is unrolled twice so that %f0 and %f2 alternate as the newly
 * loaded double; the first half leaves via bl when %g3 goes negative, the
 * second half loops back via bge while it is still >= 0.
 */
630 vis_slk:ASI_SETSRC_NOBLK                                        ! LSU   Group
631         EXVIS3(LDDF     [%o1] ASINORMAL, %f2)                   ! Load  Group
632         add             %o1, 8, %o1                             ! IEU0
633         subcc           %g3, 8, %g3                             ! IEU1
634         ASI_SETDST_NOBLK                                        ! LSU   Group
635         faligndata      %f0, %f2, %f8                           ! GRU   Group
636         EXVIS4(STDF     %f8, [%o0] ASINORMAL)                   ! Store
637         bl,pn           %xcc, vis_out_slp                       ! CTI
638          add            %o0, 8, %o0                             ! IEU0  Group
639         ASI_SETSRC_NOBLK                                        ! LSU   Group
640         EXVIS3(LDDF     [%o1] ASINORMAL, %f0)                   ! Load  Group
641         add             %o1, 8, %o1                             ! IEU0
642         subcc           %g3, 8, %g3                             ! IEU1
643         ASI_SETDST_NOBLK                                        ! LSU   Group
644         faligndata      %f2, %f0, %f8                           ! GRU   Group
645         EXVIS4(STDF     %f8, [%o0] ASINORMAL)                   ! Store
646         bge,pt          %xcc, vis_slk                           ! CTI
647          add            %o0, 8, %o0                             ! IEU0  Group
/* Kernel build: asi_src was already rewritten at vis_out_slk, so this
 * path enters the byte loop at vis_slp+4 (issuing ASI_SETSRC_NOBLK from
 * the delay slot) instead of falling through vis_out and rewriting it a
 * second time.  Userland build: simply falls through to vis_out.
 * In both exits the delay slot loads %o1 from %g1 (brz is not annulled,
 * so the mov executes whether or not the branch is taken).
 * NOTE(review): the +4 assumes ASI_SETSRC_NOBLK is one instruction.
 */
648 vis_out_slp:
649 #ifdef __KERNEL__
650         brz,pt          %o2, vis_ret                            ! CTI   Group
651          mov            %g1, %o1                                ! IEU0
652         ba,pt           %xcc, vis_slp+4                         ! CTI   Group
653          ASI_SETSRC_NOBLK                                       ! LSU   Group
654 #endif
655 vis_out:brz,pt          %o2, vis_ret                            ! CTI   Group
656          mov            %g1, %o1                                ! IEU0
657 #ifdef __KERNEL__
658         srl             asi_src, 3, %g5                         ! IEU0  Group
659         xor             asi_src, ASI_BLK_XOR1, asi_src          ! IEU1
660         xor             asi_src, %g5, asi_src                   ! IEU0  Group
661 #endif
/* Byte loop: %o2 counts remaining bytes; the store sits in the delay slot
 * of the loop branch and uses [%o0 - 1] because %o0 was already advanced.
 */
662 vis_slp:ASI_SETSRC_NOBLK                                        ! LSU   Group
663         EXO2(LDUB       [%o1] ASINORMAL, %g5)                   ! LOAD
664         add             %o1, 1, %o1                             ! IEU0
665         add             %o0, 1, %o0                             ! IEU1
666         ASI_SETDST_NOBLK                                        ! LSU   Group
667         subcc           %o2, 1, %o2                             ! IEU1
668         bne,pt          %xcc, vis_slp                           ! CTI
669          EX(STB         %g5, [%o0 - 1] ASINORMAL,
670                         add %o2, 1)                             ! Store Group
671 vis_ret:membar          #StoreLoad | #StoreStore                ! LSU   Group
672         FPU_CLEAN_RETL
673
674
/* __memcpy_short: byte-wise copy for short lengths; __memcpy_entry
 * branches here when %o2 <= 15.
 *   %o0 = destination, %o1 = source, %o2 = byte count.
 * If the count is odd, one byte is copied first (returning at once when
 * that was the only byte); the loop at 2: then moves two bytes per
 * iteration until %o2 reaches zero.  The delay slot of the first branch
 * issues ASI_SETSRC_NOBLK, which is also the first statement at 2:.
 * NOTE(review): with %o2 == 0 on entry the loop at 2: would not see a
 * zero result from subcc on its first pass.  The userland entry filters a
 * zero length with brz before reaching here; kernel callers are
 * presumably expected to do the same -- confirm.
 */
675 __memcpy_short:
676         andcc           %o2, 1, %g0                             ! IEU1  Group
677         be,pt           %icc, 2f                                ! CTI
678 1:       ASI_SETSRC_NOBLK                                       ! LSU   Group
679         EXO2(LDUB       [%o1] ASINORMAL, %g5)                   ! LOAD  Group
680         add             %o1, 1, %o1                             ! IEU0
681         add             %o0, 1, %o0                             ! IEU1
682         ASI_SETDST_NOBLK                                        ! LSU   Group
683         subcc           %o2, 1, %o2                             ! IEU1  Group
684         be,pn           %xcc, short_ret                         ! CTI
685          EX(STB         %g5, [%o0 - 1] ASINORMAL,
686                         add %o2, 1)                             ! Store
/* Two-byte loop: both loads are issued before the stores; %o0 is advanced
 * early, hence the [%o0 - 2] and [%o0 - 1] store addresses.
 */
687 2:      ASI_SETSRC_NOBLK                                        ! LSU   Group
688         EXO2(LDUB       [%o1] ASINORMAL, %g5)                   ! LOAD  Group
689         add             %o0, 2, %o0                             ! IEU0
690         EX2(LDUB        [%o1 + 1] ASINORMAL, %o5,
691                         sub %o0, 2, %o0,
692                         add %o2, %g0)                           ! LOAD  Group
693         add             %o1, 2, %o1                             ! IEU0
694         ASI_SETDST_NOBLK                                        ! LSU   Group
695         subcc           %o2, 2, %o2                             ! IEU1  Group
696         EX(STB          %g5, [%o0 - 2] ASINORMAL,
697                         add %o2, 2)                             ! Store
698         bne,pt          %xcc, 2b                                ! CTI
699          EX(STB         %o5, [%o0 - 1] ASINORMAL,
700                         add %o2, 1)                             ! Store
701 short_ret:
702         NORMAL_RETL
703
/* Userland-only entry points (compiled when __KERNEL__ is not defined).
 * Without REGS_64BIT the length in %o2 is first reduced to its low 32
 * bits (srl by 0).  A zero length returns immediately via short_ret; the
 * delay slot copies the destination pointer %o0 into %g6.
 * NOTE(review): %g6 is presumably what NORMAL_RETL hands back as the
 * memcpy return value in userland -- the macro is not visible here.
 * Execution falls through into __memcpy_entry.
 */
704 #ifndef __KERNEL__
705 memcpy_private:
706 memcpy:
707 #ifndef REGS_64BIT
708         srl             %o2, 0, %o2                             ! IEU1  Group
709 #endif  
710         brz,pn          %o2, short_ret                          ! CTI   Group
711          mov            %o0, %g6                                ! IEU0
712 #endif
/* __memcpy_entry: common dispatch and the integer-register copy path.
 *   %o0 = destination, %o1 = source, %o2 = byte count.
 * Dispatch:
 *   %o2 <= 15                 -> __memcpy_short
 *   %o2 >= 64 * 6             -> VIS_enter
 *   (%o0 - %o1) & 3 != 0      -> memcpy_noVIS_misaligned
 *                                (%g2 = %o0 & 7 is consumed there)
 *   otherwise                 -> copy below: align the source, move
 *                                128-byte chunks, then 16-byte chunks via
 *                                a computed jump, then an 8/4/2/1 tail.
 * Many branches test condition codes that were set in the delay slot of
 * an earlier branch; the intervening add/sub instructions do not modify
 * the condition codes.
 */
713 __memcpy_entry:
714         cmp             %o2, 15                                 ! IEU1  Group
715         bleu,pn         %xcc, __memcpy_short                    ! CTI
716          cmp            %o2, (64 * 6)                           ! IEU1  Group
717         bgeu,pn         %xcc, VIS_enter                         ! CTI
718          andcc          %o0, 7, %g2                             ! IEU1  Group
719         sub             %o0, %o1, %g5                           ! IEU0
720         andcc           %g5, 3, %o5                             ! IEU1  Group
721         bne,pn          %xcc, memcpy_noVIS_misaligned           ! CTI
722          andcc          %o1, 3, %g0                             ! IEU1  Group
/* Source alignment.  If the source is already word aligned, skip ahead
 * (to 3: with REGS_64BIT, to 5: otherwise).  Otherwise copy one byte when
 * the source address is odd and then one halfword when bit 1 calls for it.
 */
723 #ifdef REGS_64BIT
724         be,a,pt         %xcc, 3f                                ! CTI
725          andcc          %o1, 4, %g0                             ! IEU1  Group
726         andcc           %o1, 1, %g0                             ! IEU1  Group
727 #else /* !REGS_64BIT */
728         be,pt           %xcc, 5f                                ! CTI
729          andcc          %o1, 1, %g0                             ! IEU1  Group
730 #endif /* !REGS_64BIT */
731         be,pn           %xcc, 4f                                ! CTI
732          andcc          %o1, 2, %g0                             ! IEU1  Group
/* Odd source address: copy one byte.  The bne below still sees the flags
 * of "andcc %o1, 2" computed on the address before it was incremented.
 */
733         ASI_SETSRC_NOBLK                                        ! LSU   Group
734         EXO2(LDUB       [%o1] ASINORMAL, %g2)                   ! Load  Group
735         add             %o1, 1, %o1                             ! IEU0
736         add             %o0, 1, %o0                             ! IEU1
737         sub             %o2, 1, %o2                             ! IEU0  Group
738         ASI_SETDST_NOBLK                                        ! LSU   Group
739         bne,pn          %xcc, 5f                                ! CTI   Group
740          EX(STB         %g2, [%o0 - 1] ASINORMAL,
741                         add %o2, 1)                             ! Store
742 4:      ASI_SETSRC_NOBLK                                        ! LSU   Group
743         EXO2(LDUH       [%o1] ASINORMAL, %g2)                   ! Load  Group
744         add             %o1, 2, %o1                             ! IEU0
745         add             %o0, 2, %o0                             ! IEU1
746         ASI_SETDST_NOBLK                                        ! LSU   Group
747         sub             %o2, 2, %o2                             ! IEU0
748         EX(STH          %g2, [%o0 - 2] ASINORMAL,
749                         add %o2, 2)                             ! Store Group + bubble
/* REGS_64BIT: copy one more word if source bit 2 is set, then compute
 * %g7 = %o2 & -128 (bytes to move in 128-byte chunks).  With no full
 * chunk go to 3:; with destination bit 2 clear take the
 * MOVE_BIGALIGNCHUNK loop at 82f + 4, the delay slot of that branch being
 * the first instruction of MOVE_BIGCHUNK at 5:.
 * NOTE(review): presumably the two macros start with the same
 * instruction, which is what makes the +4 entry valid -- confirm.
 * !REGS_64BIT: only %g7 is computed; with no full chunk go to 41:.
 */
750 #ifdef REGS_64BIT
751 5:      andcc           %o1, 4, %g0                             ! IEU1
752 3:      be,a,pn         %xcc, 2f                                ! CTI
753          andcc          %o2, -128, %g7                          ! IEU1  Group
754         ASI_SETSRC_NOBLK                                        ! LSU   Group
755         EXO2(LDUW       [%o1] ASINORMAL, %g5)                   ! Load  Group
756         add             %o1, 4, %o1                             ! IEU0
757         add             %o0, 4, %o0                             ! IEU1
758         ASI_SETDST_NOBLK                                        ! LSU   Group
759         sub             %o2, 4, %o2                             ! IEU0  Group
760         EX(STW          %g5, [%o0 - 4] ASINORMAL,
761                         add %o2, 4)                             ! Store
762         andcc           %o2, -128, %g7                          ! IEU1  Group
763 2:      be,pn           %xcc, 3f                                ! CTI
764          andcc          %o0, 4, %g0                             ! IEU1  Group
765         be,pn           %xcc, 82f + 4                           ! CTI   Group
766 #else /* !REGS_64BIT */
767 5:      andcc           %o2, -128, %g7                          ! IEU1
768         be,a,pn         %xcc, 41f                               ! CTI
769          andcc          %o2, 0x70, %g7                          ! IEU1  Group
770 #endif /* !REGS_64BIT */
/* 128-byte chunk loop: four MOVE_BIGCHUNK invocations at offsets
 * 0x00/0x20/0x40/0x60, repeated until %g7 reaches zero.  EXT names
 * VIScopyfixup1 for the instruction range 5b..35f.
 */
771 5:      MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
772         MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
773         MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
774         MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
775         EXT(5b,35f,VIScopyfixup1)
776 35:     subcc           %g7, 128, %g7                           ! IEU1  Group
777         add             %o1, 128, %o1                           ! IEU0
778         bne,pt          %xcc, 5b                                ! CTI
779          add            %o0, 128, %o0                           ! IEU0  Group
/* 16-byte chunks: %g7 = %o2 & 0x70.  Both pointers are advanced by %g7
 * and control jumps backwards from 80: into the MOVE_LASTCHUNK table so
 * that exactly %g7 / 16 invocations execute.  The kernel variant jumps to
 * 80f - 2.5 * %g7 (40 code bytes per 16 data bytes); the userland variant
 * jumps to 80f - 2 * %g7 (32 code bytes per 16 data bytes).
 * NOTE(review): this relies on MOVE_LASTCHUNK expanding to 10
 * instructions in the kernel build and 8 in userland -- confirm against
 * the macro definition.
 */
780 3:      andcc           %o2, 0x70, %g7                          ! IEU1  Group
781 41:     be,pn           %xcc, 80f                               ! CTI
782          andcc          %o2, 8, %g0                             ! IEU1  Group
783 #ifdef __KERNEL__
784 79:     sethi           %hi(80f), %o5                           ! IEU0
785         sll             %g7, 1, %g5                             ! IEU0  Group
786         add             %o1, %g7, %o1                           ! IEU1
787         srl             %g7, 1, %g2                             ! IEU0  Group
788         sub             %o5, %g5, %o5                           ! IEU1
789         sub             %o5, %g2, %o5                           ! IEU0  Group
790         jmpl            %o5 + %lo(80f), %g0                     ! CTI   Group brk forced
791          add            %o0, %g7, %o0                           ! IEU0  Group
792 #else
793                                                                 ! Clk1 8-(
794                                                                 ! Clk2 8-(
795                                                                 ! Clk3 8-(
796                                                                 ! Clk4 8-(
797 79:     rd              %pc, %o5                                ! PDU   Group
798         sll             %g7, 1, %g5                             ! IEU0  Group
799         add             %o1, %g7, %o1                           ! IEU1
800         sub             %o5, %g5, %o5                           ! IEU0  Group
801         jmpl            %o5 + %lo(80f - 79b), %g0               ! CTI   Group brk forced
802          add            %o0, %g7, %o0                           ! IEU0  Group
803 #endif
804 36:     MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
805         MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
806         MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
807         MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
808         MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
809         MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
810         MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
811         EXT(36b,80f,VIScopyfixup2)
/* Tail: bits 3, 2, 1 and 0 of %o2 select an 8-, 4-, 2- and 1-byte copy in
 * turn.  Each branch tests the bit examined in the previous delay slot.
 * With REGS_64BIT the 8 bytes are loaded with one LDX but stored as two
 * words (low word at [%o0 - 4], high word at [%o0 - 8] after the srlx).
 * NOTE(review): presumably because the destination is only known to be
 * word aligned on this path -- confirm.
 */
812 80:     be,pt           %xcc, 81f                               ! CTI
813          andcc          %o2, 4, %g0                             ! IEU1
814 #ifdef REGS_64BIT
815         ASI_SETSRC_NOBLK                                        ! LSU   Group
816         EX(LDX          [%o1] ASINORMAL, %g2,
817                         and %o2, 0xf)                           ! Load  Group
818         add             %o0, 8, %o0                             ! IEU0
819         ASI_SETDST_NOBLK                                        ! LSU   Group
820         EX(STW          %g2, [%o0 - 0x4] ASINORMAL,
821                         and %o2, 0xf)                           ! Store Group
822         add             %o1, 8, %o1                             ! IEU1
823         srlx            %g2, 32, %g2                            ! IEU0  Group
824         EX2(STW         %g2, [%o0 - 0x8] ASINORMAL,
825                         and %o2, 0xf, %o2,
826                         sub %o2, 4)                             ! Store
827 #else /* !REGS_64BIT */
828         lduw            [%o1], %g2                              ! Load  Group
829         add             %o0, 8, %o0                             ! IEU0
830         lduw            [%o1 + 0x4], %g3                        ! Load  Group
831         add             %o1, 8, %o1                             ! IEU0
832         stw             %g2, [%o0 - 0x8]                        ! Store Group
833         stw             %g3, [%o0 - 0x4]                        ! Store Group
834 #endif /* !REGS_64BIT */
835 81:     be,pt           %xcc, 1f                                ! CTI
836          andcc          %o2, 2, %g0                             ! IEU1  Group
837         ASI_SETSRC_NOBLK                                        ! LSU   Group
838         EX(LDUW         [%o1] ASINORMAL, %g2,
839                         and %o2, 0x7)                           ! Load  Group
840         add             %o1, 4, %o1                             ! IEU0
841         ASI_SETDST_NOBLK                                        ! LSU   Group
842         EX(STW          %g2, [%o0] ASINORMAL,
843                         and %o2, 0x7)                           ! Store Group
844         add             %o0, 4, %o0                             ! IEU0
845 1:      be,pt           %xcc, 1f                                ! CTI
846          andcc          %o2, 1, %g0                             ! IEU1  Group
847         ASI_SETSRC_NOBLK                                        ! LSU   Group
848         EX(LDUH         [%o1] ASINORMAL, %g2,
849                         and %o2, 0x3)                           ! Load  Group
850         add             %o1, 2, %o1                             ! IEU0
851         ASI_SETDST_NOBLK                                        ! LSU   Group
852         EX(STH          %g2, [%o0] ASINORMAL,
853                         and %o2, 0x3)                           ! Store Group
854         add             %o0, 2, %o0                             ! IEU0
855 1:      be,pt           %xcc, normal_retl                       ! CTI
856          nop                                                    ! IEU1
857         ASI_SETSRC_NOBLK                                        ! LSU   Group
858         EX(LDUB         [%o1] ASINORMAL, %g2,
859                         add %g0, 1)                             ! Load  Group
860         ASI_SETDST_NOBLK                                        ! LSU   Group
861         EX(STB          %g2, [%o0] ASINORMAL,
862                         add %g0, 1)                             ! Store Group + bubble
863 normal_retl:
864         NORMAL_RETL
865
/* REGS_64BIT only: copy path entered from __memcpy_entry at 82f + 4 when
 * bit 2 of the destination address is clear.  Same structure as the path
 * above it: a 128-byte loop (two MOVE_BIGALIGNCHUNK invocations at
 * offsets 0x00 and 0x40), a computed jump into a MOVE_LASTALIGNCHUNK
 * table for the %o2 & 0x70 remainder, and an 8/4/2/1-byte tail that uses
 * a single STX for the 8-byte step.
 */
866 #ifdef REGS_64BIT
867 82:     MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
868         MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
869         EXT(82b,37f,VIScopyfixup3)
870 37:     subcc           %g7, 128, %g7                           ! IEU1  Group
871         add             %o1, 128, %o1                           ! IEU0
872         bne,pt          %xcc, 82b                               ! CTI
873          add            %o0, 128, %o0                           ! IEU0  Group
874         andcc           %o2, 0x70, %g7                          ! IEU1
875         be,pn           %xcc, 84f                               ! CTI
876          andcc          %o2, 8, %g0                             ! IEU1  Group
/* Computed jump backwards from 84: into the table below.  The kernel
 * variant jumps to 84f - 1.5 * %g7 (24 code bytes per 16 data bytes); the
 * userland variant jumps to 84f - %g7 (16 code bytes per 16 data bytes).
 * NOTE(review): this relies on MOVE_LASTALIGNCHUNK expanding to 6
 * instructions in the kernel build and 4 in userland -- confirm against
 * the macro definition.
 */
877 #ifdef __KERNEL__
878 83:     srl             %g7, 1, %g5                             ! IEU0
879         sethi           %hi(84f), %o5                           ! IEU0  Group
880         add             %g7, %g5, %g5                           ! IEU1
881         add             %o1, %g7, %o1                           ! IEU0  Group
882         sub             %o5, %g5, %o5                           ! IEU1
883         jmpl            %o5 + %lo(84f), %g0                     ! CTI   Group brk forced
884          add            %o0, %g7, %o0                           ! IEU0  Group
885 #else
886                                                                 ! Clk1 8-(
887                                                                 ! Clk2 8-(
888                                                                 ! Clk3 8-(
889                                                                 ! Clk4 8-(
890 83:     rd              %pc, %o5                                ! PDU   Group
891         add             %o1, %g7, %o1                           ! IEU0  Group
892         sub             %o5, %g7, %o5                           ! IEU1
893         jmpl            %o5 + %lo(84f - 83b), %g0               ! CTI   Group brk forced
894          add            %o0, %g7, %o0                           ! IEU0  Group
895 #endif
896 38:     MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
897         MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
898         MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
899         MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
900         MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
901         MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
902         MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
903         EXT(38b,84f,VIScopyfixup4)
/* Tail: bits 3, 2, 1 and 0 of %o2 select an 8-, 4-, 2- and 1-byte copy;
 * each branch tests the bit examined in the preceding delay slot.
 */
904 84:     be,pt           %xcc, 85f                               ! CTI   Group
905          andcc          %o2, 4, %g0                             ! IEU1
906         ASI_SETSRC_NOBLK                                        ! LSU   Group
907         EX(LDX          [%o1] ASINORMAL, %g2,
908                         and %o2, 0xf)                           ! Load  Group
909         add             %o0, 8, %o0                             ! IEU0
910         ASI_SETDST_NOBLK                                        ! LSU   Group
911         add             %o1, 8, %o1                             ! IEU0  Group
912         EX(STX          %g2, [%o0 - 0x8] ASINORMAL,
913                         and %o2, 0xf)                           ! Store
914 85:     be,pt           %xcc, 1f                                ! CTI
915          andcc          %o2, 2, %g0                             ! IEU1  Group
916         ASI_SETSRC_NOBLK                                        ! LSU   Group
917         EX(LDUW         [%o1] ASINORMAL, %g2,
918                         and %o2, 0x7)                           ! Load  Group
919         add             %o0, 4, %o0                             ! IEU0
920         ASI_SETDST_NOBLK                                        ! LSU   Group
921         add             %o1, 4, %o1                             ! IEU0  Group
922         EX(STW          %g2, [%o0 - 0x4] ASINORMAL,
923                         and %o2, 0x7)                           ! Store
924 1:      be,pt           %xcc, 1f                                ! CTI
925          andcc          %o2, 1, %g0                             ! IEU1  Group
926         ASI_SETSRC_NOBLK                                        ! LSU   Group
927         EX(LDUH         [%o1] ASINORMAL, %g2,
928                         and %o2, 0x3)                           ! Load  Group
929         add             %o0, 2, %o0                             ! IEU0
930         ASI_SETDST_NOBLK                                        ! LSU   Group
931         add             %o1, 2, %o1                             ! IEU0  Group
932         EX(STH          %g2, [%o0 - 0x2] ASINORMAL,
933                         and %o2, 0x3)                           ! Store
934 1:      be,pt           %xcc, 1f                                ! CTI
935          nop                                                    ! IEU0  Group
936         ASI_SETSRC_NOBLK                                        ! LSU   Group
937         EX(LDUB         [%o1] ASINORMAL, %g2,
938                         add %g0, 1)                             ! Load  Group
939         ASI_SETDST_NOBLK                                        ! LSU   Group
940         EX(STB          %g2, [%o0] ASINORMAL,
941                         add %g0, 1)                             ! Store Group + bubble
942 1:      NORMAL_RETL
943 #endif  /* REGS_64BIT */
944
/* memcpy_noVIS_misaligned: medium-length copy used when source and
 * destination differ in their low two address bits.
 *   On entry %g2 = %o0 & 7 (set in __memcpy_entry), %o0 = destination,
 *   %o1 = source, %o2 = byte count.
 * Steps:
 *   1. If %g2 is non-zero, copy 8 - %g2 single bytes so the destination
 *      becomes 8-byte aligned (%o2 is reduced by that amount up front).
 *   2. Split the remainder: %g5 = %o2 & ~7 is copied as doubles using
 *      alignaddr / LDDF / faligndata / STDF; %o2 & 7 is left for the end.
 *   3. end_cruft: copy the leftover bytes, the first one inline and any
 *      others through the shared vis_slp byte loop.
 */
945 memcpy_noVIS_misaligned:
946         brz,pt                  %g2, 2f                         ! CTI   Group
947          mov                    8, %g1                          ! IEU0
948         sub                     %g1, %g2, %g2                   ! IEU0  Group
949         sub                     %o2, %g2, %o2                   ! IEU0  Group
950 1:      ASI_SETSRC_NOBLK                                        ! LSU   Group
951         EX(LDUB                 [%o1] ASINORMAL, %g5,
952                                 add %o2, %g2)                   ! Load  Group
953         add                     %o1, 1, %o1                     ! IEU0
954         add                     %o0, 1, %o0                     ! IEU1
955         ASI_SETDST_NOBLK                                        ! LSU   Group
956         subcc                   %g2, 1, %g2                     ! IEU1  Group
957         bne,pt                  %xcc, 1b                        ! CTI
958          EX2(STB                %g5, [%o0 - 1] ASINORMAL,
959                                 add %o2, %g2, %o2,
960                                 add %o2, 1)                     ! Store
961 2:
962 #ifdef __KERNEL__
963         VISEntry
964 #endif
/* alignaddr leaves the source address rounded down to 8 bytes in %g1; the
 * first double is preloaded into %f4.
 * NOTE(review): the purpose of "fmovd %f0, %f2" is not evident from this
 * excerpt.
 */
965         andn                    %o2, 7, %g5                     ! IEU0  Group
966         and                     %o2, 7, %o2                     ! IEU1
967         fmovd                   %f0, %f2                        ! FPU
968         ASI_SETSRC_NOBLK                                        ! LSU   Group
969         alignaddr               %o1, %g0, %g1                   ! GRU   Group
970         EXO2(LDDF               [%g1] ASINORMAL, %f4)           ! Load  Group
/* Double loop, unrolled twice so %f4 and %f6 alternate as the previous
 * and the newly loaded double.  %g5 counts remaining bytes in steps of 8;
 * the first half exits to end_cruft when it reaches zero, the second half
 * loops back while it is non-zero.  The source ASI is re-selected just
 * before the back-branch because the loop top starts directly with a
 * load.
 */
971 1:      EX(LDDF                 [%g1 + 0x8] ASINORMAL, %f6,
972                                 add %o2, %g5)                   ! Load  Group
973         add                     %g1, 0x8, %g1                   ! IEU0  Group
974         subcc                   %g5, 8, %g5                     ! IEU1
975         ASI_SETDST_NOBLK                                        ! LSU   Group
976         faligndata              %f4, %f6, %f0                   ! GRU   Group
977         EX2(STDF                %f0, [%o0] ASINORMAL,
978                                 add %o2, %g5, %o2,
979                                 add %o2, 8)                     ! Store
980         add                     %o1, 8, %o1                     ! IEU0  Group
981         be,pn                   %xcc, end_cruft                 ! CTI
982          add                    %o0, 8, %o0                     ! IEU1
983         ASI_SETSRC_NOBLK                                        ! LSU   Group
984         EX(LDDF                 [%g1 + 0x8] ASINORMAL, %f4,
985                                 add %o2, %g5)                   ! Load  Group
986         add                     %g1, 8, %g1                     ! IEU0
987         subcc                   %g5, 8, %g5                     ! IEU1
988         ASI_SETDST_NOBLK                                        ! LSU   Group
989         faligndata              %f6, %f4, %f0                   ! GRU   Group
990         EX2(STDF                %f0, [%o0] ASINORMAL,
991                                 add %o2, %g5, %o2,
992                                 add %o2, 8)                     ! Store
993         add                     %o1, 8, %o1                     ! IEU0
994         ASI_SETSRC_NOBLK                                        ! LSU   Group
995         bne,pn                  %xcc, 1b                        ! CTI   Group
996          add                    %o0, 8, %o0                     ! IEU0
/* Leftover bytes (%o2 = 0..7).  Nothing left: return through FPU_RETL.
 * Otherwise copy one byte here; if more remain, continue in vis_slp,
 * which returns through vis_ret / FPU_CLEAN_RETL instead.  The delay slot
 * of the brz is a nop in userland and ASI_SETSRC_NOBLK in the kernel.
 */
997 end_cruft:
998         brz,pn                  %o2, fpu_retl                   ! CTI   Group
999 #ifndef __KERNEL__
1000          nop                                                    ! IEU0
1001 #else
1002          ASI_SETSRC_NOBLK                                       ! LSU   Group
1003 #endif
1004         EXO2(LDUB       [%o1] ASINORMAL, %g5)                   ! LOAD
1005         add             %o1, 1, %o1                             ! IEU0
1006         add             %o0, 1, %o0                             ! IEU1
1007         ASI_SETDST_NOBLK                                        ! LSU   Group
1008         subcc           %o2, 1, %o2                             ! IEU1
1009         bne,pt          %xcc, vis_slp                           ! CTI
1010          EX(STB         %g5, [%o0 - 1] ASINORMAL,
1011                         add %o2, 1)                             ! Store Group
1012 fpu_retl:
1013         FPU_RETL
1014
1015 #ifdef __KERNEL__
/* Common exit of the exception fixup handlers (placed in .fixup).
 *   VIScopyfixup_reto2 : entry that first moves %o2 into %o1.
 *   VIScopyfixup_ret   : expects the value to return in %o1.
 * The saved TI_CURRENT_DS byte is loaded into %o4 and written to %asi in
 * the delay slot of the final retl; %o0 receives %o1 as the return value.
 * __bzero is called with the fixup-time %o0 and %o1 (seen as %i0 and %i1
 * after the save) only when bit 0 of asi_src is set and bit 0 of asi_dest
 * is clear.
 * NOTE(review): presumably that bit pattern identifies copy_from_user and
 * %o0/%o1 hold the address and length of the uncopied kernel tail, as the
 * in-line comment suggests -- the fixup entries that set them up are only
 * partly visible here; confirm.
 */
1016                 .section        .fixup
1017                 .align          4
1018 VIScopyfixup_reto2:
1019                 mov             %o2, %o1
1020 VIScopyfixup_ret:
1021                 /* If this is copy_from_user(), zero out the rest of the
1022                  * kernel buffer.
1023                  */
1024                 ldub            [%g6 + TI_CURRENT_DS], %o4
1025                 andcc           asi_src, 0x1, %g0
1026                 be,pt           %icc, 1f
1027                  VISExit
1028                 andcc           asi_dest, 0x1, %g0
1029                 bne,pn          %icc, 1f
1030                  nop
1031                 save            %sp, -160, %sp
1032                 mov             %i0, %o0
1033                 call            __bzero
1034                  mov            %i1, %o1
1035                 restore
1036 1:              mov             %o1, %o0
1037                 retl
1038                  wr             %o4, %g0, %asi
/*
 * VIScopyfixup1: fixup entry that converts %g2 into adjustments of %o0
 * and %g7 plus a table value.
 *
 * With q = %g2 / 18 and r = %g2 % 18 on entry, the code below leaves:
 *   %o0 += 32 * q
 *   %g7 -= 32 * q
 *   %g2  = table[r]   (the 18-entry byte table embedded after the code)
 * and then continues at local label 2 (located after VIScopyfixup3).
 * %g5 is used as scratch.
 *
 * NOTE(review): %g2, %g7 and %o0 are set up outside this excerpt; %g2
 * looks like an instruction index within unrolled 18-instruction groups
 * that each cover 32 bytes -- confirm at the sites that reference this
 * label.
 */
                /* Division by repeated subtraction.  The annulled delay slot
                 * (sub %g7) executes only when the branch is taken, i.e. once
                 * per whole 18; %o0 is bumped on every pass, including the
                 * last one, so the extra 32 is taken back off afterwards.
                 */
1039 VIScopyfixup1:  subcc           %g2, 18, %g2
1040                 add             %o0, 32, %o0
1041                 bgeu,a,pt       %icc, VIScopyfixup1
1042                  sub            %g7, 32, %g7
1043                 sub             %o0, 32, %o0
                /* %g2 is now r - 18.  rd %pc yields the address of the rd
                 * instruction itself and the table starts four 4-byte
                 * instructions (16 bytes) after it, so adding 18 + 16 turns
                 * %g2 into the offset of table[r] from %g5.  Do not insert or
                 * remove instructions between the rd and the table.
                 */
1044                 rd              %pc, %g5
1045                 add             %g2, (18 + 16), %g2
1046                 ldub            [%g5 + %g2], %g2
                /* Annulled unconditional branch: the table bytes that follow
                 * are never executed as a delay slot.
                 */
1047                 ba,a,pt         %xcc, 2f
1048 .byte           0, 0, 0, 0, 0, 0, 0, 4, 4, 8, 12, 12, 16, 20, 20, 24, 28, 28
                /* The table is 18 bytes long; realign for the next instruction. */
1049                 .align          4
/*
 * VIScopyfixup2: fixup entry that derives %g7 from %g2, rewinds %o0 by
 * that amount and looks up a table value.
 *
 * With q = %g2 / 10 and r = %g2 % 10 on entry, the code below leaves:
 *   %g7  = 7 * 16 - 16 * q
 *   %o0 -= %g7
 *   %g2  = table[r]   (the 10-entry byte table embedded after the code)
 * and then continues at local label 4 (located after VIScopyfixup4).
 * %g5 is used as scratch.
 *
 * NOTE(review): %g2 and %o0 are set up outside this excerpt; %g2 looks
 * like an instruction index within unrolled 10-instruction groups that
 * each cover 16 bytes -- confirm at the sites that reference this label.
 */
1050 VIScopyfixup2:  mov             (7 * 16), %g7
                /* Division by repeated subtraction; the annulled delay slot
                 * (sub %g7) executes only when the branch is taken, i.e. once
                 * per whole 10.
                 */
1051 1:              subcc           %g2, 10, %g2
1052                 bgeu,a,pt       %icc, 1b
1053                  sub            %g7, 16, %g7
1054                 sub             %o0, %g7, %o0
                /* %g2 is now r - 10.  rd %pc yields the address of the rd
                 * instruction itself and the table starts four 4-byte
                 * instructions (16 bytes) after it, so adding 10 + 16 turns
                 * %g2 into the offset of table[r] from %g5.  Do not insert or
                 * remove instructions between the rd and the table.
                 */
1055                 rd              %pc, %g5
1056                 add             %g2, (10 + 16), %g2
1057                 ldub            [%g5 + %g2], %g2
                /* Annulled unconditional branch: the table bytes that follow
                 * are never executed as a delay slot.
                 */
1058                 ba,a,pt         %xcc, 4f
1059 .byte           0, 0, 0, 0, 0, 4, 4, 8, 12, 12
                /* The table is 10 bytes long; realign for the next instruction. */
1060                 .align          4
/*
 * VIScopyfixup3: fixup entry that converts %g2 into adjustments of %o0
 * and %g7 plus a table value, followed by the tail (local label 2) that
 * computes the byte count handed to VIScopyfixup_ret.
 *
 * With q = %g2 / 10 and r = %g2 % 10 on entry, the code below leaves:
 *   %o0 += 32 * q
 *   %g7 -= 32 * q
 *   %g2  = table[r]   (the 10-entry byte table embedded after the code)
 * and then runs the tail at local label 2.  %g5 is used as scratch.
 *
 * NOTE(review): %g2, %g7 and %o0 are set up outside this excerpt; %g2
 * looks like an instruction index within unrolled 10-instruction groups
 * that each cover 32 bytes -- confirm at the sites that reference this
 * label.
 */
                /* Division by repeated subtraction.  The annulled delay slot
                 * (sub %g7) executes only when the branch is taken, i.e. once
                 * per whole 10; %o0 is bumped on every pass, including the
                 * last one, so the extra 32 is taken back off afterwards.
                 */
1061 VIScopyfixup3:  subcc           %g2, 10, %g2
1062                 add             %o0, 32, %o0
1063                 bgeu,a,pt       %icc, VIScopyfixup3
1064                  sub            %g7, 32, %g7
1065                 sub             %o0, 32, %o0
                /* %g2 is now r - 10.  rd %pc yields the address of the rd
                 * instruction itself and the table starts four 4-byte
                 * instructions (16 bytes) after it, so adding 10 + 16 turns
                 * %g2 into the offset of table[r] from %g5.  Do not insert or
                 * remove instructions between the rd and the table.
                 */
1066                 rd              %pc, %g5
1067                 add             %g2, (10 + 16), %g2
1068                 ldub            [%g5 + %g2], %g2
                /* Annulled unconditional branch: the table bytes that follow
                 * are never executed as a delay slot.
                 */
1069                 ba,a,pt         %xcc, 2f
1070 .byte           0, 0, 0, 0, 0, 0, 0, 8, 16, 24
                /* The table is 10 bytes long; realign for the next instruction. */
1071                 .align          4
                /* Local label 2: tail reached from here and from the "2f"
                 * branch in VIScopyfixup1.  Computes
                 *   %o1 = (%g7 - %g2) + (%o2 & 0x7f)
                 * with the final add in the branch delay slot, then exits
                 * through VIScopyfixup_ret.
                 */
1072 2:              and             %o2, 0x7f, %o2
1073                 sub             %g7, %g2, %g7
1074                 ba,pt           %xcc, VIScopyfixup_ret
1075                  add            %g7, %o2, %o1
/*
 * VIScopyfixup4: fixup entry that derives %g7 from %g2, rewinds %o0 by
 * that amount and looks up a table value, followed by the tail (local
 * label 4) that computes the byte count handed to VIScopyfixup_ret.
 *
 * With q = %g2 / 6 and r = %g2 % 6 on entry, the code below leaves:
 *   %g7  = 7 * 16 - 16 * q
 *   %o0 -= %g7
 *   %g2  = table[r]   (the 6-entry byte table embedded after the code)
 * and then runs the tail at local label 4.  %g5 is used as scratch.
 *
 * NOTE(review): %g2 and %o0 are set up outside this excerpt; %g2 looks
 * like an instruction index within unrolled 6-instruction groups that
 * each cover 16 bytes -- confirm at the sites that reference this label.
 */
1076 VIScopyfixup4:  mov             (7 * 16), %g7
                /* Division by repeated subtraction; the annulled delay slot
                 * (sub %g7) executes only when the branch is taken, i.e. once
                 * per whole 6.
                 */
1077 3:              subcc           %g2, 6, %g2
1078                 bgeu,a,pt       %icc, 3b
1079                  sub            %g7, 16, %g7
1080                 sub             %o0, %g7, %o0
                /* %g2 is now r - 6.  rd %pc yields the address of the rd
                 * instruction itself and the table starts four 4-byte
                 * instructions (16 bytes) after it, so adding 6 + 16 turns
                 * %g2 into the offset of table[r] from %g5.  Do not insert or
                 * remove instructions between the rd and the table.
                 */
1081                 rd              %pc, %g5
1082                 add             %g2, (6 + 16), %g2
1083                 ldub            [%g5 + %g2], %g2
                /* Annulled unconditional branch: the table bytes that follow
                 * are never executed as a delay slot.
                 */
1084                 ba,a,pt         %xcc, 4f
1085 .byte           0, 0, 0, 0, 0, 8
                /* The table is 6 bytes long; realign for the next instruction. */
1086                 .align          4
                /* Local label 4: tail reached from here and from the "4f"
                 * branch in VIScopyfixup2.  Computes
                 *   %o1 = (%g7 - %g2) + (%o2 & 0xf)
                 * with the final add in the branch delay slot, then exits
                 * through VIScopyfixup_ret.
                 */
1087 4:              and             %o2, 0xf, %o2
1088                 sub             %g7, %g2, %g7
1089                 ba,pt           %xcc, VIScopyfixup_ret
1090                  add            %g7, %o2, %o1
/*
 * VIScopyfixup_vis0 .. VIScopyfixup_vis4: fixup entries that build the
 * byte count in %o1 from %o2, %g3 and %g7 and exit through
 * VIScopyfixup_ret.  The labels are stacked so that each entry falls
 * through into the next one; the final add sits in the branch delay slot.
 *
 * Resulting %o1 per entry point:
 *   VIScopyfixup_vis2:  %o2 + 0x40 + %g7 + %g3   (-0x40, then +0x80)
 *   VIScopyfixup_vis0:  %o2 + 0x80 + %g7 + %g3
 *   VIScopyfixup_vis1:  %o2        + %g7 + %g3
 *   VIScopyfixup_vis4:  %o2 + %g3 + 16
 *   VIScopyfixup_vis3:  %o2 + %g3 + 8
 *
 * Side effects: vis2/vis0 modify %o2, vis0..vis2 modify %g7, vis3/vis4
 * modify %g3.
 * NOTE(review): what %o2, %g3 and %g7 hold at each faulting site is
 * decided outside this excerpt -- confirm there before changing any of
 * the constants.
 */
1091 VIScopyfixup_vis2:
1092                 sub             %o2, 0x40, %o2
1093 VIScopyfixup_vis0:
1094                 add             %o2, 0x80, %o2
1095 VIScopyfixup_vis1:
1096                 add             %g7, %g3, %g7
1097                 ba,pt           %xcc, VIScopyfixup_ret
1098                  add            %o2, %g7, %o1
1099 VIScopyfixup_vis4:
1100                 add             %g3, 8, %g3
1101 VIScopyfixup_vis3:
1102                 add             %g3, 8, %g3
1103                 ba,pt           %xcc, VIScopyfixup_ret
1104                  add            %o2, %g3, %o1
/* End of the kernel-only .fixup code. */
1105 #endif
1106
/*
 * memmove / __memmove (kernel only): overlap-safe byte copy.
 * Both symbols label the same entry point.
 *
 * In:   %o0 = destination, %o1 = source, %o2 = length in bytes
 * Out:  on the backward-copy path below, %o0 = 0.  The other two paths
 *       tail-branch to memcpy_private, whose return value is not visible
 *       in this part of the file.
 * Uses: %g3, %g5, %o5 and the integer condition codes.
 *
 * Path selection (unsigned 64-bit compares):
 *   dst <  src         -> memcpy_private
 *   src + len <= dst   -> memcpy_private
 *   otherwise          -> copy backwards, one byte at a time, from the
 *                         last byte down to the first
 * A zero length always satisfies one of the first two tests, so the
 * backward loop is only entered with %o2 != 0.
 */
1107 #ifdef __KERNEL__
1108                 .text
1109                 .align          32
1110
1111                 .globl          __memmove
1112                 .type           __memmove,@function
1113
1114                 .globl          memmove
1115                 .type           memmove,@function
1116
1117 memmove:
                /* dst < src: hand over to memcpy_private.  The delay slot is
                 * not annulled, so %g5 = dst - src is computed on both paths.
                 * NOTE(review): whether memcpy_private reads %g5 is not
                 * visible here; on the fall-through path it is overwritten.
                 */
1118 __memmove:      cmp             %o0, %o1
1119                 blu,pt          %xcc, memcpy_private
1120                  sub            %o0, %o1, %g5
                /* src + len <= dst: the regions do not overlap in the
                 * dangerous direction, hand over to memcpy_private.  The
                 * delay slot leaves %g5 = src + len on both paths.
                 */
1121                 add             %o1, %o2, %g3
1122                 cmp             %g3, %o0
1123                 bleu,pt         %xcc, memcpy_private
1124                  add            %o1, %o2, %g5
1125                 add             %o0, %o2, %o5
1126
                /* Point %o1 / %o0 at the last byte of source / destination. */
1127                 sub             %g5, 1, %o1
1128                 sub             %o5, 1, %o0
                /* Backward byte loop.  The branch tests the full 64-bit count
                 * (%xcc), like the 64-bit pointer compares above and the other
                 * copy loops in this file; a test on %icc would leave the loop
                 * early whenever the low 32 bits of the remaining count reach
                 * zero.  The destination pointer is stepped in the delay slot.
                 */
1129 1:              ldub            [%o1], %g5
1130                 subcc           %o2, 1, %o2
1131                 sub             %o1, 1, %o1
1132                 stb             %g5, [%o0]
1133                 bne,pt          %xcc, 1b
1134                  sub            %o0, 1, %o0
1135
                /* Return 0; the clr executes in the retl delay slot. */
1136                 retl
1137                  clr            %o0
1138 #endif