ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / arch / sparc64 / lib / VIScopy.S
1 /* $Id: VIScopy.S,v 1.27 2002/02/09 19:49:30 davem Exp $
2  * VIScopy.S: High speed copy operations utilizing the UltraSparc
3  *            Visual Instruction Set.
4  *
5  * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
6  * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
7  */
8
9 #include "VIS.h"
10
11         /* VIS code can be used for numerous copy/set operation variants.
12          * It can be made to work in the kernel, one single instance,
13          * for all of memcpy, copy_to_user, and copy_from_user by setting
14          * the ASI src/dest globals correctly.  Furthermore it can
15          * be used for kernel-->kernel page copies as well, a hook label
16          * is put in here just for this purpose.
17          *
18          * For userland, compiling this without __KERNEL__ defined makes
19          * it work just fine as a generic libc bcopy and memcpy.
20          * If for userland it is compiled with a 32bit gcc (but you need
21          * -Wa,-Av9a for as), the code will just rely on lower 32bits of
22          * IEU registers, if you compile it with 64bit gcc (ie. define
23          * __sparc_v9__), the code will use full 64bit.
24          */
25          
26 #ifdef __KERNEL__
27
28 #include <asm/visasm.h>
29 #include <asm/thread_info.h>
30
/* Kernel return sequences.
 *
 * Each macro loads the byte at [%g6 + TI_CURRENT_DS] into %o1, clears
 * %o0 (so the routine returns 0) and writes %o1 to %asi in the delay
 * slot of the retl.
 *
 * FPU_CLEAN_RETL and FPU_RETL are textually identical in this
 * configuration; both invoke VISExit before returning.  VISExit is not
 * defined in this file (presumably it comes from <asm/visasm.h>).
 * NORMAL_RETL is the same sequence without VISExit.
 *
 * Clobbers %o0 and %o1.
 */
31 #define FPU_CLEAN_RETL                                  \
32         ldub            [%g6 + TI_CURRENT_DS], %o1;     \
33         VISExit                                         \
34         clr             %o0;                            \
35         retl;                                           \
36          wr             %o1, %g0, %asi;
37 #define FPU_RETL                                        \
38         ldub            [%g6 + TI_CURRENT_DS], %o1;     \
39         VISExit                                         \
40         clr             %o0;                            \
41         retl;                                           \
42          wr             %o1, %g0, %asi;
43 #define NORMAL_RETL                                     \
44         ldub            [%g6 + TI_CURRENT_DS], %o1;     \
45         clr             %o0;                            \
46         retl;                                           \
47          wr             %o1, %g0, %asi;
/* Kernel exception-table helpers.
 *
 * In all of the instruction-wrapping macros below, "x,y" is one
 * instruction that has been split at its comma so that it can be
 * passed as two preprocessor arguments; it is emitted at local
 * label 98 and an __ex_table entry is recorded for that address.
 * Every macro switches back to .text (aligned to 4) when it is done.
 *
 * EX(x,y,a,b): the __ex_table entry points at a private stub (label 99)
 * in .fixup.  The stub branches to VIScopyfixup_ret and executes
 * "a, b, %o1" in the branch delay slot, so a caller writing
 * "add %o2, %g2" gets "add %o2, %g2, %o1".
 */
48 #define EX(x,y,a,b)                             \
49 98:     x,y;                                    \
50         .section .fixup;                        \
51         .align  4;                              \
52 99:     ba      VIScopyfixup_ret;               \
53          a, b, %o1;                             \
54         .section __ex_table;                    \
55         .align  4;                              \
56         .word   98b, 99b;                       \
57         .text;                                  \
58         .align  4;
/* EX2(x,y,c,d,e,a,b): like EX, but the stub first executes the
 * instruction "c, d, e" and only then branches to VIScopyfixup_ret
 * with "a, b, %o1" in the delay slot.
 */
59 #define EX2(x,y,c,d,e,a,b)                      \
60 98:     x,y;                                    \
61         .section .fixup;                        \
62         .align  4;                              \
63 99:     c, d, e;                                \
64         ba      VIScopyfixup_ret;               \
65          a, b, %o1;                             \
66         .section __ex_table;                    \
67         .align  4;                              \
68         .word   98b, 99b;                       \
69         .text;                                  \
70         .align  4;
/* EXO2(x,y): no private stub; the __ex_table entry points directly at
 * VIScopyfixup_reto2.
 */
71 #define EXO2(x,y)                               \
72 98:     x,y;                                    \
73         .section __ex_table;                    \
74         .align  4;                              \
75         .word   98b, VIScopyfixup_reto2;        \
76         .text;                                  \
77         .align  4;
/* EXVISN(x,y,n): no private stub; the __ex_table entry points at
 * VIScopyfixup_vis<n>, the name being formed by token pasting.
 */
78 #define EXVISN(x,y,n)                           \
79 98:     x,y;                                    \
80         .section __ex_table;                    \
81         .align  4;                              \
82         .word   98b, VIScopyfixup_vis##n;       \
83         .text;                                  \
84         .align  4;
/* EXT(start,end,handler): emits no instruction, only the four words
 * "start, 0, end, handler" into __ex_table.
 * NOTE(review): presumably a range entry covering [start, end) --
 * confirm against the exception-table search code.
 */
85 #define EXT(start,end,handler)                  \
86         .section __ex_table;                    \
87         .align  4;                              \
88         .word   start, 0, end, handler;         \
89         .text;                                  \
90         .align  4;
91 #else
/* Non-kernel (userland) variants of the return and exception macros.
 *
 * Every return macro ends in "retl" with "mov %g6, %o0" in the delay
 * slot, i.e. the routine returns whatever is held in %g6.
 * NOTE(review): presumably %g6 holds the original destination pointer
 * saved by code outside this excerpt -- confirm.
 *
 * Without REGS_64BIT the two FPU_* variants additionally write
 * FPRS_FEF to %fprs before returning.
 */
92 #ifdef REGS_64BIT
93 #define FPU_CLEAN_RETL                          \
94         retl;                                   \
95          mov    %g6, %o0;
96 #define FPU_RETL                                \
97         retl;                                   \
98          mov    %g6, %o0;
99 #else
100 #define FPU_CLEAN_RETL                          \
101         wr      %g0, FPRS_FEF, %fprs;           \
102         retl;                                   \
103          mov    %g6, %o0;
104 #define FPU_RETL                                \
105         wr      %g0, FPRS_FEF, %fprs;           \
106         retl;                                   \
107          mov    %g6, %o0;
108 #endif
109 #define NORMAL_RETL     \
110         retl;           \
111          mov    %g6, %o0;
/* Outside the kernel no exception table is built: the EX* wrappers
 * expand to just the wrapped instruction "x,y" and EXT expands to
 * nothing.
 */
112 #define EX(x,y,a,b)             x,y
113 #define EX2(x,y,c,d,e,a,b)      x,y
114 #define EXO2(x,y)               x,y
115 #define EXVISN(x,y,n)           x,y
116 #define EXT(a,b,c)
117 #endif
/* Shorthands for EXVISN with a fixed handler index: EXVIS uses 0 and
 * EXVIS1..EXVIS4 use 1..4.  With the kernel definition of EXVISN this
 * selects VIScopyfixup_vis0..VIScopyfixup_vis4; with the userland
 * definition the index is ignored and only the instruction is emitted.
 */
118 #define EXVIS(x,y) EXVISN(x,y,0)
119 #define EXVIS1(x,y) EXVISN(x,y,1)
120 #define EXVIS2(x,y) EXVISN(x,y,2)
121 #define EXVIS3(x,y) EXVISN(x,y,3)
122 #define EXVIS4(x,y) EXVISN(x,y,4)
123
/* FREG_FROB(f1..f9): run faligndata over the eight adjacent pairs
 * (f1,f2), (f2,f3), ... (f8,f9) of the nine named floating point
 * registers and leave the eight results in %f48, %f50, ... %f62.
 * The arguments are bare register names without the leading '%'.
 * The extraction offset used by faligndata is whatever the most recent
 * alignaddr established.
 */
124 #define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)           \
125         faligndata              %f1, %f2, %f48;                 \
126         faligndata              %f2, %f3, %f50;                 \
127         faligndata              %f3, %f4, %f52;                 \
128         faligndata              %f4, %f5, %f54;                 \
129         faligndata              %f5, %f6, %f56;                 \
130         faligndata              %f6, %f7, %f58;                 \
131         faligndata              %f7, %f8, %f60;                 \
132         faligndata              %f8, %f9, %f62;
133
/* MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt): one step of the
 * block copy loop.  All arguments except jmptgt are bare register names.
 *
 *   1. block-load from [%src] into the registers starting at %fdest
 *   2. block-store the registers starting at %fsrc to [%dest]
 *   3. src += 0x40, len -= 0x40 (setting the condition codes)
 *   4. if len reached zero, branch to jmptgt; dest += 0x40 sits in the
 *      delay slot and therefore happens on both paths
 *   5. on the fall-through path only, ASI_SETSRC_BLK is issued
 *
 * Both memory accesses are wrapped in EXVIS.  LDBLK, STBLK, ASIBLK and
 * the ASI_SET* macros are not defined in this file (presumably VIS.h).
 */
134 #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)    \
135         EXVIS(LDBLK             [%src] ASIBLK, %fdest);         \
136         ASI_SETDST_BLK                                          \
137         EXVIS(STBLK             %fsrc, [%dest] ASIBLK);         \
138         add                     %src, 0x40, %src;               \
139         subcc                   %len, 0x40, %len;               \
140         be,pn                   %xcc, jmptgt;                   \
141          add                    %dest, 0x40, %dest;             \
142         ASI_SETSRC_BLK
143
/* The three loop steps differ only in the load target (%f0, %f16 or
 * %f32); all of them store from %f48.
 */
144 #define LOOP_CHUNK1(src, dest, len, branch_dest)                \
145         MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
146 #define LOOP_CHUNK2(src, dest, len, branch_dest)                \
147         MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
148 #define LOOP_CHUNK3(src, dest, len, branch_dest)                \
149         MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
150
/* STORE_SYNC(dest, fsrc): block-store the registers starting at %fsrc
 * to [%dest] (wrapped in EXVIS) and advance dest by 0x40.
 */
151 #define STORE_SYNC(dest, fsrc)                                  \
152         EXVIS(STBLK             %fsrc, [%dest] ASIBLK);         \
153         add                     %dest, 0x40, %dest;
154
/* STORE_JUMP(dest, fsrc, target): block-store the registers starting at
 * %fsrc to [%dest] (wrapped in EXVIS2), advance dest by 0x40 and branch
 * unconditionally to target.  The macro ends on the branch, so the
 * instruction written after the macro invocation fills the delay slot.
 *
 * The kernel variant also rewrites asi_dest around the store:
 *     %g5      = asi_dest >> 3        (computed before the store)
 *     asi_dest = asi_dest ^ ASI_BLK_XOR1 ^ %g5
 * NOTE(review): presumably this turns the block-transfer ASI back into
 * the matching non-block ASI -- confirm against the ASI_BLK_* values
 * in VIS.h.  Clobbers %g5 in the kernel variant.
 */
155 #ifdef __KERNEL__
156 #define STORE_JUMP(dest, fsrc, target)                          \
157         srl                     asi_dest, 3, %g5;               \
158         EXVIS2(STBLK            %fsrc, [%dest] ASIBLK);         \
159         xor                    asi_dest, ASI_BLK_XOR1, asi_dest;\
160         add                     %dest, 0x40, %dest;             \
161         xor                     asi_dest, %g5, asi_dest;        \
162         ba,pt                   %xcc, target;
163 #else
164 #define STORE_JUMP(dest, fsrc, target)                          \
165         EXVIS2(STBLK            %fsrc, [%dest] ASIBLK);         \
166         add                     %dest, 0x40, %dest;             \
167         ba,pt                   %xcc, target;
168 #endif
169
/* VISLOOP_PAD: fifteen nop instructions outside the kernel, nothing in
 * the kernel.
 * NOTE(review): presumably this keeps every visN0 code group at the
 * 512-byte stride that the "sll %g2, 9" dispatch relies on, making up
 * for instructions that only the kernel expansions emit -- confirm
 * against the ASI_* macro expansions in VIS.h.
 */
170 #ifndef __KERNEL__
171 #define VISLOOP_PAD nop; nop; nop; nop; \
172                     nop; nop; nop; nop; \
173                     nop; nop; nop; nop; \
174                     nop; nop; nop;
175 #else
176 #define VISLOOP_PAD
177 #endif
178
/* FINISH_VISCHUNK(dest, f0, f1, left): emit one more aligned 8-byte
 * store if at least 8 bytes are left.
 *
 *   left -= 8; if the result is negative, branch to vis_out.
 *   The faligndata of (%f0, %f1) into %f48 sits in the delay slot of
 *   that branch (which is not annulled), so %f48 is written on both
 *   paths.  On the fall-through path %f48 is stored to [%dest]
 *   (wrapped in EXVIS3) and dest advances by 8.
 *
 * The parameters named f0 and f1 are macro arguments, not the fixed
 * registers %f0 and %f1.
 */
179 #define FINISH_VISCHUNK(dest, f0, f1, left)                     \
180         ASI_SETDST_NOBLK                                        \
181         subcc                   %left, 8, %left;                \
182         bl,pn                   %xcc, vis_out;                  \
183          faligndata             %f0, %f1, %f48;                 \
184         EXVIS3(STDF             %f48, [%dest] ASINORMAL);       \
185         add                     %dest, 8, %dest;
186
/* UNEVEN_VISCHUNK_LAST(dest, f0, f1, left): left -= 8 and branch to
 * vis_out if the result is negative; the delay slot copies register
 * f0 into register f1 with fsrc1.  The dest argument is not used by
 * the expansion.
 *
 * UNEVEN_VISCHUNK: the same sequence followed by an annulled
 * unconditional branch to vis_out_slk.
 */
187 #define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)                \
188         subcc                   %left, 8, %left;                \
189         bl,pn                   %xcc, vis_out;                  \
190          fsrc1                  %f0, %f1;
191 #define UNEVEN_VISCHUNK(dest, f0, f1, left)                     \
192         UNEVEN_VISCHUNK_LAST(dest, f0, f1, left)                \
193         ba,a,pt                 %xcc, vis_out_slk;
194
195         /* Macros for non-VIS memcpy code. */
196 #ifdef REGS_64BIT
197
/* Integer-register copy helpers, REGS_64BIT flavour.  src, dst and
 * t0..t3 are bare register names; offset is a constant.  Every macro
 * issues ASI_SETSRC_NOBLK before its loads and ASI_SETDST_NOBLK before
 * its stores.
 *
 * MOVE_BIGCHUNK: copy 32 bytes at [src + offset].  The data is read as
 * four 8-byte loads but written as eight 4-byte stores: the low word of
 * each register goes to the higher address, then the register is
 * shifted right by 32 and the former high word goes to the lower
 * address.  Clobbers t0..t3.
 */
198 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)                 \
199         ASI_SETSRC_NOBLK                                                \
200         LDX                     [%src + offset + 0x00] ASINORMAL, %t0;  \
201         LDX                     [%src + offset + 0x08] ASINORMAL, %t1;  \
202         LDX                     [%src + offset + 0x10] ASINORMAL, %t2;  \
203         LDX                     [%src + offset + 0x18] ASINORMAL, %t3;  \
204         ASI_SETDST_NOBLK                                                \
205         STW                     %t0, [%dst + offset + 0x04] ASINORMAL;  \
206         srlx                    %t0, 32, %t0;                           \
207         STW                     %t0, [%dst + offset + 0x00] ASINORMAL;  \
208         STW                     %t1, [%dst + offset + 0x0c] ASINORMAL;  \
209         srlx                    %t1, 32, %t1;                           \
210         STW                     %t1, [%dst + offset + 0x08] ASINORMAL;  \
211         STW                     %t2, [%dst + offset + 0x14] ASINORMAL;  \
212         srlx                    %t2, 32, %t2;                           \
213         STW                     %t2, [%dst + offset + 0x10] ASINORMAL;  \
214         STW                     %t3, [%dst + offset + 0x1c] ASINORMAL;  \
215         srlx                    %t3, 32, %t3;                           \
216         STW                     %t3, [%dst + offset + 0x18] ASINORMAL;
217
/* MOVE_BIGALIGNCHUNK: copy 64 bytes at [src + offset] as two rounds of
 * four 8-byte loads followed by four 8-byte stores.  Clobbers t0..t3.
 */
218 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)            \
219         ASI_SETSRC_NOBLK                                                \
220         LDX                     [%src + offset + 0x00] ASINORMAL, %t0;  \
221         LDX                     [%src + offset + 0x08] ASINORMAL, %t1;  \
222         LDX                     [%src + offset + 0x10] ASINORMAL, %t2;  \
223         LDX                     [%src + offset + 0x18] ASINORMAL, %t3;  \
224         ASI_SETDST_NOBLK                                                \
225         STX                     %t0, [%dst + offset + 0x00] ASINORMAL;  \
226         STX                     %t1, [%dst + offset + 0x08] ASINORMAL;  \
227         STX                     %t2, [%dst + offset + 0x10] ASINORMAL;  \
228         STX                     %t3, [%dst + offset + 0x18] ASINORMAL;  \
229         ASI_SETSRC_NOBLK                                                \
230         LDX                     [%src + offset + 0x20] ASINORMAL, %t0;  \
231         LDX                     [%src + offset + 0x28] ASINORMAL, %t1;  \
232         LDX                     [%src + offset + 0x30] ASINORMAL, %t2;  \
233         LDX                     [%src + offset + 0x38] ASINORMAL, %t3;  \
234         ASI_SETDST_NOBLK                                                \
235         STX                     %t0, [%dst + offset + 0x20] ASINORMAL;  \
236         STX                     %t1, [%dst + offset + 0x28] ASINORMAL;  \
237         STX                     %t2, [%dst + offset + 0x30] ASINORMAL;  \
238         STX                     %t3, [%dst + offset + 0x38] ASINORMAL;
239
/* MOVE_LASTCHUNK: copy the 16 bytes that end at [src - offset], again
 * reading 8 bytes at a time and writing 4 bytes at a time.  Here the
 * shifted high words are placed in t2 and t3, so t0 and t1 keep the
 * loaded values.  Clobbers t0..t3.
 */
240 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)                \
241         ASI_SETSRC_NOBLK                                                \
242         LDX                     [%src - offset - 0x10] ASINORMAL, %t0;  \
243         LDX                     [%src - offset - 0x08] ASINORMAL, %t1;  \
244         ASI_SETDST_NOBLK                                                \
245         STW                     %t0, [%dst - offset - 0x0c] ASINORMAL;  \
246         srlx                    %t0, 32, %t2;                           \
247         STW                     %t2, [%dst - offset - 0x10] ASINORMAL;  \
248         STW                     %t1, [%dst - offset - 0x04] ASINORMAL;  \
249         srlx                    %t1, 32, %t3;                           \
250         STW                     %t3, [%dst - offset - 0x08] ASINORMAL;
251
/* MOVE_LASTALIGNCHUNK: copy the 16 bytes that end at [src - offset]
 * with two 8-byte loads and two 8-byte stores.  Clobbers t0 and t1.
 */
252 #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)                   \
253         ASI_SETSRC_NOBLK                                                \
254         LDX                     [%src - offset - 0x10] ASINORMAL, %t0;  \
255         LDX                     [%src - offset - 0x08] ASINORMAL, %t1;  \
256         ASI_SETDST_NOBLK                                                \
257         STX                     %t0, [%dst - offset - 0x10] ASINORMAL;  \
258         STX                     %t1, [%dst - offset - 0x08] ASINORMAL;
259
260 #else /* !REGS_64BIT */
261
/* Integer-register copy helpers for builds without REGS_64BIT.  Only
 * plain 4-byte lduw/stw accesses are used, with no ASI macros.
 *
 * MOVE_BIGCHUNK: copy 32 bytes at [src + offset] as two rounds of four
 * word loads followed by four word stores.  Clobbers t0..t3.
 */
262 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)                 \
263         lduw                    [%src + offset + 0x00], %t0;            \
264         lduw                    [%src + offset + 0x04], %t1;            \
265         lduw                    [%src + offset + 0x08], %t2;            \
266         lduw                    [%src + offset + 0x0c], %t3;            \
267         stw                     %t0, [%dst + offset + 0x00];            \
268         stw                     %t1, [%dst + offset + 0x04];            \
269         stw                     %t2, [%dst + offset + 0x08];            \
270         stw                     %t3, [%dst + offset + 0x0c];            \
271         lduw                    [%src + offset + 0x10], %t0;            \
272         lduw                    [%src + offset + 0x14], %t1;            \
273         lduw                    [%src + offset + 0x18], %t2;            \
274         lduw                    [%src + offset + 0x1c], %t3;            \
275         stw                     %t0, [%dst + offset + 0x10];            \
276         stw                     %t1, [%dst + offset + 0x14];            \
277         stw                     %t2, [%dst + offset + 0x18];            \
278         stw                     %t3, [%dst + offset + 0x1c];
279
/* MOVE_LASTCHUNK: copy the 16 bytes that end at [src - offset] with
 * four word loads followed by four word stores.  Clobbers t0..t3.
 */
280 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)                \
281         lduw                    [%src - offset - 0x10], %t0;            \
282         lduw                    [%src - offset - 0x0c], %t1;            \
283         lduw                    [%src - offset - 0x08], %t2;            \
284         lduw                    [%src - offset - 0x04], %t3;            \
285         stw                     %t0, [%dst - offset - 0x10];            \
286         stw                     %t1, [%dst - offset - 0x0c];            \
287         stw                     %t2, [%dst - offset - 0x08];            \
288         stw                     %t3, [%dst - offset - 0x04];
289
290 #endif /* !REGS_64BIT */
291
292 #ifdef __KERNEL__
293                 .section        __ex_table,#alloc
294                 .section        .fixup,#alloc,#execinstr
295 #endif
296
297                 .text
298                 .align                  32
299                 .globl                  memcpy
300                 .type                   memcpy,@function
301
302                 .globl                  bcopy
303                 .type                   bcopy,@function
304
/* Kernel entry points.
 *
 * All four entries test the byte count in %o2.  When it is non-zero
 * they branch to __memcpy_entry (defined outside this excerpt) after
 * choosing the source and destination address spaces; when it is zero
 * they return 0 in %o0 without copying.
 *
 *   entry               asi_src     asi_dest
 *   memcpy / __memcpy   ASI_P       ASI_P
 *   __copy_from_user    %asi        ASI_P
 *   __copy_to_user      ASI_P       %asi
 *   __copy_in_user      %asi        %asi (copied from asi_src)
 *
 * asi_src and asi_dest are not defined in this file (presumably
 * register aliases from VIS.h).  In each entry the second assignment
 * sits in the branch delay slot and is executed whether or not the
 * branch is taken.
 */
305 #ifdef __KERNEL__
306                 .globl                  __memcpy_begin
307 __memcpy_begin:
308
309                 .globl                  __memcpy
310                 .type                   __memcpy,@function
311
312 memcpy_private:
313 __memcpy:
314 memcpy:         mov             ASI_P, asi_src                  ! IEU0  Group
315                 brnz,pt         %o2, __memcpy_entry             ! CTI
316                  mov            ASI_P, asi_dest                 ! IEU1
317                 retl
318                  clr            %o0
319
320                 .align                  32
321                 .globl                  __copy_from_user
322                 .type                   __copy_from_user,@function
/* There is no retl of its own here: with a zero count execution falls
 * through into __copy_to_user, whose branch is not taken either, and
 * returns 0 through the retl found there.
 */
323 __copy_from_user:rd             %asi, asi_src                   ! IEU0  Group
324                 brnz,pt         %o2, __memcpy_entry             ! CTI
325                  mov            ASI_P, asi_dest                 ! IEU1
326
327                 .globl                  __copy_to_user
328                 .type                   __copy_to_user,@function
329 __copy_to_user: mov             ASI_P, asi_src                  ! IEU0  Group
330                 brnz,pt         %o2, __memcpy_entry             ! CTI
331                  rd             %asi, asi_dest                  ! IEU1
332                 retl                                            ! CTI   Group
333                  clr            %o0                             ! IEU0  Group
334
335                 .globl                  __copy_in_user
336                 .type                   __copy_in_user,@function
337 __copy_in_user: rd              %asi, asi_src                   ! IEU0  Group
338                 brnz,pt         %o2, __memcpy_entry             ! CTI
339                  mov            asi_src, asi_dest               ! IEU1
340                 retl                                            ! CTI   Group
341                  clr            %o0                             ! IEU0  Group
342 #endif
343
/* bcopy: exchange the first two arguments and continue in
 * memcpy_private.
 *
 *   %g3 = %o0; %o0 = %o1; %o1 = %g3   (the last move is in the delay slot)
 *
 * The branch is taken when the count in %o2 is zero or positive.  A
 * negative count falls through and returns with %o0 cleared.
 * Clobbers %g3.
 */
344 bcopy:          or              %o0, 0, %g3                     ! IEU0  Group
345                 addcc           %o1, 0, %o0                     ! IEU1
346                 brgez,pt        %o2, memcpy_private             ! CTI
347                  or             %g3, 0, %o1                     ! IEU0  Group
348                 retl                                            ! CTI   Group brk forced
349                  clr            %o0                             ! IEU0
350
351
352 #ifdef __KERNEL__
/* Instruction templates used when rewriting code:
 *   BRANCH_ALWAYS   "ba,pt %xcc, ." -- a BPcc with an all-zero 19-bit
 *                   displacement field (bits 18..0)
 *   NOP             "sethi 0, %g0"
 */
353 #define BRANCH_ALWAYS   0x10680000
354 #define NOP             0x01000000
/* ULTRA3_DO_PATCH(OLD, NEW): overwrite the first two instructions at
 * OLD with "ba,pt %xcc, NEW" followed by a nop.
 *
 * The byte distance NEW - OLD is turned into a word displacement and
 * ORed into BRANCH_ALWAYS.  The "sll 11 / srl 11 + 2" pair keeps only
 * bits 20..2 of the distance, i.e. exactly the 19 bits of the
 * displacement field, so a backward (negative) distance can no longer
 * spill sign bits into the opcode, condition and prediction fields.
 * NEW must still lie within the reach of a 19-bit word displacement.
 *
 * FLUSH works on the doubleword containing its address, and OLD is
 * not necessarily 8-byte aligned, so the second patched word is
 * flushed separately.
 *
 * Clobbers %g1, %g2 and %g3.
 */
355 #define ULTRA3_DO_PATCH(OLD, NEW)       \
356         sethi   %hi(NEW), %g1; \
357         or      %g1, %lo(NEW), %g1; \
358         sethi   %hi(OLD), %g2; \
359         or      %g2, %lo(OLD), %g2; \
360         sub     %g1, %g2, %g1; \
361         sethi   %hi(BRANCH_ALWAYS), %g3; \
362         sll     %g1, 11, %g1; srl %g1, 11 + 2, %g1; \
363         or      %g3, %lo(BRANCH_ALWAYS), %g3; \
364         or      %g3, %g1, %g3; \
365         stw     %g3, [%g2]; \
366         sethi   %hi(NOP), %g3; \
367         or      %g3, %lo(NOP), %g3; \
368         stw     %g3, [%g2 + 0x4]; \
369         flush   %g2; flush %g2 + 0x4;
/* ULTRA3_PCACHE_DO_NOP(symbol): replace two runs of instructions with
 * nops:
 *   - the six instructions starting at symbol##_nop_1_6
 *     (byte offsets 0x00 .. 0x14)
 *   - the three instructions starting at symbol##_nop_2_3
 *     (byte offsets 0x00 .. 0x08)
 *
 * The sixth store of the first run used to hit offset 0x04 a second
 * time, which left the instruction at 0x14 unpatched; it now targets
 * 0x14, matching the doubleword flushed right after it.
 *
 * %g2 holds the nop encoding; a single sethi is enough because the low
 * ten bits of NOP are zero.  Clobbers %g1 and %g2.
 */
370 #define ULTRA3_PCACHE_DO_NOP(symbol)    \
371         sethi   %hi(symbol##_nop_1_6), %g1; \
372         or      %g1, %lo(symbol##_nop_1_6), %g1; \
373         sethi   %hi(NOP), %g2; \
374         stw     %g2, [%g1 + 0x00]; \
375         stw     %g2, [%g1 + 0x04]; \
376         flush   %g1 + 0x00; \
377         stw     %g2, [%g1 + 0x08]; \
378         stw     %g2, [%g1 + 0x0c]; \
379         flush   %g1 + 0x08; \
380         stw     %g2, [%g1 + 0x10]; \
381         stw     %g2, [%g1 + 0x14]; \
382         flush   %g1 + 0x10; \
383         sethi   %hi(symbol##_nop_2_3), %g1; \
384         or      %g1, %lo(symbol##_nop_2_3), %g1; \
385         stw     %g2, [%g1 + 0x00]; \
386         stw     %g2, [%g1 + 0x04]; \
387         flush   %g1 + 0x00; \
388         stw     %g2, [%g1 + 0x08]; \
389         flush   %g1 + 0x08;
390
391 #include <asm/dcu.h>
392
/* cheetah_patch_copyops: rewrite the copy routines in place.
 *
 * First the entry of memcpy, __copy_from_user, __copy_to_user and
 * __copy_in_user is replaced by a branch to U3memcpy, U3copy_from_user,
 * U3copy_to_user and U3copy_in_user respectively.  Then the
 * instruction runs labelled <sym>_nop_1_6 and <sym>_nop_2_3 are
 * overwritten with nops for U3memcpy, U3copy_from_user, U3copy_to_user
 * and cheetah_copy_user_page.
 *
 * The test of DCU_PE in the DCU control register, which would have
 * skipped the nop pass, is compiled out with "#if 0".
 *
 * Takes no arguments, returns through retl and clobbers %g1, %g2 and
 * %g3 through the patch macros.
 */
393         .globl  cheetah_patch_copyops
394 cheetah_patch_copyops:
395         ULTRA3_DO_PATCH(memcpy, U3memcpy)
396         ULTRA3_DO_PATCH(__copy_from_user, U3copy_from_user)
397         ULTRA3_DO_PATCH(__copy_to_user, U3copy_to_user)
398         ULTRA3_DO_PATCH(__copy_in_user, U3copy_in_user)
399 #if 0 /* Causes data corruption, nop out the optimization
400        * for now -DaveM
401        */
402         ldxa                    [%g0] ASI_DCU_CONTROL_REG, %g3
403         sethi                   %uhi(DCU_PE), %o3
404         sllx                    %o3, 32, %o3
405         andcc                   %g3, %o3, %g0
406         be,pn                   %xcc, pcache_disabled
407          nop
408 #endif
409         ULTRA3_PCACHE_DO_NOP(U3memcpy)
410         ULTRA3_PCACHE_DO_NOP(U3copy_from_user)
411         ULTRA3_PCACHE_DO_NOP(U3copy_to_user)
412         ULTRA3_PCACHE_DO_NOP(cheetah_copy_user_page)
413 #if 0
414 pcache_disabled:
415 #endif
416         retl
417          nop
418 #undef BRANCH_ALWAYS
419 #undef NOP
420 #undef ULTRA3_DO_PATCH
421 #endif /* __KERNEL__ */
422
/* VIS_enter / do_dest_8byte_align: copy single bytes until the
 * destination pointer %o0 is 8-byte aligned.
 *
 * Register use in this part:
 *   %o0  destination, %o1  source, %o2  byte count
 *   %g2  on entry the low three bits of %o0, afterwards the number of
 *        bytes still to copy before %o0 is aligned
 *   %o5, %g3  data bytes in flight
 *
 * The kernel build computes %g2 and the condition codes right here.
 * In the userland build the code that reaches VIS_enter must already
 * have done so (not visible in this excerpt).
 *
 * Once %g2 is known it is subtracted from %o2, so at every access that
 * can fault the value %o2 + %g2 computed by the EX/EX2 fixups is the
 * number of bytes not yet copied.  Where %g2 or %o0 has been updated
 * ahead of the faulting access, EX2 first undoes that update.
 */
423         .align                  32
424 #ifdef __KERNEL__
425         andcc                   %o0, 7, %g2                     ! IEU1  Group
426 #endif
427 VIS_enter:
428         be,pt                   %xcc, dest_is_8byte_aligned     ! CTI
429 #ifdef __KERNEL__
430          nop                                                    ! IEU0  Group
431 #else
432          andcc                  %o0, 0x38, %g5                  ! IEU1  Group
433 #endif
434 do_dest_8byte_align:
        /* %g2 = 8 - (%o0 & 7).  If %o0 is even, skip the single-byte
         * step and enter the two-bytes-per-pass loop at 2.
         */
435         mov                     8, %g1                          ! IEU0
436         sub                     %g1, %g2, %g2                   ! IEU0  Group
437         andcc                   %o0, 1, %g0                     ! IEU1
438         be,pt                   %icc, 2f                        ! CTI
439          sub                    %o2, %g2, %o2                   ! IEU0  Group
        /* Odd destination: move one byte.  The store sits in the delay
         * slot of the exit branch, so it is performed on both paths.
         */
440 1:      ASI_SETSRC_NOBLK                                        ! LSU   Group
441         EX(LDUB                 [%o1] ASINORMAL, %o5, 
442                                 add %o2, %g2)                   ! Load  Group
443         add                     %o1, 1, %o1                     ! IEU0
444         add                     %o0, 1, %o0                     ! IEU1
445         ASI_SETDST_NOBLK                                        ! LSU   Group
446         subcc                   %g2, 1, %g2                     ! IEU1  Group
447         be,pn                   %xcc, 3f                        ! CTI
448          EX2(STB                %o5, [%o0 - 1] ASINORMAL,
449                                 add %g2, 1, %g2,
450                                 add %o2, %g2)                   ! Store
        /* Two bytes per pass until %g2 reaches zero; the second store
         * is in the delay slot of the loop branch.
         */
451 2:      ASI_SETSRC_NOBLK                                        ! LSU   Group
452         EX(LDUB                 [%o1] ASINORMAL, %o5, 
453                                 add %o2, %g2)                   ! Load  Group
454         add                     %o0, 2, %o0                     ! IEU0
455         EX2(LDUB                [%o1 + 1] ASINORMAL, %g3,
456                                 sub %o0, 2, %o0,
457                                 add %o2, %g2)                   ! Load  Group
458         ASI_SETDST_NOBLK                                        ! LSU   Group
459         subcc                   %g2, 2, %g2                     ! IEU1  Group
460         EX2(STB                 %o5, [%o0 - 2] ASINORMAL,
461                                 add %g2, 2, %g2,
462                                 add %o2, %g2)                   ! Store
463         add                     %o1, 2, %o1                     ! IEU0
464         bne,pt                  %xcc, 2b                        ! CTI   Group
465          EX2(STB                %g3, [%o0 - 1] ASINORMAL,
466                                 add %g2, 1, %g2,
467                                 add %o2, %g2)                   ! Store
/* dest_is_8byte_aligned: copy 8 bytes at a time until the destination
 * %o0 is 64-byte aligned.
 *
 * %g5 = %o0 & 0x38; zero means there is nothing to do here.  Otherwise
 * %g5 becomes 64 - %g5, the number of bytes this part moves, and is
 * subtracted from %o2 up front; the fixups add it back.
 *
 * alignaddr puts the source address rounded down to 8 bytes into %g1
 * and records the discarded low bits for faligndata.  Each step loads
 * the next source doubleword and lets faligndata combine it with the
 * previous one into %f0, which is then stored.  The loop is unrolled
 * twice so that %f4 and %f6 swap the roles of previous and next
 * doubleword.
 *
 * The kernel build invokes VISEntry (not defined in this file) first.
 * In the userland build the label sits after the andcc because the
 * branch at VIS_enter carries that andcc in its delay slot.
 */
468 #ifdef __KERNEL__
469 3:
470 dest_is_8byte_aligned:
471         VISEntry
472         andcc                   %o0, 0x38, %g5                  ! IEU1  Group
473 #else
474 3:      andcc                   %o0, 0x38, %g5                  ! IEU1  Group
475 dest_is_8byte_aligned:
476 #endif
477         be,pt                   %icc, dest_is_64byte_aligned    ! CTI
478          mov                    64, %g1                         ! IEU0
479         fmovd                   %f0, %f2                        ! FPU
480         sub                     %g1, %g5, %g5                   ! IEU0  Group
481         ASI_SETSRC_NOBLK                                        ! LSU   Group
482         alignaddr               %o1, %g0, %g1                   ! GRU   Group
483         EXO2(LDDF               [%g1] ASINORMAL, %f4)           ! Load  Group
484         sub                     %o2, %g5, %o2                   ! IEU0
        /* First half of the unrolled loop: previous = %f4, next = %f6. */
485 1:      EX(LDDF                 [%g1 + 0x8] ASINORMAL, %f6,
486                                 add %o2, %g5)                   ! Load  Group
487         add                     %g1, 0x8, %g1                   ! IEU0  Group
488         subcc                   %g5, 8, %g5                     ! IEU1
489         ASI_SETDST_NOBLK                                        ! LSU   Group
490         faligndata              %f4, %f6, %f0                   ! GRU   Group
491         EX2(STDF                %f0, [%o0] ASINORMAL,
492                                 add %g5, 8, %g5,
493                                 add %o2, %g5)                   ! Store
494         add                     %o1, 8, %o1                     ! IEU0  Group
495         be,pn                   %xcc, dest_is_64byte_aligned    ! CTI
496          add                    %o0, 8, %o0                     ! IEU1
        /* Second half: previous = %f6, next = %f4. */
497         ASI_SETSRC_NOBLK                                        ! LSU   Group
498         EX(LDDF                 [%g1 + 0x8] ASINORMAL, %f4,
499                                 add %o2, %g5)                   ! Load  Group
500         add                     %g1, 8, %g1                     ! IEU0
501         subcc                   %g5, 8, %g5                     ! IEU1
502         ASI_SETDST_NOBLK                                        ! LSU   Group
503         faligndata              %f6, %f4, %f0                   ! GRU   Group
504         EX2(STDF                %f0, [%o0] ASINORMAL,
505                                 add %g5, 8, %g5,
506                                 add %o2, %g5)                   ! Store
507         add                     %o1, 8, %o1                     ! IEU0
508         ASI_SETSRC_NOBLK                                        ! LSU   Group
509         bne,pt                  %xcc, 1b                        ! CTI   Group
510          add                    %o0, 8, %o0                     ! IEU0
/* dest_is_64byte_aligned: set up the 64-byte block loop and jump into
 * the code group that matches the source alignment.
 *
 * With "len" and "src" denoting %o2 and %o1 on entry, the code computes
 *   %g7 = (len - 0x40) & ~0x3f, then minus 0x80 before the jump
 *   %g3 = ((len - %g7) & ~7) - 0x10
 *   %o2 = len - %g7 - %g3
 *   %g1 = src + %g7 + %g3
 *   %g2 = (src >> 3) & 7
 *   %o1 = src & ~0x3f, advanced by 0xc0 in the jmpl delay slot
 * NOTE(review): %g7 is presumably the byte count handed to the block
 * loop and %g3/%o2 the tail left for the code after it -- confirm
 * against the finish_* code outside this excerpt.
 *
 * Three 64-byte blocks are preloaded into %f0.., %f16.. and %f32..
 * before the jump.  The jump target is vis00 + %g2 * 512, i.e. one
 * 512-byte slot per possible doubleword offset of the source within
 * its 64-byte block.
 *
 * The kernel build switches asi_src and asi_dest to their block
 * variants by ORing in ASI_BLK_OR and takes the address of vis00 from
 * its absolute value.  The userland build writes ASI_BLK_P to %asi and
 * derives the address from %pc.
 */
511 dest_is_64byte_aligned:
512         membar            #LoadStore | #StoreStore | #StoreLoad ! LSU   Group
513 #ifndef __KERNEL__
514         wr                      %g0, ASI_BLK_P, %asi            ! LSU   Group
515 #endif
516         subcc                   %o2, 0x40, %g7                  ! IEU1  Group
517         mov                     %o1, %g1                        ! IEU0
518         andncc                  %g7, (0x40 - 1), %g7            ! IEU1  Group
519         srl                     %g1, 3, %g2                     ! IEU0
520         sub                     %o2, %g7, %g3                   ! IEU0  Group
521         andn                    %o1, (0x40 - 1), %o1            ! IEU1
522         and                     %g2, 7, %g2                     ! IEU0  Group
523         andncc                  %g3, 0x7, %g3                   ! IEU1
524         fmovd                   %f0, %f2                        ! FPU
525         sub                     %g3, 0x10, %g3                  ! IEU0  Group
526         sub                     %o2, %g7, %o2                   ! IEU1
527 #ifdef __KERNEL__
528         or                      asi_src, ASI_BLK_OR, asi_src    ! IEU0  Group
529         or                      asi_dest, ASI_BLK_OR, asi_dest  ! IEU1
530 #endif
        /* The destination of this alignaddr is %g0: only its side
         * effect of recording the low bits of the source address for
         * faligndata is wanted.
         */
531         alignaddr               %g1, %g0, %g0                   ! GRU   Group
532         add                     %g1, %g7, %g1                   ! IEU0  Group
533         subcc                   %o2, %g3, %o2                   ! IEU1
534         ASI_SETSRC_BLK                                          ! LSU   Group
535         EXVIS1(LDBLK            [%o1 + 0x00] ASIBLK, %f0)       ! LSU   Group
536         add                     %g1, %g3, %g1                   ! IEU0
537         EXVIS1(LDBLK            [%o1 + 0x40] ASIBLK, %f16)      ! LSU   Group
538         sub                     %g7, 0x80, %g7                  ! IEU0
539         EXVIS(LDBLK             [%o1 + 0x80] ASIBLK, %f32)      ! LSU   Group
540 #ifdef __KERNEL__
541 vispc:  sll                     %g2, 9, %g2                     ! IEU0  Group
542         sethi                   %hi(vis00), %g5                 ! IEU1
543         or                      %g5, %lo(vis00), %g5            ! IEU0  Group
544         jmpl                    %g5 + %g2, %g0                  ! CTI   Group brk forced
545          addcc                  %o1, 0xc0, %o1                  ! IEU1  Group
546 #else
547                                                                 ! Clk1  Group 8-(
548                                                                 ! Clk2  Group 8-(
549                                                                 ! Clk3  Group 8-(
550                                                                 ! Clk4  Group 8-(
551 vispc:  rd                      %pc, %g5                        ! PDU   Group 8-(
552         addcc                   %g5, %lo(vis00 - vispc), %g5    ! IEU1  Group
553         sll                     %g2, 9, %g2                     ! IEU0
554         jmpl                    %g5 + %g2, %g0                  ! CTI   Group brk forced
555          addcc                  %o1, 0xc0, %o1                  ! IEU1  Group
556 #endif
/*
 * VIS block-copy loop table: eight groups, vis00 .. vis70.
 *
 * The dispatch code immediately above computes the address of vis00 and
 * jumps to vis00 + (%g2 << 9), so group N is expected to start exactly
 * N * 512 bytes after vis00.  The .align 512 below fixes the base;
 * VISLOOP_PAD presumably pads every group out to 512 bytes (the macro is
 * defined outside this section -- confirm there).
 *
 * Group N differs from group 0 only in the FP registers it names: every
 * FREG_FROB register list is rotated by N double registers inside the
 * %f0..%f46 ring (group N starts at %f(2N)).  %f48 is the register named
 * in every STORE_SYNC / STORE_JUMP and in the loop-back faligndata.
 * NOTE(review): N is presumably the source offset in doublewords within
 * a 64-byte block -- confirm against the code that sets %g2.
 *
 * Layout of one group:
 *   visN0  Three FREG_FROB + LOOP_CHUNK{1,2,3} stages, then a branch
 *          back to visN0+4 whose delay slot holds a faligndata on the
 *          first two registers of the group.  The +4 skips one
 *          instruction at visN0; presumably that is the same faligndata
 *          at the start of FREG_FROB.
 *   visN1, visN2, visN3
 *          Exit targets passed as the last argument of LOOP_CHUNK1/2/3.
 *          Each runs the two FREG_FROB stages that follow the exiting
 *          stage in the rotation, with STORE_SYNC after the first and
 *          STORE_JUMP after the second.  The finish_fXX label given to
 *          STORE_JUMP names the last register of that second list.
 *
 * When editing, keep the register lists, the faligndata operands and the
 * finish_fXX targets of a group in step with each other.
 */
557         .align                  512             /* OK, here comes the fun part... */
558 vis00:FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) LOOP_CHUNK1(o1, o0, g7, vis01)
559       FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) LOOP_CHUNK2(o1, o0, g7, vis02)
560       FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)  LOOP_CHUNK3(o1, o0, g7, vis03)
561       b,pt                      %xcc, vis00+4; faligndata %f0, %f2, %f48
562 vis01:FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_SYNC(o0, f48) membar #Sync
563       FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)  STORE_JUMP(o0, f48, finish_f0) membar #Sync
564 vis02:FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)  STORE_SYNC(o0, f48) membar #Sync
565       FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_JUMP(o0, f48, finish_f16) membar #Sync
566 vis03:FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) STORE_SYNC(o0, f48) membar #Sync
567       FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) STORE_JUMP(o0, f48, finish_f32) membar #Sync
568       VISLOOP_PAD
569 vis10:FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) LOOP_CHUNK1(o1, o0, g7, vis11)
570       FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) LOOP_CHUNK2(o1, o0, g7, vis12)
571       FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)  LOOP_CHUNK3(o1, o0, g7, vis13)
572       b,pt                      %xcc, vis10+4; faligndata %f2, %f4, %f48
573 vis11:FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_SYNC(o0, f48) membar #Sync
574       FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)  STORE_JUMP(o0, f48, finish_f2) membar #Sync
575 vis12:FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)  STORE_SYNC(o0, f48) membar #Sync
576       FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_JUMP(o0, f48, finish_f18) membar #Sync
577 vis13:FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) STORE_SYNC(o0, f48) membar #Sync
578       FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) STORE_JUMP(o0, f48, finish_f34) membar #Sync
579       VISLOOP_PAD
580 vis20:FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) LOOP_CHUNK1(o1, o0, g7, vis21)
581       FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) LOOP_CHUNK2(o1, o0, g7, vis22)
582       FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)  LOOP_CHUNK3(o1, o0, g7, vis23)
583       b,pt                      %xcc, vis20+4; faligndata %f4, %f6, %f48
584 vis21:FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_SYNC(o0, f48) membar #Sync
585       FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)  STORE_JUMP(o0, f48, finish_f4) membar #Sync
586 vis22:FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)  STORE_SYNC(o0, f48) membar #Sync
587       FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_JUMP(o0, f48, finish_f20) membar #Sync
588 vis23:FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) STORE_SYNC(o0, f48) membar #Sync
589       FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) STORE_JUMP(o0, f48, finish_f36) membar #Sync
590       VISLOOP_PAD
591 vis30:FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) LOOP_CHUNK1(o1, o0, g7, vis31)
592       FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) LOOP_CHUNK2(o1, o0, g7, vis32)
593       FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)  LOOP_CHUNK3(o1, o0, g7, vis33)
594       b,pt                      %xcc, vis30+4; faligndata %f6, %f8, %f48
595 vis31:FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_SYNC(o0, f48) membar #Sync
596       FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)  STORE_JUMP(o0, f48, finish_f6) membar #Sync
597 vis32:FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)  STORE_SYNC(o0, f48) membar #Sync
598       FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_JUMP(o0, f48, finish_f22) membar #Sync
599 vis33:FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) STORE_SYNC(o0, f48) membar #Sync
600       FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) STORE_JUMP(o0, f48, finish_f38) membar #Sync
601       VISLOOP_PAD
602 vis40:FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) LOOP_CHUNK1(o1, o0, g7, vis41)
603       FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) LOOP_CHUNK2(o1, o0, g7, vis42)
604       FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)  LOOP_CHUNK3(o1, o0, g7, vis43)
605       b,pt                      %xcc, vis40+4; faligndata %f8, %f10, %f48
606 vis41:FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_SYNC(o0, f48) membar #Sync
607       FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)  STORE_JUMP(o0, f48, finish_f8) membar #Sync
608 vis42:FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)  STORE_SYNC(o0, f48) membar #Sync
609       FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_JUMP(o0, f48, finish_f24) membar #Sync
610 vis43:FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) STORE_SYNC(o0, f48) membar #Sync
611       FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) STORE_JUMP(o0, f48, finish_f40) membar #Sync
612       VISLOOP_PAD
613 vis50:FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) LOOP_CHUNK1(o1, o0, g7, vis51)
614       FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) LOOP_CHUNK2(o1, o0, g7, vis52)
615       FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) LOOP_CHUNK3(o1, o0, g7, vis53)
616       b,pt                      %xcc, vis50+4; faligndata %f10, %f12, %f48
617 vis51:FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_SYNC(o0, f48) membar #Sync
618       FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_JUMP(o0, f48, finish_f10) membar #Sync
619 vis52:FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) STORE_SYNC(o0, f48) membar #Sync
620       FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_JUMP(o0, f48, finish_f26) membar #Sync
621 vis53:FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) STORE_SYNC(o0, f48) membar #Sync
622       FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) STORE_JUMP(o0, f48, finish_f42) membar #Sync
623       VISLOOP_PAD
624 vis60:FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) LOOP_CHUNK1(o1, o0, g7, vis61)
625       FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) LOOP_CHUNK2(o1, o0, g7, vis62)
626       FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) LOOP_CHUNK3(o1, o0, g7, vis63)
627       b,pt                      %xcc, vis60+4; faligndata %f12, %f14, %f48
628 vis61:FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_SYNC(o0, f48) membar #Sync
629       FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_JUMP(o0, f48, finish_f12) membar #Sync
630 vis62:FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) STORE_SYNC(o0, f48) membar #Sync
631       FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_JUMP(o0, f48, finish_f28) membar #Sync
632 vis63:FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) STORE_SYNC(o0, f48) membar #Sync
633       FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) STORE_JUMP(o0, f48, finish_f44) membar #Sync
634       VISLOOP_PAD
635 vis70:FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) LOOP_CHUNK1(o1, o0, g7, vis71)
636       FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) LOOP_CHUNK2(o1, o0, g7, vis72)
637       FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) LOOP_CHUNK3(o1, o0, g7, vis73)
638       b,pt                      %xcc, vis70+4; faligndata %f14, %f16, %f48
639 vis71:FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_SYNC(o0, f48) membar #Sync
640       FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_JUMP(o0, f48, finish_f14) membar #Sync
641 vis72:FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) STORE_SYNC(o0, f48) membar #Sync
642       FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_JUMP(o0, f48, finish_f30) membar #Sync
643 vis73:FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) STORE_SYNC(o0, f48) membar #Sync
644       FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, finish_f46) membar #Sync
645       VISLOOP_PAD
/*
 * Tail handlers named as the STORE_JUMP targets in the vis00..vis73 loop
 * table, one per even FP register %f0 .. %f46.
 *
 * finish_fK hands %fK and the next register in the ring to
 * FINISH_VISCHUNK, together with %o0 and %g3.  The three labels at the
 * end of each 16-register run differ:
 *   finish_f14, finish_f30  use UNEVEN_VISCHUNK with %f0 as the second
 *                           register;
 *   finish_f46              uses UNEVEN_VISCHUNK_LAST, and is directly
 *                           followed by vis_out_slk.
 * The macros are defined outside this section, so what each one emits
 * and where it transfers control must be checked there.
 * NOTE(review): %o0 is presumably the destination pointer and %g3 the
 * number of bytes still to be moved by the tail loops -- confirm.
 */
646 finish_f0:      FINISH_VISCHUNK(o0, f0,  f2,  g3)
647 finish_f2:      FINISH_VISCHUNK(o0, f2,  f4,  g3)
648 finish_f4:      FINISH_VISCHUNK(o0, f4,  f6,  g3)
649 finish_f6:      FINISH_VISCHUNK(o0, f6,  f8,  g3)
650 finish_f8:      FINISH_VISCHUNK(o0, f8,  f10, g3)
651 finish_f10:     FINISH_VISCHUNK(o0, f10, f12, g3)
652 finish_f12:     FINISH_VISCHUNK(o0, f12, f14, g3)
653 finish_f14:     UNEVEN_VISCHUNK(o0, f14, f0,  g3)
654 finish_f16:     FINISH_VISCHUNK(o0, f16, f18, g3)
655 finish_f18:     FINISH_VISCHUNK(o0, f18, f20, g3)
656 finish_f20:     FINISH_VISCHUNK(o0, f20, f22, g3)
657 finish_f22:     FINISH_VISCHUNK(o0, f22, f24, g3)
658 finish_f24:     FINISH_VISCHUNK(o0, f24, f26, g3)
659 finish_f26:     FINISH_VISCHUNK(o0, f26, f28, g3)
660 finish_f28:     FINISH_VISCHUNK(o0, f28, f30, g3)
661 finish_f30:     UNEVEN_VISCHUNK(o0, f30, f0,  g3)
662 finish_f32:     FINISH_VISCHUNK(o0, f32, f34, g3)
663 finish_f34:     FINISH_VISCHUNK(o0, f34, f36, g3)
664 finish_f36:     FINISH_VISCHUNK(o0, f36, f38, g3)
665 finish_f38:     FINISH_VISCHUNK(o0, f38, f40, g3)
666 finish_f40:     FINISH_VISCHUNK(o0, f40, f42, g3)
667 finish_f42:     FINISH_VISCHUNK(o0, f42, f44, g3)
668 finish_f44:     FINISH_VISCHUNK(o0, f44, f46, g3)
669 finish_f46:     UNEVEN_VISCHUNK_LAST(o0, f46, f0,  g3)
/*
 * Doubleword tail loop, byte tail loop and the common FPU-clean return.
 *
 * vis_out_slk  Kernel builds rewrite asi_src to
 *              asi_src ^ ASI_BLK_XOR1 ^ (asi_src >> 3), the shift being
 *              taken from the value before the first xor.  Presumably
 *              this maps a block-transfer ASI onto its non-block
 *              counterpart -- confirm against ASI_BLK_XOR1.
 * vis_slk      Moves 8 bytes per step, two steps per pass.  Each step
 *              loads the next source doubleword (into %f2, then %f0),
 *              merges it with the previous one via faligndata into %f8,
 *              and stores %f8 at [%o0].  %g3 drops by 8 per step; the
 *              loop is left as soon as %g3 becomes negative.
 * vis_out_slp, vis_out
 *              Return through vis_ret when %o2 is zero.  The delay slot
 *              copies %g1 into %o1 in either case (presumably the real,
 *              unaligned source address kept in %g1 -- confirm).
 * vis_slp      Byte loop: moves %o2 bytes from [%o1] to [%o0].
 * vis_ret      membar #StoreLoad | #StoreStore, then FPU_CLEAN_RETL.
 */
670 vis_out_slk:
671 #ifdef __KERNEL__
672         srl             asi_src, 3, %g5                         ! IEU0  Group
673         xor             asi_src, ASI_BLK_XOR1, asi_src          ! IEU1
674         xor             asi_src, %g5, asi_src                   ! IEU0  Group
675 #endif
676 vis_slk:ASI_SETSRC_NOBLK                                        ! LSU   Group
677         EXVIS3(LDDF     [%o1] ASINORMAL, %f2)                   ! Load  Group
678         add             %o1, 8, %o1                             ! IEU0
679         subcc           %g3, 8, %g3                             ! IEU1
680         ASI_SETDST_NOBLK                                        ! LSU   Group
681         faligndata      %f0, %f2, %f8                           ! GRU   Group
682         EXVIS4(STDF     %f8, [%o0] ASINORMAL)                   ! Store
683         bl,pn           %xcc, vis_out_slp                       ! CTI
684          add            %o0, 8, %o0                             ! IEU0  Group
685         ASI_SETSRC_NOBLK                                        ! LSU   Group
686         EXVIS3(LDDF     [%o1] ASINORMAL, %f0)                   ! Load  Group
687         add             %o1, 8, %o1                             ! IEU0
688         subcc           %g3, 8, %g3                             ! IEU1
689         ASI_SETDST_NOBLK                                        ! LSU   Group
690         faligndata      %f2, %f0, %f8                           ! GRU   Group
691         EXVIS4(STDF     %f8, [%o0] ASINORMAL)                   ! Store
692         bge,pt          %xcc, vis_slk                           ! CTI
693          add            %o0, 8, %o0                             ! IEU0  Group
/*
 * Kernel builds reach the byte loop from here through vis_slp+4, with
 * ASI_SETSRC_NOBLK in the branch delay slot.  That bypasses the asi_src
 * rewrite under vis_out, which vis_out_slk has already applied on this
 * path.  The +4 assumes ASI_SETSRC_NOBLK at vis_slp is exactly one
 * instruction in kernel builds -- confirm in the macro definition.
 * Userland builds simply fall through into vis_out.
 */
694 vis_out_slp:
695 #ifdef __KERNEL__
696         brz,pt          %o2, vis_ret                            ! CTI   Group
697          mov            %g1, %o1                                ! IEU0
698         ba,pt           %xcc, vis_slp+4                         ! CTI   Group
699          ASI_SETSRC_NOBLK                                       ! LSU   Group
700 #endif
701 vis_out:brz,pt          %o2, vis_ret                            ! CTI   Group
702          mov            %g1, %o1                                ! IEU0
703 #ifdef __KERNEL__
704         srl             asi_src, 3, %g5                         ! IEU0  Group
705         xor             asi_src, ASI_BLK_XOR1, asi_src          ! IEU1
706         xor             asi_src, %g5, asi_src                   ! IEU0  Group
707 #endif
/*
 * Byte loop.  Both pointers are advanced before the store, which is why
 * the store in the branch delay slot addresses [%o0 - 1].
 */
708 vis_slp:ASI_SETSRC_NOBLK                                        ! LSU   Group
709         EXO2(LDUB       [%o1] ASINORMAL, %g5)                   ! LOAD
710         add             %o1, 1, %o1                             ! IEU0
711         add             %o0, 1, %o0                             ! IEU1
712         ASI_SETDST_NOBLK                                        ! LSU   Group
713         subcc           %o2, 1, %o2                             ! IEU1
714         bne,pt          %xcc, vis_slp                           ! CTI
715          EX(STB         %g5, [%o0 - 1] ASINORMAL,
716                         add %o2, 1)                             ! Store Group
717 vis_ret:membar          #StoreLoad | #StoreStore                ! LSU   Group
718         FPU_CLEAN_RETL
719
720
/*
 * Byte-wise copy of %o2 bytes from [%o1] to [%o0]; __memcpy_entry sends
 * lengths of 15 or less here.
 *
 * An odd length first moves a single byte (label 1) and returns through
 * short_ret if that was all.  The remainder is moved two bytes per pass
 * (label 2) until %o2 reaches zero.  The instruction at label 1 sits in
 * the delay slot of the initial be,pt, so it also runs when the branch
 * to label 2 is taken.
 *
 * A zero length is not handled here: it takes the even path and the
 * pair loop starts decrementing %o2 from zero.  The userland memcpy
 * entry filters out zero before reaching this code.
 * NOTE(review): confirm that the kernel entry points do the same.
 */
721 __memcpy_short:
722         andcc           %o2, 1, %g0                             ! IEU1  Group
723         be,pt           %icc, 2f                                ! CTI
724 1:       ASI_SETSRC_NOBLK                                       ! LSU   Group
725         EXO2(LDUB       [%o1] ASINORMAL, %g5)                   ! LOAD  Group
726         add             %o1, 1, %o1                             ! IEU0
727         add             %o0, 1, %o0                             ! IEU1
728         ASI_SETDST_NOBLK                                        ! LSU   Group
729         subcc           %o2, 1, %o2                             ! IEU1  Group
730         be,pn           %xcc, short_ret                         ! CTI
731          EX(STB         %g5, [%o0 - 1] ASINORMAL,
732                         add %o2, 1)                             ! Store
733 2:      ASI_SETSRC_NOBLK                                        ! LSU   Group
734         EXO2(LDUB       [%o1] ASINORMAL, %g5)                   ! LOAD  Group
735         add             %o0, 2, %o0                             ! IEU0
736         EX2(LDUB        [%o1 + 1] ASINORMAL, %o5,
737                         sub %o0, 2, %o0,
738                         add %o2, %g0)                           ! LOAD  Group
739         add             %o1, 2, %o1                             ! IEU0
740         ASI_SETDST_NOBLK                                        ! LSU   Group
741         subcc           %o2, 2, %o2                             ! IEU1  Group
742         EX(STB          %g5, [%o0 - 2] ASINORMAL,
743                         add %o2, 2)                             ! Store
744         bne,pt          %xcc, 2b                                ! CTI
745          EX(STB         %o5, [%o0 - 1] ASINORMAL,
746                         add %o2, 1)                             ! Store
747 short_ret:
748         NORMAL_RETL
749
/*
 * Userland-only entry points, assembled when __KERNEL__ is not defined.
 *
 * Without REGS_64BIT the length in %o2 is first zero-extended from its
 * low 32 bits.  A zero length returns at once through short_ret.  The
 * delay slot of that test copies the destination pointer %o0 into %g6
 * whether or not the branch is taken.
 * NOTE(review): %g6 is presumably read back by the userland return
 * macros to produce the return value -- confirm in their definition.
 * A non-zero length falls through into __memcpy_entry.
 */
750 #ifndef __KERNEL__
751 memcpy_private:
752 memcpy:
753 #ifndef REGS_64BIT
754         srl             %o2, 0, %o2                             ! IEU1  Group
755 #endif  
756         brz,pn          %o2, short_ret                          ! CTI   Group
757          mov            %o0, %g6                                ! IEU0
758 #endif
/*
 * Common copy entry: %o0 = destination, %o1 = source, %o2 = byte count.
 *
 * Dispatch:
 *   count <= 15             -> __memcpy_short
 *   count >= 64 * 6         -> VIS_enter
 *   ((dst - src) & 3) != 0  -> memcpy_noVIS_misaligned, with
 *                              %g2 = dst & 7 already computed
 *   anything else           -> the integer copy below
 *
 * Integer copy:
 *   1. Move 1 and/or 2 bytes until the source is 4-byte aligned.  With
 *      REGS_64BIT also move 4 bytes if needed, making it 8-byte aligned.
 *   2. %g7 = count & -128.  While non-zero, label 5 moves 128 bytes per
 *      pass with four MOVE_BIGCHUNK expansions.  With REGS_64BIT, when
 *      bit 2 of the destination is clear as well, the branch to 82f + 4 is
 *      taken instead.  Its delay slot is the first instruction of the
 *      MOVE_BIGCHUNK at label 5, which presumably equals the
 *      instruction skipped at label 82 -- confirm in the macros.
 *   3. %g7 = count & 0x70.  Label 79 jumps to a point before label 80,
 *      inside the MOVE_LASTCHUNK ladder.  The distance is %g7 * 2.5
 *      bytes in kernel builds and %g7 * 2 bytes in userland, i.e. 40
 *      or 32 bytes of code per 16 bytes copied.  Presumably each
 *      MOVE_LASTCHUNK expands to exactly that size, so %g7 / 16 of them
 *      run; the expansions must keep that size.
 *   4. Bits 8, 4, 2 and 1 of the count select the trailing 8-, 4-, 2-
 *      and 1-byte moves (labels 80, 81, 1, 1).
 *
 * EXT(start, end, handler) presumably attaches the named fixup handler
 * to the address range start..end -- the macro is defined elsewhere.
 */
759 __memcpy_entry:
760         cmp             %o2, 15                                 ! IEU1  Group
761         bleu,pn         %xcc, __memcpy_short                    ! CTI
762          cmp            %o2, (64 * 6)                           ! IEU1  Group
763         bgeu,pn         %xcc, VIS_enter                         ! CTI
764          andcc          %o0, 7, %g2                             ! IEU1  Group
765         sub             %o0, %o1, %g5                           ! IEU0
766         andcc           %g5, 3, %o5                             ! IEU1  Group
767         bne,pn          %xcc, memcpy_noVIS_misaligned           ! CTI
768          andcc          %o1, 3, %g0                             ! IEU1  Group
769 #ifdef REGS_64BIT
770         be,a,pt         %xcc, 3f                                ! CTI
771          andcc          %o1, 4, %g0                             ! IEU1  Group
772         andcc           %o1, 1, %g0                             ! IEU1  Group
773 #else /* !REGS_64BIT */
774         be,pt           %xcc, 5f                                ! CTI
775          andcc          %o1, 1, %g0                             ! IEU1  Group
776 #endif /* !REGS_64BIT */
777         be,pn           %xcc, 4f                                ! CTI
778          andcc          %o1, 2, %g0                             ! IEU1  Group
779         ASI_SETSRC_NOBLK                                        ! LSU   Group
780         EXO2(LDUB       [%o1] ASINORMAL, %g2)                   ! Load  Group
781         add             %o1, 1, %o1                             ! IEU0
782         add             %o0, 1, %o0                             ! IEU1
783         sub             %o2, 1, %o2                             ! IEU0  Group
784         ASI_SETDST_NOBLK                                        ! LSU   Group
785         bne,pn          %xcc, 5f                                ! CTI   Group
786          EX(STB         %g2, [%o0 - 1] ASINORMAL,
787                         add %o2, 1)                             ! Store
788 4:      ASI_SETSRC_NOBLK                                        ! LSU   Group
789         EXO2(LDUH       [%o1] ASINORMAL, %g2)                   ! Load  Group
790         add             %o1, 2, %o1                             ! IEU0
791         add             %o0, 2, %o0                             ! IEU1
792         ASI_SETDST_NOBLK                                        ! LSU   Group
793         sub             %o2, 2, %o2                             ! IEU0
794         EX(STH          %g2, [%o0 - 2] ASINORMAL,
795                         add %o2, 2)                             ! Store Group + bubble
796 #ifdef REGS_64BIT
797 5:      andcc           %o1, 4, %g0                             ! IEU1
798 3:      be,a,pn         %xcc, 2f                                ! CTI
799          andcc          %o2, -128, %g7                          ! IEU1  Group
800         ASI_SETSRC_NOBLK                                        ! LSU   Group
801         EXO2(LDUW       [%o1] ASINORMAL, %g5)                   ! Load  Group
802         add             %o1, 4, %o1                             ! IEU0
803         add             %o0, 4, %o0                             ! IEU1
804         ASI_SETDST_NOBLK                                        ! LSU   Group
805         sub             %o2, 4, %o2                             ! IEU0  Group
806         EX(STW          %g5, [%o0 - 4] ASINORMAL,
807                         add %o2, 4)                             ! Store
808         andcc           %o2, -128, %g7                          ! IEU1  Group
809 2:      be,pn           %xcc, 3f                                ! CTI
810          andcc          %o0, 4, %g0                             ! IEU1  Group
811         be,pn           %xcc, 82f + 4                           ! CTI   Group
812 #else /* !REGS_64BIT */
813 5:      andcc           %o2, -128, %g7                          ! IEU1
814         be,a,pn         %xcc, 41f                               ! CTI
815          andcc          %o2, 0x70, %g7                          ! IEU1  Group
816 #endif /* !REGS_64BIT */
817 5:      MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
818         MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
819         MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
820         MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
821         EXT(5b,35f,VIScopyfixup1)
822 35:     subcc           %g7, 128, %g7                           ! IEU1  Group
823         add             %o1, 128, %o1                           ! IEU0
824         bne,pt          %xcc, 5b                                ! CTI
825          add            %o0, 128, %o0                           ! IEU0  Group
826 3:      andcc           %o2, 0x70, %g7                          ! IEU1  Group
827 41:     be,pn           %xcc, 80f                               ! CTI
828          andcc          %o2, 8, %g0                             ! IEU1  Group
829 #ifdef __KERNEL__
830 79:     sethi           %hi(80f), %o5                           ! IEU0
831         sll             %g7, 1, %g5                             ! IEU0  Group
832         add             %o1, %g7, %o1                           ! IEU1
833         srl             %g7, 1, %g2                             ! IEU0  Group
834         sub             %o5, %g5, %o5                           ! IEU1
835         sub             %o5, %g2, %o5                           ! IEU0  Group
836         jmpl            %o5 + %lo(80f), %g0                     ! CTI   Group brk forced
837          add            %o0, %g7, %o0                           ! IEU0  Group
838 #else
839                                                                 ! Clk1 8-(
840                                                                 ! Clk2 8-(
841                                                                 ! Clk3 8-(
842                                                                 ! Clk4 8-(
843 79:     rd              %pc, %o5                                ! PDU   Group
844         sll             %g7, 1, %g5                             ! IEU0  Group
845         add             %o1, %g7, %o1                           ! IEU1
846         sub             %o5, %g5, %o5                           ! IEU0  Group
847         jmpl            %o5 + %lo(80f - 79b), %g0               ! CTI   Group brk forced
848          add            %o0, %g7, %o0                           ! IEU0  Group
849 #endif
850 36:     MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
851         MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
852         MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
853         MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
854         MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
855         MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
856         MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
857         EXT(36b,80f,VIScopyfixup2)
858 80:     be,pt           %xcc, 81f                               ! CTI
859          andcc          %o2, 4, %g0                             ! IEU1
860 #ifdef REGS_64BIT
861         ASI_SETSRC_NOBLK                                        ! LSU   Group
862         EX(LDX          [%o1] ASINORMAL, %g2,
863                         and %o2, 0xf)                           ! Load  Group
864         add             %o0, 8, %o0                             ! IEU0
865         ASI_SETDST_NOBLK                                        ! LSU   Group
866         EX(STW          %g2, [%o0 - 0x4] ASINORMAL,
867                         and %o2, 0xf)                           ! Store Group
868         add             %o1, 8, %o1                             ! IEU1
869         srlx            %g2, 32, %g2                            ! IEU0  Group
870         EX2(STW         %g2, [%o0 - 0x8] ASINORMAL,
871                         and %o2, 0xf, %o2,
872                         sub %o2, 4)                             ! Store
873 #else /* !REGS_64BIT */
874         lduw            [%o1], %g2                              ! Load  Group
875         add             %o0, 8, %o0                             ! IEU0
876         lduw            [%o1 + 0x4], %g3                        ! Load  Group
877         add             %o1, 8, %o1                             ! IEU0
878         stw             %g2, [%o0 - 0x8]                        ! Store Group
879         stw             %g3, [%o0 - 0x4]                        ! Store Group
880 #endif /* !REGS_64BIT */
881 81:     be,pt           %xcc, 1f                                ! CTI
882          andcc          %o2, 2, %g0                             ! IEU1  Group
883         ASI_SETSRC_NOBLK                                        ! LSU   Group
884         EX(LDUW         [%o1] ASINORMAL, %g2,
885                         and %o2, 0x7)                           ! Load  Group
886         add             %o1, 4, %o1                             ! IEU0
887         ASI_SETDST_NOBLK                                        ! LSU   Group
888         EX(STW          %g2, [%o0] ASINORMAL,
889                         and %o2, 0x7)                           ! Store Group
890         add             %o0, 4, %o0                             ! IEU0
891 1:      be,pt           %xcc, 1f                                ! CTI
892          andcc          %o2, 1, %g0                             ! IEU1  Group
893         ASI_SETSRC_NOBLK                                        ! LSU   Group
894         EX(LDUH         [%o1] ASINORMAL, %g2,
895                         and %o2, 0x3)                           ! Load  Group
896         add             %o1, 2, %o1                             ! IEU0
897         ASI_SETDST_NOBLK                                        ! LSU   Group
898         EX(STH          %g2, [%o0] ASINORMAL,
899                         and %o2, 0x3)                           ! Store Group
900         add             %o0, 2, %o0                             ! IEU0
901 1:      be,pt           %xcc, normal_retl                       ! CTI
902          nop                                                    ! IEU1
903         ASI_SETSRC_NOBLK                                        ! LSU   Group
904         EX(LDUB         [%o1] ASINORMAL, %g2,
905                         add %g0, 1)                             ! Load  Group
906         ASI_SETDST_NOBLK                                        ! LSU   Group
907         EX(STB          %g2, [%o0] ASINORMAL,
908                         add %g0, 1)                             ! Store Group + bubble
909 normal_retl:
910         NORMAL_RETL
911
/*
 * REGS_64BIT only: integer copy path entered from __memcpy_entry via
 * "82f + 4", taken when bit 2 of the destination is clear after the
 * source has been brought to 8-byte alignment.
 *
 *   82  Moves 128 bytes per pass with two MOVE_BIGALIGNCHUNK expansions
 *       (offsets 0x00 and 0x40); %g7 holds the bytes left for this loop
 *       and drops by 128 per pass.
 *   83  %g7 = count & 0x70.  Jumps to a point before label 84, inside
 *       the MOVE_LASTALIGNCHUNK ladder.  The distance is %g7 * 1.5
 *       bytes in kernel builds and %g7 bytes in userland, i.e. 24 or
 *       16 bytes of code per 16 bytes copied.  Presumably each
 *       MOVE_LASTALIGNCHUNK expands to exactly that size, so %g7 / 16 of
 *       them run; the expansions must keep that size.
 *   84, 85, 1, 1
 *       Trailing 8-, 4-, 2- and 1-byte moves selected by bits 8, 4, 2
 *       and 1 of the count in %o2.
 *
 * EXT(start, end, handler) presumably attaches the named fixup handler
 * to the address range start..end -- the macro is defined elsewhere.
 */
912 #ifdef REGS_64BIT
913 82:     MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
914         MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
915         EXT(82b,37f,VIScopyfixup3)
916 37:     subcc           %g7, 128, %g7                           ! IEU1  Group
917         add             %o1, 128, %o1                           ! IEU0
918         bne,pt          %xcc, 82b                               ! CTI
919          add            %o0, 128, %o0                           ! IEU0  Group
920         andcc           %o2, 0x70, %g7                          ! IEU1
921         be,pn           %xcc, 84f                               ! CTI
922          andcc          %o2, 8, %g0                             ! IEU1  Group
923 #ifdef __KERNEL__
924 83:     srl             %g7, 1, %g5                             ! IEU0
925         sethi           %hi(84f), %o5                           ! IEU0  Group
926         add             %g7, %g5, %g5                           ! IEU1
927         add             %o1, %g7, %o1                           ! IEU0  Group
928         sub             %o5, %g5, %o5                           ! IEU1
929         jmpl            %o5 + %lo(84f), %g0                     ! CTI   Group brk forced
930          add            %o0, %g7, %o0                           ! IEU0  Group
931 #else
932                                                                 ! Clk1 8-(
933                                                                 ! Clk2 8-(
934                                                                 ! Clk3 8-(
935                                                                 ! Clk4 8-(
936 83:     rd              %pc, %o5                                ! PDU   Group
937         add             %o1, %g7, %o1                           ! IEU0  Group
938         sub             %o5, %g7, %o5                           ! IEU1
939         jmpl            %o5 + %lo(84f - 83b), %g0               ! CTI   Group brk forced
940          add            %o0, %g7, %o0                           ! IEU0  Group
941 #endif
942 38:     MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
943         MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
944         MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
945         MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
946         MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
947         MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
948         MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
949         EXT(38b,84f,VIScopyfixup4)
950 84:     be,pt           %xcc, 85f                               ! CTI   Group
951          andcc          %o2, 4, %g0                             ! IEU1
952         ASI_SETSRC_NOBLK                                        ! LSU   Group
953         EX(LDX          [%o1] ASINORMAL, %g2,
954                         and %o2, 0xf)                           ! Load  Group
955         add             %o0, 8, %o0                             ! IEU0
956         ASI_SETDST_NOBLK                                        ! LSU   Group
957         add             %o1, 8, %o1                             ! IEU0  Group
958         EX(STX          %g2, [%o0 - 0x8] ASINORMAL,
959                         and %o2, 0xf)                           ! Store
960 85:     be,pt           %xcc, 1f                                ! CTI
961          andcc          %o2, 2, %g0                             ! IEU1  Group
962         ASI_SETSRC_NOBLK                                        ! LSU   Group
963         EX(LDUW         [%o1] ASINORMAL, %g2,
964                         and %o2, 0x7)                           ! Load  Group
965         add             %o0, 4, %o0                             ! IEU0
966         ASI_SETDST_NOBLK                                        ! LSU   Group
967         add             %o1, 4, %o1                             ! IEU0  Group
968         EX(STW          %g2, [%o0 - 0x4] ASINORMAL,
969                         and %o2, 0x7)                           ! Store
970 1:      be,pt           %xcc, 1f                                ! CTI
971          andcc          %o2, 1, %g0                             ! IEU1  Group
972         ASI_SETSRC_NOBLK                                        ! LSU   Group
973         EX(LDUH         [%o1] ASINORMAL, %g2,
974                         and %o2, 0x3)                           ! Load  Group
975         add             %o0, 2, %o0                             ! IEU0
976         ASI_SETDST_NOBLK                                        ! LSU   Group
977         add             %o1, 2, %o1                             ! IEU0  Group
978         EX(STH          %g2, [%o0 - 0x2] ASINORMAL,
979                         and %o2, 0x3)                           ! Store
980 1:      be,pt           %xcc, 1f                                ! CTI
981          nop                                                    ! IEU0  Group
982         ASI_SETSRC_NOBLK                                        ! LSU   Group
983         EX(LDUB         [%o1] ASINORMAL, %g2,
984                         add %g0, 1)                             ! Load  Group
985         ASI_SETDST_NOBLK                                        ! LSU   Group
986         EX(STB          %g2, [%o0] ASINORMAL,
987                         add %g0, 1)                             ! Store Group + bubble
988 1:      NORMAL_RETL
989 #endif  /* REGS_64BIT */
990
/*
 * Copy path for a source and destination that differ in their low two
 * address bits.  __memcpy_entry branches here with %g2 = dst & 7.
 *
 * Phase 1 (label 1)
 *     When %g2 is non-zero, move 8 - %g2 single bytes so that the
 *     destination becomes 8-byte aligned, and take them off %o2.
 * Phase 2 (label 2)
 *     Kernel builds run VISEntry first.  %g5 = count & ~7 is the part
 *     moved as doublewords and %o2 = count & 7 the byte remainder.
 *     alignaddr derives the source read pointer %g1 from %o1.  The loop
 *     is unrolled twice, alternating %f4 and %f6 as the newest source
 *     doubleword: load the next 8 bytes, merge with the previous ones
 *     via faligndata into %f0, store %f0 at [%o0].  It ends when %g5
 *     reaches zero.
 * Phase 3 (end_cruft)
 *     With no remainder, return through fpu_retl.  Otherwise move one
 *     byte here and, if more remain, continue in the vis_slp byte loop.
 *
 * NOTE(review): the purpose of "fmovd %f0, %f2" is not evident from this
 * section; %f2 is not read again before the return -- confirm why it is
 * needed.
 */
991 memcpy_noVIS_misaligned:
992         brz,pt                  %g2, 2f                         ! CTI   Group
993          mov                    8, %g1                          ! IEU0
994         sub                     %g1, %g2, %g2                   ! IEU0  Group
995         sub                     %o2, %g2, %o2                   ! IEU0  Group
996 1:      ASI_SETSRC_NOBLK                                        ! LSU   Group
997         EX(LDUB                 [%o1] ASINORMAL, %g5,
998                                 add %o2, %g2)                   ! Load  Group
999         add                     %o1, 1, %o1                     ! IEU0
1000         add                     %o0, 1, %o0                     ! IEU1
1001         ASI_SETDST_NOBLK                                        ! LSU   Group
1002         subcc                   %g2, 1, %g2                     ! IEU1  Group
1003         bne,pt                  %xcc, 1b                        ! CTI
1004          EX2(STB                %g5, [%o0 - 1] ASINORMAL,
1005                                 add %o2, %g2, %o2,
1006                                 add %o2, 1)                     ! Store
1007 2:
1008 #ifdef __KERNEL__
1009         VISEntry
1010 #endif
1011         andn                    %o2, 7, %g5                     ! IEU0  Group
1012         and                     %o2, 7, %o2                     ! IEU1
1013         fmovd                   %f0, %f2                        ! FPU
1014         ASI_SETSRC_NOBLK                                        ! LSU   Group
1015         alignaddr               %o1, %g0, %g1                   ! GRU   Group
1016         EXO2(LDDF               [%g1] ASINORMAL, %f4)           ! Load  Group
1017 1:      EX(LDDF                 [%g1 + 0x8] ASINORMAL, %f6,
1018                                 add %o2, %g5)                   ! Load  Group
1019         add                     %g1, 0x8, %g1                   ! IEU0  Group
1020         subcc                   %g5, 8, %g5                     ! IEU1
1021         ASI_SETDST_NOBLK                                        ! LSU   Group
1022         faligndata              %f4, %f6, %f0                   ! GRU   Group
1023         EX2(STDF                %f0, [%o0] ASINORMAL,
1024                                 add %o2, %g5, %o2,
1025                                 add %o2, 8)                     ! Store
1026         add                     %o1, 8, %o1                     ! IEU0  Group
1027         be,pn                   %xcc, end_cruft                 ! CTI
1028          add                    %o0, 8, %o0                     ! IEU1
1029         ASI_SETSRC_NOBLK                                        ! LSU   Group
1030         EX(LDDF                 [%g1 + 0x8] ASINORMAL, %f4,
1031                                 add %o2, %g5)                   ! Load  Group
1032         add                     %g1, 8, %g1                     ! IEU0
1033         subcc                   %g5, 8, %g5                     ! IEU1
1034         ASI_SETDST_NOBLK                                        ! LSU   Group
1035         faligndata              %f6, %f4, %f0                   ! GRU   Group
1036         EX2(STDF                %f0, [%o0] ASINORMAL,
1037                                 add %o2, %g5, %o2,
1038                                 add %o2, 8)                     ! Store
1039         add                     %o1, 8, %o1                     ! IEU0
1040         ASI_SETSRC_NOBLK                                        ! LSU   Group
1041         bne,pn                  %xcc, 1b                        ! CTI   Group
1042          add                    %o0, 8, %o0                     ! IEU0
1043 end_cruft:
1044         brz,pn                  %o2, fpu_retl                   ! CTI   Group
1045 #ifndef __KERNEL__
1046          nop                                                    ! IEU0
1047 #else
1048          ASI_SETSRC_NOBLK                                       ! LSU   Group
1049 #endif
1050         EXO2(LDUB       [%o1] ASINORMAL, %g5)                   ! LOAD
1051         add             %o1, 1, %o1                             ! IEU0
1052         add             %o0, 1, %o0                             ! IEU1
1053         ASI_SETDST_NOBLK                                        ! LSU   Group
1054         subcc           %o2, 1, %o2                             ! IEU1
1055         bne,pt          %xcc, vis_slp                           ! CTI
1056          EX(STB         %g5, [%o0 - 1] ASINORMAL,
1057                         add %o2, 1)                             ! Store Group
1058 fpu_retl:
1059         FPU_RETL
1060
1061 #ifdef __KERNEL__
1062         .globl          __memcpy_end
1063 __memcpy_end:
1064
1065                 .section        .fixup
1066                 .align          4
/* Common exit path for the fixup handlers in this section.
 *
 * VIScopyfixup_reto2 returns the value held in %o2; VIScopyfixup_ret
 * returns the value already placed in %o1.  In both cases %asi is
 * rewritten from the TI_CURRENT_DS byte on the way out.  When bit 0 is
 * set in asi_src and clear in asi_dest, __bzero is first called with
 * the incoming %o0 and %o1 as its two arguments.
 *
 * NOTE(review): %o4 is loaded below before asi_dest is tested.  The
 * definition of asi_dest is not visible in this part of the file;
 * confirm that it does not name %o4.
 */
1067 VIScopyfixup_reto2:
1068                 mov             %o2, %o1                /* result = %o2, then fall through */
1069 VIScopyfixup_ret:
1070                 /* If this is copy_from_user(), zero out the rest of the
1071                  * kernel buffer.
1072                  */
1073                 ldub            [%g6 + TI_CURRENT_DS], %o4      /* byte written to %asi at exit */
1074                 andcc           asi_src, 0x1, %g0               /* test bit 0 of asi_src */
1075                 be,pt           %icc, 1f                        /* bit clear: no zeroing */
1076                  VISExit                                        /* delay slot of a non-annulled branch */
1077                 andcc           asi_dest, 0x1, %g0              /* test bit 0 of asi_dest */
1078                 bne,pn          %icc, 1f                        /* bit set: no zeroing */
1079                  nop
1080                 save            %sp, -160, %sp                  /* new window: old %o regs become %i regs */
1081                 mov             %i0, %o0                        /* first argument = incoming %o0 */
1082                 call            __bzero
1083                  mov            %i1, %o1                        /* delay slot: second argument = incoming %o1 */
1084                 restore                                         /* back to the original window */
1085 1:              mov             %o1, %o0                        /* return value */
1086                 retl
1087                  wr             %o4, %g0, %asi                  /* delay slot: reload %asi */
/* Fixup entry that reduces the index in %g2 with a stride of 18.
 *
 * Every complete group of 18 contained in %g2 moves %o0 forward by 32
 * and takes 32 off %g7.  The remainder (0..17) then selects one byte
 * from the table that follows the code, and that byte replaces %g2.
 * Control continues at the shared tail "2:", which builds the return
 * value in %o1.
 *
 * The table is addressed as (%pc of the rd) + 16, i.e. exactly four
 * instructions after the rd.  No instruction may be inserted between
 * the rd and the .byte line.
 *
 * NOTE(review): what %g2, %g7 and %o0 hold on entry is decided by the
 * code that branches here, which is outside this excerpt -- confirm.
 */
1088 VIScopyfixup1:  subcc           %g2, 18, %g2            /* take one group of 18 off the index */
1089                 add             %o0, 32, %o0            /* done on every pass, including the last */
1090                 bgeu,a,pt       %icc, VIScopyfixup1     /* no borrow: another full group remains */
1091                  sub            %g7, 32, %g7            /* annulled slot: executes only when looping */
1092                 sub             %o0, 32, %o0            /* cancel the add made on the final pass */
1093                 rd              %pc, %g5                /* %g5 = address of this instruction */
1094                 add             %g2, (18 + 16), %g2     /* remainder 0..17, plus 16 bytes to the table */
1095                 ldub            [%g5 + %g2], %g2        /* %g2 = table[remainder] */
1096                 ba,a,pt         %xcc, 2f                /* annulled: the table bytes are never executed */
1097 .byte           0, 0, 0, 0, 0, 0, 0, 4, 4, 8, 12, 12, 16, 20, 20, 24, 28, 28
1098                 .align          4
/* Fixup entry that reduces the index in %g2 with a stride of 10.
 *
 * %g7 starts at 7 * 16 and loses 16 for every complete group of 10
 * contained in %g2; the resulting %g7 is then subtracted from %o0.
 * The remainder (0..9) selects one byte from the table that follows
 * the code, and that byte replaces %g2.  Control continues at the
 * shared tail "4:", which builds the return value in %o1.
 *
 * The table is addressed as (%pc of the rd) + 16, so no instruction
 * may be inserted between the rd and the .byte line.
 *
 * NOTE(review): what %g2 and %o0 hold on entry is decided by the code
 * that branches here, which is outside this excerpt -- confirm.
 */
1099 VIScopyfixup2:  mov             (7 * 16), %g7           /* starting value, 112 */
1100 1:              subcc           %g2, 10, %g2            /* take one group of 10 off the index */
1101                 bgeu,a,pt       %icc, 1b                /* no borrow: another full group remains */
1102                  sub            %g7, 16, %g7            /* annulled slot: executes only when looping */
1103                 sub             %o0, %g7, %o0           /* move %o0 back by what is left in %g7 */
1104                 rd              %pc, %g5                /* %g5 = address of this instruction */
1105                 add             %g2, (10 + 16), %g2     /* remainder 0..9, plus 16 bytes to the table */
1106                 ldub            [%g5 + %g2], %g2        /* %g2 = table[remainder] */
1107                 ba,a,pt         %xcc, 4f                /* annulled: the table bytes are never executed */
1108 .byte           0, 0, 0, 0, 0, 4, 4, 8, 12, 12
1109                 .align          4
/* Fixup entry that reduces the index in %g2 with a stride of 10,
 * followed by the tail "2:" shared with VIScopyfixup1.
 *
 * Every complete group of 10 contained in %g2 moves %o0 forward by 32
 * and takes 32 off %g7.  The remainder (0..9) selects one byte from
 * the table that follows the code, and that byte replaces %g2.
 *
 * Tail "2:" keeps the low seven bits of %o2, subtracts the table value
 * from %g7 and leaves through VIScopyfixup_ret with %o1 = %g7 + %o2.
 *
 * The table is addressed as (%pc of the rd) + 16, so no instruction
 * may be inserted between the rd and the .byte line.
 *
 * NOTE(review): what %g2, %g7, %o0 and %o2 hold on entry is decided by
 * the code that branches here, which is outside this excerpt -- confirm.
 */
1110 VIScopyfixup3:  subcc           %g2, 10, %g2            /* take one group of 10 off the index */
1111                 add             %o0, 32, %o0            /* done on every pass, including the last */
1112                 bgeu,a,pt       %icc, VIScopyfixup3     /* no borrow: another full group remains */
1113                  sub            %g7, 32, %g7            /* annulled slot: executes only when looping */
1114                 sub             %o0, 32, %o0            /* cancel the add made on the final pass */
1115                 rd              %pc, %g5                /* %g5 = address of this instruction */
1116                 add             %g2, (10 + 16), %g2     /* remainder 0..9, plus 16 bytes to the table */
1117                 ldub            [%g5 + %g2], %g2        /* %g2 = table[remainder] */
1118                 ba,a,pt         %xcc, 2f                /* annulled: the table bytes are never executed */
1119 .byte           0, 0, 0, 0, 0, 0, 0, 8, 16, 24
1120                 .align          4
1121 2:              and             %o2, 0x7f, %o2          /* keep %o2 modulo 128 */
1122                 sub             %g7, %g2, %g7           /* subtract the table value */
1123                 ba,pt           %xcc, VIScopyfixup_ret
1124                  add            %g7, %o2, %o1           /* delay slot: %o1 = value to return */
/* Fixup entry that reduces the index in %g2 with a stride of 6,
 * followed by the tail "4:" shared with VIScopyfixup2.
 *
 * %g7 starts at 7 * 16 and loses 16 for every complete group of 6
 * contained in %g2; the resulting %g7 is then subtracted from %o0.
 * The remainder (0..5) selects one byte from the table that follows
 * the code, and that byte replaces %g2.
 *
 * Tail "4:" keeps the low four bits of %o2, subtracts the table value
 * from %g7 and leaves through VIScopyfixup_ret with %o1 = %g7 + %o2.
 *
 * The table is addressed as (%pc of the rd) + 16, so no instruction
 * may be inserted between the rd and the .byte line.
 *
 * NOTE(review): what %g2, %o0 and %o2 hold on entry is decided by the
 * code that branches here, which is outside this excerpt -- confirm.
 */
1125 VIScopyfixup4:  mov             (7 * 16), %g7           /* starting value, 112 */
1126 3:              subcc           %g2, 6, %g2             /* take one group of 6 off the index */
1127                 bgeu,a,pt       %icc, 3b                /* no borrow: another full group remains */
1128                  sub            %g7, 16, %g7            /* annulled slot: executes only when looping */
1129                 sub             %o0, %g7, %o0           /* move %o0 back by what is left in %g7 */
1130                 rd              %pc, %g5                /* %g5 = address of this instruction */
1131                 add             %g2, (6 + 16), %g2      /* remainder 0..5, plus 16 bytes to the table */
1132                 ldub            [%g5 + %g2], %g2        /* %g2 = table[remainder] */
1133                 ba,a,pt         %xcc, 4f                /* annulled: the table bytes are never executed */
1134 .byte           0, 0, 0, 0, 0, 8
1135                 .align          4
1136 4:              and             %o2, 0xf, %o2           /* keep %o2 modulo 16 */
1137                 sub             %g7, %g2, %g7           /* subtract the table value */
1138                 ba,pt           %xcc, VIScopyfixup_ret
1139                  add            %g7, %o2, %o1           /* delay slot: %o1 = value to return */
/* Three fall-through fixup entries that all return %o2 + %g7 + %g3
 * through VIScopyfixup_ret, differing only in a constant added to %o2:
 *
 *   VIScopyfixup_vis2:  %o2 - 0x40 + 0x80  (net +0x40)
 *   VIScopyfixup_vis0:  %o2 + 0x80
 *   VIScopyfixup_vis1:  %o2 unchanged
 *
 * NOTE(review): the meaning of %o2, %g7 and %g3 on entry is decided by
 * the code that branches here, which is outside this excerpt -- confirm.
 */
1140 VIScopyfixup_vis2:
1141                 sub             %o2, 0x40, %o2          /* combined with the add below: net +0x40 */
1142 VIScopyfixup_vis0:
1143                 add             %o2, 0x80, %o2
1144 VIScopyfixup_vis1:
1145                 add             %g7, %g3, %g7
1146                 ba,pt           %xcc, VIScopyfixup_ret
1147                  add            %o2, %g7, %o1           /* delay slot: %o1 = value to return */
/* Two fall-through fixup entries that return %o2 + %g3 plus a constant
 * through VIScopyfixup_ret:
 *
 *   VIScopyfixup_vis4:  %o2 + %g3 + 16  (both adds execute)
 *   VIScopyfixup_vis3:  %o2 + %g3 + 8
 *
 * NOTE(review): the meaning of %o2 and %g3 on entry is decided by the
 * code that branches here, which is outside this excerpt -- confirm.
 */
1148 VIScopyfixup_vis4:
1149                 add             %g3, 8, %g3             /* first of two adds on this path */
1150 VIScopyfixup_vis3:
1151                 add             %g3, 8, %g3
1152                 ba,pt           %xcc, VIScopyfixup_ret
1153                  add            %o2, %g3, %o1           /* delay slot: %o1 = value to return */
1154 #endif
1155
1156 #ifdef __KERNEL__
1157                 .text
1158                 .align          32
1159
1160                 .globl          __memmove
1161                 .type           __memmove,@function
1162
1163                 .globl          memmove
1164                 .type           memmove,@function
1165
/* memmove / __memmove: copy %o2 bytes from %o1 to %o0, allowing overlap.
 *
 * In:   %o0 = destination, %o1 = source, %o2 = byte count
 * Out:  %o0 = 0 when the backward loop below does the copy; otherwise
 *       whatever memcpy_private returns.
 *       NOTE(review): the destination pointer is not returned on the
 *       local path -- confirm that callers do not rely on it.
 * Uses: %g3, %g5, %o5 and the integer condition codes.
 *
 * When the destination is below the source, or the source range ends at
 * or before the destination, control is handed to memcpy_private with
 * %o0-%o2 untouched.  Otherwise the bytes are copied one at a time from
 * the last byte down to the first.  A zero count always takes the
 * memcpy_private route, so the loop never starts with %o2 == 0.
 */
1166 memmove:
1167 __memmove:      cmp             %o0, %o1                /* destination vs source */
1168                 blu,pt          %xcc, memcpy_private    /* destination below source */
1169                  sub            %o0, %o1, %g5           /* delay slot; %g5 is rewritten below on fall-through */
1170                 add             %o1, %o2, %g3           /* %g3 = source + count */
1171                 cmp             %g3, %o0
1172                 bleu,pt         %xcc, memcpy_private    /* source range ends at or before destination */
1173                  add            %o1, %o2, %g5           /* delay slot: %g5 = source + count */
1174                 add             %o0, %o2, %o5           /* %o5 = destination + count */
1175
1176                 sub             %g5, 1, %o1             /* %o1 = last source byte */
1177                 sub             %o5, 1, %o0             /* %o0 = last destination byte */
1178 1:              ldub            [%o1], %g5
1179                 subcc           %o2, 1, %o2             /* one byte fewer to go */
1180                 sub             %o1, 1, %o1
1181                 stb             %g5, [%o0]
1182                 bne,pt          %xcc, 1b                /* 64-bit test: the count is a full-width value, as in the %xcc compares above */
1183                  sub            %o0, 1, %o0             /* delay slot: step destination down */
1184
1185                 retl
1186                  clr            %o0                     /* delay slot: return 0 */
1187 #endif