ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-2.6.6.tar.bz2
[linux-2.6.git] / arch / sparc64 / lib / VIScsum.S
1 /* $Id: VIScsum.S,v 1.7 2002/02/09 19:49:30 davem Exp $
2  * VIScsum.S: High bandwidth IP checksumming utilizing the UltraSparc
3  *            Visual Instruction Set.
4  *
5  * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
6  * Copyright (C) 2000 David S. Miller (davem@redhat.com)
7  *
8  * Based on older sparc32/sparc64 checksum.S, which is:
9  *
10  *      Copyright(C) 1995 Linus Torvalds
11  *      Copyright(C) 1995 Miguel de Icaza
12  *      Copyright(C) 1996, 1997 David S. Miller
13  *    derived from:
14  *        Linux/Alpha checksum c-code
15  *        Linux/ix86 inline checksum assembly
16  *        RFC1071 Computing the Internet Checksum (esp. Jacobsons m68k code)
17  *        David Mosberger-Tang for optimized reference c-code
18  *        BSD4.4 portable checksum routine
19  */
20
/* STACKOFF is the displacement from %sp of the 8-byte scratch slot that
 * END_THE_TRICK uses (std %U0 followed by ldx) to move the final VIS sum
 * from the floating-point registers into an integer register.
 * NOTE(review): 2175 presumably is the 2047-byte V9 stack bias plus a
 * 128-byte register-window save area, and 64 the 32-bit save area with
 * no bias -- confirm against the SPARC ABI documents.
 */
21 #ifdef __sparc_v9__
22 #define STACKOFF        2175
23 #else
24 #define STACKOFF        64
25 #endif
26
/* Kernel builds take their definitions from the kernel headers below;
 * any other build supplies the constants itself.
 */
27 #ifdef __KERNEL__
28 #include <asm/head.h>
29 #include <asm/asi.h>
30 #include <asm/visasm.h>
31 #include <asm/thread_info.h>
32 #else
/* ASI_BLK_P is the value written to %asi before the ldda block loads
 * in csum_partial.
 */
33 #define ASI_BLK_P       0xf0
/* NOTE(review): FRPS_FEF is not referenced in the visible part of this
 * file; the name looks like a transposition of FPRS_FEF -- confirm
 * before relying on it.
 */
34 #define FRPS_FEF        0x04
35 #endif
36
37 /* Dobrou noc, SunSoft engineers. Spete sladce. (Czech: "Good night,
38  * SunSoft engineers. Sleep sweetly.") This has a couple of tricks in
39  * it, and those tricks are UltraLinux trade secrets :))
40  */
41
/* START_THE_TRICK(fz, f0, f2, f4, f6, f8, f10)
 *
 * Primes the carry-counting pipeline that DO_THE_TRICK continues.
 *   fz       a register the caller has already cleared with fzero
 *   f0..f10  the first six 64-bit registers of the first loaded block
 *
 * Each fcmpgt32 compares %fz against one data register and writes the
 * per-lane result mask to an integer register (%g1, %g2, %g3, %g5, %g7,
 * %o3, in that order).  A mask m is converted to a count with
 * "inc; srl 1", i.e. (m + 1) >> 1, which for a two-bit mask (one bit
 * per 32-bit lane, per the VIS specification) is the number of set
 * bits.  The count is then added to the running sum in %o2.
 *
 * Exit state: the counts from %g1, %g2 and %g3 have been added to %o2;
 * %g5 has been incremented but not yet shifted or added; %g7 and %o3
 * still hold raw masks.  DO_THE_TRICK and END_THE_TRICK both begin by
 * finishing those three registers, so one of them must be the next
 * macro executed.
 *
 * Clobbers %g1, %g2, %g3, %g5, %g7, %o3; accumulates into %o2.
 * The interleaving follows the pipeline notes in the right-hand
 * comments -- keep the instruction order when editing.
 */
42 #define START_THE_TRICK(fz,f0,f2,f4,f6,f8,f10)                                          \
43         fcmpgt32        %fz, %f0, %g1           /*  FPM         Group   */;             \
44         fcmpgt32        %fz, %f2, %g2           /*  FPM         Group   */;             \
45         fcmpgt32        %fz, %f4, %g3           /*  FPM         Group   */;             \
46         inc             %g1                     /*  IEU0        Group   */;             \
47         fcmpgt32        %fz, %f6, %g5           /*  FPM                 */;             \
48         srl             %g1, 1, %g1             /*  IEU0        Group   */;             \
49         fcmpgt32        %fz, %f8, %g7           /*  FPM                 */;             \
50         inc             %g2                     /*  IEU0        Group   */;             \
51         fcmpgt32        %fz, %f10, %o3          /*  FPM                 */;             \
52         srl             %g2, 1, %g2             /*  IEU0        Group   */;             \
53         inc             %g3                     /*  IEU1                */;             \
54         srl             %g3, 1, %g3             /*  IEU0        Group   */;             \
55         add             %o2, %g1, %o2           /*  IEU1                */;             \
56         add             %o2, %g2, %o2           /*  IEU0        Group   */;             \
57         inc             %g5                     /*  IEU1                */;             \
58         add             %o2, %g3, %o2           /*  IEU0        Group   */;
59
/* DO_THE_TRICK(O12, O14, f0..f14, F0..F14)
 *
 * One steady-state step of the VIS checksum loop.  The eight 64-bit
 * registers f0..f14 are added into F0..F14 with fpadd32 (F = F + f, two
 * 32-bit lanes each), and for every lane where fcmpgt32 reports
 * "addend > sum" a count is added to the running sum in %o2.  Masks are
 * converted to counts with "inc; srl 1", i.e. (mask + 1) >> 1.
 *
 * The work is software-pipelined across consecutive macros:
 *   - On entry it completes the counts left pending by the preceding
 *     macro: %g5 (srl, add), %g7 and %o3 (inc, srl, add).
 *   - O12/O14 are compared against f12/f14 and fully counted here via
 *     %o4/%o5.  At the call sites in this file O12/O14 are the registers
 *     that the previous step added into what is now f12/f14, so this is
 *     that step's deferred check for its last two registers.
 *   - f0/F0, f2/F2 and f4/F4 are compared after their adds and fully
 *     counted via %g1, %g2 and %g3.
 *   - f6/F6 -> %g5 (incremented only), f8/F8 -> %g7 and f10/F10 -> %o3
 *     (raw masks) are left pending for the next macro; this is the same
 *     exit state START_THE_TRICK produces.
 *
 * Clobbers %g1, %g2, %g3, %g5, %g7, %o3, %o4, %o5 and F0..F14;
 * accumulates into %o2.  Keep the instruction order: the interleaving
 * follows the pipeline notes in the right-hand comments.
 */
60 #define DO_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,F0,F2,F4,F6,F8,F10,F12,F14)     \
61         srl             %g5, 1, %g5             /*  IEU0        Group   */;             \
62         fpadd32         %F0, %f0, %F0           /*  FPA                 */;             \
63         fcmpgt32        %O12, %f12, %o4         /*  FPM                 */;             \
64         inc             %g7                     /*  IEU0        Group   */;             \
65         fpadd32         %F2, %f2, %F2           /*  FPA                 */;             \
66         fcmpgt32        %O14, %f14, %o5         /*  FPM                 */;             \
67         add             %o2, %g5, %o2           /*  IEU1        Group   */;             \
68         fpadd32         %F4, %f4, %F4           /*  FPA                 */;             \
69         fcmpgt32        %f0, %F0, %g1           /*  FPM                 */;             \
70         srl             %g7, 1, %g7             /*  IEU0        Group   */;             \
71         fpadd32         %F6, %f6, %F6           /*  FPA                 */;             \
72         fcmpgt32        %f2, %F2, %g2           /*  FPM                 */;             \
73         add             %o2, %g7, %o2           /*  IEU0        Group   */;             \
74         fpadd32         %F8, %f8, %F8           /*  FPA                 */;             \
75         fcmpgt32        %f4, %F4, %g3           /*  FPM                 */;             \
76         inc             %o3                     /*  IEU0        Group   */;             \
77         fpadd32         %F10, %f10, %F10        /*  FPA                 */;             \
78         fcmpgt32        %f6, %F6, %g5           /*  FPM                 */;             \
79         srl             %o3, 1, %o3             /*  IEU0        Group   */;             \
80         fpadd32         %F12, %f12, %F12        /*  FPA                 */;             \
81         fcmpgt32        %f8, %F8, %g7           /*  FPM                 */;             \
82         add             %o2, %o3, %o2           /*  IEU0        Group   */;             \
83         fpadd32         %F14, %f14, %F14        /*  FPA                 */;             \
84         fcmpgt32        %f10, %F10, %o3         /*  FPM                 */;             \
85         inc             %o4                     /*  IEU0        Group   */;             \
86         inc             %o5                     /*  IEU1                */;             \
87         srl             %o4, 1, %o4             /*  IEU0        Group   */;             \
88         inc             %g1                     /*  IEU1                */;             \
89         srl             %o5, 1, %o5             /*  IEU0        Group   */;             \
90         add             %o2, %o4, %o2           /*  IEU1                */;             \
91         srl             %g1, 1, %g1             /*  IEU0        Group   */;             \
92         add             %o2, %o5, %o2           /*  IEU1                */;             \
93         inc             %g2                     /*  IEU0        Group   */;             \
94         add             %o2, %g1, %o2           /*  IEU1                */;             \
95         srl             %g2, 1, %g2             /*  IEU0        Group   */;             \
96         inc             %g3                     /*  IEU1                */;             \
97         srl             %g3, 1, %g3             /*  IEU0        Group   */;             \
98         add             %o2, %g2, %o2           /*  IEU1                */;             \
99         inc             %g5                     /*  IEU0        Group   */;             \
100         add             %o2, %g3, %o2           /*  IEU0                */;
101
/* END_THE_TRICK(O12, O14, f0..f14, S0, S1, S2, S3, T0, T1, U0, fz)
 *
 * Drains the pipeline and collapses the eight accumulator registers
 * f0..f14 into one 64-bit value that is folded into %o2.
 *
 *   1. Completes the counts left pending in %g5, %g7 and %o3 by the
 *      preceding macro, and performs the deferred O12/O14 versus
 *      f12/f14 check (via %o4/%o5), exactly as DO_THE_TRICK does.
 *   2. Reduces pairwise with fpadd32:
 *        S0 = f2 + f0     S1 = f6 + f4
 *        S2 = f10 + f8    S3 = f14 + f12
 *        T0 = S0 + S1     T1 = S2 + S3
 *        U0 = T0 + T1
 *   3. For each of those seven adds (second source + first source ->
 *      sum) the lane count of "second source > sum" is ADDED to %o2 and
 *      the lane count of "%fz > first source" is SUBTRACTED from %o2.
 *      %fz is cleared by the fzero inside this macro.  Finally the lane
 *      count of "%fz > U0" is subtracted as well.
 *      NOTE(review): the subtractions presumably compensate for
 *      fcmpgt32 being a signed comparison -- confirm against the VIS
 *      documentation.
 *   4. Stores U0 to [%sp + STACKOFF], reloads it into %o5 and adds it
 *      to %o2 with addcc; the annulled bcs adds 1 only when the 64-bit
 *      add carried out (end-around carry).
 *
 * Masks become counts through "inc; srl 1", i.e. (mask + 1) >> 1.
 *
 * Clobbers %g1, %g2, %g3, %g5, %g7, %o3, %o4, %o5, S0..S3, T0, T1, U0,
 * %fz, the condition codes and the 8-byte stack slot at STACKOFF.
 * Result is left in %o2.  Defines local label 33.
 */
102 #define END_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,S0,S1,S2,S3,T0,T1,U0,fz)       \
103         srl             %g5, 1, %g5             /*  IEU0        Group   */;             \
104         fpadd32         %f2, %f0, %S0           /*  FPA                 */;             \
105         fcmpgt32        %O12, %f12, %o4         /*  FPM                 */;             \
106         inc             %g7                     /*  IEU0        Group   */;             \
107         fpadd32         %f6, %f4, %S1           /*  FPA                 */;             \
108         fcmpgt32        %O14, %f14, %o5         /*  FPM                 */;             \
109         srl             %g7, 1, %g7             /*  IEU0        Group   */;             \
110         fpadd32         %f10, %f8, %S2          /*  FPA                 */;             \
111         fcmpgt32        %f0, %S0, %g1           /*  FPM                 */;             \
112         inc             %o3                     /*  IEU0        Group   */;             \
113         fpadd32         %f14, %f12, %S3         /*  FPA                 */;             \
114         fcmpgt32        %f4, %S1, %g2           /*  FPM                 */;             \
115         add             %o2, %g5, %o2           /*  IEU0        Group   */;             \
116         fpadd32         %S0, %S1, %T0           /*  FPA                 */;             \
117         fcmpgt32        %f8, %S2, %g3           /*  FPM                 */;             \
118         add             %o2, %g7, %o2           /*  IEU0        Group   */;             \
119         fzero           %fz                     /*  FPA                 */;             \
120         fcmpgt32        %f12, %S3, %g5          /*  FPM                 */;             \
121         srl             %o3, 1, %o3             /*  IEU0        Group   */;             \
122         fpadd32         %S2, %S3, %T1           /*  FPA                 */;             \
123         fcmpgt32        %S0, %T0, %g7           /*  FPM                 */;             \
124         add             %o2, %o3, %o2           /*  IEU0        Group   */;             \
125         fpadd32         %T0, %T1, %U0           /*  FPA                 */;             \
126         fcmpgt32        %S2, %T1, %o3           /*  FPM                 */;             \
127         inc             %o4                     /*  IEU0        Group   */;             \
128         inc             %o5                     /*  IEU1                */;             \
129         srl             %o4, 1, %o4             /*  IEU0        Group   */;             \
130         inc             %g1                     /*  IEU1                */;             \
131         add             %o2, %o4, %o2           /*  IEU0        Group   */;             \
132         fcmpgt32        %fz, %f2, %o4           /*  FPM                 */;             \
133         srl             %o5, 1, %o5             /*  IEU0        Group   */;             \
134         inc             %g2                     /*  IEU1                */;             \
135         add             %o2, %o5, %o2           /*  IEU0        Group   */;             \
136         fcmpgt32        %fz, %f6, %o5           /*  FPM                 */;             \
137         srl             %g1, 1, %g1             /*  IEU0        Group   */;             \
138         inc             %g3                     /*  IEU1                */;             \
139         add             %o2, %g1, %o2           /*  IEU0        Group   */;             \
140         fcmpgt32        %fz, %f10, %g1          /*  FPM                 */;             \
141         srl             %g2, 1, %g2             /*  IEU0        Group   */;             \
142         inc             %g5                     /*  IEU1                */;             \
143         add             %o2, %g2, %o2           /*  IEU0        Group   */;             \
144         fcmpgt32        %fz, %f14, %g2          /*  FPM                 */;             \
145         srl             %g3, 1, %g3             /*  IEU0        Group   */;             \
146         inc             %g7                     /*  IEU1                */;             \
147         add             %o2, %g3, %o2           /*  IEU0        Group   */;             \
148         fcmpgt32        %fz, %S1, %g3           /*  FPM                 */;             \
149         srl             %g5, 1, %g5             /*  IEU0        Group   */;             \
150         inc             %o3                     /*  IEU1                */;             \
151         add             %o2, %g5, %o2           /*  IEU0        Group   */;             \
152         fcmpgt32        %fz, %S3, %g5           /*  FPM                 */;             \
153         srl             %g7, 1, %g7             /*  IEU0        Group   */;             \
154         inc             %o4                     /*  IEU1                */;             \
155         add             %o2, %g7, %o2           /*  IEU0        Group   */;             \
156         fcmpgt32        %fz, %T1, %g7           /*  FPM                 */;             \
157         srl             %o3, 1, %o3             /*  IEU0        Group   */;             \
158         inc             %o5                     /*  IEU1                */;             \
159         add             %o2, %o3, %o2           /*  IEU0        Group   */;             \
160         fcmpgt32        %T0, %U0, %o3           /*  FPM                 */;             \
161         srl             %o4, 1, %o4             /*  IEU0        Group   */;             \
162         inc             %g1                     /*  IEU1                */;             \
163         sub             %o2, %o4, %o2           /*  IEU0        Group   */;             \
164         fcmpgt32        %fz, %U0, %o4           /*  FPM                 */;             \
165         srl             %o5, 1, %o5             /*  IEU0        Group   */;             \
166         inc             %g2                     /*  IEU1                */;             \
167         srl             %g1, 1, %g1             /*  IEU0        Group   */;             \
168         sub             %o2, %o5, %o2           /*  IEU1                */;             \
169         std             %U0, [%sp + STACKOFF]   /*  Store               */;             \
170         srl             %g2, 1, %g2             /*  IEU0        Group   */;             \
171         sub             %o2, %g1, %o2           /*  IEU1                */;             \
172         inc             %g3                     /*  IEU0        Group   */;             \
173         sub             %o2, %g2, %o2           /*  IEU1                */;             \
174         srl             %g3, 1, %g3             /*  IEU0        Group   */;             \
175         inc             %g5                     /*  IEU1                */;             \
176         srl             %g5, 1, %g5             /*  IEU0        Group   */;             \
177         sub             %o2, %g3, %o2           /*  IEU1                */;             \
178         ldx             [%sp + STACKOFF], %o5   /*  Load        Group   */;             \
179         inc             %g7                     /*  IEU0                */;             \
180         sub             %o2, %g5, %o2           /*  IEU1                */;             \
181         srl             %g7, 1, %g7             /*  IEU0        Group   */;             \
182         inc             %o3                     /*  IEU1                */;             \
183         srl             %o3, 1, %o3             /*  IEU0        Group   */;             \
184         sub             %o2, %g7, %o2           /*  IEU1                */;             \
185         inc             %o4                     /*  IEU0        Group   */;             \
186         add             %o2, %o3, %o2           /*  IEU1                */;             \
187         srl             %o4, 1, %o4             /*  IEU0        Group   */;             \
188         sub             %o2, %o4, %o2           /*  IEU0        Group   */;             \
189         addcc           %o2, %o5, %o2           /*  IEU1        Group   */;             \
190         bcs,a,pn        %xcc, 33f               /*  CTI                 */;             \
191          add            %o2, 1, %o2             /*  IEU0                */;             \
192 33:                                             /*  That's it           */;
193
/* CSUM_LASTCHUNK(offset)
 *
 * Adds one 16-byte chunk to the running sum in %o2.  The two 64-bit
 * words at [%o0 - offset - 0x10] and [%o0 - offset - 0x08] are loaded
 * into %g2 and %g3, and each is added with addcc; the annulled bcs adds
 * 1 only when the 64-bit add carried out (end-around carry).
 *
 * Clobbers %g2, %g3 and the condition codes.  Defines local labels 31
 * and 32.
 *
 * Each expansion is exactly eight instructions.  The computed jump at
 * label 22 in csum_partial steps back from label 23 by twice the
 * remaining byte count (a multiple of 16) to enter this table, which
 * matches eight 4-byte instructions per 16 data bytes -- do not change
 * the instruction count without updating that dispatch.
 */
194 #define CSUM_LASTCHUNK(offset)                                                          \
195         ldx             [%o0 - offset - 0x10], %g2;                                     \
196         ldx             [%o0 - offset - 0x08], %g3;                                     \
197         addcc           %g2, %o2, %o2;                                                  \
198         bcs,a,pn        %xcc, 31f;                                                      \
199          add            %o2, 1, %o2;                                                    \
200 31:     addcc           %g3, %o2, %o2;                                                  \
201         bcs,a,pn        %xcc, 32f;                                                      \
202          add            %o2, 1, %o2;                                                    \
203 32:
204
205         .text
206         .globl          csum_partial
207         .align          32
208 csum_partial:
209         andcc           %o0, 7, %g0             /*  IEU1        Group           */
210         be,pt           %icc, 4f                /*  CTI                         */
211          andcc          %o0, 0x38, %g3          /*  IEU1                        */
212         mov             1, %g5                  /*  IEU0        Group           */
213         cmp             %o1, 6                  /*  IEU1                        */
214         bl,pn           %icc, 21f               /*  CTI                         */
215          andcc          %o0, 1, %g0             /*  IEU1        Group           */
216         bne,pn          %icc, csump_really_slow /*  CTI                         */
217          andcc          %o0, 2, %g0             /*  IEU1        Group           */
218         be,pt           %icc, 1f                /*  CTI                         */
219          and            %o0, 4, %g7             /*  IEU0                        */
220         lduh            [%o0], %g2              /*  Load                        */
221         sub             %o1, 2, %o1             /*  IEU0        Group           */
222         add             %o0, 2, %o0             /*  IEU1                        */
223         andcc           %o0, 4, %g7             /*  IEU1        Group           */
224         sll             %g5, 16, %g5            /*  IEU0                        */
225         sll             %g2, 16, %g2            /*  IEU0        Group           */
226         addcc           %g2, %o2, %o2           /*  IEU1        Group (regdep)  */
227         bcs,a,pn        %icc, 1f                /*  CTI                         */
228          add            %o2, %g5, %o2           /*  IEU0                        */
229 1:      ld              [%o0], %g2              /*  Load                        */
230         brz,a,pn        %g7, 4f                 /*  CTI+IEU1    Group           */
231          and            %o0, 0x38, %g3          /*  IEU0                        */
232         add             %o0, 4, %o0             /*  IEU0        Group           */
233         sub             %o1, 4, %o1             /*  IEU1                        */
234         addcc           %g2, %o2, %o2           /*  IEU1        Group           */
235         bcs,a,pn        %icc, 1f                /*  CTI                         */
236          add            %o2, 1, %o2             /*  IEU0                        */
237 1:      and             %o0, 0x38, %g3          /*  IEU1        Group           */
238 4:      srl             %o2, 0, %o2             /*  IEU0        Group           */
239         mov             0x40, %g1               /*  IEU1                        */
240         brz,pn          %g3, 3f                 /*  CTI+IEU1    Group           */
241          sub            %g1, %g3, %g1           /*  IEU0                        */
242         cmp             %o1, 56                 /*  IEU1        Group           */
243         blu,pn          %icc, 20f               /*  CTI                         */
244          andcc          %o0, 8, %g0             /*  IEU1        Group           */
245         be,pn           %icc, 1f                /*  CTI                         */
246          ldx            [%o0], %g2              /*  Load                        */
247         add             %o0, 8, %o0             /*  IEU0        Group           */
248         sub             %o1, 8, %o1             /*  IEU1                        */
249         addcc           %g2, %o2, %o2           /*  IEU1        Group           */
250         bcs,a,pn        %xcc, 1f                /*  CTI                         */
251          add            %o2, 1, %o2             /*  IEU0                        */
252 1:      andcc           %g1, 0x10, %g0          /*  IEU1        Group           */
253         be,pn           %icc, 2f                /*  CTI                         */
254          and            %g1, 0x20, %g1          /*  IEU0                        */
255         ldx             [%o0], %g2              /*  Load                        */
256         ldx             [%o0+8], %g3            /*  Load        Group           */
257         add             %o0, 16, %o0            /*  IEU0                        */
258         sub             %o1, 16, %o1            /*  IEU1                        */
259         addcc           %g2, %o2, %o2           /*  IEU1        Group           */
260         bcs,a,pn        %xcc, 1f                /*  CTI                         */
261          add            %o2, 1, %o2             /*  IEU0                        */
262 1:      addcc           %g3, %o2, %o2           /*  IEU1        Group           */
263         bcs,a,pn        %xcc, 2f                /*  CTI                         */
264          add            %o2, 1, %o2             /*  IEU0                        */
265 2:      brz,pn          %g1, 3f                 /*  CTI+IEU1    Group           */
266          ldx            [%o0], %g2              /*  Load                        */
267         ldx             [%o0+8], %g3            /*  Load        Group           */
268         ldx             [%o0+16], %g5           /*  Load        Group           */
269         ldx             [%o0+24], %g7           /*  Load        Group           */
270         add             %o0, 32, %o0            /*  IEU0                        */
271         sub             %o1, 32, %o1            /*  IEU1                        */
272         addcc           %g2, %o2, %o2           /*  IEU1        Group           */
273         bcs,a,pn        %xcc, 1f                /*  CTI                         */
274          add            %o2, 1, %o2             /*  IEU0                        */
275 1:      addcc           %g3, %o2, %o2           /*  IEU1        Group           */
276         bcs,a,pn        %xcc, 1f                /*  CTI                         */
277          add            %o2, 1, %o2             /*  IEU0                        */
278 1:      addcc           %g5, %o2, %o2           /*  IEU1        Group           */
279         bcs,a,pn        %xcc, 1f                /*  CTI                         */
280          add            %o2, 1, %o2             /*  IEU0                        */
281 1:      addcc           %g7, %o2, %o2           /*  IEU1        Group           */
282         bcs,a,pn        %xcc, 3f                /*  CTI                         */
283          add            %o2, 1, %o2             /*  IEU0                        */
284 3:      cmp             %o1, 0xc0               /*  IEU1        Group           */
285         blu,pn          %icc, 20f               /*  CTI                         */
286          sllx           %o2, 32, %g5            /*  IEU0                        */
287 #ifdef __KERNEL__
288         VISEntry
289 #endif
290         addcc           %o2, %g5, %o2           /*  IEU1        Group           */
291         sub             %o1, 0xc0, %o1          /*  IEU0                        */
292         wr              %g0, ASI_BLK_P, %asi    /*  LSU         Group           */
293         membar          #StoreLoad              /*  LSU         Group           */
294         srlx            %o2, 32, %o2            /*  IEU0        Group           */
295         bcs,a,pn        %xcc, 1f                /*  CTI                         */
296          add            %o2, 1, %o2             /*  IEU1                        */
297 1:      andcc           %o1, 0x80, %g0          /*  IEU1        Group           */
298         bne,pn          %icc, 7f                /*  CTI                         */
299          andcc          %o1, 0x40, %g0          /*  IEU1        Group           */
300         be,pn           %icc, 6f                /*  CTI                         */
301          fzero          %f12                    /*  FPA                         */
302         fzero           %f14                    /*  FPA         Group           */
303         ldda            [%o0 + 0x000] %asi, %f16
304         ldda            [%o0 + 0x040] %asi, %f32
305         ldda            [%o0 + 0x080] %asi, %f48
306         START_THE_TRICK(f12,f16,f18,f20,f22,f24,f26)
307         ba,a,pt         %xcc, 3f
308 6:      sub             %o0, 0x40, %o0          /*  IEU0        Group           */
309         fzero           %f28                    /*  FPA                         */
310         fzero           %f30                    /*  FPA         Group           */
311         ldda            [%o0 + 0x040] %asi, %f32
312         ldda            [%o0 + 0x080] %asi, %f48
313         ldda            [%o0 + 0x0c0] %asi, %f0
314         START_THE_TRICK(f28,f32,f34,f36,f38,f40,f42)
315         ba,a,pt         %xcc, 4f
316 7:      bne,pt          %icc, 8f                /*  CTI                         */
317          fzero          %f44                    /*  FPA                         */
318         add             %o0, 0x40, %o0          /*  IEU0        Group           */
319         fzero           %f60                    /*  FPA                         */
320         fzero           %f62                    /*  FPA         Group           */
321         ldda            [%o0 - 0x040] %asi, %f0
322         ldda            [%o0 + 0x000] %asi, %f16
323         ldda            [%o0 + 0x040] %asi, %f32
324         START_THE_TRICK(f60,f0,f2,f4,f6,f8,f10)
325         ba,a,pt         %xcc, 2f
326 8:      add             %o0, 0x80, %o0          /*  IEU0        Group           */
327         fzero           %f46                    /*  FPA                         */
328         ldda            [%o0 - 0x080] %asi, %f48
329         ldda            [%o0 - 0x040] %asi, %f0
330         ldda            [%o0 + 0x000] %asi, %f16
331         START_THE_TRICK(f44,f48,f50,f52,f54,f56,f58)
332 1:      DO_THE_TRICK(f44,f46,f48,f50,f52,f54,f56,f58,f60,f62,f0,f2,f4,f6,f8,f10,f12,f14)
333         ldda            [%o0 + 0x040] %asi, %f32
334 2:      DO_THE_TRICK(f60,f62,f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30)
335         ldda            [%o0 + 0x080] %asi, %f48
336 3:      DO_THE_TRICK(f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46)
337         ldda            [%o0 + 0x0c0] %asi, %f0
338 4:      DO_THE_TRICK(f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,f48,f50,f52,f54,f56,f58,f60,f62)
339         add             %o0, 0x100, %o0         /*  IEU0        Group           */
340         subcc           %o1, 0x100, %o1         /*  IEU1                        */
341         bgeu,a,pt       %icc, 1b                /*  CTI                         */
342          ldda           [%o0 + 0x000] %asi, %f16
343         membar          #Sync                   /*  LSU         Group           */
344         DO_THE_TRICK(f44,f46,f48,f50,f52,f54,f56,f58,f60,f62,f0,f2,f4,f6,f8,f10,f12,f14)
345         END_THE_TRICK(f60,f62,f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30)
346 #ifdef __KERNEL__
347         ldub            [%g6 + TI_CURRENT_DS], %g7
348 #endif
349         and             %o1, 0x3f, %o1          /*  IEU0        Group           */
350 #ifdef __KERNEL__
351         VISExit
352         wr              %g7, %g0, %asi
353 #endif
354 20:     andcc           %o1, 0xf0, %g1          /*  IEU1        Group           */
355         be,pn           %icc, 23f               /*  CTI                         */
356          and            %o1, 0xf, %o3           /*  IEU0                        */
357 #ifdef __KERNEL__
358 22:     sll             %g1, 1, %o4             /*  IEU0        Group           */
359         sethi           %hi(23f), %g7           /*  IEU1                        */
360         sub             %g7, %o4, %g7           /*  IEU0        Group           */
361         jmpl            %g7 + %lo(23f), %g0     /*  CTI         Group brk forced*/
362          add            %o0, %g1, %o0           /*  IEU0                        */
363 #else
364 22:     rd              %pc, %g7                /*  LSU         Group+4bubbles  */
365         sll             %g1, 1, %o4             /*  IEU0        Group           */
366         sub             %g7, %o4, %g7           /*  IEU0        Group (regdep)  */
367         jmpl            %g7 + (23f - 22b), %g0  /*  CTI         Group brk forced*/
368          add            %o0, %g1, %o0           /*  IEU0                        */
369 #endif
370         CSUM_LASTCHUNK(0xe0)
371         CSUM_LASTCHUNK(0xd0)
372         CSUM_LASTCHUNK(0xc0)
373         CSUM_LASTCHUNK(0xb0)
374         CSUM_LASTCHUNK(0xa0)
375         CSUM_LASTCHUNK(0x90)
376         CSUM_LASTCHUNK(0x80)
377         CSUM_LASTCHUNK(0x70)
378         CSUM_LASTCHUNK(0x60)
379         CSUM_LASTCHUNK(0x50)
380         CSUM_LASTCHUNK(0x40)
381         CSUM_LASTCHUNK(0x30)
382         CSUM_LASTCHUNK(0x20)
383         CSUM_LASTCHUNK(0x10)
384         CSUM_LASTCHUNK(0x00)
385 23:     brnz,pn         %o3, 26f                /*  CTI+IEU1    Group           */
386 24:      sllx           %o2, 32, %g1            /*  IEU0                        */
387 25:     addcc           %o2, %g1, %o0           /*  IEU1        Group           */
388         srlx            %o0, 32, %o0            /*  IEU0        Group (regdep)  */
389         bcs,a,pn        %xcc, 1f                /*  CTI                         */
390          add            %o0, 1, %o0             /*  IEU1                        */
391 1:      retl                                    /*  CTI         Group brk forced*/
392          srl            %o0, 0, %o0             /*  IEU0                        */
393 26:     andcc           %o1, 8, %g0             /*  IEU1        Group           */
394         be,pn           %icc, 1f                /*  CTI                         */
395          ldx            [%o0], %g3              /*  Load                        */
396         add             %o0, 8, %o0             /*  IEU0        Group           */
397         addcc           %g3, %o2, %o2           /*  IEU1        Group           */
398         bcs,a,pn        %xcc, 1f                /*  CTI                         */
399          add            %o2, 1, %o2             /*  IEU0                        */
400 1:      andcc           %o1, 4, %g0             /*  IEU1        Group           */
401         be,a,pn         %icc, 1f                /*  CTI                         */
402          clr            %g2                     /*  IEU0                        */
403         ld              [%o0], %g2              /*  Load                        */
404         add             %o0, 4, %o0             /*  IEU0        Group           */
405         sllx            %g2, 32, %g2            /*  IEU0        Group           */
406 1:      andcc           %o1, 2, %g0             /*  IEU1                        */
407         be,a,pn         %icc, 1f                /*  CTI                         */
408          clr            %o4                     /*  IEU0        Group           */
409         lduh            [%o0], %o4              /*  Load                        */
410         add             %o0, 2, %o0             /*  IEU1                        */
411         sll             %o4, 16, %o4            /*  IEU0        Group           */
412 1:      andcc           %o1, 1, %g0             /*  IEU1                        */
413         be,a,pn         %icc, 1f                /*  CTI                         */
414          clr            %o5                     /*  IEU0        Group           */
415         ldub            [%o0], %o5              /*  Load                        */
416         sll             %o5, 8, %o5             /*  IEU0        Group           */
417 1:      or              %g2, %o4, %o4           /*  IEU1                        */
418         or              %o5, %o4, %o4           /*  IEU0        Group (regdep)  */
419         addcc           %o4, %o2, %o2           /*  IEU1        Group (regdep)  */
420         bcs,a,pn        %xcc, 1f                /*  CTI                         */
421          add            %o2, 1, %o2             /*  IEU0                        */
422 1:      ba,pt           %xcc, 25b               /*  CTI         Group           */
423          sllx           %o2, 32, %g1            /*  IEU0                        */
424 21:     srl             %o2, 0, %o2             /*  IEU0        Group           */
425         cmp             %o1, 0                  /*  IEU1                        */
426         be,pn           %icc, 24b               /*  CTI                         */
427          andcc          %o1, 4, %g0             /*  IEU1        Group           */
428         be,a,pn         %icc, 1f                /*  CTI                         */
429          clr            %g2                     /*  IEU0                        */
430         lduh            [%o0], %g3              /*  Load                        */
431         lduh            [%o0+2], %g2            /*  Load        Group           */
432         add             %o0, 4, %o0             /*  IEU0        Group           */
433         sllx            %g3, 48, %g3            /*  IEU0        Group           */
434         sllx            %g2, 32, %g2            /*  IEU0        Group           */
435         or              %g3, %g2, %g2           /*  IEU0        Group           */
436 1:      andcc           %o1, 2, %g0             /*  IEU1                        */
437         be,a,pn         %icc, 1f                /*  CTI                         */
438          clr            %o4                     /*  IEU0        Group           */
439         lduh            [%o0], %o4              /*  Load                        */
440         add             %o0, 2, %o0             /*  IEU1                        */
441         sll             %o4, 16, %o4            /*  IEU0        Group           */
442 1:      andcc           %o1, 1, %g0             /*  IEU1                        */
443         be,a,pn         %icc, 1f                /*  CTI                         */
444          clr            %o5                     /*  IEU0        Group           */
445         ldub            [%o0], %o5              /*  Load                        */
446         sll             %o5, 8, %o5             /*  IEU0        Group           */
447 1:      or              %g2, %o4, %o4           /*  IEU1                        */
448         or              %o5, %o4, %o4           /*  IEU0        Group (regdep)  */
449         addcc           %o4, %o2, %o2           /*  IEU1        Group (regdep)  */
450         bcs,a,pn        %xcc, 1f                /*  CTI                         */
451          add            %o2, 1, %o2             /*  IEU0                        */
452 1:      ba,pt           %xcc, 25b               /*  CTI         Group           */
453          sllx           %o2, 32, %g1            /*  IEU0                        */
454
455         /* When buff is byte aligned and len is large, we back off to
456          * this really slow handling.  The issue is that we cannot do
457          * the VIS stuff when buff is byte aligned, as unaligned.c will
458          * not fix it up.
459          */
460 csump_really_slow:
        /* Scalar fallback checksum: uses only integer byte, halfword and
         * word loads, each issued at an address aligned for its size.
         *
         * Inputs as used below:
         *   %o0 = address of the data (buff)
         *   %o1 = length in bytes (len), tested as a signed 32-bit value
         *   %o2 = value added to the result at the end with end-around
         *         carry (presumably the caller's running sum -- confirm)
         * Output:
         *   %o0 = 32-bit result, zero-extended
         * Writes %o1, %o3, %o4, %o5, %g2, %g3 and the condition codes.
         */
461         mov     %o0, %o3                /* %o3 = read pointer */
462         mov     %o1, %o4                /* %o4 = remaining length */
463         cmp     %o1, 0
464         ble,pn  %icc, 9f                /* len <= 0: result is 0 plus %o2 */
465          mov    0, %o0                  /* delay slot: accumulator = 0 */
466         andcc   %o3, 1, %o5             /* %o5 != 0 if the start address is odd; kept for the final byte swap */
467         be,pt   %icc, 1f
468          sra    %o4, 1, %g3             /* delay slot: %g3 = len / 2 (halfword count) */
469         add     %o1, -1, %o4            /* odd start: consume one leading byte */
470         ldub    [%o3], %o0              /* accumulator = that byte */
471         add     %o3, 1, %o3
472         sra     %o4, 1, %g3             /* halfword count for the shortened length */
473 1:
474         cmp     %g3, 0
475         be,pt   %icc, 3f                /* fewer than 2 bytes left: go test for a trailing byte */
476          and    %o4, 1, %g2             /* delay slot (always runs): %g2 = len & 1, tested at 3: */
477         and     %o3, 2, %g2
478         brz,a,pt %g2, 1f                /* pointer is already 4-byte aligned */
479          sra    %g3, 1, %g3             /* annulled delay slot (taken path only): %g3 = word count */
480         add     %g3, -1, %g3            /* otherwise consume one halfword to reach 4-byte alignment */
481         add     %o4, -2, %o4
482         lduh    [%o3], %g2
483         add     %o3, 2, %o3
484         add     %o0, %g2, %o0
485         sra     %g3, 1, %g3             /* %g3 = word count */
486 1:
487         cmp     %g3, 0
488         be,pt   %icc, 2f                /* no whole words: skip the loop and the fold after it */
489          and    %o4, 2, %g2             /* delay slot: %g2 = len & 2, tested at 2: */
490 1:                                      /* word loop: one 32-bit word per iteration */
491         ld      [%o3], %g2
492         addcc   %o0, %g2, %o0
493         addx    %o0, %g0, %o0           /* add the 32-bit carry back in */
494         addcc   %g3, -1, %g3
495         bne,pt  %icc, 1b
496          add    %o3, 4, %o3             /* delay slot: advance to the next word */
497         srl     %o0, 16, %o1            /* fold the 32-bit sum to 16 bits: %o1 = high half */
498         sethi   %hi(64512), %g2
499         or      %g2, 1023, %g2          /* %g2 = 0xffff */
500         and     %o0, %g2, %g3
501         add     %g3, %o1, %g3           /* low half + high half */
502         srl     %g3, 16, %o0
503         and     %g3, %g2, %g2
504         add     %g2, %o0, %g3           /* second fold absorbs the carry of the first */
505         sll     %g3, 16, %g3
506         srl     %g3, 16, %o0            /* %o0 = folded sum truncated to 16 bits */
507         and     %o4, 2, %g2             /* %g2 = len & 2, tested at 2: */
508 2:
509         cmp     %g2, 0
510         be,pt   %icc, 3f                /* no trailing halfword */
511          and    %o4, 1, %g2             /* delay slot: %g2 = len & 1, tested at 3: */
512         lduh    [%o3], %g2              /* add the trailing halfword */
513         add     %o3, 2, %o3
514         add     %o0, %g2, %o0
515         and     %o4, 1, %g2             /* reload len & 1; %g2 was overwritten by the load */
516 3:
517         cmp     %g2, 0
518         be,pt   %icc, 1f                /* no trailing byte */
519          srl    %o0, 16, %o1            /* delay slot: %o1 = high half for the final fold */
520         ldub    [%o3], %g2              /* trailing byte ... */
521         sll     %g2, 8, %g2             /* ... is placed in bits 15:8 */
522         add     %o0, %g2, %o0
523         srl     %o0, 16, %o1            /* recompute the high half after the add */
524 1:
525         sethi   %hi(64512), %g2
526         or      %g2, 1023, %g2          /* %g2 = 0xffff */
527         cmp     %o5, 0                  /* test the odd-start flag; consumed by the bne below (nothing in between writes the condition codes) */
528         and     %o0, %g2, %g3
529         add     %g3, %o1, %g3           /* final fold: low half + high half */
530         srl     %g3, 16, %o0
531         and     %g3, %g2, %g2
532         add     %g2, %o0, %g3           /* second fold absorbs the carry */
533         sll     %g3, 16, %g3
534         srl     %g3, 16, %o0            /* %o0 = 16-bit result, unswapped */
535         srl     %g3, 24, %g3            /* %g3 = bits 15:8 of the result, moved to bits 7:0 */
536         and     %o0, 255, %g2
537         sll     %g2, 8, %g2             /* %g2 = bits 7:0 of the result, moved to bits 15:8 */
538         bne,pt  %icc, 1f                /* odd start: use the byte-swapped value */
539          or     %g3, %g2, %g2           /* delay slot (always runs): %g2 = byte-swapped result */
540 9:
541         mov     %o0, %g2                /* even start or len <= 0: use the unswapped value */
542 1:
543         addcc   %g2, %o2, %g2           /* add the %o2 input ... */
544         addx    %g2, %g0, %g2           /* ... with end-around carry */
545         retl
546          srl    %g2, 0, %o0             /* delay slot: return value, upper 32 bits cleared */