1 /* checksum.S: Sparc V9 optimized checksum code.
3 * Copyright(C) 1995 Linus Torvalds
4 * Copyright(C) 1995 Miguel de Icaza
5 * Copyright(C) 1996, 2000 David S. Miller
6 * Copyright(C) 1997 Jakub Jelinek
9 * Linux/Alpha checksum c-code
10 * Linux/ix86 inline checksum assembly
11 * RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
12 * David Mosberger-Tang for optimized reference c-code
13 * BSD4.4 portable checksum routine */
16 #include <asm/errno.h>
18 #include <asm/ptrace.h>
21 #include <asm/thread_info.h>
23 /* The problem with the "add with carry" instructions on Ultra
24 * is twofold. Firstly, they cannot pair with anything,
25 * and also they only add in the 32-bit carry condition bit
26 * into the accumulated sum. The following is much better.
27 * For larger chunks we use VIS code, which is faster ;) */
36 /* Once _AGAIN_ the SunSoft
37 * engineers are caught asleep at the keyboard, tsk tsk... */
/* CSUMCOPY_LASTCHUNK(off, t0, t1): checksum-and-copy step for the 16 bytes
 * found at offsets -(off + 8) and -off from %src.  Both 64-bit words are
 * fetched with ldxa through %asi, each is added into %sum with addcc, and
 * the temporaries are written relative to %dst with word-sized stw stores.
 * t0 and t1 are caller-supplied scratch registers and are overwritten.
 * NOTE(review): the embedded line numbers skip inside this macro, so the
 * carry handling around local label 51 and whatever happens between the
 * paired stw stores are not visible here; the last visible line also still
 * ends in a continuation backslash.  Confirm against the complete file.
 */
40 #define CSUMCOPY_LASTCHUNK(off, t0, t1) \
41 ldxa [%src - off - 0x08] %asi, t0; \
42 ldxa [%src - off - 0x00] %asi, t1; \
44 addcc t0, %sum, %sum; \
45 stw t0, [%dst - off - 0x04]; \
48 stw t0, [%dst - off - 0x08]; \
50 51: addcc t1, %sum, %sum; \
51 stw t1, [%dst - off + 0x04]; \
54 stw t1, [%dst - off - 0x00]; \
/* Tail handler (presumably the cc_end_cruft target; its label line is not
 * visible here): copies and checksums the bytes selected by the low four
 * bits of %g7.  %g7 is expected to hold len & 0xf, as set by the andcc that
 * accompanies each branch to ccte.  Loads go through %asi, stores are plain.
 * The 8-byte piece is added to %sum directly; the 4-, 2- and 1-byte pieces
 * are merged into one register and added once at the end, after which
 * control goes to ccfold.
 * NOTE(review): the embedded line numbers skip after several branches, so
 * their delay-slot instructions (presumably clearing %g2/%o4/%o5 for pieces
 * that are skipped) are not visible -- confirm against the complete file.
 */
/* 8-byte piece, selected by bit 3 of %g7 (the branch that skips it is not
 * visible).  %g5 = len & 4 is prepared for the next test.
 */
60 andcc %g7, 8, %g0 ! IEU1 Group
62 and %g7, 4, %g5 ! IEU0
63 ldxa [%src + 0x00] %asi, %g2 ! Load Group
64 add %dst, 8, %dst ! IEU0
65 add %src, 8, %src ! IEU1
66 addcc %g2, %sum, %sum ! IEU1 Group + 2 bubbles
/* Low 32 bits go to dst-4; the high 32 bits (after srlx) go to dst-8 from
 * the delay slot of the bcc.
 */
67 stw %g2, [%dst - 0x04] ! Store
68 srlx %g2, 32, %g2 ! IEU0
69 bcc,pt %xcc, 1f ! CTI Group
70 stw %g2, [%dst - 0x08] ! Store
/* Reached only when the 64-bit addcc carried: wrap the carry into %sum. */
71 add %sum, 1, %sum ! IEU0
/* 4-byte piece, skipped when %g5 (len & 4) is zero.  After the store the
 * word is moved to the upper half of %g2 for the final merge.
 */
72 1: brz,pt %g5, 1f ! CTI Group
74 lduwa [%src + 0x00] %asi, %g2 ! Load
75 add %dst, 4, %dst ! IEU0 Group
76 add %src, 4, %src ! IEU1
77 stw %g2, [%dst - 0x04] ! Store Group + 2 bubbles
78 sllx %g2, 32, %g2 ! IEU0
/* 2-byte piece (len & 2); the halfword ends up shifted left by 16 in %o4. */
79 1: andcc %g7, 2, %g0 ! IEU1
80 be,pn %icc, 1f ! CTI Group
82 lduha [%src + 0x00] %asi, %o4 ! Load
83 add %src, 2, %src ! IEU0 Group
84 add %dst, 2, %dst ! IEU1
85 sth %o4, [%dst - 0x2] ! Store Group + 2 bubbles
86 sll %o4, 16, %o4 ! IEU0
/* 1-byte piece (len & 1); the byte ends up shifted left by 8 in %o5. */
87 1: andcc %g7, 1, %g0 ! IEU1
88 be,pn %icc, 1f ! CTI Group
90 lduba [%src + 0x00] %asi, %o5 ! Load
91 stb %o5, [%dst + 0x00] ! Store Group + 2 bubbles
92 sll %o5, 8, %o5 ! IEU0
/* Merge the three small pieces and add them to %sum in one step; a carry
 * out of 64 bits is wrapped by the add in the delay slot of the final
 * branch to ccfold.
 */
93 1: or %g2, %o4, %o4 ! IEU1
94 or %o5, %o4, %o4 ! IEU0 Group
95 addcc %o4, %sum, %sum ! IEU1
96 bcc,pt %xcc, ccfold ! CTI
98 b,pt %xcc, ccfold ! CTI
99 add %sum, 1, %sum ! IEU1
/* Alignment fix-up (presumably the cc_fixit target; its label line is not
 * visible here).  The entry code branches to cc_fixit when %src & 7 is
 * non-zero.  Lengths below 6 go straight to ccte; otherwise a leading
 * halfword and/or word is copied and checksummed, selected by tests of
 * %src & 2 and %src & 4, before control rejoins ccmerge with
 * %g1 = len & 0xf0.  Loads go through %asi, stores are plain.
 * NOTE(review): the branch that consumes the %src & 2 test is not visible
 * (the embedded line numbers skip there) -- confirm against the full file.
 */
/* Too short to bother aligning: the annulled delay slot computes
 * %g7 = len & 0xf only when the branch to ccte is taken.
 */
102 cmp %len, 6 ! IEU1 Group
103 bl,a,pn %icc, ccte ! CTI
104 andcc %len, 0xf, %g7 ! IEU1 Group
105 andcc %src, 2, %g0 ! IEU1 Group
107 andcc %src, 0x4, %g0 ! IEU1 Group
/* Halfword step: copy 2 bytes and add them, shifted left by 16, to %sum. */
108 lduha [%src + 0x00] %asi, %g4 ! Load
109 sub %len, 2, %len ! IEU0
110 add %src, 2, %src ! IEU0 Group
111 add %dst, 2, %dst ! IEU1
112 sll %g4, 16, %g3 ! IEU0 Group + 1 bubble
113 addcc %g3, %sum, %sum ! IEU1
/* %g3 = bits 16..31 of %sum (srl in the delay slot), plus one if the addcc
 * carried out of 64 bits.
 */
114 bcc,pt %xcc, 0f ! CTI
115 srl %sum, 16, %g3 ! IEU0 Group
116 add %g3, 1, %g3 ! IEU0 4 clocks (mispredict)
/* Re-test %src & 4 on the advanced pointer, store the halfword, then
 * rebuild %sum from its own low 16 bits and %g3 shifted back up by 16.
 */
117 0: andcc %src, 0x4, %g0 ! IEU1 Group
118 sth %g4, [%dst - 0x2] ! Store
119 sll %sum, 16, %sum ! IEU0
120 sll %g3, 16, %g3 ! IEU0 Group
121 srl %sum, 16, %sum ! IEU0 Group
122 or %g3, %sum, %sum ! IEU0 Group (regdep)
/* No word step needed when %src & 4 is clear; the delay slot sets
 * %g1 = len & 0xf0 and the condition codes that ccmerge's branch tests.
 */
123 1: be,pt %icc, ccmerge ! CTI
124 andcc %len, 0xf0, %g1 ! IEU1
/* Word step: copy 4 bytes and add them to %sum. */
125 lduwa [%src + 0x00] %asi, %g4 ! Load Group
126 sub %len, 4, %len ! IEU0
127 add %src, 4, %src ! IEU1
128 add %dst, 4, %dst ! IEU0 Group
129 addcc %g4, %sum, %sum ! IEU1 Group + 1 bubble
130 stw %g4, [%dst - 0x4] ! Store
/* Either way continue at ccmerge; on carry the second branch's delay slot
 * wraps it into %sum.
 */
131 bcc,pt %xcc, ccmerge ! CTI
132 andcc %len, 0xf0, %g1 ! IEU1 Group
133 b,pt %xcc, ccmerge ! CTI 4 clocks (mispredict)
134 add %sum, 1, %sum ! IEU0
/* csum_partial_copy_sparc64: copy from src to dst while accumulating a
 * checksum in %sum.
 * In:  %o0=src, %o1=dest, %o2=len, %o3=sum (see the comment on the label).
 * Out: %o0 = the 64-bit accumulator folded to 32 bits (ccfold).
 * In this variant source loads go through %asi and stores are plain.
 * Dispatch: if src and dst differ in their low two bits, or src is odd, go
 * to ccslow; if len >= 256 (unsigned) go to csum_partial_copy_vis; if src
 * is not 8-byte aligned go to cc_fixit; otherwise fall into ccmerge.
 */
137 .globl csum_partial_copy_sparc64
138 csum_partial_copy_sparc64: /* %o0=src, %o1=dest, %o2=len, %o3=sum */
/* srl by 0 zero-extends sum and len from 32 bits while the alignment tests
 * are being evaluated.
 */
139 xorcc %src, %dst, %o4 ! IEU1 Group
140 srl %sum, 0, %sum ! IEU0
141 andcc %o4, 3, %g0 ! IEU1 Group
142 srl %len, 0, %len ! IEU0
143 bne,pn %icc, ccslow ! CTI
144 andcc %src, 1, %g0 ! IEU1 Group
145 bne,pn %icc, ccslow ! CTI
146 cmp %len, 256 ! IEU1 Group
147 bgeu,pt %icc, csum_partial_copy_vis ! CTI
148 andcc %src, 7, %g0 ! IEU1 Group
149 bne,pn %icc, cc_fixit ! CTI
150 andcc %len, 0xf0, %g1 ! IEU1 Group
/* ccmerge: %g1 = len & 0xf0 is the byte count for the 16-byte chunk table.
 * If it is zero go to ccte, with %g7 = len & 0xf set in the delay slot.
 */
151 ccmerge:be,pn %icc, ccte ! CTI
152 andcc %len, 0xf, %g7 ! IEU1 Group
/* Computed jump to the address of local label 12 minus %g1 * 4 bytes, i.e.
 * backwards into the table below.  %src and %dst are advanced by %g1 first
 * (the %dst add sits in the jmpl delay slot), matching the negative offsets
 * used by CSUMCOPY_LASTCHUNK.
 * NOTE(review): label 12 is not visible in this view, and the arithmetic
 * implies each macro expansion is 16 instructions (64 bytes) per 16 bytes
 * of data -- confirm against the complete macro.
 */
153 sll %g1, 2, %o4 ! IEU0
154 13: sethi %hi(12f), %o5 ! IEU0 Group
155 add %src, %g1, %src ! IEU1
156 sub %o5, %o4, %o5 ! IEU0 Group
157 jmpl %o5 + %lo(12f), %g0 ! CTI Group brk forced
158 add %dst, %g1, %dst ! IEU0 Group
/* Fifteen 16-byte steps, largest offset first, so entering later in the
 * table processes fewer chunks.
 */
159 cctbl: CSUMCOPY_LASTCHUNK(0xe8,%g2,%g3)
160 CSUMCOPY_LASTCHUNK(0xd8,%g2,%g3)
161 CSUMCOPY_LASTCHUNK(0xc8,%g2,%g3)
162 CSUMCOPY_LASTCHUNK(0xb8,%g2,%g3)
163 CSUMCOPY_LASTCHUNK(0xa8,%g2,%g3)
164 CSUMCOPY_LASTCHUNK(0x98,%g2,%g3)
165 CSUMCOPY_LASTCHUNK(0x88,%g2,%g3)
166 CSUMCOPY_LASTCHUNK(0x78,%g2,%g3)
167 CSUMCOPY_LASTCHUNK(0x68,%g2,%g3)
168 CSUMCOPY_LASTCHUNK(0x58,%g2,%g3)
169 CSUMCOPY_LASTCHUNK(0x48,%g2,%g3)
170 CSUMCOPY_LASTCHUNK(0x38,%g2,%g3)
171 CSUMCOPY_LASTCHUNK(0x28,%g2,%g3)
172 CSUMCOPY_LASTCHUNK(0x18,%g2,%g3)
173 CSUMCOPY_LASTCHUNK(0x08,%g2,%g3)
/* ccte: any remaining 1..15 bytes (len & 0xf) are handled by cc_end_cruft.
 * The delay slot of this branch is not visible in this view.
 */
175 andcc %len, 0xf, %g7 ! IEU1 Group
176 ccte: bne,pn %icc, cc_end_cruft ! CTI
/* ccfold: %o0 = upper 32 bits of (sum + (sum << 32)); the annulled delay
 * slot adds one only when that 64-bit addition carried.
 */
178 ccfold: sllx %sum, 32, %o0 ! IEU0 Group
179 addcc %sum, %o0, %o0 ! IEU1 Group (regdep)
180 srlx %o0, 32, %o0 ! IEU0 Group (regdep)
181 bcs,a,pn %xcc, 1f ! CTI
182 add %o0, 1, %o0 ! IEU1 4 clocks (mispredict)
/* Return; the delay slot reloads %g4 from TI_TASK off %g6 (presumably
 * restoring the current-task register -- verify against the other paths).
 */
183 1: retl ! CTI Group brk forced
184 ldx [%g6 + TI_TASK], %g4 ! Load
/* Fragment of the fallback path (presumably the ccslow target that the
 * entry code branches to for mismatched or odd alignment).  Only the byte,
 * halfword and word loads through %asi and two accumulate steps are visible
 * in this view; the embedded line numbers show that most of the routine is
 * missing, so no further behaviour is documented here.
 */
192 lduba [%src] %asi, %g5
202 lduha [%src] %asi, %o4
214 lduwa [%src] %asi, %o4
228 lduwa [%src] %asi, %o4
236 lduha [%src] %asi, %o4
246 lduba [%src] %asi, %g2
/* Partial sums are collected in %g5 and then added into %sum. */
251 1: addcc %o4, %g5, %g5
260 4: addcc %sum, %g5, %sum
266 /* Now the version with userspace as the destination */
/* CSUMCOPY_LASTCHUNK_USER(off, t0, t1): same 16-byte checksum-and-copy step
 * as CSUMCOPY_LASTCHUNK, with the address-space roles swapped: the two
 * 64-bit words are read with plain ldx and the word-sized stores use stwa
 * through %asi.  t0 and t1 are caller-supplied scratch registers and are
 * overwritten.
 * NOTE(review): the embedded line numbers skip inside this macro, so the
 * carry handling around local label 51 and whatever happens between the
 * paired stwa stores are not visible here; the last visible line also still
 * ends in a continuation backslash.  Confirm against the complete file.
 */
267 #define CSUMCOPY_LASTCHUNK_USER(off, t0, t1) \
268 ldx [%src - off - 0x08], t0; \
269 ldx [%src - off - 0x00], t1; \
271 addcc t0, %sum, %sum; \
272 stwa t0, [%dst - off - 0x04] %asi; \
275 stwa t0, [%dst - off - 0x08] %asi; \
277 51: addcc t1, %sum, %sum; \
278 stwa t1, [%dst - off + 0x04] %asi; \
281 stwa t1, [%dst - off - 0x00] %asi; \
/* Tail handler for the user-destination variant (presumably the
 * cc_user_end_cruft target; its label line is not visible here): copies and
 * checksums the bytes selected by the low four bits of %g7, which is
 * expected to hold len & 0xf.  Loads are plain, stores go through %asi.
 * The 8-byte piece is added to %sum directly; the 4-, 2- and 1-byte pieces
 * are merged into one register and added once at the end, after which
 * control goes to ccuserfold.
 * NOTE(review): the embedded line numbers skip after several branches, so
 * their delay-slot instructions (presumably clearing %g2/%o4/%o5 for pieces
 * that are skipped) are not visible -- confirm against the complete file.
 */
/* 8-byte piece, selected by bit 3 of %g7 (the branch that skips it is not
 * visible).  %g5 = len & 4 is prepared for the next test.
 */
287 andcc %g7, 8, %g0 ! IEU1 Group
289 and %g7, 4, %g5 ! IEU0
290 ldx [%src + 0x00], %g2 ! Load Group
291 add %dst, 8, %dst ! IEU0
292 add %src, 8, %src ! IEU1
293 addcc %g2, %sum, %sum ! IEU1 Group + 2 bubbles
/* Low 32 bits go to dst-4; the high 32 bits (after srlx) go to dst-8 from
 * the delay slot of the bcc.
 */
294 stwa %g2, [%dst - 0x04] %asi ! Store
295 srlx %g2, 32, %g2 ! IEU0
296 bcc,pt %xcc, 1f ! CTI Group
297 stwa %g2, [%dst - 0x08] %asi ! Store
/* Reached only when the 64-bit addcc carried: wrap the carry into %sum. */
298 add %sum, 1, %sum ! IEU0
/* 4-byte piece, skipped when %g5 (len & 4) is zero.  After the store the
 * word is moved to the upper half of %g2 for the final merge.
 */
299 1: brz,pt %g5, 1f ! CTI Group
301 lduw [%src + 0x00], %g2 ! Load
302 add %dst, 4, %dst ! IEU0 Group
303 add %src, 4, %src ! IEU1
304 stwa %g2, [%dst - 0x04] %asi ! Store Group + 2 bubbles
305 sllx %g2, 32, %g2 ! IEU0
/* 2-byte piece (len & 2); the halfword ends up shifted left by 16 in %o4. */
306 1: andcc %g7, 2, %g0 ! IEU1
307 be,pn %icc, 1f ! CTI Group
309 lduh [%src + 0x00], %o4 ! Load
310 add %src, 2, %src ! IEU0 Group
311 add %dst, 2, %dst ! IEU1
312 stha %o4, [%dst - 0x2] %asi ! Store Group + 2 bubbles
313 sll %o4, 16, %o4 ! IEU0
/* 1-byte piece (len & 1); the byte ends up shifted left by 8 in %o5. */
314 1: andcc %g7, 1, %g0 ! IEU1
315 be,pn %icc, 1f ! CTI Group
317 ldub [%src + 0x00], %o5 ! Load
318 stba %o5, [%dst + 0x00] %asi ! Store Group + 2 bubbles
319 sll %o5, 8, %o5 ! IEU0
/* Merge the three small pieces and add them to %sum in one step; a carry
 * out of 64 bits is wrapped by the add in the delay slot of the final
 * branch to ccuserfold.
 */
320 1: or %g2, %o4, %o4 ! IEU1
321 or %o5, %o4, %o4 ! IEU0 Group
322 addcc %o4, %sum, %sum ! IEU1
323 bcc,pt %xcc, ccuserfold ! CTI
325 b,pt %xcc, ccuserfold ! CTI
326 add %sum, 1, %sum ! IEU1
/* Alignment fix-up for the user-destination variant (presumably the
 * cc_user_fixit target; its label line is not visible here).  The entry
 * code branches to cc_user_fixit when %src & 7 is non-zero.  Lengths below
 * 6 go straight to ccuserte; otherwise a leading halfword and/or word is
 * copied and checksummed, selected by tests of %src & 2 and %src & 4,
 * before control rejoins ccusermerge with %g1 = len & 0xf0.  Loads are
 * plain, stores go through %asi.
 * NOTE(review): the branch that consumes the %src & 2 test is not visible
 * (the embedded line numbers skip there) -- confirm against the full file.
 */
/* Too short to bother aligning: the annulled delay slot computes
 * %g7 = len & 0xf only when the branch to ccuserte is taken.
 */
329 cmp %len, 6 ! IEU1 Group
330 bl,a,pn %icc, ccuserte ! CTI
331 andcc %len, 0xf, %g7 ! IEU1 Group
332 andcc %src, 2, %g0 ! IEU1 Group
334 andcc %src, 0x4, %g0 ! IEU1 Group
/* Halfword step: copy 2 bytes and add them, shifted left by 16, to %sum. */
335 lduh [%src + 0x00], %g4 ! Load
336 sub %len, 2, %len ! IEU0
337 add %src, 2, %src ! IEU0 Group
338 add %dst, 2, %dst ! IEU1
339 sll %g4, 16, %g3 ! IEU0 Group + 1 bubble
340 addcc %g3, %sum, %sum ! IEU1
/* %g3 = bits 16..31 of %sum (srl in the delay slot), plus one if the addcc
 * carried out of 64 bits.
 */
341 bcc,pt %xcc, 0f ! CTI
342 srl %sum, 16, %g3 ! IEU0 Group
343 add %g3, 1, %g3 ! IEU0 4 clocks (mispredict)
/* Re-test %src & 4 on the advanced pointer, store the halfword, then
 * rebuild %sum from its own low 16 bits and %g3 shifted back up by 16.
 */
344 0: andcc %src, 0x4, %g0 ! IEU1 Group
345 stha %g4, [%dst - 0x2] %asi ! Store
346 sll %sum, 16, %sum ! IEU0
347 sll %g3, 16, %g3 ! IEU0 Group
348 srl %sum, 16, %sum ! IEU0 Group
349 or %g3, %sum, %sum ! IEU0 Group (regdep)
/* No word step needed when %src & 4 is clear; the delay slot sets
 * %g1 = len & 0xf0 for ccusermerge.
 */
350 1: be,pt %icc, ccusermerge ! CTI
351 andcc %len, 0xf0, %g1 ! IEU1
/* Word step: copy 4 bytes and add them to %sum. */
352 lduw [%src + 0x00], %g4 ! Load Group
353 sub %len, 4, %len ! IEU0
354 add %src, 4, %src ! IEU1
355 add %dst, 4, %dst ! IEU0 Group
356 addcc %g4, %sum, %sum ! IEU1 Group + 1 bubble
357 stwa %g4, [%dst - 0x4] %asi ! Store
/* Either way continue at ccusermerge; on carry the second branch's delay
 * slot wraps it into %sum.
 */
358 bcc,pt %xcc, ccusermerge ! CTI
359 andcc %len, 0xf0, %g1 ! IEU1 Group
360 b,pt %xcc, ccusermerge ! CTI 4 clocks (mispredict)
361 add %sum, 1, %sum ! IEU0
/* csum_partial_copy_user_sparc64: copy from src to dst while accumulating a
 * checksum in %sum; per the comment above the macro this is the version
 * with userspace as the destination.
 * In:  %o0=src, %o1=dest, %o2=len, %o3=sum (see the comment on the label).
 * Out: %o0 = the 64-bit accumulator folded to 32 bits.
 * In this variant source loads are plain and stores go through %asi.
 * Dispatch: if src and dst differ in their low two bits, or src is odd, go
 * to ccuserslow; if len >= 256 (unsigned) go to csum_partial_copy_user_vis;
 * if src is not 8-byte aligned go to cc_user_fixit; otherwise continue
 * with the 16-byte chunk table.
 * NOTE(review): the ccusermerge, ccuserte and ccuserfold label lines are
 * not visible in this view (the embedded line numbers skip where they
 * would be); positions below are inferred from the non-user routine.
 */
364 .globl csum_partial_copy_user_sparc64
365 csum_partial_copy_user_sparc64: /* %o0=src, %o1=dest, %o2=len, %o3=sum */
/* srl by 0 zero-extends sum and len from 32 bits while the alignment tests
 * are being evaluated.
 */
366 xorcc %src, %dst, %o4 ! IEU1 Group
367 srl %sum, 0, %sum ! IEU0
368 andcc %o4, 3, %g0 ! IEU1 Group
369 srl %len, 0, %len ! IEU0
370 bne,pn %icc, ccuserslow ! CTI
371 andcc %src, 1, %g0 ! IEU1 Group
372 bne,pn %icc, ccuserslow ! CTI
373 cmp %len, 256 ! IEU1 Group
374 bgeu,pt %icc, csum_partial_copy_user_vis ! CTI
375 andcc %src, 7, %g0 ! IEU1 Group
376 bne,pn %icc, cc_user_fixit ! CTI
377 andcc %len, 0xf0, %g1 ! IEU1 Group
/* %g1 = len & 0xf0 is the byte count for the 16-byte chunk table.  If it
 * is zero go to ccuserte, with %g7 = len & 0xf set in the delay slot.
 */
379 be,pn %icc, ccuserte ! CTI
380 andcc %len, 0xf, %g7 ! IEU1 Group
/* Computed jump to the address of local label 12 minus %g1 * 4 bytes, i.e.
 * backwards into the table below.  %src and %dst are advanced by %g1 first
 * (the %dst add sits in the jmpl delay slot), matching the negative offsets
 * used by CSUMCOPY_LASTCHUNK_USER.
 * NOTE(review): label 12 is not visible in this view, and the arithmetic
 * implies each macro expansion is 16 instructions (64 bytes) per 16 bytes
 * of data -- confirm against the complete macro.
 */
381 sll %g1, 2, %o4 ! IEU0
382 13: sethi %hi(12f), %o5 ! IEU0 Group
383 add %src, %g1, %src ! IEU1
384 sub %o5, %o4, %o5 ! IEU0 Group
385 jmpl %o5 + %lo(12f), %g0 ! CTI Group brk forced
386 add %dst, %g1, %dst ! IEU0 Group
/* Fifteen 16-byte steps, largest offset first, so entering later in the
 * table processes fewer chunks.
 */
388 CSUMCOPY_LASTCHUNK_USER(0xe8,%g2,%g3)
389 CSUMCOPY_LASTCHUNK_USER(0xd8,%g2,%g3)
390 CSUMCOPY_LASTCHUNK_USER(0xc8,%g2,%g3)
391 CSUMCOPY_LASTCHUNK_USER(0xb8,%g2,%g3)
392 CSUMCOPY_LASTCHUNK_USER(0xa8,%g2,%g3)
393 CSUMCOPY_LASTCHUNK_USER(0x98,%g2,%g3)
394 CSUMCOPY_LASTCHUNK_USER(0x88,%g2,%g3)
395 CSUMCOPY_LASTCHUNK_USER(0x78,%g2,%g3)
396 CSUMCOPY_LASTCHUNK_USER(0x68,%g2,%g3)
397 CSUMCOPY_LASTCHUNK_USER(0x58,%g2,%g3)
398 CSUMCOPY_LASTCHUNK_USER(0x48,%g2,%g3)
399 CSUMCOPY_LASTCHUNK_USER(0x38,%g2,%g3)
400 CSUMCOPY_LASTCHUNK_USER(0x28,%g2,%g3)
401 CSUMCOPY_LASTCHUNK_USER(0x18,%g2,%g3)
402 CSUMCOPY_LASTCHUNK_USER(0x08,%g2,%g3)
/* Any remaining 1..15 bytes (len & 0xf) are handled by cc_user_end_cruft.
 * The delay slot of this branch is not visible in this view.
 */
404 andcc %len, 0xf, %g7 ! IEU1 Group
406 bne,pn %icc, cc_user_end_cruft ! CTI
/* Fold: %o0 = upper 32 bits of (sum + (sum << 32)); the annulled delay slot
 * adds one only when that 64-bit addition carried.
 */
409 sllx %sum, 32, %o0 ! IEU0 Group
410 addcc %sum, %o0, %o0 ! IEU1 Group (regdep)
411 srlx %o0, 32, %o0 ! IEU0 Group (regdep)
412 bcs,a,pn %xcc, 1f ! CTI
413 add %o0, 1, %o0 ! IEU1 4 clocks (mispredict)
/* Return; the delay slot reloads %g4 from TI_TASK off %g6 (presumably
 * restoring the current-task register -- verify against the other paths).
 */
414 1: retl ! CTI Group brk forced
415 ldx [%g6 + TI_TASK], %g4 ! IEU0 Group
/* Fragment of the fallback path for the user-destination variant
 * (presumably the ccuserslow target that the entry code branches to for
 * mismatched or odd alignment).  Only byte-wide stores through %asi and two
 * accumulate steps are visible in this view; the embedded line numbers show
 * that most of the routine is missing, so no further behaviour is
 * documented here.
 */
426 stba %g5, [%dst] %asi
438 stba %g2, [%dst] %asi
440 stba %o4, [%dst + 1] %asi
449 stba %g2, [%dst] %asi
451 stba %g3, [%dst + 1] %asi
453 stba %g2, [%dst + 2] %asi
455 stba %o4, [%dst + 3] %asi
472 stba %g2, [%dst] %asi
474 stba %o4, [%dst + 1] %asi
480 stba %g2, [%dst] %asi
/* Partial sums are collected in %g5 and then added into %sum. */
483 1: addcc %o4, %g5, %g5
492 4: addcc %sum, %g5, %sum
/* Fragment, presumably of cpc_handler (the handler named by the table rows
 * that follow); its label and most of its body are not visible in this
 * view.  Visible steps: load a 64-bit value from the stack at
 * %sp + 0x7ff + 128 into %g1 (0x7ff matches the SPARC V9 stack bias --
 * TODO confirm what is stored in that slot), load the TI_CURRENT_DS byte
 * off %g6 into %g3, and reload %g4 from TI_TASK off %g6.
 */
500 ldx [%sp + 0x7ff + 128], %g1
501 ldub [%g6 + TI_CURRENT_DS], %g3
507 ldx [%g6 + TI_TASK], %g4
/* Two four-word table rows of the form: start label, 0, end label, handler.
 * Both the cpc_* and the cpc_user_* ranges name cpc_handler.
 * NOTE(review): presumably exception-table entries covering the faulting
 * ranges of the two copy routines; the section directive and the range
 * labels themselves are not visible in this view -- confirm.
 */
511 .word cpc_start, 0, cpc_end, cpc_handler
512 .word cpc_user_start, 0, cpc_user_end, cpc_handler