-/*
- * This is really ugly, but it's highly optimizable by the
- * compiler and is meant as compensation for gcc's missing
- * __builtin_memset(). For the 680[23]0 it might be worth considering
- * the optimal number of misaligned writes compared to the number of
- * tests'n'branches needed to align the destination address. The
- * 680[46]0 doesn't really care due to their copy-back caches.
- * 10/09/96 - Jes Sorensen
- *
- * __memset_g() fills count bytes starting at s with the low byte of c
- * and returns the original value of s. A count of zero returns at once
- * without storing anything.
- *
- * Counts below 36 take an unrolled path: one long store per four bytes
- * (selected by the fall-through switch), then one short and/or one byte
- * for the remainder. No alignment test is made on this path, so the
- * long and short stores go to whatever address s happens to have.
- * Larger counts first store a byte and/or a short to bring the
- * destination to a 4-byte boundary, then store longs in a loop, then
- * finish with a short and/or a byte.
- *
- * NOTE(review): the byte accounting treats a long store as 4 bytes and
- * a short store as 2 bytes - presumably true for the m68k targets this
- * was written for; confirm before reusing the code anywhere else.
- */
-static inline void * __memset_g(void * s, int c, size_t count)
-{
- void *xs = s; /* kept so the original pointer can be returned */
- size_t temp; /* number of long stores on the large-count path */
-
- if (!count)
- return xs;
-
- c &= 0xff; /* keep only the fill byte ... */
- c |= c << 8; /* ... copy it into bits 8-15 ... */
- c |= c << 16; /* ... and copy the low 16 bits into bits 16-31.
-                * NOTE(review): for fill bytes >= 0x80 this shifts a
-                * set bit into the sign bit of a 32-bit int - confirm
-                * the compilers in use tolerate that. */
-
- if (count < 36){
- long *ls = s;
-
- switch(count){ /* enter at the case giving count / 4 stores, then fall through */
- case 32: case 33: case 34: case 35:
- *ls++ = c; /* fall through */
- case 28: case 29: case 30: case 31:
- *ls++ = c; /* fall through */
- case 24: case 25: case 26: case 27:
- *ls++ = c; /* fall through */
- case 20: case 21: case 22: case 23:
- *ls++ = c; /* fall through */
- case 16: case 17: case 18: case 19:
- *ls++ = c; /* fall through */
- case 12: case 13: case 14: case 15:
- *ls++ = c; /* fall through */
- case 8: case 9: case 10: case 11:
- *ls++ = c; /* fall through */
- case 4: case 5: case 6: case 7:
- *ls++ = c;
- break;
- default: /* counts 1-3: no long store at all */
- break;
- }
- s = ls;
- if (count & 0x02){ /* bit 1 of count set: two more bytes as one short */
- short *ss = s;
- *ss++ = c;
- s = ss;
- }
- if (count & 0x01){ /* odd count: one final byte */
- char *cs = s;
- *cs++ = c;
- s = cs;
- }
- return xs;
- }
-
- if ((long) s & 1) /* odd address: store one byte to reach an even address */
- {
- char *cs = s;
- *cs++ = c;
- s = cs;
- count--;
- }
- if (count > 2 && (long) s & 2) /* address is 2 mod 4: one short reaches a 4-byte boundary;
-                                  * count is at least 35 here, so count > 2 always holds */
- {
- short *ss = s;
- *ss++ = c;
- s = ss;
- count -= 2;
- }
- temp = count >> 2; /* whole longs left to store */
- if (temp)
- {
- long *ls = s;
- temp--; /* pre-decrement so the do/while below runs exactly count >> 2 times */
- do
- *ls++ = c;
- while (temp--);
- s = ls;
- }
- if (count & 2) /* two leftover bytes as one short */
- {
- short *ss = s;
- *ss++ = c;
- s = ss;
- }
- if (count & 1) /* one leftover byte */
- {
- char *cs = s;
- *cs = c;
- }
- return xs;
-}
-
-/*
- * __memset_page assumes that data is longword aligned. Most, if not
- * all, of these page sized memsets are performed on page aligned
- * areas, thus we do not need to check if the destination is longword
- * aligned. Of course we suffer a serious performance loss if this is
- * not the case but I think the risk of this ever happening is
- * extremely small. We spend a lot of time clearing pages in
- * get_empty_page() so I think it is worth it anyway. Besides, the
- * 680[46]0 do not really care about misaligned writes due to their
- * copy-back cache.
- *
- * The optimized case for the 680[46]0 is implemented using the move16
- * instruction. My tests showed that this implementation is 35-45%
- * faster than the original implementation using movel, the only
- * caveat is that the destination address must be 16-byte aligned.
- * 01/09/96 - Jes Sorensen
- *
- * Fills count bytes at s with the low byte of c and returns the
- * original s. With CPU_M68040_OR_M68060_ONLY defined, a destination
- * that is not 16-byte aligned is handed to __memset_g(); otherwise the
- * first 16 bytes are written with four long stores and the asm loop
- * copies that block forward with move16. Without that define, the asm
- * loop issues eight movel stores (32 bytes) per pass.
- *
- * NOTE(review): the loop counters are computed with truncating
- * division, so a count that is not a multiple of 16 (move16 path) or
- * 32 (movel path) would leave the tail unwritten, and a count smaller
- * than one pass would make the initial counter wrap. The only use
- * visible in this chunk passes count == PAGE_SIZE.
- * NOTE(review): neither asm statement lists a "memory" clobber even
- * though both store through %0 - confirm against the GCC extended-asm
- * rules.
- * NOTE(review): in the move16 variant %2 is declared as an input only,
- * yet the asm post-increments it and then subtracts 16 again; the net
- * change is presumably zero, but the constraint does not say the
- * register is modified.
- */
-static inline void * __memset_page(void * s,int c,size_t count)
-{
- unsigned long data, tmp; /* data: fill pattern; tmp: output slot for the asm loop counter */
- void *xs = s; /* kept so the original pointer can be returned */
-
- c = c & 255;
- data = c | (c << 8); /* fill byte in bits 0-7 and 8-15 */
- data |= data << 16; /* ... mirrored into bits 16-31 */
-
-#ifdef CPU_M68040_OR_M68060_ONLY
-
- if (((unsigned long) s) & 0x0f) /* not 16-byte aligned: use the generic routine */
- __memset_g(s, c, count);
- else{
- unsigned long *sp = s;
- *sp++ = data; /* write the first 16 bytes by hand ... */
- *sp++ = data;
- *sp++ = data;
- *sp++ = data; /* ... sp now points just past them */
-
- __asm__ __volatile__("1:\t"
- ".chip 68040\n\t"
- "move16 %2@+,%0@+\n\t" /* copy from %2 (s) to %0 (sp), post-incrementing both */
- ".chip 68k\n\t"
- "subqw #8,%2\n\t" /* step %2 back by 8 ... */
- "subqw #8,%2\n\t" /* ... and 8 more, presumably undoing the post-increment */
- "dbra %1,1b\n\t" /* loop on the counter in %1 */
- : "=a" (sp), "=d" (tmp)
- : "a" (s), "0" (sp), "1" ((count - 16) / 16 - 1) /* counter excludes the 16 bytes already written */
- );
- }
-
-#else
- __asm__ __volatile__("1:\t"
- "movel %2,%0@+\n\t" /* eight long stores of data per pass */
- "movel %2,%0@+\n\t"
- "movel %2,%0@+\n\t"
- "movel %2,%0@+\n\t"
- "movel %2,%0@+\n\t"
- "movel %2,%0@+\n\t"
- "movel %2,%0@+\n\t"
- "movel %2,%0@+\n\t"
- "dbra %1,1b\n\t" /* loop on the counter in %1 */
- : "=a" (s), "=d" (tmp)
- : "d" (data), "0" (s), "1" (count / 32 - 1) /* presumably one less than the pass count because dbra runs the body once more - confirm */
- );
-#endif
-
- return xs;
-}
-
-extern void *memset(void *,int,__kernel_size_t); /* prototype for a memset function defined elsewhere;
-                                                   * a function-like macro of the same name follows it */
-
-#define __memset_const(s,c,count) /* picks the fill routine from the value of count */ \
-((count==PAGE_SIZE) ? /* exactly one page: use the unrolled page routine */ \
- __memset_page((s),(c),(count)) : \
- __memset_g((s),(c),(count))) /* any other count: generic routine.
-                               * NOTE(review): count is not parenthesized in the
-                               * comparison above; the memset() macro passes it
-                               * already wrapped in parentheses, other callers
-                               * would need to do the same. */
-
-#define memset(s, c, count) /* macro front end that dispatches on whether count is a compile-time constant */ \
-(__builtin_constant_p(count) ? /* constant count: let __memset_const choose the routine */ \
- __memset_const((s),(c),(count)) : \
- __memset_g((s),(c),(count))) /* non-constant count: always the generic routine */