X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=drivers%2Fvideo%2Fcfbcopyarea.c;h=6faea4034e3db7dccd666bb032c73a1fece237a3;hb=refs%2Fheads%2Fvserver;hp=b4b286a4cfb12e61d9b3821df21d0dcfdfb577bd;hpb=c7b5ebbddf7bcd3651947760f423e3783bbe6573;p=linux-2.6.git diff --git a/drivers/video/cfbcopyarea.c b/drivers/video/cfbcopyarea.c index b4b286a4c..6faea4034 100644 --- a/drivers/video/cfbcopyarea.c +++ b/drivers/video/cfbcopyarea.c @@ -1,26 +1,29 @@ /* * Generic function for frame buffer with packed pixels of any depth. * - * Copyright (C) June 1999 James Simmons + * Copyright (C) 1999-2005 James Simmons * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive for * more details. * * NOTES: - * - * This is for cfb packed pixels. Iplan and such are incorporated in the + * + * This is for cfb packed pixels. Iplan and such are incorporated in the * drivers that need them. - * + * * FIXME - * The code for 24 bit is horrible. It copies byte by byte size instead of - * longs like the other sizes. Needs to be optimized. * - * Also need to add code to deal with cards endians that are different than + * Also need to add code to deal with cards endians that are different than * the native cpu endians. I also need to deal with MSB position in the word. - * + * + * The two functions or copying forward and backward could be split up like + * the ones for filling, i.e. in aligned and unaligned versions. This would + * help moving some redundant computations and branches out of the loop, too. */ -#include + + + #include #include #include @@ -29,58 +32,61 @@ #include #include -#define LONG_MASK (BITS_PER_LONG - 1) - #if BITS_PER_LONG == 32 -#define FB_WRITEL fb_writel -#define FB_READL fb_readl -#define SHIFT_PER_LONG 5 -#define BYTES_PER_LONG 4 +# define FB_WRITEL fb_writel +# define FB_READL fb_readl #else -#define FB_WRITEL fb_writeq -#define FB_READL fb_readq -#define SHIFT_PER_LONG 6 -#define BYTES_PER_LONG 8 +# define FB_WRITEL fb_writeq +# define FB_READL fb_readq #endif -static void bitcpy(unsigned long __iomem *dst, int dst_idx, - const unsigned long __iomem *src, int src_idx, - unsigned long n) + /* + * Compose two values, using a bitmask as decision value + * This is equivalent to (a & mask) | (b & ~mask) + */ + +static inline unsigned long +comp(unsigned long a, unsigned long b, unsigned long mask) +{ + return ((a ^ b) & mask) ^ b; +} + + /* + * Generic bitwise copy algorithm + */ + +static void +bitcpy(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src, + int src_idx, int bits, unsigned n) { unsigned long first, last; - int shift = dst_idx-src_idx, left, right; - unsigned long d0, d1; - int m; - - if (!n) - return; - - shift = dst_idx-src_idx; - first = ~0UL >> dst_idx; - last = ~(~0UL >> ((dst_idx+n) % BITS_PER_LONG)); - + int const shift = dst_idx-src_idx; + int left, right; + + first = FB_SHIFT_HIGH(~0UL, dst_idx); + last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits)); + if (!shift) { // Same alignment for source and dest - - if (dst_idx+n <= BITS_PER_LONG) { + + if (dst_idx+n <= bits) { // Single word if (last) first &= last; - FB_WRITEL((FB_READL(src) & first) | (FB_READL(dst) & ~first), dst); + FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst); } else { // Multiple destination words + // Leading bits - if (first) { - - FB_WRITEL((FB_READL(src) & first) | - (FB_READL(dst) & ~first), dst); + if (first != ~0UL) { + FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst); dst++; src++; - n -= BITS_PER_LONG-dst_idx; + n -= bits - dst_idx; } - + // Main chunk - n /= BITS_PER_LONG; + n /= bits; while (n >= 8) { FB_WRITEL(FB_READL(src++), dst++); FB_WRITEL(FB_READL(src++), dst++); @@ -94,58 +100,61 @@ static void bitcpy(unsigned long __iomem *dst, int dst_idx, } while (n--) FB_WRITEL(FB_READL(src++), dst++); + // Trailing bits if (last) - FB_WRITEL((FB_READL(src) & last) | (FB_READL(dst) & ~last), dst); + FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst); } } else { + unsigned long d0, d1; + int m; // Different alignment for source and dest - - right = shift & (BITS_PER_LONG-1); - left = -shift & (BITS_PER_LONG-1); - - if (dst_idx+n <= BITS_PER_LONG) { + + right = shift & (bits - 1); + left = -shift & (bits - 1); + + if (dst_idx+n <= bits) { // Single destination word if (last) first &= last; if (shift > 0) { // Single source word - FB_WRITEL(((FB_READL(src) >> right) & first) | - (FB_READL(dst) & ~first), dst); - } else if (src_idx+n <= BITS_PER_LONG) { + FB_WRITEL( comp( FB_READL(src) >> right, FB_READL(dst), first), dst); + } else if (src_idx+n <= bits) { // Single source word - FB_WRITEL(((FB_READL(src) << left) & first) | - (FB_READL(dst) & ~first), dst); + FB_WRITEL( comp(FB_READL(src) << left, FB_READL(dst), first), dst); } else { // 2 source words d0 = FB_READL(src++); d1 = FB_READL(src); - FB_WRITEL(((d0<>right) & first) | - (FB_READL(dst) & ~first), dst); + FB_WRITEL( comp(d0<>right, FB_READL(dst), first), dst); } } else { // Multiple destination words + /** We must always remember the last value read, because in case + SRC and DST overlap bitwise (e.g. when moving just one pixel in + 1bpp), we always collect one full long for DST and that might + overlap with the current long from SRC. We store this value in + 'd0'. */ d0 = FB_READL(src++); // Leading bits if (shift > 0) { // Single source word - FB_WRITEL(((d0 >> right) & first) | - (FB_READL(dst) & ~first), dst); + FB_WRITEL( comp(d0 >> right, FB_READL(dst), first), dst); dst++; - n -= BITS_PER_LONG-dst_idx; + n -= bits - dst_idx; } else { // 2 source words d1 = FB_READL(src++); - FB_WRITEL(((d0<>right) & first) | - (FB_READL(dst) & ~first), dst); + FB_WRITEL( comp(d0<>right, FB_READL(dst), first), dst); d0 = d1; dst++; - n -= BITS_PER_LONG-dst_idx; + n -= bits - dst_idx; } - + // Main chunk - m = n % BITS_PER_LONG; - n /= BITS_PER_LONG; + m = n % bits; + n /= bits; while (n >= 4) { d1 = FB_READL(src++); FB_WRITEL(d0 << left | d1 >> right, dst++); @@ -166,72 +175,70 @@ static void bitcpy(unsigned long __iomem *dst, int dst_idx, FB_WRITEL(d0 << left | d1 >> right, dst++); d0 = d1; } - + // Trailing bits if (last) { if (m <= right) { // Single source word - FB_WRITEL(((d0 << left) & last) | - (FB_READL(dst) & ~last), - dst); + FB_WRITEL( comp(d0 << left, FB_READL(dst), last), dst); } else { // 2 source words d1 = FB_READL(src); - FB_WRITEL(((d0<>right) & - last) | (FB_READL(dst) & - ~last), dst); + FB_WRITEL( comp(d0<>right, FB_READL(dst), last), dst); } } } } } -static void bitcpy_rev(unsigned long __iomem *dst, int dst_idx, - const unsigned long __iomem *src, int src_idx, unsigned long n) + /* + * Generic bitwise copy algorithm, operating backward + */ + +static void +bitcpy_rev(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src, + int src_idx, int bits, unsigned n) { unsigned long first, last; - int shift = dst_idx-src_idx, left, right; - unsigned long d0, d1; - int m; - - if (!n) - return; - - dst += (n-1)/BITS_PER_LONG; - src += (n-1)/BITS_PER_LONG; - if ((n-1) % BITS_PER_LONG) { - dst_idx += (n-1) % BITS_PER_LONG; - dst += dst_idx >> SHIFT_PER_LONG; - dst_idx &= BITS_PER_LONG-1; - src_idx += (n-1) % BITS_PER_LONG; - src += src_idx >> SHIFT_PER_LONG; - src_idx &= BITS_PER_LONG-1; + int shift; + + dst += (n-1)/bits; + src += (n-1)/bits; + if ((n-1) % bits) { + dst_idx += (n-1) % bits; + dst += dst_idx >> (ffs(bits) - 1); + dst_idx &= bits - 1; + src_idx += (n-1) % bits; + src += src_idx >> (ffs(bits) - 1); + src_idx &= bits - 1; } - + shift = dst_idx-src_idx; - first = ~0UL << (BITS_PER_LONG-1-dst_idx); - last = ~(~0UL << (BITS_PER_LONG-1-((dst_idx-n) % BITS_PER_LONG))); - + + first = FB_SHIFT_LOW(~0UL, bits - 1 - dst_idx); + last = ~(FB_SHIFT_LOW(~0UL, bits - 1 - ((dst_idx-n) % bits))); + if (!shift) { // Same alignment for source and dest - + if ((unsigned long)dst_idx+1 >= n) { // Single word if (last) first &= last; - FB_WRITEL((FB_READL(src) & first) | (FB_READL(dst) & ~first), dst); + FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst); } else { // Multiple destination words + // Leading bits - if (first) { - FB_WRITEL((FB_READL(src) & first) | (FB_READL(dst) & ~first), dst); + if (first != ~0UL) { + FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst); dst--; src--; n -= dst_idx+1; } - + // Main chunk - n /= BITS_PER_LONG; + n /= bits; while (n >= 8) { FB_WRITEL(FB_READL(src--), dst--); FB_WRITEL(FB_READL(src--), dst--); @@ -245,59 +252,58 @@ static void bitcpy_rev(unsigned long __iomem *dst, int dst_idx, } while (n--) FB_WRITEL(FB_READL(src--), dst--); - + // Trailing bits if (last) - FB_WRITEL((FB_READL(src) & last) | (FB_READL(dst) & ~last), dst); + FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst); } } else { // Different alignment for source and dest - - right = shift & (BITS_PER_LONG-1); - left = -shift & (BITS_PER_LONG-1); - + + int const left = -shift & (bits-1); + int const right = shift & (bits-1); + if ((unsigned long)dst_idx+1 >= n) { // Single destination word if (last) first &= last; if (shift < 0) { // Single source word - FB_WRITEL((FB_READL(src) << left & first) | - (FB_READL(dst) & ~first), dst); + FB_WRITEL( comp( FB_READL(src)<= n) { // Single source word - FB_WRITEL(((FB_READL(src) >> right) & first) | - (FB_READL(dst) & ~first), dst); + FB_WRITEL( comp( FB_READL(src)>>right, FB_READL(dst), first), dst); } else { // 2 source words - d0 = FB_READL(src--); - d1 = FB_READL(src); - FB_WRITEL(((d0>>right | d1<>right | FB_READL(src-1)<>right | d1<>right | d1<= 4) { d1 = FB_READL(src--); FB_WRITEL(d0 >> right | d1 << left, dst--); @@ -318,20 +324,16 @@ static void bitcpy_rev(unsigned long __iomem *dst, int dst_idx, FB_WRITEL(d0 >> right | d1 << left, dst--); d0 = d1; } - + // Trailing bits if (last) { if (m <= left) { // Single source word - FB_WRITEL(((d0 >> right) & last) | - (FB_READL(dst) & ~last), - dst); + FB_WRITEL( comp(d0 >> right, FB_READL(dst), last), dst); } else { // 2 source words d1 = FB_READL(src); - FB_WRITEL(((d0>>right | d1<>right | d1<dx, dy = area->dy, sx = area->sx, sy = area->sy; u32 height = area->height, width = area->width; - int x2, y2, old_dx, old_dy, vxres, vyres; - unsigned long next_line = p->fix.line_length; - int dst_idx = 0, src_idx = 0, rev_copy = 0; + unsigned long const bits_per_line = p->fix.line_length*8u; unsigned long __iomem *dst = NULL, *src = NULL; + int bits = BITS_PER_LONG, bytes = bits >> 3; + int dst_idx = 0, src_idx = 0, rev_copy = 0; if (p->state != FBINFO_STATE_RUNNING) return; - /* We want rotation but lack hardware to do it for us. */ - if (!p->fbops->fb_rotate && p->var.rotate) { - } - - vxres = p->var.xres_virtual; - vyres = p->var.yres_virtual; - - if (area->dx > vxres || area->sx > vxres || - area->dy > vyres || area->sy > vyres) - return; - - /* clip the destination */ - old_dx = area->dx; - old_dy = area->dy; - - /* - * We could use hardware clipping but on many cards you get around - * hardware clipping by writing to framebuffer directly. - */ - x2 = area->dx + area->width; - y2 = area->dy + area->height; - dx = area->dx > 0 ? area->dx : 0; - dy = area->dy > 0 ? area->dy : 0; - x2 = x2 < vxres ? x2 : vxres; - y2 = y2 < vyres ? y2 : vyres; - width = x2 - dx; - height = y2 - dy; - - /* update sx1,sy1 */ - sx += (dx - old_dx); - sy += (dy - old_dy); - - /* the source must be completely inside the virtual screen */ - if (sx < 0 || sy < 0 || - (sx + width) > vxres || - (sy + height) > vyres) - return; - - if ((dy == sy && dx > sx) || - (dy > sy)) { + /* if the beginning of the target area might overlap with the end of + the source area, be have to copy the area reverse. */ + if ((dy == sy && dx > sx) || (dy > sy)) { dy += height; sy += height; rev_copy = 1; } - dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & - ~(BYTES_PER_LONG-1)); - dst_idx = src_idx = (unsigned long)p->screen_base & (BYTES_PER_LONG-1); - dst_idx += dy*next_line*8 + dx*p->var.bits_per_pixel; - src_idx += sy*next_line*8 + sx*p->var.bits_per_pixel; - + // split the base of the framebuffer into a long-aligned address and the + // index of the first bit + dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1)); + dst_idx = src_idx = 8*((unsigned long)p->screen_base & (bytes-1)); + // add offset of source and target area + dst_idx += dy*bits_per_line + dx*p->var.bits_per_pixel; + src_idx += sy*bits_per_line + sx*p->var.bits_per_pixel; + if (p->fbops->fb_sync) p->fbops->fb_sync(p); + if (rev_copy) { while (height--) { - dst_idx -= next_line*8; - src_idx -= next_line*8; - dst += dst_idx >> SHIFT_PER_LONG; - dst_idx &= (BYTES_PER_LONG-1); - src += src_idx >> SHIFT_PER_LONG; - src_idx &= (BYTES_PER_LONG-1); - bitcpy_rev(dst, dst_idx, src, src_idx, - width*p->var.bits_per_pixel); - } + dst_idx -= bits_per_line; + src_idx -= bits_per_line; + dst += dst_idx >> (ffs(bits) - 1); + dst_idx &= (bytes - 1); + src += src_idx >> (ffs(bits) - 1); + src_idx &= (bytes - 1); + bitcpy_rev(dst, dst_idx, src, src_idx, bits, + width*p->var.bits_per_pixel); + } } else { while (height--) { - dst += dst_idx >> SHIFT_PER_LONG; - dst_idx &= (BYTES_PER_LONG-1); - src += src_idx >> SHIFT_PER_LONG; - src_idx &= (BYTES_PER_LONG-1); - bitcpy(dst, dst_idx, src, src_idx, - width*p->var.bits_per_pixel); - dst_idx += next_line*8; - src_idx += next_line*8; - } + dst += dst_idx >> (ffs(bits) - 1); + dst_idx &= (bytes - 1); + src += src_idx >> (ffs(bits) - 1); + src_idx &= (bytes - 1); + bitcpy(dst, dst_idx, src, src_idx, bits, + width*p->var.bits_per_pixel); + dst_idx += bits_per_line; + src_idx += bits_per_line; + } } }