vserver 2.0 rc7
[linux-2.6.git] / drivers / video / cfbcopyarea.c
index b4b286a..67711f7 100644 (file)
@@ -1,25 +1,29 @@
 /*
  *  Generic function for frame buffer with packed pixels of any depth.
  *
- *      Copyright (C)  June 1999 James Simmons
+ *      Copyright (C)  1999-2005 James Simmons <jsimmons@www.infradead.org>
  *
  *  This file is subject to the terms and conditions of the GNU General Public
  *  License.  See the file COPYING in the main directory of this archive for
  *  more details.
  *
  * NOTES:
- * 
- *  This is for cfb packed pixels. Iplan and such are incorporated in the 
+ *
+ *  This is for cfb packed pixels. Iplan and such are incorporated in the
  *  drivers that need them.
- * 
+ *
  *  FIXME
- *  The code for 24 bit is horrible. It copies byte by byte size instead of 
- *  longs like the other sizes. Needs to be optimized. 
  *
- *  Also need to add code to deal with cards endians that are different than 
+ *  Also need to add code to deal with cards endians that are different than
  *  the native cpu endians. I also need to deal with MSB position in the word.
- *  
+ *
+ *  The two functions or copying forward and backward could be split up like
+ *  the ones for filling, i.e. in aligned and unaligned versions. This would
+ *  help moving some redundant computations and branches out of the loop, too.
  */
+
+
+
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <asm/types.h>
 #include <asm/io.h>
 
-#define LONG_MASK  (BITS_PER_LONG - 1)
-
 #if BITS_PER_LONG == 32
-#define FB_WRITEL fb_writel
-#define FB_READL  fb_readl
-#define SHIFT_PER_LONG 5
-#define BYTES_PER_LONG 4
+#  define FB_WRITEL fb_writel
+#  define FB_READL  fb_readl
 #else
-#define FB_WRITEL fb_writeq
-#define FB_READL  fb_readq
-#define SHIFT_PER_LONG 6
-#define BYTES_PER_LONG 8
+#  define FB_WRITEL fb_writeq
+#  define FB_READL  fb_readq
 #endif
 
-static void bitcpy(unsigned long __iomem *dst, int dst_idx,
-                  const unsigned long __iomem *src, int src_idx,
-                  unsigned long n)
+    /*
+     *  Compose two values, using a bitmask as decision value
+     *  This is equivalent to (a & mask) | (b & ~mask)
+     */
+
+static inline unsigned long
+comp(unsigned long a, unsigned long b, unsigned long mask)
+{
+    return ((a ^ b) & mask) ^ b;
+}
+
+    /*
+     *  Generic bitwise copy algorithm
+     */
+
+static void
+bitcpy(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src,
+       int src_idx, int bits, unsigned n)
 {
        unsigned long first, last;
-       int shift = dst_idx-src_idx, left, right;
-       unsigned long d0, d1;
-       int m;
-       
-       if (!n)
-               return;
-       
-       shift = dst_idx-src_idx;
+       int const shift = dst_idx-src_idx;
+       int left, right;
+
        first = ~0UL >> dst_idx;
-       last = ~(~0UL >> ((dst_idx+n) % BITS_PER_LONG));
-       
+       last = ~(~0UL >> ((dst_idx+n) % bits));
+
        if (!shift) {
                // Same alignment for source and dest
-               
-               if (dst_idx+n <= BITS_PER_LONG) {
+
+               if (dst_idx+n <= bits) {
                        // Single word
                        if (last)
                                first &= last;
-                       FB_WRITEL((FB_READL(src) & first) | (FB_READL(dst) & ~first), dst);
+                       FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
                } else {
                        // Multiple destination words
+
                        // Leading bits
-                       if (first) {
-                               
-                               FB_WRITEL((FB_READL(src) & first) | 
-                                         (FB_READL(dst) & ~first), dst);
+                       if (first != ~0UL) {
+                               FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
                                dst++;
                                src++;
-                               n -= BITS_PER_LONG-dst_idx;
+                               n -= bits - dst_idx;
                        }
-                       
+
                        // Main chunk
-                       n /= BITS_PER_LONG;
+                       n /= bits;
                        while (n >= 8) {
                                FB_WRITEL(FB_READL(src++), dst++);
                                FB_WRITEL(FB_READL(src++), dst++);
@@ -94,58 +101,61 @@ static void bitcpy(unsigned long __iomem *dst, int dst_idx,
                        }
                        while (n--)
                                FB_WRITEL(FB_READL(src++), dst++);
+
                        // Trailing bits
                        if (last)
-                               FB_WRITEL((FB_READL(src) & last) | (FB_READL(dst) & ~last), dst);
+                               FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
                }
        } else {
+               unsigned long d0, d1;
+               int m;
                // Different alignment for source and dest
-               
-               right = shift & (BITS_PER_LONG-1);
-               left = -shift & (BITS_PER_LONG-1);
-               
-               if (dst_idx+n <= BITS_PER_LONG) {
+
+               right = shift & (bits - 1);
+               left = -shift & (bits - 1);
+
+               if (dst_idx+n <= bits) {
                        // Single destination word
                        if (last)
                                first &= last;
                        if (shift > 0) {
                                // Single source word
-                               FB_WRITEL(((FB_READL(src) >> right) & first) | 
-                                         (FB_READL(dst) & ~first), dst);
-                       } else if (src_idx+n <= BITS_PER_LONG) {
+                               FB_WRITEL( comp( FB_READL(src) >> right, FB_READL(dst), first), dst);
+                       } else if (src_idx+n <= bits) {
                                // Single source word
-                               FB_WRITEL(((FB_READL(src) << left) & first) | 
-                                         (FB_READL(dst) & ~first), dst);
+                               FB_WRITEL( comp(FB_READL(src) << left, FB_READL(dst), first), dst);
                        } else {
                                // 2 source words
                                d0 = FB_READL(src++);
                                d1 = FB_READL(src);
-                               FB_WRITEL(((d0<<left | d1>>right) & first) | 
-                                         (FB_READL(dst) & ~first), dst);
+                               FB_WRITEL( comp(d0<<left | d1>>right, FB_READL(dst), first), dst);
                        }
                } else {
                        // Multiple destination words
+                       /** We must always remember the last value read, because in case
+                       SRC and DST overlap bitwise (e.g. when moving just one pixel in
+                       1bpp), we always collect one full long for DST and that might
+                       overlap with the current long from SRC. We store this value in
+                       'd0'. */
                        d0 = FB_READL(src++);
                        // Leading bits
                        if (shift > 0) {
                                // Single source word
-                               FB_WRITEL(((d0 >> right) & first) | 
-                                         (FB_READL(dst) & ~first), dst);
+                               FB_WRITEL( comp(d0 >> right, FB_READL(dst), first), dst);
                                dst++;
-                               n -= BITS_PER_LONG-dst_idx;
+                               n -= bits - dst_idx;
                        } else {
                                // 2 source words
                                d1 = FB_READL(src++);
-                               FB_WRITEL(((d0<<left | d1>>right) & first) | 
-                                         (FB_READL(dst) & ~first), dst);
+                               FB_WRITEL( comp(d0<<left | d1>>right, FB_READL(dst), first), dst);
                                d0 = d1;
                                dst++;
-                               n -= BITS_PER_LONG-dst_idx;
+                               n -= bits - dst_idx;
                        }
-                       
+
                        // Main chunk
-                       m = n % BITS_PER_LONG;
-                       n /= BITS_PER_LONG;
+                       m = n % bits;
+                       n /= bits;
                        while (n >= 4) {
                                d1 = FB_READL(src++);
                                FB_WRITEL(d0 << left | d1 >> right, dst++);
@@ -166,72 +176,70 @@ static void bitcpy(unsigned long __iomem *dst, int dst_idx,
                                FB_WRITEL(d0 << left | d1 >> right, dst++);
                                d0 = d1;
                        }
-                       
+
                        // Trailing bits
                        if (last) {
                                if (m <= right) {
                                        // Single source word
-                                       FB_WRITEL(((d0 << left) & last) | 
-                                                 (FB_READL(dst) & ~last), 
-                                                 dst);
+                                       FB_WRITEL( comp(d0 << left, FB_READL(dst), last), dst);
                                } else {
                                        // 2 source words
                                        d1 = FB_READL(src);
-                                       FB_WRITEL(((d0<<left | d1>>right) & 
-                                                  last) | (FB_READL(dst) & 
-                                                           ~last), dst);
+                                       FB_WRITEL( comp(d0<<left | d1>>right, FB_READL(dst), last), dst);
                                }
                        }
                }
        }
 }
 
-static void bitcpy_rev(unsigned long __iomem *dst, int dst_idx,
-                      const unsigned long __iomem *src, int src_idx, unsigned long n)
+    /*
+     *  Generic bitwise copy algorithm, operating backward
+     */
+
+static void
+bitcpy_rev(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src,
+               int src_idx, int bits, unsigned n)
 {
        unsigned long first, last;
-       int shift = dst_idx-src_idx, left, right;
-       unsigned long d0, d1;
-       int m;
-       
-       if (!n)
-               return;
-       
-       dst += (n-1)/BITS_PER_LONG;
-       src += (n-1)/BITS_PER_LONG;
-       if ((n-1) % BITS_PER_LONG) {
-               dst_idx += (n-1) % BITS_PER_LONG;
-               dst += dst_idx >> SHIFT_PER_LONG;
-               dst_idx &= BITS_PER_LONG-1;
-               src_idx += (n-1) % BITS_PER_LONG;
-               src += src_idx >> SHIFT_PER_LONG;
-               src_idx &= BITS_PER_LONG-1;
+       int shift;
+
+       dst += (n-1)/bits;
+       src += (n-1)/bits;
+       if ((n-1) % bits) {
+               dst_idx += (n-1) % bits;
+               dst += dst_idx >> (ffs(bits) - 1);
+               dst_idx &= bits - 1;
+               src_idx += (n-1) % bits;
+               src += src_idx >> (ffs(bits) - 1);
+               src_idx &= bits - 1;
        }
-       
+
        shift = dst_idx-src_idx;
-       first = ~0UL << (BITS_PER_LONG-1-dst_idx);
-       last = ~(~0UL << (BITS_PER_LONG-1-((dst_idx-n) % BITS_PER_LONG)));
-       
+
+       first = ~0UL << (bits - 1 - dst_idx);
+       last = ~(~0UL << (bits - 1 - ((dst_idx-n) % bits)));
+
        if (!shift) {
                // Same alignment for source and dest
-               
+
                if ((unsigned long)dst_idx+1 >= n) {
                        // Single word
                        if (last)
                                first &= last;
-                       FB_WRITEL((FB_READL(src) & first) | (FB_READL(dst) & ~first), dst); 
+                       FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
                } else {
                        // Multiple destination words
+
                        // Leading bits
-                       if (first) {
-                               FB_WRITEL((FB_READL(src) & first) | (FB_READL(dst) & ~first), dst); 
+                       if (first != ~0UL) {
+                               FB_WRITEL( comp( FB_READL(src), FB_READL(dst), first), dst);
                                dst--;
                                src--;
                                n -= dst_idx+1;
                        }
-                       
+
                        // Main chunk
-                       n /= BITS_PER_LONG;
+                       n /= bits;
                        while (n >= 8) {
                                FB_WRITEL(FB_READL(src--), dst--);
                                FB_WRITEL(FB_READL(src--), dst--);
@@ -245,59 +253,58 @@ static void bitcpy_rev(unsigned long __iomem *dst, int dst_idx,
                        }
                        while (n--)
                                FB_WRITEL(FB_READL(src--), dst--);
-                       
+
                        // Trailing bits
                        if (last)
-                               FB_WRITEL((FB_READL(src) & last) | (FB_READL(dst) & ~last), dst);
+                               FB_WRITEL( comp( FB_READL(src), FB_READL(dst), last), dst);
                }
        } else {
                // Different alignment for source and dest
-               
-               right = shift & (BITS_PER_LONG-1);
-               left = -shift & (BITS_PER_LONG-1);
-               
+
+               int const left = -shift & (bits-1);
+               int const right = shift & (bits-1);
+
                if ((unsigned long)dst_idx+1 >= n) {
                        // Single destination word
                        if (last)
                                first &= last;
                        if (shift < 0) {
                                // Single source word
-                               FB_WRITEL((FB_READL(src) << left & first) | 
-                                         (FB_READL(dst) & ~first), dst);
+                               FB_WRITEL( comp( FB_READL(src)<<left, FB_READL(dst), first), dst);
                        } else if (1+(unsigned long)src_idx >= n) {
                                // Single source word
-                               FB_WRITEL(((FB_READL(src) >> right) & first) | 
-                                         (FB_READL(dst) & ~first), dst);
+                               FB_WRITEL( comp( FB_READL(src)>>right, FB_READL(dst), first), dst);
                        } else {
                                // 2 source words
-                               d0 = FB_READL(src--);
-                               d1 = FB_READL(src);
-                               FB_WRITEL(((d0>>right | d1<<left) & first) | 
-                                         (FB_READL(dst) & ~first), dst);
+                               FB_WRITEL( comp( (FB_READL(src)>>right | FB_READL(src-1)<<left), FB_READL(dst), first), dst);
                        }
                } else {
                        // Multiple destination words
+                       /** We must always remember the last value read, because in case
+                       SRC and DST overlap bitwise (e.g. when moving just one pixel in
+                       1bpp), we always collect one full long for DST and that might
+                       overlap with the current long from SRC. We store this value in
+                       'd0'. */
+                       unsigned long d0, d1;
+                       int m;
+
                        d0 = FB_READL(src--);
                        // Leading bits
                        if (shift < 0) {
                                // Single source word
-                               FB_WRITEL(((d0 << left) & first) | 
-                                         (FB_READL(dst) & ~first), dst);
-                               dst--;
-                               n -= dst_idx+1;
+                               FB_WRITEL( comp( (d0 << left), FB_READL(dst), first), dst);
                        } else {
                                // 2 source words
                                d1 = FB_READL(src--);
-                               FB_WRITEL(((d0>>right | d1<<left) & first) | 
-                                         (FB_READL(dst) & ~first), dst);
+                               FB_WRITEL( comp( (d0>>right | d1<<left), FB_READL(dst), first), dst);
                                d0 = d1;
-                               dst--;
-                               n -= dst_idx+1;
                        }
-                       
+                       dst--;
+                       n -= dst_idx+1;
+
                        // Main chunk
-                       m = n % BITS_PER_LONG;
-                       n /= BITS_PER_LONG;
+                       m = n % bits;
+                       n /= bits;
                        while (n >= 4) {
                                d1 = FB_READL(src--);
                                FB_WRITEL(d0 >> right | d1 << left, dst--);
@@ -318,20 +325,16 @@ static void bitcpy_rev(unsigned long __iomem *dst, int dst_idx,
                                FB_WRITEL(d0 >> right | d1 << left, dst--);
                                d0 = d1;
                        }
-                       
+
                        // Trailing bits
                        if (last) {
                                if (m <= left) {
                                        // Single source word
-                                       FB_WRITEL(((d0 >> right) & last) | 
-                                                 (FB_READL(dst) & ~last), 
-                                                 dst);
+                                       FB_WRITEL( comp(d0 >> right, FB_READL(dst), last), dst);
                                } else {
                                        // 2 source words
                                        d1 = FB_READL(src);
-                                       FB_WRITEL(((d0>>right | d1<<left) & 
-                                                  last) | (FB_READL(dst) & 
-                                                           ~last), dst);
+                                       FB_WRITEL( comp(d0>>right | d1<<left, FB_READL(dst), last), dst);
                                }
                        }
                }
@@ -342,30 +345,27 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
 {
        u32 dx = area->dx, dy = area->dy, sx = area->sx, sy = area->sy;
        u32 height = area->height, width = area->width;
-       int x2, y2, old_dx, old_dy, vxres, vyres;
-       unsigned long next_line = p->fix.line_length;
-       int dst_idx = 0, src_idx = 0, rev_copy = 0;
+       unsigned long const bits_per_line = p->fix.line_length*8u;
        unsigned long __iomem *dst = NULL, *src = NULL;
+       int bits = BITS_PER_LONG, bytes = bits >> 3;
+       int dst_idx = 0, src_idx = 0, rev_copy = 0;
+       int x2, y2, vxres, vyres;
 
        if (p->state != FBINFO_STATE_RUNNING)
                return;
 
        /* We want rotation but lack hardware to do it for us. */
        if (!p->fbops->fb_rotate && p->var.rotate) {
-       }       
-       
+       }
+
        vxres = p->var.xres_virtual;
        vyres = p->var.yres_virtual;
 
-       if (area->dx > vxres || area->sx > vxres || 
+       if (area->dx > vxres || area->sx > vxres ||
            area->dy > vyres || area->sy > vyres)
                return;
 
-       /* clip the destination */
-       old_dx = area->dx;
-       old_dy = area->dy;
-
-       /*
+       /* clip the destination
         * We could use hardware clipping but on many cards you get around
         * hardware clipping by writing to framebuffer directly.
         */
@@ -378,53 +378,58 @@ void cfb_copyarea(struct fb_info *p, const struct fb_copyarea *area)
        width = x2 - dx;
        height = y2 - dy;
 
+       if ((width==0) ||(height==0))
+               return;
+
        /* update sx1,sy1 */
-       sx += (dx - old_dx);
-       sy += (dy - old_dy);
+       sx += (dx - area->dx);
+       sy += (dy - area->dy);
 
        /* the source must be completely inside the virtual screen */
-       if (sx < 0 || sy < 0 ||
-           (sx + width) > vxres ||
-           (sy + height) > vyres)
+       if (sx < 0 || sy < 0 || (sx + width) > vxres || (sy + height) > vyres)
                return;
 
-       if ((dy == sy && dx > sx) ||    
-           (dy > sy)) { 
+       /* if the beginning of the target area might overlap with the end of
+       the source area, be have to copy the area reverse. */
+       if ((dy == sy && dx > sx) || (dy > sy)) {
                dy += height;
                sy += height;
                rev_copy = 1;
        }
 
-       dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & 
-                                     ~(BYTES_PER_LONG-1));
-       dst_idx = src_idx = (unsigned long)p->screen_base & (BYTES_PER_LONG-1);
-       dst_idx += dy*next_line*8 + dx*p->var.bits_per_pixel;
-       src_idx += sy*next_line*8 + sx*p->var.bits_per_pixel;
-       
+       // split the base of the framebuffer into a long-aligned address and the
+       // index of the first bit
+       dst = src = (unsigned long __iomem *)((unsigned long)p->screen_base & ~(bytes-1));
+       dst_idx = src_idx = 8*((unsigned long)p->screen_base & (bytes-1));
+       // add offset of source and target area
+       dst_idx += dy*bits_per_line + dx*p->var.bits_per_pixel;
+       src_idx += sy*bits_per_line + sx*p->var.bits_per_pixel;
+
        if (p->fbops->fb_sync)
                p->fbops->fb_sync(p);
+
        if (rev_copy) {
                while (height--) {
-                       dst_idx -= next_line*8;
-                       src_idx -= next_line*8;
-                       dst += dst_idx >> SHIFT_PER_LONG;
-                       dst_idx &= (BYTES_PER_LONG-1);
-                       src += src_idx >> SHIFT_PER_LONG;
-                       src_idx &= (BYTES_PER_LONG-1);
-                       bitcpy_rev(dst, dst_idx, src, src_idx, 
-                                  width*p->var.bits_per_pixel);
-               }       
+                       dst_idx -= bits_per_line;
+                       src_idx -= bits_per_line;
+                       dst += dst_idx >> (ffs(bits) - 1);
+                       dst_idx &= (bytes - 1);
+                       src += src_idx >> (ffs(bits) - 1);
+                       src_idx &= (bytes - 1);
+                       bitcpy_rev(dst, dst_idx, src, src_idx, bits,
+                               width*p->var.bits_per_pixel);
+               }
        } else {
                while (height--) {
-                       dst += dst_idx >> SHIFT_PER_LONG;
-                       dst_idx &= (BYTES_PER_LONG-1);
-                       src += src_idx >> SHIFT_PER_LONG;
-                       src_idx &= (BYTES_PER_LONG-1);
-                       bitcpy(dst, dst_idx, src, src_idx, 
-                              width*p->var.bits_per_pixel);
-                       dst_idx += next_line*8;
-                       src_idx += next_line*8;
-               }       
+                       dst += dst_idx >> (ffs(bits) - 1);
+                       dst_idx &= (bytes - 1);
+                       src += src_idx >> (ffs(bits) - 1);
+                       src_idx &= (bytes - 1);
+                       bitcpy(dst, dst_idx, src, src_idx, bits,
+                               width*p->var.bits_per_pixel);
+                       dst_idx += bits_per_line;
+                       src_idx += bits_per_line;
+               }
        }
 }