vserver 1.9.5.x5
[linux-2.6.git] / drivers / char / drm / radeon_state.c
index 64143d1..b8d38e1 100644 (file)
  *    Kevin E. Martin <martin@valinux.com>
  */
 
-#include "radeon.h"
 #include "drmP.h"
 #include "drm.h"
 #include "drm_sarea.h"
 #include "radeon_drm.h"
 #include "radeon_drv.h"
 
+drm_ioctl_desc_t radeon_ioctls[] = { /* indexed by DRM_IOCTL_NR(cmd); each slot is { handler, int, int } */
+       [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)]    = { radeon_cp_init,      1, 1 },
+       [DRM_IOCTL_NR(DRM_RADEON_CP_START)]   = { radeon_cp_start,     1, 1 },
+       [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)]    = { radeon_cp_stop,      1, 1 },
+       [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)]   = { radeon_cp_reset,     1, 1 },
+       [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)]    = { radeon_cp_idle,      1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)]  = { radeon_cp_resume,    1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_RESET)]      = { radeon_engine_reset, 1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = { radeon_fullscreen,   1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_SWAP)]       = { radeon_cp_swap,      1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_CLEAR)]      = { radeon_cp_clear,     1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_VERTEX)]     = { radeon_cp_vertex,    1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_INDICES)]    = { radeon_cp_indices,   1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)]    = { radeon_cp_texture,   1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)]    = { radeon_cp_stipple,   1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)]   = { radeon_cp_indirect,  1, 1 },
+       [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)]    = { radeon_cp_vertex2,   1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)]     = { radeon_cp_cmdbuf,    1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)]   = { radeon_cp_getparam,  1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_FLIP)]       = { radeon_cp_flip,      1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_ALLOC)]      = { radeon_mem_alloc,    1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_FREE)]       = { radeon_mem_free,     1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)]  = { radeon_mem_init_heap,1, 1 },
+       [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)]   = { radeon_irq_emit,     1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)]   = { radeon_irq_wait,     1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)]   = { radeon_cp_setparam,  1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = { radeon_surface_alloc,1, 0 },
+       [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)]  = { radeon_surface_free, 1, 0 }
+};
+/* NOTE(review): the two ints per entry are presumably drm_ioctl_desc_t's auth / root-only flags -- confirm in drmP.h. */
+int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls); /* size of the table above, as computed by DRM_ARRAY_SIZE */
 
 /* ================================================================
  * Helper functions for client state checking and fixup
@@ -43,12 +73,14 @@ static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_p
                                                     drm_file_t *filp_priv,
                                                     u32 *offset ) {
        u32 off = *offset;
+       struct drm_radeon_driver_file_fields *radeon_priv;
 
        if ( off >= dev_priv->fb_location &&
             off < ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
                return 0;
 
-       off += filp_priv->radeon_fb_delta;
+       radeon_priv = filp_priv->driver_priv;
+       off += radeon_priv->radeon_fb_delta;
 
        DRM_DEBUG( "offset fixed up to 0x%x\n", off );
 
@@ -61,40 +93,25 @@ static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_p
        return 0;
 }
 
-static __inline__ int radeon_check_and_fixup_offset_user( drm_radeon_private_t *dev_priv,
-                                                         drm_file_t *filp_priv,
-                                                         u32 *offset ) {
-       u32 off;
-
-       DRM_GET_USER_UNCHECKED( off, offset );
-
-       if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &off ) )
-               return DRM_ERR( EINVAL );
-
-       DRM_PUT_USER_UNCHECKED( offset, off );
-
-       return 0;
-}
-
 static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_priv,
                                                      drm_file_t *filp_priv,
                                                      int id,
-                                                     u32 *data ) {
+                                                     u32 __user *data ) {
        switch ( id ) {
 
        case RADEON_EMIT_PP_MISC:
-               if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv,
-                                                        &data[( RADEON_RB3D_DEPTHOFFSET
-                                                                - RADEON_PP_MISC ) / 4] ) ) {
+               if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
+                                                   &data[( RADEON_RB3D_DEPTHOFFSET
+                                                           - RADEON_PP_MISC ) / 4] ) ) {
                        DRM_ERROR( "Invalid depth buffer offset\n" );
                        return DRM_ERR( EINVAL );
                }
                break;
 
        case RADEON_EMIT_PP_CNTL:
-               if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv,
-                                                        &data[( RADEON_RB3D_COLOROFFSET
-                                                                - RADEON_PP_CNTL ) / 4] ) ) {
+               if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
+                                                   &data[( RADEON_RB3D_COLOROFFSET
+                                                           - RADEON_PP_CNTL ) / 4] ) ) {
                        DRM_ERROR( "Invalid colour buffer offset\n" );
                        return DRM_ERR( EINVAL );
                }
@@ -106,8 +123,8 @@ static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_
        case R200_EMIT_PP_TXOFFSET_3:
        case R200_EMIT_PP_TXOFFSET_4:
        case R200_EMIT_PP_TXOFFSET_5:
-               if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv,
-                                                        &data[0] ) ) {
+               if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
+                                                   &data[0] ) ) {
                        DRM_ERROR( "Invalid R200 texture offset\n" );
                        return DRM_ERR( EINVAL );
                }
@@ -116,9 +133,9 @@ static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_
        case RADEON_EMIT_PP_TXFILTER_0:
        case RADEON_EMIT_PP_TXFILTER_1:
        case RADEON_EMIT_PP_TXFILTER_2:
-               if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv,
-                                                        &data[( RADEON_PP_TXOFFSET_0
-                                                                - RADEON_PP_TXFILTER_0 ) / 4] ) ) {
+               if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
+                                                   &data[( RADEON_PP_TXOFFSET_0
+                                                           - RADEON_PP_TXFILTER_0 ) / 4] ) ) {
                        DRM_ERROR( "Invalid R100 texture offset\n" );
                        return DRM_ERR( EINVAL );
                }
@@ -132,9 +149,8 @@ static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_
        case R200_EMIT_PP_CUBIC_OFFSETS_5: {
                int i;
                for ( i = 0; i < 5; i++ ) {
-                       if ( radeon_check_and_fixup_offset_user( dev_priv,
-                                                                filp_priv,
-                                                                &data[i] ) ) {
+                       if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
+                                                           &data[i] ) ) {
                                DRM_ERROR( "Invalid R200 cubic texture offset\n" );
                                return DRM_ERR( EINVAL );
                        }
@@ -202,6 +218,7 @@ static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_
        case RADEON_EMIT_PP_TEX_SIZE_1:
        case RADEON_EMIT_PP_TEX_SIZE_2:
        case R200_EMIT_RB3D_BLENDCOLOR:
+       case R200_EMIT_TCL_POINT_SPRITE_CNTL:
                /* These packets don't contain memory offsets */
                break;
 
@@ -217,16 +234,11 @@ static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_
                                                      drm_file_t *filp_priv,
                                                      drm_radeon_cmd_buffer_t *cmdbuf,
                                                      unsigned int *cmdsz ) {
-       u32 tmp[4], *cmd = ( u32* )cmdbuf->buf;
-
-       if ( DRM_COPY_FROM_USER_UNCHECKED( tmp, cmd, sizeof( tmp ) ) ) {
-               DRM_ERROR( "Failed to copy data from user space\n" );
-               return DRM_ERR( EFAULT );
-       }
+       u32 *cmd = (u32 *) cmdbuf->buf;
 
-       *cmdsz = 2 + ( ( tmp[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 );
+       *cmdsz = 2 + ( ( cmd[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 );
 
-       if ( ( tmp[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) {
+       if ( ( cmd[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) {
                DRM_ERROR( "Not a type 3 packet\n" );
                return DRM_ERR( EINVAL );
        }
@@ -237,32 +249,27 @@ static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_
        }
 
        /* Check client state and fix it up if necessary */
-       if ( tmp[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */
+       if ( cmd[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */
                u32 offset;
 
-               if ( tmp[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL
+               if ( cmd[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL
                              | RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
-                       offset = tmp[2] << 10;
+                       offset = cmd[2] << 10;
                        if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
                                DRM_ERROR( "Invalid first packet offset\n" );
                                return DRM_ERR( EINVAL );
                        }
-                       tmp[2] = ( tmp[2] & 0xffc00000 ) | offset >> 10;
+                       cmd[2] = ( cmd[2] & 0xffc00000 ) | offset >> 10;
                }
 
-               if ( ( tmp[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) &&
-                    ( tmp[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
-                       offset = tmp[3] << 10;
+               if ( ( cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) &&
+                    ( cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
+                       offset = cmd[3] << 10;
                        if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
                                DRM_ERROR( "Invalid second packet offset\n" );
                                return DRM_ERR( EINVAL );
                        }
-                       tmp[3] = ( tmp[3] & 0xffc00000 ) | offset >> 10;
-               }
-
-               if ( DRM_COPY_TO_USER_UNCHECKED( cmd, tmp, sizeof( tmp ) ) ) {
-                       DRM_ERROR( "Failed to copy data to user space\n" );
-                       return DRM_ERR( EFAULT );
+                       cmd[3] = ( cmd[3] & 0xffc00000 ) | offset >> 10;
                }
        }
 
@@ -563,8 +570,9 @@ static struct {
        { R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
        { RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
        { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
-       { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_1" },
+       { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
        { R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
+       { R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
 };
 
 
@@ -775,13 +783,160 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
                        }
                }
        }
+       
+       /* hyper z clear */
+       /* no docs available, based on reverse engineering by Stephane Marchesin */
+       if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {
+
+               int i;
+               int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z? 
+                       (dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4);
+               
+               u32 clearmask;
+
+               u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
+                       ((clear->depth_mask & 0xff) << 24);
+       
+               
+               /* Make sure we restore the 3D state next time.
+                * we haven't touched any "normal" state - still need this?
+                */
+               dev_priv->sarea_priv->ctx_owner = 0;
+
+               if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
+               /* FIXME : reverse engineer that for Rx00 cards */
+               /* FIXME : the mask supposedly contains low-res z values. So can't set
+                  just to the max (0xff? or actually 0x3fff?), need to take z clear
+                  value into account? */
+               /* pattern seems to work for r100, though get slight
+                  rendering errors with glxgears. If hierz is not enabled for r100,
+                  only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
+                  other ones are ignored, and the same clear mask can be used. That's
+                  very different behaviour than R200 which needs different clear mask
+                  and different number of tiles to clear if hierz is enabled or not !?!
+               */
+                       clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
+               }
+               else {
+               /* clear mask : chooses the clearing pattern.
+                  rv250: could be used to clear only parts of macrotiles
+                  (but that would get really complicated...)?
+                  bit 0 and 1 (either or both of them ?!?!) are used to
+                  not clear tile (or maybe one of the bits indicates if the tile is
+                  compressed or not), bit 2 and 3 to not clear tile 1,...,.
+                  Pattern is as follows:
+                       | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
+                  bits -------------------------------------------------
+                       | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
+                  rv100: clearmask covers 2x8 4x1 tiles, but one clear still
+                  covers 256 pixels ?!?
+               */
+                       clearmask = 0x0;
+               }
+
+               BEGIN_RING( 8 );
+               RADEON_WAIT_UNTIL_2D_IDLE();
+               OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
+                       tempRB3D_DEPTHCLEARVALUE);
+               /* what offset is this exactly ? */
+               OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
+               /* need ctlstat, otherwise get some strange black flickering */
+               OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
+               ADVANCE_RING();
+
+               for (i = 0; i < nbox; i++) {
+                       int tileoffset, nrtilesx, nrtilesy, j;
+                       /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
+                       if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) {
+                               /* FIXME : figure this out for r200 (when hierz is enabled). Or
+                                  maybe r200 actually doesn't need to put the low-res z value into
+                                  the tile cache like r100, but just needs to clear the hi-level z-buffer?
+                                  Works for R100, both with hierz and without.
+                                  R100 seems to operate on 2x1 8x8 tiles, but...
+                                  odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
+                                  problematic with resolutions which are not 64 pix aligned? */
+                               tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
+                               nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
+                               nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
+                               for (j = 0; j <= nrtilesy; j++) {
+                                       BEGIN_RING( 4 );
+                                       OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
+                                       /* first tile */
+                                       OUT_RING( tileoffset * 8 );
+                                       /* the number of tiles to clear */
+                                       OUT_RING( nrtilesx + 4 );
+                                       /* clear mask : chooses the clearing pattern. */
+                                       OUT_RING( clearmask );
+                                       ADVANCE_RING();
+                                       tileoffset += depthpixperline >> 6;
+                               }
+                       }
+                       else if (dev_priv->microcode_version==UCODE_R200) {
+                               /* works for rv250. */
+                               /* find first macro tile (8x2 4x4 z-pixels on rv250) */
+                               tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
+                               nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
+                               nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
+                               for (j = 0; j <= nrtilesy; j++) {
+                                       BEGIN_RING( 4 );
+                                       OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
+                                       /* first tile */
+                                       /* judging by the first tile offset needed, could possibly
+                                          directly address/clear 4x4 tiles instead of 8x2 * 4x4
+                                          macro tiles, though would still need clear mask for
+                                          right/bottom if truly 4x4 granularity is desired ? */
+                                       OUT_RING( tileoffset * 16 );
+                                       /* the number of tiles to clear */
+                                       OUT_RING( nrtilesx + 1 );
+                                       /* clear mask : chooses the clearing pattern. */
+                                       OUT_RING( clearmask );
+                                       ADVANCE_RING();
+                                       tileoffset += depthpixperline >> 5;
+                               }
+                       }
+                       else { /* rv 100 */
+                               /* rv100 might not need 64 pix alignment, who knows */
+                               /* offsets are, hmm, weird */
+                               tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
+                               nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
+                               nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
+                               for (j = 0; j <= nrtilesy; j++) {
+                                       BEGIN_RING( 4 );
+                                       OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
+                                       OUT_RING( tileoffset * 128 );
+                                       /* the number of tiles to clear */
+                                       OUT_RING( nrtilesx + 4 );
+                                       /* clear mask : chooses the clearing pattern. */
+                                       OUT_RING( clearmask );
+                                       ADVANCE_RING();
+                                       tileoffset += depthpixperline >> 6;
+                               }
+                       }
+               }
+
+               /* TODO don't always clear all hi-level z tiles */
+               if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200)
+                       && (flags & RADEON_USE_HIERZ))
+               /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
+               /* FIXME : the mask supposedly contains low-res z values. So can't set
+                  just to the max (0xff? or actually 0x3fff?), need to take z clear
+                  value into account? */
+               {
+                       BEGIN_RING( 4 );
+                       OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
+                       OUT_RING( 0x0 ); /* First tile */
+                       OUT_RING( 0x3cc0 );
+                       OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
+                       ADVANCE_RING();
+               }
+       }
 
        /* We have to clear the depth and/or stencil buffers by
         * rendering a quad into just those buffers.  Thus, we have to
         * make sure the 3D engine is configured correctly.
         */
-       if ( dev_priv->is_r200 &&
-            (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {
+       if ((dev_priv->microcode_version == UCODE_R200) &&
+           (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
 
                int tempPP_CNTL;
                int tempRE_CNTL;
@@ -800,7 +955,6 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
                tempRE_CNTL = 0;
 
                tempRB3D_CNTL = depth_clear->rb3d_cntl;
-               tempRB3D_CNTL &= ~(1<<15); /* unset radeon magic flag */
 
                tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
                tempRB3D_STENCILREFMASK = 0x0;
@@ -851,6 +1005,14 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
                        tempRB3D_STENCILREFMASK = 0x00000000;
                }
 
+               if (flags & RADEON_USE_COMP_ZBUF) {
+                       tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
+                               RADEON_Z_DECOMPRESSION_ENABLE;
+               }
+               if (flags & RADEON_USE_HIERZ) {
+                       tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
+               }
+
                BEGIN_RING( 26 );
                RADEON_WAIT_UNTIL_2D_IDLE();
 
@@ -905,6 +1067,8 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
        } 
        else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {
 
+               int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
+
                rb3d_cntl = depth_clear->rb3d_cntl;
 
                if ( flags & RADEON_DEPTH ) {
@@ -921,6 +1085,14 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
                        rb3d_stencilrefmask = 0x00000000;
                }
 
+               if (flags & RADEON_USE_COMP_ZBUF) {
+                       tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
+                               RADEON_Z_DECOMPRESSION_ENABLE;
+               }
+               if (flags & RADEON_USE_HIERZ) {
+                       tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
+               }
+
                BEGIN_RING( 13 );
                RADEON_WAIT_UNTIL_2D_IDLE();
 
@@ -928,8 +1100,7 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
                OUT_RING( 0x00000000 );
                OUT_RING( rb3d_cntl );
                
-               OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
-                             depth_clear->rb3d_zstencilcntl );
+               OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL );
                OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
                              rb3d_stencilrefmask );
                OUT_RING_REG( RADEON_RB3D_PLANEMASK,
@@ -1246,7 +1417,7 @@ static void radeon_cp_dispatch_indirect( drm_device_t *dev,
                 */
                if ( dwords & 1 ) {
                        u32 *data = (u32 *)
-                               ((char *)dev_priv->buffers->handle
+                               ((char *)dev->agp_buffer_map->handle
                                 + buf->offset + start);
                        data[dwords++] = RADEON_CP_PACKET2;
                }
@@ -1300,7 +1471,7 @@ static void radeon_cp_dispatch_indices( drm_device_t *dev,
 
        dwords = (prim->finish - prim->start + 3) / sizeof(u32);
 
-       data = (u32 *)((char *)dev_priv->buffers->handle +
+       data = (u32 *)((char *)dev->agp_buffer_map->handle +
                       elt_buf->offset + prim->start);
 
        data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 );
@@ -1339,7 +1510,7 @@ static int radeon_cp_dispatch_texture( DRMFILE filp,
        drm_buf_t *buf;
        u32 format;
        u32 *buffer;
-       const u8 *data;
+       const u8 __user *data;
        int size, dwords, tex_width, blit_width;
        u32 height;
        int i;
@@ -1417,7 +1588,7 @@ static int radeon_cp_dispatch_texture( DRMFILE filp,
                 * update them for a multi-pass texture blit.
                 */
                height = image->height;
-               data = (const u8 *)image->data;
+               data = (const u8 __user *)image->data;
                
                size = height * blit_width;
 
@@ -1437,14 +1608,15 @@ static int radeon_cp_dispatch_texture( DRMFILE filp,
                }
                if ( !buf ) {
                        DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
-                       DRM_COPY_TO_USER( tex->image, image, sizeof(*image) );
+                       if (DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ))
+                               return DRM_ERR(EFAULT);
                        return DRM_ERR(EAGAIN);
                }
 
 
                /* Dispatch the indirect buffer.
                 */
-               buffer = (u32*)((char*)dev_priv->buffers->handle + buf->offset);
+               buffer = (u32*)((char*)dev->agp_buffer_map->handle + buf->offset);
                dwords = size / 4;
                buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
                buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
@@ -1499,7 +1671,7 @@ static int radeon_cp_dispatch_texture( DRMFILE filp,
                /* Update the input parameters for next time */
                image->y += height;
                image->height -= height;
-               image->data = (const u8 *)image->data + size;
+               image->data = (const u8 __user *)image->data + size;
        } while (image->height > 0);
 
        /* Flush the pixel cache after the blit completes.  This ensures
@@ -1534,10 +1706,203 @@ static void radeon_cp_dispatch_stipple( drm_device_t *dev, u32 *stipple )
        ADVANCE_RING();
 }
 
+static void radeon_apply_surface_regs(int surf_index, drm_radeon_private_t *dev_priv)
+{
+       /* Distance of this surface's register group from the SURFACE0 group. */
+       const int reg_shift = 16 * surf_index;
+
+       /* Only touch the registers when dev_priv->mmio is set. */
+       if (dev_priv->mmio) {
+               /* radeon_do_cp_idle() runs before the cached flags and bounds are written. */
+               radeon_do_cp_idle(dev_priv);
+               RADEON_WRITE(RADEON_SURFACE0_INFO + reg_shift, dev_priv->surfaces[surf_index].flags);
+               RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + reg_shift, dev_priv->surfaces[surf_index].lower);
+               RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + reg_shift, dev_priv->surfaces[surf_index].upper);
+       }
+}
+
+
+/* Allocates a virtual surface for filp; returns its index, or -1 on failure.
+ * A real surface is not always consumed: a request that borders an
+ * existing surface with the same flags stretches that surface instead.
+ *
+ * Note that refcount can be at most 2, since during a free refcount=3
+ * might mean we have to allocate a new surface which might not always
+ * be available.
+ * For example : we allocate three contiguous surfaces ABC. If B is
+ * freed, we suddenly need two surfaces to store A and C, which might
+ * not always be available.
+ */
+static int alloc_surface(drm_radeon_surface_alloc_t* new, drm_radeon_private_t *dev_priv, DRMFILE filp)
+{
+       struct radeon_virt_surface *virt;
+       uint32_t lo = new->address;
+       uint32_t hi = lo + new->size - 1;
+       int slot;
+       int real;
+
+       /* sanity check: range, flags, size, and the fixed-mask bits of both ends */
+       if (lo >= hi)
+               return -1;
+       if (new->flags == 0 || new->size == 0)
+               return -1;
+       if ((hi & RADEON_SURF_ADDRESS_FIXED_MASK) != RADEON_SURF_ADDRESS_FIXED_MASK)
+               return -1;
+       if ((lo & RADEON_SURF_ADDRESS_FIXED_MASK) != 0)
+               return -1;
+
+       /* make sure there is no overlap with surfaces that are in use */
+       for (real = 0; real < RADEON_MAX_SURFACES; real++) {
+               if (dev_priv->surfaces[real].refcount == 0)
+                       continue;
+               if (lo >= dev_priv->surfaces[real].lower) {
+                       if (lo < dev_priv->surfaces[real].upper)
+                               return -1;
+               } else if (hi > dev_priv->surfaces[real].lower) {
+                       return -1;
+               }
+       }
+
+       /* pick the first virtual surface that has no filp attached */
+       for (slot = 0; slot < 2*RADEON_MAX_SURFACES; slot++) {
+               if (dev_priv->virt_surfaces[slot].filp == 0)
+                       break;
+       }
+       if (slot == 2*RADEON_MAX_SURFACES)
+               return -1;
+       virt = &dev_priv->virt_surfaces[slot];
+
+       /* try to stretch a singly-referenced surface with the same flags */
+       for (real = 0; real < RADEON_MAX_SURFACES; real++) {
+               int grow_down;
+
+               if (dev_priv->surfaces[real].refcount != 1)
+                       continue;
+               if (new->flags != dev_priv->surfaces[real].flags)
+                       continue;
+
+               if (hi + 1 == dev_priv->surfaces[real].lower)
+                       grow_down = 1;  /* extend before */
+               else if (lo == dev_priv->surfaces[real].upper + 1)
+                       grow_down = 0;  /* extend after */
+               else
+                       continue;
+
+               virt->surface_index = real;
+               virt->lower = lo;
+               virt->upper = hi;
+               virt->flags = new->flags;
+               virt->filp = filp;
+               dev_priv->surfaces[real].refcount++;
+               if (grow_down)
+                       dev_priv->surfaces[real].lower = virt->lower;
+               else
+                       dev_priv->surfaces[real].upper = virt->upper;
+               radeon_apply_surface_regs(virt->surface_index, dev_priv);
+               return slot;
+       }
+
+       /* okay, we need a new one: claim the first unreferenced surface */
+       for (real = 0; real < RADEON_MAX_SURFACES; real++) {
+               if (dev_priv->surfaces[real].refcount != 0)
+                       continue;
+
+               virt->surface_index = real;
+               virt->lower = lo;
+               virt->upper = hi;
+               virt->flags = new->flags;
+               virt->filp = filp;
+               dev_priv->surfaces[real].refcount = 1;
+               dev_priv->surfaces[real].lower = virt->lower;
+               dev_priv->surfaces[real].upper = virt->upper;
+               dev_priv->surfaces[real].flags = virt->flags;
+               radeon_apply_surface_regs(virt->surface_index, dev_priv);
+               return slot;
+       }
+
+       /* we didn't find anything */
+       return -1;
+}
+
+static int free_surface(DRMFILE filp, drm_radeon_private_t *dev_priv, int lower)
+{
+       int slot;
+
+       /* Find filp's virtual surface starting at 'lower'; 0 if freed, 1 if none. */
+       for (slot = 0; slot < 2*RADEON_MAX_SURFACES; slot++) {
+               struct radeon_virt_surface *virt = &dev_priv->virt_surfaces[slot];
+
+               if (!virt->filp || lower != virt->lower || filp != virt->filp)
+                       continue;
+               /* A real-surface bound equal to ours moves to our opposite bound. */
+               if (dev_priv->surfaces[virt->surface_index].lower == virt->lower)
+                       dev_priv->surfaces[virt->surface_index].lower = virt->upper;
+               if (dev_priv->surfaces[virt->surface_index].upper == virt->upper)
+                       dev_priv->surfaces[virt->surface_index].upper = virt->lower;
+               /* Drop the reference; reaching zero also clears the flags. */
+               if (--dev_priv->surfaces[virt->surface_index].refcount == 0)
+                       dev_priv->surfaces[virt->surface_index].flags = 0;
+               virt->filp = 0;
+               radeon_apply_surface_regs(virt->surface_index, dev_priv);
+               return 0;
+       }
+
+       /* no virtual surface matched both filp and lower */
+       return 1;
+}
+
+static void radeon_surfaces_release(DRMFILE filp, drm_radeon_private_t *dev_priv)
+{
+       int slot;
+
+       /* Free every virtual surface whose filp is the given one. */
+       for (slot = 0; slot < 2*RADEON_MAX_SURFACES; slot++)
+               if (dev_priv->virt_surfaces[slot].filp == filp)
+                       free_surface(filp, dev_priv, dev_priv->virt_surfaces[slot].lower);
+}
 
 /* ================================================================
  * IOCTL functions
  */
+int radeon_surface_alloc(DRM_IOCTL_ARGS)
+{
+       /* ioctl handler: copy in the user's request and hand it to alloc_surface(). */
+       DRM_DEVICE;
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       drm_radeon_surface_alloc_t request;
+
+       if (!dev_priv) {
+               DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
+               return DRM_ERR(EINVAL);
+       }
+
+       DRM_COPY_FROM_USER_IOCTL(request, (drm_radeon_surface_alloc_t __user *)data, sizeof(request));
+
+       /* alloc_surface() reports failure as -1; any other result counts as success. */
+       if (alloc_surface(&request, dev_priv, filp) != -1)
+               return 0;
+       return DRM_ERR(EINVAL);
+}
+
+int radeon_surface_free(DRM_IOCTL_ARGS)
+{
+       DRM_DEVICE;
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       drm_radeon_surface_free_t memfree;
+       /* ioctl handler: free the caller's surface that starts at memfree.address. */
+       if (!dev_priv) {
+               DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
+               return DRM_ERR(EINVAL);
+       }
+       /* The user pointer is cast to the same type as memfree, the copy destination. */
+       DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *)data,
+                                 sizeof(memfree) );
+       /* free_surface() returns non-zero when no surface of filp starts at that address. */
+       if (free_surface(filp, dev_priv, memfree.address))
+               return DRM_ERR(EINVAL);
+       else
+               return 0;
+}
 
 int radeon_cp_clear( DRM_IOCTL_ARGS )
 {
@@ -1550,7 +1915,7 @@ int radeon_cp_clear( DRM_IOCTL_ARGS )
 
        LOCK_TEST_WITH_RETURN( dev, filp );
 
-       DRM_COPY_FROM_USER_IOCTL( clear, (drm_radeon_clear_t *)data,
+       DRM_COPY_FROM_USER_IOCTL( clear, (drm_radeon_clear_t __user *)data,
                             sizeof(clear) );
 
        RING_SPACE_TEST_WITH_RETURN( dev_priv );
@@ -1593,7 +1958,7 @@ static int radeon_do_init_pageflip( drm_device_t *dev )
        return 0;
 }
 
-/* Called whenever a client dies, from DRM(release).
+/* Called whenever a client dies, from drm_release.
  * NOTE:  Lock isn't necessarily held when this is called!
  */
 int radeon_do_cleanup_pageflip( drm_device_t *dev )
@@ -1664,14 +2029,9 @@ int radeon_cp_vertex( DRM_IOCTL_ARGS )
 
        LOCK_TEST_WITH_RETURN( dev, filp );
 
-       if ( !dev_priv ) {
-               DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
-               return DRM_ERR(EINVAL);
-       }
-
        DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
 
-       DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex_t *)data,
+       DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex_t __user *)data,
                             sizeof(vertex) );
 
        DRM_DEBUG( "pid=%d index=%d count=%d discard=%d\n",
@@ -1762,7 +2122,7 @@ int radeon_cp_indices( DRM_IOCTL_ARGS )
 
        DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
 
-       DRM_COPY_FROM_USER_IOCTL( elts, (drm_radeon_indices_t *)data,
+       DRM_COPY_FROM_USER_IOCTL( elts, (drm_radeon_indices_t __user *)data,
                             sizeof(elts) );
 
        DRM_DEBUG( "pid=%d index=%d start=%d end=%d discard=%d\n",
@@ -1853,7 +2213,7 @@ int radeon_cp_texture( DRM_IOCTL_ARGS )
 
        LOCK_TEST_WITH_RETURN( dev, filp );
 
-       DRM_COPY_FROM_USER_IOCTL( tex, (drm_radeon_texture_t *)data, sizeof(tex) );
+       DRM_COPY_FROM_USER_IOCTL( tex, (drm_radeon_texture_t __user *)data, sizeof(tex) );
 
        if ( tex.image == NULL ) {
                DRM_ERROR( "null texture image!\n" );
@@ -1861,7 +2221,7 @@ int radeon_cp_texture( DRM_IOCTL_ARGS )
        }
 
        if ( DRM_COPY_FROM_USER( &image,
-                            (drm_radeon_tex_image_t *)tex.image,
+                            (drm_radeon_tex_image_t __user *)tex.image,
                             sizeof(image) ) )
                return DRM_ERR(EFAULT);
 
@@ -1883,7 +2243,7 @@ int radeon_cp_stipple( DRM_IOCTL_ARGS )
 
        LOCK_TEST_WITH_RETURN( dev, filp );
 
-       DRM_COPY_FROM_USER_IOCTL( stipple, (drm_radeon_stipple_t *)data,
+       DRM_COPY_FROM_USER_IOCTL( stipple, (drm_radeon_stipple_t __user *)data,
                             sizeof(stipple) );
 
        if ( DRM_COPY_FROM_USER( &mask, stipple.mask, 32 * sizeof(u32) ) )
@@ -1913,7 +2273,7 @@ int radeon_cp_indirect( DRM_IOCTL_ARGS )
                return DRM_ERR(EINVAL);
        }
 
-       DRM_COPY_FROM_USER_IOCTL( indirect, (drm_radeon_indirect_t *)data,
+       DRM_COPY_FROM_USER_IOCTL( indirect, (drm_radeon_indirect_t __user *)data,
                             sizeof(indirect) );
 
        DRM_DEBUG( "indirect: idx=%d s=%d e=%d d=%d\n",
@@ -1993,7 +2353,7 @@ int radeon_cp_vertex2( DRM_IOCTL_ARGS )
 
        DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
 
-       DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex2_t *)data,
+       DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex2_t __user *)data,
                             sizeof(vertex) );
 
        DRM_DEBUG( "pid=%d index=%d discard=%d\n",
@@ -2107,7 +2467,7 @@ static int radeon_emit_packets(
 
        BEGIN_RING(sz+1);
        OUT_RING( CP_PACKET0( reg, (sz-1) ) );
-       OUT_RING_USER_TABLE( data, sz );
+       OUT_RING_TABLE( data, sz );
        ADVANCE_RING();
 
        cmdbuf->buf += sz * sizeof(int);
@@ -2121,7 +2481,6 @@ static __inline__ int radeon_emit_scalars(
        drm_radeon_cmd_buffer_t *cmdbuf )
 {
        int sz = header.scalars.count;
-       int *data = (int *)cmdbuf->buf;
        int start = header.scalars.offset;
        int stride = header.scalars.stride;
        RING_LOCALS;
@@ -2130,7 +2489,7 @@ static __inline__ int radeon_emit_scalars(
        OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
        OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
        OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
-       OUT_RING_USER_TABLE( data, sz );
+       OUT_RING_TABLE( cmdbuf->buf, sz );
        ADVANCE_RING();
        cmdbuf->buf += sz * sizeof(int);
        cmdbuf->bufsz -= sz * sizeof(int);
@@ -2145,7 +2504,6 @@ static __inline__ int radeon_emit_scalars2(
        drm_radeon_cmd_buffer_t *cmdbuf )
 {
        int sz = header.scalars.count;
-       int *data = (int *)cmdbuf->buf;
        int start = ((unsigned int)header.scalars.offset) + 0x100;
        int stride = header.scalars.stride;
        RING_LOCALS;
@@ -2154,7 +2512,7 @@ static __inline__ int radeon_emit_scalars2(
        OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
        OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
        OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
-       OUT_RING_USER_TABLE( data, sz );
+       OUT_RING_TABLE( cmdbuf->buf, sz );
        ADVANCE_RING();
        cmdbuf->buf += sz * sizeof(int);
        cmdbuf->bufsz -= sz * sizeof(int);
@@ -2167,7 +2525,6 @@ static __inline__ int radeon_emit_vectors(
        drm_radeon_cmd_buffer_t *cmdbuf )
 {
        int sz = header.vectors.count;
-       int *data = (int *)cmdbuf->buf;
        int start = header.vectors.offset;
        int stride = header.vectors.stride;
        RING_LOCALS;
@@ -2176,7 +2533,7 @@ static __inline__ int radeon_emit_vectors(
        OUT_RING( CP_PACKET0( RADEON_SE_TCL_VECTOR_INDX_REG, 0 ) );
        OUT_RING( start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
        OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_VECTOR_DATA_REG, (sz-1) ) );
-       OUT_RING_USER_TABLE( data, sz );
+       OUT_RING_TABLE( cmdbuf->buf, sz );
        ADVANCE_RING();
 
        cmdbuf->buf += sz * sizeof(int);
@@ -2191,7 +2548,7 @@ static int radeon_emit_packet3( drm_device_t *dev,
 {
        drm_radeon_private_t *dev_priv = dev->dev_private;
        unsigned int cmdsz;
-       int *cmd = (int *)cmdbuf->buf, ret;
+       int ret;
        RING_LOCALS;
 
        DRM_DEBUG("\n");
@@ -2203,7 +2560,7 @@ static int radeon_emit_packet3( drm_device_t *dev,
        }
 
        BEGIN_RING( cmdsz );
-       OUT_RING_USER_TABLE( cmd, cmdsz );
+       OUT_RING_TABLE( cmdbuf->buf, cmdsz );
        ADVANCE_RING();
 
        cmdbuf->buf += cmdsz * 4;
@@ -2220,8 +2577,8 @@ static int radeon_emit_packet3_cliprect( drm_device_t *dev,
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_clip_rect_t box;
        unsigned int cmdsz;
-       int *cmd = (int *)cmdbuf->buf, ret;
-       drm_clip_rect_t *boxes = cmdbuf->boxes;
+       int ret;
+       drm_clip_rect_t __user *boxes = cmdbuf->boxes;
        int i = 0;
        RING_LOCALS;
 
@@ -2238,7 +2595,7 @@ static int radeon_emit_packet3_cliprect( drm_device_t *dev,
 
        do {
                if ( i < cmdbuf->nbox ) {
-                       if (DRM_COPY_FROM_USER_UNCHECKED( &box, &boxes[i], sizeof(box) ))
+                       if (DRM_COPY_FROM_USER( &box, &boxes[i], sizeof(box) ))
                                return DRM_ERR(EFAULT);
                        /* FIXME The second and subsequent times round
                         * this loop, send a WAIT_UNTIL_3D_IDLE before
@@ -2261,7 +2618,7 @@ static int radeon_emit_packet3_cliprect( drm_device_t *dev,
                }
                
                BEGIN_RING( cmdsz );
-               OUT_RING_USER_TABLE( cmd, cmdsz );
+               OUT_RING_TABLE( cmdbuf->buf, cmdsz );
                ADVANCE_RING();
 
        } while ( ++i < cmdbuf->nbox );
@@ -2310,11 +2667,12 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_file_t *filp_priv;
        drm_device_dma_t *dma = dev->dma;
-       drm_buf_t *buf = 0;
+       drm_buf_t *buf = NULL;
        int idx;
        drm_radeon_cmd_buffer_t cmdbuf;
        drm_radeon_cmd_header_t header;
-       int orig_nbox;
+       int orig_nbox, orig_bufsz;
+       char *kbuf=NULL;
 
        LOCK_TEST_WITH_RETURN( dev, filp );
 
@@ -2325,30 +2683,35 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
 
        DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
 
-       DRM_COPY_FROM_USER_IOCTL( cmdbuf, (drm_radeon_cmd_buffer_t *)data,
+       DRM_COPY_FROM_USER_IOCTL( cmdbuf, (drm_radeon_cmd_buffer_t __user *)data,
                             sizeof(cmdbuf) );
 
        RING_SPACE_TEST_WITH_RETURN( dev_priv );
        VB_AGE_TEST_WITH_RETURN( dev_priv );
 
+       if (cmdbuf.bufsz > 64*1024 || cmdbuf.bufsz<0) {
+               return DRM_ERR(EINVAL);
+       }
 
-       if (DRM_VERIFYAREA_READ( cmdbuf.buf, cmdbuf.bufsz ))
-               return DRM_ERR(EFAULT);
-
-       if (cmdbuf.nbox &&
-           DRM_VERIFYAREA_READ(cmdbuf.boxes, 
-                        cmdbuf.nbox * sizeof(drm_clip_rect_t)))
-               return DRM_ERR(EFAULT);
+       /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
+        * races between checking values and using those values in other code,
+        * and simply to avoid a lot of function calls to copy in data.
+        */
+       orig_bufsz = cmdbuf.bufsz;
+       if (orig_bufsz != 0) {
+               kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
+               if (kbuf == NULL)
+                       return DRM_ERR(ENOMEM);
+               if (DRM_COPY_FROM_USER(kbuf, cmdbuf.buf, cmdbuf.bufsz))
+                       return DRM_ERR(EFAULT);
+               cmdbuf.buf = kbuf;
+       }
 
        orig_nbox = cmdbuf.nbox;
 
        while ( cmdbuf.bufsz >= sizeof(header) ) {
-               
-               if (DRM_GET_USER_UNCHECKED( header.i, (int *)cmdbuf.buf )) {
-                       DRM_ERROR("__get_user %p\n", cmdbuf.buf);
-                       return DRM_ERR(EFAULT);
-               }
 
+               header.i = *(int *)cmdbuf.buf;
                cmdbuf.buf += sizeof(header);
                cmdbuf.bufsz -= sizeof(header);
 
@@ -2357,7 +2720,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
                        DRM_DEBUG("RADEON_CMD_PACKET\n");
                        if (radeon_emit_packets( dev_priv, filp_priv, header, &cmdbuf )) {
                                DRM_ERROR("radeon_emit_packets failed\n");
-                               return DRM_ERR(EINVAL);
+                               goto err;
                        }
                        break;
 
@@ -2365,7 +2728,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
                        DRM_DEBUG("RADEON_CMD_SCALARS\n");
                        if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) {
                                DRM_ERROR("radeon_emit_scalars failed\n");
-                               return DRM_ERR(EINVAL);
+                               goto err;
                        }
                        break;
 
@@ -2373,7 +2736,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
                        DRM_DEBUG("RADEON_CMD_VECTORS\n");
                        if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) {
                                DRM_ERROR("radeon_emit_vectors failed\n");
-                               return DRM_ERR(EINVAL);
+                               goto err;
                        }
                        break;
 
@@ -2383,14 +2746,14 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
                        if ( idx < 0 || idx >= dma->buf_count ) {
                                DRM_ERROR( "buffer index %d (of %d max)\n",
                                           idx, dma->buf_count - 1 );
-                               return DRM_ERR(EINVAL);
+                               goto err;
                        }
 
                        buf = dma->buflist[idx];
                        if ( buf->filp != filp || buf->pending ) {
                                DRM_ERROR( "bad buffer %p %p %d\n",
                                           buf->filp, filp, buf->pending);
-                               return DRM_ERR(EINVAL);
+                               goto err;
                        }
 
                        radeon_cp_discard_buffer( dev, buf );
@@ -2400,7 +2763,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
                        DRM_DEBUG("RADEON_CMD_PACKET3\n");
                        if (radeon_emit_packet3( dev, filp_priv, &cmdbuf )) {
                                DRM_ERROR("radeon_emit_packet3 failed\n");
-                               return DRM_ERR(EINVAL);
+                               goto err;
                        }
                        break;
 
@@ -2408,7 +2771,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
                        DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
                        if (radeon_emit_packet3_cliprect( dev, filp_priv, &cmdbuf, orig_nbox )) {
                                DRM_ERROR("radeon_emit_packet3_clip failed\n");
-                               return DRM_ERR(EINVAL);
+                               goto err;
                        }
                        break;
 
@@ -2416,7 +2779,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
                        DRM_DEBUG("RADEON_CMD_SCALARS2\n");
                        if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) {
                                DRM_ERROR("radeon_emit_scalars2 failed\n");
-                               return DRM_ERR(EINVAL);
+                               goto err;
                        }
                        break;
 
@@ -2424,21 +2787,28 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
                        DRM_DEBUG("RADEON_CMD_WAIT\n");
                        if (radeon_emit_wait( dev, header.wait.flags )) {
                                DRM_ERROR("radeon_emit_wait failed\n");
-                               return DRM_ERR(EINVAL);
+                               goto err;
                        }
                        break;
                default:
                        DRM_ERROR("bad cmd_type %d at %p\n", 
                                  header.header.cmd_type,
                                  cmdbuf.buf - sizeof(header));
-                       return DRM_ERR(EINVAL);
+                       goto err;
                }
        }
 
+       if (orig_bufsz != 0)
+               drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
 
        DRM_DEBUG("DONE\n");
        COMMIT_RING();
        return 0;
+
+err:
+       if (orig_bufsz != 0)
+               drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
+       return DRM_ERR(EINVAL);
 }
 
 
@@ -2455,7 +2825,7 @@ int radeon_cp_getparam( DRM_IOCTL_ARGS )
                return DRM_ERR(EINVAL);
        }
 
-       DRM_COPY_FROM_USER_IOCTL( param, (drm_radeon_getparam_t *)data,
+       DRM_COPY_FROM_USER_IOCTL( param, (drm_radeon_getparam_t __user *)data,
                             sizeof(param) );
 
        DRM_DEBUG( "pid=%d\n", DRM_CURRENTPID );
@@ -2522,6 +2892,7 @@ int radeon_cp_setparam( DRM_IOCTL_ARGS ) {
        drm_radeon_private_t *dev_priv = dev->dev_private;
        drm_file_t *filp_priv;
        drm_radeon_setparam_t sp;
+       struct drm_radeon_driver_file_fields *radeon_priv;
 
        if ( !dev_priv ) {
                DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
@@ -2530,13 +2901,28 @@ int radeon_cp_setparam( DRM_IOCTL_ARGS ) {
 
        DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
 
-       DRM_COPY_FROM_USER_IOCTL( sp, ( drm_radeon_setparam_t* )data,
+       DRM_COPY_FROM_USER_IOCTL( sp, ( drm_radeon_setparam_t __user * )data,
                                  sizeof( sp ) );
 
        switch( sp.param ) {
        case RADEON_SETPARAM_FB_LOCATION:
-               filp_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
+               radeon_priv = filp_priv->driver_priv;
+               radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
                break;
+       case RADEON_SETPARAM_SWITCH_TILING:
+               if (sp.value == 0) {
+                       DRM_DEBUG( "color tiling disabled\n" );
+                       dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
+                       dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
+                       dev_priv->sarea_priv->tiling_enabled = 0;
+               }
+               else if (sp.value == 1) {
+                       DRM_DEBUG( "color tiling enabled\n" );
+                       dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
+                       dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
+                       dev_priv->sarea_priv->tiling_enabled = 1;
+               }
+               break;  
        default:
                DRM_DEBUG( "Invalid parameter %d\n", sp.param );
                return DRM_ERR( EINVAL );
@@ -2544,3 +2930,53 @@ int radeon_cp_setparam( DRM_IOCTL_ARGS ) {
 
        return 0;
 }
+
+/* When a client dies:
+ *    - Check for and clean up flipped page state
+ *    - Call radeon_mem_release() for this client on the gart and fb heaps
+ *    - Release the surfaces this client still owns
+ * DRM infrastructure takes care of reclaiming dma buffers.
+ */
+void radeon_driver_prerelease(drm_device_t *dev, DRMFILE filp)
+{
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+
+       if (dev_priv == NULL)
+               return;         /* no device-private data, nothing to release */
+       if (dev_priv->page_flipping)
+               radeon_do_cleanup_pageflip(dev);
+       radeon_mem_release(filp, dev_priv->gart_heap);
+       radeon_mem_release(filp, dev_priv->fb_heap);
+       radeon_surfaces_release(filp, dev_priv);
+}
+
+void radeon_driver_pretakedown(drm_device_t *dev)
+{
+       radeon_do_release(dev); /* thin wrapper: all the work is done by radeon_do_release() */
+}
+
+/* Allocate the radeon per-file state, attach it to filp_priv->driver_priv
+ * and seed radeon_fb_delta.  Returns 0, or -ENOMEM if the allocation fails.
+ */
+int radeon_driver_open_helper(drm_device_t *dev, drm_file_t *filp_priv)
+{
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       struct drm_radeon_driver_file_fields *file_state;
+
+       file_state = drm_alloc(sizeof(*file_state), DRM_MEM_FILES);
+       if (file_state == NULL)
+               return -ENOMEM;
+
+       /* Without device-private data there is no fb_location to copy; use 0. */
+       file_state->radeon_fb_delta = dev_priv ? dev_priv->fb_location : 0;
+       filp_priv->driver_priv = file_state;
+       return 0;
+}
+
+
+/* Free the per-file state stored in filp_priv->driver_priv; dev is unused. */
+void radeon_driver_free_filp_priv(drm_device_t *dev, drm_file_t *filp_priv)
+{
+       struct drm_radeon_driver_file_fields *file_state = filp_priv->driver_priv;
+       drm_free(file_state, sizeof(*file_state), DRM_MEM_FILES);
+}