X-Git-Url: http://git.onelab.eu/?a=blobdiff_plain;f=drivers%2Fchar%2Fdrm%2Fradeon_state.c;h=b8d38e106708dcdeb0785a27a524247dc026224e;hb=6a77f38946aaee1cd85eeec6cf4229b204c15071;hp=f3cc86eea0ab36764d2478ee40fcadc46f04f355;hpb=87fc8d1bb10cd459024a742c6a10961fefcef18f;p=linux-2.6.git diff --git a/drivers/char/drm/radeon_state.c b/drivers/char/drm/radeon_state.c index f3cc86eea..b8d38e106 100644 --- a/drivers/char/drm/radeon_state.c +++ b/drivers/char/drm/radeon_state.c @@ -27,13 +27,43 @@ * Kevin E. Martin */ -#include "radeon.h" #include "drmP.h" #include "drm.h" #include "drm_sarea.h" #include "radeon_drm.h" #include "radeon_drv.h" +drm_ioctl_desc_t radeon_ioctls[] = { + [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = { radeon_cp_init, 1, 1 }, + [DRM_IOCTL_NR(DRM_RADEON_CP_START)] = { radeon_cp_start, 1, 1 }, + [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = { radeon_cp_stop, 1, 1 }, + [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = { radeon_cp_reset, 1, 1 }, + [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = { radeon_cp_idle, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = { radeon_cp_resume, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_RESET)] = { radeon_engine_reset, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = { radeon_fullscreen, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_SWAP)] = { radeon_cp_swap, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = { radeon_cp_clear, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = { radeon_cp_vertex, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_INDICES)] = { radeon_cp_indices, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = { radeon_cp_texture, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = { radeon_cp_stipple, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = { radeon_cp_indirect, 1, 1 }, + [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = { radeon_cp_vertex2, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = { radeon_cp_cmdbuf, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = { radeon_cp_getparam, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_FLIP)] = { radeon_cp_flip, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = { radeon_mem_alloc, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_FREE)] = { radeon_mem_free, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = { radeon_mem_init_heap,1, 1 }, + [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = { radeon_irq_emit, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = { radeon_irq_wait, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = { radeon_cp_setparam, 1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = { radeon_surface_alloc,1, 0 }, + [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = { radeon_surface_free, 1, 0 } +}; + +int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls); /* ================================================================ * Helper functions for client state checking and fixup @@ -63,21 +93,6 @@ static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_p return 0; } -static __inline__ int radeon_check_and_fixup_offset_user( drm_radeon_private_t *dev_priv, - drm_file_t *filp_priv, - u32 __user *offset ) { - u32 off; - - DRM_GET_USER_UNCHECKED( off, offset ); - - if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &off ) ) - return DRM_ERR( EINVAL ); - - DRM_PUT_USER_UNCHECKED( offset, off ); - - return 0; -} - static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_priv, drm_file_t *filp_priv, int id, @@ -85,18 +100,18 @@ static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_ switch ( id ) { case RADEON_EMIT_PP_MISC: - if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv, - &data[( RADEON_RB3D_DEPTHOFFSET - - RADEON_PP_MISC ) / 4] ) ) { + if ( 
radeon_check_and_fixup_offset( dev_priv, filp_priv, + &data[( RADEON_RB3D_DEPTHOFFSET + - RADEON_PP_MISC ) / 4] ) ) { DRM_ERROR( "Invalid depth buffer offset\n" ); return DRM_ERR( EINVAL ); } break; case RADEON_EMIT_PP_CNTL: - if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv, - &data[( RADEON_RB3D_COLOROFFSET - - RADEON_PP_CNTL ) / 4] ) ) { + if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, + &data[( RADEON_RB3D_COLOROFFSET + - RADEON_PP_CNTL ) / 4] ) ) { DRM_ERROR( "Invalid colour buffer offset\n" ); return DRM_ERR( EINVAL ); } @@ -108,8 +123,8 @@ static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_ case R200_EMIT_PP_TXOFFSET_3: case R200_EMIT_PP_TXOFFSET_4: case R200_EMIT_PP_TXOFFSET_5: - if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv, - &data[0] ) ) { + if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, + &data[0] ) ) { DRM_ERROR( "Invalid R200 texture offset\n" ); return DRM_ERR( EINVAL ); } @@ -118,9 +133,9 @@ static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_ case RADEON_EMIT_PP_TXFILTER_0: case RADEON_EMIT_PP_TXFILTER_1: case RADEON_EMIT_PP_TXFILTER_2: - if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv, - &data[( RADEON_PP_TXOFFSET_0 - - RADEON_PP_TXFILTER_0 ) / 4] ) ) { + if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, + &data[( RADEON_PP_TXOFFSET_0 + - RADEON_PP_TXFILTER_0 ) / 4] ) ) { DRM_ERROR( "Invalid R100 texture offset\n" ); return DRM_ERR( EINVAL ); } @@ -134,9 +149,8 @@ static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_ case R200_EMIT_PP_CUBIC_OFFSETS_5: { int i; for ( i = 0; i < 5; i++ ) { - if ( radeon_check_and_fixup_offset_user( dev_priv, - filp_priv, - &data[i] ) ) { + if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, + &data[i] ) ) { DRM_ERROR( "Invalid R200 cubic texture offset\n" ); return DRM_ERR( EINVAL ); } @@ -204,6 +218,7 @@ static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_ case RADEON_EMIT_PP_TEX_SIZE_1: case RADEON_EMIT_PP_TEX_SIZE_2: case R200_EMIT_RB3D_BLENDCOLOR: + case R200_EMIT_TCL_POINT_SPRITE_CNTL: /* These packets don't contain memory offsets */ break; @@ -219,17 +234,11 @@ static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_ drm_file_t *filp_priv, drm_radeon_cmd_buffer_t *cmdbuf, unsigned int *cmdsz ) { - u32 tmp[4]; - u32 __user *cmd = (u32 __user *)cmdbuf->buf; - - if ( DRM_COPY_FROM_USER_UNCHECKED( tmp, cmd, sizeof( tmp ) ) ) { - DRM_ERROR( "Failed to copy data from user space\n" ); - return DRM_ERR( EFAULT ); - } + u32 *cmd = (u32 *) cmdbuf->buf; - *cmdsz = 2 + ( ( tmp[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 ); + *cmdsz = 2 + ( ( cmd[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 ); - if ( ( tmp[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) { + if ( ( cmd[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) { DRM_ERROR( "Not a type 3 packet\n" ); return DRM_ERR( EINVAL ); } @@ -240,32 +249,27 @@ static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_ } /* Check client state and fix it up if necessary */ - if ( tmp[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */ + if ( cmd[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */ u32 offset; - if ( tmp[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL + if ( cmd[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL | RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) { - offset = tmp[2] << 10; + offset = cmd[2] << 10; if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) { DRM_ERROR( "Invalid 
first packet offset\n" ); return DRM_ERR( EINVAL ); } - tmp[2] = ( tmp[2] & 0xffc00000 ) | offset >> 10; + cmd[2] = ( cmd[2] & 0xffc00000 ) | offset >> 10; } - if ( ( tmp[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) && - ( tmp[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) { - offset = tmp[3] << 10; + if ( ( cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) && + ( cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) { + offset = cmd[3] << 10; if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) { DRM_ERROR( "Invalid second packet offset\n" ); return DRM_ERR( EINVAL ); } - tmp[3] = ( tmp[3] & 0xffc00000 ) | offset >> 10; - } - - if ( DRM_COPY_TO_USER_UNCHECKED( cmd, tmp, sizeof( tmp ) ) ) { - DRM_ERROR( "Failed to copy data to user space\n" ); - return DRM_ERR( EFAULT ); + cmd[3] = ( cmd[3] & 0xffc00000 ) | offset >> 10; } } @@ -568,6 +572,7 @@ static struct { { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" }, { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" }, { R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" }, + { R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" }, }; @@ -778,13 +783,160 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, } } } + + /* hyper z clear */ + /* no docs available, based on reverse engeneering by Stephane Marchesin */ + if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) { + + int i; + int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z? + (dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4); + + u32 clearmask; + + u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth | + ((clear->depth_mask & 0xff) << 24); + + + /* Make sure we restore the 3D state next time. + * we haven't touched any "normal" state - still need this? + */ + dev_priv->sarea_priv->ctx_owner = 0; + + if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) { + /* FIXME : reverse engineer that for Rx00 cards */ + /* FIXME : the mask supposedly contains low-res z values. So can't set + just to the max (0xff? or actually 0x3fff?), need to take z clear + value into account? */ + /* pattern seems to work for r100, though get slight + rendering errors with glxgears. If hierz is not enabled for r100, + only 4 bits which indicate clear (15,16,31,32, all zero) matter, the + other ones are ignored, and the same clear mask can be used. That's + very different behaviour than R200 which needs different clear mask + and different number of tiles to clear if hierz is enabled or not !?! + */ + clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f; + } + else { + /* clear mask : chooses the clearing pattern. + rv250: could be used to clear only parts of macrotiles + (but that would get really complicated...)? + bit 0 and 1 (either or both of them ?!?!) are used to + not clear tile (or maybe one of the bits indicates if the tile is + compressed or not), bit 2 and 3 to not clear tile 1,...,. + Pattern is as follows: + | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29| + bits ------------------------------------------------- + | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31| + rv100: clearmask covers 2x8 4x1 tiles, but one clear still + covers 256 pixels ?!? + */ + clearmask = 0x0; + } + + BEGIN_RING( 8 ); + RADEON_WAIT_UNTIL_2D_IDLE(); + OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE, + tempRB3D_DEPTHCLEARVALUE); + /* what offset is this exactly ? 
*/ + OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 ); + /* need ctlstat, otherwise get some strange black flickering */ + OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL ); + ADVANCE_RING(); + + for (i = 0; i < nbox; i++) { + int tileoffset, nrtilesx, nrtilesy, j; + /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */ + if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) { + /* FIXME : figure this out for r200 (when hierz is enabled). Or + maybe r200 actually doesn't need to put the low-res z value into + the tile cache like r100, but just needs to clear the hi-level z-buffer? + Works for R100, both with hierz and without. + R100 seems to operate on 2x1 8x8 tiles, but... + odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially + problematic with resolutions which are not 64 pix aligned? */ + tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6; + nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4; + nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3); + for (j = 0; j <= nrtilesy; j++) { + BEGIN_RING( 4 ); + OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) ); + /* first tile */ + OUT_RING( tileoffset * 8 ); + /* the number of tiles to clear */ + OUT_RING( nrtilesx + 4 ); + /* clear mask : chooses the clearing pattern. */ + OUT_RING( clearmask ); + ADVANCE_RING(); + tileoffset += depthpixperline >> 6; + } + } + else if (dev_priv->microcode_version==UCODE_R200) { + /* works for rv250. */ + /* find first macro tile (8x2 4x4 z-pixels on rv250) */ + tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5; + nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5); + nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3); + for (j = 0; j <= nrtilesy; j++) { + BEGIN_RING( 4 ); + OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) ); + /* first tile */ + /* judging by the first tile offset needed, could possibly + directly address/clear 4x4 tiles instead of 8x2 * 4x4 + macro tiles, though would still need clear mask for + right/bottom if truely 4x4 granularity is desired ? */ + OUT_RING( tileoffset * 16 ); + /* the number of tiles to clear */ + OUT_RING( nrtilesx + 1 ); + /* clear mask : chooses the clearing pattern. */ + OUT_RING( clearmask ); + ADVANCE_RING(); + tileoffset += depthpixperline >> 5; + } + } + else { /* rv 100 */ + /* rv100 might not need 64 pix alignment, who knows */ + /* offsets are, hmm, weird */ + tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6; + nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4; + nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4); + for (j = 0; j <= nrtilesy; j++) { + BEGIN_RING( 4 ); + OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) ); + OUT_RING( tileoffset * 128 ); + /* the number of tiles to clear */ + OUT_RING( nrtilesx + 4 ); + /* clear mask : chooses the clearing pattern. */ + OUT_RING( clearmask ); + ADVANCE_RING(); + tileoffset += depthpixperline >> 6; + } + } + } + + /* TODO don't always clear all hi-level z tiles */ + if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200) + && (flags & RADEON_USE_HIERZ)) + /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */ + /* FIXME : the mask supposedly contains low-res z values. So can't set + just to the max (0xff? or actually 0x3fff?), need to take z clear + value into account? 
*/ + { + BEGIN_RING( 4 ); + OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) ); + OUT_RING( 0x0 ); /* First tile */ + OUT_RING( 0x3cc0 ); + OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f); + ADVANCE_RING(); + } + } /* We have to clear the depth and/or stencil buffers by * rendering a quad into just those buffers. Thus, we have to * make sure the 3D engine is configured correctly. */ - if ( dev_priv->is_r200 && - (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) { + if ((dev_priv->microcode_version == UCODE_R200) && + (flags & (RADEON_DEPTH | RADEON_STENCIL))) { int tempPP_CNTL; int tempRE_CNTL; @@ -803,7 +955,6 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, tempRE_CNTL = 0; tempRB3D_CNTL = depth_clear->rb3d_cntl; - tempRB3D_CNTL &= ~(1<<15); /* unset radeon magic flag */ tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl; tempRB3D_STENCILREFMASK = 0x0; @@ -854,6 +1005,14 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, tempRB3D_STENCILREFMASK = 0x00000000; } + if (flags & RADEON_USE_COMP_ZBUF) { + tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE | + RADEON_Z_DECOMPRESSION_ENABLE; + } + if (flags & RADEON_USE_HIERZ) { + tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE; + } + BEGIN_RING( 26 ); RADEON_WAIT_UNTIL_2D_IDLE(); @@ -908,6 +1067,8 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, } else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) { + int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl; + rb3d_cntl = depth_clear->rb3d_cntl; if ( flags & RADEON_DEPTH ) { @@ -924,6 +1085,14 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, rb3d_stencilrefmask = 0x00000000; } + if (flags & RADEON_USE_COMP_ZBUF) { + tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE | + RADEON_Z_DECOMPRESSION_ENABLE; + } + if (flags & RADEON_USE_HIERZ) { + tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE; + } + BEGIN_RING( 13 ); RADEON_WAIT_UNTIL_2D_IDLE(); @@ -931,8 +1100,7 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, OUT_RING( 0x00000000 ); OUT_RING( rb3d_cntl ); - OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, - depth_clear->rb3d_zstencilcntl ); + OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL ); OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask ); OUT_RING_REG( RADEON_RB3D_PLANEMASK, @@ -1440,7 +1608,8 @@ static int radeon_cp_dispatch_texture( DRMFILE filp, } if ( !buf ) { DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n"); - DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ); + if (DRM_COPY_TO_USER( tex->image, image, sizeof(*image) )) + return DRM_ERR(EFAULT); return DRM_ERR(EAGAIN); } @@ -1537,10 +1706,203 @@ static void radeon_cp_dispatch_stipple( drm_device_t *dev, u32 *stipple ) ADVANCE_RING(); } +static void radeon_apply_surface_regs(int surf_index, drm_radeon_private_t *dev_priv) +{ + if (!dev_priv->mmio) + return; + + radeon_do_cp_idle(dev_priv); + + RADEON_WRITE(RADEON_SURFACE0_INFO + 16*surf_index, + dev_priv->surfaces[surf_index].flags); + RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16*surf_index, + dev_priv->surfaces[surf_index].lower); + RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16*surf_index, + dev_priv->surfaces[surf_index].upper); +} + + +/* Allocates a virtual surface + * doesn't always allocate a real surface, will stretch an existing + * surface when possible. + * + * Note that refcount can be at most 2, since during a free refcount=3 + * might mean we have to allocate a new surface which might not always + * be available. + * For example : we allocate three contigous surfaces ABC. 
If B is + * freed, we suddenly need two surfaces to store A and C, which might + * not always be available. + */ +static int alloc_surface(drm_radeon_surface_alloc_t* new, drm_radeon_private_t *dev_priv, DRMFILE filp) +{ + struct radeon_virt_surface *s; + int i; + int virt_surface_index; + uint32_t new_upper, new_lower; + + new_lower = new->address; + new_upper = new_lower + new->size - 1; + + /* sanity check */ + if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) || + ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) != RADEON_SURF_ADDRESS_FIXED_MASK) || + ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0)) + return -1; + + /* make sure there is no overlap with existing surfaces */ + for (i = 0; i < RADEON_MAX_SURFACES; i++) { + if ((dev_priv->surfaces[i].refcount != 0) && + (( (new_lower >= dev_priv->surfaces[i].lower) && + (new_lower < dev_priv->surfaces[i].upper) ) || + ( (new_lower < dev_priv->surfaces[i].lower) && + (new_upper > dev_priv->surfaces[i].lower) )) ){ + return -1;} + } + + /* find a virtual surface */ + for (i = 0; i < 2*RADEON_MAX_SURFACES; i++) + if (dev_priv->virt_surfaces[i].filp == 0) + break; + if (i == 2*RADEON_MAX_SURFACES) { + return -1;} + virt_surface_index = i; + + /* try to reuse an existing surface */ + for (i = 0; i < RADEON_MAX_SURFACES; i++) { + /* extend before */ + if ((dev_priv->surfaces[i].refcount == 1) && + (new->flags == dev_priv->surfaces[i].flags) && + (new_upper + 1 == dev_priv->surfaces[i].lower)) { + s = &(dev_priv->virt_surfaces[virt_surface_index]); + s->surface_index = i; + s->lower = new_lower; + s->upper = new_upper; + s->flags = new->flags; + s->filp = filp; + dev_priv->surfaces[i].refcount++; + dev_priv->surfaces[i].lower = s->lower; + radeon_apply_surface_regs(s->surface_index, dev_priv); + return virt_surface_index; + } + + /* extend after */ + if ((dev_priv->surfaces[i].refcount == 1) && + (new->flags == dev_priv->surfaces[i].flags) && + (new_lower == dev_priv->surfaces[i].upper + 1)) { + s = &(dev_priv->virt_surfaces[virt_surface_index]); + s->surface_index = i; + s->lower = new_lower; + s->upper = new_upper; + s->flags = new->flags; + s->filp = filp; + dev_priv->surfaces[i].refcount++; + dev_priv->surfaces[i].upper = s->upper; + radeon_apply_surface_regs(s->surface_index, dev_priv); + return virt_surface_index; + } + } + + /* okay, we need a new one */ + for (i = 0; i < RADEON_MAX_SURFACES; i++) { + if (dev_priv->surfaces[i].refcount == 0) { + s = &(dev_priv->virt_surfaces[virt_surface_index]); + s->surface_index = i; + s->lower = new_lower; + s->upper = new_upper; + s->flags = new->flags; + s->filp = filp; + dev_priv->surfaces[i].refcount = 1; + dev_priv->surfaces[i].lower = s->lower; + dev_priv->surfaces[i].upper = s->upper; + dev_priv->surfaces[i].flags = s->flags; + radeon_apply_surface_regs(s->surface_index, dev_priv); + return virt_surface_index; + } + } + + /* we didn't find anything */ + return -1; +} + +static int free_surface(DRMFILE filp, drm_radeon_private_t *dev_priv, int lower) +{ + struct radeon_virt_surface *s; + int i; + /* find the virtual surface */ + for(i = 0; i < 2*RADEON_MAX_SURFACES; i++) { + s = &(dev_priv->virt_surfaces[i]); + if (s->filp) { + if ((lower == s->lower) && (filp == s->filp)) { + if (dev_priv->surfaces[s->surface_index].lower == s->lower) + dev_priv->surfaces[s->surface_index].lower = s->upper; + + if (dev_priv->surfaces[s->surface_index].upper == s->upper) + dev_priv->surfaces[s->surface_index].upper = s->lower; + + dev_priv->surfaces[s->surface_index].refcount--; + if 
(dev_priv->surfaces[s->surface_index].refcount == 0) + dev_priv->surfaces[s->surface_index].flags = 0; + s->filp = 0; + radeon_apply_surface_regs(s->surface_index, dev_priv); + return 0; + } + } + } + return 1; +} + +static void radeon_surfaces_release(DRMFILE filp, drm_radeon_private_t *dev_priv) +{ + int i; + for( i = 0; i < 2*RADEON_MAX_SURFACES; i++) + { + if (dev_priv->virt_surfaces[i].filp == filp) + free_surface(filp, dev_priv, dev_priv->virt_surfaces[i].lower); + } +} /* ================================================================ * IOCTL functions */ +int radeon_surface_alloc(DRM_IOCTL_ARGS) +{ + DRM_DEVICE; + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_surface_alloc_t alloc; + + if (!dev_priv) { + DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ ); + return DRM_ERR(EINVAL); + } + + DRM_COPY_FROM_USER_IOCTL(alloc, (drm_radeon_surface_alloc_t __user *)data, + sizeof(alloc)); + + if (alloc_surface(&alloc, dev_priv, filp) == -1) + return DRM_ERR(EINVAL); + else + return 0; +} + +int radeon_surface_free(DRM_IOCTL_ARGS) +{ + DRM_DEVICE; + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_surface_free_t memfree; + + if (!dev_priv) { + DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ ); + return DRM_ERR(EINVAL); + } + + DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_mem_free_t __user *)data, + sizeof(memfree) ); + + if (free_surface(filp, dev_priv, memfree.address)) + return DRM_ERR(EINVAL); + else + return 0; +} int radeon_cp_clear( DRM_IOCTL_ARGS ) { @@ -1596,7 +1958,7 @@ static int radeon_do_init_pageflip( drm_device_t *dev ) return 0; } -/* Called whenever a client dies, from DRM(release). +/* Called whenever a client dies, from drm_release. * NOTE: Lock isn't necessarily held when this is called! 
*/ int radeon_do_cleanup_pageflip( drm_device_t *dev ) @@ -2084,7 +2446,7 @@ static int radeon_emit_packets( { int id = (int)header.packet.packet_id; int sz, reg; - int __user *data = (int __user *)cmdbuf->buf; + int *data = (int *)cmdbuf->buf; RING_LOCALS; if (id >= RADEON_MAX_STATE_PACKETS) @@ -2105,7 +2467,7 @@ static int radeon_emit_packets( BEGIN_RING(sz+1); OUT_RING( CP_PACKET0( reg, (sz-1) ) ); - OUT_RING_USER_TABLE( data, sz ); + OUT_RING_TABLE( data, sz ); ADVANCE_RING(); cmdbuf->buf += sz * sizeof(int); @@ -2119,7 +2481,6 @@ static __inline__ int radeon_emit_scalars( drm_radeon_cmd_buffer_t *cmdbuf ) { int sz = header.scalars.count; - int __user *data = (int __user *)cmdbuf->buf; int start = header.scalars.offset; int stride = header.scalars.stride; RING_LOCALS; @@ -2128,7 +2489,7 @@ static __inline__ int radeon_emit_scalars( OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) ); OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) ); - OUT_RING_USER_TABLE( data, sz ); + OUT_RING_TABLE( cmdbuf->buf, sz ); ADVANCE_RING(); cmdbuf->buf += sz * sizeof(int); cmdbuf->bufsz -= sz * sizeof(int); @@ -2143,7 +2504,6 @@ static __inline__ int radeon_emit_scalars2( drm_radeon_cmd_buffer_t *cmdbuf ) { int sz = header.scalars.count; - int __user *data = (int __user *)cmdbuf->buf; int start = ((unsigned int)header.scalars.offset) + 0x100; int stride = header.scalars.stride; RING_LOCALS; @@ -2152,7 +2512,7 @@ static __inline__ int radeon_emit_scalars2( OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) ); OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) ); - OUT_RING_USER_TABLE( data, sz ); + OUT_RING_TABLE( cmdbuf->buf, sz ); ADVANCE_RING(); cmdbuf->buf += sz * sizeof(int); cmdbuf->bufsz -= sz * sizeof(int); @@ -2165,7 +2525,6 @@ static __inline__ int radeon_emit_vectors( drm_radeon_cmd_buffer_t *cmdbuf ) { int sz = header.vectors.count; - int __user *data = (int __user *)cmdbuf->buf; int start = header.vectors.offset; int stride = header.vectors.stride; RING_LOCALS; @@ -2174,7 +2533,7 @@ static __inline__ int radeon_emit_vectors( OUT_RING( CP_PACKET0( RADEON_SE_TCL_VECTOR_INDX_REG, 0 ) ); OUT_RING( start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_VECTOR_DATA_REG, (sz-1) ) ); - OUT_RING_USER_TABLE( data, sz ); + OUT_RING_TABLE( cmdbuf->buf, sz ); ADVANCE_RING(); cmdbuf->buf += sz * sizeof(int); @@ -2189,7 +2548,6 @@ static int radeon_emit_packet3( drm_device_t *dev, { drm_radeon_private_t *dev_priv = dev->dev_private; unsigned int cmdsz; - int __user *cmd = (int __user *)cmdbuf->buf; int ret; RING_LOCALS; @@ -2202,7 +2560,7 @@ static int radeon_emit_packet3( drm_device_t *dev, } BEGIN_RING( cmdsz ); - OUT_RING_USER_TABLE( cmd, cmdsz ); + OUT_RING_TABLE( cmdbuf->buf, cmdsz ); ADVANCE_RING(); cmdbuf->buf += cmdsz * 4; @@ -2219,7 +2577,6 @@ static int radeon_emit_packet3_cliprect( drm_device_t *dev, drm_radeon_private_t *dev_priv = dev->dev_private; drm_clip_rect_t box; unsigned int cmdsz; - int __user *cmd = (int __user *)cmdbuf->buf; int ret; drm_clip_rect_t __user *boxes = cmdbuf->boxes; int i = 0; @@ -2238,7 +2595,7 @@ static int radeon_emit_packet3_cliprect( drm_device_t *dev, do { if ( i < cmdbuf->nbox ) { - if (DRM_COPY_FROM_USER_UNCHECKED( &box, &boxes[i], sizeof(box) )) + if (DRM_COPY_FROM_USER( &box, &boxes[i], sizeof(box) )) return DRM_ERR(EFAULT); /* FIXME The 
second and subsequent times round * this loop, send a WAIT_UNTIL_3D_IDLE before @@ -2261,7 +2618,7 @@ static int radeon_emit_packet3_cliprect( drm_device_t *dev, } BEGIN_RING( cmdsz ); - OUT_RING_USER_TABLE( cmd, cmdsz ); + OUT_RING_TABLE( cmdbuf->buf, cmdsz ); ADVANCE_RING(); } while ( ++i < cmdbuf->nbox ); @@ -2314,7 +2671,8 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) int idx; drm_radeon_cmd_buffer_t cmdbuf; drm_radeon_cmd_header_t header; - int orig_nbox; + int orig_nbox, orig_bufsz; + char *kbuf=NULL; LOCK_TEST_WITH_RETURN( dev, filp ); @@ -2331,24 +2689,29 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) RING_SPACE_TEST_WITH_RETURN( dev_priv ); VB_AGE_TEST_WITH_RETURN( dev_priv ); + if (cmdbuf.bufsz > 64*1024 || cmdbuf.bufsz<0) { + return DRM_ERR(EINVAL); + } - if (DRM_VERIFYAREA_READ( cmdbuf.buf, cmdbuf.bufsz )) - return DRM_ERR(EFAULT); - - if (cmdbuf.nbox && - DRM_VERIFYAREA_READ(cmdbuf.boxes, - cmdbuf.nbox * sizeof(drm_clip_rect_t))) - return DRM_ERR(EFAULT); + /* Allocate an in-kernel area and copy in the cmdbuf. Do this to avoid + * races between checking values and using those values in other code, + * and simply to avoid a lot of function calls to copy in data. + */ + orig_bufsz = cmdbuf.bufsz; + if (orig_bufsz != 0) { + kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER); + if (kbuf == NULL) + return DRM_ERR(ENOMEM); + if (DRM_COPY_FROM_USER(kbuf, cmdbuf.buf, cmdbuf.bufsz)) + return DRM_ERR(EFAULT); + cmdbuf.buf = kbuf; + } orig_nbox = cmdbuf.nbox; while ( cmdbuf.bufsz >= sizeof(header) ) { - - if (DRM_GET_USER_UNCHECKED( header.i, (int __user *)cmdbuf.buf )) { - DRM_ERROR("__get_user %p\n", cmdbuf.buf); - return DRM_ERR(EFAULT); - } + header.i = *(int *)cmdbuf.buf; cmdbuf.buf += sizeof(header); cmdbuf.bufsz -= sizeof(header); @@ -2357,7 +2720,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) DRM_DEBUG("RADEON_CMD_PACKET\n"); if (radeon_emit_packets( dev_priv, filp_priv, header, &cmdbuf )) { DRM_ERROR("radeon_emit_packets failed\n"); - return DRM_ERR(EINVAL); + goto err; } break; @@ -2365,7 +2728,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) DRM_DEBUG("RADEON_CMD_SCALARS\n"); if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) { DRM_ERROR("radeon_emit_scalars failed\n"); - return DRM_ERR(EINVAL); + goto err; } break; @@ -2373,7 +2736,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) DRM_DEBUG("RADEON_CMD_VECTORS\n"); if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) { DRM_ERROR("radeon_emit_vectors failed\n"); - return DRM_ERR(EINVAL); + goto err; } break; @@ -2383,14 +2746,14 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) if ( idx < 0 || idx >= dma->buf_count ) { DRM_ERROR( "buffer index %d (of %d max)\n", idx, dma->buf_count - 1 ); - return DRM_ERR(EINVAL); + goto err; } buf = dma->buflist[idx]; if ( buf->filp != filp || buf->pending ) { DRM_ERROR( "bad buffer %p %p %d\n", buf->filp, filp, buf->pending); - return DRM_ERR(EINVAL); + goto err; } radeon_cp_discard_buffer( dev, buf ); @@ -2400,7 +2763,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) DRM_DEBUG("RADEON_CMD_PACKET3\n"); if (radeon_emit_packet3( dev, filp_priv, &cmdbuf )) { DRM_ERROR("radeon_emit_packet3 failed\n"); - return DRM_ERR(EINVAL); + goto err; } break; @@ -2408,7 +2771,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n"); if (radeon_emit_packet3_cliprect( dev, filp_priv, &cmdbuf, orig_nbox )) { DRM_ERROR("radeon_emit_packet3_clip failed\n"); - return DRM_ERR(EINVAL); + goto err; } break; @@ -2416,7 +2779,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) DRM_DEBUG("RADEON_CMD_SCALARS2\n"); if 
(radeon_emit_scalars2( dev_priv, header, &cmdbuf )) { DRM_ERROR("radeon_emit_scalars2 failed\n"); - return DRM_ERR(EINVAL); + goto err; } break; @@ -2424,21 +2787,28 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) DRM_DEBUG("RADEON_CMD_WAIT\n"); if (radeon_emit_wait( dev, header.wait.flags )) { DRM_ERROR("radeon_emit_wait failed\n"); - return DRM_ERR(EINVAL); + goto err; } break; default: DRM_ERROR("bad cmd_type %d at %p\n", header.header.cmd_type, cmdbuf.buf - sizeof(header)); - return DRM_ERR(EINVAL); + goto err; } } + if (orig_bufsz != 0) + drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER); DRM_DEBUG("DONE\n"); COMMIT_RING(); return 0; + +err: + if (orig_bufsz != 0) + drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER); + return DRM_ERR(EINVAL); } @@ -2539,6 +2909,20 @@ int radeon_cp_setparam( DRM_IOCTL_ARGS ) { radeon_priv = filp_priv->driver_priv; radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value; break; + case RADEON_SETPARAM_SWITCH_TILING: + if (sp.value == 0) { + DRM_DEBUG( "color tiling disabled\n" ); + dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO; + dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO; + dev_priv->sarea_priv->tiling_enabled = 0; + } + else if (sp.value == 1) { + DRM_DEBUG( "color tiling enabled\n" ); + dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO; + dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO; + dev_priv->sarea_priv->tiling_enabled = 1; + } + break; default: DRM_DEBUG( "Invalid parameter %d\n", sp.param ); return DRM_ERR( EINVAL ); @@ -2553,7 +2937,7 @@ int radeon_cp_setparam( DRM_IOCTL_ARGS ) { * * DRM infrastructure takes care of reclaiming dma buffers. */ -static void radeon_driver_prerelease(drm_device_t *dev, DRMFILE filp) +void radeon_driver_prerelease(drm_device_t *dev, DRMFILE filp) { if ( dev->dev_private ) { drm_radeon_private_t *dev_priv = dev->dev_private; @@ -2562,20 +2946,21 @@ static void radeon_driver_prerelease(drm_device_t *dev, DRMFILE filp) } radeon_mem_release( filp, dev_priv->gart_heap ); radeon_mem_release( filp, dev_priv->fb_heap ); + radeon_surfaces_release(filp, dev_priv); } } -static void radeon_driver_pretakedown(drm_device_t *dev) +void radeon_driver_pretakedown(drm_device_t *dev) { radeon_do_release(dev); } -static int radeon_driver_open_helper(drm_device_t *dev, drm_file_t *filp_priv) +int radeon_driver_open_helper(drm_device_t *dev, drm_file_t *filp_priv) { drm_radeon_private_t *dev_priv = dev->dev_private; struct drm_radeon_driver_file_fields *radeon_priv; - radeon_priv = (struct drm_radeon_driver_file_fields *)DRM(alloc)(sizeof(*radeon_priv), DRM_MEM_FILES); + radeon_priv = (struct drm_radeon_driver_file_fields *)drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES); if (!radeon_priv) return -ENOMEM; @@ -2589,24 +2974,9 @@ static int radeon_driver_open_helper(drm_device_t *dev, drm_file_t *filp_priv) } -static void radeon_driver_free_filp_priv(drm_device_t *dev, drm_file_t *filp_priv) +void radeon_driver_free_filp_priv(drm_device_t *dev, drm_file_t *filp_priv) { struct drm_radeon_driver_file_fields *radeon_priv = filp_priv->driver_priv; - DRM(free)(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES); -} - -void radeon_driver_register_fns(struct drm_device *dev) -{ - dev->driver_features = DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_SG | DRIVER_HAVE_IRQ | DRIVER_HAVE_DMA | DRIVER_IRQ_SHARED | DRIVER_IRQ_VBL; - dev->dev_priv_size = sizeof(drm_radeon_buf_priv_t); - dev->fn_tbl.prerelease = radeon_driver_prerelease; - dev->fn_tbl.pretakedown = radeon_driver_pretakedown; - dev->fn_tbl.open_helper = 
radeon_driver_open_helper; - dev->fn_tbl.free_filp_priv = radeon_driver_free_filp_priv; - dev->fn_tbl.vblank_wait = radeon_driver_vblank_wait; - dev->fn_tbl.irq_preinstall = radeon_driver_irq_preinstall; - dev->fn_tbl.irq_postinstall = radeon_driver_irq_postinstall; - dev->fn_tbl.irq_uninstall = radeon_driver_irq_uninstall; - dev->fn_tbl.irq_handler = radeon_driver_irq_handler; + drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES); }
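
A few notes on the more intricate parts of this patch follow, each with a small stand-alone C sketch. The sketches model the technique in plain user-space C under stated assumptions; they are illustrations, not code from the driver.

The first hunk replaces the old radeon.h macro scheme with an explicit ioctl table: C99 designated array initializers put each handler at the slot named by its ioctl number, gaps are zero-filled, and radeon_max_ioctl falls out of the array length via DRM_ARRAY_SIZE. A minimal sketch of that dispatch pattern (all names below are invented for illustration):

#include <stdio.h>

#define CMD_START 1
#define CMD_STOP  2
#define CMD_IDLE  5            /* gaps are fine: slots 0, 3, 4 stay NULL */
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

typedef int (*handler_t)(void);

static int do_start(void) { puts("start"); return 0; }
static int do_stop(void)  { puts("stop");  return 0; }
static int do_idle(void)  { puts("idle");  return 0; }

/* Handlers land at the index of their command number, like radeon_ioctls[]. */
static handler_t handlers[] = {
	[CMD_START] = do_start,
	[CMD_STOP]  = do_stop,
	[CMD_IDLE]  = do_idle,
};
static const int max_handler = ARRAY_SIZE(handlers);

static int dispatch(int nr)
{
	/* same bounds-and-NULL check a dispatch core has to make */
	if (nr < 0 || nr >= max_handler || !handlers[nr])
		return -1;
	return handlers[nr]();
}

int main(void)
{
	dispatch(CMD_START);
	if (dispatch(3) < 0)
		puts("slot 3 is empty, rejected");
	dispatch(CMD_IDLE);
	return 0;
}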
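
radeon_check_and_fixup_packet3() now works directly on the kernel copy of the command stream instead of peeking and poking user memory: it derives the packet length from the count field of the header dword and, for GUI_CNTL packets, unpacks, range-checks, and repacks the 1 KiB-granular offsets without disturbing the pitch bits. A sketch of the two bit manipulations; the 0x3fff0000 value for RADEON_CP_PACKET_COUNT_MASK is an assumption inferred from the ">> 16" in the hunk:

#include <stdint.h>
#include <stdio.h>

#define CP_PACKET3           0xC0000000u
#define CP_PACKET_COUNT_MASK 0x3fff0000u   /* assumed mask value */

/* Total dwords in a type-3 packet: two header dwords plus the count field. */
static int packet3_size(uint32_t header, unsigned int *cmdsz)
{
	if ((header & 0xc0000000u) != CP_PACKET3)
		return -1;                      /* "Not a type 3 packet" */
	*cmdsz = 2 + ((header & CP_PACKET_COUNT_MASK) >> 16);
	return 0;
}

/* Re-pack a checked offset into a pitch/offset dword: the low 22 bits
 * carry offset >> 10, the high 10 pitch bits are preserved (0xffc00000). */
static uint32_t repack_offset(uint32_t dword, uint32_t checked_offset)
{
	return (dword & 0xffc00000u) | (checked_offset >> 10);
}

int main(void)
{
	unsigned int sz;
	uint32_t dword = 0x00400123u;       /* pitch bits plus an old offset */

	if (packet3_size(0xC0000000u | (4u << 16), &sz) == 0)
		printf("packet is %u dwords\n", sz);        /* prints 6 */
	printf("repacked: %#x\n", repack_offset(dword, 0x123u << 10));
	return 0;
}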
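
The hyper-z fast clear walks each clip rectangle in hardware tile units. In the rv250 (UCODE_R200) branch the hunk's comments describe a macro tile as 8x2 blocks of 4x4 z-pixels, i.e. 32 pixels wide and 8 tall, so a box's first tile index and per-row tile count come straight out of shifts against the depth pitch in pixels. The arithmetic lifted into a stand-alone model; since the author flags the whole mechanism as reverse-engineered and undocumented, the tile geometry here is only what those comments claim:

#include <stdio.h>

struct box { int x1, y1, x2, y2; };

/* Mirror of the rv250 loop: tiles are 32 px wide (>> 5), 8 px tall
 * (>> 3), indexed row-major against the depth pitch in pixels. */
static void clear_box_tiles(struct box b, int depthpixperline)
{
	int tileoffset = ((b.y1 >> 3) * depthpixperline + b.x1) >> 5;
	int nrtilesx   = (b.x2 >> 5) - (b.x1 >> 5);
	int nrtilesy   = (b.y2 >> 3) - (b.y1 >> 3);
	int j;

	for (j = 0; j <= nrtilesy; j++) {
		/* one RADEON_3D_CLEAR_ZMASK packet per tile row:
		 * first tile = tileoffset * 16, count = nrtilesx + 1 */
		printf("row %d: first=%d count=%d\n",
		       j, tileoffset * 16, nrtilesx + 1);
		tileoffset += depthpixperline >> 5;     /* next tile row */
	}
}

int main(void)
{
	struct box b = { 0, 0, 63, 15 };    /* a 64x16 region */
	clear_box_tiles(b, 1024);           /* 1024 depth pixels per line */
	return 0;
}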
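
One small but real bugfix hides in radeon_cp_dispatch_texture(): the EAGAIN path used to discard the return value of DRM_COPY_TO_USER when handing the updated image descriptor back, so a bad user pointer went unnoticed. The general shape of the fix, with a stub standing in for the user copy in this user-space model:

#include <string.h>
#include <stdio.h>
#include <errno.h>

struct image_desc { int x, y, width, height; };

/* Stand-in for DRM_COPY_TO_USER: nonzero means the copy failed. */
static int copy_to_user_stub(void *dst, const void *src, size_t n)
{
	if (!dst)
		return 1;              /* simulate an unwritable destination */
	memcpy(dst, src, n);
	return 0;
}

/* Before the patch the copy's result was ignored; now a failed
 * write-back surfaces as EFAULT instead of a silent EAGAIN. */
static int hand_back_descriptor(struct image_desc *user_image,
				const struct image_desc *image)
{
	if (copy_to_user_stub(user_image, image, sizeof(*image)))
		return -EFAULT;
	return -EAGAIN;            /* caller should retry with a free buffer */
}

int main(void)
{
	struct image_desc img = { 0, 0, 64, 64 }, out;
	printf("good ptr: %d\n", hand_back_descriptor(&out, &img));  /* -EAGAIN */
	printf("bad ptr:  %d\n", hand_back_descriptor(NULL, &img));  /* -EFAULT */
	return 0;
}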
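
The new surface allocator avoids burning one of the chip's few surface registers per client allocation: when a requested range abuts an existing surface with identical flags, alloc_surface() stretches that surface and bumps its refcount, which the header comment caps at 2 (freeing the middle of a three-way merge could otherwise demand a register that is no longer free). The "extend before"/"extend after" test, reduced to plain intervals with no register writeback:

#include <stdint.h>
#include <stdio.h>

struct surface {
	uint32_t lower, upper, flags;
	int refcount;                  /* capped at 2 by design, see above */
};

/* Merge [lo, up] into s when the flags match and the ranges touch. */
static int try_merge(struct surface *s, uint32_t lo, uint32_t up,
		     uint32_t flags)
{
	if (s->refcount != 1 || s->flags != flags)
		return 0;
	if (up + 1 == s->lower)            /* extend downward ("before") */
		s->lower = lo;
	else if (lo == s->upper + 1)       /* extend upward ("after") */
		s->upper = up;
	else
		return 0;
	s->refcount++;
	return 1;
}

int main(void)
{
	struct surface s = { 0x1000, 0x1fff, 0x3, 1 };
	printf("merged: %d, range now %#x-%#x\n",
	       try_merge(&s, 0x2000, 0x2fff, 0x3), s.lower, s.upper);
	return 0;
}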
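
The most consequential change is in radeon_cp_cmdbuf(). The old code verified the user buffer with DRM_VERIFYAREA_READ and then read from it piecemeal with unchecked accessors, a classic time-of-check/time-of-use hole: another thread can rewrite the buffer between validation and use. The patch bounds-checks bufsz (0..64 KiB), snapshots the whole buffer into one drm_alloc'd kernel copy, and parses only that copy. The pattern in miniature, with memcpy standing in for DRM_COPY_FROM_USER:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

#define MAX_CMDBUF (64 * 1024)

static int process_cmdbuf(const char *user_buf, int bufsz)
{
	char *kbuf;

	if (bufsz < 0 || bufsz > MAX_CMDBUF)
		return -1;                     /* EINVAL in the patch */
	kbuf = malloc(bufsz);
	if (!kbuf)
		return -1;                     /* ENOMEM */
	memcpy(kbuf, user_buf, bufsz);         /* single copy-in */

	/* ... every later check and ring emission reads kbuf only, so a
	 * concurrent writer can no longer change data after the check ... */

	free(kbuf);
	return 0;
}

int main(void)
{
	char buf[16] = "commands";
	printf("rc=%d\n", process_cmdbuf(buf, sizeof(buf)));
	return 0;
}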