vserver 1.9.5.x5
[linux-2.6.git] / drivers / char / drm / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*-
2  *
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "drmP.h"
31 #include "drm.h"
32 #include "drm_sarea.h"
33 #include "radeon_drm.h"
34 #include "radeon_drv.h"
35
/* Ioctl dispatch table for the radeon driver, indexed by ioctl number.
 *
 * NOTE(review): the two trailing flags per entry appear to be
 * { auth_needed, root_only } per drm_ioctl_desc_t in drmP.h -- the
 * CP setup/teardown, INDIRECT and INIT_HEAP ioctls are restricted to
 * privileged clients.  Confirm against the DRM core definition.
 */
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)]    = { radeon_cp_init,      1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)]   = { radeon_cp_start,     1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)]    = { radeon_cp_stop,      1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)]   = { radeon_cp_reset,     1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)]    = { radeon_cp_idle,      1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)]  = { radeon_cp_resume,    1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_RESET)]      = { radeon_engine_reset, 1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = { radeon_fullscreen,   1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)]       = { radeon_cp_swap,      1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)]      = { radeon_cp_clear,     1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)]     = { radeon_cp_vertex,    1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)]    = { radeon_cp_indices,   1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)]    = { radeon_cp_texture,   1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)]    = { radeon_cp_stipple,   1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)]   = { radeon_cp_indirect,  1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)]    = { radeon_cp_vertex2,   1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)]     = { radeon_cp_cmdbuf,    1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)]   = { radeon_cp_getparam,  1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)]       = { radeon_cp_flip,      1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)]      = { radeon_mem_alloc,    1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_FREE)]       = { radeon_mem_free,     1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)]  = { radeon_mem_init_heap,1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)]   = { radeon_irq_emit,     1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)]   = { radeon_irq_wait,     1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)]   = { radeon_cp_setparam,  1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = { radeon_surface_alloc,1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)]  = { radeon_surface_free, 1, 0 }
};

/* Number of entries in radeon_ioctls[]; exported so the DRM core can
 * bounds-check incoming driver-private ioctl numbers. */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);
67
68 /* ================================================================
69  * Helper functions for client state checking and fixup
70  */
71
72 static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_priv,
73                                                      drm_file_t *filp_priv,
74                                                      u32 *offset ) {
75         u32 off = *offset;
76         struct drm_radeon_driver_file_fields *radeon_priv;
77
78         if ( off >= dev_priv->fb_location &&
79              off < ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
80                 return 0;
81
82         radeon_priv = filp_priv->driver_priv;
83         off += radeon_priv->radeon_fb_delta;
84
85         DRM_DEBUG( "offset fixed up to 0x%x\n", off );
86
87         if ( off < dev_priv->fb_location ||
88              off >= ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
89                 return DRM_ERR( EINVAL );
90
91         *offset = off;
92
93         return 0;
94 }
95
/* Validate one state packet from a user command stream and fix up any
 * framebuffer offsets it carries (via radeon_check_and_fixup_offset).
 *
 * @id:   state packet ID (RADEON_EMIT_* / R200_EMIT_*) identifying which
 *        register range the payload corresponds to.
 * @data: the packet's register payload dwords; offsets are patched in
 *        place.
 *
 * Returns 0 on success, DRM_ERR(EINVAL) for an unknown packet ID or an
 * offset that cannot be fixed up.
 *
 * NOTE(review): 'data' is declared __user but is dereferenced directly
 * here -- presumably the caller has already copied the buffer into
 * kernel memory (or relies on direct access being safe); confirm.
 */
static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_priv,
                                                      drm_file_t *filp_priv,
                                                      int id,
                                                      u32 __user *data ) {
	switch ( id ) {

	/* Depth buffer offset lives inside the PP_MISC register range. */
	case RADEON_EMIT_PP_MISC:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[( RADEON_RB3D_DEPTHOFFSET
							    - RADEON_PP_MISC ) / 4] ) ) {
			DRM_ERROR( "Invalid depth buffer offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	/* Colour buffer offset lives inside the PP_CNTL register range. */
	case RADEON_EMIT_PP_CNTL:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[( RADEON_RB3D_COLOROFFSET
							    - RADEON_PP_CNTL ) / 4] ) ) {
			DRM_ERROR( "Invalid colour buffer offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	/* R200 texture offsets: the offset is the packet's first dword. */
	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[0] ) ) {
			DRM_ERROR( "Invalid R200 texture offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	/* R100 texture offsets are embedded in the TXFILTER range. */
	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[( RADEON_PP_TXOFFSET_0
							    - RADEON_PP_TXFILTER_0 ) / 4] ) ) {
			DRM_ERROR( "Invalid R100 texture offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	/* R200 cubic maps carry five face offsets per packet. */
	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5: {
		int i;
		for ( i = 0; i < 5; i++ ) {
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
							    &data[i] ) ) {
				DRM_ERROR( "Invalid R200 cubic texture offset\n" );
				return DRM_ERR( EINVAL );
			}
		}
		break;
	}

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_VAP_CTL:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR( "Unknown state packet ID %d\n", id );
		return DRM_ERR( EINVAL );
	}

	return 0;
}
232
/* Validate a type-3 (CP_PACKET3) command at the head of a user command
 * buffer and fix up any pitch/offset dwords embedded in blit packets.
 *
 * On success *cmdsz is set to the packet's total length in dwords
 * (1 header dword + count+1 payload dwords, i.e. 2 + count field) and
 * 0 is returned; otherwise DRM_ERR(EINVAL).
 *
 * NOTE(review): cmd[0] is read before any size check, so this assumes
 * cmdbuf->bufsz >= 4 -- verify that the caller guarantees at least one
 * dword of data.
 */
static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_priv,
                                                      drm_file_t *filp_priv,
                                                      drm_radeon_cmd_buffer_t *cmdbuf,
                                                      unsigned int *cmdsz ) {
	u32 *cmd = (u32 *) cmdbuf->buf;

	/* Bits 29:16 of the header hold the payload dword count. */
	*cmdsz = 2 + ( ( cmd[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 );

	if ( ( cmd[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) {
		DRM_ERROR( "Not a type 3 packet\n" );
		return DRM_ERR( EINVAL );
	}

	/* The declared packet length must fit in the supplied buffer. */
	if ( 4 * *cmdsz > cmdbuf->bufsz ) {
		DRM_ERROR( "Packet size larger than size of data provided\n" );
		return DRM_ERR( EINVAL );
	}

	/* Check client state and fix it up if necessary */
	if ( cmd[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;

		/* Pitch/offset dwords pack the offset in the low 22 bits,
		 * in units of 1024 bytes (hence the << 10 / >> 10). */
		if ( cmd[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
			offset = cmd[2] << 10;
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
				DRM_ERROR( "Invalid first packet offset\n" );
				return DRM_ERR( EINVAL );
			}
			cmd[2] = ( cmd[2] & 0xffc00000 ) | offset >> 10;
		}

		/* A second pitch/offset dword is present only when both
		 * source and destination use one. */
		if ( ( cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) &&
		     ( cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
			offset = cmd[3] << 10;
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
				DRM_ERROR( "Invalid second packet offset\n" );
				return DRM_ERR( EINVAL );
			}
			cmd[3] = ( cmd[3] & 0xffc00000 ) | offset >> 10;
		}
	}

	return 0;
}
278
279
280 /* ================================================================
281  * CP hardware state programming functions
282  */
283
/* Emit a clip rectangle to the ring by programming RE_TOP_LEFT and
 * RE_WIDTH_HEIGHT.  The -1 on x2/y2 suggests the hardware takes
 * inclusive bottom-right coordinates while drm_clip_rect_t is
 * exclusive -- confirm against the register reference.
 */
static __inline__ void radeon_emit_clip_rect( drm_radeon_private_t *dev_priv,
					  drm_clip_rect_t *box )
{
	RING_LOCALS;

	DRM_DEBUG( "   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		   box->x1, box->y1, box->x2, box->y2 );

	BEGIN_RING( 4 );
	OUT_RING( CP_PACKET0( RADEON_RE_TOP_LEFT, 0 ) );
	OUT_RING( (box->y1 << 16) | box->x1 );	/* packed y:x top-left */
	OUT_RING( CP_PACKET0( RADEON_RE_WIDTH_HEIGHT, 0 ) );
	OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) );
	ADVANCE_RING();
}
299
300 /* Emit 1.1 state
301  */
302 static int radeon_emit_state( drm_radeon_private_t *dev_priv,
303                               drm_file_t *filp_priv,
304                               drm_radeon_context_regs_t *ctx,
305                               drm_radeon_texture_regs_t *tex,
306                               unsigned int dirty )
307 {
308         RING_LOCALS;
309         DRM_DEBUG( "dirty=0x%08x\n", dirty );
310
311         if ( dirty & RADEON_UPLOAD_CONTEXT ) {
312                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
313                                                     &ctx->rb3d_depthoffset ) ) {
314                         DRM_ERROR( "Invalid depth buffer offset\n" );
315                         return DRM_ERR( EINVAL );
316                 }
317
318                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
319                                                     &ctx->rb3d_coloroffset ) ) {
320                         DRM_ERROR( "Invalid depth buffer offset\n" );
321                         return DRM_ERR( EINVAL );
322                 }
323
324                 BEGIN_RING( 14 );
325                 OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) );
326                 OUT_RING( ctx->pp_misc );
327                 OUT_RING( ctx->pp_fog_color );
328                 OUT_RING( ctx->re_solid_color );
329                 OUT_RING( ctx->rb3d_blendcntl );
330                 OUT_RING( ctx->rb3d_depthoffset );
331                 OUT_RING( ctx->rb3d_depthpitch );
332                 OUT_RING( ctx->rb3d_zstencilcntl );
333                 OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) );
334                 OUT_RING( ctx->pp_cntl );
335                 OUT_RING( ctx->rb3d_cntl );
336                 OUT_RING( ctx->rb3d_coloroffset );
337                 OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) );
338                 OUT_RING( ctx->rb3d_colorpitch );
339                 ADVANCE_RING();
340         }
341
342         if ( dirty & RADEON_UPLOAD_VERTFMT ) {
343                 BEGIN_RING( 2 );
344                 OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) );
345                 OUT_RING( ctx->se_coord_fmt );
346                 ADVANCE_RING();
347         }
348
349         if ( dirty & RADEON_UPLOAD_LINE ) {
350                 BEGIN_RING( 5 );
351                 OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) );
352                 OUT_RING( ctx->re_line_pattern );
353                 OUT_RING( ctx->re_line_state );
354                 OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) );
355                 OUT_RING( ctx->se_line_width );
356                 ADVANCE_RING();
357         }
358
359         if ( dirty & RADEON_UPLOAD_BUMPMAP ) {
360                 BEGIN_RING( 5 );
361                 OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) );
362                 OUT_RING( ctx->pp_lum_matrix );
363                 OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) );
364                 OUT_RING( ctx->pp_rot_matrix_0 );
365                 OUT_RING( ctx->pp_rot_matrix_1 );
366                 ADVANCE_RING();
367         }
368
369         if ( dirty & RADEON_UPLOAD_MASKS ) {
370                 BEGIN_RING( 4 );
371                 OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) );
372                 OUT_RING( ctx->rb3d_stencilrefmask );
373                 OUT_RING( ctx->rb3d_ropcntl );
374                 OUT_RING( ctx->rb3d_planemask );
375                 ADVANCE_RING();
376         }
377
378         if ( dirty & RADEON_UPLOAD_VIEWPORT ) {
379                 BEGIN_RING( 7 );
380                 OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) );
381                 OUT_RING( ctx->se_vport_xscale );
382                 OUT_RING( ctx->se_vport_xoffset );
383                 OUT_RING( ctx->se_vport_yscale );
384                 OUT_RING( ctx->se_vport_yoffset );
385                 OUT_RING( ctx->se_vport_zscale );
386                 OUT_RING( ctx->se_vport_zoffset );
387                 ADVANCE_RING();
388         }
389
390         if ( dirty & RADEON_UPLOAD_SETUP ) {
391                 BEGIN_RING( 4 );
392                 OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) );
393                 OUT_RING( ctx->se_cntl );
394                 OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) );
395                 OUT_RING( ctx->se_cntl_status );
396                 ADVANCE_RING();
397         }
398
399         if ( dirty & RADEON_UPLOAD_MISC ) {
400                 BEGIN_RING( 2 );
401                 OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) );
402                 OUT_RING( ctx->re_misc );
403                 ADVANCE_RING();
404         }
405
406         if ( dirty & RADEON_UPLOAD_TEX0 ) {
407                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
408                                                     &tex[0].pp_txoffset ) ) {
409                         DRM_ERROR( "Invalid texture offset for unit 0\n" );
410                         return DRM_ERR( EINVAL );
411                 }
412
413                 BEGIN_RING( 9 );
414                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) );
415                 OUT_RING( tex[0].pp_txfilter );
416                 OUT_RING( tex[0].pp_txformat );
417                 OUT_RING( tex[0].pp_txoffset );
418                 OUT_RING( tex[0].pp_txcblend );
419                 OUT_RING( tex[0].pp_txablend );
420                 OUT_RING( tex[0].pp_tfactor );
421                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) );
422                 OUT_RING( tex[0].pp_border_color );
423                 ADVANCE_RING();
424         }
425
426         if ( dirty & RADEON_UPLOAD_TEX1 ) {
427                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
428                                                     &tex[1].pp_txoffset ) ) {
429                         DRM_ERROR( "Invalid texture offset for unit 1\n" );
430                         return DRM_ERR( EINVAL );
431                 }
432
433                 BEGIN_RING( 9 );
434                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) );
435                 OUT_RING( tex[1].pp_txfilter );
436                 OUT_RING( tex[1].pp_txformat );
437                 OUT_RING( tex[1].pp_txoffset );
438                 OUT_RING( tex[1].pp_txcblend );
439                 OUT_RING( tex[1].pp_txablend );
440                 OUT_RING( tex[1].pp_tfactor );
441                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) );
442                 OUT_RING( tex[1].pp_border_color );
443                 ADVANCE_RING();
444         }
445
446         if ( dirty & RADEON_UPLOAD_TEX2 ) {
447                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
448                                                     &tex[2].pp_txoffset ) ) {
449                         DRM_ERROR( "Invalid texture offset for unit 2\n" );
450                         return DRM_ERR( EINVAL );
451                 }
452
453                 BEGIN_RING( 9 );
454                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) );
455                 OUT_RING( tex[2].pp_txfilter );
456                 OUT_RING( tex[2].pp_txformat );
457                 OUT_RING( tex[2].pp_txoffset );
458                 OUT_RING( tex[2].pp_txcblend );
459                 OUT_RING( tex[2].pp_txablend );
460                 OUT_RING( tex[2].pp_tfactor );
461                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) );
462                 OUT_RING( tex[2].pp_border_color );
463                 ADVANCE_RING();
464         }
465
466         return 0;
467 }
468
/* Emit 1.2 state.
 *
 * The 1.2 sarea adds a second context block (context2); only the zbias
 * registers are emitted here, then the remaining state is delegated to
 * the 1.1 path (radeon_emit_state), whose return value is propagated.
 */
static int radeon_emit_state2( drm_radeon_private_t *dev_priv,
			       drm_file_t *filp_priv,
			       drm_radeon_state_t *state )
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING( 3 );
		OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) );
		OUT_RING( state->context2.se_zbias_factor ); 
		OUT_RING( state->context2.se_zbias_constant ); 
		ADVANCE_RING();
	}

	return radeon_emit_state( dev_priv, filp_priv, &state->context,
			   state->tex, state->dirty );
}
488
489 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
490  * 1.3 cmdbuffers allow all previous state to be updated as well as
491  * the tcl scalar and vector areas.  
492  */
/* Table describing each state packet ID, indexed by the RADEON_EMIT_* /
 * R200_EMIT_* values used in radeon_check_and_fixup_packets(). */
static struct { 
	int start; 	/* first hardware register of the range */
	int len; 	/* number of consecutive registers emitted */
	const char *name;	/* for error/debug messages */
} packet[RADEON_MAX_STATE_PACKETS] = {
	{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
	{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
	{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
	{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
	{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
	{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
	{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
	{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
	{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
	{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
	{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
	{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
	{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
	{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
	{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
	{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
	{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
	{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
	{ R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" },
	{ R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
	{ R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
	{ R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
	{ R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
	{ R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
	{ R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
	{ R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
	{ R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
	{ R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
	{ R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
	{ R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
	{ R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
	{ R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
	{ R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
	{ R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
	{ R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
	{ R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
	{ R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
	{ R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
	{ R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
	{ R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
	{ R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
	{ R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
	{ R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
	{ R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
	{ R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
	{ R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
	{ R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
	{ R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
	{ R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" }, 
	{ R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" }, 
	{ R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" }, 
	{ R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" }, 
	{ R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" }, 
	{ R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" }, 
	{ R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" }, 
	{ R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" }, 
	{ R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" }, 
	{ R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
	{ R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
	{ R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
	{ R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" },
	{ R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
	{ R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" },
	{ R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
	{ R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" },
	{ R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
	{ R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" },
	{ R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
	{ R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" },
	{ R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
	{ RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
	{ RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
	{ RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
	{ R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
	{ R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
};
577
578
579
580 /* ================================================================
581  * Performance monitoring functions
582  */
583
/* Fill a small solid-colour rectangle, used to draw the on-screen
 * performance boxes.  Coordinates are relative to the client's first
 * clip rect; the fill targets whichever buffer is currently the back
 * buffer (accounting for page flipping).
 *
 * @x, @y: top-left corner, offset by boxes[0]
 * @w, @h: rectangle size in pixels
 * @r, @g, @b: 8-bit colour components, packed per dev_priv->color_fmt
 */
static void radeon_clear_box( drm_radeon_private_t *dev_priv,
			      int x, int y, int w, int h,
			      int r, int g, int b )
{
	u32 color;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	/* Pack the colour to match the framebuffer format. */
	switch ( dev_priv->color_fmt ) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) |
			 ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g <<  8) | b);
		break;
	}

	/* Wait for 3D idle and open the full write mask before the 2D fill. */
	BEGIN_RING( 4 );
	RADEON_WAIT_UNTIL_3D_IDLE();		
	OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
	OUT_RING( 0xffffffff );
	ADVANCE_RING();

	BEGIN_RING( 6 );

	OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
	OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		  RADEON_GMC_BRUSH_SOLID_COLOR |
		  (dev_priv->color_fmt << 8) |
		  RADEON_GMC_SRC_DATATYPE_COLOR |
		  RADEON_ROP3_P |
		  RADEON_GMC_CLR_CMP_CNTL_DIS );

	/* When page-flipped onto the back page, "front" is the visible
	 * buffer to draw into; otherwise draw into the back buffer. */
	if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) { 
		OUT_RING( dev_priv->front_pitch_offset );
	} else {	 
		OUT_RING( dev_priv->back_pitch_offset );
	} 

	OUT_RING( color );

	OUT_RING( (x << 16) | y );
	OUT_RING( (w << 16) | h );

	ADVANCE_RING();
}
635
/* Draw the per-frame performance indicator boxes from the accumulated
 * stats, then reset the stats for the next frame.
 */
static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if ( dev_priv->stats.boxes & RADEON_BOX_FLIP ) 
		radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 );

	/* Red box if we have to wait for idle at any point
	 */
	if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE ) 
		radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 );

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD ) 
		radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 );

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) ) 
		radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );


	/* Draw bars indicating number of buffers allocated 
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		/* Clamp the bar to 100 pixels wide. */
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box( dev_priv, 4, 16,  
				  dev_priv->stats.requested_bufs, 4,
				  196, 128, 128 );
	}

	/* Start the next frame's stats from a clean slate. */
	memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) );

}
689 /* ================================================================
690  * CP command dispatch functions
691  */
692
/* Emit CP commands that clear the requested buffers (front/back color,
 * depth and/or stencil) for every cliprect currently in the SAREA.
 *
 * dev         - DRM device; dev->dev_private holds the radeon state.
 * clear       - userspace request: which buffers (flags), color mask,
 *               clear color, clear depth and depth/stencil mask.
 * depth_boxes - per-cliprect quad coordinates supplied by userspace,
 *               used when the depth/stencil clear is done by rendering
 *               a quad into those buffers.
 *
 * Finishes by incrementing and emitting last_clear so clients can
 * throttle against the clear completing.
 */
static void radeon_cp_dispatch_clear( drm_device_t *dev,
				      drm_radeon_clear_t *clear,
				      drm_radeon_clear_rect_t *depth_boxes )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask= 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG( "flags = 0x%x\n", flags );

	dev_priv->stats.clears++;

	/* While page-flipped, "front" and "back" refer to the other
	 * physical buffer, so swap the two flags.
	 */
	if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if ( tmp & RADEON_FRONT ) flags |= RADEON_BACK;
		if ( tmp & RADEON_BACK )  flags |= RADEON_FRONT;
	}

	if ( flags & (RADEON_FRONT | RADEON_BACK) ) {

		BEGIN_RING( 4 );

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
		OUT_RING( clear->color_mask );

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		/* Solid-color 2D fill of each cliprect, once per selected
		 * color buffer.
		 */
		for ( i = 0 ; i < nbox ; i++ ) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n",
				   x, y, w, h, flags );

			if ( flags & RADEON_FRONT ) {
				BEGIN_RING( 6 );

				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					  RADEON_GMC_BRUSH_SOLID_COLOR |
					  (dev_priv->color_fmt << 8) |
					  RADEON_GMC_SRC_DATATYPE_COLOR |
					  RADEON_ROP3_P |
					  RADEON_GMC_CLR_CMP_CNTL_DIS );

				OUT_RING( dev_priv->front_pitch_offset );
				OUT_RING( clear->clear_color );

				OUT_RING( (x << 16) | y );
				OUT_RING( (w << 16) | h );

				ADVANCE_RING();
			}

			if ( flags & RADEON_BACK ) {
				BEGIN_RING( 6 );

				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					  RADEON_GMC_BRUSH_SOLID_COLOR |
					  (dev_priv->color_fmt << 8) |
					  RADEON_GMC_SRC_DATATYPE_COLOR |
					  RADEON_ROP3_P |
					  RADEON_GMC_CLR_CMP_CNTL_DIS );

				OUT_RING( dev_priv->back_pitch_offset );
				OUT_RING( clear->clear_color );

				OUT_RING( (x << 16) | y );
				OUT_RING( (w << 16) | h );

				ADVANCE_RING();
			}
		}
	}

	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		/* depth pixels per scanline: 16-bit Z packs two per dword,
		 * other formats one per dword.
		 */
		int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z?
			(dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4);

		u32 clearmask;

		/* low 24 bits: z clear value; high 8 bits: stencil clear
		 * (taken from the misnamed depth_mask field).
		 */
		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
			((clear->depth_mask & 0xff) << 24);


		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
		/* FIXME : reverse engineer that for Rx00 cards */
		/* FIXME : the mask supposedly contains low-res z values. So can't set
		   just to the max (0xff? or actually 0x3fff?), need to take z clear
		   value into account? */
		/* pattern seems to work for r100, though get slight
		   rendering errors with glxgears. If hierz is not enabled for r100,
		   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
		   other ones are ignored, and the same clear mask can be used. That's
		   very different behaviour than R200 which needs different clear mask
		   and different number of tiles to clear if hierz is enabled or not !?!
		*/
			clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
		}
		else {
		/* clear mask : chooses the clearing pattern.
		   rv250: could be used to clear only parts of macrotiles
		   (but that would get really complicated...)?
		   bit 0 and 1 (either or both of them ?!?!) are used to
		   not clear tile (or maybe one of the bits indicates if the tile is
		   compressed or not), bit 2 and 3 to not clear tile 1,...,.
		   Pattern is as follows:
			| 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
		   bits -------------------------------------------------
			| 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
		   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
		   covers 256 pixels ?!?
		*/
			clearmask = 0x0;
		}

		BEGIN_RING( 8 );
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
			tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
		ADVANCE_RING();

		/* Per-cliprect fast z clear; three chip-dependent tile
		 * geometries below.
		 */
		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					/* first tile */
					OUT_RING( tileoffset * 8 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 4 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
			else if (dev_priv->microcode_version==UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
				nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING( tileoffset * 16 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 1 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			}
			else { /* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
				nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					OUT_RING( tileoffset * 128 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 4 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200)
			&& (flags & RADEON_USE_HIERZ))
		/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
		/* FIXME : the mask supposedly contains low-res z values. So can't set
		   just to the max (0xff? or actually 0x3fff?), need to take z clear
		   value into account? */
		{
			BEGIN_RING( 4 );
			OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
			OUT_RING( 0x0 ); /* First tile */
			OUT_RING( 0x3cc0 );
			OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	if ((dev_priv->microcode_version == UCODE_R200) &&
	    (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		/* R200 path: full pipeline setup, then one immediate-mode
		 * triangle (rect list) per cliprect.
		 */
		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;



		/* Disable TCL */

		tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
				   (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
			SE_VTE_CNTL__VTX_XY_FMT_MASK |
			SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W)*/
		tempSE_VTX_FMT_0 =
			SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
			SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;


		/* 
		 * Depth buffer specific enables 
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/* 
		 * Stencil buffer specific enables
		 */
		if ( flags & RADEON_STENCIL ) {
			tempRB3D_CNTL |=  RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask; 
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
				RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING( 26 );
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL );
		OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL );
		OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL );
		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
			      tempRB3D_ZSTENCILCNTL );
		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, 
			      tempRB3D_STENCILREFMASK );
		OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK );
		OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL );
		OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL );
		OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 );
		OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 );
		OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL );
		OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL, 
			      tempRE_AUX_SCISSOR_CNTL );
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for ( i = 0 ; i < nbox ; i++ ) {
			
			/* Funny that this should be required -- 
			 *  sets top-left?
			 */
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );

			/* Three (x, y, z, w) vertices per quad;
			 * 0x3f800000 is float 1.0 for w.
			 */
			BEGIN_RING( 14 );
			OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) );
			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
				   RADEON_PRIM_WALK_RING |
				   (3 << RADEON_NUM_VERTICES_SHIFT)) );
			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			ADVANCE_RING();
		}
	} 
	else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {

		/* R100-class path: same idea, different registers and
		 * vertex format.
		 */
		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if ( flags & RADEON_DEPTH ) {
			rb3d_cntl |=  RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if ( flags & RADEON_STENCIL ) {
			rb3d_cntl |=  RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
				RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING( 13 );
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) );
		OUT_RING( 0x00000000 );
		OUT_RING( rb3d_cntl );
		
		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL );
		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
			      rb3d_stencilrefmask );
		OUT_RING_REG( RADEON_RB3D_PLANEMASK,
			      0x00000000 );
		OUT_RING_REG( RADEON_SE_CNTL,
			      depth_clear->se_cntl );
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for ( i = 0 ; i < nbox ; i++ ) {
			
			/* Funny that this should be required -- 
			 *  sets top-left?
			 */
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );

			BEGIN_RING( 15 );

			OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );
			OUT_RING( RADEON_VTX_Z_PRESENT |
				  RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
				   RADEON_PRIM_WALK_RING |
				   RADEON_MAOS_ENABLE |
				   RADEON_VTX_FMT_RADEON_MODE |
				   (3 << RADEON_NUM_VERTICES_SHIFT)) );

			/* Three (x, y, z, packed-color) vertices per quad. */
			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING( 4 );

	RADEON_CLEAR_AGE( dev_priv->sarea_priv->last_clear );
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
1168
/* Copy the back buffer to the front buffer (source/destination are
 * reversed when page-flipped) for every cliprect in the SAREA, then
 * emit a new frame age so clients can throttle their framerate.
 */
static void radeon_cp_dispatch_swap( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG( "\n" );

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes( dev_priv );


	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING( 2 );

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* One screen-to-screen blit per cliprect. */
	for ( i = 0 ; i < nbox ; i++ ) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG( "dispatch swap %d,%d-%d,%d\n",
			   x, y, w, h );

		BEGIN_RING( 7 );

		OUT_RING( CP_PACKET3( RADEON_CNTL_BITBLT_MULTI, 5 ) );
		OUT_RING( RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			  RADEON_GMC_BRUSH_NONE |
			  (dev_priv->color_fmt << 8) |
			  RADEON_GMC_SRC_DATATYPE_COLOR |
			  RADEON_ROP3_S |
			  RADEON_DP_SRC_SOURCE_MEMORY |
			  RADEON_GMC_CLR_CMP_CNTL_DIS |
			  RADEON_GMC_WR_MSK_DIS );
		
		/* Make this work even if front & back are flipped:
		 */
		if (dev_priv->current_page == 0) {
			OUT_RING( dev_priv->back_pitch_offset );
			OUT_RING( dev_priv->front_pitch_offset );
		} 
		else {
			OUT_RING( dev_priv->front_pitch_offset );
			OUT_RING( dev_priv->back_pitch_offset );
		}

		/* Source x/y, destination x/y (identical), then extents. */
		OUT_RING( (x << 16) | y );
		OUT_RING( (x << 16) | y );
		OUT_RING( (w << 16) | h );

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING( 4 );

	RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}
1247
/* Perform a page flip: point both CRTCs at the currently hidden buffer,
 * toggle current_page/pfCurrentPage, and emit a new frame age for
 * client throttling.
 */
static void radeon_cp_dispatch_flip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *)dev_priv->sarea->handle;
	/* Flip to whichever buffer is not currently displayed. */
	int offset = (dev_priv->current_page == 1)
		   ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n", 
		__FUNCTION__, 
		dev_priv->current_page,
		dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes( dev_priv );
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING( 6 );

	RADEON_WAIT_UNTIL_3D_IDLE();
	/* CRTC1 base: frame origin scaled by pitch and bytes-per-pixel
	 * (color_fmt - 2 appears to yield bytes per pixel -- TODO confirm),
	 * masked to 8-byte alignment, plus the new buffer offset.
	 */
	OUT_RING_REG( RADEON_CRTC_OFFSET, ( ( sarea->frame.y * dev_priv->front_pitch
					      + sarea->frame.x 
					      * ( dev_priv->color_fmt - 2 ) ) & ~7 )
					  + offset );
	OUT_RING_REG( RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
					   + offset );

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
					      1 - dev_priv->current_page;

	BEGIN_RING( 2 );

	RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );

	ADVANCE_RING();
}
1295
1296 static int bad_prim_vertex_nr( int primitive, int nr )
1297 {
1298         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1299         case RADEON_PRIM_TYPE_NONE:
1300         case RADEON_PRIM_TYPE_POINT:
1301                 return nr < 1;
1302         case RADEON_PRIM_TYPE_LINE:
1303                 return (nr & 1) || nr == 0;
1304         case RADEON_PRIM_TYPE_LINE_STRIP:
1305                 return nr < 2;
1306         case RADEON_PRIM_TYPE_TRI_LIST:
1307         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1308         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1309         case RADEON_PRIM_TYPE_RECT_LIST:
1310                 return nr % 3 || nr == 0;
1311         case RADEON_PRIM_TYPE_TRI_FAN:
1312         case RADEON_PRIM_TYPE_TRI_STRIP:
1313                 return nr < 3;
1314         default:
1315                 return 1;
1316         }       
1317 }
1318
1319
1320
/* One TCL primitive within a vertex buffer, as consumed by
 * radeon_cp_dispatch_vertex().
 */
typedef struct {
	unsigned int start;	/* byte offset of the primitive's first vertex in the buffer */
	unsigned int finish;	/* end offset -- only used for debug output here; TODO confirm semantics */
	unsigned int prim;	/* hardware primitive type/flags (RADEON_PRIM_*) */
	unsigned int numverts;	/* number of vertices to render */
	unsigned int offset;	/* not referenced by the visible dispatch code -- TODO confirm use */
	unsigned int vc_format;	/* vertex format word emitted to the CP */
} drm_radeon_tcl_prim_t;
1329
/* Emit the CP commands that render one TCL primitive from a vertex
 * buffer, replaying the draw once per SAREA cliprect.
 *
 * dev  - DRM device; dev->dev_private holds the radeon state.
 * buf  - DMA buffer holding the vertices.
 * prim - primitive description (type, vertex count, format, offsets).
 *
 * Rejects primitives whose vertex count is invalid for their type.
 */
static void radeon_cp_dispatch_vertex( drm_device_t *dev,
				       drm_buf_t *buf,
				       drm_radeon_tcl_prim_t *prim )

{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GPU-visible address of the first vertex of this primitive. */
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start,
		  prim->finish,
		  prim->numverts);

	if (bad_prim_vertex_nr( prim->prim, prim->numverts )) {
		DRM_ERROR( "bad prim %x numverts %d\n", 
			   prim->prim, prim->numverts );
		return;
	}

	/* do/while: the primitive is emitted at least once, even when
	 * nbox == 0 (no cliprects to set).
	 */
	do {
		/* Emit the next cliprect */
		if ( i < nbox ) {
			radeon_emit_clip_rect( dev_priv, 
					       &sarea_priv->boxes[i] );
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING( 5 );

		OUT_RING( CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, 3 ) );
		OUT_RING( offset );
		OUT_RING( numverts );
		OUT_RING( prim->vc_format );
		OUT_RING( prim->prim | RADEON_PRIM_WALK_LIST |
			  RADEON_COLOR_ORDER_RGBA |
			  RADEON_VTX_FMT_RADEON_MODE |
			  (numverts << RADEON_NUM_VERTICES_SHIFT) );

		ADVANCE_RING();

		i++;
	} while ( i < nbox );
}
1380
1381
1382
/* Retire a DMA buffer: stamp it with a fresh dispatch age, emit that age
 * to the ring, and mark the buffer pending so the freelist can reclaim it
 * once the CP has processed past this point.
 */
static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	/* Monotonically increasing age shared with userspace via the SAREA. */
	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING( 2 );
	RADEON_DISPATCH_AGE( buf_priv->age );
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}
1399
/* Fire the byte range [start, end) of a DMA buffer as a CP indirect
 * buffer.  A no-op when the range is empty.  The dword count is padded
 * to an even number by appending a Type-2 packet to the buffer itself,
 * so the caller's buffer must have room for one extra dword.
 */
static void radeon_cp_dispatch_indirect( drm_device_t *dev,
					 drm_buf_t *buf,
					 int start, int end )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG( "indirect: buf=%d s=0x%x e=0x%x\n",
		   buf->idx, start, end );

	if ( start != end ) {
		/* GPU-visible address of the range within this buffer. */
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		/* Round the byte range up to whole dwords. */
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if ( dwords & 1 ) {
			u32 *data = (u32 *)
				((char *)dev->agp_buffer_map->handle
				 + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING( 3 );

		OUT_RING( CP_PACKET0( RADEON_CP_IB_BASE, 1 ) );
		OUT_RING( offset );
		OUT_RING( dwords );

		ADVANCE_RING();
	}
}
1435
1436
/* Dispatch an indexed primitive: patch a CP_PACKET3 header directly into
 * the element buffer (in front of the indices, at prim->start), then fire
 * that range as an indirect buffer once per cliprect.  As with the vertex
 * path, the do/while dispatches at least once even with zero cliprects.
 */
static void radeon_cp_dispatch_indices( drm_device_t *dev,
					drm_buf_t *elt_buf,
					drm_radeon_tcl_prim_t *prim )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GPU-visible address of the vertex data the indices refer to. */
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	/* The indices start RADEON_INDEX_PRIM_OFFSET bytes past the header. */
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start,
		  prim->finish,
		  prim->offset,
		  prim->numverts);

	/* Reject index counts that are illegal for this primitive type. */
	if (bad_prim_vertex_nr( prim->prim, count )) {
		DRM_ERROR( "bad prim %x count %d\n", 
			   prim->prim, count );
		return;
	}


	/* The range must contain at least one index and the header must be
	 * 8-byte aligned for the in-place packet write below.
	 */
	if ( start >= prim->finish ||
	     (prim->start & 0x7) ) {
		DRM_ERROR( "buffer prim %d\n", prim->prim );
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	/* CPU-visible address of the header slot inside the element buffer. */
	data = (u32 *)((char *)dev->agp_buffer_map->handle +
		       elt_buf->offset + prim->start);

	/* Write the rendering packet header in front of the indices. */
	data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 );
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT) );

	do {
		if ( i < nbox ) 
			radeon_emit_clip_rect( dev_priv, 
					       &sarea_priv->boxes[i] );

		/* Replay the header + indices for this cliprect. */
		radeon_cp_dispatch_indirect( dev, elt_buf,
					     prim->start,
					     prim->finish );

		i++;
	} while ( i < nbox );

}
1500
1501 #define RADEON_MAX_TEXTURE_SIZE (RADEON_BUFFER_SIZE - 8 * sizeof(u32))
1502
/* Upload a texture image to the card via HOSTDATA_BLT packets, splitting
 * the image into multiple passes when it exceeds RADEON_MAX_TEXTURE_SIZE.
 * On EAGAIN the (partially updated) image struct is copied back to
 * userspace so the client can retry the remainder.
 *
 * Returns 0 on success or a DRM_ERR() code.
 */
static int radeon_cp_dispatch_texture( DRMFILE filp,
				       drm_device_t *dev,
				       drm_radeon_texture_t *tex,
				       drm_radeon_tex_image_t *image )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width;
	u32 height;
	int i;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	/* Sanitize the user-supplied destination offset. */
	if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &tex->offset ) ) {
		DRM_ERROR( "Invalid destination offset\n" );
		return DRM_ERR( EINVAL );
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache.  This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING( 4 );
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

#ifdef __BIG_ENDIAN
	/* The Mesa texture functions provide the data in little endian as the
	 * chip wants it, but we need to compensate for the fact that the CP
	 * ring gets byte-swapped
	 */
	BEGIN_RING( 2 );
	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
	ADVANCE_RING();
#endif


	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch ( tex->format ) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		/* 32 bits per texel. */
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		/* 16 bits per texel. */
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		/* 8 bits per texel. */
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR( "invalid texture format %d\n", tex->format );
		return DRM_ERR(EINVAL);
	}

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );

	/* One iteration per pass; each pass uploads at most
	 * RADEON_MAX_TEXTURE_SIZE bytes.
	 */
	do {
		DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			   tex->offset >> 10, tex->pitch, tex->format,
			   image->x, image->y, image->width, image->height );

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;
		
		size = height * blit_width;

		if ( size > RADEON_MAX_TEXTURE_SIZE ) {
			/* Too big for one pass: clamp to whole scanlines. */
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if ( size < 4 && size > 0 ) {
			size = 4;
		} else if ( size == 0 ) {
			return 0;
		}

		buf = radeon_freelist_get( dev );
		/* NOTE(review): the "0 &&" deliberately disables the
		 * idle-and-retry path, so a full freelist falls straight
		 * through to the EAGAIN return below — confirm intent.
		 */
		if ( 0 && !buf ) {
			radeon_do_cp_idle( dev_priv );
			buf = radeon_freelist_get( dev );
		}
		if ( !buf ) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			/* Hand the updated image state back so the client
			 * can resume where this call left off.
			 */
			if (DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}


		/* Dispatch the indirect buffer.
		 */
		buffer = (u32*)((char*)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;
		/* 8-dword HOSTDATA_BLT header followed by the pixel data. */
		buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
		buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			     RADEON_GMC_BRUSH_NONE |
			     (format << 8) |
			     RADEON_GMC_SRC_DATATYPE_COLOR |
			     RADEON_ROP3_S |
			     RADEON_DP_SRC_SOURCE_HOST_DATA |
			     RADEON_GMC_CLR_CMP_CNTL_DIS |
			     RADEON_GMC_WR_MSK_DIS);
		
		buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
		buffer[3] = 0xffffffff;
		buffer[4] = 0xffffffff;
		buffer[5] = (image->y << 16) | image->x;
		buffer[6] = (height << 16) | image->width;
		buffer[7] = dwords;
		buffer += 8;

		/* NOTE(review): the EFAULT returns below leave `buf`
		 * neither dispatched nor discarded, leaking it from the
		 * freelist — later upstream versions discard it first.
		 */
		if ( tex_width >= 32 ) {
			/* Texture image width is larger than the minimum, so we
			 * can upload it directly.
			 */
			if ( DRM_COPY_FROM_USER( buffer, data, 
						 dwords * sizeof(u32) ) ) {
				DRM_ERROR( "EFAULT on data, %d dwords\n", 
					   dwords );
				return DRM_ERR(EFAULT);
			}
		} else {
			/* Texture image width is less than the minimum, so we
			 * need to pad out each image scanline to the minimum
			 * width.
			 */
			for ( i = 0 ; i < tex->height ; i++ ) {
				if ( DRM_COPY_FROM_USER( buffer, data, 
							 tex_width ) ) {
					DRM_ERROR( "EFAULT on pad, %d bytes\n",
						   tex_width );
					return DRM_ERR(EFAULT);
				}
				/* Advance 8 dwords (32 bytes) per scanline,
				 * the padded minimum blit width. */
				buffer += 8;
				data += tex_width;
			}
		}

		buf->filp = filp;
		buf->used = (dwords + 8) * sizeof(u32);
		radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
		radeon_cp_discard_buffer( dev, buf );

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING( 4 );
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	return 0;
}
1687
1688
1689 static void radeon_cp_dispatch_stipple( drm_device_t *dev, u32 *stipple )
1690 {
1691         drm_radeon_private_t *dev_priv = dev->dev_private;
1692         int i;
1693         RING_LOCALS;
1694         DRM_DEBUG( "\n" );
1695
1696         BEGIN_RING( 35 );
1697
1698         OUT_RING( CP_PACKET0( RADEON_RE_STIPPLE_ADDR, 0 ) );
1699         OUT_RING( 0x00000000 );
1700
1701         OUT_RING( CP_PACKET0_TABLE( RADEON_RE_STIPPLE_DATA, 31 ) );
1702         for ( i = 0 ; i < 32 ; i++ ) {
1703                 OUT_RING( stipple[i] );
1704         }
1705
1706         ADVANCE_RING();
1707 }
1708
1709 static void radeon_apply_surface_regs(int surf_index, drm_radeon_private_t *dev_priv)
1710 {
1711         if (!dev_priv->mmio)
1712                 return;
1713
1714         radeon_do_cp_idle(dev_priv);
1715
1716         RADEON_WRITE(RADEON_SURFACE0_INFO + 16*surf_index,
1717                 dev_priv->surfaces[surf_index].flags);
1718         RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16*surf_index,
1719                 dev_priv->surfaces[surf_index].lower);
1720         RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16*surf_index,
1721                 dev_priv->surfaces[surf_index].upper);
1722 }
1723
1724
/* Allocates a virtual surface
 * doesn't always allocate a real surface, will stretch an existing 
 * surface when possible.
 *
 * Note that refcount can be at most 2, since during a free refcount=3
 * might mean we have to allocate a new surface which might not always
 * be available.
 * For example : we allocate three contiguous surfaces ABC. If B is 
 * freed, we suddenly need two surfaces to store A and C, which might
 * not always be available.
 *
 * Returns the virt_surfaces[] index on success, -1 on any failure.
 */
static int alloc_surface(drm_radeon_surface_alloc_t* new, drm_radeon_private_t *dev_priv, DRMFILE filp)
{
	struct radeon_virt_surface *s;
	int i;
	int virt_surface_index;
	uint32_t new_upper, new_lower;

	/* Bounds are inclusive: upper is the last byte of the range. */
	new_lower = new->address;
	new_upper = new_lower + new->size - 1;

	/* sanity check: non-empty range, some flags set, and both bounds
	 * aligned per RADEON_SURF_ADDRESS_FIXED_MASK (lower aligned down,
	 * upper ending on an alignment boundary minus one). */
	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
		((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) != RADEON_SURF_ADDRESS_FIXED_MASK) ||
		((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
		return -1;

	/* make sure there is no overlap with existing surfaces */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if ((dev_priv->surfaces[i].refcount != 0) &&
		(( (new_lower >= dev_priv->surfaces[i].lower) &&
			(new_lower < dev_priv->surfaces[i].upper) ) ||
		 ( (new_lower < dev_priv->surfaces[i].lower) &&
			(new_upper > dev_priv->surfaces[i].lower) )) ){
		return -1;}
	}

	/* find a virtual surface */
	for (i = 0; i < 2*RADEON_MAX_SURFACES; i++)
		if (dev_priv->virt_surfaces[i].filp == 0)
			break;
	if (i == 2*RADEON_MAX_SURFACES) {
		return -1;}
	virt_surface_index = i;

	/* try to reuse an existing surface */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		/* extend before: the new range ends exactly where an
		 * existing surface with identical flags begins. */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		  (new->flags == dev_priv->surfaces[i].flags) &&
		  (new_upper + 1 == dev_priv->surfaces[i].lower)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].lower = s->lower;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}

		/* extend after: the new range starts right past an
		 * existing surface with identical flags. */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		  (new->flags == dev_priv->surfaces[i].flags) &&
		  (new_lower == dev_priv->surfaces[i].upper + 1)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].upper = s->upper;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* okay, we need a new one */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if (dev_priv->surfaces[i].refcount == 0) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->filp = filp;
			dev_priv->surfaces[i].refcount = 1;
			dev_priv->surfaces[i].lower = s->lower;
			dev_priv->surfaces[i].upper = s->upper;
			dev_priv->surfaces[i].flags = s->flags;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* we didn't find anything */
	return -1;
}
1826
/* Release the virtual surface owned by `filp` that starts at `lower`.
 * Shrinks the backing real surface from whichever end the virtual
 * surface occupied, and clears its flags once the refcount drops to 0.
 * Returns 0 on success, 1 if no matching virtual surface was found.
 */
static int free_surface(DRMFILE filp, drm_radeon_private_t *dev_priv, int lower)
{
	struct radeon_virt_surface *s;
	int i;
	/* find the virtual surface */
	for(i = 0; i < 2*RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->filp) {
			if ((lower == s->lower) && (filp == s->filp)) {
				/* If this virt surface formed the lower end of
				 * the real surface, move the real lower bound
				 * up past it; likewise for the upper end. */
				if (dev_priv->surfaces[s->surface_index].lower == s->lower)
					dev_priv->surfaces[s->surface_index].lower = s->upper;

				if (dev_priv->surfaces[s->surface_index].upper == s->upper)
					dev_priv->surfaces[s->surface_index].upper = s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				if (dev_priv->surfaces[s->surface_index].refcount == 0)
					dev_priv->surfaces[s->surface_index].flags = 0;
				s->filp = 0;
				radeon_apply_surface_regs(s->surface_index, dev_priv);
				return 0;
			}
		}
	}
	return 1;
}
1853
1854 static void radeon_surfaces_release(DRMFILE filp, drm_radeon_private_t *dev_priv)
1855 {
1856         int i;
1857         for( i = 0; i < 2*RADEON_MAX_SURFACES; i++)
1858         {
1859                 if (dev_priv->virt_surfaces[i].filp == filp)
1860                         free_surface(filp, dev_priv, dev_priv->virt_surfaces[i].lower);
1861         }
1862 }
1863
1864 /* ================================================================
1865  * IOCTL functions
1866  */
1867 int radeon_surface_alloc(DRM_IOCTL_ARGS)
1868 {
1869         DRM_DEVICE;
1870         drm_radeon_private_t *dev_priv = dev->dev_private;
1871         drm_radeon_surface_alloc_t alloc;
1872
1873         if (!dev_priv) {
1874                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1875                 return DRM_ERR(EINVAL);
1876         }
1877
1878         DRM_COPY_FROM_USER_IOCTL(alloc, (drm_radeon_surface_alloc_t __user *)data,
1879                                   sizeof(alloc));
1880
1881         if (alloc_surface(&alloc, dev_priv, filp) == -1)
1882                 return DRM_ERR(EINVAL);
1883         else
1884                 return 0;
1885 }
1886
1887 int radeon_surface_free(DRM_IOCTL_ARGS)
1888 {
1889         DRM_DEVICE;
1890         drm_radeon_private_t *dev_priv = dev->dev_private;
1891         drm_radeon_surface_free_t memfree;
1892
1893         if (!dev_priv) {
1894                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1895                 return DRM_ERR(EINVAL);
1896         }
1897
1898         DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_mem_free_t __user *)data,
1899                                   sizeof(memfree) );
1900
1901         if (free_surface(filp, dev_priv, memfree.address))
1902                 return DRM_ERR(EINVAL);
1903         else
1904                 return 0;
1905 }
1906
1907 int radeon_cp_clear( DRM_IOCTL_ARGS )
1908 {
1909         DRM_DEVICE;
1910         drm_radeon_private_t *dev_priv = dev->dev_private;
1911         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1912         drm_radeon_clear_t clear;
1913         drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
1914         DRM_DEBUG( "\n" );
1915
1916         LOCK_TEST_WITH_RETURN( dev, filp );
1917
1918         DRM_COPY_FROM_USER_IOCTL( clear, (drm_radeon_clear_t __user *)data,
1919                              sizeof(clear) );
1920
1921         RING_SPACE_TEST_WITH_RETURN( dev_priv );
1922
1923         if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
1924                 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
1925
1926         if ( DRM_COPY_FROM_USER( &depth_boxes, clear.depth_boxes,
1927                              sarea_priv->nbox * sizeof(depth_boxes[0]) ) )
1928                 return DRM_ERR(EFAULT);
1929
1930         radeon_cp_dispatch_clear( dev, &clear, depth_boxes );
1931
1932         COMMIT_RING();
1933         return 0;
1934 }
1935

/* Enable page flipping: set the flip-control bit on both CRTC offset
 * control registers and reset the current page to 0.
 * (Not sure why this isn't set all the time.)
 */
static int radeon_do_init_pageflip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG( "\n" );

	/* Read-modify-write each CRTC_OFFSET_CNTL to OR in
	 * RADEON_CRTC_OFFSET_FLIP_CNTL, preserving the other bits.
	 */
	BEGIN_RING( 6 );
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	OUT_RING( CP_PACKET0( RADEON_CRTC2_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC2_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	ADVANCE_RING();

	/* Start on page 0 and publish the current page via the SAREA. */
	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
1960
1961 /* Called whenever a client dies, from drm_release.
1962  * NOTE:  Lock isn't necessarily held when this is called!
1963  */
1964 int radeon_do_cleanup_pageflip( drm_device_t *dev )
1965 {
1966         drm_radeon_private_t *dev_priv = dev->dev_private;
1967         DRM_DEBUG( "\n" );
1968
1969         if (dev_priv->current_page != 0)
1970                 radeon_cp_dispatch_flip( dev );
1971
1972         dev_priv->page_flipping = 0;
1973         return 0;
1974 }
1975
/* Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.
 */

/* Ioctl: flip the display to the other page. */
int radeon_cp_flip( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	/* Lazily enable page flipping the first time a flip is requested. */
	if (!dev_priv->page_flipping) 
		radeon_do_init_pageflip( dev );
		
	radeon_cp_dispatch_flip( dev );

	COMMIT_RING();
	return 0;
}
1997
/* Ioctl: blit the back buffer to the front for the current cliprects. */
int radeon_cp_swap( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	/* Clamp the client-supplied cliprect count to the SAREA maximum. */
	if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap( dev );
	/* The swap clobbers 3D context state; force re-emission. */
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}
2018
2019 int radeon_cp_vertex( DRM_IOCTL_ARGS )
2020 {
2021         DRM_DEVICE;
2022         drm_radeon_private_t *dev_priv = dev->dev_private;
2023         drm_file_t *filp_priv;
2024         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2025         drm_device_dma_t *dma = dev->dma;
2026         drm_buf_t *buf;
2027         drm_radeon_vertex_t vertex;
2028         drm_radeon_tcl_prim_t prim;
2029
2030         LOCK_TEST_WITH_RETURN( dev, filp );
2031
2032         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2033
2034         DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex_t __user *)data,
2035                              sizeof(vertex) );
2036
2037         DRM_DEBUG( "pid=%d index=%d count=%d discard=%d\n",
2038                    DRM_CURRENTPID,
2039                    vertex.idx, vertex.count, vertex.discard );
2040
2041         if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
2042                 DRM_ERROR( "buffer index %d (of %d max)\n",
2043                            vertex.idx, dma->buf_count - 1 );
2044                 return DRM_ERR(EINVAL);
2045         }
2046         if ( vertex.prim < 0 ||
2047              vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
2048                 DRM_ERROR( "buffer prim %d\n", vertex.prim );
2049                 return DRM_ERR(EINVAL);
2050         }
2051
2052         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2053         VB_AGE_TEST_WITH_RETURN( dev_priv );
2054
2055         buf = dma->buflist[vertex.idx];
2056
2057         if ( buf->filp != filp ) {
2058                 DRM_ERROR( "process %d using buffer owned by %p\n",
2059                            DRM_CURRENTPID, buf->filp );
2060                 return DRM_ERR(EINVAL);
2061         }
2062         if ( buf->pending ) {
2063                 DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
2064                 return DRM_ERR(EINVAL);
2065         }
2066
2067         /* Build up a prim_t record:
2068          */
2069         if (vertex.count) {
2070                 buf->used = vertex.count; /* not used? */
2071
2072                 if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
2073                         if ( radeon_emit_state( dev_priv, filp_priv,
2074                                                 &sarea_priv->context_state,
2075                                                 sarea_priv->tex_state,
2076                                                 sarea_priv->dirty ) ) {
2077                                 DRM_ERROR( "radeon_emit_state failed\n" );
2078                                 return DRM_ERR( EINVAL );
2079                         }
2080
2081                         sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2082                                                RADEON_UPLOAD_TEX1IMAGES |
2083                                                RADEON_UPLOAD_TEX2IMAGES |
2084                                                RADEON_REQUIRE_QUIESCENCE);
2085                 }
2086
2087                 prim.start = 0;
2088                 prim.finish = vertex.count; /* unused */
2089                 prim.prim = vertex.prim;
2090                 prim.numverts = vertex.count;
2091                 prim.vc_format = dev_priv->sarea_priv->vc_format;
2092                 
2093                 radeon_cp_dispatch_vertex( dev, buf, &prim );
2094         }
2095
2096         if (vertex.discard) {
2097                 radeon_cp_discard_buffer( dev, buf );
2098         }
2099
2100         COMMIT_RING();
2101         return 0;
2102 }
2103
2104 int radeon_cp_indices( DRM_IOCTL_ARGS )
2105 {
2106         DRM_DEVICE;
2107         drm_radeon_private_t *dev_priv = dev->dev_private;
2108         drm_file_t *filp_priv;
2109         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2110         drm_device_dma_t *dma = dev->dma;
2111         drm_buf_t *buf;
2112         drm_radeon_indices_t elts;
2113         drm_radeon_tcl_prim_t prim;
2114         int count;
2115
2116         LOCK_TEST_WITH_RETURN( dev, filp );
2117
2118         if ( !dev_priv ) {
2119                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2120                 return DRM_ERR(EINVAL);
2121         }
2122
2123         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2124
2125         DRM_COPY_FROM_USER_IOCTL( elts, (drm_radeon_indices_t __user *)data,
2126                              sizeof(elts) );
2127
2128         DRM_DEBUG( "pid=%d index=%d start=%d end=%d discard=%d\n",
2129                    DRM_CURRENTPID,
2130                    elts.idx, elts.start, elts.end, elts.discard );
2131
2132         if ( elts.idx < 0 || elts.idx >= dma->buf_count ) {
2133                 DRM_ERROR( "buffer index %d (of %d max)\n",
2134                            elts.idx, dma->buf_count - 1 );
2135                 return DRM_ERR(EINVAL);
2136         }
2137         if ( elts.prim < 0 ||
2138              elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
2139                 DRM_ERROR( "buffer prim %d\n", elts.prim );
2140                 return DRM_ERR(EINVAL);
2141         }
2142
2143         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2144         VB_AGE_TEST_WITH_RETURN( dev_priv );
2145
2146         buf = dma->buflist[elts.idx];
2147
2148         if ( buf->filp != filp ) {
2149                 DRM_ERROR( "process %d using buffer owned by %p\n",
2150                            DRM_CURRENTPID, buf->filp );
2151                 return DRM_ERR(EINVAL);
2152         }
2153         if ( buf->pending ) {
2154                 DRM_ERROR( "sending pending buffer %d\n", elts.idx );
2155                 return DRM_ERR(EINVAL);
2156         }
2157
2158         count = (elts.end - elts.start) / sizeof(u16);
2159         elts.start -= RADEON_INDEX_PRIM_OFFSET;
2160
2161         if ( elts.start & 0x7 ) {
2162                 DRM_ERROR( "misaligned buffer 0x%x\n", elts.start );
2163                 return DRM_ERR(EINVAL);
2164         }
2165         if ( elts.start < buf->used ) {
2166                 DRM_ERROR( "no header 0x%x - 0x%x\n", elts.start, buf->used );
2167                 return DRM_ERR(EINVAL);
2168         }
2169
2170         buf->used = elts.end;
2171
2172         if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
2173                 if ( radeon_emit_state( dev_priv, filp_priv,
2174                                         &sarea_priv->context_state,
2175                                         sarea_priv->tex_state,
2176                                         sarea_priv->dirty ) ) {
2177                         DRM_ERROR( "radeon_emit_state failed\n" );
2178                         return DRM_ERR( EINVAL );
2179                 }
2180
2181                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2182                                        RADEON_UPLOAD_TEX1IMAGES |
2183                                        RADEON_UPLOAD_TEX2IMAGES |
2184                                        RADEON_REQUIRE_QUIESCENCE);
2185         }
2186
2187
2188         /* Build up a prim_t record:
2189          */
2190         prim.start = elts.start;
2191         prim.finish = elts.end; 
2192         prim.prim = elts.prim;
2193         prim.offset = 0;        /* offset from start of dma buffers */
2194         prim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2195         prim.vc_format = dev_priv->sarea_priv->vc_format;
2196         
2197         radeon_cp_dispatch_indices( dev, buf, &prim );
2198         if (elts.discard) {
2199                 radeon_cp_discard_buffer( dev, buf );
2200         }
2201
2202         COMMIT_RING();
2203         return 0;
2204 }
2205
2206 int radeon_cp_texture( DRM_IOCTL_ARGS )
2207 {
2208         DRM_DEVICE;
2209         drm_radeon_private_t *dev_priv = dev->dev_private;
2210         drm_radeon_texture_t tex;
2211         drm_radeon_tex_image_t image;
2212         int ret;
2213
2214         LOCK_TEST_WITH_RETURN( dev, filp );
2215
2216         DRM_COPY_FROM_USER_IOCTL( tex, (drm_radeon_texture_t __user *)data, sizeof(tex) );
2217
2218         if ( tex.image == NULL ) {
2219                 DRM_ERROR( "null texture image!\n" );
2220                 return DRM_ERR(EINVAL);
2221         }
2222
2223         if ( DRM_COPY_FROM_USER( &image,
2224                              (drm_radeon_tex_image_t __user *)tex.image,
2225                              sizeof(image) ) )
2226                 return DRM_ERR(EFAULT);
2227
2228         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2229         VB_AGE_TEST_WITH_RETURN( dev_priv );
2230
2231         ret = radeon_cp_dispatch_texture( filp, dev, &tex, &image );
2232
2233         COMMIT_RING();
2234         return ret;
2235 }
2236
2237 int radeon_cp_stipple( DRM_IOCTL_ARGS )
2238 {
2239         DRM_DEVICE;
2240         drm_radeon_private_t *dev_priv = dev->dev_private;
2241         drm_radeon_stipple_t stipple;
2242         u32 mask[32];
2243
2244         LOCK_TEST_WITH_RETURN( dev, filp );
2245
2246         DRM_COPY_FROM_USER_IOCTL( stipple, (drm_radeon_stipple_t __user *)data,
2247                              sizeof(stipple) );
2248
2249         if ( DRM_COPY_FROM_USER( &mask, stipple.mask, 32 * sizeof(u32) ) )
2250                 return DRM_ERR(EFAULT);
2251
2252         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2253
2254         radeon_cp_dispatch_stipple( dev, mask );
2255
2256         COMMIT_RING();
2257         return 0;
2258 }
2259
/* DRM_RADEON_INDIRECT ioctl: dispatch a client-filled buffer of raw CP
 * commands.  The command stream is NOT verified, so this must only be
 * reachable by privileged clients (see comment before dispatch below).
 */
int radeon_cp_indirect( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL( indirect, (drm_radeon_indirect_t __user *)data,
			     sizeof(indirect) );

	DRM_DEBUG( "indirect: idx=%d s=%d e=%d d=%d\n",
		   indirect.idx, indirect.start,
		   indirect.end, indirect.discard );

	/* Validate index, ownership and pending state before use. */
	if ( indirect.idx < 0 || indirect.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   indirect.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}
	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", indirect.idx );
		return DRM_ERR(EINVAL);
	}

	/* New commands must start at or beyond the already-consumed
	 * portion of the buffer.
	 */
	if ( indirect.start < buf->used ) {
		DRM_ERROR( "reusing indirect: start=0x%x actual=0x%x\n",
			   indirect.start, buf->used );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING( 2 );

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect( dev, buf, indirect.start, indirect.end );
	if (indirect.discard) {
		radeon_cp_discard_buffer( dev, buf );
	}


	COMMIT_RING();
	return 0;
}
2334
2335 int radeon_cp_vertex2( DRM_IOCTL_ARGS )
2336 {
2337         DRM_DEVICE;
2338         drm_radeon_private_t *dev_priv = dev->dev_private;
2339         drm_file_t *filp_priv;
2340         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2341         drm_device_dma_t *dma = dev->dma;
2342         drm_buf_t *buf;
2343         drm_radeon_vertex2_t vertex;
2344         int i;
2345         unsigned char laststate;
2346
2347         LOCK_TEST_WITH_RETURN( dev, filp );
2348
2349         if ( !dev_priv ) {
2350                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2351                 return DRM_ERR(EINVAL);
2352         }
2353
2354         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2355
2356         DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex2_t __user *)data,
2357                              sizeof(vertex) );
2358
2359         DRM_DEBUG( "pid=%d index=%d discard=%d\n",
2360                    DRM_CURRENTPID,
2361                    vertex.idx, vertex.discard );
2362
2363         if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
2364                 DRM_ERROR( "buffer index %d (of %d max)\n",
2365                            vertex.idx, dma->buf_count - 1 );
2366                 return DRM_ERR(EINVAL);
2367         }
2368
2369         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2370         VB_AGE_TEST_WITH_RETURN( dev_priv );
2371
2372         buf = dma->buflist[vertex.idx];
2373
2374         if ( buf->filp != filp ) {
2375                 DRM_ERROR( "process %d using buffer owned by %p\n",
2376                            DRM_CURRENTPID, buf->filp );
2377                 return DRM_ERR(EINVAL);
2378         }
2379
2380         if ( buf->pending ) {
2381                 DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
2382                 return DRM_ERR(EINVAL);
2383         }
2384         
2385         if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2386                 return DRM_ERR(EINVAL);
2387
2388         for (laststate = 0xff, i = 0 ; i < vertex.nr_prims ; i++) {
2389                 drm_radeon_prim_t prim;
2390                 drm_radeon_tcl_prim_t tclprim;
2391                 
2392                 if ( DRM_COPY_FROM_USER( &prim, &vertex.prim[i], sizeof(prim) ) )
2393                         return DRM_ERR(EFAULT);
2394                 
2395                 if ( prim.stateidx != laststate ) {
2396                         drm_radeon_state_t state;                              
2397                                 
2398                         if ( DRM_COPY_FROM_USER( &state, 
2399                                              &vertex.state[prim.stateidx], 
2400                                              sizeof(state) ) )
2401                                 return DRM_ERR(EFAULT);
2402
2403                         if ( radeon_emit_state2( dev_priv, filp_priv, &state ) ) {
2404                                 DRM_ERROR( "radeon_emit_state2 failed\n" );
2405                                 return DRM_ERR( EINVAL );
2406                         }
2407
2408                         laststate = prim.stateidx;
2409                 }
2410
2411                 tclprim.start = prim.start;
2412                 tclprim.finish = prim.finish;
2413                 tclprim.prim = prim.prim;
2414                 tclprim.vc_format = prim.vc_format;
2415
2416                 if ( prim.prim & RADEON_PRIM_WALK_IND ) {
2417                         tclprim.offset = prim.numverts * 64;
2418                         tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2419
2420                         radeon_cp_dispatch_indices( dev, buf, &tclprim );
2421                 } else {
2422                         tclprim.numverts = prim.numverts;
2423                         tclprim.offset = 0; /* not used */
2424
2425                         radeon_cp_dispatch_vertex( dev, buf, &tclprim );
2426                 }
2427                 
2428                 if (sarea_priv->nbox == 1)
2429                         sarea_priv->nbox = 0;
2430         }
2431
2432         if ( vertex.discard ) {
2433                 radeon_cp_discard_buffer( dev, buf );
2434         }
2435
2436         COMMIT_RING();
2437         return 0;
2438 }
2439
2440
/* Emit one register-write packet from the command buffer.  The packet
 * id indexes the global "packet" table, which supplies the start
 * register and dword count; the payload is verified (and offsets fixed
 * up) before being copied into the ring.
 */
static int radeon_emit_packets( 
	drm_radeon_private_t *dev_priv,
	drm_file_t *filp_priv,
	drm_radeon_cmd_header_t header,
	drm_radeon_cmd_buffer_t *cmdbuf )
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;
   
	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	/* The declared payload must actually be present in the buffer. */
	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR( "Packet size provided larger than data provided\n" );
		return DRM_ERR(EINVAL);
	}

	if ( radeon_check_and_fixup_packets( dev_priv, filp_priv, id, data ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return DRM_ERR( EINVAL );
	}

	/* One CP_PACKET0 header plus sz data dwords. */
	BEGIN_RING(sz+1);
	OUT_RING( CP_PACKET0( reg, (sz-1) ) );
	OUT_RING_TABLE( data, sz );
	ADVANCE_RING();

	/* Advance the command buffer past the consumed payload. */
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2477
2478 static __inline__ int radeon_emit_scalars( 
2479         drm_radeon_private_t *dev_priv,
2480         drm_radeon_cmd_header_t header,
2481         drm_radeon_cmd_buffer_t *cmdbuf )
2482 {
2483         int sz = header.scalars.count;
2484         int start = header.scalars.offset;
2485         int stride = header.scalars.stride;
2486         RING_LOCALS;
2487
2488         BEGIN_RING( 3+sz );
2489         OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
2490         OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2491         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
2492         OUT_RING_TABLE( cmdbuf->buf, sz );
2493         ADVANCE_RING();
2494         cmdbuf->buf += sz * sizeof(int);
2495         cmdbuf->bufsz -= sz * sizeof(int);
2496         return 0;
2497 }
2498
2499 /* God this is ugly
2500  */
2501 static __inline__ int radeon_emit_scalars2( 
2502         drm_radeon_private_t *dev_priv,
2503         drm_radeon_cmd_header_t header,
2504         drm_radeon_cmd_buffer_t *cmdbuf )
2505 {
2506         int sz = header.scalars.count;
2507         int start = ((unsigned int)header.scalars.offset) + 0x100;
2508         int stride = header.scalars.stride;
2509         RING_LOCALS;
2510
2511         BEGIN_RING( 3+sz );
2512         OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
2513         OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2514         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
2515         OUT_RING_TABLE( cmdbuf->buf, sz );
2516         ADVANCE_RING();
2517         cmdbuf->buf += sz * sizeof(int);
2518         cmdbuf->bufsz -= sz * sizeof(int);
2519         return 0;
2520 }
2521
2522 static __inline__ int radeon_emit_vectors( 
2523         drm_radeon_private_t *dev_priv,
2524         drm_radeon_cmd_header_t header,
2525         drm_radeon_cmd_buffer_t *cmdbuf )
2526 {
2527         int sz = header.vectors.count;
2528         int start = header.vectors.offset;
2529         int stride = header.vectors.stride;
2530         RING_LOCALS;
2531
2532         BEGIN_RING( 3+sz );
2533         OUT_RING( CP_PACKET0( RADEON_SE_TCL_VECTOR_INDX_REG, 0 ) );
2534         OUT_RING( start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2535         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_VECTOR_DATA_REG, (sz-1) ) );
2536         OUT_RING_TABLE( cmdbuf->buf, sz );
2537         ADVANCE_RING();
2538
2539         cmdbuf->buf += sz * sizeof(int);
2540         cmdbuf->bufsz -= sz * sizeof(int);
2541         return 0;
2542 }
2543
2544
/* Emit a raw PACKET3 command from the command buffer, after it has
 * been verified (and offsets fixed up) by
 * radeon_check_and_fixup_packet3(), which also reports the packet's
 * total size in dwords via cmdsz.
 */
static int radeon_emit_packet3( drm_device_t *dev,
				drm_file_t *filp_priv,
				drm_radeon_cmd_buffer_t *cmdbuf )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
						     cmdbuf, &cmdsz ) ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return ret;
	}

	BEGIN_RING( cmdsz );
	OUT_RING_TABLE( cmdbuf->buf, cmdsz );
	ADVANCE_RING();

	/* Consume the cmdsz dwords just emitted. */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2570
2571
/* Emit a verified PACKET3 command once per cliprect: each iteration
 * programs a clip rectangle and replays the same command.  With no
 * cliprects at all the packet is skipped but still consumed from the
 * command buffer (the "out" path).
 */
static int radeon_emit_packet3_cliprect( drm_device_t *dev,
					 drm_file_t *filp_priv,
					 drm_radeon_cmd_buffer_t *cmdbuf,
					 int orig_nbox )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
						     cmdbuf, &cmdsz ) ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if ( i < cmdbuf->nbox ) {
			if (DRM_COPY_FROM_USER( &box, &boxes[i], sizeof(box) ))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if ( i ) {
				BEGIN_RING( 2 );
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect( dev_priv, &box );
		}
		
		BEGIN_RING( cmdsz );
		OUT_RING_TABLE( cmdbuf->buf, cmdsz );
		ADVANCE_RING();

	} while ( ++i < cmdbuf->nbox );
	/* A single cliprect needs no replay by later commands. */
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

 out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2633
2634
2635 static int radeon_emit_wait( drm_device_t *dev, int flags )
2636 {
2637         drm_radeon_private_t *dev_priv = dev->dev_private;
2638         RING_LOCALS;
2639
2640         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2641         switch (flags) {
2642         case RADEON_WAIT_2D:
2643                 BEGIN_RING( 2 );
2644                 RADEON_WAIT_UNTIL_2D_IDLE(); 
2645                 ADVANCE_RING();
2646                 break;
2647         case RADEON_WAIT_3D:
2648                 BEGIN_RING( 2 );
2649                 RADEON_WAIT_UNTIL_3D_IDLE(); 
2650                 ADVANCE_RING();
2651                 break;
2652         case RADEON_WAIT_2D|RADEON_WAIT_3D:
2653                 BEGIN_RING( 2 );
2654                 RADEON_WAIT_UNTIL_IDLE(); 
2655                 ADVANCE_RING();
2656                 break;
2657         default:
2658                 return DRM_ERR(EINVAL);
2659         }
2660
2661         return 0;
2662 }
2663
2664 int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
2665 {
2666         DRM_DEVICE;
2667         drm_radeon_private_t *dev_priv = dev->dev_private;
2668         drm_file_t *filp_priv;
2669         drm_device_dma_t *dma = dev->dma;
2670         drm_buf_t *buf = NULL;
2671         int idx;
2672         drm_radeon_cmd_buffer_t cmdbuf;
2673         drm_radeon_cmd_header_t header;
2674         int orig_nbox, orig_bufsz;
2675         char *kbuf=NULL;
2676
2677         LOCK_TEST_WITH_RETURN( dev, filp );
2678
2679         if ( !dev_priv ) {
2680                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2681                 return DRM_ERR(EINVAL);
2682         }
2683
2684         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2685
2686         DRM_COPY_FROM_USER_IOCTL( cmdbuf, (drm_radeon_cmd_buffer_t __user *)data,
2687                              sizeof(cmdbuf) );
2688
2689         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2690         VB_AGE_TEST_WITH_RETURN( dev_priv );
2691
2692         if (cmdbuf.bufsz > 64*1024 || cmdbuf.bufsz<0) {
2693                 return DRM_ERR(EINVAL);
2694         }
2695
2696         /* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
2697          * races between checking values and using those values in other code,
2698          * and simply to avoid a lot of function calls to copy in data.
2699          */
2700         orig_bufsz = cmdbuf.bufsz;
2701         if (orig_bufsz != 0) {
2702                 kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
2703                 if (kbuf == NULL)
2704                         return DRM_ERR(ENOMEM);
2705                 if (DRM_COPY_FROM_USER(kbuf, cmdbuf.buf, cmdbuf.bufsz))
2706                         return DRM_ERR(EFAULT);
2707                 cmdbuf.buf = kbuf;
2708         }
2709
2710         orig_nbox = cmdbuf.nbox;
2711
2712         while ( cmdbuf.bufsz >= sizeof(header) ) {
2713
2714                 header.i = *(int *)cmdbuf.buf;
2715                 cmdbuf.buf += sizeof(header);
2716                 cmdbuf.bufsz -= sizeof(header);
2717
2718                 switch (header.header.cmd_type) {
2719                 case RADEON_CMD_PACKET: 
2720                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2721                         if (radeon_emit_packets( dev_priv, filp_priv, header, &cmdbuf )) {
2722                                 DRM_ERROR("radeon_emit_packets failed\n");
2723                                 goto err;
2724                         }
2725                         break;
2726
2727                 case RADEON_CMD_SCALARS:
2728                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2729                         if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) {
2730                                 DRM_ERROR("radeon_emit_scalars failed\n");
2731                                 goto err;
2732                         }
2733                         break;
2734
2735                 case RADEON_CMD_VECTORS:
2736                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2737                         if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) {
2738                                 DRM_ERROR("radeon_emit_vectors failed\n");
2739                                 goto err;
2740                         }
2741                         break;
2742
2743                 case RADEON_CMD_DMA_DISCARD:
2744                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2745                         idx = header.dma.buf_idx;
2746                         if ( idx < 0 || idx >= dma->buf_count ) {
2747                                 DRM_ERROR( "buffer index %d (of %d max)\n",
2748                                            idx, dma->buf_count - 1 );
2749                                 goto err;
2750                         }
2751
2752                         buf = dma->buflist[idx];
2753                         if ( buf->filp != filp || buf->pending ) {
2754                                 DRM_ERROR( "bad buffer %p %p %d\n",
2755                                            buf->filp, filp, buf->pending);
2756                                 goto err;
2757                         }
2758
2759                         radeon_cp_discard_buffer( dev, buf );
2760                         break;
2761
2762                 case RADEON_CMD_PACKET3:
2763                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2764                         if (radeon_emit_packet3( dev, filp_priv, &cmdbuf )) {
2765                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2766                                 goto err;
2767                         }
2768                         break;
2769
2770                 case RADEON_CMD_PACKET3_CLIP:
2771                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2772                         if (radeon_emit_packet3_cliprect( dev, filp_priv, &cmdbuf, orig_nbox )) {
2773                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2774                                 goto err;
2775                         }
2776                         break;
2777
2778                 case RADEON_CMD_SCALARS2:
2779                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2780                         if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) {
2781                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2782                                 goto err;
2783                         }
2784                         break;
2785
2786                 case RADEON_CMD_WAIT:
2787                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2788                         if (radeon_emit_wait( dev, header.wait.flags )) {
2789                                 DRM_ERROR("radeon_emit_wait failed\n");
2790                                 goto err;
2791                         }
2792                         break;
2793                 default:
2794                         DRM_ERROR("bad cmd_type %d at %p\n", 
2795                                   header.header.cmd_type,
2796                                   cmdbuf.buf - sizeof(header));
2797                         goto err;
2798                 }
2799         }
2800
2801         if (orig_bufsz != 0)
2802                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2803
2804         DRM_DEBUG("DONE\n");
2805         COMMIT_RING();
2806         return 0;
2807
2808 err:
2809         if (orig_bufsz != 0)
2810                 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2811         return DRM_ERR(EINVAL);
2812 }
2813
2814
2815
/* DRM_RADEON_GETPARAM ioctl: read back a driver/hardware parameter
 * and copy the int-sized result to user space.
 */
int radeon_cp_getparam( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_getparam_t param;
	int value;

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL( param, (drm_radeon_getparam_t __user *)data,
			     sizeof(param) );

	DRM_DEBUG( "pid=%d\n", DRM_CURRENTPID );

	switch( param.param ) {
	case RADEON_PARAM_GART_BUFFER_OFFSET:
		value = dev_priv->gart_buffers_offset;
		break;
	case RADEON_PARAM_LAST_FRAME:
		/* Scratch-register reads are counted in the stats. */
		dev_priv->stats.last_frame_reads++;
		value = GET_SCRATCH( 0 );
		break;
	case RADEON_PARAM_LAST_DISPATCH:
		value = GET_SCRATCH( 1 );
		break;
	case RADEON_PARAM_LAST_CLEAR:
		dev_priv->stats.last_clear_reads++;
		value = GET_SCRATCH( 2 );
		break;
	case RADEON_PARAM_IRQ_NR:
		value = dev->irq;
		break;
	case RADEON_PARAM_GART_BASE:
		value = dev_priv->gart_vm_start;
		break;
	case RADEON_PARAM_REGISTER_HANDLE:
		value = dev_priv->mmio_offset;
		break;
	case RADEON_PARAM_STATUS_HANDLE:
		value = dev_priv->ring_rptr_offset;
		break;
#if BITS_PER_LONG == 32
	/*
	 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
	 * pointer which can't fit into an int-sized variable.  According to
	 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
	 * not supporting it shouldn't be a problem.  If the same functionality
	 * is needed on 64-bit platforms, a new ioctl() would have to be added,
	 * so backwards-compatibility for the embedded platforms can be
	 * maintained.  --davidm 4-Feb-2004.
	 */
	case RADEON_PARAM_SAREA_HANDLE:
		/* The lock is the first dword in the sarea. */
		value = (long)dev->lock.hw_lock;
		break;
#endif
	case RADEON_PARAM_GART_TEX_HANDLE:
		value = dev_priv->gart_textures_offset;
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	if ( DRM_COPY_TO_USER( param.value, &value, sizeof(int) ) ) {
		DRM_ERROR( "copy_to_user\n" );
		return DRM_ERR(EFAULT);
	}
	
	return 0;
}
2889
/* DRM_RADEON_SETPARAM ioctl: set a driver parameter — the per-file
 * framebuffer-location delta, or the global colour-tiling state.
 */
int radeon_cp_setparam( DRM_IOCTL_ARGS ) {
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_setparam_t sp;
	struct drm_radeon_driver_file_fields *radeon_priv;

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR( EINVAL );
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( sp, ( drm_radeon_setparam_t __user * )data,
				  sizeof( sp ) );

	/* NOTE(review): unlike most ioctls in this file there is no
	 * LOCK_TEST_WITH_RETURN before mutating shared device state —
	 * confirm whether that is intentional.
	 */
	switch( sp.param ) {
	case RADEON_SETPARAM_FB_LOCATION:
		/* Client reports where it believes the framebuffer is;
		 * store the delta from the real location for offset
		 * fixups elsewhere.
		 */
		radeon_priv = filp_priv->driver_priv;
		radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
		break;
	case RADEON_SETPARAM_SWITCH_TILING:
		if (sp.value == 0) {
			DRM_DEBUG( "color tiling disabled\n" );
			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 0;
		}
		else if (sp.value == 1) {
			DRM_DEBUG( "color tiling enabled\n" );
			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 1;
		}
		break;	
	default:
		DRM_DEBUG( "Invalid parameter %d\n", sp.param );
		return DRM_ERR( EINVAL );
	}

	return 0;
}
2933
2934 /* When a client dies:
2935  *    - Check for and clean up flipped page state
2936  *    - Free any alloced GART memory.
2937  *
2938  * DRM infrastructure takes care of reclaiming dma buffers.
2939  */
2940 void radeon_driver_prerelease(drm_device_t *dev, DRMFILE filp)
2941 {
2942         if ( dev->dev_private ) {                               
2943                 drm_radeon_private_t *dev_priv = dev->dev_private; 
2944                 if ( dev_priv->page_flipping ) {                
2945                         radeon_do_cleanup_pageflip( dev );      
2946                 }                                               
2947                 radeon_mem_release( filp, dev_priv->gart_heap ); 
2948                 radeon_mem_release( filp, dev_priv->fb_heap );  
2949                 radeon_surfaces_release(filp, dev_priv);
2950         }                               
2951 }
2952
/* Device teardown hook: hand off to the common release path. */
void radeon_driver_pretakedown(drm_device_t *dev)
{
	radeon_do_release(dev);
}
2957
2958 int radeon_driver_open_helper(drm_device_t *dev, drm_file_t *filp_priv)
2959 {
2960         drm_radeon_private_t *dev_priv = dev->dev_private;
2961         struct drm_radeon_driver_file_fields *radeon_priv;
2962         
2963         radeon_priv = (struct drm_radeon_driver_file_fields *)drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
2964         
2965         if (!radeon_priv)
2966                 return -ENOMEM;
2967
2968         filp_priv->driver_priv = radeon_priv;
2969         if ( dev_priv )
2970                 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
2971         else
2972                 radeon_priv->radeon_fb_delta = 0;
2973         return 0;
2974 }
2975
2976
2977 void radeon_driver_free_filp_priv(drm_device_t *dev, drm_file_t *filp_priv)
2978 {
2979          struct drm_radeon_driver_file_fields *radeon_priv = filp_priv->driver_priv;
2980          
2981          drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
2982 }