This commit was manufactured by cvs2svn to create tag
[linux-2.6.git] / drivers / char / drm / radeon_state.c
1 /* radeon_state.c -- State support for Radeon -*- linux-c -*-
2  *
3  * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  *
25  * Authors:
26  *    Gareth Hughes <gareth@valinux.com>
27  *    Kevin E. Martin <martin@valinux.com>
28  */
29
30 #include "radeon.h"
31 #include "drmP.h"
32 #include "drm.h"
33 #include "drm_sarea.h"
34 #include "radeon_drm.h"
35 #include "radeon_drv.h"
36
37
38 /* ================================================================
39  * Helper functions for client state checking and fixup
40  */
41
42 static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_priv,
43                                                      drm_file_t *filp_priv,
44                                                      u32 *offset ) {
45         u32 off = *offset;
46
47         if ( off >= dev_priv->fb_location &&
48              off < ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
49                 return 0;
50
51         off += filp_priv->radeon_fb_delta;
52
53         DRM_DEBUG( "offset fixed up to 0x%x\n", off );
54
55         if ( off < dev_priv->fb_location ||
56              off >= ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
57                 return DRM_ERR( EINVAL );
58
59         *offset = off;
60
61         return 0;
62 }
63
64 static __inline__ int radeon_check_and_fixup_offset_user( drm_radeon_private_t *dev_priv,
65                                                           drm_file_t *filp_priv,
66                                                           u32 *offset ) {
67         u32 off;
68
69         DRM_GET_USER_UNCHECKED( off, offset );
70
71         if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &off ) )
72                 return DRM_ERR( EINVAL );
73
74         DRM_PUT_USER_UNCHECKED( offset, off );
75
76         return 0;
77 }
78
/* Validate one client state packet, identified by its RADEON_EMIT_* /
 * R200_EMIT_* id.  For packets that carry memory offsets, the offending
 * dword (which lives in user space -- "data" is a user pointer) is
 * checked and fixed up in place via radeon_check_and_fixup_offset_user().
 * Returns 0 on success, DRM_ERR(EINVAL) on a bad offset or unknown id.
 */
static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_priv,
                                                      drm_file_t *filp_priv,
                                                      int id,
                                                      u32 *data ) {
        switch ( id ) {

        case RADEON_EMIT_PP_MISC:
                /* Depth buffer offset is at a fixed dword index inside
                 * the PP_MISC register block. */
                if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv,
                                                         &data[( RADEON_RB3D_DEPTHOFFSET
                                                                 - RADEON_PP_MISC ) / 4] ) ) {
                        DRM_ERROR( "Invalid depth buffer offset\n" );
                        return DRM_ERR( EINVAL );
                }
                break;

        case RADEON_EMIT_PP_CNTL:
                /* Colour buffer offset inside the PP_CNTL block. */
                if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv,
                                                         &data[( RADEON_RB3D_COLOROFFSET
                                                                 - RADEON_PP_CNTL ) / 4] ) ) {
                        DRM_ERROR( "Invalid colour buffer offset\n" );
                        return DRM_ERR( EINVAL );
                }
                break;

        case R200_EMIT_PP_TXOFFSET_0:
        case R200_EMIT_PP_TXOFFSET_1:
        case R200_EMIT_PP_TXOFFSET_2:
        case R200_EMIT_PP_TXOFFSET_3:
        case R200_EMIT_PP_TXOFFSET_4:
        case R200_EMIT_PP_TXOFFSET_5:
                /* R200 texture offset packets: the offset is dword 0. */
                if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv,
                                                         &data[0] ) ) {
                        DRM_ERROR( "Invalid R200 texture offset\n" );
                        return DRM_ERR( EINVAL );
                }
                break;

        case RADEON_EMIT_PP_TXFILTER_0:
        case RADEON_EMIT_PP_TXFILTER_1:
        case RADEON_EMIT_PP_TXFILTER_2:
                /* R100 texture packets: offset lives at the TXOFFSET
                 * register's index within the TXFILTER block. */
                if ( radeon_check_and_fixup_offset_user( dev_priv, filp_priv,
                                                         &data[( RADEON_PP_TXOFFSET_0
                                                                 - RADEON_PP_TXFILTER_0 ) / 4] ) ) {
                        DRM_ERROR( "Invalid R100 texture offset\n" );
                        return DRM_ERR( EINVAL );
                }
                break;

        case R200_EMIT_PP_CUBIC_OFFSETS_0:
        case R200_EMIT_PP_CUBIC_OFFSETS_1:
        case R200_EMIT_PP_CUBIC_OFFSETS_2:
        case R200_EMIT_PP_CUBIC_OFFSETS_3:
        case R200_EMIT_PP_CUBIC_OFFSETS_4:
        case R200_EMIT_PP_CUBIC_OFFSETS_5: {
                int i;
                /* Cubic-map packets carry five offsets, one per dword. */
                for ( i = 0; i < 5; i++ ) {
                        if ( radeon_check_and_fixup_offset_user( dev_priv,
                                                                 filp_priv,
                                                                 &data[i] ) ) {
                                DRM_ERROR( "Invalid R200 cubic texture offset\n" );
                                return DRM_ERR( EINVAL );
                        }
                }
                break;
        }

        case RADEON_EMIT_RB3D_COLORPITCH:
        case RADEON_EMIT_RE_LINE_PATTERN:
        case RADEON_EMIT_SE_LINE_WIDTH:
        case RADEON_EMIT_PP_LUM_MATRIX:
        case RADEON_EMIT_PP_ROT_MATRIX_0:
        case RADEON_EMIT_RB3D_STENCILREFMASK:
        case RADEON_EMIT_SE_VPORT_XSCALE:
        case RADEON_EMIT_SE_CNTL:
        case RADEON_EMIT_SE_CNTL_STATUS:
        case RADEON_EMIT_RE_MISC:
        case RADEON_EMIT_PP_BORDER_COLOR_0:
        case RADEON_EMIT_PP_BORDER_COLOR_1:
        case RADEON_EMIT_PP_BORDER_COLOR_2:
        case RADEON_EMIT_SE_ZBIAS_FACTOR:
        case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
        case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
        case R200_EMIT_PP_TXCBLEND_0:
        case R200_EMIT_PP_TXCBLEND_1:
        case R200_EMIT_PP_TXCBLEND_2:
        case R200_EMIT_PP_TXCBLEND_3:
        case R200_EMIT_PP_TXCBLEND_4:
        case R200_EMIT_PP_TXCBLEND_5:
        case R200_EMIT_PP_TXCBLEND_6:
        case R200_EMIT_PP_TXCBLEND_7:
        case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
        case R200_EMIT_TFACTOR_0:
        case R200_EMIT_VTX_FMT_0:
        case R200_EMIT_VAP_CTL:
        case R200_EMIT_MATRIX_SELECT_0:
        case R200_EMIT_TEX_PROC_CTL_2:
        case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
        case R200_EMIT_PP_TXFILTER_0:
        case R200_EMIT_PP_TXFILTER_1:
        case R200_EMIT_PP_TXFILTER_2:
        case R200_EMIT_PP_TXFILTER_3:
        case R200_EMIT_PP_TXFILTER_4:
        case R200_EMIT_PP_TXFILTER_5:
        case R200_EMIT_VTE_CNTL:
        case R200_EMIT_OUTPUT_VTX_COMP_SEL:
        case R200_EMIT_PP_TAM_DEBUG3:
        case R200_EMIT_PP_CNTL_X:
        case R200_EMIT_RB3D_DEPTHXY_OFFSET:
        case R200_EMIT_RE_AUX_SCISSOR_CNTL:
        case R200_EMIT_RE_SCISSOR_TL_0:
        case R200_EMIT_RE_SCISSOR_TL_1:
        case R200_EMIT_RE_SCISSOR_TL_2:
        case R200_EMIT_SE_VAP_CNTL_STATUS:
        case R200_EMIT_SE_VTX_STATE_CNTL:
        case R200_EMIT_RE_POINTSIZE:
        case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
        case R200_EMIT_PP_CUBIC_FACES_0:
        case R200_EMIT_PP_CUBIC_FACES_1:
        case R200_EMIT_PP_CUBIC_FACES_2:
        case R200_EMIT_PP_CUBIC_FACES_3:
        case R200_EMIT_PP_CUBIC_FACES_4:
        case R200_EMIT_PP_CUBIC_FACES_5:
        case RADEON_EMIT_PP_TEX_SIZE_0:
        case RADEON_EMIT_PP_TEX_SIZE_1:
        case RADEON_EMIT_PP_TEX_SIZE_2:
                /* These packets don't contain memory offsets */
                break;

        default:
                DRM_ERROR( "Unknown state packet ID %d\n", id );
                return DRM_ERR( EINVAL );
        }

        return 0;
}
214
215 static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_priv,
216                                                       drm_file_t *filp_priv,
217                                                       drm_radeon_cmd_buffer_t *cmdbuf,
218                                                       unsigned int *cmdsz ) {
219         u32 tmp[4], *cmd = ( u32* )cmdbuf->buf;
220
221         if ( DRM_COPY_FROM_USER_UNCHECKED( tmp, cmd, sizeof( tmp ) ) ) {
222                 DRM_ERROR( "Failed to copy data from user space\n" );
223                 return DRM_ERR( EFAULT );
224         }
225
226         *cmdsz = 2 + ( ( tmp[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 );
227
228         if ( ( tmp[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) {
229                 DRM_ERROR( "Not a type 3 packet\n" );
230                 return DRM_ERR( EINVAL );
231         }
232
233         if ( 4 * *cmdsz > cmdbuf->bufsz ) {
234                 DRM_ERROR( "Packet size larger than size of data provided\n" );
235                 return DRM_ERR( EINVAL );
236         }
237
238         /* Check client state and fix it up if necessary */
239         if ( tmp[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */
240                 u32 offset;
241
242                 if ( tmp[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL
243                               | RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
244                         offset = tmp[2] << 10;
245                         if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
246                                 DRM_ERROR( "Invalid first packet offset\n" );
247                                 return DRM_ERR( EINVAL );
248                         }
249                         tmp[2] = ( tmp[2] & 0xffc00000 ) | offset >> 10;
250                 }
251
252                 if ( ( tmp[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) &&
253                      ( tmp[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
254                         offset = tmp[3] << 10;
255                         if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
256                                 DRM_ERROR( "Invalid second packet offset\n" );
257                                 return DRM_ERR( EINVAL );
258                         }
259                         tmp[3] = ( tmp[3] & 0xffc00000 ) | offset >> 10;
260                 }
261
262                 if ( DRM_COPY_TO_USER_UNCHECKED( cmd, tmp, sizeof( tmp ) ) ) {
263                         DRM_ERROR( "Failed to copy data to user space\n" );
264                         return DRM_ERR( EFAULT );
265                 }
266         }
267
268         return 0;
269 }
270
271
272 /* ================================================================
273  * CP hardware state programming functions
274  */
275
/* Emit the hardware clip rectangle for one drawable clip box:
 * (x1, y1) goes to RE_TOP_LEFT and (x2 - 1, y2 - 1) to RE_WIDTH_HEIGHT
 * (the box's bottom-right is exclusive, hence the minus one).
 */
static __inline__ void radeon_emit_clip_rect( drm_radeon_private_t *dev_priv,
                                          drm_clip_rect_t *box )
{
        RING_LOCALS;

        DRM_DEBUG( "   box:  x1=%d y1=%d  x2=%d y2=%d\n",
                   box->x1, box->y1, box->x2, box->y2 );

        BEGIN_RING( 4 );
        OUT_RING( CP_PACKET0( RADEON_RE_TOP_LEFT, 0 ) );
        OUT_RING( (box->y1 << 16) | box->x1 );
        OUT_RING( CP_PACKET0( RADEON_RE_WIDTH_HEIGHT, 0 ) );
        OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) );
        ADVANCE_RING();
}
291
292 /* Emit 1.1 state
293  */
294 static int radeon_emit_state( drm_radeon_private_t *dev_priv,
295                               drm_file_t *filp_priv,
296                               drm_radeon_context_regs_t *ctx,
297                               drm_radeon_texture_regs_t *tex,
298                               unsigned int dirty )
299 {
300         RING_LOCALS;
301         DRM_DEBUG( "dirty=0x%08x\n", dirty );
302
303         if ( dirty & RADEON_UPLOAD_CONTEXT ) {
304                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
305                                                     &ctx->rb3d_depthoffset ) ) {
306                         DRM_ERROR( "Invalid depth buffer offset\n" );
307                         return DRM_ERR( EINVAL );
308                 }
309
310                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
311                                                     &ctx->rb3d_coloroffset ) ) {
312                         DRM_ERROR( "Invalid depth buffer offset\n" );
313                         return DRM_ERR( EINVAL );
314                 }
315
316                 BEGIN_RING( 14 );
317                 OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) );
318                 OUT_RING( ctx->pp_misc );
319                 OUT_RING( ctx->pp_fog_color );
320                 OUT_RING( ctx->re_solid_color );
321                 OUT_RING( ctx->rb3d_blendcntl );
322                 OUT_RING( ctx->rb3d_depthoffset );
323                 OUT_RING( ctx->rb3d_depthpitch );
324                 OUT_RING( ctx->rb3d_zstencilcntl );
325                 OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) );
326                 OUT_RING( ctx->pp_cntl );
327                 OUT_RING( ctx->rb3d_cntl );
328                 OUT_RING( ctx->rb3d_coloroffset );
329                 OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) );
330                 OUT_RING( ctx->rb3d_colorpitch );
331                 ADVANCE_RING();
332         }
333
334         if ( dirty & RADEON_UPLOAD_VERTFMT ) {
335                 BEGIN_RING( 2 );
336                 OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) );
337                 OUT_RING( ctx->se_coord_fmt );
338                 ADVANCE_RING();
339         }
340
341         if ( dirty & RADEON_UPLOAD_LINE ) {
342                 BEGIN_RING( 5 );
343                 OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) );
344                 OUT_RING( ctx->re_line_pattern );
345                 OUT_RING( ctx->re_line_state );
346                 OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) );
347                 OUT_RING( ctx->se_line_width );
348                 ADVANCE_RING();
349         }
350
351         if ( dirty & RADEON_UPLOAD_BUMPMAP ) {
352                 BEGIN_RING( 5 );
353                 OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) );
354                 OUT_RING( ctx->pp_lum_matrix );
355                 OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) );
356                 OUT_RING( ctx->pp_rot_matrix_0 );
357                 OUT_RING( ctx->pp_rot_matrix_1 );
358                 ADVANCE_RING();
359         }
360
361         if ( dirty & RADEON_UPLOAD_MASKS ) {
362                 BEGIN_RING( 4 );
363                 OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) );
364                 OUT_RING( ctx->rb3d_stencilrefmask );
365                 OUT_RING( ctx->rb3d_ropcntl );
366                 OUT_RING( ctx->rb3d_planemask );
367                 ADVANCE_RING();
368         }
369
370         if ( dirty & RADEON_UPLOAD_VIEWPORT ) {
371                 BEGIN_RING( 7 );
372                 OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) );
373                 OUT_RING( ctx->se_vport_xscale );
374                 OUT_RING( ctx->se_vport_xoffset );
375                 OUT_RING( ctx->se_vport_yscale );
376                 OUT_RING( ctx->se_vport_yoffset );
377                 OUT_RING( ctx->se_vport_zscale );
378                 OUT_RING( ctx->se_vport_zoffset );
379                 ADVANCE_RING();
380         }
381
382         if ( dirty & RADEON_UPLOAD_SETUP ) {
383                 BEGIN_RING( 4 );
384                 OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) );
385                 OUT_RING( ctx->se_cntl );
386                 OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) );
387                 OUT_RING( ctx->se_cntl_status );
388                 ADVANCE_RING();
389         }
390
391         if ( dirty & RADEON_UPLOAD_MISC ) {
392                 BEGIN_RING( 2 );
393                 OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) );
394                 OUT_RING( ctx->re_misc );
395                 ADVANCE_RING();
396         }
397
398         if ( dirty & RADEON_UPLOAD_TEX0 ) {
399                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
400                                                     &tex[0].pp_txoffset ) ) {
401                         DRM_ERROR( "Invalid texture offset for unit 0\n" );
402                         return DRM_ERR( EINVAL );
403                 }
404
405                 BEGIN_RING( 9 );
406                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) );
407                 OUT_RING( tex[0].pp_txfilter );
408                 OUT_RING( tex[0].pp_txformat );
409                 OUT_RING( tex[0].pp_txoffset );
410                 OUT_RING( tex[0].pp_txcblend );
411                 OUT_RING( tex[0].pp_txablend );
412                 OUT_RING( tex[0].pp_tfactor );
413                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) );
414                 OUT_RING( tex[0].pp_border_color );
415                 ADVANCE_RING();
416         }
417
418         if ( dirty & RADEON_UPLOAD_TEX1 ) {
419                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
420                                                     &tex[1].pp_txoffset ) ) {
421                         DRM_ERROR( "Invalid texture offset for unit 1\n" );
422                         return DRM_ERR( EINVAL );
423                 }
424
425                 BEGIN_RING( 9 );
426                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) );
427                 OUT_RING( tex[1].pp_txfilter );
428                 OUT_RING( tex[1].pp_txformat );
429                 OUT_RING( tex[1].pp_txoffset );
430                 OUT_RING( tex[1].pp_txcblend );
431                 OUT_RING( tex[1].pp_txablend );
432                 OUT_RING( tex[1].pp_tfactor );
433                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) );
434                 OUT_RING( tex[1].pp_border_color );
435                 ADVANCE_RING();
436         }
437
438         if ( dirty & RADEON_UPLOAD_TEX2 ) {
439                 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
440                                                     &tex[2].pp_txoffset ) ) {
441                         DRM_ERROR( "Invalid texture offset for unit 2\n" );
442                         return DRM_ERR( EINVAL );
443                 }
444
445                 BEGIN_RING( 9 );
446                 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) );
447                 OUT_RING( tex[2].pp_txfilter );
448                 OUT_RING( tex[2].pp_txformat );
449                 OUT_RING( tex[2].pp_txoffset );
450                 OUT_RING( tex[2].pp_txcblend );
451                 OUT_RING( tex[2].pp_txablend );
452                 OUT_RING( tex[2].pp_tfactor );
453                 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) );
454                 OUT_RING( tex[2].pp_border_color );
455                 ADVANCE_RING();
456         }
457
458         return 0;
459 }
460
461 /* Emit 1.2 state
462  */
463 static int radeon_emit_state2( drm_radeon_private_t *dev_priv,
464                                drm_file_t *filp_priv,
465                                drm_radeon_state_t *state )
466 {
467         RING_LOCALS;
468
469         if (state->dirty & RADEON_UPLOAD_ZBIAS) {
470                 BEGIN_RING( 3 );
471                 OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) );
472                 OUT_RING( state->context2.se_zbias_factor ); 
473                 OUT_RING( state->context2.se_zbias_constant ); 
474                 ADVANCE_RING();
475         }
476
477         return radeon_emit_state( dev_priv, filp_priv, &state->context,
478                            state->tex, state->dirty );
479 }
480
481 /* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
482  * 1.3 cmdbuffers allow all previous state to be updated as well as
483  * the tcl scalar and vector areas.  
484  */
485 static struct { 
486         int start; 
487         int len; 
488         const char *name;
489 } packet[RADEON_MAX_STATE_PACKETS] = {
490         { RADEON_PP_MISC,7,"RADEON_PP_MISC" },
491         { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
492         { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
493         { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
494         { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
495         { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
496         { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
497         { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
498         { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
499         { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
500         { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
501         { RADEON_RE_MISC,1,"RADEON_RE_MISC" },
502         { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
503         { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
504         { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
505         { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
506         { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
507         { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
508         { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
509         { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
510         { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
511         { R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" },
512         { R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
513         { R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
514         { R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
515         { R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
516         { R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
517         { R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
518         { R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
519         { R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
520         { R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
521         { R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
522         { R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
523         { R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
524         { R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
525         { R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
526         { R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
527         { R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
528         { R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
529         { R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
530         { R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
531         { R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
532         { R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
533         { R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
534         { R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
535         { R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
536         { R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
537         { R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
538         { R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
539         { R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
540         { R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
541         { R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" }, 
542         { R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" }, 
543         { R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" }, 
544         { R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" }, 
545         { R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" }, 
546         { R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" }, 
547         { R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" }, 
548         { R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" }, 
549         { R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" }, 
550         { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
551         { R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
552         { R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
553         { R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" },
554         { R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
555         { R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" },
556         { R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
557         { R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" },
558         { R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
559         { R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" },
560         { R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
561         { R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" },
562         { R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
563         { RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
564         { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
565         { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_1" },
566 };
567
568
569
570 /* ================================================================
571  * Performance monitoring functions
572  */
573
/* Fill a w x h rectangle at (x,y) -- offset by the first cliprect's
 * top-left corner -- with the given 8-bit r/g/b colour using a 2D
 * solid-colour paint.  Draws to the front or back buffer depending on
 * the current page-flip state.  Used by the performance monitoring
 * code below to paint status boxes.
 */
static void radeon_clear_box( drm_radeon_private_t *dev_priv,
                              int x, int y, int w, int h,
                              int r, int g, int b )
{
        u32 color;
        RING_LOCALS;

        /* Make the box position relative to the first clip rectangle. */
        x += dev_priv->sarea_priv->boxes[0].x1;
        y += dev_priv->sarea_priv->boxes[0].y1;

        /* Pack the 8-bit components to match the surface colour format. */
        switch ( dev_priv->color_fmt ) {
        case RADEON_COLOR_FORMAT_RGB565:
                color = (((r & 0xf8) << 8) |
                         ((g & 0xfc) << 3) |
                         ((b & 0xf8) >> 3));
                break;
        case RADEON_COLOR_FORMAT_ARGB8888:
        default:
                color = (((0xff) << 24) | (r << 16) | (g <<  8) | b);
                break;
        }

        /* Enable writes to all planes before painting. */
        BEGIN_RING( 4 );
        RADEON_WAIT_UNTIL_3D_IDLE();
        OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
        OUT_RING( 0xffffffff );
        ADVANCE_RING();

        BEGIN_RING( 6 );

        OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
        OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
                  RADEON_GMC_BRUSH_SOLID_COLOR |
                  (dev_priv->color_fmt << 8) |
                  RADEON_GMC_SRC_DATATYPE_COLOR |
                  RADEON_ROP3_P |
                  RADEON_GMC_CLR_CMP_CNTL_DIS );

        /* Pick the destination buffer according to the page-flip state. */
        if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) { 
                OUT_RING( dev_priv->front_pitch_offset );
        } else {         
                OUT_RING( dev_priv->back_pitch_offset );
        } 

        OUT_RING( color );

        OUT_RING( (x << 16) | y );
        OUT_RING( (w << 16) | h );

        ADVANCE_RING();
}
625
626 static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )
627 {
628         /* Collapse various things into a wait flag -- trying to
629          * guess if userspase slept -- better just to have them tell us.
630          */
631         if (dev_priv->stats.last_frame_reads > 1 ||
632             dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
633                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
634         }
635
636         if (dev_priv->stats.freelist_loops) {
637                 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
638         }
639
640         /* Purple box for page flipping
641          */
642         if ( dev_priv->stats.boxes & RADEON_BOX_FLIP ) 
643                 radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 );
644
645         /* Red box if we have to wait for idle at any point
646          */
647         if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE ) 
648                 radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 );
649
650         /* Blue box: lost context?
651          */
652
653         /* Yellow box for texture swaps
654          */
655         if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD ) 
656                 radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 );
657
658         /* Green box if hardware never idles (as far as we can tell)
659          */
660         if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) ) 
661                 radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );
662
663
664         /* Draw bars indicating number of buffers allocated 
665          * (not a great measure, easily confused)
666          */
667         if (dev_priv->stats.requested_bufs) {
668                 if (dev_priv->stats.requested_bufs > 100)
669                         dev_priv->stats.requested_bufs = 100;
670
671                 radeon_clear_box( dev_priv, 4, 16,  
672                                   dev_priv->stats.requested_bufs, 4,
673                                   196, 128, 128 );
674         }
675
676         memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) );
677
678 }
679 /* ================================================================
680  * CP command dispatch functions
681  */
682
/* Emit CP commands to clear the front/back color buffers and/or the
 * depth/stencil buffer for every cliprect in the SAREA.  Color clears
 * use 2D solid fills (PAINT_MULTI); depth/stencil clears render quads
 * through the 3D engine, with a separate register setup path for R200
 * versus older chips.  Finishes by emitting a clear-age scratch write
 * that clients wait on before issuing the next clear ioctl.
 */
static void radeon_cp_dispatch_clear( drm_device_t *dev,
				      drm_radeon_clear_t *clear,
				      drm_radeon_clear_rect_t *depth_boxes )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask= 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG( "flags = 0x%x\n", flags );

	dev_priv->stats.clears++;

	/* If page-flipped and currently displaying page 1, the physical
	 * front and back buffers are swapped relative to the client's
	 * view, so exchange the FRONT/BACK bits accordingly.
	 */
	if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if ( tmp & RADEON_FRONT ) flags |= RADEON_BACK;
		if ( tmp & RADEON_BACK )  flags |= RADEON_FRONT;
	}

	if ( flags & (RADEON_FRONT | RADEON_BACK) ) {

		BEGIN_RING( 4 );

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		/* Apply the client's color write mask to the 2D fill. */
		OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
		OUT_RING( clear->color_mask );

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		/* Solid-fill each cliprect in each requested color buffer. */
		for ( i = 0 ; i < nbox ; i++ ) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n",
				   x, y, w, h, flags );

			if ( flags & RADEON_FRONT ) {
				BEGIN_RING( 6 );

				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					  RADEON_GMC_BRUSH_SOLID_COLOR |
					  (dev_priv->color_fmt << 8) |
					  RADEON_GMC_SRC_DATATYPE_COLOR |
					  RADEON_ROP3_P |
					  RADEON_GMC_CLR_CMP_CNTL_DIS );

				OUT_RING( dev_priv->front_pitch_offset );
				OUT_RING( clear->clear_color );

				OUT_RING( (x << 16) | y );
				OUT_RING( (w << 16) | h );

				ADVANCE_RING();
			}

			if ( flags & RADEON_BACK ) {
				BEGIN_RING( 6 );

				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					  RADEON_GMC_BRUSH_SOLID_COLOR |
					  (dev_priv->color_fmt << 8) |
					  RADEON_GMC_SRC_DATATYPE_COLOR |
					  RADEON_ROP3_P |
					  RADEON_GMC_CLR_CMP_CNTL_DIS );

				OUT_RING( dev_priv->back_pitch_offset );
				OUT_RING( clear->clear_color );

				OUT_RING( (x << 16) | y );
				OUT_RING( (w << 16) | h );

				ADVANCE_RING();
			}
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	if ( dev_priv->is_r200 &&
	     (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {

		/* R200 path: stage the full set of register values first,
		 * then emit them in one ring burst below.
		 */
		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;
		tempRB3D_CNTL &= ~(1<<15); /* unset radeon magic flag */

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;



		/* Disable TCL */

		tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
				   (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		/* Zero plane mask: no color buffer writes from this pass. */
		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
			SE_VTE_CNTL__VTX_XY_FMT_MASK |
			SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W)*/
		tempSE_VTX_FMT_0 =
			SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
			SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;


		/* 
		 * Depth buffer specific enables 
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/* 
		 * Stencil buffer specific enables
		 */
		if ( flags & RADEON_STENCIL ) {
			tempRB3D_CNTL |=  RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask; 
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		BEGIN_RING( 26 );
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL );
		OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL );
		OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL );
		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
			      tempRB3D_ZSTENCILCNTL );
		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, 
			      tempRB3D_STENCILREFMASK );
		OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK );
		OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL );
		OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL );
		OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 );
		OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 );
		OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL );
		OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL, 
			      tempRE_AUX_SCISSOR_CNTL );
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for ( i = 0 ; i < nbox ; i++ ) {
			
			/* Funny that this should be required -- 
			 *  sets top-left?
			 */
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );

			/* Emit a 3-vertex rect-list quad covering the
			 * cliprect; W is hard-wired to 1.0 (0x3f800000).
			 */
			BEGIN_RING( 14 );
			OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) );
			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
				   RADEON_PRIM_WALK_RING |
				   (3 << RADEON_NUM_VERTICES_SHIFT)) );
			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			ADVANCE_RING();
		}
	} 
	else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {

		/* Pre-R200 path: same idea, older register set. */
		rb3d_cntl = depth_clear->rb3d_cntl;

		if ( flags & RADEON_DEPTH ) {
			rb3d_cntl |=  RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if ( flags & RADEON_STENCIL ) {
			rb3d_cntl |=  RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		BEGIN_RING( 13 );
		RADEON_WAIT_UNTIL_2D_IDLE();

		/* PP_CNTL = 0 and RB3D_CNTL written in one PACKET0 pair. */
		OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) );
		OUT_RING( 0x00000000 );
		OUT_RING( rb3d_cntl );
		
		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
			      depth_clear->rb3d_zstencilcntl );
		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
			      rb3d_stencilrefmask );
		OUT_RING_REG( RADEON_RB3D_PLANEMASK,
			      0x00000000 );
		OUT_RING_REG( RADEON_SE_CNTL,
			      depth_clear->se_cntl );
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for ( i = 0 ; i < nbox ; i++ ) {
			
			/* Funny that this should be required -- 
			 *  sets top-left?
			 */
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );

			BEGIN_RING( 15 );

			OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );
			OUT_RING( RADEON_VTX_Z_PRESENT |
				  RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
				   RADEON_PRIM_WALK_RING |
				   RADEON_MAOS_ENABLE |
				   RADEON_VTX_FMT_RADEON_MODE |
				   (3 << RADEON_NUM_VERTICES_SHIFT)) );


			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING( 4 );

	RADEON_CLEAR_AGE( dev_priv->sarea_priv->last_clear );
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}
995
/* Copy the back buffer to the front buffer (a "swap" by blit) for
 * every cliprect in the SAREA, then bump the frame-age scratch value
 * that clients throttle on before the next swapbuffer ioctl.
 */
static void radeon_cp_dispatch_swap( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG( "\n" );

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes( dev_priv );


	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING( 2 );

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* One screen-to-screen blit per cliprect. */
	for ( i = 0 ; i < nbox ; i++ ) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG( "dispatch swap %d,%d-%d,%d\n",
			   x, y, w, h );

		BEGIN_RING( 7 );

		OUT_RING( CP_PACKET3( RADEON_CNTL_BITBLT_MULTI, 5 ) );
		OUT_RING( RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			  RADEON_GMC_BRUSH_NONE |
			  (dev_priv->color_fmt << 8) |
			  RADEON_GMC_SRC_DATATYPE_COLOR |
			  RADEON_ROP3_S |
			  RADEON_DP_SRC_SOURCE_MEMORY |
			  RADEON_GMC_CLR_CMP_CNTL_DIS |
			  RADEON_GMC_WR_MSK_DIS );
		
		/* Make this work even if front & back are flipped:
		 */
		if (dev_priv->current_page == 0) {
			OUT_RING( dev_priv->back_pitch_offset );
			OUT_RING( dev_priv->front_pitch_offset );
		} 
		else {
			OUT_RING( dev_priv->front_pitch_offset );
			OUT_RING( dev_priv->back_pitch_offset );
		}

		/* src x/y, dst x/y (identical), then width/height. */
		OUT_RING( (x << 16) | y );
		OUT_RING( (x << 16) | y );
		OUT_RING( (w << 16) | h );

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING( 4 );

	RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}
1074
/* Perform a page flip: point both CRTCs at the currently hidden page,
 * toggle current_page/pfCurrentPage, and bump the frame-age scratch
 * value that clients throttle on.
 */
static void radeon_cp_dispatch_flip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *)dev_priv->sarea->handle;
	/* Flip to whichever page is not currently displayed. */
	int offset = (dev_priv->current_page == 1)
		   ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n", 
		__FUNCTION__, 
		dev_priv->current_page,
		dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes( dev_priv );
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING( 6 );

	RADEON_WAIT_UNTIL_3D_IDLE();
	/* CRTC1 offset: start of the visible frame within the page,
	 * computed from the sarea frame origin and the pixel size
	 * (color_fmt - 2 bytes per pixel), aligned down to 8 bytes.
	 */
	OUT_RING_REG( RADEON_CRTC_OFFSET, ( ( sarea->frame.y * dev_priv->front_pitch
					      + sarea->frame.x 
					      * ( dev_priv->color_fmt - 2 ) ) & ~7 )
					  + offset );
	OUT_RING_REG( RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
					   + offset );

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
					      1 - dev_priv->current_page;

	BEGIN_RING( 2 );

	RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );

	ADVANCE_RING();
}
1122
1123 static int bad_prim_vertex_nr( int primitive, int nr )
1124 {
1125         switch (primitive & RADEON_PRIM_TYPE_MASK) {
1126         case RADEON_PRIM_TYPE_NONE:
1127         case RADEON_PRIM_TYPE_POINT:
1128                 return nr < 1;
1129         case RADEON_PRIM_TYPE_LINE:
1130                 return (nr & 1) || nr == 0;
1131         case RADEON_PRIM_TYPE_LINE_STRIP:
1132                 return nr < 2;
1133         case RADEON_PRIM_TYPE_TRI_LIST:
1134         case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1135         case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1136         case RADEON_PRIM_TYPE_RECT_LIST:
1137                 return nr % 3 || nr == 0;
1138         case RADEON_PRIM_TYPE_TRI_FAN:
1139         case RADEON_PRIM_TYPE_TRI_STRIP:
1140                 return nr < 3;
1141         default:
1142                 return 1;
1143         }       
1144 }
1145
1146
1147
/* Description of one TCL primitive to dispatch from a DMA buffer. */
typedef struct {
	unsigned int start;	/* byte offset of primitive data in the buffer */
	unsigned int finish;	/* byte offset just past the primitive data */
	unsigned int prim;	/* hardware primitive type/flags word */
	unsigned int numverts;	/* number of vertices to render */
	unsigned int offset;	/* added to gart_buffers_offset for indexed
				 * dispatch (see radeon_cp_dispatch_indices) */
	unsigned int vc_format;	/* vertex component format word */
} drm_radeon_tcl_prim_t;
1156
/* Emit a vertex-buffer primitive once per SAREA cliprect (or once if
 * there are no cliprects), preceding each emission with the cliprect
 * state.  Rejects primitives whose vertex count is illegal for the
 * primitive type.
 */
static void radeon_cp_dispatch_vertex( drm_device_t *dev,
				       drm_buf_t *buf,
				       drm_radeon_tcl_prim_t *prim )

{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GPU address of the vertex data within the GART buffer area. */
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start,
		  prim->finish,
		  prim->numverts);

	if (bad_prim_vertex_nr( prim->prim, prim->numverts )) {
		DRM_ERROR( "bad prim %x numverts %d\n", 
			   prim->prim, prim->numverts );
		return;
	}

	/* do/while so the primitive is emitted at least once even when
	 * nbox == 0.
	 */
	do {
		/* Emit the next cliprect */
		if ( i < nbox ) {
			radeon_emit_clip_rect( dev_priv, 
					       &sarea_priv->boxes[i] );
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING( 5 );

		OUT_RING( CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, 3 ) );
		OUT_RING( offset );
		OUT_RING( numverts );
		OUT_RING( prim->vc_format );
		OUT_RING( prim->prim | RADEON_PRIM_WALK_LIST |
			  RADEON_COLOR_ORDER_RGBA |
			  RADEON_VTX_FMT_RADEON_MODE |
			  (numverts << RADEON_NUM_VERTICES_SHIFT) );

		ADVANCE_RING();

		i++;
	} while ( i < nbox );
}
1207
1208
1209
/* Mark a DMA buffer as pending and stamp it with a dispatch age so it
 * can be reclaimed by the freelist once the CP has consumed it.
 */
static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	/* Assign the next dispatch age to this buffer. */
	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING( 2 );
	RADEON_DISPATCH_AGE( buf_priv->age );
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}
1226
/* Fire off the range [start, end) of a DMA buffer as an indirect
 * (IB) command buffer.  Does nothing for an empty range.
 */
static void radeon_cp_dispatch_indirect( drm_device_t *dev,
					 drm_buf_t *buf,
					 int start, int end )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG( "indirect: buf=%d s=0x%x e=0x%x\n",
		   buf->idx, start, end );

	if ( start != end ) {
		/* GPU address of the IB within the GART buffer area. */
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		/* Byte count rounded up to whole dwords. */
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if ( dwords & 1 ) {
			u32 *data = (u32 *)
				((char *)dev_priv->buffers->handle
				 + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING( 3 );

		OUT_RING( CP_PACKET0( RADEON_CP_IB_BASE, 1 ) );
		OUT_RING( offset );
		OUT_RING( dwords );

		ADVANCE_RING();
	}
}
1262
1263
/* Dispatch an indexed primitive: patch a RNDR_GEN_INDX_PRIM packet
 * header directly into the element buffer ahead of the index data,
 * then fire that region as an indirect buffer once per cliprect (or
 * once if there are none).  Validates the index count and start
 * alignment first.
 */
static void radeon_cp_dispatch_indices( drm_device_t *dev,
					drm_buf_t *elt_buf,
					drm_radeon_tcl_prim_t *prim )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	/* GPU address of the vertex array the indices refer to. */
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	/* Indices begin after the packet header within the buffer. */
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	/* Number of u16 indices between start and finish. */
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start,
		  prim->finish,
		  prim->offset,
		  prim->numverts);

	if (bad_prim_vertex_nr( prim->prim, count )) {
		DRM_ERROR( "bad prim %x count %d\n", 
			   prim->prim, count );
		return;
	}


	/* Reject empty ranges and starts not 8-byte aligned. */
	if ( start >= prim->finish ||
	     (prim->start & 0x7) ) {
		DRM_ERROR( "buffer prim %d\n", prim->prim );
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	/* CPU view of the packet header location in the element buffer. */
	data = (u32 *)((char *)dev_priv->buffers->handle +
		       elt_buf->offset + prim->start);

	/* Build the RNDR_GEN_INDX_PRIM packet in place, in front of
	 * the index data.
	 */
	data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 );
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT) );

	/* do/while so the primitive fires at least once when nbox == 0. */
	do {
		if ( i < nbox ) 
			radeon_emit_clip_rect( dev_priv, 
					       &sarea_priv->boxes[i] );

		radeon_cp_dispatch_indirect( dev, elt_buf,
					     prim->start,
					     prim->finish );

		i++;
	} while ( i < nbox );

}
1327
1328 #define RADEON_MAX_TEXTURE_SIZE (RADEON_BUFFER_SIZE - 8 * sizeof(u32))
1329
1330 static int radeon_cp_dispatch_texture( DRMFILE filp,
1331                                        drm_device_t *dev,
1332                                        drm_radeon_texture_t *tex,
1333                                        drm_radeon_tex_image_t *image )
1334 {
1335         drm_radeon_private_t *dev_priv = dev->dev_private;
1336         drm_file_t *filp_priv;
1337         drm_buf_t *buf;
1338         u32 format;
1339         u32 *buffer;
1340         const u8 *data;
1341         int size, dwords, tex_width, blit_width;
1342         u32 height;
1343         int i;
1344         RING_LOCALS;
1345
1346         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
1347
1348         if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &tex->offset ) ) {
1349                 DRM_ERROR( "Invalid destination offset\n" );
1350                 return DRM_ERR( EINVAL );
1351         }
1352
1353         dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1354
1355         /* Flush the pixel cache.  This ensures no pixel data gets mixed
1356          * up with the texture data from the host data blit, otherwise
1357          * part of the texture image may be corrupted.
1358          */
1359         BEGIN_RING( 4 );
1360         RADEON_FLUSH_CACHE();
1361         RADEON_WAIT_UNTIL_IDLE();
1362         ADVANCE_RING();
1363
1364 #ifdef __BIG_ENDIAN
1365         /* The Mesa texture functions provide the data in little endian as the
1366          * chip wants it, but we need to compensate for the fact that the CP
1367          * ring gets byte-swapped
1368          */
1369         BEGIN_RING( 2 );
1370         OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
1371         ADVANCE_RING();
1372 #endif
1373
1374
1375         /* The compiler won't optimize away a division by a variable,
1376          * even if the only legal values are powers of two.  Thus, we'll
1377          * use a shift instead.
1378          */
1379         switch ( tex->format ) {
1380         case RADEON_TXFORMAT_ARGB8888:
1381         case RADEON_TXFORMAT_RGBA8888:
1382                 format = RADEON_COLOR_FORMAT_ARGB8888;
1383                 tex_width = tex->width * 4;
1384                 blit_width = image->width * 4;
1385                 break;
1386         case RADEON_TXFORMAT_AI88:
1387         case RADEON_TXFORMAT_ARGB1555:
1388         case RADEON_TXFORMAT_RGB565:
1389         case RADEON_TXFORMAT_ARGB4444:
1390         case RADEON_TXFORMAT_VYUY422:
1391         case RADEON_TXFORMAT_YVYU422:
1392                 format = RADEON_COLOR_FORMAT_RGB565;
1393                 tex_width = tex->width * 2;
1394                 blit_width = image->width * 2;
1395                 break;
1396         case RADEON_TXFORMAT_I8:
1397         case RADEON_TXFORMAT_RGB332:
1398                 format = RADEON_COLOR_FORMAT_CI8;
1399                 tex_width = tex->width * 1;
1400                 blit_width = image->width * 1;
1401                 break;
1402         default:
1403                 DRM_ERROR( "invalid texture format %d\n", tex->format );
1404                 return DRM_ERR(EINVAL);
1405         }
1406
1407         DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );
1408
1409         do {
1410                 DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1411                            tex->offset >> 10, tex->pitch, tex->format,
1412                            image->x, image->y, image->width, image->height );
1413
1414                 /* Make a copy of some parameters in case we have to
1415                  * update them for a multi-pass texture blit.
1416                  */
1417                 height = image->height;
1418                 data = (const u8 *)image->data;
1419                 
1420                 size = height * blit_width;
1421
1422                 if ( size > RADEON_MAX_TEXTURE_SIZE ) {
1423                         height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1424                         size = height * blit_width;
1425                 } else if ( size < 4 && size > 0 ) {
1426                         size = 4;
1427                 } else if ( size == 0 ) {
1428                         return 0;
1429                 }
1430
1431                 buf = radeon_freelist_get( dev );
1432                 if ( 0 && !buf ) {
1433                         radeon_do_cp_idle( dev_priv );
1434                         buf = radeon_freelist_get( dev );
1435                 }
1436                 if ( !buf ) {
1437                         DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
1438                         DRM_COPY_TO_USER( tex->image, image, sizeof(*image) );
1439                         return DRM_ERR(EAGAIN);
1440                 }
1441
1442
1443                 /* Dispatch the indirect buffer.
1444                  */
1445                 buffer = (u32*)((char*)dev_priv->buffers->handle + buf->offset);
1446                 dwords = size / 4;
1447                 buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
1448                 buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1449                              RADEON_GMC_BRUSH_NONE |
1450                              (format << 8) |
1451                              RADEON_GMC_SRC_DATATYPE_COLOR |
1452                              RADEON_ROP3_S |
1453                              RADEON_DP_SRC_SOURCE_HOST_DATA |
1454                              RADEON_GMC_CLR_CMP_CNTL_DIS |
1455                              RADEON_GMC_WR_MSK_DIS);
1456                 
1457                 buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
1458                 buffer[3] = 0xffffffff;
1459                 buffer[4] = 0xffffffff;
1460                 buffer[5] = (image->y << 16) | image->x;
1461                 buffer[6] = (height << 16) | image->width;
1462                 buffer[7] = dwords;
1463                 buffer += 8;
1464
1465                 if ( tex_width >= 32 ) {
1466                         /* Texture image width is larger than the minimum, so we
1467                          * can upload it directly.
1468                          */
1469                         if ( DRM_COPY_FROM_USER( buffer, data, 
1470                                                  dwords * sizeof(u32) ) ) {
1471                                 DRM_ERROR( "EFAULT on data, %d dwords\n", 
1472                                            dwords );
1473                                 return DRM_ERR(EFAULT);
1474                         }
1475                 } else {
1476                         /* Texture image width is less than the minimum, so we
1477                          * need to pad out each image scanline to the minimum
1478                          * width.
1479                          */
1480                         for ( i = 0 ; i < tex->height ; i++ ) {
1481                                 if ( DRM_COPY_FROM_USER( buffer, data, 
1482                                                          tex_width ) ) {
1483                                         DRM_ERROR( "EFAULT on pad, %d bytes\n",
1484                                                    tex_width );
1485                                         return DRM_ERR(EFAULT);
1486                                 }
1487                                 buffer += 8;
1488                                 data += tex_width;
1489                         }
1490                 }
1491
1492                 buf->filp = filp;
1493                 buf->used = (dwords + 8) * sizeof(u32);
1494                 radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
1495                 radeon_cp_discard_buffer( dev, buf );
1496
1497                 /* Update the input parameters for next time */
1498                 image->y += height;
1499                 image->height -= height;
1500                 image->data = (const u8 *)image->data + size;
1501         } while (image->height > 0);
1502
1503         /* Flush the pixel cache after the blit completes.  This ensures
1504          * the texture data is written out to memory before rendering
1505          * continues.
1506          */
1507         BEGIN_RING( 4 );
1508         RADEON_FLUSH_CACHE();
1509         RADEON_WAIT_UNTIL_2D_IDLE();
1510         ADVANCE_RING();
1511         return 0;
1512 }
1513
1514
1515 static void radeon_cp_dispatch_stipple( drm_device_t *dev, u32 *stipple )
1516 {
1517         drm_radeon_private_t *dev_priv = dev->dev_private;
1518         int i;
1519         RING_LOCALS;
1520         DRM_DEBUG( "\n" );
1521
1522         BEGIN_RING( 35 );
1523
1524         OUT_RING( CP_PACKET0( RADEON_RE_STIPPLE_ADDR, 0 ) );
1525         OUT_RING( 0x00000000 );
1526
1527         OUT_RING( CP_PACKET0_TABLE( RADEON_RE_STIPPLE_DATA, 31 ) );
1528         for ( i = 0 ; i < 32 ; i++ ) {
1529                 OUT_RING( stipple[i] );
1530         }
1531
1532         ADVANCE_RING();
1533 }
1534
1535
1536 /* ================================================================
1537  * IOCTL functions
1538  */
1539
/* RADEON_CLEAR ioctl: clear color and/or depth buffers within the
 * current SAREA cliprect list.  The per-box depth clear values come
 * from user space.
 */
int radeon_cp_clear( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_COPY_FROM_USER_IOCTL( clear, (drm_radeon_clear_t *)data,
			     sizeof(clear) );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	/* Clamp nbox first so the user copy below cannot overrun the
	 * on-stack depth_boxes array.
	 */
	if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if ( DRM_COPY_FROM_USER( &depth_boxes, clear.depth_boxes,
			     sarea_priv->nbox * sizeof(depth_boxes[0]) ) )
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear( dev, &clear, depth_boxes );

	COMMIT_RING();
	return 0;
}
1568
1569
/* Enable hardware page flipping on both CRTCs.
 *
 * Read-modify-writes CRTC_OFFSET_CNTL / CRTC2_OFFSET_CNTL through the
 * ring to set RADEON_CRTC_OFFSET_FLIP_CNTL, then records that flipping
 * is active with page 0 current (mirrored into the SAREA for clients).
 *
 * Not sure why CRTC_OFFSET_FLIP_CNTL isn't set all the time.
 */
static int radeon_do_init_pageflip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG( "\n" );

	/* Wait for 3D idle, then OR the flip-control bit into each
	 * CRTC offset control register.
	 */
	BEGIN_RING( 6 );
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	OUT_RING( CP_PACKET0( RADEON_CRTC2_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC2_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
1593
1594 /* Called whenever a client dies, from DRM(release).
1595  * NOTE:  Lock isn't necessarily held when this is called!
1596  */
1597 int radeon_do_cleanup_pageflip( drm_device_t *dev )
1598 {
1599         drm_radeon_private_t *dev_priv = dev->dev_private;
1600         DRM_DEBUG( "\n" );
1601
1602         if (dev_priv->current_page != 0)
1603                 radeon_cp_dispatch_flip( dev );
1604
1605         dev_priv->page_flipping = 0;
1606         return 0;
1607 }
1608
/* RADEON_FLIP ioctl: schedule a hardware page flip.
 *
 * Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.
 */
int radeon_cp_flip( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	/* Lazily enable hardware page flipping on first use. */
	if (!dev_priv->page_flipping) 
		radeon_do_init_pageflip( dev );
		
	radeon_cp_dispatch_flip( dev );

	COMMIT_RING();
	return 0;
}
1630
/* RADEON_SWAP ioctl: blit the back buffer to the front buffer within
 * the current cliprects (the non-flipping counterpart of RADEON_FLIP).
 */
int radeon_cp_swap( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	/* Clamp the cliprect count to the SAREA maximum. */
	if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap( dev );
	/* Mark the 3D context as unowned after the swap. */
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}
1651
/* RADEON_VERTEX ioctl: dispatch a non-indexed primitive from a client
 * DMA buffer, flushing dirty SAREA state to the ring first.
 */
int radeon_cp_vertex( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	/* NOTE(review): dev_priv is dereferenced here before the
	 * !dev_priv check below -- verify init ordering guarantees.
	 */
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex_t *)data,
			     sizeof(vertex) );

	DRM_DEBUG( "pid=%d index=%d count=%d discard=%d\n",
		   DRM_CURRENTPID,
		   vertex.idx, vertex.count, vertex.discard );

	/* Validate the user-supplied buffer index and primitive type. */
	if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   vertex.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}
	if ( vertex.prim < 0 ||
	     vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
		DRM_ERROR( "buffer prim %d\n", vertex.prim );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf = dma->buflist[vertex.idx];

	/* The buffer must belong to the caller and must not already be
	 * queued to the hardware.
	 */
	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}
	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if (vertex.count) {
		buf->used = vertex.count; /* not used? */

		/* Flush dirty context/texture state before dispatch. */
		if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
			if ( radeon_emit_state( dev_priv, filp_priv,
						&sarea_priv->context_state,
						sarea_priv->tex_state,
						sarea_priv->dirty ) ) {
				DRM_ERROR( "radeon_emit_state failed\n" );
				return DRM_ERR( EINVAL );
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count; /* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex( dev, buf, &prim );
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer( dev, buf );
	}

	COMMIT_RING();
	return 0;
}
1741
1742 int radeon_cp_indices( DRM_IOCTL_ARGS )
1743 {
1744         DRM_DEVICE;
1745         drm_radeon_private_t *dev_priv = dev->dev_private;
1746         drm_file_t *filp_priv;
1747         drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1748         drm_device_dma_t *dma = dev->dma;
1749         drm_buf_t *buf;
1750         drm_radeon_indices_t elts;
1751         drm_radeon_tcl_prim_t prim;
1752         int count;
1753
1754         LOCK_TEST_WITH_RETURN( dev, filp );
1755
1756         if ( !dev_priv ) {
1757                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1758                 return DRM_ERR(EINVAL);
1759         }
1760
1761         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
1762
1763         DRM_COPY_FROM_USER_IOCTL( elts, (drm_radeon_indices_t *)data,
1764                              sizeof(elts) );
1765
1766         DRM_DEBUG( "pid=%d index=%d start=%d end=%d discard=%d\n",
1767                    DRM_CURRENTPID,
1768                    elts.idx, elts.start, elts.end, elts.discard );
1769
1770         if ( elts.idx < 0 || elts.idx >= dma->buf_count ) {
1771                 DRM_ERROR( "buffer index %d (of %d max)\n",
1772                            elts.idx, dma->buf_count - 1 );
1773                 return DRM_ERR(EINVAL);
1774         }
1775         if ( elts.prim < 0 ||
1776              elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
1777                 DRM_ERROR( "buffer prim %d\n", elts.prim );
1778                 return DRM_ERR(EINVAL);
1779         }
1780
1781         RING_SPACE_TEST_WITH_RETURN( dev_priv );
1782         VB_AGE_TEST_WITH_RETURN( dev_priv );
1783
1784         buf = dma->buflist[elts.idx];
1785
1786         if ( buf->filp != filp ) {
1787                 DRM_ERROR( "process %d using buffer owned by %p\n",
1788                            DRM_CURRENTPID, buf->filp );
1789                 return DRM_ERR(EINVAL);
1790         }
1791         if ( buf->pending ) {
1792                 DRM_ERROR( "sending pending buffer %d\n", elts.idx );
1793                 return DRM_ERR(EINVAL);
1794         }
1795
1796         count = (elts.end - elts.start) / sizeof(u16);
1797         elts.start -= RADEON_INDEX_PRIM_OFFSET;
1798
1799         if ( elts.start & 0x7 ) {
1800                 DRM_ERROR( "misaligned buffer 0x%x\n", elts.start );
1801                 return DRM_ERR(EINVAL);
1802         }
1803         if ( elts.start < buf->used ) {
1804                 DRM_ERROR( "no header 0x%x - 0x%x\n", elts.start, buf->used );
1805                 return DRM_ERR(EINVAL);
1806         }
1807
1808         buf->used = elts.end;
1809
1810         if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
1811                 if ( radeon_emit_state( dev_priv, filp_priv,
1812                                         &sarea_priv->context_state,
1813                                         sarea_priv->tex_state,
1814                                         sarea_priv->dirty ) ) {
1815                         DRM_ERROR( "radeon_emit_state failed\n" );
1816                         return DRM_ERR( EINVAL );
1817                 }
1818
1819                 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
1820                                        RADEON_UPLOAD_TEX1IMAGES |
1821                                        RADEON_UPLOAD_TEX2IMAGES |
1822                                        RADEON_REQUIRE_QUIESCENCE);
1823         }
1824
1825
1826         /* Build up a prim_t record:
1827          */
1828         prim.start = elts.start;
1829         prim.finish = elts.end; 
1830         prim.prim = elts.prim;
1831         prim.offset = 0;        /* offset from start of dma buffers */
1832         prim.numverts = RADEON_MAX_VB_VERTS; /* duh */
1833         prim.vc_format = dev_priv->sarea_priv->vc_format;
1834         
1835         radeon_cp_dispatch_indices( dev, buf, &prim );
1836         if (elts.discard) {
1837                 radeon_cp_discard_buffer( dev, buf );
1838         }
1839
1840         COMMIT_RING();
1841         return 0;
1842 }
1843
1844 int radeon_cp_texture( DRM_IOCTL_ARGS )
1845 {
1846         DRM_DEVICE;
1847         drm_radeon_private_t *dev_priv = dev->dev_private;
1848         drm_radeon_texture_t tex;
1849         drm_radeon_tex_image_t image;
1850         int ret;
1851
1852         LOCK_TEST_WITH_RETURN( dev, filp );
1853
1854         DRM_COPY_FROM_USER_IOCTL( tex, (drm_radeon_texture_t *)data, sizeof(tex) );
1855
1856         if ( tex.image == NULL ) {
1857                 DRM_ERROR( "null texture image!\n" );
1858                 return DRM_ERR(EINVAL);
1859         }
1860
1861         if ( DRM_COPY_FROM_USER( &image,
1862                              (drm_radeon_tex_image_t *)tex.image,
1863                              sizeof(image) ) )
1864                 return DRM_ERR(EFAULT);
1865
1866         RING_SPACE_TEST_WITH_RETURN( dev_priv );
1867         VB_AGE_TEST_WITH_RETURN( dev_priv );
1868
1869         ret = radeon_cp_dispatch_texture( filp, dev, &tex, &image );
1870
1871         COMMIT_RING();
1872         return ret;
1873 }
1874
1875 int radeon_cp_stipple( DRM_IOCTL_ARGS )
1876 {
1877         DRM_DEVICE;
1878         drm_radeon_private_t *dev_priv = dev->dev_private;
1879         drm_radeon_stipple_t stipple;
1880         u32 mask[32];
1881
1882         LOCK_TEST_WITH_RETURN( dev, filp );
1883
1884         DRM_COPY_FROM_USER_IOCTL( stipple, (drm_radeon_stipple_t *)data,
1885                              sizeof(stipple) );
1886
1887         if ( DRM_COPY_FROM_USER( &mask, stipple.mask, 32 * sizeof(u32) ) )
1888                 return DRM_ERR(EFAULT);
1889
1890         RING_SPACE_TEST_WITH_RETURN( dev_priv );
1891
1892         radeon_cp_dispatch_stipple( dev, mask );
1893
1894         COMMIT_RING();
1895         return 0;
1896 }
1897
/* RADEON_INDIRECT ioctl: submit a raw command buffer prepared by the
 * X server.  The buffer contents are not verified, so this path must
 * only be reachable by privileged clients (see comment at dispatch).
 */
int radeon_cp_indirect( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL( indirect, (drm_radeon_indirect_t *)data,
			     sizeof(indirect) );

	DRM_DEBUG( "indirect: idx=%d s=%d e=%d d=%d\n",
		   indirect.idx, indirect.start,
		   indirect.end, indirect.discard );

	/* Validate the user-supplied buffer index. */
	if ( indirect.idx < 0 || indirect.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   indirect.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	/* The buffer must belong to the caller and must not already be
	 * queued to the hardware.
	 */
	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}
	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", indirect.idx );
		return DRM_ERR(EINVAL);
	}

	/* NOTE(review): indirect.end is not validated against the
	 * buffer size here -- confirm the dispatch path bounds it.
	 */
	if ( indirect.start < buf->used ) {
		DRM_ERROR( "reusing indirect: start=0x%x actual=0x%x\n",
			   indirect.start, buf->used );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING( 2 );

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect( dev, buf, indirect.start, indirect.end );
	if (indirect.discard) {
		radeon_cp_discard_buffer( dev, buf );
	}


	COMMIT_RING();
	return 0;
}
1972
/* RADEON_VERTEX2 ioctl: dispatch several primitives, with interleaved
 * state changes, out of a single client DMA buffer.
 */
int radeon_cp_vertex2( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	/* NOTE(review): dev_priv is dereferenced here before the
	 * !dev_priv check below -- verify init ordering guarantees.
	 */
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex2_t *)data,
			     sizeof(vertex) );

	DRM_DEBUG( "pid=%d index=%d discard=%d\n",
		   DRM_CURRENTPID,
		   vertex.idx, vertex.discard );

	/* Validate the user-supplied buffer index. */
	if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   vertex.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf = dma->buflist[vertex.idx];

	/* The buffer must belong to the caller and must not already be
	 * queued to the hardware.
	 */
	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}

	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
		return DRM_ERR(EINVAL);
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return DRM_ERR(EINVAL);

	/* laststate starts at 0xff as a "no state emitted yet" sentinel;
	 * new state is emitted only when a prim's stateidx differs.
	 */
	for (laststate = 0xff, i = 0 ; i < vertex.nr_prims ; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if ( DRM_COPY_FROM_USER( &prim, &vertex.prim[i], sizeof(prim) ) )
			return DRM_ERR(EFAULT);

		if ( prim.stateidx != laststate ) {
			drm_radeon_state_t state;

			if ( DRM_COPY_FROM_USER( &state, 
					     &vertex.state[prim.stateidx], 
					     sizeof(state) ) )
				return DRM_ERR(EFAULT);

			if ( radeon_emit_state2( dev_priv, filp_priv, &state ) ) {
				DRM_ERROR( "radeon_emit_state2 failed\n" );
				return DRM_ERR( EINVAL );
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if ( prim.prim & RADEON_PRIM_WALK_IND ) {
			/* Indexed walk: offset presumably assumes 64
			 * bytes per vertex -- TODO confirm.
			 */
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */

			radeon_cp_dispatch_indices( dev, buf, &tclprim );
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0; /* not used */

			radeon_cp_dispatch_vertex( dev, buf, &tclprim );
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if ( vertex.discard ) {
		radeon_cp_discard_buffer( dev, buf );
	}

	COMMIT_RING();
	return 0;
}
2077
2078
/* Emit one register-write (type-0) packet from the user command
 * stream.  The packet id indexes the static `packet' table, which
 * supplies the register start and dword count; the payload is
 * verified/fixed up before being copied onto the ring.  On success
 * the cmdbuf cursor is advanced past the consumed payload.
 */
static int radeon_emit_packets( 
	drm_radeon_private_t *dev_priv,
	drm_file_t *filp_priv,
	drm_radeon_cmd_header_t header,
	drm_radeon_cmd_buffer_t *cmdbuf )
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;
   
	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	/* The table length must not run past the data actually supplied. */
	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR( "Packet size provided larger than data provided\n" );
		return DRM_ERR(EINVAL);
	}

	if ( radeon_check_and_fixup_packets( dev_priv, filp_priv, id, data ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return DRM_ERR( EINVAL );
	}

	BEGIN_RING(sz+1);
	OUT_RING( CP_PACKET0( reg, (sz-1) ) );
	OUT_RING_USER_TABLE( data, sz );
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2115
2116 static __inline__ int radeon_emit_scalars( 
2117         drm_radeon_private_t *dev_priv,
2118         drm_radeon_cmd_header_t header,
2119         drm_radeon_cmd_buffer_t *cmdbuf )
2120 {
2121         int sz = header.scalars.count;
2122         int *data = (int *)cmdbuf->buf;
2123         int start = header.scalars.offset;
2124         int stride = header.scalars.stride;
2125         RING_LOCALS;
2126
2127         BEGIN_RING( 3+sz );
2128         OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
2129         OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2130         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
2131         OUT_RING_USER_TABLE( data, sz );
2132         ADVANCE_RING();
2133         cmdbuf->buf += sz * sizeof(int);
2134         cmdbuf->bufsz -= sz * sizeof(int);
2135         return 0;
2136 }
2137
2138 /* God this is ugly
2139  */
2140 static __inline__ int radeon_emit_scalars2( 
2141         drm_radeon_private_t *dev_priv,
2142         drm_radeon_cmd_header_t header,
2143         drm_radeon_cmd_buffer_t *cmdbuf )
2144 {
2145         int sz = header.scalars.count;
2146         int *data = (int *)cmdbuf->buf;
2147         int start = ((unsigned int)header.scalars.offset) + 0x100;
2148         int stride = header.scalars.stride;
2149         RING_LOCALS;
2150
2151         BEGIN_RING( 3+sz );
2152         OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
2153         OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2154         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
2155         OUT_RING_USER_TABLE( data, sz );
2156         ADVANCE_RING();
2157         cmdbuf->buf += sz * sizeof(int);
2158         cmdbuf->bufsz -= sz * sizeof(int);
2159         return 0;
2160 }
2161
2162 static __inline__ int radeon_emit_vectors( 
2163         drm_radeon_private_t *dev_priv,
2164         drm_radeon_cmd_header_t header,
2165         drm_radeon_cmd_buffer_t *cmdbuf )
2166 {
2167         int sz = header.vectors.count;
2168         int *data = (int *)cmdbuf->buf;
2169         int start = header.vectors.offset;
2170         int stride = header.vectors.stride;
2171         RING_LOCALS;
2172
2173         BEGIN_RING( 3+sz );
2174         OUT_RING( CP_PACKET0( RADEON_SE_TCL_VECTOR_INDX_REG, 0 ) );
2175         OUT_RING( start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2176         OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_VECTOR_DATA_REG, (sz-1) ) );
2177         OUT_RING_USER_TABLE( data, sz );
2178         ADVANCE_RING();
2179
2180         cmdbuf->buf += sz * sizeof(int);
2181         cmdbuf->bufsz -= sz * sizeof(int);
2182         return 0;
2183 }
2184
2185
2186 static int radeon_emit_packet3( drm_device_t *dev,
2187                                 drm_file_t *filp_priv,
2188                                 drm_radeon_cmd_buffer_t *cmdbuf )
2189 {
2190         drm_radeon_private_t *dev_priv = dev->dev_private;
2191         unsigned int cmdsz;
2192         int *cmd = (int *)cmdbuf->buf, ret;
2193         RING_LOCALS;
2194
2195         DRM_DEBUG("\n");
2196
2197         if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
2198                                                      cmdbuf, &cmdsz ) ) ) {
2199                 DRM_ERROR( "Packet verification failed\n" );
2200                 return ret;
2201         }
2202
2203         BEGIN_RING( cmdsz );
2204         OUT_RING_USER_TABLE( cmd, cmdsz );
2205         ADVANCE_RING();
2206
2207         cmdbuf->buf += cmdsz * 4;
2208         cmdbuf->bufsz -= cmdsz * 4;
2209         return 0;
2210 }
2211
2212
/* Verify one type-3 packet, then emit it once per cliprect (each
 * preceded by the matching clip-rect state).  With orig_nbox == 0 the
 * packet is emitted exactly once with no clip rect change; in all
 * cases the cmdbuf cursor is advanced past the packet.
 */
static int radeon_emit_packet3_cliprect( drm_device_t *dev,
					 drm_file_t *filp_priv,
					 drm_radeon_cmd_buffer_t *cmdbuf,
					 int orig_nbox )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int *cmd = (int *)cmdbuf->buf, ret;
	drm_clip_rect_t *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
						     cmdbuf, &cmdsz ) ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if ( i < cmdbuf->nbox ) {
			if (DRM_COPY_FROM_USER_UNCHECKED( &box, &boxes[i], sizeof(box) ))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if ( i ) {
				BEGIN_RING( 2 );
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect( dev_priv, &box );
		}
		
		BEGIN_RING( cmdsz );
		OUT_RING_USER_TABLE( cmd, cmdsz );
		ADVANCE_RING();

	} while ( ++i < cmdbuf->nbox );
	/* A single box is consumed here, presumably so later packets in
	 * this cmdbuf don't re-emit it -- NOTE(review): confirm intent.
	 */
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

 out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2274
2275
2276 static int radeon_emit_wait( drm_device_t *dev, int flags )
2277 {
2278         drm_radeon_private_t *dev_priv = dev->dev_private;
2279         RING_LOCALS;
2280
2281         DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2282         switch (flags) {
2283         case RADEON_WAIT_2D:
2284                 BEGIN_RING( 2 );
2285                 RADEON_WAIT_UNTIL_2D_IDLE(); 
2286                 ADVANCE_RING();
2287                 break;
2288         case RADEON_WAIT_3D:
2289                 BEGIN_RING( 2 );
2290                 RADEON_WAIT_UNTIL_3D_IDLE(); 
2291                 ADVANCE_RING();
2292                 break;
2293         case RADEON_WAIT_2D|RADEON_WAIT_3D:
2294                 BEGIN_RING( 2 );
2295                 RADEON_WAIT_UNTIL_IDLE(); 
2296                 ADVANCE_RING();
2297                 break;
2298         default:
2299                 return DRM_ERR(EINVAL);
2300         }
2301
2302         return 0;
2303 }
2304
2305 int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
2306 {
2307         DRM_DEVICE;
2308         drm_radeon_private_t *dev_priv = dev->dev_private;
2309         drm_file_t *filp_priv;
2310         drm_device_dma_t *dma = dev->dma;
2311         drm_buf_t *buf = 0;
2312         int idx;
2313         drm_radeon_cmd_buffer_t cmdbuf;
2314         drm_radeon_cmd_header_t header;
2315         int orig_nbox;
2316
2317         LOCK_TEST_WITH_RETURN( dev, filp );
2318
2319         if ( !dev_priv ) {
2320                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2321                 return DRM_ERR(EINVAL);
2322         }
2323
2324         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2325
2326         DRM_COPY_FROM_USER_IOCTL( cmdbuf, (drm_radeon_cmd_buffer_t *)data,
2327                              sizeof(cmdbuf) );
2328
2329         RING_SPACE_TEST_WITH_RETURN( dev_priv );
2330         VB_AGE_TEST_WITH_RETURN( dev_priv );
2331
2332
2333         if (DRM_VERIFYAREA_READ( cmdbuf.buf, cmdbuf.bufsz ))
2334                 return DRM_ERR(EFAULT);
2335
2336         if (cmdbuf.nbox &&
2337             DRM_VERIFYAREA_READ(cmdbuf.boxes, 
2338                          cmdbuf.nbox * sizeof(drm_clip_rect_t)))
2339                 return DRM_ERR(EFAULT);
2340
2341         orig_nbox = cmdbuf.nbox;
2342
2343         while ( cmdbuf.bufsz >= sizeof(header) ) {
2344                 
2345                 if (DRM_GET_USER_UNCHECKED( header.i, (int *)cmdbuf.buf )) {
2346                         DRM_ERROR("__get_user %p\n", cmdbuf.buf);
2347                         return DRM_ERR(EFAULT);
2348                 }
2349
2350                 cmdbuf.buf += sizeof(header);
2351                 cmdbuf.bufsz -= sizeof(header);
2352
2353                 switch (header.header.cmd_type) {
2354                 case RADEON_CMD_PACKET: 
2355                         DRM_DEBUG("RADEON_CMD_PACKET\n");
2356                         if (radeon_emit_packets( dev_priv, filp_priv, header, &cmdbuf )) {
2357                                 DRM_ERROR("radeon_emit_packets failed\n");
2358                                 return DRM_ERR(EINVAL);
2359                         }
2360                         break;
2361
2362                 case RADEON_CMD_SCALARS:
2363                         DRM_DEBUG("RADEON_CMD_SCALARS\n");
2364                         if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) {
2365                                 DRM_ERROR("radeon_emit_scalars failed\n");
2366                                 return DRM_ERR(EINVAL);
2367                         }
2368                         break;
2369
2370                 case RADEON_CMD_VECTORS:
2371                         DRM_DEBUG("RADEON_CMD_VECTORS\n");
2372                         if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) {
2373                                 DRM_ERROR("radeon_emit_vectors failed\n");
2374                                 return DRM_ERR(EINVAL);
2375                         }
2376                         break;
2377
2378                 case RADEON_CMD_DMA_DISCARD:
2379                         DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2380                         idx = header.dma.buf_idx;
2381                         if ( idx < 0 || idx >= dma->buf_count ) {
2382                                 DRM_ERROR( "buffer index %d (of %d max)\n",
2383                                            idx, dma->buf_count - 1 );
2384                                 return DRM_ERR(EINVAL);
2385                         }
2386
2387                         buf = dma->buflist[idx];
2388                         if ( buf->filp != filp || buf->pending ) {
2389                                 DRM_ERROR( "bad buffer %p %p %d\n",
2390                                            buf->filp, filp, buf->pending);
2391                                 return DRM_ERR(EINVAL);
2392                         }
2393
2394                         radeon_cp_discard_buffer( dev, buf );
2395                         break;
2396
2397                 case RADEON_CMD_PACKET3:
2398                         DRM_DEBUG("RADEON_CMD_PACKET3\n");
2399                         if (radeon_emit_packet3( dev, filp_priv, &cmdbuf )) {
2400                                 DRM_ERROR("radeon_emit_packet3 failed\n");
2401                                 return DRM_ERR(EINVAL);
2402                         }
2403                         break;
2404
2405                 case RADEON_CMD_PACKET3_CLIP:
2406                         DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2407                         if (radeon_emit_packet3_cliprect( dev, filp_priv, &cmdbuf, orig_nbox )) {
2408                                 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2409                                 return DRM_ERR(EINVAL);
2410                         }
2411                         break;
2412
2413                 case RADEON_CMD_SCALARS2:
2414                         DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2415                         if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) {
2416                                 DRM_ERROR("radeon_emit_scalars2 failed\n");
2417                                 return DRM_ERR(EINVAL);
2418                         }
2419                         break;
2420
2421                 case RADEON_CMD_WAIT:
2422                         DRM_DEBUG("RADEON_CMD_WAIT\n");
2423                         if (radeon_emit_wait( dev, header.wait.flags )) {
2424                                 DRM_ERROR("radeon_emit_wait failed\n");
2425                                 return DRM_ERR(EINVAL);
2426                         }
2427                         break;
2428                 default:
2429                         DRM_ERROR("bad cmd_type %d at %p\n", 
2430                                   header.header.cmd_type,
2431                                   cmdbuf.buf - sizeof(header));
2432                         return DRM_ERR(EINVAL);
2433                 }
2434         }
2435
2436
2437         DRM_DEBUG("DONE\n");
2438         COMMIT_RING();
2439         return 0;
2440 }
2441
2442
2443
2444 int radeon_cp_getparam( DRM_IOCTL_ARGS )
2445 {
2446         DRM_DEVICE;
2447         drm_radeon_private_t *dev_priv = dev->dev_private;
2448         drm_radeon_getparam_t param;
2449         int value;
2450
2451         if ( !dev_priv ) {
2452                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2453                 return DRM_ERR(EINVAL);
2454         }
2455
2456         DRM_COPY_FROM_USER_IOCTL( param, (drm_radeon_getparam_t *)data,
2457                              sizeof(param) );
2458
2459         DRM_DEBUG( "pid=%d\n", DRM_CURRENTPID );
2460
2461         switch( param.param ) {
2462         case RADEON_PARAM_GART_BUFFER_OFFSET:
2463                 value = dev_priv->gart_buffers_offset;
2464                 break;
2465         case RADEON_PARAM_LAST_FRAME:
2466                 dev_priv->stats.last_frame_reads++;
2467                 value = GET_SCRATCH( 0 );
2468                 break;
2469         case RADEON_PARAM_LAST_DISPATCH:
2470                 value = GET_SCRATCH( 1 );
2471                 break;
2472         case RADEON_PARAM_LAST_CLEAR:
2473                 dev_priv->stats.last_clear_reads++;
2474                 value = GET_SCRATCH( 2 );
2475                 break;
2476         case RADEON_PARAM_IRQ_NR:
2477                 value = dev->irq;
2478                 break;
2479         case RADEON_PARAM_GART_BASE:
2480                 value = dev_priv->gart_vm_start;
2481                 break;
2482         case RADEON_PARAM_REGISTER_HANDLE:
2483                 value = dev_priv->mmio_offset;
2484                 break;
2485         case RADEON_PARAM_STATUS_HANDLE:
2486                 value = dev_priv->ring_rptr_offset;
2487                 break;
2488 #if BITS_PER_LONG == 32
2489         /*
2490          * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2491          * pointer which can't fit into an int-sized variable.  According to
2492          * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2493          * not supporting it shouldn't be a problem.  If the same functionality
2494          * is needed on 64-bit platforms, a new ioctl() would have to be added,
2495          * so backwards-compatibility for the embedded platforms can be
2496          * maintained.  --davidm 4-Feb-2004.
2497          */
2498         case RADEON_PARAM_SAREA_HANDLE:
2499                 /* The lock is the first dword in the sarea. */
2500                 value = (long)dev->lock.hw_lock;
2501                 break;
2502 #endif
2503         case RADEON_PARAM_GART_TEX_HANDLE:
2504                 value = dev_priv->gart_textures_offset;
2505                 break;
2506         default:
2507                 return DRM_ERR(EINVAL);
2508         }
2509
2510         if ( DRM_COPY_TO_USER( param.value, &value, sizeof(int) ) ) {
2511                 DRM_ERROR( "copy_to_user\n" );
2512                 return DRM_ERR(EFAULT);
2513         }
2514         
2515         return 0;
2516 }
2517
2518 int radeon_cp_setparam( DRM_IOCTL_ARGS ) {
2519         DRM_DEVICE;
2520         drm_radeon_private_t *dev_priv = dev->dev_private;
2521         drm_file_t *filp_priv;
2522         drm_radeon_setparam_t sp;
2523
2524         if ( !dev_priv ) {
2525                 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2526                 return DRM_ERR( EINVAL );
2527         }
2528
2529         DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2530
2531         DRM_COPY_FROM_USER_IOCTL( sp, ( drm_radeon_setparam_t* )data,
2532                                   sizeof( sp ) );
2533
2534         switch( sp.param ) {
2535         case RADEON_SETPARAM_FB_LOCATION:
2536                 filp_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
2537                 break;
2538         default:
2539                 DRM_DEBUG( "Invalid parameter %d\n", sp.param );
2540                 return DRM_ERR( EINVAL );
2541         }
2542
2543         return 0;
2544 }